diff options
Diffstat (limited to 'usr/src/uts/common')
129 files changed, 2 insertions, 85611 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 4c6be296cb..c9d04fb798 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -23,7 +23,7 @@ # Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2011, 2014 by Delphix. All rights reserved. # Copyright (c) 2013 by Saso Kiselkov. All rights reserved. -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. +# Copyright 2018 Nexenta Systems, Inc. # Copyright 2016 Garrett D'Amore <garrett@damore.org> # Copyright 2017 Joyent, Inc. # Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved. @@ -886,72 +886,6 @@ SBP2_OBJS += cfgrom.o sbp2.o PMODEM_OBJS += pmodem.o pmodem_cis.o cis.o cis_callout.o cis_handlers.o cis_params.o -DSW_OBJS += dsw.o dsw_dev.o ii_tree.o - -NCALL_OBJS += ncall.o \ - ncall_stub.o - -RDC_OBJS += rdc.o \ - rdc_dev.o \ - rdc_io.o \ - rdc_clnt.o \ - rdc_prot_xdr.o \ - rdc_svc.o \ - rdc_bitmap.o \ - rdc_health.o \ - rdc_subr.o \ - rdc_diskq.o - -RDCSRV_OBJS += rdcsrv.o - -RDCSTUB_OBJS += rdc_stub.o - -SDBC_OBJS += sd_bcache.o \ - sd_bio.o \ - sd_conf.o \ - sd_ft.o \ - sd_hash.o \ - sd_io.o \ - sd_misc.o \ - sd_pcu.o \ - sd_tdaemon.o \ - sd_trace.o \ - sd_iob_impl0.o \ - sd_iob_impl1.o \ - sd_iob_impl2.o \ - sd_iob_impl3.o \ - sd_iob_impl4.o \ - sd_iob_impl5.o \ - sd_iob_impl6.o \ - sd_iob_impl7.o \ - safestore.o \ - safestore_ram.o - -NSCTL_OBJS += nsctl.o \ - nsc_cache.o \ - nsc_disk.o \ - nsc_dev.o \ - nsc_freeze.o \ - nsc_gen.o \ - nsc_mem.o \ - nsc_ncallio.o \ - nsc_power.o \ - nsc_resv.o \ - nsc_rmspin.o \ - nsc_solaris.o \ - nsc_trap.o \ - nsc_list.o -UNISTAT_OBJS += spuni.o \ - spcs_s_k.o - -NSKERN_OBJS += nsc_ddi.o \ - nsc_proc.o \ - nsc_raw.o \ - nsc_thread.o \ - nskernd.o - -SV_OBJS += sv.o - PMCS_OBJS += pmcs_attach.o pmcs_ds.o pmcs_intr.o pmcs_nvram.o pmcs_sata.o \ pmcs_scsa.o pmcs_smhba.o pmcs_subr.o pmcs_fwlog.o diff --git a/usr/src/uts/common/Makefile.rules 
b/usr/src/uts/common/Makefile.rules index 7c9834f3d3..6c48ec8930 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -24,7 +24,7 @@ # Copyright 2016 Garrett D'Amore <garrett@damore.org> # Copyright 2013 Saso Kiselkov. All rights reserved. # Copyright 2017 Joyent, Inc. -# Copyright 2016 Nexenta Systems, Inc. +# Copyright 2018 Nexenta Systems, Inc. # Copyright (c) 2016 by Delphix. All rights reserved. # @@ -466,38 +466,6 @@ $(OBJS_DIR)/%.o: $(KMECHKRB5_BASE)/profile/%.c $(COMPILE.c) $(KGSSDFLAGS) -o $@ $< $(CTFCONVERT_O) -$(OBJS_DIR)/%.o: $(UTSBASE)/common/avs/ncall/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/avs/ns/dsw/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/avs/ns/nsctl/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/avs/ns/rdc/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/avs/ns/sdbc/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/avs/ns/solaris/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/avs/ns/sv/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - -$(OBJS_DIR)/%.o: $(UTSBASE)/common/avs/ns/unistat/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - $(OBJS_DIR)/%.o: $(UTSBASE)/common/idmap/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -1781,30 +1749,6 @@ $(LINTS_DIR)/%.ln: $(COMMONBASE)/secflags/%.c $(LINTS_DIR)/%.ln: $(COMMONBASE)/smbios/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/avs/ncall/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/avs/ns/dsw/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/avs/ns/nsctl/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/avs/ns/rdc/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/avs/ns/sdbc/%.c - @($(LHEAD) 
$(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/avs/ns/solaris/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/avs/ns/sv/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/avs/ns/unistat/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - $(LINTS_DIR)/%.ln: $(UTSBASE)/common/des/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/common/avs/Makefile b/usr/src/uts/common/avs/Makefile deleted file mode 100644 index 84258beae6..0000000000 --- a/usr/src/uts/common/avs/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -# include global definitions - -AVS_SUBDIRS = ncall \ - ns \ - ns/dsw \ - ns/nsctl \ - ns/rdc \ - ns/sdbc \ - ns/solaris \ - ns/sv \ - ns/unistat - -# install rules -install_h:= TARGET= install_h -check:= TARGET= check -clean:= TARGET= clean -clobber:= TARGET= clobber - -# standards checking rules - -install_h check: $(AVS_SUBDIRS) - -clean clobber: ns/rdc - -$(AVS_SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) - -FRC: diff --git a/usr/src/uts/common/avs/Makefile.com b/usr/src/uts/common/avs/Makefile.com deleted file mode 100644 index 130bab096a..0000000000 --- a/usr/src/uts/common/avs/Makefile.com +++ /dev/null @@ -1,28 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -# uts/common/avs/Makefile.com -# -CFLAGS += -DNSC_MULTI_TERABYTE - -LINTFLAGS += -DNSC_MULTI_TERABYTE diff --git a/usr/src/uts/common/avs/ncall/Makefile b/usr/src/uts/common/avs/ncall/Makefile deleted file mode 100644 index db7613ebc1..0000000000 --- a/usr/src/uts/common/avs/ncall/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -# include global definitions -include ../../../../Makefile.master - -HDRS= ncall.h ncall_module.h - -ROOTDIRS= $(ROOT)/usr/include/sys/ncall - -ROOTHDRS= $(HDRS:%=$(ROOTDIRS)/%) - -CHECKHDRS= $(HDRS:%.h=%.check) - -# install rule -$(ROOTDIRS)/%: % - $(INS.file) - -install_h := TARGET= install_h -check := TARGET= check - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(SUBDIRS) $(ROOTDIRS) $(ROOTHDRS) - -$(ROOTDIRS): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ncall/ncall.c b/usr/src/uts/common/avs/ncall/ncall.c deleted file mode 100644 index ccf6648bc3..0000000000 --- a/usr/src/uts/common/avs/ncall/ncall.c +++ /dev/null @@ -1,769 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2016 by Delphix. All rights reserved. 
- */ - -/* - * Media independent RPC-like comms - */ - -#include <sys/types.h> -#include <sys/conf.h> -#include <sys/stat.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/modctl.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> - -#include <sys/varargs.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include "ncall.h" -#include "ncall_module.h" - -#include <sys/nsctl/nsvers.h> - -/* - * cb_ops functions. - */ - -static int ncallioctl(dev_t, int, intptr_t, int, cred_t *, int *); -static int ncallprint(dev_t, char *); - - -static struct cb_ops ncall_cb_ops = { - nulldev, /* open */ - nulldev, /* close */ - nulldev, /* strategy */ - ncallprint, - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - ncallioctl, - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, - NULL, /* NOT a stream */ - D_NEW | D_MP | D_64BIT, - CB_REV, - nodev, /* aread */ - nodev, /* awrite */ -}; - - -/* - * dev_ops functions. - */ - -static int ncall_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); -static int ncall_attach(dev_info_t *, ddi_attach_cmd_t); -static int ncall_detach(dev_info_t *, ddi_detach_cmd_t); - -static struct dev_ops ncall_ops = { - DEVO_REV, - 0, - ncall_getinfo, - nulldev, /* identify */ - nulldev, /* probe */ - ncall_attach, - ncall_detach, - nodev, /* reset */ - &ncall_cb_ops, - (struct bus_ops *)0, - NULL /* power */ -}; - -/* - * Module linkage. 
- */ - -extern struct mod_ops mod_driverops; - -static struct modldrv modldrv = { - &mod_driverops, - "nws:Kernel Call:" ISS_VERSION_STR, - &ncall_ops -}; - -static struct modlinkage modlinkage = { - MODREV_1, - &modldrv, - 0 -}; - -typedef struct ncall_modinfo_s { - struct ncall_modinfo_s *next; - ncall_module_t *module; -} ncall_modinfo_t; - -static dev_info_t *ncall_dip; /* Single DIP for driver */ -static kmutex_t ncall_mutex; - -static ncall_modinfo_t *ncall_modules; -static int ncall_active; - -static ncall_node_t ncall_nodeinfo; - -static int ncallgetnodes(intptr_t, int, int *); -extern void ncall_init_stub(void); - -int -_init(void) -{ - int error; - - mutex_init(&ncall_mutex, NULL, MUTEX_DRIVER, NULL); - - if ((error = mod_install(&modlinkage)) != 0) { - mutex_destroy(&ncall_mutex); - return (error); - } - - return (0); -} - - -int -_fini(void) -{ - int error; - - if ((error = mod_remove(&modlinkage)) != 0) - return (error); - - mutex_destroy(&ncall_mutex); - return (error); -} - - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -static int -ncall_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - switch (cmd) { - - case DDI_ATTACH: - ncall_dip = dip; - - if (ddi_create_minor_node(dip, "c,ncall", S_IFCHR, - 0, DDI_PSEUDO, 0) != DDI_SUCCESS) - goto failed; - - ddi_report_dev(dip); - - return (DDI_SUCCESS); - - default: - return (DDI_FAILURE); - } - -failed: - (void) ncall_detach(dip, DDI_DETACH); - return (DDI_FAILURE); -} - - -static int -ncall_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - switch (cmd) { - - case DDI_DETACH: - - /* - * If still active, then refuse to detach. - */ - - if (ncall_modules != NULL || ncall_active) - return (DDI_FAILURE); - - /* - * Remove all minor nodes. 
- */ - - ddi_remove_minor_node(dip, NULL); - ncall_dip = NULL; - - return (DDI_SUCCESS); - - default: - return (DDI_FAILURE); - } -} - - -/* ARGSUSED */ - -static int -ncall_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int rc = DDI_FAILURE; - - switch (infocmd) { - - case DDI_INFO_DEVT2DEVINFO: - *result = ncall_dip; - rc = DDI_SUCCESS; - break; - - case DDI_INFO_DEVT2INSTANCE: - /* - * We only have a single instance. - */ - *result = 0; - rc = DDI_SUCCESS; - break; - - default: - break; - } - - return (rc); -} - - -/* ARGSUSED */ -static int -ncallprint(dev_t dev, char *str) -{ - cmn_err(CE_WARN, "%s%d: %s", ddi_get_name(ncall_dip), - ddi_get_instance(ncall_dip), str); - - return (0); -} - - -int -ncall_register_module(ncall_module_t *mp, ncall_node_t *nodep) -{ - ncall_modinfo_t *new; - int rc = 0; - - if (mp == NULL || mp->ncall_version != NCALL_MODULE_VER) - return (EINVAL); - - new = kmem_alloc(sizeof (*new), KM_SLEEP); - - if (new != NULL) { - new->module = mp; - - mutex_enter(&ncall_mutex); - - new->next = ncall_modules; - ncall_modules = new; - - mutex_exit(&ncall_mutex); - } else { - rc = ENOMEM; - } - - *nodep = ncall_nodeinfo; /* structure copy */ - return (rc); -} - - -int -ncall_unregister_module(ncall_module_t *mod) -{ - ncall_modinfo_t **mpp; - int rc = ESRCH; - - mutex_enter(&ncall_mutex); - - for (mpp = &ncall_modules; *mpp != NULL; mpp = &((*mpp)->next)) { - if ((*mpp)->module == mod) { - *mpp = (*mpp)->next; - rc = 0; - break; - } - } - - mutex_exit(&ncall_mutex); - - return (rc); -} - - -static int -ncall_stop(void) -{ - ncall_modinfo_t *mod; - int rc = 0; - - mutex_enter(&ncall_mutex); - - while ((rc == 0) && ((mod = ncall_modules) != NULL)) { - mutex_exit(&ncall_mutex); - - rc = (*mod->module->ncall_stop)(); - - mutex_enter(&ncall_mutex); - } - - mutex_exit(&ncall_mutex); - - return (rc); -} - - -/* ARGSUSED */ -static int ncallioctl(dev_t dev, int cmd, intptr_t arg, int mode, - cred_t *crp, int *rvalp) -{ - 
ncall_node_t node = { 0, }; - int mirror; - int rc = 0; - - *rvalp = 0; - - if ((rc = drv_priv(crp)) != 0) - return (rc); - - switch (cmd) { - - case NC_IOC_START: - if (ncall_active) { - rc = EALREADY; - break; - } - - if (ddi_copyin((void *)arg, &node, sizeof (node), mode) < 0) - return (EFAULT); - - bcopy(&node, &ncall_nodeinfo, sizeof (ncall_nodeinfo)); - ncall_init_stub(); - ncall_active = 1; - break; - - case NC_IOC_STOP: - ncall_active = 0; - rc = ncall_stop(); - break; - - case NC_IOC_GETNODE: - if (!ncall_active) { - rc = ENONET; - break; - } - if (ddi_copyout(&ncall_nodeinfo, (void *)arg, - sizeof (ncall_nodeinfo), mode) < 0) { - rc = EFAULT; - break; - } - mirror = ncall_mirror(ncall_nodeinfo.nc_nodeid); - /* - * can't return -1, as this will mask the ioctl - * failure, so return 0. - */ - if (mirror == -1) - mirror = 0; - *rvalp = mirror; - break; - - case NC_IOC_GETNETNODES: - rc = ncallgetnodes(arg, mode, rvalp); - break; - - case NC_IOC_PING: - if (!ncall_active) { - rc = ENONET; - break; - } - - if (ddi_copyin((void *)arg, &node, sizeof (node), mode) < 0) { - rc = EFAULT; - break; - } - - node.nc_nodename[sizeof (node.nc_nodename)-1] = '\0'; - rc = ncall_ping(node.nc_nodename, rvalp); - break; - - default: - rc = EINVAL; - break; - } - - return (rc); -} - - -void -ncall_register_svc(int svc_id, void (*func)(ncall_t *, int *)) -{ - if (ncall_modules) - (*ncall_modules->module->ncall_register_svc)(svc_id, func); -} - - -void -ncall_unregister_svc(int svc_id) -{ - if (ncall_modules) - (*ncall_modules->module->ncall_unregister_svc)(svc_id); -} - - -int -ncall_nodeid(char *nodename) -{ - if (ncall_modules) - return ((ncall_modules->module->ncall_nodeid)(nodename)); - else - return (0); -} - - -char * -ncall_nodename(int nodeid) -{ - if (ncall_modules) - return ((*ncall_modules->module->ncall_nodename)(nodeid)); - else - return ("unknown"); -} - - -int -ncall_mirror(int nodeid) -{ - if (ncall_modules) - return 
((*ncall_modules->module->ncall_mirror)(nodeid)); - else - return (-1); -} - - -int -ncall_self(void) -{ - if (ncall_modules) - return ((*ncall_modules->module->ncall_self)()); - else - return (-1); -} - - -int -ncall_alloc(int host_id, int flags, int net, ncall_t **ncall_p) -{ - int rc = ENOLINK; - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_alloc)(host_id, - flags, net, ncall_p); - - return (rc); -} - - -int -ncall_timedsend(ncall_t *ncall, int flags, int svc_id, - struct timeval *t, ...) -{ - va_list ap; - int rc = ENOLINK; - - va_start(ap, t); - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_timedsend)(ncall, flags, - svc_id, t, ap); - - va_end(ap); - - return (rc); -} - -int -ncall_timedsendnotify(ncall_t *ncall, int flags, int svc_id, - struct timeval *t, void (*ncall_callback)(ncall_t *, void *), - void *vptr, ...) -{ - va_list ap; - int rc = ENOLINK; - - va_start(ap, vptr); - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_timedsendnotify)(ncall, - flags, svc_id, t, ncall_callback, vptr, ap); - va_end(ap); - - return (rc); -} - -int -ncall_broadcast(ncall_t *ncall, int flags, int svc_id, - struct timeval *t, ...) -{ - va_list ap; - int rc = ENOLINK; - - va_start(ap, t); - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_broadcast)(ncall, flags, - svc_id, t, ap); - va_end(ap); - - return (rc); -} - - -int -ncall_send(ncall_t *ncall, int flags, int svc_id, ...) -{ - va_list ap; - int rc = ENOLINK; - - va_start(ap, svc_id); - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_timedsend)(ncall, flags, - svc_id, NULL, ap); - - va_end(ap); - - return (rc); -} - - -int -ncall_read_reply(ncall_t *ncall, int n, ...) 
-{ - va_list ap; - int rc = ENOLINK; - - va_start(ap, n); - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_read_reply)(ncall, n, ap); - - va_end(ap); - - return (rc); -} - - -void -ncall_reset(ncall_t *ncall) -{ - if (ncall_modules) - (*ncall_modules->module->ncall_reset)(ncall); -} - - -void -ncall_free(ncall_t *ncall) -{ - if (ncall_modules) - (*ncall_modules->module->ncall_free)(ncall); -} - - -int -ncall_put_data(ncall_t *ncall, void *data, int len) -{ - int rc = ENOLINK; - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_put_data)(ncall, data, len); - - return (rc); -} - - -int -ncall_get_data(ncall_t *ncall, void *data, int len) -{ - int rc = ENOLINK; - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_get_data)(ncall, data, len); - - return (rc); -} - - -int -ncall_sender(ncall_t *ncall) -{ - int rc = -1; - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_sender)(ncall); - - return (rc); -} - - -void -ncall_reply(ncall_t *ncall, ...) -{ - va_list ap; - - if (ncall_modules) { - va_start(ap, ncall); - - (*ncall_modules->module->ncall_reply)(ncall, ap); - - va_end(ap); - } -} - - -void -ncall_pend(ncall_t *ncall) -{ - if (ncall_modules) - (*ncall_modules->module->ncall_pend)(ncall); -} - - -void -ncall_done(ncall_t *ncall) -{ - if (ncall_modules) - (*ncall_modules->module->ncall_done)(ncall); -} - -int -ncall_ping(char *nodename, int *up) -{ - int rc = ENOLINK; - if (ncall_modules) - rc = (*ncall_modules->module->ncall_ping)(nodename, up); - return (rc); -} - -int -ncall_maxnodes() -{ - int rc = 0; - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_maxnodes)(); - - return (rc); -} - -int -ncall_nextnode(void **vptr) -{ - int rc = 0; - - if (ncall_modules) - rc = (*ncall_modules->module->ncall_nextnode)(vptr); - - return (rc); -} - -int -ncall_errcode(ncall_t *ncall, int *result) -{ - int rc = ENOLINK; - if (ncall_modules) - rc = (*ncall_modules->module->ncall_errcode)(ncall, result); - - return (rc); -} - 
-static int -ncallgetnodes(intptr_t uaddr, int mode, int *rvalp) -{ - ncall_node_t *nodelist; - int slot; - int rc; - int nodecnt; - int nodeid; - void *sequence; - char *nodename; - - rc = 0; - - nodecnt = ncall_maxnodes(); - if (nodecnt <= 0) { - return (ENONET); - } - - /* - * If the user passes up a null address argument, then - * they don't want the actual nodes, but the configured - * maximum, so space can be correctly allocated. - */ - - if (uaddr == NULL) { - *rvalp = nodecnt; - return (0); - } - nodelist = kmem_zalloc(sizeof (*nodelist) * nodecnt, KM_SLEEP); - - slot = 0; - sequence = NULL; - while ((nodeid = ncall_nextnode(&sequence)) > 0) { - nodename = ncall_nodename(nodeid); - /* - * There is a small window where nextnode can - * return a valid nodeid, and it being disabled - * which will get nodename to return "". - * Discard the nodeid if this happens. - */ - if (strlen(nodename) > 0) { - int size = sizeof (nodelist[slot].nc_nodename) - 1; - ASSERT(slot < nodecnt); - /* - * make sure its null terminated when it - * gets to userland. - */ - nodelist[slot].nc_nodename[size] = 0; - (void) strncpy(nodelist[slot].nc_nodename, nodename, - size); - nodelist[slot].nc_nodeid = nodeid; - slot++; - } - } - if (ddi_copyout(nodelist, (void *)uaddr, sizeof (*nodelist) * slot, - mode) < 0) { - rc = EFAULT; - } else { - /* - * tell them how many have come back. - */ - *rvalp = slot; - } - kmem_free(nodelist, sizeof (*nodelist) * nodecnt); - return (rc); -} diff --git a/usr/src/uts/common/avs/ncall/ncall.conf b/usr/src/uts/common/avs/ncall/ncall.conf deleted file mode 100644 index ab9b05bda6..0000000000 --- a/usr/src/uts/common/avs/ncall/ncall.conf +++ /dev/null @@ -1,26 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. 
-# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ncall (media independent messaging) -# -name="ncall" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/avs/ncall/ncall.h b/usr/src/uts/common/avs/ncall/ncall.h deleted file mode 100644 index 4c1328e8d6..0000000000 --- a/usr/src/uts/common/avs/ncall/ncall.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _NCALL_H -#define _NCALL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef DS_DDICT -#include <sys/time.h> -#endif - -#ifdef _KERNEL - -/* - * ncall_t is opaque RPC pointer - */ -typedef struct ncall_s { - int opaque; -} ncall_t; - -#define NCALL_DATA_SZ 8192 /* ncall_put/get_data max size */ -#define NCALL_BROADCAST_ID (-2) /* magic broadcast nodeid */ -/* - * ncall send flags - */ -#define NCALL_PEND 1 /* disconnect immediately */ -#define NCALL_UNUSED 2 /* unused */ -#define NCALL_ASYNC 4 /* asynchronous send (ncall_free implied) */ -#define NCALL_RDATA 8 /* allocate a buffer to receive data in */ - -extern void ncall_register_svc(int, void (*)(ncall_t *, int *)); -extern void ncall_unregister_svc(int); - -extern int ncall_nodeid(char *); -extern char *ncall_nodename(int); -extern int ncall_mirror(int); -extern int ncall_self(void); - -extern int ncall_alloc(int, int, int, ncall_t **); -extern int ncall_timedsend(ncall_t *, int, int, struct timeval *, ...); -extern int ncall_timedsendnotify(ncall_t *, int, int, struct timeval *, - void (*)(ncall_t *, void *), void *, ...); -extern int ncall_broadcast(ncall_t *, int, int, struct timeval *, ...); -extern int ncall_send(ncall_t *, int, int, ...); -extern int ncall_read_reply(ncall_t *, int, ...); -extern void ncall_reset(ncall_t *); -extern void ncall_free(ncall_t *); - -extern int ncall_put_data(ncall_t *, void *, int); -extern int ncall_get_data(ncall_t *, void *, int); - -extern int ncall_sender(ncall_t *); -extern void ncall_reply(ncall_t *, ...); -extern void ncall_pend(ncall_t *); -extern void ncall_done(ncall_t *); -extern int ncall_ping(char *, int *); -extern int ncall_maxnodes(void); -extern int ncall_nextnode(void **); -extern int ncall_errcode(ncall_t *, int *); - -#endif /* _KERNEL */ - -#define NCALLNMLN 257 - -/* - * Basic node info - */ -typedef struct ncall_node_s { - char nc_nodename[NCALLNMLN]; /* Nodename */ - int nc_nodeid; /* Nodeid */ -} ncall_node_t; - - -#define 
_NCIOC_(x) (('N'<<16)|('C'<<8)|(x)) - -#define NC_IOC_GETNODE _NCIOC_(0) /* return this node */ -#define NC_IOC_START _NCIOC_(1) /* ncall core and stubs start */ -#define NC_IOC_STOP _NCIOC_(2) /* ncall stop */ -#define NC_IOC_GETNETNODES _NCIOC_(3) /* ncalladm -i */ -#define NC_IOC_PING _NCIOC_(4) /* ncalladm -p */ -/* - * _NCIOC_(5) to _NCIOC_(20) are reserved for the implementation module - */ - -#define NCALL_NSC 100 /* 100 - 109 */ -#define NCALL_UNUSED1 110 /* 110 - 119 */ -#define NCALL_UNUSED2 120 /* 120 - 129 */ -#define NCALL_SDBC 130 /* 130 - 149 */ -#define NCALL_STE 150 /* 150 - 159 */ -#define NCALL_HM 160 /* 160 - 169 */ - -#ifdef __cplusplus -} -#endif - -#endif /* _NCALL_H */ diff --git a/usr/src/uts/common/avs/ncall/ncall_module.h b/usr/src/uts/common/avs/ncall/ncall_module.h deleted file mode 100644 index 552f88c615..0000000000 --- a/usr/src/uts/common/avs/ncall/ncall_module.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _NCALL_MODULE_H -#define _NCALL_MODULE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -#define NCALL_MODULE_VER 4 - -typedef struct ncall_module_s { - int ncall_version; - char *ncall_name; - - int (*ncall_stop)(void); - - void (*ncall_register_svc)(int, void (*)(ncall_t *, int *)); - void (*ncall_unregister_svc)(int); - - int (*ncall_nodeid)(char *); - char *(*ncall_nodename)(int); - int (*ncall_mirror)(int); - int (*ncall_self)(void); - - int (*ncall_alloc)(int, int, int, ncall_t **); - int (*ncall_timedsend)(ncall_t *, int, int, struct timeval *, - va_list); - int (*ncall_timedsendnotify)(ncall_t *, int, int, struct timeval *, - void (*)(ncall_t *, void *), void *, va_list); - int (*ncall_broadcast)(ncall_t *, int, int, struct timeval *, - va_list); - int (*ncall_read_reply)(ncall_t *, int, va_list); - void (*ncall_reset)(ncall_t *); - void (*ncall_free)(ncall_t *); - - int (*ncall_put_data)(ncall_t *, void *, int); - int (*ncall_get_data)(ncall_t *, void *, int); - - int (*ncall_sender)(ncall_t *); - void (*ncall_reply)(ncall_t *, va_list); - void (*ncall_pend)(ncall_t *); - void (*ncall_done)(ncall_t *); - - int (*ncall_ping)(char *, int *); - int (*ncall_maxnodes)(void); - int (*ncall_nextnode)(void **); - int (*ncall_errcode)(ncall_t *, int *); -} ncall_module_t; - -extern int ncall_register_module(ncall_module_t *, ncall_node_t *); -extern int ncall_unregister_module(ncall_module_t *); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _NCALL_MODULE_H */ diff --git a/usr/src/uts/common/avs/ncall/ncall_stub.c b/usr/src/uts/common/avs/ncall/ncall_stub.c deleted file mode 100644 index 3756320e7e..0000000000 --- a/usr/src/uts/common/avs/ncall/ncall_stub.c +++ /dev/null @@ -1,265 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/errno.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> - -#include <sys/varargs.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include "ncall.h" -#include "ncall_module.h" - -static ncall_node_t nodeinfo; - - -/* ARGSUSED */ -void -ncall_stub_register_svc(int svc_id, void (*func)(ncall_t *, int *)) -{ - ; -} - -/* ARGSUSED */ -void -ncall_stub_unregister_svc(int svc_id) -{ - ; -} - -/* ARGSUSED */ -int -ncall_stub_nodeid(char *nodename) -{ - return (nodeinfo.nc_nodeid); -} - -/* ARGSUSED */ -char * -ncall_stub_nodename(int nodeid) -{ - if (nodeid == nodeinfo.nc_nodeid) - return (nodeinfo.nc_nodename); - else - return (""); -} - -/* ARGSUSED */ -int -ncall_stub_mirror(int nodeid) -{ - return (-1); -} - -/* ARGSUSED */ -int -ncall_stub_self(void) -{ - return (nodeinfo.nc_nodeid); -} - -/* ARGSUSED */ -int -ncall_stub_alloc(int host_id, int flags, int net, ncall_t **ncall_p) -{ - return (ENOLINK); -} - -/* ARGSUSED */ -int -ncall_stub_timedsend(ncall_t *ncall, int flags, int svc_id, - struct timeval *t, va_list ap) -{ - return (ENOLINK); -} - -/* ARGSUSED */ -int -ncall_stub_timedsendnotify(ncall_t *ncall, int flags, int svc_id, - struct timeval *t, void (*ncall_callback)(ncall_t *, void *), void 
*vptr, - va_list ap) -{ - return (ENOLINK); -} - -/* ARGSUSED */ -int -ncall_stub_broadcast(ncall_t *ncall, int flags, int svc_id, - struct timeval *t, va_list ap) -{ - return (ENOLINK); -} - -/* ARGSUSED */ -int -ncall_stub_read_reply(ncall_t *ncall, int n, va_list ap) -{ - return (ENOLINK); -} - -/* ARGSUSED */ -void -ncall_stub_reset(ncall_t *ncall) -{ - ; -} - -/* ARGSUSED */ -void -ncall_stub_free(ncall_t *ncall) -{ - ; -} - -/* ARGSUSED */ -int -ncall_stub_put_data(ncall_t *ncall, void *data, int len) -{ - return (ENOLINK); -} - -/* ARGSUSED */ -int -ncall_stub_get_data(ncall_t *ncall, void *data, int len) -{ - return (ENOLINK); -} - -/* ARGSUSED */ -int -ncall_stub_sender(ncall_t *ncall) -{ - return (nodeinfo.nc_nodeid); -} - -/* ARGSUSED */ -void -ncall_stub_reply(ncall_t *ncall, va_list ap) -{ - ; -} - -/* ARGSUSED */ -void -ncall_stub_pend(ncall_t *ncall) -{ - ; -} - -/* ARGSUSED */ -void -ncall_stub_done(ncall_t *ncall) -{ - ; -} - -int -ncall_stub_ping(char *nodename, int *up) -{ - int rc = 0; - - if (strcmp(nodename, nodeinfo.nc_nodename) == 0) { - *up = 1; - } else { - rc = EHOSTUNREACH; - *up = 0; - } - - return (rc); -} - -/* ARGSUSED */ -int -ncall_stub_maxnodes() -{ - return (0); -} - - -/* ARGSUSED */ -int -ncall_stub_nextnode(void **vptr) -{ - return (0); -} - -/* ARGSUSED */ -int -ncall_stub_errcode(ncall_t *ncall, int *result) -{ - return (ENOLINK); -} - - - - -static int ncall_stub_stop(void); - -static ncall_module_t ncall_stubinfo = { - NCALL_MODULE_VER, - "ncall stubs", - ncall_stub_stop, - ncall_stub_register_svc, - ncall_stub_unregister_svc, - ncall_stub_nodeid, - ncall_stub_nodename, - ncall_stub_mirror, - ncall_stub_self, - ncall_stub_alloc, - ncall_stub_timedsend, - ncall_stub_timedsendnotify, - ncall_stub_broadcast, - ncall_stub_read_reply, - ncall_stub_reset, - ncall_stub_free, - ncall_stub_put_data, - ncall_stub_get_data, - ncall_stub_sender, - ncall_stub_reply, - ncall_stub_pend, - ncall_stub_done, - ncall_stub_ping, - 
ncall_stub_maxnodes, - ncall_stub_nextnode, - ncall_stub_errcode -}; - - -static int -ncall_stub_stop(void) -{ - bzero(&nodeinfo, sizeof (nodeinfo)); - return (ncall_unregister_module(&ncall_stubinfo)); -} - - -void -ncall_init_stub(void) -{ - (void) ncall_register_module(&ncall_stubinfo, &nodeinfo); -} diff --git a/usr/src/uts/common/avs/ns/Makefile b/usr/src/uts/common/avs/ns/Makefile deleted file mode 100644 index 4c770fdb16..0000000000 --- a/usr/src/uts/common/avs/ns/Makefile +++ /dev/null @@ -1,57 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -# include global definitions -include ../../../../Makefile.master - -HDRS= \ - model.h \ - contract.h \ - nsctl_inter.h \ - ncall_inter.h \ - nsctl.h - -ROOTDIR= $(ROOT)/usr/include/sys/nsctl - -ROOTHDRS= $(HDRS:%=$(ROOTDIR)/%) - -# install rules -$(ROOTDIR)/%: % - $(INS.file) - -# standards checking rules -%.check: %.h - $(DOT_H_CHECK) - -CHECKHDRS= $(HDRS:%.h=%.check) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIR) $(ROOTHDRS) - -$(ROOTDIR): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/contract.h b/usr/src/uts/common/avs/ns/contract.h deleted file mode 100644 index 0a4ec90e43..0000000000 --- a/usr/src/uts/common/avs/ns/contract.h +++ /dev/null @@ -1,535 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * The sole purpose of this file is to document our violations of the DDI - * in Solaris and to get ddict to run on the data services stack. - * Definitions and declarations contained in this file are never compiled - * into the code. It is only included if we are running ddict on our src. 
- * - * IMPORTANT NOTE: - * Many of the declarations are not correct. It does not matter. - * Structure declarations only define the fields we require. - * Structures which we use in an opaque manner are defined as void * - */ - -#ifndef _SYS_CONTRACT_H -#define _SYS_CONTRACT_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Define our interfaces for nsctl because ddict is stupid - * about intermodule dependencies - */ -#include <sys/nsctl/nsctl_inter.h> - -/* - * Define our ncall interfaces - */ -#include <sys/nsctl/ncall_inter.h> - -/* - * The STRUCT_DECL definitions in the ddict headers are fouled up - * we include our own model.h here to redefine it to avoid errors. - */ -#if !defined(_SunOS_5_6) -#include <sys/nsctl/model.h> -#endif - -/* - * General violations - * Everybody violates these - * Why are they called ddi if it is not part of it? - */ - -#define DDI_PROP_NOTPROM 0 - -int ddi_rele_driver(void) { } -int ddi_hold_installed_driver(void) { } - -/* - * SV module violations - */ -void *curthread; -int devcnt; - -/* - * The following from vnode.h - */ -typedef struct vode { - int v_lock; /* SDBC uses this too */ - int v_type; /* nskern too */ - int v_rdev; /* nskern too */ -} vnode_t; - -#define FOLLOW 0 -#define NULLVPP NULL -#define AT_RDEV 0 -#define VOP_GETATTR(vp, vap, f, cr) ((void)0) -#define VN_RELE(vp) ((void)0) - -/* - * The fields we use from vattr_t - */ -typedef struct vattr { - uint_t va_mask; - dev_t va_rdev; - int va_size; /* nskern */ -} vattr_t; - -int lookupname(void, void, void, void, void) { } - -/* - * End of SV module violations - */ - -/* - * DSW/II module violations - */ - -/* - * This is really bogus that ddict does not understand sys/inttypes.h - */ -#define INT32_MIN 0 -#define INT32_MAX 0 -#define INT64_MAX 0 - -/* - * End of DSW/II module violations - */ - -/* - * UNISTAT module violations - */ - -void mod_miscops; -typedef enum { B_FALSE, B_TRUE } boolean_t; - -/* - * End of UNISTAT module violations - */ - -/* - * 
NSCTL module violations - */ -#define ERESTART 0 -#define EUSERS 0 -#define ENAMETOOLONG 0 -#define ENOSYS 0 -#define FOPEN 0 -int ddi_name_to_major() { } -/* - * End of NSCTL module violations - */ - -/* - * NSKERN module violations - */ -#define UL_GETFSIZE 0 -#define USHRT_MAX 0 - -typedef u_longlong_t rlim64_t; -int ulimit() { } -int maxphys; - -#define AT_SIZE 0 -#define VBLK 0 -#define VCHR 0 -#define VREG 0 -#define VLNK 0 - -#define VOP_CLOSE(vp, f, c, o, cr) ((void)0) -#define VOP_RWLOCK(vp, w) ((void)0) -#define VOP_RWUNLOCK(vp, w) ((void)0) -#define VOP_READ(vp, uiop, iof, cr) ((void)0) -#define VOP_WRITE(vp, uiop, iof, cr) ((void)0) - -int vn_open(char *pnamep, void seg, int filemode, int createmode, - struct vnode **vpp, void crwhy, mode_t umask) { } - -/* - * End of NSKERN module violations - */ - -/* - * NVRAM module violations - */ -#define MMU_PAGESIZE 0 - -#ifndef MAXNAMELEN -#define MAXNAMELEN 1 -#endif - -#define DEVMAP_DEFAULTS 0 -#define PFN_INVALID -1 - -char hw_serial[1]; -int mmu_ptob(void arg) { } -int roundup(void arg) { } - -/* - * End of NVRAM module violations - */ - -/* - * RDCSVR (SNDR) module - * Contract PSARC 2001/699 - */ -#define DUP_DONE 0 -#define DUP_ERROR 0 -#define DUP_INPROGRESS 0 -#define DUP_NEW 0 -#define DUP_DROP 0 - -#define RPC_MAXDATASIZE 0 - - -typedef void * file_t; /* opaque */ -typedef void SVCXPRT; /* opaque */ -typedef void SVCMASTERXPRT; /* opaque */ -typedef void xdrproc_t; /* opaque */ -typedef int enum_t; - -typedef struct svc_req { /* required fields */ - int rq_vers; - int rq_proc; -} svc_req_t; - -void SVC_FREEARGS(void xprt, void a, void *b) { } -void SVC_DUP(void xprt, void req, void i, void j, void *dr) { } -void svcerr_systemerr(void xprt) { } -void svcerr_noproc(void xprt) { } -void SVC_DUPDONE(void xprt, void dr, void a, void b, void c) { } - -SVCXPRT *svc_tli_kcreate(void *f, void *n, void *b, void **x, void *t, - uint_t c, uint_t d) { } - -/* - * non-ddi not under contracts - */ -struct netbuf { 
- int maxlen; - int len; - char *buf; -} - -/* - * End of RDCSRV module Contracts - */ - -/* - * RDC (SNDR) module - * Contract PSARC 2001/699 - */ - -typedef u_longlong_t rpcproc_t; -typedef u_longlong_t xdrproc_t; -typedef u_longlong_t rpcvers_t; -#define __dontcare__ -1 -#define RPC_INTR 0 -#define RPC_SUCCESS 0 -#define RPC_TLIERROR 0 -#define RPC_XPRTFAILED 0 -#define RPC_VERSMISMATCH 0 -#define RPC_PROGVERSMISMATCH 0 -#define RPC_INPROGRESS 0 - -#define ENOEXEC 0 -#define EBADF 0 - -/* - * XDR routines - * from rpc/xdr.h - */ -typedef void * XDR; /* opaque */ -int xdr_void() { } -int xdr_int() { } -int xdr_union() { } -int xdr_enum() { } -int xdr_u_int() { } -int xdr_u_longlong_t() { } -int xdr_opaque() { } -int xdr_bytes() { } -int xdr_array() { } -#define NULL_xdrproc_t ((xdrproc_t)0) - -/* - * The following imported rpc/clnt.h - */ - -/* Client is mostly opaque exccept for the following */ - -typedef struct __client { /* required fields */ - void *cl_auth; - bool_t cl_nosignal; -} CLIENT; - -#define CLSET_PROGRESS 0 -#define KNC_STRSIZE 128 -struct knetconfig { - unsigned int knc_semantics; - caddr_t knc_protofmly; - caddr_t knc_proto; - dev_t knc_rdev; -}; - -void *clnt_sperrno() { } -void IS_UNRECOVERABLE_RPC(a) { } -void CLNT_CONTROL(cl, request, info) { } -void AUTH_DESTROY(void *a) { } -void CLNT_DESTROY(void *a) { } - -int clnt_tli_kcreate(void *a, void *b, void c, void d, void e, void f, - void *g, void **h) { } - -int clnt_tli_kinit(void *h, void *config, void *addr, uint_t a, int b, - void *c) { } - -void CLNT_CALL(void, void, void, void, void, void, void) { } - -/* - * The following imported from rpc/svc.h - */ -void svc_sendreply() { } -void svcerr_decode() { } -void SVC_GETARGS() { } - -/* - * The following imported from sys/file.h - */ - -void getf(void) { } -void releasef(void) { } - -/* - * Not under contract - */ -void sigintr(void) { } -void sigunintr(void) { } -dev_t expldev() { } - -/* - * End of RDC module - */ - -/* - * SDBC module 
violations - */ - -/* - * devid uses internal structure - * from sys/ddi_impldefs.h - */ -typedef struct impl_devid { - uchar_t did_type_hi; - uchar_t did_type_lo; -} impl_devid_t; - -#define DEVID_GETTYPE(devid) 0 -#define DEVID_SCSI_SERIAL 0 - -#define ENOLINK 0 /* NCALL too */ -#define E2BIG 0 -#define ENOENT 0 -#define EIDRM 0 - -#define B_KERNBUF 0 -#define KSTAT_TYPE_RAW 0 -#define MAXPATHLEN 0 - -#define VN_HOLD(bp) ((void)0) - -/* Page list IO stuff */ -typedef struct page { - int v_count; /* sdbc */ -} page_t; -page_t kvp; /* We use the kernel segment */ -int page_add(void) { } -int page_find(void) { } -int page_list_concat(void) { } -int pageio_setup(void) { } -int pageio_done(void) { } - -void kobj_getsymvalue(void) { } -int ddi_dev_pathname(void) { } - -/* - * HACK ALERT - * struct buf hack for ddict. - * SDBC currently violates in struct buf - * b_pages - * b_proc - * which we will define as the pad fields for ddict since - * we can not overload the definition of struct buf with our own stuff. 
- */ - -#define b_pages b_pad7 /* b_pages in struct buf */ -#define b_proc b_pad8 /* b_proc in struct buf */ -#define b_forw b_pad1 /* b_forw in struct buf */ -#define b_back b_pad2 /* b_back in struct buf */ - -/* - * End of SDBC moduel violations - */ - -/* - * SCMTEST module violations - */ - -#define ESRCH 0 /* NCALL too */ - -/* - * End of SCMTEST module violations - */ -/* - * SFTM module violations - * Note: XXX This list is currently incomplete - */ - -typedef void * cqe_t; /* opaque */ -typedef void * fcal_packet_t; /* opaque */ -typedef void * soc_response_t; /* opaque */ -typedef void * la_els_logi_t; /* opaque */ -typedef void * la_els_adisc_t; /* opaque */ -typedef void * fcp_rsp_t; /* opaque */ -typedef void * soc_request_t; /* opaque */ -typedef void * els_payload_t; /* opaque */ -typedef void * la_els_logo_t; /* opaque */ -typedef void * fc_frame_header_t; /* opaque */ - -typedef struct la_els_prli_s { - uchar_t ls_code; - uchar_t page_length; - ushort_t payload_length; - uchar_t service_params[1]; -} la_els_prli_t; - -typedef la_els_prli_t la_els_prli_reply_t; -typedef la_els_prli_t la_els_prlo_t; -typedef la_els_prli_t la_els_prlo_reply_t; - -/* - * The following from /usr/include/sys/fc4/fcp.h - */ -typedef struct fcp_cntl { - uchar_t cntl_reserved_1 : 5, - cntl_qtype : 3; - uchar_t cntl_kill_tsk : 1, - cntl_clr_aca : 1, - cntl_reset : 1, - cntl_reserved_2 : 2, - cntl_clr_tsk : 1, - cntl_abort_tsk : 1, - cntl_reserved_3 : 1; - uchar_t cntl_reserved_4 : 6, - cntl_read_data : 1, - cntl_write_data : 1; -} fcp_cntl_t; - -typedef struct fcp_ent_addr { - ushort_t ent_addr_0; - ushort_t ent_addr_1; - ushort_t ent_addr_2; - ushort_t ent_addr_3; -} fcp_ent_addr_t; - -typedef struct fcp_cmd_s { - fcp_ent_addr_t fcp_ent_addr; - fcp_cntl_t fcp_cntl; - uchar_t fcp_cdb[1]; - int fcp_data_len; -} fcp_cmd_t; - -typedef struct fcal_transport { - uchar_t dummy1; - uchar_t dummy2; -} fcal_transport_t; - -/* - * End of SFTM module violations - */ - -/* - * STE 
module violations - */ - -typedef void la_wwn_t; /* opaque */ -/* WWN formats from sys/fcal/fcal_linkapp.h */ -typedef union la_wwn { - uchar_t raw_wwn[8]; - struct { - uint_t naa_id : 4; - uint_t nport_id : 12; - uint_t wwn_hi : 16; - uint_t wwn_lo; - } w; -} la_wwn_t; - -insque(void) { } -remque(void) { } -snprintf(void) { } - -/* - * STE uses inq_serial and inq_ackqreqq from struct scsi_inquiry - */ -#define inq_serial inq_pid -#define inq_ackqreqq inq_pid -/* - * End of STE module violations - */ - -/* - * NCALL module violations - */ -#define ENONET 0 - -/* NCALLSRV */ -typedef int bool_t; - -/* NCALLIP */ -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -#define ERANGE 0 -#define ENODATA 0 - -#define RPC_TIMEDOUT 0 - -/* - * End of NCALL violations - */ -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_CONTRACT_H */ diff --git a/usr/src/uts/common/avs/ns/dsw/Makefile b/usr/src/uts/common/avs/ns/dsw/Makefile deleted file mode 100644 index 9191e00d76..0000000000 --- a/usr/src/uts/common/avs/ns/dsw/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
-# Use is subject to license terms. -# -# include global definitions -include ../../../../../Makefile.master - -HDRS= dsw.h dsw_dev.h - -ROOTDIRS= $(ROOT)/usr/include/sys/nsctl - -ROOTHDRS= $(HDRS:%=$(ROOTDIRS)/%) - -CHECKHDRS= $(HDRS:%.h=%.check) - -# install rule -$(ROOTDIRS)/%: % - $(INS.file) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIRS) $(ROOTHDRS) - -$(ROOTDIRS): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/dsw/dsw.c b/usr/src/uts/common/avs/ns/dsw/dsw.c deleted file mode 100644 index 270f97dfc2..0000000000 --- a/usr/src/uts/common/avs/ns/dsw/dsw.c +++ /dev/null @@ -1,655 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#define _DSW_ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/conf.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/cred.h> -#include <sys/file.h> -#include <sys/ddi.h> -#include <sys/unistat/spcs_s.h> -#include <sys/dkio.h> - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/nsvers.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "dsw.h" -#include "dsw_dev.h" - -#define DIDINIT 0x01 -#define DIDNODES 0x02 - - -static int iiopen(dev_t *devp, int flag, int otyp, cred_t *crp); -static int iiclose(dev_t dev, int flag, int otyp, cred_t *crp); -static int iiprint(dev_t dev, char *str); -static int iiioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, - int *rvp); -static int iiprobe(dev_info_t *dip); -static int iiattach(dev_info_t *dip, ddi_attach_cmd_t cmd); -static int iidetach(dev_info_t *dip, ddi_detach_cmd_t cmd); -static int iistrat(struct buf *); -static int iiread(); - - -static kstat_t *ii_gkstat = NULL; -iigkstat_t iigkstat = { - { "ii_debug", KSTAT_DATA_ULONG }, - { "ii_bitmap", KSTAT_DATA_ULONG }, - { "ii_throttle_unit", KSTAT_DATA_ULONG }, - { "ii_throttle_delay", KSTAT_DATA_ULONG }, - { "ii_copy_direct", KSTAT_DATA_ULONG }, - { "num-sets", KSTAT_DATA_ULONG }, - { "assoc-over", KSTAT_DATA_ULONG }, - { "spilled-over", KSTAT_DATA_ULONG }, -}; - -static struct cb_ops ii_cb_ops = { - iiopen, - iiclose, - iistrat, /* dummy strategy */ - iiprint, - nodev, /* no dump */ - iiread, /* dummy read */ - nodev, /* no write */ - iiioctl, - nodev, /* no devmap */ - nodev, /* no mmap */ - nodev, /* no segmap */ - nochpoll, - ddi_prop_op, - NULL, /* not STREAMS */ - D_NEW | D_MP -}; - -static struct dev_ops ii_ops = { - DEVO_REV, - 0, - nodev, /* no getinfo */ - nulldev, - iiprobe, - iiattach, - iidetach, - nodev, /* no reset */ - &ii_cb_ops, - (struct bus_ops *)NULL -}; - -static struct modldrv 
ii_ldrv = { - &mod_driverops, - "nws:Point-in-Time:" ISS_VERSION_STR, - &ii_ops -}; - -static struct modlinkage ii_modlinkage = { - MODREV_1, - &ii_ldrv, - NULL -}; - -struct ii_state { - dev_info_t *dip; - int instance; -}; - -/* used for logging sysevent, gets set in _ii_attach */ -dev_info_t *ii_dip = NULL; - -extern _ii_info_t *_ii_info_top; -extern _ii_lsthead_t *_ii_cluster_top; -extern _ii_lsthead_t *_ii_group_top; -extern kmutex_t _ii_cluster_mutex; -extern kmutex_t _ii_group_mutex; - -const int dsw_major_rev = ISS_VERSION_MAJ; /* Major release number */ -const int dsw_minor_rev = ISS_VERSION_MIN; /* Minor release number */ -const int dsw_micro_rev = ISS_VERSION_MIC; /* Micro release number */ -const int dsw_baseline_rev = ISS_VERSION_NUM; /* Baseline revision */ -static void *ii_statep; - -extern int _ii_init_dev(); -extern void _ii_deinit_dev(); -extern int _ii_config(intptr_t arg, int ilp32, int *rvp, int iflags); -extern int _ii_disable(intptr_t arg, int ilp32, int *rvp); -extern int _ii_suspend(intptr_t arg, int ilp32, int *rvp); -extern int _ii_bitmap(intptr_t arg, int ilp32, int *rvp); -extern int _ii_segment(intptr_t arg, int ilp32, int *rvp); -extern int _ii_abort(intptr_t arg, int ilp32, int *rvp); -extern int _ii_acopy(intptr_t arg, int ilp32, int *rvp); -extern int _ii_copy(intptr_t arg, int ilp32, int *rvp); -extern int _ii_shutdown(intptr_t arg, int *rvp); -extern int _ii_stat(intptr_t arg, int ilp32, int *rvp); -extern int _ii_version(intptr_t arg, int ilp32, int *rvp); -extern int _ii_wait(intptr_t arg, int ilp32, int *rvp); -extern int _ii_reset(intptr_t arg, int ilp32, int *rvp); -extern int _ii_offline(intptr_t arg, int ilp32, int *rvp); -extern int _ii_list(intptr_t arg, int ilp32, int *rvp); -extern int _ii_listlen(int cmd, int ilp32, int *rvp); -extern int _ii_export(intptr_t arg, int ilp32, int *rvp); -extern int _ii_join(intptr_t arg, int ilp32, int *rvp); -extern int _ii_copyparm(intptr_t arg, int ilp32, int *rvp); -extern int 
_ii_ocreate(intptr_t arg, int ilp32, int *rvp); -extern int _ii_oattach(intptr_t arg, int ilp32, int *rvp); -extern int _ii_odetach(intptr_t arg, int ilp32, int *rvp); -extern int _ii_olist(intptr_t arg, int ilp32, int *rvp); -extern int _ii_ostat(intptr_t arg, int ilp32, int *rvp, int is_iost_2); -extern int _ii_bitsset(intptr_t arg, int ilp32, int cmd, int *rvp); -extern int _ii_gc_list(intptr_t, int, int *, kmutex_t *, _ii_lsthead_t *); -extern int _ii_clist(intptr_t arg, int ilp32, int *rvp); -extern int _ii_move_grp(intptr_t arg, int ilp32, int *rvp); -extern int _ii_change_tag(intptr_t arg, int ilp32, int *rvp); -extern int ii_debug; -extern int ii_throttle_unit; -extern int ii_throttle_delay; -extern int ii_copy_direct; -extern int ii_bitmap; - -int -_init(void) -{ - int error; - - error = ddi_soft_state_init(&ii_statep, sizeof (struct ii_state), 1); - if (!error) { - error = mod_install(&ii_modlinkage); - if (error) - ddi_soft_state_fini(&ii_statep); - } - - return (error); -} - -int -_fini(void) -{ - int error; - - error = mod_remove(&ii_modlinkage); - if (!error) - ddi_soft_state_fini(&ii_statep); - - return (error); -} - -int -_info(struct modinfo *modinfop) -{ - int rc; - - rc = mod_info(&ii_modlinkage, modinfop); - - return (rc); -} - -/* ARGSUSED */ - -static int -iiprobe(dev_info_t *dip) -{ - return (DDI_PROBE_SUCCESS); -} - -/*ARGSUSED*/ -static int -ii_stats_update(kstat_t *ksp, int rw) -{ - if (KSTAT_WRITE == rw) { - return (EACCES); - } - - /* - * We do nothing here for now -- the kstat structure is - * updated in-place - */ - - return (0); -} - -static void -ii_create_kstats() -{ - /* create global info structure */ - if (!ii_gkstat) { - ii_gkstat = kstat_create("ii", 0, "global", "StorEdge", - KSTAT_TYPE_NAMED, - sizeof (iigkstat) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - if (ii_gkstat) { - ii_gkstat->ks_data = &iigkstat; - ii_gkstat->ks_update = ii_stats_update; - ii_gkstat->ks_private = 0; - kstat_install(ii_gkstat); - - /* fill in 
immutable values */ - iigkstat.ii_debug.value.ul = ii_debug; - iigkstat.ii_bitmap.value.ul = ii_bitmap; - iigkstat.ii_throttle_unit.value.ul = ii_throttle_unit; - iigkstat.ii_throttle_delay.value.ul = - ii_throttle_delay; - iigkstat.ii_copy_direct.value.ul = ii_copy_direct; - } else { - cmn_err(CE_WARN, "!Unable to create II global stats"); - } - } -} - -static int -iiattach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - struct ii_state *xsp; - int instance; - int i; - intptr_t flags; - - if (cmd != DDI_ATTACH) { - return (DDI_FAILURE); - } - /* save the dev_info_t to be used in logging using ddi_log_sysevent */ - ii_dip = dip; - - instance = ddi_get_instance(dip); - if (ddi_soft_state_zalloc(ii_statep, instance) != 0) { - cmn_err(CE_WARN, "!ii: no memory for instance %d state.", - instance); - return (DDI_FAILURE); - } - - flags = 0; - xsp = ddi_get_soft_state(ii_statep, instance); - if (xsp == NULL) { - cmn_err(CE_WARN, - "!ii: attach: could not get state for instance %d.", - instance); - goto out; - } - - ii_debug = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "ii_debug", 0); - if (ii_debug != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!ii: initializing ii version %d.%d.%d.%d", - dsw_major_rev, dsw_minor_rev, - dsw_micro_rev, dsw_baseline_rev); -#else - if (dsw_micro_rev) { - cmn_err(CE_NOTE, "!ii: initializing ii vers %d.%d.%d", - dsw_major_rev, dsw_minor_rev, dsw_micro_rev); - } else { - cmn_err(CE_NOTE, "!ii: initializing ii version %d.%d", - dsw_major_rev, dsw_minor_rev); - } -#endif - switch (ii_debug) { - case 1: - case 2: cmn_err(CE_NOTE, - "!ii: ii_debug=%d is enabled.", ii_debug); - break; - default: - cmn_err(CE_WARN, - "!ii: Value of ii_debug=%d is not 0,1 or 2.", - ii_debug); - } - } - - ii_bitmap = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "ii_bitmap", II_WTHRU); - switch (ii_bitmap) { - case II_KMEM: - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: ii_bitmap is in memory"); - break; - case 
II_FWC: - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: ii_bitmap is on disk," - " no FWC"); - break; - case II_WTHRU: - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: ii_bitmap is on disk"); - break; - default: - cmn_err(CE_NOTE, - "!ii: ii_bitmap=%d out of range; " - "defaulting WTHRU(%d)", ii_bitmap, II_WTHRU); - ii_bitmap = II_WTHRU; - } - - /* pick up these values if in ii.conf, otherwise leave alone */ - i = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "ii_throttle_unit", 0); - if (i > 0) { - ii_throttle_unit = i; - if ((ii_throttle_unit < MIN_THROTTLE_UNIT) || - (ii_throttle_unit > MAX_THROTTLE_UNIT) || - (ii_debug > 0)) - cmn_err(CE_NOTE, - "!ii: ii_throttle_unit=%d", ii_throttle_unit); - } - - i = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "ii_throttle_delay", 0); - if (i > 0) { - ii_throttle_delay = i; - if ((ii_throttle_delay < MIN_THROTTLE_DELAY) || - (ii_throttle_delay > MIN_THROTTLE_DELAY) || - (ii_debug > 0)) - cmn_err(CE_NOTE, - "!ii: ii_throttle_delay=%d", ii_throttle_delay); - } - - ii_copy_direct = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "ii_copy_direct", 1); - if (i > 0) { - ii_copy_direct = i; - if ((ii_copy_direct < 0) || (ii_copy_direct > 1)) - cmn_err(CE_NOTE, - "!ii: ii_copy_direct=%d", ii_copy_direct); - } - - if (_ii_init_dev()) { - cmn_err(CE_WARN, "!ii: _ii_init_dev failed"); - goto out; - } - flags |= DIDINIT; - - xsp->dip = dip; - xsp->instance = instance; - - if (ddi_create_minor_node(dip, "ii", S_IFCHR, instance, DDI_PSEUDO, 0) - != DDI_SUCCESS) { - cmn_err(CE_WARN, "!ii: could not create node."); - goto out; - } - flags |= DIDNODES; - - ddi_set_driver_private(dip, (caddr_t)flags); - ddi_report_dev(dip); - - ii_create_kstats(); - - return (DDI_SUCCESS); - -out: - ddi_set_driver_private(dip, (caddr_t)flags); - (void) iidetach(dip, DDI_DETACH); - - return (DDI_FAILURE); -} - -static int -iidetach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - 
struct ii_state *xsp; - int instance; - intptr_t flags; - - if (cmd != DDI_DETACH) { - return (DDI_FAILURE); - } - - if (_ii_info_top) { - return (DDI_FAILURE); /* busy */ - } - - instance = ddi_get_instance(dip); - xsp = ddi_get_soft_state(ii_statep, instance); - if (xsp == NULL) { - cmn_err(CE_WARN, - "!ii: detach: could not get state for instance %d.", - instance); - return (DDI_FAILURE); - } - - flags = (intptr_t)ddi_get_driver_private(dip); - if (flags & DIDNODES) - ddi_remove_minor_node(dip, NULL); - if (flags & DIDINIT) - _ii_deinit_dev(); - - ddi_soft_state_free(ii_statep, instance); - - if (ii_gkstat) { - kstat_delete(ii_gkstat); - ii_gkstat = NULL; - } - - return (DDI_SUCCESS); -} - - -/* ARGSUSED */ - -static int -iiopen(dev_t *devp, int flag, int otyp, cred_t *crp) -{ - int error; - - error = drv_priv(crp); - - return (error); -} - - -/* ARGSUSED */ - -static int -iiclose(dev_t dev, int flag, int otyp, cred_t *crp) -{ - return (0); -} - -/* ARGSUSED */ - -static int -iiprint(dev_t dev, char *str) -{ - int instance = 0; - - cmn_err(CE_WARN, "!ii%d: %s", instance, str); - return (0); -} - -/* ARGSUSED */ - -static int -iiioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvp) -{ - int rc; - int ilp32; - - ilp32 = (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32); - - - switch (cmd) { - case DSWIOC_WAIT: - rc = _ii_wait(arg, ilp32, rvp); - break; - - case DSWIOC_RESET: - rc = _ii_reset(arg, ilp32, rvp); - break; - - case DSWIOC_VERSION: - rc = _ii_version(arg, ilp32, rvp); - break; - - case DSWIOC_ENABLE: - rc = _ii_config(arg, ilp32, rvp, 0); - break; - - case DSWIOC_RESUME: - rc = _ii_config(arg, ilp32, rvp, II_EXISTING); - break; - - case DSWIOC_DISABLE: - rc = _ii_disable(arg, ilp32, rvp); - break; - - case DSWIOC_SUSPEND: - rc = _ii_suspend(arg, ilp32, rvp); - break; - - case DSWIOC_ACOPY: - rc = _ii_acopy(arg, ilp32, rvp); - break; - - case DSWIOC_COPY: - rc = _ii_copy(arg, ilp32, rvp); - break; - - case DSWIOC_SHUTDOWN: - 
rc = _ii_shutdown(arg, rvp); - break; - - case DSWIOC_STAT: - rc = _ii_stat(arg, ilp32, rvp); - break; - - case DSWIOC_BITMAP: - rc = _ii_bitmap(arg, ilp32, rvp); - break; - - case DSWIOC_SEGMENT: - rc = _ii_segment(arg, ilp32, rvp); - break; - - case DSWIOC_ABORT: - rc = _ii_abort(arg, ilp32, rvp); - break; - - case DSWIOC_OFFLINE: - rc = _ii_offline(arg, ilp32, rvp); - break; - - case DSWIOC_LIST: - rc = _ii_list(arg, ilp32, rvp); - break; - - case DSWIOC_LISTLEN: - case DSWIOC_OLISTLEN: - rc = _ii_listlen(cmd, ilp32, rvp); - break; - - case DSWIOC_EXPORT: - rc = _ii_export(arg, ilp32, rvp); - break; - - case DSWIOC_IMPORT: - rc = _ii_config(arg, ilp32, rvp, II_IMPORT); - break; - - case DSWIOC_JOIN: - rc = _ii_join(arg, ilp32, rvp); - break; - - case DSWIOC_COPYP: - rc = _ii_copyparm(arg, ilp32, rvp); - break; - - case DSWIOC_OCREAT: - rc = _ii_ocreate(arg, ilp32, rvp); - break; - - case DSWIOC_OATTACH: - rc = _ii_oattach(arg, ilp32, rvp); - break; - - case DSWIOC_ODETACH: - rc = _ii_odetach(arg, ilp32, rvp); - break; - - case DSWIOC_OLIST: - rc = _ii_olist(arg, ilp32, rvp); - break; - - case DSWIOC_OSTAT: - rc = _ii_ostat(arg, ilp32, rvp, FALSE); - break; - - case DSWIOC_OSTAT2: - rc = _ii_ostat(arg, ilp32, rvp, TRUE); - break; - - case DSWIOC_SBITSSET: - case DSWIOC_CBITSSET: - rc = _ii_bitsset(arg, ilp32, cmd, rvp); - break; - - case DSWIOC_CLIST: - rc = _ii_gc_list(arg, ilp32, rvp, &_ii_cluster_mutex, - _ii_cluster_top); - break; - - case DSWIOC_GLIST: - rc = _ii_gc_list(arg, ilp32, rvp, &_ii_group_mutex, - _ii_group_top); - break; - - case DSWIOC_MOVEGRP: - rc = _ii_move_grp(arg, ilp32, rvp); - break; - - case DSWIOC_CHANGETAG: - rc = _ii_change_tag(arg, ilp32, rvp); - break; - - default: - rc = EINVAL; - break; - } - - return (rc); -} - -/* - * dummy function - */ - -static int -iistrat(struct buf *bp) -{ - bp->b_error = EIO; - biodone(bp); - - return (0); -} - -static int -iiread() -{ - return (EIO); -} diff --git a/usr/src/uts/common/avs/ns/dsw/dsw.h 
b/usr/src/uts/common/avs/ns/dsw/dsw.h deleted file mode 100644 index 3243e9a6e9..0000000000 --- a/usr/src/uts/common/avs/ns/dsw/dsw.h +++ /dev/null @@ -1,485 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _DSW_H -#define _DSW_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Miscellaneous defines - */ - -#define DSW_BITS 8 /* # of bits in a byte */ -#define DSW_SIZE 64 /* fba's in a DSW chunk */ - - -/* - * Ioctl definitions - */ - -#define _D_(x) (('D'<<16)|('W'<<8)|(x)) - -#define DSWIOC_ENABLE _D_(1) /* Configure DSW pair */ -#define DSWIOC_RESUME _D_(2) /* Resume a DSW pair */ -#define DSWIOC_SUSPEND _D_(3) /* Suspend a DSW pair */ -#define DSWIOC_COPY _D_(4) /* Copy DSW volume over its pair */ -#define DSWIOC_BITMAP _D_(5) /* Get bitmap */ -#define DSWIOC_STAT _D_(6) /* Get state of shadow */ -#define DSWIOC_DISABLE _D_(7) /* Deconfigure DSW pair */ -#define DSWIOC_SHUTDOWN _D_(8) /* Suspend all DSW pairs */ -#define DSWIOC_ABORT _D_(9) /* Abort Copy of DSW pair */ -#define DSWIOC_VERSION _D_(10) /* DataShadow version */ -#define DSWIOC_RESET _D_(11) /* Reset DataShadow set */ -#define DSWIOC_OFFLINE _D_(12) /* Offline volumes */ -#define DSWIOC_WAIT _D_(13) /* Wait for copy to complete */ -#define DSWIOC_LIST _D_(14) /* List current kernel shadow groups */ -#define DSWIOC_ACOPY _D_(15) /* Copy DSW volumes over their pairs */ -#define DSWIOC_EXPORT _D_(16) /* Export the shadow volume */ -#define DSWIOC_IMPORT _D_(17) /* Import shadow volume */ -#define DSWIOC_JOIN _D_(18) /* Rejoin previously exported shadow */ -#define DSWIOC_COPYP _D_(19) /* Set and get copy parameters */ -#define DSWIOC_OCREAT _D_(20) /* Create overflow volume */ -#define DSWIOC_OATTACH _D_(21) /* Attach overflow volume */ -#define DSWIOC_ODETACH _D_(22) /* Detach overflow volume */ -#define DSWIOC_OLIST _D_(23) /* List overflow volumes */ -#define DSWIOC_OSTAT _D_(24) /* Stat overflow volume */ -#define DSWIOC_SBITSSET _D_(25) /* Get # of bits set in shadow bitmap */ -#define DSWIOC_CBITSSET _D_(26) /* Get # of bits set in copy bitmap */ -#define DSWIOC_LISTLEN _D_(27) /* length of DSWIOC_LIST data */ -#define DSWIOC_OLISTLEN _D_(28) /* length of DSWIOC_OLIST data */ 
-#define DSWIOC_SEGMENT _D_(29) /* Get segemented bitmaps */ -#define DSWIOC_MOVEGRP _D_(30) /* Move set from one group to another */ -#define DSWIOC_CLIST _D_(31) /* get list of resource groups */ -#define DSWIOC_GLIST _D_(32) /* get list of groups */ -#define DSWIOC_CHANGETAG _D_(33) /* change the cluster tag of a set */ -#define DSWIOC_OSTAT2 _D_(34) /* Stat overflow volume enhanced */ - -/* - * Config and status flags - */ - -#define DSW_GOLDEN 0x0001 /* the set is independent */ - -#define DSW_COPYINGP 0x0100 /* Copy in progress */ -#define DSW_COPYINGM 0x0200 /* Copying master to shadow */ -#define DSW_COPYINGS 0x0400 /* Copying shadow to master */ -#define DSW_COPYING 0x0600 /* Copying, may be in progress */ -#define DSW_COPY_FLAGS 0x0700 /* Copy flags */ -#define DSW_COPYINGX 0x0800 /* Copy exit requested */ -#define DSW_OFFLINE 0xf000 /* An underlying volume offline */ -#define DSW_BMPOFFLINE 0x1000 /* Bitmap volume offline */ -#define DSW_SHDOFFLINE 0x2000 /* Shadow volume offline */ -#define DSW_MSTOFFLINE 0x4000 /* Master volume offline */ -#define DSW_OVROFFLINE 0x8000 /* Overflow volume offline */ -#define DSW_TREEMAP 0x10000 /* Shadow volume accessed by an index */ -#define DSW_OVERFLOW 0x20000 /* Shadow volume has overflowed */ -#define DSW_SHDEXPORT 0x40000 /* Shadow volume has been exported */ -#define DSW_SHDIMPORT 0x80000 /* Shadow volume has been imported */ -#define DSW_VOVERFLOW 0x100000 /* Shadow volume using overflow vol */ -#define DSW_HANGING 0x200000 /* Hanging master structure */ -#define DSW_CFGOFFLINE 0x400000 /* config db is offline */ -#define DSW_OVRHDRDRTY 0x800000 /* Overflow header dirty */ -#define DSW_RESIZED 0x1000000 /* mst_size != shd_size */ -#define DSW_FRECLAIM 0x2000000 /* force the reclaim of an ovr vol */ - -/* - * used for SNMP trap only. - * These flags help distinguish between enable and resume, - * suspend and disable. 
- * Note that DSW_HANGING is set for both suspend and disable - */ -#define DSW_SNMP_CLR 0 /* no flag is set */ -#define DSW_SNMP_DISABLE 1 /* Set is disabled */ -#define DSW_SNMP_SUSPEND 2 /* Set is suspended */ -#define DSW_SNMP_ENABLE 3 /* Set is enabled */ -#define DSW_SNMP_RESUME 4 /* Set is resumed */ -#define DSW_SNMP_OVER_ATTACH 5 /* overflow attached */ -#define DSW_SNMP_OVER_DETACH 6 /* overflow detached */ -#define DSW_SNMP_UPDATE 7 /* update operation */ -#define DSW_SNMP_COPIED 8 /* copy operation */ - - /* Overflow volume flags */ -#define IIO_OFFLINE 0x0001 /* Volume is offline */ -#define IIO_HDR_WRTN 0x0002 /* Header written */ -#define IIO_CNTR_INVLD 0x0004 /* Overflow counters invalid */ -#define IIO_VOL_UPDATE 0x0008 /* Performing group update */ - -#define DSW_NAMELEN 64 /* NSC_MAXPATH - don't change without */ - /* amending header version number */ - -#define DSWDEV "/dev/ii" -#define II_IMPORTED_SHADOW "<imported_shadow>" - -/* - * Configuration parameter defines - * ii_bitmap, ii_throttle_unit, ii_throttle_delay - */ -#define II_KMEM 0 /* Load/store on resume/suspend, in memory */ -#define II_WTHRU 1 /* Read/write bitmap thru to bitmap volume */ -#define II_FWC 2 /* Read/write bitmap to FWC, else WTHRU */ - -#define MIN_THROTTLE_UNIT 100 /* Min. number of units to transfer */ -#define MAX_THROTTLE_UNIT 60000 /* Max. number of units to transfer */ -#define MIN_THROTTLE_DELAY 2 /* Min. delay between unit transfer */ -#define MAX_THROTTLE_DELAY 10000 /* Max. 
delay between unit transfer */ - -/* - * DSW user config structure - */ - -typedef struct dsw_config_s { - spcs_s_info_t status; - char master_vol[DSW_NAMELEN]; - char shadow_vol[DSW_NAMELEN]; - char bitmap_vol[DSW_NAMELEN]; - char cluster_tag[DSW_NAMELEN]; - char group_name[DSW_NAMELEN]; - int flag; -} dsw_config_t; - -/* - * DSW segmented bitmap I/O structure - */ -typedef struct dsw_segment_s { - spcs_s_info_t status; - char shadow_vol[DSW_NAMELEN]; - unsigned seg_number; /* 32KB Segment number to start at */ - unsigned char *shd_bitmap; /* pointer to shadow bitmap */ - int shd_size; /* size of shadow bitmap */ - unsigned char *cpy_bitmap; /* pointer to copy bitmap */ - int cpy_size; /* size of copy bitmap */ - unsigned char *idx_bitmap; /* pointer to index table */ - int idx_size; /* size of index table */ -} dsw_segment_t; - -/* - * DSW user bitmap structure - */ - -typedef struct dsw_bitmap_s { - spcs_s_info_t status; - char shadow_vol[DSW_NAMELEN]; - unsigned char *shd_bitmap; /* pointer to shadow bitmap */ - uint64_t shd_size; /* size of shadow bitmap */ - uint64_t copy_size; /* size of copy bitmap */ - unsigned char *copy_bitmap; /* pointer to copy bitmap */ -} dsw_bitmap_t; - - -/* - * DSW general ioctl structure - */ - -typedef struct dsw_ioctl_s { - spcs_s_info_t status; - char shadow_vol[DSW_NAMELEN]; - int flags; - pid_t pid; -} dsw_ioctl_t; - - -/* - * DSW general atomic ioctl structure operating on several Image sets - */ - -typedef struct dsw_aioctl_s { - spcs_s_info_t status; - int flags; - int count; - pid_t pid; - char shadow_vol[DSW_NAMELEN]; /* start of list of image sets */ -} dsw_aioctl_t; - - -/* - * DSW stat ioctl structure - */ - -typedef struct dsw_stat_s { - spcs_s_info_t status; - char shadow_vol[DSW_NAMELEN]; - int stat; - uint64_t size; - char overflow_vol[DSW_NAMELEN]; - uint64_t shdsize; - uint64_t shdused; - char group_name[DSW_NAMELEN]; - char cluster_tag[DSW_NAMELEN]; - uint64_t mtime; -} dsw_stat_t; - - -/* - * DSW version 
ioctl structure - */ - -typedef struct dsw_version_s { - spcs_s_info_t status; - int major; /* Major release number */ - int minor; /* Minor release number */ - int micro; /* Micro release number */ - int baseline; /* Baseline revision number */ -} dsw_version_t; - -/* - * DSW get bits set in bitmap structure - */ - -typedef struct dsw_bitsset_s { - spcs_s_info_t status; - char shadow_vol[DSW_NAMELEN]; - uint64_t tot_size; /* total number of bits in map */ - uint64_t tot_set; /* number of bitmap bits set */ -} dsw_bitsset_t; - - -/* - * DSW list ioctl structure - */ - -typedef struct dsw_list_s { - spcs_s_info_t status; - int list_size; /* number of elements in list */ - int list_used; /* number of elements returned */ - dsw_config_t *list; -} dsw_list_t; - -/* - * DSW copy parameter structure - */ - -typedef struct dsw_copyp_s { - spcs_s_info_t status; - char shadow_vol[DSW_NAMELEN]; - int copy_unit; - int copy_delay; -} dsw_copyp_t; - -/* - * DSW ostat ioctl structure - */ - -typedef struct dsw_ostat_s { - spcs_s_info_t status; - char overflow_vol[DSW_NAMELEN]; - int drefcnt; - uint64_t used; - uint64_t unused; - uint64_t nchunks; - int crefcnt; - int flags; - int hversion; - int hmagic; -} dsw_ostat_t; - -/* - * DSW move group structure - */ - -typedef struct dsw_movegrp_s { - spcs_s_info_t status; - char shadow_vol[DSW_NAMELEN]; - char new_group[DSW_NAMELEN]; -} dsw_movegrp_t; - -/* - * II_PIT_PROPS structure - */ -typedef struct pit_props_s { - int iirc; - int mstid; - int shdid; - int bmpid; - int ovrid; - char group[DSW_NAMELEN]; - char cluster[DSW_NAMELEN]; - int has_overflow; - int flags; - uint64_t size; - int64_t shdchks; - int64_t copybits; - int64_t shdbits; -} pit_props_t; - -/* - * II_PIT_UPDATE structure - */ -typedef struct pit_update_s { - int iirc; - char direction; -} pit_update_t; - -#ifdef _KERNEL -/* - * 32 bit versions of ioctl structures - */ - -typedef struct dsw_config32_s { - spcs_s_info32_t status; - char master_vol[DSW_NAMELEN]; - char 
shadow_vol[DSW_NAMELEN]; - char bitmap_vol[DSW_NAMELEN]; - char cluster_tag[DSW_NAMELEN]; - char group_name[DSW_NAMELEN]; - int flag; -} dsw_config32_t; - -/* - * DSW segmented bitmap I/O structure - */ -typedef struct dsw_segment32_s { - spcs_s_info32_t status; - char shadow_vol[DSW_NAMELEN]; - uint32_t seg_number; - uint32_t shd_bitmap; - int shd_size; - uint32_t cpy_bitmap; - int cpy_size; - uint32_t idx_bitmap; - int idx_size; -} dsw_segment32_t; - -/* - * DSW user bitmap structure - */ - -typedef struct dsw_bitmap32_s { - spcs_s_info32_t status; - char shadow_vol[DSW_NAMELEN]; - uint32_t shd_bitmap; /* 32 bit pointer value */ - uint64_t shd_size; - uint64_t copy_size; - uint32_t copy_bitmap; /* 32 bit pointer value */ -} dsw_bitmap32_t; - -typedef struct dsw_ioctl32_s { - spcs_s_info32_t status; - char shadow_vol[DSW_NAMELEN]; - int flags; - pid_t pid; -} dsw_ioctl32_t; - -typedef struct dsw_stat32_s { - spcs_s_info32_t status; - char shadow_vol[DSW_NAMELEN]; - int stat; - uint64_t size; - char overflow_vol[DSW_NAMELEN]; - uint64_t shdsize; - uint64_t shdused; - char group_name[DSW_NAMELEN]; - char cluster_tag[DSW_NAMELEN]; - uint64_t mtime; -} dsw_stat32_t; - -typedef struct dsw_version32_s { - spcs_s_info32_t status; - int major; /* Major release number */ - int minor; /* Minor release number */ - int micro; /* Micro release number */ - int baseline; /* Baseline revision number */ -} dsw_version32_t; - -typedef struct dsw_bitsset32_s { - spcs_s_info32_t status; - char shadow_vol[DSW_NAMELEN]; - uint64_t tot_size; /* total number of bits in map */ - uint64_t tot_set; /* number of bitmap bits set */ -} dsw_bitsset32_t; - -typedef struct dsw_list32_s { - spcs_s_info32_t status; - int list_size; - int list_used; - uint32_t list; -} dsw_list32_t; - -typedef struct dsw_aioctl32_s { - spcs_s_info32_t status; - int flags; - int count; - pid_t pid; - char shadow_vol[DSW_NAMELEN]; /* start of list of image sets */ -} dsw_aioctl32_t; - -typedef struct dsw_copyp32_s { - 
spcs_s_info32_t status; - char shadow_vol[DSW_NAMELEN]; - int copy_unit; - int copy_delay; -} dsw_copyp32_t; - -typedef struct dsw_ostat32_s { - spcs_s_info32_t status; - char overflow_vol[DSW_NAMELEN]; - int drefcnt; - uint64_t used; - uint64_t unused; - uint64_t nchunks; - int crefcnt; - int flags; - int hversion; - int hmagic; -} dsw_ostat32_t; - -/* - * DSW move group structure - */ - -typedef struct dsw_movegrp32_s { - spcs_s_info32_t status; - char shadow_vol[DSW_NAMELEN]; - char new_group[DSW_NAMELEN]; -} dsw_movegrp32_t; - -#endif /* _KERNEL */ - -/* dsw_copy dsw_ioctl_t flag bits */ -#define CV_BMP_ONLY 0x00000001 /* copy only chunks flagged by bitmap */ -#define CV_SHD2MST 0x00000002 /* copy shadow to master */ -#define CV_LOCK_PID 0x00000004 /* On copy/update, lock PIT by PID */ -#define CV_CLR_BMP 0x00000010 /* clear bits in bit map during copy */ -#define CV_IS_CLUSTER 0x00000020 /* struct refers to cluster */ -#define CV_IS_GROUP 0x00000040 /* struct refers to group (cpy/upd) */ -#define CV_SIBLING 0x00010000 /* internal copy_on_write flag */ - -/* nsc_control commands */ - -#define II_CONTROL(x) ('I' << 24 | 'I' << 16 | (x)) /* 0x49490000 */ - -#define II_PIT_COPY II_CONTROL(1) /* Perform an II Copy */ -#define II_PIT_UPDATE II_CONTROL(2) /* Perform an II Update */ -#define II_PIT_ABORT II_CONTROL(3) /* Perform an II Abort */ -#define II_PIT_WAIT II_CONTROL(4) /* Perform an II Wait */ -#define II_PIT_PROPS II_CONTROL(5) /* Perform an II Properties */ - -#ifdef __cplusplus -} -#endif - -#endif /* _DSW_H */ diff --git a/usr/src/uts/common/avs/ns/dsw/dsw_dev.c b/usr/src/uts/common/avs/ns/dsw/dsw_dev.c deleted file mode 100644 index 098990bc15..0000000000 --- a/usr/src/uts/common/avs/ns/dsw/dsw_dev.c +++ /dev/null @@ -1,10386 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/time.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/ddi.h> -#include <sys/nsc_thread.h> -#include <sys/sysmacros.h> -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_errors.h> - -#include <sys/unistat/spcs_s_k.h> -#include <sys/nsctl/nsctl.h> -#include "dsw.h" -#include "dsw_dev.h" -#include "../rdc/rdc_update.h" -#include <sys/nskernd.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -/* - * Instant Image - * - * This file contains the core implementation of II. - * - * II is implemented as a simple filter module that pushes itself between - * user (SV, STE, etc.) and SDBC or NET. - * - */ - - -#define REMOTE_VOL(s, ip) (((s) && ((ip->bi_flags)&DSW_SHDEXPORT)) || \ - (!(s)&&((ip->bi_flags)&DSW_SHDIMPORT))) - -#define total_ref(ip) ((ip->bi_shdref + ip->bi_shdrref + ip->bi_bmpref) + \ - (NSHADOWS(ip) ? 
0 : ip->bi_mstref + ip->bi_mstrref)) - - -#define II_TAIL_COPY(d, s, m, t) bcopy(&(s.m), &(d.m), \ - sizeof (d) - (uintptr_t)&((t *)0)->m) -extern dev_info_t *ii_dip; - -#define II_LINK_CLUSTER(ip, cluster) \ - _ii_ll_add(ip, &_ii_cluster_mutex, &_ii_cluster_top, cluster, \ - &ip->bi_cluster) -#define II_UNLINK_CLUSTER(ip) \ - _ii_ll_remove(ip, &_ii_cluster_mutex, &_ii_cluster_top, &ip->bi_cluster) - -#define II_LINK_GROUP(ip, group) \ - _ii_ll_add(ip, &_ii_group_mutex, &_ii_group_top, group, &ip->bi_group) -#define II_UNLINK_GROUP(ip) \ - _ii_ll_remove(ip, &_ii_group_mutex, &_ii_group_top, &ip->bi_group) - -_ii_info_t *_ii_info_top; -_ii_info_t *_ii_mst_top = 0; -_ii_overflow_t *_ii_overflow_top; -_ii_lsthead_t *_ii_cluster_top; -_ii_lsthead_t *_ii_group_top; - -int ii_debug; /* level of cmn_err noise */ -int ii_bitmap; /* bitmap operations switch */ -uint_t ii_header = 16; /* Undocumented tunable (with adb!), start */ - /* of area cleared in volume when a dependent */ - /* shadow is disabled. 
*/ - /* max # of chunks in copy loop before delay */ -int ii_throttle_unit = MIN_THROTTLE_UNIT; - /* length of delay during update loop */ -int ii_throttle_delay = MIN_THROTTLE_DELAY; -int ii_copy_direct = 1; -int ii_nconcopy = 10; /* default value when starting with no cache */ -kmutex_t _ii_cluster_mutex; -kmutex_t _ii_group_mutex; - -static int _ii_shutting_down = 0; -static nsc_io_t *_ii_io, *_ii_ior; -static nsc_mem_t *_ii_local_mem; -static nsc_def_t _ii_fd_def[], _ii_io_def[], _ii_ior_def[]; -static kmutex_t _ii_info_mutex; -static kmutex_t _ii_overflow_mutex; -static kmutex_t _ii_config_mutex; -static _ii_bmp_ops_t alloc_buf_bmp, kmem_buf_bmp; -static nsc_svc_t *ii_volume_update; /* IIVolumeUpdate token */ -static nsc_svc_t *ii_report_luns; /* IIReportLuns token */ -static nsc_svc_t *ii_get_initiators; /* IIGetInitiators token */ -static ksema_t _ii_concopy_sema; -static int _ii_concopy_init = 0; -static int _ii_instance = 0; - -void _ii_deinit_dev(); - -static void _ii_info_free(_ii_info_t *ip); -static void _ii_info_freeshd(_ii_info_t *ip); -static void ii_sibling_free(_ii_info_t *ip); -ii_header_t *_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp); -int _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, - nsc_buf_t *tmp); -static void _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip, - nsc_buf_t *tmp); -static int _ii_copyvol(_ii_info_t *, int, int, spcs_s_info_t, int); -static void _ii_stopvol(_ii_info_t *ip); -static int _ii_stopcopy(_ii_info_t *ip); -static _ii_info_t *_ii_find_set(char *volume); -static _ii_info_t *_ii_find_vol(char *, int); -static _ii_overflow_t *_ii_find_overflow(char *volume); -static void _ii_ioctl_done(_ii_info_t *ip); -static void _ii_lock_chunk(_ii_info_t *ip, chunkid_t); -static void _ii_unlock_chunks(_ii_info_t *ip, chunkid_t, int); -void _ii_error(_ii_info_t *ip, int error_type); -static nsc_buf_t *_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(), - void (*w_cb)(), ii_fd_t *bfd); -static int _ii_free_handle(ii_buf_t 
*h, ii_fd_t *bfd); -extern nsc_size_t ii_btsize(nsc_size_t); -extern int ii_tinit(_ii_info_t *); -extern chunkid_t ii_tsearch(_ii_info_t *, chunkid_t); -extern void ii_tdelete(_ii_info_t *, chunkid_t); -extern void ii_reclaim_overflow(_ii_info_t *); -static void ii_overflow_free(_ii_info_t *ip, int disable); -static int ii_overflow_attach(_ii_info_t *, char *, int); -int _ii_nsc_io(_ii_info_t *, int, nsc_fd_t *, int, nsc_off_t, unsigned char *, - nsc_size_t); -static nsc_path_t *_ii_register_path(char *path, int type, nsc_io_t *io); -static int _ii_unregister_path(nsc_path_t *sp, int flag, char *type); -static int _ii_reserve_begin(_ii_info_t *ip); -static int _ii_wait_for_it(_ii_info_t *ip); -static void _ii_reserve_end(_ii_info_t *ip); -static kstat_t *_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op); -static int _ii_ll_add(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char *, - char **); -static int _ii_ll_remove(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char **); -#define _ii_unlock_chunk(ip, chunk) _ii_unlock_chunks(ip, chunk, 1) -extern const int dsw_major_rev; -extern const int dsw_minor_rev; -extern const int dsw_micro_rev; -extern const int dsw_baseline_rev; - -/* - * These constants are used by ii_overflow_free() to indicate how the - * reclamation should take place. 
- * NO_RECLAIM: just detach the overflow from the set; do not - * attempt to reclaim chunks, do not decrement the - * used-by count - * RECLAIM: reclaim all chunks before decrementing the used-by count - * INIT_OVR: decrement the used-by count only; do not reclaim chunks - */ - -#define NO_RECLAIM 0 -#define RECLAIM 1 -#define INIT_OVR 2 - -struct copy_args { /* arguments passed to copy process */ - _ii_info_t *ip; - int flag; - int rtype; - int wait; - spcs_s_info_t kstatus; - int rc; -}; - -/* set-specific kstats info */ -ii_kstat_set_t ii_kstat_set = { - { DSW_SKSTAT_SIZE, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_MTIME, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_FLAGS, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_THROTTLE_UNIT, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_THROTTLE_DELAY, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_SHDCHKS, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_SHDCHKUSED, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_SHDBITS, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_COPYBITS, KSTAT_DATA_ULONG }, - { DSW_SKSTAT_MSTA, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_MSTB, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_MSTC, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_MSTD, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_SETA, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_SETB, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_SETC, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_SETD, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_BMPA, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_BMPB, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_BMPC, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_BMPD, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_OVRA, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_OVRB, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_OVRC, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_OVRD, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_MSTIO, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_SHDIO, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_BMPIO, KSTAT_DATA_CHAR }, - { DSW_SKSTAT_OVRIO, KSTAT_DATA_CHAR }, -}; - -/* - * _ii_init_dev - * Initialise the shadow driver - * - */ - -int -_ii_init_dev() -{ - _ii_io = nsc_register_io("ii", NSC_II_ID|NSC_REFCNT|NSC_FILTER, - _ii_io_def); - if (_ii_io == NULL) - cmn_err(CE_WARN, "!ii: 
nsc_register_io failed."); - - _ii_ior = nsc_register_io("ii-raw", NSC_IIR_ID|NSC_REFCNT|NSC_FILTER, - _ii_ior_def); - if (_ii_ior == NULL) - cmn_err(CE_WARN, "!ii: nsc_register_io r failed."); - - _ii_local_mem = nsc_register_mem("ii:kmem", NSC_MEM_LOCAL, 0); - if (_ii_local_mem == NULL) - cmn_err(CE_WARN, "!ii: nsc_register_mem failed."); - - - if (!_ii_io || !_ii_ior || !_ii_local_mem) { - _ii_deinit_dev(); - return (ENOMEM); - } - - mutex_init(&_ii_info_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&_ii_overflow_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&_ii_config_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&_ii_cluster_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&_ii_group_mutex, NULL, MUTEX_DRIVER, NULL); - - ii_volume_update = nsc_register_svc("RDCVolumeUpdated", 0); - ii_report_luns = nsc_register_svc("IIReportLuns", 0); - ii_get_initiators = nsc_register_svc("IIGetInitiators", 0); - - if (!ii_volume_update || !ii_report_luns || !ii_get_initiators) { - _ii_deinit_dev(); - return (ENOMEM); - } - - return (0); -} - - -/* - * _ii_deinit_dev - * De-initialise the shadow driver - * - */ - -void -_ii_deinit_dev() -{ - - if (_ii_io) - (void) nsc_unregister_io(_ii_io, 0); - - if (_ii_ior) - (void) nsc_unregister_io(_ii_ior, 0); - - if (_ii_local_mem) - (void) nsc_unregister_mem(_ii_local_mem); - - if (ii_volume_update) - (void) nsc_unregister_svc(ii_volume_update); - - if (ii_report_luns) - (void) nsc_unregister_svc(ii_report_luns); - - if (ii_get_initiators) - (void) nsc_unregister_svc(ii_get_initiators); - - mutex_destroy(&_ii_info_mutex); - mutex_destroy(&_ii_overflow_mutex); - mutex_destroy(&_ii_config_mutex); - mutex_destroy(&_ii_cluster_mutex); - mutex_destroy(&_ii_group_mutex); - if (_ii_concopy_init) - sema_destroy(&_ii_concopy_sema); - _ii_concopy_init = 0; - -} - -static char * -ii_pathname(nsc_fd_t *fd) -{ - char *rc; - - if (fd == NULL || (rc = nsc_pathname(fd)) == NULL) - return (""); - else - return (rc); -} - - -/* - * _ii_rlse_d - * 
Internal mechanics of _ii_rlse_devs(). Takes care of - * resetting the ownership information as required. - */ - -static void -_ii_rlse_d(ip, mst, raw) -_ii_info_t *ip; -int mst, raw; -{ - _ii_info_dev_t *cip; - _ii_info_dev_t *rip; - - rip = mst ? (ip->bi_mstrdev) : &(ip->bi_shdrdev); - cip = mst ? (ip->bi_mstdev) : &(ip->bi_shddev); - - DTRACE_PROBE2(_ii_rlse_d_type, - _ii_info_dev_t *, rip, - _ii_info_dev_t *, cip); - - - if (RSRV(cip)) { - if (raw) { - ASSERT(cip->bi_orsrv > 0); - cip->bi_orsrv--; - } else { - ASSERT(cip->bi_rsrv > 0); - cip->bi_rsrv--; - } - - if (cip->bi_rsrv > 0) { - nsc_set_owner(cip->bi_fd, cip->bi_iodev); - } else if (cip->bi_orsrv > 0) { - nsc_set_owner(cip->bi_fd, rip->bi_iodev); - } else { - nsc_set_owner(cip->bi_fd, NULL); - } - - if (!RSRV(cip)) { - nsc_release(cip->bi_fd); - } - } else { - if (raw) { - ASSERT(rip->bi_rsrv > 0); - rip->bi_rsrv--; - } else { - ASSERT(rip->bi_orsrv > 0); - rip->bi_orsrv--; - } - - if (rip->bi_rsrv > 0) { - nsc_set_owner(rip->bi_fd, rip->bi_iodev); - } else if (rip->bi_orsrv > 0) { - nsc_set_owner(rip->bi_fd, cip->bi_iodev); - } else { - nsc_set_owner(rip->bi_fd, NULL); - } - - if (!RSRV(rip)) { - rip->bi_flag = 0; - nsc_release(rip->bi_fd); - cv_broadcast(&ip->bi_releasecv); - } - } - -} - - -/* - * _ii_rlse_devs - * Release named underlying devices. - * - * NOTE: the 'devs' argument must be the same as that passed to - * the preceding _ii_rsrv_devs call. 
- */ - -void -_ii_rlse_devs(ip, devs) -_ii_info_t *ip; -int devs; -{ - - ASSERT(!(devs & (MST|SHD))); - - ASSERT(ip->bi_head != (_ii_info_t *)0xdeadbeef); - if (!ip) { - cmn_err(CE_WARN, "!ii: _ii_rlse_devs null ip"); - return; - } - - mutex_enter(&ip->bi_rsrvmutex); - - DTRACE_PROBE(_ii_rlse_devs_mutex); - - if ((devs&(MST|MSTR)) != 0 && (ip->bi_flags&DSW_SHDIMPORT) == 0) { - if (NSHADOWS(ip) && ip != ip->bi_master) - _ii_rlse_devs(ip->bi_master, devs&(MST|MSTR)); - else - _ii_rlse_d(ip, 1, (devs&MSTR)); - } - - if ((devs&(SHD|SHDR)) != 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) { - _ii_rlse_d(ip, 0, (devs&SHDR)); - } - - if ((devs&BMP) != 0 && ip->bi_bmpfd) { - if (--(ip->bi_bmprsrv) == 0) - nsc_release(ip->bi_bmpfd); - } - - ASSERT(ip->bi_bmprsrv >= 0); - ASSERT(ip->bi_shdrsrv >= 0); - ASSERT(ip->bi_shdrrsrv >= 0); - mutex_exit(&ip->bi_rsrvmutex); - -} - - -/* - * _ii_rsrv_d - * Reserve device flagged, unless its companion is already reserved, - * in that case increase the reserve on the companion. - */ - -static int -_ii_rsrv_d(int raw, _ii_info_dev_t *rid, _ii_info_dev_t *cid, int flag, - _ii_info_t *ip) -{ - _ii_info_dev_t *p = NULL; - int other = 0; - int rc; - - /* - * If user wants to do a cache reserve and it's already - * raw reserved, we need to do a real nsc_reserve, so wait - * until the release has been done. - */ - if (RSRV(rid) && (flag == II_EXTERNAL) && - (raw == 0) && (rid->bi_flag != II_EXTERNAL)) { - ip->bi_release++; - while (RSRV(rid)) { - DTRACE_PROBE1(_ii_rsrv_d_wait, _ii_info_dev_t *, rid); - cv_wait(&ip->bi_releasecv, &ip->bi_rsrvmutex); - DTRACE_PROBE1(_ii_rsrv_d_resume, _ii_info_dev_t *, rid); - } - ip->bi_release--; - } - - if (RSRV(rid)) { - p = rid; - if (!raw) { - other = 1; - } - } else if (RSRV(cid)) { - p = cid; - if (raw) { - other = 1; - } - } - - if (p) { - if (other) { - p->bi_orsrv++; - } else { - p->bi_rsrv++; - } - - if (p->bi_iodev) { - nsc_set_owner(p->bi_fd, p->bi_iodev); - } - - return (0); - } - p = raw ? 
rid : cid; - - if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) { - if (p->bi_iodev) { - nsc_set_owner(p->bi_fd, p->bi_iodev); - } - p->bi_rsrv++; - if (raw) - p->bi_flag = flag; - } - - return (rc); -} - -/* - * _ii_rsrv_devs - * Reserve named underlying devices. - * - */ - -int -_ii_rsrv_devs(_ii_info_t *ip, int devs, int flag) -{ - int rc = 0; - int got = 0; - - ASSERT(!(devs & (MST|SHD))); - - if (!ip) { - cmn_err(CE_WARN, "!ii: _ii_rsrv_devs null ip"); - return (EINVAL); - } - - mutex_enter(&ip->bi_rsrvmutex); - - DTRACE_PROBE(_ii_rsrv_devs_mutex); - - if (rc == 0 && (devs&(MST|MSTR)) != 0 && - (ip->bi_flags&DSW_SHDIMPORT) == 0) { - DTRACE_PROBE(_ii_rsrv_devs_master); - if (NSHADOWS(ip) && ip != ip->bi_master) { - if ((rc = _ii_rsrv_devs(ip->bi_master, devs&(MST|MSTR), - flag)) != 0) { - cmn_err(CE_WARN, - "!ii: nsc_reserve multi-master failed"); - } else { - got |= devs&(MST|MSTR); - } - } else { - if ((rc = _ii_rsrv_d((devs&MSTR) != 0, ip->bi_mstrdev, - ip->bi_mstdev, flag, ip)) != 0) { - cmn_err(CE_WARN, - "!ii: nsc_reserve master failed %d", rc); - } else { - got |= (devs&(MST|MSTR)); - } - } - } - - if (rc == 0 && (devs&(SHD|SHDR)) != 0 && - (ip->bi_flags&DSW_SHDEXPORT) == 0) { - DTRACE_PROBE(_ii_rsrv_devs_shadow); - if ((rc = _ii_rsrv_d((devs&SHDR) != 0, &ip->bi_shdrdev, - &ip->bi_shddev, flag, ip)) != 0) { - cmn_err(CE_WARN, - "!ii: nsc_reserve shadow failed %d", rc); - } else { - got |= (devs&(SHD|SHDR)); - } - } - - if (rc == 0 && (devs&BMP) != 0 && ip->bi_bmpfd) { - DTRACE_PROBE(_ii_rsrv_devs_bitmap); - if ((ip->bi_bmprsrv == 0) && - (rc = nsc_reserve(ip->bi_bmpfd, 0)) != 0) { - cmn_err(CE_WARN, - "!ii: nsc_reserve bitmap failed %d", rc); - } else { - (ip->bi_bmprsrv)++; - got |= BMP; - } - } - mutex_exit(&ip->bi_rsrvmutex); - if (rc != 0 && got != 0) - _ii_rlse_devs(ip, got); - - return (rc); -} - -static int -_ii_reserve_begin(_ii_info_t *ip) -{ - int rc; - - mutex_enter(&ip->bi_rlsemutex); - if ((rc = _ii_wait_for_it(ip)) == 0) { - ++ip->bi_rsrvcnt; - } 
- mutex_exit(&ip->bi_rlsemutex); - - return (rc); -} - -static int -_ii_wait_for_it(_ii_info_t *ip) -{ - int nosig; - - nosig = 1; - while (ip->bi_rsrvcnt > 0) { - nosig = cv_wait_sig(&ip->bi_reservecv, &ip->bi_rlsemutex); - if (!nosig) { - break; - } - } - - return (nosig? 0 : EINTR); -} - -static void -_ii_reserve_end(_ii_info_t *ip) -{ - mutex_enter(&ip->bi_rlsemutex); - if (ip->bi_rsrvcnt <= 0) { - mutex_exit(&ip->bi_rlsemutex); - return; - } - --ip->bi_rsrvcnt; - mutex_exit(&ip->bi_rlsemutex); - cv_broadcast(&ip->bi_reservecv); - -} - -static int -ii_fill_copy_bmp(_ii_info_t *ip) -{ - int rc; - chunkid_t max_chunk, chunk_num; - - if ((rc = II_FILL_COPY_BMP(ip)) != 0) - return (rc); - /* - * make certain that the last bits of the last byte of the bitmap - * aren't filled as they may be copied out to the user. - */ - - chunk_num = ip->bi_size / DSW_SIZE; - if ((ip->bi_size % DSW_SIZE) != 0) - ++chunk_num; - - max_chunk = chunk_num; - if ((max_chunk & 0x7) != 0) - max_chunk = (max_chunk + 7) & ~7; - - DTRACE_PROBE2(_ii_fill_copy_bmp_chunks, chunkid_t, chunk_num, - chunkid_t, max_chunk); - - for (; chunk_num < max_chunk; chunk_num++) { - (void) II_CLR_COPY_BIT(ip, chunk_num); - } - - return (0); -} - -static int -ii_update_denied(_ii_info_t *ip, spcs_s_info_t kstatus, - int direction, int all) -{ - rdc_update_t update; - int size; - unsigned char *bmp; - - update.volume = direction == CV_SHD2MST ? 
ii_pathname(MSTFD(ip)) : - ip->bi_keyname; - update.denied = 0; - update.protocol = RDC_SVC_ONRETURN; - update.size = size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); - update.status = kstatus; - update.bitmap = bmp = kmem_alloc(update.size, KM_SLEEP); - if (bmp == NULL) { - spcs_s_add(kstatus, ENOMEM); - return (1); - } - - DTRACE_PROBE2(_ii_update_denied, int, all, int, size); - - if (all) { - while (size-- > 0) - *bmp++ = (unsigned char)0xff; - } else { - if (II_CHANGE_BMP(ip, update.bitmap) != 0) { - /* failed to read bitmap */ - spcs_s_add(kstatus, EIO); - update.denied = 1; - } - } - - /* check that no user of volume objects */ - if (update.denied == 0) { - (void) nsc_call_svc(ii_volume_update, (intptr_t)&update); - } - kmem_free(update.bitmap, FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size))); - - return (update.denied); -} - -static int -ii_need_same_size(_ii_info_t *ip) -{ - rdc_update_t update; - - update.volume = ip->bi_keyname; - update.denied = 0; - update.protocol = RDC_SVC_VOL_ENABLED; - - (void) nsc_call_svc(ii_volume_update, (intptr_t)&update); - - return (update.denied); -} - -/* - * ii_volume: check if vol is already known to Instant Image and return - * volume type if it is. 
- */ - -static int -ii_volume(char *vol, int locked) -{ - _ii_info_t *ip; - _ii_overflow_t *op; - int rc = NONE; - - /* scan overflow volume list */ - mutex_enter(&_ii_overflow_mutex); - - DTRACE_PROBE(_ii_volume_mutex); - - for (op = _ii_overflow_top; op; op = op->ii_next) { - if (strcmp(vol, op->ii_volname) == 0) - break; - } - mutex_exit(&_ii_overflow_mutex); - if (op) { - return (OVR); - } - - if (!locked) { - mutex_enter(&_ii_info_mutex); - } - - DTRACE_PROBE(_ii_volume_mutex2); - - for (ip = _ii_info_top; ip; ip = ip->bi_next) { - if (strcmp(vol, ii_pathname(ip->bi_mstfd)) == 0) { - rc = MST; - break; - } - if (strcmp(vol, ip->bi_keyname) == 0) { - rc = SHD; - break; - } - if (strcmp(vol, ii_pathname(ip->bi_bmpfd)) == 0) { - rc = BMP; - break; - } - } - DTRACE_PROBE1(_ii_volume_data, int, rc); - - if (!locked) { - mutex_exit(&_ii_info_mutex); - } - - return (rc); -} - -/* - * ii_open_shadow: open shadow volume for both cached and raw access, - * if the normal device open fails attempt a file open to allow - * shadowing into a file. 
- */ - -static int -ii_open_shadow(_ii_info_t *ip, char *shadow_vol) -{ - int rc = 0; - int file_rc = 0; - - ip->bi_shdfd = nsc_open(shadow_vol, - NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def, - (blind_t)&(ip->bi_shddev), &rc); - if (!ip->bi_shdfd) { - ip->bi_shdfd = nsc_open(shadow_vol, - NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def, - (blind_t)&(ip->bi_shddev), &file_rc); - file_rc = 1; - if (!ip->bi_shdfd) { - return (rc); - } - DTRACE_PROBE(_ii_open_shadow); - } - else - DTRACE_PROBE(_ii_open_shadow); - - if (file_rc == 0) { - ip->bi_shdrfd = nsc_open(shadow_vol, - NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def, - (blind_t)&(ip->bi_shdrdev), &rc); - DTRACE_PROBE(_ii_open_shadow); - } else { - ip->bi_shdrfd = nsc_open(shadow_vol, - NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def, - (blind_t)&(ip->bi_shdrdev), &rc); - DTRACE_PROBE(_ii_open_shadow); - } - - if (!ip->bi_shdrfd) { - (void) nsc_close(ip->bi_shdfd); - DTRACE_PROBE(_ii_open_shadow); - return (rc); - } - - return (0); -} - -static void -ii_register_shd(_ii_info_t *ip) -{ - ip->bi_shd_tok = _ii_register_path(ip->bi_keyname, - NSC_CACHE, _ii_io); - ip->bi_shdr_tok = _ii_register_path(ip->bi_keyname, - NSC_DEVICE, _ii_ior); - -} - -static void -ii_register_mst(_ii_info_t *ip) -{ - ip->bi_mst_tok = _ii_register_path(ii_pathname(ip->bi_mstfd), - NSC_CACHE, _ii_io); - ip->bi_mstr_tok = _ii_register_path(ii_pathname(ip->bi_mstrfd), - NSC_DEVICE, _ii_ior); - -} - -static int -ii_register_ok(_ii_info_t *ip) -{ - int rc; - int sibling; - int exported; - - rc = 1; - sibling = NSHADOWS(ip) && ip != ip->bi_head; - exported = ip->bi_flags & DSW_SHDEXPORT; - - if ((ip->bi_bmpfd && !ip->bi_bmp_tok) || (!exported && ( - !ip->bi_shd_tok || !ip->bi_shdr_tok))) - rc = 0; - else if (!sibling && (!ip->bi_mst_tok || !ip->bi_mstr_tok)) - rc = 0; - - return (rc); -} - -#ifndef DISABLE_KSTATS - -/* - * _ii_kstat_create - * Create and install kstat_io data - * - * Calling/Exit State: - * Returns 0 if kstats couldn't be created, otherwise it 
returns - * a pointer to the created kstat_t. - */ - -static kstat_t * -_ii_kstat_create(_ii_info_t *ip, char *type) -{ - kstat_t *result; - char name[ IOSTAT_NAME_LEN ]; - int setnum; - char *nptr; - static int mstnum = 0; - static int shdbmpnum = -1; - - switch (*type) { - case 'm': - setnum = mstnum++; - nptr = ip->bi_kstat_io.mstio; - break; - case 's': - /* assumption: shadow kstats created before bitmap */ - setnum = ++shdbmpnum; - nptr = ip->bi_kstat_io.shdio; - break; - case 'b': - setnum = shdbmpnum; - nptr = ip->bi_kstat_io.bmpio; - break; - default: - cmn_err(CE_WARN, "!Unable to determine kstat type (%c)", *type); - setnum = -1; - break; - } - /* - * The name of the kstat, defined below, is designed to work - * with the 'iostat -x' command. This command leaves only - * 9 characters for the name, and the kstats built in to Solaris - * all seem to be of the form <service><number>. For that - * reason, we have chosen ii<type><number>, where <type> is - * m, s, b, or o (for master, shadow, bitmap, and overflow - * respectively), and the number is monotonically increasing from - * 0 for each time one of those <type>s are created. Note that - * the shadow and bitmap are always created in pairs and so, for - * any given set, they will have the same <number>. - */ - (void) sprintf(name, "ii%c%d", *type, setnum); - (void) strncpy(nptr, name, IOSTAT_NAME_LEN); - result = kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0); - if (result) { - result->ks_private = ip; - result->ks_lock = &ip->bi_kstat_io.statmutex; - kstat_install(result); - } else { - cmn_err(CE_WARN, "!Unable to create %s kstats for set %s", type, - ip->bi_keyname); - } - - return (result); -} - -/* - * _ii_overflow_kstat_create - * Create and install kstat_io data for an overflow volume - * - * Calling/Exit State: - * Returns 0 if kstats couldn't be created, otherwise it returns - * a pointer to the created kstat_t. - * - * See comments in _ii_kstat_create for additional information. 
- * - */ -static kstat_t * -_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op) -{ - kstat_t *result; - char *nptr; - char name [IOSTAT_NAME_LEN]; - static int ovrnum = 0; - int setnum = ovrnum++; - - nptr = ip->bi_kstat_io.ovrio; - - (void) sprintf(name, "iio%d", setnum); - (void) strncpy(nptr, name, IOSTAT_NAME_LEN); - - mutex_init(&op->ii_kstat_mutex, NULL, MUTEX_DRIVER, NULL); - - if ((result = - kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0))) { - result->ks_private = ip; - result->ks_lock = &op->ii_kstat_mutex; - kstat_install(result); - } else { - mutex_destroy(&op->ii_kstat_mutex); - cmn_err(CE_WARN, "!Unabled to create overflow kstat for set " - "%s", ip->bi_keyname); - } - - return (result); -} - -#endif - -static void -ii_str_kstat_copy(char *str, char *p1, char *p2, char *p3, char *p4) -{ - static int whinged = 0; - char *part[ 4 ]; - char fulldata[ DSW_NAMELEN ]; - int i, offset, remain; - int num_parts; - int leftover; - int kscharsize = KSTAT_DATA_CHAR_LEN - 1; - - /* - * NOTE: the following lines must be changed if DSW_NAMELEN - * ever changes. You'll need a part[] for every kscharsize - * characters (or fraction thereof). The ii_kstat_set_t - * definition in dsw_dev.h will also need new ovr_? entries. - */ - part[ 0 ] = p1; - part[ 1 ] = p2; - part[ 2 ] = p3; - part[ 3 ] = p4; - - bzero(fulldata, DSW_NAMELEN); - if (str) { - (void) strncpy(fulldata, str, DSW_NAMELEN); - } - - num_parts = DSW_NAMELEN / kscharsize; - leftover = DSW_NAMELEN % kscharsize; - if (leftover) { - ++num_parts; - } - - if (num_parts > sizeof (part) / sizeof (part[0])) { - /* - * DSW_NAMELEN is 64 and kscharsize is 15. - * It's always "whinged" - */ - if (!whinged) { -#ifdef DEBUG - cmn_err(CE_WARN, "!May not have enough room " - "to store volume name in kstats"); -#endif - whinged = 1; - } - num_parts = sizeof (part) / sizeof (part[0]); - } - - offset = 0; - remain = DSW_NAMELEN; - for (i = 0; i < num_parts; i++) { - int to_copy = remain > kscharsize? 
kscharsize : remain; - bcopy(&fulldata[ offset ], part[ i ], to_copy); - offset += to_copy; - remain -= to_copy; - } -} - -static int -ii_set_stats_update(kstat_t *ksp, int rw) -{ - _ii_info_t *ip = (_ii_info_t *)ksp->ks_private; - ii_kstat_set_t *kp = (ii_kstat_set_t *)ksp->ks_data; - - if (KSTAT_WRITE == rw) { - return (EACCES); - } - - /* copy values over */ - kp->size.value.ul = ip->bi_size; - kp->flags.value.ul = ip->bi_flags; - kp->unit.value.ul = ip->bi_throttle_unit; - kp->delay.value.ul = ip->bi_throttle_delay; - kp->mtime.value.ul = ip->bi_mtime; - - /* update bitmap counters if necessary */ - if (ip->bi_state & DSW_CNTCPYBITS) { - ip->bi_copybits = 0; - if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) { - ip->bi_state &= ~DSW_CNTCPYBITS; - II_CNT_BITS(ip, ip->bi_copyfba, - &ip->bi_copybits, - DSW_BM_SIZE_BYTES(ip)); - _ii_rlse_devs(ip, BMP); - } - } - - if (ip->bi_state & DSW_CNTSHDBITS) { - ip->bi_shdbits = 0; - if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) { - ip->bi_state &= ~DSW_CNTSHDBITS; - II_CNT_BITS(ip, ip->bi_shdfba, - &ip->bi_shdbits, - DSW_BM_SIZE_BYTES(ip)); - _ii_rlse_devs(ip, BMP); - } - } - - kp->copybits.value.ul = ip->bi_copybits; - kp->shdbits.value.ul = ip->bi_shdbits; - - /* copy volume names */ - ii_str_kstat_copy(ii_pathname(MSTFD(ip)), - kp->mst_a.value.c, kp->mst_b.value.c, - kp->mst_c.value.c, kp->mst_d.value.c); - - ii_str_kstat_copy(ip->bi_keyname, kp->set_a.value.c, kp->set_b.value.c, - kp->set_c.value.c, kp->set_d.value.c); - - ii_str_kstat_copy(ii_pathname(ip->bi_bmpfd), - kp->bmp_a.value.c, kp->bmp_b.value.c, - kp->bmp_c.value.c, kp->bmp_d.value.c); - - if (ip->bi_overflow) { - ii_str_kstat_copy(ip->bi_overflow->ii_volname, - kp->ovr_a.value.c, kp->ovr_b.value.c, kp->ovr_c.value.c, - kp->ovr_d.value.c); - (void) strlcpy(kp->ovr_io.value.c, ip->bi_kstat_io.ovrio, - KSTAT_DATA_CHAR_LEN); - } else { - ii_str_kstat_copy("", kp->ovr_a.value.c, kp->ovr_b.value.c, - kp->ovr_c.value.c, kp->ovr_d.value.c); - bzero(kp->ovr_io.value.c, 
KSTAT_DATA_CHAR_LEN); - } - if ((ip->bi_flags) & DSW_TREEMAP) { - kp->shdchks.value.ul = ip->bi_shdchks; - kp->shdchkused.value.ul = ip->bi_shdchkused; - } else { - kp->shdchks.value.ul = 0; - kp->shdchkused.value.ul = 0; - } - /* make sure value.c are always null terminated */ - (void) strlcpy(kp->mst_io.value.c, ip->bi_kstat_io.mstio, - KSTAT_DATA_CHAR_LEN); - (void) strlcpy(kp->shd_io.value.c, ip->bi_kstat_io.shdio, - KSTAT_DATA_CHAR_LEN); - (void) strlcpy(kp->bmp_io.value.c, ip->bi_kstat_io.bmpio, - KSTAT_DATA_CHAR_LEN); - - return (0); -} - -/* - * _ii_config - * Configure an II device pair - * - * Calling/Exit State: - * Returns 0 if the pairing was configured, otherwise an - * error code. The ioctl data stucture is copied out to the user - * and contains any additional error information, and the master - * and shadow volume names if not supplied by the user. - * - * Description: - * Reads the user configuration structure and attempts - * to establish an II pairing. The snapshot of the master - * device is established at this point in time. 
- */ - -int -_ii_config(intptr_t arg, int ilp32, int *rvp, int iflags) -{ - dsw_config_t uconf; - dsw_config32_t *uconf32; - _ii_info_t *ip, *hip, **ipp; - int rc; - int type; - int nshadows; - int add_to_mst_top; - int import; - int existing; - int resized; - nsc_size_t mst_size, shd_size, bmp_size; - nsc_off_t shdfba; - nsc_off_t copyfba; - int keylen, keyoffset; - ii_header_t *bm_header; - nsc_buf_t *tmp; - spcs_s_info_t kstatus; - spcs_s_info32_t ustatus32; - int rtype; - uint_t hints; - - /* Import is a once only operation like an enable */ - ASSERT((iflags&(II_EXISTING|II_IMPORT)) != (II_EXISTING|II_IMPORT)); - existing = (iflags&II_EXISTING) != 0; - import = (iflags&II_IMPORT) != 0; - *rvp = 0; - if (ilp32) { - uconf32 = kmem_zalloc(sizeof (dsw_config32_t), KM_SLEEP); - if (uconf32 == NULL) { - return (ENOMEM); - } - if (copyin((void *)arg, uconf32, sizeof (*uconf32)) < 0) - return (EFAULT); - II_TAIL_COPY(uconf, (*uconf32), master_vol, dsw_config_t); - uconf.status = (spcs_s_info_t)uconf32->status; - ustatus32 = uconf32->status; - kmem_free(uconf32, sizeof (dsw_config32_t)); - } else if (copyin((void *)arg, &uconf, sizeof (uconf)) < 0) - return (EFAULT); - - DTRACE_PROBE3(_ii_config_info, char *, uconf.master_vol, - char *, uconf.shadow_vol, char *, uconf.bitmap_vol); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (_ii_shutting_down) - return (spcs_s_ocopyoutf(&kstatus, uconf.status, - DSW_ESHUTDOWN)); - - if (uconf.bitmap_vol[0] == 0) - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY)); - - mutex_enter(&_ii_config_mutex); - ip = nsc_kmem_zalloc(sizeof (*ip), KM_SLEEP, _ii_local_mem); - if (!ip) { - mutex_exit(&_ii_config_mutex); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM)); - } - ip->bi_mstdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP, - _ii_local_mem); - ip->bi_mstrdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP, - _ii_local_mem); - if (ip->bi_mstdev == NULL || ip->bi_mstrdev 
== NULL) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM)); - } - - ip->bi_disabled = 1; /* mark as disabled until we are ready to go */ - mutex_init(&ip->bi_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&ip->bi_bmpmutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&ip->bi_rsrvmutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&ip->bi_rlsemutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&ip->bi_chksmutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&ip->bi_copydonecv, NULL, CV_DRIVER, NULL); - cv_init(&ip->bi_reservecv, NULL, CV_DRIVER, NULL); - cv_init(&ip->bi_releasecv, NULL, CV_DRIVER, NULL); - cv_init(&ip->bi_ioctlcv, NULL, CV_DRIVER, NULL); - cv_init(&ip->bi_closingcv, NULL, CV_DRIVER, NULL); - cv_init(&ip->bi_busycv, NULL, CV_DRIVER, NULL); - rw_init(&ip->bi_busyrw, NULL, RW_DRIVER, NULL); - rw_init(&ip->bi_linkrw, NULL, RW_DRIVER, NULL); - (void) strncpy(ip->bi_keyname, uconf.shadow_vol, DSW_NAMELEN); - ip->bi_keyname[DSW_NAMELEN-1] = '\0'; - ip->bi_throttle_unit = ii_throttle_unit; - ip->bi_throttle_delay = ii_throttle_delay; - - /* First check the list to see if uconf.bitmap_vol's already there */ - - if (ii_volume(uconf.bitmap_vol, 0) != NONE) { - DTRACE_PROBE(_ii_config_bmp_found); - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE)); - } - - ip->bi_bmpfd = nsc_open(uconf.bitmap_vol, - NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(ip->bi_bmpdev), &rc); - if (!ip->bi_bmpfd) - ip->bi_bmpfd = nsc_open(uconf.bitmap_vol, - NSC_IIR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, NULL, - (blind_t)&(ip->bi_bmpdev), &rc); - if (!ip->bi_bmpfd && !existing) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - spcs_s_add(kstatus, rc); - DTRACE_PROBE(_ii_config_no_bmp); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); - } - - if (import) { - uconf.flag = DSW_GOLDEN; - II_FLAG_SETX(DSW_SHDIMPORT|DSW_GOLDEN, ip); - } - - if (existing) { - - 
DTRACE_PROBE(_ii_config_existing); - /* - * ii_config is used by enable, import and resume (existing) - * If not importing or resuming, then this must be enable. - * Indicate this fact for SNMP use. - */ - - if (!ip->bi_bmpfd) { - /* - * Couldn't read bitmap, mark master and shadow as - * unusable. - */ - II_FLAG_ASSIGN(DSW_BMPOFFLINE|DSW_MSTOFFLINE| - DSW_SHDOFFLINE, ip); - - /* - * Set cluster tag for this element so it can - * be suspended later - */ - (void) II_LINK_CLUSTER(ip, uconf.cluster_tag); - - /* need to check on master, might be shared */ - goto header_checked; - } - /* check the header */ - (void) _ii_rsrv_devs(ip, BMP, II_INTERNAL); - - /* get first block of bit map */ - mutex_enter(&ip->bi_mutex); - bm_header = _ii_bm_header_get(ip, &tmp); - mutex_exit(&ip->bi_mutex); - if (bm_header == NULL) { - if (ii_debug > 0) - cmn_err(CE_WARN, - "!ii: _ii_bm_header_get returned NULL"); - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, - DSW_EHDRBMP)); - } - - if (bm_header->ii_magic != DSW_DIRTY && - bm_header->ii_magic != DSW_CLEAN) { - mutex_exit(&_ii_config_mutex); - _ii_bm_header_free(bm_header, ip, tmp); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, - DSW_EINVALBMP)); - } - - II_FLAG_ASSIGN(bm_header->ii_state, ip); - /* Restore copy throttle parameters, if header version is 3 */ - if (bm_header->ii_version >= 3) { /* II_HEADER_VERSION */ - ip->bi_throttle_delay = bm_header->ii_throttle_delay; - ip->bi_throttle_unit = bm_header->ii_throttle_unit; - } - - /* Restore cluster & group names, if header version is 4 */ - if (bm_header->ii_version >= 4) { - /* cluster */ - if (*bm_header->clstr_name) { - (void) strncpy(uconf.cluster_tag, - bm_header->clstr_name, DSW_NAMELEN); - (void) II_LINK_CLUSTER(ip, uconf.cluster_tag); - } - - /* group */ - if (*bm_header->group_name) { - (void) strncpy(uconf.group_name, - bm_header->group_name, DSW_NAMELEN); - (void) II_LINK_GROUP(ip, 
uconf.group_name); - } - } - /* restore latest modification time, if header version >= 5 */ - if (bm_header->ii_version >= 5) { - ip->bi_mtime = bm_header->ii_mtime; - } - - /* Fetch master and shadow names from bitmap header */ - if (uconf.master_vol[0] == 0) - (void) strncpy(uconf.master_vol, bm_header->master_vol, - DSW_NAMELEN); - if (uconf.shadow_vol[0] == 0) - (void) strncpy(uconf.shadow_vol, bm_header->shadow_vol, - DSW_NAMELEN); - - /* return the fetched names to the user */ - if (ilp32) { - uconf32 = kmem_zalloc(sizeof (dsw_config32_t), - KM_SLEEP); - if (uconf32 == NULL) { - mutex_exit(&_ii_config_mutex); - _ii_bm_header_free(bm_header, ip, tmp); - _ii_rlse_devs(ip, BMP); - _ii_info_free(ip); - return (ENOMEM); - } - uconf32->status = ustatus32; - II_TAIL_COPY((*uconf32), uconf, master_vol, - dsw_config32_t); - rc = copyout(uconf32, (void *)arg, sizeof (*uconf32)); - kmem_free(uconf32, sizeof (dsw_config32_t)); - } else { - rc = copyout(&uconf, (void *)arg, sizeof (uconf)); - } - if (rc) { - mutex_exit(&_ii_config_mutex); - _ii_bm_header_free(bm_header, ip, tmp); - _ii_rlse_devs(ip, BMP); - _ii_info_free(ip); - return (EFAULT); - } - - if (strncmp(bm_header->bitmap_vol, uconf.bitmap_vol, - DSW_NAMELEN) || ((!(ip->bi_flags&DSW_SHDIMPORT)) && - strncmp(bm_header->master_vol, uconf.master_vol, - DSW_NAMELEN)) || strncmp(bm_header->shadow_vol, - uconf.shadow_vol, DSW_NAMELEN)) { - mutex_exit(&_ii_config_mutex); - _ii_bm_header_free(bm_header, ip, tmp); - _ii_rlse_devs(ip, BMP); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, - DSW_EMISMATCH)); - } - shdfba = bm_header->ii_shdfba; - copyfba = bm_header->ii_copyfba; - if ((ip->bi_flags)&DSW_TREEMAP) { - if (ii_debug > 0) - cmn_err(CE_NOTE, - "!II: Resuming short shadow volume"); - - ip->bi_mstchks = bm_header->ii_mstchks; - ip->bi_shdchks = bm_header->ii_shdchks; - ip->bi_shdchkused = bm_header->ii_shdchkused; - ip->bi_shdfchk = bm_header->ii_shdfchk; - - if (bm_header->overflow_vol[0] 
!= 0) - if ((rc = ii_overflow_attach(ip, - bm_header->overflow_vol, 0)) != 0) { - mutex_exit(&_ii_config_mutex); - _ii_bm_header_free(bm_header, ip, tmp); - _ii_rlse_devs(ip, BMP); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, - uconf.status, rc)); - } - } - _ii_bm_header_free(bm_header, ip, tmp); - _ii_rlse_devs(ip, BMP); - } -header_checked: - - if (ip->bi_flags&DSW_SHDIMPORT) - (void) strcpy(uconf.master_vol, "<imported shadow>"); - if (!uconf.master_vol[0] || !uconf.shadow_vol[0]) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY)); - } - - /* check that no volume has been given twice */ - if (strncmp(uconf.master_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); - } - - if (strncmp(uconf.master_vol, uconf.bitmap_vol, DSW_NAMELEN) == 0) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); - } - - if (strncmp(uconf.bitmap_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); - } - - /* check that master is not already a bitmap, shadow or overflow */ - type = ii_volume(uconf.master_vol, 1); - if (type != NONE && type != MST) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE)); - } - - /* check that shadow is not used as anything else */ - type = ii_volume(uconf.shadow_vol, 1); - if (type != NONE && type != SHD) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE)); - } - - /* Setup the table bitmap operations table */ - switch (ii_bitmap) { - case II_KMEM: - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: using volatile bitmaps"); - ip->bi_bitmap_ops = &kmem_buf_bmp; - break; 
- case II_FWC: - hints = 0; - (void) nsc_node_hints(&hints); - if ((hints & NSC_FORCED_WRTHRU) == 0) - ip->bi_bitmap_ops = &kmem_buf_bmp; - else - ip->bi_bitmap_ops = &alloc_buf_bmp; - if (ii_debug > 0) { - cmn_err(CE_NOTE, "!ii: chosen to use %s bitmaps", - ip->bi_bitmap_ops == &kmem_buf_bmp ? - "volatile" : "persistent"); - } - break; - case II_WTHRU: - default: - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: using persistent bitmaps"); - ip->bi_bitmap_ops = &alloc_buf_bmp; - break; - } - - /* - * If we found aother shadow volume with the same name, - * If this is an resume operation, - * If this shadow is in the exported state - * then try an on the fly join instead - */ - for (hip = _ii_info_top; hip; hip = hip->bi_next) - if (strcmp(uconf.shadow_vol, hip->bi_keyname) == 0) - break; - if ((hip) && (type == SHD) && existing && - (ip->bi_flags & DSW_SHDEXPORT)) { - - /* - * Stop any copy in progress - */ - while (_ii_stopcopy(hip) == EINTR) - ; - - /* - * Start the imported shadow teardown - */ - mutex_enter(&hip->bi_mutex); - - /* disable accesss to imported shadow */ - hip->bi_disabled = 1; - - /* Wait for any I/O's to complete */ - while (hip->bi_ioctl) { - hip->bi_state |= DSW_IOCTL; - cv_wait(&hip->bi_ioctlcv, &hip->bi_mutex); - } - mutex_exit(&hip->bi_mutex); - - /* this rw_enter forces us to drain all active IO */ - rw_enter(&hip->bi_linkrw, RW_WRITER); - rw_exit(&hip->bi_linkrw); - - /* remove ip from _ii_info_top linked list */ - mutex_enter(&_ii_info_mutex); - for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) { - if (hip == *ipp) { - *ipp = hip->bi_next; - break; - } - } - if (hip->bi_kstat) { - kstat_delete(hip->bi_kstat); - hip->bi_kstat = NULL; - } - mutex_exit(&_ii_info_mutex); - - /* Gain access to both bitmap volumes */ - rtype = BMP; - if (((rc = _ii_rsrv_devs(hip, rtype, II_INTERNAL)) != 0) || - ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0)) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, 
uconf.status, rc)); - } - - /* Merge imported bitmap */ - rc = II_JOIN_BMP(ip, hip); - - /* Release access to bitmap volume */ - _ii_rlse_devs(hip, rtype); - ii_sibling_free(hip); - - /* Clear the fact that we are exported */ - mutex_enter(&ip->bi_mutex); - II_FLAG_CLR(DSW_SHDEXPORT, ip); - - /* Release resources */ - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, BMP); - - } else if (type != NONE) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE)); - } - - /* - * Handle non-exported shadow - */ - if ((ip->bi_flags & DSW_SHDEXPORT) == 0) { - if ((rc = ii_open_shadow(ip, uconf.shadow_vol)) != 0) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, - DSW_EOPEN)); - } - } - - /* - * allocate _ii_concopy_sema and set to a value that won't allow - * all cache to be allocated by copy loops. - */ - - if (_ii_concopy_init == 0 && ip->bi_bmpfd != NULL) { - int asize = 0, wsize; - nsc_size_t cfbas, maxfbas; - - (void) nsc_cache_sizes(&asize, &wsize); - - if (asize > 0) { - cfbas = FBA_NUM(asize); - (void) _ii_rsrv_devs(ip, BMP, II_INTERNAL); - rc = nsc_maxfbas(ip->bi_bmpfd, 0, &maxfbas); - _ii_rlse_devs(ip, BMP); - if (!II_SUCCESS(rc)) - maxfbas = 1024; /* i.e. 
_SD_MAX_FBAS */ - ii_nconcopy = cfbas / (maxfbas * 2) / 3; - } - if (ii_nconcopy < 2) - ii_nconcopy = 2; - ASSERT(ii_nconcopy > 0); - sema_init(&_ii_concopy_sema, ii_nconcopy, NULL, - SEMA_DRIVER, NULL); - _ii_concopy_init = 1; - } - - /* check for shared master volume */ - for (hip = _ii_mst_top; hip; hip = hip->bi_nextmst) - if (strcmp(uconf.master_vol, ii_pathname(hip->bi_mstfd)) == 0) - break; - add_to_mst_top = (hip == NULL); - if (!hip) - for (hip = _ii_info_top; hip; hip = hip->bi_next) - if (strcmp(uconf.master_vol, - ii_pathname(hip->bi_mstfd)) == 0) - break; - nshadows = (hip != NULL); - - /* Check if master is offline */ - if (hip) { - if (hip->bi_flags & DSW_MSTOFFLINE) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, - DSW_EOFFLINE)); - } - } - - if (!nshadows && (ip->bi_flags&DSW_SHDIMPORT) == 0) { - ip->bi_mstfd = nsc_open(uconf.master_vol, - NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def, - (blind_t)(ip->bi_mstdev), &rc); - if (!ip->bi_mstfd) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, - DSW_EOPEN)); - } - - ip->bi_mstrfd = nsc_open(uconf.master_vol, - NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def, - (blind_t)(ip->bi_mstrdev), &rc); - if (!ip->bi_mstrfd) { - mutex_exit(&_ii_config_mutex); - _ii_info_free(ip); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uconf.status, - DSW_EOPEN)); - } - } - - ip->bi_head = ip; - ip->bi_master = ip; - - mutex_enter(&_ii_info_mutex); - ip->bi_next = _ii_info_top; - _ii_info_top = ip; - if (nshadows) { - /* link new shadow group together with others sharing master */ - if (ii_debug > 0) - cmn_err(CE_NOTE, - "!II: shadow %s shares master %s with other shadow" - " groups", uconf.shadow_vol, uconf.master_vol); - hip = hip->bi_head; - nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev)); - nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev)); - ip->bi_mstrdev = 
hip->bi_mstrdev; - ip->bi_mstdev = hip->bi_mstdev; - ip->bi_head = hip; - ip->bi_sibling = hip->bi_sibling; - if (add_to_mst_top) { - hip->bi_nextmst = _ii_mst_top; - _ii_mst_top = hip; - } - hip->bi_sibling = ip; - ip->bi_master = ip->bi_head->bi_master; - } - mutex_exit(&_ii_info_mutex); - mutex_exit(&_ii_config_mutex); - - keylen = strlen(ip->bi_keyname); - if (keylen > KSTAT_STRLEN - 1) { - keyoffset = keylen + 1 - KSTAT_STRLEN; - } else { - keyoffset = 0; - } - ip->bi_kstat = kstat_create("ii", _ii_instance++, - &ip->bi_keyname[ keyoffset ], "iiset", KSTAT_TYPE_NAMED, - sizeof (ii_kstat_set) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - if (ip->bi_kstat) { - ip->bi_kstat->ks_data = &ii_kstat_set; - ip->bi_kstat->ks_update = ii_set_stats_update; - ip->bi_kstat->ks_private = ip; - kstat_install(ip->bi_kstat); - } else { - cmn_err(CE_WARN, "!Unable to create set-specific kstats"); - } - -#ifndef DISABLE_KSTATS - /* create kstats information */ - mutex_init(&ip->bi_kstat_io.statmutex, NULL, MUTEX_DRIVER, NULL); - if (ip == ip->bi_master) { - ip->bi_kstat_io.master = _ii_kstat_create(ip, "master"); - } else { - ip->bi_kstat_io.master = ip->bi_master->bi_kstat_io.master; - (void) strlcpy(ip->bi_kstat_io.mstio, - ip->bi_master->bi_kstat_io.mstio, KSTAT_DATA_CHAR_LEN); - } - ip->bi_kstat_io.shadow = _ii_kstat_create(ip, "shadow"); - ip->bi_kstat_io.bitmap = _ii_kstat_create(ip, "bitmap"); -#endif - - (void) _ii_reserve_begin(ip); - rtype = MSTR|SHDR|BMP; - if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { - spcs_s_add(kstatus, rc); - rc = DSW_ERSRVFAIL; - goto fail; - } - - if (ip->bi_flags&DSW_SHDIMPORT) { - rc = 0; /* no master for imported volumes */ - mst_size = 0; - } else - rc = nsc_partsize(MSTFD(ip), &mst_size); - if (rc == 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) - rc = nsc_partsize(SHDFD(ip), &shd_size); - if (!ip->bi_bmpfd) - rc = EINVAL; - if (rc == 0) - rc = nsc_partsize(ip->bi_bmpfd, &bmp_size); - - if (ip->bi_flags&DSW_SHDIMPORT) - 
ip->bi_size = shd_size; - else - ip->bi_size = mst_size; - - if ((((ip->bi_flags&DSW_SHDIMPORT) != DSW_SHDIMPORT) && - (mst_size < 1)) || - (((ip->bi_flags&DSW_SHDEXPORT) != DSW_SHDEXPORT) && - (shd_size < 1)) || - ((rc == 0) && (bmp_size < 1))) { - /* could be really zero, or could be > 1 TB; fail the enable */ - rc = EINVAL; - } - - if (rc != 0) { /* rc set means an nsc_partsize() failed */ - /* - * If existing group, mark bitmap as offline and set - * bmp_size to "right size". - */ - if (existing) { - bmp_size = 2 * DSW_BM_FBA_LEN(mst_size) + - DSW_SHD_BM_OFFSET; - goto no_more_bmp_tests; - } - spcs_s_add(kstatus, rc); - rc = DSW_EPARTSIZE; - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - goto fail; - } - - if (ip->bi_flags&DSW_SHDIMPORT) - mst_size = shd_size; - if (ip->bi_flags&DSW_SHDEXPORT) - shd_size = mst_size; - /* - * Check with RDC if the master & shadow sizes are different. - * Once II is enabled, the shadow size will be made to appear - * the same as the master, and this will panic RDC if we're - * changing sizes on it. 
- */ - resized = (shd_size != mst_size); - if (resized && ii_need_same_size(ip)) { - cmn_err(CE_WARN, "!Cannot enable II set: would change volume " - "size on RDC"); - rc = DSW_EOPACKAGE; - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - goto fail; - } - if (bmp_size < 2 * DSW_BM_FBA_LEN(mst_size) + DSW_SHD_BM_OFFSET) { - /* bitmap volume too small */ - if (ii_debug > 0) - cmn_err(CE_NOTE, - "!ii: invalid sizes: bmp %" NSC_SZFMT " mst %" - NSC_SZFMT " %" NSC_SZFMT "", - bmp_size, mst_size, DSW_BM_FBA_LEN(mst_size)); - rc = DSW_EBMPSIZE; - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - goto fail; - } - if ((shd_size < mst_size) && (uconf.flag&DSW_GOLDEN) != 0) { - /* shadow volume too small */ - if (ii_debug > 0) - cmn_err(CE_NOTE, "!shd size too small (%" NSC_SZFMT - ") for independent set's master (%" NSC_SZFMT ")", - shd_size, mst_size); - rc = DSW_ESHDSIZE; - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - goto fail; - } - - ip->bi_busy = kmem_zalloc(1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)), - KM_SLEEP); - if (!ip->bi_busy) { - rc = ENOMEM; - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - goto fail; - } - - if (existing == 0) { - - DTRACE_PROBE(_ii_config); - - /* first time this shadow has been set up */ - mutex_enter(&ip->bi_mutex); - bm_header = _ii_bm_header_get(ip, &tmp); - mutex_exit(&ip->bi_mutex); - if (bm_header == NULL) { - if (ii_debug > 0) - cmn_err(CE_WARN, - "!ii: _ii_bm_header_get returned NULL"); - rc = DSW_EHDRBMP; - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - goto fail; - } - bzero(bm_header, sizeof (*bm_header)); - /* copy pathnames into it */ - (void) strncpy(bm_header->master_vol, uconf.master_vol, - DSW_NAMELEN); - (void) strncpy(bm_header->shadow_vol, uconf.shadow_vol, - DSW_NAMELEN); - (void) strncpy(bm_header->bitmap_vol, uconf.bitmap_vol, - DSW_NAMELEN); - (void) strncpy(bm_header->clstr_name, uconf.cluster_tag, - DSW_NAMELEN); - (void) strncpy(bm_header->group_name, uconf.group_name, - DSW_NAMELEN); - - if 
(uconf.cluster_tag[0] != 0) - (void) II_LINK_CLUSTER(ip, uconf.cluster_tag); - - if (uconf.group_name[0] != 0) - (void) II_LINK_GROUP(ip, uconf.group_name); - - - bm_header->ii_state = (uconf.flag & DSW_GOLDEN); - II_FLAG_ASSIGN(bm_header->ii_state, ip); - - if (import) { - II_FLAG_SETX(DSW_SHDIMPORT, ip); - bm_header->ii_state |= DSW_SHDIMPORT; - } - if (resized) { - II_FLAG_SETX(DSW_RESIZED, ip); - bm_header->ii_state |= DSW_RESIZED; - } - bm_header->ii_type = (uconf.flag & DSW_GOLDEN) ? - DSW_GOLDEN_TYPE : DSW_QUICK_TYPE; - bm_header->ii_magic = DSW_DIRTY; - bm_header->ii_version = II_HEADER_VERSION; - bm_header->ii_shdfba = DSW_SHD_BM_OFFSET; - bm_header->ii_copyfba = DSW_COPY_BM_OFFSET; - bm_header->ii_throttle_delay = ip->bi_throttle_delay; - bm_header->ii_throttle_unit = ip->bi_throttle_unit; - ip->bi_shdfba = bm_header->ii_shdfba; - ip->bi_copyfba = bm_header->ii_copyfba; - ip->bi_mtime = ddi_get_time(); - - /* write it to disk */ - mutex_enter(&ip->bi_mutex); - rc = _ii_bm_header_put(bm_header, ip, tmp); - mutex_exit(&ip->bi_mutex); - if (!II_SUCCESS(rc)) { - spcs_s_add(kstatus, rc); - rc = DSW_EHDRBMP; - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - goto fail; - } - if ((shd_size < mst_size) && (uconf.flag & DSW_GOLDEN) == 0) { - /* - * shadow volume smaller than master, must use a dependent - * copy with a bitmap file stored mapping for chunk locations. 
- */ - /* number of chunks in shadow volume */ - nsc_size_t shd_chunks; - nsc_size_t bmp_chunks; - nsc_size_t tmp_chunks; - - if (ii_debug > 1) - cmn_err(CE_NOTE, "!ii: using tree index on %s", - uconf.master_vol); - shd_chunks = shd_size / DSW_SIZE; - /* do not add in partial chunk at end */ - - ip->bi_mstchks = mst_size / DSW_SIZE; - if (mst_size % DSW_SIZE != 0) - ip->bi_mstchks++; - bmp_chunks = ii_btsize(bmp_size - ip->bi_copyfba - - DSW_BM_FBA_LEN(ip->bi_size)); - tmp_chunks = ip->bi_copyfba + - DSW_BM_FBA_LEN(ip->bi_size); - if (bmp_chunks < (nsc_size_t)ip->bi_mstchks) { - if (ii_debug > -1) { - cmn_err(CE_NOTE, "!ii: bitmap vol too" - "small: %" NSC_SZFMT " vs. %" - NSC_SZFMT, bmp_size, - tmp_chunks); - } - spcs_s_add(kstatus, rc); - rc = DSW_EHDRBMP; - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - goto fail; - } - mutex_enter(&ip->bi_mutex); - II_FLAG_SET(DSW_TREEMAP, ip); - mutex_exit(&ip->bi_mutex); - - /* following values are written to header by ii_tinit */ -#if (defined(NSC_MULTI_TERABYTE) && !defined(II_MULTIMULTI_TERABYTE)) - ASSERT(shd_chunks <= INT32_MAX); - ASSERT(mst_size / DSW_SIZE <= INT32_MAX); -#endif - ip->bi_mstchks = mst_size / DSW_SIZE; - if (mst_size % DSW_SIZE != 0) - ip->bi_mstchks++; -#ifdef II_MULTIMULTI_TERABYTE - ip->bi_shdchks = shd_chunks; -#else - /* still have 31 bit chunkid's */ - ip->bi_shdchks = (chunkid_t)shd_chunks; -#endif - ip->bi_shdchkused = 0; - rc = ii_tinit(ip); - } else { - ip->bi_shdchks = shd_size / DSW_SIZE; - ip->bi_shdchkused = 0; - } - if (rc == 0) - rc = II_LOAD_BMP(ip, 1); - if (rc == 0) - rc = II_ZEROBM(ip); - if (rc == 0) - rc = II_COPYBM(ip); /* also clear copy bitmap */ - if (rc == 0 && (uconf.flag & DSW_GOLDEN) && !import) - rc = ii_fill_copy_bmp(ip); - if (rc) { - spcs_s_add(kstatus, rc); - rc = DSW_EHDRBMP; - _ii_rlse_devs(ip, rtype); - goto fail; - } - /* check that changing shadow won't upset RDC */ - if (ii_update_denied(ip, kstatus, 0, 1)) { - rc = DSW_EOPACKAGE; - _ii_rlse_devs(ip, rtype); 
- _ii_reserve_end(ip); - goto fail; - } - ip->bi_disabled = 0; /* all okay and ready, we can go now */ - _ii_rlse_devs(ip, rtype); - /* no _ii_reserve_end() here - we must register first */ - ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd), - NSC_CACHE|NSC_DEVICE, _ii_io); - if (!nshadows) - ii_register_mst(ip); - ii_register_shd(ip); - - if (!ii_register_ok(ip)) { - ip->bi_disabled = 1; /* argh */ - rc = DSW_EREGISTER; - goto fail; - } - /* no _ii_reserve_begin() here -- we're still in process */ - (void) _ii_rsrv_devs(ip, rtype, II_INTERNAL); - - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: config: master %s shadow %s", - uconf.master_vol, uconf.shadow_vol); - rc = 0; - if ((uconf.flag & DSW_GOLDEN) && !import) { - mutex_enter(&ip->bi_mutex); - II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip); - ip->bi_ioctl++; /* we are effectively in an ioctl */ - mutex_exit(&ip->bi_mutex); - rc = _ii_copyvol(ip, 0, rtype, kstatus, 1); - } - _ii_rlse_devs(ip, rtype); - _ii_reserve_end(ip); - - ++iigkstat.num_sets.value.ul; - - return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc)); - } - - ip->bi_shdchks = shd_size / DSW_SIZE; - ip->bi_shdfba = shdfba; - ip->bi_copyfba = copyfba; - rc = II_LOAD_BMP(ip, 0); /* reload saved bitmap */ - mutex_enter(&ip->bi_mutex); - if (rc == 0) - bm_header = _ii_bm_header_get(ip, &tmp); - mutex_exit(&ip->bi_mutex); - if (rc || bm_header == NULL) { - if (existing) { - goto no_more_bmp_tests; - } - rc = DSW_EHDRBMP; - goto fail; - } - - /* - * If the header is dirty and it wasn't kept on persistent storage - * then the bitmaps must be assumed to be bad. 
- */ - if (bm_header->ii_magic == DSW_DIRTY && - ip->bi_bitmap_ops != &alloc_buf_bmp) { - type = bm_header->ii_type; - _ii_bm_header_free(bm_header, ip, tmp); - if (type == DSW_GOLDEN_TYPE) { - if ((ip->bi_flags & DSW_COPYINGM) != 0) - _ii_error(ip, DSW_SHDOFFLINE); - else if ((ip->bi_flags & DSW_COPYINGS) != 0) - _ii_error(ip, DSW_MSTOFFLINE); - else { - /* No copying, so they're just different */ - rc = ii_fill_copy_bmp(ip); - if (rc) { - spcs_s_add(kstatus, rc); - rc = DSW_EHDRBMP; - goto fail; - } - } - } else - _ii_error(ip, DSW_SHDOFFLINE); - - mutex_enter(&ip->bi_mutex); - bm_header = _ii_bm_header_get(ip, &tmp); - mutex_exit(&ip->bi_mutex); - if (bm_header == NULL) { - rc = DSW_EHDRBMP; - goto fail; - } - } - - bm_header->ii_magic = DSW_DIRTY; - mutex_enter(&ip->bi_mutex); - rc = _ii_bm_header_put(bm_header, ip, tmp); - mutex_exit(&ip->bi_mutex); - if (!II_SUCCESS(rc)) { - spcs_s_add(kstatus, rc); - rc = DSW_EHDRBMP; - goto fail; - } - - ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd), - NSC_CACHE|NSC_DEVICE, _ii_io); -no_more_bmp_tests: - _ii_rlse_devs(ip, rtype); - ip->bi_disabled = 0; /* all okay and ready, we can go now */ - if (!nshadows) - ii_register_mst(ip); - if ((ip->bi_flags & DSW_SHDEXPORT) == 0) - ii_register_shd(ip); - - if (!ii_register_ok(ip)) { - rc = DSW_EREGISTER; - goto fail; - } - _ii_reserve_end(ip); - - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: config: master %s shadow %s", - uconf.master_vol, uconf.shadow_vol); - - rc = 0; - if (ip->bi_flags & DSW_COPYINGP) { - /* Copy was in progress, so continue it */ - (void) _ii_rsrv_devs(ip, rtype, II_INTERNAL); - mutex_enter(&ip->bi_mutex); - ip->bi_ioctl++; /* we are effectively in an ioctl */ - mutex_exit(&ip->bi_mutex); - rc = _ii_copyvol(ip, ((ip->bi_flags & DSW_COPYINGS) != 0) ? 
- CV_SHD2MST : 0, rtype, kstatus, 0); - } - - ++iigkstat.num_sets.value.ul; - - return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc)); - -fail: - /* remove ip from _ii_info_top linked list */ - mutex_enter(&_ii_info_mutex); - for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) { - if (ip == *ipp) { - *ipp = ip->bi_next; - break; - } - } - mutex_exit(&_ii_info_mutex); - ii_sibling_free(ip); - - return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc)); -} - -static int -_ii_perform_disable(char *setname, spcs_s_info_t *kstatusp, int reclaim) -{ - _ii_info_t **xip, *ip; - _ii_overflow_t *op; - nsc_buf_t *tmp = NULL; - int rc; - ii_header_t *bm_header; - int rtype; - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(setname); - if (ip == NULL) { - mutex_exit(&_ii_info_mutex); - return (DSW_ENOTFOUND); - } - - if ((ip->bi_flags & DSW_GOLDEN) && - ((ip->bi_flags & DSW_COPYINGP) != 0)) { - /* - * Cannot disable an independent copy while still copying - * as it means that a data dependency exists. 
- */ - mutex_exit(&_ii_info_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - DTRACE_PROBE(_ii_perform_disable_end_DSW_EDEPENDENCY); - return (DSW_EDEPENDENCY); - } - - if ((ip->bi_flags & DSW_GOLDEN) == 0 && - ii_update_denied(ip, *kstatusp, 0, 1)) { - /* Cannot disable a dependent shadow while RDC is unsure */ - mutex_exit(&_ii_info_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - DTRACE_PROBE(DSW_EOPACKAGE); - return (DSW_EOPACKAGE); - } - - if (((ip->bi_flags & DSW_RESIZED) == DSW_RESIZED) && - ii_need_same_size(ip)) { - /* We can't disable the set whilst RDC is using it */ - mutex_exit(&_ii_info_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - cmn_err(CE_WARN, "!Cannot disable II set: would change " - "volume size on RDC"); - DTRACE_PROBE(DSW_EOPACKAGE_resize); - return (DSW_EOPACKAGE); - } - - ip->bi_disabled = 1; - if (NSHADOWS(ip) && (ip->bi_master == ip)) { - ip->bi_flags &= (~DSW_COPYING); - ip->bi_state |= DSW_MULTIMST; - } - mutex_exit(&_ii_info_mutex); - - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - _ii_stopvol(ip); - - rtype = SHDR|BMP; - if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { - spcs_s_add(*kstatusp, rc); - DTRACE_PROBE(DSW_ERSRVFAIL); - return (DSW_ERSRVFAIL); - } - - if ((ii_header < 128) && - (((ip->bi_flags & DSW_GOLDEN) == 0) || - (ip->bi_flags & DSW_COPYING))) { - /* - * Not a full copy so attempt to prevent use of partial copy - * by clearing where the first ufs super-block would be - * located. Solaris often incorporates the disk header into - * the start of the first slice, so avoid clearing the very - * first 16 blocks of the volume. 
- */ - - if (ii_debug > 1) - cmn_err(CE_NOTE, "!ii: Shadow copy invalidated"); - II_READ_START(ip, shadow); - rc = nsc_alloc_buf(SHDFD(ip), ii_header, 128 - ii_header, - NSC_RDWRBUF, &tmp); - II_READ_END(ip, shadow, rc, 128 - ii_header); - if (II_SUCCESS(rc)) { - rc = nsc_zero(tmp, ii_header, 128 - ii_header, 0); - if (II_SUCCESS(rc)) { - II_NSC_WRITE(ip, shadow, rc, tmp, ii_header, - (128 - ii_header), 0); - } - } - if (tmp) - (void) nsc_free_buf(tmp); - if (!II_SUCCESS(rc)) - _ii_error(ip, DSW_SHDOFFLINE); - } - - /* this rw_enter forces us to drain all active IO */ - rw_enter(&ip->bi_linkrw, RW_WRITER); - rw_exit(&ip->bi_linkrw); - - /* remove ip from _ii_info_top linked list */ - mutex_enter(&_ii_info_mutex); - for (xip = &_ii_info_top; *xip; xip = &((*xip)->bi_next)) { - if (ip == *xip) { - *xip = ip->bi_next; - break; - } - } - if (ip->bi_kstat) { - kstat_delete(ip->bi_kstat); - ip->bi_kstat = NULL; - } - mutex_exit(&_ii_info_mutex); - - rc = II_SAVE_BMP(ip, 1); - mutex_enter(&ip->bi_mutex); - if (rc == 0) - bm_header = _ii_bm_header_get(ip, &tmp); - if (rc == 0 && bm_header) { - if (ii_debug > 1) - cmn_err(CE_NOTE, "!ii: Invalid header written"); - bm_header->ii_magic = DSW_INVALID; - /* write it to disk */ - (void) _ii_bm_header_put(bm_header, ip, tmp); - } - mutex_exit(&ip->bi_mutex); - - op = ip->bi_overflow; - if (op && (reclaim == -1)) { - reclaim = (op->ii_drefcnt == 1? 
NO_RECLAIM : RECLAIM); - } - - if ((op != NULL) && (op->ii_hversion >= 1) && - (op->ii_hmagic == II_OMAGIC)) { - mutex_enter(&_ii_overflow_mutex); - if (ip->bi_flags & DSW_OVRHDRDRTY) { - mutex_enter(&ip->bi_mutex); - ip->bi_flags &= ~DSW_OVRHDRDRTY; - mutex_exit(&ip->bi_mutex); - ASSERT(op->ii_urefcnt > 0); - op->ii_urefcnt--; - } - if (op->ii_urefcnt == 0) { - op->ii_flags &= ~IIO_CNTR_INVLD; - op->ii_unused = op->ii_nchunks - 1; - } - mutex_exit(&_ii_overflow_mutex); - } - ii_overflow_free(ip, reclaim); - _ii_rlse_devs(ip, rtype); - - ii_sibling_free(ip); - - --iigkstat.num_sets.value.ul; - return (0); -} - -/* - * _ii_disable - * Deconfigures an II pair - * - * Calling/Exit State: - * Returns 0 if the pair was disabled. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. - * - * Description: - * Reads the user configuration structure and attempts to - * deconfigure that pairing based on the master device pathname. - */ - -int -_ii_disable(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t uparms; - dsw_ioctl32_t uparms32; - _ii_overflow_t *op; - int rc, rerr; - spcs_s_info_t kstatus; - uint64_t hash; - int reclaim; - _ii_lsthead_t *oldhead, **head; - _ii_lstinfo_t *np, **xnp, *oldp; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) - return (EFAULT); - II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); - uparms.status = (spcs_s_info_t)uparms32.status; - } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!uparms.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); - - DTRACE_PROBE2(_ii_disable_info, char *, uparms.shadow_vol, - int, uparms.flags); - - /* group or single set? 
*/ - if (uparms.flags & CV_IS_GROUP) { - hash = nsc_strhash(uparms.shadow_vol); - mutex_enter(&_ii_group_mutex); - for (head = &_ii_group_top; *head; - head = &((*head)->lst_next)) { - if ((hash == (*head)->lst_hash) && - strncmp((*head)->lst_name, uparms.shadow_vol, - DSW_NAMELEN) == 0) - break; - } - - if (!*head) { - mutex_exit(&_ii_group_mutex); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_EGNOTFOUND)); - } - - /* clear any overflow vol usage counts */ - for (np = (*head)->lst_start; np; np = np->lst_next) { - if (np->lst_ip->bi_overflow) { - np->lst_ip->bi_overflow->ii_detachcnt = 0; - } - } - - /* now increment */ - for (np = (*head)->lst_start; np; np = np->lst_next) { - if (np->lst_ip->bi_overflow) { - ++np->lst_ip->bi_overflow->ii_detachcnt; - } - } - - /* finally, disable all group members */ - rerr = 0; - xnp = &(*head)->lst_start; - while (*xnp) { - op = (*xnp)->lst_ip->bi_overflow; - if (op) { - reclaim = (op->ii_drefcnt == op->ii_detachcnt? - NO_RECLAIM : RECLAIM); - --op->ii_detachcnt; - } - - /* clear out the group pointer */ - (*xnp)->lst_ip->bi_group = NULL; - - rc = _ii_perform_disable((*xnp)->lst_ip->bi_keyname, - &kstatus, reclaim); - if (rc) { - /* restore group name */ - (*xnp)->lst_ip->bi_group = (*head)->lst_name; - - /* restore detachcnt */ - if (op) { - ++op->ii_detachcnt; - } - - /* don't delete branch */ - ++rerr; - spcs_s_add(kstatus, rc); - - /* move forward in linked list */ - xnp = &(*xnp)->lst_next; - } else { - oldp = (*xnp); - *xnp = (*xnp)->lst_next; - kmem_free(oldp, sizeof (_ii_lstinfo_t)); - } - } - if (rerr) { - mutex_exit(&_ii_group_mutex); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_EDISABLE)); - } - /* no errors, all sets disabled, OK to free list head */ - oldhead = *head; - *head = (*head)->lst_next; - kmem_free(oldhead, sizeof (_ii_lsthead_t)); - mutex_exit(&_ii_group_mutex); - } else { - /* only a single set is being disabled */ - rc = _ii_perform_disable(uparms.shadow_vol, &kstatus, -1); - 
if (rc) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); - } - - spcs_s_kfree(kstatus); - - return (0); -} - - -/* - * _ii_stat - * Get state of the shadow. - * - * Calling/Exit State: - * Returns 0 on success, otherwise an error code is returned - * and any additional error information is copied out to the user. - * The size variable in the dsw_stat_t is set to the FBA size - * of the volume, the stat variable is set to the state, and - * the structure is copied out. - */ -/*ARGSUSED*/ -int -_ii_stat(intptr_t arg, int ilp32, int *rvp) -{ - dsw_stat_t ustat; - dsw_stat32_t ustat32; - _ii_info_t *ip; - spcs_s_info_t kstatus; - char *group, *cluster; - - if (ilp32) { - if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0) - return (EFAULT); - II_TAIL_COPY(ustat, ustat32, shadow_vol, dsw_stat_t); - ustat.status = (spcs_s_info_t)ustat32.status; - } else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!ustat.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(ustat.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, ustat.status, - DSW_ENOTFOUND)); - - ustat.stat = ip->bi_flags; - ustat.size = ip->bi_size; - ustat.mtime = ip->bi_mtime; - - if (ilp32) - bzero(ustat32.overflow_vol, DSW_NAMELEN); - else - bzero(ustat.overflow_vol, DSW_NAMELEN); - if (ip->bi_overflow) { - (void) strncpy(ilp32 ? ustat32.overflow_vol : - ustat.overflow_vol, ip->bi_overflow->ii_volname, - DSW_NAMELEN); - } - - ustat.shdsize = ip->bi_shdchks; - if ((ip->bi_flags) & DSW_TREEMAP) { - ustat.shdused = ip->bi_shdchkused; - } else { - ustat.shdused = 0; - } - - /* copy over group and cluster associations */ - group = ilp32? ustat32.group_name : ustat.group_name; - cluster = ilp32? 
ustat32.cluster_tag : ustat.cluster_tag; - bzero(group, DSW_NAMELEN); - bzero(cluster, DSW_NAMELEN); - if (ip->bi_group) - (void) strncpy(group, ip->bi_group, DSW_NAMELEN); - if (ip->bi_cluster) - (void) strncpy(cluster, ip->bi_cluster, DSW_NAMELEN); - - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - spcs_s_kfree(kstatus); - if (ilp32) { - ustat32.stat = ustat.stat; - ustat32.size = ustat.size; - ustat32.shdsize = ustat.shdsize; - ustat32.shdused = ustat.shdused; - ustat32.mtime = ustat.mtime; - if (copyout(&ustat32, (void *)arg, sizeof (ustat32))) - return (EFAULT); - } else if (copyout(&ustat, (void *)arg, sizeof (ustat))) - return (EFAULT); - - return (0); -} - - -/* - * _ii_list - * List what shadow sets are currently configured. - * - * Calling/Exit State: - * Returns 0 on success, otherwise an error code is returned - * and any additional error information is copied out to the user. - */ -/*ARGSUSED*/ -int -_ii_list(intptr_t arg, int ilp32, int *rvp) -{ - dsw_list_t ulist; - dsw_list32_t ulist32; - _ii_info_t *ip; - dsw_config_t cf, *cfp; - dsw_config32_t cf32, *cf32p; - int rc; - int used; - spcs_s_info_t kstatus; - - if (ilp32) { - if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0) - return (EFAULT); - II_TAIL_COPY(ulist, ulist32, list_size, dsw_list_t); - ulist.status = (spcs_s_info_t)ulist32.status; - } else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - cf32p = (dsw_config32_t *)(unsigned long)ulist32.list; - cfp = ulist.list; - ulist.list_used = 0; - mutex_enter(&_ii_info_mutex); - ip = _ii_info_top; - - DTRACE_PROBE1(_ii_list_count, int, ulist.list_size); - - for (rc = used = 0; used < ulist.list_size && ip; ip = ip->bi_next) { - - if (ip->bi_disabled) - continue; - - mutex_enter(&ip->bi_mutex); - ip->bi_ioctl++; - if (ilp32) { - bzero(&cf32, sizeof (cf32)); - cf32.flag = ip->bi_flags; - (void) strncpy(cf32.master_vol, - 
ii_pathname(ip->bi_mstfd), DSW_NAMELEN); - (void) strncpy(cf32.shadow_vol, - ip->bi_keyname, DSW_NAMELEN); - (void) strncpy(cf32.bitmap_vol, (ip->bi_bmpfd) - ? ii_pathname(ip->bi_bmpfd) - : "<offline_bitmap>", DSW_NAMELEN); - if (copyout(&cf32, (void *)cf32p, sizeof (cf32))) - rc = EFAULT; - cf32p++; - } else { - bzero(&cf, sizeof (cf)); - cf.flag = ip->bi_flags; - (void) strncpy(cf.master_vol, - ii_pathname(ip->bi_mstfd), DSW_NAMELEN); - (void) strncpy(cf.shadow_vol, - ip->bi_keyname, DSW_NAMELEN); - (void) strncpy(cf.bitmap_vol, (ip->bi_bmpfd) - ? ii_pathname(ip->bi_bmpfd) - : "<offline_bitmap>", DSW_NAMELEN); - if (copyout(&cf, (void *)cfp, sizeof (cf))) - rc = EFAULT; - cfp++; - } - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - used++; - } - mutex_exit(&_ii_info_mutex); - - spcs_s_kfree(kstatus); - if (rc) - return (rc); - - ulist.list_used = used; - if (ilp32) { - ulist32.list_used = ulist.list_used; - if (copyout(&ulist32, (void *)arg, sizeof (ulist32))) - return (EFAULT); - } else if (copyout(&ulist, (void *)arg, sizeof (ulist))) - return (EFAULT); - - return (0); -} - -/* - * _ii_listlen - * Counts the number of items the DSWIOC_LIST and DSWIOC_OLIST - * ioctl calls would return. - * - * Calling/Exit State: - * Returns 0 on success, otherwise an error code is returned. - * Result is returned as successful ioctl value. 
- */ -/*ARGSUSED*/ -int -_ii_listlen(int cmd, int ilp32, int *rvp) -{ - _ii_info_t *ip; - _ii_overflow_t *op; - int count = 0; - - switch (cmd) { - - case DSWIOC_LISTLEN: - mutex_enter(&_ii_info_mutex); - for (ip = _ii_info_top; ip; ip = ip->bi_next) { - if (ip->bi_disabled == 0) { - count++; - } - } - mutex_exit(&_ii_info_mutex); - break; - case DSWIOC_OLISTLEN: - mutex_enter(&_ii_overflow_mutex); - for (op = _ii_overflow_top; op; op = op->ii_next) - count++; - mutex_exit(&_ii_overflow_mutex); - break; - default: - return (EINVAL); - } - *rvp = count; - - return (0); -} - -/* - * _ii_report_bmp - * - * Report to the user daemon that the bitmap has gone bad - */ -static int -_ii_report_bmp(_ii_info_t *ip) -{ - int rc; - struct nskernd *nsk; - - nsk = kmem_zalloc(sizeof (*nsk), KM_SLEEP); - if (!nsk) { - return (ENOMEM); - } - nsk->command = NSKERND_IIBITMAP; - nsk->data1 = (int64_t)(ip->bi_flags | DSW_BMPOFFLINE); - (void) strncpy(nsk->char1, ip->bi_keyname, - min(DSW_NAMELEN, NSC_MAXPATH)); - - rc = nskernd_get(nsk); - if (rc == 0) { - rc = (int)nsk->data1; - } - if (rc == 0) { - DTRACE_PROBE(_ii_report_bmp_end); - } else { - DTRACE_PROBE1(_ii_report_bmp_end_2, int, rc); - } - kmem_free(nsk, sizeof (*nsk)); - return (rc); -} - -/* - * _ii_offline - * Set volume offline flag(s) for a shadow. - * - * Calling/Exit State: - * Returns 0 on success, otherwise an error code is returned - * and any additional error information is copied out to the user. 
- */ -/*ARGSUSED*/ -int -_ii_offline(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t uparms; - dsw_ioctl32_t uparms32; - _ii_info_t *ip; - int rc; - spcs_s_info_t kstatus; - - if (ilp32) { - if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) - return (EFAULT); - II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); - uparms.status = (spcs_s_info_t)uparms32.status; - } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!uparms.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(uparms.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_ENOTFOUND)); - - if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_ERSRVFAIL)); - } - - mutex_exit(&ip->bi_mutex); - _ii_error(ip, uparms.flags & DSW_OFFLINE); - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - _ii_rlse_devs(ip, BMP); - - spcs_s_kfree(kstatus); - - return (0); -} - - -/* - * _ii_wait - * Wait for a copy to complete. - * - * Calling/Exit State: - * Returns 0 if the copy completed, otherwise error code. 
- * - */ -/*ARGSUSED*/ -int -_ii_wait(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t uparms; - dsw_ioctl32_t uparms32; - _ii_info_t *ip; - int rc = 0; - spcs_s_info_t kstatus; - - if (ilp32) { - if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) - return (EFAULT); - II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); - uparms.status = (spcs_s_info_t)uparms32.status; - uparms.pid = uparms32.pid; - } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!uparms.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(uparms.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_ENOTFOUND)); - - while (ip->bi_flags & DSW_COPYINGP) { - if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) { - /* Awoken by a signal */ - rc = EINTR; - break; - } - } - - /* Is this an attempt to unlock the copy/update PID? 
*/ - if (uparms.flags & CV_LOCK_PID) { - if (ip->bi_locked_pid == 0) { - rc = DSW_ENOTLOCKED; - } else if (uparms.pid == -1) { - cmn_err(CE_WARN, "!ii: Copy/Update PID %d, cleared", - ip->bi_locked_pid); - ip->bi_locked_pid = 0; - } else if (uparms.pid != ip->bi_locked_pid) { - rc = DSW_EINUSE; - } else { - ip->bi_locked_pid = 0; - } - } - - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); -} - - -static int -_ii_reset_mstvol(_ii_info_t *ip) -{ - _ii_info_t *xip; - - if (!NSHADOWS(ip)) - return (DSW_COPYINGS | DSW_COPYINGP); - - /* check for siblings updating master */ - for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { - if (xip == ip) - continue; - /* check if master is okay */ - if ((xip->bi_flags & DSW_MSTOFFLINE) == 0) { - return (0); - } - } - - return (DSW_COPYINGS | DSW_COPYINGP); -} - -/* - * _ii_reset - * Reset offlined underlying volumes - * - * Calling/Exit State: - * Returns 0 on success, otherwise an error code is returned - * and any additional error information is copied out to the user. 
- */ -/*ARGSUSED*/ -int -_ii_reset(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t uparms; - dsw_ioctl32_t uparms32; - _ii_info_t *ip; - nsc_buf_t *tmp = NULL; - int rc; - int flags; - ii_header_t *bm_header; - spcs_s_info_t kstatus; - int rtype; - - if (ilp32) { - if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) - return (EFAULT); - II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); - uparms.status = (spcs_s_info_t)uparms32.status; - } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!uparms.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(uparms.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_ENOTFOUND)); - - mutex_exit(&ip->bi_mutex); - - /* Figure out what to do according to what was flagged as */ - - if ((ip->bi_flags & DSW_OFFLINE) == 0) { - /* Nothing offline, so no op */ - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_kfree(kstatus); - return (0); - } - - if (!ip->bi_bmpfd) { - /* No bitmap fd, can't do anything */ - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_kfree(kstatus); - return (DSW_EHDRBMP); - } - - rtype = MSTR|SHDR|BMP; - if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_ERSRVFAIL)); - } - - /* - * Cannot use _ii_bm_header_get as it will fail if DSW_BMPOFFLINE - */ - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, 0, FBA_LEN(sizeof (ii_header_t)), - NSC_RDWRBUF, &tmp); - II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t))); - if (!II_SUCCESS(rc)) { - _ii_rlse_devs(ip, rtype); - 
mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - if (tmp) - (void) nsc_free_buf(tmp); - _ii_error(ip, DSW_BMPOFFLINE); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP)); - } - - bm_header = (ii_header_t *)(tmp)->sb_vec[0].sv_addr; - if (bm_header == NULL) { - _ii_rlse_devs(ip, rtype); - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - if (tmp) - (void) nsc_free_buf(tmp); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP)); - } - - flags = ip->bi_flags & ~DSW_COPY_FLAGS; - if ((flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) == 0) { - if (((flags & DSW_SHDOFFLINE) == 0) && - ((flags & DSW_MSTOFFLINE) == DSW_MSTOFFLINE)) { - /* Shadow was OK but master was offline */ - flags |= _ii_reset_mstvol(ip); - } else if ((flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) { - /* Shadow was offline, don't care what the master was */ - flags |= (DSW_COPYINGM | DSW_COPYINGP); - } - } - if (ip->bi_flags & DSW_VOVERFLOW) { - ip->bi_flags &= ~DSW_VOVERFLOW; - ip->bi_flags |= DSW_FRECLAIM; - } - flags &= ~(DSW_OFFLINE | DSW_CFGOFFLINE | DSW_VOVERFLOW | DSW_OVERFLOW); - if ((ip->bi_flags & DSW_BMPOFFLINE) == DSW_BMPOFFLINE) { - /* free any overflow allocation */ - ii_overflow_free(ip, INIT_OVR); - /* Bitmap now OK, so set up new bitmap header */ - (void) strncpy(bm_header->master_vol, ii_pathname(ip->bi_mstfd), - DSW_NAMELEN); - (void) strncpy(bm_header->shadow_vol, ii_pathname(ip->bi_shdfd), - DSW_NAMELEN); - (void) strncpy(bm_header->bitmap_vol, ii_pathname(ip->bi_bmpfd), - DSW_NAMELEN); - if (ip->bi_cluster) { - (void) strncpy(bm_header->clstr_name, ip->bi_cluster, - DSW_NAMELEN); - } - if (ip->bi_group) { - (void) strncpy(bm_header->group_name, ip->bi_group, - DSW_NAMELEN); - } - bm_header->ii_type = (flags & DSW_GOLDEN) ? 
- DSW_GOLDEN_TYPE : DSW_QUICK_TYPE; - bm_header->ii_magic = DSW_DIRTY; - bm_header->ii_version = II_HEADER_VERSION; - bm_header->ii_shdfba = DSW_SHD_BM_OFFSET; - bm_header->ii_copyfba = DSW_COPY_BM_OFFSET; - bm_header->ii_throttle_delay = ip->bi_throttle_delay; - bm_header->ii_throttle_unit = ip->bi_throttle_unit; - ip->bi_shdfba = bm_header->ii_shdfba; - ip->bi_copyfba = bm_header->ii_copyfba; - } else if ((ip->bi_flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) { - /* bitmap didn't go offline, but shadow did */ - if (ip->bi_overflow) { - ii_overflow_free(ip, RECLAIM); - } - } - _ii_lock_chunk(ip, II_NULLCHUNK); - mutex_enter(&ip->bi_mutex); - II_FLAG_ASSIGN(flags, ip); - - mutex_exit(&ip->bi_mutex); - rc = ii_fill_copy_bmp(ip); - if (rc == 0) - rc = II_ZEROBM(ip); - if (rc == 0) { - if ((ip->bi_flags&(DSW_GOLDEN)) == 0) { - /* just clear bitmaps for dependent copy */ - if (ip->bi_flags & DSW_TREEMAP) { - bm_header->ii_state = ip->bi_flags; - mutex_enter(&ip->bi_mutex); - rc = _ii_bm_header_put(bm_header, ip, tmp); - mutex_exit(&ip->bi_mutex); - tmp = NULL; - if (rc == 0) { - rc = ii_tinit(ip); - if (rc == 0) { - mutex_enter(&ip->bi_mutex); - bm_header = - _ii_bm_header_get(ip, &tmp); - mutex_exit(&ip->bi_mutex); - } - } - } - - if (rc == 0) - II_FLAG_CLRX(DSW_COPY_FLAGS, ip); - /* - * if copy flags were set, another process may be - * waiting - */ - if (rc == 0 && (flags & DSW_COPYINGP)) - cv_broadcast(&ip->bi_copydonecv); - - if (rc == 0) - rc = II_COPYBM(ip); - } - } - _ii_unlock_chunk(ip, II_NULLCHUNK); - if (rc) { - if (tmp) - _ii_bm_header_free(bm_header, ip, tmp); - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP)); - } - bm_header->ii_state = ip->bi_flags; - mutex_enter(&ip->bi_mutex); - rc = _ii_bm_header_put(bm_header, ip, tmp); - if (!II_SUCCESS(rc)) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - 
_ii_rlse_devs(ip, rtype); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP)); - } - - /* check with RDC */ - if (ii_update_denied(ip, kstatus, (ip->bi_flags & DSW_COPYINGS) ? - CV_SHD2MST : 0, 1)) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); - } - - /* don't perform copy for dependent shadows */ - if ((ip->bi_flags&(DSW_GOLDEN)) == 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); - } - - mutex_exit(&ip->bi_mutex); - /* _ii_copyvol calls _ii_ioctl_done() */ - if (ip->bi_flags & DSW_COPYINGS) - rc = _ii_copyvol(ip, CV_SHD2MST, rtype, kstatus, 1); - else if (ip->bi_flags & DSW_COPYINGM) - rc = _ii_copyvol(ip, 0, rtype, kstatus, 1); - else { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - } - - _ii_rlse_devs(ip, rtype); - - return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); -} - - -/* - * _ii_version - * Get version of the InstantImage module. - * - * Calling/Exit State: - * Returns 0 on success, otherwise EFAULT is returned. - * The major and minor revisions are copied out to the user if - * successful. 
- */ -/*ARGSUSED*/ -int -_ii_version(intptr_t arg, int ilp32, int *rvp) -{ - dsw_version_t uversion; - dsw_version32_t uversion32; - - if (ilp32) { - if (copyin((void *)arg, &uversion32, sizeof (uversion32)) < 0) - return (EFAULT); - - uversion32.major = dsw_major_rev; - uversion32.minor = dsw_minor_rev; - uversion32.micro = dsw_micro_rev; - uversion32.baseline = dsw_baseline_rev; - - if (copyout(&uversion32, (void *)arg, sizeof (uversion32))) - return (EFAULT); - } else { - if (copyin((void *)arg, &uversion, sizeof (uversion)) < 0) - return (EFAULT); - - uversion.major = dsw_major_rev; - uversion.minor = dsw_minor_rev; - uversion.micro = dsw_micro_rev; - uversion.baseline = dsw_baseline_rev; - - if (copyout(&uversion, (void *)arg, sizeof (uversion))) - return (EFAULT); - } - - return (0); -} - -/* - * _ii_copyparm - * Get and set copy parameters. - * - * Calling/Exit State: - * Returns 0 on success, otherwise EFAULT is returned. - * The previous values are returned to the user. - */ -/*ARGSUSED*/ -int -_ii_copyparm(intptr_t arg, int ilp32, int *rvp) -{ - dsw_copyp_t copyp; - dsw_copyp32_t copyp32; - spcs_s_info_t kstatus; - _ii_info_t *ip; - int rc = 0; - int tmp; - - if (ilp32) { - if (copyin((void *)arg, ©p32, sizeof (copyp32)) < 0) - return (EFAULT); - II_TAIL_COPY(copyp, copyp32, shadow_vol, dsw_copyp_t); - copyp.status = (spcs_s_info_t)copyp32.status; - } else if (copyin((void *)arg, ©p, sizeof (copyp)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!copyp.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, copyp.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(copyp.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, copyp.status, - DSW_ENOTFOUND)); - - tmp = ip->bi_throttle_delay; - if (copyp.copy_delay != -1) { - if (copyp.copy_delay >= MIN_THROTTLE_DELAY && - copyp.copy_delay <= MAX_THROTTLE_DELAY) - ip->bi_throttle_delay = 
copyp.copy_delay; - else { - cmn_err(CE_WARN, "!ii: delay out of range %d", - copyp.copy_delay); - rc = EINVAL; - } - } - copyp.copy_delay = tmp; - - tmp = ip->bi_throttle_unit; - if (copyp.copy_unit != -1) { - if (copyp.copy_unit >= MIN_THROTTLE_UNIT && - copyp.copy_unit <= MAX_THROTTLE_UNIT) { - if (rc != EINVAL) - ip->bi_throttle_unit = copyp.copy_unit; - } else { - cmn_err(CE_WARN, "!ii: unit out of range %d", - copyp.copy_unit); - if (rc != EINVAL) { - rc = EINVAL; - ip->bi_throttle_delay = copyp.copy_delay; - } - } - } - copyp.copy_unit = tmp; - - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - if (ilp32) { - copyp32.copy_delay = copyp.copy_delay; - copyp32.copy_unit = copyp.copy_unit; - if (copyout(©p32, (void *)arg, sizeof (copyp32)) < 0) - return (EFAULT); - } else if (copyout(©p, (void *)arg, sizeof (copyp))) - return (EFAULT); - - return (spcs_s_ocopyoutf(&kstatus, copyp.status, rc)); -} - - -/* - * _ii_suspend_vol - * suspend an individual InstantImage group - * - * Calling/Exit State: - * Returns 0 on success, nonzero otherwise - */ - -int -_ii_suspend_vol(_ii_info_t *ip) -{ - _ii_info_t **xip; - int copy_flag; - int rc; - nsc_buf_t *tmp = NULL; - ii_header_t *bm_header; - - copy_flag = ip->bi_flags & DSW_COPY_FLAGS; - - _ii_stopvol(ip); - ASSERT(total_ref(ip) == 0); - - if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) - return (rc); - - /* this rw_enter forces us to drain all active IO */ - rw_enter(&ip->bi_linkrw, RW_WRITER); - rw_exit(&ip->bi_linkrw); - - mutex_enter(&_ii_info_mutex); - for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) { - if (ip == *xip) - break; - } - *xip = ip->bi_next; - mutex_exit(&_ii_info_mutex); - - rc = II_SAVE_BMP(ip, 1); - mutex_enter(&ip->bi_mutex); - if (rc == 0) - bm_header = _ii_bm_header_get(ip, &tmp); - if (rc == 0 && bm_header) { - bm_header->ii_magic = DSW_CLEAN; - bm_header->ii_state |= copy_flag; - bm_header->ii_throttle_delay = ip->bi_throttle_delay; - bm_header->ii_throttle_unit = 
ip->bi_throttle_unit; - /* copy over the mtime */ - bm_header->ii_mtime = ip->bi_mtime; - /* write it to disk */ - rc = _ii_bm_header_put(bm_header, ip, tmp); - } - --iigkstat.num_sets.value.ul; - mutex_exit(&ip->bi_mutex); - - ii_overflow_free(ip, NO_RECLAIM); - _ii_rlse_devs(ip, BMP); - - ii_sibling_free(ip); - - return (rc); -} - -/* - * _ii_suspend_cluster - * Cluster resource group is switching over to another node, so - * all shadowed volumes in that group are suspended. - * - * Returns 0 on success, or ESRCH if the name of the cluster resource - * group couldn't be found. - */ -int -_ii_suspend_cluster(char *shadow_vol) -{ - int found, last; - uint64_t hash; - _ii_info_t *ip; - _ii_lsthead_t **cp, *xcp; - _ii_lstinfo_t **np, *xnp; - - /* find appropriate cluster list */ - mutex_enter(&_ii_cluster_mutex); - hash = nsc_strhash(shadow_vol); - for (cp = &_ii_cluster_top; *cp; cp = &((*cp)->lst_next)) { - if ((hash == (*cp)->lst_hash) && strncmp(shadow_vol, - (*cp)->lst_name, DSW_NAMELEN) == 0) - break; - } - - if (!*cp) { - mutex_exit(&_ii_cluster_mutex); - return (DSW_ECNOTFOUND); - } - - found = 1; - last = 0; - while (found && !last) { - found = 0; - - mutex_enter(&_ii_info_mutex); - for (np = &(*cp)->lst_start; *np; np = &((*np)->lst_next)) { - ip = (*np)->lst_ip; - - if (ip->bi_disabled) - continue; - - found++; - - ip->bi_disabled = 1; - if (NSHADOWS(ip) && (ip->bi_master == ip)) { - ip->bi_flags &= (~DSW_COPYING); - ip->bi_state |= DSW_MULTIMST; - } - mutex_exit(&_ii_info_mutex); - - xnp = *np; - *np = (*np)->lst_next; - kmem_free(xnp, sizeof (_ii_lstinfo_t)); - ip->bi_cluster = NULL; - - (void) _ii_suspend_vol(ip); - break; - } - if (found == 0) - mutex_exit(&_ii_info_mutex); - else if (!(*cp)->lst_start) { - xcp = *cp; - *cp = (*cp)->lst_next; - kmem_free(xcp, sizeof (_ii_lsthead_t)); - last = 1; - } - } - mutex_exit(&_ii_cluster_mutex); - - return (0); -} - -/* - * _ii_shutdown - * System is shutting down, so all shadowed volumes are suspended. 
- * - * This always succeeds, so always returns 0. - */ - -/* ARGSUSED */ - -int -_ii_shutdown(intptr_t arg, int *rvp) -{ - _ii_info_t **xip, *ip; - int found; - - *rvp = 0; - - _ii_shutting_down = 1; - - /* Go through the list until only disabled entries are found */ - - found = 1; - while (found) { - found = 0; - - mutex_enter(&_ii_info_mutex); - for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) { - ip = *xip; - if (ip->bi_disabled) { - /* Also covers not fully configured yet */ - continue; - } - found++; - - ip->bi_disabled = 1; - mutex_exit(&_ii_info_mutex); - - (void) _ii_suspend_vol(ip); - - break; - } - if (found == 0) - mutex_exit(&_ii_info_mutex); - } - - _ii_shutting_down = 0; - - return (0); -} - -/* - * _ii_suspend - * Suspend an InstantImage, saving its state to allow a subsequent resume. - * - * Calling/Exit State: - * Returns 0 if the pair was suspended. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. - */ - -/* ARGSUSED */ - -int -_ii_suspend(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t uparms; - dsw_ioctl32_t uparms32; - _ii_info_t *ip; - int rc; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) - return (EFAULT); - II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); - uparms.status = (spcs_s_info_t)uparms32.status; - } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!uparms.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); - - if ((uparms.flags & CV_IS_CLUSTER) != 0) { - rc = _ii_suspend_cluster(uparms.shadow_vol); - } else { - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(uparms.shadow_vol); - if (ip == NULL) { - mutex_exit(&_ii_info_mutex); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_ENOTFOUND)); - } - - ip->bi_disabled = 1; - if (NSHADOWS(ip) 
&& (ip->bi_master == ip)) { - ip->bi_flags &= (~DSW_COPYING); - ip->bi_state |= DSW_MULTIMST; - } - mutex_exit(&_ii_info_mutex); - - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - rc = _ii_suspend_vol(ip); - } - - return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); -} - - -/* - * _ii_abort - * Stop any copying process for shadow. - * - * Calling/Exit State: - * Returns 0 if the abort succeeded. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. - */ - -/* ARGSUSED */ - -int -_ii_abort(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t uabort; - dsw_ioctl32_t uabort32; - _ii_info_t *ip; - int rc; - spcs_s_info_t kstatus; - - if (ilp32) { - if (copyin((void *)arg, &uabort32, sizeof (uabort32)) < 0) - return (EFAULT); - II_TAIL_COPY(uabort, uabort32, shadow_vol, dsw_ioctl_t); - uabort.status = (spcs_s_info_t)uabort32.status; - } else if (copyin((void *)arg, &uabort, sizeof (uabort)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!uabort.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uabort.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(uabort.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, uabort.status, - DSW_ENOTFOUND)); - - mutex_exit(&ip->bi_mutex); - - rc = _ii_stopcopy(ip); - - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - return (spcs_s_ocopyoutf(&kstatus, uabort.status, rc)); -} - - -/* - * _ii_segment - * Copy out II pair bitmaps (cpy, shd, idx) in segments - * - * Calling/Exit State: - * Returns 0 if the operation succeeded. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. 
- * - */ -int -_ii_segment(intptr_t arg, int ilp32, int *rvp) -{ - dsw_segment_t usegment; - dsw_segment32_t usegment32; - _ii_info_t *ip; - int rc, size; - spcs_s_info_t kstatus; - int32_t bi_idxfba; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &usegment32, sizeof (usegment32))) - return (EFAULT); - usegment.status = (spcs_s_info_t)usegment32.status; - bcopy(usegment32.shadow_vol, usegment.shadow_vol, DSW_NAMELEN); - usegment.seg_number = (unsigned)usegment32.seg_number; - usegment.shd_bitmap = - (unsigned char *)(unsigned long)usegment32.shd_bitmap; - usegment.shd_size = usegment32.shd_size; - usegment.cpy_bitmap = - (unsigned char *)(unsigned long)usegment32.cpy_bitmap; - usegment.cpy_size = usegment32.cpy_size; - usegment.idx_bitmap = - (unsigned char *)(unsigned long)usegment32.idx_bitmap; - usegment.idx_size = usegment32.idx_size; - } else if (copyin((void *)arg, &usegment, sizeof (usegment))) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (usegment.shadow_vol[0]) { - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(usegment.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, usegment.status, - DSW_ENOTFOUND)); - } else - return (spcs_s_ocopyoutf(&kstatus, usegment.status, - DSW_EEMPTY)); - - mutex_exit(&ip->bi_mutex); - - size = ((((ip->bi_size + (DSW_SIZE-1)) - / DSW_SIZE) + (DSW_BITS-1))) / DSW_BITS; - bi_idxfba = ip->bi_copyfba + (ip->bi_copyfba - ip->bi_shdfba); - if (((nsc_size_t)usegment.seg_number > DSW_BM_FBA_LEN(ip->bi_size)) || - (usegment.shd_size > size) || - (usegment.cpy_size > size) || - (!(ip->bi_flags & DSW_GOLDEN) && (usegment.idx_size > size*32))) { - _ii_ioctl_done(ip); - return (spcs_s_ocopyoutf(&kstatus, usegment.status, - DSW_EMISMATCH)); - } - - if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return 
(spcs_s_ocopyoutf(&kstatus, usegment.status, - DSW_ERSRVFAIL)); - } - - if (usegment.shd_bitmap && usegment.shd_size > 0) - rc = II_CO_BMP(ip, ip->bi_shdfba+usegment.seg_number, - usegment.shd_bitmap, usegment.shd_size); - if (rc == 0 && usegment.cpy_bitmap && usegment.cpy_size > 0) - rc = II_CO_BMP(ip, ip->bi_copyfba+usegment.seg_number, - usegment.cpy_bitmap, usegment.cpy_size); - if (!(ip->bi_flags & DSW_GOLDEN)) { - if (rc == 0 && usegment.idx_bitmap && usegment.idx_size > 0) - rc = II_CO_BMP(ip, bi_idxfba+usegment.seg_number*32, - usegment.idx_bitmap, usegment.idx_size); - } - - _ii_rlse_devs(ip, BMP); - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - if (rc) { - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_EIO)); - } - - spcs_s_kfree(kstatus); - return (0); -} - - -/* - * _ii_bitmap - * Copy out II pair bitmaps to user program - * - * Calling/Exit State: - * Returns 0 if the operation succeeded. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. 
- */ - -int -_ii_bitmap(intptr_t arg, int ilp32, int *rvp) -{ - dsw_bitmap_t ubitmap; - dsw_bitmap32_t ubitmap32; - _ii_info_t *ip; - int rc; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32))) - return (EFAULT); - ubitmap.status = (spcs_s_info_t)ubitmap32.status; - bcopy(ubitmap32.shadow_vol, ubitmap.shadow_vol, DSW_NAMELEN); - ubitmap.shd_bitmap = - (unsigned char *)(unsigned long)ubitmap32.shd_bitmap; - ubitmap.shd_size = ubitmap32.shd_size; - ubitmap.copy_bitmap = - (unsigned char *)(unsigned long)ubitmap32.copy_bitmap; - ubitmap.copy_size = ubitmap32.copy_size; - } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap))) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!ubitmap.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(ubitmap.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_ENOTFOUND)); - - mutex_exit(&ip->bi_mutex); - - if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_ERSRVFAIL)); - } - - if (ubitmap.shd_bitmap && ubitmap.shd_size > 0) - rc = II_CO_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap, - ubitmap.shd_size); - if (rc == 0 && ubitmap.copy_bitmap && ubitmap.copy_size > 0) - rc = II_CO_BMP(ip, ip->bi_copyfba, ubitmap.copy_bitmap, - ubitmap.copy_size); - _ii_rlse_devs(ip, BMP); - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - if (rc) { - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO)); - } - - spcs_s_kfree(kstatus); - - return (0); -} - -/* - * _ii_export - * Exports the shadow volume - * - * Calling/Exit State: - * Returns 0 if the shadow 
was exported. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. - * - * Description: - */ - -int -_ii_export(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t uparms; - dsw_ioctl32_t uparms32; - _ii_info_t *ip; - nsc_fd_t *fd; - int rc = 0; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) - return (EFAULT); - II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); - uparms.status = (spcs_s_info_t)uparms32.status; - } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!uparms.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(uparms.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_ENOTFOUND)); - - if ((ip->bi_flags & DSW_GOLDEN) == 0 || - ((ip->bi_flags & (DSW_COPYING|DSW_SHDEXPORT|DSW_SHDIMPORT)) != 0)) { - /* - * Cannot export a dependent copy or while still copying or - * the shadow is already in an exported state - */ - rc = ip->bi_flags & (DSW_SHDEXPORT|DSW_SHDIMPORT) - ? DSW_EALREADY : DSW_EDEPENDENCY; - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); - } - if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uparms.status, - DSW_ERSRVFAIL)); - } - II_FLAG_SET(DSW_SHDEXPORT, ip); - - mutex_exit(&ip->bi_mutex); - - /* this rw_enter forces us to drain all active IO */ - rw_enter(&ip->bi_linkrw, RW_WRITER); - rw_exit(&ip->bi_linkrw); - - mutex_enter(&ip->bi_mutex); - - _ii_rlse_devs(ip, BMP); - - /* Shut shadow volume. 
*/ - if (ip->bi_shdfd) { - if (ip->bi_shdrsrv) { - nsc_release(ip->bi_shdfd); - ip->bi_shdrsrv = NULL; - } - fd = ip->bi_shdfd; - ip->bi_shdfd = NULL; - mutex_exit(&ip->bi_mutex); - (void) nsc_close(fd); - mutex_enter(&ip->bi_mutex); - } - - if (ip->bi_shdrfd) { - if (ip->bi_shdrrsrv) { - nsc_release(ip->bi_shdrfd); - ip->bi_shdrrsrv = NULL; - } - fd = ip->bi_shdrfd; - ip->bi_shdrfd = NULL; - mutex_exit(&ip->bi_mutex); - (void) nsc_close(fd); - mutex_enter(&ip->bi_mutex); - } - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - (void) _ii_reserve_begin(ip); - if (ip->bi_shd_tok) { - (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow"); - ip->bi_shd_tok = NULL; - } - - if (ip->bi_shdr_tok) { - (void) _ii_unregister_path(ip->bi_shdr_tok, 0, - "raw shadow"); - ip->bi_shdr_tok = NULL; - } - _ii_reserve_end(ip); - - spcs_s_kfree(kstatus); - - return (0); -} - -/* - * _ii_join - * Rejoins the shadow volume - * - * Calling/Exit State: - * Returns 0 if the shadow was exported. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. 
- * - * Description: - */ - -int -_ii_join(intptr_t arg, int ilp32, int *rvp) -{ - dsw_bitmap_t ubitmap; - dsw_bitmap32_t ubitmap32; - _ii_info_t *ip; - uint64_t bm_size; - int rc = 0; - int rtype = 0; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0) - return (EFAULT); - II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t); - ubitmap.status = (spcs_s_info_t)ubitmap32.status; - ubitmap.shd_bitmap = - (unsigned char *)(unsigned long)ubitmap32.shd_bitmap; - ubitmap.shd_size = ubitmap32.shd_size; - } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!ubitmap.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(ubitmap.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_ENOTFOUND)); - - /* - * Check that group has shadow exported. - */ - if ((ip->bi_flags & DSW_SHDEXPORT) == 0) { - /* - * Cannot join if the shadow isn't exported. 
- */ - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_ENOTEXPORTED)); - } - /* check bitmap is at least large enough for master volume size */ - bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); - if (ubitmap.shd_size < bm_size) { - /* bitmap is to small */ - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_EINVALBMP)); - } - /* read in bitmap and or with differences bitmap */ - rtype = BMP; - if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_ERSRVFAIL)); - } - rc = II_CI_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap, - ubitmap.shd_size); - /* open up shadow */ - if ((rc = ii_open_shadow(ip, ip->bi_keyname)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - _ii_rlse_devs(ip, rtype); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EOPEN)); - } - ii_register_shd(ip); - if (!rc) - II_FLAG_CLR(DSW_SHDEXPORT, ip); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - - if (rc) { - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO)); - } - - spcs_s_kfree(kstatus); - - return (0); -} - - -/* - * _ii_ocreate - * Configures a volume suitable for use as an overflow volume. - * - * Calling/Exit State: - * Returns 0 if the volume was configured successfully. Otherwise - * an error code is returned and any additional error information - * is copied out to the user. 
- * - * Description: - */ - -int -_ii_ocreate(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t uioctl; - dsw_ioctl32_t uioctl32; - _ii_overflow_t ov; - _ii_overflow_t *op = &ov; - int rc = 0; - nsc_fd_t *fd; - nsc_iodev_t *iodev; - nsc_size_t vol_size; - char *overflow_vol; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &uioctl32, sizeof (uioctl32)) < 0) - return (EFAULT); - II_TAIL_COPY(uioctl, uioctl32, shadow_vol, dsw_ioctl_t); - uioctl.status = (spcs_s_info_t)uioctl32.status; - } else if (copyin((void *)arg, &uioctl, sizeof (uioctl)) < 0) - return (EFAULT); - - overflow_vol = uioctl.shadow_vol; - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!overflow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EEMPTY)); - - if (ii_volume(overflow_vol, 0) != NONE) - return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EINUSE)); - - fd = nsc_open(overflow_vol, - NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(iodev), &rc); - if (!fd) - fd = nsc_open(uioctl.shadow_vol, - NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL, - (blind_t)&(iodev), &rc); - if (fd == NULL) { - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO)); - } - if ((rc = nsc_reserve(fd, 0)) != 0) { - spcs_s_add(kstatus, rc); - (void) nsc_close(fd); - return (spcs_s_ocopyoutf(&kstatus, uioctl.status, - DSW_ERSRVFAIL)); - } - /* setup magic number etc; */ - rc = nsc_partsize(fd, &vol_size); - if (rc) { - spcs_s_add(kstatus, rc); - (void) nsc_close(fd); - return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO)); - } - op->ii_hmagic = II_OMAGIC; - /* take 1 off as chunk 0 contains header */ - op->ii_nchunks = (vol_size / DSW_SIZE) -1; - op->ii_drefcnt = 0; - op->ii_used = 1; /* we have used the header */ - op->ii_unused = op->ii_nchunks - op->ii_used; - op->ii_freehead = II_NULLNODE; - op->ii_hversion = OV_HEADER_VERSION; - op->ii_flags = 0; - op->ii_urefcnt = 0; - (void) strncpy(op->ii_volname, 
uioctl.shadow_vol, DSW_NAMELEN); - rc = _ii_nsc_io(0, KS_NA, fd, NSC_WRBUF, II_OHEADER_FBA, - (unsigned char *)&op->ii_do, sizeof (op->ii_do)); - (void) nsc_release(fd); - (void) nsc_close(fd); - if (rc) { - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO)); - } - - spcs_s_kfree(kstatus); - - return (0); -} - - -/* - * _ii_oattach - * Attaches the volume in the "bitmap_vol" field as an overflow volume. - * - * Calling/Exit State: - * Returns 0 if the volume was attached. Fails if the shadow group - * is of the wrong type (eg independent) or already has an overflow - * volume attached. - * - * Description: - */ - -int -_ii_oattach(intptr_t arg, int ilp32, int *rvp) -{ - dsw_config_t uconfig; - dsw_config32_t uconfig32; - _ii_info_t *ip; - int rc = 0; - int rtype = 0; - ii_header_t *bm_header; - nsc_buf_t *tmp = NULL; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &uconfig32, sizeof (uconfig32)) < 0) - return (EFAULT); - II_TAIL_COPY(uconfig, uconfig32, shadow_vol, dsw_config_t); - uconfig.status = (spcs_s_info_t)uconfig32.status; - } else if (copyin((void *)arg, &uconfig, sizeof (uconfig)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!uconfig.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EEMPTY)); - - switch (ii_volume(uconfig.bitmap_vol, 0)) { - case NONE: - case OVR: - break; - default: - return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EINUSE)); - } - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(uconfig.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, uconfig.status, - DSW_ENOTFOUND)); - - /* check shadow doesn't already have an overflow volume */ - if (ip->bi_overflow) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (spcs_s_ocopyoutf(&kstatus, uconfig.status, - DSW_EALREADY)); - } - /* check shadow is mapped so can have an overflow 
*/ - if ((ip->bi_flags&DSW_TREEMAP) == 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (spcs_s_ocopyoutf(&kstatus, uconfig.status, - DSW_EWRONGTYPE)); - } - rtype = BMP; - if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, uconfig.status, - DSW_ERSRVFAIL)); - } - /* attach volume */ - if ((rc = ii_overflow_attach(ip, uconfig.bitmap_vol, 1)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - return (spcs_s_ocopyoutf(&kstatus, uconfig.status, rc)); - } - - /* re-write header so shadow can be restarted with overflow volume */ - - bm_header = _ii_bm_header_get(ip, &tmp); - if (bm_header == NULL) { - /* detach volume */ - ii_overflow_free(ip, RECLAIM); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - return (spcs_s_ocopyoutf(&kstatus, uconfig.status, - DSW_EHDRBMP)); - } - (void) strncpy(bm_header->overflow_vol, uconfig.bitmap_vol, - DSW_NAMELEN); - (void) _ii_bm_header_put(bm_header, ip, tmp); - _ii_rlse_devs(ip, rtype); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - spcs_s_kfree(kstatus); - - return (0); -} - - -/* - * _ii_odetach - * Breaks the link with the overflow volume. - * - * Calling/Exit State: - * Returns 0 if the overflow volume was detached. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. 
- * - * Description: - */ - -int -_ii_odetach(intptr_t arg, int ilp32, int *rvp) -{ - dsw_bitmap_t ubitmap; - dsw_bitmap32_t ubitmap32; - _ii_info_t *ip; - int rc = 0; - int rtype = 0; - ii_header_t *bm_header; - nsc_buf_t *tmp = NULL; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0) - return (EFAULT); - II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t); - ubitmap.status = (spcs_s_info_t)ubitmap32.status; - } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!ubitmap.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(ubitmap.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_ENOTFOUND)); - - if ((ip->bi_flags&DSW_VOVERFLOW) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_EODEPENDENCY)); - } - rtype = BMP; - if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_ERSRVFAIL)); - } - ii_overflow_free(ip, RECLAIM); - /* re-write header to break link with overflow volume */ - - bm_header = _ii_bm_header_get(ip, &tmp); - if (bm_header == NULL) { - _ii_rlse_devs(ip, rtype); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, - DSW_EHDRBMP)); - } - bzero(bm_header->overflow_vol, DSW_NAMELEN); - (void) _ii_bm_header_put(bm_header, ip, tmp); - - _ii_rlse_devs(ip, rtype); - _ii_ioctl_done(ip); - - mutex_exit(&ip->bi_mutex); - if (rc) { - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO)); - } - - spcs_s_kfree(kstatus); - - 
--iigkstat.assoc_over.value.ul; - - return (0); -} - - -/* - * _ii_gc_list - * Returns a list of all lists, or all entries in a list - * - */ -int -_ii_gc_list(intptr_t arg, int ilp32, int *rvp, kmutex_t *mutex, - _ii_lsthead_t *lst) -{ - dsw_aioctl_t ulist; - dsw_aioctl32_t ulist32; - size_t name_offset; - int i; - spcs_s_info_t kstatus; - char *carg = (char *)arg; - uint64_t hash; - _ii_lsthead_t *cp; - _ii_lstinfo_t *np; - - *rvp = 0; - name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]); - if (ilp32) { - if (copyin((void *) arg, &ulist32, sizeof (ulist32)) < 0) - return (EFAULT); - II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t); - ulist.status = (spcs_s_info_t)ulist32.status; - name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]); - } else if (copyin((void *) arg, &ulist, sizeof (ulist)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - mutex_enter(mutex); - if (ulist.shadow_vol[ 0 ] != 0) { - /* search for specific list */ - hash = nsc_strhash(ulist.shadow_vol); - for (cp = lst; cp; cp = cp->lst_next) { - if ((hash == cp->lst_hash) && strncmp(ulist.shadow_vol, - cp->lst_name, DSW_NAMELEN) == 0) { - break; - } - } - if (cp) { - for (i = 0, np = cp->lst_start; i < ulist.count && np; - np = np->lst_next, carg += DSW_NAMELEN, i++) { - if (copyout(np->lst_ip->bi_keyname, - carg + name_offset, DSW_NAMELEN)) { - mutex_exit(mutex); - return (spcs_s_ocopyoutf(&kstatus, - ulist.status, EFAULT)); - } - } - } else { - i = 0; - } - } else { - /* return full list */ - for (i = 0, cp = lst; i < ulist.count && cp; - carg += DSW_NAMELEN, i++, cp = cp->lst_next) { - if (copyout(cp->lst_name, carg + name_offset, - DSW_NAMELEN)) { - mutex_exit(mutex); - return (spcs_s_ocopyoutf(&kstatus, ulist.status, - EFAULT)); - } - } - } - mutex_exit(mutex); - ulist32.count = ulist.count = i; - - if (ilp32) { - if (copyout(&ulist32, (void *) arg, name_offset)) - return (EFAULT); - } else { - if (copyout(&ulist, (void*) arg, name_offset)) 
- return (EFAULT); - } - - return (spcs_s_ocopyoutf(&kstatus, ulist.status, 0)); -} - -/* - * _ii_olist - * Breaks the link with the overflow volume. - * - * Calling/Exit State: - * Returns 0 if the overflow volume was detached. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. - * - * Description: - */ - -int -_ii_olist(intptr_t arg, int ilp32, int *rvp) -{ - dsw_aioctl_t ulist; - dsw_aioctl32_t ulist32; - _ii_overflow_t *op; - size_t name_offset; - int rc = 0; - int i; - char *carg = (char *)arg; - spcs_s_info_t kstatus; - - *rvp = 0; - - name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]); - if (ilp32) { - if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0) - return (EFAULT); - II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t); - ulist.status = (spcs_s_info_t)ulist32.status; - name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]); - } else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - i = 0; - - mutex_enter(&_ii_overflow_mutex); - for (op = _ii_overflow_top; i < ulist.count && op; - carg += DSW_NAMELEN) { - if (copyout(op->ii_volname, carg+name_offset, DSW_NAMELEN)) { - mutex_exit(&_ii_overflow_mutex); - return (spcs_s_ocopyoutf(&kstatus, ulist.status, - EFAULT)); - } - i++; - op = op->ii_next; - } - mutex_exit(&_ii_overflow_mutex); - ulist32.count = ulist.count = i; - /* return count of items listed to user */ - if (ilp32) { - if (copyout(&ulist32, (void *)arg, name_offset)) - return (EFAULT); - } else { - if (copyout(&ulist, (void *)arg, name_offset)) - return (EFAULT); - } - - return (spcs_s_ocopyoutf(&kstatus, ulist.status, rc)); -} - -/* - * _ii_ostat - * Breaks the link with the overflow volume. - * - * Calling/Exit State: - * Returns 0 if the overflow volume was detached. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. 
- * - * Description: - */ - -int -_ii_ostat(intptr_t arg, int ilp32, int *rvp, int is_iost_2) -{ - dsw_ostat_t ustat; - dsw_ostat32_t ustat32; - _ii_overflow_t *op; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0) - return (EFAULT); - II_TAIL_COPY(ustat, ustat32, overflow_vol, dsw_ostat_t); - ustat.status = (spcs_s_info_t)ustat32.status; - } else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - if (!ustat.overflow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY)); - - op = _ii_find_overflow(ustat.overflow_vol); - if (op == NULL) - return (spcs_s_ocopyoutf(&kstatus, ustat.status, - DSW_ENOTFOUND)); - - ustat.nchunks = op->ii_nchunks; - ustat.used = op->ii_used; - ustat.unused = op->ii_unused; - ustat.drefcnt = op->ii_drefcnt; - ustat.crefcnt = op->ii_crefcnt; - if (is_iost_2) { - ustat.hversion = op->ii_hversion; - ustat.flags = op->ii_flags; - ustat.hmagic = op->ii_hmagic; - } - - spcs_s_kfree(kstatus); - if (ilp32) { - ustat32.nchunks = ustat.nchunks; - ustat32.used = ustat.used; - ustat32.unused = ustat.unused; - ustat32.drefcnt = ustat.drefcnt; - ustat32.crefcnt = ustat.crefcnt; - if (is_iost_2) { - ustat32.hversion = ustat.hversion; - ustat32.flags = ustat.flags; - ustat32.hmagic = ustat.hmagic; - } - if (copyout(&ustat32, (void *)arg, sizeof (ustat32))) - return (EFAULT); - } else { - if (copyout(&ustat, (void *)arg, sizeof (ustat))) - return (EFAULT); - } - return (0); -} - -/* - * _ii_move_grp() - * Move a set from one group to another, possibly creating the new - * group. 
- */ - -int -_ii_move_grp(intptr_t arg, int ilp32, int *rvp) -{ - dsw_movegrp_t umove; - dsw_movegrp32_t umove32; - spcs_s_info_t kstatus; - _ii_info_t *ip; - int rc = 0; - nsc_buf_t *tmp; - ii_header_t *bm_header; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0) - return (EFAULT); - II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t); - umove.status = (spcs_s_info_t)umove32.status; - } else if (copyin((void *)arg, &umove, sizeof (umove)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!umove.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(umove.shadow_vol); - mutex_exit(&_ii_info_mutex); - - if (!ip) - return (spcs_s_ocopyoutf(&kstatus, umove.status, - DSW_ENOTFOUND)); - - if (!umove.new_group[0]) { - /* are we clearing the group association? */ - if (ip->bi_group) { - DTRACE_PROBE2(_ii_move_grp1, char *, ip->bi_keyname, - char *, ip->bi_group); - rc = II_UNLINK_GROUP(ip); - } - } else if (!ip->bi_group) { - rc = II_LINK_GROUP(ip, umove.new_group); - DTRACE_PROBE2(_ii_move_grp2, char *, ip->bi_keyname, - char *, ip->bi_group); - } else { - /* remove it from one group and add it to the other */ - DTRACE_PROBE3(_ii_move_grp, char *, ip->bi_keyname, - char *, ip->bi_group, char *, umove.new_group); - rc = II_UNLINK_GROUP(ip); - if (!rc) - rc = II_LINK_GROUP(ip, umove.new_group); - } - - /* ** BEGIN UPDATE BITMAP HEADER ** */ - if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, umove.status, - DSW_ERSRVFAIL)); - } - bm_header = _ii_bm_header_get(ip, &tmp); - if (bm_header) { - (void) strncpy(bm_header->group_name, umove.new_group, - DSW_NAMELEN); - (void) _ii_bm_header_put(bm_header, ip, tmp); - } - _ii_rlse_devs(ip, BMP); - /* ** END UPDATE BITMAP HEADER ** */ - - 
_ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - return (spcs_s_ocopyoutf(&kstatus, umove.status, rc)); -} - -/* - * _ii_change_tag() - * Move a set from one group to another, possibly creating the new - * group. - */ - -int -_ii_change_tag(intptr_t arg, int ilp32, int *rvp) -{ - dsw_movegrp_t umove; - dsw_movegrp32_t umove32; - spcs_s_info_t kstatus; - _ii_info_t *ip; - int rc = 0; - nsc_buf_t *tmp; - ii_header_t *bm_header; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0) - return (EFAULT); - II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t); - umove.status = (spcs_s_info_t)umove32.status; - } else if (copyin((void *)arg, &umove, sizeof (umove)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!umove.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(umove.shadow_vol); - mutex_exit(&_ii_info_mutex); - - if (!ip) - return (spcs_s_ocopyoutf(&kstatus, umove.status, - DSW_ENOTFOUND)); - - if (!umove.new_group[0]) { - /* are we clearing the group association? */ - if (ip->bi_cluster) { - DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname, - char *, ip->bi_cluster); - rc = II_UNLINK_CLUSTER(ip); - } - } else if (!ip->bi_cluster) { - /* are we adding it to a group for the first time? 
*/ - rc = II_LINK_CLUSTER(ip, umove.new_group); - DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname, - char *, ip->bi_cluster); - } else { - /* remove it from one group and add it to the other */ - DTRACE_PROBE3(_ii_change_tag_2, char *, ip->bi_keyname, - char *, ip->bi_cluster, char *, umove.new_group); - rc = II_UNLINK_CLUSTER(ip); - if (!rc) - rc = II_LINK_CLUSTER(ip, umove.new_group); - } - - /* ** BEGIN UPDATE BITMAP HEADER ** */ - if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, umove.status, - DSW_ERSRVFAIL)); - } - bm_header = _ii_bm_header_get(ip, &tmp); - if (bm_header) { - (void) strncpy(bm_header->clstr_name, umove.new_group, - DSW_NAMELEN); - (void) _ii_bm_header_put(bm_header, ip, tmp); - } - _ii_rlse_devs(ip, BMP); - /* ** END UPDATE BITMAP HEADER ** */ - - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - return (spcs_s_ocopyoutf(&kstatus, umove.status, rc)); -} - - -/* - * _ii_spcs_s_ocopyoutf() - * Wrapper for spcs_s_ocopyoutf() used by _ii_chk_copy() which permits - * the spcs_s_info_t argument to be NULL. _ii_chk_copy() requires this - * functionality as it is sometimes called by _ii_control_copy() which - * has no user context to copy any errors into. At all other times a NULL - * spcs_s_info_t argument would indicate a bug in the calling function. 
- */ - -static int -_ii_spcs_s_ocopyoutf(spcs_s_info_t *kstatusp, spcs_s_info_t ustatus, int err) -{ - if (ustatus) - return (spcs_s_ocopyoutf(kstatusp, ustatus, err)); - spcs_s_kfree(*kstatusp); - return (err); -} - -static int -_ii_chk_copy(_ii_info_t *ip, int flags, spcs_s_info_t *kstatusp, pid_t pid, - spcs_s_info_t ustatus) -{ - _ii_info_t *xip; - int rc; - int rtype; - - if ((ip->bi_flags & DSW_COPYINGP) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING)); - } - - if (ip->bi_flags & DSW_OFFLINE) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EOFFLINE)); - } - - if ((ip->bi_flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, - DSW_EISEXPORTED)); - } - - if ((flags & CV_SHD2MST) == CV_SHD2MST) { - if ((ip->bi_flags & DSW_COPYINGM) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, - DSW_ECOPYING)); - } - /* check if any sibling shadow is copying towards this master */ - for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { - if (ip != xip && (xip->bi_flags & DSW_COPYINGS) != 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, - DSW_ECOPYING)); - } - } - } - - if (((flags & CV_SHD2MST) == 0) && - ((ip->bi_flags & DSW_COPYINGS) != 0)) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING)); - } - - if (ip->bi_flags & DSW_TREEMAP) { - if ((ip->bi_flags & DSW_OVERFLOW) && (flags & CV_SHD2MST)) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, - DSW_EINCOMPLETE)); - } - } - - /* Assure that no other PID owns this copy/update */ - if (ip->bi_locked_pid == 0) { - if (flags & CV_LOCK_PID) - ip->bi_locked_pid = pid; - } 
else if (ip->bi_locked_pid != pid) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EINUSE)); - } - - mutex_exit(&ip->bi_mutex); - - rtype = MSTR|SHDR|BMP; - if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(*kstatusp, rc); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, - DSW_ERSRVFAIL)); - } - - if (ii_update_denied(ip, *kstatusp, flags & CV_SHD2MST, 0)) { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, - DSW_EOPACKAGE)); - } - - return (0); -} - -static int -_ii_do_copy(_ii_info_t *ip, int flags, spcs_s_info_t kstatus, int waitflag) -{ - int rc = 0; - int rtype = MSTR|SHDR|BMP; - _ii_overflow_t *op; - int quick_update = 0; - - waitflag = (waitflag != 0); - /* - * a copy of a tree-mapped device must be downgraded to - * an update. - */ - if (ip->bi_flags & DSW_TREEMAP) - flags |= CV_BMP_ONLY; - - /* - * If we want to update the dependent shadow we only need to zero - * the shadow bitmap. 
- */ - - if (((ip->bi_flags & DSW_GOLDEN) == 0) && - (flags & (CV_BMP_ONLY|CV_SHD2MST)) == CV_BMP_ONLY) { - - DTRACE_PROBE(DEPENDENT); - - /* assign updating time */ - ip->bi_mtime = ddi_get_time(); - - if (ip->bi_flags & DSW_TREEMAP) { - DTRACE_PROBE(COMPACT_DEPENDENT); - - if (ip->bi_overflow && - (ip->bi_overflow->ii_flags & IIO_VOL_UPDATE) == 0) { - /* attempt to do a quick update */ - quick_update = 1; - ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE; - ip->bi_overflow->ii_detachcnt = 1; - } - - rc = ii_tinit(ip); - - if (quick_update && ip->bi_overflow) { - /* clean up */ - ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE); - ip->bi_overflow->ii_detachcnt = 0; - } - } - - if (rc == 0) - rc = II_ZEROBM(ip); /* update copy of shadow */ - if (((op = ip->bi_overflow) != NULL) && - (op->ii_hversion >= 1) && (op->ii_hmagic == II_OMAGIC)) { - mutex_enter(&_ii_overflow_mutex); - if (ip->bi_flags & DSW_OVRHDRDRTY) { - mutex_enter(&ip->bi_mutex); - ip->bi_flags &= ~DSW_OVRHDRDRTY; - mutex_exit(&ip->bi_mutex); - ASSERT(op->ii_urefcnt > 0); - op->ii_urefcnt--; - } - if (op->ii_urefcnt == 0) { - op->ii_flags &= ~IIO_CNTR_INVLD; - op->ii_unused = op->ii_nchunks - 1; - } - mutex_exit(&_ii_overflow_mutex); - } - mutex_enter(&ip->bi_mutex); - II_FLAG_CLR(DSW_OVERFLOW, ip); - mutex_exit(&ip->bi_mutex); - - _ii_unlock_chunk(ip, II_NULLCHUNK); - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - if (rc) { - spcs_s_add(kstatus, rc); - return (DSW_EIO); - } else { - DTRACE_PROBE(_ii_do_copy_end); - return (0); - } - } - - /* - * need to perform an actual copy. - */ - - /* - * Perform bitmap copy if asked or from dependent shadow to master. 
- */ - if ((flags & CV_BMP_ONLY) || - ((flags & CV_SHD2MST) && - ((ip->bi_flags & DSW_GOLDEN) == 0))) { - DTRACE_PROBE(INDEPENDENT_fast); - rc = II_ORBM(ip); /* save shadow bits for copy */ - } else { - DTRACE_PROBE(INDEPENDENT_slow); - rc = ii_fill_copy_bmp(ip); /* set bits for independent copy */ - } - if (rc == 0) - rc = II_ZEROBM(ip); - _ii_unlock_chunk(ip, II_NULLCHUNK); - if (rc == 0) { - mutex_enter(&ip->bi_mutex); - if (ip->bi_flags & (DSW_COPYINGP | DSW_SHDEXPORT)) { - rc = (ip->bi_flags & DSW_COPYINGP) - ? DSW_ECOPYING : DSW_EISEXPORTED; - - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - _ii_rlse_devs(ip, rtype); - return (rc); - } - - /* assign copying time */ - ip->bi_mtime = ddi_get_time(); - - if (flags & CV_SHD2MST) - II_FLAG_SET(DSW_COPYINGS | DSW_COPYINGP, ip); - else - II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip); - mutex_exit(&ip->bi_mutex); - rc = _ii_copyvol(ip, (flags & CV_SHD2MST), - rtype, kstatus, waitflag); - } else { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - } - - if (waitflag) - _ii_rlse_devs(ip, rtype); - - return (rc); -} - -/* - * _ii_copy - * Copy or update (take snapshot) II volume. - * - * Calling/Exit State: - * Returns 0 if the operation succeeded. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. 
- */ - -int -_ii_copy(intptr_t arg, int ilp32, int *rvp) -{ - dsw_ioctl_t ucopy; - dsw_ioctl32_t ucopy32; - _ii_info_t *ip; - int rc = 0; - spcs_s_info_t kstatus; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0) - return (EFAULT); - II_TAIL_COPY(ucopy, ucopy32, shadow_vol, dsw_ioctl_t); - ucopy.status = (spcs_s_info_t)ucopy32.status; - } else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!ucopy.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, ucopy.status, DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(ucopy.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, ucopy.status, - DSW_ENOTFOUND)); - - /* Check that the copy/update makes sense */ - if ((rc = _ii_chk_copy(ip, ucopy.flags, &kstatus, ucopy.pid, - ucopy.status)) == 0) { - /* perform the copy */ - _ii_lock_chunk(ip, II_NULLCHUNK); - /* _ii_do_copy() calls _ii_ioctl_done() */ - rc = _ii_do_copy(ip, ucopy.flags, kstatus, 1); - return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc)); - } - - return (rc); -} - -/* - * _ii_mass_copy - * Copies/updates the sets pointed to in the ipa array. - * - * Calling/Exit State: - * Returns 0 if the operations was successful. Otherwise an - * error code. 
- */ -int -_ii_mass_copy(_ii_info_t **ipa, dsw_aioctl_t *ucopy, int wait) -{ - int i; - int rc = 0; - int failed; - int rtype = MSTR|SHDR|BMP; - _ii_info_t *ip; - spcs_s_info_t kstatus; - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - /* Check copy validitity */ - for (i = 0; i < ucopy->count; i++) { - ip = ipa[i]; - - rc = _ii_chk_copy(ip, ucopy->flags, &kstatus, ucopy->pid, - ucopy->status); - - if (rc) { - /* Clean up the mess */ - - DTRACE_PROBE1(_ii_mass_copy_end1, int, rc); - - /* - * The array ipa now looks like: - * 0..(i-1): needs mutex_enter/ioctl_done/mutex_exit - * i: needs nothing (_ii_chk_copy does cleanup) - * (i+1)..n: needs just ioctl_done/mutex_exit - */ - - failed = i; - - for (i = 0; i < failed; i++) { - mutex_enter(&(ipa[i]->bi_mutex)); - _ii_ioctl_done(ipa[i]); - mutex_exit(&(ipa[i]->bi_mutex)); - _ii_rlse_devs(ipa[i], rtype); - } - - /* skip 'failed', start with failed + 1 */ - - for (i = failed + 1; i < ucopy->count; i++) { - _ii_ioctl_done(ipa[i]); - mutex_exit(&(ipa[i]->bi_mutex)); - } - - return (rc); - } - } - - /* Check for duplicate shadows in same II group */ - if (ucopy->flags & CV_SHD2MST) { - /* Reset the state of all masters */ - for (i = 0; i < ucopy->count; i++) { - ip = ipa[i]; - ip->bi_master->bi_state &= ~DSW_MSTTARGET; - } - - for (i = 0; i < ucopy->count; i++) { - ip = ipa[i]; - /* - * Check the state of the master. If DSW_MSTTARGET is - * set, it's because this master is attached to another - * shadow within this set. - */ - if (ip->bi_master->bi_state & DSW_MSTTARGET) { - rc = EINVAL; - break; - } - - /* - * Set the DSW_MSTTARGET bit on the master associated - * with this shadow. This will allow us to detect - * multiple shadows pointing to this master within - * this loop. 
- */ - ip->bi_master->bi_state |= DSW_MSTTARGET; - } - } - - /* Handle error */ - if (rc) { - DTRACE_PROBE1(_ii_mass_copy_end2, int, rc); - for (i = 0; i < ucopy->count; i++) { - ip = ipa[i]; - - _ii_rlse_devs(ip, rtype); - - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - } - - return (spcs_s_ocopyoutf(&kstatus, ucopy->status, rc)); - } - - /* Lock bitmaps & prepare counts */ - for (i = 0; i < ucopy->count; i++) { - ip = ipa[i]; - _ii_lock_chunk(ip, II_NULLCHUNK); - if (ip->bi_overflow) { - ip->bi_overflow->ii_detachcnt = 0; - } - } - - /* determine which volumes we're dealing with */ - for (i = 0; i < ucopy->count; i++) { - ip = ipa[i]; - if (ip->bi_overflow) { - ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE; - if ((ucopy->flags & (CV_BMP_ONLY|CV_SHD2MST)) == - CV_BMP_ONLY) { - ++ip->bi_overflow->ii_detachcnt; - } - } - } - - /* Perform copy */ - for (i = 0; i < ucopy->count; i++) { - ip = ipa[i]; - rc = _ii_do_copy(ip, ucopy->flags, kstatus, wait); - /* Hum... what to do if one of these fails? */ - } - - /* clear out flags so as to prevent any accidental reuse */ - for (i = 0; i < ucopy->count; i++) { - ip = ipa[i]; - if (ip->bi_overflow) - ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE); - } - - /* - * We can only clean up the kstatus structure if there are - * no waiters. If someone's waiting for the information, - * _ii_copyvolp() uses spcs_s_add to write to kstatus. Panic - * would ensue if we freed it up now. - */ - if (!wait) - rc = spcs_s_ocopyoutf(&kstatus, ucopy->status, rc); - - return (rc); -} - -/* - * _ii_list_copy - * Retrieve a list from a character array and use _ii_mass_copy to - * initiate a copy/update operation on all of the specified sets. - * - * Calling/Exit State: - * Returns 0 if the operations was successful. Otherwise an - * error code. 
- */ -int -_ii_list_copy(char *list, dsw_aioctl_t *ucopy, int wait) -{ - int i; - int rc = 0; - char *name; - _ii_info_t *ip; - _ii_info_t **ipa; - - ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP); - - /* Reserve devices */ - name = list; - mutex_enter(&_ii_info_mutex); - for (i = 0; i < ucopy->count; i++, name += DSW_NAMELEN) { - ip = _ii_find_set(name); - - if (ip == NULL) { - rc = DSW_ENOTFOUND; - break; - } - - ipa[i] = ip; - } - - if (rc != 0) { - /* Failed to find all sets, release those we do have */ - while (i-- > 0) { - ip = ipa[i]; - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - } - } else { - /* Begin copy operation */ - rc = _ii_mass_copy(ipa, ucopy, wait); - } - - mutex_exit(&_ii_info_mutex); - - kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count); - - return (rc); -} - -/* - * _ii_group_copy - * Retrieve list of sets in a group and use _ii_mass_copy to initiate - * a copy/update of all of them. - * - * Calling/Exit State: - * Returns 0 if the operations was successful. Otherwise an - * error code. 
- */ -int -_ii_group_copy(char *name, dsw_aioctl_t *ucopy, int wait) -{ - int i; - int rc; - uint64_t hash; - _ii_info_t **ipa; - _ii_lsthead_t *head; - _ii_lstinfo_t *np; - - /* find group */ - hash = nsc_strhash(name); - - mutex_enter(&_ii_group_mutex); - - for (head = _ii_group_top; head; head = head->lst_next) { - if (hash == head->lst_hash && strncmp(head->lst_name, - name, DSW_NAMELEN) == 0) - break; - } - - if (!head) { - mutex_exit(&_ii_group_mutex); - DTRACE_PROBE(_ii_group_copy); - return (DSW_EGNOTFOUND); - } - - /* Count entries */ - for (ucopy->count = 0, np = head->lst_start; np; np = np->lst_next) - ++ucopy->count; - - if (ucopy->count == 0) { - mutex_exit(&_ii_group_mutex); - return (DSW_EGNOTFOUND); - } - - ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP); - if (ipa == NULL) { - mutex_exit(&_ii_group_mutex); - return (ENOMEM); - } - - /* Create list */ - mutex_enter(&_ii_info_mutex); - np = head->lst_start; - for (i = 0; i < ucopy->count; i++) { - ASSERT(np != 0); - - ipa[i] = np->lst_ip; - - mutex_enter(&ipa[i]->bi_mutex); - ipa[i]->bi_ioctl++; - - np = np->lst_next; - } - - /* Begin copy operation */ - rc = _ii_mass_copy(ipa, ucopy, wait); - - mutex_exit(&_ii_info_mutex); - mutex_exit(&_ii_group_mutex); - - kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count); - - return (rc); -} - -/* - * _ii_acopy - * Copy or update (take snapshot) II multiple volumes. - * - * Calling/Exit State: - * Returns 0 if the operation succeeded. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. 
- */ -int -_ii_acopy(intptr_t arg, int ilp32, int *rvp) -{ - int rc; - size_t name_offset; - char *list; - char *nptr; - char name[DSW_NAMELEN]; - dsw_aioctl_t ucopy; - dsw_aioctl32_t ucopy32; - spcs_s_info_t kstatus; - - *rvp = 0; - - name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]); - - if (ilp32) { - if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0) - return (EFAULT); - II_TAIL_COPY(ucopy, ucopy32, flags, dsw_ioctl_t); - ucopy.status = (spcs_s_info_t)ucopy32.status; - name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]); - } else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - - if (kstatus == NULL) - return (ENOMEM); - - nptr = (char *)arg + name_offset; - rc = 0; - - if (ucopy.flags & CV_IS_GROUP) { - if (copyin(nptr, name, DSW_NAMELEN) < 0) - return (spcs_s_ocopyoutf(&kstatus, ucopy.status, - EFAULT)); - - /* kstatus information is handled within _ii_group_copy */ - rc = _ii_group_copy(name, &ucopy, 0); - } else if (ucopy.count > 0) { - list = kmem_alloc(DSW_NAMELEN * ucopy.count, KM_SLEEP); - - if (list == NULL) - return (spcs_s_ocopyoutf(&kstatus, ucopy.status, - ENOMEM)); - - if (copyin(nptr, list, DSW_NAMELEN * ucopy.count) < 0) - return (spcs_s_ocopyoutf(&kstatus, ucopy.status, - EFAULT)); - - rc = _ii_list_copy(list, &ucopy, 0); - kmem_free(list, DSW_NAMELEN * ucopy.count); - } - - return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc)); -} - -/* - * _ii_bitsset - * Copy out II pair bitmaps to user program - * - * Calling/Exit State: - * Returns 0 if the operation succeeded. Otherwise an error code - * is returned and any additional error information is copied - * out to the user. 
- */ -int -_ii_bitsset(intptr_t arg, int ilp32, int cmd, int *rvp) -{ - dsw_bitsset_t ubitsset; - dsw_bitsset32_t ubitsset32; - nsc_size_t nbitsset; - _ii_info_t *ip; - int rc; - spcs_s_info_t kstatus; - int bitmap_size; - - *rvp = 0; - - if (ilp32) { - if (copyin((void *)arg, &ubitsset32, sizeof (ubitsset32))) - return (EFAULT); - ubitsset.status = (spcs_s_info_t)ubitsset32.status; - bcopy(ubitsset32.shadow_vol, ubitsset.shadow_vol, DSW_NAMELEN); - } else if (copyin((void *)arg, &ubitsset, sizeof (ubitsset))) - return (EFAULT); - - kstatus = spcs_s_kcreate(); - if (kstatus == NULL) - return (ENOMEM); - - if (!ubitsset.shadow_vol[0]) - return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, - DSW_EEMPTY)); - - mutex_enter(&_ii_info_mutex); - ip = _ii_find_set(ubitsset.shadow_vol); - mutex_exit(&_ii_info_mutex); - if (ip == NULL) - return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, - DSW_ENOTFOUND)); - - mutex_exit(&ip->bi_mutex); - - if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, - DSW_ERSRVFAIL)); - } - - ubitsset.tot_size = ip->bi_size / DSW_SIZE; - if ((ip->bi_size % DSW_SIZE) != 0) - ++ubitsset.tot_size; - bitmap_size = (ubitsset.tot_size + 7) / 8; - if (cmd == DSWIOC_SBITSSET) - rc = II_CNT_BITS(ip, ip->bi_shdfba, &nbitsset, bitmap_size); - else - rc = II_CNT_BITS(ip, ip->bi_copyfba, &nbitsset, bitmap_size); - ubitsset.tot_set = nbitsset; - _ii_rlse_devs(ip, BMP); - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - if (rc) { - spcs_s_add(kstatus, rc); - return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_EIO)); - } - - spcs_s_kfree(kstatus); - /* return the fetched names to the user */ - if (ilp32) { - ubitsset32.status = (spcs_s_info32_t)ubitsset.status; - ubitsset32.tot_size = ubitsset.tot_size; - ubitsset32.tot_set = ubitsset.tot_set; - rc = 
copyout(&ubitsset32, (void *)arg, sizeof (ubitsset32)); - } else { - rc = copyout(&ubitsset, (void *)arg, sizeof (ubitsset)); - } - - return (rc); -} - -/* - * _ii_stopvol - * Stop any copying process for shadow, and stop shadowing - * - */ - -static void -_ii_stopvol(_ii_info_t *ip) -{ - nsc_path_t *mst_tok; - nsc_path_t *mstr_tok; - nsc_path_t *shd_tok; - nsc_path_t *shdr_tok; - nsc_path_t *bmp_tok; - int rc; - - while (_ii_stopcopy(ip) == EINTR) - ; - - DTRACE_PROBE(_ii_stopvol); - - mutex_enter(&ip->bi_mutex); - mst_tok = ip->bi_mst_tok; - mstr_tok = ip->bi_mstr_tok; - shd_tok = ip->bi_shd_tok; - shdr_tok = ip->bi_shdr_tok; - bmp_tok = ip->bi_bmp_tok; - ip->bi_shd_tok = 0; - ip->bi_shdr_tok = 0; - if (!NSHADOWS(ip)) { - ip->bi_mst_tok = 0; - ip->bi_mstr_tok = 0; - } - ip->bi_bmp_tok = 0; - - /* Wait for any _ii_open() calls to complete */ - - while (ip->bi_ioctl) { - ip->bi_state |= DSW_IOCTL; - cv_wait(&ip->bi_ioctlcv, &ip->bi_mutex); - } - mutex_exit(&ip->bi_mutex); - - rc = _ii_reserve_begin(ip); - if (rc) { - cmn_err(CE_WARN, "!_ii_stopvol: _ii_reserve_begin %d", rc); - } - if (!NSHADOWS(ip)) { - if (mst_tok) { - rc = _ii_unregister_path(mst_tok, NSC_PCATCH, - "master"); - if (rc) - cmn_err(CE_WARN, "!ii: unregister master %d", - rc); - } - - if (mstr_tok) { - rc = _ii_unregister_path(mstr_tok, NSC_PCATCH, - "raw master"); - if (rc) - cmn_err(CE_WARN, "!ii: unregister raw " - "master %d", rc); - } - } - - if (shd_tok) { - rc = _ii_unregister_path(shd_tok, NSC_PCATCH, "shadow"); - if (rc) - cmn_err(CE_WARN, "!ii: unregister shadow %d", rc); - } - - if (shdr_tok) { - rc = _ii_unregister_path(shdr_tok, NSC_PCATCH, "raw shadow"); - if (rc) - cmn_err(CE_WARN, "!ii: unregister raw shadow %d", rc); - } - - if (bmp_tok) { - rc = _ii_unregister_path(bmp_tok, NSC_PCATCH, "bitmap"); - if (rc) - cmn_err(CE_WARN, "!ii: unregister bitmap %d", rc); - } - _ii_reserve_end(ip); - - /* Wait for all necessary _ii_close() calls to complete */ - mutex_enter(&ip->bi_mutex); - - 
while (total_ref(ip) != 0) { - ip->bi_state |= DSW_CLOSING; - cv_wait(&ip->bi_closingcv, &ip->bi_mutex); - } - if (!NSHADOWS(ip)) { - nsc_set_owner(ip->bi_mstfd, NULL); - nsc_set_owner(ip->bi_mstrfd, NULL); - } - nsc_set_owner(ip->bi_shdfd, NULL); - nsc_set_owner(ip->bi_shdrfd, NULL); - mutex_exit(&ip->bi_mutex); - -} - - -/* - * _ii_ioctl_done - * If this is the last one to complete, wakeup all processes waiting - * for ioctls to complete - * - */ - -static void -_ii_ioctl_done(_ii_info_t *ip) -{ - ASSERT(ip->bi_ioctl > 0); - ip->bi_ioctl--; - if (ip->bi_ioctl == 0 && (ip->bi_state & DSW_IOCTL)) { - ip->bi_state &= ~DSW_IOCTL; - cv_broadcast(&ip->bi_ioctlcv); - } - -} - -/* - * _ii_find_vol - * Search the configured shadows list for the supplied volume. - * If found, flag an ioctl in progress and return the locked _ii_info_t. - * - * The caller must check to see if the bi_disable flag is set and - * treat it appropriately. - * - * ASSUMPTION: - * _ii_info_mutex must be locked prior to calling this function - * - */ - -static _ii_info_t * -_ii_find_vol(char *volume, int vol) -{ - _ii_info_t **xip, *ip; - - for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) { - if ((*xip)->bi_disabled) - continue; - if (strcmp(volume, vol == MST ? ii_pathname((*xip)->bi_mstfd) : - (*xip)->bi_keyname) == 0) { - break; - } - } - - if (!*xip) { - DTRACE_PROBE(VolNotFound); - return (NULL); - } - - ip = *xip; - if (!ip->bi_shd_tok && ((ip->bi_flags & DSW_SHDEXPORT) == 0)) { - /* Not fully configured until bi_shd_tok is set */ - DTRACE_PROBE(SetNotConfiged); - return (NULL); - - } - mutex_enter(&ip->bi_mutex); - ip->bi_ioctl++; - - return (ip); -} - -static _ii_info_t * -_ii_find_set(char *volume) -{ - return (_ii_find_vol(volume, SHD)); -} - -/* - * _ii_find_overflow - * Search the configured shadows list for the supplied overflow volume. 
- * - */ - -static _ii_overflow_t * -_ii_find_overflow(char *volume) -{ - _ii_overflow_t **xop, *op; - - mutex_enter(&_ii_overflow_mutex); - - DTRACE_PROBE(_ii_find_overflowmutex); - - for (xop = &_ii_overflow_top; *xop; xop = &(*xop)->ii_next) { - if (strcmp(volume, (*xop)->ii_volname) == 0) { - break; - } - } - - if (!*xop) { - mutex_exit(&_ii_overflow_mutex); - return (NULL); - } - - op = *xop; - mutex_exit(&_ii_overflow_mutex); - - return (op); -} - -/* - * _ii_bm_header_get - * Fetch the bitmap volume header - * - */ - -ii_header_t * -_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp) -{ - ii_header_t *hdr; - nsc_off_t read_fba; - int rc; - - ASSERT(ip->bi_bmprsrv); /* assert bitmap is reserved */ - ASSERT(MUTEX_HELD(&ip->bi_mutex)); - - if ((ip->bi_flags & DSW_BMPOFFLINE) != 0) - return (NULL); - - *tmp = NULL; - read_fba = 0; - - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, read_fba, - FBA_LEN(sizeof (ii_header_t)), NSC_RDWRBUF, tmp); - II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t))); - if (!II_SUCCESS(rc)) { - if (ii_debug > 2) - cmn_err(CE_WARN, "!ii: nsc_alloc_buf returned 0x%x", - rc); - if (*tmp) - (void) nsc_free_buf(*tmp); - *tmp = NULL; - mutex_exit(&ip->bi_mutex); - _ii_error(ip, DSW_BMPOFFLINE); - mutex_enter(&ip->bi_mutex); - return (NULL); - } - - hdr = (ii_header_t *)(*tmp)->sb_vec[0].sv_addr; - - return (hdr); -} - - -/* - * _ii_bm_header_free - * Free the bitmap volume header - * - */ - -/* ARGSUSED */ - -void -_ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp) -{ - (void) nsc_free_buf(tmp); - -} - -/* - * _ii_bm_header_put - * Write out the modified bitmap volume header and free it - * - */ - -/* ARGSUSED */ - -int -_ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp) -{ - nsc_off_t write_fba; - int rc; - - ASSERT(MUTEX_HELD(&ip->bi_mutex)); - - write_fba = 0; - - II_NSC_WRITE(ip, bitmap, rc, tmp, write_fba, - FBA_LEN(sizeof (ii_header_t)), 0); - - (void) nsc_free_buf(tmp); - if 
(!II_SUCCESS(rc)) { - mutex_exit(&ip->bi_mutex); - _ii_error(ip, DSW_BMPOFFLINE); - mutex_enter(&ip->bi_mutex); - DTRACE_PROBE(_ii_bm_header_put); - return (rc); - } else { - DTRACE_PROBE(_ii_bm_header_put_end); - return (0); - } -} - -/* - * _ii_flag_op - * Clear or set a flag in bi_flags and dsw_state. - * This relies on the ownership of the header block's nsc_buf - * for locking. - * - */ - -void -_ii_flag_op(and, or, ip, update) -int and, or; -_ii_info_t *ip; -int update; -{ - ii_header_t *bm_header; - nsc_buf_t *tmp; - - ip->bi_flags &= and; - ip->bi_flags |= or; - - if (update == TRUE) { - - /* - * No point trying to access bitmap header if it's offline - * or has been disassociated from set via DSW_HANGING - */ - if ((ip->bi_flags & (DSW_BMPOFFLINE|DSW_HANGING)) == 0) { - bm_header = _ii_bm_header_get(ip, &tmp); - if (bm_header == NULL) { - if (tmp) - (void) nsc_free_buf(tmp); - DTRACE_PROBE(_ii_flag_op_end); - return; - } - bm_header->ii_state &= and; - bm_header->ii_state |= or; - /* copy over the mtime */ - bm_header->ii_mtime = ip->bi_mtime; - (void) _ii_bm_header_put(bm_header, ip, tmp); - } - } - -} - -/* - * _ii_nsc_io - * Perform read or write on an underlying nsc device - * fd - nsc file descriptor - * flag - nsc io direction and characteristics flag - * fba_pos - offset from beginning of device in FBAs - * io_addr - pointer to data buffer - * io_len - length of io in bytes - */ - -int -_ii_nsc_io(_ii_info_t *ip, int ks, nsc_fd_t *fd, int flag, nsc_off_t fba_pos, - unsigned char *io_addr, nsc_size_t io_len) -{ - nsc_buf_t *tmp = NULL; - nsc_vec_t *vecp; - uchar_t *vaddr; - size_t copy_len; - int64_t vlen; - int rc; - nsc_size_t fba_req, fba_len; - nsc_size_t maxfbas = 0; - nsc_size_t tocopy; - unsigned char *toaddr; - - rc = nsc_maxfbas(fd, 0, &maxfbas); - if (!II_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!_ii_nsc_io: maxfbas failed (%d)", rc); -#endif - maxfbas = DSW_CBLK_FBA; - } - - toaddr = io_addr; - fba_req = FBA_LEN(io_len); - -#ifdef 
DEBUG_SPLIT_IO - cmn_err(CE_NOTE, "!_ii_nsc_io: maxfbas = %08x", maxfbas); - cmn_err(CE_NOTE, "!_ii_nsc_io: toaddr=%08x, io_len=%08x, fba_req=%08x", - toaddr, io_len, fba_req); -#endif - -loop: - tmp = NULL; - fba_len = min(fba_req, maxfbas); - tocopy = min(io_len, FBA_SIZE(fba_len)); - - DTRACE_PROBE2(_ii_nsc_io_buffer, nsc_off_t, fba_pos, - nsc_size_t, fba_len); - -#ifdef DEBUG_SPLIT_IO - cmn_err(CE_NOTE, "!_ii_nsc_io: fba_pos=%08x, fba_len=%08x", - fba_pos, fba_len); -#endif - -#ifndef DISABLE_KSTATS - if (flag & NSC_READ) { - switch (ks) { - case KS_MST: - II_READ_START(ip, master); - break; - case KS_SHD: - II_READ_START(ip, shadow); - break; - case KS_BMP: - II_READ_START(ip, bitmap); - break; - case KS_OVR: - II_READ_START(ip, overflow); - break; - default: - cmn_err(CE_WARN, "!Invalid kstats type %d", ks); - break; - } - } -#endif - - rc = nsc_alloc_buf(fd, fba_pos, fba_len, flag, &tmp); - -#ifndef DISABLE_KSTATS - if (flag & NSC_READ) { - switch (ks) { - case KS_MST: - II_READ_END(ip, master, rc, fba_len); - break; - case KS_SHD: - II_READ_END(ip, shadow, rc, fba_len); - break; - case KS_BMP: - II_READ_END(ip, bitmap, rc, fba_len); - break; - case KS_OVR: - II_READ_END(ip, overflow, rc, fba_len); - break; - } - } -#endif - - if (!II_SUCCESS(rc)) { - if (tmp) { - (void) nsc_free_buf(tmp); - } - - return (EIO); - } - - if ((flag & (NSC_WRITE|NSC_READ)) == NSC_WRITE && - (FBA_OFF(io_len) != 0)) { - /* - * Not overwriting all of the last FBA, so read in the - * old contents now before we overwrite it with the new - * data. 
- */ -#ifdef DEBUG_SPLIT_IO - cmn_err(CE_NOTE, "!_ii_nsc_io: Read-B4-Write %08x", - fba_pos+FBA_NUM(io_len)); -#endif - -#ifdef DISABLE_KSTATS - rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0); -#else - switch (ks) { - case KS_MST: - II_NSC_READ(ip, master, rc, tmp, - fba_pos+FBA_NUM(io_len), 1, 0); - break; - case KS_SHD: - II_NSC_READ(ip, shadow, rc, tmp, - fba_pos+FBA_NUM(io_len), 1, 0); - break; - case KS_BMP: - II_NSC_READ(ip, bitmap, rc, tmp, - fba_pos+FBA_NUM(io_len), 1, 0); - break; - case KS_OVR: - II_NSC_READ(ip, overflow, rc, tmp, - fba_pos+FBA_NUM(io_len), 1, 0); - break; - case KS_NA: - rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0); - break; - default: - cmn_err(CE_WARN, "!Invalid kstats type %d", ks); - rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0); - break; - } -#endif - if (!II_SUCCESS(rc)) { - (void) nsc_free_buf(tmp); - return (EIO); - } - } - - vecp = tmp->sb_vec; - vlen = vecp->sv_len; - vaddr = vecp->sv_addr; - - while (tocopy > 0) { - if (vecp->sv_addr == 0 || vecp->sv_len == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!_ii_nsc_io: ran off end of handle"); -#endif - break; - } - - copy_len = (size_t)min(vlen, tocopy); - - DTRACE_PROBE1(_ii_nsc_io_bcopy, size_t, copy_len); - - if (flag & NSC_WRITE) - bcopy(io_addr, vaddr, copy_len); - else - bcopy(vaddr, io_addr, copy_len); - - toaddr += copy_len; - tocopy -= copy_len; - io_addr += copy_len; - io_len -= copy_len; - vaddr += copy_len; - vlen -= copy_len; - - if (vlen <= 0) { - vecp++; - vaddr = vecp->sv_addr; - vlen = vecp->sv_len; - } - } - - if (flag & NSC_WRITE) { -#ifdef DISABLE_KSTATS - rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0); -#else - switch (ks) { - case KS_MST: - II_NSC_WRITE(ip, master, rc, tmp, tmp->sb_pos, - tmp->sb_len, 0); - break; - case KS_SHD: - II_NSC_WRITE(ip, shadow, rc, tmp, tmp->sb_pos, - tmp->sb_len, 0); - break; - case KS_BMP: - II_NSC_WRITE(ip, bitmap, rc, tmp, tmp->sb_pos, - tmp->sb_len, 0); - break; - case KS_OVR: - II_NSC_WRITE(ip, overflow, rc, tmp, 
tmp->sb_pos, - tmp->sb_len, 0); - break; - case KS_NA: - rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0); - break; - default: - cmn_err(CE_WARN, "!Invalid kstats type %d", ks); - rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0); - break; - } -#endif - if (!II_SUCCESS(rc)) { - (void) nsc_free_buf(tmp); - return (rc); - } - } - - (void) nsc_free_buf(tmp); - - fba_pos += fba_len; - fba_req -= fba_len; - if (fba_req > 0) - goto loop; - - return (0); -} - - -/* - * ii_overflow_attach - */ -static int -ii_overflow_attach(_ii_info_t *ip, char *name, int first) -{ - _ii_overflow_t *op; - int rc = 0; - int reserved = 0; - int mutex_set = 0; - int II_OLD_OMAGIC = 0x426c7565; /* "Blue" */ - - mutex_enter(&_ii_overflow_mutex); - /* search for name in list */ - for (op = _ii_overflow_top; op; op = op->ii_next) { - if (strncmp(op->ii_volname, name, DSW_NAMELEN) == 0) - break; - } - if (op) { - ip->bi_overflow = op; - op->ii_crefcnt++; - op->ii_drefcnt++; - if ((op->ii_flags & IIO_CNTR_INVLD) && (op->ii_hversion >= 1)) { - if (!first) - mutex_enter(&ip->bi_mutex); - ip->bi_flags |= DSW_OVRHDRDRTY; - if (!first) - mutex_exit(&ip->bi_mutex); - op->ii_urefcnt++; - } -#ifndef DISABLE_KSTATS - ip->bi_kstat_io.overflow = op->ii_overflow; - (void) strlcpy(ip->bi_kstat_io.ovrio, op->ii_ioname, - KSTAT_DATA_CHAR_LEN); -#endif - /* write header */ - if (!(rc = nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI))) { - rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, - NSC_WRBUF, II_OHEADER_FBA, - (unsigned char *)&op->ii_do, sizeof (op->ii_do)); - (void) nsc_release(op->ii_dev->bi_fd); - ++iigkstat.assoc_over.value.ul; - } - mutex_exit(&_ii_overflow_mutex); - return (rc); - } - if ((op = kmem_zalloc(sizeof (*op), KM_SLEEP)) == NULL) { - mutex_exit(&_ii_overflow_mutex); - return (ENOMEM); - } - if ((op->ii_dev = kmem_zalloc(sizeof (_ii_info_dev_t), KM_SLEEP)) - == NULL) { - kmem_free(op, sizeof (*op)); - mutex_exit(&_ii_overflow_mutex); - return (ENOMEM); - } -#ifndef DISABLE_KSTATS - if 
((op->ii_overflow = _ii_overflow_kstat_create(ip, op))) { - ip->bi_kstat_io.overflow = op->ii_overflow; - (void) strlcpy(op->ii_ioname, ip->bi_kstat_io.ovrio, - KSTAT_DATA_CHAR_LEN); - } else { - goto fail; - } -#endif - /* open overflow volume */ - op->ii_dev->bi_fd = nsc_open(name, NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, - (blind_t)&(op->ii_dev->bi_iodev), &rc); - if (!op->ii_dev->bi_fd) - op->ii_dev->bi_fd = nsc_open(name, - NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL, - (blind_t)&(op->ii_dev->bi_iodev), &rc); - if (op->ii_dev->bi_fd == NULL) { - goto fail; - } - if ((rc = nsc_reserve(op->ii_dev->bi_fd, 0)) != 0) - goto fail; - reserved = 1; - /* register path */ - op->ii_dev->bi_tok = _ii_register_path(name, NSC_DEVICE, - _ii_ior); - if (!op->ii_dev->bi_tok) { - goto fail; - } - /* read header */ - rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_RDBUF, - II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do)); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_OVROFFLINE); - goto fail; - } - /* On resume, check for old hmagic */ - if (strncmp(op->ii_volname, name, DSW_NAMELEN) || - ((op->ii_hmagic != II_OLD_OMAGIC) && - (op->ii_hmagic != II_OMAGIC))) { - rc = DSW_EOMAGIC; - goto fail; - } - /* set up counts */ - op->ii_crefcnt = 1; - op->ii_drefcnt = 0; - op->ii_urefcnt = 0; - op->ii_hmagic = II_OMAGIC; - if (!first) { - /* if header version > 0, check if header written */ - if (((op->ii_flags & IIO_HDR_WRTN) == 0) && - (op->ii_hversion >= 1)) { - op->ii_flags |= IIO_CNTR_INVLD; - mutex_enter(&ip->bi_mutex); - ip->bi_flags |= DSW_OVRHDRDRTY; - mutex_exit(&ip->bi_mutex); - op->ii_urefcnt++; - } - } - op->ii_flags &= ~IIO_HDR_WRTN; - op->ii_drefcnt++; - /* write header */ - rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF, - II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do)); - nsc_release(op->ii_dev->bi_fd); - reserved = 0; - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_OVROFFLINE); - goto fail; - } - - mutex_init(&op->ii_mutex, NULL, 
MUTEX_DRIVER, NULL); - mutex_set++; - - /* link onto list */ - op->ii_next = _ii_overflow_top; - _ii_overflow_top = op; - ip->bi_overflow = op; - - ++iigkstat.assoc_over.value.ul; - mutex_exit(&_ii_overflow_mutex); - - DTRACE_PROBE(_ii_overflow_attach_end); - return (0); -fail: -#ifndef DISABLE_KSTATS - /* Clean-up kstat stuff */ - if (op->ii_overflow) { - kstat_delete(op->ii_overflow); - mutex_destroy(&op->ii_kstat_mutex); - } -#endif - /* clean up mutex if we made it that far */ - if (mutex_set) { - mutex_destroy(&op->ii_mutex); - } - - if (op->ii_dev) { - if (op->ii_dev->bi_tok) { - (void) _ii_unregister_path(op->ii_dev->bi_tok, 0, - "overflow"); - } - if (reserved) - (void) nsc_release(op->ii_dev->bi_fd); - if (op->ii_dev->bi_fd) - (void) nsc_close(op->ii_dev->bi_fd); - kmem_free(op->ii_dev, sizeof (_ii_info_dev_t)); - } - kmem_free(op, sizeof (*op)); - mutex_exit(&_ii_overflow_mutex); - - return (rc); -} - -/* - * ii_overflow_free - * Assumes that ip is locked for I/O - */ -static void -ii_overflow_free(_ii_info_t *ip, int reclaim) -{ - _ii_overflow_t *op, **xp; - - if ((op = ip->bi_overflow) == NULL) - return; - ip->bi_kstat_io.overflow = NULL; - mutex_enter(&_ii_overflow_mutex); - switch (reclaim) { - case NO_RECLAIM: - if (--(op->ii_drefcnt) == 0) { - /* indicate header written */ - op->ii_flags |= IIO_HDR_WRTN; - /* write out header */ - ASSERT(op->ii_dev->bi_fd); - (void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI); - (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, - NSC_WRBUF, II_OHEADER_FBA, - (unsigned char *)&op->ii_do, - sizeof (op->ii_do)); - nsc_release(op->ii_dev->bi_fd); - } - break; - case RECLAIM: - ii_reclaim_overflow(ip); - /* FALLTHRU */ - case INIT_OVR: - if (--(op->ii_drefcnt) == 0) { - /* reset to new condition, c.f. 
_ii_ocreate() */ - op->ii_used = 1; - op->ii_unused = op->ii_nchunks - op->ii_used; - op->ii_freehead = II_NULLNODE; - } - - /* write out header */ - ASSERT(op->ii_dev->bi_fd); - (void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI); - (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF, - II_OHEADER_FBA, (unsigned char *)&op->ii_do, - sizeof (op->ii_do)); - nsc_release(op->ii_dev->bi_fd); - } - - if (--(op->ii_crefcnt) == 0) { - /* Close fd and unlink from active chain; */ - - (void) _ii_unregister_path(op->ii_dev->bi_tok, 0, "overflow"); - (void) nsc_close(op->ii_dev->bi_fd); - - for (xp = &_ii_overflow_top; *xp && *xp != op; - xp = &((*xp)->ii_next)) - /* NULL statement */; - *xp = op->ii_next; - - if (op->ii_overflow) { - kstat_delete(op->ii_overflow); - } - - /* Clean up ii_overflow_t mutexs */ - mutex_destroy(&op->ii_kstat_mutex); - mutex_destroy(&op->ii_mutex); - - if (op->ii_dev) - kmem_free(op->ii_dev, sizeof (_ii_info_dev_t)); - kmem_free(op, sizeof (*op)); - } - ip->bi_overflow = NULL; - --iigkstat.assoc_over.value.ul; - mutex_exit(&_ii_overflow_mutex); - -} - -/* - * ii_sibling_free - * Free resources and unlink the sibling chains etc. 
- */ - -static void -ii_sibling_free(_ii_info_t *ip) -{ - _ii_info_t *hip, *yip; - - if (!ip) - return; - - if (ip->bi_shdr_tok) - (void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow"); - - if (ip->bi_shd_tok) - (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow"); - - rw_enter(&ip->bi_linkrw, RW_WRITER); - - ip->bi_shd_tok = NULL; - ip->bi_shdr_tok = NULL; - - if (NSHADOWS(ip)) { - mutex_enter(&_ii_info_mutex); - if (ip->bi_head == ip) { /* removing head of list */ - hip = ip->bi_sibling; - for (yip = hip; yip; yip = yip->bi_sibling) - yip->bi_head = hip; - - } else { /* removing member of list */ - hip = ip->bi_head; - for (yip = ip->bi_head; yip; yip = yip->bi_sibling) { - if (yip->bi_sibling == ip) { - yip->bi_sibling = ip->bi_sibling; - break; - } - } - } - hip->bi_master->bi_head = hip; - if (ip->bi_master == ip) { /* master I/O goes through this */ - mutex_exit(&_ii_info_mutex); - _ii_info_freeshd(ip); - rw_exit(&ip->bi_linkrw); - return; - } - mutex_exit(&_ii_info_mutex); - } else { - if (ip->bi_master != ip) /* last ref to master side ip */ - _ii_info_free(ip->bi_master); /* ==A== */ - } - - if (ip->bi_master != ip) { /* info_free ==A== will close these */ - /* - * Null out any pointers to shared master side resources - * that should only be freed once when the last reference - * to this master is freed and calls _ii_info_free(). - */ - ip->bi_mstdev = NULL; - ip->bi_mstrdev = NULL; - ip->bi_kstat_io.master = NULL; - } - rw_exit(&ip->bi_linkrw); - _ii_info_free(ip); - -} - -/* - * _ii_info_freeshd - * Free shadow side resources - * - * Calling/Exit State: - * No mutexes should be held on entry to this function. - * - * Description: - * Frees the system resources associated with the shadow - * access, leaving the master side alone. This allows the - * original master side to continue in use while there are - * outstanding references to this _ii_info_t. 
- */ - -static void -_ii_info_freeshd(_ii_info_t *ip) -{ - if (!ip) - return; - if ((ip->bi_flags&DSW_HANGING) == DSW_HANGING) - return; /* this work has already been completed */ - - II_FLAG_SETX(DSW_HANGING, ip); - - if (ip->bi_cluster) - (void) II_UNLINK_CLUSTER(ip); - if (ip->bi_group) - (void) II_UNLINK_GROUP(ip); - - if (ip->bi_shdfd && ip->bi_shdrsrv) - nsc_release(ip->bi_shdfd); - if (ip->bi_shdrfd && ip->bi_shdrrsrv) - nsc_release(ip->bi_shdrfd); - if (ip->bi_bmpfd && ip->bi_bmprsrv) - nsc_release(ip->bi_bmpfd); - - if (ip->bi_bmp_tok) - (void) _ii_unregister_path(ip->bi_bmp_tok, 0, "bitmap"); - - if (ip->bi_shdr_tok) - (void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow"); - - if (ip->bi_shd_tok) - (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow"); - ip->bi_shd_tok = NULL; - ip->bi_shdr_tok = NULL; - - if (ip->bi_shdfd) - (void) nsc_close(ip->bi_shdfd); - - if (ip->bi_shdrfd) - (void) nsc_close(ip->bi_shdrfd); - - if (ip->bi_bmpfd) - (void) nsc_close(ip->bi_bmpfd); - - ip->bi_shdfd = NULL; - ip->bi_shdrfd = NULL; - ip->bi_bmpfd = NULL; - - if (ip->bi_busy) - kmem_free(ip->bi_busy, - 1 + (ip->bi_size / (DSW_SIZE * DSW_BITS))); - ip->bi_busy = NULL; - - if (ip->bi_kstat_io.shadow) { - kstat_delete(ip->bi_kstat_io.shadow); - ip->bi_kstat_io.shadow = NULL; - } - if (ip->bi_kstat_io.bitmap) { - kstat_delete(ip->bi_kstat_io.bitmap); - ip->bi_kstat_io.bitmap = NULL; - } - if (ip->bi_kstat) { - kstat_delete(ip->bi_kstat); - ip->bi_kstat = NULL; - } - -} - -/* - * _ii_info_free - * Free resources - * - * Calling/Exit State: - * No mutexes should be held on entry to this function. - * - * Description: - * Frees the system resources associated with the specified - * II information structure. 
- */ - -static void -_ii_info_free(_ii_info_t *ip) -{ - _ii_info_t **xip; - - if (!ip) - return; - - mutex_enter(&_ii_info_mutex); - for (xip = &_ii_mst_top; *xip; xip = &((*xip)->bi_nextmst)) { - if (ip == *xip) { - *xip = ip->bi_nextmst; - break; - } - } - mutex_exit(&_ii_info_mutex); - - /* this rw_enter forces us to wait until all nsc_buffers are freed */ - rw_enter(&ip->bi_linkrw, RW_WRITER); - if (ip->bi_mstdev && ip->bi_mstfd && ip->bi_mstrsrv) - nsc_release(ip->bi_mstfd); - if (ip->bi_mstrdev && ip->bi_mstrfd && ip->bi_mstrrsrv) - nsc_release(ip->bi_mstrfd); - - if (ip->bi_mstdev && ip->bi_mst_tok) - (void) _ii_unregister_path(ip->bi_mst_tok, 0, "master"); - if (ip->bi_mstrdev && ip->bi_mstr_tok) - (void) _ii_unregister_path(ip->bi_mstr_tok, 0, "raw master"); - - if (ip->bi_mstdev && ip->bi_mstfd) - (void) nsc_close(ip->bi_mstfd); - if (ip->bi_mstrdev && ip->bi_mstrfd) - (void) nsc_close(ip->bi_mstrfd); - rw_exit(&ip->bi_linkrw); - - if (ip->bi_mstdev) { - nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev)); - } - if (ip->bi_mstrdev) { - nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev)); - } - - if (ip->bi_kstat_io.master) { - kstat_delete(ip->bi_kstat_io.master); - } - if (ip->bi_kstat_io.shadow) { - kstat_delete(ip->bi_kstat_io.shadow); - ip->bi_kstat_io.shadow = 0; - } - if (ip->bi_kstat_io.bitmap) { - kstat_delete(ip->bi_kstat_io.bitmap); - ip->bi_kstat_io.bitmap = 0; - } - if (ip->bi_kstat) { - kstat_delete(ip->bi_kstat); - ip->bi_kstat = NULL; - } - - /* this rw_enter forces us to wait until all nsc_buffers are freed */ - rw_enter(&ip->bi_linkrw, RW_WRITER); - rw_exit(&ip->bi_linkrw); - - mutex_destroy(&ip->bi_mutex); - mutex_destroy(&ip->bi_rsrvmutex); - mutex_destroy(&ip->bi_rlsemutex); - mutex_destroy(&ip->bi_bmpmutex); - mutex_destroy(&ip->bi_chksmutex); - cv_destroy(&ip->bi_copydonecv); - cv_destroy(&ip->bi_reservecv); - cv_destroy(&ip->bi_releasecv); - cv_destroy(&ip->bi_ioctlcv); - cv_destroy(&ip->bi_closingcv); - 
cv_destroy(&ip->bi_busycv); - rw_destroy(&ip->bi_busyrw); - rw_destroy(&ip->bi_linkrw); - - _ii_info_freeshd(ip); - -#ifdef DEBUG - ip->bi_head = (_ii_info_t *)0xdeadbeef; -#endif - - nsc_kmem_free(ip, sizeof (*ip)); - -} - -/* - * _ii_copy_chunks - * Perform a copy of some chunks - * - * Calling/Exit State: - * Returns 0 if the data was copied successfully, otherwise - * error code. - * - * Description: - * flag is set to CV_SHD2MST if the data is to be copied from the shadow - * to the master, 0 if it is to be copied from the master to the shadow. - */ - -static int -_ii_copy_chunks(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks) -{ - int mst_flag; - int shd_flag; - int ovr_flag; - nsc_off_t pos; - nsc_size_t len; - int rc; - nsc_off_t shd_pos; - chunkid_t shd_chunk; - nsc_buf_t *mst_tmp = NULL; - nsc_buf_t *shd_tmp = NULL; - - if (ip->bi_flags & DSW_MSTOFFLINE) { - DTRACE_PROBE(_ii_copy_chunks_end); - return (EIO); - } - - if (ip->bi_flags & (DSW_SHDOFFLINE|DSW_SHDEXPORT|DSW_SHDIMPORT)) { - DTRACE_PROBE(_ii_copy_chunks_end); - return (EIO); - } - - if (flag == CV_SHD2MST) { - mst_flag = NSC_WRBUF|NSC_WRTHRU; - shd_flag = NSC_RDBUF; - } else { - shd_flag = NSC_WRBUF|NSC_WRTHRU; - mst_flag = NSC_RDBUF; - } - - pos = DSW_CHK2FBA(chunk_num); - len = DSW_SIZE * nchunks; - if (pos + len > ip->bi_size) - len = ip->bi_size - pos; - if (ip->bi_flags & DSW_TREEMAP) { - ASSERT(nchunks == 1); - shd_chunk = ii_tsearch(ip, chunk_num); - if (shd_chunk == II_NULLNODE) { - /* shadow is full */ - mutex_enter(&ip->bi_mutex); - II_FLAG_SET(DSW_OVERFLOW, ip); - mutex_exit(&ip->bi_mutex); - DTRACE_PROBE(_ii_copy_chunks_end); - return (EIO); - } - - ovr_flag = II_ISOVERFLOW(shd_chunk); - shd_pos = DSW_CHK2FBA((ovr_flag) ? - II_2OVERFLOW(shd_chunk) : shd_chunk); - } else { - ovr_flag = FALSE; - shd_chunk = chunk_num; - shd_pos = pos; - } - - /* - * Always allocate the master side before the shadow to - * avoid deadlocks on the same chunk. 
- */ - - DTRACE_PROBE2(_ii_copy_chunks_alloc, nsc_off_t, pos, nsc_size_t, len); - - II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, mst_flag, &mst_tmp); - if (!II_SUCCESS(rc)) { - if (mst_tmp) - (void) nsc_free_buf(mst_tmp); - _ii_error(ip, DSW_MSTOFFLINE); - DTRACE_PROBE(_ii_copy_chunks_end); - return (rc); - } - - if (ovr_flag) { - /* use overflow volume */ - (void) nsc_reserve(OVRFD(ip), NSC_MULTI); - II_ALLOC_BUF(ip, overflow, rc, OVRFD(ip), shd_pos, len, - shd_flag, &shd_tmp); - } else { - II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), shd_pos, len, shd_flag, - &shd_tmp); - } - if (!II_SUCCESS(rc)) { - (void) nsc_free_buf(mst_tmp); - if (shd_tmp) - (void) nsc_free_buf(shd_tmp); - if (ovr_flag) - nsc_release(OVRFD(ip)); - _ii_error(ip, DSW_SHDOFFLINE); - if (ovr_flag) - _ii_error(ip, DSW_OVROFFLINE); - DTRACE_PROBE(_ii_copy_chunks_end); - return (rc); - } - - /* - * The direction of copy is determined by the mst_flag. - */ - DTRACE_PROBE2(_ii_copy_chunks_copy, kstat_named_t, ii_copy_direct, - int, mst_flag); - - if (ii_copy_direct) { - if (mst_flag & NSC_WRBUF) { - if (ovr_flag) { - II_NSC_COPY_DIRECT(ip, overflow, master, rc, - shd_tmp, mst_tmp, shd_pos, pos, len) - } else { - II_NSC_COPY_DIRECT(ip, shadow, master, rc, - shd_tmp, mst_tmp, shd_pos, pos, len) - } - if (!II_SUCCESS(rc)) { - /* A copy has failed - something is wrong */ - _ii_error(ip, DSW_MSTOFFLINE); - _ii_error(ip, DSW_SHDOFFLINE); - if (ovr_flag) - _ii_error(ip, DSW_OVROFFLINE); - } - } else { - if (ovr_flag) { - II_NSC_COPY_DIRECT(ip, master, overflow, rc, - mst_tmp, shd_tmp, pos, shd_pos, len); - } else { - II_NSC_COPY_DIRECT(ip, master, shadow, rc, - mst_tmp, shd_tmp, pos, shd_pos, len); - } - if (!II_SUCCESS(rc)) { - /* - * A failure has occurred during the above copy. - * The macro calls nsc_copy_direct, which will - * never return a read failure, only a write - * failure. With this assumption, we should - * take only the target volume offline. 
- */ - _ii_error(ip, DSW_SHDOFFLINE); - if (ovr_flag) - _ii_error(ip, DSW_OVROFFLINE); - } - } - } else { - if (mst_flag & NSC_WRBUF) { - rc = nsc_copy(shd_tmp, mst_tmp, shd_pos, pos, len); - if (II_SUCCESS(rc)) { - II_NSC_WRITE(ip, master, rc, mst_tmp, pos, len, - 0); - if (!II_SUCCESS(rc)) - _ii_error(ip, DSW_MSTOFFLINE); - } else { - /* A copy has failed - something is wrong */ - _ii_error(ip, DSW_MSTOFFLINE); - _ii_error(ip, DSW_SHDOFFLINE); - } - } else { - rc = nsc_copy(mst_tmp, shd_tmp, pos, shd_pos, len); - if (II_SUCCESS(rc)) { - if (ovr_flag) { - II_NSC_WRITE(ip, overflow, rc, shd_tmp, - shd_pos, len, 0); - } else { - II_NSC_WRITE(ip, shadow, rc, shd_tmp, - shd_pos, len, 0); - } - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_SHDOFFLINE); - if (ovr_flag) - _ii_error(ip, DSW_OVROFFLINE); - } - } else { - /* A copy has failed - something is wrong */ - _ii_error(ip, DSW_MSTOFFLINE); - _ii_error(ip, DSW_SHDOFFLINE); - } - } - } - - (void) nsc_free_buf(mst_tmp); - (void) nsc_free_buf(shd_tmp); - if (ovr_flag) - nsc_release(OVRFD(ip)); - - DTRACE_PROBE(_ii_copy_chunks); - - if (II_SUCCESS(rc)) { - (void) II_CLR_COPY_BITS(ip, chunk_num, nchunks); - rc = 0; - } - - return (rc); -} - - -/* - * _ii_copy_on_write - * - * Calling/Exit State: - * Returns 0 on success, otherwise error code. - * - * Description: - * Determines if a copy on write is necessary, and performs it. - * A copy on write is necessary in the following cases: - * - No copy is in progress and the shadow bit is clear, which - * means this is the first write to this track. - * - A copy is in progress and the copy bit is set, which means - * that a track copy is required. - * If a copy to the master is to be done, make a recursive call to this - * function to do any necessary copy on write on other InstantImage groups - * that share the same master volume. 
- */ - -static int -_ii_copy_on_write(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks) -{ - int rc = 0; - int rtype; - int hanging = (ip->bi_flags&DSW_HANGING); - - if (hanging || - (flag & (CV_SIBLING|CV_SHD2MST)) == CV_SHD2MST && NSHADOWS(ip)) { - _ii_info_t *xip; - /* - * Preserve copy of master for all other shadows of this master - * before writing our data onto the master. - */ - - /* - * Avoid deadlock with COW on same chunk of sibling shadow - * by unlocking this chunk before copying all other sibling - * chunks. - */ - - /* - * Only using a single chunk when copying to master avoids - * complex code here. - */ - - ASSERT(nchunks == 1); - if (!hanging) - _ii_unlock_chunk(ip, chunk_num); - for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { - if (xip == ip) /* don't copy ourselves again */ - continue; - - DTRACE_PROBE(_ii_copy_on_write); - - rw_enter(&xip->bi_linkrw, RW_READER); - mutex_enter(&xip->bi_mutex); - if (xip->bi_disabled) { - mutex_exit(&xip->bi_mutex); - rw_exit(&xip->bi_linkrw); - continue; /* this set is stopping */ - } - xip->bi_shdref++; - mutex_exit(&xip->bi_mutex); - /* don't waste time asking for MST as ip shares it */ - rtype = SHDR|BMP; - (void) _ii_rsrv_devs(xip, rtype, II_INTERNAL); - _ii_lock_chunk(xip, chunk_num); - rc = _ii_copy_on_write(xip, flag | CV_SIBLING, - chunk_num, 1); - - /* - * See comments in _ii_shadow_write() - */ - if (rc == 0 || - (rc == EIO && (xip->bi_flags&DSW_OVERFLOW) != 0)) - (void) II_SET_SHD_BIT(xip, chunk_num); - - _ii_unlock_chunk(xip, chunk_num); - _ii_rlse_devs(xip, rtype); - mutex_enter(&xip->bi_mutex); - xip->bi_shdref--; - if (xip->bi_state & DSW_CLOSING) { - if (total_ref(xip) == 0) { - cv_signal(&xip->bi_closingcv); - } - } - mutex_exit(&xip->bi_mutex); - rw_exit(&xip->bi_linkrw); - } - if (hanging) { - DTRACE_PROBE(_ii_copy_on_write_end); - return (0); - } - /* - * Reacquire chunk lock and check that a COW by a sibling - * has not already copied this chunk. 
- */ - _ii_lock_chunk(ip, chunk_num); - rc = II_TST_SHD_BIT(ip, chunk_num); - if (rc < 0) { - DTRACE_PROBE(_ii_copy_on_write_end); - return (EIO); - } - if (rc != 0) { - DTRACE_PROBE(_ii_copy_on_write_end); - return (0); - } - } - - if ((ip->bi_flags & DSW_COPYING) == 0) { - /* Not copying at all */ - - if ((ip->bi_flags & DSW_GOLDEN) == DSW_GOLDEN) { - /* No copy-on-write as it is independent */ - DTRACE_PROBE(_ii_copy_on_write_end); - return (0); - } - - /* Dependent, so depends on shadow bit */ - - if ((flag == CV_SHD2MST) && - ((ip->bi_flags & DSW_SHDOFFLINE) != 0)) { - /* - * Writing master but shadow is offline, so - * no need to copy on write or set shadow bit - */ - DTRACE_PROBE(_ii_copy_on_write_end); - return (0); - } - if (ip->bi_flags & DSW_BMPOFFLINE) { - DTRACE_PROBE(_ii_copy_on_write_end); - return (EIO); - } - rc = II_TST_SHD_BIT(ip, chunk_num); - if (rc < 0) { - DTRACE_PROBE(_ii_copy_on_write_end); - return (EIO); - } - if (rc == 0) { - /* Shadow bit clear, copy master to shadow */ - rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks); - } - } else { - /* Copying one way or the other */ - if (ip->bi_flags & DSW_BMPOFFLINE) { - DTRACE_PROBE(_ii_copy_on_write_end); - return (EIO); - } - rc = II_TST_COPY_BIT(ip, chunk_num); - if (rc < 0) { - DTRACE_PROBE(_ii_copy_on_write_end); - return (EIO); - } - if (rc) { - /* Copy bit set, do a copy */ - if ((ip->bi_flags & DSW_COPYINGS) == 0) { - /* Copy master to shadow */ - rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks); - } else { - /* Copy shadow to master */ - rc = _ii_copy_chunks(ip, CV_SHD2MST, chunk_num, - nchunks); - } - } - } - return (rc); -} - -#ifdef DEBUG -int ii_maxchunks = 0; -#endif - -/* - * _ii_copyvolp() - * Copy volume process. - * - * Calling/Exit State: - * Passes 0 back to caller when the copy is complete or has been aborted, - * otherwise error code. - * - * Description: - * According to the flag, copy the master to the shadow volume or the - * shadow to the master volume. 
Upon return wakeup all processes waiting - * for this copy. - * - */ - -static void -_ii_copyvolp(struct copy_args *ca) -{ - chunkid_t chunk_num; - int rc = 0; - chunkid_t max_chunk; - nsc_size_t nc_max; - int nc_try, nc_got; - nsc_size_t mst_max, shd_max; - _ii_info_t *ip; - int flag; - nsc_size_t bitmap_size; - nsc_size_t shadow_set, copy_set; - int chunkcount = 0; - int rsrv = 1; - spcs_s_info_t kstatus; - - ip = ca->ip; - flag = ca->flag; - kstatus = ca->kstatus; - - if (ip->bi_disabled) { - rc = DSW_EABORTED; - goto skip; - } - max_chunk = ip->bi_size / DSW_SIZE; - if ((ip->bi_size % DSW_SIZE) != 0) - ++max_chunk; - if ((ip->bi_flags&DSW_TREEMAP)) - nc_max = 1; - else { - mst_max = shd_max = 0; - (void) nsc_maxfbas(MSTFD(ip), 0, &mst_max); - (void) nsc_maxfbas(SHDFD(ip), 0, &shd_max); - nc_max = (mst_max < shd_max) ? mst_max : shd_max; - nc_max /= DSW_SIZE; - ASSERT(nc_max > 0 && nc_max < 1000); - } -#ifdef DEBUG - if (ii_maxchunks > 0) - nc_max = ii_maxchunks; -#endif - for (chunk_num = nc_got = 0; /* CSTYLED */; /* CSTYLED */) { - if ((flag & CV_SHD2MST) && NSHADOWS(ip)) - nc_try = 1; - else - nc_try = (int)nc_max; - chunk_num = II_NEXT_COPY_BIT(ip, chunk_num + nc_got, - max_chunk, nc_try, &nc_got); - - if (chunk_num >= max_chunk) /* loop complete */ - break; - if (ip->bi_flags & DSW_COPYINGX) { - /* request to abort copy */ - _ii_unlock_chunks(ip, chunk_num, nc_got); - rc = DSW_EABORTED; - break; - } - - sema_p(&_ii_concopy_sema); - rc = _ii_copy_on_write(ip, (flag & CV_SHD2MST), chunk_num, - nc_got); - sema_v(&_ii_concopy_sema); - if (ip->bi_flags & DSW_TREEMAP) - ii_tdelete(ip, chunk_num); - _ii_unlock_chunks(ip, chunk_num, nc_got); - if (!II_SUCCESS(rc)) { - if (ca->wait) - spcs_s_add(kstatus, rc); - rc = DSW_EIO; - break; - } - if (ip->bi_release || - (++chunkcount % ip->bi_throttle_unit) == 0) { - _ii_rlse_devs(ip, (ca->rtype&(~BMP))); - rsrv = 0; - delay(ip->bi_throttle_delay); - ca->rtype = MSTR|SHDR|(ca->rtype&BMP); - if ((rc = _ii_rsrv_devs(ip, 
(ca->rtype&(~BMP)), - II_INTERNAL)) != 0) { - if (ca->wait) - spcs_s_add(kstatus, rc); - rc = DSW_EIO; - break; - } - rsrv = 1; - if (nc_max > 1) { - /* - * maxfbas could have changed during the - * release/reserve, so recalculate the size - * of transfer we can do. - */ - (void) nsc_maxfbas(MSTFD(ip), 0, &mst_max); - (void) nsc_maxfbas(SHDFD(ip), 0, &shd_max); - nc_max = (mst_max < shd_max) ? - mst_max : shd_max; - nc_max /= DSW_SIZE; - } - } - } -skip: - mutex_enter(&ip->bi_mutex); - if (ip->bi_flags & DSW_COPYINGX) - II_FLAG_CLR(DSW_COPYINGP|DSW_COPYINGX, ip); - else - II_FLAG_CLR(DSW_COPY_FLAGS, ip); - - if ((ip->bi_flags & DSW_TREEMAP) && (flag & CV_SHD2MST) && - (ip->bi_flags & DSW_VOVERFLOW)) { - int rs; - bitmap_size = ip->bi_size / DSW_SIZE; - if ((ip->bi_size % DSW_SIZE) != 0) - ++bitmap_size; - bitmap_size += 7; - bitmap_size /= 8; - - /* Count the number of copy bits set */ - rs = II_CNT_BITS(ip, ip->bi_copyfba, ©_set, bitmap_size); - if ((rs == 0) && (copy_set == 0)) { - /* - * If we counted successfully and completed the copy - * see if any writes have forced the set into the - * overflow - */ - rs = II_CNT_BITS(ip, ip->bi_shdfba, &shadow_set, - bitmap_size); - if ((rs == 0) && (shadow_set < - (nsc_size_t)ip->bi_shdchks)) { - II_FLAG_CLR(DSW_VOVERFLOW, ip); - --iigkstat.spilled_over.value.ul; - } - } - } - - ca->rc = rc; - cv_broadcast(&ip->bi_copydonecv); - mutex_exit(&ip->bi_mutex); - if (!ca->wait) { - if (rsrv) - _ii_rlse_devs(ip, ca->rtype); - kmem_free(ca, sizeof (*ca)); - } - -} - -/* - * _ii_copyvol() - * Copy a volume. - * - * Calling/Exit State: - * Returns 0 when the copy is complete or has been aborted, - * otherwise error code. - * - * Description: - * According to the flag, copy the master to the shadow volume or the - * shadow to the master volume. Upon return wakeup all processes waiting - * for this copy. Uses a separate process (_ii_copyvolp) to allow the - * caller to be interrupted. 
- */ - -static int -_ii_copyvol(_ii_info_t *ip, int flag, int rtype, spcs_s_info_t kstatus, - int wait) -{ - struct copy_args *ca; - int rc; - - /* - * start copy in separate process. - */ - - ca = (struct copy_args *)kmem_alloc(sizeof (*ca), KM_SLEEP); - ca->ip = ip; - ca->flag = flag; - ca->rtype = rtype; - ca->kstatus = kstatus; - ca->wait = wait; - ca->rc = 0; - - if (rc = nsc_create_process((void (*)(void *))_ii_copyvolp, - (void *)ca, FALSE)) { - mutex_enter(&ip->bi_mutex); - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - cmn_err(CE_NOTE, "!Can't create II copy process"); - kmem_free(ca, sizeof (*ca)); - return (rc); - } - mutex_enter(&ip->bi_mutex); - if (wait == 0) { - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - return (0); - } - while (ip->bi_flags & DSW_COPYINGP) { - (void) cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex); - } - _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - rc = ca->rc; - kmem_free(ca, sizeof (*ca)); - - return (rc); -} - -/* - * _ii_stopcopy - * Stops any copy process on ip. - * - * Calling/Exit State: - * Returns 0 if the copy was stopped, otherwise error code. - * - * Description: - * Stop an in-progress copy by setting the DSW_COPYINGX flag, then - * wait for the copy to complete. - */ - -static int -_ii_stopcopy(_ii_info_t *ip) -{ - mutex_enter(&ip->bi_mutex); - DTRACE_PROBE1(_ii_stopcopy_flags, - uint_t, ip->bi_flags); - - while (ip->bi_flags & DSW_COPYINGP) { - - DTRACE_PROBE(_ii_stopcopy); - - II_FLAG_SET(DSW_COPYINGX, ip); - - if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) { - /* Awoken by a signal */ - mutex_exit(&ip->bi_mutex); - DTRACE_PROBE(_ii_stopcopy); - return (EINTR); - } - } - - mutex_exit(&ip->bi_mutex); - - return (0); -} - -/* - * _ii_error - * Given the error type that occurred, and the current state of the - * shadowing, set the appropriate error condition(s). 
- * - */ - -void -_ii_error(_ii_info_t *ip, int error_type) -{ - int copy_flags; - int golden; - int flags; - int recursive_call = (error_type & DSW_OVERFLOW) != 0; - int offline_bits = DSW_OFFLINE; - _ii_info_t *xip; - int rc; - - error_type &= ~DSW_OVERFLOW; - - mutex_enter(&ip->bi_mutex); - flags = (ip->bi_flags) & offline_bits; - if ((flags ^ error_type) == 0) { - /* nothing new offline */ - mutex_exit(&ip->bi_mutex); - return; - } - - if (error_type == DSW_BMPOFFLINE && - (ip->bi_flags & DSW_BMPOFFLINE) == 0) { - /* first, let nskerd know */ - rc = _ii_report_bmp(ip); - if (rc) { - if (ii_debug > 0) { - cmn_err(CE_WARN, "!Unable to mark bitmap bad in" - " config DB; rc = %d", rc); - } - ip->bi_flags |= DSW_CFGOFFLINE; - } - } - - flags = ip->bi_flags; - golden = ((flags & DSW_GOLDEN) == DSW_GOLDEN); - copy_flags = flags & DSW_COPYING; - - switch (error_type) { - - case DSW_BMPOFFLINE: - /* prevent further use of bitmap */ - flags |= DSW_BMPOFFLINE; - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: Bitmap offline"); - - switch (copy_flags) { - - case DSW_COPYINGM: - /* Bitmap offline, copying master to shadow */ - flags |= DSW_SHDOFFLINE; - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: Implied shadow offline"); - break; - - case DSW_COPYINGS: - /* Bitmap offline, copying shadow to master */ - if (golden) { - /* Shadow is still usable */ - if (ii_debug > 0) - cmn_err(CE_NOTE, - "!ii: Implied master offline"); - flags |= DSW_MSTOFFLINE; - } else { - /* - * Snapshot restore from shadow to master - * is a dumb thing to do anyway. Lose both. 
- */ - flags |= DSW_SHDOFFLINE | DSW_MSTOFFLINE; - if (ii_debug > 0) - cmn_err(CE_NOTE, - "ii: Implied master and " - "shadow offline"); - } - break; - - case 0: - /* Bitmap offline, no copying in progress */ - if (!golden) { - if (ii_debug > 0) - cmn_err(CE_NOTE, - "!ii: Implied shadow offline"); - flags |= DSW_SHDOFFLINE; - } - break; - } - break; - - case DSW_OVROFFLINE: - flags |= DSW_OVROFFLINE; - ASSERT(ip->bi_overflow); - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: Overflow offline"); - /* FALLTHRU */ - case DSW_SHDOFFLINE: - flags |= DSW_SHDOFFLINE; - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: Shadow offline"); - - if (copy_flags == DSW_COPYINGS) { - /* Shadow offline, copying shadow to master */ - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: Implied master offline"); - flags |= DSW_MSTOFFLINE; - } - break; - - case DSW_MSTOFFLINE: - flags |= DSW_MSTOFFLINE; - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: Master offline"); - - switch (copy_flags) { - - case DSW_COPYINGM: - /* Master offline, copying master to shadow */ - flags |= DSW_SHDOFFLINE; - if (ii_debug > 0) - cmn_err(CE_NOTE, "!ii: Implied shadow offline"); - break; - - case DSW_COPYINGS: - /* Master offline, copying shadow to master */ - if (!golden) { - flags |= DSW_SHDOFFLINE; - if (ii_debug > 0) - cmn_err(CE_NOTE, - "!ii: Implied shadow offline"); - } - break; - - case 0: - /* Master offline, no copying in progress */ - if (!golden) { - flags |= DSW_SHDOFFLINE; - if (ii_debug > 0) - cmn_err(CE_NOTE, - "!ii: Implied shadow offline"); - } - break; - } - break; - - default: - break; - } - - II_FLAG_SET(flags, ip); - mutex_exit(&ip->bi_mutex); - - if (!recursive_call && - NSHADOWS(ip) && (flags&DSW_MSTOFFLINE) == DSW_MSTOFFLINE) { - /* take master offline for all other sibling shadows */ - for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { - if (xip == ip) - continue; - if (_ii_rsrv_devs(xip, BMP, II_INTERNAL) != 0) - continue; - /* overload DSW_OVERFLOW */ - _ii_error(xip, 
DSW_MSTOFFLINE|DSW_OVERFLOW); - _ii_rlse_devs(xip, BMP); - } - } - -} - - -/* - * _ii_lock_chunk - * Locks access to the specified chunk - * - */ - -static void -_ii_lock_chunk(_ii_info_t *ip, chunkid_t chunk) -{ - if (chunk == II_NULLCHUNK) { - - DTRACE_PROBE(_ii_lock_chunk_type); - - rw_enter(&ip->bi_busyrw, RW_WRITER); - - } else { - - DTRACE_PROBE(_ii_lock_chunk_type); - - if (ip->bi_busy == NULL) { - DTRACE_PROBE(_ii_lock_chunk_end); - return; - } - - rw_enter(&ip->bi_busyrw, RW_READER); - mutex_enter(&ip->bi_mutex); - while (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], - chunk % DSW_BITS)) - cv_wait(&ip->bi_busycv, &ip->bi_mutex); - DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS); - mutex_exit(&ip->bi_mutex); - } - -} - - -/* - * _ii_trylock_chunk - * Tries to lock access to the specified chunk - * Returns non-zero on success. - * - */ - -static int -_ii_trylock_chunk(_ii_info_t *ip, chunkid_t chunk) -{ - int rc; - - ASSERT(chunk != II_NULLCHUNK); - if (rw_tryenter(&ip->bi_busyrw, RW_READER) == 0) { - DTRACE_PROBE(_ii_trylock_chunk); - return (0); - } - - if (ip->bi_busy == NULL) { - DTRACE_PROBE(_ii_trylock_chunk_end); - return (0); - } - - mutex_enter(&ip->bi_mutex); - if (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS)) { - rw_exit(&ip->bi_busyrw); /* RW_READER */ - rc = 0; - } else { - DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS); - rc = 1; - } - mutex_exit(&ip->bi_mutex); - - return (rc); -} - -/* - * _ii_unlock_chunks - * Unlocks access to the specified chunks - * - */ - -static void -_ii_unlock_chunks(_ii_info_t *ip, chunkid_t chunk, int n) -{ - if (chunk == II_NULLCHUNK) { - - DTRACE_PROBE(_ii_unlock_chunks); - - rw_exit(&ip->bi_busyrw); /* RW_WRITER */ - - } else { - - if (ip->bi_busy == NULL) { - DTRACE_PROBE(_ii_unlock_chunks_end); - return; - } - mutex_enter(&ip->bi_mutex); - - DTRACE_PROBE(_ii_unlock_chunks); - - for (; n-- > 0; chunk++) { - ASSERT(DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], - chunk % 
DSW_BITS)); - DSW_BIT_CLR(ip->bi_busy[chunk / DSW_BITS], - chunk % DSW_BITS); - rw_exit(&ip->bi_busyrw); /* RW_READER */ - } - cv_broadcast(&ip->bi_busycv); - mutex_exit(&ip->bi_mutex); - - } -} - -/* - * Copyout the bit map. - */ -static int -_ii_ab_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm, - int user_bm_size) -{ - nsc_off_t last_fba; - nsc_buf_t *tmp; - nsc_vec_t *nsc_vecp; - nsc_off_t fba_pos; - int buf_fba_len; - int buf_byte_len; - size_t co_len; - int rc; - - DTRACE_PROBE2(_ii_ab_co_bmp_start, nsc_off_t, bm_offset, - nsc_size_t, user_bm_size); - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - /* First calculate the size of the shadow and copy bitmaps */ - co_len = DSW_BM_FBA_LEN(ip->bi_size); - ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len); - - /* Are we in the ranges of the various bitmaps/indexes? */ - if (bm_offset < ip->bi_shdfba) - return (EIO); - else if (bm_offset < (last_fba = ip->bi_shdfba + co_len)) - /*EMPTY*/; - else if (bm_offset < (last_fba = ip->bi_copyfba + co_len)) - /*EMPTY*/; - else if ((ip->bi_flags & DSW_TREEMAP) && - (bm_offset < (last_fba = last_fba + (co_len * 32)))) - /*EMPTY*/; - else return (EIO); - - /* Are we within the size of the segment being copied? */ - if (FBA_LEN(user_bm_size) > last_fba - bm_offset) - return (EIO); - - for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0; - fba_pos += DSW_CBLK_FBA) { - tmp = NULL; - buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ? 
- DSW_CBLK_FBA : last_fba - fba_pos; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len, - NSC_RDBUF, &tmp); - II_READ_END(ip, bitmap, rc, buf_fba_len); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - - _ii_error(ip, DSW_BMPOFFLINE); - return (EIO); - } - - /* copyout each nsc_vec's worth of data */ - buf_byte_len = FBA_SIZE(buf_fba_len); - for (nsc_vecp = tmp->sb_vec; - buf_byte_len > 0 && user_bm_size > 0; - nsc_vecp++) { - co_len = (user_bm_size > nsc_vecp->sv_len) ? - nsc_vecp->sv_len : user_bm_size; - if (copyout(nsc_vecp->sv_addr, user_bm, co_len)) { - (void) nsc_free_buf(tmp); - return (EFAULT); - } - user_bm += co_len; - user_bm_size -= co_len; - buf_byte_len -= co_len; - } - - - (void) nsc_free_buf(tmp); - } - - return (0); -} - -/* - * Copyin a bit map and or with differences bitmap. - */ -static int -_ii_ab_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm, -int user_bm_size) -{ - nsc_off_t last_fba; - nsc_buf_t *tmp; - nsc_vec_t *nsc_vecp; - nsc_off_t fba_pos; - int buf_fba_len; - int buf_byte_len; - size_t ci_len; - int rc; - int n; - unsigned char *tmp_buf, *tmpp, *tmpq; - - DTRACE_PROBE2(_ii_ab_ci_bmp_start, nsc_off_t, bm_offset, - nsc_size_t, user_bm_size); - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - tmp_buf = NULL; - last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size); - - for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0; - fba_pos += DSW_CBLK_FBA) { - tmp = NULL; - buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ? 
- DSW_CBLK_FBA : last_fba - fba_pos; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len, - NSC_RDWRBUF, &tmp); - II_READ_END(ip, bitmap, rc, buf_fba_len); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - - _ii_error(ip, DSW_BMPOFFLINE); - return (EIO); - } - - /* copyin each nsc_vec's worth of data */ - buf_byte_len = FBA_SIZE(buf_fba_len); - for (nsc_vecp = tmp->sb_vec; - buf_byte_len > 0 && user_bm_size > 0; - nsc_vecp++) { - ci_len = (user_bm_size > nsc_vecp->sv_len) ? - nsc_vecp->sv_len : user_bm_size; - tmpp = tmp_buf = kmem_alloc(ci_len, KM_SLEEP); - tmpq = nsc_vecp->sv_addr; - if (copyin(user_bm, tmpp, ci_len)) { - (void) nsc_free_buf(tmp); - kmem_free(tmp_buf, ci_len); - return (EFAULT); - } - for (n = ci_len; n-- > 0; /* CSTYLED */) - *tmpq++ |= *tmpp++; - user_bm += ci_len; - user_bm_size -= ci_len; - buf_byte_len -= ci_len; - kmem_free(tmp_buf, ci_len); - } - - II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, buf_fba_len, 0); - if (!II_SUCCESS(rc)) { - (void) nsc_free_buf(tmp); - _ii_error(ip, DSW_BMPOFFLINE); - return (EIO); - } - - (void) nsc_free_buf(tmp); - } - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - -/* - * Completely zero the bit map. - * - * Returns 0 if no error - * Returns non-zero if there was an error - */ -static int -_ii_ab_zerobm(_ii_info_t *ip) -{ - nsc_off_t fba_pos; - int rc; - nsc_size_t len; - nsc_size_t size; - nsc_buf_t *tmp; - - size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba; - for (fba_pos = ip->bi_shdfba; fba_pos < size; fba_pos += DSW_CBLK_FBA) { - tmp = NULL; - len = fba_pos + DSW_CBLK_FBA < size ? 
- DSW_CBLK_FBA : size - fba_pos; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, len, NSC_RDWRBUF, - &tmp); - II_READ_END(ip, bitmap, rc, len); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - - rc = nsc_zero(tmp, fba_pos, len, 0); - if (II_SUCCESS(rc)) { - II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, len, 0); - } - - (void) nsc_free_buf(tmp); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - } - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - - -/* - * Copy shadow bitmap to copy bitmap - */ -static int -_ii_ab_copybm(_ii_info_t *ip) -{ - nsc_off_t copy_fba_pos, shd_fba_pos; - int rc; - nsc_size_t len; - nsc_off_t size; - nsc_buf_t *copy_tmp, *shd_tmp; - - size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba; - copy_fba_pos = ip->bi_copyfba; - for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size; - copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) { - shd_tmp = NULL; - len = shd_fba_pos + DSW_CBLK_FBA < size ? 
- DSW_CBLK_FBA : size - shd_fba_pos; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len, NSC_RDBUF, - &shd_tmp); - II_READ_END(ip, bitmap, rc, len); - if (!II_SUCCESS(rc)) { - if (shd_tmp) - (void) nsc_free_buf(shd_tmp); - - _ii_error(ip, DSW_BMPOFFLINE); - if (ii_debug > 1) - cmn_err(CE_NOTE, "!ii: copybm failed 1 rc %d", - rc); - - return (rc); - } - - copy_tmp = NULL; - rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len, NSC_WRBUF, - &copy_tmp); - if (!II_SUCCESS(rc)) { - (void) nsc_free_buf(shd_tmp); - if (copy_tmp) - (void) nsc_free_buf(copy_tmp); - - _ii_error(ip, DSW_BMPOFFLINE); - if (ii_debug > 1) - cmn_err(CE_NOTE, "!ii: copybm failed 2 rc %d", - rc); - - return (rc); - } - rc = nsc_copy(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos, - len); - if (II_SUCCESS(rc)) { - II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos, - len, 0); - } - - (void) nsc_free_buf(shd_tmp); - (void) nsc_free_buf(copy_tmp); - if (!II_SUCCESS(rc)) { - if (ii_debug > 1) - cmn_err(CE_NOTE, "!ii: copybm failed 4 rc %d", - rc); - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - } - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - - -/* - * stolen from nsc_copy_h() - */ - -static int -_ii_nsc_or(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, nsc_off_t pos2, - nsc_size_t len) -{ - unsigned char *a1, *a2; - unsigned char *b1, *b2; - nsc_vec_t *v1, *v2; - int i, sz, l1, l2; - - if (pos1 < h1->sb_pos || pos1 + len > h1->sb_pos + h1->sb_len || - pos2 < h2->sb_pos || pos2 + len > h2->sb_pos + h2->sb_len) - return (EINVAL); - - if (!len) - return (0); - - /* find starting point in "from" vector */ - - v1 = h1->sb_vec; - pos1 -= h1->sb_pos; - - for (; pos1 >= FBA_NUM(v1->sv_len); v1++) - pos1 -= FBA_NUM(v1->sv_len); - - a1 = v1->sv_addr + FBA_SIZE(pos1); - l1 = v1->sv_len - FBA_SIZE(pos1); - - /* find starting point in "to" vector */ - - v2 = h2->sb_vec; - pos2 -= h2->sb_pos; - - for (; pos2 >= FBA_NUM(v2->sv_len); v2++) - pos2 -= 
FBA_NUM(v2->sv_len); - - a2 = v2->sv_addr + FBA_SIZE(pos2); - l2 = v2->sv_len - FBA_SIZE(pos2); - - /* copy required data */ - - len = FBA_SIZE(len); - - while (len) { - sz = min(l1, l2); - sz = (int)min((nsc_size_t)sz, len); - - b1 = a1; - b2 = a2; - for (i = sz; i-- > 0; /* CSTYLED */) - *b2++ |= *b1++; - - l1 -= sz; - l2 -= sz; - a1 += sz; - a2 += sz; - len -= sz; - - if (!l1) { - a1 = (++v1)->sv_addr; - l1 = v1->sv_len; - } - if (!l2) { - a2 = (++v2)->sv_addr; - l2 = v2->sv_len; - } - } - - return (0); -} - - -/* - * Or the shadow bitmap in to the copy bitmap, clear the - * shadow bitmap. - */ -static int -_ii_ab_orbm(_ii_info_t *ip) -{ - nsc_off_t copy_fba_pos, shd_fba_pos; - int rc; - nsc_size_t len; - size_t size; - nsc_buf_t *copy_tmp, *shd_tmp; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba; - copy_fba_pos = ip->bi_copyfba; - for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size; - copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) { - shd_tmp = NULL; - len = shd_fba_pos + DSW_CBLK_FBA < size ? 
- DSW_CBLK_FBA : size - shd_fba_pos; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len, - NSC_RDBUF|NSC_WRBUF, &shd_tmp); - II_READ_END(ip, bitmap, rc, len); - if (!II_SUCCESS(rc)) { - if (shd_tmp) - (void) nsc_free_buf(shd_tmp); - - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - - copy_tmp = NULL; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len, - NSC_RDBUF|NSC_WRBUF, &copy_tmp); - II_READ_END(ip, bitmap, rc, len); - if (!II_SUCCESS(rc)) { - (void) nsc_free_buf(shd_tmp); - if (copy_tmp) - (void) nsc_free_buf(copy_tmp); - - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - rc = _ii_nsc_or(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos, - len); - if (II_SUCCESS(rc)) { - II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos, - len, 0); - } - if (II_SUCCESS(rc)) - rc = nsc_zero(shd_tmp, shd_fba_pos, len, 0); - if (II_SUCCESS(rc)) { - II_NSC_WRITE(ip, bitmap, rc, shd_tmp, shd_fba_pos, len, - 0); - } - - (void) nsc_free_buf(shd_tmp); - (void) nsc_free_buf(copy_tmp); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - } - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - -/* - * _ii_ab_tst_shd_bit - * Determine if a chunk has been copied to the shadow device - * Relies on the alloc_buf/free_buf semantics for locking. 
- * - * Calling/Exit State: - * Returns 1 if the modified bit has been set for the shadow device, - * Returns 0 if the modified bit has not been set for the shadow device, - * Returns -1 if there was an error - */ - -static int -_ii_ab_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk) -{ - int rc; - nsc_off_t fba; - nsc_buf_t *tmp = NULL; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS); - chunk %= FBA_SIZE(1) * DSW_BITS; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp); - II_READ_END(ip, bitmap, rc, 1); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - if (tmp) - (void) nsc_free_buf(tmp); - return (-1); - } - rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], - chunk%DSW_BITS); - (void) nsc_free_buf(tmp); - - return (rc); -} - - -/* - * _ii_ab_set_shd_bit - * Records that a chunk has been copied to the shadow device - * - * Returns non-zero if an error is encountered - * Returns 0 if no error - */ - -static int -_ii_ab_set_shd_bit(_ii_info_t *ip, chunkid_t chunk) -{ - int rc; - nsc_off_t fba; - nsc_buf_t *tmp = NULL; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS); - chunk %= FBA_SIZE(1) * DSW_BITS; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp); - II_READ_END(ip, bitmap, rc, 1); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - if (tmp) - (void) nsc_free_buf(tmp); - return (rc); - } - if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], - chunk%DSW_BITS) == 0) { - DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], - chunk%DSW_BITS); - II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0); - if ((ip->bi_state & DSW_CNTSHDBITS) == 0) - ip->bi_shdbits++; - } - (void) nsc_free_buf(tmp); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - - return (0); -} - - -/* - * _ii_ab_tst_copy_bit - * Determine if a 
chunk needs to be copied during updates. - * - * Calling/Exit State: - * Returns 1 if the copy bit for the chunk is set - * Returns 0 if the copy bit for the chunk is not set - * Returns -1 if an error is encountered - */ - -static int -_ii_ab_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk) -{ - int rc; - nsc_off_t fba; - nsc_buf_t *tmp = NULL; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (-1); - - fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS); - chunk %= FBA_SIZE(1) * DSW_BITS; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp); - II_READ_END(ip, bitmap, rc, 1); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - _ii_error(ip, DSW_BMPOFFLINE); - return (-1); - } - rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], - chunk%DSW_BITS); - (void) nsc_free_buf(tmp); - - return (rc); -} - - -/* - * _ii_ab_set_copy_bit - * Records that a chunk has been copied to the shadow device - * - * Returns non-zero if an error is encountered - * Returns 0 if no error - */ - -static int -_ii_ab_set_copy_bit(_ii_info_t *ip, chunkid_t chunk) -{ - int rc; - nsc_off_t fba; - nsc_buf_t *tmp = NULL; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS); - chunk %= FBA_SIZE(1) * DSW_BITS; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp); - II_READ_END(ip, bitmap, rc, 1); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], - chunk%DSW_BITS) == 0) { - DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], - chunk%DSW_BITS); - if ((ip->bi_state & DSW_CNTCPYBITS) == 0) - ip->bi_copybits++; - - II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0); - } - (void) nsc_free_buf(tmp); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - - return (0); -} - - -/* - * _ii_ab_clr_copy_bits - * 
Records that a chunk has been cleared on the shadow device, this - * function assumes that the bits to clear are all in the same fba, - * as is the case when they were generated by _ii_ab_next_copy_bit(). - * - * Returns non-zero if an error is encountered - * Returns 0 if no error - */ - -static int -_ii_ab_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks) -{ - int rc; - nsc_off_t fba; - nsc_buf_t *tmp = NULL; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS); - chunk %= FBA_SIZE(1) * DSW_BITS; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp); - II_READ_END(ip, bitmap, rc, 1); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - for (; nchunks-- > 0; chunk++) { - DSW_BIT_CLR(tmp->sb_vec->sv_addr[chunk/DSW_BITS], - chunk%DSW_BITS); - if (ip->bi_copybits > 0) - ip->bi_copybits--; - } - - II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0); - (void) nsc_free_buf(tmp); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - - return (0); -} - -/* - * _ii_ab_fill_copy_bmp - * Fills the copy bitmap with 1's. 
- * - * Returns non-zero if an error is encountered - * Returns 0 if no error - */ - -static int -_ii_ab_fill_copy_bmp(_ii_info_t *ip) -{ - int rc; - nsc_off_t fba; - nsc_buf_t *tmp; - unsigned char *p; - int i, j; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - fba = ip->bi_copyfba; - for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) { - tmp = NULL; - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_WRBUF, &tmp); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - p = (unsigned char *)tmp->sb_vec->sv_addr; - for (j = FBA_SIZE(1); j-- > 0; p++) - *p = (unsigned char)0xff; - II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - (void) nsc_free_buf(tmp); - return (rc); - } - (void) nsc_free_buf(tmp); - } - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - -/* - * _ii_ab_load_bmp - * Load bitmap from persistent storage. - */ - -static int -_ii_ab_load_bmp(_ii_info_t *ip, int flag) -/* ARGSUSED */ -{ - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - -/* - * _ii_ab_next_copy_bit - * Find next set copy bit. - * - * Returns the next bits set in the copy bitmap, with the corresponding chunks - * locked. Used to avoid having to reread the same bit map block as each bit - * is tested. 
- */ - -static chunkid_t -_ii_ab_next_copy_bit(_ii_info_t *ip, chunkid_t startchunk, chunkid_t maxchunk, - int wanted, int *got) -{ - chunkid_t rc; - nsc_off_t fba; - chunkid_t chunk; - int bits_per_fba = FBA_SIZE(1) * DSW_BITS; - int high; - chunkid_t nextchunk; - nsc_buf_t *tmp = NULL; - - *got = 0; -again: - if (ip->bi_flags & DSW_BMPOFFLINE) - return (maxchunk + 1); - - while (startchunk < maxchunk) { - tmp = NULL; - fba = ip->bi_copyfba + startchunk / bits_per_fba; - chunk = startchunk % bits_per_fba; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp); - II_READ_END(ip, bitmap, rc, 1); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - _ii_error(ip, DSW_BMPOFFLINE); - return (maxchunk + 1); - } - high = startchunk + bits_per_fba - startchunk%bits_per_fba; - if (high > maxchunk) - high = maxchunk; - for (; startchunk < high; chunk++, startchunk++) { - if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], - chunk%DSW_BITS)) { - /* - * trylock won't sleep so can use while - * holding the buf. - */ - if (!_ii_trylock_chunk(ip, startchunk)) { - (void) nsc_free_buf(tmp); - _ii_lock_chunk(ip, startchunk); - if (_ii_ab_tst_copy_bit(ip, startchunk) - != 1) { - /* - * another process copied this - * chunk while we were acquiring - * the chunk lock. 
- */ - _ii_unlock_chunk(ip, - startchunk); - DTRACE_PROBE( - _ii_ab_next_copy_bit_again); - goto again; - } - *got = 1; - DTRACE_PROBE(_ii_ab_next_copy_bit_end); - return (startchunk); - } - *got = 1; - nextchunk = startchunk + 1; - chunk++; - for (; --wanted > 0 && nextchunk < high; - nextchunk++, chunk++) { - if (!DSW_BIT_ISSET(tmp->sb_vec->sv_addr - [chunk/DSW_BITS], chunk%DSW_BITS)) { - break; /* end of bit run */ - } - if (_ii_trylock_chunk(ip, nextchunk)) - (*got)++; - else - break; - } - (void) nsc_free_buf(tmp); - DTRACE_PROBE(_ii_ab_next_copy_bit); - return (startchunk); - } - } - (void) nsc_free_buf(tmp); - } - - return (maxchunk + 1); -} - -/* - * _ii_ab_save_bmp - * Save bitmap to persistent storage. - */ - -static int -_ii_ab_save_bmp(_ii_info_t *ip, int flag) -/* ARGSUSED */ -{ - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - return (0); -} - -/* - * _ii_ab_change_bmp - * copy change bitmap to memory - */ - -static int -_ii_ab_change_bmp(_ii_info_t *ip, unsigned char *ptr) -/* ARGSUSED */ -{ - int bm_size; - int i, j, fba; - int rc; - unsigned char *p; - nsc_buf_t *tmp = NULL; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); - - rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba, - ptr, bm_size); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - - fba = ip->bi_copyfba; - for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) { - tmp = NULL; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp); - II_READ_END(ip, bitmap, rc, 1); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - _ii_error(ip, DSW_BMPOFFLINE); - return (rc); - } - p = (unsigned char *)tmp->sb_vec->sv_addr; - for (j = FBA_SIZE(1); j-- > 0; p++) - *ptr |= *p; - (void) nsc_free_buf(tmp); - } - - return (0); -} - -/* - * Count bits set in the bit map. 
- */ -static int -_ii_ab_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter, -int bm_size) -{ - nsc_size_t last_fba; - nsc_buf_t *tmp; - nsc_vec_t *sd_vecp; - nsc_off_t fba_pos; - int buf_fba_len; - int buf_byte_len; - int co_len; - int i; - unsigned int j, k; - unsigned char *cp; - int rc; - - *counter = 0; - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size); - - for (fba_pos = bm_offset; fba_pos < last_fba && bm_size > 0; - fba_pos += DSW_CBLK_FBA) { - tmp = NULL; - buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ? - DSW_CBLK_FBA : last_fba - fba_pos; - II_READ_START(ip, bitmap); - rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len, - NSC_RDBUF, &tmp); - II_READ_END(ip, bitmap, rc, 1); - if (!II_SUCCESS(rc)) { - if (tmp) - (void) nsc_free_buf(tmp); - - _ii_error(ip, DSW_BMPOFFLINE); - return (EIO); - } - - /* count each sd_vec's worth of data */ - buf_byte_len = FBA_SIZE(buf_fba_len); - for (sd_vecp = tmp->sb_vec; - buf_byte_len > 0 && bm_size > 0; - sd_vecp++) { - co_len = (bm_size > sd_vecp->sv_len) ? - sd_vecp->sv_len : bm_size; - cp = sd_vecp->sv_addr; - for (i = k = 0; i < co_len; i++) - for (j = (unsigned)*cp++; j; j &= j - 1) - k++; - *counter += k; - bm_size -= co_len; - buf_byte_len -= co_len; - } - - - (void) nsc_free_buf(tmp); - } - - return (0); -} - -/* - * OR the bitmaps as part of a join operation - */ -static int -_ii_ab_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip) -{ - int rc; - nsc_size_t len; - nsc_size_t size; - nsc_buf_t *dest_tmp, *src_tmp; - nsc_off_t src_fba_pos; - - if ((src_ip->bi_flags & DSW_BMPOFFLINE) || - (dest_ip->bi_flags & DSW_BMPOFFLINE)) - return (EIO); - - size = DSW_BM_FBA_LEN(src_ip->bi_size) + src_ip->bi_shdfba; - for (src_fba_pos = src_ip->bi_shdfba; src_fba_pos < size; - src_fba_pos += DSW_CBLK_FBA) { - src_tmp = NULL; - len = src_fba_pos + DSW_CBLK_FBA < size ? 
- DSW_CBLK_FBA : size - src_fba_pos; - II_READ_START(src_ip, bitmap); - rc = nsc_alloc_buf(src_ip->bi_bmpfd, src_fba_pos, len, - NSC_RDWRBUF, &src_tmp); - II_READ_END(src_ip, bitmap, rc, len); - if (!II_SUCCESS(rc)) { - if (src_tmp) - (void) nsc_free_buf(src_tmp); - - _ii_error(src_ip, DSW_BMPOFFLINE); - return (rc); - } - - dest_tmp = NULL; - II_READ_START(dest_ip, bitmap); - rc = nsc_alloc_buf(dest_ip->bi_bmpfd, src_fba_pos, len, - NSC_RDWRBUF, &dest_tmp); - II_READ_END(dest_ip, bitmap, rc, len); - if (!II_SUCCESS(rc)) { - (void) nsc_free_buf(src_tmp); - if (dest_tmp) - (void) nsc_free_buf(dest_tmp); - - _ii_error(dest_ip, DSW_BMPOFFLINE); - return (rc); - } - rc = _ii_nsc_or(src_tmp, dest_tmp, src_fba_pos, src_fba_pos, - len); - if (II_SUCCESS(rc)) { - II_NSC_WRITE(dest_ip, bitmap, rc, dest_tmp, - src_fba_pos, len, 0); - } - - (void) nsc_free_buf(src_tmp); - (void) nsc_free_buf(dest_tmp); - if (!II_SUCCESS(rc)) { - _ii_error(dest_ip, DSW_BMPOFFLINE); - return (rc); - } - } - - dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); - -} - -static _ii_bmp_ops_t alloc_buf_bmp = { - _ii_ab_co_bmp, - _ii_ab_ci_bmp, - _ii_ab_zerobm, - _ii_ab_copybm, - _ii_ab_orbm, - _ii_ab_tst_shd_bit, - _ii_ab_set_shd_bit, - _ii_ab_tst_copy_bit, - _ii_ab_set_copy_bit, - _ii_ab_clr_copy_bits, - _ii_ab_next_copy_bit, - _ii_ab_fill_copy_bmp, - _ii_ab_load_bmp, - _ii_ab_save_bmp, - _ii_ab_change_bmp, - _ii_ab_cnt_bits, - _ii_ab_join_bmp -}; - - -/* - * Copyout the bit map. - */ -static int -_ii_km_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm, - int user_bm_size) -{ - int start_offset; - int bm_size; - size_t co_len; - nsc_off_t last_fba; - - /* First calculate the size of the shadow and copy bitmaps */ - co_len = DSW_BM_FBA_LEN(ip->bi_size); - ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len); - - /* Are we in the ranges of the various bitmaps/indexes? 
*/ - if (bm_offset < ip->bi_shdfba) - return (EIO); - else if (bm_offset < (last_fba = ip->bi_shdfba + co_len)) - /*EMPTY*/; - else if (bm_offset < (last_fba = ip->bi_copyfba + co_len)) - /*EMPTY*/; - else if ((ip->bi_flags & DSW_TREEMAP) && - (bm_offset < (last_fba = last_fba + (co_len * 32)))) - /*EMPTY*/; - else return (EIO); - - if (FBA_LEN(user_bm_size) > last_fba - bm_offset) - return (EIO); - - start_offset = FBA_SIZE(bm_offset); - bm_size = FBA_SIZE(last_fba); - - co_len = (user_bm_size > bm_size) ? bm_size : user_bm_size; - if (copyout(ip->bi_bitmap + start_offset, user_bm, co_len)) - return (EFAULT); - - return (0); -} - -/* - * Copyin a bit map and or with differences bitmap. - */ -static int -_ii_km_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm, - int user_bm_size) -{ - unsigned char *tmp_buf; - unsigned char *dest; - unsigned char *p; - size_t tmp_size; - int n; - int start_offset; - int bm_size; - size_t ci_len; - int rc = 0; - - start_offset = FBA_SIZE(bm_offset); - bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); - - tmp_buf = NULL; - tmp_size = FBA_SIZE(1); - - tmp_buf = kmem_alloc(tmp_size, KM_SLEEP); - start_offset = FBA_SIZE(bm_offset); - dest = ip->bi_bitmap + start_offset; - bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); - - ci_len = (user_bm_size > bm_size) ? bm_size : user_bm_size; - while (ci_len > 0) { - n = (tmp_size > ci_len) ? ci_len : tmp_size; - if (copyin(user_bm, tmp_buf, n)) { - rc = EFAULT; - break; - } - user_bm += n; - for (p = tmp_buf; n--> 0; ci_len--) - *dest++ |= *p++; - } - if (tmp_buf) - kmem_free(tmp_buf, tmp_size); - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (rc); -} - -/* - * Completely zero the bit map. 
- */ -static int -_ii_km_zerobm(_ii_info_t *ip) -{ - int start_offset = FBA_SIZE(ip->bi_shdfba); - int len; - - len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba); - mutex_enter(&ip->bi_bmpmutex); - bzero(ip->bi_bitmap+start_offset, len); - mutex_exit(&ip->bi_bmpmutex); - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - - -/* - * Copy shadow bitmap to copy bitmap - */ -static int -_ii_km_copybm(_ii_info_t *ip) -{ - int copy_offset, shd_offset; - int len; - - len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba); - shd_offset = FBA_SIZE(ip->bi_shdfba); - copy_offset = FBA_SIZE(ip->bi_copyfba); - mutex_enter(&ip->bi_bmpmutex); - bcopy(ip->bi_bitmap+shd_offset, ip->bi_bitmap+copy_offset, len); - mutex_exit(&ip->bi_bmpmutex); - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - - -/* - * Or the shadow bitmap in to the copy bitmap, clear the - * shadow bitmap. - */ -static int -_ii_km_orbm(_ii_info_t *ip) -{ - unsigned char *copy, *shd; - int copy_offset, shd_offset; - int len; - - len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba); - shd_offset = FBA_SIZE(ip->bi_shdfba); - copy_offset = FBA_SIZE(ip->bi_copyfba); - shd = ip->bi_bitmap + shd_offset; - copy = ip->bi_bitmap + copy_offset; - - mutex_enter(&ip->bi_bmpmutex); - while (len-- > 0) - *copy++ |= *shd++; - mutex_exit(&ip->bi_bmpmutex); - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - -/* - * _ii_km_tst_shd_bit - * Determine if a chunk has been copied to the shadow device - * - * Calling/Exit State: - * Returns 1 if the modified bit has been set for the shadow device, - * otherwise returns 0. 
- */ - -static int -_ii_km_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk) -{ - unsigned char *bmp; - int bmp_offset; - int rc; - - bmp_offset = FBA_SIZE(ip->bi_shdfba); - bmp = ip->bi_bitmap + bmp_offset; - - mutex_enter(&ip->bi_bmpmutex); - rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS); - mutex_exit(&ip->bi_bmpmutex); - - return (rc); -} - - -/* - * _ii_km_set_shd_bit - * Records that a chunk has been copied to the shadow device - */ - -static int -_ii_km_set_shd_bit(_ii_info_t *ip, chunkid_t chunk) -{ - unsigned char *bmp; - int bmp_offset; - - bmp_offset = FBA_SIZE(ip->bi_shdfba); - bmp = ip->bi_bitmap + bmp_offset; - - mutex_enter(&ip->bi_bmpmutex); - if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) { - DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS); - if ((ip->bi_state & DSW_CNTSHDBITS) == 0) - ip->bi_shdbits++; - } - mutex_exit(&ip->bi_bmpmutex); - - return (0); -} - -/* - * _ii_km_tst_copy_bit - * Determine if a chunk needs to be copied during updates. - * - * Calling/Exit State: - * Returns 1 if the copy bit for the chunk is set, - * otherwise returns 0 - */ - -static int -_ii_km_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk) -{ - unsigned char *bmp; - int bmp_offset; - int rc; - - bmp_offset = FBA_SIZE(ip->bi_copyfba); - bmp = ip->bi_bitmap + bmp_offset; - - mutex_enter(&ip->bi_bmpmutex); - rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS); - mutex_exit(&ip->bi_bmpmutex); - - return (rc); -} - - -/* - * _ii_km_set_copy_bit - * Records that a chunk has been copied to the shadow device - */ - -static int -_ii_km_set_copy_bit(_ii_info_t *ip, chunkid_t chunk) -{ - unsigned char *bmp; - int bmp_offset; - - bmp_offset = FBA_SIZE(ip->bi_copyfba); - bmp = ip->bi_bitmap + bmp_offset; - - mutex_enter(&ip->bi_bmpmutex); - if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) { - DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS); - if ((ip->bi_state & DSW_CNTCPYBITS) == 0) - ip->bi_copybits++; - } - mutex_exit(&ip->bi_bmpmutex); - - 
return (0); -} - - -/* - * _ii_km_clr_copy_bits - * Records that a chunk has been cleared on the shadow device - */ - -static int -_ii_km_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks) -{ - unsigned char *bmp; - int bmp_offset; - - bmp_offset = FBA_SIZE(ip->bi_copyfba); - bmp = ip->bi_bitmap + bmp_offset; - - mutex_enter(&ip->bi_bmpmutex); - for (; nchunks-- > 0; chunk++) { - DSW_BIT_CLR(bmp[chunk/DSW_BITS], chunk%DSW_BITS); - if (ip->bi_copybits > 0) - ip->bi_copybits--; - } - mutex_exit(&ip->bi_bmpmutex); - - return (0); -} - -/* - * _ii_km_fill_copy_bmp - * Fills the copy bitmap with 1's. - */ - -static int -_ii_km_fill_copy_bmp(_ii_info_t *ip) -{ - int len; - unsigned char *bmp; - int bmp_offset; - - bmp_offset = FBA_SIZE(ip->bi_copyfba); - bmp = ip->bi_bitmap + bmp_offset; - - len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba); - - mutex_enter(&ip->bi_bmpmutex); - while (len-- > 0) - *bmp++ = (unsigned char)0xff; - mutex_exit(&ip->bi_bmpmutex); - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - -/* - * _ii_km_load_bmp - * Load bitmap from persistent storage. - */ - -static int -_ii_km_load_bmp(_ii_info_t *ip, int flag) -{ - nsc_off_t bmp_offset; - nsc_size_t bitmap_size; - int rc; - - if (ip->bi_flags & DSW_BMPOFFLINE) - return (EIO); - - if (ip->bi_bitmap == NULL) { - bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) + - ip->bi_shdfba); - ip->bi_bitmap = nsc_kmem_zalloc(bitmap_size, KM_SLEEP, - _ii_local_mem); - } - if (flag) - return (0); /* just create an empty bitmap */ - bmp_offset = FBA_SIZE(ip->bi_shdfba); - rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba, - ip->bi_bitmap + bmp_offset, - 2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba)); - if (!II_SUCCESS(rc)) - _ii_error(ip, DSW_BMPOFFLINE); - - ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (rc); -} - -/* - * _ii_km_save_bmp - * Save bitmap to persistent storage. 
- */ - -static int -_ii_km_save_bmp(_ii_info_t *ip, int flag) -{ - int bmp_offset; - int bitmap_size; - int rc; - - bmp_offset = FBA_SIZE(ip->bi_shdfba); - if (ip->bi_flags & DSW_BMPOFFLINE) - rc = EIO; - else { - rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_WRBUF, - ip->bi_shdfba, ip->bi_bitmap + bmp_offset, - 2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba)); - if (!II_SUCCESS(rc)) - _ii_error(ip, DSW_BMPOFFLINE); - } - - if (flag && ip->bi_bitmap) { /* dispose of bitmap memory */ - bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) + - ip->bi_shdfba); - nsc_kmem_free(ip->bi_bitmap, bitmap_size); - ip->bi_bitmap = NULL; - } - - return (rc); -} - -/* - * _ii_km_next_copy_bit - * Find next set copy bit. - * - * Returns the next bits set in the copy bitmap, with the corresponding chunks - * locked. Used to cut down on the number of times the bmpmutex is acquired. - */ - -static chunkid_t -_ii_km_next_copy_bit(_ii_info_t *ip, chunkid_t chunk, chunkid_t maxchunk, - int want, int *got) -{ - unsigned char *bmp; - int bmp_offset; - int nextchunk; - - *got = 0; - bmp_offset = FBA_SIZE(ip->bi_copyfba); - bmp = ip->bi_bitmap + bmp_offset; - - mutex_enter(&ip->bi_bmpmutex); - for (; chunk < maxchunk; chunk++) { - if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS)) { - /* - * trylock won't sleep so can use while - * holding bi_bmpmutex. 
- */ - if (!_ii_trylock_chunk(ip, chunk)) { - mutex_exit(&ip->bi_bmpmutex); - _ii_lock_chunk(ip, chunk); - *got = 1; - - DTRACE_PROBE(_ii_km_next_copy_bit); - - return (chunk); - } - *got = 1; - for (nextchunk = chunk + 1; - *got < want && nextchunk < maxchunk; nextchunk++) { - if (!DSW_BIT_ISSET(bmp[nextchunk/DSW_BITS], - nextchunk%DSW_BITS)) - break; - if (_ii_trylock_chunk(ip, nextchunk)) - (*got)++; - else - break; - } - mutex_exit(&ip->bi_bmpmutex); - - DTRACE_PROBE(_ii_km_next_copy_bit); - return (chunk); - } - } - mutex_exit(&ip->bi_bmpmutex); - - return (maxchunk + 1); -} - -/* - * _ii_km_change_bmp - * copy change bitmap to memory - */ - -static int -_ii_km_change_bmp(_ii_info_t *ip, unsigned char *ptr) -/* ARGSUSED */ -{ - int start_offset; - int bm_size; - unsigned char *q; - - bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); - - start_offset = FBA_SIZE(ip->bi_shdfba); - bcopy(ip->bi_bitmap + start_offset, ptr, bm_size); - - start_offset = FBA_SIZE(ip->bi_copyfba); - q = ip->bi_bitmap + start_offset; - while (bm_size-- > 0) - *ptr |= *q; - - return (0); -} - -/* - * Count bits set in the bit map. - */ -static int -_ii_km_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter, - int bm_size) -{ - int start_offset; - int i; - nsc_size_t j, k; - unsigned char *cp; - - start_offset = FBA_SIZE(bm_offset); - - cp = ip->bi_bitmap + start_offset; - for (i = k = 0; i < bm_size; i++) - for (j = (unsigned)*cp++; j; j &= j - 1) - k++; - *counter = k; - - return (0); -} - -/* - * Or the shadow bitmap in to the copy bitmap, clear the - * shadow bitmap. 
- */ -static int -_ii_km_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip) -{ - uchar_t *dest, *src; - nsc_size_t bm_size; - - dest = dest_ip->bi_bitmap + FBA_SIZE(dest_ip->bi_shdfba); - src = src_ip->bi_bitmap + FBA_SIZE(src_ip->bi_shdfba); - bm_size = FBA_SIZE(DSW_BM_FBA_LEN(dest_ip->bi_size)); - - while (bm_size-- > 0) - *dest++ |= *src++; - - dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); - - return (0); -} - -static _ii_bmp_ops_t kmem_buf_bmp = { - _ii_km_co_bmp, - _ii_km_ci_bmp, - _ii_km_zerobm, - _ii_km_copybm, - _ii_km_orbm, - _ii_km_tst_shd_bit, - _ii_km_set_shd_bit, - _ii_km_tst_copy_bit, - _ii_km_set_copy_bit, - _ii_km_clr_copy_bits, - _ii_km_next_copy_bit, - _ii_km_fill_copy_bmp, - _ii_km_load_bmp, - _ii_km_save_bmp, - _ii_km_change_bmp, - _ii_km_cnt_bits, - _ii_km_join_bmp -}; - - -static int -ii_read_volume(_ii_info_t *ip, int mst_src, nsc_buf_t *srcbuf, - nsc_buf_t *dstbuf, chunkid_t chunk_num, nsc_off_t fba, nsc_size_t len) -{ - int rc; - nsc_buf_t *tmp; - nsc_off_t mapped_fba; - chunkid_t mapped_chunk; - int overflow; - - if (mst_src || (ip->bi_flags&DSW_TREEMAP) == 0) { - /* simple read with optional copy */ - if (mst_src) { - II_NSC_READ(ip, master, rc, srcbuf, fba, len, 0); - } else { - II_NSC_READ(ip, shadow, rc, srcbuf, fba, len, 0); - } - if (dstbuf && II_SUCCESS(rc)) { - rc = nsc_copy(srcbuf, dstbuf, fba, fba, len); - } - - return (rc); - } - /* read from mapped shadow into final buffer */ - mapped_chunk = ii_tsearch(ip, chunk_num); - if (mapped_chunk == II_NULLNODE) - return (EIO); - overflow = II_ISOVERFLOW(mapped_chunk); - if (overflow) - mapped_chunk = II_2OVERFLOW(mapped_chunk); - /* convert chunk number from tsearch into final fba */ - mapped_fba = DSW_CHK2FBA(mapped_chunk) + (fba % DSW_SIZE); - tmp = NULL; - if (overflow) { - (void) nsc_reserve(OVRFD(ip), NSC_MULTI); - II_READ_START(ip, overflow); - rc = nsc_alloc_buf(OVRFD(ip), mapped_fba, len, NSC_RDBUF, &tmp); - II_READ_END(ip, overflow, rc, len); - } else { - 
II_READ_START(ip, shadow); - rc = nsc_alloc_buf(SHDFD(ip), mapped_fba, len, NSC_RDBUF, &tmp); - II_READ_END(ip, shadow, rc, len); - } - if (II_SUCCESS(rc)) { - if (dstbuf == NULL) - dstbuf = srcbuf; - rc = nsc_copy(tmp, dstbuf, mapped_fba, fba, len); - (void) nsc_free_buf(tmp); - } - if (overflow) - nsc_release(OVRFD(ip)); - - return (rc); -} - -/* - * _ii_fill_buf - * Read data from the required device - * - * Calling/Exit State: - * Returns 0 if the data was read successfully, otherwise - * error code. - * - * Description: - * Reads the data from fba_pos for length fba_len from the - * required device. This data may be a mix of data from the master - * device and the shadow device, depending on the state of the - * bitmaps. - */ - -static int -_ii_fill_buf(ii_fd_t *bfd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag, - nsc_buf_t **handle, nsc_buf_t **handle2) -{ - _ii_info_t *ip = bfd->ii_info; - _ii_info_t *xip; - int second_shd = 0; - nsc_off_t temp_fba; - nsc_size_t temp_len; - nsc_size_t bmp_len; - chunkid_t chunk_num; - int rc; - int fill_from_pair; - int rtype = SHDR|BMP; - nsc_buf_t *second_buf = NULL; - - if (flag&NSC_RDAHEAD) - return (NSC_DONE); - - chunk_num = fba_pos / DSW_SIZE; - temp_fba = fba_pos; - temp_len = fba_len; - - /* - * If the master is being updated from a shadow we need to fill from - * the correct shadow volume. 
- */ - if (NSHADOWS(ip) && bfd->ii_shd == 0) { - for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { - if (xip == ip) - continue; - if (xip->bi_flags &DSW_COPYINGS) { - second_shd = 1; - ip = xip; - if ((rc = _ii_rsrv_devs(ip, rtype, - II_INTERNAL)) != 0) - return (EIO); - rc = nsc_alloc_buf(SHDFD(ip), fba_pos, fba_len, - (flag&NSC_RDAHEAD)|NSC_MIXED, &second_buf); - if (!II_SUCCESS(rc)) { - rc = EIO; - goto out; - } - handle2 = &second_buf; - break; - } - } - } - - while (temp_len > 0) { - if ((temp_fba + temp_len) > DSW_CHK2FBA(chunk_num + 1)) { - bmp_len = DSW_CHK2FBA(chunk_num + 1) - temp_fba; - temp_len -= bmp_len; - } else { - bmp_len = temp_len; - temp_len = 0; - } - - fill_from_pair = 0; - - if ((ip->bi_flags & DSW_COPYINGM) == DSW_COPYINGM) { - rc = II_TST_COPY_BIT(ip, chunk_num); - /* Treat a failed bitmap volume as a clear bit */ - if (rc > 0) { - /* Copy bit set */ - if (bfd->ii_shd) { - if (*handle2) - fill_from_pair = 1; - else { - rc = EIO; - goto out; - } - } - } - } - if ((ip->bi_flags & DSW_COPYINGS) == DSW_COPYINGS) { - rc = II_TST_COPY_BIT(ip, chunk_num); - /* Treat a failed bitmap volume as a clear bit */ - if (rc > 0) { - /* Copy bit set */ - if (bfd->ii_shd == 0) { - if (*handle2 || - (ip->bi_flags&DSW_TREEMAP)) - fill_from_pair = 1; - else { - rc = EIO; - goto out; - } - } - } - } - if (((ip->bi_flags & DSW_GOLDEN) == 0) && bfd->ii_shd) { - /* Dependent shadow read */ - - rc = II_TST_SHD_BIT(ip, chunk_num); - if (rc < 0) { - rc = EIO; - goto out; - } - if (rc == 0) { - /* Shadow bit clear */ - if (*handle2) - fill_from_pair = 1; - else { - rc = EIO; - goto out; - } - } - } - - if (fill_from_pair) { - /* it matters now */ - if (ip->bi_flags & (DSW_MSTOFFLINE | DSW_SHDOFFLINE)) { - rc = EIO; - goto out; - } - if (*handle2 == NULL && - (ip->bi_flags&DSW_TREEMAP) == 0) { - rc = EIO; - goto out; - } - rc = ii_read_volume(ip, bfd->ii_shd, - *handle2, *handle, chunk_num, temp_fba, bmp_len); - if (!II_SUCCESS(rc)) { - _ii_error(ip, 
DSW_MSTOFFLINE); - _ii_error(ip, DSW_SHDOFFLINE); - goto out; - } - } else { - if (bfd->ii_shd && (ip->bi_flags & DSW_SHDOFFLINE)) { - rc = EIO; - goto out; - } - if ((bfd->ii_shd == 0) && - (ip->bi_flags & DSW_MSTOFFLINE)) { - rc = EIO; - goto out; - } - rc = ii_read_volume(ip, !(bfd->ii_shd), *handle, NULL, - chunk_num, temp_fba, bmp_len); - if (!II_SUCCESS(rc)) { - if (bfd->ii_shd) - _ii_error(ip, DSW_SHDOFFLINE); - else - _ii_error(ip, DSW_MSTOFFLINE); - goto out; - } - } - - temp_fba += bmp_len; - chunk_num++; - } - - rc = 0; -out: - if (second_buf) - (void) nsc_free_buf(second_buf); - if (second_shd) - _ii_rlse_devs(ip, rtype); - - return (rc); -} - - -/* - * _ii_shadow_write - * Perform any copy on write required by a write buffer request - * - * Calling/Exit State: - * Returns 0 on success, otherwise error code. - * - */ - -static int -_ii_shadow_write(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len) -{ - _ii_info_t *ip = bfd->ii_info; - chunkid_t chunk_num; - int rc; - int flag; - int hanging; - - DTRACE_PROBE2(_ii_shadow_write_start, nsc_off_t, pos, nsc_size_t, len); - - /* fail immediately if config DB is unavailable */ - if ((ip->bi_flags & DSW_CFGOFFLINE) == DSW_CFGOFFLINE) { - return (EIO); - } - - chunk_num = pos / DSW_SIZE; - - if (bfd->ii_shd) - flag = 0; /* To shadow */ - else - flag = CV_SHD2MST; /* To master */ - - mutex_enter(&ip->bi_mutex); - ip->bi_shdref++; - mutex_exit(&ip->bi_mutex); - hanging = (ip->bi_flags&DSW_HANGING) != 0; - - for (; (chunk_num >= 0) && - DSW_CHK2FBA(chunk_num) < (pos + len); chunk_num++) { - - if (!hanging) - _ii_lock_chunk(ip, chunk_num); - rc = _ii_copy_on_write(ip, flag, chunk_num, 1); - - /* - * Set the shadow bit when a small shadow has overflowed so - * that ii_read_volume can return an error if an attempt is - * made to read that chunk. 
- */ - if (!hanging) { - if (rc == 0 || - (rc == EIO && (ip->bi_flags&DSW_OVERFLOW) != 0)) - (void) II_SET_SHD_BIT(ip, chunk_num); - _ii_unlock_chunk(ip, chunk_num); - } - } - - mutex_enter(&ip->bi_mutex); - ip->bi_shdref--; - if (ip->bi_state & DSW_CLOSING) { - if (total_ref(ip) == 0) { - cv_signal(&ip->bi_closingcv); - } - } - mutex_exit(&ip->bi_mutex); - - /* did the bitmap fail during this process? */ - return (ip->bi_flags & DSW_CFGOFFLINE? EIO : 0); -} - -/* - * _ii_alloc_buf - * Allocate a buffer of data - * - * Calling/Exit State: - * Returns 0 for success, < 0 for async I/O, > 0 is an error code. - * - * Description: - * For a write buffer, calls dsw_shadow_write to perform any necessary - * copy on write operations, then allocates the real buffers from the - * underlying devices. - * For a read buffer, allocates the real buffers from the underlying - * devices, then calls _ii_fill_buf to fill the required buffer. - * For a buffer that is neither read nor write, just allocate the - * buffers so that a _ii_fill_buf can be done later by _ii_read. - */ - -static int -_ii_alloc_buf(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len, int flag, - ii_buf_t **ptr) -{ - _ii_info_t *ip = bfd->ii_info; - ii_buf_t *h; - int raw = II_RAW(bfd); - int rc = 0; - int ioflag; - int fbuf = 0, fbuf2 = 0, abuf = 0; - int rw_ent = 0; - - if (bfd->ii_bmp) { - DTRACE_PROBE(_ii_alloc_buf_end); - /* any I/O to the bitmap device is barred */ - return (EIO); - } - - if (len == 0) { - DTRACE_PROBE(_ii_alloc_buf_end); - return (EINVAL); - } - - /* Bounds checking */ - if (pos + len > ip->bi_size) { - if (ii_debug > 1) - cmn_err(CE_NOTE, - "!ii: Attempt to access beyond end of ii volume"); - DTRACE_PROBE(_ii_alloc_buf_end); - return (EIO); - } - - h = *ptr; - if (h == NULL) { - h = (ii_buf_t *)_ii_alloc_handle(NULL, NULL, NULL, bfd); - if (h == NULL) { - DTRACE_PROBE(_ii_alloc_buf_end); - return (ENOMEM); - } - } - - /* - * Temporary nsc_reserve of bitmap and other device. 
- * This device has already been reserved by the preceding _ii_attach. - * Corresponding nsc_release is in _ii_free_buf. - */ - - h->ii_rsrv = BMP | (raw ? (bfd->ii_shd ? MSTR : SHDR) - : (bfd->ii_shd ? MST : SHD)); - - if (!bfd->ii_shd) - ip = ip->bi_master; - - rw_enter(&ip->bi_linkrw, RW_READER); - rw_ent = 1; - if (ip->bi_shdfd == NULL || (ip->bi_flags & DSW_SHDEXPORT) == - DSW_SHDEXPORT) - h->ii_rsrv &= ~(SHD|SHDR); - if ((rc = _ii_rsrv_devs(ip, h->ii_rsrv, II_EXTERNAL)) != 0) { - rw_exit(&ip->bi_linkrw); - rw_ent = 0; - h->ii_rsrv = NULL; - goto error; - } - - if (flag & NSC_WRBUF) { - rc = _ii_shadow_write(bfd, pos, len); - if (!II_SUCCESS(rc)) - goto error; - } - - if (!(flag & NSC_RDAHEAD)) - ioflag = flag & ~(NSC_RDBUF); - else - ioflag = flag; - - if (bfd->ii_shd) { - /* - * SHADOW - */ - - if (ip->bi_flags & DSW_SHDEXPORT) { - rc = EIO; - goto error; - } - /* - * The master device buffer has to be allocated first - * so that deadlocks are avoided. - */ - DTRACE_PROBE(AllocBufFor_SHADOW); - - if ((ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) == 0) { - rc = nsc_alloc_buf(MSTFD(ip), pos, len, - (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2); - if (!II_SUCCESS(rc)) { - if (ii_debug > 2) - cmn_err(CE_WARN, "!ii: " - "Join/write-S race detected\n"); - if (h->ii_bufp2) - (void) nsc_free_buf(h->ii_bufp2); - h->ii_bufp2 = NULL; - /* - * Carry on as this will not matter if - * _ii_fill_buf is not called, or if - * it is called but doesn't need to read this - * volume. 
- */ - rc = 0; - } - fbuf2 = 1; - } - - if (ip->bi_flags & DSW_SHDOFFLINE) { - rc = EIO; - goto error; - } - if ((ip->bi_flags)&DSW_TREEMAP) { - rc = nsc_alloc_abuf(pos, len, 0, &h->ii_abufp); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_SHDOFFLINE); - goto error; - } - abuf = 1; - } else { - II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), pos, len, - ioflag, &h->ii_bufp); /* do not read yet */ - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_SHDOFFLINE); - goto error; - } - fbuf = 1; - } - } else { - /* - * MASTER - */ - - /* - * The master device buffer has to be allocated first - * so that deadlocks are avoided. - */ - - if (ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) { - rc = EIO; - goto error; - } - - DTRACE_PROBE(AllocBufFor_MASTER); - - II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, ioflag, - &h->ii_bufp); /* do not read yet */ - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_MSTOFFLINE); - goto error; - } - fbuf = 1; - - /* - * If shadow FD and (dependent set OR copying) and - * not (compact dependent && shadow offline && shadow exported) - */ - if ((ip->bi_shdfd) && - ((ip->bi_flags & DSW_COPYINGP) || - (!(ip->bi_flags & DSW_GOLDEN))) && - (!(ip->bi_flags & - (DSW_TREEMAP|DSW_SHDOFFLINE|DSW_SHDEXPORT)))) { - rc = nsc_alloc_buf(SHDFD(ip), pos, len, - (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2); - if (!II_SUCCESS(rc)) { - if (ii_debug > 2) - cmn_err(CE_WARN, "!ii: " - "Join/write-M race detected\n"); - if (h->ii_bufp2) - (void) nsc_free_buf(h->ii_bufp2); - h->ii_bufp2 = NULL; - /* - * Carry on as this will not matter if - * _ii_fill_buf is not called, or if - * it is called but doesn't need to read this - * volume. - */ - rc = 0; - } - fbuf2 = 1; - } - } - - if (flag & NSC_RDBUF) - rc = _ii_fill_buf(bfd, pos, len, flag, - h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2); - -error: - if (II_SUCCESS(rc)) { - h->ii_bufh.sb_vec = h->ii_abufp ? 
h->ii_abufp->sb_vec : - h->ii_bufp->sb_vec; - h->ii_bufh.sb_error = 0; - h->ii_bufh.sb_flag |= flag; - h->ii_bufh.sb_pos = pos; - h->ii_bufh.sb_len = len; - } else { - h->ii_bufh.sb_error = rc; - if (h->ii_bufp2 && fbuf2) { - (void) nsc_free_buf(h->ii_bufp2); - h->ii_bufp2 = NULL; - } - if (h->ii_bufp && fbuf) { - (void) nsc_free_buf(h->ii_bufp); - h->ii_bufp = NULL; - } - if (h->ii_abufp && abuf) { - (void) nsc_free_buf(h->ii_abufp); - h->ii_abufp = NULL; - } - - if (h->ii_rsrv) { - /* - * Release temporary reserve - reserved above. - */ - _ii_rlse_devs(ip, h->ii_rsrv); - h->ii_rsrv = NULL; - } - if (rw_ent) - rw_exit(&ip->bi_linkrw); - } - - return (rc); -} - - -/* - * _ii_free_buf - */ - -static int -_ii_free_buf(ii_buf_t *h) -{ - ii_fd_t *bfd; - int rsrv; - int rc; - - if (h->ii_abufp == NULL) { - rc = nsc_free_buf(h->ii_bufp); - } else { - rc = nsc_free_buf(h->ii_abufp); - h->ii_abufp = NULL; - } - if (!II_SUCCESS(rc)) - return (rc); - if (h->ii_bufp2) { - rc = nsc_free_buf(h->ii_bufp2); - h->ii_bufp2 = NULL; - if (!II_SUCCESS(rc)) - return (rc); - } - - bfd = h->ii_fd; - rsrv = h->ii_rsrv; - - if ((h->ii_bufh.sb_flag & NSC_HALLOCATED) == 0) { - rc = _ii_free_handle(h, h->ii_fd); - if (!II_SUCCESS(rc)) - return (rc); - } else { - h->ii_bufh.sb_flag = NSC_HALLOCATED; - h->ii_bufh.sb_vec = NULL; - h->ii_bufh.sb_error = 0; - h->ii_bufh.sb_pos = 0; - h->ii_bufh.sb_len = 0; - h->ii_rsrv = NULL; - } - - /* - * Release temporary reserve - reserved in _ii_alloc_buf. - */ - - if (rsrv) - _ii_rlse_devs(bfd->ii_info, rsrv); - rw_exit(&bfd->ii_info->bi_linkrw); - - return (0); -} - - -/* - * _ii_open - * Open a device - * - * Calling/Exit State: - * Returns a token to identify the shadow device. - * - * Description: - * Performs the housekeeping operations associated with an upper layer - * of the nsc stack opening a shadowed device. 
- */ - -/* ARGSUSED */ - -static int -_ii_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - _ii_info_t *ip; - _ii_overflow_t *op; - ii_fd_t *bfd; - int is_mst = 0; - int is_shd = 0; - int raw = (flag & NSC_CACHE) == 0; - - bfd = nsc_kmem_zalloc(sizeof (*bfd), KM_SLEEP, _ii_local_mem); - if (!bfd) - return (ENOMEM); - - DTRACE_PROBE1(_ii_open_mutex, - ii_fd_t *, bfd); - - mutex_enter(&_ii_info_mutex); - - for (ip = _ii_info_top; ip; ip = ip->bi_next) { - if (strcmp(path, ii_pathname(ip->bi_mstfd)) == 0) { - is_mst = 1; - break; - } else if (strcmp(path, ip->bi_keyname) == 0) { - is_shd = 1; - break; - } else if (strcmp(path, ii_pathname(ip->bi_bmpfd)) == 0) - break; - } - - if (is_mst) - ip = ip->bi_master; - - if (ip && ip->bi_disabled && !(ip->bi_state & DSW_MULTIMST)) { - DTRACE_PROBE(_ii_open_Disabled); - mutex_exit(&_ii_info_mutex); - return (EINTR); - } - - if (!ip) { - /* maybe it's an overflow */ - mutex_exit(&_ii_info_mutex); - mutex_enter(&_ii_overflow_mutex); - for (op = _ii_overflow_top; op; op = op->ii_next) { - if (strcmp(path, op->ii_volname) == 0) - break; - } - mutex_exit(&_ii_overflow_mutex); - - if (!op) { - nsc_kmem_free(bfd, sizeof (*bfd)); - DTRACE_PROBE(_ii_open_end_EINVAL); - return (EINVAL); - } - bfd->ii_ovr = 1; - bfd->ii_oflags = flag; - bfd->ii_optr = op; - *cdp = (blind_t)bfd; - - DTRACE_PROBE(_ii_open_end_overflow); - return (0); - } - mutex_enter(&ip->bi_mutex); - ip->bi_ioctl++; - mutex_exit(&_ii_info_mutex); - - if (is_mst) { - if (raw) { - ip->bi_mstr_iodev = NULL; /* set in attach */ - ip->bi_mstrref++; - } else { - ip->bi_mst_iodev = NULL; /* set in attach */ - ip->bi_mstref++; - } - ip->bi_master->bi_iifd = bfd; - } else if (is_shd) { - if (raw) { - ip->bi_shdr_iodev = NULL; /* set in attach */ - ip->bi_shdrref++; - } else { - ip->bi_shd_iodev = NULL; /* set in attach */ - ip->bi_shdref++; - } - bfd->ii_shd = 1; - } else { - ip->bi_bmpref++; - ip->bi_bmp_iodev = NULL; /* set in attach */ - bfd->ii_bmp = 1; - } - 
- _ii_ioctl_done(ip); - mutex_exit(&ip->bi_mutex); - - bfd->ii_info = ip; - bfd->ii_oflags = flag; - - *cdp = (blind_t)bfd; - - return (0); -} - -static int -_ii_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - return (_ii_open(path, NSC_CACHE|flag, cdp, iodev)); -} - -static int -_ii_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - return (_ii_open(path, NSC_DEVICE|flag, cdp, iodev)); -} - - -/* - * _ii_close - * Close a device - * - * Calling/Exit State: - * Always succeeds - returns 0 - * - * Description: - * Performs the housekeeping operations associated with an upper layer - * of the nsc stack closing a shadowed device. - */ - -static int -_ii_close(bfd) -ii_fd_t *bfd; -{ - _ii_info_t *ip = bfd->ii_info; - _ii_info_dev_t *dip; - int raw; - - if (!ip) { - ASSERT(bfd->ii_ovr); - return (0); - } - - raw = II_RAW(bfd); - - mutex_enter(&ip->bi_mutex); - - if (bfd->ii_shd && raw) { - dip = &ip->bi_shdrdev; - } else if (bfd->ii_shd) { - dip = &ip->bi_shddev; - } else if (bfd->ii_bmp) { - dip = &ip->bi_bmpdev; - } else if (raw) { - dip = ip->bi_mstrdev; - } else { - dip = ip->bi_mstdev; - } - - if (dip) { - dip->bi_ref--; - if (dip->bi_ref == 0) - dip->bi_iodev = NULL; - } - - if (ip->bi_state & DSW_CLOSING) { - if (total_ref(ip) == 0) { - cv_signal(&ip->bi_closingcv); - } - } else if ((ip->bi_flags & DSW_HANGING) && - (ip->bi_head->bi_state & DSW_CLOSING)) - cv_signal(&ip->bi_head->bi_closingcv); - - if (!(bfd->ii_shd || bfd->ii_bmp)) /* is master device */ - ip->bi_master->bi_iifd = NULL; - mutex_exit(&ip->bi_mutex); - - nsc_kmem_free(bfd, sizeof (*bfd)); - - return (0); -} - -/* - * _ii_alloc_handle - * Allocate a handle - * - */ - -static nsc_buf_t * -_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), ii_fd_t *bfd) -{ - ii_buf_t *h; - - if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info)) - return (NULL); - - h = kmem_alloc(sizeof (*h), KM_SLEEP); - if (!h) - return (NULL); - - h->ii_abufp = NULL; - h->ii_bufp = 
nsc_alloc_handle(II_FD(bfd), d_cb, r_cb, w_cb); - if (!h->ii_bufp) { - kmem_free(h, sizeof (*h)); - return (NULL); - } - h->ii_bufp2 = NULL; - h->ii_bufh.sb_flag = NSC_HALLOCATED; - h->ii_fd = bfd; - h->ii_rsrv = NULL; - - return ((nsc_buf_t *)h); -} - - -/* - * _ii_free_handle - * Free a handle - * - */ - -static int /*ARGSUSED*/ -_ii_free_handle(ii_buf_t *h, ii_fd_t *bfd) -{ - int rc; - - if (h->ii_abufp) - (void) nsc_free_buf(h->ii_abufp); - rc = nsc_free_handle(h->ii_bufp); - if (!II_SUCCESS(rc)) { - return (rc); - } - - kmem_free(h, sizeof (ii_buf_t)); - - return (0); -} - - -/* - * _ii_attach - * Attach - * - * Calling/Exit State: - * Returns 0 for success, errno on failure. - * - * Description: - */ - -static int -_ii_attach(ii_fd_t *bfd, nsc_iodev_t *iodev) -{ - _ii_info_t *ip; - int dev; - int raw; - int rc; - _ii_info_dev_t *infop; - - raw = II_RAW(bfd); - - DTRACE_PROBE2(_ii_attach_info, - char *, bfd->ii_shd? "shadow" : "master", - int, raw); - - if (bfd->ii_ovr) - return (EINVAL); - - ip = bfd->ii_info; - if (ip == NULL) - return (EINVAL); - - mutex_enter(&ip->bi_mutex); - if (bfd->ii_bmp) { - infop = &ip->bi_bmpdev; - } else if (bfd->ii_shd) { - if (raw) { - infop = &ip->bi_shdrdev; - } else { - infop = &ip->bi_shddev; - } - } else if (!bfd->ii_ovr) { - if (raw) { - infop = ip->bi_mstrdev; - } else { - infop = ip->bi_mstdev; - } - } - - if (iodev) { - infop->bi_iodev = iodev; - nsc_set_owner(infop->bi_fd, infop->bi_iodev); - } - mutex_exit(&ip->bi_mutex); - - if (bfd->ii_bmp) - return (EINVAL); - - if (raw) - dev = bfd->ii_shd ? SHDR : MSTR; - else - dev = bfd->ii_shd ? SHD : MST; - - rc = _ii_rsrv_devs(ip, dev, II_EXTERNAL); - - return (rc); -} - - -/* - * _ii_detach - * Detach - * - * Calling/Exit State: - * Returns 0 for success, always succeeds - * - * Description: - */ - -static int -_ii_detach(bfd) -ii_fd_t *bfd; -{ - int dev; - int raw; - - raw = II_RAW(bfd); - - DTRACE_PROBE2(_ii_detach_info, - char *, bfd->ii_shd? 
"shadow" : "master", - int, raw); - - if (bfd->ii_bmp) - return (0); - - ASSERT(bfd->ii_info); - dev = bfd->ii_shd ? (raw ? SHDR : SHD) : (raw ? MSTR : MST); - _ii_rlse_devs(bfd->ii_info, dev); - - return (0); -} - -/* - * _ii_get_pinned - * - */ - -static int -_ii_get_pinned(ii_fd_t *bfd) -{ - int rc; - - if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info)) - return (EIO); - - rc = nsc_get_pinned(II_FD(bfd)); - - return (rc); -} - -/* - * _ii_discard_pinned - * - */ - -static int -_ii_discard_pinned(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len) -{ - int rc; - - if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info)) - return (EIO); - rc = nsc_discard_pinned(II_FD(bfd), pos, len); - - return (rc); -} - -/* - * _ii_partsize - * - */ - -static int -_ii_partsize(ii_fd_t *bfd, nsc_size_t *ptr) -{ - /* Always return saved size */ - *ptr = bfd->ii_info->bi_size; - return (0); -} - -/* - * _ii_maxfbas - * - */ - -static int -_ii_maxfbas(ii_fd_t *bfd, int flag, nsc_size_t *ptr) -{ - int rc; - int rs; - int dev; - _ii_info_t *ip; - - ip = bfd->ii_info; - if (REMOTE_VOL(bfd->ii_shd, ip)) - return (EIO); - - dev = ((ip->bi_flags)&DSW_SHDIMPORT) ? SHDR : MSTR; - - DTRACE_PROBE1(_ii_maxfbas_info, - char *, dev == SHDR? "shadow" : "master"); - - rs = _ii_rsrv_devs(ip, dev, II_INTERNAL); - rc = nsc_maxfbas((dev == MSTR) ? 
MSTFD(ip) : SHDFD(ip), flag, ptr); - - if (rs == 0) - _ii_rlse_devs(ip, dev); - - return (rc); -} - -/* - * ii_get_group_list - */ -_ii_info_t ** -ii_get_group_list(char *group, int *count) -{ - int i; - int nip; - uint64_t hash; - _ii_info_t **ipa; - _ii_lsthead_t *head; - _ii_lstinfo_t *np; - - hash = nsc_strhash(group); - - for (head = _ii_group_top; head; head = head->lst_next) { - if (hash == head->lst_hash && strncmp(head->lst_name, - group, DSW_NAMELEN) == 0) - break; - } - - if (!head) { - return (NULL); - } - - /* Count entries */ - for (nip = 0, np = head->lst_start; np; np = np->lst_next) - ++nip; - - ASSERT(nip > 0); - - ipa = kmem_zalloc(sizeof (_ii_info_t *) * nip, KM_SLEEP); - - np = head->lst_start; - - for (i = 0; i < nip; i++) { - ASSERT(np != 0); - - ipa[i] = np->lst_ip; - np = np->lst_next; - } - - *count = nip; - return (ipa); -} - -/* - * _ii_pinned - * - */ - -static void -_ii_pinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len) -{ - DTRACE_PROBE3(_ii_pinned_start, nsc_iodev_t, dip->bi_iodev, - nsc_off_t, pos, nsc_size_t, len); - - nsc_pinned_data(dip->bi_iodev, pos, len); - -} - -/* - * _ii_unpinned - * - */ - -static void -_ii_unpinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len) -{ - nsc_unpinned_data(dip->bi_iodev, pos, len); - -} - - -/* - * _ii_read - */ - -static int -_ii_read(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - int rc; - void *sb_vec; - nsc_vec_t **src; - - if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info)) - rc = EIO; - else { - src = h->ii_abufp? &h->ii_abufp->sb_vec : &h->ii_bufp->sb_vec; - sb_vec = *src; - *src = h->ii_bufh.sb_vec; - rc = _ii_fill_buf(h->ii_fd, pos, len, flag, - h->ii_abufp ? 
&h->ii_abufp : &h->ii_bufp, &h->ii_bufp2); - *src = sb_vec; - } - if (!II_SUCCESS(rc)) - h->ii_bufh.sb_error = rc; - - return (rc); -} - - -/* - * _ii_write - */ - -static int -_ii_write(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - int rc; - ii_fd_t *bfd = h->ii_fd; - _ii_info_t *ip = bfd->ii_info; - chunkid_t chunk_num; - nsc_size_t copy_len; - nsc_off_t mapped_fba; - chunkid_t mapped_chunk; - int overflow; - nsc_buf_t *tmp; - void *sb_vec; - - if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info)) - rc = EIO; - else if ((ip->bi_flags&DSW_TREEMAP) == 0 || !bfd->ii_shd) { - sb_vec = h->ii_bufp->sb_vec; - h->ii_bufp->sb_vec = h->ii_bufh.sb_vec; - if (bfd->ii_shd) { - II_NSC_WRITE(ip, shadow, rc, h->ii_bufp, pos, len, - flag); - } else { - II_NSC_WRITE(ip, master, rc, h->ii_bufp, pos, len, - flag); - } - h->ii_bufp->sb_vec = sb_vec; - } else { - /* write of mapped shadow buffer */ - rc = 0; - chunk_num = pos / DSW_SIZE; - while (len > 0 && II_SUCCESS(rc)) { - /* - * don't need to test bitmaps as allocating the - * write buffer will c-o-write the chunk. 
- */ - mapped_chunk = ii_tsearch(ip, chunk_num); - if (mapped_chunk == II_NULLNODE) { - rc = EIO; - break; - } - overflow = II_ISOVERFLOW(mapped_chunk); - if (overflow) - mapped_chunk = II_2OVERFLOW(mapped_chunk); - mapped_fba = DSW_CHK2FBA(mapped_chunk) + - (pos % DSW_SIZE); - copy_len = DSW_SIZE - (pos % DSW_SIZE); - if (copy_len > len) - copy_len = len; - tmp = NULL; - if (overflow) { - (void) nsc_reserve(OVRFD(ip), NSC_MULTI); - rc = nsc_alloc_buf(OVRFD(ip), mapped_fba, - copy_len, NSC_WRBUF, &tmp); - } else - rc = nsc_alloc_buf(SHDFD(ip), mapped_fba, - copy_len, NSC_WRBUF, &tmp); - sb_vec = h->ii_abufp->sb_vec; - h->ii_abufp->sb_vec = h->ii_bufh.sb_vec; - if (II_SUCCESS(rc)) { - rc = nsc_copy(h->ii_abufp, tmp, pos, - mapped_fba, copy_len); - } - if (overflow) { - II_NSC_WRITE(ip, overflow, rc, tmp, mapped_fba, - copy_len, flag); - } else { - II_NSC_WRITE(ip, shadow, rc, tmp, mapped_fba, - copy_len, flag); - } - h->ii_abufp->sb_vec = sb_vec; - (void) nsc_free_buf(tmp); - if (overflow) - nsc_release(OVRFD(ip)); - /* move on to next chunk */ - pos += copy_len; - len -= copy_len; - chunk_num++; - } - } - if (!II_SUCCESS(rc)) - h->ii_bufh.sb_error = rc; - - return (rc); -} - - -/* - * _ii_zero - */ - -static int -_ii_zero(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - int rc; - void *sb_vec; - - sb_vec = h->ii_bufp->sb_vec; - h->ii_bufp->sb_vec = h->ii_bufh.sb_vec; - rc = nsc_zero(h->ii_bufp, pos, len, flag); - h->ii_bufp->sb_vec = sb_vec; - if (!II_SUCCESS(rc)) - h->ii_bufh.sb_error = rc; - - return (rc); -} - - -/* - * _ii_uncommit - */ - -static int -_ii_uncommit(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - int rc; - void *sb_vec; - - sb_vec = h->ii_bufp->sb_vec; - h->ii_bufp->sb_vec = h->ii_bufh.sb_vec; - rc = nsc_uncommit(h->ii_bufp, pos, len, flag); - h->ii_bufp->sb_vec = sb_vec; - if (!II_SUCCESS(rc)) - h->ii_bufh.sb_error = rc; - - return (rc); -} - - -/* - * _ii_trksize - */ - -static int -_ii_trksize(ii_fd_t *bfd, int trksize) -{ 
- int rc; - - rc = nsc_set_trksize(II_FD(bfd), trksize); - - return (rc); -} - -/* - * _ii_register_path - */ - -static nsc_path_t * -_ii_register_path(char *path, int type, nsc_io_t *io) -{ - nsc_path_t *tok; - - tok = nsc_register_path(path, type, io); - - return (tok); -} - -/* - * _ii_unregister_path - */ -/*ARGSUSED*/ -static int -_ii_unregister_path(nsc_path_t *sp, int flag, char *type) -{ - int rc; - - rc = nsc_unregister_path(sp, flag); - - return (rc); -} - -int -_ii_ll_add(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char *name, - char **key) -{ - _ii_lsthead_t **head; - _ii_lstinfo_t *node; - uint64_t hash; - - ASSERT(key && !*key); - ASSERT(ip && mutex && lst && name); - - node = kmem_zalloc(sizeof (_ii_lstinfo_t), KM_SLEEP); - if (!node) { - cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM"); - DTRACE_PROBE(_ii_ll_add_end_ENOMEM); - return (ENOMEM); - } - node->lst_ip = ip; - - /* find out where we should insert it */ - hash = nsc_strhash(name); - - mutex_enter(mutex); - for (head = lst; *head; head = &((*head)->lst_next)) { - if (((*head)->lst_hash == hash) && - strncmp(name, (*head)->lst_name, DSW_NAMELEN) == 0) { - node->lst_next = (*head)->lst_start; - (*head)->lst_start = node; - break; - } - } - - if (!*head) { - /* create a new entry */ - *head = kmem_zalloc(sizeof (_ii_lsthead_t), KM_SLEEP); - if (!*head) { - /* bother */ - cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM"); - kmem_free(node, sizeof (_ii_lstinfo_t)); - DTRACE_PROBE(_ii_ll_add_end_2); - return (ENOMEM); - } - (*head)->lst_hash = hash; - (void) strncpy((*head)->lst_name, name, DSW_NAMELEN); - (*head)->lst_start = node; - } - mutex_exit(mutex); - - *key = (*head)->lst_name; - - return (0); -} - -int -_ii_ll_remove(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char **key) -{ - _ii_lsthead_t **head, *oldhead = 0; - _ii_lstinfo_t **node, *oldnode = 0; - uint64_t hash; - int found; - - ASSERT(key && *key); - ASSERT(ip && lst); - - hash = nsc_strhash(*key); - - mutex_enter(mutex); - 
for (head = lst; *head; head = &((*head)->lst_next)) { - if (((*head)->lst_hash == hash) && - strncmp(*key, (*head)->lst_name, DSW_NAMELEN) == 0) - break; - } - if (!*head) { - /* no such link (!) */ - mutex_exit(mutex); - return (0); - } - - found = 0; - for (node = &(*head)->lst_start; *node; node = &((*node)->lst_next)) { - if (ip == (*node)->lst_ip) { - oldnode = *node; - *node = (*node)->lst_next; - kmem_free(oldnode, sizeof (_ii_lstinfo_t)); - found = 1; - break; - } - } - - ASSERT(found); - - if (!found) { - mutex_exit(mutex); - return (0); - } - - /* did we just delete the last set in this resource group? */ - if (!(*head)->lst_start) { - oldhead = *head; - *head = (*head)->lst_next; - kmem_free(oldhead, sizeof (_ii_lsthead_t)); - } - mutex_exit(mutex); - - *key = NULL; - - return (0); -} - -static nsc_def_t _ii_fd_def[] = { - "Pinned", (uintptr_t)_ii_pinned, 0, - "Unpinned", (uintptr_t)_ii_unpinned, 0, - 0, 0, 0 -}; - - -static nsc_def_t _ii_io_def[] = { - "Open", (uintptr_t)_ii_openc, 0, - "Close", (uintptr_t)_ii_close, 0, - "Attach", (uintptr_t)_ii_attach, 0, - "Detach", (uintptr_t)_ii_detach, 0, - "AllocHandle", (uintptr_t)_ii_alloc_handle, 0, - "FreeHandle", (uintptr_t)_ii_free_handle, 0, - "AllocBuf", (uintptr_t)_ii_alloc_buf, 0, - "FreeBuf", (uintptr_t)_ii_free_buf, 0, - "GetPinned", (uintptr_t)_ii_get_pinned, 0, - "Discard", (uintptr_t)_ii_discard_pinned, 0, - "PartSize", (uintptr_t)_ii_partsize, 0, - "MaxFbas", (uintptr_t)_ii_maxfbas, 0, - "Read", (uintptr_t)_ii_read, 0, - "Write", (uintptr_t)_ii_write, 0, - "Zero", (uintptr_t)_ii_zero, 0, - "Uncommit", (uintptr_t)_ii_uncommit, 0, - "TrackSize", (uintptr_t)_ii_trksize, 0, - "Provide", 0, 0, - 0, 0, 0 -}; - -static nsc_def_t _ii_ior_def[] = { - "Open", (uintptr_t)_ii_openr, 0, - "Close", (uintptr_t)_ii_close, 0, - "Attach", (uintptr_t)_ii_attach, 0, - "Detach", (uintptr_t)_ii_detach, 0, - "AllocHandle", (uintptr_t)_ii_alloc_handle, 0, - "FreeHandle", (uintptr_t)_ii_free_handle, 0, - "AllocBuf", 
(uintptr_t)_ii_alloc_buf, 0, - "FreeBuf", (uintptr_t)_ii_free_buf, 0, - "GetPinned", (uintptr_t)_ii_get_pinned, 0, - "Discard", (uintptr_t)_ii_discard_pinned, 0, - "PartSize", (uintptr_t)_ii_partsize, 0, - "MaxFbas", (uintptr_t)_ii_maxfbas, 0, - "Read", (uintptr_t)_ii_read, 0, - "Write", (uintptr_t)_ii_write, 0, - "Zero", (uintptr_t)_ii_zero, 0, - "Uncommit", (uintptr_t)_ii_uncommit, 0, - "TrackSize", (uintptr_t)_ii_trksize, 0, - "Provide", 0, 0, - 0, 0, 0 -}; diff --git a/usr/src/uts/common/avs/ns/dsw/dsw_dev.h b/usr/src/uts/common/avs/ns/dsw/dsw_dev.h deleted file mode 100644 index f5cb574d8b..0000000000 --- a/usr/src/uts/common/avs/ns/dsw/dsw_dev.h +++ /dev/null @@ -1,633 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _DSW_DEV_H -#define _DSW_DEV_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Definitions for kstats - */ -#define DSW_SKSTAT_SIZE "size" -#define DSW_SKSTAT_MTIME "latest modified time" -#define DSW_SKSTAT_FLAGS "flags" -#define DSW_SKSTAT_THROTTLE_UNIT "ii_throttle_unit" -#define DSW_SKSTAT_THROTTLE_DELAY "ii_throttle_delay" -#define DSW_SKSTAT_SHDCHKS "shdchks" -#define DSW_SKSTAT_SHDCHKUSED "shdchkused" -#define DSW_SKSTAT_SHDBITS "shdbits" -#define DSW_SKSTAT_COPYBITS "copybits" -#define DSW_SKSTAT_MSTA "mst-a" -#define DSW_SKSTAT_MSTB "mst-b" -#define DSW_SKSTAT_MSTC "mst-c" -#define DSW_SKSTAT_MSTD "mst-d" -#define DSW_SKSTAT_SETA "set-a" -#define DSW_SKSTAT_SETB "set-b" -#define DSW_SKSTAT_SETC "set-c" -#define DSW_SKSTAT_SETD "set-d" -#define DSW_SKSTAT_BMPA "bmp-a" -#define DSW_SKSTAT_BMPB "bmp-b" -#define DSW_SKSTAT_BMPC "bmp-c" -#define DSW_SKSTAT_BMPD "bmp-d" -#define DSW_SKSTAT_OVRA "ovr-a" -#define DSW_SKSTAT_OVRB "ovr-b" -#define DSW_SKSTAT_OVRC "ovr-c" -#define DSW_SKSTAT_OVRD "ovr-d" -#define DSW_SKSTAT_MSTIO "mst-io" -#define DSW_SKSTAT_SHDIO "shd-io" -#define DSW_SKSTAT_BMPIO "bmp-io" -#define DSW_SKSTAT_OVRIO "ovr-io" - -/* - * Bitmap macros - */ - -#define DSW_BIT_CLR(bmap, bit) (bmap &= (char)~(1 << bit)) -#define DSW_BIT_SET(bmap, bit) (bmap |= (char)(1 << bit)) -#define DSW_BIT_ISSET(bmap, bit) ((bmap & (1 << bit)) != 0) - -#define DSW_CBLK_FBA 16 /* cache blocks in fba's */ -#define DSW_SHD_BM_OFFSET DSW_CBLK_FBA /* offset to allow for header */ -#define DSW_COPY_BM_OFFSET (DSW_SHD_BM_OFFSET + \ - DSW_BM_FBA_LEN(ip->bi_size)) -#define DSW_BM_FBA_LEN(mst_size) ((mst_size) / FBA_SIZE(DSW_SIZE*DSW_BITS) + \ - DSW_CBLK_FBA) - -#define DSW_BM_SIZE_CHUNKS(ip) ((ip->bi_size + DSW_SIZE - 1) / DSW_SIZE) -#define DSW_BM_SIZE_BYTES(ip) ((DSW_BM_SIZE_CHUNKS(ip) + DSW_BITS - 1) / \ - DSW_BITS) - -#define DSW_CHK2FBA(chunk) (((nsc_off_t)(chunk)) * DSW_SIZE) - -#if defined(_KERNEL) || defined(_KMEMUSER) - -/* - * Shadow types. 
- */ - -#define DSW_GOLDEN_TYPE 0x1000 -#define DSW_QUICK_TYPE 0x2000 - -/* - * Miscellaneous defines - */ - -#define II_INTERNAL 0x1 -#define II_EXTERNAL 0x2 - -#define II_EXISTING 0x01 /* Internal dsw_ioctl()/dsw_config() flags */ -#define II_IMPORT 0x02 - -/* - * defines for _ii_nsc_io and _ii_write, used by kstats - */ - -#define KS_NA 0 -#define KS_MST 1 -#define KS_SHD 2 -#define KS_BMP 3 -#define KS_OVR 4 - -/* - * global kstats - */ - -typedef struct _iigkstat_s { - /* static */ - kstat_named_t ii_debug; - kstat_named_t ii_bitmap; - kstat_named_t ii_throttle_unit; - kstat_named_t ii_throttle_delay; - kstat_named_t ii_copy_direct; - - /* dynamic */ - kstat_named_t num_sets; - kstat_named_t assoc_over; - kstat_named_t spilled_over; -} iigkstat_t; - -extern iigkstat_t iigkstat; - -/* - * set-specific kstats - */ -typedef struct _ii_kstat_set_s { - kstat_named_t size; /* from _ii_stat() */ - kstat_named_t mtime; /* from _ii_stat() */ - kstat_named_t flags; /* from _ii_stat() */ - kstat_named_t unit; /* ii_throttle_unit */ - kstat_named_t delay; /* ii_throttle_delay */ - kstat_named_t shdchks; /* from _ii_stat() */ - kstat_named_t shdchkused; /* from _ii_stat() */ - kstat_named_t shdbits; /* # bits set shadow bitmap */ - kstat_named_t copybits; /* # bits set copy bitmap */ - kstat_named_t mst_a; /* name */ - kstat_named_t mst_b; /* .. of */ - kstat_named_t mst_c; /* .. master */ - kstat_named_t mst_d; /* .. volume */ - kstat_named_t set_a; /* name */ - kstat_named_t set_b; /* .. of */ - kstat_named_t set_c; /* .. the */ - kstat_named_t set_d; /* .. set */ - kstat_named_t bmp_a; /* name */ - kstat_named_t bmp_b; /* .. of */ - kstat_named_t bmp_c; /* .. bitmap */ - kstat_named_t bmp_d; /* .. volume */ - kstat_named_t ovr_a; /* name */ - kstat_named_t ovr_b; /* .. of */ - kstat_named_t ovr_c; /* .. overflow */ - kstat_named_t ovr_d; /* .. 
volume */ - kstat_named_t mst_io; /* kstat_io of master */ - kstat_named_t shd_io; /* kstat_io of shadow */ - kstat_named_t bmp_io; /* kstat_io of bitmap */ - kstat_named_t ovr_io; /* kstat_io of overflow */ -} ii_kstat_set_t; - -extern ii_kstat_set_t ii_kstat_set; -#define IOSTAT_NAME_LEN 10 - -/* Basic types */ -#ifdef II_MULTIMULTI_TERABYTE -typedef int64_t chunkid_t; -typedef int32_t chunkid32_t; -#else -typedef int32_t chunkid_t; -#endif - -/* - * OV_HEADER_VERSION - * 0 = original OV header version - * 1 = flags support - */ -#define OV_HEADER_VERSION 1 - -/* Overflow disk volume header */ -typedef struct _ii_doverflow_s { - char ii_dvolname[DSW_NAMELEN]; /* this volumes name */ - uint32_t ii_dhmagic; /* sanity check */ - uint32_t ii_dhversion; /* volume format */ - int32_t ii_ddrefcnt; /* total number of users */ - int32_t ii_dflags; /* status flags */ - int64_t ii_dfreehead; /* chain of freed chunks */ - int64_t ii_dnchunks; /* total number of chunks */ - int64_t ii_dunused; /* number of chunks available */ - int64_t ii_dused; /* number of chunks allocated */ - int32_t ii_urefcnt; /* # shadows needing update */ - int32_t ii_dcrefcnt; /* current number of users */ -} _ii_doverflow_t; - -/* Overflow volume in core structure */ -typedef struct _ii_overflow_s { - _ii_doverflow_t ii_do; - kmutex_t ii_mutex; /* Mutex */ - kmutex_t ii_kstat_mutex; /* Mutex for overflow kstat */ - int ii_detachcnt; /* users detaching on disable */ - struct _ii_overflow_s *ii_next; /* chain of incore structs */ - struct _ii_info_dev_s *ii_dev; /* pointer to device details */ - kstat_t *ii_overflow; /* kstats data for this vol */ - char ii_ioname[KSTAT_DATA_CHAR_LEN]; /* name for iostat -x */ -} _ii_overflow_t; - -#define ii_volname ii_do.ii_dvolname -#define ii_hmagic ii_do.ii_dhmagic -#define ii_drefcnt ii_do.ii_ddrefcnt -#define ii_freehead ii_do.ii_dfreehead -#define ii_nchunks ii_do.ii_dnchunks -#define ii_unused ii_do.ii_dunused -#define ii_used ii_do.ii_dused -#define 
ii_hversion ii_do.ii_dhversion -#define ii_flags ii_do.ii_dflags -#define ii_urefcnt ii_do.ii_urefcnt -#define ii_crefcnt ii_do.ii_dcrefcnt - -#define II_OHEADER_FBA 0 /* overflow header location */ -/* - * logging of kstat_io - */ -#ifdef DISABLE_KSTATS -#define II_READ_START(ip, type) -#define II_READ_END(ip, type, rc, blocks) -#define II_WRITE_START(ip, type) -#define II_WRITE_END(ip, type, rc, blocks) -#else - -#define II_KS(ip, x) KSTAT_IO_PTR(ip->bi_kstat_io.x) -#define II_MUTEX(ip, x) ip->bi_kstat_io.x->ks_lock -#define II_BLKSIZE 512 - -#define II_READ_START(ip, type) \ - if (ip->bi_kstat_io.type) { \ - mutex_enter(II_MUTEX(ip, type)); \ - kstat_runq_enter(II_KS(ip, type)); \ - mutex_exit(II_MUTEX(ip, type)); \ - } -#define II_READ_END(ip, type, rc, blocks) \ - if (ip->bi_kstat_io.type) { \ - mutex_enter(II_MUTEX(ip, type)); \ - if (II_SUCCESS(rc)) { \ - II_KS(ip, type)->reads++; \ - II_KS(ip, type)->nread += II_BLKSIZE * (blocks);\ - } \ - kstat_runq_exit(II_KS(ip, type)); \ - mutex_exit(II_MUTEX(ip, type)); \ - } - -#define II_WRITE_START(ip, type) \ - if (ip->bi_kstat_io.type) { \ - mutex_enter(II_MUTEX(ip, type)); \ - kstat_runq_enter(II_KS(ip, type)); \ - mutex_exit(II_MUTEX(ip, type)); \ - } -#define II_WRITE_END(ip, type, rc, blocks) \ - if (ip->bi_kstat_io.type) { \ - mutex_enter(II_MUTEX(ip, type)); \ - if (II_SUCCESS(rc)) { \ - II_KS(ip, type)->writes++; \ - II_KS(ip, type)->nwritten += II_BLKSIZE * (blocks);\ - } \ - kstat_runq_exit(II_KS(ip, type)); \ - mutex_exit(II_MUTEX(ip, type)); \ - } -#endif - -#define II_NSC_READ(ip, type, rc, buf, pos, len, flag) \ - II_READ_START(ip, type); \ - rc = nsc_read(buf, pos, len, flag); \ - II_READ_END(ip, type, rc, len); - -#define II_NSC_WRITE(ip, type, rc, buf, pos, len, flag) \ - II_WRITE_START(ip, type); \ - rc = nsc_write(buf, pos, len, flag); \ - II_WRITE_END(ip, type, rc, len); - -#define II_NSC_COPY_DIRECT(ip, t1, t2, rc, buf1, buf2, pos1, pos2, len) \ - II_WRITE_START(ip, t2); \ - rc = 
nsc_copy_direct(buf1, buf2, pos1, pos2, len); \ - II_WRITE_END(ip, t2, rc, len); - -#define II_ALLOC_BUF(ip, type, rc, fd, pos, len, flag, tmp) \ - if (flag & NSC_READ) { \ - II_READ_START(ip, type); \ - } \ - rc = nsc_alloc_buf(fd, pos, len, flag, tmp); \ - if (flag & NSC_READ) { \ - II_READ_END(ip, type, rc, len); \ - } - -/* - * All kstat_io associated with a set. NOTE: only one mutex for all - * of the kstats for a given set; all master/shadow/bmp/overflow mutexes - * point back to the statmutex - */ - -typedef struct _ii_kstat_info_s { - kstat_t *master; - kstat_t *shadow; - kstat_t *bitmap; - kstat_t *overflow; - kmutex_t statmutex; - char mstio[KSTAT_DATA_CHAR_LEN]; /* name of mst in iostat -x */ - char shdio[KSTAT_DATA_CHAR_LEN]; /* name of shd in iostat -x */ - char bmpio[KSTAT_DATA_CHAR_LEN]; /* name of bmp in iostat -x */ - char ovrio[KSTAT_DATA_CHAR_LEN]; /* name of ovr in iostat -x */ -} ii_kstat_info_t; - -/* - * II device info structure - */ - -typedef struct _ii_info_dev_s { - nsc_fd_t *bi_fd; /* Bitmap file descriptor */ - nsc_iodev_t *bi_iodev; /* I/O device structure */ - nsc_path_t *bi_tok; /* Register path token */ - int bi_ref; /* Count of fd's referencing */ - int bi_rsrv; /* Count of reserves held */ - int bi_orsrv; /* Reserves for other io prov */ - int bi_flag; /* Internal/External reserve */ -} _ii_info_dev_t; - -typedef struct _ii_info_s { - struct _ii_info_s *bi_next; /* Chain of all groups */ - struct _ii_info_s *bi_head; /* head of sibling chain */ - struct _ii_info_s *bi_sibling; /* Chain of groups with same */ - /* master */ - struct _ii_info_s *bi_master; /* location of master */ - struct _ii_info_s *bi_nextmst; /* next multimaster */ - kmutex_t bi_mutex; /* Mutex */ - _ii_info_dev_t *bi_mstdev; - _ii_info_dev_t *bi_mstrdev; - _ii_info_dev_t bi_shddev; - _ii_info_dev_t bi_shdrdev; - _ii_info_dev_t bi_bmpdev; - char bi_keyname[DSW_NAMELEN]; - unsigned char *bi_bitmap; /* Master device bitmap */ - char *bi_cluster; /* cluster name */ 
- char *bi_group; /* group name */ - char *bi_busy; /* Busy bitmap */ - nsc_off_t bi_shdfba; /* location of shadow bitmap */ - nsc_size_t bi_shdbits; /* shadow bitmap counter */ - nsc_off_t bi_copyfba; /* location of copy bitmap */ - nsc_size_t bi_copybits; /* copy bitmap counter */ - nsc_size_t bi_size; /* Size of mst device */ - uint_t bi_flags; /* Flags */ - uint_t bi_state; /* State flags */ - int bi_disabled; /* Disable has started */ - int bi_ioctl; /* Number of active ioctls */ - int bi_release; /* Do a release in copyvol */ - int bi_rsrvcnt; /* reserve count */ - kcondvar_t bi_copydonecv; /* Copy operation condvar */ - kcondvar_t bi_reservecv; /* Reserve condvar */ - kcondvar_t bi_releasecv; /* Release condvar */ - kcondvar_t bi_closingcv; /* Shadow closing condvar */ - kcondvar_t bi_ioctlcv; /* Ioctls complete condvar */ - kcondvar_t bi_busycv; /* Busy bitmap condvar */ - krwlock_t bi_busyrw; /* Busy bitmap rwlock */ - struct _ii_bmp_ops_s *bi_bitmap_ops; /* Functions for bitmap ops */ - kmutex_t bi_rsrvmutex; /* Reserve operation mutex */ - kmutex_t bi_rlsemutex; /* Release operation mutex */ - kmutex_t bi_bmpmutex; /* mutex for bi_bitmap_ops */ - chunkid_t bi_mstchks; - chunkid_t bi_shdchks; /* # of chunks on shadow vol */ - chunkid_t bi_shdchkused; /* # of allocated */ - chunkid_t bi_shdfchk; /* start of shd chunk flst */ - _ii_overflow_t *bi_overflow; - struct ii_fd_s *bi_iifd; /* fd holding master's ip */ - int32_t bi_throttle_unit; - int32_t bi_throttle_delay; - krwlock_t bi_linkrw; /* altering linkage rwlock */ - kmutex_t bi_chksmutex; /* Mutex for bi_???chks */ - pid_t bi_locked_pid; /* lock pid for update/copy */ - kstat_t *bi_kstat; /* kstat data for set */ - ii_kstat_info_t bi_kstat_io; /* kstat I/O data for set */ - time_t bi_mtime; -} _ii_info_t; - -#define bi_bmpfd bi_bmpdev.bi_fd -#define bi_mstfd bi_mstdev->bi_fd -#define bi_mstrfd bi_mstrdev->bi_fd -#define bi_shdfd bi_shddev.bi_fd -#define bi_shdrfd bi_shdrdev.bi_fd -#define bi_mst_iodev 
bi_mstdev->bi_iodev -#define bi_mstr_iodev bi_mstrdev->bi_iodev -#define bi_shd_iodev bi_shddev.bi_iodev -#define bi_shdr_iodev bi_shdrdev.bi_iodev -#define bi_bmp_iodev bi_bmpdev.bi_iodev -#define bi_mst_tok bi_mstdev->bi_tok -#define bi_mstr_tok bi_mstrdev->bi_tok -#define bi_shd_tok bi_shddev.bi_tok -#define bi_shdr_tok bi_shdrdev.bi_tok -#define bi_bmp_tok bi_bmpdev.bi_tok -#define bi_mstref bi_mstdev->bi_ref -#define bi_mstrref bi_mstrdev->bi_ref -#define bi_shdref bi_shddev.bi_ref -#define bi_shdrref bi_shdrdev.bi_ref -#define bi_bmpref bi_bmpdev.bi_ref -#define bi_mstrsrv bi_mstdev->bi_rsrv -#define bi_mstrrsrv bi_mstrdev->bi_rsrv -#define bi_shdrsrv bi_shddev.bi_rsrv -#define bi_shdrrsrv bi_shdrdev.bi_rsrv -#define bi_bmprsrv bi_bmpdev.bi_rsrv -#define bi_mstrflag bi_mstrdev->bi_flag -#define bi_shdrflag bi_shdrdev.bi_flag -/* - * Cluster and group linked lists - */ -typedef struct _ii_lstinfo_s { - _ii_info_t *lst_ip; /* ptr to info_t */ - struct _ii_lstinfo_s *lst_next; /* ptr to next in chain */ -} _ii_lstinfo_t; - -typedef struct _ii_lsthead_s { - uint64_t lst_hash; /* from nsc_strhash */ - char lst_name[DSW_NAMELEN]; /* resource group */ - _ii_lstinfo_t *lst_start; /* start of set list */ - struct _ii_lsthead_s *lst_next; /* next list head */ -} _ii_lsthead_t; - -/* - * Flag set and clear macros and function. 
- */ - -void _ii_flag_op(int and, int or, _ii_info_t *ip, int update); - -#define II_FLAG_SET(f, ip) _ii_flag_op(~0, (f), ip, TRUE) -#define II_FLAG_CLR(f, ip) _ii_flag_op(~(f), 0, ip, TRUE) - -#define II_FLAG_SETX(f, ip) _ii_flag_op(~0, (f), ip, FALSE) -#define II_FLAG_CLRX(f, ip) _ii_flag_op(~(f), 0, ip, FALSE) -#define II_FLAG_ASSIGN(f, ip) _ii_flag_op(0, (f), ip, FALSE); -#define LOG_EVENT(msg, level) \ - nsc_do_sysevent("ii", msg, level, level, component, ii_dip); - -/* Reserve and release macros */ - - /* also used by ii_volume() volume identification, hence NONE & OVR */ -#define NONE 0x0000 /* no volume type */ -#define MST 0x0001 /* master reserve/release flag */ -#define MSTR 0x0010 /* raw master reserve/release flag */ -#define SHD 0x0002 /* shadow reserve/release flag */ -#define SHDR 0x0020 /* raw shadow reserve/release flag */ -#define BMP 0x0100 /* bitmap reserve/release flag */ -#define OVR 0x0400 /* overflow volume */ - -#define RSRV(ip) ((ip)->bi_rsrv > 0 || (ip)->bi_orsrv > 0) - -#define MSTRSRV(ip) (RSRV(((ip)->bi_mstdev))) -#define SHDRSRV(ip) (RSRV(&((ip)->bi_shddev))) - -#define MSTFD(ip) (MSTRSRV(ip) ? (ip)->bi_mstfd : (ip)->bi_mstrfd) -#define SHDFD(ip) (SHDRSRV(ip) ? (ip)->bi_shdfd : (ip)->bi_shdrfd) -#define OVRFD(ip) (ip->bi_overflow->ii_dev->bi_fd) - -#define II_RAW(ii) (((ii)->ii_oflags&NSC_DEVICE) != 0) -#define II_FD(ii) ((ii)->ii_shd ? SHDFD((ii)->ii_info) : \ - MSTFD((ii)->ii_info)) - - /* are there multiple shadows of ip's master volume? 
*/ -#define NSHADOWS(ip) ((ip)->bi_head != (ip) || (ip)->bi_sibling) - -typedef struct _ii_bmp_ops_s { - int (*co_bmp)(_ii_info_t *, nsc_off_t, unsigned char *, int); - int (*ci_bmp)(_ii_info_t *, nsc_off_t, unsigned char *, int); - int (*zerobm)(_ii_info_t *); - int (*copybm)(_ii_info_t *); - int (*orbm)(_ii_info_t *); - int (*tst_shd_bit)(_ii_info_t *, chunkid_t); - int (*set_shd_bit)(_ii_info_t *, chunkid_t); - int (*tst_copy_bit)(_ii_info_t *, chunkid_t); - int (*set_copy_bit)(_ii_info_t *, chunkid_t); - int (*clr_copy_bits)(_ii_info_t *, chunkid_t, int); - chunkid_t (*next_copy_bit)(_ii_info_t *, chunkid_t, chunkid_t, - int, int *); - int (*fill_copy_bmp)(_ii_info_t *); - int (*load_bmp)(_ii_info_t *, int); - int (*save_bmp)(_ii_info_t *, int); - int (*change_bmp)(_ii_info_t *, unsigned char *); - int (*cnt_bits)(_ii_info_t *, nsc_off_t, nsc_size_t *, int); - int (*join_bmp)(_ii_info_t *, _ii_info_t *); -} _ii_bmp_ops_t; - -#define II_CO_BMP(ip, a, b, c) (*(ip)->bi_bitmap_ops->co_bmp)(ip, a, b, c) -#define II_CI_BMP(ip, a, b, c) (*(ip)->bi_bitmap_ops->ci_bmp)(ip, a, b, c) -#define II_ZEROBM(ip) (*(ip)->bi_bitmap_ops->zerobm)(ip) -#define II_COPYBM(ip) (*(ip)->bi_bitmap_ops->copybm)(ip) -#define II_ORBM(ip) (*(ip)->bi_bitmap_ops->orbm)(ip) -#define II_TST_SHD_BIT(ip, c) (*(ip)->bi_bitmap_ops->tst_shd_bit)(ip, c) -#define II_SET_SHD_BIT(ip, c) (*(ip)->bi_bitmap_ops->set_shd_bit)(ip, c) -#define II_TST_COPY_BIT(ip, c) (*(ip)->bi_bitmap_ops->tst_copy_bit)(ip, c) -#define II_SET_COPY_BIT(ip, c) (*(ip)->bi_bitmap_ops->set_copy_bit)(ip, c) -#define II_CLR_COPY_BITS(ip, c, n) (*(ip)->bi_bitmap_ops->clr_copy_bits) \ - (ip, c, n) -#define II_CLR_COPY_BIT(ip, c) (*(ip)->bi_bitmap_ops->clr_copy_bits)(ip, c, 1) -#define II_NEXT_COPY_BIT(ip, c, m, w, g) \ - (*(ip)->bi_bitmap_ops->next_copy_bit)(ip, c, m, w, g) -#define II_FILL_COPY_BMP(ip) (*(ip)->bi_bitmap_ops->fill_copy_bmp)(ip) -#define II_LOAD_BMP(ip, f) (*(ip)->bi_bitmap_ops->load_bmp)(ip, f) -#define II_SAVE_BMP(ip, 
f) (*(ip)->bi_bitmap_ops->save_bmp)(ip, f) -#define II_CHANGE_BMP(ip, p) (*(ip)->bi_bitmap_ops->change_bmp)(ip, p) -#define II_CNT_BITS(ip, a, b, c) (*(ip)->bi_bitmap_ops->cnt_bits)(ip, a, b, c) -#define II_JOIN_BMP(dip, sip) (*(ip)->bi_bitmap_ops->join_bmp)(dip, sip) - -/* - * State flags - */ -#define DSW_IOCTL 0x0001 /* Waiting for ioctl to complete */ -#define DSW_CLOSING 0x0002 /* Waiting for shadow to close */ -#define DSW_MSTTARGET 0x0004 /* Master is target of update */ -#define DSW_MULTIMST 0x0008 /* disabled set is multi master */ -#define DSW_CNTSHDBITS 0x0010 /* need to count # of shd bits set */ -#define DSW_CNTCPYBITS 0x0020 /* need to count # of copy bits set */ - -/* - * DSW file descriptor structure - */ - -typedef struct ii_fd_s { - _ii_info_t *ii_info; /* Info structure */ - int ii_bmp; /* This fd is for the bmp device */ - int ii_shd; /* This fd is for the shadow device */ - int ii_ovr; /* This fd is for the overflow device */ - _ii_overflow_t *ii_optr; /* pointer to overflow structure */ - int ii_oflags; /* raw or cached open type */ -} ii_fd_t; - - -/* - * II buffer header - */ - -typedef struct ii_buf_s { - nsc_buf_t ii_bufh; /* exported buffer header */ - nsc_buf_t *ii_bufp; /* main underlying buffer */ - nsc_buf_t *ii_bufp2; /* second underlying buffer */ - nsc_buf_t *ii_abufp; /* anonymous underlying buffer */ - ii_fd_t *ii_fd; /* back link */ - int ii_rsrv; /* fd to release in free_buf */ -} ii_buf_t; -#endif /* _KERNEL || _KMEMUSER */ - - -/* - * Valid magic numbers in the bitmap volume header - */ - -#define DSW_DIRTY 0x44495254 -#define DSW_CLEAN 0x434C4541 -#define DSW_INVALID 0x00000000 - -/* - * II_HEADER_VERSION - * 1 = original II header version - * 2 = Compact Dependent Shadows (DSW_TREEMAP) - * 3 = Persistance of throttle parameters - * 4 = add cluster & group information - * 5 = add time string to hold last modify time - */ -#define II_HEADER_VERSION 5 - -/* - * DSW bitmap volume header structure - */ - -typedef struct 
ii_header_s { - int32_t ii_magic; /* magic number */ - int32_t ii_type; /* bitmap or independent copy */ - int32_t ii_state; /* State of the master/shadow/bitmap tuple */ - int32_t ii_version; /* version or format of bitmap volume */ - int32_t ii_shdfba; /* location of shadow bitmap */ - int32_t ii_copyfba; /* location of copy bitmap */ - char master_vol[DSW_NAMELEN]; - char shadow_vol[DSW_NAMELEN]; - char bitmap_vol[DSW_NAMELEN]; - /* II_HEADER_VERSION 2 */ - char overflow_vol[DSW_NAMELEN]; - int64_t ii_mstchks; /* # of chunks in master volume */ - int64_t ii_shdchks; /* # of chunks in shadow volume */ - int64_t ii_shdchkused; /* # of shd chunks allocated or on free list */ - int64_t ii_shdfchk; /* list of free shadow chunks */ - /* II_HEADER_VERSION 3 */ - int32_t ii_throttle_unit; /* Last setting of throttle unit */ - int32_t ii_throttle_delay; /* Last setting of throttle delay */ - /* II_HEADER_VERSION 4 */ - char clstr_name[DSW_NAMELEN]; - char group_name[DSW_NAMELEN]; - /* II_HEADER_VERSION 5 */ - time_t ii_mtime; -} ii_header_t; - -#define II_SUCCESS(rc) (((rc) == NSC_DONE) || ((rc) == NSC_HIT)) - -/* - * Overflow volume defines. 
- */ - -#define II_OMAGIC 0x476F6C64 /* "Gold" */ -#define II_ISOVERFLOW(n) ((n) < 0 && (n) != II_NULLCHUNK) -#define II_2OVERFLOW(n) (-(n)) - /* -tive node id's are in overflow volume */ - -#ifdef _SunOS_5_6 -#define II_NULLNODE (INT_MIN) -#define II_NULLCHUNK (INT_MIN) -#else -#ifdef II_MULTIMULTI_TERABYTE -#define II_NULLNODE (INT64_MIN) -#define II_NULLCHUNK (INT64_MIN) -#define II_NULL32NODE (INT32_MIN) -#define II_NULL32CHUNK (INT32_MIN) -#else -#define II_NULLNODE (INT32_MIN) -#define II_NULLCHUNK (INT32_MIN) -#endif /* II_MULTIMULTI_TERABYTE */ -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _DSW_DEV_H */ diff --git a/usr/src/uts/common/avs/ns/dsw/ii.conf b/usr/src/uts/common/avs/ns/dsw/ii.conf deleted file mode 100644 index f964881494..0000000000 --- a/usr/src/uts/common/avs/ns/dsw/ii.conf +++ /dev/null @@ -1,38 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# InstantImage Solaris configuration properties -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -name="ii" parent="pseudo" ; -# -# level of detail in console debugging messages - 0 means no messages. 
-# -ii_debug=0; -# -# bitmap volume storage strategy: -# 0 indicates kernel memory loaded from bitmap volume when shadow is resumed -# and saved to bitmap volume when shadow is suspended. -# 1 indicates permanent SDBC storage, bitmap volume is updated directly as -# bits are changed. -# 2 indicates that if FWC is present strategy 1 is used, otherwise strategy 0. -ii_bitmap=1; diff --git a/usr/src/uts/common/avs/ns/dsw/ii_tree.c b/usr/src/uts/common/avs/ns/dsw/ii_tree.c deleted file mode 100644 index 202cb2b8f4..0000000000 --- a/usr/src/uts/common/avs/ns/dsw/ii_tree.c +++ /dev/null @@ -1,599 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/cred.h> -#include <sys/file.h> -#include <sys/ddi.h> -#include <sys/nsctl/nsctl.h> -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_errors.h> - -#include <sys/unistat/spcs_s_k.h> -#include "dsw.h" -#include "dsw_dev.h" - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -/* - * Instant Image. - * - * This file contains the chunk map lookup functions of II. - * - */ -#define CHUNK_FBA(chunk) DSW_CHK2FBA(chunk) - -extern int ii_debug; /* debug level switch */ -int ii_map_debug = 0; - -#ifdef II_MULTIMULTI_TERABYTE -typedef int64_t nodeid_t; -typedef int32_t nodeid32_t; -#else -typedef int32_t nodeid_t; -#endif - -typedef struct ii_node { - chunkid_t vchunk_id; /* virtual chunk id */ -} NODE; - -typedef struct ii_nodelink_s { - chunkid_t next_chunk; -} ii_nodelink_t; - -static int nodes_per_fba = FBA_SIZE(1) / sizeof (NODE); - -ii_header_t *_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp); -int _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, - nsc_buf_t *tmp); -void _ii_rlse_devs(_ii_info_t *, int); -int _ii_rsrv_devs(_ii_info_t *, int, int); -void _ii_error(_ii_info_t *, int); -/* - * Private functions for use in this file. 
- */ -static void free_node(_ii_info_t *ip, NODE *np, nodeid_t ni); -static chunkid_t ii_alloc_overflow(_ii_info_t *ip); -void ii_free_overflow(_ii_info_t *, chunkid_t); -extern int _ii_nsc_io(_ii_info_t *, int, nsc_fd_t *, int, nsc_off_t, - unsigned char *, nsc_size_t); - -static int -update_tree_header(_ii_info_t *ip) -{ - ii_header_t *header; - nsc_buf_t *tmp = NULL; - - mutex_enter(&ip->bi_mutex); - header = _ii_bm_header_get(ip, &tmp); - if (header == NULL) { - /* bitmap is probably offline */ - mutex_exit(&ip->bi_mutex); - DTRACE_PROBE(_iit_update_tree_header_end); - return (1); - } - header->ii_mstchks = ip->bi_mstchks; - header->ii_shdchks = ip->bi_shdchks; - header->ii_shdchkused = ip->bi_shdchkused; - header->ii_shdfchk = ip->bi_shdfchk; - (void) _ii_bm_header_put(header, ip, tmp); - mutex_exit(&ip->bi_mutex); - - return (0); -} - -static int -update_overflow_header(_ii_info_t *ip, _ii_overflow_t *op) -{ - (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF, - II_OHEADER_FBA, (unsigned char *)&(op->ii_do), - sizeof (_ii_doverflow_t)); - - return (0); -} - -static int -node_io(_ii_info_t *ip, NODE *np, nodeid_t node, int flag) -{ - int rc; - int node_fba; - int tree_fba = ip->bi_copyfba + (ip->bi_copyfba-ip->bi_shdfba); - int offset; - nsc_buf_t *tmp = NULL; - - /* - * Don't use _ii_nsc_io() as _ii_nsc_io() requires io to start at - * an fba boundary. 
- */ - - /* calculate location of node on bitmap file */ - offset = (node % nodes_per_fba) * sizeof (NODE); - node_fba = tree_fba + node / nodes_per_fba; - - /* read disk block containing node */ - rc = nsc_alloc_buf(ip->bi_bmpfd, node_fba, 1, NSC_RDBUF|flag, &tmp); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - if (tmp) - (void) nsc_free_buf(tmp); - - DTRACE_PROBE(_iit_node_io_end); - return (1); - } - - /* copy node and update bitmap file if needed */ - rc = 0; - if (flag == NSC_RDBUF) - bcopy(tmp->sb_vec->sv_addr+offset, np, sizeof (NODE)); - else { - bcopy(np, tmp->sb_vec->sv_addr+offset, sizeof (NODE)); - II_NSC_WRITE(ip, bitmap, rc, tmp, node_fba, 1, 0); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - rc = EIO; - } - } - if (tmp) - (void) nsc_free_buf(tmp); - - return (0); -} - -static int -node_fba_fill(_ii_info_t *ip, nsc_size_t nchunks, chunkid_t vchunk_id) -{ - int rc; - nsc_off_t fba; - nsc_size_t fbas; - nsc_size_t maxfbas; - nsc_buf_t *bp; - nsc_vec_t *vp; - - /* Determine maximum number of FBAs to allocate */ - rc = nsc_maxfbas(ip->bi_bmpfd, 0, &maxfbas); - if (!II_SUCCESS(rc)) - maxfbas = DSW_CBLK_FBA; - - /* Write out blocks of initialied NODEs */ - fba = ip->bi_copyfba + (ip->bi_copyfba-ip->bi_shdfba); - fbas = FBA_LEN(nchunks * sizeof (NODE)); - while (fbas > 0) { - - /* Determine number of FBA to allocate this time */ - if (fbas < maxfbas) maxfbas = fbas; - - /* Allocate buffer which map to FBAs containing NODEs */ - bp = NULL; - rc = nsc_alloc_buf(ip->bi_bmpfd, fba, maxfbas, NSC_WRBUF, &bp); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - DTRACE_PROBE(alloc_buf_failed); - return (EIO); - } - - /* traverse vector list, filling wth initialized NODEs */ - for (vp = bp->sb_vec; vp->sv_addr && vp->sv_len; vp++) { - NODE *pnode = (NODE *)vp->sv_addr; - NODE *enode = (NODE *)(vp->sv_addr + vp->sv_len); - while (pnode < enode) { - pnode->vchunk_id = vchunk_id; - pnode++; - } - } - - /* write FBAs containing 
initialized NODEs */ - II_NSC_WRITE(ip, bitmap, rc, bp, fba, maxfbas, 0); - if (!II_SUCCESS(rc)) { - _ii_error(ip, DSW_BMPOFFLINE); - (void) nsc_free_buf(bp); - DTRACE_PROBE(write_failed); - return (EIO); - } - - /* free the buffer */ - (void) nsc_free_buf(bp); - - /* Adjust nsc buffer values */ - fba += maxfbas; - fbas -= maxfbas; - } - - return (0); -} - -/* - * Reads the node into core and returns a pointer to it. - */ - -static NODE * -read_node(_ii_info_t *ip, nodeid_t node) -{ - NODE *new; - - new = (NODE *)kmem_alloc(sizeof (NODE), KM_SLEEP); - - if (node_io(ip, new, node, NSC_RDBUF)) { - kmem_free(new, sizeof (NODE)); - new = NULL; - } - - return (new); -} - - -static chunkid_t -alloc_chunk(_ii_info_t *ip) -{ - ii_nodelink_t nl; - int fba; - chunkid_t rc = II_NULLCHUNK; - - mutex_enter(&ip->bi_chksmutex); - if (ip->bi_shdchkused < ip->bi_shdchks) { - rc = ip->bi_shdchkused++; - } else if (ip->bi_shdfchk != II_NULLCHUNK) { - ASSERT(ip->bi_shdfchk >= 0 && ip->bi_shdfchk < ip->bi_shdchks); - rc = ip->bi_shdfchk; - fba = CHUNK_FBA(rc); - (void) _ii_rsrv_devs(ip, SHDR, II_INTERNAL); - (void) _ii_nsc_io(ip, KS_SHD, SHDFD(ip), NSC_RDBUF, fba, - (unsigned char *)&nl, sizeof (nl)); - _ii_rlse_devs(ip, SHDR); - ip->bi_shdfchk = nl.next_chunk; - ASSERT(ip->bi_shdfchk == II_NULLCHUNK || - (ip->bi_shdfchk >= 0 && ip->bi_shdfchk < ip->bi_shdchks)); - } else { - - /* into overflow */ - rc = ii_alloc_overflow(ip); - } - mutex_exit(&ip->bi_chksmutex); - (void) update_tree_header(ip); - - return (rc); -} - -/* - * releases memory for node - */ -static void /*ARGSUSED*/ -release_node(_ii_info_t *ip, NODE *np, nodeid_t ni) -{ - kmem_free(np, sizeof (NODE)); - -} - -static void -write_node(_ii_info_t *ip, NODE *np, nodeid_t ni) -{ - (void) node_io(ip, np, ni, NSC_WRBUF); - release_node(ip, np, ni); - -} - -static void -free_node(_ii_info_t *ip, NODE *np, nodeid_t ni) -{ - ii_nodelink_t nl; - int fba; - - if (np == NULL) { - DTRACE_PROBE(_iit_free_node_end); - return; - } - - 
mutex_enter(&ip->bi_chksmutex); - if (II_ISOVERFLOW(np->vchunk_id)) { - /* link chunk onto overflow free list */ - ii_free_overflow(ip, np->vchunk_id); - } else { - /* write old free list head into chunk */ - nl.next_chunk = ip->bi_shdfchk; - ip->bi_shdfchk = np->vchunk_id; - ASSERT(ip->bi_shdfchk == II_NULLCHUNK || - (ip->bi_shdfchk >= 0 && ip->bi_shdfchk < ip->bi_shdchks)); - fba = CHUNK_FBA(np->vchunk_id); - (void) _ii_rsrv_devs(ip, SHDR, II_INTERNAL); - (void) _ii_nsc_io(ip, KS_SHD, SHDFD(ip), NSC_WRBUF, fba, - (unsigned char *)&nl, sizeof (nl)); - _ii_rlse_devs(ip, SHDR); - /* update free counts */ - /* ip->bi_unused++; */ - } - np->vchunk_id = II_NULLCHUNK; - (void) node_io(ip, np, ni, NSC_WRBUF); - (void) update_tree_header(ip); - mutex_exit(&ip->bi_chksmutex); - -} - -/* - * Public functions for dsw_dev to use. - */ - -/* - * Overflow volume functions. - */ - -/* put overflow chunk on the overflow volume free list */ -void -ii_free_overflow(_ii_info_t *ip, chunkid_t chunk) -{ - ii_nodelink_t nl; - _ii_overflow_t *op; - int fba; - - if (!II_ISOVERFLOW(chunk)) { - DTRACE_PROBE(_iit_free_overflow_end_1); - return; - } - chunk = II_2OVERFLOW(chunk); - - op = ip->bi_overflow; - if (op == NULL) { -#ifdef DEBUG - cmn_err(CE_PANIC, "overflow used, but not attached ip %p", - (void *) ip); -#endif - DTRACE_PROBE(_iit_free_overflow_end_2); - return; - } - mutex_enter(&(op->ii_mutex)); - - DTRACE_PROBE(_iit_free_overflow); - - /* write old free list head into chunk */ - nl.next_chunk = op->ii_freehead; - fba = CHUNK_FBA(chunk); - (void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI); - (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF, fba, - (unsigned char *)&nl, sizeof (nl)); - /* update free counts */ - op->ii_unused++; - ASSERT(op->ii_used > 0); /* always use 1 for header */ - - /* write chunk id into header freelist start */ - op->ii_freehead = chunk; - - (void) update_overflow_header(ip, op); - nsc_release(op->ii_dev->bi_fd); - mutex_exit(&(op->ii_mutex)); - 
-} - -/* reclaim any overflow storage used by the volume */ -void -ii_reclaim_overflow(_ii_info_t *ip) -{ - NODE *node; - nodeid_t node_id; - _ii_overflow_t *op; - - if ((ip->bi_flags & (DSW_VOVERFLOW | DSW_FRECLAIM)) == 0) { - DTRACE_PROBE(_iit_reclaim_overflow_end); - return; - } - - /* - * Determine whether overflow should be reclaimed: - * 1/ If we're not doing a group volume update - * OR - * 2/ If the number of detaches != number of attached vols - */ - op = ip->bi_overflow; - if (op && (((op->ii_flags & IIO_VOL_UPDATE) == 0) || - (op->ii_detachcnt != op->ii_drefcnt))) { -#ifndef II_MULTIMULTI_TERABYTE - /* assert volume size fits into node_id */ - ASSERT(ip->bi_mstchks <= INT32_MAX); -#endif - for (node_id = 0; node_id < ip->bi_mstchks; node_id++) { - if ((node = read_node(ip, node_id)) == NULL) { - DTRACE_PROBE(_iit_reclaim_overflow_end); - return; - } - ii_free_overflow(ip, node->vchunk_id); - release_node(ip, node, node_id); - } - } else { - /* need to reset the overflow volume header */ - op->ii_freehead = II_NULLNODE; - op->ii_used = 1; /* we have used the header */ - op->ii_unused = op->ii_nchunks - op->ii_used; - (void) update_overflow_header(ip, op); - } - - DTRACE_PROBE(_iit_reclaim_overflow); - - if ((ip->bi_flags & DSW_VOVERFLOW) == DSW_VOVERFLOW) { - mutex_enter(&ip->bi_mutex); - II_FLAG_CLR(DSW_VOVERFLOW, ip); - mutex_exit(&ip->bi_mutex); - } - --iigkstat.spilled_over.value.ul; - -} - -static chunkid_t -ii_alloc_overflow(_ii_info_t *ip) -{ - chunkid_t chunk; - ii_nodelink_t nl; - _ii_overflow_t *op; - int fba; - - if ((op = ip->bi_overflow) == NULL) { - DTRACE_PROBE(_iit_alloc_overflow_end); - return (II_NULLCHUNK); /* no overflow volume attached */ - } - - mutex_enter(&(op->ii_mutex)); - - DTRACE_PROBE(_iit_alloc_overflow); - - if (op->ii_unused < 1) { - mutex_exit(&(op->ii_mutex)); - DTRACE_PROBE(_iit_alloc_overflow_end); - return (II_NULLCHUNK); - } - (void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI); - if (op->ii_freehead != II_NULLCHUNK) { - 
/* pick first from free list */ - chunk = op->ii_freehead; - fba = CHUNK_FBA(chunk); - (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_RDBUF, fba, - (unsigned char *)&nl, sizeof (nl)); - op->ii_freehead = nl.next_chunk; - /* decrease unused count, fix bug 4419956 */ - op->ii_unused--; - } else { - /* otherwise pick first unused */ - if (op->ii_used > op->ii_nchunks) - chunk = II_NULLCHUNK; - else { - chunk = op->ii_used++; - op->ii_unused--; - } - } - if (chunk != II_NULLCHUNK) { - chunk = II_2OVERFLOW(chunk); - if ((ip->bi_flags&DSW_VOVERFLOW) == 0) { - mutex_enter(&ip->bi_mutex); - II_FLAG_SET(DSW_VOVERFLOW, ip); - mutex_exit(&ip->bi_mutex); - ++iigkstat.spilled_over.value.ul; - } - } - (void) update_overflow_header(ip, op); - nsc_release(op->ii_dev->bi_fd); - mutex_exit(&(op->ii_mutex)); - - return (chunk); -} -/* - * Find or insert key into search tree. - */ - -chunkid_t -ii_tsearch(_ii_info_t *ip, chunkid_t chunk_id) - /* Address of the root of the tree */ -{ - NODE *rootp = NULL; - chunkid_t n; /* New node id if key not found */ - - if ((rootp = read_node(ip, chunk_id)) == NULL) { - DTRACE_PROBE(_iit_tsearch_end); - return (II_NULLNODE); - } - n = rootp->vchunk_id; - if (n != II_NULLCHUNK) { /* chunk allocated, return location */ - release_node(ip, rootp, 0); - DTRACE_PROBE(_iit_tsearch_end); - return (n); - } - n = alloc_chunk(ip); - if (n != II_NULLCHUNK) { - rootp->vchunk_id = n; - write_node(ip, rootp, chunk_id); - } else - release_node(ip, rootp, 0); - - return (n); -} - -/* Delete node with key chunkid */ -void -ii_tdelete(_ii_info_t *ip, - chunkid_t chunkid) /* Key to be deleted */ -{ - NODE *np = NULL; - - if ((np = read_node(ip, chunkid)) == NULL) { - DTRACE_PROBE(_iit_tdelete_end); - return; - } - - ASSERT(np->vchunk_id != II_NULLCHUNK); - free_node(ip, np, chunkid); - np->vchunk_id = II_NULLCHUNK; - write_node(ip, np, chunkid); - -} - -/* - * initialise an empty map for ip - */ - -int -ii_tinit(_ii_info_t *ip) -{ - int rc = 0; - - /* overflow 
can't be attached before first call to this function */ - if (ip->bi_overflow) - ii_reclaim_overflow(ip); - - mutex_enter(&ip->bi_chksmutex); - ip->bi_shdfchk = II_NULLCHUNK; /* set freelist to empty chain */ - ip->bi_shdchkused = 0; - - /* fill index (bi_mstchks size) with II_NULLCHUNK */ - rc = node_fba_fill(ip, ip->bi_mstchks, II_NULLCHUNK); - if (rc == 0) - rc = update_tree_header(ip); - mutex_exit(&ip->bi_chksmutex); - - return (rc); -} - -/* - * Calculate the size of map space provided by a bitmap volume with - * tree_len fba's spare for the tree. - */ - -nsc_size_t -ii_btsize(nsc_size_t tree_len) -{ - nsc_size_t nchunks; - - nchunks = tree_len * nodes_per_fba; - - if (ii_debug > 1) - cmn_err(CE_NOTE, - "!ii_btsize: bitmap with %" NSC_SZFMT - " spare fba's will map %" NSC_SZFMT " chunks", - tree_len, nchunks); - - return (nchunks); -} diff --git a/usr/src/uts/common/avs/ns/model.h b/usr/src/uts/common/avs/ns/model.h deleted file mode 100644 index 9c6c1a14af..0000000000 --- a/usr/src/uts/common/avs/ns/model.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -#ifndef _SYS_NSCTL_MODEL_H -#define _SYS_NSCTL_MODEL_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Stolen from Solaris 8 - * Only used for Solaris 2.6 - */ -#define _ILP32 -#undef _ASM - - -#ifdef _KERNEL -#include <sys/debug.h> -#endif - -#ifndef DS_DDICT -#include <sys/isa_defs.h> -#endif - -typedef uint32_t caddr32_t; - -#if defined(_KERNEL) || defined(_KMEMUSER) - -/* - * These bits are used in various places to specify the data model - * of the originator (and/or consumer) of data items. See <sys/conf.h> - * <sys/file.h>, <sys/stream.h> and <sys/sunddi.h>. - * - * This state should only be known to the kernel implementation. - */ -#define DATAMODEL_MASK 0x0FF00000 - -#define DATAMODEL_ILP32 0x00100000 -#define DATAMODEL_LP64 0x00200000 - -#define DATAMODEL_NONE 0 - -#if defined(_LP64) -#define DATAMODEL_NATIVE DATAMODEL_LP64 -#elif defined(_ILP32) -#define DATAMODEL_NATIVE DATAMODEL_ILP32 -#else -#error "No DATAMODEL_NATIVE specified" -#endif /* _LP64 || _ILP32 */ - -#endif /* _KERNEL || _KMEMUSER */ - -#ifndef _ASM -/* - * XXX Ick. This type needs to be visible outside the above guard because - * the proc structure is visible outside the _KERNEL | _KMEMUSER guard. - * If we can make proc internals less visible, (which we obviously should) - * then this can be invisible too. - */ -typedef unsigned int model_t; - -#endif /* _ASM */ - -#if defined(_KERNEL) && !defined(_ASM) -/* - * These macros allow two views of the same piece of memory depending - * on the originating user-mode program's data model. See the manual - * pages (or uts/README.XX64). 
- */ -#if defined(_LP64) - -#define STRUCT_HANDLE(struct_type, handle) \ - struct __##handle##_type { \ - union { \ - struct struct_type##32 *m32; \ - struct struct_type *m64; \ - } ptr; \ - model_t model; \ - } handle = { NULL, DATAMODEL_ILP32 } - -#define STRUCT_DECL(struct_type, handle) \ - struct struct_type __##handle##_buf; \ - STRUCT_HANDLE(struct_type, handle) - -#define STRUCT_SET_HANDLE(handle, umodel, addr) \ - (handle).model = (model_t)(umodel) & DATAMODEL_MASK; \ - ASSERT(((umodel) & DATAMODEL_MASK) != DATAMODEL_NONE); \ - ((handle).ptr.m64) = (addr) - -#define STRUCT_INIT(handle, umodel) \ - STRUCT_SET_HANDLE(handle, umodel, &__##handle##_buf) - -#define STRUCT_SIZE(handle) \ - ((handle).model == DATAMODEL_ILP32 ? \ - sizeof (*(handle).ptr.m32) : \ - sizeof (*(handle).ptr.m64)) - -/* - * In STRUCT_FADDR and STRUCT_FGETP a sleight of hand is employed to make - * the compiler cope with having two different pointer types within ?:. - * The (void *) case on the ILP32 case makes it a pointer which can be - * converted to the pointer on the LP64 case, thus quieting the compiler. - */ -#define STRUCT_FADDR(handle, field) \ - ((handle).model == DATAMODEL_ILP32 ? \ - (void *)&(handle).ptr.m32->field : \ - &(handle).ptr.m64->field) - -#define STRUCT_FGET(handle, field) \ - (((handle).model == DATAMODEL_ILP32) ? \ - (handle).ptr.m32->field : \ - (handle).ptr.m64->field) - -#define STRUCT_FGETP(handle, field) \ - ((handle).model == DATAMODEL_ILP32 ? \ - (void *)(handle).ptr.m32->field : \ - (handle).ptr.m64->field) - -#define STRUCT_FSET(handle, field, val) \ - ((handle).model == DATAMODEL_ILP32 ? \ - ((handle).ptr.m32->field = (val)) : \ - ((handle).ptr.m64->field = (val))) - -#define STRUCT_FSETP(handle, field, val) \ - ((handle).model == DATAMODEL_ILP32 ? 
\ - (void) ((handle).ptr.m32->field = (caddr32_t)(val)) : \ - (void) ((handle).ptr.m64->field = (val))) - -#define STRUCT_BUF(handle) ((handle).ptr.m64) - -#define SIZEOF_PTR(umodel) \ - (((umodel) & DATAMODEL_MASK) == DATAMODEL_ILP32 ? \ - sizeof (caddr32_t) : \ - sizeof (caddr_t)) - -#define SIZEOF_STRUCT(struct_type, umodel) \ - (((umodel) & DATAMODEL_MASK) == DATAMODEL_ILP32 ? \ - sizeof (struct struct_type##32) : \ - sizeof (struct struct_type)) - -#else /* _LP64 */ - -#define STRUCT_HANDLE(struct_type, handle) \ - struct __##handle##_32 { \ - struct struct_type *ptr; \ - }; \ - struct __##handle##_32 handle = { NULL } - -#define STRUCT_DECL(struct_type, handle) \ - struct struct_type __##handle##_buf; \ - STRUCT_HANDLE(struct_type, handle) - -#ifdef lint -#define STRUCT_SET_HANDLE(handle, umodel, addr) \ - (void) (umodel); \ - (handle).ptr = (addr) -#else -#define STRUCT_SET_HANDLE(handle, umodel, addr) \ - (handle).ptr = (addr) -#endif /* lint */ - -#define STRUCT_INIT(handle, umodel) \ - STRUCT_SET_HANDLE(handle, umodel, &__##handle##_buf) - -#define STRUCT_SIZE(handle) (sizeof (*(handle).ptr)) - -#define STRUCT_FADDR(handle, field) (&(handle).ptr->field) - -#define STRUCT_FGET(handle, field) ((handle).ptr->field) - -#define STRUCT_FGETP STRUCT_FGET - -#define STRUCT_FSET(handle, field, val) ((handle).ptr->field = (val)) - -#define STRUCT_FSETP STRUCT_FSET - -#define STRUCT_BUF(handle) ((handle).ptr) - -#define SIZEOF_PTR(umodel) sizeof (caddr_t) - -#define SIZEOF_STRUCT(struct_type, umodel) sizeof (struct struct_type) - -#endif /* _LP64 */ - -#if defined(_LP64) || defined(lint) || defined(__lint) - -struct _klwp; - -extern model_t lwp_getdatamodel(struct _klwp *); -extern model_t get_udatamodel(void); - -#else - -/* - * If we're the 32-bit kernel, the result of these function - * calls is completely predictable, so let's just cheat. A - * good compiler should be able to elide all the unreachable code - * that results. 
Optimism about optimization reigns supreme ;-) - */ -#define lwp_getdatamodel(t) DATAMODEL_ILP32 -#define get_udatamodel() DATAMODEL_ILP32 - -#endif /* _LP64 || lint || __lint */ - -#endif /* _KERNEL && !_ASM */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_NSCTL_MODEL_H */ diff --git a/usr/src/uts/common/avs/ns/ncall_inter.h b/usr/src/uts/common/avs/ns/ncall_inter.h deleted file mode 100644 index 5f4c0689e3..0000000000 --- a/usr/src/uts/common/avs/ns/ncall_inter.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_NCALL_INTER_H -#define _SYS_NCALL_INTER_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -void ncall_register_svc(int, void (*)(void *, int *)) { } -void ncall_unregister_svc(int) { } -int ncall_register_module(void *, void *); -int ncall_unregister_module(void *); - -int ncall_nodeid(char *) { } -char *ncall_nodename(int) { } -int ncall_mirror(int) { } -int ncall_self(void) { } - -int ncall_alloc(int, int, int, void **) { } -int ncall_timedsend(void *, int, int, struct timeval *, ...) 
{ } -int ncall_timedsendnotify(void *, int, int, struct timeval *, - void (*)(void *, void *), void *, ...) { } -int ncall_send(void *, int, int, ...) { } -int ncall_read_reply(void *, int, ...) { } -void ncall_reset(void *) { } -void ncall_free(void *) { } - -int ncall_put_data(void *, void *, int) { } -int ncall_get_data(void *, void *, int) { } - -int ncall_sender(void *) { } -void ncall_reply(void *, ...) { } -void ncall_pend(void *) { } -void ncall_done(void *) { } - -int ncall_maxnodes(void) { } -int ncall_nextnode(void **) { } -int ncall_errcode(void *, int *) { } - - -/* Health monitor typedefs, variables and functions */ -typedef void hmio_name_t; -typedef void hm_sarea_t; -typedef void hm_statev_t; -#ifndef _HM_TOK_T -#define _HM_TOK_T -typedef void *hm_tok_t; -#endif - -int bchm_load(void) { } -int bchm_unload(void) { } -int bchm_getnetname(hmio_name_t *) { } -int bchm_getstatename(hmio_name_t *) { } -int bchm_startnet(hmio_name_t *, int) { } -int bchm_initted; -hm_sarea_t *bchm_start_addr[1]; -hm_sarea_t hm_latest_state; - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_NCALL_INTER_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl.h b/usr/src/uts/common/avs/ns/nsctl.h deleted file mode 100644 index c91d691684..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl.h +++ /dev/null @@ -1,526 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_NSCTL_H -#define _SYS_NSCTL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#if (defined(lint) || defined(OSDEBUG)) && defined(_KERNEL) -#define __NSC_GEN__ -#include <sys/ksynch.h> -#include <sys/nsctl/nsc_dev.h> -#include <sys/nsctl/nsc_gen.h> -#include <sys/nsctl/nsc_mem.h> -#include <sys/nsctl/nsc_rmspin.h> -#endif - - -/* - * nsctl multi-terabyte volume support - * - * To build a multi-terabyte stack, '#define NSC_MULTI_TERABYTE'. - */ - -#ifdef NSC_MULTI_TERABYTE -typedef uint64_t nsc_off_t; /* positions, offsets */ -typedef uint64_t nsc_size_t; /* lengths, sizes */ -#ifdef _LP64 -#define NSC_SZFMT "lu" -#define NSC_XSZFMT "lx" -#else -#define NSC_SZFMT "llu" -#define NSC_XSZFMT "llx" -#endif - -#else /* max 1TB volume size */ -typedef int nsc_off_t; -typedef int nsc_size_t; -#define NSC_SZFMT "u" -#define NSC_XSZFMT "x" -#endif - - -#ifdef _KERNEL - -#ifdef sun -#include <sys/nsc_ddi.h> -#endif - -/* - * Generic parameter definition. 
- */ - -typedef struct nsc_def_s { - char *name; /* Parameter name */ - uintptr_t value; /* Parameter value */ - int offset; /* Structure offset */ -} nsc_def_t; - -extern int nsc_inval(), nsc_ioerr(); -extern int nsc_fatal(), nsc_null(), nsc_true(); -extern void nsc_decode_param(nsc_def_t *, nsc_def_t *, long *); -#endif /* _KERNEL */ - - -/* ID and Type flags */ - -#define NSC_ID 0x40000000 /* Module ID */ -#define NSC_NULL 0x00000100 /* No I/O possible */ -#define NSC_DEVICE 0x00000200 /* Device interface */ -#define NSC_FILE 0x00000400 /* File vnode interface */ -#define NSC_CACHE 0x00000800 /* Cache interface */ -#define NSC_ANON 0x00001000 /* Supports anonymous buffers */ -#define NSC_VCHR 0x00002000 /* VCHR vnode device */ -#define NSC_NCALL 0x00004000 /* ncall-io interface */ - -#define NSC_IDS 0x7ff00000 /* ID mask */ -#define NSC_TYPES 0x7fffff00 /* Type mask */ - -#define NSC_MKID(x) (NSC_ID | ((x) << 20)) - -#define NSC_RAW_ID NSC_MKID(39) /* Raw device */ -#define NSC_FILE_ID NSC_MKID(40) /* File vnode device */ -#define NSC_FREEZE_ID NSC_MKID(41) /* Frozen raw device */ -#define NSC_VCHR_ID NSC_MKID(42) /* VCHR vnode device */ -#define NSC_NCALL_ID NSC_MKID(43) /* ncall-io */ -#define NSC_SDBC_ID NSC_MKID(80) /* Block based cache */ -#define NSC_RDCLR_ID NSC_MKID(94) /* RDC (low, raw) */ -#define NSC_RDCL_ID NSC_MKID(95) /* RDC (low, cache) */ -#define NSC_IIR_ID NSC_MKID(96) /* Instant Image (raw) */ -#define NSC_II_ID NSC_MKID(98) /* Instant Image */ -#define NSC_RDCHR_ID NSC_MKID(99) /* RDC (high, raw) */ -#define NSC_RDCH_ID NSC_MKID(100) /* RDC (high, cache) */ - -typedef enum nsc_power_ops_e { - Power_Lost, /* Power Failing initial warning */ - /* with timeleft (rideout) minutes */ - - Power_OK, /* Power OK or restored before death */ - - Power_Down /* that's all folks machine will */ - /* be shutdown, save any state */ -} nsc_power_ops_t; - -#ifdef _KERNEL - -/* Module Flags */ - -#define NSC_REFCNT 0x00000001 /* Counts references */ -#define 
NSC_FILTER 0x00000002 /* Uses lower level driver */ - - -#ifndef _NSC_DEV_H -typedef struct nsc_io_s { int x; } nsc_io_t; -typedef struct nsc_path_s { int x; } nsc_path_t; -#endif - -extern nsc_io_t *nsc_register_io(char *, int, nsc_def_t *); -extern int nsc_unregister_io(nsc_io_t *, int); -extern nsc_path_t *nsc_register_path(char *, int, nsc_io_t *); -extern int nsc_unregister_path(nsc_path_t *, int); -extern int nsc_cache_sizes(int *, int *); -extern int nsc_node_hints(unsigned int *); -extern int nsc_node_hints_set(unsigned int); -extern blind_t nsc_register_power(char *, nsc_def_t *); -extern int nsc_unregister_power(blind_t); - -/* - * Strategy function interface - */ -#ifndef DS_DDICT -typedef int (*strategy_fn_t)(struct buf *); -#endif -extern strategy_fn_t nsc_get_strategy(major_t); - -extern void *nsc_get_devops(major_t); - -#endif /* _KERNEL */ - - -/* Block sizes */ - -#define FBA_SHFT 9 -#define FBA_MASK 0x1ff -#define FBA_SIZE(x) ((x) << FBA_SHFT) /* fba to bytes */ -#define FBA_OFF(x) ((x) & FBA_MASK) /* byte offset */ -#define FBA_LEN(x) FBA_NUM((x) + FBA_MASK) /* len to fba */ -#define FBA_NUM(x) ((nsc_size_t)((uint64_t)(x) >> FBA_SHFT)) - /* bytes to fba */ - - -/* Return values */ - -#define NSC_DONE (0) -#define NSC_PENDING (-1) -#define NSC_HIT (-2) - - -#if defined(_KERNEL) || defined(_KMEMUSER) - -/* - * External file descriptor. 
- */ - -#ifndef _NSC_DEV_H -typedef struct nsc_fd_s { int x; } nsc_fd_t; -#endif - -#endif /* _KERNEL || _KMEMUSER */ - - -#ifdef _KERNEL - -#define NSC_TRY (1<<24) /* Conditional operation */ -#define NSC_PCATCH (1<<25) /* Catch signals */ -#define NSC_DEFER (1<<26) /* Defer if busy */ -#define NSC_MULTI (1<<27) /* Multiple reserves */ -#define NSC_NOWAIT (1<<28) /* Don't wait if busy */ - -extern nsc_fd_t *nsc_open(char *, int, nsc_def_t *, blind_t, int *); -extern int nsc_close(nsc_fd_t *); -extern char *nsc_pathname(nsc_fd_t *); -extern int nsc_fdpathcmp(nsc_fd_t *, uint64_t, char *); -extern int nsc_shared(nsc_fd_t *); -extern int nsc_setval(nsc_fd_t *, char *, int); -extern int nsc_getval(nsc_fd_t *, char *, int *); -extern int nsc_set_trksize(nsc_fd_t *, nsc_size_t); -extern int nsc_discard_pinned(nsc_fd_t *, nsc_off_t, nsc_size_t); -extern kmutex_t *nsc_lock_addr(nsc_fd_t *); -extern int nsc_attach(nsc_fd_t *, int); -extern int nsc_reserve(nsc_fd_t *, int); -extern void nsc_reserve_lk(nsc_fd_t *); -extern void nsc_release(nsc_fd_t *); -extern int nsc_release_lk(nsc_fd_t *); -extern int nsc_detach(nsc_fd_t *, int); -extern int nsc_avail(nsc_fd_t *); -extern int nsc_held(nsc_fd_t *); -extern int nsc_waiting(nsc_fd_t *); -extern int nsc_partsize(nsc_fd_t *, nsc_size_t *); -extern int nsc_maxfbas(nsc_fd_t *, int, nsc_size_t *); -extern int nsc_get_pinned(nsc_fd_t *); -extern int nsc_max_devices(void); -extern int nsc_control(nsc_fd_t *, int, void *, int); - -#endif /* _KERNEL */ - - -#if defined(_KERNEL) || defined(_KMEMUSER) - -/* - * I/O device structure. - */ - -#ifndef _NSC_DEV_H -typedef struct nsc_iodev_s { int x; } nsc_iodev_t; -#endif - -#ifdef _KERNEL -extern void nsc_set_owner(nsc_fd_t *, nsc_iodev_t *); -extern void nsc_pinned_data(nsc_iodev_t *, nsc_off_t, nsc_size_t); -extern void nsc_unpinned_data(nsc_iodev_t *, nsc_off_t, nsc_size_t); -#endif - - -/* - * Data structures used by I/O interface. 
- */ - -typedef struct nsc_vec_s { /* Scatter gather element */ - unsigned char *sv_addr; /* Virtual address of data */ - unsigned long sv_vme; /* VME address of data */ - int sv_len; /* Data length in bytes */ -} nsc_vec_t; - - -typedef struct nsc_buf_s { /* Buffer structure */ - nsc_fd_t *sb_fd; /* File descriptor */ - nsc_off_t sb_pos; /* Block offset of data */ - nsc_size_t sb_len; /* Length of data in blocks */ - volatile int sb_flag; /* Buffer flags */ - int sb_error; /* Error code */ - uintptr_t sb_user; /* User definable */ - nsc_vec_t *sb_vec; /* Scatter gather list */ -} nsc_buf_t; - -#endif /* _KERNEL || _KMEMUSER */ - - -/* Allocate flags */ - -#define NSC_RDBUF 0x0001 -#define NSC_WRBUF 0x0002 -#define NSC_PINNABLE 0x0004 -#define NSC_NOBLOCK 0x0008 - -#define NSC_READ (NSC_RDBUF) -#define NSC_WRITE (NSC_WRBUF) -#define NSC_RDWR (NSC_RDBUF | NSC_WRBUF) -#define NSC_RDWRBUF (NSC_RDBUF | NSC_WRBUF) - - -/* Other flags */ - -#define NSC_CACHEBLK 0x0008 /* nsc_maxfbas: size of cache block in fbas */ -#define NSC_HALLOCATED 0x0010 /* handle allocated (IO provider internals) */ -#define NSC_HACTIVE 0x0020 /* handle active (IO provider internals) */ -#define NSC_BCOPY 0x0040 /* bcopy, don't DMA when moving data */ -#define NSC_PAGEIO 0x0080 /* client will use handle for pageio */ -#define NSC_ABUF 0x0100 /* anonymous buffer handle */ -#define NSC_MIXED 0x0200 /* data from 2 devs is mixed in this buffer */ -#define NSC_NODATA 0x0400 /* allocate without data buffer (sb_vec) */ - - -#define NSC_FLAGS 0xffff - -#ifdef _KERNEL - -#define NSC_ANON_CD ((blind_t)(-1)) /* used for IO provider alloc buf */ - -extern int nsc_alloc_buf(nsc_fd_t *, nsc_off_t, nsc_size_t, int, nsc_buf_t **); -extern int nsc_alloc_abuf(nsc_off_t, nsc_size_t, int, nsc_buf_t **); -extern int nsc_read(nsc_buf_t *, nsc_off_t, nsc_size_t, int); -extern int nsc_write(nsc_buf_t *, nsc_off_t, nsc_size_t, int); -extern int nsc_zero(nsc_buf_t *, nsc_off_t, nsc_size_t, int); -extern int 
nsc_copy(nsc_buf_t *, nsc_buf_t *, nsc_off_t, nsc_off_t, nsc_size_t); -extern int nsc_copy_direct(nsc_buf_t *, nsc_buf_t *, nsc_off_t, - nsc_off_t, nsc_size_t); -extern int nsc_uncommit(nsc_buf_t *, nsc_off_t, nsc_size_t, int); -extern int nsc_free_buf(nsc_buf_t *); -extern nsc_buf_t *nsc_alloc_handle(nsc_fd_t *, - void (*)(), void (*)(), void (*)()); -extern int nsc_free_handle(nsc_buf_t *); -extern int nsc_uread(nsc_fd_t *, void *, void *); -extern int nsc_uwrite(nsc_fd_t *, void *, void *); - -#endif /* _KERNEL */ - - -/* - * Performance hints. - */ - -#define NSC_WRTHRU 0x00010000 -#define NSC_FORCED_WRTHRU 0x00020000 -#define NSC_NOCACHE 0x00040000 -#define NSC_QUEUE 0x00080000 -#define NSC_RDAHEAD 0x00100000 -#define NSC_NO_FORCED_WRTHRU 0x00200000 -#define NSC_METADATA 0x00400000 -#define NSC_SEQ_IO 0x00800000 - -#define NSC_HINTS 0x00ff0000 - - -#ifdef _KERNEL -/* - * node hint actions - */ - -#define NSC_GET_NODE_HINT 0 -#define NSC_SET_NODE_HINT 1 -#define NSC_CLEAR_NODE_HINT 2 - -/* - * Reflective memory spinlocks. - */ - - -#ifndef _NSC_RMSPIN_H -typedef struct nsc_rmlock_s { int x; } nsc_rmlock_t; -#endif - - -extern nsc_rmlock_t *nsc_rm_lock_alloc(char *, int, void *); -extern void nsc_rm_lock_dealloc(nsc_rmlock_t *); -extern int nsc_rm_lock(nsc_rmlock_t *); -extern void nsc_rm_unlock(nsc_rmlock_t *); - -#endif /* _KERNEL */ - - -/* - * Memory allocation routines. 
- */ - -#define NSC_MEM_LOCAL 0x1 -#define NSC_MEM_GLOBAL 0x4 - -#define NSC_MEM_RESIZE 0x100 -#define NSC_MEM_NVDIRTY 0x400 - - -#ifdef _KERNEL - -#ifndef _NSC_MEM_H -typedef struct nsc_mem_s { int x; } nsc_mem_t; -#endif - - -extern nsc_mem_t *nsc_register_mem(char *, int, int); -extern void nsc_unregister_mem(nsc_mem_t *); -extern void *nsc_kmem_alloc(size_t, int, nsc_mem_t *); -extern void *nsc_kmem_zalloc(size_t, int, nsc_mem_t *); -extern void nsc_kmem_free(void *, size_t); -extern void nsc_mem_sizes(nsc_mem_t *, size_t *, size_t *, size_t *); -extern size_t nsc_mem_avail(nsc_mem_t *); - -/* nvmem suppport */ -typedef void (*nsc_mem_err_cb) (void *, void *, size_t, int); -extern int nsc_commit_mem(void *, void *, size_t, nsc_mem_err_cb); - -extern void nsc_cm_errhdlr(void *, void *, size_t, int); - -#endif /* _KERNEL */ - - -/* - * Max pathname - * Note: Currently defined both here and in nsc_dev.h - */ -#if !defined(NSC_MAXPATH) -#define NSC_MAXPATH 64 -#endif - -#ifdef _KERNEL - -/* - * Inter-module function (callback) services - */ - -#ifndef _NSC_GEN_H -typedef struct nsc_svc_s { int x; } nsc_svc_t; -#endif - -extern nsc_svc_t *nsc_register_svc(char *, void (*)(intptr_t)); -extern int nsc_unregister_svc(nsc_svc_t *); -extern int nsc_call_svc(nsc_svc_t *, intptr_t); - - -/* - * String manipulation functions. - */ - -#ifndef sun -#define sprintf nsc_sprintf -#endif /* sun */ - -extern char *nsc_strdup(char *); -extern void nsc_strfree(char *); -extern int nsc_strmatch(char *, char *); -extern void nsc_sprintf(char *, char *, ...); -extern uint64_t nsc_strhash(char *); - - -/* - * Macro definitions. - */ - -#define NSC_HIER 1 - -#ifndef NULL -#define NULL 0 -#endif - - -/* - * External definitions. 
- */ - -#undef HZ -extern clock_t HZ; -extern int nsc_max_nodeid, nsc_min_nodeid; - -extern int nsc_node_id(void); -extern char *nsc_node_name(void); -extern int nsc_node_up(int); -extern time_t nsc_time(void); -extern clock_t nsc_lbolt(void); -extern int nsc_delay_sig(clock_t); -extern clock_t nsc_usec(void); -extern void nsc_yield(void); - -extern void nsc_membar_stld(void); -extern uint8_t nsc_ldstub(uint8_t *); -extern caddr_t nsc_caller(void); -extern caddr_t nsc_callee(void); - -extern int nsc_create_process(void (*)(void *), void *, boolean_t); - -extern int nsc_power_init(void); -extern void nsc_power_deinit(void); -extern int nsc_nodeid_data(void); - -#define NSC_ALERT_INFO 0 /* Information alert */ -#define NSC_ALERT_WARNING 1 /* Warning alert */ -#define NSC_ALERT_ERROR 2 /* Error alert */ -#define NSC_ALERT_DOWN 3 /* System or Module down */ - -extern void nsc_do_sysevent(char *, char *, int, int, char *, dev_info_t *); - - -/* - * Missing DDI/DKI definition. - */ - -#if defined(_SYS_CONF_H) -#ifndef D_MP -#define D_MP 0 -#endif -#endif - -extern void *nsc_threadp(void); - -#endif /* _KERNEL */ - - -/* - * Common defines - */ - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef NBBY -#define NBBY 8 /* number of bits per byte */ -#endif - -/* - * kstat definition - */ -#define KSTAT_DATA_CHAR_LEN (sizeof (((kstat_named_t *)0)->value.c)) - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_NSCTL_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/Makefile b/usr/src/uts/common/avs/ns/nsctl/Makefile deleted file mode 100644 index 2ea51f19da..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. 
-# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# include global definitions -include ../../../../../Makefile.master - -HDRS= nsc_dev.h \ - nsc_gen.h \ - nsc_ioctl.h \ - nsc_mem.h \ - nsc_rmspin.h \ - nsc_disk.h \ - nsc_power.h \ - nsvers.h - -ROOTDIRS= $(ROOT)/usr/include/sys/nsctl - -ROOTHDRS= $(HDRS:%=$(ROOTDIRS)/%) - -CHECKHDRS= $(HDRS:%.h=%.check) - -# install rule -$(ROOTDIRS)/%: % - $(INS.file) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIRS) $(ROOTHDRS) - -$(ROOTDIRS): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_cache.c b/usr/src/uts/common/avs/ns/nsctl/nsc_cache.c deleted file mode 100644 index bdb7009644..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_cache.c +++ /dev/null @@ -1,499 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/uio.h> -#include <sys/ddi.h> - -#define __NSC_GEN__ -#include "nsc_dev.h" - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#include "../nsctl.h" - - -#define _I(x) (((long)(&((nsc_io_t *)0)->x))/sizeof (long)) - - -nsc_def_t _nsc_cache_def[] = { - "AllocBuf", (uintptr_t)nsc_ioerr, _I(alloc_buf), - "FreeBuf", (uintptr_t)nsc_fatal, _I(free_buf), - "Read", (uintptr_t)nsc_fatal, _I(read), - "Write", (uintptr_t)nsc_fatal, _I(write), - "Zero", (uintptr_t)nsc_fatal, _I(zero), - "Copy", (uintptr_t)nsc_ioerr, _I(copy), - "CopyDirect", (uintptr_t)nsc_ioerr, _I(copy_direct), - "Uncommit", (uintptr_t)nsc_null, _I(uncommit), - "AllocHandle", (uintptr_t)nsc_null, _I(alloc_h), - "FreeHandle", (uintptr_t)nsc_fatal, _I(free_h), - "TrackSize", (uintptr_t)nsc_null, _I(trksize), - "Discard", (uintptr_t)nsc_null, _I(discard), - "Sizes", (uintptr_t)nsc_null, _I(sizes), - "GetPinned", (uintptr_t)nsc_null, _I(getpin), - "NodeHints", (uintptr_t)nsc_inval, _I(nodehints), - 0, 0, 0 -}; - - -static int _nsc_alloc_buf_h(blind_t, nsc_off_t, nsc_size_t, int, - nsc_buf_t **, nsc_fd_t *); -static int _nsc_copy_h(nsc_buf_t *, nsc_buf_t *, nsc_off_t, - nsc_off_t, nsc_size_t); - -extern nsc_io_t *_nsc_reserve_io(char *, int); -extern void _nsc_release_io(nsc_io_t *); - -extern kmutex_t _nsc_io_lock; - - - - -/* ARGSUSED */ - -void -_nsc_add_cache(nsc_io_t *io) -{ -} - - -nsc_buf_t * -nsc_alloc_handle(nsc_fd_t *fd, void 
(*d_cb)(), void (*r_cb)(), void (*w_cb)()) -{ - nsc_buf_t *h = (*fd->sf_aio->alloc_h)(d_cb, r_cb, w_cb, fd->sf_cd); - - if (h) - h->sb_fd = fd; - - return (h); -} - - -int -nsc_free_handle(nsc_buf_t *h) -{ - if (h == NULL || (h->sb_flag & NSC_ABUF)) - return (EINVAL); - - return ((*h->sb_fd->sf_aio->free_h)(h, h->sb_fd->sf_cd)); -} - - -int -nsc_alloc_abuf(nsc_off_t pos, nsc_size_t len, int flag, nsc_buf_t **ptr) -{ - nsc_buf_t *h; - nsc_io_t *io; - int rc; - - if (*ptr != NULL) - return (EINVAL); - - if (flag & NSC_NODATA) - return (EINVAL); - - io = _nsc_reserve_io(NULL, NSC_ANON); - if (io == NULL) - return (ENOBUFS); - - if ((h = (*io->alloc_h)(NULL, NULL, NULL, NSC_ANON_CD)) == NULL) { - _nsc_release_io(io); - return (ENOBUFS); - } - - rc = (*io->alloc_buf)(NSC_ANON_CD, pos, len, - NSC_NOCACHE|flag, &h, NULL); - if (rc <= 0) { - h->sb_flag &= ~NSC_HALLOCATED; - h->sb_flag |= NSC_ABUF; - h->sb_fd = (nsc_fd_t *)io; /* note overloaded field */ - - *ptr = h; - - mutex_enter(&_nsc_io_lock); - io->abufcnt++; - mutex_exit(&_nsc_io_lock); - } - - _nsc_release_io(io); - return (rc); -} - - -int -nsc_alloc_buf(nsc_fd_t *fd, nsc_off_t pos, nsc_size_t len, - int flag, nsc_buf_t **ptr) -{ - int (*fn)() = _nsc_alloc_buf_h; - - if ((fd->sf_avail & NSC_WRITE) == 0) - if (flag & NSC_WRBUF) - return (EACCES); - - if ((flag & (NSC_READ|NSC_WRITE|NSC_NODATA)) == - (NSC_READ|NSC_NODATA)) { - /* - * NSC_NODATA access checks. - * - * - NSC_READ|NSC_NODATA is illegal since there would - * be no data buffer to immediately read the data into. - * - NSC_WRITE|NSC_NODATA is valid since the client can - * provide the buffer and then call nsc_write() as - * necessary. - * - NSC_NODATA is valid since the client can provide the - * buffer and then call nsc_read() or nsc_write() as - * necessary. 
- */ - return (EACCES); - } - - if (*ptr) { - fn = fd->sf_aio->alloc_buf; - (*ptr)->sb_fd = fd; - } - - return (*fn)(fd->sf_cd, pos, len, flag, ptr, fd); -} - - -/* ARGSUSED */ - -static int -_nsc_alloc_buf_h(blind_t cd, nsc_off_t pos, nsc_size_t len, - int flag, nsc_buf_t **ptr, nsc_fd_t *fd) -{ - nsc_buf_t *h; - int rc; - - if (!(h = nsc_alloc_handle(fd, NULL, NULL, NULL))) - return (ENOBUFS); - - if ((rc = nsc_alloc_buf(fd, pos, len, flag, &h)) <= 0) { - h->sb_flag &= ~NSC_HALLOCATED; - *ptr = h; - return (rc); - } - - (void) nsc_free_handle(h); - return (rc); -} - - -int -nsc_read(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - if ((h->sb_flag & NSC_ABUF) || - ((h->sb_flag & NSC_NODATA) && h->sb_vec == NULL)) - return (EIO); - - return ((*h->sb_fd->sf_aio->read)(h, pos, len, flag)); -} - - -int -nsc_write(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - if ((h->sb_flag & NSC_ABUF) || - ((h->sb_flag & NSC_NODATA) && h->sb_vec == NULL)) - return (EIO); - - return ((*h->sb_fd->sf_aio->write)(h, pos, len, flag)); -} - - -int -nsc_zero(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - if ((h->sb_flag & NSC_ABUF) || - ((h->sb_flag & NSC_NODATA) && h->sb_vec == NULL)) - return (EIO); - - return ((*h->sb_fd->sf_aio->zero)(h, pos, len, flag)); -} - - -int -nsc_copy(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, - nsc_off_t pos2, nsc_size_t len) -{ - nsc_io_t *io1, *io2; - int rc = EIO; - - if (((h1->sb_flag & NSC_NODATA) && h1->sb_vec == NULL) || - ((h2->sb_flag & NSC_NODATA) && h2->sb_vec == NULL)) - return (EIO); - - if (h1->sb_fd && h2->sb_fd) { - io1 = (h1->sb_flag & NSC_ABUF) ? - (nsc_io_t *)h1->sb_fd : h1->sb_fd->sf_aio; - - io2 = (h2->sb_flag & NSC_ABUF) ? 
- (nsc_io_t *)h2->sb_fd : h2->sb_fd->sf_aio; - - if (io1 == io2) - rc = (*io1->copy)(h1, h2, pos1, pos2, len); - } - - if (rc != EIO) - return (rc); - - return (_nsc_copy_h(h1, h2, pos1, pos2, len)); -} - - -static int -_nsc_copy_h(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, - nsc_off_t pos2, nsc_size_t len) -{ - nsc_vec_t *v1, *v2; - uchar_t *a1, *a2; - int sz, l1, l2, lenbytes; /* byte sizes within an nsc_vec_t */ - - if (pos1 < h1->sb_pos || pos1 + len > h1->sb_pos + h1->sb_len || - pos2 < h2->sb_pos || pos2 + len > h2->sb_pos + h2->sb_len) - return (EINVAL); - - if (!len) - return (0); - - /* find starting point in "from" vector */ - - v1 = h1->sb_vec; - pos1 -= h1->sb_pos; - - for (; pos1 >= FBA_NUM(v1->sv_len); v1++) - pos1 -= FBA_NUM(v1->sv_len); - - a1 = v1->sv_addr + FBA_SIZE(pos1); - l1 = v1->sv_len - FBA_SIZE(pos1); - - /* find starting point in "to" vector */ - - v2 = h2->sb_vec; - pos2 -= h2->sb_pos; - - for (; pos2 >= FBA_NUM(v2->sv_len); v2++) - pos2 -= FBA_NUM(v2->sv_len); - - a2 = v2->sv_addr + FBA_SIZE(pos2); - l2 = v2->sv_len - FBA_SIZE(pos2); - - /* copy required data */ - - ASSERT(FBA_SIZE(len) < INT_MAX); - lenbytes = (int)FBA_SIZE(len); - - while (lenbytes) { - sz = min(l1, l2); - sz = min(sz, lenbytes); - - bcopy(a1, a2, sz); - - l1 -= sz; l2 -= sz; - a1 += sz; a2 += sz; - lenbytes -= sz; - - if (!l1) - a1 = (++v1)->sv_addr, l1 = v1->sv_len; - if (!l2) - a2 = (++v2)->sv_addr, l2 = v2->sv_len; - } - - return (0); -} - - -int -nsc_copy_direct(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, - nsc_off_t pos2, nsc_size_t len) -{ - int rc = EIO; - - if (!h1 || !h2) - return (EINVAL); - - if (((h1->sb_flag & NSC_NODATA) && h1->sb_vec == NULL) || - ((h2->sb_flag & NSC_NODATA) && h2->sb_vec == NULL)) - return (EIO); - - if ((h2->sb_flag & NSC_RDWR) != NSC_WRITE) { - cmn_err(CE_WARN, - "nsc_copy_direct: h2 (%p) flags (%x) include NSC_READ", - (void *)h2, h2->sb_flag); - } - - if ((h2->sb_flag & NSC_WRTHRU) == 0) { - cmn_err(CE_WARN, - 
"nsc_copy_direct: h2 (%p) flags (%x) do not " - "include NSC_WRTHRU", (void *)h2, h2->sb_flag); - h2->sb_flag |= NSC_WRTHRU; - } - - if (h1->sb_fd && h2->sb_fd && h1->sb_fd->sf_aio == h2->sb_fd->sf_aio) - rc = (*h1->sb_fd->sf_aio->copy_direct)(h1, h2, pos1, pos2, len); - - if (rc != EIO) - return (rc); - - /* - * The slow way ... - */ - - rc = nsc_copy(h1, h2, pos1, pos2, len); - if (rc <= 0) - rc = nsc_write(h2, pos2, len, NSC_WRTHRU); - - return (rc); -} - - -int -nsc_uncommit(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - if (h->sb_flag & NSC_ABUF) - return (EIO); - - return ((*h->sb_fd->sf_aio->uncommit)(h, pos, len, flag)); -} - - -int -nsc_free_buf(nsc_buf_t *h) -{ - nsc_io_t *io; - int abuf; - int rc; - - if (h == NULL) - return (0); - - if ((h->sb_flag & NSC_NODATA) && (h->sb_vec != NULL)) { - h->sb_vec = NULL; - } - - abuf = (h->sb_flag & NSC_ABUF); - - if (abuf) - io = (nsc_io_t *)h->sb_fd; - else - io = h->sb_fd->sf_aio; - - rc = (*io->free_buf)(h); - - if (abuf && rc <= 0) { - mutex_enter(&_nsc_io_lock); - io->abufcnt--; - mutex_exit(&_nsc_io_lock); - } - - return (rc); -} - - -int -nsc_node_hints(uint_t *hints) -{ - return (_nsc_call_io(_I(nodehints), (blind_t)hints, - (blind_t)NSC_GET_NODE_HINT, 0)); -} - -int -nsc_node_hints_set(uint_t hints) -{ - return (_nsc_call_io(_I(nodehints), (blind_t)(unsigned long)hints, - (blind_t)NSC_SET_NODE_HINT, 0)); -} - - -int -nsc_cache_sizes(int *asize, int *wsize) -{ - return (_nsc_call_io(_I(sizes), (blind_t)asize, (blind_t)wsize, 0)); -} - - -int -nsc_set_trksize(nsc_fd_t *fd, nsc_size_t trsize) -{ - return (*fd->sf_aio->trksize)(fd->sf_cd, trsize); -} - - -int -nsc_get_pinned(nsc_fd_t *fd) -{ - return (*fd->sf_aio->getpin)(fd->sf_cd); -} - - -int -nsc_discard_pinned(nsc_fd_t *fd, nsc_off_t pos, nsc_size_t len) -{ - return (*fd->sf_aio->discard)(fd->sf_cd, pos, len); -} - - -void -nsc_pinned_data(nsc_iodev_t *iodev, nsc_off_t pos, nsc_size_t len) -{ - nsc_fd_t *fd; - - if (!iodev) - return; - - 
mutex_enter(&iodev->si_dev->nsc_lock); - iodev->si_busy++; - mutex_exit(&iodev->si_dev->nsc_lock); - - for (fd = iodev->si_open; fd; fd = fd->sf_next) - if (fd->sf_avail & _NSC_ATTACH) - (*fd->sf_pinned)(fd->sf_arg, pos, len); - - _nsc_wake_dev(iodev->si_dev, &iodev->si_busy); -} - - -void -nsc_unpinned_data(nsc_iodev_t *iodev, nsc_off_t pos, nsc_size_t len) -{ - nsc_fd_t *fd; - - if (!iodev) - return; - - mutex_enter(&iodev->si_dev->nsc_lock); - iodev->si_busy++; - mutex_exit(&iodev->si_dev->nsc_lock); - - for (fd = iodev->si_open; fd; fd = fd->sf_next) - if (fd->sf_avail & _NSC_ATTACH) - (*fd->sf_unpinned)(fd->sf_arg, pos, len); - - _nsc_wake_dev(iodev->si_dev, &iodev->si_busy); -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_dev.c b/usr/src/uts/common/avs/ns/nsctl/nsc_dev.c deleted file mode 100644 index ac618e0615..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_dev.c +++ /dev/null @@ -1,2215 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/debug.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/ddi.h> - -#include <sys/ncall/ncall.h> - -#define __NSC_GEN__ -#include "nsc_dev.h" - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#include "../nsctl.h" - -#define NSC_DEVMIN "DevMin" -#define NSC_DEVMAJ "DevMaj" - -#define _I(x) (((long)(&((nsc_io_t *)0)->x))/sizeof (long)) -#define _F(x) (((long)(&((nsc_fd_t *)0)->x))/sizeof (long)) - - -nsc_def_t _nsc_io_def[] = { - "Open", (uintptr_t)nsc_null, _I(open), - "Close", (uintptr_t)nsc_null, _I(close), - "Attach", (uintptr_t)nsc_null, _I(attach), - "Detach", (uintptr_t)nsc_null, _I(detach), - "Flush", (uintptr_t)nsc_null, _I(flush), - "Provide", 0, _I(provide), - 0, 0, 0 -}; - -nsc_def_t _nsc_fd_def[] = { - "Pinned", (uintptr_t)nsc_null, _F(sf_pinned), - "Unpinned", (uintptr_t)nsc_null, _F(sf_unpinned), - "Attach", (uintptr_t)nsc_null, _F(sf_attach), - "Detach", (uintptr_t)nsc_null, _F(sf_detach), - "Flush", (uintptr_t)nsc_null, _F(sf_flush), - 0, 0, 0 -}; - -kmutex_t _nsc_io_lock; -kmutex_t _nsc_devval_lock; - -nsc_io_t *_nsc_io_top = NULL; -nsc_io_t *_nsc_null_io = NULL; -nsc_dev_t *_nsc_dev_top = NULL; -nsc_dev_t *_nsc_dev_pend = NULL; -nsc_path_t *_nsc_path_top = NULL; -nsc_devval_t *_nsc_devval_top = NULL; - -extern nsc_def_t _nsc_disk_def[]; -extern nsc_def_t _nsc_cache_def[]; - -extern nsc_mem_t *_nsc_local_mem; -extern nsc_rmmap_t *_nsc_global_map; - -static clock_t _nsc_io_lbolt; - -static nsc_io_t *_nsc_find_io(char *, int, int *); -nsc_io_t *_nsc_reserve_io(char *, int); -static nsc_io_t *_nsc_alloc_io(int, char *, int); - -static int _nsc_open_fn(nsc_fd_t *, int); -static int _nsc_close_fn(nsc_fd_t *); -static int _nsc_alloc_fd(char *, int, int, nsc_fd_t **); -static int _nsc_alloc_iodev(nsc_dev_t *, int, nsc_iodev_t **); -static int _nsc_alloc_dev(char *, nsc_dev_t **); -static int _nsc_reopen_io(char *, int); -static int 
_nsc_reopen_dev(nsc_dev_t *, int); -static int _nsc_relock_dev(nsc_dev_t *, nsc_fd_t *, nsc_iodev_t *); -static int _nsc_reopen_fd(nsc_fd_t *, int); -static int _nsc_decode_io(nsc_def_t *, nsc_io_t *); - -void _nsc_release_io(nsc_io_t *); -static void _nsc_free_fd(nsc_fd_t *); -static void _nsc_free_iodev(nsc_iodev_t *); -static void _nsc_free_dev(nsc_dev_t *); -static void _nsc_free_io(nsc_io_t *); -static void _nsc_relink_fd(nsc_fd_t *, nsc_fd_t **, nsc_fd_t **, nsc_iodev_t *); - -static int _nsc_setval(nsc_dev_t *, char *, char *, int, int); -static void r_nsc_setval(ncall_t *, int *); -static void r_nsc_setval_all(ncall_t *, int *); - -extern void _nsc_add_disk(nsc_io_t *); -extern void _nsc_add_cache(nsc_io_t *); - - -/* - * void - * _nsc_init_dev (void) - * Initialise device subsystem. - * - * Calling/Exit State: - * Called at driver initialisation time to allocate necessary - * data structures. - */ -void -_nsc_init_dev() -{ - mutex_init(&_nsc_io_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&_nsc_devval_lock, NULL, MUTEX_DRIVER, NULL); - - _nsc_null_io = nsc_register_io("null", NSC_NULL, (nsc_def_t *)0); - - if (!_nsc_null_io) - cmn_err(CE_PANIC, "nsctl: nsc_init_dev"); - - ncall_register_svc(NSC_SETVAL_ALL, r_nsc_setval_all); - ncall_register_svc(NSC_SETVAL, r_nsc_setval); -} - - -void -_nsc_deinit_dev() -{ - nsc_devval_t *dv; - nsc_val_t *vp; - - mutex_enter(&_nsc_devval_lock); - - while ((dv = _nsc_devval_top) != NULL) { - while ((vp = dv->dv_values) != NULL) { - dv->dv_values = vp->sv_next; - nsc_kmem_free(vp, sizeof (*vp)); - } - - _nsc_devval_top = dv->dv_next; - nsc_kmem_free(dv, sizeof (*dv)); - } - - mutex_exit(&_nsc_devval_lock); - - ncall_unregister_svc(NSC_SETVAL_ALL); - ncall_unregister_svc(NSC_SETVAL); - - mutex_destroy(&_nsc_devval_lock); - mutex_destroy(&_nsc_io_lock); -} - - -/* - * nsc_io_t * - * nsc_register_io (char *name, int type, nsc_def_t *def) - * Register an I/O module. 
- * - * Calling/Exit State: - * Returns a token for use in future calls to nsc_unregister_io. - * The ID and flags for the module are specified by 'type' and - * the appropriate entry points are defined using 'def'. If - * registration fails NULL is returned. - * - * Description: - * Registers an I/O module for use by subsequent calls to - * nsc_open. - */ -nsc_io_t * -nsc_register_io(name, type, def) -char *name; -int type; -nsc_def_t *def; -{ - nsc_io_t *io, *tp; - int rc, id, flag; - nsc_io_t **iop; - - id = (type & NSC_TYPES); - flag = (type & ~NSC_TYPES); - - if ((!(id & NSC_ID) || (id & ~NSC_IDS)) && - (id != NSC_NULL || _nsc_null_io)) - return (NULL); - - if (!(io = _nsc_alloc_io(id, name, flag))) - return (NULL); - - rc = _nsc_decode_io(def, io); - - if (!rc && id != NSC_NULL) { - _nsc_free_io(io); - return (NULL); - } - - mutex_enter(&_nsc_io_lock); - - for (tp = _nsc_io_top; tp; tp = tp->next) { - if (strcmp(tp->name, name) == 0 || tp->id == id) { - mutex_exit(&_nsc_io_lock); - _nsc_free_io(io); - return (NULL); - } - } - - for (iop = &_nsc_io_top; *iop; iop = &(*iop)->next) - if (id >= (*iop)->id) - break; - - io->next = (*iop); - (*iop) = io; - - _nsc_io_lbolt = nsc_lbolt(); - - while ((rc = _nsc_reopen_io(NULL, 0)) != 0) - if (rc != ERESTART) - break; - - mutex_exit(&_nsc_io_lock); - return (io); -} - - -/* - * static int - * _nsc_decode_io (nsc_def_t *def, nsc_io_t *io) - * Decode I/O module definition. - * - * Calling/Exit State: - * Returns TRUE if the definition contains an adequate - * description of an I/O module. - * - * Description: - * Decode the definition of an I/O module and supply - * translation routines where possible for operations - * that are not defined. 
- */ -static int -_nsc_decode_io(def, io) -nsc_def_t *def; -nsc_io_t *io; -{ - nsc_decode_param(def, _nsc_io_def, (long *)io); - nsc_decode_param(def, _nsc_disk_def, (long *)io); - nsc_decode_param(def, _nsc_cache_def, (long *)io); - - _nsc_add_disk(io); - _nsc_add_cache(io); - - return (1); -} - - -/* - * int - * nsc_unregister_io (nsc_io_t *io, int flag) - * Un-register an I/O module. - * - * Calling/Exit State: - * Returns 0 on success, otherwise returns an error code. - * - * Description: - * The specified I/O module is un-registered if possible. - * All open file descriptors using the module will be closed - * in preparation for a subsequent re-open. - * - * If NSC_PCATCH is specified and a signal is received, - * the unregister will be terminated and EINTR returned. - */ -int -nsc_unregister_io(nsc_io_t *io, int flag) -{ - nsc_path_t *sp; - nsc_io_t *xio; - int rc = 0; - - if (io == _nsc_null_io) - return (EINVAL); - - mutex_enter(&_nsc_io_lock); - - for (xio = _nsc_io_top; xio; xio = xio->next) - if (xio == io) - break; - - if (!xio || io->pend) { - mutex_exit(&_nsc_io_lock); - return (xio ? EALREADY : 0); - } - - io->pend = 1; -lp: - for (sp = _nsc_path_top; sp; sp = sp->sp_next) - if (sp->sp_io == io) { - mutex_exit(&_nsc_io_lock); - - if ((rc = nsc_unregister_path(sp, flag)) != 0) { - io->pend = 0; - return (rc); - } - - mutex_enter(&_nsc_io_lock); - goto lp; - } - - _nsc_io_lbolt = nsc_lbolt(); - - while (io->refcnt && !rc) { - while ((rc = _nsc_reopen_io(NULL, flag)) != 0) - if (rc != ERESTART) - break; - - if (rc || !io->refcnt) - break; - - if (!cv_wait_sig(&io->cv, &_nsc_io_lock)) - rc = EINTR; - } - - /* - * We have tried to get rid of all the IO provider's clients. - * If there are still anonymous buffers outstanding, then fail - * the unregister. 
- */ - - if (!rc && io->abufcnt > 0) - rc = EUSERS; - - if (rc) - io->pend = 0; - - mutex_exit(&_nsc_io_lock); - - if (!rc) - _nsc_free_io(io); - - return (rc); -} - - -/* - * nsc_path_t * - * nsc_register_path (char *path, int type, nsc_io_t *io) - * Register interest in pathname. - * - * Calling/Exit State: - * Returns a token for use in future calls to - * nsc_unregister_path. The 'path' argument can contain - * wild characters. If registration fails NULL is returned. - * May not be called for io providers that support NSC_ANON. - * - * Description: - * Registers an interest in any pathnames matching 'path' - * which are opened with the specified type. - */ -nsc_path_t * -nsc_register_path(char *path, int type, nsc_io_t *io) -{ - nsc_path_t *sp, **spp; - int rc; - - if ((type & NSC_IDS) || !io || (io->provide & NSC_ANON) || - !(sp = nsc_kmem_zalloc(sizeof (*sp), KM_SLEEP, _nsc_local_mem))) - return (NULL); - - sp->sp_path = nsc_strdup(path); - sp->sp_type = type; - sp->sp_io = io; - - mutex_enter(&_nsc_io_lock); - - for (spp = &_nsc_path_top; *spp; spp = &(*spp)->sp_next) - if (io->id >= (*spp)->sp_io->id) - break; - - sp->sp_next = (*spp); - (*spp) = sp; - - _nsc_io_lbolt = nsc_lbolt(); - - while ((rc = _nsc_reopen_io(path, 0)) != 0) - if (rc != ERESTART) - break; - - mutex_exit(&_nsc_io_lock); - return (sp); -} - - -/* - * int - * nsc_unregister_path (nsc_path_t *sp, int flag) - * Un-register interest in pathname. - * - * Calling/Exit State: - * Returns 0 on success, otherwise returns an error code. - * - * Description: - * Interest in the specified pathname is un-registered - * if possible. All appropriate file descriptors will be - * closed in preparation for a subsequent re-open. - * - * If NSC_PCATCH is specified and a signal is received, - * the unregister will be terminated and EINTR returned. 
- */ -int -nsc_unregister_path(sp, flag) -nsc_path_t *sp; -int flag; -{ - nsc_path_t *xsp, **spp; - int rc; - - mutex_enter(&_nsc_io_lock); - - for (xsp = _nsc_path_top; xsp; xsp = xsp->sp_next) - if (xsp == sp) - break; - - if (!xsp || sp->sp_pend) { - mutex_exit(&_nsc_io_lock); - return (xsp ? EALREADY : 0); - } - - sp->sp_pend = 1; - _nsc_io_lbolt = nsc_lbolt(); - - while ((rc = _nsc_reopen_io(sp->sp_path, flag)) != 0) - if (rc != ERESTART) { - sp->sp_pend = 0; - mutex_exit(&_nsc_io_lock); - return (rc); - } - - for (spp = &_nsc_path_top; *spp; spp = &(*spp)->sp_next) - if (*spp == sp) - break; - - if (*spp) - (*spp) = sp->sp_next; - - mutex_exit(&_nsc_io_lock); - - nsc_strfree(sp->sp_path); - nsc_kmem_free(sp, sizeof (*sp)); - return (0); -} - - -/* - * static int - * _nsc_reopen_io (char *path, int flag) - * Force re-open of all file descriptors. - * - * Calling/Exit State: - * The _nsc_io_lock must be held across calls to - * this function. - * - * Returns 0 if the force succeeds without releasing - * _nsc_io_lock, otherwise returns an error code. - * - * Description: - * A re-open is forced for all file descriptors as - * appropriate. For performance reasons available - * devices are re-opened before those that would block. - */ -static int -_nsc_reopen_io(path, flag) -char *path; -int flag; -{ - nsc_dev_t *dp, *dev; - int rc, errno = 0; - int try, run; - - for (run = 1, try = (NSC_TRY | NSC_DEFER); run--; try = 0) { - for (dev = _nsc_dev_top; dev; dev = dev->nsc_next) { - if (path && !nsc_strmatch(dev->nsc_path, path)) - continue; - - if (!(rc = _nsc_reopen_dev(dev, flag | try))) - continue; - - for (dp = _nsc_dev_top; dp; dp = dp->nsc_next) - if (dp == dev) - break; - - if (!dp) - return (ERESTART); - - if (try && !(flag & NSC_TRY)) - run = 1; - if (!run && errno != ERESTART) - errno = rc; - } - } - - return (errno); -} - - -/* - * static int - * _nsc_reopen_dev (nsc_dev_t *dev, int flag) - * Force re-open of entire device. 
- * - * Calling/Exit State: - * The _nsc_io_lock must be held across calls to - * this function. - * - * Returns 0 if the force succeeds without releasing - * _nsc_io_lock, otherwise returns an error code. - * - * Description: - * A re-open is forced for all file descriptors for the - * device as appropriate. - */ -static int -_nsc_reopen_dev(dev, flag) -nsc_dev_t *dev; -int flag; -{ - int rc, errno = 0; - nsc_iodev_t *iodev; - int try, run; - nsc_fd_t *fd; - - mutex_enter(&dev->nsc_lock); - - for (run = 1, try = (NSC_TRY | NSC_DEFER); run--; try = 0) - for (iodev = dev->nsc_list; iodev; iodev = iodev->si_next) { - for (fd = iodev->si_open; fd; fd = fd->sf_next) { - if (!(rc = _nsc_reopen_fd(fd, flag | try))) - continue; - - if (rc == -ERESTART) - return (ERESTART); - - if (!_nsc_relock_dev(dev, fd, iodev)) - return (ERESTART); - - if (try && !(flag & NSC_TRY)) - run = 1; - if (!run && errno != ERESTART) - errno = rc; - } - } - - for (run = 1, try = (NSC_TRY | NSC_DEFER); run--; try = 0) - for (fd = dev->nsc_close; fd; fd = fd->sf_next) { - if (!(rc = _nsc_reopen_fd(fd, flag | try))) - continue; - - if (rc == -ERESTART) - return (ERESTART); - - if (!_nsc_relock_dev(dev, fd, NULL)) - return (ERESTART); - - if (try && !(flag & NSC_TRY)) - run = 1; - if (!run && errno != ERESTART) - errno = rc; - } - - mutex_exit(&dev->nsc_lock); - return (errno); -} - - -/* - * static int - * _nsc_relock_dev (nsc_dev_t *dev, nsc_fd_t *fd, nsc_iodev_t *iodev) - * Relock device structure if possible. - * - * Calling/Exit State: - * The _nsc_io_lock must be held across calls to - * this function. - * - * Checks whether the file descriptor is still part - * of the specified device and I/O device. If so the - * device lock is taken. Otherwise FALSE is returned. 
- */ -static int -_nsc_relock_dev(nsc_dev_t *dev, nsc_fd_t *fd, nsc_iodev_t *iodev) -{ - nsc_fd_t *fp = NULL; - nsc_iodev_t *iop; - nsc_dev_t *dp; - - for (dp = _nsc_dev_top; dp; dp = dp->nsc_next) - if (dp == dev) - break; - - if (!dp) - return (0); - - mutex_enter(&dev->nsc_lock); - - if (iodev) - for (iop = dev->nsc_list; iop; iop = iop->si_next) - if (iop == iodev) - break; - - if (!iodev || iop) { - fp = (iodev) ? iodev->si_open : dev->nsc_close; - - for (; fp; fp = fp->sf_next) - if (fp == fd) - break; - } - - if (!fp) { - mutex_exit(&dev->nsc_lock); - return (0); - } - - return (1); -} - - -/* - * static int - * _nsc_reopen_fd (nsc_fd_t *dev, int flag) - * Force re-open of file descriptor. - * - * Calling/Exit State: - * Both _nsc_io_lock and the device lock must be held - * across calls to this function. - * - * Returns 0 if the force succeeds without releasing - * any locks, otherwise returns an error code. If an - * error code is returned the device lock is released. - * - * Description: - * If appropriate the file descriptor is closed in order - * to force a subsequent open using the currently available - * resources. - */ -static int -_nsc_reopen_fd(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *dev = fd->sf_dev; - nsc_iodev_t *iodev = fd->sf_iodev; - int changed = 0; - int rc; - - if (!fd->sf_pend && !iodev) - return (0); - - if (fd->sf_pend == _NSC_OPEN) - if (fd->sf_lbolt - _nsc_io_lbolt > 0) - return (0); - - if (iodev && - (iodev->si_io == - _nsc_find_io(dev->nsc_path, fd->sf_type, &changed)) && - !changed) - return (0); - - if (iodev) - fd->sf_reopen = 1; - - mutex_exit(&_nsc_io_lock); - - dev->nsc_reopen = 1; - - rc = _nsc_close_fd(fd, flag); - - dev->nsc_reopen = 0; - - if (rc == EAGAIN && (flag & NSC_DEFER) && fd->sf_reopen) - dev->nsc_drop = 1; - - mutex_exit(&dev->nsc_lock); - - if (rc == -ERESTART) - delay(2); /* allow other threads cpu time */ - - mutex_enter(&_nsc_io_lock); - return (rc ? 
rc : ERESTART); -} - - -/* - * nsc_fd_t * - * nsc_open (char *path, int type, nsc_def_t *def, blind_t arg, int *sts) - * Open file descriptor for pathname. - * - * Calling/Exit State: - * Returns file descriptor if open succeeds, otherwise - * returns 0 and puts error code in the location pointed - * to by sts. - * - * Description: - * Open the specified pathname using an appropriate access - * method. - */ -nsc_fd_t * -nsc_open(path, type, def, arg, sts) -char *path; -int type; -nsc_def_t *def; -blind_t arg; -int *sts; -{ - int flag, rc; - nsc_fd_t *fd; - - flag = (type & ~NSC_TYPES); - type &= NSC_TYPES; - - if ((flag & NSC_READ) == 0) - flag |= NSC_RDWR; - - if ((rc = _nsc_alloc_fd(path, type, flag, &fd)) != 0) { - if (sts) - *sts = rc; - return (NULL); - } - - fd->sf_arg = arg; - fd->sf_aio = _nsc_null_io; - - nsc_decode_param(def, _nsc_fd_def, (long *)fd); - - mutex_enter(&fd->sf_dev->nsc_lock); - - while ((rc = _nsc_open_fd(fd, flag)) != 0) - if (rc != ERESTART) - break; - - mutex_exit(&fd->sf_dev->nsc_lock); - - if (rc) { - _nsc_free_fd(fd); - if (sts) - *sts = rc; - return (NULL); - } - - return (fd); -} - - -/* - * int - * _nsc_open_fd (nsc_fd_t *fd, int flag) - * Open file descriptor. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns 0 if the open succeeds, otherwise - * returns an error code. - * - * Description: - * Open the specified file descriptor. 
- */ -int -_nsc_open_fd(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *dev = fd->sf_dev; - int rc; - - if (fd->sf_pend) - return (_nsc_wait_dev(dev, flag)); - - if (fd->sf_iodev) - return (0); - if (flag & NSC_NOBLOCK) - return (EAGAIN); - - fd->sf_pend = _NSC_OPEN; - fd->sf_lbolt = nsc_lbolt(); - - mutex_exit(&dev->nsc_lock); - - rc = _nsc_open_fn(fd, flag); - - mutex_enter(&dev->nsc_lock); - fd->sf_pend = 0; - - if (!rc) - fd->sf_iodev->si_pend = 0; - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - return (rc ? rc : ERESTART); -} - - -/* - * static int - * _nsc_open_fn (nsc_fd_t *fd, int flag) - * Allocate I/O device and open file descriptor. - * - * Calling/Exit State: - * No locks may be held across this function. - * - * If the open succeeds an I/O device will be - * attached to the file descriptor, marked as - * pending and 0 returned. Otherwise, returns - * an error code. - * - * Description: - * Allocate an I/O device and open the specified - * file descriptor. 
- */ -static int -_nsc_open_fn(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *dev = fd->sf_dev; - nsc_iodev_t *iodev; - int rc; - - if ((rc = _nsc_alloc_iodev(dev, fd->sf_type, &iodev)) != 0) - return (rc); - - mutex_enter(&dev->nsc_lock); - - if (iodev->si_pend) { - rc = _nsc_wait_dev(dev, flag); - mutex_exit(&dev->nsc_lock); - _nsc_free_iodev(iodev); - return (rc); - } - - iodev->si_pend = _NSC_OPEN; - mutex_exit(&dev->nsc_lock); - - rc = (*iodev->si_io->open)(dev->nsc_path, - (fd->sf_flag & ~NSC_RDWR), &fd->sf_cd, iodev); - - if (rc) { - iodev->si_pend = 0; - _nsc_free_iodev(iodev); - return (rc); - } - - /* save away the DevMaj and DevMin values */ - if (iodev->si_io->id == NSC_RAW_ID) { - rc = _nsc_setval(dev, NULL, NSC_DEVMAJ, - (int)getmajor((dev_t)fd->sf_cd), FALSE); -#ifdef DEBUG - if (rc != 1) { - cmn_err(CE_NOTE, "!nsctl: could not set DevMaj (%s:%x)", - dev->nsc_path, (int)getmajor((dev_t)fd->sf_cd)); - } -#endif - - rc = _nsc_setval(dev, NULL, NSC_DEVMIN, - (int)getminor((dev_t)fd->sf_cd), FALSE); -#ifdef DEBUG - if (rc != 1) { - cmn_err(CE_NOTE, "!nsctl: could not set DevMin (%s:%x)", - dev->nsc_path, (int)getminor((dev_t)fd->sf_cd)); - } -#endif - } - - fd->sf_iodev = iodev; - _nsc_relink_fd(fd, &dev->nsc_close, &iodev->si_open, iodev); - - return (0); -} - - -/* - * int - * nsc_close (nsc_fd_t *fd) - * Close file descriptor for pathname. - * - * Calling/Exit State: - * Returns 0 if close succeeds, otherwise returns error - * code. - * - * Description: - * Close the specified file descriptor. It is assumed - * that all other users of this file descriptor have - * finished. Any reserve will be discarded before the - * close is performed. 
- */ -int -nsc_close(fd) -nsc_fd_t *fd; -{ - int rc; - - if (!fd) - return (0); - - while (fd->sf_reserve) - nsc_release(fd); - - mutex_enter(&fd->sf_dev->nsc_lock); - - fd->sf_owner = NULL; - - while ((rc = _nsc_close_fd(fd, 0)) != 0) - if (rc != ERESTART) - break; - - nsc_decode_param(_nsc_fd_def, _nsc_fd_def, (long *)fd); - - mutex_exit(&fd->sf_dev->nsc_lock); - - if (!rc) - _nsc_free_fd(fd); - return (rc); -} - - -/* - * int - * _nsc_close_fd (nsc_fd_t *fd, int flag) - * Close file descriptor. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns 0 if the close succeeds, otherwise - * returns an error code. - * - * Description: - * Close the specified file descriptor. - */ -int -_nsc_close_fd(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *dev = fd->sf_dev; - nsc_iodev_t *iodev; - int rc; - - if (fd->sf_pend) { - if (fd->sf_pend == _NSC_CLOSE && dev->nsc_reopen != 0) - return (-ERESTART); - - return (_nsc_wait_dev(dev, flag)); - } - - flag |= NSC_RDWR; - iodev = fd->sf_iodev; - - if (!iodev) - return (0); - - if ((rc = _nsc_detach_fd(fd, flag)) != 0) - return (rc); - - if (iodev->si_pend) - return (_nsc_wait_dev(dev, flag)); - - if (iodev->si_open == fd && !fd->sf_next) { - if ((rc = _nsc_detach_iodev(iodev, NULL, flag)) != 0) - return (rc); - - if (dev->nsc_list == iodev && !iodev->si_next) - if ((rc = _nsc_detach_dev(dev, NULL, flag)) != 0) - return (rc); - } - - if (flag & NSC_NOBLOCK) - return (EAGAIN); - - fd->sf_pend = _NSC_CLOSE; - iodev->si_pend = _NSC_CLOSE; - mutex_exit(&dev->nsc_lock); - - rc = _nsc_close_fn(fd); - - mutex_enter(&dev->nsc_lock); - fd->sf_pend = 0; - - fd->sf_reopen = 0; - if (rc) - iodev->si_pend = 0; - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - return (rc ? rc : ERESTART); -} - - -/* - * static int - * _nsc_close_fn (nsc_fd_t *fd) - * Close file descriptor and free I/O device. 
- * - * Calling/Exit State: - * No locks may be held across this function. - * - * Returns 0 if the close succeeds, otherwise - * returns an error code. - * - * If the close succeeds the I/O device will be - * detached from the file descriptor, released - * and 0 returned. Otherwise, returns an error - * code. - * - * Description: - * Close the specified file descriptor and free - * the I/O device. - */ -static int -_nsc_close_fn(fd) -nsc_fd_t *fd; -{ - nsc_iodev_t *iodev = fd->sf_iodev; - nsc_dev_t *dev = fd->sf_dev; - int last, rc; - - last = (iodev->si_open == fd && !fd->sf_next); - - if (last || (iodev->si_io->flag & NSC_REFCNT)) - if ((rc = (*iodev->si_io->close)(fd->sf_cd)) != 0) - return (rc); - - fd->sf_iodev = NULL; - _nsc_relink_fd(fd, &iodev->si_open, &dev->nsc_close, iodev); - - iodev->si_pend = 0; - _nsc_free_iodev(iodev); - - return (0); -} - - -/* - * void - * nsc_set_owner (nsc_fd_t *fd, nsc_iodev_t *iodev) - * Set owner associated with file descriptor. - * - * Calling/Exit State: - * Sets the owner field in the file descriptor. - */ -void -nsc_set_owner(nsc_fd_t *fd, nsc_iodev_t *iodev) -{ - if (fd) { - mutex_enter(&fd->sf_dev->nsc_lock); - fd->sf_owner = iodev; - mutex_exit(&fd->sf_dev->nsc_lock); - } -} - - -/* - * char * - * nsc_pathname (nsc_fd_t *fd) - * Pathname associated with file descriptor. - * - * Calling/Exit State: - * Returns a pointer to the pathname associated - * with the given file descriptor. - */ -char * -nsc_pathname(fd) -nsc_fd_t *fd; -{ - return ((fd) ? (fd->sf_dev->nsc_path) : 0); -} - - -/* - * int - * nsc_fdpathcmp(nsc_fd_t *fd, uint64_t phash, char *path) - * Compare fd to pathname and hash - * - * Calling/Exit State: - * Returns comparison value like strcmp(3C). - * - * Description: - * Does an optimised comparison of the pathname and associated hash - * value (as returned from nsc_strhash()) against the pathname of - * the filedescriptor, fd. 
- */ -int -nsc_fdpathcmp(nsc_fd_t *fd, uint64_t phash, char *path) -{ - int rc = -1; - - if (fd != NULL && fd->sf_dev->nsc_phash == phash) - rc = strcmp(fd->sf_dev->nsc_path, path); - - return (rc); -} - - -static int -_nsc_setval(nsc_dev_t *dev, char *path, char *name, int val, int do_ncall) -{ - nsc_devval_t *dv; - nsc_rval_t *rval; - ncall_t *ncall; - nsc_val_t *vp; - uint64_t phash; - char *pp; - int rc; - - ASSERT(dev != NULL || path != NULL); -#ifdef DEBUG - if (dev != NULL && path != NULL) { - ASSERT(strcmp(dev->nsc_path, path) == 0); - } -#endif - - pp = (dev != NULL) ? dev->nsc_path : path; - - if (strlen(name) >= NSC_SETVAL_MAX) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nsc_setval: max name size(%d) exceeded(%d)", - NSC_SETVAL_MAX-1, (int)strlen(name)); -#endif - return (0); - } - - phash = nsc_strhash(pp); - - mutex_enter(&_nsc_devval_lock); - - if (dev != NULL) - dv = dev->nsc_values; - else { - for (dv = _nsc_devval_top; dv != NULL; dv = dv->dv_next) { - if (phash == dv->dv_phash && - strcmp(pp, dv->dv_path) == 0) - /* found dv for device */ - break; - } - } - - if (dv == NULL) { - dv = nsc_kmem_zalloc(sizeof (*dv), KM_SLEEP, _nsc_local_mem); - if (dv == NULL) { - mutex_exit(&_nsc_devval_lock); - return (0); - } - - (void) strncpy(dv->dv_path, pp, sizeof (dv->dv_path)); - dv->dv_phash = phash; - - dv->dv_next = _nsc_devval_top; - _nsc_devval_top = dv; - if (dev != NULL) - dev->nsc_values = dv; - } - - for (vp = dv->dv_values; vp; vp = vp->sv_next) { - if (strcmp(vp->sv_name, name) == 0) { - vp->sv_value = val; - break; - } - } - - if (vp == NULL) { - vp = nsc_kmem_zalloc(sizeof (*vp), KM_SLEEP, _nsc_local_mem); - if (vp != NULL) { - (void) strncpy(vp->sv_name, name, sizeof (vp->sv_name)); - vp->sv_value = val; - vp->sv_next = dv->dv_values; - dv->dv_values = vp; - } - } - - mutex_exit(&_nsc_devval_lock); - - /* - * phoenix: ncall the new value to the other node now. 
- */ - - if (vp && do_ncall) { - /* CONSTCOND */ - ASSERT(sizeof (nsc_rval_t) <= NCALL_DATA_SZ); - - rval = nsc_kmem_zalloc(sizeof (*rval), KM_SLEEP, - _nsc_local_mem); - if (rval == NULL) { - goto out; - } - - rc = ncall_alloc(ncall_mirror(ncall_self()), 0, 0, &ncall); - if (rc == 0) { - (void) strncpy(rval->path, pp, sizeof (rval->path)); - (void) strncpy(rval->name, name, sizeof (rval->name)); - rval->value = val; - - rc = ncall_put_data(ncall, rval, sizeof (*rval)); - if (rc == 0) { - /* - * Send synchronously and read a reply - * so that we know that the remote - * setval has completed before this - * function returns and hence whilst - * the device is still reserved on this - * node. - */ - if (ncall_send(ncall, 0, NSC_SETVAL) == 0) - (void) ncall_read_reply(ncall, 1, &rc); - } - - ncall_free(ncall); - } - - nsc_kmem_free(rval, sizeof (*rval)); - } - -out: - return (vp ? 1 : 0); -} - - -/* ARGSUSED */ - -static void -r_nsc_setval(ncall_t *ncall, int *ap) -{ - nsc_rval_t *rval; - int rc; - - rval = nsc_kmem_zalloc(sizeof (*rval), KM_SLEEP, _nsc_local_mem); - if (rval == NULL) { - ncall_reply(ncall, ENOMEM); - return; - } - - rc = ncall_get_data(ncall, rval, sizeof (*rval)); - if (rc != 0) { - ncall_reply(ncall, EFAULT); - return; - } - - if (_nsc_setval(NULL, rval->path, rval->name, rval->value, FALSE)) - rc = 0; - else - rc = ENOMEM; - - ncall_reply(ncall, rc); - nsc_kmem_free(rval, sizeof (*rval)); -} - - -/* ARGSUSED */ - -static void -r_nsc_setval_all(ncall_t *ncall, int *ap) -{ - nsc_rval_t *in = NULL, *out = NULL; - nsc_devval_t *dv; - nsc_val_t *vp; - ncall_t *np; - uint64_t phash; - int rc; - - /* CONSTCOND */ - ASSERT(sizeof (nsc_rval_t) <= NCALL_DATA_SZ); - - in = nsc_kmem_zalloc(sizeof (*in), KM_SLEEP, _nsc_local_mem); - out = nsc_kmem_zalloc(sizeof (*out), KM_SLEEP, _nsc_local_mem); - if (in == NULL || out == NULL) { - if (in != NULL) { - nsc_kmem_free(in, sizeof (*in)); - in = NULL; - } - if (out != NULL) { - nsc_kmem_free(out, sizeof (*out)); - 
out = NULL; - } - ncall_reply(ncall, ENOMEM); - } - - rc = ncall_get_data(ncall, in, sizeof (*in)); - if (rc != 0) { - ncall_reply(ncall, EFAULT); - return; - } - - phash = nsc_strhash(in->path); - - (void) strncpy(out->path, in->path, sizeof (out->path)); - - rc = ncall_alloc(ncall_mirror(ncall_self()), 0, 0, &np); - if (rc != 0) { - ncall_reply(ncall, ENOMEM); - return; - } - - mutex_enter(&_nsc_devval_lock); - - for (dv = _nsc_devval_top; dv; dv = dv->dv_next) { - if (dv->dv_phash == phash && - strcmp(dv->dv_path, in->path) == 0) - break; - } - - if (dv) { - for (vp = dv->dv_values; vp; vp = vp->sv_next) { - if (strcmp(vp->sv_name, NSC_DEVMIN) == 0 || - strcmp(vp->sv_name, NSC_DEVMAJ) == 0) { - /* ignore the implicit DevMin/DevMaj values */ - continue; - } - - (void) strncpy(out->name, vp->sv_name, - sizeof (out->name)); - out->value = vp->sv_value; - - rc = ncall_put_data(np, out, sizeof (*out)); - if (rc == 0) { - /* - * Send synchronously and read a reply - * so that we know that the remote - * setval has completed before this - * function returns. - */ - if (ncall_send(np, 0, NSC_SETVAL) == 0) - (void) ncall_read_reply(np, 1, &rc); - } - - ncall_reset(np); - } - - ncall_free(np); - rc = 0; - } else { - rc = ENODEV; - } - - mutex_exit(&_nsc_devval_lock); - - ncall_reply(ncall, rc); - - nsc_kmem_free(out, sizeof (*out)); - nsc_kmem_free(in, sizeof (*in)); -} - - -/* - * int - * nsc_setval (nsc_fd_t *fd, char *name, int val) - * Set value for device. - * - * Calling/Exit State: - * Returns 1 if the value has been set, otherwise 0. - * Must be called with the fd reserved. - * - * Description: - * Sets the specified global variable for the device - * to the value provided. - */ -int -nsc_setval(nsc_fd_t *fd, char *name, int val) -{ - if (!fd) - return (0); - - if (!nsc_held(fd)) - return (0); - - return (_nsc_setval(fd->sf_dev, NULL, name, val, TRUE)); -} - - -/* - * int - * nsc_getval (nsc_fd_t *fd, char *name, int *vp) - * Get value from device. 
- * - * Calling/Exit State: - * Returns 1 if the value has been found, otherwise 0. - * Must be called with the fd reserved, except for "DevMaj" / "DevMin". - * - * Description: - * Finds the value of the specified device variable for - * the device and returns it in the location pointed to - * by vp. - */ -int -nsc_getval(nsc_fd_t *fd, char *name, int *vp) -{ - nsc_devval_t *dv; - nsc_val_t *val; - - if (!fd) - return (0); - - /* - * Don't check for nsc_held() for the device number values - * since these are magically created and cannot change when - * the fd is not reserved. - */ - - if (strcmp(name, NSC_DEVMAJ) != 0 && - strcmp(name, NSC_DEVMIN) != 0 && - !nsc_held(fd)) - return (0); - - mutex_enter(&_nsc_devval_lock); - - dv = fd->sf_dev->nsc_values; - val = NULL; - - if (dv != NULL) { - for (val = dv->dv_values; val; val = val->sv_next) { - if (strcmp(val->sv_name, name) == 0) { - *vp = val->sv_value; - break; - } - } - } - - mutex_exit(&_nsc_devval_lock); - - return (val ? 1 : 0); -} - - -/* - * char * - * nsc_shared (nsc_fd_t *fd) - * Device is currently shared. - * - * Calling/Exit State: - * The device lock must be held across calls to this - * this function. - * - * Returns an indication of whether the device accessed - * by the file descriptor is currently referenced by more - * than one user. - * - * This is only intended for use in performance critical - * situations. - */ -int -nsc_shared(fd) -nsc_fd_t *fd; -{ - nsc_iodev_t *iodev; - int cnt = 0; - - if (!fd) - return (0); - if (!fd->sf_iodev) - return (1); - - for (iodev = fd->sf_dev->nsc_list; iodev; iodev = iodev->si_next) - for (fd = iodev->si_open; fd; fd = fd->sf_next) - if (!fd->sf_owner && cnt++) - return (1); - - return (0); -} - - -/* - * kmutex_t * - * nsc_lock_addr (nsc_fd_t *fd) - * Address of device lock. - * - * Calling/Exit State: - * Returns a pointer to the spin lock associated with the - * device. 
- * - * Description: - * This is only intended for use in performance critical - * situations in conjunction with nsc_reserve_lk. - */ -kmutex_t * -nsc_lock_addr(fd) -nsc_fd_t *fd; -{ - return (&fd->sf_dev->nsc_lock); -} - - -/* - * int - * _nsc_call_io (long f, blind_t a, blind_t b, blind_t c) - * Call information function. - * - * Calling/Exit State: - * Returns result from function or 0 if not available. - * f represents the offset into the I/O structure at which - * the required function can be found and a, b, c are the - * desired arguments. - * - * Description: - * Calls the requested function for the first available - * cache interface. - */ -int -_nsc_call_io(long f, blind_t a, blind_t b, blind_t c) -{ - nsc_io_t *io; - int (*fn)(); - int rc; - - io = _nsc_reserve_io(NULL, NSC_SDBC_ID); - if (!io) - io = _nsc_reserve_io(NULL, NSC_NULL); - - fn = (blindfn_t)(((long *)io)[f]); - rc = (*fn)(a, b, c); - - _nsc_release_io(io); - return (rc); -} - - -/* - * nsc_io_t * - * _nsc_reserve_io (char *, int type) - * Reserve I/O module. - * - * Calling/Exit State: - * Returns address of I/O structure matching specified - * type, or NULL. - * - * Description: - * Searches for an appropriate I/O module and increments - * the reference count to prevent it being unregistered. - */ -nsc_io_t * -_nsc_reserve_io(path, type) -char *path; -int type; -{ - nsc_io_t *io; - - mutex_enter(&_nsc_io_lock); - - if ((io = _nsc_find_io(path, type, NULL)) != 0) - io->refcnt++; - - mutex_exit(&_nsc_io_lock); - return (io); -} - - -/* - * static nsc_io_t * - * _nsc_find_io (char *path, int type, int *changed) - * Find I/O module. - * - * Calling/Exit State: - * The _nsc_io_lock must be held across calls to - * this function. - * - * Returns address of I/O structure matching specified - * type, or NULL. - * - * 'changed' will be set to non-zero if there is a pending - * nsc_path_t that matches the criteria for the requested type. 
- * This allows nsctl to distinguish between multiple - * nsc_register_path's done by the same I/O provider. - * - * Description: - * Searches for an appropriate I/O module. - * - * 1. If <type> is a single module id find the specified I/O - * module by module id. - * - * 2. Find the highest module that provides any of the I/O types - * included in <type>, taking into account any modules - * registered via the nsc_register_path() interface if <path> - * is non-NULL. - * - * 3. Find an I/O module following the rules in (2), but whose - * module id is less than the id OR'd into <type>. - * - * If no module is found by the above algorithms and NSC_NULL was - * included in <type>, return the _nsc_null_io module. Otherwise - * return NULL. - */ -static nsc_io_t * -_nsc_find_io(char *path, int type, int *changed) -{ - nsc_path_t *sp = NULL; - nsc_path_t *pp = NULL; - nsc_io_t *io; - - type &= NSC_TYPES; - - if (path) { - for (sp = _nsc_path_top; sp; sp = sp->sp_next) { - if ((type & NSC_ID) && - sp->sp_io->id >= (type & NSC_IDS)) - continue; - - if (sp->sp_pend || (type & sp->sp_type) == 0) - continue; - - if (nsc_strmatch(path, sp->sp_path)) - break; - } - - if (sp) { - /* look for matching pending paths */ - for (pp = _nsc_path_top; pp; pp = pp->sp_next) { - if (pp->sp_pend && - (type & pp->sp_type) && - nsc_strmatch(path, pp->sp_path)) { - break; - } - } - } - } - - for (io = _nsc_io_top; io; io = io->next) { - if (io->pend) - continue; - - if (type & NSC_ID) { - if ((type & ~NSC_IDS) == 0) { - if (io->id == type) - break; - continue; - } - - if (io->id >= (type & NSC_IDS)) - continue; - } - - if (io->provide & type) - break; - } - - if (pp && (!io || pp->sp_io->id >= io->id)) { - /* - * Mark this as a path change. 
- */ - if (changed) { - *changed = 1; - } - } - - if (sp && (!io || sp->sp_io->id >= io->id)) - io = sp->sp_io; - - if (!io && !(type & NSC_NULL)) - return (NULL); - - if (!io) - io = _nsc_null_io; - - return (io); -} - - -/* - * void - * _nsc_release_io (nsc_io_t *) - * Release I/O module. - * - * Description: - * Releases reference to I/O structure and wakes up - * anybody waiting on it. - */ -void -_nsc_release_io(io) -nsc_io_t *io; -{ - mutex_enter(&_nsc_io_lock); - - io->refcnt--; - cv_broadcast(&io->cv); - - mutex_exit(&_nsc_io_lock); -} - - -/* - * static int - * _nsc_alloc_fd (char *path, int type, int flag, nsc_fd_t **fdp) - * Allocate file descriptor structure. - * - * Calling/Exit State: - * Stores address of file descriptor through fdp and - * returns 0 on success, otherwise returns error code. - * - * Description: - * A new file descriptor is allocated and linked in to - * the file descriptor chain which is protected by the - * device lock. - * - * On return the file descriptor must contain all the - * information necessary to perform an open. Details - * specific to user callbacks are not required yet. - */ -static int -_nsc_alloc_fd(path, type, flag, fdp) -char *path; -int type, flag; -nsc_fd_t **fdp; -{ - nsc_dev_t *dev; - nsc_fd_t *fd; - int rc; - - if (!(fd = (nsc_fd_t *)nsc_kmem_zalloc( - sizeof (*fd), KM_SLEEP, _nsc_local_mem))) - return (ENOMEM); - - if ((rc = _nsc_alloc_dev(path, &dev)) != 0) { - nsc_kmem_free(fd, sizeof (*fd)); - return (rc); - } - - mutex_enter(&dev->nsc_lock); - - fd->sf_type = type; - fd->sf_flag = flag; - fd->sf_dev = dev; - fd->sf_next = dev->nsc_close; - dev->nsc_close = fd; - - mutex_exit(&dev->nsc_lock); - - *fdp = fd; - return (0); -} - - -/* - * static int - * _nsc_free_fd (nsc_fd_t *) - * Free file descriptor. - * - * Description: - * The file descriptor is removed from the chain and free'd - * once pending activity has completed. 
- */ -static void -_nsc_free_fd(fd) -nsc_fd_t *fd; -{ - nsc_dev_t *dev = fd->sf_dev; - nsc_fd_t **fdp; - - if (!fd) - return; - - mutex_enter(&dev->nsc_lock); - - for (fdp = &dev->nsc_close; *fdp; fdp = &(*fdp)->sf_next) - if (*fdp == fd) { - *fdp = fd->sf_next; - break; - } - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - while (fd->sf_pend) - (void) _nsc_wait_dev(dev, 0); - - mutex_exit(&dev->nsc_lock); - - _nsc_free_dev(dev); - - nsc_kmem_free(fd, sizeof (*fd)); -} - - -/* - * static void - * _nsc_relink_fd (nsc_fd_t *fd, nsc_fd_t **from, - * nsc_fd_t **to, nsc_iodev_t *iodev) - * Relink file descriptor. - * - * Description: - * Remove the file descriptor from the 'from' chain and - * add it to the 'to' chain. The busy flag in iodev is - * used to prevent modifications to the chain whilst a - * callback is in progress. - */ -static void -_nsc_relink_fd(nsc_fd_t *fd, nsc_fd_t **from, nsc_fd_t **to, nsc_iodev_t *iodev) -{ - nsc_dev_t *dev = fd->sf_dev; - nsc_fd_t **fdp; - - mutex_enter(&dev->nsc_lock); - - while (iodev->si_busy) - (void) _nsc_wait_dev(dev, 0); - - for (fdp = from; *fdp; fdp = &(*fdp)->sf_next) - if (*fdp == fd) { - *fdp = fd->sf_next; - break; - } - - fd->sf_next = (*to); - (*to) = fd; - - mutex_exit(&dev->nsc_lock); -} - - -/* - * static int - * _nsc_alloc_iodev (nsc_dev_t *dev, int type, nsc_iodev_t **iodevp) - * Allocate I/O device structure. - * - * Calling/Exit State: - * Stores address of I/O device structure through iodevp - * and returns 0 on success, otherwise returns error code. - * - * Description: - * If an entry for the I/O device already exists increment - * the reference count and return the address, otherwise - * allocate a new structure. - * - * A new structure is allocated before scanning the chain - * to avoid calling the memory allocator with a spin lock - * held. If an entry is found the new structure is free'd. - * - * The I/O device chain is protected by the device lock. 
- */ -static int -_nsc_alloc_iodev(dev, type, iodevp) -nsc_dev_t *dev; -int type; -nsc_iodev_t **iodevp; -{ - nsc_iodev_t *iodev, *ip; - nsc_io_t *io; - - if (!(iodev = (nsc_iodev_t *)nsc_kmem_zalloc( - sizeof (*iodev), KM_SLEEP, _nsc_local_mem))) - return (ENOMEM); - - mutex_init(&iodev->si_lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&iodev->si_cv, NULL, CV_DRIVER, NULL); - - if (!(io = _nsc_reserve_io(dev->nsc_path, type))) { - mutex_destroy(&iodev->si_lock); - cv_destroy(&iodev->si_cv); - nsc_kmem_free(iodev, sizeof (*iodev)); - return (ENXIO); - } - - iodev->si_refcnt++; - iodev->si_io = io; - iodev->si_dev = dev; - - mutex_enter(&_nsc_io_lock); - dev->nsc_refcnt++; - mutex_exit(&_nsc_io_lock); - - mutex_enter(&dev->nsc_lock); - - for (ip = dev->nsc_list; ip; ip = ip->si_next) - if (ip->si_io == io) { - ip->si_refcnt++; - break; - } - - if (!ip) { - iodev->si_next = dev->nsc_list; - dev->nsc_list = iodev; - } - - mutex_exit(&dev->nsc_lock); - - if (ip) { - _nsc_free_iodev(iodev); - iodev = ip; - } - - *iodevp = iodev; - return (0); -} - - -/* - * static int - * _nsc_free_iodev (nsc_iodev_t *iodev) - * Free I/O device structure. - * - * Description: - * Decrements the reference count of a previously allocated - * I/O device structure. If this is the last reference it - * is removed from the device chain and free'd once pending - * activity has completed. 
- */ -static void -_nsc_free_iodev(nsc_iodev_t *iodev) -{ - nsc_iodev_t **ipp; - nsc_dev_t *dev; - - if (!iodev) - return; - - dev = iodev->si_dev; - - mutex_enter(&dev->nsc_lock); - - if (--iodev->si_refcnt > 0) { - mutex_exit(&dev->nsc_lock); - return; - } - - for (ipp = &dev->nsc_list; *ipp; ipp = &(*ipp)->si_next) - if (*ipp == iodev) { - *ipp = iodev->si_next; - break; - } - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - while (iodev->si_pend || iodev->si_rpend || iodev->si_busy) - (void) _nsc_wait_dev(dev, 0); - - mutex_exit(&dev->nsc_lock); - - _nsc_release_io(iodev->si_io); - _nsc_free_dev(dev); - - mutex_destroy(&iodev->si_lock); - cv_destroy(&iodev->si_cv); - - nsc_kmem_free(iodev, sizeof (*iodev)); -} - - -/* - * static int - * _nsc_alloc_dev (char *path, nsc_dev_t **devp) - * Allocate device structure. - * - * Calling/Exit State: - * Stores address of device structure through devp - * and returns 0 on success, otherwise returns error - * code. - * - * Description: - * If an entry for the device already exists increment - * the reference count and return the address, otherwise - * allocate a new structure. - * - * A new structure is allocated before scanning the device - * chain to avoid calling the memory allocator with a spin - * lock held. If the device is found the new structure is - * free'd. - * - * The device chain is protected by _nsc_io_lock. 
- */ -static int -_nsc_alloc_dev(char *path, nsc_dev_t **devp) -{ - nsc_dev_t *dev, *dp, **ddp; - nsc_devval_t *dv; - nsc_rval_t *rval; - ncall_t *ncall; - int rc; - - if (!(dev = (nsc_dev_t *)nsc_kmem_zalloc( - sizeof (*dev), KM_SLEEP, _nsc_local_mem))) - return (ENOMEM); - - dev->nsc_refcnt++; - - mutex_init(&dev->nsc_lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&dev->nsc_cv, NULL, CV_DRIVER, NULL); - - dev->nsc_phash = nsc_strhash(path); - dev->nsc_path = nsc_strdup(path); - - mutex_enter(&_nsc_io_lock); - - dev->nsc_next = _nsc_dev_pend; - _nsc_dev_pend = dev; - - mutex_exit(&_nsc_io_lock); - - mutex_enter(&_nsc_io_lock); - - for (dp = _nsc_dev_top; dp; dp = dp->nsc_next) - if (dp->nsc_phash == dev->nsc_phash && - strcmp(dp->nsc_path, dev->nsc_path) == 0) { - dp->nsc_refcnt++; - break; - } - - if (!dp) { - for (ddp = &_nsc_dev_pend; *ddp; ddp = &(*ddp)->nsc_next) - if (*ddp == dev) { - *ddp = dev->nsc_next; - break; - } - - dev->nsc_next = _nsc_dev_top; - _nsc_dev_top = dev; - } - - mutex_exit(&_nsc_io_lock); - - if (dp) { - _nsc_free_dev(dev); - dev = dp; - } - - /* - * Try and find the device/values header for this device - * and link it back to the device structure. - */ - - mutex_enter(&_nsc_devval_lock); - - if (dev->nsc_values == NULL) { - for (dv = _nsc_devval_top; dv; dv = dv->dv_next) { - if (dv->dv_phash == dev->nsc_phash && - strcmp(dv->dv_path, dev->nsc_path) == 0) { - dev->nsc_values = dv; - break; - } - } - } - - mutex_exit(&_nsc_devval_lock); - - /* - * Refresh the device/values from the other node - */ - - rval = nsc_kmem_zalloc(sizeof (*rval), KM_SLEEP, _nsc_local_mem); - if (rval == NULL) { - goto out; - } - - rc = ncall_alloc(ncall_mirror(ncall_self()), 0, 0, &ncall); - if (rc == 0) { - (void) strncpy(rval->path, path, sizeof (rval->path)); - - rc = ncall_put_data(ncall, rval, sizeof (*rval)); - if (rc == 0) { - /* - * Send synchronously and read a reply - * so that we know that the updates - * have completed before this - * function returns. 
- */ - if (ncall_send(ncall, 0, NSC_SETVAL_ALL) == 0) - (void) ncall_read_reply(ncall, 1, &rc); - } - - ncall_free(ncall); - } - - nsc_kmem_free(rval, sizeof (*rval)); - -out: - *devp = dev; - return (0); -} - - -/* - * static void - * _nsc_free_dev (nsc_dev_t *dev) - * Free device structure. - * - * Description: - * Decrements the reference count of a previously allocated - * device structure. If this is the last reference it is - * removed from the device chain and free'd once pending - * activity has completed. - * - * Whilst waiting for pending activity to cease the device is - * relinked onto the pending chain. - */ -static void -_nsc_free_dev(dev) -nsc_dev_t *dev; -{ - nsc_dev_t **ddp; - - if (!dev) - return; - - mutex_enter(&_nsc_io_lock); - - if (--dev->nsc_refcnt > 0) { - mutex_exit(&_nsc_io_lock); - return; - } - - for (ddp = &_nsc_dev_top; *ddp; ddp = &(*ddp)->nsc_next) - if (*ddp == dev) { - *ddp = dev->nsc_next; - dev->nsc_next = _nsc_dev_pend; - _nsc_dev_pend = dev; - break; - } - - mutex_exit(&_nsc_io_lock); - - mutex_enter(&dev->nsc_lock); - - while (dev->nsc_pend || dev->nsc_rpend || dev->nsc_wait) { - cv_wait(&dev->nsc_cv, &dev->nsc_lock); - } - - mutex_exit(&dev->nsc_lock); - - mutex_enter(&_nsc_io_lock); - - for (ddp = &_nsc_dev_pend; *ddp; ddp = &(*ddp)->nsc_next) - if (*ddp == dev) { - *ddp = dev->nsc_next; - break; - } - - mutex_exit(&_nsc_io_lock); - - mutex_destroy(&dev->nsc_lock); - cv_destroy(&dev->nsc_cv); - nsc_strfree(dev->nsc_path); - - nsc_kmem_free(dev, sizeof (*dev)); -} - - -/* - * static nsc_io_t * - * _nsc_alloc_io (int id, char *name, int flag) - * Allocate an I/O structure. - * - * Calling/Exit State: - * Returns the address of the I/O structure, or NULL. 
- */ -static nsc_io_t * -_nsc_alloc_io(id, name, flag) -int id; -char *name; -int flag; -{ - nsc_io_t *io; - - if (!(io = (nsc_io_t *)nsc_kmem_zalloc( - sizeof (*io), KM_NOSLEEP, _nsc_local_mem))) - return (NULL); - - cv_init(&io->cv, NULL, CV_DRIVER, NULL); - - io->id = id; - io->name = name; - io->flag = flag; - - return (io); -} - - -/* - * static void - * _nsc_free_io (int id, char *name, int flag) - * Free an I/O structure. - * - * Calling/Exit State: - * Free the I/O structure and remove it from the chain. - */ -static void -_nsc_free_io(io) -nsc_io_t *io; -{ - nsc_io_t **iop; - - mutex_enter(&_nsc_io_lock); - - for (iop = &_nsc_io_top; *iop; iop = &(*iop)->next) - if (*iop == io) - break; - - if (*iop) - (*iop) = io->next; - - mutex_exit(&_nsc_io_lock); - - cv_destroy(&io->cv); - nsc_kmem_free(io, sizeof (*io)); -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_dev.h b/usr/src/uts/common/avs/ns/nsctl/nsc_dev.h deleted file mode 100644 index 5aee2e66a6..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_dev.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -#ifndef _NSC_DEV_H -#define _NSC_DEV_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __NSC_GEN__ -Error: Illegal #include - private file. -#endif - - -#include <sys/nsctl/nsc_gen.h> -#include <sys/nsc_ddi.h> - -/* - * Interface to I/O module. - */ - -typedef struct nsc_io_s { - struct nsc_io_s *next; /* Link to next I/O module */ - kcondvar_t cv; /* Blocking variable */ - int id; /* Module id */ - int flag; /* Flags */ - char *name; /* Module name */ - int refcnt; /* Reference count */ - int abufcnt; /* # of allocated anonymous buffers */ - int pend; /* Unregister pending */ - int (*open)(); /* Open device */ - int (*close)(); /* Close device */ - int (*attach)(); /* Attach device */ - int (*detach)(); /* Detach device */ - int (*flush)(); /* Flush device */ - int (*alloc_buf)(); /* Allocate buffer */ - int (*free_buf)(); /* Free buffer */ - int (*read)(); /* Read buffer */ - int (*write)(); /* Write buffer */ - int (*zero)(); /* Zero buffer */ - int (*copy)(); /* Copy buffer between handles */ - int (*copy_direct)(); /* Copy buffer between handle & disk */ - int (*uncommit)(); /* Uncommit buffer */ - struct nsc_buf_s *(*alloc_h)(); /* Allocate handle */ - int (*free_h)(); /* Free handle */ - int (*uread)(); /* User read */ - int (*uwrite)(); /* User write */ - int (*trksize)(); /* Set track size */ - int (*discard)(); /* Discard pinned data */ - int (*sizes)(); /* Return size of cache */ - int (*getpin)(); /* Get pinned info */ - int (*nodehints)(); /* Return current node hints */ - int (*partsize)(); /* Partition size */ - int (*maxfbas)(); /* Maximum I/O size */ - int (*control)(); /* Module control function */ - long provide; /* Interface provided */ -} nsc_io_t; - - -typedef struct nsc_path_s { - struct nsc_path_s *sp_next; /* Link to next path */ - char *sp_path; /* Pathname */ - int sp_type; /* Open type */ - nsc_io_t *sp_io; /* I/O module */ - int sp_pend; /* Unregister pending */ -} nsc_path_t; - - -/* 
- * Note: NSC_MAXPATH currently defined here and in nsctl.h - */ -#if !defined(NSC_MAXPATH) -#define NSC_MAXPATH 64 -#endif - - -#define NSC_SETVAL_MAX 32 - -typedef struct nsc_val_s { - struct nsc_val_s *sv_next; /* Link to next value */ - char sv_name[NSC_SETVAL_MAX]; /* Name of value */ - int sv_value; /* Value of name */ -} nsc_val_t; - - -typedef struct nsc_devval_s { - struct nsc_devval_s *dv_next; /* Next dev/val header */ - nsc_val_t *dv_values; /* The values */ - char dv_path[NSC_MAXPATH]; /* Path name of device */ - uint64_t dv_phash; /* Hash of pathname */ -} nsc_devval_t; - - -/* used for ncall */ -typedef struct nsc_rval_s { - char path[NSC_MAXPATH]; /* Path name of dev */ - char name[NSC_SETVAL_MAX]; /* Name of value */ - int value; /* Value of name */ -} nsc_rval_t; - - -extern int _nsc_maxdev; - -#define _NSC_OPEN 0x0004 /* Open in progress */ -#define _NSC_CLOSE 0x0008 /* Close in progress */ -#define _NSC_PINNED 0x0010 /* Pinned data reported */ -#define _NSC_ATTACH 0x0020 /* Available for I/O */ -#define _NSC_DETACH 0x0040 /* Detach in progress */ -#define _NSC_OWNER 0x0080 /* Owner detach in progress */ - - -typedef struct nsc_iodev_s { - struct nsc_iodev_s *si_next; /* Link to next I/O device */ - struct nsc_fd_s *si_open; /* Open file descriptors */ - kmutex_t si_lock; /* Lock to protect I/O chain */ - kcondvar_t si_cv; /* Blocking variable */ - int si_refcnt; /* Reference count */ - int si_busy; /* Callback in progress */ - int si_pend; /* Operation is pending */ - int si_rpend; /* Reserve is pending */ - int si_avail; /* Available for I/O */ - nsc_io_t *si_io; /* Interface to I/O module */ - void *si_active; /* Active I/O chain */ - struct nsc_dev_s *si_dev; /* Device structure */ -} nsc_iodev_t; - - -typedef struct nsc_dev_s { - struct nsc_dev_s *nsc_next; /* Link to next device */ - struct nsc_fd_s *nsc_close; /* Closed file descriptors */ - nsc_iodev_t *nsc_list; /* Active I/O modules */ - char *nsc_path; /* Pathname */ - uint64_t 
nsc_phash; /* Pathname hash */ - kmutex_t nsc_lock; /* Lock to protect state */ - int nsc_refcnt; /* Reference count */ - kcondvar_t nsc_cv; /* Blocking variable */ - int nsc_wait; /* Count of waiters */ - int nsc_pend; /* Operation is pending */ - int nsc_rpend; /* Reserve is pending */ - int nsc_drop; /* Detach on release */ - int nsc_reopen; /* Doing reopen */ - nsc_devval_t *nsc_values; /* Values - see nsc_setval() */ -} nsc_dev_t; - - -/* - * Storage file descriptor. - */ - -typedef struct nsc_fd_s { - struct nsc_fd_s *sf_next; /* Link to next descriptor */ - nsc_iodev_t *sf_iodev; /* I/O device structure */ - nsc_iodev_t *sf_owner; /* Parent I/O device */ - nsc_dev_t *sf_dev; /* Device structure */ - nsc_io_t *sf_aio; /* Active I/O module */ - int sf_avail; /* Availability for I/O */ - int sf_pend; /* Operation is pending */ - int sf_type; /* Open type */ - int sf_flag; /* Open flags */ - clock_t sf_lbolt; /* Open timestamp */ - int sf_reopen; /* Re-open required */ - blind_t sf_cd; /* Underlying I/O descriptor */ - blind_t sf_arg; /* Argument for callbacks */ - int sf_reserve; /* Device is reserved */ - int sf_mode; /* Type of reserve */ - void (*sf_pinned)(); /* Callback - Data pinned */ - void (*sf_unpinned)(); /* Callback - Data unpinned */ - int (*sf_attach)(); /* Callback - Attach */ - int (*sf_detach)(); /* Callback - Detach */ - int (*sf_flush)(); /* Callback - Flush */ -} nsc_fd_t; - - -/* - * External definitions. 
- */ - -extern nsc_io_t *_nsc_null_io; - -#ifdef _KERNEL -extern int _nsc_open_fd(nsc_fd_t *, int); -extern int _nsc_close_fd(nsc_fd_t *, int); -extern int _nsc_detach_fd(nsc_fd_t *, int); -extern int _nsc_detach_iodev(nsc_iodev_t *, nsc_fd_t *, int); -extern int _nsc_detach_dev(nsc_dev_t *, nsc_iodev_t *, int); -extern int _nsc_call_io(long, blind_t, blind_t, blind_t); -extern int _nsc_wait_dev(nsc_dev_t *, int); -extern void _nsc_wake_dev(nsc_dev_t *, int *); -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_DEV_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_disk.c b/usr/src/uts/common/avs/ns/nsctl/nsc_disk.c deleted file mode 100644 index 5c514f8331..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_disk.c +++ /dev/null @@ -1,554 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/file.h> -#include <sys/open.h> -#include <sys/cred.h> -#include <sys/kmem.h> -#include <sys/uio.h> -#include <sys/ddi.h> -#include <sys/sdt.h> - -#define __NSC_GEN__ -#include "nsc_dev.h" -#include "nsc_disk.h" -#include "../nsctl.h" - - -#define _I(x) (((long)(&((nsc_io_t *)0)->x))/sizeof (long)) - -nsc_def_t _nsc_disk_def[] = { - "UserRead", (uintptr_t)nsc_ioerr, _I(uread), - "UserWrite", (uintptr_t)nsc_ioerr, _I(uwrite), - "PartSize", (uintptr_t)nsc_null, _I(partsize), - "MaxFbas", (uintptr_t)nsc_null, _I(maxfbas), - "Control", (uintptr_t)nsc_ioerr, _I(control), - 0, 0, 0 -}; - - -extern nsc_mem_t *_nsc_local_mem; - -static int _nsc_uread(dev_t, uio_t *, cred_t *, nsc_fd_t *); -static int _nsc_uwrite(dev_t, uio_t *, cred_t *, nsc_fd_t *); -static int _nsc_rw_uio(nsc_fd_t *, uio_t *, uio_rw_t); - -static int _nsc_free_dhandle(nsc_dbuf_t *); -static int _nsc_alloc_dbuf(blind_t, nsc_off_t, nsc_size_t, int, nsc_dbuf_t **); -static int _nsc_free_dbuf(nsc_dbuf_t *); -static void _nsc_wait_dbuf(nsc_dbuf_t *); -static int _nsc_read_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int); -static int _nsc_write_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int); -static int _nsc_zero_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int); -static int _nsc_dbuf_io(int (*)(), nsc_dbuf_t *, nsc_off_t, nsc_size_t, int); - -static nsc_dbuf_t *_nsc_alloc_dhandle(void (*)(), void (*)(), void (*)()); - - -/* - * void - * _nsc_add_disk (nsc_io_t *io) - * Add disk interface functions. - * - * Calling/Exit State: - * Updates the I/O module with the appropriate - * interface routines. - * - * Description: - * Add functions to the I/O module to provide a disk - * or cache interface as appropriate. 
- */ -void -_nsc_add_disk(nsc_io_t *io) -{ - if ((io->alloc_buf != nsc_ioerr && io->free_buf != nsc_fatal) || - (io->flag & NSC_FILTER)) { - if (io->uread == nsc_ioerr) - io->uread = _nsc_uread; - - if (io->uwrite == nsc_ioerr && - (io->write != nsc_fatal || (io->flag & NSC_FILTER))) - io->uwrite = _nsc_uwrite; - - return; - } - - if (io->alloc_h != (nsc_buf_t *(*)())nsc_null || - io->free_h != nsc_fatal || io->alloc_buf != nsc_ioerr || - io->free_buf != nsc_fatal || io->read != nsc_fatal || - io->write != nsc_fatal || io->zero != nsc_fatal) - return; - - if (io->uread == nsc_ioerr && io->uwrite == nsc_ioerr) - return; - - /* - * Layer the generic nsc_buf_t provider onto a uio_t provider. - */ - - io->alloc_h = (nsc_buf_t *(*)())_nsc_alloc_dhandle; - io->free_h = _nsc_free_dhandle; - io->alloc_buf = _nsc_alloc_dbuf; - io->free_buf = _nsc_free_dbuf; - - io->read = _nsc_read_dbuf; - io->write = _nsc_write_dbuf; - io->zero = _nsc_zero_dbuf; - - io->provide |= NSC_ANON; -} - - -int -nsc_uread(nsc_fd_t *fd, void *uiop, void *crp) -{ - return (*fd->sf_aio->uread)(fd->sf_cd, uiop, crp, fd); -} - - -int -nsc_uwrite(nsc_fd_t *fd, void *uiop, void *crp) -{ - if ((fd->sf_avail & NSC_WRITE) == 0) - return (EIO); - - return (*fd->sf_aio->uwrite)(fd->sf_cd, uiop, crp, fd); -} - - -int -nsc_partsize(nsc_fd_t *fd, nsc_size_t *valp) -{ - *valp = 0; - return (*fd->sf_aio->partsize)(fd->sf_cd, valp); -} - - -int -nsc_maxfbas(nsc_fd_t *fd, int flag, nsc_size_t *valp) -{ - *valp = 0; - return (*fd->sf_aio->maxfbas)(fd->sf_cd, flag, valp); -} - -int -nsc_control(nsc_fd_t *fd, int command, void *argp, int argl) -{ - return (*fd->sf_aio->control)(fd->sf_cd, command, argp, argl); -} - - -/* ARGSUSED */ - -static int -_nsc_uread(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd) -{ - return (_nsc_rw_uio(fd, uiop, UIO_READ)); -} - - -/* ARGSUSED */ - -static int -_nsc_uwrite(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd) -{ - return (_nsc_rw_uio(fd, uiop, UIO_WRITE)); -} - - -static int 
-_nsc_rw_uio(nsc_fd_t *fd, uio_t *uiop, uio_rw_t rw) -{ - nsc_size_t buflen, len, limit, chunk; - nsc_off_t pos, off; - nsc_buf_t *buf; - nsc_vec_t *vec; - size_t n; - int rc; - - pos = FPOS_TO_FBA(uiop); - off = FPOS_TO_OFF(uiop); - len = FBA_LEN(uiop->uio_resid + off); - - DTRACE_PROBE3(_nsc_rw_uio_io, - uint64_t, pos, - uint64_t, off, - uint64_t, len); - - /* prevent non-FBA bounded I/O - this is a disk driver! */ - if (off != 0 || FBA_OFF(uiop->uio_resid) != 0) - return (EINVAL); - - if ((rc = nsc_partsize(fd, &limit)) != 0) - return (rc); - - if ((rc = nsc_maxfbas(fd, 0, &chunk)) != 0) - return (rc); - - DTRACE_PROBE2(_nsc_rw_uio_limit, - uint64_t, limit, - uint64_t, chunk); - - if (limit && pos >= limit) { - if (pos > limit || rw == UIO_WRITE) - return (ENXIO); - return (0); - } - - if (limit && pos + len > limit) - len = limit - pos; - - while (len > 0) { - buflen = min(len, chunk); - - buf = NULL; /* always use a temporary buffer */ - if ((rc = nsc_alloc_buf(fd, pos, buflen, - (rw == UIO_READ) ? 
NSC_RDBUF : NSC_WRBUF, &buf)) > 0) - return (rc); - - vec = buf->sb_vec; - - for (rc = 0; - !rc && uiop->uio_resid && vec->sv_addr; - vec++, off = 0) { - n = min(vec->sv_len - off, uiop->uio_resid); - rc = uiomove((char *)vec->sv_addr + off, - n, rw, uiop); - } - - if (rw == UIO_WRITE) { - if (rc) { - (void) nsc_uncommit(buf, pos, buflen, 0); - } else if ((rc = nsc_write(buf, pos, buflen, 0)) < 0) { - rc = 0; - } - } - - (void) nsc_free_buf(buf); - - len -= buflen; - pos += buflen; - } - - return (rc); -} - - -/* ARGSUSED */ - -static nsc_dbuf_t * -_nsc_alloc_dhandle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)()) -{ - nsc_dbuf_t *h; - - if ((h = nsc_kmem_zalloc(sizeof (nsc_dbuf_t), - KM_SLEEP, _nsc_local_mem)) == NULL) - return (NULL); - - h->db_disc = d_cb; - h->db_flag = NSC_HALLOCATED; - - return (h); -} - - -static int -_nsc_free_dhandle(nsc_dbuf_t *h) -{ - nsc_kmem_free(h, sizeof (*h)); - return (0); -} - - -static int -_nsc_alloc_dbuf(blind_t cd, nsc_off_t pos, nsc_size_t len, - int flag, nsc_dbuf_t **hp) -{ - nsc_dbuf_t *h = *hp; - int rc; - - if (cd == NSC_ANON_CD) { - flag &= ~(NSC_READ | NSC_WRITE | NSC_RDAHEAD); - } else { - if (h->db_maxfbas == 0) { - rc = nsc_maxfbas(h->db_fd, 0, &h->db_maxfbas); - if (rc != 0) - return (rc); - else if (h->db_maxfbas == 0) - return (EIO); - } - - if (len > h->db_maxfbas) - return (ENOSPC); - } - - if (flag & NSC_NODATA) { - ASSERT(!(flag & NSC_RDBUF)); - h->db_addr = NULL; - } else { - if (h->db_disc) - (*h->db_disc)(h); - - if (!(h->db_addr = nsc_kmem_alloc(FBA_SIZE(len), KM_SLEEP, 0))) - return (ENOMEM); - } - - h->db_pos = pos; - h->db_len = len; - h->db_error = 0; - h->db_flag |= flag; - - if (flag & NSC_NODATA) { - h->db_vec = NULL; - } else { - h->db_vec = &h->db_bvec[0]; - h->db_bvec[0].sv_len = FBA_SIZE(len); - h->db_bvec[0].sv_addr = (void *)h->db_addr; - h->db_bvec[0].sv_vme = 0; - - h->db_bvec[1].sv_len = 0; - h->db_bvec[1].sv_addr = 0; - h->db_bvec[1].sv_vme = 0; - } - - if ((flag & NSC_RDAHEAD) || (cd == 
NSC_ANON_CD)) - return (NSC_DONE); - - _nsc_wait_dbuf(h); - - if (flag & NSC_RDBUF) { - if ((rc = _nsc_dbuf_io(nsc_uread, h, pos, len, flag)) != 0) { - (void) _nsc_free_dbuf(h); - return (rc); - } - } - - return (NSC_DONE); -} - - -static void -_nsc_wait_dbuf(nsc_dbuf_t *h) -{ - nsc_iodev_t *iodev = h->db_fd->sf_iodev; - void (*fn)() = h->db_disc; - nsc_dbuf_t *hp; - - mutex_enter(&iodev->si_lock); - - h->db_next = iodev->si_active; - iodev->si_active = h; - - /* CONSTCOND */ - - while (1) { - for (hp = h->db_next; hp; hp = hp->db_next) - if (h->db_pos + h->db_len > hp->db_pos && - h->db_pos < hp->db_pos + hp->db_len) break; - - if (!hp) - break; - - if (fn) - (*fn)(h), fn = NULL; - - cv_wait(&iodev->si_cv, &iodev->si_lock); - } - - mutex_exit(&iodev->si_lock); -} - - -static int -_nsc_free_dbuf(nsc_dbuf_t *h) -{ - nsc_dbuf_t **hpp, *hp; - nsc_iodev_t *iodev; - int wake = 0; - - if (h->db_fd && !(h->db_flag & NSC_ABUF)) { - iodev = h->db_fd->sf_iodev; - - mutex_enter(&iodev->si_lock); - - hpp = (nsc_dbuf_t **)&iodev->si_active; - - for (; *hpp; hpp = &hp->db_next) { - if ((hp = *hpp) == h) { - *hpp = h->db_next; - break; - } - - if (h->db_pos + h->db_len > hp->db_pos && - h->db_pos < hp->db_pos + hp->db_len) wake = 1; - - } - if (wake) - cv_broadcast(&iodev->si_cv); - - mutex_exit(&iodev->si_lock); - } - - if (!(h->db_flag & NSC_NODATA) && h->db_addr) - nsc_kmem_free(h->db_addr, FBA_SIZE(h->db_len)); - - h->db_addr = NULL; - h->db_flag &= NSC_HALLOCATED; /* clear flags, preserve NSC_HALLOCATED */ - - if ((h->db_flag & NSC_HALLOCATED) == 0) - (void) _nsc_free_dhandle(h); - - - return (0); -} - - -static int -_nsc_read_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - return (_nsc_dbuf_io(nsc_uread, h, pos, len, flag)); -} - - -static int -_nsc_write_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - return (_nsc_dbuf_io(nsc_uwrite, h, pos, len, flag)); -} - - -static int -_nsc_zero_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int 
flag) -{ - return (_nsc_dbuf_io(NULL, h, pos, len, flag)); -} - - -static int -_nsc_dbuf_io(int (*fn)(), nsc_dbuf_t *h, nsc_off_t pos, - nsc_size_t len, int flag) -{ - nsc_vec_t *vp = NULL; - cred_t *crp = NULL; - iovec_t *iovp; - nsc_size_t thisio; /* bytes in this io */ - nsc_size_t todo; /* anticipated bytes to go */ - nsc_size_t truedo; /* actual bytes to go */ - nsc_off_t xpos; /* offset of this io */ - int destidx; - nsc_size_t firstentryfix; /* value used for first entry */ - - int (*iofn)(); - int rc = 0; - - if (!h->db_vec || (h->db_flag & NSC_ABUF)) - return (EIO); - - if (pos < h->db_pos || pos + len > h->db_pos + h->db_len) - return (EINVAL); - - if (!len) - return (0); - if (fn == nsc_uread && (flag & NSC_RDAHEAD)) - return (0); - - if (h->db_disc) - (*h->db_disc)(h); - - crp = ddi_get_cred(); - bzero(&h->db_uio, sizeof (uio_t)); - bzero(&h->db_iov[0], (_NSC_DBUF_NVEC * sizeof (iovec_t))); - - todo = FBA_SIZE(len); - - /* - * determine where in the vector array we should start. - */ - vp = h->db_vec; - xpos = pos - h->db_pos; - for (; xpos >= FBA_NUM(vp->sv_len); vp++) - xpos -= FBA_NUM(vp->sv_len); - - firstentryfix = FBA_SIZE(xpos); - - xpos = pos; - - /* - * Loop performing i/o to the underlying driver. - */ - while (todo) { - destidx = 0; - thisio = 0; - iofn = fn; - - /* - * Copy up to _NSC_DBUF_NVEC vector entries from the - * nsc_vec_t into the iovec_t so that the number of - * i/o operations is minimised. 
- */ - while (destidx < _NSC_DBUF_NVEC && todo) { - iovp = &h->db_iov[destidx]; - - ASSERT(FBA_LEN(vp->sv_len) == FBA_NUM(vp->sv_len)); - ASSERT((vp->sv_len - firstentryfix) && vp->sv_addr); - - truedo = min(vp->sv_len - firstentryfix, todo); - iovp->iov_base = (caddr_t)vp->sv_addr + firstentryfix; - firstentryfix = 0; - iovp->iov_len = (size_t)truedo; - if (!iofn) { - bzero(iovp->iov_base, iovp->iov_len); - } - thisio += truedo; - todo -= truedo; - destidx++; - vp++; - } - - h->db_uio.uio_iovcnt = destidx; - h->db_uio.uio_iov = &h->db_iov[0]; - h->db_uio.uio_segflg = UIO_SYSSPACE; - h->db_uio.uio_resid = (size_t)thisio; - - SET_FPOS(&h->db_uio, xpos); - - if (!iofn) { - iofn = nsc_uwrite; - } - - rc = (*iofn)(h->db_fd, &h->db_uio, crp); - if (rc != 0) { - break; - } - - ASSERT(FBA_LEN(thisio) == FBA_NUM(thisio)); - xpos += FBA_LEN(thisio); - } - - return (rc); -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_disk.h b/usr/src/uts/common/avs/ns/nsctl/nsc_disk.h deleted file mode 100644 index f7d52260df..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_disk.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. 
All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _NSC_DISK_H -#define _NSC_DISK_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __NSC_GEN__ -Error: Illegal #include - private file. -#endif - -#include <sys/types.h> -#include <sys/file.h> -#include <sys/uio.h> - -#include <sys/nsctl/nsc_dev.h> -#include <sys/nsctl/nsctl.h> - -#define _NSC_DBUF_NVEC 5 - -/* - * Buffer structure for disk I/O. - */ - -typedef struct nsc_dbuf_s { - nsc_buf_t db_buf; /* Generic buffer header */ - void (*db_disc)(); /* Disconnect callback */ - uio_t db_uio; /* Scatter/gather list */ - iovec_t db_iov[_NSC_DBUF_NVEC]; /* Data transfer address */ - char *db_addr; /* Address of data buffer */ - nsc_vec_t db_bvec[2]; /* Pointers to data */ - struct nsc_dbuf_s *db_next; /* Link to next buffer */ - nsc_size_t db_maxfbas; /* Maxfbas value for the device */ -} nsc_dbuf_t; - - -#define db_fd db_buf.sb_fd -#define db_pos db_buf.sb_pos -#define db_len db_buf.sb_len -#define db_flag db_buf.sb_flag -#define db_error db_buf.sb_error -#define db_vec db_buf.sb_vec - - -/* - * Sector Mode definitions. - */ - -#define FPOS_TO_FBA(u) ((nsc_off_t)(FBA_NUM((u)->uio_loffset))) -#define FPOS_TO_OFF(u) ((nsc_off_t)(FBA_OFF((u)->uio_loffset))) -#define SET_FPOS(u, f) ((u)->uio_loffset = (offset_t)FBA_SIZE((offset_t)f)) - - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_DISK_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_freeze.c b/usr/src/uts/common/avs/ns/nsctl/nsc_freeze.c deleted file mode 100644 index b86fc9e65d..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_freeze.c +++ /dev/null @@ -1,310 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/ddi.h> - -#define __NSC_GEN__ -#include "nsc_dev.h" -#include "../nsctl.h" - -/* - * (Un)Freeze Module - * - * This module provides a means to 'freeze' a device and ensure - * that no SP software has an open reference to that device. Later - * the device can be 'unfrozen' and the SP software can resume - * normal operations. - * - * This module is required because it is possible to place a virtual - * volume driver (RAID-0, 1 or 5) into a state whereby it needs to be - * disabled for corrective action. The (un)freeze facility provides a - * method of doing this without downtime. - * - * A device that is frozen should be frozen on all nodes. It is the - * responsibility of the management software or the user to perform - * the freeze and unfreeze on the required nodes. 
- */ - -extern nsc_mem_t *_nsc_local_mem; - -typedef struct _nsc_frz_s { - struct _nsc_frz_s *next; - nsc_path_t *token; - char path[NSC_MAXPATH]; -} _nsc_frz_t; - - -extern int _nsc_frz_stop(char *, int *); /* forward decl */ - -static _nsc_frz_t *_nsc_frz_top; -static nsc_def_t _nsc_frz_def[]; -static kmutex_t _nsc_frz_sleep; -static nsc_io_t *_nsc_frz_io; - - -void -_nsc_init_frz(void) -{ - mutex_init(&_nsc_frz_sleep, NULL, MUTEX_DRIVER, NULL); - - _nsc_frz_io = nsc_register_io("frz", - NSC_FREEZE_ID | NSC_FILTER, _nsc_frz_def); - - if (!_nsc_frz_io) - cmn_err(CE_WARN, "nsctl: _nsc_init_frz: register failed"); -} - - -void -_nsc_deinit_frz(void) -{ - if (_nsc_frz_io) - (void) nsc_unregister_io(_nsc_frz_io, 0); - - _nsc_frz_io = NULL; - - mutex_destroy(&_nsc_frz_sleep); -} - - -/* - * int _nsc_frz_start(char *path, int *rvp) - * Freeze a device - * - * Calling/Exit State: - * Must be called from a context that can block. - * Returns 0 for success, or one of the following error codes: - * EINVAL - invalid 'path' argument - * ENOMEM - failed to allocate memory - * EALREADY - 'path' is already frozen - * - * Description: - * Registers 'path' to be accessed through the NSC_FREEZE_ID - * io module, and forces any open file descriptors for 'path' - * to be re-opened as appropriate. 
- */ -int -_nsc_frz_start(path, rvp) -char *path; -int *rvp; -{ - _nsc_frz_t *frz, *xfrz; - int rc; - - *rvp = 0; - - if (strlen(path) >= NSC_MAXPATH) - return (EINVAL); - - frz = nsc_kmem_zalloc(sizeof (*frz), KM_SLEEP, _nsc_local_mem); - if (!frz) - return (ENOMEM); - - (void) strcpy(frz->path, path); - - mutex_enter(&_nsc_frz_sleep); - - for (xfrz = _nsc_frz_top; xfrz; xfrz = xfrz->next) - if (strcmp(frz->path, xfrz->path) == 0) - break; - - if (!xfrz) { - frz->next = _nsc_frz_top; - _nsc_frz_top = frz; - } - - mutex_exit(&_nsc_frz_sleep); - - if (xfrz) { - nsc_kmem_free(frz, sizeof (*frz)); - return (EALREADY); - } - - frz->token = nsc_register_path(path, NSC_DEVICE, _nsc_frz_io); - - if (!frz->token) { - (void) _nsc_frz_stop(path, &rc); - return (EINVAL); - } - - return (0); -} - - -/* - * int _nsc_frz_stop(char *path, int *rvp) - * Unfreeze a device - * - * Calling/Exit State: - * Must be called from a context that can block. - * Returns 0 or an error code. - * - * Description: - * Removes the path registration for the NSC_FREEZE_ID io module - * and forces any re-opens as appropriate. - */ -int -_nsc_frz_stop(path, rvp) -char *path; -int *rvp; -{ - _nsc_frz_t **xfrz, *frz = NULL; - int rc = 0; - - *rvp = 0; - - mutex_enter(&_nsc_frz_sleep); - - for (xfrz = &_nsc_frz_top; *xfrz; xfrz = &(*xfrz)->next) - if (strcmp(path, (*xfrz)->path) == 0) { - frz = *xfrz; - break; - } - - if (!frz) { - mutex_exit(&_nsc_frz_sleep); - return (EINVAL); - } - - if (frz->token) - rc = nsc_unregister_path(frz->token, NSC_PCATCH); - - if (rc) { - mutex_exit(&_nsc_frz_sleep); - return (rc); - } - - (*xfrz) = frz->next; - - mutex_exit(&_nsc_frz_sleep); - - nsc_kmem_free(frz, sizeof (*frz)); - - return (0); -} - - -/* - * int _nsc_frz_isfrozen(char *path, int *rvp) - * Tests whether a device is frozen. - * - * Calling/Exit State: - * Returns 0 or EINVAL. - * Sets *rvp to 1 if the device was not frozen, and 0 otherwise. - * This function returns historical information. 
- */ -int -_nsc_frz_isfrozen(path, rvp) -char *path; -int *rvp; -{ - _nsc_frz_t *frz; - - *rvp = 1; - - if (! _nsc_frz_io) - return (EINVAL); - - mutex_enter(&_nsc_frz_sleep); - - for (frz = _nsc_frz_top; frz; frz = frz->next) - if (strcmp(frz->path, path) == 0) { - *rvp = 0; - break; - } - - mutex_exit(&_nsc_frz_sleep); - - return (0); -} - - -/* - * static int - * _nsc_frz_open(char *path, int flag, blind_t *cdp) - * Dummy open function. - * - * Description: - * This is the "Open" function for the I/O module. - * It is just a dummy. - */ - -/* ARGSUSED */ - -static int -_nsc_frz_open(path, flag, cdp) -char *path; -int flag; -blind_t *cdp; -{ - *cdp = 0; - return (0); -} - - -/* - * static int - * _nsc_frz_close() - * Dummy close function. - * - * Description: - * This is the "Close" function for the I/O module. - * It is just a dummy. - */ -static int -_nsc_frz_close() { return (0); } - - -/* - * static int - * _nsc_frz_attach() - * Attach a device to this i/o module. - * - * Calling/Exit State: - * Returns EACCES in all cricumstances. - * - * Description: - * This function is called by the nsctl module when it wishes - * to attach the device to this I/O module (ie. as part of - * nsc_reserve() processing). This function unconditionally - * returns an error which forces the nsc_reserve() to fail, and - * so no access to possible to the underlying device. 
- */ -static int -_nsc_frz_attach() { return (EACCES); } - - -static nsc_def_t _nsc_frz_def[] = { - "Open", (uintptr_t)_nsc_frz_open, 0, - "Close", (uintptr_t)_nsc_frz_close, 0, - "Attach", (uintptr_t)_nsc_frz_attach, 0, - "Provide", 0, 0, - 0, 0, 0 -}; diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_gen.c b/usr/src/uts/common/avs/ns/nsctl/nsc_gen.c deleted file mode 100644 index 77a6165fcb..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_gen.c +++ /dev/null @@ -1,1106 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/ddi.h> -#include <sys/varargs.h> -#if defined(DEBUG) && !defined(DS_DDICT) -#include <sys/kobj.h> -#endif - -#include <sys/ncall/ncall.h> - -#define __NSC_GEN__ -#include "nsc_gen.h" -#include "nsc_mem.h" -#include "../nsctl.h" -#ifdef DS_DDICT -#include "../contract.h" -#endif - - -static kcondvar_t _nsc_delay_cv; -static kmutex_t _nsc_delay_mutex; - -static nsc_service_t *_nsc_services; -static kmutex_t _nsc_svc_mutex; - -static int _nsc_rmmap_inuse(nsc_rmmap_t *, ulong_t *, size_t *); - -static void _nsc_sprint_dec(char **, int, int, int); -static void _nsc_sprint_hex(char **, unsigned int, int, int, int, int); - -clock_t HZ; - -extern nsc_rmhdr_t *_nsc_rmhdr_ptr; - -void -_nsc_init_gen() -{ - HZ = drv_usectohz(1000000); -} - - -void -nsc_decode_param(nsc_def_t *args, nsc_def_t *def, long *v) -{ - nsc_def_t *dp; - - for (; def && def->name; def++) { - for (dp = args; dp && dp->name; dp++) { - if (strcmp(dp->name, def->name) == 0) { - v[def->offset] = dp->value; - break; - } - } - - if ((!dp || !dp->name) && !v[def->offset]) - v[def->offset] = def->value; - } -} - - -clock_t -nsc_lbolt() -{ -#ifdef _SunOS_5_6 - clock_t lbolt; - time_t time; - - if (drv_getparm(LBOLT, &lbolt) == 0) - return (lbolt); - - if (drv_getparm(TIME, &time) != 0) - return ((clock_t)0); - - time %= (60 * 60 * 24 * 365); - - return (clock_t)(time * HZ); -#else - return (ddi_get_lbolt()); -#endif -} - - -time_t -nsc_time() -{ - time_t time; - - if (drv_getparm(TIME, &time) != 0) - return ((time_t)0); - - return (time); -} - - -int -nsc_node_up(int node) -{ - return (node == ncall_self()); -} - - - -/* - * HACK increment nodeid in data parameter - */ -int -nsc_nodeid_data() -{ - int data; - return ((data = nsc_node_id()) == 0 ? 
1 : data); -} - - -int -nsc_node_id(void) -{ - return (ncall_self()); -} - -char * -nsc_node_name() -{ - return (ncall_nodename(ncall_self())); -} - - -/* - * int - * _nsc_rmmap_init (nsc_rmmap_t *map, char *name, int nslot, - * size_t size, ulong_t offset) - * Initialise a global resource map. - * - * Calling/Exit State: - * Returns TRUE if the map was successfully created. Otherwise - * returns FALSE. - * - * Description: - * Initialises a global resource map. If the map already exists - * the arguments are validated against it. - */ -int -_nsc_rmmap_init(nsc_rmmap_t *map, char *name, - int nslot, size_t size, ulong_t offset) -{ - nsc_rmmap_t *nvmap = NULL; - - if (!size) - return (0); - - mutex_enter(&_nsc_global_lock); - - if (_nsc_rm_nvmem_base) - nvmap = _nsc_global_nvmemmap_lookup(map); - - if (!map->size) - map->size = size; - if (!map->inuse) - map->inuse = nslot; - if (!map->offset) - map->offset = offset; - - if (!map->name[0]) - (void) strncpy(map->name, name, _NSC_MAXNAME); - - /* actually we only need to do this if an update occurred above */ - if (nvmap) { - (void) nsc_commit_mem(map, nvmap, - sizeof (nsc_rmmap_t), nsc_cm_errhdlr); - } - - if (strncmp(map->name, name, _NSC_MAXNAME) || - (uint32_t)size != map->size || (int32_t)offset != map->offset) { - mutex_exit(&_nsc_global_lock); - return (0); - } - - mutex_exit(&_nsc_global_lock); - return (1); -} - - -/* - * ulong_t - * _nsc_rmmap_alloc (nsc_rmmap_t *map, char *name, - * size_t size, void (*alloc)()) - * Allocate entry in a global resource map. - * - * Calling/Exit State: - * On success, returns the base of the allocated area. Otherwise, - * returns NULL. The function 'alloc' will be called if the - * allocated area is not currently in use. - * - * Description: - * Allocates an entry in the global resource map. If the entry - * already exists but is a different size an error is returned. 
- */ -ulong_t -_nsc_rmmap_alloc(nsc_rmmap_t *map, char *name, size_t size, void (*alloc)()) -{ - int i, nslot = map[0].inuse; - size_t want = size; - ulong_t offset; - nsc_rmmap_t *nvmap = NULL; - - if (!size) - return (0); - - mutex_enter(&_nsc_global_lock); - if (_nsc_rm_nvmem_base) - nvmap = _nsc_global_nvmemmap_lookup(map); - - for (i = 1; i < nslot; i++) { - if (!map[i].inuse || !map[i].size) - continue; - if (strncmp(map[i].name, name, _NSC_MAXNAME)) - continue; - if ((uint32_t)size == map[i].size) { - map[i].inuse |= (1 << nsc_node_id()); - if (nvmap) { - (void) nsc_commit_mem(&map[i], &nvmap[i], - sizeof (nsc_rmmap_t), nsc_cm_errhdlr); - } - mutex_exit(&_nsc_global_lock); - return (map[i].offset); - } - - mutex_exit(&_nsc_global_lock); - return (0); - } - - offset = map[0].offset; - - while ((int32_t)offset < (map[0].offset + map[0].size)) { - if (_nsc_rmmap_inuse(map, &offset, &want)) - continue; - - if (size > want) { - offset += want; - want = size; - continue; - } - - for (i = 1; i < nslot; i++) - if (!map[i].inuse || !map[i].size) - break; - - if (i == nslot) - break; - - bzero(&map[i], sizeof (map[i])); - (void) strncpy(map[i].name, name, _NSC_MAXNAME); - - map[i].size = size; - map[i].offset = offset; - map[i].inuse = (1 << nsc_node_id()); - if (nvmap) { /* update the map and hdr dirty bit. */ - (void) nsc_commit_mem(&map[i], &nvmap[i], - sizeof (nsc_rmmap_t), nsc_cm_errhdlr); - } - - if (alloc) - (*alloc)(offset, size); - - mutex_exit(&_nsc_global_lock); - return (offset); - } - - mutex_exit(&_nsc_global_lock); - return (0); -} - - -/* - * void - * _nsc_rmmap_free (nsc_rmmap_t *map, char *name) - * Free entry in a global resource map. - * - * Description: - * Frees an entry in the global resource map. 
- */ -void -_nsc_rmmap_free(nsc_rmmap_t *map, char *name, nsc_mem_t *mp) -{ - int i, nslot = map[0].inuse; - nsc_rmmap_t *nvmap = NULL; - - mutex_enter(&_nsc_global_lock); - if (_nsc_rm_nvmem_base) - nvmap = _nsc_global_nvmemmap_lookup(map); - - for (i = 1; i < nslot; i++) { - if (!map[i].inuse || !map[i].size) - continue; - if (strncmp(map[i].name, name, _NSC_MAXNAME)) - continue; - - map[i].inuse &= ~(1 << nsc_node_id()); - if (nvmap) { - /* - * if dirty, set the inuse bit so this area - * will not be _nsc_global_zero'd on restart. - */ - if (mp && (mp->type & NSC_MEM_NVDIRTY)) { - map[i].inuse |= (1 << nsc_node_id()); - } - - (void) nsc_commit_mem(&map[i], &nvmap[i], - sizeof (nsc_rmmap_t), nsc_cm_errhdlr); - } - mutex_exit(&_nsc_global_lock); - return; - } - - mutex_exit(&_nsc_global_lock); - - cmn_err(CE_WARN, "!nsctl: _nsc_rmmap_free: invalid free"); -} - - -/* - * size_t - * _nsc_rmmap_size (nsc_rmmap_t *map, char *name) - * Find size of area in map. - * - * Calling/Exit State: - * Returns the size of the specified area in the map, - * or 0 if it is currently unallocated. - */ -size_t -_nsc_rmmap_size(nsc_rmmap_t *map, char *name) -{ - int i, nslot = map[0].inuse; - size_t size = 0; - - mutex_enter(&_nsc_global_lock); - - for (i = 1; i < nslot; i++) { - if (!map[i].inuse || !map[i].size) - continue; - - if (strncmp(map[i].name, name, _NSC_MAXNAME) == 0) { - size = map[i].size; - break; - } - } - - mutex_exit(&_nsc_global_lock); - return (size); -} - - -/* - * size_t - * _nsc_rmmap_avail (nsc_rmmap_t *map) - * Find available space in global resource map. - * - * Calling/Exit State: - * Returns the size of the largest available area in - * the global resource map. 
- */ -size_t -_nsc_rmmap_avail(nsc_rmmap_t *map) -{ - size_t size, avail = 0; - ulong_t offset; - - mutex_enter(&_nsc_global_lock); - - size = 1; - offset = map[0].offset; - - while ((int32_t)offset < (map[0].offset + map[0].size)) - if (!_nsc_rmmap_inuse(map, &offset, &size)) { - if (size > avail) - avail = size; - offset += size; - size = 1; - } - - mutex_exit(&_nsc_global_lock); - return (avail); -} - - -/* - * static int - * _nsc_rmmap_inuse (nsc_rmmap_t *map, ulong_t *offsetp, size_t *sizep) - * Check if a section of the map is in use. - * - * Calling/Exit State: - * The global lock must be held across calls to the function. - * - * Returns TRUE if the specified area is currently in use and - * updates offset to point just past the section that was found - * to be in use. - * - * Otherwise, returns FALSE and updates size to reflect the - * amount of free space at the specified offset. - * - * Description: - * Checks the specified global map to determine if any part - * of the area is in use. - */ -static int -_nsc_rmmap_inuse(nsc_rmmap_t *map, ulong_t *offsetp, size_t *sizep) -{ - size_t avail, size = (*sizep); - ulong_t offset = (*offsetp); - int i, nslot; - - nslot = map[0].inuse; - avail = map[0].offset + map[0].size - offset; - - for (i = 1; i < nslot; i++) { - if (!map[i].size || !map[i].inuse) - continue; - if ((int32_t)(offset + size) > map[i].offset && - (int32_t)offset < (map[i].offset + map[i].size)) { - (*offsetp) = map[i].offset + map[i].size; - return (1); - } - - if (map[i].offset >= (int32_t)offset) - if (avail > map[i].offset - offset) - avail = map[i].offset - offset; - } - - (*sizep) = avail; - return (0); -} - -/* - * int - * nsc_delay_sig (clock_t tics) - * Delay for a number of clock ticks. - * - * Calling/Exit State: - * Returns FALSE if the delay was interrupted by a - * signal, TRUE otherwise. - * - * Description: - * Delays execution for the specified number of ticks - * or until a signal is received. 
- */ -int -nsc_delay_sig(clock_t tics) -{ - clock_t target, remain, rc; - - target = nsc_lbolt() + tics; - rc = 1; - - mutex_enter(&_nsc_delay_mutex); - - /* CONSTCOND */ - - while (1) { - remain = target - nsc_lbolt(); - - if (remain <= 0 || rc == -1) { - /* timeout */ - break; - } - - rc = cv_timedwait_sig(&_nsc_delay_cv, - &_nsc_delay_mutex, target); - - if (rc == 0) { - /* signalled */ - mutex_exit(&_nsc_delay_mutex); - return (FALSE); - } - } - - mutex_exit(&_nsc_delay_mutex); - - return (TRUE); -} - - -/* - * void - * nsc_sprintf (char *s, char *fmt, ...) - * String printf. - * - * Calling/Exit State: - * Builds a NULL terminated string in the buffer - * pointed to by 's', using the format 'fmt'. - * - * Description: - * Simple version of sprintf supporting fairly - * basic formats. - */ - -/* PRINTFLIKE2 */ - -void -nsc_sprintf(char *s, char *fmt, ...) -{ - int alt, zero, len; - char c, *cp; - va_list p; - - va_start(p, fmt); - - /* CONSTCOND */ - - while (1) { - alt = 0, zero = 0, len = 0; - - if ((c = *fmt++) != '%') { - if (!c) - break; - *s++ = c; - continue; - } - - if ((c = *fmt++) == 0) { - *s++ = '%'; - break; - } - - alt = (c == '#'); - if (alt && !(c = *fmt++)) - break; - - zero = (c == '0'); - if (zero && !(c = *fmt++)) - break; - - while ((len ? 
'0' : '1') <= c && c <= '9') { - len = (len * 10) + (c - '0'); - if (!(c = *fmt++)) - break; - } - - if (c == 's') { - cp = (char *)va_arg(p, caddr_t); - while (*cp) - *s++ = *cp++; - continue; - } - - if (c == 'd' || c == 'u') { - _nsc_sprint_dec(&s, va_arg(p, int), zero, len); - continue; - } - - if (c == 'x' || c == 'X') { - _nsc_sprint_hex(&s, va_arg(p, uint_t), - (c == 'X'), alt, zero, len); - continue; - } - - *s++ = '%'; - if (alt) - *s++ = '#'; - if (zero) - *s++ = '0'; - - if (len) - _nsc_sprint_dec(&s, len, 0, 0); - *s++ = c; - } - - if (alt || zero || len) { - *s++ = '%'; - - if (alt) - *s++ = '#'; - if (zero) - *s++ = '0'; - - if (len) - _nsc_sprint_dec(&s, len, 0, 0); - } - - va_end(p); - *s = 0; -} - - -/* - * static void - * _nsc_sprint_dec (char **sptr, int n, int zero, int len) - * Decimal to string conversion. - * - * Calling/Exit State: - * Stores a character representation of 'n' in the - * buffer referenced by 'sptr' and updates the pointer - * accordingly. - * - * Description: - * Generates a string representation of a signed decimal - * integer. - */ - -static void -_nsc_sprint_dec(char **sptr, int n, int zero, int len) -{ - unsigned int v = (n < 0) ? (-n) : n; - char c[20]; - int i; - - for (i = 0; v; i++) { - c[i] = (v % 10) + '0'; - v /= 10; - } - - len -= (i ? i : 1); - - if (n < 0 && !zero) - for (len--; len > 0; len--) - *(*sptr)++ = ' '; - - if (n < 0) { - *(*sptr)++ = '-'; - len--; - } - - for (; len > 0; len--) - *(*sptr)++ = (zero ? '0' : ' '); - - if (!i) - *(*sptr)++ = '0'; - - while (i--) - *(*sptr)++ = c[i]; -} - - -/* - * static void - * _nsc_sprint_hex (char **sptr, unsigned int v, - * int up, int alt, int zero, int len) - * Hexadecimal to string conversion. - * - * Calling/Exit State: - * Stores a character representation of 'v' in the - * buffer referenced by 'sptr' and updates the pointer - * accordingly. - * - * Description: - * Generates a string representation of an unsigned - * hexadecimal integer. 
- */ - -static void -_nsc_sprint_hex(char **sptr, uint_t v, int up, int alt, int zero, int len) -{ - char *str = "0123456789abcdef"; - char c[20]; - int i; - - if (up) - str = "0123456789ABCDEF"; - - for (i = 0; v; i++) { - c[i] = str[(v % 16)]; - v /= 16; - } - - if (alt) { - *(*sptr)++ = '0'; - *(*sptr)++ = (up ? 'X' : 'x'); - } - - for (len -= (i ? i : 1); len > 0; len--) - *(*sptr)++ = (zero ? '0' : ' '); - - if (!i) - *(*sptr)++ = '0'; - while (i--) - *(*sptr)++ = c[i]; -} - - -/* - * char * - * nsc_strdup (char *s) - * Duplicate string. - * - * Calling/Exit State: - * Returns the address of the new string. - * - * Description: - * Allocates a suitably sized area of memory and - * copies the string into it. The string should be - * free'd using nsc_strfree(). - */ -char * -nsc_strdup(char *s) -{ - char *cp; - - if (s == NULL) - return (NULL); - - cp = nsc_kmem_alloc(strlen(s) + 1, KM_SLEEP, NULL); - (void) strcpy(cp, s); - return (cp); -} - - -/* - * void - * nsc_strfree (char *s) - * Free string. - * - * Description: - * Frees a string previously allocated by nsc_strdup. - */ -void -nsc_strfree(char *s) -{ - if (s) - nsc_kmem_free(s, strlen(s) + 1); -} - - -/* - * int - * nsc_strmatch (char *s, char *pat) - * Match string against pattern. - * - * Calling/Exit State: - * Returns TRUE if the string matches against the - * pattern, FALSE otherwise. - * - * Description: - * Compares string against regular expression which - * can contain '*', '?' and '[]' constructs. - */ -int -nsc_strmatch(char *s, char *pat) -{ - int neg; - - for (; *pat; pat++, s++) { - if (*pat == '*') { - while (*pat == '*') - pat++; - - if (!*pat) - return (1); - - for (; *s; s++) - if (*pat == '[' || *pat == '?' 
|| *pat == *s) - if (nsc_strmatch(s, pat)) - return (1); - return (0); - } - - if (!*s) - return (0); - - if (*pat == '[') { - if ((neg = (*++pat == '^')) != 0) - pat++; - - while (*pat) { - if (*pat == *s) - break; - - if (pat[1] == '-' && pat[2] != ']') { - if (*pat <= *s && *s <= pat[2]) - break; - pat += 2; - } - - if (*++pat == ']') { - if (neg) - goto lp; - else - return (0); - } - } - - while (*pat && *++pat != ']') - ; - - if (!*pat || neg) - return (0); -lp: - continue; - } - - if (*pat != '?' && *pat != *s) - return (0); - } - - return (!*s); -} - - -/* - * uint64_t - * nsc_strhash(char *str) - * Calculate a simple hash for the specified string - * - * Calling/Exit State: - * Returns a simple hash of the NULL terminated string, str. - * - * Description: - */ -uint64_t -nsc_strhash(char *str) -{ - uint64_t hash = (uint64_t)0; - - if (str == NULL) - return (hash); - - while (*str != '\0') { - hash <<= 1; - hash += (uint64_t)*str; - str++; - } - - return (hash); -} - - -/* - * int - * nsc_fatal(void) - * Fatal error stub function - * - * Calling/Exit State: - * Returns EINVAL (non-DEBUG) or forces a panic. - * - * Description: - * This is a stub function suitable for default actions in - * nsctl i/o provider definitions. It should be used when - * calling the stub would be a programming error. The most - * common reason for nsc_fatal() being called is that an - * nsctl client module has called an nsc_fd_t i/o function - * without the fd already reserved. - * - * The function will display a diagnostic message and when - * built -DDEBUG will force a panic and display the textual - * name of the symbol closest to the caller address of this - * function. 
- */ -int -nsc_fatal() -{ - void *caller = nsc_caller(); -#ifdef DEBUG - caddr_t caller_sym = NULL; - ulong_t offset = 0UL; - -#ifndef DS_DDICT - caller_sym = kobj_getsymname((uintptr_t)caller, &offset); -#endif /* !DS_DDICT */ - - cmn_err(CE_WARN, "!nsctl: nsc_fatal called at 0x%p (%s+0x%lx)", - caller, caller_sym ? caller_sym : "?", offset); - - /* - * Force TRAP due to NULL pointer dereference - * - CE_PANIC can result in the stack trace being unreadable - * by (k)adb. - */ - *(int *)0 = 0x12345678; - -#else /* !DEBUG */ - - cmn_err(CE_WARN, "!nsctl: nsc_fatal called at 0x%p", caller); - -#endif /* DEBUG */ - - return (EINVAL); -} - - -int nsc_null() { return (0); } -int nsc_true() { return (1); } -int nsc_inval() { return (-1); } -int nsc_ioerr() { return (EIO); } - -/*ARGSUSED*/ -int -nsc_commit_mem(void *src, void *dst, size_t len, nsc_mem_err_cb err_action) -{ - - return (0); -} - -static int _nsc_nvmem_errs; - -/* ARGSUSED */ -void -nsc_cm_errhdlr(void *src, void *dst, size_t len, int errval) -{ - static int _nsc_baddma_already_seen = 0; - - if (!(_nsc_baddma_already_seen % 100)) { - cmn_err(CE_WARN, "!nsc_cm_errhdlr: media down, forced_wrthru"); - - _nsc_baddma_already_seen += 1; - - if (_nsc_baddma_already_seen >= 100) { - cmn_err(CE_WARN, - "!nsc_cm_errhdlr: this message " - "displayed every 100 errors"); - } - } - - (void) nsc_node_hints_set(NSC_FORCED_WRTHRU); - - _nsc_nvmem_errs++; -} - - -void -_nsc_init_svc(void) -{ - mutex_init(&_nsc_svc_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&_nsc_delay_mutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&_nsc_delay_cv, NULL, CV_DRIVER, NULL); -} - - -void -_nsc_deinit_svc(void) -{ - if (_nsc_services != NULL) { - cmn_err(CE_PANIC, - "nsctl: services registered in _nsc_deinit_svc"); - /* NOTREACHED */ - } - - cv_destroy(&_nsc_delay_cv); - mutex_destroy(&_nsc_delay_mutex); - mutex_destroy(&_nsc_svc_mutex); -} - - -nsc_svc_t * -nsc_register_svc(char *name, void (*service_fn)(intptr_t)) -{ - nsc_service_t *sp, *new; 
- nsc_svc_t *svc; - - new = nsc_kmem_zalloc(sizeof (*new), KM_SLEEP, 0); - if (new == NULL) - return (NULL); - - svc = nsc_kmem_zalloc(sizeof (*svc), KM_SLEEP, 0); - if (svc == NULL) { - nsc_kmem_free(new, sizeof (*new)); - return (NULL); - } - - mutex_enter(&_nsc_svc_mutex); - - for (sp = _nsc_services; sp != NULL; sp = sp->s_next) - if (strcmp(name, sp->s_name) == 0) - break; - - if (sp == NULL) { - sp = new; - sp->s_name = nsc_strdup(name); - if (sp->s_name == NULL) { - mutex_exit(&_nsc_svc_mutex); - nsc_kmem_free(new, sizeof (*new)); - nsc_kmem_free(svc, sizeof (*svc)); - return (NULL); - } - - rw_init(&sp->s_rwlock, NULL, RW_DRIVER, NULL); - sp->s_next = _nsc_services; - _nsc_services = sp; - } - - rw_enter(&sp->s_rwlock, RW_WRITER); - - svc->svc_fn = service_fn; - svc->svc_svc = sp; - - if (svc->svc_fn != NULL) { - svc->svc_next = sp->s_servers; - sp->s_servers = svc; - } else { - svc->svc_next = sp->s_clients; - sp->s_clients = svc; - } - - rw_exit(&sp->s_rwlock); - mutex_exit(&_nsc_svc_mutex); - - if (sp != new) - nsc_kmem_free(new, sizeof (*new)); - - return (svc); -} - - -int -nsc_unregister_svc(nsc_svc_t *svc) -{ - nsc_service_t *sp, **spp; - nsc_svc_t **svcp; - - if (svc == NULL) - return (EINVAL); - - sp = svc->svc_svc; - if (sp == NULL) - return (EINVAL); - - mutex_enter(&_nsc_svc_mutex); - rw_enter(&sp->s_rwlock, RW_WRITER); - - svcp = (svc->svc_fn == NULL) ? 
&sp->s_clients : &sp->s_servers; - for (; *svcp; svcp = &((*svcp)->svc_next)) - if (svc == (*svcp)) - break; - - if (*svcp) - (*svcp) = svc->svc_next; - - nsc_kmem_free(svc, sizeof (*svc)); - - if (sp->s_servers == NULL && sp->s_clients == NULL) { - for (spp = &_nsc_services; *spp; spp = &((*spp)->s_next)) - if ((*spp) == sp) - break; - - if (*spp) - (*spp) = sp->s_next; - - rw_exit(&sp->s_rwlock); - mutex_exit(&_nsc_svc_mutex); - - rw_destroy(&sp->s_rwlock); - nsc_strfree(sp->s_name); - - nsc_kmem_free(sp, sizeof (*sp)); - return (0); - } - - rw_exit(&sp->s_rwlock); - mutex_exit(&_nsc_svc_mutex); - - return (0); -} - - -int -nsc_call_svc(nsc_svc_t *svc, intptr_t arg) -{ - nsc_service_t *sp; - nsc_svc_t *svcp; - int found; - - if (svc == NULL) - return (EINVAL); - - sp = svc->svc_svc; - if (sp == NULL) - return (EINVAL); - - rw_enter(&sp->s_rwlock, RW_READER); - - found = (sp->s_servers != NULL); - - for (svcp = sp->s_servers; svcp; svcp = svcp->svc_next) - (*svcp->svc_fn)(arg); - - rw_exit(&sp->s_rwlock); - - if (found == 0) - return (ENOSYS); - - return (0); -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_gen.h b/usr/src/uts/common/avs/ns/nsctl/nsc_gen.h deleted file mode 100644 index fc5ec68c48..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_gen.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _NSC_GEN_H -#define _NSC_GEN_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __NSC_GEN__ -Error: Illegal #include - private file. -#endif - - -/* - * Global resource map. - */ - -#define _NSC_MAXNAME 20 - -typedef struct nsc_rmmap_s { - char name[_NSC_MAXNAME]; /* Description */ - int32_t offset; /* Offset into arena */ - uint32_t size; /* Length of area */ - int32_t inuse; /* Bitmap of nodes using area */ - int32_t pad[2]; /* For future expansion */ -} nsc_rmmap_t; - - -#ifdef _KERNEL -#include <sys/nsctl/nsc_mem.h> - -extern kmutex_t _nsc_global_lock; -extern int _nsc_global_lock_init; - -extern int _nsc_rmmap_init(nsc_rmmap_t *, char *, int, size_t, ulong_t); -extern ulong_t _nsc_rmmap_alloc(nsc_rmmap_t *, char *, size_t, void (*)()); -extern void _nsc_rmmap_free(nsc_rmmap_t *, char *, nsc_mem_t *); -extern size_t _nsc_rmmap_size(nsc_rmmap_t *, char *); -extern size_t _nsc_rmmap_avail(nsc_rmmap_t *); - -extern nsc_rmmap_t *_nsc_global_nvmemmap_lookup(nsc_rmmap_t *); -extern void nsc_cm_errhdlr(void *, void *, size_t, int); -extern caddr_t _nsc_rm_nvmem_base; - -/* - * Inter-module function (callback) services. 
- */ - -typedef struct nsc_svc_s { - struct nsc_svc_s *svc_next; /* linked list */ - struct nsc_service_s *svc_svc; /* back link */ - void (*svc_fn)(intptr_t); /* service function, or NULL (client) */ -} nsc_svc_t; - -typedef struct nsc_service_s { - struct nsc_service_s *s_next; /* linked list */ - char *s_name; /* name of service */ - nsc_svc_t *s_servers; /* providers of the service */ - nsc_svc_t *s_clients; /* clients of the service */ - krwlock_t s_rwlock; /* lock */ -} nsc_service_t; - -extern void _nsc_init_svc(void); -extern void _nsc_deinit_svc(void); - -#endif /* _KERNEL */ - - -/* - * ncall usage (NCALL_NSC .. NCALL_NSC+9) - */ - -/* inter-node setval */ -#define NSC_SETVAL (NCALL_NSC + 1) -#define NSC_SETVAL_ALL (NCALL_NSC + 2) - -#define NSC_UNUSED3 (NCALL_NSC + 3) -#define NSC_UNUSED4 (NCALL_NSC + 4) - -/* ncall-io io provider */ -#define NSC_NCIO_PARTSIZE (NCALL_NSC + 5) -#define NSC_NCIO_READ (NCALL_NSC + 6) -#define NSC_NCIO_WRITE (NCALL_NSC + 7) - -#define NSC_UNUSED8 (NCALL_NSC + 8) -#define NSC_UNUSED9 (NCALL_NSC + 9) - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_GEN_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_ioctl.h b/usr/src/uts/common/avs/ns/nsctl/nsc_ioctl.h deleted file mode 100644 index 2fffd75d20..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_ioctl.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _NSC_IOCTL_H -#define _NSC_IOCTL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/dkio.h> -#include <sys/vtoc.h> -#ifdef DKIOCPARTITION -#include <sys/efi_partition.h> -#endif - -/* - * Ioctl definitions for Storage Device. - */ - -#define _NSC_(x) (('S'<<16)|('D'<<8)|(x)) - -#define NSCIOC_OPEN _NSC_(1) -#define NSCIOC_RESERVE _NSC_(2) -#define NSCIOC_RELEASE _NSC_(3) -#define NSCIOC_PARTSIZE _NSC_(4) -#define NSCIOC_FREEZE _NSC_(5) -#define NSCIOC_UNFREEZE _NSC_(6) -#define NSCIOC_ISFROZEN _NSC_(7) -#define NSCIOC_POWERMSG _NSC_(8) /* UPS/PCU power state */ -#define NSCIOC_NSKERND _NSC_(9) -#define NSCIOC_GLOBAL_SIZES _NSC_(10) /* size of RM segs */ -#define NSCIOC_GLOBAL_DATA _NSC_(11) -#define NSCIOC_NVMEM_CLEAN _NSC_(12) /* mark nvm nsc_global clean */ -#define NSCIOC_NVMEM_CLEANF _NSC_(13) /* force mark clean */ -#define NSCIOC_BSIZE _NSC_(14) /* get partition size */ - - -/* - * Structure definitions. - */ - - -struct nscioc_open { - char path[NSC_MAXPATH]; /* Pathname */ - int flag; /* Flags */ - int mode; /* Open modes */ - int pad[15]; -}; - - -struct nscioc_partsize { - uint64_t partsize; -}; - - -struct nskernd { - uint64_t data1; - uint64_t data2; - char char1[NSC_MAXPATH]; - char char2[NSC_MAXPATH]; - int command; -}; - - -struct nscioc_bsize { - uint64_t vtoc; /* (struct vtoc *) */ - uint64_t dki_info; /* (struct dk_cinfo *) */ - uint64_t raw_fd; /* dev_t of slice/partition */ - uint64_t p64; /* (struct partition64 *) */ - int efi; /* do we have an EFI partition table? 
*/ -}; - - -#ifdef _KERNEL -extern int nskernd_command(intptr_t, int, int *); -extern int nskern_bsize(struct nscioc_bsize *, int *); -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_IOCTL_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_mem.c b/usr/src/uts/common/avs/ns/nsctl/nsc_mem.c deleted file mode 100644 index 81fafd8971..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_mem.c +++ /dev/null @@ -1,939 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/cmn_err.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/map.h> -#include <sys/errno.h> -#include <sys/ddi.h> - - -#define __NSC_GEN__ -#include "nsc_dev.h" -#include "nsc_gen.h" -#include "nsc_mem.h" -#include "../nsctl.h" -#ifdef DS_DDICT -#include "../contract.h" -#endif - - -static size_t _nsc_rm_size; -caddr_t _nsc_rm_base; -caddr_t _nsc_rm_nvmem_base; -size_t _nsc_rmhdr_size; - -static kmutex_t _nsc_mem_lock; -static nsc_mem_t *_nsc_anon_mem; -static nsc_mem_t *_nsc_rmhdr_mem; - -nsc_mem_t *_nsc_mem_top; - -nsc_rmhdr_t *_nsc_rmhdr_ptr; -nsc_rmmap_t *_nsc_global_map; -nsc_mem_t *_nsc_local_mem; - -static void *_nsc_mem_alloc(size_t *, int, nsc_mem_t *); -static void *_nsc_rm_alloc(size_t *, nsc_mem_t *); -static int _nsc_mem_free(void *, size_t); -static int _nsc_rm_free(void *, size_t); -static size_t _nsc_rm_avail(nsc_mem_t *); - -extern void nscsetup(void); -extern void _nsc_mark_pages(caddr_t, size_t, int); -extern int _nsc_lock_all_rm(void); -extern void _nsc_unlock_all_rm(void); -extern void _nsc_set_max_devices(int); - -/* - * void - * _nsc_init_mem (void) - * Initialise memory allocation system. - * - * Calling/Exit State: - * Called at driver initialisation time to allocate necessary - * data structures. - */ -void -_nsc_init_mem() -{ - mutex_init(&_nsc_mem_lock, NULL, MUTEX_DRIVER, NULL); - - _nsc_anon_mem = nsc_register_mem("anon:kmem", NSC_MEM_LOCAL, 0); - _nsc_local_mem = nsc_register_mem("nsctl:kmem", NSC_MEM_LOCAL, 0); - - if (!_nsc_anon_mem) - cmn_err(CE_PANIC, "nsctl: nsc_init_mem"); -} - - -/* - * void - * _nsc_deinit_mem (void) - * De-initialise memory alloation system. - * - * Calling/Exit State: - * Called at driver unload time to de-allocate - * resources. 
- */ - - -void -_nsc_deinit_mem() -{ - if (_nsc_rm_nvmem_base) - nsc_kmem_free(_nsc_rm_base, _nsc_rmhdr_size); - - _nsc_rm_nvmem_base = NULL; - _nsc_rm_base = NULL; -} - -/* - * int - * _nsc_clear_dirty(int force) - * mark the global area clean by clearing the header dirty bit number. - * - * returns 0 if successfully cleared, valid errno otherwise - * - * this function should only be called at system shutdown. - */ -/*ARGSUSED*/ -int -_nsc_clear_dirty(int force) -{ - int rc = 0; - -#ifdef DEBUG - ulong_t longzeros = 0; - if (force) { - if (_nsc_rm_nvmem_base) { - if (nsc_commit_mem((void *)&longzeros, - (void *)&((nsc_rmhdr_t *) - _nsc_rm_nvmem_base)->rh_dirty, - sizeof (ulong_t), nsc_cm_errhdlr) < 0) { - cmn_err(CE_WARN, - "!nsctl: _nsc_clear_magic: " - "hdr force clear failed 0x%p", - (void *)_nsc_rm_nvmem_base); - } else { - cmn_err(CE_WARN, - "!nsctl: _nsc_clear_magic: " - "hdr force cleared 0x%p", - (void *)_nsc_rm_nvmem_base); - _nsc_rmhdr_ptr->rh_dirty = 0; - } - - return (0); - } else - return (EINVAL); - } - - if (_nsc_rm_nvmem_base) { - if (_nsc_global_lock_init) { - mutex_enter(&_nsc_global_lock); - if (!_nsc_check_mapinuse()) { - if (nsc_commit_mem((void *)&longzeros, - (void *)&((nsc_rmhdr_t *) - _nsc_rm_nvmem_base)->rh_dirty, - sizeof (ulong_t), nsc_cm_errhdlr) < 0) { - cmn_err(CE_WARN, - "!nsctl: _nsc_clear_magic: " - "hdr clear failed 0x%p", - (void *)_nsc_rm_nvmem_base); - } else { - cmn_err(CE_WARN, - "!nsctl: _nsc_clear_magic: " - "hdr cleared 0x%p", - (void *)_nsc_rm_nvmem_base); - _nsc_rmhdr_ptr->rh_dirty = 0; - } - rc = 0; - } else { - cmn_err(CE_WARN, - "!nsctl: _nsc_clear_magic: " - "global area in use. 
cannot clear magic"); - rc = EBUSY; - } - mutex_exit(&_nsc_global_lock); - } else { - cmn_err(CE_WARN, - "!nsctl: _nsc_clear_magic: cannot clear magic"); - rc = EINVAL; - } - } else - rc = EINVAL; -#else - - rc = ENOTTY; - -#endif /* DEBUG */ - - return (rc); -} - -/* - * int - * _nsc_check_mapinuse() - * check if any global maps are still inuse; - * - * return 1 if any non-nsctl map is in use, 0 otherwise - * should be called with _nsc_global_lock held - * - * for nvmem support. if a client of nsctl is still - * using the global maps then the global area will not - * be marked clean. - */ -int -_nsc_check_mapinuse(void) -{ - nsc_rmmap_t *rmap = _nsc_rmhdr_ptr->map; - nsc_rmmap_t *rmapend; - - rmapend = (nsc_rmmap_t *) - ((char *)_nsc_rmhdr_ptr + _nsc_rmhdr_ptr->size); - - for (; rmap < rmapend; ++rmap) - if ((rmap->inuse) && !(_nsc_is_nsctl_map(rmap->name))) - return (1); - - return (0); - -} - -/* names of maps in the global area that belong to nsctl */ -static char *nsctl_mapnames[] = { - "nsc_global", - "nsc_lock" -}; - -int -_nsc_is_nsctl_map(char *mapname) -{ - int i; - - for (i = 0; i < sizeof (nsctl_mapnames)/sizeof (char *); ++i) - if (strncmp(mapname, nsctl_mapnames[i], _NSC_MAXNAME) == 0) - return (1); - - return (0); -} - - -/* - * nsc_mem_t * - * nsc_register_mem(char *name, int type, int flag) - * Register a category of memory usage. - * - * Calling/Exit State: - * Returns a token for use in future calls to nsc_kmem_alloc. - * type is NSC_MEM_LOCAL, or NSC_MEM_GLOBAL. - * flag is passed through to kmem_alloc on allocate. - * - * Description: - * The parameters associated with a category can be changed - * by making a subsequent call to nsc_register_mem. 
- */ -nsc_mem_t * -nsc_register_mem(char *name, int type, int flag) -{ - nsc_mem_t *mp, *new; - - new = kmem_zalloc(sizeof (*new), KM_NOSLEEP); - - mutex_enter(&_nsc_mem_lock); - - for (mp = _nsc_mem_top; mp; mp = mp->next) - if (strcmp(mp->name, name) == 0) - break; - - if (!mp && !(mp = new)) { - mutex_exit(&_nsc_mem_lock); - return (NULL); - } - - mp->type = type; - mp->flag = flag; - - mp->hwm = mp->used; - mp->pagehwm = mp->pages; - mp->nalloc -= mp->nfree; - mp->nfree = 0; - - if (!mp->name) { - mp->name = name; - mp->next = _nsc_mem_top; - _nsc_mem_top = mp; - } - - mutex_exit(&_nsc_mem_lock); - - if (new && mp != new) - kmem_free(new, sizeof (*new)); - - return (mp); -} - - -/* - * void - * nsc_unregister_mem(nsc_mem_t *) - * Un-register a category of memory usage. - * - * Description: - * The specified category is un-registered. For correct - * operation this should only be called when all memory - * associated with the category has been free'd. - */ -void -nsc_unregister_mem(nsc_mem_t *mp) -{ - nsc_mem_t **mpp; - - if (!mp) - return; - - mutex_enter(&_nsc_mem_lock); - - for (mpp = &_nsc_mem_top; *mpp; mpp = &(*mpp)->next) - if (*mpp == mp) - break; - - if (*mpp != NULL) { - *mpp = mp->next; - kmem_free(mp, sizeof (*mp)); - } - - mutex_exit(&_nsc_mem_lock); -} - -/* - * void - * _nsc_global_setup - * Setup global variables. - * - * Calling/Exit State: - * Called to setup the global header. 
- */ -void -_nsc_global_setup() -{ - nsc_rmhdr_t *hdr = (void *)_nsc_rm_base; - size_t size; - - if (!hdr || !_nsc_global_lock_init || _nsc_rmhdr_ptr) - return; - - mutex_enter(&_nsc_global_lock); - - if (!hdr->magic || (_nsc_rm_nvmem_base && !hdr->rh_dirty)) { - size = sizeof (nsc_rmhdr_t) + - (sizeof (nsc_rmmap_t) * (_NSC_GLSLOT - 1)); - - size = (size + _NSC_GLALIGN) & ~_NSC_GLALIGN; - bzero(_nsc_rm_base, size); - - hdr->magic = _NSCTL_HDRMAGIC; - hdr->ver = _NSCTL_HDRVER3; - hdr->size = size; - hdr->maxdev = nsc_max_devices(); - - hdr->map[0].inuse = _NSC_GLSLOT; - if (_nsc_rm_nvmem_base) { - if (hdr->rh_dirty) { /* corrupted */ - cmn_err(CE_WARN, - "!nsctl: _nsc_global_setup: nv bad header"); - mutex_exit(&_nsc_global_lock); - return; - } - if (nsc_commit_mem((void *)_nsc_rm_base, - (void *)_nsc_rm_nvmem_base, - size, nsc_cm_errhdlr) < 0) - cmn_err(CE_WARN, "!_nsc_global_setup: " - "nvmem header not updated"); - } - } - - _nsc_rmhdr_ptr = hdr; - mutex_exit(&_nsc_global_lock); - - if (hdr->magic != _NSCTL_HDRMAGIC || (hdr->ver != _NSCTL_HDRVER && - hdr->ver != _NSCTL_HDRVER3)) { - cmn_err(CE_WARN, "!nsctl: _nsc_global_setup: bad header"); - return; - } - - if (hdr->ver == _NSCTL_HDRVER3 && hdr->maxdev != nsc_max_devices()) { - _nsc_set_max_devices(hdr->maxdev); - cmn_err(CE_WARN, - "!nsctl: _nsc_global_setup: setting nsc_max_devices to %d", - hdr->maxdev); - } - - if (!_nsc_rmmap_init(hdr->map, "nsc_global", _NSC_GLSLOT, - _nsc_rm_size - hdr->size, hdr->size)) { - cmn_err(CE_WARN, - "!nsctl: _nsc_global_setup: global map init failed"); - return; - } - - _nsc_global_map = hdr->map; - - (void) nsc_kmem_alloc(hdr->size, 0, _nsc_rmhdr_mem); -} - -/* - * int - * _nsc_need_global_mem () - * Expected global memory usage. - * - * Calling/Exit State: - * Returns the amount of global memory expected to be - * used by internal data structures. 
- * - * Remarks: - * This is provided purely as a configuration aid to - * systems without global memory and as such is not - * declared in nsctl.h. - */ -int -_nsc_need_global_mem() -{ - int size = sizeof (nsc_rmhdr_t) + - (sizeof (nsc_rmmap_t) * (_NSC_GLSLOT - 1)); - - size = (size + _NSC_GLALIGN) & ~_NSC_GLALIGN; - return (size); -} - - -/* - * void * - * nsc_kmem_alloc (size_t size, int flag, nsc_mem_t *mem) - * Allocate memory of the specified type. - * - * Calling/Exit State: - * Returns a pointer to a word aligned area of memory. - * If mem is zero then an anonymous category is used. - * - * Description: - * Allocates the required memory and updates the usage - * statistics stored in mem. - * - * Remarks: - * VME memory is guaranteed to be eight byte aligned. - */ -void * -nsc_kmem_alloc(size_t size, int flag, nsc_mem_t *mem) -{ - void *vp; - - if (!mem) - mem = _nsc_anon_mem; - - if ((vp = _nsc_mem_alloc(&size, flag, mem)) == NULL) - return (NULL); - - mutex_enter(&_nsc_mem_lock); - - mem->nalloc++; - mem->used += size; - mem->pages += btopr(size); - - if (mem->used > mem->hwm) - mem->hwm = mem->used; - if (mem->pages > mem->pagehwm) - mem->pagehwm = mem->pages; - - mutex_exit(&_nsc_mem_lock); - return (vp); -} - - -/* - * void * - * _nsc_mem_alloc (size_t *sizep, int flag, nsc_mem_t *mem) - * Allocate memory of the specified type. - * - * Calling/Exit State: - * Returns a pointer to a word aligned area of memory. - * - * Description: - * Uses the type field to determine whether to allocate RM, - * VME or kernel memory. For types other then RM a copy of - * mem is stored immediately prior to the returned area. - * size is updated to reflect the header. - * - * Remarks: - * A two word header is user for VME memory to ensure - * eight byte alignment. 
- */ -static void * -_nsc_mem_alloc(size_t *sizep, int flag, nsc_mem_t *mem) -{ - size_t size = *sizep; - void *vp; - - if (mem->type & NSC_MEM_GLOBAL) - return (_nsc_rm_alloc(sizep, mem)); - - flag |= mem->flag; - size += sizeof (nsc_mem_t *); - - if (flag & KM_NOSLEEP) - flag &= ~KM_SLEEP; - - vp = kmem_alloc(size, flag); - if (!vp) - return (NULL); - - *sizep = size; - - *(nsc_mem_t **)vp = mem; - - return (void *)((nsc_mem_t **)vp + 1); -} - - -/* - * void - * nsc_kmem_free (void *addr, size_t size) - * Free a previously allocated area of memory. - * - * Calling/Exit State: - * The memory specified by addr is returned to the free pool. - * - * Description: - * Updates the usage statistics appropriately. - */ -void -nsc_kmem_free(void *addr, size_t size) -{ - caddr_t caddr = (caddr_t)addr; - caddr_t rm_base; - int rc; - - if (_nsc_rm_nvmem_base) - rm_base = _nsc_rm_nvmem_base; - else - rm_base = _nsc_rm_base; - - if (rm_base <= caddr && caddr < rm_base + _nsc_rm_size) - rc = _nsc_rm_free(addr, size); - else - rc = _nsc_mem_free(addr, size); - - if (rc < 0) - cmn_err(CE_PANIC, "nsctl: nsc_kmem_free: invalid free"); -} - - -/* - * nsc_mem_t * - * _nsc_mem_free (void *addr, size_t size) - * Free a previously allocated area of memory. - * - * Calling/Exit State: - * Frees the VME or kernel memory at addr and updates - * the associated mem structure. 
- */ -static int -_nsc_mem_free(void *addr, size_t size) -{ - nsc_mem_t *mp, *tp; - - addr = (void *)((nsc_mem_t **)addr - 1); - size += sizeof (nsc_mem_t *); - - mutex_enter(&_nsc_mem_lock); - - mp = *(nsc_mem_t **)addr; - - for (tp = _nsc_mem_top; tp; tp = tp->next) - if (tp == mp) - break; - - if (tp == NULL) { - mutex_exit(&_nsc_mem_lock); - return (-1); - } - - mp->nfree++; - mp->used -= size; - mp->pages -= btopr(size); - - *(nsc_mem_t **)addr = NULL; - - mutex_exit(&_nsc_mem_lock); - - kmem_free(addr, size); - - return (0); -} - - -/* - * void * - * nsc_kmem_zalloc(size_t size, int flags, nsc_mem_t *mem) - * Allocate and zero memory. - * - * Calling/Exit State: - * Same as nsc_kmem_alloc(), except that the memory is zeroed. - */ -void * -nsc_kmem_zalloc(size_t size, int flag, nsc_mem_t *mem) -{ - void *vp = nsc_kmem_alloc(size, flag, mem); - - if (vp) - bzero((char *)vp, size); - - return (vp); -} - - -/* - * void - * nsc_mem_sizes (nsc_mem_t *mem, size_t *usedp, size_t *hwmp, size_t *reqp) - * Access size information for category. - * - * Calling/Exit State: - * If the corresponding pointer is non-zero returns - * respectively, the number of bytes currently allocated, the - * high water mark in bytes and an estimate of the number of - * bytes needed for the category assuming that each request - * is satisfied from a different page. - * - * Remarks: - * The reqp parameter is used to estimate the amount of special - * purpose memory needed to support the category. - */ -void -nsc_mem_sizes(nsc_mem_t *mem, size_t *usedp, size_t *hwmp, size_t *reqp) -{ - if (!mem) - mem = _nsc_anon_mem; - - if (usedp) - *usedp = mem->used; - if (hwmp) - *hwmp = mem->hwm; - if (reqp) - *reqp = (size_t)ptob(mem->pagehwm); -} - - -/* - * size_t - * nsc_mem_avail (nsc_mem_t *mem) - * Memory available for use by category. - * - * Calling/Exit State: - * Returns the number of bytes of memory currently - * available for use by the category. 
- * - * Remarks: - * Reduces the memory available to allow for one unit - * of allocation overhead. - * - * Only implemented for NSC_MEM_GLOBAL. - */ -size_t -nsc_mem_avail(nsc_mem_t *mem) -{ - if (!mem) - mem = _nsc_anon_mem; - - if (mem->type & NSC_MEM_GLOBAL) - return (_nsc_rm_avail(mem)); - -#ifdef DEBUG - cmn_err(CE_WARN, "!nsc_mem_avail: called for non-global memory!"); -#endif - - return (0); -} - - -/* - * void - * _nsc_global_zero (ulong_t offset, size_t size) - * Zero global memory. - * - * Description: - * Zeroes an area of global memory at the specified offset. - */ - -#define ZSIZE 4096 -static char _nsc_nvmem_zeroes[ZSIZE]; - -static void -_nsc_global_zero(ulong_t offset, size_t size) -{ - int i; - int rc; - int failed = 0; - - if (_nsc_rm_nvmem_base) { - for (i = 0; i < (int)(size / ZSIZE); ++i) { - rc = nsc_commit_mem((void *)_nsc_nvmem_zeroes, - (void *)(_nsc_rm_nvmem_base + offset + - i * ZSIZE), - ZSIZE, nsc_cm_errhdlr); - - if (rc < 0) - ++failed; - - } - rc = nsc_commit_mem((void *)_nsc_nvmem_zeroes, - (void *)(_nsc_rm_nvmem_base + offset + i * ZSIZE), - size % ZSIZE, - nsc_cm_errhdlr); - if ((rc < 0) || failed) - cmn_err(CE_WARN, "!_nsc_global_zero: clear mem failed"); - return; - } - - if (_nsc_rm_base) - bzero(_nsc_rm_base + offset, size); -} - - -/* - * void * - * _nsc_rm_alloc (size_t *sizep, nsc_mem_t *mem) - * Allocate next available section of RM. - * - * Calling/Exit State: - * Returns a pointer to an area of global memory. - * - * Description: - * Only one allocation request is allowed for each - * category of global memory. 
- */ -static void * -_nsc_rm_alloc(size_t *sizep, nsc_mem_t *mem) -{ - size_t avail, size = (*sizep); - ulong_t offset = 0; - caddr_t retaddr; - - if (!_nsc_global_map) { - cmn_err(CE_WARN, "!_nsc_rm_alloc: no map"); - return (NULL); - } - - mutex_enter(&_nsc_mem_lock); - - if (mem->base || mem->pend) { - mutex_exit(&_nsc_mem_lock); - cmn_err(CE_WARN, "!_nsc_rm_alloc: invalid alloc"); - return (NULL); - } - - mem->pend = 1; - mutex_exit(&_nsc_mem_lock); - - size = (size + _NSC_GLALIGN) & ~_NSC_GLALIGN; - - /* CONSTCOND */ - - while (1) { - if (strcmp(mem->name, "nsctl:rmhdr") == 0) - break; - - offset = _nsc_rmmap_alloc(_nsc_global_map, - mem->name, size, _nsc_global_zero); - - if (offset) - break; - - if (mem->type & NSC_MEM_RESIZE) { - avail = _nsc_rmmap_size(_nsc_global_map, mem->name); - - if (avail && avail != size) { - size = avail; - continue; - } - } - - mem->pend = 0; - cmn_err(CE_WARN, - "!_nsc_rm_alloc: alloc %ld bytes - %ld available", - size, _nsc_rm_avail(mem)); - return (NULL); - } - - _nsc_mark_pages(_nsc_rm_base + offset, size, 1); - - if (_nsc_rm_nvmem_base) - retaddr = _nsc_rm_nvmem_base + offset; - else - retaddr = _nsc_rm_base + offset; - - mutex_enter(&_nsc_mem_lock); - - mem->base = retaddr; - mem->pend = 0; - - mutex_exit(&_nsc_mem_lock); - - (*sizep) = size; - return (retaddr); -} - - -/* - * nsc_mem_t * - * _nsc_rm_free (void *addr, size_t size) - * Free an area of RM. - * - * Calling/Exit State: - * Returns 0 on success, -1 on failure. 
- */ -static int -_nsc_rm_free(void *addr, size_t size) -{ - caddr_t caddr = (caddr_t)addr; - nsc_mem_t *mp; - - mutex_enter(&_nsc_mem_lock); - - for (mp = _nsc_mem_top; mp; mp = mp->next) - if (mp->base == caddr) - break; - - if (!mp) { - mutex_exit(&_nsc_mem_lock); - return (-1); - } - - mp->nfree++; - mp->used -= size; - mp->pages -= btopr(size); - mp->pend = 1; - - if (!mp->used) - mp->base = 0; - - mutex_exit(&_nsc_mem_lock); - - if (_nsc_global_map) - _nsc_rmmap_free(_nsc_global_map, mp->name, mp); - - _nsc_mark_pages(addr, size, 0); - - mp->pend = 0; - return (0); -} - - -/* - * static size_t - * _nsc_rm_avail (mem) - * Amount of RM available. - * - * Calling/Exit State: - * Returns 0 if the specified category has already been - * allocated. Returns the size of the region if it already - * exists, otherwise the number of bytes of global memory - * available. - */ -static size_t -_nsc_rm_avail(nsc_mem_t *mem) -{ - size_t size; - - if (!_nsc_global_map || mem->base || mem->pend) - return (0); - - if ((size = _nsc_rmmap_size(_nsc_global_map, mem->name)) != 0) - return (size); - - return (_nsc_rmmap_avail(_nsc_global_map)); -} - - -/* - * nvram support - * given a map address, return the address of the copy - * in nvram. - * Assumes that _nsc_rm_nvmem_base is valid. 
- */ -nsc_rmmap_t * -_nsc_global_nvmemmap_lookup(nsc_rmmap_t *hp) -{ - size_t offset; - - /* LINTED */ - offset = (caddr_t)hp - _nsc_rm_base; - return ((nsc_rmmap_t *)(_nsc_rm_nvmem_base + offset)); -} - -int -_nsc_get_global_sizes(void *arg, int *rvp) -{ - if (!_nsc_rmhdr_ptr) - return (EINVAL); - - if (copyout(&_nsc_rmhdr_ptr->size, arg, - sizeof (_nsc_rmhdr_ptr->size)) < 0) - return (EFAULT); - - *rvp = 0; - return (0); -} - -int -_nsc_get_global_data(void *arg, int *rvp) -{ - size_t size; - - if (!_nsc_rmhdr_ptr) - return (EINVAL); - - size = _nsc_rmhdr_ptr->size; - - if (copyout(_nsc_rmhdr_ptr, arg, size) < 0) - return (EFAULT); - - if (_nsc_rm_nvmem_base) { - char *taddr; - - if ((taddr = kmem_alloc(size, KM_NOSLEEP)) == NULL) - return (ENOMEM); - - if (copyout(taddr, (char *)arg + size, size) < 0) { - kmem_free(taddr, size); - return (EFAULT); - } - - kmem_free(taddr, size); - } - - *rvp = 0; - return (0); -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_mem.h b/usr/src/uts/common/avs/ns/nsctl/nsc_mem.h deleted file mode 100644 index 08fce89029..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_mem.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _NSC_MEM_H -#define _NSC_MEM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __NSC_GEN__ -Error: Illegal #include - private file. -#endif - - -/* - * Macro definitions. - */ - - -/* - * Definition of control structure. - */ -typedef struct nsc_mem_s { - struct nsc_mem_s *next; /* Link to next type */ - char *name; /* Description */ - int type; /* Memory type */ - int flag; /* Allocation flags */ - size_t used; /* Current usage */ - size_t hwm; /* High Water Mark */ - int pages; /* Usage in pages */ - int pagehwm; /* Page High Water Mark */ - caddr_t base; /* Base address of RM area */ - int nalloc; /* Number of allocates */ - int nfree; /* Number of frees */ - int pend; /* Operation pending */ -} nsc_mem_t; - - -/* - * Definition of global memory header - */ - -#define _NSCTL_HDRMAGIC 0x5344474c /* Magic number for header */ -#define _NSCTL_HDRVER 2 /* Version number for header */ -#define _NSCTL_HDRVER3 3 /* Version number for header */ -#define _NSC_GLSLOT 125 /* Number of global slots */ -#define _NSC_GLALIGN 4095 /* Alignment between areas */ - - -typedef struct nsc_rmhdr_s { - uint32_t magic; /* Magic number */ - uint32_t ver; /* Version number of header */ - uint32_t size; /* Size of header section */ - int32_t rh_dirty; /* dirty bit for nvmem */ - int32_t maxdev; /* Configured nsc_max_devices */ - int32_t pad[14]; /* Future expansion */ - nsc_rmmap_t map[1]; /* Start of map array */ -} nsc_rmhdr_t; - -extern nsc_rmmap_t *_nsc_global_nvmemmap_lookup(nsc_rmmap_t *); - -extern int _nsc_get_global_sizes(void *, int *); -extern int _nsc_get_global_data(void *, int *); -extern int 
_nsc_clear_dirty(int); -extern int _nsc_check_mapinuse(void); -extern int _nsc_is_nsctl_map(char *); - -extern caddr_t _nsc_rm_base; - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_MEM_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_ncallio.c b/usr/src/uts/common/avs/ns/nsctl/nsc_ncallio.c deleted file mode 100644 index c39574e95e..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_ncallio.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/file.h> -#include <sys/open.h> -#include <sys/cred.h> -#include <sys/kmem.h> -#include <sys/ddi.h> - -#include <sys/ncall/ncall.h> - -#define __NSC_GEN__ -#include "nsc_dev.h" -#include "nsc_ncallio.h" -#include "../nsctl.h" - - -extern nsc_mem_t *_nsc_local_mem; - -extern void _nsc_init_ncio(void); -extern void _nsc_deinit_ncio(void); - -static nsc_io_t *nsc_ncio_io; -static kmutex_t nsc_ncio_lock; -static nsc_ncio_dev_t *nsc_ncio_top; - - -/* - * ncall-io io provider - client side. 
- */ - - -static int -nsc_ncio_split(char *node_and_path, char **pathp) -{ - char *cp; - int i, snode; - - snode = 0; - for (cp = node_and_path; *cp && *cp != ':'; cp++) { - i = *cp - '0'; - if (i < 0 || i > 9) - break; - - snode = (10 * snode) + i; - } - - if (*cp != ':') { - cmn_err(CE_WARN, - "ncio: failed to convert %s to node and path", - node_and_path); - return (-1); - } - - *pathp = cp + 1; - return (snode); -} - - -/* - * nsc_ncio_open() - * - * The pathname that is used with the NSC_NCALL io provider should be - * of the form "<node>:<pathname>", where <node> is the decimal ncall - * nodeid of the server machine and <pathname> is the pathname of the - * device on the server node. - */ - -/* ARGSUSED */ -static int -nsc_ncio_open(char *node_and_path, int flag, blind_t *cdp, void *iodev) -{ - nsc_ncio_dev_t *ncp, *new; - char *path = NULL; - uint64_t phash; - int snode; - - snode = nsc_ncio_split(node_and_path, &path); - if (snode < 0) - return (EINVAL); - - new = nsc_kmem_zalloc(sizeof (*new), KM_SLEEP, _nsc_local_mem); - phash = nsc_strhash(path); - - if (new) { - (void) strncpy(new->path, path, sizeof (new->path)); - new->phash = phash; - new->snode = snode; - } - - mutex_enter(&nsc_ncio_lock); - - for (ncp = nsc_ncio_top; ncp; ncp = ncp->next) - if (ncp->phash == phash && strcmp(path, ncp->path) == 0) - break; - - if (ncp == NULL && new != NULL) { - ncp = new; - new = NULL; - ncp->next = nsc_ncio_top; - nsc_ncio_top = ncp; - } - - if (ncp != NULL) - ncp->ref++; - - mutex_exit(&nsc_ncio_lock); - - if (new) - nsc_kmem_free(new, sizeof (*new)); - - if (!ncp) - return (ENOMEM); - - *cdp = (blind_t)ncp; - return (0); -} - - -static int -nsc_ncio_close(nsc_ncio_dev_t *ncp) -{ - nsc_ncio_dev_t **ncpp; - int found, free; - - if (ncp == NULL) - return (EINVAL); - - found = 0; - free = 0; - - mutex_enter(&nsc_ncio_lock); - - for (ncpp = &nsc_ncio_top; *ncpp; ncpp = &((*ncpp)->next)) { - if (*ncpp == ncp) { - found = 1; - break; - } - } - - if (!found) { - 
mutex_exit(&nsc_ncio_lock); - return (ENODEV); - } - - ncp->ref--; - if (ncp->ref == 0) { - *ncpp = ncp->next; - free = 1; - } - - mutex_exit(&nsc_ncio_lock); - - if (free) - nsc_kmem_free(ncp, sizeof (*ncp)); - - return (0); -} - - -/* ARGSUSED1 */ -static nsc_buf_t * -nsc_ncio_alloch(void (*d_cb)(), void (*r_cb)(), void (*w_cb)()) -{ - nsc_ncio_buf_t *h; - - if ((h = nsc_kmem_zalloc(sizeof (*h), KM_SLEEP, - _nsc_local_mem)) == NULL) - return (NULL); - - h->disc = d_cb; - h->bufh.sb_flag = NSC_HALLOCATED; - - return (&h->bufh); -} - - -static int -nsc_ncio_freeh(nsc_ncio_buf_t *h) -{ - nsc_kmem_free(h, sizeof (*h)); - return (0); -} - - -static int -nsc_ncio_rwb(nsc_ncio_buf_t *h, nsc_off_t pos, nsc_size_t len, - int flag, const int rwflag) -{ - nsc_ncio_rw_t *rw; - ncall_t *ncall; - int ncall_flag; - int ncall_proc; - int ncall_len; - int rc, err; - - if (h->bufh.sb_flag & NSC_ABUF) - return (EIO); - - if (pos < h->bufh.sb_pos || - (pos + len) > (h->bufh.sb_pos + h->bufh.sb_len)) { - return (EINVAL); - } - - if (!len) - return (0); - - if (rwflag == NSC_READ && (flag & NSC_RDAHEAD)) - return (0); - - /* CONSTCOND */ - if (sizeof (*rw) > NCALL_DATA_SZ) { - /* CONSTCOND */ - ASSERT(sizeof (*rw) <= NCALL_DATA_SZ); - return (ENXIO); - } - - if (rwflag == NSC_READ) { - ncall_flag = NCALL_RDATA; - ncall_proc = NSC_NCIO_READ; - ncall_len = sizeof (*rw) - sizeof (rw->rw_data); - } else { - ncall_flag = 0; - ncall_proc = NSC_NCIO_WRITE; - ncall_len = sizeof (*rw); - } - - rw = &h->rw; - - if (rwflag == 0) { - /* zero */ - bzero(rw->rw_data, sizeof (rw->rw_data)); - } - - if (h->disc) - (*h->disc)(h); - - rc = ncall_alloc(rw->rw_snode, 0, 0, &ncall); - if (rc != 0) { - return (rc); - } - - rw->rw_pos = (uint64_t)pos; - rw->rw_len = (uint64_t)len; - rc = ncall_put_data(ncall, rw, ncall_len); - if (rc != 0) { - return (rc); - } - - rc = ncall_send(ncall, ncall_flag, ncall_proc); - if (rc != 0) { - return (rc); - } - - rc = ncall_read_reply(ncall, 1, &err); - if (rc != 0 || 
err != 0) { - return (rc ? rc : err); - } - - if (rwflag == NSC_READ) { - rc = ncall_get_data(ncall, rw, sizeof (*rw)); - if (rc != 0) { - return (rc); - } - } - - ncall_free(ncall); - return (0); -} - - -static int -nsc_ncio_read(nsc_ncio_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - return (nsc_ncio_rwb(h, pos, len, flag, NSC_READ)); -} - - -static int -nsc_ncio_write(nsc_ncio_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - return (nsc_ncio_rwb(h, pos, len, flag, NSC_WRITE)); -} - - -static int -nsc_ncio_zero(nsc_ncio_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - return (nsc_ncio_rwb(h, pos, len, flag, 0)); -} - - -static void -nsc_wait_ncio(nsc_ncio_buf_t *h) -{ - nsc_iodev_t *iodev = h->bufh.sb_fd->sf_iodev; - void (*fn)() = h->disc; - nsc_ncio_buf_t *hp; - - mutex_enter(&iodev->si_lock); - - h->next = iodev->si_active; - iodev->si_active = h; - - /* CONSTCOND */ - - while (1) { - for (hp = h->next; hp; hp = hp->next) { - if ((h->bufh.sb_pos + h->bufh.sb_len) > - hp->bufh.sb_pos && - h->bufh.sb_pos < - (hp->bufh.sb_pos + hp->bufh.sb_len)) { - /* found overlapping io in progress */ - break; - } - } - - if (!hp) - break; - - if (fn) { - (*fn)(h); - fn = NULL; - } - - cv_wait(&iodev->si_cv, &iodev->si_lock); - } - - mutex_exit(&iodev->si_lock); -} - - -static int -nsc_ncio_freeb(nsc_ncio_buf_t *h) -{ - nsc_ncio_buf_t **hpp, *hp; - nsc_iodev_t *iodev; - int wake = 0; - - if ((h->bufh.sb_flag & NSC_HACTIVE) && - h->bufh.sb_fd && !(h->bufh.sb_flag & NSC_ABUF)) { - iodev = h->bufh.sb_fd->sf_iodev; - - mutex_enter(&iodev->si_lock); - - for (hpp = (nsc_ncio_buf_t **)&iodev->si_active; - *hpp; hpp = &hp->next) { - if ((hp = *hpp) == h) { - *hpp = h->next; - break; - } - - if ((h->bufh.sb_pos + h->bufh.sb_len) > - hp->bufh.sb_pos && - h->bufh.sb_pos < - (hp->bufh.sb_pos + hp->bufh.sb_len)) { - wake = 1; - } - } - - if (wake) - cv_broadcast(&iodev->si_cv); - - mutex_exit(&iodev->si_lock); - } - - /* clear flags, preserve NSC_HALLOCATED */ - 
h->bufh.sb_flag &= NSC_HALLOCATED; - - if ((h->bufh.sb_flag & NSC_HALLOCATED) == 0) - (void) nsc_ncio_freeh(h); - - return (0); -} - - -static int -nsc_ncio_allocb(nsc_ncio_dev_t *ncp, nsc_off_t pos, nsc_size_t len, - int flag, nsc_ncio_buf_t **hp) -{ - nsc_ncio_buf_t *h = *hp; - int rc; - - if (h == NULL) { - cmn_err(CE_WARN, "nsc_ncio_allocb: NULL handle!"); - return (EIO); - } - - if (FBA_SIZE(len) > NSC_NCIO_MAXDATA) { - /* too large */ - return (ENXIO); - } - - if ((blind_t)ncp == NSC_ANON_CD) { - flag &= ~(NSC_READ | NSC_WRITE | NSC_RDAHEAD); - } - - if (h->disc) - (*h->disc)(h); - - h->bufh.sb_pos = pos; - h->bufh.sb_len = len; - h->bufh.sb_error = 0; - h->bufh.sb_flag |= flag | NSC_HACTIVE; - h->bufh.sb_vec = &h->vec[0]; - - if (!((blind_t)ncp == NSC_ANON_CD)) { - (void) strncpy(h->rw.rw_path, ncp->path, - sizeof (h->rw.rw_path)); - h->rw.rw_snode = ncp->snode; - } - - h->vec[0].sv_len = FBA_SIZE(len); - h->vec[0].sv_addr = (uchar_t *)&h->rw.rw_data[0]; - h->vec[0].sv_vme = 0; - - h->vec[1].sv_len = 0; - h->vec[1].sv_addr = 0; - h->vec[1].sv_vme = 0; - - if ((flag & NSC_RDAHEAD) || ((blind_t)ncp == NSC_ANON_CD)) - return (NSC_DONE); - - nsc_wait_ncio(h); - - if (flag & NSC_READ) { - if ((rc = nsc_ncio_read(h, pos, len, flag)) != 0) { - (void) nsc_ncio_freeb(h); - return (rc); - } - } - - return (NSC_DONE); -} - - -static int -nsc_ncio_partsize(nsc_ncio_dev_t *ncp, nsc_size_t *rvalp) -{ - *rvalp = (nsc_size_t)ncp->partsize; - return (0); -} - - -/* ARGSUSED */ -static int -nsc_ncio_maxfbas(nsc_ncio_dev_t *ncp, int flag, nsc_size_t *ptr) -{ - if (flag == NSC_CACHEBLK) - *ptr = 1; - else - *ptr = FBA_NUM(NSC_NCIO_MAXDATA); - - return (0); -} - - -static int -nsc_ncio_attach(nsc_ncio_dev_t *ncp) -{ - nsc_ncio_size_t *size; - ncall_t *ncall; - int sizeh, sizel; - int rc, err; - - /* CONSTCOND */ - if (sizeof (*size) > NCALL_DATA_SZ) { - /* CONSTCOND */ - ASSERT(sizeof (*size) <= NCALL_DATA_SZ); - return (ENXIO); - } - - size = kmem_zalloc(sizeof (*size), 
KM_SLEEP); - (void) strncpy(size->path, ncp->path, sizeof (size->path)); - - rc = ncall_alloc(ncp->snode, 0, 0, &ncall); - if (rc != 0) { - kmem_free(size, sizeof (*size)); - return (rc); - } - - rc = ncall_put_data(ncall, size, sizeof (*size)); - kmem_free(size, sizeof (*size)); - size = NULL; - if (rc != 0) - return (rc); - - rc = ncall_send(ncall, 0, NSC_NCIO_PARTSIZE); - if (rc != 0) - return (0); - - rc = ncall_read_reply(ncall, 3, &err, &sizeh, &sizel); - if (rc != 0 || err != 0) - return (rc ? rc : err); - - ncall_free(ncall); - - ncp->partsize = (uint64_t)(((uint64_t)sizeh << 32) | (uint64_t)sizel); - return (0); -} - - -static nsc_def_t nsc_ncio_def[] = { - { "Open", (uintptr_t)nsc_ncio_open, 0 }, - { "Close", (uintptr_t)nsc_ncio_close, 0 }, - { "Attach", (uintptr_t)nsc_ncio_attach, 0 }, - { "AllocHandle", (uintptr_t)nsc_ncio_alloch, 0 }, - { "FreeHandle", (uintptr_t)nsc_ncio_freeh, 0 }, - { "AllocBuf", (uintptr_t)nsc_ncio_allocb, 0 }, - { "FreeBuf", (uintptr_t)nsc_ncio_freeb, 0 }, - { "Read", (uintptr_t)nsc_ncio_read, 0 }, - { "Write", (uintptr_t)nsc_ncio_write, 0 }, - { "Zero", (uintptr_t)nsc_ncio_zero, 0 }, - { "PartSize", (uintptr_t)nsc_ncio_partsize, 0 }, - { "MaxFbas", (uintptr_t)nsc_ncio_maxfbas, 0 }, - { "Provide", NSC_NCALL, 0 }, - { 0, 0, 0 } -}; - - -/* - * ncall-io io provider - server side. 
- */ - -/* ARGSUSED1 */ -static void -nsc_rncio_partsize(ncall_t *ncall, int *ap) -{ - nsc_ncio_size_t *size; - nsc_size_t partsize; - int sizeh, sizel; - nsc_fd_t *fd; - int rc; - - size = kmem_alloc(sizeof (*size), KM_SLEEP); - rc = ncall_get_data(ncall, size, sizeof (*size)); - if (rc != 0) { - ncall_reply(ncall, EFAULT, 0, 0); - kmem_free(size, sizeof (*size)); - return; - } - - fd = nsc_open(size->path, NSC_CACHE | NSC_DEVICE | NSC_READ, - NULL, NULL, &rc); - kmem_free(size, sizeof (*size)); - size = NULL; - if (fd == NULL) { - ncall_reply(ncall, rc, 0, 0); - return; - } - - rc = nsc_reserve(fd, NSC_PCATCH); - if (rc != 0) { - (void) nsc_close(fd); - ncall_reply(ncall, rc, 0, 0); - return; - } - - sizeh = sizel = 0; - rc = nsc_partsize(fd, &partsize); - sizel = (int)(partsize & 0xffffffff); - /* CONSTCOND */ - if (sizeof (nsc_size_t) > sizeof (int)) { - sizeh = (int)((partsize & 0xffffffff00000000) >> 32); - } - - nsc_release(fd); - (void) nsc_close(fd); - - ncall_reply(ncall, rc, sizeh, sizel); -} - - -static int -nsc_rncio_copy(char *data, nsc_buf_t *bufp, const int read) -{ - nsc_vec_t *vec; - char *datap; - uint64_t tocopy; /* bytes */ - int thischunk; /* bytes */ - int rc; - - rc = 0; - datap = data; - vec = bufp->sb_vec; - - tocopy = FBA_SIZE(bufp->sb_len); - - while (tocopy > 0) { - if (vec->sv_len == 0 || vec->sv_addr == 0) { - rc = ENOSPC; - break; - } - - thischunk = (int)min((nsc_size_t)vec->sv_len, tocopy); - - if (read) { - bcopy(vec->sv_addr, datap, thischunk); - } else { - bcopy(datap, vec->sv_addr, thischunk); - } - - tocopy -= thischunk; - if (thischunk == vec->sv_len) - vec++; - } - - return (rc); -} - - -/* ARGSUSED */ -static void -nsc_rncio_io(ncall_t *ncall, int *ap, const int read) -{ - nsc_ncio_rw_t *rw; - nsc_buf_t *bufp; - nsc_fd_t *fd; - nsc_size_t len; - nsc_off_t pos; - int ioflag; - int rc; - - rw = kmem_alloc(sizeof (*rw), KM_SLEEP); - rc = ncall_get_data(ncall, rw, sizeof (*rw)); - if (rc != 0) { - ncall_reply(ncall, EFAULT); - 
kmem_free(rw, sizeof (*rw)); - return; - } - - ioflag = (read ? NSC_READ : NSC_WRITE); - pos = (nsc_off_t)rw->rw_pos; - len = (nsc_size_t)rw->rw_len; - - fd = nsc_open(rw->rw_path, NSC_CACHE | NSC_DEVICE | NSC_READ | ioflag, - NULL, NULL, &rc); - if (fd == NULL) { - ncall_reply(ncall, rc); - kmem_free(rw, sizeof (*rw)); - return; - } - - rc = nsc_reserve(fd, NSC_PCATCH); - if (rc != 0) { - ncall_reply(ncall, rc); - (void) nsc_close(fd); - kmem_free(rw, sizeof (*rw)); - return; - } - - bufp = NULL; - rc = nsc_alloc_buf(fd, pos, len, NSC_NOCACHE | ioflag, &bufp); - if (rc > 0) { - ncall_reply(ncall, rc); - if (bufp != NULL) { - (void) nsc_free_buf(bufp); - } - nsc_release(fd); - (void) nsc_close(fd); - kmem_free(rw, sizeof (*rw)); - return; - } - - rc = nsc_rncio_copy(&rw->rw_data[0], bufp, read); - if (rc == 0) { - if (read) { - /* store reply data */ - rc = ncall_put_data(ncall, rw, sizeof (*rw)); - } else { - /* write new data */ - rc = nsc_write(bufp, pos, len, 0); - } - } - - ncall_reply(ncall, rc); - - (void) nsc_free_buf(bufp); - nsc_release(fd); - (void) nsc_close(fd); - kmem_free(rw, sizeof (*rw)); -} - - -static void -nsc_rncio_read(ncall_t *ncall, int *ap) -{ - nsc_rncio_io(ncall, ap, TRUE); -} - - -static void -nsc_rncio_write(ncall_t *ncall, int *ap) -{ - nsc_rncio_io(ncall, ap, FALSE); -} - - -/* - * ncall-io io provider - setup. 
- */ - -void -_nsc_init_ncio(void) -{ - mutex_init(&nsc_ncio_lock, NULL, MUTEX_DRIVER, NULL); - - ncall_register_svc(NSC_NCIO_PARTSIZE, nsc_rncio_partsize); - ncall_register_svc(NSC_NCIO_WRITE, nsc_rncio_write); - ncall_register_svc(NSC_NCIO_READ, nsc_rncio_read); - - nsc_ncio_io = nsc_register_io("ncall-io", - NSC_NCALL_ID | NSC_REFCNT, nsc_ncio_def); - - if (!nsc_ncio_io) - cmn_err(CE_WARN, "_nsc_ncio_init: register io failed - ncall"); -} - - -void -_nsc_deinit_ncio(void) -{ - if (nsc_ncio_io) - (void) nsc_unregister_io(nsc_ncio_io, 0); - - ncall_unregister_svc(NSC_NCIO_PARTSIZE); - ncall_unregister_svc(NSC_NCIO_WRITE); - ncall_unregister_svc(NSC_NCIO_READ); - - nsc_ncio_io = NULL; - mutex_destroy(&nsc_ncio_lock); -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_ncallio.h b/usr/src/uts/common/avs/ns/nsctl/nsc_ncallio.h deleted file mode 100644 index 4a42917d2d..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_ncallio.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _NSC_NCALLIO_H -#define _NSC_NCALLIO_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __NSC_GEN__ -Error: Illegal #include - private file. -#endif - -#include <sys/types.h> -#include <sys/nsctl/nsc_dev.h> -#include <sys/nsctl/nsctl.h> - -/* - * ncall-io structures - */ - -/* - * local per-device info structure - */ -typedef struct nsc_ncio_dev { - struct nsc_ncio_dev *next; /* linkage */ - char path[NSC_MAXPATH]; /* pathname */ - uint64_t phash; /* path hash */ - uint64_t partsize; /* size (FBAs) */ - int snode; /* server node */ - int ref; /* ref count */ -} nsc_ncio_dev_t; - - -/* - * on the wire partsize request structure (reply is inline). - */ -typedef struct nsc_ncio_size { - char path[NSC_MAXPATH]; -} nsc_ncio_size_t; - - -/* - * buffer handle and one the wire representation. - */ - -#define NSC_NCIO_MAXDATA (NCALL_DATA_SZ - FBA_SIZE(1)) - - -typedef struct nsc_ncio_rw { - union { - struct { - int snode; /* server node */ - uint64_t pos; /* offset of i/o */ - uint64_t len; /* length of i/o */ - char path[NSC_MAXPATH]; /* path of device */ - } rw; - char pad[FBA_SIZE(1)]; /* pad to FBA */ - } rw_u; - char rw_data[NSC_NCIO_MAXDATA]; /* data */ -} nsc_ncio_rw_t; - -#define rw_snode rw_u.rw.snode -#define rw_path rw_u.rw.path -#define rw_pos rw_u.rw.pos -#define rw_len rw_u.rw.len - - -typedef struct nsc_ncio_bufh { - nsc_buf_t bufh; - nsc_vec_t vec[2]; - void (*disc)(); - struct nsc_ncio_bufh *next; - struct nsc_ncio_rw rw; -} nsc_ncio_buf_t; - - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_NCALLIO_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_power.c b/usr/src/uts/common/avs/ns/nsctl/nsc_power.c deleted file mode 100644 index 4b7406052c..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_power.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/file.h> -#include <sys/errno.h> -#include <sys/open.h> -#include <sys/cred.h> -#include <sys/conf.h> -#include <sys/uio.h> -#include <sys/cmn_err.h> - -#define __NSC_GEN__ -#include "nsc_dev.h" -#include "nsc_ioctl.h" -#include "nsc_power.h" -#include "../nsctl.h" - -extern nsc_mem_t *_nsc_local_mem; -static int null_power(void); - - -typedef struct _nsc_power_s { - struct _nsc_power_s *next; /* chain */ - char *name; /* module name */ - void (*pw_power_lost)(int); /* callback power lost(rideout) */ - void (*pw_power_ok)(void); /* callback power ok */ - void (*pw_power_down)(void); - /* callback power down (shutdown imminent) */ -} _nsc_power_t; - -#define _P(x) (((long)(&((_nsc_power_t *)0)->x))/sizeof (long)) - -static nsc_def_t _nsc_power_def[] = { - "Power_Lost", (uintptr_t)null_power, _P(pw_power_lost), - "Power_OK", (uintptr_t)null_power, _P(pw_power_ok), - "Power_Down", (uintptr_t)null_power, _P(pw_power_down), - 0, 0, 0, -}; - -static _nsc_power_t *_power_clients; -static kmutex_t _power_mutex; - - -static int null_power(void) -/* - * init null_power - dummy power routine for clients that 
choose not - * to implement all the power hooks. - * - */ -{ - return (0); -} - -/* - * int - * _nsc_power - * Call registered clients of the generic power ioctls. - * - * Calling/Exit State: - * Calls all the registered clients with a message describing the - * current state of the power for the system. - */ -int -_nsc_power(blind_t argp, int *rvp) -{ - nsc_power_ctl_t opc; - _nsc_power_t *pp; - - *rvp = 0; - if (copyin((void *) argp, &opc, sizeof (nsc_power_ctl_t))) - return (EFAULT); - mutex_enter(&_power_mutex); - - pp = _power_clients; - while (pp) { - switch ((nsc_power_ops_t)opc.msg) { - - case Power_OK: - (*pp->pw_power_ok)(); - break; - - case Power_Down: - (*pp->pw_power_down)(); - break; - - case Power_Lost: - (*pp->pw_power_lost)(opc.arg1); - break; - - default: - mutex_exit(&_power_mutex); - return (EINVAL); - } - - pp = pp->next; - } - mutex_exit(&_power_mutex); - return (0); -} - -/* - * int - * _nsc_init_power (void) - * Initialise power ioctl subsystem. - * - * Calling/Exit State: - * Called at driver initialisation time to allocate necessary - * data structures. - */ -int -_nsc_init_power(void) -{ - mutex_init(&_power_mutex, NULL, MUTEX_DRIVER, NULL); - return (0); -} - -/* - * int - * _nsc_deinit_power (void) - * Initialise power ioctl subsystem. - * - * Calling/Exit State: - * Called at driver initialisation time to allocate necessary - * data structures. - */ -int -_nsc_deinit_power(void) -{ - _nsc_power_t *pp, *npp; - - mutex_enter(&_power_mutex); - pp = _power_clients; - while (pp) { - npp = pp->next; - nsc_kmem_free(pp, sizeof (_nsc_power_t)); - pp = npp; - } - _power_clients = NULL; - mutex_exit(&_power_mutex); - mutex_destroy(&_power_mutex); - return (0); -} - -/* - * blind_t - * nsc_register_power (char *name, nsc_def_t *def) - * Register an power ioctl client. - * - * Calling/Exit State: - * Returns a token for use in future calls to nsc_unregister_power. 
- * If a client with the same name is already registered then NULL - * is return to indicate failure. - * If registration fails NULL is returned. - * - * Description: - * Registers an power ioctl client for notifications during subsequent - * ioctl from UPS/PCU management. - */ -blind_t -nsc_register_power(char *name, nsc_def_t *def) -{ - _nsc_power_t *entry, *pp; - - - entry = nsc_kmem_alloc(sizeof (_nsc_power_t), 0, _nsc_local_mem); - - if (entry == NULL) - return (NULL); - nsc_decode_param(def, _nsc_power_def, (long *)entry); - - mutex_enter(&_power_mutex); - - for (pp = _power_clients; pp; pp = pp->next) { - if (strcmp(pp->name, name) == 0) { - mutex_exit(&_power_mutex); - nsc_kmem_free(entry, sizeof (_nsc_power_t)); - return (NULL); - } - } - entry->name = name; - - entry->next = _power_clients; - _power_clients = entry; - mutex_exit(&_power_mutex); - return ((blind_t)entry); -} - -/* - * int - * nsc_unregister_power (blind_t powerp) - * Un-register a power ioctl client. - * - * Calling/Exit State: - * Returns 0 on success, otherwise returns an error code. - * - * Description: - * The specified power ioctl client is un-registered if possible. - * Zero is returned on success otherwise an error code. 
- */ -int -nsc_unregister_power(blind_t powerp) -{ - _nsc_power_t **xpp, *entry; - - entry = (_nsc_power_t *)powerp; - if (entry == NULL) - return (EINVAL); - - mutex_enter(&_power_mutex); - - for (xpp = &_power_clients; *xpp; xpp = &(*xpp)->next) - if (*xpp == entry) - break; - - if (*xpp == NULL) { - mutex_exit(&_power_mutex); - return (EALREADY); - } - *xpp = entry->next; - mutex_exit(&_power_mutex); - nsc_kmem_free(entry, sizeof (_nsc_power_t)); - - return (0); -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_power.h b/usr/src/uts/common/avs/ns/nsctl/nsc_power.h deleted file mode 100644 index 6284818aea..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_power.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _NSC_POWER_H -#define _NSC_POWER_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Power ioctl definitions for Storage Device. - * This layout is common between 32 and 64 bits kernels. 
- */ - -typedef struct nsc_power_ctl_s { - int msg; /* power ioctl sub-opcode */ - int arg1; /* argument for the sub-opcode */ -} nsc_power_ctl_t; - -#ifdef _KERNEL -extern int _nsc_init_power(void); -extern int _nsc_deinit_power(void); -extern int _nsc_power(blind_t, int *); -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_POWER_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_resv.c b/usr/src/uts/common/avs/ns/nsctl/nsc_resv.c deleted file mode 100644 index 24ef8df98c..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_resv.c +++ /dev/null @@ -1,1014 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/ddi.h> - -#define __NSC_GEN__ -#include <sys/ncall/ncall.h> -#include "nsc_dev.h" -#include "../nsctl.h" -#ifdef DS_DDICT -#include "../contract.h" -#endif - - -static int _nsc_attach_fd(nsc_fd_t *, int); -static int _nsc_detach_owner(nsc_fd_t *, int); -static int _nsc_fd_fn(nsc_fd_t *, int (*)(), int, int); -static int _nsc_attach_iodev(nsc_iodev_t *, int); -static int _nsc_attach_dev(nsc_dev_t *, int); -static int _nsc_call_dev(nsc_dev_t *, blindfn_t, blind_t, - int *, int *, int, int, nsc_iodev_t *); - - -/* - * void - * _nsc_init_resv (void) - * Initialise reserve mechanism. - * - * Calling/Exit State: - * Called at initialisation time to allocate necessary - * data structures. - */ -void -_nsc_init_resv() -{ -} - - -/* - * void - * _nsc_deinit_resv (void) - * De-initialise reserve mechanism. - * - * Calling/Exit State: - * Called at unload time to de-allocate resources. - */ -void -_nsc_deinit_resv() -{ -} - - -/* - * int - * nsc_attach (nsc_fd_t *fd, int flag) - * Force attach of file descriptor. - * - * Calling/Exit State: - * Returns 0 if the attach succeeds, otherwise - * returns an error code. - * - * Description: - * Tries to attach the file descriptor by reserving - * and then releasing it. This is intended purely as - * a performance aid since there is no guarantee that - * the file descriptor will remain attached upon - * return. - */ -int -nsc_attach(fd, flag) -nsc_fd_t *fd; -int flag; -{ - int rc; - - rc = nsc_reserve(fd, flag); - - if (rc == 0) - nsc_release(fd); - - return (rc); -} - - -/* - * int - * nsc_reserve (nsc_fd_t *fd, int flag) - * Reserve file descriptor. - * - * Calling/Exit State: - * Returns 0 if the reserve succeeds, otherwise - * returns an error code. - * - * Description: - * Reserves the file descriptor for either NSC_READ or - * NSC_WRITE access. 
If neither is specified the mode - * with which the file was opened will be used. Trying - * to reserve a read only file in write mode will cause - * EACCES to be returned. - * - * If NSC_NOBLOCK is specifed and the reserve cannot be - * completed immediately, EAGAIN will be returned. - * - * If NSC_NOWAIT is set and the device is busy, EAGAIN - * will be returned. - * - * If NSC_TRY is set and the device is already reserved - * EAGAIN will be returned. - * - * If NSC_PCATCH is specified and a signal is received, - * the reserve will be terminated and EINTR returned. - * - * If NSC_MULTI is set then multiple reserves of the - * same type are permitted for the file descriptor. - */ -int -nsc_reserve(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *dev = fd->sf_dev; - int rc, rw; - - if ((flag & NSC_READ) == 0) - flag |= (fd->sf_flag & NSC_RDWR); - - rw = (flag & NSC_RDWR); - if ((fd->sf_flag & rw) != rw) - return (EACCES); - - mutex_enter(&dev->nsc_lock); - - while ((rc = _nsc_attach_fd(fd, flag)) != 0) - if (rc != ERESTART) - break; - - if (!rc && !fd->sf_reserve++) { - fd->sf_aio = fd->sf_iodev->si_io; - fd->sf_mode = (flag & NSC_MULTI); - } - - mutex_exit(&dev->nsc_lock); - return (rc); -} - - -/* - * int - * nsc_reserve_lk (nsc_fd_t *fd) - * Reserve locked file descriptor. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Must be preceeded by a successful call to nsc_avail. - * - * Description: - * Reserves the file descriptor using the mode specified - * when the file was opened. This is only intended for - * use in performance critical situations. - */ -void -nsc_reserve_lk(fd) -nsc_fd_t *fd; -{ - fd->sf_reserve = 1; - fd->sf_aio = fd->sf_iodev->si_io; -} - - -/* - * int - * nsc_avail (nsc_fd_t *fd) - * Test if file descriptor is available. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. 
- * - * Returns true if the file descriptor is available to - * be reserved using the mode specified when the file - * was opened. - * - * Description: - * This is only intended for use in performance critical - * situations in conjunction with nsc_reserve_lk. - */ -int -nsc_avail(fd) -nsc_fd_t *fd; -{ - int rw; - - if (!fd || fd->sf_pend || fd->sf_reserve || fd->sf_reopen) - return (0); - - if ((fd->sf_avail & _NSC_ATTACH) == 0) - return (0); - if ((fd->sf_avail & _NSC_PINNED) == 0) - return (0); - - rw = (fd->sf_flag & NSC_RDWR); - - return ((fd->sf_avail & rw) == rw); -} - - -/* - * int - * nsc_held (nsc_fd_t *fd) - * Test if file descriptor is reserved. - * - * Calling/Exit State: - * Returns true if the file descriptor is currently - * reserved. - */ -int -nsc_held(fd) -nsc_fd_t *fd; -{ - return ((fd) ? fd->sf_reserve : 1); -} - - -/* - * int - * nsc_waiting (nsc_fd_t *fd) - * Test if another client is waiting for this device. - * - * Calling/Exit State: - * Must be called with the file descriptor reserved. - * Returns true if another thread is waiting to reserve this device. - * - * Description: - * This is only intended for use in performance critical - * situations and inherently returns historical information. - */ -int -nsc_waiting(nsc_fd_t *fd) -{ - nsc_dev_t *dev; - - if (!fd || !nsc_held(fd)) - return (FALSE); - - dev = fd->sf_dev; - - return (dev->nsc_wait || dev->nsc_refcnt <= 0); -} - - -/* - * int - * nsc_release_lk (nsc_fd_t *fd) - * Release locked file descriptor. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns true if another node is waiting for the - * device and a call to nsc_detach should be made. - * - * Description: - * Releases the file descriptor. This is only intended - * for use in performance critical situations in - * conjunction with nsc_reserve_lk. 
- */ -int -nsc_release_lk(fd) -nsc_fd_t *fd; -{ - nsc_dev_t *dev = fd->sf_dev; - - fd->sf_reserve = 0; - fd->sf_aio = _nsc_null_io; - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - return (dev->nsc_drop > 0); -} - - -/* - * int - * nsc_release (nsc_fd_t *fd) - * Release file descriptor. - * - * Description: - * Releases the file descriptor. If another node - * is waiting for the device it will be completely - * detached before returning. - */ -void -nsc_release(fd) -nsc_fd_t *fd; -{ - nsc_dev_t *dev = fd->sf_dev; - int rc; - - mutex_enter(&dev->nsc_lock); - - if (!fd->sf_reserve || --fd->sf_reserve) { - mutex_exit(&dev->nsc_lock); - return; - } - - fd->sf_aio = _nsc_null_io; - fd->sf_mode = 0; - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - while (dev->nsc_drop > 0) { - rc = _nsc_detach_dev(dev, NULL, NSC_RDWR); - if (!rc || rc != ERESTART) - break; - } - - mutex_exit(&dev->nsc_lock); -} - - -/* - * int - * nsc_detach (nsc_fd_t *fd, int flag) - * Detach device from node. - * - * Calling/Exit State: - * Returns 0 if the reserve succeeds, otherwise - * returns an error code. - * - * Description: - * Detaches the device from the current node. If flag - * specifies read access then flush is called in preference - * to detach. - * - * If NSC_NOBLOCK is specifed and the detach cannot be - * completed immediately, EAGAIN will be returned. - * - * If NSC_TRY is set and the device is reserved, EAGAIN - * will be returned. - * - * If NSC_NOWAIT is set and the device is busy, EAGAIN - * will be returned. - * - * If NSC_PCATCH is specified and a signal is received, - * the reserve will be terminated and EINTR returned. - * - * If NSC_DEFER is set and the device is reserved, then - * the detach will be done on release. 
- */ -int -nsc_detach(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *dev; - int rc; - - if (!fd) - return (0); - - dev = fd->sf_dev; - - if (flag & NSC_DEFER) - flag |= NSC_TRY; - if ((flag & NSC_READ) == 0) - flag |= NSC_RDWR; - - mutex_enter(&dev->nsc_lock); - - while ((rc = _nsc_detach_dev(dev, NULL, flag)) != 0) - if (rc != ERESTART) - break; - - if (rc == EAGAIN && (flag & NSC_DEFER)) - dev->nsc_drop = 1; - - mutex_exit(&dev->nsc_lock); - return (rc); -} - - -/* - * static int - * _nsc_attach_fd (nsc_fd_t *fd, int flag) - * Attach file descriptor. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns 0 if the attach succeeds without releasing - * the device lock, otherwise returns an error code. - * - * Description: - * Attach the specified file descriptor. Other file - * descriptors for the same I/O device will be flushed - * or detached first as necessary. - */ -static int -_nsc_attach_fd(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *dev = fd->sf_dev; - int rw = (flag & NSC_RDWR); - nsc_iodev_t *iodev; - int rc, av; - - if (fd->sf_pend) - return (_nsc_wait_dev(dev, flag)); - - if (fd->sf_reopen) - if ((rc = _nsc_close_fd(fd, flag)) != 0) - return (rc); - - if (!fd->sf_iodev) - if ((rc = _nsc_open_fd(fd, flag)) != 0) - return (rc); - - iodev = fd->sf_iodev; - - if ((flag & fd->sf_mode & NSC_MULTI) && fd->sf_reserve) - if ((fd->sf_avail & rw) == rw && !iodev->si_rpend) - if (dev->nsc_drop == 0) - return (0); - - if (fd->sf_reserve) { - if (flag & NSC_TRY) - return (EAGAIN); - return (_nsc_wait_dev(dev, flag)); - } - - if (fd->sf_avail & _NSC_ATTACH) - if (fd->sf_avail & _NSC_PINNED) - if ((fd->sf_avail & rw) == rw) - return (0); - - if (iodev->si_rpend && !fd->sf_avail) - return (_nsc_wait_dev(dev, flag)); - - if ((rc = _nsc_detach_iodev(iodev, fd, flag)) != 0 || - (rc = _nsc_attach_iodev(iodev, flag)) != 0) - return (rc); - - if (!fd->sf_avail) { - fd->sf_avail = rw; - return (_nsc_fd_fn(fd, 
fd->sf_attach, _NSC_ATTACH, flag)); - } - - if ((fd->sf_avail & _NSC_PINNED) == 0) { - av = (fd->sf_avail | _NSC_PINNED); - - return _nsc_call_dev(dev, iodev->si_io->getpin, - fd->sf_cd, &fd->sf_avail, &fd->sf_pend, av, flag, NULL); - } - - fd->sf_avail |= rw; - return (0); -} - - -/* - * int - * _nsc_detach_fd (nsc_fd_t *fd, int flag) - * Detach file descriptor. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns 0 if the detach succeeds without releasing - * the device lock, otherwise returns an error code. - * - * Description: - * Detach the specified file descriptor. If flag - * specifies read access then flush is called in - * preference to detach. - */ -int -_nsc_detach_fd(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *dev = fd->sf_dev; - int rc; - - if (fd->sf_pend == _NSC_CLOSE) - return (0); - - if (fd->sf_pend) - return (_nsc_wait_dev(dev, flag)); - - if (fd->sf_flush == nsc_null) - flag |= NSC_RDWR; - - if ((fd->sf_avail & NSC_RDWR) == 0) - if (!fd->sf_avail || !(flag & NSC_WRITE)) - return (0); - - if (fd->sf_reserve && fd->sf_owner) - if ((rc = _nsc_detach_owner(fd, flag)) != 0) - return (rc); - - if (fd->sf_reserve) { - if (flag & NSC_TRY) - return (EAGAIN); - return (_nsc_wait_dev(dev, flag)); - } - - if (flag & NSC_WRITE) { - if (fd->sf_iodev->si_busy) - return (_nsc_wait_dev(dev, flag)); - - return (_nsc_fd_fn(fd, fd->sf_detach, 0, flag)); - } - - return (_nsc_fd_fn(fd, fd->sf_flush, (fd->sf_avail & ~NSC_RDWR), flag)); -} - - -/* - * static int - * _nsc_detach_owner (nsc_fd_t *fd, int flag) - * Detach owner of file descriptor. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns 0 if the detach succeeds without releasing - * the device lock, otherwise returns an error code. - * - * Description: - * Detach the owner of the specified file descriptor. 
- * Wherever possible this is done without releasing - * the current device lock. - */ -static int -_nsc_detach_owner(fd, flag) -nsc_fd_t *fd; -int flag; -{ - nsc_dev_t *newdev = fd->sf_owner->si_dev; - nsc_dev_t *dev = fd->sf_dev; - int try; - int rc; - - if (newdev == dev) { - if ((rc = _nsc_detach_iodev(fd->sf_owner, NULL, flag)) == 0) - fd->sf_owner = NULL; - return (rc); - } - - if ((try = mutex_tryenter(&newdev->nsc_lock)) != 0) - if (!_nsc_detach_iodev(fd->sf_owner, NULL, - (flag | NSC_NOBLOCK))) { - mutex_exit(&newdev->nsc_lock); - return (0); - } - - if (flag & NSC_NOBLOCK) { - if (try != 0) - mutex_exit(&newdev->nsc_lock); - return (EAGAIN); - } - - fd->sf_pend = _NSC_OWNER; - mutex_exit(&dev->nsc_lock); - - if (try == 0) - mutex_enter(&newdev->nsc_lock); - - rc = _nsc_detach_iodev(fd->sf_owner, NULL, flag); - fd->sf_owner = NULL; - - mutex_exit(&newdev->nsc_lock); - - mutex_enter(&dev->nsc_lock); - fd->sf_pend = 0; - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - return (rc ? rc : ERESTART); -} - - -/* - * static int - * _nsc_fd_fn (nsc_fd_t *fd, int (*fn)(), int a, int flag) - * Call function to attach/detach file descriptor. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns an error code if the operation failed, - * otherwise returns ERESTART to indicate that the - * device state has changed. - * - * Description: - * Sets up the active I/O module and calls the - * specified function. - */ -static int -_nsc_fd_fn(nsc_fd_t *fd, int (*fn)(), int a, int flag) -{ - int rc; - - fd->sf_aio = fd->sf_iodev->si_io; - - rc = _nsc_call_dev(fd->sf_dev, fn, fd->sf_arg, - &fd->sf_avail, &fd->sf_pend, a, flag, NULL); - - fd->sf_aio = _nsc_null_io; - return (rc); -} - - -/* - * static int - * _nsc_attach_iodev (nsc_iodev_t *iodev, int flag) - * Attach I/O device. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. 
- * - * Returns 0 if the attach succeeds without releasing - * the device lock, otherwise returns an error code. - * - * Description: - * Attach the specified I/O device. Other I/O devices - * for the same device will be flushed or detached first - * as necessary. - * - * It is assumed that any valid cache descriptor for - * this device can be used to attach the I/O device. - */ -static int -_nsc_attach_iodev(iodev, flag) -nsc_iodev_t *iodev; -int flag; -{ - nsc_dev_t *dev = iodev->si_dev; - nsc_io_t *io = iodev->si_io; - int rc, rw; - - rw = (flag & NSC_RDWR); - - if (iodev->si_pend) - return (_nsc_wait_dev(dev, flag)); - - if (iodev->si_avail & _NSC_ATTACH) - if ((iodev->si_avail & rw) == rw) - return (0); - - if ((io->flag & NSC_FILTER) == 0) { - if (dev->nsc_rpend && !iodev->si_avail) - return (_nsc_wait_dev(dev, flag)); - - if ((rc = _nsc_detach_dev(dev, iodev, flag)) != 0 || - (rc = _nsc_attach_dev(dev, flag)) != 0) - return (rc); - } - - if (!iodev->si_avail) { - iodev->si_avail = rw; - - if (!iodev->si_open) { - cmn_err(CE_PANIC, - "nsctl: _nsc_attach_iodev: %p no fds", - (void *)iodev); - } - - return (_nsc_call_dev(dev, io->attach, iodev->si_open->sf_cd, - &iodev->si_avail, &iodev->si_pend, _NSC_ATTACH, - flag, iodev)); - } - - iodev->si_avail |= rw; - return (0); -} - - -/* - * int - * _nsc_detach_iodev (nsc_iodev_t *iodev, nsc_fd_t *keep, int flag) - * Detach I/O device. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns 0 if the detach succeeds without releasing - * the device lock, otherwise returns an error code. - * - * Description: - * Detach the specified I/O device except for file - * descriptor keep. If flag specifies read access then - * flush is called in preference to detach. - * - * It is assumed that any valid cache descriptor for - * this device can be used to detach the I/O device. 
- */ -int -_nsc_detach_iodev(nsc_iodev_t *iodev, nsc_fd_t *keep, int flag) -{ - nsc_dev_t *dev = iodev->si_dev; - nsc_io_t *io = iodev->si_io; - int (*fn)(), av, rc; - nsc_fd_t *fd; - - if (iodev->si_pend == _NSC_CLOSE) - return (0); - - if (iodev->si_pend) - return (_nsc_wait_dev(dev, flag)); - - if (!keep && io->flush == nsc_null) - flag |= NSC_RDWR; - - if ((iodev->si_avail & NSC_RDWR) == 0) - if (!iodev->si_avail || !(flag & NSC_WRITE)) - return (0); - - iodev->si_rpend++; - - for (fd = iodev->si_open; fd; fd = fd->sf_next) { - if (fd == keep) - continue; - - if ((rc = _nsc_detach_fd(fd, flag)) != 0) { - _nsc_wake_dev(dev, &iodev->si_rpend); - return (rc); - } - } - - _nsc_wake_dev(dev, &iodev->si_rpend); - - if (keep) - return (0); - - if (!iodev->si_open) { - cmn_err(CE_PANIC, - "nsctl: _nsc_detach_iodev: %p no fds", (void *)iodev); - } - - fn = (flag & NSC_WRITE) ? io->detach : io->flush; - av = (flag & NSC_WRITE) ? 0 : (iodev->si_avail & ~NSC_RDWR); - - return (_nsc_call_dev(dev, fn, iodev->si_open->sf_cd, - &iodev->si_avail, &iodev->si_pend, av, flag, iodev)); -} - - -/* - * static int - * _nsc_attach_dev (nsc_dev_t *dev, int flag) - * Attach device to node. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns 0 if the attach succeeds without releasing - * the device lock, otherwise returns an error code. - * - * Description: - * Attach the device to the current node. - */ -static int -_nsc_attach_dev(dev, flag) -nsc_dev_t *dev; -int flag; -{ - if (dev->nsc_pend) { - if (flag & NSC_TRY) - return (EAGAIN); - return (_nsc_wait_dev(dev, flag)); - } - - return (0); -} - - -/* - * int - * _nsc_detach_dev (nsc_dev_t *dev, nsc_iodev_t *keep, int flag) - * Detach device. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Returns 0 if the detach succeeds without releasing - * the device lock, otherwise returns an error code. 
- * - * Description: - * Detach the device except for I/O descriptor keep. - * If flag specifies read access then flush is called - * in preference to detach. If appropriate the device - * will be released for use by another node. - * - * All I/O devices are detached regardless of the - * current owner as a sanity check. - */ -int -_nsc_detach_dev(nsc_dev_t *dev, nsc_iodev_t *keep, int flag) -{ - nsc_iodev_t *iodev; - int rc = 0; - - if (dev->nsc_pend) { - if (flag & NSC_TRY) - return (EAGAIN); - return (_nsc_wait_dev(dev, flag)); - } - - dev->nsc_rpend++; - - for (iodev = dev->nsc_list; iodev; iodev = iodev->si_next) { - if (iodev == keep) - continue; - if (iodev->si_io->flag & NSC_FILTER) - continue; - - if ((rc = _nsc_detach_iodev(iodev, NULL, flag)) != 0) - break; - } - - _nsc_wake_dev(dev, &dev->nsc_rpend); - - if (keep || !(flag & NSC_WRITE)) - return (rc); - if (rc == EAGAIN || rc == ERESTART) - return (rc); - - dev->nsc_drop = 0; - - return (rc); -} - - -/* - * static int - * _nsc_call_dev (nsc_dev_t *dev, blindfn_t fn, blind_t arg, - * *int *ap, int *pp, int a, int flag, nsc_iodev_t *iodev) - * Call attach/detach function. - * - * Calling/Exit State: - * The device lock must be held across calls to this - * this function. - * - * Returns an error code if the operation failed, - * otherwise returns ERESTART to indicate that the - * device state has changed. - * - * The flags pointed to by ap are updated to reflect - * availability based upon argument a. The pending - * flag pointed to by pp is set whilst the operation - * is in progress. - * - * Description: - * Marks the device busy, temporarily releases the - * device lock and calls the specified function with - * the given argument. - * - * If a detach is being performed then clear _NSC_ATTACH - * first to prevent pinned data callbacks. If the detach - * fails then clear _NSC_PINNED and indicate that a flush - * is required by setting NSC_READ. 
- */ -static int -_nsc_call_dev(nsc_dev_t *dev, blindfn_t fn, blind_t arg, int *ap, int *pp, - int a, int flag, nsc_iodev_t *iodev) -{ - int rc = 0, v = *ap; - - if (flag & NSC_NOBLOCK) - if (fn != nsc_null) - return (EAGAIN); - - if (!a && v) - *ap = (v & ~_NSC_ATTACH) | NSC_READ; - - if (fn != nsc_null) { - *pp = (a) ? a : _NSC_DETACH; - mutex_exit(&dev->nsc_lock); - - rc = (*fn)(arg, iodev); - - mutex_enter(&dev->nsc_lock); - *pp = 0; - } - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - if (rc) { - if (!a && v) - a = (v & ~_NSC_PINNED) | NSC_READ; - else if (v & _NSC_ATTACH) - a = v; - else - a = 0; - } - - *ap = a; - return (rc ? rc : ERESTART); -} - - -/* - * int - * _nsc_wait_dev (nsc_dev_t *dev, int flag) - * Wait for device state to change. - * - * Calling/Exit State: - * Must be called with the device lock held. - * Returns EAGAIN if NSC_NOBLOCK or NSC_NOWAIT is set, - * or EINTR if the wait was interrupted, otherwise - * returns ERESTART to indicate that the device state - * has changed. - * - * Description: - * Waits for the device state to change before resuming. - * - * Remarks: - * If the reference count on the device has dropped to - * zero then cv_broadcast is called to wakeup _nsc_free_dev. - */ -int -_nsc_wait_dev(dev, flag) -nsc_dev_t *dev; -int flag; -{ - int rc = 1; - - if (flag & (NSC_NOBLOCK | NSC_NOWAIT)) - return (EAGAIN); - - dev->nsc_wait++; - - if (flag & NSC_PCATCH) - rc = cv_wait_sig(&dev->nsc_cv, &dev->nsc_lock); - else - cv_wait(&dev->nsc_cv, &dev->nsc_lock); - - dev->nsc_wait--; - - if (dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); - - return ((rc == 0) ? EINTR : ERESTART); -} - - -/* - * void - * _nsc_wake_dev (nsc_dev_t *dev, int *valp) - * Decrement value and wakeup device. - * - * Calling/Exit State: - * The device lock must be held across calls to - * this function. - * - * Description: - * Decrements the indicated value and if appropriate - * wakes up anybody waiting on the device. 
- */ -void -_nsc_wake_dev(dev, valp) -nsc_dev_t *dev; -int *valp; -{ - if (--(*valp)) - return; - - if (dev->nsc_wait || dev->nsc_refcnt <= 0) - cv_broadcast(&dev->nsc_cv); -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_rmspin.c b/usr/src/uts/common/avs/ns/nsctl/nsc_rmspin.c deleted file mode 100644 index bae65bbcbe..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_rmspin.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/debug.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/ddi.h> - -#define __NSC_GEN__ -#include "nsc_gen.h" -#include "nsc_mem.h" -#include "nsc_rmspin.h" -#include "../nsctl.h" - - -static kmutex_t _nsc_rmspin_slp; - -nsc_rmlock_t _nsc_lock_top; -kmutex_t _nsc_global_lock; -int _nsc_global_lock_init; - -extern nsc_mem_t *_nsc_local_mem; - -/* - * void - * _nsc_init_rmlock (void) - * Initialise global locks. - * - * Calling/Exit State: - * Called at driver initialisation time to allocate necessary - * data structures. 
- */ -void -_nsc_init_rmlock() -{ - mutex_init(&_nsc_rmspin_slp, NULL, MUTEX_DRIVER, NULL); - - _nsc_lock_top.next = _nsc_lock_top.prev = &_nsc_lock_top; - - mutex_init(&_nsc_global_lock, NULL, MUTEX_DRIVER, NULL); - _nsc_global_lock_init = 1; -} - - -/* - * void - * _nsc_deinit_rmlock (void) - * De-initialise global locks. - * - * Calling/Exit State: - * Called at driver unload time to de-allocate - * resources. - */ -void -_nsc_deinit_rmlock() -{ - _nsc_global_lock_init = 0; - mutex_destroy(&_nsc_global_lock); - - ASSERT(_nsc_lock_top.next == &_nsc_lock_top); - ASSERT(_nsc_lock_top.prev == &_nsc_lock_top); - - mutex_destroy(&_nsc_rmspin_slp); -} - - -/* - * int - * _nsc_lock_all_rm (void) - * Take all global locks in address order. - * - * Calling/Exit State: - * Returns 0 if _nsc_unlock_all_rm() should be called, or -1. - */ -int -_nsc_lock_all_rm() -{ - nsc_rmlock_t *lp; - - mutex_enter(&_nsc_rmspin_slp); - - for (lp = _nsc_lock_top.next; lp != &_nsc_lock_top; lp = lp->next) { - (void) nsc_rm_lock(lp); - } - - return (0); -} - - -/* - * void - * _nsc_unlock_all_rm (void) - * Release all global locks in reverse address order. - * - * Calling/Exit State: - */ -void -_nsc_unlock_all_rm() -{ - nsc_rmlock_t *lp; - - for (lp = _nsc_lock_top.prev; lp != &_nsc_lock_top; lp = lp->prev) { - nsc_rm_unlock(lp); - } - - mutex_exit(&_nsc_rmspin_slp); -} - - -/* - * nsc_rmlock_t * - * nsc_rm_lock_alloc(char *name, int flag, void *arg) - * Allocate and initialise a global lock. - * - * Calling/Exit State: - * The 'flag' parameter should be either KM_SLEEP or KM_NOSLEEP, - * depending on whether the caller is willing to sleep while memory - * is allocated or not. - * - * The 'arg' parameter is passed directly to the underlying - * mutex_init(9f) function call. - * - * Returns NULL if lock cannot be allocated. 
- */ -nsc_rmlock_t * -nsc_rm_lock_alloc(char *name, int flag, void *arg) -{ - nsc_rmlock_t *lp, *lk; - - if ((lk = (nsc_rmlock_t *)nsc_kmem_zalloc(sizeof (*lk), - flag, _nsc_local_mem)) == NULL) - return (NULL); - - mutex_init(&lk->lockp, NULL, MUTEX_DRIVER, arg); - - mutex_enter(&_nsc_rmspin_slp); - - for (lp = _nsc_lock_top.next; lp != &_nsc_lock_top; lp = lp->next) - if (strcmp(lp->name, name) == 0) - break; - - if (lp != &_nsc_lock_top) { - mutex_exit(&_nsc_rmspin_slp); - - mutex_destroy(&lk->lockp); - nsc_kmem_free(lk, sizeof (*lk)); - - cmn_err(CE_WARN, "!nsctl: rmlock double allocation (%s)", name); - return (NULL); - } - - lk->name = name; - - lk->next = _nsc_lock_top.next; - lk->prev = &_nsc_lock_top; - _nsc_lock_top.next = lk; - lk->next->prev = lk; - - mutex_exit(&_nsc_rmspin_slp); - - return (lk); -} - - -/* - * void - * nsc_rm_lock_destroy(nsc_rmlock_t *rmlockp) - * Release the global lock. - * - * Remarks: - * The specified global lock is released and made - * available for reallocation. - */ -void -nsc_rm_lock_dealloc(rmlockp) -nsc_rmlock_t *rmlockp; -{ - if (!rmlockp) - return; - - mutex_enter(&_nsc_rmspin_slp); - - rmlockp->next->prev = rmlockp->prev; - rmlockp->prev->next = rmlockp->next; - - if (rmlockp->child) { - cmn_err(CE_WARN, "!nsctl: rmlock destroyed when locked (%s)", - rmlockp->name); - nsc_do_unlock(rmlockp->child); - rmlockp->child = NULL; - } - - mutex_destroy(&rmlockp->lockp); - mutex_exit(&_nsc_rmspin_slp); - - nsc_kmem_free(rmlockp, sizeof (*rmlockp)); -} - - -/* - * void - * nsc_rm_lock(nsc_rmlock_t *rmlockp) - * Acquire a global lock. - * - * Calling/Exit State: - * rmlockp is the lock to be acquired. - * Returns 0 (success) or errno. Lock is not acquired if rc != 0. - */ -int -nsc_rm_lock(nsc_rmlock_t *rmlockp) -{ - int rc; - - mutex_enter(&rmlockp->lockp); - - ASSERT(! 
rmlockp->child); - - /* always use a write-lock */ - rc = nsc_do_lock(1, &rmlockp->child); - if (rc) { - rmlockp->child = NULL; - mutex_exit(&rmlockp->lockp); - } - - return (rc); -} - - -/* - * static void - * nsc_rm_unlock(nsc_rmlock_t *rmlockp) - * Unlock a global lock. - * - * Calling/Exit State: - * rmlockp is the lock to be released. - */ -void -nsc_rm_unlock(nsc_rmlock_t *rmlockp) -{ - if (rmlockp->child) { - ASSERT(MUTEX_HELD(&rmlockp->lockp)); - nsc_do_unlock(rmlockp->child); - rmlockp->child = NULL; - mutex_exit(&rmlockp->lockp); - } -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_rmspin.h b/usr/src/uts/common/avs/ns/nsctl/nsc_rmspin.h deleted file mode 100644 index 27051669ff..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_rmspin.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _NSC_RMSPIN_H -#define _NSC_RMSPIN_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __NSC_GEN__ -Error: Illegal #include - private file. 
-#endif - -typedef struct nsc_rmlock_s { - struct nsc_rmlock_s *next; - struct nsc_rmlock_s *prev; - char *name; - void *child; - kmutex_t lockp; -} nsc_rmlock_t; - - -extern int nsc_do_lock(int, void **); -extern void nsc_do_unlock(void *); - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_RMSPIN_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl/nsc_trap.c b/usr/src/uts/common/avs/ns/nsctl/nsc_trap.c deleted file mode 100644 index e66a04b71e..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsc_trap.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/ddi.h> -#include <sys/sunddi.h> - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#define SVE_STE_CLASS "SVE_STE" -#define SVE_II_CLASS "SVE_II" -#define SVE_CACHE_CLASS "SVE_CACHE" - -void -nsc_do_sysevent(char *driver_name, char *trap_messages, int errorno, - int alertlevel, char *component, dev_info_t *info_dip) -{ -#if !defined(DS_DDICT) && !defined(_SunOS_5_6) && \ - !defined(_SunOS_5_7) && !defined(_SunOS_5_8) - - nvlist_t *attr_list; - int rc; - - attr_list = NULL; - rc = nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, KM_SLEEP); - if (rc != 0) { - goto out; - } - rc = nvlist_add_int32(attr_list, "alertlevel", alertlevel); - if (rc != 0) { - goto out; - } - rc = nvlist_add_string(attr_list, "messagevalue", trap_messages); - if (rc != 0) { - goto out; - } - rc = nvlist_add_int32(attr_list, "errorno", errorno); - if (rc != 0) { - goto out; - } - if (strcmp(driver_name, "sdbc") == 0) - rc = ddi_log_sysevent(info_dip, DDI_VENDOR_SUNW, - SVE_CACHE_CLASS, component, attr_list, NULL, DDI_SLEEP); - else if (strcmp(driver_name, "ste") == 0) - rc = ddi_log_sysevent(info_dip, DDI_VENDOR_SUNW, - SVE_STE_CLASS, component, attr_list, NULL, DDI_SLEEP); - else if (strcmp(driver_name, "ii") == 0) - rc = ddi_log_sysevent(info_dip, DDI_VENDOR_SUNW, - SVE_II_CLASS, component, attr_list, NULL, DDI_SLEEP); -out: - nvlist_free(attr_list); - - if (rc != 0) { - cmn_err(CE_WARN, "!%s: unable to log sysevent %d:%s and %d", - driver_name, errorno, trap_messages, alertlevel); - } -#endif /* which O/S? */ -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsctl.c b/usr/src/uts/common/avs/ns/nsctl/nsctl.c deleted file mode 100644 index b76e12bf33..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsctl.c +++ /dev/null @@ -1,923 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright 2012 Milan Jurik. All rights reserved. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/file.h> -#include <sys/errno.h> -#include <sys/open.h> -#include <sys/cred.h> -#include <sys/conf.h> -#include <sys/uio.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/ddi.h> - -#define __NSC_GEN__ -#include <sys/nsctl/nsc_dev.h> -#include <sys/nsctl/nsc_gen.h> -#include <sys/nsctl/nsc_ioctl.h> -#include <sys/nsctl/nsc_power.h> -#include <sys/nsctl/nsc_mem.h> -#include "../nsctl.h" - -#include <sys/nsctl/nsvers.h> - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -extern void nscsetup(); -extern int _nsc_init_raw(int); -extern void _nsc_deinit_raw(); -extern void _nsc_init_start(); -extern void _nsc_init_os(), _nsc_deinit_os(); -extern void _nsc_init_dev(), _nsc_init_mem(); -extern void _nsc_init_gen(), _nsc_init_rmlock(); -extern void _nsc_init_resv(), _nsc_deinit_resv(); -extern void _nsc_init_frz(), _nsc_deinit_frz(); -extern void _nsc_init_ncio(), _nsc_deinit_ncio(); -extern void _nsc_deinit_mem(), _nsc_deinit_rmlock(); -extern void _nsc_deinit_dev(); - -extern int _nsc_frz_start(char *, int *); -extern int _nsc_frz_stop(char *, int *); -extern int _nsc_frz_isfrozen(char *, int *); - 
-extern nsc_mem_t *_nsc_local_mem; -extern nsc_rmhdr_t *_nsc_rmhdr_ptr; -extern nsc_def_t _nsc_raw_def[]; -extern int _nsc_raw_flags; - -int nsc_devflag = D_MP; - -int _nsc_init_done = 0; - -kmutex_t _nsc_drv_lock; -nsc_io_t *_nsc_file_io; -nsc_io_t *_nsc_vchr_io; -nsc_io_t *_nsc_raw_io; - -nsc_fd_t **_nsc_minor_fd; -kmutex_t **_nsc_minor_slp; - - -/* Maximum number of devices - tunable in nsctl.conf */ -static int _nsc_max_devices; - -/* Internal version of _nsc_max_devices */ -int _nsc_maxdev; - -extern void _nsc_global_setup(void); - -static int nsc_load(), nsc_unload(); -static void nscteardown(); - -/* - * Solaris specific driver module interface code. - */ - -extern int nscopen(dev_t *, int, int, cred_t *); -extern int nscioctl(dev_t, int, intptr_t, int, cred_t *, int *); -extern int nscclose(dev_t, int, int, cred_t *); -extern int nscread(dev_t, uio_t *, cred_t *); -extern int nscwrite(dev_t, uio_t *, cred_t *); - -static dev_info_t *nsctl_dip; /* Single DIP for driver */ - -static int _nsctl_print(dev_t, char *); - -static struct cb_ops nsctl_cb_ops = { - nscopen, /* open */ - nscclose, /* close */ - nodev, /* not a block driver, strategy not an entry point */ - _nsctl_print, /* no print routine */ - nodev, /* no dump routine */ - nscread, /* read */ - nscwrite, /* write */ - (int (*)()) nscioctl, /* ioctl */ - nodev, /* no devmap routine */ - nodev, /* no mmap routine */ - nodev, /* no segmap routine */ - nochpoll, /* no chpoll routine */ - ddi_prop_op, - 0, /* not a STREAMS driver, no cb_str routine */ - D_NEW | D_MP | D_64BIT, /* safe for multi-thread/multi-processor */ - CB_REV, - nodev, /* aread */ - nodev, /* awrite */ -}; - -static int _nsctl_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); -static int _nsctl_attach(dev_info_t *, ddi_attach_cmd_t); -static int _nsctl_detach(dev_info_t *, ddi_detach_cmd_t); - -static struct dev_ops nsctl_ops = { - DEVO_REV, /* Driver build version */ - 0, /* device reference count */ - _nsctl_getinfo, - 
nulldev, /* Identify */ - nulldev, /* Probe */ - _nsctl_attach, - _nsctl_detach, - nodev, /* Reset */ - &nsctl_cb_ops, - (struct bus_ops *)0 -}; - -static struct modldrv nsctl_ldrv = { - &mod_driverops, - "nws:Control:" ISS_VERSION_STR, - &nsctl_ops -}; - -static struct modlinkage nsctl_modlinkage = { - MODREV_1, - &nsctl_ldrv, - NULL -}; - -/* - * Solaris module load time code - */ - -int nsc_min_nodeid; -int nsc_max_nodeid; - -int -_init(void) -{ - int err; - - err = nsc_load(); - - if (!err) - err = mod_install(&nsctl_modlinkage); - - if (err) { - (void) nsc_unload(); - cmn_err(CE_NOTE, "!nsctl_init: err %d", err); - } - - return (err); - -} - -/* - * Solaris module unload time code - */ - -int -_fini(void) -{ - int err; - - if ((err = mod_remove(&nsctl_modlinkage)) == 0) { - err = nsc_unload(); - } - return (err); -} - -/* - * Solaris module info code - */ -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&nsctl_modlinkage, modinfop)); -} - -/* - * Attach an instance of the device. This happens before an open - * can succeed. - */ -static int -_nsctl_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - int rc; - - if (cmd == DDI_ATTACH) { - nsctl_dip = dip; - - /* Announce presence of the device */ - ddi_report_dev(dip); - - /* - * Get the node parameters now that we can look up. - */ - nsc_min_nodeid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "nsc_min_nodeid", 0); - - nsc_max_nodeid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "nsc_max_nodeid", 5); - - _nsc_max_devices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "nsc_max_devices", 128); - - _nsc_maxdev = _nsc_max_devices; - nscsetup(); - - /* - * Init raw requires the _nsc_max_devices value and so - * cannot be done before the nsc_max_devices property has - * been read which can only be done after the module is - * attached and we have a dip. 
- */ - - if ((rc = _nsc_init_raw(_nsc_max_devices)) != 0) { - cmn_err(CE_WARN, - "!nsctl: unable to initialize raw io provider: %d", - rc); - return (DDI_FAILURE); - } - - /* - * Init rest of soft state structure - */ - - rc = ddi_create_minor_node(dip, "c,nsctl", S_IFCHR, 0, - DDI_PSEUDO, 0); - if (rc != DDI_SUCCESS) { - /* free anything we allocated here */ - cmn_err(CE_WARN, - "!_nsctl_attach: ddi_create_minor_node failed %d", - rc); - return (DDI_FAILURE); - } - - /* Announce presence of the device */ - ddi_report_dev(dip); - - /* mark the device as attached, opens may proceed */ - return (DDI_SUCCESS); - } else - return (DDI_FAILURE); -} - -static int -_nsctl_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - if (cmd == DDI_DETACH) { - nscteardown(); - _nsc_deinit_raw(); - - ddi_remove_minor_node(dip, NULL); - nsctl_dip = NULL; - - return (DDI_SUCCESS); - } - else - return (DDI_FAILURE); -} - - -/* ARGSUSED */ -static int -_nsctl_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) -{ - dev_t dev; - int rc; - - switch (cmd) { - case DDI_INFO_DEVT2INSTANCE: - /* The "instance" number is the minor number */ - dev = (dev_t)arg; - *result = (void *)(unsigned long)getminor(dev); - rc = DDI_SUCCESS; - break; - - case DDI_INFO_DEVT2DEVINFO: - *result = nsctl_dip; - rc = DDI_SUCCESS; - break; - - default: - rc = DDI_FAILURE; - break; - } - - return (rc); -} - - -/* ARGSUSED */ -static int -_nsctl_print(dev_t dev, char *s) -{ - cmn_err(CE_WARN, "!nsctl:%s", s); - return (0); -} - - -void -nsc_init() -{ - if (_nsc_init_done) - return; - - _nsc_init_start(); - _nsc_init_gen(); - _nsc_init_svc(); - _nsc_init_mem(); - _nsc_init_dev(); - _nsc_init_rmlock(); - _nsc_init_resv(); - _nsc_init_os(); - (void) _nsc_init_power(); - - /* - * When using mc, nscsetup is done through mc callback to global_init. 
- */ - nscsetup(); - - mutex_init(&_nsc_drv_lock, NULL, MUTEX_DRIVER, NULL); - - _nsc_raw_io = nsc_register_io("raw", - NSC_RAW_ID | _nsc_raw_flags, _nsc_raw_def); - - if (!_nsc_raw_io) - cmn_err(CE_WARN, "!_nsc_init: register io failed - raw"); - - _nsc_init_ncio(); - _nsc_init_frz(); - - _nsc_init_done = 1; -} - - -/* - * Called after the mc refresh is complete (SEG_INIT callbacks have - * been received) and module _attach() is done. Only does any real - * work when all of the above conditions have been met. - */ -void -nscsetup() -{ - if (nsc_max_devices() == 0 || _nsc_minor_fd != NULL) - return; - - _nsc_minor_fd = nsc_kmem_zalloc(sizeof (nsc_fd_t *)*_nsc_maxdev, - 0, _nsc_local_mem); - - if (!_nsc_minor_fd) { - cmn_err(CE_WARN, "!nscsetup - alloc failed"); - return; - } - - _nsc_minor_slp = nsc_kmem_zalloc(sizeof (kmutex_t *)*_nsc_maxdev, - 0, _nsc_local_mem); - - if (!_nsc_minor_slp) { - cmn_err(CE_WARN, "!nscsetup - alloc failed"); - nsc_kmem_free(_nsc_minor_fd, sizeof (nsc_fd_t *) * _nsc_maxdev); - _nsc_minor_fd = (nsc_fd_t **)NULL; - } -} - -static void -nscteardown() -{ - int i; - - if (_nsc_minor_fd == NULL) - return; - -#ifdef DEBUG - /* Check all devices were closed. Index 0 is the prototype dev. 
*/ - for (i = 1; i < _nsc_maxdev; i++) { - ASSERT(_nsc_minor_slp[i] == NULL); - ASSERT(_nsc_minor_fd[i] == NULL); - } -#endif /* DEBUG */ - - nsc_kmem_free(_nsc_minor_fd, sizeof (nsc_fd_t *) * _nsc_maxdev); - nsc_kmem_free(_nsc_minor_slp, sizeof (kmutex_t *) * _nsc_maxdev); - - _nsc_minor_fd = (nsc_fd_t **)NULL; - _nsc_minor_slp = (kmutex_t **)NULL; -} - -int -nsc_load() -{ - nsc_init(); - return (0); -} - - -int -nsc_unload() -{ - if (!_nsc_init_done) { - return (0); - } - - nscteardown(); - - (void) _nsc_deinit_power(); - _nsc_deinit_resv(); - _nsc_deinit_mem(); - _nsc_deinit_rmlock(); - _nsc_deinit_svc(); - _nsc_deinit_frz(); - _nsc_deinit_ncio(); - - if (_nsc_vchr_io) - (void) nsc_unregister_io(_nsc_vchr_io, 0); - - if (_nsc_file_io) - (void) nsc_unregister_io(_nsc_file_io, 0); - - _nsc_vchr_io = NULL; - _nsc_file_io = NULL; - - if (_nsc_raw_io) - (void) nsc_unregister_io(_nsc_raw_io, 0); - - _nsc_raw_io = NULL; - - _nsc_deinit_dev(); - _nsc_deinit_os(); - - _nsc_init_done = 0; - return (0); -} - - -/* ARGSUSED */ - -int -nscopen(dev_t *devp, int flag, int otyp, cred_t *crp) -{ - kmutex_t *slp; - int i, error; - - if (error = drv_priv(crp)) - return (error); - - if (!_nsc_minor_fd || !_nsc_minor_slp) - return (ENXIO); - - if (getminor(*devp) != 0) - return (ENXIO); - - slp = nsc_kmem_alloc(sizeof (kmutex_t), 0, _nsc_local_mem); - mutex_init(slp, NULL, MUTEX_DRIVER, NULL); - - mutex_enter(&_nsc_drv_lock); - - for (i = 1; i < _nsc_maxdev; i++) { - if (_nsc_minor_slp[i] == NULL) { - _nsc_minor_slp[i] = slp; - break; - } - } - - mutex_exit(&_nsc_drv_lock); - - if (i >= _nsc_maxdev) { - mutex_destroy(slp); - nsc_kmem_free(slp, sizeof (kmutex_t)); - return (EAGAIN); - } - - *devp = makedevice(getmajor(*devp), i); - - return (0); -} - - -int -_nscopen(dev_t dev, intptr_t arg, int mode, int *rvp) -{ - minor_t mindev = getminor(dev); - struct nscioc_open *op; - nsc_fd_t *fd; - int rc; - - op = nsc_kmem_alloc(sizeof (*op), KM_SLEEP, _nsc_local_mem); - if (op == NULL) { - 
return (ENOMEM); - } - - if (ddi_copyin((void *)arg, op, sizeof (*op), mode) < 0) { - nsc_kmem_free(op, sizeof (*op)); - return (EFAULT); - } - - mutex_enter(_nsc_minor_slp[mindev]); - - if (_nsc_minor_fd[mindev]) { - mutex_exit(_nsc_minor_slp[mindev]); - nsc_kmem_free(op, sizeof (*op)); - return (EBUSY); - } - - op->path[sizeof (op->path)-1] = 0; - - fd = nsc_open(op->path, (op->flag & NSC_TYPES), 0, 0, &rc); - - if (fd == NULL) { - mutex_exit(_nsc_minor_slp[mindev]); - nsc_kmem_free(op, sizeof (*op)); - return (rc); - } - - mode |= (op->mode - FOPEN); - - if (mode & (FWRITE|FEXCL)) { - if ((rc = nsc_reserve(fd, NSC_PCATCH)) != 0) { - mutex_exit(_nsc_minor_slp[mindev]); - (void) nsc_close(fd); - nsc_kmem_free(op, sizeof (*op)); - return (rc); - } - } - - *rvp = 0; - _nsc_minor_fd[mindev] = fd; - - mutex_exit(_nsc_minor_slp[mindev]); - nsc_kmem_free(op, sizeof (*op)); - return (0); -} - - -/* ARGSUSED */ - -int -nscclose(dev_t dev, int flag, int otyp, cred_t *crp) -{ - minor_t mindev = getminor(dev); - kmutex_t *slp; - nsc_fd_t *fd; - - if (!_nsc_minor_fd || !_nsc_minor_slp) - return (0); - - if ((slp = _nsc_minor_slp[mindev]) == 0) - return (0); - - if ((fd = _nsc_minor_fd[mindev]) != NULL) - (void) nsc_close(fd); - - _nsc_minor_fd[mindev] = NULL; - _nsc_minor_slp[mindev] = NULL; - - mutex_destroy(slp); - nsc_kmem_free(slp, sizeof (kmutex_t)); - return (0); -} - - -/* ARGSUSED */ - -int -nscread(dev_t dev, uio_t *uiop, cred_t *crp) -{ - minor_t mindev = getminor(dev); - int rc, resv; - nsc_fd_t *fd; - - if ((fd = _nsc_minor_fd[mindev]) == 0) - return (EIO); - - mutex_enter(_nsc_minor_slp[mindev]); - - resv = (nsc_held(fd) == 0); - - if (resv && (rc = nsc_reserve(fd, NSC_PCATCH)) != 0) { - mutex_exit(_nsc_minor_slp[mindev]); - return (rc); - } - - rc = nsc_uread(fd, uiop, crp); - - if (resv) - nsc_release(fd); - - mutex_exit(_nsc_minor_slp[mindev]); - return (rc); -} - - -/* ARGSUSED */ - -int -nscwrite(dev_t dev, uio_t *uiop, cred_t *crp) -{ - minor_t mindev = 
getminor(dev); - int rc, resv; - nsc_fd_t *fd; - - if ((fd = _nsc_minor_fd[mindev]) == 0) - return (EIO); - - mutex_enter(_nsc_minor_slp[mindev]); - - resv = (nsc_held(fd) == 0); - - if (resv && (rc = nsc_reserve(fd, NSC_PCATCH)) != 0) { - mutex_exit(_nsc_minor_slp[mindev]); - return (rc); - } - - rc = nsc_uwrite(fd, uiop, crp); - - if (resv) - nsc_release(fd); - - mutex_exit(_nsc_minor_slp[mindev]); - return (rc); -} - - -int -_nscreserve(dev_t dev, int *rvp) -{ - minor_t mindev = getminor(dev); - nsc_fd_t *fd; - int rc; - - if ((fd = _nsc_minor_fd[mindev]) == 0) - return (EIO); - - mutex_enter(_nsc_minor_slp[mindev]); - - if (nsc_held(fd)) { - mutex_exit(_nsc_minor_slp[mindev]); - return (EBUSY); - } - - if ((rc = nsc_reserve(fd, NSC_PCATCH)) != 0) { - mutex_exit(_nsc_minor_slp[mindev]); - return (rc); - } - - *rvp = 0; - - mutex_exit(_nsc_minor_slp[mindev]); - return (0); -} - - -int -_nscrelease(dev_t dev, int *rvp) -{ - minor_t mindev = getminor(dev); - nsc_fd_t *fd; - - if ((fd = _nsc_minor_fd[mindev]) == 0) - return (EIO); - - mutex_enter(_nsc_minor_slp[mindev]); - - if (!nsc_held(fd)) { - mutex_exit(_nsc_minor_slp[mindev]); - return (EINVAL); - } - - nsc_release(fd); - - *rvp = 0; - - mutex_exit(_nsc_minor_slp[mindev]); - return (0); -} - - -int -_nscpartsize(dev_t dev, intptr_t arg, int mode) -{ - struct nscioc_partsize partsize; - minor_t mindev = getminor(dev); - nsc_size_t size; - int rc, resv; - nsc_fd_t *fd; - - if ((fd = _nsc_minor_fd[mindev]) == 0) - return (EIO); - - mutex_enter(_nsc_minor_slp[mindev]); - - resv = (nsc_held(fd) == 0); - - if (resv && (rc = nsc_reserve(fd, NSC_PCATCH)) != 0) { - mutex_exit(_nsc_minor_slp[mindev]); - return (rc); - } - - rc = nsc_partsize(fd, &size); - partsize.partsize = (uint64_t)size; - - if (resv) - nsc_release(fd); - - mutex_exit(_nsc_minor_slp[mindev]); - - if (ddi_copyout((void *)&partsize, (void *)arg, - sizeof (partsize), mode) < 0) { - return (EFAULT); - } - - return (rc); -} - - -/* ARGSUSED */ - -int 
-nscioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvp) -{ - struct nscioc_bsize *bsize = NULL; - char *path = NULL; - int rc = 0; - - *rvp = 0; - - switch (cmd) { - case NSCIOC_OPEN: - rc = _nscopen(dev, arg, mode, rvp); - break; - - case NSCIOC_RESERVE: - rc = _nscreserve(dev, rvp); - break; - - case NSCIOC_RELEASE: - rc = _nscrelease(dev, rvp); - break; - - case NSCIOC_PARTSIZE: - rc = _nscpartsize(dev, arg, mode); - break; - - case NSCIOC_FREEZE: - path = nsc_kmem_alloc(NSC_MAXPATH, KM_SLEEP, _nsc_local_mem); - if (path == NULL) { - rc = ENOMEM; - break; - } - if (ddi_copyin((void *)arg, path, NSC_MAXPATH, mode) < 0) - rc = EFAULT; - else { - path[NSC_MAXPATH-1] = 0; - rc = _nsc_frz_start(path, rvp); - } - break; - - case NSCIOC_UNFREEZE: - path = nsc_kmem_alloc(NSC_MAXPATH, KM_SLEEP, _nsc_local_mem); - if (path == NULL) { - rc = ENOMEM; - break; - } - if (ddi_copyin((void *)arg, path, NSC_MAXPATH, mode) < 0) - rc = EFAULT; - else { - path[NSC_MAXPATH-1] = 0; - rc = _nsc_frz_stop(path, rvp); - } - break; - - case NSCIOC_ISFROZEN: - path = nsc_kmem_alloc(NSC_MAXPATH, KM_SLEEP, _nsc_local_mem); - if (path == NULL) { - rc = ENOMEM; - break; - } - if (ddi_copyin((void *)arg, path, NSC_MAXPATH, mode) < 0) - rc = EFAULT; - else { - path[NSC_MAXPATH-1] = 0; - rc = _nsc_frz_isfrozen(path, rvp); - } - break; - -#ifdef ENABLE_POWER_MSG - case NSCIOC_POWERMSG: - rc = _nsc_power((void *)arg, rvp); - break; -#endif - - case NSCIOC_NSKERND: - rc = nskernd_command(arg, mode, rvp); - break; - - /* return sizes of global memory segments */ - case NSCIOC_GLOBAL_SIZES: - if (!_nsc_init_done) { - rc = EINVAL; - break; - } - - rc = _nsc_get_global_sizes((void *)arg, rvp); - - break; - - /* return contents of global segments */ - case NSCIOC_GLOBAL_DATA: - if (!_nsc_init_done) { - rc = EINVAL; - break; - } - - rc = _nsc_get_global_data((void *)arg, rvp); - break; - - /* - * nvmem systems: - * clear the hdr dirty bit to prevent loading from nvme on reboot - */ - 
case NSCIOC_NVMEM_CLEANF: - rc = _nsc_clear_dirty(1); /* dont be nice about it */ - break; - case NSCIOC_NVMEM_CLEAN: - rc = _nsc_clear_dirty(0); - break; - - case NSCIOC_BSIZE: - bsize = nsc_kmem_alloc(sizeof (*bsize), KM_SLEEP, - _nsc_local_mem); - if (bsize == NULL) { - rc = ENOMEM; - break; - } - - if (ddi_copyin((void *)arg, bsize, sizeof (*bsize), mode) < 0) { - rc = EFAULT; - break; - } - - rc = nskern_bsize(bsize, rvp); - if (rc == 0) { - if (ddi_copyout(bsize, (void *)arg, - sizeof (*bsize), mode) < 0) { - rc = EFAULT; - break; - } - } - - break; - - default: - return (ENOTTY); - } - - if (bsize != NULL) { - nsc_kmem_free(bsize, sizeof (*bsize)); - bsize = NULL; - } - if (path != NULL) { - nsc_kmem_free(path, NSC_MAXPATH); - path = NULL; - } - return (rc); -} - - -int -nsc_max_devices(void) -{ - return (_nsc_max_devices); -} - - -/* - * Used by _nsc_global_setup() in case nvram is dirty and has saved a different - * value for nsc_max_devices. We need to use the saved value, not the new - * one configured by the user. - */ -void -_nsc_set_max_devices(int maxdev) -{ - _nsc_max_devices = maxdev; - _nsc_maxdev = _nsc_max_devices; -} diff --git a/usr/src/uts/common/avs/ns/nsctl/nsctl.conf b/usr/src/uts/common/avs/ns/nsctl/nsctl.conf deleted file mode 100644 index 1d69f34610..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsctl.conf +++ /dev/null @@ -1,41 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# NSCTL Solaris configuration properties -# -# -name="nsctl" parent="pseudo" instance=0; - -# -# Configurable maximum and minimum nodeids that can be used by the -# StorageTek Data Services. -# Usually should not be changed. -# -nsc_min_nodeid=0 nsc_max_nodeid=5; - -# -# Configurable maximum number of devices that can be handled by the -# StorageTek Data Services. A larger value will consume more memory. -# BugId 4729454: increased to 4096 for SVE. -# -nsc_max_devices=4096; diff --git a/usr/src/uts/common/avs/ns/nsctl/nsvers.h b/usr/src/uts/common/avs/ns/nsctl/nsvers.h deleted file mode 100644 index ca0a21555f..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl/nsvers.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _NSVERS_H -#define _NSVERS_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef _VERSION_ -#define _VERSION_ "SunOS 5.11" -#endif - -#ifndef ISS_VERSION_STR -#define ISS_VERSION_STR "SunOS 5.11" -#endif - -#ifndef ISS_VERSION_NUM -#define ISS_VERSION_NUM 61 -#endif - -#ifndef ISS_VERSION_MAJ -#define ISS_VERSION_MAJ 11 -#endif - -#ifndef ISS_VERSION_MIN -#define ISS_VERSION_MIN 11 -#endif - -#ifndef ISS_VERSION_MIC -#define ISS_VERSION_MIC 0 -#endif - -#ifndef BUILD_DATE_STR -#define BUILD_DATE_STR "None" -#endif - -#ifndef SCMTEST_MAJOR_VERSION -#define SCMTEST_MAJOR_VERSION "0" -#endif - -#ifndef SCMTEST_MINOR_VERSION -#define SCMTEST_MINOR_VERSION "0" -#endif - -#ifndef SCMTEST_PATCH_VERSION -#define SCMTEST_PATCH_VERSION "0" -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _NSVERS_H */ diff --git a/usr/src/uts/common/avs/ns/nsctl_inter.h b/usr/src/uts/common/avs/ns/nsctl_inter.h deleted file mode 100644 index e61a1d3536..0000000000 --- a/usr/src/uts/common/avs/ns/nsctl_inter.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SYS_NSCTL_INTER_H -#define _SYS_NSCTL_INTER_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define __NSC_GEN__ -#include <sys/ksynch.h> -#include <sys/nsctl/nsc_dev.h> -#include <sys/nsctl/nsc_gen.h> -#include <sys/nsctl/nsc_mem.h> -#include <sys/nsctl/nsc_rmspin.h> - -#ifdef _KERNEL - -#include <sys/nsc_ddi.h> - -/* prevent errors from typedefs not defined until after this is included */ -typedef int nsc_size_t; -typedef int nsc_off_t; - -int nsc_inval() { } -int nsc_ioerr() { } -int nsc_fatal() { } -int nsc_null() { } -int nsc_true() { } -void nsc_decode_param(void *, void *, void *) { } -int nskernd_isdaemon() { } -uchar_t nsc_ldstub(uchar_t *) { } -void nsc_membar_stld(void) { } - -#ifndef _BLIND_T -typedef void * blind_t; -#endif -typedef void strategy_fn_t; -nsc_io_t *nsc_register_io(char *, int, void *) { } -int nsc_unregister_io(nsc_io_t *, int) { } -nsc_path_t *nsc_register_path(char *, int, nsc_io_t *) { } -int nsc_unregister_path(nsc_path_t *, int) { } -int nsc_cache_sizes(int *, int *) { } -int nsc_register_down(void (*)()) { } -int nsc_node_hints(unsigned int *) { } -int nsc_node_hints_set(unsigned int) { } -blind_t nsc_register_power(char *, void *) { } -int nsc_unregister_power(blind_t) { } -strategy_fn_t nsc_get_strategy(major_t) { } -void *nsc_get_devops(major_t) { } -void nsc_do_sysevent(char *, char *, int, int, char *, dev_info_t *) { } -nsc_fd_t *nsc_open(char *, int, void *, blind_t, int *) { } -int nsc_close(nsc_fd_t *) { } -char *nsc_pathname(nsc_fd_t *) { } -int nsc_shared(nsc_fd_t *) { } -int nsc_setval(nsc_fd_t *, char *, int) { } -int nsc_getval(nsc_fd_t *, char *, int *) { } -int nsc_set_trksize(nsc_fd_t *, int) { } -int nsc_discard_pinned(nsc_fd_t *, int, int) { } -kmutex_t *nsc_lock_addr(nsc_fd_t *) { } -int nsc_attach(nsc_fd_t *, int) { } -int nsc_reserve(nsc_fd_t *, int) { } -void nsc_reserve_lk(nsc_fd_t *) { } -void nsc_release(nsc_fd_t *) { } -int nsc_release_lk(nsc_fd_t *) { } -int nsc_detach(nsc_fd_t *, int) { 
} -int nsc_avail(nsc_fd_t *) { } -int nsc_held(nsc_fd_t *) { } -int nsc_waiting(nsc_fd_t *) { } -int nsc_partsize(nsc_fd_t *, nsc_size_t *) { } -int nsc_maxfbas(nsc_fd_t *, int, nsc_size_t *) { } -int nsc_control(nsc_fd_t *, int, void *, int) { } -int nsc_get_pinned(nsc_fd_t *) { } -int nsc_max_devices(void) { } - -void nsc_set_owner(nsc_fd_t *, nsc_iodev_t *) { } -void nsc_pinned_data(nsc_iodev_t *, int, int) { } -void nsc_unpinned_data(nsc_iodev_t *, int, int) { } -int nsc_alloc_buf(nsc_fd_t *, nsc_off_t, nsc_size_t, int, void **) { } -int nsc_alloc_abuf(nsc_off_t, nsc_size_t, int, void **) { } -int nsc_read(void *, nsc_off_t, nsc_size_t, int) { } -int nsc_write(void *, nsc_off_t, nsc_size_t, int) { } -int nsc_zero(void *, nsc_off_t, nsc_size_t, int) { } -int nsc_copy(void *, void *, nsc_off_t, nsc_off_t, nsc_size_t) { } -int nsc_copy_direct(void *, void *, nsc_off_t, nsc_off_t, nsc_size_t) { } -int nsc_uncommit(void *, nsc_off_t, nsc_size_t, int) { } -int nsc_free_buf(void *) { } -void *nsc_alloc_handle(nsc_fd_t *, - void (*)(), void (*)(), void (*)()) { } -int nsc_free_handle(void *) { } -int nsc_uread(nsc_fd_t *, void *, void *) { } -int nsc_uwrite(nsc_fd_t *, void *, void *) { } - -nsc_rmlock_t *nsc_rm_lock_alloc(char *, int, void *) { } -void nsc_rm_lock_dealloc(nsc_rmlock_t *) { } -int nsc_rm_lock(nsc_rmlock_t *) { } -void nsc_rm_unlock(nsc_rmlock_t *) { } - -void *nsc_register_mem(char *, int, int) { } -void nsc_unregister_mem(void *) { } -void *nsc_kmem_alloc(size_t, int, void *) { } -void *nsc_kmem_zalloc(size_t, int, void *) { } -void nsc_kmem_free(void *, size_t) { } -void nsc_mem_sizes(void *, size_t *, size_t *, size_t *) { } -size_t nsc_mem_avail(void *) { } - -int nsc_commit_mem(void *, void *, size_t, void) { } - -void nsc_cm_errhdlr(void *, void *, size_t, int) { } - -nsc_svc_t *nsc_register_svc(char *, void (*)(intptr_t)) { } -int nsc_unregister_svc(nsc_svc_t *) { } -int nsc_call_svc(nsc_svc_t *, intptr_t) { } - -char *nsc_strdup(char *) { } 
-void nsc_strfree(char *) { } -int nsc_strmatch(char *, char *) { } -void nsc_sprintf(char *, char *, ...) { } -int nsc_max_nodeid, nsc_min_nodeid; -int nsc_nodeid_data(void) { } -int nsc_node_id(void) { } -int nsc_node_up(int) { } -char *nsc_node_name(void) { } -time_t nsc_time(void) { } -clock_t nsc_lbolt(void) { } -int nsc_delay_sig(clock_t) { } -clock_t nsc_usec(void) { } -void nsc_yield(void) { } -int nsc_create_process(void (*)(void *), void *, boolean_t) { } -int nsc_power_init(void) { } -void nsc_power_deinit(void) { } -void _nsc_global_nvmemmap_lookup(void *) { } -void _nsc_mark_pages(void addr, void size, int dump) { } -void _nsc_init_raw() { } -void _nsc_deinit_raw() { } -void _nsc_init_start() { } -void _nsc_init_os() { } -void _nsc_raw_flags() { } -int _nsc_raw_def[1]; -void nskernd_command() { } -void nskern_bsize() { } -int nsc_do_lock() { } -void nsc_do_unlock() { } -int HZ; -uint64_t nsc_strhash(char *) { } -int nsc_fdpathcmp(void *, uint64_t, char *) { } -char *nsc_caller() { } -char *nsc_callee() { } -void *nsc_threadp() { } - -/* - * Misc stuff to make our life easier - */ -#ifndef _VERSION_ -#define _VERSION_ "SunOS 5.11" -#endif - -#ifndef ISS_VERSION_STR -#define ISS_VERSION_STR "SunOS 5.11" -#endif - -#ifndef ISS_VERSION_NUM -#define ISS_VERSION_NUM 61 -#endif - -#ifndef ISS_VERSION_MAJ -#define ISS_VERSION_MAJ 11 -#endif - -#ifndef ISS_VERSION_MIN -#define ISS_VERSION_MIN 11 -#endif - -#ifndef ISS_VERSION_MIC -#define ISS_VERSION_MIC 0 -#endif - -#ifndef BUILD_DATE_STR -#define BUILD_DATE_STR "None" -#endif - -#ifndef SCMTEST_MAJOR_VERSION -#define SCMTEST_MAJOR_VERSION "0" -#endif - -#ifndef SCMTEST_MINOR_VERSION -#define SCMTEST_MINOR_VERSION "0" -#endif - -#ifndef SCMTEST_PATCH_VERSION -#define SCMTEST_PATCH_VERSION "0" -#endif - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_NSCTL_INTER_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/Makefile b/usr/src/uts/common/avs/ns/rdc/Makefile deleted file mode 100644 
index 91b1eb3d5b..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/Makefile +++ /dev/null @@ -1,62 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -# -# include global definitions -include ../../../../../Makefile.master - -HDRS= rdc.h \ - rdc_io.h \ - rdc_ioctl.h \ - rdc_bitmap.h \ - rdc_diskq.h - -DERIVED_HDRS= rdc_prot.h - -ROOTDIR= $(ROOT)/usr/include/sys/nsctl - -ROOTHDRS= $(HDRS:%=$(ROOTDIR)/%) $(DERIVED_HDRS:%=$(ROOTDIR)/%) - -# install rule -$(ROOTDIR)/%: % - $(INS.file) - -DERIVED_FILES= rdc_prot.h - -CHECKHDRS= $(HDRS:%.h=%.check) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIRS) $(ROOTHDRS) - -clobber clean: - $(RM) $(DERIVED_FILES) -rdc_prot.h: rdc_prot.x - $(RPCGEN) -h rdc_prot.x > $@ - -$(ROOTDIR): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/rdc/rdc.c b/usr/src/uts/common/avs/ns/rdc/rdc.c deleted file mode 100644 index 28750c6e25..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc.c +++ /dev/null @@ -1,1108 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#define _RDC_ -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/conf.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/cred.h> -#include <sys/ddi.h> -#include <sys/sysmacros.h> -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include "../contract.h" -#endif -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/nsvers.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc.h" -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdc_ioctl.h" -#include "rdcsrv.h" -#include "rdc_diskq.h" - -#define DIDINIT 0x01 -#define DIDNODES 0x02 -#define DIDCONFIG 0x04 - -static int rdcopen(dev_t *devp, int flag, int otyp, cred_t *crp); -static int rdcclose(dev_t dev, int flag, int otyp, cred_t *crp); -static int rdcprint(dev_t dev, char *str); -static int rdcioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, - int *rvp); -static int rdcattach(dev_info_t *dip, ddi_attach_cmd_t cmd); -static int rdcdetach(dev_info_t *dip, ddi_detach_cmd_t cmd); -static int rdcgetinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, - void **result); -#ifdef DEBUG -static int rdc_clrkstat(void *); -#endif - -/* - * kstat interface - */ -static kstat_t *sndr_kstats; - -int sndr_info_stats_update(kstat_t *ksp, int rw); - -static sndr_m_stats_t sndr_info_stats = { - {RDC_MKSTAT_MAXSETS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_MAXFBAS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_RPC_TIMEOUT, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_HEALTH_THRES, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_BITMAP_WRITES, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_CLNT_COTS_CALLS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_CLNT_CLTS_CALLS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_SVC_COTS_CALLS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_SVC_CLTS_CALLS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_BITMAP_REF_DELAY, KSTAT_DATA_ULONG} -}; - -int rdc_info_stats_update(kstat_t *ksp, int 
rw); - -static rdc_info_stats_t rdc_info_stats = { - {RDC_IKSTAT_FLAGS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_SYNCFLAGS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_BMPFLAGS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_SYNCPOS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_VOLSIZE, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_BITSSET, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_AUTOSYNC, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_MAXQFBAS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_MAXQITEMS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_FILE, KSTAT_DATA_STRING}, - {RDC_IKSTAT_SECFILE, KSTAT_DATA_STRING}, - {RDC_IKSTAT_BITMAP, KSTAT_DATA_STRING}, - {RDC_IKSTAT_PRIMARY_HOST, KSTAT_DATA_STRING}, - {RDC_IKSTAT_SECONDARY_HOST, KSTAT_DATA_STRING}, - {RDC_IKSTAT_TYPE_FLAG, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_BMP_SIZE, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_DISK_STATUS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_IF_DOWN, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_IF_RPC_VERSION, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_BLOCK_HWM, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_ITEM_HWM, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_THROTTLE_DELAY, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_ITEMS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_BLOCKS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_QUEUE_TYPE, KSTAT_DATA_CHAR} -}; - -static struct cb_ops rdc_cb_ops = { - rdcopen, - rdcclose, - nulldev, /* no strategy */ - rdcprint, - nodev, /* no dump */ - nodev, /* no read */ - nodev, /* no write */ - rdcioctl, - nodev, /* no devmap */ - nodev, /* no mmap */ - nodev, /* no segmap */ - nochpoll, - ddi_prop_op, - NULL, /* not STREAMS */ - D_NEW | D_MP | D_64BIT, - CB_REV, - nodev, /* no aread */ - nodev, /* no awrite */ -}; - -static struct dev_ops rdc_ops = { - DEVO_REV, - 0, - rdcgetinfo, - nulldev, /* identify */ - nulldev, /* probe */ - rdcattach, - rdcdetach, - nodev, /* no reset */ - &rdc_cb_ops, - (struct bus_ops *)NULL -}; - -static struct modldrv rdc_ldrv = { - &mod_driverops, - "nws:Remote Mirror:" ISS_VERSION_STR, - &rdc_ops -}; - -static struct modlinkage rdc_modlinkage = { - MODREV_1, - &rdc_ldrv, - NULL -}; - -const int 
sndr_major_rev = ISS_VERSION_MAJ; -const int sndr_minor_rev = ISS_VERSION_MIN; -const int sndr_micro_rev = ISS_VERSION_MIC; -const int sndr_baseline_rev = ISS_VERSION_NUM; -static char sndr_version[16]; - -static void *rdc_dip; - -extern int _rdc_init_dev(); -extern void _rdc_deinit_dev(); -extern void rdc_link_down_free(); - -int rdc_bitmap_mode; -int rdc_auto_sync; -int rdc_max_sets; -extern int rdc_health_thres; - -kmutex_t rdc_sync_mutex; -rdc_sync_event_t rdc_sync_event; -clock_t rdc_sync_event_timeout; - -static void -rdc_sync_event_init() -{ - mutex_init(&rdc_sync_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_sync_event.mutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&rdc_sync_event.cv, NULL, CV_DRIVER, NULL); - cv_init(&rdc_sync_event.done_cv, NULL, CV_DRIVER, NULL); - rdc_sync_event.master[0] = 0; - rdc_sync_event.lbolt = (clock_t)0; - rdc_sync_event_timeout = RDC_SYNC_EVENT_TIMEOUT; -} - - -static void -rdc_sync_event_destroy() -{ - mutex_destroy(&rdc_sync_mutex); - mutex_destroy(&rdc_sync_event.mutex); - cv_destroy(&rdc_sync_event.cv); - cv_destroy(&rdc_sync_event.done_cv); -} - - - -int -_init(void) -{ - return (mod_install(&rdc_modlinkage)); -} - -int -_fini(void) -{ - return (mod_remove(&rdc_modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&rdc_modlinkage, modinfop)); -} - -static int -rdcattach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - intptr_t flags; - int instance; - int i; - - /*CONSTCOND*/ - ASSERT(sizeof (u_longlong_t) == 8); - - if (cmd != DDI_ATTACH) - return (DDI_FAILURE); - - (void) strncpy(sndr_version, _VERSION_, sizeof (sndr_version)); - - instance = ddi_get_instance(dip); - rdc_dip = dip; - - flags = 0; - - rdc_sync_event_init(); - - /* - * rdc_max_sets must be set before calling _rdc_load(). 
- */ - - rdc_max_sets = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "rdc_max_sets", 64); - - if (_rdc_init_dev()) { - cmn_err(CE_WARN, "!rdc: _rdc_init_dev failed"); - goto out; - } - flags |= DIDINIT; - - if (_rdc_load() != 0) { - cmn_err(CE_WARN, "!rdc: _rdc_load failed"); - goto out; - } - - if (_rdc_configure()) { - cmn_err(CE_WARN, "!rdc: _rdc_configure failed"); - goto out; - } - flags |= DIDCONFIG; - - if (ddi_create_minor_node(dip, "rdc", S_IFCHR, instance, DDI_PSEUDO, 0) - != DDI_SUCCESS) { - cmn_err(CE_WARN, "!rdc: could not create node."); - goto out; - } - flags |= DIDNODES; - - rdc_bitmap_mode = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "rdc_bitmap_mode", 0); - - switch (rdc_bitmap_mode) { - case RDC_BMP_AUTO: /* 0 */ - break; - case RDC_BMP_ALWAYS: /* 1 */ - break; - case RDC_BMP_NEVER: /* 2 */ - cmn_err(CE_NOTE, "!SNDR bitmap mode override"); - cmn_err(CE_CONT, - "!SNDR: bitmaps will only be written on shutdown\n"); - break; - default: /* unknown */ - cmn_err(CE_NOTE, - "!SNDR: unknown bitmap mode %d - autodetecting mode", - rdc_bitmap_mode); - rdc_bitmap_mode = RDC_BMP_AUTO; - break; - } - - rdc_bitmap_init(); - - rdc_auto_sync = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "rdc_auto_sync", 0); - - i = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "rdc_health_thres", RDC_HEALTH_THRESHOLD); - if (i >= RDC_MIN_HEALTH_THRES) - rdc_health_thres = i; - else - cmn_err(CE_WARN, "!value rdc_heath_thres from rdc.conf ignored " - "as it is smaller than the min value of %d", - RDC_MIN_HEALTH_THRES); - - ddi_set_driver_private(dip, (caddr_t)flags); - ddi_report_dev(dip); - - sndr_kstats = kstat_create(RDC_KSTAT_MODULE, 0, - RDC_KSTAT_MINFO, RDC_KSTAT_CLASS, KSTAT_TYPE_NAMED, - sizeof (sndr_m_stats_t) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - - if (sndr_kstats) { - sndr_kstats->ks_data = &sndr_info_stats; - 
sndr_kstats->ks_update = sndr_info_stats_update; - sndr_kstats->ks_private = &rdc_k_info[0]; - kstat_install(sndr_kstats); - } else - cmn_err(CE_WARN, "!SNDR: module kstats failed"); - - return (DDI_SUCCESS); - -out: - DTRACE_PROBE(rdc_attach_failed); - ddi_set_driver_private(dip, (caddr_t)flags); - (void) rdcdetach(dip, DDI_DETACH); - return (DDI_FAILURE); -} - -static int -rdcdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rdcd; - intptr_t flags; - - - if (cmd != DDI_DETACH) { - DTRACE_PROBE(rdc_detach_unknown_cmd); - return (DDI_FAILURE); - } - - if (rdc_k_info == NULL || rdc_u_info == NULL) - goto cleanup; - - mutex_enter(&rdc_conf_lock); - - for (rdcd = 0; rdcd < rdc_max_sets; rdcd++) { - krdc = &rdc_k_info[rdcd]; - urdc = &rdc_u_info[rdcd]; - - if (IS_ENABLED(urdc) || krdc->devices) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc: cannot detach, rdcd %d still in use", rdcd); -#endif - mutex_exit(&rdc_conf_lock); - DTRACE_PROBE(rdc_detach_err_busy); - return (DDI_FAILURE); - } - } - - mutex_exit(&rdc_conf_lock); - -cleanup: - flags = (intptr_t)ddi_get_driver_private(dip); - - if (flags & DIDNODES) - ddi_remove_minor_node(dip, NULL); - - if (sndr_kstats) { - kstat_delete(sndr_kstats); - } - if (flags & DIDINIT) - _rdc_deinit_dev(); - - if (flags & DIDCONFIG) { - (void) _rdc_deconfigure(); - (void) _rdc_unload(); - rdcsrv_unload(); - } - - rdc_sync_event_destroy(); - rdc_link_down_free(); - - rdc_dip = NULL; - return (DDI_SUCCESS); -} - -/* ARGSUSED */ -static int -rdcgetinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int rc = DDI_FAILURE; - - switch (infocmd) { - - case DDI_INFO_DEVT2DEVINFO: - *result = rdc_dip; - rc = DDI_SUCCESS; - break; - - case DDI_INFO_DEVT2INSTANCE: - /* We only have a single instance */ - *result = 0; - rc = DDI_SUCCESS; - break; - - default: - break; - } - - return (rc); -} - - -/* ARGSUSED */ - -static int -rdcopen(dev_t *devp, int flag, int otyp, cred_t *crp) 
-{ - return (0); -} - - -/* ARGSUSED */ - -static int -rdcclose(dev_t dev, int flag, int otyp, cred_t *crp) -{ - return (0); -} - -/* ARGSUSED */ - -static int -rdcprint(dev_t dev, char *str) -{ - int instance = 0; - - cmn_err(CE_WARN, "!rdc%d: %s", instance, str); - return (0); -} - - -static int -convert_ioctl_args(int cmd, intptr_t arg, int mode, _rdc_ioctl_t *args) -{ - _rdc_ioctl32_t args32; - - if (ddi_copyin((void *)arg, &args32, sizeof (_rdc_ioctl32_t), mode)) - return (EFAULT); - - bzero((void *)args, sizeof (_rdc_ioctl_t)); - - switch (cmd) { - case RDC_CONFIG: - args->arg0 = (uint32_t)args32.arg0; /* _rdc_config_t * */ - args->arg1 = (uint32_t)args32.arg1; /* pointer */ - args->arg2 = (uint32_t)args32.arg2; /* size */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - - case RDC_STATUS: - args->arg0 = (uint32_t)args32.arg0; /* pointer */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - - case RDC_ENABLE_SVR: - args->arg0 = (uint32_t)args32.arg0; /* _rdc_svc_args * */ - break; - - case RDC_VERSION: - args->arg0 = (uint32_t)args32.arg0; /* _rdc_version_t * */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - - case RDC_SYNC_EVENT: - args->arg0 = (uint32_t)args32.arg0; /* char * */ - args->arg1 = (uint32_t)args32.arg1; /* char * */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - - case RDC_LINK_DOWN: - args->arg0 = (uint32_t)args32.arg0; /* char * */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - case RDC_POOL_CREATE: - args->arg0 = (uint32_t)args32.arg0; /* svcpool_args * */ - break; - case RDC_POOL_WAIT: - args->arg0 = (uint32_t)args32.arg0; /* int */ - break; - case RDC_POOL_RUN: - args->arg0 = (uint32_t)args32.arg0; /* int */ - break; - - default: - return (EINVAL); - } - - return (0); -} - -/* - * Build a 32bit rdc_set structure and copyout to the user level. 
- */ -int -rdc_status_copy32(const void *arg, void *usetp, size_t size, int mode) -{ - rdc_u_info_t *urdc = (rdc_u_info_t *)arg; - struct rdc_set32 set32; - size_t tailsize; -#ifdef DEBUG - size_t tailsize32; -#endif - - bzero(&set32, sizeof (set32)); - - tailsize = sizeof (struct rdc_addr32) - - offsetof(struct rdc_addr32, intf); - - /* primary address structure, avoiding netbuf */ - bcopy(&urdc->primary.intf[0], &set32.primary.intf[0], tailsize); - - /* secondary address structure, avoiding netbuf */ - bcopy(&urdc->secondary.intf[0], &set32.secondary.intf[0], tailsize); - - /* - * the rest, avoiding netconfig - * note: the tail must be the same size in both structures - */ - tailsize = sizeof (struct rdc_set) - offsetof(struct rdc_set, flags); -#ifdef DEBUG - /* - * ASSERT is calling for debug reason, and tailsize32 is only declared - * for ASSERT, put them under debug to avoid lint warning. - */ - tailsize32 = sizeof (struct rdc_set32) - - offsetof(struct rdc_set32, flags); - ASSERT(tailsize == tailsize32); -#endif - - bcopy(&urdc->flags, &set32.flags, tailsize); - - /* copyout to user level */ - return (ddi_copyout(&set32, usetp, size, mode)); -} - - -/* - * Status ioctl. 
- */ -static int -rdcstatus(_rdc_ioctl_t *args, int mode) -{ - int (*copyout)(const void *, void *, size_t, int); - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - disk_queue *dqp; - char *usetp; /* pointer to user rdc_set structure */ - size_t size; /* sizeof user rdc_set structure */ - int32_t *maxsetsp; /* address of status->maxsets; */ - int nset, max, i, j; - - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - struct rdc_status32 status32; - - if (ddi_copyin((void *)args->arg0, &status32, - sizeof (status32), mode)) { - return (EFAULT); - } - - usetp = ((char *)args->arg0) + - offsetof(struct rdc_status32, rdc_set); - maxsetsp = (int32_t *)((char *)args->arg0 + - offsetof(struct rdc_status32, maxsets)); - nset = status32.nset; - - size = sizeof (struct rdc_set32); - copyout = rdc_status_copy32; - } else { - struct rdc_status status; - - if (ddi_copyin((void *)args->arg0, &status, - sizeof (status), mode)) { - return (EFAULT); - } - - usetp = ((char *)args->arg0) + - offsetof(struct rdc_status, rdc_set); - maxsetsp = (int32_t *)((char *)args->arg0 + - offsetof(struct rdc_status, maxsets)); - nset = status.nset; - - size = sizeof (struct rdc_set); - copyout = ddi_copyout; - } - - max = min(nset, rdc_max_sets); - - for (i = 0, j = 0; i < max; i++) { - urdc = &rdc_u_info[i]; - krdc = &rdc_k_info[i]; - - if (!IS_ENABLED(urdc)) - continue; - - /* - * sneak out qstate in urdc->flags - * this is harmless because it's value is not used - * in urdc->flags. 
the real qstate is kept in - * group->diskq->disk_hdr.h.state - */ - if (RDC_IS_DISKQ(krdc->group)) { - dqp = &krdc->group->diskq; - if (IS_QSTATE(dqp, RDC_QNOBLOCK)) - urdc->flags |= RDC_QNOBLOCK; - } - - j++; - if ((*copyout)(urdc, usetp, size, mode) != 0) - return (EFAULT); - - urdc->flags &= ~RDC_QNOBLOCK; /* clear qstate */ - usetp += size; - } - - /* copyout rdc_max_sets value */ - - if (ddi_copyout(&rdc_max_sets, maxsetsp, sizeof (*maxsetsp), mode) != 0) - return (EFAULT); - - /* copyout number of sets manipulated */ - - /*CONSTCOND*/ - ASSERT(offsetof(struct rdc_status32, nset) == 0); - /*CONSTCOND*/ - ASSERT(offsetof(struct rdc_status, nset) == 0); - - return (ddi_copyout(&j, (void *)args->arg0, sizeof (int), mode)); -} - - -/* ARGSUSED */ - -static int -rdcioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvp) -{ - spcs_s_info_t kstatus = NULL; - _rdc_ioctl_t args; - int error; - int rc = 0; - - if (cmd != RDC_STATUS) { - if ((error = drv_priv(crp)) != 0) - return (error); - } -#ifdef DEBUG - if (cmd == RDC_ASYNC6) { - rc = rdc_async6((void *)arg, mode, rvp); - return (rc); - } - - if (cmd == RDC_CLRKSTAT) { - rc = rdc_clrkstat((void *)arg); - return (rc); - } - - if (cmd == RDC_STALL0) { - if (((int)arg > 1) || ((int)arg < 0)) - return (EINVAL); - rdc_stallzero((int)arg); - return (0); - } - if (cmd == RDC_READGEN) { - rc = rdc_readgen((void *)arg, mode, rvp); - return (rc); - } -#endif - if (cmd == RDC_BITMAPOP) { - rdc_bitmap_op_t bmop; - rdc_bitmap_op32_t bmop32; - - if (ddi_model_convert_from(mode & FMODELS) - == DDI_MODEL_ILP32) { - if (ddi_copyin((void *)arg, &bmop32, sizeof (bmop32), - mode)) - return (EFAULT); - bmop.offset = bmop32.offset; - bmop.op = bmop32.op; - (void) strncpy(bmop.sechost, bmop32.sechost, - MAX_RDC_HOST_SIZE); - (void) strncpy(bmop.secfile, bmop32.secfile, - NSC_MAXPATH); - bmop.len = bmop32.len; - bmop.addr = (unsigned long)bmop32.addr; - } else { - if (ddi_copyin((void *)arg, &bmop, sizeof (bmop), - mode)) 
- return (EFAULT); - } - rc = rdc_bitmapset(bmop.op, bmop.sechost, bmop.secfile, - (void *)bmop.addr, bmop.len, bmop.offset, mode); - return (rc); - } - - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - if ((rc = convert_ioctl_args(cmd, arg, mode, &args)) != 0) - return (rc); - } else { - if (ddi_copyin((void *)arg, &args, - sizeof (_rdc_ioctl_t), mode)) { - return (EFAULT); - } - } - - kstatus = spcs_s_kcreate(); - if (!kstatus) { - return (ENOMEM); - } - - - switch (cmd) { - - case RDC_POOL_CREATE: { - struct svcpool_args p; - - if (ddi_copyin((void *)arg, &p, sizeof (p), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - error = svc_pool_create(&p); - - break; - } - case RDC_POOL_WAIT: { - int id; - - if (ddi_copyin((void *)arg, &id, sizeof (id), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - error = svc_wait(id); - break; - } - case RDC_POOL_RUN: { - int id; - - if (ddi_copyin((void *)arg, &id, sizeof (id), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - error = svc_do_run(id); - break; - } - case RDC_ENABLE_SVR: - { - STRUCT_DECL(rdc_svc_args, parms); - - STRUCT_INIT(parms, mode); - /* Only used by sndrd which does not use unistat */ - - if (ddi_copyin((void *)args.arg0, STRUCT_BUF(parms), - STRUCT_SIZE(parms), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - rc = rdc_start_server(STRUCT_BUF(parms), mode); - } - break; - - case RDC_STATUS: - rc = rdcstatus(&args, mode); - break; - - case RDC_CONFIG: - rc = _rdc_config((void *)args.arg0, mode, kstatus, rvp); - spcs_s_copyoutf(&kstatus, args.ustatus); - return (rc); - - case RDC_VERSION: - { - STRUCT_DECL(rdc_version, parms); - - STRUCT_INIT(parms, mode); - - STRUCT_FSET(parms, major, sndr_major_rev); - STRUCT_FSET(parms, minor, sndr_minor_rev); - STRUCT_FSET(parms, micro, sndr_micro_rev); - STRUCT_FSET(parms, baseline, sndr_baseline_rev); - - if (ddi_copyout(STRUCT_BUF(parms), (void *)args.arg0, - STRUCT_SIZE(parms), mode)) { - spcs_s_kfree(kstatus); 
- return (EFAULT); - } - break; - } - - case RDC_LINK_DOWN: - /* char *host from user */ - rc = _rdc_link_down((void *)args.arg0, mode, kstatus, rvp); - spcs_s_copyoutf(&kstatus, args.ustatus); - - return (rc); - - case RDC_SYNC_EVENT: - rc = _rdc_sync_event_wait((void *)args.arg0, (void *)args.arg1, - mode, kstatus, rvp); - spcs_s_copyoutf(&kstatus, args.ustatus); - - return (rc); - - - default: - rc = EINVAL; - break; - } - - spcs_s_kfree(kstatus); - return (rc); -} - -int -sndr_info_stats_update(kstat_t *ksp, int rw) -{ - extern int rdc_rpc_tmout; - extern int rdc_health_thres; - extern int rdc_bitmap_delay; - extern long rdc_clnt_count; - extern long rdc_svc_count; - sndr_m_stats_t *info_stats; - rdc_k_info_t *krdc; - - info_stats = (sndr_m_stats_t *)(ksp->ks_data); - krdc = (rdc_k_info_t *)(ksp->ks_private); - - /* no writes currently allowed */ - - if (rw == KSTAT_WRITE) { - return (EACCES); - } - - /* default to READ */ - info_stats->m_maxsets.value.ul = rdc_max_sets; - info_stats->m_maxfbas.value.ul = krdc->maxfbas; - info_stats->m_rpc_timeout.value.ul = rdc_rpc_tmout; - info_stats->m_health_thres.value.ul = rdc_health_thres; - info_stats->m_bitmap_writes.value.ul = krdc->bitmap_write; - info_stats->m_bitmap_ref_delay.value.ul = rdc_bitmap_delay; - - /* clts counters not implemented yet */ - info_stats->m_clnt_cots_calls.value.ul = rdc_clnt_count; - info_stats->m_clnt_clts_calls.value.ul = 0; - info_stats->m_svc_cots_calls.value.ul = rdc_svc_count; - info_stats->m_svc_clts_calls.value.ul = 0; - - return (0); -} - -/* - * copy tailsize-1 bytes of tail of s to s1. - */ -void -rdc_str_tail_cpy(char *s1, char *s, size_t tailsize) -{ - /* To avoid un-terminated string, max size is 16 - 1 */ - ssize_t offset = strlen(s) - (tailsize - 1); - - offset = (offset > 0) ? 
offset : 0; - - /* ensure it's null terminated */ - (void) strlcpy(s1, (const char *)(s + offset), tailsize); -} - -int -rdc_info_stats_update(kstat_t *ksp, int rw) -{ - rdc_info_stats_t *rdc_info_stats; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - rdc_info_stats = (rdc_info_stats_t *)(ksp->ks_data); - krdc = (rdc_k_info_t *)(ksp->ks_private); - urdc = &rdc_u_info[krdc->index]; - - /* no writes currently allowed */ - - if (rw == KSTAT_WRITE) { - return (EACCES); - } - - /* default to READ */ - rdc_info_stats->s_flags.value.ul = urdc->flags; - rdc_info_stats->s_syncflags.value.ul = - urdc->sync_flags; - rdc_info_stats->s_bmpflags.value.ul = - urdc->bmap_flags; - rdc_info_stats->s_syncpos.value.ul = - urdc->sync_pos; - rdc_info_stats->s_volsize.value.ul = - urdc->volume_size; - rdc_info_stats->s_bits_set.value.ul = - urdc->bits_set; - rdc_info_stats->s_autosync.value.ul = - urdc->autosync; - rdc_info_stats->s_maxqfbas.value.ul = - urdc->maxqfbas; - rdc_info_stats->s_maxqitems.value.ul = - urdc->maxqitems; - - kstat_named_setstr(&rdc_info_stats->s_primary_vol, - urdc->primary.file); - - kstat_named_setstr(&rdc_info_stats->s_secondary_vol, - urdc->secondary.file); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - kstat_named_setstr(&rdc_info_stats->s_bitmap, - urdc->primary.bitmap); - } else { - kstat_named_setstr(&rdc_info_stats->s_bitmap, - urdc->secondary.bitmap); - } - - kstat_named_setstr(&rdc_info_stats->s_primary_intf, - urdc->primary.intf); - - kstat_named_setstr(&rdc_info_stats->s_secondary_intf, - urdc->secondary.intf); - - rdc_info_stats->s_type_flag.value.ul = krdc->type_flag; - rdc_info_stats->s_bitmap_size.value.ul = krdc->bitmap_size; - rdc_info_stats->s_disk_status.value.ul = krdc->disk_status; - - if (krdc->intf) { - rdc_info_stats->s_if_if_down.value.ul = krdc->intf->if_down; - rdc_info_stats->s_if_rpc_version.value.ul = - krdc->intf->rpc_version; - } - - /* the type can change without disable/re-enable so... 
*/ - bzero(rdc_info_stats->s_aqueue_type.value.c, KSTAT_DATA_CHAR_LEN); - if (RDC_IS_MEMQ(krdc->group)) { - (void) strcpy(rdc_info_stats->s_aqueue_type.value.c, "memory"); - rdc_info_stats->s_aqueue_blk_hwm.value.ul = - krdc->group->ra_queue.blocks_hwm; - rdc_info_stats->s_aqueue_itm_hwm.value.ul = - krdc->group->ra_queue.nitems_hwm; - rdc_info_stats->s_aqueue_throttle.value.ul = - krdc->group->ra_queue.throttle_delay; - rdc_info_stats->s_aqueue_items.value.ul = - krdc->group->ra_queue.nitems; - rdc_info_stats->s_aqueue_blocks.value.ul = - krdc->group->ra_queue.blocks; - - } else if (RDC_IS_DISKQ(krdc->group)) { - disk_queue *q = &krdc->group->diskq; - rdc_info_stats->s_aqueue_blk_hwm.value.ul = - krdc->group->diskq.blocks_hwm; - rdc_info_stats->s_aqueue_itm_hwm.value.ul = - krdc->group->diskq.nitems_hwm; - rdc_info_stats->s_aqueue_throttle.value.ul = - krdc->group->diskq.throttle_delay; - rdc_info_stats->s_aqueue_items.value.ul = QNITEMS(q); - rdc_info_stats->s_aqueue_blocks.value.ul = QBLOCKS(q); - (void) strcpy(rdc_info_stats->s_aqueue_type.value.c, "disk"); - } - - return (0); -} - -void -rdc_kstat_create(int index) -{ - int j = index; - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - size_t varsize; - - if (!krdc->set_kstats) { - krdc->set_kstats = kstat_create(RDC_KSTAT_MODULE, j, - RDC_KSTAT_INFO, RDC_KSTAT_CLASS, KSTAT_TYPE_NAMED, - sizeof (rdc_info_stats_t) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); -#ifdef DEBUG - if (!krdc->set_kstats) - cmn_err(CE_NOTE, "!krdc:u_kstat null"); -#endif - - if (krdc->set_kstats) { - /* calculate exact size of KSTAT_DATA_STRINGs */ - varsize = strlen(urdc->primary.file) + 1 - + strlen(urdc->secondary.file) + 1 - + strlen(urdc->primary.intf) + 1 - + strlen(urdc->secondary.intf) + 1; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - varsize += strlen(urdc->primary.bitmap) + 1; - } else { - varsize += strlen(urdc->secondary.bitmap) + 1; - } - - krdc->set_kstats->ks_data_size += 
varsize; - krdc->set_kstats->ks_data = &rdc_info_stats; - krdc->set_kstats->ks_update = rdc_info_stats_update; - krdc->set_kstats->ks_private = &rdc_k_info[j]; - kstat_install(krdc->set_kstats); - } else - cmn_err(CE_WARN, "!SNDR: k-kstats failed"); - } - - krdc->io_kstats = kstat_create(RDC_KSTAT_MODULE, j, NULL, - "disk", KSTAT_TYPE_IO, 1, 0); - if (krdc->io_kstats) { - krdc->io_kstats->ks_lock = &krdc->kstat_mutex; - kstat_install(krdc->io_kstats); - } - krdc->bmp_kstats = kstat_create("sndrbmp", j, NULL, - "disk", KSTAT_TYPE_IO, 1, 0); - if (krdc->bmp_kstats) { - krdc->bmp_kstats->ks_lock = &krdc->bmp_kstat_mutex; - kstat_install(krdc->bmp_kstats); - } -} - -void -rdc_kstat_delete(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - - if (krdc->set_kstats) { - kstat_delete(krdc->set_kstats); - krdc->set_kstats = NULL; - } - - if (krdc->io_kstats) { - kstat_delete(krdc->io_kstats); - krdc->io_kstats = NULL; - } - if (krdc->bmp_kstats) { - kstat_delete(krdc->bmp_kstats); - krdc->bmp_kstats = NULL; - } -} - -#ifdef DEBUG -/* - * Reset the io_kstat structure of the krdc specified - * by the arg index. - */ -static int -rdc_clrkstat(void *arg) -{ - int index; - rdc_k_info_t *krdc; - - index = (int)(unsigned long)arg; - if ((index < 0) || (index >= rdc_max_sets)) { - return (EINVAL); - } - krdc = &rdc_k_info[index]; - if (krdc->io_kstats) { - kstat_delete(krdc->io_kstats); - krdc->io_kstats = NULL; - } else { - return (EINVAL); - } - krdc->io_kstats = kstat_create(RDC_KSTAT_MODULE, index, NULL, - "disk", KSTAT_TYPE_IO, 1, 0); - if (krdc->io_kstats) { - krdc->io_kstats->ks_lock = &krdc->kstat_mutex; - kstat_install(krdc->io_kstats); - } else { - return (EINVAL); - } - /* - * clear the high water marks and throttle. 
- */ - if (krdc->group) { - krdc->group->ra_queue.nitems_hwm = 0; - krdc->group->ra_queue.blocks_hwm = 0; - krdc->group->ra_queue.throttle_delay = 0; - } - return (0); -} -#endif diff --git a/usr/src/uts/common/avs/ns/rdc/rdc.conf b/usr/src/uts/common/avs/ns/rdc/rdc.conf deleted file mode 100644 index 1ef5e0e420..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc.conf +++ /dev/null @@ -1,55 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# -name="rdc" parent="pseudo"; - -# -# rdc_bitmap_mode -# - Sets the mode of the RDC bitmap operation, acceptable values are: -# 0 - autodetect bitmap mode depending on the state of SDBC (default). -# 1 - force bitmap writes for every write operation, so an update resync -# can be performed after a crash or reboot. -# 2 - only write the bitmap on shutdown, so a full resync is -# required after a crash, but an update resync is required after -# a reboot. -# -rdc_bitmap_mode=1; - -# -# rdc_max_sets -# - Configure the maximum number of RDC sets that can be enabled on -# this host. 
The actual maximum number of sets that can be enabled -# will be the minimum of this value and nsc_max_devices (see -# nsctl.conf) at the time the rdc kernel module is loaded. -# -rdc_max_sets=64; - -# -# rdc_health_thres -# - Set the timeout (in seconds) for RDC health monitoring. If IPMP is in -# use over the RDC link this value should be set higher than in.mpathd's -# timeout. -# -#rdc_health_thres=20; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc.h b/usr/src/uts/common/avs/ns/rdc/rdc.h deleted file mode 100644 index 8ebb22ad17..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _RDC_H -#define _RDC_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define RDCDEV "/dev/rdc" -#define RDC_KSTAT_CLASS "storedge" -#define RDC_KSTAT_MINFO "modinfo" -#define RDC_KSTAT_INFO "setinfo" -#define RDC_KSTAT_MODULE "sndr" - - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.c b/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.c deleted file mode 100644 index bbea681e09..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.c +++ /dev/null @@ -1,2659 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/errno.h> - -#include "../solaris/nsc_thread.h" -#ifdef DS_DDICT -#include "../contract.h" -#endif -#include <sys/nsctl/nsctl.h> - -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/ddi.h> - -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdc_clnt.h" -#include "rdc_diskq.h" - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#ifndef UINT8_MAX -#define UINT8_MAX 255 -#endif - -#ifndef UINT_MAX -#define UINT_MAX 0xffffffff -#endif - -/* - * RDC bitmap functions. - */ - -/* - * RDC cluster integration notes. - * - * 1. Configuration - * - * 1.1. Change 'rdc_bitmap_mode' in /usr/kernel/drv/rdc.conf to '1'. - * - * 2. Operation - * - * 2.1. SunCluster ensures that only one physical host has any rdc - * controlled device imported at any one time. Hence rdc will - * only be active on a single node for any set at a time. - * - * 2.2. So operation from the kernel perspective looks just like - * operation on a single, standalone, node. 
- * - */ - -struct rdc_bitmap_ops *rdc_bitmap_ops; /* the bitmap ops switch */ -static int rdc_wrflag; /* write flag for io */ -int rdc_bitmap_delay = 0; -extern nsc_io_t *_rdc_io_hc; - -int rdc_suspend_diskq(rdc_k_info_t *krdc); - -/* - * rdc_ns_io - * Perform read or write on an underlying ns device - * - * fd - nsc file descriptor - * flag - nsc io direction and characteristics flag - * fba_pos - offset from beginning of device in FBAs - * io_addr - pointer to data buffer - * io_len - length of io in bytes - */ - -int -rdc_ns_io(nsc_fd_t *fd, int flag, nsc_off_t fba_pos, uchar_t *io_addr, - nsc_size_t io_len) -{ - nsc_buf_t *tmp; - nsc_vec_t *vecp; - uchar_t *vaddr; - size_t copy_len; - int vlen; - int rc; - nsc_size_t fba_req, fba_len; - nsc_size_t maxfbas = 0; - nsc_size_t tocopy; - unsigned char *toaddr; - - rc = nsc_maxfbas(fd, 0, &maxfbas); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_ns_io: maxfbas failed (%d)", rc); -#endif - maxfbas = 256; - } - toaddr = io_addr; - fba_req = FBA_LEN(io_len); -loop: - tmp = NULL; - fba_len = min(fba_req, maxfbas); - tocopy = min(io_len, FBA_SIZE(fba_len)); - ASSERT(tocopy < INT32_MAX); - - rc = nsc_alloc_buf(fd, fba_pos, fba_len, flag, &tmp); - if (!RDC_SUCCESS(rc)) { - if (tmp) { - (void) nsc_free_buf(tmp); - } - return (EIO); - } - - if ((flag & NSC_WRITE) != 0 && (flag & NSC_READ) == 0 && - FBA_OFF(io_len) != 0) { - /* - * Not overwriting all of the last FBA, so read in the - * old contents now before we overwrite it with the new - * data. 
- */ - rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(tmp); - return (EIO); - } - } - - vecp = tmp->sb_vec; - vlen = vecp->sv_len; - vaddr = vecp->sv_addr; - - while (tocopy > 0) { - if (vecp->sv_addr == 0 || vecp->sv_len == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_ns_io: ran off end of handle"); -#endif - break; - } - - copy_len = (size_t)min(vlen, (int)tocopy); - - if (flag & NSC_WRITE) - bcopy(toaddr, vaddr, copy_len); - else - bcopy(vaddr, toaddr, copy_len); - - toaddr += copy_len; - io_addr += copy_len; /* adjust position in callers buffer */ - io_len -= copy_len; /* adjust total byte length remaining */ - tocopy -= copy_len; /* adjust chunk byte length remaining */ - vaddr += copy_len; /* adjust location in sv_vec_t */ - vlen -= copy_len; /* adjust length left in sv_vec_t */ - - if (vlen <= 0) { - vecp++; - vaddr = vecp->sv_addr; - vlen = vecp->sv_len; - } - } - - if (flag & NSC_WRITE) { - rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(tmp); - return (rc); - } - } - - (void) nsc_free_buf(tmp); - - fba_pos += fba_len; - fba_req -= fba_len; - if (fba_req > 0) - goto loop; - - return (0); -} - -/* - * Must be called with krdc->bmapmutex held. 
- */ -static void -rdc_fill_header(rdc_u_info_t *urdc, rdc_header_t *header) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; -#ifdef DEBUG - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); -#endif - - header->magic = RDC_HDR_MAGIC; - (void) strncpy(header->primary.file, urdc->primary.file, NSC_MAXPATH); - (void) strncpy(header->primary.bitmap, urdc->primary.bitmap, - NSC_MAXPATH); - (void) strncpy(header->secondary.file, urdc->secondary.file, - NSC_MAXPATH); - (void) strncpy(header->secondary.bitmap, urdc->secondary.bitmap, - NSC_MAXPATH); - header->flags = urdc->flags | urdc->sync_flags | urdc->bmap_flags; - header->autosync = urdc->autosync; - header->maxqfbas = urdc->maxqfbas; - header->maxqitems = urdc->maxqitems; - header->asyncthr = urdc->asyncthr; - header->syshostid = urdc->syshostid; - header->refcntsize = rdc_refcntsize(krdc); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif -} - -/* - * Must be called with krdc->bmapmutex held. 
- */ -static int -rdc_read_header(rdc_k_info_t *krdc, rdc_header_t *header) -{ - int sts; - rdc_u_info_t *urdc; - union { - rdc_header_t *current; - rdc_headerv4_t *v4; - } u_hdrp; - - if (krdc == NULL) { - return (-1); - } - - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return (-1); - - if (krdc->bitmapfd == NULL) { - return (-1); - } - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, NSC_RDBUF, 0, (uchar_t *)header, - sizeof (rdc_header_t)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->reads++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nread += sizeof (rdc_header_t); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_read_header: %s read failed %d", - urdc->primary.file, sts); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "read header failed"); - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) - return (-1); - switch (header->magic) { - case RDC_HDR_V4: - /* - * old header format - upgrade incore copy, disk copy will - * be changed when state is re-written. - */ -#ifdef DEBUG - cmn_err(CE_NOTE, "!sndr: old style (V4) bit map header"); -#endif - header->magic = RDC_HDR_MAGIC; - u_hdrp.current = header; - /* copy down items moved by new maxq??? 
sizes */ - u_hdrp.current->asyncthr = u_hdrp.v4->asyncthr; - u_hdrp.current->syshostid = u_hdrp.v4->syshostid; - u_hdrp.current->maxqitems = u_hdrp.v4->maxqitems; - u_hdrp.current->maxqfbas = u_hdrp.v4->maxqfbas; - u_hdrp.current->refcntsize = 1; /* new field */ -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)u_hdrp.current->refcntsize, __LINE__, __FILE__); -#endif - return (0); - case RDC_HDR_MAGIC: - /* current header type */ - return (0); - default: - /* not a header we currently understand */ - return (0); - } -} - -/* - * Must be called with krdc->bmapmutex held. - */ -static int -rdc_write_header(rdc_k_info_t *krdc, rdc_header_t *header) -{ - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return (-1); - - if (krdc->bitmapfd == NULL) { - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, 0, (uchar_t *)header, - sizeof (rdc_header_t)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += - sizeof (rdc_header_t); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_header: %s write failed %d", - urdc->primary.file, sts); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) - return (-1); - else - return (0); -} - -struct bm_ref_ops rdc_ref_byte_ops; -struct bm_ref_ops rdc_ref_int_ops; - -static 
void -rdc_set_refcnt_ops(rdc_k_info_t *krdc, size_t refcntsize) -{ - switch (refcntsize) { - default: - /* FALLTHRU */ - case sizeof (unsigned char): - krdc->bm_refs = &rdc_ref_byte_ops; - break; - case sizeof (unsigned int): - krdc->bm_refs = &rdc_ref_int_ops; - break; - } -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: set refcnt ops for refcntsize %d - %d:%s", - (int)refcntsize, __LINE__, __FILE__); -#endif -} - -size_t -rdc_refcntsize(rdc_k_info_t *krdc) -{ - if (krdc->bm_refs == &rdc_ref_int_ops) - return (sizeof (unsigned int)); - return (sizeof (unsigned char)); -} - -int -rdc_read_state(rdc_k_info_t *krdc, int *statep, int *hostidp) -{ - rdc_header_t header; - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - sts = rdc_read_header(krdc, &header); - mutex_exit(&krdc->bmapmutex); - - if (!RDC_SUCCESS(sts)) { - return (-1); - } - - switch (header.magic) { - case RDC_HDR_MAGIC: - *statep = header.flags; - *hostidp = header.syshostid; - rdc_set_refcnt_ops(krdc, header.refcntsize); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - sts = 0; - break; - default: - sts = -1; - break; - } - - return (sts); -} - -int -rdc_clear_state(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - int sts; - rdc_header_t header; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve 
failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - bzero(&header, sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, 0, - (uchar_t *)&header, sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += - sizeof (rdc_header_t); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_clear_state: %s write failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - } - - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); - - if (!RDC_SUCCESS(sts)) - return (-1); - else - return (0); -} - -void -rdc_write_state(rdc_u_info_t *urdc) -{ - rdc_k_info_t *krdc; - int sts; - rdc_header_t header; - - if (urdc == NULL) { - return; - } - - krdc = &rdc_k_info[urdc->index]; - - mutex_enter(&krdc->bmapmutex); - - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return; - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return; - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return; - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, NSC_RDBUF, 0, (uchar_t *)&header, - sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->reads++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nread += 
sizeof (header); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_state: %s read failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "read failed"); - goto done; - } - - rdc_fill_header(urdc, &header); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, 0, - (uchar_t *)&header, sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += sizeof (header); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_state: %s write failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - } - -done: - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); -} - - -struct bitmapdata { - uchar_t *data; - size_t len; -}; - -static int -rdc_read_bitmap(rdc_k_info_t *krdc, struct bitmapdata *data) -{ - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - if (data != NULL) { - data->data = kmem_alloc(krdc->bitmap_size, KM_SLEEP); - data->len = krdc->bitmap_size; - - if (data->data == NULL) { - return (-1); - } - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (data == NULL && krdc->dcio_bitmap == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - cmn_err(CE_WARN, "!rdc_read_bitmap: %s reserve failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return 
(-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, NSC_RDBUF, RDC_BITMAP_FBA, - data ? data->data : krdc->dcio_bitmap, krdc->bitmap_size); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->reads++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nread += krdc->bitmap_size; - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_read_bitmap: %s read failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "read failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - mutex_exit(&krdc->bmapmutex); - return (0); -} - -int -rdc_write_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->dcio_bitmap == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, RDC_BITMAP_FBA, - krdc->dcio_bitmap, krdc->bitmap_size); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - 
KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += krdc->bitmap_size; - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_bitmap: %s write failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - mutex_exit(&krdc->bmapmutex); - return (0); -} - -int -rdc_write_bitmap_fba(rdc_k_info_t *krdc, nsc_off_t fba) -{ - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->dcio_bitmap == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - cmn_err(CE_WARN, "!rdc_write_bitmap_fba: %s reserve failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, RDC_BITMAP_FBA + fba, - krdc->dcio_bitmap + fba * 512, 512); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += 512; - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_bitmap_fba: %s write failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - mutex_exit(&krdc->bmapmutex); - return (0); -} - - -static int 
-rdc_write_bitmap_pattern(rdc_k_info_t *krdc, const char pattern) -{ - rdc_u_info_t *urdc; - char *buffer; - nsc_buf_t *h; - nsc_vec_t *v; - int rc; - size_t i; - nsc_size_t len; - int off; - size_t buffer_size; - size_t iolen; - nsc_size_t fba_req; - nsc_off_t fba_len, fba_pos; - nsc_size_t maxfbas = 0; - nsc_size_t tocopy; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - buffer_size = FBA_SIZE(1); - ASSERT(buffer_size < INT32_MAX); - buffer = kmem_alloc(buffer_size, KM_SLEEP); - - for (i = 0; i < buffer_size; i++) { - buffer[i] = pattern; - } - - rc = nsc_maxfbas(krdc->bitmapfd, 0, &maxfbas); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_write_bitmap_pattern: maxfbas failed (%d)", rc); -#endif - maxfbas = 256; - } - - fba_req = FBA_LEN(krdc->bitmap_size); /* total FBAs left to copy */ - fba_pos = RDC_BITMAP_FBA; /* current FBA position */ - tocopy = krdc->bitmap_size; /* total bytes left to copy */ -loop: - h = NULL; - fba_len = min(fba_req, maxfbas); /* FBAs to alloc this time */ - - rc = nsc_alloc_buf(krdc->bitmapfd, fba_pos, fba_len, rdc_wrflag, &h); - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!rdc_write_bitmap_pattern: %s " - "write failed %d", urdc->primary.file, rc); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "nsc_alloc_buf failed"); - if (h) { - (void) nsc_free_handle(h); - } - - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); - rc = -1; - goto finish; - } - - /* bytes to copy this time */ - len = min(tocopy, FBA_SIZE(fba_len)); - v = h->sb_vec; - off = 0; - - while (len) { - if (off >= v->sv_len) { - off = 
0; - v++; - } - - if (v->sv_addr == 0 || v->sv_len == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_write_bitmap_pattern: ran off end of handle"); -#endif - break; - } - - iolen = (size_t)min(len, buffer_size); - - bcopy(buffer, (char *)(v->sv_addr + off), iolen); - off += iolen; - len -= iolen; - } - - rc = nsc_write(h, h->sb_pos, h->sb_len, 0); - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!rdc_write_bitmap_pattern: " - "%s write failed %d", urdc->primary.file, rc); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - (void) nsc_free_buf(h); - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); - rc = -1; - goto finish; - } - - (void) nsc_free_buf(h); - - fba_pos += fba_len; - fba_req -= fba_len; - tocopy -= FBA_SIZE(fba_len); /* adjust byte length remaining */ - if (fba_req > 0) - goto loop; - - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); - rc = 0; -finish: - kmem_free(buffer, buffer_size); - return (rc); -} - - -/* - * rdc_write_bitmap_fill() - * - * Write a bitmap full of 1's out to disk without touching the - * in-memory bitmap. 
- */ -int -rdc_write_bitmap_fill(rdc_k_info_t *krdc) -{ - return (rdc_write_bitmap_pattern(krdc, 0xff)); -} - - -void -rdc_merge_bitmaps(rdc_k_info_t *src, rdc_k_info_t *dst) -{ - if (src->dcio_bitmap == NULL || dst->dcio_bitmap == NULL) - return; - - rdc_lor(src->dcio_bitmap, dst->dcio_bitmap, - min(src->bitmap_size, dst->bitmap_size)); - if (dst->bitmap_write > 0) - (void) rdc_write_bitmap(dst); -} - - -/* - * bitmap size in bytes, vol_size fba's - */ - -size_t -rdc_ref_size_possible(nsc_size_t bitmap_size, nsc_size_t vol_size) -{ - nsc_size_t ref_size; - nsc_size_t bitmap_end_fbas; - - bitmap_end_fbas = RDC_BITMAP_FBA + FBA_LEN(bitmap_size); - ref_size = FBA_LEN(bitmap_size * BITS_IN_BYTE * sizeof (unsigned char)); - if (bitmap_end_fbas + ref_size > vol_size) - return ((size_t)0); - - ref_size = FBA_LEN(bitmap_size * BITS_IN_BYTE * sizeof (unsigned int)); - if (bitmap_end_fbas + ref_size > vol_size) - return (sizeof (unsigned char)); - return (sizeof (unsigned int)); -} - -int -rdc_move_bitmap(rdc_k_info_t *krdc, char *newbitmap) -{ - rdc_u_info_t *urdc; - nsc_fd_t *oldfd; - nsc_fd_t *newfd = NULL; - rdc_header_t header; - int sts; - nsc_size_t vol_size; - nsc_size_t req_size; - size_t ref_size; - - if (krdc == NULL) { - return (-1); - } - - if (krdc->bitmapfd == NULL) { - return (-1); - } - - req_size = RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size); - if (RDC_IS_DISKQ(krdc->group)) { - /* new volume must support at least the old refcntsize */ - req_size += FBA_LEN(krdc->bitmap_size * BITS_IN_BYTE * - rdc_refcntsize(krdc)); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - } - - mutex_enter(&krdc->bmapmutex); - - if (rdc_read_header(krdc, &header) < 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_move_bitmap: Read old header failed"); -#endif - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - oldfd = krdc->bitmapfd; - - newfd = nsc_open(newbitmap, NSC_RDCHR_ID|NSC_FILE|NSC_RDWR, 0, 
0, 0); - if (newfd == NULL) { - newfd = nsc_open(newbitmap, - NSC_RDCHR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, 0, 0, 0); - if (newfd == NULL) { - /* Can't open new bitmap */ - cmn_err(CE_WARN, - "!rdc_move_bitmap: Cannot open new bitmap %s", - newbitmap); - goto fail; - } - } - - sts = nsc_reserve(newfd, 0); - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_move_bitmap: Reserve failed for %s", - newbitmap); - goto fail; - } - sts = nsc_partsize(newfd, &vol_size); - nsc_release(newfd); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, - "!rdc_move_bitmap: nsc_partsize failed for %s", newbitmap); - goto fail; - } - - ref_size = rdc_ref_size_possible(krdc->bitmap_size, vol_size); - - if (vol_size < req_size) { - cmn_err(CE_WARN, - "!rdc_move_bitmap: bitmap %s too small: %" NSC_SZFMT - " vs %" NSC_SZFMT " blocks", newbitmap, vol_size, req_size); - goto fail; - } - - mutex_enter(&krdc->devices->id_rlock); - krdc->bitmapfd = newfd; /* swap under lock */ - if (krdc->bmaprsrv > 0) { - sts = nsc_reserve(krdc->bitmapfd, 0); - if (!RDC_SUCCESS(sts)) { - krdc->bitmapfd = oldfd; /* replace under lock */ - mutex_exit(&krdc->devices->id_rlock); - cmn_err(CE_WARN, - "!rdc_move_bitmap: Reserve failed for %s", - newbitmap); - goto fail; - } - } - rdc_set_refcnt_ops(krdc, ref_size); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - mutex_exit(&krdc->devices->id_rlock); - - /* Forget newfd now it is krdc->bitmapfd */ - newfd = NULL; - - /* Put new bitmap name into header and user-visible data structure */ - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - (void) strncpy(header.primary.bitmap, newbitmap, NSC_MAXPATH); - (void) strncpy(urdc->primary.bitmap, newbitmap, NSC_MAXPATH); - } else { - (void) strncpy(header.secondary.bitmap, newbitmap, NSC_MAXPATH); - (void) strncpy(urdc->secondary.bitmap, newbitmap, NSC_MAXPATH); - } - - if (rdc_write_header(krdc, &header) < 0) { - 
cmn_err(CE_WARN, - "!rdc_move_bitmap: Write header %s failed", newbitmap); - goto fail; - } - - mutex_exit(&krdc->bmapmutex); - - if (rdc_write_bitmap(krdc) < 0) { - mutex_enter(&krdc->bmapmutex); - cmn_err(CE_WARN, - "!rdc_move_bitmap: Write bitmap %s failed", newbitmap); - goto fail; - } - - /* Unintercept the old bitmap */ - if (krdc->b_tok) { - int rc; - - rdc_group_exit(krdc); - rc = nsc_unregister_path(krdc->b_tok, 0); - if (rc) - cmn_err(CE_WARN, "!rdc_move_bitmap: " - "unregister bitmap failed %d", rc); - else - krdc->b_tok = nsc_register_path(newbitmap, - NSC_CACHE | NSC_DEVICE, _rdc_io_hc); - rdc_group_enter(krdc); - } - - /* clear the old bitmap header */ - bzero(&header, sizeof (header)); - - sts = nsc_held(oldfd) ? 0 : nsc_reserve(oldfd, 0); - if (sts == 0) { - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(oldfd, rdc_wrflag, 0, - (uchar_t *)&header, sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += - sizeof (header); - } - - } -#ifdef DEBUG - if (sts != 0) { - cmn_err(CE_WARN, - "!rdc_move_bitmap: unable to clear bitmap header on %s", - nsc_pathname(oldfd)); - } -#endif - - /* nsc_close will undo any reservation */ - if (nsc_close(oldfd) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_move_bitmap: close old bitmap failed"); -#else - ; - /*EMPTY*/ -#endif - } - - return (0); - -fail: - /* Close newfd if it was unused */ - if (newfd && newfd != krdc->bitmapfd) { - (void) nsc_close(newfd); - newfd = NULL; - } - - mutex_exit(&krdc->bmapmutex); - return (-1); -} - - -void -rdc_close_bitmap(rdc_k_info_t *krdc) -{ - - if (krdc == NULL) { - return; - } - - mutex_enter(&krdc->bmapmutex); - - if 
(krdc->bitmapfd) { - if (nsc_close(krdc->bitmapfd) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nsc_close on bitmap failed"); -#else - ; - /*EMPTY*/ -#endif - } - krdc->bitmapfd = 0; - } - - mutex_exit(&krdc->bmapmutex); -} - -void -rdc_free_bitmap(rdc_k_info_t *krdc, int cmd) -{ - rdc_header_t header; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - if (krdc == NULL) { - return; - } - - mutex_enter(&krdc->bmapmutex); - - if (cmd != RDC_CMD_SUSPEND) { - - bzero((char *)&header, sizeof (rdc_header_t)); - - if (krdc->bitmapfd) - (void) rdc_write_header(krdc, &header); - } else { - mutex_exit(&krdc->bmapmutex); - /* gotta drop mutex, in case q needs to fail */ - if (RDC_IS_DISKQ(krdc->group) && rdc_suspend_diskq(krdc) < 0) { - cmn_err(CE_WARN, - "!rdc_free_bitmap: diskq suspend failed"); - } - - mutex_enter(&krdc->bmapmutex); - if (rdc_read_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_free_bitmap: Read header failed"); - } else { - rdc_fill_header(urdc, &header); - - (void) rdc_write_header(krdc, &header); - } - } - - mutex_exit(&krdc->bmapmutex); - - if (krdc->dcio_bitmap != NULL) { - if (cmd == RDC_CMD_SUSPEND) { - if (krdc->bitmapfd) - (void) rdc_write_bitmap(krdc); - } - - kmem_free(krdc->dcio_bitmap, krdc->bitmap_size); - krdc->dcio_bitmap = NULL; - } - if (krdc->bitmap_ref != NULL) { - kmem_free(krdc->bitmap_ref, (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE)); - krdc->bitmap_ref = NULL; - } - - krdc->bitmap_size = 0; -} - -static int -rdc_alloc_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - char *bitmapname; - nsc_size_t bitmap_ref_size; - - if (krdc == NULL) { - return (-1); - } - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - if (krdc->dcio_bitmap) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_bitmap: bitmap %s already allocated", - bitmapname); -#endif - return (0); - } - - if (urdc->volume_size == 0) 
- return (-1); - - krdc->bitmap_size = BMAP_LOG_BYTES(urdc->volume_size); - /* Round up */ - krdc->bitmap_size = (krdc->bitmap_size + 511) / 512 * 512; - - krdc->dcio_bitmap = (uchar_t *)kmem_zalloc(krdc->bitmap_size, - KM_SLEEP); - if (krdc->dcio_bitmap == NULL) { - cmn_err(CE_WARN, "!rdc_alloc_bitmap: alloc %" NSC_SZFMT - " failed for %s", krdc->bitmap_size, bitmapname); - return (-1); - } - - /* - * use largest ref count type size as we haven't opened the bitmap - * volume yet to find out what has acutally be used. - */ - bitmap_ref_size = krdc->bitmap_size * BITS_IN_BYTE * BMAP_REF_PREF_SIZE; - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - ((krdc->type_flag & RDC_ASYNCMODE) != 0)) { - krdc->bitmap_ref = (uchar_t *)kmem_zalloc(bitmap_ref_size, - KM_SLEEP); - if (krdc->bitmap_ref == NULL) { - cmn_err(CE_WARN, - "!rdc_alloc_bitmap: ref alloc %" NSC_SZFMT - " failed for %s", - bitmap_ref_size, bitmapname); - return (-1); - } - } - - return (0); -} - - -static int -rdc_open_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - int sts; - uint_t hints = 0; - nsc_size_t vol_size; - char *bitmapname; - nsc_size_t req_size; - nsc_size_t bit_size; - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - urdc->bits_set = 0; - - bit_size = req_size = RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size); - if (RDC_IS_DISKQ(krdc->group)) { - req_size += FBA_LEN(krdc->bitmap_size * BITS_IN_BYTE * - sizeof (unsigned char)); - } - - mutex_enter(&krdc->bmapmutex); - - rdc_set_refcnt_ops(krdc, sizeof (unsigned char)); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - if (krdc->bitmapfd == NULL) - krdc->bitmapfd = nsc_open(bitmapname, - NSC_RDCHR_ID|NSC_FILE|NSC_RDWR, 0, 0, 0); - if (krdc->bitmapfd == NULL) { - krdc->bitmapfd = nsc_open(bitmapname, - NSC_RDCHR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, 
0, 0, 0); - if (krdc->bitmapfd == NULL) { - cmn_err(CE_WARN, "!rdc_open_bitmap: Unable to open %s", - bitmapname); - goto fail; - } - } - - sts = _rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL); - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_open_bitmap: Reserve failed for %s", - bitmapname); - goto fail; - } - sts = nsc_partsize(krdc->bitmapfd, &vol_size); - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, - "!rdc_open_bitmap: nsc_partsize failed for %s", bitmapname); - goto fail; - } - - if (vol_size < req_size) { - /* minimum size supports unsigned char reference counts */ - cmn_err(CE_WARN, - "!rdc_open_bitmap: bitmap %s too small: %" NSC_SZFMT " vs %" - NSC_SZFMT "blocks", - bitmapname, vol_size, req_size); - goto fail; - } - - if (rdc_bitmap_mode == RDC_BMP_NEVER) { - krdc->bitmap_write = 0; /* forced off */ - } else if (rdc_bitmap_mode == RDC_BMP_ALWAYS || - (nsc_node_hints(&hints) == 0 && (hints & NSC_FORCED_WRTHRU) == 0)) { - krdc->bitmap_write = 1; /* forced or autodetect on */ - } else { - /* autodetect off */ - krdc->bitmap_write = 0; - } - - mutex_exit(&krdc->bmapmutex); - if (RDC_IS_DISKQ(krdc->group) && (rdc_refcntsize(krdc) < - BMAP_REF_PREF_SIZE)) { - /* test for larger ref counts */ -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - req_size = bit_size; - req_size += FBA_LEN(krdc->bitmap_size * BITS_IN_BYTE * - sizeof (unsigned int)); - if (vol_size >= req_size) - rdc_set_refcnt_ops(krdc, sizeof (unsigned int)); - } -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - return (0); - -fail: - mutex_exit(&krdc->bmapmutex); - return (-1); -} - -int -rdc_enable_bitmap(rdc_k_info_t *krdc, int set) -{ - rdc_header_t header; - rdc_u_info_t *urdc; - char *bitmapname; - - urdc = &rdc_u_info[krdc->index]; - - if (rdc_alloc_bitmap(krdc) < 0) - goto fail; - - if 
(rdc_open_bitmap(krdc) < 0) - goto fail; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - mutex_enter(&krdc->bmapmutex); - - rdc_clr_flags(urdc, RDC_BMP_FAILED); - if (rdc_read_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_enable_bitmap: Read header %s failed", bitmapname); - mutex_exit(&krdc->bmapmutex); - goto fail; - } - - rdc_fill_header(urdc, &header); - rdc_set_refcnt_ops(krdc, (size_t)header.refcntsize); - - if (set) - (void) RDC_FILL_BITMAP(krdc, FALSE); - - if (rdc_write_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_enable_bitmap: Write header %s failed", - bitmapname); - mutex_exit(&krdc->bmapmutex); - goto fail; - } - mutex_exit(&krdc->bmapmutex); - - if (rdc_write_bitmap(krdc) < 0) { - cmn_err(CE_WARN, - "!rdc_enable_bitmap: Write bitmap %s failed", - bitmapname); - goto fail; - } - - return (0); - -fail: - rdc_free_bitmap(krdc, RDC_CMD_ENABLE); - rdc_close_bitmap(krdc); - - mutex_enter(&krdc->bmapmutex); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "I/O failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); -} - -static int -_rdc_rdwr_refcnt(rdc_k_info_t *krdc, int rwflg) -{ - rdc_u_info_t *urdc; - int rc; - nsc_off_t offset; - nsc_size_t len; - - urdc = &rdc_u_info[krdc->index]; - -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!rdc_rdwr_refcnt: %s refcount for %s", - (rwflg == NSC_READ) ? 
"resuming" : "writing", - urdc->primary.bitmap); -#endif - ASSERT(MUTEX_HELD(QLOCK((&krdc->group->diskq)))); - mutex_enter(&krdc->bmapmutex); - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - cmn_err(CE_WARN, "!rdc_rdwr_refcnt: reserve failed"); - goto fail; - } - - if (krdc->bitmap_size == 0) { - cmn_err(CE_WARN, "!rdc_rdwr_refcnt: NULL bitmap!"); - goto fail; - } - - offset = RDC_BITREF_FBA(krdc); - len = krdc->bitmap_size * BITS_IN_BYTE * rdc_refcntsize(krdc); - - rc = rdc_ns_io(krdc->bitmapfd, rwflg, offset, - (uchar_t *)krdc->bitmap_ref, len); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!unable to %s refcount from bitmap %s", - (rwflg == NSC_READ) ? "retrieve" : "write", - urdc->primary.bitmap); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "refcount I/O failed"); - goto fail; - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - mutex_exit(&krdc->bmapmutex); - -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!rdc_rdwr_refcnt: %s refcount for %s", - (rwflg == NSC_READ) ? "resumed" : "wrote", - urdc->primary.bitmap); -#endif - return (0); - - fail: - _rdc_rlse_devs(krdc, RDC_BMP); - - mutex_exit(&krdc->bmapmutex); - - return (-1); - -} - -/* - * rdc_read_refcount - * read the stored refcount from disk - * queue lock is held - */ -int -rdc_read_refcount(rdc_k_info_t *krdc) -{ - int rc; - - rc = _rdc_rdwr_refcnt(krdc, NSC_READ); - - return (rc); -} - -/* - * rdc_write_refcount - * writes krdc->bitmap_ref to the diskq - * called with qlock held - */ -int -rdc_write_refcount(rdc_k_info_t *krdc) -{ - int rc; - - rc = _rdc_rdwr_refcnt(krdc, NSC_WRBUF); - - return (rc); -} - -static int -rdc_resume_state(rdc_k_info_t *krdc, const rdc_header_t *header) -{ - rdc_u_info_t *urdc; - char *bitmapname; - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - if (header->magic != RDC_HDR_MAGIC) { - cmn_err(CE_WARN, "!rdc_resume_state: Bad magic in %s", - bitmapname); - 
return (-1); - } - - if (strncmp(urdc->primary.file, header->primary.file, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_state: Found %s Expected %s", - header->primary.file, urdc->primary.file); -#endif /* DEBUG */ - return (-1); - } - - if (strncmp(urdc->secondary.file, header->secondary.file, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_state: Found %s Expected %s", - header->secondary.file, urdc->secondary.file); -#endif /* DEBUG */ - return (-1); - } - - if (strncmp(urdc->primary.bitmap, header->primary.bitmap, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_state: Found %s Expected %s", - header->primary.bitmap, urdc->primary.bitmap); -#endif /* DEBUG */ - return (-1); - } - - if (strncmp(urdc->secondary.bitmap, header->secondary.bitmap, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_state: Found %s Expected %s", - header->secondary.bitmap, urdc->secondary.bitmap); -#endif /* DEBUG */ - return (-1); - } - - if (header->maxqfbas) - urdc->maxqfbas = header->maxqfbas; - - if (header->maxqitems) - urdc->maxqitems = header->maxqitems; - - if (header->autosync >= 0) - urdc->autosync = header->autosync; - - if (header->asyncthr) - urdc->asyncthr = header->asyncthr; - - rdc_many_enter(krdc); - rdc_set_refcnt_ops(krdc, header->refcntsize); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - if (header->flags & RDC_VOL_FAILED) - rdc_set_flags(urdc, RDC_VOL_FAILED); - if (header->flags & RDC_QUEUING) - rdc_set_flags(urdc, RDC_QUEUING); - - rdc_clr_flags(urdc, RDC_SYNC_NEEDED | RDC_RSYNC_NEEDED); - rdc_set_mflags(urdc, (header->flags & RDC_RSYNC_NEEDED)); - rdc_set_flags(urdc, (header->flags & RDC_SYNC_NEEDED)); - rdc_many_exit(krdc); - - if (urdc->flags & RDC_VOL_FAILED) { - - /* Our disk was failed so set all the bits in the bitmap */ - - if (RDC_FILL_BITMAP(krdc, TRUE) != 0) { - 
cmn_err(CE_WARN, - "!rdc_resume_state: Fill bitmap %s failed", - bitmapname); - return (-1); - } - rdc_many_enter(krdc); - if (IS_STATE(urdc, RDC_QUEUING)) - rdc_clr_flags(urdc, RDC_QUEUING); - rdc_many_exit(krdc); - } else { - /* Header was good, so read in the bitmap */ - - if (rdc_read_bitmap(krdc, NULL) < 0) { - cmn_err(CE_WARN, - "!rdc_resume_state: Read bitmap %s failed", - bitmapname); - return (-1); - } - - urdc->bits_set = RDC_COUNT_BITMAP(krdc); - - /* - * Check if another node went down with bits set, but - * without setting logging mode. - */ - if (urdc->bits_set != 0 && - (rdc_get_vflags(urdc) & RDC_ENABLED) && - !(rdc_get_vflags(urdc) & RDC_LOGGING)) { - rdc_group_log(krdc, RDC_NOFLUSH | RDC_NOREMOTE, NULL); - } - } - - /* if we are using a disk queue, read in the reference count bits */ - if (RDC_IS_DISKQ(krdc->group)) { - disk_queue *q = &krdc->group->diskq; - mutex_enter(QLOCK(q)); - if ((rdc_read_refcount(krdc) < 0)) { - cmn_err(CE_WARN, - "!rdc_resume_state: Resume bitmap %s's refcount" - "failed", - urdc->primary.bitmap); - mutex_exit(QLOCK(q)); - rdc_many_enter(krdc); - if (IS_STATE(urdc, RDC_QUEUING)) - rdc_clr_flags(urdc, RDC_QUEUING); - rdc_many_exit(krdc); - return (-1); - } - mutex_exit(QLOCK(q)); - } - - return (0); -} - - -int -rdc_resume_bitmap(rdc_k_info_t *krdc) -{ - rdc_header_t header; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - char *bitmapname; - - if (rdc_alloc_bitmap(krdc) < 0) - goto allocfail; - - if (rdc_open_bitmap(krdc) < 0) - goto fail; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - mutex_enter(&krdc->bmapmutex); - - rdc_clr_flags(urdc, RDC_BMP_FAILED); - if (rdc_read_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_resume_bitmap: Read header %s failed", bitmapname); - mutex_exit(&krdc->bmapmutex); - goto fail; - } - - mutex_exit(&krdc->bmapmutex); - - /* Resuming from the bitmap, so do some checking */ - - 
/*CONSTCOND*/ - ASSERT(FBA_LEN(sizeof (rdc_header_t)) <= RDC_BITMAP_FBA); - /*CONSTCOND*/ - ASSERT(sizeof (rdc_header_t) >= sizeof (rdc_headerv2_t)); - - if (header.magic == RDC_HDR_V2) { - rdc_headerv2_t *hdr_v2 = (rdc_headerv2_t *)&header; - rdc_header_t new_header; - -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_bitmap: Converting v2 header for bitmap %s", - bitmapname); -#endif - bzero((char *)&new_header, sizeof (rdc_header_t)); - - new_header.autosync = -1; - new_header.magic = RDC_HDR_MAGIC; - new_header.syshostid = urdc->syshostid; - - if (hdr_v2->volume_failed) - new_header.flags |= RDC_VOL_FAILED; - if (hdr_v2->sync_needed == RDC_SYNC) - new_header.flags |= RDC_SYNC_NEEDED; - if (hdr_v2->sync_needed == RDC_FULL_SYNC) - new_header.flags |= RDC_SYNC_NEEDED; - if (hdr_v2->sync_needed == RDC_REV_SYNC) - new_header.flags |= RDC_RSYNC_NEEDED; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - (void) strncpy(new_header.primary.file, - hdr_v2->filename, NSC_MAXPATH); - (void) strncpy(new_header.primary.bitmap, - hdr_v2->bitmapname, NSC_MAXPATH); - (void) strncpy(new_header.secondary.file, - urdc->secondary.file, NSC_MAXPATH); - (void) strncpy(new_header.secondary.bitmap, - urdc->secondary.bitmap, NSC_MAXPATH); - } else { - (void) strncpy(new_header.secondary.file, - hdr_v2->filename, NSC_MAXPATH); - (void) strncpy(new_header.secondary.bitmap, - hdr_v2->bitmapname, NSC_MAXPATH); - (void) strncpy(new_header.primary.file, - urdc->primary.file, NSC_MAXPATH); - (void) strncpy(new_header.primary.bitmap, - urdc->primary.bitmap, NSC_MAXPATH); - } - - bcopy(&new_header, &header, sizeof (rdc_header_t)); - - mutex_enter(&krdc->bmapmutex); - if (rdc_write_header(krdc, &header) < 0) { - mutex_exit(&krdc->bmapmutex); - cmn_err(CE_WARN, - "!rdc_resume_bitmap: Write header %s failed", - bitmapname); - goto fail; - } - mutex_exit(&krdc->bmapmutex); - - } else if (header.magic == RDC_HDR_V3) { - /* - * just update asyncthr and magic, and then we're done - */ - header.magic = 
RDC_HDR_MAGIC; - header.asyncthr = RDC_ASYNCTHR; - mutex_enter(&krdc->bmapmutex); - if (rdc_write_header(krdc, &header) < 0) { - mutex_exit(&krdc->bmapmutex); - cmn_err(CE_WARN, - "!rdc_resume_bitmap: Write header %s failed", - bitmapname); - goto fail; - } - mutex_exit(&krdc->bmapmutex); - } - - if (rdc_resume_state(krdc, &header) == 0) - return (0); - - rdc_close_bitmap(krdc); - -fail: - (void) RDC_FILL_BITMAP(krdc, FALSE); - rdc_clr_flags(urdc, RDC_QUEUING); - if (krdc->bitmap_ref) - bzero(krdc->bitmap_ref, krdc->bitmap_size * BITS_IN_BYTE * - rdc_refcntsize(krdc)); - -allocfail: - mutex_enter(&krdc->bmapmutex); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "resume bitmap failed"); - mutex_exit(&krdc->bmapmutex); - - return (-1); -} - -void -rdc_std_zero_bitref(rdc_k_info_t *krdc) -{ - nsc_size_t vol_size; - int sts; - size_t newrefcntsize; - - if (krdc->bitmap_ref) { - mutex_enter(&krdc->bmapmutex); - bzero(krdc->bitmap_ref, krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE); - if (RDC_IS_DISKQ(krdc->group) && rdc_refcntsize(krdc) != - BMAP_REF_PREF_SIZE) { - /* see if we can upgrade the size of the ref counters */ - sts = _rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL); - if (!RDC_SUCCESS(sts)) { - goto nochange; - } - sts = nsc_partsize(krdc->bitmapfd, &vol_size); - - newrefcntsize = rdc_ref_size_possible(krdc->bitmap_size, - vol_size); - if (newrefcntsize > rdc_refcntsize(krdc)) { - rdc_set_refcnt_ops(krdc, newrefcntsize); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - } -nochange: - _rdc_rlse_devs(krdc, RDC_BMP); - } - mutex_exit(&krdc->bmapmutex); - } -} - -int -rdc_reset_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - rdc_header_t header; - char *bitmapname; - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - mutex_enter(&krdc->bmapmutex); - - 
rdc_clr_flags(urdc, RDC_BMP_FAILED); - if (rdc_read_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_reset_bitmap: Read header %s failed", bitmapname); - goto fail_with_mutex; - } - - rdc_fill_header(urdc, &header); - - if (rdc_write_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_reset_bitmap: Write header %s failed", - bitmapname); - goto fail_with_mutex; - } - mutex_exit(&krdc->bmapmutex); - - if (krdc->bitmap_write == -1) - krdc->bitmap_write = 0; - - if (krdc->bitmap_write == 0) { - if (rdc_write_bitmap_fill(krdc) < 0) { - cmn_err(CE_WARN, - "!rdc_reset_bitmap: Write bitmap %s failed", - bitmapname); - goto fail; - } - krdc->bitmap_write = -1; - } else if (rdc_write_bitmap(krdc) < 0) { - cmn_err(CE_WARN, - "!rdc_reset_bitmap: Write bitmap %s failed", - bitmapname); - goto fail; - } - - return (0); - -fail: - mutex_enter(&krdc->bmapmutex); -fail_with_mutex: - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reset failed"); - mutex_exit(&krdc->bmapmutex); -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: unable to reset bitmap for %s:%s", - urdc->secondary.intf, urdc->secondary.file); -#endif - return (-1); -} - - -/* - * General bitmap operations - */ - -/* - * rdc_set_bitmap_many() - * - * Used during reverse syncs to a 1-to-many primary to keep the 'many' - * bitmaps up to date. 
- */ -void -rdc_set_bitmap_many(rdc_k_info_t *krdc, nsc_off_t pos, nsc_size_t len) -{ - uint_t dummy; - -#ifdef DEBUG - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - cmn_err(CE_PANIC, "rdc_set_bitmap_many: not primary, urdc %p", - (void *) urdc); - } -#endif - - if (IS_MANY(krdc)) { - rdc_k_info_t *krd; - rdc_u_info_t *urd; - - rdc_many_enter(krdc); - - for (krd = krdc->many_next; krd != krdc; krd = krd->many_next) { - urd = &rdc_u_info[krd->index]; - if (!IS_ENABLED(urd)) - continue; - ASSERT(urd->flags & RDC_PRIMARY); - (void) RDC_SET_BITMAP(krd, pos, len, &dummy); - } - - rdc_many_exit(krdc); - } -} - - -static int -_rdc_net_bmap(const struct bmap6 *b6, net_bdata6 *bd6) -{ - rdc_k_info_t *krdc = &rdc_k_info[b6->cd]; - struct timeval t; - int e, ret; - uint64_t left; - uint64_t bmap_blksize; - - bmap_blksize = krdc->rpc_version < RDC_VERSION7 ? - BMAP_BLKSIZE : BMAP_BLKSIZEV7; - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (bd6->data.data_val == NULL) { - return (EINVAL); - } - - left = b6->size; - bd6->endoblk = 0; - while (left) { - if (left >= bmap_blksize) - bd6->size = (int)bmap_blksize; - else - bd6->size = (int)left; - - bd6->data.data_len = bd6->size; - - if ((uint64_t)bd6->size > left) { - left = 0; - } else { - left -= bd6->size; - } - /* - * mark the last block sent. 
- */ - if (left == 0) { - bd6->endoblk = 1; - } - ASSERT(krdc->rpc_version); - if (krdc->rpc_version <= RDC_VERSION5) { - struct net_bdata bd; - bd.cd = bd6->cd; - bd.offset = bd6->offset; - bd.size = bd6->size; - bd.data.data_len = bd6->data.data_len; - bd.data.data_val = bd6->data.data_val; - e = rdc_clnt_call(krdc->lsrv, RDCPROC_BDATA, - krdc->rpc_version, xdr_net_bdata, (char *)&bd, - xdr_int, (char *)&ret, &t); - } else { - e = rdc_clnt_call(krdc->lsrv, RDCPROC_BDATA6, - krdc->rpc_version, xdr_net_bdata6, (char *)bd6, - xdr_int, (char *)&ret, &t); - } - if (e || ret) { - if (e) - ret = e; - return (ret); - } - bd6->offset += bmap_blksize; - bd6->data.data_val += bmap_blksize; - } - return (0); -} - - -/* - * Standard bitmap operations (combined kmem/disk bitmaps). - */ - -/* - * rdc_std_set_bitmask(pos, len, &bitmask) - * set a bitmask for this range. used to clear the correct - * bits after flushing - */ -static void -rdc_std_set_bitmask(const nsc_off_t fba_pos, const nsc_size_t fba_len, - uint_t *bitmask) -{ - int first, st, en; - if (bitmask) - *bitmask = 0; - else - return; - - first = st = FBA_TO_LOG_NUM(fba_pos); - en = FBA_TO_LOG_NUM(fba_pos + fba_len - 1); - while (st <= en) { - BMAP_BIT_SET((uchar_t *)bitmask, st - first); - st++; - } - -} -/* - * rdc_std_set_bitmap(krdc, fba_pos, fba_len, &bitmask) - * - * Mark modified segments in the dual copy file bitmap - * to provide fast recovery - * Note that bitmask allows for 32 segments, which at 32k per segment equals - * 1 megabyte. If we ever allow more than this to be transferred in one - * operation, or decrease the segment size, then this code will have to be - * changed accordingly. 
- */ - -static int -rdc_std_set_bitmap(rdc_k_info_t *krdc, const nsc_off_t fba_pos, - const nsc_size_t fba_len, uint_t *bitmask) -{ - int first, st, en; - int fbaset = 0; - nsc_off_t fba = 0; - int printerr = 10; - int tries = RDC_FUTILE_ATTEMPTS; - int queuing = RDC_QUEUING; - rdc_u_info_t *urdc; - - if (bitmask) - *bitmask = 0; - else - return (-1); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return (-1); - - if (krdc->bitmap_write == 0) { - if (rdc_write_bitmap_fill(krdc) < 0) - return (-1); - krdc->bitmap_write = -1; - } - first = st = FBA_TO_LOG_NUM(fba_pos); - en = FBA_TO_LOG_NUM(fba_pos + fba_len - 1); - ASSERT(st <= en); - while (st <= en) { - int use_ref; -again: - mutex_enter(&krdc->bmapmutex); - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_std_set_bitmap: " - "recovery bitmaps not allocated"); -#endif - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - use_ref = IS_PRIMARY(urdc) && IS_ASYNC(urdc) && - ((rdc_get_vflags(urdc) & RDC_QUEUING) || - !(rdc_get_vflags(urdc) & RDC_LOGGING)); - - - if (!BMAP_BIT_ISSET(krdc->dcio_bitmap, st)) { - BMAP_BIT_SET(krdc->dcio_bitmap, st); - if (use_ref) { - ASSERT(BMAP_REF_ISSET(krdc, st) == - 0); - BMAP_REF_FORCE(krdc, st, 1); - } - BMAP_BIT_SET((uchar_t *)bitmask, st - first); - urdc->bits_set++; - if ((!fbaset) || fba != BIT_TO_FBA(st)) { - if (fbaset && krdc->bitmap_write > 0) { - mutex_exit(&krdc->bmapmutex); - if (rdc_write_bitmap_fba(krdc, fba) < 0) - return (-1); - mutex_enter(&krdc->bmapmutex); - } - fba = BIT_TO_FBA(st); - fbaset = 1; - } - } else { - /* - * Just bump reference count - * For logging or syncing we do not care what the reference - * is as it will be forced back on the state transition. - */ - if (use_ref) { - if (BMAP_REF_ISSET(krdc, st) == - BMAP_REF_MAXVAL(krdc)) { - /* - * Rollover of reference count. 
- */ - - if (!(rdc_get_vflags(urdc) & - RDC_VOL_FAILED)) { - /* - * Impose throttle to help dump - * queue - */ - mutex_exit(&krdc->bmapmutex); - delay(4); - rdc_bitmap_delay++; - if (printerr--) { - cmn_err(CE_WARN, "!SNDR: bitmap reference count maxed out for %s:%s", - urdc->secondary.intf, urdc->secondary.file); - - } - - if ((tries-- <= 0) && - IS_STATE(urdc, queuing)) { - cmn_err(CE_WARN, "!SNDR: giving up on reference count, logging set" - " %s:%s", urdc->secondary.intf, urdc->secondary.file); - rdc_group_enter(krdc); - rdc_group_log(krdc, - RDC_NOFLUSH | - RDC_NOREMOTE| - RDC_FORCE_GROUP, - "ref count retry limit exceeded"); - rdc_group_exit(krdc); - } - goto again; - } - } else { - BMAP_REF_SET(krdc, st); - } - } - } - mutex_exit(&krdc->bmapmutex); - st++; - } - if (fbaset && krdc->bitmap_write > 0) { - if (rdc_write_bitmap_fba(krdc, fba) < 0) - return (-1); - } - return (0); -} - -static void -rdc_std_clr_bitmap(rdc_k_info_t *krdc, const nsc_off_t fba_pos, - const nsc_size_t fba_len, const uint_t bitmask, const int force) -{ - int first, st, en; - nsc_off_t fba = 0; - int fbaset = 0; - uint_t bm = bitmask; - uchar_t *ptr = (uchar_t *)&bm; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return; - - first = st = FBA_TO_LOG_NUM(fba_pos); - en = FBA_TO_LOG_NUM(fba_pos + fba_len - 1); - ASSERT(st <= en); - while (st <= en) { - mutex_enter(&krdc->bmapmutex); - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_std_clr_bitmap: " - "recovery bitmaps not allocated"); -#endif - mutex_exit(&krdc->bmapmutex); - return; - } - - if (((bitmask == 0xffffffff) || - (BMAP_BIT_ISSET(ptr, st - first))) && - BMAP_BIT_ISSET(krdc->dcio_bitmap, st)) { - - int use_ref = IS_PRIMARY(urdc) && IS_ASYNC(urdc) && - ((rdc_get_vflags(urdc) & RDC_QUEUING) || - !(rdc_get_vflags(urdc) & RDC_LOGGING)); - - if (force || (use_ref == 0)) { - if (krdc->bitmap_ref) - BMAP_REF_FORCE(krdc, st, 0); - } else if (use_ref) { - if 
(BMAP_REF_ISSET(krdc, st) != 0) - BMAP_REF_CLR(krdc, st); - - } - - if ((use_ref == 0) || (use_ref && - !BMAP_REF_ISSET(krdc, st))) { - BMAP_BIT_CLR(krdc->dcio_bitmap, st); - - urdc->bits_set--; - if (!fbaset || fba != BIT_TO_FBA(st)) { - if (fbaset && - krdc->bitmap_write > 0) { - mutex_exit(&krdc->bmapmutex); - if (rdc_write_bitmap_fba(krdc, - fba) < 0) - return; - mutex_enter(&krdc->bmapmutex); - } - fba = BIT_TO_FBA(st); - fbaset = 1; - } - } - } - mutex_exit(&krdc->bmapmutex); - st++; - } - if (fbaset && krdc->bitmap_write > 0) { - if (rdc_write_bitmap_fba(krdc, fba) < 0) - return; - } -} - -/* - * make sure that this bit is set. if it isn't, set it - * used when transitioning from async to sync while going - * from rep to log. an overlapping sync write may unconditionally - * clear the bit that has not been replicated. when the queue - * is being dumped or this is called just to make sure pending stuff - * is in the bitmap - */ -void -rdc_std_check_bit(rdc_k_info_t *krdc, nsc_off_t pos, nsc_size_t len) -{ - int st; - int en; - nsc_off_t fba; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - st = FBA_TO_LOG_NUM(pos); - en = FBA_TO_LOG_NUM(pos + len - 1); - - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return; - - while (st <= en) { - mutex_enter(&krdc->bmapmutex); - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_std_check_bit: " - "recovery bitmaps not allocated"); -#endif - mutex_exit(&krdc->bmapmutex); - return; - } - - if (!BMAP_BIT_ISSET(krdc->dcio_bitmap, st)) { - BMAP_BIT_SET(krdc->dcio_bitmap, st); - if (krdc->bitmap_write > 0) { - fba = BIT_TO_FBA(st); - mutex_exit(&krdc->bmapmutex); - (void) rdc_write_bitmap_fba(krdc, fba); - mutex_enter(&krdc->bmapmutex); - } - urdc->bits_set++; - - } - mutex_exit(&krdc->bmapmutex); - st++; - } - -} - -/* - * rdc_std_count_dirty(krdc): - * - * Determine the number of segments that need to be flushed, This should - * agree with the number of segments logged, but since we don't lock when - 
* we increment, we force these values to agree - */ -static int -rdc_std_count_dirty(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int i, count, size; - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_std_count_dirty: no bitmap configured for %s", - urdc->primary.file); -#endif - return (0); - } - - count = 0; - ASSERT(urdc->volume_size != 0); - size = FBA_TO_LOG_LEN(urdc->volume_size); - for (i = 0; i < size; i++) - if (BMAP_BIT_ISSET(krdc->dcio_bitmap, i)) - count++; - - if (count > size) - count = size; - - return (count); -} - - -static int -rdc_std_bit_isset(rdc_k_info_t *krdc, const int bit) -{ - return (BMAP_BIT_ISSET(krdc->dcio_bitmap, bit)); -} - - -/* - * rdc_std_fill_bitmap(krdc, write) - * - * Called to force bitmaps to a fully dirty state - */ -static int -rdc_std_fill_bitmap(rdc_k_info_t *krdc, const int write) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int i, size; - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_std_fill_bitmap: no bitmap configured for %s", - urdc->primary.file); -#endif - return (-1); - } - - ASSERT(urdc->volume_size != 0); - size = FBA_TO_LOG_LEN(urdc->volume_size); - for (i = 0; i < size; i++) - BMAP_BIT_SET(krdc->dcio_bitmap, i); - - urdc->bits_set = size; - - if (write) - return (rdc_write_bitmap(krdc)); - - return (0); -} - - -/* - * rdc_std_zero_bitmap(krdc) - * - * Called on the secondary after a sync has completed to force bitmaps - * to a fully clean state - */ -static void -rdc_std_zero_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int i, size; - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_std_zero_bitmap: no bitmap configured for %s", - urdc->primary.file); -#endif - return; - } -#ifdef DEBUG - cmn_err(CE_NOTE, "!Clearing bitmap for %s", urdc->secondary.file); -#endif - - ASSERT(urdc->volume_size != 0); - size = FBA_TO_LOG_LEN(urdc->volume_size); - for (i = 0; 
i < size; i++) - BMAP_BIT_CLR(krdc->dcio_bitmap, i); - if (krdc->bitmap_write > 0) - (void) rdc_write_bitmap(krdc); - - urdc->bits_set = 0; -} - - -/* - * rdc_std_net_bmap() - * - * WARNING acts as both client and server - */ -static int -rdc_std_net_bmap(const struct bmap6 *b) -{ - rdc_k_info_t *krdc = &rdc_k_info[b->cd]; - struct net_bdata6 bd; - - bd.data.data_val = (char *)krdc->dcio_bitmap; - bd.cd = b->dual; - bd.offset = 0; - - return (_rdc_net_bmap(b, &bd)); -} - - -/* - * rdc_std_net_bdata - */ -static int -rdc_std_net_bdata(const struct net_bdata6 *bd) -{ - rdc_k_info_t *krdc = &rdc_k_info[bd->cd]; - - rdc_lor((uchar_t *)bd->data.data_val, - (uchar_t *)(((char *)krdc->dcio_bitmap) + bd->offset), bd->size); - - return (0); -} - - -static struct rdc_bitmap_ops rdc_std_bitmap_ops = { - rdc_std_set_bitmap, - rdc_std_clr_bitmap, - rdc_std_count_dirty, - rdc_std_bit_isset, - rdc_std_fill_bitmap, - rdc_std_zero_bitmap, - rdc_std_net_bmap, - rdc_std_net_bdata, - rdc_std_zero_bitref, - rdc_std_set_bitmask, - rdc_std_check_bit -}; - - -void -rdc_bitmap_init() -{ - rdc_bitmap_ops = &rdc_std_bitmap_ops; - rdc_wrflag = NSC_WRITE; -} - -static void -rdc_bmap_ref_byte_set(rdc_k_info_t *krdc, int ind) -{ - unsigned char *bmap = (unsigned char *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - bmap[ind]++; -} - -static void -rdc_bmap_ref_byte_clr(rdc_k_info_t *krdc, int ind) -{ - unsigned char *bmap = (unsigned char *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - bmap[ind]--; -} - -static unsigned int -rdc_bmap_ref_byte_isset(rdc_k_info_t *krdc, int ind) -{ - unsigned char *bmap = (unsigned char *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - return ((unsigned int)(bmap[ind])); -} - -static void -rdc_bmap_ref_byte_force(rdc_k_info_t *krdc, int ind, unsigned int val) -{ - unsigned char *bmap = (unsigned char *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned 
char)); - bmap[ind] = (unsigned char) val; -} - -/* ARGSUSED */ -static unsigned int -rdc_bmap_ref_byte_maxval(rdc_k_info_t *krdc) -{ - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - return ((unsigned int)(UINT8_MAX)); -} - -struct bm_ref_ops rdc_ref_byte_ops = { - rdc_bmap_ref_byte_set, - rdc_bmap_ref_byte_clr, - rdc_bmap_ref_byte_isset, - rdc_bmap_ref_byte_force, - rdc_bmap_ref_byte_maxval, - sizeof (unsigned char) -}; - -static void -rdc_bmap_ref_int_set(rdc_k_info_t *krdc, int ind) -{ - unsigned int *bmap = (unsigned int *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - bmap[ind]++; -} - -static void -rdc_bmap_ref_int_clr(rdc_k_info_t *krdc, int ind) -{ - unsigned int *bmap = (unsigned int *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - bmap[ind]--; -} - -static unsigned int -rdc_bmap_ref_int_isset(rdc_k_info_t *krdc, int ind) -{ - unsigned int *bmap = (unsigned int *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - return ((bmap[ind])); -} - -static void -rdc_bmap_ref_int_force(rdc_k_info_t *krdc, int ind, unsigned int val) -{ - unsigned int *bmap = (unsigned int *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - bmap[ind] = val; -} - -/* ARGSUSED */ -static unsigned int -rdc_bmap_ref_int_maxval(rdc_k_info_t *krdc) -{ - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - return ((unsigned int)(UINT_MAX)); -} - -struct bm_ref_ops rdc_ref_int_ops = { - rdc_bmap_ref_int_set, - rdc_bmap_ref_int_clr, - rdc_bmap_ref_int_isset, - rdc_bmap_ref_int_force, - rdc_bmap_ref_int_maxval, - sizeof (unsigned int) -}; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.h b/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.h deleted file mode 100644 index 72ee2b791a..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * 
Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_BITMAP_H -#define _RDC_BITMAP_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -extern int rdc_bitmap_mode; /* property from rdc.conf */ - -/* - * Possible values of rdc_bitmap_mode - integer flag. 
- */ -#define RDC_BMP_AUTO 0x0 /* auto detect bitmap mode */ -#define RDC_BMP_ALWAYS 0x1 /* always write the bitmap */ -#define RDC_BMP_NEVER 0x2 /* never write the bitmap */ - -#endif /* _KERNEL */ - -/* - * Public bitmap interface - * The bitmaps are maintained on 32 Kbyte segments - */ - -#define LOG_SHFT 15 -#define IND_BYTE(ind) ((ind) >> 3) -#define IND_BIT(ind) (1 << ((ind) & 0x7)) - -#define FBA_LOG_SHFT (LOG_SHFT - FBA_SHFT) -#define FBA_TO_LOG_NUM(x) ((x) >> FBA_LOG_SHFT) -#define LOG_TO_FBA_NUM(x) ((x) << FBA_LOG_SHFT) -#define FBA_TO_LOG_LEN(x) (FBA_TO_LOG_NUM((x)-1) + 1) - -#define BMAP_LOG_BYTES(fbas) (IND_BYTE(FBA_TO_LOG_NUM((fbas)-1))+1) - -#define BITS_IN_BYTE 8 - -/* - * Private macros for bitmap manipulation - */ - -#define BMAP_BIT_SET(bmap, ind) ((bmap)[IND_BYTE(ind)] |= IND_BIT(ind)) -#define BMAP_BIT_CLR(bmap, ind) ((bmap)[IND_BYTE(ind)] &= ~IND_BIT(ind)) -#define BMAP_BIT_ISSET(bmap, ind) \ - ((bmap)[IND_BYTE(ind)] & IND_BIT(ind)) - -#define BIT_TO_FBA(b) (FBA_NUM(b) >> 3) - -#define BMAP_REF_SET(krdc, ind) (((krdc)->bm_refs->bmap_ref_set)(krdc, ind)) -#define BMAP_REF_CLR(krdc, ind) (((krdc)->bm_refs->bmap_ref_clr)(krdc, ind)) -#define BMAP_REF_ISSET(krdc, ind) (((krdc)->bm_refs->bmap_ref_isset)(krdc, ind)) -#define BMAP_REF_FORCE(krdc, ind, val) \ - (((krdc)->bm_refs->bmap_ref_force)(krdc, ind, val)) -#define BMAP_REF_MAXVAL(krdc) (((krdc)->bm_refs->bmap_ref_maxval)(krdc)) -#define BMAP_REF_SIZE(krdc) ((krdc)->bm_refs->bmap_ref_size) -#define BMAP_REF_PREF_SIZE (sizeof (unsigned int)) - -#ifndef _KERNEL - -struct bm_ref_ops { - void (*bmap_ref_set)(void *, int); - void (*bmap_ref_clr)(void *, int); - unsigned int (*bmap_ref_isset)(void *, int); - void (*bmap_ref_force)(void *, int, unsigned int); - unsigned int (*bmap_ref_maxval)(void *); - size_t bmap_ref_size; -}; - -#else - -struct bm_ref_ops { - void (*bmap_ref_set)(rdc_k_info_t *, int); - void (*bmap_ref_clr)(rdc_k_info_t *, int); - unsigned int (*bmap_ref_isset)(rdc_k_info_t *, int); 
- void (*bmap_ref_force)(rdc_k_info_t *, int, unsigned int); - unsigned int (*bmap_ref_maxval)(rdc_k_info_t *); - size_t bmap_ref_size; -}; - - -/* convert fba to block number */ -#define _BNUM(x) (FBA_TO_LOG_NUM(x)) - -/* force reference clear during sync */ -#define RDC_BIT_BUMP 0x0 -#define RDC_BIT_FORCE 0x1 -#define RDC_BIT_FLUSHER 0x2 - -/* check for overlap, taking account of blocking factor */ -#define RDC_OVERLAP(p1, l1, p2, l2) \ - ((_BNUM(((p1) + (l1) - 1)) >= _BNUM((p2))) && \ - (_BNUM((p1)) <= _BNUM(((p2) + (l2) - 1)))) - -struct rdc_bitmap_ops { - int (*set_bitmap)(rdc_k_info_t *, const nsc_off_t, const nsc_size_t, - uint_t *); - void (*clr_bitmap)(rdc_k_info_t *, const nsc_off_t, const nsc_size_t, - const uint_t, const int); - int (*count_dirty)(rdc_k_info_t *); - int (*bit_isset)(rdc_k_info_t *, const int); - int (*fill_bitmap)(rdc_k_info_t *, const int); - void (*zero_bitmap)(rdc_k_info_t *); - int (*net_bmap)(const struct bmap6 *); - int (*net_b_data)(const struct net_bdata6 *); - void (*zero_bitref)(rdc_k_info_t *); - void (*set_bitmask)(const nsc_off_t, const nsc_size_t, uint_t *); - void (*check_bit)(rdc_k_info_t *, nsc_off_t, nsc_size_t); -}; - -extern struct rdc_bitmap_ops *rdc_bitmap_ops; - -#define RDC_SET_BITMAP(krdc, pos, len, bitmaskp) \ - (*rdc_bitmap_ops->set_bitmap)(krdc, pos, len, bitmaskp) -#define RDC_CLR_BITMAP(krdc, pos, len, bitmask, flag) \ - (*rdc_bitmap_ops->clr_bitmap)(krdc, pos, len, bitmask, flag) -#define RDC_COUNT_BITMAP(krdc) \ - (*rdc_bitmap_ops->count_dirty)(krdc) -#define RDC_BIT_ISSET(krdc, bit) \ - (*rdc_bitmap_ops->bit_isset)(krdc, bit) -#define RDC_FILL_BITMAP(krdc, write) \ - (*rdc_bitmap_ops->fill_bitmap)(krdc, write) -#define RDC_ZERO_BITMAP(krdc) \ - (*rdc_bitmap_ops->zero_bitmap)(krdc) -#define RDC_SEND_BITMAP(argp) \ - (*rdc_bitmap_ops->net_bmap)(argp) -#define RDC_OR_BITMAP(argp) \ - (*rdc_bitmap_ops->net_b_data)(argp) -#define RDC_ZERO_BITREF(krdc) \ - (*rdc_bitmap_ops->zero_bitref)(krdc) -#define 
RDC_SET_BITMASK(off, len, maskp) \ - (*rdc_bitmap_ops->set_bitmask)(off, len, maskp) -#define RDC_CHECK_BIT(krdc, pos, len) \ - (*rdc_bitmap_ops->check_bit)(krdc, pos, len) - -/* - * Functions - */ - -extern void rdc_bitmap_init(void); -extern int rdc_move_bitmap(rdc_k_info_t *, char *); -extern int rdc_enable_bitmap(rdc_k_info_t *, int); -extern int rdc_resume_bitmap(rdc_k_info_t *); -extern int rdc_reset_bitmap(rdc_k_info_t *); -extern void rdc_free_bitmap(rdc_k_info_t *, int); -extern void rdc_close_bitmap(rdc_k_info_t *); -extern int rdc_write_bitmap(rdc_k_info_t *); -extern int rdc_write_bitmap_fill(rdc_k_info_t *); -extern void rdc_set_bitmap_many(rdc_k_info_t *, nsc_off_t, nsc_size_t); -extern void rdc_merge_bitmaps(rdc_k_info_t *, rdc_k_info_t *); - -extern int rdc_read_state(rdc_k_info_t *, int *, int *); -extern int rdc_clear_state(rdc_k_info_t *); -extern void rdc_write_state(rdc_u_info_t *); -extern int rdc_ns_io(nsc_fd_t *, int, nsc_off_t, uchar_t *, nsc_size_t); -extern int rdc_read_refcount(rdc_k_info_t *); -extern int rdc_write_refcount(rdc_k_info_t *); -extern size_t rdc_refcntsize(rdc_k_info_t *); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_BITMAP_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_clnt.c b/usr/src/uts/common/avs/ns/rdc/rdc_clnt.c deleted file mode 100644 index 971cb09ec0..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_clnt.c +++ /dev/null @@ -1,3381 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Network data replicator Client side */ - - -#include <sys/types.h> -#include <sys/debug.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/byteorder.h> -#include <sys/errno.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#ifndef DS_DDICT -#include <rpc/auth.h> -#include <rpc/svc.h> -#include <rpc/xdr.h> -#endif -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc_io.h" -#include "rdc_clnt.h" -#include "rdc_bitmap.h" -#include "rdc_diskq.h" - - -kmutex_t rdc_clnt_lock; - -#ifdef DEBUG -int noflush = 0; -#endif - -int rdc_rpc_tmout = RDC_CLNT_TMOUT; -static void rdc_clnt_free(struct chtab *, CLIENT *); -static void _rdc_remote_flush(rdc_aio_t *); - -void rdc_flush_memq(int index); -void rdc_flush_diskq(int index); -int rdc_drain_net_queue(int index); -void rdc_flusher_thread(int index); -int rdc_diskq_enqueue(rdc_k_info_t *krdc, rdc_aio_t *); -void rdc_init_diskq_header(rdc_group_t *grp, dqheader *hd); -void rdc_dump_iohdrs(disk_queue *dq); -rdc_aio_t 
*rdc_dequeue(rdc_k_info_t *krdc, int *rc); -void rdc_clr_iohdr(rdc_k_info_t *krdc, nsc_off_t qpos); -void rdc_close_diskq(rdc_group_t *krdc); - -int rdc_writer(int index); - -static struct chtab *rdc_chtable = NULL; -static int rdc_clnt_toomany; -#ifdef DEBUG -static int rdc_ooreply; -#endif - -extern void rdc_fail_diskq(rdc_k_info_t *krdc, int wait, int flag); -extern int _rdc_rsrv_diskq(rdc_group_t *group); -extern void _rdc_rlse_diskq(rdc_group_t *group); - -static enum clnt_stat -cl_call_sig(struct __client *rh, rpcproc_t proc, - xdrproc_t xargs, caddr_t argsp, xdrproc_t xres, - caddr_t resp, struct timeval secs) -{ - enum clnt_stat stat; - k_sigset_t smask; - sigintr(&smask, 0); - rh->cl_nosignal = TRUE; - stat = ((*(rh)->cl_ops->cl_call)\ - (rh, proc, xargs, argsp, xres, resp, secs)); - rh->cl_nosignal = FALSE; - sigunintr(&smask); - return (stat); -} - -int -rdc_net_getsize(int index, uint64_t *sizeptr) -{ - struct timeval t; - int err, size; - rdc_k_info_t *krdc = &rdc_k_info[index]; - int remote_index = krdc->remote_index; - - *sizeptr = 0; - if (krdc->remote_index < 0) - return (EINVAL); - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_getsize: null intf for index %d", index); -#endif - if (krdc->rpc_version <= RDC_VERSION5) { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_GETSIZE, - krdc->rpc_version, xdr_int, (char *)&remote_index, - xdr_int, (char *)&size, &t); - if (err == 0) - *sizeptr = size; - } else { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_GETSIZE6, - krdc->rpc_version, xdr_int, (char *)&remote_index, - xdr_u_longlong_t, (char *)sizeptr, &t); - } - return (err); -} - - -int -rdc_net_state(int index, int options) -{ - struct timeval t; - int err; - int remote_index = -1; - rdc_u_info_t *urdc = &rdc_u_info[index]; - rdc_k_info_t *krdc = &rdc_k_info[index]; - struct set_state s; - struct set_state4 s4; - char neta[32], rneta[32]; - unsigned short *sp; - - t.tv_sec = rdc_rpc_tmout; - 
t.tv_usec = 0; - - if (krdc->rpc_version < RDC_VERSION7) { - s4.netaddrlen = urdc->primary.addr.len; - s4.rnetaddrlen = urdc->secondary.addr.len; - bcopy(urdc->primary.addr.buf, s4.netaddr, s4.netaddrlen); - bcopy(urdc->secondary.addr.buf, s4.rnetaddr, s4.rnetaddrlen); - (void) strncpy(s4.pfile, urdc->primary.file, RDC_MAXNAMLEN); - (void) strncpy(s4.sfile, urdc->secondary.file, RDC_MAXNAMLEN); - s4.flag = options; - - err = rdc_clnt_call(krdc->lsrv, RDCPROC_STATE, - krdc->rpc_version, xdr_set_state4, (char *)&s4, xdr_int, - (char *)&remote_index, &t); - } else { - s.netaddrlen = urdc->primary.addr.len; - s.rnetaddrlen = urdc->secondary.addr.len; - s.netaddr.buf = neta; - s.rnetaddr.buf = rneta; - bcopy(urdc->primary.addr.buf, s.netaddr.buf, s.netaddrlen); - bcopy(urdc->secondary.addr.buf, s.rnetaddr.buf, s.rnetaddrlen); - s.netaddr.len = urdc->primary.addr.len; - s.rnetaddr.len = urdc->secondary.addr.len; - s.netaddr.maxlen = urdc->primary.addr.len; - s.rnetaddr.maxlen = urdc->secondary.addr.len; - sp = (unsigned short *)s.netaddr.buf; - *sp = htons(*sp); - sp = (unsigned short *)s.rnetaddr.buf; - *sp = htons(*sp); - s.pfile = urdc->primary.file; - s.sfile = urdc->secondary.file; - s.flag = options; - - err = rdc_clnt_call(krdc->lsrv, RDCPROC_STATE, - krdc->rpc_version, xdr_set_state, (char *)&s, xdr_int, - (char *)&remote_index, &t); - } - - if (err) - return (-1); - else - return (remote_index); -} - - -/* - * rdc_net_getbmap - * gets the bitmaps from remote side and or's them with remote bitmap - */ -int -rdc_net_getbmap(int index, int size) -{ - struct timeval t; - int err; - struct bmap b; - struct bmap6 b6; - rdc_k_info_t *krdc; - - krdc = &rdc_k_info[index]; - - if (krdc->remote_index < 0) - return (EINVAL); - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_getbmap: null intf for index %d", index); -#endif - - if (krdc->rpc_version <= RDC_VERSION5) { - b.cd = krdc->remote_index; - b.dual = 
index; - b.size = size; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_BMAP, - krdc->rpc_version, xdr_bmap, (char *)&b, xdr_int, - (char *)&err, &t); - - } else { - b6.cd = krdc->remote_index; - b6.dual = index; - b6.size = size; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_BMAP6, - krdc->rpc_version, xdr_bmap6, (char *)&b6, xdr_int, - (char *)&err, &t); - } - return (err); -} - -int sndr_proto = 0; - -/* - * return state corresponding to rdc_host - */ -int -rdc_net_getstate(rdc_k_info_t *krdc, int *serial_mode, int *use_mirror, - int *mirror_down, int network) -{ - int err; - struct timeval t; - int state; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - struct set_state s; -#ifdef sparc - struct set_state4 s4; -#endif - char neta[32]; - char rneta[32]; - unsigned short *sp; - char *setp = (char *)&s; - xdrproc_t xdr_proc = xdr_set_state; - - if (krdc->lsrv && (krdc->intf == NULL || krdc->intf->if_down) && - network) /* fail fast */ - return (-1); - - s.netaddrlen = urdc->primary.addr.len; - s.rnetaddrlen = urdc->secondary.addr.len; - s.pfile = urdc->primary.file; - s.sfile = urdc->secondary.file; - s.netaddr.buf = neta; - s.rnetaddr.buf = rneta; - bcopy(urdc->primary.addr.buf, s.netaddr.buf, s.netaddrlen); - bcopy(urdc->secondary.addr.buf, s.rnetaddr.buf, s.rnetaddrlen); - sp = (unsigned short *) s.netaddr.buf; - *sp = htons(*sp); - sp = (unsigned short *) s.rnetaddr.buf; - *sp = htons(*sp); - s.netaddr.len = urdc->primary.addr.len; - s.rnetaddr.len = urdc->secondary.addr.len; - s.netaddr.maxlen = urdc->primary.addr.maxlen; - s.rnetaddr.maxlen = urdc->secondary.addr.maxlen; - s.flag = 0; - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (sndr_proto) - krdc->rpc_version = sndr_proto; - else - krdc->rpc_version = RDC_VERS_MAX; - -again: - err = rdc_clnt_call(krdc->lsrv, RDCPROC_GETSTATE4, krdc->rpc_version, - xdr_proc, setp, xdr_int, (char *)&state, &t); - - if (err == RPC_PROGVERSMISMATCH && (krdc->rpc_version != - RDC_VERS_MIN)) { - if (krdc->rpc_version-- == 
RDC_VERSION7) { - /* set_state struct changed with v7 of protocol */ -#ifdef sparc - s4.netaddrlen = urdc->primary.addr.len; - s4.rnetaddrlen = urdc->secondary.addr.len; - bcopy(urdc->primary.addr.buf, s4.netaddr, - s4.netaddrlen); - bcopy(urdc->secondary.addr.buf, s4.rnetaddr, - s4.rnetaddrlen); - (void) strncpy(s4.pfile, urdc->primary.file, - RDC_MAXNAMLEN); - (void) strncpy(s4.sfile, urdc->secondary.file, - RDC_MAXNAMLEN); - s4.flag = 0; - xdr_proc = xdr_set_state4; - setp = (char *)&s4; -#else - /* x64 can not use protocols < 7 */ - return (-1); -#endif - } - goto again; - } -#ifdef DEBUG - cmn_err(CE_NOTE, "!sndr get_state: Protocol ver %d", krdc->rpc_version); -#endif - - if (err) { - return (-1); - } - - if (state == -1) - return (-1); - - if (serial_mode) - *serial_mode = (state >> 2) & 1; - if (use_mirror) - *use_mirror = (state >> 1) & 1; - if (mirror_down) - *mirror_down = state & 1; - - return (0); -} - - -static struct xdr_discrim rdres_discrim[2] = { - { (int)RDC_OK, xdr_readok }, - { __dontcare__, NULL_xdrproc_t } -}; - - -/* - * Reply from remote read (client side) - */ -static bool_t -xdr_rdresult(XDR *xdrs, readres *rr) -{ - - return (xdr_union(xdrs, (enum_t *)&(rr->rr_status), - (caddr_t)&(rr->rr_ok), rdres_discrim, xdr_void)); -} - -static int -rdc_rrstatus_decode(int status) -{ - int ret = 0; - - if (status != RDC_OK) { - switch (status) { - case RDCERR_NOENT: - ret = ENOENT; - break; - case RDCERR_NOMEM: - ret = ENOMEM; - break; - default: - ret = EIO; - break; - } - } - - return (ret); -} - - -int -rdc_net_read(int local_index, int remote_index, nsc_buf_t *handle, - nsc_off_t fba_pos, nsc_size_t fba_len) -{ - struct rdcrdresult rr; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct rread list; - struct rread6 list6; - struct timeval t; - uchar_t *sv_addr; - nsc_vec_t *vec; - int rpc_flag; - nsc_size_t sv_len; - int err; - int ret; - nsc_size_t len; - nsc_size_t maxfbas; - int transflag; - - if (handle == NULL) - return (EINVAL); - - if 
(!RDC_HANDLE_LIMITS(handle, fba_pos, fba_len)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_read: handle bounds"); -#endif - return (EINVAL); - } - - krdc = &rdc_k_info[local_index]; - urdc = &rdc_u_info[local_index]; - - maxfbas = MAX_RDC_FBAS; - - if (krdc->remote_fd && !(rdc_get_vflags(urdc) & RDC_FCAL_FAILED)) { - nsc_buf_t *remote_h = NULL; - int reserved = 0; - - ret = nsc_reserve(krdc->remote_fd, NSC_MULTI); - if (RDC_SUCCESS(ret)) { - reserved = 1; - ret = nsc_alloc_buf(krdc->remote_fd, fba_pos, fba_len, - NSC_RDBUF, &remote_h); - } - if (RDC_SUCCESS(ret)) { - ret = nsc_copy(remote_h, handle, fba_pos, fba_pos, - fba_len); - if (RDC_SUCCESS(ret)) { - (void) nsc_free_buf(remote_h); - nsc_release(krdc->remote_fd); - return (0); - } - } - rdc_group_enter(krdc); - rdc_set_flags(urdc, RDC_FCAL_FAILED); - rdc_group_exit(krdc); - if (remote_h) - (void) nsc_free_buf(remote_h); - if (reserved) - nsc_release(krdc->remote_fd); - } - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (rdc_get_vflags(urdc) & RDC_VOL_FAILED) - rpc_flag = RDC_RREAD_FAIL; - else - rpc_flag = 0; - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_read: null intf for index %d", local_index); -#endif - /* - * switch on proto version. 
- */ - len = fba_len; /* length (FBAs) still to xfer */ - rr.rr_bufsize = 0; /* rpc data buffer length (bytes) */ - rr.rr_data = NULL; /* rpc data buffer */ - transflag = rpc_flag | RDC_RREAD_START; /* setup rpc */ - if (krdc->rpc_version <= RDC_VERSION5) { - ASSERT(fba_pos <= INT32_MAX); - list.pos = (int)fba_pos; /* fba position of start of chunk */ - list.cd = remote_index; /* remote end cd */ - /* send setup rpc */ - list.flag = transflag; - ASSERT(len <= INT32_MAX); - list.len = (int)len; /* total fba length */ - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ5, - krdc->rpc_version, xdr_rread, (char *)&list, xdr_int, - (char *)&ret, &t); - - } else { - list6.pos = fba_pos; /* fba position of start of chunk */ - list6.cd = remote_index; /* remote end cd */ - /* send setup rpc */ - list6.flag = transflag; /* setup rpc */ - ASSERT(len <= INT32_MAX); - list6.len = (int)len; /* total fba length */ - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ6, - krdc->rpc_version, xdr_rread6, (char *)&list6, xdr_int, - (char *)&ret, &t); - } - - if (err) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_read: setup err %d", err); -#endif - if (err == RPC_INTR) - ret = EINTR; - else - ret = ENOLINK; - - goto remote_rerror; - } - - if (ret == 0) { /* No valid index from r_net_read */ -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc_net_read: no valid index from r_net_read"); -#endif - return (ENOBUFS); - } - transflag = rpc_flag | RDC_RREAD_DATA; - if (krdc->rpc_version <= RDC_VERSION5) { - list.idx = ret; /* save idx to return to server */ - list.flag = transflag; - /* move onto to data xfer rpcs */ - } else { - list6.idx = ret; /* save idx to return to server */ - list6.flag = transflag; - } - - /* find starting position in handle */ - - vec = handle->sb_vec; - - fba_pos -= handle->sb_pos; - - for (; fba_pos >= FBA_NUM(vec->sv_len); vec++) - fba_pos -= FBA_NUM(vec->sv_len); - - sv_addr = vec->sv_addr + FBA_SIZE(fba_pos); /* data in vector */ - sv_len = vec->sv_len - FBA_SIZE(fba_pos); /* bytes 
in vector */ - - while (len) { - nsc_size_t translen; - if (len > maxfbas) { - translen = maxfbas; - } else { - translen = len; - } - - if (FBA_SIZE(translen) > sv_len) { - translen = FBA_NUM(sv_len); - } - - len -= translen; - if (len == 0) { - /* last data xfer rpc - tell server to cleanup */ - transflag |= RDC_RREAD_END; - } - - if (!rr.rr_data || (nsc_size_t)rr.rr_bufsize != - FBA_SIZE(translen)) { - if (rr.rr_data) - kmem_free(rr.rr_data, rr.rr_bufsize); - - ASSERT(FBA_SIZE(translen) <= INT32_MAX); - rr.rr_bufsize = FBA_SIZE(translen); - rr.rr_data = kmem_alloc(rr.rr_bufsize, KM_NOSLEEP); - } - - if (!rr.rr_data) { - /* error */ -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_read: kmem_alloc failed"); -#endif - return (ENOMEM); - } - - /* get data from remote end */ - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_read: null intf for index %d", - local_index); -#endif - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - /*CONSTCOND*/ - ASSERT(RDC_MAXDATA <= INT32_MAX); - ASSERT(translen <= RDC_MAXDATA); - if (krdc->rpc_version <= RDC_VERSION5) { - list.len = (int)translen; - list.flag = transflag; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ5, - krdc->rpc_version, xdr_rread, (char *)&list, - xdr_rdresult, (char *)&rr, &t); - } else { - list6.len = (int)translen; - list6.flag = transflag; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ6, - krdc->rpc_version, xdr_rread6, (char *)&list6, - xdr_rdresult, (char *)&rr, &t); - } - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - if (err) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_read: rpc err %d", err); -#endif - if (err == RPC_INTR) { - ret = EINTR; - } else { - ret = ENOLINK; - } - - goto remote_rerror; - } - - if (rr.rr_status != RDC_OK) { - ret = 
rdc_rrstatus_decode(rr.rr_status); - if (!ret) - ret = EIO; - - goto remote_rerror; - } - - /* copy into handle */ - - bcopy(rr.rr_data, sv_addr, (size_t)rr.rr_bufsize); - - /* update counters */ - - sv_addr += rr.rr_bufsize; - if (krdc->rpc_version <= RDC_VERSION5) { - list.pos += translen; - } else { - list6.pos += translen; - } - if (krdc->io_kstats) { - KSTAT_IO_PTR(krdc->io_kstats)->reads++; - KSTAT_IO_PTR(krdc->io_kstats)->nread += rr.rr_bufsize; - } - ASSERT(sv_len <= INT32_MAX); - ASSERT(sv_len >= (nsc_size_t)rr.rr_bufsize); - sv_len -= rr.rr_bufsize; - - if (sv_len == 0) { - /* goto next vector */ - vec++; - sv_addr = vec->sv_addr; - sv_len = vec->sv_len; - } - } - - if (rr.rr_data) - kmem_free(rr.rr_data, rr.rr_bufsize); - - return (0); - -remote_rerror: - if (rr.rr_data) - kmem_free(rr.rr_data, rr.rr_bufsize); - - return (ret ? ret : ENOLINK); -} - -/* - * rdc_net_write - * Main remote write client side - * Handles protocol selection as well as requests for remote allocation - * and data transfer - * Does local IO for FCAL - * caller must clear bitmap on success - */ - -int -rdc_net_write(int local_index, int remote_index, nsc_buf_t *handle, - nsc_off_t fba_pos, nsc_size_t fba_len, uint_t aseq, int qpos, - netwriteres *netres) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct timeval t; - nsc_vec_t *vec; - int sv_len; - nsc_off_t fpos; - int err; - struct netwriteres netret; - struct netwriteres *netresptr; - struct net_data5 dlist5; - struct net_data6 dlist6; - int ret; - nsc_size_t maxfbas; - int transflag; - int translen; - int transendoblk; - char *transptr; - int vflags; - - if (handle == NULL) - return (EINVAL); - - /* if not a diskq buffer */ - if ((qpos == -1) && (!RDC_HANDLE_LIMITS(handle, fba_pos, fba_len))) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_write: handle bounds"); -#endif - return (EINVAL); - } - - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - krdc = &rdc_k_info[local_index]; - urdc = &rdc_u_info[local_index]; - - maxfbas = 
MAX_RDC_FBAS; - - /* FCAL IO */ - if (krdc->remote_fd && !(rdc_get_vflags(urdc) & RDC_FCAL_FAILED)) { - nsc_buf_t *remote_h = NULL; - int reserved = 0; - - ret = nsc_reserve(krdc->remote_fd, NSC_MULTI); - if (RDC_SUCCESS(ret)) { - reserved = 1; - ret = nsc_alloc_buf(krdc->remote_fd, fba_pos, fba_len, - NSC_WRBUF, &remote_h); - } - if (RDC_SUCCESS(ret)) { - ret = nsc_copy(handle, remote_h, fba_pos, fba_pos, - fba_len); - if (RDC_SUCCESS(ret)) - ret = nsc_write(remote_h, fba_pos, fba_len, 0); - if (RDC_SUCCESS(ret)) { - (void) nsc_free_buf(remote_h); - nsc_release(krdc->remote_fd); - return (0); - } - } - rdc_group_enter(krdc); - rdc_set_flags(urdc, RDC_FCAL_FAILED); - rdc_group_exit(krdc); - if (remote_h) - (void) nsc_free_buf(remote_h); - if (reserved) - nsc_release(krdc->remote_fd); - } - - /* - * At this point we must decide which protocol we are using and - * do the right thing - */ - netret.vecdata.vecdata_val = NULL; - netret.vecdata.vecdata_len = 0; - if (netres) { - netresptr = netres; - } else { - netresptr = &netret; - } - - vflags = rdc_get_vflags(urdc); - - if (vflags & (RDC_VOL_FAILED|RDC_BMP_FAILED)) - transflag = RDC_RWRITE_FAIL; - else - transflag = 0; - - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, "!rdc_net_write: null intf for index %d", - local_index); -#endif - - vec = handle->sb_vec; - - /* - * find starting position in vector - */ - if ((qpos == -1) || (handle->sb_user == RDC_NULLBUFREAD)) - fpos = fba_pos - handle->sb_pos; - else - fpos = (qpos + 1) - handle->sb_pos; - - for (; fpos >= FBA_NUM(vec->sv_len); vec++) - fpos -= FBA_NUM(vec->sv_len); - sv_len = vec->sv_len - FBA_SIZE(fpos); /* bytes in vector */ - transptr = (char *)vec->sv_addr + FBA_SIZE(fpos); - - if (krdc->rpc_version <= RDC_VERSION5) { - dlist5.local_cd = local_index; - dlist5.cd = remote_index; - ASSERT(fba_len <= INT32_MAX); - ASSERT(fba_pos <= INT32_MAX); - dlist5.len = (int)fba_len; - dlist5.pos = (int)fba_pos; - dlist5.idx = -1; /* Starting index */ - 
dlist5.flag = transflag; - dlist5.seq = aseq; /* sequence number */ - dlist5.sfba = (int)fba_pos; /* starting fba for this xfer */ - } else { - dlist6.local_cd = local_index; - dlist6.cd = remote_index; - ASSERT(fba_len <= INT32_MAX); - dlist6.len = (int)fba_len; - dlist6.qpos = qpos; - dlist6.pos = fba_pos; - dlist6.idx = -1; /* Starting index */ - dlist6.flag = transflag; - dlist6.seq = aseq; /* sequence number */ - dlist6.sfba = fba_pos; /* starting fba for this xfer */ - } - - transendoblk = 0; - while (fba_len) { - if (!transptr) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_net_write: walked off end of handle!"); -#endif - ret = EINVAL; - goto remote_error; - } - - if (fba_len > maxfbas) { - ASSERT(maxfbas <= INT32_MAX); - translen = (int)maxfbas; - } else { - ASSERT(fba_len <= INT32_MAX); - translen = (int)fba_len; - } - - if (FBA_SIZE(translen) > sv_len) { - translen = FBA_NUM(sv_len); - } - - fba_len -= translen; - if (fba_len == 0) { - /* last data xfer - tell server to commit */ - transendoblk = 1; - } - - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_write: null intf for index %d", - local_index); -#endif - DTRACE_PROBE(rdc_netwrite_clntcall_start); - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - if (krdc->rpc_version <= RDC_VERSION5) { - ret = 0; - dlist5.nfba = translen; - dlist5.endoblk = transendoblk; - dlist5.data.data_len = FBA_SIZE(translen); - dlist5.data.data_val = transptr; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_WRITE5, - krdc->rpc_version, xdr_net_data5, - (char *)&dlist5, xdr_int, - (char *)&ret, &t); - if (ret >= 0) { - netresptr->result = 0; - netresptr->index = ret; - } else { - netresptr->result = ret; - } - } else { - netresptr->result = 0; - dlist6.nfba = translen; - dlist6.endoblk = transendoblk; - dlist6.data.data_len = FBA_SIZE(translen); - dlist6.data.data_val = transptr; - err = 
rdc_clnt_call(krdc->lsrv, RDCPROC_WRITE6, - krdc->rpc_version, xdr_net_data6, - (char *)&dlist6, xdr_netwriteres, - (char *)netresptr, &t); - } - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - DTRACE_PROBE(rdc_netwrite_clntcall_end); - ret = netresptr->result; - if (err) { - if (err == RPC_INTR) - ret = EINTR; - else if (err && ret != EPROTO) - ret = ENOLINK; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc_net_write(5): cd %d err %d ret %d", - remote_index, err, ret); -#endif - goto remote_error; - } - /* Error from r_net_write5 */ - if (netresptr->result < 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc_net_write: r_net_write(5) " - "returned: %d", - -netresptr->result); -#endif - ret = -netresptr->result; - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * - sizeof (net_pendvec_t)); - goto remote_error; - } else if (netresptr->index == 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc_net_write: no valid index from " - "r_net_write(5)"); -#endif - ret = ENOBUFS; - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * - sizeof (net_pendvec_t)); - goto remote_error; - } - if (krdc->rpc_version <= RDC_VERSION5) { - dlist5.idx = netresptr->index; - dlist5.sfba += dlist5.nfba; - } else { - dlist6.idx = netresptr->index; - dlist6.sfba += dlist6.nfba; - } - /* update counters */ - if (krdc->io_kstats) { - KSTAT_IO_PTR(krdc->io_kstats)->writes++; - KSTAT_IO_PTR(krdc->io_kstats)->nwritten += - FBA_SIZE(translen); - } - transptr += FBA_SIZE(translen); - sv_len -= FBA_SIZE(translen); - - if (sv_len <= 0) { - /* goto next vector */ - vec++; - transptr = (char *)vec->sv_addr; - sv_len = vec->sv_len; - } - } - /* - * this can't happen..... 
- */ - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * - sizeof (net_pendvec_t)); - - return (0); - -remote_error: - return (ret ? ret : ENOLINK); -} - -void -rdc_fixlen(rdc_aio_t *aio) -{ - nsc_vec_t *vecp = aio->qhandle->sb_vec; - nsc_size_t len = 0; - - while (vecp->sv_addr) { - len += FBA_NUM(vecp->sv_len); - vecp++; - } - aio->qhandle->sb_len = len; -} - -/* - * rdc_dump_alloc_bufs_cd - * Dump allocated buffers (rdc_net_hnd's) for the specified cd. - * this could be the flusher failing, if so, don't do the delay forever - * Returns: 0 (success), EAGAIN (caller needs to try again). - */ -int -rdc_dump_alloc_bufs_cd(int index) -{ - rdc_k_info_t *krdc; - rdc_aio_t *aio; - net_queue *q; - disk_queue *dq; - kmutex_t *qlock; - - krdc = &rdc_k_info[index]; - - - if (!krdc->c_fd) { - /* cannot do anything! */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dump_alloc_bufs_cd(%d): c_fd NULL", - index); -#endif - return (0); - } - rdc_dump_dsets(index); - - dq = &krdc->group->diskq; - - if (RDC_IS_DISKQ(krdc->group)) { - qlock = QLOCK(dq); - (void) _rdc_rsrv_diskq(krdc->group); - } else { - qlock = &krdc->group->ra_queue.net_qlock; - } - - /* - * Now dump the async queue anonymous buffers - * if we are a diskq, the we are using the diskq mutex. 
- * However, we are flushing from diskq to memory queue - * so we now need to grab the memory lock also - */ - - q = &krdc->group->ra_queue; - - if (RDC_IS_DISKQ(krdc->group)) { - mutex_enter(&q->net_qlock); - if (q->qfill_sleeping == RDC_QFILL_AWAKE) { - int tries = 5; -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, - "!dumpalloccd sending diskq->memq flush to sleep"); -#endif - q->qfflags |= RDC_QFILLSLEEP; - mutex_exit(&q->net_qlock); - - while (q->qfill_sleeping == RDC_QFILL_AWAKE && tries--) - delay(5); - mutex_enter(&q->net_qlock); - } - } - - mutex_enter(qlock); - - while ((q->net_qhead != NULL)) { - rdc_k_info_t *tmpkrdc; - aio = q->net_qhead; - tmpkrdc = &rdc_k_info[aio->index]; - - if (RDC_IS_DISKQ(krdc->group)) { - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - } - } else { - if (aio->handle) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - } - - if (tmpkrdc->io_kstats && !RDC_IS_DISKQ(krdc->group)) { - mutex_enter(tmpkrdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(tmpkrdc->io_kstats)); - mutex_exit(tmpkrdc->io_kstats->ks_lock); - } - q->net_qhead = q->net_qhead->next; - q->blocks -= aio->len; - q->nitems--; - - RDC_CHECK_BIT(tmpkrdc, aio->pos, aio->len); - - kmem_free(aio, sizeof (*aio)); - } - q->net_qtail = NULL; - - if (krdc->group->asyncstall) { - krdc->group->asyncdis = 1; - cv_broadcast(&krdc->group->asyncqcv); - } - if (krdc->group->sleepq) { - rdc_sleepqdiscard(krdc->group); - } - - krdc->group->seq = RDC_NEWSEQ; - krdc->group->seqack = RDC_NEWSEQ; - if (RDC_IS_DISKQ(krdc->group)) { - rdc_dump_iohdrs(dq); - SET_QNXTIO(dq, QHEAD(dq)); - SET_QCOALBOUNDS(dq, QHEAD(dq)); - } - mutex_exit(qlock); - - if (RDC_IS_DISKQ(krdc->group)) { - mutex_exit(&q->net_qlock); - _rdc_rlse_diskq(krdc->group); - } - - return (0); -} - - -/* - * rdc_dump_alloc_bufs - * We have an error on the link - * Try to dump all of the allocated 
bufs so we can cleanly recover - * and not hang - */ -void -rdc_dump_alloc_bufs(rdc_if_t *ip) -{ - rdc_k_info_t *krdc; - int repeat; - int index; - - for (index = 0; index < rdc_max_sets; index++) { - do { - krdc = &rdc_k_info[index]; - repeat = 0; - if (krdc->intf == ip) { - if (rdc_dump_alloc_bufs_cd(index) == EAGAIN) { - repeat = 1; - delay(2); - } - } - } while (repeat); - } -} - -/* - * returns 1 if the the throttle should throttle, 0 if not. - */ -int -_rdc_diskq_isfull(disk_queue *q, long len) -{ - /* ---T----H----N--- */ - mutex_enter(QLOCK(q)); - - if (FITSONQ(q, len + 1)) { - mutex_exit(QLOCK(q)); - return (0); - } - mutex_exit(QLOCK(q)); - return (1); -} - -void -_rdc_async_throttle(rdc_k_info_t *this, long len) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int print_msg = 1; - int tries = RDC_FUTILE_ATTEMPTS; - - /* - * Throttle entries on queue - */ - - /* Need to take the 1-many case into account, checking all sets */ - - /* ADD HANDY HUERISTIC HERE TO SLOW DOWN IO */ - for (krdc = this; /* CSTYLED */; krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - - /* - * this may be the last set standing in a one to many setup. - * we may also be stuck in unintercept, after marking - * the volume as not enabled, but have not removed it - * from the many list resulting in an endless loop if - * we just continue here. Lets jump over this stuff - * and check to see if we are the only dude here. 
- */ - if (!IS_ENABLED(urdc)) - goto thischeck; - - if (IS_ASYNC(urdc) && RDC_IS_MEMQ(krdc->group)) { - net_queue *q = &krdc->group->ra_queue; - while ((q->blocks + q->inflbls) > urdc->maxqfbas || - (q->nitems + q->inflitems) > urdc->maxqitems) { - - if (!IS_ENABLED(urdc)) /* disable race */ - goto thischeck; - - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - delay(2); - q->throttle_delay++; - } - } - - /* do a much more aggressive delay, get disk flush going */ - if (IS_ASYNC(urdc) && RDC_IS_DISKQ(krdc->group)) { - disk_queue *q = &krdc->group->diskq; - while ((!IS_QSTATE(q, RDC_QNOBLOCK)) && - (_rdc_diskq_isfull(q, len)) && - (!IS_STATE(urdc, RDC_DISKQ_FAILED))) { - if (print_msg) { - cmn_err(CE_WARN, "!rdc async throttle:" - " disk queue %s full", - &urdc->disk_queue[0]); - - print_msg = 0; - } - if (!IS_ENABLED(urdc)) /* disable race */ - goto thischeck; - - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - delay(10); - q->throttle_delay += 10; - - if (!(tries--) && IS_STATE(urdc, RDC_QUEUING)) { - cmn_err(CE_WARN, "!SNDR: disk queue " - "%s full & not flushing. 
giving up", - &urdc->disk_queue[0]); - cmn_err(CE_WARN, "!SNDR: %s:%s entering" - " logging mode", - urdc->secondary.intf, - urdc->secondary.file); - rdc_fail_diskq(krdc, RDC_WAIT, - RDC_DOLOG | RDC_NOFAIL); - mutex_enter(QLOCK(q)); - cv_broadcast(&q->qfullcv); - mutex_exit(QLOCK(q)); - } - - } - if ((IS_QSTATE(q, RDC_QNOBLOCK)) && - _rdc_diskq_isfull(q, len) && - !IS_STATE(urdc, RDC_DISKQ_FAILED)) { - if (print_msg) { - cmn_err(CE_WARN, "!disk queue %s full", - &urdc->disk_queue[0]); - print_msg = 0; - } - rdc_fail_diskq(krdc, RDC_WAIT, - RDC_DOLOG | RDC_NOFAIL); - mutex_enter(QLOCK(q)); - cv_broadcast(&q->qfullcv); - mutex_exit(QLOCK(q)); - } - } - -thischeck: - if (krdc->many_next == this) - break; - } -} - -int rdc_coalesce = 1; -static int rdc_joins = 0; - -int -rdc_aio_coalesce(rdc_aio_t *queued, rdc_aio_t *new) -{ - nsc_buf_t *h = NULL; - int rc; - rdc_k_info_t *krdc; - uint_t bitmask; - - if (rdc_coalesce == 0) - return (0); /* don't even try */ - - if ((queued == NULL) || - (queued->handle == NULL) || - (new->handle == NULL)) { - return (0); /* existing queue is empty */ - } - if (queued->index != new->index || queued->len + new->len > - MAX_RDC_FBAS) { - return (0); /* I/O to big */ - } - if ((queued->pos + queued->len == new->pos) || - (new->pos + new->len == queued->pos)) { - rc = nsc_alloc_abuf(queued->pos, queued->len + new->len, 0, - &h); - if (!RDC_SUCCESS(rc)) { - if (h != NULL) - (void) nsc_free_buf(h); - return (0); /* couldn't do coalesce */ - } - rc = nsc_copy(queued->handle, h, queued->pos, queued->pos, - queued->len); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(h); - return (0); /* couldn't do coalesce */ - } - rc = nsc_copy(new->handle, h, new->pos, new->pos, - new->len); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(h); - return (0); /* couldn't do coalesce */ - } - - krdc = &rdc_k_info[queued->index]; - - RDC_SET_BITMASK(queued->pos, queued->len, &bitmask); - RDC_CLR_BITMAP(krdc, queued->pos, queued->len, \ - bitmask, RDC_BIT_BUMP); 
- - RDC_SET_BITMASK(new->pos, new->len, &bitmask); - RDC_CLR_BITMAP(krdc, new->pos, new->len, \ - bitmask, RDC_BIT_BUMP); - - (void) nsc_free_buf(queued->handle); - (void) nsc_free_buf(new->handle); - queued->handle = h; - queued->len += new->len; - bitmask = 0; - /* - * bump the ref count back up - */ - - RDC_SET_BITMAP(krdc, queued->pos, queued->len, &bitmask); - return (1); /* new I/O succeeds last I/O queued */ - } - return (0); -} - -int -rdc_memq_enqueue(rdc_k_info_t *krdc, rdc_aio_t *aio) -{ - net_queue *q; - rdc_group_t *group; - - group = krdc->group; - q = &group->ra_queue; - - mutex_enter(&q->net_qlock); - - if (rdc_aio_coalesce(q->net_qtail, aio)) { - rdc_joins++; - q->blocks += aio->len; - kmem_free(aio, sizeof (*aio)); - goto out; - } - aio->seq = group->seq++; - if (group->seq < aio->seq) - group->seq = RDC_NEWSEQ + 1; /* skip magics */ - - if (q->net_qhead == NULL) { - /* adding to empty q */ - q->net_qhead = q->net_qtail = aio; - -#ifdef DEBUG - if (q->blocks != 0 || q->nitems != 0) { - cmn_err(CE_PANIC, - "rdc enqueue: q %p, qhead 0, q blocks %" NSC_SZFMT - ", nitems %" NSC_SZFMT, - (void *) q, q->blocks, q->nitems); - } -#endif - - } else { - /* discontiguous, add aio to q tail */ - q->net_qtail->next = aio; - q->net_qtail = aio; - } - - q->blocks += aio->len; - q->nitems++; - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } -out: -#ifdef DEBUG - /* sum the q and check for sanity */ - { - nsc_size_t qblocks = 0; - uint64_t nitems = 0; - rdc_aio_t *a; - - for (a = q->net_qhead; a != NULL; a = a->next) { - qblocks += a->len; - nitems++; - } - - if (qblocks != q->blocks || nitems != q->nitems) { - cmn_err(CE_PANIC, - "rdc enqueue: q %p, q blocks %" NSC_SZFMT " (%" - NSC_SZFMT "), nitems %" NSC_SZFMT " (%" - NSC_SZFMT ")", (void *) q, q->blocks, qblocks, - q->nitems, nitems); - } - } -#endif - - mutex_exit(&q->net_qlock); - - if (q->nitems 
> q->nitems_hwm) { - q->nitems_hwm = q->nitems; - } - - if (q->blocks > q->blocks_hwm) { - q->blocks_hwm = q->blocks; - } - - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - return (0); -} - -int -_rdc_enqueue_write(rdc_k_info_t *krdc, nsc_off_t pos, nsc_size_t len, int flag, - nsc_buf_t *h) -{ - rdc_aio_t *aio; - rdc_group_t *group; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rc; - - aio = kmem_zalloc(sizeof (*aio), KM_NOSLEEP); - if (!aio) { - return (ENOMEM); - } - - group = krdc->group; - - aio->pos = pos; - aio->qpos = -1; - aio->len = len; - aio->flag = flag; - aio->index = krdc->index; - aio->handle = h; - - if (group->flags & RDC_MEMQUE) { - return (rdc_memq_enqueue(krdc, aio)); - } else if ((group->flags & RDC_DISKQUE) && - !IS_STATE(urdc, RDC_DISKQ_FAILED)) { - rc = rdc_diskq_enqueue(krdc, aio); - kmem_free(aio, sizeof (*aio)); - return (rc); - } - return (-1); /* keep lint quiet */ -} - - - - -/* - * Async Network RDC flusher - */ - -/* - * don't allow any new writer threads to start if a member of the set - * is disable pending - */ -int -is_disable_pending(rdc_k_info_t *krdc) -{ - rdc_k_info_t *this = krdc; - int rc = 0; - - do { - if (krdc->type_flag & RDC_DISABLEPEND) { - krdc = this; - rc = 1; - break; - } - krdc = krdc->group_next; - - } while (krdc != this); - - return (rc); -} - -/* - * rdc_writer -- spawn new writer if not running already - * called after enqueing the dirty blocks - */ -int -rdc_writer(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - nsthread_t *t; - rdc_group_t *group; - kmutex_t *qlock; - int tries; - const int MAX_TRIES = 16; - - group = krdc->group; - - if (RDC_IS_DISKQ(group)) - qlock = &group->diskq.disk_qlock; - else - qlock = &group->ra_queue.net_qlock; - - mutex_enter(qlock); - -#ifdef DEBUG - if (noflush) { - mutex_exit(qlock); - return (0); - } -#endif - - if ((group->rdc_writer) || is_disable_pending(krdc)) { - mutex_exit(qlock); - return (0); - } - - if ((group->rdc_thrnum >= 
1) && (group->seqack == RDC_NEWSEQ)) { - /* - * We also need to check if we are starting a new - * sequence, and if so don't create a new thread, - * as we must ensure that the start of new sequence - * requests arrives first to re-init the server. - */ - mutex_exit(qlock); - return (0); - } - /* - * For version 6, - * see if we can fit in another thread. - */ - group->rdc_thrnum++; - - if (krdc->intf && (krdc->intf->rpc_version >= RDC_VERSION6)) { - rdc_u_info_t *urdc = &rdc_u_info[index]; - if (group->rdc_thrnum >= urdc->asyncthr) - group->rdc_writer = 1; - } else { - group->rdc_writer = 1; - } - - mutex_exit(qlock); - - - /* - * If we got here, we know that we have not exceeded the allowed - * number of async threads for our group. If we run out of threads - * in _rdc_flset, we add a new thread to the set. - */ - tries = 0; - do { - /* first try to grab a thread from the free list */ - if (t = nst_create(_rdc_flset, rdc_flusher_thread, - (blind_t)(unsigned long)index, 0)) { - break; - } - - /* that failed; add a thread to the set and try again */ - if (nst_add_thread(_rdc_flset, 1) != 1) { - cmn_err(CE_WARN, "!rdc_writer index %d nst_add_thread " - "error, tries: %d", index, tries); - break; - } - } while (++tries < MAX_TRIES); - - if (tries) { - mutex_enter(&group->addthrnumlk); - group->rdc_addthrnum += tries; - mutex_exit(&group->addthrnumlk); - } - - if (t) { - return (1); - } - - cmn_err(CE_WARN, "!rdc_writer: index %d nst_create error", index); - rdc_many_enter(krdc); - mutex_enter(qlock); - group->rdc_thrnum--; - group->rdc_writer = 0; - if ((group->count == 0) && (group->rdc_thrnum == 0)) { - mutex_exit(qlock); - /* - * Race with remove_from_group while write thread was - * failing to be created. 
- */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_writer: group being destroyed"); -#endif - rdc_delgroup(group); - krdc->group = NULL; - rdc_many_exit(krdc); - return (-1); - } - mutex_exit(qlock); - rdc_many_exit(krdc); - return (-1); -} - -/* - * Either we need to flush the - * kmem (net_queue) queue or the disk (disk_queue) - * determine which, and do it. - */ -void -rdc_flusher_thread(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - - if (krdc->group->flags & RDC_MEMQUE) { - rdc_flush_memq(index); - return; - } else if (krdc->group->flags & RDC_DISKQUE) { - rdc_flush_diskq(index); - return; - } else { /* uh-oh, big time */ - cmn_err(CE_PANIC, "flusher trying to flush unknown queue type"); - } - -} - -void -rdc_flush_memq(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_aio_t *aio; - net_queue *q; - int dowork; - rdc_group_t *group = krdc->group; - if (!group || group->count == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_memq: no group left!"); -#endif - return; - } - - if (!krdc->c_fd) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_memq: no c_fd!"); -#endif - goto thread_death; - } - -#ifdef DEBUG_DISABLE - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - cmn_err(CE_WARN, "!rdc_flush_memq: DISABLE PENDING!"); - /* - * Need to continue as we may be trying to flush IO - * while trying to disable or suspend - */ - } -#endif - - q = &group->ra_queue; - - dowork = 1; - /* CONSTCOND */ - while (dowork) { - if (net_exit == ATM_EXIT) - break; - - group = krdc->group; - if (!group || group->count == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_memq: no group left!"); -#endif - break; - } - - mutex_enter(&q->net_qlock); - aio = q->net_qhead; - - if (aio == NULL) { -#ifdef DEBUG - if (q->nitems != 0 || - q->blocks != 0 || - q->net_qtail != 0) { - cmn_err(CE_PANIC, - "rdc_flush_memq(1): q %p, q blocks %" - NSC_SZFMT ", nitems %" NSC_SZFMT - ", qhead %p qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) aio, (void *) 
q->net_qtail); - } -#endif - mutex_exit(&q->net_qlock); - break; - } - - /* aio remove from q */ - - q->net_qhead = aio->next; - aio->next = NULL; - - if (q->net_qtail == aio) - q->net_qtail = q->net_qhead; - - q->blocks -= aio->len; - q->nitems--; - - /* - * in flight numbers. - */ - q->inflbls += aio->len; - q->inflitems++; - -#ifdef DEBUG - if (q->net_qhead == NULL) { - if (q->nitems != 0 || - q->blocks != 0 || - q->net_qtail != 0) { - cmn_err(CE_PANIC, - "rdc_flush_memq(2): q %p, q blocks %" - NSC_SZFMT ", nitems %" NSC_SZFMT - ", qhead %p qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) q->net_qhead, - (void *) q->net_qtail); - } - } - -#ifndef NSC_MULTI_TERABYTE - if (q->blocks < 0) { - cmn_err(CE_PANIC, - "rdc_flush_memq(3): q %p, q blocks %" NSC_SZFMT - ", nitems %d, qhead %p, qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) q->net_qhead, (void *) q->net_qtail); - } -#else - /* blocks and nitems are unsigned for NSC_MULTI_TERABYTE */ -#endif -#endif - - mutex_exit(&q->net_qlock); - - aio->iostatus = RDC_IO_INIT; - - _rdc_remote_flush(aio); - - mutex_enter(&q->net_qlock); - q->inflbls -= aio->len; - q->inflitems--; - if ((group->seqack == RDC_NEWSEQ) && - (group->seq != RDC_NEWSEQ + 1)) { - if ((q->net_qhead == NULL) || - (q->net_qhead->seq != RDC_NEWSEQ + 1)) { - /* - * We are an old thread, and the - * queue sequence has been reset - * during the network write above. - * As such we mustn't pull another - * job from the queue until the - * first sequence message has been ack'ed. - * Just die instead. 
Unless this thread - * is the first sequence that has just - * been ack'ed - */ - dowork = 0; - } - } - mutex_exit(&q->net_qlock); - - if ((aio->iostatus != RDC_IO_DONE) && (group->count)) { - rdc_k_info_t *krdctmp = &rdc_k_info[aio->index]; - if (krdctmp->type_flag & RDC_DISABLEPEND) { - kmem_free(aio, sizeof (*aio)); - goto thread_death; - } - rdc_group_enter(krdc); - ASSERT(krdc->group); - rdc_group_log(krdc, RDC_NOFLUSH | RDC_ALLREMOTE, - "memq flush aio status not RDC_IO_DONE"); - rdc_group_exit(krdc); - rdc_dump_queue(aio->index); - } - kmem_free(aio, sizeof (*aio)); - - if (krdc->remote_index < 0 || !krdc->lsrv || !krdc->intf) - break; - } - -thread_death: - rdc_many_enter(krdc); - mutex_enter(&group->ra_queue.net_qlock); - group->rdc_thrnum--; - group->rdc_writer = 0; - /* - * all threads must be dead. - */ - if ((group->count == 0) && (group->rdc_thrnum == 0)) { - mutex_exit(&group->ra_queue.net_qlock); - /* - * Group now empty, so destroy - * Race with remove_from_group while write thread was running - */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_memq: group being destroyed"); -#endif - rdc_delgroup(group); - krdc->group = NULL; - rdc_many_exit(krdc); - return; - } - mutex_exit(&group->ra_queue.net_qlock); - rdc_many_exit(krdc); -} - -/* - * rdc_flush_diskq - * disk queue flusher - */ -void -rdc_flush_diskq(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_u_info_t *urdc = &rdc_u_info[index]; - rdc_aio_t *aio = NULL; - disk_queue *q; - net_queue *nq; - int dowork; - int rc; - rdc_group_t *group = krdc->group; - - if (!group || group->count == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_diskq: no group left!"); -#endif - return; - } - - if (!krdc->c_fd) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_diskq: no c_fd!"); -#endif - return; - } - -#ifdef DEBUG_DISABLE - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - cmn_err(CE_WARN, "!rdc_flush_diskq: DISABLE PENDING!"); - /* - * Need to continue as we may be trying to flush IO 
- * while trying to disable or suspend - */ - } -#endif - q = &group->diskq; - nq = &group->ra_queue; - - if (IS_QSTATE(q, RDC_QDISABLEPEND) || IS_STATE(urdc, RDC_LOGGING)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!flusher thread death 1 %x", QSTATE(q)); -#endif - goto thread_death; - } - - dowork = 1; - /* CONSTCOND */ - while (dowork) { - if (net_exit == ATM_EXIT) - break; - - group = krdc->group; - if (!group || group->count == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_diskq: no group left!"); -#endif - break; - } - - do { - rc = 0; - if ((IS_STATE(urdc, RDC_LOGGING)) || - (IS_STATE(urdc, RDC_SYNCING)) || - (nq->qfflags & RDC_QFILLSLEEP)) - goto thread_death; - - aio = rdc_dequeue(krdc, &rc); - - if ((IS_STATE(urdc, RDC_LOGGING)) || - (IS_STATE(urdc, RDC_SYNCING)) || - (nq->qfflags & RDC_QFILLSLEEP)) { - goto thread_death; - } - if (rc == EAGAIN) { - delay(40); - } - - } while (rc == EAGAIN); - - if (aio == NULL) { - break; - } - - aio->iostatus = RDC_IO_INIT; - - mutex_enter(QLOCK(q)); - q->inflbls += aio->len; - q->inflitems++; - mutex_exit(QLOCK(q)); - - _rdc_remote_flush(aio); - - mutex_enter(QLOCK(q)); - q->inflbls -= aio->len; - q->inflitems--; - - if ((group->seqack == RDC_NEWSEQ) && - (group->seq != RDC_NEWSEQ + 1)) { - if ((nq->net_qhead == NULL) || - (nq->net_qhead->seq != RDC_NEWSEQ + 1)) { - /* - * We are an old thread, and the - * queue sequence has been reset - * during the network write above. - * As such we mustn't pull another - * job from the queue until the - * first sequence message has been ack'ed. - * Just die instead. Unless of course, - * this thread is the first sequence that - * has just been ack'ed. 
- */ - dowork = 0; - } - } - mutex_exit(QLOCK(q)); - - if (aio->iostatus == RDC_IO_CANCELLED) { - rdc_dump_queue(aio->index); - kmem_free(aio, sizeof (*aio)); - aio = NULL; - if (group) { /* seq gets bumped on dequeue */ - mutex_enter(QLOCK(q)); - rdc_dump_iohdrs(q); - SET_QNXTIO(q, QHEAD(q)); - SET_QCOALBOUNDS(q, QHEAD(q)); - group->seq = RDC_NEWSEQ; - group->seqack = RDC_NEWSEQ; - mutex_exit(QLOCK(q)); - } - break; - } - - if ((aio->iostatus != RDC_IO_DONE) && (group->count)) { - rdc_k_info_t *krdctmp = &rdc_k_info[aio->index]; - if (krdctmp->type_flag & RDC_DISABLEPEND) { - kmem_free(aio, sizeof (*aio)); - aio = NULL; - goto thread_death; - } - rdc_group_enter(krdc); - rdc_group_log(krdc, - RDC_NOFLUSH | RDC_ALLREMOTE | RDC_QUEUING, - "diskq flush aio status not RDC_IO_DONE"); - rdc_group_exit(krdc); - rdc_dump_queue(aio->index); - } - - kmem_free(aio, sizeof (*aio)); - aio = NULL; - -#ifdef DEBUG_DISABLE - if (krdc->type_flag & RDC_DISABLEPEND) { - cmn_err(CE_WARN, - "!rdc_flush_diskq: DISABLE PENDING after IO!"); - } -#endif - if (krdc->remote_index < 0 || !krdc->lsrv || !krdc->intf) - break; - - if (IS_QSTATE(q, RDC_QDISABLEPEND)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!flusher thread death 2"); -#endif - break; - } - } -thread_death: - rdc_many_enter(krdc); - mutex_enter(QLOCK(q)); - group->rdc_thrnum--; - group->rdc_writer = 0; - - if (aio && aio->qhandle) { - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - (void) _rdc_rsrv_diskq(krdc->group); - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - _rdc_rlse_diskq(krdc->group); - } - } - if ((group->count == 0) && (group->rdc_thrnum == 0)) { - mutex_exit(QLOCK(q)); - /* - * Group now empty, so destroy - * Race with remove_from_group while write thread was running - */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_diskq: group being destroyed"); -#endif - mutex_enter(&group->diskqmutex); - rdc_close_diskq(group); - mutex_exit(&group->diskqmutex); - 
rdc_delgroup(group); - krdc->group = NULL; - rdc_many_exit(krdc); - return; - } - mutex_exit(QLOCK(q)); - rdc_many_exit(krdc); -} - -/* - * _rdc_remote_flush - * Flush a single block ANON block - * this function will flush from either the disk queue - * or the memory queue. The appropriate locks must be - * taken out etc, etc ... - */ -static void -_rdc_remote_flush(rdc_aio_t *aio) -{ - rdc_k_info_t *krdc = &rdc_k_info[aio->index]; - rdc_u_info_t *urdc = &rdc_u_info[aio->index]; - disk_queue *q = &krdc->group->diskq; - kmutex_t *qlock; - rdc_group_t *group; - nsc_buf_t *h = NULL; - int reserved = 0; - int rtype = RDC_RAW; - int rc; - uint_t maxseq; - struct netwriteres netret; - int waitq = 1; - int vflags; - - group = krdc->group; - netret.vecdata.vecdata_val = NULL; - netret.vecdata.vecdata_len = 0; - - /* Where did we get this aio from anyway? */ - if (RDC_IS_DISKQ(group)) { - qlock = &group->diskq.disk_qlock; - } else { - qlock = &group->ra_queue.net_qlock; - } - - /* - * quench transmission if we are too far ahead of the - * server Q, or it will overflow. - * Must fail all requests while asyncdis is set. - * It will be cleared when the last thread to be discarded - * sets the asyncstall counter to zero. - * Note the thread within rdc_net_write - * also bumps the asyncstall counter. - */ - - mutex_enter(qlock); - if (group->asyncdis) { - aio->iostatus = RDC_IO_CANCELLED; - mutex_exit(qlock); - goto failed; - } - /* don't go to sleep if we have gone logging! */ - vflags = rdc_get_vflags(urdc); - if ((vflags & (RDC_BMP_FAILED|RDC_VOL_FAILED|RDC_LOGGING))) { - if ((vflags & RDC_LOGGING) && RDC_IS_DISKQ(group)) - aio->iostatus = RDC_IO_CANCELLED; - - mutex_exit(qlock); - goto failed; - } - - while (maxseq = group->seqack + RDC_MAXPENDQ + 1, - maxseq = (maxseq < group->seqack) ? 
maxseq + RDC_NEWSEQ + 1 - : maxseq, !RDC_INFRONT(aio->seq, maxseq)) { - group->asyncstall++; - ASSERT(!IS_STATE(urdc, RDC_LOGGING)); - cv_wait(&group->asyncqcv, qlock); - group->asyncstall--; - ASSERT(group->asyncstall >= 0); - if (group->asyncdis) { - if (group->asyncstall == 0) { - group->asyncdis = 0; - } - aio->iostatus = RDC_IO_CANCELLED; - mutex_exit(qlock); - goto failed; - } - /* - * See if we have gone into logging mode - * since sleeping. - */ - vflags = rdc_get_vflags(urdc); - if (vflags & (RDC_BMP_FAILED|RDC_VOL_FAILED|RDC_LOGGING)) { - if ((vflags & RDC_LOGGING) && RDC_IS_DISKQ(group)) - aio->iostatus = RDC_IO_CANCELLED; - - mutex_exit(qlock); - goto failed; - } - } - mutex_exit(qlock); - - if ((krdc->io_kstats) && (!RDC_IS_DISKQ(krdc->group))) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - waitq = 0; - } - - - rc = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (rc != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!_rdc_remote_flush: reserve, index %d, rc %d", - aio->index, rc); -#endif - goto failed; - } - - reserved = 1; - /* - * Case where we are multihop and calling with no ANON bufs - * Need to do the read to fill the buf. - */ - if (!aio->handle) { - rc = nsc_alloc_buf(RDC_U_FD(krdc), aio->pos, aio->len, - (aio->flag & ~NSC_WRITE) | NSC_READ, &h); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_remote_flush: alloc_buf, index %d, pos %" - NSC_SZFMT ", len %" NSC_SZFMT ", rc %d", - aio->index, aio->pos, aio->len, rc); -#endif - - goto failed; - } - aio->handle = h; - aio->handle->sb_user = RDC_NULLBUFREAD; - } - - mutex_enter(qlock); - if (group->asyncdis) { - if (group->asyncstall == 0) { - group->asyncdis = 0; - } - aio->iostatus = RDC_IO_CANCELLED; - mutex_exit(qlock); - goto failed; - } - group->asyncstall++; - mutex_exit(qlock); - - - if (krdc->remote_index < 0) { - /* - * this should be ok, we are flushing, not rev syncing. 
- * remote_index could be -1 if we lost a race with - * resume and the flusher trys to flush an io from - * another set that has not resumed - */ - krdc->remote_index = rdc_net_state(krdc->index, CCIO_SLAVE); - DTRACE_PROBE1(remote_index_negative, int, krdc->remote_index); - - } - - /* - * double check for logging, no check in net_write() - * skip the write if you can, otherwise, if logging - * avoid clearing the bit .. you don't know whose bit it may - * also be. - */ - if (IS_STATE(urdc, RDC_LOGGING) || IS_STATE(urdc, RDC_SYNCING)) { - aio->iostatus = RDC_IO_CANCELLED; - mutex_enter(qlock); - group->asyncstall--; - mutex_exit(qlock); - goto failed; - } - - rc = rdc_net_write(krdc->index, krdc->remote_index, - aio->handle, aio->pos, aio->len, aio->seq, aio->qpos, &netret); - - mutex_enter(qlock); - group->asyncstall--; - if (group->asyncdis) { - if (group->asyncstall == 0) { - group->asyncdis = 0; - } - aio->iostatus = RDC_IO_CANCELLED; - mutex_exit(qlock); - goto failed; - } - - if (IS_STATE(urdc, RDC_LOGGING) || IS_STATE(urdc, RDC_SYNCING)) { - mutex_exit(qlock); - aio->iostatus = RDC_IO_CANCELLED; - goto failed; - } - - ASSERT(aio->handle); - if (rc != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_remote_flush: write, index %d, pos %" NSC_SZFMT - ", len %" NSC_SZFMT ", " - "rc %d seq %u group seq %u seqack %u qpos %" NSC_SZFMT, - aio->index, aio->pos, aio->len, rc, aio->seq, - group->seq, group->seqack, aio->qpos); -#endif - if (rc == ENOLINK) { - cmn_err(CE_WARN, - "!Hard timeout detected (%d sec) " - "on SNDR set %s:%s", - rdc_rpc_tmout, urdc->secondary.intf, - urdc->secondary.file); - } - mutex_exit(qlock); - goto failed; - } else { - aio->iostatus = RDC_IO_DONE; - } - - if (RDC_IS_DISKQ(group)) { - /* free locally alloc'd handle */ - if (aio->handle->sb_user == RDC_NULLBUFREAD) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - (void) _rdc_rsrv_diskq(group); - rdc_fixlen(aio); - 
(void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - _rdc_rlse_diskq(group); - } - - } else { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - - mutex_exit(qlock); - - _rdc_rlse_devs(krdc, rtype); - - if (netret.result == 0) { - vflags = rdc_get_vflags(urdc); - - if (!(vflags & (RDC_BMP_FAILED|RDC_VOL_FAILED|RDC_LOGGING))) { - RDC_CLR_BITMAP(krdc, aio->pos, aio->len, \ - 0xffffffff, RDC_BIT_BUMP); - - if (RDC_IS_DISKQ(krdc->group)) { - if (!IS_STATE(urdc, RDC_LOGGING)) { - /* tell queue data has been flushed */ - rdc_clr_iohdr(krdc, aio->qpos); - } else { /* throw away queue, logging */ - mutex_enter(qlock); - rdc_dump_iohdrs(q); - SET_QNXTIO(q, QHEAD(q)); - SET_QCOALBOUNDS(q, QHEAD(q)); - mutex_exit(qlock); - } - } - } - - mutex_enter(qlock); - /* - * Check to see if the reply has arrived out of - * order, if so don't update seqack. - */ - if (!RDC_INFRONT(aio->seq, group->seqack)) { - group->seqack = aio->seq; - } -#ifdef DEBUG - else { - rdc_ooreply++; - } -#endif - if (group->asyncstall) { - cv_broadcast(&group->asyncqcv); - } - mutex_exit(qlock); - } else if (netret.result < 0) { - aio->iostatus = RDC_IO_FAILED; - } - - /* - * see if we have any pending async requests we can mark - * as done. - */ - - if (netret.vecdata.vecdata_len) { - net_pendvec_t *vecp; - net_pendvec_t *vecpe; - vecp = netret.vecdata.vecdata_val; - vecpe = netret.vecdata.vecdata_val + netret.vecdata.vecdata_len; - while (vecp < vecpe) { - rdc_k_info_t *krdcp = &rdc_k_info[vecp->pindex]; - rdc_u_info_t *urdcp = &rdc_u_info[vecp->pindex]; - /* - * we must always still be in the same group. 
- */ - ASSERT(krdcp->group == group); - vflags = rdc_get_vflags(urdcp); - - if (!(vflags & - (RDC_BMP_FAILED|RDC_VOL_FAILED|RDC_LOGGING))) { - RDC_CLR_BITMAP(krdcp, vecp->apos, vecp->alen, \ - 0xffffffff, RDC_BIT_BUMP); - if (RDC_IS_DISKQ(krdcp->group)) { - if (!IS_STATE(urdc, RDC_LOGGING)) { - /* update queue info */ - rdc_clr_iohdr(krdc, vecp->qpos); - } else { /* we've gone logging */ - mutex_enter(qlock); - rdc_dump_iohdrs(q); - SET_QNXTIO(q, QHEAD(q)); - SET_QCOALBOUNDS(q, QHEAD(q)); - mutex_exit(qlock); - } - } - } - - /* - * see if we can re-start transmission - */ - mutex_enter(qlock); - if (!RDC_INFRONT(vecp->seq, group->seqack)) { - group->seqack = vecp->seq; - } -#ifdef DEBUG - else { - rdc_ooreply++; - } -#endif - DTRACE_PROBE1(pendvec_return, int, vecp->seq); - - if (group->asyncstall) { - cv_broadcast(&group->asyncqcv); - } - mutex_exit(qlock); - vecp++; - } - } - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * sizeof (net_pendvec_t)); - return; -failed: - - /* perhaps we have a few threads stuck .. 
*/ - if (group->asyncstall) { - group->asyncdis = 1; - cv_broadcast(&group->asyncqcv); - } - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * sizeof (net_pendvec_t)); - - mutex_enter(qlock); - if (RDC_IS_DISKQ(group)) { - /* free locally alloc'd hanlde */ - if ((aio->handle) && - (aio->handle->sb_user == RDC_NULLBUFREAD)) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - (void) _rdc_rsrv_diskq(group); - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - _rdc_rlse_diskq(group); - } - } else { - if (aio->handle) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - } - mutex_exit(qlock); - - if (reserved) { - _rdc_rlse_devs(krdc, rtype); - } - - if ((waitq && krdc->io_kstats) && (!RDC_IS_DISKQ(krdc->group))) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - /* make sure that the bit is still set */ - RDC_CHECK_BIT(krdc, aio->pos, aio->len); - - if (aio->iostatus != RDC_IO_CANCELLED) - aio->iostatus = RDC_IO_FAILED; -} - - -/* - * rdc_drain_disk_queue - * drain the async network queue for the whole group. Bail out if nothing - * happens in 20 sec - * returns -1 if it bails before the queues are drained. 
- */ -#define NUM_RETRIES 15 /* Number of retries to wait if no progress */ -int -rdc_drain_disk_queue(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - volatile rdc_group_t *group; - volatile disk_queue *diskq; - int threads, counter; - long blocks; - - /* Sanity checking */ - if (index > rdc_max_sets) - return (0); - - /* - * If there is no group or diskq configured, we can leave now - */ - if (!(group = krdc->group) || !(diskq = &group->diskq)) - return (0); - - /* - * No need to wait if EMPTY and threads are gone - */ - counter = 0; - while (!QEMPTY(diskq) || group->rdc_thrnum) { - - /* - * Capture counters to determine if progress is being made - */ - blocks = QBLOCKS(diskq); - threads = group->rdc_thrnum; - - /* - * Wait - */ - delay(HZ); - - /* - * Has the group or disk queue gone away while delayed? - */ - if (!(group = krdc->group) || !(diskq = &group->diskq)) - return (0); - - /* - * Are we still seeing progress? - */ - if (blocks == QBLOCKS(diskq) && threads == group->rdc_thrnum) { - /* - * No progress seen, increment retry counter - */ - if (counter++ > NUM_RETRIES) { - return (-1); - } - } else { - /* - * Reset counter, as we've made progress - */ - counter = 0; - } - } - - return (0); -} - -/* - * decide what needs to be drained, disk or core - * and drain it - */ -int -rdc_drain_queue(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_group_t *group = krdc->group; - - if (!group) - return (0); - - if (RDC_IS_DISKQ(group)) - return (rdc_drain_disk_queue(index)); - if (RDC_IS_MEMQ(group)) - return (rdc_drain_net_queue(index)); - /* oops.. */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_drain_queue: " - "attempting drain of unknown Q type"); -#endif - return (0); -} - -/* - * rdc_drain_net_queue - * drain the async network queue for the whole group. Bail out if nothing - * happens in 20 sec - * returns -1 if it bails before the queues are drained. 
- */ -int -rdc_drain_net_queue(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - volatile net_queue *q; - int bail = 20; /* bail out in about 20 secs */ - nsc_size_t blocks; - - /* Sanity checking */ - if (index > rdc_max_sets) - return (0); - if (!krdc->group) - return (0); - /* LINTED */ - if (!(q = &krdc->group->ra_queue)) - return (0); - - /* CONSTCOND */ - while (1) { - - if (((volatile rdc_aio_t *)q->net_qhead == NULL) && - (krdc->group->rdc_thrnum == 0)) { - break; - } - - blocks = q->blocks; - - q = (volatile net_queue *)&krdc->group->ra_queue; - - if ((blocks == q->blocks) && - (--bail <= 0)) { - break; - } - - delay(HZ); - } - - if (bail <= 0) - return (-1); - - return (0); -} - -/* - * rdc_dump_queue - * We want to release all the blocks currently on the network flushing queue - * We already have them logged in the bitmap. - */ -void -rdc_dump_queue(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_aio_t *aio; - net_queue *q; - rdc_group_t *group; - disk_queue *dq; - kmutex_t *qlock; - - group = krdc->group; - - q = &group->ra_queue; - dq = &group->diskq; - - /* - * gotta have both locks here for diskq - */ - - if (RDC_IS_DISKQ(group)) { - mutex_enter(&q->net_qlock); - if (q->qfill_sleeping == RDC_QFILL_AWAKE) { - int tries = 3; -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, - "!dumpq sending diskq->memq flusher to sleep"); -#endif - q->qfflags |= RDC_QFILLSLEEP; - mutex_exit(&q->net_qlock); - while (q->qfill_sleeping == RDC_QFILL_AWAKE && tries--) - delay(5); - mutex_enter(&q->net_qlock); - } - } - - if (RDC_IS_DISKQ(group)) { - qlock = &dq->disk_qlock; - (void) _rdc_rsrv_diskq(group); - } else { - qlock = &q->net_qlock; - } - - mutex_enter(qlock); - - group->seq = RDC_NEWSEQ; /* reset the sequence number */ - group->seqack = RDC_NEWSEQ; - - /* if the q is on disk, dump the q->iohdr chain */ - if (RDC_IS_DISKQ(group)) { - rdc_dump_iohdrs(dq); - - /* back up the nxtio pointer */ - SET_QNXTIO(dq, QHEAD(dq)); - SET_QCOALBOUNDS(dq, QHEAD(dq)); 
- } - - while (q->net_qhead) { - rdc_k_info_t *tmpkrdc; - aio = q->net_qhead; - tmpkrdc = &rdc_k_info[aio->index]; - - if (RDC_IS_DISKQ(group)) { - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - } - } else { - if (aio->handle) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - } - - q->net_qhead = aio->next; - RDC_CHECK_BIT(tmpkrdc, aio->pos, aio->len); - - kmem_free(aio, sizeof (*aio)); - if (tmpkrdc->io_kstats && !RDC_IS_DISKQ(group)) { - mutex_enter(tmpkrdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(tmpkrdc->io_kstats)); - mutex_exit(tmpkrdc->io_kstats->ks_lock); - } - - } - - q->net_qtail = NULL; - q->blocks = 0; - q->nitems = 0; - - /* - * See if we have stalled threads. - */ -done: - if (group->asyncstall) { - group->asyncdis = 1; - cv_broadcast(&group->asyncqcv); - } - mutex_exit(qlock); - if (RDC_IS_DISKQ(group)) { - mutex_exit(&q->net_qlock); - _rdc_rlse_diskq(group); - } - -} - - -/* - * rdc_clnt_get - * Get a CLIENT handle and cache it - */ - -static int -rdc_clnt_get(rdc_srv_t *svp, rpcvers_t vers, struct chtab **rch, CLIENT **clp) -{ - uint_t max_msgsize; - int retries; - int ret; - struct cred *cred; - int num_clnts = 0; - register struct chtab *ch; - struct chtab **plistp; - CLIENT *client = 0; - - if (rch) { - *rch = 0; - } - - if (clp) { - *clp = 0; - } - - retries = 6; /* Never used for COTS in Solaris */ - cred = ddi_get_cred(); - max_msgsize = RDC_RPC_MAX; - - mutex_enter(&rdc_clnt_lock); - - ch = rdc_chtable; - plistp = &rdc_chtable; - - /* find the right ch_list chain */ - - for (ch = rdc_chtable; ch != NULL; ch = ch->ch_next) { - if (ch->ch_prog == RDC_PROGRAM && - ch->ch_vers == vers && - ch->ch_dev == svp->ri_knconf->knc_rdev && - ch->ch_protofmly != NULL && - strcmp(ch->ch_protofmly, - svp->ri_knconf->knc_protofmly) == 0) { - /* found the correct chain to walk */ - break; - } - plistp = 
&ch->ch_next; - } - - if (ch != NULL) { - /* walk the ch_list and try and find a free client */ - - for (num_clnts = 0; ch != NULL; ch = ch->ch_list, num_clnts++) { - if (ch->ch_inuse == FALSE) { - /* suitable handle to reuse */ - break; - } - plistp = &ch->ch_list; - } - } - - if (ch == NULL && num_clnts >= MAXCLIENTS) { - /* alloc a temporary handle and return */ - - rdc_clnt_toomany++; - mutex_exit(&rdc_clnt_lock); - - ret = clnt_tli_kcreate(svp->ri_knconf, &(svp->ri_addr), - RDC_PROGRAM, vers, max_msgsize, retries, cred, &client); - - if (ret != 0) { - cmn_err(CE_NOTE, - "!rdc_call: tli_kcreate failed %d", ret); - return (ret); - } - - *rch = 0; - *clp = client; - (void) CLNT_CONTROL(client, CLSET_PROGRESS, NULL); - return (ret); - } - - if (ch != NULL) { - /* reuse a cached handle */ - - ch->ch_inuse = TRUE; - ch->ch_timesused++; - mutex_exit(&rdc_clnt_lock); - - *rch = ch; - - if (ch->ch_client == NULL) { - ret = clnt_tli_kcreate(svp->ri_knconf, &(svp->ri_addr), - RDC_PROGRAM, vers, max_msgsize, retries, - cred, &ch->ch_client); - if (ret != 0) { - ch->ch_inuse = FALSE; - return (ret); - } - - (void) CLNT_CONTROL(ch->ch_client, CLSET_PROGRESS, - NULL); - *clp = ch->ch_client; - - return (0); - } else { - /* - * Consecutive calls to CLNT_CALL() on the same client handle - * get the same transaction ID. We want a new xid per call, - * so we first reinitialise the handle. 
- */ - (void) clnt_tli_kinit(ch->ch_client, svp->ri_knconf, - &(svp->ri_addr), max_msgsize, retries, cred); - - *clp = ch->ch_client; - return (0); - } - } - - /* create new handle and cache it */ - ch = (struct chtab *)kmem_zalloc(sizeof (*ch), KM_SLEEP); - - if (ch) { - ch->ch_inuse = TRUE; - ch->ch_prog = RDC_PROGRAM; - ch->ch_vers = vers; - ch->ch_dev = svp->ri_knconf->knc_rdev; - ch->ch_protofmly = (char *)kmem_zalloc( - strlen(svp->ri_knconf->knc_protofmly)+1, KM_SLEEP); - if (ch->ch_protofmly) - (void) strcpy(ch->ch_protofmly, - svp->ri_knconf->knc_protofmly); - *plistp = ch; - } - - mutex_exit(&rdc_clnt_lock); - - ret = clnt_tli_kcreate(svp->ri_knconf, &(svp->ri_addr), - RDC_PROGRAM, vers, max_msgsize, retries, cred, clp); - - if (ret != 0) { - if (ch) - ch->ch_inuse = FALSE; - cmn_err(CE_NOTE, "!rdc_call: tli_kcreate failed %d", ret); - return (ret); - } - - *rch = ch; - if (ch) - ch->ch_client = *clp; - - (void) CLNT_CONTROL(*clp, CLSET_PROGRESS, NULL); - - return (ret); -} - - -long rdc_clnt_count = 0; - -/* - * rdc_clnt_call - * Arguments: - * rdc_srv_t *svp - rdc servinfo - * rpcproc_t proc; - rpcid - * rpcvers_t vers; - protocol version - * xdrproc_t xargs;- xdr function - * caddr_t argsp;- args to xdr function - * xdrproc_t xres;- xdr function - * caddr_t resp;- args to xdr function - * struct timeval timeout; - * Performs RPC client call using specific protocol and version - */ - -int -rdc_clnt_call(rdc_srv_t *svp, rpcproc_t proc, rpcvers_t vers, - xdrproc_t xargs, caddr_t argsp, - xdrproc_t xres, caddr_t resp, struct timeval *timeout) -{ - CLIENT *rh = NULL; - int err; - int tries = 0; - struct chtab *ch = NULL; - - err = rdc_clnt_get(svp, vers, &ch, &rh); - if (err || !rh) - return (err); - - do { - DTRACE_PROBE3(rdc_clnt_call_1, - CLIENT *, rh, rpcproc_t, proc, xdrproc_t, xargs); - - err = cl_call_sig(rh, proc, xargs, argsp, xres, resp, *timeout); - - DTRACE_PROBE1(rdc_clnt_call_end, int, err); - - switch (err) { - case RPC_SUCCESS: /* bail now 
*/ - goto done; - case RPC_INTR: /* No recovery from this */ - goto done; - case RPC_PROGVERSMISMATCH: - goto done; - case RPC_TLIERROR: - /* fall thru */ - case RPC_XPRTFAILED: - /* Delay here to err on side of caution */ - /* fall thru */ - case RPC_VERSMISMATCH: - - default: - if (IS_UNRECOVERABLE_RPC(err)) { - goto done; - } - tries++; - /* - * The call is in progress (over COTS) - * Try the CLNT_CALL again, but don't - * print a noisy error message - */ - if (err == RPC_INPROGRESS) - break; - cmn_err(CE_NOTE, "!SNDR client: err %d %s", - err, clnt_sperrno(err)); - } - } while (tries && (tries < 2)); -done: - ++rdc_clnt_count; - rdc_clnt_free(ch, rh); - return (err); -} - - -/* - * Call an rpc from the client side, not caring which protocol is used. - */ -int -rdc_clnt_call_any(rdc_srv_t *svp, rdc_if_t *ip, rpcproc_t proc, - xdrproc_t xargs, caddr_t argsp, - xdrproc_t xres, caddr_t resp, struct timeval *timeout) -{ - rpcvers_t vers; - int rc; - - if (ip != NULL) { - vers = ip->rpc_version; - } else { - vers = RDC_VERS_MAX; - } - - do { - rc = rdc_clnt_call(svp, proc, vers, xargs, argsp, - xres, resp, timeout); - - if (rc == RPC_PROGVERSMISMATCH) { - /* - * Downgrade and try again. - */ - vers--; - } - } while ((vers >= RDC_VERS_MIN) && (rc == RPC_PROGVERSMISMATCH)); - - if ((rc == 0) && (ip != NULL) && (vers != ip->rpc_version)) { - mutex_enter(&rdc_ping_lock); - ip->rpc_version = vers; - mutex_exit(&rdc_ping_lock); - } - - return (rc); -} - -/* - * Call an rpc from the client side, starting with protocol specified - */ -int -rdc_clnt_call_walk(rdc_k_info_t *krdc, rpcproc_t proc, xdrproc_t xargs, - caddr_t argsp, xdrproc_t xres, caddr_t resp, - struct timeval *timeout) -{ - int rc; - rpcvers_t vers; - rdc_srv_t *svp = krdc->lsrv; - rdc_if_t *ip = krdc->intf; - vers = krdc->rpc_version; - - do { - rc = rdc_clnt_call(svp, proc, vers, xargs, argsp, - xres, resp, timeout); - - if (rc == RPC_PROGVERSMISMATCH) { - /* - * Downgrade and try again. 
- */ - vers--; - } - } while ((vers >= RDC_VERS_MIN) && (rc == RPC_PROGVERSMISMATCH)); - - if ((rc == 0) && (ip != NULL) && (vers != ip->rpc_version)) { - mutex_enter(&rdc_ping_lock); - ip->rpc_version = vers; - mutex_exit(&rdc_ping_lock); - } - - return (rc); -} - -/* - * rdc_clnt_free - * Free a client structure into the cache, or if this was a temporary - * handle allocated above MAXCLIENTS, destroy it. - */ -static void -rdc_clnt_free(struct chtab *ch, CLIENT *clp) -{ - if (ch != NULL) { - /* cached client, just clear inuse flag and return */ - ASSERT(ch->ch_client == clp); - ch->ch_inuse = FALSE; - return; - } - - /* temporary handle allocated above MAXCLIENTS, so destroy it */ - - if (clp->cl_auth) { - AUTH_DESTROY(clp->cl_auth); - clp->cl_auth = 0; - } - - CLNT_DESTROY(clp); -} - - -/* - * _rdc_clnt_destroy - * Free a chain (ch_list or ch_next) of cached clients - */ -static int -_rdc_clnt_destroy(struct chtab **p, const int list) -{ - struct chtab *ch; - int leak = 0; - - if (!p) - return (0); - - while (*p != NULL) { - ch = *p; - - /* - * unlink from the chain - * - this leaks the client if it was inuse - */ - - *p = list ? 
ch->ch_list : ch->ch_next; - - if (!ch->ch_inuse) { - /* unused client - destroy it */ - - if (ch->ch_client) { - if (ch->ch_client->cl_auth) { - AUTH_DESTROY(ch->ch_client->cl_auth); - ch->ch_client->cl_auth = 0; - } - - CLNT_DESTROY(ch->ch_client); - ch->ch_client = 0; - } - - if (ch->ch_protofmly) - kmem_free(ch->ch_protofmly, - strlen(ch->ch_protofmly)+1); - - kmem_free(ch, sizeof (*ch)); - } else { - /* remember client leak */ - leak++; - } - } - - return (leak); -} - - -/* - * rdc_clnt_destroy - * Free client caching table on unconfigure - */ -void -rdc_clnt_destroy(void) -{ - struct chtab *ch; - int leak = 0; - - mutex_enter(&rdc_clnt_lock); - - /* destroy each ch_list chain */ - - for (ch = rdc_chtable; ch; ch = ch->ch_next) { - leak += _rdc_clnt_destroy(&ch->ch_list, 1); - } - - /* destroy the main ch_next chain */ - leak += _rdc_clnt_destroy(&rdc_chtable, 0); - - if (leak) { - /* we are about to leak clients */ - cmn_err(CE_WARN, - "!rdc_clnt_destroy: leaking %d inuse clients", leak); - } - - mutex_exit(&rdc_clnt_lock); -} - -#ifdef DEBUG -/* - * Function to send an asynchronous net_data6 request - * direct to a server to allow the generation of - * out of order requests for ZatoIchi tests. - */ -int -rdc_async6(void *arg, int mode, int *rvp) -{ - int index; - rdc_async6_t async6; - struct net_data6 data6; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - char *data; - int datasz; - char *datap; - int rc; - struct timeval t; - struct netwriteres netret; - int i; - - rc = 0; - *rvp = 0; - /* - * copyin the user's arguments. - */ - if (ddi_copyin(arg, &async6, sizeof (async6), mode) < 0) { - return (EFAULT); - } - - /* - * search by the secondary host and file. 
- */ - mutex_enter(&rdc_conf_lock); - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - if (!IS_CONFIGURED(krdc)) - continue; - if (!IS_ENABLED(urdc)) - continue; - if (!IS_ASYNC(urdc)) - continue; - if (krdc->rpc_version < RDC_VERSION6) - continue; - - if ((strncmp(urdc->secondary.intf, async6.sechost, - MAX_RDC_HOST_SIZE) == 0) && - (strncmp(urdc->secondary.file, async6.secfile, - NSC_MAXPATH) == 0)) { - break; - } - } - mutex_exit(&rdc_conf_lock); - if (index >= rdc_max_sets) { - return (ENOENT); - } - - if (async6.spos != -1) { - if ((async6.spos < async6.pos) || - ((async6.spos + async6.slen) > - (async6.pos + async6.len))) { - cmn_err(CE_WARN, "!Sub task not within range " - "start %d length %d sub start %d sub length %d", - async6.pos, async6.len, async6.spos, async6.slen); - return (EIO); - } - } - - datasz = FBA_SIZE(1); - data = kmem_alloc(datasz, KM_SLEEP); - datap = data; - while (datap < &data[datasz]) { - /* LINTED */ - *datap++ = async6.pat; - } - - /* - * Fill in the net databuffer prior to transmission. 
- */ - - data6.local_cd = krdc->index; - if (krdc->remote_index == -1) { - cmn_err(CE_WARN, "!Remote index not known"); - kmem_free(data, datasz); - return (EIO); - } else { - data6.cd = krdc->remote_index; - } - data6.pos = async6.pos; - data6.len = async6.len; - data6.flag = 0; - data6.idx = async6.idx; - data6.seq = async6.seq; - - if (async6.spos == -1) { - data6.sfba = async6.pos; - data6.nfba = async6.len; - data6.endoblk = 1; - - } else { - data6.sfba = async6.spos; - data6.nfba = async6.slen; - data6.endoblk = async6.endind; - } - - data6.data.data_len = datasz; - data6.data.data_val = data; - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - netret.vecdata.vecdata_val = NULL; - netret.vecdata.vecdata_len = 0; - - - rc = rdc_clnt_call(krdc->lsrv, RDCPROC_WRITE6, krdc->rpc_version, - xdr_net_data6, (char *)&data6, xdr_netwriteres, (char *)&netret, - &t); - - kmem_free(data, datasz); - if (rc == 0) { - if (netret.result < 0) { - rc = -netret.result; - } - cmn_err(CE_NOTE, "!async6: seq %u result %d index %d " - "pendcnt %d", - netret.seq, netret.result, netret.index, - netret.vecdata.vecdata_len); - for (i = 0; i < netret.vecdata.vecdata_len; i++) { - net_pendvec_t pvec; - bcopy(netret.vecdata.vecdata_val + i, &pvec, - sizeof (net_pendvec_t)); - cmn_err(CE_NOTE, "!Seq %u pos %llu len %llu", - pvec.seq, (unsigned long long)pvec.apos, - (unsigned long long)pvec.alen); - } - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * - sizeof (net_pendvec_t)); - } else { - cmn_err(CE_NOTE, "!async6: rpc call failed %d", rc); - } - *rvp = netret.index; - return (rc); -} - -/* - * Function to send an net_read6 request - * direct to a server to allow the generation of - * read requests. 
- */ -int -rdc_readgen(void *arg, int mode, int *rvp) -{ - int index; - rdc_readgen_t readgen; - rdc_readgen32_t readgen32; - struct rread6 read6; - struct rread read5; - rdc_k_info_t *krdc; - int ret; - struct timeval t; - struct rdcrdresult rr; - int err; - - *rvp = 0; - rr.rr_bufsize = 0; /* rpc data buffer length (bytes) */ - rr.rr_data = NULL; /* rpc data buffer */ - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - if (ddi_copyin(arg, &readgen32, sizeof (readgen32), mode)) { - return (EFAULT); - } - (void) strncpy(readgen.sechost, readgen32.sechost, - MAX_RDC_HOST_SIZE); - (void) strncpy(readgen.secfile, readgen32.secfile, NSC_MAXPATH); - readgen.len = readgen32.len; - readgen.pos = readgen32.pos; - readgen.idx = readgen32.idx; - readgen.flag = readgen32.flag; - readgen.data = (void *)(unsigned long)readgen32.data; - readgen.rpcversion = readgen32.rpcversion; - } else { - if (ddi_copyin(arg, &readgen, sizeof (readgen), mode)) { - return (EFAULT); - } - } - switch (readgen.rpcversion) { - case 5: - case 6: - break; - default: - return (EINVAL); - } - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byhostdev(readgen.sechost, readgen.secfile); - if (index >= 0) { - krdc = &rdc_k_info[index]; - } - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - return (ENODEV); - } - /* - * we should really call setbusy here. 
- */ - mutex_exit(&rdc_conf_lock); - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - if (krdc->remote_index == -1) { - cmn_err(CE_WARN, "!Remote index not known"); - ret = EIO; - goto out; - } - if (readgen.rpcversion == 6) { - read6.cd = krdc->remote_index; - read6.len = readgen.len; - read6.pos = readgen.pos; - read6.idx = readgen.idx; - read6.flag = readgen.flag; - } else { - read5.cd = krdc->remote_index; - read5.len = readgen.len; - read5.pos = readgen.pos; - read5.idx = readgen.idx; - read5.flag = readgen.flag; - } - - if (readgen.flag & RDC_RREAD_START) { - if (readgen.rpcversion == 6) { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ6, - RDC_VERSION6, xdr_rread6, (char *)&read6, - xdr_int, (char *)&ret, &t); - } else { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ5, - RDC_VERSION5, xdr_rread, (char *)&read5, - xdr_int, (char *)&ret, &t); - } - if (err == 0) { - *rvp = ret; - ret = 0; - } else { - ret = EPROTO; - } - } else { - if (readgen.rpcversion == 6) { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ6, - RDC_VERSION6, xdr_rread6, (char *)&read6, - xdr_rdresult, (char *)&rr, &t); - } else { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ5, - RDC_VERSION5, xdr_rread, (char *)&read5, - xdr_rdresult, (char *)&rr, &t); - } - if (err == 0) { - if (rr.rr_status != RDC_OK) { - ret = EIO; - goto out; - } - *rvp = rr.rr_bufsize; - if (ddi_copyout(rr.rr_data, readgen.data, - rr.rr_bufsize, mode) != 0) { - ret = EFAULT; - goto out; - } - ret = 0; - } else { - ret = EPROTO; - goto out; - } - } -out: - if (rr.rr_data) { - kmem_free(rr.rr_data, rr.rr_bufsize); - } - return (ret); -} - - -#endif diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_clnt.h b/usr/src/uts/common/avs/ns/rdc/rdc_clnt.h deleted file mode 100644 index d58a0bdc8f..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_clnt.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the 
"License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_CLNT_H -#define _RDC_CLNT_H - -#ifdef __cplusplus -extern "C" { -#endif - -extern kmutex_t rdc_clnt_lock; - -struct chtab { - uint_t ch_timesused; - bool_t ch_inuse; - ulong_t ch_prog; - rpcvers_t ch_vers; - dev_t ch_dev; - char *ch_protofmly; - CLIENT *ch_client; - struct chtab *ch_next; /* chain of different prog/vers/dev/proto */ - struct chtab *ch_list; /* chain of similar clients */ -}; - -#define MAXCLIENTS 64 - -extern int rdc_clnt_call(rdc_srv_t *, rpcproc_t, rpcvers_t, xdrproc_t, - caddr_t, xdrproc_t, caddr_t, struct timeval *); -extern int rdc_clnt_call_any(rdc_srv_t *, rdc_if_t *, rpcproc_t, - xdrproc_t, caddr_t, xdrproc_t, caddr_t, - struct timeval *); -extern int rdc_clnt_call_walk(rdc_k_info_t *, rpcproc_t, xdrproc_t, caddr_t, - xdrproc_t, caddr_t, struct timeval *); - -extern int rdc_rpc_tmout; - -extern int rdc_aio_coalesce(rdc_aio_t *, rdc_aio_t *); - - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_CLNT_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_dev.c b/usr/src/uts/common/avs/ns/rdc/rdc_dev.c deleted file mode 100644 index 7957999c59..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_dev.c +++ /dev/null @@ -1,3019 +0,0 @@ -/* - * 
CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/cred.h> -#include <sys/file.h> -#include <sys/ddi.h> -#include <sys/nsc_thread.h> -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_errors.h> - -#include <sys/unistat/spcs_s_k.h> -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc.h" -#include "rdc_io.h" -#include "rdc_bitmap.h" - -/* - * Remote Dual Copy - * - * This file contains the nsctl io provider functionality for RDC. - * - * RDC is implemented as a simple filter module that pushes itself between - * user (SIMCKD, STE, etc.) and SDBC. 
- */ - - -static int _rdc_open_count; -int rdc_eio_nobmp = 0; - -nsc_io_t *_rdc_io_hc; -static nsc_io_t *_rdc_io_hr; -static nsc_def_t _rdc_fd_def[], _rdc_io_def[], _rdc_ior_def[]; - -void _rdc_deinit_dev(); -int rdc_diskq_enqueue(rdc_k_info_t *, rdc_aio_t *); -extern void rdc_unintercept_diskq(rdc_group_t *); -rdc_aio_t *rdc_aio_tbuf_get(void *, void *, int, int, int, int, int); - -static nsc_buf_t *_rdc_alloc_handle(void (*)(), void (*)(), - void (*)(), rdc_fd_t *); -static int _rdc_free_handle(rdc_buf_t *, rdc_fd_t *); - -#ifdef DEBUG -int rdc_overlap_cnt; -int rdc_overlap_hnd_cnt; -#endif - -static rdc_info_dev_t *rdc_devices; - -extern int _rdc_rsrv_diskq(rdc_group_t *group); -extern void _rdc_rlse_diskq(rdc_group_t *group); - -/* - * _rdc_init_dev - * Initialise the io provider. - */ - -int -_rdc_init_dev() -{ - _rdc_io_hc = nsc_register_io("rdc-high-cache", - NSC_RDCH_ID|NSC_REFCNT|NSC_FILTER, _rdc_io_def); - if (_rdc_io_hc == NULL) - cmn_err(CE_WARN, "!rdc: nsc_register_io (high, cache) failed."); - - _rdc_io_hr = nsc_register_io("rdc-high-raw", - NSC_RDCHR_ID|NSC_REFCNT|NSC_FILTER, _rdc_ior_def); - if (_rdc_io_hr == NULL) - cmn_err(CE_WARN, "!rdc: nsc_register_io (high, raw) failed."); - - if (!_rdc_io_hc || !_rdc_io_hr) { - _rdc_deinit_dev(); - return (ENOMEM); - } - - return (0); -} - - -/* - * _rdc_deinit_dev - * De-initialise the io provider. - * - */ - -void -_rdc_deinit_dev() -{ - int rc; - - if (_rdc_io_hc) { - if ((rc = nsc_unregister_io(_rdc_io_hc, 0)) != 0) - cmn_err(CE_WARN, - "!rdc: nsc_unregister_io (high, cache) failed: %d", - rc); - } - - if (_rdc_io_hr) { - if ((rc = nsc_unregister_io(_rdc_io_hr, 0)) != 0) - cmn_err(CE_WARN, - "!rdc: nsc_unregister_io (high, raw) failed: %d", - rc); - } -} - - -/* - * rdc_idev_open - * - Open the nsctl file descriptors for the data devices. - * - * Must be called with rdc_conf_lock held. - * id_sets is protected by rdc_conf_lock. 
- */ -static rdc_info_dev_t * -rdc_idev_open(rdc_k_info_t *krdc, char *pathname, int *rc) -{ - rdc_info_dev_t *dp; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (dp = rdc_devices; dp; dp = dp->id_next) { - if (dp->id_cache_dev.bi_fd && - strcmp(pathname, nsc_pathname(dp->id_cache_dev.bi_fd)) == 0) - break; - } - - if (!dp) { - dp = kmem_zalloc(sizeof (*dp), KM_SLEEP); - if (!dp) - return (NULL); - - dp->id_cache_dev.bi_krdc = krdc; - dp->id_cache_dev.bi_fd = nsc_open(pathname, - NSC_RDCHR_ID|NSC_RDWR|NSC_DEVICE, - _rdc_fd_def, (blind_t)&dp->id_cache_dev, rc); - if (!dp->id_cache_dev.bi_fd) { - kmem_free(dp, sizeof (*dp)); - return (NULL); - } - - dp->id_raw_dev.bi_krdc = krdc; - dp->id_raw_dev.bi_fd = nsc_open(pathname, - NSC_RDCHR_ID|NSC_RDWR|NSC_DEVICE, - _rdc_fd_def, (blind_t)&dp->id_raw_dev, rc); - if (!dp->id_raw_dev.bi_fd) { - (void) nsc_close(dp->id_cache_dev.bi_fd); - kmem_free(dp, sizeof (*dp)); - return (NULL); - } - - mutex_init(&dp->id_rlock, NULL, MUTEX_DRIVER, NULL); - cv_init(&dp->id_rcv, NULL, CV_DRIVER, NULL); - - dp->id_next = rdc_devices; - rdc_devices = dp; - } - - dp->id_sets++; - return (dp); -} - - -/* - * rdc_idev_close - * - Close the nsctl file descriptors for the data devices. - * - * Must be called with rdc_conf_lock and dp->id_rlock held. - * Will release dp->id_rlock before returning. - * - * id_sets is protected by rdc_conf_lock. 
- */ -static void -rdc_idev_close(rdc_k_info_t *krdc, rdc_info_dev_t *dp) -{ - rdc_info_dev_t **dpp; -#ifdef DEBUG - int count = 0; -#endif - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - ASSERT(MUTEX_HELD(&dp->id_rlock)); - - dp->id_sets--; - if (dp->id_sets > 0) { - mutex_exit(&dp->id_rlock); - return; - } - - /* external references must have gone */ - ASSERT((krdc->c_ref + krdc->r_ref + krdc->b_ref) == 0); - - /* unlink from chain */ - - for (dpp = &rdc_devices; *dpp; dpp = &((*dpp)->id_next)) { - if (*dpp == dp) { - /* unlink */ - *dpp = dp->id_next; - break; - } - } - - /* - * Wait for all reserves to go away - the rpc server is - * running asynchronously with this close, and so we - * have to wait for it to spot that the krdc is !IS_ENABLED() - * and throw away the nsc_buf_t's that it has allocated - * and release the device. - */ - - while (IS_CRSRV(krdc) || IS_RRSRV(krdc)) { -#ifdef DEBUG - if (!(++count % 16)) { - cmn_err(CE_NOTE, - "!_rdc_idev_close(%s): waiting for nsc_release", - rdc_u_info[krdc->index].primary.file); - } - if (count > (16*20)) { - /* waited for 20 seconds - too long - panic */ - cmn_err(CE_PANIC, - "!_rdc_idev_close(%s, %p): lost nsc_release", - rdc_u_info[krdc->index].primary.file, (void *)krdc); - } -#endif - mutex_exit(&dp->id_rlock); - delay(HZ>>4); - mutex_enter(&dp->id_rlock); - } - - if (dp->id_cache_dev.bi_fd) { - (void) nsc_close(dp->id_cache_dev.bi_fd); - dp->id_cache_dev.bi_fd = NULL; - } - - if (dp->id_raw_dev.bi_fd) { - (void) nsc_close(dp->id_raw_dev.bi_fd); - dp->id_raw_dev.bi_fd = NULL; - } - - mutex_exit(&dp->id_rlock); - mutex_destroy(&dp->id_rlock); - cv_destroy(&dp->id_rcv); - - kmem_free(dp, sizeof (*dp)); -} - - -/* - * This function provokes an nsc_reserve() for the device which - * if successful will populate krdc->maxfbas and urdc->volume_size - * via the _rdc_attach_fd() callback. 
- */ -void -rdc_get_details(rdc_k_info_t *krdc) -{ - int rc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - nsc_size_t vol_size, maxfbas; - - if (_rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL) == 0) { - /* - * if the vol is already reserved, - * volume_size won't be populated on enable because - * it is a *fake* reserve and does not make it to - * _rdc_attach_fd(). So do it here. - */ - rc = nsc_partsize(RDC_U_FD(krdc), &vol_size); - if (rc != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_get_details: partsize failed (%d)", rc); -#endif /* DEBUG */ - urdc->volume_size = vol_size = 0; - } - - urdc->volume_size = vol_size; - rc = nsc_maxfbas(RDC_U_FD(krdc), 0, &maxfbas); - if (rc != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_get_details: maxfbas failed (%d)", rc); -#endif /* DEBUG */ - maxfbas = 0; - } - krdc->maxfbas = min(RDC_MAX_MAXFBAS, maxfbas); - - _rdc_rlse_devs(krdc, RDC_RAW); - } -} - - -/* - * Should only be used by the config code. - */ - -int -rdc_dev_open(rdc_set_t *rdc_set, int options) -{ - rdc_k_info_t *krdc; - int index; - int rc; - char *pathname; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if (options & RDC_OPT_PRIMARY) - pathname = rdc_set->primary.file; - else - pathname = rdc_set->secondary.file; - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - - if (!IS_CONFIGURED(krdc)) - break; - } - - if (index == rdc_max_sets) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dev_open: out of cd\'s"); -#endif - index = -EINVAL; - goto out; - } - - if (krdc->devices && (krdc->c_fd || krdc->r_fd)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dev_open: %s already open", pathname); -#endif - index = -EINVAL; - goto out; - } - - _rdc_open_count++; - - krdc->devices = rdc_idev_open(krdc, pathname, &rc); - if (!krdc->devices) { - index = -rc; - goto open_fail; - } - - /* - * Grab the device size and maxfbas now. 
- */ - - rdc_get_details(krdc); - -out: - return (index); - -open_fail: - _rdc_open_count--; - - return (index); -} - - -void -rdc_dev_close(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - mutex_enter(&rdc_conf_lock); - - if (krdc->devices) - mutex_enter(&krdc->devices->id_rlock); - -#ifdef DEBUG - if (!krdc->devices || !krdc->c_fd || !krdc->r_fd) { - cmn_err(CE_WARN, - "!rdc_dev_close(%p): c_fd %p r_fd %p", (void *)krdc, - (void *) (krdc->devices ? krdc->c_fd : 0), - (void *) (krdc->devices ? krdc->r_fd : 0)); - } -#endif - - if (krdc->devices) { - /* rdc_idev_close will release id_rlock */ - rdc_idev_close(krdc, krdc->devices); - krdc->devices = NULL; - } - - urdc->primary.file[0] = '\0'; - - if (_rdc_open_count <= 0) { - cmn_err(CE_WARN, "!rdc: _rdc_open_count corrupt: %d", - _rdc_open_count); - } - - _rdc_open_count--; - - mutex_exit(&rdc_conf_lock); -} - - -/* - * rdc_intercept - * - * Register for IO on this device with nsctl. - * - * For a 1-to-many primary we register for each krdc and let nsctl sort - * out which it wants to be using. This means that we cannot tell which - * krdc will receive the incoming io from nsctl, though we do know that - * at any one time only one krdc will be 'attached' and so get io from - * nsctl. - * - * So the krdc->many_next pointer is maintained as a circular list. The - * result of these multiple nsc_register_paths is that we will see a - * few more attach and detach io provider calls during enable/resume - * and disable/suspend of the 1-to-many whilst nsctl settles down to - * using a single krdc. - * - * The major advantage of this scheme is that nsctl sorts out all the - * rdc_fd_t's so that they can only point to krdc's that are currently - * active. 
- */ -int -rdc_intercept(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - char *pathname; - char *bitmap; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - pathname = urdc->primary.file; - bitmap = urdc->primary.bitmap; - } else { - pathname = urdc->secondary.file; - bitmap = urdc->secondary.bitmap; - } - - if (!krdc->b_tok) - krdc->b_tok = nsc_register_path(bitmap, NSC_CACHE | NSC_DEVICE, - _rdc_io_hc); - - if (!krdc->c_tok) - krdc->c_tok = nsc_register_path(pathname, NSC_CACHE, - _rdc_io_hc); - - if (!krdc->r_tok) - krdc->r_tok = nsc_register_path(pathname, NSC_DEVICE, - _rdc_io_hr); - - if (!krdc->c_tok || !krdc->r_tok) { - (void) rdc_unintercept(krdc); - return (ENXIO); - } - - return (0); -} - - -static void -wait_unregistering(rdc_k_info_t *krdc) -{ - while (krdc->group->unregistering > 0) - (void) cv_wait_sig(&krdc->group->unregistercv, &rdc_conf_lock); -} - -static void -set_unregistering(rdc_k_info_t *krdc) -{ - wait_unregistering(krdc); - - krdc->group->unregistering++; -} - -static void -wakeup_unregistering(rdc_k_info_t *krdc) -{ - if (krdc->group->unregistering <= 0) - return; - - krdc->group->unregistering--; - cv_broadcast(&krdc->group->unregistercv); -} - - -/* - * rdc_unintercept - * - * Unregister for IO on this device. - * - * See comments above rdc_intercept. 
- */ -int -rdc_unintercept(rdc_k_info_t *krdc) -{ - int err = 0; - int rc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - mutex_enter(&rdc_conf_lock); - set_unregistering(krdc); - krdc->type_flag |= RDC_UNREGISTER; - mutex_exit(&rdc_conf_lock); - - if (krdc->r_tok) { - rc = nsc_unregister_path(krdc->r_tok, 0); - if (rc) { - cmn_err(CE_WARN, "!rdc: unregister rawfd %d", rc); - err = rc; - } - krdc->r_tok = NULL; - } - - if (krdc->c_tok) { - rc = nsc_unregister_path(krdc->c_tok, 0); - if (rc) { - cmn_err(CE_WARN, "!rdc: unregister cachefd %d", rc); - if (!err) - err = rc; - } - krdc->c_tok = NULL; - } - - if (krdc->b_tok) { - rc = nsc_unregister_path(krdc->b_tok, 0); - if (rc) { - cmn_err(CE_WARN, "!rdc: unregister bitmap %d", rc); - err = rc; - } - krdc->b_tok = NULL; - } - - rdc_group_enter(krdc); - - /* Wait for all necessary _rdc_close() calls to complete */ - while ((krdc->c_ref + krdc->r_ref + krdc->b_ref) != 0) { - krdc->closing++; - cv_wait(&krdc->closingcv, &krdc->group->lock); - krdc->closing--; - } - - rdc_clr_flags(urdc, RDC_ENABLED); - rdc_group_exit(krdc); - - - /* - * Check there are no outstanding writes in progress. - * This can happen when a set is being disabled which - * is one of the 'one_to_many' chain, that did not - * intercept the original write call. - */ - - for (;;) { - rdc_group_enter(krdc); - if (krdc->aux_state & RDC_AUXWRITE) { - rdc_group_exit(krdc); - /* - * This doesn't happen very often, - * just delay a bit and re-look. - */ - delay(50); - } else { - rdc_group_exit(krdc); - break; - } - } - - mutex_enter(&rdc_conf_lock); - krdc->type_flag &= ~RDC_UNREGISTER; - wakeup_unregistering(krdc); - mutex_exit(&rdc_conf_lock); - - return (err); -} - - -/* - * _rdc_rlse_d - * Internal version of _rdc_rlse_devs(), only concerned with the - * data device, not the bitmap. 
- */ - -static void -_rdc_rlse_d(rdc_k_info_t *krdc, int devs) -{ - _rdc_info_dev_t *cip; - _rdc_info_dev_t *rip; - int raw = (devs & RDC_RAW); - - if (!krdc) { - cmn_err(CE_WARN, "!rdc: _rdc_rlse_devs null krdc"); - return; - } - - ASSERT((devs & (~RDC_BMP)) != 0); - - cip = &krdc->devices->id_cache_dev; - rip = &krdc->devices->id_raw_dev; - - if (IS_RSRV(cip)) { - /* decrement count */ - - if (raw) { - if (cip->bi_ofailed > 0) { - cip->bi_ofailed--; - } else if (cip->bi_orsrv > 0) { - cip->bi_orsrv--; - } - } else { - if (cip->bi_failed > 0) { - cip->bi_failed--; - } else if (cip->bi_rsrv > 0) { - cip->bi_rsrv--; - } - } - - /* - * reset nsc_fd ownership back link, it is only set if - * we have really done an underlying reserve, not for - * failed (faked) reserves. - */ - - if (cip->bi_rsrv > 0 || cip->bi_orsrv > 0) { - nsc_set_owner(cip->bi_fd, krdc->iodev); - } else { - nsc_set_owner(cip->bi_fd, NULL); - } - - /* release nsc_fd */ - - if (!IS_RSRV(cip)) { - nsc_release(cip->bi_fd); - } - } else if (IS_RSRV(rip)) { - /* decrement count */ - - if (raw) { - if (rip->bi_failed > 0) { - rip->bi_failed--; - } else if (rip->bi_rsrv > 0) { - rip->bi_rsrv--; - } - } else { - if (rip->bi_ofailed > 0) { - rip->bi_ofailed--; - } else if (rip->bi_orsrv > 0) { - rip->bi_orsrv--; - } - } - - /* - * reset nsc_fd ownership back link, it is only set if - * we have really done an underlying reserve, not for - * failed (faked) reserves. - */ - - if (rip->bi_rsrv > 0 || rip->bi_orsrv > 0) { - nsc_set_owner(rip->bi_fd, krdc->iodev); - } else { - nsc_set_owner(rip->bi_fd, NULL); - } - - /* release nsc_fd and any waiters */ - - if (!IS_RSRV(rip)) { - rip->bi_flag = 0; - nsc_release(rip->bi_fd); - cv_broadcast(&krdc->devices->id_rcv); - } - } else { - cmn_err(CE_WARN, "!rdc: _rdc_rlse_devs no reserve? krdc %p", - (void *) krdc); - } -} - -/* - * _rdc_rlse_devs - * Release named underlying devices and take care of setting the - * back link on the nsc_fd to the correct parent iodev. 
- * - * NOTE: the 'devs' argument must be the same as that passed to - * the preceding _rdc_rsrv_devs call. - */ - -void -_rdc_rlse_devs(rdc_k_info_t *krdc, int devs) -{ - - DTRACE_PROBE(_rdc_rlse_devs_start); - mutex_enter(&krdc->devices->id_rlock); - - ASSERT(!(devs & RDC_CACHE)); - - if ((devs & (~RDC_BMP)) != 0) { - _rdc_rlse_d(krdc, devs); - } - - if ((devs & RDC_BMP) != 0) { - if (krdc->bmaprsrv > 0 && --krdc->bmaprsrv == 0) { - nsc_release(krdc->bitmapfd); - } - } - - mutex_exit(&krdc->devices->id_rlock); - -} - -/* - * _rdc_rsrv_d - * Reserve device flagged, unless its companion is already reserved, - * in that case increase the reserve on the companion. Take care - * of setting the nsc_fd ownership back link to the correct parent - * iodev pointer. - */ - -static int -_rdc_rsrv_d(int raw, _rdc_info_dev_t *rid, _rdc_info_dev_t *cid, int flag, - rdc_k_info_t *krdc) -{ - _rdc_info_dev_t *p = NULL; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int other = 0; - int rc; - - -#ifdef DEBUG - if ((rid->bi_rsrv < 0) || - (cid->bi_rsrv < 0) || - (rid->bi_orsrv < 0) || - (cid->bi_orsrv < 0) || - (rid->bi_failed < 0) || - (cid->bi_failed < 0) || - (rid->bi_ofailed < 0) || - (cid->bi_ofailed < 0)) { - cmn_err(CE_WARN, - "!_rdc_rsrv_d: negative counts (rsrv %d %d orsrv %d %d)", - rid->bi_rsrv, cid->bi_rsrv, - rid->bi_orsrv, cid->bi_orsrv); - cmn_err(CE_WARN, - "!_rdc_rsrv_d: negative counts (fail %d %d ofail %d %d)", - rid->bi_failed, cid->bi_failed, - rid->bi_ofailed, cid->bi_ofailed); - cmn_err(CE_PANIC, "_rdc_rsrv_d: negative counts (krdc %p)", - (void *) krdc); - } -#endif - - /* - * If user wants to do a cache reserve and it's already - * raw reserved internally, we need to do a real nsc_reserve, so wait - * until the release has been done. 
- */ - if (IS_RSRV(rid) && (flag == RDC_EXTERNAL) && - (raw == 0) && (rid->bi_flag != RDC_EXTERNAL)) { - krdc->devices->id_release++; - while (IS_RSRV(rid)) - cv_wait(&krdc->devices->id_rcv, - &krdc->devices->id_rlock); - krdc->devices->id_release--; - } - - /* select underlying device to use */ - - if (IS_RSRV(rid)) { - p = rid; - if (!raw) { - other = 1; - } - } else if (IS_RSRV(cid)) { - p = cid; - if (raw) { - other = 1; - } - } - - /* just increment count and return if already reserved */ - - if (p && !RFAILED(p)) { - if (other) { - p->bi_orsrv++; - } else { - p->bi_rsrv++; - } - - /* set nsc_fd ownership back link */ - nsc_set_owner(p->bi_fd, krdc->iodev); - return (0); - } - - /* attempt reserve */ - - if (!p) { - p = raw ? rid : cid; - } - - if (!p->bi_fd) { - /* rpc server raced with rdc_dev_close() */ - return (EIO); - } - if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) { - /* - * convert failed counts into reserved counts, and add - * in this reserve. - */ - - p->bi_orsrv = p->bi_ofailed; - p->bi_rsrv = p->bi_failed; - - if (other) { - p->bi_orsrv++; - } else { - p->bi_rsrv++; - } - - p->bi_ofailed = 0; - p->bi_failed = 0; - - /* set nsc_fd ownership back link */ - - nsc_set_owner(p->bi_fd, krdc->iodev); - } else if (rc != EINTR) { - /* - * If this is the master, and the secondary is not - * failed, then just fake this external reserve so that - * we can do remote io to the secondary and continue to - * provide service to the client. - * - * Subsequent calls to _rdc_rsrv_d() will re-try the - * nsc_reserve() until it succeeds. 
- */ - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - !(rdc_get_vflags(urdc) & RDC_LOGGING) && - !((rdc_get_vflags(urdc) & RDC_SLAVE) && - (rdc_get_vflags(urdc) & RDC_SYNCING))) { - if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - /* Primary, so reverse sync needed */ - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "nsc_reserve failed"); - rdc_many_exit(krdc); - rc = -1; -#ifdef DEBUG - cmn_err(CE_NOTE, "!nsc_reserve failed " - "with rc == %d\n", rc); -#endif - } else { - rc = 0; - } - - if (other) { - p->bi_ofailed++; - } else { - p->bi_failed++; - } - - if (krdc->maxfbas == 0) { - /* - * fake a maxfbas value for remote i/o, - * this will get reset when the next - * successful reserve happens as part - * of the rdc_attach_fd() callback. - */ - krdc->maxfbas = 128; - } - } - } - - if (rc == 0 && raw) { - p->bi_flag = flag; - } - - - return (rc); -} - -/* - * _rdc_rsrv_devs - * Reserve named underlying devices. - * - */ - -int -_rdc_rsrv_devs(rdc_k_info_t *krdc, int devs, int flag) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int write = 0; - int rc = 0; - int got = 0; - - if (!krdc) { - return (EINVAL); - } - - ASSERT(!(devs & RDC_CACHE)); - - mutex_enter(&krdc->devices->id_rlock); - - if ((devs & (~RDC_BMP)) != 0) { - if ((rc = _rdc_rsrv_d((devs & RDC_CACHE) == 0, - &krdc->devices->id_raw_dev, &krdc->devices->id_cache_dev, - flag, krdc)) != 0) { - if (rc == -1) { - /* - * we need to call rdc_write_state() - * after we drop the mutex - */ - write = 1; - rc = 0; - } else { - cmn_err(CE_WARN, - "!rdc: nsc_reserve(%s) failed %d\n", - nsc_pathname(krdc->c_fd), rc); - } - } else { - got |= (devs & (~RDC_BMP)); - } - } - - if (rc == 0 && (devs & RDC_BMP) != 0) { - if (krdc->bitmapfd == NULL) - rc = EIO; - else if ((krdc->bmaprsrv == 0) && - (rc = nsc_reserve(krdc->bitmapfd, 0)) != 0) { - cmn_err(CE_WARN, "!rdc: nsc_reserve(%s) failed %d\n", - nsc_pathname(krdc->bitmapfd), rc); - } else { - 
krdc->bmaprsrv++; - got |= RDC_BMP; - } - if (!RDC_SUCCESS(rc)) { - /* Undo any previous reserve */ - if (got != 0) - _rdc_rlse_d(krdc, got); - } - } - - mutex_exit(&krdc->devices->id_rlock); - - if (write) { - rdc_write_state(urdc); - } - - return (rc); -} - - -/* - * Read from the remote end, ensuring that if this is a many group in - * slave mode that we only remote read from the secondary with the - * valid data. - */ -int -_rdc_remote_read(rdc_k_info_t *krdc, nsc_buf_t *h, nsc_off_t pos, - nsc_size_t len, int flag) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *this = krdc; /* krdc that was requested */ - int rc; - - if (flag & NSC_RDAHEAD) { - /* - * no point in doing readahead remotely, - * just say we did it ok - the client is about to - * throw this buffer away as soon as we return. - */ - return (NSC_DONE); - } - - /* - * If this is a many group with a reverse sync in progress and - * this is not the slave krdc/urdc, then search for the slave - * so that we can do the remote io from the correct secondary. - */ - if ((rdc_get_mflags(urdc) & RDC_SLAVE) && - !(rdc_get_vflags(urdc) & RDC_SLAVE)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - break; - } - rdc_many_exit(krdc); - - this = krdc; - } - -read1: - if (rdc_get_vflags(urdc) & RDC_LOGGING) { - /* cannot do remote io without the remote node! 
*/ - rc = ENETDOWN; - goto read2; - } - - - /* wait for the remote end to have the latest data */ - - if (IS_ASYNC(urdc)) { - while (krdc->group->ra_queue.blocks != 0) { - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - (void) rdc_drain_queue(krdc->index); - } - } - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - rc = rdc_net_read(krdc->index, krdc->remote_index, h, pos, len); - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - /* If read error keep trying every secondary until no more */ -read2: - if (!RDC_SUCCESS(rc) && IS_MANY(krdc) && - !(rdc_get_mflags(urdc) & RDC_SLAVE)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - rdc_many_exit(krdc); - goto read1; - } - rdc_many_exit(krdc); - } - - return (rc); -} - - -/* - * _rdc_alloc_buf - * Allocate a buffer of data - * - * Calling/Exit State: - * Returns NSC_DONE or NSC_HIT for success, NSC_PENDING for async - * I/O, > 0 is an error code. - * - * Description: - */ -int rdcbufs = 0; - -static int -_rdc_alloc_buf(rdc_fd_t *rfd, nsc_off_t pos, nsc_size_t len, int flag, - rdc_buf_t **ptr) -{ - rdc_k_info_t *krdc = rfd->rdc_info; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - nsc_vec_t *vec = NULL; - rdc_buf_t *h; - size_t size; - int ioflag; - int rc = 0; - - if (RDC_IS_BMP(rfd) || RDC_IS_QUE(rfd)) - return (EIO); - - if (len == 0) - return (EINVAL); - - if (flag & NSC_WRBUF) { - - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY) && - !(rdc_get_vflags(urdc) & RDC_LOGGING)) { - /* - * Forbid writes to secondary unless logging. 
- */ - return (EIO); - } - } - - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED)) { - /* - * Forbid any io to secondary if it needs a sync. - */ - return (EIO); - } - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_vflags(urdc) & RDC_RSYNC_NEEDED) && - !(rdc_get_vflags(urdc) & RDC_VOL_FAILED) && - !(rdc_get_vflags(urdc) & RDC_SLAVE)) { - /* - * Forbid any io to primary if it needs a reverse sync - * and is not actively syncing. - */ - return (EIO); - } - - /* Bounds checking */ - ASSERT(urdc->volume_size != 0); - if (pos + len > urdc->volume_size) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc: Attempt to access beyond end of rdc volume"); -#endif - return (EIO); - } - - h = *ptr; - if (h == NULL) { - /* should never happen (nsctl does this for us) */ -#ifdef DEBUG - cmn_err(CE_WARN, "!_rdc_alloc_buf entered without buffer!"); -#endif - h = (rdc_buf_t *)_rdc_alloc_handle(NULL, NULL, NULL, rfd); - if (h == NULL) - return (ENOMEM); - - h->rdc_bufh.sb_flag &= ~NSC_HALLOCATED; - *ptr = h; - } - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_alloc_buf: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - - h->rdc_bufh.sb_error = 0; - h->rdc_bufh.sb_flag |= flag; - h->rdc_bufh.sb_pos = pos; - h->rdc_bufh.sb_len = len; - ioflag = flag; - - bzero(&h->rdc_sync, sizeof (h->rdc_sync)); - mutex_init(&h->rdc_sync.lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&h->rdc_sync.cv, NULL, CV_DRIVER, NULL); - - if (flag & NSC_WRBUF) - _rdc_async_throttle(krdc, len); /* throttle incoming io */ - - /* - * Use remote io when: - * - local volume is failed - * - reserve status is failed - */ - if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED) || IS_RFAILED(krdc)) { - rc = EIO; - } else { - rc = nsc_alloc_buf(RDC_U_FD(krdc), pos, len, - ioflag, &h->rdc_bufp); - if (!RDC_SUCCESS(rc)) { - rdc_many_enter(krdc); - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - /* Primary, so reverse sync needed */ - rdc_set_mflags(urdc, 
RDC_RSYNC_NEEDED); - } else { - /* Secondary, so forward sync needed */ - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - } - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "nsc_alloc_buf failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - } - - if (RDC_SUCCESS(rc)) { - h->rdc_bufh.sb_vec = h->rdc_bufp->sb_vec; - h->rdc_flags |= RDC_ALLOC; - - /* - * If in slave and reading data, remote read on top of - * the buffer to ensure that we have the latest data. - */ - if ((flag & NSC_READ) && - (rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_mflags(urdc) & RDC_SLAVE)) { - rc = _rdc_remote_read(krdc, &h->rdc_bufh, - pos, len, flag); - /* - * Set NSC_MIXED so that the - * cache will throw away this buffer when we free - * it since we have combined data from multiple - * sources into a single buffer. - */ - h->rdc_bufp->sb_flag |= NSC_MIXED; - } - } - - /* - * If nsc_alloc_buf above fails, or local volume is failed or - * bitmap is failed or reserve, then we fill the buf from remote - */ - - if ((!RDC_SUCCESS(rc)) && (rdc_get_vflags(urdc) & RDC_PRIMARY) && - !(rdc_get_vflags(urdc) & RDC_LOGGING)) { - if (flag & NSC_NODATA) { - ASSERT(!(flag & NSC_READ)); - h->rdc_flags |= RDC_REMOTE_BUF; - h->rdc_bufh.sb_vec = NULL; - } else { - size = sizeof (nsc_vec_t) * 2; - h->rdc_vsize = size + FBA_SIZE(len); - vec = kmem_zalloc(h->rdc_vsize, KM_SLEEP); - - if (!vec) { - rc = ENOMEM; - goto error; - } - - /* single flat buffer */ - - vec[0].sv_addr = (uchar_t *)vec + size; - vec[0].sv_len = FBA_SIZE(len); - vec[0].sv_vme = 0; - - /* null terminator */ - - vec[1].sv_addr = NULL; - vec[1].sv_len = 0; - vec[1].sv_vme = 0; - - h->rdc_bufh.sb_vec = vec; - h->rdc_flags |= RDC_REMOTE_BUF; - h->rdc_flags |= RDC_VEC_ALLOC; - } - - if (flag & NSC_READ) { - rc = _rdc_remote_read(krdc, &h->rdc_bufh, - pos, len, flag); - } else { - rc = NSC_DONE; - } - } -error: - if (!RDC_SUCCESS(rc)) { - h->rdc_bufh.sb_error = rc; - } - - return (rc); -} - - -/* - * _rdc_free_buf - */ - -static int 
-_rdc_free_buf(rdc_buf_t *h) -{ - int rc = 0; - - if (h->rdc_flags & RDC_ALLOC) { - if (h->rdc_bufp) { - rc = nsc_free_buf(h->rdc_bufp); - } - h->rdc_flags &= ~(RDC_ALLOC); - - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_free_buf(%p): nsc_free_buf(%p) returned %d", - (void *) h, (void *) h->rdc_bufp, rc); -#endif - return (rc); - } - } - - if (h->rdc_flags & (RDC_REMOTE_BUF|RDC_VEC_ALLOC)) { - if (h->rdc_flags & RDC_VEC_ALLOC) { - kmem_free(h->rdc_bufh.sb_vec, h->rdc_vsize); - } - h->rdc_flags &= ~(RDC_REMOTE_BUF|RDC_VEC_ALLOC); - } - - if (h->rdc_anon) { - /* anon buffers still pending */ - DTRACE_PROBE1(rdc_free_buf_err, aio_buf_t, h->rdc_anon); - } - - if ((h->rdc_bufh.sb_flag & NSC_HALLOCATED) == 0) { - rc = _rdc_free_handle(h, h->rdc_fd); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_free_buf(%p): _rdc_free_handle returned %d", - (void *) h, rc); -#endif - return (rc); - } - } else { - h->rdc_bufh.sb_flag = NSC_HALLOCATED; - h->rdc_bufh.sb_vec = NULL; - h->rdc_bufh.sb_error = 0; - h->rdc_bufh.sb_pos = 0; - h->rdc_bufh.sb_len = 0; - h->rdc_anon = NULL; - h->rdc_vsize = 0; - - cv_destroy(&h->rdc_sync.cv); - mutex_destroy(&h->rdc_sync.lock); - - } - - return (0); -} - - -/* - * _rdc_open - * Open a device - * - * Calling/Exit State: - * Returns a token to identify the device. - * - * Description: - * Performs the housekeeping operations associated with an upper layer - * of the nsctl stack opening a device. - */ - -/* ARGSUSED */ - -static int -_rdc_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - rdc_k_info_t *krdc; -#ifdef DEBUG - rdc_u_info_t *urdc; -#endif - rdc_fd_t *rfd; - int raw = ((flag & NSC_CACHE) == 0); - int index; - int bmp = 0; - int queue = 0; - - rfd = kmem_zalloc(sizeof (*rfd), KM_SLEEP); - if (!rfd) - return (ENOMEM); - - /* - * Take config lock to prevent a race with the - * (de)configuration code. 
- */ - - mutex_enter(&rdc_conf_lock); - - index = rdc_lookup_enabled(path, 0); - if (index < 0) { - index = rdc_lookup_bitmap(path); - if (index >= 0) - bmp = 1; - } - if (index < 0) { - index = rdc_lookup_diskq(path); - if (index >= 0) - queue = 1; - } - if (index < 0) { - /* not found in config */ - mutex_exit(&rdc_conf_lock); - kmem_free(rfd, sizeof (*rfd)); - return (ENXIO); - } -#ifdef DEBUG - urdc = &rdc_u_info[index]; -#endif - krdc = &rdc_k_info[index]; - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - ASSERT(IS_ENABLED(urdc)); - - if (bmp) { - krdc->b_ref++; - } else if (raw) { - krdc->r_ref++; - } else if (!queue) { - krdc->c_ref++; - } - - rfd->rdc_info = krdc; - if (bmp) - rfd->rdc_type = RDC_BMP; - else if (queue) - rfd->rdc_type = RDC_QUE; - else - rfd->rdc_oflags = flag; - - rdc_group_exit(krdc); - - *cdp = (blind_t)rfd; - - return (0); -} - -static int -_rdc_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - return (_rdc_open(path, NSC_CACHE|flag, cdp, iodev)); -} - -static int -_rdc_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - return (_rdc_open(path, NSC_DEVICE|flag, cdp, iodev)); -} - - -/* - * _rdc_close - * Close a device - * - * Calling/Exit State: - * Always succeeds - returns 0 - * - * Description: - * Performs the housekeeping operations associated with an upper layer - * of the sd stack closing a shadowed device. - */ - -static int -_rdc_close(rfd) -rdc_fd_t *rfd; -{ - rdc_k_info_t *krdc = rfd->rdc_info; - int bmp = RDC_IS_BMP(rfd); - int raw = RDC_IS_RAW(rfd); - int queue = RDC_IS_QUE(rfd); - - /* - * we don't keep ref counts for the queue, so skip this stuff. 
- * we may not even have a valid krdc at this point - */ - if (queue) - goto queue; - rdc_group_enter(krdc); - - if (bmp) { - krdc->b_ref--; - } else if (raw && !queue) { - krdc->r_ref--; - } else if (!queue) { - krdc->c_ref--; - } - - if (krdc->closing) { - cv_broadcast(&krdc->closingcv); - } - - rdc_group_exit(krdc); -queue: - kmem_free(rfd, sizeof (*rfd)); - return (0); -} - -/* - * _rdc_alloc_handle - * Allocate a handle - * - */ - -static nsc_buf_t * -_rdc_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), rdc_fd_t *rfd) -{ - rdc_buf_t *h; - - h = kmem_zalloc(sizeof (*h), KM_SLEEP); - if (!h) - return (NULL); - - h->rdc_bufp = nsc_alloc_handle(RDC_FD(rfd), d_cb, r_cb, w_cb); - if (!h->rdc_bufp) { - if (!IS_RFAILED(rfd->rdc_info)) { - /* - * This is a real failure from the io provider below. - */ - kmem_free(h, sizeof (*h)); - return (NULL); - } else { - /* EMPTY */ - /* - * This is just a failed primary device where - * we can do remote io to the secondary. - */ - } - } - - h->rdc_bufh.sb_flag = NSC_HALLOCATED; - h->rdc_fd = rfd; - mutex_init(&h->aio_lock, NULL, MUTEX_DRIVER, NULL); - - return (&h->rdc_bufh); -} - - -/* - * _rdc_free_handle - * Free a handle - * - */ - -/* ARGSUSED */ -static int -_rdc_free_handle(rdc_buf_t *h, rdc_fd_t *rfd) -{ - int rc; - - mutex_destroy(&h->aio_lock); - if (h->rdc_bufp) { - rc = nsc_free_handle(h->rdc_bufp); - if (!RDC_SUCCESS(rc)) - return (rc); - } - kmem_free(h, sizeof (rdc_buf_t)); - return (0); -} - - -/* - * _rdc_attach - * Attach - * - * Calling/Exit State: - * Returns 0 for success, errno on failure. 
- * - * Description: - */ - -static int -_rdc_attach(rdc_fd_t *rfd, nsc_iodev_t *iodev) -{ - rdc_k_info_t *krdc; - int raw = RDC_IS_RAW(rfd); - int rc; - - if ((RDC_IS_BMP(rfd)) || RDC_IS_QUE(rfd)) - return (EINVAL); - - krdc = rfd->rdc_info; - if (krdc == NULL) - return (EINVAL); - - mutex_enter(&krdc->devices->id_rlock); - krdc->iodev = iodev; - mutex_exit(&krdc->devices->id_rlock); - - rc = _rdc_rsrv_devs(krdc, (raw ? RDC_RAW : RDC_CACHE), RDC_EXTERNAL); - return (rc); -} - - -/* - * _rdc_detach - * Detach - * - * Calling/Exit State: - * Returns 0 for success, always succeeds - * - * Description: - */ - -static int -_rdc_detach(rdc_fd_t *rfd, nsc_iodev_t *iodev) -{ - rdc_k_info_t *krdc = rfd->rdc_info; - int raw = RDC_IS_RAW(rfd); - - /* - * Flush the async queue if necessary. - */ - - if (IS_ASYNC(&rdc_u_info[krdc->index]) && !RDC_IS_DISKQ(krdc->group)) { - int tries = 1; - - while (krdc->group->ra_queue.blocks != 0 && tries--) { - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - (void) rdc_drain_queue(krdc->index); - } - - /* force disgard of possibly blocked flusher threads */ - if (rdc_drain_queue(krdc->index) != 0) { -#ifdef DEBUG - net_queue *qp = &krdc->group->ra_queue; -#endif - do { - mutex_enter(&krdc->group->ra_queue.net_qlock); - krdc->group->asyncdis = 1; - cv_broadcast(&krdc->group->asyncqcv); - mutex_exit(&krdc->group->ra_queue.net_qlock); - cmn_err(CE_WARN, - "!RDC: async I/O pending and not drained " - "for %s during detach", - rdc_u_info[krdc->index].primary.file); -#ifdef DEBUG - cmn_err(CE_WARN, - "!nitems: %" NSC_SZFMT " nblocks: %" - NSC_SZFMT " head: 0x%p tail: 0x%p", - qp->nitems, qp->blocks, - (void *)qp->net_qhead, - (void *)qp->net_qtail); -#endif - } while (krdc->group->rdc_thrnum > 0); - } - } - - mutex_enter(&krdc->devices->id_rlock); - if (krdc->iodev != iodev) - cmn_err(CE_WARN, "!_rdc_detach: iodev mismatch %p : %p", - (void *) krdc->iodev, (void *) iodev); - - krdc->iodev = NULL; - 
mutex_exit(&krdc->devices->id_rlock); - - _rdc_rlse_devs(krdc, (raw ? RDC_RAW : RDC_CACHE)); - - return (0); -} - -/* - * _rdc_get_pinned - * - * only affects local node. - */ - -static int -_rdc_get_pinned(rdc_fd_t *rfd) -{ - return (nsc_get_pinned(RDC_FD(rfd))); -} - -/* - * _rdc_discard_pinned - * - * only affects local node. - */ - -static int -_rdc_discard_pinned(rdc_fd_t *rfd, nsc_off_t pos, nsc_size_t len) -{ - return (nsc_discard_pinned(RDC_FD(rfd), pos, len)); -} - -/* - * _rdc_partsize - * - * only affects the local node. - */ - -static int -_rdc_partsize(rdc_fd_t *rfd, nsc_size_t *ptr) -{ - rdc_u_info_t *urdc; - - urdc = &rdc_u_info[rfd->rdc_info->index]; - /* Always return saved size */ - ASSERT(urdc->volume_size != 0); - *ptr = urdc->volume_size; - return (0); -} - -/* - * _rdc_maxfbas - * - * only affects local node - */ - -/* ARGSUSED */ -static int -_rdc_maxfbas(rdc_fd_t *rfd, int flag, nsc_size_t *ptr) -{ - rdc_k_info_t *krdc = rfd->rdc_info; - int raw = RDC_IS_RAW(rfd); - int rtype = raw ? RDC_RAW : RDC_CACHE; - int rc = 0; - - if (krdc == NULL) - return (EINVAL); - if (flag == NSC_RDAHEAD || flag == NSC_CACHEBLK) { - rc = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (rc == 0) { - rc = nsc_maxfbas(RDC_U_FD(krdc), flag, ptr); - _rdc_rlse_devs(krdc, rtype); - } - } else { - /* Always return saved size */ - ASSERT(krdc->maxfbas != 0); - *ptr = krdc->maxfbas - 1; - } - - return (rc); -} - -/* ARGSUSED */ -static int -_rdc_control(rdc_fd_t *rfd, int cmd, void *ptr, int len) -{ - return (nsc_control(RDC_FD(rfd), cmd, ptr, len)); -} - -/* - * _rdc_attach_fd - * - * called by nsctl as part of nsc_reserve() processing when one of - * SNDR's underlying file descriptors becomes available and metadata - * should be re-acquired. 
- */ -static int -_rdc_attach_fd(blind_t arg) -{ - _rdc_info_dev_t *dip = (_rdc_info_dev_t *)arg; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - nsc_size_t maxfbas, partsize; - int rc; - - krdc = dip->bi_krdc; - urdc = &rdc_u_info[krdc->index]; - - if ((rc = nsc_partsize(dip->bi_fd, &partsize)) != 0) { - cmn_err(CE_WARN, - "!SNDR: cannot get volume size of %s, error %d", - nsc_pathname(dip->bi_fd), rc); - } else if (urdc->volume_size == 0 && partsize > 0) { - /* set volume size for the first time */ - urdc->volume_size = partsize; - } else if (urdc->volume_size != partsize) { - /* - * SNDR cannot yet cope with a volume being resized, - * so fail it. - */ - if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - else - rdc_set_mflags(urdc, RDC_SYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "volume resized"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - - cmn_err(CE_WARN, - "!SNDR: %s changed size from %" NSC_SZFMT " to %" NSC_SZFMT, - nsc_pathname(dip->bi_fd), urdc->volume_size, partsize); - } - - if ((rc = nsc_maxfbas(dip->bi_fd, 0, &maxfbas)) != 0) { - cmn_err(CE_WARN, - "!SNDR: cannot get max transfer size for %s, error %d", - nsc_pathname(dip->bi_fd), rc); - } else if (maxfbas > 0) { - krdc->maxfbas = min(RDC_MAX_MAXFBAS, maxfbas); - } - - return (0); -} - - -/* - * _rdc_pinned - * - * only affects local node - */ - -static void -_rdc_pinned(_rdc_info_dev_t *dip, nsc_off_t pos, nsc_size_t len) -{ - nsc_pinned_data(dip->bi_krdc->iodev, pos, len); -} - - -/* - * _rdc_unpinned - * - * only affects local node. - */ - -static void -_rdc_unpinned(_rdc_info_dev_t *dip, nsc_off_t pos, nsc_size_t len) -{ - nsc_unpinned_data(dip->bi_krdc->iodev, pos, len); -} - - -/* - * _rdc_read - * - * read the specified data into the buffer - go remote if local down, - * or the remote end has more recent data because an reverse sync is - * in progress. 
- */ - -static int -_rdc_read(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - rdc_k_info_t *krdc = h->rdc_fd->rdc_info; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int remote = (RDC_REMOTE(h) || (rdc_get_mflags(urdc) & RDC_SLAVE)); - int rc1, rc2; - - rc1 = rc2 = 0; - - if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_read: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len); - h->rdc_bufh.sb_error = EINVAL; - return (h->rdc_bufh.sb_error); - } - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_read: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - - - if (!remote) { - rc1 = nsc_read(h->rdc_bufp, pos, len, flag); - } - - if (remote || !RDC_SUCCESS(rc1)) { - rc2 = _rdc_remote_read(krdc, &h->rdc_bufh, pos, len, flag); - } - - if (remote && !RDC_SUCCESS(rc2)) - h->rdc_bufh.sb_error = rc2; - else if (!RDC_SUCCESS(rc1) && !RDC_SUCCESS(rc2)) - h->rdc_bufh.sb_error = rc1; - - return (h->rdc_bufh.sb_error); -} - - -static int -_rdc_remote_write(rdc_k_info_t *krdc, rdc_buf_t *h, nsc_buf_t *nsc_h, - nsc_off_t pos, nsc_size_t len, int flag, uint_t bitmask) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rc = 0; - nsc_size_t plen, syncblockpos; - aio_buf_t *anon = NULL; - - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) - return (EINVAL); - - if ((rdc_get_vflags(urdc) & RDC_LOGGING) && - (!IS_STATE(urdc, RDC_QUEUING))) { - goto done; - } - - /* - * this check for RDC_SYNCING may seem redundant, but there is a window - * in rdc_sync, where an async set has not yet been transformed into a - * sync set. 
- */ - if ((!IS_ASYNC(urdc) || IS_STATE(urdc, RDC_SYNCING)) || - RDC_REMOTE(h) || - krdc->group->synccount > 0 || - (rdc_get_vflags(urdc) & RDC_SLAVE) || - (rdc_get_vflags(urdc) & RDC_VOL_FAILED) || - (rdc_get_vflags(urdc) & RDC_BMP_FAILED)) { - - /* sync mode, or remote io mode, or local device is dead */ - rc = rdc_net_write(krdc->index, krdc->remote_index, - nsc_h, pos, len, RDC_NOSEQ, RDC_NOQUE, NULL); - - if ((rc == 0) && - !(rdc_get_vflags(urdc) & RDC_BMP_FAILED) && - !(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - if (IS_STATE(urdc, RDC_SYNCING) && - !IS_STATE(urdc, RDC_FULL) || - !IS_STATE(urdc, RDC_SLAVE)) { - mutex_enter(&krdc->syncbitmutex); - - syncblockpos = LOG_TO_FBA_NUM(krdc->syncbitpos); - - DTRACE_PROBE4(rdc_remote_write, - nsc_off_t, krdc->syncbitpos, - nsc_off_t, syncblockpos, - nsc_off_t, pos, - nsc_size_t, len); - - /* - * If the current I/O's position plus length is - * greater then the sync block position, only - * clear those blocks upto sync block position - */ - if (pos < syncblockpos) { - if ((pos + len) > syncblockpos) - plen = syncblockpos - pos; - else - plen = len; - RDC_CLR_BITMAP(krdc, pos, plen, bitmask, - RDC_BIT_BUMP); - } - mutex_exit(&krdc->syncbitmutex); - } else { - RDC_CLR_BITMAP(krdc, pos, len, bitmask, - RDC_BIT_BUMP); - } - } else if (rc != 0) { - rdc_group_enter(krdc); - rdc_set_flags_log(urdc, RDC_LOGGING, - "net write failed"); - rdc_write_state(urdc); - if (rdc_get_vflags(urdc) & RDC_SYNCING) - krdc->disk_status = 1; - rdc_group_exit(krdc); - } - } else if (!IS_STATE(urdc, RDC_SYNCING)) { - DTRACE_PROBE1(async_enque_start, rdc_buf_t *, h); - - ASSERT(krdc->group->synccount == 0); - /* async mode */ - if ((h == NULL) || ((h->rdc_flags & RDC_ASYNC_VEC) == 0)) { - - rc = _rdc_enqueue_write(krdc, pos, len, flag, NULL); - - } else { - anon = rdc_aio_buf_get(h, krdc->index); - if (anon == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!enqueue write failed for handle %p", - (void *) h); -#endif - return (EINVAL); - } - rc = 
_rdc_enqueue_write(krdc, pos, len, flag, - anon->rdc_abufp); - - /* - * get rid of the aio_buf_t now, as this - * may not be the set that this rdc_buf - * was allocated on, we are done with it anyways - * enqueuing code frees the nsc_abuf - */ - rdc_aio_buf_del(h, krdc); - } - - } else { - ASSERT(IS_STATE(urdc, RDC_SYNCING)); - ASSERT(0); - } - -done: - if ((anon == NULL) && h && (h->rdc_flags & RDC_ASYNC_VEC)) { - /* - * Toss the anonymous buffer if we have one allocated. - */ - anon = rdc_aio_buf_get(h, krdc->index); - if (anon) { - (void) nsc_free_buf(anon->rdc_abufp); - rdc_aio_buf_del(h, krdc); - } - } - - return (rc); -} - -/* - * _rdc_multi_write - * - * Send to multihop remote. Obeys 1 to many if present and we are crazy - * enough to support it. - * - */ -int -_rdc_multi_write(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag, - rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *this = krdc; /* krdc that was requested */ - int rc, retval; - uint_t bitmask; - - retval = rc = 0; - if (!RDC_HANDLE_LIMITS(h, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_multi_write: bounds check: io(handle) pos %" - NSC_XSZFMT "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" - NSC_XSZFMT ")", pos, h->sb_pos, len, h->sb_len); - return (EINVAL); - } - - /* if this is a 1 to many, set all the bits for all the sets */ - do { - if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) { - (void) nsc_uncommit(h, pos, len, flag); - /* set the error, but try other sets */ - retval = EIO; - } - if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - break; - } - rdc_many_exit(krdc); - } - } while (krdc != this); - - urdc = &rdc_u_info[krdc->index]; - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_multi_write: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - 
-multiwrite1: - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (!IS_STATE(urdc, RDC_LOGGING) || - (IS_STATE(urdc, RDC_LOGGING) && - IS_STATE(urdc, RDC_QUEUING)))) { - rc = _rdc_remote_write(krdc, NULL, h, pos, len, flag, bitmask); - } - - if (!RDC_SUCCESS(rc) && retval == 0) { - retval = rc; - } - -multiwrite2: - if (IS_MANY(krdc) && (rdc_get_vflags(urdc) && RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - rc = 0; - rdc_many_exit(krdc); - - goto multiwrite1; - } - rdc_many_exit(krdc); - } - - return (retval); -} - -void -_rdc_diskq_enqueue_thr(rdc_aio_t *p) -{ - rdc_thrsync_t *sync = (rdc_thrsync_t *)p->next; - rdc_k_info_t *krdc = &rdc_k_info[p->index]; - int rc2; - - - rc2 = rdc_diskq_enqueue(krdc, p); - - /* - * overload flag with error return if any - */ - if (!RDC_SUCCESS(rc2)) { - p->flag = rc2; - } else { - p->flag = 0; - } - mutex_enter(&sync->lock); - sync->complete++; - cv_broadcast(&sync->cv); - mutex_exit(&sync->lock); -} - -/* - * _rdc_sync_write_thr - * syncronous write thread which writes to network while - * local write is occuring - */ -void -_rdc_sync_write_thr(rdc_aio_t *p) -{ - rdc_thrsync_t *sync = (rdc_thrsync_t *)p->next; - rdc_buf_t *h = (rdc_buf_t *)p->handle; - rdc_k_info_t *krdc = &rdc_k_info[p->index]; -#ifdef DEBUG - rdc_u_info_t *urdc; -#endif - int rc2; - int bitmask; - - rdc_group_enter(krdc); - krdc->aux_state |= RDC_AUXWRITE; -#ifdef DEBUG - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) { - cmn_err(CE_WARN, "!rdc_sync_write_thr: set not enabled %s:%s", - urdc->secondary.file, - urdc->secondary.bitmap); - } -#endif - rdc_group_exit(krdc); - bitmask = p->iostatus; /* overload */ - rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh, p->pos, p->len, - p->flag, bitmask); - - - /* - * overload flag with error return if any - */ - if (!RDC_SUCCESS(rc2)) { - p->flag = rc2; - } else { - p->flag = 0; 
- } - - rdc_group_enter(krdc); - krdc->aux_state &= ~RDC_AUXWRITE; - rdc_group_exit(krdc); - - mutex_enter(&sync->lock); - sync->complete++; - cv_broadcast(&sync->cv); - mutex_exit(&sync->lock); -} - -/* - * _rdc_write - * - * Commit changes to the buffer locally and send remote. - * - * If this write is whilst the local primary volume is being synced, - * then we write the remote end first to ensure that the new data - * cannot be overwritten by a concurrent sync operation. - */ - -static int -_rdc_write(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - rdc_k_info_t *krdc = h->rdc_fd->rdc_info; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *this; - rdc_k_info_t *multi = NULL; - int remote = RDC_REMOTE(h); - int rc1, rc2; - uint_t bitmask; - int first; - int rsync; - int nthr; - int winddown; - int thrrc = 0; - rdc_aio_t *bp[SNDR_MAXTHREADS]; - aio_buf_t *anon; - nsthread_t *tp; - rdc_thrsync_t *sync = &h->rdc_sync; - - /* If this is the multi-hop secondary, move along to the primary */ - if (IS_MULTI(krdc) && !IS_PRIMARY(urdc)) { - multi = krdc; - krdc = krdc->multi_next; - urdc = &rdc_u_info[krdc->index]; - - if (!IS_ENABLED(urdc)) { - krdc = h->rdc_fd->rdc_info; - urdc = &rdc_u_info[krdc->index]; - multi = NULL; - } - } - this = krdc; - - rsync = (IS_PRIMARY(urdc)) && (IS_SLAVE(urdc)); - - /* - * If this is a many group with a reverse sync in progress and - * this is not the slave krdc/urdc, then search for the slave - * so that we can do the remote io to the correct secondary - * before the local io. 
- */ - if (rsync && !(IS_SLAVE(urdc))) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - break; - } - rdc_many_exit(krdc); - - this = krdc; - } - - urdc = &rdc_u_info[krdc->index]; - - rc1 = rc2 = 0; - first = 1; - nthr = 0; - if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_write: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len); - h->rdc_bufh.sb_error = EINVAL; - return (h->rdc_bufh.sb_error); - } - - DTRACE_PROBE(rdc_write_bitmap_start); - - /* if this is a 1 to many, set all the bits for all the sets */ - do { - if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) { - if (rdc_eio_nobmp) { - (void) nsc_uncommit - (h->rdc_bufp, pos, len, flag); - /* set the error, but try the other sets */ - h->rdc_bufh.sb_error = EIO; - } - } - - if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - break; - } - rdc_many_exit(krdc); - } - - } while (krdc != this); - - urdc = &rdc_u_info[krdc->index]; - - DTRACE_PROBE(rdc_write_bitmap_end); - -write1: - /* just in case we switch mode during write */ - if (IS_ASYNC(urdc) && (!IS_STATE(urdc, RDC_SYNCING)) && - (!IS_STATE(urdc, RDC_LOGGING) || - IS_STATE(urdc, RDC_QUEUING))) { - h->rdc_flags |= RDC_ASYNC_BUF; - } - if (BUF_IS_ASYNC(h)) { - /* - * We are async mode - */ - aio_buf_t *p; - DTRACE_PROBE(rdc_write_async_start); - - if ((krdc->type_flag & RDC_DISABLEPEND) || - ((IS_STATE(urdc, RDC_LOGGING) && - !IS_STATE(urdc, RDC_QUEUING)))) { - goto localwrite; - } - if (IS_STATE(urdc, RDC_VOL_FAILED)) { - /* - * overload remote as we don't want to do local - * IO later. 
forge ahead with async - */ - remote++; - } - if ((IS_STATE(urdc, RDC_SYNCING)) || - (IS_STATE(urdc, RDC_LOGGING) && - !IS_STATE(urdc, RDC_QUEUING))) { - goto localwrite; - } - - p = rdc_aio_buf_add(krdc->index, h); - if (p == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_buf aio_buf allocation failed"); -#endif - goto localwrite; - } - - mutex_enter(&h->aio_lock); - - DTRACE_PROBE(rdc_write_async__allocabuf_start); - rc1 = nsc_alloc_abuf(pos, len, 0, &p->rdc_abufp); - DTRACE_PROBE(rdc_write_async__allocabuf_end); - if (!RDC_SUCCESS(rc1)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_buf NSC_ANON allocation failed rc %d", - rc1); -#endif - mutex_exit(&h->aio_lock); - goto localwrite; - } - h->rdc_flags |= RDC_ASYNC_VEC; - mutex_exit(&h->aio_lock); - - /* - * Copy buffer into anonymous buffer - */ - - DTRACE_PROBE(rdc_write_async_nsccopy_start); - rc1 = - nsc_copy(&h->rdc_bufh, p->rdc_abufp, pos, pos, len); - DTRACE_PROBE(rdc_write_async_nsccopy_end); - if (!RDC_SUCCESS(rc1)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_write: nsc_copy failed rc=%d state %x", - rc1, rdc_get_vflags(urdc)); -#endif - rc1 = nsc_free_buf(p->rdc_abufp); - rdc_aio_buf_del(h, krdc); - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_FLUSH|RDC_OTHERREMOTE, - "nsc_copy failure"); - rdc_group_exit(krdc); - } - DTRACE_PROBE(rdc_write_async_end); - - /* - * using a diskq, launch a thread to queue it - * and free the aio->h and aio - * if the thread fails, do it the old way (see localwrite) - */ - - if (RDC_IS_DISKQ(krdc->group)) { - - if (nthr >= SNDR_MAXTHREADS) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!nthr overrun in _rdc_write"); -#endif - thrrc = ENOEXEC; - goto localwrite; - } - - anon = rdc_aio_buf_get(h, krdc->index); - if (anon == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_aio_buf_get failed for " - "%p", (void *)h); -#endif - thrrc = ENOEXEC; - goto localwrite; - } - - /* get a populated rdc_aio_t */ - bp[nthr] = - rdc_aio_tbuf_get(sync, anon->rdc_abufp, pos, len, - flag, 
krdc->index, bitmask); - - if (bp[nthr] == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdcwrite: " - "kmem_alloc failed bp aio (1)"); -#endif - thrrc = ENOEXEC; - goto localwrite; - } - /* start the queue io */ - tp = nst_create(_rdc_ioset, _rdc_diskq_enqueue_thr, - (void *)bp[nthr], NST_SLEEP); - - if (tp == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!_rdcwrite: nst_create failure"); -#endif - thrrc = ENOEXEC; - } else { - mutex_enter(&(sync->lock)); - sync->threads++; - mutex_exit(&(sync->lock)); - nthr++; - - } - /* - * the handle that is to be enqueued is now in - * the rdc_aio_t, and will be freed there. - * dump the aio_t now. If this is 1 to many - * we may not do this in _rdc_free_buf() - * if this was not the index that the rdc_buf_t - * was allocated on. - */ - rdc_aio_buf_del(h, krdc); - - } - } /* end of async */ - - /* - * We try to overlap local and network IO for the sync case - * (we already do it for async) - * If one to many, we need to track the resulting nst_thread - * so we don't trash the nsc_buf on a free - * Start network IO first then do local (sync only) - */ - - if (IS_PRIMARY(urdc) && !IS_STATE(urdc, RDC_LOGGING) && - !BUF_IS_ASYNC(h)) { - /* - * if forward syncing, we must do local IO first - * then remote io. 
Don't spawn thread - */ - if (!rsync && (IS_STATE(urdc, RDC_SYNCING))) { - thrrc = ENOEXEC; - goto localwrite; - } - if (IS_MULTI(krdc)) { - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - ktmp = krdc->multi_next; - utmp = &rdc_u_info[ktmp->index]; - if (IS_ENABLED(utmp)) - multi = ktmp; - } - if (nthr >= SNDR_MAXTHREADS) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!nthr overrun in _rdc_write"); -#endif - thrrc = ENOEXEC; - goto localwrite; - } - - bp[nthr] = rdc_aio_tbuf_get(sync, h, pos, len, - flag, krdc->index, bitmask); - - if (bp[nthr] == NULL) { - thrrc = ENOEXEC; - goto localwrite; - } - tp = nst_create(_rdc_ioset, _rdc_sync_write_thr, - (void *)bp[nthr], NST_SLEEP); - if (tp == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdcwrite: nst_create failure"); -#endif - thrrc = ENOEXEC; - } else { - mutex_enter(&(sync->lock)); - sync->threads++; - mutex_exit(&(sync->lock)); - nthr++; - } - } -localwrite: - if (!remote && !rsync && first) { - DTRACE_PROBE(rdc_write_nscwrite_start); - rc1 = nsc_write(h->rdc_bufp, pos, len, flag); - DTRACE_PROBE(rdc_write_nscwrite_end); - if (!RDC_SUCCESS(rc1)) { - rdc_many_enter(krdc); - if (IS_PRIMARY(urdc)) - /* Primary, so reverse sync needed */ - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - else - /* Secondary, so sync needed */ - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "local write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - } - - /* - * This is where we either enqueue async IO for the flusher - * or do sync IO in the case of an error in thread creation - * or we are doing a forward sync - * NOTE: if we are async, and using a diskq, we have - * already enqueued this write. 
- * _rdc_remote_write will end up enqueuueing to memory, - * or in case of a thread creation error above, try again - * enqueue the diskq write if thrrc == ENOEXEC - */ - if ((IS_PRIMARY(urdc)) && (thrrc == ENOEXEC) || - (BUF_IS_ASYNC(h) && !RDC_IS_DISKQ(krdc->group))) { - thrrc = 0; - if (IS_MULTI(krdc)) { - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - ktmp = krdc->multi_next; - utmp = &rdc_u_info[ktmp->index]; - if (IS_ENABLED(utmp)) - multi = ktmp; - } - - DTRACE_PROBE(rdc_write_remote_start); - - rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh, - pos, len, flag, bitmask); - - DTRACE_PROBE(rdc_rdcwrite_remote_end); - } - - if (!RDC_SUCCESS(rc1)) { - if ((IS_PRIMARY(urdc)) && !RDC_SUCCESS(rc2)) { - h->rdc_bufh.sb_error = rc1; - } - } else if ((remote || rsync) && !RDC_SUCCESS(rc2)) { - h->rdc_bufh.sb_error = rc2; - } -write2: - /* - * If one to many, jump back into the loop to continue IO - */ - if (IS_MANY(krdc) && (IS_PRIMARY(urdc))) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - rc2 = first = 0; - h->rdc_flags &= ~RDC_ASYNC_BUF; - rdc_many_exit(krdc); - goto write1; - } - rdc_many_exit(krdc); - } - urdc = &rdc_u_info[krdc->index]; - - /* - * collect all of our threads if any - */ - if (nthr) { - - mutex_enter(&(sync->lock)); - /* wait for the threads */ - while (sync->complete != sync->threads) { - cv_wait(&(sync->cv), &(sync->lock)); - } - mutex_exit(&(sync->lock)); - - /* collect status */ - - winddown = 0; - while (winddown < nthr) { - /* - * Get any error return from thread - */ - if ((remote || rsync) && bp[winddown]->flag) { - h->rdc_bufh.sb_error = bp[winddown]->flag; - } - if (bp[winddown]) - kmem_free(bp[winddown], sizeof (rdc_aio_t)); - winddown++; - } - } - - if (rsync && !(IS_STATE(urdc, RDC_VOL_FAILED))) { - rc1 = nsc_write(h->rdc_bufp, pos, len, flag); - if (!RDC_SUCCESS(rc1)) { - /* rsync, so reverse sync needed already 
set */ - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "rsync local write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - - /* - * only report the error if a remote error - * occurred as well. - */ - if (h->rdc_bufh.sb_error) - h->rdc_bufh.sb_error = rc1; - } - } - - if (multi) { - /* Multi-hop secondary, just set bits in the bitmap */ - (void) RDC_SET_BITMAP(multi, pos, len, &bitmask); - } - - return (h->rdc_bufh.sb_error); -} - - -static void -_rdc_bzero(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len) -{ - nsc_vec_t *v; - uchar_t *a; - size_t sz; - int l; - - if (!RDC_HANDLE_LIMITS(h, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_bzero: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->sb_pos, len, h->sb_len); - return; - } - - if (!len) - return; - - /* find starting point */ - - v = h->sb_vec; - pos -= h->sb_pos; - - for (; pos >= FBA_NUM(v->sv_len); v++) - pos -= FBA_NUM(v->sv_len); - - a = v->sv_addr + FBA_SIZE(pos); - l = v->sv_len - FBA_SIZE(pos); - - /* zero */ - - len = FBA_SIZE(len); /* convert to bytes */ - - while (len) { - if (!a) /* end of vec */ - break; - - sz = (size_t)min((nsc_size_t)l, len); - - bzero(a, sz); - - len -= sz; - l -= sz; - a += sz; - - if (!l) { - v++; - a = v->sv_addr; - l = v->sv_len; - } - } -} - - -/* - * _rdc_zero - * - * Zero and commit the specified area of the buffer. - * - * If this write is whilst the local primary volume is being synced, - * then we write the remote end first to ensure that the new data - * cannot be overwritten by a concurrent sync operation. 
- */ - -static int -_rdc_zero(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - rdc_k_info_t *krdc = h->rdc_fd->rdc_info; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *this; - rdc_k_info_t *multi = NULL; - int remote = RDC_REMOTE(h); - int rc1, rc2; - uint_t bitmask; - int first; - int rsync; - - /* If this is the multi-hop secondary, move along to the primary */ - if (IS_MULTI(krdc) && !(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - multi = krdc; - krdc = krdc->multi_next; - urdc = &rdc_u_info[krdc->index]; - - if (!IS_ENABLED(urdc)) { - krdc = h->rdc_fd->rdc_info; - urdc = &rdc_u_info[krdc->index]; - multi = NULL; - } - } - this = krdc; - - rsync = ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_mflags(urdc) & RDC_SLAVE)); - - /* - * If this is a many group with a reverse sync in progress and - * this is not the slave krdc/urdc, then search for the slave - * so that we can do the remote io to the correct secondary - * before the local io. - */ - if (rsync && !(rdc_get_vflags(urdc) & RDC_SLAVE)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - break; - } - rdc_many_exit(krdc); - - this = krdc; - } - - rc1 = rc2 = 0; - first = 1; - - if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_zero: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len); - h->rdc_bufh.sb_error = EINVAL; - return (h->rdc_bufh.sb_error); - } - -zero1: - if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) { - (void) nsc_uncommit(h->rdc_bufp, pos, len, flag); - h->rdc_bufh.sb_error = EIO; - goto zero2; - } - - if (IS_ASYNC(urdc)) { - /* - * We are async mode - */ - aio_buf_t *p; - - if ((krdc->type_flag & RDC_DISABLEPEND) || - (rdc_get_vflags(urdc) & RDC_LOGGING)) { - 
mutex_exit(&krdc->group->ra_queue.net_qlock); - goto localzero; - } - - if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED) || - (rdc_get_vflags(urdc) & RDC_BMP_FAILED)) { - mutex_exit(&krdc->group->ra_queue.net_qlock); - goto zero2; - } - if (rdc_get_vflags(urdc) & RDC_LOGGING) { - mutex_exit(&krdc->group->ra_queue.net_qlock); - goto localzero; - } - p = rdc_aio_buf_add(krdc->index, h); - if (p == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_buf aio_buf allocation failed"); -#endif - goto localzero; - } - mutex_enter(&h->aio_lock); - rc1 = nsc_alloc_abuf(pos, len, 0, &p->rdc_abufp); - if (!RDC_SUCCESS(rc1)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_buf NSC_ANON allocation failed rc %d", - rc1); -#endif - mutex_exit(&h->aio_lock); - goto localzero; - } - h->rdc_flags |= RDC_ASYNC_VEC; - mutex_exit(&h->aio_lock); - - /* - * Copy buffer into anonymous buffer - */ - - rc1 = nsc_zero(p->rdc_abufp, pos, len, flag); - if (!RDC_SUCCESS(rc1)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_zero: nsc_zero failed rc=%d state %x", - rc1, rdc_get_vflags(urdc)); -#endif - rc1 = nsc_free_buf(p->rdc_abufp); - rdc_aio_buf_del(h, krdc); - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_FLUSH | RDC_OTHERREMOTE, - "nsc_zero failed"); - rdc_group_exit(krdc); - } - } /* end of async */ - -localzero: - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_zero: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - - if (!remote && !rsync && first) { - rc1 = nsc_zero(h->rdc_bufp, pos, len, flag); - if (!RDC_SUCCESS(rc1)) { - ASSERT(rdc_get_vflags(urdc) & RDC_PRIMARY); - rdc_many_enter(krdc); - /* Primary, so reverse sync needed */ - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "nsc_zero failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - } - - /* - * send new data to remote end - nsc_zero has zero'd - * the data in the buffer, or _rdc_bzero will be used below. 
- */ - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - if (first && (remote || rsync || !RDC_SUCCESS(rc1))) { - /* bzero so that we can send new data to remote node */ - _rdc_bzero(&h->rdc_bufh, pos, len); - } - - if (IS_MULTI(krdc)) { - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - ktmp = krdc->multi_next; - utmp = &rdc_u_info[ktmp->index]; - if (IS_ENABLED(utmp)) - multi = ktmp; - } - - rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh, - pos, len, flag, bitmask); - } - - if (!RDC_SUCCESS(rc1)) { - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && !RDC_SUCCESS(rc2)) { - h->rdc_bufh.sb_error = rc1; - } - } else if ((remote || rsync) && !RDC_SUCCESS(rc2)) { - h->rdc_bufh.sb_error = rc2; - } - -zero2: - if (IS_MANY(krdc) && (rdc_get_vflags(urdc) && RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - rc2 = first = 0; - rdc_many_exit(krdc); - goto zero1; - } - rdc_many_exit(krdc); - } - - if (rsync && !(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rc1 = nsc_write(h->rdc_bufp, pos, len, flag); - if (!RDC_SUCCESS(rc1)) { - /* rsync, so reverse sync needed already set */ - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "nsc_write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - - /* - * only report the error if a remote error - * occurred as well. - */ - if (h->rdc_bufh.sb_error) - h->rdc_bufh.sb_error = rc1; - } - } - - if (multi) { - /* Multi-hop secondary, just set bits in the bitmap */ - (void) RDC_SET_BITMAP(multi, pos, len, &bitmask); - } - - return (h->rdc_bufh.sb_error); -} - - -/* - * _rdc_uncommit - * - refresh specified data region in the buffer to prevent the cache - * serving the scribbled on data back to another client. - * - * Only needs to happen on the local node. 
If in remote io mode, then - * just return 0 - we do not cache the data on the local node and the - * changed data will not have made it to the cache on the other node, - * so it has no need to uncommit. - */ - -static int -_rdc_uncommit(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - int remote = RDC_REMOTE(h); - int rc = 0; - - if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_uncommit: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len); - h->rdc_bufh.sb_error = EINVAL; - return (h->rdc_bufh.sb_error); - } - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_uncommit: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - - if (!remote) { - rc = nsc_uncommit(h->rdc_bufp, pos, len, flag); - } - - if (!RDC_SUCCESS(rc)) - h->rdc_bufh.sb_error = rc; - - return (rc); -} - - -/* - * _rdc_trksize - * - * only needs to happen on local node. 
- */ - -static int -_rdc_trksize(rdc_fd_t *rfd, nsc_size_t trksize) -{ - return (nsc_set_trksize(RDC_FD(rfd), trksize)); -} - - -static nsc_def_t _rdc_fd_def[] = { - "Attach", (uintptr_t)_rdc_attach_fd, 0, - "Pinned", (uintptr_t)_rdc_pinned, 0, - "Unpinned", (uintptr_t)_rdc_unpinned, 0, - 0, 0, 0 -}; - - -static nsc_def_t _rdc_io_def[] = { - "Open", (uintptr_t)_rdc_openc, 0, - "Close", (uintptr_t)_rdc_close, 0, - "Attach", (uintptr_t)_rdc_attach, 0, - "Detach", (uintptr_t)_rdc_detach, 0, - "AllocHandle", (uintptr_t)_rdc_alloc_handle, 0, - "FreeHandle", (uintptr_t)_rdc_free_handle, 0, - "AllocBuf", (uintptr_t)_rdc_alloc_buf, 0, - "FreeBuf", (uintptr_t)_rdc_free_buf, 0, - "GetPinned", (uintptr_t)_rdc_get_pinned, 0, - "Discard", (uintptr_t)_rdc_discard_pinned, 0, - "PartSize", (uintptr_t)_rdc_partsize, 0, - "MaxFbas", (uintptr_t)_rdc_maxfbas, 0, - "Control", (uintptr_t)_rdc_control, 0, - "Read", (uintptr_t)_rdc_read, 0, - "Write", (uintptr_t)_rdc_write, 0, - "Zero", (uintptr_t)_rdc_zero, 0, - "Uncommit", (uintptr_t)_rdc_uncommit, 0, - "TrackSize", (uintptr_t)_rdc_trksize, 0, - "Provide", 0, 0, - 0, 0, 0 -}; - -static nsc_def_t _rdc_ior_def[] = { - "Open", (uintptr_t)_rdc_openr, 0, - "Close", (uintptr_t)_rdc_close, 0, - "Attach", (uintptr_t)_rdc_attach, 0, - "Detach", (uintptr_t)_rdc_detach, 0, - "AllocHandle", (uintptr_t)_rdc_alloc_handle, 0, - "FreeHandle", (uintptr_t)_rdc_free_handle, 0, - "AllocBuf", (uintptr_t)_rdc_alloc_buf, 0, - "FreeBuf", (uintptr_t)_rdc_free_buf, 0, - "GetPinned", (uintptr_t)_rdc_get_pinned, 0, - "Discard", (uintptr_t)_rdc_discard_pinned, 0, - "PartSize", (uintptr_t)_rdc_partsize, 0, - "MaxFbas", (uintptr_t)_rdc_maxfbas, 0, - "Control", (uintptr_t)_rdc_control, 0, - "Read", (uintptr_t)_rdc_read, 0, - "Write", (uintptr_t)_rdc_write, 0, - "Zero", (uintptr_t)_rdc_zero, 0, - "Uncommit", (uintptr_t)_rdc_uncommit, 0, - "TrackSize", (uintptr_t)_rdc_trksize, 0, - "Provide", 0, 0, - 0, 0, 0 -}; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c 
b/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c deleted file mode 100644 index b01866c9cc..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c +++ /dev/null @@ -1,3252 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/errno.h> - -#include "../solaris/nsc_thread.h" -#ifdef DS_DDICT -#include "../contract.h" -#endif -#include <sys/nsctl/nsctl.h> - -#include <sys/kmem.h> -#include <sys/ddi.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdc_diskq.h" -#include "rdc_clnt.h" - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -extern nsc_io_t *_rdc_io_hc; - -int rdc_diskq_coalesce = 0; - -int -_rdc_rsrv_diskq(rdc_group_t *group) -{ - int rc = 0; - - mutex_enter(&group->diskqmutex); - if (group->diskqfd == NULL) { - mutex_exit(&group->diskqmutex); - return (EIO); - } else if ((group->diskqrsrv == 0) && - (rc = nsc_reserve(group->diskqfd, 0)) != 0) { - cmn_err(CE_WARN, - "!rdc: nsc_reserve(%s) failed %d\n", - nsc_pathname(group->diskqfd), rc); - } else { - group->diskqrsrv++; - } - - mutex_exit(&group->diskqmutex); - return (rc); -} - -void -_rdc_rlse_diskq(rdc_group_t *group) -{ - mutex_enter(&group->diskqmutex); - if (group->diskqrsrv > 0 && --group->diskqrsrv == 0) { - nsc_release(group->diskqfd); - } - mutex_exit(&group->diskqmutex); -} - -void -rdc_wait_qbusy(disk_queue *q) -{ - ASSERT(MUTEX_HELD(QLOCK(q))); - while (q->busycnt > 0) - cv_wait(&q->busycv, QLOCK(q)); -} - -void -rdc_set_qbusy(disk_queue *q) -{ - ASSERT(MUTEX_HELD(QLOCK(q))); - q->busycnt++; -} - -void -rdc_clr_qbusy(disk_queue *q) -{ - ASSERT(MUTEX_HELD(QLOCK(q))); - q->busycnt--; - if (q->busycnt == 0) - cv_broadcast(&q->busycv); -} - -int -rdc_lookup_diskq(char *pathname) -{ - rdc_u_info_t *urdc; -#ifdef DEBUG - rdc_k_info_t *krdc; -#endif - int index; - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; -#ifdef DEBUG - krdc = &rdc_k_info[index]; -#endif - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - if 
(!IS_ENABLED(urdc)) - continue; - - if (strncmp(pathname, urdc->disk_queue, - NSC_MAXPATH) == 0) - return (index); - } - - return (-1); -} - -void -rdc_unintercept_diskq(rdc_group_t *grp) -{ - if (!RDC_IS_DISKQ(grp)) - return; - if (grp->q_tok) - (void) nsc_unregister_path(grp->q_tok, 0); - grp->q_tok = NULL; -} - -void -rdc_close_diskq(rdc_group_t *grp) -{ - - if (grp == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_close_diskq: NULL group!"); -#endif - return; - } - - if (grp->diskqfd) { - if (nsc_close(grp->diskqfd) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nsc_close on diskq failed"); -#else - ; - /*EMPTY*/ -#endif - } - grp->diskqfd = 0; - grp->diskqrsrv = 0; - } - bzero(&grp->diskq.disk_hdr, sizeof (diskq_header)); -} - -/* - * nsc_open the diskq and attach - * the nsc_fd to krdc->diskqfd - */ -int -rdc_open_diskq(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - rdc_group_t *grp; - int sts; - nsc_size_t size; - char *diskqname; - int mutexheld = 0; - - grp = krdc->group; - urdc = &rdc_u_info[krdc->index]; - - mutex_enter(&grp->diskqmutex); - mutexheld++; - if (urdc->disk_queue[0] == '\0') { - goto fail; - } - - diskqname = &urdc->disk_queue[0]; - - if (grp->diskqfd == NULL) { - grp->diskqfd = nsc_open(diskqname, - NSC_RDCHR_ID|NSC_DEVICE|NSC_WRITE, 0, 0, 0); - if (grp->diskqfd == NULL) { - cmn_err(CE_WARN, "!rdc_open_diskq: Unable to open %s", - diskqname); - goto fail; - } - } - if (!grp->q_tok) - grp->q_tok = nsc_register_path(urdc->disk_queue, - NSC_DEVICE | NSC_CACHE, _rdc_io_hc); - - grp->diskqrsrv = 0; /* init reserve count */ - - mutex_exit(&grp->diskqmutex); - mutexheld--; - /* just test a reserve release */ - sts = _rdc_rsrv_diskq(grp); - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_open_diskq: Reserve failed for %s", - diskqname); - goto fail; - } - sts = nsc_partsize(grp->diskqfd, &size); - _rdc_rlse_diskq(grp); - - if ((sts == 0) && (size < 1)) { - rdc_unintercept_diskq(grp); - rdc_close_diskq(grp); - goto fail; - } - - return (0); - -fail: - 
bzero(&urdc->disk_queue, NSC_MAXPATH); - if (mutexheld) - mutex_exit(&grp->diskqmutex); - return (-1); - -} - -/* - * rdc_count_vecs - * simply vec++'s until sb_addr is null - * returns number of vectors encountered - */ -int -rdc_count_vecs(nsc_vec_t *vec) -{ - nsc_vec_t *vecp; - int i = 0; - vecp = vec; - while (vecp->sv_addr) { - vecp++; - i++; - } - return (i+1); -} -/* - * rdc_setid2idx - * given setid, return index - */ -int -rdc_setid2idx(int setid) -{ - - int index = 0; - - for (index = 0; index < rdc_max_sets; index++) { - if (rdc_u_info[index].setid == setid) - break; - } - if (index >= rdc_max_sets) - index = -1; - return (index); -} - -/* - * rdc_idx2setid - * given an index, return its setid - */ -int -rdc_idx2setid(int index) -{ - return (rdc_u_info[index].setid); -} - -/* - * rdc_fill_ioheader - * fill in all the stuff you want to save on disk - * at the beginnig of each queued write - */ -void -rdc_fill_ioheader(rdc_aio_t *aio, io_hdr *hd, int qpos) -{ - ASSERT(MUTEX_HELD(&rdc_k_info[aio->index].group->diskq.disk_qlock)); - - hd->dat.magic = RDC_IOHDR_MAGIC; - hd->dat.type = RDC_QUEUEIO; - hd->dat.pos = aio->pos; - hd->dat.hpos = aio->pos; - hd->dat.qpos = qpos; - hd->dat.len = aio->len; - hd->dat.flag = aio->flag; - hd->dat.iostatus = aio->iostatus; - hd->dat.setid = rdc_idx2setid(aio->index); - hd->dat.time = nsc_time(); - if (!aio->handle) - hd->dat.flag |= RDC_NULL_BUF; /* no real data to queue */ -} - -/* - * rdc_dump_iohdrs - * give back the iohdr list - * and clear out q->lastio - */ -void -rdc_dump_iohdrs(disk_queue *q) -{ - io_hdr *p, *r; - - ASSERT(MUTEX_HELD(QLOCK(q))); - - p = q->iohdrs; - while (p) { - r = p->dat.next; - kmem_free(p, sizeof (*p)); - q->hdrcnt--; - p = r; - } - q->iohdrs = q->hdr_last = NULL; - q->hdrcnt = 0; - if (q->lastio->handle) - (void) nsc_free_buf(q->lastio->handle); - bzero(&(*q->lastio), sizeof (*q->lastio)); -} - -/* - * rdc_fail_diskq - * set flags, throw away q info - * clean up what you can - * wait for 
flusher threads to stop (taking into account this may be one) - * takes group_lock, so conf, many, and bitmap may not be held - */ -void -rdc_fail_diskq(rdc_k_info_t *krdc, int wait, int flag) -{ - rdc_k_info_t *p; - rdc_u_info_t *q = &rdc_u_info[krdc->index]; - rdc_group_t *group = krdc->group; - disk_queue *dq = &krdc->group->diskq; - - if (IS_STATE(q, RDC_DISKQ_FAILED)) - return; - - if (!(flag & RDC_NOFAIL)) - cmn_err(CE_WARN, "!disk queue %s failure", q->disk_queue); - - if (flag & RDC_DOLOG) { - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_NOFLUSH | RDC_ALLREMOTE, - "disk queue failed"); - rdc_group_exit(krdc); - } - mutex_enter(QHEADLOCK(dq)); - mutex_enter(QLOCK(dq)); - /* - * quick stop of the flushers - * other cleanup is done on the un-failing of the diskq - */ - SET_QHEAD(dq, RDC_DISKQ_DATA_OFF); - SET_QTAIL(dq, RDC_DISKQ_DATA_OFF); - SET_QNXTIO(dq, RDC_DISKQ_DATA_OFF); - SET_LASTQTAIL(dq, 0); - - rdc_dump_iohdrs(dq); - - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - - bzero(krdc->bitmap_ref, krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE); - - if (flag & RDC_DOLOG) /* otherwise, we already have the conf lock */ - rdc_group_enter(krdc); - - else if (!(flag & RDC_GROUP_LOCKED)) - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if (!(flag & RDC_NOFAIL)) { - rdc_set_flags(q, RDC_DISKQ_FAILED); - } - rdc_clr_flags(q, RDC_QUEUING); - - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - if (!(flag & RDC_NOFAIL)) { - rdc_set_flags(q, RDC_DISKQ_FAILED); - } - rdc_clr_flags(q, RDC_QUEUING); - bzero(p->bitmap_ref, p->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE); - /* RDC_QUEUING is cleared in group_log() */ - } - - if (flag & RDC_DOLOG) - rdc_group_exit(krdc); - - /* can't wait for myself to go away, I'm a flusher */ - if (wait & RDC_WAIT) - while (group->rdc_thrnum) - delay(2); - -} - -/* - * rdc_stamp_diskq - * write out diskq header info - * must have disk_qlock held - 
* if rsrvd flag is 0, the nsc_reserve is done - */ -int -rdc_stamp_diskq(rdc_k_info_t *krdc, int rsrvd, int failflags) -{ - nsc_vec_t vec[2]; - nsc_buf_t *head = NULL; - rdc_group_t *grp; - rdc_u_info_t *urdc; - disk_queue *q; - int rc, flags; - - grp = krdc->group; - q = &krdc->group->diskq; - - ASSERT(MUTEX_HELD(&q->disk_qlock)); - - urdc = &rdc_u_info[krdc->index]; - - if (!rsrvd && _rdc_rsrv_diskq(grp)) { - cmn_err(CE_WARN, "!rdc_stamp_diskq: %s reserve failed", - urdc->disk_queue); - mutex_exit(QLOCK(q)); - rdc_fail_diskq(krdc, RDC_NOWAIT, failflags); - mutex_enter(QLOCK(q)); - return (-1); - } - flags = NSC_WRITE | NSC_NOCACHE | NSC_NODATA; - rc = nsc_alloc_buf(grp->diskqfd, 0, 1, flags, &head); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!Alloc buf failed for disk queue %s", - &urdc->disk_queue[0]); - mutex_exit(QLOCK(q)); - rdc_fail_diskq(krdc, RDC_NOWAIT, failflags); - mutex_enter(QLOCK(q)); - return (-1); - } - vec[0].sv_len = FBA_SIZE(1); - vec[0].sv_addr = (uchar_t *)&q->disk_hdr; - vec[1].sv_len = 0; - vec[1].sv_addr = NULL; - - head->sb_vec = &vec[0]; - -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, "!rdc_stamp_diskq: hdr: %p magic: %x state: " - "%x head: %d tail: %d size: %d nitems: %d blocks: %d", - q, QMAGIC(q), QSTATE(q), QHEAD(q), - QTAIL(q), QSIZE(q), QNITEMS(q), QBLOCKS(q)); -#endif - - rc = nsc_write(head, 0, 1, 0); - - if (!RDC_SUCCESS(rc)) { - if (!rsrvd) - _rdc_rlse_diskq(grp); - cmn_err(CE_CONT, "!disk queue %s failed rc %d", - &urdc->disk_queue[0], rc); - mutex_exit(QLOCK(q)); - rdc_fail_diskq(krdc, RDC_NOWAIT, failflags); - mutex_enter(QLOCK(q)); - return (-1); - } - - (void) nsc_free_buf(head); - if (!rsrvd) - _rdc_rlse_diskq(grp); - - return (0); -} - -/* - * rdc_init_diskq_header - * load initial values into the header - */ -void -rdc_init_diskq_header(rdc_group_t *grp, dqheader *header) -{ - int rc; - int type = 0; - disk_queue *q = &grp->diskq; - - ASSERT(MUTEX_HELD(QLOCK(q))); - - /* save q type if this is a failure */ - if (QSTATE(q) 
& RDC_QNOBLOCK) - type = RDC_QNOBLOCK; - bzero(header, sizeof (*header)); - header->h.magic = RDC_DISKQ_MAGIC; - header->h.vers = RDC_DISKQ_VERS; - header->h.state |= (RDC_SHUTDOWN_BAD|type); /* SHUTDOWN_OK on suspend */ - header->h.head_offset = RDC_DISKQ_DATA_OFF; - header->h.tail_offset = RDC_DISKQ_DATA_OFF; - header->h.nitems = 0; - header->h.blocks = 0; - header->h.qwrap = 0; - SET_QNXTIO(q, QHEAD(q)); - SET_QCOALBOUNDS(q, RDC_DISKQ_DATA_OFF); - - /* do this last, as this might be a failure. get the kernel state ok */ - rc = _rdc_rsrv_diskq(grp); - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!init_diskq_hdr: Reserve failed for queue"); - return; - } - (void) nsc_partsize(grp->diskqfd, &header->h.disk_size); - _rdc_rlse_diskq(grp); - -} - -/* - * rdc_unfail_diskq - * the diskq failed for some reason, lets try and re-start it - * the old stuff has already been thrown away - * should just be called from rdc_sync - */ -void -rdc_unfail_diskq(rdc_k_info_t *krdc) -{ - rdc_k_info_t *p; - rdc_u_info_t *q = &rdc_u_info[krdc->index]; - rdc_group_t *group = krdc->group; - disk_queue *dq = &group->diskq; - - rdc_group_enter(krdc); - rdc_clr_flags(q, RDC_ASYNC); - /* someone else won the race... 
*/ - if (!IS_STATE(q, RDC_DISKQ_FAILED)) { - rdc_group_exit(krdc); - return; - } - rdc_clr_flags(q, RDC_DISKQ_FAILED); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - rdc_clr_flags(q, RDC_DISKQ_FAILED); - rdc_clr_flags(q, RDC_ASYNC); - if (IS_STATE(q, RDC_QUEUING)) - rdc_clr_flags(q, RDC_QUEUING); - } - rdc_group_exit(krdc); - - mutex_enter(QLOCK(dq)); - - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - /* real i/o to the queue */ - /* clear RDC_AUXSYNCIP because we cannot halt a sync that's not here */ - krdc->aux_state &= ~RDC_AUXSYNCIP; - if (rdc_stamp_diskq(krdc, 0, RDC_GROUP_LOCKED | RDC_DOLOG) < 0) { - mutex_exit(QLOCK(dq)); - goto fail; - } - - SET_QNXTIO(dq, QHEAD(dq)); - SET_QHDRCNT(dq, 0); - SET_QSTATE(dq, RDC_SHUTDOWN_BAD); /* only suspend can write good */ - dq->iohdrs = NULL; - dq->hdr_last = NULL; - - /* should be none, but.. */ - rdc_dump_iohdrs(dq); - - mutex_exit(QLOCK(dq)); - - -fail: - krdc->aux_state |= RDC_AUXSYNCIP; - return; - -} - -int -rdc_read_diskq_header(rdc_k_info_t *krdc) -{ - int rc; - diskq_header *header; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - if (krdc->group->diskqfd == NULL) { - char buf[NSC_MAXPATH]; - (void) snprintf(buf, NSC_MAXPATH, "%s:%s", urdc->secondary.intf, - &urdc->secondary.intf[0]); - cmn_err(CE_WARN, "!Disk Queue Header read failed for %s", - urdc->group_name[0] == '\0' ? buf: - &urdc->group_name[0]); - return (-1); - } - - header = &krdc->group->diskq.disk_hdr.h; - if (_rdc_rsrv_diskq(krdc->group)) { - return (-1); - } - - rc = rdc_ns_io(krdc->group->diskqfd, NSC_RDBUF, 0, - (uchar_t *)header, sizeof (diskq_header)); - - _rdc_rlse_diskq(krdc->group); - - if (!RDC_SUCCESS(rc)) { - char buf[NSC_MAXPATH]; - (void) snprintf(buf, NSC_MAXPATH, "%s:%s", urdc->secondary.intf, - &urdc->secondary.file[0]); - cmn_err(CE_WARN, "!Disk Queue Header read failed(%d) for %s", - rc, urdc->group_name[0] == '\0' ? 
buf : - &urdc->group_name[0]); - return (-1); - } - return (0); -} - -/* - * rdc_stop_diskq_flusher - */ -void -rdc_stop_diskq_flusher(rdc_k_info_t *krdc) -{ - disk_queue q, *qp; - rdc_group_t *group; -#ifdef DEBUG - cmn_err(CE_NOTE, "!stopping flusher threads"); -#endif - group = krdc->group; - qp = &krdc->group->diskq; - - /* save the queue info */ - q = *qp; - - /* lie a little */ - SET_QTAIL(qp, RDC_DISKQ_DATA_OFF); - SET_QHEAD(qp, RDC_DISKQ_DATA_OFF); - SET_QSTATE(qp, RDC_QDISABLEPEND); - SET_QSTATE(qp, RDC_STOPPINGFLUSH); - - /* drop locks to allow flushers to die */ - mutex_exit(QLOCK(qp)); - mutex_exit(QHEADLOCK(qp)); - rdc_group_exit(krdc); - - while (group->rdc_thrnum) - delay(2); - - rdc_group_enter(krdc); - mutex_enter(QHEADLOCK(qp)); - mutex_enter(QLOCK(qp)); - - CLR_QSTATE(qp, RDC_STOPPINGFLUSH); - *qp = q; -} - -/* - * rdc_enable_diskq - * open the diskq - * and stamp the header onto it. - */ -int -rdc_enable_diskq(rdc_k_info_t *krdc) -{ - rdc_group_t *group; - disk_queue *q; - - group = krdc->group; - q = &group->diskq; - - if (rdc_open_diskq(krdc) < 0) - goto fail; - - mutex_enter(QLOCK(q)); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - - if (rdc_stamp_diskq(krdc, 0, RDC_NOLOG) < 0) { - mutex_exit(QLOCK(q)); - goto fail; - } - - SET_QNXTIO(q, QHEAD(q)); - - mutex_exit(QLOCK(q)); - return (0); - -fail: - mutex_enter(&group->diskqmutex); - rdc_close_diskq(group); - mutex_exit(&group->diskqmutex); - - /* caller has to fail diskq after dropping conf & many locks */ - return (RDC_EQNOADD); -} - -/* - * rdc_resume_diskq - * open the diskq and read the header - */ -int -rdc_resume_diskq(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - rdc_group_t *group; - disk_queue *q; - int rc = 0; - - urdc = &rdc_u_info[krdc->index]; - group = krdc->group; - q = &group->diskq; - - if (rdc_open_diskq(krdc) < 0) { - rc = RDC_EQNOADD; - goto fail; - } - - mutex_enter(QLOCK(q)); - - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - - if 
(rdc_read_diskq_header(krdc) < 0) { - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - } - - /* check diskq magic number */ - if (QMAGIC(q) != RDC_DISKQ_MAGIC) { - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " incorrect magic number in header", urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - } else switch (QVERS(q)) { - diskq_header1 h1; /* version 1 header */ - diskq_header *hc; /* current header */ - -#ifdef NSC_MULTI_TERABYTE - case RDC_DISKQ_VER_ORIG: - /* version 1 diskq header, upgrade to 64bit version */ - h1 = *(diskq_header1 *)(&group->diskq.disk_hdr.h); - hc = &group->diskq.disk_hdr.h; - - cmn_err(CE_WARN, "!SNDR: old version header for diskq %s," - " upgrading to current version", urdc->disk_queue); - hc->vers = RDC_DISKQ_VERS; - hc->state = h1.state; - hc->head_offset = h1.head_offset; - hc->tail_offset = h1.tail_offset; - hc->disk_size = h1.disk_size; - hc->nitems = h1.nitems; - hc->blocks = h1.blocks; - hc->qwrap = h1.qwrap; - hc->auxqwrap = h1.auxqwrap; - hc->seq_last = h1.seq_last; - hc->ack_last = h1.ack_last; - - if (hc->nitems > 0) { - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " old version Q contains data", urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - } - break; -#else - case RDC_DISKQ_VER_64BIT: - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " diskq header newer than current version", - urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - break; -#endif - case RDC_DISKQ_VERS: - /* okay, current version diskq */ - break; - default: - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " unknown diskq header version", urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - break; - } - if 
(IS_QSTATE(q, RDC_SHUTDOWN_BAD)) { - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " unsafe shutdown", urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - } - - CLR_QSTATE(q, RDC_SHUTDOWN_OK); - SET_QSTATE(q, RDC_SHUTDOWN_BAD); - - /* bad, until proven not bad */ - if (rdc_stamp_diskq(krdc, 0, RDC_NOLOG) < 0) { - rdc_fail_diskq(krdc, RDC_NOWAIT, RDC_NOLOG); - rc = RDC_EQNOADD; - } - - SET_QNXTIO(q, QHEAD(q)); - group->diskq.nitems_hwm = QNITEMS(q); - group->diskq.blocks_hwm = QBLOCKS(q); - - mutex_exit(QLOCK(q)); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_resume_diskq: resuming diskq %s \n", - urdc->disk_queue); - cmn_err(CE_NOTE, "!qinfo: " QDISPLAY(q)); -#endif - if (rc == 0) - return (0); - -fail: - - /* caller has to set the diskq failed after dropping it's locks */ - return (rc); - -} - -int -rdc_suspend_diskq(rdc_k_info_t *krdc) -{ - int rc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - disk_queue *q; - - q = &krdc->group->diskq; - - /* grab both diskq locks as we are going to kill the flusher */ - mutex_enter(QHEADLOCK(q)); - mutex_enter(QLOCK(q)); - - if ((krdc->group->rdc_thrnum) && (!IS_QSTATE(q, RDC_STOPPINGFLUSH))) { - SET_QSTATE(q, RDC_STOPPINGFLUSH); - rdc_stop_diskq_flusher(krdc); - CLR_QSTATE(q, RDC_STOPPINGFLUSH); - } - - krdc->group->diskq.disk_hdr.h.state &= ~RDC_SHUTDOWN_BAD; - krdc->group->diskq.disk_hdr.h.state |= RDC_SHUTDOWN_OK; - krdc->group->diskq.disk_hdr.h.state &= ~RDC_QBADRESUME; - - /* let's make sure that the flusher has stopped.. 
*/ - if (krdc->group->rdc_thrnum) { - mutex_exit(QLOCK(q)); - mutex_exit(QHEADLOCK(q)); - rdc_group_exit(krdc); - - while (krdc->group->rdc_thrnum) - delay(5); - - rdc_group_enter(krdc); - mutex_enter(QLOCK(q)); - mutex_enter(QHEADLOCK(q)); - } - /* write refcount to the bitmap */ - if ((rc = rdc_write_refcount(krdc)) < 0) { - rdc_group_exit(krdc); - goto fail; - } - - if (!QEMPTY(q)) { - rdc_set_flags(urdc, RDC_QUEUING); - } else { - rdc_clr_flags(urdc, RDC_QUEUING); - } - - /* fill in diskq header info */ - krdc->group->diskq.disk_hdr.h.state &= ~RDC_QDISABLEPEND; - -#ifdef DEBUG - cmn_err(CE_NOTE, "!suspending disk queue\n" QDISPLAY(q)); -#endif - - /* to avoid a possible deadlock, release in order, and reacquire */ - mutex_exit(QLOCK(q)); - mutex_exit(QHEADLOCK(q)); - - if (krdc->group->count > 1) { - rdc_group_exit(krdc); - goto fail; /* just stamp on the last suspend */ - } - rdc_group_exit(krdc); /* in case this stamp fails */ - mutex_enter(QLOCK(q)); - - rc = rdc_stamp_diskq(krdc, 0, RDC_NOLOG); - - mutex_exit(QLOCK(q)); - -fail: - rdc_group_enter(krdc); - - /* diskq already failed if stamp failed */ - - return (rc); -} - -/* - * copy orig aio to copy, including the nsc_buf_t - */ -int -rdc_dup_aio(rdc_aio_t *orig, rdc_aio_t *copy) -{ - int rc; - bcopy(orig, copy, sizeof (*orig)); - copy->handle = NULL; - - if (orig->handle == NULL) /* no buf to alloc/copy */ - return (0); - - rc = nsc_alloc_abuf(orig->pos, orig->len, 0, ©->handle); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dup_aio: alloc_buf failed (%d)", rc); -#endif - return (rc); - } - rc = nsc_copy(orig->handle, copy->handle, orig->pos, - orig->pos, orig->len); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(copy->handle); -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dup_aio: copy buf failed (%d)", rc); -#endif - return (rc); - } - return (0); -} - -/* - * rdc_qfill_shldwakeup() - * 0 if the memory queue has filled, and the low water - * mark has not been reached. 0 if diskq is empty. 
- * 1 if less than low water mark - * net_queue mutex is already held - */ -int -rdc_qfill_shldwakeup(rdc_k_info_t *krdc) -{ - rdc_group_t *group = krdc->group; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - net_queue *nq = &group->ra_queue; - disk_queue *dq = &group->diskq; - - ASSERT(MUTEX_HELD(&nq->net_qlock)); - - if (!RDC_IS_DISKQ(krdc->group)) - return (0); - - if (nq->qfill_sleeping != RDC_QFILL_ASLEEP) - return (0); - - if (nq->qfflags & RDC_QFILLSTOP) - return (1); - - if (nq->qfflags & RDC_QFILLSLEEP) - return (0); - - if (IS_STATE(urdc, RDC_LOGGING) || IS_STATE(urdc, RDC_SYNCING)) - return (0); - - mutex_enter(QLOCK(dq)); - if ((QNXTIO(dq) == QTAIL(dq)) && !IS_QSTATE(dq, RDC_QFULL)) { - mutex_exit(QLOCK(dq)); - return (0); - } - mutex_exit(QLOCK(dq)); - - if (nq->qfill_sleeping == RDC_QFILL_ASLEEP) { - if (nq->hwmhit) { - if (nq->blocks <= RDC_LOW_QBLOCKS) { - nq->hwmhit = 0; - } else { - return (0); - } - } -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Waking up diskq->memq flusher, flags 0x%x" - " idx: %d", rdc_get_vflags(urdc), urdc->index); -#endif - return (1); - } - return (0); - -} - -/* - * rdc_diskq_enqueue - * enqueue one i/o to the diskq - * after appending some metadata to the front - */ -int -rdc_diskq_enqueue(rdc_k_info_t *krdc, rdc_aio_t *aio) -{ - nsc_vec_t *vec = NULL; - nsc_buf_t *bp = NULL; - nsc_buf_t *qbuf = NULL; - io_hdr *iohdr = NULL; - disk_queue *q; - rdc_group_t *group; - int numvecs; - int i, j, rc = 0; - int retries = 0; - rdc_u_info_t *urdc; - nsc_size_t iofbas; /* len of io + io header len */ - int qtail; - int delay_time = 2; - int print_msg = 1; - -#ifdef DEBUG_WRITER_UBERNOISE - int qhead; -#endif - urdc = &rdc_u_info[krdc->index]; - group = krdc->group; - q = &group->diskq; - - mutex_enter(QLOCK(q)); - - /* - * there is a thread that is blocking because the queue is full, - * don't try to set up this write until all is clear - * check before and after for logging or failed queue just - * in case a thread was in 
flight while the queue was full, - * and in the proccess of failing - */ - while (IS_QSTATE(q, RDC_QFULL)) { - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || - (IS_STATE(urdc, RDC_LOGGING) && - !IS_STATE(urdc, RDC_QUEUING))) { - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (-1); - } - cv_wait(&q->qfullcv, QLOCK(q)); - - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || - (IS_STATE(urdc, RDC_LOGGING) && - !IS_STATE(urdc, RDC_QUEUING))) { - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (-1); - } - - } - - SET_QSTATE(q, QTAILBUSY); - - if (aio->handle == NULL) { - /* we're only going to write the header to the queue */ - numvecs = 2; /* kmem_alloc io header + null terminate */ - iofbas = FBA_LEN(sizeof (io_hdr)); - - } else { - /* find out how many vecs */ - numvecs = rdc_count_vecs(aio->handle->sb_vec) + 1; - iofbas = aio->len + FBA_LEN(sizeof (io_hdr)); - } - - /* - * this, in conjunction with QTAILBUSY, will prevent - * premature dequeuing - */ - - SET_LASTQTAIL(q, QTAIL(q)); - - iohdr = (io_hdr *) kmem_zalloc(sizeof (io_hdr), KM_NOSLEEP); - vec = (nsc_vec_t *) kmem_zalloc(sizeof (nsc_vec_t) * numvecs, - KM_NOSLEEP); - - if (!vec || !iohdr) { - if (!vec) { - cmn_err(CE_WARN, "!vec kmem alloc failed"); - } else { - cmn_err(CE_WARN, "!iohdr kmem alloc failed"); - } - if (vec) - kmem_free(vec, sizeof (*vec)); - if (iohdr) - kmem_free(iohdr, sizeof (*iohdr)); - CLR_QSTATE(q, QTAILBUSY); - SET_LASTQTAIL(q, 0); - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (ENOMEM); - } - - vec[numvecs - 1].sv_len = 0; - vec[numvecs - 1].sv_addr = 0; - - /* now add the write itself */ - bp = aio->handle; - - for (i = 1, j = 0; bp && bp->sb_vec[j].sv_addr && - i < numvecs; i++, j++) { - vec[i].sv_len = bp->sb_vec[j].sv_len; - vec[i].sv_addr = bp->sb_vec[j].sv_addr; - } - -retry: - - /* check for queue wrap, then check for overflow */ - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || - 
(IS_STATE(urdc, RDC_LOGGING) && !IS_STATE(urdc, RDC_QUEUING))) { - kmem_free(iohdr, sizeof (*iohdr)); - kmem_free(vec, sizeof (*vec) * numvecs); - CLR_QSTATE(q, QTAILBUSY); - SET_LASTQTAIL(q, 0); - if (IS_QSTATE(q, RDC_QFULL)) { /* wakeup blocked threads */ - CLR_QSTATE(q, RDC_QFULL); - cv_broadcast(&q->qfullcv); - } - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - - return (-1); - } - - if (QTAILSHLDWRAP(q, iofbas)) { - /* - * just go back to the beginning of the disk - * it's not worth the trouble breaking up the write - */ -#ifdef DEBUG_DISKQWRAP - cmn_err(CE_NOTE, "!wrapping Q tail: " QDISPLAY(q)); -#endif - /*LINTED*/ - WRAPQTAIL(q); - } - - /* - * prepend the write's metadata - */ - rdc_fill_ioheader(aio, iohdr, QTAIL(q)); - - vec[0].sv_len = FBA_SIZE(1); - vec[0].sv_addr = (uchar_t *)iohdr; - - /* check for tail < head */ - - if (!(FITSONQ(q, iofbas))) { - /* - * don't allow any more writes to start - */ - SET_QSTATE(q, RDC_QFULL); - mutex_exit(QLOCK(q)); - - if ((!group->rdc_writer) && !IS_STATE(urdc, RDC_LOGGING)) - (void) rdc_writer(krdc->index); - - delay(delay_time); - q->throttle_delay += delay_time; - retries++; - delay_time *= 2; /* fairly aggressive */ - if ((retries >= 8) || (delay_time >= 256)) { - delay_time = 2; - if (print_msg) { - cmn_err(CE_WARN, "!enqueue: disk queue %s full", - &urdc->disk_queue[0]); - print_msg = 0; -#ifdef DEBUG - cmn_err(CE_WARN, "!qinfo: " QDISPLAY(q)); -#else - cmn_err(CE_CONT, "!qinfo: " QDISPLAYND(q)); -#endif - } - /* - * if this is a no-block queue, or this is a blocking - * queue that is not flushing. reset and log - */ - if ((QSTATE(q) & RDC_QNOBLOCK) || - (IS_STATE(urdc, RDC_QUEUING))) { - - if (IS_STATE(urdc, RDC_QUEUING)) { - cmn_err(CE_WARN, "!SNDR: disk queue %s full and not flushing. 
" - "giving up", &urdc->disk_queue[0]); - cmn_err(CE_WARN, "!SNDR: %s:%s entering logging mode", - urdc->secondary.intf, urdc->secondary.file); - } - - rdc_fail_diskq(krdc, RDC_WAIT, - RDC_DOLOG | RDC_NOFAIL); - kmem_free(iohdr, sizeof (*iohdr)); - kmem_free(vec, sizeof (*vec) * numvecs); - mutex_enter(QLOCK(q)); - CLR_QSTATE(q, QTAILBUSY | RDC_QFULL); - cv_broadcast(&q->qfullcv); - mutex_exit(QLOCK(q)); - SET_LASTQTAIL(q, 0); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (ENOMEM); - } - } - - mutex_enter(QLOCK(q)); - goto retry; - - } - - qtail = QTAIL(q); -#ifdef DEBUG_WRITER_UBERNOISE - qhead = QHEAD(q); -#endif - - /* update tail pointer, nitems on queue and blocks on queue */ - INC_QTAIL(q, iofbas); /* increment tail over i/o size + ioheader size */ - INC_QNITEMS(q, 1); - /* increment counter for i/o blocks only */ - INC_QBLOCKS(q, (iofbas - FBA_LEN(sizeof (io_hdr)))); - - if (QNITEMS(q) > q->nitems_hwm) - q->nitems_hwm = QNITEMS(q); - if (QBLOCKS(q) > q->blocks_hwm) - q->blocks_hwm = QBLOCKS(q); - - if (IS_QSTATE(q, RDC_QFULL)) { - CLR_QSTATE(q, RDC_QFULL); - cv_broadcast(&q->qfullcv); - } - - mutex_exit(QLOCK(q)); - - /* - * if (krdc->io_kstats) { - * mutex_enter(krdc->io_kstats->ks_lock); - * kstat_waitq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - * mutex_exit(krdc->io_kstats->ks_lock); - * } - */ - - DTRACE_PROBE(rdc_diskq_rsrv); - - if (_rdc_rsrv_diskq(group)) { - cmn_err(CE_WARN, "!rdc_enqueue: %s reserve failed", - &urdc->disk_queue[0]); - rdc_fail_diskq(krdc, RDC_WAIT, RDC_DOLOG); - kmem_free(iohdr, sizeof (*iohdr)); - kmem_free(vec, sizeof (*vec) * numvecs); - mutex_enter(QLOCK(q)); - CLR_QSTATE(q, QTAILBUSY); - SET_LASTQTAIL(q, 0); - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (-1); - } - -/* XXX for now do this, but later pre-alloc handle in enable/resume */ - - DTRACE_PROBE(rdc_diskq_alloc_start); - rc = nsc_alloc_buf(group->diskqfd, qtail, iofbas, - NSC_NOCACHE | NSC_WRITE | NSC_NODATA, 
&qbuf); - - DTRACE_PROBE(rdc_diskq_alloc_end); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!disk queue %s alloc failed(%d) %" NSC_SZFMT, - &urdc->disk_queue[0], rc, iofbas); - rdc_fail_diskq(krdc, RDC_WAIT, RDC_DOLOG); - rc = ENOMEM; - goto fail; - } - /* move vec and write to queue */ - qbuf->sb_vec = &vec[0]; - -#ifdef DEBUG_WRITER_UBERNOISE - - cmn_err(CE_NOTE, "!about to write to queue, qbuf: %p, qhead: %d, " - "qtail: %d, len: %d contents: %c%c%c%c%c", - (void *) qbuf, qhead, qtail, iofbas, - qbuf->sb_vec[1].sv_addr[0], - qbuf->sb_vec[1].sv_addr[1], - qbuf->sb_vec[1].sv_addr[2], - qbuf->sb_vec[1].sv_addr[3], - qbuf->sb_vec[1].sv_addr[4]); - cmn_err(CE_CONT, "!qinfo: " QDISPLAYND(q)); - -#endif - - DTRACE_PROBE2(rdc_diskq_nswrite_start, int, qtail, nsc_size_t, iofbas); - rc = nsc_write(qbuf, qtail, iofbas, 0); - DTRACE_PROBE2(rdc_diskq_nswrite_end, int, qtail, nsc_size_t, iofbas); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!disk queue %s write failed %d", - &urdc->disk_queue[0], rc); - rdc_fail_diskq(krdc, RDC_WAIT, RDC_DOLOG); - goto fail; - - } - - mutex_enter(QLOCK(q)); - - SET_LASTQTAIL(q, 0); - CLR_QSTATE(q, QTAILBUSY); - - mutex_exit(QLOCK(q)); - -fail: - - /* - * return what should be returned - * the aio is returned in _rdc_write after status is gathered. 
- */ - - if (qbuf) - qbuf->sb_vec = 0; - (void) nsc_free_buf(qbuf); - - if (aio->handle) - (void) nsc_free_buf(aio->handle); - - _rdc_rlse_diskq(group); - DTRACE_PROBE(rdc_diskq_rlse); - - /* free the iohdr and the vecs */ - - if (iohdr) - kmem_free(iohdr, sizeof (*iohdr)); - if (vec) - kmem_free(vec, sizeof (*vec) * numvecs); - - /* if no flusher running, start one */ - if ((!krdc->group->rdc_writer) && !IS_STATE(urdc, RDC_LOGGING)) - (void) rdc_writer(krdc->index); - - return (rc); -} - -/* - * place this on the pending list of io_hdr's out for flushing - */ -void -rdc_add_iohdr(io_hdr *header, rdc_group_t *group) -{ - disk_queue *q = NULL; -#ifdef DEBUG - io_hdr *p; -#endif - - q = &group->diskq; - - /* paranoia */ - header->dat.next = NULL; - - mutex_enter(QLOCK(q)); -#ifdef DEBUG /* AAAH! double flush!? */ - p = q->iohdrs; - while (p) { - if (p->dat.qpos == header->dat.qpos) { - cmn_err(CE_WARN, "!ADDING DUPLICATE HEADER %" NSC_SZFMT, - p->dat.qpos); - kmem_free(header, sizeof (*header)); - mutex_exit(QLOCK(q)); - return; - } - p = p->dat.next; - } -#endif - if (q->iohdrs == NULL) { - q->iohdrs = q->hdr_last = header; - q->hdrcnt = 1; - mutex_exit(QLOCK(q)); - return; - } - - q->hdr_last->dat.next = header; - q->hdr_last = header; - q->hdrcnt++; - mutex_exit(QLOCK(q)); - return; - -} - -/* - * mark an io header as flushed. 
If it is the qhead, - * then update the qpointers - * free the io_hdrs - * called after the bitmap is cleared by flusher - */ -void -rdc_clr_iohdr(rdc_k_info_t *krdc, nsc_size_t qpos) -{ - rdc_group_t *group = krdc->group; - disk_queue *q = NULL; - io_hdr *hp = NULL; - io_hdr *p = NULL; - int found = 0; - int cnt = 0; - -#ifndef NSC_MULTI_TERABYTE - ASSERT(qpos >= 0); /* assertion to validate change for 64bit */ - if (qpos < 0) /* not a diskq offset */ - return; -#endif - - q = &group->diskq; - mutex_enter(QLOCK(q)); - - hp = p = q->iohdrs; - - /* find outstanding io_hdr */ - while (hp) { - if (hp->dat.qpos == qpos) { - found++; - break; - } - cnt++; - p = hp; - hp = hp->dat.next; - } - - if (!found) { - if (RDC_BETWEEN(QHEAD(q), QNXTIO(q), qpos)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!iohdr already cleared? " - "qpos %" NSC_SZFMT " cnt %d ", qpos, cnt); - cmn_err(CE_WARN, "!Qinfo: " QDISPLAY(q)); -#endif - mutex_exit(QLOCK(q)); - return; - } - mutex_exit(QLOCK(q)); - return; - } - - /* mark it as flushed */ - hp->dat.iostatus = RDC_IOHDR_DONE; - - /* - * if it is the head pointer, travel the list updating the queue - * pointers until the next unflushed is reached, freeing on the way. 
- */ - while (hp && (hp->dat.qpos == QHEAD(q)) && - (hp->dat.iostatus == RDC_IOHDR_DONE)) { -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_NOTE, "!clr_iohdr info: magic %x type %d pos %d" - " qpos %d hpos %d len %d flag 0x%x iostatus %x setid %d", - hp->dat.magic, hp->dat.type, hp->dat.pos, hp->dat.qpos, - hp->dat.hpos, hp->dat.len, hp->dat.flag, - hp->dat.iostatus, hp->dat.setid); -#endif - if (hp->dat.flag & RDC_NULL_BUF) { - INC_QHEAD(q, FBA_LEN(sizeof (io_hdr))); - } else { - INC_QHEAD(q, FBA_LEN(sizeof (io_hdr)) + hp->dat.len); - DEC_QBLOCKS(q, hp->dat.len); - } - - DEC_QNITEMS(q, 1); - - if (QHEADSHLDWRAP(q)) { /* simple enough */ -#ifdef DEBUG_DISKQWRAP - cmn_err(CE_NOTE, "!wrapping Q head: " QDISPLAY(q)); -#endif - /*LINTED*/ - WRAPQHEAD(q); - } - - /* get rid of the iohdr */ - if (hp == q->iohdrs) { - q->iohdrs = hp->dat.next; - kmem_free(hp, sizeof (*hp)); - hp = q->iohdrs; - } else { - if (hp == q->hdr_last) - q->hdr_last = p; - p->dat.next = hp->dat.next; - kmem_free(hp, sizeof (*hp)); - hp = p->dat.next; - } - q->hdrcnt--; - } - - if (QEMPTY(q) && !IS_QSTATE(q, RDC_QFULL) && - !(IS_QSTATE(q, RDC_QDISABLEPEND))) { -#ifdef DEBUG_FLUSHER_UBERNOISE - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - cmn_err(CE_NOTE, "!clr_iohdr: diskq %s empty, " - "resetting defaults", urdc->disk_queue); -#endif - - rdc_init_diskq_header(group, &q->disk_hdr); - SET_QNXTIO(q, QHEAD(q)); - } - - /* wakeup any blocked enqueue threads */ - cv_broadcast(&q->qfullcv); - mutex_exit(QLOCK(q)); -} - -/* - * put in whatever useful checks we can on the io header - */ -int -rdc_iohdr_ok(io_hdr *hdr) -{ - if (hdr->dat.magic != RDC_IOHDR_MAGIC) - goto bad; - return (1); -bad: - -#ifdef DEBUG - cmn_err(CE_WARN, "!Bad io header magic %x type %d pos %" NSC_SZFMT - " hpos %" NSC_SZFMT " qpos %" NSC_SZFMT " len %" NSC_SZFMT - " flag %d iostatus %d setid %d", hdr->dat.magic, - hdr->dat.type, hdr->dat.pos, hdr->dat.hpos, hdr->dat.qpos, - hdr->dat.len, hdr->dat.flag, hdr->dat.iostatus, 
hdr->dat.setid); -#else - cmn_err(CE_WARN, "!Bad io header retrieved"); -#endif - return (0); -} - -/* - * rdc_netqueue_insert() - * add an item to a netqueue. No locks necessary as it should only - * be used in a single threaded manor. If that changes, then - * a lock or assertion should be done here - */ -void -rdc_netqueue_insert(rdc_aio_t *aio, net_queue *q) -{ - rdc_k_info_t *krdc = &rdc_k_info[aio->index]; - - /* paranoid check for bit set */ - RDC_CHECK_BIT(krdc, aio->pos, aio->len); - - if (q->net_qhead == NULL) { - q->net_qhead = q->net_qtail = aio; - - } else { - q->net_qtail->next = aio; - q->net_qtail = aio; - } - q->blocks += aio->len; - q->nitems++; - - if (q->nitems > q->nitems_hwm) { - q->nitems_hwm = q->nitems; - } - if (q->blocks > q->blocks_hwm) { - q->nitems_hwm = q->blocks; - } -} - -/* - * rdc_fill_aio(aio, hdr) - * take the pertinent info from an io_hdr and stick it in - * an aio, including seq number, abuf. - */ -void -rdc_fill_aio(rdc_group_t *grp, rdc_aio_t *aio, io_hdr *hdr, nsc_buf_t *abuf) -{ - if (hdr->dat.flag & RDC_NULL_BUF) { - aio->handle = NULL; - } else { - aio->handle = abuf; - } - aio->qhandle = abuf; - aio->pos = hdr->dat.pos; - aio->qpos = hdr->dat.qpos; - aio->len = hdr->dat.len; - aio->flag = hdr->dat.flag; - if ((aio->index = rdc_setid2idx(hdr->dat.setid)) < 0) - return; - mutex_enter(&grp->diskq.disk_qlock); - if (grp->ra_queue.qfflags & RDC_QFILLSLEEP) { - mutex_exit(&grp->diskq.disk_qlock); - aio->seq = RDC_NOSEQ; - return; - } - if (abuf && aio->qhandle) { - abuf->sb_user++; - } - aio->seq = grp->seq++; - if (grp->seq < aio->seq) - grp->seq = RDC_NEWSEQ + 1; - mutex_exit(&grp->diskq.disk_qlock); - hdr->dat.iostatus = aio->seq; - -} - -#ifdef DEBUG -int maxaios_perbuf = 0; -int midaios_perbuf = 0; -int aveaios_perbuf = 0; -int totaios_perbuf = 0; -int buf2qcalls = 0; - -void -calc_perbuf(int items) -{ - if (totaios_perbuf < 0) { - maxaios_perbuf = 0; - midaios_perbuf = 0; - aveaios_perbuf = 0; - totaios_perbuf = 0; - 
buf2qcalls = 0; - } - - if (items > maxaios_perbuf) - maxaios_perbuf = items; - midaios_perbuf = maxaios_perbuf / 2; - totaios_perbuf += items; - aveaios_perbuf = totaios_perbuf / buf2qcalls; -} -#endif - -/* - * rdc_discard_tmpq() - * free up the passed temporary queue - * NOTE: no cv's or mutexes have been initialized - */ -void -rdc_discard_tmpq(net_queue *q) -{ - rdc_aio_t *aio; - - if (q == NULL) - return; - - while (q->net_qhead) { - aio = q->net_qhead; - q->net_qhead = q->net_qhead->next; - if (aio->qhandle) { - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - } - } - kmem_free(aio, sizeof (*aio)); - q->nitems--; - } - kmem_free(q, sizeof (*q)); - -} - -/* - * rdc_diskq_buf2queue() - * take a chunk of the diskq, parse it and assemble - * a chain of rdc_aio_t's. - * updates QNXTIO() - */ -net_queue * -rdc_diskq_buf2queue(rdc_group_t *grp, nsc_buf_t **abuf, int index) -{ - rdc_aio_t *aio = NULL; - nsc_vec_t *vecp = NULL; - uchar_t *vaddr = NULL; - uchar_t *ioaddr = NULL; - net_queue *netq = NULL; - io_hdr *hdr = NULL; - nsc_buf_t *buf = *abuf; - rdc_u_info_t *urdc = &rdc_u_info[index]; - rdc_k_info_t *krdc = &rdc_k_info[index]; - disk_queue *dq = &grp->diskq; - net_queue *nq = &grp->ra_queue; - int nullbuf = 0; - nsc_off_t endobuf; - nsc_off_t bufoff; - int vlen; - nsc_off_t fpos; - long bufcnt = 0; - int nullblocks = 0; - int fail = 1; - - if (buf == NULL) - return (NULL); - - netq = kmem_zalloc(sizeof (*netq), KM_NOSLEEP); - if (netq == NULL) { - cmn_err(CE_WARN, "!SNDR: unable to allocate net queue"); - return (NULL); - } - - vecp = buf->sb_vec; - vlen = vecp->sv_len; - vaddr = vecp->sv_addr; - bufoff = buf->sb_pos; - endobuf = bufoff + buf->sb_len; - -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_WARN, "!BUFFOFFENTER %d", bufoff); -#endif - /* CONSTCOND */ - while (1) { - if (IS_STATE(urdc, RDC_LOGGING) || - (nq->qfflags & RDC_QFILLSLEEP)) { - fail = 0; - goto fail; - } -#ifdef 
DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_WARN, "!BUFFOFF_0 %d", bufoff); -#endif - - if ((vaddr == NULL) || (vlen == 0)) - break; - - if (vlen <= 0) { - vecp++; - vaddr = vecp->sv_addr; - vlen = vecp->sv_len; - if (vaddr == NULL) - break; - } - - /* get the iohdr information */ - - hdr = kmem_zalloc(sizeof (*hdr), KM_NOSLEEP); - if (hdr == NULL) { - cmn_err(CE_WARN, - "!SNDR: unable to alocate net queue header"); - goto fail; - } - - ioaddr = (uchar_t *)hdr; - - bcopy(vaddr, ioaddr, sizeof (*hdr)); - - if (!rdc_iohdr_ok(hdr)) { - cmn_err(CE_WARN, - "!unable to retrieve i/o data from queue %s " - "at offset %" NSC_SZFMT " bp: %" NSC_SZFMT " bl: %" - NSC_SZFMT, urdc->disk_queue, - bufoff, buf->sb_pos, buf->sb_len); -#ifdef DEBUG_DISKQ - cmn_err(CE_WARN, "!FAILING QUEUE state: %x", - rdc_get_vflags(urdc)); - cmn_err(CE_WARN, "!qinfo: " QDISPLAY(dq)); - cmn_err(CE_WARN, "!VADDR %p, IOADDR %p", vaddr, ioaddr); - cmn_err(CE_WARN, "!BUF %p", buf); -#endif - cmn_err(CE_WARN, "!qinfo: " QDISPLAYND(dq)); - - goto fail; - } - - nullbuf = hdr->dat.flag & RDC_NULL_BUF; - - bufoff += FBA_NUM(sizeof (*hdr)); - - /* out of buffer, set nxtio to re read this last hdr */ - if (!nullbuf && ((bufoff + hdr->dat.len) > endobuf)) { - break; - } - - bufcnt += FBA_NUM(sizeof (*hdr)); - - aio = kmem_zalloc(sizeof (*aio), KM_NOSLEEP); - if (aio == NULL) { - bufcnt -= FBA_NUM(sizeof (*hdr)); - cmn_err(CE_WARN, "!SNDR: net queue aio alloc failed"); - goto fail; - } - - if (!nullbuf) { - /* move to next iohdr in big buf */ - bufoff += hdr->dat.len; - bufcnt += hdr->dat.len; - } - - rdc_fill_aio(grp, aio, hdr, buf); - - if (aio->index < 0) { - cmn_err(CE_WARN, "!Set id %d not found or no longer " - "enabled, failing disk queue", hdr->dat.setid); - kmem_free(aio, sizeof (*aio)); - goto fail; - } - if (aio->seq == RDC_NOSEQ) { - kmem_free(aio, sizeof (*aio)); - fail = 0; - goto fail; - } - if (aio->handle == NULL) - nullblocks += aio->len; - - rdc_add_iohdr(hdr, grp); - hdr = NULL; /* don't 
accidentally free on break or fail */ - rdc_netqueue_insert(aio, netq); - - /* no more buffer, skip the below logic */ - if ((bufoff + FBA_NUM(sizeof (*hdr))) >= endobuf) { - break; - } - - fpos = bufoff - buf->sb_pos; - vecp = buf->sb_vec; - for (; fpos >= FBA_NUM(vecp->sv_len); vecp++) - fpos -= FBA_NUM(vecp->sv_len); - vlen = vecp->sv_len - FBA_SIZE(fpos); - vaddr = vecp->sv_addr + FBA_SIZE(fpos); - /* abuf = NULL; */ - - } - - /* free extraneous header */ - if (hdr) { - kmem_free(hdr, sizeof (*hdr)); - hdr = NULL; - } - - /* - * probably won't happen, but if we didn't goto fail, but - * we don't contain anything meaningful.. return NULL - * and let the flusher or the sleep/wakeup routines - * decide - */ - if (netq && netq->nitems == 0) { - kmem_free(netq, sizeof (*netq)); - return (NULL); - } - -#ifdef DEBUG - buf2qcalls++; - calc_perbuf(netq->nitems); -#endif - if (IS_STATE(urdc, RDC_LOGGING) || - nq->qfflags & RDC_QFILLSLEEP) { - fail = 0; - goto fail; - } - - mutex_enter(QLOCK(dq)); - INC_QNXTIO(dq, bufcnt); - mutex_exit(QLOCK(dq)); - - netq->net_qtail->orig_len = nullblocks; /* overload */ - - return (netq); - -fail: - - if (hdr) { - kmem_free(hdr, sizeof (*hdr)); - } - - if (netq) { - if (netq->nitems > 0) { - /* the never can happen case ... */ - if ((netq->nitems == 1) && - (netq->net_qhead->handle == NULL)) { - (void) nsc_free_buf(buf); - *abuf = NULL; - } - - } - rdc_discard_tmpq(netq); - } - - mutex_enter(QLOCK(dq)); - rdc_dump_iohdrs(dq); - mutex_exit(QLOCK(dq)); - - if (fail) { /* real failure, not just state change */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_diskq_buf2queue: failing disk queue %s", - urdc->disk_queue); -#endif - rdc_fail_diskq(krdc, RDC_NOWAIT, RDC_DOLOG); - } - - return (NULL); - -} - -/* - * rdc_diskq_unqueue - * remove one chunk from the diskq belonging to - * rdc_k_info[index] - * updates the head and tail pointers in the disk header - * but does not write. The header should be written on ack - * flusher should free whatever.. 
- */ -rdc_aio_t * -rdc_diskq_unqueue(int index) -{ - int rc, rc1, rc2; - nsc_off_t qhead; - int nullhandle = 0; - io_hdr *iohdr; - rdc_aio_t *aio = NULL; - nsc_buf_t *buf = NULL; - nsc_buf_t *abuf = NULL; - rdc_group_t *group = NULL; - disk_queue *q = NULL; - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_u_info_t *urdc = &rdc_u_info[index]; - - group = krdc->group; - q = &group->diskq; - - if (group->diskqfd == NULL) /* we've been disabled */ - return (NULL); - - aio = kmem_zalloc(sizeof (*aio), KM_NOSLEEP); - if (!aio) { - return (NULL); - } - - iohdr = kmem_zalloc(sizeof (*iohdr), KM_NOSLEEP); - if (!iohdr) { - kmem_free(aio, sizeof (*aio)); - return (NULL); - } - - mutex_enter(QLOCK(q)); - rdc_set_qbusy(q); /* make sure no one disables the queue */ - mutex_exit(QLOCK(q)); - - DTRACE_PROBE(rdc_diskq_unq_rsrv); - - if (_rdc_rsrv_diskq(group)) { - cmn_err(CE_WARN, "!rdc_unqueue: %s reserve failed", - urdc->disk_queue); - goto fail; - } - - mutex_enter(QHEADLOCK(q)); - mutex_enter(QLOCK(q)); - - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || IS_STATE(urdc, RDC_LOGGING)) { - rdc_clr_qbusy(q); - mutex_exit(QLOCK(q)); - mutex_exit(QHEADLOCK(q)); - kmem_free(aio, sizeof (*aio)); - kmem_free(iohdr, sizeof (*iohdr)); - return (NULL); - } - - if (QNXTIOSHLDWRAP(q)) { -#ifdef DEBUG_DISKQWRAP - cmn_err(CE_NOTE, "!wrapping Q nxtio: " QDISPLAY(q)); -#endif - /*LINTED*/ - WRAPQNXTIO(q); - } - - /* read the metainfo at q->nxt_io first */ - if (QNXTIO(q) == QTAIL(q)) { /* empty */ - - _rdc_rlse_diskq(group); - if (q->lastio->handle) - (void) nsc_free_buf(q->lastio->handle); - bzero(&(*q->lastio), sizeof (*q->lastio)); - - mutex_exit(QHEADLOCK(q)); - rdc_clr_qbusy(q); - mutex_exit(QLOCK(q)); - kmem_free(aio, sizeof (*aio)); - kmem_free(iohdr, sizeof (*iohdr)); - return (NULL); - } - - qhead = QNXTIO(q); - - /* - * have to drop the lock here, sigh. Cannot block incoming io - * we have to wait until after this read to find out how - * much to increment QNXTIO. 
Might as well grab the seq then too - */ - - while ((qhead == LASTQTAIL(q)) && (IS_QSTATE(q, QTAILBUSY))) { - mutex_exit(QLOCK(q)); -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, "!Qtail busy delay lastqtail: %d", qhead); -#endif - delay(5); - mutex_enter(QLOCK(q)); - } - mutex_exit(QLOCK(q)); - - DTRACE_PROBE(rdc_diskq_iohdr_read_start); - - rc = rdc_ns_io(group->diskqfd, NSC_READ, qhead, - (uchar_t *)iohdr, FBA_SIZE(1)); - - DTRACE_PROBE(rdc_diskq_iohdr_read_end); - - if (!RDC_SUCCESS(rc) || !rdc_iohdr_ok(iohdr)) { - cmn_err(CE_WARN, "!unable to retrieve i/o data from queue %s" - " at offset %" NSC_SZFMT " rc %d", urdc->disk_queue, - qhead, rc); -#ifdef DEBUG_DISKQ - cmn_err(CE_WARN, "!qinfo: " QDISPLAY(q)); -#endif - mutex_exit(QHEADLOCK(q)); - goto fail; - } - -/* XXX process buffer here, creating rdc_aio_t's */ - - mutex_enter(QLOCK(q)); - /* update the next pointer */ - if (iohdr->dat.flag == RDC_NULL_BUF) { - INC_QNXTIO(q, FBA_LEN(sizeof (io_hdr))); - nullhandle = 1; - } else { - INC_QNXTIO(q, (FBA_LEN(sizeof (io_hdr)) + iohdr->dat.len)); - } - - aio->seq = group->seq++; - if (group->seq < aio->seq) - group->seq = RDC_NEWSEQ + 1; - - mutex_exit(QLOCK(q)); - mutex_exit(QHEADLOCK(q)); - -#ifdef DEBUG_FLUSHER_UBERNOISE - p = &iohdr->dat; - cmn_err(CE_NOTE, "!unqueued iohdr from %d pos: %d len: %d flag: %d " - "iostatus: %d setid: %d time: %d", qhead, p->pos, p->len, - p->flag, p->iostatus, p->setid, p->time); -#endif - - if (nullhandle) /* nothing to get from queue */ - goto nullbuf; - - /* now that we know how much to get (iohdr.dat.len), get it */ - DTRACE_PROBE(rdc_diskq_unq_allocbuf1_start); - - rc = nsc_alloc_buf(group->diskqfd, qhead + 1, iohdr->dat.len, - NSC_NOCACHE | NSC_READ, &buf); - - DTRACE_PROBE(rdc_diskq_unq_allocbuf1_end); - - /* and get somewhere to keep it for a bit */ - DTRACE_PROBE(rdc_diskq_unq_allocbuf2_start); - - rc1 = nsc_alloc_abuf(qhead + 1, iohdr->dat.len, 0, &abuf); - - DTRACE_PROBE(rdc_diskq_unq_allocbuf2_end); - - if (!RDC_SUCCESS(rc) || 
!RDC_SUCCESS(rc1)) { /* uh-oh */ - cmn_err(CE_WARN, "!disk queue %s read failure", - urdc->disk_queue); - goto fail; - } - - /* move it on over... */ - rc2 = nsc_copy(buf, abuf, qhead + 1, qhead + 1, iohdr->dat.len); - - if (!RDC_SUCCESS(rc2)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nsc_copy failed for diskq unqueue"); -#endif - goto fail; - } - - /* let go of the real buf, we've got the abuf */ - (void) nsc_free_buf(buf); - buf = NULL; - - aio->handle = abuf; - /* Hack in the original sb_pos */ - aio->handle->sb_pos = iohdr->dat.hpos; - - /* skip the RDC_HANDLE_LIMITS check */ - abuf->sb_user |= RDC_DISKQUE; - -nullbuf: - if (nullhandle) { - aio->handle = NULL; - } - - /* set up the rest of the aio values, seq set above ... */ - aio->pos = iohdr->dat.pos; - aio->qpos = iohdr->dat.qpos; - aio->len = iohdr->dat.len; - aio->flag = iohdr->dat.flag; - aio->index = rdc_setid2idx(iohdr->dat.setid); - if (aio->index < 0) { /* uh-oh */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_diskq_unqueue: index < 0"); -#endif - goto fail; - } - - -#ifdef DEBUG_FLUSHER_UBERNOISE_STAMP - h = &q->disk_hdr.h; - cmn_err(CE_NOTE, "!stamping diskq header:\n" - "magic: %x\nstate: %d\nhead_offset: %d\n" - "tail_offset: %d\ndisk_size: %d\nnitems: %d\nblocks: %d\n", - h->magic, h->state, h->head_offset, h->tail_offset, - h->disk_size, h->nitems, h->blocks); -#endif - - _rdc_rlse_diskq(group); - - mutex_enter(QLOCK(q)); - rdc_clr_qbusy(q); - mutex_exit(QLOCK(q)); - - DTRACE_PROBE(rdc_diskq_unq_rlse); - - iohdr->dat.iostatus = aio->seq; - rdc_add_iohdr(iohdr, group); - -#ifdef DEBUG_FLUSHER_UBERNOISE - if (!nullhandle) { - cmn_err(CE_NOTE, "!UNQUEUING, %p" - " contents: %c%c%c%c%c pos: %d len: %d", - (void *)aio->handle, - aio->handle->sb_vec[0].sv_addr[0], - aio->handle->sb_vec[0].sv_addr[1], - aio->handle->sb_vec[0].sv_addr[2], - aio->handle->sb_vec[0].sv_addr[3], - aio->handle->sb_vec[0].sv_addr[4], - aio->handle->sb_pos, aio->handle->sb_len); - } else { - cmn_err(CE_NOTE, "!UNQUEUING, NULL " 
QDISPLAY(q)); - } - cmn_err(CE_NOTE, "!qinfo: " QDISPLAY(q)); -#endif - - return (aio); - -fail: - if (aio) - kmem_free(aio, sizeof (*aio)); - if (iohdr) - kmem_free(iohdr, sizeof (*iohdr)); - if (buf) - (void) nsc_free_buf(buf); - if (abuf) - (void) nsc_free_buf(abuf); - - _rdc_rlse_diskq(group); -#ifdef DEBUG - cmn_err(CE_WARN, "!diskq_unqueue: failing diskq"); -#endif - mutex_enter(QLOCK(q)); - rdc_clr_qbusy(q); - mutex_exit(QLOCK(q)); - - rdc_fail_diskq(krdc, RDC_NOWAIT, RDC_DOLOG); - - return (NULL); -} - -int -rdc_diskq_inuse(rdc_set_t *set, char *diskq) -{ - rdc_u_info_t *urdc; - char *group; - int index; - - group = set->group_name; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if ((rdc_lookup_bitmap(diskq) >= 0) || - (rdc_lookup_configured(diskq) >= 0)) { - return (1); - } - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - - if (!IS_ENABLED(urdc)) - continue; - - /* same diskq different group */ - if ((strcmp(urdc->disk_queue, diskq) == 0) && - (urdc->group_name[0] == '\0' || - strcmp(urdc->group_name, group))) { - return (1); - } - } - /* last, but not least, lets see if someone is getting really funky */ - if ((strcmp(set->disk_queue, set->primary.file) == 0) || - (strcmp(set->disk_queue, set->primary.bitmap) == 0)) { - return (1); - } - - return (0); - -} - -#ifdef DEBUG -int maxlen = 0; -int avelen = 0; -int totalen = 0; -int lencalls = 0; - -void -update_lenstats(int len) -{ - if (lencalls == 0) { - lencalls = 1; - avelen = 0; - maxlen = 0; - totalen = 0; - } - - if (len > maxlen) - maxlen = len; - totalen += len; - avelen = totalen / lencalls; -} -#endif - -/* - * rdc_calc_len() - * returns the size of the diskq that can be read for dequeuing - * always <= RDC_MAX_DISKQREAD - */ -int -rdc_calc_len(rdc_k_info_t *krdc, disk_queue *dq) -{ - nsc_size_t len = 0; - - ASSERT(MUTEX_HELD(QLOCK(dq))); - - /* ---H-----N-----T--- */ - if (QNXTIO(dq) < QTAIL(dq)) { - - len = min(RDC_MAX_DISKQREAD, QTAIL(dq) - QNXTIO(dq)); - - /* 
---T-----H-----N--- */ - } else if (QNXTIO(dq) > QTAIL(dq)) { - if (QWRAP(dq)) { - len = min(RDC_MAX_DISKQREAD, QWRAP(dq) - QNXTIO(dq)); - } else { /* should never happen */ - len = min(RDC_MAX_DISKQREAD, QSIZE(dq) - QNXTIO(dq)); - } - } else if (QNXTIO(dq) == QTAIL(dq)) { - if (QWRAP(dq) && !IS_QSTATE(dq, QNXTIOWRAPD)) - len = min(RDC_MAX_DISKQREAD, QWRAP(dq) - QNXTIO(dq)); - } - - len = min(len, krdc->maxfbas); - -#ifdef DEBUG - lencalls++; - update_lenstats(len); -#endif - - return ((int)len); -} - -/* - * lie a little if we can, so we don't get tied up in - * _nsc_wait_dbuf() on the next read. sb_len MUST be - * restored before nsc_free_buf() however, or we will - * be looking at memory leak city.. - * so update the entire queue with the info as well - * and the one that ends up freeing it, can fix the len - * IMPORTANT: This assumes that we are not cached, in - * 3.2 caching was turned off for data volumes, if that - * changes, then this must too - */ -void -rdc_trim_buf(nsc_buf_t *buf, net_queue *q) -{ - rdc_aio_t *p; - int len; - - if (buf == NULL || q == NULL) - return; - - if (q && (buf->sb_len > - (q->blocks + q->nitems - q->net_qtail->orig_len))) { - len = buf->sb_len; - buf->sb_len = (q->blocks + q->nitems - q->net_qtail->orig_len); - } - - p = q->net_qhead; - do { - p->orig_len = len; - p = p->next; - - } while (p); - -} - -/* - * rdc_read_diskq_buf() - * read a large as possible chunk of the diskq into a nsc_buf_t - * and convert it to a net_queue of rdc_aio_t's to be appended - * to the group's netqueue - */ -net_queue * -rdc_read_diskq_buf(int index) -{ - nsc_buf_t *buf = NULL; - net_queue *tmpnq = NULL; - disk_queue *dq = NULL; - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_u_info_t *urdc = &rdc_u_info[index]; - rdc_group_t *group = krdc->group; - net_queue *nq = &group->ra_queue; - int len = 0; - int rc; - int fail = 0; - int offset = 0; - - if (group == NULL || group->diskqfd == NULL) { - DTRACE_PROBE(rdc_read_diskq_buf_bail1); - return (NULL); 
- } - - dq = &group->diskq; - - mutex_enter(QLOCK(dq)); - rdc_set_qbusy(dq); /* prevent disables on the queue */ - mutex_exit(QLOCK(dq)); - - if (_rdc_rsrv_diskq(group)) { - cmn_err(CE_WARN, "!rdc_readdiskqbuf: %s reserve failed", - urdc->disk_queue); - mutex_enter(QLOCK(dq)); - rdc_clr_qbusy(dq); /* prevent disables on the queue */ - mutex_exit(QLOCK(dq)); - return (NULL); - } - - mutex_enter(QHEADLOCK(dq)); - mutex_enter(QLOCK(dq)); - - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || - IS_STATE(urdc, RDC_LOGGING) || - (nq->qfflags & RDC_QFILLSLEEP)) { - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - DTRACE_PROBE(rdc_read_diskq_buf_bail2); - goto done; - } - - /* - * real corner case here, we need to let the flusher wrap first. - * we've gotten too far ahead, so just delay and try again - */ - if (IS_QSTATE(dq, QNXTIOWRAPD) && AUXQWRAP(dq)) { - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - goto done; - } - - if (QNXTIOSHLDWRAP(dq)) { -#ifdef DEBUG_DISKQWRAP - cmn_err(CE_NOTE, "!wrapping Q nxtio: " QDISPLAY(dq)); -#endif - /*LINTED*/ - WRAPQNXTIO(dq); - } - - /* read the metainfo at q->nxt_io first */ - if (!QNITEMS(dq)) { /* empty */ - - if (dq->lastio->handle) - (void) nsc_free_buf(dq->lastio->handle); - bzero(&(*dq->lastio), sizeof (*dq->lastio)); - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - DTRACE_PROBE(rdc_read_diskq_buf_bail3); - goto done; - } - - - len = rdc_calc_len(krdc, dq); - - if ((len <= 0) || (IS_STATE(urdc, RDC_LOGGING)) || - (IS_STATE(urdc, RDC_DISKQ_FAILED)) || - (nq->qfflags & RDC_QFILLSLEEP)) { - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - /* - * a write could be trying to get on the queue, or if - * the queue is really really small, a complete image - * of it could be on the net queue waiting for flush. - * the latter being a fairly stupid scenario and a gross - * misconfiguration.. but what the heck, why make the thread - * thrash around.. just pause a little here. 
- */ - if (len <= 0) - delay(50); - - DTRACE_PROBE3(rdc_read_diskq_buf_bail4, int, len, - int, rdc_get_vflags(urdc), int, nq->qfflags); - - goto done; - } - - DTRACE_PROBE2(rdc_calc_len, int, len, int, (int)QNXTIO(dq)); - -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_WARN, "!CALC_LEN(%d) h:%d n%d t%d, w%d", - len, QHEAD(dq), QNXTIO(dq), QTAIL(dq), QWRAP(dq)); - cmn_err(CE_CONT, "!qinfo: " QDISPLAYND(dq)); -#endif - SET_QCOALBOUNDS(dq, QNXTIO(dq) + len); - - while ((LASTQTAIL(dq) > 0) && !QWRAP(dq) && - ((QNXTIO(dq) + len) >= LASTQTAIL(dq)) && - (IS_QSTATE(dq, QTAILBUSY))) { - mutex_exit(QLOCK(dq)); - -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_NOTE, "!Qtail busy delay nxtio %d len %d " - "lastqtail: %d", QNXTIO(dq), len, LASTQTAIL(dq)); -#endif - delay(20); - mutex_enter(QLOCK(dq)); - } - - offset = QNXTIO(dq); - - /* - * one last check to see if we have gone logging, or should. - * we may have released the mutex above, so check again - */ - if ((IS_STATE(urdc, RDC_LOGGING)) || - (IS_STATE(urdc, RDC_DISKQ_FAILED)) || - (nq->qfflags & RDC_QFILLSLEEP)) { - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - goto done; - } - - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - - DTRACE_PROBE2(rdc_buf2q_preread, int, offset, int, len); - - rc = nsc_alloc_buf(group->diskqfd, offset, len, - NSC_NOCACHE | NSC_READ, &buf); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!disk queue %s read failure pos %" NSC_SZFMT - " len %d", urdc->disk_queue, QNXTIO(dq), len); - fail++; - buf = NULL; - DTRACE_PROBE(rdc_read_diskq_buf_bail5); - goto done; - } - - DTRACE_PROBE2(rdc_buf2q_postread, int, offset, nsc_size_t, buf->sb_len); - - /* - * convert buf to a net_queue. 
buf2queue will - * update the QNXTIO pointer for us, based on - * the last readable queue item - */ - tmpnq = rdc_diskq_buf2queue(group, &buf, index); - -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_NOTE, "!QBUF p: %d l: %d p+l: %d users: %d qblocks: %d ", - "qitems: %d WASTED: %d", buf->sb_pos, buf->sb_len, - buf->sb_pos+buf->sb_len, buf->sb_user, tmpnq?tmpnq->blocks:-1, - tmpnq?tmpnq->nitems:-1, - tmpnq?((buf->sb_len-tmpnq->nitems) - tmpnq->blocks):-1); -#endif - - DTRACE_PROBE3(rdc_buf2que_returned, net_queue *, tmpnq?tmpnq:0, - uint64_t, tmpnq?tmpnq->nitems:0, - uint_t, tmpnq?tmpnq->net_qhead->seq:0); -done: - - /* we don't need to retain the buf */ - if (tmpnq == NULL) - if (buf) { - (void) nsc_free_buf(buf); - buf = NULL; - } - - rdc_trim_buf(buf, tmpnq); - - mutex_enter(QLOCK(dq)); - rdc_clr_qbusy(dq); - mutex_exit(QLOCK(dq)); - - _rdc_rlse_diskq(group); - - if (fail) { - rdc_fail_diskq(krdc, RDC_NOWAIT, RDC_DOLOG); - tmpnq = NULL; - } - - return (tmpnq); -} - -/* - * rdc_dequeue() - * removes the head of the memory queue - */ -rdc_aio_t * -rdc_dequeue(rdc_k_info_t *krdc, int *rc) -{ - net_queue *q = &krdc->group->ra_queue; - disk_queue *dq = &krdc->group->diskq; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_aio_t *aio; - - *rc = 0; - - if (q == NULL) - return (NULL); - - mutex_enter(&q->net_qlock); - - aio = q->net_qhead; - - if (aio == NULL) { -#ifdef DEBUG - if (q->nitems != 0 || q->blocks != 0 || q->net_qtail != 0) { - cmn_err(CE_PANIC, - "rdc_dequeue(1): q %p, q blocks %" NSC_SZFMT - " , nitems %" NSC_SZFMT ", qhead %p qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) aio, (void *) q->net_qtail); - } -#endif - - mutex_exit(&q->net_qlock); - - if ((!IS_STATE(urdc, RDC_LOGGING)) && - (!(q->qfflags & RDC_QFILLSLEEP)) && - (!IS_STATE(urdc, RDC_SYNCING)) && (QNITEMS(dq) > 0)) { - *rc = EAGAIN; - } - - goto done; - } - - /* aio remove from q */ - - q->net_qhead = aio->next; - aio->next = NULL; - - if (q->net_qtail == aio) - q->net_qtail = 
q->net_qhead; - - q->blocks -= aio->len; - q->nitems--; - -#ifdef DEBUG - if (q->net_qhead == NULL) { - if (q->nitems != 0 || q->blocks != 0 || q->net_qtail != 0) { - cmn_err(CE_PANIC, "rdc_dequeue(2): q %p, q blocks %" - NSC_SZFMT " nitems %" NSC_SZFMT - " , qhead %p qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) q->net_qhead, (void *) q->net_qtail); - } - } -#endif - mutex_exit(&q->net_qlock); -done: - - mutex_enter(&q->net_qlock); - - if (rdc_qfill_shldwakeup(krdc)) - cv_broadcast(&q->qfcv); - - /* - * clear EAGAIN if - * logging or q filler thread is sleeping or stopping altogether - * or if q filler thread is dead already - * or if syncing, this will return a null aio, with no error code set - * telling the flusher to die - */ - if (*rc == EAGAIN) { - if (IS_STATE(urdc, RDC_LOGGING) || - (q->qfflags & (RDC_QFILLSLEEP | RDC_QFILLSTOP)) || - (IS_QSTATE(dq, (RDC_QDISABLEPEND | RDC_STOPPINGFLUSH))) || - (q->qfill_sleeping == RDC_QFILL_DEAD) || - (IS_STATE(urdc, RDC_SYNCING))) - *rc = 0; - } - - mutex_exit(&q->net_qlock); - - return (aio); - -} - -/* - * rdc_qfill_shldsleep() - * returns 1 if the qfilling code should cv_wait() 0 if not. - * reasons for going into cv_wait(); - * there is nothing in the diskq to flush to mem. - * the memory queue has gotten too big and needs more flushing attn. 
- */ -int -rdc_qfill_shldsleep(rdc_k_info_t *krdc) -{ - net_queue *nq = &krdc->group->ra_queue; - disk_queue *dq = &krdc->group->diskq; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - ASSERT(MUTEX_HELD(&nq->net_qlock)); - - if (!RDC_IS_DISKQ(krdc->group)) - return (1); - - if (nq->qfflags & RDC_QFILLSLEEP) { -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Sleeping diskq->memq flusher: QFILLSLEEP idx: %d", - krdc->index); -#endif - return (1); - } - - if (IS_STATE(urdc, RDC_LOGGING) || IS_STATE(urdc, RDC_SYNCING)) { -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Sleeping diskq->memq flusher: Sync|Log (0x%x)" - " idx: %d", rdc_get_vflags(urdc), urdc->index); -#endif - return (1); - } - - mutex_enter(QLOCK(dq)); - if ((QNXTIO(dq) == QTAIL(dq)) && !IS_QSTATE(dq, RDC_QFULL)) { -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Sleeping diskq->memq flusher: QEMPTY"); -#endif - mutex_exit(QLOCK(dq)); - return (1); - } - mutex_exit(QLOCK(dq)); - - if (nq->blocks >= RDC_MAX_QBLOCKS) { - nq->hwmhit = 1; - /* stuck flushers ? */ -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Sleeping diskq->memq flusher: memq full:" - " seq: %d seqack %d", krdc->group->seq, - krdc->group->seqack); -#endif - return (1); - } - - return (0); -} - -/* - * rdc_join_netqueues(a, b) - * appends queue b to queue a updating all the queue info - * as it is assumed queue a is the important one, - * it's mutex must be held. 
no one can add to queue b - */ -void -rdc_join_netqueues(net_queue *q, net_queue *tmpq) -{ - ASSERT(MUTEX_HELD(&q->net_qlock)); - - if (q->net_qhead == NULL) { /* empty */ -#ifdef DEBUG - if (q->blocks != 0 || q->nitems != 0) { - cmn_err(CE_PANIC, "rdc filler: q %p, qhead 0, " - " q blocks %" NSC_SZFMT ", nitems %" NSC_SZFMT, - (void *) q, q->blocks, q->nitems); - } -#endif - q->net_qhead = tmpq->net_qhead; - q->net_qtail = tmpq->net_qtail; - q->nitems = tmpq->nitems; - q->blocks = tmpq->blocks; - } else { - q->net_qtail->next = tmpq->net_qhead; - q->net_qtail = tmpq->net_qtail; - q->nitems += tmpq->nitems; - q->blocks += tmpq->blocks; - } - - if (q->nitems > q->nitems_hwm) { - q->nitems_hwm = q->nitems; - } - - if (q->blocks > q->blocks_hwm) { - q->blocks_hwm = q->blocks; - } -} - -/* - * rdc_qfiller_thr() single thread that moves - * data from the diskq to a memory queue for - * the flusher to pick up. - */ -void -rdc_qfiller_thr(rdc_k_info_t *krdc) -{ - rdc_group_t *grp = krdc->group; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - net_queue *q = &grp->ra_queue; - net_queue *tmpq = NULL; - int index = krdc->index; - - q->qfill_sleeping = RDC_QFILL_AWAKE; - while (!(q->qfflags & RDC_QFILLSTOP)) { - if (!RDC_IS_DISKQ(grp) || - IS_STATE(urdc, RDC_LOGGING) || - IS_STATE(urdc, RDC_DISKQ_FAILED) || - (q->qfflags & RDC_QFILLSLEEP)) { - goto nulltmpq; - } - - DTRACE_PROBE(qfiller_top); - tmpq = rdc_read_diskq_buf(index); - - if (tmpq == NULL) - goto nulltmpq; - - if ((q->qfflags & RDC_QFILLSLEEP) || - IS_STATE(urdc, RDC_LOGGING)) { - rdc_discard_tmpq(tmpq); - goto nulltmpq; - } - - mutex_enter(&q->net_qlock); - - /* race with log, redundant yet paranoid */ - if ((q->qfflags & RDC_QFILLSLEEP) || - IS_STATE(urdc, RDC_LOGGING)) { - rdc_discard_tmpq(tmpq); - mutex_exit(&q->net_qlock); - goto nulltmpq; - } - - - rdc_join_netqueues(q, tmpq); - kmem_free(tmpq, sizeof (*tmpq)); - tmpq = NULL; - - mutex_exit(&q->net_qlock); -nulltmpq: - /* - * sleep for a while if we can. 
- * the enqueuing or flushing code will - * wake us if if necessary. - */ - mutex_enter(&q->net_qlock); - while (rdc_qfill_shldsleep(krdc)) { - q->qfill_sleeping = RDC_QFILL_ASLEEP; - DTRACE_PROBE(qfiller_sleep); - cv_wait(&q->qfcv, &q->net_qlock); - DTRACE_PROBE(qfiller_wakeup); - q->qfill_sleeping = RDC_QFILL_AWAKE; - if (q->qfflags & RDC_QFILLSTOP) { -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, - "!rdc_qfiller_thr: recieved kill signal"); -#endif - mutex_exit(&q->net_qlock); - goto done; - } - } - mutex_exit(&q->net_qlock); - - DTRACE_PROBE(qfiller_bottom); - } -done: - DTRACE_PROBE(qfiller_done); - q->qfill_sleeping = RDC_QFILL_DEAD; /* the big sleep */ - -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_qfiller_thr stopping"); -#endif - q->qfflags &= ~RDC_QFILLSTOP; - -} - -int -_rdc_add_diskq(int index, char *diskq) -{ - rdc_k_info_t *krdc, *kp; - rdc_u_info_t *urdc, *up; - rdc_group_t *group; - int rc; - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - group = krdc->group; - - if (!diskq || urdc->disk_queue[0]) { /* how'd that happen? 
*/ -#ifdef DEBUG - cmn_err(CE_WARN, "!NULL diskq in _rdc_add_diskq"); -#endif - rc = -1; - goto fail; - } - - /* if the enable fails, this is bzero'ed */ - (void) strncpy(urdc->disk_queue, diskq, NSC_MAXPATH); - group->flags &= ~RDC_MEMQUE; - group->flags |= RDC_DISKQUE; - -#ifdef DEBUG - cmn_err(CE_NOTE, "!adding diskq to group %s", urdc->group_name); -#endif - mutex_enter(&rdc_conf_lock); - rc = rdc_enable_diskq(krdc); - mutex_exit(&rdc_conf_lock); - - if (rc == RDC_EQNOADD) { - goto fail; - } - - RDC_ZERO_BITREF(krdc); - for (kp = krdc->group_next; kp != krdc; kp = kp->group_next) { - up = &rdc_u_info[kp->index]; - (void) strncpy(up->disk_queue, diskq, NSC_MAXPATH); - /* size lives in the diskq structure, already set by enable */ - RDC_ZERO_BITREF(kp); - } - -fail: - return (rc); - -} - -/* - * add a diskq to an existing set/group - */ -int -rdc_add_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - char *diskq; - int rc; - int index; - rdc_k_info_t *krdc, *this; - rdc_u_info_t *urdc; - rdc_group_t *group; - nsc_size_t vol_size = 0; - nsc_size_t req_size = 0; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - mutex_exit(&rdc_conf_lock); - if (index < 0) { - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto failed; - } - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - this = &rdc_k_info[index]; - group = krdc->group; - diskq = uparms->rdc_set->disk_queue; - - if (!IS_ASYNC(urdc)) { - spcs_s_add(kstatus, RDC_EQWRONGMODE, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_EQNOQUEUE; - goto failed; - } - - do { - if (!IS_STATE(urdc, RDC_LOGGING)) { - spcs_s_add(kstatus, RDC_EQNOTLOGGING, - uparms->rdc_set->disk_queue); - rc = RDC_EQNOTLOGGING; - goto failed; - } - /* make sure that we have enough bitmap vol */ - req_size = RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size); - req_size += 
FBA_LEN(krdc->bitmap_size * BITS_IN_BYTE); - - rc = _rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, - "!rdc_open_diskq: Bitmap reserve failed"); - spcs_s_add(kstatus, RDC_EBITMAP, - urdc->primary.bitmap); - rc = RDC_EBITMAP; - goto failed; - } - - (void) nsc_partsize(krdc->bitmapfd, &vol_size); - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (vol_size < req_size) { - spcs_s_add(kstatus, RDC_EBITMAP2SMALL, - urdc->primary.bitmap); - rc = RDC_EBITMAP2SMALL; - goto failed; - } - - krdc = krdc->group_next; - urdc = &rdc_u_info[krdc->index]; - - } while (krdc != this); - - if (urdc->disk_queue[0] != '\0') { - spcs_s_add(kstatus, RDC_EQALREADY, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_EQALREADY; - goto failed; - } - - if (uparms->options & RDC_OPT_SECONDARY) { /* how'd we get here? */ - spcs_s_add(kstatus, RDC_EQWRONGMODE); - rc = RDC_EQWRONGMODE; - goto failed; - } - - mutex_enter(&rdc_conf_lock); - if (rdc_diskq_inuse(uparms->rdc_set, uparms->rdc_set->disk_queue)) { - spcs_s_add(kstatus, RDC_EDISKQINUSE, - uparms->rdc_set->disk_queue); - rc = RDC_EDISKQINUSE; - mutex_exit(&rdc_conf_lock); - goto failed; - } - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - rc = _rdc_add_diskq(urdc->index, diskq); - if (rc < 0 || rc == RDC_EQNOADD) { - group->flags &= ~RDC_DISKQUE; - group->flags |= RDC_MEMQUE; - spcs_s_add(kstatus, RDC_EQNOADD, uparms->rdc_set->disk_queue); - rc = RDC_EQNOADD; - } - rdc_group_exit(krdc); -failed: - return (rc); -} - -int -_rdc_init_diskq(rdc_k_info_t *krdc) -{ - rdc_group_t *group = krdc->group; - disk_queue *q = &group->diskq; - - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QNXTIO(q, QHEAD(q)); - - if (rdc_stamp_diskq(krdc, 0, RDC_NOLOG) < 0) - goto fail; - - return (0); -fail: - return (-1); -} - -/* - * inititalize the disk queue. This is a destructive - * operation that will not check for emptiness of the queue. 
- */ -int -rdc_init_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - int rc = 0; - int index; - rdc_k_info_t *krdc, *kp; - rdc_u_info_t *urdc, *up; - rdc_set_t *uset; - rdc_group_t *group; - disk_queue *qp; - - uset = uparms->rdc_set; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uset); - mutex_exit(&rdc_conf_lock); - if (index < 0) { - spcs_s_add(kstatus, RDC_EALREADY, uset->primary.file, - uset->secondary.file); - rc = RDC_EALREADY; - goto fail; - } - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - group = krdc->group; - qp = &group->diskq; - - if (!IS_STATE(urdc, RDC_SYNCING) && !IS_STATE(urdc, RDC_LOGGING)) { - spcs_s_add(kstatus, RDC_EQUEISREP, urdc->disk_queue); - rc = RDC_EQUEISREP; - goto fail; - } - - /* - * a couple of big "ifs" here. in the first implementation - * neither of these will be possible. This will come into - * play when we persist the queue across reboots - */ - if (!(uparms->options & RDC_OPT_FORCE_QINIT)) { - if (!QEMPTY(qp)) { - if (group->rdc_writer) { - spcs_s_add(kstatus, RDC_EQFLUSHING, - urdc->disk_queue); - rc = RDC_EQFLUSHING; - } else { - spcs_s_add(kstatus, RDC_EQNOTEMPTY, - urdc->disk_queue); - rc = RDC_EQNOTEMPTY; - } - goto fail; - } - } - - mutex_enter(QLOCK(qp)); - if (_rdc_init_diskq(krdc) < 0) { - mutex_exit(QLOCK(qp)); - goto fail; - } - rdc_dump_iohdrs(qp); - - rdc_group_enter(krdc); - - rdc_clr_flags(urdc, RDC_QUEUING); - for (kp = krdc->group_next; kp != krdc; kp = kp->group_next) { - up = &rdc_u_info[kp->index]; - rdc_clr_flags(up, RDC_QUEUING); - } - rdc_group_exit(krdc); - - mutex_exit(QLOCK(qp)); - - return (0); -fail: - /* generic queue failure */ - if (!rc) { - spcs_s_add(kstatus, RDC_EQINITFAIL, urdc->disk_queue); - rc = RDC_EQINITFAIL; - } - - return (rc); -} - -int -_rdc_kill_diskq(rdc_u_info_t *urdc) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_group_t *group = krdc->group; - disk_queue *q = &group->diskq; - rdc_u_info_t *up; - rdc_k_info_t *p; - - group->flags 
|= RDC_DISKQ_KILL; -#ifdef DEBUG - cmn_err(CE_NOTE, "!disabling disk queue %s", urdc->disk_queue); -#endif - - mutex_enter(QLOCK(q)); - rdc_init_diskq_header(group, &q->disk_hdr); - rdc_dump_iohdrs(q); - - /* - * nsc_close the queue and zero out the queue name - */ - rdc_wait_qbusy(q); - rdc_close_diskq(group); - mutex_exit(QLOCK(q)); - SET_QSIZE(q, 0); - rdc_clr_flags(urdc, RDC_DISKQ_FAILED); - bzero(urdc->disk_queue, NSC_MAXPATH); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - up = &rdc_u_info[p->index]; - rdc_clr_flags(up, RDC_DISKQ_FAILED); - bzero(up->disk_queue, NSC_MAXPATH); - } - -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdc_kill_diskq: enabling memory queue"); -#endif - group->flags &= ~(RDC_DISKQUE|RDC_DISKQ_KILL); - group->flags |= RDC_MEMQUE; - return (0); -} - -/* - * remove this diskq regardless of whether it is draining or not - * stops the flusher by invalidating the qdata (ie, instant empty) - * remove the disk qeueue from the group, leaving the group with a memory - * queue. 
- */ -int -rdc_kill_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - int rc; - int index; - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - rdc_set_t *rdc_set = uparms->rdc_set; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - mutex_exit(&rdc_conf_lock); - - if (index < 0) { - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - rc = RDC_EALREADY; - goto failed; - } - - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - if (!RDC_IS_DISKQ(krdc->group)) { - spcs_s_add(kstatus, RDC_EQNOQUEUE, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - rc = RDC_EQNOQUEUE; - goto failed; - } - -/* - * if (!IS_STATE(urdc, RDC_LOGGING)) { - * spcs_s_add(kstatus, RDC_EQNOTLOGGING, - * uparms->rdc_set->disk_queue); - * rc = RDC_EQNOTLOGGING; - * goto failed; - * } - */ - rdc_unintercept_diskq(krdc->group); /* stop protecting queue */ - rdc_group_enter(krdc); /* to prevent further flushing */ - rc = _rdc_kill_diskq(urdc); - rdc_group_exit(krdc); - -failed: - return (rc); -} - -/* - * remove a diskq from a group. - * removal of a diskq from a set, or rather - * a set from a queue, is done by reconfigging out - * of the group. 
This removes the diskq from a whole - * group and replaces it with a memory based queue - */ -#define NUM_RETRIES 15 /* Number of retries to wait if no progress */ -int -rdc_rem_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - int index; - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - rdc_k_info_t *this; - volatile rdc_group_t *group; - volatile disk_queue *diskq; - int threads, counter; - long blocks; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - mutex_exit(&rdc_conf_lock); - if (index < 0) { - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - this = &rdc_k_info[index]; - krdc = &rdc_k_info[index]; - - do { - if (!IS_STATE(urdc, RDC_LOGGING)) { - spcs_s_add(kstatus, RDC_EQNOTLOGGING, - urdc->disk_queue); - return (RDC_EQNOTLOGGING); - } - krdc = krdc->group_next; - urdc = &rdc_u_info[krdc->index]; - - } while (krdc != this); - - /* - * If there is no group or diskq configured, we can leave now - */ - if (!(group = krdc->group) || !(diskq = &group->diskq)) - return (0); - - - /* - * Wait if not QEMPTY or threads still active - */ - counter = 0; - while (!QEMPTY(diskq) || group->rdc_thrnum) { - - /* - * Capture counters to determine if progress is being made - */ - blocks = QBLOCKS(diskq); - threads = group->rdc_thrnum; - - /* - * Wait - */ - delay(HZ); - - /* - * Has the group or disk queue gone away while delayed? - */ - if (!(group = krdc->group) || !(diskq = &group->diskq)) - return (0); - - /* - * Are we still seeing progress? - */ - if (blocks == QBLOCKS(diskq) && threads == group->rdc_thrnum) { - /* - * No progress see, decrement retry counter - */ - if (counter++ > NUM_RETRIES) { - /* - * No progress seen, increment retry counter - */ - int rc = group->rdc_thrnum ? 
- RDC_EQFLUSHING : RDC_EQNOTEMPTY; - spcs_s_add(kstatus, rc, urdc->disk_queue); - return (rc); - } - } else { - /* - * Reset counter, as we've made progress - */ - counter = 0; - } - } - - return (0); -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.h b/usr/src/uts/common/avs/ns/rdc/rdc_diskq.h deleted file mode 100644 index 27b476d293..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.h +++ /dev/null @@ -1,332 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_DISKQ_H -#define _RDC_DISKQ_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -#define RDC_DISKQ_HEADER_OFF 0 /* beginning of disk */ -#define RDC_DISKQ_DATA_OFF FBA_LEN(1024) /* beginning of queue */ - -typedef struct qentry { - int magic; - int type; /* special data ? io? bitmap? 
*/ - nsc_off_t pos; /* position it will be in the rdc_aio_t */ - nsc_off_t hpos; /* starting pos of orig nsc_buf_t */ - nsc_off_t qpos; /* where this info is in the queue */ - nsc_size_t len; /* len */ - int flag; - int iostatus; - uint32_t setid; /* krdc */ - time_t time; - void *next; -} q_data; - -typedef union io_dat { - q_data dat; - char dummy[512]; -} io_hdr; - -#define RDC_IOHDR_MAGIC 0x494F4844 /* IOHD */ -#define RDC_IOHDR_DONE 0xDEADCAFE /* this q entry has been flushed */ -#define RDC_IOHDR_WAITING 0xBEEFCAFE /* this q entry is waiting for ack */ - -/* type */ -#define RDC_QUEUEIO 0x02 - -#define RDC_DISKQ_MAGIC 0x44534B51 -#define RDC_DISKQ_VER_ORIG 0x01 -#define RDC_DISKQ_VER_64BIT 0x02 - -#ifdef NSC_MULTI_TERABYTE -#define RDC_DISKQ_VERS RDC_DISKQ_VER_64BIT -#else -#define RDC_DISKQ_VERS RDC_DISKQ_VER_ORIG -#endif - -typedef struct diskqheader1 { - int magic; - int vers; - int state; - int head_offset; /* offset of meta-info of head (fbas) */ - int tail_offset; /* addr of next write (fbas) */ - int disk_size; /* allow growing ? (fbas) */ - long nitems; /* items */ - long blocks; /* fbas */ - int qwrap; /* where the tail wrapped */ - int auxqwrap; /* if the tail wraps again, before head wraps once */ - uint_t seq_last; /* last sequence before suspend */ - uint_t ack_last; /* last ack before suspend */ -} diskq_header1; - -typedef struct diskqheader2 { - int magic; - int vers; - int state; - uint64_t head_offset; /* offset of meta-info of head (fbas) */ - uint64_t tail_offset; /* addr of next write (fbas) */ - uint64_t disk_size; /* allow growing ? 
(fbas) */ - uint64_t nitems; /* items */ - uint64_t blocks; /* fbas */ - uint64_t qwrap; /* where the tail wrapped */ - uint64_t auxqwrap; /* if the tail wraps again, before head wraps once */ - uint_t seq_last; /* last sequence before suspend */ - uint_t ack_last; /* last ack before suspend */ -} diskq_header2; - -#ifdef NSC_MULTI_TERABYTE -typedef diskq_header2 diskq_header; -#ifdef _LP64 -#define RDC_DQFMT "lu" -#else -#define RDC_DQFMT "llu" -#endif -#else -typedef diskq_header1 diskq_header; -#define RDC_DQFMT "ld" -#endif -typedef union headr { - diskq_header h; - char dummy[512]; -} dqheader; - -/* flags for the state field in the header */ - -#define RDC_SHUTDOWN_OK 0x01 -#define RDC_SHUTDOWN_BAD 0x02 -#define QNXTIOWRAPD 0x04 -#define QHEADWRAPD 0x08 -#define QTAILBUSY 0x10 /* tell flusher not to grab, incomplete */ -#define RDC_QNOBLOCK 0x10000 /* can also be passed out by status */ -#define RDC_QBADRESUME 0x20 /* don't resume bit ref */ -#define RDC_QFULL 0x40 /* the queue is in a full delay loop */ -#define RDC_STOPPINGFLUSH 0x80 - -#define RDC_QFILLSTOP 0x01 /* diskq->memq flusher kill switch */ -#define RDC_QFILLSLEEP 0x02 /* explicit diskq->memq flusher sleep */ - -#define RDC_MAX_DISKQREAD 0x1000 /* max 2 mb q read */ - -typedef struct diskqueue { /* the incore info about the diskq */ - dqheader disk_hdr; /* info about the queue */ - long nitems_hwm; - long blocks_hwm; - long throttle_delay; - nsc_off_t last_tail; /* pos of the last tail write */ - volatile int inflbls; /* number of inflight blocks */ - volatile int inflitems; /* number of inflight blocks */ - - kmutex_t disk_qlock; /* protects all things in diskq */ - /* and all things in dqheader */ - - kmutex_t head_lock; - kcondvar_t busycv; - int busycnt; - nsc_off_t nxt_io; /* flushers head pointer */ - int hdrcnt; /* number of io_hdrs on list */ - nsc_off_t coalesc_bounds; /* don't coalesce below this offset */ - rdc_aio_t *lastio; /* cached copy of the last write on q */ - io_hdr *iohdrs; /* 
flushed, not ack'd on queue */ - io_hdr *hdr_last; /* tail of iohdr list */ - kcondvar_t qfullcv; /* block, queue is full */ -} disk_queue; - -/* diskq macros (gets) */ - -#define QHEAD(q) q->disk_hdr.h.head_offset -#define QNXTIO(q) q->nxt_io -#define QTAIL(q) q->disk_hdr.h.tail_offset -#define QNITEMS(q) q->disk_hdr.h.nitems -#define QBLOCKS(q) q->disk_hdr.h.blocks -#define QSTATE(q) q->disk_hdr.h.state -#define IS_QSTATE(q, s) (q->disk_hdr.h.state & s) -#define QSIZE(q) q->disk_hdr.h.disk_size -#define QMAGIC(q) q->disk_hdr.h.magic -#define QVERS(q) q->disk_hdr.h.vers -#define QSEQ(q) q->disk_hdr.h.seq_last -#define QACK(q) q->disk_hdr.h.ack_last -#define QEMPTY(q) ((QTAIL(q) == QHEAD(q))&&(!(QNITEMS(q)))) -#define QWRAP(q) q->disk_hdr.h.qwrap -#define AUXQWRAP(q) q->disk_hdr.h.auxqwrap -#define LASTQTAIL(q) q->last_tail -#define QCOALBOUNDS(q) q->coalesc_bounds - -/* diskq macros (sets) */ - -#define INC_QHEAD(q, n) q->disk_hdr.h.head_offset += n -#define INC_QNXTIO(q, n) q->nxt_io += n -#define DEC_QNXTIO(q, n) q->nxt_io -= n -#define DEC_QHEAD(q, n) q->disk_hdr.h.head_offset -= n -#define INC_QTAIL(q, n) q->disk_hdr.h.tail_offset += n -#define DEC_QTAIL(q, n) q->disk_hdr.h.tail_offset -= n -#define INC_QNITEMS(q, n) q->disk_hdr.h.nitems += n -#define DEC_QNITEMS(q, n) q->disk_hdr.h.nitems -= n -#define INC_QBLOCKS(q, n) q->disk_hdr.h.blocks += n -#define DEC_QBLOCKS(q, n) q->disk_hdr.h.blocks -= n - -#define SET_QMAGIC(q, n) q->disk_hdr.h.magic = n -#define SET_QSTATE(q, n) q->disk_hdr.h.state |= n -#define CLR_QSTATE(q, n) q->disk_hdr.h.state &= ~n -#define SET_QHEAD(q, n) q->disk_hdr.h.head_offset = n -#define SET_QNXTIO(q, n) q->nxt_io = n -#define SET_QHDRCNT(q, n) q->hdrcnt = n -#define SET_QTAIL(q, n) q->disk_hdr.h.tail_offset = n -#define SET_LASTQTAIL(q, n) q->last_tail = n -#define SET_LASTQWRITE(q, w) q->last_qwrite = w -#define SET_QSIZE(q, n) q->disk_hdr.h.disk_size = n -#define SET_QNITEMS(q, n) q->disk_hdr.h.nitems = n -#define SET_QBLOCKS(q, n) 
q->disk_hdr.h.blocks = n - -#define SET_QWRAP(q, n) q->disk_hdr.h.qwrap = n -#define CLR_QWRAP(q) q->disk_hdr.h.qwrap = 0 -#define SET_AUXQWRAP(q, n) q->disk_hdr.h.auxqwrap = n -#define CLR_AUXQWRAP(q) q->disk_hdr.h.auxqwrap = 0 -#define SET_QCOALBOUNDS(q, n) q->coalesc_bounds = n - -#define WRAPQTAIL(q) \ - do { \ - if (QWRAP(q)) { \ - SET_AUXQWRAP(q, QTAIL(q)); \ - } else { \ - SET_QWRAP(q, QTAIL(q)); \ - } \ - SET_QTAIL(q, RDC_DISKQ_DATA_OFF); \ - } while (0) - -#define DO_AUXQWRAP(q) \ - do { \ - SET_QWRAP(q, AUXQWRAP(q)); \ - SET_AUXQWRAP(q, 0); \ - } while (0) - -/* these can be wrapped by different threads, avoid the race */ -#define WRAPQHEAD(q) \ - do { \ - if (IS_QSTATE(q, QNXTIOWRAPD)) { \ - if (AUXQWRAP(q)) { \ - DO_AUXQWRAP(q); \ - } else { \ - SET_QWRAP(q, 0); \ - } \ - CLR_QSTATE(q, QNXTIOWRAPD); \ - } else { \ - SET_QSTATE(q, QHEADWRAPD); \ - } \ - SET_QHEAD(q, RDC_DISKQ_DATA_OFF); \ - } while (0) - -#define WRAPQNXTIO(q) \ - do { \ - if (IS_QSTATE(q, QHEADWRAPD)) { \ - if (AUXQWRAP(q)) { \ - DO_AUXQWRAP(q); \ - } else { \ - SET_QWRAP(q, 0); \ - } \ - CLR_QSTATE(q, QHEADWRAPD); \ - } else { \ - SET_QSTATE(q, QNXTIOWRAPD); \ - } \ - SET_QNXTIO(q, RDC_DISKQ_DATA_OFF); \ - } while (0) - -#define DQEND(q) (QWRAP(q)?QWRAP(q):QSIZE(q)) - -#define FITSONQ(q, n) \ - (((QBLOCKS(q)+QNITEMS(q)+RDC_DISKQ_DATA_OFF+n) >= \ - (uint64_t)DQEND(q))?0:1) - -/* diskq defines/macros (non-specific) */ - -#define RDC_NOLOG 0x00 -#define RDC_WAIT 0x01 -#define RDC_NOWAIT 0x02 -#define RDC_DOLOG 0x04 /* put the group into logging */ -#define RDC_NOFAIL 0x08 /* don't fail the queue, just init */ -#define RDC_GROUP_LOCKED 0x10 /* trust me, I have the group lock */ - -#define RDC_WRITTEN 0x10 /* data has been commited to queue */ -#define RDC_LAST 0x20 /* end of dequeued buffer, discard */ - -/* CSTYLED */ -#define RDC_BETWEEN(a,b,c) (a<b?((c>=a)&&(c<=b)):((a!=b)&&((c<b)||(c>=a)))) -/* CSTYLED */ - -#define QHEADSHLDWRAP(q) (QWRAP(q) && (QHEAD(q) >= QWRAP(q))) -#define 
QNXTIOSHLDWRAP(q) (QWRAP(q) && (QNXTIO(q) >= QWRAP(q))) -#define QTAILSHLDWRAP(q, size) (QTAIL(q) + size > QSIZE(q)) -#define QCOALESCEOK(q, dec) ((q->lastio->iostatus & RDC_WRITTEN) && \ - ((QTAIL(q) > QNXTIO(q)) ? \ - (((QTAIL(q) - dec) > QNXTIO(q)) && ((QTAIL(q) - dec) > \ - QCOALBOUNDS(q))):\ - (QNXTIOSHLDWRAP(q) && QTAIL(q) > RDC_DISKQ_DATA_OFF))) - -#define QLOCK(q) &q->disk_qlock -#define QTAILLOCK(q) &q->tail_lock -#define QHEADLOCK(q) &q->head_lock - -#define QDISPLAY(q) "qmagic: %x qvers: %d qstate: %x qhead: %" \ - NSC_SZFMT " qnxtio: %" NSC_SZFMT " qtail: %" NSC_SZFMT " qtaillast: %" \ - NSC_SZFMT " qsize: %" NSC_SZFMT " qnitems: %" RDC_DQFMT \ - " qblocks: %" RDC_DQFMT " coalbounds %" NSC_SZFMT, QMAGIC(q), \ - QVERS(q), QSTATE(q), QHEAD(q), QNXTIO(q), QTAIL(q), LASTQTAIL(q), \ - QSIZE(q), QNITEMS(q), QBLOCKS(q), QCOALBOUNDS(q) - -#define QDISPLAYND(q) "m: %x v: %d s: %d h: %" NSC_SZFMT " n: %" \ - NSC_SZFMT " t: %" NSC_SZFMT " l: %" NSC_SZFMT " z: %" NSC_SZFMT \ - " i: %" RDC_DQFMT " b: %" RDC_DQFMT " w: %" NSC_SZFMT \ - " a: %" NSC_SZFMT, \ - QMAGIC(q), QVERS(q), QSTATE(q), QHEAD(q), \ - QNXTIO(q), QTAIL(q), LASTQTAIL(q), QSIZE(q), QNITEMS(q), \ - QBLOCKS(q), QWRAP(q), AUXQWRAP(q) - -/* Disk queue flusher state */ -#define RDC_QFILL_AWAKE (0) -#define RDC_QFILL_ASLEEP (1) -#define RDC_QFILL_DEAD (-1) - -/* functions */ - -int rdc_add_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus); -int rdc_rem_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus); -int rdc_kill_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus); -int rdc_init_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus); -int rdc_lookup_diskq(char *path); -int rdc_diskq_inuse(rdc_set_t *set, char *diskq); -void rdc_dump_iohdrs(disk_queue *q); -extern void rdc_fixlen(rdc_aio_t *aio); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_DISKQ_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_health.c b/usr/src/uts/common/avs/ns/rdc/rdc_health.c deleted file mode 100644 
index 16bc34242d..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_health.c +++ /dev/null @@ -1,800 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -/* - * RDC interface health monitoring code. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/debug.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> - -#include <sys/errno.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. 
- */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#include <sys/ddi.h> -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include <sys/nsctl/nsctl.h> - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#include "rdc_io.h" -#include "rdc_clnt.h" - - -/* - * Forward declarations. - */ - -static void rdc_update_health(rdc_if_t *); - -/* - * Global data. - */ - -/* - * These structures are added when a new host name is introduced to the - * kernel. They never disappear (but that won't waste much space at all). - */ -typedef struct rdc_link_down { - char host[MAX_RDC_HOST_SIZE]; /* The host name of this link */ - int waiting; /* A user is waiting to be woken up */ - int link_down; /* The current state of the link */ - struct rdc_link_down *next; /* Chain */ - kcondvar_t syncd_cv; /* Syncd wakeup */ - kmutex_t syncd_mutex; /* Lock for syncd_cv */ -} rdc_link_down_t; -static rdc_link_down_t *rdc_link_down = NULL; - -int rdc_health_thres = RDC_HEALTH_THRESHOLD; -rdc_if_t *rdc_if_top; - - -/* - * IPv6 addresses are represented as 16bit hexadecimal integers - * separated by colons. 
Contiguous runs of zeros can be abbreviated by - * double colons: - * FF02:0:0:0:0:1:200E:8C6C - * | - * v - * FF02::1:200E:8C6C - */ -void -rdc_if_ipv6(const uint16_t *addr, char *buf) -{ - const int end = 8; /* 8 shorts, 128 bits in an IPv6 address */ - int i; - - for (i = 0; i < end; i++) { - if (i > 0) - (void) sprintf(buf, "%s:", buf); - - if (addr[i] != 0 || i == 0 || i == (end - 1)) { - /* first, last, or non-zero value */ - (void) sprintf(buf, "%s%x", buf, (int)addr[i]); - } else { - if ((i + 1) < end && addr[i + 1] != 0) { - /* single zero */ - (void) sprintf(buf, "%s%x", buf, (int)addr[i]); - } else { - /* skip contiguous zeros */ - while ((i + 1) < end && addr[i + 1] == 0) - i++; - } - } - } -} - -static void -rdc_if_xxx(rdc_if_t *ip, char *updown) -{ - if (strcmp("inet6", ip->srv->ri_knconf->knc_protofmly) == 0) { - uint16_t *this = (uint16_t *)ip->ifaddr.buf; - uint16_t *other = (uint16_t *)ip->r_ifaddr.buf; - char this_str[256], other_str[256]; - - bzero(this_str, sizeof (this_str)); - bzero(other_str, sizeof (other_str)); - rdc_if_ipv6(&this[4], this_str); - rdc_if_ipv6(&other[4], other_str); - - cmn_err(CE_NOTE, "!SNDR: Interface %s <==> %s : %s", - this_str, other_str, updown); - } else { - uchar_t *this = (uchar_t *)ip->ifaddr.buf; - uchar_t *other = (uchar_t *)ip->r_ifaddr.buf; - - cmn_err(CE_NOTE, - "!SNDR: Interface %d.%d.%d.%d <==> %d.%d.%d.%d : %s", - (int)this[4], (int)this[5], (int)this[6], (int)this[7], - (int)other[4], (int)other[5], (int)other[6], (int)other[7], - updown); - } -} - - -static void -rdc_if_down(rdc_if_t *ip) -{ - rdc_if_xxx(ip, "Down"); -} - - -static void -rdc_if_up(rdc_if_t *ip) -{ - rdc_if_xxx(ip, "Up"); -} - - -/* - * Health monitor for a single interface. - * - * The secondary sends ping RPCs to the primary. - * The primary just stores the results and updates its structures. 
- */ -static void -rdc_health_thread(void *arg) -{ - rdc_if_t *ip = (rdc_if_t *)arg; - struct rdc_ping ping; - struct rdc_ping6 ping6; - struct timeval t; - int down = 1; - int ret, err; - int sec = 0; - char ifaddr[RDC_MAXADDR]; - char r_ifaddr[RDC_MAXADDR]; - uint16_t *sp; - - bcopy(ip->ifaddr.buf, ifaddr, ip->ifaddr.len); - sp = (uint16_t *)ifaddr; - *sp = htons(*sp); - bcopy(ip->r_ifaddr.buf, r_ifaddr, ip->r_ifaddr.len); - sp = (uint16_t *)r_ifaddr; - *sp = htons(*sp); - - while ((ip->exiting != 1) && (net_exit != ATM_EXIT)) { - delay(HZ); - - /* setup RPC timeout */ - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (ip->issecondary && !ip->no_ping) { - if (ip->rpc_version < RDC_VERSION7) { - bcopy(ip->r_ifaddr.buf, ping6.p_ifaddr, - RDC_MAXADDR); - /* primary ifaddr */ - bcopy(ip->ifaddr.buf, ping6.s_ifaddr, - RDC_MAXADDR); - /* secondary ifaddr */ - err = rdc_clnt_call_any(ip->srv, ip, - RDCPROC_PING4, xdr_rdc_ping6, - (char *)&ping6, xdr_int, (char *)&ret, &t); - } else { - ping.p_ifaddr.buf = r_ifaddr; - ping.p_ifaddr.len = ip->r_ifaddr.len; - ping.p_ifaddr.maxlen = ip->r_ifaddr.len; - ping.s_ifaddr.buf = ifaddr; - ping.s_ifaddr.len = ip->ifaddr.len; - ping.s_ifaddr.maxlen = ip->ifaddr.len; - err = rdc_clnt_call_any(ip->srv, ip, - RDCPROC_PING4, xdr_rdc_ping, (char *)&ping, - xdr_int, (char *)&ret, &t); - } - - - if (err || ret) { - /* RPC failed - link is down */ - if (!down && !ip->isprimary) { - /* - * don't print messages if also - * a primary - the primary will - * take care of it. - */ - rdc_if_down(ip); - down = 1; - } - rdc_dump_alloc_bufs(ip); - ip->no_ping = 1; - - /* - * Start back at the max possible version - * since the remote server could come back - * on a different protocol version. 
- */ - mutex_enter(&rdc_ping_lock); - ip->rpc_version = RDC_VERS_MAX; - mutex_exit(&rdc_ping_lock); - } else { - if (down && !ip->isprimary) { - /* - * was failed, but now ok - * - * don't print messages if also - * a primary - the primary will - * take care of it. - */ - rdc_if_up(ip); - down = 0; - } - } - } - if (!ip->isprimary && down && ++sec == 5) { - sec = 0; - rdc_dump_alloc_bufs(ip); - } - - if (ip->isprimary) - rdc_update_health(ip); - } - - /* signal that this thread is done */ - ip->exiting = 2; -} - - -int -rdc_isactive_if(struct netbuf *addr, struct netbuf *r_addr) -{ - rdc_if_t *ip; - int rc = 0; - - /* search for existing interface structure */ - - mutex_enter(&rdc_ping_lock); - for (ip = rdc_if_top; ip; ip = ip->next) { - if (ip->exiting != 0) - continue; - if (((bcmp(ip->ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->r_ifaddr.buf, r_addr->buf, r_addr->len) == 0)) || - ((bcmp(ip->r_ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->ifaddr.buf, r_addr->buf, r_addr->len) == 0))) { - /* found matching interface structure */ - if (ip->isprimary && !ip->if_down) { - rc = 1; - } else if (ip->issecondary && !ip->no_ping) { - rc = 1; - } - break; - } - } - mutex_exit(&rdc_ping_lock); - return (rc); -} - -/* - * Set the rdc rpc version of the rdc_if_t. - * - * Called from incoming rpc calls which start before - * the health service becomes established. 
- */ -void -rdc_set_if_vers(rdc_u_info_t *urdc, rpcvers_t vers) -{ - rdc_if_t *ip; - struct netbuf *addr, *r_addr; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - addr = &(urdc->primary.addr); - r_addr = &(urdc->secondary.addr); - } else { - addr = &(urdc->secondary.addr); - r_addr = &(urdc->primary.addr); - } - - /* search for existing interface structure */ - - mutex_enter(&rdc_ping_lock); - for (ip = rdc_if_top; ip; ip = ip->next) { - if (ip->exiting != 0) - continue; - if (((bcmp(ip->ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->r_ifaddr.buf, r_addr->buf, r_addr->len) == 0)) || - ((bcmp(ip->r_ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->ifaddr.buf, r_addr->buf, r_addr->len) == 0))) { - /* found matching interface structure */ - ip->rpc_version = vers; -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc intf %p rpc version set to %u", - (void *)ip, vers); -#endif - break; - } - } - mutex_exit(&rdc_ping_lock); -} - -/* - * Free all the rdc_link_down structures (only at module unload time) - */ -void -rdc_link_down_free() -{ - rdc_link_down_t *p; - rdc_link_down_t *q; - - if (rdc_link_down == NULL) - return; - - for (p = rdc_link_down->next; p != rdc_link_down; ) { - q = p; - p = p->next; - kmem_free(q, sizeof (*q)); - } - kmem_free(rdc_link_down, sizeof (*q)); - rdc_link_down = NULL; -} - - -/* - * Look up the supplied hostname in the rdc_link_down chain. Add a new - * entry if it isn't found. Return a pointer to the new or found entry. 
- */ -static rdc_link_down_t * -rdc_lookup_host(char *host) -{ - rdc_link_down_t *p; - - mutex_enter(&rdc_ping_lock); - - if (rdc_link_down == NULL) { - rdc_link_down = kmem_zalloc(sizeof (*rdc_link_down), KM_SLEEP); - rdc_link_down->next = rdc_link_down; - } - - for (p = rdc_link_down->next; p != rdc_link_down; p = p->next) { - if (strcmp(host, p->host) == 0) { - /* Match */ - mutex_exit(&rdc_ping_lock); - return (p); - } - } - - /* No match, must create a new entry */ - - p = kmem_zalloc(sizeof (*p), KM_SLEEP); - p->link_down = 1; - p->next = rdc_link_down->next; - rdc_link_down->next = p; - (void) strncpy(p->host, host, MAX_RDC_HOST_SIZE); - mutex_init(&p->syncd_mutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&p->syncd_cv, NULL, CV_DRIVER, NULL); - - mutex_exit(&rdc_ping_lock); - return (p); -} - - -/* - * Handle the RDC_LINK_DOWN ioctl. - * The user specifies which host they're interested in. - * This function is woken up when the link to that host goes down. - */ - -/* ARGSUSED3 */ -int -_rdc_link_down(void *arg, int mode, spcs_s_info_t kstatus, int *rvp) -{ - char host[MAX_RDC_HOST_SIZE]; - rdc_link_down_t *syncdp; - clock_t timeout = RDC_SYNC_EVENT_TIMEOUT * 2; /* 2 min */ - int rc = 0; - - if (ddi_copyin(arg, host, MAX_RDC_HOST_SIZE, mode)) - return (EFAULT); - - - syncdp = rdc_lookup_host(host); - - mutex_enter(&syncdp->syncd_mutex); - if (!syncdp->link_down) { - syncdp->waiting = 1; - if (cv_timedwait_sig(&syncdp->syncd_cv, &syncdp->syncd_mutex, - nsc_lbolt() + timeout) == 0) { - /* Woken by a signal, not a link down event */ - syncdp->waiting = 0; - rc = EAGAIN; - spcs_s_add(kstatus, rc); - } - - } - mutex_exit(&syncdp->syncd_mutex); - - return (rc); -} - - -/* - * Add an RDC set to an interface - * - * If the interface is new, add it to the list of interfaces. 
- */ -rdc_if_t * -rdc_add_to_if(rdc_srv_t *svp, struct netbuf *addr, struct netbuf *r_addr, - int primary) -{ - rdc_if_t *new, *ip; - - if ((addr->buf == NULL) || (r_addr->buf == NULL)) - return (NULL); - - /* setup a new interface structure */ - new = (rdc_if_t *)kmem_zalloc(sizeof (*new), KM_SLEEP); - if (!new) - return (NULL); - - dup_rdc_netbuf(addr, &new->ifaddr); - dup_rdc_netbuf(r_addr, &new->r_ifaddr); - new->rpc_version = RDC_VERS_MAX; - new->srv = rdc_create_svinfo(svp->ri_hostname, &svp->ri_addr, - svp->ri_knconf); - new->old_pulse = -1; - new->new_pulse = 0; - - if (!new->srv) { - free_rdc_netbuf(&new->r_ifaddr); - free_rdc_netbuf(&new->ifaddr); - kmem_free(new, sizeof (*new)); - return (NULL); - } - - /* search for existing interface structure */ - - mutex_enter(&rdc_ping_lock); - - for (ip = rdc_if_top; ip; ip = ip->next) { - if ((bcmp(ip->ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->r_ifaddr.buf, r_addr->buf, r_addr->len) == 0) && - ip->exiting == 0) { - /* found matching interface structure */ - break; - } - } - - if (!ip) { - /* add new into the chain */ - - new->next = rdc_if_top; - rdc_if_top = new; - ip = new; - - /* start daemon */ - - ip->last = nsc_time(); - ip->deadness = 1; - ip->if_down = 1; - - if (nsc_create_process(rdc_health_thread, ip, TRUE)) { - mutex_exit(&rdc_ping_lock); - return (NULL); - } - } - - /* mark usage type */ - - if (primary) { - ip->isprimary = 1; - } else { - ip->issecondary = 1; - ip->no_ping = 0; - } - - mutex_exit(&rdc_ping_lock); - - /* throw away new if it was not used */ - - if (ip != new) { - free_rdc_netbuf(&new->r_ifaddr); - free_rdc_netbuf(&new->ifaddr); - rdc_destroy_svinfo(new->srv); - kmem_free(new, sizeof (*new)); - } - - return (ip); -} - - -/* - * Update an interface following the removal of an RDC set. - * - * If there are no more RDC sets using the interface, delete it from - * the list of interfaces. - * - * Either clear krdc->intf, or ensure !IS_CONFIGURED(krdc) before calling this. 
- */ -void -rdc_remove_from_if(rdc_if_t *ip) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_if_t **ipp; - int pfound = 0; - int sfound = 0; - int delete = 1; - int index; - - mutex_enter(&rdc_ping_lock); - - /* - * search for RDC sets using this interface and update - * the isprimary and issecondary flags. - */ - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - if (IS_CONFIGURED(krdc) && krdc->intf == ip) { - delete = 0; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - pfound = 1; - } else { - sfound = 1; - } - - if (pfound && sfound) - break; - } - } - - ip->isprimary = pfound; - ip->issecondary = sfound; - - if (!delete || ip->exiting > 0) { - mutex_exit(&rdc_ping_lock); - return; - } - - /* mark and wait for daemon to exit */ - - ip->exiting = 1; - - mutex_exit(&rdc_ping_lock); - - while (ip->exiting == 1) - delay(drv_usectohz(10)); - - mutex_enter(&rdc_ping_lock); - - ASSERT(ip->exiting == 2); - - /* remove from chain */ - - for (ipp = &rdc_if_top; *ipp; ipp = &((*ipp)->next)) { - if (*ipp == ip) { - *ipp = ip->next; - break; - } - } - - mutex_exit(&rdc_ping_lock); - - /* free unused interface structure */ - - free_rdc_netbuf(&ip->r_ifaddr); - free_rdc_netbuf(&ip->ifaddr); - rdc_destroy_svinfo(ip->srv); - kmem_free(ip, sizeof (*ip)); -} - - -/* - * Check the status of the link to the secondary, and optionally update - * the primary-side ping variables. - * - * For use on a primary only. - * - * Returns: - * TRUE - interface up. - * FALSE - interface down. - */ -int -rdc_check_secondary(rdc_if_t *ip, int update) -{ - int rc = TRUE; - - if (!ip || !ip->isprimary) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_check_secondary: ip %p, isprimary %d, issecondary %d", - (void *) ip, ip ? ip->isprimary : 0, - ip ? 
ip->issecondary : 0); -#endif - return (FALSE); - } - - if (!ip->deadness) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check_secondary: ip %p, ip->deadness %d", - (void *) ip, ip->deadness); -#endif - return (FALSE); - } - - if (!update) { - /* quick look */ - return ((ip->deadness > rdc_health_thres) ? FALSE : TRUE); - } - - /* update (slow) with lock */ - - mutex_enter(&rdc_ping_lock); - - if (ip->old_pulse == ip->new_pulse) { - /* - * ping has not been received since last update - * or we have not yet been pinged, - * the health thread has started only as a - * local client so far, not so on the other side - */ - - if (ip->last != nsc_time()) { - /* time has passed, so move closer to death */ - - ip->last = nsc_time(); - ip->deadness++; - - if (ip->deadness <= 0) { - /* avoid the wrap */ - ip->deadness = rdc_health_thres + 1; - } - } - - if (ip->deadness > rdc_health_thres) { - rc = FALSE; - /* - * Start back at the max possible version - * since the remote server could come back - * on a different protocol version. - */ - ip->rpc_version = RDC_VERS_MAX; - } - } else { - ip->old_pulse = ip->new_pulse; - } - - mutex_exit(&rdc_ping_lock); - return (rc); -} - - -/* - * Update the interface structure with the latest ping info, and - * perform interface up/down transitions if required. - * - * For use on a primary only. - */ -static void -rdc_update_health(rdc_if_t *ip) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int index; - rdc_link_down_t *syncdp; - - if (!ip->isprimary) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_update_health: ip %p, isprimary %d, issecondary %d", - (void *) ip, ip ? ip->isprimary : 0, - ip ? 
ip->issecondary : 0); -#endif - return; - } - - if (!rdc_check_secondary(ip, TRUE)) { - /* interface down */ - if (!ip->if_down) { - rdc_if_down(ip); - ip->if_down = 1; - - /* scan rdc sets and update status */ - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - if (IS_ENABLED(urdc) && (krdc->intf == ip) && - (rdc_get_vflags(urdc) & RDC_PRIMARY) && - !(rdc_get_vflags(urdc) & RDC_LOGGING)) { - /* mark down */ - - rdc_group_enter(krdc); - /* - * check for possible race with - * with delete logic - */ - if (!IS_ENABLED(urdc)) { - rdc_group_exit(krdc); - continue; - } - rdc_group_log(krdc, RDC_NOFLUSH | - RDC_NOREMOTE | RDC_QUEUING, - "hm detected secondary " - "interface down"); - - rdc_group_exit(krdc); - - /* dump async queues */ - rdc_dump_queue(index); - } - } - - /* dump allocated bufs */ - rdc_dump_alloc_bufs(ip); - } - - syncdp = rdc_lookup_host(ip->srv->ri_hostname); - mutex_enter(&syncdp->syncd_mutex); - if (syncdp->link_down == 0) { - /* Link has gone down, notify rdcsyncd daemon */ - syncdp->link_down = 1; - if (syncdp->waiting) { - syncdp->waiting = 0; - cv_signal(&syncdp->syncd_cv); - } - } - mutex_exit(&syncdp->syncd_mutex); - } else { - /* interface up */ - if (ip->if_down && ip->isprimary) { - rdc_if_up(ip); - ip->if_down = 0; - } - - syncdp = rdc_lookup_host(ip->srv->ri_hostname); - mutex_enter(&syncdp->syncd_mutex); - if (syncdp->link_down) { - /* Link has come back up */ - syncdp->link_down = 0; - } - mutex_exit(&syncdp->syncd_mutex); - } -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_io.c b/usr/src/uts/common/avs/ns/rdc/rdc_io.c deleted file mode 100644 index 89949b0b33..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_io.c +++ /dev/null @@ -1,6718 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/conf.h> -#include <sys/errno.h> -#include <sys/sysmacros.h> - -#ifdef _SunOS_5_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_5_6 */ - -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdc_update.h" -#include "rdc_ioctl.h" -#include "rdcsrv.h" -#include "rdc_diskq.h" - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -volatile int net_exit; -nsc_size_t MAX_RDC_FBAS; - -#ifdef DEBUG -int RDC_MAX_SYNC_THREADS = 8; -int rdc_maxthreads_last = 8; -#endif - -kmutex_t rdc_ping_lock; /* Ping lock */ -static kmutex_t net_blk_lock; - -/* - * rdc_conf_lock is used as a global device configuration lock. 
- * It is also used by enable/resume and disable/suspend code to ensure that - * the transition of an rdc set between configured and unconfigured is - * atomic. - * - * krdc->group->lock is used to protect state changes of a configured rdc - * set (e.g. changes to urdc->flags), such as enabled to disabled and vice - * versa. - * - * rdc_many_lock is also used to protect changes in group membership. A group - * linked list cannot change while this lock is held. The many list and the - * multi-hop list are both protected by rdc_many_lock. - */ -kmutex_t rdc_conf_lock; -kmutex_t rdc_many_lock; /* Many/multi-list lock */ - -static kmutex_t rdc_net_hnd_id_lock; /* Network handle id lock */ -int rdc_debug = 0; -int rdc_debug_sleep = 0; - -static int rdc_net_hnd_id = 1; - -extern kmutex_t rdc_clnt_lock; - -static void rdc_ditemsfree(rdc_net_dataset_t *); -void rdc_clnt_destroy(void); - -rdc_k_info_t *rdc_k_info; -rdc_u_info_t *rdc_u_info; - -unsigned long rdc_async_timeout; - -nsc_size_t rdc_maxthres_queue = RDC_MAXTHRES_QUEUE; -int rdc_max_qitems = RDC_MAX_QITEMS; -int rdc_asyncthr = RDC_ASYNCTHR; -static nsc_svc_t *rdc_volume_update; -static int rdc_prealloc_handle = 1; - -extern int _rdc_rsrv_diskq(rdc_group_t *group); -extern void _rdc_rlse_diskq(rdc_group_t *group); - -/* - * Forward declare all statics that are used before defined - * to enforce parameter checking - * - * Some (if not all) of these could be removed if the code were reordered - */ - -static void rdc_volume_update_svc(intptr_t); -static void halt_sync(rdc_k_info_t *krdc); -void rdc_kstat_create(int index); -void rdc_kstat_delete(int index); -static int rdc_checkforbitmap(int, nsc_off_t); -static int rdc_installbitmap(int, void *, int, nsc_off_t, int, int *, int); -static rdc_group_t *rdc_newgroup(); - -int rdc_enable_diskq(rdc_k_info_t *krdc); -void rdc_close_diskq(rdc_group_t *group); -int rdc_suspend_diskq(rdc_k_info_t *krdc); -int rdc_resume_diskq(rdc_k_info_t *krdc); -void 
rdc_init_diskq_header(rdc_group_t *grp, dqheader *header); -void rdc_fail_diskq(rdc_k_info_t *krdc, int wait, int dolog); -void rdc_unfail_diskq(rdc_k_info_t *krdc); -void rdc_unintercept_diskq(rdc_group_t *grp); -int rdc_stamp_diskq(rdc_k_info_t *krdc, int rsrvd, int flags); -void rdc_qfiller_thr(rdc_k_info_t *krdc); - -nstset_t *_rdc_ioset; -nstset_t *_rdc_flset; - -/* - * RDC threadset tunables - */ -int rdc_threads = 64; /* default number of threads */ -int rdc_threads_inc = 8; /* increment for changing the size of the set */ - -/* - * Private threadset manipulation variables - */ -static int rdc_threads_hysteresis = 2; - /* hysteresis for threadset resizing */ -static int rdc_sets_active; /* number of sets currently enabled */ - -#ifdef DEBUG -kmutex_t rdc_cntlock; -#endif - -/* - * rdc_thread_deconfigure - rdc is being deconfigured, stop any - * thread activity. - * - * Inherently single-threaded by the Solaris module unloading code. - */ -static void -rdc_thread_deconfigure(void) -{ - nst_destroy(_rdc_ioset); - _rdc_ioset = NULL; - - nst_destroy(_rdc_flset); - _rdc_flset = NULL; - - nst_destroy(sync_info.rdc_syncset); - sync_info.rdc_syncset = NULL; -} - -/* - * rdc_thread_configure - rdc is being configured, initialize the - * threads we need for flushing aync volumes. - * - * Must be called with rdc_conf_lock held. - */ -static int -rdc_thread_configure(void) -{ - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if ((_rdc_ioset = nst_init("rdc_thr", rdc_threads)) == NULL) - return (EINVAL); - - if ((_rdc_flset = nst_init("rdc_flushthr", 2)) == NULL) - return (EINVAL); - - if ((sync_info.rdc_syncset = - nst_init("rdc_syncthr", RDC_MAX_SYNC_THREADS)) == NULL) - return (EINVAL); - - return (0); -} - - -/* - * rdc_thread_tune - called to tune the size of the rdc threadset. - * - * Called from the config code when an rdc_set has been enabled or disabled. - * 'sets' is the increment to the number of active rdc_sets. - * - * Must be called with rdc_conf_lock held. 
- */ -static void -rdc_thread_tune(int sets) -{ - int incr = (sets > 0) ? 1 : -1; - int change = 0; - int nthreads; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if (sets < 0) - sets = -sets; - - while (sets--) { - nthreads = nst_nthread(_rdc_ioset); - rdc_sets_active += incr; - - if (rdc_sets_active >= nthreads) - change += nst_add_thread(_rdc_ioset, rdc_threads_inc); - else if ((rdc_sets_active < - (nthreads - (rdc_threads_inc + rdc_threads_hysteresis))) && - ((nthreads - rdc_threads_inc) >= rdc_threads)) - change -= nst_del_thread(_rdc_ioset, rdc_threads_inc); - } - -#ifdef DEBUG - if (change) { - cmn_err(CE_NOTE, "!rdc_thread_tune: " - "nsets %d, nthreads %d, nthreads change %d", - rdc_sets_active, nst_nthread(_rdc_ioset), change); - } -#endif -} - - -/* - * _rdc_unload() - cache is being unloaded, - * deallocate any dual copy structures allocated during cache - * loading. - */ -void -_rdc_unload(void) -{ - int i; - rdc_k_info_t *krdc; - - if (rdc_volume_update) { - (void) nsc_unregister_svc(rdc_volume_update); - rdc_volume_update = NULL; - } - - rdc_thread_deconfigure(); - - if (rdc_k_info != NULL) { - for (i = 0; i < rdc_max_sets; i++) { - krdc = &rdc_k_info[i]; - mutex_destroy(&krdc->dc_sleep); - mutex_destroy(&krdc->bmapmutex); - mutex_destroy(&krdc->kstat_mutex); - mutex_destroy(&krdc->bmp_kstat_mutex); - mutex_destroy(&krdc->syncbitmutex); - cv_destroy(&krdc->busycv); - cv_destroy(&krdc->closingcv); - cv_destroy(&krdc->haltcv); - cv_destroy(&krdc->synccv); - } - } - - mutex_destroy(&sync_info.lock); - mutex_destroy(&rdc_ping_lock); - mutex_destroy(&net_blk_lock); - mutex_destroy(&rdc_conf_lock); - mutex_destroy(&rdc_many_lock); - mutex_destroy(&rdc_net_hnd_id_lock); - mutex_destroy(&rdc_clnt_lock); -#ifdef DEBUG - mutex_destroy(&rdc_cntlock); -#endif - net_exit = ATM_EXIT; - - if (rdc_k_info != NULL) - kmem_free(rdc_k_info, sizeof (*rdc_k_info) * rdc_max_sets); - if (rdc_u_info != NULL) - kmem_free(rdc_u_info, sizeof (*rdc_u_info) * rdc_max_sets); - 
rdc_k_info = NULL; - rdc_u_info = NULL; - rdc_max_sets = 0; -} - - -/* - * _rdc_load() - rdc is being loaded, Allocate anything - * that will be needed while the cache is loaded but doesn't really - * depend on configuration parameters. - * - */ -int -_rdc_load(void) -{ - int i; - rdc_k_info_t *krdc; - - mutex_init(&rdc_ping_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&net_blk_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_conf_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_many_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_net_hnd_id_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_clnt_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&sync_info.lock, NULL, MUTEX_DRIVER, NULL); - -#ifdef DEBUG - mutex_init(&rdc_cntlock, NULL, MUTEX_DRIVER, NULL); -#endif - - if ((i = nsc_max_devices()) < rdc_max_sets) - rdc_max_sets = i; - /* following case for partial installs that may fail */ - if (!rdc_max_sets) - rdc_max_sets = 1024; - - rdc_k_info = kmem_zalloc(sizeof (*rdc_k_info) * rdc_max_sets, KM_SLEEP); - if (!rdc_k_info) - return (ENOMEM); - - rdc_u_info = kmem_zalloc(sizeof (*rdc_u_info) * rdc_max_sets, KM_SLEEP); - if (!rdc_u_info) { - kmem_free(rdc_k_info, sizeof (*rdc_k_info) * rdc_max_sets); - return (ENOMEM); - } - - net_exit = ATM_NONE; - for (i = 0; i < rdc_max_sets; i++) { - krdc = &rdc_k_info[i]; - bzero(krdc, sizeof (*krdc)); - krdc->index = i; - mutex_init(&krdc->dc_sleep, NULL, MUTEX_DRIVER, NULL); - mutex_init(&krdc->bmapmutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&krdc->kstat_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&krdc->bmp_kstat_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&krdc->syncbitmutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&krdc->busycv, NULL, CV_DRIVER, NULL); - cv_init(&krdc->closingcv, NULL, CV_DRIVER, NULL); - cv_init(&krdc->haltcv, NULL, CV_DRIVER, NULL); - cv_init(&krdc->synccv, NULL, CV_DRIVER, NULL); - } - - rdc_volume_update = nsc_register_svc("RDCVolumeUpdated", - rdc_volume_update_svc); - - return (0); -} - 
-static void -rdc_u_init(rdc_u_info_t *urdc) -{ - const int index = (int)(urdc - &rdc_u_info[0]); - - if (urdc->secondary.addr.maxlen) - free_rdc_netbuf(&urdc->secondary.addr); - if (urdc->primary.addr.maxlen) - free_rdc_netbuf(&urdc->primary.addr); - - bzero(urdc, sizeof (rdc_u_info_t)); - - urdc->index = index; - urdc->maxqfbas = rdc_maxthres_queue; - urdc->maxqitems = rdc_max_qitems; - urdc->asyncthr = rdc_asyncthr; -} - -/* - * _rdc_configure() - cache is being configured. - * - * Initialize dual copy structures - */ -int -_rdc_configure(void) -{ - int index; - rdc_k_info_t *krdc; - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - - krdc->remote_index = -1; - krdc->dcio_bitmap = NULL; - krdc->bitmap_ref = NULL; - krdc->bitmap_size = 0; - krdc->bitmap_write = 0; - krdc->disk_status = 0; - krdc->many_next = krdc; - - rdc_u_init(&rdc_u_info[index]); - } - - rdc_async_timeout = 120 * HZ; /* Seconds * HZ */ - MAX_RDC_FBAS = FBA_LEN(RDC_MAXDATA); - if (net_exit != ATM_INIT) { - net_exit = ATM_INIT; - return (0); - } - return (0); -} - -/* - * _rdc_deconfigure - rdc is being deconfigured, shut down any - * dual copy operations and return to an unconfigured state. 
- */ -void -_rdc_deconfigure(void) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int index; - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - krdc->remote_index = -1; - krdc->dcio_bitmap = NULL; - krdc->bitmap_ref = NULL; - krdc->bitmap_size = 0; - krdc->bitmap_write = 0; - krdc->disk_status = 0; - krdc->many_next = krdc; - - if (urdc->primary.addr.maxlen) - free_rdc_netbuf(&(urdc->primary.addr)); - - if (urdc->secondary.addr.maxlen) - free_rdc_netbuf(&(urdc->secondary.addr)); - - bzero(urdc, sizeof (rdc_u_info_t)); - urdc->index = index; - } - net_exit = ATM_EXIT; - rdc_clnt_destroy(); - -} - - -/* - * Lock primitives, containing checks that lock ordering isn't broken - */ -/*ARGSUSED*/ -void -rdc_many_enter(rdc_k_info_t *krdc) -{ - ASSERT(!MUTEX_HELD(&krdc->bmapmutex)); - - mutex_enter(&rdc_many_lock); -} - -/* ARGSUSED */ -void -rdc_many_exit(rdc_k_info_t *krdc) -{ - mutex_exit(&rdc_many_lock); -} - -void -rdc_group_enter(rdc_k_info_t *krdc) -{ - ASSERT(!MUTEX_HELD(&rdc_many_lock)); - ASSERT(!MUTEX_HELD(&rdc_conf_lock)); - ASSERT(!MUTEX_HELD(&krdc->bmapmutex)); - - mutex_enter(&krdc->group->lock); -} - -void -rdc_group_exit(rdc_k_info_t *krdc) -{ - mutex_exit(&krdc->group->lock); -} - -/* - * Suspend and disable operations use this function to wait until it is safe - * to do continue, without trashing data structures used by other ioctls. - */ -static void -wait_busy(rdc_k_info_t *krdc) -{ - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - while (krdc->busy_count > 0) - cv_wait(&krdc->busycv, &rdc_conf_lock); -} - - -/* - * Other ioctls use this function to hold off disable and suspend. - */ -void -set_busy(rdc_k_info_t *krdc) -{ - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - wait_busy(krdc); - - krdc->busy_count++; -} - - -/* - * Other ioctls use this function to allow disable and suspend to continue. 
- */ -void -wakeup_busy(rdc_k_info_t *krdc) -{ - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if (krdc->busy_count <= 0) - return; - - krdc->busy_count--; - cv_broadcast(&krdc->busycv); -} - - -/* - * Remove the rdc set from its group, and destroy the group if no longer in - * use. - */ -static void -remove_from_group(rdc_k_info_t *krdc) -{ - rdc_k_info_t *p; - rdc_group_t *group; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - rdc_many_enter(krdc); - group = krdc->group; - - group->count--; - - /* - * lock queue while looking at thrnum - */ - mutex_enter(&group->ra_queue.net_qlock); - if ((group->rdc_thrnum == 0) && (group->count == 0)) { - - /* - * Assure the we've stopped and the flusher thread has not - * fallen back to sleep - */ - if (krdc->group->ra_queue.qfill_sleeping != RDC_QFILL_DEAD) { - group->ra_queue.qfflags |= RDC_QFILLSTOP; - while (krdc->group->ra_queue.qfflags & RDC_QFILLSTOP) { - if (krdc->group->ra_queue.qfill_sleeping == - RDC_QFILL_ASLEEP) - cv_broadcast(&group->ra_queue.qfcv); - mutex_exit(&group->ra_queue.net_qlock); - delay(2); - mutex_enter(&group->ra_queue.net_qlock); - } - } - mutex_exit(&group->ra_queue.net_qlock); - - mutex_enter(&group->diskqmutex); - rdc_close_diskq(group); - mutex_exit(&group->diskqmutex); - rdc_delgroup(group); - rdc_many_exit(krdc); - krdc->group = NULL; - return; - } - mutex_exit(&group->ra_queue.net_qlock); - /* - * Always clear the group field. - * no, you need it set in rdc_flush_memq(). - * to call rdc_group_log() - * krdc->group = NULL; - */ - - /* Take this rdc structure off the group list */ - - for (p = krdc->group_next; p->group_next != krdc; p = p->group_next) - ; - p->group_next = krdc->group_next; - - rdc_many_exit(krdc); -} - - -/* - * Add the rdc set to its group, setting up a new group if it's the first one. 
- */ -static int -add_to_group(rdc_k_info_t *krdc, int options, int cmd) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_u_info_t *utmp; - rdc_k_info_t *ktmp; - int index; - rdc_group_t *group; - int rc = 0; - nsthread_t *trc; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - /* - * Look for matching group name, primary host name and secondary - * host name. - */ - - rdc_many_enter(krdc); - for (index = 0; index < rdc_max_sets; index++) { - utmp = &rdc_u_info[index]; - ktmp = &rdc_k_info[index]; - - if (urdc->group_name[0] == 0) - break; - - if (!IS_CONFIGURED(ktmp)) - continue; - - if (strncmp(utmp->group_name, urdc->group_name, - NSC_MAXPATH) != 0) - continue; - if (strncmp(utmp->primary.intf, urdc->primary.intf, - MAX_RDC_HOST_SIZE) != 0) { - /* Same group name, different primary interface */ - rdc_many_exit(krdc); - return (-1); - } - if (strncmp(utmp->secondary.intf, urdc->secondary.intf, - MAX_RDC_HOST_SIZE) != 0) { - /* Same group name, different secondary interface */ - rdc_many_exit(krdc); - return (-1); - } - - /* Group already exists, so add this set to the group */ - - if (((options & RDC_OPT_ASYNC) == 0) && - ((ktmp->type_flag & RDC_ASYNCMODE) != 0)) { - /* Must be same mode as existing group members */ - rdc_many_exit(krdc); - return (-1); - } - if (((options & RDC_OPT_ASYNC) != 0) && - ((ktmp->type_flag & RDC_ASYNCMODE) == 0)) { - /* Must be same mode as existing group members */ - rdc_many_exit(krdc); - return (-1); - } - - /* cannont reconfigure existing group into new queue this way */ - if ((cmd != RDC_CMD_RESUME) && - !RDC_IS_DISKQ(ktmp->group) && urdc->disk_queue[0] != '\0') { - rdc_many_exit(krdc); - return (RDC_EQNOADD); - } - - ktmp->group->count++; - krdc->group = ktmp->group; - krdc->group_next = ktmp->group_next; - ktmp->group_next = krdc; - - urdc->autosync = utmp->autosync; /* Same as rest */ - - (void) strncpy(urdc->disk_queue, utmp->disk_queue, NSC_MAXPATH); - - rdc_many_exit(krdc); - return (0); - } - - /* This must be a new 
group */ - group = rdc_newgroup(); - krdc->group = group; - krdc->group_next = krdc; - urdc->autosync = -1; /* Unknown */ - - /* - * Tune the thread set by one for each thread created - */ - rdc_thread_tune(1); - - trc = nst_create(_rdc_ioset, rdc_qfiller_thr, (void *)krdc, NST_SLEEP); - if (trc == NULL) { - rc = -1; - cmn_err(CE_NOTE, "!unable to create queue filler daemon"); - goto fail; - } - - if (urdc->disk_queue[0] == '\0') { - krdc->group->flags |= RDC_MEMQUE; - } else { - krdc->group->flags |= RDC_DISKQUE; - - /* XXX check here for resume or enable and act accordingly */ - - if (cmd == RDC_CMD_RESUME) { - rc = rdc_resume_diskq(krdc); - - } else if (cmd == RDC_CMD_ENABLE) { - rc = rdc_enable_diskq(krdc); - if ((rc == RDC_EQNOADD) && (cmd != RDC_CMD_ENABLE)) { - cmn_err(CE_WARN, "!disk queue %s enable failed," - " enabling memory queue", - urdc->disk_queue); - krdc->group->flags &= ~RDC_DISKQUE; - krdc->group->flags |= RDC_MEMQUE; - bzero(urdc->disk_queue, NSC_MAXPATH); - } - } - } -fail: - rdc_many_exit(krdc); - return (rc); -} - - -/* - * Move the set to a new group if possible - */ -static int -change_group(rdc_k_info_t *krdc, int options) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_u_info_t *utmp; - rdc_k_info_t *ktmp; - rdc_k_info_t *next; - char tmpq[NSC_MAXPATH]; - int index; - int rc = -1; - rdc_group_t *group, *old_group; - nsthread_t *trc; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - /* - * Look for matching group name, primary host name and secondary - * host name. 
- */ - - bzero(&tmpq, sizeof (tmpq)); - rdc_many_enter(krdc); - - old_group = krdc->group; - next = krdc->group_next; - - if (RDC_IS_DISKQ(old_group)) { /* can't keep your own queue */ - (void) strncpy(tmpq, urdc->disk_queue, NSC_MAXPATH); - bzero(urdc->disk_queue, sizeof (urdc->disk_queue)); - } - for (index = 0; index < rdc_max_sets; index++) { - utmp = &rdc_u_info[index]; - ktmp = &rdc_k_info[index]; - - if (ktmp == krdc) - continue; - - if (urdc->group_name[0] == 0) - break; - - if (!IS_CONFIGURED(ktmp)) - continue; - - if (strncmp(utmp->group_name, urdc->group_name, - NSC_MAXPATH) != 0) - continue; - if (strncmp(utmp->primary.intf, urdc->primary.intf, - MAX_RDC_HOST_SIZE) != 0) - goto bad; - if (strncmp(utmp->secondary.intf, urdc->secondary.intf, - MAX_RDC_HOST_SIZE) != 0) - goto bad; - - /* Group already exists, so add this set to the group */ - - if (((options & RDC_OPT_ASYNC) == 0) && - ((ktmp->type_flag & RDC_ASYNCMODE) != 0)) { - /* Must be same mode as existing group members */ - goto bad; - } - if (((options & RDC_OPT_ASYNC) != 0) && - ((ktmp->type_flag & RDC_ASYNCMODE) == 0)) { - /* Must be same mode as existing group members */ - goto bad; - } - - ktmp->group->count++; - krdc->group = ktmp->group; - krdc->group_next = ktmp->group_next; - ktmp->group_next = krdc; - bzero(urdc->disk_queue, sizeof (urdc->disk_queue)); - (void) strncpy(urdc->disk_queue, utmp->disk_queue, NSC_MAXPATH); - - goto good; - } - - /* This must be a new group */ - group = rdc_newgroup(); - krdc->group = group; - krdc->group_next = krdc; - - trc = nst_create(_rdc_ioset, rdc_qfiller_thr, (void *)krdc, NST_SLEEP); - if (trc == NULL) { - rc = -1; - cmn_err(CE_NOTE, "!unable to create queue filler daemon"); - goto bad; - } - - if (urdc->disk_queue[0] == 0) { - krdc->group->flags |= RDC_MEMQUE; - } else { - krdc->group->flags |= RDC_DISKQUE; - if ((rc = rdc_enable_diskq(krdc)) < 0) - goto bad; - } -good: - if (options & RDC_OPT_ASYNC) { - krdc->type_flag |= RDC_ASYNCMODE; - 
rdc_set_flags(urdc, RDC_ASYNC); - } else { - krdc->type_flag &= ~RDC_ASYNCMODE; - rdc_clr_flags(urdc, RDC_ASYNC); - } - - old_group->count--; - if (!old_group->rdc_writer && old_group->count == 0) { - /* Group now empty, so destroy */ - if (RDC_IS_DISKQ(old_group)) { - rdc_unintercept_diskq(old_group); - mutex_enter(&old_group->diskqmutex); - rdc_close_diskq(old_group); - mutex_exit(&old_group->diskqmutex); - } - - mutex_enter(&old_group->ra_queue.net_qlock); - - /* - * Assure the we've stopped and the flusher thread has not - * fallen back to sleep - */ - if (old_group->ra_queue.qfill_sleeping != RDC_QFILL_DEAD) { - old_group->ra_queue.qfflags |= RDC_QFILLSTOP; - while (old_group->ra_queue.qfflags & RDC_QFILLSTOP) { - if (old_group->ra_queue.qfill_sleeping == - RDC_QFILL_ASLEEP) - cv_broadcast(&old_group->ra_queue.qfcv); - mutex_exit(&old_group->ra_queue.net_qlock); - delay(2); - mutex_enter(&old_group->ra_queue.net_qlock); - } - } - mutex_exit(&old_group->ra_queue.net_qlock); - - rdc_delgroup(old_group); - rdc_many_exit(krdc); - return (0); - } - - /* Take this rdc structure off the old group list */ - - for (ktmp = next; ktmp->group_next != krdc; ktmp = ktmp->group_next) - ; - ktmp->group_next = next; - - rdc_many_exit(krdc); - return (0); - -bad: - /* Leave existing group status alone */ - (void) strncpy(urdc->disk_queue, tmpq, NSC_MAXPATH); - rdc_many_exit(krdc); - return (rc); -} - - -/* - * Set flags for an rdc set, setting the group flags as necessary. 
- */ -void -rdc_set_flags(rdc_u_info_t *urdc, int flags) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - int vflags, sflags, bflags, ssflags; - - DTRACE_PROBE2(rdc_set_flags, int, krdc->index, int, flags); - vflags = flags & RDC_VFLAGS; - sflags = flags & RDC_SFLAGS; - bflags = flags & RDC_BFLAGS; - ssflags = flags & RDC_SYNC_STATE_FLAGS; - - if (vflags) { - /* normal volume flags */ - ASSERT(MUTEX_HELD(&rdc_conf_lock) || - MUTEX_HELD(&krdc->group->lock)); - if (ssflags) - mutex_enter(&krdc->bmapmutex); - - urdc->flags |= vflags; - - if (ssflags) - mutex_exit(&krdc->bmapmutex); - } - - if (sflags) { - /* Sync state flags that are protected by a different lock */ - ASSERT(MUTEX_HELD(&rdc_many_lock)); - urdc->sync_flags |= sflags; - } - - if (bflags) { - /* Bmap state flags that are protected by a different lock */ - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); - urdc->bmap_flags |= bflags; - } - -} - - -/* - * Clear flags for an rdc set, clearing the group flags as necessary. - */ -void -rdc_clr_flags(rdc_u_info_t *urdc, int flags) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - int vflags, sflags, bflags; - - DTRACE_PROBE2(rdc_clr_flags, int, krdc->index, int, flags); - vflags = flags & RDC_VFLAGS; - sflags = flags & RDC_SFLAGS; - bflags = flags & RDC_BFLAGS; - - if (vflags) { - /* normal volume flags */ - ASSERT(MUTEX_HELD(&rdc_conf_lock) || - MUTEX_HELD(&krdc->group->lock)); - urdc->flags &= ~vflags; - - } - - if (sflags) { - /* Sync state flags that are protected by a different lock */ - ASSERT(MUTEX_HELD(&rdc_many_lock)); - urdc->sync_flags &= ~sflags; - } - - if (bflags) { - /* Bmap state flags that are protected by a different lock */ - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); - urdc->bmap_flags &= ~bflags; - } -} - - -/* - * Get the flags for an rdc set. - */ -int -rdc_get_vflags(rdc_u_info_t *urdc) -{ - return (urdc->flags | urdc->sync_flags | urdc->bmap_flags); -} - - -/* - * Initialise flags for an rdc set. 
- */ -static void -rdc_init_flags(rdc_u_info_t *urdc) -{ - urdc->flags = 0; - urdc->mflags = 0; - urdc->sync_flags = 0; - urdc->bmap_flags = 0; -} - - -/* - * Set flags for a many group. - */ -void -rdc_set_mflags(rdc_u_info_t *urdc, int flags) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_k_info_t *this = krdc; - - ASSERT(!(flags & ~RDC_MFLAGS)); - - if (flags == 0) - return; - - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - rdc_set_flags(urdc, flags); /* set flags on local urdc */ - - urdc->mflags |= flags; - for (krdc = krdc->many_next; krdc != this; krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - urdc->mflags |= flags; - } -} - - -/* - * Clear flags for a many group. - */ -void -rdc_clr_mflags(rdc_u_info_t *urdc, int flags) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_k_info_t *this = krdc; - rdc_u_info_t *utmp; - - ASSERT(!(flags & ~RDC_MFLAGS)); - - if (flags == 0) - return; - - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - rdc_clr_flags(urdc, flags); /* clear flags on local urdc */ - - /* - * We must maintain the mflags based on the set of flags for - * all the urdc's that are chained up. - */ - - /* - * First look through all the urdc's and remove bits from - * the 'flags' variable that are in use elsewhere. - */ - - for (krdc = krdc->many_next; krdc != this; krdc = krdc->many_next) { - utmp = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(utmp)) - continue; - flags &= ~(rdc_get_vflags(utmp) & RDC_MFLAGS); - if (flags == 0) - break; - } - - /* - * Now clear flags as necessary. 
- */ - - if (flags != 0) { - urdc->mflags &= ~flags; - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - utmp = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(utmp)) - continue; - utmp->mflags &= ~flags; - } - } -} - - -int -rdc_get_mflags(rdc_u_info_t *urdc) -{ - return (urdc->mflags); -} - - -void -rdc_set_flags_log(rdc_u_info_t *urdc, int flags, char *why) -{ - DTRACE_PROBE2(rdc_set_flags_log, int, urdc->index, int, flags); - - rdc_set_flags(urdc, flags); - - if (why == NULL) - return; - - if (flags & RDC_LOGGING) - cmn_err(CE_NOTE, "!sndr: %s:%s entered logging mode: %s", - urdc->secondary.intf, urdc->secondary.file, why); - if (flags & RDC_VOL_FAILED) - cmn_err(CE_NOTE, "!sndr: %s:%s volume failed: %s", - urdc->secondary.intf, urdc->secondary.file, why); - if (flags & RDC_BMP_FAILED) - cmn_err(CE_NOTE, "!sndr: %s:%s bitmap failed: %s", - urdc->secondary.intf, urdc->secondary.file, why); -} -/* - * rdc_lor(source, dest, len) - * logically OR memory pointed to by source and dest, copying result into dest. - */ -void -rdc_lor(const uchar_t *source, uchar_t *dest, int len) -{ - int i; - - if (source == NULL) - return; - - for (i = 0; i < len; i++) - *dest++ |= *source++; -} - - -static int -check_filesize(int index, spcs_s_info_t kstatus) -{ - uint64_t remote_size; - char tmp1[16], tmp2[16]; - rdc_u_info_t *urdc = &rdc_u_info[index]; - int status; - - status = rdc_net_getsize(index, &remote_size); - if (status) { - (void) spcs_s_inttostring(status, tmp1, sizeof (tmp1), 0); - spcs_s_add(kstatus, RDC_EGETSIZE, urdc->secondary.intf, - urdc->secondary.file, tmp1); - (void) rdc_net_state(index, CCIO_ENABLELOG); - return (RDC_EGETSIZE); - } - if (remote_size < (unsigned long long)urdc->volume_size) { - (void) spcs_s_inttostring( - urdc->volume_size, tmp1, sizeof (tmp1), 0); - /* - * Cheat, and covert to int, until we have - * spcs_s_unsignedlonginttostring(). 
- */ - status = (int)remote_size; - (void) spcs_s_inttostring(status, tmp2, sizeof (tmp2), 0); - spcs_s_add(kstatus, RDC_ESIZE, urdc->primary.intf, - urdc->primary.file, tmp1, urdc->secondary.intf, - urdc->secondary.file, tmp2); - (void) rdc_net_state(index, CCIO_ENABLELOG); - return (RDC_ESIZE); - } - return (0); -} - - -static void -rdc_volume_update_svc(intptr_t arg) -{ - rdc_update_t *update = (rdc_update_t *)arg; - rdc_k_info_t *krdc; - rdc_k_info_t *this; - rdc_u_info_t *urdc; - struct net_bdata6 bd; - int index; - int rc; - -#ifdef DEBUG_IIUPDATE - cmn_err(CE_NOTE, "!SNDR received update request for %s", - update->volume); -#endif - - if ((update->protocol != RDC_SVC_ONRETURN) && - (update->protocol != RDC_SVC_VOL_ENABLED)) { - /* don't understand what the client intends to do */ - update->denied = 1; - spcs_s_add(update->status, RDC_EVERSION); - return; - } - - index = rdc_lookup_enabled(update->volume, 0); - if (index < 0) - return; - - /* - * warn II that this volume is in use by sndr so - * II can validate the sizes of the master vs shadow - * and avoid trouble later down the line with - * size mis-matches between urdc->volume_size and - * what is returned from nsc_partsize() which may - * be the size of the master when replicating the shadow - */ - if (update->protocol == RDC_SVC_VOL_ENABLED) { - if (index >= 0) - update->denied = 1; - return; - } - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - this = krdc; - - do { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) { -#ifdef DEBUG_IIUPDATE - cmn_err(CE_NOTE, "!SNDR refused update request for %s", - update->volume); -#endif - update->denied = 1; - spcs_s_add(update->status, RDC_EMIRRORUP); - return; - } - /* 1->many - all must be logging */ - if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - break; - } - rdc_many_exit(krdc); 
- } - } while (krdc != this); - -#ifdef DEBUG_IIUPDATE - cmn_err(CE_NOTE, "!SNDR allowed update request for %s", update->volume); -#endif - urdc = &rdc_u_info[krdc->index]; - do { - - bd.size = min(krdc->bitmap_size, (nsc_size_t)update->size); - bd.data.data_val = (char *)update->bitmap; - bd.offset = 0; - bd.cd = index; - - if ((rc = RDC_OR_BITMAP(&bd)) != 0) { - update->denied = 1; - spcs_s_add(update->status, rc); - return; - } - urdc = &rdc_u_info[index]; - urdc->bits_set = RDC_COUNT_BITMAP(krdc); - if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - index = krdc->index; - if (!IS_ENABLED(urdc)) - continue; - break; - } - rdc_many_exit(krdc); - } - } while (krdc != this); - - - /* II (or something else) has updated us, so no need for a sync */ - if (rdc_get_vflags(urdc) & (RDC_SYNC_NEEDED | RDC_RSYNC_NEEDED)) { - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNC_NEEDED | RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - } - - if (krdc->bitmap_write > 0) - (void) rdc_write_bitmap(krdc); -} - - -/* - * rdc_check() - * - * Return 0 if the set is configured, enabled and the supplied - * addressing information matches the in-kernel config, otherwise - * return 1. 
- */ -static int -rdc_check(rdc_k_info_t *krdc, rdc_set_t *rdc_set) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - ASSERT(MUTEX_HELD(&krdc->group->lock)); - - if (!IS_ENABLED(urdc)) - return (1); - - if (strncmp(urdc->primary.file, rdc_set->primary.file, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check: primary file mismatch %s vs %s", - urdc->primary.file, rdc_set->primary.file); -#endif - return (1); - } - - if (rdc_set->primary.addr.len != 0 && - bcmp(urdc->primary.addr.buf, rdc_set->primary.addr.buf, - urdc->primary.addr.len) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check: primary address mismatch for %s", - urdc->primary.file); -#endif - return (1); - } - - if (strncmp(urdc->secondary.file, rdc_set->secondary.file, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check: secondary file mismatch %s vs %s", - urdc->secondary.file, rdc_set->secondary.file); -#endif - return (1); - } - - if (rdc_set->secondary.addr.len != 0 && - bcmp(urdc->secondary.addr.buf, rdc_set->secondary.addr.buf, - urdc->secondary.addr.len) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check: secondary addr mismatch for %s", - urdc->secondary.file); -#endif - return (1); - } - - return (0); -} - - -/* - * Lookup enabled sets for a bitmap match - */ - -int -rdc_lookup_bitmap(char *pathname) -{ - rdc_u_info_t *urdc; -#ifdef DEBUG - rdc_k_info_t *krdc; -#endif - int index; - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; -#ifdef DEBUG - krdc = &rdc_k_info[index]; -#endif - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_ENABLED(urdc)) - continue; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - if (strncmp(pathname, urdc->primary.bitmap, - NSC_MAXPATH) == 0) - return (index); - } else { - if (strncmp(pathname, urdc->secondary.bitmap, - NSC_MAXPATH) == 0) - return (index); - } - } - - return (-1); -} - - -/* - * Translate a pathname to index into rdc_k_info[]. 
- * Returns first match that is enabled. - */ - -int -rdc_lookup_enabled(char *pathname, int allow_disabling) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - -restart: - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_ENABLED(urdc)) - continue; - - if (allow_disabling == 0 && krdc->type_flag & RDC_UNREGISTER) - continue; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - if (strncmp(pathname, urdc->primary.file, - NSC_MAXPATH) == 0) - return (index); - } else { - if (strncmp(pathname, urdc->secondary.file, - NSC_MAXPATH) == 0) - return (index); - } - } - - if (allow_disabling == 0) { - /* None found, or only a disabling one found, so try again */ - allow_disabling = 1; - goto restart; - } - - return (-1); -} - - -/* - * Translate a pathname to index into rdc_k_info[]. - * Returns first match that is configured. - * - * Used by enable & resume code. - * Must be called with rdc_conf_lock held. - */ - -int -rdc_lookup_configured(char *pathname) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_CONFIGURED(krdc)) - continue; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - if (strncmp(pathname, urdc->primary.file, - NSC_MAXPATH) == 0) - return (index); - } else { - if (strncmp(pathname, urdc->secondary.file, - NSC_MAXPATH) == 0) - return (index); - } - } - - return (-1); -} - - -/* - * Looks up a configured set with matching secondary interface:volume - * to check for illegal many-to-one volume configs. To be used during - * enable and resume processing. - * - * Must be called with rdc_conf_lock held. 
- */ - -static int -rdc_lookup_many2one(rdc_set_t *rdc_set) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - if (!IS_CONFIGURED(krdc)) - continue; - - if (strncmp(urdc->secondary.file, - rdc_set->secondary.file, NSC_MAXPATH) != 0) - continue; - if (strncmp(urdc->secondary.intf, - rdc_set->secondary.intf, MAX_RDC_HOST_SIZE) != 0) - continue; - - break; - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - - -/* - * Looks up an rdc set to check if it is already configured, to be used from - * functions called from the config ioctl where the interface names can be - * used for comparison. - * - * Must be called with rdc_conf_lock held. - */ - -int -rdc_lookup_byname(rdc_set_t *rdc_set) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_CONFIGURED(krdc)) - continue; - - if (strncmp(urdc->primary.file, rdc_set->primary.file, - NSC_MAXPATH) != 0) - continue; - if (strncmp(urdc->primary.intf, rdc_set->primary.intf, - MAX_RDC_HOST_SIZE) != 0) - continue; - if (strncmp(urdc->secondary.file, rdc_set->secondary.file, - NSC_MAXPATH) != 0) - continue; - if (strncmp(urdc->secondary.intf, rdc_set->secondary.intf, - MAX_RDC_HOST_SIZE) != 0) - continue; - - break; - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - -/* - * Looks up a secondary hostname and device, to be used from - * functions called from the config ioctl where the interface names can be - * used for comparison. - * - * Must be called with rdc_conf_lock held. 
- */ - -int -rdc_lookup_byhostdev(char *intf, char *file) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_CONFIGURED(krdc)) - continue; - - if (strncmp(urdc->secondary.file, file, - NSC_MAXPATH) != 0) - continue; - if (strncmp(urdc->secondary.intf, intf, - MAX_RDC_HOST_SIZE) != 0) - continue; - break; - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - - -/* - * Looks up an rdc set to see if it is currently enabled, to be used on the - * server so that the interface addresses must be used for comparison, as - * the interface names may differ from those used on the client. - * - */ - -int -rdc_lookup_byaddr(rdc_set_t *rdc_set) -{ - rdc_u_info_t *urdc; -#ifdef DEBUG - rdc_k_info_t *krdc; -#endif - int index; - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; -#ifdef DEBUG - krdc = &rdc_k_info[index]; -#endif - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_ENABLED(urdc)) - continue; - - if (strcmp(urdc->primary.file, rdc_set->primary.file) != 0) - continue; - - if (strcmp(urdc->secondary.file, rdc_set->secondary.file) != 0) - continue; - - if (bcmp(urdc->primary.addr.buf, rdc_set->primary.addr.buf, - urdc->primary.addr.len) != 0) { - continue; - } - - if (bcmp(urdc->secondary.addr.buf, rdc_set->secondary.addr.buf, - urdc->secondary.addr.len) != 0) { - continue; - } - - break; - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - - -/* - * Return index of first multihop or 1-to-many - * Behavior controlled by setting ismany. 
- * ismany TRUE (one-to-many) - * ismany FALSE (multihops) - * - */ -static int -rdc_lookup_multimany(rdc_k_info_t *krdc, const int ismany) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_u_info_t *utmp; - rdc_k_info_t *ktmp; - char *pathname; - int index; - int role; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - /* this host is the primary of the krdc set */ - pathname = urdc->primary.file; - if (ismany) { - /* - * 1-many sets are linked by primary : - * look for matching primary on this host - */ - role = RDC_PRIMARY; - } else { - /* - * multihop sets link primary to secondary : - * look for matching secondary on this host - */ - role = 0; - } - } else { - /* this host is the secondary of the krdc set */ - pathname = urdc->secondary.file; - if (ismany) { - /* - * 1-many sets are linked by primary, so if - * this host is the secondary of the set this - * cannot require 1-many linkage. - */ - return (-1); - } else { - /* - * multihop sets link primary to secondary : - * look for matching primary on this host - */ - role = RDC_PRIMARY; - } - } - - for (index = 0; index < rdc_max_sets; index++) { - utmp = &rdc_u_info[index]; - ktmp = &rdc_k_info[index]; - - if (!IS_CONFIGURED(ktmp)) { - continue; - } - - if (role == RDC_PRIMARY) { - /* - * Find a primary that is this host and is not - * krdc but shares the same data volume as krdc. - */ - if ((rdc_get_vflags(utmp) & RDC_PRIMARY) && - strncmp(utmp->primary.file, pathname, - NSC_MAXPATH) == 0 && (krdc != ktmp)) { - break; - } - } else { - /* - * Find a secondary that is this host and is not - * krdc but shares the same data volume as krdc. - */ - if (!(rdc_get_vflags(utmp) & RDC_PRIMARY) && - strncmp(utmp->secondary.file, pathname, - NSC_MAXPATH) == 0 && (krdc != ktmp)) { - break; - } - } - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - -/* - * Returns secondary match that is configured. 
- * - * Used by enable & resume code. - * Must be called with rdc_conf_lock held. - */ - -static int -rdc_lookup_secondary(char *pathname) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_CONFIGURED(krdc)) - continue; - - if (!IS_STATE(urdc, RDC_PRIMARY)) { - if (strncmp(pathname, urdc->secondary.file, - NSC_MAXPATH) == 0) - return (index); - } - } - - return (-1); -} - - -static nsc_fd_t * -rdc_open_direct(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rc; - - if (krdc->remote_fd == NULL) - krdc->remote_fd = nsc_open(urdc->direct_file, - NSC_RDCHR_ID|NSC_DEVICE|NSC_RDWR, 0, 0, &rc); - return (krdc->remote_fd); -} - -static void -rdc_close_direct(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - urdc->direct_file[0] = 0; - if (krdc->remote_fd) { - if (nsc_close(krdc->remote_fd) == 0) { - krdc->remote_fd = NULL; - } - } -} - - -#ifdef DEBUG_MANY -static void -print_many(rdc_k_info_t *start) -{ - rdc_k_info_t *p = start; - rdc_u_info_t *q = &rdc_u_info[p->index]; - - do { - cmn_err(CE_CONT, "!krdc %p, %s %s (many_nxt %p multi_nxt %p)\n", - p, q->primary.file, q->secondary.file, p->many_next, - p->multi_next); - delay(10); - p = p->many_next; - q = &rdc_u_info[p->index]; - } while (p && p != start); -} -#endif /* DEBUG_MANY */ - - -static int -add_to_multi(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - int mindex; - int domulti; - - urdc = &rdc_u_info[krdc->index]; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - /* Now find companion krdc */ - mindex = rdc_lookup_multimany(krdc, FALSE); - -#ifdef DEBUG_MANY - cmn_err(CE_NOTE, - "!add_to_multi: lookup_multimany: mindex %d prim %s sec %s", - mindex, 
urdc->primary.file, urdc->secondary.file); -#endif - - if (mindex >= 0) { - ktmp = &rdc_k_info[mindex]; - utmp = &rdc_u_info[mindex]; - - domulti = 1; - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - ktmp->multi_next != NULL) { - /* - * We are adding a new primary to a many - * group that is the target of a multihop, just - * ignore it since we are linked in elsewhere. - */ - domulti = 0; - } - - if (domulti) { - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - /* Is previous leg using direct file I/O? */ - if (utmp->direct_file[0] != 0) { - /* It is, so cannot proceed */ - return (-1); - } - } else { - /* Is this leg using direct file I/O? */ - if (urdc->direct_file[0] != 0) { - /* It is, so cannot proceed */ - return (-1); - } - } - krdc->multi_next = ktmp; - ktmp->multi_next = krdc; - } - } else { - krdc->multi_next = NULL; -#ifdef DEBUG_MANY - cmn_err(CE_NOTE, "!add_to_multi: NULL multi_next index %d", - krdc->index); -#endif - } - - return (0); -} - - -/* - * Add a new set to the circular list of 1-to-many primaries and chain - * up any multihop as well. - */ -static int -add_to_many(rdc_k_info_t *krdc) -{ - rdc_k_info_t *okrdc; - int oindex; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - rdc_many_enter(krdc); - - if (add_to_multi(krdc) < 0) { - rdc_many_exit(krdc); - return (-1); - } - - oindex = rdc_lookup_multimany(krdc, TRUE); - if (oindex < 0) { -#ifdef DEBUG_MANY - print_many(krdc); -#endif - rdc_many_exit(krdc); - return (0); - } - - okrdc = &rdc_k_info[oindex]; - -#ifdef DEBUG_MANY - print_many(okrdc); -#endif - krdc->many_next = okrdc->many_next; - okrdc->many_next = krdc; - -#ifdef DEBUG_MANY - print_many(okrdc); -#endif - rdc_many_exit(krdc); - return (0); -} - - -/* - * Remove a set from the circular list of 1-to-many primaries. 
- */ -static void -remove_from_many(rdc_k_info_t *old) -{ - rdc_u_info_t *uold = &rdc_u_info[old->index]; - rdc_k_info_t *p, *q; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - rdc_many_enter(old); - -#ifdef DEBUG_MANY - cmn_err(CE_NOTE, "!rdc: before remove_from_many"); - print_many(old); -#endif - - if (old->many_next == old) { - /* remove from multihop */ - if ((q = old->multi_next) != NULL) { - ASSERT(q->multi_next == old); - q->multi_next = NULL; - old->multi_next = NULL; - } - - rdc_many_exit(old); - return; - } - - /* search */ - for (p = old->many_next; p->many_next != old; p = p->many_next) - ; - - p->many_next = old->many_next; - old->many_next = old; - - if ((q = old->multi_next) != NULL) { - /* - * old was part of a multihop, so switch multi pointers - * to someone remaining on the many chain - */ - ASSERT(p->multi_next == NULL); - - q->multi_next = p; - p->multi_next = q; - old->multi_next = NULL; - } - -#ifdef DEBUG_MANY - if (p == old) { - cmn_err(CE_NOTE, "!rdc: after remove_from_many empty"); - } else { - cmn_err(CE_NOTE, "!rdc: after remove_from_many"); - print_many(p); - } -#endif - - rdc_clr_mflags(&rdc_u_info[p->index], - (rdc_get_vflags(uold) & RDC_MFLAGS)); - - rdc_many_exit(old); -} - - -static int -_rdc_enable(rdc_set_t *rdc_set, int options, spcs_s_info_t kstatus) -{ - int index; - char *rhost; - struct netbuf *addrp; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_srv_t *svp = NULL; - char *local_file; - char *local_bitmap; - char *diskq; - int rc; - nsc_size_t maxfbas; - rdc_group_t *grp; - - if ((rdc_set->primary.intf[0] == 0) || - (rdc_set->primary.addr.len == 0) || - (rdc_set->primary.file[0] == 0) || - (rdc_set->primary.bitmap[0] == 0) || - (rdc_set->secondary.intf[0] == 0) || - (rdc_set->secondary.addr.len == 0) || - (rdc_set->secondary.file[0] == 0) || - (rdc_set->secondary.bitmap[0] == 0)) { - spcs_s_add(kstatus, RDC_EEMPTY); - return (RDC_EEMPTY); - } - - /* Next check there aren't any enabled rdc sets which match. 
*/ - - mutex_enter(&rdc_conf_lock); - - if (rdc_lookup_byname(rdc_set) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EENABLED, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EENABLED); - } - - if (rdc_lookup_many2one(rdc_set) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EMANY2ONE, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EMANY2ONE); - } - - if (rdc_set->netconfig->knc_proto == NULL) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETCONFIG); - return (RDC_ENETCONFIG); - } - - if (rdc_set->primary.addr.len == 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETBUF, rdc_set->primary.file); - return (RDC_ENETBUF); - } - - if (rdc_set->secondary.addr.len == 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETBUF, rdc_set->secondary.file); - return (RDC_ENETBUF); - } - - /* Check that the local data volume isn't in use as a bitmap */ - if (options & RDC_OPT_PRIMARY) - local_file = rdc_set->primary.file; - else - local_file = rdc_set->secondary.file; - if (rdc_lookup_bitmap(local_file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EVOLINUSE, local_file); - return (RDC_EVOLINUSE); - } - - /* check that the secondary data volume isn't in use */ - if (!(options & RDC_OPT_PRIMARY)) { - local_file = rdc_set->secondary.file; - if (rdc_lookup_secondary(local_file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EVOLINUSE, local_file); - return (RDC_EVOLINUSE); - } - } - - /* check that the local data vol is not in use as a diskqueue */ - if (options & RDC_OPT_PRIMARY) { - if (rdc_lookup_diskq(rdc_set->primary.file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, - RDC_EVOLINUSE, rdc_set->primary.file); - return (RDC_EVOLINUSE); - } - } - - /* Check that the bitmap isn't in use as a data volume */ - if (options & 
RDC_OPT_PRIMARY) - local_bitmap = rdc_set->primary.bitmap; - else - local_bitmap = rdc_set->secondary.bitmap; - if (rdc_lookup_configured(local_bitmap) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EBMPINUSE, local_bitmap); - return (RDC_EBMPINUSE); - } - - /* Check that the bitmap isn't already in use as a bitmap */ - if (rdc_lookup_bitmap(local_bitmap) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EBMPINUSE, local_bitmap); - return (RDC_EBMPINUSE); - } - - /* check that the diskq (if here) is not in use */ - diskq = rdc_set->disk_queue; - if (diskq[0] && rdc_diskq_inuse(rdc_set, diskq)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EDISKQINUSE, diskq); - return (RDC_EDISKQINUSE); - } - - - /* Set urdc->volume_size */ - index = rdc_dev_open(rdc_set, options); - if (index < 0) { - mutex_exit(&rdc_conf_lock); - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EOPEN, rdc_set->primary.intf, - rdc_set->primary.file); - else - spcs_s_add(kstatus, RDC_EOPEN, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EOPEN); - } - - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - /* copy relevant parts of rdc_set to urdc field by field */ - - (void) strncpy(urdc->primary.intf, rdc_set->primary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(urdc->secondary.intf, rdc_set->secondary.intf, - MAX_RDC_HOST_SIZE); - - (void) strncpy(urdc->group_name, rdc_set->group_name, NSC_MAXPATH); - (void) strncpy(urdc->disk_queue, rdc_set->disk_queue, NSC_MAXPATH); - - dup_rdc_netbuf(&rdc_set->primary.addr, &urdc->primary.addr); - (void) strncpy(urdc->primary.file, rdc_set->primary.file, NSC_MAXPATH); - (void) strncpy(urdc->primary.bitmap, rdc_set->primary.bitmap, - NSC_MAXPATH); - - dup_rdc_netbuf(&rdc_set->secondary.addr, &urdc->secondary.addr); - (void) strncpy(urdc->secondary.file, rdc_set->secondary.file, - NSC_MAXPATH); - (void) strncpy(urdc->secondary.bitmap, rdc_set->secondary.bitmap, - NSC_MAXPATH); - - 
urdc->setid = rdc_set->setid; - - /* - * before we try to add to group, or create one, check out - * if we are doing the wrong thing with the diskq - */ - - if (urdc->disk_queue[0] && (options & RDC_OPT_SYNC)) { - mutex_exit(&rdc_conf_lock); - rdc_dev_close(krdc); - spcs_s_add(kstatus, RDC_EQWRONGMODE); - return (RDC_EQWRONGMODE); - } - - if ((rc = add_to_group(krdc, options, RDC_CMD_ENABLE)) != 0) { - mutex_exit(&rdc_conf_lock); - rdc_dev_close(krdc); - if (rc == RDC_EQNOADD) { - spcs_s_add(kstatus, RDC_EQNOADD, rdc_set->disk_queue); - return (RDC_EQNOADD); - } else { - spcs_s_add(kstatus, RDC_EGROUP, - rdc_set->primary.intf, rdc_set->primary.file, - rdc_set->secondary.intf, rdc_set->secondary.file, - rdc_set->group_name); - return (RDC_EGROUP); - } - } - - /* - * maxfbas was set in rdc_dev_open as primary's maxfbas. - * If diskq's maxfbas is smaller, then use diskq's. - */ - grp = krdc->group; - if (grp && RDC_IS_DISKQ(grp) && (grp->diskqfd != 0)) { - rc = _rdc_rsrv_diskq(grp); - if (RDC_SUCCESS(rc)) { - rc = nsc_maxfbas(grp->diskqfd, 0, &maxfbas); - if (rc == 0) { -#ifdef DEBUG - if (krdc->maxfbas != maxfbas) - cmn_err(CE_NOTE, - "!_rdc_enable: diskq maxfbas = %" - NSC_SZFMT ", primary maxfbas = %" - NSC_SZFMT, maxfbas, krdc->maxfbas); -#endif - krdc->maxfbas = min(krdc->maxfbas, maxfbas); - } else { - cmn_err(CE_WARN, - "!_rdc_enable: diskq maxfbas failed (%d)", - rc); - } - _rdc_rlse_diskq(grp); - } else { - cmn_err(CE_WARN, - "!_rdc_enable: diskq reserve failed (%d)", rc); - } - } - - rdc_init_flags(urdc); - (void) strncpy(urdc->direct_file, rdc_set->direct_file, NSC_MAXPATH); - if ((options & RDC_OPT_PRIMARY) && rdc_set->direct_file[0]) { - if (rdc_open_direct(krdc) == NULL) - rdc_set_flags(urdc, RDC_FCAL_FAILED); - } - - krdc->many_next = krdc; - - ASSERT(krdc->type_flag == 0); - krdc->type_flag = RDC_CONFIGURED; - - if (options & RDC_OPT_PRIMARY) - rdc_set_flags(urdc, RDC_PRIMARY); - - if (options & RDC_OPT_ASYNC) - krdc->type_flag |= RDC_ASYNCMODE; - - 
set_busy(krdc); - urdc->syshostid = rdc_set->syshostid; - - if (add_to_many(krdc) < 0) { - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - spcs_s_add(kstatus, RDC_EMULTI); - rc = RDC_EMULTI; - goto fail; - } - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* - * The rdc set is configured but not yet enabled. Other operations must - * ignore this set until it is enabled. - */ - - urdc->sync_pos = 0; - - if (rdc_set->maxqfbas > 0) - urdc->maxqfbas = rdc_set->maxqfbas; - else - urdc->maxqfbas = rdc_maxthres_queue; - - if (rdc_set->maxqitems > 0) - urdc->maxqitems = rdc_set->maxqitems; - else - urdc->maxqitems = rdc_max_qitems; - - if (rdc_set->asyncthr > 0) - urdc->asyncthr = rdc_set->asyncthr; - else - urdc->asyncthr = rdc_asyncthr; - - if (urdc->autosync == -1) { - /* Still unknown */ - if (rdc_set->autosync > 0) - urdc->autosync = 1; - else - urdc->autosync = 0; - } - - urdc->netconfig = rdc_set->netconfig; - - if (options & RDC_OPT_PRIMARY) { - rhost = rdc_set->secondary.intf; - addrp = &rdc_set->secondary.addr; - } else { - rhost = rdc_set->primary.intf; - addrp = &rdc_set->primary.addr; - } - - if (options & RDC_OPT_ASYNC) - rdc_set_flags(urdc, RDC_ASYNC); - - svp = rdc_create_svinfo(rhost, addrp, urdc->netconfig); - if (svp == NULL) { - spcs_s_add(kstatus, ENOMEM); - rc = ENOMEM; - goto fail; - } - urdc->netconfig = NULL; /* This will be no good soon */ - - rdc_kstat_create(index); - - /* Don't set krdc->intf here */ - - if (rdc_enable_bitmap(krdc, options & RDC_OPT_SETBMP) < 0) - goto bmpfail; - - RDC_ZERO_BITREF(krdc); - if (krdc->lsrv == NULL) - krdc->lsrv = svp; - else { -#ifdef DEBUG - cmn_err(CE_WARN, "!_rdc_enable: krdc->lsrv already set: %p", - (void *) krdc->lsrv); -#endif - rdc_destroy_svinfo(svp); - } - svp = NULL; - - /* Configured but not enabled 
*/ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* And finally */ - - krdc->remote_index = -1; - /* Should we set the whole group logging? */ - rdc_set_flags(urdc, RDC_ENABLED | RDC_LOGGING); - - rdc_group_exit(krdc); - - if (rdc_intercept(krdc) != 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ENABLED); - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EREGISTER, urdc->primary.file); - else - spcs_s_add(kstatus, RDC_EREGISTER, - urdc->secondary.file); -#ifdef DEBUG - cmn_err(CE_NOTE, "!nsc_register_path failed %s", - urdc->primary.file); -#endif - rc = RDC_EREGISTER; - goto bmpfail; - } -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: enabled %s %s", urdc->primary.file, - urdc->secondary.file); -#endif - - rdc_write_state(urdc); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (0); - -bmpfail: - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EBITMAP, rdc_set->primary.bitmap); - else - spcs_s_add(kstatus, RDC_EBITMAP, rdc_set->secondary.bitmap); - rc = RDC_EBITMAP; - if (rdc_get_vflags(urdc) & RDC_ENABLED) { - rdc_group_exit(krdc); - (void) rdc_unintercept(krdc); - rdc_group_enter(krdc); - } - -fail: - rdc_kstat_delete(index); - rdc_group_exit(krdc); - if (krdc->intf) { - rdc_if_t *ip = krdc->intf; - mutex_enter(&rdc_conf_lock); - krdc->intf = NULL; - rdc_remove_from_if(ip); - mutex_exit(&rdc_conf_lock); - } - rdc_group_enter(krdc); - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_dev_close(krdc); - rdc_close_direct(krdc); - rdc_destroy_svinfo(svp); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - - mutex_enter(&rdc_conf_lock); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - remove_from_group(krdc); - - if (IS_MANY(krdc) || IS_MULTI(krdc)) - remove_from_many(krdc); - - rdc_u_init(urdc); - - ASSERT(krdc->type_flag & RDC_CONFIGURED); - 
krdc->type_flag = 0; - wakeup_busy(krdc); - - mutex_exit(&rdc_conf_lock); - - return (rc); -} - -static int -rdc_enable(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - int rc; - char itmp[10]; - - if (!(uparms->options & RDC_OPT_SYNC) && - !(uparms->options & RDC_OPT_ASYNC)) { - rc = RDC_EEINVAL; - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - goto done; - } - - if (!(uparms->options & RDC_OPT_PRIMARY) && - !(uparms->options & RDC_OPT_SECONDARY)) { - rc = RDC_EEINVAL; - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - goto done; - } - - if (!(uparms->options & RDC_OPT_SETBMP) && - !(uparms->options & RDC_OPT_CLRBMP)) { - rc = RDC_EEINVAL; - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - goto done; - } - - rc = _rdc_enable(uparms->rdc_set, uparms->options, kstatus); -done: - return (rc); -} - -/* ARGSUSED */ -static int -_rdc_disable(rdc_k_info_t *krdc, rdc_config_t *uap, spcs_s_info_t kstatus) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_if_t *ip; - int index = krdc->index; - disk_queue *q; - rdc_set_t *rdc_set = uap->rdc_set; - - ASSERT(krdc->group != NULL); - rdc_group_enter(krdc); -#ifdef DEBUG - ASSERT(rdc_check(krdc, rdc_set) == 0); -#else - if (((uap->options & RDC_OPT_FORCE_DISABLE) == 0) && - rdc_check(krdc, rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - return (RDC_EALREADY); - } -#endif - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - halt_sync(krdc); - ASSERT(IS_ENABLED(urdc)); - } - q = &krdc->group->diskq; - - if (IS_ASYNC(urdc) && RDC_IS_DISKQ(krdc->group) && - ((!IS_STATE(urdc, RDC_LOGGING)) && (!QEMPTY(q)))) { - krdc->type_flag &= ~RDC_DISABLEPEND; - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EQNOTEMPTY, urdc->disk_queue); - return (RDC_EQNOTEMPTY); - } - 
rdc_group_exit(krdc); - (void) rdc_unintercept(krdc); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: disabled %s %s", urdc->primary.file, - urdc->secondary.file); -#endif - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* - * No new io can come in through the io provider. - * Wait for the async flusher to finish. - */ - - if (IS_ASYNC(urdc) && !RDC_IS_DISKQ(krdc->group)) { - int tries = 2; /* in case of hopelessly stuck flusher threads */ -#ifdef DEBUG - net_queue *qp = &krdc->group->ra_queue; -#endif - do { - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - (void) rdc_drain_queue(krdc->index); - - } while (krdc->group->rdc_writer && tries--); - - /* ok, force it to happen... */ - if (rdc_drain_queue(krdc->index) != 0) { - do { - mutex_enter(&krdc->group->ra_queue.net_qlock); - krdc->group->asyncdis = 1; - cv_broadcast(&krdc->group->asyncqcv); - mutex_exit(&krdc->group->ra_queue.net_qlock); - cmn_err(CE_WARN, - "!SNDR: async I/O pending and not flushed " - "for %s during disable", - urdc->primary.file); -#ifdef DEBUG - cmn_err(CE_WARN, - "!nitems: %" NSC_SZFMT " nblocks: %" - NSC_SZFMT " head: 0x%p tail: 0x%p", - qp->nitems, qp->blocks, - (void *)qp->net_qhead, - (void *)qp->net_qtail); -#endif - } while (krdc->group->rdc_thrnum > 0); - } - } - - mutex_enter(&rdc_conf_lock); - ip = krdc->intf; - krdc->intf = 0; - - if (ip) { - rdc_remove_from_if(ip); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* Must not hold group lock during this function */ - rdc_group_exit(krdc); - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - rdc_group_enter(krdc); - - (void) rdc_clear_state(krdc); - - rdc_free_bitmap(krdc, RDC_CMD_DISABLE); - rdc_close_bitmap(krdc); - - rdc_dev_close(krdc); - rdc_close_direct(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && 
!IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - - /* - * we should now unregister the queue, with no conflicting - * locks held. This is the last(only) member of the group - */ - if (krdc->group && RDC_IS_DISKQ(krdc->group) && - krdc->group->count == 1) { /* stop protecting queue */ - rdc_unintercept_diskq(krdc->group); - } - - mutex_enter(&rdc_conf_lock); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - wait_busy(krdc); - - if (IS_MANY(krdc) || IS_MULTI(krdc)) - remove_from_many(krdc); - - remove_from_group(krdc); - - krdc->remote_index = -1; - ASSERT(krdc->type_flag & RDC_CONFIGURED); - ASSERT(krdc->type_flag & RDC_DISABLEPEND); - krdc->type_flag = 0; -#ifdef DEBUG - if (krdc->dcio_bitmap) - cmn_err(CE_WARN, "!_rdc_disable: possible mem leak, " - "dcio_bitmap"); -#endif - krdc->dcio_bitmap = NULL; - krdc->bitmap_ref = NULL; - krdc->bitmap_size = 0; - krdc->maxfbas = 0; - krdc->bitmap_write = 0; - krdc->disk_status = 0; - rdc_destroy_svinfo(krdc->lsrv); - krdc->lsrv = NULL; - krdc->multi_next = NULL; - - rdc_u_init(urdc); - - mutex_exit(&rdc_conf_lock); - rdc_kstat_delete(index); - - return (0); -} - -static int -rdc_disable(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - int index; - int rc; - - mutex_enter(&rdc_conf_lock); - - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - krdc->type_flag |= RDC_DISABLEPEND; - wait_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - mutex_exit(&rdc_conf_lock); - - rc = _rdc_disable(krdc, uparms, kstatus); - return 
(rc); -} - - -/* - * Checks whether the state of one of the other sets in the 1-many or - * multi-hop config should prevent a sync from starting on this one. - * Return NULL if no just cause or impediment is found, otherwise return - * a pointer to the offending set. - */ -static rdc_u_info_t * -rdc_allow_pri_sync(rdc_u_info_t *urdc, int options) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - rdc_k_info_t *kmulti = NULL; - - ASSERT(rdc_get_vflags(urdc) & RDC_PRIMARY); - - rdc_many_enter(krdc); - - /* - * In the reverse sync case we need to check the previous leg of - * the multi-hop config. The link to that set can be from any of - * the 1-many list, so as we go through we keep an eye open for it. - */ - if ((options & RDC_OPT_REVERSE) && (IS_MULTI(krdc))) { - /* This set links to the first leg */ - ktmp = krdc->multi_next; - utmp = &rdc_u_info[ktmp->index]; - if (IS_ENABLED(utmp)) - kmulti = ktmp; - } - - if (IS_MANY(krdc)) { - for (ktmp = krdc->many_next; ktmp != krdc; - ktmp = ktmp->many_next) { - utmp = &rdc_u_info[ktmp->index]; - - if (!IS_ENABLED(utmp)) - continue; - - if (options & RDC_OPT_FORWARD) { - /* - * Reverse sync needed is bad, as it means a - * reverse sync in progress or started and - * didn't complete, so this primary volume - * is not consistent. So we shouldn't copy - * it to its secondary. - */ - if (rdc_get_mflags(utmp) & RDC_RSYNC_NEEDED) { - rdc_many_exit(krdc); - return (utmp); - } - } else { - /* Reverse, so see if we need to spot kmulti */ - if ((kmulti == NULL) && (IS_MULTI(ktmp))) { - /* This set links to the first leg */ - kmulti = ktmp->multi_next; - if (!IS_ENABLED( - &rdc_u_info[kmulti->index])) - kmulti = NULL; - } - - /* - * Non-logging is bad, as the bitmap will - * be updated with the bits for this sync. 
- */ - if (!(rdc_get_vflags(utmp) & RDC_LOGGING)) { - rdc_many_exit(krdc); - return (utmp); - } - } - } - } - - if (kmulti) { - utmp = &rdc_u_info[kmulti->index]; - ktmp = kmulti; /* In case we decide we do need to use ktmp */ - - ASSERT(options & RDC_OPT_REVERSE); - - if (IS_REPLICATING(utmp)) { - /* - * Replicating is bad as data is already flowing to - * the target of the requested sync operation. - */ - rdc_many_exit(krdc); - return (utmp); - } - - if (rdc_get_vflags(utmp) & RDC_SYNCING) { - /* - * Forward sync in progress is bad, as data is - * already flowing to the target of the requested - * sync operation. - * Reverse sync in progress is bad, as the primary - * has already decided which data to copy. - */ - rdc_many_exit(krdc); - return (utmp); - } - - /* - * Clear the "sync needed" flags, as the multi-hop secondary - * will be updated via this requested sync operation, so does - * not need to complete its aborted forward sync. - */ - if (rdc_get_vflags(utmp) & RDC_SYNC_NEEDED) - rdc_clr_flags(utmp, RDC_SYNC_NEEDED); - } - - if (IS_MANY(krdc) && (options & RDC_OPT_REVERSE)) { - for (ktmp = krdc->many_next; ktmp != krdc; - ktmp = ktmp->many_next) { - utmp = &rdc_u_info[ktmp->index]; - if (!IS_ENABLED(utmp)) - continue; - - /* - * Clear any "reverse sync needed" flags, as the - * volume will be updated via this requested - * sync operation, so does not need to complete - * its aborted reverse sync. 
- */ - if (rdc_get_mflags(utmp) & RDC_RSYNC_NEEDED) - rdc_clr_mflags(utmp, RDC_RSYNC_NEEDED); - } - } - - rdc_many_exit(krdc); - - return (NULL); -} - -static void -_rdc_sync_wrthr(void *thrinfo) -{ - rdc_syncthr_t *syncinfo = (rdc_syncthr_t *)thrinfo; - nsc_buf_t *handle = NULL; - rdc_k_info_t *krdc = syncinfo->krdc; - int rc; - int tries = 0; - - DTRACE_PROBE2(rdc_sync_loop_netwrite_start, int, krdc->index, - nsc_buf_t *, handle); - -retry: - rc = nsc_alloc_buf(RDC_U_FD(krdc), syncinfo->offset, syncinfo->len, - NSC_READ | NSC_NOCACHE, &handle); - - if (!RDC_SUCCESS(rc) || krdc->remote_index < 0) { - DTRACE_PROBE(rdc_sync_wrthr_alloc_buf_err); - goto failed; - } - - rdc_group_enter(krdc); - if ((krdc->disk_status == 1) || (krdc->dcio_bitmap == NULL)) { - rdc_group_exit(krdc); - goto failed; - } - rdc_group_exit(krdc); - - if ((rc = rdc_net_write(krdc->index, krdc->remote_index, handle, - handle->sb_pos, handle->sb_len, RDC_NOSEQ, RDC_NOQUE, NULL)) > 0) { - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - /* - * The following is to handle - * the case where the secondary side - * has thrown our buffer handle token away in a - * attempt to preserve its health on restart - */ - if ((rc == EPROTO) && (tries < 3)) { - (void) nsc_free_buf(handle); - handle = NULL; - tries++; - delay(HZ >> 2); - goto retry; - } - - DTRACE_PROBE(rdc_sync_wrthr_remote_write_err); - cmn_err(CE_WARN, "!rdc_sync_wrthr: remote write failed (%d) " - "0x%x", rc, rdc_get_vflags(urdc)); - - goto failed; - } - (void) nsc_free_buf(handle); - handle = NULL; - - return; -failed: - (void) nsc_free_buf(handle); - syncinfo->status->offset = syncinfo->offset; -} - -/* - * see above comments on _rdc_sync_wrthr - */ -static void -_rdc_sync_rdthr(void *thrinfo) -{ - rdc_syncthr_t *syncinfo = (rdc_syncthr_t *)thrinfo; - nsc_buf_t *handle = NULL; - rdc_k_info_t *krdc = syncinfo->krdc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rc; - - rc = nsc_alloc_buf(RDC_U_FD(krdc), syncinfo->offset, 
syncinfo->len, - NSC_WRITE | NSC_WRTHRU | NSC_NOCACHE, &handle); - - if (!RDC_SUCCESS(rc) || krdc->remote_index < 0) { - goto failed; - } - rdc_group_enter(krdc); - if ((krdc->disk_status == 1) || (krdc->dcio_bitmap == NULL)) { - rdc_group_exit(krdc); - goto failed; - } - rdc_group_exit(krdc); - - rc = rdc_net_read(krdc->index, krdc->remote_index, handle, - handle->sb_pos, handle->sb_len); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!rdc_sync_rdthr: remote read failed(%d)", rc); - goto failed; - } - if (!IS_STATE(urdc, RDC_FULL)) - rdc_set_bitmap_many(krdc, handle->sb_pos, handle->sb_len); - - rc = nsc_write(handle, handle->sb_pos, handle->sb_len, 0); - - if (!RDC_SUCCESS(rc)) { - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, "nsc_write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - goto failed; - } - - (void) nsc_free_buf(handle); - handle = NULL; - - return; -failed: - (void) nsc_free_buf(handle); - syncinfo->status->offset = syncinfo->offset; -} - -/* - * _rdc_sync_wrthr - * sync loop write thread - * if there are avail threads, we have not - * used up the pipe, so the sync loop will, if - * possible use these to multithread the write/read - */ -void -_rdc_sync_thread(void *thrinfo) -{ - rdc_syncthr_t *syncinfo = (rdc_syncthr_t *)thrinfo; - rdc_k_info_t *krdc = syncinfo->krdc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_thrsync_t *sync = &krdc->syncs; - uint_t bitmask; - int rc; - - rc = _rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL); - if (!RDC_SUCCESS(rc)) - goto failed; - - if (IS_STATE(urdc, RDC_SLAVE)) - _rdc_sync_rdthr(thrinfo); - else - _rdc_sync_wrthr(thrinfo); - - _rdc_rlse_devs(krdc, RDC_RAW); - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdc_sync_wrthr: NULL bitmap"); -#else - /*EMPTY*/ -#endif - } else if (syncinfo->status->offset < 0) { - - RDC_SET_BITMASK(syncinfo->offset, syncinfo->len, &bitmask); - RDC_CLR_BITMAP(krdc, syncinfo->offset, syncinfo->len, \ - bitmask, 
RDC_BIT_FORCE); - } - -failed: - /* - * done with this, get rid of it. - * the status is not freed, it should still be a status chain - * that _rdc_sync() has the head of - */ - kmem_free(syncinfo, sizeof (*syncinfo)); - - /* - * decrement the global sync thread num - */ - mutex_enter(&sync_info.lock); - sync_info.active_thr--; - /* LINTED */ - RDC_AVAIL_THR_TUNE(sync_info); - mutex_exit(&sync_info.lock); - - /* - * krdc specific stuff - */ - mutex_enter(&sync->lock); - sync->complete++; - cv_broadcast(&sync->cv); - mutex_exit(&sync->lock); -} - -int -_rdc_setup_syncthr(rdc_syncthr_t **synthr, nsc_off_t offset, - nsc_size_t len, rdc_k_info_t *krdc, sync_status_t *stats) -{ - rdc_syncthr_t *tmp; - /* alloc here, free in the sync thread */ - tmp = - (rdc_syncthr_t *)kmem_zalloc(sizeof (rdc_syncthr_t), KM_NOSLEEP); - - if (tmp == NULL) - return (-1); - tmp->offset = offset; - tmp->len = len; - tmp->status = stats; - tmp->krdc = krdc; - - *synthr = tmp; - return (0); -} - -sync_status_t * -_rdc_new_sync_status() -{ - sync_status_t *s; - - s = (sync_status_t *)kmem_zalloc(sizeof (*s), KM_NOSLEEP); - s->offset = -1; - return (s); -} - -void -_rdc_free_sync_status(sync_status_t *status) -{ - sync_status_t *s; - - while (status) { - s = status->next; - kmem_free(status, sizeof (*status)); - status = s; - } -} -int -_rdc_sync_status_ok(sync_status_t *status, int *offset) -{ -#ifdef DEBUG_SYNCSTATUS - int i = 0; -#endif - while (status) { - if (status->offset >= 0) { - *offset = status->offset; - return (-1); - } - status = status->next; -#ifdef DEBUG_SYNCSTATUS - i++; -#endif - } -#ifdef DEBUGSYNCSTATUS - cmn_err(CE_NOTE, "!rdc_sync_status_ok: checked %d statuses", i); -#endif - return (0); -} - -int mtsync = 1; -/* - * _rdc_sync() : rdc sync loop - * - */ -static void -_rdc_sync(rdc_k_info_t *krdc) -{ - nsc_size_t size = 0; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rtype; - int sts; - int reserved = 0; - nsc_buf_t *alloc_h = NULL; - nsc_buf_t *handle = NULL; - 
nsc_off_t mask; - nsc_size_t maxbit; - nsc_size_t len; - nsc_off_t offset = 0; - int sync_completed = 0; - int tries = 0; - int rc; - int queuing = 0; - uint_t bitmask; - sync_status_t *ss, *sync_status = NULL; - rdc_thrsync_t *sync = &krdc->syncs; - rdc_syncthr_t *syncinfo; - nsthread_t *trc = NULL; - - if (IS_STATE(urdc, RDC_QUEUING) && !IS_STATE(urdc, RDC_FULL)) { - /* flusher is handling the sync in the update case */ - queuing = 1; - goto sync_done; - } - - /* - * Main sync/resync loop - */ - DTRACE_PROBE(rdc_sync_loop_start); - - rtype = RDC_RAW; - sts = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - - DTRACE_PROBE(rdc_sync_loop_rsrv); - - if (sts != 0) - goto failed_noincr; - - reserved = 1; - - /* - * pre-allocate a handle if we can - speeds up the sync. - */ - - if (rdc_prealloc_handle) { - alloc_h = nsc_alloc_handle(RDC_U_FD(krdc), NULL, NULL, NULL); -#ifdef DEBUG - if (!alloc_h) { - cmn_err(CE_WARN, - "!rdc sync: failed to pre-alloc handle"); - } -#endif - } else { - alloc_h = NULL; - } - - ASSERT(urdc->volume_size != 0); - size = urdc->volume_size; - mask = ~(LOG_TO_FBA_NUM(1) - 1); - maxbit = FBA_TO_LOG_NUM(size - 1); - - /* - * as this while loop can also move data, it is counted as a - * sync loop thread - */ - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_set_flags(urdc, RDC_SYNCING); - krdc->group->synccount++; - rdc_group_exit(krdc); - mutex_enter(&sync_info.lock); - sync_info.active_thr++; - /* LINTED */ - RDC_AVAIL_THR_TUNE(sync_info); - mutex_exit(&sync_info.lock); - - while (offset < size) { - rdc_group_enter(krdc); - ASSERT(krdc->aux_state & RDC_AUXSYNCIP); - if (krdc->disk_status == 1 || krdc->dcio_bitmap == NULL) { - rdc_group_exit(krdc); - if (krdc->disk_status == 1) { - DTRACE_PROBE(rdc_sync_loop_disk_status_err); - } else { - DTRACE_PROBE(rdc_sync_loop_dcio_bitmap_err); - } - goto failed; /* halt sync */ - } - rdc_group_exit(krdc); - - if (!(rdc_get_vflags(urdc) & RDC_FULL)) { - mutex_enter(&krdc->syncbitmutex); - 
krdc->syncbitpos = FBA_TO_LOG_NUM(offset); - len = 0; - - /* skip unnecessary chunks */ - - while (krdc->syncbitpos <= maxbit && - !RDC_BIT_ISSET(krdc, krdc->syncbitpos)) { - offset += LOG_TO_FBA_NUM(1); - krdc->syncbitpos++; - } - - /* check for boundary */ - - if (offset >= size) { - mutex_exit(&krdc->syncbitmutex); - goto sync_done; - } - - /* find maximal length we can transfer */ - - while (krdc->syncbitpos <= maxbit && - RDC_BIT_ISSET(krdc, krdc->syncbitpos)) { - len += LOG_TO_FBA_NUM(1); - krdc->syncbitpos++; - /* we can only read maxfbas anyways */ - if (len >= krdc->maxfbas) - break; - } - - len = min(len, (size - offset)); - - } else { - len = size - offset; - } - - /* truncate to the io provider limit */ - ASSERT(krdc->maxfbas != 0); - len = min(len, krdc->maxfbas); - - if (len > LOG_TO_FBA_NUM(1)) { - /* - * If the update is larger than a bitmap chunk, - * then truncate to a whole number of bitmap - * chunks. - * - * If the update is smaller than a bitmap - * chunk, this must be the last write. - */ - len &= mask; - } - - if (!(rdc_get_vflags(urdc) & RDC_FULL)) { - krdc->syncbitpos = FBA_TO_LOG_NUM(offset + len); - mutex_exit(&krdc->syncbitmutex); - } - - /* - * Find out if we can reserve a thread here ... 
- * note: skip the mutex for the first check, if the number - * is up there, why bother even grabbing the mutex to - * only realize that we can't have a thread anyways - */ - - if (mtsync && sync_info.active_thr < RDC_MAX_SYNC_THREADS) { - - mutex_enter(&sync_info.lock); - if (sync_info.avail_thr >= 1) { - if (sync_status == NULL) { - ss = sync_status = - _rdc_new_sync_status(); - } else { - ss = ss->next = _rdc_new_sync_status(); - } - if (ss == NULL) { - mutex_exit(&sync_info.lock); -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_sync: can't " - "allocate status for mt sync"); -#endif - goto retry; - } - /* - * syncinfo protected by sync_info lock but - * not part of the sync_info structure - * be careful if moving - */ - if (_rdc_setup_syncthr(&syncinfo, - offset, len, krdc, ss) < 0) { - _rdc_free_sync_status(ss); - } - - trc = nst_create(sync_info.rdc_syncset, - _rdc_sync_thread, syncinfo, NST_SLEEP); - - if (trc == NULL) { - mutex_exit(&sync_info.lock); -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_sync: unable to " - "mt sync"); -#endif - _rdc_free_sync_status(ss); - kmem_free(syncinfo, sizeof (*syncinfo)); - syncinfo = NULL; - goto retry; - } else { - mutex_enter(&sync->lock); - sync->threads++; - mutex_exit(&sync->lock); - } - - sync_info.active_thr++; - /* LINTED */ - RDC_AVAIL_THR_TUNE(sync_info); - - mutex_exit(&sync_info.lock); - goto threaded; - } - mutex_exit(&sync_info.lock); - } -retry: - handle = alloc_h; - DTRACE_PROBE(rdc_sync_loop_allocbuf_start); - if (rdc_get_vflags(urdc) & RDC_SLAVE) - sts = nsc_alloc_buf(RDC_U_FD(krdc), offset, len, - NSC_WRITE | NSC_WRTHRU | NSC_NOCACHE, &handle); - else - sts = nsc_alloc_buf(RDC_U_FD(krdc), offset, len, - NSC_READ | NSC_NOCACHE, &handle); - - DTRACE_PROBE(rdc_sync_loop_allocbuf_end); - if (sts > 0) { - if (handle && handle != alloc_h) { - (void) nsc_free_buf(handle); - } - - handle = NULL; - DTRACE_PROBE(rdc_sync_loop_allocbuf_err); - goto failed; - } - - if (rdc_get_vflags(urdc) & RDC_SLAVE) { - /* overwrite buffer with 
remote data */ - sts = rdc_net_read(krdc->index, krdc->remote_index, - handle, handle->sb_pos, handle->sb_len); - - if (!RDC_SUCCESS(sts)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc sync: remote read failed (%d)", sts); -#endif - DTRACE_PROBE(rdc_sync_loop_remote_read_err); - goto failed; - } - if (!(rdc_get_vflags(urdc) & RDC_FULL)) - rdc_set_bitmap_many(krdc, handle->sb_pos, - handle->sb_len); - - /* commit locally */ - - sts = nsc_write(handle, handle->sb_pos, - handle->sb_len, 0); - - if (!RDC_SUCCESS(sts)) { - /* reverse sync needed already set */ - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "write failed during sync"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - DTRACE_PROBE(rdc_sync_loop_nsc_write_err); - goto failed; - } - } else { - /* send local data to remote */ - DTRACE_PROBE2(rdc_sync_loop_netwrite_start, - int, krdc->index, nsc_buf_t *, handle); - - if ((sts = rdc_net_write(krdc->index, - krdc->remote_index, handle, handle->sb_pos, - handle->sb_len, RDC_NOSEQ, RDC_NOQUE, NULL)) > 0) { - - /* - * The following is to handle - * the case where the secondary side - * has thrown our buffer handle token away in a - * attempt to preserve its health on restart - */ - if ((sts == EPROTO) && (tries < 3)) { - (void) nsc_free_buf(handle); - handle = NULL; - tries++; - delay(HZ >> 2); - goto retry; - } -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc sync: remote write failed (%d) 0x%x", - sts, rdc_get_vflags(urdc)); -#endif - DTRACE_PROBE(rdc_sync_loop_netwrite_err); - goto failed; - } - DTRACE_PROBE(rdc_sync_loop_netwrite_end); - } - - (void) nsc_free_buf(handle); - handle = NULL; - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdc_sync: NULL bitmap"); -#else - ; - /*EMPTY*/ -#endif - } else { - - RDC_SET_BITMASK(offset, len, &bitmask); - RDC_CLR_BITMAP(krdc, offset, len, bitmask, \ - RDC_BIT_FORCE); - ASSERT(!IS_ASYNC(urdc)); - } - - /* - * Only release/reserve if someone is waiting - */ - if 
(krdc->devices->id_release || nsc_waiting(RDC_U_FD(krdc))) { - DTRACE_PROBE(rdc_sync_loop_rlse_start); - if (alloc_h) { - (void) nsc_free_handle(alloc_h); - alloc_h = NULL; - } - - _rdc_rlse_devs(krdc, rtype); - reserved = 0; - delay(2); - - rtype = RDC_RAW; - sts = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (sts != 0) { - handle = NULL; - DTRACE_PROBE(rdc_sync_loop_rdc_rsrv_err); - goto failed; - } - - reserved = 1; - - if (rdc_prealloc_handle) { - alloc_h = nsc_alloc_handle(RDC_U_FD(krdc), - NULL, NULL, NULL); -#ifdef DEBUG - if (!alloc_h) { - cmn_err(CE_WARN, "!rdc_sync: " - "failed to pre-alloc handle"); - } -#endif - } - DTRACE_PROBE(rdc_sync_loop_rlse_end); - } -threaded: - offset += len; - urdc->sync_pos = offset; - } - -sync_done: - sync_completed = 1; - -failed: - krdc->group->synccount--; -failed_noincr: - mutex_enter(&sync->lock); - while (sync->complete != sync->threads) { - cv_wait(&sync->cv, &sync->lock); - } - sync->complete = 0; - sync->threads = 0; - mutex_exit(&sync->lock); - - /* - * if sync_completed is 0 here, - * we know that the main sync thread failed anyway - * so just free the statuses and fail - */ - if (sync_completed && (_rdc_sync_status_ok(sync_status, &rc) < 0)) { - urdc->sync_pos = rc; - sync_completed = 0; /* at least 1 thread failed */ - } - - _rdc_free_sync_status(sync_status); - - /* - * we didn't increment, we didn't even sync, - * so don't dec sync_info.active_thr - */ - if (!queuing) { - mutex_enter(&sync_info.lock); - sync_info.active_thr--; - /* LINTED */ - RDC_AVAIL_THR_TUNE(sync_info); - mutex_exit(&sync_info.lock); - } - - if (handle) { - (void) nsc_free_buf(handle); - } - - if (alloc_h) { - (void) nsc_free_handle(alloc_h); - } - - if (reserved) { - _rdc_rlse_devs(krdc, rtype); - } - -notstarted: - rdc_group_enter(krdc); - ASSERT(krdc->aux_state & RDC_AUXSYNCIP); - if (IS_STATE(urdc, RDC_QUEUING)) - rdc_clr_flags(urdc, RDC_QUEUING); - - if (sync_completed) { - (void) rdc_net_state(krdc->index, CCIO_DONE); - } else { 
- (void) rdc_net_state(krdc->index, CCIO_ENABLELOG); - } - - rdc_clr_flags(urdc, RDC_SYNCING); - if (rdc_get_vflags(urdc) & RDC_SLAVE) { - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - } - if (krdc->type_flag & RDC_ASYNCMODE) - rdc_set_flags(urdc, RDC_ASYNC); - if (sync_completed) { - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - } else { - krdc->remote_index = -1; - rdc_set_flags_log(urdc, RDC_LOGGING, "sync failed to complete"); - } - rdc_group_exit(krdc); - rdc_write_state(urdc); - - mutex_enter(&net_blk_lock); - if (sync_completed) - krdc->sync_done = RDC_COMPLETED; - else - krdc->sync_done = RDC_FAILED; - cv_broadcast(&krdc->synccv); - mutex_exit(&net_blk_lock); - -} - - -static int -rdc_sync(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_set_t *rdc_set = uparms->rdc_set; - int options = uparms->options; - int rc = 0; - int busy = 0; - int index; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_k_info_t *kmulti; - rdc_u_info_t *umulti; - rdc_group_t *group; - rdc_srv_t *svp; - int sm, um, md; - int sync_completed = 0; - int thrcount; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - rc = RDC_EALREADY; - goto notstarted; - } - - urdc = &rdc_u_info[index]; - group = krdc->group; - set_busy(krdc); - busy = 1; - if ((krdc->type_flag == 0) || (krdc->type_flag & RDC_DISABLEPEND)) { - /* A resume or enable failed or we raced with a teardown */ - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - rc = RDC_EALREADY; - goto notstarted; - } - mutex_exit(&rdc_conf_lock); - rdc_group_enter(krdc); - - if (!IS_STATE(urdc, RDC_LOGGING)) { - spcs_s_add(kstatus, RDC_ESETNOTLOGGING, 
urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_ENOTLOGGING; - goto notstarted_unlock; - } - - if (rdc_check(krdc, rdc_set)) { - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - rc = RDC_EALREADY; - goto notstarted_unlock; - } - - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - spcs_s_add(kstatus, RDC_ENOTPRIMARY, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - rc = RDC_ENOTPRIMARY; - goto notstarted_unlock; - } - - if ((options & RDC_OPT_REVERSE) && (IS_STATE(urdc, RDC_QUEUING))) { - /* - * cannot reverse sync when queuing, need to go logging first - */ - spcs_s_add(kstatus, RDC_EQNORSYNC, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - rc = RDC_EQNORSYNC; - goto notstarted_unlock; - } - - svp = krdc->lsrv; - krdc->intf = rdc_add_to_if(svp, &(urdc->primary.addr), - &(urdc->secondary.addr), 1); - - if (!krdc->intf) { - spcs_s_add(kstatus, RDC_EADDTOIF, urdc->primary.intf, - urdc->secondary.intf); - rc = RDC_EADDTOIF; - goto notstarted_unlock; - } - - if (urdc->volume_size == 0) { - /* Implies reserve failed when previous resume was done */ - rdc_get_details(krdc); - } - if (urdc->volume_size == 0) { - spcs_s_add(kstatus, RDC_ENOBMAP); - rc = RDC_ENOBMAP; - goto notstarted_unlock; - } - - if (krdc->dcio_bitmap == NULL) { - if (rdc_resume_bitmap(krdc) < 0) { - spcs_s_add(kstatus, RDC_ENOBMAP); - rc = RDC_ENOBMAP; - goto notstarted_unlock; - } - } - - if ((rdc_get_vflags(urdc) & RDC_BMP_FAILED) && (krdc->bitmapfd)) { - if (rdc_reset_bitmap(krdc)) { - spcs_s_add(kstatus, RDC_EBITMAP); - rc = RDC_EBITMAP; - goto notstarted_unlock; - } - } - - if (IS_MANY(krdc) || IS_MULTI(krdc)) { - rdc_u_info_t *ubad; - - if ((ubad = rdc_allow_pri_sync(urdc, options)) != NULL) { - spcs_s_add(kstatus, RDC_ESTATE, - ubad->primary.intf, ubad->primary.file, - ubad->secondary.intf, ubad->secondary.file); - rc = RDC_ESTATE; - goto 
notstarted_unlock; - } - } - - /* - * there is a small window where _rdc_sync is still - * running, but has cleared the RDC_SYNCING flag. - * Use aux_state which is only cleared - * after _rdc_sync had done its 'death' broadcast. - */ - if (krdc->aux_state & RDC_AUXSYNCIP) { -#ifdef DEBUG - /* parenthesized: a bare !flags & RDC_SYNCING parses as (!flags) & RDC_SYNCING */ - if (!(rdc_get_vflags(urdc) & RDC_SYNCING)) { - cmn_err(CE_WARN, "!rdc_sync: " - "RDC_AUXSYNCIP set, SYNCING off"); - } -#endif - spcs_s_add(kstatus, RDC_ESYNCING, rdc_set->primary.file); - rc = RDC_ESYNCING; - goto notstarted_unlock; - } - if (krdc->disk_status == 1) { - spcs_s_add(kstatus, RDC_ESYNCING, rdc_set->primary.file); - rc = RDC_ESYNCING; - goto notstarted_unlock; - } - - if ((options & RDC_OPT_FORWARD) && - (rdc_get_mflags(urdc) & RDC_RSYNC_NEEDED)) { - /* cannot forward sync if a reverse sync is needed */ - spcs_s_add(kstatus, RDC_ERSYNCNEEDED, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - rc = RDC_ERSYNCNEEDED; - goto notstarted_unlock; - } - - urdc->sync_pos = 0; - - /* Check if the rdc set is accessible on the remote node */ - if (rdc_net_getstate(krdc, &sm, &um, &md, FALSE) < 0) { - /* - * Remote end may be inaccessible, or the rdc set is not - * enabled at the remote end. - */ - spcs_s_add(kstatus, RDC_ECONNOPEN, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_ECONNOPEN; - goto notstarted_unlock; - } - if (options & RDC_OPT_REVERSE) - krdc->remote_index = rdc_net_state(index, CCIO_RSYNC); - else - krdc->remote_index = rdc_net_state(index, CCIO_SLAVE); - if (krdc->remote_index < 0) { - /* - * Remote node probably not in a valid state to be synced, - * as the state was fetched OK above. 
- */ - spcs_s_add(kstatus, RDC_ERSTATE, urdc->secondary.intf, - urdc->secondary.file, urdc->primary.intf, - urdc->primary.file); - rc = RDC_ERSTATE; - goto notstarted_unlock; - } - - rc = check_filesize(index, kstatus); - if (rc != 0) { - (void) rdc_net_state(krdc->index, CCIO_ENABLELOG); - goto notstarted_unlock; - } - - krdc->sync_done = 0; - - mutex_enter(&krdc->bmapmutex); - krdc->aux_state |= RDC_AUXSYNCIP; - mutex_exit(&krdc->bmapmutex); - - if (options & RDC_OPT_REVERSE) { - rdc_many_enter(krdc); - rdc_set_mflags(urdc, RDC_SLAVE | RDC_RSYNC_NEEDED); - mutex_enter(&krdc->bmapmutex); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - mutex_exit(&krdc->bmapmutex); - rdc_write_state(urdc); - /* LINTED */ - if (kmulti = krdc->multi_next) { - umulti = &rdc_u_info[kmulti->index]; - if (IS_ENABLED(umulti) && (rdc_get_vflags(umulti) & - (RDC_VOL_FAILED | RDC_SYNC_NEEDED))) { - rdc_clr_flags(umulti, RDC_SYNC_NEEDED); - rdc_clr_flags(umulti, RDC_VOL_FAILED); - rdc_write_state(umulti); - } - } - rdc_many_exit(krdc); - } else { - rdc_clr_flags(urdc, RDC_FCAL_FAILED); - rdc_write_state(urdc); - } - - if (options & RDC_OPT_UPDATE) { - ASSERT(urdc->volume_size != 0); - if (rdc_net_getbmap(index, - BMAP_LOG_BYTES(urdc->volume_size)) > 0) { - spcs_s_add(kstatus, RDC_ENOBMAP); - rc = RDC_ENOBMAP; - - (void) rdc_net_state(index, CCIO_ENABLELOG); - - rdc_clr_flags(urdc, RDC_SYNCING); - if (options & RDC_OPT_REVERSE) { - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - } - if (krdc->type_flag & RDC_ASYNCMODE) - rdc_set_flags(urdc, RDC_ASYNC); - krdc->remote_index = -1; - rdc_set_flags_log(urdc, RDC_LOGGING, - "failed to read remote bitmap"); - rdc_write_state(urdc); - goto failed; - } - rdc_clr_flags(urdc, RDC_FULL); - } else { - /* - * This is a full sync (not an update sync), mark the - * entire bitmap dirty - */ - (void) RDC_FILL_BITMAP(krdc, FALSE); - - rdc_set_flags(urdc, RDC_FULL); - } - - rdc_group_exit(krdc); - - /* - * allow diskq->memq flusher to 
wake up - */ - mutex_enter(&krdc->group->ra_queue.net_qlock); - krdc->group->ra_queue.qfflags &= ~RDC_QFILLSLEEP; - mutex_exit(&krdc->group->ra_queue.net_qlock); - - /* - * if this is a full sync on a non-diskq set or - * a diskq set that has failed, clear the async flag - */ - if (krdc->type_flag & RDC_ASYNCMODE) { - if ((!(options & RDC_OPT_UPDATE)) || - (!RDC_IS_DISKQ(krdc->group)) || - (!(IS_STATE(urdc, RDC_QUEUING)))) { - /* full syncs, or core queue are synchronous */ - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - } - - /* - * if the queue failed because it was full, lets see - * if we can restart it. After _rdc_sync() is done - * the modes will switch and we will begin disk - * queuing again. NOTE: this should only be called - * once per group, as it clears state for all group - * members, also clears the async flag for all members - */ - if (IS_STATE(urdc, RDC_DISKQ_FAILED)) { - rdc_unfail_diskq(krdc); - } else { - /* don't add insult to injury by flushing a dead queue */ - - /* - * if we are updating, and a diskq and - * the async thread isn't active, start - * it up. - */ - if ((options & RDC_OPT_UPDATE) && - (IS_STATE(urdc, RDC_QUEUING))) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNCING); - rdc_group_exit(krdc); - mutex_enter(&krdc->group->ra_queue.net_qlock); - if (krdc->group->ra_queue.qfill_sleeping == - RDC_QFILL_ASLEEP) - cv_broadcast(&group->ra_queue.qfcv); - mutex_exit(&krdc->group->ra_queue.net_qlock); - thrcount = urdc->asyncthr; - while ((thrcount-- > 0) && - !krdc->group->rdc_writer) { - (void) rdc_writer(krdc->index); - } - } - } - } - - /* - * For a reverse sync, merge the current bitmap with all other sets - * that share this volume. 
- */ - if (options & RDC_OPT_REVERSE) { -retry_many: - rdc_many_enter(krdc); - if (IS_MANY(krdc)) { - rdc_k_info_t *kmany; - rdc_u_info_t *umany; - - for (kmany = krdc->many_next; kmany != krdc; - kmany = kmany->many_next) { - umany = &rdc_u_info[kmany->index]; - if (!IS_ENABLED(umany)) - continue; - ASSERT(umany->flags & RDC_PRIMARY); - - if (!mutex_tryenter(&kmany->group->lock)) { - rdc_many_exit(krdc); - /* May merge more than once */ - goto retry_many; - } - rdc_merge_bitmaps(krdc, kmany); - mutex_exit(&kmany->group->lock); - } - } - rdc_many_exit(krdc); - -retry_multi: - rdc_many_enter(krdc); - if (IS_MULTI(krdc)) { - rdc_k_info_t *kmulti = krdc->multi_next; - rdc_u_info_t *umulti = &rdc_u_info[kmulti->index]; - - if (IS_ENABLED(umulti)) { - ASSERT(!(umulti->flags & RDC_PRIMARY)); - - if (!mutex_tryenter(&kmulti->group->lock)) { - rdc_many_exit(krdc); - goto retry_multi; - } - rdc_merge_bitmaps(krdc, kmulti); - mutex_exit(&kmulti->group->lock); - } - } - rdc_many_exit(krdc); - } - - rdc_group_enter(krdc); - - if (krdc->bitmap_write == 0) { - if (rdc_write_bitmap_fill(krdc) >= 0) - krdc->bitmap_write = -1; - } - - if (krdc->bitmap_write > 0) - (void) rdc_write_bitmap(krdc); - - urdc->bits_set = RDC_COUNT_BITMAP(krdc); - - rdc_group_exit(krdc); - - if (options & RDC_OPT_REVERSE) { - (void) _rdc_sync_event_notify(RDC_SYNC_START, - urdc->primary.file, urdc->group_name); - } - - /* Now set off the sync itself */ - - mutex_enter(&net_blk_lock); - if (nsc_create_process( - (void (*)(void *))_rdc_sync, (void *)krdc, FALSE)) { - mutex_exit(&net_blk_lock); - spcs_s_add(kstatus, RDC_ENOPROC); - /* - * We used to just return here, - * but we need to clear the AUXSYNCIP bit - * and there is a very small chance that - * someone may be waiting on the disk_status flag. - */ - rc = RDC_ENOPROC; - /* - * need the group lock held at failed. 
- */ - rdc_group_enter(krdc); - goto failed; - } - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - busy = 0; - mutex_exit(&rdc_conf_lock); - - while (krdc->sync_done == 0) - cv_wait(&krdc->synccv, &net_blk_lock); - mutex_exit(&net_blk_lock); - - rdc_group_enter(krdc); - - if (krdc->sync_done == RDC_FAILED) { - char siztmp1[16]; - (void) spcs_s_inttostring( - urdc->sync_pos, siztmp1, sizeof (siztmp1), - 0); - spcs_s_add(kstatus, RDC_EFAIL, siztmp1); - rc = RDC_EFAIL; - } else - sync_completed = 1; - -failed: - /* - * We use this flag now to make halt_sync() wait for - * us to terminate and let us take the group lock. - */ - krdc->aux_state &= ~RDC_AUXSYNCIP; - if (krdc->disk_status == 1) { - krdc->disk_status = 0; - cv_broadcast(&krdc->haltcv); - } - -notstarted_unlock: - rdc_group_exit(krdc); - - if (sync_completed && (options & RDC_OPT_REVERSE)) { - (void) _rdc_sync_event_notify(RDC_SYNC_DONE, - urdc->primary.file, urdc->group_name); - } - -notstarted: - if (busy) { - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - } - - return (rc); -} - -/* ARGSUSED */ -static int -_rdc_suspend(rdc_k_info_t *krdc, rdc_set_t *rdc_set, spcs_s_info_t kstatus) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_if_t *ip; - int index = krdc->index; - - ASSERT(krdc->group != NULL); - rdc_group_enter(krdc); -#ifdef DEBUG - ASSERT(rdc_check(krdc, rdc_set) == 0); -#else - if (rdc_check(krdc, rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - return (RDC_EALREADY); - } -#endif - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - halt_sync(krdc); - ASSERT(IS_ENABLED(urdc)); - } - - rdc_group_exit(krdc); - (void) rdc_unintercept(krdc); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: suspended %s %s", urdc->primary.file, - urdc->secondary.file); -#endif - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - - if (IS_ASYNC(urdc) && 
!RDC_IS_DISKQ(krdc->group)) { - int tries = 2; /* in case of possibly stuck flusher threads */ -#ifdef DEBUG - net_queue *qp = &krdc->group->ra_queue; -#endif - do { - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - (void) rdc_drain_queue(krdc->index); - - } while (krdc->group->rdc_writer && tries--); - - /* ok, force it to happen... */ - if (rdc_drain_queue(krdc->index) != 0) { - do { - mutex_enter(&krdc->group->ra_queue.net_qlock); - krdc->group->asyncdis = 1; - cv_broadcast(&krdc->group->asyncqcv); - mutex_exit(&krdc->group->ra_queue.net_qlock); - cmn_err(CE_WARN, - "!SNDR: async I/O pending and not flushed " - "for %s during suspend", - urdc->primary.file); -#ifdef DEBUG - cmn_err(CE_WARN, - "!nitems: %" NSC_SZFMT " nblocks: %" - NSC_SZFMT " head: 0x%p tail: 0x%p", - qp->nitems, qp->blocks, - (void *)qp->net_qhead, - (void *)qp->net_qtail); -#endif - } while (krdc->group->rdc_thrnum > 0); - } - } - - mutex_enter(&rdc_conf_lock); - ip = krdc->intf; - krdc->intf = 0; - - if (ip) { - rdc_remove_from_if(ip); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - /* Must not hold group lock during this function */ - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - rdc_group_enter(krdc); - - /* Don't rdc_clear_state, unlike _rdc_disable */ - - rdc_free_bitmap(krdc, RDC_CMD_SUSPEND); - rdc_close_bitmap(krdc); - - rdc_dev_close(krdc); - rdc_close_direct(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - - /* - * we should now unregister the queue, with no conflicting - * locks held. 
This is the last(only) member of the group - */ - if (krdc->group && RDC_IS_DISKQ(krdc->group) && - krdc->group->count == 1) { /* stop protecting queue */ - rdc_unintercept_diskq(krdc->group); - } - - mutex_enter(&rdc_conf_lock); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - wait_busy(krdc); - - if (IS_MANY(krdc) || IS_MULTI(krdc)) - remove_from_many(krdc); - - remove_from_group(krdc); - - krdc->remote_index = -1; - ASSERT(krdc->type_flag & RDC_CONFIGURED); - ASSERT(krdc->type_flag & RDC_DISABLEPEND); - krdc->type_flag = 0; -#ifdef DEBUG - if (krdc->dcio_bitmap) - cmn_err(CE_WARN, "!_rdc_suspend: possible mem leak, " - "dcio_bitmap"); -#endif - krdc->dcio_bitmap = NULL; - krdc->bitmap_ref = NULL; - krdc->bitmap_size = 0; - krdc->maxfbas = 0; - krdc->bitmap_write = 0; - krdc->disk_status = 0; - rdc_destroy_svinfo(krdc->lsrv); - krdc->lsrv = NULL; - krdc->multi_next = NULL; - - rdc_u_init(urdc); - - mutex_exit(&rdc_conf_lock); - rdc_kstat_delete(index); - return (0); -} - -static int -rdc_suspend(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - int index; - int rc; - - mutex_enter(&rdc_conf_lock); - - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - krdc->type_flag |= RDC_DISABLEPEND; - wait_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - mutex_exit(&rdc_conf_lock); - - rc = _rdc_suspend(krdc, uparms->rdc_set, kstatus); - return (rc); -} - -static int -_rdc_resume(rdc_set_t *rdc_set, int options, spcs_s_info_t kstatus) -{ - int index; - char *rhost; 
- struct netbuf *addrp; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_srv_t *svp = NULL; - char *local_file; - char *local_bitmap; - int rc, rc1; - nsc_size_t maxfbas; - rdc_group_t *grp; - - if ((rdc_set->primary.intf[0] == 0) || - (rdc_set->primary.addr.len == 0) || - (rdc_set->primary.file[0] == 0) || - (rdc_set->primary.bitmap[0] == 0) || - (rdc_set->secondary.intf[0] == 0) || - (rdc_set->secondary.addr.len == 0) || - (rdc_set->secondary.file[0] == 0) || - (rdc_set->secondary.bitmap[0] == 0)) { - spcs_s_add(kstatus, RDC_EEMPTY); - return (RDC_EEMPTY); - } - - /* Next check there aren't any enabled rdc sets which match. */ - - mutex_enter(&rdc_conf_lock); - - if (rdc_lookup_byname(rdc_set) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EENABLED, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EENABLED); - } - - if (rdc_lookup_many2one(rdc_set) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EMANY2ONE, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EMANY2ONE); - } - - if (rdc_set->netconfig->knc_proto == NULL) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETCONFIG); - return (RDC_ENETCONFIG); - } - - if (rdc_set->primary.addr.len == 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETBUF, rdc_set->primary.file); - return (RDC_ENETBUF); - } - - if (rdc_set->secondary.addr.len == 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETBUF, rdc_set->secondary.file); - return (RDC_ENETBUF); - } - - /* Check that the local data volume isn't in use as a bitmap */ - if (options & RDC_OPT_PRIMARY) - local_file = rdc_set->primary.file; - else - local_file = rdc_set->secondary.file; - if (rdc_lookup_bitmap(local_file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EVOLINUSE, local_file); - return (RDC_EVOLINUSE); - } - - /* check that the secondary 
data volume isn't in use */ - if (!(options & RDC_OPT_PRIMARY)) { - local_file = rdc_set->secondary.file; - if (rdc_lookup_secondary(local_file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EVOLINUSE, local_file); - return (RDC_EVOLINUSE); - } - } - - /* Check that the bitmap isn't in use as a data volume */ - if (options & RDC_OPT_PRIMARY) - local_bitmap = rdc_set->primary.bitmap; - else - local_bitmap = rdc_set->secondary.bitmap; - if (rdc_lookup_configured(local_bitmap) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EBMPINUSE, local_bitmap); - return (RDC_EBMPINUSE); - } - - /* Check that the bitmap isn't already in use as a bitmap */ - if (rdc_lookup_bitmap(local_bitmap) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EBMPINUSE, local_bitmap); - return (RDC_EBMPINUSE); - } - - /* Set urdc->volume_size */ - index = rdc_dev_open(rdc_set, options); - if (index < 0) { - mutex_exit(&rdc_conf_lock); - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EOPEN, rdc_set->primary.intf, - rdc_set->primary.file); - else - spcs_s_add(kstatus, RDC_EOPEN, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EOPEN); - } - - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - /* copy relevant parts of rdc_set to urdc field by field */ - - (void) strncpy(urdc->primary.intf, rdc_set->primary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(urdc->secondary.intf, rdc_set->secondary.intf, - MAX_RDC_HOST_SIZE); - - (void) strncpy(urdc->group_name, rdc_set->group_name, NSC_MAXPATH); - - dup_rdc_netbuf(&rdc_set->primary.addr, &urdc->primary.addr); - (void) strncpy(urdc->primary.file, rdc_set->primary.file, NSC_MAXPATH); - (void) strncpy(urdc->primary.bitmap, rdc_set->primary.bitmap, - NSC_MAXPATH); - - dup_rdc_netbuf(&rdc_set->secondary.addr, &urdc->secondary.addr); - (void) strncpy(urdc->secondary.file, rdc_set->secondary.file, - NSC_MAXPATH); - (void) strncpy(urdc->secondary.bitmap, rdc_set->secondary.bitmap, 
- NSC_MAXPATH); - (void) strncpy(urdc->disk_queue, rdc_set->disk_queue, NSC_MAXPATH); - urdc->setid = rdc_set->setid; - - if ((options & RDC_OPT_SYNC) && urdc->disk_queue[0]) { - mutex_exit(&rdc_conf_lock); - rdc_dev_close(krdc); - spcs_s_add(kstatus, RDC_EQWRONGMODE); - return (RDC_EQWRONGMODE); - } - - /* - * init flags now so that state left by failures in add_to_group() - * are preserved. - */ - rdc_init_flags(urdc); - - if ((rc1 = add_to_group(krdc, options, RDC_CMD_RESUME)) != 0) { - if (rc1 == RDC_EQNOADD) { /* something went wrong with queue */ - rdc_fail_diskq(krdc, RDC_WAIT, RDC_NOLOG); - /* don't return a failure here, continue with resume */ - - } else { /* some other group add failure */ - mutex_exit(&rdc_conf_lock); - rdc_dev_close(krdc); - spcs_s_add(kstatus, RDC_EGROUP, - rdc_set->primary.intf, rdc_set->primary.file, - rdc_set->secondary.intf, rdc_set->secondary.file, - rdc_set->group_name); - return (RDC_EGROUP); - } - } - - /* - * maxfbas was set in rdc_dev_open as primary's maxfbas. - * If diskq's maxfbas is smaller, then use diskq's. 
- */ - grp = krdc->group; - if (grp && RDC_IS_DISKQ(grp) && (grp->diskqfd != 0)) { - rc = _rdc_rsrv_diskq(grp); - if (RDC_SUCCESS(rc)) { - rc = nsc_maxfbas(grp->diskqfd, 0, &maxfbas); - if (rc == 0) { -#ifdef DEBUG - if (krdc->maxfbas != maxfbas) - cmn_err(CE_NOTE, - "!_rdc_resume: diskq maxfbas = %" - NSC_SZFMT ", primary maxfbas = %" - NSC_SZFMT, maxfbas, krdc->maxfbas); -#endif - krdc->maxfbas = min(krdc->maxfbas, - maxfbas); - } else { - cmn_err(CE_WARN, - "!_rdc_resume: diskq maxfbas failed (%d)", - rc); - } - _rdc_rlse_diskq(grp); - } else { - cmn_err(CE_WARN, - "!_rdc_resume: diskq reserve failed (%d)", rc); - } - } - - (void) strncpy(urdc->direct_file, rdc_set->direct_file, NSC_MAXPATH); - if ((options & RDC_OPT_PRIMARY) && rdc_set->direct_file[0]) { - if (rdc_open_direct(krdc) == NULL) - rdc_set_flags(urdc, RDC_FCAL_FAILED); - } - - krdc->many_next = krdc; - - ASSERT(krdc->type_flag == 0); - krdc->type_flag = RDC_CONFIGURED; - - if (options & RDC_OPT_PRIMARY) - rdc_set_flags(urdc, RDC_PRIMARY); - - if (options & RDC_OPT_ASYNC) - krdc->type_flag |= RDC_ASYNCMODE; - - set_busy(krdc); - - urdc->syshostid = rdc_set->syshostid; - - if (add_to_many(krdc) < 0) { - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - spcs_s_add(kstatus, RDC_EMULTI); - rc = RDC_EMULTI; - goto fail; - } - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - mutex_exit(&rdc_conf_lock); - - if (urdc->volume_size == 0) { - rdc_many_enter(krdc); - if (options & RDC_OPT_PRIMARY) - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - else - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - rdc_set_flags(urdc, RDC_VOL_FAILED); - rdc_many_exit(krdc); - } - - rdc_group_enter(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* - * The rdc set is configured but not yet enabled. Other operations must - * ignore this set until it is enabled. 
- */ - - urdc->sync_pos = 0; - - /* Set tunable defaults, we'll pick up tunables from the header later */ - - urdc->maxqfbas = rdc_maxthres_queue; - urdc->maxqitems = rdc_max_qitems; - urdc->autosync = 0; - urdc->asyncthr = rdc_asyncthr; - - urdc->netconfig = rdc_set->netconfig; - - if (options & RDC_OPT_PRIMARY) { - rhost = rdc_set->secondary.intf; - addrp = &rdc_set->secondary.addr; - } else { - rhost = rdc_set->primary.intf; - addrp = &rdc_set->primary.addr; - } - - if (options & RDC_OPT_ASYNC) - rdc_set_flags(urdc, RDC_ASYNC); - - svp = rdc_create_svinfo(rhost, addrp, urdc->netconfig); - if (svp == NULL) { - spcs_s_add(kstatus, ENOMEM); - rc = ENOMEM; - goto fail; - } - - urdc->netconfig = NULL; /* This will be no good soon */ - - /* Don't set krdc->intf here */ - rdc_kstat_create(index); - - /* if the bitmap resume isn't clean, it will clear queuing flag */ - - (void) rdc_resume_bitmap(krdc); - - if (RDC_IS_DISKQ(krdc->group)) { - disk_queue *q = &krdc->group->diskq; - if ((rc1 == RDC_EQNOADD) || - IS_QSTATE(q, RDC_QBADRESUME)) { - rdc_clr_flags(urdc, RDC_QUEUING); - RDC_ZERO_BITREF(krdc); - } - } - - if (krdc->lsrv == NULL) - krdc->lsrv = svp; - else { -#ifdef DEBUG - cmn_err(CE_WARN, "!_rdc_resume: krdc->lsrv already set: %p", - (void *) krdc->lsrv); -#endif - rdc_destroy_svinfo(svp); - } - svp = NULL; - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* And finally */ - - krdc->remote_index = -1; - - /* Should we set the whole group logging? 
*/ - rdc_set_flags(urdc, RDC_ENABLED | RDC_LOGGING); - - rdc_group_exit(krdc); - - if (rdc_intercept(krdc) != 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ENABLED); - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EREGISTER, urdc->primary.file); - else - spcs_s_add(kstatus, RDC_EREGISTER, - urdc->secondary.file); -#ifdef DEBUG - cmn_err(CE_NOTE, "!nsc_register_path failed %s", - urdc->primary.file); -#endif - rc = RDC_EREGISTER; - goto bmpfail; - } -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: resumed %s %s", urdc->primary.file, - urdc->secondary.file); -#endif - - rdc_write_state(urdc); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (0); - -bmpfail: - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EBITMAP, urdc->primary.bitmap); - else - spcs_s_add(kstatus, RDC_EBITMAP, urdc->secondary.bitmap); - rc = RDC_EBITMAP; - if (rdc_get_vflags(urdc) & RDC_ENABLED) { - rdc_group_exit(krdc); - (void) rdc_unintercept(krdc); - rdc_group_enter(krdc); - } - -fail: - rdc_kstat_delete(index); - /* Don't unset krdc->intf here, unlike _rdc_enable */ - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_dev_close(krdc); - rdc_close_direct(krdc); - rdc_destroy_svinfo(svp); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - - mutex_enter(&rdc_conf_lock); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - remove_from_group(krdc); - - if (IS_MANY(krdc) || IS_MULTI(krdc)) - remove_from_many(krdc); - - rdc_u_init(urdc); - - ASSERT(krdc->type_flag & RDC_CONFIGURED); - krdc->type_flag = 0; - wakeup_busy(krdc); - - mutex_exit(&rdc_conf_lock); - - return (rc); -} - -static int -rdc_resume(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - char itmp[10]; - int rc; - - if (!(uparms->options & RDC_OPT_SYNC) && - !(uparms->options & RDC_OPT_ASYNC)) { - (void) 
spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - rc = RDC_EEINVAL; - goto done; - } - - if (!(uparms->options & RDC_OPT_PRIMARY) && - !(uparms->options & RDC_OPT_SECONDARY)) { - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - rc = RDC_EEINVAL; - goto done; - } - - rc = _rdc_resume(uparms->rdc_set, uparms->options, kstatus); -done: - return (rc); -} - -/* - * if rdc_group_log is called because a volume has failed, - * we must discard the queue to preserve write ordering. - * later perhaps, we can keep queuing, but we would have to - * rewrite the i/o path to accommodate that. currently, if there - * is a volume failure, the buffers are satisfied remotely and - * there is no way to satisfy them from the current diskq config - * phew, if we do that.. it will be difficult - * - * Walks the group ring from krdc->group_next round to krdc itself - * (inclusive); returns 0 if any member has RDC_VOL_FAILED set, - * otherwise 1. - */ -int -rdc_can_queue(rdc_k_info_t *krdc) -{ - rdc_k_info_t *p; - rdc_u_info_t *q; - - for (p = krdc->group_next; ; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (IS_STATE(q, RDC_VOL_FAILED)) - return (0); - if (p == krdc) - break; - } - return (1); -} - -/* - * wait here, until all in flight async i/o's have either - * finished or failed. Avoid the race with r_net_state() - * which tells remote end to log. 
- */ -void -rdc_inflwait(rdc_group_t *grp) -{ - int bail = RDC_CLNT_TMOUT * 2; /* to include retries */ - volatile int *inflitems; - - if (RDC_IS_DISKQ(grp)) - inflitems = (&(grp->diskq.inflitems)); - else - inflitems = (&(grp->ra_queue.inflitems)); - - while (*inflitems && (--bail > 0)) - delay(HZ); -} - -void -rdc_group_log(rdc_k_info_t *krdc, int flag, char *why) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *p; - rdc_u_info_t *q; - int do_group; - int sm, um, md; - disk_queue *dq; - - void (*flag_op)(rdc_u_info_t *urdc, int flag); - - ASSERT(MUTEX_HELD(&krdc->group->lock)); - - if (!IS_ENABLED(urdc)) - return; - - rdc_many_enter(krdc); - - if ((flag & RDC_QUEUING) && (!IS_STATE(urdc, RDC_SYNCING)) && - (rdc_can_queue(krdc))) { - flag_op = rdc_set_flags; /* keep queuing, link error */ - flag &= ~RDC_FLUSH; - } else { - flag_op = rdc_clr_flags; /* stop queuing, user request */ - } - - do_group = 1; - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) - do_group = 0; - else if ((urdc->group_name[0] == 0) || - (rdc_get_vflags(urdc) & RDC_LOGGING) || - (rdc_get_vflags(urdc) & RDC_SYNCING)) - do_group = 0; - if (do_group) { - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - if ((rdc_get_vflags(q) & RDC_LOGGING) || - (rdc_get_vflags(q) & RDC_SYNCING)) { - do_group = 0; - break; - } - } - } - if (!do_group && (flag & RDC_FORCE_GROUP)) - do_group = 1; - - rdc_many_exit(krdc); - dq = &krdc->group->diskq; - if (do_group) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR:Group point-in-time for grp: %s %s:%s", - urdc->group_name, urdc->primary.intf, urdc->secondary.intf); -#endif - DTRACE_PROBE(rdc_diskq_group_PIT); - - /* Set group logging at the same PIT under rdc_many_lock */ - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_LOGGING, why); - if (RDC_IS_DISKQ(krdc->group)) - flag_op(urdc, RDC_QUEUING); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = 
&rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - rdc_set_flags_log(q, RDC_LOGGING, - "consistency group member following leader"); - if (RDC_IS_DISKQ(p->group)) - flag_op(q, RDC_QUEUING); - } - - rdc_many_exit(krdc); - - /* - * This can cause the async threads to fail, - * which in turn will call rdc_group_log() - * again. Release the lock and re-aquire. - */ - rdc_group_exit(krdc); - - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - if (!RDC_IS_DISKQ(krdc->group)) - RDC_ZERO_BITREF(krdc); - - rdc_inflwait(krdc->group); - - /* - * a little lazy, but neat. recall dump_alloc_bufs to - * ensure that the queue pointers & seq are reset properly - * after we have waited for inflight stuff - */ - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - - rdc_group_enter(krdc); - if (RDC_IS_DISKQ(krdc->group) && (!(flag & RDC_QUEUING))) { - /* fail or user request */ - RDC_ZERO_BITREF(krdc); - mutex_enter(&krdc->group->diskq.disk_qlock); - rdc_init_diskq_header(krdc->group, - &krdc->group->diskq.disk_hdr); - SET_QNXTIO(dq, QHEAD(dq)); - mutex_exit(&krdc->group->diskq.disk_qlock); - } - - if (flag & RDC_ALLREMOTE) { - /* Tell other node to start logging */ - if (krdc->lsrv && krdc->intf && !krdc->intf->if_down) - (void) rdc_net_state(krdc->index, - CCIO_ENABLELOG); - } - - if (flag & (RDC_ALLREMOTE | RDC_OTHERREMOTE)) { - rdc_many_enter(krdc); - for (p = krdc->group_next; p != krdc; - p = p->group_next) { - if (p->lsrv && krdc->intf && - !krdc->intf->if_down) { - (void) rdc_net_state(p->index, - CCIO_ENABLELOG); - } - } - rdc_many_exit(krdc); - } - - rdc_write_state(urdc); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - rdc_write_state(q); - } - } else { - /* No point in time is possible, just deal with single set */ - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - halt_sync(krdc); - } else { - if (rdc_net_getstate(krdc, &sm, &um, &md, TRUE) < 0) { - 
rdc_clr_flags(urdc, RDC_SYNCING); - rdc_set_flags_log(urdc, RDC_LOGGING, - "failed to read remote state"); - - rdc_write_state(urdc); - while (rdc_dump_alloc_bufs_cd(krdc->index) - == EAGAIN) - delay(2); - if ((RDC_IS_DISKQ(krdc->group)) && - (!(flag & RDC_QUEUING))) { /* fail! */ - mutex_enter(QLOCK(dq)); - rdc_init_diskq_header(krdc->group, - &krdc->group->diskq.disk_hdr); - SET_QNXTIO(dq, QHEAD(dq)); - mutex_exit(QLOCK(dq)); - } - - return; - } - } - - if (rdc_get_vflags(urdc) & RDC_SYNCING) - return; - - if (RDC_IS_DISKQ(krdc->group)) - flag_op(urdc, RDC_QUEUING); - - if ((RDC_IS_DISKQ(krdc->group)) && - (!(flag & RDC_QUEUING))) { /* fail! */ - RDC_ZERO_BITREF(krdc); - mutex_enter(QLOCK(dq)); - rdc_init_diskq_header(krdc->group, - &krdc->group->diskq.disk_hdr); - SET_QNXTIO(dq, QHEAD(dq)); - mutex_exit(QLOCK(dq)); - } - - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) { - rdc_set_flags_log(urdc, RDC_LOGGING, why); - - rdc_write_state(urdc); - - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - if (!RDC_IS_DISKQ(krdc->group)) - RDC_ZERO_BITREF(krdc); - - rdc_inflwait(krdc->group); - /* - * a little lazy, but neat. 
recall dump_alloc_bufs to - * ensure that the queue pointers & seq are reset - * properly after we have waited for inflight stuff - */ - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - - if (flag & RDC_ALLREMOTE) { - /* Tell other node to start logging */ - if (krdc->lsrv && krdc->intf && - !krdc->intf->if_down) { - (void) rdc_net_state(krdc->index, - CCIO_ENABLELOG); - } - } - } - } - /* - * just in case any threads were in flight during log cleanup - */ - if (RDC_IS_DISKQ(krdc->group)) { - mutex_enter(QLOCK(dq)); - cv_broadcast(&dq->qfullcv); - mutex_exit(QLOCK(dq)); - } -} - -static int -_rdc_log(rdc_k_info_t *krdc, rdc_set_t *rdc_set, spcs_s_info_t kstatus) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_srv_t *svp; - - rdc_group_enter(krdc); - if (rdc_check(krdc, rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - return (RDC_EALREADY); - } - - svp = krdc->lsrv; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - krdc->intf = rdc_add_to_if(svp, &(urdc->primary.addr), - &(urdc->secondary.addr), 1); - else - krdc->intf = rdc_add_to_if(svp, &(urdc->secondary.addr), - &(urdc->primary.addr), 0); - - if (!krdc->intf) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EADDTOIF, urdc->primary.intf, - urdc->secondary.intf); - return (RDC_EADDTOIF); - } - - rdc_group_log(krdc, RDC_FLUSH | RDC_ALLREMOTE, NULL); - - if (rdc_get_vflags(urdc) & RDC_SYNCING) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_ESYNCING, urdc->primary.file); - return (RDC_ESYNCING); - } - - rdc_group_exit(krdc); - - return (0); -} - -static int -rdc_log(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - int rc = 0; - int index; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, 
uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - mutex_exit(&rdc_conf_lock); - - rc = _rdc_log(krdc, uparms->rdc_set, kstatus); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - - -static int -rdc_wait(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int index; - int need_check = 0; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - mutex_exit(&rdc_conf_lock); - return (0); - } - - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - rdc_group_exit(krdc); - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - if ((rdc_get_vflags(urdc) & (RDC_SYNCING | RDC_PRIMARY)) != - (RDC_SYNCING | RDC_PRIMARY)) { - rdc_group_exit(krdc); - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - return (0); - } - 
if (rdc_get_vflags(urdc) & RDC_SYNCING) { - need_check = 1; - } - rdc_group_exit(krdc); - - mutex_enter(&net_blk_lock); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - (void) cv_wait_sig(&krdc->synccv, &net_blk_lock); - - mutex_exit(&net_blk_lock); - if (need_check) { - if (krdc->sync_done == RDC_COMPLETED) { - return (0); - } else if (krdc->sync_done == RDC_FAILED) { - return (EIO); - } - } - return (0); -} - - -static int -rdc_health(rdc_config_t *uparms, spcs_s_info_t kstatus, int *rvp) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rc = 0; - int index; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - - urdc = &rdc_u_info[index]; - if (rdc_isactive_if(&(urdc->primary.addr), &(urdc->secondary.addr))) - *rvp = RDC_ACTIVE; - else - *rvp = RDC_INACTIVE; - - rdc_group_exit(krdc); - -done: - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - - -static int -rdc_reconfig(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rc = -2; - int index; - - mutex_enter(&rdc_conf_lock); - index = 
rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - if ((rdc_get_vflags(urdc) & RDC_BMP_FAILED) && (krdc->bitmapfd)) - (void) rdc_reset_bitmap(krdc); - - /* Move to a new bitmap if necessary */ - if (strncmp(urdc->primary.bitmap, uparms->rdc_set->primary.bitmap, - NSC_MAXPATH) != 0) { - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - rc = rdc_move_bitmap(krdc, - uparms->rdc_set->primary.bitmap); - } else { - (void) strncpy(urdc->primary.bitmap, - uparms->rdc_set->primary.bitmap, NSC_MAXPATH); - /* simulate a succesful rdc_move_bitmap */ - rc = 0; - } - } - if (strncmp(urdc->secondary.bitmap, uparms->rdc_set->secondary.bitmap, - NSC_MAXPATH) != 0) { - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - (void) strncpy(urdc->secondary.bitmap, - uparms->rdc_set->secondary.bitmap, NSC_MAXPATH); - /* simulate a succesful rdc_move_bitmap */ - rc = 0; - } else { - rc = rdc_move_bitmap(krdc, - uparms->rdc_set->secondary.bitmap); - } - } - if (rc == -1) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EBMPRECONFIG, - uparms->rdc_set->secondary.intf, - uparms->rdc_set->secondary.file); - rc = RDC_EBMPRECONFIG; - goto done; - } - - /* - * At this point we fail any other type of reconfig 
- * if not in logging mode and we did not do a bitmap reconfig - */ - - if (!(rdc_get_vflags(urdc) & RDC_LOGGING) && rc == -2) { - /* no other changes possible unless logging */ - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_ENOTLOGGING, - uparms->rdc_set->primary.intf, - uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.intf, - uparms->rdc_set->secondary.file); - rc = RDC_ENOTLOGGING; - goto done; - } - rc = 0; - /* Change direct file if necessary */ - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - strncmp(urdc->direct_file, uparms->rdc_set->direct_file, - NSC_MAXPATH)) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) { - rdc_group_exit(krdc); - goto notlogging; - } - rdc_close_direct(krdc); - (void) strncpy(urdc->direct_file, uparms->rdc_set->direct_file, - NSC_MAXPATH); - - if (urdc->direct_file[0]) { - if (rdc_open_direct(krdc) == NULL) - rdc_set_flags(urdc, RDC_FCAL_FAILED); - else - rdc_clr_flags(urdc, RDC_FCAL_FAILED); - } - } - - rdc_group_exit(krdc); - - /* Change group if necessary */ - if (strncmp(urdc->group_name, uparms->rdc_set->group_name, - NSC_MAXPATH) != 0) { - char orig_group[NSC_MAXPATH]; - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - mutex_enter(&rdc_conf_lock); - - (void) strncpy(orig_group, urdc->group_name, NSC_MAXPATH); - (void) strncpy(urdc->group_name, uparms->rdc_set->group_name, - NSC_MAXPATH); - - rc = change_group(krdc, uparms->options); - if (rc == RDC_EQNOADD) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EQNOADD, - uparms->rdc_set->disk_queue); - goto done; - } else if (rc < 0) { - (void) strncpy(urdc->group_name, orig_group, - NSC_MAXPATH); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EGROUP, - urdc->primary.intf, urdc->primary.file, - urdc->secondary.intf, urdc->secondary.file, - uparms->rdc_set->group_name); - rc = RDC_EGROUP; - goto done; - } - - mutex_exit(&rdc_conf_lock); - - if (rc >= 0) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - if (uparms->options 
& RDC_OPT_ASYNC) { - mutex_enter(&rdc_conf_lock); - krdc->type_flag |= RDC_ASYNCMODE; - mutex_exit(&rdc_conf_lock); - if (uparms->options & RDC_OPT_PRIMARY) - krdc->bitmap_ref = - (uchar_t *)kmem_zalloc( - (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE), KM_SLEEP); - rdc_group_enter(krdc); - rdc_set_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - } else { - mutex_enter(&rdc_conf_lock); - krdc->type_flag &= ~RDC_ASYNCMODE; - mutex_exit(&rdc_conf_lock); - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - if (krdc->bitmap_ref) { - kmem_free(krdc->bitmap_ref, - (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE)); - krdc->bitmap_ref = NULL; - } - } - } - } else { - if ((((uparms->options & RDC_OPT_ASYNC) == 0) && - ((krdc->type_flag & RDC_ASYNCMODE) != 0)) || - (((uparms->options & RDC_OPT_ASYNC) != 0) && - ((krdc->type_flag & RDC_ASYNCMODE) == 0))) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - - if (krdc->group->count > 1) { - spcs_s_add(kstatus, RDC_EGROUPMODE); - rc = RDC_EGROUPMODE; - goto done; - } - } - - /* Switch sync/async if necessary */ - if (krdc->group->count == 1) { - /* Only member of group. 
Can change sync/async */ - if (((uparms->options & RDC_OPT_ASYNC) == 0) && - ((krdc->type_flag & RDC_ASYNCMODE) != 0)) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - /* switch to sync */ - mutex_enter(&rdc_conf_lock); - krdc->type_flag &= ~RDC_ASYNCMODE; - if (RDC_IS_DISKQ(krdc->group)) { - krdc->group->flags &= ~RDC_DISKQUE; - krdc->group->flags |= RDC_MEMQUE; - rdc_unintercept_diskq(krdc->group); - mutex_enter(&krdc->group->diskqmutex); - rdc_close_diskq(krdc->group); - mutex_exit(&krdc->group->diskqmutex); - bzero(&urdc->disk_queue, - sizeof (urdc->disk_queue)); - } - mutex_exit(&rdc_conf_lock); - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - if (krdc->bitmap_ref) { - kmem_free(krdc->bitmap_ref, - (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE)); - krdc->bitmap_ref = NULL; - } - } else if (((uparms->options & RDC_OPT_ASYNC) != 0) && - ((krdc->type_flag & RDC_ASYNCMODE) == 0)) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - /* switch to async */ - mutex_enter(&rdc_conf_lock); - krdc->type_flag |= RDC_ASYNCMODE; - mutex_exit(&rdc_conf_lock); - if (uparms->options & RDC_OPT_PRIMARY) - krdc->bitmap_ref = - (uchar_t *)kmem_zalloc( - (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE), KM_SLEEP); - rdc_group_enter(krdc); - rdc_set_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - } - } - } - /* Reverse concept of primary and secondary */ - if ((uparms->options & RDC_OPT_REVERSE_ROLE) != 0) { - rdc_set_t rdc_set; - struct netbuf paddr, saddr; - - mutex_enter(&rdc_conf_lock); - - /* - * Disallow role reversal for advanced configurations - */ - - if (IS_MANY(krdc) || IS_MULTI(krdc)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EMASTER, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - return (RDC_EMASTER); - } - bzero((void *) &rdc_set, sizeof (rdc_set_t)); - dup_rdc_netbuf(&urdc->primary.addr, &saddr); - 
dup_rdc_netbuf(&urdc->secondary.addr, &paddr); - free_rdc_netbuf(&urdc->primary.addr); - free_rdc_netbuf(&urdc->secondary.addr); - dup_rdc_netbuf(&saddr, &urdc->secondary.addr); - dup_rdc_netbuf(&paddr, &urdc->primary.addr); - free_rdc_netbuf(&paddr); - free_rdc_netbuf(&saddr); - /* copy primary parts of urdc to rdc_set field by field */ - (void) strncpy(rdc_set.primary.intf, urdc->primary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(rdc_set.primary.file, urdc->primary.file, - NSC_MAXPATH); - (void) strncpy(rdc_set.primary.bitmap, urdc->primary.bitmap, - NSC_MAXPATH); - - /* Now overwrite urdc primary */ - (void) strncpy(urdc->primary.intf, urdc->secondary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(urdc->primary.file, urdc->secondary.file, - NSC_MAXPATH); - (void) strncpy(urdc->primary.bitmap, urdc->secondary.bitmap, - NSC_MAXPATH); - - /* Now ovwewrite urdc secondary */ - (void) strncpy(urdc->secondary.intf, rdc_set.primary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(urdc->secondary.file, rdc_set.primary.file, - NSC_MAXPATH); - (void) strncpy(urdc->secondary.bitmap, rdc_set.primary.bitmap, - NSC_MAXPATH); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - rdc_clr_flags(urdc, RDC_PRIMARY); - if (krdc->intf) { - krdc->intf->issecondary = 1; - krdc->intf->isprimary = 0; - krdc->intf->if_down = 1; - } - } else { - rdc_set_flags(urdc, RDC_PRIMARY); - if (krdc->intf) { - krdc->intf->issecondary = 0; - krdc->intf->isprimary = 1; - krdc->intf->if_down = 1; - } - } - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - ((krdc->type_flag & RDC_ASYNCMODE) != 0)) { - if (!krdc->bitmap_ref) - krdc->bitmap_ref = - (uchar_t *)kmem_zalloc((krdc->bitmap_size * - BITS_IN_BYTE * BMAP_REF_PREF_SIZE), - KM_SLEEP); - if (krdc->bitmap_ref == NULL) { - cmn_err(CE_WARN, - "!rdc_reconfig: bitmap_ref alloc %" - NSC_SZFMT " failed", - krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE); - mutex_exit(&rdc_conf_lock); - return (-1); - } - } - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - 
(rdc_get_vflags(urdc) & RDC_SYNC_NEEDED)) { - /* Primary, so reverse sync needed */ - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNC_NEEDED); - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - } else if (rdc_get_vflags(urdc) & RDC_RSYNC_NEEDED) { - /* Secondary, so forward sync needed */ - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_RSYNC_NEEDED); - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - rdc_many_exit(krdc); - } - - /* - * rewrite bitmap header - */ - rdc_write_state(urdc); - mutex_exit(&rdc_conf_lock); - } - -done: - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); - -notlogging: - /* no other changes possible unless logging */ - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENOTLOGGING, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - return (RDC_ENOTLOGGING); -} - -static int -rdc_reset(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rc = 0; - int index; - int cleared_error = 0; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto 
done; - } - - if ((rdc_get_vflags(urdc) & RDC_BMP_FAILED) && (krdc->bitmapfd)) { - if (rdc_reset_bitmap(krdc) == 0) - cleared_error++; - } - - /* Fix direct file if necessary */ - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && urdc->direct_file[0]) { - if (rdc_open_direct(krdc) == NULL) - rdc_set_flags(urdc, RDC_FCAL_FAILED); - else { - rdc_clr_flags(urdc, RDC_FCAL_FAILED); - cleared_error++; - } - } - - if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - cleared_error++; - rdc_many_exit(krdc); - } - - if (cleared_error) { - /* cleared an error so we should be in logging mode */ - rdc_set_flags_log(urdc, RDC_LOGGING, "set reset"); - } - rdc_group_exit(krdc); - - if ((rdc_get_vflags(urdc) & RDC_DISKQ_FAILED)) - rdc_unfail_diskq(krdc); - -done: - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - - -static int -rdc_tunable(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_k_info_t *p; - rdc_u_info_t *q; - int rc = 0; - int index; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - - if 
(uparms->rdc_set->maxqfbas > 0) { - urdc->maxqfbas = uparms->rdc_set->maxqfbas; - rdc_write_state(urdc); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - q->maxqfbas = urdc->maxqfbas; - rdc_write_state(q); - } - } - - if (uparms->rdc_set->maxqitems > 0) { - urdc->maxqitems = uparms->rdc_set->maxqitems; - rdc_write_state(urdc); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - q->maxqitems = urdc->maxqitems; - rdc_write_state(q); - } - } - - if (uparms->options & RDC_OPT_SET_QNOBLOCK) { - disk_queue *que; - - if (!RDC_IS_DISKQ(krdc->group)) { - spcs_s_add(kstatus, RDC_EQNOQUEUE, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_EQNOQUEUE; - goto done; - } - - que = &krdc->group->diskq; - mutex_enter(QLOCK(que)); - SET_QSTATE(que, RDC_QNOBLOCK); - /* queue will fail if this fails */ - (void) rdc_stamp_diskq(krdc, 0, RDC_GROUP_LOCKED); - mutex_exit(QLOCK(que)); - - } - - if (uparms->options & RDC_OPT_CLR_QNOBLOCK) { - disk_queue *que; - - if (!RDC_IS_DISKQ(krdc->group)) { - spcs_s_add(kstatus, RDC_EQNOQUEUE, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_EQNOQUEUE; - goto done; - } - que = &krdc->group->diskq; - mutex_enter(QLOCK(que)); - CLR_QSTATE(que, RDC_QNOBLOCK); - /* queue will fail if this fails */ - (void) rdc_stamp_diskq(krdc, 0, RDC_GROUP_LOCKED); - mutex_exit(QLOCK(que)); - - } - if (uparms->rdc_set->asyncthr > 0) { - urdc->asyncthr = uparms->rdc_set->asyncthr; - rdc_write_state(urdc); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - q->asyncthr = urdc->asyncthr; - rdc_write_state(q); - } - } - - if (uparms->rdc_set->autosync >= 0) { - if (uparms->rdc_set->autosync == 0) - urdc->autosync = 0; - else - urdc->autosync = 1; - - rdc_write_state(urdc); - - /* Changed autosync, so update rest of the group */ - - for (p = 
krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - q->autosync = urdc->autosync; - rdc_write_state(q); - } - } - -done: - rdc_group_exit(krdc); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - -static int -rdc_status(void *arg, int mode, rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - disk_queue *dqp; - int rc = 0; - int index; - char *ptr; - extern int rdc_status_copy32(const void *, void *, size_t, int); - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - - urdc = &rdc_u_info[index]; - - /* - * sneak out qstate in urdc->flags - * this is harmless because it's value is not used - * in urdc->flags. 
the real qstate is kept in - * group->diskq->disk_hdr.h.state - */ - if (RDC_IS_DISKQ(krdc->group)) { - dqp = &krdc->group->diskq; - if (IS_QSTATE(dqp, RDC_QNOBLOCK)) - urdc->flags |= RDC_QNOBLOCK; - } - - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - ptr = (char *)arg + offsetof(struct rdc_config32, rdc_set); - rc = rdc_status_copy32(urdc, ptr, sizeof (struct rdc_set32), - mode); - } else { - ptr = (char *)arg + offsetof(struct rdc_config, rdc_set); - rc = ddi_copyout(urdc, ptr, sizeof (struct rdc_set), mode); - } - /* clear out qstate from flags */ - urdc->flags &= ~RDC_QNOBLOCK; - - if (rc) - rc = EFAULT; - - rdc_group_exit(krdc); -done: - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - -/* - * Overwrite the bitmap with one supplied by the - * user. - * Copy into all bitmaps that are tracking this volume. - */ - -int -rdc_bitmapset(int op, char *sechost, char *secdev, void *bmapaddr, int bmapsz, - nsc_off_t off, int mode) -{ - int rc; - rdc_k_info_t *krdc; - int *indexvec; - int index; - int indexit; - kmutex_t **grouplocks; - int i; - int groupind; - - if (off % FBA_SIZE(1)) { - /* Must be modulo FBA */ - cmn_err(CE_WARN, "!bitmapset: Offset is not on an FBA " - "boundary %llu", (unsigned long long)off); - return (EINVAL); - } - if (bmapsz % FBA_SIZE(1)) { - /* Must be modulo FBA */ - cmn_err(CE_WARN, "!bitmapset: Size is not on an FBA " - "boundary %d", bmapsz); - return (EINVAL); - } - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byhostdev(sechost, secdev); - if (index >= 0) { - krdc = &rdc_k_info[index]; - } - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - rc = ENODEV; - mutex_exit(&rdc_conf_lock); - return (rc); - } - indexvec = kmem_alloc(rdc_max_sets * sizeof (int), KM_SLEEP); - grouplocks = kmem_alloc(rdc_max_sets * sizeof (kmutex_t *), KM_SLEEP); - - /* - * I now have this set, and I want to take the group - * lock on it, and all the group locks of all the - * 
sets on the many and multi-hop links. - * I have to take the many lock while traversing the - * many/multi links. - * I think I also need to set the busy count on this - * set, otherwise when I drop the conf_lock, what - * will stop some other process from coming in and - * issuing a disable? - */ - set_busy(krdc); - mutex_exit(&rdc_conf_lock); - -retrylock: - groupind = 0; - indexit = 0; - rdc_many_enter(krdc); - /* - * Take this initial sets group lock first. - */ - if (!mutex_tryenter(&krdc->group->lock)) { - rdc_many_exit(krdc); - goto retrylock; - } - - grouplocks[groupind] = &krdc->group->lock; - groupind++; - - rc = rdc_checkforbitmap(index, off + bmapsz); - if (rc) { - goto done; - } - indexvec[indexit] = index; - indexit++; - if (IS_MANY(krdc)) { - rdc_k_info_t *ktmp; - - for (ktmp = krdc->many_next; ktmp != krdc; - ktmp = ktmp->many_next) { - /* - * attempt to take the group lock, - * if we don't already have it. - */ - if (ktmp->group == NULL) { - rc = ENODEV; - goto done; - } - for (i = 0; i < groupind; i++) { - if (grouplocks[i] == &ktmp->group->lock) - /* already have the group lock */ - break; - } - /* - * didn't find our lock in our collection, - * attempt to take group lock. - */ - if (i >= groupind) { - if (!mutex_tryenter(&ktmp->group->lock)) { - for (i = 0; i < groupind; i++) { - mutex_exit(grouplocks[i]); - } - rdc_many_exit(krdc); - goto retrylock; - } - grouplocks[groupind] = &ktmp->group->lock; - groupind++; - } - rc = rdc_checkforbitmap(ktmp->index, off + bmapsz); - if (rc == 0) { - indexvec[indexit] = ktmp->index; - indexit++; - } else { - goto done; - } - } - } - if (IS_MULTI(krdc)) { - rdc_k_info_t *kmulti = krdc->multi_next; - - if (kmulti->group == NULL) { - rc = ENODEV; - goto done; - } - /* - * This can't be in our group already. 
- */ - if (!mutex_tryenter(&kmulti->group->lock)) { - for (i = 0; i < groupind; i++) { - mutex_exit(grouplocks[i]); - } - rdc_many_exit(krdc); - goto retrylock; - } - grouplocks[groupind] = &kmulti->group->lock; - groupind++; - - rc = rdc_checkforbitmap(kmulti->index, off + bmapsz); - if (rc == 0) { - indexvec[indexit] = kmulti->index; - indexit++; - } else { - goto done; - } - } - rc = rdc_installbitmap(op, bmapaddr, bmapsz, off, mode, indexvec, - indexit); -done: - for (i = 0; i < groupind; i++) { - mutex_exit(grouplocks[i]); - } - rdc_many_exit(krdc); - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - kmem_free(indexvec, rdc_max_sets * sizeof (int)); - kmem_free(grouplocks, rdc_max_sets * sizeof (kmutex_t *)); - return (rc); -} - -static int -rdc_checkforbitmap(int index, nsc_off_t limit) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - if (!IS_ENABLED(urdc)) { - return (EIO); - } - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) { - return (ENXIO); - } - if (krdc->dcio_bitmap == NULL) { - cmn_err(CE_WARN, "!checkforbitmap: No bitmap for set (%s:%s)", - urdc->secondary.intf, urdc->secondary.file); - return (ENOENT); - } - if (limit > krdc->bitmap_size) { - cmn_err(CE_WARN, "!checkbitmap: Bitmap exceeded, " - "incore %" NSC_SZFMT " user supplied %" NSC_SZFMT - " for set (%s:%s)", krdc->bitmap_size, - limit, urdc->secondary.intf, urdc->secondary.file); - return (ENOSPC); - } - return (0); -} - - - -/* - * Copy the user supplied bitmap to this set. 
- */ -static int -rdc_installbitmap(int op, void *bmapaddr, int bmapsz, - nsc_off_t off, int mode, int *vec, int veccnt) -{ - int rc; - nsc_off_t sfba; - nsc_off_t efba; - nsc_off_t fba; - void *ormem = NULL; - int len; - int left; - int copied; - int index; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - rc = 0; - ormem = kmem_alloc(RDC_MAXDATA, KM_SLEEP); - left = bmapsz; - copied = 0; - while (left > 0) { - if (left > RDC_MAXDATA) { - len = RDC_MAXDATA; - } else { - len = left; - } - if (ddi_copyin((char *)bmapaddr + copied, ormem, - len, mode)) { - cmn_err(CE_WARN, "!installbitmap: Copyin failed"); - rc = EFAULT; - goto out; - } - sfba = FBA_NUM(off + copied); - efba = FBA_NUM(off + copied + len); - for (index = 0; index < veccnt; index++) { - krdc = &rdc_k_info[vec[index]]; - urdc = &rdc_u_info[vec[index]]; - - mutex_enter(&krdc->bmapmutex); - if (op == RDC_BITMAPSET) { - bcopy(ormem, krdc->dcio_bitmap + off + copied, - len); - } else { - rdc_lor(ormem, - krdc->dcio_bitmap + off + copied, len); - } - /* - * Maybe this should be just done once outside of - * the the loop? (Less work, but leaves a window - * where the bits_set doesn't match the bitmap). 
- */ - urdc->bits_set = RDC_COUNT_BITMAP(krdc); - mutex_exit(&krdc->bmapmutex); - if (krdc->bitmap_write > 0) { - for (fba = sfba; fba < efba; fba++) { - if (rc = rdc_write_bitmap_fba(krdc, - fba)) { - - cmn_err(CE_WARN, - "!installbitmap: " - "write_bitmap_fba failed " - "on fba number %" NSC_SZFMT - " set %s:%s", fba, - urdc->secondary.intf, - urdc->secondary.file); - goto out; - } - } - } - } - copied += len; - left -= len; - } -out: - kmem_free(ormem, RDC_MAXDATA); - return (rc); -} - -/* - * _rdc_config - */ -int -_rdc_config(void *arg, int mode, spcs_s_info_t kstatus, int *rvp) -{ - int rc = 0; - struct netbuf fsvaddr, tsvaddr; - struct knetconfig *knconf; - char *p = NULL, *pf = NULL; - struct rdc_config *uap; - STRUCT_DECL(knetconfig, knconf_tmp); - STRUCT_DECL(rdc_config, uparms); - int enable, disable; - int cmd; - - - STRUCT_HANDLE(rdc_set, rs); - STRUCT_HANDLE(rdc_addr, pa); - STRUCT_HANDLE(rdc_addr, sa); - - STRUCT_INIT(uparms, mode); - - bzero(STRUCT_BUF(uparms), STRUCT_SIZE(uparms)); - bzero(&fsvaddr, sizeof (fsvaddr)); - bzero(&tsvaddr, sizeof (tsvaddr)); - - knconf = NULL; - - if (ddi_copyin(arg, STRUCT_BUF(uparms), STRUCT_SIZE(uparms), mode)) { - return (EFAULT); - } - - STRUCT_SET_HANDLE(rs, mode, STRUCT_FGETP(uparms, rdc_set)); - STRUCT_SET_HANDLE(pa, mode, STRUCT_FADDR(rs, primary)); - STRUCT_SET_HANDLE(sa, mode, STRUCT_FADDR(rs, secondary)); - cmd = STRUCT_FGET(uparms, command); - if (cmd == RDC_CMD_ENABLE || cmd == RDC_CMD_RESUME) { - fsvaddr.len = STRUCT_FGET(pa, addr.len); - fsvaddr.maxlen = STRUCT_FGET(pa, addr.maxlen); - fsvaddr.buf = kmem_zalloc(fsvaddr.len, KM_SLEEP); - - if (ddi_copyin(STRUCT_FGETP(pa, addr.buf), - fsvaddr.buf, fsvaddr.len, mode)) { - kmem_free(fsvaddr.buf, fsvaddr.len); -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed primary.addr 2"); -#endif - return (EFAULT); - } - - - tsvaddr.len = STRUCT_FGET(sa, addr.len); - tsvaddr.maxlen = STRUCT_FGET(sa, addr.maxlen); - tsvaddr.buf = kmem_zalloc(tsvaddr.len, KM_SLEEP); - - if 
(ddi_copyin(STRUCT_FGETP(sa, addr.buf), - tsvaddr.buf, tsvaddr.len, mode)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed secondary addr"); -#endif - kmem_free(fsvaddr.buf, fsvaddr.len); - kmem_free(tsvaddr.buf, tsvaddr.len); - return (EFAULT); - } - } else { - fsvaddr.len = 0; - fsvaddr.maxlen = 0; - fsvaddr.buf = kmem_zalloc(fsvaddr.len, KM_SLEEP); - tsvaddr.len = 0; - tsvaddr.maxlen = 0; - tsvaddr.buf = kmem_zalloc(tsvaddr.len, KM_SLEEP); - } - - if (STRUCT_FGETP(uparms, rdc_set->netconfig) != NULL) { - STRUCT_INIT(knconf_tmp, mode); - knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); - if (ddi_copyin(STRUCT_FGETP(uparms, rdc_set->netconfig), - STRUCT_BUF(knconf_tmp), STRUCT_SIZE(knconf_tmp), mode)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed netconfig"); -#endif - kmem_free(fsvaddr.buf, fsvaddr.len); - kmem_free(tsvaddr.buf, tsvaddr.len); - kmem_free(knconf, sizeof (*knconf)); - return (EFAULT); - } - - knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); - knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); - knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); - -#ifndef _SunOS_5_6 - if ((mode & DATAMODEL_LP64) == 0) { - knconf->knc_rdev = - expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); - } else { -#endif - knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); -#ifndef _SunOS_5_6 - } -#endif - - pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); - p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); - rc = ddi_copyin(knconf->knc_protofmly, pf, KNC_STRSIZE, mode); - if (rc) { -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed parms protofmly"); -#endif - rc = EFAULT; - goto out; - } - rc = ddi_copyin(knconf->knc_proto, p, KNC_STRSIZE, mode); - if (rc) { -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed parms proto"); -#endif - rc = EFAULT; - goto out; - } - knconf->knc_protofmly = pf; - knconf->knc_proto = p; - } /* !NULL netconfig */ - - uap = kmem_alloc(sizeof (*uap), KM_SLEEP); - - /* copy relevant parts of rdc_config to uap field by field */ - - 
(void) strncpy(uap->rdc_set[0].primary.intf, STRUCT_FGETP(pa, intf), - MAX_RDC_HOST_SIZE); - (void) strncpy(uap->rdc_set[0].primary.file, STRUCT_FGETP(pa, file), - NSC_MAXPATH); - (void) strncpy(uap->rdc_set[0].primary.bitmap, STRUCT_FGETP(pa, bitmap), - NSC_MAXPATH); - uap->rdc_set[0].netconfig = knconf; - uap->rdc_set[0].flags = STRUCT_FGET(uparms, rdc_set->flags); - uap->rdc_set[0].index = STRUCT_FGET(uparms, rdc_set->index); - uap->rdc_set[0].setid = STRUCT_FGET(uparms, rdc_set->setid); - uap->rdc_set[0].sync_pos = STRUCT_FGET(uparms, rdc_set->sync_pos); - uap->rdc_set[0].volume_size = STRUCT_FGET(uparms, rdc_set->volume_size); - uap->rdc_set[0].bits_set = STRUCT_FGET(uparms, rdc_set->bits_set); - uap->rdc_set[0].autosync = STRUCT_FGET(uparms, rdc_set->autosync); - uap->rdc_set[0].maxqfbas = STRUCT_FGET(uparms, rdc_set->maxqfbas); - uap->rdc_set[0].maxqitems = STRUCT_FGET(uparms, rdc_set->maxqitems); - uap->rdc_set[0].asyncthr = STRUCT_FGET(uparms, rdc_set->asyncthr); - uap->rdc_set[0].syshostid = STRUCT_FGET(uparms, rdc_set->syshostid); - uap->rdc_set[0].primary.addr = fsvaddr; /* struct copy */ - uap->rdc_set[0].secondary.addr = tsvaddr; /* struct copy */ - - (void) strncpy(uap->rdc_set[0].secondary.intf, STRUCT_FGETP(sa, intf), - MAX_RDC_HOST_SIZE); - (void) strncpy(uap->rdc_set[0].secondary.file, STRUCT_FGETP(sa, file), - NSC_MAXPATH); - (void) strncpy(uap->rdc_set[0].secondary.bitmap, - STRUCT_FGETP(sa, bitmap), NSC_MAXPATH); - - (void) strncpy(uap->rdc_set[0].direct_file, - STRUCT_FGETP(rs, direct_file), NSC_MAXPATH); - - (void) strncpy(uap->rdc_set[0].group_name, STRUCT_FGETP(rs, group_name), - NSC_MAXPATH); - - (void) strncpy(uap->rdc_set[0].disk_queue, STRUCT_FGETP(rs, disk_queue), - NSC_MAXPATH); - - uap->command = STRUCT_FGET(uparms, command); - uap->options = STRUCT_FGET(uparms, options); - - enable = (uap->command == RDC_CMD_ENABLE || - uap->command == RDC_CMD_RESUME); - disable = (uap->command == RDC_CMD_DISABLE || - uap->command == 
RDC_CMD_SUSPEND); - - /* - * Initialise the threadset if it has not already been done. - * - * This has to be done now, not in rdcattach(), because - * rdcattach() can be called before nskernd is running (eg. - * boot -r) in which case the nst_init() would fail and hence - * the attach would fail. - * - * Threadset creation is locked by the rdc_conf_lock, - * destruction is inherently single threaded as it is done in - * _rdc_unload() which must be the last thing performed by - * rdcdetach(). - */ - - if (enable && _rdc_ioset == NULL) { - mutex_enter(&rdc_conf_lock); - - if (_rdc_ioset == NULL) { - rc = rdc_thread_configure(); - } - - mutex_exit(&rdc_conf_lock); - - if (rc || _rdc_ioset == NULL) { - spcs_s_add(kstatus, RDC_ENOTHREADS); - rc = RDC_ENOTHREADS; - goto outuap; - } - } - switch (uap->command) { - case RDC_CMD_ENABLE: - rc = rdc_enable(uap, kstatus); - break; - case RDC_CMD_DISABLE: - rc = rdc_disable(uap, kstatus); - break; - case RDC_CMD_COPY: - rc = rdc_sync(uap, kstatus); - break; - case RDC_CMD_LOG: - rc = rdc_log(uap, kstatus); - break; - case RDC_CMD_RECONFIG: - rc = rdc_reconfig(uap, kstatus); - break; - case RDC_CMD_RESUME: - rc = rdc_resume(uap, kstatus); - break; - case RDC_CMD_SUSPEND: - rc = rdc_suspend(uap, kstatus); - break; - case RDC_CMD_TUNABLE: - rc = rdc_tunable(uap, kstatus); - break; - case RDC_CMD_WAIT: - rc = rdc_wait(uap, kstatus); - break; - case RDC_CMD_HEALTH: - rc = rdc_health(uap, kstatus, rvp); - break; - case RDC_CMD_STATUS: - rc = rdc_status(arg, mode, uap, kstatus); - break; - case RDC_CMD_RESET: - rc = rdc_reset(uap, kstatus); - break; - case RDC_CMD_ADDQ: - rc = rdc_add_diskq(uap, kstatus); - break; - case RDC_CMD_REMQ: - if ((rc = rdc_rem_diskq(uap, kstatus)) != 0) - break; - /* FALLTHRU */ - case RDC_CMD_KILLQ: - rc = rdc_kill_diskq(uap, kstatus); - break; - case RDC_CMD_INITQ: - rc = rdc_init_diskq(uap, kstatus); - break; - - default: - rc = EINVAL; - break; - } - - /* - * Tune the threadset size after a successful 
rdc_set addition - * or removal. - */ - if ((enable || disable) && rc == 0) { - mutex_enter(&rdc_conf_lock); - rdc_thread_tune(enable ? 2 : -2); - mutex_exit(&rdc_conf_lock); - } -outuap: - kmem_free(uap, sizeof (*uap)); -out: - kmem_free(fsvaddr.buf, fsvaddr.len); - kmem_free(tsvaddr.buf, tsvaddr.len); - if (pf) - kmem_free(pf, KNC_STRSIZE); - if (p) - kmem_free(p, KNC_STRSIZE); - if (knconf) - kmem_free(knconf, sizeof (*knconf)); - return (rc); -} - - -/* - * krdc->group->lock held on entry to halt_sync() - */ -static void -halt_sync(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - ASSERT(MUTEX_HELD(&krdc->group->lock)); - ASSERT(IS_ENABLED(urdc)); - - /* - * If a sync is in progress, halt it - */ - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (krdc->aux_state & RDC_AUXSYNCIP)) { - krdc->disk_status = 1; - - while (krdc->disk_status == 1) { - if (cv_wait_sig(&krdc->haltcv, &krdc->group->lock) == 0) - break; - } - } -} - -/* - * return size in blocks - */ -uint64_t -mirror_getsize(int index) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rc, rs; - nsc_size_t size; - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - rc = _rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL); - rs = nsc_partsize(RDC_U_FD(krdc), &size); - urdc->volume_size = size; - if (rc == 0) - _rdc_rlse_devs(krdc, RDC_RAW); - - return (rs == 0 ? urdc->volume_size : 0); -} - - -/* - * Create a new dataset for this transfer, and add it to the list - * of datasets via the net_dataset pointer in the krdc. 
- */ -rdc_net_dataset_t * -rdc_net_add_set(int index) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_net_dataset_t *dset; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc_net_add_set: bad index %d", index); - return (NULL); - } - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - dset = kmem_alloc(sizeof (*dset), KM_NOSLEEP); - if (dset == NULL) { - cmn_err(CE_NOTE, "!rdc_net_add_set: kmem_alloc failed"); - return (NULL); - } - RDC_DSMEMUSE(sizeof (*dset)); - dset->inuse = 1; - dset->nitems = 0; - dset->delpend = 0; - dset->head = NULL; - dset->tail = NULL; - mutex_enter(&krdc->dc_sleep); - - if (!IS_ENABLED(urdc)) { - /* raced with a disable command */ - kmem_free(dset, sizeof (*dset)); - RDC_DSMEMUSE(-sizeof (*dset)); - mutex_exit(&krdc->dc_sleep); - return (NULL); - } - /* - * Shared the id generator, (and the locks). - */ - mutex_enter(&rdc_net_hnd_id_lock); - if (++rdc_net_hnd_id == 0) - rdc_net_hnd_id = 1; - dset->id = rdc_net_hnd_id; - mutex_exit(&rdc_net_hnd_id_lock); - -#ifdef DEBUG - if (krdc->net_dataset != NULL) { - rdc_net_dataset_t *dset2; - for (dset2 = krdc->net_dataset; dset2; dset2 = dset2->next) { - if (dset2->id == dset->id) { - cmn_err(CE_PANIC, - "rdc_net_add_set duplicate id %p:%d %p:%d", - (void *)dset, dset->id, - (void *)dset2, dset2->id); - } - } - } -#endif - dset->next = krdc->net_dataset; - krdc->net_dataset = dset; - mutex_exit(&krdc->dc_sleep); - - return (dset); -} - -/* - * fetch the previously added dataset. - */ -rdc_net_dataset_t * -rdc_net_get_set(int index, int id) -{ - rdc_k_info_t *krdc; - rdc_net_dataset_t *dset; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc_net_get_set: bad index %d", index); - return (NULL); - } - krdc = &rdc_k_info[index]; - - mutex_enter(&krdc->dc_sleep); - - dset = krdc->net_dataset; - while (dset && (dset->id != id)) - dset = dset->next; - - if (dset) { - dset->inuse++; - } - - mutex_exit(&krdc->dc_sleep); - return (dset); -} - -/* - * Decrement the inuse counter. 
Data may be freed. - */ -void -rdc_net_put_set(int index, rdc_net_dataset_t *dset) -{ - rdc_k_info_t *krdc; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc_net_put_set: bad index %d", index); - return; - } - krdc = &rdc_k_info[index]; - - mutex_enter(&krdc->dc_sleep); - dset->inuse--; - ASSERT(dset->inuse >= 0); - if ((dset->inuse == 0) && (dset->delpend)) { - rdc_net_free_set(krdc, dset); - } - mutex_exit(&krdc->dc_sleep); -} - -/* - * Mark that we are finished with this set. Decrement inuse - * counter, mark as needing deletion, and - * remove from linked list. - */ -void -rdc_net_del_set(int index, rdc_net_dataset_t *dset) -{ - rdc_k_info_t *krdc; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc_net_del_set: bad index %d", index); - return; - } - krdc = &rdc_k_info[index]; - - mutex_enter(&krdc->dc_sleep); - dset->inuse--; - ASSERT(dset->inuse >= 0); - dset->delpend = 1; - if (dset->inuse == 0) { - rdc_net_free_set(krdc, dset); - } - mutex_exit(&krdc->dc_sleep); -} - -/* - * free all the memory associated with this set, and remove from - * list. - * Enters and exits with dc_sleep lock held. - */ - -void -rdc_net_free_set(rdc_k_info_t *krdc, rdc_net_dataset_t *dset) -{ - rdc_net_dataset_t **dsetp; -#ifdef DEBUG - int found = 0; -#endif - - ASSERT(MUTEX_HELD(&krdc->dc_sleep)); - ASSERT(dset); - for (dsetp = &krdc->net_dataset; *dsetp; dsetp = &((*dsetp)->next)) { - if (*dsetp == dset) { - *dsetp = dset->next; -#ifdef DEBUG - found = 1; -#endif - break; - } - } - -#ifdef DEBUG - if (found == 0) { - cmn_err(CE_WARN, "!rdc_net_free_set: Unable to find " - "dataset 0x%p in krdc list", (void *)dset); - } -#endif - /* - * unlinked from list. Free all the data - */ - rdc_ditemsfree(dset); - /* - * free my core. - */ - kmem_free(dset, sizeof (*dset)); - RDC_DSMEMUSE(-sizeof (*dset)); -} - - -/* - * Free all the dataitems and the data it points to. 
- */ -static void -rdc_ditemsfree(rdc_net_dataset_t *dset) -{ - rdc_net_dataitem_t *ditem; - rdc_net_dataitem_t *nitem; - - ditem = dset->head; - - while (ditem) { - nitem = ditem->next; - kmem_free(ditem->dptr, ditem->mlen); - RDC_DSMEMUSE(-ditem->mlen); - dset->nitems--; - kmem_free(ditem, sizeof (*ditem)); - RDC_DSMEMUSE(-sizeof (*ditem)); - ditem = nitem; - } - ASSERT(dset->nitems == 0); -} - -/* - * allocate and initialize a rdc_aio_t - */ -rdc_aio_t * -rdc_aio_tbuf_get(void *n, void *h, int pos, int len, int flag, int index, int s) -{ - rdc_aio_t *p; - - p = kmem_zalloc(sizeof (rdc_aio_t), KM_NOSLEEP); - if (p == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdcaiotbufget: kmem_alloc failed bp aio"); -#endif - return (NULL); - } else { - p->next = n; /* overload */ - p->handle = h; - p->pos = pos; - p->qpos = -1; - p->len = len; - p->flag = flag; - p->index = index; - p->iostatus = s; /* overload */ - /* set up seq later, in case thr create fails */ - } - return (p); -} - -/* - * rdc_aio_buf_get - * get an aio_buf - */ -aio_buf_t * -rdc_aio_buf_get(rdc_buf_t *h, int index) -{ - aio_buf_t *p; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc: rdc_aio_buf_get bad index %x", index); - return (NULL); - } - - mutex_enter(&h->aio_lock); - - p = h->rdc_anon; - while (p && (p->kindex != index)) - p = p->next; - - mutex_exit(&h->aio_lock); - return (p); -} - -/* - * rdc_aio_buf_del - * delete a aio_buf - */ -void -rdc_aio_buf_del(rdc_buf_t *h, rdc_k_info_t *krdc) -{ - aio_buf_t *p, **pp; - - mutex_enter(&h->aio_lock); - - p = NULL; - for (pp = &h->rdc_anon; *pp; pp = &((*pp)->next)) { - if ((*pp)->kindex == krdc->index) { - p = *pp; - break; - } - } - - if (p) { - *pp = p->next; - kmem_free(p, sizeof (*p)); - } - mutex_exit(&h->aio_lock); -} - -/* - * rdc_aio_buf_add - * Add a aio_buf. 
- */ -aio_buf_t * -rdc_aio_buf_add(int index, rdc_buf_t *h) -{ - aio_buf_t *p; - - p = kmem_zalloc(sizeof (*p), KM_NOSLEEP); - if (p == NULL) { - cmn_err(CE_NOTE, "!rdc_aio_buf_add: kmem_alloc failed"); - return (NULL); - } - - p->rdc_abufp = NULL; - p->kindex = index; - - mutex_enter(&h->aio_lock); - p->next = h->rdc_anon; - h->rdc_anon = p; - mutex_exit(&h->aio_lock); - return (p); -} - -/* - * kmemalloc a new group structure and setup the common - * fields. - */ -static rdc_group_t * -rdc_newgroup() -{ - rdc_group_t *group; - - group = kmem_zalloc(sizeof (rdc_group_t), KM_SLEEP); - group->diskq.lastio = kmem_zalloc(sizeof (rdc_aio_t), KM_SLEEP); - group->count = 1; - group->seq = RDC_NEWSEQ; - group->seqack = RDC_NEWSEQ; - mutex_init(&group->lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->ra_queue.net_qlock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->diskqmutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->diskq.disk_qlock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->diskq.head_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->addthrnumlk, NULL, MUTEX_DRIVER, NULL); - cv_init(&group->unregistercv, NULL, CV_DRIVER, NULL); - cv_init(&group->asyncqcv, NULL, CV_DRIVER, NULL); - cv_init(&group->diskq.busycv, NULL, CV_DRIVER, NULL); - cv_init(&group->diskq.qfullcv, NULL, CV_DRIVER, NULL); - cv_init(&group->ra_queue.qfcv, NULL, CV_DRIVER, NULL); - group->ra_queue.qfill_sleeping = RDC_QFILL_DEAD; - group->diskq.busycnt = 0; - ASSERT(group->synccount == 0); /* group was kmem_zalloc'ed */ - - /* - * add default number of threads to the flusher thread set, plus - * one extra thread for the disk queue flusher - */ - if (nst_add_thread(_rdc_flset, 3) != 3) - cmn_err(CE_NOTE, "!rdc_newgroup: nst_add_thread failed"); - - return (group); -} - -void -rdc_delgroup(rdc_group_t *group) -{ - - ASSERT(group->asyncstall == 0); - ASSERT(group->rdc_thrnum == 0); - ASSERT(group->count == 0); - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - 
mutex_enter(&group->ra_queue.net_qlock); - rdc_sleepqdiscard(group); - mutex_exit(&group->ra_queue.net_qlock); - - /* try to remove flusher threads that this group added to _rdc_flset */ - if (nst_del_thread(_rdc_flset, group->rdc_addthrnum + 3) != - group->rdc_addthrnum + 3) - cmn_err(CE_NOTE, "!rdc_delgroup: nst_del_thread failed"); - - mutex_destroy(&group->lock); - mutex_destroy(&group->ra_queue.net_qlock); - mutex_destroy(&group->diskqmutex); - mutex_destroy(&group->diskq.disk_qlock); - mutex_destroy(&group->diskq.head_lock); - mutex_destroy(&group->addthrnumlk); - cv_destroy(&group->unregistercv); - cv_destroy(&group->asyncqcv); - cv_destroy(&group->diskq.busycv); - cv_destroy(&group->diskq.qfullcv); - cv_destroy(&group->ra_queue.qfcv); - kmem_free(group->diskq.lastio, sizeof (rdc_aio_t)); - kmem_free(group, sizeof (rdc_group_t)); -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_io.h b/usr/src/uts/common/avs/ns/rdc/rdc_io.h deleted file mode 100644 index 6acf96686c..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_io.h +++ /dev/null @@ -1,1009 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -#ifndef _RDC_IO_H -#define _RDC_IO_H - -#ifdef __cplusplus -extern "C" { -#endif - - -#include <sys/unistat/spcs_s.h> -#ifdef DS_DDICT -#define bool_t int -#endif -#include <sys/nsctl/rdc_prot.h> -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/rdc_ioctl.h> - -/* - * Definitions for kstats - */ -#define RDC_MKSTAT_MAXSETS "maxsets" -#define RDC_MKSTAT_MAXFBAS "maxfbas" -#define RDC_MKSTAT_RPC_TIMEOUT "rpc_timeout" -#define RDC_MKSTAT_HEALTH_THRES "health_thres" -#define RDC_MKSTAT_BITMAP_WRITES "bitmap_writes" -#define RDC_MKSTAT_CLNT_COTS_CALLS "clnt_cots_calls" -#define RDC_MKSTAT_CLNT_CLTS_CALLS "clnt_clts_calls" -#define RDC_MKSTAT_SVC_COTS_CALLS "svc_cots_calls" -#define RDC_MKSTAT_SVC_CLTS_CALLS "svc_clts_calls" -#define RDC_MKSTAT_BITMAP_REF_DELAY "bitmap_ref_delay" - -#define RDC_IKSTAT_FLAGS "flags" -#define RDC_IKSTAT_SYNCFLAGS "syncflags" -#define RDC_IKSTAT_BMPFLAGS "bmpflags" -#define RDC_IKSTAT_SYNCPOS "syncpos" -#define RDC_IKSTAT_VOLSIZE "volsize" -#define RDC_IKSTAT_BITSSET "bitsset" -#define RDC_IKSTAT_AUTOSYNC "autosync" -#define RDC_IKSTAT_MAXQFBAS "maxqfbas" -#define RDC_IKSTAT_MAXQITEMS "maxqitems" -#define RDC_IKSTAT_FILE "primary_vol" -#define RDC_IKSTAT_SECFILE "secondary_vol" -#define RDC_IKSTAT_BITMAP "bitmap" -#define RDC_IKSTAT_PRIMARY_HOST "primary_host" -#define RDC_IKSTAT_SECONDARY_HOST "secondary_host" -#define RDC_IKSTAT_TYPE_FLAG "type_flag" -#define RDC_IKSTAT_BMP_SIZE "bmp_size" -#define RDC_IKSTAT_DISK_STATUS "disk_status" -#define RDC_IKSTAT_IF_DOWN "if_down" -#define RDC_IKSTAT_IF_RPC_VERSION "if_rpc_version" -#define RDC_IKSTAT_ASYNC_THROTTLE_DELAY "async_throttle_delay" -#define RDC_IKSTAT_ASYNC_BLOCK_HWM "async_block_hwm" -#define RDC_IKSTAT_ASYNC_ITEM_HWM "async_item_hwm" -#define RDC_IKSTAT_QUEUE_TYPE "async_queue_type" -#define RDC_IKSTAT_ASYNC_ITEMS "async_queue_items" -#define RDC_IKSTAT_ASYNC_BLOCKS "async_queue_blocks" - -/* - * Queue types - */ -#define RDC_DISKQUE 
0X01 -#define RDC_MEMQUE 0x02 -#define RDC_NOQUE -1 - -#define RDC_ACTIVE 0x1 -#define RDC_INACTIVE 0x2 - -#ifdef _KERNEL - -extern nstset_t *_rdc_ioset; -extern nstset_t *_rdc_flset; - -#ifdef DEBUG -extern int RDC_MAX_SYNC_THREADS; -extern int rdc_maxthreads_last; -int num_sync_threads; -#else -#define RDC_MAX_SYNC_THREADS 8 -#endif -#ifdef DEBUG -#define RDC_AVAIL_THR_TUNE(n) \ - do { \ - if (rdc_maxthreads_last < RDC_MAX_SYNC_THREADS) { \ - (void) nst_add_thread(n.rdc_syncset, \ - RDC_MAX_SYNC_THREADS - rdc_maxthreads_last);\ - } \ - if (rdc_maxthreads_last > RDC_MAX_SYNC_THREADS) { \ - (void) nst_del_thread(n.rdc_syncset, \ - rdc_maxthreads_last - RDC_MAX_SYNC_THREADS); \ - } \ - n.avail_thr = RDC_MAX_SYNC_THREADS - n.active_thr; \ - if (n.avail_thr < 0) { \ - n.avail_thr = 0; \ - } \ - rdc_maxthreads_last = RDC_MAX_SYNC_THREADS; \ - num_sync_threads = nst_nthread(n.rdc_syncset); \ - } while (0); -#else -#define RDC_AVAIL_THR_TUNE(n) \ - do { \ - n.avail_thr = RDC_MAX_SYNC_THREADS - n.active_thr; \ - if (n.avail_thr < 0) \ - n.avail_thr = 0; \ - } while (0); - -#endif - -typedef struct syncloop_info { - int active_thr; - int avail_thr; /* should be MAX_RDC_SYNC_THREADS - active */ - kmutex_t lock; - nstset_t *rdc_syncset; -} sync_info_t; - -sync_info_t sync_info; - -/* - * Static server information - */ -typedef struct servinfo { - struct knetconfig *ri_knconf; /* bound TLI fd */ - struct netbuf ri_addr; /* server's address */ - struct sec_data *ri_secdata; /* sec data for rpcsec module */ - char *ri_hostname; /* server's hostname */ - int ri_hostnamelen; /* server's hostname length */ -} rdc_srv_t; - -/* - * Interface structure, including health monitoring. 
- */ -typedef struct rdc_if_s { - struct rdc_if_s *next; /* chain pointer */ - struct netbuf ifaddr; - struct netbuf r_ifaddr; - rdc_srv_t *srv; /* servinfo of server end */ - int if_down; /* i/f is down (set on primary) */ - int isprimary; /* this end is a primary */ - int issecondary; /* this end is a secondary */ - rpcvers_t rpc_version; /* RPC protocol version in use */ - int no_ping; /* set on secondary to hold off RPCs */ - int old_pulse; /* previous (current) pulse value */ - int new_pulse; /* new (incoming) pulse value */ - int deadness; /* how close to death are we? */ - volatile int exiting; /* daemon exit flag */ - time_t last; /* time of last ping */ -} rdc_if_t; - - -typedef struct rdc_aio_s { - struct rdc_aio_s *next; - nsc_buf_t *handle; - nsc_buf_t *qhandle; - nsc_off_t pos; - nsc_off_t qpos; - nsc_size_t len; - nsc_size_t orig_len; - int flag; - int iostatus; - int index; - uint_t seq; /* sequence on async Q */ -} rdc_aio_t; - -/* values for (rdc_aio_t *)->iostatus */ -enum { - RDC_IO_NONE = 0, /* not used */ - RDC_IO_INIT, /* io started */ - RDC_IO_DONE, /* io done successfully */ - RDC_IO_FAILED, /* io failed */ - RDC_IO_DISCARDED, /* io discarded */ - RDC_IO_CANCELLED /* group_log in progress */ -}; - - -#define RDC_MAX_QBLOCKS 16384 /* 8MB temporary q for diskq to flush to */ -#define RDC_LOW_QBLOCKS 13927 /* roughly 85% of queue full */ -#define RDC_HALF_MQUEUE 8192 /* half of the memory queue */ - -typedef struct netqueue { - rdc_aio_t *net_qhead; - rdc_aio_t *net_qtail; - kmutex_t net_qlock; - int hwmhit; /* queue full hit? reset after hwm */ - int qfill_sleeping; /* waiting for work? 
*/ - int qfflags; /* diskq/memq flusher flags */ - kcondvar_t qfcv; /* for timed waits */ - volatile nsc_size_t blocks; /* number of FBAs in q */ - volatile uint64_t nitems; /* number of items in q */ - volatile int inflbls; /* number of inflight blocks */ - volatile int inflitems; /* number of inflight items */ - uint64_t nitems_hwm; /* highest items on queue */ - nsc_size_t blocks_hwm; /* highest blocks on queue */ - long throttle_delay; /* Number of times we delayed x 2 */ -} net_queue; - - -/* - * Bitmap header structures. - * These must be fixed size in all data models. - * If we ever support little-endian machines (eg. Intel) we will need - * to add byte-swapping logic. - */ - -typedef struct { - int32_t magic; - int32_t serial_mode; - int32_t use_mirror; - int32_t mirror_down; - int32_t sync_needed; - char bitmapname[NSC_MAXPATH]; - char filename[NSC_MAXPATH]; - int32_t volume_failed; -} rdc_headerv2_t; -#define RDC_HDR_V2 0x52444302 /* RDC2 */ - -#define RDC_SYNC 0x1 -#define RDC_REV_SYNC 0x2 -#define RDC_FULL_SYNC 0x3 - -#define RDC_FAILED 0x1 -#define RDC_COMPLETED 0x2 - -typedef struct { - char file[NSC_MAXPATH]; - char bitmap[NSC_MAXPATH]; -} rdc_hdr_addr_t; - -typedef struct { - int32_t magic; - rdc_hdr_addr_t primary; - rdc_hdr_addr_t secondary; - int32_t flags; - int32_t autosync; - int32_t maxqfbas; - int32_t maxqitems; - int32_t syshostid; /* for cluster bitmaps */ -} rdc_headerv3_t; -#define RDC_HDR_V3 0x52444303 /* RDC3 */ - -typedef struct { - int32_t magic; - rdc_hdr_addr_t primary; - rdc_hdr_addr_t secondary; - int32_t flags; - int32_t autosync; - int32_t maxqfbas; - int32_t maxqitems; - int32_t syshostid; /* for cluster bitmaps */ - int32_t asyncthr; -} rdc_headerv4_t; -#define RDC_HDR_V4 0x52444304 /* RDC4 */ - -typedef struct { - int32_t magic; - rdc_hdr_addr_t primary; - rdc_hdr_addr_t secondary; - int32_t flags; - int32_t autosync; - int64_t maxqfbas; - int64_t maxqitems; - int32_t syshostid; /* for cluster bitmaps */ - int32_t asyncthr; 
- int32_t refcntsize; /* size in bytes of each refcount */ -} rdc_headerv5_t; -#define RDC_HDR_V5 0x52444305 /* RDC5 */ - -typedef rdc_headerv5_t rdc_header_t; /* Current header type */ -#define RDC_HDR_MAGIC RDC_HDR_V5 /* Current header magic number */ - -#endif /* _KERNEL */ - -#define RDC_BITMAP_FBA 1 /* Offset at which the bitmap starts */ -#define RDC_BITREF_FBA(krdc) (RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size)) - -#ifdef _KERNEL - -#define RDC_FUTILE_ATTEMPTS 50 -typedef struct aio_buf_s { - struct aio_buf_s *next; /* next aio_buf */ - nsc_buf_t *rdc_abufp; /* actual anon buf */ - int kindex; /* index we are attached to */ -} aio_buf_t; - -typedef struct rdc_thrsync { - kmutex_t lock; - int threads; - int complete; - kcondvar_t cv; -} rdc_thrsync_t; - -typedef struct sync_status_s { - int offset; - struct sync_status_s *next; -} sync_status_t; - -typedef struct rdc_syncthr { - nsc_off_t offset; - nsc_size_t len; - struct rdc_k_info *krdc; - sync_status_t *status; -} rdc_syncthr_t; - -/* - * RDC buffer header - */ - -typedef struct rdc_buf_s { - nsc_buf_t rdc_bufh; /* exported buffer header */ - nsc_buf_t *rdc_bufp; /* underlying buffer */ - aio_buf_t *rdc_anon; /* ANON async buffer */ - struct rdc_fd_s *rdc_fd; /* back link */ - size_t rdc_vsize; /* size of allocated nsc_vec_t */ - int rdc_flags; /* flags */ - kmutex_t aio_lock; /* lock for rdc_anon */ - rdc_thrsync_t rdc_sync; /* for thread syncronization */ -} rdc_buf_t; - -#define RDC_VEC_ALLOC 0x1 /* local kmem vector for remote io */ -#define RDC_ALLOC 0x2 /* rdc_bufp is nsc_buf_alloc'd */ -#define RDC_ASYNC_VEC 0x4 /* Keep tmp handle for async flusher */ -#define RDC_REMOTE_BUF 0x8 /* buffer alloc'd for remote io only */ -#define RDC_NULL_BUF 0x10 /* tell diskq to only store io_hdr */ -#define RDC_ASYNC_BUF 0x20 /* this buf is to an async vol */ -#define RDC_NULLBUFREAD 0x0f000000 /* read because RDC_NULL_BUF detected */ - -#define BUF_IS_ASYNC(h) (((h) != NULL) && (h)->rdc_flags & RDC_ASYNC_BUF) 
-#define RDC_REMOTE(h) (((h) != NULL) && ((h)->rdc_flags & RDC_REMOTE_BUF) && \ - (((h)->rdc_flags & RDC_ASYNC_VEC) == 0)) - -/* check a handle against a supplied pos/len pair */ - -#define RDC_HANDLE_LIMITS(h, p, l) \ - (((h)->sb_user & RDC_DISKQUE) || \ - ((p) >= (h)->sb_pos) && \ - (((p) + (l)) <= ((h)->sb_pos + (h)->sb_len))) - -/* check a dset against a supplied pos/len pair */ - -#define RDC_DSET_LIMITS(d, p, l) \ - (((p) >= (d)->pos) && \ - (((p) + (l)) <= ((d)->pos + (d)->fbalen))) - -/* - * RDC device info structures - */ - -typedef struct _rdc_info_dev_s { - nsc_fd_t *bi_fd; /* file descriptor */ - nsc_iodev_t *bi_iodev; /* I/O device structure */ - struct rdc_k_info *bi_krdc; /* back link */ - int bi_rsrv; /* Count of reserves held */ - int bi_orsrv; /* Reserves for other io provider */ - int bi_failed; /* Count of failed (faked) reserves */ - int bi_ofailed; /* Other io provider failed reserves */ - int bi_flag; /* Reserve flags */ -} _rdc_info_dev_t; - - -typedef struct rdc_info_dev_s { - struct rdc_info_dev_s *id_next; /* forward link */ - _rdc_info_dev_t id_cache_dev; /* cached device info */ - _rdc_info_dev_t id_raw_dev; /* raw device info */ - kmutex_t id_rlock; /* reserve/release lock */ - kcondvar_t id_rcv; /* nsc_release pending cv */ - int id_sets; /* # of sets referencing */ - int id_release; /* # of pending nsc_releases */ - int id_flag; /* flags */ -} rdc_info_dev_t; - - -typedef struct rdc_path_s { - nsc_path_t *rp_tok; /* nsc_register_path token */ - int rp_ref; /* # of rdc_fd_t's */ -} rdc_path_t; - - -/* - * Values for id_flag - */ -#define RDC_ID_CLOSING 0x1 /* device is closing */ - -#include <sys/nsctl/rdc_diskq.h> - -/* - * value for diskio.seq. 
- */ -#define RDC_NOSEQ (0) /* ignore sequence */ -#define RDC_NEWSEQ (1) /* start of sequence */ - -typedef struct rdc_sleepq { - struct rdc_sleepq *next; - uint_t seq; /* sequence in queue */ - int idx; /* idx number of request */ - int pindex; /* primary host set index */ - int sindex; /* secondary host set index */ - uint64_t qpos; /* offset on primary's queue */ - int nocache; /* cache flag to alloc_buf */ -} rdc_sleepq_t; - -/* - * RDC group structure - */ -typedef struct rdc_group { - int count; - int rdc_writer; - int unregistering; - kmutex_t lock; - net_queue ra_queue; /* io todo async queues */ - kcondvar_t iowaitcv; /* wait for flusher */ - kcondvar_t unregistercv; /* wait for unregister */ - int rdc_thrnum; /* number of threads */ - int rdc_addthrnum; /* number threads added to thr set */ - kmutex_t addthrnumlk; /* lock for above */ - rdc_sleepq_t *sleepq; /* head of waiting tasks */ - /* - * Dual use, the outgoing sequence number on the client. - * The next expected sequence number on the server. - * Protected by the ra_queue lock. - */ - uint_t seq; - /* - * Dual use, the last acknowledged sequence number. - * Used to ensure that the queue doesn't overflow on server - * and to stall transmissions on the client. - * Protected by the ra_queue lock. - */ - uint_t seqack; - int asyncstall; /* count of asleep threads */ - int asyncdis; /* discard stalled output */ - kcondvar_t asyncqcv; /* output stall here */ - int flags; /* memory or disk. 
status etc */ - disk_queue diskq; /* disk queue */ - nsc_fd_t *diskqfd; /* diskq handle */ - nsc_path_t *q_tok; /* q registration */ - int diskqrsrv; /* reserve count */ - kmutex_t diskqmutex; /* enables/disables/reserves */ - uint_t synccount; /* number of group members syncing */ -} rdc_group_t; - -/* group state */ -#define RDC_DISKQ_KILL 0x01 /* a force kill of diskq pending */ - -#define RDC_IS_DISKQ(grp) (grp->flags & RDC_DISKQUE) -#define RDC_IS_MEMQ(grp) (grp->flags & RDC_MEMQUE) - -/* - * These flags are used in the - * aux_state field, and are used to track: - * AUXSYNCIP: When the code has a sync thread running, used instead - * of the RC_SYNCING flag which gets cleared before the sync thread - * terminates. - * AUXWRITE: Set when rdc_sync_write_thr is running, so the rdc_unintercept - * code can wait until a one-to-many write has actually terminated. - */ -#define RDC_AUXSYNCIP 0x01 /* a sync is in progress */ -#define RDC_AUXWRITE 0x02 /* I've got a write in progress */ - - -/* - * RDC kernel-private information - */ -typedef struct rdc_k_info { - int index; /* Index into array */ - int remote_index; /* -1 means unknown */ - int type_flag; - int rpc_version; /* RPC version this set supps */ - int spare1; - nsc_off_t syncbitpos; - kmutex_t syncbitmutex; /* lock for syncbitpos */ - volatile int busy_count; /* ioctls in progress */ - volatile int sync_done; - int aux_state; /* syncing ,don't disable */ - rdc_thrsync_t syncs; /* _rdc_sync thread tracking */ - rdc_info_dev_t *devices; - nsc_iodev_t *iodev; /* I/O device structure */ - rdc_path_t cache_path; - rdc_path_t raw_path; - rdc_if_t *intf; - rdc_srv_t *lsrv; /* list of servinfo */ - nsc_size_t maxfbas; /* returned from nsc_maxfbas */ - unsigned char *dcio_bitmap; - void *bitmap_ref; /* Incore bitmap bit ref */ - struct rdc_group *group; - nsc_size_t bitmap_size; - int bmaprsrv; /* bitmap reserve count */ - int bitmap_write; - nsc_fd_t *bitmapfd; - nsc_fd_t *remote_fd; /* FCAL direct io */ - volatile 
int disk_status; /* set to halt sync */ - int closing; - nsc_path_t *b_tok; /* Bitmap registration */ - int b_ref; - kmutex_t dc_sleep; - kmutex_t bmapmutex; /* mutex for bitmap ops */ - kcondvar_t busycv; /* wait for ioctl to complete */ - kcondvar_t closingcv; /* unregister_path/close */ - kcondvar_t haltcv; /* wait for sync to halt */ - kcondvar_t synccv; /* wait for sync to halt */ - struct rdc_net_dataset *net_dataset; /* replaces hnds */ - int64_t io_time; /* moved from cd_info */ - struct rdc_k_info *many_next; /* 1-to-many circular list */ - struct rdc_k_info *multi_next; /* to multihop krdc */ - struct rdc_k_info *group_next; /* group circular list */ - kstat_t *io_kstats; /* io kstat */ - kstat_t *bmp_kstats; /* bitmap io kstat */ - kstat_t *set_kstats; /* set kstat */ - kmutex_t kstat_mutex; /* mutex for kstats */ - kmutex_t bmp_kstat_mutex; /* mutex for kstats */ - struct bm_ref_ops *bm_refs; -} rdc_k_info_t; - -#define c_fd devices->id_cache_dev.bi_fd -#define c_rsrv devices->id_cache_dev.bi_rsrv -#define c_failed devices->id_cache_dev.bi_failed -#define c_flag devices->id_cache_dev.bi_flag - -#define c_tok cache_path.rp_tok -#define c_ref cache_path.rp_ref - -#define r_fd devices->id_raw_dev.bi_fd -#define r_rsrv devices->id_raw_dev.bi_rsrv -#define r_failed devices->id_raw_dev.bi_failed -#define r_flag devices->id_raw_dev.bi_flag - -#define r_tok raw_path.rp_tok -#define r_ref raw_path.rp_ref - -/* - * flags for _rdc_rsrv_devs() - */ - -/* - * which device(s) to reserve - integer bitmap. - */ - -#define RDC_CACHE 0x1 /* data device in cache mode */ -#define RDC_RAW 0x2 /* data device in raw mode */ -#define RDC_BMP 0x4 /* bitmap device */ -#define RDC_QUE 0x8 /* diskq device */ - -/* - * device usage after reserve - integer flag. - */ - -#define RDC_INTERNAL 0x1 /* reserve for rdc internal purposes */ -#define RDC_EXTERNAL 0x2 /* reserve in response to io provider Attach */ - -/* - * Utility macro for nsc_*() io function returns. 
- */ - -#define RDC_SUCCESS(rc) (((rc) == NSC_DONE) || ((rc) == NSC_HIT)) - -/* - * RDC file descriptor structure - */ - -typedef struct rdc_fd_s { - rdc_k_info_t *rdc_info; /* devices info structure */ - int rdc_type; /* open type, diskq or bitmap */ - int rdc_oflags; /* raw or cached open type */ -} rdc_fd_t; - -/* - * fd and rsrv macros - */ - -#define RSRV(bi) (((bi)->bi_rsrv > 0) || ((bi)->bi_failed > 0)) -#define ORSRV(bi) (((bi)->bi_orsrv > 0) || ((bi)->bi_ofailed > 0)) -#define RFAILED(bi) (((bi)->bi_failed > 0) || ((bi)->bi_ofailed > 0)) - -#define IS_RSRV(bi) (RSRV(bi) || ORSRV(bi)) - -#define IS_CRSRV(gcd) (IS_RSRV(&(gcd)->devices->id_cache_dev)) -#define IS_RRSRV(gcd) (IS_RSRV(&(gcd)->devices->id_raw_dev)) - -#define IS_RFAILED(gcd) \ - (RFAILED(&(gcd)->devices->id_cache_dev) || \ - RFAILED(&(gcd)->devices->id_raw_dev)) - -#define RDC_IS_BMP(rdc) ((rdc)->rdc_type == RDC_BMP) -#define RDC_IS_QUE(rdc) ((rdc)->rdc_type == RDC_QUE) -#define RDC_IS_RAW(rdc) (((rdc)->rdc_oflags & NSC_CACHE) == 0) -#define RDC_U_FD(gcd) (IS_CRSRV(gcd) ? (gcd)->c_fd : (gcd)->r_fd) -#define RDC_FD(rdc) (RDC_U_FD(rdc->rdc_info)) - - -typedef struct rdc_host_u { - char *nodename; - int netaddr; - struct netbuf *naddr; -} rdc_host_t; - -/* - * Reply from remote read - * - convenience defines for the client side code. - * - keep this in sync with the readres structure in rdc_prot.h/.x - */ -#define rdcrdresult readres -#define rr_status status -#define rr_ok readres_u.reply -#define rr_bufsize rr_ok.data.data_len -#define rr_data rr_ok.data.data_val - -/* - * Flags for remote read rpc - * - * _START must be a unique rpc, _DATA and _END may be OR-d together. 
- */ -#define RDC_RREAD_DATA 0x1 /* Intermediate rpc with data payload */ -#define RDC_RREAD_START 0x2 /* Setup rpc */ -#define RDC_RREAD_END 0x4 /* End rpc */ -#define RDC_RREAD_FAIL 0x8 /* Primary is failed */ - -/* - * Flags for remote write rpc - */ -#define RDC_RWRITE_FAIL 0x8 /* Primary is failed */ - -/* - * macro used to determine if the incomming sq, with sequence - * value x, should be placed before the sq with sequence value y. - * This has to account for integer wrap. We account for integer - * wrap by checking if the difference between x and y is within - * half of the maximum integer value (RDC_MAXINT) or not. - */ - -#define RDC_BITSPERBYTE 8 -#define RDC_BITS(type) (RDC_BITSPERBYTE * (long)sizeof (type)) -#define RDC_HIBITI ((unsigned)1 << (RDC_BITS(int) - 1)) -#define RDC_MAXINT ((int)(~RDC_HIBITI)) -#define RDC_RANGE ((RDC_MAXINT / 2) -1) - -#define RDC_INFRONT(x, y) (((x < y) && ((y - x) < RDC_RANGE)) ? 1 : \ - ((x > y) && ((x - y) > RDC_RANGE)) ? 1 : 0) - - - - -#endif /* _KERNEL */ - -/* - * RDC user-visible information - */ -typedef rdc_set_t rdc_u_info_t; - - -/* - * RDC flags for set state / set cd RPC. - * Must remain compatible with rdc RPC protocol version v3. - */ -#define CCIO_NONE 0x0000 -#define CCIO_ENABLE 0x0008 -#define CCIO_SLAVE 0x0010 -#define CCIO_DONE 0x0020 -#define CCIO_ENABLELOG 0x0100 -#define CCIO_RSYNC 0x0400 -#define CCIO_REMOTE 0x2000 - - -/* - * In kernel type flags (krdc->type_flag). 
- */ -#define RDC_CONFIGURED 0x1 -#define RDC_DISABLEPEND 0x2 /* Suspend/Disable is in progress */ -#define RDC_ASYNCMODE 0x4 -#define RDC_RESUMEPEND 0x8 -#define RDC_RESPONSIBLE 0x10 -#define RDC_BUSYWAIT 0x20 -#define RDC_UNREGISTER 0x40 /* Unregister is in progress */ -#define RDC_QDISABLEPEND 0x100 /* Q Suspend/Disable is in progress */ - -#define IS_ENABLED(urdc) ((IS_CONFIGURED(&rdc_k_info[(urdc)->index]) && \ - (rdc_get_vflags(urdc) & RDC_ENABLED))) -#define IS_CONFIGURED(krdc) ((krdc)->type_flag & RDC_CONFIGURED) -#define IS_MANY(krdc) ((krdc)->many_next != (krdc)) -#define IS_MULTI(krdc) ((krdc)->multi_next != NULL) - -#define IS_VALID_INDEX(index) ((index) >= 0 && (index) < rdc_max_sets && \ - IS_CONFIGURED(&rdc_k_info[(index)])) - -#define RDC_NOFLUSH 0 /* Do not do a flush when starting logging */ -#define RDC_NOREMOTE 0 /* Do no remote logging notifications */ -#define RDC_FLUSH 1 /* Do a flush when starting logging */ -#define RDC_ALLREMOTE 2 /* Notify all remote group members */ -#define RDC_OTHERREMOTE 4 /* Notify all remote group members except */ - /* the one corresponding to the current set, */ - /* to prevent recursion in the case where */ - /* the request was initiated from the remote */ - /* node. 
*/ -#define RDC_FORCE_GROUP 8 /* set all group memebers logging regardless */ - -#ifdef _KERNEL - -/* - * Functions, vars - */ - -#define RDC_SYNC_EVENT_TIMEOUT (60 * HZ) -typedef struct { - clock_t lbolt; - int event; - int ack; - int daemon_waiting; /* Daemon waiting in ioctl */ - int kernel_waiting; /* Kernel waiting for daemon to reply */ - char master[NSC_MAXPATH]; - char group[NSC_MAXPATH]; - kmutex_t mutex; - kcondvar_t cv; - kcondvar_t done_cv; -} rdc_sync_event_t; -extern rdc_sync_event_t rdc_sync_event; -extern clock_t rdc_sync_event_timeout; -extern kmutex_t rdc_sync_mutex; - -extern rdc_u_info_t *rdc_u_info; -extern rdc_k_info_t *rdc_k_info; - -extern int rdc_max_sets; - -extern unsigned long rdc_async_timeout; - -extern int rdc_self_host(); -extern uint64_t mirror_getsize(int index); -extern void rdc_sleepqdiscard(rdc_group_t *); - - -#ifdef DEBUG -extern void rdc_stallzero(int); -#endif - -struct rdc_net_dataitem { - void *dptr; - int len; /* byte count */ - int mlen; /* actual malloced size */ - struct rdc_net_dataitem *next; -}; -typedef struct rdc_net_dataitem rdc_net_dataitem_t; - -struct rdc_net_dataset { - int id; - int inuse; - int delpend; - int nitems; - nsc_off_t pos; - nsc_size_t fbalen; - rdc_net_dataitem_t *head; - rdc_net_dataitem_t *tail; - struct rdc_net_dataset *next; -}; -typedef struct rdc_net_dataset rdc_net_dataset_t; - - -#endif /* _KERNEL */ - - -#define RDC_TCP_DEV "/dev/tcp" - -#define RDC_VERS_MIN RDC_VERSION5 -#define RDC_VERS_MAX RDC_VERSION7 - -#define RDC_HEALTH_THRESHOLD 20 -#define RDC_MIN_HEALTH_THRES 5 -#define SNDR_MAXTHREADS 16 -/* - * These next two defines are the default value of the async queue size - * They have been calculated to be 8MB of data with an average of - * 2K IO size - */ -#define RDC_MAXTHRES_QUEUE 16384 /* max # of fbas on async q */ -#define RDC_MAX_QITEMS 4096 /* max # of items on async q */ -#define RDC_ASYNCTHR 2 /* number of async threads */ - -#define RDC_RPC_MAX (RDC_MAXDATA + sizeof 
(net_data5) +\ - (RPC_MAXDATASIZE - 8192)) -#define ATM_NONE 0 -#define ATM_INIT 1 -#define ATM_EXIT 2 - -#define RDC_CLNT_TMOUT 16 - -#define BMAP_BLKSIZE 1024 -#define BMAP_BLKSIZEV7 RDC_MAXDATA - -/* right now we can only trace 1m or less writes to the bitmap (32 bits wide) */ -#define RDC_MAX_MAXFBAS 2048 - -#if defined(_KERNEL) -/* kstat interface */ - -/* - * Per module kstats - * only one instance - */ -typedef struct { - kstat_named_t m_maxsets; /* Max # of sndr sets */ - kstat_named_t m_maxfbas; /* Max # of FBAS from nsctl */ - kstat_named_t m_rpc_timeout; /* global RPC timeout */ - kstat_named_t m_health_thres; /* Health thread timeout */ - kstat_named_t m_bitmap_writes; /* True for bitmap writes */ - kstat_named_t m_clnt_cots_calls; /* # of clnt COTS calls */ - kstat_named_t m_clnt_clts_calls; /* # of clnt CLTS calls */ - kstat_named_t m_svc_cots_calls; /* # of server COTS calls */ - kstat_named_t m_svc_clts_calls; /* # of server CLTS calls */ - kstat_named_t m_bitmap_ref_delay; /* # of bitmap ref overflows */ -} sndr_m_stats_t; - -/* - * Per set kstats - * one instance per configured set - */ -typedef struct { - kstat_named_t s_flags; /* from rdc_set_t */ - kstat_named_t s_syncflags; /* from rdc_set_t */ - kstat_named_t s_bmpflags; /* from rdc_set_t */ - kstat_named_t s_syncpos; /* from rdc_set_t */ - kstat_named_t s_volsize; /* from rdc_set_t */ - kstat_named_t s_bits_set; /* from rdc_set_t */ - kstat_named_t s_autosync; /* from rdc_set_t */ - kstat_named_t s_maxqfbas; /* from rdc_set_t */ - kstat_named_t s_maxqitems; /* from rdc_set_t */ - kstat_named_t s_primary_vol; /* from rdc_set_t */ - kstat_named_t s_secondary_vol; /* from rdc_set_t */ - kstat_named_t s_bitmap; /* from rdc_set_t */ - kstat_named_t s_primary_intf; /* from rdc_set_t */ - kstat_named_t s_secondary_intf; /* from rdc_set_t */ - kstat_named_t s_type_flag; /* from rdc_k_info_t */ - kstat_named_t s_bitmap_size; /* from rdc_k_info_t */ - kstat_named_t s_disk_status; /* from rdc_k_info_t 
*/ - kstat_named_t s_if_if_down; /* from rdc_if_t */ - kstat_named_t s_if_rpc_version; /* from rdc_if_t */ - kstat_named_t s_aqueue_blk_hwm; /* from rdc_k_info_t */ - kstat_named_t s_aqueue_itm_hwm; /* from rdc_k_info_t */ - kstat_named_t s_aqueue_throttle; /* from rdc_k_info_t */ - kstat_named_t s_aqueue_items; - kstat_named_t s_aqueue_blocks; - kstat_named_t s_aqueue_type; -} rdc_info_stats_t; -#endif /* _KERNEL */ - -#ifndef _SunOS_5_6 /* i.e. 2.7+ */ -typedef int xdr_t; -#else /* i.e. 2.6- */ -typedef unsigned long rpcprog_t; -typedef unsigned long rpcvers_t; -typedef unsigned long rpcproc_t; -typedef unsigned long rpcprot_t; -typedef unsigned long rpcport_t; -#endif /* _SunOS_5_6 */ - - -#ifdef _KERNEL - -extern nsc_size_t MAX_RDC_FBAS; -extern volatile int net_exit; -extern nsc_size_t rdc_maxthres_queue; /* max # of fbas on async q */ -extern int rdc_max_qitems; /* max # of items on async q */ -extern int rdc_asyncthr; /* # of async threads */ - -#ifdef DEBUG -extern kmutex_t rdc_cntlock; -extern int rdc_datasetcnt; -#endif - -/* - * Macro to keep tabs on dataset memory usage. 
- */ -#ifdef DEBUG -#define RDC_DSMEMUSE(x) \ - mutex_enter(&rdc_cntlock);\ - rdc_datasetcnt += (x);\ - mutex_exit(&rdc_cntlock); -#else -#define RDC_DSMEMUSE(x) -#endif - - - - - -extern kmutex_t rdc_ping_lock; -extern rdc_if_t *rdc_if_top; - -extern int _rdc_enqueue_write(rdc_k_info_t *, nsc_off_t, nsc_size_t, int, - nsc_buf_t *); -extern int rdc_net_state(int, int); -extern int rdc_net_getbmap(int, int); -extern int rdc_net_getsize(int, uint64_t *); -extern int rdc_net_write(int, int, nsc_buf_t *, nsc_off_t, nsc_size_t, uint_t, - int, netwriteres *); -extern int rdc_net_read(int, int, nsc_buf_t *, nsc_off_t, nsc_size_t); -extern int _rdc_remote_read(rdc_k_info_t *, nsc_buf_t *, nsc_off_t, nsc_size_t, - int); -extern int _rdc_multi_write(nsc_buf_t *, nsc_off_t, nsc_size_t, int, - rdc_k_info_t *); -extern int rdc_start_server(struct rdc_svc_args *, int); -extern aio_buf_t *rdc_aio_buf_get(rdc_buf_t *, int); -extern void rdc_aio_buf_del(rdc_buf_t *, rdc_k_info_t *); -extern aio_buf_t *rdc_aio_buf_add(int, rdc_buf_t *); -extern int rdc_net_getstate(rdc_k_info_t *, int *, int *, int *, int); -extern kmutex_t rdc_conf_lock; -extern kmutex_t rdc_many_lock; -extern int rdc_drain_queue(int); -extern int flush_group_queue(int); -extern void rdc_dev_close(rdc_k_info_t *); -extern int rdc_dev_open(rdc_set_t *, int); -extern void rdc_get_details(rdc_k_info_t *); -extern int rdc_lookup_bitmap(char *); -extern int rdc_lookup_enabled(char *, int); -extern int rdc_lookup_byaddr(rdc_set_t *); -extern int rdc_lookup_byname(rdc_set_t *); -extern int rdc_intercept(rdc_k_info_t *); -extern int rdc_unintercept(rdc_k_info_t *); -extern int _rdc_rsrv_devs(rdc_k_info_t *, int, int); -extern void _rdc_rlse_devs(rdc_k_info_t *, int); -extern void _rdc_unload(void); -extern int _rdc_load(void); -extern int _rdc_configure(void); -extern void _rdc_deconfigure(void); -extern void _rdc_async_throttle(rdc_k_info_t *, long); -extern int rdc_writer(int); -extern int rdc_dump_alloc_bufs_cd(int); 
-extern void rdc_dump_alloc_bufs(rdc_if_t *); -extern int rdc_check_secondary(rdc_if_t *, int); -extern void rdc_dump_queue(int); -extern int rdc_isactive_if(struct netbuf *, struct netbuf *); -extern rdc_if_t *rdc_add_to_if(rdc_srv_t *, struct netbuf *, struct netbuf *, - int); -extern void rdc_remove_from_if(rdc_if_t *); -extern void rdc_set_if_vers(rdc_u_info_t *, rpcvers_t); - -extern void rdc_print_svinfo(rdc_srv_t *, char *); -extern rdc_srv_t *rdc_create_svinfo(char *, struct netbuf *, - struct knetconfig *); -extern void rdc_destroy_svinfo(rdc_srv_t *); - -extern void init_rdc_netbuf(struct netbuf *); -extern void free_rdc_netbuf(struct netbuf *); -extern void dup_rdc_netbuf(const struct netbuf *, struct netbuf *); -extern int rdc_netbuf_toint(struct netbuf *); -extern struct netbuf *rdc_int_tonetbuf(int); -extern void rdc_lor(const uchar_t *, uchar_t *, int); -extern int rdc_resume2(rdc_k_info_t *); -extern void rdc_set_flags(rdc_u_info_t *, int); -extern void rdc_clr_flags(rdc_u_info_t *, int); -extern int rdc_get_vflags(rdc_u_info_t *); -extern void rdc_set_mflags(rdc_u_info_t *, int); -extern void rdc_clr_mflags(rdc_u_info_t *, int); -extern int rdc_get_mflags(rdc_u_info_t *); -extern void rdc_set_flags_log(rdc_u_info_t *, int, char *); -extern void rdc_group_log(rdc_k_info_t *krdc, int flush, char *why); -extern int _rdc_config(void *, int, spcs_s_info_t, int *); -extern void rdc_many_enter(rdc_k_info_t *); -extern void rdc_many_exit(rdc_k_info_t *); -extern void rdc_group_enter(rdc_k_info_t *); -extern void rdc_group_exit(rdc_k_info_t *); -extern int _rdc_sync_event_wait(void *, void *, int, spcs_s_info_t, int *); -extern int _rdc_sync_event_notify(int, char *, char *); -extern int _rdc_link_down(void *, int, spcs_s_info_t, int *); -extern void rdc_delgroup(rdc_group_t *); -extern int rdc_write_bitmap_fba(rdc_k_info_t *, nsc_off_t); -extern int rdc_bitmapset(int, char *, char *, void *, int, nsc_off_t, int); -extern rdc_net_dataset_t 
*rdc_net_add_set(int); -extern rdc_net_dataset_t *rdc_net_get_set(int, int); -extern void rdc_net_put_set(int, rdc_net_dataset_t *); -extern void rdc_net_del_set(int, rdc_net_dataset_t *); -extern void rdc_net_free_set(rdc_k_info_t *, rdc_net_dataset_t *); -extern int rdc_lookup_byhostdev(char *intf, char *file); -extern int rdc_lookup_configured(char *path); -extern void rdc_dump_dsets(int); -extern void set_busy(rdc_k_info_t *); -extern void wakeup_busy(rdc_k_info_t *); - - -#ifdef DEBUG -extern int rdc_async6(void *, int mode, int *); -extern int rdc_readgen(void *, int, int *); -#endif - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_IO_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_ioctl.h b/usr/src/uts/common/avs/ns/rdc/rdc_ioctl.h deleted file mode 100644 index ddb6fb5970..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_ioctl.h +++ /dev/null @@ -1,498 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _RDC_IOCTL_H -#define _RDC_IOCTL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/unistat/spcs_s.h> -#include <sys/nsctl/nsctl.h> -#ifndef DS_DDICT -#include <rpc/rpc.h> -#endif - -#ifdef _SunOS_5_6 -#define netbuf32 netbuf -#include <sys/nsctl/model.h> -#endif - -typedef struct _rdc_ioctl_s { - long arg0; - long arg1; - long arg2; - long arg3; - long arg4; - long magic; - spcs_s_info_t ustatus; - long pad[1]; -} _rdc_ioctl_t; - -#ifdef _SYSCALL32 -typedef struct _rdc_ioctl32_s { - int32_t arg0; - int32_t arg1; - int32_t arg2; - int32_t arg3; - int32_t arg4; - int32_t magic; - spcs_s_info32_t ustatus; - int32_t pad[1]; -} _rdc_ioctl32_t; -#endif /* _SYSCALL32 */ - -/* - * Ioctl command numbers - */ - -#define _RDCI_(x) (('R'<<16)|('D'<<8)|(x)) - -/* - * Generic rdc ioctl arguments structure. - * Individual ioctl's will use 0-n of these arguments. - * - * Each rdc ioctl is described first by the command number - * e.g. #define RDC_CONFIG _RDCI_(0) - * - * Followed by a description of each argument (if any). - * Each argument is on a single line. 
- * - */ - -#define RDC_CONFIG _RDCI_(0) -/* - * rdc_config_t *user_configuration; - */ - -#define RDC_ENABLE_SVR _RDCI_(1) -/* - * rdc_svc_args_t *daemon_configuration; - */ - -#define RDC_STATUS _RDCI_(2) -/* - * rdc_status_t *rdc_status; - */ - -#define RDC_VERSION _RDCI_(3) -/* - * rdc_version_t *rdc_version; - */ - -#define RDC_LINK_DOWN _RDCI_(4) -/* - * char *rdc_host; - */ - -#define RDC_SYNC_EVENT _RDCI_(5) -/* - * char *rdc_master; - * char *rdc_group; - */ - -#define RDC_POOL_CREATE _RDCI_(6) -/* - * struct svcpool_args * - */ - -#define RDC_POOL_WAIT _RDCI_(7) -/* - * int id - */ - -#define RDC_POOL_RUN _RDCI_(8) -/* - * int id - */ -#define RDC_BITMAPOP _RDCI_(9) - -#ifdef DEBUG -#define RDC_ASYNC6 _RDCI_(20) /* send async message by hand */ -#define RDC_CLRKSTAT _RDCI_(21) /* clear kstat_io structure */ -#define RDC_STALL0 _RDCI_(22) /* stall sequence 0 on server */ -#define RDC_READGEN _RDCI_(23) /* cause a read on server */ -#endif - - -#define MAX_RDC_HOST_SIZE 64 - -/* - * Change this when the ioctl structure changes - */ -#define RDC_MAGIC 0xf00d0001 - -typedef struct rdc_addr { - struct netbuf addr; - char intf[MAX_RDC_HOST_SIZE]; - char file[NSC_MAXPATH]; - char bitmap[NSC_MAXPATH]; -} rdc_addr_t; - -#ifdef _SYSCALL32 -struct rdc_addr32 { - struct netbuf32 addr; - char intf[MAX_RDC_HOST_SIZE]; - char file[NSC_MAXPATH]; - char bitmap[NSC_MAXPATH]; -}; -#endif /* _SYSCALL32 */ - -/* - * User level rdc set structure - must be a multiple of 64bits long. - */ -typedef struct rdc_set { - rdc_addr_t primary; - rdc_addr_t secondary; - struct knetconfig *netconfig; - long align1; - double alignfix; - int flags; /* See RDC flags below */ - int sync_flags; /* See RDC flags below */ - int bmap_flags; /* See RDC flags below */ - int mflags; /* RDC 1-to-many flags */ - int index; /* 0 .. 
rdc_max_sets - 1 */ - int bits_set; /* Bits set in bitmap */ - int autosync; /* Autosync on (1) or off (0) */ - int syshostid; /* for cluster integration */ - int asyncthr; /* # of async threads */ - int setid; /* unique set id for this set */ - uint64_t sync_pos; /* Progress through sync */ - uint64_t volume_size; /* Size of volume */ - int64_t maxqfbas; /* max # of fbas on async q */ - int64_t maxqitems; /* max # of items on async q */ - char group_name[NSC_MAXPATH]; /* Group the set belongs to */ - char direct_file[NSC_MAXPATH]; /* Local FCAL direct io file */ - char disk_queue[NSC_MAXPATH]; /* Disk Queue for set|group */ -} rdc_set_t; - -#ifdef _SYSCALL32 -struct rdc_set32 { - struct rdc_addr32 primary; - struct rdc_addr32 secondary; - caddr32_t netconfig; - int32_t align1; - double alignfix; - int32_t flags; /* See RDC flags below */ - int32_t sync_flags; /* See RDC flags below */ - int32_t bmap_flags; /* See RDC flags below */ - int32_t mflags; /* RDC 1-to-many flags */ - int32_t index; /* 0 .. 
rdc_max_sets - 1 */ - int32_t bits_set; /* Bits set in bitmap */ - int32_t autosync; /* Autosync on (1) or off (0) */ - int32_t syshostid; /* for cluster integration */ - int32_t asyncthr; /* # of async threads */ - int32_t setid; /* unique set id for this set */ - uint64_t sync_pos; /* Progress through sync */ - uint64_t volume_size; /* Size of volume */ - int64_t maxqfbas; /* max # of fbas on async q */ - int64_t maxqitems; /* max # of items on async q */ - char group_name[NSC_MAXPATH]; /* Group the set belongs to */ - char direct_file[NSC_MAXPATH]; /* Local FCAL direct io file */ - char disk_queue[NSC_MAXPATH]; /* Disk Queue for set|group */ -}; -#endif /* _SYSCALL32 */ - -/* - * Parameter structure to pass to RDC_CONFIG - */ - -typedef struct rdc_config { - int command; /* RDC_CMD_XXX */ - int options; /* RDC_OPT_XXX */ - int pad[2]; /* Do NOT remove - 32/64-bit padding */ - rdc_set_t rdc_set[1]; /* The rdc sets */ -} rdc_config_t; - -#ifdef _SYSCALL32 -struct rdc_config32 { - int32_t command; /* RDC_CMD_XXX */ - int32_t options; /* RDC_OPT_XXX */ - int32_t pad[2]; /* Do NOT remove - 32/64-bit padding */ - struct rdc_set32 rdc_set[1]; /* The rdc sets */ -}; -#endif /* _SYSCALL32 */ - -#define RDC_BITMAPSET 0x01 -#define RDC_BITMAPOR 0x02 -typedef struct rdc_bitmap_op { - nsc_off_t offset; /* byte offset within bitmap mod fba */ - int32_t op; /* or/set operation */ - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int32_t len; /* length of bitmap in bytes */ - unsigned long addr; /* address of bitmap in userland */ -} rdc_bitmap_op_t; - -#ifdef _SYSCALL32 -typedef struct rdc_bitmap_op32 { - nsc_off_t offset; - int32_t op; - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int32_t len; - uint32_t addr; -} rdc_bitmap_op32_t; - -#endif /* _SYSCALL32 */ - -#ifdef DEBUG -/* - * structure to initiate an asynchronous send to the secondary, - * so we can test the queuing code. 
- */ -typedef struct rdc_async6 { - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int pos; /* Position in file */ - int len; - int seq; - int pat; /* fill data with this */ - int idx; /* server returned index */ - int spos; /* sub task start block */ - int slen; /* sub task length */ - int endind; /* set when last block in multi request */ -} rdc_async6_t; -/* - * structure to initiate a read on the secondary, so we can test the - * maxfba break up code. - */ -typedef struct rdc_readgen { - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int len; - int pos; - int idx; - int flag; - int rpcversion; - void *data; /* where to place the data from the read */ -} rdc_readgen_t; - -#ifdef _SYSCALL32 -typedef struct rdc_readgen32 { - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int len; - int pos; - int idx; - int flag; - int rpcversion; - caddr32_t data; /* where to place the data from the read */ -} rdc_readgen32_t; -#endif -#endif - - - - - -/* - * Config ioctl commands - */ -#define RDC_CMD_ENABLE 1 /* New enable */ -#define RDC_CMD_DISABLE 2 /* Complete disable */ -#define RDC_CMD_RESUME 3 /* Local re-enable */ -#define RDC_CMD_SUSPEND 4 /* Local clear */ -#define RDC_CMD_LOG 5 /* Start logging mode */ -#define RDC_CMD_COPY 6 /* Start synching */ -#define RDC_CMD_RECONFIG 7 /* Change the rdc set */ -#define RDC_CMD_TUNABLE 8 /* Change a tunable parameter */ -#define RDC_CMD_WAIT 9 /* Wait for syncs to complete */ -#define RDC_CMD_HEALTH 10 /* Return health state */ -#define RDC_CMD_STATUS 11 /* Single set status */ -#define RDC_CMD_RESET 12 /* reset error or failed status */ -#define RDC_CMD_INITQ 14 /* initialise the disk queue */ -#define RDC_CMD_FLUSHQ 15 /* flush queue for set */ -#define RDC_CMD_ADDQ 16 /* add diskq to a set/group */ -#define RDC_CMD_REMQ 17 /* nice remove a diskq from set/grp */ -#define RDC_CMD_KILLQ 18 /* forced disgard of queue */ -#define RDC_CMD_REPQ 19 /* replace queue */ - - - - - -/* - 
* Config ioctl options - */ -#define RDC_OPT_SYNC 0x1 /* RDC_CMD_ENABLE, RDC_CMD_RESUME */ -#define RDC_OPT_ASYNC 0x2 /* RDC_CMD_ENABLE, RDC_CMD_RESUME */ -#define RDC_OPT_PRIMARY 0x4 /* All */ -#define RDC_OPT_SECONDARY 0x8 /* All */ -#define RDC_OPT_FORWARD 0x10 /* RDC_CMD_COPY */ -#define RDC_OPT_REVERSE 0x20 /* RDC_CMD_COPY */ -#define RDC_OPT_FULL 0x40 /* RDC_CMD_COPY */ -#define RDC_OPT_UPDATE 0x80 /* RDC_CMD_COPY */ -#define RDC_OPT_SETBMP 0x100 /* RDC_CMD_ENABLE */ -#define RDC_OPT_CLRBMP 0x200 /* RDC_CMD_ENABLE */ -#define RDC_OPT_REVERSE_ROLE 0x400 /* RDC_CMD_RECONFIG */ -#define RDC_OPT_FORCE_QINIT 0x800 /* RDC_CMD_INITQ */ -#define RDC_OPT_SET_QNOBLOCK 0x1000 /* RDC_CMD_TUNABLE */ -#define RDC_OPT_CLR_QNOBLOCK 0x2000 /* RDC_CMD_TUNABLE */ -#define RDC_OPT_FORCE_DISABLE 0x4000 /* RDC_CMD_DISABLE */ - -/* - * RDC flags - */ - -/* - * Passed out by the kernel (status) - */ -#define RDC_ENABLED 0x2 /* RDC enabled */ -#define RDC_PRIMARY 0x4 /* This node is the primary */ -#define RDC_SLAVE 0x8 /* This node is target of the synch */ -#define RDC_VOL_FAILED 0x10 /* Volume is failed */ -#define RDC_BMP_FAILED 0x20 /* Bitmap is failed */ -#define RDC_SYNC_NEEDED 0x40 /* Sync is needed */ -#define RDC_RSYNC_NEEDED 0x80 /* Reverse sync is needed */ -#define RDC_SYNCING 0x100 /* Synch in progress */ -#define RDC_LOGGING 0x200 /* Logging */ -#define RDC_FCAL_FAILED 0x400 /* Direct remote I/O failed */ -#define RDC_ASYNC 0x800 /* Set is in async replicating mode */ -#define RDC_FULL 0x1000 /* Full sync, not an update */ -#define RDC_CLR_AFTERSYNC 0x2000 /* clr bitmap on secondary after sync */ -#define RDC_DISKQ_FAILED 0x4000 /* Diskq I/O has failed */ -#define RDC_QUEUING 0x8000 /* logging, but queueing to disk */ -#ifndef RDC_QNOBLOCK -#define RDC_QNOBLOCK 0x10000 -#endif -#define RDC_SYNC_START 0 -#define RDC_SYNC_DONE 1 -#define RDC_RSYNC_START 2 - -#ifdef _KERNEL - -/* - * urdc->flags vs urdc->mflags usage: - * - * All flags are valid in urdc->flags, in which 
case the condition - * holds for the specific urdc. - * - * The flags in RDC_MFLAGS can also be in urdc->mflags, in which case - * the condition holds for a urdc somewhere on the many/multi chains - * connected to this urdc. - */ - -#define RDC_GROUP 0x7f8 /* Volume states that affect a group */ - -/* - * Mask of volume flags that are valid in urdc->mflags - */ -#define RDC_MFLAGS (RDC_SLAVE | RDC_RSYNC_NEEDED) - -#define IS_SLAVE(urdc) (rdc_get_mflags(urdc) & RDC_SLAVE) - -/* - * Mask of volume flags that are maintained in sync_flags not flags, - * and protected by rdc_many_lock rather than the group lock. - * This allows code that is operating on one set to change the flags - * of another set. - */ -#define RDC_SFLAGS (RDC_SYNC_NEEDED | RDC_RSYNC_NEEDED | \ - RDC_VOL_FAILED | RDC_CLR_AFTERSYNC) - -/* - * Mask of volume flags that are maintained in bmap_flags not flags, - * and protected by the bmapmutex rather than the group lock. - */ -#define RDC_BFLAGS RDC_BMP_FAILED - -#define RDC_VFLAGS (~(RDC_SFLAGS | RDC_BFLAGS)) - -#define RDC_SYNC_STATE_FLAGS (RDC_LOGGING | RDC_SYNCING | RDC_QUEUING | \ - RDC_ASYNC) - -#define IS_ASYNC(urdc) (rdc_get_vflags(urdc) & RDC_ASYNC) -#define IS_PRIMARY(urdc) (rdc_get_vflags(urdc) & RDC_PRIMARY) -#define IS_SECONDARY(urdc) (!IS_PRIMARY(urdc)) -#define IS_STATE(urdc, state) (rdc_get_vflags(urdc) & (state)) -#define IS_REPLICATING(urdc) (!(rdc_get_vflags(urdc) & RDC_LOGGING) && \ - !(rdc_get_vflags(urdc) & RDC_SYNCING)) - -#endif /* _KERNEL */ - -typedef struct rdc_status { - int nset; /* Number of sets requested/enabled */ - int maxsets; /* Max # of sets allowed today */ - rdc_set_t rdc_set[1]; -} rdc_status_t; - -#ifdef _SYSCALL32 -struct rdc_status32 { - int32_t nset; /* Number of sets requested/enabled */ - int32_t maxsets; /* Max # of sets allowed today */ - struct rdc_set32 rdc_set[1]; -}; -#endif /* _SYSCALL32 */ - -typedef struct rdc_svc_args { - int fd; /* Connection endpoint */ - int nthr; /* Number of server threads */ 
- char netid[128]; /* Identify transport */ - struct netbuf addrmask; /* Address mask for host */ -} rdc_svc_args_t; - -#ifdef _SYSCALL32 -struct rdc_svc_args32 { - int32_t fd; - int32_t nthr; - char netid[128]; - struct netbuf32 addrmask; -}; -#endif /* _SYSCALL32 */ - -typedef struct rdc_version { - int major; /* Major release number */ - int minor; /* Minor release number */ - int micro; /* Micro release number */ - int baseline; /* Baseline revison number */ -} rdc_version_t; -#ifdef _SYSCALL32 -typedef struct rdc_version32 { - int32_t major; /* Major release number */ - int32_t minor; /* Minor release number */ - int32_t micro; /* Micro release number */ - int32_t baseline; /* Baseline revison number */ -} rdc_version32_t; -#endif - - -#if !defined(_KERNEL) - -#define RDC_IOCTL(cmd, a0, a1, a2, a3, a4, ustatus) \ - rdc_ioctl((long)(cmd), (long)(a0), (long)(a1), (long)(a2), \ - (long)(a3), (long)(a4), (ustatus)) - -extern int rdc_ioctl(long, long, long, long, long, long, spcs_s_info_t); -extern int rdc_ioctl_simple(long, void *); - -#endif /* ! _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_IOCTL_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_prot.x b/usr/src/uts/common/avs/ns/rdc/rdc_prot.x deleted file mode 100644 index cf9055c186..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_prot.x +++ /dev/null @@ -1,390 +0,0 @@ -%/* -% * CDDL HEADER START -% * -% * The contents of this file are subject to the terms of the -% * Common Development and Distribution License (the "License"). -% * You may not use this file except in compliance with the License. -% * -% * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -% * or http://www.opensolaris.org/os/licensing. -% * See the License for the specific language governing permissions -% * and limitations under the License. -% * -% * When distributing Covered Code, include this CDDL HEADER in each -% * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-% * If applicable, add the following below this CDDL HEADER, with the -% * fields enclosed by brackets "[]" replaced with your own identifying -% * information: Portions Copyright [yyyy] [name of copyright owner] -% * -% * CDDL HEADER END -% */ -% -% -%/* -% * Copyright 2008 Sun Microsystems, Inc. All rights reserved. -% * Use is subject to license terms. -% */ -% -%/* -% * Auto generated from rdc_prot.x -% */ -% -%/* -% * Network Replicator RPC spec -% */ - -% -%/* -% * We don't define netbuf in RPCL, since it would contain structure member -% * names that would conflict with the definition of struct netbuf in -% * <tiuser.h>. Instead we merely declare the XDR routine xdr_netbuf() here, -% * and implement it ourselves in rpc/rpcb_prot.c. -% */ -%#ifdef __cplusplus -%extern "C" bool_t xdr_netbuf(XDR *, struct netbuf *); -% -%#elif __STDC__ -%extern bool_t xdr_netbuf(XDR *, struct netbuf *); -% -%#else /* K&R C */ -%bool_t xdr_netbuf(); -% -%#endif /* K&R C */ -const RDC_PORT = 121; -const RDC_MAXDATA = 32768; -const RDC_MAXNAMLEN = 64; -const RDC_BMAPBLKSIZE = 1024; -const RDC_MAXADDR = 32; -const RDC_MAXPENDQ = 64; - -%/* -% * Use this to limit the size of the net_pendvec_t array -% * to ~ 32k -% */ -const RDC_PENDQLIM = 1365; -% -%/* -% * Error status -% */ -enum rdcstat { - RDC_OK = 0, - RDCERR_PERM = 1, - RDCERR_NOENT = 2, - RDCERR_NOMEM = 3 -}; - -% -%/* -%* Set state (V4) -%*/ - -struct set_state4 { - opaque netaddr[RDC_MAXADDR]; - opaque rnetaddr[RDC_MAXADDR]; - int netaddrlen; - int rnetaddrlen; - unsigned flag; - opaque pfile[RDC_MAXNAMLEN]; - opaque sfile[RDC_MAXNAMLEN]; -}; - -const RDC_XDR_MAXNAMLEN = RDC_MAXNAMLEN; - -struct set_state { - struct netbuf netaddr; - struct netbuf rnetaddr; - int netaddrlen; - int rnetaddrlen; - unsigned flag; - string pfile<RDC_XDR_MAXNAMLEN>; - string sfile<RDC_XDR_MAXNAMLEN>; -}; - -% -%/* -% * Get size of volume -% */ -struct getsize { - int cd; -}; - -% -%/* -% * Remote read (v5) -% */ -struct rread { - int cd; - int 
len; - int pos; - int idx; - int flag; -}; - -% -%/* -% * Remote read (v6) -% */ -struct rread6 { - int cd; - int len; - u_longlong_t pos; - int idx; - int flag; -}; - -% -%/* -% * status OK from remote read -% */ -struct readok { - opaque data<RDC_MAXDATA>; -}; -union readres switch (rdcstat status) { -case RDC_OK: - readok reply; -default: - void; -}; - -% -%/* -% * Initiate bit map scoreboard transfer (v5) -% */ -struct bmap { - int cd; - int dual; - int size; -}; - -% -%/* -% * Initiate bit map scoreboard transfer (v6) -% */ -struct bmap6 { - int cd; - int dual; - u_longlong_t size; -}; - -% -%/* -% * Scoreboard bitmap data (v5) -% */ -struct net_bdata { - int cd; - int offset; - int size; - opaque data<RDC_BMAPBLKSIZE>; -}; - -% -%/* -% * Scoreboard bitmap data (v6) -% */ -struct net_bdata6 { - u_longlong_t offset; - int size; - int cd; - int endoblk; - opaque data<RDC_BMAPBLKSIZE>; -}; - -% -%/* -% * Data transfer and allocation (v5) -% */ -struct net_data5 { - int local_cd; - int cd; - int pos; - int len; - int flag; - int idx; - int seq; - int sfba; - int endoblk; - int nfba; - opaque data<RDC_MAXDATA>; -}; - -% -%/* -% * Data transfer and allocation (v6) -% */ -struct net_data6 { - int local_cd; - int cd; - u_longlong_t pos; - u_longlong_t qpos; - u_longlong_t sfba; - int nfba; - int len; - int flag; - int idx; - unsigned int seq; - int endoblk; - opaque data<RDC_MAXDATA>; -}; - - -struct net_pendvec { - u_longlong_t apos; - u_longlong_t qpos; - int alen; - unsigned int seq; - int pindex; -}; -typedef net_pendvec net_pendvec_t; - - - -%/* -% * results returned from a netwrite request. (v6) -% * index = index number of request assigned by server when -% * requests is broken down into smaller chunks. -% * result = 0 request ok. -% * result = 1 request is pending. -% * result < 0 failure, set with -errno. -% * If the vecdata array is not empty, then it contains -% * a list of apos and alen -% * pairs of previously pending requests that have been written. 
-% */ -struct netwriteres { - int index; - int result; - unsigned int seq; - net_pendvec_t vecdata<RDC_PENDQLIM>; -}; - - - -% -%/* -% * Ping -% */ -struct rdc_ping6 { - opaque p_ifaddr[RDC_MAXADDR]; - opaque s_ifaddr[RDC_MAXADDR]; -}; - -struct rdc_ping { - struct netbuf p_ifaddr; - struct netbuf s_ifaddr; -}; - - -/* - * Remote file service routines - */ - -program RDC_PROGRAM { - - /* - * This is protocol version 5 that shipped with SNDR 3.1 - * We must support this protocol until (protocol - * version 7) is released. - * I.e. N-1 protocol support. - */ - - version RDC_VERSION5 { - - void - RDCPROC_NULL(void) = 0; - - int - RDCPROC_GETSIZE(int) = 2; - - int - RDCPROC_WRITE5(net_data5) = 4; - - readres - RDCPROC_READ5(rread) = 5; - - int - RDCPROC_STATE(set_state4) = 7; - - int - RDCPROC_PING4(rdc_ping6) = 8; - - int - RDCPROC_BMAP(net_bmap) = 9; - - int - RDCPROC_BDATA(net_bdata) = 10; - - int - RDCPROC_GETSTATE4(set_state4) = 12; - } = 5; - - /* - * This is protocol version 6 that shipped with SNDR 3.2 - * We must support this protocol until (protocol - * version 8) is released. - * I.e. N-1 protocol support. - * - * Changed to support multiple transmitting async threads - * (sequence numbers and write reply structure) - * and 64bit datapath. 
- */ - - version RDC_VERSION6 { - - void - RDCPROC_NULL(void) = 0; - - u_longlong_t - RDCPROC_GETSIZE6(int) = 2; - - netwriteres - RDCPROC_WRITE6(net_data6) = 4; - - readres - RDCPROC_READ6(rread6) = 5; - - int - RDCPROC_STATE(set_state4) = 7; - - int - RDCPROC_PING4(rdc_ping6) = 8; - - int - RDCPROC_BMAP6(net_bmap6) = 9; - - int - RDCPROC_BDATA6(net_bdata6) = 10; - - int - RDCPROC_GETSTATE4(set_state4) = 12; - } = 6; - - version RDC_VERSION7 { - - void - RDCPROC_NULL(void) = 0; - - u_longlong_t - RDCPROC_GETSIZE6(int) = 2; - - netwriteres - RDCPROC_WRITE6(net_data6) = 4; - - readres - RDCPROC_READ6(rread6) = 5; - - int - RDCPROC_STATE(set_state) = 7; - - int - RDCPROC_PING4(rdc_ping) = 8; - - int - RDCPROC_BMAP6(net_bmap6) = 9; - - int - RDCPROC_BDATA6(net_bdata6) = 10; - - int - RDCPROC_GETSTATE4(set_state) = 12; - } = 7; - -} = 100143; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_stub.c b/usr/src/uts/common/avs/ns/rdc/rdc_stub.c deleted file mode 100644 index c1ef2dc502..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_stub.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/errno.h> - -#include <rpc/auth.h> -#include <rpc/svc.h> - -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/nsvers.h> -#include "rdc_stub.h" - -static void null_dispatch(struct svc_req *req, SVCXPRT *xprt); -static void (*dispatch)(struct svc_req *, SVCXPRT *) = null_dispatch; - -/* - * Solaris module setup. - */ -extern struct mod_ops mod_miscops; - -static struct modlmisc modlmisc = { - &mod_miscops, /* Type of module */ - "nws:Remote Mirror kRPC Stub:" ISS_VERSION_STR -}; - -static struct modlinkage modlinkage = { - MODREV_1, - &modlmisc, - NULL -}; - - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - - -int -_fini(void) -{ - /* unload is forbidden */ - return (EBUSY); -} - - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - - -/* - * rdcstub_dispatch is the place holder for rdcsrv_dispatch. - * rdcsrv registers this function as kRPC dispatch function. - * If rdcsrv is unloaded (uninstall package), then dispatch - * is set to null_dispatch - */ -void -rdcstub_dispatch(struct svc_req *req, SVCXPRT *xprt) -{ - (*dispatch)(req, xprt); -} - -/* ARGSUSED */ -static void -null_dispatch(struct svc_req *req, SVCXPRT *xprt) -{ - svcerr_noproc(xprt); -} - -void -rdcstub_set_dispatch(void (*disp)(struct svc_req *, SVCXPRT *)) -{ - ASSERT(disp != NULL); - dispatch = disp; -} - -void -rdcstub_unset_dispatch() -{ - dispatch = null_dispatch; -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_stub.h b/usr/src/uts/common/avs/ns/rdc/rdc_stub.h deleted file mode 100644 index 19b71eb4bf..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_stub.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_STUB_H -#define _RDC_STUB_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -extern void rdcstub_dispatch(struct svc_req *, SVCXPRT *); -extern void rdcstub_set_dispatch(void (*)(struct svc_req *, SVCXPRT *)); -extern void rdcstub_unset_dispatch(); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_STUB_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_subr.c b/usr/src/uts/common/avs/ns/rdc/rdc_subr.c deleted file mode 100644 index de5e1dd50a..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_subr.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/debug.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/errno.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> -#include "rdc_io.h" -#include "rdc_ioctl.h" -#include "rdc_prot.h" - -/* - * Initialize a netbuf suitable for - * describing an address - */ - -void -init_rdc_netbuf(struct netbuf *nbuf) -{ - nbuf->buf = kmem_zalloc(RDC_MAXADDR, KM_SLEEP); - nbuf->maxlen = RDC_MAXADDR; - nbuf->len = 0; -} - -/* - * Free a netbuf - */ - -void -free_rdc_netbuf(struct netbuf *nbuf) -{ - if (!(nbuf) || !(nbuf->buf)) { -#ifdef DEBUG - cmn_err(CE_PANIC, "Null netbuf in free_rdc_netbuf"); -#endif - return; - } - kmem_free(nbuf->buf, nbuf->maxlen); - nbuf->buf = NULL; - nbuf->maxlen = 0; - nbuf->len = 0; -} - - -/* - * Duplicate a netbuf, must be followed by a free_rdc_netbuf(). 
- */ -void -dup_rdc_netbuf(const struct netbuf *from, struct netbuf *to) -{ - init_rdc_netbuf(to); - to->len = from->len; - - if (from->len > to->maxlen) { - cmn_err(CE_WARN, "!dup_rdc_netbuf: from->len %d, to->maxlen %d", - from->len, to->maxlen); - } - - bcopy(from->buf, to->buf, (size_t)from->len); -} - - -#ifdef DEBUG -void -rdc_print_svinfo(rdc_srv_t *svp, char *str) -{ - int i; - - if (svp == NULL) - return; - - cmn_err(CE_NOTE, "!rdc %s servinfo: %p\n", str, (void *) svp); - - if (svp->ri_knconf != NULL) { - cmn_err(CE_NOTE, "!knconf: semantics %d", - svp->ri_knconf->knc_semantics); - cmn_err(CE_NOTE, "! protofmly %s", - svp->ri_knconf->knc_protofmly); - cmn_err(CE_NOTE, "! proto %s", - svp->ri_knconf->knc_proto); - cmn_err(CE_NOTE, "! rdev %lx", - svp->ri_knconf->knc_rdev); - } - - for (i = 0; i < svp->ri_addr.len; i++) - printf("%u ", svp->ri_addr.buf[i]); - - cmn_err(CE_NOTE, "!\naddr: len %d buf %p\n", - svp->ri_addr.len, (void *) svp->ri_addr.buf); - cmn_err(CE_NOTE, "!host: %s\n", svp->ri_hostname); -} -#endif /* DEBUG */ - -/* - * Initialize an rdc servinfo - * Contains all the protocol we need to do a client rpc - * A chain of rdc_srv_t indicates a one to many - */ - -rdc_srv_t * -rdc_create_svinfo(char *host, struct netbuf *svaddr, struct knetconfig *conf) -{ - rdc_srv_t *nvp; - int hlen = strlen(host) + 1; - - if (conf == NULL) { - return (NULL); - } - - if (host == NULL) { - return (NULL); - } - - nvp = kmem_zalloc(sizeof (*nvp), KM_SLEEP); - nvp->ri_knconf = kmem_alloc(sizeof (*nvp->ri_knconf), KM_SLEEP); - nvp->ri_hostname = kmem_zalloc(hlen, KM_SLEEP); - - if (nvp == NULL || nvp->ri_hostname == NULL || nvp->ri_knconf == NULL) { - rdc_destroy_svinfo(nvp); - return (NULL); - } - - nvp->ri_hostnamelen = hlen; - - bcopy((void *)conf, (void *)nvp->ri_knconf, sizeof (*nvp->ri_knconf)); - nvp->ri_knconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE + 1, KM_SLEEP); - nvp->ri_knconf->knc_proto = kmem_zalloc(KNC_STRSIZE + 1, KM_SLEEP); - - if 
(nvp->ri_knconf->knc_protofmly == NULL || - nvp->ri_knconf->knc_proto == NULL) { - rdc_destroy_svinfo(nvp); - return (NULL); - - } - - (void) strncpy(nvp->ri_knconf->knc_protofmly, conf->knc_protofmly, - KNC_STRSIZE); - (void) strncpy(nvp->ri_knconf->knc_proto, conf->knc_proto, KNC_STRSIZE); - - dup_rdc_netbuf(svaddr, &nvp->ri_addr); - - nvp->ri_secdata = NULL; /* For now */ - (void) strncpy(nvp->ri_hostname, host, hlen); -#ifdef DEBUG_IP - rdc_print_svinfo(nvp, "!create"); -#endif - return (nvp); -} - -void -rdc_destroy_svinfo(rdc_srv_t *svp) -{ - if (svp == NULL) - return; - - if (svp->ri_addr.buf && svp->ri_addr.maxlen) - free_rdc_netbuf(&(svp->ri_addr)); - - if (svp->ri_knconf->knc_protofmly) - kmem_free(svp->ri_knconf->knc_protofmly, KNC_STRSIZE + 1); - - if (svp->ri_knconf->knc_proto) - kmem_free(svp->ri_knconf->knc_proto, KNC_STRSIZE + 1); - - if (svp->ri_knconf) - kmem_free(svp->ri_knconf, sizeof (*svp->ri_knconf)); - - kmem_free(svp, sizeof (*svp)); -} - -/* - * rdc_netbuf_toint - * Returns oldsytle ipv4 RDC ver 3 addresses for RPC protocol from netbuf - * Note: This would never be called in the case of IPv6 and a program - * mismatch ie ver 3 to ver 4 - */ -int -rdc_netbuf_toint(struct netbuf *nb) -{ - int ret; - if (nb->len > RDC_MAXADDR) - cmn_err(CE_NOTE, "!rdc_netbuf_toint: bad size %d", nb->len); - - switch (nb->len) { - case 4: - bcopy(nb->buf, (char *)&ret, sizeof (int)); - return (ret); - - case 8: - case 16: - case 32: - bcopy(&nb->buf[4], (char *)&ret, sizeof (int)); - return (ret); - - default: - cmn_err(CE_NOTE, "!rdc_netbuf_toint: size %d", nb->len); - } - return (0); -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_svc.c b/usr/src/uts/common/avs/ns/rdc/rdc_svc.c deleted file mode 100644 index ea1425055d..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_svc.c +++ /dev/null @@ -1,3079 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the 
"License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * kRPC Server for sndr - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/conf.h> -#include <sys/stream.h> -#include <sys/errno.h> - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. 
- */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#ifndef DS_DDICT -#include <rpc/auth.h> -#include <rpc/svc.h> -#include <rpc/xdr.h> -#endif -#include <sys/ddi.h> -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include <sys/nsctl/nsctl.h> -#include <sys/ncall/ncall.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdcsrv.h" - -static rdc_sleepq_t *rdc_newsleepq(); -static void rdc_delsleepq(rdc_sleepq_t *); -static int rdc_sleepq(rdc_group_t *, rdc_sleepq_t *); -static int rdc_combywrite(rdc_k_info_t *, nsc_buf_t *); -static int rdc_writemaxfba(rdc_k_info_t *, rdc_u_info_t *, - rdc_net_dataset_t *, uint_t, int); -static void rdc_setbitind(int *, net_pendvec_t *, rdc_net_dataset_t *, uint_t, - int, int); -static void rdc_dopending(rdc_group_t *, netwriteres *); -static nsc_vec_t *rdc_dset2vec(rdc_net_dataset_t *); -static int rdc_combyread(rdc_k_info_t *, rdc_u_info_t *, nsc_buf_t *); -static int rdc_readmaxfba(int, nsc_off_t, nsc_size_t, int); -static int rdc_dsetcopy(rdc_net_dataset_t *, nsc_vec_t *, nsc_off_t, nsc_size_t, - char *, int, int); - -/* direction for dsetcopy() */ -#define COPY_IN 1 /* copy data into the rpc buffer */ -#define COPY_OUT 2 /* copy data out of the rpc buffer */ - -#define MAX_EINTR_COUNT 1000 - -static int rdc_rread_slow; -static rdcsrv_t rdc_srvtab[]; - -#ifdef DEBUG -static int rdc_netwrite6; -static int rdc_stall0; -static int rdc_sleepcnt; -int rdc_datasetcnt; -#endif - - -int -_rdc_sync_event_notify(int operation, char *volume, char *group) -{ - int ack = 0; - clock_t time; - - mutex_enter(&rdc_sync_mutex); - mutex_enter(&rdc_sync_event.mutex); - - if (rdc_sync_event.daemon_waiting) { - rdc_sync_event.daemon_waiting = 0; - rdc_sync_event.event = operation; - (void) strncpy(rdc_sync_event.master, volume, NSC_MAXPATH); - (void) 
strncpy(rdc_sync_event.group, group, NSC_MAXPATH); - - cv_signal(&rdc_sync_event.cv); - - rdc_sync_event.kernel_waiting = 1; - time = cv_reltimedwait_sig(&rdc_sync_event.done_cv, - &rdc_sync_event.mutex, rdc_sync_event_timeout, - TR_CLOCK_TICK); - if (time == (clock_t)0 || time == (clock_t)-1) { - /* signalled or timed out */ - ack = 0; - } else { - if (rdc_sync_event.ack) - ack = 1; - else - ack = -1; - } - } - mutex_exit(&rdc_sync_event.mutex); - mutex_exit(&rdc_sync_mutex); - return (ack); -} - - -int -_rdc_sync_event_wait(void *arg0, void *arg1, int mode, spcs_s_info_t kstatus, - int *rvp) -{ - int rc = 0; - static char master[NSC_MAXPATH]; - - master[0] = '\0'; - *rvp = 0; - if (ddi_copyin(arg0, master, NSC_MAXPATH, mode)) - return (EFAULT); - - mutex_enter(&rdc_sync_event.mutex); - - if (rdc_sync_event.kernel_waiting && - (rdc_sync_event.lbolt - nsc_lbolt() < rdc_sync_event_timeout)) { - /* We haven't been away too long */ - if (master[0]) - rdc_sync_event.ack = 1; - else - rdc_sync_event.ack = 0; - rdc_sync_event.kernel_waiting = 0; - cv_signal(&rdc_sync_event.done_cv); - } - - rdc_sync_event.daemon_waiting = 1; - if (cv_wait_sig(&rdc_sync_event.cv, &rdc_sync_event.mutex) == 0) { - rdc_sync_event.daemon_waiting = 0; - rc = EAGAIN; - spcs_s_add(kstatus, rc); - } else { - (void) ddi_copyout(rdc_sync_event.master, arg0, NSC_MAXPATH, - mode); - (void) ddi_copyout(rdc_sync_event.group, arg1, NSC_MAXPATH, - mode); - *rvp = rdc_sync_event.event; - } - rdc_sync_event.lbolt = nsc_lbolt(); - mutex_exit(&rdc_sync_event.mutex); - - return (rc); -} - - -static int -rdc_allow_sec_sync(rdc_u_info_t *urdc, int option) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - if (!IS_MULTI(krdc)) - return (0); - - rdc_many_enter(krdc); - - krdc = krdc->multi_next; - urdc = &rdc_u_info[krdc->index]; - - if (!IS_ENABLED(urdc)) { - rdc_many_exit(krdc); - return (0); - } - - if (option == CCIO_RSYNC) { - - /* Reverse sync */ - - if 
(rdc_get_mflags(urdc) & RDC_RSYNC_NEEDED) { - /* - * Reverse sync needed or in progress. - */ - rdc_many_exit(krdc); - return (-1); - } - } else { - ASSERT(option == CCIO_SLAVE); - - /* Forward sync */ - - if (rdc_get_mflags(urdc) & RDC_SLAVE) { - /* - * Reverse syncing is bad, as that means that data - * is already flowing to the target of the requested - * sync operation. - */ - rdc_many_exit(krdc); - return (-1); - } - - /* - * Clear "reverse sync needed" on all 1-many volumes. - * The data on them will be updated from the primary of this - * requested sync operation, so the aborted reverse sync need - * not be completed. - */ - - if ((rdc_get_mflags(urdc) & RDC_RSYNC_NEEDED) || - (rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_clr_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - rdc_write_state(urdc); - } - if (IS_MANY(krdc)) { - for (ktmp = krdc->many_next; ktmp != krdc; - ktmp = ktmp->many_next) { - utmp = &rdc_u_info[ktmp->index]; - if (!IS_ENABLED(utmp)) - continue; - if (rdc_get_mflags(utmp) & RDC_RSYNC_NEEDED) { - rdc_clr_mflags(utmp, RDC_RSYNC_NEEDED); - rdc_write_state(utmp); - } - } - } - } - - rdc_many_exit(krdc); - - return (0); -} - - -/* - * r_net_null - * Proc 0 Null action - */ -static void -r_net_null(SVCXPRT *xprt) -{ - (void) svc_sendreply(xprt, xdr_void, 0); -} - -/* - * r_net_read - */ -static void -r_net_read(SVCXPRT *xprt) -{ - readres resp; - rdc_u_info_t *urdc; - struct rread diskio; - char *buffer = NULL; - uchar_t *sv_addr; - nsc_vec_t *vec; - int pos, st; - int nocache; - int sv_len; - nsc_vec_t *vector = NULL; - rdc_net_dataset_t *dset = NULL; - int vecsz = 0; - - st = SVC_GETARGS(xprt, xdr_rread, (char *)&diskio); - if (!st) { - (void) svc_sendreply(xprt, xdr_int, (char *)&st); - return; - } - nocache = (diskio.flag & RDC_RREAD_FAIL) ? 
0 : NSC_NOCACHE; - - if ((diskio.cd >= rdc_max_sets) || (diskio.cd < 0)) { - resp.rr_status = RDCERR_NOENT; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_read: EPROTO cd out or not enabled"); -#endif - return; - } - - urdc = &rdc_u_info[diskio.cd]; - - if (diskio.flag & RDC_RREAD_START) { - /* setup rpc */ - if (!IS_ENABLED(urdc)) { - st = 0; - (void) svc_sendreply(xprt, xdr_int, (char *)&st); - return; - } - st = rdc_readmaxfba(diskio.cd, diskio.pos, diskio.len, - nocache); - - if (!svc_sendreply(xprt, xdr_int, (char *)&st)) { - if (st != 0) { - rdc_net_dataset_t *dset; - if (dset = rdc_net_get_set(diskio.cd, st)) { - rdc_net_del_set(diskio.cd, dset); - } else { - cmn_err(CE_NOTE, "!r_net_read: get_set " - "has failed in cleanup"); - } - } - } - return; - } - - /* data rpc */ - -#ifdef DEBUG - if ((diskio.flag & RDC_RREAD_DATA) == 0) { - cmn_err(CE_WARN, "!r_net_read: received non-DATA rpc! flag %x", - diskio.flag); - } -#endif - - dset = rdc_net_get_set(diskio.cd, diskio.idx); - if (dset) { - vector = rdc_dset2vec(dset); - } - if (vector == NULL) { - resp.rr_status = RDCERR_NOMEM; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); - goto cleanup; - } - vecsz = (dset->nitems + 1) * sizeof (nsc_vec_t); - - if (!IS_ENABLED(urdc)) { - resp.rr_status = RDCERR_NOENT; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); - goto cleanup; - } - resp.rr_status = RDC_OK; - - /* find place in vector */ - vec = vector; - pos = diskio.pos - dset->pos; - - for (; pos >= FBA_NUM(vec->sv_len); vec++) - pos -= FBA_NUM(vec->sv_len); - - sv_addr = vec->sv_addr + FBA_SIZE(pos); - sv_len = vec->sv_len - FBA_SIZE(pos); - - /* - * IF the data is in a single sb_vec entry - * THEN - * we can just point to that - * ELSE - * we have to alloc a local buffer, - * copy the data in and the point to - * the local buffer. 
- */ - - if (sv_len >= FBA_SIZE(diskio.len)) { - /* fast */ - resp.rr_data = (char *)sv_addr; - resp.rr_bufsize = FBA_SIZE(diskio.len); - } else { - /* slow */ - rdc_rread_slow++; /* rough count */ - resp.rr_bufsize = FBA_SIZE(diskio.len); - buffer = kmem_alloc(resp.rr_bufsize, KM_NOSLEEP); - if (!buffer) { - resp.rr_status = RDCERR_NOMEM; - } else { - resp.rr_data = buffer; - if (!rdc_dsetcopy(dset, vector, diskio.pos, diskio.len, - resp.rr_data, resp.rr_bufsize, COPY_IN)) { - resp.rr_status = RDCERR_NOMEM; /* ??? */ - } - } - } - - st = svc_sendreply(xprt, xdr_readres, (char *)&resp); /* send data */ - -cleanup: - - if (dset) { - if (!st || - (diskio.flag & RDC_RREAD_END) || - (resp.rr_status != RDC_OK)) { - /* - * RPC reply failed, OR - * Last RPC for this IO operation, OR - * We are failing this IO operation. - * - * Do cleanup. - */ - rdc_net_del_set(diskio.cd, dset); - } else { - rdc_net_put_set(diskio.cd, dset); - } - } - - if (buffer) - kmem_free(buffer, resp.rr_bufsize); - if (vector) { - kmem_free(vector, vecsz); - RDC_DSMEMUSE(-vecsz); - } -} - -/* - * r_net_read (v6) - */ -static void -r_net_read6(SVCXPRT *xprt) -{ - readres resp; - rdc_u_info_t *urdc; - struct rread6 diskio; - char *buffer = NULL; - uchar_t *sv_addr; - nsc_vec_t *vec; - int pos, st; - int nocache; - int sv_len; - nsc_vec_t *vector = NULL; - rdc_net_dataset_t *dset = NULL; - int vecsz = 0; - - st = SVC_GETARGS(xprt, xdr_rread6, (char *)&diskio); - if (!st) { - (void) svc_sendreply(xprt, xdr_int, (char *)&st); - return; - } - nocache = (diskio.flag & RDC_RREAD_FAIL) ? 
0 : NSC_NOCACHE; - - if ((diskio.cd >= rdc_max_sets) || (diskio.cd < 0)) { - resp.rr_status = RDCERR_NOENT; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_read6: EPROTO cd out or not enabled"); -#endif - return; - } - - urdc = &rdc_u_info[diskio.cd]; - - if (diskio.flag & RDC_RREAD_START) { - /* setup rpc */ - if (!IS_ENABLED(urdc)) { - st = 0; - (void) svc_sendreply(xprt, xdr_int, (char *)&st); - return; - } - st = rdc_readmaxfba(diskio.cd, diskio.pos, diskio.len, - nocache); - - if (!svc_sendreply(xprt, xdr_int, (char *)&st)) { - if (st != 0) { - rdc_net_dataset_t *dset; - if (dset = rdc_net_get_set(diskio.cd, st)) { - rdc_net_del_set(diskio.cd, dset); - } else { - cmn_err(CE_NOTE, "!read6: get_set " - "has failed in cleanup"); - } - } - } - return; - } - - /* data rpc */ - -#ifdef DEBUG - if ((diskio.flag & RDC_RREAD_DATA) == 0) { - cmn_err(CE_WARN, "!read6: received non-DATA rpc! flag %x", - diskio.flag); - } -#endif - - dset = rdc_net_get_set(diskio.cd, diskio.idx); - if (dset) { - vector = rdc_dset2vec(dset); - } - if (vector == NULL) { - resp.rr_status = RDCERR_NOMEM; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); - goto cleanup; - } - vecsz = (dset->nitems + 1) * sizeof (nsc_vec_t); - - if (!IS_ENABLED(urdc)) { - resp.rr_status = RDCERR_NOENT; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); - goto cleanup; - } - resp.rr_status = RDC_OK; - - /* find place in vector */ - vec = vector; - pos = diskio.pos - dset->pos; - - for (; pos >= FBA_NUM(vec->sv_len); vec++) - pos -= FBA_NUM(vec->sv_len); - - sv_addr = vec->sv_addr + FBA_SIZE(pos); - sv_len = vec->sv_len - FBA_SIZE(pos); - - /* - * IF the data is in a single sb_vec entry - * THEN - * we can just point to that - * ELSE - * we have to alloc a local buffer, - * copy the data in and the point to - * the local buffer. 
- */ - - if (sv_len >= FBA_SIZE(diskio.len)) { - /* fast */ - resp.rr_data = (char *)sv_addr; - resp.rr_bufsize = FBA_SIZE(diskio.len); - } else { - /* slow */ - rdc_rread_slow++; /* rough count */ - resp.rr_bufsize = FBA_SIZE(diskio.len); - buffer = kmem_alloc(resp.rr_bufsize, KM_NOSLEEP); - if (!buffer) { - resp.rr_status = RDCERR_NOMEM; - } else { - resp.rr_data = buffer; - if (!rdc_dsetcopy(dset, vector, diskio.pos, diskio.len, - resp.rr_data, resp.rr_bufsize, COPY_IN)) { - resp.rr_status = RDCERR_NOMEM; /* ??? */ - } - } - } - - st = svc_sendreply(xprt, xdr_readres, (char *)&resp); /* send data */ - -cleanup: - - if (dset) { - if (!st || - (diskio.flag & RDC_RREAD_END) || - (resp.rr_status != RDC_OK)) { - /* - * RPC reply failed, OR - * Last RPC for this IO operation, OR - * We are failing this IO operation. - * - * Do cleanup. - */ - rdc_net_del_set(diskio.cd, dset); - } else { - rdc_net_put_set(diskio.cd, dset); - } - } - - if (buffer) - kmem_free(buffer, resp.rr_bufsize); - if (vector) { - kmem_free(vector, vecsz); - RDC_DSMEMUSE(-vecsz); - } -} - -/* - * r_net_write (Version 5) - * 0 reply indicates error - * >0 reply indicates a net handle index - * <0 reply indicates errno - * ret net handle index - * ret2 general error - * ret3 multi-hop errors (never returned) - */ -static void -r_net_write5(SVCXPRT *xprt) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct net_data5 diskio; - rdc_net_dataset_t *dset; - rdc_net_dataitem_t *ditem; - int nocache; - int ret = 0; - int ret2 = 0; - int st; - - krdc = NULL; - diskio.data.data_val = kmem_alloc(RDC_MAXDATA, KM_NOSLEEP); - - if (!diskio.data.data_val) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(RDC_MAXDATA); - st = SVC_GETARGS(xprt, xdr_net_data5, (char *)&diskio); - if (!st) { - ret2 = ENOMEM; -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write5:SVC_GETARGS failed: st %d", st); -#endif - goto out; - } - if ((diskio.cd >= rdc_max_sets) || (diskio.cd < 0)) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, 
"!r_net_write6: EPROTO cd out or not enabled"); -#endif - goto out; - } - - nocache = (diskio.flag & RDC_RWRITE_FAIL) ? 0 : NSC_NOCACHE; - krdc = &rdc_k_info[diskio.cd]; - urdc = &rdc_u_info[diskio.cd]; - - if (!IS_ENABLED(urdc) || IS_STATE(urdc, RDC_LOGGING)) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write6: cd logging / not enabled (%x)", - rdc_get_vflags(urdc)); -#endif - krdc = NULL; /* so we don't try to unqueue kstat entry */ - goto out; - } - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - - /* -1 index says allocate a buffer */ - if (diskio.idx < 0) { - dset = rdc_net_add_set(diskio.cd); - if (dset == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write5: " - "failed to add dataset"); -#endif - ret2 = EIO; - goto out; - } else { - ret = dset->id; - dset->pos = diskio.pos; - dset->fbalen = diskio.len; - diskio.idx = ret; - } - ditem = kmem_alloc(sizeof (rdc_net_dataitem_t), KM_NOSLEEP); - if (ditem == NULL) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(sizeof (rdc_net_dataitem_t)); - /* - * If this is a single transfer, then we don't - * need to allocate any memory for the data, - * just point the ditem data pointer to the - * existing buffer. - */ - ditem->next = NULL; - if (diskio.endoblk) { - ditem->dptr = diskio.data.data_val; - /* - * So we don't free it twice. - */ - diskio.data.data_val = NULL; - ditem->len = diskio.data.data_len; - ditem->mlen = RDC_MAXDATA; - } else { - /* - * Allocate the memory for the complete - * transfer. - */ - ditem->dptr = kmem_alloc(FBA_SIZE(diskio.len), - KM_NOSLEEP); - if (ditem->dptr == NULL) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(FBA_SIZE(diskio.len)); - ditem->len = FBA_SIZE(diskio.len); - ditem->mlen = ditem->len; - - /* - * Copy the data to the new buffer. 
- */ - ASSERT(diskio.data.data_len == FBA_SIZE(diskio.nfba)); - bcopy(diskio.data.data_val, ditem->dptr, - diskio.data.data_len); - /* - * free the old data buffer. - */ - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - diskio.data.data_val = NULL; - } - dset->head = ditem; - dset->tail = ditem; - dset->nitems++; - } else { - ret = diskio.idx; - dset = rdc_net_get_set(diskio.cd, diskio.idx); - if (dset == NULL) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write5: net_get_set failed cd %d idx %d", - diskio.cd, diskio.idx); -#endif - goto out; - } - /* - * We have to copy the data from the rpc buffer - * to the data in ditem. - */ - ditem = dset->head; - bcopy(diskio.data.data_val, (char *)ditem->dptr + - FBA_SIZE(diskio.sfba - diskio.pos), diskio.data.data_len); - - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - diskio.data.data_val = NULL; - } - ASSERT(dset); - - if (diskio.endoblk) { - ret2 = rdc_writemaxfba(krdc, urdc, dset, diskio.seq, nocache); - rdc_net_del_set(diskio.cd, dset); - dset = NULL; - } -out: - if (!RDC_SUCCESS(ret2)) { - if (ret2 > 0) - ret2 = -ret2; - DTRACE_PROBE1(rdc_svcwrite5_err_ret2, int, ret2); - st = svc_sendreply(xprt, xdr_int, (char *)&ret2); - } else - st = svc_sendreply(xprt, xdr_int, (char *)&ret); - - if (krdc && krdc->io_kstats && ret2 != ENOMEM) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - /* - * On Error we must cleanup. - * If we have a handle, free it. - * If we have a network handle, free it. - */ - if (!st || !RDC_SUCCESS(ret2)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_write5 error case? 
st %x ret %d", - st, ret2); -#endif - if (dset) { - rdc_net_del_set(diskio.cd, dset); - } - - } else { - if (dset) { - rdc_net_put_set(diskio.cd, dset); - } - } - if (diskio.data.data_val) { - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - } -} - -/* - * r_net_write (Version 6) - * index 0 = error, or net handle index. - * result = 0 , ok. - * result = 1, pending write. - * result < 0 error, and is the -errno. - * ret net handle index. - * ret2 general error. - */ -static void -r_net_write6(SVCXPRT *xprt) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_group_t *group; - struct net_data6 diskio; - struct netwriteres netret; - rdc_net_dataset_t *dset; - rdc_net_dataitem_t *ditem; - int ret = 0; - int ret2 = 0; - int st; - int nocache; - - netret.vecdata.vecdata_val = NULL; - netret.vecdata.vecdata_len = 0; - dset = NULL; - krdc = NULL; - diskio.data.data_val = kmem_alloc(RDC_MAXDATA, KM_NOSLEEP); - - if (!diskio.data.data_val) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(RDC_MAXDATA); - st = SVC_GETARGS(xprt, xdr_net_data6, (char *)&diskio); - if (!st) { - ret2 = ENOMEM; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6:SVC_GETARGS failed: st %d", st); -#endif - goto out; - } - - if ((diskio.cd >= rdc_max_sets) || (diskio.cd < 0)) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write6: EPROTO cd out or not enabled"); -#endif - goto out; - } - - nocache = (diskio.flag & RDC_RWRITE_FAIL) ? 
0 : NSC_NOCACHE; - netret.seq = diskio.seq; - - krdc = &rdc_k_info[diskio.cd]; - urdc = &rdc_u_info[diskio.cd]; - - if (!IS_ENABLED(urdc) || IS_STATE(urdc, RDC_LOGGING)) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6: cd logging or not enabled (%x)", - rdc_get_vflags(urdc)); -#endif - krdc = NULL; /* so we don't try to unqueue kstat entry */ - goto out; - } - - group = krdc->group; - if (group == NULL) { - ret2 = EIO; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6: No group structure for set %s:%s", - urdc->secondary.intf, urdc->secondary.file); -#endif - krdc = NULL; /* so we don't try to unqueue kstat entry */ - goto out; - } - -#ifdef DEBUG - if (rdc_netwrite6) { - cmn_err(CE_NOTE, - "!r_net_write6: idx %d seq %u current seq %u pos %llu " - "len %d sfba %llu nfba %d endoblk %d", - diskio.idx, diskio.seq, group->seq, - (unsigned long long)diskio.pos, diskio.len, - (unsigned long long)diskio.sfba, diskio.nfba, - diskio.endoblk); - } -#endif - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - /* -1 index says allocate a net dataset */ - if (diskio.idx < 0) { - dset = rdc_net_add_set(diskio.cd); - if (dset == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6: failed to add dataset"); -#endif - ret2 = EIO; - goto out; - } else { - ret = dset->id; - dset->pos = (nsc_off_t)diskio.pos; /* 64bit! */ - dset->fbalen = diskio.len; - diskio.idx = ret; - } - ditem = kmem_alloc(sizeof (rdc_net_dataitem_t), KM_NOSLEEP); - if (ditem == NULL) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(sizeof (rdc_net_dataitem_t)); - /* - * If this is a single transfer, then we don't - * need to allocate any memory for the data, - * just point the ditem data pointer to the - * existing buffer. - */ - ditem->next = NULL; - if (diskio.endoblk) { - ditem->dptr = diskio.data.data_val; - /* - * So we don't free it twice. 
- */ - diskio.data.data_val = NULL; - ditem->len = diskio.data.data_len; - ditem->mlen = RDC_MAXDATA; - } else { - /* - * Allocate the memory for the complete - * transfer. - */ - ditem->dptr = kmem_alloc(FBA_SIZE(diskio.len), - KM_NOSLEEP); - if (ditem->dptr == NULL) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(FBA_SIZE(diskio.len)); - ditem->len = FBA_SIZE(diskio.len); - ditem->mlen = ditem->len; - - /* - * Copy the data to the new buffer. - */ - ASSERT(diskio.data.data_len == FBA_SIZE(diskio.nfba)); - bcopy(diskio.data.data_val, ditem->dptr, - diskio.data.data_len); - /* - * free the old data buffer. - */ - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - diskio.data.data_val = NULL; - } - dset->head = ditem; - dset->tail = ditem; - dset->nitems++; - } else { - ret = diskio.idx; - dset = rdc_net_get_set(diskio.cd, diskio.idx); - if (dset == NULL) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6: net_get_set failed cd %d idx %d " - "packet sequence %u expected seq %u", - diskio.cd, diskio.idx, diskio.seq, group->seq); -#endif - goto out; - } - /* - * We have to copy the data from the rpc buffer - * to the data in ditem. - */ - ditem = dset->head; - bcopy(diskio.data.data_val, (char *)ditem->dptr + - FBA_SIZE(diskio.sfba - diskio.pos), diskio.data.data_len); - - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - diskio.data.data_val = NULL; - } - ASSERT(dset); - - if (diskio.endoblk) { -#ifdef DEBUG - if (diskio.seq == (RDC_NEWSEQ + 1)) { - rdc_stallzero(2); - } -#endif - if (diskio.seq == RDC_NEWSEQ) { - /* - * magic marker, start of sequence. - */ - mutex_enter(&group->ra_queue.net_qlock); - /* - * see if some threads are stuck. 
- */ - if (group->sleepq) { - rdc_sleepqdiscard(group); - } - group->seqack = RDC_NEWSEQ; - mutex_exit(&group->ra_queue.net_qlock); - } - - if ((diskio.seq != RDC_NOSEQ) && (diskio.seq != RDC_NEWSEQ)) { - /* - * see if we are allowed through here to - * do the write, or if we have to q the - * request and send back a pending reply. - */ - mutex_enter(&group->ra_queue.net_qlock); - if (diskio.seq != group->seq) { - rdc_sleepq_t *sq; - int maxseq; - - /* - * Check that we have room. - */ - maxseq = group->seqack + RDC_MAXPENDQ + 1; - if (maxseq < group->seqack) { - /* - * skip magic values. - */ - maxseq += RDC_NEWSEQ + 1; - } - if (!RDC_INFRONT(diskio.seq, maxseq)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!net_write6: Queue " - "size %d exceeded seqack %u " - "this seq %u maxseq %u seq %u", - RDC_MAXPENDQ, group->seqack, - diskio.seq, maxseq, group->seq); -#endif - DTRACE_PROBE2(qsize_exceeded, int, diskio.seq, - int, maxseq); - if (!(rdc_get_vflags(urdc) & - RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - rdc_set_flags(urdc, - RDC_VOL_FAILED); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - ret2 = EIO; - rdc_sleepqdiscard(group); - group->seq = RDC_NEWSEQ; - group->seqack = RDC_NEWSEQ; - mutex_exit(&group->ra_queue.net_qlock); - goto out; - } - - sq = rdc_newsleepq(); - sq->seq = diskio.seq; - sq->sindex = diskio.cd; - sq->pindex = diskio.local_cd; - sq->idx = diskio.idx; - sq->qpos = diskio.qpos; - sq->nocache = nocache; - if (rdc_sleepq(group, sq)) { - ret2 = EIO; - group->seq = RDC_NEWSEQ; - group->seqack = RDC_NEWSEQ; - rdc_sleepqdiscard(group); - mutex_exit(&group->ra_queue.net_qlock); - goto out; - } - rdc_net_put_set(diskio.cd, dset); - dset = NULL; - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_enter(KSTAT_IO_PTR(krdc-> - io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - mutex_exit(&group->ra_queue.net_qlock); - /* - * pending state. 
- */ - netret.result = 1; - netret.index = diskio.idx; - st = svc_sendreply(xprt, xdr_netwriteres, - (char *)&netret); - if (krdc->io_kstats && ret2 != ENOMEM) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR( - krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - return; - } - mutex_exit(&group->ra_queue.net_qlock); - } - - ret2 = rdc_writemaxfba(krdc, urdc, dset, diskio.seq, nocache); - rdc_net_del_set(diskio.cd, dset); - dset = NULL; -#ifdef DEBUG - if (!RDC_SUCCESS(ret2)) { - cmn_err(CE_WARN, "!r_net_write6: writemaxfba failed %d", - ret2); - } -#endif - if (diskio.seq != RDC_NOSEQ) { - mutex_enter(&group->ra_queue.net_qlock); - group->seq = diskio.seq + 1; - if (group->seq < diskio.seq) - group->seq = RDC_NEWSEQ + 1; - if (group->sleepq && - (group->sleepq->seq == group->seq)) { - rdc_dopending(group, &netret); - } - group->seqack = group->seq; - mutex_exit(&group->ra_queue.net_qlock); - } - } -out: - if (!RDC_SUCCESS(ret2)) { - DTRACE_PROBE1(rdc_svcwrite6_err_ret2, int, ret2); - netret.result = -ret2; - } else { - netret.result = 0; - netret.index = ret; - } - st = svc_sendreply(xprt, xdr_netwriteres, (char *)&netret); - if (netret.vecdata.vecdata_val) { - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * sizeof (net_pendvec_t)); - } - if (krdc && krdc->io_kstats && ret2 != ENOMEM) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - /* - * On Error we must cleanup. - * If we have a handle, free it. - * If we have a network handle, free it. - * If we hold the main nsc buffer, free it. 
- */ - if (!st || !RDC_SUCCESS(ret2)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_write6 error st %x ret %d seq %u", - st, ret2, diskio.seq); -#endif - if (dset) { - rdc_net_del_set(diskio.cd, dset); - } - } else { - if (dset) { - rdc_net_put_set(diskio.cd, dset); - } - } - if (diskio.data.data_val) { - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - } -} - -/* - * r_net_ping4 - * - * received on the primary. - */ -static void -r_net_ping4(SVCXPRT *xprt, struct svc_req *req) -{ - struct rdc_ping6 ping; - int e, ret = 0; - rdc_if_t *ip; - - e = SVC_GETARGS(xprt, xdr_rdc_ping6, (char *)&ping); - if (e) { - mutex_enter(&rdc_ping_lock); - - /* update specified interface */ - - for (ip = rdc_if_top; ip; ip = ip->next) { - if ((bcmp(ping.p_ifaddr, ip->ifaddr.buf, - RDC_MAXADDR) == 0) && - (bcmp(ping.s_ifaddr, ip->r_ifaddr.buf, - RDC_MAXADDR) == 0)) { - ip->new_pulse++; - ip->deadness = 1; - - /* Update the rpc protocol version to use */ - - ip->rpc_version = req->rq_vers; - break; - } - } - - mutex_exit(&rdc_ping_lock); - } else { - svcerr_decode(xprt); -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: couldn't get ping4 arguments"); -#endif - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_ping7 - * - * received on the primary. 
- */ -static void -r_net_ping7(SVCXPRT *xprt, struct svc_req *req) -{ - struct rdc_ping ping; - int e, ret = 0; - rdc_if_t *ip; - unsigned short *sp; - - bzero(&ping, sizeof (struct rdc_ping)); - e = SVC_GETARGS(xprt, xdr_rdc_ping, (char *)&ping); - if (e) { - sp = (unsigned short *)ping.p_ifaddr.buf; - *sp = ntohs(*sp); - sp = (unsigned short *)ping.s_ifaddr.buf; - *sp = ntohs(*sp); - mutex_enter(&rdc_ping_lock); - - /* update specified interface */ - - for (ip = rdc_if_top; ip; ip = ip->next) { - if ((bcmp(ping.p_ifaddr.buf, ip->ifaddr.buf, - ping.p_ifaddr.len) == 0) && - (bcmp(ping.s_ifaddr.buf, ip->r_ifaddr.buf, - ping.s_ifaddr.len) == 0)) { - ip->new_pulse++; - ip->deadness = 1; - - /* Update the rpc protocol version to use */ - - ip->rpc_version = req->rq_vers; - break; - } - } - - mutex_exit(&rdc_ping_lock); - } else { - svcerr_decode(xprt); -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: couldn't get ping7 arguments"); -#endif - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - - -/* - * r_net_bmap (v5) - * WARNING acts as both client and server - */ -static void -r_net_bmap(SVCXPRT *xprt) -{ - int e, ret = EINVAL; - struct bmap b; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct bmap6 b6; - - - e = SVC_GETARGS(xprt, xdr_bmap, (char *)&b); - if (e == TRUE) { - krdc = &rdc_k_info[b.cd]; - urdc = &rdc_u_info[b.cd]; - if (b.cd >= 0 && b.cd < rdc_max_sets && IS_ENABLED(urdc) && - ((krdc->type_flag & RDC_DISABLEPEND) == 0)) { - krdc->rpc_version = RDC_VERSION5; - b6.cd = b.cd; - b6.dual = b.dual; - b6.size = b.size; - ret = RDC_SEND_BITMAP(&b6); - } - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_bmap (v6) - * WARNING acts as both client and server - */ -static void -r_net_bmap6(SVCXPRT *xprt) -{ - int e, ret = EINVAL; - struct bmap6 b; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - e = SVC_GETARGS(xprt, xdr_bmap6, (char *)&b); - if (e == TRUE) { - krdc = &rdc_k_info[b.cd]; - urdc = &rdc_u_info[b.cd]; - if (b.cd >= 0 && 
b.cd < rdc_max_sets && IS_ENABLED(urdc) && - ((krdc->type_flag & RDC_DISABLEPEND) == 0)) { - krdc->rpc_version = RDC_VERSION6; - ret = RDC_SEND_BITMAP(&b); - } - } - /* - * If the bitmap send has succeeded, clear it. - */ - if (ret == 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!Bitmap clear in r_net_bmap6"); -#endif - RDC_ZERO_BITMAP(krdc); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - } - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_bdata - */ -static void -r_net_bdata(SVCXPRT *xprt) -{ - struct net_bdata bd; - struct net_bdata6 bd6; - int e, ret = -1; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - /* - * We have to convert it to the internal form here, - * net_data6, when we know that we will have to convert - * it back to the v5 variant for transmission. - */ - - bd.data.data_val = kmem_alloc(BMAP_BLKSIZE, KM_NOSLEEP); - if (bd.data.data_val == NULL) - goto out; - - e = SVC_GETARGS(xprt, xdr_net_bdata, (char *)&bd); - if (e == TRUE) { - krdc = &rdc_k_info[bd.cd]; - urdc = &rdc_u_info[bd.cd]; - if (bd.cd >= 0 && bd.cd < rdc_max_sets && IS_ENABLED(urdc) && - ((krdc->type_flag & RDC_DISABLEPEND) == 0)) { - bd6.cd = bd.cd; - bd6.offset = bd.offset; - bd6.size = bd.size; - bd6.data.data_len = bd.data.data_len; - bd6.data.data_val = bd.data.data_val; - ret = RDC_OR_BITMAP(&bd6); - } - } - kmem_free(bd.data.data_val, BMAP_BLKSIZE); -out: - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_bdata v6 - */ -static void -r_net_bdata6(SVCXPRT *xprt) -{ - struct net_bdata6 bd; - int e, ret = -1; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - /* - * just allocate the bigger block, regardless of < V7 - * bd.size will dictate how much we lor into our bitmap - * the other option would be write r_net_bdata7 that is identical - * to this function, but a V7 alloc. 
- */ - bd.data.data_val = kmem_alloc(BMAP_BLKSIZEV7, KM_NOSLEEP); - if (bd.data.data_val == NULL) - goto out; - - e = SVC_GETARGS(xprt, xdr_net_bdata6, (char *)&bd); - if (e == TRUE) { - krdc = &rdc_k_info[bd.cd]; - urdc = &rdc_u_info[bd.cd]; - if (bd.cd >= 0 && bd.cd < rdc_max_sets && IS_ENABLED(urdc) && - ((krdc->type_flag & RDC_DISABLEPEND) == 0)) - ret = RDC_OR_BITMAP(&bd); - } - /* - * Write the merged bitmap. - */ - if ((ret == 0) && bd.endoblk && (krdc->bitmap_write > 0)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_bdata6: Written bitmap for %s:%s", - urdc->secondary.intf, urdc->secondary.file); -#endif - ret = rdc_write_bitmap(krdc); - } - kmem_free(bd.data.data_val, BMAP_BLKSIZEV7); -out: - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_getsize (v5) - */ -static void -r_net_getsize(SVCXPRT *xprt) -{ - int e, ret = -1, index; - rdc_k_info_t *krdc; - - e = SVC_GETARGS(xprt, xdr_int, (char *)&index); - if (e) { - krdc = &rdc_k_info[index]; - if (IS_VALID_INDEX(index) && ((krdc->type_flag & - RDC_DISABLEPEND) == 0)) - ret = mirror_getsize(index); - } - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_getsize (v6) - */ -static void -r_net_getsize6(SVCXPRT *xprt) -{ - int e, index; - rdc_k_info_t *krdc; - uint64_t ret; - - /* - * small change in semantics here, as we can't return - * -1 over the wire anymore. 
- */ - ret = 0; - - e = SVC_GETARGS(xprt, xdr_int, (char *)&index); - if (e) { - krdc = &rdc_k_info[index]; - if (IS_VALID_INDEX(index) && ((krdc->type_flag & - RDC_DISABLEPEND) == 0)) - ret = mirror_getsize(index); - } - (void) svc_sendreply(xprt, xdr_u_longlong_t, (char *)&ret); -} - - -/* - * r_net_state4 - */ -static void -r_net_state4(SVCXPRT *xprt) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - struct set_state4 state; - rdc_set_t rdc_set; - int e, index = -1; - int options; - int log = 0; - int done = 0; - int slave = 0; - int rev_sync = 0; - - e = SVC_GETARGS(xprt, xdr_set_state4, (char *)&state); - if (e) { - init_rdc_netbuf(&(rdc_set.primary.addr)); - init_rdc_netbuf(&(rdc_set.secondary.addr)); - bcopy(state.netaddr, rdc_set.primary.addr.buf, - state.netaddrlen); - bcopy(state.rnetaddr, rdc_set.secondary.addr.buf, - state.rnetaddrlen); - rdc_set.primary.addr.len = state.netaddrlen; - rdc_set.secondary.addr.len = state.rnetaddrlen; - (void) strncpy(rdc_set.primary.file, state.pfile, - RDC_MAXNAMLEN); - (void) strncpy(rdc_set.secondary.file, state.sfile, - RDC_MAXNAMLEN); - options = state.flag; - index = rdc_lookup_byaddr(&rdc_set); - - krdc = &rdc_k_info[index]; - - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!r_net_state: no index or disable pending"); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - urdc = &rdc_u_info[index]; - - if (!IS_ENABLED(urdc)) { - index = -1; -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_state: set not enabled "); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - if (krdc->lsrv == NULL) { - cmn_err(CE_NOTE, "!r_net_state: no valid svp\n"); - index = -1; - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - if (!krdc || !krdc->group) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_state: no valid krdc %p\n", (void*)krdc); -#endif - index = -1; - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - 
} - - mutex_enter(&rdc_conf_lock); - if (krdc->type_flag & RDC_DISABLEPEND) { - mutex_exit(&rdc_conf_lock); - index = -1; -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_state: disable pending"); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - set_busy(krdc); - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - krdc->intf = rdc_add_to_if(krdc->lsrv, - &(urdc->primary.addr), &(urdc->secondary.addr), 1); - else - krdc->intf = rdc_add_to_if(krdc->lsrv, - &(urdc->secondary.addr), &(urdc->primary.addr), 0); - - if (options & CCIO_SLAVE) { - /* - * mark that the bitmap needs clearing. - */ - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - - /* Starting forward sync */ - if (urdc->volume_size == 0) - rdc_get_details(krdc); - if (urdc->volume_size == 0) { - index = -1; - goto out; - } - if (krdc->dcio_bitmap == NULL) { - if (rdc_resume_bitmap(krdc) < 0) { - index = -1; - goto out; - } - } - if (rdc_allow_sec_sync(urdc, CCIO_SLAVE) < 0) { - index = -1; - goto out; - } - rdc_dump_dsets(index); - slave = 1; - } else if (options & CCIO_RSYNC) { - /* - * mark that the bitmap needs clearing. 
- */ - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - - /* Starting reverse sync */ - if (rdc_get_vflags(urdc) & (RDC_SYNC_NEEDED | - RDC_VOL_FAILED | RDC_BMP_FAILED)) { - index = -1; - goto out; - } - if (rdc_allow_sec_sync(urdc, CCIO_RSYNC) < 0) { - index = -1; - goto out; - } - rdc_dump_dsets(index); - rev_sync = 1; - } else if (options & CCIO_DONE) { - /* Sync completed OK */ - if (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED) - done = 1; /* forward sync complete */ - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNCING | RDC_SYNC_NEEDED); - rdc_clr_mflags(urdc, RDC_SLAVE | RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - rdc_write_state(urdc); - if (rdc_get_vflags(urdc) & RDC_CLR_AFTERSYNC) { - RDC_ZERO_BITMAP(krdc); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - } - } else if (options & CCIO_ENABLELOG) { - /* Sync aborted or logging started */ - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - rdc_clr_flags(urdc, RDC_SYNCING); - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - } - log = 1; - } -out: - rdc_group_exit(krdc); - free_rdc_netbuf(&(rdc_set.primary.addr)); - free_rdc_netbuf(&(rdc_set.secondary.addr)); - - if (slave) { - if (_rdc_sync_event_notify(RDC_SYNC_START, - urdc->secondary.file, urdc->group_name) >= 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - rdc_set_flags(urdc, - RDC_SYNCING | RDC_SYNC_NEEDED); - rdc_set_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - rdc_write_state(urdc); - rdc_group_exit(krdc); - } else { - index = -1; - } - } else if (rev_sync) { - /* Check to see if volume is mounted */ - if (_rdc_sync_event_notify(RDC_RSYNC_START, - urdc->secondary.file, urdc->group_name) >= 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_set_flags(urdc, RDC_SYNCING); - rdc_write_state(urdc); - rdc_group_exit(krdc); - } else { - index 
= -1; - } - } else if (done) { - - /* - * special case... - * if this set is in a group, then sndrsyncd will - * make sure that all sets in the group are REP - * before updating the config to "update", telling - * sndrsyncd that it is ok to take anther snapshot - * on a following sync. The important part about - * the whole thing is that syncd needs kernel stats. - * however, this thread must set the set busy to - * avoid disables. since this is the only - * sync_event_notify() that will cause a status - * call back into the kernel, and we will not be - * accessing the group structure, we have to wakeup now - */ - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - (void) _rdc_sync_event_notify(RDC_SYNC_DONE, - urdc->secondary.file, urdc->group_name); - } - } - - if (!done) { - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - if (log) { - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_NOFLUSH | RDC_OTHERREMOTE, - "Sync aborted or logging started"); - rdc_group_exit(krdc); - } -} - - -/* - * r_net_state - */ -static void -r_net_state(SVCXPRT *xprt) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - struct set_state state; - rdc_set_t rdc_set; - int e, index = -1; - int options; - int log = 0; - int done = 0; - int slave = 0; - int rev_sync = 0; - unsigned short *sp; - - bzero(&state, sizeof (struct set_state)); - e = SVC_GETARGS(xprt, xdr_set_state, (char *)&state); - if (e) { - init_rdc_netbuf(&(rdc_set.primary.addr)); - init_rdc_netbuf(&(rdc_set.secondary.addr)); - sp = (unsigned short *)(state.netaddr.buf); - *sp = ntohs(*sp); - bcopy(state.netaddr.buf, rdc_set.primary.addr.buf, - state.netaddrlen); - sp = (unsigned short *)(state.rnetaddr.buf); - *sp = ntohs(*sp); - bcopy(state.rnetaddr.buf, rdc_set.secondary.addr.buf, - state.rnetaddrlen); - rdc_set.primary.addr.len = state.netaddrlen; - rdc_set.secondary.addr.len = state.rnetaddrlen; 
- (void) strncpy(rdc_set.primary.file, state.pfile, - RDC_MAXNAMLEN); - (void) strncpy(rdc_set.secondary.file, state.sfile, - RDC_MAXNAMLEN); - options = state.flag; - index = rdc_lookup_byaddr(&rdc_set); - - krdc = &rdc_k_info[index]; - - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!r_net_state: no index or disable pending"); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - urdc = &rdc_u_info[index]; - - if (!IS_ENABLED(urdc)) { - index = -1; -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_state: set not enabled "); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - if (krdc->lsrv == NULL) { - cmn_err(CE_NOTE, "!r_net_state: no valid svp\n"); - index = -1; - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - if (!krdc || !krdc->group) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_state: no valid krdc %p\n", (void*)krdc); -#endif - index = -1; - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - mutex_enter(&rdc_conf_lock); - if (krdc->type_flag & RDC_DISABLEPEND) { - mutex_exit(&rdc_conf_lock); - index = -1; -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_state: disable pending"); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - set_busy(krdc); - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - krdc->intf = rdc_add_to_if(krdc->lsrv, - &(urdc->primary.addr), &(urdc->secondary.addr), 1); - else - krdc->intf = rdc_add_to_if(krdc->lsrv, - &(urdc->secondary.addr), &(urdc->primary.addr), 0); - - if (options & CCIO_SLAVE) { - /* - * mark that the bitmap needs clearing. 
- */ - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - - /* Starting forward sync */ - if (urdc->volume_size == 0) - rdc_get_details(krdc); - if (urdc->volume_size == 0) { - index = -1; - goto out; - } - if (krdc->dcio_bitmap == NULL) { - if (rdc_resume_bitmap(krdc) < 0) { - index = -1; - goto out; - } - } - if (rdc_allow_sec_sync(urdc, CCIO_SLAVE) < 0) { - index = -1; - goto out; - } - rdc_dump_dsets(index); - slave = 1; - } else if (options & CCIO_RSYNC) { - /* - * mark that the bitmap needs clearing. - */ - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - - /* Starting reverse sync */ - if (rdc_get_vflags(urdc) & (RDC_SYNC_NEEDED | - RDC_VOL_FAILED | RDC_BMP_FAILED)) { - index = -1; - goto out; - } - if (rdc_allow_sec_sync(urdc, CCIO_RSYNC) < 0) { - index = -1; - goto out; - } - rdc_dump_dsets(index); - rev_sync = 1; - } else if (options & CCIO_DONE) { - /* Sync completed OK */ - if (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED) - done = 1; /* forward sync complete */ - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNCING | RDC_SYNC_NEEDED); - rdc_clr_mflags(urdc, RDC_SLAVE | RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - rdc_write_state(urdc); - if (rdc_get_vflags(urdc) & RDC_CLR_AFTERSYNC) { - RDC_ZERO_BITMAP(krdc); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - } - } else if (options & CCIO_ENABLELOG) { - /* Sync aborted or logging started */ - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - rdc_clr_flags(urdc, RDC_SYNCING); - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - } - log = 1; - } -out: - rdc_group_exit(krdc); - free_rdc_netbuf(&(rdc_set.primary.addr)); - free_rdc_netbuf(&(rdc_set.secondary.addr)); - - if (slave) { - if (_rdc_sync_event_notify(RDC_SYNC_START, - urdc->secondary.file, urdc->group_name) >= 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_many_enter(krdc); - 
rdc_clr_flags(urdc, RDC_VOL_FAILED); - rdc_set_flags(urdc, - RDC_SYNCING | RDC_SYNC_NEEDED); - rdc_set_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - rdc_write_state(urdc); - rdc_group_exit(krdc); - } else { - index = -1; - } - } else if (rev_sync) { - /* Check to see if volume is mounted */ - if (_rdc_sync_event_notify(RDC_RSYNC_START, - urdc->secondary.file, urdc->group_name) >= 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_set_flags(urdc, RDC_SYNCING); - rdc_write_state(urdc); - rdc_group_exit(krdc); - } else { - index = -1; - } - } else if (done) { - - /* - * special case... - * if this set is in a group, then sndrsyncd will - * make sure that all sets in the group are REP - * before updating the config to "update", telling - * sndrsyncd that it is ok to take anther snapshot - * on a following sync. The important part about - * the whole thing is that syncd needs kernel stats. - * however, this thread must set the set busy to - * avoid disables. since this is the only - * sync_event_notify() that will cause a status - * call back into the kernel, and we will not be - * accessing the group structure, we have to wakeup now - */ - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - (void) _rdc_sync_event_notify(RDC_SYNC_DONE, - urdc->secondary.file, urdc->group_name); - } - } - - if (!done) { - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - if (log) { - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_NOFLUSH | RDC_OTHERREMOTE, - "Sync aborted or logging started"); - rdc_group_exit(krdc); - } - free_rdc_netbuf(&(state.netaddr)); - free_rdc_netbuf(&(state.rnetaddr)); -} - -/* - * r_net_getstate4 - * Return our state to client - */ -static void -r_net_getstate4(SVCXPRT *xprt, struct svc_req *req) -{ - int e, ret = -1, index = -1; - struct set_state4 state; - rdc_u_info_t *urdc; - rdc_set_t rdc_set; - - 
bzero(&state, sizeof (struct set_state)); - e = SVC_GETARGS(xprt, xdr_set_state4, (char *)&state); - if (e) { - init_rdc_netbuf(&(rdc_set.primary.addr)); - init_rdc_netbuf(&(rdc_set.secondary.addr)); - bcopy(state.netaddr, rdc_set.primary.addr.buf, - state.netaddrlen); - bcopy(state.rnetaddr, rdc_set.secondary.addr.buf, - state.rnetaddrlen); - rdc_set.primary.addr.len = state.netaddrlen; - rdc_set.secondary.addr.len = state.rnetaddrlen; - (void) strncpy(rdc_set.primary.file, state.pfile, - RDC_MAXNAMLEN); - (void) strncpy(rdc_set.secondary.file, state.sfile, - RDC_MAXNAMLEN); - index = rdc_lookup_byaddr(&rdc_set); - if (index >= 0) { - urdc = &rdc_u_info[index]; - - ret = 0; - if (rdc_get_vflags(urdc) & RDC_SYNCING) - ret |= 4; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - ret |= 2; - if (rdc_get_vflags(urdc) & RDC_LOGGING) - ret |= 1; - rdc_set_if_vers(urdc, req->rq_vers); - } - free_rdc_netbuf(&(rdc_set.primary.addr)); - free_rdc_netbuf(&(rdc_set.secondary.addr)); - } - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_getstate7 - * Return our state to client - */ -static void -r_net_getstate7(SVCXPRT *xprt, struct svc_req *req) -{ - int e, ret = -1, index = -1; - struct set_state state; - char pstr[RDC_MAXNAMLEN]; - char sstr[RDC_MAXNAMLEN]; - rdc_u_info_t *urdc; - rdc_set_t rdc_set; - unsigned short *sp; - - bzero(&state, sizeof (struct set_state)); - state.pfile = pstr; - state.sfile = sstr; - - e = SVC_GETARGS(xprt, xdr_set_state, (char *)&state); - if (e) { - init_rdc_netbuf(&(rdc_set.primary.addr)); - init_rdc_netbuf(&(rdc_set.secondary.addr)); - sp = (unsigned short *)(state.netaddr.buf); - *sp = ntohs(*sp); - bcopy(state.netaddr.buf, rdc_set.primary.addr.buf, - state.netaddrlen); - sp = (unsigned short *)(state.rnetaddr.buf); - *sp = ntohs(*sp); - bcopy(state.rnetaddr.buf, rdc_set.secondary.addr.buf, - state.rnetaddrlen); - rdc_set.primary.addr.len = state.netaddrlen; - rdc_set.secondary.addr.len = state.rnetaddrlen; - /* - * 
strncpy(rdc_set.primary.file, state.pfile, RDC_MAXNAMLEN); - * strncpy(rdc_set.secondary.file, state.sfile, RDC_MAXNAMLEN); - */ - bcopy(state.pfile, rdc_set.primary.file, RDC_MAXNAMLEN); - bcopy(state.sfile, rdc_set.secondary.file, RDC_MAXNAMLEN); - index = rdc_lookup_byaddr(&rdc_set); - if (index >= 0) { - urdc = &rdc_u_info[index]; - - ret = 0; - if (rdc_get_vflags(urdc) & RDC_SYNCING) - ret |= 4; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - ret |= 2; - if (rdc_get_vflags(urdc) & RDC_LOGGING) - ret |= 1; - rdc_set_if_vers(urdc, req->rq_vers); - } - free_rdc_netbuf(&(rdc_set.primary.addr)); - free_rdc_netbuf(&(rdc_set.secondary.addr)); - } - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * copy from/to a dset/vector combination to a network xdr buffer. - */ -static int -rdc_dsetcopy(rdc_net_dataset_t *dset, nsc_vec_t *invec, nsc_off_t fba_pos, - nsc_size_t fba_len, char *bdata, int blen, int dir) -{ - nsc_vec_t *vec; - uchar_t *sv_addr; - uchar_t *data; - int sv_len; - nsc_off_t fpos; - int len; - int n; - - if (!bdata || !dset || !invec) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc: dsetcopy: parameters failed bdata %p, dset %p " - "invec %p", (void *)bdata, (void *)dset, (void *)invec); -#endif - return (FALSE); - } - - if (fba_len > MAX_RDC_FBAS || - (dir != COPY_IN && dir != COPY_OUT)) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc: dsetcopy: params failed fba_len %" NSC_SZFMT - " fba_pos %" NSC_SZFMT ", dir %d", fba_len, fba_pos, dir); -#endif - return (FALSE); - } - - data = (uchar_t *)bdata; /* pointer to data in rpc */ - len = FBA_SIZE(fba_len); /* length of this transfer in bytes */ - fpos = fba_pos; /* start fba offset within buffer */ - - if (!len) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc: dsetcopy: len = 0"); -#endif - return (FALSE); - } - - if (len != blen) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc:dsetcopy: len %d != blen %d", len, blen); -#endif - if (len > blen) - len = blen; - } - - if (!RDC_DSET_LIMITS(dset, fba_pos, fba_len)) { - /* 
should never happen */ -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc: dsetcopy: handle limits pos %" NSC_SZFMT " (%" - NSC_SZFMT ") len %" NSC_SZFMT " (%" NSC_SZFMT ")", - fba_pos, dset->pos, fba_len, dset->fbalen); -#endif - return (FALSE); /* Don't overrun handle */ - } - - vec = invec; - fpos -= dset->pos; - - /* find starting position in vector */ - - for (; fpos >= FBA_NUM(vec->sv_len); vec++) - fpos -= FBA_NUM(vec->sv_len); - - /* - * Copy data - */ - - sv_addr = vec->sv_addr + FBA_SIZE(fpos); - sv_len = vec->sv_len - FBA_SIZE(fpos); - - while (len) { - if (!sv_addr) /* end of vec - how did this happen? */ - break; - - n = min(sv_len, len); - - if (dir == COPY_OUT) - bcopy(data, sv_addr, (size_t)n); - else - bcopy(sv_addr, data, (size_t)n); - - sv_len -= n; - len -= n; - - sv_addr += n; - data += n; - - if (sv_len <= 0) { - /* goto next vector */ - vec++; - sv_addr = vec->sv_addr; - sv_len = vec->sv_len; - } - } - - return (TRUE); -} - - -/* - * rdc_start_server - * Starts the kRPC server for rdc. Uses tli file descriptor passed down - * from user level rdc server. - * - * Returns: 0 or errno (NOT unistat!). - */ -int -rdc_start_server(struct rdc_svc_args *args, int mode) -{ - file_t *fp; - int ret; - struct cred *cred; - STRUCT_HANDLE(rdc_svc_args, rs); - - STRUCT_SET_HANDLE(rs, mode, args); - cred = ddi_get_cred(); - if (drv_priv(cred) != 0) - return (EPERM); - fp = getf(STRUCT_FGET(rs, fd)); - if (fp == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_start_server fd %d, fp %p", args->fd, - (void *) fp); -#endif - return (EBADF); - } - - ret = rdcsrv_load(fp, rdc_srvtab, args, mode); - - releasef(STRUCT_FGET(rs, fd)); - return (ret); -} - -/* - * Allocate a new sleepq element. 
- */ - -static rdc_sleepq_t * -rdc_newsleepq() -{ - rdc_sleepq_t *sq; - - sq = kmem_alloc(sizeof (rdc_sleepq_t), KM_SLEEP); - sq->next = NULL; -#ifdef DEBUG - mutex_enter(&rdc_cntlock); - rdc_sleepcnt++; - mutex_exit(&rdc_cntlock); -#endif - return (sq); -} - -/* - * free memory/resources used by a sleepq element. - */ -static void -rdc_delsleepq(rdc_sleepq_t *sq) -{ - rdc_net_dataset_t *dset; - - if (sq->idx != -1) { - dset = rdc_net_get_set(sq->sindex, sq->idx); - if (dset) { - rdc_net_del_set(sq->sindex, dset); - } - } - kmem_free(sq, sizeof (rdc_sleepq_t)); -#ifdef DEBUG - mutex_enter(&rdc_cntlock); - rdc_sleepcnt--; - mutex_exit(&rdc_cntlock); -#endif -} - - -/* - * skip down the sleep q and insert the sleep request - * in ascending order. Return 0 on success, 1 on failure. - */ -static int -rdc_sleepq(rdc_group_t *group, rdc_sleepq_t *sq) -{ - rdc_sleepq_t *findsq; - - - ASSERT(MUTEX_HELD(&group->ra_queue.net_qlock)); - if (group->sleepq == NULL) { - group->sleepq = sq; - } else { - if (sq->seq == group->sleepq->seq) { - cmn_err(CE_WARN, "!rdc_sleepq: Attempt to " - "add duplicate request to queue %d", sq->seq); - return (1); - } - if (RDC_INFRONT(sq->seq, group->sleepq->seq)) { - sq->next = group->sleepq; - group->sleepq = sq; - } else { - findsq = group->sleepq; - - while (findsq->next) { - if (sq->seq == findsq->next->seq) { - cmn_err(CE_WARN, "!rdc_sleepq: " - "Attempt to add duplicate " - "request to queue %d", sq->seq); - return (1); - } - if (RDC_INFRONT(sq->seq, findsq->next->seq)) { - sq->next = findsq->next; - findsq->next = sq; - break; - } - findsq = findsq->next; - } - if (findsq->next == NULL) - findsq->next = sq; - } - } - return (0); -} - -/* - * run down the sleep q and discard all the sleepq elements. 
- */ -void -rdc_sleepqdiscard(rdc_group_t *group) -{ - rdc_sleepq_t *sq; - rdc_k_info_t *krdc; - - ASSERT(MUTEX_HELD(&group->ra_queue.net_qlock)); - sq = group->sleepq; - - while (sq) { - rdc_sleepq_t *dsq; - - dsq = sq; - krdc = &rdc_k_info[dsq->sindex]; - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - sq = sq->next; - rdc_delsleepq(dsq); - } - group->sleepq = NULL; -} - -/* - * split any write requests down to maxfba sized chunks. - */ -/*ARGSUSED*/ -static int -rdc_writemaxfba(rdc_k_info_t *krdc, rdc_u_info_t *urdc, - rdc_net_dataset_t *dset, uint_t seq, int nocache) -{ - int len; - int ret; - nsc_vec_t vector[2]; - nsc_buf_t *handle; - int reserved; - int rtype; - nsc_size_t mfba; - nsc_size_t wsize; - nsc_off_t pos; - int eintr_count; - unsigned char *daddr; - int kstat_len; - - kstat_len = len = dset->fbalen; - ret = 0; - handle = NULL; - reserved = 0; - rtype = RDC_RAW; - - ASSERT(dset->nitems == 1); - - eintr_count = 0; - do { - ret = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (ret == EINTR) { - ++eintr_count; - delay(2); - } - } while ((ret == EINTR) && (eintr_count < MAX_EINTR_COUNT)); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_writemaxfba: reserve devs " - "failed %d", ret); -#endif - goto out; - - } - reserved = 1; - /* - * Perhaps we should cache mfba. - */ - ret = nsc_maxfbas(RDC_U_FD(krdc), 0, &mfba); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_writemaxfba: msc_maxfbas failed %d", - ret); -#endif - goto out; - } - - ASSERT(urdc->volume_size != 0); - if (dset->pos + len > urdc->volume_size) { - /* should never happen */ - /* - * also need to trim down the vector - * sizes. 
- */ - kstat_len = len = urdc->volume_size - dset->pos; - dset->head->len -= FBA_SIZE(len); - ASSERT(dset->head->len > 0); - } - daddr = dset->head->dptr; - pos = dset->pos; - vector[1].sv_addr = NULL; - vector[1].sv_len = 0; - - while (len > 0) { - wsize = min((nsc_size_t)len, mfba); - vector[0].sv_addr = daddr; - vector[0].sv_len = FBA_SIZE(wsize); - - if (handle) { - (void) nsc_free_buf(handle); - handle = NULL; - } - ret = nsc_alloc_buf(RDC_U_FD(krdc), pos, wsize, - NSC_WRBUF|NSC_NODATA|nocache, &handle); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_writemaxfba: " - "nsc_alloc (d1) buf failed %d at " - "pos %" NSC_SZFMT " len %" NSC_SZFMT, - ret, pos, wsize); -#endif - goto out; - } - handle->sb_vec = &vector[0]; - ret = rdc_combywrite(krdc, handle); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_writemaxfba: " - "write failed (d1) %d offset %" NSC_SZFMT " " - "length %" NSC_SZFMT, ret, pos, wsize); -#endif - goto out; - } - pos += wsize; - len -= wsize; - daddr += FBA_SIZE(wsize); - } -out: - if (!RDC_SUCCESS(ret)) { - if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - ASSERT(!(rdc_get_vflags(urdc) & - RDC_PRIMARY)); - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "svc write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - } else { - /* success */ -#ifdef DEBUG - if (rdc_netwrite6) { - /* - * This string is used in the ZatoIchi MASNDR - * tests, if you change this, update the test. 
- */ - cmn_err(CE_NOTE, "!writemaxfba: Write " - "sequence %u", seq); - } -#endif - if (krdc->io_kstats) { - KSTAT_IO_PTR(krdc->io_kstats)->writes++; - KSTAT_IO_PTR(krdc->io_kstats)->nwritten += - FBA_SIZE(kstat_len); - } - } - if (handle) - (void) nsc_free_buf(handle); - if (reserved) - _rdc_rlse_devs(krdc, rtype); - return (ret); -} - -static int -rdc_combywrite(rdc_k_info_t *krdc, nsc_buf_t *handle) -{ - int rsync; - int ret; - int multiret; - - rsync = -1; - ret = 0; - /* Handle multihop I/O even on error */ - if (IS_MULTI(krdc)) { - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - rdc_many_enter(krdc); - /* - * Find a target primary that is enabled, - * taking account of the fact that this - * could be a multihop secondary - * connected to a 1-to-many primary. - */ - ktmp = krdc->multi_next; - if (ktmp == NULL) { - rdc_many_exit(krdc); - goto multi_done; - } - utmp = &rdc_u_info[ktmp->index]; - do { - if ((rdc_get_vflags(utmp) & RDC_PRIMARY) - /* CSTYLED */ - && IS_ENABLED(utmp)) - break; - - ktmp = ktmp->many_next; - utmp = &rdc_u_info[ktmp->index]; - } while (ktmp != krdc->multi_next); - - if (!(rdc_get_vflags(utmp) & RDC_PRIMARY) || - !IS_ENABLED(utmp)) { - rdc_many_exit(krdc); - goto multi_done; - } - - rdc_many_exit(krdc); - rsync = (rdc_get_mflags(utmp) & RDC_SLAVE); - if (!rsync) { - /* normal case - local io first */ - ret = nsc_write(handle, handle->sb_pos, handle->sb_len, - 0); - } - multiret = _rdc_multi_write(handle, handle->sb_pos, - handle->sb_len, 0, ktmp); - if (!RDC_SUCCESS(multiret)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!combywrite: " - "rdc_multi_write failed " - "status %d ret %d", - handle->sb_error, multiret); -#endif - if (!(rdc_get_vflags(utmp) & - RDC_VOL_FAILED)) { - rdc_many_enter(ktmp); - if (rdc_get_vflags(utmp) & - RDC_PRIMARY) { - rdc_set_mflags(utmp, - RDC_RSYNC_NEEDED); - } else { - rdc_set_flags(utmp, - RDC_SYNC_NEEDED); - } - rdc_set_flags(utmp, - RDC_VOL_FAILED); - rdc_many_exit(ktmp); - rdc_write_state(utmp); - } - } - } - 
-multi_done: - if (rsync != 0) { - /* - * Either: - * reverse sync in progress and so we - * need to do the local io after the - * (multihop) secondary io. - * Or: - * no multihop and this is the only io - * required. - */ - ret = nsc_write(handle, handle->sb_pos, handle->sb_len, 0); - - } - return (ret); -} -/* - * set the pos and len values in the piggyback reply. - */ -static void -rdc_setbitind(int *pendcnt, net_pendvec_t *pvec, rdc_net_dataset_t *dset, - uint_t seq, int pindex, int qpos) -{ - int pc; - ASSERT(*pendcnt < RDC_MAXPENDQ); - - pc = *pendcnt; - pvec[pc].seq = seq; - pvec[pc].apos = dset->pos; - pvec[pc].qpos = qpos; - pvec[pc].alen = dset->fbalen; - pvec[pc].pindex = pindex; - *pendcnt = pc + 1; - DTRACE_PROBE1(pvec_reply, int, seq); -} - -/* - * Enters with group->ra_queue.net_qlock held. - * Tries to construct the return status data for - * all the pending requests in the sleepq that it can - * satisfy. - */ -static void -rdc_dopending(rdc_group_t *group, netwriteres *netretp) -{ - int pendcnt; - net_pendvec_t *pendvec; - rdc_sleepq_t *sq; - int ret; - int pendsz; - - ASSERT(MUTEX_HELD(&group->ra_queue.net_qlock)); - - pendcnt = 0; - pendsz = RDC_MAXPENDQ * sizeof (net_pendvec_t); - pendvec = kmem_alloc(pendsz, KM_SLEEP); - - /* - * now look at the Q of pending tasks, attempt - * to write any that have been waiting for - * me to complete my write, and piggyback - * their results in my reply, by setiing pendcnt - * to the number of extra requests sucessfully - * processed. 
- */ - while (group->sleepq && group->sleepq->seq == group->seq) { - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct rdc_net_dataset *dset; - - sq = group->sleepq; - group->sleepq = sq->next; - mutex_exit(&group->ra_queue.net_qlock); - - krdc = &rdc_k_info[sq->sindex]; - urdc = &rdc_u_info[sq->sindex]; - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - dset = rdc_net_get_set(sq->sindex, sq->idx); - if (dset == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!pending: %s:%s rdc_net_get_set " - "failed", urdc->secondary.intf, - urdc->secondary.file); -#endif - /* - * as we failed to get the pointer, there - * is no point expecting the cleanup - * code in rdc_delsleepq() to get it - * either. - */ - sq->idx = -1; - goto cleansq; - } - sq->idx = -1; /* marked as cleaned up */ - - ret = rdc_writemaxfba(krdc, urdc, dset, sq->seq, sq->nocache); - if (RDC_SUCCESS(ret)) { - rdc_setbitind(&pendcnt, pendvec, dset, - sq->seq, sq->pindex, sq->qpos); - } else { - cmn_err(CE_WARN, "!dopending: Write of pending " - "asynchronous task failed, with " - "sequence number %u for SNDR set %s:%s", - sq->seq, urdc->secondary.intf, - urdc->secondary.file); - } - rdc_net_del_set(sq->sindex, dset); -cleansq: - mutex_enter(&group->ra_queue.net_qlock); - group->seq = sq->seq + 1; - if (group->seq < sq->seq) - group->seq = RDC_NEWSEQ + 1; - rdc_delsleepq(sq); - } - mutex_exit(&group->ra_queue.net_qlock); - if (pendcnt) { - int vecsz; -#ifdef DEBUG - if (rdc_netwrite6) { - cmn_err(CE_NOTE, "!packing pend, count %d", pendcnt); - } -#endif - vecsz = pendcnt * sizeof (net_pendvec_t); - netretp->vecdata.vecdata_val = - kmem_alloc(vecsz, KM_SLEEP); - netretp->vecdata.vecdata_len = pendcnt; - bcopy(pendvec, netretp->vecdata.vecdata_val, vecsz); - } - kmem_free(pendvec, pendsz); - mutex_enter(&group->ra_queue.net_qlock); -} - -/* - * Take the dset and allocate and fill in the vector. 
- */ -static nsc_vec_t * -rdc_dset2vec(rdc_net_dataset_t *dset) -{ - nsc_vec_t *vecret; - int i; - rdc_net_dataitem_t *ditem; - - ASSERT(dset->nitems > 0); - ASSERT(dset->head); - ASSERT(dset->tail); - - vecret = kmem_alloc((dset->nitems + 1) * sizeof (nsc_vec_t), - KM_NOSLEEP); - if (vecret == NULL) { - return (NULL); - } - RDC_DSMEMUSE((dset->nitems + 1) * sizeof (nsc_vec_t)); - ditem = dset->head; - for (i = 0; i < dset->nitems; i++) { - ASSERT(ditem); - vecret[i].sv_addr = ditem->dptr; - vecret[i].sv_len = ditem->len; - ditem = ditem->next; - } - /* - * Null terminate. - */ - vecret[i].sv_addr = NULL; - vecret[i].sv_len = 0; - /* - * Check the list and count matches. - */ - ASSERT(ditem == NULL); - return (vecret); -} - -/* - * Split the local read into maxfba sized chunks. - * Returns 0 on an error, or a valid idx on success. - */ -static int -rdc_readmaxfba(int cd, nsc_off_t pos, nsc_size_t fbalen, int nocache) -{ - int idx; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_net_dataset_t *dset; - rdc_net_dataitem_t *ditem; - int rtype; - nsc_buf_t *handle; - nsc_vec_t veclist[2]; - int ret; - int reserved; - nsc_size_t fbaleft; - nsc_size_t mfba; - nsc_off_t fba; - nsc_off_t spos; - int eintr_count; - - handle = NULL; - idx = 0; /* error status */ - dset = NULL; - ditem = NULL; - reserved = 0; - ret = 0; - mfba = 0; - - rtype = RDC_RAW; - krdc = &rdc_k_info[cd]; - urdc = &rdc_u_info[cd]; - - eintr_count = 0; - do { - ret = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (ret == EINTR) { - ++eintr_count; - delay(2); - } - } while ((ret == EINTR) && (eintr_count < MAX_EINTR_COUNT)); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!readmaxfba: reserve failed on set %s:%s %d", - urdc->secondary.intf, urdc->secondary.file, - ret); -#endif - goto out; - } - reserved = 1; - /* - * create a dataset that we can hang all the buffers from. 
- */ - dset = rdc_net_add_set(cd); - if (dset == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!readmaxfba: Unable to allocate dset on set " - "%s:%s", urdc->secondary.intf, urdc->secondary.file); -#endif - goto out; - } - dset->pos = pos; - dset->fbalen = fbalen; - ret = nsc_maxfbas(RDC_U_FD(krdc), 0, &mfba); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!readmaxfba: msc_maxfbas failed on set %s:%s " - "%d", urdc->secondary.intf, urdc->secondary.file, ret); -#endif - goto out; - } - spos = pos; - fbaleft = fbalen; - veclist[1].sv_addr = NULL; - veclist[1].sv_len = 0; - - while (fbaleft > 0) { - fba = min(mfba, fbaleft); - if (handle) { - (void) nsc_free_buf(handle); - handle = NULL; - } - ret = nsc_alloc_buf(RDC_U_FD(krdc), spos, fba, - nocache|NSC_NODATA, &handle); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!readmaxfba: alloc failed on set" - "%s:%s %d", urdc->secondary.intf, - urdc->secondary.file, ret); -#endif - goto out; - } - ditem = kmem_alloc(sizeof (rdc_net_dataitem_t), KM_NOSLEEP); - if (ditem == NULL) { - goto out; - } - RDC_DSMEMUSE(sizeof (rdc_net_dataitem_t)); - ditem->len = FBA_SIZE(fba); - ditem->mlen = ditem->len; - ditem->dptr = kmem_alloc(ditem->len, KM_SLEEP); - RDC_DSMEMUSE(ditem->len); - ditem->next = NULL; - /* - * construct a vector list - */ - veclist[0].sv_addr = ditem->dptr; - veclist[0].sv_len = ditem->len; - handle->sb_vec = veclist; - ret = rdc_combyread(krdc, urdc, handle); - if (ret != 0) { - goto out; - } - /* - * place on linked list. - */ - dset->nitems++; - if (dset->head == NULL) { - dset->head = ditem; - dset->tail = ditem; - } else { - dset->tail->next = ditem; - dset->tail = ditem; - } - /* - * now its linked, clear this so its not freed twice. - */ - ditem = NULL; - fbaleft -= fba; - spos += fba; - } - /* - * all the reads have worked, store the results. 
- */ - idx = dset->id; - rdc_net_put_set(cd, dset); - dset = NULL; -out: - if (handle) - (void) nsc_free_buf(handle); - if (reserved) - _rdc_rlse_devs(krdc, rtype); - if (dset) - rdc_net_del_set(cd, dset); - if (ditem) { - kmem_free(ditem->dptr, ditem->mlen); - RDC_DSMEMUSE(-ditem->mlen); - kmem_free(ditem, sizeof (*ditem)); - RDC_DSMEMUSE(-sizeof (*ditem)); - } - return (idx); -} - - -/* - * perform both a local read, and if multihop, a remote read. - * return 0 on success, or errno on failure. - */ -static int -rdc_combyread(rdc_k_info_t *krdc, rdc_u_info_t *urdc, nsc_buf_t *handle) -{ - int ret; - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - /* - * read it. - */ - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - ret = nsc_read(handle, handle->sb_pos, handle->sb_len, NSC_READ); - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!combyread: read failed on set %s:%s %d", - urdc->secondary.intf, urdc->secondary.file, ret); -#endif - if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "comby read failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - goto out; - } - if (IS_MULTI(krdc) && (ktmp = krdc->multi_next) && - (utmp = &rdc_u_info[ktmp->index]) && - IS_ENABLED(utmp) && - (rdc_get_mflags(utmp) & RDC_RSYNC_NEEDED)) { - ret = _rdc_remote_read(ktmp, handle, handle->sb_pos, - handle->sb_len, NSC_READ); - /* - * Set NSC_MIXED so - * that the cache will throw away this - * buffer when we free it since we have - * combined data from multiple sources - * into a single buffer. - * Currently we don't use the cache for - * data volumes, so comment this out. 
- * handle->sb_flag |= NSC_MIXED; - */ - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!combyread: remote read failed on " - "set %s:%s %d", utmp->secondary.intf, - utmp->secondary.file, ret); -#endif - goto out; - } - } - if (krdc->io_kstats) { - KSTAT_IO_PTR(krdc->io_kstats)->reads++; - KSTAT_IO_PTR(krdc->io_kstats)->nread += - FBA_SIZE(handle->sb_len); - } -out: - return (ret); -} - - -/* - * remove and free all the collected dsets for this set. - */ -void -rdc_dump_dsets(int index) -{ - rdc_k_info_t *krdc; - rdc_net_dataset_t *dset; - - krdc = &rdc_k_info[index]; -tloop: - mutex_enter(&krdc->dc_sleep); - while ((dset = krdc->net_dataset) != NULL) { - if (dset->inuse) { - /* - * for the dset to be in use, the - * service routine r_net_write6() must - * be active with it. It will free - * it eventually. - */ - mutex_exit(&krdc->dc_sleep); - delay(5); - goto tloop; - } - /* - * free it. - */ - rdc_net_free_set(krdc, dset); - } - mutex_exit(&krdc->dc_sleep); -} - -#ifdef DEBUG -void -rdc_stallzero(int flag) -{ - static int init = 0; - static kcondvar_t cv; - static kmutex_t mu; - - if (init == 0) { - cv_init(&cv, NULL, CV_DRIVER, NULL); - mutex_init(&mu, NULL, MUTEX_DRIVER, NULL); - init = 1; - } - - mutex_enter(&mu); - switch (flag) { - case 0: - rdc_stall0 = 0; - cv_signal(&cv); - break; - case 1: - rdc_stall0 = 1; - break; - case 2: - while (rdc_stall0 == 1) - cv_wait(&cv, &mu); - break; - default: - cmn_err(CE_PANIC, "Bad flag value passed to rdc_stallzero"); - break; - } - mutex_exit(&mu); -} -#endif - -/* - * RDC protocol version 5 - */ -static rdc_disptab_t rdc_disptab5[] = -{ - /* PROC Idempotent */ - { r_net_null, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getsize, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_write5, TRUE }, - { r_net_read, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_state4, FALSE }, - { r_net_ping4, FALSE }, - { r_net_bmap, FALSE }, - { r_net_bdata, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getstate4, FALSE } -}; - -/* - 
* RDC protocol version 6 - */ -static rdc_disptab_t rdc_disptab6[] = -{ - /* PROC Idempotent */ - { r_net_null, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getsize6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_write6, TRUE }, - { r_net_read6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_state4, FALSE }, - { r_net_ping4, FALSE }, - { r_net_bmap6, FALSE }, - { r_net_bdata6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getstate4, FALSE } -}; - -/* - * RDC protocol version 7 - */ -static rdc_disptab_t rdc_disptab7[] = -{ - /* PROC Idempotent */ - { r_net_null, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getsize6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_write6, TRUE }, - { r_net_read6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_state, FALSE }, - { r_net_ping7, FALSE }, - { r_net_bmap6, FALSE }, - { r_net_bdata6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getstate7, FALSE } -}; - -static rdcsrv_t rdc_srvtab[] = { - { rdc_disptab5, sizeof (rdc_disptab5) / sizeof (*rdc_disptab5) }, - { rdc_disptab6, sizeof (rdc_disptab6) / sizeof (*rdc_disptab6) }, - { rdc_disptab7, sizeof (rdc_disptab7) / sizeof (*rdc_disptab7) } -}; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_update.h b/usr/src/uts/common/avs/ns/rdc/rdc_update.h deleted file mode 100644 index 438ff657d2..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_update.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_UPDATE_H -#define _RDC_UPDATE_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct rdc_update_s { - spcs_s_info_t status; - int protocol; /* semantics of update svc */ - char *volume; /* volume name */ - uchar_t *bitmap; /* set of changes to be made */ - int size; /* size of bitmap in bytes */ - int denied; /* don't do it? */ -} rdc_update_t; - - /* semantics of update svc call */ -#define RDC_SVC_ONRETURN 0 /* caller will update on return */ -#define RDC_SVC_VOL_ENABLED 1 /* tell me if a given vol is enabled */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_UPDATE_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdcsrv.c b/usr/src/uts/common/avs/ns/rdc/rdcsrv.c deleted file mode 100644 index 731fce8728..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdcsrv.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/file.h> -#include <sys/cred.h> -#include <sys/conf.h> -#include <sys/modctl.h> -#include <sys/errno.h> - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#ifndef DS_DDICT -#include <rpc/auth.h> -#include <rpc/svc.h> -#include <rpc/xdr.h> -#else -#include "../contract.h" -#endif - -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/nsctl/nsvers.h> - -#include "rdc_io.h" -#include "rdc_stub.h" -#include "rdc_ioctl.h" -#include "rdcsrv.h" - -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) -static void rdcsrv_xprtclose(const SVCXPRT *xprt); -#else /* SunOS 5.8 or later */ -/* - * SunOS 5.8 or later. - * - * RDC callout table - * - * This table is used by svc_getreq to dispatch a request with a given - * prog/vers pair to an approriate service provider. 
- */ - -static SVC_CALLOUT rdcsrv_sc[] = { - { RDC_PROGRAM, RDC_VERS_MIN, RDC_VERS_MAX, rdcstub_dispatch } -}; - -static SVC_CALLOUT_TABLE rdcsrv_sct = { - sizeof (rdcsrv_sc) / sizeof (rdcsrv_sc[0]), FALSE, rdcsrv_sc -}; -#endif /* SunOS 5.8 or later */ - -static kmutex_t rdcsrv_lock; - -static int rdcsrv_dup_error; -static int rdcsrv_registered; -static int rdcsrv_closing; -static int rdcsrv_refcnt; -long rdc_svc_count = 0; -static rdcsrv_t *rdcsrv_disptab; - -/* - * Solaris module setup. - */ - -extern struct mod_ops mod_miscops; - -static struct modlmisc modlmisc = { - &mod_miscops, /* Type of module */ - "nws:Remote Mirror kRPC:" ISS_VERSION_STR -}; - -static struct modlinkage modlinkage = { - MODREV_1, - &modlmisc, - NULL -}; - - -int -_init(void) -{ - int rc; - - mutex_init(&rdcsrv_lock, NULL, MUTEX_DRIVER, NULL); - - if ((rc = mod_install(&modlinkage)) != DDI_SUCCESS) - mutex_destroy(&rdcsrv_lock); - - return (rc); -} - - -int -_fini(void) -{ - int rc; - - if ((rc = mod_remove(&modlinkage)) == DDI_SUCCESS) - mutex_destroy(&rdcsrv_lock); - - return (rc); -} - - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - - -/* - * RDC kRPC server stub. 
- */ - -void -rdcsrv_noproc(void) -{ - ; -} - - -static int -rdcsrv_dispdup(struct svc_req *req, SVCXPRT *xprt) -{ - rdc_disptab_t *disp; - struct dupreq *dr; - rdcsrv_t *srvp; - void (*fn)(); - int dupstat; - - srvp = &rdcsrv_disptab[req->rq_vers - RDC_VERS_MIN]; - disp = &srvp->disptab[req->rq_proc]; - fn = disp->dispfn; - - dupstat = SVC_DUP(xprt, req, 0, 0, &dr); - - switch (dupstat) { - case DUP_ERROR: - /* svcerr_systemerr does a freeargs */ - svcerr_systemerr(xprt); - rdcsrv_dup_error++; - break; - - case DUP_INPROGRESS: - rdcsrv_dup_error++; - break; - - case DUP_NEW: - case DUP_DROP: - (*fn)(xprt, req); - SVC_DUPDONE(xprt, dr, 0, 0, DUP_DONE); - break; - - case DUP_DONE: - break; - } - - return (dupstat); -} - - -/* - * rdcsrv_dispatch is the dispatcher routine for the RDC RPC protocol - */ -void -rdcsrv_dispatch(struct svc_req *req, SVCXPRT *xprt) -{ - rdc_disptab_t *disp; - rdcsrv_t *srvp; - - mutex_enter(&rdcsrv_lock); - rdcsrv_refcnt++; - - if (!rdcsrv_registered || rdcsrv_closing || !rdcsrv_disptab) { - mutex_exit(&rdcsrv_lock); - goto outdisp; - } - - mutex_exit(&rdcsrv_lock); - - if ((req->rq_vers < RDC_VERS_MIN) || (req->rq_vers > RDC_VERS_MAX)) { - svcerr_noproc(xprt); - cmn_err(CE_NOTE, "!rdcsrv_dispatch: unknown version %d", - req->rq_vers); - /* svcerr_noproc does a freeargs on xprt */ - goto done; - } - - srvp = &rdcsrv_disptab[req->rq_vers - RDC_VERS_MIN]; - disp = &srvp->disptab[req->rq_proc]; - - if (req->rq_proc >= srvp->nprocs || - disp->dispfn == rdcsrv_noproc) { - svcerr_noproc(xprt); - cmn_err(CE_NOTE, "!rdcsrv_dispatch: bad proc number %d", - req->rq_proc); - /* svcerr_noproc does a freeargs on xprt */ - goto done; - } else if (disp->clone) { - switch (rdcsrv_dispdup(req, xprt)) { - case DUP_ERROR: - goto done; - /* NOTREACHED */ - case DUP_INPROGRESS: - goto outdisp; - /* NOTREACHED */ - default: - break; - } - } else { - (*disp->dispfn)(xprt, req); - rdc_svc_count++; - } - -outdisp: - if (!SVC_FREEARGS(xprt, (xdrproc_t)0, 
(caddr_t)0)) - cmn_err(CE_NOTE, "!rdcsrv_dispatch: bad freeargs"); -done: - mutex_enter(&rdcsrv_lock); - rdcsrv_refcnt--; - mutex_exit(&rdcsrv_lock); -} - - -static int -rdcsrv_create(file_t *fp, rdc_svc_args_t *args, int mode) -{ - /*LINTED*/ - int rc, error = 0; - /*LINTED*/ - rpcvers_t vers; - struct netbuf addrmask; - -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) - SVCXPRT *xprt; -#else - SVCMASTERXPRT *xprt; -#endif - STRUCT_HANDLE(rdc_svc_args, uap); - - STRUCT_SET_HANDLE(uap, mode, args); - - addrmask.len = STRUCT_FGET(uap, addrmask.len); - addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen); - addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP); - error = ddi_copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf, - addrmask.len, mode); - if (error) { - kmem_free(addrmask.buf, addrmask.maxlen); -#ifdef DEBUG - cmn_err(CE_WARN, "!addrmask copyin failed %p", (void *) args); -#endif - return (error); - } - - /* - * Set rdcstub's dispatch handle to rdcsrv_dispatch - */ - rdcstub_set_dispatch(rdcsrv_dispatch); - - /* - * Create a transport endpoint and create one kernel thread to run the - * rdc service loop - */ -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) - error = svc_tli_kcreate(fp, RDC_RPC_MAX, - STRUCT_FGETP(uap, netid), &addrmask, STRUCT_FGET(uap, nthr), &xprt); -#else - { -#if defined(_SunOS_5_8) - struct svcpool_args p; - p.id = RDC_SVCPOOL_ID; - p.maxthreads = STRUCT_FGET(uap, nthr); - p.redline = 0; - p.qsize = 0; - p.timeout = 0; - p.stksize = 0; - p.max_same_xprt = 0; - - error = svc_pool_create(&p); - if (error) { - cmn_err(CE_NOTE, - "!rdcsrv_create: svc_pool_create failed %d", error); - return (error); - } -#endif - error = svc_tli_kcreate(fp, RDC_RPC_MAX, - STRUCT_FGETP(uap, netid), &addrmask, - &xprt, &rdcsrv_sct, NULL, RDC_SVCPOOL_ID, FALSE); - } -#endif - - if (error) { - cmn_err(CE_NOTE, "!rdcsrv_create: svc_tli_kcreate failed %d", - error); - return (error); - } - -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) - if (xprt == NULL) { - 
cmn_err(CE_NOTE, "!xprt in rdcsrv_create is NULL"); - } else { - /* - * Register a cleanup routine in case the transport gets - * destroyed. If the registration fails for some reason, - * it means that the transport is already being destroyed. - * This shouldn't happen, but it's probably not worth a - * panic. - */ - if (!svc_control(xprt, SVCSET_CLOSEPROC, - (void *)rdcsrv_xprtclose)) { - cmn_err( -#ifdef DEBUG - CE_PANIC, -#else - CE_WARN, -#endif - "!rdcsrv_create: couldn't set xprt callback"); - - error = EBADF; - goto done; - } - } - - for (vers = RDC_VERS_MIN; vers <= RDC_VERS_MAX; vers++) { - rc = svc_register(xprt, (ulong_t)RDC_PROGRAM, vers, - rdcstub_dispatch, 0); - if (!rc) { - cmn_err(CE_NOTE, - "!rdcsrv_create: svc_register(%d, %lu) failed", - RDC_PROGRAM, vers); - - if (!error) { - error = EBADF; - } - } - } -#endif /* 5.6 or 5.7 */ - - if (!error) { - /* mark as registered with the kRPC subsystem */ - rdcsrv_registered = 1; - } - -done: - return (error); -} - - -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) -/* - * Callback routine for when a transport is closed. - */ -static void -rdcsrv_xprtclose(const SVCXPRT *xprt) -{ -} -#endif - - -/* - * Private interface from the main RDC module. 
- */ - -int -rdcsrv_load(file_t *fp, rdcsrv_t *disptab, rdc_svc_args_t *args, int mode) -{ - int rc = 0; - - mutex_enter(&rdcsrv_lock); - - rc = rdcsrv_create(fp, args, mode); - if (rc == 0) { - rdcsrv_disptab = disptab; - } - - mutex_exit(&rdcsrv_lock); - return (rc); -} - - -void -rdcsrv_unload(void) -{ - mutex_enter(&rdcsrv_lock); - - /* Unset rdcstub's dispatch handle */ - rdcstub_unset_dispatch(); - - rdcsrv_closing = 1; - - while (rdcsrv_refcnt > 0) { - mutex_exit(&rdcsrv_lock); - delay(drv_usectohz(25)); - mutex_enter(&rdcsrv_lock); - } - - rdcsrv_closing = 0; - rdcsrv_disptab = 0; - - mutex_exit(&rdcsrv_lock); -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdcsrv.h b/usr/src/uts/common/avs/ns/rdc/rdcsrv.h deleted file mode 100644 index cd1fc88906..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdcsrv.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _RDCSRV_H -#define _RDCSRV_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - - -typedef struct rdc_disptab_s { - void (*dispfn)(); - int clone; -} rdc_disptab_t; - -typedef struct rdcsrv_s { - rdc_disptab_t *disptab; - int nprocs; -} rdcsrv_t; - -extern void rdcsrv_noproc(void); -extern void rdcsrv_unload(void); -extern int rdcsrv_load(file_t *, rdcsrv_t *, rdc_svc_args_t *, int); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDCSRV_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/CACHE_SPEC.txt b/usr/src/uts/common/avs/ns/sdbc/CACHE_SPEC.txt deleted file mode 100644 index 1769251955..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/CACHE_SPEC.txt +++ /dev/null @@ -1,389 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# $Id: CACHE_SPEC,v 3.6.0.0 1998/01/05 22:55:19 idumois Exp $ -# - - "sd" cache layer - ---------------- -#include <sys/sd/sd.h> - -The "sd" layer provides a common interface to the functionality -described below. It will also allow switching to a direct to disk -version, so that a new cache module could be loaded. 
-The functions are basically the same as those below, -but named without the leading underscore. -(ie sd_alloc_buf instead of _sd_alloc_buf) - - - "sdbc" -- storage device block cache (aka blkc) - ----------------------------------------------- - -#include "uts/sd/sdbc/sd_cache.h" /* for SDBC interface */ -#include "sys/sd/sd.h" /* for generic SD interface */ - -(all interaction is in terms of the buf_handle. - -Currently buf_handle is declared as: - -#define _SD_MAX_BLKS 64 -#define _SD_MAX_FBAS (_SD_MAX_BLKS << FBA_SHFT) - -typedef struct _sd_buf_handle { - int bh_cd; /* actually bh_buf.sb_cd */ - int bh_fba_pos; /* bh_buf.sb_pos */ - int bh_fba_len; /* bh_buf.sb_len */ - int bh_flag; /* bh_buf.sb_flag */ - int bh_error; /* bh_buf.sb_error */ - _sd_vec_t bh_bufvec[_SD_MAX_BLKS]; /* bh_buf.sb_vec */ - void (*bh_disconnect_cb)(); - void (*bh_read_cb)(); - void (*bh_write_cb)(); - ...... -} _sd_buf_handle_t; - - -typedef struct sd_vec_s { /* Scatter gather element */ - unsigned char *sv_addr; /* Virtual address of data */ - unsigned int sv_vme; /* VME address of data */ - int sv_len; /* Data length in bytes */ -} sd_vec_t; - -The upper level routines should reference only: handle->bh_error, -handle->bh_bufvec The bh_bufvec is an array of _sd_vec_t with the -last item in the array having a NULL bufaddr. - -IMPORTANT: The handle should be treated read-only and never be modified. - - 1) Multiple accesses to a single file will be supported. - (Side effect: If a process owning cache blocks of a files attempts - to allocate overlapping cache blocks, it will be a - deadlock condition.) - - 2) Multiple writes to an allocated block will be supported. It - is no longer necessary to free and re-allocate between writes. - - 3) _SD_NOBLOCK is equivalent of async_io -- the io will be initiated - if required with the call returning _SD_PENDING. A callback - (read or write) will be called at io end action. 
- - 4) Disconnect hints to ckd will be provided by the use of - either psema or thread_bind() when io needs to be initiated. - - -NOTE: - fba_pos = disk block number, each block being 512 bytes. - fba_len = len in disk blocks, each block being 512 bytes. - Thus, 512 bytes = 1 fba_len, 1024 = 2 fba_len etc... - -Hints: - _SD_WRTHRU: write through mode. - This hint can be set on a node, a device or per access. - _SD_FORCED_WRTHRU: forced write through (node down or flow control) - If this hint is cleared, when only one node is up, - _sd_uncommit() will not work properly, and a second - failure could result in lost data. - This is a node hint. - _SD_NOCACHE: reuse cache blocks immediately instead of keeping - in lru order. - This hint can be set on a device or per access. - -Interface: - -_sd_buf_handle_t * -_sd_alloc_handle(discon_cb, read_cb, write_cb) - void (*discon_cb)(); - void (*read_cb)(); - void (*write_cb)(); - - The callbacks can be NULL if you do not want any callbacks. - Else, the callbacks will be stored in the handle, and will be - called at specific points in the cache. (Its up to the - callback to do what is necessary, including disconnecting - from the channel) - - Usage: for better performance, an application could allocate - a handle (or as many handles as is required) upfront and - use it later on in the cache calls. - - Not allocating and managing the handles would mean a new - handle will be allocated and freed during _sd_alloc_buf - and _sd_freebuf. - -int -_sd_free_handle(handle) - _sd_buf_handle_t *handle; - - Only handles that are allocated through _sd_alloc_handle - should be freed with this call. - -int -_sd_alloc_buf (cd, fba_pos, fba_len, flag, handle_p) - int cd; - int fba_pos; - int fba_len; - int flag; - _sd_buf_handle_t **handle_p; - - cd = cache descriptor. Results in an error if this node does - not own this disk and the other node has not crashed. - (ie. 
requests must be routed to the correct node) - (see fault tolerant aspects discussed elsewhere) - - fba_pos = disk position in multiples of 512 byte blocks. - fba_len = length in multiples of 512 bytes blocks. - (NOTE: This cannot exceed _SD_MAX_FBAS) - - flag = None, one or more of the following (described below): - _SD_RDBUF | SD_WRBUF | _SD_RDWRBUF | _SD_PINNABLE | - _SD_NOBLOCK | _SD_NOCACHE | _SD_WRTHRU - - handle_p = (*handle_p = handle to be used for this call) - If *handle_p == NULL, a new handle will be - allocated. _sd_free_buf will free up any handles - allocated in this fashion. - NOTE: Handles allocated in this fashion will not have - any callbacks registered in them. As such, - _SD_NOBLOCK flag along with a NULL handle would - result in the io being lost. - - return: Error number if > 0 - possible errors: - EINVAL if arguments are incorrect or - cache not initialized or - device not open. - E2BIG if this request is a read and such a large - request cannot be currently satisfied. (break up - the io or re-issue at a later point) - EIO or any other errno that the driver might return. - Note: on error, the handle is not active, - and also is freed if *handle_p was NULL. - - if 0 or less, status will be one of: - _SD_DONE: buffer is ready, and ready to be used. - (with the blocks valid if _SD_RDBUF is set) - _SD_PENDING: - read callback, if one has been registered in the handle, - will be called to complete this request. - _SD_HIT: Same as _SD_DONE, read was satisfied by cache, - or no blocking required for write buffer. - - Note: _SD_RDBUF will issue the read if necessary. - _SD_WRBUF allocates a network address to reflect to - mirror node on _sd_write(). - ~_SD_RDBUF allocates buffers but does NOT pre-read; - use _sd_read() to fill in (portions) as req'd. - - Note: flag == (_SD_RDBUF|_SD_WRTHRU|_SD_NOCACHE) will - clear valid bits (that are not dirty) thus read direct - from disk, without requiring a hash invalidate. 
- - -int -_sd_write (handle, fba_pos, fba_len, flag) - _sd_buf_handle_t *handle; - int fba_pos, fba_len; - int flag; -{ - handle = handle previously allocated in allocate buf. - fba_pos and fba_len have to be within the allocated portion. - int flag. Flag: _SD_NOBLOCK | SD_WRTHRU - - Attempting to write to a handle that was not allocated for write - will return error (EINVAL) - - returns: errno if return > 0 - if 0 or less, return will be one of: - _SD_PENDING: will be returned only if _SD_NOBLOCK is set AND - either the flag is _SD_WRTHRU or the other node is down, - or the device/node is in write through mode - _SD_DONE: is returned if the block has been written to the disk. - _SD_HIT: write block in cache.. - -int -_sd_read (handle, fba_pos, fba_len, flag) - _sd_buf_handle_t *handle; - int fba_pos, fba_len; - int flag; - - handle = handle previously allocated in allocate buf. - fba_pos and fba_len have to be within the allocated portion. - int flag. Flag: _SD_NOBLOCK - - returns: errno if return > 0 - error E2BIG if this request is big and cannot be currently - satisfied. (break up the io or re-issue at a later point) - - if 0 or less, return will be one of: - _SD_PENDING: will be returned only if _SD_NOBLOCK is set and - we need to do an io. - _SD_HIT: is returned if the blocks were satisfied by cache. - _SD_DONE: some blocks were read from disk. - -int -_sd_uncommit(handle, fba_pos, fba_len, flag) - _sd_buf_handle_t *handle; - int fba_pos, fba_len; - int flag; - - handle = handle previously allocated in allocate buf. - fba_pos and fba_len have to be within the allocated portion. - flag: reserved for future use. - - _sd_uncommit could block and cannot be called from a - "non-blocking" context. - (This is under review, from the ckd point of view) - - returns 0 (_SD_DONE) else errno; - - -int -_sd_zero (handle, fba_pos, fba_len, flag) - _sd_buf_handle_t *handle; - int fba_pos, fba_len; - int flag; - - handle = handle previously allocated in allocate buf. 
- fba_pos and fba_len have to be within the allocated portion. - zero the buffer described by the handle. - flag: _SD_NOBLOCK | _SD_WRTHRU - - The call commits data to disk. - This call has characteristics similar to _sd_write. - - returns: errno if return > 0 - if 0 or less, return will be one of: - _SD_DONE - _SD_PENDING - -_sd_copy (handle1, handle2, fba_pos1, fba_pos2, fba_len) - _sd_buf_handle_t *handle1, handle2; - int fba_pos1, fba_pos2, fba_len; - - Copies relevant data from handle1 to handle2. - Useful for mirroring, remote dual copy, backup while open, - in-house tests, etc. - - This call does not commit data to disk - you must explicitly - call _sd_write() on handle2 if that is what you want. - - returns: errno if return > 0: - EIO - if sd module should do a generic bcopy - others - real error (passed to user) - if 0 or less, return will be: - _SD_DONE - sucess - -_sd_free_buf(handle) - _sd_buf_handle_t *handle; - - handle = handle previously allocated in allocate buf. - - returns 0 (_SD_DONE) else errno; - -_sd_open(filename, flag) - char *filename; - int flag; - - returns a cache descriptor, or negative error number. - Typically use _sd_attach_cd(cd) before accessing the device. - Note: if devices is already open, it returns the same cache descriptor. - Currently there is no reference count; so one _sd_close() closes - the cache descriptor (in all contexts). - -_sd_close(cd) - int cd; - Similar to _sd_detach_cd below. - Note: intended to be called when terminating the cache; and not during - normal operation. No reference count (see above). - Returns: 0 success, EIO. - -_sd_detach_cd(cd) - re-reflect any pinned blocks to the other side, - or wait for writes to flush; and invalidate that device's hash entries, - and relinquish device responsibility. - Returns: 0 success, EIO, EAGAIN. 
- -_sd_attach_cd(cd) - If device has pinned blocks then scan for and re-pin those blocks - (same idea as "node recovery" process, but per-device); - and assert device responsibility. - -_sd_notify_all_pin(cd) - rescan list of failed blocks and re-issue the pinned callback to - simulation. - - -_sd_register_pinned(func) - void (*func)(); - callback (*func)(cd, fba_pos, fba_len) when disk write fails, - and _SD_PENDING was specified on alloc. - -_sd_register_unpinned(func) - void (*func)(); - callback (*func)(cd, fba_pos, fba_len) when data previously pinned - is successfully written to disk. - -_sd_register_down(func) - void (*func)(); - callback (*func)() when health monitor detects the other node went down. - -_sd_set_hint(cd, hint) -_sd_clear_hint(cd, hint) -_sd_get_cd_hint(cd, &hint) -_sd_set_node_hint(hint) -_sd_clear_node_hint(hint) -_sd_get_node_hint(&hint) - - where hint is _SD_NOCACHE and _SD_WRTHRU. (Write through being synchronous - write and will be the default if the second node dies.) - - _SD_NOCACHE: hint indicating that the current access need not be - cached for later consumption. - - -_sd_discard_pinned(cd, fba_pos, fba_len) - call from ckd into cache, called when data that was earlier - on pinned can be discarded from the cache. - - returns: 0 or error. - (error = EINVAL if the discard could not be done) - -(note: there is an inherent race between the unpinned callback and -_sd_discard_pinned which could put the data on disk in an inconsistent -state) - - -Failover support: - -The Nodedown callback will be called, if one has been registered. This -will happen as soon as the other node has been detected to have gone down, -or when the cache is disabled on the other node. - -The amount of time to for this callback to happen after the node goes down -is not deterministic. - -Access to a mirror node's devices is only valid from the point the -nodedown callback is called till the other node is determined to be back -in operation. 
- -Access to mirror node's devices while recovery is in progress will -block the access till the recovery is complete. diff --git a/usr/src/uts/common/avs/ns/sdbc/Makefile b/usr/src/uts/common/avs/ns/sdbc/Makefile deleted file mode 100644 index 3ece00b584..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -# include global definitions -include ../../../../../Makefile.master - -HDRS= sd_bcache.h \ - sd_cache.h \ - sd_conf.h \ - sd_hash.h \ - sdbc_ioctl.h \ - sd_pcu.h \ - sd_trace.h \ - safestore.h - -ROOTDIRS= $(ROOT)/usr/include/sys/nsctl - -ROOTHDRS= $(HDRS:%=$(ROOTDIRS)/%) - -CHECKHDRS= $(HDRS:%.h=%.check) - -# install rule -$(ROOTDIRS)/%: % - $(INS.file) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIRS) $(ROOTHDRS) - -$(ROOTDIRS): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/sdbc/cache_kstats_readme.txt b/usr/src/uts/common/avs/ns/sdbc/cache_kstats_readme.txt deleted file mode 100644 index 3d73a4559b..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/cache_kstats_readme.txt +++ /dev/null @@ -1,319 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -================================================================================ - -TITLE: Kstats Specification for SDBC - -DATE: 10-28-2002 - -AUTHOR: Chris Juhasz (chris.juhasz@sun.com) - -LOCATION: src/uts/common/ns/sdbc/cache_kstats_readme.txt -================================================================================ - -The existing sd_stat cache statistical reporting mechanism has been expanded -with the kstat library reporting mechanism. The existing mechanism will probably -eventually be phased out. In general the statistics have fallen -into two general categories - "global" and "cd." The global stats reflect gross -behavior over all cached volumes, while "cd" stats reflect behavior particular -to each cached volume (or cache descriptor). - -The sdbc module makes use of two types of kstats. For generic statistic -reporting, "regular" kstat_named_t type kstats are used. For timing-specific -reporting, sdbc relies on the kstat_io_t type. - -For more information on kstats, see [1] in the References section. - -1.0 NAMING: -=========== -The names for the sdbc kstats are defined in src/uts/common/ns/sdbc/sd_misc.h - -2.0 REGULAR KSTATS: -=================== -The following are kstats of type kstat_named_t, used to gather generic -statistics. - -These make use of the original statistics gathering mechanism for sdbc, -_sd_stats_t and _sd_shared_t structs, defined in -src/uts/common/ns/sdbc/sd_bcache.h. The _sd_stats_t structure tracks -statistics that are global to the entire cache, while the _sd_shared_t struct -is used to track statistics particular to a cache descriptor (cd). - -2.1 GLOBAL KSTATS: -~~~~~~~~~~~~~~~~~~ -This global kstat represents statistics which reflect the state of the entire -cache, summed over all cache descriptors. - -2.1.1 Field Definitions: ------------------------- -The "global" kstat corresponds to fields in the _sd_stats_t structure. 
The -following table maps the name of the kstat field to its equivalent field in -the _sd_stats_t structure, also providing a description where appropriate. - -KSTAT FIELD _sd_stats_t DESCRIPTION ------------ ----------- ----------- -sdbc_count st_count - number of opens for device -sdbc_loc_count st_loc_count - number of open devices -sdbc_rdhits st_rdhits - number of read hits -sdbc_rdmiss st_rdmiss - number of read misses -sdbc_wrhits st_wrhits - number of write hits -sdbc_wrmiss st_wrmiss - number of write misses -sdbc_blksize st_blksize - cache block size (in bytes) - -/* I'm not very sure what the next three fields track--we might take them out */ -sdbc_lru_blocks st_lru_blocks -sdbc_lru_noreq st_lru_noreq -sdbc_lru_req st_lru_req - -sdbc_wlru_inq st_wlru_inq - number of write blocks -sdbc_cachesize st_cachesize - cache size (in bytes) -sdbc_numblocks st_numblocks - cache blocks -sdbc_num_shared MAXFILES*2 - number of shared structures (one for - each cached volume) - This number dictates the maximum - index size for shared stats and - names given below. -sdbc_destaged st_destaged - number of bytes destaged to disk - (flushed from the cache to disk). -sdbc_wrcancelns st_wrcancelns - number of write cancellations - (writes to cached blocks that are - already dirty). -sdbc_nodehints --- - node hints (such as wrthru/nowrthru) - -All fields are read-only and are of type KSTAT_DATA_ULONG. Note that the -"sdbc_wrcancelns" and "sdbc_destaged" are new, and have also been added to the -_sd_stats_t struct. - -2.1.2 Naming characteristics: ------------------------------ -module: SDBC_KSTAT_MODULE "sdbc" -class: SDBC_KSTAT_CLASS "storedge" -name: SDBC_KSTAT_GSTATS "global" -instance #: 0 - - -2.2 KSTATS (PER CACHE DESCRIPTOR): -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -These "cd" kstats present statistics which reflect the state of a single cache -descriptor. One of these kstats exists for each open cache descriptor. 
- -2.2.1 Field Definitions: ------------------------- -The "cd" kstats correspond to fields in the _sd_shared_t structure. The -following table maps the name of the kstat field to its equivalent field in -the _sd_shared_t structure, also providing a description where appropriate. - -KSTAT FIELD _sd_shared_t DESCRIPTION ------------ ------------ ----------- -sdbc_vol_name sh_filename - last 16 characters of the volume name -sdbc_alloc sh_alloc - is this allocated? -sdbc_failed sh_failed - Disk failure status (0=ok,1= /o - error ,2= open failed) -sdbc_cd sh_cd - the cache descriptor. (for stats) -sdbc_cache_read sh_cache_read - Number of FBA's read from cache -sdbc_cache_write sh_cache_write - Number of FBA's written to cache -sdbc_disk_read sh_disk_read - Number of FBA's read from disk -sdbc_disk_write sh_disk_write - Number of FBA's written to disk -sdbc_filesize sh_filesize - Filesize (in FBA's) -sdbc_numdirty sh_numdirty - Number of dirty blocks -sdbc_numio sh_numio - Number of blocks on way to disk -sdbc_numfail sh_numfail - Number of blocks failed -sdbc_flushloop sh_flushloop - Loops delayed so far -sdbc_flag sh_flag - Flags visible to user programs -sdbc_destaged sh_destaged - number of bytes destaged to disk - (flushed from the cache to disk). -sdbc_cdhints --- - cd hints (such as wrthru/nowrthru) - -All fields are read-only kstat_named_t kstats, with data type KSTAT_DATA_ULONG. -The instance number of the kstat corresponds to the cache descriptor number. -Note that the "sdbc_wrcancelns" and "sdbc_destaged" are new, and have also -been added to the _sd_shared_t struct. - -2.2.2 Naming characteristics: ------------------------------ -module: SDBC_KSTAT_MODULE "sdbc" -class: SDBC_KSTAT_CLASS "storedge" -name: SDBC_KSTAT_CDSTATS "cd%d" (%d = < cd number >) -instance #: < cache descriptor number > - -3.0 I/O KSTATS: -=============== -The sdbc module now contains kstats of type kstat_io_t. These are used to -track timing through the cache. 
As with the "regular" kstats, sdbc tracks -global statistics, as well as those per cache descriptor. Since kstat_io_t -is a built-in kstat type, all are defined the same way. - -3.0.1 Time-Gathering: ---------------------- -These kstat_io_t types provide two built-in time-gathering mechanisms, which it -refers to as "waitq" and "runq," where "waitq" is intended to be interpreted -as the amount of time a request spends in its pre-service state, and "runq" the -amount of time a request spends in its service state. Transitions to the -runq and the waitq must be made via built-in functions, such as -kstat_runq_enter() and kstat_runq_exit(). The relevant fields in the -kstat_io_t structure should not be considered explicitly. (See comment below). -The iostat(1M) utility may be used to gather timing-related information -collected through this mechanism. - -Please note that sdbc does not use waitq. -sdbc uses runq as follows: - -An I/O request transitions to the runq (both global, and per-cd) upon entering -the cache through _sd_read(), _sd_write(), or _sd_alloc_buf(). It -transitions off the runq after the request has been serviced, either by the -cache, or as the result of disk I/O. Thus, this allows a user to track the -total time spent in the cache, which includes disk I/O time. - - -3.0.2 kstat_io_t Fields: ------------------------- -These I/O kstats include the following fields: - - u_longlong_t nread; /* number of bytes read */ - u_longlong_t nwritten; /* number of bytes written */ - uint_t reads; /* number of read operations */ - uint_t writes; /* number of write operations */ - -# The following fields are automatically updated by the built-in -# kstat_waitq_enter(), kstat_waitq_exit(), kstat_runq_enter() and -# kstat_runq_exit() functions. 
- - hrtime_t wtime; /* cumulative wait (pre-service) time */ - hrtime_t wlentime; /* cumulative wait length*time product */ - hrtime_t wlastupdate; /* last time wait queue changed */ - hrtime_t rtime; /* cumulative run (service) time */ - hrtime_t rlentime; /* cumulative run length*time product */ - hrtime_t rlastupdate; /* last time run queue changed */ - - uint_t wcnt; /* count of elements in wait state */ - uint_t rcnt; /* count of elements in run state */ - -For more information, refer to [2] in the References section. - -3.1 GLOBAL IO KSTATS: -~~~~~~~~~~~~~~~~~~~~~ -sdbc includes "global" I/O kstats which track the timings through the cache as -a whole, taking into account all cache descriptors. The field definitions -are built-in, as explained above. - -3.1.1 Naming characteristics: ------------------------------ -module: SDBC_KSTAT_MODULE "sdbc" -class: "disk" -name: SDBC_IOKSTAT_GSTATS "gsdbc" -instance #: 0 - -3.2 IO KSTATS (PER CACHE DESCRIPTOR): -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -These "cd" I/O kstats present statistics which reflect the state of a single -cache descriptor. One of these I/O kstats exists for each open cache -descriptor. The field definitions are built-in, as explained above. - -3.2.1 Naming characteristics: ------------------------------ -module: SDBC_KSTAT_MODULE "sdbc" -class: "disk" -name: SDBC_IOKSTAT_STATS "sdbc%d" (%d = < cd number >) -instance #: < cache descriptor number > - -4.0 DYNMEM KSTATS: -================== -The sdbc module also uses a "regular" kstat to track dynamic memory -allocation in the cache. These are "global" statistics. 
- -Its fields can be divided logically between behavior variables, and statistical -variables. - -4.1 Field Definitions: -~~~~~~~~~~~~~~~~~~~~~~ - -4.1.1 Behavior Variables: -------------------------- -sdbc_monitor_dynmem --- D0=monitor thread shutdown in the console window - D1=print deallocation thread stats to the console - window - D2=print more deallocation thread stats to the console - window - (usage: setting a value of 6 = 2+4 sets D1 and D2) -sdbc_max_dyn_list ----- 1 to ?: sets the maximum host/parasite list length - (A length of 1 prevents any multipage allocations from - occurring and effectively removes the concept of - host/parasite.) -sdbc_cache_aging_ct1 -- 1 to 255: fully aged count (everything but meta and - holdover) -sdbc_cache_aging_ct2 -- 1 to 255: fully aged count for meta-data entries -sdbc_cache_aging_ct3 -- 1 to 255: fully aged count for holdovers -sdbc_cache_aging_sec1 - 1 to 255: sleep level 1 for 100% to pcnt1 free cache - entries -sdbc_cache_aging_sec2 - 1 to 255: sleep level 2 for pcnt1 to pcnt2 free cache - entries -sdbc_cache_aging_sec3 - 1 to 255: sleep level 3 for pcnt2 to 0% free cache - entries -sdbc_cache_aging_pcnt1- 0 to 100: cache free percent for transition from - sleep1 to sleep2 -sdbc_cache_aging_pcnt2- 0 to 100: cache free percent for transition from - sleep2 to sleep3 -sdbc_max_holds_pcnt --- 0 to 100: max percent of cache entries to be maintained - as holdovers - -4.1.2 Statistical Variables: ----------------------------- -Cache Stats (per wake cycle) (r/w): -sdbc_alloc_ct --------- total allocations performed -sdbc_dealloc_ct ------- total deallocations performed -sdbc_history ---------- current hysteresis flag setting -sdbc_nodatas ---------- cache entries w/o memory assigned -sdbc_candidates ------- cache entries ready to be aged or released -sdbc_deallocs --------- cache entries w/memory deallocated and requeued -sdbc_hosts ------------ number of host cache entries -sdbc_pests ------------ number of parasitic cache 
entries -sdbc_metas ------------ number of meta-data cache entries -sdbc_holds ------------ number of holdovers (fully aged w/memory and requeued) -sdbc_others ----------- number of not [host, pests or metas] -sdbc_notavail --------- number of cache entries to bypass (nodatas+'in use by - other processes') -sdbc_process_directive- D0=1 wake thread - D1=1 temporarily accelerate aging (set the hysteresis - flag) -sdbc_simplect --------- simple count of the number of times the kstat update - routine has been called (used for debugging) - -The behavior fields (along with the "sdbc_process_directive" field) may be both -read and written. The remaining statistical fields are read-only. - -For more information, please refer to [3] in the References section. - -4.2 Naming characteristics: -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -module: SDBC_KSTAT_MODULE "sdbc" -class: SDBC_KSTAT_CLASS "storedge" -name: SDBC_KSTAT_DYNMEM "dynmem" -instance #: 0 - -5.0 REFERENCES FOR FURTHER READING: -=================================== -1. generic kstat information: kstat(1M), <sys/include/kstat.h> -2. kstat_io_t information: kstat_io(9S), kstat_queue(9F) -3. sdbc dynamic memory implementation: -<ds[3,4]>/src/uts/common/ns/sdbc/dynmem_readme.txt diff --git a/usr/src/uts/common/avs/ns/sdbc/dynmem_readme.txt b/usr/src/uts/common/avs/ns/sdbc/dynmem_readme.txt deleted file mode 100644 index d5fba71d7d..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/dynmem_readme.txt +++ /dev/null @@ -1,352 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. 
-# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -TITLE: Dynamic Memory Implementation Overview - -DATE: 10/13/2000 - -AUTHOR: Jim Guerrera (james.guerrera@east) - - -1.0 Dynamic Memory Implementation in the SCM Module - -The system memory allocation required by the Storage Cache Manager (SCM) -has been modified to more fully conform to the requirements of the Solaris -OS. The previous implementation required that the total memory requirements -of the package be allocated 'up front' during bootup and was never released. -The current implementation performs 'on demand' allocations at the time -memory is required in a piecemeal manner. In addition the requisitioned -memory will be released back to the system at some later time. - -2.0 Implementation - -2.1 Memory Allocation - -The memory allocation involves modifications primarily to sd_alloc_buf() -in module sd_bcache.c. When a request is received for cache and system -resources it is broken down and each piece categorized both as an -independent entity and as a member of a group with close neighbors. Cache -resources comprise cache control entries (ccent), write control entries -(wctrl for FWC support) and system memory. The current allocation algorithm -for ccent and wrctl remains the same. The memory allocation has been modified -and falls into two general categories - single page and multi-page -allocations. - -2.1.1 A single page allocation means exactly that - the ccent points to and -owns one page of system memory. 
If two or more ccent are requisitioned to -support the caching request then only the first entry in the group actually -owns the allocated memory of two or more pages. The secondary entries -simply point to page boundaries within this larger piece of contiguous memory. -The first entry is termed a host and the secondaries are termed parasites. - -The process for determining what is a host, a parasite or anything else is -done in three phases. Phase one simply determines whether the caching request -references a disk area already in cache and marks it as such. If it is not -in cache it is typed as eligible - i.e. needing memory allocation. Phase -two scans this list of typed cache entries and based on immediate neighbors -is categorized as host, pest or downgraded to other. A host can only exist -if there is one or more eligible entries immediately following it and it -itself either starts the list or immediately follows a non-eligible entry. -If either condition proves false the category remains as eligible (i.e. -needs memory allocation) but the type is cleared to not host (i.e. other). -The next phase is simply a matter of scanning the cache entry list and -allocating multipage memory for hosts, single page entries for others or -simply setting up pointers in the parasitic entries into its corresponding -host multipage memory allocation block. - -2.1.2 The maximum number of parasitic entries following a host memory -allocation is adjustable by the system administrator. The details of this -are under the description of the KSTAT interface (Sec 3.0). - -2.2 Memory Deallocation - -Memory deallocation is implemented in sd_dealloc_dm() in module sd_io.c. -This possibly overly complicated routine works as follows: - -In general the routine sleeps a specified amount of time then wakes and -examines the entire centry list. If an entry is available (i.e. 
not in use -by another thread and has memory which may be deallocated) it takes -possession and ages the centry by one tick. It then determines if the -centry has aged sufficiently to have its memory deallocated and for it to -be placed at the top of the lru. - -2.3 There are two general deallocation schemes in place depending on -whether the centry is a single page allocation centry or it is a member -of a host/parasite multipage allocation chain. - -2.3.1 The behavior for a single page allocation centry is as follows: - -If the given centry is selected as a 'holdover' it will age normally -however at full aging it will only be placed at the head of the lru. -Its memory will not be deallocated until a further aging level has -been reached. The entries selected for this behavior are governed by -counting the number of these holdovers in existence on each wakeup -and comparing it to a specified percentage. This comparison is always -one cycle out of date and will float in the relative vicinity of the -specified number. - -In addition there is a placeholder for centries identified as 'sticky -meta-data' with its own aging counter. It operates exactly as the holdover -entries as regards to aging but is absolute - i.e. no percentage governs -the number of such entries. - -2.3.2 The percentage and additional aging count are adjustable by the -system administrator. The details of this are under the description of -the KSTAT interface (Sec. 3.0). - -2.3.3 The behavior for a host/parasite chain is as follows: - -The host/parasite subchain is examined. If all entries are fully aged the -entire chain is removed - i.e. memory is deallocated from the host centry -and all centry fields are cleared and each entry requeued on to the lru. - -There are three sleep times and two percentage levels specifiable by the -system administrator. 
A meaningful relationship between these variables -is: - -sleeptime1 >= sleeptime2 >= sleeptime3 and -100% >= pcntfree1 >= pcntfree2 >= 0% - -sleeptime1 is honored between 100% free and pcntfree1. sleeptime2 is -honored between pcntfree1 and pcntfree2. sleeptime3 is honored between -pcntfree2 and 0% free. The general thrust here is to automatically -adjust sleep time to centry load. - -In addition there exists an accelerated aging flag which mimics hysteresis -behavior. If the available centrys fall between pcntfree1 and pcntfree2 -an 8 bit counter is switched on. The effect is to keep the timer value -at sleeptime2 for 8 cycles even if the number of available cache entries -drifts above pcntfree1. If it falls below pcntfree2 an additional 8 bit -counter is switched on. This causes the sleep timer to remain at sleeptime3 -for at least 8 cycles even if it floats above pcntfree2 or even pcntfree1. -The overall effect of this is to accelerate the release of system resources -under what the thread thinks is a heavy load as measured by the number of -used cache entries. - -3.0 Dynamic Memory Tuning - -A number of behavior modification variables are accessible via system calls -to the kstat library. A sample program exercising the various features can -be found in ./src/cmd/ns/sdbc/sdbc_dynmem.c. In addition the behavior variable -identifiers can be placed in the sdbc.conf file and will take effect on bootup. -There is also a -number of dynamic memory statistics available to gauge its current state. - -3.1 Behavior Variables - -sdbc_monitor_dynmem --- D0=monitor thread shutdown in the console window - D1=print deallocation thread stats to the console - window - D2=print more deallocation thread stats to the console - window - (usage: setting a value of 6 = 2+4 sets D1 and D2) -sdbc_max_dyn_list ----- 1 to ?: sets the maximum host/parasite list length - (A length of 1 prevents any multipage allocations from - occurring and effectively removes the concept of - host/parasite.) 
-sdbc_cache_aging_ct1 -- 1 to 255: fully aged count (everything but meta and - holdover) -sdbc_cache_aging_ct2 -- 1 to 255: fully aged count for meta-data entries -sdbc_cache_aging_ct3 -- 1 to 255: fully aged count for holdovers -sdbc_cache_aging_sec1 - 1 to 255: sleep level 1 for 100% to pcnt1 free cache - entries -sdbc_cache_aging_sec2 - 1 to 255: sleep level 2 for pcnt1 to pcnt2 free cache - entries -sdbc_cache_aging_sec3 - 1 to 255: sleep level 3 for pcnt2 to 0% free cache - entries -sdbc_cache_aging_pcnt1- 0 to 100: cache free percent for transition from - sleep1 to sleep2 -sdbc_cache_aging_pcnt2- 0 to 100: cache free percent for transition from - sleep2 to sleep3 -sdbc_max_holds_pcnt --- 0 to 100: max percent of cache entries to be maintained - as holdovers - -3.2 Statistical Variables - -Cache Stats (per wake cycle) (r/w): -sdbc_alloc_ct --------- total allocations performed -sdbc_dealloc_ct ------- total deallocations performed -sdbc_history ---------- current hysterisis flag setting -sdbc_nodatas ---------- cache entries w/o memory assigned -sdbc_candidates ------- cache entries ready to be aged or released -sdbc_deallocs --------- cache entries w/memory deallocated and requeued -sdbc_hosts ------------ number of host cache entries -sdbc_pests ------------ number of parasitic cache entries -sdbc_metas ------------ number of meta-data cache entries -sdbc_holds ------------ number of holdovers (fully aged w/memory and requeued) -sdbc_others ----------- number of not [host, pests or metas] -sdbc_notavail --------- number of cache entries to bypass (nodatas+'in use by - other processes') -sdbc_process_directive- D0=1 wake thread - D1=1 temporaily accelerate aging (set the hysterisis - flag) -sdbc_simplect --------- simple count of the number of times the kstat update - routine has been called - - -3.3 Range Checks and Limits - -Only range limits are checked. Internal inconsistencies are not checked -(e.g. pcnt2 > pcnt1). 
Inconsistencies won't break the system you just won't -get meaningful behavior. - -The aging counter and sleep timer limits are arbitrarily limited to a byte -wide counter. This can be expanded. However max'ing the values under the -current implementation yields about 18 hours for full aging. - -3.4 Kstat Lookup Name - -The kstat_lookup() module name is "sdbc:dynmem" with an instance of 0. - -3.5 Defaults - -Default values are: -sdbc_max_dyn_list = 8 -sdbc_monitor_dynmem = 0 -sdbc_cache_aging_ct1 = 3 -sdbc_cache_aging_ct2 = 3 -sdbc_cache_aging_ct3 = 3 -sdbc_cache_aging_sec1 = 10 -sdbc_cache_aging_sec2 = 5 -sdbc_cache_aging_sec3 = 1 -sdbc_cache_aging_pcnt1 = 50 -sdbc_cache_aging_pcnt2 = 25 -sdbc_max_holds_pcnt = 0 - -To make the dynmem act for all intents and purposes like the static model -beyond the inital startup the appropriate values are: -sdbc_max_dyn_list = 1, -sdbc_cache_aging_ct1/2/3=255, -sdbc_cache_aging_sec1/2/3=255 -The remaining variables are irrelevant. - -4.0 KSTAT Implementation for Existing Statistics - -The existing cache statistical reporting mechanism has been replaced by -the kstat library reporting mechanism. In general the statistics fall into -two general catagories - global and shared. The global stats reflect gross -behavior over all cached volumes and shared reflects behavior particular -to each cached volume. - -4.1 Global KSTAT lookup_name - -The kstat_lookup() module name is "sdbc:gstats" with an instance of 0. 
The -identifying ascii strings and associated values matching the sd_stats driver -structure are: - -sdbc_dirty -------- net_dirty -sdbc_pending ------ net_pending -sdbc_free --------- net_free -sdbc_count -------- st_count - number of opens for device -sdbc_loc_count ---- st_loc_count - number of open devices -sdbc_rdhits ------- st_rdhits - number of read hits -sdbc_rdmiss ------- st_rdmiss - number of read misses -sdbc_wrhits ------- st_wrhits - number of write hits -sdbc_wrmiss ------- st_wrmiss - number of write misses -sdbc_blksize ------ st_blksize - cache block size -sdbc_num_memsize -- SD_MAX_MEM - number of defined blocks - (currently 6) -To find the size of each memory blocks append the numbers 0 to 5 to -'sdbc_memsize'. -sdbc_memsize0 ----- local memory -sdbc_memsize1 ----- cache memory -sdbc_memsize2 ----- iobuf memory -sdbc_memsize3 ----- hash memory -sdbc_memsize4 ----- global memory -sdbc_memsize5 ----- stats memory -sdbc_total_cmem --- st_total_cmem - memory used by cache structs -sdbc_total_smem --- st_total_smem - memory used by stat structs -sdbc_lru_blocks --- st_lru_blocks -sdbc_lru_noreq ---- st_lru_noreq -sdbc_lru_req ------ st_lru_req -sdbc_num_wlru_inq - MAX_CACHE_NET - number of net (currently 4) -To find the size of the least recently used write cache per net append -the numbers 0-3 to sdbc_wlru_inq -sdbc_wlru_inq0 ---- net 0 -sdbc_wlru_inq1 ---- net 1 -sdbc_wlru_inq2 ---- net 2 -sdbc_wlru_inq3 ---- net 3 -sdbc_cachesize ---- st_cachesize - cache size -sdbc_numblocks ---- st_numblocks - cache blocks -sdbc_num_shared --- MAXFILES*2 - number of shared structures (one for - each cached volume) - This number dictates the maximum - index size for shared stats and - names given below. -sdbc_simplect ----- simple count of the number of times the kstat update routine - has been called - -All fields are read only. 
- - -4.2 Shared Structures KSTAT lookup_name - -The kstat_lookup() module name is "sdbc:shstats" and "sdbc:shname" both with -an instance of 0. The identifying ascii strings and associated values matching -the sd_shared driver structure are: - -sdbc:shstats module -sdbc_index ------- structure index number -sdbc_alloc ------- sh_alloc - is this allocated? -sdbc_failed ------ sh_failed - Disk failure status (0=ok,1= /o error - ,2= open failed) -sdbc_cd ---------- sh_cd - the cache descriptor. (for stats) -sdbc_cache_read -- sh_cache_read - Number of bytes read from cache -sdbc_cache_write - sh_cache_write - Number of bytes written to cache -sdbc_disk_read --- sh_disk_read - Number of bytes read from disk -sdbc_disk_write -- sh_disk_write - Number of bytes written to disk -sdbc_filesize ---- sh_filesize - Filesize -sdbc_numdirty ---- sh_numdirty - Number of dirty blocks -sdbc_numio ------- sh_numio - Number of blocks on way to disk -sdbc_numfail ----- sh_numfail - Number of blocks failed -sdbc_flushloop --- sh_flushloop - Loops delayed so far -sdbc_flag -------- sh_flag - Flags visible to user programs -sdbc_simplect ---- simple count of the number of times the kstat update routine - has been called - -sdbc:shname module -read in as raw bytes and interpreted as a nul terminated assci string. - -These two modules operate hand in hand based on information obtained from the -"sdbc:gstats" module. "sdbc:gstats - sdbc_num_shared" gives the maximum number -possible of shared devices. It does not tell how many devices are actually -cached - just the maximum possible. In order to determine the number present -and retrieve the statistics for each device the user must: - -1. open and read "sdbc:shstats" -2. set the index "sdbc_index" to a starting value (presumably 0) -3. write the kstat module ( the only item in the module is sdbc_index) - -What this does is set a starting index for all subsequent reads. - -4. 
to get the device count and associated statistics the user now simply -reads each module "sdbc:shstats" and "sdbc:shname" as a group repeatedly - -the index will auto increment - -To reset the index set "sdbc:shstats - sdbc_index" to the required value -and write the module. - -The first entry returning a nul string to "sdbc:shname" signifies no more -configured devices. - diff --git a/usr/src/uts/common/avs/ns/sdbc/safestore.c b/usr/src/uts/common/avs/ns/sdbc/safestore.c deleted file mode 100644 index e559a2cf6f..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/safestore.c +++ /dev/null @@ -1,394 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/sdt.h> - -#include <sys/varargs.h> -#include <sys/unistat/spcs_s.h> - -#include "safestore.h" -#include "safestore_impl.h" -#include "sd_trace.h" - -typedef struct safestore_modules_s { - struct safestore_modules_s *ssm_next; - safestore_ops_t *ssm_module; -} safestore_modules_t; - -safestore_modules_t *ss_modules; -kmutex_t safestore_mutex; -int ss_initialized; - -/* the safestore module init/deinit functions */ - -void ss_ram_init(); -void ss_ram_deinit(); - -/* CSTYLED */ -/**# - * initialize the safestore subsystem and all safestore - * modules by calling all safestore modules' initialization functions - * - * NOTE: This function must be called with the _sdbc_config_lock held - * - * @param none - * @return void - * - */ -void -sst_init() -{ - /* - * initialize the ss modules we know about - * this results in calls to sst_register_mod() - */ - if (ss_initialized != SS_INITTED) { - mutex_init(&safestore_mutex, NULL, MUTEX_DRIVER, NULL); - ss_ram_init(); - ss_initialized = SS_INITTED; - } - -} - -/* CSTYLED */ -/**# - * deinitialize the safestore subsystem and all safestore modules - * by calling all safestore modules' deinitialization functions - * - * NOTE: This function must be called with the _sdbc_config_lock held - * - * @param none - * @return void - * - */ -void -sst_deinit() -{ - if (ss_initialized == SS_INITTED) { - ss_ram_deinit(); - mutex_destroy(&safestore_mutex); - ss_initialized = 0; - } -} - -/* BEGIN CSTYLED */ -/**# - * called by a safestore module to register its ops table - * for use by clients - * - * @param ss_ops structure of safestore functions - * @return void - * - * @see safestore_ops_t{} - */ -void -sst_register_mod(safestore_ops_t *ss_ops) /* END CSTYLED */ -{ - safestore_modules_t *new; - - new = kmem_alloc(sizeof (*new), KM_SLEEP); - - mutex_enter(&safestore_mutex); - new->ssm_module = ss_ops; - new->ssm_next = ss_modules; - - ss_modules = 
new; - mutex_exit(&safestore_mutex); -} - -/* BEGIN CSTYLED */ -/**# - * called by a safestore module to unregister its ops table - * @param ss_ops structure of safestore functions - * - * @return void - * - * @see safestore_ops_t{} - */ -void -sst_unregister_mod(safestore_ops_t *ss_ops) /* END CSTYLED */ -{ - safestore_modules_t *ssm, *prev; - int found = 0; - - mutex_enter(&safestore_mutex); - prev = NULL; - for (ssm = ss_modules; ssm; prev = ssm, ssm = ssm->ssm_next) { - if (ssm->ssm_module == ss_ops) { - if (!prev) - ss_modules = ssm->ssm_next; - else - prev->ssm_next = ssm->ssm_next; - - kmem_free(ssm, sizeof (safestore_modules_t)); - ++found; - break; - } - } - mutex_exit(&safestore_mutex); - - if (!found) - cmn_err(CE_WARN, "ss(sst_unregister_mod) " - "ss module %p not found", (void *)ss_ops); -} - -/* BEGIN CSTYLED */ -/**# - * open a safestore module for use by a client - * @param ss_type specifies a valid media type and transport type. - * the first module found that supports these reqested type - * is used. may contain more than one media type or transport - * type if client has no preference among several types. - * more than one ss_type may be specified in the call if - * client has an ordered preference. - * - * @return safestore_ops_t * pointer to a valid safestore ops structure - * if the request is satisfied. - * NULL otherwise - * - * @see safestore_ops_t{} - * @see SS_M_RAM - * @see SS_M_NV_SINGLENODE - * @see SS_M_NV_DUALNODE_NOMIRROR - * @see SS_M_NV_DUALNODE_MIRROR - * @see SS_T_STE - * @see SS_T_RPC - * @see SS_T_NONE - */ -safestore_ops_t * -sst_open(uint_t ss_type, ...) 
/* END CSTYLED */ -{ - va_list ap; - uint_t ssop_type; - safestore_modules_t *ssm; - - if ((ss_modules == NULL) || !ss_type) - return (NULL); - - va_start(ap, ss_type); - mutex_enter(&safestore_mutex); - do { - for (ssm = ss_modules; ssm; ssm = ssm->ssm_next) { - ssop_type = ssm->ssm_module->ssop_type; - if ((ssop_type & SS_MEDIA_MASK) & ss_type) - if ((ssop_type & SS_TRANSPORT_MASK) & ss_type) { - va_end(ap); - mutex_exit(&safestore_mutex); - return (ssm->ssm_module); - } - } - } while ((ss_type = va_arg(ap, uint_t)) != 0); - mutex_exit(&safestore_mutex); - - va_end(ap); - return (NULL); -} - -/* BEGIN CSTYLED */ -/**# - * close a safestore module. called when client no longer wishes to use - * a safestore module - * - * @param ssp points to a safestore_ops_t obtained from a previous call - * to sst_open() - * - * @return SS_OK if successful - * SS_ERR otherwise - */ -/*ARGSUSED*/ -int -sst_close(safestore_ops_t *ssp) /* END CSTYLED */ -{ - return (SS_OK); -} - - -/* - * _sdbc_writeq_configure - configure the given writeq - * Allocate the lock and sv we need to maintain waiters - * - */ -int -_sdbc_writeq_configure(_sd_writeq_t *wrq) -{ - int i; - - wrq->wq_inq = 0; - mutex_init(&wrq->wq_qlock, NULL, MUTEX_DRIVER, NULL); - wrq->wq_qtop = NULL; - wrq->wq_slp_top = 0; - wrq->wq_slp_index = 0; - wrq->wq_slp_inq = 0; - - for (i = 0; i < SD_WR_SLP_Q_MAX; i++) { - wrq->wq_slp[i].slp_wqneed = 0; - cv_init(&wrq->wq_slp[i].slp_wqcv, NULL, CV_DRIVER, NULL); - } - - return (0); -} - -/* - * _sdbc_writeq_deconfigure - deconfigure the given writeq - * Deallocate the lock and sv if present. 
- * - */ -void -_sdbc_writeq_deconfigure(_sd_writeq_t *wrq) -{ - int i; - - if (wrq) { - mutex_destroy(&wrq->wq_qlock); - for (i = 0; i < SD_WR_SLP_Q_MAX; i++) { - cv_destroy(&wrq->wq_slp[i].slp_wqcv); - } - wrq->wq_inq = 0; - wrq->wq_qtop = NULL; - } - -} - - -int _sd_wblk_sync = 1; - -ss_wr_cctl_t * -ss_alloc_write(int need, int *stall, _sd_writeq_t *q) -{ - ss_wr_cctl_t *wctl; - ss_wr_cctl_t *ret; - int i; - int aged = 0; - - if (_sd_wblk_sync && (q->wq_inq == 0)) - return (NULL); /* do sync write if queue empty */ - - SDTRACE(ST_ENTER|SDF_WR_ALLOC, SDT_INV_CD, need, - SDT_INV_BL, q->wq_inq, _SD_NO_NET); - - if (need <= 0) { - cmn_err(CE_WARN, "ss_alloc_write: bogus need value! %d", need); - return (NULL); - } - - mutex_enter(&(q->wq_qlock)); -retry_wr_get: - if (q->wq_inq < need) { - if (!_sd_wblk_sync) { - unsigned stime; - stime = nsc_usec(); - - /* - * Try to keep requests ordered so large requests - * are not starved. We can queue 255 write requests, - * After That go into write-through. - */ - if (q->wq_slp_inq < SD_WR_SLP_Q_MAX) { - q->wq_slp_inq++; - /* give preference to aged requests */ - if (aged) { - WQ_SVWAIT_TOP(q, need); - } else { - WQ_SVWAIT_BOTTOM(q, need); - } - aged++; - } else { - mutex_exit(&(q->wq_qlock)); - return (NULL); - } - - SDTRACE(ST_INFO|SDF_WR_ALLOC, - SDT_INV_CD, need, SDT_INV_BL, q->wq_inq, - (nsc_usec()-stime)); - (void) (*stall)++; - goto retry_wr_get; - } - ret = NULL; - } else { -get_wctl: - wctl = q->wq_qtop; - ret = wctl; - DTRACE_PROBE1(alloc_write, - ss_wr_cctl_t *, wctl); - for (i = 1; i < need; ++i) { - wctl = wctl->wc_next; - DTRACE_PROBE1(alloc_write_cont, - ss_wr_cctl_t *, wctl); - } - - q->wq_qtop = wctl->wc_next; - wctl->wc_next = NULL; - q->wq_inq -= need; - } - mutex_exit(&(q->wq_qlock)); - - SDTRACE(ST_EXIT|SDF_WR_ALLOC, SDT_INV_CD, need, - SDT_INV_BL, q->wq_inq, _SD_NO_NET); - return (ret); -} - -/* - * ss_release_write - put a write block back in the writeq. 
- * - * ARGUMENTS: - * wctl - Write control block to be release. - * q - write q to put the wctl - * - * RETURNS: NONE - */ - -void -ss_release_write(ss_wr_cctl_t *wctl, _sd_writeq_t *q) -{ - - SDTRACE(ST_ENTER|SDF_WR_FREE, SDT_INV_CD, 0, SDT_INV_BL, q->wq_inq, - _SD_NO_NET); - - DTRACE_PROBE1(release_write, - ss_wr_cctl_t *, wctl); - -#if defined(_SD_DEBUG) - if (wctl->wc_gl_info->sci_dirty) { - SDALERT(SDF_WR_FREE, wctl->wc_gl_info->sci_cd, - 0, wctl->wc_gl_info->sci_fpos, - wctl->wc_gl_info->sci_dirty, 0); - } -#endif - mutex_enter(&q->wq_qlock); - - wctl->wc_next = q->wq_qtop; - q->wq_qtop = wctl; - q->wq_inq++; - if (WQ_NEED_SIG(q)) { - q->wq_slp_inq--; - WQ_SVSIG(q); - } - mutex_exit(&q->wq_qlock); - SDTRACE(ST_EXIT|SDF_WR_FREE, SDT_INV_CD, 0, SDT_INV_BL, q->wq_inq, - _SD_NO_NET); -} diff --git a/usr/src/uts/common/avs/ns/sdbc/safestore.h b/usr/src/uts/common/avs/ns/sdbc/safestore.h deleted file mode 100644 index a25e3c794b..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/safestore.h +++ /dev/null @@ -1,655 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SD_SAFESTORE_H -#define _SD_SAFESTORE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include <sys/nsctl/nsctl.h> -#if defined(_KERNEL) || defined(_KMEMUSER) - -/* CSTYLED */ -/**$ - * token for a volume directory stream - */ -typedef struct ss_vdir_s { - intptr_t opaque[6]; -} ss_vdir_t; - -/* CSTYLED */ -/**$ - * token for a cache entry directory stream - */ -typedef struct ss_cdir_s { - intptr_t opaque[6]; -}ss_cdir_t; - -/* CSTYLED */ -/**$ - * token for a volume - */ -typedef struct ss_vol_s { - intptr_t opaque; -}ss_vol_t; - -/* CSTYLED */ -/**$ - * token for cache entry block and dirty bits - */ -typedef struct s_resource_s { - intptr_t opaque; -} ss_resource_t; - -/* CSTYLED */ -/**$ - * token for a list of cache safestore resources - */ -typedef struct ss_resourcelist_s { - intptr_t opaque; -}ss_resourcelist_t; - - -/* CSTYLED */ -/**$ - * cache entry directory stream type specifier - * - * @field ck_type specifies all cache entries, cache entries for volume, node - * @field ck_vol volume token if ck_type is CDIR_VOL - * @field ck_node node id if ck_type is node CDIR_NODE - */ -typedef struct ss_cdirkey_s { - uint_t ck_type; /* discriminator: see type defines below */ - union { - ss_vol_t *ck_vol; - uint_t ck_node; - } cdk_u; -} ss_cdirkey_t; - -/* centry directory stream types */ -#define CDIR_ALL 0 -#define CDIR_VOL 1 -#define CDIR_NODE 2 - -/* BEGIN CSTYLED */ -/**$ - * exported cache entry info - * - * @field sc_cd the cache descriptor, associates this entry with a volume - * @field sc_fpos file position in cache blocks - * @field sc_dirty dirty bits, one for each fba in the cache block - * @field sc_flag flags - * @field sc_res safestore resource token for this cache entry - * @see ss_voldata_t{} - */ -typedef struct ss_centry_info_s { - int sc_cd; /* Cache descriptor */ - nsc_off_t sc_fpos; /* File position */ - int sc_dirty; /* Dirty mask */ - 
int sc_flag; /* CC_PINNABLE | CC_PINNED */ - ss_resource_t *sc_res; /* token for this centry */ -} ss_centry_info_t; -/* END CSTYLED */ - - -/* CSTYLED */ -/**$ - * volume directory stream type specifier - * - * @field vk_type specifies all volume entries, entries for volume, node - * @field vk_vol volume token if vk_type is VDIR_VOL - * @field vk_node node id if vk_type is node VDIR_NODE - */ -typedef struct ss_vdirkey_s { - uint_t vk_type; /* discriminator: see type defines below */ - union { - ss_vol_t *vk_vol; - uint_t vk_node; - } cdk_u; -} ss_vdirkey_t; - -/* volume directory stream types */ -#define VDIR_ALL 0 -#define VDIR_VOL 1 -#define VDIR_NODE 2 - -/* CSTYLED */ -/**$ - * exported volume entry info - * - * @field sv_cd the cache descriptor - * @field sv_vol the safestore volume token for this volume - * @field sv_pinned volume has pinned blocks, holds node id - * @field sv_attached node which has attached this volume - * @field sv_volname path name - * @field sv_devidsz length of device id, the sv_devid - * @field sv_devid unique id for physical, i.e. non-volume-managed volumes - */ -typedef struct ss_voldata_s { - int sv_cd; /* NOTE may need dual node map info */ - ss_vol_t *sv_vol; /* volume token for this vol entry */ - int sv_pinned; /* Device has failed/pinned blocks */ - int sv_attached; /* Node which has device attached */ - char sv_volname[NSC_MAXPATH]; /* Filename */ - int sv_devidsz; /* unique dev id length */ - uchar_t sv_devid[NSC_MAXPATH]; /* wwn id - physical devs only */ -} ss_voldata_t; - -/* safestore media types */ - -/* CSTYLED */ -/**% - * safestore in RAM, useful but not very safe - */ -#define SS_M_RAM 0x00000001 - -/* CSTYLED */ -/**% - * safestore in NVRAM on a single node - */ -#define SS_M_NV_SINGLENODE 0x00000002 - -/* CSTYLED */ -/**% - * safestore in NVRAM on a dual node system. all data is store remotely. - */ -#define SS_M_NV_DUALNODE_NOMIRROR 0x00000004 - -/* CSTYLED */ -/**% - * safestore in NVRAM on a dual node system. 
data is mirrored on both nodes. - */ -#define SS_M_NV_DUALNODE_MIRROR 0x00000008 - - -/* safestore data and metadata transport types */ - -/* CSTYLED */ -/**% - * data is transferred using STE connection - */ -#define SS_T_STE 0x00010000 - -/* CSTYLED */ -/**% - * data is transferred using RPC - */ -#define SS_T_RPC 0x00020000 - -/* CSTYLED */ -/**% - * no transport -- (single node) - */ -#define SS_T_NONE 0x08000000 - -#define SS_MEDIA_MASK 0x0000ffff -#define SS_TRANSPORT_MASK 0xffff0000 - -#define _SD_NO_NET 0 -#define _SD_NO_NETADDR 0 -#define _SD_NO_HOST -1 -#define _SD_NO_CD -1 - -/* config settings */ -#define SS_UNCONFIGURED 0 -#define SS_INITTED 1 -#define SS_CONFIGURED 2 - -/* error return for safestore ops */ -#define SS_ERR -1 -#define SS_OK 0 -#define SS_EOF 1 - -/* config flag */ -#define SS_GENPATTERN 1 - -/* - * convenience macros. should they be implemented in ss_ctl()? - */ - -/* is safestore on a single node? */ -#define SAFESTORE_LOCAL(ssp) ((ssp) && (ssp->ssop_type & SS_T_NONE)) - -/* is safestore really safe or is it just RAM? */ -#define SAFESTORE_SAFE(ssp) ((ssp) && !(ssp->ssop_type & SS_M_RAM)) - -/* is recovery needed with this safestore module? 
*/ -#define SAFESTORE_RECOVERY(ssp) ((ssp) && \ - (ssp->ssop_flags & SS_RECOVERY_NEEDED)) - -/* CSTYLED */ -/**$ - * configuration structure provided by safestore client - * - * @field ssc_configured set by safestore module to indicate config completed - * @field ssc_ss_psize safestore internal page size, set by ss module - * @field ssc_client_psize callers page size - * @field ssc_wsize cache size in bytes: amount of data that can be safestored - * @field ssc_maxfiles maximum number of volumes - * @field ssc_pattern initialization pattern if any - * @field ssc_flag use ssc_pattern if this is SS_GENPATTERN - */ -typedef struct ss_common_config_s { - uint_t ssc_configured; - uint_t ssc_ss_psize; /* safestore internal page size */ - uint_t ssc_client_psize; /* client page size */ - uint_t ssc_wsize; /* Write cache size in bytes */ - int ssc_maxfiles; /* max files */ - uint_t ssc_pattern; /* initialization pattern */ - uint_t ssc_flag; -} ss_common_config_t; - -/* BEGIN CSTYLED */ -/**$ - * safestore operations structure - * - * @field ssop_name description of this module. - * @field ssop_type media type OR'd with transport type - * @field ssop_flags SS_RECOVERY_NEEDED - * @field ssop_configure configure the module - * @field ssop_deconfigure deconfigure the module - * @field ssop_getvdir get a volume directory stream according to type - * @field ssop_getvdirent get next entry in a volume directory stream - * @field ssop_getvol get the data for a volume - * @field ssop_setvol set the data for a volume - * @field ssop_getcdir get cache entry directory stream according to type - * @field ssop_getcdirent get next cache entry in stream - * @field ssop_allocresource allocate safestore resources from free list - * @field ssop_deallocresource deallocate, i.e. 
free, a safestore resource - * @field ssop_getresource get next resource in resource list - * @field ssop_getcentry get metadata for a cache entry - * @field ssop_setcentry set the metadata for a cache entry - * @field ssop_read_cblock read the actual data for a cache entry - * @field ssop_write_cblock write the data for a cache entry - * @field ssop_ctl module entry point for everything else, e.g. stats - * - * @see ss_vdirkey_t{} - * @see ss_voldata_t{} - * @see ss_cdirkey_t{} - * @see ss_resourcelist_t{} - * @see ss_resource_t{} - * @see ss_centry_info_t{} - */ -typedef struct safestore_ops_s { - char *ssop_name; - uint_t ssop_type; /* media type OR'd with transport type */ - uint_t ssop_flags; /* recovery needed, etc */ - int (* ssop_configure)(ss_common_config_t *, spcs_s_info_t); - int (* ssop_deconfigure)(int); - int (* ssop_getvdir)(const ss_vdirkey_t *, ss_vdir_t *); - int (* ssop_getvdirent)(const ss_vdir_t *, ss_voldata_t *); - int (* ssop_getvol)(ss_voldata_t *); - int (* ssop_setvol)(const ss_voldata_t *); - int (* ssop_getcdir)(const ss_cdirkey_t *, ss_cdir_t *); - int (* ssop_getcdirent)(ss_cdir_t *, ss_centry_info_t *); - int (* ssop_allocresource)(int, int *, ss_resourcelist_t **); - void (* ssop_deallocresource)(ss_resource_t *); - int (* ssop_getresource)(ss_resourcelist_t **, ss_resource_t **); - int (* ssop_getcentry)(ss_centry_info_t *); - int (* ssop_setcentry)(const ss_centry_info_t *); - int (* ssop_read_cblock)(const ss_resource_t *, void *, int, int); - int (* ssop_write_cblock)(const ss_resource_t *, - const void *, int, int); - int (* ssop_ctl)(uint_t, uintptr_t); -} safestore_ops_t; -/* END CSTYLED */ - -/* ssop_flags */ -/* - * no writes permitted when this bit is set in ssop flags field - * (single node nvram mostly) - */ -#define SS_RECOVERY_NEEDED 1 - -/* safestore operations */ - -/* BEGIN CSTYLED */ -/**# - * SSOP_CONFIGURE() configure a safestore module - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param 
cfg a pointer to ss_common_config_t, initialized by caller - * @param kstatus unistat spcs_s_info_t - * @return SS_OK successful, errno otherwise - * - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_common_config_t{} - */ -#define SSOP_CONFIGURE(ssp, cfg, kstatus) \ - ((ssp)->ssop_configure(cfg, kstatus)) - -/**# - * SSOP_DECONFIGURE deconfigure a safestore module - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param dirty integer flag, if set it signifies there is pinned data - * @return SS_OK success, SS_ERR otherwise - * - * @see safestore_ops_t{} - */ -#define SSOP_DECONFIGURE(ssp, dirty) ((ssp)->ssop_deconfigure(dirty)) - - -/* volume directory functions */ - -/**# - * SSOP_GETVDIR get a volume directory stream according to type - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param key pointer to ss_vdirkey_t initialized by caller - * @param vdir pointer to ss_vdir_t owned by caller - * @return SS_OK success, SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see ss_vdirkey_t{} - * @see ss_vdir_t{} - */ -#define SSOP_GETVDIR(ssp, key, vdir) ((ssp)->ssop_getvdir(key, vdir)) - -/**# - * SSOP_GETVDIRENT get next volume in a volume directory stream - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param vdir pointer to a properly initialized ss_vdir_t obtained - * from a successsful SSOP_GETVDIR() call - * @param voldata pointer to ss_voldata_t owned by caller, filled - * in with valid data on successful return - * @return SS_OK success - * SS_EOF if no more elements in stream, - * SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_vdir_t{} - * @see ss_voldata_t{} - * @see SSOP_GETVDIR() - */ -#define SSOP_GETVDIRENT(ssp, vdir, voldata) \ - ((ssp)->ssop_getvdirent(vdir, voldata)) - -/* volume accessor functions */ - -/**# - * SSOP_GETVOL get the volume data for a particular volume - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param 
voldata pointer to ss_voldata_t owned by caller, field sv_vol - * must be initialized with a valid ss_vol_t, normally - * obtained from a successful SSOP_GETVDIRENT() call. - * the rest of the structure is filled with valid volume data - * on successful return - * @return SS_OK if data read successfully - * SS_ERR otherwise - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_voldata_t{} - * @see ss_vol_t{} - * @see SSOP_GETVDIRENT() - */ -#define SSOP_GETVOL(ssp, voldata) ((ssp)->ssop_getvol(voldata)) - - -/**# - * SSOP_SETVOL set the volume data for a particular volume - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param voldata pointer to ss_voldata_t owned by caller, field sv_vol - * must be initialized with a valid ss_vol_t, obtained from - * a successful SSOP_GETVDIRENT() call. the remaining - * fields of the structure are written to safestore - * @return SS_OK if data saved successfully - * SS_ERR otherwise - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_voldata_t{} - * @see ss_vol_t{} - * @see SSOP_GETVDIRENT() - */ -#define SSOP_SETVOL(ssp, voldata) ((ssp)->ssop_setvol(voldata)) - -/* centry directory functions */ - -/**# - * SSOP_GETCDIR get a cache entry stream accroding to type - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param key pointer to a ss_cdirkey_t initialized by caller - * @param cdir pointer to ss_cdir_t owned by caller - * @return SS_OK success, SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_cdirkey_t{} - * @ see ss_cdir_t{} - */ -#define SSOP_GETCDIR(ssp, key, cdir) \ - ((ssp)->ssop_getcdir(key, cdir)) - -/**# - * SSOP_GETCDIRENT get next cache entry in a cache entry stream - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param cdir pointer to valid ss_cdirkey_t obtained from a - * successsful SSOP_GETCDIR call - * @param voldata pointer to ss_voldata_t owned by caller, filled - * in with valid data on successful return 
- * @return SS_OK success - * SS_EOF if no more elements in stream, - * SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_vdirkey_t{} - * @see ss_voldata_t{} - * @see SSOP_GETVDIR() - */ -#define SSOP_GETCDIRENT(ssp, cdir, centry) \ - ((ssp)->ssop_getcdirent(cdir, centry)) - -/* cache entry alloc functions */ - -/**# - * SSOP_ALLOCRESOURCE allocate safestore resources from the free list - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param count number of resources, that is data blocks, needed - * @param stall integer pointer to stall count, no blocks available. used only - * when _sd_wblk_sync === 0 - * @param reslist pointer to pointer to ss_resourcelist_t. points to valid - * resource list on successful return - * @return SS_OK success - * SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see ss_resourcelist_t{} - * @see SSOP_DEALLOCRESOURCE() - * @see SSOP_GETRESOURCE() - */ -#define SSOP_ALLOCRESOURCE(ssp, count, stall, reslist) \ - ((ssp)->ssop_allocresource(count, stall, reslist)) - -/**# - * SSOP_DEALLOCRESOURCE deallocate, i.e. release, a single safestore resource - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param res pointer to ss_resource_t to be released - * @return void - * - * @see safestore_ops_t{} - * @see ss_resource_t{} - * @see SSOP_ALLOCRESOURCE() - * @see SSOP_GETRESOURCE() - */ -#define SSOP_DEALLOCRESOURCE(ssp, res) \ - ((ssp)->ssop_deallocresource(res)) - -/**# - * SSOP_GETRESOURCE get the next safestore resource in a list - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param reslist pointer to pointer to ss_resourcelist_t obtained from - * a successful call to SSOP_ALLOCRESOURCE() - * @param res pointer to pointer to ss_resource_t. 
points to a valid - * on successful resource - * @return SS_OK success - * SS_EOF if no more resources in list - * SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see ss_resourcelist_t{} - * @see ss_resource_t{} - * @see SSOP_ALLOCRESOURCE() - * @see SSOP_DEALLOCRESOURCE() - */ -#define SSOP_GETRESOURCE(ssp, reslist, res) \ - ((ssp)->ssop_getresource(reslist, res)) - -/* centry accessor functions */ - - -/**# - * SSOP_GETCENTRY read cache entry metadata for a particular cache entry - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param centry_info pointer to ss_centry_info_t owned by caller. - * field sc_res must point to a valid ss_resource_t - * obtained from a successful call to SSOP_GETRESOURCE(). - * the rest of the structure is filled with valid - * metadata on successful return - * @return SS_OK if data was read successfully - * SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_centry_info_t - * @see ss_resource_t{} - * @see SSOP_GETRESOURCE() - */ -#define SSOP_GETCENTRY(ssp, centry_info) \ - ((ssp)->ssop_getcentry(centry_info)) - -/**# - * SSOP_SETCENTRY write cache entry metadata for a particular cache entry - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param centry_info pointer to ss_centry_info_t owned by caller. - * field sc_res must point to a valid ss_resource_t - * obtained from a successful call to SSOP_GETRESOURCE(). - * the remaining fields of the structured are written - * to safestore. 
- * @return SS_OK if data was written successfully - * SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_centry_info_t{} - * @see ss_resource_t{} - * @see SSOP_GETRESOURCE() - */ -#define SSOP_SETCENTRY(ssp, centry_info) \ - ((ssp)->ssop_setcentry(centry_info)) - -/* cache data block read/write and ctl */ - - -/**# - * SSOP_READ_CBLOCK read cache data for a particular cache entry - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param resource pointer to ss_resource_t obtained from a successful - * call to SSOP_GETRESOURCE(). - * @param buf buffer to hold the data - * @param nbyte number of bytes to read - * @param srcoffset byte location from beginning of the cache block - * represented by resource to read the data from - * - * @return SS_OK if data was read successfully - * SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_resource_t{} - * @see SSOP_GETRESOURCE() - */ -#define SSOP_READ_CBLOCK(ssp, resource, buf, nbyte, srcoffset) \ - ((ssp)->ssop_read_cblock(resource, buf, nbyte, srcoffset)) -/**# - * SSOP_WRITE_CBLOCK write cache data for a particular cache entry - * @param ssp a safestore_ops_t pointer obtained from sst_open() - * @param resource pointer to ss_resource_t obtained from a successful - * call to SSOP_GETRESOURCE(). - * @param buf buffer to hold the data - * @param nbyte number of bytes to write - * @param destoffset byte location from beginning the cache block - * represented by resource to write the data to - * - * @return SS_OK if data was read successfully - * SS_ERR otherwise - * - * @see safestore_ops_t{} - * @see sst_open() - * @see ss_resource_t{} - * @see SSOP_GETRESOURCE() - */ -#define SSOP_WRITE_CBLOCK(ssp, resource, buf, nbyte, destoffset) \ - ((ssp)->ssop_write_cblock(resource, buf, nbyte, destoffset)) - -/**# - * SSOP_CTL perform a safestore control function - * @param cmd integer specifying the command to execute, e.g. SSIOC_STATS. 
- * some commands may be specific to a safestore module type - * @param arg a uintptr_t that has additional information that is - * needed by the safestore module to perform the command. it - * may be an int or a pionter to a module specifc structure. - * @return SS_OK success - * errno otherwise - */ -#define SSOP_CTL(ssp, cmd, arg) ((ssp)->ssop_ctl(cmd, arg)) - -/* END CSTYLED */ - -/* general control definitions supported by safestore modules */ - -#define SSCTL(x) (('S'<< 16)|('S'<< 8)|(x)) - -#define SSIOC_STATS SSCTL(1) -#define SSIOC_SETFLAG SSCTL(2) - -/* structure definitions */ - -typedef struct ssioc_stats_s { - int wq_inq; /* write queue count */ -} ssioc_stats_t; - -extern void sst_init(); -extern void sst_register_mod(safestore_ops_t *); -extern void sst_unregister_mod(safestore_ops_t *); -extern safestore_ops_t *sst_open(uint_t, ...); -extern int sst_close(safestore_ops_t *); - -extern safestore_ops_t *sdbc_safestore; - -extern int _sd_centry_shift; - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_SAFESTORE_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/safestore_impl.h b/usr/src/uts/common/avs/ns/sdbc/safestore_impl.h deleted file mode 100644 index bc623446d0..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/safestore_impl.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SD_SAFESTORE_IMPL_H -#define _SD_SAFESTORE_IMPL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -/* ss config stages */ -#define SD_WR_SLP_Q_MAX 256 - -/* - * Global fields for cache LRU entry. Fault tolerant structure in RMS. - */ - -#define INCX(x) (x = (x + 1 + SD_WR_SLP_Q_MAX) % SD_WR_SLP_Q_MAX) -#define DECX(x) (x = (x - 1 + SD_WR_SLP_Q_MAX) % SD_WR_SLP_Q_MAX) - -typedef struct _sd_wr_slp_queue { - kcondvar_t slp_wqcv; - int slp_wqneed; -} _sd_wr_slp_queue_t; - -typedef struct _sd_wr_queue { - struct ss_wr_cctl *wq_qtop; /* Top of write control blocks */ - kmutex_t wq_qlock; /* allocation spinlock */ - int wq_inq; /* number of write blocks available in q */ - int wq_nentries; /* total Number of write blocks in q */ - unsigned int wq_slp_top; - unsigned int wq_slp_index; - unsigned int wq_slp_inq; - _sd_wr_slp_queue_t wq_slp[SD_WR_SLP_Q_MAX]; -} _sd_writeq_t; - -#define WQ_SET_NEED(q, need, i) { \ - (q->wq_slp[i].slp_wqneed = need); \ -} - -#define WQ_SVWAIT_BOTTOM(q, need) \ -{ \ - int ix = q->wq_slp_index; \ - INCX(q->wq_slp_index); \ - WQ_SET_NEED(q, need, ix); \ - cv_wait(&q->wq_slp[ix].slp_wqcv, &q->wq_qlock); \ - mutex_exit(&q->wq_qlock); \ -} - -#define WQ_SVWAIT_TOP(q, need) \ -{ \ - DECX(q->wq_slp_top); \ - WQ_SET_NEED(q, need, q->wq_slp_top); \ - cv_wait(&q->wq_slp[q->wq_slp_top].slp_wqcv, &q->wq_qlock);\ - mutex_exit(&q->wq_qlock); \ -} - -#define WQ_NEED_SIG(q) \ - (q->wq_slp_inq && (q->wq_slp[q->wq_slp_top].slp_wqneed <= q->wq_inq)) - -#define WQ_SVSIG(q) \ -{ \ - int tp = q->wq_slp_top; \ - INCX(q->wq_slp_top); \ - q->wq_slp[tp].slp_wqneed = 0; \ - 
cv_signal(&q->wq_slp[tp].slp_wqcv); \ -} - -/* - * cache entry information - * note -- this structure is a identical to the first 4 words of - * the exported ss_centry_info_t. internal copies depened on this - * fact. changes to this structure may require changes to the - * *getcentry() and *setcentry() functions. - * - */ -typedef struct ss_centry_info_impl_s { - int sci_cd; /* Cache descriptor */ - nsc_off_t sci_fpos; /* File position */ - int sci_dirty; /* Dirty mask */ - int sci_flag; /* CC_PINNABLE | CC_PINNED */ -} ss_centry_info_impl_t; - -/* - * The write control structure has information about the remote page that - * will mirror a write. - */ -typedef struct ss_wr_cctl { - struct ss_wr_cctl *wc_next; /* chaining queue entries */ - caddr_t wc_addr; /* points to data address */ - ss_centry_info_impl_t *wc_gl_info; /* information for the page */ - unsigned char wc_flag; /* flag */ -} ss_wr_cctl_t; - -/* volume information */ -typedef struct ss_voldata_impl_s { - char svi_volname[NSC_MAXPATH]; /* Filename in RMS for failover */ - int svi_cd; /* NOTE may need dual node map info */ - int svi_pinned; /* Device has failed/pinned blocks */ - int svi_attached; /* Node which has device attached */ - int svi_devidsz; /* unique dev id length */ - uchar_t svi_devid[NSC_MAXPATH]; /* wwn id - physical devs only */ - int svi_reserved[13]; /* Reserved global space */ -} ss_voldata_impl_t; - -extern int _sd_fill_pattern(caddr_t addr, uint_t pat, uint_t size); -extern int _sdbc_writeq_configure(_sd_writeq_t *); -extern void _sdbc_writeq_deconfigure(_sd_writeq_t *); -extern void ss_release_write(ss_wr_cctl_t *, _sd_writeq_t *); -extern ss_wr_cctl_t *ss_alloc_write(int, int *, _sd_writeq_t *); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_SAFESTORE_IMPL_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/safestore_ram.c b/usr/src/uts/common/avs/ns/sdbc/safestore_ram.c deleted file mode 100644 index 25cee4f1ac..0000000000 --- 
a/usr/src/uts/common/avs/ns/sdbc/safestore_ram.c +++ /dev/null @@ -1,614 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -/* - * RAM Safe Store Module - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> - -#include <sys/nsc_thread.h> -#include "sd_cache.h" -#include "sd_trace.h" -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#include "safestore.h" -#include "safestore_impl.h" -#include "safestore_ram.h" - -extern void _sd_print(int level, char *fmt, ...); - -static int ss_ram_configure(ss_common_config_t *, spcs_s_info_t); -static int ss_ram_deconfigure(int); -static int ss_ram_getvdir(const ss_vdirkey_t *, ss_vdir_t *); -static int ss_ram_getvdirent(const ss_vdir_t *, ss_voldata_t *); -static int ss_ram_getvol(ss_voldata_t *); -static int ss_ram_setvol(const ss_voldata_t *); -static int ss_ram_getcdir(const ss_cdirkey_t *, ss_cdir_t *); -static int ss_ram_getcdirent(ss_cdir_t *, ss_centry_info_t *); -static int ss_ram_allocresource(int, int *, ss_resourcelist_t **); -static void ss_ram_deallocresource(ss_resource_t *); -static int ss_ram_getresource(ss_resourcelist_t **, ss_resource_t **); -static int ss_ram_getcentry(ss_centry_info_t *); -static int ss_ram_setcentry(const ss_centry_info_t *); -static int ss_ram_cblock_read(const ss_resource_t *, void *, int, int); -static int ss_ram_cblock_write(const ss_resource_t *, const void *, int, int); -static int ss_ram_ctl(uint_t, uintptr_t); - - -safestore_ops_t ss_ram_ops = { - "safestore_ram", - SS_M_RAM | SS_T_NONE, - 0, - ss_ram_configure, - ss_ram_deconfigure, - ss_ram_getvdir, - ss_ram_getvdirent, - ss_ram_getvol, - ss_ram_setvol, - ss_ram_getcdir, - ss_ram_getcdirent, - ss_ram_allocresource, - ss_ram_deallocresource, - ss_ram_getresource, - ss_ram_getcentry, - ss_ram_setcentry, - ss_ram_cblock_read, - ss_ram_cblock_write, - ss_ram_ctl -}; - -static void ss_ram_vol_deconfigure(); -static int ss_ram_vol_configure(int); -static int ss_ram_wctl_configure(); -static void 
ss_ram_wctl_deconfigure(void); -static int ss_ram_deconfigure_locked(); - -static kmutex_t ss_ram_config_lock; - -static ss_common_config_t ss_ramcommon_config; -static ss_ram_config_t ss_ram_config; - -static char default_cblock [8192]; - - -#define MEGABYTE (1024*1024) - -void -ss_ram_init() -{ - mutex_init(&ss_ram_config_lock, NULL, MUTEX_DRIVER, NULL); - bzero(&ss_ram_config, sizeof (ss_ram_config_t)); - bzero(&ss_ramcommon_config, sizeof (ss_common_config_t)); - sst_register_mod(&ss_ram_ops); - - ss_ram_config.ss_configured = SS_INITTED; -} - -void -ss_ram_deinit() -{ - mutex_destroy(&ss_ram_config_lock); - sst_unregister_mod(&ss_ram_ops); -} - - -/* ARGSUSED */ -static int -ss_ram_configure(ss_common_config_t *clientptr, spcs_s_info_t kstatus) -{ - - if (clientptr->ssc_wsize == 0) /* choose a default? */ - return (EINVAL); - - mutex_enter(&ss_ram_config_lock); - - /* read in the parameters */ - bcopy(clientptr, &ss_ramcommon_config, sizeof (ss_common_config_t)); - - /* set the page size */ - ss_ramcommon_config.ssc_ss_psize = BLK_SIZE(1); - - /* initialize client page size if not set */ - if (ss_ramcommon_config.ssc_client_psize == 0) - ss_ramcommon_config.ssc_client_psize = - ss_ramcommon_config.ssc_ss_psize; - - /* setup volume directory */ - if (ss_ram_vol_configure(clientptr->ssc_maxfiles)) { - (void) ss_ram_deconfigure_locked(); - mutex_exit(&ss_ram_config_lock); - return (SDBC_ENONETMEM); - } - - /* setup write q */ - if (ss_ram_wctl_configure()) { - (void) ss_ram_deconfigure_locked(); - mutex_exit(&ss_ram_config_lock); - return (SDBC_ENONETMEM); - } - - if (ss_ramcommon_config.ssc_flag & SS_GENPATTERN) { - (void) _sd_fill_pattern(default_cblock, - ss_ramcommon_config.ssc_pattern, - sizeof (default_cblock)); - } - - ss_ram_config.ss_configured = SS_CONFIGURED; - /* update client */ - bcopy(&ss_ramcommon_config, clientptr, sizeof (ss_common_config_t)); - - mutex_exit(&ss_ram_config_lock); - return (SS_OK); -} - -/* acquires the ss_ram_config_lock and 
calls ss_ram_deconfigure_locked() */ -/* ARGSUSED */ -static int -ss_ram_deconfigure(int dirty) -{ - int rc; - - if (ss_ram_config.ss_configured != SS_CONFIGURED) - return (SS_ERR); - - mutex_enter(&ss_ram_config_lock); - rc = ss_ram_deconfigure_locked(); - mutex_exit(&ss_ram_config_lock); - - return (rc); -} - -/* - * internal use only - * caller should acquire config lock before calling this function - */ -static int -ss_ram_deconfigure_locked() -{ - ss_ram_wctl_deconfigure(); - ss_ram_vol_deconfigure(); - - ss_ram_config.ss_configured = 0; - return (SS_OK); -} - -static int -ss_ram_getvdir(const ss_vdirkey_t *key, ss_vdir_t *vdir) -{ - ss_ram_vdir_t *ram_vdir = (ss_ram_vdir_t *)vdir; - int rc = SS_OK; - - if ((key == NULL) || (vdir == NULL)) - return (SS_ERR); - - switch (key->vk_type) { - case VDIR_ALL: - ram_vdir->rv_type = VDIR_ALL; - ram_vdir->rv_u.rv_all.rv_current = - ss_ram_config.sn_volumes; - ram_vdir->rv_u.rv_all.rv_end = - ss_ram_config.sn_volumes + - ss_ramcommon_config.ssc_maxfiles; - break; - case VDIR_VOL: - case VDIR_NODE: - default: - rc = SS_ERR; - break; - } - - return (rc); -} - - -static int -ss_ram_getvdirent(const ss_vdir_t *vdir, ss_voldata_t *vol) -{ - int rc = SS_OK; - - ss_ram_vdir_t *ram_vdir = (ss_ram_vdir_t *)vdir; - - if (vol == NULL) - return (SS_ERR); - - if (vdir == NULL) - return (SS_ERR); - - switch (ram_vdir->rv_type) { - case VDIR_ALL: - if (ram_vdir->rv_u.rv_all.rv_current == - ram_vdir->rv_u.rv_all.rv_end) { - rc = SS_EOF; - } else { - /* stuff client copy with token */ - vol->sv_vol = (ss_vol_t *) - ram_vdir->rv_u.rv_all.rv_current++; - - /* get the volume data */ - rc = ss_ram_getvol(vol); - } - break; - case VDIR_VOL: - case VDIR_NODE: - default: - rc = SS_ERR; - break; - } - - return (rc); -} - -static int -ss_ram_getvol(ss_voldata_t *voldata) -{ - ss_voldata_impl_t *ramvoldata; - - if (voldata == NULL) - return (SS_ERR); - - /* get the pointer to the volume entry */ - ramvoldata = (ss_voldata_impl_t *)voldata->sv_vol; 
- - if (ramvoldata == NULL) - return (SS_ERR); - - /* stuff the client structure from the ram entry */ - voldata->sv_cd = ramvoldata->svi_cd; - voldata->sv_pinned = ramvoldata->svi_pinned; - voldata->sv_attached = ramvoldata->svi_attached; - voldata->sv_devidsz = ramvoldata->svi_devidsz; - - bcopy(ramvoldata->svi_volname, voldata->sv_volname, - sizeof (voldata->sv_volname)); - - bcopy(ramvoldata->svi_devid, voldata->sv_devid, - sizeof (voldata->sv_devid)); - return (SS_OK); -} - -static int -ss_ram_setvol(const ss_voldata_t *voldata) -{ - ss_voldata_impl_t *ramvoldata; - - if (voldata == NULL) - return (SS_ERR); - - /* get the pointer to the volume entry */ - ramvoldata = (ss_voldata_impl_t *)voldata->sv_vol; - - if (ramvoldata == NULL) - return (SS_ERR); - - /* load the volume entry from the client structure */ - ramvoldata->svi_cd = voldata->sv_cd; - ramvoldata->svi_pinned = voldata->sv_pinned; - ramvoldata->svi_attached = voldata->sv_attached; - ramvoldata->svi_devidsz = voldata->sv_devidsz; - bcopy(voldata->sv_volname, ramvoldata->svi_volname, - sizeof (ramvoldata->svi_volname)); - - bcopy(voldata->sv_devid, ramvoldata->svi_devid, - sizeof (ramvoldata->svi_devid)); - return (SS_OK); -} - -static int -ss_ram_getcdir(const ss_cdirkey_t *key, ss_cdir_t *cdir) -{ - ss_ram_cdir_t *ram_cdir = (ss_ram_cdir_t *)cdir; - int rc = 0; - - if ((key == NULL) || (cdir == NULL)) - return (SS_ERR); - - switch (key->ck_type) { - case CDIR_ALL: - { int blocks; - - blocks = ss_ramcommon_config.ssc_wsize / - ss_ramcommon_config.ssc_client_psize; - - ram_cdir->rc_type = CDIR_ALL; - ram_cdir->rc_u.rc_all.rc_current = - ss_ram_config.sn_wr_cctl; - ram_cdir->rc_u.rc_all.rc_end = - ss_ram_config.sn_wr_cctl + blocks; - } - break; - case CDIR_VOL: - case CDIR_NODE: - default: - rc = SS_ERR; - break; - } - - return (rc); -} - -static int -ss_ram_getcdirent(ss_cdir_t *cdir, ss_centry_info_t *centry) -{ - int rc = SS_OK; - - ss_ram_cdir_t *ram_cdir = (ss_ram_cdir_t *)cdir; - - if (centry == 
NULL) - return (SS_ERR); - - if (cdir == NULL) - return (SS_ERR); - - switch (ram_cdir->rc_type) { - case CDIR_ALL: - if (ram_cdir->rc_u.rc_all.rc_current == - ram_cdir->rc_u.rc_all.rc_end) { - rc = SS_EOF; - } else { - /* stuff client copy with token */ - centry->sc_res = (ss_resource_t *) - ram_cdir->rc_u.rc_all.rc_current++; - - /* get the centry data */ - rc = ss_ram_getcentry(centry); - } - break; - case CDIR_VOL: - case CDIR_NODE: - default: - rc = SS_ERR; - break; - } - - return (rc); -} - -static int -ss_ram_allocresource(int need, int *stall, ss_resourcelist_t **reslist) -{ - if (reslist == NULL) - return (SS_ERR); - - *reslist = ((ss_resourcelist_t *)ss_alloc_write(need, stall, - &(ss_ram_config.sn_wr_queue))); - if (*reslist == NULL) /* do sync write */ - return (SS_ERR); - - return (SS_OK); -} - -static void -ss_ram_deallocresource(ss_resource_t *res) -{ - ss_release_write((ss_wr_cctl_t *)res, &(ss_ram_config.sn_wr_queue)); -} - -static int -ss_ram_getresource(ss_resourcelist_t **reslist, ss_resource_t **res) -{ - if ((res == NULL) || (reslist == NULL)) { - return (SS_ERR); - } - - if (*reslist == NULL) - return (SS_EOF); - - *res = (ss_resource_t *)(*reslist); - *reslist = (ss_resourcelist_t *) - ((ss_wr_cctl_t *)(*reslist))->wc_next; - - return (SS_OK); -} - -static int -ss_ram_getcentry(ss_centry_info_t *centry) -{ - ss_wr_cctl_t *wctl; - ss_centry_info_impl_t *ramcentry = (ss_centry_info_impl_t *)centry; - - if (centry == NULL) - return (SS_ERR); - else - wctl = (ss_wr_cctl_t *)centry->sc_res; - - if (wctl == NULL) - return (SS_ERR); - - if (wctl->wc_gl_info) - bcopy(wctl->wc_gl_info, ramcentry, - sizeof (ss_centry_info_impl_t)); - else - return (SS_ERR); - - return (SS_OK); -} - -static int -ss_ram_setcentry(const ss_centry_info_t *centry) -{ - ss_wr_cctl_t *wctl; - ss_centry_info_impl_t *ramcentry = (ss_centry_info_impl_t *)centry; - - if (centry == NULL) - return (SS_ERR); - else - wctl = (ss_wr_cctl_t *)centry->sc_res; - - if (wctl == NULL) - 
return (SS_ERR); - - if (wctl->wc_gl_info) - bcopy(ramcentry, wctl->wc_gl_info, - sizeof (ss_centry_info_impl_t)); - else - return (SS_ERR); - - return (SS_OK); -} - - -static int -ss_ram_cblock_read(const ss_resource_t *res, void *buf, - int count, int srcoffset) -{ - if ((res == NULL) || (buf == NULL)) - return (SS_ERR); - - if ((srcoffset < 0) || - (srcoffset > ss_ramcommon_config.ssc_client_psize)) - return (SS_ERR); - - bcopy(default_cblock + srcoffset, buf, count); - - return (SS_OK); -} - -static int -ss_ram_cblock_write(const ss_resource_t *res, - const void *buf, int count, int destoffset) -{ - if ((res == NULL) || (buf == NULL)) - return (SS_ERR); - - if ((destoffset < 0) || - (destoffset > ss_ramcommon_config.ssc_client_psize)) - return (SS_ERR); - - bcopy(buf, default_cblock + destoffset, count); - - return (SS_OK); -} - -static int -ss_ram_ctl(uint_t cmd, uintptr_t arg) -{ - int rc = SS_OK; - - switch (cmd) { - case SSIOC_STATS: - ((ssioc_stats_t *)arg)->wq_inq = - ss_ram_config.sn_wr_queue.wq_inq; - break; - default: - cmn_err(CE_WARN, "ss_nvs_ctl: cmd %x not supported", - cmd); - rc = ENOTTY; - break; - } - - return (rc); -} - -static int -ss_ram_vol_configure(int maxvols) -{ - if ((ss_ram_config.sn_volumes = kmem_zalloc(maxvols * - sizeof (ss_voldata_impl_t), KM_NOSLEEP)) == NULL) - return (-1); - - return (0); -} - -static void -ss_ram_vol_deconfigure() -{ - int maxvols = ss_ramcommon_config.ssc_maxfiles; - - if (ss_ram_config.sn_volumes) - kmem_free(ss_ram_config.sn_volumes, - maxvols * sizeof (ss_voldata_impl_t)); -} - -static int -ss_ram_wctl_configure() -{ - int blocks; - ss_wr_cctl_t *wentry; - static ss_centry_info_impl_t *gl; - int i; - - blocks = ss_ramcommon_config.ssc_wsize / - ss_ramcommon_config.ssc_client_psize; - - if ((ss_ram_config.sn_wr_cctl = (ss_wr_cctl_t *) - kmem_zalloc(blocks * sizeof (ss_wr_cctl_t), KM_NOSLEEP)) - == NULL) { - return (-1); - } - - if ((ss_ram_config.sn_gl_centry_info = (ss_centry_info_impl_t *) - 
kmem_zalloc(blocks * sizeof (ss_centry_info_impl_t), - KM_NOSLEEP)) == NULL) { - return (-1); - } - - /* - * Mini-DSP: no write/ft area - * (ie forced_wrthru clear) - */ - - if (_sdbc_writeq_configure(&(ss_ram_config.sn_wr_queue)) != 0) - return (-1); - - gl = ss_ram_config.sn_gl_centry_info; - - wentry = ss_ram_config.sn_wr_cctl; - for (i = 0; i < blocks; ++i, ++wentry) { - wentry->wc_gl_info = gl++; - ss_release_write(wentry, &(ss_ram_config.sn_wr_queue)); - } - - ss_ram_config.sn_wr_queue.wq_nentries = blocks; - - return (0); -} - -static void -ss_ram_wctl_deconfigure() -{ - int blocks; - - _sdbc_writeq_deconfigure(&(ss_ram_config.sn_wr_queue)); - - blocks = ss_ramcommon_config.ssc_wsize / - ss_ramcommon_config.ssc_client_psize; - - if (ss_ram_config.sn_wr_cctl) { - kmem_free(ss_ram_config.sn_wr_cctl, - blocks * sizeof (ss_wr_cctl_t)); - } - - if (ss_ram_config.sn_gl_centry_info) { - kmem_free(ss_ram_config.sn_gl_centry_info, - blocks * sizeof (ss_centry_info_impl_t)); - } -} diff --git a/usr/src/uts/common/avs/ns/sdbc/safestore_ram.h b/usr/src/uts/common/avs/ns/sdbc/safestore_ram.h deleted file mode 100644 index 4367a92ec0..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/safestore_ram.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SD_SAFESTORE_RAM_H -#define _SD_SAFESTORE_RAM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -typedef struct ss_ram_config_s { - uint_t ss_configured; /* configured bit */ - ss_voldata_impl_t *sn_volumes; /* volume directory */ - struct ss_wr_cctl *sn_wr_cctl; /* the write control blocks */ - ss_centry_info_impl_t *sn_gl_centry_info; /* dirty bits */ - struct _sd_wr_queue sn_wr_queue; /* the write queue */ -} ss_ram_config_t; - -/* internal volume directory stream struct must be same size as ss_vdir_t */ -typedef struct ss_ram_vdir_s { - intptr_t rv_type; /* stream type */ - union { - struct { - ss_voldata_impl_t *rv_current; - ss_voldata_impl_t *rv_end; - } rv_all; - - struct { - intptr_t v[5]; - } rv_vol; - - struct { - intptr_t n[5]; - } rv_node; - } rv_u; -} ss_ram_vdir_t; - -/* internal centry stream struct must be same size as ss_cdir_t */ -typedef struct ss_ram_cdir_t_s { - intptr_t rc_type; /* stream type */ - union { - struct { - ss_wr_cctl_t *rc_current; - ss_wr_cctl_t *rc_end; - } rc_all; - - struct { - intptr_t v[5]; - } rc_vol; - - struct { - intptr_t n[5]; - } rc_node; - } rc_u; -}ss_ram_cdir_t; - -typedef ss_wr_cctl_t *ss_ram_resource_t; -typedef ss_wr_cctl_t *ss_ram_resourcelist_t; - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_SAFESTORE_RAM_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_bcache.c b/usr/src/uts/common/avs/ns/sdbc/sd_bcache.c deleted file mode 100644 index c3d32ae08a..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_bcache.c +++ /dev/null @@ -1,7484 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject 
to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/buf.h> -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "sd_bcache.h" -#include "sd_trace.h" -#include "sd_io.h" -#include "sd_bio.h" -#include "sd_ft.h" -#include "sd_misc.h" -#include "sd_pcu.h" - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> -#include <sys/nsctl/safestore.h> -#ifndef DS_DDICT -#include <sys/ddi_impldefs.h> -#endif - - -/* - * kstat interface - */ - -static kstat_t *sdbc_global_stats_kstat; -static int sdbc_global_stats_update(kstat_t *ksp, int rw); - -typedef struct { - kstat_named_t ci_sdbc_count; - kstat_named_t ci_sdbc_loc_count; - kstat_named_t ci_sdbc_rdhits; - kstat_named_t ci_sdbc_rdmiss; - kstat_named_t ci_sdbc_wrhits; - kstat_named_t ci_sdbc_wrmiss; - kstat_named_t ci_sdbc_blksize; - kstat_named_t ci_sdbc_lru_blocks; -#ifdef DEBUG - kstat_named_t 
ci_sdbc_lru_noreq; - kstat_named_t ci_sdbc_lru_req; -#endif - kstat_named_t ci_sdbc_wlru_inq; - kstat_named_t ci_sdbc_cachesize; - kstat_named_t ci_sdbc_numblocks; - kstat_named_t ci_sdbc_num_shared; - kstat_named_t ci_sdbc_wrcancelns; - kstat_named_t ci_sdbc_destaged; - kstat_named_t ci_sdbc_nodehints; -} sdbc_global_stats_t; - -static sdbc_global_stats_t sdbc_global_stats = { - {SDBC_GKSTAT_COUNT, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_LOC_COUNT, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_RDHITS, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_RDMISS, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_WRHITS, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_WRMISS, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_BLKSIZE, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_LRU_BLOCKS, KSTAT_DATA_ULONG}, -#ifdef DEBUG - {SDBC_GKSTAT_LRU_NOREQ, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_LRU_REQ, KSTAT_DATA_ULONG}, -#endif - {SDBC_GKSTAT_WLRU_INQ, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_CACHESIZE, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_NUMBLOCKS, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_NUM_SHARED, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_WRCANCELNS, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_DESTAGED, KSTAT_DATA_ULONG}, - {SDBC_GKSTAT_NODEHINTS, KSTAT_DATA_ULONG}, -}; - -static kstat_t **sdbc_cd_kstats; -static kstat_t **sdbc_cd_io_kstats; -static kmutex_t *sdbc_cd_io_kstats_mutexes; -static kstat_t *sdbc_global_io_kstat; -static kmutex_t sdbc_global_io_kstat_mutex; -static int sdbc_cd_stats_update(kstat_t *ksp, int rw); -static int cd_kstat_add(int cd); -static int cd_kstat_remove(int cd); - -typedef struct { - kstat_named_t ci_sdbc_vol_name; - kstat_named_t ci_sdbc_failed; - kstat_named_t ci_sdbc_cd; - kstat_named_t ci_sdbc_cache_read; - kstat_named_t ci_sdbc_cache_write; - kstat_named_t ci_sdbc_disk_read; - kstat_named_t ci_sdbc_disk_write; - kstat_named_t ci_sdbc_filesize; - kstat_named_t ci_sdbc_numdirty; - kstat_named_t ci_sdbc_numio; - kstat_named_t ci_sdbc_numfail; - kstat_named_t ci_sdbc_destaged; - kstat_named_t ci_sdbc_wrcancelns; - kstat_named_t ci_sdbc_cdhints; -} sdbc_cd_stats_t; - 
-static sdbc_cd_stats_t sdbc_cd_stats = { - {SDBC_CDKSTAT_VOL_NAME, KSTAT_DATA_CHAR}, - {SDBC_CDKSTAT_FAILED, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_CD, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_CACHE_READ, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_CACHE_WRITE, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_DISK_READ, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_DISK_WRITE, KSTAT_DATA_ULONG}, -#ifdef NSC_MULTI_TERABYTE - {SDBC_CDKSTAT_FILESIZE, KSTAT_DATA_UINT64}, -#else - {SDBC_CDKSTAT_FILESIZE, KSTAT_DATA_ULONG}, -#endif - {SDBC_CDKSTAT_NUMDIRTY, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_NUMIO, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_NUMFAIL, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_DESTAGED, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_WRCANCELNS, KSTAT_DATA_ULONG}, - {SDBC_CDKSTAT_CDHINTS, KSTAT_DATA_ULONG}, -}; - -#ifdef DEBUG -/* - * dynmem kstat interface - */ -static kstat_t *sdbc_dynmem_kstat_dm; -static int simplect_dm; -static int sdbc_dynmem_kstat_update_dm(kstat_t *ksp, int rw); - -typedef struct { - kstat_named_t ci_sdbc_monitor_dynmem; - kstat_named_t ci_sdbc_max_dyn_list; - kstat_named_t ci_sdbc_cache_aging_ct1; - kstat_named_t ci_sdbc_cache_aging_ct2; - kstat_named_t ci_sdbc_cache_aging_ct3; - kstat_named_t ci_sdbc_cache_aging_sec1; - kstat_named_t ci_sdbc_cache_aging_sec2; - kstat_named_t ci_sdbc_cache_aging_sec3; - kstat_named_t ci_sdbc_cache_aging_pcnt1; - kstat_named_t ci_sdbc_cache_aging_pcnt2; - kstat_named_t ci_sdbc_max_holds_pcnt; - - kstat_named_t ci_sdbc_alloc_ct; - kstat_named_t ci_sdbc_dealloc_ct; - kstat_named_t ci_sdbc_history; - kstat_named_t ci_sdbc_nodatas; - kstat_named_t ci_sdbc_candidates; - kstat_named_t ci_sdbc_deallocs; - kstat_named_t ci_sdbc_hosts; - kstat_named_t ci_sdbc_pests; - kstat_named_t ci_sdbc_metas; - kstat_named_t ci_sdbc_holds; - kstat_named_t ci_sdbc_others; - kstat_named_t ci_sdbc_notavail; - - kstat_named_t ci_sdbc_process_directive; - - kstat_named_t ci_sdbc_simplect; -} sdbc_dynmem_dm_t; - -static sdbc_dynmem_dm_t sdbc_dynmem_dm = { - {SDBC_DMKSTAT_MONITOR_DYNMEM, 
KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_MAX_DYN_LIST, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CACHE_AGING_CT1, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CACHE_AGING_CT2, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CACHE_AGING_CT3, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CACHE_AGING_SEC1, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CACHE_AGING_SEC2, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CACHE_AGING_SEC3, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CACHE_AGING_PCNT1, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CACHE_AGING_PCNT2, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_MAX_HOLDS_PCNT, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_ALLOC_CNT, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_DEALLOC_CNT, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_HISTORY, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_NODATAS, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_CANDIDATES, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_DEALLOCS, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_HOSTS, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_PESTS, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_METAS, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_HOLDS, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_OTHERS, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_NOTAVAIL, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_PROCESS_DIRECTIVE, KSTAT_DATA_ULONG}, - {SDBC_DMKSTAT_SIMPLECT, KSTAT_DATA_ULONG} -}; -#endif - -/* End of dynmem kstats */ - -#ifdef DEBUG -int *dmchainpull_table; /* dmchain wastage stats */ -#endif - -/* - * dynmem process vars - */ -extern _dm_process_vars_t dynmem_processing_dm; - -/* metadata for volumes */ -ss_voldata_t *_sdbc_gl_file_info; - -size_t _sdbc_gl_file_info_size; - -/* metadata for cache write blocks */ -static ss_centry_info_t *_sdbc_gl_centry_info; - -/* wblocks * sizeof(ss_centry_info_t) */ -static size_t _sdbc_gl_centry_info_size; - -static int _SD_DELAY_QUEUE = 1; -static int sdbc_allocb_inuse, sdbc_allocb_lost, sdbc_allocb_hit; -static int sdbc_allocb_pageio1, sdbc_allocb_pageio2; -static int sdbc_centry_hit, sdbc_centry_inuse, sdbc_centry_lost; -static int sdbc_dmchain_not_avail; -static int sdbc_allocb_deallocd; -static int sdbc_centry_deallocd; -static int 
sdbc_check_cot; -static int sdbc_ra_hash; /* 1-block read-ahead fails due to hash hit */ -static int sdbc_ra_none; /* 1-block read-ahead fails due to "would block" */ - - -/* - * Set the following variable to 1 to enable pagelist io mutual - * exclusion on all _sd_alloc_buf() operations. - * - * This is set to ON to prevent front end / back end races between new - * NSC_WRTHRU io operations coming in through _sd_alloc_buf(), and - * previously written data being flushed out to disk by the sdbc - * flusher at the back end. - * -- see bugtraq 4287564 - * -- Simon Crosland, Mon Nov 8 16:34:09 GMT 1999 - */ -static int sdbc_pageio_always = 1; - -int sdbc_use_dmchain = 0; /* start time switch for dm chaining */ -int sdbc_prefetch1 = 1; /* do 1-block read-ahead */ -/* - * if sdbc_static_cache is 1 allocate all cache memory at startup. - * deallocate only at shutdown. - */ -int sdbc_static_cache = 1; - -#ifdef DEBUG -/* - * Pagelist io mutual exclusion debug facility. - */ -#define SDBC_PAGEIO_OFF 0 /* no debug */ -#define SDBC_PAGEIO_RDEV 1 /* force NSC_PAGEIO for specified dev */ -#define SDBC_PAGEIO_RAND 2 /* randomly force NSC_PAGEIO */ -#define SDBC_PAGEIO_ALL 3 /* always force NSC_PAGEIO */ -static int sdbc_pageio_debug = SDBC_PAGEIO_OFF; -static dev_t sdbc_pageio_rdev = (dev_t)-1; -#endif - -/* - * INF SD cache global data - */ - -_sd_cd_info_t *_sd_cache_files; -_sd_stats_t *_sd_cache_stats; -kmutex_t _sd_cache_lock; - -_sd_hash_table_t *_sd_htable; -_sd_queue_t _sd_lru_q; - -_sd_cctl_t *_sd_cctl[_SD_CCTL_GROUPS]; -int _sd_cctl_groupsz; - -_sd_net_t _sd_net_config; - -extern krwlock_t sdbc_queue_lock; - -unsigned int _sd_node_hint; - -#define _SD_LRU_Q (&_sd_lru_q) -int BLK_FBAS; /* number of FBA's in a cache block */ -int CACHE_BLOCK_SIZE; /* size in bytes of a cache block */ -int CBLOCKS; -_sd_bitmap_t BLK_FBA_BITS; -static int sdbc_prefetch_valid_cnt; -static int sdbc_prefetch_busy_cnt; -static int sdbc_prefetch_trailing; -static int sdbc_prefetch_deallocd; 
-static int sdbc_prefetch_pageio1; -static int sdbc_prefetch_pageio2; -static int sdbc_prefetch_hit; -static int sdbc_prefetch_lost; -static int _sd_prefetch_opt = 1; /* 0 to disable & use _prefetch_sb_vec[] */ -static nsc_vec_t _prefetch_sb_vec[_SD_MAX_BLKS + 1]; - -_sd_bitmap_t _fba_bits[] = { - 0x0000, 0x0001, 0x0003, 0x0007, - 0x000f, 0x001f, 0x003f, 0x007f, - 0x00ff, -#if defined(_SD_8K_BLKSIZE) - 0x01ff, 0x03ff, 0x07ff, - 0x0fff, 0x1fff, 0x3fff, 0x7fff, - 0xffff, -#endif -}; - - -static int _sd_ccsync_cnt = 256; -static _sd_cctl_sync_t *_sd_ccent_sync; - -nsc_io_t *sdbc_io; - -#ifdef _MULTI_DATAMODEL -_sd_stats32_t *_sd_cache_stats32 = NULL; -#endif - - -#ifdef DEBUG -int cmn_level = CE_PANIC; -#else -int cmn_level = CE_WARN; -#endif - -/* - * Forward declare all statics that are used before defined to enforce - * parameter checking - * Some (if not all) of these could be removed if the code were reordered - */ - -static void _sdbc_stats_deconfigure(void); -static int _sdbc_stats_configure(int cblocks); -static int _sdbc_lruq_configure(_sd_queue_t *); -static void _sdbc_lruq_deconfigure(void); -static int _sdbc_mem_configure(int cblocks, spcs_s_info_t kstatus); -static void _sdbc_mem_deconfigure(int cblocks); -static void _sd_ins_queue(_sd_queue_t *, _sd_cctl_t *centry); -static int _sd_flush_cd(int cd); -static int _sd_check_buffer_alloc(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, - _sd_buf_handle_t **hp); -static int _sd_doread(_sd_buf_handle_t *handle, _sd_cctl_t *cc_ent, - nsc_off_t fba_pos, nsc_size_t fba_len, int flag); -static void _sd_async_read_ea(blind_t xhandle, nsc_off_t fba_pos, - nsc_size_t fba_len, int error); -static void _sd_async_write_ea(blind_t xhandle, nsc_off_t fba_pos, - nsc_size_t fba_len, int error); -static void _sd_queue_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, - nsc_size_t fba_len); -static int _sd_remote_store(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, - nsc_size_t fba_len); -static int _sd_copy_direct(_sd_buf_handle_t 
*handle1, _sd_buf_handle_t *handle2, - nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len); -static int _sd_sync_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, - nsc_size_t fba_len, int flag); -static int _sd_sync_write2(_sd_buf_handle_t *wr_handle, nsc_off_t wr_st_pos, - nsc_size_t fba_len, int flag, _sd_buf_handle_t *rd_handle, - nsc_off_t rd_st_pos); -static int sdbc_fd_attach_cd(blind_t xcd); -static int sdbc_fd_detach_cd(blind_t xcd); -static int sdbc_fd_flush_cd(blind_t xcd); -static int _sdbc_gl_centry_configure(spcs_s_info_t); -static int _sdbc_gl_file_configure(spcs_s_info_t); -static void _sdbc_gl_centry_deconfigure(void); -static void _sdbc_gl_file_deconfigure(void); -static int sdbc_doread_prefetch(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, - nsc_size_t fba_len); -static _sd_bitmap_t update_dirty(_sd_cctl_t *cc_ent, sdbc_cblk_fba_t st_off, - sdbc_cblk_fba_t st_len); -static int _sd_prefetch_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, - int flag, _sd_buf_handle_t *handle, int locked); - -/* dynmem support */ -static int _sd_setup_category_on_type(_sd_cctl_t *header); -static int _sd_setup_mem_chaining(_sd_cctl_t *header, int flag); - -static int sdbc_check_cctl_cot(_sd_cctl_t *); - -static int sdbc_dmqueues_configure(); -static void sdbc_dmqueues_deconfigure(); -static _sd_cctl_t *sdbc_get_dmchain(int, int *, int); -static int sdbc_dmchain_avail(_sd_cctl_t *); -void sdbc_requeue_dmchain(_sd_queue_t *, _sd_cctl_t *, int, int); -static void sdbc_ins_dmqueue_back(_sd_queue_t *, _sd_cctl_t *); -void sdbc_ins_dmqueue_front(_sd_queue_t *, _sd_cctl_t *); -void sdbc_remq_dmchain(_sd_queue_t *, _sd_cctl_t *); -static void sdbc_clear_dmchain(_sd_cctl_t *, _sd_cctl_t *); -void sdbc_requeue_head_dm_try(_sd_cctl_t *); -static _sd_cctl_t *sdbc_alloc_dmc(int, nsc_off_t, nsc_size_t, int *, - sdbc_allocbuf_t *, int); -static _sd_cctl_t *sdbc_alloc_lru(int, nsc_off_t, int *, int); -static _sd_cctl_t *sdbc_alloc_from_dmchain(int, nsc_off_t, sdbc_allocbuf_t 
*, - int); -static void sdbc_centry_init_dm(_sd_cctl_t *); -static int sdbc_centry_memalloc_dm(_sd_cctl_t *, int, int); -static void sdbc_centry_alloc_end(sdbc_allocbuf_t *); - - - - -/* _SD_DEBUG */ -#if defined(_SD_DEBUG) || defined(DEBUG) -static int _sd_cctl_valid(_sd_cctl_t *); -#endif - -static -nsc_def_t _sdbc_fd_def[] = { - "Attach", (uintptr_t)sdbc_fd_attach_cd, 0, - "Detach", (uintptr_t)sdbc_fd_detach_cd, 0, - "Flush", (uintptr_t)sdbc_fd_flush_cd, 0, - 0, 0, 0 -}; - - -/* - * _sdbc_cache_configure - initialize cache blocks, queues etc. - * - * ARGUMENTS: - * cblocks - Number of cache blocks - * - * RETURNS: - * 0 on success. - * SDBC_EENABLEFAIL or SDBC_EMEMCONFIG on failure. - * - */ - - - -int -_sdbc_cache_configure(int cblocks, spcs_s_info_t kstatus) -{ - CBLOCKS = cblocks; - - _sd_cache_files = (_sd_cd_info_t *) - kmem_zalloc(sdbc_max_devs * sizeof (_sd_cd_info_t), - KM_SLEEP); - - if (_sdbc_stats_configure(cblocks)) - return (SDBC_EENABLEFAIL); - - if (sdbc_use_dmchain) { - if (sdbc_dmqueues_configure()) - return (SDBC_EENABLEFAIL); - } else { - if (_sdbc_lruq_configure(_SD_LRU_Q)) - return (SDBC_EENABLEFAIL); - } - - - if (_sdbc_mem_configure(cblocks, kstatus)) - return (SDBC_EMEMCONFIG); - - CACHE_BLOCK_SIZE = BLK_SIZE(1); - BLK_FBAS = FBA_NUM(CACHE_BLOCK_SIZE); - BLK_FBA_BITS = _fba_bits[BLK_FBAS]; - - sdbc_allocb_pageio1 = 0; - sdbc_allocb_pageio2 = 0; - sdbc_allocb_hit = 0; - sdbc_allocb_inuse = 0; - sdbc_allocb_lost = 0; - sdbc_centry_inuse = 0; - sdbc_centry_lost = 0; - sdbc_centry_hit = 0; - sdbc_centry_deallocd = 0; - sdbc_dmchain_not_avail = 0; - sdbc_allocb_deallocd = 0; - - sdbc_prefetch_valid_cnt = 0; - sdbc_prefetch_busy_cnt = 0; - sdbc_prefetch_trailing = 0; - sdbc_prefetch_deallocd = 0; - sdbc_prefetch_pageio1 = 0; - sdbc_prefetch_pageio2 = 0; - sdbc_prefetch_hit = 0; - sdbc_prefetch_lost = 0; - - sdbc_check_cot = 0; - sdbc_prefetch1 = 1; - sdbc_ra_hash = 0; - sdbc_ra_none = 0; - - return (0); -} - -/* - * _sdbc_cache_deconfigure - 
cache is being deconfigured. Release any - * memory that we acquired during the configuration process and return - * to the unconfigured state. - * - * NOTE: all users of the cache should be inactive at this point, - * i.e. we are unregistered from sd and all cache daemons/threads are - * gone. - * - */ -void -_sdbc_cache_deconfigure(void) -{ - /* CCIO shutdown must happen before memory is free'd */ - - if (_sd_cache_files) { - kmem_free(_sd_cache_files, - sdbc_max_devs * sizeof (_sd_cd_info_t)); - _sd_cache_files = (_sd_cd_info_t *)NULL; - } - - - BLK_FBA_BITS = 0; - BLK_FBAS = 0; - CACHE_BLOCK_SIZE = 0; - _sdbc_mem_deconfigure(CBLOCKS); - _sdbc_gl_centry_deconfigure(); - _sdbc_gl_file_deconfigure(); - - if (sdbc_use_dmchain) - sdbc_dmqueues_deconfigure(); - else - _sdbc_lruq_deconfigure(); - _sdbc_stats_deconfigure(); - - CBLOCKS = 0; -} - - -/* - * _sdbc_stats_deconfigure - cache is being deconfigured turn off - * stats. This could seemingly do more but we leave most of the - * data intact until cache is configured again. 
- * - */ -static void -_sdbc_stats_deconfigure(void) -{ - int i; - -#ifdef DEBUG - if (sdbc_dynmem_kstat_dm) { - kstat_delete(sdbc_dynmem_kstat_dm); - sdbc_dynmem_kstat_dm = NULL; - } -#endif - - if (sdbc_global_stats_kstat) { - kstat_delete(sdbc_global_stats_kstat); - sdbc_global_stats_kstat = NULL; - } - - if (sdbc_cd_kstats) { - for (i = 0; i < sdbc_max_devs; i++) { - if (sdbc_cd_kstats[i]) { - kstat_delete(sdbc_cd_kstats[i]); - sdbc_cd_kstats[i] = NULL; - } - } - kmem_free(sdbc_cd_kstats, sizeof (kstat_t *) * sdbc_max_devs); - sdbc_cd_kstats = NULL; - } - - if (sdbc_global_io_kstat) { - kstat_delete(sdbc_global_io_kstat); - mutex_destroy(&sdbc_global_io_kstat_mutex); - sdbc_global_io_kstat = NULL; - } - - if (sdbc_cd_io_kstats) { - for (i = 0; i < sdbc_max_devs; i++) { - if (sdbc_cd_io_kstats[i]) { - kstat_delete(sdbc_cd_io_kstats[i]); - sdbc_cd_io_kstats[i] = NULL; - } - } - kmem_free(sdbc_cd_io_kstats, sizeof (kstat_t *) * - sdbc_max_devs); - sdbc_cd_io_kstats = NULL; - } - - if (sdbc_cd_io_kstats_mutexes) { - /* mutexes are already destroyed in cd_kstat_remove() */ - kmem_free(sdbc_cd_io_kstats_mutexes, - sizeof (kmutex_t) * sdbc_max_devs); - sdbc_cd_io_kstats_mutexes = NULL; - } - - - if (_sd_cache_stats) { - kmem_free(_sd_cache_stats, - sizeof (_sd_stats_t) + - (sdbc_max_devs - 1) * sizeof (_sd_shared_t)); - _sd_cache_stats = NULL; - } -#ifdef _MULTI_DATAMODEL - if (_sd_cache_stats32) { - kmem_free(_sd_cache_stats32, sizeof (_sd_stats32_t) + - (sdbc_max_devs - 1) * sizeof (_sd_shared_t)); - _sd_cache_stats32 = NULL; - } -#endif -} - -static int -_sdbc_stats_configure(int cblocks) -{ - - _sd_cache_stats = kmem_zalloc(sizeof (_sd_stats_t) + - (sdbc_max_devs - 1) * sizeof (_sd_shared_t), KM_SLEEP); - _sd_cache_stats->st_blksize = (int)BLK_SIZE(1); - _sd_cache_stats->st_cachesize = cblocks * BLK_SIZE(1); - _sd_cache_stats->st_numblocks = cblocks; - _sd_cache_stats->st_wrcancelns = 0; - _sd_cache_stats->st_destaged = 0; -#ifdef _MULTI_DATAMODEL - 
_sd_cache_stats32 = kmem_zalloc(sizeof (_sd_stats32_t) + - (sdbc_max_devs - 1) * sizeof (_sd_shared_t), KM_SLEEP); -#endif - - /* kstat implementation - global stats */ - sdbc_global_stats_kstat = kstat_create(SDBC_KSTAT_MODULE, 0, - SDBC_KSTAT_GSTATS, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED, - sizeof (sdbc_global_stats)/sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE); - - if (sdbc_global_stats_kstat != NULL) { - sdbc_global_stats_kstat->ks_data = &sdbc_global_stats; - sdbc_global_stats_kstat->ks_update = sdbc_global_stats_update; - sdbc_global_stats_kstat->ks_private = _sd_cache_stats; - kstat_install(sdbc_global_stats_kstat); - } else { - cmn_err(CE_WARN, "!sdbc: gstats kstat failed"); - } - - /* global I/O kstats */ - sdbc_global_io_kstat = kstat_create(SDBC_KSTAT_MODULE, 0, - SDBC_IOKSTAT_GSTATS, "disk", KSTAT_TYPE_IO, 1, 0); - - if (sdbc_global_io_kstat) { - mutex_init(&sdbc_global_io_kstat_mutex, NULL, MUTEX_DRIVER, - NULL); - sdbc_global_io_kstat->ks_lock = - &sdbc_global_io_kstat_mutex; - kstat_install(sdbc_global_io_kstat); - } - - /* - * kstat implementation - cd stats - * NOTE: one kstat instance for each open cache descriptor - */ - sdbc_cd_kstats = kmem_zalloc(sizeof (kstat_t *) * sdbc_max_devs, - KM_SLEEP); - - /* - * kstat implementation - i/o kstats per cache descriptor - * NOTE: one I/O kstat instance for each cd - */ - sdbc_cd_io_kstats = kmem_zalloc(sizeof (kstat_t *) * sdbc_max_devs, - KM_SLEEP); - - sdbc_cd_io_kstats_mutexes = kmem_zalloc(sizeof (kmutex_t) * - sdbc_max_devs, KM_SLEEP); - -#ifdef DEBUG - /* kstat implementation - dynamic memory stats */ - sdbc_dynmem_kstat_dm = kstat_create(SDBC_KSTAT_MODULE, 0, - SDBC_KSTAT_DYNMEM, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED, - sizeof (sdbc_dynmem_dm)/sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE); - - if (sdbc_dynmem_kstat_dm != NULL) { - sdbc_dynmem_kstat_dm->ks_data = &sdbc_dynmem_dm; - sdbc_dynmem_kstat_dm->ks_update = sdbc_dynmem_kstat_update_dm; - 
sdbc_dynmem_kstat_dm->ks_private = &dynmem_processing_dm; - kstat_install(sdbc_dynmem_kstat_dm); - } else { - cmn_err(CE_WARN, "!sdbc: dynmem kstat failed"); - } -#endif - - return (0); -} - -/* - * sdbc_dmqueues_configure() - * initialize the queues of dynamic memory chains. - */ - -_sd_queue_t *sdbc_dm_queues; -static int max_dm_queues; - - -static int -sdbc_dmqueues_configure() -{ - int i; - - /* - * CAUTION! this code depends on max_dyn_list not changing - * if it does change behavior may be incorrect, as cc_alloc_size_dm - * depends on max_dyn_list and indexes to dmqueues are derived from - * cc_alloc_size_dm. - * see _sd_setup_category_on_type() and _sd_dealloc_dm() - * TODO: prevent max_dyn_list from on-the-fly modification (easy) or - * allow for on-the-fly changes to number of dm queues (hard). - */ - max_dm_queues = dynmem_processing_dm.max_dyn_list; - - ++max_dm_queues; /* need a "0" queue for centrys with no memory */ - - sdbc_dm_queues = (_sd_queue_t *) - kmem_zalloc(max_dm_queues * sizeof (_sd_queue_t), KM_SLEEP); - -#ifdef DEBUG - dmchainpull_table = (int *)kmem_zalloc(max_dm_queues * - max_dm_queues * sizeof (int), KM_SLEEP); -#endif - - for (i = 0; i < max_dm_queues; ++i) { - (void) _sdbc_lruq_configure(&sdbc_dm_queues[i]); - sdbc_dm_queues[i].sq_dmchain_cblocks = i; - } - - return (0); -} - -static void -sdbc_dmqueues_deconfigure() -{ - /* CAUTION! 
this code depends on max_dyn_list not changing */ - - if (sdbc_dm_queues) - kmem_free(sdbc_dm_queues, max_dm_queues * sizeof (_sd_queue_t)); - sdbc_dm_queues = NULL; - max_dm_queues = 0; -} - -#define GOOD_LRUSIZE(q) ((q->sq_inq >= 0) || (q->sq_inq <= CBLOCKS)) - -/* - * _sdbc_lruq_configure - initialize the lru queue - * - * ARGUMENTS: NONE - * RETURNS: 0 - * - */ - -static int -_sdbc_lruq_configure(_sd_queue_t *_sd_lru) -{ - - _sd_lru->sq_inq = 0; - - mutex_init(&_sd_lru->sq_qlock, NULL, MUTEX_DRIVER, NULL); - - _sd_lru->sq_qhead.cc_next = _sd_lru->sq_qhead.cc_prev - = &(_sd_lru->sq_qhead); - return (0); -} - -/* - * _sdbc_lruq_deconfigure - deconfigure the lru queue - * - * ARGUMENTS: NONE - * - */ - -static void -_sdbc_lruq_deconfigure(void) -{ - _sd_queue_t *_sd_lru; - - _sd_lru = _SD_LRU_Q; - - mutex_destroy(&_sd_lru->sq_qlock); - bzero(_sd_lru, sizeof (_sd_queue_t)); - -} - -/* - * _sdbc_mem_configure - initialize the cache memory. - * Create and initialize the hash table. - * Create cache control blocks and fill them with relevent - * information and enqueue onto the lru queue. - * Initialize the Write control blocks (blocks that contain - * information as to where the data will be mirrored) - * Initialize the Fault tolerant blocks (blocks that contain - * information about the mirror nodes dirty writes) - * - * ARGUMENTS: - * cblocks - Number of cache blocks. 
- * RETURNS: 0 - * - */ -static int -_sdbc_mem_configure(int cblocks, spcs_s_info_t kstatus) -{ - int num_blks, i, blk; - _sd_cctl_t *centry; - _sd_net_t *netc; - _sd_cctl_t *prev_entry_dm, *first_entry_dm; - - if ((_sd_htable = _sdbc_hash_configure(cblocks)) == NULL) { - spcs_s_add(kstatus, SDBC_ENOHASH); - return (-1); - } - - _sd_cctl_groupsz = (cblocks / _SD_CCTL_GROUPS) + - ((cblocks % _SD_CCTL_GROUPS) != 0); - - for (i = 0; i < _SD_CCTL_GROUPS; i++) { - _sd_cctl[i] = (_sd_cctl_t *) - nsc_kmem_zalloc(_sd_cctl_groupsz * sizeof (_sd_cctl_t), - KM_SLEEP, sdbc_cache_mem); - - if (_sd_cctl[i] == NULL) { - spcs_s_add(kstatus, SDBC_ENOCB); - return (-1); - } - } - - _sd_ccent_sync = (_sd_cctl_sync_t *) - nsc_kmem_zalloc(_sd_ccsync_cnt * sizeof (_sd_cctl_sync_t), - KM_SLEEP, sdbc_local_mem); - - if (_sd_ccent_sync == NULL) { - spcs_s_add(kstatus, SDBC_ENOCCTL); - return (-1); - } - - for (i = 0; i < _sd_ccsync_cnt; i++) { - mutex_init(&_sd_ccent_sync[i]._cc_lock, NULL, MUTEX_DRIVER, - NULL); - cv_init(&_sd_ccent_sync[i]._cc_blkcv, NULL, CV_DRIVER, NULL); - } - - blk = 0; - - netc = &_sd_net_config; - - num_blks = (netc->sn_cpages * (int)netc->sn_psize)/BLK_SIZE(1); - - prev_entry_dm = 0; - first_entry_dm = 0; - for (i = 0; i < num_blks; i++, blk++) { - centry = _sd_cctl[(blk/_sd_cctl_groupsz)] + - (blk%_sd_cctl_groupsz); - centry->cc_sync = &_sd_ccent_sync[blk % _sd_ccsync_cnt]; - centry->cc_next = centry->cc_prev = NULL; - centry->cc_dirty_next = centry->cc_dirty_link = NULL; - centry->cc_await_use = centry->cc_await_page = 0; - centry->cc_inuse = centry->cc_pageio = 0; - centry->cc_flag = 0; - centry->cc_iocount = 0; - centry->cc_valid = 0; - - if (!first_entry_dm) - first_entry_dm = centry; - if (prev_entry_dm) - prev_entry_dm->cc_link_list_dm = centry; - prev_entry_dm = centry; - centry->cc_link_list_dm = first_entry_dm; - centry->cc_data = 0; - centry->cc_write = NULL; - centry->cc_dirty = 0; - - { - _sd_queue_t *q; - if (sdbc_use_dmchain) { - q = 
&sdbc_dm_queues[0]; - centry->cc_cblocks = 0; - } else - q = _SD_LRU_Q; - - _sd_ins_queue(q, centry); - } - - } - - if (_sdbc_gl_centry_configure(kstatus) != 0) - return (-1); - - if (_sdbc_gl_file_configure(kstatus) != 0) - return (-1); - - return (0); -} - -/* - * _sdbc_gl_file_configure() - * allocate and initialize space for the global filename data. - * - */ -static int -_sdbc_gl_file_configure(spcs_s_info_t kstatus) -{ - ss_voldata_t *fileinfo; - ss_voldata_t tempfinfo; - ss_vdir_t vdir; - ss_vdirkey_t key; - int err = 0; - - _sdbc_gl_file_info_size = safestore_config.ssc_maxfiles * - sizeof (ss_voldata_t); - - if ((_sdbc_gl_file_info = kmem_zalloc(_sdbc_gl_file_info_size, - KM_NOSLEEP)) == NULL) { - spcs_s_add(kstatus, SDBC_ENOSFNV); - return (-1); - } - - /* setup the key to get a directory stream of all volumes */ - key.vk_type = CDIR_ALL; - - fileinfo = _sdbc_gl_file_info; - - /* - * if coming up after a crash, "refresh" the host - * memory copy from safestore. - */ - if (_sdbc_warm_start()) { - - if (SSOP_GETVDIR(sdbc_safestore, &key, &vdir)) { - cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure): " - "cannot read safestore"); - return (-1); - } - - - /* - * cycle through the vdir getting volume data - * and volume tokens - */ - - while ((err = SSOP_GETVDIRENT(sdbc_safestore, &vdir, fileinfo)) - == SS_OK) { - ++fileinfo; - } - - if (err != SS_EOF) { - /* - * fail to configure since - * recovery is not possible. 
- */ - spcs_s_add(kstatus, SDBC_ENOREFRESH); - return (-1); - } - - } else { /* normal initialization, not a warm start */ - - /* - * if this fails, continue: cache will start - * in writethru mode - */ - - if (SSOP_GETVDIR(sdbc_safestore, &key, &vdir)) { - cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure): " - "cannot read safestore"); - return (-1); - } - - /* - * cycle through the vdir getting just the volume tokens - * and initializing volume entries - */ - - while ((err = SSOP_GETVDIRENT(sdbc_safestore, &vdir, - &tempfinfo)) == 0) { - /* - * initialize the host memory copy of the - * global file region. this means setting the - * _pinned and _attached fields to _SD_NO_HOST - * because the default of zero conflicts with - * the min nodeid of zero. - */ - fileinfo->sv_vol = tempfinfo.sv_vol; - fileinfo->sv_pinned = _SD_NO_HOST; - fileinfo->sv_attached = _SD_NO_HOST; - fileinfo->sv_cd = _SD_NO_CD; - - /* initialize the directory entry */ - if ((err = SSOP_SETVOL(sdbc_safestore, fileinfo)) - == SS_ERR) { - cmn_err(CE_WARN, - "!sdbc(_sdbc_gl_file_configure): " - "volume entry write failure %p", - (void *)fileinfo->sv_vol); - break; - } - - ++fileinfo; - } - - /* coming up clean, continue in w-t mode */ - if (err != SS_EOF) - cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure) " - "unable to init safe store volinfo"); - } - - return (0); -} - -static void -_sdbc_gl_centry_deconfigure(void) -{ - if (_sdbc_gl_centry_info) - kmem_free(_sdbc_gl_centry_info, _sdbc_gl_centry_info_size); - - _sdbc_gl_centry_info = NULL; - _sdbc_gl_centry_info_size = 0; -} - -static int -_sdbc_gl_centry_configure(spcs_s_info_t kstatus) -{ - - int wblocks; - ss_centry_info_t *cinfo; - ss_cdirkey_t key; - ss_cdir_t cdir; - int err = 0; - - - wblocks = safestore_config.ssc_wsize / BLK_SIZE(1); - _sdbc_gl_centry_info_size = sizeof (ss_centry_info_t) * wblocks; - - if ((_sdbc_gl_centry_info = kmem_zalloc(_sdbc_gl_centry_info_size, - KM_NOSLEEP)) == NULL) { - cmn_err(CE_WARN, 
"!sdbc(_sdbc_gl_centry_configure) " - "alloc failed for gl_centry_info region"); - - _sdbc_gl_centry_deconfigure(); - return (-1); - } - - /* - * synchronize the centry info area with safe store - */ - - /* setup the key to get a directory stream of all centrys */ - key.ck_type = CDIR_ALL; - - cinfo = _sdbc_gl_centry_info; - - if (_sdbc_warm_start()) { - - if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) { - cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure): " - "cannot read safestore"); - return (-1); - } - - - /* - * cycle through the cdir getting resource - * tokens and reading centrys - */ - - while ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, cinfo)) - == 0) { - ++cinfo; - } - - if (err != SS_EOF) { - /* - * fail to configure since - * recovery is not possible. - */ - _sdbc_gl_centry_deconfigure(); - spcs_s_add(kstatus, SDBC_EGLDMAFAIL); - return (-1); - } - - } else { - - if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) { - cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure): " - "cannot read safestore"); - return (-1); - } - - /* - * cycle through the cdir getting resource - * tokens and initializing centrys - */ - - while ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, cinfo)) - == 0) { - cinfo->sc_cd = -1; - cinfo->sc_fpos = -1; - - if ((err = SSOP_SETCENTRY(sdbc_safestore, cinfo)) - == SS_ERR) { - cmn_err(CE_WARN, - "!sdbc(_sdbc_gl_centry_configure): " - "cache entry write failure %p", - (void *)cinfo->sc_res); - break; - } - - ++cinfo; - } - - /* coming up clean, continue in w-t mode */ - if (err != SS_EOF) { - cmn_err(CE_WARN, "!sdbc(sdbc_gl_centry_configure) " - "_sdbc_gl_centry_info initialization failed"); - } - } - - return (0); -} - - -static void -_sdbc_gl_file_deconfigure(void) -{ - - if (_sdbc_gl_file_info) - kmem_free(_sdbc_gl_file_info, _sdbc_gl_file_info_size); - - _sdbc_gl_file_info = NULL; - - _sdbc_gl_file_info_size = 0; -} - - -/* - * _sdbc_mem_deconfigure - deconfigure the cache memory. 
- * Release any memory/locks/sv's acquired during _sdbc_mem_configure. - * - * ARGUMENTS: - * cblocks - Number of cache blocks. - * - */ -/* ARGSUSED */ -static void -_sdbc_mem_deconfigure(int cblocks) -{ - int i; - - if (_sd_ccent_sync) { - for (i = 0; i < _sd_ccsync_cnt; i++) { - mutex_destroy(&_sd_ccent_sync[i]._cc_lock); - cv_destroy(&_sd_ccent_sync[i]._cc_blkcv); - } - nsc_kmem_free(_sd_ccent_sync, - _sd_ccsync_cnt * sizeof (_sd_cctl_sync_t)); - } - _sd_ccent_sync = NULL; - - for (i = 0; i < _SD_CCTL_GROUPS; i++) { - if (_sd_cctl[i] != NULL) { - nsc_kmem_free(_sd_cctl[i], - _sd_cctl_groupsz * sizeof (_sd_cctl_t)); - _sd_cctl[i] = NULL; - } - } - _sd_cctl_groupsz = 0; - - _sdbc_hash_deconfigure(_sd_htable); - _sd_htable = NULL; - -} - - -#if defined(_SD_DEBUG) || defined(DEBUG) -static int -_sd_cctl_valid(_sd_cctl_t *addr) -{ - _sd_cctl_t *end; - int i, valid; - - valid = 0; - for (i = 0; i < _SD_CCTL_GROUPS; i++) { - end = _sd_cctl[i] + _sd_cctl_groupsz; - if (addr >= _sd_cctl[i] && addr < end) { - valid = 1; - break; - } - } - - return (valid); -} -#endif - - -/* - * _sd_ins_queue - insert centry into LRU queue - * (during initialization, locking not required) - */ -static void -_sd_ins_queue(_sd_queue_t *q, _sd_cctl_t *centry) -{ - _sd_cctl_t *q_head; - - ASSERT(_sd_cctl_valid(centry)); - - q_head = &q->sq_qhead; - centry->cc_prev = q_head; - centry->cc_next = q_head->cc_next; - q_head->cc_next->cc_prev = centry; - q_head->cc_next = centry; - q->sq_inq++; - - ASSERT(GOOD_LRUSIZE(q)); -} - - - -void -_sd_requeue(_sd_cctl_t *centry) -{ - _sd_queue_t *q = _SD_LRU_Q; - - /* was FAST */ - mutex_enter(&q->sq_qlock); -#if defined(_SD_DEBUG) - if (1) { - _sd_cctl_t *cp, *cn, *qp; - cp = centry->cc_prev; - cn = centry->cc_next; - qp = (q->sq_qhead).cc_prev; - if (!_sd_cctl_valid(centry) || - (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) || - (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) || - !_sd_cctl_valid(qp)) - cmn_err(CE_PANIC, - "_sd_requeue %x prev %x next %x 
qp %x", - centry, cp, cn, qp); - } -#endif - centry->cc_prev->cc_next = centry->cc_next; - centry->cc_next->cc_prev = centry->cc_prev; - centry->cc_next = &(q->sq_qhead); - centry->cc_prev = q->sq_qhead.cc_prev; - q->sq_qhead.cc_prev->cc_next = centry; - q->sq_qhead.cc_prev = centry; - centry->cc_seq = q->sq_seq++; - /* was FAST */ - mutex_exit(&q->sq_qlock); - (q->sq_req_stat)++; - -} - -void -_sd_requeue_head(_sd_cctl_t *centry) -{ - _sd_queue_t *q = _SD_LRU_Q; - - /* was FAST */ - mutex_enter(&q->sq_qlock); -#if defined(_SD_DEBUG) - if (1) { - _sd_cctl_t *cp, *cn, *qn; - cp = centry->cc_prev; - cn = centry->cc_next; - qn = (q->sq_qhead).cc_prev; - if (!_sd_cctl_valid(centry) || - (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) || - (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) || - !_sd_cctl_valid(qn)) - cmn_err(CE_PANIC, - "_sd_requeue_head %x prev %x next %x qn %x", - centry, cp, cn, qn); - } -#endif - centry->cc_prev->cc_next = centry->cc_next; - centry->cc_next->cc_prev = centry->cc_prev; - centry->cc_prev = &(q->sq_qhead); - centry->cc_next = q->sq_qhead.cc_next; - q->sq_qhead.cc_next->cc_prev = centry; - q->sq_qhead.cc_next = centry; - centry->cc_seq = q->sq_seq++; - centry->cc_flag &= ~CC_QHEAD; - /* was FAST */ - mutex_exit(&q->sq_qlock); -} - - - -/* - * _sd_open - Open a file. - * - * ARGUMENTS: - * filename - Name of the file to be opened. - * flag - Flag associated with open. - * (currently used to determine a ckd device) - * RETURNS: - * cd - the cache descriptor. - */ - -int -_sd_open(char *filename, int flag) -{ - int cd; - - if (!_sd_cache_initialized) { - cmn_err(CE_WARN, "!sdbc(_sd_open) cache not initialized"); - return (-EINVAL); - } - cd = _sd_open_cd(filename, -1, flag); - SDTRACE(SDF_OPEN, (cd < 0) ? 
SDT_INV_CD : cd, 0, SDT_INV_BL, 0, cd); - - return (cd); -} - - -static int -_sd_open_io(char *filename, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - _sd_cd_info_t *cdi; - int cd; - int rc = 0; - - if ((cd = _sd_open(filename, flag)) >= 0) { - - cdi = &(_sd_cache_files[cd]); - cdi->cd_iodev = iodev; - nsc_set_owner(cdi->cd_rawfd, cdi->cd_iodev); - - *cdp = (blind_t)(unsigned long)cd; - } else - rc = -cd; - - return (rc); -} - - - -int -_sd_open_cd(char *filename, const int cd, const int flag) -{ - int new_cd, rc = 0, alloc_cd = -1; - ss_voldata_t *cdg; - int preexists = 0; - _sd_cd_info_t *cdi; - int failover_open, open_failed; - major_t devmaj; - minor_t devmin; - - if (_sdbc_shutdown_in_progress) - return (-EIO); - - if (strlen(filename) > (NSC_MAXPATH-1)) - return (-ENAMETOOLONG); - - /* - * If the cd is >= 0, then this is a open for a specific cd. - * This happens when the mirror node crashes, and we attempt to - * reopen the files with the same cache descriptors as existed on - * the other node - */ - -retry_open: - failover_open = 0; - open_failed = 0; - if (cd >= 0) { - failover_open++; - cdi = &(_sd_cache_files[cd]); - mutex_enter(&_sd_cache_lock); - if (cdi->cd_info == NULL) - cdi->cd_info = &_sd_cache_stats->st_shared[cd]; - else if (cdi->cd_info->sh_alloc && - strcmp(cdi->cd_info->sh_filename, filename)) { - cmn_err(CE_WARN, "!sdbc(_sd_open_cd) cd %d mismatch", - cd); - mutex_exit(&_sd_cache_lock); - return (-EEXIST); - } - - if (cdi->cd_info->sh_failed != 2) { - if (cdi->cd_info->sh_alloc != 0) - preexists = 1; - else { - cdi->cd_info->sh_alloc = CD_ALLOC_IN_PROGRESS; - (void) strcpy(cdi->cd_info->sh_filename, - filename); - if (_sd_cache_stats->st_count < sdbc_max_devs) - _sd_cache_stats->st_count++; - } - } - - mutex_exit(&_sd_cache_lock); - alloc_cd = cd; - - goto known_cd; - } - - new_cd = 0; - mutex_enter(&_sd_cache_lock); - - for (cdi = &(_sd_cache_files[new_cd]), - cdg = _sdbc_gl_file_info + new_cd; - new_cd < (sdbc_max_devs); new_cd++, 
cdi++, cdg++) { - if (strlen(cdg->sv_volname) != 0) - if (strcmp(cdg->sv_volname, filename)) - continue; - - if (cdi->cd_info == NULL) - cdi->cd_info = &_sd_cache_stats->st_shared[new_cd]; - - if (cdi->cd_info->sh_failed != 2) { - if (cdi->cd_info->sh_alloc != 0) - preexists = 1; - else { - if (cd == -2) { - mutex_exit(&_sd_cache_lock); - return (-1); - } - cdi->cd_info->sh_alloc = CD_ALLOC_IN_PROGRESS; - (void) strcpy(cdi->cd_info->sh_filename, - filename); - (void) strcpy(cdg->sv_volname, filename); - - cdg->sv_cd = new_cd; - /* update safestore */ - SSOP_SETVOL(sdbc_safestore, cdg); - if (_sd_cache_stats->st_count < sdbc_max_devs) - _sd_cache_stats->st_count++; - cdi->cd_flag = 0; - } - } - alloc_cd = new_cd; - break; - } - - mutex_exit(&_sd_cache_lock); - - if (alloc_cd == -1) - return (-ENOSPC); - -known_cd: - /* - * If preexists: someone else is attempting to open this file as - * well. Do only one open, but block everyone else here till the - * open is completed. - */ - if (preexists) { - while (cdi->cd_info->sh_alloc == CD_ALLOC_IN_PROGRESS) { - delay(drv_usectohz(20000)); - } - if ((cdi->cd_info->sh_alloc != CD_ALLOCATED)) - goto retry_open; - return (alloc_cd); - } - - if (!(cdi->cd_rawfd = - nsc_open(filename, NSC_SDBC_ID|NSC_DEVICE, _sdbc_fd_def, - (blind_t)(unsigned long)alloc_cd, &rc)) || - !nsc_getval(cdi->cd_rawfd, "DevMaj", (int *)&devmaj) || - !nsc_getval(cdi->cd_rawfd, "DevMin", (int *)&devmin)) { - if (cdi->cd_rawfd) { - (void) nsc_close(cdi->cd_rawfd); - cdi->cd_rawfd = NULL; - } - /* - * take into account that there may be pinned data on a - * device that can no longer be opened - */ - open_failed++; - if (!(cdi->cd_info->sh_failed) && !failover_open) { - cdi->cd_info->sh_alloc = 0; - mutex_enter(&_sd_cache_lock); - _sd_cache_stats->st_count--; - mutex_exit(&_sd_cache_lock); - if (!rc) - rc = EIO; - return (-rc); - } - } - - cdi->cd_strategy = nsc_get_strategy(devmaj); - cdi->cd_crdev = makedevice(devmaj, devmin); - cdi->cd_desc = alloc_cd; - 
cdi->cd_dirty_head = cdi->cd_dirty_tail = NULL; - cdi->cd_io_head = cdi->cd_io_tail = NULL; - cdi->cd_hint = 0; -#ifdef DEBUG - /* put the dev_t in the ioerr_inject_table */ - _sdbc_ioj_set_dev(alloc_cd, cdi->cd_crdev); -#endif - - cdi->cd_global = (_sdbc_gl_file_info + alloc_cd); - if (open_failed) { - cdi->cd_info->sh_failed = 2; - } else if (cdi->cd_info->sh_failed != 2) - if ((cdi->cd_global->sv_pinned == _SD_SELF_HOST) && - !failover_open) - cdi->cd_info->sh_failed = 1; - else - cdi->cd_info->sh_failed = 0; - - cdi->cd_flag |= flag; - mutex_init(&cdi->cd_lock, NULL, MUTEX_DRIVER, NULL); - -#ifndef _SD_NOTRACE - (void) _sdbc_tr_configure(alloc_cd); -#endif - cdi->cd_info->sh_alloc = CD_ALLOCATED; - cdi->cd_global = (_sdbc_gl_file_info + alloc_cd); - cdi->cd_info->sh_cd = (unsigned short) alloc_cd; - mutex_enter(&_sd_cache_lock); - _sd_cache_stats->st_loc_count++; - mutex_exit(&_sd_cache_lock); - - if (cd_kstat_add(alloc_cd) < 0) { - cmn_err(CE_WARN, "!Could not create kstats for cache descriptor" - " %d", alloc_cd); - } - - - return (open_failed ? -EIO : alloc_cd); -} - - -/* - * _sd_close - Close a cache descriptor. - * - * ARGUMENTS: - * cd - the cache descriptor to be closed. - * RETURNS: - * 0 on success. - * Error otherwise. - * - * Note: Under Construction. - */ - -int -_sd_close(int cd) -{ - int rc; - _sd_cd_info_t *cdi = &(_sd_cache_files[cd]); - - if (!FILE_OPENED(cd)) { - rc = EINVAL; - goto out; - } - - SDTRACE(ST_ENTER|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, 0); - - mutex_enter(&_sd_cache_lock); - if ((cdi->cd_info->sh_alloc == 0) || - (cdi->cd_info->sh_alloc & CD_CLOSE_IN_PROGRESS)) { - mutex_exit(&_sd_cache_lock); - SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, EINVAL); - rc = EINVAL; - goto out; - } - cdi->cd_info->sh_alloc |= CD_CLOSE_IN_PROGRESS; - mutex_exit(&_sd_cache_lock); - - /* - * _sd_flush_cd() will return -1 for the case where pinned - * data is present, but has been transfered to the mirror - * node. 
In this case it is safe to close the device as - * though _sd_flush_cd() had returned 0. - */ - - rc = _sd_flush_cd(cd); - if (rc == -1) - rc = 0; - - if (rc != 0) { - mutex_enter(&_sd_cache_lock); - if ((rc == EAGAIN) && - (cdi->cd_global->sv_pinned == _SD_NO_HOST)) { - cdi->cd_global->sv_pinned = _SD_SELF_HOST; - SSOP_SETVOL(sdbc_safestore, cdi->cd_global); - } - - cdi->cd_info->sh_alloc &= ~CD_CLOSE_IN_PROGRESS; - mutex_exit(&_sd_cache_lock); - SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, - _SD_CD_WBLK_USED(cd), rc); - goto out; - } - - rc = nsc_close(cdi->cd_rawfd); - if (rc) { - mutex_enter(&_sd_cache_lock); - cdi->cd_info->sh_alloc &= ~CD_CLOSE_IN_PROGRESS; - mutex_exit(&_sd_cache_lock); - SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, rc); - goto out; - } - mutex_enter(&_sd_cache_lock); - _sd_cache_stats->st_loc_count--; - mutex_exit(&_sd_cache_lock); - - if (cd_kstat_remove(cd) < 0) { - cmn_err(CE_WARN, "!Could not remove kstat for cache descriptor " - "%d", cd); - } - - cdi->cd_info->sh_alloc = 0; - cdi->cd_info->sh_failed = 0; - /* cdi->cd_info = NULL; */ - cdi->cd_flag = 0; - SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, NSC_DONE); - rc = NSC_DONE; - goto out; - -out: - return (rc); -} - - -static int -_sd_close_io(blind_t xcd) -{ - _sd_cd_info_t *cdi; - int cd = (int)(unsigned long)xcd; - int rc = 0; - - if ((rc = _sd_close((int)cd)) == NSC_DONE) { - cdi = &(_sd_cache_files[cd]); - cdi->cd_iodev = NULL; - } - - return (rc); -} - - -/* - * _sdbc_remote_store_pinned - reflect pinned/failed blocks for cd - * to our remote mirror. Returns count of blocks reflected or -1 on error. 
- * - */ -int -_sdbc_remote_store_pinned(int cd) -{ - int cnt = 0; - _sd_cd_info_t *cdi = &(_sd_cache_files[cd]); - _sd_cctl_t *cc_ent, *cc_list; - - ASSERT(cd >= 0); - if (cdi->cd_info->sh_failed) { - - if (cdi->cd_global->sv_pinned == _SD_NO_HOST) { - cdi->cd_global->sv_pinned = _SD_SELF_HOST; - SSOP_SETVOL(sdbc_safestore, cdi->cd_global); - } - - mutex_enter(&cdi->cd_lock); - cc_ent = cc_list = cdi->cd_fail_head; - while (cc_ent) { - cnt++; - - /* is this always necessary? jgk */ - - if (SSOP_WRITE_CBLOCK(sdbc_safestore, - cc_ent->cc_write->sc_res, cc_ent->cc_data, - CACHE_BLOCK_SIZE, 0)) { - mutex_exit(&cdi->cd_lock); - return (-1); - } - - /* update the cache block metadata */ - CENTRY_SET_FTPOS(cc_ent); - cc_ent->cc_write->sc_flag = cc_ent->cc_flag; - - cc_ent->cc_write->sc_dirty = CENTRY_DIRTY(cc_ent); - - SSOP_SETCENTRY(sdbc_safestore, cc_ent->cc_write); - - cc_ent = cc_ent->cc_dirty_next; - if (!cc_ent) - cc_ent = cc_list = cc_list->cc_dirty_link; - } - mutex_exit(&cdi->cd_lock); - } - - return (cnt); -} - -/* - * _sd_flush_cd() - * reflect pinned blocks to mirrored node - * wait for dirty blocks to be flushed - * returns: - * EIO I/O failure, or pinned blocks and no mirror - * EAGAIN Hang: count of outstanding writes isn't decreasing - * -1 pinned blocks, reflected to mirror - * 0 success - */ -static int -_sd_flush_cd(int cd) -{ - int rc; - - if ((rc = _sd_wait_for_flush(cd)) == 0) - return (0); - - /* - * if we timed out simply return otherwise - * it must be an i/o type of error - */ - if (rc == EAGAIN) - return (rc); - - if (_sd_is_mirror_down()) - return (EIO); /* already failed, no mirror */ - - /* flush any pinned/failed blocks to mirror */ - if (_sdbc_remote_store_pinned(cd) >= 0) - /* - * At this point it looks like we have blocks on the - * failed list and taking up space on this node but - * no longer have responsibility for the blocks. 
- * These blocks will in fact be freed from the cache - * and the failed list when the mirror picks them up - * from safe storage and then calls _sd_cd_discard_mirror - * which will issue an rpc telling us to finish up. - * - * Should the other node die before sending the rpc then - * we are safe with these blocks simply waiting on the - * failed list. - */ - return (-1); - else - return (rc); -} - -/* - * _sdbc_io_attach_cd -- set up for client access to device, reserve raw device - * - * ARGUMENTS: - * cd - the cache descriptor to attach. - * - * RETURNS: - * 0 on success. - * Error otherwise. - */ -int -_sdbc_io_attach_cd(blind_t xcd) -{ - int rc = 0; - _sd_cd_info_t *cdi; - int cd = (int)(unsigned long)xcd; - - SDTRACE(ST_ENTER|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, 0); - if (!_sd_cache_initialized || - _sdbc_shutdown_in_progress || - !FILE_OPENED(cd)) { - SDTRACE(ST_EXIT|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, EINVAL); - - DTRACE_PROBE(_sdbc_io_attach_cd_end1); - - return (EINVAL); - } - cdi = &(_sd_cache_files[cd]); - - /* - * check if disk is failed without raw device open. If it is, - * it has to be recovered using _sd_disk_online - */ - - if (cdi->cd_global->sv_pinned == _SD_SELF_HOST) { - _sd_print(3, - "_sdbc_io_attach_cd: pinned data. returning EINVAL"); - - DTRACE_PROBE(_sdbc_io_attach_cd_end2); - - return (EINVAL); - } - - if ((cdi->cd_info == NULL) || (cdi->cd_info->sh_failed)) { - DTRACE_PROBE1(_sdbc_io_attach_cd_end3, - struct _sd_shared *, cdi->cd_info); - - return (EINVAL); - } - -#if defined(_SD_FAULT_RES) - /* wait for node recovery to finish */ - if (_sd_node_recovery) - (void) _sd_recovery_wait(); -#endif - - /* this will provoke a sdbc_fd_attach_cd call .. */ - - rc = nsc_reserve(cdi->cd_rawfd, NSC_MULTI); - SDTRACE(ST_EXIT|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, rc); - - return (rc); -} - -/* - * sdbc_fd_attach_cd -- setup cache for access to raw device underlying cd. - * This is provoked by some piece of sdbc doing a reserve on the raw device. 
- * - * ARGUMENTS: - * cd - the cache descriptor to attach. - * - * RETURNS: - * 0 on success. - * Error otherwise. - */ -static int -sdbc_fd_attach_cd(blind_t xcd) -{ - int rc = 0; - int cd = (int)(unsigned long)xcd; - _sd_cd_info_t *cdi; - - if (!_sd_cache_initialized || !FILE_OPENED(cd)) { - SDTRACE(ST_INFO|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, EINVAL); - - DTRACE_PROBE(sdbc_fd_attach_cd_end1); - - return (EINVAL); - } - cdi = &(_sd_cache_files[cd]); - -#if defined(_SD_FAULT_RES) - /* retrieve pinned/failed data */ - if (!_sd_node_recovery) { - (void) _sd_repin_cd(cd); - } -#endif - - rc = nsc_partsize(cdi->cd_rawfd, &cdi->cd_info->sh_filesize); - if (rc != 0) { - SDTRACE(ST_INFO|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, rc); - - DTRACE_PROBE(sdbc_fd_attach_cd_end3); - - return (rc); - } - - cdi->cd_global->sv_attached = _SD_SELF_HOST; - - SSOP_SETVOL(sdbc_safestore, cdi->cd_global); - - mutex_enter(&_sd_cache_lock); - cdi->cd_info->sh_flag |= CD_ATTACHED; - mutex_exit(&_sd_cache_lock); - - return (0); -} - -/* - * _sdbc_io_detach_cd -- release raw device - * Called when a cache client is being detached from this cd. - * - * ARGUMENTS: - * cd - the cache descriptor to detach. - * RETURNS: - * 0 on success. - * Error otherwise. 
- */ -int -_sdbc_io_detach_cd(blind_t xcd) -{ - int cd = (int)(unsigned long)xcd; - _sd_cd_info_t *cdi; - - - SDTRACE(ST_ENTER|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0); - if (!_sd_cache_initialized || !FILE_OPENED(cd)) { - SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EINVAL); - - DTRACE_PROBE(_sdbc_io_detach_cd_end1); - - return (EINVAL); - } - -#if defined(_SD_FAULT_RES) - if (_sd_node_recovery) - (void) _sd_recovery_wait(); -#endif - /* relinquish responsibility for device */ - cdi = &(_sd_cache_files[cd]); - if (!(cdi->cd_rawfd) || !nsc_held(cdi->cd_rawfd)) { - cmn_err(CE_WARN, "!sdbc(_sdbc_detach_cd)(%d) not attached", cd); - SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EPROTO); - DTRACE_PROBE1(_sdbc_io_detach_cd_end2, - nsc_fd_t *, cdi->cd_rawfd); - - return (EPROTO); - } - /* this will provoke/allow a call to sdbc_fd_detach_cd */ - nsc_release(cdi->cd_rawfd); - - SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0); - - return (0); -} - -/* - * _sdbc_detach_cd -- flush dirty writes to disk, release raw device - * Called when raw device is being detached from this cd. - * - * ARGUMENTS: - * cd - the cache descriptor to detach. - * rd_only - non-zero if detach is for read access. - * RETURNS: - * 0 on success. - * Error otherwise. 
- */ -static int -sdbc_detach_cd(blind_t xcd, int rd_only) -{ - int rc; - int cd = (int)(unsigned long)xcd; - _sd_cd_info_t *cdi; - - SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0); - - if (!_sd_cache_initialized || !FILE_OPENED(cd)) { - SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EINVAL); - - DTRACE_PROBE(sdbc_detach_cd_end1); - - return (EINVAL); - } - - - rc = _sd_flush_cd(cd); - if (rc > 0) { - SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, rc); - - DTRACE_PROBE(sdbc_detach_cd_end2); - - return (rc); - } - - if (!rd_only) { - _sd_hash_invalidate_cd(cd); - cdi = &(_sd_cache_files[cd]); - - if (cdi->cd_global->sv_attached == _SD_SELF_HOST) { - cdi->cd_global->sv_attached = _SD_NO_HOST; - SSOP_SETVOL(sdbc_safestore, cdi->cd_global); - } else { - cmn_err(CE_WARN, - "!sdbc(_sdbc_detach_cd) (%d) attached by node %d", - cd, cdi->cd_global->sv_attached); - SDTRACE(SDF_DETACH, cd, 0, SDT_INV_BL, 0, EPROTO); - - DTRACE_PROBE1(sdbc_detach_cd_end3, - int, cdi->cd_global->sv_attached); - - return (EPROTO); - } - - mutex_enter(&_sd_cache_lock); - cdi->cd_info->sh_flag &= ~CD_ATTACHED; - mutex_exit(&_sd_cache_lock); - } - - SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0); - - return (0); -} - -/* - * _sdbc_fd_detach_cd -- flush dirty writes to disk, release raw device - * Called when raw device is being detached from this cd. - * - * ARGUMENTS: - * xcd - the cache descriptor to detach. - * RETURNS: - * 0 on success. - * Error otherwise. - */ -static int -sdbc_fd_detach_cd(blind_t xcd) -{ - return (sdbc_detach_cd(xcd, 0)); -} - -/* - * sdbc_fd_flush_cd - raw device "xcd" is being detached and needs - * flushing. We only need to flush we don't need to hash invalidate - * this file. - */ -static int -sdbc_fd_flush_cd(blind_t xcd) -{ - return (sdbc_detach_cd(xcd, 1)); -} - -/* - * _sd_get_pinned - re-issue PINNED callbacks for cache device - * - * ARGUMENTS: - * cd - the cache descriptor to reissue pinned calbacks from. - * RETURNS: - * 0 on success. 
- * Error otherwise. - */ -int -_sd_get_pinned(blind_t xcd) -{ - _sd_cd_info_t *cdi; - _sd_cctl_t *cc_list, *cc_ent; - int cd = (int)(unsigned long)xcd; - - cdi = &_sd_cache_files[cd]; - - if (cd < 0 || cd >= sdbc_max_devs) { - DTRACE_PROBE(_sd_get_pinned_end1); - return (EINVAL); - } - - if (!FILE_OPENED(cd)) { - DTRACE_PROBE(_sd_get_pinned_end2); - return (0); - } - - mutex_enter(&cdi->cd_lock); - - if (!cdi->cd_info->sh_failed) { - mutex_exit(&cdi->cd_lock); - - DTRACE_PROBE(_sd_get_pinned_end3); - return (0); - } - - cc_ent = cc_list = cdi->cd_fail_head; - while (cc_ent) { - if (CENTRY_PINNED(cc_ent)) - nsc_pinned_data(cdi->cd_iodev, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS); - cc_ent = cc_ent->cc_dirty_next; - if (!cc_ent) - cc_ent = cc_list = cc_list->cc_dirty_link; - } - - mutex_exit(&cdi->cd_lock); - - return (0); -} - -/* - * _sd_allocate_buf - allocate a vector of buffers for io. - * *This call has been replaced by _sd_alloc_buf* - */ - -_sd_buf_handle_t * -_sd_allocate_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag, - int *sts) -{ - _sd_buf_handle_t *handle = NULL; - - *sts = _sd_alloc_buf((blind_t)(unsigned long)cd, fba_pos, fba_len, - flag, &handle); - if (*sts == NSC_HIT) - *sts = NSC_DONE; - return (handle); -} - - -/* - * _sd_prefetch_buf - _sd_alloc_buf w/flag = NSC_RDAHEAD|NSC_RDBUF - * no 'bufvec' (data is not read by caller) - * skip leading valid or busy entries (data available sooner) - * truncate on busy block (to avoid deadlock) - * release trailing valid entries, adjust length before starting I/O. 
- */ -static int -_sd_prefetch_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag, - _sd_buf_handle_t *handle, int locked) -{ - _sd_cd_info_t *cdi; - nsc_off_t cblk; /* position of temp cache block */ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_off_t io_pos; /* offset in FBA's */ - nsc_size_t fba_orig_len; - int sts, stall; - _sd_cctl_t *centry = NULL; - _sd_cctl_t *lentry = NULL; - _sd_cctl_t *ioent = NULL; - _sd_cctl_t *last_ioent = NULL; - sdbc_allocbuf_t alloc_tok = {0}; - int this_entry_type = 0; - nsc_size_t request_blocks = 0; /* number of cache blocks required */ - int pageio; - - handle->bh_flag |= NSC_HACTIVE; - ASSERT(cd >= 0); - cdi = &_sd_cache_files[cd]; - - /* prefetch: truncate if req'd */ - if (fba_len > sdbc_max_fbas) - fba_len = sdbc_max_fbas; - if ((fba_pos + fba_len) > cdi->cd_info->sh_filesize) { - if (fba_pos >= cdi->cd_info->sh_filesize) { - sts = EIO; - goto done; - } - fba_len = cdi->cd_info->sh_filesize - fba_pos; - } - - fba_orig_len = fba_len; - - _SD_SETUP_HANDLE(handle, cd, fba_pos, fba_len, flag); - handle->bh_centry = NULL; - - cblk = FBA_TO_BLK_NUM(fba_pos); - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - - /* - * count number of blocks on chain that is required - */ - if ((nsc_size_t)st_cblk_len >= fba_len) { - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - end_cblk_len = 0; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - } - - request_blocks = 1; /* at least one */ - - /* middle piece */ - request_blocks += (fba_len - (st_cblk_len + end_cblk_len)) >> - BLK_FBA_SHFT; - - if (end_cblk_len) - ++request_blocks; - - stall = 0; - do { - pageio = ((flag & NSC_PAGEIO) != 0 || sdbc_pageio_always != 0); -cget: - if (centry = (_sd_cctl_t *) - _sd_hash_search(cd, cblk, _sd_htable)) { -try: - /* prefetch: skip leading valid 
blocks */ - if ((ioent == NULL) && - SDBC_VALID_BITS(st_cblk_off, st_cblk_len, centry)) { -skip: - sdbc_prefetch_valid_cnt++; - --request_blocks; - lentry = centry; - centry = NULL; - cblk++; - fba_len -= st_cblk_len; - st_cblk_off = 0; - st_cblk_len = (sdbc_cblk_fba_t) - ((fba_len > (nsc_size_t)BLK_FBAS) ? - BLK_FBAS : fba_len); - continue; - } - - if (SET_CENTRY_INUSE(centry)) { - /* - * prefetch: skip leading busy - * or truncate at busy block - */ - if (ioent == NULL) - goto skip; - sdbc_prefetch_busy_cnt++; - fba_orig_len -= fba_len; - fba_len = 0; - centry = lentry; /* backup */ - break; - } - - /* - * bug 4529671 - * now that we own the centry make sure that - * it is still good. it could have been processed - * by _sd_dealloc_dm() in the window between - * _sd_hash_search() and SET_CENTRY_INUSE(). - */ - if ((_sd_cctl_t *) - _sd_hash_search(cd, cblk, _sd_htable) != centry) { - sdbc_prefetch_deallocd++; -#ifdef DEBUG - cmn_err(CE_WARN, - "!prefetch centry %p cd %d cblk %" NSC_SZFMT - " fba_len %" NSC_SZFMT " lost to dealloc?! " - "cc_data %p", - (void *)centry, cd, cblk, fba_orig_len, - (void *)centry->cc_data); -#endif - - CLEAR_CENTRY_INUSE(centry); - continue; - } - - if (CC_CD_BLK_MATCH(cd, cblk, centry)) { - /* - * Do pagelist io mutual exclusion - * before messing with the centry. 
- */ - if (pageio && SET_CENTRY_PAGEIO(centry)) { - /* flusher not done with pageio */ - /* - * prefetch: skip leading busy - * or truncate at busy block - */ - CLEAR_CENTRY_INUSE(centry); - if (ioent == NULL) - goto skip; - sdbc_prefetch_pageio1++; - fba_orig_len -= fba_len; - fba_len = 0; - centry = lentry; /* backup */ - break; - - } - - sdbc_prefetch_hit++; - this_entry_type = HASH_ENTRY_DM; - pageio = 0; - centry->cc_toflush = 0; - - centry->cc_hits++; - - /* this will reset the age flag */ - sdbc_centry_init_dm(centry); - - DTRACE_PROBE1(_sd_prefetch_buf, - _sd_cctl_t *, centry); - } else { - /* block mismatch */ - sdbc_prefetch_lost++; - - CLEAR_CENTRY_INUSE(centry); - continue; - } - } else { - centry = sdbc_centry_alloc(cd, cblk, request_blocks, - &stall, &alloc_tok, ALLOC_NOWAIT); - - if (centry == NULL) { - /* - * prefetch: cache is very busy. just do - * the i/o for the blocks already acquired, - * if any. - */ - fba_orig_len -= fba_len; - fba_len = 0; - /* - * if we have a chain of centry's - * then back up (set centry to lentry). - * if there is no chain (ioent == NULL) - * then centry remains NULL. this can occur - * if all previous centrys were hash hits - * on valid blocks that were processed in - * the skip logic above. - */ - if (ioent) - centry = lentry; /* backup */ - break; - } - - /* - * dmchaining adjustment. - * if centry was obtained from the dmchain - * then clear local pageio variable because the - * centry already has cc_pageio set. - */ - if (CENTRY_PAGEIO(centry)) - pageio = 0; - - DTRACE_PROBE1(_sd_alloc_buf, _sd_cctl_t *, centry); - - this_entry_type = ELIGIBLE_ENTRY_DM; - if (centry->cc_aging_dm & FOUND_IN_HASH_DM) - this_entry_type = HASH_ENTRY_DM; - else { - if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM) - this_entry_type = HOLD_ENTRY_DM; - } - } - - centry->cc_chain = NULL; - - centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM); - - /* - * Do pagelist io mutual exclusion now if we did not do - * it above. 
- */ - - if (pageio && SET_CENTRY_PAGEIO(centry)) { - /* flusher not done with pageio */ - sdbc_prefetch_pageio2++; - - /* - * prefetch: skip leading busy - * or truncate at busy block - */ - CLEAR_CENTRY_INUSE(centry); - if (ioent == NULL) - goto skip; - sdbc_prefetch_busy_cnt++; - fba_orig_len -= fba_len; - fba_len = 0; - centry = lentry; /* backup */ - break; - } - - pageio = 0; - - fba_len -= st_cblk_len; - - if (ioent == NULL) { - if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, - centry)) { - io_pos = BLK_TO_FBA_NUM(cblk) + st_cblk_off; - ioent = last_ioent = centry; - } else { - DATA_LOG(SDF_ALLOC, centry, st_cblk_off, - st_cblk_len); - DTRACE_PROBE4(_sd_prefetch_buf_data1, - uint64_t, (uint64_t)(BLK_TO_FBA_NUM(cblk) + - st_cblk_off), int, st_cblk_len, - char *, *(int64_t *)(centry->cc_data + - FBA_SIZE(st_cblk_off)), char *, - *(int64_t *)(centry->cc_data + - FBA_SIZE(st_cblk_off + st_cblk_len) - 8)); - } - - handle->bh_centry = centry; - st_cblk_off = 0; - st_cblk_len = (sdbc_cblk_fba_t) - ((fba_len > (nsc_size_t)BLK_FBAS) ? 
- BLK_FBAS : fba_len); - } else { - if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, centry)) - last_ioent = centry; - else { - DTRACE_PROBE4(_sd_prefetch_buf_data2, - uint64_t, (uint64_t)(BLK_TO_FBA_NUM(cblk) + - st_cblk_off), int, st_cblk_len, - char *, *(int64_t *)(centry->cc_data + - FBA_SIZE(st_cblk_off)), char *, - *(int64_t *)(centry->cc_data + - FBA_SIZE(st_cblk_off + st_cblk_len) - 8)); - } - - lentry->cc_chain = centry; - if (fba_len < (nsc_size_t)BLK_FBAS) - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } - lentry = centry; - cblk++; - - /* if this block has a new identity clear prefetch history */ - if (this_entry_type != HASH_ENTRY_DM) - centry->cc_aging_dm &= - ~(PREFETCH_BUF_I | PREFETCH_BUF_E); - - centry->cc_aging_dm &= ~(ENTRY_FIELD_DM); - centry->cc_aging_dm |= this_entry_type | PREFETCH_BUF_E; - if (flag & NSC_METADATA) - centry->cc_aging_dm |= STICKY_METADATA_DM; - - --request_blocks; - } while (fba_len > 0); - - - if (locked) { - rw_exit(&sdbc_queue_lock); - locked = 0; - } - - sdbc_centry_alloc_end(&alloc_tok); - - if (centry) { - centry->cc_chain = NULL; - if (sts = _sd_setup_category_on_type(handle->bh_centry)) { - (void) _sd_free_buf(handle); - goto done; - } - - (void) _sd_setup_mem_chaining(handle->bh_centry, 0); - } - - - if (ioent) { - /* prefetch: trailing valid can be released, adjust len */ - if ((centry != last_ioent)) { - centry = last_ioent->cc_chain; - last_ioent->cc_chain = NULL; - while (centry) { - lentry = centry->cc_chain; - centry->cc_aging_dm &= ~PREFETCH_BUF_E; - _sd_centry_release(centry); - centry = lentry; - sdbc_prefetch_trailing++; - } - fba_len = (CENTRY_BLK(last_ioent) - - CENTRY_BLK(ioent) + 1) * BLK_FBAS - - BLK_FBA_OFF(io_pos); - fba_orig_len = fba_len + (io_pos - fba_pos); - } - - _SD_DISCONNECT_CALLBACK(handle); - sts = _sd_doread(handle, ioent, io_pos, - (fba_pos + fba_orig_len - io_pos), flag); - if (sts > 0) - (void) _sd_free_buf(handle); - } else { - CACHE_FBA_READ(cd, fba_orig_len); - CACHE_READ_HIT; - 
FBA_READ_IO_KSTATS(cd, FBA_SIZE(fba_orig_len)); - - sts = NSC_HIT; - } -done: - if (locked) - rw_exit(&sdbc_queue_lock); - - return (sts); -} - - -/* - * _sd_cc_wait - wait for inuse cache block to become available - * Usage: - * if (SET_CENTRY_INUSE(centry)) { - * _sd_cc_wait(cd, blk, centry, CC_INUSE); - * goto try_again; - * } - * -or- - * if (SET_CENTRY_PAGEIO(centry)) { - * _sd_cc_wait(cd, blk, centry, CC_PAGEIO); - * goto try_again; - * } - */ -void -_sd_cc_wait(int cd, nsc_off_t cblk, _sd_cctl_t *centry, int flag) -{ - volatile ushort_t *waiters; - volatile uchar_t *uflag; - - if (flag == CC_INUSE) { - waiters = &(centry->cc_await_use); - uflag = &(CENTRY_INUSE(centry)); - } else if (flag == CC_PAGEIO) { - waiters = &(centry->cc_await_page); - uflag = &(CENTRY_PAGEIO(centry)); - } else { - /* Oops! */ -#ifdef DEBUG - cmn_err(CE_WARN, "!_sd_cc_wait: unknown flag value (%x)", flag); -#endif - return; - } - - mutex_enter(¢ry->cc_lock); - if (CC_CD_BLK_MATCH(cd, cblk, centry) && (*uflag) != 0) { - (*waiters)++; - sd_serialize(); - if ((*uflag) != 0) { - unsigned stime = nsc_usec(); - cv_wait(¢ry->cc_blkcv, ¢ry->cc_lock); - (*waiters)--; - mutex_exit(¢ry->cc_lock); - SDTRACE(ST_INFO|SDF_ENT_GET, - cd, 0, BLK_TO_FBA_NUM(cblk), (nsc_usec()-stime), 0); - } else { - (*waiters)--; - mutex_exit(¢ry->cc_lock); - } - } else - mutex_exit(¢ry->cc_lock); - -} - -/* - * _sd_alloc_buf - Allocate a vector of buffers for io. - * - * ARGUMENTS: - * cd - Cache descriptor (from a previous open) - * fba_pos - disk position (512-byte FBAs) - * fba_len - length in disk FBAs. - * flag - allocation type. Flag is one or more of - * NSC_RDBUF, NSC_WRBUF, NSC_NOBLOCK and hints. - * NSC_RDAHEAD - prefetch for future read. - * handle_p - pointer to a handle pointer. - * If the handle pointer is non-null, its used as a - * pre-allocated handle. Else a new handle will be allocated - * and stored in *handle_p - * - * RETURNS: - * errno if return > 0. 
- * else NSC_HIT or NSC_DONE on success - * or NSC_PENDING on io in progress and NSC_NOBLOCK - * specified in the flag. - * USAGE: - * This routine allocates the cache blocks requested and creates a list - * of entries for this request. - * If NSC_NOBLOCK was not specified, this call could block on read io. - * If flag specified NSC_RDBUF and the request is not an entire - * hit, an io is initiated. - */ -int -_sd_alloc_buf(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag, - _sd_buf_handle_t **handle_p) -{ - int cd = (int)(unsigned long)xcd; - _sd_cd_info_t *cdi; - _sd_buf_handle_t *handle; - int sts; - nsc_off_t st_cblk, cblk; /* position of start and temp cache block */ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_off_t io_pos; /* offset in FBA's */ - _sd_bufvec_t *bufvec; - _sd_cctl_t *centry, *lentry, *ioent = NULL; - nsc_size_t fba_orig_len = fba_len; /* FBA length of orig request */ - int stall, pageio; - unsigned char cc_flag; - int this_entry_type; - int locked = 0; - nsc_size_t dmchain_request_blocks; /* size of dmchain in cache blocks */ - sdbc_allocbuf_t alloc_tok = {0}; - int min_frag = 0; /* frag statistics */ - int max_frag = 0; /* frag statistics */ - int nfrags = 0; /* frag statistics */ -#ifdef DEBUG - int err = 0; -#endif - - - ASSERT(*handle_p != NULL); - handle = *handle_p; - - if (_sdbc_shutdown_in_progress) - return (EIO); - - if (xcd == NSC_ANON_CD) - cd = _CD_NOHASH; - - KSTAT_RUNQ_ENTER(cd); - - /* - * Force large writes on nvram systems to be write-through to - * avoid the (slow) bcopy into nvram. 
- */ - - if (flag & NSC_WRBUF) { - if (fba_len > (nsc_size_t)sdbc_wrthru_len) { - flag |= NSC_WRTHRU; - } - } - -#ifdef DEBUG - if (sdbc_pageio_debug != SDBC_PAGEIO_OFF) { - switch (sdbc_pageio_debug) { - case SDBC_PAGEIO_RDEV: - if (cd != _CD_NOHASH && - sdbc_pageio_rdev != (dev_t)-1 && - _sd_cache_files[cd].cd_crdev == sdbc_pageio_rdev) - flag |= NSC_PAGEIO; - break; - - case SDBC_PAGEIO_RAND: - if ((nsc_lbolt() % 3) == 0) - flag |= NSC_PAGEIO; - break; - - case SDBC_PAGEIO_ALL: - flag |= NSC_PAGEIO; - break; - } - } -#endif /* DEBUG */ - - if (fba_len > (nsc_size_t)BLK_FBAS) { - rw_enter(&sdbc_queue_lock, RW_WRITER); - locked = 1; - } - - /* - * _CD_NOHASH: client wants temporary (not hashed) cache memory - * not associated with a local disk. Skip local disk checks. - */ - if (cd == _CD_NOHASH) { - flag &= ~(NSC_RDBUF | NSC_WRBUF | NSC_RDAHEAD); - handle = *handle_p; - handle->bh_flag |= NSC_HACTIVE; - goto setup; - } - - SDTRACE(ST_ENTER|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, 0); - - - if ((flag & NSC_RDAHEAD) && _sd_prefetch_opt) { - sts = _sd_prefetch_buf(cd, fba_pos, fba_len, flag, handle, - locked); - goto done; - } - -#if !defined(_SD_NOCHECKS) - if (flag & NSC_RDAHEAD) { /* _sd_prefetch_opt == 0 */ - nsc_size_t file_size; /* file_size in FBA's */ - /* prefetch: truncate if req'd */ - if (fba_len > sdbc_max_fbas) - fba_len = sdbc_max_fbas; - file_size = _sd_cache_files[(cd)].cd_info->sh_filesize; - if ((fba_pos + fba_len) > file_size) { - fba_len = file_size - fba_pos; -#ifdef NSC_MULTI_TERABYTE - if ((int64_t)fba_len <= 0) { -#else - if ((int32_t)fba_len <= 0) { -#endif - sts = EIO; - SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, - fba_pos, flag, sts); - goto done; - } - } - } else - if (sts = _sd_check_buffer_alloc(cd, fba_pos, fba_len, handle_p)) { - SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, sts); - goto done; - } -#endif - if (fba_len == 0) { - SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, - flag, EINVAL); - sts = EINVAL; - goto 
done; - } - - handle->bh_flag |= NSC_HACTIVE; - cdi = &_sd_cache_files[cd]; - - if (cdi->cd_recovering) { - /* - * If recovering this device, then block all allocates - * for reading or writing. If we allow reads then - * this path could see old data before we recover. - * If we allow writes then new data could be overwritten - * by old data. - * This is clearly still not a complete solution as - * the thread doing this allocate could conceivably be - * by this point (and in _sd_write/_sd_read for that matter - * which don't even have this protection). But this type - * of path seems to only exist in a failover situation - * where a device has failed on the other node and works - * on this node so the problem is not a huge one but exists - * never the less. - */ - if (sts = _sd_recovery_wblk_wait(cd)) { - handle->bh_flag &= ~NSC_HACTIVE; - SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, - flag, sts); - goto done; - } - } - - /* write & disk failed, return error immediately */ - if ((flag & NSC_WRBUF) && cdi->cd_info->sh_failed) { - handle->bh_flag &= ~NSC_HACTIVE; - SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, EIO); - sts = EIO; - goto done; - } - -setup: - - _SD_SETUP_HANDLE(handle, cd, fba_pos, fba_len, flag); - handle->bh_centry = NULL; - bufvec = handle->bh_bufvec; - if (flag & NSC_RDAHEAD) { /* _sd_prefetch_opt == 0 */ - /* CKD prefetch: bufvec not req'd, use placeholder */ - bufvec->bufaddr = NULL; - bufvec->bufvmeaddr = NULL; - bufvec->buflen = 0; - bufvec = _prefetch_sb_vec; - } - st_cblk = FBA_TO_BLK_NUM(fba_pos); - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - cblk = st_cblk; - - - /* - * count number of blocks on chain that is required - */ - - /* middle piece */ - dmchain_request_blocks = - (fba_len - (st_cblk_len + end_cblk_len)) >> 
BLK_FBA_SHFT; - - /* start piece */ - ++dmchain_request_blocks; - - /* end piece */ - if (end_cblk_len) - ++dmchain_request_blocks; - - - cc_flag = 0; - if ((handle->bh_flag & NSC_PINNABLE) && (handle->bh_flag & NSC_WRBUF)) - cc_flag |= CC_PINNABLE; - if (handle->bh_flag & (NSC_NOCACHE|NSC_SEQ_IO)) - cc_flag |= CC_QHEAD; - lentry = NULL; - stall = 0; - - do { - pageio = ((flag & NSC_PAGEIO) != 0 || sdbc_pageio_always != 0); -cget: - if ((centry = (_sd_cctl_t *) - _sd_hash_search(cd, cblk, _sd_htable)) != 0) { - - if (SET_CENTRY_INUSE(centry)) { - /* already inuse: wait for block, retry */ - sdbc_allocb_inuse++; - if (locked) - rw_exit(&sdbc_queue_lock); - _sd_cc_wait(cd, cblk, centry, CC_INUSE); - if (locked) - rw_enter(&sdbc_queue_lock, RW_WRITER); - goto cget; - } - - /* - * bug 4529671 - * now that we own the centry make sure that - * it is still good. it could have been processed - * by _sd_dealloc_dm() in the window between - * _sd_hash_search() and SET_CENTRY_INUSE(). - */ - if ((_sd_cctl_t *) - _sd_hash_search(cd, cblk, _sd_htable) != centry) { - sdbc_allocb_deallocd++; -#ifdef DEBUG - cmn_err(CE_WARN, - "!centry %p cd %d cblk %" NSC_SZFMT - " fba_len %" NSC_SZFMT " lost to dealloc?! " - "cc_data %p", (void *)centry, cd, cblk, - fba_orig_len, (void *)centry->cc_data); -#endif - - CLEAR_CENTRY_INUSE(centry); - goto cget; - } - - if (CC_CD_BLK_MATCH(cd, cblk, centry)) { - /* - * Do pagelist io mutual exclusion - * before messing with the centry. 
- */ - if (pageio && SET_CENTRY_PAGEIO(centry)) { - /* wait for flusher to finish pageio */ - sdbc_allocb_pageio1++; - - CLEAR_CENTRY_INUSE(centry); - if (locked) - rw_exit(&sdbc_queue_lock); - _sd_cc_wait(cd, cblk, centry, - CC_PAGEIO); - if (locked) - rw_enter(&sdbc_queue_lock, - RW_WRITER); - goto cget; - } - - sdbc_allocb_hit++; - this_entry_type = HASH_ENTRY_DM; - pageio = 0; - centry->cc_toflush = 0; - - centry->cc_hits++; - - /* this will reset the age flag */ - sdbc_centry_init_dm(centry); - - DTRACE_PROBE1(_sd_alloc_buf1, - _sd_cctl_t *, centry); - } else { - /* block mismatch: release, alloc new block */ - sdbc_allocb_lost++; - - CLEAR_CENTRY_INUSE(centry); - - goto cget; - - } - } else { - centry = sdbc_centry_alloc(cd, cblk, - dmchain_request_blocks, &stall, - &alloc_tok, locked ? ALLOC_LOCKED : 0); - - /* - * dmchaining adjustment. - * if centry was obtained from the dmchain - * then clear local pageio variable because the - * centry already has cc_pageio set. - */ - if (CENTRY_PAGEIO(centry)) - pageio = 0; - - DTRACE_PROBE1(_sd_alloc_buf2, _sd_cctl_t *, centry); - - this_entry_type = ELIGIBLE_ENTRY_DM; - if (centry->cc_aging_dm & FOUND_IN_HASH_DM) - this_entry_type = HASH_ENTRY_DM; - else { - if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM) - this_entry_type = HOLD_ENTRY_DM; - } - } - - centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM); - - /* - * Do pagelist io mutual exclusion now if we did not do - * it above. 
- */ - - if (pageio && SET_CENTRY_PAGEIO(centry)) { - /* wait for flusher to finish pageio */ - sdbc_allocb_pageio2++; - - - CLEAR_CENTRY_INUSE(centry); - if (locked) - rw_exit(&sdbc_queue_lock); - _sd_cc_wait(cd, cblk, centry, CC_PAGEIO); - if (locked) - rw_enter(&sdbc_queue_lock, RW_WRITER); - goto cget; - } - - pageio = 0; - - if (CENTRY_DIRTY(centry)) { - /* - * end action might set PEND_DIRTY flag - * must lock if need to change flag bits - */ - if (centry->cc_flag != (centry->cc_flag | cc_flag)) { - /* was FAST */ - mutex_enter(¢ry->cc_lock); - centry->cc_flag |= cc_flag; - /* was FAST */ - mutex_exit(¢ry->cc_lock); - } - } else - centry->cc_flag |= cc_flag; - - centry->cc_chain = NULL; - - /* - * step 0:check valid bits in each cache ele as - * the chain grows - set ioent/io_pos to first - * instance of invalid data - */ - if (cblk == st_cblk) { - handle->bh_centry = centry; - fba_len -= st_cblk_len; - lentry = centry; - if (flag & NSC_RDBUF) { - if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, - centry)) { - io_pos = fba_pos; - ioent = centry; - } else { - DATA_LOG(SDF_ALLOC, centry, st_cblk_off, - st_cblk_len); - - DTRACE_PROBE4(_sd_alloc_data1, - uint64_t, (uint64_t) - (BLK_TO_FBA_NUM(cblk) + - st_cblk_off), int, st_cblk_len, - char *, *(int64_t *) - (centry->cc_data + - FBA_SIZE(st_cblk_off)), - char *, *(int64_t *) - (centry->cc_data + - FBA_SIZE(st_cblk_off + st_cblk_len) - - 8)); - } - } - cblk++; - } else if (fba_len == (nsc_size_t)end_cblk_len) { - lentry->cc_chain = centry; - fba_len -= end_cblk_len; - if (flag & NSC_RDBUF) { - if (ioent == NULL) { - if (!SDBC_VALID_BITS(0, end_cblk_len, - centry)) { - io_pos = BLK_TO_FBA_NUM(cblk); - ioent = centry; - } else { - DATA_LOG(SDF_ALLOC, centry, 0, - end_cblk_len); - - DTRACE_PROBE4(_sd_alloc_data2, - uint64_t, - BLK_TO_FBA_NUM(cblk), - int, end_cblk_len, - char *, *(int64_t *) - (centry->cc_data), - char *, *(int64_t *) - (centry->cc_data + - FBA_SIZE(end_cblk_len) - - 8)); - } - } - } - } else { - 
lentry->cc_chain = centry; - lentry = centry; - fba_len -= BLK_FBAS; - if (flag & NSC_RDBUF) { - if (ioent == NULL) { - if (!FULLY_VALID(centry)) { - io_pos = BLK_TO_FBA_NUM(cblk); - ioent = centry; - } else { - DATA_LOG(SDF_ALLOC, centry, 0, - BLK_FBAS); - - DTRACE_PROBE4(_sd_alloc_data3, - uint64_t, (uint64_t) - BLK_TO_FBA_NUM(cblk), - int, BLK_FBAS, - char *, *(int64_t *) - (centry->cc_data), - char *, *(int64_t *) - (centry->cc_data + - FBA_SIZE(BLK_FBAS) - 8)); - } - } - } - cblk++; - } - - /* if this block has a new identity clear prefetch history */ - if (this_entry_type != HASH_ENTRY_DM) - centry->cc_aging_dm &= - ~(PREFETCH_BUF_I | PREFETCH_BUF_E); - - centry->cc_aging_dm &= ~(ENTRY_FIELD_DM); - centry->cc_aging_dm |= this_entry_type; - if (flag & NSC_METADATA) - centry->cc_aging_dm |= STICKY_METADATA_DM; - - --dmchain_request_blocks; - } while (fba_len); - - if (locked) { - rw_exit(&sdbc_queue_lock); - locked = 0; - } - - ASSERT(dmchain_request_blocks == 0); - - /* - * do any necessary cleanup now that all the blocks are allocated. - */ - sdbc_centry_alloc_end(&alloc_tok); - - /* be sure you nul term. 
the chain */ - centry->cc_chain = NULL; - - /* - * step one: establish HOST/PARASITE/OTHER relationships - * between the centry ele in the list and calc the alloc size - * (fill in CATAGORY based on TYPE and immediate neighbors) - */ - if (sts = _sd_setup_category_on_type(handle->bh_centry)) { -#ifdef DEBUG - err = _sd_free_buf(handle); - if (err) { - cmn_err(CE_WARN, "!sdbc(_sd_alloc_buf): _sd_free_buf " - "failed: err:%d handle:%p", err, (void *)handle); - } -#else - (void) _sd_free_buf(handle); -#endif - goto done; - } - - /* - * step two: alloc the needed mem and fill in the data and chaining - * fields (leave bufvec for step three) - */ - (void) _sd_setup_mem_chaining(handle->bh_centry, 0); - - /* - * step three: do the bufvec - */ - fba_len = fba_orig_len; - centry = handle->bh_centry; - bufvec = handle->bh_bufvec; - - while (centry) { - DTRACE_PROBE3(_sd_alloc_buf_centrys, _sd_cctl_t *, centry, - int, cd, uint64_t, - (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(centry))); - - if (fba_len == fba_orig_len) { - bufvec->bufaddr = (centry->cc_data + - FBA_SIZE(st_cblk_off)); - bufvec->bufvmeaddr = 0; /* not used */ - bufvec->buflen = FBA_SIZE(st_cblk_len); - bufvec++; - fba_len -= st_cblk_len; - } else if (fba_len == (nsc_size_t)end_cblk_len) { - _sd_bufvec_t *pbufvec = bufvec - 1; - - if ((pbufvec->bufaddr + pbufvec->buflen) == - centry->cc_data) { - /* contiguous */ - pbufvec->buflen += FBA_SIZE(end_cblk_len); - } else { - - bufvec->bufaddr = centry->cc_data; - bufvec->bufvmeaddr = 0; /* not used */ - bufvec->buflen = FBA_SIZE(end_cblk_len); - bufvec++; - } - - fba_len -= end_cblk_len; - } else { - _sd_bufvec_t *pbufvec = bufvec - 1; - - if ((pbufvec->bufaddr + pbufvec->buflen) == - centry->cc_data) { - /* contiguous */ - pbufvec->buflen += CACHE_BLOCK_SIZE; - } else { - - bufvec->bufaddr = centry->cc_data; - bufvec->bufvmeaddr = 0; /* not used */ - bufvec->buflen = CACHE_BLOCK_SIZE; - bufvec++; - } - - fba_len -= BLK_FBAS; - } - - centry = centry->cc_chain; - } - - /* 
be sure you nul term. the chain */ - bufvec->bufaddr = NULL; - bufvec->bufvmeaddr = 0; - bufvec->buflen = 0; - - /* frag statistics */ - { - _sd_bufvec_t *tbufvec; - - for (tbufvec = handle->bh_bufvec; tbufvec != bufvec; - ++tbufvec) { - if ((min_frag > tbufvec->buflen) || (min_frag == 0)) - min_frag = tbufvec->buflen; - - if (max_frag < tbufvec->buflen) - max_frag = tbufvec->buflen; - } - - nfrags = bufvec - handle->bh_bufvec; - min_frag = FBA_LEN(min_frag); - max_frag = FBA_LEN(max_frag); - } - - /* buffer memory frag stats */ - DTRACE_PROBE4(_sd_alloc_buf_frag, uint64_t, (uint64_t)fba_orig_len, - int, nfrags, int, min_frag, int, max_frag); - - - if (flag & NSC_WRBUF) { - if (_SD_IS_WRTHRU(handle)) - goto alloc_done; - if (_sd_alloc_write(handle->bh_centry, &stall)) { - _sd_unblock(&_sd_flush_cv); - handle->bh_flag |= NSC_FORCED_WRTHRU; - } else { - for (centry = handle->bh_centry; - centry; centry = centry->cc_chain) { - - CENTRY_SET_FTPOS(centry); - SSOP_SETCENTRY(sdbc_safestore, - centry->cc_write); - } - } - } - -alloc_done: - if (locked) { - rw_exit(&sdbc_queue_lock); - locked = 0; - } - if (ioent) { - _SD_DISCONNECT_CALLBACK(handle); - sts = _sd_doread(handle, ioent, io_pos, - (fba_pos + fba_orig_len - io_pos), flag); - if (sts > 0) - (void) _sd_free_buf(handle); - } else - if (flag & NSC_RDBUF) { - CACHE_FBA_READ(cd, fba_orig_len); - CACHE_READ_HIT; - FBA_READ_IO_KSTATS(cd, FBA_SIZE(fba_orig_len)); - - sts = NSC_HIT; - } else - sts = (stall) ? 
NSC_DONE : NSC_HIT; - - SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_orig_len, fba_pos, flag, sts); - -done: - if (locked) - rw_exit(&sdbc_queue_lock); - - KSTAT_RUNQ_EXIT(cd); - - return (sts); -} - -/* - * consistency checking for ccents - */ - -#define ELIGIBLE(p) (p & ELIGIBLE_ENTRY_DM) -#define HOLD(p) (p & HOLD_ENTRY_DM) -#define HASHE(p) (p & HASH_ENTRY_DM) - -#define HOST(p) (p & HOST_ENTRY_DM) -#define PARA(p) (p & PARASITIC_ENTRY_DM) -#define OTHER(p) \ - (!(p & (HOST_ENTRY_DM | PARASITIC_ENTRY_DM | ELIGIBLE_ENTRY_DM))) - -#define AVAIL(p) (p & AVAIL_ENTRY_DM) - -/* - * sdbc_check_cctl_cot -- consistency check for _sd_setup_category_on_type() - * may only be called on entry to state machine (when ccent is either - * ELIGIBLE_ENTRY_DM, HOLD_ENTRY_DM or HASH_ENTRY_DM). - * - * print message or panic (DEBUG) if inconsistency detected. - */ -static int -sdbc_check_cctl_cot(_sd_cctl_t *centry) -{ - uint_t age; - int size; - uchar_t *data; - int host_or_other; - int para; - int ccent_ok = 1; - - age = centry->cc_aging_dm; - size = centry->cc_alloc_size_dm; - data = centry->cc_data; - host_or_other = size && data; - para = !size && data; - - /* - * on entry to _sd_setup_category_on_type(), - * one of three mutually exclusive entry field bits must be set - */ - - switch ((age & (ELIGIBLE_ENTRY_DM | HOLD_ENTRY_DM | HASH_ENTRY_DM))) { - case ELIGIBLE_ENTRY_DM: - case HOLD_ENTRY_DM: - case HASH_ENTRY_DM: - /* ok */ - break; - default: - /* zero or multiple flag bits */ - ccent_ok = 0; - break; - } - - /* categories are mutually exclusive */ - if (HOST(age) && PARA(age)) - ccent_ok = 0; - - /* these bits should be cleared out (STICKY_METADATA_DM not used) */ - if (age & (AVAIL_ENTRY_DM | FOUND_HOLD_OVER_DM | FOUND_IN_HASH_DM | - STICKY_METADATA_DM)) - ccent_ok = 0; - - /* eligible has no data and no size */ - if (ELIGIBLE(age) && (size || data)) - ccent_ok = 0; - - /* parasite has zero size and non-zero data */ - if (PARA(age) && !para) - ccent_ok = 0; - - /* host has 
non-zero size and non-zero data */ - if (HOST(age) && !host_or_other) - ccent_ok = 0; - - /* "other" is just like a host */ - if (OTHER(age) && !host_or_other) - ccent_ok = 0; - - /* a HOLD or a HASH must have a size */ - if ((size) && !(age & (HASH_ENTRY_DM | HOLD_ENTRY_DM))) - ccent_ok = 0; - - if (!ccent_ok) - cmn_err(cmn_level, - "!sdbc(sdbc_check_cctl_cot): inconsistent ccent %p " - "age %x size %d data %p", (void *)centry, age, size, - (void *)data); - - return (ccent_ok); -} - -/* - * sdbc_mark_cctl_cot -- mark cctls bad and invalidate when - * inconsistency found in _sd_setup_category_on_type() - * returns nothing - * - * Note: this is an error recovery path that is triggered when an - * inconsistency in a cctl is detected. _sd_centry_release() will take - * these cache entries out of circulation and place them on a separate list - * for debugging purposes. - */ -void -sdbc_mark_cctl_cot(_sd_cctl_t *header, _sd_cctl_t *centry) -{ - _sd_cctl_t *cur_ent = header; - - /* the entire chain is guilty by association */ - while (cur_ent) { - - (void) _sd_hash_delete((struct _sd_hash_hd *)cur_ent, - _sd_htable); - - cur_ent->cc_aging_dm |= BAD_CHAIN_DM; - - cur_ent = cur_ent->cc_chain; - } - - centry->cc_aging_dm |= BAD_ENTRY_DM; /* this is the problem child */ -} - -/* - * _sd_setup_category_on_type(_sd_cctl_t *) - Setup the centry CATEGORY based on - * centry TYPE and immediate neighbors. Identify each eligible (ie not HASH) - * centry as a host/parasite. host actually have memory allocated to - * them and parasites are chained to the host and point to page offsets within - * the host's memory. 
- * - * RETURNS: - * 0 on success, EINTR if inconsistency detected in centry - * - * Note: - * none - */ -static int -_sd_setup_category_on_type(_sd_cctl_t *header) -{ - _sd_cctl_t *prev_ent, *next_ent, *centry; - _sd_cctl_t *anchor = NULL; - int current_pest_count, local_max_dyn_list; - int cl; - int ret = 0; - - ASSERT(header); - - if (sdbc_use_dmchain) - local_max_dyn_list = max_dm_queues - 1; - else { - /* pickup a fresh copy - has the world changed */ - local_max_dyn_list = dynmem_processing_dm.max_dyn_list; - } - - prev_ent = 0; - centry = header; - next_ent = centry->cc_chain; - current_pest_count = 0; - cl = 2; - - /* try to recover from bad cctl */ - if (sdbc_check_cot && !sdbc_check_cctl_cot(centry)) - ret = EINTR; - - while (cl && (ret == 0)) { - switch (cl) { - case (1): /* chain to next/monitor for completion */ - prev_ent = centry; - centry = next_ent; - next_ent = 0; - cl = 0; - if (centry) { - - if (sdbc_check_cot && - !sdbc_check_cctl_cot(centry)) { - ret = EINTR; - break; - } - - next_ent = centry->cc_chain; - cl = 2; - } - break; - - case (2): /* vector to appropriate routine */ - if (!(centry->cc_aging_dm & ELIGIBLE_ENTRY_DM)) - cl = 5; - else if (prev_ent && (prev_ent->cc_aging_dm & - ELIGIBLE_ENTRY_DM)) - cl = 15; - else - cl = 10; - break; - - case (5): /* process NON-ELIGIBLE entries */ - if (!(centry->cc_aging_dm & - (HASH_ENTRY_DM|HOLD_ENTRY_DM))) { - /* no catagory */ - - /* consistency check */ - if (centry->cc_alloc_size_dm || - centry->cc_data) { - cmn_err(cmn_level, - "!sdbc(setup_cot): " - "OTHER with data/size %p", - (void *)centry); - - ret = EINTR; - break; - } - - centry->cc_aging_dm &= - ~CATAGORY_ENTRY_DM; - centry->cc_alloc_size_dm = BLK_SIZE(1); - DTRACE_PROBE1(_sd_setup_category, - _sd_cctl_t *, centry); - } - cl = 1; - break; - - /* - * no prev entry (ie top of list) or no prev - * ELIGIBLE entry - */ - case (10): - /* - * this is an eligible entry, does it start - * a list or is it a loner - */ - /* consistency check */ - 
if (centry->cc_alloc_size_dm || - centry->cc_data) { - cmn_err(cmn_level, "!sdbc(setup_cot): " - "HOST with data/size %p", - (void *)centry); - ret = EINTR; - break; - } - - if (next_ent && (next_ent->cc_aging_dm & - ELIGIBLE_ENTRY_DM)) { - - - /* it starts a list */ - /* host catagory */ - centry->cc_aging_dm |= HOST_ENTRY_DM; - /* start out with one page */ - centry->cc_alloc_size_dm = BLK_SIZE(1); - anchor = centry; - DTRACE_PROBE1(_sd_setup_category, - _sd_cctl_t *, anchor); - cl = 1; - } else { - /* - * it's a loner - * drop status to no category and - * restart - */ - cl = 2; - centry->cc_aging_dm &= - ~ELIGIBLE_ENTRY_DM; - } - break; - - case (15): /* default to parasite catagory */ - - /* consistency check */ - if (centry->cc_alloc_size_dm || - centry->cc_data) { - cmn_err(cmn_level, "!sdbc(setup_cot): " - "PARA with data/size %p", - (void *)centry); - - ret = EINTR; - break; - } - - if (current_pest_count < local_max_dyn_list-1) { - /* continue to grow the pest list */ - current_pest_count++; - centry->cc_aging_dm |= - PARASITIC_ENTRY_DM; - - /* - * offset of host ent mem this will pt - * to - */ - centry->cc_alloc_size_dm = - anchor->cc_alloc_size_dm; - /* - * up the host mem req by one for - * this parasite - */ - DTRACE_PROBE1(_sd_setup_category, - _sd_cctl_t *, centry); - - anchor->cc_alloc_size_dm += BLK_SIZE(1); - - cl = 1; - } else { - /* - * term this pest list - restart fresh - * on this entry - */ - current_pest_count = 0; - prev_ent->cc_aging_dm &= - ~(HOST_ENTRY_DM|ELIGIBLE_ENTRY_DM); - cl = 2; - } - break; - } /* switch(cl) */ - } /* while (cl) */ - - if (ret != 0) - sdbc_mark_cctl_cot(header, centry); - - return (ret); -} - -/* - * _sd_setup_mem_chaining(_sd_cctl_t *) - Allocate memory, setup - * mem ptrs an host/pest chaining. Do the actual allocation as described in - * sd_setup_category_on_type(). 
- * - * RETURNS: - * 0 on success - * non-zero on error - * - * Note: - * if called with ALLOC_NOWAIT, caller must check for non-zero return - */ -static int -_sd_setup_mem_chaining(_sd_cctl_t *header, int flag) -{ - _sd_cctl_t *prev_ent, *next_ent, *centry; - _sd_cctl_t *anchor = NULL; - int cl, rc = 0; - - ASSERT(header); - - if (!header) - return (0); - - prev_ent = 0; - centry = header; - next_ent = centry->cc_chain; - cl = 2; - while (cl) { - switch (cl) { - case (1): /* chain to next/monitor for completion */ - centry->cc_aging_dm &= ~ELIGIBLE_ENTRY_DM; - prev_ent = centry; - centry = next_ent; - next_ent = 0; - cl = 0; - if (centry) { - next_ent = centry->cc_chain; - cl = 2; - } - break; - - case (2): /* vector to appropriate routine */ - if (centry->cc_aging_dm & HOST_ENTRY_DM) - cl = 10; - else if (centry->cc_aging_dm & - PARASITIC_ENTRY_DM) - cl = 15; - else - cl = 5; - break; - - case (5): /* OTHER processing - alloc mem */ - if (rc = sdbc_centry_memalloc_dm(centry, - centry->cc_alloc_size_dm, flag)) - /* The allocation failed */ - cl = 0; - else - cl = 1; - break; - - /* - * HOST entry processing - save the anchor pt, - * alloc the memory, - */ - case (10): /* setup head and nxt ptrs */ - anchor = centry; - if (rc = sdbc_centry_memalloc_dm(centry, - centry->cc_alloc_size_dm, flag)) - /* The allocation failed */ - cl = 0; - else - cl = 1; - break; - - /* - * PARASITIC entry processing - setup w/no - * memory, setup head/next ptrs, - */ - case (15): - /* - * fudge the data mem ptr to an offset from - * the anchor alloc - */ - if (!(centry->cc_aging_dm & - (HASH_ENTRY_DM| HOLD_ENTRY_DM))) { - centry->cc_head_dm = anchor; - - /* chain prev to this */ - prev_ent->cc_next_dm = centry; - - /* - * generate the actual data ptr into - * host entry memory - */ - centry->cc_data = anchor->cc_data + - centry->cc_alloc_size_dm; - centry->cc_alloc_size_dm = 0; - } - cl = 1; - break; - } /* switch(cl) */ - } /* while (cl) */ - - return (rc); -} - -/* - * 
_sd_check_buffer_alloc - Check if buffer allocation is invalid. - * - * RETURNS: - * 0 if its ok to continue with allocation. - * Else errno to be returned to the user. - * - * Note: - * This routine could block if the device is not local and - * recovery is in progress. - */ - -/* ARGSUSED */ -static int -_sd_check_buffer_alloc(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, - _sd_buf_handle_t **hp) -{ - /* - * This check exists to ensure that someone will not pass in an - * arbitrary pointer and try to pass it off as a handle. - */ - if ((*hp)->bh_flag & (~_SD_VALID_FLAGS)) { - cmn_err(CE_WARN, "!sdbc(_sd_check_buffer_alloc) " - "cd %d invalid handle %p flags %x", - cd, (void *)*hp, (*hp)->bh_flag); - return (EINVAL); - } - - if ((_sd_cache_initialized == 0) || (FILE_OPENED(cd) == 0)) { - cmn_err(CE_WARN, "!sdbc(_sd_check_buffer_alloc) " - "cd %d not open. Cache init %d", - cd, _sd_cache_initialized); - return (EINVAL); - } - ASSERT(cd >= 0); - if (!(_sd_cache_files[cd].cd_rawfd) || - !nsc_held(_sd_cache_files[cd].cd_rawfd)) { - cmn_err(CE_WARN, - "!sdbc(_sd_check_buffer_alloc) cd %d is not attached", cd); - return (EINVAL); - } - - ASSERT_IO_SIZE(fba_pos, fba_len, cd); - ASSERT_LEN(fba_len); - - return (0); -} - -/* - * sdbc_check_handle -- check that handle is valid - * return 1 if ok, 0 otherwise (if debug then panic). - */ -static int -sdbc_check_handle(_sd_buf_handle_t *handle) -{ - int ret = 1; - - if (!_SD_HANDLE_ACTIVE(handle)) { - - cmn_err(cmn_level, "!sdbc(_sd_free_buf): invalid handle %p" - "cd %d fpos %" NSC_SZFMT " flen %" NSC_SZFMT " flag %x", - (void *)handle, HANDLE_CD(handle), handle->bh_fba_pos, - handle->bh_fba_len, handle->bh_flag); - - ret = 0; - } - - return (ret); -} - -/* - * _sd_free_buf - Free the buffers allocated in _sd_alloc_buf. - * - * ARGUMENTS: - * handle - The handle allocated in _sd_alloc_buf. - * - * RETURNS: - * 0 on success. - * Else errno. 
- * - * NOTE: - * If handle was allocated through _sd_alloc_buf, the handle allocated - * flag (NSC_HALLOCATED) will be reset by _sd_alloc_buf. This indicates - * that _sd_free_buf should free up the handle as well. - * All other handles directly allocated from _sd_alloc_handle will have - * that flag set. Any handle with valid blocks will have the handle - * active flag. It is an error if the active flag is not set. - * (if free_buf were called without going through alloc_buf) - */ - -int -_sd_free_buf(_sd_buf_handle_t *handle) -{ - _sd_cctl_t *centry, *cc_chain; - int cd = HANDLE_CD(handle); - int flen = handle->bh_fba_len; - int fpos = handle->bh_fba_pos; - - SDTRACE(ST_ENTER|SDF_FREEBUF, HANDLE_CD(handle), - handle->bh_fba_len, handle->bh_fba_pos, 0, 0); - - if (sdbc_check_handle(handle) == 0) - return (EINVAL); - - if (handle->bh_flag & NSC_MIXED) { - /* - * Data in this handle will be a mix of data from the - * source device and data from another device, so - * invalidate all the blocks. - */ - handle->bh_flag &= ~NSC_QUEUE; - centry = handle->bh_centry; - while (centry) { - centry->cc_valid = 0; - centry = centry->cc_chain; - } - } - - if ((handle->bh_flag & NSC_QUEUE)) { - handle->bh_flag &= ~NSC_QUEUE; - _sd_queue_write(handle, handle->bh_fba_pos, handle->bh_fba_len); - } - - handle->bh_flag &= ~NSC_HACTIVE; - - centry = handle->bh_centry; - while (centry) { - cc_chain = centry->cc_chain; - _sd_centry_release(centry); - centry = cc_chain; - } - - /* - * help prevent dup call to _sd_centry_release if this handle - * is erroneously _sd_free_buf'd twice. (should not happen). 
- */ - handle->bh_centry = NULL; - - if ((handle->bh_flag & NSC_HALLOCATED) == 0) { - handle->bh_flag |= NSC_HALLOCATED; - (void) _sd_free_handle(handle); - } else { - handle->bh_flag = NSC_HALLOCATED; - } - - SDTRACE(ST_EXIT|SDF_FREEBUF, cd, flen, fpos, 0, 0); - - return (0); -} - - -static int _sd_lruq_srch = 0x2000; - -/* - * sdbc_get_dmchain -- get a candidate centry chain pointing to - * contiguous memory - * ARGUMENTS: - * cblocks - number of cache blocks requested - * stall - pointer to stall count (no blocks avail) - * flag - ALLOC_NOWAIT flag - * - * RETURNS: - * a cache entry or possible NULL if ALLOC_NOWAIT set - * USAGE: - * attempt to satisfy entire request from queue - * that has no memory allocated. - * if this fails then attempt a partial allocation - * with a preallocated block of requested size up to - * max_dyn_list. - * then look for largest chain less than max_dyn_list. - */ -static _sd_cctl_t * -sdbc_get_dmchain(int cblocks, int *stall, int flag) -{ - _sd_cctl_t *cc_dmchain = NULL; - _sd_queue_t *q; - _sd_cctl_t *qhead; - int num_tries; - int cblocks_orig = cblocks; - int nowait = flag & ALLOC_NOWAIT; - int i; - - num_tries = _sd_lruq_srch; - - ASSERT(cblocks != 0); - - while (!cc_dmchain) { - /* get it from the os if possible */ - q = &sdbc_dm_queues[0]; - qhead = &(q->sq_qhead); - - if (q->sq_inq >= cblocks) { - mutex_enter(&q->sq_qlock); - if (q->sq_inq >= cblocks) { - _sd_cctl_t *cc_ent; - - cc_dmchain = qhead->cc_next; - - /* - * set the inuse and pageio bits - * Note: this code expects the cc_ent to - * be available. no other thread may set the - * inuse or pageio bit for an entry on the - * 0 queue. - */ - cc_ent = qhead; - for (i = 0; i < cblocks; ++i) { - cc_ent = cc_ent->cc_next; - - if (SET_CENTRY_INUSE(cc_ent)) { - cmn_err(CE_PANIC, - "centry inuse on 0 q! %p", - (void *)cc_ent); - } - - if (SET_CENTRY_PAGEIO(cc_ent)) { - cmn_err(CE_PANIC, - "centry pageio on 0 q! 
%p", - (void *)cc_ent); - } - } - /* got a dmchain */ - - /* remove this chain from the 0 queue */ - cc_dmchain->cc_prev->cc_next = cc_ent->cc_next; - cc_ent->cc_next->cc_prev = cc_dmchain->cc_prev; - cc_dmchain->cc_prev = NULL; - cc_ent->cc_next = NULL; - - q->sq_inq -= cblocks; - - ASSERT(GOOD_LRUSIZE(q)); - - } - mutex_exit(&q->sq_qlock); - if (cc_dmchain) - continue; - } - - /* look for a pre-allocated block of the requested size */ - - - if (cblocks > (max_dm_queues - 1)) - cblocks = max_dm_queues - 1; - - q = &sdbc_dm_queues[cblocks]; - qhead = &(q->sq_qhead); - - if (q->sq_inq != 0) { - _sd_cctl_t *tmp_dmchain; - - mutex_enter(&q->sq_qlock); - - for (tmp_dmchain = qhead->cc_next; tmp_dmchain != qhead; - tmp_dmchain = tmp_dmchain->cc_next) { - - /* - * get a dmchain - * set the inuse and pageio bits - */ - if (sdbc_dmchain_avail(tmp_dmchain)) { - /* put on MRU end of queue */ - sdbc_requeue_dmchain(q, tmp_dmchain, - 1, 0); - cc_dmchain = tmp_dmchain; - break; - } - sdbc_dmchain_not_avail++; - } - - mutex_exit(&q->sq_qlock); - if (cc_dmchain) - continue; - } - - /* - * spin block - * nudge the deallocator, accelerate ageing - */ - - mutex_enter(&dynmem_processing_dm.thread_dm_lock); - cv_broadcast(&dynmem_processing_dm.thread_dm_cv); - mutex_exit(&dynmem_processing_dm.thread_dm_lock); - - if (nowait) - break; - - if (!(--num_tries)) { - delay(drv_usectohz(20000)); - (void) (*stall)++; - num_tries = _sd_lruq_srch; - cblocks = cblocks_orig; - } else { /* see if smaller request size is available */ - if (!(--cblocks)) - cblocks = cblocks_orig; - } - - } /* while (!cc_dmchain) */ - - return (cc_dmchain); -} - -static int -sdbc_dmchain_avail(_sd_cctl_t *cc_ent) -{ - int chain_avail = 1; - _sd_cctl_t *anchor = cc_ent; - - while (cc_ent) { - - ASSERT(_sd_cctl_valid(cc_ent)); - - if (cc_ent->cc_aging_dm & BAD_CHAIN_DM) { - chain_avail = 0; - break; - } - - if (CENTRY_DIRTY(cc_ent)) { - chain_avail = 0; - break; - } - if (SET_CENTRY_INUSE(cc_ent)) { - chain_avail = 0; 
- break; - } - - if ((SET_CENTRY_PAGEIO(cc_ent))) { - - CLEAR_CENTRY_INUSE(cc_ent); - chain_avail = 0; - break; - } - - if (CENTRY_DIRTY(cc_ent)) { - - CLEAR_CENTRY_PAGEIO(cc_ent); - CLEAR_CENTRY_INUSE(cc_ent); - chain_avail = 0; - break; - } - - cc_ent->cc_flag = 0; - cc_ent->cc_toflush = 0; - - cc_ent = cc_ent->cc_next_dm; - } - - if (!chain_avail) - sdbc_clear_dmchain(anchor, cc_ent); - else { - cc_ent = anchor; - - /* - * prevent possible deadlocks in _sd_cc_wait(): - * remove from hash and wakeup any waiters now that we - * have acquired the chain. - */ - while (cc_ent) { - (void) _sd_hash_delete((struct _sd_hash_hd *)cc_ent, - _sd_htable); - - mutex_enter(&cc_ent->cc_lock); - if (cc_ent->cc_await_use) { - cv_broadcast(&cc_ent->cc_blkcv); - } - mutex_exit(&cc_ent->cc_lock); - - cc_ent->cc_creat = nsc_lbolt(); - cc_ent->cc_hits = 0; - - cc_ent = cc_ent->cc_next_dm; - } - } - - return (chain_avail); -} - -static void -sdbc_clear_dmchain(_sd_cctl_t *cc_ent_start, _sd_cctl_t *cc_ent_end) -{ - _sd_cctl_t *cc_ent = cc_ent_start; - _sd_cctl_t *prev_ent; - - ASSERT(_sd_cctl_valid(cc_ent)); - - while (cc_ent != cc_ent_end) { - - ASSERT(_sd_cctl_valid(cc_ent)); - - prev_ent = cc_ent; - cc_ent = cc_ent->cc_next_dm; - - CLEAR_CENTRY_PAGEIO(prev_ent); - CLEAR_CENTRY_INUSE(prev_ent); - } - -} - -/* - * put a dmchain on the LRU end of a queue - */ -void -sdbc_ins_dmqueue_front(_sd_queue_t *q, _sd_cctl_t *cc_ent) -{ - _sd_cctl_t *qhead = &(q->sq_qhead); - - ASSERT(_sd_cctl_valid(cc_ent)); - - mutex_enter(&q->sq_qlock); - cc_ent->cc_next = qhead->cc_next; - cc_ent->cc_prev = qhead; - qhead->cc_next->cc_prev = cc_ent; - qhead->cc_next = cc_ent; - q->sq_inq++; - cc_ent->cc_cblocks = q->sq_dmchain_cblocks; - - ASSERT(GOOD_LRUSIZE(q)); - - mutex_exit(&q->sq_qlock); - -} - -/* - * put a dmchain on the MRU end of a queue - */ -static void -sdbc_ins_dmqueue_back(_sd_queue_t *q, _sd_cctl_t *cc_ent) -{ - _sd_cctl_t *qhead = &(q->sq_qhead); - - ASSERT(_sd_cctl_valid(cc_ent)); - - 
mutex_enter(&q->sq_qlock); - cc_ent->cc_next = qhead; - cc_ent->cc_prev = qhead->cc_prev; - qhead->cc_prev->cc_next = cc_ent; - qhead->cc_prev = cc_ent; - cc_ent->cc_seq = q->sq_seq++; - q->sq_inq++; - cc_ent->cc_cblocks = q->sq_dmchain_cblocks; - - ASSERT(GOOD_LRUSIZE(q)); - - mutex_exit(&q->sq_qlock); - -} - -/* - * remove dmchain from a queue - */ -void -sdbc_remq_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent) -{ - - ASSERT(_sd_cctl_valid(cc_ent)); - - mutex_enter(&q->sq_qlock); - cc_ent->cc_prev->cc_next = cc_ent->cc_next; - cc_ent->cc_next->cc_prev = cc_ent->cc_prev; - cc_ent->cc_next = cc_ent->cc_prev = NULL; /* defensive programming */ - cc_ent->cc_cblocks = -1; /* indicate not on any queue */ - - q->sq_inq--; - - ASSERT(GOOD_LRUSIZE(q)); - - mutex_exit(&q->sq_qlock); - -} - -/* - * requeue a dmchain to the MRU end of its queue. - * if getlock is 0 on entry the queue lock (sq_qlock) must be held - */ -void -sdbc_requeue_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent, int mru, - int getlock) -{ - _sd_cctl_t *qhead = &(q->sq_qhead); - - - ASSERT(_sd_cctl_valid(cc_ent)); - - if (getlock) - mutex_enter(&q->sq_qlock); - - /* inline of sdbc_remq_dmchain() */ - cc_ent->cc_prev->cc_next = cc_ent->cc_next; - cc_ent->cc_next->cc_prev = cc_ent->cc_prev; - - if (mru) { /* put on MRU end of queue */ - /* inline of sdbc_ins_dmqueue_back */ - cc_ent->cc_next = qhead; - cc_ent->cc_prev = qhead->cc_prev; - qhead->cc_prev->cc_next = cc_ent; - qhead->cc_prev = cc_ent; - cc_ent->cc_seq = q->sq_seq++; - (q->sq_req_stat)++; - } else { /* put on LRU end of queue i.e. 
requeue to head */ - /* inline of sdbc_ins_dmqueue_front */ - cc_ent->cc_next = qhead->cc_next; - cc_ent->cc_prev = qhead; - qhead->cc_next->cc_prev = cc_ent; - qhead->cc_next = cc_ent; - cc_ent->cc_seq = q->sq_seq++; - - /* - * clear the CC_QHEAD bit on all members of the chain - */ - { - _sd_cctl_t *tcent; - - for (tcent = cc_ent; tcent; tcent = tcent->cc_next_dm) - tcent->cc_flag &= ~CC_QHEAD; - } - } - - if (getlock) - mutex_exit(&q->sq_qlock); - -} - -/* - * sdbc_dmchain_dirty(cc_ent) - * return first dirty cc_ent in dmchain, NULL if chain is not dirty - */ -static _sd_cctl_t * -sdbc_dmchain_dirty(_sd_cctl_t *cc_ent) -{ - for (/* CSTYLED */; cc_ent; cc_ent = cc_ent->cc_next_dm) - if (CENTRY_DIRTY(cc_ent)) - break; - - return (cc_ent); -} - -/* - * sdbc_requeue_head_dm_try() - * attempt to requeue a dmchain to the head of the queue - */ -void -sdbc_requeue_head_dm_try(_sd_cctl_t *cc_ent) -{ - int qidx; - _sd_queue_t *q; - - if (!sdbc_dmchain_dirty(cc_ent)) { - qidx = cc_ent->cc_cblocks; - q = &sdbc_dm_queues[qidx]; - sdbc_requeue_dmchain(q, cc_ent, 0, 1); /* requeue head */ - } -} - -/* - * sdbc_centry_alloc_blks -- allocate cache entries with memory - * - * ARGUMENTS: - * cd - Cache descriptor (from a previous open) - * cblk - cache block number. - * reqblks - number of cache blocks to be allocated - * flag - can be ALLOC_NOWAIT - * RETURNS: - * A cache block chain or NULL if ALLOC_NOWAIT and request fails - * - * Note: caller must check for null return if called with - * ALLOC_NOWAIT set. 
- */ -_sd_cctl_t * -sdbc_centry_alloc_blks(int cd, nsc_off_t cblk, nsc_size_t reqblks, int flag) -{ - sdbc_allocbuf_t alloc_tok = {0}; /* must be 0 */ - int stall = 0; - _sd_cctl_t *centry = NULL; - _sd_cctl_t *lentry = NULL; - _sd_cctl_t *anchor = NULL; - _sd_cctl_t *next_centry; - - ASSERT(reqblks); - - while (reqblks) { - centry = sdbc_centry_alloc(cd, cblk, reqblks, &stall, - &alloc_tok, flag); - - if (!centry) - break; - - centry->cc_chain = NULL; - - if (lentry == NULL) - anchor = centry; - else - lentry->cc_chain = centry; - - lentry = centry; - - centry->cc_aging_dm &= ~(ENTRY_FIELD_DM); - - if (centry->cc_aging_dm & FOUND_IN_HASH_DM) - centry->cc_aging_dm |= HASH_ENTRY_DM; - else - if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM) - centry->cc_aging_dm |= HOLD_ENTRY_DM; - else - centry->cc_aging_dm |= ELIGIBLE_ENTRY_DM; - - centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM); - --reqblks; - } - - sdbc_centry_alloc_end(&alloc_tok); - - if (reqblks || (_sd_setup_category_on_type(anchor))) { - centry = anchor; - while (centry) { - next_centry = centry->cc_chain; - _sd_centry_release(centry); - centry = next_centry; - } - anchor = NULL; - - } else - /* This is where the memory is actually allocated */ - if (_sd_setup_mem_chaining(anchor, flag)) - anchor = NULL; - - return (anchor); -} - - -/* - * sdbc_centry_alloc - sdbc internal function to allocate a new cache block. - * - * ARGUMENTS: - * cd - Cache descriptor (from a previous open) - * cblk - cache block number. - * stall - pointer to stall count (no blocks avail) - * req_blocks - number of cache blocks remaining in caller's i/o request - * alloc_tok - pointer to token initialized to 0 on first call to function - * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT flag - * RETURNS: - * A cache block, or possibly NULL if ALLOC_NOWAIT set . - * - * USAGE: - * switch to the appropriate allocation function. - * this function is used when callers need more than one cache block. 
- * it is called repeatedly until the entire request is satisfied, - * at which time the caller will then do the memory allocation. - * if only one cache block is needed callers may use - * sdbc_centry_alloc_blks() which also allocates memory. - * - * Note: caller must check for null return if called with - * ALLOC_NOWAIT set. - */ - -_sd_cctl_t * -sdbc_centry_alloc(int cd, nsc_off_t cblk, nsc_size_t req_blocks, int *stall, - sdbc_allocbuf_t *alloc_tok, int flag) -{ - _sd_cctl_t *centry; - - if (sdbc_use_dmchain) - centry = sdbc_alloc_dmc(cd, cblk, req_blocks, stall, alloc_tok, - flag); - else - centry = sdbc_alloc_lru(cd, cblk, stall, flag); - - return (centry); -} - -/* - * sdbc_alloc_dmc -- allocate a centry from a dmchain - * - * ARGUMENTS: - * cd - Cache descriptor (from a previous open) - * cblk - cache block number. - * stall - pointer to stall count (no blocks avail) - * req_blocks - number of cache blocks in clients i/o request - * alloc_tok - pointer to token initialized to 0 on first call to function - * flag - lock status of sdbc_queue_lock, or ALLOC_NOWAIT flag - * RETURNS: - * A cache block or possibly NULL if ALLOC_NOWAIT set - * - * USAGE: - * if dmchain is empty, allocate one. - */ -static _sd_cctl_t * -sdbc_alloc_dmc(int cd, nsc_off_t cblk, nsc_size_t req_blocks, int *stall, - sdbc_allocbuf_t *alloc_tok, int flag) -{ - sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok; - _sd_cctl_t *centry = NULL; - - if (!dmc->sab_dmchain) { - /* - * Note - sdbc_get_dmchain() returns - * with cc_inuse and cc_pageio set - * for all members of dmchain. 
- */ - if (dmc->sab_dmchain = - sdbc_get_dmchain(req_blocks, stall, flag)) { - - /* remember q it came from */ - if (dmc->sab_dmchain->cc_alloc_size_dm) - dmc->sab_q = dmc->sab_dmchain->cc_cblocks; - } - } - - /* - * Note: dmchain pointer is advanced in sdbc_alloc_from_dmchain() - */ - if (dmc->sab_dmchain) /* could be NULL if ALLOC_NOWAIT set */ - centry = sdbc_alloc_from_dmchain(cd, cblk, alloc_tok, flag); - - return (centry); -} - -/* - * sdbc_alloc_from_dmchain -- allocate centry from a dmchain of centrys - * - * ARGUMENTS: - * cd - Cache descriptor (from a previous open) - * cblk - cache block number. - * alloc_tok - pointer to token - * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT - * - * RETURNS: - * A cache block or possibly NULL if ALLOC_NOWAIT set. - * - * USAGE: - * This routine allocates a new cache block from the supplied dmchain. - * Assumes that dmchain is non-NULL and that all cache entries in - * the dmchain have been removed from hash and have their cc_inuse and - * cc_pageio bits set. 
- */ -static _sd_cctl_t * -sdbc_alloc_from_dmchain(int cd, nsc_off_t cblk, sdbc_allocbuf_t *alloc_tok, - int flag) -{ - _sd_cctl_t *cc_ent, *old_ent; - int categorize_centry; - int locked = flag & ALLOC_LOCKED; - int nowait = flag & ALLOC_NOWAIT; - sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok; - - SDTRACE(ST_ENTER|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0); - - ASSERT(dmc->sab_dmchain); - - cc_ent = dmc->sab_dmchain; - - ASSERT(_sd_cctl_valid(cc_ent)); - - cc_ent->cc_valid = 0; - categorize_centry = 0; - if (cc_ent->cc_data) - categorize_centry = FOUND_HOLD_OVER_DM; - -alloc_try: - if (cd == _CD_NOHASH) - CENTRY_BLK(cc_ent) = cblk; - else if ((old_ent = (_sd_cctl_t *) - _sd_hash_insert(cd, cblk, (struct _sd_hash_hd *)cc_ent, - _sd_htable)) != cc_ent) { - - if (SET_CENTRY_INUSE(old_ent)) { - sdbc_centry_inuse++; - - if (nowait) { - cc_ent = NULL; - goto out; - } - - if (locked) - rw_exit(&sdbc_queue_lock); - _sd_cc_wait(cd, cblk, old_ent, CC_INUSE); - if (locked) - rw_enter(&sdbc_queue_lock, RW_WRITER); - goto alloc_try; - } - - /* - * bug 4529671 - * now that we own the centry make sure that - * it is still good. it could have been processed - * by _sd_dealloc_dm() in the window between - * _sd_hash_insert() and SET_CENTRY_INUSE(). - */ - if ((_sd_cctl_t *)_sd_hash_search(cd, cblk, _sd_htable) - != old_ent) { - sdbc_centry_deallocd++; -#ifdef DEBUG - cmn_err(CE_WARN, "!cc_ent %p cd %d cblk %" NSC_SZFMT - " lost to dealloc?! 
cc_data %p", (void *)old_ent, - cd, cblk, (void *)old_ent->cc_data); -#endif - - CLEAR_CENTRY_INUSE(old_ent); - - if (nowait) { - cc_ent = NULL; - goto out; - } - - goto alloc_try; - } - - if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) { - sdbc_centry_hit++; - old_ent->cc_toflush = 0; - /* _sd_centry_release(cc_ent); */ - cc_ent = old_ent; - categorize_centry = FOUND_IN_HASH_DM; - } else { - sdbc_centry_lost++; - - CLEAR_CENTRY_INUSE(old_ent); - - if (nowait) { - cc_ent = NULL; - goto out; - } - - goto alloc_try; - } - } - - /* - * advance the dmchain pointer, but only if we got the - * cc_ent from the dmchain - */ - if (categorize_centry != FOUND_IN_HASH_DM) { - if (cc_ent->cc_data) - dmc->sab_dmchain = dmc->sab_dmchain->cc_next_dm; - else - dmc->sab_dmchain = dmc->sab_dmchain->cc_next; - } - - - SDTRACE(ST_EXIT|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0); - - mutex_enter(&cc_ent->cc_lock); - if (cc_ent->cc_await_use) { - cv_broadcast(&cc_ent->cc_blkcv); - } - mutex_exit(&cc_ent->cc_lock); - - sdbc_centry_init_dm(cc_ent); - - cc_ent->cc_aging_dm |= categorize_centry; - - out: - - SDTRACE(ST_INFO|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0); - - return (cc_ent); -} - -/* - * sdbc_centry_alloc_end -- tidy up after all cache blocks have been - * allocated for a request - * ARGUMENTS: - * alloc_tok - pointer to allocation token - * RETURNS - * nothing - * USAGE: - * at this time only useful when sdbc_use_dmchain is true. - * if there are cache blocks remaining on the chain then the inuse and - * pageio bits must be cleared (they were set in sdbc_get_dmchain(). 
- * - */ -static void -sdbc_centry_alloc_end(sdbc_allocbuf_t *alloc_tok) -{ - _sd_cctl_t *next_centry; - _sd_cctl_t *prev_centry; - _sd_queue_t *q; - sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok; -#ifdef DEBUG - int chainpull = 0; -#endif - - if (!sdbc_use_dmchain) - return; - - next_centry = dmc->sab_dmchain; - - while (next_centry != NULL) { - CLEAR_CENTRY_PAGEIO(next_centry); - - prev_centry = next_centry; - - if (next_centry->cc_data) { -#ifdef DEBUG - ++chainpull; -#endif - next_centry = next_centry->cc_next_dm; - - /* clear bit after final reference */ - - CLEAR_CENTRY_INUSE(prev_centry); - } else { - next_centry = next_centry->cc_next; - - /* - * a floater from the 0 queue, insert on q. - * - * since this centry is not on any queue - * the inuse bit can be cleared before - * inserting on the q. this is also required - * since sdbc_get_dmchain() does not expect - * inuse bits to be set on 0 queue entry's. - */ - - CLEAR_CENTRY_INUSE(prev_centry); - q = &sdbc_dm_queues[0]; - sdbc_ins_dmqueue_front(q, prev_centry); - } - } - -#ifdef DEBUG - /* compute wastage stats */ - ASSERT((chainpull >= 0) && (chainpull < max_dm_queues)); - if (chainpull) - (*(dmchainpull_table + (dmc->sab_q * - max_dm_queues + chainpull)))++; -#endif - -} - - -/* - * sdbc_alloc_lru - allocate a new cache block from the lru queue - * - * ARGUMENTS: - * cd - Cache descriptor (from a previous open) - * cblk - cache block number. - * stall - pointer to stall count (no blocks avail) - * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT - * - * RETURNS: - * A cache block or NULL if ALLOC_NOWAIT specified - * - * USAGE: - * This routine allocates a new cache block from the lru. - * If an allocation cannot be done, we block, unless ALLOC_NOWAIT is set. 
- */ - -static _sd_cctl_t * -sdbc_alloc_lru(int cd, nsc_off_t cblk, int *stall, int flag) -{ - _sd_cctl_t *cc_ent, *old_ent, *ccnext; - _sd_queue_t *q = _SD_LRU_Q; - _sd_cctl_t *qhead = &(q->sq_qhead); - int tries = 0, num_tries; - int categorize_centry; - int locked = flag & ALLOC_LOCKED; - int nowait = flag & ALLOC_NOWAIT; - - if (nowait) { - num_tries = q->sq_inq / 100; /* only search 1% of q */ - - if (num_tries <= 0) /* ensure num_tries is non-zero */ - num_tries = q->sq_inq; - } else - num_tries = _sd_lruq_srch; - - SDTRACE(ST_ENTER|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0); -retry_alloc_centry: - - for (cc_ent = (qhead->cc_next); cc_ent != qhead; cc_ent = ccnext) { - if (--num_tries <= 0) - if (nowait) { - cc_ent = NULL; - goto out; - } else - break; - - ccnext = cc_ent->cc_next; - - if (cc_ent->cc_aging_dm & BAD_CHAIN_DM) - continue; - - if (CENTRY_DIRTY(cc_ent)) - continue; - if (SET_CENTRY_INUSE(cc_ent)) - continue; - - if (CENTRY_DIRTY(cc_ent)) { - sdbc_centry_lost++; - - CLEAR_CENTRY_INUSE(cc_ent); - continue; - } - cc_ent->cc_flag = 0; /* CC_INUSE */ - cc_ent->cc_toflush = 0; - - /* - * Inlined requeue of the LRU. (should match _sd_requeue) - */ - /* was FAST */ - mutex_enter(&q->sq_qlock); -#if defined(_SD_DEBUG) - if (1) { - _sd_cctl_t *cp, *cn, *qp; - cp = cc_ent->cc_prev; - cn = cc_ent->cc_next; - qp = (q->sq_qhead).cc_prev; - if (!_sd_cctl_valid(cc_ent) || - (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) || - (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) || - !_sd_cctl_valid(qp)) - cmn_err(CE_PANIC, - "_sd_centry_alloc %x prev %x next %x qp %x", - cc_ent, cp, cn, qp); - } -#endif - cc_ent->cc_prev->cc_next = cc_ent->cc_next; - cc_ent->cc_next->cc_prev = cc_ent->cc_prev; - cc_ent->cc_next = qhead; - cc_ent->cc_prev = qhead->cc_prev; - qhead->cc_prev->cc_next = cc_ent; - qhead->cc_prev = cc_ent; - cc_ent->cc_seq = q->sq_seq++; - /* was FAST */ - mutex_exit(&q->sq_qlock); - /* - * End inlined requeue. 
- */ - -#if defined(_SD_STATS) - if (_sd_hash_delete(cc_ent, _sd_htable) == 0) - SDTRACE(SDF_REPLACE, - CENTRY_CD(cc_ent), cc_ent->cc_hits, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - nsc_lbolt(), cc_ent->cc_creat); - cc_ent->cc_creat = nsc_lbolt(); - cc_ent->cc_hits = 0; -#else -#if defined(_SD_DEBUG) - if (_sd_hash_delete(cc_ent, _sd_htable) == 0) { - SDTRACE(SDF_REPLACE|ST_DL, - CENTRY_CD(cc_ent), - cc_ent->cc_valid, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - cd, BLK_TO_FBA_NUM(cblk)); - if (cc_ent->cc_await_use || - ((cd == CENTRY_CD(cc_ent)) && - (cblk == CENTRY_BLK(cc_ent)))) - DATA_LOG(SDF_REPLACE|ST_DL, cc_ent, 0, - BLK_FBAS); - } -#else - (void) _sd_hash_delete((struct _sd_hash_hd *)cc_ent, - _sd_htable); -#endif -#endif - cc_ent->cc_creat = nsc_lbolt(); - cc_ent->cc_hits = 0; - - cc_ent->cc_valid = 0; - categorize_centry = 0; - if (cc_ent->cc_data) - categorize_centry = FOUND_HOLD_OVER_DM; - - alloc_try: - if (cd == _CD_NOHASH) - CENTRY_BLK(cc_ent) = cblk; - else if ((old_ent = (_sd_cctl_t *) - _sd_hash_insert(cd, cblk, (struct _sd_hash_hd *)cc_ent, - _sd_htable)) != cc_ent) { - - if (SET_CENTRY_INUSE(old_ent)) { - sdbc_centry_inuse++; - - if (nowait) { - _sd_centry_release(cc_ent); - cc_ent = NULL; - goto out; - } - - if (locked) - rw_exit(&sdbc_queue_lock); - _sd_cc_wait(cd, cblk, old_ent, CC_INUSE); - if (locked) - rw_enter(&sdbc_queue_lock, RW_WRITER); - goto alloc_try; - } - - /* - * bug 4529671 - * now that we own the centry make sure that - * it is still good. it could have been processed - * by _sd_dealloc_dm() in the window between - * _sd_hash_insert() and SET_CENTRY_INUSE(). - */ - if ((_sd_cctl_t *) - _sd_hash_search(cd, cblk, _sd_htable) != old_ent) { - sdbc_centry_deallocd++; -#ifdef DEBUG - cmn_err(CE_WARN, "!cc_ent %p cd %d cblk %" - NSC_SZFMT " lost to dealloc?! 
cc_data %p", - (void *)old_ent, cd, cblk, - (void *)old_ent->cc_data); -#endif - - CLEAR_CENTRY_INUSE(old_ent); - - if (nowait) { - _sd_centry_release(cc_ent); - cc_ent = NULL; - goto out; - } - - goto alloc_try; - } - - if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) { - sdbc_centry_hit++; - old_ent->cc_toflush = 0; - _sd_centry_release(cc_ent); - cc_ent = old_ent; - categorize_centry = FOUND_IN_HASH_DM; - } else { - sdbc_centry_lost++; - - CLEAR_CENTRY_INUSE(old_ent); - - if (nowait) { - _sd_centry_release(cc_ent); - cc_ent = NULL; - goto out; - } - - goto alloc_try; - } - } - - SDTRACE(ST_EXIT|SDF_ENT_ALLOC, cd, tries, - BLK_TO_FBA_NUM(cblk), 0, 0); - - if (cc_ent->cc_await_use) { - mutex_enter(&cc_ent->cc_lock); - cv_broadcast(&cc_ent->cc_blkcv); - mutex_exit(&cc_ent->cc_lock); - } - - sdbc_centry_init_dm(cc_ent); - - cc_ent->cc_aging_dm |= categorize_centry; - - out: - return (cc_ent); - } - - SDTRACE(ST_INFO|SDF_ENT_ALLOC, cd, ++tries, BLK_TO_FBA_NUM(cblk), 0, 0); - - delay(drv_usectohz(20000)); - (void) (*stall)++; - num_tries = _sd_lruq_srch; - goto retry_alloc_centry; -} - -/* - * sdbc_centry_init_dm - setup the cache block for dynamic memory allocation - * - * ARGUMENTS: - * centry - Cache block. 
- * - * RETURNS: - * NONE - * - * USAGE: - * This routine is the central point in which cache entry blocks are setup - */ -static void -sdbc_centry_init_dm(_sd_cctl_t *centry) -{ - - /* an entry already setup - don't touch simply refresh age */ - if (centry->cc_data) { - centry->cc_aging_dm &= ~(FINAL_AGING_DM); - - DTRACE_PROBE1(sdbc_centry_init_dm_end, - char *, centry->cc_data); - return; - } - - centry->cc_aging_dm &= ~(FINAL_AGING_DM | CATAGORY_ENTRY_DM); - - if (centry->cc_head_dm || centry->cc_next_dm) - cmn_err(cmn_level, "!sdbc(sdbc_centry_init_dm): " - "non-zero mem chain in ccent %p", (void *)centry); - - centry->cc_head_dm = 0; - - if (!sdbc_use_dmchain) - centry->cc_next_dm = 0; - - centry->cc_data = 0; - -} - -/* - * sdbc_centry_memalloc_dm - * - * Actually allocate the cache memory, storing it in the cc_data field for - * the cctl - * - * ARGS: - * centry: cache control block for which to allocate the memory - * alloc_request: number of bytes to allocate - * flag: if called with ALLOC_NOWAIT, caller must check for non-zero return - * - * RETURNS: - * 0 on success - * non-zero on error - */ -static int -sdbc_centry_memalloc_dm(_sd_cctl_t *centry, int alloc_request, int flag) -{ - int cblocks; - _sd_queue_t *newq; - int sleep; - sleep = (flag & ALLOC_NOWAIT) ? 
KM_NOSLEEP : KM_SLEEP; - - if (!centry->cc_data && (alloc_request > 0)) { - /* host or other */ - dynmem_processing_dm.alloc_ct++; - centry->cc_data = (unsigned char *) - kmem_alloc((size_t)centry->cc_alloc_size_dm, sleep); - - - if (sdbc_use_dmchain) { - cblocks = centry->cc_alloc_size_dm >> _sd_cblock_shift; - newq = &sdbc_dm_queues[cblocks]; - - /* set the dmqueue index */ - centry->cc_cblocks = cblocks; - - /* put on appropriate queue */ - sdbc_ins_dmqueue_back(newq, centry); - } - - /* - * for KM_NOSLEEP (should never happen with KM_SLEEP) - */ - if (!centry->cc_data) - return (LOW_RESOURCES_DM); - centry->cc_head_dm = centry; - centry->cc_alloc_ct_dm++; - } - - return (0); -} - -/* - * _sd_centry_release - release a cache block - * - * ARGUMENTS: - * centry - Cache block. - * - * RETURNS: - * NONE - * - * USAGE: - * This routine frees up a cache block. It also frees up a write - * block if allocated and its valid to release it. - */ - -void -_sd_centry_release(_sd_cctl_t *centry) -{ - ss_centry_info_t *wctl; - - SDTRACE(ST_ENTER|SDF_ENT_FREE, CENTRY_CD(centry), 0, - BLK_TO_FBA_NUM(CENTRY_BLK(centry)), 0, 0); - - CLEAR_CENTRY_PAGEIO(centry); - - if ((wctl = centry->cc_write) != 0) { - /* was FAST */ - mutex_enter(¢ry->cc_lock); - if (CENTRY_DIRTY(centry)) - wctl = NULL; - else { - centry->cc_write = NULL; - centry->cc_flag &= ~(CC_PINNABLE); - } - /* was FAST */ - mutex_exit(¢ry->cc_lock); - if (wctl) { - wctl->sc_dirty = 0; - SSOP_SETCENTRY(sdbc_safestore, wctl); - SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res); - } - } - - if (!(centry->cc_aging_dm & BAD_CHAIN_DM)) { - if (sdbc_use_dmchain) { - if (centry->cc_alloc_size_dm) { - - /* see if this can be queued to head */ - if (CENTRY_QHEAD(centry)) { - sdbc_requeue_head_dm_try(centry); - } else { - int qidx; - _sd_queue_t *q; - - qidx = centry->cc_cblocks; - q = &sdbc_dm_queues[qidx]; - - if (_sd_lru_reinsert(q, centry)) { - sdbc_requeue_dmchain(q, - centry, 1, 1); - } - } - } else { - /* - * Fix for bug 
4949134: - * If an internal block is marked with CC_QHEAD - * but the HOST block is not, the chain will - * never age properly, and will never be made - * available. Only the HOST of the dmchain is - * checked for CC_QHEAD, so clearing an internal - * block indiscriminately (as is being done - * here) does no damage. - * - * The same result could instead be achieved by - * not setting the CC_QHEAD flag in the first - * place, if the block is an internal dmchain - * block, and if it is found in the hash table. - * The current solution was chosen since it is - * the least intrusive. - */ - centry->cc_flag &= ~CC_QHEAD; - } - } else { - if (CENTRY_QHEAD(centry)) { - if (!CENTRY_DIRTY(centry)) - _sd_requeue_head(centry); - } else if (_sd_lru_reinsert(_SD_LRU_Q, centry)) - _sd_requeue(centry); - } - } - - SDTRACE(ST_EXIT|SDF_ENT_FREE, CENTRY_CD(centry), 0, - BLK_TO_FBA_NUM(CENTRY_BLK(centry)), 0, 0); - - /* only clear inuse after final reference to centry */ - - CLEAR_CENTRY_INUSE(centry); -} - - -/* - * lookup to centry info associated with safestore resource - * return pointer to the centry info structure - */ -ss_centry_info_t * -sdbc_get_cinfo_byres(ss_resource_t *res) -{ - ss_centry_info_t *cinfo; - ss_centry_info_t *cend; - int found = 0; - - ASSERT(res != NULL); - - if (res == NULL) - return (NULL); - - cinfo = _sdbc_gl_centry_info; - cend = _sdbc_gl_centry_info + - (_sdbc_gl_centry_info_size / sizeof (ss_centry_info_t)) - 1; - - for (; cinfo <= cend; ++cinfo) - if (cinfo->sc_res == res) { - ++found; - break; - } - - if (!found) - cinfo = NULL; /* bad */ - - return (cinfo); -} - -/* - * _sd_alloc_write - Allocate a write block (for remote mirroring) - * and set centry->cc_write - * - * ARGUMENTS: - * centry - Head of Cache chain - * stall - pointer to stall count (no blocks avail) - * - * RETURNS: - * 0 - and sets cc_write for all entries when write contl block obtained. - * -1 - if a write control block could not be obtained. 
- */ - -int -_sd_alloc_write(_sd_cctl_t *centry, int *stall) -{ - - ss_resourcelist_t *reslist; - ss_resourcelist_t *savereslist; - ss_resource_t *res; - _sd_cctl_t *ce; - int err; - int need; - - - need = 0; - - for (ce = centry; ce; ce = ce->cc_chain) { - if (!(ce->cc_write)) - need++; - } - - if (!need) - return (0); - - if ((SSOP_ALLOCRESOURCE(sdbc_safestore, need, stall, &reslist)) - == SS_OK) { - savereslist = reslist; - for (ce = centry; ce; ce = ce->cc_chain) { - if (ce->cc_write) - continue; - err = SSOP_GETRESOURCE(sdbc_safestore, &reslist, &res); - if (err == SS_OK) - ce->cc_write = sdbc_get_cinfo_byres(res); - - ASSERT(err == SS_OK); /* panic if DEBUG on */ - ASSERT(ce->cc_write != NULL); - - /* - * this is bad and should not happen. - * we use the saved reslist to cleanup - * and return. - */ - if ((err != SS_OK) || !ce->cc_write) { - - cmn_err(CE_WARN, "!_sd_alloc_write: " - "bad resource list 0x%p" - "changing to forced write thru mode", - (void *)savereslist); - - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - - while (SSOP_GETRESOURCE(sdbc_safestore, - &savereslist, &res) == SS_OK) { - - SSOP_DEALLOCRESOURCE(sdbc_safestore, - res); - } - - return (-1); - - } - - } - return (0); - } - - /* no safestore resources available. do sync write */ - _sd_unblock(&_sd_flush_cv); - return (-1); -} - -/* - * _sd_read - Interface call to do read. - * - * ARGUMENTS: - * handle - handle allocated earlier on. - * fba_pos - disk block number to read from. - * fba_len - length in fbas. - * flag - flag: (NSC_NOBLOCK for async io) - * - * RETURNS: - * errno if return > 0 - * NSC_DONE or NSC_PENDING otherwise. 
- * - * USAGE: - * This routine checks if the request is valid and calls the underlying - * doread routine (also called by alloc_buf) - */ - -int -_sd_read(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len, - int flag) -{ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - _sd_cctl_t *cc_ent = NULL; - nsc_size_t fba_orig_len = fba_len; - int ret; - int cd = HANDLE_CD(handle); - - if (_sdbc_shutdown_in_progress || (handle->bh_flag & NSC_ABUF)) { - ret = EIO; - goto out; - } - - -#if !defined(_SD_NOCHECKS) - if (!_SD_HANDLE_ACTIVE(handle)) { - cmn_err(CE_WARN, "!sdbc(_sd_read) handle %p not active", - (void *)handle); - ret = EINVAL; - goto out; - } - ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len); -#endif - if (fba_len == 0) { - ret = NSC_DONE; - goto out; - } - - KSTAT_RUNQ_ENTER(cd); - - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - } - - cc_ent = handle->bh_centry; - while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos)) - cc_ent = cc_ent->cc_chain; - - if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, cc_ent)) - goto need_io; - DATA_LOG(SDF_RD, cc_ent, st_cblk_off, st_cblk_len); - - DTRACE_PROBE4(_sd_read_data1, uint64_t, - (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + st_cblk_off), - uint64_t, (uint64_t)st_cblk_len, char *, - *(int64_t *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off)), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(st_cblk_off + st_cblk_len) - 8)); - - fba_pos += st_cblk_len; - fba_len -= st_cblk_len; - cc_ent = cc_ent->cc_chain; - - while (fba_len > (nsc_size_t)end_cblk_len) { - if (!FULLY_VALID(cc_ent)) - goto need_io; - DATA_LOG(SDF_RD, cc_ent, 0, BLK_FBAS); - - 
DTRACE_PROBE4(_sd_read_data2, uint64_t, - (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - uint64_t, (uint64_t)BLK_FBAS, - char *, *(int64_t *)(cc_ent->cc_data), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(BLK_FBAS) - 8)); - - fba_pos += BLK_FBAS; - fba_len -= BLK_FBAS; - cc_ent = cc_ent->cc_chain; - } - if (fba_len) { - if (!SDBC_VALID_BITS(0, end_cblk_len, cc_ent)) - goto need_io; - DATA_LOG(SDF_RD, cc_ent, 0, end_cblk_len); - - DTRACE_PROBE4(_sd_read_data3, uint64_t, - (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - uint64_t, (uint64_t)end_cblk_len, - char *, *(int64_t *)(cc_ent->cc_data), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(end_cblk_len) - 8)); - } - - CACHE_FBA_READ(handle->bh_cd, fba_orig_len); - CACHE_READ_HIT; - - FBA_READ_IO_KSTATS(handle->bh_cd, FBA_SIZE(fba_orig_len)); - - ret = NSC_HIT; - goto stats_exit; -need_io: - _SD_DISCONNECT_CALLBACK(handle); - - ret = _sd_doread(handle, cc_ent, fba_pos, fba_len, flag); - -stats_exit: - KSTAT_RUNQ_EXIT(cd); -out: - return (ret); -} - - -/* - * sdbc_doread_prefetch - read ahead one cache block - * - * ARGUMENTS: - * cc_ent - cache entry - * fba_pos - disk block number to read from - * fba_len - length in fbas. 
- * - * RETURNS: - * number of fbas, if any, that are to be read beyond (fba_pos + fba_len) - * - * USAGE: - * if readahead is to be done allocate a cache block and place - * on the cc_chain of cc_ent - */ -static int -sdbc_doread_prefetch(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, nsc_size_t fba_len) -{ - nsc_off_t st_cblk = FBA_TO_BLK_NUM(fba_pos); - nsc_off_t next_cblk = FBA_TO_BLK_NUM(fba_pos + BLK_FBAS); - nsc_size_t filesize; - int fba_count = 0; /* number of fbas to prefetch */ - _sd_cctl_t *cc_ra; /* the read ahead cache entry */ - int cd = CENTRY_CD(cc_ent); - nsc_size_t vol_fill; - - filesize = _sd_cache_files[cd].cd_info->sh_filesize; - vol_fill = filesize - (fba_pos + fba_len); - - /* readahead only for small reads */ - if ((fba_len <= FBA_LEN(CACHE_BLOCK_SIZE)) && (fba_pos != 0) && - (vol_fill > 0)) { - - /* - * if prev block is in cache and next block is not, - * then read ahead one block - */ - if (_sd_hash_search(cd, st_cblk - 1, _sd_htable)) { - if (!_sd_hash_search(cd, next_cblk, _sd_htable)) { - - cc_ra = sdbc_centry_alloc_blks - (cd, next_cblk, 1, ALLOC_NOWAIT); - if (cc_ra) { - /* if in cache don't readahead */ - if (cc_ra->cc_aging_dm & - HASH_ENTRY_DM) { - ++sdbc_ra_hash; - _sd_centry_release(cc_ra); - } else { - cc_ent->cc_chain = cc_ra; - cc_ra->cc_chain = 0; - fba_count = - (vol_fill > - (nsc_size_t)BLK_FBAS) ? - BLK_FBAS : (int)vol_fill; - /* - * indicate implicit prefetch - * and mark for release in - * _sd_read_complete() - */ - cc_ra->cc_aging_dm |= - (PREFETCH_BUF_I | - PREFETCH_BUF_IR); - } - } else { - ++sdbc_ra_none; - } - } - } - - } - - return (fba_count); -} - -/* - * _sd_doread - Check if blocks in cache. If not completely true, do io. - * - * ARGUMENTS: - * handle - handle allocated earlier on. - * fba_pos - disk block number to read from. - * fba_len - length in fbas. - * flag - flag: (NSC_NOBLOCK for async io) - * - * RETURNS: - * errno if return > 0 - * NSC_DONE(from disk), or NSC_PENDING otherwise. 
- * - * Comments: - * It initiates an io and either blocks waiting for the completion - * or return NSC_PENDING, depending on whether the flag bit - * NSC_NOBLOCK is reset or set. - * - */ - - -static int -_sd_doread(_sd_buf_handle_t *handle, _sd_cctl_t *cc_ent, nsc_off_t fba_pos, - nsc_size_t fba_len, int flag) -{ - int cd, err; - nsc_size_t fba_orig_len; /* length in FBA's of the original request */ - nsc_size_t file_len; /* length in bytes of io to be done */ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - int num_bdl; - _sd_cctl_t *cc_temp; - struct buf *bp; - unsigned int want_bits; - void (*fn)(blind_t, nsc_off_t, nsc_size_t, int); - sdbc_cblk_fba_t end_cblk_fill; /* FBA's to fill to end of last block */ - nsc_size_t vol_end_fill; /* # of FBA's to fill to end of the volume */ - - cd = HANDLE_CD(handle); - SDTRACE(ST_ENTER|SDF_READ, cd, fba_len, fba_pos, flag, 0); - - ASSERT(cd >= 0); - if (_sd_cache_files[cd].cd_info->sh_failed) { - SDTRACE(ST_EXIT|SDF_READ, cd, fba_len, fba_pos, flag, EIO); - return (EIO); - } - - /* - * adjust the position and length so that the entire cache - * block is read in - */ - - /* first, adjust to beginning of cache block */ - - fba_len += BLK_FBA_OFF(fba_pos); /* add start offset to length */ - fba_pos &= ~BLK_FBA_MASK; /* move position back to start of block */ - - /* compute fill to end of cache block */ - end_cblk_fill = (BLK_FBAS - 1) - ((fba_len - 1) % BLK_FBAS); - vol_end_fill = _sd_cache_files[(cd)].cd_info->sh_filesize - - (fba_pos + fba_len); - - /* fill to lesser of cache block or end of volume */ - fba_len += ((nsc_size_t)end_cblk_fill < vol_end_fill) ? 
end_cblk_fill : - vol_end_fill; - - DTRACE_PROBE2(_sd_doread_rfill, nsc_off_t, fba_pos, - nsc_size_t, fba_len); - - - /* for small reads do 1-block readahead if previous block is in cache */ - if (sdbc_prefetch1) - fba_len += sdbc_doread_prefetch(cc_ent, fba_pos, fba_len); - - fba_orig_len = fba_len; - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - } - - cc_temp = cc_ent; - num_bdl = 0; - while (cc_temp) { - num_bdl += (SDBC_LOOKUP_IOCOUNT(CENTRY_DIRTY(cc_temp))); - cc_temp = cc_temp->cc_chain; - } - bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev, - fba_pos, num_bdl, B_READ); - if (bp == NULL) { - SDTRACE(ST_EXIT|SDF_READ, cd, fba_len, fba_pos, flag, E2BIG); - return (E2BIG); - } - - want_bits = SDBC_GET_BITS(st_cblk_off, st_cblk_len); - if (want_bits & CENTRY_DIRTY(cc_ent)) - _sd_ccent_rd(cc_ent, want_bits, bp); - else { - sd_add_fba(bp, &cc_ent->cc_addr, st_cblk_off, st_cblk_len); - } - file_len = FBA_SIZE(st_cblk_len); - cc_ent = cc_ent->cc_chain; - fba_len -= st_cblk_len; - - while (fba_len > (nsc_size_t)end_cblk_len) { - if (CENTRY_DIRTY(cc_ent)) - _sd_ccent_rd(cc_ent, (uint_t)BLK_FBA_BITS, bp); - else { - sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS); - } - file_len += CACHE_BLOCK_SIZE; - cc_ent = cc_ent->cc_chain; - fba_len -= BLK_FBAS; - } - - if (fba_len) { - want_bits = SDBC_GET_BITS(0, end_cblk_len); - if (want_bits & CENTRY_DIRTY(cc_ent)) - _sd_ccent_rd(cc_ent, want_bits, bp); - else { - sd_add_fba(bp, &cc_ent->cc_addr, 0, end_cblk_len); - } - file_len += FBA_SIZE(end_cblk_len); - } - - CACHE_READ_MISS; - FBA_READ_IO_KSTATS(cd, file_len); - - DISK_FBA_READ(cd, FBA_NUM(file_len)); - - fn = (handle->bh_flag & NSC_NOBLOCK) ? 
_sd_async_read_ea : NULL; - err = sd_start_io(bp, _sd_cache_files[cd].cd_strategy, fn, handle); - - if (err != NSC_PENDING) { - _sd_read_complete(handle, fba_pos, fba_orig_len, err); - } - - SDTRACE(ST_EXIT|SDF_READ, cd, fba_orig_len, fba_pos, flag, err); - - return (err); -} - - - -/* - * _sd_read_complete - Do whatever is necessary after a read io is done. - * - * ARGUMENTS: - * handle - handle allocated earlier on. - * fba_pos - disk block number to read from. - * fba_len - length in fbas. - * error - error from io if any. - * - * RETURNS: - * NONE. - * - * Comments: - * This routine marks the cache blocks valid if the io completed - * sucessfully. Called from the async end action as well as after - * a synchrnous read completes. - */ - -void -_sd_read_complete(_sd_buf_handle_t *handle, nsc_off_t fba_pos, - nsc_size_t fba_len, int error) -{ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_size_t cur_fba_len; /* length in FBA's */ - _sd_cctl_t *cc_iocent; - _sd_cctl_t *first_iocent; /* first buffer when processing prefetch */ - - cc_iocent = handle->bh_centry; - - if ((handle->bh_error = error) == 0) { - while (CENTRY_BLK(cc_iocent) != FBA_TO_BLK_NUM(fba_pos)) - cc_iocent = cc_iocent->cc_chain; - - cur_fba_len = fba_len; - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - } - - SDBC_SET_VALID_BITS(st_cblk_off, st_cblk_len, cc_iocent); - DATA_LOG(SDF_RDIO, cc_iocent, st_cblk_off, st_cblk_len); - - DTRACE_PROBE4(_sd_read_complete_data1, uint64_t, (uint64_t) - BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)) + st_cblk_off, - int, st_cblk_len, char *, - *(int64_t *)(cc_iocent->cc_data + FBA_SIZE(st_cblk_off)), - char *, 
*(int64_t *)(cc_iocent->cc_data + - FBA_SIZE(st_cblk_off + st_cblk_len) - 8)); - - - first_iocent = cc_iocent; - cc_iocent = cc_iocent->cc_chain; - cur_fba_len -= st_cblk_len; - - while (cur_fba_len > (nsc_size_t)end_cblk_len) { - SET_FULLY_VALID(cc_iocent); - DATA_LOG(SDF_RDIO, cc_iocent, 0, BLK_FBAS); - - DTRACE_PROBE4(_sd_read_complete_data2, uint64_t, - (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)), - int, BLK_FBAS, char *, - *(int64_t *)(cc_iocent->cc_data), char *, - *(int64_t *)(cc_iocent->cc_data + - FBA_SIZE(BLK_FBAS) - 8)); - - /* - * 4755485 release implicit prefetch buffers - * - * the cc_chain of the first buffer must NULL'd - * else _sd_free_buf() will do a double free when - * it traverses the chain. - * - * if a buffer has been marked PREFETCH_BUF_IR then - * it is guaranteed that - * 1. it is the second in a chain of two. - * 2. cur_fba_len is BLK_FBAS. - * 3. end_cblk_len is zero. - * - * because of 1 (and 2) above, we can safely exit the - * while loop via the break statement without - * executing the last two statements. the break - * statement is necessary because it would be unsafe - * to access cc_iocent which could be reallocated - * immediately after the _sd_centry_release(). - */ - if (cc_iocent->cc_aging_dm & PREFETCH_BUF_IR) { - cc_iocent->cc_aging_dm &= ~(PREFETCH_BUF_IR); - _sd_centry_release(cc_iocent); - first_iocent->cc_chain = NULL; - break; - } - - cc_iocent = cc_iocent->cc_chain; - cur_fba_len -= BLK_FBAS; - } - if (end_cblk_len) { - SDBC_SET_VALID_BITS(0, end_cblk_len, cc_iocent); - DATA_LOG(SDF_RDIO, cc_iocent, 0, end_cblk_len); - - DTRACE_PROBE4(_sd_read_complete_data3, uint64_t, - (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)), - int, end_cblk_len, char *, - *(int64_t *)(cc_iocent->cc_data), char *, - *(int64_t *)(cc_iocent->cc_data + - FBA_SIZE(end_cblk_len) - 8)); - } - } - -} - - -/* - * _sd_async_read_ea - End action for async reads. - * - * ARGUMENTS: - * xhandle - handle allocated earlier on (cast to blind_t). 
- * fba_pos - disk block number read from. - * fba_len - length in fbas. - * error - error from io if any. - * - * RETURNS: - * NONE. - * - * Comments: - * This routine is called at interrupt level when the io is done. - * This is called only when read is asynchronous (NSC_NOBLOCK) - */ - -static void -_sd_async_read_ea(blind_t xhandle, nsc_off_t fba_pos, nsc_size_t fba_len, - int error) -{ - _sd_buf_handle_t *handle = xhandle; - int cd; - - if (error) { - cd = HANDLE_CD(handle); - ASSERT(cd >= 0); - _sd_cache_files[cd].cd_info->sh_failed = 1; - } - SDTRACE(ST_ENTER|SDF_READ_EA, HANDLE_CD(handle), - handle->bh_fba_len, handle->bh_fba_pos, 0, error); - - _sd_read_complete(handle, fba_pos, fba_len, error); - -#if defined(_SD_DEBUG_PATTERN) - check_buf_consistency(handle, "rd"); -#endif - - SDTRACE(ST_EXIT|SDF_READ_EA, HANDLE_CD(handle), - handle->bh_fba_len, handle->bh_fba_pos, 0, 0); - _SD_READ_CALLBACK(handle); -} - - -/* - * _sd_async_write_ea - End action for async writes. - * - * ARGUMENTS: - * xhandle - handle allocated earlier on. (cast to blind_t) - * fba_pos - disk block number written to. - * fba_len - length in fbas. - * error - error from io if any. - * - * RETURNS: - * NONE. - * - * Comments: - * This routine is called at interrupt level when the write io is done. - * This is called only when we are in write-through mode and the write - * call indicated asynchronous callback. (NSC_NOBLOCK) - */ - -/* ARGSUSED */ - -static void -_sd_async_write_ea(blind_t xhandle, nsc_off_t fba_pos, nsc_size_t fba_len, - int error) -{ - _sd_buf_handle_t *handle = xhandle; - handle->bh_error = error; - - if (error) - _sd_cache_files[HANDLE_CD(handle)].cd_info->sh_failed = 1; - - _SD_WRITE_CALLBACK(handle); -} - -/* - * update_dirty - set dirty bits in cache block which is already dirty - * cc_inuse is held, need cc_lock to avoid race with _sd_process_pending - * must check for I/O in-progress and set PEND_DIRTY. 
- * return previous dirty bits - * [if set _sd_process_pending will re-issue] - */ -static _sd_bitmap_t -update_dirty(_sd_cctl_t *cc_ent, sdbc_cblk_fba_t st_off, sdbc_cblk_fba_t st_len) -{ - _sd_bitmap_t old; - - /* was FAST */ - mutex_enter(&cc_ent->cc_lock); - old = CENTRY_DIRTY(cc_ent); - if (old) { - /* - * If we are writing to an FBA that is still marked dirty, - * record a write cancellation. - */ - if (old & SDBC_GET_BITS(st_off, st_len)) { - CACHE_WRITE_CANCELLATION(CENTRY_CD(cc_ent)); - } - - /* This is a write to a block that was already dirty */ - SDBC_SET_DIRTY(st_off, st_len, cc_ent); - sd_serialize(); - if (CENTRY_IO_INPROGRESS(cc_ent)) - cc_ent->cc_flag |= CC_PEND_DIRTY; - } - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - return (old); -} - -/* - * _sd_write - Interface call to commit part of handle. - * - * ARGUMENTS: - * handle - handle allocated earlier o. - * fba_pos - disk block number to write to. - * fba_len - length in fbas. - * flag - (NSC_NOBLOCK | NSC_WRTHRU) - * - * RETURNS: - * errno if return > 0 - * NSC_HIT (in cache), NSC_DONE (to disk) or NSC_PENDING otherwise. - * - * Comments: - * This routine checks validity of the handle and then calls the - * sync-write function if this write is determined to be write-through. - * Else, it reflects the data to the write blocks on the mirror node, - * (allocated in alloc_buf). If the cache block is not dirty, it is - * marked dirty and queued up for io processing later on. - * If parts are already dirty but io is not in progress yet, it is - * marked dirty and left alone (it is already in the queue) - * If parts are already dirty but io is in progress, it is marked - * dirty and also a flag is set indicating that this buffer should - * be reprocessed after the io-end-action. - * Attempt is made to coalesce multiple writes into a single list - * for io processing later on. 
- * - * Issuing of writes may be delayed until the handle is released; - * _sd_queue_write() sets NSC_QUEUE, indicating that dirty bits - * and reflection to mirror have already been done, just queue I/O. - */ - - - -int -_sd_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len, - int flag) -{ - int cd = HANDLE_CD(handle); - int num_queued, ret, queue_only, store_only; - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_size_t cur_fba_len; /* position in disk blocks */ - _sd_cctl_t *cc_ent = NULL; - _sd_cctl_t *cur_chain = NULL, *dirty_next = NULL; - - - if (_sdbc_shutdown_in_progress) { - ret = EIO; - goto out; - } - - - if (!_SD_HANDLE_ACTIVE(handle)) { - SDALERT(SDF_WRITE, - SDT_INV_CD, 0, SDT_INV_BL, handle->bh_flag, 0); - ret = EINVAL; - goto out; - } -#if !defined(_SD_NOCHECKS) - ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len); - if ((handle->bh_flag & NSC_WRBUF) == 0) { - ret = EINVAL; - goto out; - } -#endif - if (fba_len == 0) { - ret = NSC_DONE; - goto out; - } - - /* - * store_only: don't queue this I/O yet - * queue_only: queue I/O to disk, don't store in mirror node - */ - if (flag & NSC_QUEUE) - queue_only = 1, store_only = 0; - else - if (_SD_DELAY_QUEUE && (fba_len != handle->bh_fba_len)) - queue_only = 0, store_only = 1; - else - queue_only = store_only = 0; - - if (!queue_only && _SD_FORCE_DISCONNECT(fba_len)) - _SD_DISCONNECT_CALLBACK(handle); - - if (_sd_cache_files[cd].cd_info->sh_failed) { - ret = EIO; - goto out; - } - - KSTAT_RUNQ_ENTER(cd); - - SDTRACE(ST_ENTER|SDF_WRITE, cd, fba_len, fba_pos, flag, 0); - -#if defined(_SD_DEBUG_PATTERN) - check_buf_consistency(handle, "wr"); -#endif - - cc_ent = handle->bh_centry; - - while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos)) - cc_ent = cc_ent->cc_chain; - - if (((handle->bh_flag | flag) & _SD_WRTHRU_MASK) || - 
(!queue_only && _sd_remote_store(cc_ent, fba_pos, fba_len))) { - flag |= NSC_WRTHRU; - - ret = _sd_sync_write(handle, fba_pos, fba_len, flag); - goto stats_exit; - } - - if (store_only) /* enqueue in _sd_free_buf() */ - handle->bh_flag |= NSC_QUEUE; - cur_fba_len = fba_len; - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - } - - if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, st_cblk_off, - st_cblk_len)) - goto loop1; - if (store_only) { - SDBC_SET_TOFLUSH(st_cblk_off, st_cblk_len, cc_ent); - goto loop1; - } - SDBC_SET_DIRTY(st_cblk_off, st_cblk_len, cc_ent); - cur_chain = dirty_next = cc_ent; - num_queued = 1; - -loop1: - DATA_LOG(SDF_WR, cc_ent, st_cblk_off, st_cblk_len); - - DTRACE_PROBE4(_sd_write_data1, uint64_t, (uint64_t) - (BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + st_cblk_off), - int, st_cblk_len, char *, - *(int64_t *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off)), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(st_cblk_off+ st_cblk_len) - 8)); - - cur_fba_len -= st_cblk_len; - cc_ent = cc_ent->cc_chain; - - while (cur_fba_len > (nsc_size_t)end_cblk_len) { - if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, 0, BLK_FBAS)) { - if (cur_chain) { - _sd_enqueue_dirty(cd, cur_chain, dirty_next, - num_queued); - cur_chain = dirty_next = NULL; - } - goto loop2; - } - if (store_only) { - SDBC_SET_TOFLUSH(0, BLK_FBAS, cc_ent); - goto loop2; - } - SDBC_SET_DIRTY(0, BLK_FBAS, cc_ent); - if (dirty_next) { - dirty_next->cc_dirty_next = cc_ent; - dirty_next = cc_ent; - num_queued++; - } else { - cur_chain = dirty_next = cc_ent; - num_queued = 1; - } - loop2: - DATA_LOG(SDF_WR, cc_ent, 0, BLK_FBAS); - - DTRACE_PROBE4(_sd_write_data2, uint64_t, - (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))), - int, BLK_FBAS, char *, *(int64_t *)(cc_ent->cc_data), - char *, *(int64_t *)(cc_ent->cc_data 
+ - FBA_SIZE(BLK_FBAS) - 8)); - - cc_ent = cc_ent->cc_chain; - cur_fba_len -= BLK_FBAS; - } - -#if defined(_SD_DEBUG) - if (cur_fba_len != end_cblk_len) - cmn_err(CE_WARN, "!fba_len %" NSC_SZFMT " end_cblk_len %d in " - "_sd_write", cur_fba_len, end_cblk_len); -#endif - - if (cur_fba_len) { - if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, 0, - end_cblk_len)) { - if (cur_chain) { - _sd_enqueue_dirty(cd, cur_chain, dirty_next, - num_queued); - cur_chain = dirty_next = NULL; - } - goto loop3; - } - if (store_only) { - SDBC_SET_TOFLUSH(0, end_cblk_len, cc_ent); - goto loop3; - } - SDBC_SET_DIRTY(0, end_cblk_len, cc_ent); - if (dirty_next) { - dirty_next->cc_dirty_next = cc_ent; - dirty_next = cc_ent; - num_queued++; - } else { - cur_chain = dirty_next = cc_ent; - num_queued = 1; - } - } -loop3: - if (cur_fba_len) { - DATA_LOG(SDF_WR, cc_ent, 0, end_cblk_len); - - DTRACE_PROBE4(_sd_write_data3, uint64_t, - (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))), - int, end_cblk_len, char *, *(int64_t *)(cc_ent->cc_data), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(end_cblk_len) - 8)); - - } - - if (!store_only && cur_chain) { - _sd_enqueue_dirty(cd, cur_chain, dirty_next, num_queued); - } - - if (!queue_only) { - CACHE_FBA_WRITE(cd, fba_len); - CACHE_WRITE_HIT; - - FBA_WRITE_IO_KSTATS(cd, FBA_SIZE(fba_len)); - } - - ret = NSC_HIT; - -stats_exit: - SDTRACE(ST_EXIT|SDF_WRITE, cd, fba_len, fba_pos, flag, ret); - KSTAT_RUNQ_EXIT(cd); -out: - return (ret); -} - - -/* - * _sd_queue_write(handle, fba_pos, fba_len): Queues delayed writes for - * flushing - * - * ARGUMENTS: handle - handle allocated with NSC_WRBUF - * fba_pos - starting fba pos from _sd_alloc_buf() - * fba_len - fba len from _sd_alloc_buf() - * - * USAGE : Called if _SD_DELAY_QUEUE is set. Finds all blocks in the - * handle marked for flushing and queues them to be written in - * optimized (i.e. 
sequential) order - */ -static void -_sd_queue_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len) -{ - nsc_off_t fba_end; - sdbc_cblk_fba_t sblk, len, dirty; - _sd_cctl_t *cc_ent; - nsc_off_t flush_pos; - int flush_pos_valid = 0; - nsc_size_t flush_len = 0; - - cc_ent = handle->bh_centry; - fba_end = fba_pos + fba_len; - fba_pos = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)); /* 1st block */ - while (fba_pos < fba_end) { - dirty = cc_ent->cc_toflush; - cc_ent->cc_toflush = 0; - /* - * Full block - */ - if (_SD_BMAP_ISFULL(dirty)) { - if (flush_pos_valid == 0) { - flush_pos_valid = 1; - flush_pos = fba_pos; - } - flush_len += BLK_FBAS; - } - /* - * Partial block - */ - else while (dirty) { - sblk = SDBC_LOOKUP_STPOS(dirty); - len = SDBC_LOOKUP_LEN(dirty); - SDBC_LOOKUP_MODIFY(dirty); - - if (sblk && flush_pos_valid) { - (void) _sd_write(handle, flush_pos, flush_len, - NSC_QUEUE); - flush_pos_valid = 0; - flush_len = 0; - } - if (flush_pos_valid == 0) { - flush_pos_valid = 1; - flush_pos = fba_pos + sblk; - } - flush_len += len; - } - fba_pos += BLK_FBAS; - cc_ent = cc_ent->cc_chain; - /* - * If we find a gap, write out what we've got - */ - if (flush_pos_valid && (flush_pos + flush_len) != fba_pos) { - (void) _sd_write(handle, flush_pos, flush_len, - NSC_QUEUE); - flush_pos_valid = 0; - flush_len = 0; - } - } - if (flush_pos_valid) - (void) _sd_write(handle, flush_pos, flush_len, NSC_QUEUE); -} - - -static int -_sd_remote_store(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, nsc_size_t fba_len) -{ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - ss_resource_t *ss_res; - - if (_sd_nodes_configured <= 2 && _sd_is_mirror_down()) - return (0); - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = 
(sdbc_cblk_fba_t)fba_len; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - } - - fba_len -= st_cblk_len; - - ss_res = cc_ent->cc_write->sc_res; - if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res, - cc_ent->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len), - FBA_SIZE(st_cblk_off))) { - - cmn_err(CE_WARN, - "!sdbc(_sd_write) safe store failed. Going synchronous"); - SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len, - fba_pos, 0, -1); - return (-1); - } - - cc_ent = cc_ent->cc_chain; - while (fba_len > (nsc_size_t)end_cblk_len) { - fba_len -= BLK_FBAS; - - if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res, cc_ent->cc_data, - CACHE_BLOCK_SIZE, 0)) { - - cmn_err(CE_WARN, "!sdbc(_sd_write) safe store failed. " - "Going synchronous"); - SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len, - fba_pos, 0, -1); - return (-1); - } - - cc_ent = cc_ent->cc_chain; - } /* end while */ - - if (fba_len) { - if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res, - cc_ent->cc_data, FBA_SIZE(end_cblk_len), 0)) { - - cmn_err(CE_WARN, "!sdbc(_sd_write) nvmem dma failed. " - "Going synchronous"); - SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len, - fba_pos, 0, -1); - return (-1); - } - } - return (0); -} - - -/* - * _sd_sync_write2 - Write-through function. - * - * ARGUMENTS: - * wr_handle - handle into which to write the data. - * wr_st_pos - starting FBA position in wr_handle. - * fba_len - length in fbas. - * flag - NSC_NOBLOCK for async io. - * rd_handle - handle from which to read the data, or NULL. - * rd_st_pos - starting FBA position in rd_handle. - * - * RETURNS: - * errno if return > 0 - * NSC_DONE or NSC_PENDING otherwise. - * - * Comments: - * This routine initiates io of the indicated portion. It returns - * synchronously after io is completed if NSC_NOBLOCK is not set. - * Else NSC_PENDING is returned with a subsequent write callback on - * io completion. 
- * - * See _sd_copy_direct() for usage when - * (wr_handle != rd_handle && rd_handle != NULL) - */ - -static int -_sd_sync_write2(_sd_buf_handle_t *wr_handle, nsc_off_t wr_st_pos, - nsc_size_t fba_len, int flag, _sd_buf_handle_t *rd_handle, - nsc_off_t rd_st_pos) -{ - void (*fn)(blind_t, nsc_off_t, nsc_size_t, int); - _sd_cctl_t *wr_ent, *rd_ent; - nsc_size_t this_len; - nsc_off_t rd_pos, wr_pos; - nsc_size_t log_bytes; - int cd = HANDLE_CD(wr_handle); - int err; - uint_t dirty; - struct buf *bp; - - LINTUSED(flag); - - _SD_DISCONNECT_CALLBACK(wr_handle); - - if (rd_handle == NULL) { - rd_handle = wr_handle; - rd_st_pos = wr_st_pos; - } - - wr_ent = wr_handle->bh_centry; - while (CENTRY_BLK(wr_ent) != FBA_TO_BLK_NUM(wr_st_pos)) - wr_ent = wr_ent->cc_chain; - - rd_ent = rd_handle->bh_centry; - while (CENTRY_BLK(rd_ent) != FBA_TO_BLK_NUM(rd_st_pos)) - rd_ent = rd_ent->cc_chain; - - bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev, - wr_st_pos, FBA_TO_BLK_LEN(fba_len) + 2, B_WRITE); - - if (bp == NULL) - return (E2BIG); - - wr_pos = BLK_FBA_OFF(wr_st_pos); - rd_pos = BLK_FBA_OFF(rd_st_pos); - log_bytes = 0; - - do { - this_len = min((BLK_FBAS - rd_pos), (BLK_FBAS - wr_pos)); - - if (this_len > fba_len) - this_len = fba_len; - - /* - * clear dirty bits in the write handle. - */ - - if (CENTRY_DIRTY(wr_ent)) { - mutex_enter(&wr_ent->cc_lock); - - if (CENTRY_DIRTY(wr_ent)) { - if (this_len == (nsc_size_t)BLK_FBAS || - rd_handle != wr_handle) { - /* - * optimization for when we have a - * full cache block, or are doing - * copy_direct (see below). - */ - - wr_ent->cc_write->sc_dirty = 0; - } else { - dirty = wr_ent->cc_write->sc_dirty; - dirty &= ~(SDBC_GET_BITS( - wr_pos, this_len)); - wr_ent->cc_write->sc_dirty = dirty; - } - - SSOP_SETCENTRY(sdbc_safestore, - wr_ent->cc_write); - } - - mutex_exit(&wr_ent->cc_lock); - } - - /* - * update valid bits in the write handle. 
- */ - - if (rd_handle == wr_handle) { - if (this_len == (nsc_size_t)BLK_FBAS) { - SET_FULLY_VALID(wr_ent); - } else { - SDBC_SET_VALID_BITS(wr_pos, this_len, wr_ent); - } - } else { - /* - * doing copy_direct, so mark the write handle - * as invalid since the data is on disk, but not - * in cache. - */ - wr_ent->cc_valid = 0; - } - - DATA_LOG(SDF_WRSYNC, rd_ent, rd_pos, this_len); - - DTRACE_PROBE4(_sd_sync_write2_data, uint64_t, - (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(rd_ent)) + rd_pos, - uint64_t, (uint64_t)this_len, char *, - *(int64_t *)(rd_ent->cc_data + FBA_SIZE(rd_pos)), - char *, *(int64_t *)(rd_ent->cc_data + - FBA_SIZE(rd_pos + this_len) - 8)); - - sd_add_fba(bp, &rd_ent->cc_addr, rd_pos, this_len); - - log_bytes += FBA_SIZE(this_len); - fba_len -= this_len; - - wr_pos += this_len; - if (wr_pos >= (nsc_size_t)BLK_FBAS) { - wr_ent = wr_ent->cc_chain; - wr_pos = 0; - } - - rd_pos += this_len; - if (rd_pos >= (nsc_size_t)BLK_FBAS) { - rd_ent = rd_ent->cc_chain; - rd_pos = 0; - } - - } while (fba_len > 0); - - DISK_FBA_WRITE(cd, FBA_NUM(log_bytes)); - CACHE_WRITE_MISS; - - FBA_WRITE_IO_KSTATS(cd, log_bytes); - - fn = (wr_handle->bh_flag & NSC_NOBLOCK) ? _sd_async_write_ea : NULL; - - err = sd_start_io(bp, _sd_cache_files[cd].cd_strategy, fn, wr_handle); - - if (err != NSC_PENDING) { - DATA_LOG_CHAIN(SDF_WRSYEA, wr_handle->bh_centry, - wr_st_pos, FBA_NUM(log_bytes)); - } - - return (err); -} - - -static int -_sd_sync_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len, - int flag) -{ - return (_sd_sync_write2(handle, fba_pos, fba_len, flag, NULL, 0)); -} - - -/* - * _sd_zero - Interface call to zero out a portion of cache blocks. - * - * ARGUMENTS: - * handle - handle allocated earlier on. - * fba_pos - disk block number to zero from. - * fba_len - length in fbas. - * flag - NSC_NOBLOCK for async io. - * - * RETURNS: - * errno if return > 0 - * NSC_DONE or NSC_PENDING otherwise. 
- * - * Comments: - * This routine zeroes out the indicated portion of the cache blocks - * and commits the data to disk. - * (See write for more details on the commit) - */ - - -int -_sd_zero(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len, - int flag) -{ - int cd; - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_size_t cur_fba_len; /* position in disk blocks */ - int ret; - _sd_cctl_t *cc_ent; - - if (_sdbc_shutdown_in_progress) { - DTRACE_PROBE(shutdown); - return (EIO); - } - - if (!_SD_HANDLE_ACTIVE(handle)) { - cmn_err(CE_WARN, "!sdbc(_sd_zero) handle %p not active", - (void *)handle); - - DTRACE_PROBE1(handle_active, int, handle->bh_flag); - - return (EINVAL); - } - ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len); - if ((handle->bh_flag & NSC_WRBUF) == 0) { - DTRACE_PROBE1(handle_write, int, handle->bh_flag); - return (EINVAL); - } - - if (fba_len == 0) { - DTRACE_PROBE(zero_len); - return (NSC_DONE); - } - - if (_SD_FORCE_DISCONNECT(fba_len)) - _SD_DISCONNECT_CALLBACK(handle); - - cd = HANDLE_CD(handle); - SDTRACE(ST_ENTER|SDF_ZERO, cd, fba_len, fba_pos, flag, 0); - - cc_ent = handle->bh_centry; - while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos)) - cc_ent = cc_ent->cc_chain; - cur_fba_len = fba_len; - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - } - - cur_fba_len -= st_cblk_len; - bzero(cc_ent->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len)); - - cc_ent = cc_ent->cc_chain; - while (cur_fba_len > (nsc_size_t)end_cblk_len) { - cur_fba_len -= BLK_FBAS; - bzero(cc_ent->cc_data, CACHE_BLOCK_SIZE); - cc_ent = cc_ent->cc_chain; - } - if (cur_fba_len) { - bzero(cc_ent->cc_data, 
FBA_SIZE(cur_fba_len)); - } - - ret = _sd_write(handle, fba_pos, fba_len, flag); - SDTRACE(ST_EXIT|SDF_ZERO, cd, fba_len, fba_pos, flag, ret); - - return (ret); -} - - -/* - * _sd_copy - Copies portions of 2 handles. - * - * ARGUMENTS: - * handle1 - handle allocated earlier on. - * handle2 - handle allocated earlier on. - * fba_pos1 - disk block number to read from. - * fba_pos2 - disk block number to write to. - * fba_len - length in fbas. - * - * RETURNS: - * errno if return > 0 - * NSC_DONE otherwise. - * - * Comments: - * This routine copies the 2 handles. - * WARNING: this could put the cache blocks in the destination handle - * in an inconsistent state. (the blocks could be valid in cache, - * but the copy makes the cache different from disk) - * - */ - - -int -_sd_copy(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2, - nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len) -{ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_off_t off1, off2; /* offsets in FBA's into the disk */ - nsc_size_t cur_fba_len; /* position in disk blocks */ - _sd_cctl_t *cc_ent1, *cc_ent2; - - if (_sdbc_shutdown_in_progress) { - DTRACE_PROBE(shutdown); - return (EIO); - } - if (!_SD_HANDLE_ACTIVE(handle1) || !_SD_HANDLE_ACTIVE(handle2)) { - cmn_err(CE_WARN, "!sdbc(_sd_copy) handle %p or %p not active", - (void *)handle1, (void *)handle2); - - DTRACE_PROBE2(handle_active1, int, handle1->bh_flag, - int, handle2->bh_flag); - - return (EINVAL); - } - ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len); - ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len); - - cc_ent1 = handle1->bh_centry; - while (CENTRY_BLK(cc_ent1) != FBA_TO_BLK_NUM(fba_pos1)) - cc_ent1 = cc_ent1->cc_chain; - - cc_ent2 = handle2->bh_centry; - while (CENTRY_BLK(cc_ent2) != FBA_TO_BLK_NUM(fba_pos2)) - cc_ent2 = cc_ent2->cc_chain; - - if 
(BLK_FBA_OFF(fba_pos1) != BLK_FBA_OFF(fba_pos2)) { - /* Different offsets, do it slowly (per fba) */ - - while (fba_len) { - off1 = FBA_SIZE(BLK_FBA_OFF(fba_pos1)); - off2 = FBA_SIZE(BLK_FBA_OFF(fba_pos2)); - - bcopy(cc_ent1->cc_data+off1, cc_ent2->cc_data+off2, - FBA_SIZE(1)); - - fba_pos1++; - fba_pos2++; - fba_len--; - - if (FBA_TO_BLK_NUM(fba_pos1) != CENTRY_BLK(cc_ent1)) - cc_ent1 = cc_ent1->cc_chain; - if (FBA_TO_BLK_NUM(fba_pos2) != CENTRY_BLK(cc_ent2)) - cc_ent2 = cc_ent2->cc_chain; - } - - DTRACE_PROBE(_sd_copy_end); - return (NSC_DONE); - } - cur_fba_len = fba_len; - st_cblk_off = BLK_FBA_OFF(fba_pos1); - st_cblk_len = BLK_FBAS - st_cblk_off; - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos1 + fba_len); - } - - bcopy(cc_ent1->cc_data + FBA_SIZE(st_cblk_off), - cc_ent2->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len)); - cur_fba_len -= st_cblk_len; - cc_ent1 = cc_ent1->cc_chain; - cc_ent2 = cc_ent2->cc_chain; - - while (cur_fba_len > (nsc_size_t)end_cblk_len) { - bcopy(cc_ent1->cc_data, cc_ent2->cc_data, CACHE_BLOCK_SIZE); - cc_ent1 = cc_ent1->cc_chain; - cc_ent2 = cc_ent2->cc_chain; - cur_fba_len -= BLK_FBAS; - } - if (cur_fba_len) { - bcopy(cc_ent1->cc_data, cc_ent2->cc_data, - FBA_SIZE(end_cblk_len)); - } - - return (NSC_DONE); -} - - -/* - * _sd_copy_direct - Copies data from one handle direct to another disk. - * - * ARGUMENTS: - * handle1 - handle to read from - * handle2 - handle to write to - * fba_pos1 - disk block number to read from. - * fba_pos2 - disk block number to write to. - * fba_len - length in fbas. - * - * RETURNS: - * errno if return > 0 - * NSC_DONE otherwise. - * - * Comments: - * This routine copies data from handle1 directly (sync write) - * onto the disk pointed to by handle2. The handle2 is then - * invalidated since the data it contains is now stale compared to - * the disk. 
- */ - -static int -_sd_copy_direct(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2, - nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len) -{ - int rc; - - if (_sdbc_shutdown_in_progress) { - DTRACE_PROBE(shutdown); - return (EIO); - } - - if (!_SD_HANDLE_ACTIVE(handle1) || !_SD_HANDLE_ACTIVE(handle2)) { - cmn_err(CE_WARN, - "!sdbc(_sd_copy_direct) handle %p or %p not active", - (void *)handle1, (void *)handle2); - - DTRACE_PROBE2(handle_active2, int, handle1->bh_flag, - int, handle2->bh_flag); - - return (EINVAL); - } - - ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len); - ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len); - - if ((handle2->bh_flag & NSC_WRITE) == 0) { - cmn_err(CE_WARN, - "!sdbc(_sd_copy_direct) handle2 %p is not writeable", - (void *)handle2); - DTRACE_PROBE1(handle2_write, int, handle2->bh_flag); - return (EINVAL); - } - - rc = _sd_sync_write2(handle2, fba_pos2, fba_len, 0, handle1, fba_pos1); - - return (rc); -} - - -/* - * _sd_enqueue_dirty - Enqueue a list of dirty buffers. - * - * ARGUMENTS: - * cd - cache descriptor. - * chain - pointer to list. - * cc_last - last entry in the chain. - * numq - number of entries in the list. - * - * RETURNS: - * NONE. - * - * Comments: - * This routine queues up the dirty blocks for io processing. - * It uses the cc_last to try to coalesce multiple lists into a - * single list, if consecutive writes are sequential in nature. 
- */ - -void -_sd_enqueue_dirty(int cd, _sd_cctl_t *chain, _sd_cctl_t *cc_last, int numq) -{ - _sd_cd_info_t *cdi; - _sd_cctl_t *last_ent; - int start_write = 0, maxq = SGIO_MAX; - - ASSERT(cd >= 0); - cdi = &(_sd_cache_files[cd]); -#if defined(_SD_DEBUG) - if (chain->cc_dirty_link) - cmn_err(CE_WARN, "!dirty_link set in enq %x fl %x", - chain->cc_dirty_link, chain->cc_flag); -#endif - - /* was FAST */ - mutex_enter(&(cdi->cd_lock)); - cdi->cd_info->sh_numdirty += numq; - if (cc_last == NULL) - numq = 0; - - if (cdi->cd_dirty_head == NULL) { - cdi->cd_dirty_head = cdi->cd_dirty_tail = chain; - cdi->cd_last_ent = cc_last; - cdi->cd_lastchain_ptr = chain; - cdi->cd_lastchain = numq; - } else { - if ((cc_last) && (last_ent = cdi->cd_last_ent) && - (CENTRY_BLK(chain) == (CENTRY_BLK(last_ent)+1)) && - (SDBC_DIRTY_NEIGHBORS(last_ent, chain)) && - (cdi->cd_lastchain + numq < maxq)) { - cdi->cd_last_ent->cc_dirty_next = chain; - cdi->cd_last_ent = cc_last; - cdi->cd_lastchain += numq; - } else { - cdi->cd_dirty_tail->cc_dirty_link = chain; - cdi->cd_dirty_tail = chain; - cdi->cd_last_ent = cc_last; - cdi->cd_lastchain_ptr = chain; - cdi->cd_lastchain = numq; - start_write = 1; - } - } - /* was FAST */ - mutex_exit(&(cdi->cd_lock)); - if (start_write) - (void) _SD_CD_WRITER(cd); -} - -/* - * _sd_enqueue_dirty_chain - Enqueue a chain of a list of dirty buffers. - * - * ARGUMENTS: - * cd - cache descriptor. - * chain_first - first list in this chain. - * chain_last - last list in this chain. - * numq - number of entries being queue (total of all lists) - * - * RETURNS: - * NONE. - * - * Comments: - * This routine is called from the processing after io completions. - * If the buffers are still dirty, they are queued up in one shot. 
- */ - -void -_sd_enqueue_dirty_chain(int cd, _sd_cctl_t *chain_first, - _sd_cctl_t *chain_last, int numq) -{ - _sd_cd_info_t *cdi; - - ASSERT(cd >= 0); - cdi = &(_sd_cache_files[cd]); - if (chain_last->cc_dirty_link) - cmn_err(CE_PANIC, - "!_sd_enqueue_dirty_chain: chain_last %p dirty_link %p", - (void *)chain_last, (void *)chain_last->cc_dirty_link); - /* was FAST */ - mutex_enter(&(cdi->cd_lock)); - cdi->cd_last_ent = NULL; - cdi->cd_lastchain_ptr = NULL; - cdi->cd_lastchain = 0; - - cdi->cd_info->sh_numdirty += numq; - if (cdi->cd_dirty_head == NULL) { - cdi->cd_dirty_head = chain_first; - cdi->cd_dirty_tail = chain_last; - } else { - cdi->cd_dirty_tail->cc_dirty_link = chain_first; - cdi->cd_dirty_tail = chain_last; - } - /* was FAST */ - mutex_exit(&(cdi->cd_lock)); -} - -/* - * Convert the 64 bit statistic structure to 32bit version. - * Possibly losing information when cache is > 4gb. Ha! - * - * NOTE: this code isn't really MT ready since the copied to struct - * is static. However the race is pretty benign and isn't a whole - * lot worse than the vanilla version which copies data to user - * space from kernel structures that can be changing under it too. - * We can't use a local stack structure since the data size is - * 70k or so and kernel stacks are tiny (8k). - */ -#ifndef _MULTI_DATAMODEL -/* ARGSUSED */ -#endif -static int -convert_stats(_sd_stats32_t *uptr) -{ -#ifndef _MULTI_DATAMODEL - return (SDBC_EMODELCONVERT); -#else - int rc = 0; - - /* - * This could be done in less code with bcopy type operations - * but this is simpler to follow and easier to change if - * the structures change. 
- */ - - _sd_cache_stats32->net_dirty = _sd_cache_stats->net_dirty; - _sd_cache_stats32->net_pending = _sd_cache_stats->net_pending; - _sd_cache_stats32->net_free = _sd_cache_stats->net_free; - _sd_cache_stats32->st_count = _sd_cache_stats->st_count; - _sd_cache_stats32->st_loc_count = _sd_cache_stats->st_loc_count; - _sd_cache_stats32->st_rdhits = _sd_cache_stats->st_rdhits; - _sd_cache_stats32->st_rdmiss = _sd_cache_stats->st_rdmiss; - _sd_cache_stats32->st_wrhits = _sd_cache_stats->st_wrhits; - _sd_cache_stats32->st_wrmiss = _sd_cache_stats->st_wrmiss; - _sd_cache_stats32->st_blksize = _sd_cache_stats->st_blksize; - - _sd_cache_stats32->st_lru_blocks = _sd_cache_stats->st_lru_blocks; - _sd_cache_stats32->st_lru_noreq = _sd_cache_stats->st_lru_noreq; - _sd_cache_stats32->st_lru_req = _sd_cache_stats->st_lru_req; - - _sd_cache_stats32->st_wlru_inq = _sd_cache_stats->st_wlru_inq; - - _sd_cache_stats32->st_cachesize = _sd_cache_stats->st_cachesize; - _sd_cache_stats32->st_numblocks = _sd_cache_stats->st_numblocks; - _sd_cache_stats32->st_wrcancelns = _sd_cache_stats->st_wrcancelns; - _sd_cache_stats32->st_destaged = _sd_cache_stats->st_destaged; - - /* - * bcopy the shared stats which has nothing that needs conversion - * in them - */ - - bcopy(_sd_cache_stats->st_shared, _sd_cache_stats32->st_shared, - sizeof (_sd_shared_t) * sdbc_max_devs); - - if (copyout(_sd_cache_stats32, uptr, sizeof (_sd_stats32_t) + - (sdbc_max_devs - 1) * sizeof (_sd_shared_t))) - rc = EFAULT; - - return (rc); -#endif /* _MULTI_DATAMODEL */ -} - - -int -_sd_get_stats(_sd_stats_t *uptr, int convert_32) -{ - int rc = 0; - - if (_sd_cache_stats == NULL) { - static _sd_stats_t dummy; -#ifdef _MULTI_DATAMODEL - static _sd_stats32_t dummy32; -#endif - - if (convert_32) { -#ifdef _MULTI_DATAMODEL - if (copyout(&dummy32, uptr, sizeof (_sd_stats32_t))) - rc = EFAULT; -#else - rc = SDBC_EMODELCONVERT; -#endif - } else if (copyout(&dummy, uptr, sizeof (_sd_stats_t))) - rc = EFAULT; - return (rc); - } 
- - _sd_cache_stats->st_lru_blocks = _sd_lru_q.sq_inq; - _sd_cache_stats->st_lru_noreq = _sd_lru_q.sq_noreq_stat; - _sd_cache_stats->st_lru_req = _sd_lru_q.sq_req_stat; - - if (sdbc_safestore) { - ssioc_stats_t ss_stats; - - if (SSOP_CTL(sdbc_safestore, SSIOC_STATS, - (uintptr_t)&ss_stats) == 0) - _sd_cache_stats->st_wlru_inq = ss_stats.wq_inq; - else - _sd_cache_stats->st_wlru_inq = 0; - } - - if (convert_32) - rc = convert_stats((_sd_stats32_t *)uptr); - else if (copyout(_sd_cache_stats, uptr, - sizeof (_sd_stats_t) + (sdbc_max_devs - 1) * sizeof (_sd_shared_t))) - rc = EFAULT; - - return (rc); -} - - -int -_sd_set_hint(int cd, uint_t hint) -{ - int ret = 0; - if (FILE_OPENED(cd)) { - SDTRACE(ST_ENTER|SDF_HINT, cd, 1, SDT_INV_BL, hint, 0); - _sd_cache_files[cd].cd_hint |= (hint & _SD_HINT_MASK); - SDTRACE(ST_EXIT|SDF_HINT, cd, 1, SDT_INV_BL, hint, ret); - } else - ret = EINVAL; - - return (ret); -} - - - -int -_sd_clear_hint(int cd, uint_t hint) -{ - int ret = 0; - if (FILE_OPENED(cd)) { - SDTRACE(ST_ENTER|SDF_HINT, cd, 2, SDT_INV_BL, hint, 0); - _sd_cache_files[cd].cd_hint &= ~(hint & _SD_HINT_MASK); - SDTRACE(ST_EXIT|SDF_HINT, cd, 2, SDT_INV_BL, hint, ret); - } else - ret = EINVAL; - - return (ret); -} - - -int -_sd_get_cd_hint(int cd, uint_t *hint) -{ - *hint = 0; - if (FILE_OPENED(cd)) { - *hint = _sd_cache_files[cd].cd_hint; - return (0); - } else - return (EINVAL); -} - -static int -_sd_node_hint_caller(blind_t hint, int hint_action) -{ - int rc; - - switch (hint_action) { - case NSC_GET_NODE_HINT: - rc = _sd_get_node_hint((uint_t *)hint); - break; - case NSC_SET_NODE_HINT: - rc = _sd_set_node_hint((uint_t)(unsigned long)hint); - break; - case NSC_CLEAR_NODE_HINT: - rc = _sd_clear_node_hint((uint_t)(unsigned long)hint); - break; - default: - rc = EINVAL; - break; - } - - return (rc); -} - -int -_sd_set_node_hint(uint_t hint) -{ - SDTRACE(ST_ENTER|SDF_HINT, SDT_INV_CD, 3, SDT_INV_BL, hint, 0); - if ((_sd_node_hint & NSC_NO_FORCED_WRTHRU) && - (hint & 
NSC_FORCED_WRTHRU)) - return (EINVAL); - _sd_node_hint |= (hint & _SD_HINT_MASK); - SDTRACE(ST_EXIT|SDF_HINT, SDT_INV_CD, 3, SDT_INV_BL, hint, 0); - return (0); -} - - -int -_sd_clear_node_hint(uint_t hint) -{ - SDTRACE(ST_ENTER|SDF_HINT, SDT_INV_CD, 4, SDT_INV_BL, hint, 0); - _sd_node_hint &= ~(hint & _SD_HINT_MASK); - SDTRACE(ST_EXIT|SDF_HINT, SDT_INV_CD, 4, SDT_INV_BL, hint, 0); - return (0); -} - - -int -_sd_get_node_hint(uint_t *hint) -{ - *hint = _sd_node_hint; - return (0); -} - - -int -_sd_get_partsize(blind_t xcd, nsc_size_t *ptr) -{ - int cd = (int)(unsigned long)xcd; - - if (FILE_OPENED(cd)) { - *ptr = _sd_cache_files[cd].cd_info->sh_filesize; - return (0); - } else - return (EINVAL); -} - - -int -_sd_get_maxfbas(blind_t xcd, int flag, nsc_size_t *ptr) -{ - int cd = (int)(unsigned long)xcd; - - if (!FILE_OPENED(cd)) - return (EINVAL); - - if (flag & NSC_CACHEBLK) - *ptr = BLK_FBAS; - else - *ptr = sdbc_max_fbas; - - return (0); -} - - -int -_sd_control(blind_t xcd, int cmd, void *ptr, int len) -{ - _sd_cd_info_t *cdi; - int cd = (int)(unsigned long)xcd; - - cdi = &(_sd_cache_files[cd]); - return (nsc_control(cdi->cd_rawfd, cmd, ptr, len)); -} - - -int -_sd_discard_pinned(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len) -{ - int cd = (int)(unsigned long)xcd; - _sd_cctl_t *cc_ent, **cc_lst, **cc_tmp, *nxt; - ss_centry_info_t *wctl; - int found = 0; - nsc_off_t cblk; - _sd_cd_info_t *cdi = &_sd_cache_files[cd]; - int rc; - - if ((!FILE_OPENED(cd)) || (!cdi->cd_info->sh_failed)) { - - return (EINVAL); - } - - for (cblk = FBA_TO_BLK_NUM(fba_pos); - cblk < FBA_TO_BLK_LEN(fba_pos + fba_len); cblk++) { - if (cc_ent = - (_sd_cctl_t *)_sd_hash_search(cd, cblk, _sd_htable)) { - if (!CENTRY_PINNED(cc_ent)) - continue; - - /* - * remove cc_ent from failed links - * cc_lst - pointer to "cc_dirty_link" pointer - * starts at &cd_failed_head. - * cc_tmp - pointer to "cc_dirty_next" - * except when equal to cc_lst. 
- */ - mutex_enter(&cdi->cd_lock); - cc_tmp = cc_lst = &(cdi->cd_fail_head); - while (*cc_tmp != cc_ent) { - cc_tmp = &((*cc_tmp)->cc_dirty_next); - if (!*cc_tmp) - cc_lst = &((*cc_lst)->cc_dirty_link), - cc_tmp = cc_lst; - } - if (*cc_tmp) { - found++; - if (cc_lst != cc_tmp) /* break chain */ - *cc_tmp = NULL; - nxt = cc_ent->cc_dirty_next; - if (nxt) { - nxt->cc_dirty_link = - (*cc_lst)->cc_dirty_link; - *cc_lst = nxt; - } else { - *cc_lst = (*cc_lst)->cc_dirty_link; - } - cdi->cd_info->sh_numfail--; - nsc_unpinned_data(cdi->cd_iodev, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - BLK_FBAS); - } - mutex_exit(&cdi->cd_lock); - - /* clear dirty bits */ - /* was FAST */ - mutex_enter(&cc_ent->cc_lock); - cc_ent->cc_valid = cc_ent->cc_dirty = 0; - cc_ent->cc_flag &= ~(CC_QHEAD|CC_PEND_DIRTY|CC_PINNED); - cc_ent->cc_dirty_link = NULL; - wctl = cc_ent->cc_write; - cc_ent->cc_write = NULL; - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - - /* release cache block to head of LRU */ - if (wctl) { - wctl->sc_flag = 0; - wctl->sc_dirty = 0; - SSOP_SETCENTRY(sdbc_safestore, wctl); - SSOP_DEALLOCRESOURCE(sdbc_safestore, - wctl->sc_res); - } - - if (!sdbc_use_dmchain) - _sd_requeue_head(cc_ent); - } - } - - rc = found ? NSC_DONE : EINVAL; - - return (rc); -} - - -/* - * Handle allocation - */ - -_sd_buf_hlist_t _sd_handle_list; - -/* - * _sdbc_handles_unload - cache is being unloaded. - */ -void -_sdbc_handles_unload(void) -{ - mutex_destroy(&_sd_handle_list.hl_lock); - -} - -/* - * _sdbc_handles_load - cache is being unloaded. 
- */ -int -_sdbc_handles_load(void) -{ - mutex_init(&_sd_handle_list.hl_lock, NULL, MUTEX_DRIVER, NULL); - - return (0); -} - -int -_sdbc_handles_configure() -{ - _sd_handle_list.hl_count = 0; - - _sd_handle_list.hl_top.bh_next = &_sd_handle_list.hl_top; - _sd_handle_list.hl_top.bh_prev = &_sd_handle_list.hl_top; - - return (0); -} - - - -/* - * _sdbc_handles_deconfigure - cache is being deconfigured - */ -void -_sdbc_handles_deconfigure(void) -{ - _sd_handle_list.hl_count = 0; -} - - -_sd_buf_handle_t * -_sd_alloc_handle(sdbc_callback_fn_t d_cb, sdbc_callback_fn_t r_cb, - sdbc_callback_fn_t w_cb) -{ - _sd_buf_handle_t *handle; - - handle = (_sd_buf_handle_t *)kmem_zalloc(sizeof (_sd_buf_handle_t), - KM_SLEEP); - /* maintain list and count for debugging */ - mutex_enter(&_sd_handle_list.hl_lock); - - handle->bh_prev = &_sd_handle_list.hl_top; - handle->bh_next = _sd_handle_list.hl_top.bh_next; - _sd_handle_list.hl_top.bh_next->bh_prev = handle; - _sd_handle_list.hl_top.bh_next = handle; - - ++_sd_handle_list.hl_count; - mutex_exit(&_sd_handle_list.hl_lock); -#if !defined(_SD_NOCHECKS) - ASSERT(!(handle->bh_flag & (NSC_HALLOCATED | NSC_HACTIVE))); -#endif - handle->bh_disconnect_cb = d_cb; - handle->bh_read_cb = r_cb; - handle->bh_write_cb = w_cb; - handle->bh_flag |= NSC_HALLOCATED; - handle->bh_alloc_thread = nsc_threadp(); - - return (handle); -} - -int -_sd_free_handle(_sd_buf_handle_t *handle) -{ - - if ((handle->bh_flag & NSC_HALLOCATED) == 0) { - cmn_err(CE_WARN, "!sdbc(_sd_free_handle) handle %p not valid", - (void *)handle); - - DTRACE_PROBE(_sd_free_handle_end); - - return (EINVAL); - } - if (_SD_HANDLE_ACTIVE(handle)) { - cmn_err(CE_WARN, - "!sdbc(_sd_free_handle) attempt to free active handle %p", - (void *)handle); - - DTRACE_PROBE1(free_handle_active, int, handle->bh_flag); - - return (EINVAL); - } - - - /* remove from queue before free */ - mutex_enter(&_sd_handle_list.hl_lock); - handle->bh_prev->bh_next = handle->bh_next; - handle->bh_next->bh_prev 
= handle->bh_prev; - --_sd_handle_list.hl_count; - mutex_exit(&_sd_handle_list.hl_lock); - - kmem_free(handle, sizeof (_sd_buf_handle_t)); - - return (0); -} - - - - -#if !defined (_SD_8K_BLKSIZE) -#define _SD_MAX_MAP 0x100 -#else /* !(_SD_8K_BLKSIZE) */ -#define _SD_MAX_MAP 0x10000 -#endif /* !(_SD_8K_BLKSIZE) */ - -char _sd_contig_bmap[_SD_MAX_MAP]; -_sd_map_info_t _sd_lookup_map[_SD_MAX_MAP]; - -void -_sd_init_contig_bmap(void) -{ - int i, j; - - for (i = 1; i < _SD_MAX_MAP; i = ((i << 1) | 1)) - for (j = i; j < _SD_MAX_MAP; j <<= 1) - _sd_contig_bmap[j] = 1; -} - - - - -void -_sd_init_lookup_map(void) -{ - unsigned int i, j, k; - int stpos, len; - _sd_bitmap_t mask; - - for (i = 0; i < _SD_MAX_MAP; i++) { - for (j = i, k = 0; j && ((j & 1) == 0); j >>= 1, k++) - ; - stpos = k; - _sd_lookup_map[i].mi_stpos = (unsigned char)k; - - for (k = 0; j & 1; j >>= 1, k++) - ; - len = k; - _sd_lookup_map[i].mi_len = (unsigned char)k; - - _sd_lookup_map[i].mi_mask = SDBC_GET_BITS(stpos, len); - } - for (i = 0; i < _SD_MAX_MAP; i++) { - mask = (_sd_bitmap_t)i; - for (j = 0; mask; j++) - SDBC_LOOKUP_MODIFY(mask); - - _sd_lookup_map[i].mi_dirty_count = (unsigned char)j; - } - for (i = 0; i < _SD_MAX_MAP; i++) { - _sd_lookup_map[i].mi_io_count = SDBC_LOOKUP_DTCOUNT(i); - mask = ~i; - _sd_lookup_map[i].mi_io_count += SDBC_LOOKUP_DTCOUNT(mask); - } -} - - -nsc_def_t _sd_sdbc_def[] = { - "Open", (uintptr_t)_sd_open_io, 0, - "Close", (uintptr_t)_sd_close_io, 0, - "Attach", (uintptr_t)_sdbc_io_attach_cd, 0, - "Detach", (uintptr_t)_sdbc_io_detach_cd, 0, - "AllocBuf", (uintptr_t)_sd_alloc_buf, 0, - "FreeBuf", (uintptr_t)_sd_free_buf, 0, - "Read", (uintptr_t)_sd_read, 0, - "Write", (uintptr_t)_sd_write, 0, - "Zero", (uintptr_t)_sd_zero, 0, - "Copy", (uintptr_t)_sd_copy, 0, - "CopyDirect", (uintptr_t)_sd_copy_direct, 0, - "Uncommit", (uintptr_t)_sd_uncommit, 0, - "AllocHandle", (uintptr_t)_sd_alloc_handle, 0, - "FreeHandle", (uintptr_t)_sd_free_handle, 0, - "Discard", 
(uintptr_t)_sd_discard_pinned, 0, - "Sizes", (uintptr_t)_sd_cache_sizes, 0, - "GetPinned", (uintptr_t)_sd_get_pinned, 0, - "NodeHints", (uintptr_t)_sd_node_hint_caller, 0, - "PartSize", (uintptr_t)_sd_get_partsize, 0, - "MaxFbas", (uintptr_t)_sd_get_maxfbas, 0, - "Control", (uintptr_t)_sd_control, 0, - "Provide", NSC_CACHE, 0, - 0, 0, 0 -}; - -/* - * do the SD_GET_CD_CLUSTER_DATA ioctl (get the global filename data) - */ -/* ARGSUSED */ -int -sd_get_file_info_data(char *uaddrp) -{ - return (ENOTTY); -} - -/* - * do the SD_GET_CD_CLUSTER_SIZE ioctl (get size of global filename area) - */ -int -sd_get_file_info_size(void *uaddrp) -{ - if (copyout(&_sdbc_gl_file_info_size, uaddrp, - sizeof (_sdbc_gl_file_info_size))) { - return (EFAULT); - } - - return (0); -} - - -/* - * SD_GET_GLMUL_SIZES ioctl - * get sizes of the global info regions (for this node only) - */ -/* ARGSUSED */ -int -sd_get_glmul_sizes(int *uaddrp) -{ - return (ENOTTY); -} - -/* - * SD_GET_GLMUL_INFO ioctl - * get the global metadata for write blocks (for this node only) - */ -/* ARGSUSED */ -int -sd_get_glmul_info(char *uaddrp) -{ - - return (ENOTTY); -} - -int -sdbc_global_stats_update(kstat_t *ksp, int rw) -{ - sdbc_global_stats_t *sdbc_gstats; - _sd_stats_t *gstats_vars; - uint_t hint; - - sdbc_gstats = (sdbc_global_stats_t *)(ksp->ks_data); - - gstats_vars = _sd_cache_stats; - - if (rw == KSTAT_WRITE) { - return (EACCES); - } - - /* default to READ */ - sdbc_gstats->ci_sdbc_count.value.ul = gstats_vars->st_count; - sdbc_gstats->ci_sdbc_loc_count.value.ul = gstats_vars->st_loc_count; - sdbc_gstats->ci_sdbc_rdhits.value.ul = (ulong_t)gstats_vars->st_rdhits; - sdbc_gstats->ci_sdbc_rdmiss.value.ul = (ulong_t)gstats_vars->st_rdmiss; - sdbc_gstats->ci_sdbc_wrhits.value.ul = (ulong_t)gstats_vars->st_wrhits; - sdbc_gstats->ci_sdbc_wrmiss.value.ul = (ulong_t)gstats_vars->st_wrmiss; - - sdbc_gstats->ci_sdbc_blksize.value.ul = - (ulong_t)gstats_vars->st_blksize; - sdbc_gstats->ci_sdbc_lru_blocks.value.ul = 
(ulong_t)_sd_lru_q.sq_inq; -#ifdef DEBUG - sdbc_gstats->ci_sdbc_lru_noreq.value.ul = - (ulong_t)_sd_lru_q.sq_noreq_stat; - sdbc_gstats->ci_sdbc_lru_req.value.ul = (ulong_t)_sd_lru_q.sq_req_stat; -#endif - sdbc_gstats->ci_sdbc_wlru_inq.value.ul = - (ulong_t)gstats_vars->st_wlru_inq; - sdbc_gstats->ci_sdbc_cachesize.value.ul = - (ulong_t)gstats_vars->st_cachesize; - sdbc_gstats->ci_sdbc_numblocks.value.ul = - (ulong_t)gstats_vars->st_numblocks; - sdbc_gstats->ci_sdbc_wrcancelns.value.ul = - (ulong_t)gstats_vars->st_wrcancelns; - sdbc_gstats->ci_sdbc_destaged.value.ul = - (ulong_t)gstats_vars->st_destaged; - sdbc_gstats->ci_sdbc_num_shared.value.ul = (ulong_t)sdbc_max_devs; - (void) _sd_get_node_hint(&hint); - sdbc_gstats->ci_sdbc_nodehints.value.ul = (ulong_t)hint; - - - return (0); -} - -int -sdbc_cd_stats_update(kstat_t *ksp, int rw) -{ - sdbc_cd_stats_t *sdbc_shstats; - _sd_shared_t *shstats_vars; - int name_len; - uint_t hint; - - sdbc_shstats = (sdbc_cd_stats_t *)(ksp->ks_data); - - shstats_vars = (_sd_shared_t *)(ksp->ks_private); - - if (rw == KSTAT_WRITE) { - return (EACCES); - } - - /* copy tail of filename to kstat. 
leave 1 byte for null char */ - if (shstats_vars->sh_filename != NULL) { - name_len = (int)strlen(shstats_vars->sh_filename); - name_len -= (KSTAT_DATA_CHAR_LEN - 1); - - if (name_len < 0) { - name_len = 0; - } - - (void) strlcpy(sdbc_shstats->ci_sdbc_vol_name.value.c, - shstats_vars->sh_filename + name_len, KSTAT_DATA_CHAR_LEN); - } else { - cmn_err(CE_WARN, "!Kstat error: no volume name associated " - "with cache descriptor"); - } - - sdbc_shstats->ci_sdbc_failed.value.ul = - (ulong_t)shstats_vars->sh_failed; - sdbc_shstats->ci_sdbc_cd.value.ul = (ulong_t)shstats_vars->sh_cd; - sdbc_shstats->ci_sdbc_cache_read.value.ul = - (ulong_t)shstats_vars->sh_cache_read; - sdbc_shstats->ci_sdbc_cache_write.value.ul = - (ulong_t)shstats_vars->sh_cache_write; - sdbc_shstats->ci_sdbc_disk_read.value.ul = - (ulong_t)shstats_vars->sh_disk_read; - sdbc_shstats->ci_sdbc_disk_write.value.ul = - (ulong_t)shstats_vars->sh_disk_write; -#ifdef NSC_MULTI_TERABYTE - sdbc_shstats->ci_sdbc_filesize.value.ui64 = - (uint64_t)shstats_vars->sh_filesize; -#else - sdbc_shstats->ci_sdbc_filesize.value.ul = - (ulong_t)shstats_vars->sh_filesize; -#endif - sdbc_shstats->ci_sdbc_numdirty.value.ul = - (ulong_t)shstats_vars->sh_numdirty; - sdbc_shstats->ci_sdbc_numio.value.ul = (ulong_t)shstats_vars->sh_numio; - sdbc_shstats->ci_sdbc_numfail.value.ul = - (ulong_t)shstats_vars->sh_numfail; - sdbc_shstats->ci_sdbc_destaged.value.ul = - (ulong_t)shstats_vars->sh_destaged; - sdbc_shstats->ci_sdbc_wrcancelns.value.ul = - (ulong_t)shstats_vars->sh_wrcancelns; - (void) _sd_get_cd_hint(shstats_vars->sh_cd, &hint); - sdbc_shstats->ci_sdbc_cdhints.value.ul = (ulong_t)hint; - - - return (0); -} - - -/* - * cd_kstat_add - * - * Installs all kstats and associated infrastructure (mutex, buffer), - * associated with a particular cache descriptor. This function is called - * when the cache descriptor is opened in _sd_open(). 
- * "cd" -- cache descriptor number whose kstats we wish to add - * returns: 0 on success, -1 on failure - */ -static int -cd_kstat_add(int cd) -{ - char name[KSTAT_STRLEN]; - - if (cd < 0 || cd >= sdbc_max_devs) { - cmn_err(CE_WARN, "!invalid cache descriptor: %d", cd); - return (-1); - } - - /* create a regular kstat for this cache descriptor */ - if (!sdbc_cd_kstats) { - cmn_err(CE_WARN, "!sdbc_cd_kstats not allocated"); - return (-1); - } - - (void) snprintf(name, KSTAT_STRLEN, "%s%d", SDBC_KSTAT_CDSTATS, cd); - - sdbc_cd_kstats[cd] = kstat_create(SDBC_KSTAT_MODULE, - cd, name, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED, - sizeof (sdbc_cd_stats)/sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE); - - if (sdbc_cd_kstats[cd] != NULL) { - sdbc_cd_kstats[cd]->ks_data = &sdbc_cd_stats; - sdbc_cd_kstats[cd]->ks_update = sdbc_cd_stats_update; - sdbc_cd_kstats[cd]->ks_private = - &_sd_cache_stats->st_shared[cd]; - kstat_install(sdbc_cd_kstats[cd]); - } else { - cmn_err(CE_WARN, "!cdstats %d kstat allocation failed", cd); - } - - /* create an I/O kstat for this cache descriptor */ - if (!sdbc_cd_io_kstats) { - cmn_err(CE_WARN, "!sdbc_cd_io_kstats not allocated"); - return (-1); - } - - (void) snprintf(name, KSTAT_STRLEN, "%s%d", SDBC_IOKSTAT_CDSTATS, cd); - - sdbc_cd_io_kstats[cd] = kstat_create( - SDBC_KSTAT_MODULE, cd, name, "disk", KSTAT_TYPE_IO, 1, 0); - - if (sdbc_cd_io_kstats[cd]) { - if (!sdbc_cd_io_kstats_mutexes) { - cmn_err(CE_WARN, "!sdbc_cd_io_kstats_mutexes not " - "allocated"); - return (-1); - } - - mutex_init(&sdbc_cd_io_kstats_mutexes[cd], NULL, - MUTEX_DRIVER, NULL); - - sdbc_cd_io_kstats[cd]->ks_lock = &sdbc_cd_io_kstats_mutexes[cd]; - - kstat_install(sdbc_cd_io_kstats[cd]); - - } else { - cmn_err(CE_WARN, "!sdbc cd %d io kstat allocation failed", cd); - } - - return (0); -} - -/* - * cd_kstat_remove - * - * Uninstalls all kstats and associated infrastructure (mutex, buffer), - * associated with a particular cache descriptor. 
This function is called - * when the cache descriptor is closed in _sd_close(). - * "cd" -- cache descriptor number whose kstats we wish to remove - * returns: 0 on success, -1 on failure - */ -static int -cd_kstat_remove(int cd) -{ - if (cd < 0 || cd >= sdbc_max_devs) { - cmn_err(CE_WARN, "!invalid cache descriptor: %d", cd); - return (-1); - } - - /* delete the regular kstat corresponding to this cache descriptor */ - if (sdbc_cd_kstats && sdbc_cd_kstats[cd]) { - kstat_delete(sdbc_cd_kstats[cd]); - sdbc_cd_kstats[cd] = NULL; - } - - /* delete the I/O kstat corresponding to this cache descriptor */ - if (sdbc_cd_io_kstats && sdbc_cd_io_kstats[cd]) { - kstat_delete(sdbc_cd_io_kstats[cd]); - sdbc_cd_io_kstats[cd] = NULL; - - if (sdbc_cd_io_kstats_mutexes) { - /* destroy the mutex associated with this I/O kstat */ - mutex_destroy(&sdbc_cd_io_kstats_mutexes[cd]); - } - } - - return (0); -} - -#ifdef DEBUG -/* - * kstat update - */ -int -sdbc_dynmem_kstat_update_dm(kstat_t *ksp, int rw) -{ - sdbc_dynmem_dm_t *sdbc_dynmem; - _dm_process_vars_t *process_vars; - _dm_process_vars_t local_dm_process_vars; - - simplect_dm++; - - sdbc_dynmem = (sdbc_dynmem_dm_t *)(ksp->ks_data); - - /* global dynmem_processing_dm */ - process_vars = (_dm_process_vars_t *)(ksp->ks_private); - - if (rw == KSTAT_WRITE) { - simplect_dm = sdbc_dynmem->ci_sdbc_simplect.value.ul; - local_dm_process_vars.monitor_dynmem_process = - sdbc_dynmem->ci_sdbc_monitor_dynmem.value.ul; - local_dm_process_vars.max_dyn_list = - sdbc_dynmem->ci_sdbc_max_dyn_list.value.ul; - local_dm_process_vars.cache_aging_ct1 = - sdbc_dynmem->ci_sdbc_cache_aging_ct1.value.ul; - local_dm_process_vars.cache_aging_ct2 = - sdbc_dynmem->ci_sdbc_cache_aging_ct2.value.ul; - local_dm_process_vars.cache_aging_ct3 = - sdbc_dynmem->ci_sdbc_cache_aging_ct3.value.ul; - local_dm_process_vars.cache_aging_sec1 = - sdbc_dynmem->ci_sdbc_cache_aging_sec1.value.ul; - local_dm_process_vars.cache_aging_sec2 = - 
sdbc_dynmem->ci_sdbc_cache_aging_sec2.value.ul; - local_dm_process_vars.cache_aging_sec3 = - sdbc_dynmem->ci_sdbc_cache_aging_sec3.value.ul; - local_dm_process_vars.cache_aging_pcnt1 = - sdbc_dynmem->ci_sdbc_cache_aging_pcnt1.value.ul; - local_dm_process_vars.cache_aging_pcnt2 = - sdbc_dynmem->ci_sdbc_cache_aging_pcnt2.value.ul; - local_dm_process_vars.max_holds_pcnt = - sdbc_dynmem->ci_sdbc_max_holds_pcnt.value.ul; - local_dm_process_vars.process_directive = - sdbc_dynmem->ci_sdbc_process_directive.value.ul; - (void) sdbc_edit_xfer_process_vars_dm(&local_dm_process_vars); - - if (process_vars->process_directive & WAKE_DEALLOC_THREAD_DM) { - process_vars->process_directive &= - ~WAKE_DEALLOC_THREAD_DM; - mutex_enter(&dynmem_processing_dm.thread_dm_lock); - cv_broadcast(&dynmem_processing_dm.thread_dm_cv); - mutex_exit(&dynmem_processing_dm.thread_dm_lock); - } - - return (0); - } - - /* default to READ */ - sdbc_dynmem->ci_sdbc_simplect.value.ul = simplect_dm; - sdbc_dynmem->ci_sdbc_monitor_dynmem.value.ul = - process_vars->monitor_dynmem_process; - sdbc_dynmem->ci_sdbc_max_dyn_list.value.ul = - process_vars->max_dyn_list; - sdbc_dynmem->ci_sdbc_cache_aging_ct1.value.ul = - process_vars->cache_aging_ct1; - sdbc_dynmem->ci_sdbc_cache_aging_ct2.value.ul = - process_vars->cache_aging_ct2; - sdbc_dynmem->ci_sdbc_cache_aging_ct3.value.ul = - process_vars->cache_aging_ct3; - sdbc_dynmem->ci_sdbc_cache_aging_sec1.value.ul = - process_vars->cache_aging_sec1; - sdbc_dynmem->ci_sdbc_cache_aging_sec2.value.ul = - process_vars->cache_aging_sec2; - sdbc_dynmem->ci_sdbc_cache_aging_sec3.value.ul = - process_vars->cache_aging_sec3; - sdbc_dynmem->ci_sdbc_cache_aging_pcnt1.value.ul = - process_vars->cache_aging_pcnt1; - sdbc_dynmem->ci_sdbc_cache_aging_pcnt2.value.ul = - process_vars->cache_aging_pcnt2; - sdbc_dynmem->ci_sdbc_max_holds_pcnt.value.ul = - process_vars->max_holds_pcnt; - sdbc_dynmem->ci_sdbc_process_directive.value.ul = - process_vars->process_directive; - - 
sdbc_dynmem->ci_sdbc_alloc_ct.value.ul = process_vars->alloc_ct; - sdbc_dynmem->ci_sdbc_dealloc_ct.value.ul = process_vars->dealloc_ct; - sdbc_dynmem->ci_sdbc_history.value.ul = process_vars->history; - sdbc_dynmem->ci_sdbc_nodatas.value.ul = process_vars->nodatas; - sdbc_dynmem->ci_sdbc_candidates.value.ul = process_vars->candidates; - sdbc_dynmem->ci_sdbc_deallocs.value.ul = process_vars->deallocs; - sdbc_dynmem->ci_sdbc_hosts.value.ul = process_vars->hosts; - sdbc_dynmem->ci_sdbc_pests.value.ul = process_vars->pests; - sdbc_dynmem->ci_sdbc_metas.value.ul = process_vars->metas; - sdbc_dynmem->ci_sdbc_holds.value.ul = process_vars->holds; - sdbc_dynmem->ci_sdbc_others.value.ul = process_vars->others; - sdbc_dynmem->ci_sdbc_notavail.value.ul = process_vars->notavail; - - return (0); -} -#endif diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_bcache.h b/usr/src/uts/common/avs/ns/sdbc/sd_bcache.h deleted file mode 100644 index 0cf9bf3836..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_bcache.h +++ /dev/null @@ -1,1161 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SD_BCACHE_H -#define _SD_BCACHE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/sdbc_ioctl.h> -#include <sys/nsctl/sd_hash.h> -#include <sys/nsctl/sd_cache.h> -#include <sys/nsctl/sd_conf.h> -#include <sys/nsctl/safestore.h> - -/* - * Definitions for kstats - */ -#define SDBC_KSTAT_CLASS "storedge" -#define SDBC_KSTAT_MODULE "sdbc" - -#ifdef DEBUG -#define SDBC_KSTAT_DYNMEM "dynmem" -#endif - -#define SDBC_KSTAT_CDNAME "cdname" -#define SDBC_KSTAT_CDSTATS "cd" -#define SDBC_KSTAT_GSTATS "global" -#define SDBC_KSTAT_STATS "sdbcstats" -#define SDBC_IOKSTAT_GSTATS "gsdbc" -#define SDBC_IOKSTAT_CDSTATS "sdbc" - -/* Global kstat field names */ -#define SDBC_GKSTAT_COUNT "sdbc_count" -#define SDBC_GKSTAT_LOC_COUNT "sdbc_loc_count" -#define SDBC_GKSTAT_RDHITS "sdbc_rdhits" -#define SDBC_GKSTAT_RDMISS "sdbc_rdmiss" -#define SDBC_GKSTAT_WRHITS "sdbc_wrhits" -#define SDBC_GKSTAT_WRMISS "sdbc_wrmiss" -#define SDBC_GKSTAT_BLKSIZE "sdbc_blksize" -#define SDBC_GKSTAT_LRU_BLOCKS "sdbc_lru_blocks" - -#ifdef DEBUG -#define SDBC_GKSTAT_LRU_NOREQ "sdbc_lru_noreq" -#define SDBC_GKSTAT_LRU_REQ "sdbc_lru_req" -#endif - -#define SDBC_GKSTAT_WLRU_INQ "sdbc_wlru_inq" -#define SDBC_GKSTAT_CACHESIZE "sdbc_cachesize" -#define SDBC_GKSTAT_NUMBLOCKS "sdbc_numblocks" -#define SDBC_GKSTAT_NUM_SHARED "sdbc_num_shared" -#define SDBC_GKSTAT_WRCANCELNS "sdbc_wrcancelns" -#define SDBC_GKSTAT_DESTAGED "sdbc_destaged" -#define SDBC_GKSTAT_NODEHINTS "sdbc_nodehints" - -/* per-cache descriptor kstats field names */ -#define SDBC_CDKSTAT_VOL_NAME "sdbc_vol_name" -#define SDBC_CDKSTAT_FAILED "sdbc_failed" -#define SDBC_CDKSTAT_CD "sdbc_cd" -#define SDBC_CDKSTAT_CACHE_READ "sdbc_cache_read" -#define SDBC_CDKSTAT_CACHE_WRITE "sdbc_cache_write" -#define SDBC_CDKSTAT_DISK_READ "sdbc_disk_read" -#define SDBC_CDKSTAT_DISK_WRITE "sdbc_disk_write" -#define SDBC_CDKSTAT_FILESIZE 
"sdbc_filesize" -#define SDBC_CDKSTAT_NUMDIRTY "sdbc_numdirty" -#define SDBC_CDKSTAT_NUMIO "sdbc_numio" -#define SDBC_CDKSTAT_NUMFAIL "sdbc_numfail" -#define SDBC_CDKSTAT_DESTAGED "sdbc_destaged" -#define SDBC_CDKSTAT_WRCANCELNS "sdbc_wrcancelns" -#define SDBC_CDKSTAT_CDHINTS "sdbc_cdhints" - -#ifdef DEBUG -/* dynmem kstats field names */ -#define SDBC_DMKSTAT_MONITOR_DYNMEM "sdbc_monitor_dynmem" -#define SDBC_DMKSTAT_MAX_DYN_LIST "sdbc_max_dyn_list" -#define SDBC_DMKSTAT_CACHE_AGING_CT1 "sdbc_cache_aging_ct1" -#define SDBC_DMKSTAT_CACHE_AGING_CT2 "sdbc_cache_aging_ct2" -#define SDBC_DMKSTAT_CACHE_AGING_CT3 "sdbc_cache_aging_ct3" -#define SDBC_DMKSTAT_CACHE_AGING_SEC1 "sdbc_cache_aging_sec1" -#define SDBC_DMKSTAT_CACHE_AGING_SEC2 "sdbc_cache_aging_sec2" -#define SDBC_DMKSTAT_CACHE_AGING_SEC3 "sdbc_cache_aging_sec3" -#define SDBC_DMKSTAT_CACHE_AGING_PCNT1 "sdbc_cache_aging_pcnt1" -#define SDBC_DMKSTAT_CACHE_AGING_PCNT2 "sdbc_cache_aging_pcnt2" -#define SDBC_DMKSTAT_MAX_HOLDS_PCNT "sdbc_max_holds_pcnt" -#define SDBC_DMKSTAT_ALLOC_CNT "sdbc_alloc_cnt" -#define SDBC_DMKSTAT_DEALLOC_CNT "sdbc_dealloc_cnt" -#define SDBC_DMKSTAT_HISTORY "sdbc_history" -#define SDBC_DMKSTAT_NODATAS "sdbc_nodatas" -#define SDBC_DMKSTAT_CANDIDATES "sdbc_candidates" -#define SDBC_DMKSTAT_DEALLOCS "sdbc_deallocs" -#define SDBC_DMKSTAT_HOSTS "sdbc_hosts" -#define SDBC_DMKSTAT_PESTS "sdbc_pests" -#define SDBC_DMKSTAT_METAS "sdbc_metas" -#define SDBC_DMKSTAT_HOLDS "sdbc_holds" -#define SDBC_DMKSTAT_OTHERS "sdbc_others" -#define SDBC_DMKSTAT_NOTAVAIL "sdbc_notavail" -#define SDBC_DMKSTAT_PROCESS_DIRECTIVE "sdbc_process_directive" -#define SDBC_DMKSTAT_SIMPLECT "sdbc_simplect" - -#endif - -/* ... values are in range [0-BLK_FBAS] */ -typedef uint32_t sdbc_cblk_fba_t; /* FBA len or offset in cache block */ - -typedef unsigned char *ucaddr_t; /* unsigned char pointer */ - -/* - * Atomic exchange function - */ - -#ifdef _KERNEL - -/* - * Note: ldstub sets all bits in the memory byte. 
- * so far this is compatible with the usage of xmem_bu() whereby - * the values of ptr are either 0 or 1, and the xmem_bu() is used - * to set the byte to 1. - */ -#define xmem_bu(val, ptr) nsc_ldstub((uint8_t *)ptr) -#define atomic_swap xmem_bu -#define sd_serialize nsc_membar_stld - -#endif /* _KERNEL */ - -#if defined(_KERNEL) || defined(_KMEMUSER) - -#if defined(_SD_8K_BLKSIZE) -typedef unsigned short _sd_bitmap_t; -#else -typedef unsigned char _sd_bitmap_t; -#endif - -/* - * CCTL flag types - */ - -/* - * Note: CC_INUSE and CC_PAGEIO are dummy flags that are used in - * individual flags bytes (cc_inuse and cc_pageio) NOT cc_flag. - * Thus they can take any convenient value, however, they must be - * distinct and non-zero. - */ -#define CC_INUSE 0x01 /* Cache entry is in use */ -#define CC_PAGEIO 0x02 /* Pagelist IO is active for cache entry */ - -/* - * Real cc_flag values. - */ -#define CC_PEND_DIRTY 0x02 /* The entry needs to be reprocessed for io */ -#define CC_PINNED 0x04 /* The entry has data that is "pinned" */ -#define CC_PINNABLE 0x08 /* Issue pin if write fails */ -#define CC_QHEAD 0x10 /* NSC_NOCACHE: requeue at head */ - -/* specify the size of _sd_cctl[] array */ -#define _SD_CCTL_GROUPS 32 - -/* - * Individual SDBC cache block entry - * "cc_lock" must be held when changing dirty/valid bits. - * "cc_inuse" (optimistic) atomic exchange replaces check/set of - * CC_INUSE bit in cc_flag; special handling of rare collisions. - * "cc_pageio" flusher / client locking of pagelist io operations, - * atomic exchange - needs machine ld/st protection. - * "cc_iostatus" is set by flusher without holding cc_lock, - * writer will set CC_PEND_DIRTY if cc_iostatus is set. - * Thus "cc_inuse", "cc_iostatus" and "cc_pageio" are volatile. - * - * The cc_await_* values are in the main _sd_cctl to avoid over - * signalling _cc_blkcv. - * - * The _sd_cctl structure is aligned to group related members and - * to ensure good packing. 
- */ - -typedef struct _sd_cctl_sync { - kcondvar_t _cc_blkcv; /* Synchronisation var to block on */ - kmutex_t _cc_lock; /* Cache entry spinlock */ -} _sd_cctl_sync_t; - -typedef struct sd_addr_s { /* Generic address structure */ - unsigned char *sa_virt; /* Virtual address of data */ -} sd_addr_t; - -/* - * See notes above. - */ - -typedef struct _sd_cctl { - _sd_hash_hd_t cc_head; /* hash information - must be first */ - struct _sd_cctl *cc_next, *cc_prev; /* next and prev in a chain */ - struct _sd_cctl *cc_chain; /* chaining request centries */ - struct _sd_cctl *cc_dirty_next; /* for chaining sequential writes */ - struct _sd_cctl *cc_dirty_link; /* for chaining the dirty lists */ - struct _sd_cctl *cc_dirty_net_next; /* for chaining net writes */ - struct _sd_cctl *cc_dirty_net_link; /* for chaining net lists */ - uint_t cc_seq; /* sequence number: for lru optim */ - volatile int net_iostatus; /* net status of io */ - volatile _sd_bitmap_t net_dirty; /* net cache block dirty mask */ - _sd_bitmap_t cc_valid; /* Cache block valid mask */ - _sd_bitmap_t cc_toflush; /* Cache block deferred dirty mask */ - volatile _sd_bitmap_t cc_dirty; /* Cache block dirty mask */ - volatile ushort_t cc_await_use; /* # waiting for this entry (inuse) */ - volatile ushort_t cc_await_page; /* # waiting for this entry (pageio) */ - volatile uchar_t cc_inuse; /* atomic_swap(CC_INUSE, cc_inuse) */ - volatile uchar_t cc_pageio; /* atomic_swap(CC_PAGEIO, cc_pageio) */ - uchar_t cc_flag; /* flag */ - char cc_iocount; /* number of ios in progress */ - volatile uchar_t cc_iostatus; /* status of io */ - uchar_t cc_prot; /* Segmented LRU protection flag */ - sd_addr_t cc_addr; /* Data address information */ - ss_centry_info_t *cc_write; /* mirrored writes control block */ - struct _sd_cctl_sync *cc_sync; /* Cache block synchronisation blk */ - - /* support for backend i/o memory coalescing */ - sd_addr_t cc_anon_addr; /* address for backend mem coalescing */ - int cc_anon_len; /* length of 
anon mem */ - - clock_t cc_creat; - int cc_hits; - - /* dynamic memory support fields */ - uint_t cc_aging_dm; /* For bit settings */ - /* see defines */ - int cc_alloc_size_dm; /* mem allocation */ - /* size bytes */ - struct _sd_cctl *cc_head_dm; /* ptr to host centry */ - /* for a host/pest */ - /* chain */ - struct _sd_cctl *cc_next_dm; /* ptr to next centry */ - /* in host/pest chain */ - struct _sd_cctl *cc_link_list_dm; /* simple link list */ - /* ptr of all centrys */ - /* dynmem chains */ - /* _sd_queue_t *cc_dmchain_q; dmqueue */ - int cc_cblocks; /* number of centrys for size_dm */ - - /* debugging stats */ - int cc_alloc_ct_dm; - int cc_dealloc_ct_dm; - -} _sd_cctl_t; - -/* cache entry allocation tokens */ -typedef struct sdbc_allocbuf_s { - intptr_t opaque[2]; /* must be initialized to 0 */ -} sdbc_allocbuf_t; - -typedef struct sdbc_allocbuf_impl_s { - _sd_cctl_t *sab_dmchain; - int sab_q; /* dmqueue of last chain allocated */ - int reserved; /* stats ? */ -} sdbc_allocbuf_impl_t; - -/* - * bits for flag argument to sdbc_centry_alloc() and callees. 
- */ -#define ALLOC_LOCKED 0x1 /* locked status of sdbc_queue_lock */ -#define ALLOC_NOWAIT 0x2 /* do not block, return NULL */ - -/* - * definitions supporting the dynmem dealloc thread - */ -#define LOW_RESOURCES_DM -1 - -#define NO_THREAD_DM -1 -#define PROCESS_CACHE_DM 0 -#define CACHE_SHUTDOWN_DM 1 -#define CACHE_THREAD_TERMINATED_DM 2 -#define TIME_DELAY_LVL0 3 -#define TIME_DELAY_LVL1 4 -#define TIME_DELAY_LVL2 5 -#define HISTORY_LVL0 (ushort_t)0 -#define HISTORY_LVL1 (ushort_t)0x00ff -#define HISTORY_LVL2 (ushort_t)0xff00 -/* - * definitions supporing the ddditional fields in the cache - * entry structure for dyn mem - */ -#define FIRST_AGING_DM 0x00000001 -#define FINAL_AGING_DM 0x000000ff -#define FOUND_IN_HASH_DM 0x00000100 /* used to bring cent info */ - /* out of sd_centry_alloc() */ -#define FOUND_HOLD_OVER_DM 0x00000200 /* used to bring cent info */ - /* out of sd_centry_alloc() */ -#define HOST_ENTRY_DM 0x00000400 -#define PARASITIC_ENTRY_DM 0x00000800 -#define STICKY_METADATA_DM 0x00001000 -#define CATAGORY_ENTRY_DM (HOST_ENTRY_DM|PARASITIC_ENTRY_DM| \ - STICKY_METADATA_DM) -#define ELIGIBLE_ENTRY_DM 0x00002000 -#define HASH_ENTRY_DM 0x00008000 -#define HOLD_ENTRY_DM 0x00010000 -#define ENTRY_FIELD_DM (ELIGIBLE_ENTRY_DM|HASH_ENTRY_DM|HOLD_ENTRY_DM) -#define AVAIL_ENTRY_DM 0x00020000 - -/* info only */ -#define PREFETCH_BUF_I 0x00040000 /* implicit read-ahead */ -#define PREFETCH_BUF_E 0x00080000 /* explicit read-ahead */ -#define PREFETCH_BUF_IR 0x00100000 /* release when read complete */ - -/* error processing */ -#define BAD_ENTRY_DM 0x20000000 /* inconsistent ccent */ -#define BAD_CHAIN_DM 0x40000000 /* chain containing bad ccent */ - -/* - * definitions supporting the dynmem monitoring - */ -#define RPT_SHUTDOWN_PROCESS_DM 0x00000001 -#define RPT_DEALLOC_STATS1_DM 0x00000002 /* nodat,cand,host,pest,meta, */ - /* other,dealloc */ -#define RPT_DEALLOC_STATS2_DM 0x00000004 /* hysterisis,grossct */ -/* - * definitions supporting the processing 
directive bit flags - */ -#define WAKE_DEALLOC_THREAD_DM 0x00000001 /* one shot - acted */ - /* on then cleared */ -#define MAX_OUT_ACCEL_HIST_FLAG_DM 0x00000002 /* one shot - acted */ - /* on then cleared */ -/* - * Default - Max - Min definitions - */ -#define MAX_DYN_LIST_DEFAULT 8 -#define MONITOR_DYNMEM_PROCESS_DEFAULT 0 -#define CACHE_AGING_CT_DEFAULT 3 -#define CACHE_AGING_SEC1_DEFAULT 10 -#define CACHE_AGING_SEC2_DEFAULT 5 -#define CACHE_AGING_SEC3_DEFAULT 1 -#define CACHE_AGING_PCNT1_DEFAULT 50 -#define CACHE_AGING_PCNT2_DEFAULT 25 -#define MAX_HOLDS_PCNT_DEFAULT 0 -#define PROCESS_DIRECTIVE_DEFAULT 0 - -#define CACHE_AGING_CT_MAX FINAL_AGING_DM /* 255 */ -#define CACHE_AGING_SEC1_MAX 255 /* arbitrary but easy to remember */ -#define CACHE_AGING_SEC2_MAX 255 /* arbitrary but easy to remember */ -#define CACHE_AGING_SEC3_MAX 255 /* arbitrary but easy to remember */ -#define CACHE_AGING_PCNT1_MAX 100 -#define CACHE_AGING_PCNT2_MAX 100 -#define MAX_HOLDS_PCNT_MAX 100 -/* - * dynmem global structure defn - */ -typedef struct _dm_process_vars { - kcondvar_t thread_dm_cv; - kmutex_t thread_dm_lock; - int sd_dealloc_flagx; /* gen'l purpose bit flag */ - int monitor_dynmem_process; /* bit flag indicating what to report */ - int max_dyn_list; /* max num of pages to allow list to */ - /* grow */ - /* cache aging parameter set */ - int cache_aging_ct1; /* hosts/pests - aging hits which */ - /* trigger dealloc */ - int cache_aging_ct2; /* metas - aging hits which */ - /* trigger dealloc not yet imple */ - int cache_aging_ct3; /* holds - aging hits which */ - /* trigger dealloc */ - int cache_aging_sec1; /* sleep time between cache list */ - /* exam - 100% to pcnt1 free */ - int cache_aging_sec2; /* sleep time between cache list */ - /* exam - pcnt1 to pcnt2 free */ - int cache_aging_sec3; /* sleep time between cache list */ - /* exam - pcnt2 to 0% free */ - int cache_aging_pcnt1; /* % free when to kick in accel */ - /* aging - sec2 */ - int cache_aging_pcnt2; /* % 
free when to kick in accel */ - /* aging - sec3 */ - int max_holds_pcnt; /* max % of cents to act as holdovers */ - /* stats - debug */ - int alloc_ct; /* gross count */ - int dealloc_ct; /* gross count */ - /* thread stats - debug and on the fly tuning of dealloc vars */ - int history; /* history flag */ - int nodatas; /* # cctls w/o data assigned */ - int notavail; /* # cctls w/data but in use */ - int candidates; /* # cand. for dealloc checking */ - int deallocs; /* # deallocs */ - int hosts; /* # hosts */ - int pests; /* # pests */ - int metas; /* # metas - sticky meata data */ - int holds; /* # holdovers - single page, fully */ - /* aged but not dealloc'd or hash */ - /* del'd */ - int others; /* # everybody else */ - int process_directive; /* processing directive bitmap flag */ - /* standard stats (no prefetch tallies here) */ - int read_hits; /* found in cache memory */ - int read_misses; /* not found in cache memory */ - int write_hits; /* found in cache memory */ - int write_misses; /* not found in cache memory */ - int write_thru; /* not bothering to put in cache mem */ - /* - * prefetch tracked by _sd_prefetch_valid_cnt and _sd_prefetch_busy_cnt - * might want different usage ? - */ - int prefetch_hits; - int prefetch_misses; -} _dm_process_vars_t; - -/* - * dynmem interface - */ -int sdbc_edit_xfer_process_vars_dm(_dm_process_vars_t *process_vars); - -/* - * Defines to hide the sd_addr_t structure - */ - -#define cc_data cc_addr.sa_virt - - -/* - * Defines to hide the synchronisation block - */ - -#define cc_blkcv cc_sync->_cc_blkcv -#define cc_lock cc_sync->_cc_lock - -/* - * This struct exists solely so that sd_info is able to - * extract this kind of data from sdbc without passing out - * the entire _sd_cctl_t which has lots of pointers which - * makes it impossible to deal with in 32bit program and an - * LP64 kernel. 
- */ - -typedef struct { - int ci_write; /* 0 == no wrt data */ - _sd_bitmap_t ci_dirty; /* dirty bits */ - _sd_bitmap_t ci_valid; /* valid bits */ - int ci_cd; /* the cd */ - nsc_off_t ci_dblk; /* the disk block number */ -} sdbc_info_t; - -typedef struct _sd_wr_cctl { - ss_resource_t wc_res; - ss_centry_info_t wc_centry_info; -} _sd_wr_cctl_t; - -typedef struct _sd_queue { - struct _sd_cctl sq_qhead; /* LRU queue head */ - kmutex_t sq_qlock; /* LRU spinlock */ - char sq_await; /* number blocked on lru sema */ - int sq_inq; /* Number of LRU entries in q */ - unsigned int sq_seq; /* sequence number for lru optim */ - unsigned int sq_req_stat; - unsigned int sq_noreq_stat; - - /* dmchain support */ - int sq_dmchain_cblocks; /* dmchain len in ccents */ -} _sd_queue_t; - - - -/* - * The net structure contains which memory net has been configured for - * cache, the amount of space allocated, the write control and fault - * tolerant blocks etc - */ - -typedef struct _sd_net { - unsigned short sn_psize; /* Page size of memory in this net */ - unsigned char sn_configured; /* is this network configured */ - size_t sn_csize; /* Cache size in bytes */ - uint_t sn_wsize; /* Write size in bytes */ - int sn_cpages; /* number of pages for Cache */ -}_sd_net_t; - -#endif /* _KERNEL || _KMEMUSER */ - - -/* - * Shared structure shared between cds and statistics - * - * NOTE - this structure is visible as an ioctl result. - * If anything changes here _sd_get_stats() and convert_stats() - * will need to be changed. - */ -typedef struct _sd_shared { - nsc_size_t sh_filesize; /* Filesize (in FBAs) */ - volatile uchar_t sh_alloc; /* Is this allocated? */ - volatile uchar_t sh_failed; /* Disk failure status (0 == ok, */ - /* 1 == i/o error, 2 == open failed ) */ - unsigned short sh_cd; /* the cache descriptor. 
(for stats) */ - int sh_cache_read; /* Number of FBAs read from cache */ - int sh_cache_write; /* Number of FBAs written to cache */ - int sh_disk_read; /* Number of FBAs read from disk */ - int sh_disk_write; /* Number of FBAs written to disk */ - volatile int sh_numdirty; /* Number of dirty blocks */ - volatile int sh_numio; /* Number of blocks on way to disk */ - volatile int sh_numfail; /* Number of blocks failed */ - int sh_flushloop; /* Loops delayed so far */ - int sh_flag; /* Flags visible to user programs */ - int sh_destaged; /* number of bytes destaged to disk */ - int sh_wrcancelns; /* number of writes to dirty blocks */ - char sh_filename[NSC_MAXPATH]; -} _sd_shared_t; - - -#if defined(_KERNEL) || defined(_KMEMUSER) - -/* - * Cache descriptor information. - */ -typedef struct _sd_cd_info { - int cd_desc; /* The cache descriptor */ - int cd_flag; /* Flag */ - nsc_fd_t *cd_rawfd; /* File descriptor for raw device */ - strategy_fn_t cd_strategy; /* Cached copy of strategy func */ - dev_t cd_crdev; /* The device this represents */ - nsc_iodev_t *cd_iodev; /* I/O device for callbacks */ - kmutex_t cd_lock; /* spinlock guarding this cd */ - volatile uchar_t cd_writer; /* Disk writer status */ - unsigned int cd_hint; /* Hints for this descriptor */ - ss_voldata_t *cd_global; /* RM information for this cd */ - struct _sd_cctl *cd_dirty_head, *cd_dirty_tail; /* dirty chain */ - struct _sd_cctl *cd_last_ent; /* last entry in dirty chain, for */ - int cd_lastchain; /* sequential optimization */ - struct _sd_cctl *cd_lastchain_ptr; /* last sequential chain */ - struct _sd_cctl *cd_io_head, *cd_io_tail; /* io in progress q */ - struct _sd_cctl *cd_fail_head; - struct _sd_shared *cd_info; /* shared info (filename, size) */ - char cd_failover; /* done nsc_reserve during failover */ - volatile char cd_recovering; /* cd is being recovered failover or */ - /* disk_online */ - char cd_write_inprogress; - struct sd_net_hnd *net_hnd; -} _sd_cd_info_t; - -typedef struct 
_sd_buf_hlist { - _sd_buf_handle_t hl_top; - kmutex_t hl_lock; - short hl_count; -} _sd_buf_hlist_t; - -#endif /* _KERNEL || _KMEMUSER */ - -/* - * Index into the following st_mem_sizes[] array - */ -#define _SD_LOCAL_MEM 0x00 /* type of memory to allocate */ -#define _SD_CACHE_MEM 0x01 -#define _SD_IOBUF_MEM 0x02 -#define _SD_HASH_MEM 0x03 -#define _SD_GLOBAL_MEM 0x04 -#define _SD_STATS_MEM 0x05 -#define _SD_MAX_MEM _SD_STATS_MEM + 1 - -/* maintain stat struct layout */ -#define NUM_WQ_PAD 4 -/* - * cache statistics structure - * - * NOTE - if anything changes here _sd_get_stats() and convert_stats() - * must be changed and _sd_stats32_t must also be synchronized. - * - */ -typedef struct _sd_stats { - int net_dirty; - int net_pending; - int net_free; - int st_count; /* number of opens for device */ - int st_loc_count; /* number of open devices */ - int st_rdhits; /* number of read hits */ - int st_rdmiss; /* number of read misses */ - int st_wrhits; /* number of write hits */ - int st_wrmiss; /* number of write misses */ - int st_blksize; /* cache block size (in bytes) */ - uint_t st_lru_blocks; - uint_t st_lru_noreq; - uint_t st_lru_req; - int st_wlru_inq; /* number of write blocks */ - int st_cachesize; /* cache size (in bytes) */ - int st_numblocks; /* # of cache blocks */ - int st_wrcancelns; /* # of write cancellations */ - int st_destaged; /* # of bytes destaged to disk */ - _sd_shared_t st_shared[1]; /* shared structures */ -} _sd_stats_t; - -typedef struct _sd_stats_32 { - int net_dirty; - int net_pending; - int net_free; - int st_count; /* number of opens for device */ - int st_loc_count; /* number of open devices */ - int st_rdhits; /* number of read hits */ - int st_rdmiss; /* number of read misses */ - int st_wrhits; /* number of write hits */ - int st_wrmiss; /* number of write misses */ - int st_blksize; /* cache block size (in bytes) */ - uint_t st_lru_blocks; - uint_t st_lru_noreq; - uint_t st_lru_req; - int st_wlru_inq; /* number of write blocks 
*/ - int st_cachesize; /* cache size (in bytes) */ - int st_numblocks; /* # of cache blocks */ - int st_wrcancelns; /* # of write cancellations */ - int st_destaged; /* # of bytes destaged to disk */ - _sd_shared_t st_shared[1]; /* shared structures */ -} _sd_stats32_t; - - -#if defined(_KERNEL) || defined(_KMEMUSER) - -/* - * The map structure contains mapping between a mask and relevent information - * that would take some computation at runtime. - * Given a mask, what is the first LSB set (stpos) - * Given a mask, what are the consecutive number of LSB bits set (len) - * Given a mask, what would be a new mask if the consecutive LSB bits are reset - * Given a mask, how many ios would be needed to flush this block. - * Given a mask, how many buffer descriptor lists (bdls) would be needed - * on a read. - */ - -typedef struct _sd_map_info { - unsigned char mi_stpos; /* position of first LSB set */ - unsigned char mi_len; /* Length of consecutive LSB set */ - unsigned char mi_dirty_count; /* number of fragmented bits */ - unsigned char mi_io_count; /* number of bdls for a given mask */ - _sd_bitmap_t mi_mask; /* new mask with cons. LSB's reset */ -} _sd_map_info_t; - - -/* - * cc_inuse is set with atomic exchange instruction - * when clearing, must check for waiters. - * sd_serialize prohibits speculative reads - */ -#define CENTRY_INUSE(centry) ((centry)->cc_inuse) -#define SET_CENTRY_INUSE(centry) \ - ((centry)->cc_inuse || atomic_swap(CC_INUSE, &(centry)->cc_inuse)) -#define CLEAR_CENTRY_INUSE(centry) { \ - (centry)->cc_inuse = 0; \ - sd_serialize(); \ - if ((centry)->cc_await_use) { \ - mutex_enter(&(centry)->cc_lock); \ - cv_broadcast(&(centry)->cc_blkcv); \ - mutex_exit(&(centry)->cc_lock); \ - } \ -} - - -/* - * cc_pageio is set with atomic exchange instruction - * when clearing, must check for waiters. 
- * sd_serialize prohibits speculative reads - */ -#define CENTRY_PAGEIO(centry) ((centry)->cc_pageio) -#define SET_CENTRY_PAGEIO(centry) \ - ((centry)->cc_pageio || atomic_swap(CC_PAGEIO, &(centry)->cc_pageio)) -#define WAIT_CENTRY_PAGEIO(centry, stat) { \ - while (SET_CENTRY_PAGEIO(centry)) { \ - (stat)++; \ - _sd_cc_wait(CENTRY_CD(centry), CENTRY_BLK(centry), \ - centry, CC_PAGEIO); \ - } \ -} -#define CLEAR_CENTRY_PAGEIO(centry) { \ - (centry)->cc_pageio = 0; \ - sd_serialize(); \ - if ((centry)->cc_await_page) { \ - mutex_enter(&(centry)->cc_lock); \ - cv_broadcast(&(centry)->cc_blkcv); \ - mutex_exit(&(centry)->cc_lock); \ - } \ -} - - -#define CENTRY_DIRTY_PENDING(centry) ((centry)->cc_flag & CC_PEND_DIRTY) -#define CENTRY_PINNED(centry) ((centry)->cc_flag & CC_PINNED) -#define CENTRY_PINNABLE(centry) ((centry)->cc_flag & CC_PINNABLE) -#define CENTRY_QHEAD(centry) ((centry)->cc_flag & CC_QHEAD) - -#define CENTRY_DIRTY(centry) ((centry)->cc_dirty) -#define CENTRY_CD(centry) ((centry)->cc_head.hh_cd) -#define CENTRY_BLK(centry) ((centry)->cc_head.hh_blk_num) -#define CENTRY_IO_INPROGRESS(centry) ((centry)->cc_iostatus) - -#define HANDLE_CD(handle) ((handle)->bh_cd) - -#endif /* _KERNEL || _KMEMUSER */ - -#if defined(_KERNEL) - -#define CENTRY_SET_FTPOS(centry) \ - (centry)->cc_write->sc_cd = CENTRY_CD(centry), \ - (centry)->cc_write->sc_fpos = CENTRY_BLK(centry) - -#define CC_CD_BLK_MATCH(cd, blk, centry) \ - (((centry)->cc_head.hh_cd == cd) && \ - ((centry)->cc_head.hh_blk_num == blk)) - - -#define _SD_ZEROADDR ((ucaddr_t)(_sd_net_config.sn_zeroaddr)) - - -#define ASSERT_LEN(len) \ - if (len > _SD_MAX_FBAS) {\ - cmn_err(CE_WARN, \ - "!sdbc(ASSERT_LEN) fba exceeds limits. fba_len %" \ - NSC_SZFMT ". Max %d", len, _SD_MAX_FBAS); \ - return (EIO); } - -#define ASSERT_IO_SIZE(fba_num, fba_len, cd) \ - if ((fba_num + fba_len) > \ - (_sd_cache_files[(cd)].cd_info->sh_filesize)) { \ - cmn_err(CE_WARN, \ - "!sdbc(ASSERT_IO_SIZE) io beyond end of file." 
\ - " fpos %" NSC_SZFMT " len %" NSC_SZFMT " file size 0 - %" \ - NSC_SZFMT "\n", fba_num, fba_len, \ - (_sd_cache_files[(cd)].cd_info->sh_filesize)); \ - return (EIO); \ - } - - -#define ASSERT_HANDLE_LIMITS(m_h1, m_fpos, m_flen) \ - if (((m_fpos) < (m_h1)->bh_fba_pos) || \ - (((m_fpos) + (m_flen)) > \ - ((m_h1)->bh_fba_pos + (m_h1)->bh_fba_len))) { \ - cmn_err(CE_WARN, \ - "!sdbc(ASSERT_HANDLE_LIMITS) operation out of bounds" \ - " cd %x want %" NSC_SZFMT " to %" NSC_SZFMT ". Handle %" \ - NSC_SZFMT " to %" NSC_SZFMT, HANDLE_CD(m_h1), m_fpos,\ - m_flen, (m_h1)->bh_fba_pos, (m_h1)->bh_fba_len); \ - return (EINVAL); \ - } - - -#define _SD_HANDLE_ACTIVE(handle) ((handle)->bh_flag & NSC_HACTIVE) - -#define _SD_CD_HINTS(cd) (_sd_cache_files[(cd)].cd_hint) -#define _SD_NODE_HINTS (_sd_node_hint) - -#define _SD_SETUP_HANDLE(hndl, cd, fpos, flen, flag) { \ - hndl->bh_cd = cd; \ - hndl->bh_vec = hndl->bh_bufvec; \ - hndl->bh_fba_pos = fpos; \ - hndl->bh_fba_len = flen; \ - hndl->bh_busy_thread = nsc_threadp(); \ - if (cd == _CD_NOHASH) \ - hndl->bh_flag |= \ - (flag | _SD_NODE_HINTS | NSC_HACTIVE); \ - else \ - hndl->bh_flag |= \ - (flag | _SD_CD_HINTS(cd) | \ - _SD_NODE_HINTS | NSC_HACTIVE); \ - } - -#define _SD_NOT_WRTHRU(handle) (((handle)->bh_flag & _SD_WRTHRU_MASK) == 0) -#define _SD_IS_WRTHRU(handle) ((handle)->bh_flag & _SD_WRTHRU_MASK) - -#define FILE_OPENED(cd) (((cd) >= 0) && ((cd) < (sdbc_max_devs)) && \ - (_sd_cache_files[(cd)].cd_info != NULL) && \ - (_sd_cache_files[(cd)].cd_info->sh_alloc \ - & CD_ALLOCATED)) - -/* - * bitmap stuff - */ - -#define SDBC_LOOKUP_STPOS(mask) (_sd_lookup_map[(mask)].mi_stpos) -#define SDBC_LOOKUP_LEN(mask) (_sd_lookup_map[(mask)].mi_len) -#define SDBC_LOOKUP_MASK(mask) (_sd_lookup_map[(mask)].mi_mask) -#define SDBC_LOOKUP_DTCOUNT(mask) (_sd_lookup_map[(mask)].mi_dirty_count) -#define SDBC_LOOKUP_IOCOUNT(mask) (_sd_lookup_map[(mask)].mi_io_count) -#define SDBC_LOOKUP_MODIFY(mask) (mask &= ~(_sd_lookup_map[(mask)].mi_mask)) - 
-#define SDBC_IS_FRAGMENTED(bmap) (!_sd_contig_bmap[(bmap)]) -#define SDBC_IS_CONTIGUOUS(bmap) (_sd_contig_bmap[(bmap)]) - -#endif /* _KERNEL */ - -#if defined(_KERNEL) || defined(_KMEMUSER) - -#define SDBC_GET_BITS(fba_off, fba_len) \ - (_fba_bits[(fba_len)] << (fba_off)) - -#define SDBC_SET_VALID_BITS(fba_off, fba_len, cc_entry) \ - (cc_entry)->cc_valid |= SDBC_GET_BITS(fba_off, fba_len) - -#define SDBC_SET_DIRTY(fba_off, fba_len, cc_entry) { \ - _sd_bitmap_t dirty, newdb = SDBC_GET_BITS(fba_off, fba_len); \ - ss_centry_info_t *gl = (cc_entry)->cc_write; \ - (cc_entry)->cc_valid |= newdb; \ - dirty = ((cc_entry)->cc_dirty |= newdb); \ - gl->sc_dirty = dirty; \ - gl->sc_flag = (int)(cc_entry)->cc_flag; \ - SSOP_SETCENTRY(sdbc_safestore, gl); } - -#define SDBC_SET_TOFLUSH(fba_off, fba_len, cc_entry) { \ - _sd_bitmap_t dirty, newdb = SDBC_GET_BITS(fba_off, fba_len); \ - ss_centry_info_t *gl = (cc_entry)->cc_write; \ - (cc_entry)->cc_toflush |= newdb; \ - (cc_entry)->cc_valid |= newdb; \ - dirty = (cc_entry)->cc_toflush | (cc_entry)->cc_dirty; \ - gl->sc_dirty = dirty; \ - SSOP_SETCENTRY(sdbc_safestore, gl); } - -#define SDBC_VALID_BITS(fba_off, fba_len, cc_entry) \ - ((((cc_entry)->cc_valid) & (SDBC_GET_BITS(fba_off, fba_len))) \ - == (SDBC_GET_BITS(fba_off, fba_len))) - - -#define SDBC_DIRTY_NEIGHBORS(last, next) \ - ((SDBC_IS_CONTIGUOUS((last)->cc_dirty)) && \ - (SDBC_IS_CONTIGUOUS((next)->cc_dirty)) && \ -(((last)->cc_dirty & (1 << (BLK_FBAS - 1))) && ((next)->cc_dirty & 0x01))) - - -#define FULLY_VALID(cc_entry) ((cc_entry)->cc_valid == BLK_FBA_BITS) -#define SET_FULLY_VALID(cc_entry) \ - ((cc_entry)->cc_valid = BLK_FBA_BITS) - -#define FULLY_DIRTY(cc_entry) ((cc_entry)->cc_dirty == BLK_FBA_BITS) - -#define _SD_BIT_ISSET(bmap, bit) ((bmap & (1 << bit)) ? 
1 : 0) -#define _SD_BMAP_ISFULL(bmap) (bmap == BLK_FBA_BITS) - -#endif /* _KERNEL || _KMEMUSER */ - -#if defined(_KERNEL) - -#if !defined(_SD_NOSTATS) -#define CACHE_FBA_READ(cd, blks) \ - if (((cd) >= 0) && ((cd) < sdbc_max_devs))\ - _sd_cache_stats->st_shared[(cd)].sh_cache_read += (blks) -#define DISK_FBA_READ(cd, blks) \ - if (((cd) >= 0) && ((cd) < sdbc_max_devs))\ - _sd_cache_stats->st_shared[(cd)].sh_disk_read += (blks) -#define CACHE_FBA_WRITE(cd, blks) \ - if (((cd) >= 0) && ((cd) < sdbc_max_devs))\ - _sd_cache_stats->st_shared[(cd)].sh_cache_write += (blks) -#define DISK_FBA_WRITE(cd, blks) \ - if (((cd) >= 0) && ((cd) < sdbc_max_devs))\ - _sd_cache_stats->st_shared[(cd)].sh_disk_write += (blks) -#define CACHE_READ_HIT _sd_cache_stats->st_rdhits++ -#define CACHE_READ_MISS _sd_cache_stats->st_rdmiss++ -#define CACHE_WRITE_HIT _sd_cache_stats->st_wrhits++ -#define CACHE_WRITE_MISS _sd_cache_stats->st_wrmiss++ - -#define CACHE_WRITE_CANCELLATION(cd) {\ - if ((cd) < sdbc_max_devs)\ - _sd_cache_stats->st_shared[(cd)].sh_wrcancelns++;\ - _sd_cache_stats->st_wrcancelns++;\ -} - -#define WRITE_DESTAGED(cd, bytes) {\ - if (((cd) >= 0) && ((cd) < sdbc_max_devs))\ - _sd_cache_stats->st_shared[(cd)].sh_destaged += (bytes);\ - _sd_cache_stats->st_destaged += (bytes);\ -} - -#define FBA_READ_IO_KSTATS(cd, bytes) {\ - if (((cd) >= 0) && ((cd) < sdbc_max_devs) && sdbc_cd_io_kstats[(cd)]) {\ - KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)])->reads++;\ - KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)])->nread += (bytes);\ - }\ - if (sdbc_global_io_kstat) {\ - KSTAT_IO_PTR(sdbc_global_io_kstat)->reads++;\ - KSTAT_IO_PTR(sdbc_global_io_kstat)->nread += (bytes);\ - }\ -} - -#define FBA_WRITE_IO_KSTATS(cd, bytes) {\ - if (((cd) >= 0) && ((cd) < sdbc_max_devs) && sdbc_cd_io_kstats[(cd)]) {\ - KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)])->writes++;\ - KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)])->nwritten += (bytes);\ - }\ - if (sdbc_global_io_kstat) {\ - KSTAT_IO_PTR(sdbc_global_io_kstat)->writes++;\ - 
KSTAT_IO_PTR(sdbc_global_io_kstat)->nwritten += (bytes);\ - }\ -} - -/* start timer measuring amount of time spent in the cache */ -#define KSTAT_RUNQ_ENTER(cd) {\ - if (((cd) >= 0) && ((cd) < sdbc_max_devs) && \ - sdbc_cd_io_kstats[(cd)] && sdbc_cd_io_kstats_mutexes) {\ - mutex_enter(sdbc_cd_io_kstats[(cd)]->ks_lock);\ - kstat_runq_enter(KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)]));\ - mutex_exit(sdbc_cd_io_kstats[(cd)]->ks_lock);\ - }\ - if (sdbc_global_io_kstat) {\ - mutex_enter(sdbc_global_io_kstat->ks_lock);\ - kstat_runq_enter(KSTAT_IO_PTR(sdbc_global_io_kstat));\ - mutex_exit(sdbc_global_io_kstat->ks_lock);\ - }\ -} - -/* stop timer measuring amount of time spent in the cache */ -#define KSTAT_RUNQ_EXIT(cd) {\ - if (((cd) >= 0) && ((cd) < sdbc_max_devs) && \ - sdbc_cd_io_kstats[(cd)] && sdbc_cd_io_kstats_mutexes) {\ - mutex_enter(sdbc_cd_io_kstats[(cd)]->ks_lock);\ - kstat_runq_exit(KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)]));\ - mutex_exit(sdbc_cd_io_kstats[(cd)]->ks_lock);\ - }\ - if (sdbc_global_io_kstat) {\ - mutex_enter(sdbc_global_io_kstat->ks_lock);\ - kstat_runq_exit(KSTAT_IO_PTR(sdbc_global_io_kstat));\ - mutex_exit(sdbc_global_io_kstat->ks_lock);\ - }\ -} - -#else -#define CACHE_FBA_READ(cd, blks) -#define DISK_FBA_READ(cd, blks) -#define CACHE_FBA_WRITE(cd, blks) -#define DISK_FBA_WRITE(cd, blks) -#define CACHE_READ_HIT -#define CACHE_READ_MISS -#define CACHE_WRITE_HIT -#define CACHE_WRITE_MISS -#define CACHE_WRITE_CANCELLATION(cd) -#define WRITE_DESTAGED(cd, bytes) -#endif - -#endif /* _KERNEL */ - -/* defines for sh_alloc */ - -#define CD_ALLOC_IN_PROGRESS 0x0001 -#define CD_ALLOCATED 0x0002 -#define CD_CLOSE_IN_PROGRESS 0x0010 - -/* defines for sh_flag */ - -#define CD_ATTACHED 0x0001 - -#ifdef _KERNEL - -typedef void (*sdbc_ea_fn_t) (blind_t, nsc_off_t, nsc_size_t, int); - -#define _SD_DISCONNECT_CALLBACK(hndl) \ - if ((hndl)->bh_disconnect_cb) { \ - SDTRACE(SDF_DISCONNECT, (hndl)->bh_cd, (hndl)->bh_fba_len, \ - (hndl)->bh_fba_pos, (hndl)->bh_flag, 0); \ 
- ((*((hndl)->bh_disconnect_cb))(hndl)); \ - } -#define _SD_READ_CALLBACK(hndl) \ - if ((hndl)->bh_read_cb) \ - ((*((hndl)->bh_read_cb))(hndl)); \ - else cmn_err(CE_WARN, \ - "!sdbc(_SD_READ_CALLBACK) not registered. io lost"); -#define _SD_WRITE_CALLBACK(hndl) \ - if ((hndl)->bh_write_cb) \ - ((*((hndl)->bh_write_cb))(hndl)); \ - else cmn_err(CE_WARN, \ - "!sdbc(_SD_WRITE_CALLBACK) not registered. io lost"); - -#endif /* _KERNEL */ - - -#if defined(_SD_LRU_OPTIMIZE) -/* - * Do not requeue if we fall into the tail 25% of the lru - */ -#define LRU_REQ_LIMIT(q) (q->sq_inq >> 2) - -#define _sd_lru_reinsert(q, ent) \ - (((q->sq_seq - ent->cc_seq) > LRU_REQ_LIMIT(q)) ?\ - 1 : ((q->sq_noreq_stat)++, 0)) -#else -#define _sd_lru_reinsert(ent) 1 -#endif - -#if defined(_KERNEL) -#define SD_WR_NUMIO 100 -#define SD_DCON_THRESH 0x10000 /* Disconnect if io len greater than 64 */ - -/* - * These defines are the hardwired values after sd_config_param was - * zapped. Ought to remove the use of these entirely .... - */ - -#define _SD_CD_WRITER(cd) ((_sd_cache_files[(cd)].cd_info->sh_numdirty>\ - SD_WR_NUMIO) ? 
\ - cd_writer(cd) : 0) -#define _SD_FORCE_DISCONNECT(len) (SD_DCON_THRESH < FBA_SIZE(len)) - -/* -------------------------------- END sd_config_param defines ---------- */ - -#define _SD_CD_WBLK_USED(cd) (_sd_cache_stats->st_shared[(cd)].sh_numio +\ - _sd_cache_stats->st_shared[(cd)].sh_numdirty) - -#define _SD_CD_ALL_WRITES(cd) (_sd_cache_stats->st_shared[(cd)].sh_numio +\ - _sd_cache_stats->st_shared[(cd)].sh_numdirty+\ - _sd_cache_stats->st_shared[(cd)].sh_numfail) - - - -/* - * ncall usage - */ -#define SD_ENABLE (NCALL_SDBC + 0) -#define SD_DISABLE (NCALL_SDBC + 1) -#define SD_DUAL_WRITE (NCALL_SDBC + 2) -#define SD_DUAL_READ (NCALL_SDBC + 3) -#define SD_SET_CD (NCALL_SDBC + 4) -#define SD_GETSIZE (NCALL_SDBC + 5) -#define SD_DUAL_OPEN (NCALL_SDBC + 6) -#define SD_REMOTE_FLUSH (NCALL_SDBC + 7) -#define SD_SGREMOTE_FLUSH (NCALL_SDBC + 8) -#define SD_DISK_IO (NCALL_SDBC + 9) -#define SD_GET_BMAP (NCALL_SDBC + 10) -#define SD_CD_DISCARD (NCALL_SDBC + 11) -#define SD_PING (NCALL_SDBC + 12) -#define SD_DC_MAIN_LOOP (NCALL_SDBC + 13) -#define SD_DATA (NCALL_SDBC + 14) -#define SD_BDATA (NCALL_SDBC + 15) -#define SD_UPDATE (NCALL_SDBC + 16) -#define SD_GET_SYSID (NCALL_SDBC + 17) - -#ifdef lint -#include <sys/nsctl/nsctl.h> -#define LINTUSED(x) (void)(x)++ -#else -#define LINTUSED(x) -#endif - - -extern int BLK_FBAS; -extern _sd_bitmap_t BLK_FBA_BITS; -extern _sd_bitmap_t _fba_bits[]; -extern _sd_cctl_t *_sd_cctl[]; -extern _sd_cd_info_t *_sd_cache_files; -extern _sd_hash_table_t *_sd_htable; -extern _sd_map_info_t _sd_lookup_map[]; -extern _sd_net_t _sd_net_config; -extern _sd_queue_t _sd_lru_q; -extern _sd_stats_t *_sd_cache_stats; -extern char _sd_contig_bmap[]; -extern int CACHE_BLOCK_SIZE; -extern int CBLOCKS; -extern int _sd_cctl_groupsz; -extern int sdbc_static_cache; -extern kmutex_t _sd_cache_lock; -extern nsc_def_t _sd_sdbc_def[]; -extern nsc_io_t *sdbc_io; -extern nsc_mem_t *sdbc_iobuf_mem, *sdbc_hash_mem; -extern uint_t _sd_node_hint; -extern int 
_sd_minidsp; -extern krwlock_t sdbc_queue_lock; -extern safestore_ops_t *sdbc_safestore; -extern ss_common_config_t safestore_config; -extern ss_voldata_t *_sdbc_gl_file_info; - -extern int _sdbc_cache_configure(int cblocks, spcs_s_info_t kstatus); -extern void _sdbc_cache_deconfigure(void); -extern void _sd_requeue(_sd_cctl_t *centry); -extern void _sd_requeue_head(_sd_cctl_t *centry); -extern int _sd_open(char *filename, int flag); -extern int _sd_open_cd(char *filename, const int cd, const int flag); -extern int _sd_close(int cd); -extern int _sdbc_remote_store_pinned(int cd); -extern int _sdbc_io_attach_cd(blind_t xcd); -extern int _sdbc_io_detach_cd(blind_t xcd); -extern int _sd_get_pinned(blind_t cd); -extern void _sd_cc_copy(_sd_cctl_t *cc_real, _sd_cctl_t *cc_shadow); -extern _sd_buf_handle_t *_sd_allocate_buf(int cd, nsc_off_t fba_pos, - nsc_size_t fba_len, int flag, int *sts); -extern void _sd_cc_wait(int cd, nsc_off_t cblk, _sd_cctl_t *centry, int flag); -extern int _sd_alloc_buf(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len, - int flag, _sd_buf_handle_t **handle_p); -extern int _sd_free_buf(_sd_buf_handle_t *handle); -extern _sd_cctl_t *_sd_centry_alloc(int, int, int *, int, int); -extern int _sd_centry_setup_dm(_sd_cctl_t *, int, int); -extern void _sdbc_dealloc_deconfigure_dm(void); -extern int _sdbc_dealloc_configure_dm(void); -extern _sd_cctl_t *_sd_shadow_centry(_sd_cctl_t *, _sd_cctl_t *, int, int, int); -extern void _sd_centry_release(_sd_cctl_t *centry); -extern int _sd_alloc_write(_sd_cctl_t *centry, int *stall); -extern int _sd_read(_sd_buf_handle_t *handle, nsc_off_t fba_pos, - nsc_size_t fba_len, int flag); -extern void _sd_read_complete(_sd_buf_handle_t *handle, nsc_off_t fba_pos, - nsc_size_t fba_len, int error); -extern int _sd_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, - nsc_size_t fba_len, int flag); -extern int _sd_zero(_sd_buf_handle_t *handle, nsc_off_t fba_pos, - nsc_size_t fba_len, int flag); -extern int 
_sd_copy(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2, - nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len); -extern void _sd_enqueue_dirty(int cd, _sd_cctl_t *chain, _sd_cctl_t *cc_last, - int numq); -extern void _sd_enqueue_dirty_chain(int cd, _sd_cctl_t *chain_first, - _sd_cctl_t *chain_last, int numq); -extern int _sd_get_stats(_sd_stats_t *uptr, int convert_32); -extern int _sd_set_hint(int cd, uint_t hint); -extern int _sd_clear_hint(int cd, uint_t hint); -extern int _sd_get_cd_hint(int cd, uint_t *hint); -extern int _sd_set_node_hint(uint_t hint); -extern int _sd_clear_node_hint(uint_t hint); -extern int _sd_get_node_hint(uint_t *hint); -extern int _sd_get_partsize(blind_t cd, nsc_size_t *ptr); -extern int _sd_get_maxfbas(blind_t cd, int flag, nsc_size_t *ptr); -extern int _sd_discard_pinned(blind_t cd, nsc_off_t fba_pos, - nsc_size_t fba_len); -extern void _sdbc_handles_unload(void); -extern int _sdbc_handles_load(void); -extern int _sdbc_handles_configure(); -extern void _sdbc_handles_deconfigure(void); -extern _sd_buf_handle_t *_sd_alloc_handle(sdbc_callback_fn_t d_cb, - sdbc_callback_fn_t r_cb, sdbc_callback_fn_t w_cb); -extern int _sd_free_handle(_sd_buf_handle_t *handle); -extern void _sd_init_contig_bmap(void); -extern void _sd_init_lookup_map(void); -extern int sd_get_file_info_size(void *uaddrp); -extern int sd_get_file_info_data(char *uaddrp); -extern int sd_get_glmul_sizes(int *uaddrp); -extern int sd_get_glmul_info(char *uaddrp); -extern _sd_cctl_t *sdbc_centry_alloc(int, nsc_off_t, nsc_size_t, int *, - sdbc_allocbuf_t *, int); -extern _sd_cctl_t *sdbc_centry_alloc_blks(int, nsc_off_t, nsc_size_t, int); -extern int _sdbc_ft_hold_io; -extern kcondvar_t _sdbc_ft_hold_io_cv; -extern kmutex_t _sdbc_ft_hold_io_lk; - -#ifdef DEBUG -/* for testing only */ -extern int _sdbc_flush_flag; /* inhibit flush for testing */ -extern int _sdbc_clear_ioerr(int); -extern int _sdbc_inject_ioerr(int, int, int); -extern void _sdbc_ioj_set_dev(int, 
dev_t); -extern void _sdbc_ioj_load(); -extern void _sdbc_ioj_unload(); -#endif - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_BCACHE_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_bio.c b/usr/src/uts/common/avs/ns/sdbc/sd_bio.c deleted file mode 100644 index a82a19f6f7..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_bio.c +++ /dev/null @@ -1,1305 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright (c) 2017 by Delphix. All rights reserved. 
- */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/buf.h> -#include <sys/open.h> -#include <sys/conf.h> -#include <sys/file.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include <vm/seg_kmem.h> -#include "sd_bcache.h" -#include "sd_trace.h" -#include "sd_io.h" -#include "sd_iob.h" -#include "sd_misc.h" -#if defined(_SD_DEBUG) /* simulate disk errors */ -#include "sd_tdaemon.h" -#endif - -#ifndef DS_DDICT -extern uintptr_t kobj_getsymvalue(char *, int); /* DDI violation */ -#endif - -#define DO_PAGE_LIST sdbc_do_page /* enable pagelist code */ - -int sdbc_do_page = 0; - -#define SGIO_MAX 254 - -static kmutex_t sdbc_bio_mutex; -static int sdbc_bio_count; - -static unsigned long page_size, page_offset_mask; - -#ifdef _SD_BIO_STATS -static __start_io_count = 0; -#endif /* _SD_BIO_STATS */ - -/* - * Forward declare all statics that are used before defined to enforce - * parameter checking. Also forward-declare all functions that have 64-bit - * argument types to enforce correct parameter checking. 
- * - * Some (if not all) of these could be removed if the code were reordered - */ - -static int _sd_sync_ea(struct buf *, iob_hook_t *); -static int _sd_async_ea(struct buf *, iob_hook_t *); -static void _sd_pack_pages(struct buf *bp, struct buf *list, sd_addr_t *addr, - nsc_off_t offset, nsc_size_t size); -static void _sd_pack_pages_nopageio(struct buf *bp, struct buf *list, - sd_addr_t *addr, nsc_off_t offset, nsc_size_t size); -static void _sd_setup_iob(struct buf *bp, dev_t dev, nsc_off_t pos, int flag); - -#ifdef DEBUG -static int _sdbc_ioj_lookup(dev_t); -static void _sdbc_ioj_clear_err(int); -#endif - -static int SD_WRITES_TOT = 0; -static int SD_WRITES_LEN[100]; - -_sd_buf_list_t _sd_buflist; - -/* - * _sd_add_vm_to_bp_plist - add the page corresponding to the - * virtual address "v" (kernel virtaddr) to the pagelist linked - * to buffer "bp". - * - * The virtual address "v" is "known" to be allocated by segkmem - * and we can look up the page by using the segkmem vnode kvp. - * This violates the ddi/ddk but is workable for now anyway. - * - * - */ -static void -_sd_add_vm_to_bp_plist(struct buf *bp, unsigned char *v) -{ - page_t *pp; - page_t *one_pg = NULL; - - pp = page_find(&kvp, (u_offset_t)((uintptr_t)v & ~page_offset_mask)); - if (!pp) { - cmn_err(CE_PANIC, - "_sd_add_vm_to_bp_plist: couldn't find page for 0x%p", - (void *)v); - } - - page_add(&one_pg, pp); - page_list_concat(&(bp->b_pages), &one_pg); - -} - -#ifdef _SD_BIO_STATS -static int -_sd_count_pages(page_t *pp) -{ - int cnt = 0; - page_t *pp1; - if (pp == NULL) - return (cnt); - - for (cnt = 1, pp1 = pp->p_next; pp != pp1; cnt++, pp1 = pp1->p_next) - ; - - return (cnt); -} -#endif /* _SD_BIO_STATS */ - - -/* - * _sdbc_iobuf_load - load time initialization of io bufs structures. - * - * - * RETURNS: - * 0 - success. - * -1 - failure. - * - * USAGE: - * This routine initializes load time buf structures. - * Should be called when the cache is loaded. 
- */ - -int -_sdbc_iobuf_load(void) -{ - mutex_init(&sdbc_bio_mutex, NULL, MUTEX_DRIVER, NULL); - - /* - * HACK add a ref to kvp, to prevent VN_RELE on it from panicing - * the system - */ - VN_HOLD(&kvp); - - return (0); -} - -/* - * _sdbc_iobuf_unload - unload time cleanup of io buf structures. - * - * - * USAGE: - * This routine removes load time buf structures. - * Should be called when the cache is unloaded. - */ -void -_sdbc_iobuf_unload(void) -{ - mutex_enter(&kvp.v_lock); - ASSERT(kvp.v_count == 1); - VN_RELE_LOCKED(&kvp); - mutex_exit(&kvp.v_lock); - - mutex_destroy(&sdbc_bio_mutex); - bzero(&_sd_buflist, sizeof (_sd_buf_list_t)); -} - -/* - * _sdbc_iobuf_configure - configure a list of io bufs for later use. - * - * ARGUMENTS: - * num_bufs - number of buffers. (from the configuration file) - * - * RETURNS: - * 0 - success. - * <0 - failure. - * - * USAGE: - * This routine configures the buf structures for io. - * Should be called when the cache is configured. - */ - -int -_sdbc_iobuf_configure(int num) -{ - int i; - _sd_buf_list_t *buflist; - iob_hook_t *hook; - char symbol_name[32]; - - if (!num || (num > _SD_DEFAULT_IOBUFS)) - num = _SD_DEFAULT_IOBUFS; - - if ((_sd_buflist.hooks = (iob_hook_t *)nsc_kmem_zalloc( - num * sizeof (iob_hook_t), KM_SLEEP, sdbc_iobuf_mem)) == NULL) { - return (-1); - } - - buflist = &_sd_buflist; - buflist->bl_init_count = num; - buflist->bl_hooks_avail = num; - buflist->bl_hook_lowmark = num; - hook = buflist->hooks; - buflist->hook_head = hook; - for (i = 0; i < num; i++, hook++) { - cv_init(&hook->wait, NULL, CV_DRIVER, NULL); - (void) sprintf(symbol_name, "sd_iob_dcb%d", i); - hook->iob_drv_iodone = (dcb_t)kobj_getsymvalue(symbol_name, 0); - if (!hook->iob_drv_iodone) { - return (-2); - } - hook->next_hook = hook+1; - } - (hook-1)->next_hook = NULL; - - for (i = 0; i < MAX_HOOK_LOCKS; i++) - mutex_init(&_sd_buflist.hook_locks[i], NULL, MUTEX_DRIVER, - NULL); - - cv_init(&_sd_buflist.hook_wait, NULL, CV_DRIVER, NULL); - 
_sd_buflist.hook_waiters = 0; - - sdbc_bio_count = 0; - SD_WRITES_TOT = 0; - bzero(SD_WRITES_LEN, sizeof (SD_WRITES_LEN)); - - /* pagelist i/o pages must be done in cache_init */ - - page_size = ptob(1); - page_offset_mask = page_size - 1; - - return (0); -} - -/* - * _sdbc_iobuf_deconfigure - release all memory allocated for buf list - * - * ARGUMENTS: - * None. - * - * RETURNS: - * 0 - */ -void -_sdbc_iobuf_deconfigure(void) -{ - ushort_t i; - - if (_sd_buflist.hooks) { - for (i = 0; i < _sd_buflist.bl_init_count; i ++) { - cv_destroy(&_sd_buflist.hooks[i].wait); - } - cv_destroy(&_sd_buflist.hook_wait); - nsc_kmem_free(_sd_buflist.hooks, - _sd_buflist.bl_init_count * sizeof (iob_hook_t)); - for (i = 0; i < MAX_HOOK_LOCKS; i ++) { - mutex_destroy(&_sd_buflist.hook_locks[i]); - } - } - - _sd_buflist.hooks = NULL; - -#ifdef DEBUG - { - void _sdbc_ioj_clear_err(int); - _sdbc_ioj_clear_err(-1); /* clear any injected i/o errors */ - _sdbc_ioj_set_dev(-1, 0); /* clear dev entries */ - } -#endif - -} - -/* - * _sd_pending_iobuf() - * - * Return the number of I/O bufs outstanding - */ -int -_sd_pending_iobuf(void) -{ - return (sdbc_bio_count); -} - -/* - * _sd_get_iobuf - allocate a buf. - * - * ARGUMENTS: - * None. - * - * RETURNS: - * NULL - failure. - * buf ptr otherwise. - * - * ASSUMPTIONS - process could block if we run out. - * - */ -/*ARGSUSED*/ -static struct buf * -_sd_get_iobuf(int num_bdl) -{ - struct buf *bp; - - /* Get a buffer, ready for page list i/o */ - - if (DO_PAGE_LIST) - bp = pageio_setup(NULL, 0, &kvp, 0); - else - bp = getrbuf(KM_SLEEP); - - if (bp == NULL) - return (NULL); - mutex_enter(&sdbc_bio_mutex); - sdbc_bio_count++; - mutex_exit(&sdbc_bio_mutex); - return (bp); -} - -/* - * _sd_put_iobuf - put a buf back in the freelist. - * - * ARGUMENTS: - * bp - buf pointer. 
- * - * RETURNS: - * 0 - * - */ -static void -_sd_put_iobuf(struct buf *bp) -{ - mutex_enter(&sdbc_bio_mutex); - sdbc_bio_count--; - mutex_exit(&sdbc_bio_mutex); - if (DO_PAGE_LIST) - pageio_done(bp); - else - freerbuf(bp); -} - - -/* use for ORing only */ -#define B_KERNBUF 0 - -static void -_sd_setup_iob(struct buf *bp, dev_t dev, nsc_off_t pos, int flag) -{ - bp->b_pages = NULL; - bp->b_un.b_addr = 0; - - flag &= (B_READ | B_WRITE); - - /* - * if pagelist i/o, _sd_get_iobuf()/pageio_setup() has already - * set b_flags to - * B_KERNBUF | B_PAGEIO | B_NOCACHE | B_BUSY (sol 6,7,8) - * or - * B_PAGEIO | B_NOCACHE | B_BUSY (sol 9) - */ - - bp->b_flags |= B_KERNBUF | B_BUSY | flag; - - bp->b_error = 0; - - bp->b_forw = NULL; - bp->b_back = NULL; - - bp->b_lblkno = (diskaddr_t)pos; - bp->b_bufsize = 0; - bp->b_resid = 0; - bp->b_proc = NULL; - bp->b_edev = dev; -} - - -/* - * _sd_get_hook - get an iob hook from the free list. - * - * ARGUMENTS: - * none - * - * RETURNS: - * the newly allocated iob_hook. - * - */ -static iob_hook_t * -_sd_get_hook(void) -{ - - iob_hook_t *ret; - - mutex_enter(&sdbc_bio_mutex); - -retry: - ret = _sd_buflist.hook_head; - if (ret) - _sd_buflist.hook_head = ret->next_hook; - else { - ++_sd_buflist.hook_waiters; - if (_sd_buflist.max_hook_waiters < _sd_buflist.hook_waiters) - _sd_buflist.max_hook_waiters = _sd_buflist.hook_waiters; - cv_wait(&_sd_buflist.hook_wait, &sdbc_bio_mutex); - --_sd_buflist.hook_waiters; - goto retry; - } - - if (_sd_buflist.bl_hook_lowmark > --_sd_buflist.bl_hooks_avail) - _sd_buflist.bl_hook_lowmark = _sd_buflist.bl_hooks_avail; - - mutex_exit(&sdbc_bio_mutex); - ret->skipped = 0; - - ret->count = 0; - -#ifdef _SD_BIO_STATS - ret->PAGE_IO = 0; - ret->NORM_IO = 0; - ret->NORM_IO_SIZE = 0; - ret->SKIP_IO = 0; - ret->PAGE_COMBINED = 0; -#endif /* _SD_BIO_STATS */ - - return (ret); -} - -/* - * _sd_put_hook - put an iob hook back on the free list. 
- * - * ARGUMENTS: - * hook - an iob_hook to be returned to the freelist. - * - * - */ -static void -_sd_put_hook(iob_hook_t *hook) -{ - - mutex_enter(&sdbc_bio_mutex); - - if (_sd_buflist.hook_waiters) { - cv_signal(&_sd_buflist.hook_wait); - } - hook->next_hook = _sd_buflist.hook_head; - _sd_buflist.hook_head = hook; - - ++_sd_buflist.bl_hooks_avail; - - mutex_exit(&sdbc_bio_mutex); -} - -/* - * _sd_extend_iob - the i/o block we are handling needs a new struct buf to - * describe the next hunk of i/o. Get a new struct buf initialize it based - * on the state in the struct buf we are passed as an arg. - * ARGUMENTS: - * head_bp - a buffer header in the current i/o block we are handling. - * (generally the initial header but in fact could be any - * of the ones [if any] that were chained to the initial - * one). - */ -static struct buf * -_sd_extend_iob(struct buf *head_bp) -{ - struct buf *bp; - iob_hook_t *hook = (iob_hook_t *)head_bp->b_private; - - - if (!(bp = _sd_get_iobuf(0))) - return (0); - - bp->b_pages = NULL; - bp->b_un.b_addr = 0; - - bp->b_flags |= (head_bp->b_flags & (B_READ | B_WRITE)); - - if (!DO_PAGE_LIST) - bp->b_flags |= B_KERNBUF | B_BUSY; - - bp->b_error = 0; - - /* - * b_forw/b_back will form a doubly linked list of all the buffers - * associated with this block of i/o. - * hook->tail points to the last buffer in the chain. - */ - bp->b_forw = NULL; - bp->b_back = hook->tail; - hook->tail->b_forw = bp; - hook->tail = bp; - hook->count++; - - ASSERT(BLK_FBA_OFF(hook->size) == 0); - - bp->b_lblkno = (diskaddr_t)hook->start_fba + - (diskaddr_t)FBA_NUM(hook->size); - - bp->b_bufsize = 0; - bp->b_resid = 0; - bp->b_proc = NULL; - bp->b_edev = head_bp->b_edev; - - bp->b_iodone = NULL; /* for now */ - bp->b_private = hook; - - return (bp); -} - -/* - * sd_alloc_iob - start processing a block of i/o. 
This allocates an initial - * buffer header for describing the i/o and a iob_hook for collecting - * information about all the i/o requests added to this buffer. - * - * ARGUMENTS: - * dev - the device all the i/o is destined for. - * fba_pos - the initial disk block to read. - * blks - ignored - * flag - signal whether this is a read or write request. - * - * RETURNS: - * pointer to free struct buf which will be used to describe i/o request. - */ -/* ARGSUSED */ -struct buf * -sd_alloc_iob(dev_t dev, nsc_off_t fba_pos, int blks, int flag) -{ - struct buf *bp; - iob_hook_t *hook; - - if (!(bp = _sd_get_iobuf(0))) - return (0); - - _sd_setup_iob(bp, dev, fba_pos, flag); - - bp->b_iodone = NULL; /* for now */ - hook = _sd_get_hook(); - if (!hook) { - /* can't see how this could happen */ - _sd_put_iobuf(bp); - return (0); - } - - /* - * pick an arbitrary lock - */ - hook->lockp = &_sd_buflist.hook_locks[((long)hook >> 9) & - (MAX_HOOK_LOCKS - 1)]; - hook->start_fba = fba_pos; - hook->last_fba = fba_pos; - hook->size = 0; - hook->tail = bp; - hook->chain = bp; - hook->count = 1; - hook->error = 0; - bp->b_private = hook; - - return (bp); -} - -/* - * _sd_pack_pages - produce i/o requests that will perform the type of i/o - * described by bp (READ/WRITE). It attempt to tack the i/o onto the - * buf pointer to by list to minimize the number of bufs required. - * - * ARGUMENTS: - * bp - is the i/o description i.e. head - * list - is where to start adding this i/o request (null if we should extend) - * addr - address describing where the data is. - * offset - offset from addr where data begins - * size - size of the i/o request. 
- */ -static void -_sd_pack_pages(struct buf *bp, struct buf *list, sd_addr_t *addr, - nsc_off_t offset, nsc_size_t size) -{ - uintptr_t start_addr, end_addr; - int page_end_aligned; -#ifdef _SD_BIO_STATS - iob_hook_t *hook = (iob_hook_t *)bp->b_private; - struct buf *orig_list = list; -#endif /* _SD_BIO_STATS */ - - start_addr = (uintptr_t)addr->sa_virt + offset; - end_addr = start_addr + size; - - page_end_aligned = !(end_addr & page_offset_mask); - - if (!list && !(list = _sd_extend_iob(bp))) { - /* - * we're hosed since we have no error return... - * though we could ignore stuff from here on out - * and return ENOMEM when we get to sd_start_io. - * This will do for now. - */ - cmn_err(CE_PANIC, "_sd_pack_pages: couldn't extend iob"); - } - - /* - * We only want to do pagelist i/o if we end on a page boundary. - * If we don't end on a page boundary we won't combine with the - * next request and so we may as well do it as normal as it - * will only use one buffer. - */ - - if (DO_PAGE_LIST && page_end_aligned) { - if (start_addr & page_offset_mask) { - /* - * handle the partial page - */ - if (list->b_bufsize) { - if (!(list = _sd_extend_iob(bp))) { - /* - * we're hosed since we have no error - * return though we could ignore stuff - * from here on out and return ENOMEM - * when we get to sd_start_io. - * This will do for now. - */ - cmn_err(CE_PANIC, - "_sd_pack_pages: couldn't extend iob"); - } - } -#ifdef _SD_BIO_STATS - hook->PAGE_IO++; -#endif /* _SD_BIO_STATS */ - _sd_add_vm_to_bp_plist(list, - (unsigned char *) start_addr); - list->b_bufsize = page_size - - (start_addr & page_offset_mask); - list->b_un.b_addr = (caddr_t) - (start_addr & page_offset_mask); - size -= list->b_bufsize; - start_addr += list->b_bufsize; - } - /* - * Now fill with all the full pages remaining. 
- */ - for (; size > 0; size -= page_size) { -#ifdef _SD_BIO_STATS - hook->PAGE_IO++; -#endif /* _SD_BIO_STATS */ - - _sd_add_vm_to_bp_plist(list, - (unsigned char *) start_addr); - start_addr += page_size; - list->b_bufsize += page_size; -#ifdef _SD_BIO_STATS - if (list == orig_list) - hook->PAGE_COMBINED++; -#endif /* _SD_BIO_STATS */ - } - if (size) - cmn_err(CE_PANIC, "_sd_pack_pages: bad size: %" - NSC_SZFMT, size); - } else { - /* - * Wasn't worth it as pagelist i/o, do as normal - */ - if (list->b_bufsize && !(list = _sd_extend_iob(bp))) { - /* - * we're hosed since we have no error return... - * though we could ignore stuff from here on out - * and return ENOMEM when we get to sd_start_io. - * This will do for now. - */ - cmn_err(CE_PANIC, - "_sd_pack_pages: couldn't extend iob"); - } - - /* kernel virtual */ - list->b_flags &= ~(B_PHYS | B_PAGEIO); - list->b_un.b_addr = (caddr_t)start_addr; -#ifdef _SD_BIO_STATS - hook->NORM_IO++; - hook->NORM_IO_SIZE += size; -#endif /* _SD_BIO_STATS */ - list->b_bufsize = (size_t)size; - } - -} - -/* - * perform same function as _sd_pack_pages() when not doing pageio - */ -static void -_sd_pack_pages_nopageio(struct buf *bp, struct buf *list, sd_addr_t *addr, - nsc_off_t offset, nsc_size_t size) -{ - uintptr_t start_addr; -#ifdef _SD_BIO_STATS - iob_hook_t *hook = (iob_hook_t *)bp->b_private; - struct buf *orig_list = list; -#endif /* _SD_BIO_STATS */ - - start_addr = (uintptr_t)addr->sa_virt + offset; - - if (!list && !(list = _sd_extend_iob(bp))) { - /* - * we're hosed since we have no error return... - * though we could ignore stuff from here on out - * and return ENOMEM when we get to sd_start_io. - * This will do for now. 
- */ - cmn_err(CE_PANIC, "_sd_pack_pages_nopageio: couldn't " - "extend iob"); - } - - if (list->b_bufsize && - (start_addr == (uintptr_t)(list->b_un.b_addr + list->b_bufsize))) { - /* contiguous */ - list->b_bufsize += (size_t)size; - } else { - /* - * not contiguous mem (extend) or first buffer (bufsize == 0). - */ - if (list->b_bufsize && !(list = _sd_extend_iob(bp))) { - /* - * we're hosed since we have no error return... - * though we could ignore stuff from here on out - * and return ENOMEM when we get to sd_start_io. - * This will do for now. - */ - cmn_err(CE_PANIC, "_sd_pack_pages_nopageio: couldn't " - "extend iob"); - } - list->b_un.b_addr = (caddr_t)start_addr; - list->b_bufsize = (size_t)size; - } - -#ifdef _SD_BIO_STATS - hook->NORM_IO++; - hook->NORM_IO_SIZE += size; -#endif /* _SD_BIO_STATS */ -} - -/* - * sd_add_fba - add an i/o request to the block of i/o described by bp. - * We try and combine this request with the previous request. In - * Addition we try and do the i/o as PAGELIST_IO if it satisfies - * the restrictions for it. If the i/o request can't be combined - * we extend the i/o description with a new buffer header and add - * it to the chain headed by bp. - * - * ARGUMENTS: - * bp - the struct buf describing the block i/o we are collecting. - * addr - description of the address where the data will read/written to. - * A NULL indicates that this i/o request doesn't need to actually - * happen. Used to mark reads when the fba is already in cache and - * dirty. - * - * fba_pos - offset from address in addr where the i/o is to start. - * - * fba_len - number of consecutive fbas to transfer. - * - * NOTE: It is assumed that the memory is physically contiguous but may span - * multiple pages (should a cache block be larger than a page). 
- * - */ -void -sd_add_fba(struct buf *bp, sd_addr_t *addr, nsc_off_t fba_pos, - nsc_size_t fba_len) -{ - nsc_off_t offset; - nsc_size_t size; - iob_hook_t *hook = (iob_hook_t *)bp->b_private; - - size = FBA_SIZE(fba_len); - offset = FBA_SIZE(fba_pos); - - if (addr) { - /* - * See if this can be combined with previous request(s) - */ - if (!bp->b_bufsize) { - if (DO_PAGE_LIST) - _sd_pack_pages(bp, bp, addr, offset, size); - else - _sd_pack_pages_nopageio(bp, bp, addr, offset, - size); - } else { - if (DO_PAGE_LIST) { - if (hook->tail->b_flags & B_PAGEIO) { - /* - * Last buffer was a pagelist. Unless a - * skip was detected the last request - * ended on a page boundary. If this - * one starts on one we combine the - * best we can. - */ - if (hook->skipped) - _sd_pack_pages(bp, NULL, addr, - offset, size); - else - _sd_pack_pages(bp, hook->tail, - addr, offset, size); - } else { - /* - * Last buffer was vanilla i/o or worse - * (sd_add_mem) - */ - _sd_pack_pages(bp, NULL, addr, offset, - size); - } - } else { - if (hook->skipped) - _sd_pack_pages_nopageio(bp, NULL, - addr, offset, size); - else - _sd_pack_pages_nopageio(bp, - hook->tail, addr, offset, size); - } - } - hook->skipped = 0; - } else { - /* Must be a read of dirty block we want to discard */ - - ASSERT(bp->b_flags & B_READ); -#ifdef _SD_BIO_STATS - hook->SKIP_IO++; -#endif /* _SD_BIO_STATS */ - hook->skipped = 1; - if (!bp->b_bufsize) - bp->b_lblkno += fba_len; - } - hook->size += size; - -} - -/* - * sd_add_mem - add an i/o request to the block of i/o described by bp. - * The memory target for this i/o may span multiple pages and may - * not be physically contiguous. - * also the len might also not be a multiple of an fba. - * - * ARGUMENTS: - * bp - the struct buf describing the block i/o we are collecting. - * - * buf - target of this i/o request. - * - * len - number of bytes to transfer. 
- * - */ -void -sd_add_mem(struct buf *bp, char *buf, nsc_size_t len) -{ - nsc_size_t n; - uintptr_t start; - iob_hook_t *hook = (iob_hook_t *)bp->b_private; - - start = (uintptr_t)buf & page_offset_mask; - - for (; len > 0; buf += n, len -= n, start = 0) { - n = min((nsc_size_t)len, (nsc_size_t)(page_size - start)); - /* - * i/o size must be multiple of an FBA since we can't - * count on lower level drivers to understand b_offset - */ - if (BLK_FBA_OFF(n) != 0) { - cmn_err(CE_WARN, - "!sdbc(sd_add_mem) i/o request not FBA sized (%" - NSC_SZFMT ")", n); - } - - if (!bp->b_bufsize) { - /* first request */ - bp->b_flags &= ~(B_PHYS | B_PAGEIO); - bp->b_un.b_addr = buf; - bp->b_bufsize = (size_t)n; - } else { - struct buf *new_bp; - if (!(new_bp = _sd_extend_iob(bp))) { - /* we're hosed */ - cmn_err(CE_PANIC, - "sd_add_mem: couldn't extend iob"); - } - new_bp->b_flags &= ~(B_PHYS | B_PAGEIO); - new_bp->b_un.b_addr = buf; - new_bp->b_bufsize = (size_t)n; - } - hook->size += n; - } -} - - -/* - * sd_start_io - start all the i/o needed to satisfy the i/o request described - * by bp. If supplied the a non-NULL fn then this is an async request - * and we will return NSC_PENDING and call fn when all the i/o complete. - * Otherwise this is a synchronous request and we sleep until all the - * i/o is complete. If any buffer in the chain gets an error we return - * the first error we see (once all the i/o is complete). - * - * ARGUMENTS: - * bp - the struct buf describing the block i/o we are collecting. - * - * strategy - strategy function to call if known by the user, or NULL. - * - * fn - user's callback function. NULL implies synchronous request. - * - * arg - an argument passed to user's callback function. 
- * - */ -int -sd_start_io(struct buf *bp, strategy_fn_t strategy, sdbc_ea_fn_t fn, - blind_t arg) -{ - int err; - iob_hook_t *hook = (iob_hook_t *)bp->b_private; - struct buf *bp_next; - int (*ea_fn)(struct buf *, iob_hook_t *); -#ifdef _SD_BIO_STATS - static int total_pages, total_pages_combined, total_norm; - static int total_norm_combined, total_skipped; - static nsc_size_t total_norm_size; - - static int total_bufs; - static int total_xpages_w, total_ypages_w; - static int total_xpages_r, total_ypages_r; - static int max_run_r, max_run_w; - -#endif /* _SD_BIO_STATS */ - - hook->func = fn; - hook->param = arg; - if (fn != NULL) - ea_fn = _sd_async_ea; - else - ea_fn = _sd_sync_ea; - - hook->iob_hook_iodone = ea_fn; - -#ifdef _SD_BIO_STATS - __start_io_count++; - total_pages += hook->PAGE_IO; - total_pages_combined += hook->PAGE_COMBINED; - total_norm += hook->NORM_IO; - total_norm_size += hook->NORM_IO_SIZE; - total_skipped += hook->SKIP_IO; -#endif /* _SD_BIO_STATS */ - - for (; bp; bp = bp_next) { - - DTRACE_PROBE4(sd_start_io_bufs, struct buf *, bp, long, bp->b_bufsize, - int, bp->b_flags, iob_hook_t *, hook); - - bp_next = bp->b_forw; - if (!(bp->b_flags & B_READ)) { - SD_WRITES_TOT++; - SD_WRITES_LEN[(bp->b_bufsize/32768) % - (sizeof (SD_WRITES_LEN)/sizeof (int))]++; - } - bp->b_iodone = hook->iob_drv_iodone; - bp->b_bcount = bp->b_bufsize; - bp->b_forw = NULL; - bp->b_back = NULL; - bp->b_private = NULL; - -#ifdef _SD_BIO_STATS - total_bufs ++; - if (bp->b_flags & B_PAGEIO) { - int i; - i = _sd_count_pages(bp->b_pages); - if (bp->b_flags & B_READ) { - if (i > max_run_r) - max_run_r = i; - total_xpages_r += i; - total_ypages_r++; - } else { - if (i > max_run_w) - max_run_w = i; - total_xpages_w += i; - total_ypages_w++; - } - } -#endif /* _SD_BIO_STATS */ - - - /* - * It's possible for us to be told to read a dirty block - * where all the i/o can go away (e.g. 
read one fba, it's - * in cache and dirty) so we really have nothing to do but - * say we're done. - */ - if (bp->b_bcount) { - if (!strategy) { - strategy = - nsc_get_strategy(getmajor(bp->b_edev)); - } - - if (!strategy) { - bp->b_flags |= B_ERROR; - bp->b_error = ENXIO; - (*bp->b_iodone)(bp); - } else -#ifdef DEBUG - /* inject i/o error for testing */ - if (bp->b_error = _sdbc_ioj_lookup(bp->b_edev)) { - bp->b_flags |= B_ERROR; - (*bp->b_iodone)(bp); - } else -#endif - { - (*strategy)(bp); - } - } else { - (*bp->b_iodone)(bp); - } - - } - -#ifdef _SD_BIO_STATS - if (__start_io_count == 2000) { - __start_io_count = 0; - cmn_err(CE_WARN, - "!sdbc(sd_start_io) t_bufs %d pages %d " - "combined %d norm %d norm_size %" NSC_SZFMT " skipped %d", - total_bufs, - total_pages, total_pages_combined, total_norm, - total_norm_size, total_skipped); - - total_bufs = 0; - total_pages = 0; - total_pages_combined = 0; - total_norm = 0; - total_norm_combined = 0; - total_skipped = 0; - total_norm_size = 0; - - cmn_err(CE_WARN, - "!sdbc(sd_start_io)(r) max_run %d, total_xp %d total yp %d", - max_run_r, total_xpages_r, total_ypages_r); - - total_xpages_r = 0; - total_ypages_r = 0; - max_run_r = 0; - - cmn_err(CE_WARN, - "!sdbc(sd_start_io)(w) max_run %d, total_xp %d total yp %d", - max_run_w, total_xpages_w, total_ypages_w); - - total_xpages_w = 0; - total_ypages_w = 0; - max_run_w = 0; - } -#endif /* _SD_BIO_STATS */ - - if (ea_fn == _sd_async_ea) { - DTRACE_PROBE(sd_start_io_end); - - return (NSC_PENDING); - } - - mutex_enter(hook->lockp); - - while (hook->count) { - cv_wait(&hook->wait, hook->lockp); - } - mutex_exit(hook->lockp); - - err = hook->error ? hook->error : NSC_DONE; - bp = hook->tail; - _sd_put_hook(hook); - _sd_put_iobuf(bp); - - return (err); -} - -/* - * _sd_sync_ea - called when a single i/o operation is complete. If this - * is the last outstanding i/o we wakeup the sleeper. 
- * If this i/o had an error then we store the error result in the - * iob_hook if this was the first error. - * - * ARGUMENTS: - * bp - the struct buf describing the block i/o that just completed. - * - * Comments: - * This routine is called at interrupt level when the io is done. - */ - -static int -_sd_sync_ea(struct buf *bp, iob_hook_t *hook) -{ - - int error; - int done; - - /* - * We get called for each buf that completes. When they are all done. - * we wakeup the waiter. - */ - error = (bp->b_flags & B_ERROR) ? bp->b_error : 0; - - mutex_enter(hook->lockp); - - if (!hook->error) - hook->error = error; - - done = !(--hook->count); - if (done) { - /* remember the last buffer so we can free it later */ - hook->tail = bp; - cv_signal(&hook->wait); - } - mutex_exit(hook->lockp); - - /* - * let sd_start_io free the final buffer so the hook can be returned - * first. - */ - if (!done) - _sd_put_iobuf(bp); - - return (0); -} - -/* - * static int - * _sd_async_ea - End action for async read/write. - * - * ARGUMENTS: - * bp - io buf pointer. - * - * RETURNS: - * NONE. - * - * Comments: - * This routine is called at interrupt level when the io is done. - * This is only called when the operation is asynchronous. - */ -static int -_sd_async_ea(struct buf *bp, iob_hook_t *hook) -{ - int done, error; - - /* - * We get called for each buf that completes. When they are all done. - * we call the requestor's callback function. - */ - error = (bp->b_flags & B_ERROR) ? 
bp->b_error : 0; - - mutex_enter(hook->lockp); - done = !(--hook->count); - - if (!hook->error) - hook->error = error; - - mutex_exit(hook->lockp); - - bp->b_forw = NULL; - bp->b_back = NULL; - - if (done) { - nsc_off_t fba_pos; - nsc_size_t fba_len; - int error; - sdbc_ea_fn_t fn; - blind_t arg; - - arg = hook->param; - fn = hook->func; - error = hook->error; -#if defined(_SD_DEBUG) /* simulate disk errors */ - if (_test_async_fail == bp->b_edev) error = EIO; -#endif - - /* MAKE SURE b_lblkno, b_count never changes!! */ - fba_pos = hook->start_fba; - fba_len = FBA_LEN(hook->size); - - _sd_put_hook(hook); - _sd_put_iobuf(bp); - (*fn)(arg, fba_pos, fba_len, error); - } else - _sd_put_iobuf(bp); - - return (0); -} - -#ifdef DEBUG -typedef struct ioerr_inject_s { - dev_t ioj_dev; - int ioj_err; - int ioj_cnt; -} ioerr_inject_t; - -static ioerr_inject_t *ioerr_inject_table = NULL; - -void -_sdbc_ioj_load() -{ - ioerr_inject_table = - kmem_zalloc(sdbc_max_devs * sizeof (ioerr_inject_t), KM_SLEEP); -} - -void -_sdbc_ioj_unload() -{ - if (ioerr_inject_table != NULL) { - kmem_free(ioerr_inject_table, - sdbc_max_devs * sizeof (ioerr_inject_t)); - ioerr_inject_table = NULL; - } -} - -static int -_sdbc_ioj_lookup(dev_t dev) -{ - int cd; - - for (cd = 0; cd < sdbc_max_devs; ++cd) - if (ioerr_inject_table[cd].ioj_dev == dev) { - if (ioerr_inject_table[cd].ioj_cnt > 0) { - --ioerr_inject_table[cd].ioj_cnt; - return (0); - } else { - return (ioerr_inject_table[cd].ioj_err); - } - } - return (0); -} - -void -_sdbc_ioj_set_dev(int cd, dev_t crdev) -{ - int i; - - if (cd == -1) { /* all -- used for clearing table on shutdown */ - for (i = 0; i < sdbc_max_devs; ++i) { - ioerr_inject_table[i].ioj_dev = crdev; - } - } else - ioerr_inject_table[cd].ioj_dev = crdev; /* assume valid cd */ -} - -static -void -_sdbc_ioj_set_err(int cd, int err, int count) -{ - int i; - - if (cd == -1) { /* all */ - for (i = 0; i < sdbc_max_devs; ++i) { - ioerr_inject_table[i].ioj_err = err; - 
ioerr_inject_table[i].ioj_cnt = count; - } - } else { - ioerr_inject_table[cd].ioj_err = err; - ioerr_inject_table[cd].ioj_cnt = count; - } -} - -static void -_sdbc_ioj_clear_err(int cd) -{ - _sdbc_ioj_set_err(cd, 0, 0); -} - -int -_sdbc_inject_ioerr(int cd, int ioj_err, int count) -{ - if ((cd < -1) || (cd >= sdbc_max_devs)) - return (EINVAL); - - _sdbc_ioj_set_err(cd, ioj_err, count); - - return (0); -} - -int -_sdbc_clear_ioerr(int cd) -{ - if ((cd < -1) || (cd >= sdbc_max_devs)) - return (EINVAL); - - _sdbc_ioj_clear_err(cd); - - return (0); -} -#endif diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_bio.h b/usr/src/uts/common/avs/ns/sdbc/sd_bio.h deleted file mode 100644 index 132bb3152b..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_bio.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SD_BIO_H -#define _SD_BIO_H - -#ifdef __cplusplus -extern "C" { -#endif - -extern int _sdbc_iobuf_load(void); -extern void _sdbc_iobuf_unload(void); -extern int _sdbc_iobuf_configure(int); -extern void _sdbc_iobuf_deconfigure(void); -extern int _sd_pending_iobuf(void); -extern struct buf *sd_alloc_iob(dev_t, nsc_off_t, int, int); -extern void sd_add_fba(struct buf *, sd_addr_t *, nsc_off_t, nsc_size_t); -extern void sd_add_mem(struct buf *, char *, nsc_size_t); -extern int sd_start_io(struct buf *, strategy_fn_t, sdbc_ea_fn_t, blind_t); - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_BIO_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_cache.h b/usr/src/uts/common/avs/ns/sdbc/sd_cache.h deleted file mode 100644 index 2d45e3fbbc..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_cache.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SD_CACHE_H -#define _SD_CACHE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/debug.h> -#include <sys/nsctl/nsctl.h> - -/* - * Compiler defines - */ - -#define _SD_FAULT_RES /* Enable Fault tolerance */ - -#define _SD_USE_THREADS /* Use own threadset */ -#define _SD_LRU_OPTIMIZE /* Enable LRU queue optimizations */ -#define _SD_HASH_OPTIMIZE /* Enable Hash optimizations */ - -#if !defined(_SD_NO_GENERIC) -#define _SD_MULTIUSER /* Block locking (concurrent/dual copy) */ -#endif /* (_SD_NO_GENERIC) */ - -#if defined(_SD_OPTIM_ALLOC) -#define _SD_NOCHECKS /* Disable handle allocation checks */ -#define _SD_NOTRACE /* Disable SDTRACE() macro */ -#undef _SD_MULTIUSER /* Disable Block locking */ -#if (_SD_OPTIM_ALLOC+0 > 1) -#define _SD_NOSTATS /* Disable read/write counts */ -#endif -#endif /* (_SD_OPTIM_ALLOC) */ - -#if defined(_SD_CHECKS) /* Enable checks, stats, and tracing */ -#undef _SD_NOCHECKS -#undef _SD_NOTRACE -#undef _SD_NOSTATS -#define _SD_STATS /* Enable cache hits/longevity stats */ -#if (_SD_CHECKS+0 > 1) -#define _SD_DEBUG /* Extra debugging checks */ -#endif -#endif /* (_SD_CHECKS) */ - -#if defined(_SD_NOTRACE) && defined(_SD_STATS) -#undef _SD_STATS /* _SD_STATS requires SDTRACE() macro */ -#endif - -/* - * Other compiler defines currently not enabled. - * #define _SD_FBA_DATA_LOG Enable data logging per 512 bytes. - * Other compiler defines enabled in the Makefile. 
- * #define _SD_8K_BLKSIZE Allow 8K cache block size - */ - -extern int _sd_cblock_shift; -#define BLK_SHFT (_sd_cblock_shift) -#define BLK_MASK ((1 << BLK_SHFT) - 1) -#define BLK_SIZE(x) ((x) << BLK_SHFT) -#define BLK_NUM(x) ((x) >> BLK_SHFT) -#define BLK_LEN(x) ((x + BLK_MASK) >> BLK_SHFT) -#define BLK_OFF(x) ((x) & BLK_MASK) - - - -#define BLK_FBA_SHFT (BLK_SHFT - FBA_SHFT) -#define BLK_FBA_MASK ((1 << BLK_FBA_SHFT) - 1) -#define BLK_TO_FBA_NUM(x) \ - ((x) << BLK_FBA_SHFT) /* block_num to fba_num */ -#define BLK_FBA_OFF(x) ((x) & BLK_FBA_MASK) /* fba offset within */ - /* a cache block */ - -#define FBA_TO_BLK_NUM(x) \ - ((x) >> BLK_FBA_SHFT) /* fba_num to a */ - /* block_num */ - -/* fba_num to the next higher block_num */ -#define FBA_TO_BLK_LEN(x) ((x + BLK_FBA_MASK) >> BLK_FBA_SHFT) - -/* - * This is the set of flags that are valid. Anything else set in the - * handle is invalid and the handle should be rejected during an allocation. - */ - -#define _SD_VALID_FLAGS (NSC_RDWRBUF | NSC_NOBLOCK | NSC_WRTHRU | NSC_NOCACHE\ - | NSC_HALLOCATED | NSC_BCOPY | NSC_PAGEIO \ - | NSC_PINNABLE | NSC_MIXED | NSC_FORCED_WRTHRU \ - | NSC_METADATA) - - -#define _SD_FLAG_MASK (NSC_FLAGS) -#define _SD_HINT_MASK (NSC_HINTS) -#define _SD_WRTHRU_MASK (NSC_WRTHRU | NSC_FORCED_WRTHRU) -#define _SD_NOCACHE_MASK (NSC_NOCACHE) - - - -#define _SD_INVALID_CD(cd) ((cd) > sdbc_max_devs) - -#define _INFSD_NODE_UP(i) (nsc_node_up(i)) - -#ifdef m88k -#define _sd_cache_initialized _INFSD_cache_initialized -#endif - -#define _SD_MAX_FBAS 1024 -/* - * Allow one entry for null terminator and another to handle - * requests that are not cache block aligned. 
- */ -#if defined(_SD_8K_BLKSIZE) -#define _SD_MAX_BLKS (2 + ((_SD_MAX_FBAS) >> 4)) -#else -#define _SD_MAX_BLKS (2 + ((_SD_MAX_FBAS) >> 3)) -#endif - -/* cd to use for _sd_centry_alloc to avoid entering hash table */ - -#define _CD_NOHASH -1 - -#if defined(_KERNEL) || defined(_KMEMUSER) - -struct _sd_buf_handle; -typedef void (*sdbc_callback_fn_t)(struct _sd_buf_handle *); - -typedef struct _sd_buf_handle { - nsc_buf_t bh_buf; /* Generic buffer - must be first */ - nsc_vec_t bh_bufvec[_SD_MAX_BLKS]; /* Scatter gather list */ - int bh_cd; - sdbc_callback_fn_t bh_disconnect_cb; - sdbc_callback_fn_t bh_read_cb; - sdbc_callback_fn_t bh_write_cb; - struct _sd_cctl *bh_centry; - struct _sd_buf_handle *bh_next; - struct _sd_buf_handle *bh_prev; - void *bh_alloc_thread; /* debug: kthread that alloc'd this handle */ - void *bh_busy_thread; /* debug: kthread that is using this handle */ - void *bh_param; -} _sd_buf_handle_t; - -#define bh_fba_pos bh_buf.sb_pos -#define bh_fba_len bh_buf.sb_len -#define bh_flag bh_buf.sb_flag -#define bh_error bh_buf.sb_error -#define bh_vec bh_buf.sb_vec - -#define _sd_bufvec_t nsc_vec_t -#define buflen sv_len -#define bufaddr sv_addr -#define bufvmeaddr sv_vme - -#endif /* _KERNEL || _KMEMUSER */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_CACHE_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_conf.c b/usr/src/uts/common/avs/ns/sdbc/sd_conf.c deleted file mode 100644 index 120b295c03..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_conf.c +++ /dev/null @@ -1,839 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/kmem.h> -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include "sd_bcache.h" -#include "sd_ft.h" -#include "sd_misc.h" -#include "sd_pcu.h" -#include "sd_io.h" -#include "sd_bio.h" -#include "sd_trace.h" -#include "sd_tdaemon.h" -#include <sys/nsctl/nsctl.h> - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> -#include <sys/nsctl/safestore.h> - -extern int sdbc_use_dmchain; - -int _sd_cblock_shift = 0; - -int _SD_SELF_HOST = _SD_NO_HOST; -int _SD_MIRROR_HOST = _SD_NO_HOST; -int _SD_NUM_REM; -int _sd_nodes_configured; -int _sdbc_gateway_wblocks; - -int _SD_NETS = 0; - -/* - * Normally we unregister memory at deconfig time. By setting this non-zero - * it will be delayed until unload time. 
- */ -int _sdbc_memtype_deconfigure_delayed = 0; - -nsc_mem_t *sdbc_iobuf_mem, *sdbc_hash_mem; -nsc_mem_t *sdbc_local_mem, *sdbc_stats_mem, *sdbc_cache_mem; -nsc_mem_t *sdbc_info_mem; - -_sd_cache_param_t _sd_cache_config; - -kmutex_t _sdbc_config_lock; -volatile int _sd_cache_dem_cnt; - -#if !defined(m88k) || defined(lint) -volatile int _sd_cache_initialized; -#endif - -static blind_t sdbc_power; - -static -nsc_def_t _sdbc_power_def[] = { - "Power_Lost", (uintptr_t)_sdbc_power_lost, 0, - "Power_OK", (uintptr_t)_sdbc_power_ok, 0, - "Power_Down", (uintptr_t)_sdbc_power_down, 0, - 0, 0, 0 -}; - -/* - * Forward declare all statics that are used before defined to enforce - * parameter checking - * Some (if not all) of these could be removed if the code were reordered - */ - -int _sd_fill_pattern(caddr_t addr, uint_t pat, uint_t size); -static void _sdbc_nodeid_deconfigure(void); -static void _sdbc_nodeid_configure(void); -static void _sdbc_thread_deconfigure(void); -static int _sdbc_thread_configure(void); -void sst_deinit(); - -ss_common_config_t safestore_config; -safestore_ops_t *sdbc_safestore; - -/* - * _sdbc_memtype_configure - register with the sd layer the types of memory - * we want to use. If any of the critical memory types can't be registered - * we return non-zero otherwise 0. - */ -static int -_sdbc_memtype_configure(void) -{ - - if ((sdbc_info_mem = nsc_register_mem("sdbc:info", - NSC_MEM_GLOBAL, KM_NOSLEEP)) == NULL) { - return (EINVAL); - } - - sdbc_local_mem = nsc_register_mem("sdbc:local", NSC_MEM_LOCAL, 0); - sdbc_stats_mem = nsc_register_mem("sdbc:stats", NSC_MEM_LOCAL, 0); - sdbc_iobuf_mem = nsc_register_mem("sdbc:iobuf", NSC_MEM_LOCAL, 0); - - sdbc_cache_mem = nsc_register_mem("sdbc:cache", NSC_MEM_LOCAL, 0); - - sdbc_hash_mem = nsc_register_mem("sdbc:hash", NSC_MEM_LOCAL, 0); - - return (0); -} - -/* - * _sdbc_memtype_deconfigure - undo the effects of _sdbc_memtype_configure. 
- */ -void -_sdbc_memtype_deconfigure(void) -{ - - if (sdbc_hash_mem) - nsc_unregister_mem(sdbc_hash_mem); - if (sdbc_iobuf_mem) - nsc_unregister_mem(sdbc_iobuf_mem); - if (sdbc_cache_mem) - nsc_unregister_mem(sdbc_cache_mem); - if (sdbc_stats_mem) - nsc_unregister_mem(sdbc_stats_mem); - if (sdbc_local_mem) - nsc_unregister_mem(sdbc_local_mem); - if (sdbc_info_mem) - nsc_unregister_mem(sdbc_info_mem); - - sdbc_info_mem = NULL; - sdbc_local_mem = sdbc_stats_mem = sdbc_cache_mem = NULL; - sdbc_iobuf_mem = sdbc_hash_mem = NULL; - -} - - -/* - * figure out what kind of safe storage we need - */ -uint_t -sdbc_determine_safestore() -{ - return (SS_M_RAM | SS_T_NONE); -} - -static void -sd_setup_ssconfig() -{ - safestore_config.ssc_client_psize = BLK_SIZE(1); - - if (_sd_cache_config.write_cache) - safestore_config.ssc_wsize = - _sd_cache_config.write_cache * MEGABYTE; - else - safestore_config.ssc_wsize = - (_sd_cache_config.cache_mem[_SD_NO_NET] * MEGABYTE)/2; - safestore_config.ssc_maxfiles = sdbc_max_devs; - safestore_config.ssc_pattern = _sd_cache_config.fill_pattern; - safestore_config.ssc_flag = _sd_cache_config.gen_pattern ? - SS_GENPATTERN : 0; -} - -/* - * _sdbc_configure - process the ioctl that describes the configuration - * for the cache. This is the main driver routine for cache configuration - * Return 0 on success, otherwise nonzero. 
- * - */ -int -_sdbc_configure(_sd_cache_param_t *uptr, - _sdbc_config_t *mgmt, spcs_s_info_t spcs_kstatus) -{ - int cache_bytes; - nsc_io_t *io; - char itmp[16]; - char itmp2[16]; - int i; - uint_t ss_type; - int rc; - - ASSERT(MUTEX_HELD(&_sdbc_config_lock)); - - _sd_print(1, "sdbc(_sdbc_configure) _SD_MAGIC 0x%x\n", _SD_MAGIC); - - _sd_ioset = 0; - if (_sd_cache_initialized) { - spcs_s_add(spcs_kstatus, SDBC_EALREADY); - rc = EALREADY; - goto out; - } - - ASSERT((uptr != NULL) || (mgmt != NULL)); - - if (uptr) { - if (copyin(uptr, &_sd_cache_config, - sizeof (_sd_cache_param_t))) { - rc = EFAULT; - goto out; - } - } else { - bzero(&_sd_cache_config, sizeof (_sd_cache_config)); - - /* copy in mgmt config info */ - - _sd_cache_config.magic = mgmt->magic; - _sd_cache_config.threads = mgmt->threads; - - for (i = 0; i < CACHE_MEM_PAD; i++) { - _sd_cache_config.cache_mem[i] = mgmt->cache_mem[i]; - } - - /* fake the rest as a single node config */ - - _sd_cache_config.nodes_conf[0] = nsc_node_id(); - _sd_cache_config.num_nodes = 1; - } - - /* - * Check that the requested cache size doesn't break the code. - * This test can be refined once the cache size is stored in variables - * larger than an int. 
- */ - for (i = 0; i < MAX_CACHE_NET; i++) { - if (_sd_cache_config.cache_mem[i] < 0) { - cmn_err(CE_WARN, "!_sdbc_configure: " - "negative cache size (%d) for net %d", - _sd_cache_config.cache_mem[i], i); - spcs_s_add(spcs_kstatus, SDBC_ENONETMEM); - rc = SDBC_ENONETMEM; - goto out; - } - if (_sd_cache_config.cache_mem[i] > MAX_CACHE_SIZE) { - _sd_cache_config.cache_mem[i] = MAX_CACHE_SIZE; - cmn_err(CE_WARN, "!_sdbc_configure: " - "cache size limited to %d megabytes for net %d", - MAX_CACHE_SIZE, i); - } - } - - if (_sd_cache_config.blk_size == 0) - _sd_cache_config.blk_size = 8192; - - if (_sd_cache_config.procs == 0) - _sd_cache_config.procs = 16; - -#if !defined(_SD_8K_BLKSIZE) - if (_sd_cache_config.blk_size != 4096) { -#else - if (_sd_cache_config.blk_size != 8192) { -#endif - (void) spcs_s_inttostring(_sd_cache_config.blk_size, itmp, - sizeof (itmp), 0); - spcs_s_add(spcs_kstatus, SDBC_ESIZE, itmp); - rc = SDBC_EENABLEFAIL; - goto out; - } - if (((_sd_cblock_shift = - get_high_bit(_sd_cache_config.blk_size)) == -1) || - (_sd_cache_config.blk_size != (1 << _sd_cblock_shift))) { - (void) spcs_s_inttostring(_sd_cache_config.blk_size, itmp, - sizeof (itmp), 0); - spcs_s_add(spcs_kstatus, SDBC_ESIZE, itmp); - rc = SDBC_EENABLEFAIL; - goto out; - } - - if (_sd_cache_config.magic != _SD_MAGIC) { - rc = SDBC_EMAGIC; - goto out; - } - - sdbc_use_dmchain = (_sd_cache_config.reserved1 & CFG_USE_DMCHAIN); - sdbc_static_cache = (_sd_cache_config.reserved1 & CFG_STATIC_CACHE); - - _sdbc_nodeid_configure(); - - if (_SD_SELF_HOST > nsc_max_nodeid || - _SD_MIRROR_HOST > nsc_max_nodeid) { - (void) spcs_s_inttostring((_SD_SELF_HOST > nsc_max_nodeid ? 
- _SD_SELF_HOST : _SD_MIRROR_HOST), itmp, sizeof (itmp), 0); - (void) spcs_s_inttostring( - nsc_max_nodeid, itmp2, sizeof (itmp2), 0); - spcs_s_add(spcs_kstatus, SDBC_EINVHOSTID, itmp, itmp2); - rc = SDBC_EENABLEFAIL; - goto out; - } - - - if (_SD_SELF_HOST == _SD_MIRROR_HOST) { - (void) spcs_s_inttostring( - _SD_SELF_HOST, itmp, sizeof (itmp), 0); - (void) spcs_s_inttostring( - _SD_MIRROR_HOST, itmp2, sizeof (itmp2), 0); - spcs_s_add(spcs_kstatus, SDBC_ENOTSAME, itmp, itmp2); - rc = SDBC_EENABLEFAIL; - goto out; - } - - /* initialize the safestore modules */ - sst_init(); - - /* figure out which kind of safestore we need to use */ - ss_type = sdbc_determine_safestore(); - -tryss: - /* open and configure the safestore module */ - if ((sdbc_safestore = sst_open(ss_type, 0)) == NULL) { - cmn_err(CE_WARN, "!cannot open safestore module for type %x", - ss_type); - rc = SDBC_EENABLEFAIL; - goto out; - } else { - sd_setup_ssconfig(); - if (SSOP_CONFIGURE(sdbc_safestore, &safestore_config, - spcs_kstatus)) { - cmn_err(CE_WARN, - "!cannot configure safestore module for type %x", - ss_type); - (void) sst_close(sdbc_safestore); - - /* try ram if possible, otherwise return */ - if ((ss_type & (SS_M_RAM | SS_T_NONE)) == - (SS_M_RAM | SS_T_NONE)) { - rc = SDBC_EENABLEFAIL; - goto out; - } - - ss_type = (SS_M_RAM | SS_T_NONE); - goto tryss; - } - } - - if (SAFESTORE_LOCAL(sdbc_safestore)) - _SD_MIRROR_HOST = _SD_NO_HOST; - - ASSERT(safestore_config.ssc_ss_psize <= UINT16_MAX); /* LINTED */ - _sd_net_config.sn_psize = safestore_config.ssc_ss_psize; - - - _sd_net_config.sn_csize = - _sd_cache_config.cache_mem[_SD_NO_NET] * MEGABYTE; - _sd_net_config.sn_cpages = - _sd_net_config.sn_csize / BLK_SIZE(1); - - _sd_net_config.sn_configured = 1; - cache_bytes = _sd_net_config.sn_cpages * BLK_SIZE(1); - - if (_sdbc_memtype_configure()) { - rc = EINVAL; - goto out; - } - - if ((rc = _sdbc_iobuf_configure(_sd_cache_config.iobuf))) { - if (rc == -1) { - rc = SDBC_ENOIOBMEM; - goto out; - } - 
if (rc == -2) { - rc = SDBC_ENOIOBCB; - goto out; - } - - } - - if (_sdbc_handles_configure()) { - rc = SDBC_ENOHANDLEMEM; - goto out; - } - - _sd_cache_dem_cnt = 0; - - - /* - * nvmem support: - * if the cache did not shutdown properly we mark it as dirty. - * this must be done before _sdbc_cache_configure() so it can - * refresh sd_info_mem and sd_file_mem from nvmem if necsssary, - * and before _sdbc_ft_configure() so the ft thread will do a recovery. - * - */ - if (SAFESTORE_RECOVERY(sdbc_safestore)) { - _sdbc_set_warm_start(); - _sdbc_ft_hold_io = 1; - cmn_err(CE_WARN, - "!sdbc(_sdbc_configure) cache marked dirty after" - " incomplete shutdown"); - } - - if ((rc = _sdbc_cache_configure(cache_bytes / BLK_SIZE(1), - spcs_kstatus))) { - goto out; - } - - - /* ST_ALERT trace buffer */ - if (_sdbc_tr_configure(-1 /* SDT_INV_CD */) != 0) { - rc = EINVAL; - goto out; - } - - if (_sdbc_thread_configure()) { - rc = SDBC_EFLUSHTHRD; - goto out; - } - - if (_sdbc_flush_configure()) { - rc = EINVAL; - goto out; - } - - if (rc = _sdbc_dealloc_configure_dm()) { - goto out; - } - - if (_sd_cache_config.test_demons) - if (_sdbc_tdaemon_configure(_sd_cache_config.test_demons)) { - rc = EINVAL; - goto out; - } - - - _sd_cache_initialized = 1; - - sdbc_power = nsc_register_power("sdbc", _sdbc_power_def); - - if (_sdbc_ft_configure() != 0) { - rc = EINVAL; - goto out; - } - - /* - * try to control the race between the ft thread - * and threads that will open the devices that the ft thread - * may be recovering. this synchronizing with the ft thread - * prevents sd_cadmin from returning until ft has opened - * the recovery devices, so if other apps wait for sd_cadmin - * to complete the race is prevented. 
- */ - mutex_enter(&_sdbc_ft_hold_io_lk); - while (_sdbc_ft_hold_io) { - cv_wait(&_sdbc_ft_hold_io_cv, &_sdbc_ft_hold_io_lk); - } - - io = nsc_register_io("sdbc", NSC_SDBC_ID|NSC_FILTER, - _sd_sdbc_def); - - if (io) sdbc_io = io; - - mutex_exit(&_sdbc_ft_hold_io_lk); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!sd_config: Cache has been configured"); -#endif - - rc = 0; - -out: - return (rc); -} - -/* - * _sdbc_deconfigure - Put the cache back to the unconfigured state. Release - * any memory we allocated as part of the configuration process (but not the - * load/init process). Put globals back to unconfigured state and shut down - * any processes/threads we have running. - * - * Since the cache has loaded we know that global lock/sv's are present and - * we can use them to produce an orderly deconfiguration. - * - * NOTE: this routine and its callee should always be capable of reversing - * the effects of _sdbc_configure no matter what partially configured - * state might be present. - * - */ -int -_sdbc_deconfigure(spcs_s_info_t spcs_kstatus) -{ - int i; - _sd_cd_info_t *cdi; - int rc; - int pinneddata = 0; - uint_t saved_hint; - - ASSERT(MUTEX_HELD(&_sdbc_config_lock)); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!SD cache being deconfigured."); -#endif - - /* check if there is pinned data and our mirror is down */ - if (_sd_cache_files && _sd_is_mirror_down()) { - for (i = 0; i < sdbc_max_devs; i++) { - cdi = &(_sd_cache_files[i]); - if (cdi->cd_info == NULL) - continue; - /* - * if (!(cdi->cd_info->sh_failed)) - * continue; - */ - if (!(_SD_CD_ALL_WRITES(i))) - continue; - spcs_s_add(spcs_kstatus, SDBC_EPINNED, - cdi->cd_info->sh_filename); - rc = SDBC_EDISABLEFAIL; - goto out; - } - } - - /* remember hint setting for restoration in case shutdown fails */ - (void) _sd_get_node_hint(&saved_hint); - - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - - - /* TODO - there is a possible race between deconfig and power hits... 
*/ - - if (sdbc_power) - (void) nsc_unregister_power(sdbc_power); - - - if (sdbc_io) { - rc = nsc_unregister_io(sdbc_io, NSC_PCATCH); - if (rc == 0) - sdbc_io = NULL; - else { - if (rc == EUSERS) - spcs_s_add(spcs_kstatus, SDBC_EABUFS); - - spcs_s_add(spcs_kstatus, SDBC_EUNREG); - - /* Re-register-power if it was register before. */ - if (sdbc_power) { - sdbc_power = nsc_register_power("sdbc", - _sdbc_power_def); - } - - /* Remove NSC_FORCED_WRTHRU if we set it */ - (void) _sd_clear_node_hint( - (~saved_hint) & _SD_HINT_MASK); - - rc = SDBC_EDISABLEFAIL; - goto out; - } - } - - sdbc_power = NULL; - -#if defined(_SD_FAULT_RES) - _sd_remote_disable(0); /* notify mirror to forced_wrthru */ -#endif - /* - * close devices, deconfigure processes, wait for exits - */ - _sdbc_tdaemon_deconfigure(); - - if (_sd_cache_files) { - for (i = 0; i < sdbc_max_devs; i++) { - if (FILE_OPENED(i) && ((rc = _sd_close(i)) > 0)) { - cmn_err(CE_WARN, "!sdbc(_sd_deconfigure)" - " %d not closed (%d)\n", i, rc); - } - } - } - - /* - * look for pinned data - * TODO sort this out for multinode systems. - * cannot shutdown with pinned data on multinode. - * the state of pinned data should be determined in - * the close operation. 
- */ - if (_sd_cache_files) { - for (i = 0; i < sdbc_max_devs; i++) { - cdi = &(_sd_cache_files[i]); - if (cdi->cd_info == NULL) - continue; - /* - * if (!(cdi->cd_info->sh_failed)) - * continue; - */ - if (!(_SD_CD_ALL_WRITES(i))) - continue; - cmn_err(CE_WARN, - "!sdbc(_sd_deconfigure) Pinned Data on cd %d(%s)", - i, cdi->cd_info->sh_filename); - pinneddata++; - } - } - - _sd_cache_initialized = 0; - - _sdbc_ft_deconfigure(); - - _sdbc_flush_deconfigure(); - _sdbc_thread_deconfigure(); - - mutex_enter(&_sd_cache_lock); - - while (_sd_cache_dem_cnt > 0) { - mutex_exit(&_sd_cache_lock); - (void) nsc_delay_sig(HZ/2); - mutex_enter(&_sd_cache_lock); - } - mutex_exit(&_sd_cache_lock); - - /* - * remove all dynamically allocated cache data memory - * there should be no i/o at this point - */ - _sdbc_dealloc_deconfigure_dm(); - /* - * At this point no thread of control should be active in the cache - * but us (unless they are blocked on the config lock). - */ - - -#if defined(_SD_FAULT_RES) - _sd_remote_disable(1); /* notify mirror I/O shutdown complete */ -#endif - -#define KEEP_TRACES 0 /* set to 1 keep traces after deconfig */ -#if !KEEP_TRACES - /* - * This needs to happen before we unregister the memory. - */ - _sdbc_tr_deconfigure(); -#endif - - - /* delete/free hash table, cache blocks, etc */ - _sdbc_cache_deconfigure(); - - _sdbc_handles_deconfigure(); - - _sdbc_iobuf_deconfigure(); - -#if !KEEP_TRACES - if (!_sdbc_memtype_deconfigure_delayed) - _sdbc_memtype_deconfigure(); -#else - _sdbc_memtype_deconfigure_delayed = 1; -#endif - - /* - * Call ss deconfig(), - * check for valid pointer in case _sdbc_configure() - * failed before safestrore system was initialized. 
- */ - if (sdbc_safestore) - SSOP_DECONFIGURE(sdbc_safestore, pinneddata); - - /* tear down safestore system */ - sst_deinit(); - - _sdbc_nodeid_deconfigure(); - - bzero(&_sd_cache_config, sizeof (_sd_cache_param_t)); - - _SD_SELF_HOST = _SD_MIRROR_HOST = _SD_NO_HOST; - _SD_NETS = 0; - _sd_cblock_shift = 0; - _sd_node_hint = 0; - -#ifdef DEBUG - cmn_err(CE_NOTE, "!SD cache deconfigured."); -#endif - - rc = 0; - -out: - return (rc); -} - - - -static int -find_low_bit(int mask, int start) -{ - for (; start < 32; start++) - if ((mask & (1 << start))) - break; - - return (start); -} - -int -get_high_bit(int size) -{ - int lowbit; - int newblk = size; - int highbit = -1; - int next_high = 0; - - while ((lowbit = find_low_bit(newblk, 0)) != 32) { - if (highbit >= 0) next_high = 1; - highbit = lowbit; - newblk &= ~(1 << highbit); - } - - if (highbit <= 0) { - cmn_err(CE_WARN, - "!sdbc(get_high_bit) invalid block size %x\n", size); - return (-1); - } - - if (next_high) highbit++; - - return (highbit); -} - - -int -_sd_fill_pattern(caddr_t addr, uint_t pat, uint_t size) -{ - caddr_t fmt_page; - int i, page_size; - - page_size = (int)ptob(1); - - if ((fmt_page = (caddr_t)nsc_kmem_alloc(ptob(1), - KM_SLEEP, sdbc_local_mem)) == NULL) { - cmn_err(CE_WARN, "!sdbc(_sd_fill pattern) no more memory"); - return (-1); - } - for (i = 0; i < page_size; i += 4) - *(int *)(void *)(fmt_page + i) = pat; - - while (size >= page_size) { - bcopy(fmt_page, addr, ptob(1)); - addr += page_size; - size -= page_size; - } - nsc_kmem_free(fmt_page, page_size); - return (0); -} - - -/* - * _sdbc_nodeid_deconfigure - merely a place holder until - * such time as there is something to be undone w.r.t. - * _sdbc_nodeid_configure. - * - */ -static void -_sdbc_nodeid_deconfigure(void) -{ - /* My but we're quick */ -} - -/* - * _sdbc_nodeid_configure - configure the nodeid's we need to connect - * to any other nodes in the network. 
- * - */ -void -_sdbc_nodeid_configure(void) -{ - - if (_sd_cache_config.num_nodes == 0) { - _sd_nodes_configured = 1; - } else { - _sd_nodes_configured = _sd_cache_config.num_nodes; - } - - _SD_SELF_HOST = nsc_node_id(); - _SD_MIRROR_HOST = _sd_cache_config.mirror_host; -} - -#define STACK_SIZE (32*1024) -#define num_spin 0 -nstset_t *_sd_ioset; - -/* - * _sdbc_thread_deconfigure - cache is being deconfigure, stop any - * thread activity. - * - */ -static void -_sdbc_thread_deconfigure(void) -{ - ASSERT(MUTEX_HELD(&_sdbc_config_lock)); - nst_destroy(_sd_ioset); - _sd_ioset = NULL; -} - -/* - * _sdbc_thread_configure - cache is being configured, initialize the - * threads we need for flushing dirty cds. - * - */ -static int -_sdbc_thread_configure(void) -{ - ASSERT(MUTEX_HELD(&_sdbc_config_lock)); - - if (!_sd_ioset) - _sd_ioset = nst_init("sd_thr", _sd_cache_config.threads); - - if (!_sd_ioset) - return (EINVAL); - - return (0); -} - -int -_sdbc_get_config(_sdbc_config_t *config_info) -{ - int i; - - config_info->enabled = _sd_cache_initialized; - config_info->magic = _SD_MAGIC; - for (i = 0; i < CACHE_MEM_PAD; i++) { - config_info->cache_mem[i] = _sd_cache_config.cache_mem[i]; - } - config_info->threads = _sd_cache_config.threads; - - return (0); -} diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_conf.h b/usr/src/uts/common/avs/ns/sdbc/sd_conf.h deleted file mode 100644 index b3ad6648a4..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_conf.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SD_CONF_H -#define _SD_CONF_H - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_RCSID) -static char *rcs_sd_conf_h = "@(#)(SMI) sd_conf.h 1.1 07/06/21 16:17:54"; -#endif - -#define MEGABYTE (1024*1024) - -#define DEFAULT_HANDLES 1000 -#define MAX_SD_NODES 256 /* max configured nodes */ -#define SD_MCIII 0 -#define SD_MCIV 1 - -/* for initializing fields to an invalid host id */ -#define _SD_NO_HOST -1 - -/* netaddr filler for mc_*() compatibility */ -#define _SD_NO_NETADDR 0 -/* dummy net for mc_*() compatibility */ -#define _SD_NO_NET 0 - -#define _SD_VME_DEFAULT (1024*1024) /* default 1mb contiguous memory */ - -#ifdef _KERNEL - -extern _sd_cache_param_t _sd_cache_config; -extern int _SD_SELF_DSP, _SD_REM_DSP[], _SD_NUM_REM; -extern int _SD_SELF_HOST, _SD_MIRROR_HOST; -extern int _sd_nodes_configured, _SD_HOST_CONF[]; -extern int _sd_parallel_resync_cnt; -extern int _sdbc_gateway_wblocks; -extern int _sdbc_memtype_deconfigure_delayed; -extern kmutex_t _sdbc_config_lock; -extern nsc_mem_t *sdbc_info_mem; -extern nsc_mem_t *sdbc_iobuf_mem, *sdbc_hash_mem; -extern nsc_mem_t *sdbc_local_mem, *sdbc_stats_mem, *sdbc_cache_mem; -#if defined(_SD_USE_THREADS) -extern nstset_t *_sd_ioset; -#endif /* _SD_USE_THREADS */ -extern ushort_t SD_AUTO_RESYNC; -extern volatile int _sd_cache_dem_cnt; -extern volatile int _sd_cache_initialized; - -extern void _sdbc_memtype_deconfigure(void); -extern int _sdbc_configure(_sd_cache_param_t *, - _sdbc_config_t *, spcs_s_info_t); 
-extern int _sdbc_deconfigure(spcs_s_info_t); -extern int _sdbc_get_config(_sdbc_config_t *); -extern int get_high_bit(int size); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_CONF_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_ft.c b/usr/src/uts/common/avs/ns/sdbc/sd_ft.c deleted file mode 100644 index 184462208f..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_ft.c +++ /dev/null @@ -1,1266 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "sd_bcache.h" -#include "sd_ft.h" -#include "sd_trace.h" -#include "sd_io.h" -#include "sd_misc.h" -#include <sys/ncall/ncall.h> - -_sd_ft_info_t _sd_ft_data; - -static volatile int _sd_ft_exit = 0; -static kcondvar_t _sd_ft_cv; -int _sd_node_recovery; /* node recovery in progress */ -/* - * _sd_async_recovery: - * 0 = flush and wait - * 1 = clone and async-write - * 2 = quicksort, clone, and async-write - * quicksort allows contiguous blocks to be joined, - * which may greatly improve recovery time for raid devices. - * if kmem_alloc fails, acts as _sd_async_recovery == 1 - */ -static int _sd_async_recovery = 2; -static int xmem_inval_hit, xmem_inval_miss, xmem_inval_inuse; - - -/* - * flag to inhibit reset of remote SCSI buses and sending of - * nodedown callback if mirror was deconfigured properly. - * - prevents trashing any I/O that may be happening on the mirror - * node during a normal shutdown and prevents undesired simckd failover. 
- */ -static int mirror_clean_shutdown = 0; - -/* - * Forward declare all statics that are used before defined to enforce - * parameter checking - * Some (if not all) of these could be removed if the code were reordered - */ - -static void _sd_health_thread(void); -static void _sd_cache_recover(void); -static int _sd_ft_clone(ss_centry_info_t *, int); -static void _sd_remote_enable(void); -static void sdbc_setmodeandftdata(); -static void _sd_cd_discard_mirror(int cd); -static int _sd_failover_file_open(void); -static void _sd_failover_done(void); -static void _sd_wait_for_dirty(void); -static void _sdbc_clear_warm_start(void); -static int sdbc_recover_vol(ss_vol_t *, int); -void _ncall_poke(int); - -int _sdbc_ft_hold_io; -kcondvar_t _sdbc_ft_hold_io_cv; -kmutex_t _sdbc_ft_hold_io_lk; -extern int sdbc_use_dmchain; -extern void sdbc_requeue_head_dm_try(_sd_cctl_t *cc_ent); - -/* - * _sdbc_ft_unload - cache is being unloaded (or failed to load). - * Deallocate any global lock/sv that we created. - */ -void -_sdbc_ft_unload(void) -{ - cv_destroy(&_sd_ft_cv); - mutex_destroy(&_sd_ft_data.fi_lock); - cv_destroy(&_sd_ft_data.fi_rem_sv); - mutex_destroy(&_sd_ft_data.fi_sleep); - bzero(&_sd_ft_data, sizeof (_sd_ft_info_t)); -} - -/* - * _sdbc_ft_load - cache is being loaded. Allocate all global lock/sv - * that we need. Return 0 if we succeed. If we fail return -1 (don't - * need to do the unload step as we expect our caller to do that). 
- */ -int -_sdbc_ft_load(void) -{ - /* _sd_ft_data is sure to be zeroes, don't need to bzero it */ - - mutex_init(&_sd_ft_data.fi_lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&_sd_ft_data.fi_rem_sv, NULL, CV_DRIVER, NULL); - cv_init(&_sd_ft_cv, NULL, CV_DRIVER, NULL); - mutex_init(&_sd_ft_data.fi_sleep, NULL, MUTEX_DRIVER, NULL); - return (0); -} - - -int -_sdbc_ft_configure(void) -{ - _sd_ft_exit = 1; - return (nsc_create_process( - (void (*)(void *))_sd_health_thread, 0, TRUE)); -} - - -void -_sdbc_ft_deconfigure(void) -{ - _sd_ft_exit = 0; - _sd_unblock(&_sd_ft_cv); - mutex_enter(&_sd_ft_data.fi_lock); - _sd_node_recovery = 0; - cv_broadcast(&_sd_ft_data.fi_rem_sv); - mutex_exit(&_sd_ft_data.fi_lock); -} - - -/* - * _sd_health_thread -- daemon thread on each node watches if mirror - * node to has crashed, and it needs to flush the mirrors cache entries. - * Note we do *not* detect that the node has come up again, but wait - * for the node to inform us that it is up via _sd_cache_reenable(). - */ -static void -_sd_health_thread(void) -{ - int warm_started = 0; - - mutex_enter(&_sd_cache_lock); - _sd_cache_dem_cnt++; - mutex_exit(&_sd_cache_lock); - - /* clear _sd_ft_data in case this is a cache re-enable w/o unload */ - - bzero(&_sd_ft_data, sizeof (_sd_ft_info_t)); - - sdbc_setmodeandftdata(); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!sdbc(_sd_health_thread) safestore " - "is %s. Fast writes %s", - (_SD_MIRROR_CONFIGD) ? "up" : "down", - (_SD_NODE_HINTS & _SD_WRTHRU_MASK) ? - "disabled" : "enabled"); -#endif - - /* CONSTCOND */ - while (1) { - _sd_timed_block(HZ/8, &_sd_ft_cv); - if (_sd_ft_exit == 0) { - mutex_enter(&_sd_cache_lock); - _sd_cache_dem_cnt--; - mutex_exit(&_sd_cache_lock); - return; - } - - /* NB evaluation order is important here for nvmem systems */ - if (_sd_is_mirror_crashed() || - (warm_started = _sdbc_warm_start())) { - - /* - * Hash invalidate here. 
We do not want data from - * previous failover incarnation to be cache hits, if - * the 2 failover happens within a short time - */ - _sd_hash_invalidate_cd(-1); - - /* - * don't change mirror state when warm starting - * nvmem systems. _sd_mirror_down() is called in - * in _sd_remote_enable() on nvmem systems if the - * media is down. - */ - if (!warm_started) - if (!mirror_clean_shutdown) - _sd_mirror_down(); - else - _sd_mirror_cache_down(); - - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - if (!warm_started) { - /* was FAST */ - mutex_enter(&_sd_ft_data.fi_lock); - _sd_node_recovery = 0; - /* was FAST */ - mutex_exit(&_sd_ft_data.fi_lock); - /* Assume other side is still up */ - cmn_err(CE_WARN, - "!sdbc(_sd_health_thread)" - "Safestore is down. Fast writes %s", - (_SD_NODE_HINTS & _SD_WRTHRU_MASK) ? - "disabled" : "enabled"); - _sd_unblock(&_sd_flush_cv); - - if (SAFESTORE_LOCAL(sdbc_safestore)) - continue; - - /* Wait for cache to drain and panic */ - _sd_wait_for_dirty(); - cmn_err(CE_WARN, - "!sdbc(_sd_health_thread)" - " dirty blocks flushed"); - continue; - } - /* was FAST */ - mutex_enter(&_sd_ft_data.fi_lock); - _sd_node_recovery = 1; - /* was FAST */ - mutex_exit(&_sd_ft_data.fi_lock); - if (!SAFESTORE_LOCAL(sdbc_safestore)) - cmn_err(CE_WARN, - "!sdbc(_sd_health_thread)" - " Cache on node %d is down. " - "Fast writes %s", - _SD_MIRROR_HOST, - (_SD_NODE_HINTS & _SD_WRTHRU_MASK) ? - "disabled" : "enabled"); - cmn_err(CE_NOTE, - "!sdbc(_sd_health_thread)" - " Cache recovery in progress"); - _sd_cache_recover(); - - mutex_enter(&_sd_ft_data.fi_lock); - _sd_node_recovery = 0; - _sdbc_clear_warm_start(); /* nvmem systems */ - cv_broadcast(&_sd_ft_data.fi_rem_sv); - mutex_exit(&_sd_ft_data.fi_lock); - cmn_err(CE_NOTE, - "!sdbc(_sd_health_thread) %s Cache recovery done", - _sd_async_recovery ? 
- "asynchronous" : "synchronous"); - /* restore previous state */ - if (warm_started && !_sd_is_mirror_down()) { - (void) _sd_clear_node_hint(NSC_FORCED_WRTHRU); - cmn_err(CE_NOTE, - "!sdbc(_sd_health_thread) Fast writes %s", - (_SD_NODE_HINTS & _SD_WRTHRU_MASK) ? - "disabled" : "enabled"); - } - warm_started = 0; - - } else if (_sd_is_mirror_node_down()) { - _sd_mirror_down(); - } - } -} - -/* - * _sdbc_recovery_io_wait - wait for i/o being done directly - * out of safe storage to complete. If the i/o does not make any - * progress within about 25 seconds we return EIO otherwise return 0. - * - */ -static -int -_sdbc_recovery_io_wait(void) -{ - int tries = 0; - int last_numio = 0; - - /* - * Wait for numio to reach 0. - * If numio has not changed for 85+ seconds, - * break & pin blocks - */ - while (_sd_ft_data.fi_numio > 0) { - if (last_numio == _sd_ft_data.fi_numio) { - if (++tries > 512) break; - } else { - last_numio = _sd_ft_data.fi_numio; - tries = 0; - } - delay(HZ/8); - } - if (_sd_ft_data.fi_numio != 0) { - cmn_err(CE_WARN, "!sdbc(_sdbc_recovery_io_wait) %d " - "recovery i/o's not done", _sd_ft_data.fi_numio); - return (EIO); - } - return (0); -} - - -#if defined(_SD_FAULT_RES) -/* - * _sd_recovery_wait() - * while _sd_node_recovery is set, accesses to mirrored devices will block - * (_sd_node_recovery-1) is count of blocked threads. - */ -int -_sd_recovery_wait(void) -{ - int blk; - - mutex_enter(&_sd_ft_data.fi_lock); - blk = _sd_node_recovery ? _sd_node_recovery++ : 0; - - if (blk) - cv_wait(&_sd_ft_data.fi_rem_sv, &_sd_ft_data.fi_lock); - mutex_exit(&_sd_ft_data.fi_lock); - - if (!_sd_cache_initialized) - return (EINVAL); - return (0); -} - -/* - * _sd_recovery_wblk_wait - wait for recovery i/o to a device - * to cease. If the file is closed or the cache is disabled - * first return an error otherwise return 0. 
- * - * A device is being recovered from our point of view either - * during failover or by putting a disk back online after - * a disk failure. - * - * This code is used to delay access to a device while recovery - * writes are in progress from either a failover or while flushing - * i/o after a failed disk has been repaired. - */ -int -_sd_recovery_wblk_wait(int cd) -{ - _sd_cd_info_t *cdi = &_sd_cache_files[cd]; - - while (_sd_cache_initialized && - FILE_OPENED(cd) && cdi->cd_recovering) { - /* spawn writer if none */ - if (!cdi->cd_writer) (void) cd_writer(cd); - delay(HZ/8); - } - if (!_sd_cache_initialized || !FILE_OPENED(cd)) - return (EINVAL); - return (0); -} - -/* - * Recover from a crash of another node: - * - * 1) Open all remote files - * 2) Allocate other node's buffers and new buffer headers - * 3) Flush all dirty buffers to disk - * 4) Deallocate resources - */ -static void -_sd_cache_recover(void) -{ - int cblocks_processed; - - SDTRACE(ST_ENTER|SDF_RECOVER, SDT_INV_CD, 0, SDT_INV_BL, 0, 0); - - /* was FAST */ - mutex_enter(&_sd_ft_data.fi_lock); - _sd_ft_data.fi_numio = 0; - /* was FAST */ - mutex_exit(&_sd_ft_data.fi_lock); - -#ifdef _SD_DRIVE_RESP - if (!mirror_clean_shutdown) - _raw_reset_other(); -#endif - mirror_clean_shutdown = 0; - - cblocks_processed = _sd_failover_file_open(); - - /* allow cache config to proceed */ - mutex_enter(&_sdbc_ft_hold_io_lk); - _sdbc_ft_hold_io = 0; - cv_signal(&_sdbc_ft_hold_io_cv); - mutex_exit(&_sdbc_ft_hold_io_lk); - - /* wait for sequential recovery to complete */ - if (!_sd_async_recovery && cblocks_processed) - (void) _sdbc_recovery_io_wait(); - - _sd_failover_done(); - - if (cblocks_processed) - cmn_err(CE_NOTE, - "!sdbc %ssynchronous recovery complete " - "%d cache blocks processed", - _sd_async_recovery ? 
"a" : "", - cblocks_processed); - - SDTRACE(ST_EXIT|SDF_RECOVER, SDT_INV_CD, 0, SDT_INV_BL, 0, 0); -} - -void -_sd_mirror_iodone(void) -{ - /* was FAST */ - mutex_enter(&_sd_ft_data.fi_lock); - _sd_ft_data.fi_numio--; - /* was FAST */ - mutex_exit(&_sd_ft_data.fi_lock); -} - - - -/* - * _sd_ft_clone -- clone cache block from ft area, retry write or pin. - */ -static int -_sd_ft_clone(ss_centry_info_t *ft_cent, int async) -{ - _sd_cctl_t *ent; - int cd = ft_cent->sc_cd; - nsc_off_t cblk = ft_cent->sc_fpos; - int dirty = ft_cent->sc_dirty; - ss_resource_t *res = ft_cent->sc_res; - _sd_cd_info_t *cdi; - - SDTRACE(ST_ENTER|SDF_FT_CLONE, cd, BLK_FBAS, cblk, dirty, _SD_NO_NET); - cdi = &(_sd_cache_files[cd]); - if ((cdi->cd_info->sh_failed != 2) && !FILE_OPENED(cd)) { - cmn_err(CE_WARN, "!sdbc(_sd_ft_clone) recovery " - "write failed: cd %x; cblk %" NSC_SZFMT "; dirty %x", - cd, cblk, dirty); - SDTRACE(ST_EXIT|SDF_FT_CLONE, - cd, BLK_FBAS, cblk, dirty, EINTR); - return (-1); - } - - /* - * allocate new cache entry and read data - */ - ent = sdbc_centry_alloc_blks(cd, cblk, 1, 0); - - if (SSOP_READ_CBLOCK(sdbc_safestore, res, (void *)ent->cc_data, - CACHE_BLOCK_SIZE, 0) == SS_ERR) { - cmn_err(CE_WARN, "!sdbc(_sd_ft_clone) read of " - "pinned data block failed. cannot recover " - "0x%p size 0x%x", (void *)res, CACHE_BLOCK_SIZE); - - /* _sd_process_failure ?? 
*/ - _sd_centry_release(ent); - return (-1); - } - - ent->cc_write = ft_cent; - ent->cc_dirty = ent->cc_valid = (ushort_t)dirty; - ent->cc_flag |= (ft_cent->sc_flag & CC_PINNABLE); - - ent->cc_chain = NULL; - - /* - * _sd_process_failure() adds to failed list & does pinned callback - * otherwise async flush - */ - if (cdi->cd_info->sh_failed) { /* raw device open/reserve failed */ - mutex_enter(&cdi->cd_lock); - (cdi->cd_info->sh_numio)++; - mutex_exit(&cdi->cd_lock); - (void) _sd_process_failure(ent); - } else { - - if (cdi->cd_global->sv_pinned != _SD_NO_HOST) { - cdi->cd_global->sv_pinned = _SD_NO_HOST; - SSOP_SETVOL(sdbc_safestore, cdi->cd_global); - } - - if (async) { - _sd_enqueue_dirty(cd, ent, ent, 1); - } else { - /* - * this is sync write with asynchronous callback - * (queue to disk and return). - */ - - mutex_enter(&(cdi->cd_lock)); - (cdi->cd_info->sh_numio)++; - mutex_exit(&cdi->cd_lock); - _sd_async_flcent(ent, cdi->cd_crdev); - } - } - _sd_centry_release(ent); - SDTRACE(ST_EXIT|SDF_FT_CLONE, cd, BLK_FBAS, cblk, dirty, _SD_NO_NET); - return (0); -} - - -/* - * _sd_repin_cd - scan for dirty blocks held by mirror node. - * - * sdbc on this node is being attached to cd. If sdbc on other - * node had failed writes (pinnable or not) we need to take - * responsbility for them now here. - */ -int -_sd_repin_cd(int cd) -{ - ss_voldata_t *cd_gl; - _sd_cd_info_t *cdi; - - if (!FILE_OPENED(cd)) - return (EINVAL); - - cdi = &_sd_cache_files[cd]; - if (cdi->cd_global->sv_pinned == _SD_NO_HOST) - return (0); - - cd_gl = _sdbc_gl_file_info + cd; - - if (sdbc_recover_vol(cd_gl->sv_vol, cd)) - _sd_cd_discard_mirror(cd); - - return (0); -} - - -static int -_sd_cache_mirror_enable(int host) -{ - if (_sd_cache_initialized) { - if (host != _SD_MIRROR_HOST) { - cmn_err(CE_WARN, "!sdbc(_sd_cache_mirror_enable) " - "Configured mirror %x. 
Got message from %x", - _SD_MIRROR_HOST, host); - return (-EINVAL); - } - if (_sd_node_recovery) (void) _sd_recovery_wait(); - if (_sd_cache_initialized && _sd_is_mirror_down()) { - int i; - - /* make sure any pinned data we have is now refreshed */ - for (i = 0; i < sdbc_max_devs; i++) - if (FILE_OPENED(i)) - (void) _sdbc_remote_store_pinned(i); - - cmn_err(CE_NOTE, - "!sdbc(_sd_cache_mirror_enable) Cache on " - "mirror node %d is up. Fast writes enabled", - host); - _sd_mirror_up(); - (void) _sd_clear_node_hint(NSC_FORCED_WRTHRU); - } - } - _sd_ft_data.fi_host_state = _SD_HOST_CONFIGURED; - return (_sd_cache_initialized); -} - - -/* - * two stage mirror disable: - * stage 0: set FORCED_WRTHRU hint (cache shutdown started) - * stage 1: mirror shutdown completed - */ -static int -_sd_cache_mirror_disable(int host, int stage) -{ - if (_sd_cache_initialized) { - - if (host != _SD_MIRROR_HOST) - return (0); - if (stage == 0) { - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - return (0); - } - _sd_ft_data.fi_host_state = _SD_HOST_DECONFIGURED; - mirror_clean_shutdown = 1; - _sd_unblock(&_sd_ft_cv); - } else { - _sd_ft_data.fi_host_state = _SD_HOST_NONE; - } - return (0); -} - -/* - * set the fault tolerant data to indicate the state - * of the safestore host. 
set mode to writethru if appropriate - */ -static void -sdbc_setmodeandftdata() -{ - /* - * if single node local safestore or ram safestore - * then mark host state as carashed/_SD_HOST_NONE and set writethru - */ - if (SAFESTORE_LOCAL(sdbc_safestore)) { - if (!SAFESTORE_SAFE(sdbc_safestore)) { - _sd_mirror_down(); /* mirror node down */ - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - } else { - _sd_ft_data.fi_host_state = _SD_HOST_CONFIGURED; - if (_sdbc_warm_start()) - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - } - } else - _sd_remote_enable(); -} - -static void -_sd_remote_enable(void) -{ - ncall_t *ncall; - long r; - - if (ncall_alloc(_SD_MIRROR_HOST, 0, _SD_NO_NET, &ncall)) { - _sd_mirror_down(); /* mirror node down */ - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - return; - } - - r = ncall_send(ncall, 0, SD_ENABLE, _SD_SELF_HOST); - if (!r) (void) ncall_read_reply(ncall, 1, &r); - ncall_free(ncall); - - if (r == 1) { /* _sd_cache_initialized */ - if (!_sd_is_mirror_crashed() && - _sd_ft_data.fi_host_state == _SD_HOST_NONE) - _sd_ft_data.fi_host_state = _SD_HOST_CONFIGURED; - return; - } - if (r == ENOLINK) - _sd_mirror_down(); /* mirror node down */ - else - _sd_mirror_cache_down(); /* mirror up, but no cache */ - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); -} - - -void -_sd_remote_disable(int stage) -{ - ncall_t *ncall; - - if (ncall_alloc(_SD_MIRROR_HOST, 0, 0, &ncall) == 0) - (void) ncall_send(ncall, NCALL_ASYNC, SD_DISABLE, - _SD_SELF_HOST, stage); -} - -void -r_sd_ifs_cache_enable(ncall_t *ncall, int *ap) -{ - ncall_reply(ncall, _sd_cache_mirror_enable(*ap)); -} - - - -void -r_sd_ifs_cache_disable(ncall_t *ncall, int *ap) -{ - (void) _sd_cache_mirror_disable(ap[0], ap[1]); - ncall_done(ncall); -} - -#else /* (_SD_FAULT_RES) */ - -void r_sd_ifs_cache_enable() {; } -void r_sd_ifs_cache_disable() {; } - -#endif /* (_SD_FAULT_RES) */ - -/* - * invalidate cache hash table entries for given device - * or (-1) all devices belonging to mirrored node - */ 
-void -_sd_hash_invalidate_cd(int CD) -{ - int i; - _sd_cd_info_t *cdi; - _sd_hash_hd_t *hptr; - _sd_cctl_t *cc_ent, *ent; - _sd_hash_bucket_t *bucket; - int cd; - nsc_off_t blk; - - for (i = 0; i < (_sd_htable->ht_size); i++) { - bucket = (_sd_htable->ht_buckets + i); - mutex_enter(bucket->hb_lock); - hptr = bucket->hb_head; - while (hptr) { - cc_ent = (_sd_cctl_t *)hptr; - cd = CENTRY_CD(cc_ent); - blk = CENTRY_BLK(cc_ent); - cdi = &_sd_cache_files[cd]; - - /* - * Skip if device doesn't match or pinned. - * (-1) skip attached cd's - */ - if ((CD != -1 && (cd != CD || CENTRY_PINNED(cc_ent))) || - (CD == -1 && nsc_held(cdi->cd_rawfd))) { - hptr = hptr->hh_next; - continue; - } - mutex_exit(bucket->hb_lock); - - ent = cc_ent; - fl1: - if (CC_CD_BLK_MATCH(cd, blk, ent) || - (ent = (_sd_cctl_t *)_sd_hash_search(cd, blk, - _sd_htable))) { - if (SET_CENTRY_INUSE(ent)) { - xmem_inval_inuse++; - _sd_cc_wait(cd, blk, ent, CC_INUSE); - goto fl1; /* try again */ - } - - /* cc_inuse is set, delete on block match */ - if (CC_CD_BLK_MATCH(cd, blk, ent)) { - xmem_inval_hit++; - (void) _sd_hash_delete( - (struct _sd_hash_hd *)ent, - _sd_htable); - - if (sdbc_use_dmchain) { - - /* attempt to que head */ - if (ent->cc_alloc_size_dm) { - sdbc_requeue_head_dm_try - (ent); - } - } else - _sd_requeue_head(ent); - - } else - xmem_inval_miss++; - - CLEAR_CENTRY_INUSE(ent); - } - mutex_enter(bucket->hb_lock); - hptr = bucket->hb_head; - } - mutex_exit(bucket->hb_lock); - } -} - - -/* - * _sd_cd_online(cd,discard) - * clear local error state. - * if (discard && _attached != _SD_SELF_HOST) then release buffers. - * if (!discard && _attached != _SD_MIRROR_HOST) then re-issue I/Os - * (add to dirty pending queue). 
- * returns: - * 0 success - * EINVAL invalid device or not failed - * EBUSY attached by this node, or by active mirror - */ -static int -_sd_cd_online(int cd, int discard) -{ - _sd_cd_info_t *cdi = &_sd_cache_files[cd]; - int failed, num; - _sd_cctl_t *cc_ent, *cc_next, *cc_last, *cc_first, *cc_next_chain; - - /* - * in the case where a failed device has been closed and - * then re-opened, sh_failed will be zero because it is - * cleared in _sd_open_cd(). hence the test for - * _pinned != _SD_SELF_HOST which allows the restore to - * proceed in this scenario. - */ - if (cd < 0 || cd >= sdbc_max_devs) - return (EINVAL); - - if (!cdi->cd_info || !cdi->cd_global) - return (EINVAL); - - if ((cdi->cd_info->sh_failed == 0) && - (cdi->cd_global->sv_pinned != _SD_SELF_HOST)) - return (0); - - if (_sd_nodes_configured > 1) { - - /* can't discard while attached on multinode systems */ - if (discard && (cdi->cd_global->sv_attached == _SD_SELF_HOST)) - return (EBUSY); - - if (!discard && /* attached by active mirror! 
*/ - (cdi->cd_global->sv_attached == _SD_MIRROR_HOST) && - !_sd_is_mirror_down()) - return (EBUSY); - } - - mutex_enter(&cdi->cd_lock); - - cc_ent = cdi->cd_fail_head; - failed = cdi->cd_info->sh_numfail; - cdi->cd_fail_head = NULL; - cdi->cd_info->sh_numfail = 0; - cdi->cd_info->sh_failed = 0; - cdi->cd_global->sv_pinned = _SD_NO_HOST; - SSOP_SETVOL(sdbc_safestore, cdi->cd_global); - - if (cc_ent == NULL) { - mutex_exit(&cdi->cd_lock); - return (0); - } - /* prevent any new i/o from arriving for this cd */ - if (!discard) - cdi->cd_recovering = 1; - - mutex_exit(&cdi->cd_lock); - - num = 0; - cc_first = cc_ent; - for (; cc_ent; cc_ent = cc_next_chain) { - cc_next_chain = cc_ent->cc_dirty_link; - - for (; cc_ent; cc_ent = cc_next) { - cc_next = cc_ent->cc_dirty_next; - cc_last = cc_ent; - num++; - - if (discard) { - ss_centry_info_t *wctl; - /* was FAST */ - mutex_enter(&cc_ent->cc_lock); - cc_ent->cc_valid = cc_ent->cc_dirty = 0; - cc_ent->cc_flag &= ~(CC_PEND_DIRTY|CC_PINNED); - cc_ent->cc_dirty_next = NULL; - cc_ent->cc_dirty_link = NULL; - wctl = cc_ent->cc_write; - cc_ent->cc_write = NULL; - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - if (wctl) { - wctl->sc_flag = 0; - wctl->sc_dirty = 0; - - SSOP_SETCENTRY(sdbc_safestore, wctl); - SSOP_DEALLOCRESOURCE(sdbc_safestore, - wctl->sc_res); - } - - continue; - } - - /* Clear PEND_DIRTY, iocount & iostatus */ - if (SET_CENTRY_INUSE(cc_ent) == 0) { - cc_ent->cc_flag &= ~CC_PEND_DIRTY; - cc_ent->cc_iocount = 0; - cc_ent->cc_iostatus = 0; /* _SD_IO_NONE */ - CLEAR_CENTRY_INUSE(cc_ent); - } else { - /* was FAST */ - mutex_enter(&cc_ent->cc_lock); - cc_ent->cc_flag &= ~CC_PEND_DIRTY; - cc_ent->cc_iocount = 0; - cc_ent->cc_iostatus = 0; /* _SD_IO_NONE */ - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - } - } - } - if (num != failed) - cmn_err(CE_WARN, "!sdbc(_sd_cd_online) count %d vs numfail %d", - num, failed); - if (discard) { - _sd_hash_invalidate_cd(cd); - return (0); - } - - _sd_enqueue_dirty_chain(cd, 
cc_first, cc_last, num); - /* make sure data gets flushed in case there is no new I/O */ - (void) nsc_reserve(cdi->cd_rawfd, NSC_MULTI); - (void) _sd_wait_for_flush(cd); - cdi->cd_recovering = 0; - nsc_release(cdi->cd_rawfd); - - return (0); -} - -#if defined(_SD_FAULT_RES) - -/* - * This node has disk attached, discard pins held by mirror - */ -static void -_sd_cd_discard_mirror(int cd) -{ - ncall_t *ncall; - if (ncall_alloc(_SD_MIRROR_HOST, 0, 0, &ncall)) - return; - (void) ncall_send(ncall, NCALL_ASYNC, SD_CD_DISCARD, cd); -} - -void -r_cd_discard(ncall_t *ncall, int *ap) -{ - int r, cd = *ap; - if (_sd_cache_initialized) { - SDTRACE(ST_ENTER|SDF_ONLINE, cd, 1, SDT_INV_BL, 1, 0); - r = _sd_cd_online(cd, 1); - SDTRACE(ST_EXIT|SDF_ONLINE, cd, 1, SDT_INV_BL, 1, r); - } - ncall_done(ncall); -} - -/* - * _sd_failover_file_open - - * on failover, open devices which are not attached by this node. - */ -static int -_sd_failover_file_open(void) -{ - int rc, cd, flag = 0; - ss_voldata_t *cd_gl; - _sd_cd_info_t *cdi; - int cblocks_processed = 0; - extern ss_voldata_t *_sdbc_gl_file_info; - - for (cd = 0; cd < sdbc_max_devs; cd++) { - cd_gl = _sdbc_gl_file_info + cd; - cdi = &(_sd_cache_files[cd]); - - /* - * If the cd is open and reserved we certainly don't - * need to do it again. However the recovery code - * must be racing some other cache usage which could - * be bad. We really need to be able to lock out - * all cache activity for this cd that is not tied - * to the recovery process. This doesn't seem to be - * feasible in sdbc since a competing thread could - * already be finished doing an alloc_buf. If this - * hole is to be closed sd-ctl must be more in - * control of the failover process. - */ - if (FILE_OPENED(cd) && nsc_held(cdi->cd_rawfd)) - continue; - - /* - * this constuct says that, on non-nvmem systems, - * if we are attempting to open a "local" device and - * nothing is pinned, then continue. i.e. 
open only - * remote devices or devices that have pinned data. - * for recovery on nvmem systems we open all devices. - */ - if ((!_sdbc_warm_start()) && - ((cd_gl->sv_attached != _SD_MIRROR_HOST) && - (cd_gl->sv_pinned != _SD_MIRROR_HOST) && - (cd_gl->sv_pinned != _SD_SELF_HOST))) - continue; - if (!cd_gl->sv_volname || !cd_gl->sv_volname[0]) - continue; - - if (_sd_open_cd(cd_gl->sv_volname, cd, flag) < 0) { - cmn_err(CE_WARN, "!sdbc(_sd_failover_file_open) " - "Unable to open disk partition %s", - cd_gl->sv_volname); - continue; - } - - SDTRACE(ST_INFO|SDF_RECOVER, cd, 0, 0, 0, 0); - rc = nsc_reserve(cdi->cd_rawfd, NSC_MULTI); - if (rc == 0) { - cdi->cd_failover = 1; - } - - if (rc != 0) cdi->cd_info->sh_failed = 1; - - cblocks_processed += sdbc_recover_vol(cd_gl->sv_vol, cd); - } - - return (cblocks_processed); -} - - -static int -sdbc_recover_vol(ss_vol_t *vol, int cd) -{ - ss_cdirkey_t key; - ss_cdir_t cdir; - ss_voldata_t *cd_gl = _sdbc_gl_file_info + cd; - ss_centry_info_t *cinfo; - ss_centry_info_t centry; - int cblocks_processed = 0; - int err; - ss_centry_info_t *sdbc_get_cinfo_byres(ss_resource_t *); - - /* setup the key to get a volume directory stream of centrys */ - key.ck_type = CDIR_VOL; - key.cdk_u.ck_vol = vol; - - if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) { - cmn_err(CE_WARN, "!sdbc(sdbc_recover_vol): " - "cannot recover volume %s", - cd_gl->sv_volname); - return (0); - } - - /* cycle through the cdir getting resource tokens and reading centrys */ - /*CONSTANTCONDITION*/ - while (1) { - - if ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, ¢ry)) - == SS_ERR) { - cmn_err(CE_WARN, "!sdbc(sdbc_recover_vol): " - "cache entry read failure %s %p", - cd_gl->sv_volname, (void *)centry.sc_res); - - continue; - } - - - if (err == SS_EOF) - break; /* done */ - - - /* - * this get into double caching consistency - * need to resolve this jgk - */ - if ((cinfo = sdbc_get_cinfo_byres(centry.sc_res)) == NULL) { - /* should not happen */ - cmn_err(CE_WARN, 
"!sdbc(sdbc_recover_vol): " - "invalid ss resource %p", (void *)centry.sc_res); - continue; - } - bcopy(¢ry, cinfo, sizeof (ss_centry_info_t)); - - /* - * note - * ss should return a stream of dirty blocks ordered - * by block number. if it turns out that ss will not support - * this then sorting for async recovery will have to be - * done here jgk - */ - ASSERT(cinfo->sc_dirty); - - if (!cinfo->sc_dirty) /* should not happen */ - continue; - - /* - * clone mirror cache entry and do - * async I/O or sync I/O or pin if sh_failed - */ - (void) _sd_ft_clone(cinfo, _sd_async_recovery); - ++cblocks_processed; - } - - - if (cblocks_processed) - cmn_err(CE_NOTE, - "!sdbc(sdbc_recover_vol) %d cache blocks processed for " - "volume %s", cblocks_processed, cd_gl->sv_volname); - - return (cblocks_processed); -} - -/* - * _sd_failover_done - - * mark failover open'd devices as requiring nsc_release() - * when all queued I/O's have drained. - */ -static void -_sd_failover_done(void) -{ - _sd_cd_info_t *cdi; - int cd; - - for (cd = 0; cd < sdbc_max_devs; cd++) { - cdi = &(_sd_cache_files[cd]); - - if (FILE_OPENED(cd) && cdi->cd_failover) - cdi->cd_failover = 2; - } -} - -#endif /* (_SD_FAULT_RES) */ - -/* - * _sd_uncommit - discard local buffer modifications - * clear the valid bits. 
- */ -int -_sd_uncommit(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len, - int flag) -{ - int cd; - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_size_t cc_len; - int bits; - _sd_cctl_t *cc_ent; - - cd = HANDLE_CD(handle); - - ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len); - - if ((handle->bh_flag & NSC_WRBUF) == 0) { - DTRACE_PROBE(_sd_uncommit_end_handle_write); - - return (EINVAL); - } - - if (fba_len == 0) { - DTRACE_PROBE(_sd_uncommit_end_zero_len); - return (NSC_DONE); - } - - SDTRACE(ST_ENTER|SDF_UNCOMMIT, cd, fba_len, fba_pos, flag, 0); - - cc_ent = handle->bh_centry; - while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos)) - cc_ent = cc_ent->cc_chain; - - cc_len = fba_len; /* current length */ - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = (BLK_FBAS - st_cblk_off); - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } - else - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - - /* - * Check if remote write-cache spool is dirty, - * if not, we can just discard local valid bits. 
- */ - bits = SDBC_GET_BITS(st_cblk_off, st_cblk_len); - cc_ent->cc_valid &= ~bits; - - cc_len -= st_cblk_len; - cc_ent = cc_ent->cc_chain; - bits = SDBC_GET_BITS(0, BLK_FBAS); - - while (cc_len > (nsc_size_t)end_cblk_len) { - cc_ent->cc_valid = 0; - cc_ent = cc_ent->cc_chain; - cc_len -= BLK_FBAS; - } - -#if defined(_SD_DEBUG) - if (cc_len != end_cblk_len) - cmn_err(CE_WARN, "!fba_len %" NSC_SZFMT " end_cblk_len %d in " - "_sd_write", fba_len, end_cblk_len); -#endif - - if (cc_len) { - bits = SDBC_GET_BITS(0, end_cblk_len); - cc_ent->cc_valid &= ~bits; - } - SDTRACE(ST_EXIT|SDF_UNCOMMIT, cd, fba_len, fba_pos, flag, 0); - - return (NSC_DONE); -} - -static void -_sd_wait_for_dirty(void) -{ - int cd; - - for (cd = 0; cd < sdbc_max_devs; cd++) { - while (_SD_CD_WBLK_USED(cd)) - delay(HZ); - } -} - -/* - * _sd_wait_for_flush - wait for all i/o for this cd to cease. - * This function assumes that no further i/o are being issued - * against this device. This assumption is enforced by sd-ctl - * when called from _sd_flush_cd. Recovery also uses this - * wait and it enforces this assumption (somewhat imperfectly) - * by using cd_recovering. - * We must see progress in getting i/o complete within 25 seconds - * or we will return an error. If we complete normally (all i/o done) - * we return 0. - */ -int -_sd_wait_for_flush(int cd) -{ - _sd_cd_info_t *cdi = &(_sd_cache_files[cd]); - int tries = 0, used, last_used = 0, inprogress = 0; - - if (!(_SD_CD_WBLK_USED(cd))) - return (0); - /* - * Wait for WBLK_USED to reach 0. 
- * If unchanged for 32+ seconds returns EAGAIN - */ - if (!cdi->cd_writer) - (void) cd_writer(cd); /* spawn writer if not already running */ - - while (((used = _SD_CD_WBLK_USED(cd)) != 0) || cdi->cd_writer) { - if (last_used == used && - inprogress == cdi->cd_write_inprogress) { - if (cdi->cd_info->sh_failed) - break; - if (++tries > 128) { - cmn_err(CE_WARN, "!sdbc(_sd_wait_for_flush) " - "%s still has %d blocks pending %d" - " in progress (@ %lx)", - cdi->cd_info->sh_filename, last_used, - inprogress, nsc_lbolt()); - return (EAGAIN); - } - } else { - last_used = used; - inprogress = cdi->cd_write_inprogress; - tries = 0; - } - _sd_unblock(&_sd_flush_cv); - delay(HZ/4); - } - if (cdi->cd_info->sh_failed) - return (EIO); - else - return (0); -} - - -static -int _sd_ft_warm_start; - -int -_sdbc_warm_start(void) -{ - return (_sd_ft_warm_start); -} - -void -_sdbc_clear_warm_start(void) -{ - _sd_ft_warm_start = 0; -} - -void -_sdbc_set_warm_start(void) -{ - _sd_ft_warm_start = 1; -} - -/*ARGSUSED*/ -void -_ncall_poke(int host) -{ - cmn_err(CE_PANIC, " NYI - _ncall_poke"); -} diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_ft.h b/usr/src/uts/common/avs/ns/sdbc/sd_ft.h deleted file mode 100644 index db8ce51187..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_ft.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SD_FT_H -#define _SD_FT_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/ncall/ncall.h> - -typedef struct _sd_ft_info { - char fi_crashed; /* mirror cache state */ - char fi_host_state; /* mirror node state */ - kmutex_t fi_lock; - kcondvar_t fi_rem_sv; - volatile int fi_numio; - kmutex_t fi_sleep; - -} _sd_ft_info_t; - - -#define _SD_MIRROR_CONFIGD (_sd_ft_data.fi_host_state ==\ - _SD_HOST_CONFIGURED) -#define _SD_MIRROR_DECONFIGD (_sd_ft_data.fi_host_state == \ - _SD_HOST_DECONFIGURED) -#define _SD_MIRROR_NOCACHE (_sd_ft_data.fi_host_state == \ - _SD_HOST_NOCACHE) - -#define _SD_HOST_NONE 0x00 /* mirror node dead or state unknown */ -#define _SD_HOST_CONFIGURED 0x01 /* mirror cache configured */ -#define _SD_HOST_DECONFIGURED 0x02 /* mirror cache deconfigured */ -#define _SD_HOST_NOCACHE 0x03 /* mirror cache deconfigured and */ - /* waiting for node down or re-enable */ - -/* - * mirror node has paniced with cache enabled, - * or mirror cache has been deconfigured. - */ -#define _sd_is_mirror_crashed() ((!_INFSD_NODE_UP(_SD_MIRROR_HOST) &&\ - _SD_MIRROR_CONFIGD) || _SD_MIRROR_DECONFIGD) - -/* - * mirror node has shutdown having previously - * deconfigured its cache. 
- */ -#define _sd_is_mirror_node_down() \ - (!_INFSD_NODE_UP(_SD_MIRROR_HOST) &&\ - _SD_MIRROR_NOCACHE) - -#define _sd_is_mirror_down() (_sd_ft_data.fi_crashed) -#define _sd_mirror_cache_down() (_sd_ft_data.fi_crashed = 1,\ - _sd_ft_data.fi_host_state = _SD_HOST_NOCACHE) -#define _sd_mirror_down() (_sd_ft_data.fi_crashed = 1,\ - _sd_ft_data.fi_host_state = _SD_HOST_NONE) -#define _sd_mirror_up() (_sd_ft_data.fi_crashed = 0) -#ifdef _KERNEL - -extern _sd_ft_info_t _sd_ft_data; -extern int _sd_node_recovery; - -extern void _sdbc_ft_unload(void); -extern int _sdbc_ft_load(void); -extern int _sdbc_ft_configure(void); -extern void _sdbc_ft_deconfigure(void); -extern int _sd_recovery_wait(void); -extern int _sd_recovery_wblk_wait(int cd); -extern void _sd_mirror_iodone(void); -extern int _sd_repin_cd(int); -extern void _sd_remote_disable(int); -extern void r_sd_ifs_cache_enable(ncall_t *, int *); -extern void r_sd_ifs_cache_disable(ncall_t *, int *); -extern void _sd_hash_invalidate_cd(int); -extern void r_cd_discard(ncall_t *, int *); -extern int _sd_uncommit(_sd_buf_handle_t *, nsc_off_t, nsc_size_t, int); -extern int _sd_uncommit_refresh(_sd_cctl_t *, int); -extern void r_sd_uncommit_refresh(ncall_t *, int *); -extern int _sd_wait_for_flush(int); -extern int _sdbc_warm_start(void); -extern void _sdbc_set_warm_start(void); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_FT_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_hash.c b/usr/src/uts/common/avs/ns/sdbc/sd_hash.c deleted file mode 100644 index 93a4eac43a..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_hash.c +++ /dev/null @@ -1,499 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/ddi.h> -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "sd_bcache.h" -#include "sd_hash.h" - -#if defined(_SD_DEBUG) -int _sd_hash_max_inlist = 0; -#endif - - -#define _SD_HB_LOCKS 32 -static kmutex_t _sd_hb_locks[_SD_HB_LOCKS]; - - -/* - * _sdbc_hash_load - allocate all the locks for buckets. - * - * - */ -int -_sdbc_hash_load(void) -{ - int i; - for (i = 0; i < _SD_HB_LOCKS; i++) { - mutex_init(&_sd_hb_locks[i], NULL, MUTEX_DRIVER, NULL); - } - return (0); -} - -/* - * _sdbc_hash_unload - free all the locks for buckets. - * - * - */ -void -_sdbc_hash_unload(void) -{ - int i; - for (i = 0; i < _SD_HB_LOCKS; i++) { - mutex_destroy(&_sd_hb_locks[i]); - } -} - - -/* - * _sdbc_hash_configure - create a hash table - * - * ARGUMENTS: - * num_ents - Number of entries (or hash buckets) - * htype - Type of memory to allocate. - * - * RETURNS: - * The address of the hash table just created - * or zero in the event of failure. - * - * USAGE: - * This routine rounds of the number of entries to the next higher - * power of 2. 
Allocate the hash buckets and initializes the locks - * and returns the hash table that is created. - * It is the caller's responsibility to save the hash_table and pass - * it as a key for future accesses to the hash. - * It is also the caller's responsibility to destroy the hash table - * when appropriate. - */ - - -_sd_hash_table_t * -_sdbc_hash_configure(int num_ents) -{ - _sd_hash_table_t *hash_table; - _sd_hash_bucket_t *bucket; - int i; - int get_high_bit(int); - - if ((hash_table = (_sd_hash_table_t *) - nsc_kmem_zalloc(sizeof (_sd_hash_table_t), - KM_SLEEP, sdbc_hash_mem)) == NULL) - return (NULL); - - hash_table->ht_bits = get_high_bit(num_ents); - hash_table->ht_size = (1 << hash_table->ht_bits); - - /* - * this is where we compute the mask used in the hash function - * the ht_nmask is basically an not of ht_mask used in hash - * function. - */ - hash_table->ht_mask = (hash_table->ht_size - 1); - hash_table->ht_nmask = (~0 & ~(hash_table->ht_mask)); - - if ((hash_table->ht_buckets = (_sd_hash_bucket_t *) - nsc_kmem_zalloc(hash_table->ht_size * - sizeof (_sd_hash_bucket_t), KM_SLEEP, - sdbc_hash_mem)) == NULL) - return (NULL); - - for (i = 0; i < (hash_table->ht_size); i++) { - bucket = (hash_table->ht_buckets + i); - - bucket->hb_lock = &_sd_hb_locks[i % _SD_HB_LOCKS]; - bucket->hb_head = bucket->hb_tail = NULL; - bucket->hb_inlist = 0; - } - - return (hash_table); -} - - -/* - * _sdbc_hash_deconfigure - deconfigure a hash table - * - * ARGUMENTS: - * hash_table - hash table that was created earlier on. - * - * RETURNS: - * None. - * - * USAGE: - * this routine deallocates memory that was allocated during the - * hash create. 
- */ - -void -_sdbc_hash_deconfigure(_sd_hash_table_t *hash_table) -{ - if (!hash_table) - return; - - nsc_kmem_free(hash_table->ht_buckets, - hash_table->ht_size * sizeof (_sd_hash_bucket_t)); - - nsc_kmem_free(hash_table, sizeof (_sd_hash_table_t)); -} - -static int _sd_forced_hash_miss; -static int _sd_hash_collision; - - -/* - * _sd_hash_search - search the hash table for an entry - * - * ARGUMENTS: - * cd - device that we are interested in. - * block_num - block number we are interested in. - * hash_table - hash table to search in. - * - * RETURNS: - * returns a hash header if a match was found in the hash table - * for the device & block_num. - * Else returns 0. - * - * USAGE: - * This routine is called to check if a block already exists for - * the device, block_num combination. If the block does not exist, - * then a new block needs to be allocated and inserted into the hash - * table for future references. - */ - -_sd_hash_hd_t * -_sd_hash_search(int cd, nsc_off_t block_num, _sd_hash_table_t *table) -{ - int i; - _sd_hash_bucket_t *bucket; - _sd_hash_hd_t *hptr; -#if defined(_SD_HASH_OPTIMIZE) -#define MAX_HSEARCH_RETRIES 30 - int tries = 0; - _sd_hash_hd_t *hnext; - unsigned int seq; - - i = HASH(cd, block_num, table); - bucket = (table->ht_buckets + i); -retry_search: - seq = bucket->hb_seq; - for (hptr = bucket->hb_head; hptr; hptr = hnext) { - /* - * Save pointer for next before checking the seq counter. - */ - hnext = hptr->hh_next; - /* - * enforce ordering of load of hptr->hh_next - * above and bucket->hb_seq below - */ - sd_serialize(); - if (bucket->hb_seq != seq) { - /* - * To avoid looping forever, break out if a certain - * limit is reached. Its okay to return miss - * since the insert will do a proper search. 
- */ - if (++tries < MAX_HSEARCH_RETRIES) goto retry_search; - else { - _sd_forced_hash_miss++; - DTRACE_PROBE1(_sd_hash_search_end, - int, _sd_forced_hash_miss); - return (NULL); - } - } - if ((hptr->hh_cd == cd) && (hptr->hh_blk_num == block_num)) - break; - if (hptr->hh_blk_num > block_num) { - DTRACE_PROBE1(_sd_hash_search_end, - _sd_hash_hd_t *, hptr); - return (NULL); - } - } - - DTRACE_PROBE1(_sd_hash_search_end, - _sd_hash_hd_t *, hptr); - return (hptr); -#else - - i = HASH(cd, block_num, table); - bucket = (table->ht_buckets + i); - - mutex_enter(bucket->hb_lock); - - for (hptr = bucket->hb_head; hptr; hptr = hptr->hh_next) { - if ((hptr->hh_cd == cd) && (hptr->hh_blk_num == block_num)) - break; - /* - * the list is ordered. If we go beyond our block, no - * point searching - */ - if (hptr->hh_blk_num > block_num) { - hptr = NULL; - break; - } - } - mutex_exit(bucket->hb_lock); - - return (hptr); -#endif -} - - -/* - * _sd_hash_insert - insert an entry into the hash table - * - * ARGUMENTS: - * cd - device that we are interested in. - * block_num - block number we are interested in. - * hptr - pointer to block that we are inserting. - * table - hash table to search in. - * - * RETURNS: - * Pointer to block that was passed in, except if the cd, block_num - * already exists in the hash. Caller must check for return - * not equal hptr. - * - * USAGE: - * this routine inserts the hptr into the appropriate hash bucket and - * sets the cd, block_num in the block for future references. 
- */ - -_sd_hash_hd_t * -_sd_hash_insert(int cd, - nsc_off_t block_num, - _sd_hash_hd_t *hptr, - _sd_hash_table_t *table) -{ - int i; - _sd_hash_hd_t *p; - _sd_hash_bucket_t *bucket; - - i = HASH(cd, block_num, table); - bucket = (table->ht_buckets + i); - -#if defined(_SD_DEBUG) - if (hptr->hh_hashed) { - cmn_err(CE_WARN, "_sd_err: hptr %p bucket %p already hashed", - hptr, bucket); - } -#endif - hptr->hh_cd = (ushort_t)cd; - hptr->hh_blk_num = block_num; - - mutex_enter(bucket->hb_lock); - - for (p = bucket->hb_head; (p && (p->hh_blk_num <= block_num)); - p = p->hh_next) { - if ((p->hh_cd == cd) && (p->hh_blk_num == block_num)) { - mutex_exit(bucket->hb_lock); - _sd_hash_collision++; - DTRACE_PROBE2(_sd_hash_insert_end, - _sd_hash_hd_t *, p, - int, _sd_hash_collision); - - return (p); - } - } - hptr->hh_hashed = 1; - /* - * At this point, (p) points to the next higher block number or is - * NULL. If it is NULL, we are queueing to the tail of list. - * Else, insert just before p - */ - if (p) { - hptr->hh_next = p; - if ((hptr->hh_prev = p->hh_prev) != NULL) - p->hh_prev->hh_next = hptr; - else - bucket->hb_head = hptr; - p->hh_prev = hptr; - } else { - hptr->hh_next = NULL; - hptr->hh_prev = bucket->hb_tail; - if (bucket->hb_head) - bucket->hb_tail->hh_next = hptr; - else - bucket->hb_head = hptr; - bucket->hb_tail = hptr; - } -#if defined(_SD_HASH_OPTIMIZE) - bucket->hb_seq++; -#endif -#if defined(_SD_DEBUG) - if (_sd_hash_max_inlist < (int)++(bucket->hb_inlist)) - _sd_hash_max_inlist = bucket->hb_inlist; -#endif - mutex_exit(bucket->hb_lock); - - return (hptr); -} - - - -/* - * _sd_hash_delete - delete an entry from the hash table - * - * ARGUMENTS: - * hptr - pointer to delete from hash table. - * hash_table - hash table that was created earlier on. - * - * RETURNS: - * 0 on success. -1 on errors. - * - * USAGE: - * this routine deletes a hash entry from the hash table. 
- */ - -int -_sd_hash_delete(_sd_hash_hd_t *hptr, _sd_hash_table_t *table) -{ - int i; - _sd_hash_bucket_t *bucket; - - if (hptr->hh_hashed == 0) { - DTRACE_PROBE(_sd_hash_delete_end1); - return (-1); - } - - i = HASH(hptr->hh_cd, hptr->hh_blk_num, table); - bucket = (table->ht_buckets + i); - - /* was FAST */ - mutex_enter(bucket->hb_lock); - if (hptr->hh_hashed == 0) { - /* was FAST */ - mutex_exit(bucket->hb_lock); - DTRACE_PROBE(_sd_hash_delete_end2); - return (-1); - } - hptr->hh_hashed = 0; -#if defined(_SD_HASH_OPTIMIZE) - /* - * Increment sequence counter on bucket. This will signal a lookup - * to redo the lookup since we might have broken the link used - * during the lookup. - */ - bucket->hb_seq++; -#endif - - if (hptr->hh_prev) - hptr->hh_prev->hh_next = hptr->hh_next; - else - bucket->hb_head = hptr->hh_next; - if (hptr->hh_next) - hptr->hh_next->hh_prev = hptr->hh_prev; - else - bucket->hb_tail = hptr->hh_prev; -#if defined(_SD_DEBUG) - bucket->hb_inlist--; -#endif - /* was FAST */ - mutex_exit(bucket->hb_lock); - - return (0); -} - -/* - * _sd_hash_replace - replace 'old' with 'new' entry. - * - * ARGUMENTS: - * old - pointer to block being deleted (to be anonymous) - * new - pointer to block inserting in place. - * table - hash table to search in. - * - * RETURNS: - * pointer to inserted block. - * - * USAGE: - * expects old & new to refer to same block. - * new must not be already hashed. 
- */ - -_sd_hash_hd_t * -_sd_hash_replace(_sd_hash_hd_t *old, _sd_hash_hd_t *new, - _sd_hash_table_t *table) -{ - int i; - _sd_hash_bucket_t *bucket; - - if ((old->hh_cd != new->hh_cd) || (old->hh_blk_num != new->hh_blk_num)) - cmn_err(CE_PANIC, "_sd_hash_replace: mismatch %p %p", - (void *)old, (void *)new); - if (new->hh_hashed) - cmn_err(CE_PANIC, "_sd_hash_replace: new %p already hashed", - (void *)new); - if (old->hh_hashed == 0) { - _sd_hash_hd_t *hptr; - hptr = _sd_hash_insert(new->hh_cd, new->hh_blk_num, new, table); - - DTRACE_PROBE1(_sd_hash_replace_end, - _sd_hash_hd_t *, hptr); - - return (hptr); - } - - i = HASH(old->hh_cd, old->hh_blk_num, table); - bucket = (table->ht_buckets + i); - - /* was FAST */ - mutex_enter(bucket->hb_lock); - if (old->hh_hashed == 0) { - _sd_hash_hd_t *hptr; - /* was FAST */ - mutex_exit(bucket->hb_lock); - - hptr = _sd_hash_insert(new->hh_cd, new->hh_blk_num, new, table); - - DTRACE_PROBE1(_sd_hash_replace_end, - _sd_hash_hd_t *, hptr); - return (hptr); - } - old->hh_hashed = 0; - new->hh_hashed = 1; - new->hh_prev = old->hh_prev; - new->hh_next = old->hh_next; - - if (new->hh_prev) - new->hh_prev->hh_next = new; - else - bucket->hb_head = new; - if (new->hh_next) - new->hh_next->hh_prev = new; - else - bucket->hb_tail = new; -#if defined(_SD_HASH_OPTIMIZE) - bucket->hb_seq++; -#endif - /* was FAST */ - mutex_exit(bucket->hb_lock); - - return (new); -} diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_hash.h b/usr/src/uts/common/avs/ns/sdbc/sd_hash.h deleted file mode 100644 index fa57dc0f90..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_hash.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SD_HASH_H -#define _SD_HASH_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/nsctl/nsctl.h> - -#if defined(_KERNEL) || defined(_KMEMUSER) - -typedef struct _sd_hash_hd { - unsigned short hh_hashed; /* Is this block in hash */ - unsigned short hh_cd; /* The cache descriptor */ - nsc_off_t hh_blk_num; /* Cache block number */ - struct _sd_hash_hd *hh_prev; /* for chaining withing */ - struct _sd_hash_hd *hh_next; /* hash table */ -} _sd_hash_hd_t; - - -typedef struct _sd_hash_bucket { - struct _sd_hash_hd *hb_head; - struct _sd_hash_hd *hb_tail; - kmutex_t *hb_lock; - unsigned short hb_inlist; - volatile unsigned int hb_seq; -} _sd_hash_bucket_t; - - -typedef struct _sd_hash_table { - int ht_size; - int ht_bits; - int ht_mask; - int ht_nmask; - struct _sd_hash_bucket *ht_buckets; -} _sd_hash_table_t; - - -#endif /* _KERNEL && _KMEMUSER */ - - -#if defined(_KERNEL) - -#define HASH(cd, blk, table) \ - (((cd << 6) ^ ((blk) ^ ((blk) >> table->ht_bits))) \ - & (table->ht_mask)) - -#define HT_SEARCH 0 -#define HT_NOSEARCH 1 - -extern int _sdbc_hash_load(void); -extern void _sdbc_hash_unload(void); -extern _sd_hash_table_t *_sdbc_hash_configure(int num_ents); -extern void _sdbc_hash_deconfigure(_sd_hash_table_t *hash_table); -extern _sd_hash_hd_t *_sd_hash_search(int cd, nsc_off_t block_num, - _sd_hash_table_t *table); -extern _sd_hash_hd_t *_sd_hash_insert(int 
cd, nsc_off_t block_num, - _sd_hash_hd_t *hptr, _sd_hash_table_t *table); -extern int _sd_hash_delete(_sd_hash_hd_t *hptr, _sd_hash_table_t *table); -extern _sd_hash_hd_t *_sd_hash_replace(_sd_hash_hd_t *old, _sd_hash_hd_t *new, - _sd_hash_table_t *table); -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_HASH_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_io.c b/usr/src/uts/common/avs/ns/sdbc/sd_io.c deleted file mode 100644 index 05884467a9..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_io.c +++ /dev/null @@ -1,2009 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/buf.h> -#include <sys/cred.h> -#include <sys/errno.h> -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "sd_bcache.h" -#include "sd_trace.h" -#include "sd_io.h" -#include "sd_bio.h" -#include "sd_misc.h" -#include "sd_ft.h" -#include "sd_pcu.h" - -/* - * dynamic memory support - */ -_dm_process_vars_t dynmem_processing_dm; -static int sd_dealloc_flag_dm = NO_THREAD_DM; -static void _sd_dealloc_dm(void); -static int _sd_entry_availability_dm(_sd_cctl_t *cc_ent, int *nodata); - -extern void sdbc_requeue_dmchain(_sd_queue_t *, _sd_cctl_t *, int, int); -extern void sdbc_ins_dmqueue_front(_sd_queue_t *q, _sd_cctl_t *cc_ent); -extern void sdbc_remq_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent); -extern void sdbc_requeue_head_dm_try(_sd_cctl_t *); -extern int sdbc_use_dmchain; -extern _sd_queue_t *sdbc_dm_queues; - -kcondvar_t _sd_flush_cv; -static volatile int _sd_flush_exit; - -/* secret flush toggle flag for testing */ -#ifdef DEBUG -int _sdbc_flush_flag = 1; /* 0 ==> noflushing, 1 ==> flush */ -#endif - -static int sdbc_flush_pageio; - - - -/* - * Forward declare all statics that are used before defined to enforce - * parameter checking - * Some (if not all) of these could be removed if the code were reordered - */ - -static void _sd_flcent_ea(blind_t xcc_ent, nsc_off_t fba_pos, - nsc_size_t fba_len, int error); -static void _sd_flclist_ea(blind_t xcc_ent, nsc_off_t fba_pos, - nsc_size_t fba_len, int error); -static void _sd_process_reflush(_sd_cctl_t *cc_ent); -static void _sd_flush_thread(void); - -int -_sdbc_flush_configure(void) -{ - _sd_flush_exit = 1; - sdbc_flush_pageio = 0; - return (nsc_create_process( - (void (*)(void *))_sd_flush_thread, 0, TRUE)); -} - - -void -_sdbc_flush_deconfigure(void) -{ - _sd_unblock(&_sd_flush_cv); - _sd_flush_exit = 0; 
-} - -static int -sdbc_alloc_static_cache(int reqblks) -{ - _sd_cctl_t *centry; - _sd_cctl_t *next_centry; - - if (centry = sdbc_centry_alloc_blks(_CD_NOHASH, 0, reqblks, - ALLOC_NOWAIT)) { - /* release the blocks to the queue */ - while (centry) { - next_centry = centry->cc_chain; - _sd_centry_release(centry); - centry = next_centry; - } - return (reqblks); - } - return (0); -} - -int -_sdbc_dealloc_configure_dm(void) -{ - int rc = 0; - int reqblks = MEGABYTE/BLK_SIZE(1); /* alloc in mb chunks */ - int i; - int blk_groups; /* number of ~MB groups */ - int blks_remaining; - int blks_allocd = 0; - - dynmem_processing_dm.alloc_ct = 0; - dynmem_processing_dm.dealloc_ct = 0; - - if (sdbc_static_cache) { /* alloc all static cache memory here */ - dynmem_processing_dm.max_dyn_list = reqblks; - - blk_groups = CBLOCKS / reqblks; - blks_remaining = CBLOCKS % reqblks; - - for (i = 0; i < blk_groups; ++i) { - if (!sdbc_alloc_static_cache(reqblks)) - break; - blks_allocd += reqblks; - } - DTRACE_PROBE2(_sdbc_dealloc_configure_dm1, - int, i, int, blks_allocd); - - /* if successful then allocate any remaining blocks */ - if ((i == blk_groups) && blks_remaining) - if (sdbc_alloc_static_cache(blks_remaining)) - blks_allocd += blks_remaining; - - DTRACE_PROBE2(_sdbc_dealloc_configure_dm2, - int, i, int, blks_allocd); - - sd_dealloc_flag_dm = NO_THREAD_DM; - - if (blks_allocd < CBLOCKS) { - cmn_err(CE_WARN, "!Failed to allocate sdbc cache " - "memory.\n requested mem: %d MB; actual mem: %d MB", - CBLOCKS/reqblks, blks_allocd/reqblks); - rc = ENOMEM; - } - - -#ifdef DEBUG - cmn_err(CE_NOTE, "!sdbc(_sdbc_dealloc_configure_dm) %d bytes " - "(%d cache blocks) allocated for static cache, " - "block size %d", blks_allocd * BLK_SIZE(1), blks_allocd, - BLK_SIZE(1)); -#endif /* DEBUG */ - } else { - sd_dealloc_flag_dm = PROCESS_CACHE_DM; - rc = nsc_create_process((void (*)(void *))_sd_dealloc_dm, 0, - TRUE); - if (rc != 0) - sd_dealloc_flag_dm = NO_THREAD_DM; - } - return (rc); -} - -/* - * 
sdbc_dealloc_dm_shutdown - deallocate cache memory. - * - * ARGUMENTS: none - * - * RETURNS: nothing - * - * USAGE: - * this function is intended for use after all i/o has stopped and all - * other cache threads have terminated. write cache resources, if any - * are released, except in the case of pinned data. - */ -static void -sdbc_dealloc_dm_shutdown() -{ - _sd_cctl_t *cc_ent; - ss_centry_info_t *wctl; - - cc_ent = _sd_cctl[0]; - - if (!cc_ent) - return; - - do { - if (cc_ent->cc_alloc_size_dm) { - /* HOST or OTHER */ - - if (cc_ent->cc_data) - kmem_free(cc_ent->cc_data, - cc_ent->cc_alloc_size_dm); - - cc_ent->cc_alloc_size_dm = 0; - - dynmem_processing_dm.dealloc_ct++; - - DTRACE_PROBE2(sdbc_dealloc_dm_shutdown, char *, - cc_ent->cc_data, int, cc_ent->cc_alloc_size_dm); - } - - /* release safestore resource, if any. preserve pinned data */ - if (!(CENTRY_DIRTY(cc_ent)) && (wctl = cc_ent->cc_write)) { - wctl->sc_flag = 0; - wctl->sc_dirty = 0; - - SSOP_SETCENTRY(sdbc_safestore, wctl); - SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res); - } - cc_ent = cc_ent->cc_link_list_dm; - } while (cc_ent != _sd_cctl[0]); -} - -void -_sdbc_dealloc_deconfigure_dm(void) -{ - int one_sec; - - if (sdbc_static_cache) { - sdbc_dealloc_dm_shutdown(); - return; - } - - if (sd_dealloc_flag_dm == NO_THREAD_DM) - return; /* thread never started */ - one_sec = HZ; /* drv_usectohz(1000000); */ - - mutex_enter(&dynmem_processing_dm.thread_dm_lock); - sd_dealloc_flag_dm = CACHE_SHUTDOWN_DM; - cv_broadcast(&dynmem_processing_dm.thread_dm_cv); - mutex_exit(&dynmem_processing_dm.thread_dm_lock); - - while (sd_dealloc_flag_dm != CACHE_THREAD_TERMINATED_DM) - delay(one_sec); - - sd_dealloc_flag_dm = NO_THREAD_DM; -} - -/* - * This complicated - possibly overly complicated routine works as follows: - * In general the routine sleeps a specified amount of time then wakes and - * examines the entire centry list. If an entry is avail. 
it ages it by one - * tick else it clears the aging flag completely. It then determines if the - * centry has aged sufficiently to have its memory deallocated and for it to - * be placed at the top of the lru. - * - * There are two deallocation schemes in place depending on whether the - * centry is a standalone entry or it is a member of a host/parasite chain. - * - * The behavior for a standalone entry is as follows: - * If the given centry is selected it will age normally however at full - * aging it will only be placed at the head of the lru. It's memory will - * not be deallocated until a further aging level has been reached. The - * entries selected for this behavior are goverend by counting the number - * of these holdovers in existence on each wakeup and and comparing it - * to a specified percentage. This comparision is always one cycle out of - * date and will float in the relative vicinity of the specified number. - * - * The behavior for a host/parasite chain is as follows: - * The chain is examined. If all entries are fully aged the entire chain - * is removed - ie mem is dealloc. from the host entry and all memory ref. - * removed from the parasitic entries and each entry requeued on to the lru. - * - * There are three delay timeouts and two percentage levels specified. Timeout - * level 1 is honored between 100% free and pcnt level 1. Timeout level 2 is - * honored between pcnt level 1 and pcnt level 2, Timeout level 3 is - * honored between pcnt level 2 and 0% free. In addition there exist an - * accelerated - * aging flag which mimics hysterisis behavior. If the available centrys fall - * between pcnt1 and pcnt2 an 8 bit counter is switched on. The effect is to - * keep the timer value at timer level 2 for 8 cycles even if the number - * available cache entries drifts above pcnt1. If it falls below pcnt2 an - * additional 8 bit counter is switched on. 
This causes the sleep timer to - * remain at timer level 3 for at least 8 cycles even if it floats above - * pcnt2 or even pcnt1. The effect of all this is to accelerate the release - * of system resources under a heavy load. - * - * All of the footwork can be stubbed out by a judicious selection of values - * for the times, aging counts and pcnts. - * - * All of these behavior parameters are adjustable on the fly via the kstat - * mechanism. In addition there is a thread wakeup msg available through the - * same mechanism. - */ - -static void -_sd_dealloc_dm(void) -{ - int one_sec_tics, tic_delay; - int sleep_tics_lvl1, sleep_tics_lvl2, sleep_tics_lvl3; - int transition_lvl1, transition_lvl2; - int host_cache_aging_ct, meta_cache_aging_ct, hold_cache_aging_ct; - int max_holds_ct; - int cache_aging_ct, hold_candidate, last_holds_ct; - _sd_cctl_t *cc_ent, *next_ccentry, *cur_ent, *nxt_ent; - ss_centry_info_t *wctl; - int current_breakout_count, number_cache_entries; - int dealloc; - _dm_process_vars_t *ppvars; - - int write_dealloc; /* remove after debugging */ - - ppvars = &dynmem_processing_dm; - - /* setup a one sec time var */ - one_sec_tics = HZ; /* drv_usectohz(1000000); */ - - ppvars->history = 0; - - cc_ent = _sd_cctl[0]; - - number_cache_entries = _sd_net_config.sn_cpages; - - last_holds_ct = 0; - - /*CONSTANTCONDITION*/ - while (1) { - if (sd_dealloc_flag_dm == CACHE_SHUTDOWN_DM) { - /* finished. 
shutdown - get out */ - sdbc_dealloc_dm_shutdown(); /* free all memory */ - sd_dealloc_flag_dm = CACHE_THREAD_TERMINATED_DM; - return; - } - - /* has the world changed */ - - /* - * get num cctl entries (%) below which different sleep - * rates kick in - */ - transition_lvl1 = - (ppvars->cache_aging_pcnt1*number_cache_entries) / 100; - transition_lvl2 = - (ppvars->cache_aging_pcnt2*number_cache_entries) / 100; - - /* get sleep rates for each level */ - sleep_tics_lvl1 = ppvars->cache_aging_sec1 * one_sec_tics; - sleep_tics_lvl2 = ppvars->cache_aging_sec2 * one_sec_tics; - sleep_tics_lvl3 = ppvars->cache_aging_sec3 * one_sec_tics; - - /* get num of cycles for full normal aging */ - host_cache_aging_ct = ppvars->cache_aging_ct1; - - /* get num of cycles for full meta aging */ - meta_cache_aging_ct = ppvars->cache_aging_ct2; - - /* get num of cycles for full extended holdover aging */ - hold_cache_aging_ct = ppvars->cache_aging_ct3; - - /* get maximum holds count in % */ - max_holds_ct = (ppvars->max_holds_pcnt*number_cache_entries) - / 100; - - /* apply the delay */ - tic_delay = sleep_tics_lvl1; - if (sd_dealloc_flag_dm == TIME_DELAY_LVL1) - tic_delay = sleep_tics_lvl2; - else - if (sd_dealloc_flag_dm == TIME_DELAY_LVL2) - tic_delay = sleep_tics_lvl3; - - mutex_enter(&ppvars->thread_dm_lock); - (void) cv_reltimedwait(&ppvars->thread_dm_cv, - &ppvars->thread_dm_lock, tic_delay, TR_CLOCK_TICK); - mutex_exit(&ppvars->thread_dm_lock); - - /* check for special directives on wakeup */ - if (ppvars->process_directive & - MAX_OUT_ACCEL_HIST_FLAG_DM) { - ppvars->process_directive &= - ~MAX_OUT_ACCEL_HIST_FLAG_DM; - ppvars->history = - (HISTORY_LVL1|HISTORY_LVL2); - } - - /* Start of deallocation loop */ - current_breakout_count = 0; - - ppvars->nodatas = 0; - write_dealloc = 0; - ppvars->deallocs = 0; - ppvars->candidates = 0; - ppvars->hosts = 0; - ppvars->pests = 0; - ppvars->metas = 0; - ppvars->holds = 0; - ppvars->others = 0; - ppvars->notavail = 0; - - while 
(sd_dealloc_flag_dm != CACHE_SHUTDOWN_DM && - current_breakout_count < number_cache_entries) { - - next_ccentry = cc_ent->cc_link_list_dm; - - if (_sd_entry_availability_dm(cc_ent, &ppvars->nodatas) - == FALSE) { - ppvars->notavail++; - goto next_dealloc_entry; - } - - cache_aging_ct = host_cache_aging_ct; - hold_candidate = FALSE; - if (cc_ent->cc_aging_dm & HOST_ENTRY_DM) - ppvars->hosts++; - else - if (cc_ent->cc_aging_dm & PARASITIC_ENTRY_DM) - ppvars->pests++; - else - if (cc_ent->cc_aging_dm & STICKY_METADATA_DM) { - cache_aging_ct = meta_cache_aging_ct; - ppvars->metas++; - } else { - if (last_holds_ct < max_holds_ct) - hold_candidate = TRUE; - ppvars->others++; - } - - ppvars->candidates++; - - if ((cc_ent->cc_aging_dm & FINAL_AGING_DM) < - cache_aging_ct) { - cc_ent->cc_aging_dm += FIRST_AGING_DM; - CLEAR_CENTRY_PAGEIO(cc_ent); - CLEAR_CENTRY_INUSE(cc_ent); - goto next_dealloc_entry; - } - - /* bonafide aged entry - examine its chain */ - dealloc = TRUE; - cur_ent = cc_ent->cc_head_dm; - while (cur_ent) { - if (cur_ent == cc_ent) - cur_ent->cc_aging_dm |= AVAIL_ENTRY_DM; - else { - if (_sd_entry_availability_dm(cur_ent, - 0) == TRUE) { - cur_ent->cc_aging_dm |= - AVAIL_ENTRY_DM; - if ((cur_ent->cc_aging_dm & - FINAL_AGING_DM) < - cache_aging_ct) - dealloc = FALSE; - } else - dealloc = FALSE; - } - - cur_ent = cur_ent->cc_next_dm; - } - cur_ent = cc_ent->cc_head_dm; - - /* chain not fully free - free inuse for all entries */ - if (dealloc == FALSE) { - while (cur_ent) { - nxt_ent = cur_ent->cc_next_dm; - - if (cur_ent->cc_aging_dm & - AVAIL_ENTRY_DM) { - cur_ent->cc_aging_dm &= - ~AVAIL_ENTRY_DM; - CLEAR_CENTRY_PAGEIO(cur_ent); - CLEAR_CENTRY_INUSE(cur_ent); - } - cur_ent = nxt_ent; - } - } else { /* OK - free memory */ - if (hold_candidate == TRUE && - (cur_ent->cc_aging_dm & FINAL_AGING_DM) < - hold_cache_aging_ct) { - ppvars->holds++; - - ASSERT(cur_ent == cc_ent); - - cc_ent->cc_aging_dm += FIRST_AGING_DM; - - cur_ent->cc_aging_dm &= ~AVAIL_ENTRY_DM; - 
- wctl = cur_ent->cc_write; - - CLEAR_CENTRY_PAGEIO(cur_ent); - CLEAR_CENTRY_INUSE(cur_ent); - - if (wctl) { - write_dealloc++; - wctl->sc_flag = 0; - wctl->sc_dirty = 0; - SSOP_SETCENTRY(sdbc_safestore, - wctl); - SSOP_DEALLOCRESOURCE( - sdbc_safestore, - wctl->sc_res); - } - goto next_dealloc_entry; - } /* if (hold_candidate == TRUE */ - - while (cur_ent) { - - DTRACE_PROBE4(_sd_dealloc_dm, - _sd_cctl_t *, cur_ent, - int, CENTRY_CD(cur_ent), - int, CENTRY_BLK(cur_ent), - uint_t, cur_ent->cc_aging_dm); - - if ((cur_ent->cc_aging_dm - & BAD_CHAIN_DM)) { - (void) _sd_hash_delete( - (_sd_hash_hd_t *)cur_ent, - _sd_htable); - - nxt_ent = cur_ent->cc_next_dm; - CLEAR_CENTRY_PAGEIO(cur_ent); - CLEAR_CENTRY_INUSE(cur_ent); - cur_ent = nxt_ent; - continue; - } - - ppvars->deallocs++; - - if (cur_ent->cc_alloc_size_dm) { - int qidx; - _sd_queue_t *q; - - /* HOST or OTHER */ - - /* debugging */ - ppvars->dealloc_ct++; - cur_ent->cc_dealloc_ct_dm++; - kmem_free(cur_ent->cc_data, - cur_ent->cc_alloc_size_dm); - - /* - * remove from queue - * in preparation for putting - * on the 0 queue after - * memory is freed - */ - if (sdbc_use_dmchain) { - - qidx = - cur_ent->cc_cblocks; - q = &sdbc_dm_queues - [qidx]; - - sdbc_remq_dmchain(q, - cur_ent); - } - } - - wctl = cur_ent->cc_write; - cur_ent->cc_write = 0; - cur_ent->cc_data = 0; - cur_ent->cc_alloc_size_dm = 0; - cur_ent->cc_head_dm = NULL; - cur_ent->cc_aging_dm &= - ~(FINAL_AGING_DM | ENTRY_FIELD_DM | - CATAGORY_ENTRY_DM | AVAIL_ENTRY_DM | - PREFETCH_BUF_I | PREFETCH_BUF_E); - - (void) _sd_hash_delete( - (_sd_hash_hd_t *)cur_ent, - _sd_htable); - cur_ent->cc_valid = 0; - - if (sdbc_use_dmchain) { - _sd_queue_t *q; - - nxt_ent = cur_ent->cc_next_dm; - - cur_ent->cc_next_dm = NULL; - - CLEAR_CENTRY_PAGEIO(cur_ent); - CLEAR_CENTRY_INUSE(cur_ent); - - q = &sdbc_dm_queues[0]; - sdbc_ins_dmqueue_front(q, - cur_ent); - } else { - _sd_requeue_head(cur_ent); - - nxt_ent = cur_ent->cc_next_dm; - cur_ent->cc_next_dm = NULL; - - 
CLEAR_CENTRY_PAGEIO(cur_ent); - CLEAR_CENTRY_INUSE(cur_ent); - } - - cur_ent = nxt_ent; - - if (wctl) { - write_dealloc++; - wctl->sc_flag = 0; - wctl->sc_dirty = 0; - SSOP_SETCENTRY(sdbc_safestore, - wctl); - SSOP_DEALLOCRESOURCE( - sdbc_safestore, - wctl->sc_res); - } - } /* while (cur_ent) */ - } /* else OK - free memory */ -next_dealloc_entry: - current_breakout_count++; - - cc_ent = next_ccentry; - } /* while (entries) */ - - if (ppvars->monitor_dynmem_process & RPT_DEALLOC_STATS1_DM) { - cmn_err(CE_NOTE, - "!notavl=%x, nodat=%x, cand=%x, hosts=%x," - " pests=%x, metas=%x, holds=%x, others=%x," - " deallo=%x", - ppvars->notavail, ppvars->nodatas, - ppvars->candidates, ppvars->hosts, ppvars->pests, - ppvars->metas, ppvars->holds, ppvars->others, - ppvars->deallocs); - } - - if (ppvars->monitor_dynmem_process & RPT_DEALLOC_STATS2_DM) { - cmn_err(CE_NOTE, - "!hist=%x, gross a/d=%x %x", ppvars->history, - ppvars->alloc_ct, ppvars->dealloc_ct); - } - - if (sd_dealloc_flag_dm == CACHE_SHUTDOWN_DM) - continue; - - last_holds_ct = ppvars->holds; - - /* set the history flag which will govern the sleep rate */ - if (ppvars->nodatas > transition_lvl1) { - /* upper - lots of virgin cctls */ - if (ppvars->history) - ppvars->history >>= 1; - } else { - if (ppvars->nodatas > transition_lvl2) { - /* middle - not so many virgin cctls */ - if (ppvars->history & (HISTORY_LVL1-1)) - ppvars->history >>= 1; - else - ppvars->history = HISTORY_LVL1; - - } else { - /* - * appear to be running low - accelerate the - * aging to free more - */ - if (ppvars->history & HISTORY_LVL2) - ppvars->history >>= 1; - else - ppvars->history = - (HISTORY_LVL1|HISTORY_LVL2); - } - } - - sd_dealloc_flag_dm = TIME_DELAY_LVL0; - if (ppvars->history & HISTORY_LVL2) - sd_dealloc_flag_dm = TIME_DELAY_LVL2; - else - if (ppvars->history & HISTORY_LVL1) - sd_dealloc_flag_dm = TIME_DELAY_LVL1; - - } /* while (TRUE) */ -} - -int -_sd_entry_availability_dm(_sd_cctl_t *cc_ent, int *nodata) -{ - /* - * if using 
dmchaining return immediately and do not attempt - * to acquire the cc_ent if there is no memory associated with - * this cc_ent. - * this avoids conflicts for centrys on the 0 queue. - * see sdbc_get_dmchain() - */ - - if ((sdbc_use_dmchain) && (cc_ent->cc_data == 0)) { - - if (nodata) - (*nodata)++; - - DTRACE_PROBE(sdbc_availability_dm_end1); - return (FALSE); - } - - if ((SET_CENTRY_INUSE(cc_ent))) { - - DTRACE_PROBE(sdbc_availability_dm_end2); - - return (FALSE); - } - - - if ((SET_CENTRY_PAGEIO(cc_ent))) { - - CLEAR_CENTRY_INUSE(cc_ent); - - DTRACE_PROBE(sdbc_availability_dm_end3); - - return (FALSE); - } - - /* - * we allow the QHEAD flag as it does not affect the availabilty - * of memory for aging - */ - if ((CENTRY_DIRTY(cc_ent)) || (CENTRY_IO_INPROGRESS(cc_ent)) || - (cc_ent->cc_flag & ~(CC_QHEAD)) || - cc_ent->cc_dirty_next || cc_ent->cc_dirty_link || - cc_ent->cc_data == 0) { - - cc_ent->cc_aging_dm &= ~FINAL_AGING_DM; - if (nodata) - if (cc_ent->cc_data == 0) { - (*nodata)++; - } - - CLEAR_CENTRY_PAGEIO(cc_ent); - CLEAR_CENTRY_INUSE(cc_ent); - - DTRACE_PROBE(sdbc_availability_dm_end4); - - return (FALSE); - } - - return (TRUE); -} - -/* - * function below to prohibit code movement by compiler - * and avoid using spinlocks for syncronization - */ -static void -_sd_cc_iostatus_initiate(_sd_cctl_t *cc_ent) -{ - cc_ent->cc_iostatus = _SD_IO_INITIATE; - sd_serialize(); -} - -/* - * Yet another switch! 
- * alloc mem and coalesce if at least this number of frags - */ -static int sdbc_coalesce_backend = 1; - -/* - * optimization for _sd_async_flclist() - * called only if not doing pageio and sdbc_coalesce_backend > 0 - * - * returns with pagio bit set in the centrys in list - */ -static unsigned char * -sdbc_alloc_io_mem(_sd_cctl_t *cc_ent, int first_dirty, int last_dirty) -{ - unsigned char *prev_addr = NULL; - _sd_cctl_t *cc_ent_orig = cc_ent; - int fba_len; - int total_len_bytes = 0; - unsigned char *start_addr = NULL; /* function return value */ - unsigned char *next_addr; - int num_frags = 0; - - if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) { - WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio); - - fba_len = SDBC_LOOKUP_LEN(first_dirty); - total_len_bytes += FBA_SIZE(fba_len); - - prev_addr = cc_ent->cc_data; - cc_ent = cc_ent->cc_dirty_next; - } - - while (cc_ent) { - - WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio); - /* check for contiguity */ - if (prev_addr && - !((prev_addr + CACHE_BLOCK_SIZE) == cc_ent->cc_data)) - ++num_frags; - - /* compute length */ - if (FULLY_DIRTY(cc_ent)) { - total_len_bytes += CACHE_BLOCK_SIZE; - } else { - fba_len = SDBC_LOOKUP_LEN(last_dirty); - total_len_bytes += FBA_SIZE(fba_len); - } - - prev_addr = cc_ent->cc_data; - cc_ent = cc_ent->cc_dirty_next; - } - - if (num_frags >= sdbc_coalesce_backend) { - /* - * TODO - determine metric for deciding - * whether to coalesce memory or do separate i/o's - */ - - DTRACE_PROBE(sdbc_io_mem_kmem_start); - - if (start_addr = kmem_alloc(total_len_bytes, KM_NOSLEEP)) { - int sblk, offset; - - cc_ent = cc_ent_orig; - - cc_ent->cc_anon_addr.sa_virt = start_addr; - cc_ent->cc_anon_len = total_len_bytes; - - next_addr = start_addr; - - DTRACE_PROBE2(sdbc_io_mem_bcopy_start, - int, num_frags, int, total_len_bytes); - - /* copy the first dirty piece */ - if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) { - - fba_len = SDBC_LOOKUP_LEN(first_dirty); - sblk = SDBC_LOOKUP_STPOS(first_dirty); - 
offset = FBA_SIZE(sblk); - - bcopy(cc_ent->cc_data + offset, next_addr, - FBA_SIZE(fba_len)); - cc_ent = cc_ent->cc_dirty_next; - next_addr += FBA_SIZE(fba_len); - } - - /* copy the rest of data */ - while (cc_ent) { - if (FULLY_DIRTY(cc_ent)) { - bcopy(cc_ent->cc_data, next_addr, - CACHE_BLOCK_SIZE); - next_addr += CACHE_BLOCK_SIZE; - } else { - fba_len = SDBC_LOOKUP_LEN(last_dirty); - bcopy(cc_ent->cc_data, next_addr, - FBA_SIZE(fba_len)); - next_addr += FBA_SIZE(fba_len); - } - - cc_ent = cc_ent->cc_dirty_next; - } - - DTRACE_PROBE(sdbc_io_mem_bcopy_end); - } - - DTRACE_PROBE(sdbc_io_mem_kmem_end); - } - - return (start_addr); -} - -void -_sd_async_flclist(_sd_cctl_t *cclist, dev_t rdev) -{ - int flushed, i, cd; - uint_t first_dirty, last_dirty; - _sd_cctl_t *cc_ent, *cc_prev = NULL; - struct buf *bp; - int dblk, fba_len; - int len; - int toflush; - int coalesce; /* convenience boolean */ - unsigned char *anon_mem = NULL; - extern int sdbc_do_page; - - - SDTRACE(ST_ENTER|SDF_FLCLIST, CENTRY_CD(cclist), - 0, BLK_TO_FBA_NUM(CENTRY_BLK(cclist)), 0, 0); - - coalesce = (!sdbc_do_page && sdbc_coalesce_backend); - - cc_ent = cclist; - _sd_cc_iostatus_initiate(cc_ent); - first_dirty = CENTRY_DIRTY(cc_ent); - if (SDBC_IS_FRAGMENTED(first_dirty)) { - cclist = cc_ent->cc_dirty_next; - cc_ent->cc_dirty_next = NULL; - _sd_async_flcent(cc_ent, rdev); - cc_ent = cclist; - first_dirty = 0; - } - - toflush = 0; - while (cc_ent->cc_dirty_next) { - if (cc_ent->cc_iocount) - SDALERT(SDF_FLCLIST, CENTRY_CD(cc_ent), 0, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - cc_ent->cc_iocount, 0); - cc_prev = cc_ent; - cc_ent = cc_ent->cc_dirty_next; - toflush++; - } - _sd_cc_iostatus_initiate(cc_ent); - last_dirty = CENTRY_DIRTY(cc_ent); - if (SDBC_IS_FRAGMENTED(last_dirty)) { - if (cc_prev) - cc_prev->cc_dirty_next = NULL; - _sd_async_flcent(cc_ent, rdev); - last_dirty = 0; - } - else - toflush++; - - if (toflush == 0) - return; - - - dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cclist)); - if (first_dirty && 
(!_SD_BMAP_ISFULL(first_dirty))) - dblk += SDBC_LOOKUP_STPOS(first_dirty); - - cd = CENTRY_CD(cclist); - bp = sd_alloc_iob(rdev, dblk, toflush, B_WRITE); - cc_ent = cclist; - - if (coalesce && (anon_mem = sdbc_alloc_io_mem(cc_ent, first_dirty, - last_dirty))) - sd_add_fba(bp, &cc_ent->cc_anon_addr, 0, - FBA_NUM(cc_ent->cc_anon_len)); - - if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) { - cc_ent->cc_iocount = flushed = 1; - - /* pageio bit already set in sdbc_alloc_io_mem() above */ - if (!coalesce) - WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio); - - fba_len = SDBC_LOOKUP_LEN(first_dirty); - - /* build buffer only if it was not done above */ - if (!anon_mem) { - i = SDBC_LOOKUP_STPOS(first_dirty); - sd_add_fba(bp, &cc_ent->cc_addr, i, fba_len); - DATA_LOG(SDF_FLSHLIST, cc_ent, i, fba_len); - - DTRACE_PROBE4(_sd_async_flclist_data1, int, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + i, - int, fba_len, char *, - *(int64_t *)(cc_ent->cc_data + FBA_SIZE(i)), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(i + fba_len) - 8)); - } - - len = FBA_SIZE(fba_len); - cc_ent = cc_ent->cc_dirty_next; - } else { - len = 0; - flushed = 0; - } - while (cc_ent) { - _sd_cc_iostatus_initiate(cc_ent); - - /* pageio bit already set in sdbc_alloc_io_mem() above */ - if (!coalesce) - WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio); - - if (FULLY_DIRTY(cc_ent)) { - flushed++; - cc_ent->cc_iocount = 1; - - /* build buffer only if it was not done above */ - if (!anon_mem) { - sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS); - DATA_LOG(SDF_FLSHLIST, cc_ent, 0, BLK_FBAS); - - DTRACE_PROBE4(_sd_async_flclist_data2, - int, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - int, BLK_FBAS, char *, - *(int64_t *)(cc_ent->cc_data), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(BLK_FBAS) - 8)); - } - - len += CACHE_BLOCK_SIZE; - } else { -#if defined(_SD_DEBUG) - /* - * consistency check. 
- */ - if (!last_dirty || cc_ent->cc_dirty_next || - SDBC_IS_FRAGMENTED(last_dirty)) { - SDALERT(SDF_FLCLIST, cd, 0, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - cc_ent->cc_dirty_next, last_dirty); - cmn_err(CE_WARN, - "!_sd_err: flclist: last_dirty %x next %x", - last_dirty, cc_ent->cc_dirty_next); - } -#endif - flushed++; - cc_ent->cc_iocount = 1; - - fba_len = SDBC_LOOKUP_LEN(last_dirty); - - /* build buffer only if it was not done above */ - if (!anon_mem) { - sd_add_fba(bp, &cc_ent->cc_addr, 0, fba_len); - DATA_LOG(SDF_FLSHLIST, cc_ent, 0, fba_len); - - DTRACE_PROBE4(_sd_async_flclist_data3, int, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - int, fba_len, char *, - *(int64_t *)(cc_ent->cc_data), char *, - *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(fba_len) - 8)); - } - - len += FBA_SIZE(fba_len); - } - cc_ent = cc_ent->cc_dirty_next; - } - -#ifdef DEBUG - if (anon_mem) - ASSERT(len == cclist->cc_anon_len); -#endif - - /* SDTRACE(ST_INFO|SDF_FLCLIST, cd, FBA_NUM(len), dblk, flushed, bp); */ - (void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy, - _sd_flclist_ea, cclist); - - DISK_FBA_WRITE(cd, FBA_NUM(len)); - /* increment number of bytes destaged to disk */ - WRITE_DESTAGED(cd, FBA_NUM(len)); - - _sd_enqueue_io_pending(cd, cclist); - - SDTRACE(ST_EXIT|SDF_FLCLIST, cd, FBA_NUM(len), dblk, flushed, 0); -} - - -void -_sd_enqueue_io_pending(int cd, _sd_cctl_t *cclist) -{ - _sd_cd_info_t *cdi; - - cdi = &(_sd_cache_files[cd]); - if (cdi->cd_io_head == NULL) - cdi->cd_io_head = cdi->cd_io_tail = cclist; - else { - cdi->cd_io_tail->cc_dirty_link = cclist; - cdi->cd_io_tail = cclist; - } -} - - - -void -_sd_async_flcent(_sd_cctl_t *cc_ent, dev_t rdev) -{ - int dblk, len, sblk; - int dirty; - struct buf *bp; - int cd; - - cd = CENTRY_CD(cc_ent); - - SDTRACE(ST_ENTER|SDF_FLCENT, cd, 0, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), 0, 0); -#if defined(_SD_DEBUG_PATTERN) - check_write_consistency(cc_ent); -#endif - if (cc_ent->cc_iocount) - SDALERT(SDF_FLCENT, cd, 0, 
BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - cc_ent->cc_iocount, 0); - _sd_cc_iostatus_initiate(cc_ent); - WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio); - - dirty = CENTRY_DIRTY(cc_ent); - - if (_SD_BMAP_ISFULL(dirty)) { - cc_ent->cc_iocount = 1; - dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)); - bp = sd_alloc_iob(rdev, dblk, 1, B_WRITE); - sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS); - DATA_LOG(SDF_FLSHENT, cc_ent, 0, BLK_FBAS); - - DTRACE_PROBE4(_sd_async_flcent_data1, - int, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - int, BLK_FBAS, char *, *(int64_t *)(cc_ent->cc_data), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(BLK_FBAS) - 8)); - cc_ent->cc_iocount = 1; - (void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy, - _sd_flcent_ea, cc_ent); - DISK_FBA_WRITE(cd, BLK_FBAS); - /* increment number of bytes destaged to disk */ - WRITE_DESTAGED(cd, BLK_FBAS); - } else { - cc_ent->cc_iocount = SDBC_LOOKUP_DTCOUNT(dirty); - - while (dirty) { - sblk = SDBC_LOOKUP_STPOS(dirty); - len = SDBC_LOOKUP_LEN(dirty); - SDBC_LOOKUP_MODIFY(dirty); - - dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + sblk; - bp = sd_alloc_iob(rdev, dblk, 1, B_WRITE); - sd_add_fba(bp, &cc_ent->cc_addr, sblk, len); - DATA_LOG(SDF_FLSHENT, cc_ent, sblk, len); - - DTRACE_PROBE4(_sd_async_flcent_data2, int, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + sblk, - int, len, char *, - *(int64_t *)(cc_ent->cc_data + FBA_SIZE(sblk)), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(sblk + len) - 8)); - - /* SDTRACE(ST_INFO|SDF_FLCENT, cd, len, dblk, 0, bp); */ - - (void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy, - _sd_flcent_ea, cc_ent); - DISK_FBA_WRITE(cd, len); - /* increment number of bytes destaged to disk */ - WRITE_DESTAGED(cd, len); - } - } - _sd_enqueue_io_pending(cd, cc_ent); - - SDTRACE(ST_EXIT|SDF_FLCENT, cd, 0, dblk, 0, 0); -} - -static void -_sd_process_pending(int cd) -{ - _sd_cd_info_t *cdi; - _sd_cctl_t *cc_ent, *cc_next; - int dirty_enq; - ss_centry_info_t *wctl; - _sd_cctl_t *dirty_hd, **dirty_nxt; - 
int sts, processed = 0; - - cdi = &(_sd_cache_files[cd]); - - SDTRACE(ST_ENTER|SDF_FLDONE, cd, 0, - SDT_INV_BL, cdi->cd_info->sh_numio, 0); -process_loop: - if (cdi->cd_io_head == NULL) { - if (processed) { - mutex_enter(&cdi->cd_lock); - cdi->cd_info->sh_numio -= processed; - mutex_exit(&cdi->cd_lock); - } - SDTRACE(ST_EXIT|SDF_FLDONE, cd, 0, - SDT_INV_BL, cdi->cd_info->sh_numio, processed); - return; - } - cc_ent = cdi->cd_io_head; - if ((sts = cc_ent->cc_iostatus) == _SD_IO_INITIATE) { - if (processed) { - mutex_enter(&cdi->cd_lock); - cdi->cd_info->sh_numio -= processed; - mutex_exit(&cdi->cd_lock); - } - SDTRACE(ST_EXIT|SDF_FLDONE, cd, 0, - SDT_INV_BL, cdi->cd_info->sh_numio, processed); - return; - } - LINTUSED(sts); -#if defined(_SD_DEBUG) - if ((sts != _SD_IO_DONE) && (sts != _SD_IO_FAILED)) - SDALERT(SDF_FLDONE, cd, 0, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), 0, sts); -#endif - - if ((cdi->cd_io_head = cc_ent->cc_dirty_link) == NULL) - cdi->cd_io_tail = NULL; - - cc_ent->cc_dirty_link = NULL; - if (cc_ent->cc_iostatus == _SD_IO_FAILED && - _sd_process_failure(cc_ent)) - goto process_loop; - - dirty_enq = 0; - dirty_nxt = &(dirty_hd); - - DTRACE_PROBE1(_sd_process_pending_cd, int, cd); - - for (; cc_ent; cc_ent = cc_next) { - - DTRACE_PROBE1(_sd_process_pending_cc_ent, - _sd_cctl_t *, cc_ent); - processed++; - cc_next = cc_ent->cc_dirty_next; - cc_ent->cc_dirty_next = NULL; - - if (CENTRY_PINNED(cc_ent)) - _sd_process_reflush(cc_ent); - - /* - * Optimize for common case where block not inuse - * Grabbing cc_inuse is faster than cc_lock. 
- */ - if (SET_CENTRY_INUSE(cc_ent)) - goto must_lock; - - cc_ent->cc_iostatus = _SD_IO_NONE; - if (CENTRY_DIRTY_PENDING(cc_ent)) { - cc_ent->cc_flag &= ~CC_PEND_DIRTY; - - CLEAR_CENTRY_INUSE(cc_ent); - if (dirty_enq) - dirty_nxt = &((*dirty_nxt)->cc_dirty_link); - (*dirty_nxt) = cc_ent; - dirty_enq++; - continue; - } - cc_ent->cc_dirty = 0; - wctl = cc_ent->cc_write; - cc_ent->cc_write = NULL; - cc_ent->cc_flag &= ~(CC_PINNABLE); - - - wctl->sc_dirty = 0; - SSOP_SETCENTRY(sdbc_safestore, wctl); - SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res); - - /* - * if this was a QHEAD cache block, then - * _sd_centry_release() did not requeue it as - * it was dirty. Requeue it now. - */ - - if (CENTRY_QHEAD(cc_ent)) - if (sdbc_use_dmchain) { - - /* attempt to que head */ - if (cc_ent->cc_alloc_size_dm) { - - sdbc_requeue_head_dm_try(cc_ent); - } - } else - _sd_requeue_head(cc_ent); - - CLEAR_CENTRY_INUSE(cc_ent); - continue; - - /* - * Block is inuse, must take cc_lock - * if DIRTY_PENDING, must re-issue - */ - must_lock: - /* was FAST */ - mutex_enter(&cc_ent->cc_lock); - cc_ent->cc_iostatus = _SD_IO_NONE; - if (CENTRY_DIRTY_PENDING(cc_ent)) { - cc_ent->cc_flag &= ~CC_PEND_DIRTY; - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - if (dirty_enq) - dirty_nxt = &((*dirty_nxt)->cc_dirty_link); - (*dirty_nxt) = cc_ent; - dirty_enq++; - continue; - } - /* - * clear dirty bits, if block no longer inuse release cc_write - */ - cc_ent->cc_dirty = 0; - if (SET_CENTRY_INUSE(cc_ent) == 0) { - - wctl = cc_ent->cc_write; - cc_ent->cc_write = NULL; - cc_ent->cc_flag &= ~(CC_PINNABLE); - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - - - wctl->sc_dirty = 0; - SSOP_SETCENTRY(sdbc_safestore, wctl); - SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res); - - /* - * if this was a QHEAD cache block, then - * _sd_centry_release() did not requeue it as - * it was dirty. Requeue it now. 
- */ - - if (CENTRY_QHEAD(cc_ent)) - if (sdbc_use_dmchain) { - - /* attempt to que head */ - if (cc_ent->cc_alloc_size_dm) { - sdbc_requeue_head_dm_try - (cc_ent); - } - } else - _sd_requeue_head(cc_ent); - CLEAR_CENTRY_INUSE(cc_ent); - } else { - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - } - } - - if (dirty_enq) - _sd_enqueue_dirty_chain(cd, dirty_hd, (*dirty_nxt), dirty_enq); - - goto process_loop; -} - - -static void -_sd_flcent_ea(blind_t xcc_ent, nsc_off_t fba_pos, nsc_size_t fba_len, int error) -{ - _sd_cctl_t *cc_ent = (_sd_cctl_t *)xcc_ent; - int cd; - nsc_off_t dblk; - - _sd_cd_info_t *cdi; - - cd = CENTRY_CD(cc_ent); - dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)); - cdi = &(_sd_cache_files[cd]); - - SDTRACE(ST_ENTER|SDF_FLCENT_EA, cd, 0, dblk, 2, (unsigned long)cc_ent); - - if (error) { - if (cdi->cd_info->sh_failed == 0) { - cdi->cd_info->sh_failed = 1; - cmn_err(CE_WARN, "!sdbc(_sd_flcent_ea) " - "Disk write failed cd %d (%s): err %d", - cd, cdi->cd_info->sh_filename, error); - } - } - - /* was FAST */ - mutex_enter(&cc_ent->cc_lock); - if (--(cc_ent->cc_iocount) != 0) { - /* more io's to complete before the cc_ent is done. */ - - if (cc_ent->cc_iocount < 0) { - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - SDALERT(SDF_FLCENT_EA, cd, 0, - dblk, cc_ent->cc_iocount, 0); - } else { - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - } - SDTRACE(ST_EXIT|SDF_FLCENT_EA, cd, 0, dblk, 2, - (unsigned long)cc_ent); - - DTRACE_PROBE(_sd_flcent_ea_end); - return; - } - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); - - DATA_LOG(SDF_FLEA, cc_ent, BLK_FBA_OFF(fba_pos), fba_len); - - DTRACE_PROBE4(_sd_flcent_ea_data, uint64_t, ((uint64_t) - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent) + BLK_FBA_OFF(fba_pos))), - uint64_t, (uint64_t)fba_len, char *, - *(int64_t *)(cc_ent->cc_data + FBA_SIZE(BLK_FBA_OFF(fba_pos))), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(BLK_FBA_OFF(fba_pos) + fba_len) - 8)); - - /* - * All io's are done for this cc_ent. 
- * Clear the pagelist io flag. - */ - CLEAR_CENTRY_PAGEIO(cc_ent); - - if (error) - cc_ent->cc_iostatus = _SD_IO_FAILED; - else - cc_ent->cc_iostatus = _SD_IO_DONE; - - SDTRACE(ST_EXIT|SDF_FLCENT_EA, cd, 0, dblk, 2, (unsigned long)cc_ent); - -} - - - -static void -_sd_flclist_ea(blind_t xcc_ent, nsc_off_t fba_pos, nsc_size_t fba_len, - int error) -{ - _sd_cctl_t *cc_ent = (_sd_cctl_t *)xcc_ent; - _sd_cctl_t *first_cc = cc_ent; - _sd_cd_info_t *cdi; - int cd; - nsc_off_t dblk; - - cd = CENTRY_CD(cc_ent); - dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)); - cdi = &(_sd_cache_files[cd]); - - SDTRACE(ST_ENTER|SDF_FLCLIST_EA, cd, 0, dblk, 1, (unsigned long)cc_ent); - - if (error) { - if (cdi->cd_info->sh_failed == 0) { - cdi->cd_info->sh_failed = 1; - cmn_err(CE_WARN, "!sdbc(_sd_flclist_ea) " - "Disk write failed cd %d (%s): err %d", - cd, cdi->cd_info->sh_filename, error); - } - } - /* - * Important: skip the first cc_ent in the list. Marking this will - * make the writer think the io is done, though the rest of the - * chain have not been processed here. so mark the first cc_ent - * last. Optimization, so as not to use locks - */ - - cc_ent = cc_ent->cc_dirty_next; - while (cc_ent) { - DTRACE_PROBE2(_sd_flclist_ea, _sd_cctl_t *, cc_ent, - int, CENTRY_CD(cc_ent)); - - if (cc_ent->cc_iocount != 1) - SDALERT(SDF_FLCLIST_EA, cd, 0, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - cc_ent->cc_iocount, 0); - cc_ent->cc_iocount = 0; - - /* - * Clear the pagelist io flag. 
- */ - CLEAR_CENTRY_PAGEIO(cc_ent); - - if (error) - cc_ent->cc_iostatus = _SD_IO_FAILED; - else - cc_ent->cc_iostatus = _SD_IO_DONE; - if (cc_ent->cc_dirty_next) { - DATA_LOG(SDF_FLSTEA, cc_ent, 0, BLK_FBAS); - - DTRACE_PROBE4(_sd_flclist_ea_data1, uint64_t, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - int, BLK_FBAS, char *, - *(int64_t *)(cc_ent->cc_data), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(BLK_FBAS) - 8)); - } else { - DATA_LOG(SDF_FLSTEA, cc_ent, 0, - BLK_FBA_OFF(fba_pos + fba_len)); - - DTRACE_PROBE4(_sd_flclist_ea_data2, uint64_t, - (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), - uint64_t, (uint64_t)BLK_FBA_OFF(fba_pos + fba_len), - char *, *(int64_t *)(cc_ent->cc_data), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(BLK_FBA_OFF(fba_pos + fba_len)) - 8)); - } - - cc_ent = cc_ent->cc_dirty_next; - } - - /* - * Now process the first cc_ent in the list. - */ - cc_ent = first_cc; - DATA_LOG(SDF_FLSTEA, cc_ent, BLK_FBA_OFF(fba_pos), - BLK_FBAS - BLK_FBA_OFF(fba_pos)); - - DTRACE_PROBE4(_sd_flclist_ea_data3, uint64_t, - (uint64_t)fba_pos, int, BLK_FBAS - BLK_FBA_OFF(fba_pos), - char *, *(int64_t *)(cc_ent->cc_data + - FBA_SIZE(BLK_FBA_OFF(fba_pos))), char *, - *(int64_t *)(cc_ent->cc_data + FBA_SIZE(BLK_FBA_OFF(fba_pos) + - BLK_FBAS - BLK_FBA_OFF(fba_pos)) - 8)); - - cc_ent->cc_iocount = 0; - - if (cc_ent->cc_anon_addr.sa_virt) { - kmem_free(cc_ent->cc_anon_addr.sa_virt, cc_ent->cc_anon_len); - cc_ent->cc_anon_addr.sa_virt = NULL; - cc_ent->cc_anon_len = 0; - } - - /* - * Clear the pagelist io flag. 
- */ - CLEAR_CENTRY_PAGEIO(cc_ent); - - if (error) - cc_ent->cc_iostatus = _SD_IO_FAILED; - else - cc_ent->cc_iostatus = _SD_IO_DONE; - - SDTRACE(ST_EXIT|SDF_FLCLIST_EA, cd, 0, dblk, 1, (unsigned long)cc_ent); -} - - -static void -_sd_mark_failed(_sd_cctl_t *cclist) -{ - _sd_cctl_t *cc_ent; - int cd; - - cd = CENTRY_CD(cclist); - cc_ent = cclist; - while (cc_ent) { - cc_ent->cc_iostatus = _SD_IO_FAILED; - cc_ent = cc_ent->cc_dirty_next; - } - _sd_enqueue_io_pending(cd, cclist); -} - - - -/* - * Fail single chain of cache blocks, updating numfail/numio counts. - * For dual-copy, log & clear PINNED, fall thru to regular processing. - */ -int -_sd_process_failure(_sd_cctl_t *cc_ent) -{ - int cd, num; - _sd_cctl_t *cc_chain; - _sd_cd_info_t *cdi; - - cd = CENTRY_CD(cc_ent); - cdi = &(_sd_cache_files[cd]); - - cc_chain = cc_ent; - - if (!cdi->cd_global->sv_pinned) { - cdi->cd_global->sv_pinned = _SD_SELF_HOST; - SSOP_SETVOL(sdbc_safestore, cdi->cd_global); - } - - for (num = 0; cc_ent; cc_ent = cc_ent->cc_dirty_next) { - num++; - /* was FAST */ - mutex_enter(&cc_ent->cc_lock); - cc_ent->cc_flag |= (CC_PEND_DIRTY | - (CENTRY_PINNABLE(cc_ent) ? CC_PINNED : 0)); - if (cc_ent->cc_write) { - cc_ent->cc_write->sc_flag = cc_ent->cc_flag; - SSOP_SETCENTRY(sdbc_safestore, cc_ent->cc_write); - } - mutex_exit(&cc_ent->cc_lock); - if (CENTRY_PINNED(cc_ent)) - nsc_pinned_data(cdi->cd_iodev, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS); - } - - /* - * In normal processing we wouldn't need a lock here as all i/o - * is single threaded by cd. However during failover blocks can - * be failing from real i/o and as soon as the disk is marked bad - * the failover code which is furiously cloning safe-store into - * more blocks will short circuit to here (see _sd_ft_clone) - * and two threads can be executing in here simultaneously. 
- */ - mutex_enter(&cdi->cd_lock); - cc_chain->cc_dirty_link = cdi->cd_fail_head; - cdi->cd_fail_head = cc_chain; - cdi->cd_info->sh_numfail += num; - cdi->cd_info->sh_numio -= num; - mutex_exit(&cdi->cd_lock); - return (1); /* blocks are failed */ -} - - -static void -_sd_process_reflush(_sd_cctl_t *cc_ent) -{ - int cd; - - if (CENTRY_PINNABLE(cc_ent)) { - cd = CENTRY_CD(cc_ent); - nsc_unpinned_data(_sd_cache_files[cd].cd_iodev, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS); - } - - /* was FAST */ - mutex_enter(&cc_ent->cc_lock); - cc_ent->cc_flag &= ~CC_PINNED; - /* was FAST */ - mutex_exit(&cc_ent->cc_lock); -} - - - -/* - * cd_write_thread -- flush dirty buffers. - * - * ARGUMENTS: - * - * cd - cache descriptor - * - * USAGE: - * called by cd's writer thread, returns when no more entries - * - * NOTE: if sdbc is being shutdown (for powerfail) then we will - * process pending i/o's but issue no more new ones. - */ -static int SD_LOOP_DELAY = 32; -#if !defined(m88k) && !defined(sun) -static int SD_WRITE_HIGH = 255; /* cache blocks */ -#endif - -static void -cd_write_thread(int cd) -{ - _sd_cctl_t *cc_list, *dirty_head, *last_chain; - _sd_cd_info_t *cdi; - - cdi = &(_sd_cache_files[cd]); - if (!FILE_OPENED(cd)) { - cdi->cd_writer = _SD_WRITER_NONE; - return; - } - cdi->cd_writer = _SD_WRITER_RUNNING; - - _sd_process_pending(cd); - - if (_sdbc_shutdown_in_progress) { - cdi->cd_write_inprogress = 0; - cdi->cd_writer = _SD_WRITER_NONE; - return; - } -#if !defined(m88k) && !defined(sun) - if (cdi->cd_info->sh_numio > SD_WRITE_HIGH) { - /* let I/Os complete before issuing more */ - cdi->cd_writer = _SD_WRITER_NONE; - return; - } -#endif - -#ifdef DEBUG - if (!_sdbc_flush_flag) { /* hang the flusher for testing */ - cdi->cd_write_inprogress = 0; - cdi->cd_writer = _SD_WRITER_NONE; - return; - } -#endif - - dirty_head = cdi->cd_dirty_head; - if (dirty_head && (dirty_head != cdi->cd_lastchain_ptr || - ++cdi->cd_info->sh_flushloop > SD_LOOP_DELAY)) { - 
cdi->cd_info->sh_flushloop = 0; - /* was FAST */ - mutex_enter(&cdi->cd_lock); - if (SD_LOOP_DELAY == 0 || - dirty_head == cdi->cd_lastchain_ptr) { - last_chain = NULL; - cdi->cd_dirty_head = NULL; - cdi->cd_dirty_tail = NULL; - cdi->cd_info->sh_numio += cdi->cd_info->sh_numdirty; - cdi->cd_info->sh_numdirty = 0; - } else -#if !defined(m88k) && !defined(sun) - if (cdi->cd_info->sh_numdirty > SD_WRITE_HIGH) { - int count = 0; - for (last_chain = dirty_head; last_chain; - last_chain = last_chain->cc_dirty_next) - count++; - last_chain = dirty_head->cc_dirty_link; - cdi->cd_dirty_head = last_chain; - /* cdi->cd_dirty_tail is unchanged */ - cdi->cd_info->sh_numio += count; - cdi->cd_info->sh_numdirty -= count; - } else -#endif - { - last_chain = cdi->cd_lastchain_ptr; - cdi->cd_dirty_head = last_chain; - cdi->cd_dirty_tail = last_chain; - cdi->cd_info->sh_numio += cdi->cd_info->sh_numdirty - - cdi->cd_lastchain; - cdi->cd_info->sh_numdirty = cdi->cd_lastchain; - } - /* was FAST */ - mutex_exit(&cdi->cd_lock); - - while (((cc_list = dirty_head) != NULL) && - cc_list != last_chain) { - dirty_head = cc_list->cc_dirty_link; - cc_list->cc_dirty_link = NULL; - if (cdi->cd_info->sh_failed) - _sd_mark_failed(cc_list); - else if (cc_list->cc_dirty_next == NULL) - _sd_async_flcent(cc_list, cdi->cd_crdev); - else - _sd_async_flclist(cc_list, cdi->cd_crdev); - cdi->cd_write_inprogress++; - } - } - cdi->cd_write_inprogress = 0; - cdi->cd_writer = _SD_WRITER_NONE; -} - -/* - * cd_writer -- spawn new writer if not running already - * called after enqueing the dirty blocks - */ -int -cd_writer(int cd) -{ - _sd_cd_info_t *cdi; - nstset_t *tset = NULL; - nsthread_t *t; - -#if defined(_SD_USE_THREADS) - tset = _sd_ioset; -#endif /* _SD_USE_THREADS */ - - cdi = &(_sd_cache_files[cd]); - - if (cdi->cd_writer) - return (0); - - if (tset == NULL) { - _sd_unblock(&_sd_flush_cv); - return (0); - } - - if (cdi->cd_writer || xmem_bu(_SD_WRITER_CREATE, &cdi->cd_writer)) - return (0); - - t = 
nst_create(tset, cd_write_thread, (blind_t)(unsigned long)cd, 0); - if (t) - return (1); - - cmn_err(CE_WARN, "!sdbc(cd_writer) cd %d nst_create error", cd); - cdi->cd_writer = _SD_WRITER_NONE; - return (-1); -} - -/* - * _sd_ccent_rd - add appropriate parts of cc_ent to struct buf. - * optimized not to read dirty FBAs from disk. - * - * ARGUMENTS: - * - * cc_ent - single cache block - * wanted - bitlist of FBAs that need to be read - * bp - struct buf to extend - * - * USAGE: - * Called for each dirty in a read I/O. - * The bp must be sized to allow for one entry per FBA that needs - * to be read (see _sd_doread()). - */ - -void -_sd_ccent_rd(_sd_cctl_t *cc_ent, uint_t wanted, struct buf *bp) -{ - int index, offset = 0, size = 0; - int state, state1 = -3; /* state1 is previous state */ - sd_addr_t *addr = NULL; - uint_t dirty; - - dirty = CENTRY_DIRTY(cc_ent); - for (index = 0; index < BLK_FBAS; index++) { - if (!_SD_BIT_ISSET(wanted, index)) - continue; - state = _SD_BIT_ISSET(dirty, index); - if (state == state1) /* same state, expand size */ - size++; - else { - if (state1 != -3) /* not first FBA */ - sd_add_fba(bp, addr, offset, size); - state1 = state; /* new previous state */ - offset = index; - size = 1; - if (state) { /* dirty, don't overwrite */ - addr = NULL; - } else { - addr = &cc_ent->cc_addr; - } - } - } - if (state1 != -3) - sd_add_fba(bp, addr, offset, size); -} - - - -int _SD_WR_THRESHOLD = 1000; -static void -_sd_flush_thread(void) -{ - int cd; - _sd_cd_info_t *cdi; - _sd_shared_t *shi; - int cnt; - int short_sleep = 0; - long tics; - int waiting_for_idle = 0; - int check_count = 0; - int pending, last_pending; - int SD_LONG_SLEEP_TICS, SD_SHORT_SLEEP_TICS; - nstset_t *tset = NULL; - nsthread_t *t; - -#if defined(_SD_USE_THREADS) - tset = _sd_ioset; -#endif /* _SD_USE_THREADS */ - - mutex_enter(&_sd_cache_lock); - _sd_cache_dem_cnt++; - mutex_exit(&_sd_cache_lock); - - /* .2 seconds */ - SD_LONG_SLEEP_TICS = drv_usectohz(200000); - /* .02 seconds 
*/ - SD_SHORT_SLEEP_TICS = drv_usectohz(20000); - - /* CONSTCOND */ - while (1) { - if (_sd_flush_exit == 0) { - /* - * wait until no i/o's pending (on two successive - * iterations) or we see no progress after - * GIVE_UP_WAITING total sleeps. - */ -/* at most 5*128 ticks about 6 seconds of no progress */ -#define GIVE_UP_WAITING 128 - if (waiting_for_idle) { - pending = _sd_pending_iobuf(); - /*LINTED*/ - if (pending == last_pending) { - if (pending != 0) - check_count++; - } else - check_count = 0; - if ((last_pending == 0 && (pending == 0)) || - (check_count == GIVE_UP_WAITING)) { - mutex_enter(&_sd_cache_lock); - _sd_cache_dem_cnt--; - mutex_exit(&_sd_cache_lock); - if (check_count == GIVE_UP_WAITING) - cmn_err(CE_WARN, - "!_sd_flush_thread " - "exiting with %d IOs " - "pending", pending); - return; - } - last_pending = pending; - } else { - waiting_for_idle = 1; - last_pending = _sd_pending_iobuf(); - } - } - - /* - * Normally wakeup every SD_LONG_SLEEP_TICS to flush. - */ - - if (!short_sleep) { - ssioc_stats_t ss_stats; - int rc; - - if ((rc = SSOP_CTL(sdbc_safestore, SSIOC_STATS, - (uintptr_t)&ss_stats)) == 0) { - - if (ss_stats.wq_inq < _SD_WR_THRESHOLD) - short_sleep = 1; - } else { - if (rc == SS_ERR) - cmn_err(CE_WARN, - "!sdbc(_sd_flush_thread)" - "cannot get safestore inq"); - } - } - - if (short_sleep) - tics = SD_SHORT_SLEEP_TICS; - else - tics = SD_LONG_SLEEP_TICS; - - _sd_timed_block(tics, &_sd_flush_cv); - cd = 0; - cnt = short_sleep = 0; - for (; (cnt < _sd_cache_stats->st_loc_count) && - (cd < sdbc_max_devs); cd++) { - cdi = &_sd_cache_files[cd]; - shi = cdi->cd_info; - - if (shi == NULL || (shi->sh_failed == 2)) - continue; - - if (!(shi->sh_alloc & CD_ALLOCATED) || - !(shi->sh_flag & CD_ATTACHED)) - continue; - cnt++; - if (cdi->cd_writer) - continue; - if (!_SD_CD_WBLK_USED(cd)) { - if (cdi->cd_failover == 2) { - nsc_release(cdi->cd_rawfd); - cdi->cd_failover = 0; - } - continue; - } - if (cdi->cd_writer || - xmem_bu(_SD_WRITER_CREATE, 
&cdi->cd_writer)) - continue; - - t = NULL; - if (tset) { - t = nst_create(tset, - cd_write_thread, (blind_t)(unsigned long)cd, - 0); - } - if (!t) - cd_write_thread(cd); - } - } -} - - -#if defined(_SD_DEBUG_PATTERN) -check_write_consistency(cc_entry) - _sd_cctl_t *cc_entry; -{ - int *data; - nsc_off_t fba_pos; - int i, dirty_bl; - - while (cc_entry) { - dirty_bl = CENTRY_DIRTY(cc_entry); - if (dirty_bl == 0) { - cmn_err(CE_WARN, "!check: no dirty"); - } - data = (int *)cc_entry->cc_data; - fba_pos = BLK_TO_FBA_NUM(CENTRY_BLK(cc_entry)); - - for (i = 0; i < 8; i++, data += 128, fba_pos++) { - if (dirty_bl & 1) { - if (*((int *)(data + 2)) != fba_pos) { - cmn_err(CE_WARN, "!wr exp %" NSC_SZFMT - " got %x", fba_pos, *(data + 2)); - } - } - dirty_bl >>= 1; - } - cc_entry = cc_entry->cc_dirty_next; - } -} - -check_buf_consistency(handle, rw) - _sd_buf_handle_t *handle; - char *rw; -{ - _sd_bufvec_t *bvec1; - int *data; - nsc_off_t fpos; - nsc_size_t fba_len, i; - nsc_size_t len = 0; - - bvec1 = handle->bh_bufvec; - fpos = handle->bh_fba_pos; - - while (bvec1->bufaddr) { - fba_len = FBA_NUM(bvec1->buflen); - data = (int *)bvec1->bufaddr; - for (i = 0; i < fba_len; i++, data += 128, fpos++) { - len++; - if (*(data+2) != fpos) { - cmn_err(CE_WARN, "!%s exp%" NSC_SZFMT " got%x", - rw, fpos, *(data + 2)); - } - } - bvec1++; - } - if (handle->bh_fba_len != len) { - cmn_err(CE_WARN, "!len %" NSC_SZFMT " real %" NSC_SZFMT, len, - handle->bh_fba_len); - } -} -#endif - -int -_sdbc_wait_pending(void) -{ - int tries, pend, last; - - tries = 0; - last = _sd_pending_iobuf(); - while ((pend = _sd_pending_iobuf()) > 0) { - if (pend == last) { - if (++tries > 60) { - return (pend); - } - } else { - pend = last; - tries = 0; - } - delay(HZ); - } - return (0); -} diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_io.h b/usr/src/uts/common/avs/ns/sdbc/sd_io.h deleted file mode 100644 index f28ca06f28..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_io.h +++ /dev/null @@ -1,68 +0,0 @@ -/* 
- * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SD_IO_H -#define _SD_IO_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define SGIO_MAX 254 - -#define _SD_IO_NONE 0 -#define _SD_IO_INITIATE 1 -#define _SD_IO_DONE 2 -#define _SD_IO_FAILED 3 -#define _SD_IO_DISCARDED 4 - -#define _SD_WRITER_NONE 0 -#define _SD_WRITER_CREATE 1 -#define _SD_WRITER_RUNNING 2 - -#ifdef _KERNEL - -extern kcondvar_t _sd_flush_cv; -/* secret flush toggle flag for testing */ -extern int _sdbc_flush_flag; /* 0 ==> noflushing, 1 ==> flush */ - - -extern int _sdbc_flush_configure(void); -extern void _sdbc_flush_deconfigure(void); -extern void _sd_async_flclist(_sd_cctl_t *cclist, dev_t rdev); -extern void _sd_enqueue_io_pending(int cd, _sd_cctl_t *cclist); -extern void _sd_async_flcent(_sd_cctl_t *cc_ent, dev_t rdev); -extern int _sd_process_failure(_sd_cctl_t *cc_ent); -extern int cd_writer(int cd); -extern void _sd_ccent_rd(_sd_cctl_t *cc_ent, uint_t wanted, buf_t *bp); -extern int _sdbc_wait_pending(void); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_IO_H */ diff --git 
a/usr/src/uts/common/avs/ns/sdbc/sd_iob.h b/usr/src/uts/common/avs/ns/sdbc/sd_iob.h deleted file mode 100644 index f875c6aef8..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_iob.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - -#ifndef _SD_IOB_H -#define _SD_IOB_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_HOOK_LOCKS 32 -typedef int (*dcb_t)(struct buf *); /* driver callback type */ - -/* - * order of end action calls: - * driver callback (iob_drv_iodone) is stuffed in b_iodone and called by - * the device driver when i/o completes. It calls the hook end action - * (iob_hook_iodone) which maintains the completion count (iob_hook.count) - * and calls the clients end action (iob_hook.func) when the chain is complete. 
- */ -typedef struct iob_hook { - struct iob_hook *next_hook; - struct buf *chain; /* all the buffers for this iob */ - struct buf *tail; /* tail of buffer chain */ - int count; /* number of bufs on the chain */ - nsc_off_t start_fba; /* initial disk block for the xfer */ - nsc_off_t last_fba; /* last disk block for the xfer */ - nsc_size_t size; /* # bytes for entire transfer */ - unsigned char *last_vaddr; /* ending addr of last i/o request */ - sdbc_ea_fn_t func; /* clients end action routine */ - int (* iob_hook_iodone)(struct buf *, struct iob_hook *); - dcb_t iob_drv_iodone; /* driver call back */ - blind_t param; /* param for clnt end action routine */ - int flags; /* flags for each buffer */ - int error; /* any error */ - int skipped; /* this iob used sd_add_mem */ - kmutex_t *lockp; /* mutex for releasing buffers */ - kcondvar_t wait; /* sync for sleeping on synch i/o */ -#ifdef _SD_BIO_STATS - int PAGE_IO, NORM_IO, SKIP_IO; - int PAGE_COMBINED; - nsc_size_t NORM_IO_SIZE; -#endif /* _SD_BIO_STATS */ - } iob_hook_t; - -typedef struct _sd_buf_list { - iob_hook_t *hooks; /* all of the iob hooks */ - iob_hook_t *hook_head; /* free iob hook */ - int bl_init_count; /* total count */ - int bl_hooks_avail; /* monitor available hook count */ - int bl_hook_lowmark; /* record if ever run out of hooks */ - int hook_waiters; /* count of waiters */ - int max_hook_waiters; /* record max ever waiters */ - kcondvar_t hook_wait; /* sync for sleeping on synch i/o */ - kmutex_t hook_locks[MAX_HOOK_LOCKS]; -} _sd_buf_list_t; - -/* - * NOTE: if you change this, then also make changes to the generation - * of sd_iob_impl*.c in src/uts/common/Makefile.files and Makefile.rules! 
- */ -#define _SD_DEFAULT_IOBUFS 4096 - -/* define driver callback and driver callback function table */ - -#define IOB_DCBP(i) (sd_iob_dcb ## i) - -#define IOB_DCB(i) \ - int \ - IOB_DCBP(i)(struct buf *bp) \ - { \ - return ((*_sd_buflist.hooks[i].iob_hook_iodone) \ - (bp, &_sd_buflist.hooks[i])); \ - } - -extern _sd_buf_list_t _sd_buflist; - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_IOB_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_misc.c b/usr/src/uts/common/avs/ns/sdbc/sd_misc.c deleted file mode 100644 index e63bf9dd4d..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_misc.c +++ /dev/null @@ -1,1437 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#define _SCM_ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/conf.h> -#include <sys/errno.h> -#include <sys/file.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/ddi.h> -#include <sys/nsc_thread.h> - -#include "sd_bcache.h" -#include "sd_misc.h" -#include "sd_trace.h" -#include "sd_ft.h" -#include "sd_io.h" -#include "sd_bio.h" -#include "sd_pcu.h" -#include "sd_tdaemon.h" -#include "sdbc_ioctl.h" -#include <sys/ncall/ncall.h> -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/nsvers.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> -static dev_info_t *dev_dip; -dev_info_t *sdbc_get_dip(); - - -/* - * A global variable to set the threshold for large writes to - * be in write through mode when NVRAM is present. This should - * solve the NVRAM bandwidth problem. - */ - -int sdbc_wrthru_len; -nsc_size_t sdbc_max_fbas = _SD_MAX_FBAS; -int sdbc_max_devs = 0; - -krwlock_t sdbc_queue_lock; - -static int _sd_debug_level = 0; - -static kmutex_t _sd_block_lk; - -#define REGISTER_SVC(X, Y) (ncall_register_svc(X, Y)) -#define UNREGISTER_SVC(X) (ncall_unregister_svc(X)) - -const int sdbc_major_rev = ISS_VERSION_MAJ; -const int sdbc_minor_rev = ISS_VERSION_MIN; -const int sdbc_micro_rev = ISS_VERSION_MIC; -const int sdbc_baseline_rev = ISS_VERSION_NUM; -static char sdbc_version[16]; - -static int _sdbc_attached = 0; - -static int _sdbc_print(dev_t dev, char *s); -static int sdbcunload(void); -static int sdbcload(void); -static int sdbcopen(dev_t *devp, int flag, int otyp, cred_t *crp); -static int sdbcclose(dev_t dev, int flag, int otyp, cred_t *crp); -static int sdbcioctl(dev_t dev, int cmd, void *arg, int mode, cred_t *crp, - int *rvp); -static int _sdbc_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); -static int _sdbc_probe(dev_info_t *dip); -static int 
_sdbc_attach(dev_info_t *, ddi_attach_cmd_t); -static int _sdbc_detach(dev_info_t *, ddi_detach_cmd_t); -static int _sdbc_reset(dev_info_t *, ddi_reset_cmd_t); - -#ifdef sun -/* - * Solaris specific driver module interface code. - */ - -#ifdef USES_SOFT_STATE -struct sdbc_state { - dev_info_t *dip; /* everyone would need a devinfo */ -}; - -static void *sdbc_statep; /* for soft state routines */ -#endif /* USES_SOFT_STATE */ - -static struct cb_ops sdbc_cb_ops = { - sdbcopen, /* open */ - sdbcclose, /* close */ - nodev, /* not a block driver, strategy not an entry point */ - _sdbc_print, /* no print routine */ - nodev, /* no dump routine */ - nodev, /* read */ - nodev, /* write */ - (int (*) ()) sdbcioctl, /* ioctl */ - nodev, /* no devmap routine */ - nodev, /* no mmap routine */ - nodev, /* no segmap routine */ - nochpoll, /* no chpoll routine */ - ddi_prop_op, - 0, /* not a STREAMS driver, no cb_str routine */ - D_NEW | D_MP, /* safe for multi-thread/multi-processor */ -}; - - -static struct dev_ops sdbc_ops = { - DEVO_REV, /* Driver build version */ - 0, /* device reference count */ - _sdbc_getinfo, - nulldev, - _sdbc_probe, - _sdbc_attach, - _sdbc_detach, - _sdbc_reset, - &sdbc_cb_ops, - (struct bus_ops *)NULL -}; - -static struct modldrv sdbc_ldrv = { - &mod_driverops, - "nws:Storage Cache:" ISS_VERSION_STR, - &sdbc_ops -}; - -static struct modlinkage sdbc_modlinkage = { - MODREV_1, - &sdbc_ldrv, - NULL -}; - -/* - * dynmem interface - */ -static int mutex_and_condvar_flag; - -/* - * Solaris module load time code - */ -int -_init(void) -{ - - int err; - - mutex_and_condvar_flag = 0; - -#ifdef USES_SOFT_STATE - ddi_soft_state_init(&sdbc_statep, sizeof (struct sdbc_state), - MAX_INSTANCES); -#endif /* USES_SOFT_STATE */ - - /* - * It is "load" time, call the unixware equivalent. 
- */ - err = sdbcload(); - if (!err) - err = mod_install(&sdbc_modlinkage); - - if (err) { - (void) sdbcunload(); -#ifdef USES_SOFT_STATE - ddi_soft_state_fini(&sdbc_statep); -#endif /* USES_SOFT_STATE */ - } - - if (!err) { - mutex_and_condvar_flag = 1; - mutex_init(&dynmem_processing_dm.thread_dm_lock, "dynmem", - MUTEX_DRIVER, NULL); - cv_init(&dynmem_processing_dm.thread_dm_cv, "dynmem", - CV_DRIVER, NULL); - } - - return (err); - -} -/* - * Solaris module unload time code - */ - -int -_fini(void) -{ - int err; - - if (_sd_cache_initialized) { - return (EBUSY); - } else if (_sd_ioset && - (_sd_ioset->set_nlive || _sd_ioset->set_nthread)) { - cmn_err(CE_WARN, "!sdbc:_fini() %d threads still " - "active; %d threads in set\n", _sd_ioset->set_nlive, - _sd_ioset->set_nthread); - return (EBUSY); - } - if ((err = mod_remove(&sdbc_modlinkage)) == 0) { - DTRACE_PROBE2(_sdbc_fini_mod_remove_succeeded, - int, err, - struct modlinkage *, &sdbc_modlinkage); - err = sdbcunload(); -#ifdef USES_SOFT_STATE - ddi_soft_state_fini(&sdbc_statep); -#endif /* USES_SOFT_STATE */ - - if (mutex_and_condvar_flag) { - cv_destroy(&dynmem_processing_dm.thread_dm_cv); - mutex_destroy(&dynmem_processing_dm.thread_dm_lock); - mutex_and_condvar_flag = 0; - } - } - - return (err); -} - -/* - * Solaris module info code - */ -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&sdbc_modlinkage, modinfop)); -} - -/*ARGSUSED*/ -static int -_sdbc_probe(dev_info_t *dip) -{ - return (DDI_PROBE_SUCCESS); -} - -/* - * Attach an instance of the device. This happens before an open - * can succeed. 
- */ -static int -_sdbc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - _dm_process_vars_t local_dm_process_vars; - struct buf bp; - - if (cmd != DDI_ATTACH) - return (DDI_FAILURE); - - /* - * Get the threshold value for setting large writes in - * write through mode(when NVRAM is present) - */ - - sdbc_wrthru_len = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_wrthru_thresh", 64); - - /* Get sdbc_max_fbas from sdbc.conf */ - sdbc_max_fbas = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_max_fbas", - _SD_MAX_FBAS); - - bp.b_bcount = (size_t)FBA_SIZE(sdbc_max_fbas); - minphys(&bp); /* clamps value to maxphys */ - - sdbc_max_fbas = FBA_NUM(bp.b_bcount); - - if (sdbc_max_fbas > _SD_MAX_FBAS) { - cmn_err(CE_WARN, - "!_sdbc_attach: sdbc_max_fbas set to %d", _SD_MAX_FBAS); - sdbc_max_fbas = _SD_MAX_FBAS; - } - - /* - * -get the maximum list length for multipage dynmem - * -time between aging - * -number of agings before dealloc - * -what to report D0=shutdown, D1=thread variables - */ - dynmem_processing_dm.max_dyn_list = MAX_DYN_LIST_DEFAULT; - dynmem_processing_dm.monitor_dynmem_process = - MONITOR_DYNMEM_PROCESS_DEFAULT; - dynmem_processing_dm.cache_aging_ct1 = CACHE_AGING_CT_DEFAULT; - dynmem_processing_dm.cache_aging_ct2 = CACHE_AGING_CT_DEFAULT; - dynmem_processing_dm.cache_aging_ct3 = CACHE_AGING_CT_DEFAULT; - dynmem_processing_dm.cache_aging_sec1 = CACHE_AGING_SEC1_DEFAULT; - dynmem_processing_dm.cache_aging_sec2 = CACHE_AGING_SEC2_DEFAULT; - dynmem_processing_dm.cache_aging_sec3 = CACHE_AGING_SEC3_DEFAULT; - dynmem_processing_dm.cache_aging_pcnt1 = CACHE_AGING_PCNT1_DEFAULT; - dynmem_processing_dm.cache_aging_pcnt2 = CACHE_AGING_PCNT2_DEFAULT; - dynmem_processing_dm.max_holds_pcnt = MAX_HOLDS_PCNT_DEFAULT; - dynmem_processing_dm.process_directive = PROCESS_DIRECTIVE_DEFAULT; - - local_dm_process_vars.max_dyn_list = ddi_prop_get_int(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS | 
DDI_PROP_NOTPROM, "sdbc_max_dyn_list", - MAX_DYN_LIST_DEFAULT); - - local_dm_process_vars.monitor_dynmem_process = - ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_monitor_dynmem", - MONITOR_DYNMEM_PROCESS_DEFAULT); - - local_dm_process_vars.cache_aging_ct1 = ddi_prop_get_int(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_cache_aging_ct1", - CACHE_AGING_CT_DEFAULT); - - local_dm_process_vars.cache_aging_ct2 = ddi_prop_get_int(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_cache_aging_ct2", - CACHE_AGING_CT_DEFAULT); - - local_dm_process_vars.cache_aging_ct3 = ddi_prop_get_int(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_cache_aging_ct3", - CACHE_AGING_CT_DEFAULT); - - local_dm_process_vars.cache_aging_sec1 = ddi_prop_get_int(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_cache_aging_sec1", - CACHE_AGING_SEC1_DEFAULT); - - local_dm_process_vars.cache_aging_sec2 = ddi_prop_get_int(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_cache_aging_sec2", - CACHE_AGING_SEC2_DEFAULT); - - local_dm_process_vars.cache_aging_sec3 = ddi_prop_get_int(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_cache_aging_sec3", - CACHE_AGING_SEC3_DEFAULT); - - local_dm_process_vars.cache_aging_pcnt1 = - ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_cache_aging_pcnt1", - CACHE_AGING_PCNT1_DEFAULT); - - local_dm_process_vars.cache_aging_pcnt2 = - ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_cache_aging_pcnt2", - CACHE_AGING_PCNT2_DEFAULT); - - local_dm_process_vars.process_directive = - ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_process_directive", - PROCESS_DIRECTIVE_DEFAULT); - - local_dm_process_vars.max_holds_pcnt = ddi_prop_get_int(DDI_DEV_T_ANY, - dip, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "sdbc_max_holds_pcnt", - 
MAX_HOLDS_PCNT_DEFAULT); - - (void) sdbc_edit_xfer_process_vars_dm(&local_dm_process_vars); - -#define MINOR_NAME "c,sdbc" /* character device */ -#define MINOR_NUMBER 0 -#ifdef MINOR_NAME - if (ddi_create_minor_node(dip, MINOR_NAME, S_IFCHR, - MINOR_NUMBER, DDI_PSEUDO, 0) != DDI_SUCCESS) { - /* free anything we allocated here */ - return (DDI_FAILURE); - } -#endif /* MINOR_NAME */ - - /* Announce presence of the device */ - ddi_report_dev(dip); - dev_dip = dip; - /* mark the device as attached, opens may proceed */ - _sdbc_attached = 1; - - rw_init(&sdbc_queue_lock, NULL, RW_DRIVER, NULL); - - return (DDI_SUCCESS); -} - -/*ARGSUSED*/ -static int -_sdbc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - if (cmd == DDI_DETACH) { - /* - * Check first if the cache is still in use - * and if it is, prevent the detach. - */ - if (_sd_cache_initialized) - return (EBUSY); - - _sdbc_attached = 0; - - rw_destroy(&sdbc_queue_lock); - dev_dip = NULL; - - return (DDI_SUCCESS); - } else - return (DDI_FAILURE); -} - -/*ARGSUSED*/ -static int -_sdbc_reset(dev_info_t *dip, ddi_reset_cmd_t cmd) -{ - return (DDI_SUCCESS); -} - -/*ARGSUSED*/ -static int -_sdbc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) -{ - dev_t dev; -#ifdef USES_SOFT_STATE - struct sdbc_state *xsp; - int instance; -#endif /* USES_SOFT_STATE */ - int rc; - - switch (cmd) { - case DDI_INFO_DEVT2INSTANCE: - dev = (dev_t)arg; - /* The "instance" number is the minor number */ - *result = (void *)(unsigned long)getminor(dev); - rc = DDI_SUCCESS; - break; - - case DDI_INFO_DEVT2DEVINFO: - dev = (dev_t)arg; -#ifdef USES_SOFT_STATE - /* the instance number is the minor number */ - instance = getminor(dev); - xsp = ddi_get_soft_state(sdbc_statep, instance); - if (xsp == NULL) - return (DDI_FAILURE); - *result = (void *) xsp->dip; -#else - *result = (void *) NULL; -#endif /* USES_SOFT_STATE */ - rc = DDI_SUCCESS; - break; - - default: - rc = DDI_FAILURE; - break; - } - return (rc); -} - 
-/*ARGSUSED*/ -int -_sdbc_print(dev_t dev, char *s) -{ - cmn_err(CE_WARN, "!sdbc(_sdbc_print) %s", s); - return (0); -} -#else -MOD_DRV_WRAPPER(sdbc, sdbcload, sdbcunload, NULL, "Storage Device Block Cache"); -#endif /* sun */ - -static int sdbc_inited; - -static int -sdbcinit(void) -{ - int rc; - - sdbc_inited = 0; - - (void) strncpy(sdbc_version, _VERSION_, sizeof (sdbc_version)); - - mutex_init(&_sd_cache_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&_sdbc_config_lock, NULL, MUTEX_DRIVER, NULL); - -#ifdef m88k - REGISTER_SVC(SD_DUAL_WRITE, r_sd_ifs_write); - REGISTER_SVC(SD_DUAL_READ, r_sd_ifs_read); - REGISTER_SVC(SD_SET_CD, r_sd_set_cd); - REGISTER_SVC(SD_GETSIZE, r_sd_getsize); - REGISTER_SVC(SD_DUAL_OPEN, r_sd_ifs_open); - REGISTER_SVC(SD_REMOTE_FLUSH, r_sd_remote_flush); - REGISTER_SVC(SD_SGREMOTE_FLUSH, r_sd_sgremote_flush); - REGISTER_SVC(SD_DISK_IO, r_sd_disk_io); - REGISTER_SVC(SD_GET_BMAP, r_rem_get_bmap); - - if ((rc = hpf_register_module("SDBC", _sd_hpf_stats)) != 0) - return (rc); -#endif - REGISTER_SVC(SD_ENABLE, r_sd_ifs_cache_enable); - REGISTER_SVC(SD_DISABLE, r_sd_ifs_cache_disable); - REGISTER_SVC(SD_CD_DISCARD, r_cd_discard); - - cv_init(&_sd_flush_cv, NULL, CV_DRIVER, NULL); - - mutex_init(&_sd_block_lk, NULL, MUTEX_DRIVER, NULL); - - sdbc_max_devs = nsc_max_devices(); - - /* - * Initialize the bitmap array that would be useful in determining - * if the mask is not fragmented, instead of determinig this - * at run time. 
Also initialize a lookup array for each mask, with - * the starting position, the length, and the mask subset - */ - _sd_init_contig_bmap(); - _sd_init_lookup_map(); - - if ((rc = _sdbc_iobuf_load()) != 0) - return (rc); - if ((rc = _sdbc_handles_load()) != 0) - return (rc); - if ((rc = _sdbc_tr_load()) != 0) - return (rc); - if ((rc = _sdbc_ft_load()) != 0) - return (rc); - if ((rc = _sdbc_tdaemon_load()) != 0) - return (rc); - if ((rc = _sdbc_hash_load()) != 0) - return (rc); -#ifdef DEBUG - _sdbc_ioj_load(); -#endif - sdbc_inited = 1; - - return (0); -} - -static int -sdbcunload(void) -{ - if (_sd_cache_initialized) { - cmn_err(CE_WARN, - "!sdbc(sdbcunload) cannot unload module - cache in use!"); - return (EEXIST); - } -#ifdef m88k - UNREGISTER_SVC(SD_DUAL_WRITE); - UNREGISTER_SVC(SD_DUAL_READ); - UNREGISTER_SVC(SD_SET_CD); - UNREGISTER_SVC(SD_GETSIZE); - UNREGISTER_SVC(SD_DUAL_OPEN); - UNREGISTER_SVC(SD_REMOTE_FLUSH); - UNREGISTER_SVC(SD_SGREMOTE_FLUSH); - UNREGISTER_SVC(SD_DISK_IO); - UNREGISTER_SVC(SD_GET_BMAP); - - (void) hpf_unregister_module("SDBC"); -#endif - UNREGISTER_SVC(SD_ENABLE); - UNREGISTER_SVC(SD_DISABLE); - UNREGISTER_SVC(SD_CD_DISCARD); - - cv_destroy(&_sd_flush_cv); - mutex_destroy(&_sd_block_lk); - - _sdbc_hash_unload(); - _sdbc_ft_unload(); - _sdbc_tr_unload(); - _sdbc_tdaemon_unload(); - _sdbc_handles_unload(); - _sdbc_iobuf_unload(); -#ifdef DEBUG - _sdbc_ioj_unload(); -#endif - - mutex_destroy(&_sd_cache_lock); - mutex_destroy(&_sdbc_config_lock); - - /* - * Normally we would unregister memory at deconfig time. - * However when chasing things like memory leaks it is - * useful to defer until unload time. 
- */ - if (_sdbc_memtype_deconfigure_delayed) - _sdbc_memtype_deconfigure(); - - return (0); -} - - -static int -sdbcload(void) -{ - int err; - - if ((err = sdbcinit()) != 0) { - (void) sdbcunload(); - return (err); - } - return (0); -} - - -/* ARGSUSED */ - -static int -sdbcopen(dev_t *devp, int flag, int otyp, cred_t *crp) -{ - int nd = nsc_node_id(); - - /* - * If we were statically linked in then returning an error out - * of sdbcinit won't prevent someone from coming thru here. - * We must prevent them from getting any further. - */ - if (!sdbc_inited) - return (EINVAL); - - if (nd < nsc_min_nodeid) { - cmn_err(CE_WARN, - "!sdbc(sdbcopen) open failed, systemid (%d) must be >= %d", - nd, nsc_min_nodeid); - return (EINVAL); - } - if (!_sdbc_attached) - return (ENXIO); - - return (0); -} - - -/* ARGSUSED */ - -static int -sdbcclose(dev_t dev, int flag, int otyp, cred_t *crp) -{ - return (0); -} - -#ifdef _MULTI_DATAMODEL -static int -convert_ioctl_args(int cmd, void *arg, int mode, _sdbc_ioctl_t *args) -/* - * convert_ioctl-args - Do a case by case conversion of a ILP32 ioctl - * structure to an LP64 structure. - * The main concern here is whether to sign-extend or not. The rule - * is that pointers are not sign extended, the rest are obvious. - * Since most everything is sign-extended the definition of - * _sdbc_ioctl32_t uses signed fields. 
- * - */ -{ - _sdbc_ioctl32_t args32; - - if (ddi_copyin(arg, &args32, sizeof (_sdbc_ioctl32_t), mode)) - return (EFAULT); - - bzero((void *) args, sizeof (_sdbc_ioctl_t)); - - switch (cmd) { - - case SDBC_UNUSED_1: - case SDBC_UNUSED_2: - case SDBC_UNUSED_3: - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - cmn_err(CE_WARN, - "!sdbc(convert_ioctl_args) obsolete sdbc ioctl used"); - return (EINVAL); - - case SDBC_ADUMP: - args->arg0 = args32.arg0; /* cd */ - args->arg1 = (uint32_t)args32.arg1; /* &tt */ - args->arg2 = (uint32_t)args32.arg2; /* NULL (buf) */ - args->arg3 = args32.arg3; /* size of buf */ - args->arg4 = args32.arg4; /* flag */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_TEST_INIT: - args->arg0 = (uint32_t)args32.arg0; /* fname (char *) */ - args->arg1 = args32.arg1; /* index */ - args->arg2 = args32.arg2; /* len */ - args->arg3 = args32.arg3; /* track size */ - args->arg4 = args32.arg4; /* flag */ - break; - - case SDBC_TEST_START: - args->arg0 = args32.arg0; /* num */ - args->arg1 = args32.arg1; /* type */ - args->arg2 = args32.arg2; /* loops */ - args->arg3 = args32.arg3; /* from */ - args->arg4 = args32.arg4; /* seed */ - break; - - case SDBC_TEST_END: - break; - - case SDBC_ENABLE: - case SDBC_VERSION: - args->arg0 = (uint32_t)args32.arg0; /* pointer */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_DISABLE: - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_GET_CLUSTER_SIZE: - args->arg0 = (uint32_t)args32.arg0; /* (int * ) */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - /* get the gl_file data */ - case SDBC_GET_CLUSTER_DATA: - /* pointer to array[2*cluster_size] */ - args->arg0 = (uint32_t)args32.arg0; - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - /* get the size of the global info pages for each board */ - case SDBC_GET_GLMUL_SIZES: - args->arg0 = (uint32_t)args32.arg0; /* 
int[CACHE_MEM_PAD] * */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - /* get the global info about write blocks */ - case SDBC_GET_GLMUL_INFO: - /* pointer to array[2*(sum of GLMUL_SIZES)] */ - args->arg0 = (uint32_t)args32.arg0; - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_SET_CD_HINT: - args->arg0 = args32.arg0; /* cd */ - args->arg1 = args32.arg1; /* hint */ - args->arg2 = args32.arg2; /* flag */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_GET_CD_HINT: - args->arg0 = args32.arg0; - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_SET_NODE_HINT: - args->arg0 = args32.arg0; /* hint */ - args->arg1 = args32.arg1; /* flag */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_GET_NODE_HINT: - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_STATS: - args->arg0 = (uint32_t)args32.arg0; /* (_sd_stats_t *) */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_ZAP_STATS: - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_GET_CD_BLK: - args->arg0 = args32.arg0; /* cd */ - args->arg1 = (uint32_t)args32.arg1; /* blk */ - args->arg2 = (uint32_t)args32.arg2; /* (addr[5] *) */ - break; - - case SDBC_GET_CONFIG: - args->arg0 = (uint32_t)args32.arg0; /* (_sdbc_config_t *) */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_SET_CONFIG: - args->arg0 = (uint32_t)args32.arg0; /* (_sdbc_config_t *) */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_MAXFILES: - args->arg0 = (uint32_t)args32.arg0; /* (int * ) */ - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - -#ifdef DEBUG - /* toggle flusher flag for testing */ - case SDBC_TOGGLE_FLUSH: - args->sdbc_ustatus = (spcs_s_info_t)args32.sdbc_ustatus; - break; - - case SDBC_INJ_IOERR: /* cd, errnum */ - 
args->arg0 = args32.arg0; /* cd */ - args->arg1 = args32.arg1; /* i/o error number */ - args->arg2 = args32.arg2; /* countdown to issuing error */ - break; - - /* clear injected i/o errors */ - case SDBC_CLR_IOERR: /* cd */ - args->arg0 = args32.arg0; /* cd */ - break; -#endif /* DEBUG */ - default: - return (EINVAL); - } - - return (0); -} -#endif /* _MULTI_DATAMODEL */ - -static int -sdbc_get_cd_blk(_sdbc_ioctl_t *args, int mode) -{ - - _sd_cctl_t *cc_ent; - caddr_t data; - char *taddr; - intptr_t addr[5]; -#ifdef _MULTI_DATAMODEL - uint32_t addr_32[5]; -#endif /* _MULTI_DATAMODEL */ - char *lookup_file = NULL; - int rc; - sdbc_info_t info; - nsc_off_t fba_pos; /* disk block number */ - - if (_sd_cache_initialized == 0) { - return (EINVAL); - } - - /* copyin the block number */ - if (ddi_copyin((void *)args->arg1, &fba_pos, sizeof (nsc_off_t), - mode)) { - return (EFAULT); - } - -#ifdef _MULTI_DATAMODEL - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - if (ddi_copyin((void *)args->arg2, addr_32, sizeof (addr_32), - mode)) { - return (EFAULT); - } - addr[0] = addr_32[0]; /* (sdbc_info_t *) */ - addr[1] = addr_32[1]; /* (char *) cdata */ - addr[2] = addr_32[2]; /* ( int * ) cblk_size */ - addr[3] = addr_32[3]; /* ( char * ) filename */ - addr[4] = addr_32[4]; /* ( char *) wdata */ - } else { - if (ddi_copyin((void *)args->arg2, addr, sizeof (addr), mode)) { - return (EFAULT); - } - } -#else /* _MULTI_DATAMODEL */ - if (ddi_copyin((void *)args->arg2, addr, sizeof (addr), mode)) { - return (EFAULT); - } -#endif /* _MULTI_DATAMODEL */ - - (void) copyout(&CACHE_BLOCK_SIZE, (void *)addr[2], sizeof (int)); - - if (_sd_get_cd_blk((int)args->arg0, FBA_TO_BLK_NUM(fba_pos), - &cc_ent, &data, &lookup_file)) { - if (lookup_file != NULL) - (void) copyout(lookup_file, (void *)addr[3], - NSC_MAXPATH); - return (ENOENT); - } - rc = 0; - taddr = NULL; - - info.ci_write = cc_ent->cc_write ? 
1 : 0; - info.ci_dirty = cc_ent->cc_dirty; - info.ci_valid = cc_ent->cc_valid; - info.ci_cd = CENTRY_CD(cc_ent); - info.ci_dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)); - (void) copyout(lookup_file, (void *)addr[3], NSC_MAXPATH); - (void) copyout(&info, (void *)addr[0], sizeof (sdbc_info_t)); - - (void) copyout(data, (void *)addr[1], CACHE_BLOCK_SIZE); - - /* get the write data if any */ - if (cc_ent->cc_write) { - - if (sdbc_safestore) { - cmn_err(CE_WARN, - "!sdbc(sdbc_get_cd_blk) cc_write 0x%p sc-res 0x%p", - (void *)cc_ent->cc_write, - (void *)cc_ent->cc_write->sc_res); - - if ((taddr = kmem_alloc(CACHE_BLOCK_SIZE, - KM_NOSLEEP)) == NULL) { - cmn_err(CE_WARN, - "!sdbc(sdbc_get_cd_blk) kmem_alloc failed." - " cannot get write data"); - info.ci_write = NULL; - rc = EFAULT; - } else if (SSOP_READ_CBLOCK(sdbc_safestore, - cc_ent->cc_write->sc_res, taddr, - CACHE_BLOCK_SIZE, 0) == SS_ERR) { - - cmn_err(CE_WARN, "sdbc(sdbc_get_cd_blk) " - "!safestore read failed"); - rc = EFAULT; - - } else if (copyout(taddr, (void *)addr[4], - CACHE_BLOCK_SIZE)) { - cmn_err(CE_WARN, - "!sdbc(sdbc_get_cd_blk) copyout failed." 
- " cannot get write data"); - rc = EFAULT; - } - } - - } - - if (taddr) - kmem_free(taddr, CACHE_BLOCK_SIZE); - - return (rc); -} - -/* ARGSUSED */ -static int -sdbcioctl(dev_t dev, int cmd, void *arg, int mode, cred_t *crp, int *rvp) -{ - int rc = 0; - _sdbc_ioctl_t args; - int convert_32 = 0; - spcs_s_info_t kstatus; - - *rvp = 0; - -#ifdef _MULTI_DATAMODEL - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - int rc; - convert_32 = 1; - if ((rc = convert_ioctl_args(cmd, arg, mode, &args)) != 0) - return (rc); - } else { - if (ddi_copyin(arg, &args, sizeof (_sdbc_ioctl_t), mode)) { - return (EFAULT); - } - } -#else /* _MULTI_DATAMODEL */ - if (ddi_copyin(arg, &args, sizeof (_sdbc_ioctl_t), mode)) { - return (EFAULT); - } -#endif /* _MULTI_DATAMODEL */ - - kstatus = spcs_s_kcreate(); - if (!kstatus) - return (ENOMEM); - - switch (cmd) { - - case SDBC_UNUSED_1: - case SDBC_UNUSED_2: - case SDBC_UNUSED_3: - - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - SDBC_EOBSOLETE)); - - case SDBC_ADUMP: - rc = _sd_adump(&args, rvp); - break; - - case SDBC_TEST_INIT: - rc = _sd_test_init(&args); - break; - - case SDBC_TEST_START: - rc = _sd_test_start(&args, rvp); - break; - - case SDBC_TEST_END: - rc = _sd_test_end(); - break; - - case SDBC_ENABLE: - mutex_enter(&_sdbc_config_lock); - rc = _sdbc_configure((_sd_cache_param_t *)args.arg0, - NULL, kstatus); - if (rc && rc != EALREADY && rc != SDBC_ENONETMEM) { - (void) _sdbc_deconfigure(kstatus); - mutex_exit(&_sdbc_config_lock); - return (spcs_s_ocopyoutf - (&kstatus, args.sdbc_ustatus, rc)); - } - mutex_exit(&_sdbc_config_lock); - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, rc)); - - case SDBC_DISABLE: - mutex_enter(&_sdbc_config_lock); - if (_sd_cache_initialized == 0) { - - mutex_exit(&_sdbc_config_lock); - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - SDBC_EDISABLE)); - } - rc = _sdbc_deconfigure(kstatus); - mutex_exit(&_sdbc_config_lock); - return (spcs_s_ocopyoutf(&kstatus, 
args.sdbc_ustatus, rc)); - - case SDBC_GET_CLUSTER_SIZE: - if (_sd_cache_initialized == 0) { - - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - SDBC_ECLUSTER_SIZE)); - } - - rc = sd_get_file_info_size((void *)args.arg0); - break; - - /* get the gl_file data */ - case SDBC_GET_CLUSTER_DATA: - if (_sd_cache_initialized == 0) { - - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - SDBC_ECLUSTER_DATA)); - } - rc = sd_get_file_info_data((void *)args.arg0); - break; - - /* get the size of the global info pages for each board */ - case SDBC_GET_GLMUL_SIZES: - if (_sd_cache_initialized == 0) { - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - SDBC_EGLMUL_SIZE)); - } - rc = sd_get_glmul_sizes((void *)args.arg0); - break; - - /* get the global info about write blocks */ - case SDBC_GET_GLMUL_INFO: - if (_sd_cache_initialized == 0) { - - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - SDBC_EGLMUL_INFO)); - - } - rc = sd_get_glmul_info((void *)args.arg0); - break; - - case SDBC_SET_CD_HINT: - if (_sd_cache_initialized == 0) - return (spcs_s_ocopyoutf(&kstatus, - args.sdbc_ustatus, EINVAL)); - rc = ((args.arg2) ? - _sd_set_hint((int)args.arg0, (uint_t)args.arg1) : - _sd_clear_hint((int)args.arg0, (uint_t)args.arg1)); - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, rc)); - - case SDBC_GET_CD_HINT: - { - uint_t hint; - - if (_sd_cache_initialized == 0) - return (spcs_s_ocopyoutf(&kstatus, - args.sdbc_ustatus, EINVAL)); - if ((rc = _sd_get_cd_hint((int)args.arg0, &hint)) == 0) - *rvp = hint; - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - rc)); - } - - case SDBC_SET_NODE_HINT: - rc = ((args.arg1) ? 
_sd_set_node_hint((uint_t)args.arg0) : - _sd_clear_node_hint((uint_t)args.arg0)); - if (rc) - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - rc)); - /* FALLTHRU */ - case SDBC_GET_NODE_HINT: - { - uint_t hint; - if ((rc = _sd_get_node_hint(&hint)) == 0) - *rvp = hint; - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - rc)); - } - - case SDBC_STATS: - rc = _sd_get_stats((void *)args.arg0, convert_32); - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, rc)); - - case SDBC_ZAP_STATS: - _sd_zap_stats(); - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, 0)); - - case SDBC_GET_CD_BLK: - if (_sd_cache_initialized == 0) - return (spcs_s_ocopyoutf(&kstatus, - args.sdbc_ustatus, EINVAL)); - rc = sdbc_get_cd_blk(&args, mode); - break; - - case SDBC_GET_CONFIG: - { - _sdbc_config_t sdbc_config_info; - - if (ddi_copyin((void *)args.arg0, - &sdbc_config_info, - sizeof (_sdbc_config_t), - mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - rc = _sdbc_get_config(&sdbc_config_info); - (void) ddi_copyout(&sdbc_config_info, - (void *)args.arg0, - sizeof (_sdbc_config_t), - mode); - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, rc)); - } - - case SDBC_SET_CONFIG: - { - _sdbc_config_t mgmt_config_info; - - if (ddi_copyin((void *)args.arg0, - &mgmt_config_info, - sizeof (_sdbc_config_t), - mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - rc = _sdbc_configure(NULL, &mgmt_config_info, kstatus); - if (rc && rc != EALREADY) { - (void) _sdbc_deconfigure(kstatus); - return (spcs_s_ocopyoutf - (&kstatus, args.sdbc_ustatus, rc)); - } - - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, rc)); - } - - case SDBC_MAXFILES: - if (copyout(&sdbc_max_devs, (void *)args.arg0, - sizeof (sdbc_max_devs))) - rc = EFAULT; - else - rc = 0; - - break; - - case SDBC_VERSION: - { - cache_version_t cache_version; - - cache_version.major = sdbc_major_rev; - cache_version.minor = sdbc_minor_rev; - cache_version.micro = sdbc_micro_rev; - 
cache_version.baseline = sdbc_baseline_rev; - - if (ddi_copyout(&cache_version, (void *)args.arg0, - sizeof (cache_version_t), mode)) { - rc = EFAULT; - break; - } - - break; - } - - -#ifdef DEBUG - /* toggle flusher flag for testing */ - case SDBC_TOGGLE_FLUSH: - _sdbc_flush_flag ^= 1; - *rvp = _sdbc_flush_flag; - rc = 0; - - return (spcs_s_ocopyoutf(&kstatus, args.sdbc_ustatus, - SDBC_ETOGGLE_FLUSH, _sdbc_flush_flag ? "on" : "off")); - - - /* inject i/o errors */ - case SDBC_INJ_IOERR: /* cd, errnum */ - if (_sd_cache_initialized == 0) - return (spcs_s_ocopyoutf(&kstatus, - args.sdbc_ustatus, EINVAL)); - rc = _sdbc_inject_ioerr(args.arg0, args.arg1, args.arg2); - break; - - /* clear injected i/o errors */ - case SDBC_CLR_IOERR: /* cd */ - if (_sd_cache_initialized == 0) - return (spcs_s_ocopyoutf(&kstatus, - args.sdbc_ustatus, EINVAL)); - rc = _sdbc_clear_ioerr(args.arg0); - break; - -#endif /* DEBUG */ - default: - _sd_print(3, "!SDBC unknown ioctl: 0x%x unsupported", cmd); - rc = EINVAL; - break; - } - - spcs_s_kfree(kstatus); - return (rc); -} - - -/* - * _sd_timed_block - sleep waiting for ticks time delay. - * ticks - # of ticks to sleep - * cvp - pointer to the cv we wait on while we delay. - * - * NO spin locks can be held at entry! - * - */ -void -_sd_timed_block(clock_t ticks, kcondvar_t *cvp) -{ - mutex_enter(&_sd_block_lk); - (void) cv_reltimedwait(cvp, &_sd_block_lk, ticks, TR_CLOCK_TICK); - mutex_exit(&_sd_block_lk); -} - - -/* - * _sd_unblock - awake a sleeper waiting on cv pointed to by cvp. - * - * NO spin locks can be held at entry as we may sleep. 
- * - */ -void -_sd_unblock(kcondvar_t *cvp) -{ - - mutex_enter(&_sd_block_lk); - cv_broadcast(cvp); - mutex_exit(&_sd_block_lk); -} - -/* ARGSUSED */ -void -_sd_data_log(int num, _sd_cctl_t *centry, nsc_off_t st, nsc_size_t len) -{ -#if defined(_SD_FBA_DATA_LOG) - nsc_size_t i; - nsc_off_t blk; - - blk = BLK_TO_FBA_NUM(CENTRY_BLK(centry)); - for (i = st; i < (st + len); i++) - SDTRACE(num, CENTRY_CD(centry), 1, blk + i, - *(int *)(centry->cc_data + FBA_SIZE(i)), - *(int *)(centry->cc_data + FBA_SIZE(i) + 4)); -#endif /* _SD_FBA_DATA_LOG */ -} - -/* ARGSUSED */ -void -_sd_data_log_chain(int num, _sd_cctl_t *centry, nsc_off_t fba_pos, - nsc_size_t fba_len) -{ -#if defined(_SD_FBA_DATA_LOG) - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - - while (CENTRY_BLK(centry) != FBA_TO_BLK_NUM(fba_pos)) - centry = centry->cc_chain; - - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = BLK_FBAS - st_cblk_off; - if (st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = fba_len; - } else { - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - } - - DATA_LOG(num, centry, st_cblk_off, st_cblk_len); - - fba_len -= st_cblk_len; - centry = centry->cc_chain; - - while (fba_len > end_cblk_len) { - DATA_LOG(num, centry, 0, BLK_FBAS); - fba_len -= BLK_FBAS; - centry = centry->cc_chain; - } - if (end_cblk_len) DATA_LOG(num, centry, 0, end_cblk_len); -#endif /* _SD_FBA_DATA_LOG */ -} - - -void -_sd_zap_stats(void) -{ - int i; - - if (_sd_cache_stats == NULL) - return; - - _sd_cache_stats->st_rdhits = 0; - _sd_cache_stats->st_rdmiss = 0; - _sd_cache_stats->st_wrhits = 0; - _sd_cache_stats->st_wrmiss = 0; - _sd_lru_q.sq_noreq_stat = 0; - _sd_lru_q.sq_req_stat = 0; - - for (i = 0; i < sdbc_max_devs; i++) { - _sd_cache_stats->st_shared[i].sh_cache_read = 0; - _sd_cache_stats->st_shared[i].sh_cache_write = 0; - 
_sd_cache_stats->st_shared[i].sh_disk_read = 0; - _sd_cache_stats->st_shared[i].sh_disk_write = 0; - } -} - - -/* - * Return the cache sizes used by the Sense Subsystem Status CCW - */ -int -_sd_cache_sizes(int *asize, int *wsize) -{ - int psize; - - *asize = 0; - *wsize = 0; - - /* - * add in the total cache size and the - * non-volatile (battery-backed) cache size. - */ - if (_sd_net_config.sn_configured) { - psize = _sd_net_config.sn_psize; - *asize += (_sd_net_config.sn_cpages * psize); - *wsize += (safestore_config.ssc_wsize); - } - - return (0); -} - - -/*PRINTFLIKE2*/ -void -_sd_print(int level, char *fmt, ...) -{ - va_list adx; - if (level <= _sd_debug_level) { - va_start(adx, fmt); - vcmn_err(CE_NOTE, fmt, adx); - va_end(adx); - - } -} - - -int -_sd_get_cd_blk(int cd, nsc_off_t cblk, _sd_cctl_t **cc, caddr_t *data, - char **filename) -{ - _sd_cctl_t *cc_ent; - - if (FILE_OPENED(cd) != 0) { - *filename = _sd_cache_files[cd].cd_info->sh_filename; - if (cc_ent = (_sd_cctl_t *) - _sd_hash_search(cd, cblk, _sd_htable)) { - *cc = cc_ent; - *data = (caddr_t)cc_ent->cc_data; - return (0); - } - } - return (-1); -} - -/* - * central dyn mem processing vars edit rtn. 
- * input a local copy and xfer to global - * - * sec0,sec1,sec2 - * range check 1 to 255 (arbitrary but in any case must be <= 2000 due to - * 32bit signed int limits in later calc) - * aging_ct - * range check 1 to 255 (only 8 bits reserved for aging ctr) - * - */ -int -sdbc_edit_xfer_process_vars_dm(_dm_process_vars_t *process_vars) -{ - if (process_vars->max_dyn_list > 0) - dynmem_processing_dm.max_dyn_list = process_vars->max_dyn_list; - - /* no edit on monitor_dynmem_process */ - dynmem_processing_dm.monitor_dynmem_process = - process_vars->monitor_dynmem_process; - /* no edit on process_directive */ - dynmem_processing_dm.process_directive = - process_vars->process_directive; - - if (process_vars->cache_aging_ct1 > 0 && - process_vars->cache_aging_ct1 <= CACHE_AGING_CT_MAX) - dynmem_processing_dm.cache_aging_ct1 = - process_vars->cache_aging_ct1; - if (process_vars->cache_aging_ct2 > 0 && - process_vars->cache_aging_ct2 <= CACHE_AGING_CT_MAX) - dynmem_processing_dm.cache_aging_ct2 = - process_vars->cache_aging_ct2; - if (process_vars->cache_aging_ct3 > 0 && - process_vars->cache_aging_ct3 <= CACHE_AGING_CT_MAX) - dynmem_processing_dm.cache_aging_ct3 = - process_vars->cache_aging_ct3; - if (process_vars->cache_aging_sec1 > 0 && - process_vars->cache_aging_sec1 <= CACHE_AGING_SEC1_MAX) - dynmem_processing_dm.cache_aging_sec1 = - process_vars->cache_aging_sec1; - if (process_vars->cache_aging_sec2 > 0 && - process_vars->cache_aging_sec2 <= CACHE_AGING_SEC2_MAX) - dynmem_processing_dm.cache_aging_sec2 = - process_vars->cache_aging_sec2; - if (process_vars->cache_aging_sec3 > 0 && - process_vars->cache_aging_sec3 <= CACHE_AGING_SEC3_MAX) - dynmem_processing_dm.cache_aging_sec3 = - process_vars->cache_aging_sec3; - if (process_vars->cache_aging_pcnt1 >= 0 && - process_vars->cache_aging_pcnt1 <= CACHE_AGING_PCNT1_MAX) - dynmem_processing_dm.cache_aging_pcnt1 = - process_vars->cache_aging_pcnt1; - if (process_vars->cache_aging_pcnt2 >= 0 && - 
process_vars->cache_aging_pcnt2 <= CACHE_AGING_PCNT2_MAX) - dynmem_processing_dm.cache_aging_pcnt2 = - process_vars->cache_aging_pcnt2; - if (process_vars->max_holds_pcnt >= 0 && - process_vars->max_holds_pcnt <= MAX_HOLDS_PCNT_MAX) - dynmem_processing_dm.max_holds_pcnt = - process_vars->max_holds_pcnt; - return (0); -} - -dev_info_t * -sdbc_get_dip() -{ - return (dev_dip); -} diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_misc.h b/usr/src/uts/common/avs/ns/sdbc/sd_misc.h deleted file mode 100644 index aed864d82d..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_misc.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SD_MISC_H -#define _SD_MISC_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define _SD_FIFO_WAIT 1000 -#define _SD_FIFO_WSPIN 100 -#ifdef _KERNEL - -extern _dm_process_vars_t dynmem_processing_dm; - -extern int sdbc_wrthru_len; -extern nsc_size_t sdbc_max_fbas; -extern int sdbc_max_devs; - - -extern int _init(void); -extern void _sd_data_log(int num, _sd_cctl_t *centry, nsc_off_t st, - nsc_size_t len); -extern void _sd_data_log_chain(int num, _sd_cctl_t *centry, nsc_off_t fba_pos, - nsc_size_t fba_len); -extern int _sd_reflect_ignore(ucaddr_t from, ucaddr_t to, int size); -extern int _sd_reflect(ucaddr_t from, ucaddr_t to, int size, int flag); -extern void _sd_timed_block(clock_t ticks, kcondvar_t *cvp); -extern void _sd_unblock(kcondvar_t *cvp); -extern void _sd_zap_stats(void); -extern int _sd_cache_sizes(int *asize, int *wsize); -extern void _sd_print(int level, char *fmt, ...); -extern int _sd_get_cd_blk(int cd, nsc_off_t blk, _sd_cctl_t **cc, caddr_t *data, - char **filename); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_MISC_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_mkiob.sh b/usr/src/uts/common/avs/ns/sdbc/sd_mkiob.sh deleted file mode 100644 index 276261b03b..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_mkiob.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/sh -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# Build-time script to generate the sd_iob_implX.c files. -# -START="$1" -END="$2" -FILE="$3" - -awk ' -/#define.*_SD_DEFAULT_IOBUFS/ { - num = $3; -printf("/* start = %d, end = %d, num %d */\n", start, end, num); - if (num > end) { - num = end; - } -printf("/* start = %d, end = %d, num %d */\n", start, end, num); -} - -END { - printf("/* start = %d, end = %d, num %d */\n", start, end, num); - printf("#include <sys/types.h>\n"); - printf("#include <sys/param.h>\n"); - printf("#include <sys/ksynch.h>\n"); - printf("#include <sys/kmem.h>\n"); - printf("#include <sys/stat.h>\n"); - printf("#include <sys/buf.h>\n"); - printf("#include <sys/open.h>\n"); - printf("#include <sys/conf.h>\n"); - printf("#include <sys/file.h>\n"); - printf("#include <sys/cmn_err.h>\n"); - printf("#include <sys/errno.h>\n"); - printf("#include <sys/debug.h>\n"); - printf("#include <sys/ddi.h>\n"); - printf("#include <sys/nsc_thread.h>\n"); - printf("#include <sys/nsctl/sd_bcache.h>\n"); - printf("#include <sys/nsctl/sd_trace.h>\n"); - printf("#include <ns/sdbc/sd_io.h>\n"); - printf("#include <ns/sdbc/sd_iob.h>\n"); - - n = start; - while (n < num) { - printf("IOB_DCB(%d)", n); - n = n + 1; - - if (n % 4) { - printf(" "); - } else { - printf("\n"); - if (!((n - start) % 2048) && (n < num)) - printf("static int _cscope_brkline%d;\n", n); - } - } -}' start=$START end=$END incdir=`dirname $FILE` $FILE - -exit 0 diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_pcu.c b/usr/src/uts/common/avs/ns/sdbc/sd_pcu.c deleted file mode 100644 index 556a0659c9..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_pcu.c +++ /dev/null @@ -1,874 +0,0 @@ -/* - * CDDL HEADER START - * 
- * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/buf.h> -#include <sys/ddi.h> -#include <sys/nsc_thread.h> - - -#include "sd_bcache.h" -#include "sd_trace.h" -#include "sd_io.h" -#include "sd_bio.h" -#include "sd_ft.h" -#include "sd_misc.h" -#include "sd_pcu.h" - -/* - * PCU (aka UPS) handling - - */ -#define bitmap_next cc_dirty_link -#define bitmap_tail cc_dirty_next - -#define anon_next cc_dirty_link -#define anon_tail cc_dirty_next -#define anon_data cc_data - -struct bitmap { - _sd_cctl_t *bmps; - int bmaps_per_block; - int inuse; /* In use in the _last_ block */ -}; - -#define SDBC_PCU_MAXSWAPIL 3 /* Watch for 5 fields in ioctl arg. */ - -struct swapfiles { - int nswpf; /* Number of filenames */ - int colsize; /* In cache blocks */ - char *names[SDBC_PCU_MAXSWAPIL]; -}; - -static void _sdbc_pcu_cleanup(struct swapfiles *); - -/* - * Forward declare functions containing 64-bit argument types to enforce - * type-checking. 
- */ -static int add_bitmap_entry(struct bitmap *bmp, _sd_bitmap_t bits, int any_fail, - nsc_off_t fba_num); -static int flush_bitmap_list(struct bitmap *bmp, dev_t dev, nsc_off_t *blkno); -static int flush_centry_list(_sd_cd_info_t *cdi, _sd_cctl_t *dirty, dev_t dev, - nsc_off_t *blkno, int failed, struct bitmap *bmaps); -static int flush_hdr(_sd_cctl_t *hdr, dev_t dev, nsc_off_t blkno); -static int flush_anon_list(_sd_cctl_t *anon_list, dev_t dev, nsc_off_t *blkno); -static void sdbc_anon_copy(caddr_t src, nsc_size_t len, _sd_cctl_t *dest, - nsc_off_t dest_off); -static void sdbc_anon_get(_sd_cctl_t *src, nsc_off_t src_off, caddr_t dest, - nsc_size_t len); -static _sd_cctl_t *sdbc_get_anon_list(nsc_size_t bytes); - -static int got_hint; /* did we capture hint at power_lost */ -static unsigned int wrthru_hint; /* saved hint at power_lost */ -static int saw_power_lost; - -char _sdbc_shutdown_in_progress; -static struct swapfiles swfs; - -/* - * sdbc_get_anon_list - allocate a set of anonymous cache block - * entries that can pretend to be a single blocks of data holding - * a virtual character array holding "bytes" entries. - * - * returns - the cache block heading the chain. - */ -static _sd_cctl_t * -sdbc_get_anon_list(nsc_size_t bytes) -{ - _sd_cctl_t *list, *prev; - nsc_size_t i, blks; - - prev = NULL; - blks = (bytes + CACHE_BLOCK_SIZE - 1) / CACHE_BLOCK_SIZE; - for (i = 0; i < blks; i++) { - - list = sdbc_centry_alloc_blks(_CD_NOHASH, 0, 1, 0); - bzero(list->cc_data, CACHE_BLOCK_SIZE); - list->anon_next = prev; - prev = list; - }; - - return (list); -} - -/* - * sdbc_anon_get - gets "len" bytes of data virtual character array represented - * by "src" begining at index "dest_off" and copy to buffer "dest". - * - * dest - pointer to our virtual array (chain of cache blocks). - * dest_off - first location to copy data to. 
- * src - pointer to data to copy - * len - the number of bytes of data to copy - * - */ -static void -sdbc_anon_get(_sd_cctl_t *src, nsc_off_t src_off, caddr_t dest, nsc_size_t len) -{ - nsc_size_t i; - nsc_size_t nlen; - nsc_off_t blk_start, blk_end; - - if (len == 0) - return; - - blk_start = src_off / CACHE_BLOCK_SIZE; - blk_end = (src_off + len) / CACHE_BLOCK_SIZE; - - for (i = 0; i < blk_start; i++) { - src = src->anon_next; - src_off -= CACHE_BLOCK_SIZE; - } - - nlen = min(len, CACHE_BLOCK_SIZE - src_off); - bcopy(&src->anon_data[src_off], dest, (size_t)nlen); - - for (i = 1; i < blk_end - blk_start; i++) { - bcopy(src->anon_data, &dest[nlen], (size_t)CACHE_BLOCK_SIZE); - nlen += CACHE_BLOCK_SIZE; - src = src->anon_next; - } - if (nlen != len) { - bcopy(src->anon_data, &dest[nlen], (size_t)(len - nlen)); - } -} - -/* - * sdbc_anon_copy - copies "len" bytes of data from "src" to the - * virtual character array represented by "dest" begining at index - * "dest_off". - * - * src - pointer to data to copy - * len - the number of bytes of data to copy - * dest - pointer to our virtual array (chain of cache blocks). - * dest_off - first location to copy data to. 
- * - */ -static void -sdbc_anon_copy(caddr_t src, nsc_size_t len, _sd_cctl_t *dest, - nsc_off_t dest_off) -{ - nsc_size_t i; - nsc_size_t nlen; - nsc_off_t blk_start, blk_end; - - if (len == 0) - return; - - blk_start = dest_off / CACHE_BLOCK_SIZE; - blk_end = (dest_off + len) / CACHE_BLOCK_SIZE; - - for (i = 0; i < blk_start; i++) { - dest = dest->anon_next; - dest_off -= CACHE_BLOCK_SIZE; - } - - nlen = min(len, CACHE_BLOCK_SIZE - dest_off); - bcopy(src, &dest->anon_data[dest_off], (size_t)nlen); - - for (i = 1; i < blk_end - blk_start; i++) { - bcopy(&src[nlen], dest->anon_data, (size_t)CACHE_BLOCK_SIZE); - nlen += CACHE_BLOCK_SIZE; - dest = dest->anon_next; - } - if (nlen != len) { - bcopy(&src[nlen], dest->anon_data, (size_t)(len - nlen)); - } -} - -/* - * flush_anon_list - flush a chain of anonymous cache blocks - * to the state file. Anonymous chains of cache blocks represent - * virtual arrays for the state flushing code and can contain - * various types of data. - * - * anon_list - chain of cache blocks to flush. - * - * dev - the state file device - * - * blkno - on input the cache block number to begin writing at. - * On exit the next cache block number following the data - * just written. - * - * returns - 0 on success, error number on failure. - */ -static int -flush_anon_list(_sd_cctl_t *anon_list, - dev_t dev, - nsc_off_t *blkno) -{ - struct buf *bp; - int rc; - _sd_cctl_t *prev; - nsc_size_t bcnt; - - if (anon_list == NULL) - return (0); - - bcnt = 0; - do { - bp = sd_alloc_iob(dev, BLK_TO_FBA_NUM(*blkno), - BLK_TO_FBA_NUM(1), 0); - sd_add_fba(bp, &anon_list->cc_addr, 0, BLK_FBAS); - rc = sd_start_io(bp, NULL, NULL, 0); - (*blkno)++; - - /* - * A failure here is death. 
This is harsh but not sure - * what else to do - */ - - if (rc != NSC_DONE) - return (rc); - bcnt++; - - prev = anon_list; - anon_list = anon_list->anon_next; - _sd_centry_release(prev); - - } while (anon_list); - - cmn_err(CE_CONT, "sdbc(flush_anon_list) %" NSC_SZFMT "\n", bcnt); - return (0); - -} - -/* - * start_bitmap_list - allocate an anonymous cache block entry - * to anchor a chain of cache blocks representing a virtual - * array of bitmap entries. - * - * returns - the cache block heading the chain. - */ -static void -start_bitmap_list(struct bitmap *bmp, int bpb) -{ - _sd_cctl_t *list; - - list = sdbc_centry_alloc_blks(_CD_NOHASH, 0, 1, 0); - bzero(list->cc_data, CACHE_BLOCK_SIZE); - list->bitmap_next = NULL; - list->bitmap_tail = list; - - bmp->bmps = list; - bmp->inuse = 0; - bmp->bmaps_per_block = bpb; -} - -/* - * add_bitmap_entry - Add a bitmap entry to the chain of bitmap - * entries we are creating for cd's entry in the state file. - * - * Bitmaps are stored in a chain of anonymous cache blocks. Each - * cache block can hold bmaps_per_block in it. As each block is - * filled a new block is added to the tail of the chain. - * - * list - the chain of cache blocks containing the bitmaps. - * bits - the bitmap entry to add. - * any_fail - flag saying whether the data corresponding to this - * bitmap entry had previously failed going to disk. - * fba_num - FBA number corresponding to the entry. - * - * returns - 0 on success, error number on failure. 
- */ -static int -add_bitmap_entry(struct bitmap *bmp, - _sd_bitmap_t bits, int any_fail, nsc_off_t fba_num) -{ - sdbc_pwf_bitmap_t *bmap; - _sd_cctl_t *list = bmp->bmps; - int i; - - bmap = (sdbc_pwf_bitmap_t *)list->bitmap_tail->cc_data; - if (bmp->inuse == bmp->bmaps_per_block) { - _sd_cctl_t *nlist; - - nlist = sdbc_centry_alloc_blks(_CD_NOHASH, 0, 1, 0); - bzero(nlist->cc_data, CACHE_BLOCK_SIZE); - nlist->bitmap_next = NULL; - nlist->bitmap_tail = NULL; - list->bitmap_tail->bitmap_next = nlist; - list->bitmap_tail = nlist; - bmp->inuse = 0; - } - i = bmp->inuse++; - bmap->bitmaps[i].fba_num = fba_num; - bmap->bitmaps[i].dirty = bits; - bmap->bitmaps[i].errs = (char)any_fail; - - return (0); -} - -/* - * flush_bitmap_list - flush a chain of anonymous cache blocks - * containing the dirty/valid bitmaps for a set of cache blocks. - * - * b_list - the chain of bitmap data. - * dev - the state file device. - * blkno - on input the cache block number to begin writing at. - * On exit the next cache block number following the data - * just written. - * - * returns - 0 on success, error number on failure. - */ -static int -flush_bitmap_list(struct bitmap *bmp, dev_t dev, nsc_off_t *blkno) -{ - _sd_cctl_t *b_list; - struct buf *bp; - int rc; - _sd_cctl_t *prev; - int bcnt = 0; /* P3 temp */ - - if ((b_list = bmp->bmps) == NULL) - return (0); - - do { - bp = sd_alloc_iob(dev, BLK_TO_FBA_NUM(*blkno), - BLK_TO_FBA_NUM(1), 0); - sd_add_fba(bp, &b_list->cc_addr, 0, BLK_FBAS); - rc = sd_start_io(bp, NULL, NULL, 0); - (*blkno)++; - - /* - * A failure here is death. This is harsh but not sure - * what else to do - */ - - if (rc != NSC_DONE) - return (rc); - bcnt++; - - prev = b_list; - b_list = b_list->bitmap_next; - _sd_centry_release(prev); - - } while (b_list); - cmn_err(CE_CONT, "sdbc(flush_bitmap_list) %d\n", bcnt); /* P3 */ - - return (0); - -} - -/* - * flush_centry_list - flush a chain of cache blocks for the - * cache descriptor described by "cdi" to the state file. 
- * In addition the bitmaps describing the validity and dirty - * state of each entry are captured to the bitmap chain. - * - * cdi - pointer to description of the cd we are writing. - * dirty - chain of dirty cache blocks to flush (linked - * by dirty_next (sequential) and dirty_link (disjoint). - * - * dev - the state file device. - * - * blkno - on input the cache block number to begin writing at. - * On exit the next cache block number following the data - * just written. - * - * failed - a flag noting whether these blocks had already - * been attempted to write to their true destination and - * failed. (i.e. is the chain from fail_head). - * - * bmaps - a chain of anonymous cache blocks containing all - * the dirty/valid bitmaps for the cache blocks we write. - * - * returns - 0 on success, error number on failure. - */ -static int -flush_centry_list(_sd_cd_info_t *cdi, - _sd_cctl_t *dirty, - dev_t dev, - nsc_off_t *blkno, - int failed, - struct bitmap *bmaps) -{ - _sd_cctl_t *cc_ent; - nsc_size_t count; /* count of cache blocks in a sequential chain */ - struct buf *bp; - int rc; - int bcnt = 0; - - if (dirty == NULL) - return (0); - - mutex_enter(&cdi->cd_lock); - - do { - /* - * each cache block is written to the disk regardless of its - * valid/dirty masks. - */ - count = 0; - cc_ent = dirty; - do { - count++; - cc_ent = cc_ent->cc_dirty_next; - } while (cc_ent); - - bp = sd_alloc_iob(dev, BLK_TO_FBA_NUM(*blkno), - BLK_TO_FBA_NUM(count), 0); - - cc_ent = dirty; - do { - sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS); - rc = add_bitmap_entry(bmaps, - cc_ent->cc_dirty | cc_ent->cc_toflush, failed, - BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))); - if (rc) - return (rc); - cc_ent = cc_ent->cc_dirty_next; - } while (cc_ent); - - *blkno += count; - rc = sd_start_io(bp, NULL, NULL, 0); - - /* - * A failure here is death. 
This is harsh but not sure - * what else to do - */ - - if (rc != NSC_DONE) - return (rc); - bcnt += count; - - dirty = dirty->cc_dirty_link; - } while (dirty); - cmn_err(CE_CONT, "sdbc(flush_centry_list) %d\n", bcnt); /* P3 */ - - mutex_exit(&cdi->cd_lock); - return (0); -} - -/* - * flush_hdr - Flush the state file header to the disk partition - * "dev" at FBA "blkno". Return the result of the i/o operation. - * hdr - a cache block containing the header. - * dev - the state file device. - * blkno - cache block position to write the header. - * - * returns - 0 on success, error number on failure. - */ -static int -flush_hdr(_sd_cctl_t *hdr, dev_t dev, nsc_off_t blkno) -{ - struct buf *bp; - int rc; - - bp = sd_alloc_iob(dev, BLK_TO_FBA_NUM(blkno), BLK_TO_FBA_NUM(1), 0); - sd_add_fba(bp, &hdr->cc_addr, 0, BLK_FBAS); - rc = sd_start_io(bp, NULL, NULL, 0); - _sd_centry_release(hdr); - return (rc); - -} - -/* - * _sdbc_power_flush - flushd the state of sdbc to the state "file" - * on the system disk. All dirty blocks (in progress, unscheduled, - * failed) are written along with the bitmap for each block. The - * data is written using normal sdbc i/o via anonymous cache blocks. - * This is done to simplify the job here (and to limit memory - * requests) at the expense of making the recovery programs more - * complex. Since recovery is done at user level this seems to be - * a good trade off. - * - * Returns: 0 on success, error number on failure. - */ -static int -_sdbc_power_flush(void) -{ - _sd_cctl_t *name_pool; - int string_size; - - sdbc_pwf_hdr_t *hdr; - _sd_cctl_t *hdrblk; - - struct bitmap bmap; - - _sd_cd_info_t *cdi; - int open_files; - _sd_cctl_t *file_pool; - sdbc_pwf_desc_t current; - - nsc_fd_t *state_fd; - dev_t state_rdev; - int devmaj, devmin; - nsc_off_t blkno; - long len; - long total_len; - int pending; - int rc = 0; - - /* - * Force wrthru just in case SLM software didn't really send us a - * warning. 
(Also makes for easier testing) - */ - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - /* disable all (dangerous) cache entry points */ - - cmn_err(CE_CONT, "sdbc(sdbc_power_flush) hint set..\n"); /* P3 */ - - _sdbc_shutdown_in_progress = 1; - -#if 0 - if (sdbc_io && (rc = nsc_unregister_io(sdbc_io, NSC_PCATCH)) != 0) { - /* - * this is bad, in theory we could just busy-out all our - * interfaces and continue. - */ - cmn_err(CE_WARN, - "sdbc(_sdbc_power_flush) couldn't unregister i/o %d", rc); - return (rc); - } - - sdbc_io = NULL; -#endif - - /* wait for all i/o to finish/timeout ? */ - - if ((pending = _sdbc_wait_pending()) != 0) - cmn_err(CE_NOTE, "sdbc(_sdbc_power_flush) %d I/Os were" - " pending at power shutdown", pending); - - cmn_err(CE_CONT, "sdbc(sdbc_power_flush) over pending\n"); /* P3 */ - - /* prevent any further async flushing */ - - _sdbc_flush_deconfigure(); - - /* - * At this point no higher level clients should be able to get thru. - * Failover i/o from the other node is our only other concern as - * far as disturbing the state of sdbc. 
- */ - - /* figure out the names for the string pool */ - - string_size = 0; - open_files = 0; - cdi = _sd_cache_files; - do { - - if (cdi->cd_info == NULL) - continue; - if (cdi->cd_info->sh_alloc == 0) - continue; - open_files++; - string_size += strlen(cdi->cd_info->sh_filename) + 1; - } while (++cdi != &_sd_cache_files[sdbc_max_devs]); - - if (open_files == 0) { - return (0); - } - - hdrblk = sdbc_centry_alloc_blks(_CD_NOHASH, 0, 1, 0); - bzero(hdrblk->cc_data, CACHE_BLOCK_SIZE); - hdr = (sdbc_pwf_hdr_t *)hdrblk->cc_data; - hdr->magic = SDBC_PWF_MAGIC; - hdr->alignment = CACHE_BLOCK_SIZE; - hdr->cd_count = open_files; - /* XXX bmap_size is redundant */ - hdr->bmap_size = CACHE_BLOCK_SIZE / sizeof (sdbc_pwf_bitmap_t); - - name_pool = sdbc_get_anon_list(string_size); - file_pool = sdbc_get_anon_list(sizeof (sdbc_pwf_desc_t) * open_files); - - open_files = 0; - cdi = _sd_cache_files; - total_len = 0; - do { - - if (cdi->cd_info == NULL) - continue; - if (cdi->cd_info->sh_alloc == 0) - continue; - - len = strlen(cdi->cd_info->sh_filename) + 1; - - /* copy the name to string pool */ - sdbc_anon_copy(cdi->cd_info->sh_filename, - len, name_pool, total_len); - - bzero(¤t, sizeof (current)); - current.name = total_len; - sdbc_anon_copy((caddr_t)¤t, sizeof (current), file_pool, - open_files * sizeof (sdbc_pwf_desc_t)); - - open_files++; - total_len += len; - - } while (++cdi != &_sd_cache_files[sdbc_max_devs]); - - /* flush dirty data */ - - if (swfs.nswpf == 0 || swfs.names[0] == NULL) { - cmn_err(CE_WARN, "sdbc(_sdbc_power_flush): State file" - " is not configured"); - rc = ENODEV; - goto cleanup; - } - - if (!(state_fd = - nsc_open(swfs.names[0], NSC_DEVICE, NULL, NULL, &rc)) || - !nsc_getval(state_fd, "DevMaj", (int *)&devmaj) || - !nsc_getval(state_fd, "DevMin", (int *)&devmin)) { - if (state_fd) { - (void) nsc_close(state_fd); - } - /* - * We are hosed big time. We can't get device to write the - * state file opened. 
- */ - cmn_err(CE_WARN, "sdbc(_sdbc_power_flush): Couldn't " - "open %s for saved state file", swfs.names[0]); - rc = EIO; - goto cleanup; - } - - state_rdev = makedevice(devmaj, devmin); - - blkno = 1; - - hdr->string_pool = blkno; - rc = flush_anon_list(name_pool, state_rdev, &blkno); - - hdr->descriptor_pool = blkno; - rc = flush_anon_list(file_pool, state_rdev, &blkno); - - /* - * iterate across all devices, flushing the data and collecting bitmaps - */ - - open_files = 0; - for (cdi = _sd_cache_files; - cdi != &_sd_cache_files[sdbc_max_devs]; cdi++) { - nsc_off_t blk2; - nsc_off_t fp_off; - - if (cdi->cd_info == NULL) - continue; - if (cdi->cd_info->sh_alloc == 0) - continue; - - /* retrieve the file description so we can update it */ - fp_off = (open_files++) * sizeof (sdbc_pwf_desc_t); - sdbc_anon_get(file_pool, fp_off, - (caddr_t)¤t, sizeof (current)); - - current.blocks = blkno; - - if (cdi->cd_io_head) { - /* - * Need to wait for this to timeout? - * Seems like worst case we just write the data twice - * so we should be ok. - */ - /*EMPTY*/ - ; - } - - start_bitmap_list(&bmap, hdr->bmap_size); - - /* Flush the enqueued dirty data blocks */ - - (void) flush_centry_list(cdi, cdi->cd_dirty_head, state_rdev, - &blkno, 0, &bmap); - cdi->cd_dirty_head = NULL; - cdi->cd_dirty_tail = NULL; - - /* Flush the failed dirty data blocks */ - - (void) flush_centry_list(cdi, cdi->cd_fail_head, state_rdev, - &blkno, 1, &bmap); - cdi->cd_fail_head = NULL; - - /* - * Flush the in progress dirty data blocks. These really should - * really be null by now. Worst case we write the data again - * on recovery as we know the dirty masks won't change since - * flusher is stopped. 
- */ - - (void) flush_centry_list(cdi, cdi->cd_io_head, state_rdev, - &blkno, 0, &bmap); - cdi->cd_io_head = NULL; - cdi->cd_io_tail = NULL; - - current.bitmaps = blkno; - current.nblocks = blkno - current.blocks; - - (void) flush_bitmap_list(&bmap, state_rdev, &blkno); - - /* update the current cd's file description */ - sdbc_anon_copy((caddr_t)¤t, sizeof (current), file_pool, - fp_off); - - blk2 = hdr->descriptor_pool; - rc = flush_anon_list(file_pool, state_rdev, &blk2); - } - -#if !defined(_SunOS_5_6) - hdr->dump_time = ddi_get_time(); -#else - hdr->dump_time = hrestime.tv_sec; -#endif - /* write the header at front and back */ - (void) flush_hdr(hdrblk, state_rdev, blkno); - (void) flush_hdr(hdrblk, state_rdev, 0L); - - /* P3 */ - cmn_err(CE_CONT, "sdbc(sdbc_power_flush) %" NSC_SZFMT " total\n", - blkno); - -cleanup: - ; - return (rc); - -} - -/* - * _sdbc_power_lost - System is running on UPS power we have "rideout" - * minutes of power left prior to shutdown. Get into a state where we - * will be ready should we need to shutdown. - * - * ARGUMENTS: - * rideout - minutes of power left prior to shutdown. - */ -void -_sdbc_power_lost(int rideout) -{ - cmn_err(CE_WARN, "sdbc(_sdbc_power_lost) battery time " - "remaining %d minute(s)", rideout); - - got_hint = 1; - if (_sd_get_node_hint(&wrthru_hint)) - got_hint = 0; - - cmn_err(CE_WARN, "sdbc(_sdbc_power_lost) got hint %d " - "hint 0x%x", got_hint, wrthru_hint); - - (void) _sd_set_node_hint(NSC_FORCED_WRTHRU); - saw_power_lost = 1; -} - -/* - * _sdbc_power_ok - System is back running on mains power after - * seeing a power fail. Return to normal power up operation. - * - */ -void -_sdbc_power_ok(void) -{ - cmn_err(CE_WARN, "sdbc(_sdbc_power_ok) power ok"); - if (saw_power_lost && got_hint) { - /* - * In theory we have a race here between _sdbc_power_lost - * and here. However it is expected that power ioctls that - * cause these to be generated are sequential in nature - * so there is no race. 
- */ - saw_power_lost = 0; - if (wrthru_hint & _SD_WRTHRU_MASK) - (void) _sd_set_node_hint(wrthru_hint & _SD_WRTHRU_MASK); - else - (void) _sd_clear_node_hint(_SD_WRTHRU_MASK); - } -} - -/* - * _sdbc_power_down - System is running on UPS power and we must stop - * operation as the machine is now going down. Schedule a shutdown - * thread. - * - * When we return all cache activity will be blocked. - */ -void -_sdbc_power_down(void) -{ - cmn_err(CE_WARN, "sdbc(_sdbc_power_down) powering down..."); - (void) _sdbc_power_flush(); -} - -/* - * Configure safe store from the general cache configuration ioctl. - */ -int -_sdbc_pcu_config(int namec, char **namev) -{ - int i; - - if (swfs.nswpf != 0) { - /* - * This should not happen because cache protects itself - * from double configuration in sd_conf.c. - */ - cmn_err(CE_CONT, "sdbc(_sdbc_pcu_config) double " - "configuration of Safe Store\n"); - return (EINVAL); - } - swfs.colsize = 32; /* No way to configure in the general ioctl */ - - for (i = 0; i < namec; i++) { - if ((swfs.names[i] = kmem_alloc(strlen(namev[i])+1, - KM_NOSLEEP)) == NULL) { - _sdbc_pcu_cleanup(&swfs); - return (ENOMEM); - } - swfs.nswpf++; - (void) strcpy(swfs.names[i], namev[i]); - } - - return (0); -} - -/* - */ -void -_sdbc_pcu_unload() -{ - _sdbc_pcu_cleanup(&swfs); -} - -/* - * Destructor for struct swapfiles. - */ -static void -_sdbc_pcu_cleanup(struct swapfiles *swp) -{ - int i; - char *s; - - for (i = 0; i < swp->nswpf; i++) { - if ((s = swp->names[i]) != NULL) - kmem_free(s, strlen(s)+1); - swp->names[i] = NULL; - } - swp->nswpf = 0; -} diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_pcu.h b/usr/src/uts/common/avs/ns/sdbc/sd_pcu.h deleted file mode 100644 index 4545aab0f4..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_pcu.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SD_PCU_H -#define _SD_PCU_H - -/* - * All structures here are on-disk, unless specified otherwise. - * In-core stuff is hidden inside implementation modules. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Header. - */ -#define SDBC_PWF_MAGIC 0xbcbcbc01 - -typedef struct sdbc_pwf_hdr_s { - int32_t magic; /* magic number to distinguish file revs */ - int32_t alignment; /* all sections are multiples of this */ - /* a cache block is this identical size */ - int32_t bmap_size; /* number of entries in each bitmap entry */ - int32_t cd_count; /* number of devices we have data for */ - nsc_off_t string_pool; /* offset in file to pool of filenames */ - nsc_off_t descriptor_pool; /* offset in file to dbc_pwf_desc_t vector */ - int64_t dump_time; /* Timestamp == longest time_t */ -} sdbc_pwf_hdr_t; - -/* - * File description - */ -typedef struct sdbc_pwf_desc_s { - int32_t pad0; - uint32_t name; /* name + stringpool == offset of filename */ - /* the name given to nsc_open */ - nsc_off_t blocks; /* offset into swap for this device's data */ - nsc_off_t bitmaps; /* offset into swap for data bitmaps */ - /* (i.e. 
nothing to do with rdc bitmaps */ - uint64_t nblocks; /* number of data blocks == bitmap dimension */ - /* long rdc_data; */ /* offset to rdc data (NYI) */ -} sdbc_pwf_desc_t; - -/* - * record status section - describes the state of each cache block in the file - * - * zaitcev - XXX errs is per block, not per fba? - */ -typedef struct sdbc_pwf_rec_s { - uint32_t dirty; /* Bitmap of dirty fba'a (_sd_bitmap_t) */ - int32_t errs; /* error status per fba, needed to recover */ - /* from errors to a raidset where we must recover */ - /* from a stripe write error */ - /* (i.e. parity is bad or suspect ) */ - nsc_off_t fba_num; /* the block on the disk */ -} sdbc_pwf_rec_t; - -typedef struct sdbc_pwf_bitmap_s { - sdbc_pwf_rec_t bitmaps[1]; /* dynamic array based on cache block size */ -} sdbc_pwf_bitmap_t; - -/* - * Prototypes - */ -#ifdef _KERNEL /* XXX Split into sd_pcu_ondisk.h, sd_pcu_iface.h */ -extern char _sdbc_shutdown_in_progress; - -extern int _sdbc_pcu_config(int c, char **v); -extern void _sdbc_pcu_unload(void); -extern void _sdbc_power_lost(int rideout); -extern void _sdbc_power_ok(void); -extern void _sdbc_power_down(void); -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_PCU_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_tdaemon.c b/usr/src/uts/common/avs/ns/sdbc/sd_tdaemon.c deleted file mode 100644 index 1a7069ce33..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_tdaemon.c +++ /dev/null @@ -1,1157 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Routines for the Infinity Storage Device daemon - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/buf.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/ddi.h> -#include <sys/nsc_thread.h> - -#include "sd_bcache.h" -#include "sd_io.h" -#include "sd_bio.h" -#include "sd_ft.h" -#include "sd_misc.h" - -#define _INFSD_LOCAL_MEM - -#define _CD_VTRK_SIZE(cd) (dev_tsize[GET_CD_STATE(cd)] * 1024) -#define _CD_VTRK_NUM(cd, len) ((len)/_CD_VTRK_SIZE(cd)) -#define _CD_VTRK_OFF(cd, len) ((len)%(_CD_VTRK_SIZE(cd))) - -#define FILESIZE (1 << 27) /* 128 MB */ - -#define SIZEMASK 0x0000FFFF -#define _INFSD_RECORD_SIZE(ndx) REC_SIZE -#define GET_SEED(ndx) (gld[ndx] . 
seed & SIZEMASK) -#define MAX_CD_STS 600 -#define MAX_TDAEMONS 128 - -static char devarray[MAX_TDAEMONS][MAX_TDAEMONS*2]; -static int dev_tsize[MAX_TDAEMONS*2]; -static int dev_flag[MAX_TDAEMONS*2]; - - -/* - * sd_test options - */ -#define SD_TEST_CACHE_HIT 0x00000001 -#define SD_TEST_CACHE_MISS 0x00000002 -#define SD_TEST_CHECK_DATA 0x00000004 -#define SD_TEST_READ_ONLY 0x00000008 -#define SD_TEST_WRITE_ONLY 0x00000010 -#define SD_TEST_SEQUENTIAL 0x00000020 - -static struct cd_sts { - volatile short cd_state; - volatile char waiting; - volatile char inited; - kcondvar_t cd_blk; - volatile caddr_t asy_key; -} cd_test_sts[MAX_CD_STS]; - -#define SET_CD_STATE(cd, i) (cd_test_sts[(cd)].cd_state = (short)(i)) -#define GET_CD_STATE(cd) (cd_test_sts[(cd)].cd_state) - -static kmutex_t tdaemon_lock; -static kcondvar_t _wait_daemons; -dev_t _test_async_fail; /* fail async writes to cache dev_t */ -static volatile int test_stop; - -static int daemon_awake(int i); -static void wakeup_all_tdaemons(void); -static void _sd_idle_daemon(void); -static void _td_detach_cd(int cd); -static int _fork_test_daemon(int num_disks, int test_typ, int loop_cnt, - int from, int seed); -static void _sd_test_rwloop_seq(int i, int loops, int seed, int forw); -static int _sd_copy_pattern_to_handle(_sd_buf_handle_t *handle, - nsc_off_t fba_pos, nsc_size_t fba_len); -static int _sd_copy_handle(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2, - nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len, int skew); -static int _sd_compare_handle(_sd_buf_handle_t *handle1, - _sd_buf_handle_t *handle2, nsc_off_t fba_pos1, nsc_off_t fba_pos2, - nsc_size_t fba_len, int skew); -static void _sd_direct_test(int c, int loop, int seed, int type); -static void set_parameters(void); -static void test_dma_loop(int net, int seg); -static int _sd_hwrite(_sd_buf_handle_t *buf, nsc_off_t fba_pos, - nsc_size_t fba_len, int flag); -static void myend(blind_t arg, nsc_off_t fba_pos, nsc_size_t fba_len, - int 
error); -static int test_control(int typ, int cd, nsc_off_t fba_pos, nsc_size_t fba_len); - -int -_sim_write(_sd_buf_handle_t *buf, int x) -{ - int rval; - - if (test_stop) - return (EINVAL); - rval = _sd_write(buf, buf->bh_fba_pos, buf->bh_fba_len, x); - return (rval == NSC_HIT ? NSC_DONE : rval); -} - -static int -_sd_hwrite(_sd_buf_handle_t *buf, nsc_off_t fba_pos, nsc_size_t fba_len, - int flag) -{ - int rval; - - rval = _sd_write(buf, fba_pos, fba_len, flag); - return (rval == NSC_HIT ? NSC_DONE : rval); -} - -#define _sd_allocate_buf _trk_allocate_buf -#define _sd_write _sim_write - -/* - * INF SD daemon global data - */ - -volatile int test_created; -static int _sd_daemon_created; -static int _sd_num_daemons; - -static struct gld { - volatile int type; - volatile int loop; - volatile int seed; - volatile int asleep; - kcondvar_t blk; -} gld[MAX_TDAEMONS]; - -/* - * _sdbc_tdaemon_load: cache is being loaded, initialize any global state that - * isn't configurable (lock/sv's). - */ -int -_sdbc_tdaemon_load(void) -{ - int i; - - for (i = 0; i < MAX_TDAEMONS; i++) - cv_init(&gld[i].blk, NULL, CV_DRIVER, NULL); - - mutex_init(&tdaemon_lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&_wait_daemons, NULL, CV_DRIVER, NULL); - - return (0); -} -/* - * _sdbc_tdaemon_unload: cache is being unloaded. - */ -void -_sdbc_tdaemon_unload(void) -{ - int i; - - for (i = 0; i < MAX_TDAEMONS; i++) { - cv_destroy(&gld[i].blk); - } - - mutex_destroy(&tdaemon_lock); - cv_destroy(&_wait_daemons); - -} - -/* - * _sdbc_tdaemon_configure: configure the desired number of test daemons. 
- */ -int -_sdbc_tdaemon_configure(int num) -{ - int i; - - if (num >= MAX_TDAEMONS) - return (-1); - - for (i = 0; i < num; i++) { - cv_init(&gld[i].blk, NULL, CV_DRIVER, NULL); - } - mutex_enter(&tdaemon_lock); - test_created = 1; - test_stop = 0; - _sd_num_daemons = 0; - mutex_exit(&tdaemon_lock); - - mutex_enter(&_sd_cache_lock); - if (_sd_daemon_created == 1) { - mutex_exit(&_sd_cache_lock); - return (-1); - } - _sd_daemon_created = 1; - mutex_exit(&_sd_cache_lock); - - for (i = 0; i < num; i++) { - (void) nsc_create_process( - (void (*)(void *))_sd_idle_daemon, 0, FALSE); - } - -#ifdef DEBUG - if (num) - cmn_err(CE_NOTE, "!Starting %d SDBC test daemon(s).", num); -#endif - return (0); -} - -void -_sdbc_tdaemon_deconfigure(void) -{ - int i, running, retry = 30; - - if (_sd_num_daemons) { - _sd_daemon_created = 0; - - mutex_enter(&tdaemon_lock); - test_created = 0; - test_stop = 1; - mutex_exit(&tdaemon_lock); - - wakeup_all_tdaemons(); - while (retry--) { - delay(HZ); - running = 0; - for (i = 0; i < _sd_num_daemons; i++) - if (daemon_awake(i)) - running++; - if (running == 0) break; - } - } - for (i = 0; i < MAX_CD_STS; i++) { - cv_destroy(&cd_test_sts[i].cd_blk); - cd_test_sts[i].inited = 0; - } - _sd_num_daemons = 0; -} - - -int sind = 0; - -/* - * Globals to change test parameters - Initially added for tests written - * by Ajay - */ -#ifdef SD_TDAEMON_DEBUG -struct statis { - int cd; - nsc_size_t len; - nsc_off_t offset; - int type; -} statis[4000]; - -#define add_statis(c, l, o, t) (statis[sind].cd = (c), \ - statis[sind].len = (l), \ - statis[sind].offset = (o), \ - statis[sind].type = (t), sind++) -int -statis_upd(caddr_t adr) -{ - (void) copyout(statis, adr, sizeof (struct statis) * sind); - return (sind); -} -#endif /* SD_TDAEMON_DEBUG */ - -static int -daemon_awake(int i) -{ - if (gld[i].asleep == 2) - return (1); - return (0); -} - -static int -daemon_nexist(int i) -{ - if (gld[i].asleep == 0) - return (1); - return (0); -} - -static void 
-daemon_wakeup(int i) -{ -#ifdef _SD_DEBUG - cmn_err(CE_NOTE, "!unblocking %d %x", i, gld[i].blk); -#endif - mutex_enter(&tdaemon_lock); - cv_broadcast(&gld[i].blk); - mutex_exit(&tdaemon_lock); -} - - -static void -wakeup_all_tdaemons(void) -{ - int i; - - for (i = 0; i < _sd_num_daemons; i++) - daemon_wakeup(i); -} - - -static void -_sd_idle_daemon(void) -{ - int who; /* id of this daemon */ - - mutex_enter(&_sd_cache_lock); - _sd_cache_dem_cnt++; - who = _sd_num_daemons++; - mutex_exit(&_sd_cache_lock); - - /* CONSTCOND */ - while (1) { - mutex_enter(&tdaemon_lock); - gld[who].asleep = 1; -#ifdef DEBUG - cmn_err(CE_NOTE, "!%d daemon: sleeping %p", who, - (void *)&gld[who].blk); -#endif - - cv_signal(&_wait_daemons); - if (test_created == 0) { - gld[who].asleep = 0; - mutex_exit(&tdaemon_lock); - mutex_enter(&_sd_cache_lock); - _sd_cache_dem_cnt--; - mutex_exit(&_sd_cache_lock); - return; - } else { - cv_wait(&gld[who].blk, &tdaemon_lock); - mutex_exit(&tdaemon_lock); - } - - _sd_print(0, "%d daemon awake type %d loop %d seed %d", - who, gld[who].type, gld[who].loop, GET_SEED(who)); - - if (test_created == 0) { - gld[who].asleep = 0; - mutex_enter(&_sd_cache_lock); - _sd_cache_dem_cnt--; - mutex_exit(&_sd_cache_lock); - return; - } - gld[who].asleep = 2; - - switch (gld[who].type) { - - case 210: - test_dma_loop(gld[who].loop, gld[who].seed); - break; - case 323: - _sd_direct_test(who, gld[who].loop, GET_SEED(who), 0); - break; - - case 350: - _sd_test_rwloop_seq(who, gld[who].loop, GET_SEED(who), - 1); - break; - case 351: - _sd_test_rwloop_seq(who, gld[who].loop, GET_SEED(who), - 0); - break; - -#if 0 - case 400: - if (gld[who].loop >= 6) - numdevs = gld[who].loop; - break; -#endif - default: - cmn_err(CE_WARN, "!%d daemon %d type inval\n", who, - gld[who].type); - break; - } - if (test_created == 0) { - gld[who].asleep = 0; - mutex_enter(&_sd_cache_lock); - _sd_cache_dem_cnt--; - mutex_exit(&_sd_cache_lock); - return; - } - } -} - - -static void 
-_td_attach_cd(int cd) -{ - (void) nsc_reserve(_sd_cache_files[cd].cd_rawfd, NSC_MULTI); -} - - -static void -_td_detach_cd(int cd) -{ - nsc_release(_sd_cache_files[cd].cd_rawfd); -} - - -int -_sd_test_start(void *args, int *rvp) -{ - - register struct a { - long num; - long type; - long loop; - long from; - long seed; - } *uap = (struct a *)args; - - *rvp = _fork_test_daemon(uap->num, uap->type, uap->loop, - uap->from, uap->seed); - - return (0); -} - -static int -test_control(int typ, int cd, nsc_off_t fba_pos, nsc_size_t fba_len) -/* - * test_control - perform control operations outside of the range - * of a test. This is typically called before/after a series of - * tests to either check a result or to setup/free a device. - */ -{ - int rc = 0; - - if ((cd < 0) || (cd >= sdbc_max_devs)) - return (-1); - switch (typ) { - case 1: - rc = _sdbc_io_attach_cd((blind_t)(unsigned long)cd); - cmn_err(CE_NOTE, "!_sdbc_io_attach_cd(%d): %d", cd, rc); - break; - case 2: - rc = _sdbc_io_detach_cd((blind_t)(unsigned long)cd); - cmn_err(CE_NOTE, "!_sdbc_io_detach_cd(%d): %d", cd, rc); - break; - case 3: - _test_async_fail = _sd_cache_files[cd].cd_crdev; - cmn_err(CE_NOTE, "!async fail dev %lu (cd=%d)", - _test_async_fail, cd); - break; - case 4: - _test_async_fail = 0; - cmn_err(CE_NOTE, "!async fail cleared"); - break; -#if 0 - case 5: - _trk_alloc_flag = NSC_PINNABLE; - break; - case 6: - _trk_alloc_flag = 0; - break; -#endif - case 7: - rc = _sd_get_pinned((blind_t)(unsigned long)cd); - cmn_err(CE_NOTE, "!get_pinned(%d): %d", cd, rc); - break; - case 8: - rc = _sd_discard_pinned((blind_t)(unsigned long)cd, fba_pos, - fba_len); - cmn_err(CE_NOTE, "!discard_pinned(%d,%" NSC_SZFMT ",%" NSC_SZFMT - "): %d", cd, fba_pos, fba_len, rc); - break; - default: - cmn_err(CE_WARN, "!cache device command %d invalid\n", typ); - } - return (rc); -} - - -/* - * _fork_sd_daemon(): Fork an nunix process that periodically flushes the - * raw device buffer cache - */ - -static int 
-_fork_test_daemon(int num_disks, int test_typ, int loop_cnt, int from, int seed) -{ - int i; - int type; - int dowait = 0, verify = 0; - - if (num_disks == -1) { - return (test_control(test_typ, loop_cnt, from, seed)); - } - - type = test_typ; - cmn_err(CE_NOTE, - "!sd_test %d %d %d %d %d", num_disks, type, loop_cnt, from, seed); - if (type == 100) { - test_stop = 1; - return (0); - } - - if (type == 99) { - /* Set some parameters for other tests */ - switch (num_disks) { - /* Params set for this test */ -#if 0 - case 302 : - _sd_write_len = loop_cnt; - break; - case 303 : - _sd_write_len = loop_cnt; - break; - case 304 : - _sd_trk_zero = loop_cnt; - _sd_trk_size = from; - break; - case 305 : - _sd_min_blks = loop_cnt; - _sd_max_blks = from; - break; -#endif - default : - cmn_err(CE_WARN, - "!Usage : sd_test <test_num> 99" - " <param1> <param2> <param3>"); - break; - } - return (0); - } /* type == 99 */ - - if (type > 1000) { - dowait = 1; - type -= 1000; - } - if (type > 1000) { - verify = 1; - type -= 1000; - } - -again: - set_parameters(); - - for (i = from; i < (from+num_disks); i++) { - if (daemon_awake(i)) { - cmn_err(CE_WARN, "!Daemon %d awake!?", i); - return (-1); - } - if (daemon_nexist(i)) { - cmn_err(CE_WARN, "!Daemon %d nexist!?", i); - return (-1); - } - - gld[i].type = type; - gld[i].loop = loop_cnt; - gld[i].seed = seed; - daemon_wakeup(i); - } - cmn_err(CE_CONT, "!%d daemons woken (test %d)\n", num_disks, type); - if (num_disks <= 0) - return (0); - - if (dowait) { - wait: - mutex_enter(&tdaemon_lock); - if (!cv_wait_sig(&_wait_daemons, &tdaemon_lock)) { - mutex_exit(&tdaemon_lock); - test_stop = 1; - cmn_err(CE_WARN, "!Interrupt: stopping tests"); - return (-1); /* interrupt */ - } - mutex_exit(&tdaemon_lock); - - /* wait for all to stop */ - if (test_stop) - return (-1); - for (i = from; i < (from+num_disks); i++) { - if (daemon_awake(i)) - goto wait; - } - } - if (verify) { - verify = 0; - type++; /* next test */ - goto again; - } - return (0); 
-} - -int -_sd_test_end(void) -{ - test_created = 0; - test_stop = 1; - return (0); -} - -int -_sd_test_init(void *args) -{ - register struct a { - caddr_t addr; - long ar; - long len; - long tsize; - long flag; - } *uap = (struct a *)args; - - if (copyin(uap->addr, devarray[uap->ar], uap->len)) { - return (EFAULT); - } - dev_tsize[uap->ar] = (uap->tsize < 48) ? 48 : uap->tsize; - dev_flag[uap->ar] = uap->flag; - return (0); -} - - -typedef struct io_type { - int cd, tsize; - _sd_buf_handle_t *wbuf, *rbuf; - int len, len2, rnet, wnet; - int trk_num, trk_off; - int offset, boff; - char test_pattern; -} infnsc_io_t; - -/* static spinlock_t INFSD_iolock = { SLK_IFS_SRVR, 0 }; */ -#define _INFSD_TRK_SIZE() (64*1024) -#define _INFSD_BUF_ALIGN 512 /* Each read/write should be 512 aligned */ - -/* - * _sd_test_rwloop_seq(i,loops, seed, forw): - * - * Sequential I/O test. Writes track records sequentially, either forwards - * or backwards (forw = 1 or forw = 0), writing a fixed pattern with a - * few unique bytes depending on loop id. Then reads back, checking - * for data consistency. 
- */ - -/* ARGSUSED */ -static void -_sd_test_rwloop_seq(int i, int loops, int seed, int forw) -{ - int cd; - int j, len; - nsc_off_t offset; - nsc_size_t fsize; - int sts; - _sd_buf_handle_t *fbuf, *buf; - - if (strlen(devarray[i]) == 0) { - cmn_err(CE_WARN, "!child %d devarray null", i); - return; - } - if ((cd = _sd_open(devarray[i], dev_flag[i])) < 0) { - cmn_err(CE_WARN, "!Open error %s child %d", devarray[i], i); - return; - } - SET_CD_STATE(cd, i); - _td_attach_cd(cd); - - (void) _sd_get_partsize((blind_t)(unsigned long)cd, &fsize); - len = 120; - - /* - * Write a base pattern into the first buffer - */ - fbuf = NULL; - offset = 0; - sts = _sd_alloc_buf((blind_t)(unsigned long)cd, 0, len, NSC_WRBUF, - &fbuf); - if (sts > 0) { - cmn_err(CE_WARN, "!Buffer alloc failed %d", sts); - return; - } - (void) _sd_copy_pattern_to_handle(fbuf, 0, len); - _td_detach_cd(cd); - - offset = 0; - for (j = 0; j < loops; j++) { - if (test_stop == 1) goto done; - - offset += len; - if (offset + len > fsize) - break; - - buf = NULL; - _td_attach_cd(cd); - sts = _sd_alloc_buf((blind_t)(unsigned long)cd, offset, len, - NSC_WRBUF, &buf); - if (sts > 0) { - cmn_err(CE_WARN, "!ch%d getbuf error(WRBUF)%d", i, sts); - goto done; - } - (void) _sd_copy_handle(fbuf, buf, 0, offset, len, j); - - sts = len; - while (sts > 0) { - if (forw && _sd_hwrite(buf, offset + len - sts, - 12, 0) > 0) { - cmn_err(CE_WARN, "!ch %d fwwr err", i); - test_stop = 1; - } - sts -= 12; - if (!forw && _sd_hwrite(buf, offset + sts, 12, 0) > 0) { - cmn_err(CE_WARN, "!ch %d rvwr err", i); - test_stop = 1; - } - } - if (sts = _sd_free_buf(buf)) { - cmn_err(CE_WARN, "!ch %d freebuf error %d", i, sts); - goto done; - } - _td_detach_cd(cd); - } - offset = 0; - for (j = 0; j < loops; j++) { - if (test_stop == 1) goto done; - - offset += len; - if (offset + len > fsize) - break; - - buf = NULL; - _td_attach_cd(cd); - sts = _sd_alloc_buf((blind_t)(unsigned long)cd, offset, len, - NSC_RDBUF, &buf); - if (sts > 0) { - 
cmn_err(CE_WARN, "!ch%d getbuf error(WRBUF)%d", i, sts); - goto done; - } - (void) _sd_compare_handle(fbuf, buf, 0, offset, len, j); - - if (sts = _sd_free_buf(buf)) { - cmn_err(CE_WARN, "!ch %d freebuf error %d", i, sts); - goto done; - } - _td_detach_cd(cd); - } -done: - if (sts = _sd_free_buf(fbuf)) - cmn_err(CE_WARN, "!child %d freebuf error %d", i, sts); - cmn_err(CE_NOTE, "!TEST OVER : rwloop_seq_%s() child %d", - forw ? "forw" : "rev", i); -} - -static int -_sd_copy_pattern_to_handle(_sd_buf_handle_t *handle, nsc_off_t fba_pos, - nsc_size_t fba_len) -{ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_size_t cur_fba_len; - int i; - _sd_cctl_t *cc_ent; - - cc_ent = handle->bh_centry; - while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos)) - cc_ent = cc_ent->cc_chain; - - cur_fba_len = fba_len; - st_cblk_off = BLK_FBA_OFF(fba_pos); - st_cblk_len = (BLK_FBAS - st_cblk_off); - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else - end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len); - - for (i = 0; i < (int)FBA_SIZE(st_cblk_len); i += 4) - *((uint_t *)(void *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off) + - i)) = nsc_usec(); - cur_fba_len -= st_cblk_len; - cc_ent = cc_ent->cc_chain; - - while (cur_fba_len > (nsc_size_t)end_cblk_len) { - for (i = 0; i < CACHE_BLOCK_SIZE; i += 4) { - unsigned int usec = nsc_usec(); - bcopy(&usec, cc_ent->cc_data + i, 4); - } - cc_ent = cc_ent->cc_chain; - cur_fba_len -= BLK_FBAS; - } - if (cur_fba_len) { - for (i = 0; i < (int)FBA_SIZE(end_cblk_len); i += 4) { - unsigned int usec = nsc_usec(); - bcopy(&usec, cc_ent->cc_data + i, 4); - } - } - return (0); -} - -static int -_sd_copy_handle(_sd_buf_handle_t *handle1, - _sd_buf_handle_t *handle2, - nsc_off_t fba_pos1, - nsc_off_t fba_pos2, - nsc_size_t fba_len, - int skew) -{ 
- sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_size_t cur_fba_len; - _sd_cctl_t *cc_ent, *cc_ent1; - unsigned char *skew_word; - int skew_count = 0; - - ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len); - ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len); - - cc_ent = handle1->bh_centry; - while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos1)) - cc_ent = cc_ent->cc_chain; - - cc_ent1 = handle2->bh_centry; - while (CENTRY_BLK(cc_ent1) != FBA_TO_BLK_NUM(fba_pos2)) - cc_ent1 = cc_ent1->cc_chain; - - - if (BLK_FBA_OFF(fba_pos1) != BLK_FBA_OFF(fba_pos2)) { - cmn_err(CE_WARN, "!Cannot copy unaligned handles"); - return (0); - } - - cur_fba_len = fba_len; - st_cblk_off = BLK_FBA_OFF(fba_pos1); - st_cblk_len = (BLK_FBAS - st_cblk_off); - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else - end_cblk_len = BLK_FBA_OFF(fba_pos1 + fba_len); - - skew_word = cc_ent->cc_data + FBA_SIZE(st_cblk_off); - *skew_word = skew | (++skew_count << 24); - bcopy(cc_ent->cc_data + FBA_SIZE(st_cblk_off), cc_ent1->cc_data + - FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len)); - cur_fba_len -= st_cblk_len; - cc_ent = cc_ent->cc_chain; - cc_ent1 = cc_ent1->cc_chain; - - while (cur_fba_len > (nsc_size_t)end_cblk_len) { - skew_word = cc_ent->cc_data; - *skew_word = skew | (++skew_count << 24); - bcopy(cc_ent->cc_data, cc_ent1->cc_data, CACHE_BLOCK_SIZE); - cc_ent = cc_ent->cc_chain; - cc_ent1 = cc_ent1->cc_chain; - cur_fba_len -= BLK_FBAS; - } - if (cur_fba_len) { - skew_word = cc_ent->cc_data; - *skew_word = skew | (++skew_count << 24); - bcopy(cc_ent->cc_data, cc_ent1->cc_data, - FBA_SIZE(end_cblk_len)); - } - return (0); -} - -static int -_sd_compare_handle(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2, - nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len, int 
skew) -{ - sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */ - sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */ - sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */ - nsc_size_t cur_fba_len; - _sd_cctl_t *cc_ent, *cc_ent1; - unsigned char *skew_word; - int skew_count = 0; - - ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len); - ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len); - - cc_ent = handle1->bh_centry; - while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos1)) - cc_ent = cc_ent->cc_chain; - - cc_ent1 = handle2->bh_centry; - while (CENTRY_BLK(cc_ent1) != FBA_TO_BLK_NUM(fba_pos2)) - cc_ent1 = cc_ent1->cc_chain; - - if (BLK_FBA_OFF(fba_pos1) != BLK_FBA_OFF(fba_pos2)) { - cmn_err(CE_WARN, "!Cannot compare unaligned handles"); - return (0); - } - - cur_fba_len = fba_len; - st_cblk_off = BLK_FBA_OFF(fba_pos1); - st_cblk_len = (BLK_FBAS - st_cblk_off); - if ((nsc_size_t)st_cblk_len >= fba_len) { - end_cblk_len = 0; - st_cblk_len = (sdbc_cblk_fba_t)fba_len; - } else - end_cblk_len = BLK_FBA_OFF(fba_pos1 + fba_len); - - skew_word = cc_ent->cc_data + FBA_SIZE(st_cblk_off); - *skew_word = skew | (++skew_count << 24); - if (bcmp(cc_ent->cc_data + FBA_SIZE(st_cblk_off), - cc_ent1->cc_data + FBA_SIZE(st_cblk_off), - FBA_SIZE(st_cblk_len)) != 0) - cmn_err(CE_WARN, "!Data mismatch fba_pos:%" NSC_SZFMT, - fba_pos2); - - cur_fba_len -= st_cblk_len; - cc_ent = cc_ent->cc_chain; - cc_ent1 = cc_ent1->cc_chain; - - while (cur_fba_len > (nsc_size_t)end_cblk_len) { - skew_word = cc_ent->cc_data; - *skew_word = skew | (++skew_count << 24); - if (bcmp(cc_ent->cc_data, cc_ent1->cc_data, - CACHE_BLOCK_SIZE) != 0) - cmn_err(CE_WARN, "!Data mismatch fba_pos:%" NSC_SZFMT, - fba_pos2); - - cc_ent = cc_ent->cc_chain; - cc_ent1 = cc_ent1->cc_chain; - cur_fba_len -= BLK_FBAS; - } - if (cur_fba_len) { - skew_word = cc_ent->cc_data; - *skew_word = skew | (++skew_count << 24); - if (bcmp(cc_ent->cc_data, cc_ent1->cc_data, - FBA_SIZE(end_cblk_len)) 
!= 0) - cmn_err(CE_WARN, "!Data mismatch fba_pos:%" NSC_SZFMT, - fba_pos2); - } - return (0); -} - -/* - * Macro definition for waiting for an IO buffer to be allocated or a read - * to complete. Macro defined so code doesn't have to be typed each time - */ -#define WAIT_IO(st, cd, buf, l) \ -if ((st != NSC_DONE) && (st != NSC_HIT)) { \ - if (st != NSC_PENDING) \ - cmn_err(CE_WARN, "!alloc sts: %d", st); \ - else { \ - buf = wait_io(cd, &st); \ - if (st) { \ - cmn_err(CE_WARN, "!ch %d getbuf errpr %d\n", l, st); \ - if (buf) \ - (void) _sd_free_buf(buf); \ - return; \ - } \ - } \ -} - - -#undef _sd_write - -static int tiodone, iosent, tioerr; - -/* ARGSUSED */ - -static void -myend(blind_t arg, nsc_off_t fba_pos, nsc_size_t fba_len, int error) -{ - if (error) - tioerr++; - else tiodone++; -} - -static int ckd_sskip = 3; - -/* ARGSUSED3 */ -static void -_sd_direct_test(int c, int loop, int seed, int type) -{ - nsc_size_t filesize; - int loops; - - int cd; - int ckd_hd, recs, rec_size, ckd_doz; - int done_size; - clock_t st_time; - int i; - - int ckd_hd_sz, rec_bsz; - int print_stuff; - int throttle; - struct buf *bp; - nsc_off_t curpos; - - caddr_t caddr; - iosent = 0; - - print_stuff = 0; - seed = gld[c].seed; - rec_size = (seed & 0xff); - recs = (seed & 0xf00)>>8; - ckd_hd = (seed & 0xf000)>>12; - ckd_doz = (seed & 0xf0000)>>16; - throttle = (seed & 0xff00000)>>20; - ckd_hd_sz = ckd_hd * 512; - rec_bsz = rec_size * 512; - - done_size = 0; - tiodone = 0; - curpos = 0; - tioerr = 0; - - if (strlen(devarray[c]) == 0) { - cmn_err(CE_WARN, "!child %d devarray null\n", c); - return; - } - if ((cd = _sd_open(devarray[c], dev_flag[c])) < 0) { - cmn_err(CE_WARN, "!Open error %s child %d\n", devarray[c], c); - return; - } - - caddr = (caddr_t)nsc_kmem_alloc(20 * 8192, KM_SLEEP, sdbc_local_mem); - - (void) _sd_get_partsize((blind_t)(unsigned long)cd, &filesize); - filesize = FBA_SIZE(filesize); - loops = ((nsc_size_t)loop > (filesize / (60 * 1024))) ? 
- (filesize / (60 * 1024)) : loop; - - st_time = nsc_usec(); - cmn_err(CE_CONT, "!Test 100: %s file %d cd %d loops %x seed\n", - devarray[c], cd, loop, seed); - cmn_err(CE_CONT, - "!Test 100: %d recsize %d recs %d throttle %d hd %d doz\n", - rec_size, recs, throttle, ckd_hd, ckd_doz); - - for (i = 0; i < loops; i++) { - curpos = i * 120; - if (ckd_doz) { - bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev, - curpos, 20, B_WRITE); - sd_add_mem(bp, caddr, ckd_hd_sz); - (void) sd_start_io(bp, - _sd_cache_files[cd].cd_strategy, myend, NULL); - iosent++; - curpos += ckd_sskip; - } - if (ckd_doz == 2) { - bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev, - curpos, 20, B_WRITE); - sd_add_mem(bp, caddr, 4096-ckd_sskip*512); - (void) sd_start_io(bp, - _sd_cache_files[cd].cd_strategy, myend, NULL); - iosent++; - curpos += 4096-ckd_sskip*512; - } - bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev, - curpos, 20, B_WRITE); - sd_add_mem(bp, caddr, recs * rec_bsz); - (void) sd_start_io(bp, - _sd_cache_files[cd].cd_strategy, myend, NULL); - iosent++; - - done_size += recs * rec_bsz; - - if (tiodone && ((tiodone / 300) > print_stuff)) { - cmn_err(CE_CONT, "!Done %d ios %d size in %lu time\n", - tiodone, - ckd_doz ? ((ckd_doz == 2) ? - (tiodone * (recs * rec_bsz + 4096)) / 3: - (tiodone * (recs * rec_bsz + ckd_hd_sz)) / 2) : - (tiodone * (recs * rec_bsz)), - (nsc_usec() - st_time) / 1000); - print_stuff++; - } - while ((iosent - (tiodone + tioerr)) > throttle) - ; - } - while ((tiodone + tioerr) < iosent) { - if (tiodone && ((tiodone / 300) > print_stuff)) { - cmn_err(CE_CONT, "!Done %d ios %d size in %lu time\n", - tiodone, - ckd_doz ? ((ckd_doz == 2) ? - (tiodone * (recs * rec_bsz + 4096)) / 3: - (tiodone * (recs * rec_bsz + ckd_hd_sz)) / 2) : - (tiodone * (recs * rec_bsz)), - (nsc_usec() - st_time) / 1000); - print_stuff++; - } - } - cmn_err(CE_CONT, "!Done %d ios %d size in %lu time\n", - tiodone, - ckd_doz ? ((ckd_doz == 2) ? 
- (tiodone * (recs * rec_bsz + 4096)) / 3: - (tiodone * (recs * rec_bsz + ckd_hd_sz)) / 2) : - (tiodone * (recs * rec_bsz)), - (nsc_usec() - st_time) / 1000); - - print_stuff++; - nsc_kmem_free(caddr, 20 * 8192); -} - -static void -set_parameters(void) -{ - test_stop = 0; -} - -static nsc_mem_t *dma_test = NULL; -static int *dma_mem = NULL; - -static int -init_dmatest(void) -{ - dma_test = nsc_register_mem("dmatest:mem", NSC_MEM_GLOBAL, 0); - dma_mem = (int *)nsc_kmem_zalloc(4096, 0, dma_test); - if (!dma_mem) { - cmn_err(CE_NOTE, "!could not get rm mem\n"); - return (1); - } - cmn_err(CE_NOTE, "!rm = 0x%p\n", (void *)dma_mem); - return (0); -} - -/*ARGSUSED*/ -static void -release_dmatest(void) -{ - nsc_kmem_free(dma_mem, 1); - nsc_unregister_mem(dma_test); - dma_test = NULL; - dma_mem = NULL; -} -/*ARGSUSED*/ -static void -test_dma_loop(int net, int seg) -{ - delay(3*HZ); - - if (!dma_mem && init_dmatest()) { - cmn_err(CE_WARN, "!test_dma_loop: init failed"); - return; - } - - /* - * The body of test loop is removed since we don't use any more - */ - - release_dmatest(); -} diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_tdaemon.h b/usr/src/uts/common/avs/ns/sdbc/sd_tdaemon.h deleted file mode 100644 index 60de75dc5f..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_tdaemon.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SD_TDAEMON_H -#define _SD_TDAEMON_H - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_SD_DEBUG) -extern int _test_async_fail; -#endif - -extern int _sdbc_tdaemon_load(void); -extern void _sdbc_tdaemon_unload(void); -extern int _sdbc_tdaemon_configure(int num); -extern void _sdbc_tdaemon_deconfigure(void); -extern int _sd_test_start(void *args, int *rvp); -extern int _sd_test_end(void); -extern int _sd_test_init(void *args); - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_TDAEMON_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_trace.c b/usr/src/uts/common/avs/ns/sdbc/sd_trace.c deleted file mode 100644 index 6a4502cfca..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_trace.c +++ /dev/null @@ -1,620 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. 
All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/kmem.h> -#include <sys/ddi.h> -#include <sys/nsc_thread.h> - -#include "sd_bcache.h" -#include "sd_trace.h" -#include "sd_misc.h" - -#ifndef _SD_NOTRACE - -#ifndef SM_SDTRSEMA -#define SM_SDTRSEMA 1 -#define SM_SDTRLCK 1 -#endif - -int _sd_trace_mask = 0; - -/* - * _sdbd_trace_t _sd_trace_table[-1, 0 .. sdbc_max_devs - 1] - * allocate memory, shift pointer up by one. - */ -static _sdbc_trace_t *_sd_trace_table; - -static kcondvar_t _sd_adump_cv; -static int _sd_trace_configed; -static kmutex_t _sd_adump_lk; - -static int _alert_cd = SDT_ANY_CD; -static int _last_cd = SDT_ANY_CD; -#define XMEM(x, y) (void)(x = y, y = (SDT_ANY_CD), x) - -/* - * Forward declare all statics that are used before defined to enforce - * parameter checking. - * Some (if not all) of these could be removed if the code were reordered - */ - -static int _sd_set_adump(int cd, int flag, _sdtr_table_t *table); - -/* - * _sdbc_tr_unload - cache is being unloaded. Release any memory/lock/sv's - * created by _sdbc_tr_unload and null the stale pointers. - * - */ -void -_sdbc_tr_unload(void) -{ - if (_sd_trace_table) - nsc_kmem_free((_sd_trace_table - 1), - sizeof (_sdbc_trace_t) * (sdbc_max_devs + 1)); - cv_destroy(&_sd_adump_cv); - mutex_destroy(&_sd_adump_lk); - - _sd_trace_table = NULL; -} - -/* - * _sdbc_tr_load - cache is being loaded. Allocate the memory/lock/sv's - * which need to be present regardless of state of cache configuration. 
- * - */ -int -_sdbc_tr_load(void) -{ - _sdbc_trace_t *m; - - cv_init(&_sd_adump_cv, NULL, CV_DRIVER, NULL); - mutex_init(&_sd_adump_lk, NULL, MUTEX_DRIVER, NULL); - - /* - * this maybe ought to wait to see if traces are configured, but it - * is only 4k - */ - - m = (_sdbc_trace_t *)nsc_kmem_zalloc( - sizeof (_sdbc_trace_t) * (sdbc_max_devs + 1), - KM_NOSLEEP, sdbc_stats_mem); - - if (m == NULL) { - cmn_err(CE_WARN, - "sdbc(_sdbc_tr_load) cannot allocate trace table"); - return (-1); - } - _sd_trace_table = m + 1; - - return (0); - -} - -/* - * _sdbc_tr_configure - configure a trace area for the descriptor "cd". - * Unlike other ..._configure routines this routine is called multiple - * times since there will be an unknown number of open descriptors. At - * cache config time if tracing is enabled only the slot for SDT_INV_CD - * is created. - * - * Allocate the SD cache trace area (per device) - */ - -int -_sdbc_tr_configure(int cd) -{ - int size; - _sdtr_table_t *t; - kmutex_t *lk; - - if (!_sd_cache_config.trace_size) - return (0); - - if (cd == SDT_INV_CD) - _sd_trace_configed = 1; - - if (_sd_trace_table[cd].tbl) - return (0); - - size = sizeof (_sdtr_table_t) + - _sd_cache_config.trace_size * sizeof (_sdtr_t); - - if ((t = (_sdtr_table_t *)nsc_kmem_zalloc(size, - KM_NOSLEEP, sdbc_stats_mem)) == NULL) { - cmn_err(CE_WARN, "sdbc(_sdbc_tr_configure) failed to " - "allocate %d bytes for trace, cd=%d", size, cd); - return (-1); - } - - lk = nsc_kmem_zalloc(sizeof (kmutex_t), KM_NOSLEEP, sdbc_local_mem); - if (!lk) { - nsc_kmem_free(t, size); - cmn_err(CE_WARN, "sdbc(_sdbc_tr_configure) cannot " - "alloc trace lock for cd %d", cd); - return (-1); - } - mutex_init(lk, NULL, MUTEX_DRIVER, NULL); - - _sd_trace_table[cd].t_lock = lk; - t->tt_cd = cd; - t->tt_max = _sd_cache_config.trace_size; - t->tt_mask = _sd_cache_config.trace_mask; - t->tt_lbolt = (char)_sd_cache_config.trace_lbolt; - t->tt_good = (char)_sd_cache_config.trace_good; - _sd_trace_mask |= t->tt_mask; - 
_sd_trace_table[cd].tbl = t; - return (0); -} - - -/* - * _sdbc_tr_deconfigure - * free all trace memory (regions) when deconfiguring cache - */ -void -_sdbc_tr_deconfigure(void) -{ - int i, size; - _sdbc_trace_t *tt; - - if (!_sd_cache_config.trace_size || !_sd_trace_configed) - return; - - mutex_enter(&_sd_adump_lk); - _sd_trace_configed = 0; - cv_broadcast(&_sd_adump_cv); - mutex_exit(&_sd_adump_lk); - - for (i = -1, tt = &_sd_trace_table[-1]; i < sdbc_max_devs; i++, tt++) { - if (tt->tbl == NULL) continue; - size = tt->tbl->tt_max * sizeof (_sdtr_t) + - sizeof (_sdtr_table_t); - if (tt->t_lock) { - mutex_destroy(tt->t_lock); - nsc_kmem_free(tt->t_lock, sizeof (kmutex_t)); - } - nsc_kmem_free(tt->tbl, size); - tt->t_lock = NULL; - tt->tbl = NULL; - } - _alert_cd = SDT_ANY_CD; - _last_cd = SDT_ANY_CD; -} - -static int first_alert = 0; -/* - * SDALERT(f,cd,len,fba,flg,ret) \ - * _sd_alert(f,cd,len,fba,flg,ret) - * Build a ALERT trace entry and place it into the trace table. - */ -void -_sd_alert(int f, int cd, int len, nsc_off_t fba, int flg, int ret) -{ - int tin; - _sdtr_t *tp; - _sdtr_table_t *t; - kmutex_t *lk; - - if (!first_alert) { - first_alert++; - cmn_err(CE_WARN, - "sdbc(_sd_alert) cd=%x f=%x len=%x fba=%" NSC_SZFMT - " flg=%x ret=%x", cd, f, len, fba, flg, ret); - - } - - /* Watch out for negative error codes or simply bogus cd's */ - - if (cd < -1 || cd >= sdbc_max_devs) { - /* - * no device trace buffer -- use SDT_INV_CD table? - */ - if ((t = _sd_trace_table[-1].tbl) == NULL) - return; - lk = _sd_trace_table[-1].t_lock; - } else { - lk = _sd_trace_table[cd].t_lock; - if ((t = _sd_trace_table[cd].tbl) == NULL) { - /* - * no device trace buffer -- use SDT_INV_CD table? 
- */ - if ((t = _sd_trace_table[-1].tbl) == NULL) - return; - lk = _sd_trace_table[-1].t_lock; - } - } - - if (!(t->tt_mask & ST_ALERT)) - return; /* check per-device mask */ - - if (t->tt_good) mutex_enter(lk); - t->tt_alert++; /* alert on this device */ - t->tt_cnt++; /* overwritten entries if (tt_cnt >= tt_max) */ - - tin = t->tt_in++; - if (tin >= t->tt_max) tin = t->tt_in = 0; - tp = &t->tt_buf[tin]; - tp->t_time = 0; /* not filled in yet */ - if (t->tt_good) mutex_exit(lk); - - tp->t_func = (ushort_t)f | ST_ALERT; - tp->t_len = (ushort_t)len; - tp->t_fba = fba; - tp->t_flg = flg; - tp->t_ret = ret; - /* - * On LP64 systems we will only capture the low 32 bits of the - * time this really should be good enough for our purposes. - * - */ - if (t->tt_lbolt) - tp->t_time = (int)nsc_lbolt(); - else - tp->t_time = (int)nsc_usec(); - - /* wakeup trace daemon, with hint */ - _alert_cd = cd; - - if (_sd_trace_configed) - cv_signal(&_sd_adump_cv); -} - - -/* - * SDTRACE(f,cd,len,fba,flg,ret) \ - * if (_sd_trace_mask & (f)) _sd_trace(f,cd,len,fba,flg,ret) - * Build a trace entry and place it into the trace table. - */ -void -_sd_trace(int f, int cd, int len, nsc_off_t fba, int flg, int ret) -{ - int tin; - _sdtr_t *tp; - _sdtr_table_t *t; - kmutex_t *lk; - - /* Watch out for negative error codes or simply bogus cd's */ - - if (cd < -1 || cd >= sdbc_max_devs) { - /* - * no device trace buffer -- use SDT_INV_CD table? - */ - if ((t = _sd_trace_table[-1].tbl) == NULL) - return; - lk = _sd_trace_table[-1].t_lock; - } else { - lk = _sd_trace_table[cd].t_lock; - if ((t = _sd_trace_table[cd].tbl) == NULL) - return; - } - - if (!(t->tt_mask & f)) - return; /* check per-device mask */ - - /* - * Don't overwrite if alert signaled (count lost instead) - * Locking only if 'trace_good' parameter set. 
- */ - if (t->tt_good) mutex_enter(lk); - if (t->tt_alert && (t->tt_cnt >= t->tt_max)) { - t->tt_lost++; /* lost during alert */ - if (t->tt_good) mutex_exit(lk); - return; - } - t->tt_cnt++; /* overwritten entries if (tt_cnt >= tt_max) */ - - tin = t->tt_in++; - if (tin >= t->tt_max) tin = t->tt_in = 0; - tp = &t->tt_buf[tin]; - tp->t_time = 0; /* not filled in yet */ - if (t->tt_good) mutex_exit(lk); - - tp->t_func = (ushort_t)f; - tp->t_len = (ushort_t)len; - tp->t_fba = fba; - tp->t_flg = flg; - tp->t_ret = ret; - /* - * On LP64 systems we will only capture the low 32 bits of the - * time this really should be good enough for our purposes. - * - */ - if (t->tt_lbolt) - tp->t_time = (int)nsc_lbolt(); - else - tp->t_time = (int)nsc_usec(); -} - -/* - * _sd_scan_alert -- search for device with trace alert - */ -static int -_sd_scan_alert(void) -{ - int cd; - - XMEM(cd, _alert_cd); - if ((cd != SDT_ANY_CD) && _sd_trace_table[cd].tbl->tt_alert) - return (cd); - for (cd = _last_cd + 1; cd < sdbc_max_devs; cd++) - if (_sd_trace_table[cd].tbl && - _sd_trace_table[cd].tbl->tt_alert) - return (_last_cd = cd); - for (cd = SDT_INV_CD; cd <= _last_cd; cd++) - if (_sd_trace_table[cd].tbl && - _sd_trace_table[cd].tbl->tt_alert) - return (_last_cd = cd); - return (SDT_ANY_CD); -} - -/* - * _sd_scan_entries -- search for next device with trace entries - */ -static int -_sd_scan_entries(void) -{ - int cd; - - for (cd = _last_cd + 1; cd < sdbc_max_devs; cd++) - if (_sd_trace_table[cd].tbl && _sd_trace_table[cd].tbl->tt_cnt) - return (_last_cd = cd); - for (cd = SDT_INV_CD; cd <= _last_cd; cd++) - if (_sd_trace_table[cd].tbl && _sd_trace_table[cd].tbl->tt_cnt) - return (_last_cd = cd); - return (SDT_ANY_CD); -} - - -/* - * _sd_adump - * copy information about new trace records to trace daemon, - * or modify trace parameters. 
- * - * Some tracing parameters can be modified - * [Either per-device if cd specified, or the defaults if cd = SDT_ANY_CD] - * SD_LOGSIZE: table.tt_max (size for future opens) - * SD_SET_LBOLT: table.tt_lbolt - * SD_SET_MASK: table.tt_mask - * SD_SET_GOOD: table.tt_good - * - * if (cd >= 0) dump specific device records; - * if (cd == SDT_INV_CD) dump records which don't apply to any one device. - * if (cd == SDT_ANY_CD), then choose a device: - * 1) most recent alert, block if (flag & SD_ALERT_WAIT) - * 2) "next" device with unprocessed records. - */ -int -_sd_adump(void *args, int *rvp) -{ - struct a { - long cd; - _sdtr_table_t *table; - _sdtr_t *buf; - long size; - long flag; - } *uap = (struct a *)args; - _sdtr_t *ubuf; - _sdtr_table_t tt, *t; - kmutex_t *lk; - int cd, count, lost, new_cnt; - - if (uap->flag & (SD_SET_SIZE|SD_SET_MASK|SD_SET_LBOLT|SD_SET_GOOD)) { - return (_sd_set_adump(uap->cd, uap->flag, uap->table)); - } - if (! _sd_trace_configed) { - return (EINVAL); /* not initialized yet */ - } - if (uap->cd >= SDT_INV_CD) { - /* specific device: check if configured. dump current state. 
*/ - if ((uap->cd > (long)sdbc_max_devs) || - !(t = _sd_trace_table[uap->cd].tbl)) { - return (ENOSPC); /* no space configured */ - } - lk = _sd_trace_table[uap->cd].t_lock; - cd = uap->cd; - } else { - /* - * SDT_ANY_CD: - * SD_ALERT_WAIT - wait for alert - */ - scan: - if ((cd = _sd_scan_alert()) != SDT_ANY_CD) - goto dump; - if ((uap->flag & SD_ALERT_WAIT)) { - mutex_enter(&_sd_adump_lk); - if (!_sd_trace_configed) { - mutex_exit(&_sd_adump_lk); - return (EINVAL); - } - - if (!cv_wait_sig(&_sd_adump_cv, &_sd_adump_lk)) { - mutex_exit(&_sd_adump_lk); - return (EINTR); - } - mutex_exit(&_sd_adump_lk); - - if (!_sd_trace_configed || !_sd_cache_initialized) { - return (EIDRM); - } - goto scan; - } - /* any device with entries */ - if ((cd = _sd_scan_entries()) == SDT_INV_CD) - return (0); /* no new entries */ - - dump: - lk = _sd_trace_table[cd].t_lock; - if ((t = _sd_trace_table[cd].tbl) == NULL) { - if (uap->flag & SD_ALERT_WAIT) { - t = _sd_trace_table[-1].tbl; - lk = _sd_trace_table[-1].t_lock; - } else { - return (ENOSPC); /* no space configured */ - } - } - } - - /* - * take a snapshot of the table state - */ - if (t->tt_good) - mutex_enter(lk); - tt = *t; - if (t->tt_good) - mutex_exit(lk); - - /* - * copy trace log entries to daemon - * - * size: entries in user-level 'buf' - * count: how many entries to copy [force count <= size] - * tt_max: size of kernel buffer - * tt_cnt: written entries [lossage if tt_cnt > tt_max] - * cnt: for wrap-around calculations - */ - if ((count = tt.tt_cnt) > tt.tt_max) { /* lost from beginning */ - tt.tt_out = tt.tt_in; - count = tt.tt_max; - lost = tt.tt_cnt - tt.tt_max; - } else - lost = 0; - if (count <= 0) - return (0); - if ((long)count > uap->size) - count = uap->size; - ubuf = uap->buf; - if ((tt.tt_out + count) > tt.tt_max) { - int cnt = tt.tt_max - tt.tt_out; - if (cnt > count) - cnt = count; - if (copyout(&(t->tt_buf[tt.tt_out]), ubuf, - cnt * sizeof (_sdtr_t))) { - return (EFAULT); - } - ubuf += cnt; - cnt = count - 
cnt; - if (copyout(&(t->tt_buf[0]), ubuf, cnt * sizeof (_sdtr_t))) { - return (EFAULT); - } - tt.tt_out = cnt; - } else { - if (copyout(&(t->tt_buf[tt.tt_out]), ubuf, - count * sizeof (_sdtr_t))) { - return (EFAULT); - } - tt.tt_out += count; - if (tt.tt_out == tt.tt_max) - tt.tt_out = 0; - } - - /* - * tt_alert uses fuzzy counting. - * if multiple alerts signaled, leave it at 1. - */ - if (t->tt_alert) - t->tt_alert = (t->tt_alert > 1) ? 1 : 0; - - /* - * tt_cntout is tt_cnt after dump - * update tt_cnt for copied entries - */ - if (t->tt_good) - mutex_enter(lk); - tt.tt_cntout = t->tt_cnt; - t->tt_out = tt.tt_out; - new_cnt = t->tt_cnt; - if ((new_cnt -= count+lost) < 0) - new_cnt = 0; - t->tt_cnt = new_cnt; /* race with new traces if not "tt_good" */ - if (t->tt_good) - mutex_exit(lk); - - if (copyout(&tt, uap->table, sizeof (tt) - sizeof (_sdtr_t))) { - return (EFAULT); - } - *rvp = count; - - first_alert = 0; - return (0); -} - - -/* set size, mask, lbolt, or good(locks) */ -static int -_sd_set_adump(int cd, int flag, _sdtr_table_t *table) -{ - _sdtr_table_t tt, *t; - - if (copyin(table, &tt, sizeof (tt) - sizeof (_sdtr_t))) { - return (EFAULT); - } - if (cd == SDT_ANY_CD) { /* modify config parameter */ - if (flag & SD_SET_SIZE) - _sd_cache_config.trace_size = tt.tt_max; - if (flag & SD_SET_MASK) { - _sd_cache_config.trace_mask = tt.tt_mask; - /* explicitly set global mask, not bitwise or */ - _sd_trace_mask = tt.tt_mask; - } - if (flag & SD_SET_LBOLT) - _sd_cache_config.trace_lbolt = tt.tt_lbolt; - if (flag & SD_SET_GOOD) - _sd_cache_config.trace_good = tt.tt_good; - return (0); - } - if (flag & SD_SET_SIZE) - _sd_cache_config.trace_size = tt.tt_max; - /* modify particular device parameters */ - if (!_sd_trace_table[cd].tbl) - (void) _sdbc_tr_configure(cd); - if ((t = _sd_trace_table[cd].tbl) == NULL) - return (0); - if (flag & SD_SET_MASK) { - t->tt_mask = tt.tt_mask; - _sd_trace_mask |= tt.tt_mask; /* or-ed with global mask */ - } - if (flag & 
SD_SET_LBOLT) - t->tt_lbolt = tt.tt_lbolt; - if (flag & SD_SET_GOOD) - t->tt_good = tt.tt_good; - if (copyout(t, table, sizeof (*t) - sizeof (_sdtr_t))) { - return (EFAULT); - } - return (0); -} - -#else /* ! _SD_NOTRACE */ - -int _sd_adump() { return (ENOSYS); } -int _sdbc_tr_load(void) { return (0); } -int _sdbc_tr_configure(void) { return (0); } -void _sdbc_tr_deconfigure(void) { return; } -void _sdbc_tr_unload(void) { return; } - -#endif /* ! _SD_NOTRACE */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sd_trace.h b/usr/src/uts/common/avs/ns/sdbc/sd_trace.h deleted file mode 100644 index 8dd86528c2..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sd_trace.h +++ /dev/null @@ -1,320 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - - -#ifndef _SD_TRACE_H -#define _SD_TRACE_H - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifdef _SD_NOTRACE -#define SDALERT(f, cd, len, fba, flg, ret) -#define SDTRACE(f, cd, len, fba, flg, ret) -#define DATA_LOG_CHAIN(ttype, c_ent, stp, ln) -#define DATA_LOG(ttype, c_ent, stp, ln) -#else -#define SDALERT(f, cd, len, fba, flg, ret) \ - _sd_alert(f, (int)cd, (int)len, (nsc_off_t)fba, (int)flg, (int)ret) -#define SDTRACE(f, cd, len, fba, flg, ret) \ - if (_sd_trace_mask & (f)) \ - _sd_trace(f, (int)cd, (int)len, (nsc_off_t)fba,\ - (int)flg, (int)ret) -#define DATA_LOG_CHAIN(ttype, c_ent, stp, ln) \ - _sd_data_log_chain((int)(ttype), c_ent, (nsc_off_t)(stp), \ - (nsc_size_t)(ln)) -#if defined(_SD_FBA_DATA_LOG) || defined(lint) -#define DATA_LOG(ttype, c_ent, stp, ln) \ - _sd_data_log((int)(ttype), c_ent, (nsc_off_t)(stp), (nsc_size_t)(ln)) -#else -#define DATA_LOG(ttype, c_ent, stp, ln) \ - SDTRACE(ttype, CENTRY_CD(c_ent), \ - ln, (nsc_off_t)(BLK_TO_FBA_NUM(CENTRY_BLK(c_ent)) + stp), \ - *(int *)((c_ent)->cc_data+FBA_SIZE(stp)), \ - *(int *)((c_ent)->cc_data+FBA_SIZE(stp+ln)-4)) -#endif /* (_SD_FBA_DATA_LOG) */ -#endif - -#define SDT_INV_CD -1 -#define SDT_ANY_CD -2 -#define SDT_INV_BL 0xffffffff - -typedef struct _sdtr -{ - ushort_t t_func; /* function being traced */ - ushort_t t_len; /* allocation type */ - nsc_off_t t_fba; /* fixed block offset */ - int t_flg; /* buffer size requested */ - int t_ret; /* return value */ - int t_time; /* micro_second timer, or lbolt */ - /* low order only on LP64 systems */ -} _sdtr_t; - -typedef struct _sdtr_table -{ - int tt_cd; /* cache device */ - int tt_max; /* entries in table */ - int tt_in; /* entries added */ - int tt_out; /* entries read */ - int tt_cnt; /* unread entries */ - int tt_cntout; /* tt_cnt after dump */ - int tt_mask; /* copy of _sd_trace_mask */ - int tt_lost; /* lost after alert */ - char tt_alert; /* alert signaled */ - char tt_lbolt; /* use 'lbolt' instead of microsec */ - char tt_good; /* use 
locking (races with end-action) */ - char tt_type; /* memory region 0 or 1 (_SD_MEM_TRACE) */ - _sdtr_t tt_buf[1]; /* per-device trace records [0..tt_max] */ -} _sdtr_table_t; - -#if defined(_KERNEL) -typedef struct _sdbc_trace_s { - _sdtr_table_t *tbl; /* points to the trace table for a cd */ - kmutex_t *t_lock; /* the lock for this cd */ - } _sdbc_trace_t; -#endif /* _KERNEL */ - -/* sd_adump() flags */ -#define SD_SET_SIZE 0x01 /* create log if it doesn't exist */ -#define SD_SET_MASK 0x02 -#define SD_SET_LBOLT 0x04 -#define SD_SET_GOOD 0x08 -#define SD_ADUMP_WAIT 0x10 /* wakeup for buffer full or alert */ -#define SD_ALERT_WAIT 0x20 /* wakeup for alert messages */ - -/* Trace function, category, mask bits */ -#define ST_FUNC 0x000f /* functions per category */ -#define ST_CATMASK 0x0ff0 /* Category mask */ - -#define ST_BCACHE 0x0010 /* BCACHE entry points */ -#define ST_BSUB 0x0020 /* BCACHE subroutines */ -#define ST_IO 0x0040 /* IO subsystem */ -#define ST_CCIO 0x0080 /* concurrent (dual) copy */ -#define ST_FT 0x0100 /* Fault-tolerant subsystem */ -#define ST_DL 0x0200 /* Data-logging (debug) */ -#define ST_STATS 0x0400 /* cache statistics */ -#define ST_CKD 0x0800 /* SIMCKD traces */ - -#define ST_ENTER 0x1000 /* function entry */ -#define ST_EXIT 0x2000 /* function exit */ -#define ST_INFO 0x4000 /* see t_flg */ -#define ST_ALERT 0x8000 /* force write to daemon */ - -/* - * dump file pseudo-entries - */ -#define SDF_LOST 0x0000 /* trace is missing entries */ -#define SDF_CD 0x0001 /* new device (following entries) */ - -/* - * ST_BCACHE functions - */ -#define SDF_OPEN 0x00 | ST_BCACHE -#define SDF_CLOSE 0x01 | ST_BCACHE -#define SDF_HALLOC 0x02 | ST_BCACHE -#define SDF_HFREE 0x03 | ST_BCACHE -#define SDF_ALLOCBUF 0x04 | ST_BCACHE -#define SDF_FREEBUF 0x05 | ST_BCACHE -#define SDF_WRITE 0x06 | ST_BCACHE -#define SDF_READ 0x07 | ST_BCACHE -#define SDF_UNCOMMIT 0x08 | ST_BCACHE -#define SDF_ZERO 0x09 | ST_BCACHE -#define SDF_HINT 0x0a | ST_BCACHE -#define 
SDF_ATTACH 0x0b | ST_BCACHE | ST_FT -#define SDF_DETACH 0x0c | ST_BCACHE | ST_FT -#define SDF_NOTIFY 0x0d | ST_BCACHE - -/* - * ST_BSUB - bcache subroutines - */ -#define SDF_ENT_GET 0x00 | ST_BSUB -#define SDF_ENT_ALLOC 0x01 | ST_BSUB -#define SDF_READ_EA 0x02 | ST_BSUB -#define SDF_ENT_FREE 0x03 | ST_BSUB -#define SDF_WR_ALLOC 0x04 | ST_BSUB -#define SDF_WR_FREE 0x05 | ST_BSUB -#define SDF_WR_ALLOCONE 0x06 | ST_BSUB - - -/* - * SD_IO - I/O subsustem - */ -#define SDF_FLCLIST 0x00 | ST_IO -#define SDF_FLCENT 0x01 | ST_IO -#define SDF_FLCLIST_EA 0x02 | ST_IO -#define SDF_FLCENT_EA 0x03 | ST_IO -#define SDF_FLDONE 0x04 | ST_IO -#define SDF_IOB_ALLOC 0x05 | ST_IO - -/* - * ST_FT - Fault-tolerant subsystem - */ -#define SDF_AWAITR 0x00 | ST_FT -#define SDF_RECOVER 0x01 | ST_FT -#define SDF_FT_CLONE 0x02 | ST_FT -#define SDF_REFLECT 0x03 | ST_FT -#define SDF_ONLINE 0x04 | ST_FT - -/* - * ST_STATS - Statistics points - */ -#define SDF_REPLACE 0x00 | ST_STATS -#define SDF_DISCONNECT 0x01 | ST_STATS - -/* - * ST_INFO - */ -#define SDF_COVERAGE 0x00 | ST_INFO - -/* - * ST_DL - */ - -#define SDF_ALLOC 0x00 | ST_DL -#define SDF_RD 0x01 | ST_DL -#define SDF_WR 0x02 | ST_DL -#define SDF_WRSYNC 0x03 | ST_DL -#define SDF_FLSHLIST 0x04 | ST_DL -#define SDF_FLSHENT 0x05 | ST_DL -#define SDF_RDIO 0x06 | ST_DL -#define SDF_FLEA 0x07 | ST_DL -#define SDF_FLSTEA 0x08 | ST_DL -#define SDF_WRSYEA 0x09 | ST_DL - -/* - * More entry points - */ - -#ifdef _SD_FNAME -/* - * function category names - * change these when changing functions above - * compress name to fit in 8 printable characters - */ -char *_bcache_fname[16] = -{ - "open", - "close", - "al_hndl", - "fr_hndl", - "al_buf", - "fr_buf", - "write", - "read", - "ucommit", - "zero", - "hint", - "attach", - "detach", - "notify", -}; - -char *_bsub_fname[16] = -{ - "get_cent", - "al_cent", - "read_ea", - "fr_cent", - "al_went", - "fr_went", - "al_wone", -}; - -char *_io_fname[16] = -{ - "flclist", - "flcent", - "eaclist", - "eacent", - 
"fldone", - "get_iob", -}; - -char *_ccio_fname[16] = -{ - "ccio", - "dc_albuf", - "dc_frbuf", - "dc_write", - "dc_read", - "dc_zero", -}; - -char *_ft_fname[16] = -{ - "wait_rec", - "cache_rc", - "ft_clone", - "reflect", - "online", -}; - -char *_stats_fname[16] = -{ - "LRU-repl", - "Disconn", -}; - -char *_info_fname[16] = -{ - "Cover", -}; - -char *_dlog_fname[16] = -{ - "alloc", - "rd", - "wr", - "wrsync", - "flshlist", - "flshent", - "rdio", - "flea", - "flstea", - "wrsyea", -}; - -#endif /* _ST_NAMES */ -#ifdef _KERNEL - -extern int _sd_trace_mask; - -extern void _sdbc_tr_unload(void); -extern int _sdbc_tr_load(void); -extern int _sdbc_tr_configure(int cd); -extern void _sdbc_tr_deconfigure(void); -extern int _sd_adump(void *args, int *rvp); -extern void _sd_alert(int f, int cd, int len, nsc_off_t fba, int flg, int ret); -extern void _sd_trace(int f, int cd, int len, nsc_off_t fba, int flg, - int ret); -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SD_TRACE_H */ diff --git a/usr/src/uts/common/avs/ns/sdbc/sdbc.conf b/usr/src/uts/common/avs/ns/sdbc/sdbc.conf deleted file mode 100644 index ce2e87d523..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sdbc.conf +++ /dev/null @@ -1,35 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# SDBC Solaris configuration properties -# -name="sdbc" parent="pseudo" instance=0; - -# Threshold for large writes above which sdbc will force write through -# mode; specified as a number of 512-byte blocks (default 64). -# This value only takes affect when NVRAM is present. -sdbc_wrthru_thresh=64; - -# Reduce to 256 if IDE disks are to be used, cannot exceed 1024 -sdbc_max_fbas=1024; diff --git a/usr/src/uts/common/avs/ns/sdbc/sdbc_ioctl.h b/usr/src/uts/common/avs/ns/sdbc/sdbc_ioctl.h deleted file mode 100644 index 33a512826e..0000000000 --- a/usr/src/uts/common/avs/ns/sdbc/sdbc_ioctl.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - - -#ifndef _SDBC_IOCTL_H -#define _SDBC_IOCTL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/unistat/spcs_s.h> /* included for unistat */ - -/* - * Generic sdbc ioctl arguments structure. - * Individual ioctl's will use 0-n of these arguments. - * - * Each sdbc ioctl is described first by the command number - * e.g. #define SDBC_ADUMP _SDBC_(4) - * - * Followed by a description of each argument (if any). - * Each argument is on a single line. - * - */ - -typedef struct _sdbc_ioctl_s { - long arg0; - long arg1; - long arg2; - long arg3; - long arg4; - long magic; - spcs_s_info_t sdbc_ustatus; - long pad[1]; -} _sdbc_ioctl_t; - -typedef struct _sdbc_ioctl32_s { - int32_t arg0; - int32_t arg1; - int32_t arg2; - int32_t arg3; - int32_t arg4; - int32_t magic; - spcs_s_info32_t sdbc_ustatus; - int32_t pad[1]; -} _sdbc_ioctl32_t; - -/* - * Ioctl command numbers - */ - -#define _SDBC_(x) (('B'<<16)|('C'<<8)|(x)) - -/* - * Old ioctl commands prior to ioctl reorg. These could be re-used - * at a later date - */ -#define SDBC_UNUSED_1 _SDBC_(1) /* OLD out of date syscall -> ioctl stuff */ -#define SDBC_UNUSED_2 _SDBC_(2) /* OLD INFSD_CONC_WRITE */ -#define SDBC_UNUSED_3 _SDBC_(3) /* OLD muli-subopcode configuration */ - -#define SDBC_ADUMP _SDBC_(4) -/* - * int cd; - * _sdtr_table * table; - * _sdtr_t * trace_buffer; - * int size_of_trace_buffer; - * int flags; - */ - -#define SDBC_TEST_INIT _SDBC_(5) /* TESTING - tdaemon parameters */ -/* - * char * device_name; - * int index; - * int len; - * int track_size; - * int flags; - */ - -#define SDBC_TEST_START _SDBC_(6) /* TESTING - tdaemon .... */ -/* - * int num; - * int type; - * int loops; - * int from; - * int seed; - */ - -#define SDBC_TEST_END _SDBC_(7) /* TESTING - tdaemon .... 
*/ -/* NO-ARGS */ - -#define SDBC_ENABLE _SDBC_(8) /* configure sdbc */ -/* - * _sd_cache_param_t * user_configuration; - */ - -#define SDBC_DISABLE _SDBC_(9) /* deconfigure sdbc */ -/* NO-ARGS */ - -#define SDBC_SET_CD_HINT _SDBC_(10) -/* - * int cd; - * int hint; - * int flags; - */ - -#define SDBC_GET_CD_HINT _SDBC_(11) -/* - * int cd; - */ - -#define SDBC_SET_NODE_HINT _SDBC_(12) -/* - * int hint; - * int flags; - */ - -#define SDBC_GET_NODE_HINT _SDBC_(13) -/* NO-ARGS */ - -#define SDBC_STATS _SDBC_(14) -/* - * _sd_stats_t * stats buffer; - */ - -#define SDBC_ZAP_STATS _SDBC_(15) -/* NO-ARGS */ - -#define SDBC_GET_CD_BLK _SDBC_(16) -/* - * int cd; - * nsc_off_t * block_number; - * void * addresses[5]; - */ - -#define SDBC_GET_CLUSTER_SIZE _SDBC_(17) -/* - * int * cluster_size; - */ - -#define SDBC_GET_CLUSTER_DATA _SDBC_(18) -/* - * char * buffer[2*cluster_size]; - */ - -#define SDBC_GET_GLMUL_SIZES _SDBC_(19) -/* - * int * global_sizes; - */ - -#define SDBC_GET_GLMUL_INFO _SDBC_(20) -/* - * char * buffer[ 2 times sum of global_sizes]; - */ - -/* Unused _SDBC(21,22) */ - -#define SDBC_STATE_DEV _SDBC_(23) /* set path to sdbc state file/volume */ -/* - * char * device_name; - * int device_name_length; - */ -#define SDBC_TOGGLE_FLUSH _SDBC_(24) /* TESTING - toggle flusher enable */ - /* NO-ARGS */ - -#define SDBC_INJ_IOERR _SDBC_(25) /* TESTING - inject i/o error */ -/* - * int cd - * int io_error_number; - */ - -#define SDBC_CLR_IOERR _SDBC_(26) /* TESTING - clear injected i/o error */ -/* - * int cd - */ - -#define SDBC_GET_CONFIG _SDBC_(27) /* retrieve current configuration */ -/* - * _sdbc_config_t *current_config; - */ - -#define SDBC_SET_CONFIG _SDBC_(28) /* enable cache configuration info */ -/* - * _sdbc_config_t *mgmt_config_info; - */ - -/* Unused _SDBC(29) */ - -#define SDBC_MAXFILES _SDBC_(30) /* get maxfiles */ -/* - * int * max_files; - */ - -#define SDBC_VERSION _SDBC_(31) -/* - * cache_version_t *cache_version; - */ - -#define _SD_MAGIC 0xD017 - 
-#define MAX_CACHE_NET 4 -#define MAX_REMOTE_MIRRORS 64 -#define MAX_MIR_SEGS MAX_REMOTE_MIRRORS -#define MAX_CACHE_SIZE 1024 - -/* unexposed configuration bits */ -#define CFG_USE_DMCHAIN 0x1 -#define CFG_STATIC_CACHE 0x2 - -#define RESERVED1_DEFAULTS (CFG_STATIC_CACHE) - -/* maintain _sd_cache_param struct layout (MAX_CACHE_NET is deprecated) */ -#define CACHE_MEM_PAD 4 - -typedef struct _sd_cache_param { - int mirror_host; - int blk_size; - int threads; - int procs; - int test_demons; - int write_cache; - int trace_size; - int trace_mask; - int trace_lbolt; - int trace_good; - int trace_net; /* not used */ - int iobuf; - int num_handles; - int cache_mem[CACHE_MEM_PAD]; - int prot_lru; - int gen_pattern; - uint_t fill_pattern; - short nodes_conf[MAX_REMOTE_MIRRORS]; /* Actual Nodes in conf file */ - short num_nodes; /* Number of nodes in sd.cf */ - short net_type; /* not used */ - ushort_t magic; /* Check for proper sd_cadmin */ - int reserved1; /* unexposed config options */ - int reserved[8]; -} _sd_cache_param_t; - -typedef struct _sdbc_config { - int cache_mem[CACHE_MEM_PAD]; - int threads; - int enabled; - ushort_t magic; -} _sdbc_config_t; - -typedef struct cache_version { - int major; /* Major release number */ - int minor; /* Minor release number */ - int micro; /* Micro release number */ - int baseline; /* Baseline revison number */ -} cache_version_t; - -#if !defined(_KERNEL) - - -/* Keep this definition in sync with the one in rdc_ioctl.h. */ -#ifndef SDBC_IOCTL -#define SDBC_IOCTL(cmd, a0, a1, a2, a3, a4, ustatus) \ - sdbc_ioctl((long)(cmd), (long)(a0), (long)(a1), (long)(a2), \ - (long)(a3), (long)(a4), (spcs_s_info_t *)(ustatus)) -#endif - -int sdbc_ioctl(long, long, long, long, long, long, spcs_s_info_t *); - - -#endif /* ! 
_KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SDBC_IOCTL_H */ diff --git a/usr/src/uts/common/avs/ns/solaris/Makefile b/usr/src/uts/common/avs/ns/solaris/Makefile deleted file mode 100644 index d891bb36b9..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/Makefile +++ /dev/null @@ -1,50 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -# include global definitions -include ../../../../../Makefile.master - -HDRS= nsc_ddi.h \ - nskernd.h \ - nsc_thread.h - -ROOTDIR= $(ROOT)/usr/include/sys - -ROOTHDRS= $(HDRS:%=$(ROOTDIR)/%) - -# install rules -$(ROOTDIR)/%: % - $(INS.file) - -CHECKHDRS= $(HDRS:%.h=%.check) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIR) $(ROOTHDRS) - -$(ROOTDIR): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_ddi.c b/usr/src/uts/common/avs/ns/solaris/nsc_ddi.c deleted file mode 100644 index d8bc957670..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_ddi.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * This file contains interface code to make the kernel look it has - * an svr4.2 ddi/ddk. It also adds a little other system dependent - * functionality that is useful for drivers lower than nsctl. 
- */ - -#include <sys/types.h> -#ifndef DS_DDICT -#include <sys/time.h> /* only DDI compliant as of 5.9 */ -#endif -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/kmem.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/uio.h> -#include <sys/conf.h> -#include <sys/modctl.h> -#ifndef DS_DDICT -#include <sys/vnode.h> -#endif -#include <sys/open.h> -#include <sys/ddi.h> - -#include "nsc_thread.h" - -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif - -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/nsvers.h> -#include "nskernd.h" -#include "nsc_list.h" - -kmutex_t _nskern_lock; - -void _nsc_stop_proc(void); -void _nsc_start_proc(void); - - -/* - * Solaris specific driver module interface code. - */ - -static struct cb_ops nskern_cb_ops = { - nulldev, /* open */ - nulldev, /* close */ - nodev, /* strategy */ - nodev, /* print */ - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - nodev, /* ioctl */ - nodev, /* devmap routine */ - nodev, /* mmap routine */ - nodev, /* segmap */ - nochpoll, /* chpoll */ - ddi_prop_op, - 0, /* not a STREAMS driver, no cb_str routine */ - D_NEW | D_MP | D_64BIT, /* safe for multi-thread/multi-processor */ - CB_REV, - nodev, /* aread */ - nodev, /* awrite */ -}; - -static int _nskern_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); -static int _nskern_attach(dev_info_t *, ddi_attach_cmd_t); -static int _nskern_detach(dev_info_t *, ddi_detach_cmd_t); - -static struct dev_ops nskern_ops = { - DEVO_REV, /* Driver build version */ - 0, /* device reference count */ - _nskern_getinfo, - nulldev, /* identify */ - nulldev, /* probe */ - _nskern_attach, - _nskern_detach, - nodev, /* reset */ - &nskern_cb_ops, - (struct bus_ops *)NULL -}; - -static struct modldrv nskern_ldrv = { - &mod_driverops, - "nws:Kernel Interface:" ISS_VERSION_STR, - &nskern_ops -}; - -static dev_info_t *nskern_dip; - -static struct modlinkage nskern_modlinkage = { - MODREV_1, - &nskern_ldrv, - NULL -}; - -/* - * 
Solaris module load time code - */ - -int -_init(void) -{ - void nskern_init(); - int err; - - mutex_init(&_nskern_lock, NULL, MUTEX_DRIVER, NULL); - - err = mod_install(&nskern_modlinkage); - if (err) { - mutex_destroy(&_nskern_lock); - cmn_err(CE_WARN, "nskern_init: mod_install err %d", err); - return (err); - } - - nskern_init(); - - return (DDI_SUCCESS); -} - -/* - * Solaris module unload time code - */ - -int -_fini(void) -{ - int err; - - if ((err = mod_remove(&nskern_modlinkage)) == 0) { - nskernd_stop(); - _nsc_stop_proc(); - nskernd_deinit(); - - mutex_destroy(&_nskern_lock); - } - - return (err); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&nskern_modlinkage, modinfop)); -} - -/* - * Attach an instance of the device. This happens before an open - * can succeed. - */ - -static int -_nskern_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - if (cmd == DDI_ATTACH) { - nskern_dip = dip; - return (DDI_SUCCESS); - } else { - return (DDI_FAILURE); - } -} - -/* ARGSUSED */ - -static int -_nskern_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - if (cmd == DDI_DETACH) { - nskern_dip = NULL; - return (DDI_SUCCESS); - } else { - return (DDI_FAILURE); - } -} - -/* ARGSUSED */ -static int -_nskern_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) -{ - int rc = DDI_FAILURE; - - switch (cmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = nskern_dip; - rc = DDI_SUCCESS; - break; - - case DDI_INFO_DEVT2INSTANCE: - /* single instance */ - *result = 0; - rc = DDI_SUCCESS; - break; - } - - return (rc); -} - -/* ARGSUSED */ - -int -_nskern_print(dev_t dev, char *s) -{ - cmn_err(CE_WARN, "nskern:%s", s); - return (0); -} - -/* - * nskern_init - initialize the nskern layer at module load time. 
- */ - -void -nskern_init(void) -{ - _nsc_start_proc(); - nskernd_init(); - - (void) nst_startup(); -} - - -#if (defined(DS_DDICT)) -static clock_t -nskern_lbolt(void) -{ -#ifdef _SunOS_5_6 - clock_t lbolt; - - if (drv_getparm(LBOLT, &lbolt) == 0) - return (lbolt); - - return (0); -#else - return (ddi_get_lbolt()); -#endif -} -#endif /* ddict */ - - -/* - * nsc_usec() - * - return the value of the "microsecond timer emulation". - * - * Pre-SunOS 5.9: - * Actually this is a fake free running counter based on the lbolt value. - * - * SunOS 5.9+ - * This is based on the gethrtime(9f) DDI facility. - */ - -#if (defined(DS_DDICT)) -/* these two #defines need to match! */ -#define USEC_SHIFT 16 -#define INCR_TYPE uint16_t -#endif /* ! _SunOS_5_9+ */ - -clock_t -nsc_usec(void) -{ - /* avoid divide by zero */ - return (gethrtime() / 1000); -} - - -/* - * nsc_yield - yield the cpu. - */ -void -nsc_yield(void) -{ - /* can't call yield() unless there is an lwp context */ - /* do this for now */ - - delay(2); -} - - -/* - * void - * ls_ins_before(ls_elt_t *, ls_elt_t *) - * Link new into list before old. - * - * Calling/Exit State: - * None. - */ -#ifdef lint -void -nsc_ddi_ls_ins_before(ls_elt_t *old, ls_elt_t *new) -#else -void -ls_ins_before(ls_elt_t *old, ls_elt_t *new) -#endif -{ - new->ls_prev = old->ls_prev; - new->ls_next = old; - new->ls_prev->ls_next = new; - new->ls_next->ls_prev = new; -} - -/* - * void - * ls_ins_after(ls_elt_t *, ls_elt_t *) - * Link new into list after old. - * - * Calling/Exit State: - * None. - */ -#ifdef lint -void -nsc_ddi_ls_ins_after(ls_elt_t *old, ls_elt_t *new) -#else -void -ls_ins_after(ls_elt_t *old, ls_elt_t *new) -#endif -{ - new->ls_next = old->ls_next; - new->ls_prev = old; - new->ls_next->ls_prev = new; - new->ls_prev->ls_next = new; -} - -/* - * ls_elt_t * - * ls_remque(ls_elt_t *) - * Unlink first element in the specified list. - * - * Calling/Exit State: - * Returns the element's address or 0 if list is empty. 
- * Resets elements pointers to empty list state. - */ -ls_elt_t * -ls_remque(ls_elt_t *p) -{ - ls_elt_t *result = 0; - - if (!LS_ISEMPTY(p)) { - result = p->ls_next; - result->ls_prev->ls_next = result->ls_next; - result->ls_next->ls_prev = result->ls_prev; - LS_INIT(result); - } - return (result); -} - -/* - * void - * ls_remove(ls_elt_t *) - * Unlink donated element for list. - * - * Calling/Exit State: - * Resets elements pointers to empty list state. - */ -#ifdef lint -void -nsc_ddi_ls_remove(ls_elt_t *p) -#else -void -ls_remove(ls_elt_t *p) -#endif -{ - p->ls_prev->ls_next = p->ls_next; - p->ls_next->ls_prev = p->ls_prev; - LS_INIT(p); -} diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_ddi.h b/usr/src/uts/common/avs/ns/solaris/nsc_ddi.h deleted file mode 100644 index 41172e2b92..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_ddi.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _NSC_DDI_H -#define _NSC_DDI_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * before we redefine our thread calls we must be sure that solaris has its - * thread stuff defined else we'll redefine it also. - */ - -#include <sys/stat.h> /* for S_IFCHR and friends */ -#include <sys/ddi.h> -#include <sys/sunddi.h> - -#ifdef _KERNEL - -/* - * Misc - */ - -typedef caddr_t vaddr_t; - -#ifndef _BLIND_T -#define _BLIND_T -typedef void * blind_t; -#endif /* _BLIND_T */ - -typedef int (*blindfn_t)(); -typedef uintptr_t mc_io_addr_t; - -/* - * You would think that sys/ddi.h would define these, as they are in the ddi. - */ -extern int copyout(const void *, void *, size_t); -extern int copyin(const void *, void *, size_t); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_DDI_H */ diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_list.c b/usr/src/uts/common/avs/ns/solaris/nsc_list.c deleted file mode 100644 index 07a3dfff34..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_list.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -/* - * Generic lists - * Lists are circular, doubly-linked, with headers. - * When a list is empty, both pointers in the header - * point to the header itself. - */ - -#include "nsc_list.h" -/* - * void - * ls_remove(ls_elt_t *) - * Unlink donated element for list. - * - * Calling/Exit State: - * Resets elements pointers to empty list state. - */ -void -ls_remove(ls_elt_t *p) -{ - p->ls_prev->ls_next = p->ls_next; - p->ls_next->ls_prev = p->ls_prev; - LS_INIT(p); -} -/* - * void - * ls_ins_after(ls_elt_t *, ls_elt_t *) - * - * Link new into list after old. - * - * Calling/Exit State: - * - * None. - */ -void -ls_ins_after(ls_elt_t *old, ls_elt_t *new) -{ - new->ls_next = old->ls_next; - new->ls_prev = old; - new->ls_next->ls_prev = new; - new->ls_prev->ls_next = new; -} - - -/* - * void - * ls_ins_before(ls_elt_t *, ls_elt_t *) - * Link new into list after old. - * - * Calling/Exit State: - * - * None. - */ -void -ls_ins_before(ls_elt_t *old, ls_elt_t *new) -{ - new->ls_prev = old->ls_prev; - new->ls_next = old; - new->ls_prev->ls_next = new; - new->ls_next->ls_prev = new; -} diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_list.h b/usr/src/uts/common/avs/ns/solaris/nsc_list.h deleted file mode 100644 index 771053c5cf..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_list.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _NSC_LIST_H -#define _NSC_LIST_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Generic lists support. - */ - - -/* - * Lists are circular and doubly-linked, with headers. - * When a list is empty, both pointers in the header - * point to the header itself. - */ - -#if defined(_KERNEL) || defined(_KMEMUSER) - -/* list element */ -typedef struct ls_elt { - struct ls_elt *ls_next; - struct ls_elt *ls_prev; -} ls_elt_t; - -#endif /* _KERNEL || _KMEMUSER */ - -#ifdef _KERNEL - -/* - * All take as arguments side effect-free pointers to list structures - */ -#define LS_ISEMPTY(listp) \ - (((ls_elt_t *)(listp))->ls_next == (ls_elt_t *)(listp)) -#define LS_INIT(listp) { \ - ((ls_elt_t *)(listp))->ls_next = \ - ((ls_elt_t *)(listp))->ls_prev = \ - ((ls_elt_t *)(listp)); \ -} - -#define LS_REMOVE(listp) ls_remove((ls_elt_t *)(listp)) - -/* - * For these five, ptrs are to list elements, but qp and stackp are - * implicitly headers. 
- */ -#define LS_INS_BEFORE(oldp, newp) \ - ls_ins_before((ls_elt_t *)(oldp), (ls_elt_t *)(newp)) - -#define LS_INS_AFTER(oldp, newp) \ - ls_ins_after((ls_elt_t *)(oldp), (ls_elt_t *)(newp)) - -#define LS_INSQUE(qp, eltp) \ - ls_ins_before((ls_elt_t *)(qp), (ls_elt_t *)(eltp)) - -/* result needs cast; 0 result if empty queue */ -#define LS_REMQUE(qp) ls_remque((ls_elt_t *)(qp)) - -#define LS_PUSH(stackp, newp) \ - ls_ins_after((ls_elt_t *)(stackp), (ls_elt_t *)(newp)) - -/* result needs cast; 0 result if empty stack */ -#define LS_POP(stackp) ls_remque((ls_elt_t *)(stackp)) - -/* public function declarations */ -void ls_ins_before(ls_elt_t *, ls_elt_t *); -void ls_ins_after(ls_elt_t *, ls_elt_t *); -ls_elt_t *ls_remque(ls_elt_t *); -void ls_remove(ls_elt_t *); - -#endif /* _KERNEL */ - -#if defined(_KERNEL) || defined(_KMEMUSER) - -typedef struct llist { - struct llist *volatile flink; /* forward link */ - struct llist *volatile rlink; /* reverse link */ -} llist_t; - -#endif /* _KERNEL || _KMEMUSER */ - -#ifdef _KERNEL - -#define INITQUE(l) ((l)->flink = (l)->rlink = (l)) -#define EMPTYQUE(l) ((l)->flink == (l)) - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_LIST_H */ diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_proc.c b/usr/src/uts/common/avs/ns/solaris/nsc_proc.c deleted file mode 100644 index 317f1871f5..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_proc.c +++ /dev/null @@ -1,382 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/conf.h> -#include <sys/kmem.h> -#include <sys/ddi.h> - -#define __NSC_GEN__ -#include <sys/nsctl/nsc_rmspin.h> -#include "../nsctl.h" -#include "nskernd.h" - -struct nsc_nlwp { - struct nsc_nlwp *next; - void (*fn)(void *); - void *arg; - volatile int ready; - int errno; - kcondvar_t child_cv; -}; - -kmutex_t nsc_proc_lock; -kcondvar_t nsc_proc_cv; - -static struct nsc_nlwp *nsc_nlwp_top; - -void -_nsc_start_proc(void) -{ - mutex_init(&nsc_proc_lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&nsc_proc_cv, NULL, CV_DRIVER, NULL); -} - - -void -_nsc_stop_proc(void) -{ - mutex_destroy(&nsc_proc_lock); - cv_destroy(&nsc_proc_cv); -} - - -/* - * Create a daemon (server) proc. - * - * If 'rt' is TRUE, then increase the scheduling priority of the lwp. - * Exactly how, if at all, this feature is implemented is at the - * discretion of nskernd. - * - * Returns 0 or errno. 
- */ - -int -nsc_create_process(void (*func)(void *), void *arg, boolean_t rt) -{ - struct nsc_nlwp *nlwp, **nlwpp; - struct nskernd *nsk = NULL; - int rc = 0; - - nlwp = kmem_zalloc(sizeof (*nlwp), KM_NOSLEEP); - nsk = kmem_zalloc(sizeof (*nsk), KM_NOSLEEP); - if (!nlwp || !nsk) { - if (nlwp) { - kmem_free(nlwp, sizeof (*nlwp)); - } - if (nsk) { - kmem_free(nsk, sizeof (*nsk)); - } - return (ENOMEM); - } - - nlwp->fn = func; - nlwp->arg = arg; - - mutex_enter(&nsc_proc_lock); - - nlwp->next = nsc_nlwp_top; - nsc_nlwp_top = nlwp; - - mutex_exit(&nsc_proc_lock); - - nsk->command = NSKERND_NEWLWP; - nsk->data1 = (uint64_t)(unsigned long)nlwp; - nsk->data2 = (uint64_t)rt; - - rc = nskernd_get(nsk); - - /* user level returns error in nsk->data1 */ - if (!rc && nsk->data1) - rc = nsk->data1; - - mutex_enter(&nsc_proc_lock); - - if (!rc) { - /* - * wait for the child to start and check in. - */ - - while (! nlwp->ready) { - cv_wait(&nsc_proc_cv, &nsc_proc_lock); - } - } - - /* - * remove from list of outstanding requests. - */ - - for (nlwpp = &nsc_nlwp_top; (*nlwpp); nlwpp = &((*nlwpp)->next)) { - if (*nlwpp == nlwp) { - *nlwpp = nlwp->next; - break; - } - } - - mutex_exit(&nsc_proc_lock); - - kmem_free(nlwp, sizeof (*nlwp)); - kmem_free(nsk, sizeof (*nsk)); - return (rc); -} - - -/* - * Child lwp calls this function when it returns to the kernel. - * - * Check if the args are still on the pending list. If they are, then - * run the required function. If they are not, then something went - * wrong, so just return back to userland and die. 
- */ -void -nsc_runlwp(uint64_t arg) -{ - struct nsc_nlwp *nlwp; - void (*fn)(void *); - void *fn_arg; - - fn_arg = NULL; - fn = NULL; - - mutex_enter(&nsc_proc_lock); - - /* - * check that the request is still on the list of work to do - */ - - for (nlwp = nsc_nlwp_top; nlwp; nlwp = nlwp->next) { - if (nlwp == (struct nsc_nlwp *)(unsigned long)arg) { - fn_arg = nlwp->arg; - fn = nlwp->fn; - - /* mark as ready */ - nlwp->ready = 1; - cv_broadcast(&nsc_proc_cv); - - break; - } - } - - mutex_exit(&nsc_proc_lock); - - if (fn) { - (*fn)(fn_arg); - } -} - - -/* - * Create a thread that acquires an inter-node lock. - * - * mode - 0 (read), 1 (write). - * lockp - used to return the opaque address of a sync structure, which - * must be passed to nsc_do_unlock() later. - * - * Returns 0 or errno. - */ - -int -nsc_do_lock(int mode, void **lockp) -{ - struct nsc_nlwp *nlwp = NULL, **nlwpp; - struct nskernd *nsk = NULL; - int rc = 0; - - nlwp = kmem_zalloc(sizeof (*nlwp), KM_NOSLEEP); - nsk = kmem_zalloc(sizeof (*nsk), KM_NOSLEEP); - if (!nlwp || !nsk) { - if (nlwp) { - kmem_free(nlwp, sizeof (*nlwp)); - } - if (nsk) { - kmem_free(nsk, sizeof (*nsk)); - } - return (ENOMEM); - } - - cv_init(&nlwp->child_cv, NULL, CV_DRIVER, NULL); - - mutex_enter(&nsc_proc_lock); - - nlwp->next = nsc_nlwp_top; - nsc_nlwp_top = nlwp; - - mutex_exit(&nsc_proc_lock); - - nsk->command = NSKERND_LOCK; - nsk->data1 = (uint64_t)(unsigned long)nlwp; - nsk->data2 = (uint64_t)mode; - - rc = nskernd_get(nsk); - - /* user level returns error in nsk->data1 */ - if (!rc && nsk->data1) - rc = nsk->data1; - - mutex_enter(&nsc_proc_lock); - - if (!rc) { - /* - * wait for the child to start and check in. - */ - - while (! 
nlwp->ready) { - cv_wait(&nsc_proc_cv, &nsc_proc_lock); - } - - /* retrieve errno from child's lock operation */ - rc = (int)nlwp->errno; - } - - if (rc) { - /* - * error - remove from list of outstanding requests as - * child will not be checking in (nskernd_get() failed - * or user thread create failed) or will not be waiting - * (child thread lock failure). - */ - - for (nlwpp = &nsc_nlwp_top; (*nlwpp); - nlwpp = &((*nlwpp)->next)) { - if (*nlwpp == nlwp) { - *nlwpp = nlwp->next; - break; - } - } - - mutex_exit(&nsc_proc_lock); - - cv_destroy(&nlwp->child_cv); - kmem_free(nlwp, sizeof (*nlwp)); - kmem_free(nsk, sizeof (*nsk)); - *lockp = NULL; - return (rc); - } - - /* success, return argument for nsc_do_unlock() */ - - mutex_exit(&nsc_proc_lock); - - kmem_free(nsk, sizeof (*nsk)); - *lockp = nlwp; - return (0); -} - - -void -nsc_do_unlock(void *arg) -{ - struct nsc_nlwp *nlwp; - - /* find child on work list */ - - mutex_enter(&nsc_proc_lock); - - for (nlwp = nsc_nlwp_top; nlwp; nlwp = nlwp->next) { - if (nlwp == (struct nsc_nlwp *)arg) { - /* signal unlock */ - nlwp->ready = 0; - cv_broadcast(&nlwp->child_cv); - } - } - - mutex_exit(&nsc_proc_lock); -} - - -/* - * Lock child thread calls this function when it returns to the kernel. - * - * Check if the args are still on the pending list. If they are, then - * post the lock results and wait for the unlock. If they are not, - * then something went wrong, so just return back to userland and die. 
- */ -void -nsc_lockchild(uint64_t arg, uint64_t errno) -{ - struct nsc_nlwp *nlwp, **nlwpp; - - if (!arg) { - return; - } - - mutex_enter(&nsc_proc_lock); - - /* - * check that the request is still on the list of work to do - */ - - for (nlwp = nsc_nlwp_top; nlwp; nlwp = nlwp->next) { - if (nlwp == (struct nsc_nlwp *)(unsigned long)arg) { - /* mark as ready */ - nlwp->errno = (int)errno; - nlwp->ready = 1; - cv_broadcast(&nsc_proc_cv); - break; - } - } - - if (!nlwp || errno) { - /* - * Error - either this request is no longer on the work - * queue, or there was an error in the userland lock code - * in which case the lock caller (currently blocked in - * nsc_do_lock() will do the cleanup. - */ - mutex_exit(&nsc_proc_lock); - return; - } - - /* - * no errors, so wait for an unlock - */ - - while (nlwp->ready) { - cv_wait(&nlwp->child_cv, &nsc_proc_lock); - } - - /* - * remove self from list of outstanding requests. - */ - - for (nlwpp = &nsc_nlwp_top; (*nlwpp); nlwpp = &((*nlwpp)->next)) { - if (*nlwpp == nlwp) { - *nlwpp = nlwp->next; - break; - } - } - - /* - * cleanup - */ - - cv_destroy(&nlwp->child_cv); - kmem_free(nlwp, sizeof (*nlwp)); - - mutex_exit(&nsc_proc_lock); -} diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_raw.c b/usr/src/uts/common/avs/ns/solaris/nsc_raw.c deleted file mode 100644 index 171ac3ec07..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_raw.c +++ /dev/null @@ -1,853 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/debug.h> -#include <sys/kmem.h> -#include <sys/ksynch.h> -#ifndef DS_DDICT -#include <sys/vnode.h> -#endif -#include <sys/cmn_err.h> -#include <sys/open.h> -#include <sys/file.h> -#include <sys/cred.h> -#include <sys/conf.h> -#include <sys/errno.h> -#include <sys/uio.h> -#ifndef DS_DDICT -#include <sys/pathname.h> /* for lookupname */ -#endif -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include <sys/sunldi.h> - -#include <ns/solaris/nsc_thread.h> -#ifdef DS_DDICT -#include "../contract.h" -#endif -#include "../nsctl.h" -#include "nskernd.h" - - -typedef struct raw_maj { - struct raw_maj *next; - major_t major; - struct dev_ops *devops; - strategy_fn_t strategy; - int (*open)(dev_t *, int, int, cred_t *); - int (*close)(dev_t, int, int, cred_t *); - int (*ioctl)(dev_t, int, intptr_t, int, cred_t *, int *); -} raw_maj_t; - -typedef struct raw_dev { - ldi_handle_t lh; /* Solaris layered driver handle */ - struct vnode *vp; /* vnode of device */ - uint64_t size; /* size of device in blocks */ - raw_maj_t *major; /* pointer to major structure */ - char *path; /* pathname -- kmem_alloc'd */ - int plen; /* length of kmem_alloc for pathname */ - dev_t rdev; /* device number */ - char in_use; /* flag */ - int partition; /* partition number */ -} raw_dev_t; - -static int fd_hwm = 0; /* first never used entry in _nsc_raw_files */ - -static raw_dev_t *_nsc_raw_files; -static raw_maj_t *_nsc_raw_majors; - -kmutex_t _nsc_raw_lock; - -int _nsc_raw_flags 
= 0; /* required by nsctl */ -static int _nsc_raw_maxdevs; /* local copy */ - -static int _raw_strategy(struct buf *); /* forward decl */ - -static dev_t -ldi_get_dev_t_from_path(char *path) -{ - vnode_t *vp; - dev_t rdev; - - /* Validate parameters */ - if (path == NULL) - return (NULL); - - /* Lookup path */ - vp = NULL; - if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) - return (NULL); - - /* Validate resulting vnode */ - if ((vp) && (vp->v_type == VCHR)) - rdev = vp->v_rdev; - else - rdev = (dev_t)NULL; - - /* Release vnode */ - if (vp) - VN_RELE(vp); - - return (rdev); -} - -int -_nsc_init_raw(int maxdevs) -{ - _nsc_raw_files = - kmem_zalloc(sizeof (*_nsc_raw_files) * maxdevs, KM_SLEEP); - if (!_nsc_raw_files) - return (ENOMEM); - - _nsc_raw_maxdevs = maxdevs; - _nsc_raw_majors = NULL; - - mutex_init(&_nsc_raw_lock, NULL, MUTEX_DRIVER, NULL); - return (0); -} - - -void -_nsc_deinit_raw(void) -{ - raw_maj_t *maj = _nsc_raw_majors; - raw_maj_t *next; - - /* Free the memory allocated for strategy pointers */ - while (maj != NULL) { - next = maj->next; - kmem_free(maj, sizeof (*maj)); - maj = next; - } - - mutex_destroy(&_nsc_raw_lock); - kmem_free(_nsc_raw_files, sizeof (*_nsc_raw_files) * _nsc_raw_maxdevs); - _nsc_raw_files = NULL; - _nsc_raw_maxdevs = 0; -} - - -/* must be called with the _nsc_raw_lock held */ -static raw_maj_t * -_raw_get_maj_info(major_t umaj) -{ - raw_maj_t *maj = _nsc_raw_majors; - - ASSERT(MUTEX_HELD(&_nsc_raw_lock)); - - /* Walk through the linked list */ - while (maj != NULL) { - if (maj->major == umaj) { - /* Found major number */ - break; - } - maj = maj->next; - } - - if (maj == NULL) { - struct dev_ops *ops = NULL; -#ifdef DEBUG - const int maxtry = 5; - int try = maxtry; -#endif - - /* - * The earlier ldi_open call has locked the driver - * for this major number into memory, so just index into - * the devopsp array to get the dev_ops pointer which - * must be valid. 
- */ - - ops = devopsp[umaj]; - - if (ops == NULL || ops->devo_cb_ops == NULL) { - cmn_err(CE_WARN, - "nskern: cannot find dev_ops for major %d", umaj); - - return (NULL); - } - -#ifdef DEBUG - cmn_err(CE_NOTE, - "nsc_raw: held driver (%d) after %d attempts", - umaj, (maxtry - try)); -#endif /* DEBUG */ - - maj = kmem_zalloc(sizeof (raw_maj_t), KM_NOSLEEP); - if (!maj) { - return (NULL); - } - - maj->strategy = ops->devo_cb_ops->cb_strategy; - maj->ioctl = ops->devo_cb_ops->cb_ioctl; - maj->close = ops->devo_cb_ops->cb_close; - maj->open = ops->devo_cb_ops->cb_open; - maj->major = umaj; - maj->devops = ops; - - if (maj->strategy == NULL || - maj->strategy == nodev || - maj->strategy == nulldev) { - cmn_err(CE_WARN, - "nskern: no strategy function for " - "disk driver (major %d)", - umaj); - kmem_free(maj, sizeof (*maj)); - return (NULL); - } - - maj->next = _nsc_raw_majors; - _nsc_raw_majors = maj; - } - - return (maj); -} - - -/* - * nsc_get_strategy returns the strategy function associated with - * the major number umaj. NULL is returned if no strategy is found. - */ -strategy_fn_t -nsc_get_strategy(major_t umaj) -{ - raw_maj_t *maj; - strategy_fn_t strategy = NULL; - - mutex_enter(&_nsc_raw_lock); - - for (maj = _nsc_raw_majors; maj != NULL; maj = maj->next) { - if (maj->major == umaj) { - /* Found major number */ - strategy = maj->strategy; - break; - } - } - - mutex_exit(&_nsc_raw_lock); - - return (strategy); -} - - -void * -nsc_get_devops(major_t umaj) -{ - raw_maj_t *maj; - void *devops = NULL; - - mutex_enter(&_nsc_raw_lock); - - for (maj = _nsc_raw_majors; maj != NULL; maj = maj->next) { - if (maj->major == umaj) { - devops = maj->devops; - break; - } - } - - mutex_exit(&_nsc_raw_lock); - - return (devops); -} - - -/* - * _raw_open - * - * Multiple opens, single close. 
- */ - -/* ARGSUSED */ -static int -_raw_open(char *path, int flag, blind_t *cdp, void *iodev) -{ - struct cred *cred; - raw_dev_t *cdi = NULL; - char *spath; - dev_t rdev; - int rc, cd, the_cd; - int plen; - ldi_ident_t li; - - if (proc_nskernd == NULL) { - cmn_err(CE_WARN, "nskern: no nskernd daemon running!"); - return (ENXIO); - } - - if (_nsc_raw_maxdevs == 0) { - cmn_err(CE_WARN, "nskern: _raw_open() before _nsc_init_raw()!"); - return (ENXIO); - } - - plen = strlen(path) + 1; - spath = kmem_alloc(plen, KM_SLEEP); - if (spath == NULL) { - cmn_err(CE_WARN, - "nskern: unable to alloc memory in _raw_open()"); - return (ENOMEM); - } - - (void) strcpy(spath, path); - - /* - * Lookup the vnode to extract the dev_t info, - * then release the vnode. - */ - if ((rdev = ldi_get_dev_t_from_path(path)) == 0) { - kmem_free(spath, plen); - return (ENXIO); - } - - /* - * See if this device is already opened - */ - - the_cd = -1; - - mutex_enter(&_nsc_raw_lock); - - for (cd = 0, cdi = _nsc_raw_files; cd < fd_hwm; cd++, cdi++) { - if (rdev == cdi->rdev) { - the_cd = cd; - break; - } else if (the_cd == -1 && !cdi->in_use) - the_cd = cd; - } - - if (the_cd == -1) { - if (fd_hwm < _nsc_raw_maxdevs) - the_cd = fd_hwm++; - else { - mutex_exit(&_nsc_raw_lock); - cmn_err(CE_WARN, "_raw_open: too many open devices"); - kmem_free(spath, plen); - return (EIO); - } - } - - cdi = &_nsc_raw_files[the_cd]; - if (cdi->in_use) { - /* already set up - just return */ - mutex_exit(&_nsc_raw_lock); - *cdp = (blind_t)cdi->rdev; - kmem_free(spath, plen); - return (0); - } - - cdi->partition = -1; - cdi->size = (uint64_t)0; - cdi->rdev = rdev; - cdi->path = spath; - cdi->plen = plen; - - cred = ddi_get_cred(); - - /* - * Layered driver - * - * We use xxx_open_by_dev() since this guarantees that a - * specfs vnode is created and used, not a standard filesystem - * vnode. 
This is necessary since in a cluster PXFS will block - * vnode operations during switchovers, so we have to use the - * underlying specfs vnode not the PXFS vnode. - * - */ - - if ((rc = ldi_ident_from_dev(cdi->rdev, &li)) == 0) { - rc = ldi_open_by_dev(&cdi->rdev, - OTYP_BLK, FREAD|FWRITE, cred, &cdi->lh, li); - } - if (rc != 0) { - cdi->lh = NULL; - goto failed; - } - - /* - * grab the major_t related information - */ - - cdi->major = _raw_get_maj_info(getmajor(rdev)); - if (cdi->major == NULL) { - /* Out of memory */ - cmn_err(CE_WARN, - "_raw_open: cannot alloc major number structure"); - - rc = ENOMEM; - goto failed; - } - - *cdp = (blind_t)cdi->rdev; - cdi->in_use++; - - mutex_exit(&_nsc_raw_lock); - - return (rc); - -failed: - - if (cdi->lh) - (void) ldi_close(cdi->lh, FWRITE|FREAD, cred); - - bzero(cdi, sizeof (*cdi)); - - mutex_exit(&_nsc_raw_lock); - - kmem_free(spath, plen); - return (rc); -} - - -static int -__raw_get_cd(dev_t fd) -{ - int cd; - - if (_nsc_raw_maxdevs != 0) { - for (cd = 0; cd < fd_hwm; cd++) { - if (fd == _nsc_raw_files[cd].rdev) - return (cd); - } - } - - return (-1); -} - - -/* - * _raw_close - * - * Multiple opens, single close. 
- */ - -static int -_raw_close(dev_t fd) -{ - struct cred *cred; - raw_dev_t *cdi; - int rc; - int cd; - - mutex_enter(&_nsc_raw_lock); - - if ((cd = __raw_get_cd(fd)) == -1 || !_nsc_raw_files[cd].in_use) { - mutex_exit(&_nsc_raw_lock); - return (EIO); - } - - cdi = &_nsc_raw_files[cd]; - - cred = ddi_get_cred(); - - rc = ldi_close(cdi->lh, FREAD|FWRITE, cred); - if (rc != 0) { - mutex_exit(&_nsc_raw_lock); - return (rc); - } - - kmem_free(cdi->path, cdi->plen); - - bzero(cdi, sizeof (*cdi)); - - mutex_exit(&_nsc_raw_lock); - - return (0); -} - - -/* ARGSUSED */ -static int -_raw_uread(dev_t fd, uio_t *uiop, cred_t *crp) -{ - return (physio(_raw_strategy, 0, fd, B_READ, minphys, uiop)); -} - - -/* ARGSUSED */ -static int -_raw_uwrite(dev_t fd, uio_t *uiop, cred_t *crp) -{ - return (physio(_raw_strategy, 0, fd, B_WRITE, minphys, uiop)); -} - - -static int -_raw_strategy(struct buf *bp) -{ - int cd = __raw_get_cd(bp->b_edev); - - if (cd == -1 || _nsc_raw_files[cd].major == NULL) { - bioerror(bp, ENXIO); - biodone(bp); - return (NULL); - } - - return ((*_nsc_raw_files[cd].major->strategy)(bp)); -} - - -static int -_raw_partsize(dev_t fd, nsc_size_t *rvalp) -{ - int cd; - - if ((cd = __raw_get_cd(fd)) == -1 || !_nsc_raw_files[cd].in_use) - return (EIO); - - *rvalp = (nsc_size_t)_nsc_raw_files[cd].size; - return (0); -} - - -/* - * Return largest i/o size. - */ - -static nsc_size_t nsc_rawmaxfbas = 0; -/* ARGSUSED */ -static int -_raw_maxfbas(dev_t dev, int flag, nsc_size_t *ptr) -{ - struct buf *bp; - if (flag == NSC_CACHEBLK) - *ptr = 1; - else { - if (nsc_rawmaxfbas == 0) { - bp = getrbuf(KM_SLEEP); - bp->b_bcount = 4096 * 512; - minphys(bp); - nsc_rawmaxfbas = FBA_NUM(bp->b_bcount); - freerbuf(bp); - } - *ptr = nsc_rawmaxfbas; - } - return (0); -} - - -/* - * Control device or system. 
- */ - -/* ARGSUSED */ -static int -_raw_control(dev_t dev, int cmd, int *ptr) -{ -#ifdef DEBUG - cmn_err(CE_WARN, "unrecognised nsc_control: %x", cmd); -#endif - return (EINVAL); /* no control commands understood */ -} - - -static int -_raw_get_bsize(dev_t dev, uint64_t *bsizep, int *partitionp) -{ -#ifdef DKIOCPARTITION - struct partition64 *p64 = NULL; -#endif - struct dk_cinfo *dki_info = NULL; - struct dev_ops *ops; - struct cred *cred; - struct vtoc *vtoc = NULL; - dev_info_t *dip; - raw_dev_t *cdi; - int rc, cd; - int flags; - int rval; - - *partitionp = -1; - *bsizep = 0; - - if ((cd = __raw_get_cd(dev)) == -1 || !_nsc_raw_files[cd].in_use) - return (-1); - - cdi = &_nsc_raw_files[cd]; - ops = cdi->major->devops; - - if (ops == NULL) { - return (-1); - } - - rc = (*ops->devo_getinfo)(NULL, DDI_INFO_DEVT2DEVINFO, - (void *)dev, (void **)&dip); - - if (rc != DDI_SUCCESS || dip == NULL) { - return (-1); - } - - if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, DDI_KERNEL_IOCTL)) { - return (-1); - } - - cred = ddi_get_cred(); - - flags = FKIOCTL | FREAD | FWRITE | DATAMODEL_NATIVE; - - dki_info = kmem_alloc(sizeof (*dki_info), KM_SLEEP); - - /* DKIOCINFO */ - rc = (*cdi->major->ioctl)(dev, DKIOCINFO, - (intptr_t)dki_info, flags, cred, &rval); - - if (rc != 0) { - goto out; - } - - /* return partition number */ - *partitionp = (int)dki_info->dki_partition; - - vtoc = kmem_alloc(sizeof (*vtoc), KM_SLEEP); - - /* DKIOCGVTOC */ - rc = (*cdi->major->ioctl)(dev, DKIOCGVTOC, - (intptr_t)vtoc, flags, cred, &rval); - - if (rc) { - /* DKIOCGVTOC failed, but there might be an EFI label */ - rc = -1; - -#ifdef DKIOCPARTITION - /* do we have an EFI partition table? 
*/ - p64 = kmem_alloc(sizeof (*p64), KM_SLEEP); - p64->p_partno = (uint_t)*partitionp; - - /* DKIOCPARTITION */ - rc = (*cdi->major->ioctl)(dev, DKIOCPARTITION, - (intptr_t)p64, flags, cred, &rval); - - if (rc == 0) { - /* found EFI, return size */ - *bsizep = (uint64_t)p64->p_size; - } else { - /* both DKIOCGVTOC and DKIOCPARTITION failed - error */ - rc = -1; - } -#endif - - goto out; - } - - if ((vtoc->v_sanity != VTOC_SANE) || - (vtoc->v_version != V_VERSION && vtoc->v_version != 0) || - (dki_info->dki_partition > V_NUMPAR)) { - rc = -1; - goto out; - } - - *bsizep = (uint64_t)vtoc->v_part[(int)dki_info->dki_partition].p_size; - rc = 0; - -out: - if (dki_info) { - kmem_free(dki_info, sizeof (*dki_info)); - } - - if (vtoc) { - kmem_free(vtoc, sizeof (*vtoc)); - } - -#ifdef DKIOCPARTITION - if (p64) { - kmem_free(p64, sizeof (*p64)); - } -#endif - - return (rc); -} - - -/* - * Ugly, ugly, ugly. - * - * Some volume managers (Veritas) don't support layered ioctls - * (no FKIOCTL support, no DDI_KERNEL_IOCTL property defined) AND - * do not support the properties for bdev_Size()/bdev_size(). - * - * If the underlying driver has specified DDI_KERNEL_IOCTL, then we use - * the FKIOCTL technique. Otherwise ... - * - * The only reliable way to get the partition size, is to bounce the - * command through user land (nskernd). - * - * Then, SunCluster PXFS blocks access at the vnode level to device - * nodes during failover / switchover, so a read_vtoc() function call - * from user land deadlocks. So, we end up coming back into the kernel - * to go directly to the underlying device driver - that's what - * nskern_bsize() is doing below. - * - * There has to be a better way ... 
- */ - -static int -_raw_init_dev(dev_t fd, uint64_t *sizep, int *partitionp) -{ - struct nskernd *nsk; - int rc, cd; - - if ((cd = __raw_get_cd(fd)) == -1 || !_nsc_raw_files[cd].in_use) - return (EIO); - - /* try the in-kernel way */ - - rc = _raw_get_bsize(fd, sizep, partitionp); - if (rc == 0) { - return (0); - } - - /* fallback to the the slow way */ - - nsk = kmem_zalloc(sizeof (*nsk), KM_SLEEP); - nsk->command = NSKERND_BSIZE; - nsk->data1 = (uint64_t)0; - nsk->data2 = (uint64_t)fd; - (void) strncpy(nsk->char1, _nsc_raw_files[cd].path, NSC_MAXPATH); - - rc = nskernd_get(nsk); - if (rc == 0) { - *partitionp = (int)nsk->data2; - *sizep = nsk->data1; - } - - kmem_free(nsk, sizeof (*nsk)); - return (rc < 0 ? EIO : 0); -} - - -static int -_raw_attach_io(dev_t fd) -{ - int cd; - - if ((cd = __raw_get_cd(fd)) == -1 || !_nsc_raw_files[cd].in_use) - return (EIO); - - return (_raw_init_dev(fd, &_nsc_raw_files[cd].size, - &_nsc_raw_files[cd].partition)); -} - - -/* - * See the comment above _raw_init_dev(). - */ - -int -nskern_bsize(struct nscioc_bsize *bsize, int *rvp) -{ - struct cred *cred; - raw_dev_t *cdi; - int errno = 0; - int flag; - int cd; - - *rvp = 0; - - if (bsize == NULL || rvp == NULL) - return (EINVAL); - - cd = __raw_get_cd(bsize->raw_fd); - if (cd == -1 || !_nsc_raw_files[cd].in_use) - return (EIO); - - cdi = &_nsc_raw_files[cd]; - cred = ddi_get_cred(); - - /* - * ddi_mmap_get_model() returns the model for this user thread - * which is what we want - get_udatamodel() is not public. - */ - - flag = FREAD | FWRITE | ddi_mmap_get_model(); - - if (bsize->efi == 0) { - /* DKIOCINFO */ - errno = (*cdi->major->ioctl)(bsize->raw_fd, - DKIOCINFO, (intptr_t)bsize->dki_info, flag, cred, rvp); - - if (errno) { - return (errno); - } - - /* DKIOCGVTOC */ - errno = (*cdi->major->ioctl)(bsize->raw_fd, - DKIOCGVTOC, (intptr_t)bsize->vtoc, flag, cred, rvp); - - if (errno) { - return (errno); - } - } else { -#ifdef DKIOCPARTITION - /* do we have an EFI partition table? 
*/ - errno = (*cdi->major->ioctl)(bsize->raw_fd, - DKIOCPARTITION, (intptr_t)bsize->p64, flag, cred, rvp); - - if (errno) { - return (errno); - } -#endif - } - - return (0); -} - - -/* - * Private function for sv to use. - */ -int -nskern_partition(dev_t fd, int *partitionp) -{ - uint64_t size; - int cd, rc; - - if ((cd = __raw_get_cd(fd)) == -1 || !_nsc_raw_files[cd].in_use) - return (EIO); - - if ((*partitionp = _nsc_raw_files[cd].partition) != -1) { - return (0); - } - - rc = _raw_init_dev(fd, &size, partitionp); - if (rc != 0 || *partitionp < 0) { - return (EIO); - } - - return (0); -} - - -nsc_def_t _nsc_raw_def[] = { - "Open", (uintptr_t)_raw_open, 0, - "Close", (uintptr_t)_raw_close, 0, - "Attach", (uintptr_t)_raw_attach_io, 0, - "UserRead", (uintptr_t)_raw_uread, 0, - "UserWrite", (uintptr_t)_raw_uwrite, 0, - "PartSize", (uintptr_t)_raw_partsize, 0, - "MaxFbas", (uintptr_t)_raw_maxfbas, 0, - "Control", (uintptr_t)_raw_control, 0, - "Provide", NSC_DEVICE, 0, - 0, 0, 0 -}; diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_solaris.c b/usr/src/uts/common/avs/ns/solaris/nsc_solaris.c deleted file mode 100644 index e4894875a9..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_solaris.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * This file contains interface code to the kernel. - */ - -/* LINTLIBRARY */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/kmem.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/uio.h> -#include <sys/conf.h> -#include <sys/ddi.h> - -#include <sys/nsctl/nsctl.h> -#include "nsc_list.h" - -/* - * _nsc_init_start - * - * ARGUMENTS: - * - * RETURNS: - * - * USAGE: - * - * CALLED BY: - */ -void -_nsc_init_start(void) -{ -} - -/* - * _nsc_init_os - - * - * ARGUMENTS: - * - * RETURNS: - * - * USAGE: - * - * CALLED BY: - */ -void -_nsc_init_os(void) -{ -} - -/* - * _nsc_deinit_os - - * - * ARGUMENTS: - * - * RETURNS: - * - * USAGE: - * - * CALLED BY: - */ -void -_nsc_deinit_os(void) -{ -} - -/* dummy routine unless RMS/MC is really running */ -void -_nsc_self_alive() -{ -} - -/* - * Check other nodes: checks for the heart_beat of other nodes and decides - * if a node that was up went down... or a node that was down is now - * up. Events NODE_UP and NODE_DOWN are posted to myself (this node) - * Any processing that happens in these event handlers SHOULD abide by - * the health monitor rules for the health monitor to work correctly. - * If excessive computation during these events is required, consider the - * possibility of forking of a process OR breaking up the computation into - * smaller parts, and making sure that we call "SELF_ALIVE()" "often". - * This is not the suggested mechanism, but there are times when we need it. - */ - -void -_nsc_check_other_nodes() -{ -} - -/* - * Is our partner active ? 
(Should never block) - */ -int -alternate_health_hbeat() -{ - return (0); -} - - -static int -mark_rm_pages_to_dump(addr, size, dump) -caddr_t addr; -int size, dump; -{ - return (0); -} - - -void -_nsc_mark_pages(caddr_t addr, size_t size, int dump) -{ - if (mark_rm_pages_to_dump(addr, (int)size, dump) < 0) - cmn_err(CE_WARN, "_nsc_mark_pages: %s failed - 0x%p size %d", - (dump ? "mark" : "unmark"), addr, (int)size); -} diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_thread.c b/usr/src/uts/common/avs/ns/solaris/nsc_thread.c deleted file mode 100644 index d75cddf200..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_thread.c +++ /dev/null @@ -1,1026 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/debug.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/ddi.h> -#include <sys/errno.h> -#include "nsc_thread.h" - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#include "../nsctl.h" -#include "nskernd.h" -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - - -/* - * Global data - */ -static nstset_t *nst_sets; -static nsthread_t *nst_pending; -static kmutex_t nst_global_lock; /* nst_sets, nst_pending */ - - -/* - * nst_kmem_xalloc - * - * Poll for memory. - */ -static void * -nst_kmem_xalloc(size_t size, int sec, void *(*alloc)(size_t, int)) -{ - clock_t usec = sec * 1000000; - void *p = NULL; - - while (usec > 0) { - if ((p = (*alloc)(size, KM_NOSLEEP)) != NULL) - return (p); - - delay(drv_usectohz((clock_t)NST_MEMORY_TIMEOUT)); - usec -= NST_MEMORY_TIMEOUT; - } - - cmn_err(CE_WARN, "!nst_kmem_xalloc: failed to alloc %ld bytes", size); - return (NULL); -} - - -#if 0 -/* currently unused */ -static void * -nst_kmem_alloc(size_t size, int sec) -{ - return (nst_kmem_xalloc(size, sec, kmem_alloc)); -} -#endif - - -static void * -nst_kmem_zalloc(size_t size, int sec) -{ - return (nst_kmem_xalloc(size, sec, kmem_zalloc)); -} - - -/* - * Queue stuff that should be in the DDI. - */ - -/* - * nst_insque - * - * Insert entryp after predp in a doubly linked list. - */ -static void -nst_insque(nst_q_t *entryp, nst_q_t *predp) -{ - entryp->q_back = predp; - entryp->q_forw = predp->q_forw; - predp->q_forw = entryp; - entryp->q_forw->q_back = entryp; -} -#ifndef DS_DDICT -#pragma inline(nst_insque) /* compiler hint to inline this function */ -#endif - - -/* - * nst_remque - * - * Remove entryp from a doubly linked list. 
- */ -static void -nst_remque(nst_q_t *entryp) -{ - entryp->q_back->q_forw = entryp->q_forw; - entryp->q_forw->q_back = entryp->q_back; - entryp->q_forw = entryp->q_back = NULL; -} -#ifndef DS_DDICT -#pragma inline(nst_remque) /* compiler hint to inline this function */ -#endif - - -/* - * nst_thread_init - * - * Initialise the dynamic part of a thread - */ -static void -nst_thread_init(nsthread_t *tp) -{ - ASSERT(MUTEX_HELD(&((tp->tp_set)->set_lock))); - ASSERT(!(tp->tp_flag & NST_TF_INUSE)); - tp->tp_flag = NST_TF_INUSE; - tp->tp_func = NULL; - tp->tp_arg = NULL; -} -#ifndef DS_DDICT -#pragma inline(nst_thread_init) /* compiler hint to inline this function */ -#endif - - -/* - * nst_thread_alloc - * - * Return an nsthread from the free pool, NULL if none - */ -static nsthread_t * -nst_thread_alloc(nstset_t *set, const int sleep) -{ - nsthread_t *tp = NULL; - - mutex_enter(&set->set_lock); - - if (set->set_flag & NST_SF_KILL) { - mutex_exit(&set->set_lock); - DTRACE_PROBE1(nst_thread_alloc_err_kill, nstset_t *, set); - return (NULL); - } - - do { - tp = (nsthread_t *)set->set_free.q_forw; - if (tp != (nsthread_t *)&set->set_free) - nst_remque(&tp->tp_link); - else { - tp = NULL; - - if (!sleep) - break; - - set->set_res_cnt++; - - DTRACE_PROBE2(nst_thread_alloc_sleep, nstset_t *, set, - int, set->set_res_cnt); - - cv_wait(&set->set_res_cv, &set->set_lock); - - DTRACE_PROBE1(nst_thread_alloc_wake, nstset_t *, set); - - set->set_res_cnt--; - - if (set->set_flag & NST_SF_KILL) - break; - } - } while (tp == NULL); - - /* initialise the thread */ - - if (tp != NULL) { - nst_thread_init(tp); - set->set_nlive++; - } - - mutex_exit(&set->set_lock); - - return (tp); -} - - -/* - * nst_thread_free - * - * Requeue a thread on the free or reuse pools. Threads are always - * queued to the tail of the list to prevent rapid recycling. - * - * Must be called with set->set_lock held. 
- */ -static void -nst_thread_free(nsthread_t *tp) -{ - nstset_t *set = tp->tp_set; - - if (!set) - return; - - ASSERT(MUTEX_HELD(&set->set_lock)); - - tp->tp_flag &= ~NST_TF_INUSE; - if (tp->tp_flag & NST_TF_DESTROY) { - /* add self to reuse pool */ - nst_insque(&tp->tp_link, set->set_reuse.q_back); - } else { - /* add self to free pool */ - nst_insque(&tp->tp_link, set->set_free.q_back); - if (set->set_res_cnt > 0) - cv_broadcast(&set->set_res_cv); - } -} - - -/* - * nst_thread_run - * - * The first function that a new thread runs on entry from user land. - * This is the main thread function that handles thread work and death. - */ -static void -nst_thread_run(void *arg) -{ - nsthread_t *tp; - nstset_t *set; - int first = 1; - - mutex_enter(&nst_global_lock); - - /* check if this thread is still on the pending list */ - - for (tp = nst_pending; tp; tp = tp->tp_chain) { - if (tp == (nsthread_t *)arg) { - break; - } - } - - if (!tp) { - mutex_exit(&nst_global_lock); - return; - } - - if (!tp->tp_set) { - mutex_exit(&nst_global_lock); -#ifdef DEBUG - cmn_err(CE_WARN, "!nst_thread_run(%p): already dead?", - (void *)tp); -#endif - return; - } - - /* check that the set is still on the list of sets */ - - for (set = nst_sets; set; set = set->set_next) { - if (set == tp->tp_set) { - break; - } - } - - if (!set) { - mutex_exit(&nst_global_lock); -#ifdef DEBUG - cmn_err(CE_WARN, "!nst_thread_run(%p): no set?", (void *)tp); -#endif - return; - } - - mutex_enter(&set->set_lock); - - mutex_exit(&nst_global_lock); - - /* - * Mark the parent. - * The parent won't actually run until set->set_lock is dropped. - */ - - tp->tp_flag &= ~NST_TF_PENDING; - cv_broadcast(&tp->tp_cv); - - /* - * Main loop. - */ - - while (!(set->set_flag & NST_SF_KILL) && - !(tp->tp_flag & NST_TF_KILL)) { - /* - * On initial entry the caller will add this thread to - * the free pool if required, there after the thread - * must do it for itself. 
- */ - - if (first) { - first = 0; - } else { - nst_thread_free(tp); - set->set_nlive--; - } - - DTRACE_PROBE1(nst_thread_run_sleep, nsthread_t *, tp); - - cv_wait(&tp->tp_cv, &set->set_lock); - - DTRACE_PROBE1(nst_thread_run_wake, nsthread_t *, tp); - - if ((set->set_flag & NST_SF_KILL) || - (tp->tp_flag & NST_TF_KILL)) { - break; - } - - mutex_exit(&set->set_lock); - - if (tp->tp_func) { - (*tp->tp_func)(tp->tp_arg); - tp->tp_func = 0; - tp->tp_arg = 0; - } -#ifdef DEBUG - else { - cmn_err(CE_WARN, - "!nst_thread_run(%p): NULL function pointer", - (void *)tp); - } -#endif - - mutex_enter(&set->set_lock); - } - - /* remove self from the free and/or reuse pools */ - if (tp->tp_link.q_forw != NULL || tp->tp_link.q_back != NULL) { - ASSERT(tp->tp_link.q_forw != NULL && - tp->tp_link.q_back != NULL); - nst_remque(&tp->tp_link); - } - - set->set_nthread--; - tp->tp_flag &= ~NST_TF_KILL; - - /* wake the context that is running nst_destroy() or nst_del_thread() */ - cv_broadcast(&set->set_kill_cv); - - mutex_exit(&set->set_lock); - - /* suicide */ -} - - -/* - * nst_thread_destroy - * - * Free up the kernel level resources. The thread must already be - * un-chained from the set, and the caller must not be the thread - * itself. - */ -static void -nst_thread_destroy(nsthread_t *tp) -{ - if (!tp) - return; - - ASSERT(tp->tp_chain == NULL); - - tp->tp_set = NULL; - - if (tp->tp_flag & NST_TF_INUSE) { - cmn_err(CE_WARN, "!nst_thread_destroy(%p): still in use!", - (void *)tp); - /* leak the thread */ - return; - } - - cv_destroy(&tp->tp_cv); - kmem_free(tp, sizeof (*tp)); -} - - -/* - * nst_thread_create - * - * Create and return a new thread from a threadset. 
- */ -static nsthread_t * -nst_thread_create(nstset_t *set) -{ - nsthread_t *tp, **tpp; - int rc; - - /* try and reuse a thread first */ - - if (set->set_reuse.q_forw != &set->set_reuse) { - mutex_enter(&set->set_lock); - - tp = (nsthread_t *)set->set_reuse.q_forw; - if (tp != (nsthread_t *)&set->set_reuse) - nst_remque(&tp->tp_link); - else - tp = NULL; - - mutex_exit(&set->set_lock); - - if (tp) { - DTRACE_PROBE2(nst_thread_create_end, nstset_t *, set, - nsthread_t *, tp); - return (tp); - } - } - - /* create a thread using nskernd */ - - tp = nst_kmem_zalloc(sizeof (*tp), 2); - if (!tp) { - DTRACE_PROBE1(nst_thread_create_err_mem, nstset_t *, set); - return (NULL); - } - - cv_init(&tp->tp_cv, NULL, CV_DRIVER, NULL); - tp->tp_flag = NST_TF_PENDING; - tp->tp_set = set; - - mutex_enter(&set->set_lock); - - if (set->set_flag & NST_SF_KILL) { - mutex_exit(&set->set_lock); - nst_thread_destroy(tp); -#ifdef DEBUG - cmn_err(CE_WARN, "!nst_thread_create: called during destroy"); -#endif - DTRACE_PROBE2(nst_thread_create_err_kill, nstset_t *, set, - nsthread_t *, tp); - return (NULL); - } - - set->set_pending++; - - mutex_exit(&set->set_lock); - - mutex_enter(&nst_global_lock); - - tp->tp_chain = nst_pending; - nst_pending = tp; - - mutex_exit(&nst_global_lock); - - DTRACE_PROBE2(nst_dbg_thr_create_proc_start, nstset_t *, set, - nsthread_t *, tp); - - rc = nsc_create_process(nst_thread_run, tp, 0); - - DTRACE_PROBE2(nst_dbg_thr_create_proc_end, nstset_t *, set, - nsthread_t *, tp); - - if (!rc) { - /* - * wait for child to start and check in. - */ - - mutex_enter(&set->set_lock); - - while (tp->tp_flag & NST_TF_PENDING) - cv_wait(&tp->tp_cv, &set->set_lock); - - mutex_exit(&set->set_lock); - } - - /* - * remove from pending chain. 
- */ - - mutex_enter(&nst_global_lock); - - for (tpp = &nst_pending; (*tpp); tpp = &((*tpp)->tp_chain)) { - if (*tpp == tp) { - *tpp = tp->tp_chain; - tp->tp_chain = NULL; - break; - } - } - - mutex_exit(&nst_global_lock); - - /* - * Check for errors and return if required. - */ - - mutex_enter(&set->set_lock); - - set->set_pending--; - - if (rc || - (set->set_flag & NST_SF_KILL) || - (set->set_nthread + 1) > USHRT_MAX) { - if (rc == 0) { - /* - * Thread is alive, and needs to be woken and killed. - */ - tp->tp_flag |= NST_TF_KILL; - cv_broadcast(&tp->tp_cv); - - while (tp->tp_flag & NST_TF_KILL) - cv_wait(&set->set_kill_cv, &set->set_lock); - } - mutex_exit(&set->set_lock); - - nst_thread_destroy(tp); -#ifdef DEBUG - cmn_err(CE_WARN, - "!nst_thread_create: error (rc %d, set_flag %x, " - "set_nthread %d)", rc, set->set_flag, set->set_nthread); -#endif - DTRACE_PROBE2(nst_thread_create_err_proc, nstset_t *, set, - nsthread_t *, tp); - - return (NULL); - } - - /* - * Move into set proper. - */ - - tp->tp_chain = set->set_chain; - set->set_chain = tp; - set->set_nthread++; - - mutex_exit(&set->set_lock); - - return (tp); -} - - -/* - * nst_create - * - * Start a new thread from a thread set, returning the - * address of the thread, or NULL on failure. - * - * All threads are created detached. - * - * Valid flag values: - * - * NST_CREATE - create a new thread rather than using one - * from the threadset. Once the thread - * completes it will not be added to the active - * portion of the threadset, but will be cached - * on the reuse chain, and so is available for - * subsequent NST_CREATE or nst_add_thread() - * operations. - * - * NST_SLEEP - wait for a thread to be available instead of - * returning NULL. Has no meaning with NST_CREATE. - * - * Returns a pointer to the new thread, or NULL. 
- */ -nsthread_t * -nst_create(nstset_t *set, void (*func)(), blind_t arg, int flags) -{ - nsthread_t *tp = NULL; - - if (!set) - return (NULL); - - if (set->set_flag & NST_SF_KILL) { - DTRACE_PROBE1(nst_create_err_kill, nstset_t *, set); - return (NULL); - } - - if (flags & NST_CREATE) { - /* get new thread */ - - if ((tp = nst_thread_create(set)) == NULL) - return (NULL); - - /* initialise the thread */ - - mutex_enter(&set->set_lock); - nst_thread_init(tp); - tp->tp_flag |= NST_TF_DESTROY; - set->set_nlive++; - mutex_exit(&set->set_lock); - } else { - if (!(tp = nst_thread_alloc(set, (flags & NST_SLEEP)))) - return (NULL); - } - - /* set thread running */ - - tp->tp_func = func; - tp->tp_arg = arg; - - mutex_enter(&set->set_lock); - cv_broadcast(&tp->tp_cv); - mutex_exit(&set->set_lock); - - return (tp); -} - - -/* - * nst_destroy - * - * Destroy a thread set created by nst_init(). It is the - * caller's responsibility to ensure that all prior thread - * calls have completed prior to this call and that the - * caller is not executing from within thread context. 
- */ -void -nst_destroy(nstset_t *set) -{ - nsthread_t *tp, *ntp; - nstset_t *sp, **spp; - - if (!set) - return; - - mutex_enter(&nst_global_lock); - - for (sp = nst_sets; sp; sp = sp->set_next) { - if (sp == set) { - break; - } - } - - if (!sp) { - mutex_exit(&nst_global_lock); -#ifdef DEBUG - cmn_err(CE_WARN, "!nst_destroy(%p): no set?", (void *)set); -#endif - DTRACE_PROBE1(nst_destroy_err_noset, nstset_t *, set); - return; - } - - mutex_enter(&set->set_lock); - - mutex_exit(&nst_global_lock); - - if (set->set_flag & NST_SF_KILL) { - /* - * Wait for a pending destroy to complete - */ - -#ifdef DEBUG - cmn_err(CE_WARN, - "!nst_destroy(%p): duplicate destroy of set", (void *)set); -#endif - - set->set_destroy_cnt++; - (void) cv_wait_sig(&set->set_destroy_cv, &set->set_lock); - set->set_destroy_cnt--; - - mutex_exit(&set->set_lock); - - DTRACE_PROBE1(nst_destroy_end, nstset_t *, set); - - return; - } - - set->set_flag |= NST_SF_KILL; - - /* Wake all threads in nst_create(NST_SLEEP) */ - cv_broadcast(&set->set_res_cv); - - /* - * Wake all the threads chained in the set. 
- */ - - for (tp = set->set_chain; tp; tp = tp->tp_chain) - cv_broadcast(&tp->tp_cv); - - /* Wait for the threads to exit */ - - while ((set->set_free.q_forw != &set->set_free) || - (set->set_reuse.q_forw != &set->set_reuse)) - cv_wait(&set->set_kill_cv, &set->set_lock); - - /* Unchain and destroy all the threads in the set */ - - tp = set->set_chain; - set->set_chain = 0; - - while (tp) { - ntp = tp->tp_chain; - tp->tp_chain = 0; - - nst_thread_destroy(tp); - - tp = ntp; - } - - mutex_exit(&set->set_lock); - - mutex_enter(&nst_global_lock); - - /* remove the set from the chain */ - - for (spp = &nst_sets; *spp; spp = &((*spp)->set_next)) { - if (*spp == set) { - *spp = set->set_next; - set->set_next = NULL; - break; - } - } - - mutex_exit(&nst_global_lock); - - mutex_enter(&set->set_lock); - -#ifdef DEBUG - if (set->set_nthread != 0) { - cmn_err(CE_WARN, "!nst_destroy(%p): nthread != 0 (%d)", - (void *)set, set->set_nthread); - } -#endif - - /* Allow any waiters (above) to continue */ - - cv_broadcast(&set->set_destroy_cv); - - while (set->set_destroy_cnt > 0 || set->set_pending > 0 || - set->set_res_cnt > 0) { - mutex_exit(&set->set_lock); - delay(drv_usectohz((clock_t)NST_KILL_TIMEOUT)); - mutex_enter(&set->set_lock); - } - - mutex_exit(&set->set_lock); - - if (set->set_nthread != 0) { - /* leak the set control structure */ - - DTRACE_PROBE1(nst_destroy_end, nstset_t *, set); - - return; - } - - cv_destroy(&set->set_res_cv); - cv_destroy(&set->set_kill_cv); - cv_destroy(&set->set_destroy_cv); - mutex_destroy(&set->set_lock); - kmem_free(set, sizeof (*set)); - -} - - -/* - * nst_add_thread - * - * Add more threads into an existing thread set. - * Returns the number successfully added. 
- */ -int -nst_add_thread(nstset_t *set, int nthread) -{ - nsthread_t *tp; - int i; - - if (!set || nthread < 1) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!nst_add_thread(%p, %d) - bad args", (void *)set, nthread); -#endif - return (0); - } - - for (i = 0; i < nthread; i++) { - /* get new thread */ - - if ((tp = nst_thread_create(set)) == NULL) - break; - - /* add to free list */ - - mutex_enter(&set->set_lock); - nst_thread_free(tp); - mutex_exit(&set->set_lock); - } - - return (i); -} - - -/* - * nst_del_thread - * - * Removes threads from an existing thread set. - * Returns the number successfully removed. - */ -int -nst_del_thread(nstset_t *set, int nthread) -{ - nsthread_t **tpp, *tp; - int i; - - if (!set || nthread < 1) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!nst_del_thread(%p, %d) - bad args", (void *)set, nthread); -#endif - return (0); - } - - for (i = 0; i < nthread; i++) { - /* get thread */ - - if (!(tp = nst_thread_alloc(set, FALSE))) - break; - - mutex_enter(&set->set_lock); - - /* unlink from the set */ - - for (tpp = &set->set_chain; *tpp; tpp = &(*tpp)->tp_chain) { - if (*tpp == tp) { - *tpp = tp->tp_chain; - tp->tp_chain = NULL; - break; - } - } - - /* kill the thread */ - - tp->tp_flag |= NST_TF_KILL; - tp->tp_flag &= ~NST_TF_INUSE; - cv_broadcast(&tp->tp_cv); - - /* wait for thread to exit */ - - while (tp->tp_flag & NST_TF_KILL) - cv_wait(&set->set_kill_cv, &set->set_lock); - - set->set_nlive--; - mutex_exit(&set->set_lock); - - /* free kernel resources */ - - nst_thread_destroy(tp); - } - - return (i); -} - - -/* - * nst_init - * - * Initialise a new nsthread set, returning its address or - * NULL in the event of failure. The set should be destroyed - * by calling nst_destroy(). 
- */ -nstset_t * -nst_init(char *name, int nthread) -{ - nstset_t *set, *sp; - int len, i; - - if (nthread < 1) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nst_init: invalid arg"); -#endif - return (NULL); - } - - if (nthread > USHRT_MAX) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nst_init: arg limit exceeded"); -#endif - return (NULL); - } - - if (!(set = nst_kmem_zalloc(sizeof (*set), 2))) - return (NULL); - - len = strlen(name); - if (len >= sizeof (set->set_name)) - len = sizeof (set->set_name) - 1; - - bcopy(name, set->set_name, len); - - mutex_init(&set->set_lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&set->set_destroy_cv, NULL, CV_DRIVER, NULL); - cv_init(&set->set_kill_cv, NULL, CV_DRIVER, NULL); - cv_init(&set->set_res_cv, NULL, CV_DRIVER, NULL); - - set->set_reuse.q_forw = set->set_reuse.q_back = &set->set_reuse; - set->set_free.q_forw = set->set_free.q_back = &set->set_free; - - mutex_enter(&nst_global_lock); - - /* check for duplicates */ - - for (sp = nst_sets; sp; sp = sp->set_next) { - if (strcmp(sp->set_name, set->set_name) == 0) { - /* duplicate */ - mutex_exit(&nst_global_lock); - cv_destroy(&set->set_res_cv); - cv_destroy(&set->set_kill_cv); - cv_destroy(&set->set_destroy_cv); - mutex_destroy(&set->set_lock); - kmem_free(set, sizeof (*set)); -#ifdef DEBUG - cmn_err(CE_WARN, - "!nst_init: duplicate set \"%s\"", name); -#endif - /* add threads if necessary */ - - if (nthread > sp->set_nthread) { - i = nst_add_thread(sp, - nthread - sp->set_nthread); -#ifdef DEBUG - if (i != (nthread - sp->set_nthread)) - cmn_err(CE_WARN, - "!nst_init: failed to allocate %d " - "threads (got %d)", - (nthread - sp->set_nthread), i); -#endif - } - - /* return pointer to existing set */ - - return (sp); - } - } - - /* add new set to chain */ - set->set_next = nst_sets; - nst_sets = set; - - mutex_exit(&nst_global_lock); - - i = nst_add_thread(set, nthread); - - if (i != nthread) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!nst_init: failed to allocate %d threads (got %d)", - nthread, i); 
-#endif - nst_destroy(set); - return (NULL); - } - - return (set); -} - - -/* - * nst_nlive - * - * Return the number of live threads in a set. - */ -int -nst_nlive(nstset_t *set) -{ - return (set ? set->set_nlive : 0); -} - - -/* - * nst_nthread - * - * Return the number of threads in the set. - */ -int -nst_nthread(nstset_t *set) -{ - return (set ? set->set_nthread : 0); -} - - -/* - * nst_shutdown - * - * Called by nskern to shutdown the nsthread software. - */ -void -nst_shutdown(void) -{ - nstset_t *set; - - mutex_enter(&nst_global_lock); - - while ((set = nst_sets) != NULL) { - mutex_exit(&nst_global_lock); - nst_destroy(set); - mutex_enter(&nst_global_lock); - } - - mutex_exit(&nst_global_lock); - mutex_destroy(&nst_global_lock); -} - - -/* - * nst_startup - * - * Called by nskern to initialise the nsthread software - */ -int -nst_startup(void) -{ - mutex_init(&nst_global_lock, NULL, MUTEX_DRIVER, NULL); - return (0); -} diff --git a/usr/src/uts/common/avs/ns/solaris/nsc_thread.h b/usr/src/uts/common/avs/ns/solaris/nsc_thread.h deleted file mode 100644 index d41901820e..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nsc_thread.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _NSC_THREAD_H -#define _NSC_THREAD_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -#include <sys/ksynch.h> /* for kmutex_t and kcondvar_t */ - -/* - * A simple way to marshal kthreads into sets for use by nsctl / nskern - * clients. The ns threads are created in user land by nskernd, and - * then call into the nskern kernel module for allocation into sets. - */ - -struct nsthread; -struct nstset; - -#ifndef _BLIND_T -#define _BLIND_T -typedef void * blind_t; -#endif /* _BLIND_T */ - - -/* - * Queue stuff that should really be in the DDI. - */ - -typedef struct nst_q { - struct nst_q *q_forw; - struct nst_q *q_back; -} nst_q_t; - - -/* - * Per thread data structure. - */ - -typedef struct nsthread { - nst_q_t tp_link; /* Doubly linked free list */ - - struct nstset *tp_set; /* Set to which thread belongs */ - struct nsthread *tp_chain; /* Link in chain of threads in set */ - - kcondvar_t tp_cv; /* Suspend/resume synchronisation */ - - /* - * Everything past this point is cleared when the thread is - * initialised for (re)use. 
- */ - - int tp_flag; /* State (below) */ - - void (*tp_func)(); /* First function */ - blind_t tp_arg; /* Argument to tp_func */ -} nsthread_t; - -/* - * Flags for nst_init - */ -#define NST_CREATE 0x1 /* Create resources to run thread */ -#define NST_SLEEP 0x2 /* Wait for resources to be available */ - -/* - * Thread state flags - */ -#define NST_TF_INUSE 0x1 /* Thread currently in use */ -#define NST_TF_ACTIVE 0x2 /* Thread is being manipulated */ -#define NST_TF_PENDING 0x4 /* Thread is pending a create */ -#define NST_TF_DESTROY 0x8 /* Destroy thread when finished */ -#define NST_TF_KILL 0x10 /* Thread is being killed */ - -/* - * Thread set. - */ -typedef struct nstset { - struct nstset *set_next; /* Next set in list of sets */ - - nsthread_t *set_chain; /* Chain of all threads in set */ - nst_q_t set_reuse; /* Chain of reusable threads */ - nst_q_t set_free; /* Chain of free threads */ - - char set_name[32]; /* Name associated with set */ - - ushort_t set_nlive; /* No. of active threads */ - ushort_t set_nthread; /* No. of threads in set */ - int set_flag; /* State (below) */ - int set_pending; /* Operation is pending */ - - kmutex_t set_lock; /* Mutex for chains and counts */ - kcondvar_t set_kill_cv; /* Kill synchronisation */ - kcondvar_t set_destroy_cv; /* Shutdown synchronisation */ - volatile int set_destroy_cnt; /* No. of waiters */ - - kcondvar_t set_res_cv; /* Resource alloc synchronisation */ - int set_res_cnt; /* No. 
of waiters */ -} nstset_t; - -/* - * Set state flags - */ -#define NST_SF_KILL 1 /* Set is being killed */ - -/* - * General defines - */ -#define NST_KILL_TIMEOUT 100000 /* usec to wait for threads to die */ -#define NST_MEMORY_TIMEOUT 500000 /* usec to wait for memory */ - -/* - * Function prototypes - */ - -int nst_add_thread(nstset_t *, int); -nsthread_t *nst_create(nstset_t *, void (*)(), blind_t, int); -int nst_del_thread(nstset_t *, int); -void nst_destroy(nstset_t *); -nstset_t *nst_init(char *, int); -int nst_nlive(nstset_t *); -int nst_nthread(nstset_t *); -int nst_startup(void); -void nst_shutdown(void); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _NSC_THREAD_H */ diff --git a/usr/src/uts/common/avs/ns/solaris/nskern.conf b/usr/src/uts/common/avs/ns/solaris/nskern.conf deleted file mode 100644 index 1d5288f858..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nskern.conf +++ /dev/null @@ -1,24 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. 
-# -name="nskern" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/avs/ns/solaris/nskernd.c b/usr/src/uts/common/avs/ns/solaris/nskernd.c deleted file mode 100644 index e050bc917f..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nskernd.c +++ /dev/null @@ -1,298 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> - -#include "../nsctl.h" -#include "../nsctl/nsc_ioctl.h" -#include "nskernd.h" - -void *proc_nskernd; -int nskernd_iscluster; - -static kmutex_t nskernd_lock; - -static kcondvar_t nskernd_ask_cv; -static kcondvar_t nskernd_k_cv; -static kcondvar_t nskernd_u_cv; - -static volatile int nskernd_k_wait; -static volatile int nskernd_u_wait; - -static int nskernd_norun; - -static volatile int nskernd_ask; -static struct nskernd nskernd_kdata; - -void -nskernd_init(void) -{ - mutex_init(&nskernd_lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&nskernd_ask_cv, NULL, CV_DRIVER, NULL); - cv_init(&nskernd_k_cv, NULL, CV_DRIVER, NULL); - cv_init(&nskernd_u_cv, NULL, CV_DRIVER, NULL); - - nskernd_norun = 0; -} - - -void -nskernd_deinit(void) -{ - mutex_destroy(&nskernd_lock); - cv_destroy(&nskernd_ask_cv); - cv_destroy(&nskernd_k_cv); - cv_destroy(&nskernd_u_cv); -} - - -static int -nskernd_start(const int iscluster) -{ - int rc = 0; - - mutex_enter(&nskernd_lock); - - if (proc_nskernd != NULL) { - rc = 1; - } else if (nskernd_norun != 0) { - rc = 2; - } else { - (void) drv_getparm(UPROCP, (void *)&proc_nskernd); - nskernd_iscluster = iscluster; - } - - mutex_exit(&nskernd_lock); - - return (rc); -} - - -/* - * must be called with nskernd_lock held. 
- */ -void -nskernd_cleanup(void) -{ - proc_nskernd = NULL; - cv_broadcast(&nskernd_ask_cv); - cv_broadcast(&nskernd_k_cv); -} - - -void -nskernd_stop(void) -{ - mutex_enter(&nskernd_lock); - - if (proc_nskernd == NULL) { - nskernd_norun = 1; - mutex_exit(&nskernd_lock); - return; - } - - while (nskernd_u_wait == 0) { - nskernd_k_wait++; - cv_wait(&nskernd_k_cv, &nskernd_lock); - nskernd_k_wait--; - - if (proc_nskernd == NULL) { - mutex_exit(&nskernd_lock); - return; - } - } - - nskernd_kdata.command = NSKERND_STOP; - nskernd_kdata.data1 = (uint64_t)1; /* kernel has done cleanup */ - - nskernd_cleanup(); - - cv_signal(&nskernd_u_cv); - mutex_exit(&nskernd_lock); -} - - -int -nskernd_get(struct nskernd *nskp) -{ - mutex_enter(&nskernd_lock); - - if (proc_nskernd == NULL) { - mutex_exit(&nskernd_lock); - return (ENXIO); - } - - while (nskernd_u_wait == 0 || nskernd_ask) { - nskernd_k_wait++; - cv_wait(&nskernd_k_cv, &nskernd_lock); - nskernd_k_wait--; - - if (proc_nskernd == NULL) { - mutex_exit(&nskernd_lock); - return (ENXIO); - } - } - - bcopy(nskp, &nskernd_kdata, sizeof (*nskp)); - nskernd_ask++; - - cv_signal(&nskernd_u_cv); - - cv_wait(&nskernd_ask_cv, &nskernd_lock); - - if (proc_nskernd == NULL) { - nskernd_ask--; - mutex_exit(&nskernd_lock); - return (ENXIO); - } - - bcopy(&nskernd_kdata, nskp, sizeof (*nskp)); - nskernd_ask--; - - if (nskernd_k_wait > 0) - cv_signal(&nskernd_k_cv); - - mutex_exit(&nskernd_lock); - return (0); -} - - -int -nskernd_command(intptr_t arg, int mode, int *rvalp) -{ - struct nskernd *udata = NULL; - uint64_t arg1, arg2; - int rc; - - *rvalp = 0; - rc = 0; - - udata = kmem_alloc(sizeof (*udata), KM_SLEEP); - if (ddi_copyin((void *)arg, udata, sizeof (*udata), mode) < 0) { - kmem_free(udata, sizeof (*udata)); - return (EFAULT); - } - - switch (udata->command) { - case NSKERND_START: /* User program start */ - *rvalp = nskernd_start(udata->data1); - break; - - case NSKERND_STOP: /* User program requesting stop */ - 
mutex_enter(&nskernd_lock); - nskernd_cleanup(); - mutex_exit(&nskernd_lock); - break; - - case NSKERND_WAIT: - mutex_enter(&nskernd_lock); - - bcopy(udata, &nskernd_kdata, sizeof (*udata)); - - if (nskernd_ask > 0) - cv_signal(&nskernd_ask_cv); - - nskernd_u_wait++; - - if (cv_wait_sig(&nskernd_u_cv, &nskernd_lock) != 0) { - /* - * woken by cv_signal() or cv_broadcast() - */ - bcopy(&nskernd_kdata, udata, sizeof (*udata)); - } else { - /* - * signal - the user process has blocked all - * signals except for SIGTERM and the - * uncatchables, so the process is about to die - * and we need to clean up. - */ - udata->command = NSKERND_STOP; - udata->data1 = (uint64_t)1; /* cleanup done */ - - nskernd_cleanup(); - } - - nskernd_u_wait--; - - mutex_exit(&nskernd_lock); - - if (ddi_copyout(udata, (void *)arg, - sizeof (*udata), mode) < 0) { - rc = EFAULT; - break; - } - - break; - - case NSKERND_NEWLWP: - /* save kmem by freeing the udata structure */ - arg1 = udata->data1; - kmem_free(udata, sizeof (*udata)); - udata = NULL; - nsc_runlwp(arg1); - break; - - case NSKERND_LOCK: - /* save kmem by freeing the udata structure */ - arg1 = udata->data1; - arg2 = udata->data2; - kmem_free(udata, sizeof (*udata)); - udata = NULL; - nsc_lockchild(arg1, arg2); - break; - - default: - cmn_err(CE_WARN, "nskernd: unknown command %d", udata->command); - rc = EINVAL; - break; - } - - if (udata != NULL) { - kmem_free(udata, sizeof (*udata)); - udata = NULL; - } - - return (rc); -} - -/* - * This function is included for SV ioctl processing only. 
- */ - -int -nskernd_isdaemon(void) -{ - void *this_proc; - - if (proc_nskernd == NULL) - return (0); - if (drv_getparm(UPROCP, (void *)&this_proc) != 0) - return (0); - return (proc_nskernd == this_proc); -} diff --git a/usr/src/uts/common/avs/ns/solaris/nskernd.h b/usr/src/uts/common/avs/ns/solaris/nskernd.h deleted file mode 100644 index 7cd2bb4085..0000000000 --- a/usr/src/uts/common/avs/ns/solaris/nskernd.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _NSKERND_H -#define _NSKERND_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/nsctl/nsc_ioctl.h> /* for struct nskernd */ - -enum { - NSKERND_START, /* Start of daemon processing */ - NSKERND_STOP, /* Stop daemon */ - NSKERND_WAIT, /* Wait for next command */ - NSKERND_BSIZE, /* Get size in blocks of device */ - NSKERND_NEWLWP, /* Create a new lwp */ - NSKERND_LOCK, /* Obtain an inter-node lock */ - NSKERND_IIBITMAP /* mark an II bitmap as failed */ -}; - -/* - * The following #define is used by the ii kernel to write any - * flags information into the dscfg file when the bitmap volume - * fails. - */ -#define NSKERN_II_BMP_OPTION "flags" - -#ifdef _KERNEL - -extern void *proc_nskernd; -extern int nskernd_iscluster; - -extern void nskernd_init(void); -extern void nskernd_deinit(void); -extern void nskernd_stop(void); -extern int nskernd_get(struct nskernd *); - -extern void nsc_lockchild(uint64_t, uint64_t); -extern void nsc_runlwp(uint64_t); - -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _NSKERND_H */ diff --git a/usr/src/uts/common/avs/ns/sv/Makefile b/usr/src/uts/common/avs/ns/sv/Makefile deleted file mode 100644 index 7da8b58703..0000000000 --- a/usr/src/uts/common/avs/ns/sv/Makefile +++ /dev/null @@ -1,50 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# include global definitions -include ../../../../../Makefile.master - -HDRS= sv.h \ - sv_efi.h \ - sv_impl.h - -ROOTDIRS= $(ROOT)/usr/include/sys/nsctl - -ROOTHDRS= $(HDRS:%=$(ROOTDIRS)/%) - -CHECKHDRS= $(HDRS:%.h=%.check) - -# install rule -$(ROOTDIRS)/%: % - $(INS.file) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIRS) $(ROOTHDRS) - -$(ROOTDIRS): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/sv/sv.c b/usr/src/uts/common/avs/ns/sv/sv.c deleted file mode 100644 index 8ea464cb48..0000000000 --- a/usr/src/uts/common/avs/ns/sv/sv.c +++ /dev/null @@ -1,2816 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
- */ - -/* - * Storage Volume Character and Block Driver (SV) - * - * This driver implements a simplistic /dev/{r}dsk/ interface to a - * specified disk volume that is otherwise managed by the Prism - * software. The SV driver layers itself onto the underlying disk - * device driver by changing function pointers in the cb_ops - * structure. - * - * CONFIGURATION: - * - * 1. Configure the driver using the svadm utility. - * 2. Access the device as before through /dev/rdsk/c?t?d?s? - * - * LIMITATIONS: - * - * This driver should NOT be used to share a device between another - * DataServices user interface module (e.g., STE) and a user accessing - * the device through the block device in O_WRITE mode. This is because - * writes through the block device are asynchronous (due to the page - * cache) and so consistency between the block device user and the - * STE user cannot be guaranteed. - * - * Data is copied between system struct buf(9s) and nsc_vec_t. This is - * wasteful and slow. - */ - -#include <sys/debug.h> -#include <sys/types.h> - -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/varargs.h> -#include <sys/file.h> -#include <sys/open.h> -#include <sys/conf.h> -#include <sys/cred.h> -#include <sys/buf.h> -#include <sys/uio.h> -#ifndef DS_DDICT -#include <sys/pathname.h> -#endif -#include <sys/aio_req.h> -#include <sys/dkio.h> -#include <sys/vtoc.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/ddi.h> -#include <sys/sysmacros.h> -#include <sys/sunddi.h> -#include <sys/sunldi.h> -#include <sys/nsctl/nsvers.h> - -#include <sys/nsc_thread.h> -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#include "../nsctl.h" - - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "sv.h" -#include "sv_impl.h" -#include "sv_efi.h" - -#define MAX_EINTR_COUNT 1000 - -/* - * sv_mod_status - */ -#define 
SV_PREVENT_UNLOAD 1 -#define SV_ALLOW_UNLOAD 2 - -static const int sv_major_rev = ISS_VERSION_MAJ; /* Major number */ -static const int sv_minor_rev = ISS_VERSION_MIN; /* Minor number */ -static const int sv_micro_rev = ISS_VERSION_MIC; /* Micro number */ -static const int sv_baseline_rev = ISS_VERSION_NUM; /* Baseline number */ - -#ifdef DKIOCPARTITION -/* - * CRC32 polynomial table needed for computing the checksums - * in an EFI vtoc. - */ -static const uint32_t sv_crc32_table[256] = { CRC32_TABLE }; -#endif - -static clock_t sv_config_time; /* Time of successful {en,dis}able */ -static int sv_debug; /* Set non-zero for debug to syslog */ -static int sv_mod_status; /* Set to prevent modunload */ - -static dev_info_t *sv_dip; /* Single DIP for driver */ -static kmutex_t sv_mutex; /* Protect global lists, etc. */ - -static nsc_mem_t *sv_mem; /* nsctl memory allocator token */ - - -/* - * Per device and per major state. - */ - -#ifndef _SunOS_5_6 -#define UNSAFE_ENTER() -#define UNSAFE_EXIT() -#else -#define UNSAFE_ENTER() mutex_enter(&unsafe_driver) -#define UNSAFE_EXIT() mutex_exit(&unsafe_driver) -#endif - - /* hash table of major dev structures */ -static sv_maj_t *sv_majors[SV_MAJOR_HASH_CNT] = {0}; -static sv_dev_t *sv_devs; /* array of per device structures */ -static int sv_max_devices; /* SV version of nsc_max_devices() */ -static int sv_ndevices; /* number of SV enabled devices */ - -/* - * Threading. 
- */ - -int sv_threads_max = 1024; /* maximum # to dynamically alloc */ -int sv_threads = 32; /* # to pre-allocate (see sv.conf) */ -int sv_threads_extra = 0; /* addl # we would have alloc'ed */ - -static nstset_t *sv_tset; /* the threadset pointer */ - -static int sv_threads_hysteresis = 4; /* hysteresis for threadset resizing */ -static int sv_threads_dev = 2; /* # of threads to alloc per device */ -static int sv_threads_inc = 8; /* increment for changing the set */ -static int sv_threads_needed; /* number of threads needed */ -static int sv_no_threads; /* number of nsc_create errors */ -static int sv_max_nlive; /* max number of threads running */ - - - -/* - * nsctl fd callbacks. - */ - -static int svattach_fd(blind_t); -static int svdetach_fd(blind_t); - -static nsc_def_t sv_fd_def[] = { - { "Attach", (uintptr_t)svattach_fd, }, - { "Detach", (uintptr_t)svdetach_fd, }, - { 0, 0, } -}; - -/* - * cb_ops functions. - */ - -static int svopen(dev_t *, int, int, cred_t *); -static int svclose(dev_t, int, int, cred_t *); -static int svioctl(dev_t, int, intptr_t, int, cred_t *, int *); -static int svprint(dev_t, char *); - -/* - * These next functions are layered into the underlying driver's devops. 
- */ - -static int sv_lyr_open(dev_t *, int, int, cred_t *); -static int sv_lyr_close(dev_t, int, int, cred_t *); -static int sv_lyr_strategy(struct buf *); -static int sv_lyr_read(dev_t, struct uio *, cred_t *); -static int sv_lyr_write(dev_t, struct uio *, cred_t *); -static int sv_lyr_aread(dev_t, struct aio_req *, cred_t *); -static int sv_lyr_awrite(dev_t, struct aio_req *, cred_t *); -static int sv_lyr_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); - -static struct cb_ops sv_cb_ops = { - svopen, /* open */ - svclose, /* close */ - nulldev, /* strategy */ - svprint, - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - svioctl, - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, - NULL, /* NOT a stream */ - D_NEW | D_MP | D_64BIT, - CB_REV, - nodev, /* aread */ - nodev, /* awrite */ -}; - - -/* - * dev_ops functions. - */ - -static int sv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); -static int sv_attach(dev_info_t *, ddi_attach_cmd_t); -static int sv_detach(dev_info_t *, ddi_detach_cmd_t); - -static struct dev_ops sv_ops = { - DEVO_REV, - 0, - sv_getinfo, - nulldev, /* identify */ - nulldev, /* probe */ - sv_attach, - sv_detach, - nodev, /* reset */ - &sv_cb_ops, - (struct bus_ops *)0 -}; - -/* - * Module linkage. 
- */ - -extern struct mod_ops mod_driverops; - -static struct modldrv modldrv = { - &mod_driverops, - "nws:Storage Volume:" ISS_VERSION_STR, - &sv_ops -}; - -static struct modlinkage modlinkage = { - MODREV_1, - &modldrv, - 0 -}; - - -int -_init(void) -{ - int error; - - mutex_init(&sv_mutex, NULL, MUTEX_DRIVER, NULL); - - if ((error = mod_install(&modlinkage)) != 0) { - mutex_destroy(&sv_mutex); - return (error); - } - -#ifdef DEBUG - cmn_err(CE_CONT, "!sv (revision %d.%d.%d.%d, %s, %s)\n", - sv_major_rev, sv_minor_rev, sv_micro_rev, sv_baseline_rev, - ISS_VERSION_STR, BUILD_DATE_STR); -#else - if (sv_micro_rev) { - cmn_err(CE_CONT, "!sv (revision %d.%d.%d, %s, %s)\n", - sv_major_rev, sv_minor_rev, sv_micro_rev, - ISS_VERSION_STR, BUILD_DATE_STR); - } else { - cmn_err(CE_CONT, "!sv (revision %d.%d, %s, %s)\n", - sv_major_rev, sv_minor_rev, - ISS_VERSION_STR, BUILD_DATE_STR); - } -#endif - - return (error); -} - - -int -_fini(void) -{ - int error; - - if ((error = mod_remove(&modlinkage)) != 0) - return (error); - - mutex_destroy(&sv_mutex); - - return (error); -} - - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - - -/* - * Locking & State. - * - * sv_mutex protects config information - sv_maj_t and sv_dev_t lists; - * threadset creation and sizing; sv_ndevices. - * - * If we need to hold both sv_mutex and sv_lock, then the sv_mutex - * must be acquired first. - * - * sv_lock protects the sv_dev_t structure for an individual device. - * - * sv_olock protects the otyp/open members of the sv_dev_t. If we need - * to hold both sv_lock and sv_olock, then the sv_lock must be acquired - * first. - * - * nsc_reserve/nsc_release are used in NSC_MULTI mode to allow multiple - * I/O operations to a device simultaneously, as above. 
- * - * All nsc_open/nsc_close/nsc_reserve/nsc_release operations that occur - * with sv_lock write-locked must be done with (sv_state == SV_PENDING) - * and (sv_pending == curthread) so that any recursion through - * sv_lyr_open/sv_lyr_close can be detected. - */ - - -static int -sv_init_devs(void) -{ - int i; - - ASSERT(MUTEX_HELD(&sv_mutex)); - - if (sv_max_devices > 0) - return (0); - - sv_max_devices = nsc_max_devices(); - - if (sv_max_devices <= 0) { - /* nsctl is not attached (nskernd not running) */ - if (sv_debug > 0) - cmn_err(CE_CONT, "!sv: nsc_max_devices = 0\n"); - return (EAGAIN); - } - - sv_devs = nsc_kmem_zalloc((sv_max_devices * sizeof (*sv_devs)), - KM_NOSLEEP, sv_mem); - - if (sv_devs == NULL) { - cmn_err(CE_WARN, "!sv: could not allocate sv_devs array"); - return (ENOMEM); - } - - for (i = 0; i < sv_max_devices; i++) { - mutex_init(&sv_devs[i].sv_olock, NULL, MUTEX_DRIVER, NULL); - rw_init(&sv_devs[i].sv_lock, NULL, RW_DRIVER, NULL); - } - - if (sv_debug > 0) - cmn_err(CE_CONT, "!sv: sv_init_devs successful\n"); - - return (0); -} - - -static int -sv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - int rc; - - switch (cmd) { - - case DDI_ATTACH: - sv_dip = dip; - - if (ddi_create_minor_node(dip, "sv", S_IFCHR, - 0, DDI_PSEUDO, 0) != DDI_SUCCESS) - goto failed; - - mutex_enter(&sv_mutex); - - sv_mem = nsc_register_mem("SV", NSC_MEM_LOCAL, 0); - if (sv_mem == NULL) { - mutex_exit(&sv_mutex); - goto failed; - } - - rc = sv_init_devs(); - if (rc != 0 && rc != EAGAIN) { - mutex_exit(&sv_mutex); - goto failed; - } - - mutex_exit(&sv_mutex); - - - ddi_report_dev(dip); - - sv_threads = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "sv_threads", sv_threads); - - if (sv_debug > 0) - cmn_err(CE_CONT, "!sv: sv_threads=%d\n", sv_threads); - - if (sv_threads > sv_threads_max) - sv_threads_max = sv_threads; - - return (DDI_SUCCESS); - - default: - return (DDI_FAILURE); - } - -failed: - DTRACE_PROBE(sv_attach_failed); - (void) 
sv_detach(dip, DDI_DETACH); - return (DDI_FAILURE); -} - - -static int -sv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - sv_dev_t *svp; - int i; - - switch (cmd) { - - case DDI_DETACH: - - /* - * Check that everything is disabled. - */ - - mutex_enter(&sv_mutex); - - if (sv_mod_status == SV_PREVENT_UNLOAD) { - mutex_exit(&sv_mutex); - DTRACE_PROBE(sv_detach_err_prevent); - return (DDI_FAILURE); - } - - for (i = 0; sv_devs && i < sv_max_devices; i++) { - svp = &sv_devs[i]; - - if (svp->sv_state != SV_DISABLE) { - mutex_exit(&sv_mutex); - DTRACE_PROBE(sv_detach_err_busy); - return (DDI_FAILURE); - } - } - - - for (i = 0; sv_devs && i < sv_max_devices; i++) { - mutex_destroy(&sv_devs[i].sv_olock); - rw_destroy(&sv_devs[i].sv_lock); - } - - if (sv_devs) { - nsc_kmem_free(sv_devs, - (sv_max_devices * sizeof (*sv_devs))); - sv_devs = NULL; - } - sv_max_devices = 0; - - if (sv_mem) { - nsc_unregister_mem(sv_mem); - sv_mem = NULL; - } - - mutex_exit(&sv_mutex); - - /* - * Remove all minor nodes. 
- */ - - ddi_remove_minor_node(dip, NULL); - sv_dip = NULL; - - return (DDI_SUCCESS); - - default: - return (DDI_FAILURE); - } -} - -static sv_maj_t * -sv_getmajor(const dev_t dev) -{ - sv_maj_t **insert, *maj; - major_t umaj = getmajor(dev); - - /* - * See if the hash table entry, or one of the hash chains - * is already allocated for this major number - */ - if ((maj = sv_majors[SV_MAJOR_HASH(umaj)]) != 0) { - do { - if (maj->sm_major == umaj) - return (maj); - } while ((maj = maj->sm_next) != 0); - } - - /* - * If the sv_mutex is held, there is design flaw, as the only non-mutex - * held callers can be sv_enable() or sv_dev_to_sv() - * Return an error, instead of panicing the system - */ - if (MUTEX_HELD(&sv_mutex)) { - cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t"); - return (NULL); - } - - /* - * Determine where to allocate a new element in the hash table - */ - mutex_enter(&sv_mutex); - insert = &(sv_majors[SV_MAJOR_HASH(umaj)]); - for (maj = *insert; maj; maj = maj->sm_next) { - - /* Did another thread beat us to it? */ - if (maj->sm_major == umaj) - return (maj); - - /* Find a NULL insert point? */ - if (maj->sm_next == NULL) - insert = &maj->sm_next; - } - - /* - * Located the new insert point - */ - *insert = nsc_kmem_zalloc(sizeof (*maj), KM_NOSLEEP, sv_mem); - if ((maj = *insert) != 0) - maj->sm_major = umaj; - else - cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t"); - - mutex_exit(&sv_mutex); - - return (maj); -} - -/* ARGSUSED */ - -static int -sv_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int rc = DDI_FAILURE; - - switch (infocmd) { - - case DDI_INFO_DEVT2DEVINFO: - *result = sv_dip; - rc = DDI_SUCCESS; - break; - - case DDI_INFO_DEVT2INSTANCE: - /* - * We only have a single instance. - */ - *result = 0; - rc = DDI_SUCCESS; - break; - - default: - break; - } - - return (rc); -} - - -/* - * Hashing of devices onto major device structures. 
- * - * Individual device structures are hashed onto one of the sm_hash[] - * buckets in the relevant major device structure. - * - * Hash insertion and deletion -must- be done with sv_mutex held. Hash - * searching does not require the mutex because of the sm_seq member. - * sm_seq is incremented on each insertion (-after- hash chain pointer - * manipulation) and each deletion (-before- hash chain pointer - * manipulation). When searching the hash chain, the seq number is - * checked before accessing each device structure, if the seq number has - * changed, then we restart the search from the top of the hash chain. - * If we restart more than SV_HASH_RETRY times, we take sv_mutex and search - * the hash chain (we are guaranteed that this search cannot be - * interrupted). - */ - -#define SV_HASH_RETRY 16 - -static sv_dev_t * -sv_dev_to_sv(const dev_t dev, sv_maj_t **majpp) -{ - minor_t umin = getminor(dev); - sv_dev_t **hb, *next, *svp; - sv_maj_t *maj; - int seq; - int try; - - /* Get major hash table */ - maj = sv_getmajor(dev); - if (majpp) - *majpp = maj; - if (maj == NULL) - return (NULL); - - if (maj->sm_inuse == 0) { - DTRACE_PROBE1( - sv_dev_to_sv_end, - dev_t, dev); - return (NULL); - } - - hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]); - try = 0; - -retry: - if (try > SV_HASH_RETRY) - mutex_enter(&sv_mutex); - - seq = maj->sm_seq; - for (svp = *hb; svp; svp = next) { - next = svp->sv_hash; - - nsc_membar_stld(); /* preserve register load order */ - - if (maj->sm_seq != seq) { - DTRACE_PROBE1(sv_dev_to_sv_retry, dev_t, dev); - try++; - goto retry; - } - - if (svp->sv_dev == dev) - break; - } - - if (try > SV_HASH_RETRY) - mutex_exit(&sv_mutex); - - return (svp); -} - - -/* - * Must be called with sv_mutex held. 
- */ - -static int -sv_get_state(const dev_t udev, sv_dev_t **svpp) -{ - sv_dev_t **hb, **insert, *svp; - sv_maj_t *maj; - minor_t umin; - int i; - - /* Get major hash table */ - if ((maj = sv_getmajor(udev)) == NULL) - return (NULL); - - /* Determine which minor hash table */ - umin = getminor(udev); - hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]); - - /* look for clash */ - - insert = hb; - - for (svp = *hb; svp; svp = svp->sv_hash) { - if (svp->sv_dev == udev) - break; - - if (svp->sv_hash == NULL) - insert = &svp->sv_hash; - } - - if (svp) { - DTRACE_PROBE1( - sv_get_state_enabled, - dev_t, udev); - return (SV_EENABLED); - } - - /* look for spare sv_devs slot */ - - for (i = 0; i < sv_max_devices; i++) { - svp = &sv_devs[i]; - - if (svp->sv_state == SV_DISABLE) - break; - } - - if (i >= sv_max_devices) { - DTRACE_PROBE1( - sv_get_state_noslots, - dev_t, udev); - return (SV_ENOSLOTS); - } - - svp->sv_state = SV_PENDING; - svp->sv_pending = curthread; - - *insert = svp; - svp->sv_hash = NULL; - maj->sm_seq++; /* must be after the store to the hash chain */ - - *svpp = svp; - - /* - * We do not know the size of the underlying device at - * this stage, so initialise "nblocks" property to - * zero, and update it whenever we succeed in - * nsc_reserve'ing the underlying nsc_fd_t. - */ - - svp->sv_nblocks = 0; - - return (0); -} - - -/* - * Remove a device structure from it's hash chain. - * Must be called with sv_mutex held. 
- */ - -static void -sv_rm_hash(sv_dev_t *svp) -{ - sv_dev_t **svpp; - sv_maj_t *maj; - - /* Get major hash table */ - if ((maj = sv_getmajor(svp->sv_dev)) == NULL) - return; - - /* remove svp from hash chain */ - - svpp = &(maj->sm_hash[SV_MINOR_HASH(getminor(svp->sv_dev))]); - while (*svpp) { - if (*svpp == svp) { - /* - * increment of sm_seq must be before the - * removal from the hash chain - */ - maj->sm_seq++; - *svpp = svp->sv_hash; - break; - } - - svpp = &(*svpp)->sv_hash; - } - - svp->sv_hash = NULL; -} - -/* - * Free (disable) a device structure. - * Must be called with sv_lock(RW_WRITER) and sv_mutex held, and will - * perform the exits during its processing. - */ - -static int -sv_free(sv_dev_t *svp, const int error) -{ - struct cb_ops *cb_ops; - sv_maj_t *maj; - - /* Get major hash table */ - if ((maj = sv_getmajor(svp->sv_dev)) == NULL) - return (NULL); - - svp->sv_state = SV_PENDING; - svp->sv_pending = curthread; - - /* - * Close the fd's before removing from the hash or swapping - * back the cb_ops pointers so that the cache flushes before new - * io can come in. - */ - - if (svp->sv_fd) { - (void) nsc_close(svp->sv_fd); - svp->sv_fd = 0; - } - - sv_rm_hash(svp); - - if (error != SV_ESDOPEN && - error != SV_ELYROPEN && --maj->sm_inuse == 0) { - - if (maj->sm_dev_ops) - cb_ops = maj->sm_dev_ops->devo_cb_ops; - else - cb_ops = NULL; - - if (cb_ops && maj->sm_strategy != NULL) { - cb_ops->cb_strategy = maj->sm_strategy; - cb_ops->cb_close = maj->sm_close; - cb_ops->cb_ioctl = maj->sm_ioctl; - cb_ops->cb_write = maj->sm_write; - cb_ops->cb_open = maj->sm_open; - cb_ops->cb_read = maj->sm_read; - cb_ops->cb_flag = maj->sm_flag; - - if (maj->sm_awrite) - cb_ops->cb_awrite = maj->sm_awrite; - - if (maj->sm_aread) - cb_ops->cb_aread = maj->sm_aread; - - /* - * corbin XXX - * Leave backing device ops in maj->sm_* - * to handle any requests that might come - * in during the disable. 
This could be - * a problem however if the backing device - * driver is changed while we process these - * requests. - * - * maj->sm_strategy = 0; - * maj->sm_awrite = 0; - * maj->sm_write = 0; - * maj->sm_ioctl = 0; - * maj->sm_close = 0; - * maj->sm_aread = 0; - * maj->sm_read = 0; - * maj->sm_open = 0; - * maj->sm_flag = 0; - * - */ - } - - if (maj->sm_dev_ops) { - maj->sm_dev_ops = 0; - } - } - - if (svp->sv_lh) { - cred_t *crp = ddi_get_cred(); - - /* - * Close the protective layered driver open using the - * Sun Private layered driver i/f. - */ - - (void) ldi_close(svp->sv_lh, FREAD|FWRITE, crp); - svp->sv_lh = NULL; - } - - svp->sv_timestamp = nsc_lbolt(); - svp->sv_state = SV_DISABLE; - svp->sv_pending = NULL; - rw_exit(&svp->sv_lock); - mutex_exit(&sv_mutex); - - return (error); -} - -/* - * Reserve the device, taking into account the possibility that - * the reserve might have to be retried. - */ -static int -sv_reserve(nsc_fd_t *fd, int flags) -{ - int eintr_count; - int rc; - - eintr_count = 0; - do { - rc = nsc_reserve(fd, flags); - if (rc == EINTR) { - ++eintr_count; - delay(2); - } - } while ((rc == EINTR) && (eintr_count < MAX_EINTR_COUNT)); - - return (rc); -} - -static int -sv_enable(const caddr_t path, const int flag, - const dev_t udev, spcs_s_info_t kstatus) -{ - struct dev_ops *dev_ops; - struct cb_ops *cb_ops; - sv_dev_t *svp; - sv_maj_t *maj; - nsc_size_t nblocks; - int rc; - cred_t *crp; - ldi_ident_t li; - - if (udev == (dev_t)-1 || udev == 0) { - DTRACE_PROBE1( - sv_enable_err_baddev, - dev_t, udev); - return (SV_EBADDEV); - } - - if ((flag & ~(NSC_CACHE|NSC_DEVICE)) != 0) { - DTRACE_PROBE1(sv_enable_err_amode, dev_t, udev); - return (SV_EAMODE); - } - - /* Get major hash table */ - if ((maj = sv_getmajor(udev)) == NULL) - return (SV_EBADDEV); - - mutex_enter(&sv_mutex); - - rc = sv_get_state(udev, &svp); - if (rc) { - mutex_exit(&sv_mutex); - DTRACE_PROBE1(sv_enable_err_state, dev_t, udev); - return (rc); - } - - rw_enter(&svp->sv_lock, 
RW_WRITER); - - /* - * Get real fd used for io - */ - - svp->sv_dev = udev; - svp->sv_flag = flag; - - /* - * OR in NSC_DEVICE to ensure that nskern grabs the real strategy - * function pointer before sv swaps them out. - */ - - svp->sv_fd = nsc_open(path, (svp->sv_flag | NSC_DEVICE), - sv_fd_def, (blind_t)udev, &rc); - - if (svp->sv_fd == NULL) { - if (kstatus) - spcs_s_add(kstatus, rc); - DTRACE_PROBE1(sv_enable_err_fd, dev_t, udev); - return (sv_free(svp, SV_ESDOPEN)); - } - - /* - * Perform a layered driver open using the Sun Private layered - * driver i/f to ensure that the cb_ops structure for the driver - * is not detached out from under us whilst sv is enabled. - * - */ - - crp = ddi_get_cred(); - svp->sv_lh = NULL; - - if ((rc = ldi_ident_from_dev(svp->sv_dev, &li)) == 0) { - rc = ldi_open_by_dev(&svp->sv_dev, - OTYP_BLK, FREAD|FWRITE, crp, &svp->sv_lh, li); - } - - if (rc != 0) { - if (kstatus) - spcs_s_add(kstatus, rc); - DTRACE_PROBE1(sv_enable_err_lyr_open, dev_t, udev); - return (sv_free(svp, SV_ELYROPEN)); - } - - /* - * Do layering if required - must happen after nsc_open(). 
- */ - - if (maj->sm_inuse++ == 0) { - maj->sm_dev_ops = nsc_get_devops(getmajor(udev)); - - if (maj->sm_dev_ops == NULL || - maj->sm_dev_ops->devo_cb_ops == NULL) { - DTRACE_PROBE1(sv_enable_err_load, dev_t, udev); - return (sv_free(svp, SV_ELOAD)); - } - - dev_ops = maj->sm_dev_ops; - cb_ops = dev_ops->devo_cb_ops; - - if (cb_ops->cb_strategy == NULL || - cb_ops->cb_strategy == nodev || - cb_ops->cb_strategy == nulldev) { - DTRACE_PROBE1(sv_enable_err_nostrategy, dev_t, udev); - return (sv_free(svp, SV_ELOAD)); - } - - if (cb_ops->cb_strategy == sv_lyr_strategy) { - DTRACE_PROBE1(sv_enable_err_svstrategy, dev_t, udev); - return (sv_free(svp, SV_ESTRATEGY)); - } - - maj->sm_strategy = cb_ops->cb_strategy; - maj->sm_close = cb_ops->cb_close; - maj->sm_ioctl = cb_ops->cb_ioctl; - maj->sm_write = cb_ops->cb_write; - maj->sm_open = cb_ops->cb_open; - maj->sm_read = cb_ops->cb_read; - maj->sm_flag = cb_ops->cb_flag; - - cb_ops->cb_flag = cb_ops->cb_flag | D_MP; - cb_ops->cb_strategy = sv_lyr_strategy; - cb_ops->cb_close = sv_lyr_close; - cb_ops->cb_ioctl = sv_lyr_ioctl; - cb_ops->cb_write = sv_lyr_write; - cb_ops->cb_open = sv_lyr_open; - cb_ops->cb_read = sv_lyr_read; - - /* - * Check that the driver has async I/O entry points - * before changing them. - */ - - if (dev_ops->devo_rev < 3 || cb_ops->cb_rev < 1) { - maj->sm_awrite = 0; - maj->sm_aread = 0; - } else { - maj->sm_awrite = cb_ops->cb_awrite; - maj->sm_aread = cb_ops->cb_aread; - - cb_ops->cb_awrite = sv_lyr_awrite; - cb_ops->cb_aread = sv_lyr_aread; - } - - /* - * Bug 4645743 - * - * Prevent sv from ever unloading after it has interposed - * on a major device because there is a race between - * sv removing its layered entry points from the target - * dev_ops, a client coming in and accessing the driver, - * and the kernel modunloading the sv text. - * - * To allow unload, do svboot -u, which only happens in - * pkgrm time. 
- */ - ASSERT(MUTEX_HELD(&sv_mutex)); - sv_mod_status = SV_PREVENT_UNLOAD; - } - - - svp->sv_timestamp = nsc_lbolt(); - svp->sv_state = SV_ENABLE; - svp->sv_pending = NULL; - rw_exit(&svp->sv_lock); - - sv_ndevices++; - mutex_exit(&sv_mutex); - - nblocks = 0; - if (sv_reserve(svp->sv_fd, NSC_READ|NSC_MULTI|NSC_PCATCH) == 0) { - nblocks = svp->sv_nblocks; - nsc_release(svp->sv_fd); - } - - cmn_err(CE_CONT, "!sv: rdev 0x%lx, nblocks %" NSC_SZFMT "\n", - svp->sv_dev, nblocks); - - return (0); -} - - -static int -sv_prepare_unload() -{ - int rc = 0; - - mutex_enter(&sv_mutex); - - if (sv_mod_status == SV_PREVENT_UNLOAD) { - if ((sv_ndevices != 0) || (sv_tset != NULL)) { - rc = EBUSY; - } else { - sv_mod_status = SV_ALLOW_UNLOAD; - delay(SV_WAIT_UNLOAD * drv_usectohz(1000000)); - } - } - - mutex_exit(&sv_mutex); - return (rc); -} - -static int -svattach_fd(blind_t arg) -{ - dev_t dev = (dev_t)arg; - sv_dev_t *svp = sv_dev_to_sv(dev, NULL); - int rc; - - if (sv_debug > 0) - cmn_err(CE_CONT, "!svattach_fd(%p, %p)\n", arg, (void *)svp); - - if (svp == NULL) { - cmn_err(CE_WARN, "!svattach_fd: no state (arg %p)", arg); - return (0); - } - - if ((rc = nsc_partsize(svp->sv_fd, &svp->sv_nblocks)) != 0) { - cmn_err(CE_WARN, - "!svattach_fd: nsc_partsize() failed, rc %d", rc); - svp->sv_nblocks = 0; - } - - if ((rc = nsc_maxfbas(svp->sv_fd, 0, &svp->sv_maxfbas)) != 0) { - cmn_err(CE_WARN, - "!svattach_fd: nsc_maxfbas() failed, rc %d", rc); - svp->sv_maxfbas = 0; - } - - if (sv_debug > 0) { - cmn_err(CE_CONT, - "!svattach_fd(%p): size %" NSC_SZFMT ", " - "maxfbas %" NSC_SZFMT "\n", - arg, svp->sv_nblocks, svp->sv_maxfbas); - } - - return (0); -} - - -static int -svdetach_fd(blind_t arg) -{ - dev_t dev = (dev_t)arg; - sv_dev_t *svp = sv_dev_to_sv(dev, NULL); - - if (sv_debug > 0) - cmn_err(CE_CONT, "!svdetach_fd(%p, %p)\n", arg, (void *)svp); - - /* svp can be NULL during disable of an sv */ - if (svp == NULL) - return (0); - - svp->sv_maxfbas = 0; - svp->sv_nblocks = 0; - return 
(0); -} - - -/* - * Side effect: if called with (guard != 0), then expects both sv_mutex - * and sv_lock(RW_WRITER) to be held, and will release them before returning. - */ - -/* ARGSUSED */ -static int -sv_disable(dev_t dev, spcs_s_info_t kstatus) -{ - sv_dev_t *svp = sv_dev_to_sv(dev, NULL); - - if (svp == NULL) { - - DTRACE_PROBE1(sv_disable_err_nodev, sv_dev_t *, svp); - return (SV_ENODEV); - } - - mutex_enter(&sv_mutex); - rw_enter(&svp->sv_lock, RW_WRITER); - - if (svp->sv_fd == NULL || svp->sv_state != SV_ENABLE) { - rw_exit(&svp->sv_lock); - mutex_exit(&sv_mutex); - - DTRACE_PROBE1(sv_disable_err_disabled, sv_dev_t *, svp); - return (SV_EDISABLED); - } - - - sv_ndevices--; - return (sv_free(svp, 0)); -} - - - -static int -sv_lyr_open(dev_t *devp, int flag, int otyp, cred_t *crp) -{ - nsc_buf_t *tmph; - sv_dev_t *svp; - sv_maj_t *maj; - int (*fn)(); - dev_t odev; - int ret; - int rc; - - svp = sv_dev_to_sv(*devp, &maj); - - if (svp) { - if (svp->sv_state == SV_PENDING && - svp->sv_pending == curthread) { - /* - * This is a recursive open from a call to - * ddi_lyr_open_by_devt and so we just want - * to pass it straight through to the - * underlying driver. - */ - DTRACE_PROBE2(sv_lyr_open_recursive, - sv_dev_t *, svp, - dev_t, *devp); - svp = NULL; - } else - rw_enter(&svp->sv_lock, RW_READER); - } - - odev = *devp; - - if (maj && (fn = maj->sm_open) != 0) { - if (!(maj->sm_flag & D_MP)) { - UNSAFE_ENTER(); - ret = (*fn)(devp, flag, otyp, crp); - UNSAFE_EXIT(); - } else { - ret = (*fn)(devp, flag, otyp, crp); - } - - if (ret == 0) { - /* - * Re-acquire svp if the driver changed *devp. - */ - - if (*devp != odev) { - if (svp != NULL) - rw_exit(&svp->sv_lock); - - svp = sv_dev_to_sv(*devp, NULL); - - if (svp) { - rw_enter(&svp->sv_lock, RW_READER); - } - } - } - } else { - ret = ENODEV; - } - - if (svp && ret != 0 && svp->sv_state == SV_ENABLE) { - /* - * Underlying DDI open failed, but we have this - * device SV enabled. 
If we can read some data - * from the device, fake a successful open (this - * probably means that this device is RDC'd and we - * are getting the data from the secondary node). - * - * The reserve must be done with NSC_TRY|NSC_NOWAIT to - * ensure that it does not deadlock if this open is - * coming from nskernd:get_bsize(). - */ - rc = sv_reserve(svp->sv_fd, - NSC_TRY | NSC_NOWAIT | NSC_MULTI | NSC_PCATCH); - if (rc == 0) { - tmph = NULL; - - rc = nsc_alloc_buf(svp->sv_fd, 0, 1, NSC_READ, &tmph); - if (rc <= 0) { - /* success */ - ret = 0; - } - - if (tmph) { - (void) nsc_free_buf(tmph); - tmph = NULL; - } - - nsc_release(svp->sv_fd); - - /* - * Count the number of layered opens that we - * fake since we have to fake a matching number - * of closes (OTYP_LYR open/close calls must be - * paired). - */ - - if (ret == 0 && otyp == OTYP_LYR) { - mutex_enter(&svp->sv_olock); - svp->sv_openlcnt++; - mutex_exit(&svp->sv_olock); - } - } - } - - if (svp) { - rw_exit(&svp->sv_lock); - } - - return (ret); -} - - -static int -sv_lyr_close(dev_t dev, int flag, int otyp, cred_t *crp) -{ - sv_dev_t *svp; - sv_maj_t *maj; - int (*fn)(); - int ret; - - svp = sv_dev_to_sv(dev, &maj); - - if (svp && - svp->sv_state == SV_PENDING && - svp->sv_pending == curthread) { - /* - * This is a recursive open from a call to - * ddi_lyr_close and so we just want - * to pass it straight through to the - * underlying driver. - */ - DTRACE_PROBE2(sv_lyr_close_recursive, sv_dev_t *, svp, - dev_t, dev); - svp = NULL; - } - - if (svp) { - rw_enter(&svp->sv_lock, RW_READER); - - if (otyp == OTYP_LYR) { - mutex_enter(&svp->sv_olock); - - if (svp->sv_openlcnt) { - /* - * Consume sufficient layered closes to - * account for the opens that we faked - * whilst the device was failed. 
- */ - svp->sv_openlcnt--; - mutex_exit(&svp->sv_olock); - rw_exit(&svp->sv_lock); - - DTRACE_PROBE1(sv_lyr_close_end, dev_t, dev); - - return (0); - } - - mutex_exit(&svp->sv_olock); - } - } - - if (maj && (fn = maj->sm_close) != 0) { - if (!(maj->sm_flag & D_MP)) { - UNSAFE_ENTER(); - ret = (*fn)(dev, flag, otyp, crp); - UNSAFE_EXIT(); - } else { - ret = (*fn)(dev, flag, otyp, crp); - } - } else { - ret = ENODEV; - } - - if (svp) { - rw_exit(&svp->sv_lock); - } - - return (ret); -} - - -/* - * Convert the specified dev_t into a locked and enabled sv_dev_t, or - * return NULL. - */ -static sv_dev_t * -sv_find_enabled(const dev_t dev, sv_maj_t **majpp) -{ - sv_dev_t *svp; - - while ((svp = sv_dev_to_sv(dev, majpp)) != NULL) { - rw_enter(&svp->sv_lock, RW_READER); - - if (svp->sv_state == SV_ENABLE) { - /* locked and enabled */ - break; - } - - /* - * State was changed while waiting on the lock. - * Wait for a stable state. - */ - rw_exit(&svp->sv_lock); - - DTRACE_PROBE1(sv_find_enabled_retry, dev_t, dev); - - delay(2); - } - - return (svp); -} - - -static int -sv_lyr_uio(dev_t dev, uio_t *uiop, cred_t *crp, int rw) -{ - sv_dev_t *svp; - sv_maj_t *maj; - int (*fn)(); - int rc; - - svp = sv_find_enabled(dev, &maj); - if (svp == NULL) { - if (maj) { - if (rw == NSC_READ) - fn = maj->sm_read; - else - fn = maj->sm_write; - - if (fn != 0) { - if (!(maj->sm_flag & D_MP)) { - UNSAFE_ENTER(); - rc = (*fn)(dev, uiop, crp); - UNSAFE_EXIT(); - } else { - rc = (*fn)(dev, uiop, crp); - } - } - - return (rc); - } else { - return (ENODEV); - } - } - - ASSERT(RW_READ_HELD(&svp->sv_lock)); - - if (svp->sv_flag == 0) { - /* - * guard access mode - * - prevent user level access to the device - */ - DTRACE_PROBE1(sv_lyr_uio_err_guard, uio_t *, uiop); - rc = EPERM; - goto out; - } - - if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) { - DTRACE_PROBE1(sv_lyr_uio_err_rsrv, uio_t *, uiop); - goto out; - } - - if (rw == NSC_READ) - rc = nsc_uread(svp->sv_fd, uiop, crp); - 
else - rc = nsc_uwrite(svp->sv_fd, uiop, crp); - - nsc_release(svp->sv_fd); - -out: - rw_exit(&svp->sv_lock); - - return (rc); -} - - -static int -sv_lyr_read(dev_t dev, uio_t *uiop, cred_t *crp) -{ - return (sv_lyr_uio(dev, uiop, crp, NSC_READ)); -} - - -static int -sv_lyr_write(dev_t dev, uio_t *uiop, cred_t *crp) -{ - return (sv_lyr_uio(dev, uiop, crp, NSC_WRITE)); -} - - -/* ARGSUSED */ - -static int -sv_lyr_aread(dev_t dev, struct aio_req *aio, cred_t *crp) -{ - return (aphysio(sv_lyr_strategy, - anocancel, dev, B_READ, minphys, aio)); -} - - -/* ARGSUSED */ - -static int -sv_lyr_awrite(dev_t dev, struct aio_req *aio, cred_t *crp) -{ - return (aphysio(sv_lyr_strategy, - anocancel, dev, B_WRITE, minphys, aio)); -} - - -/* - * Set up an array containing the list of raw path names - * The array for the paths is svl and the size of the array is - * in size. - * - * If there are more layered devices than will fit in the array, - * the number of extra layered devices is returned. Otherwise - * zero is return. 
- * - * Input: - * svn : array for paths - * size : size of the array - * - * Output (extra): - * zero : All paths fit in array - * >0 : Number of defined layered devices don't fit in array - */ - -static int -sv_list(void *ptr, const int size, int *extra, const int ilp32) -{ - sv_name32_t *svn32; - sv_name_t *svn; - sv_dev_t *svp; - int *mode, *nblocks; - int i, index; - char *path; - - *extra = 0; - index = 0; - - if (ilp32) - svn32 = ptr; - else - svn = ptr; - - mutex_enter(&sv_mutex); - for (i = 0; i < sv_max_devices; i++) { - svp = &sv_devs[i]; - - rw_enter(&svp->sv_lock, RW_READER); - - if (svp->sv_state != SV_ENABLE) { - rw_exit(&svp->sv_lock); - continue; - } - - if ((*extra) != 0 || ptr == NULL) { - /* Another overflow entry */ - rw_exit(&svp->sv_lock); - (*extra)++; - continue; - } - - if (ilp32) { - nblocks = &svn32->svn_nblocks; - mode = &svn32->svn_mode; - path = svn32->svn_path; - - svn32->svn_timestamp = (uint32_t)svp->sv_timestamp; - svn32++; - } else { - nblocks = &svn->svn_nblocks; - mode = &svn->svn_mode; - path = svn->svn_path; - - svn->svn_timestamp = svp->sv_timestamp; - svn++; - } - - (void) strcpy(path, nsc_pathname(svp->sv_fd)); - *nblocks = svp->sv_nblocks; - *mode = svp->sv_flag; - - if (*nblocks == 0) { - if (sv_debug > 3) - cmn_err(CE_CONT, "!sv_list: need to reserve\n"); - - if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) { - *nblocks = svp->sv_nblocks; - nsc_release(svp->sv_fd); - } - } - - if (++index >= size) { - /* Out of space */ - (*extra)++; - } - - rw_exit(&svp->sv_lock); - } - mutex_exit(&sv_mutex); - - if (index < size) { - /* NULL terminated list */ - if (ilp32) - svn32->svn_path[0] = '\0'; - else - svn->svn_path[0] = '\0'; - } - - return (0); -} - - -static void -sv_thread_tune(int threads) -{ - int incr = (threads > 0) ? 
1 : -1; - int change = 0; - int nthreads; - - ASSERT(MUTEX_HELD(&sv_mutex)); - - if (sv_threads_extra) { - /* keep track of any additional threads requested */ - if (threads > 0) { - sv_threads_extra += threads; - return; - } - threads = -threads; - if (threads >= sv_threads_extra) { - threads -= sv_threads_extra; - sv_threads_extra = 0; - /* fall through to while loop */ - } else { - sv_threads_extra -= threads; - return; - } - } else if (threads > 0) { - /* - * do not increase the number of threads beyond - * sv_threads_max when doing dynamic thread tuning - */ - nthreads = nst_nthread(sv_tset); - if ((nthreads + threads) > sv_threads_max) { - sv_threads_extra = nthreads + threads - sv_threads_max; - threads = sv_threads_max - nthreads; - if (threads <= 0) - return; - } - } - - if (threads < 0) - threads = -threads; - - while (threads--) { - nthreads = nst_nthread(sv_tset); - sv_threads_needed += incr; - - if (sv_threads_needed >= nthreads) - change += nst_add_thread(sv_tset, sv_threads_inc); - else if ((sv_threads_needed < - (nthreads - (sv_threads_inc + sv_threads_hysteresis))) && - ((nthreads - sv_threads_inc) >= sv_threads)) - change -= nst_del_thread(sv_tset, sv_threads_inc); - } - -#ifdef DEBUG - if (change) { - cmn_err(CE_NOTE, - "!sv_thread_tune: threads needed %d, nthreads %d, " - "nthreads change %d", - sv_threads_needed, nst_nthread(sv_tset), change); - } -#endif -} - - -/* ARGSUSED */ -static int -svopen(dev_t *devp, int flag, int otyp, cred_t *crp) -{ - int rc; - - mutex_enter(&sv_mutex); - rc = sv_init_devs(); - mutex_exit(&sv_mutex); - - return (rc); -} - - -/* ARGSUSED */ -static int -svclose(dev_t dev, int flag, int otyp, cred_t *crp) -{ - const int secs = HZ * 5; - const int ticks = HZ / 10; - int loops = secs / ticks; - - mutex_enter(&sv_mutex); - while (sv_ndevices <= 0 && sv_tset != NULL && loops > 0) { - if (nst_nlive(sv_tset) <= 0) { - nst_destroy(sv_tset); - sv_tset = NULL; - break; - } - - /* threads still active - wait for them to exit 
*/ - mutex_exit(&sv_mutex); - delay(ticks); - loops--; - mutex_enter(&sv_mutex); - } - mutex_exit(&sv_mutex); - - if (loops <= 0) { - cmn_err(CE_WARN, -#ifndef DEBUG - /* do not write to console when non-DEBUG */ - "!" -#endif - "sv:svclose: threads still active " - "after %d sec - leaking thread set", secs); - } - - return (0); -} - - -static int -svioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvalp) -{ - char itmp1[12], itmp2[12]; /* temp char array for editing ints */ - spcs_s_info_t kstatus; /* Kernel version of spcs status */ - spcs_s_info_t ustatus; /* Address of user version of spcs status */ - sv_list32_t svl32; /* 32 bit Initial structure for SVIOC_LIST */ - sv_version_t svv; /* Version structure */ - sv_conf_t svc; /* User config structure */ - sv_list_t svl; /* Initial structure for SVIOC_LIST */ - void *usvn; /* Address of user sv_name_t */ - void *svn = NULL; /* Array for SVIOC_LIST */ - uint64_t phash; /* pathname hash */ - int rc = 0; /* Return code -- errno */ - int size; /* Number of items in array */ - int bytes; /* Byte size of array */ - int ilp32; /* Convert data structures for ilp32 userland */ - - *rvalp = 0; - - /* - * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, then it will continue. - * else it means it previously was SV_PREVENT_UNLOAD, and now it's - * SV_ALLOW_UNLOAD, expecting the driver to eventually unload. - * - * SV_ALLOW_UNLOAD is final state, so no need to grab sv_mutex. 
- */ - if (sv_mod_status == SV_ALLOW_UNLOAD) { - return (EBUSY); - } - - if ((cmd != SVIOC_LIST) && ((rc = drv_priv(crp)) != 0)) - return (rc); - - kstatus = spcs_s_kcreate(); - if (!kstatus) { - DTRACE_PROBE1(sv_ioctl_err_kcreate, dev_t, dev); - return (ENOMEM); - } - - ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32); - - switch (cmd) { - - case SVIOC_ENABLE: - - if (ilp32) { - sv_conf32_t svc32; - - if (ddi_copyin((void *)arg, &svc32, - sizeof (svc32), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - svc.svc_error = (spcs_s_info_t)svc32.svc_error; - (void) strcpy(svc.svc_path, svc32.svc_path); - svc.svc_flag = svc32.svc_flag; - svc.svc_major = svc32.svc_major; - svc.svc_minor = svc32.svc_minor; - } else { - if (ddi_copyin((void *)arg, &svc, - sizeof (svc), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - } - - /* force to raw access */ - svc.svc_flag = NSC_DEVICE; - - if (sv_tset == NULL) { - mutex_enter(&sv_mutex); - - if (sv_tset == NULL) { - sv_tset = nst_init("sv_thr", sv_threads); - } - - mutex_exit(&sv_mutex); - - if (sv_tset == NULL) { - cmn_err(CE_WARN, - "!sv: could not allocate %d threads", - sv_threads); - } - } - - rc = sv_enable(svc.svc_path, svc.svc_flag, - makedevice(svc.svc_major, svc.svc_minor), kstatus); - - if (rc == 0) { - sv_config_time = nsc_lbolt(); - - mutex_enter(&sv_mutex); - sv_thread_tune(sv_threads_dev); - mutex_exit(&sv_mutex); - } - - DTRACE_PROBE3(sv_ioctl_end, dev_t, dev, int, *rvalp, int, rc); - - return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc)); - /* NOTREACHED */ - - case SVIOC_DISABLE: - - if (ilp32) { - sv_conf32_t svc32; - - if (ddi_copyin((void *)arg, &svc32, - sizeof (svc32), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - svc.svc_error = (spcs_s_info_t)svc32.svc_error; - svc.svc_major = svc32.svc_major; - svc.svc_minor = svc32.svc_minor; - (void) strcpy(svc.svc_path, svc32.svc_path); - svc.svc_flag = svc32.svc_flag; - } else { - if 
(ddi_copyin((void *)arg, &svc, - sizeof (svc), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - } - - if (svc.svc_major == (major_t)-1 && - svc.svc_minor == (minor_t)-1) { - sv_dev_t *svp; - int i; - - /* - * User level could not find the minor device - * node, so do this the slow way by searching - * the entire sv config for a matching pathname. - */ - - phash = nsc_strhash(svc.svc_path); - - mutex_enter(&sv_mutex); - - for (i = 0; i < sv_max_devices; i++) { - svp = &sv_devs[i]; - - if (svp->sv_state == SV_DISABLE || - svp->sv_fd == NULL) - continue; - - if (nsc_fdpathcmp(svp->sv_fd, phash, - svc.svc_path) == 0) { - svc.svc_major = getmajor(svp->sv_dev); - svc.svc_minor = getminor(svp->sv_dev); - break; - } - } - - mutex_exit(&sv_mutex); - - if (svc.svc_major == (major_t)-1 && - svc.svc_minor == (minor_t)-1) - return (spcs_s_ocopyoutf(&kstatus, - svc.svc_error, SV_ENODEV)); - } - - rc = sv_disable(makedevice(svc.svc_major, svc.svc_minor), - kstatus); - - if (rc == 0) { - sv_config_time = nsc_lbolt(); - - mutex_enter(&sv_mutex); - sv_thread_tune(-sv_threads_dev); - mutex_exit(&sv_mutex); - } - - DTRACE_PROBE3(sv_ioctl_2, dev_t, dev, int, *rvalp, int, rc); - - return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc)); - /* NOTREACHED */ - - case SVIOC_LIST: - - if (ilp32) { - if (ddi_copyin((void *)arg, &svl32, - sizeof (svl32), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - ustatus = (spcs_s_info_t)svl32.svl_error; - size = svl32.svl_count; - usvn = (void *)(unsigned long)svl32.svl_names; - } else { - if (ddi_copyin((void *)arg, &svl, - sizeof (svl), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - ustatus = svl.svl_error; - size = svl.svl_count; - usvn = svl.svl_names; - } - - /* Do some boundary checking */ - if ((size < 0) || (size > sv_max_devices)) { - /* Array size is out of range */ - return (spcs_s_ocopyoutf(&kstatus, ustatus, - SV_EARRBOUNDS, "0", - spcs_s_inttostring(sv_max_devices, itmp1, - sizeof (itmp1), 0), 
- spcs_s_inttostring(size, itmp2, - sizeof (itmp2), 0))); - } - - if (ilp32) - bytes = size * sizeof (sv_name32_t); - else - bytes = size * sizeof (sv_name_t); - - /* Allocate memory for the array of structures */ - if (bytes != 0) { - svn = kmem_zalloc(bytes, KM_SLEEP); - if (!svn) { - return (spcs_s_ocopyoutf(&kstatus, - ustatus, ENOMEM)); - } - } - - rc = sv_list(svn, size, rvalp, ilp32); - if (rc) { - if (svn != NULL) - kmem_free(svn, bytes); - return (spcs_s_ocopyoutf(&kstatus, ustatus, rc)); - } - - if (ilp32) { - svl32.svl_timestamp = (uint32_t)sv_config_time; - svl32.svl_maxdevs = (int32_t)sv_max_devices; - - /* Return the list structure */ - if (ddi_copyout(&svl32, (void *)arg, - sizeof (svl32), mode) < 0) { - spcs_s_kfree(kstatus); - if (svn != NULL) - kmem_free(svn, bytes); - return (EFAULT); - } - } else { - svl.svl_timestamp = sv_config_time; - svl.svl_maxdevs = sv_max_devices; - - /* Return the list structure */ - if (ddi_copyout(&svl, (void *)arg, - sizeof (svl), mode) < 0) { - spcs_s_kfree(kstatus); - if (svn != NULL) - kmem_free(svn, bytes); - return (EFAULT); - } - } - - /* Return the array */ - if (svn != NULL) { - if (ddi_copyout(svn, usvn, bytes, mode) < 0) { - kmem_free(svn, bytes); - spcs_s_kfree(kstatus); - return (EFAULT); - } - kmem_free(svn, bytes); - } - - DTRACE_PROBE3(sv_ioctl_3, dev_t, dev, int, *rvalp, int, 0); - - return (spcs_s_ocopyoutf(&kstatus, ustatus, 0)); - /* NOTREACHED */ - - case SVIOC_VERSION: - - if (ilp32) { - sv_version32_t svv32; - - if (ddi_copyin((void *)arg, &svv32, - sizeof (svv32), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - svv32.svv_major_rev = sv_major_rev; - svv32.svv_minor_rev = sv_minor_rev; - svv32.svv_micro_rev = sv_micro_rev; - svv32.svv_baseline_rev = sv_baseline_rev; - - if (ddi_copyout(&svv32, (void *)arg, - sizeof (svv32), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - ustatus = (spcs_s_info_t)svv32.svv_error; - } else { - if (ddi_copyin((void *)arg, &svv, - 
sizeof (svv), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - svv.svv_major_rev = sv_major_rev; - svv.svv_minor_rev = sv_minor_rev; - svv.svv_micro_rev = sv_micro_rev; - svv.svv_baseline_rev = sv_baseline_rev; - - if (ddi_copyout(&svv, (void *)arg, - sizeof (svv), mode) < 0) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - ustatus = svv.svv_error; - } - - DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, 0); - - return (spcs_s_ocopyoutf(&kstatus, ustatus, 0)); - /* NOTREACHED */ - - case SVIOC_UNLOAD: - rc = sv_prepare_unload(); - - if (ddi_copyout(&rc, (void *)arg, sizeof (rc), mode) < 0) { - rc = EFAULT; - } - - spcs_s_kfree(kstatus); - return (rc); - - default: - spcs_s_kfree(kstatus); - - DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, EINVAL); - - return (EINVAL); - /* NOTREACHED */ - } - - /* NOTREACHED */ -} - - -/* ARGSUSED */ -static int -svprint(dev_t dev, char *str) -{ - int instance = ddi_get_instance(sv_dip); - cmn_err(CE_WARN, "!%s%d: %s", ddi_get_name(sv_dip), instance, str); - return (0); -} - - -static void -_sv_lyr_strategy(struct buf *bp) -{ - caddr_t buf_addr; /* pointer to linear buffer in bp */ - nsc_buf_t *bufh = NULL; - nsc_buf_t *hndl = NULL; - sv_dev_t *svp; - nsc_vec_t *v; - sv_maj_t *maj; - nsc_size_t fba_req, fba_len; /* FBA lengths */ - nsc_off_t fba_off; /* FBA offset */ - size_t tocopy, nbytes; /* byte lengths */ - int rw, rc; /* flags and return codes */ - int (*fn)(); - - rc = 0; - - if (sv_debug > 5) - cmn_err(CE_CONT, "!_sv_lyr_strategy(%p)\n", (void *)bp); - - svp = sv_find_enabled(bp->b_edev, &maj); - if (svp == NULL) { - if (maj && (fn = maj->sm_strategy) != 0) { - if (!(maj->sm_flag & D_MP)) { - UNSAFE_ENTER(); - rc = (*fn)(bp); - UNSAFE_EXIT(); - } else { - rc = (*fn)(bp); - } - return; - } else { - bioerror(bp, ENODEV); - biodone(bp); - return; - } - } - - ASSERT(RW_READ_HELD(&svp->sv_lock)); - - if (svp->sv_flag == 0) { - /* - * guard access mode - * - prevent user level access to the 
device - */ - DTRACE_PROBE1(sv_lyr_strategy_err_guard, struct buf *, bp); - bioerror(bp, EPERM); - goto out; - } - - if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) { - DTRACE_PROBE1(sv_lyr_strategy_err_rsrv, struct buf *, bp); - - if (rc == EINTR) - cmn_err(CE_WARN, "!nsc_reserve() returned EINTR"); - bioerror(bp, rc); - goto out; - } - - if (bp->b_lblkno >= (diskaddr_t)svp->sv_nblocks) { - DTRACE_PROBE1(sv_lyr_strategy_eof, struct buf *, bp); - - if (bp->b_flags & B_READ) { - /* return EOF, not an error */ - bp->b_resid = bp->b_bcount; - bioerror(bp, 0); - } else - bioerror(bp, EINVAL); - - goto done; - } - - /* - * Preallocate a handle once per call to strategy. - * If this fails, then the nsc_alloc_buf() will allocate - * a temporary handle per allocation/free pair. - */ - - DTRACE_PROBE1(sv_dbg_alloch_start, sv_dev_t *, svp); - - bufh = nsc_alloc_handle(svp->sv_fd, NULL, NULL, NULL); - - DTRACE_PROBE1(sv_dbg_alloch_end, sv_dev_t *, svp); - - if (bufh && (bufh->sb_flag & NSC_HACTIVE) != 0) { - DTRACE_PROBE1(sv_lyr_strategy_err_hactive, struct buf *, bp); - - cmn_err(CE_WARN, - "!sv: allocated active handle (bufh %p, flags %x)", - (void *)bufh, bufh->sb_flag); - - bioerror(bp, ENXIO); - goto done; - } - - fba_req = FBA_LEN(bp->b_bcount); - if (fba_req + bp->b_lblkno > (diskaddr_t)svp->sv_nblocks) - fba_req = (nsc_size_t)(svp->sv_nblocks - bp->b_lblkno); - - rw = (bp->b_flags & B_READ) ? NSC_READ : NSC_WRITE; - - bp_mapin(bp); - - bp->b_resid = bp->b_bcount; - buf_addr = bp->b_un.b_addr; - fba_off = 0; - - /* - * fba_req - requested size of transfer in FBAs after - * truncation to device extent, and allowing for - * possible non-FBA bounded final chunk. - * fba_off - offset of start of chunk from start of bp in FBAs. - * fba_len - size of this chunk in FBAs. 
- */ - -loop: - fba_len = min(fba_req, svp->sv_maxfbas); - hndl = bufh; - - DTRACE_PROBE4(sv_dbg_allocb_start, - sv_dev_t *, svp, - uint64_t, (uint64_t)(bp->b_lblkno + fba_off), - uint64_t, (uint64_t)fba_len, - int, rw); - - rc = nsc_alloc_buf(svp->sv_fd, (nsc_off_t)(bp->b_lblkno + fba_off), - fba_len, rw, &hndl); - - DTRACE_PROBE1(sv_dbg_allocb_end, sv_dev_t *, svp); - - if (rc > 0) { - DTRACE_PROBE1(sv_lyr_strategy_err_alloc, struct buf *, bp); - bioerror(bp, rc); - if (hndl != bufh) - (void) nsc_free_buf(hndl); - hndl = NULL; - goto done; - } - - tocopy = min(FBA_SIZE(fba_len), bp->b_resid); - v = hndl->sb_vec; - - if (rw == NSC_WRITE && FBA_OFF(tocopy) != 0) { - /* - * Not overwriting all of the last FBA, so read in the - * old contents now before we overwrite it with the new - * data. - */ - - DTRACE_PROBE2(sv_dbg_read_start, sv_dev_t *, svp, - uint64_t, (uint64_t)(hndl->sb_pos + hndl->sb_len - 1)); - - rc = nsc_read(hndl, (hndl->sb_pos + hndl->sb_len - 1), 1, 0); - if (rc > 0) { - bioerror(bp, rc); - goto done; - } - - DTRACE_PROBE1(sv_dbg_read_end, sv_dev_t *, svp); - } - - DTRACE_PROBE1(sv_dbg_bcopy_start, sv_dev_t *, svp); - - while (tocopy > 0) { - nbytes = min(tocopy, (nsc_size_t)v->sv_len); - - if (bp->b_flags & B_READ) - (void) bcopy(v->sv_addr, buf_addr, nbytes); - else - (void) bcopy(buf_addr, v->sv_addr, nbytes); - - bp->b_resid -= nbytes; - buf_addr += nbytes; - tocopy -= nbytes; - v++; - } - - DTRACE_PROBE1(sv_dbg_bcopy_end, sv_dev_t *, svp); - - if ((bp->b_flags & B_READ) == 0) { - DTRACE_PROBE3(sv_dbg_write_start, sv_dev_t *, svp, - uint64_t, (uint64_t)hndl->sb_pos, - uint64_t, (uint64_t)hndl->sb_len); - - rc = nsc_write(hndl, hndl->sb_pos, hndl->sb_len, 0); - - DTRACE_PROBE1(sv_dbg_write_end, sv_dev_t *, svp); - - if (rc > 0) { - bioerror(bp, rc); - goto done; - } - } - - /* - * Adjust FBA offset and requested (ie. remaining) length, - * loop if more data to transfer. 
- */ - - fba_off += fba_len; - fba_req -= fba_len; - - if (fba_req > 0) { - DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp); - - rc = nsc_free_buf(hndl); - - DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp); - - if (rc > 0) { - DTRACE_PROBE1(sv_lyr_strategy_err_free, - struct buf *, bp); - bioerror(bp, rc); - } - - hndl = NULL; - - if (rc <= 0) - goto loop; - } - -done: - if (hndl != NULL) { - DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp); - - rc = nsc_free_buf(hndl); - - DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp); - - if (rc > 0) { - DTRACE_PROBE1(sv_lyr_strategy_err_free, - struct buf *, bp); - bioerror(bp, rc); - } - - hndl = NULL; - } - - if (bufh) - (void) nsc_free_handle(bufh); - - DTRACE_PROBE1(sv_dbg_rlse_start, sv_dev_t *, svp); - - nsc_release(svp->sv_fd); - - DTRACE_PROBE1(sv_dbg_rlse_end, sv_dev_t *, svp); - -out: - if (sv_debug > 5) { - cmn_err(CE_CONT, - "!_sv_lyr_strategy: bp %p, bufh %p, bp->b_error %d\n", - (void *)bp, (void *)bufh, bp->b_error); - } - - DTRACE_PROBE2(sv_lyr_strategy_end, struct buf *, bp, int, bp->b_error); - - rw_exit(&svp->sv_lock); - biodone(bp); -} - - -static void -sv_async_strategy(blind_t arg) -{ - struct buf *bp = (struct buf *)arg; - _sv_lyr_strategy(bp); -} - - -static int -sv_lyr_strategy(struct buf *bp) -{ - nsthread_t *tp; - int nlive; - - /* - * If B_ASYNC was part of the DDI we could use it as a hint to - * not create a thread for synchronous i/o. - */ - if (sv_dev_to_sv(bp->b_edev, NULL) == NULL) { - /* not sv enabled - just pass through */ - DTRACE_PROBE1(sv_lyr_strategy_notsv, struct buf *, bp); - _sv_lyr_strategy(bp); - return (0); - } - - if (sv_debug > 4) { - cmn_err(CE_CONT, "!sv_lyr_strategy: nthread %d nlive %d\n", - nst_nthread(sv_tset), nst_nlive(sv_tset)); - } - - /* - * If there are only guard devices enabled there - * won't be a threadset, so don't try and use it. 
- */ - tp = NULL; - if (sv_tset != NULL) { - tp = nst_create(sv_tset, sv_async_strategy, (blind_t)bp, 0); - } - - if (tp == NULL) { - /* - * out of threads, so fall back to synchronous io. - */ - if (sv_debug > 0) { - cmn_err(CE_CONT, - "!sv_lyr_strategy: thread alloc failed\n"); - } - - DTRACE_PROBE1(sv_lyr_strategy_no_thread, - struct buf *, bp); - - _sv_lyr_strategy(bp); - sv_no_threads++; - } else { - nlive = nst_nlive(sv_tset); - if (nlive > sv_max_nlive) { - if (sv_debug > 0) { - cmn_err(CE_CONT, - "!sv_lyr_strategy: " - "new max nlive %d (nthread %d)\n", - nlive, nst_nthread(sv_tset)); - } - - sv_max_nlive = nlive; - } - } - - return (0); -} - -/* - * re-write the size of the current partition - */ -static int -sv_fix_dkiocgvtoc(const intptr_t arg, const int mode, sv_dev_t *svp) -{ - size_t offset; - int ilp32; - int pnum; - int rc; - - ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32); - - rc = nskern_partition(svp->sv_dev, &pnum); - if (rc != 0) { - return (rc); - } - - if (pnum < 0 || pnum >= V_NUMPAR) { - cmn_err(CE_WARN, - "!sv_gvtoc: unable to determine partition number " - "for dev %lx", svp->sv_dev); - return (EINVAL); - } - - if (ilp32) { - int32_t p_size; - -#ifdef _SunOS_5_6 - offset = offsetof(struct vtoc, v_part); - offset += sizeof (struct partition) * pnum; - offset += offsetof(struct partition, p_size); -#else - offset = offsetof(struct vtoc32, v_part); - offset += sizeof (struct partition32) * pnum; - offset += offsetof(struct partition32, p_size); -#endif - - p_size = (int32_t)svp->sv_nblocks; - if (p_size == 0) { - if (sv_reserve(svp->sv_fd, - NSC_MULTI|NSC_PCATCH) == 0) { - p_size = (int32_t)svp->sv_nblocks; - nsc_release(svp->sv_fd); - } else { - rc = EINTR; - } - } - - if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset), - sizeof (p_size), mode) != 0) { - rc = EFAULT; - } - } else { - long p_size; - - offset = offsetof(struct vtoc, v_part); - offset += sizeof (struct partition) * pnum; - offset += 
offsetof(struct partition, p_size); - - p_size = (long)svp->sv_nblocks; - if (p_size == 0) { - if (sv_reserve(svp->sv_fd, - NSC_MULTI|NSC_PCATCH) == 0) { - p_size = (long)svp->sv_nblocks; - nsc_release(svp->sv_fd); - } else { - rc = EINTR; - } - } - - if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset), - sizeof (p_size), mode) != 0) { - rc = EFAULT; - } - } - - return (rc); -} - - -#ifdef DKIOCPARTITION -/* - * re-write the size of the current partition - * - * arg is dk_efi_t. - * - * dk_efi_t->dki_data = (void *)(uintptr_t)efi.dki_data_64; - * - * dk_efi_t->dki_data --> efi_gpt_t (label header) - * dk_efi_t->dki_data + 1 --> efi_gpe_t[] (array of partitions) - * - * efi_gpt_t->efi_gpt_PartitionEntryArrayCRC32 --> CRC32 of array of parts - * efi_gpt_t->efi_gpt_HeaderCRC32 --> CRC32 of header itself - * - * This assumes that sizeof (efi_gpt_t) is the same as the size of a - * logical block on the disk. - * - * Everything is little endian (i.e. disk format). - */ -static int -sv_fix_dkiocgetefi(const intptr_t arg, const int mode, sv_dev_t *svp) -{ - dk_efi_t efi; - efi_gpt_t gpt; - efi_gpe_t *gpe = NULL; - size_t sgpe; - uint64_t p_size; /* virtual partition size from nsctl */ - uint32_t crc; - int unparts; /* number of parts in user's array */ - int pnum; - int rc; - - rc = nskern_partition(svp->sv_dev, &pnum); - if (rc != 0) { - return (rc); - } - - if (pnum < 0) { - cmn_err(CE_WARN, - "!sv_efi: unable to determine partition number for dev %lx", - svp->sv_dev); - return (EINVAL); - } - - if (ddi_copyin((void *)arg, &efi, sizeof (efi), mode)) { - return (EFAULT); - } - - efi.dki_data = (void *)(uintptr_t)efi.dki_data_64; - - if (efi.dki_length < sizeof (gpt) + sizeof (gpe)) { - return (EINVAL); - } - - if (ddi_copyin((void *)efi.dki_data, &gpt, sizeof (gpt), mode)) { - rc = EFAULT; - goto out; - } - - if ((unparts = LE_32(gpt.efi_gpt_NumberOfPartitionEntries)) == 0) - unparts = 1; - else if (pnum >= unparts) { - cmn_err(CE_WARN, - "!sv_efi: partition# 
beyond end of user array (%d >= %d)", - pnum, unparts); - return (EINVAL); - } - - sgpe = sizeof (*gpe) * unparts; - gpe = kmem_alloc(sgpe, KM_SLEEP); - - if (ddi_copyin((void *)(efi.dki_data + 1), gpe, sgpe, mode)) { - rc = EFAULT; - goto out; - } - - p_size = svp->sv_nblocks; - if (p_size == 0) { - if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) { - p_size = (diskaddr_t)svp->sv_nblocks; - nsc_release(svp->sv_fd); - } else { - rc = EINTR; - } - } - - gpe[pnum].efi_gpe_EndingLBA = LE_64( - LE_64(gpe[pnum].efi_gpe_StartingLBA) + p_size - 1); - - gpt.efi_gpt_PartitionEntryArrayCRC32 = 0; - CRC32(crc, gpe, sgpe, -1U, sv_crc32_table); - gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); - - gpt.efi_gpt_HeaderCRC32 = 0; - CRC32(crc, &gpt, sizeof (gpt), -1U, sv_crc32_table); - gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); - - if ((rc == 0) && ddi_copyout(&gpt, efi.dki_data, sizeof (gpt), mode)) { - rc = EFAULT; - goto out; - } - - if ((rc == 0) && ddi_copyout(gpe, efi.dki_data + 1, sgpe, mode)) { - rc = EFAULT; - goto out; - } - -out: - if (gpe) { - kmem_free(gpe, sgpe); - } - - return (rc); -} - - -/* - * Re-write the size of the partition specified by p_partno - * - * Note that if a DKIOCPARTITION is issued to an fd opened against a - * non-sv'd device, but p_partno requests the size for a different - * device that is sv'd, this function will *not* be called as sv is - * not interposed on the original device (the fd). - * - * It would not be easy to change this as we cannot get the partition - * number for the non-sv'd device, so cannot compute the dev_t of the - * (sv'd) p_partno device, and so cannot find out if it is sv'd or get - * its size from nsctl. - * - * See also the "Bug 4755783" comment in sv_lyr_ioctl(). 
- */ -static int -sv_fix_dkiocpartition(const intptr_t arg, const int mode, sv_dev_t *svp) -{ - struct partition64 p64; - sv_dev_t *nsvp = NULL; - diskaddr_t p_size; - minor_t nminor; - int pnum, rc; - dev_t ndev; - - rc = nskern_partition(svp->sv_dev, &pnum); - if (rc != 0) { - return (rc); - } - - if (ddi_copyin((void *)arg, &p64, sizeof (p64), mode)) { - return (EFAULT); - } - - if (p64.p_partno != pnum) { - /* switch to requested partition, not the current one */ - nminor = getminor(svp->sv_dev) + (p64.p_partno - pnum); - ndev = makedevice(getmajor(svp->sv_dev), nminor); - nsvp = sv_find_enabled(ndev, NULL); - if (nsvp == NULL) { - /* not sv device - just return */ - return (0); - } - - svp = nsvp; - } - - p_size = svp->sv_nblocks; - if (p_size == 0) { - if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) { - p_size = (diskaddr_t)svp->sv_nblocks; - nsc_release(svp->sv_fd); - } else { - rc = EINTR; - } - } - - if (nsvp != NULL) { - rw_exit(&nsvp->sv_lock); - } - - if ((rc == 0) && ddi_copyout(&p_size, - (void *)(arg + offsetof(struct partition64, p_size)), - sizeof (p_size), mode) != 0) { - return (EFAULT); - } - - return (rc); -} -#endif /* DKIOCPARTITION */ - - -static int -sv_lyr_ioctl(const dev_t dev, const int cmd, const intptr_t arg, - const int mode, cred_t *crp, int *rvalp) -{ - sv_dev_t *svp; - sv_maj_t *maj; - int (*fn)(); - int rc = 0; - - maj = 0; - fn = 0; - - /* - * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, then it will continue. - * else it means it previously was SV_PREVENT_UNLOAD, and now it's - * SV_ALLOW_UNLOAD, expecting the driver to eventually unload. - * - * SV_ALLOW_UNLOAD is final state, so no need to grab sv_mutex. - */ - if (sv_mod_status == SV_ALLOW_UNLOAD) { - return (EBUSY); - } - - svp = sv_find_enabled(dev, &maj); - if (svp != NULL) { - if (nskernd_isdaemon()) { - /* - * This is nskernd which always needs to see - * the underlying disk device accurately. 
- * - * So just pass the ioctl straight through - * to the underlying driver as though the device - * was not sv enabled. - */ - DTRACE_PROBE2(sv_lyr_ioctl_nskernd, sv_dev_t *, svp, - dev_t, dev); - - rw_exit(&svp->sv_lock); - svp = NULL; - } else { - ASSERT(RW_READ_HELD(&svp->sv_lock)); - } - } - - /* - * We now have a locked and enabled SV device, or a non-SV device. - */ - - switch (cmd) { - /* - * DKIOCGVTOC, DKIOCSVTOC, DKIOCPARTITION, DKIOCGETEFI - * and DKIOCSETEFI are intercepted and faked up as some - * i/o providers emulate volumes of a different size to - * the underlying volume. - * - * Setting the size by rewriting the vtoc is not permitted. - */ - - case DKIOCSVTOC: -#ifdef DKIOCPARTITION - case DKIOCSETEFI: -#endif - if (svp == NULL) { - /* not intercepted -- allow ioctl through */ - break; - } - - rw_exit(&svp->sv_lock); - - DTRACE_PROBE2(sv_lyr_ioctl_svtoc, dev_t, dev, int, EPERM); - - return (EPERM); - - default: - break; - } - - /* - * Pass through the real ioctl command. - */ - - if (maj && (fn = maj->sm_ioctl) != 0) { - if (!(maj->sm_flag & D_MP)) { - UNSAFE_ENTER(); - rc = (*fn)(dev, cmd, arg, mode, crp, rvalp); - UNSAFE_EXIT(); - } else { - rc = (*fn)(dev, cmd, arg, mode, crp, rvalp); - } - } else { - rc = ENODEV; - } - - /* - * Bug 4755783 - * Fix up the size of the current partition to allow - * for the virtual volume to be a different size to the - * physical volume (e.g. for II compact dependent shadows). - * - * Note that this only attempts to fix up the current partition - * - the one that the ioctl was issued against. There could be - * other sv'd partitions in the same vtoc, but we cannot tell - * so we don't attempt to fix them up. 
- */ - - if (svp != NULL && rc == 0) { - switch (cmd) { - case DKIOCGVTOC: - rc = sv_fix_dkiocgvtoc(arg, mode, svp); - break; - -#ifdef DKIOCPARTITION - case DKIOCGETEFI: - rc = sv_fix_dkiocgetefi(arg, mode, svp); - break; - - case DKIOCPARTITION: - rc = sv_fix_dkiocpartition(arg, mode, svp); - break; -#endif /* DKIOCPARTITION */ - } - } - - if (svp != NULL) { - rw_exit(&svp->sv_lock); - } - - return (rc); -} diff --git a/usr/src/uts/common/avs/ns/sv/sv.conf b/usr/src/uts/common/avs/ns/sv/sv.conf deleted file mode 100644 index 75d6ba3295..0000000000 --- a/usr/src/uts/common/avs/ns/sv/sv.conf +++ /dev/null @@ -1,36 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. - -name="sv" parent="pseudo" instance=0; - -# -# sv_threads -# -# The number of threads to pre-allocate when loading the sv kernel module. -# The sv module will ensure that there is always at least one thread -# allocated per enabled device. -# -# However, if lots of asynchronous (including filesystem) i/o is -# anticipated increasing this value may improve performance. 
-# -sv_threads=32; diff --git a/usr/src/uts/common/avs/ns/sv/sv.h b/usr/src/uts/common/avs/ns/sv/sv.h deleted file mode 100644 index c84a2b5674..0000000000 --- a/usr/src/uts/common/avs/ns/sv/sv.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SV_H -#define _SV_H - -#ifdef __cplusplus -extern "C" { -#endif - - -/* - * Storage Volume Character and Block Driver (SV) - * Public header file. - * SPARC case 1998/036. - * PSARC case 1999/023. 
- */ - -#define SV_MAXPATH NSC_MAXPATH -#define SV_DEVICE "/dev/sv" - - -/* - * Ioctl structures - */ - -typedef struct sv_name_s { - char svn_path[SV_MAXPATH]; /* path to underlying raw device */ - time_t svn_timestamp; /* timestamp of successful enable */ - int svn_nblocks; /* size of device */ - int svn_mode; /* NSC_DEVICE | NSC_CACHE */ -} sv_name_t; - - -#ifdef _KERNEL - -typedef struct sv_name32_s { - char svn_path[SV_MAXPATH]; /* path to underlying raw device */ - int32_t svn_timestamp; /* timestamp of successful enable */ - int32_t svn_nblocks; /* size of device */ - int32_t svn_mode; /* NSC_DEVICE | NSC_CACHE */ -} sv_name32_t; - -#endif /* _KERNEL */ - - -typedef struct sv_list_s { - spcs_s_info_t svl_error; /* Error information */ - time_t svl_timestamp; /* time of successful {en,dis}able */ - int svl_count; /* Count of elements in svl_names */ - int svl_maxdevs; /* Max # of devices that can be used */ - sv_name_t *svl_names; /* pointer to names array */ -} sv_list_t; - - -#ifdef _KERNEL - -typedef struct sv_list32_s { - spcs_s_info32_t svl_error; /* Error information */ - int32_t svl_timestamp; /* time of successful {en,dis}able */ - int32_t svl_count; /* Count of elements in svl_names */ - int32_t svl_maxdevs; /* Max # of devices that can be used */ - uint32_t svl_names; /* pointer to names array */ -} sv_list32_t; - -#endif /* _KERNEL */ - - -typedef struct sv_conf_s { - spcs_s_info_t svc_error; /* Error information */ - char svc_path[SV_MAXPATH]; /* path to underlying raw device */ - int svc_flag; /* NSC_DEVICE | NSC_CACHE */ - major_t svc_major; /* major_t of underlying raw device */ - minor_t svc_minor; /* minor_t of underlying raw device */ -} sv_conf_t; - -#ifdef _KERNEL - -typedef struct sv_conf32_s { - spcs_s_info32_t svc_error; /* Error information */ - char svc_path[SV_MAXPATH]; /* path to underlying raw device */ - int32_t svc_flag; /* NSC_DEVICE | NSC_CACHE */ - major_t svc_major; /* major_t of underlying raw device */ - minor_t svc_minor; 
/* minor_t of underlying raw device */ -} sv_conf32_t; - -#endif /* _KERNEL */ - - -typedef struct sv_version_s { - spcs_s_info_t svv_error; /* Error information */ - int svv_major_rev; /* Major revision */ - int svv_minor_rev; /* Minor revision */ - int svv_micro_rev; /* Micro revision */ - int svv_baseline_rev; /* Baseline revision */ -} sv_version_t; - -#ifdef _KERNEL - -typedef struct sv_version32_s { - spcs_s_info32_t svv_error; /* Error information */ - int32_t svv_major_rev; /* Major revision */ - int32_t svv_minor_rev; /* Minor revision */ - int32_t svv_micro_rev; /* Micro revision */ - int32_t svv_baseline_rev; /* Baseline revision */ -} sv_version32_t; - -#endif /* _KERNEL */ - - -#ifdef _KERNEL - -/* - * SV guard devices. - */ - -typedef struct sv_guard_s { - int sg_magic; /* Magic # */ - int sg_version; /* Version # */ - char *sg_pathname; /* Pathname of device to guard */ - char *sg_module; /* Module name of client */ - int sg_kernel; /* Prevent user access if true */ - spcs_s_info_t sg_error; /* Error to be returned to client */ -} sv_guard_t; - -#define SV_SG_MAGIC 0x47554152 -#define SV_SG_VERSION 1 - -#endif /* _KERNEL */ - - -/* - * Ioctl numbers. - */ - -#define __SV__(x) (('S'<<16)|('V'<<8)|(x)) - -#define SVIOC_ENABLE __SV__(1) -#define SVIOC_DISABLE __SV__(2) -#define SVIOC_LIST __SV__(3) -#define SVIOC_VERSION __SV__(4) -#define SVIOC_UNLOAD __SV__(5) - -/* - * seconds to wait before unload, to drain lingering IOs. - */ -#define SV_WAIT_UNLOAD 10 - -#ifdef __cplusplus -} -#endif - -#endif /* _SV_H */ diff --git a/usr/src/uts/common/avs/ns/sv/sv_efi.h b/usr/src/uts/common/avs/ns/sv/sv_efi.h deleted file mode 100644 index 2e8d4fcda3..0000000000 --- a/usr/src/uts/common/avs/ns/sv/sv_efi.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SV_EFI_H -#define _SV_EFI_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * This header hides the differences between the header files and - * macros needed for EFI vtocs in the various Solaris releases. - * - * <sys/dkio.h> and <sys/vtoc.h> must have already been included. - */ - -#if !defined(_SYS_DKIO_H) || !defined(_SYS_VTOC_H) -#error sys/dkio.h or sys/vtoc.h has not been included -#endif - -#ifdef DS_DDICT -#undef DKIOCPARTITION -#endif - -#ifdef DKIOCPARTITION - -#include <sys/efi_partition.h> -#include <sys/byteorder.h> - -/* - * Solaris 10 has all the support we need in the header files, - * just include <sys/crc32.h>. - */ -#include <sys/crc32.h> - -#endif /* DKIOCPARTITION */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SV_EFI_H */ diff --git a/usr/src/uts/common/avs/ns/sv/sv_impl.h b/usr/src/uts/common/avs/ns/sv/sv_impl.h deleted file mode 100644 index d177e8f0e3..0000000000 --- a/usr/src/uts/common/avs/ns/sv/sv_impl.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SV_IMPL_H -#define _SV_IMPL_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Storage Volume Character and Block Driver (SV) - * Private header file. - */ - -#if defined(_KERNEL) - -/* - * Locking. - * Define SV_SLEEP_LOCK to get full sleep lock semantics (ie. mutex not - * held across calls to sdctl functions. - * - * #define SV_SLEEP_LOCK - */ - - -/* - * Misc defines, enums. - */ - -enum { SV_DISABLE = 0, SV_PENDING, SV_ENABLE }; - - -/* - * Guard device clients - */ - -typedef int64_t sv_gid_t; /* bitmask */ - -typedef struct sv_gclient_s { - struct sv_gclient_s *sg_next; /* linked list */ - char *sg_name; /* name of client */ - sv_gid_t sg_id; /* id (bitmask) of client */ -} sv_gclient_t; - - -/* - * Hashing. - * - * SV_MAJOR_HASH_CNT & SV_MINOR_HASH_CNT should be prime. - * - * In a given system, there is likely to be one or two major devices in use. 
- * - * Examples are: - * SD - Direct Attached Storage (SCSI-2/3) - * SSD - SAN Direct Attached Storage FC SCSI-2/3 - * SVM - Solaris Volume Manager - * VxVM - Veritas Volume Manager - * Global - Sun Cluster Global Devices - * - * For a typical system, there may be a 10s to 100s of minor devices configured - * per major device, but most are likely to be configured under a single major - * number. SV_MINOR_HASH_CNT has been chosen to ensure that the hash chains are - * not too long (one or two devices), for the worst case. - */ - -#define SV_MAJOR_HASH_CNT 3 /* # hash buckets per system */ -#define SV_MAJOR_HASH(min) ((min) % SV_MAJOR_HASH_CNT) - -#define SV_MINOR_HASH_CNT 37 /* # hash buckets per major */ -#define SV_MINOR_HASH(min) ((min) % SV_MINOR_HASH_CNT) - -/* - * Per major device structure. - * - */ - -typedef struct sv_maj_s { - struct dev_ops *sm_dev_ops; - int (*sm_strategy)(); - int (*sm_awrite)(); - int (*sm_write)(); - int (*sm_ioctl)(); - int (*sm_close)(); - int (*sm_aread)(); - int (*sm_read)(); - int (*sm_open)(); - major_t sm_major; /* Major device # */ - int sm_flag; - volatile int sm_inuse; - volatile int sm_seq; - struct sv_dev_s *sm_hash[SV_MINOR_HASH_CNT]; /* Minor Hash Table */ - struct sv_maj_s *sm_next; /* Major Hash Chain */ -} sv_maj_t; - -/* - * Per configured sv structure. 
- */ - -typedef struct sv_dev_s { - struct sv_dev_s *sv_hash; /* Minor hash chain */ - krwlock_t sv_lock; /* mutual exclusion */ - kmutex_t sv_olock; /* mutual exclusion for otyp flags */ - dev_t sv_dev; /* underlying dev_t */ - nsc_fd_t *sv_fd; /* underlying fd */ - nsc_size_t sv_maxfbas; /* maxfbas accepted by I/O module */ - nsc_size_t sv_nblocks; /* size of device */ - int sv_state; /* state */ - int sv_flag; /* internal flags */ - sv_gid_t sv_gclients; /* bitmask of all guard clients */ - sv_gid_t sv_gkernel; /* bitmask of kernel guard clients */ - int sv_openlcnt; /* # of OTYP_LYR opens whilst failed */ - clock_t sv_timestamp; /* time of successful {en,dis}able */ - ldi_handle_t sv_lh; /* layered open handle */ - void *sv_pending; /* the thread setting SV_PENDING */ -} sv_dev_t; - -/* - * private functions exported from nskern to sv. - */ -extern int nskern_partition(dev_t, int *); -extern int nskernd_isdaemon(void); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SV_IMPL_H */ diff --git a/usr/src/uts/common/avs/ns/unistat/Makefile b/usr/src/uts/common/avs/ns/unistat/Makefile deleted file mode 100644 index 7423874404..0000000000 --- a/usr/src/uts/common/avs/ns/unistat/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# include global definitions -include ../../../../../Makefile.master - -HDRS= spcs_s.h \ - spcs_s_k.h \ - spcs_s_impl.h \ - spcs_s_u.h - -ROOTDIR= $(ROOT)/usr/include/sys/unistat -ROOTDIRS= $(ROOTDIR) - -ROOTHDRS= $(HDRS:%=$(ROOTDIRS)/%) - -# install rules -$(ROOTDIRS)/%: % - $(INS.file) - -CHECKHDRS= $(HDRS:%.h=%.check) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIRS) $(ROOTHDRS) - -$(ROOTDIRS): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/unistat/spcs_s.h b/usr/src/uts/common/avs/ns/unistat/spcs_s.h deleted file mode 100644 index bac1383d90..0000000000 --- a/usr/src/uts/common/avs/ns/unistat/spcs_s.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SPCS_S_H -#define _SPCS_S_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * SPCS Uniform status handling public definitions - * @author Soper - * @version PROTOTYPE - */ - - - - -/* - * Function returned normally, no status info available (== 0) - */ -#define SPCS_S_OK 0 - -/* - * Function returned abnormally, status info available (== -1) - */ -#define SPCS_S_ERROR -1 - -/* - * The maximum status line character array length (== 1024) - * @see spcs_s_string - */ -#define SPCS_S_MAXLINE 1024 - -/* - * The maximum number of "%s" format descriptors in status message - * text and data parameters that can be passed along with status - * @see spcs_s_string - */ -#define SPCS_S_MAXSUPP 8 - -/* - * The opaque status information type - */ -typedef uintptr_t spcs_s_info_t; - -/* - * The status information type as a 32 bit entity for model conversions - */ -typedef uint32_t spcs_s_info32_t; - -/* - * The type of bytestream data (see spcs_s_add_bytestream() ) - */ -typedef uchar_t *spcs_s_bytestream_ptr_t; - -/* - * The type of a status code - */ -typedef int spcs_s_status_t; - -#ifdef __cplusplus -} -#endif - -#endif /* _SPCS_S_H */ diff --git a/usr/src/uts/common/avs/ns/unistat/spcs_s_impl.h b/usr/src/uts/common/avs/ns/unistat/spcs_s_impl.h deleted file mode 100644 index a4e4b965a0..0000000000 --- a/usr/src/uts/common/avs/ns/unistat/spcs_s_impl.h +++ /dev/null @@ -1,310 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SPCS_S_IMPL_H -#define _SPCS_S_IMPL_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * The SPCS Unistat private implementation definitions - * - * Only modules spcs_s_u.c and spcs_s_k.c should be using this - */ - -/* - * For Unistat, here are the definitions of the major and minor revisions: - * - * Bump major revision and zero minor revision if: Any change made to - * spcs_s_pinfo_t in terms of size, changed fields, etc, or any user - * functional change to spcs_s.h definitions that is not backwards - * compatible. - * - * Bump minor revision if: Any backwards compatible change to - * functionality but with no impact on interoperability between kernel and - * user level Unistat code having differing minor revs. - * - */ - -#define SPCS_S_MAJOR_REV 1 /* Unistat major revision */ -#define SPCS_S_MINOR_REV 1 /* Unistat minor revision */ -/* - * This is the format of a unistat status code. It must overlay - * an int. - */ -#if defined(__sparc) -typedef struct { - /* - * If this flag is set the last supplemental item in idata is expected - * to be of type SU_BYTESTREAM and offset is a tdata index. - */ - unsigned char bytestream: 1; - /* - * count of items of supporting information references in idata - * to accompany this error status code spcs.h define SPCS_S_MAXSUPP - * must be 2 raised to the bit size of this field. Also don't forget - * to update the sprintf in spcs_s_string. 
- */ - unsigned char reserved: 4; /* reserved for future expansion */ - unsigned char sup_count: 3; - unsigned char module: 8; /* module code (see below) */ - unsigned short code: 16; /* status code number (>0) */ -} spcs_s_code_t; -#elif defined(__i386) || (__amd64) -typedef struct { - /* - * count of items of supporting information references in idata - * to accompany this error status code spcs.h define SPCS_S_MAXSUPP - * must be 2 raised to the bit size of this field. Also don't forget - * to update the sprintf in spcs_s_string. - */ - unsigned short code: 16; /* status code number (>0) */ - unsigned char module: 8; /* module code (see below) */ - unsigned char sup_count: 3; - unsigned char reserved: 4; /* reserved for future expansion */ - /* - * If this flag is set the last supplemental item in idata is expected - * to be of type SU_BYTESTREAM and offset is a tdata index. - */ - unsigned char bytestream: 1; -} spcs_s_code_t; -#else -#error "instruction set architecture error" -#endif - -/* - * The types of supplemental data references - */ - -typedef enum {SU_STRING, /* character string reference */ - SU_BYTESTREAM, /* bytestream data reference */ - SU_RES2, - SU_RES3} suenum; -/* - * Supplemental data references. These follow status codes that have - * nonzero sup_count fields. The supplemental data references can - * currently be either a string reference or a bytestream data reference. - * In both cases the reference simply contains an offset into the - * sdata array (string) or tdata array (bytestream). This struct must be - * the size of an int. 
- */ - -#if defined(__sparc) -typedef struct { - suenum type: 3; /* the supplemental data type */ - unsigned short reserved: 13; /* unused, reserved */ - unsigned short offset: 16; /* the sudata array offset of the */ - /* start of the supplemental data */ - /* or the tdata array offset for */ - /* bytestream data */ -} spcs_s_sudata_t; -#elif defined(__i386) || (__amd64) -typedef struct { - unsigned short offset: 16; /* the sudata array offset of the */ - /* start of the supplemental data */ - /* or the tdata array offset for */ - /* bytestream data */ - unsigned short reserved: 13; /* unused, reserved */ - suenum type: 3; /* the supplemental data type */ -} spcs_s_sudata_t; -#else -#error "instruction set architecture error" -#endif - -/* - * Although bytestream data pointers are only used in the kernel layer - * and are converted to offsets prior to unistat data being made available - * to userspace (i.e. this never comes back via an ioctl), it is critical - * to keep the unistat data structure spcs_s_pinfo_t a constant size - * whether or not we're using LP64 or a 32 bit model. So we put the - * pointer in a union with a long long so it is fixed at 64 bits in size. - * - * Prior to being transported through a pipe, unistat data containing - * tdata items (see below) must have its pointers eliminated. The pointers - * are simply nulled out and the actual bytestream data is sent out the - * pipe following the spcs_s_pinfo_t in the same order as its references - * in the sequential tdata elements. - */ - -typedef union { - uchar_t *data; /* the pointer to the bytestream data */ - long long _fix_the_size; -} _fixed_char_pointer_t; - -/* - * The bytestream data descriptor in a tdata array element - */ - -typedef struct { - uint32_t size; /* byte size of the bytestream data */ - _fixed_char_pointer_t u_p; /* union containing pointer inside */ - /* fixed length field */ -} spcs_s_tdesc_t; - -/* - * All the types that can occupy an idata array element. 
- */ - -typedef union { - spcs_s_status_t s; /* as the public status type */ - spcs_s_code_t f; /* as the internal status type */ - spcs_s_sudata_t su; /* the supplemental data reference type */ - int i; /* as integer: TEMPORARY */ -} spcs_s_udata_t; - -/* - * The number of idata array elements. This is the upper bound for the - * total status codes and supplemental data reference items that can be - * held by unistat at one time. It is IMPORTANT that this array be large - * enough to hold all the status and references for the worst case path - * through core software. This is currently trivial to do by inspection - * of the ioctl service code. However once unistat usage is deployed to - * the underlying layers of core software below the ioctl service call - * layer it may require special tools to validate this. - */ - -#define SPCS_S_IDSIZE 16 /* size of idata array */ -/* - * The number of sdata array elements. This is the upper bound for the - * total characters of string data added to the unistat structure as - * supplemental info. Same cautions as for SPCS_S_IDSIZE. - */ - -#define SPCS_S_SDSIZE 512 /* size of sdata array */ -/* - * The number of tdata array elements. This is the upper bound for the - * total bytestream data descriptors that can be held by unistat at one - * time. Same cautions as for SPCS_S_IDSIZE. - */ - -#define SPCS_S_TDSIZE 2 /* size of tdata array */ - -/* - * The Unistat private data structure. This is pointed to by the - * public opaque pointer spcs_s_info_t and holds all the status codes - * and supplemental data references. String data is also stored here - * but the body of bytestream data is stored elsewhere (see below). - * - * If there is real concern about the overhead of ioctl copyouts they - * could be optimized such that only the scalars and the "used" elements - * of the idata, sdata and tdata arrays are moved. If this is done it is - * recommended that the scalars (i.e. 
major through spare) be moved into - * a structure to cut down on the chance of a coding error with manual - * size arithmetic. - * - * The major and minor revs are currently supperfulous since unistat and - * all of its clients are contained within the same private consolidation. - * There is an assertion to BLOW UP if mismatched major revisions are - * detected between the kernel and user layers. If the consolidation - * policies of core software are relaxed in the future the assertion must - * be replaced by code designed to do something intelligent if possible. - * - */ - -#pragma pack() -typedef struct { - /* The next two fields must stay shorts and */ - /* stay at the front and in this order */ - /* "forever" */ - short major; /* Major unistat revision */ - short minor; /* Minor unistat revision */ - /* this define should obviously never change */ -#define SPCS_S_REVSIZE (sizeof (short) + sizeof (short)) - short icount; /* Number of items currently stored in idata */ - /* and the "next" index to store a new item */ - /* into */ - short scount; /* Number of items currently stored in sdata */ - /* and the "next" index to store a new item */ - /* into */ - short tcount; /* Number of items currently stored in tdata */ - /* and the "next" index to store a new item */ - /* into */ - short spare; /* Unused, reserved */ - spcs_s_udata_t idata[SPCS_S_IDSIZE]; /* the status info and supp refs */ - char sdata[SPCS_S_SDSIZE]; /* the supplemental string data pool. */ - /* the supplemental bytestream data pool. */ - spcs_s_tdesc_t tdata[SPCS_S_TDSIZE]; -} spcs_s_pinfo_t; - -/* - * Module codes. These can be in any order except that Solaris MUST BE - * FIRST. 
- */ - -enum {SPCS_M_Solaris, /* Solaris module */ - SPCS_M_SPCS, /* SPCS "module" (for codes that apply across */ - /* all controller modules */ - SPCS_M_DSW, /* InstantImage Module */ - SPCS_M_SV, /* Storage Volume Module */ - SPCS_M_RDC, /* Remote Dual Copy Module */ - SPCS_M_SDBC, /* Storage Device Block Cache Module */ - SPCS_M_STE, /* SCSI Target Emulation Module */ - SPCS_M_SDCTL, /* Storage Device Control Module */ - SPCS_M_MC, /* Memory Channel Module */ - SPCS_M_SIMCKD, /* CKD Simulation (SIMCKD) Module */ - SPCS_M_NVM}; /* Non-Volatile Memory Module */ - -#define SPCS_M_MAX SPCS_M_NVM /* Highest defined module code */ - -/* - * The SPCS general status values - */ - -/* the module name spellings */ - -#define SPCS_M_NSOL "SOLARIS" -#define SPCS_M_NSPCS "SPCS" -#define SPCS_M_NDSW "II" -#define SPCS_M_NSV "SV" -#define SPCS_M_NRDC "SNDR" -#define SPCS_M_NSDBC "SDBC" -#define SPCS_M_NSTE "STE" -#define SPCS_M_NSDCTL "NSCTL" -#define SPCS_M_NMC "MC" -#define SPCS_M_NSIM "SIMCKD" -#define SPCS_M_NNVM "NVM" - -/* limits */ - -#define SPCS_S_MAXKEY 256 /* max msg key length */ -#define SPCS_S_MAXTEXT SPCS_S_MAXLINE /* max msg text length */ -#define SPCS_S_MAXSIG 32 /* max format data signature length */ -#define SPCS_S_MAXPRE 32 /* max module prefix length */ -#define SPCS_S_MAXMODNAME 16 /* max module name length */ - -/* the module names in a lookup array */ -#if !defined(_KERNEL) -static char *module_names[] = {SPCS_M_NSOL, SPCS_M_NSPCS, SPCS_M_NDSW, - SPCS_M_NSV, SPCS_M_NRDC, SPCS_M_NSDBC, SPCS_M_NSTE, SPCS_M_NSDCTL, - SPCS_M_NMC, SPCS_M_NSIM, SPCS_M_NNVM, NULL}; -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _SPCS_S_IMPL_H */ diff --git a/usr/src/uts/common/avs/ns/unistat/spcs_s_k.c b/usr/src/uts/common/avs/ns/unistat/spcs_s_k.c deleted file mode 100644 index 7ee4a93d98..0000000000 --- a/usr/src/uts/common/avs/ns/unistat/spcs_s_k.c +++ /dev/null @@ -1,888 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms 
of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * The SPCS status support kernel utilities - * See header spcs_s_k.h for functional spec - */ -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include <sys/varargs.h> - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_s_impl.h> -#include <sys/unistat/spcs_errors.h> - -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -/* - * Debug support to allow testing in userspace - */ - -#if UNISTAT_ASSERTIONS -#define _CELEVEL CE_PANIC -#else -#define _CELEVEL CE_WARN -#endif - - -/* - * Unistat state data - */ - -/* - * This flag is made nonzero to indicate the bytestream transport mechanism - * is initalized. 
- */ - -static int bytestream_transport_initialized = 0; - -/* - * Common code for status init - * - */ - -static void init_status(spcs_s_pinfo_t *p) -{ -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!init_status entry"); -#endif - p->major = SPCS_S_MAJOR_REV; - p->minor = SPCS_S_MINOR_REV; - p->icount = 0; - p->scount = 0; - p->tcount = 0; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!init_status exit"); -#endif -} - -/* - * Create and initialize local ioctl status. - * - */ - -spcs_s_info_t -spcs_s_kcreate() -{ - spcs_s_pinfo_t *kstatus; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_kcreate entry"); -#endif - kstatus = (spcs_s_pinfo_t *) - kmem_alloc(sizeof (spcs_s_pinfo_t), KM_SLEEP); - - if (kstatus) - init_status(kstatus); -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_kcreate exit"); -#endif - return ((spcs_s_info_t)kstatus); -} - -/* - * Initialize existing ioctl status. - */ - -void -spcs_s_kinit(spcs_s_info_t kstatus) -{ -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_kinit called"); -#endif - init_status((spcs_s_pinfo_t *)kstatus); -} - -/* - * Release (free) ioctl status storage. - * BUG: this should take an spcs_s_info_t** or else the userspace - * version shoud just take a pointer. Could hopefully fix up Simon and - * Phil's code without too much trouble to fix this. Being inconsistent - * over the long term is bad. - */ - -void -spcs_s_kfree(spcs_s_info_t kstatus) -{ -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_kfree entry"); -#endif - kmem_free((void *)kstatus, sizeof (spcs_s_pinfo_t)); -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_kfree exit"); -#endif -} - -/* - * Delete one error code and its supplemental info - * The "oldest" error code is removed. - * The assumption is that there is at least one status code present. 
- * Neither sdata nor tdata space is reclaimed - */ - -static void -spcs_delete(spcs_s_pinfo_t *p) -{ - int i; - int d; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_delete entry"); -#endif - d = p->idata[0].f.sup_count + 1; - - for (i = 0; i < (p->icount - d); i++) - p->idata[i] = p->idata[i+d]; - p->icount -= d; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_delete exit"); -#endif -} - -/* - * Common code for adding a status code - * Return 1 if overflow detected, 0 if enough space for code and support - * info. - */ - -static boolean_t -add_code(spcs_s_pinfo_t *p, spcs_s_status_t stcode) -{ - spcs_s_udata_t c; - c.s = stcode; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!add_code entry"); -#endif - - if ((p->icount + c.f.sup_count + 1) > SPCS_S_IDSIZE) { - if (p->icount == SPCS_S_IDSIZE) - spcs_delete(p); - p->idata[p->icount++].s = SPCS_EOVERFLOW; - - cmn_err(_CELEVEL, "!SPCS Unistat: not enough room in idata!"); -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!add_code exit 1"); -#endif - - return (B_TRUE); - } else - p->idata[p->icount++] = c; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!add_code exit 2"); -#endif - return (B_FALSE); -} - -/* - * Common code for adding a string as supplemental info. - * Add_code is assumed to have been called already to ensure enough space - * idata. The string is copied into the sdata array and the index to the - * first character is put in idata along with the datatype indicator. - */ - -static void -add_item(spcs_s_pinfo_t *p, char *string) -{ - int len; - char *nullstr = "XXXXXXXX"; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!add_item entry"); -#endif - len = strlen(string); - -/* - * The following HACK is for RDC which is somewhat careless about - * it's usage of strings. It does not make sense to panic the machine - * because we botched an informational message. Print something - * usefull so we can go back and fix it. 
- * This can be removed when everyone has played by the correct unistat rules - */ - if (len == 0) { - string = nullstr; - len = strlen(nullstr); - } - if ((len + 1) > (SPCS_S_SDSIZE - p->scount)) - cmn_err(_CELEVEL, - "!SPCS: Unistat sdata array too small: needed %d bytes", - len + 1); - - p->idata[p->icount].su.type = SU_STRING; - p->idata[p->icount++].su.offset = p->scount; - (void) strcpy(&(p->sdata[p->scount]), string); - p->scount += len + 1; -} - -/* - * Check the rev level of the userspace status structure - * and spew some chunks if it doesn't match the kernel's unistat rev. - * Some day something more intelligent should happen to try to provide - * backward compatiblity with some mismatches (see the impl header file). - * Returns true if the revisions are compatible, false otherwise. - */ - -static boolean_t -check_revision(spcs_s_info_t ustatus) -{ - char *m; - char buf[SPCS_S_REVSIZE]; - spcs_s_pinfo_t *p = (spcs_s_pinfo_t *)buf; - int mode = 0; - -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!check_revision entry"); -#endif - - m = - "!SPCS Unistat failure (packaging error): data struct mismatch"; - (void) ddi_copyin((void *) ustatus, (void *) p, SPCS_S_REVSIZE, mode); - - if ((p->major == SPCS_S_MAJOR_REV) && (p->minor == SPCS_S_MINOR_REV)) { - /* Both match */ -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!check_revision exit 1"); -#endif - return (B_TRUE); - } - - /* - * We have a major and/or minor version mismatch. - * Deal with each case individually. 
- */ - -#ifdef DEBUG - cmn_err(CE_WARN, "!unistat kernel v%d.%d, user v%d.%d\n", - SPCS_S_MAJOR_REV, SPCS_S_MINOR_REV, (int)p->major, (int)p->minor); -#endif - - if (p->major > SPCS_S_MAJOR_REV) { - /* - * couldn't guess what to do if the userspace version is ahead - * of the kernel version, so issue a warning - */ - cmn_err(CE_WARN, m); - } else if (p->major < SPCS_S_MAJOR_REV) { - /* - * kernel's major version is ahead of userspace version: do - * something extremely clever here some day instead of the - * warning - */ - cmn_err(CE_WARN, m); - } else if (p->minor < SPCS_S_MINOR_REV) { - - /* - * kernel's minor version is ahead of userspace version: do - * something clever here some day instead of the warning - */ - - cmn_err(CE_WARN, m); - } else { - /* - * couldn't guess what to do if the userspace version is ahead - * of the kernel's minor version, so issue a warning - */ - - cmn_err(CE_WARN, m); - } -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!check_revision exit 2"); -#endif - return (B_FALSE); -} - -/* - * Add a code and optional support information to status - * - * The support info can only consist of char pointers. - * - * Varargs doesn't provide a means of detecting too few supplemental - * values... - */ - -void -spcs_s_add(spcs_s_info_t kstatus, spcs_s_status_t stcode, ...) -{ - va_list ap; - spcs_s_udata_t c; - spcs_s_pinfo_t *p; - char *sp; - -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!cspcs_s_add entry"); -#endif - p = (spcs_s_pinfo_t *)kstatus; - c.s = stcode; - - if (add_code(p, stcode) == B_TRUE) { -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!cspcs_s_add exit 1"); -#endif - return; - } - - va_start(ap, stcode); - - while (c.f.sup_count--) { - sp = va_arg(ap, caddr_t); - if (sp != (char *)NULL) - add_item(p, sp); - } - - va_end(ap); -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!cspcs_s_add exit 2"); -#endif -} - -/* - * Common code to copy status to userspace - * - * Only "used" data is copied to minimize overhead. 
- */ - -static void -scopyout(spcs_s_pinfo_t *kstatus, spcs_s_pinfo_t *ustatus) -{ - int mode = 0; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!scopyout entry"); -#endif - - /* - * If tdata is in use, blow up: asynch data is not intended for ioctls. - * How would we ship it back? (the user hasn't given us any place to - * put it!) - */ - - if (kstatus->tcount) - cmn_err(_CELEVEL, "!SPCS: Unistat async data in ioctl status!"); - - /* - * Gently, Bentley - * Have to copy all the header stuff even though there is no need for - * some items like the revisions. This is unavoidable without making - * the structure more complex or guessing about alignment and the true - * size of the part of the structure sitting ahead of the {i,s,t}data - * arrays. - */ - - (void) ddi_copyout((void *) kstatus, (void *) ustatus, - sizeof (spcs_s_pinfo_t) - (sizeof (kstatus->idata) + - sizeof (kstatus->sdata) + sizeof (kstatus->tdata)), mode); - (void) ddi_copyout((void *)kstatus->idata, (void *) ustatus->idata, - (kstatus->icount * sizeof (kstatus->idata[0])), mode); - (void) ddi_copyout((void *)kstatus->sdata, (void *) ustatus->sdata, - (kstatus->scount * sizeof (kstatus->sdata[0])), mode); - (void) ddi_copyout((void *)kstatus->tdata, (void *) ustatus->tdata, - (kstatus->tcount * sizeof (kstatus->tdata[0])), mode); -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!scopyout exit"); -#endif -} - -/* - * Copy the ioctl status info to userspace - */ - -void -spcs_s_copyout(spcs_s_info_t *kstatus_a, spcs_s_info_t ustatus) -{ -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_copyout entry"); -#endif - if (check_revision(ustatus) == B_TRUE) - scopyout((spcs_s_pinfo_t *)*kstatus_a, - (spcs_s_pinfo_t *)ustatus); -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_copyout exit"); -#endif -} - - -/* - * Copy the ioctl status info to userspace - * Free the status info storage. 
- */ - -void -spcs_s_copyoutf(spcs_s_info_t *kstatus_a, spcs_s_info_t ustatus) -{ -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_copyoutf entry"); -#endif - if (check_revision(ustatus) == B_TRUE) - scopyout((spcs_s_pinfo_t *)*kstatus_a, - (spcs_s_pinfo_t *)ustatus); - spcs_s_kfree(*kstatus_a); - *kstatus_a = NULL; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_copyoutf exit"); -#endif -} - -/* - * Return the oldest status code from the status info or SPCS_S_OK if - * there is none. - */ - -spcs_s_status_t -spcs_s_oldest_status(spcs_s_info_t kstatus) -{ - spcs_s_pinfo_t *p; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_oldest_status entry"); -#endif - p = (spcs_s_pinfo_t *)kstatus; - -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_oldest_status exit"); -#endif - return (p->icount ? p->idata[0].s : SPCS_S_OK); -} - -/* - * Return the idata index of the last status code in the array (i.e. - * the "youngest" code present). The assumption is that the caller has - * checked to see that pcount is nonzero. - */ - -static int -last_code_idx(spcs_s_pinfo_t *p) -{ - int last = 0; - int idx = 0; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!last_code_idx entry"); -#endif - - while (idx < p->icount) { - last = idx; - idx += p->idata[idx].f.sup_count + 1; - } -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!last_code_idx exit"); -#endif - return (last); -} - -/* - * Return the youngest status code form the status info or SPCS_S_OK if - * there is none. - */ - -spcs_s_status_t -spcs_s_youngest_status(spcs_s_info_t kstatus) -{ - spcs_s_pinfo_t *p; - spcs_s_status_t temp; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_youngest_status entry"); -#endif - p = (spcs_s_pinfo_t *)kstatus; - - if (p->icount) - temp = p->idata[last_code_idx(p)].s; - else - temp = SPCS_S_OK; - -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_youngest_status exit"); -#endif - return (temp); -} - -/* - * Insert a new status code or NULL if there is none. - * Copy the status info to userspace. 
- * return a value to use as an return value (e.g. ioctl return). - */ - -spcs_s_status_t -spcs_s_ocopyout(spcs_s_info_t *kstatus_a, - spcs_s_info_t ustatus, spcs_s_status_t stcode, ...) -{ - spcs_s_udata_t ret; - va_list ap; - spcs_s_udata_t c; - spcs_s_pinfo_t *p; - char *sp; - -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_ocopyout entry"); -#endif - p = (spcs_s_pinfo_t *)*kstatus_a; - c.s = stcode; - - if (check_revision(ustatus) == B_FALSE) - ret.s = EINVAL; - else { - if (stcode) { - if (add_code(p, stcode) == B_FALSE) { - va_start(ap, stcode); - - while (c.f.sup_count--) { - sp = va_arg(ap, caddr_t); - if (sp != (char *)NULL) - add_item(p, sp); - } - - va_end(ap); - } - } - ret.s = p->icount ? p->idata[last_code_idx(p)].s: SPCS_S_OK; - scopyout(p, (spcs_s_pinfo_t *)ustatus); - } -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_ocopyout exit"); -#endif - return (ret.s); -} - - -/* - * Insert a new status code or NULL if there is none. - * Copy the status info to userspace. - * Free the kernel status info storage - * return a value to use as an operatiion return value (e.g. ioctl return) - */ - -spcs_s_status_t -spcs_s_ocopyoutf(spcs_s_info_t *kstatus_a, - spcs_s_info_t ustatus, spcs_s_status_t stcode, ...) -{ - spcs_s_udata_t ret; - va_list ap; - spcs_s_udata_t c; - spcs_s_pinfo_t *p; - char *sp; - -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_ocopyoutf entry"); -#endif - p = *(spcs_s_pinfo_t **)kstatus_a; - c.s = stcode; - - if (check_revision(ustatus) == B_FALSE) { - ret.s = EINVAL; - } else { - if (stcode) { - if (add_code(p, stcode) == B_FALSE) { - va_start(ap, stcode); - - while (c.f.sup_count--) { - sp = va_arg(ap, caddr_t); - if (sp != (char *)NULL) - add_item(p, sp); - } - - va_end(ap); - } - } - - ret.s = p->icount ? 
p->idata[last_code_idx(p)].s: SPCS_S_OK; - scopyout(p, (spcs_s_pinfo_t *)ustatus); - } - spcs_s_kfree((spcs_s_info_t)p); - *kstatus_a = NULL; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_ocopyoutf exit"); -#endif - return (ret.s); -} - -/* - * Return true if a status code is a Solaris error code - */ - -boolean_t -spcs_s_is_solaris(spcs_s_status_t error) -{ - spcs_s_udata_t c; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_is_solaris called"); -#endif - c.s = error; - return (c.f.module == 0 ? B_TRUE : B_FALSE); -} - -/* - * Edit a value into a numeric string - */ - -char -*spcs_s_inttostring(int val, char *buf, int buflen, int hex) -{ - char tempbuf[20]; - -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_inttostring entry 0x%x", val); -#endif - if (buflen) { - if (hex) - (void) sprintf(tempbuf, "0x%0X", val); - else - (void) sprintf(tempbuf, "%d", val); - if (strlen(tempbuf) < (size_t)buflen) - (void) strcpy(buf, tempbuf); - else - (void) strcpy(buf, "***"); - } else { - (void) strcpy(buf, "***"); - } -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_inttostring exit: %s", buf); -#endif - return (buf); -} - -/* - * Initialize the bytestream mechanism. - * This is a prototype. Specification TBD. Not in 10/22 commitment - */ - -int -spcs_s_start_bytestream() -{ -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_start_bytestream called"); -#endif - bytestream_transport_initialized = 1; - return (SPCS_S_OK); -} - -/* - * Stop (shut off) the bytestream mechanism. - * - * This is a prototype. Specification TBD. Not in 10/22 commitment - */ - -int -spcs_s_stop_bytestream() -{ -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_stop_bytestream called"); -#endif - bytestream_transport_initialized = 0; - return (SPCS_S_OK); -} - -/* - * Add a status code and the address and length of arbitrary binary - * data to be held (possibly with other status) for later transmission to - * userspace via a pipe facility (i.e. NOT via ioctl return). 
This is a - * means of getting arbitrary information with or without other status - * info shipped out as an alternative to cmn_err and/or trace file - * mechanisms. - * @param kstatus The status info pointer - * @param stcode The status code to annotate the data - * @param address The starting address of the data - * @param length The byte length of the data - * This is a prototype. Specification TBD. Not in the 10/22/98 unistat - * commitment - */ - -void -spcs_s_add_bytestream(spcs_s_info_t kstatus, spcs_s_status_t stcode, - spcs_s_bytestream_ptr_t data, int size) -{ - spcs_s_pinfo_t *p; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_add_bytestream entry"); -#endif - p = (spcs_s_pinfo_t *)kstatus; - - if (p->tcount == SPCS_S_TDSIZE) - cmn_err(CE_PANIC, - "SPCS: Unistat too many calls to spcs_s_add_bytestream"); - if ((p->icount + 2) >= SPCS_S_TDSIZE) - cmn_err(CE_PANIC, - "SPCS: Unistat idata array too small in " - "spcs_s_add_bytestream"); - p->idata[p->icount].s = stcode; - if (p->idata[p->icount++].f.sup_count != 1) - cmn_err(CE_PANIC, - "SPCS: Unistat wrong sup_count in spcs_s_add_bytestream"); - p->idata[p->icount].su.type = SU_BYTESTREAM; - p->idata[p->icount].su.offset = p->tcount++; - p->tdata[p->idata[p->icount].su.offset].size = size; - p->tdata[p->idata[p->icount++].su.offset].u_p.data = data; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_add_bytestream exit"); -#endif -} - -/* - * Asynchronously output unistat info and possibly bytestreams to - * userspace. The bytestream mechanism must have been initialized. - * @param kstatus The status info pointer - * @return SPCS_S_OK for normal completion, SPCS_S_ERROR otherwise - * This is a prototype. Specification TBD. 
Not in the 10/22/98 unistat - * commitment - */ - -int -spcs_s_asynch_status(spcs_s_info_t kstatus) -{ - spcs_s_pinfo_t *p; - int i, s, b, suppcount; - uchar_t *bp; -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_asynch_status entry"); -#endif - p = (spcs_s_pinfo_t *)kstatus; - - /* - * Any real code would have to go through and process the - * address/length pairs in the tdata array. The lengths would be - * valid but the addresses would be meaningless. Instead, for a - * stream transport mechanism the bytestream(s) would follow the - * spcs_s_pinfo_t structure. So after the last call to - * spcs_s_add_bytestream things the spcs_pinfo_t would look like this: - * |-------------| - * | preamble | - * |-------------| - * | idata | - * |(sup offset) |-----------------| - * |(sup offset) |--| | bytestream reference (index) - * |-------------| | string | - * | sdata | | ref (offset) | - * | (strings) |<-| | - * |-------------| | - * | tdata | | - * | |<----------------| - * | (length) | - * | (address) |-------------------->byte data "out there somewhere" - * |-------------| - * - * After processing in this function the data headed for a pipe or - * other sequention stream would look like this: - * - * |-------------| - * | preamble | - * |-------------| - * | idata | - * | |-----------------| - * | |--| | bytestream reference (index) - * |-------------| | string | - * | sdata | | ref (offset) | - * | (strings) |<-| | - * |-------------| | - * | tdata | | - * | |<----------------| - * | (length) | - * | (null addr) | - * |-------------| - * |first | - * |bytestream | - * |group | - * |-------------| - * |second | - * |bytestream | - * |group | - * |-------------| - * | . . . | - * |-------------| - * - * For the prototype we just dump the stuff out so we can see the - * functions work. - */ - - if (! 
bytestream_transport_initialized) { -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_asynch_status exit 1"); -#endif - return (SPCS_S_ERROR); - } - - cmn_err(CE_NOTE, "!SPCS Unistat Asynchronous Status Dump"); - cmn_err(CE_NOTE, "!This is a test fixture waiting for a pipe or"); - cmn_err(CE_NOTE, "!shared memory"); - - /* - * I'd like nothing more than to code up a really cool pipe or mmap'd - * shared memory scheme to shovel this stuff up to a daemon that feeds - * Java events out to listener threads belonging to both management - * software, coresw product code and developer code. As it is I just - * have time to spew stuff out via cmn_err. Have to make believe this - * is an alternative to cmn_err and not just another dang client! - */ - - i = 0; - - while (i < p->icount) { - - /* - * can't access the status text or anything else proper and - * pretty from here in the kernel, have to just dump it. Put - * the status codes out as decimal to make them look as weird - * as possible so we see that the point of this is not for - * anybody to actually pay attention to them but to use this - * as a means of testing the rest of the prototype and - * suggesting potental functionality. We also put the oldest - * stuff out first, backwards from ioctl status. That's - * because there are only minutes to implement this and the - * point is to see the potential, etc. 
- */ - - suppcount = p->idata[i].f.sup_count; - - cmn_err(CE_NOTE, "!Status item %d value %x supplements %d", - i, p->idata[i].s, suppcount); - i++; - - for (s = 0; s < suppcount; s++) { - if (p->idata[i+s].su.type == SU_STRING) - cmn_err(CE_NOTE, - "!Supplement %d string value: %s", s, - (char *)(p->sdata + - p->idata[i+s].su.offset)); - else { - cmn_err(CE_NOTE, - "!Supplement %d bytestream dump:", s); - cmn_err(CE_NOTE, "!offset data"); - bp = p->tdata[p->idata[i+s].su.offset].u_p.data; - /* The SunSoft mandated 8 character tabstops */ - /* really BITE MY BUTT */ - for (b = 0; - b < p->tdata[p->idata[i+s].su.offset].size; - b++) - cmn_err(CE_NOTE, "!%6d %2x", - b, *bp++); - } - } - - i += suppcount; - } - -#ifdef UNISTAT_TRACE - cmn_err(CE_WARN, "!spcs_s_asynch_status exit 2"); -#endif - return (SPCS_S_OK); -} diff --git a/usr/src/uts/common/avs/ns/unistat/spcs_s_k.h b/usr/src/uts/common/avs/ns/unistat/spcs_s_k.h deleted file mode 100644 index 852a072abe..0000000000 --- a/usr/src/uts/common/avs/ns/unistat/spcs_s_k.h +++ /dev/null @@ -1,252 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -#ifndef _SPCS_S_K_H -#define _SPCS_S_K_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Public SPCS uniform status details - */ - -/* - * KERNEL level status support utilities - */ - - -/* - * Create and initialize local status. Call this at entry to topmost - * operation (e.g. the start of ioctl service) - * @return The allocated and initialized status info or NULL if no memory - * available - */ -spcs_s_info_t -spcs_s_kcreate(); - -/* - * Initialize existing status. Call this at entry to topmost operation - * (e.g. the start of ioctl service) - * @param kstatus The status info. - */ -void -spcs_s_kinit(spcs_s_info_t kstatus); - -/* - * Add a status code and optional support information to status - * @param kstatus The status info pointer - * @param stcode The status code to be added (.e.g. DSW_EEMPTY) - * <BR>Supplemental value parameters may be supplied as needed. There - * should be one supplemental info parameter corresponding - * to each edit specification (e.g. %s) in the message text for a - * given code. - * <BR>If there is no additional room to insert everything the code - * SPCS_EOVERFLOW is inserted instead of stcode, possibly replacing an - * a previously inserted status code. - */ -void -spcs_s_add(spcs_s_info_t kstatus, spcs_s_status_t stcode, ...); - -/* - * Copy status info to userspace - * @param kstatus_a is The address of the local (kernel) status info - * @param ustatus The userspace status info - */ -void -spcs_s_copyout(spcs_s_info_t *kstatus_a, spcs_s_info_t ustatus); - -/* - * Copy status info to userspace and free status info storage - * @param kstatus_a is The address of the local (kernel) status info - * @param ustatus The userspace status info - */ -void -spcs_s_copyoutf(spcs_s_info_t *kstatus_a, spcs_s_info_t ustatus); - -/* - * Return the oldest status code from the status info or SPCS_S_OK if - * there is none. This is the status code that was inserted first (i.e. - * LIFO). 
- * @param kstatus The local (kernel level) status info - * @return The oldest status code value - */ - -spcs_s_status_t -spcs_s_oldest_status(spcs_s_info_t kstatus); - -/* - * Return the youngest status code from the status info or SPCS_S_OK if - * there is none. This is the status code that was inserted last (i.e. - * LIFO). - * @param kstatus The local (kernel level) status info - * @return The youngest status code value - */ - -spcs_s_status_t -spcs_s_youngest_status(spcs_s_info_t kstatus); - -/* - * Copy status info to userspace and provide return value. - * <BR>This is a one-step means of returning from a kernel function. It is - * identical to spcs_s_fcopyout except that the kernel status storage is - * not released. - * @param kstatus_a The address of the local kernel status info. - * @param ustatus The user status info - * @param stcode A status code. If the status code is NULL it is ignored. - * <BR>Supplemental value parameters may be supplied as needed. There - * should be one supplemental info parameter corresponding - * to each edit specification (e.g. %s) in the message text for a - * given code. - * <BR>If there is no additional room to insert everything the code - * SPCS_EOVERFLOW is inserted instead of stcode, possibly replacing an - * a previously inserted status code. - * @return If stcode is NULL and there is no status info present, - * SPCS_S_OK, else SPCS_S_ERROR. - */ -spcs_s_status_t -spcs_s_ocopyout(spcs_s_info_t *kstatus_a, - spcs_s_info_t ustatus, spcs_s_status_t stcode, ...); - -/* - * Copy status info to userspace, free it and provide a return value - * <BR>This is a one-step means of returning from a kernel function. It is - * identical to spcs_s_fcopyout except that the kernel status storage is - * released. - * <BR>Return a value to use as a function result (SPCS_S_OK or ERROR) - * <BR>This is a one-step means of returning from an operation. 
It is - * identical to spcs_s_copyout except that the kernel status information - * storage is released. - * @param kstatus_a The address of the local kernel status info. - * @param ustatus The user status info - * @param stcode A status code. If the status code is NULL it is ignored. - * @param stcode A status code. If the status code is NULL it is ignored. - * <BR>Supplemental value parameters may be supplied as needed. There - * should be one supplemental info parameter corresponding - * to each edit specification (e.g. %s) in the message text for a - * <BR>If there is no additional room to insert everything the code - * SPCS_EOVERFLOW is inserted instead of stcode, possibly replacing an - * a previously inserted status code. - * @return If stcode is NULL and there is no status info present, - * SPCS_S_OK, else SPCS_S_ERROR. - */ -spcs_s_status_t -spcs_s_ocopyoutf(spcs_s_info_t *kstatus_a, - spcs_s_info_t ustatus, spcs_s_status_t stcode, ...); - -/* - * Release (free) status storage. - * @param status The status information to release (kmem_free) - */ -void -spcs_s_kfree(spcs_s_info_t status); - -/* - * Test a status code and return true if it is a Solaris error code - * @return B_TRUE if the code is a Solaris code (module == 0), else - * B_FALSE - */ -boolean_t -spcs_s_is_solaris(spcs_s_status_t error); - -/* - * - * Edit an value into a decimal or hexidecimal string. - * Note that if multiple calls to this function are used to develop the - * parameters for spcs_s_add() the character arrays must be distinct. - * @param val The value to edit - * @param buf Pointer to the start of a char array for conversion - * @param buflen The size of the char array (minimum 2) - * @param hex If nonzero "0x" is prepended to generated string and - * it is edited as hexidecimal. - * @return The numeric string or "***" if an error is detected - */ - -char * -spcs_s_inttostring(int val, char *buf, int buflen, int hex); - -/* - * Initialize the bytestream mechanism. 
- * - * This function initializes the Unistat mechanism for transporting - * status information with or without bytestream data to userspace. - * - * @return SPCS_S_OK for normal completion, SPCS_S_ERROR otherwise - * - * Specification TBD. Not in 10/22 commitment - */ - -int -spcs_s_start_bytestream(); - -/* - * Stop (shut off) the bytestream mechanism. - * - * This function terminates the Unistat mechanism for transporting - * status information with or without bytestream data to userspace. - * - * @return SPCS_S_OK for normal completion, SPCS_S_ERROR otherwise - * - * Specification TBD. Not in 10/22 commitment - */ - -int -spcs_s_stop_bytestream(); - -/* - * Add a status code and the address and length of arbitrary binary - * data to be held (possibly with other status) for later transmission to - * userspace via a pipe facility (i.e. NOT via ioctl return). This is a - * means of getting arbitrary information with or without other status - * info shipped out as an alternative to cmn_err and/or trace file - * mechanisms. - * @param kstatus The status info pointer - * @param stcode The status code to annotate the data - * @param data The starting address of the data - * @param size The byte length of the data - * Specification TBD. Not in the 10/22/98 unistat commitment - */ - -void -spcs_s_add_bytestream(spcs_s_info_t kstatus, spcs_s_status_t stcode, - spcs_s_bytestream_ptr_t data, int size); - -/* - * Asynchronously output unistat info and possibly bytestreams to - * userspace. The bytestream mechanism must have been initialized. - * @param kstatus The status info pointer - * @return SPCS_S_OK for normal completion, SPCS_S_ERROR otherwise - * Specification TBD. 
Not in the 10/22/98 unistat commitment - */ - -int -spcs_s_asynch_status(spcs_s_info_t kstatus); - -#ifdef __cplusplus -} -#endif - -#endif /* _SPCS_S_K_H */ diff --git a/usr/src/uts/common/avs/ns/unistat/spcs_s_u.h b/usr/src/uts/common/avs/ns/unistat/spcs_s_u.h deleted file mode 100644 index 84dcbc1124..0000000000 --- a/usr/src/uts/common/avs/ns/unistat/spcs_s_u.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SPCS_S_U_H -#define _SPCS_S_U_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * USER level status support utilities - */ - -#include <stdio.h> - -/* - * Create and initialize local status. Call this prior to invoking - * an ioctl - * @return The status or NULL if malloc failed - */ - -spcs_s_info_t -spcs_s_ucreate(); - -/* - * Initialize ioctl status storage to "remove" any status present - * @param ustatus The status - */ - -void -spcs_s_uinit(spcs_s_info_t ustatus); - -/* - * Return a string with the module label and next status message text or - * NULL if none left. 
Supplemental values are edited into the text and - * the used status and values are removed so that subsequent calls will - * access the next piece of information. - * Note that status codes and supplemental values are processed in - * the reverse order of their insertion by SPCS kernel code. That is, - * spcs_s_string returns the "youngest" status information first (i.e. - * LIFO). - * Note that spcs_s_string will not have any error information in - * the special case where Solaris has aborted an ioctl and returned an - * error code via errno or the ioctl service code had an "early" error - * from copyin or could not allocate its status area. In this case - * spcs_s_string will return NULL the first time it is called and a - * positive integer error code will be present in errno and should get - * handled by the spcs_s_string caller appropriately by using strerror. - * @param ustatus The status - * @param msg A char array of at least SPCS_S_MAXTEXT length - * @return status message string or NULL if no more status present - */ - -char *spcs_s_string(spcs_s_info_t ustatus, char *msg); - -/* - * Write status info to the file specified - * Uses spsc_s_string to edit status into strings and output them - * to the file specifed in the same order that the status was inserted. - * If there is no status present but errno contains a positive value - * then it will be treated as a Solaris error code and its message text - * will be written. Note that this routine does NOT remove status - * information so it can be called more than once. - * @param ustatus The status - * @param fd The file descriptor to use for output - */ - -void spcs_s_report(spcs_s_info_t ustatus, FILE *fd); - -/* - * Release (free) ioctl status storage. - * Note that this interface is an extension to SPARC 1998/038 10/22/98 - * commitment. - * @param ustatus_a The address of the status (set to NULL) - */ - -void -spcs_s_ufree(spcs_s_info_t *ustatus_a); - -/* - * Write message to log file. 
- * @param product Product code for tagging in log file. - * @param ustatus The status - may be NULL. - * @param format printf style format. - */ - -void -spcs_log(const char *product, spcs_s_info_t *ustatus, const char *format, ...); - -#ifdef __cplusplus -} -#endif - -#endif /* _SPCS_S_U_H */ diff --git a/usr/src/uts/common/avs/ns/unistat/spuni.c b/usr/src/uts/common/avs/ns/unistat/spuni.c deleted file mode 100644 index 6a13b72702..0000000000 --- a/usr/src/uts/common/avs/ns/unistat/spuni.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#define _SPUNI_ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/conf.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/cred.h> -#include <sys/file.h> -#include <sys/ddi.h> -#include <sys/unistat/spcs_s.h> - -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif - -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/nsvers.h> - -/* - * Module linkage. 
- */ - -static struct modlmisc spuni_modlmisc = { - &mod_miscops, /* Type of module */ - "nws:Unistat:" ISS_VERSION_STR -}; - -static struct modlinkage spuni_modlinkage = { - MODREV_1, - &spuni_modlmisc, - NULL -}; - -int -_init(void) -{ - return (mod_install(&spuni_modlinkage)); -} - -int -_fini(void) -{ - return (mod_remove(&spuni_modlinkage)); -} - -/* - * Solaris module info code - */ -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&spuni_modlinkage, modinfop)); -} |