summaryrefslogtreecommitdiff
path: root/usr/src/lib/lvm
diff options
context:
space:
mode:
author stevel@tonic-gate <none@none> 2005-06-14 00:00:00 -0700
committer stevel@tonic-gate <none@none> 2005-06-14 00:00:00 -0700
commit 7c478bd95313f5f23a4c958a745db2134aa03244 (patch)
tree c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/lib/lvm
download illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz
OpenSolaris Launch
Diffstat (limited to 'usr/src/lib/lvm')
-rw-r--r--usr/src/lib/lvm/Makefile66
-rw-r--r--usr/src/lib/lvm/Makefile.lvm39
-rw-r--r--usr/src/lib/lvm/Makefile.targ42
-rw-r--r--usr/src/lib/lvm/libmeta/Makefile78
-rw-r--r--usr/src/lib/lvm/libmeta/Makefile.com189
-rw-r--r--usr/src/lib/lvm/libmeta/common/hdrs/meta_lib_prv.h48
-rw-r--r--usr/src/lib/lvm/libmeta/common/hdrs/meta_repartition.h51
-rw-r--r--usr/src/lib/lvm/libmeta/common/hdrs/meta_set_prv.h96
-rw-r--r--usr/src/lib/lvm/libmeta/common/inc.flg29
-rw-r--r--usr/src/lib/lvm/libmeta/common/llib-lmeta33
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_admin.c133
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_attach.c120
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_check.c874
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_db.c2517
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_db_balance.c1215
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_devadm.c1607
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_devstamp.c127
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_error.c2309
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_getdevs.c592
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_hotspares.c1630
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_import.c2179
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_init.c453
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_lib_prv.c69
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mdcf.c148
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_med.c851
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_med_err.c97
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mem.c250
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_metad.c4082
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_metad_subr.c2055
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mh.c842
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mirror.c2762
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mirror_resync.c658
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c636
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mn_comm.c984
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c1957
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c690
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mn_subr.c922
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mount.c97
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_name.c3289
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_nameinfo.c1267
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_namespace.c601
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_notify.c692
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_patch.c299
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_patch_root.c171
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_print.c439
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_raid.c2784
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_raid_resync.c130
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_rename.c539
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_repartition.c415
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_replace.c144
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_reset.c146
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_resync.c108
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_runtime.c301
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_se_notify.c399
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set.c5918
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_drv.c1948
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_hst.c5688
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_med.c1253
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_prv.c818
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_tkr.c1079
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_setup.c897
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_smf.c351
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_sp.c6652
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_stat.c103
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_stripe.c2496
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_systemfile.c475
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_tab.c342
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_time.c53
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_trans.c1761
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_userflags.c98
-rw-r--r--usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c825
-rw-r--r--usr/src/lib/lvm/libmeta/common/metagetroot.c121
-rw-r--r--usr/src/lib/lvm/libmeta/common/metarpcopen.c422
-rw-r--r--usr/src/lib/lvm/libmeta/common/metasplitname.c77
-rw-r--r--usr/src/lib/lvm/libmeta/common/sdssc_bind.c205
-rw-r--r--usr/src/lib/lvm/libmeta/i386/Makefile31
-rw-r--r--usr/src/lib/lvm/libmeta/sparc/Makefile31
-rw-r--r--usr/src/lib/lvm/libmeta/spec/Makefile29
-rw-r--r--usr/src/lib/lvm/libmeta/spec/Makefile.targ36
-rw-r--r--usr/src/lib/lvm/libmeta/spec/amd64/Makefile46
-rw-r--r--usr/src/lib/lvm/libmeta/spec/i386/Makefile47
-rw-r--r--usr/src/lib/lvm/libmeta/spec/meta.spec3699
-rw-r--r--usr/src/lib/lvm/libmeta/spec/sparc/Makefile47
-rw-r--r--usr/src/lib/lvm/libmeta/spec/sparcv9/Makefile47
-rw-r--r--usr/src/lib/lvm/libmeta/spec/versions31
-rw-r--r--usr/src/lib/lvm/libpreen/Makefile49
-rw-r--r--usr/src/lib/lvm/libpreen/Makefile.com50
-rw-r--r--usr/src/lib/lvm/libpreen/common/mdpreen.c335
-rw-r--r--usr/src/lib/lvm/libpreen/i386/Makefile31
-rw-r--r--usr/src/lib/lvm/libpreen/sparc/Makefile30
-rw-r--r--usr/src/lib/lvm/libpreen/spec/Makefile29
-rw-r--r--usr/src/lib/lvm/libpreen/spec/Makefile.targ36
-rw-r--r--usr/src/lib/lvm/libpreen/spec/amd64/Makefile46
-rw-r--r--usr/src/lib/lvm/libpreen/spec/i386/Makefile47
-rw-r--r--usr/src/lib/lvm/libpreen/spec/preen.spec31
-rw-r--r--usr/src/lib/lvm/libpreen/spec/sparc/Makefile47
-rw-r--r--usr/src/lib/lvm/libpreen/spec/sparcv9/Makefile47
-rw-r--r--usr/src/lib/lvm/libpreen/spec/versions31
-rw-r--r--usr/src/lib/lvm/libsvm/Makefile58
-rw-r--r--usr/src/lib/lvm/libsvm/Makefile.com64
-rw-r--r--usr/src/lib/lvm/libsvm/common/check_svm.c169
-rw-r--r--usr/src/lib/lvm/libsvm/common/debug.c76
-rw-r--r--usr/src/lib/lvm/libsvm/common/getdrvname.c90
-rw-r--r--usr/src/lib/lvm/libsvm/common/hdrs/libsvm.h70
-rw-r--r--usr/src/lib/lvm/libsvm/common/hdrs/svm.h95
-rw-r--r--usr/src/lib/lvm/libsvm/common/metaconf.c195
-rw-r--r--usr/src/lib/lvm/libsvm/common/metainterfaces.c490
-rw-r--r--usr/src/lib/lvm/libsvm/common/modops.c120
-rw-r--r--usr/src/lib/lvm/libsvm/common/start_svm.c284
-rw-r--r--usr/src/lib/lvm/libsvm/common/update_mdconf.c379
-rw-r--r--usr/src/lib/lvm/libsvm/i386/Makefile31
-rw-r--r--usr/src/lib/lvm/libsvm/sparc/Makefile31
-rw-r--r--usr/src/lib/lvm/libsvm/spec/Makefile28
-rw-r--r--usr/src/lib/lvm/libsvm/spec/Makefile.targ41
-rw-r--r--usr/src/lib/lvm/libsvm/spec/amd64/Makefile50
-rw-r--r--usr/src/lib/lvm/libsvm/spec/i386/Makefile53
-rw-r--r--usr/src/lib/lvm/libsvm/spec/sparc/Makefile50
-rw-r--r--usr/src/lib/lvm/libsvm/spec/sparcv9/Makefile50
-rw-r--r--usr/src/lib/lvm/libsvm/spec/svm.spec55
-rw-r--r--usr/src/lib/lvm/libsvm/spec/versions31
120 files changed, 82196 insertions, 0 deletions
diff --git a/usr/src/lib/lvm/Makefile b/usr/src/lib/lvm/Makefile
new file mode 100644
index 0000000000..15d40f66b7
--- /dev/null
+++ b/usr/src/lib/lvm/Makefile
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 1998-2002 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.lib
+
+# Sub-directories visited by the all/clean/clobber/debug/lint/install
+# targets.  .WAIT is listed between libmeta and the other two libraries
+# (presumably so that libmeta is finished first when $(SUBDIRS) are built
+# under .PARALLEL -- confirm against make(1S)).
+SUBDIRS = libmeta .WAIT libpreen libsvm
+# Sub-directories visited by the check and install_h targets.
+HDRSUBDIRS = libsvm
+# Sub-directories visited by the _dc target.
+DCSUBDIRS = libmeta
+# Sub-directories visited by the _msg target.
+MSGSUBDIRS = libmeta
+
+# Each top-level target sets TARGET to its own name; the $(SUBDIRS) rule
+# below hands $(TARGET) to the make run in each sub-directory.
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+check := TARGET = check
+debug := TARGET = debug
+install := TARGET = install
+install_h := TARGET = install_h
+lint := TARGET = lint
+_dc := TARGET = _dc
+_msg := TARGET = _msg
+
+.KEEP_STATE:
+
+.PARALLEL: $(SUBDIRS)
+
+all clean clobber debug lint: $(SUBDIRS)
+
+# install additionally depends on the $(ROOTDIRS) directories.
+install: $(ROOTDIRS) $(SUBDIRS)
+
+_dc: $(DCSUBDIRS)
+
+check install_h: $(HDRSUBDIRS)
+
+_msg: $(MSGSUBDIRS)
+
+# Change into the sub-directory named by the target, print its path and run
+# $(MAKE) there for the current $(TARGET).  FRC is an empty target listed as
+# a prerequisite (presumably so this rule is never considered up to date).
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/lvm/Makefile.lvm b/usr/src/lib/lvm/Makefile.lvm
new file mode 100644
index 0000000000..99a4d94727
--- /dev/null
+++ b/usr/src/lib/lvm/Makefile.lvm
@@ -0,0 +1,39 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Settings shared by the lvm library makefiles that include this file.
+include $(SRC)/lib/Makefile.lib
+
+# Override of the PIC flags used for C compiles on sparc.
+sparc_C_PICFLAGS = -K PIC
+
+# Library sources are found in the common directory, relative to the
+# directory make runs in.
+SRCDIR = ../common
+
+# base target directories
+ROOTDIRS = $(ROOT)/usr $(ROOTHDRDIR) $(ROOTLIBDIR)
+
+# Default flags for $(RPCGEN) invocations that reference $(RPCGENFLAGS)
+# (per rpcgen(1): -C selects ANSI C output, -M multithread-safe stubs).
+RPCGENFLAGS = -C -M
+CFLAGS += $(CCVERBOSE)
diff --git a/usr/src/lib/lvm/Makefile.targ b/usr/src/lib/lvm/Makefile.targ
new file mode 100644
index 0000000000..41665529c2
--- /dev/null
+++ b/usr/src/lib/lvm/Makefile.targ
@@ -0,0 +1,42 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 1998-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Targets shared by the lvm library makefiles that include this file.
+include $(SRC)/lib/Makefile.targ
+
+# basic target directories
+$(ROOTDIRS):
+ $(INS.dir)
+
+# While the debug target is being built, compile (COPTFLAG, COPTFLAG64) and
+# link (DYNFLAGS) with -g.
+debug := COPTFLAG = -g
+debug := COPTFLAG64 = -g
+debug := DYNFLAGS += -g
+
+# Run the cstyle checker with the -pP options over $(SRCS).
+cstyle:
+ cstyle -pP $(SRCS)
+
+# lint is an alias for the lintcheck target.
+lint: lintcheck
diff --git a/usr/src/lib/lvm/libmeta/Makefile b/usr/src/lib/lvm/libmeta/Makefile
new file mode 100644
index 0000000000..ab4fe9337c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/Makefile
@@ -0,0 +1,78 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include $(SRC)/lib/Makefile.lib
+
+# The library itself is built in the machine-specific sub-directory.
+SUBDIRS = $(MACH)
+
+# MSGSRCS is the shell expansion of */*.[ch], i.e. every .c and .h file
+# exactly one directory below this one.  MSGFILES maps the .c names to .i;
+# a name ending in .h would pass through the substitution unchanged.
+# NOTE(review): clean removes $(MSGFILES), so a header matched by the glob
+# would be deleted with it -- confirm no headers live one level down.
+MSGSRCS :sh= echo */*.[ch]
+MSGFILES = $(MSGSRCS:%.c=%.i)
+POFILE = libmeta.po
+
+# Inputs and output of the $(DCFILE) rule below.
+DCFILES = common/meta_print.po
+DCFILE = libmeta.dc
+
+# TARGET is what the "spec $(MACH)" rule below passes to the nested make.
+# There is no assignment for "all", so TARGET expands to whatever it is
+# otherwise set to (nothing in this file) when all is built.
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+test := TARGET= test
+debug := TARGET= debug
+
+CPPFLAGS += -I$(SRC)/lib/lvm/libmeta/common/hdrs
+
+.KEEP_STATE:
+
+# spec is listed before .WAIT and $(SUBDIRS) for the build targets.
+all debug install: spec .WAIT $(SUBDIRS)
+
+clean: spec $(SUBDIRS)
+ $(RM) $(MSGFILES) $(DCFILES)
+
+clobber: spec $(SUBDIRS)
+ $(RM) $(POFILE) $(DCFILE)
+
+lint: $(SUBDIRS)
+
+# XGETFLAGS override that applies only while $(DCFILE) is being built.
+$(DCFILE):= XGETFLAGS = -c TRANSLATION_NOTE_LC_TIME -t
+
+# $(DCFILE) is the concatenation of $(DCFILES).
+$(DCFILE): $(DCFILES)
+ $(CAT) $(DCFILES) > $(DCFILE)
+
+$(POFILE): $(MSGFILES)
+ $(BUILDPO.msgfiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+_dc: $(DCMSGDOMAINPOFILE)
+
+# Change into spec or $(MACH), print the path and run $(MAKE) $(TARGET).
+spec $(MACH): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/lvm/libmeta/Makefile.com b/usr/src/lib/lvm/libmeta/Makefile.com
new file mode 100644
index 0000000000..ad747e2331
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/Makefile.com
@@ -0,0 +1,189 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LIBRARY= libmeta.a
+VERS= .1
+# Directory holding sources shared with other consumers; CMN_OBJS are
+# compiled from here (see CMN_SRCS and the $(COMMON)/%.c pattern rule).
+COMMON = $(SRC)/common/lvm
+
+CMN_OBJS = md_crc.o
+
+# Objects whose .c files are not checked in: DERIVED_SRCS maps each one to a
+# .c in the build directory, and the $(RPCGEN) rules further down in this
+# file generate those .c files from .x descriptions.
+DERIVED_OBJS = \
+ mdiox_xdr.o \
+ meta_basic_xdr.o \
+ metad_clnt.o \
+ metad_xdr.o \
+ metamed_clnt.o \
+ metamed_xdr.o \
+ metamhd_clnt.o \
+ metamhd_xdr.o \
+ mdmn_commd_xdr.o \
+ mhdx_xdr.o
+
+LOCAL_OBJS= \
+ metad_svc_stubs.o \
+ meta_admin.o \
+ meta_attach.o \
+ meta_db.o \
+ meta_db_balance.o \
+ meta_devadm.o \
+ meta_devstamp.o \
+ meta_error.o \
+ meta_getdevs.o \
+ meta_hotspares.o \
+ meta_import.o \
+ meta_init.o \
+ meta_lib_prv.o \
+ meta_mdcf.o \
+ meta_med_err.o \
+ meta_mem.o \
+ meta_metad.o \
+ meta_metad_subr.o \
+ meta_med.o \
+ meta_mh.o \
+ meta_mirror.o \
+ meta_mirror_resync.o \
+ meta_mn_comm.o \
+ meta_mn_changelog.o \
+ meta_mn_handlers.o \
+ meta_mn_msg_table.o \
+ meta_mn_subr.o \
+ meta_mount.o \
+ meta_name.o \
+ meta_nameinfo.o \
+ meta_namespace.o \
+ meta_notify.o \
+ meta_se_notify.o \
+ meta_patch.o \
+ meta_patch_root.o \
+ meta_print.o \
+ meta_raid.o \
+ meta_raid_resync.o \
+ meta_rename.o \
+ meta_repartition.o \
+ meta_replace.o \
+ meta_reset.o \
+ meta_resync.o \
+ meta_runtime.o \
+ meta_set.o \
+ meta_set_drv.o \
+ meta_set_hst.o \
+ meta_set_med.o \
+ meta_set_prv.o \
+ meta_set_tkr.o \
+ meta_setup.o \
+ meta_smf.o \
+ meta_stat.o \
+ meta_sp.o \
+ meta_stripe.o \
+ meta_systemfile.o \
+ meta_tab.o \
+ meta_time.o \
+ meta_trans.o \
+ meta_userflags.o \
+ metarpcopen.o \
+ metasplitname.o \
+ metagetroot.o \
+ sdssc_bind.o
+
+# meta_check.o is kept apart from the other objects: it is part of OBJECTS
+# but not of OBJECTS64, so it does not receive the large-file CPPFLAGS that
+# are added for $(BIG_TARGETS) below.
+SPC_OBJS= meta_check.o
+
+# Map each object list to the location of its source files.
+CMN_SRCS = $(CMN_OBJS:%.o=$(COMMON)/%.c)
+LOCAL_SRCS = $(LOCAL_OBJS:%.o=../common/%.c)
+DERIVED_SRCS = $(DERIVED_OBJS:%.o=%.c)
+SPC_SRCS = $(SPC_OBJS:%.o=../common/%.c)
+
+include ../../../Makefile.lib
+
+MAPDIR= $(SRC)/lib/lvm/libmeta/spec/$(TRANSMACH)
+SPECMAPFILE = $(MAPDIR)/mapfile
+OBJECTS64 = $(LOCAL_OBJS) $(DERIVED_OBJS) $(CMN_OBJS)
+OBJECTS = $(OBJECTS64) $(SPC_OBJS)
+
+include $(SRC)/lib/lvm/Makefile.lvm
+
+# install this library in the root filesystem
+include ../../../Makefile.rootfs
+
+LIBS = $(DYNLIB) $(LINTLIB)
+# Default SRCS leaves out $(SPC_SRCS); the lint-specific SRCS includes
+# $(SPC_SRCS) but leaves out the generated $(DERIVED_SRCS).
+SRCS = $(CMN_SRCS) $(LOCAL_SRCS) $(DERIVED_SRCS)
+$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
+lint := SRCS = $(CMN_SRCS) $(LOCAL_SRCS) $(SPC_SRCS)
+CPPFLAGS += -I$(SRC)/lib/lvm/libmeta/common/hdrs
+LDLIBS += -lnsl -lc -ladm -ldevid -lgen -lefi -ldevinfo -lscf
+# Generated sources are removed by clean.
+CLEANFILES += $(DERIVED_SRCS)
+
+.KEEP_STATE:
+
+# The pics/ objects of everything in OBJECTS64, and the lint library, are
+# built with the large-file compilation macros defined.
+BIG_TARGETS = $(OBJECTS64:%=pics/%)
+
+$(BIG_TARGETS) := CPPFLAGS += -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
+
+$(LINTLIB) := CPPFLAGS += -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
+
+all: $(LIBS)
+
+# Objects whose sources live in $(COMMON) are compiled by this pattern rule.
+objs/%.o profs/%.o pics/%.o: $(COMMON)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+# The rules below generate the DERIVED_SRCS files with $(RPCGEN) from .x
+# descriptions: "-c" is used for the *_xdr.c files and "-l" for the
+# *_clnt.c files (XDR routines and client-side stubs respectively, per
+# rpcgen(1)).  Where the output is piped through nawk, the first occurrence
+# of "uts/common/sys/lvm" on each line is replaced with "head" (presumably
+# to fix up the include path rpcgen emits -- confirm).
+mdiox_xdr.c: $(SRC)/uts/common/sys/lvm/mdiox.x
+ $(RPCGEN) $(RPCGENFLAGS) -c -i 100 $(SRC)/uts/common/sys/lvm/mdiox.x | \
+ nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+meta_basic_xdr.c: $(SRC)/uts/common/sys/lvm/meta_basic.x
+ $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/uts/common/sys/lvm/meta_basic.x | \
+ nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+metad_clnt.c: $(SRC)/head/metad.x
+ $(RPCGEN) $(RPCGENFLAGS) -l $(SRC)/head/metad.x -o $@
+
+metad_xdr.c: $(SRC)/head/metad.x
+ $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/head/metad.x -o $@
+
+metamed_clnt.c: $(SRC)/uts/common/sys/lvm/metamed.x
+ $(RPCGEN) $(RPCGENFLAGS) -l $(SRC)/uts/common/sys/lvm/metamed.x | \
+ nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+metamed_xdr.c: $(SRC)/uts/common/sys/lvm/metamed.x
+ $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/uts/common/sys/lvm/metamed.x | \
+ nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+metamhd_clnt.c: $(SRC)/head/metamhd.x
+ $(RPCGEN) $(RPCGENFLAGS) -l $(SRC)/head/metamhd.x -o $@
+
+metamhd_xdr.c: $(SRC)/head/metamhd.x
+ $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/head/metamhd.x -o $@
+
+mhdx_xdr.c: $(SRC)/uts/common/sys/lvm/mhdx.x
+ $(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/uts/common/sys/lvm/mhdx.x | \
+ nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+# NOTE(review): unlike every other rule here, this one passes neither
+# $(RPCGENFLAGS) nor its output through nawk -- confirm that is intended.
+mdmn_commd_xdr.c: $(SRC)/uts/common/sys/lvm/mdmn_commd.x
+ $(RPCGEN) -c $(SRC)/uts/common/sys/lvm/mdmn_commd.x -o $@
+
+include $(SRC)/lib/lvm/Makefile.targ
diff --git a/usr/src/lib/lvm/libmeta/common/hdrs/meta_lib_prv.h b/usr/src/lib/lvm/libmeta/common/hdrs/meta_lib_prv.h
new file mode 100644
index 0000000000..e35cd8c07e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/hdrs/meta_lib_prv.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1992, 1993, 1994, 2000 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+/*
+ * The include guard is named after this file.  It must not be shared with
+ * meta_set_prv.h: a translation unit that includes both headers (llib-lmeta
+ * does) would otherwise silently lose every declaration of whichever header
+ * is included second.
+ */
+#ifndef _META_LIB_PRV_H
+#define _META_LIB_PRV_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <meta.h>
+#include <ctype.h>
+#include <sys/mnttab.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* meta_lib_prv.c */
+extern FILE *open_mnttab(void);
+extern int close_mnttab(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _META_LIB_PRV_H */
diff --git a/usr/src/lib/lvm/libmeta/common/hdrs/meta_repartition.h b/usr/src/lib/lvm/libmeta/common/hdrs/meta_repartition.h
new file mode 100644
index 0000000000..f5053acccd
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/hdrs/meta_repartition.h
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2001, 2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _META_REPARTITION_H
+#define _META_REPARTITION_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <meta.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * meta_repartition_drive() option flags
+ *
+ * Each flag occupies its own bit (0x01, 0x02, 0x04); presumably they are
+ * meant to be OR-ed together in the "options" argument -- confirm against
+ * meta_repartition.c.
+ */
+#define MD_REPART_FORCE 0x01
+#define MD_REPART_LEAVE_REP 0x02
+#define MD_REPART_DONT_LABEL 0x04
+
+/* meta_repartition.c */
+extern int meta_repartition_drive(mdsetname_t *sp,
+ mddrivename_t *dnp, int options, mdvtoc_t *vtocp, md_error_t *ep);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _META_REPARTITION_H */
diff --git a/usr/src/lib/lvm/libmeta/common/hdrs/meta_set_prv.h b/usr/src/lib/lvm/libmeta/common/hdrs/meta_set_prv.h
new file mode 100644
index 0000000000..6f63b161e1
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/hdrs/meta_set_prv.h
@@ -0,0 +1,96 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _META_SET_COM_H
+#define _META_SET_COM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <meta.h>
+#include <ctype.h>
+#include <sys/lvm/md_convert.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Rollback helpers.  Both macros expand to a bare "if (...) goto rollback"
+ * statement, so any function using them must define a "rollback" label.
+ *
+ * RB_PREEMPT jumps when md_got_sig() is non-zero; it carries no trailing
+ * semicolon of its own.
+ * RB_TEST jumps when rb_test() returns a negative value; it already ends in
+ * a semicolon, and expands to nothing at all unless DEBUG is defined.
+ *
+ * Neither macro is wrapped in do { } while (0), so take care when one is
+ * used as the body of an if/else.
+ */
+#define RB_PREEMPT if (md_got_sig()) goto rollback
+#ifdef DEBUG
+#define RB_TEST(tstpt, tag, ep) if (rb_test(tstpt, tag, (ep)) < 0) \
+ goto rollback;
+#else /* !DEBUG */
+#define RB_TEST(tstpt, tag, ep)
+#endif /* DEBUG */
+
+/* meta_setup.c */
+extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
+
+#ifdef DEBUG
+extern int rb_test(int rbt_sel_tpt, char *rbt_sel_tag, md_error_t *ep);
+#endif /* DEBUG */
+
+/*
+ * Flag values used by the nodehasset() function.
+ */
+#define NHS_N_EQ 0x00000001 /* name == */
+#define NHS_NS_EQ 0x00000002 /* name, setno == */
+#define NHS_NST_EQ 0x00000004 /* name, setno, TS == */
+#define NHS_NSTG_EQ 0x00000008 /* name, setno, TS, genid == */
+#define NHS_NST_EQ_G_GT 0x00000010 /* name, setno, TS ==, genid > */
+
+/*
+ * Node, set, and mediator names can be any printable characters
+ * (isprint()) except for the characters in the #define that follows.
+ */
+#define INVALID_IN_NAMES " *?/"
+
+/* meta_set_prv.c */
+extern int checkdrive_onnode(mdsetname_t *sp, mddrivename_t *dnp,
+ char *node, md_error_t *ep);
+extern side_t getnodeside(char *node, md_set_desc *sd);
+extern int halt_set(mdsetname_t *sp, md_error_t *ep);
+extern md_drive_desc *metadrivedesc_append(md_drive_desc **dd,
+ mddrivename_t *dnp, int dbcnt, int dbsize,
+ md_timeval32_t timestamp, ulong_t genid,
+ uint_t flags);
+extern int nodehasset(mdsetname_t *sp, char *node,
+ uint_t match_flag, md_error_t *ep);
+extern int nodesuniq(mdsetname_t *sp, int cnt, char **strings,
+ md_error_t *ep);
+extern int own_set(mdsetname_t *sp, char **owner_of_set,
+ int forceflg, md_error_t *ep);
+extern void resync_genid(mdsetname_t *sp, md_set_desc *sd,
+ ulong_t max_genid, int node_c, char **node_v);
+extern int setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd,
+ int force, md_error_t *ep);
+extern int snarf_set(mdsetname_t *sp, bool_t stale_bool,
+ md_error_t *ep);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _META_SET_COM_H */
diff --git a/usr/src/lib/lvm/libmeta/common/inc.flg b/usr/src/lib/lvm/libmeta/common/inc.flg
new file mode 100644
index 0000000000..58651e7f09
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/inc.flg
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 1995, 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+
+echo_file usr/src/common/lvm/md_crc.c
diff --git a/usr/src/lib/lvm/libmeta/common/llib-lmeta b/usr/src/lib/lvm/libmeta/common/llib-lmeta
new file mode 100644
index 0000000000..747bc0f9ae
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/llib-lmeta
@@ -0,0 +1,33 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2001 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <meta_lib_prv.h>
+#include <meta_set_prv.h>
diff --git a/usr/src/lib/lvm/libmeta/common/meta_admin.c b/usr/src/lib/lvm/libmeta/common/meta_admin.c
new file mode 100644
index 0000000000..9b3e13c10f
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_admin.c
@@ -0,0 +1,133 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1992-1994, 2000-2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+/*
+ * miscellaneous utilities
+ */
+
+#include <meta.h>
+
+/* descriptor for ADMSPECIAL cached by open_admin(); -1 while not open */
+static int meta_fd = -1;
+/* major number of the admin device, recorded by open_admin() */
+static major_t meta_major;
+
+/*
+ * open administrative device
+ *
+ * Opens ADMSPECIAL on first use and caches the descriptor in meta_fd, so
+ * later calls simply return the cached value.  Read/write access is tried
+ * first; if that fails with EACCES the device is opened read-only instead.
+ * While opening, the device's major number is recorded in meta_major and
+ * the driver version is compared against MD_DVERSION.
+ *
+ * Returns the open descriptor on success.  On failure the error is recorded
+ * through ep and the result of mdsyserror()/mderror() is returned (or -1
+ * when the version ioctl itself fails).
+ *
+ * If any step after the open() fails, the descriptor is closed and meta_fd
+ * is reset to -1.  Otherwise the unverified descriptor would stay cached
+ * and the next call would hand it back as a success without ever redoing
+ * the failed fstat() or driver version check.
+ */
+int
+open_admin(
+	md_error_t *ep
+)
+{
+	struct stat	buf;
+	ulong_t		dversion = 0;
+	int		rval;
+
+	/* already open, hand back the cached descriptor */
+	if (meta_fd >= 0)
+		return (meta_fd);
+
+	/* try read/write fall back to readonly */
+	if ((meta_fd = open(ADMSPECIAL, O_RDWR, 0)) < 0) {
+		if (errno != EACCES)
+			return (mdsyserror(ep, errno, ADMSPECIAL));
+		if ((meta_fd = open(ADMSPECIAL, O_RDONLY, 0)) < 0)
+			return (mdsyserror(ep, errno, ADMSPECIAL));
+	}
+
+	/* get major */
+	if (fstat(meta_fd, &buf) != 0) {
+		/* errno is consumed here, before close() can disturb it */
+		rval = mdsyserror(ep, errno, ADMSPECIAL);
+		goto fail;
+	}
+	meta_major = major(buf.st_rdev);
+
+	/*
+	 * check driver version
+	 *
+	 * meta_fd is deliberately left set across this call.
+	 * NOTE(review): metaioctl() is presumed to obtain its descriptor by
+	 * calling back into open_admin(); confirm before reordering.
+	 */
+	if (metaioctl(MD_IOCGVERSION, &dversion, ep, NULL) != 0) {
+		rval = -1;
+		goto fail;
+	}
+	if (dversion != MD_DVERSION) {
+		rval = mderror(ep, MDE_DVERSION, NULL);
+		goto fail;
+	}
+
+	/* return fd */
+	return (meta_fd);
+
+fail:
+	/* do not leave a descriptor cached that failed its checks */
+	(void) close(meta_fd);
+	meta_fd = -1;
+	return (rval);
+}
+
+/*
+ * close administrative device
+ *
+ * Closes the descriptor cached in meta_fd, if there is one, and marks it
+ * as not open.  Returns 0 on success or when nothing was open; if close()
+ * fails, meta_fd is left untouched and the result of mdsyserror() is
+ * returned.
+ */
+int
+close_admin(
+	md_error_t *ep
+)
+{
+	/* nothing cached, nothing to close */
+	if (meta_fd < 0)
+		return (0);
+
+	if (close(meta_fd) == -1)
+		return (mdsyserror(ep, errno, ADMSPECIAL));
+
+	meta_fd = -1;
+	return (0);
+}
+
+/*
+ * Report whether dev is a metadevice: the result is non-zero when the
+ * major number of dev equals meta_major (the major number of the admin
+ * device), and zero otherwise.
+ */
+int
+meta_dev_ismeta(md_dev64_t dev)
+{
+	md_error_t	status = mdnullerror;
+	int		fd = open_admin(&status);
+
+	/* meta_major is only filled in once the admin device is open */
+	assert(fd >= 0);
+
+	return (meta_major == meta_getmajor(dev));
+}
+
+
+/*
+ * Return the value reported by the MD_IOCGETNUNITS ioctl.  The answer is
+ * kept in a function-local static and the ioctl is only issued while that
+ * static is still 0.  Returns -1 if the ioctl fails; ep is passed to
+ * metaioctl() for error reporting.
+ */
+int
+meta_get_nunits(md_error_t *ep)
+{
+	static set_t	max_nunits = 0;
+
+	if (max_nunits == 0 &&
+	    metaioctl(MD_IOCGETNUNITS, &max_nunits, ep, NULL) != 0)
+		return (-1);
+
+	return (max_nunits);
+}
+
+/*
+ * Build a 64-bit device number from meta_major and the minor number mnum:
+ * meta_major is shifted left by NBITSMINOR64 and mnum is OR-ed in.
+ */
+md_dev64_t
+metamakedev(minor_t mnum)
+{
+	md_error_t	status = mdnullerror;
+	int		fd = open_admin(&status);
+
+	/* meta_major is only filled in once the admin device is open */
+	assert(fd >= 0);
+
+	return (mnum | ((md_dev64_t)meta_major << NBITSMINOR64));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_attach.c b/usr/src/lib/lvm/libmeta/common/meta_attach.c
new file mode 100644
index 0000000000..275640d927
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_attach.c
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 1992-2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+/*
+ * attach operations
+ */
+
+#include <meta.h>
+
+/*
+ * grow generic device
+ *
+ * Issues MD_IOCGROW for the metadevice named by namep.  big_or_little
+ * selects the MD_CRO_64BIT option when it equals MD_64BIT_META_DEV and
+ * MD_CRO_32BIT otherwise.  On success the cached information for namep is
+ * invalidated.
+ *
+ * Returns 0 on success.  On failure *ep is set and -1 (or the value of
+ * mdstealerror()) is returned.
+ */
+int
+meta_concat_generic(
+	mdsetname_t	*sp,
+	mdname_t	*namep,
+	u_longlong_t	big_or_little,
+	md_error_t	*ep
+)
+{
+	md_grow_params_t	grow;
+	char			*drvname;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(namep->dev)));
+
+	/* get type */
+	drvname = metagetmiscname(namep, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* build the grow request */
+	(void) memset(&grow, 0, sizeof (grow));
+	grow.options = (big_or_little == MD_64BIT_META_DEV) ?
+	    MD_CRO_64BIT : MD_CRO_32BIT;
+	grow.mnum = meta_getminor(namep->dev);
+	MD_SETDRIVERNAME(&grow, drvname, sp->setno);
+
+	/* grow device */
+	if (metaioctl(MD_IOCGROW, &grow, &grow.mde, namep->cname) != 0)
+		return (mdstealerror(ep, &grow.mde));
+
+	/* clear cache */
+	meta_invalidate_name(namep);
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * grow the parent of a device
+ *
+ * Starting at childnp, walks up the chain of single parents and grows
+ * each one with meta_concat_generic().  The walk stops, returning 0, at
+ * the first device that has no parent, has MD_MULTI_PARENT as its parent,
+ * or whose parent is not a metadevice or is a soft partition.  Any
+ * failure along the way returns -1 with *ep set by the failing routine.
+ */
+int
+meta_concat_parent(
+	mdsetname_t	*sp,
+	mdname_t	*childnp,
+	md_error_t	*ep
+)
+{
+	/* one pass per level of the parent chain */
+	for (;;) {
+		md_common_t	*mdp;
+		mdname_t	*parentnp;
+		md_unit_t	*mup;
+
+		/* should have a set */
+		assert(sp != NULL);
+		assert(sp->setno == MD_MIN2SET(meta_getminor(childnp->dev)));
+
+		/* get parent */
+		mdp = meta_get_unit(sp, childnp, ep);
+		if (mdp == NULL)
+			return (-1);
+
+		/* no parent, or several of them: nothing to grow */
+		if (! MD_HAS_PARENT(mdp->parent) ||
+		    mdp->parent == MD_MULTI_PARENT)
+			return (0);
+
+		/* single parent */
+		parentnp = metamnumname(&sp, mdp->parent, 0, ep);
+		if (parentnp == NULL)
+			return (-1);
+
+		/* don't grow non-metadevices or soft partitions */
+		if (! metaismeta(parentnp))
+			return (0);
+		if (meta_sp_issp(sp, parentnp, ep) == 0)
+			return (0);
+
+		mup = meta_get_mdunit(sp, childnp, ep);
+		if (mup == NULL)
+			return (-1);
+
+		/* grow parent */
+		if (meta_concat_generic(sp, parentnp, mup->c.un_revision,
+		    ep) != 0)
+			return (-1);
+
+		/* then look for parents of the parent */
+		childnp = parentnp;
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_check.c b/usr/src/lib/lvm/libmeta/common/meta_check.c
new file mode 100644
index 0000000000..94c103e0aa
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_check.c
@@ -0,0 +1,874 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * check components
+ */
+
+#include <meta.h>
+#include "meta_lib_prv.h"
+
+#include <sys/mnttab.h>
+#include <sys/swap.h>
+
+#include "meta_lib_prv.h"
+#include <devid.h>
+#include <sys/dumpadm.h>
+
+/*
+ * static list(s)
+ *
+ * dev_list_t is one node of a singly linked list that pairs a device name
+ * with its device id.  devnamelist is the head of that list; nodes are
+ * pushed on the front by add_to_devname_list() and searched by name in
+ * meta_check_samedrive().
+ */
+typedef struct dev_list {
+ char *dev_name; /* copy of the device name */
+ ddi_devid_t devid; /* device id; NULL is treated as "no devid" */
+ struct dev_list *dev_nxt; /* next node, NULL at end of list */
+} dev_list_t;
+
+static dev_list_t *devnamelist = NULL; /* head of the name/devid cache */
+
+/*
+ * free swap info
+ *
+ * Releases a swap table: every non-NULL path buffer in the first swt_n
+ * entries, then the table itself.  A NULL table is ignored.
+ */
+static void
+free_swapinfo(
+	struct swaptable	*swtp
+)
+{
+	int	idx = 0;
+
+	if (swtp == NULL)
+		return;
+
+	while (idx < swtp->swt_n) {
+		if (swtp->swt_ent[idx].ste_path != NULL)
+			Free(swtp->swt_ent[idx].ste_path);
+		++idx;
+	}
+
+	Free(swtp);
+}
+
+/*
+ * get swap info
+ *
+ * Queries the number of swap entries (SC_GETNSWP), allocates a swap table
+ * with a MAXPATHLEN path buffer per entry and fills it in with SC_LIST.
+ *
+ * On success returns 0, *swtpp points at the table (release it with
+ * free_swapinfo()) and *nswap holds the count returned by SC_LIST.
+ * On failure *ep is set, *swtpp is NULL and the value of mdsyserror() or
+ * -1 is returned.
+ */
+static int
+get_swapinfo(
+	struct swaptable	**swtpp,
+	int			*nswap,
+	md_error_t		*ep
+)
+{
+	int		i;
+	size_t		swtsize;
+
+	*swtpp = NULL;
+
+	/* get number of entries */
+	if ((*nswap = swapctl(SC_GETNSWP, NULL)) < 0) {
+		return (mdsyserror(ep, errno, "swapctl(SC_GETNSWP)"));
+	}
+
+	/*
+	 * allocate structure
+	 *
+	 * Size it from the whole struct rather than from swt_n alone:
+	 * sizeof (swt_n) does not include any padding the compiler puts
+	 * before swt_ent, so the old computation could under-allocate.
+	 * This costs at most one spare entry.
+	 */
+	swtsize = sizeof (**swtpp) +
+	    ((*nswap) * sizeof ((*swtpp)->swt_ent[0]));
+	*swtpp = (struct swaptable *)Zalloc(swtsize);
+	(*swtpp)->swt_n = *nswap;
+	for (i = 0; (i < (*nswap)); ++i)
+		(*swtpp)->swt_ent[i].ste_path = Zalloc(MAXPATHLEN);
+
+	/* get info */
+	if (((*nswap) = swapctl(SC_LIST, (*swtpp))) < 0) {
+		(void) mdsyserror(ep, errno, "swapctl(SC_LIST)");
+		free_swapinfo(*swtpp);
+		/* don't hand a freed table back to the caller */
+		*swtpp = NULL;
+		return (-1);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * check whether device is swapped on
+ *
+ * Walks the current swap entries.  An entry whose device number equals
+ * np's yields MDE_IS_SWAPPED; any other entry is checked for overlap with
+ * np and an overlap is reported as MDE_OVERLAP_SWAP.  Entries whose path
+ * cannot be resolved by metaname() are skipped.  The walk stops at the
+ * first non-zero result.
+ *
+ * Returns 0 when np is neither a swap device nor overlaps one, otherwise
+ * the non-zero result of the failing check with *ep set.
+ */
+static int
+meta_check_swapped(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	struct swaptable	*swtp;
+	int			nswap;
+	int			idx;
+	int			rval = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* get swap info */
+	if (get_swapinfo(&swtp, &nswap, ep) != 0)
+		return (-1);
+
+	/* look for match */
+	for (idx = 0; idx < nswap; ++idx) {
+		mdname_t	*snp;
+
+		snp = metaname(&sp, swtp->swt_ent[idx].ste_path, ep);
+		if (snp == NULL) {
+			/* can't resolve this entry, ignore it */
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (np->dev == snp->dev) {
+			rval = mddeverror(ep, MDE_IS_SWAPPED,
+			    np->dev, np->cname);
+		} else {	/* not swap - does it overlap */
+			rval = meta_check_overlap(snp->cname, np, 0, -1,
+			    snp, 0, -1, ep);
+			if (rval != 0) {
+				(void) mdoverlaperror(ep, MDE_OVERLAP_SWAP,
+				    np->cname, NULL, snp->cname);
+			}
+		}
+
+		/* first hit or error ends the search */
+		if (rval != 0)
+			break;
+	}
+	free_swapinfo(swtp);
+
+	return (rval);
+}
+
+/*
+ * Is a drive currently swapped on?
+ *
+ * Compares the drive name of every resolvable swap entry with
+ * dnp->cname and records MDE_IS_SWAPPED for each match; the whole swap
+ * list is always walked.  Returns 0 when no entry is on the drive,
+ * otherwise the value of mddeverror() for the last match, or -1 if the
+ * swap list could not be read.
+ */
+int
+meta_check_driveswapped(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	struct swaptable	*swtp;
+	int			nswap;
+	int			idx;
+	int			rval = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* get swap info */
+	if (get_swapinfo(&swtp, &nswap, ep) != 0)
+		return (-1);
+
+	/* look for match */
+	idx = 0;
+	while (idx < nswap) {
+		mdname_t	*snp;
+
+		snp = metaname(&sp, swtp->swt_ent[idx++].ste_path, ep);
+		if (snp == NULL) {
+			/* can't resolve this entry, ignore it */
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (strcmp(dnp->cname, snp->drivenamep->cname) != 0)
+			continue;
+
+		rval = mddeverror(ep, MDE_IS_SWAPPED, NODEV64,
+		    dnp->cname);
+	}
+	free_swapinfo(swtp);
+
+	return (rval);
+}
+
+/*
+ * check whether device is a dump device
+ *
+ * Asks /dev/dump (DIOCGETDEV) for the current dump device.  If np has the
+ * same device number MDE_IS_DUMP is reported; otherwise np is checked for
+ * overlap with the dump device and an overlap is reported as
+ * MDE_OVERLAP_DUMP.  If the ioctl fails, or the returned name cannot be
+ * resolved by metaname(), np is treated as not in use.
+ *
+ * Returns 0 when np is not in use as, and does not overlap, the dump
+ * device; otherwise a non-zero value with *ep set.
+ */
+static int
+meta_check_dump(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char		device[MAXPATHLEN];
+	mdname_t	*dump_np;
+	int		dump_fd;
+	int		rval = 0;
+
+	dump_fd = open("/dev/dump", O_RDONLY);
+	if (dump_fd < 0)
+		return (mdsyserror(ep, errno, "/dev/dump"));
+
+	/* no dump device information available */
+	if (ioctl(dump_fd, DIOCGETDEV, device) == -1)
+		goto done;
+
+	dump_np = metaname(&sp, device, ep);
+	if (dump_np == NULL) {
+		/* name can't be resolved, ignore it */
+		mdclrerror(ep);
+		goto done;
+	}
+
+	if (np->dev == dump_np->dev) {
+		rval = mddeverror(ep, MDE_IS_DUMP,
+		    np->dev, np->cname);
+	} else {	/* not a dump device - but does it overlap? */
+		rval = meta_check_overlap(dump_np->cname, np, 0, -1,
+		    dump_np, 0, -1, ep);
+		if (rval != 0) {
+			(void) mdoverlaperror(ep, MDE_OVERLAP_DUMP,
+			    np->cname, NULL, dump_np->cname);
+		}
+	}
+
+done:
+	(void) close(dump_fd);
+	return (rval);
+}
+
+/*
+ * check whether device is mounted
+ *
+ * Walks the mount table.  Entries with a missing special or mount point,
+ * a mount point not starting with '/', or one of the file system types
+ * listed in skip_fstypes are ignored, as are entries whose special cannot
+ * be resolved by metaname().  For the rest, a device number equal to np's
+ * yields MDE_IS_MOUNTED; otherwise np is checked for overlap with the
+ * mounted device and an overlap is reported as MDE_OVERLAP_MOUNTED.
+ *
+ * Returns 0 when np is neither mounted nor overlaps a mounted device,
+ * otherwise a non-zero value with *ep set.
+ */
+static int
+meta_check_mounted(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	static const char	*skip_fstypes[] = {
+		"nfs", "autofs", "proc", "tmpfs", "cachefs",
+		"lofs", "rfs", "fd", "mntfs", "devfs", NULL
+	};
+	FILE		*mfp;
+	struct mnttab	m;
+	int		rval = 0;
+	char		mountp[MNT_LINE_MAX];
+	char		mnt_special[MNT_LINE_MAX];
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* look in mnttab */
+	mfp = open_mnttab();
+	if (mfp == NULL)
+		return (mdsyserror(ep, errno, MNTTAB));
+
+	while ((getmntent(mfp, &m) == 0) && (rval == 0)) {
+		const char	**skip;
+		mdname_t	*mnp;
+
+		if ((m.mnt_special == NULL) || (m.mnt_mountp == NULL))
+			continue;
+
+		if (m.mnt_mountp[0] != '/')
+			continue;
+
+		/* file system types that are never checked */
+		for (skip = skip_fstypes; *skip != NULL; skip++) {
+			if (strcmp(m.mnt_fstype, *skip) == 0)
+				break;
+		}
+		if (*skip != NULL)
+			continue;
+
+		/* work on private copies of the entry's strings */
+		(void) strcpy(mountp, m.mnt_mountp);
+		(void) strcpy(mnt_special, m.mnt_special);
+
+		mnp = metaname(&sp, mnt_special, ep);
+		if (mnp == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (np->dev == mnp->dev) {
+			rval = mduseerror(ep, MDE_IS_MOUNTED,
+			    np->dev, mountp, np->cname);
+		} else { /* device isn't in mnttab - does it overlap? */
+			rval = meta_check_overlap(mnp->cname, np, 0, -1,
+			    mnp, 0, -1, ep);
+			if (rval != 0) {
+				(void) mdoverlaperror(ep, MDE_OVERLAP_MOUNTED,
+				    np->cname, mountp, mnp->cname);
+			}
+		}
+	}
+
+	return (rval);
+}
+
+
+/*
+ * Is a file system currently mounted on this disk drive?
+ *
+ * Walks the mount table, ignoring entries with a missing special or
+ * mount point, a mount point not starting with '/', one of the file
+ * system types listed in skip_fstypes, or a special that metaname()
+ * cannot resolve.  An entry whose drive name equals dnp->cname yields
+ * MDE_IS_MOUNTED.
+ *
+ * Returns 0 when nothing on the drive is mounted, otherwise a non-zero
+ * value with *ep set.
+ */
+int
+meta_check_drivemounted(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	static const char	*skip_fstypes[] = {
+		"nfs", "autofs", "proc", "tmpfs", "cachefs",
+		"lofs", "rfs", "fd", NULL
+	};
+	FILE		*mfp;
+	struct mnttab	m;
+	int		rval = 0;
+	char		mountp[MNT_LINE_MAX];
+	char		mnt_special[MNT_LINE_MAX];
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* look in mnttab */
+	mfp = open_mnttab();
+	if (mfp == NULL)
+		return (mdsyserror(ep, errno, MNTTAB));
+
+	while ((getmntent(mfp, &m) == 0) && (rval == 0)) {
+		const char	**skip;
+		mdname_t	*mnp;
+
+		if ((m.mnt_special == NULL) || (m.mnt_mountp == NULL))
+			continue;
+
+		if (m.mnt_mountp[0] != '/')
+			continue;
+
+		/* file system types that are never checked */
+		for (skip = skip_fstypes; *skip != NULL; skip++) {
+			if (strcmp(m.mnt_fstype, *skip) == 0)
+				break;
+		}
+		if (*skip != NULL)
+			continue;
+
+		/* work on private copies of the entry's strings */
+		(void) strcpy(mountp, m.mnt_mountp);
+		(void) strcpy(mnt_special, m.mnt_special);
+
+		mnp = metaname(&sp, mnt_special, ep);
+		if (mnp == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (strcmp(dnp->cname, mnp->drivenamep->cname) == 0) {
+			rval = mduseerror(ep, MDE_IS_MOUNTED, NODEV64,
+			    mountp, dnp->cname);
+		}
+	}
+
+	return (rval);
+}
+
+/*
+ * Check to see if the specified name is already in use or overlaps
+ * with a device already in use.  Depending on the bits set in
+ * inuse_flags, checks are made, in this order, for a mounted device
+ * (MDCHK_MOUNTED), a swap device (MDCHK_SWAP) and a dump device
+ * (MDCHK_DUMP).  Each of those checks also looks for overlap with the
+ * device in question.
+ *
+ * Returns 0 when every requested check passes, otherwise the non-zero
+ * result of the first check that fails.
+ */
+int
+meta_check_inuse(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdinuseopts_t	inuse_flags,
+	md_error_t	*ep
+)
+{
+	int	rval;
+
+	if (inuse_flags & MDCHK_MOUNTED) {
+		rval = meta_check_mounted(sp, np, ep);
+		if (rval != 0)
+			return (rval);
+	}
+
+	if (inuse_flags & MDCHK_SWAP) {
+		rval = meta_check_swapped(sp, np, ep);
+		if (rval != 0)
+			return (rval);
+	}
+
+	if (inuse_flags & MDCHK_DUMP) {
+		rval = meta_check_dump(sp, np, ep);
+		if (rval != 0)
+			return (rval);
+	}
+
+	return (0);
+}
+
+/*
+ * Check whether drive dn belongs to any set other than sp.
+ *
+ * Sets 1 .. max_sets - 1 are examined, skipping sp itself and any set
+ * number for which the lookup reports MDE_NO_SET.  A MDE_DB_NODB error
+ * from the lookup ends the search with success.
+ *
+ * Returns 0 when the drive is in no other set, the value of mddserror()
+ * (MDE_DS_DRIVEINSET) when it is, and -1 on any other failure.
+ */
+int
+meta_check_driveinset(mdsetname_t *sp, mddrivename_t *dn, md_error_t *ep)
+{
+	set_t	setno;
+	set_t	max_sets;
+
+	max_sets = get_max_sets(ep);
+	if (max_sets == 0)
+		return (-1);
+
+	for (setno = 1; setno < max_sets; setno++) {
+		mdsetname_t	*othersp;
+		int		inset;
+
+		/* our own set doesn't count */
+		if (setno == sp->setno)
+			continue;
+
+		othersp = metasetnosetname(setno, ep);
+		if (othersp == NULL) {
+			if (mdismddberror(ep, MDE_DB_NODB)) {
+				mdclrerror(ep);
+				return (0);
+			}
+			if (mdiserror(ep, MDE_NO_SET)) {
+				mdclrerror(ep);
+				continue;
+			}
+			return (-1);
+		}
+
+		metaflushsetname(othersp);
+
+		inset = meta_is_drive_in_thisset(othersp, dn, FALSE, ep);
+		if (inset == -1)
+			return (-1);
+
+		if (inset)
+			return (mddserror(ep, MDE_DS_DRIVEINSET, sp->setno,
+			    othersp->setname, dn->cname, sp->setname));
+	}
+
+	return (0);
+}
+
+/*
+ * Add a device/device id tuple to the devname cache
+ *
+ * A new node holding a copy of device_name and the devid value as passed
+ * in (the devid itself is not copied) is pushed on the front of
+ * devnamelist.
+ */
+static void
+add_to_devname_list(
+	char		*device_name,	/* fully qualified dev name */
+	ddi_devid_t	devid		/* device id */
+)
+{
+	dev_list_t	*node = Zalloc(sizeof (*node));
+
+	node->dev_name = Strdup(device_name);
+	node->devid = devid;
+
+	/* new node becomes the head of the list */
+	node->dev_nxt = devnamelist;
+	devnamelist = node;
+}
+
+/*
+ * Report whether bname has the old style /dev/XXNN[a-h] form, i.e. the
+ * pattern below consumes the whole string.  The length reported by %n
+ * starts at 0 on every call, so a result from one name can never be
+ * mistaken for the length of another.
+ */
+static int
+samedrive_old_style_name(const char *bname)
+{
+	int	len = 0;
+
+	return ((sscanf(bname, "/dev/%*[^0-9/]%*u%*[a-h]%n", &len) == 0) &&
+	    (len == strlen(bname)));
+}
+
+/*
+ * check for same drive
+ *
+ * Returns 1 when np1 and np2 are on the same drive, 0 when they are not
+ * (or when either name is NULL, has no block name, or cannot be opened),
+ * and -1 when it cannot be determined.
+ */
+int
+meta_check_samedrive(
+	mdname_t	*np1,		/* first comp */
+	mdname_t	*np2,		/* second comp */
+	md_error_t	*ep
+)
+{
+
+	mdcinfo_t	*cinfop1, *cinfop2;
+	mdnmtype_t	type1;
+	mdnmtype_t	type2;
+
+	char		*name1 = NULL;
+	char		*name2 = NULL;
+
+	int		retval = -1;
+	int		fd1 = -1;
+	int		fd2 = -1;
+	int		rc1 = -2, rc2 = -2;
+	uint_t		strl1 = 0, strl2 = 0;
+	int		devid1_found = 0;
+	int		devid2_found = 0;
+
+	ddi_devid_t	devid1 = NULL;
+	ddi_devid_t	devid2 = NULL;
+	dev_list_t	*dnlp = NULL;
+
+	/*
+	 * The process of determining if 2 names are the same drive is
+	 * as follows:
+	 *
+	 * Case 1 - The filenames are identical
+	 *
+	 * Case 2 - Either name is a metadevice name. If so then they
+	 *	are not the same drive.
+	 *
+	 * Case 3 - Both devices have a devid
+	 *	get and compare the devids for the devices. If both
+	 *	devices have a devid then the compare is all that is
+	 *	needed and we are done.
+	 *
+	 * Case 4 - One or more devices does not have a devid
+	 *	start by doing a simple compare of the name, if they
+	 *	are the same just return.
+	 *
+	 *	If the names differ then keep going and see if they
+	 *	may be the same underlying device.  First check to
+	 *	see if the sd name is the same (old code).
+	 *
+	 *	Then check the major and minor numbers to see if
+	 *	they are the same.  If they are then return (old code).
+	 *
+	 *	Next compare the raw name and the component name and
+	 *	if they are the same then return.
+	 *
+	 *	All else has failed so use the component name (cname)
+	 *	component number and unit number.  If they all are
+	 *	equal then call them the same drive.
+	 *
+	 */
+
+	/* nothing may be dereferenced until both names are known non-NULL */
+	if ((np1 == NULL) || (np2 == NULL))
+		return (0);
+
+	type1 = np1->drivenamep->type;
+	type2 = np2->drivenamep->type;
+
+	assert(type1 != MDT_FAST_META && type1 != MDT_FAST_COMP);
+	assert(type2 != MDT_FAST_META && type2 != MDT_FAST_COMP);
+
+	/* if the name structs are the same then the drives must be */
+	if (np1 == np2)
+		return (1);
+
+	name1 = np1->bname;
+	name2 = np2->bname;
+
+	if ((name1 == NULL) || ((strl1 = strlen(name1)) == 0) ||
+	    (name2 == NULL) || ((strl2 = strlen(name2)) == 0))
+		return (0);
+
+	if ((strl1 == strl2) && (strcmp(name1, name2) == 0)) {
+		/* names are identical */
+		return (1);
+	}
+
+	if (is_metaname(name1) || is_metaname(name2))
+		return (0);
+
+	/*
+	 * Check to see if the devicename is in the static list.  If so,
+	 * use its devid.  Otherwise do the expensive operations
+	 * of opening the device, getting the devid, and closing the
+	 * device.  Add the result into the static list.
+	 *
+	 * The case where this list will be useful is when there are soft
+	 * partitions on multiple drives and a new soft partition is being
+	 * created.  In that situation the underlying physical device name
+	 * for the new soft partition would be compared against each of the
+	 * existing soft partitions.  Without this static list that would
+	 * involve 2 opens, closes, and devid gets for each existing soft
+	 * partition
+	 */
+	for (dnlp = devnamelist;
+	    (dnlp != NULL) && !(devid1_found && devid2_found);
+	    dnlp = dnlp->dev_nxt) {
+		if (!devid1_found && (strcmp(dnlp->dev_name, name1) == 0)) {
+			devid1_found = 1;
+			devid1 = dnlp->devid;
+			if (devid1 == NULL)
+				rc1 = 1;
+			else
+				rc1 = 0;
+			continue;
+		}
+		if (!devid2_found && (strcmp(dnlp->dev_name, name2) == 0)) {
+			devid2_found = 1;
+			devid2 = dnlp->devid;
+			if (devid2 == NULL)
+				rc2 = 1;
+			else
+				rc2 = 0;
+			continue;
+		}
+	}
+
+	/*
+	 * Start by checking if the device has a device id, and if they
+	 * are equal.  If they are there is no question there is a match.
+	 *
+	 * The process here is open each disk, get the devid for each
+	 * disk.  If they both have a devid compare them and return
+	 * the results.
+	 */
+	if (!devid1_found) {
+		if ((fd1 = open(name1, O_RDONLY | O_NDELAY)) < 0) {
+			return (0);
+		}
+		rc1 = devid_get(fd1, &devid1);
+		(void) close(fd1);
+
+		/* add the name and devid to the cache */
+		add_to_devname_list(name1, devid1);
+	}
+
+	if (!devid2_found) {
+		if ((fd2 = open(name2, O_RDONLY | O_NDELAY)) < 0) {
+			return (0);
+		}
+		rc2 = devid_get(fd2, &devid2);
+		(void) close(fd2);
+
+		/* add the name and devid to the cache */
+		add_to_devname_list(name2, devid2);
+	}
+
+
+	if ((rc1 == 0) && (rc2 == 0)) {
+		if (devid_compare(devid1, devid2) == 0)
+			retval = 1; /* same drive */
+		else
+			retval = 0; /* different drives */
+
+	}
+
+	if (retval >= 0) {
+		return (retval);
+	}
+
+	/*
+	 * At this point in time one of the two drives did not have a
+	 * device ID.  Do not make the assumption that if one drive
+	 * did have a device id and the other did not that they are not
+	 * the same.  One drive could be covered by a device and still
+	 * be the same drive.  This is a general flaw in the system at
+	 * this time.
+	 */
+
+	/*
+	 * The optimization can not happen if we are given an old style name
+	 * in the form /dev/XXNN[a-h], since the name caches differently and
+	 * allows overlaps to happen.
+	 */
+	if (! (samedrive_old_style_name(np1->bname) ||
+	    samedrive_old_style_name(np2->bname)) &&
+	    ((type1 == MDT_COMP) || (type1 == MDT_META)) &&
+	    ((type2 == MDT_COMP) || (type2 == MDT_META)))
+		return (np1->drivenamep == np2->drivenamep);
+
+	/* check for same drive */
+	if (meta_getmajor(np1->dev) != meta_getmajor(np2->dev))
+		return (0);		/* not same drive */
+
+	if (((cinfop1 = metagetcinfo(np1, ep)) == NULL) ||
+	    ((cinfop2 = metagetcinfo(np2, ep)) == NULL)) {
+		if ((strcmp(np1->drivenamep->cname,
+		    np2->drivenamep->cname) != 0) &&
+		    (strcmp(np1->drivenamep->rname,
+		    np2->drivenamep->rname) != 0)) {
+			mdclrerror(ep);
+			return (0);	/* not same drive */
+		} else {
+			return (-1);	/* can't tell */
+		}
+	} else if ((strncmp(cinfop1->cname, cinfop2->cname,
+	    sizeof (cinfop1->cname)) != 0) ||
+	    (cinfop1->cnum != cinfop2->cnum) ||
+	    (cinfop1->unit != cinfop2->unit)) {
+		return (0);		/* not same drive */
+	}
+
+	/* same drive */
+	return (1);
+}
+
+/*
+ * check for overlap
+ *
+ * Compares the block range [slblk, slblk + nblks) of np1 with that of
+ * np2, both taken relative to the start of their partitions.  A block
+ * count of -1 means "to the end of the partition".
+ *
+ * Returns 0 when the two are on different drives or the ranges do not
+ * intersect, -1 when that cannot be determined, and otherwise the value
+ * of mduseerror(): MDE_ALREADY when both names are the same device,
+ * MDE_OVERLAP when they are not.
+ */
+int
+meta_check_overlap(
+	char		*uname,	/* user supplied name for errors */
+	mdname_t	*np1,	/* first comp */
+	diskaddr_t	slblk1,	/* first comp - start logical block */
+	diskaddr_t	nblks1,	/* first comp - # of blocks */
+	mdname_t	*np2,	/* second comp */
+	diskaddr_t	slblk2,	/* second comp - start logical block */
+	diskaddr_t	nblks2,	/* second comp - # of blocks */
+	md_error_t	*ep
+)
+{
+	diskaddr_t	sblk1, sblk2;
+	diskaddr_t	eblk1, eblk2;
+	mdvtoc_t	*vtocp1, *vtocp2;
+	uint_t		partno1, partno2;
+	mdpart_t	*partp1, *partp2;
+	int		same;
+
+	/* verify args */
+	if (slblk1 == MD_DISKADDR_ERROR) {
+		assert(0);
+		return (mdsyserror(ep, EINVAL, np1->cname));
+	}
+	if (slblk2 == MD_DISKADDR_ERROR) {
+		assert(0);
+		return (mdsyserror(ep, EINVAL, np2->cname));
+	}
+
+	/* check for same drive */
+	same = meta_check_samedrive(np1, np2, ep);
+	if (same == 0)
+		return (0);	/* not same drive */
+	if (same < 0)
+		return (-1);	/* can't tell */
+
+	/* look up both partitions */
+	vtocp1 = metagetvtoc(np1, FALSE, &partno1, ep);
+	if (vtocp1 == NULL)
+		return (-1);	/* can't tell */
+	vtocp2 = metagetvtoc(np2, FALSE, &partno2, ep);
+	if (vtocp2 == NULL)
+		return (-1);	/* can't tell */
+
+	/* turn both ranges into absolute [start, end) block numbers */
+	partp1 = &vtocp1->parts[partno1];
+	partp2 = &vtocp2->parts[partno2];
+	sblk1 = partp1->start + slblk1;
+	if (nblks1 == -1)
+		nblks1 = partp1->size - slblk1;
+	sblk2 = partp2->start + slblk2;
+	if (nblks2 == -1)
+		nblks2 = partp2->size - slblk2;
+	eblk1 = sblk1 + nblks1;
+	eblk2 = sblk2 + nblks2;
+
+	/* no overlap unless one range starts inside the other */
+	if (!(((sblk1 >= sblk2) && (sblk1 < eblk2)) ||
+	    ((sblk2 >= sblk1) && (sblk2 < eblk1))))
+		return (0);
+
+	if (np1->dev == np2->dev) {	/* slice in use */
+		return (mduseerror(ep, MDE_ALREADY, np1->dev,
+		    uname, np1->cname));
+	}
+
+	/* slice overlaps */
+	return (mduseerror(ep, MDE_OVERLAP, np1->dev, uname, np1->cname));
+}
+
+/*
+ * check to see if a device is in a metadevice
+ *
+ * Runs, in order: the replica slice check (only for disks in sets and
+ * only without MDCHK_ALLOW_REPSLICE), the database replica check
+ * (an "already in use" result is tolerated with MDCHK_ALLOW_MDDB and
+ * reported as MDE_HAS_MDDB without it), then the stripe, mirror, trans,
+ * soft partition, hot spare pool (skipped with MDCHK_ALLOW_HS) and raid
+ * checks.
+ *
+ * Returns 0 when np passes every check; otherwise a non-zero value with
+ * *ep set.
+ */
+int
+meta_check_inmeta(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdchkopts_t	options,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	uint_t		partno;
+
+	/* see if replica slice is ok, only applies to disks in sets */
+	if (! (options & MDCHK_ALLOW_REPSLICE) &&
+	    ! metaislocalset(sp)) {
+		uint_t	rep_slice;
+
+		if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
+			return (-1);
+		if (meta_replicaslice(np->drivenamep, &rep_slice, ep) != 0)
+			return (-1);
+		if (partno == rep_slice)
+			return (mddeverror(ep, MDE_REPCOMP_INVAL, np->dev,
+			    np->cname));
+	}
+
+	/* check for databases */
+	if (meta_check_inreplica(sp, np, slblk, nblks, ep) != 0) {
+		/* anything but "already in use" is a plain failure */
+		if (! (mdisuseerror(ep, MDE_ALREADY)))
+			return (-1);
+		if (! (options & MDCHK_ALLOW_MDDB))
+			return (mddeverror(ep, MDE_HAS_MDDB,
+			    np->dev, np->cname));
+		mdclrerror(ep);
+	}
+
+	/* check metadevices */
+	if (meta_check_instripe(sp, np, slblk, nblks, ep) != 0 ||
+	    meta_check_inmirror(sp, np, slblk, nblks, ep) != 0 ||
+	    meta_check_intrans(sp, np, options, slblk, nblks, ep) != 0 ||
+	    meta_check_insp(sp, np, slblk, nblks, ep) != 0)
+		return (-1);
+
+	if (! (options & MDCHK_ALLOW_HS) &&
+	    meta_check_inhsp(sp, np, slblk, nblks, ep) != 0)
+		return (-1);
+
+	if (meta_check_inraid(sp, np, slblk, nblks, ep) != 0)
+		return (-1);
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * check to see if a device is in its set
+ *
+ * Looks up the set np belongs to and compares it with sp.  When the
+ * lookup fails with MDE_DB_NODB for a non-metadevice and sp is the local
+ * set, np is taken to belong to sp.
+ *
+ * Returns 0 when np is in sp, -1 when the lookup fails, and otherwise the
+ * value of mddeverror(): MDE_IN_SHARED_SET when sp is the local set,
+ * MDE_NOT_IN_SET when it is not.
+ */
+int
+meta_check_inset(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mdsetname_t	*npsp;
+	int		bypass_daemon;
+
+	/* check devices set */
+	bypass_daemon = metaislocalset(sp) ? TRUE : FALSE;
+	npsp = metagetset(np, bypass_daemon, ep);
+	if (npsp == NULL) {
+		if (metaismeta(np) ||
+		    ! (metaislocalset(sp)) ||
+		    ! (mdismddberror(ep, MDE_DB_NODB)))
+			return (-1);
+
+		mdclrerror(ep);
+		npsp = sp;
+	}
+
+	/* check set */
+	if (metaissameset(sp, npsp))
+		return (0);
+
+	/* return appropriate error */
+	if (! (metaislocalset(sp)))
+		return (mddeverror(ep, MDE_NOT_IN_SET, np->dev, np->cname));
+
+	return (mddeverror(ep, MDE_IN_SHARED_SET, np->dev, np->cname));
+}
+
+/*
+ * check to see if current user is root
+ *
+ * Returns 0 when the effective user id is 0; otherwise records
+ * MDE_NOPERM in *ep and returns -1.
+ */
+int
+meta_check_root(md_error_t *ep)
+{
+	if (geteuid() == 0)
+		return (0);
+
+	(void) mderror(ep, MDE_NOPERM, "");
+	return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_db.c b/usr/src/lib/lvm/libmeta/common/meta_db.c
new file mode 100644
index 0000000000..e30eb58c06
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_db.c
@@ -0,0 +1,2517 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Metadevice database interfaces.
+ */
+
+#define MDDB
+
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+#include <sys/lvm/md_crc.h>
+#include <sys/lvm/mdio.h>
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+
+struct svm_daemon { /* one entry of svmd_kill_list */
+ char *svmd_name; /* daemon name */
+ char *svmd_kill_val; /* presumably a signal name - TODO confirm at the use site */
+};
+
+struct svm_daemon svmd_kill_list[] = { /* sized by DAEMON_COUNT below */
+ {"mdmonitord", "HUP"},
+ {"mddoors", "KILL"},
+ };
+
+#define DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon)) /* number of entries in svmd_kill_list */
+#define MDMONITORD "/usr/sbin/mdmonitord" /* path of the mdmonitord binary */
+
+extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep); /* defined outside this file */
+
+/*
+ * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
+ *
+ * Only the set number of the request is filled in.  The timestamp field
+ * of the request is returned whether or not the ioctl succeeds (the
+ * request is zeroed beforehand); on failure the ioctl's error is moved
+ * into *ep.
+ */
+md_timeval32_t
+meta_get_lb_inittime(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	mddb_config_t	cfg;
+
+	/* start from a zeroed request and fill in the set number */
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_setno = sp->setno;
+
+	if (metaioctl(MD_DB_LBINITTIME, &cfg, &cfg.c_mde, NULL) != 0)
+		(void) mdstealerror(ep, &cfg.c_mde);
+
+	return (cfg.c_timestamp);
+}
+
+/*
+ * mkmasterblks writes out the master blocks of the mddb to the replica.
+ *
+ * In a MN diskset, this is called by the node that is adding this replica
+ * to the diskset.
+ *
+ * The work is done in two steps on the already open descriptor fd:
+ * 1. every block of the region [firstblk, firstblk + dbsize) is written
+ * from a zero-filled buffer and read back, in chunks of at most
+ * MDDB_VERIFY_SIZE blocks; only the success of the I/O is checked.
+ * 2. a single master block is built, checksummed, written at firstblk,
+ * read back and its checksum verified.
+ *
+ * Returns 0 on success. On failure *ep is filled in and the value of the
+ * error routine is returned (-1 when metaget_setdesc fails).
+ */
+
+#define MDDB_VERIFY_SIZE 8192
+
+static int
+mkmasterblks(
+ mdsetname_t *sp, /* set the replica belongs to */
+ mdname_t *np, /* replica device; names used in errors */
+ int fd, /* open descriptor used for all I/O */
+ daddr_t firstblk, /* first block of the mddb region */
+ int dbsize, /* size of the region in blocks */
+ md_timeval32_t inittime, /* stored as set create time (non-local sets) */
+ md_error_t *ep
+)
+{
+ int consecutive; /* blocks handled in the current chunk */
+ md_timeval32_t tp;
+ struct mddb_mb *mb;
+ char *buffer;
+ int iosize; /* current chunk size in bytes */
+ md_set_desc *sd;
+ int mn_set = 0;
+ daddr_t startblk;
+ int cnt; /* blocks still to be verified */
+ ddi_devid_t devid;
+
+ if (! metaislocalset(sp)) {
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ if (MD_MNSET_DESC(sd)) {
+ mn_set = 1; /* Used later */
+ }
+ }
+
+ /*
+ * Loop to verify the entire mddb region on disk is read/writable.
+ * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
+ * chunks.
+ *
+ * A side-effect of this loop is to zero out the entire mddb region
+ */
+ if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
+ return (mdsyserror(ep, ENOMEM, np->rname));
+
+ startblk = firstblk;
+ for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
+
+ if (cnt > MDDB_VERIFY_SIZE)
+ consecutive = MDDB_VERIFY_SIZE;
+ else
+ consecutive = cnt;
+
+ if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
+ Free(buffer);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ iosize = DEV_BSIZE * consecutive;
+ if (write(fd, buffer, iosize) != iosize) {
+ Free(buffer);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { /* back to chunk start */
+ Free(buffer);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ if (read(fd, buffer, iosize) != iosize) { /* contents are not compared */
+ Free(buffer);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ startblk += consecutive;
+ }
+
+ Free(buffer);
+ if ((mb = Zalloc(DEV_BSIZE)) == NULL) /* master block image, one block */
+ return (mdsyserror(ep, ENOMEM, np->rname));
+
+ if (meta_gettimeofday(&tp) == -1) {
+ Free(mb);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ mb->mb_magic = MDDB_MAGIC_MB;
+ /*
+ * If a MN diskset, set master block revision for a MN set.
+ * Even though the master block structure is no different
+ * for a MN set, setting the revision field to a different
+ * number keeps any pre-MN_diskset code from accessing
+ * this diskset. It also allows for an early determination
+ * of a MN diskset when reading in from disk so that the
+ * proper size locator block and locator names structure
+ * can be read in thus saving time on diskset startup.
+ */
+ if (mn_set)
+ mb->mb_revision = MDDB_REV_MNMB;
+ else
+ mb->mb_revision = MDDB_REV_MB;
+ mb->mb_timestamp = tp;
+ mb->mb_setno = sp->setno;
+ mb->mb_blkcnt = dbsize - 1; /* region minus the master block itself */
+ mb->mb_blkno = firstblk;
+ mb->mb_nextblk = 0;
+
+ mb->mb_blkmap.m_firstblk = firstblk + 1; /* block right after the master block */
+ mb->mb_blkmap.m_consecutive = dbsize - 1;
+ if (! metaislocalset(sp)) {
+ mb->mb_setcreatetime = inittime;
+ }
+
+ /*
+ * We try to save the disk's device ID into the remaining bytes in
+ * the master block. The saved devid is used to provide a mapping
+ * between this disk's devid and the devid stored into the master
+ * block. This allows the disk image to be self-identifying
+ * if it gets copied (e.g. SNDR, True Copy, etc.). This is used
+ * when we try to import these disks on the remote copied image.
+ * If we cannot save the disk's device ID onto the master block that is
+ * ok. The disk is just not self-identifying and won't be importable
+ * in the remote copy scenario.
+ */
+ if (devid_get(fd, &devid) == 0) {
+ size_t len;
+
+ len = devid_sizeof(devid);
+ if (len <= DEV_BSIZE - sizeof (*mb)) {
+ /* there is enough space to store the devid */
+ mb->mb_devid_magic = MDDB_MAGIC_DE;
+ mb->mb_devid_len = len;
+ (void) memcpy(mb->mb_devid, devid, len);
+ }
+ devid_free(devid);
+ }
+
+ crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
+ (crc_skip_t *)NULL);
+
+ if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
+ Free(mb);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
+ Free(mb);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { /* rewind for the read-back */
+ Free(mb);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
+ Free(mb);
+ return (mdsyserror(ep, errno, np->rname));
+ }
+
+ if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
+ (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) { /* what was read back fails its checksum */
+ Free(mb);
+ return (mdmddberror(ep, MDE_NOTVERIFIED,
+ meta_getminor(np->dev), sp->setno, 0, np->rname));
+ }
+
+ Free(mb);
+ return (0);
+}
+
+/*
+ * Write a dummy master block (magic MDDB_MAGIC_DU) at firstblk of the
+ * device open on fd.  The block carries the set number, the set create
+ * time obtained from meta_get_lb_inittime(), a timestamp and, when it
+ * fits, the disk's device id.
+ *
+ * Nothing is written for a MN diskset.  The function reports nothing to
+ * its caller: every failure is silently ignored.
+ */
+void
+meta_mkdummymaster(
+	mdsetname_t	*sp,
+	int		fd,
+	daddr_t		firstblk
+)
+{
+	md_timeval32_t	tp;
+	struct mddb_mb	*mb;
+	ddi_devid_t	devid;
+	md_set_desc	*sd;
+	md_error_t	ep = mdnullerror;
+	md_timeval32_t	inittime;
+
+	/*
+	 * No dummy master blocks are written for a MN diskset since devids
+	 * are not supported in MN disksets.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, &ep)) == NULL) {
+			/* the error is not passed on, so clear it here */
+			mdclrerror(&ep);
+			return;
+		}
+
+		if (MD_MNSET_DESC(sd))
+			return;
+	}
+
+	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
+		return;
+
+	mb->mb_magic = MDDB_MAGIC_DU;
+	mb->mb_revision = MDDB_REV_MB;
+	mb->mb_setno = sp->setno;
+	inittime = meta_get_lb_inittime(sp, &ep);
+	mb->mb_setcreatetime = inittime;
+
+	if (meta_gettimeofday(&tp) != -1)
+		mb->mb_timestamp = tp;
+
+	/*
+	 * We try to save the disk's device ID into the remaining bytes in
+	 * the master block.  This allows the disk image to be
+	 * self-identifying if it gets copied (e.g. SNDR, True Copy, etc.).
+	 * This is used when we try to import these disks on the remote
+	 * copied image.  If we cannot save the disk's device ID onto the
+	 * master block that is ok.  The disk is just not self-identifying
+	 * and won't be importable in the remote copy scenario.
+	 */
+	if (devid_get(fd, &devid) == 0) {
+		/* size_t, as in mkmasterblks: the bound below is unsigned */
+		size_t	len;
+
+		len = devid_sizeof(devid);
+		if (len <= DEV_BSIZE - sizeof (*mb)) {
+			/* there is enough space to store the devid */
+			mb->mb_devid_magic = MDDB_MAGIC_DE;
+			mb->mb_devid_len = len;
+			(void) memcpy(mb->mb_devid, (char *)devid, len);
+		}
+		devid_free(devid);
+	}
+
+	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
+	    (crc_skip_t *)NULL);
+
+	/*
+	 * If any of these operations fail, we need to inform the
+	 * user that the disk won't be self identifying.  When support
+	 * for importing remotely replicated disksets is added, we
+	 * want to add the error messages here.
+	 */
+	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
+		goto out;
+
+	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
+		goto out;
+
+	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
+		goto out;
+
+	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
+		goto out;
+
+	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
+	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
+		goto out;
+
+out:
+	Free(mb);
+	/* drop whatever meta_get_lb_inittime() may have left in the error */
+	mdclrerror(&ep);
+}
+
+ /*
+  * Regenerate META_DBCONF from the replica list of the local set.
+  *
+  * The new contents are written to a temporary file first.  Normally that
+  * is META_DBCONFTMP, which is then renamed over META_DBCONF.  If the
+  * temporary file cannot be created because of EROFS we assume we are in
+  * the miniroot: the temporary file is created in /var/tmp and its
+  * contents are copied into META_DBCONF instead of being renamed.
+  *
+  * Returns 0 on success (and always for a non-local set), otherwise the
+  * result of mdsyserror()/metareplicalist() with ep filled in.
+  */
+ static int
+ buildconf(mdsetname_t *sp, md_error_t *ep)
+ {
+ 	md_replicalist_t	*rlp = NULL;
+ 	md_replicalist_t	*rl;
+ 	FILE			*cfp = NULL;
+ 	FILE			*mfp = NULL;
+ 	struct stat		sbuf;
+ 	int			rval = 0;
+ 	int			in_miniroot = 0;
+ 	char			line[MDDB_BOOTLIST_MAX_LEN];
+ 	char			*tname = NULL;
+ 	char			*devidp = NULL;
+
+ 	/* only the local set is recorded in the location file */
+ 	if (! metaislocalset(sp))
+ 		return (0);
+
+ 	/* get list of local replicas */
+ 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
+ 		return (-1);
+
+ 	/* open tempfile, copy permissions of original file */
+ 	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
+ 		/*
+ 		 * On the miniroot tmp files must be created in /var/tmp.
+ 		 * If we get a EROFS error, we assume that we are in the
+ 		 * miniroot.
+ 		 */
+ 		if (errno != EROFS)
+ 			goto error;
+ 		in_miniroot = 1;
+ 		errno = 0;
+ 		tname = tempnam("/var/tmp", "slvm_");
+ 		if (tname == NULL) {
+ 			if (errno == EROFS) {
+ 				/*
+ 				 * If we are booted on a read-only root
+ 				 * because of mddb quorum problems we don't
+ 				 * want to emit any scary error messages.
+ 				 */
+ 				errno = 0;
+ 				goto out;
+ 			}
+ 			/* never hand a NULL path to fopen() */
+ 			if (errno == 0)
+ 				errno = ENOMEM;
+ 			goto error;
+ 		}
+
+ 		if ((cfp = fopen(tname, "w+")) == NULL)
+ 			goto error;
+ 	}
+ 	if (stat(META_DBCONF, &sbuf) == 0) {
+ 		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
+ 			goto error;
+ 		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
+ 			goto error;
+ 	}
+
+ 	/* print header */
+ 	if (fprintf(cfp, "#metadevice database location file ") < 0)
+ 		goto error;
+ 	if (fprintf(cfp, "do not hand edit\n") < 0)
+ 		goto error;
+ 	if (fprintf(cfp,
+ 	    "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
+ 		goto error;
+
+ 	/* dump replicas */
+ 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
+ 		md_replica_t	*r = rl->rl_repp;
+ 		int		checksum = 42;
+ 		size_t		i;
+ 		size_t		devidlen;
+ 		minor_t		min;
+
+ 		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
+ 		/* If devid code can't encode devidp - skip entry */
+ 		if (devidp == NULL)
+ 			continue;
+
+ 		/*
+ 		 * compute checksum: 42 minus every byte of the driver
+ 		 * name, the minor number, the block number and every byte
+ 		 * of the encoded devid.  The bound is tested before the
+ 		 * byte is read.
+ 		 */
+ 		for (i = 0; (i < sizeof (r->r_driver_name)) &&
+ 		    (r->r_driver_name[i] != '\0'); i++) {
+ 			checksum -= r->r_driver_name[i];
+ 		}
+ 		min = meta_getminor(r->r_namep->dev);
+ 		checksum -= min;
+ 		checksum -= r->r_blkno;
+
+ 		devidlen = strlen(devidp);
+ 		for (i = 0; i < devidlen; i++)
+ 			checksum -= devidp[i];
+
+ 		/* print info; devidp is released at out: on failure */
+ 		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
+ 		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
+ 			goto error;
+ 		}
+
+ 		devid_str_free(devidp);
+ 		devidp = NULL;
+ 	}
+
+ 	/* close and rename to real file */
+ 	if (fflush(cfp) != 0)
+ 		goto error;
+ 	if (fsync(fileno(cfp)) != 0)
+ 		goto error;
+ 	if (fclose(cfp) != 0) {
+ 		cfp = NULL;
+ 		goto error;
+ 	}
+ 	cfp = NULL;
+
+ 	/*
+ 	 * Renames don't work in the miniroot since tmpfiles are
+ 	 * created in /var/tmp. Hence we copy the data out.
+ 	 */
+ 	if (! in_miniroot) {
+ 		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
+ 			goto error;
+ 	} else {
+ 		if ((cfp = fopen(tname, "r")) == NULL)
+ 			goto error;
+ 		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
+ 			goto error;
+ 		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
+ 			/* fputs() signals failure with EOF, not NULL */
+ 			if (fputs(line, mfp) == EOF)
+ 				goto error;
+ 		}
+ 		/* distinguish end-of-file from a read failure */
+ 		if (ferror(cfp))
+ 			goto error;
+ 		(void) fclose(cfp);
+ 		cfp = NULL;
+ 		if (fflush(mfp) != 0)
+ 			goto error;
+ 		if (fsync(fileno(mfp)) != 0)
+ 			goto error;
+ 		if (fclose(mfp) != 0) {
+ 			mfp = NULL;
+ 			goto error;
+ 		}
+ 		mfp = NULL;
+ 		/* delete the tempfile */
+ 		(void) unlink(tname);
+ 	}
+ 	/* success */
+ 	rval = 0;
+ 	goto out;
+
+ 	/* tempfile error */
+ error:
+ 	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
+ 	    mdsyserror(ep, errno, META_DBCONFTMP);
+
+ 	/* common cleanup for both the success and the error path */
+ out:
+ 	if (devidp != NULL)
+ 		devid_str_free(devidp);
+ 	if (rlp != NULL)
+ 		metafreereplicalist(rlp);
+ 	if (mfp != NULL)
+ 		(void) fclose(mfp);
+ 	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
+ 		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
+ 		    mdsyserror(ep, errno, META_DBCONFTMP);
+ 	}
+ 	if (tname != NULL) {
+ 		/* don't leave a uniquely named tempfile behind on failure */
+ 		if (rval != 0)
+ 			(void) unlink(tname);
+ 		free(tname);
+ 	}
+ 	return (rval);
+ }
+
+/*
+ * check replica for dev
+ */
+static int
+in_replica(
+ mdsetname_t *sp,
+ md_replica_t *rp,
+ mdname_t *np,
+ diskaddr_t slblk,
+ diskaddr_t nblks,
+ md_error_t *ep
+)
+{
+ mdname_t *repnp = rp->r_namep;
+ diskaddr_t rep_sblk = rp->r_blkno;
+ diskaddr_t rep_nblks = rp->r_nblk;
+
+ /* should be in the same set */
+ assert(sp != NULL);
+
+ /* if error in master block, assume whole partition */
+ if ((rep_sblk == MD_DISKADDR_ERROR) ||
+ (rep_nblks == MD_DISKADDR_ERROR)) {
+ rep_sblk = 0;
+ rep_nblks = MD_DISKADDR_ERROR;
+ }
+
+ /* check overlap */
+ if (meta_check_overlap(
+ MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
+ return (-1);
+ }
+
+ /* return success */
+ return (0);
+}
+
+/*
+ * check to see if we're in a replica
+ */
+int
+meta_check_inreplica(
+ mdsetname_t *sp,
+ mdname_t *np,
+ diskaddr_t slblk,
+ diskaddr_t nblks,
+ md_error_t *ep
+)
+{
+ md_replicalist_t *rlp = NULL;
+ md_replicalist_t *rl;
+ int rval = 0;
+
+ /* should have a set */
+ assert(sp != NULL);
+
+ /* for each replica */
+ if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
+ return (-1);
+ for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
+ md_replica_t *rp = rl->rl_repp;
+
+ /* check replica */
+ if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
+ rval = -1;
+ break;
+ }
+ }
+
+ /* cleanup, return success */
+ metafreereplicalist(rlp);
+ return (rval);
+}
+
+/*
+ * check replica
+ */
+int
+meta_check_replica(
+ mdsetname_t *sp, /* set to check against */
+ mdname_t *np, /* component to check against */
+ mdchkopts_t options, /* option flags */
+ diskaddr_t slblk, /* start logical block */
+ diskaddr_t nblks, /* number of blocks (-1,rest of them) */
+ md_error_t *ep /* error packet */
+)
+{
+ mdchkopts_t chkoptions = MDCHK_ALLOW_REPSLICE;
+
+ /* make sure we have a disk */
+ if (metachkcomp(np, ep) != 0)
+ return (-1);
+
+ /* check to ensure that it is not already in use */
+ if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
+ return (-1);
+ }
+
+ if (options & MDCHK_ALLOW_NODBS)
+ return (0);
+
+ if (options & MDCHK_DRVINSET)
+ return (0);
+
+ /* make sure it is in the set */
+ if (meta_check_inset(sp, np, ep) != 0)
+ return (-1);
+
+ /* make sure its not in a metadevice */
+ if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
+ return (-1);
+
+ /* return success */
+ return (0);
+}
+
+ /*
+  * Send the replica information held in dd to the nodes of diskset sp
+  * using clnt_upd_dr_dbinfo().
+  *
+  * Unless set_locked is non-zero the set is locked on each target node
+  * beforehand and unlocked afterwards.  With force set, only the entry
+  * whose name matches mynode() is contacted.  For a multi-node set, nodes
+  * without MD_MN_NODE_ALIVE are skipped; for a traditional set, empty
+  * node slots are skipped.
+  *
+  * Returns 0 on success and -1 on failure.  A lock failure returns
+  * immediately; an update failure still goes through the unlock phase.
+  */
+ static int
+ update_dbinfo_on_drives(
+ 	mdsetname_t	*sp,
+ 	md_drive_desc	*dd,
+ 	int		set_locked,
+ 	int		force,
+ 	md_error_t	*ep
+ )
+ {
+ 	md_set_desc	*sd;
+ 	md_mnnode_desc	*nd;
+ 	md_setkey_t	*cl_sk;
+ 	int		side;
+ 	int		rval = 0;
+
+ 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ 		return (-1);
+
+ 	/* Phase 1: lock the set everywhere, unless the caller already did */
+ 	if (! set_locked) {
+ 		if (MD_MNSET_DESC(sd)) {
+ 			md_error_t	xep = mdnullerror;
+ 			sigset_t	sigs;
+
+ 			/* Make sure we are blocking all signals */
+ 			if (procsigs(TRUE, &sigs, &xep) < 0)
+ 				mdclrerror(&xep);
+
+ 			for (nd = sd->sd_nodelist; nd != NULL;
+ 			    nd = nd->nd_next) {
+ 				if (force && strcmp(nd->nd_nodename,
+ 				    mynode()) != 0)
+ 					continue;
+
+ 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+ 					continue;
+
+ 				if (clnt_lock_set(nd->nd_nodename, sp, ep))
+ 					return (-1);
+ 			}
+ 		} else {
+ 			for (side = 0; side < MD_MAXSIDES; side++) {
+ 				/* Skip empty slots */
+ 				if (sd->sd_nodes[side][0] == '\0')
+ 					continue;
+
+ 				if (force && strcmp(sd->sd_nodes[side],
+ 				    mynode()) != 0)
+ 					continue;
+
+ 				if (clnt_lock_set(sd->sd_nodes[side], sp, ep))
+ 					return (-1);
+ 			}
+ 		}
+ 	}
+
+ 	/* Phase 2: push the drive/replica information to each node */
+ 	if (MD_MNSET_DESC(sd)) {
+ 		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+ 			if (force && strcmp(nd->nd_nodename, mynode()) != 0)
+ 				continue;
+
+ 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+ 				continue;
+
+ 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
+ 			    == -1) {
+ 				rval = -1;
+ 				break;
+ 			}
+ 		}
+ 	} else {
+ 		for (side = 0; side < MD_MAXSIDES; side++) {
+ 			/* Skip empty slots */
+ 			if (sd->sd_nodes[side][0] == '\0')
+ 				continue;
+
+ 			if (force && strcmp(sd->sd_nodes[side], mynode()) != 0)
+ 				continue;
+
+ 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[side], sp, dd, ep)
+ 			    == -1) {
+ 				rval = -1;
+ 				break;
+ 			}
+ 		}
+ 	}
+
+ 	/* Phase 3: drop the locks taken in phase 1 */
+ 	if (! set_locked) {
+ 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ 		if (MD_MNSET_DESC(sd)) {
+ 			for (nd = sd->sd_nodelist; nd != NULL;
+ 			    nd = nd->nd_next) {
+ 				if (force &&
+ 				    strcmp(nd->nd_nodename, mynode()) != 0)
+ 					continue;
+
+ 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+ 					continue;
+
+ 				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
+ 				    ep)) {
+ 					rval = -1;
+ 					break;
+ 				}
+ 			}
+ 		} else {
+ 			for (side = 0; side < MD_MAXSIDES; side++) {
+ 				/* Skip empty slots */
+ 				if (sd->sd_nodes[side][0] == '\0')
+ 					continue;
+
+ 				if (force &&
+ 				    strcmp(sd->sd_nodes[side], mynode()) != 0)
+ 					continue;
+
+ 				if (clnt_unlock_set(sd->sd_nodes[side], cl_sk,
+ 				    ep)) {
+ 					rval = -1;
+ 					break;
+ 				}
+ 			}
+ 		}
+ 		cl_set_setkey(NULL);
+ 	}
+
+ 	return (rval);
+ }
+
+ /*
+  * Register the side names of the replica at block blkno on np with the
+  * replica database of set sp.
+  *
+  * meta_getnextside_devinfo() is called repeatedly, starting from
+  * MD_SIDEWILD, until it returns 0.  For every side it reports, the
+  * information is either broadcast through rpc.mdcommd (MN diskset and
+  * bcast == DB_ADDSIDENMS_BCAST) or passed to the local kernel with the
+  * MD_DB_NEWSIDE ioctl.
+  *
+  * Returns 0 on success; on failure a non-zero value with ep set.
+  */
+ int
+ meta_db_addsidenms(
+ 	mdsetname_t	*sp,
+ 	mdname_t	*np,
+ 	daddr_t		blkno,
+ 	int		bcast,
+ 	md_error_t	*ep
+ )
+ {
+ 	side_t		sideno;
+ 	char		*bname = NULL;
+ 	char		*dname = NULL;
+ 	minor_t		mnum;
+ 	mddb_config_t	c;
+ 	int		done;
+ 	int		rval = 0;
+ 	md_set_desc	*sd = NULL;	/* only looked up for disksets */
+
+ 	sideno = MD_SIDEWILD;
+ 	/*CONSTCOND*/
+ 	while (1) {
+ 		/* release the names handed back by the previous iteration */
+ 		if (bname != NULL) {
+ 			Free(bname);
+ 			bname = NULL;
+ 		}
+ 		if (dname != NULL) {
+ 			Free(dname);
+ 			dname = NULL;
+ 		}
+ 		if ((done = meta_getnextside_devinfo(sp, np->bname,
+ 		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
+ 			rval = -1;
+ 			break;
+ 		}
+
+ 		if (done == 0)
+ 			break;
+
+ 		if (! metaislocalset(sp)) {
+ 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ 				rval = -1;
+ 				break;
+ 			}
+ 		}
+
+ 		/*
+ 		 * Send addsidenms to all nodes using rpc.mdcommd if
+ 		 * sidename is being added to MN diskset.
+ 		 *
+ 		 * It's ok to broadcast this call to other nodes.
+ 		 *
+ 		 * Note: The broadcast to other nodes isn't needed during
+ 		 * the addition of the first mddbs to the set since the
+ 		 * other nodes haven't been joined to the set yet. All
+ 		 * nodes in a MN diskset are (implicitly) joined to the set
+ 		 * on the addition of the first mddb.
+ 		 */
+ 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+ 		    (bcast == DB_ADDSIDENMS_BCAST)) {
+ 			md_mn_result_t			*resultp = NULL;
+ 			md_mn_msg_meta_db_newside_t	db_ns;
+ 			int				send_rval;
+
+ 			/*
+ 			 * Zero the message first so no uninitialized stack
+ 			 * bytes are sent and msg_dname stays terminated.
+ 			 */
+ 			(void) memset(&db_ns, 0, sizeof (db_ns));
+ 			db_ns.msg_l_dev = np->dev;
+ 			db_ns.msg_sideno = sideno;
+ 			db_ns.msg_blkno = blkno;
+ 			(void) strncpy(db_ns.msg_dname, dname,
+ 			    sizeof (db_ns.msg_dname) - 1);
+ 			(void) splitname(np->bname, &db_ns.msg_splitname);
+ 			db_ns.msg_mnum = mnum;
+
+ 			/* Empty devid until devids are supported */
+ 			db_ns.msg_devid[0] = '\0';
+
+ 			/*
+ 			 * If reconfig cycle has been started, this node is
+ 			 * stuck in the return step until this command has
+ 			 * completed. If mdcommd is suspended, ask
+ 			 * send_message to fail (instead of retrying)
+ 			 * so that metaset can finish allowing the reconfig
+ 			 * cycle to proceed.
+ 			 */
+ 			send_rval = mdmn_send_message(sp->setno,
+ 			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
+ 			    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns,
+ 			    sizeof (md_mn_msg_meta_db_newside_t),
+ 			    &resultp, ep);
+ 			if (send_rval != 0) {
+ 				rval = -1;
+ 				if (resultp == NULL)
+ 					(void) mddserror(ep,
+ 					    MDE_DS_COMMD_SEND_FAIL,
+ 					    sp->setno, NULL, NULL,
+ 					    sp->setname);
+ 				else {
+ 					(void) mdstealerror(ep,
+ 					    &(resultp->mmr_ep));
+ 					/* make sure some error is reported */
+ 					if (mdisok(ep)) {
+ 						(void) mddserror(ep,
+ 						    MDE_DS_COMMD_SEND_FAIL,
+ 						    sp->setno, NULL, NULL,
+ 						    sp->setname);
+ 					}
+ 					free_result(resultp);
+ 				}
+ 				break;
+ 			}
+ 			if (resultp)
+ 				free_result(resultp);
+ 		} else {
+ 			/*
+ 			 * Let this side's device name, minor # and driver name
+ 			 * be known to the database replica.
+ 			 */
+ 			(void) memset(&c, 0, sizeof (c));
+
+ 			/* Fill in device/replica info */
+ 			c.c_locator.l_dev = meta_cmpldev(np->dev);
+ 			c.c_locator.l_blkno = blkno;
+ 			(void) strncpy(c.c_locator.l_driver, dname,
+ 			    sizeof (c.c_locator.l_driver));
+ 			(void) splitname(bname, &c.c_devname);
+ 			c.c_locator.l_mnum = mnum;
+
+ 			/* Fill in setno, setname, and sideno */
+ 			c.c_setno = sp->setno;
+ 			(void) strncpy(c.c_setname, sp->setname,
+ 			    sizeof (c.c_setname));
+ 			c.c_sideno = sideno;
+
+ 			/*
+ 			 * Don't need device id information from this ioctl
+ 			 * Kernel determines device id from dev_t, which
+ 			 * is just what this code would do.
+ 			 */
+ 			c.c_locator.l_devid = (uint64_t)0;
+ 			c.c_locator.l_devid_flags = 0;
+
+ 			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
+ 				rval = mdstealerror(ep, &c.c_mde);
+ 				break;
+ 			}
+ 		}
+ 	}
+
+ 	/* cleanup, return result */
+ 	if (bname != NULL) {
+ 		Free(bname);
+ 		bname = NULL;
+ 	}
+ 	if (dname != NULL) {
+ 		Free(dname);
+ 		dname = NULL;
+ 	}
+ 	return (rval);
+ }
+
+
+ /*
+  * Remove the name recorded for side sideno of the replica at block blkno
+  * on np in set sp.
+  *
+  * If sp is a MN diskset and this node has MD_MN_NODE_OWN set, the
+  * request is sent to all nodes through rpc.mdcommd; otherwise the local
+  * kernel is told directly with the MD_DB_DELSIDE ioctl.
+  *
+  * Returns 0 on success; on failure a non-zero value with ep set.
+  */
+ int
+ meta_db_delsidenm(
+ 	mdsetname_t	*sp,
+ 	side_t		sideno,
+ 	mdname_t	*np,
+ 	daddr_t		blkno,
+ 	md_error_t	*ep
+ )
+ {
+ 	mddb_config_t	c;
+ 	md_set_desc	*sd = NULL;	/* only looked up for disksets */
+
+ 	if (! metaislocalset(sp)) {
+ 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ 			return (-1);
+ 	}
+ 	/* Use rpc.mdcommd to delete mddb side from all nodes */
+ 	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+ 	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+ 		md_mn_result_t			*resultp = NULL;
+ 		md_mn_msg_meta_db_delside_t	db_ds;
+ 		int				send_rval;
+
+ 		/* don't send uninitialized stack bytes to the other nodes */
+ 		(void) memset(&db_ds, 0, sizeof (db_ds));
+ 		db_ds.msg_l_dev = np->dev;
+ 		db_ds.msg_blkno = blkno;
+ 		db_ds.msg_sideno = sideno;
+
+ 		/* Empty devid until devids are supported */
+ 		db_ds.msg_devid[0] = '\0';
+
+ 		/*
+ 		 * If reconfig cycle has been started, this node is
+ 		 * stuck in the return step until this command has
+ 		 * completed. If mdcommd is suspended, ask
+ 		 * send_message to fail (instead of retrying)
+ 		 * so that metaset can finish allowing the reconfig
+ 		 * cycle to proceed.
+ 		 */
+ 		send_rval = mdmn_send_message(sp->setno,
+ 		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
+ 		    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds,
+ 		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
+ 		if (send_rval != 0) {
+ 			if (resultp == NULL)
+ 				(void) mddserror(ep,
+ 				    MDE_DS_COMMD_SEND_FAIL,
+ 				    sp->setno, NULL, NULL,
+ 				    sp->setname);
+ 			else {
+ 				(void) mdstealerror(ep, &(resultp->mmr_ep));
+ 				/* make sure some error is reported */
+ 				if (mdisok(ep)) {
+ 					(void) mddserror(ep,
+ 					    MDE_DS_COMMD_SEND_FAIL,
+ 					    sp->setno, NULL, NULL,
+ 					    sp->setname);
+ 				}
+ 				free_result(resultp);
+ 			}
+ 			return (-1);
+ 		}
+ 		if (resultp)
+ 			free_result(resultp);
+
+ 	} else {
+ 		/*
+ 		 * Identify the replica and the side to the kernel, which
+ 		 * handles the MD_DB_DELSIDE request.
+ 		 */
+ 		(void) memset(&c, 0, sizeof (c));
+
+ 		/* Fill in device/replica info */
+ 		c.c_locator.l_dev = meta_cmpldev(np->dev);
+ 		c.c_locator.l_blkno = blkno;
+
+ 		/*
+ 		 * Fill in setno, setname, and sideno.  The copy is bounded
+ 		 * by the destination, as in meta_db_addsidenms().
+ 		 */
+ 		c.c_setno = sp->setno;
+ 		(void) strncpy(c.c_setname, sp->setname,
+ 		    sizeof (c.c_setname));
+ 		c.c_sideno = sideno;
+
+ 		/*
+ 		 * Don't need device id information from this ioctl
+ 		 * Kernel determines device id from dev_t, which
+ 		 * is just what this code would do.
+ 		 */
+ 		c.c_locator.l_devid = (uint64_t)0;
+ 		c.c_locator.l_devid_flags = 0;
+
+ 		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
+ 			return (mdstealerror(ep, &c.c_mde));
+ 	}
+ 	return (0);
+ }
+
+
+ /*
+  * Verify that no two entries of nlp carry the same cname.  Returns 0 if
+  * all names differ; otherwise returns the result of raising
+  * MDE_DUPDRIVE for the first repeated name.
+  */
+ static int
+ mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
+ {
+ 	mdnamelist_t	*outer;
+ 	mdnamelist_t	*inner;
+
+ 	/* compare every entry with each entry that follows it */
+ 	outer = nlp;
+ 	while (outer != NULL) {
+ 		char	*name = outer->namep->cname;
+
+ 		inner = outer->next;
+ 		while (inner != NULL) {
+ 			if (strcmp(name, inner->namep->cname) == 0)
+ 				return (mderror(ep, MDE_DUPDRIVE, name));
+ 			inner = inner->next;
+ 		}
+ 		outer = outer->next;
+ 	}
+ 	return (0);
+ }
+
+
+/*
+ * Return 1 if files are different, else return 0
+ */
+static int
+filediff(char *tsname, char *sname)
+{
+ int ret = 1, fd;
+ size_t tsz, sz;
+ struct stat sbuf;
+ char *tbuf, *buf;
+
+ if (stat(tsname, &sbuf) != 0)
+ return (1);
+ tsz = sbuf.st_size;
+ if (stat(sname, &sbuf) != 0)
+ return (1);
+ sz = sbuf.st_size;
+ if (tsz != sz)
+ return (1);
+
+ /* allocate memory and read both files into buffer */
+ tbuf = malloc(tsz);
+ buf = malloc(sz);
+ if (tbuf == NULL || buf == NULL)
+ goto out;
+
+ fd = open(tsname, O_RDONLY);
+ if (fd == -1)
+ goto out;
+ sz = read(fd, tbuf, tsz);
+ (void) close(fd);
+ if (sz != tsz)
+ goto out;
+
+ fd = open(sname, O_RDONLY);
+ if (fd == -1)
+ goto out;
+ sz = read(fd, buf, tsz);
+ (void) close(fd);
+ if (sz != tsz)
+ goto out;
+
+ /* compare content */
+ ret = bcmp(tbuf, buf, tsz);
+out:
+ if (tbuf)
+ free(tbuf);
+ if (buf)
+ free(buf);
+ return (ret);
+}
+
+/*
+ * patch md.conf file with mddb locations
+ */
+int
+meta_db_patch(
+ char *sname, /* system file name */
+ char *cname, /* mddb.cf file name */
+ int patch, /* patching locally */
+ md_error_t *ep
+)
+{
+ char *tsname = NULL;
+ char line[MDDB_BOOTLIST_MAX_LEN];
+ FILE *tsfp = NULL;
+ FILE *mfp = NULL;
+ int rval = -1;
+
+ /* check names */
+ if (sname == NULL) {
+ if (patch)
+ sname = "md.conf";
+ else
+ sname = "/kernel/drv/md.conf";
+ }
+ if (cname == NULL)
+ cname = META_DBCONF;
+
+ /*
+ * edit file
+ */
+ if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
+ if (mdissyserror(ep, EROFS)) {
+ /*
+ * If we are booted on a read-only root because
+ * of mddb quorum problems we don't want to emit
+ * any scary error messages.
+ */
+ mdclrerror(ep);
+ rval = 0;
+ }
+ goto out;
+ }
+
+ if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0,
+ ep) != 0)
+ goto out;
+
+ /* if file content is identical, skip rename */
+ if (filediff(tsname, sname) == 0) {
+ rval = 0;
+ goto out;
+ }
+
+ if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
+ (fclose(tsfp) != 0)) {
+ (void) mdsyserror(ep, errno, tsname);
+ goto out;
+ }
+
+ tsfp = NULL;
+
+ /*
+ * rename file. If we get a Cross Device error then it
+ * is because we are in the miniroot.
+ */
+ if (rename(tsname, sname) != 0 && errno != EXDEV) {
+ (void) mdsyserror(ep, errno, sname);
+ goto out;
+ }
+
+ if (errno == EXDEV) {
+ if ((tsfp = fopen(tsname, "r")) == NULL)
+ goto out;
+ if ((mfp = fopen(sname, "w+")) == NULL)
+ goto out;
+ while (fgets(line, sizeof (line), tsfp) != NULL) {
+ if (fputs(line, mfp) == NULL)
+ goto out;
+ }
+ (void) fclose(tsfp);
+ tsfp = NULL;
+ if (fflush(mfp) != 0)
+ goto out;
+ if (fsync(fileno(mfp)) != 0)
+ goto out;
+ if (fclose(mfp) != 0) {
+ mfp = NULL;
+ goto out;
+ }
+ }
+
+ Free(tsname);
+ tsname = NULL;
+ rval = 0;
+
+ /* cleanup, return error */
+out:
+ if (tsfp != NULL)
+ (void) fclose(tsfp);
+ if (tsname != NULL) {
+ (void) unlink(tsname);
+ Free(tsname);
+ }
+ return (rval);
+}
+
+/*
+ * Add replicas to set. This happens as a result of:
+ * - metadb [-s set_name] -a
+ * - metaset -s set_name -a disk
+ * - metaset -s set_name -d disk (causes a rebalance of mddbs)
+ * - metaset -s set_name -b
+ *
+ * For a local set, this routine is run on the local set host.
+ *
+ * For a traditional diskset, this routine is run on the node that
+ * is running the metaset command.
+ *
+ * For a multinode diskset, this routine is run by the node that is
+ * running the metaset command. If this is the first mddb added to
+ * the MN diskset, then no communication is made to other nodes via commd
+ * since the other nodes will be in-sync with respect to the mddbs when
+ * those other nodes join the set and snarf in the newly created mddb.
+ * If this is not the first mddb added to the MN diskset, then this
+ * attach command is sent to all of the nodes using commd. This keeps
+ * the nodes in-sync.
+ */
+ int
+ meta_db_attach(
+ 	mdsetname_t	*sp,		/* set receiving the replicas */
+ 	mdnamelist_t	*db_nlp,	/* slices to put replicas on */
+ 	mdchkopts_t	options,	/* MDCHK_* option flags */
+ 	md_timeval32_t	*timeval,	/* optional c_timestamp for NEWDEV */
+ 	int		dbcnt,		/* replicas to create per slice */
+ 	int		dbsize,		/* size of one replica, in blocks */
+ 	char		*sysfilename,	/* passed to meta_db_patch() */
+ 	md_error_t	*ep		/* error packet */
+ )
+ {
+ 	struct mddb_config	c;
+ 	mdnamelist_t	*nlp;
+ 	mdname_t	*np;
+ 	md_drive_desc	*dd = NULL;
+ 	md_drive_desc	*p;
+ 	int		i;
+ 	int		fd;
+ 	side_t		sideno;
+ 	daddr_t		blkno;
+ 	int		replicacount = 0;
+ 	int		start_mdmonitord = 0;
+ 	int		rval = 0;
+ 	md_error_t	status = mdnullerror;
+ 	md_set_desc	*sd = NULL;	/* only looked up for disksets */
+ 	int		stale_bool = FALSE;
+ 	int		flags;
+ 	int		firstmddb = 1;
+ 	md_timeval32_t	inittime = {0, 0};
+
+ 	/*
+ 	 * Error if we don't get some work to do.
+ 	 */
+ 	if (db_nlp == NULL)
+ 		return (mdsyserror(ep, EINVAL, NULL));
+
+ 	if (mdnamesareunique(db_nlp, ep) != 0)
+ 		return (-1);
+ 	(void) memset(&c, 0, sizeof (c));
+ 	c.c_id = 0;
+ 	c.c_setno = sp->setno;
+
+ 	/* Don't need device id information from this ioctl */
+ 	c.c_locator.l_devid = (uint64_t)0;
+ 	c.c_locator.l_devid_flags = 0;
+ 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+ 		if (metaislocalset(sp)) {
+ 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
+ 				mdclrerror(&c.c_mde);
+ 			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
+ 			    (! (options & MDCHK_ALLOW_NODBS)))
+ 				return (mdstealerror(ep, &c.c_mde));
+ 		} else {
+ 			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
+ 				return (mdstealerror(ep, &c.c_mde));
+ 		}
+ 		mdclrerror(&c.c_mde);
+ 	}
+ 	/*
+ 	 * Is current set STALE?
+ 	 */
+ 	if (c.c_flags & MDDB_C_STALE) {
+ 		stale_bool = TRUE;
+ 	}
+
+ 	assert(db_nlp != NULL);
+
+ 	/* if creating the metadbs for the first time start mdmonitord */
+ 	if (c.c_dbcnt == 0)
+ 		start_mdmonitord = 1;
+
+ 	/*
+ 	 * check to see if we will go over the total possible number
+ 	 * of data bases
+ 	 */
+ 	nlp = db_nlp;
+ 	while (nlp) {
+ 		replicacount += dbcnt;
+ 		nlp = nlp->next;
+ 	}
+
+ 	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
+ 		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
+ 		    sp->setno, c.c_dbcnt + replicacount, NULL));
+
+ 	/*
+ 	 * go through and check to make sure all locations specified
+ 	 * are legal also pick out driver name;
+ 	 */
+ 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+ 		diskaddr_t	devsize;
+
+ 		np = nlp->namep;
+
+ 		if (! metaislocalset(sp)) {
+ 			uint_t		partno;
+ 			uint_t		rep_partno;
+ 			mddrivename_t	*dnp = np->drivenamep;
+
+ 			/*
+ 			 * make sure that non-local database replicas
+ 			 * are always on the replica slice.
+ 			 */
+ 			if (meta_replicaslice(dnp,
+ 			    &rep_partno, ep) != 0)
+ 				return (-1);
+ 			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
+ 				return (-1);
+ 			if (partno != rep_partno)
+ 				return (mddeverror(ep, MDE_REPCOMP_ONLY,
+ 				    np->dev, sp->setname));
+ 		}
+
+ 		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
+ 		    ep)) {
+ 			return (-1);
+ 		}
+
+ 		if ((devsize = metagetsize(np, ep)) == -1)
+ 			return (-1);
+
+ 		/* replicas start at block 16 of the slice (see below) */
+ 		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
+ 			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
+ 			    meta_getminor(np->dev), sp->setno, devsize,
+ 			    np->cname));
+ 	}
+
+ 	/*
+ 	 * If first disk in set we don't have lb_inittime yet for use as
+ 	 * mb_setcreatetime so don't go looking for it. We'll come back
+ 	 * later and update after the locator block has been created.
+ 	 * If this isn't the first disk in the set, we have a locator
+ 	 * block and thus we have lb_inittime. Set mb_setcreatetime to
+ 	 * lb_inittime.
+ 	 */
+ 	if (! metaislocalset(sp)) {
+ 		if (c.c_dbcnt != 0) {
+ 			firstmddb = 0;
+ 			inittime = meta_get_lb_inittime(sp, ep);
+ 		}
+ 	}
+
+ 	/*
+ 	 * go through and write all master blocks
+ 	 */
+
+ 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+ 		np = nlp->namep;
+
+ 		if ((fd = open(np->rname, O_RDWR)) < 0)
+ 			return (mdsyserror(ep, errno, np->rname));
+
+ 		for (i = 0; i < dbcnt; i++) {
+ 			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
+ 			    inittime, ep)) {
+ 				(void) close(fd);
+ 				return (-1);
+ 			}
+ 		}
+ 		/* fd is not used again after this point */
+ 		(void) close(fd);
+ 	}
+
+ 	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
+ 		return (-1);
+
+ 	if (! metaislocalset(sp)) {
+ 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
+ 		if (! mdisok(ep))
+ 			return (-1);
+ 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ 			/* don't leak the drive descriptors built above */
+ 			metafreedrivedesc(&dd);
+ 			return (-1);
+ 		}
+
+ 	}
+
+ 	/*
+ 	 * go through and tell kernel to add them
+ 	 */
+ 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+ 		mdcinfo_t	*cinfo;
+
+ 		np = nlp->namep;
+
+ 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
+ 			rval = -1;
+ 			goto out;
+ 		}
+
+ 		/*
+ 		 * If mddb is being added to MN diskset and there already
+ 		 * exists a valid mddb in the set (which equates to this
+ 		 * node being an owner of the set) then use rpc.mdcommd
+ 		 * mechanism to add mddb(s) so that all nodes stay in sync.
+ 		 * If set is stale, don't log the message since rpc.mdcommd
+ 		 * can't write the message to the mddb.
+ 		 *
+ 		 * Otherwise, just add mddb to this node.
+ 		 */
+ 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+ 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+ 			md_mn_result_t			*resultp = NULL;
+ 			md_mn_msg_meta_db_attach_t	attach;
+ 			int				send_rval;
+
+ 			/*
+ 			 * In a scenario where new replicas had been added on
+ 			 * the master, and then all of the old replicas failed
+ 			 * before the slaves had knowledge of the new replicas,
+ 			 * the slaves are unable to re-parse in the mddb
+ 			 * from the new replicas since the slaves have no
+ 			 * knowledge of the new replicas.  The following
+ 			 * algorithm solves this problem:
+ 			 *	- META_DB_ATTACH message generates submsgs
+ 			 *		- BLOCK parse (master)
+ 			 *		- MDDB_ATTACH new replicas
+ 			 *		- UNBLOCK parse (master) causing parse
+ 			 *		information to be sent from master
+ 			 *		to slaves at a higher class than the
+ 			 *		unblock so the parse message will
+ 			 *		reach slaves before unblock message.
+ 			 */
+
+ 			/*
+ 			 * Zero the message first so no uninitialized stack
+ 			 * bytes are sent and msg_dname stays terminated.
+ 			 */
+ 			(void) memset(&attach, 0, sizeof (attach));
+ 			attach.msg_l_dev = np->dev;
+ 			attach.msg_cnt = dbcnt;
+ 			attach.msg_dbsize = dbsize;
+ 			(void) strncpy(attach.msg_dname, cinfo->dname,
+ 			    sizeof (attach.msg_dname) - 1);
+ 			(void) splitname(np->bname, &attach.msg_splitname);
+ 			attach.msg_options = options;
+
+ 			/* Empty devid until devids are supported */
+ 			attach.msg_devid[0] = '\0';
+
+ 			/*
+ 			 * If reconfig cycle has been started, this node is
+ 			 * stuck in the return step until this command has
+ 			 * completed.  If mdcommd is suspended, ask
+ 			 * send_message to fail (instead of retrying)
+ 			 * so that metaset can finish allowing the reconfig
+ 			 * cycle to proceed.
+ 			 */
+ 			flags = MD_MSGF_FAIL_ON_SUSPEND;
+ 			if (stale_bool == TRUE)
+ 				flags |= MD_MSGF_NO_LOG;
+ 			send_rval = mdmn_send_message(sp->setno,
+ 			    MD_MN_MSG_META_DB_ATTACH,
+ 			    flags, (char *)&attach,
+ 			    sizeof (md_mn_msg_meta_db_attach_t),
+ 			    &resultp, ep);
+ 			if (send_rval != 0) {
+ 				rval = -1;
+ 				if (resultp == NULL)
+ 					(void) mddserror(ep,
+ 					    MDE_DS_COMMD_SEND_FAIL,
+ 					    sp->setno, NULL, NULL,
+ 					    sp->setname);
+ 				else {
+ 					(void) mdstealerror(ep,
+ 					    &(resultp->mmr_ep));
+ 					if (mdisok(ep)) {
+ 						(void) mddserror(ep,
+ 						    MDE_DS_COMMD_SEND_FAIL,
+ 						    sp->setno, NULL, NULL,
+ 						    sp->setname);
+ 					}
+ 					free_result(resultp);
+ 				}
+ 				goto out;
+ 			}
+ 			if (resultp)
+ 				free_result(resultp);
+ 		} else {
+ 			/* Adding mddb(s) to just this node */
+ 			for (i = 0; i < dbcnt; i++) {
+ 				(void) memset(&c, 0, sizeof (c));
+ 				/* Fill in device/replica info */
+ 				c.c_locator.l_dev = meta_cmpldev(np->dev);
+ 				c.c_locator.l_blkno = i * dbsize + 16;
+ 				blkno = c.c_locator.l_blkno;
+ 				(void) strncpy(c.c_locator.l_driver,
+ 				    cinfo->dname,
+ 				    sizeof (c.c_locator.l_driver));
+ 				(void) splitname(np->bname, &c.c_devname);
+ 				c.c_locator.l_mnum = meta_getminor(np->dev);
+
+ 				/* Fill in setno, setname, and sideno */
+ 				c.c_setno = sp->setno;
+ 				if (! metaislocalset(sp)) {
+ 					if (MD_MNSET_DESC(sd)) {
+ 						c.c_multi_node = 1;
+ 					}
+ 				}
+ 				/* bounded by the destination field */
+ 				(void) strncpy(c.c_setname, sp->setname,
+ 				    sizeof (c.c_setname));
+ 				c.c_sideno = sideno;
+
+ 				/*
+ 				 * Don't need device id information from this
+ 				 * ioctl.  Kernel determines device id from
+ 				 * dev_t, which is just what this code would
+ 				 * do.
+ 				 */
+ 				c.c_locator.l_devid = (uint64_t)0;
+ 				c.c_locator.l_devid_flags = 0;
+
+ 				if (timeval != NULL)
+ 					c.c_timestamp = *timeval;
+
+ 				if (setup_med_cfg(sp, &c,
+ 				    (options & MDCHK_SET_FORCE), ep)) {
+ 					rval = -1;
+ 					goto out;
+ 				}
+
+ 				if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde,
+ 				    NULL) != 0) {
+ 					rval = mdstealerror(ep, &c.c_mde);
+ 					goto out;
+ 				}
+ 				/*
+ 				 * This is either a traditional diskset OR this
+ 				 * is the first replica added to a MN diskset.
+ 				 * In either case, set broadcast to NO_BCAST so
+ 				 * that message won't go through rpc.mdcommd.
+ 				 * If this is a traditional diskset, the bcast
+ 				 * flag is ignored since traditional disksets
+ 				 * don't use the rpc.mdcommd.
+ 				 */
+ 				if (meta_db_addsidenms(sp, np, blkno,
+ 				    DB_ADDSIDENMS_NO_BCAST, ep)) {
+ 					/* report the failure to our caller */
+ 					rval = -1;
+ 					goto out;
+ 				}
+ 			}
+ 		}
+ 		if (! metaislocalset(sp)) {
+ 			/* update the dbcnt and size in dd */
+ 			for (p = dd; p != NULL; p = p->dd_next)
+ 				if (p->dd_dnp == np->drivenamep) {
+ 					p->dd_dbcnt = dbcnt;
+ 					p->dd_dbsize = dbsize;
+ 					break;
+ 				}
+ 		}
+
+ 		/*
+ 		 * If this was the first addition of disks to the
+ 		 * diskset you now need to update the mb_setcreatetime
+ 		 * which needed lb_inittime which wasn't there until now.
+ 		 *
+ 		 * On failure leave through the common exit so that dd is
+ 		 * released and the replicas already handed to the kernel
+ 		 * are still recorded.
+ 		 */
+ 		if (firstmddb) {
+ 			if (meta_update_mb(sp, dd, ep) != 0) {
+ 				rval = -1;
+ 				goto out;
+ 			}
+ 		}
+ 	}
+
+ out:
+ 	if (metaislocalset(sp)) {
+
+ 		/* everything looks fine. Start mdmonitord */
+ 		/* Note: popen/pclose is the MT-safe replacement for system */
+ 		if (rval == 0 && start_mdmonitord == 1) {
+ 			if (pclose(popen(MDMONITORD, "w")) == -1)
+ 				md_perror(MDMONITORD);
+
+ 			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
+ 				mde_perror(&status, "");
+ 				mdclrerror(&status);
+ 			}
+ 		}
+
+ 		if (buildconf(sp, &status)) {
+ 			/* Don't mask any previous errors */
+ 			if (rval == 0)
+ 				rval = mdstealerror(ep, &status);
+ 			return (rval);
+ 		}
+
+ 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
+ 			/* Don't mask any previous errors */
+ 			if (rval == 0)
+ 				rval = mdstealerror(ep, &status);
+ 		}
+ 	} else {
+ 		if (update_dbinfo_on_drives(sp, dd,
+ 		    (options & MDCHK_SET_LOCKED),
+ 		    (options & MDCHK_SET_FORCE),
+ 		    &status)) {
+ 			/* Don't mask any previous errors */
+ 			if (rval == 0)
+ 				rval = mdstealerror(ep, &status);
+ 			else
+ 				mdclrerror(&status);
+ 		}
+ 		metafreedrivedesc(&dd);
+ 	}
+ 	/*
+ 	 * For MN disksets that already had nodes joined
+ 	 * before the attach of this mddb(s), the name invalidation is
+ 	 * done by the commd handler routine.  Otherwise, if this
+ 	 * is the first attach of a MN diskset mddb, the invalidation
+ 	 * must be done here since the first attach cannot be sent
+ 	 * via the commd since there are no nodes joined to the set yet.
+ 	 */
+ 	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
+ 	    (MD_MNSET_DESC(sd) &&
+ 	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
+ 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
+ 			meta_invalidate_name(nlp->namep);
+ 		}
+ 	}
+ 	return (rval);
+ }
+
+/*
+ * deletelist_length
+ *
+ * return the number of slices that have been specified for deletion
+ * on the metadb command line. This does not calculate the number
+ * of replicas because there may be multiple replicas per slice.
+ */
+static int
+deletelist_length(mdnamelist_t *db_nlp)
+{
+
+ mdnamelist_t *nlp;
+ int list_length = 0;
+
+ for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+ list_length++;
+ }
+
+ return (list_length);
+}
+
+ /*
+  * Look devname up in db_nlp by comparing it with each entry's bname.
+  * Returns the zero-based position of the first match, or -1 if no entry
+  * matches.
+  */
+ static int
+ in_deletelist(char *devname, mdnamelist_t *db_nlp)
+ {
+ 	mdnamelist_t	*cur;
+ 	int		pos;
+
+ 	for (pos = 0, cur = db_nlp; cur != NULL; pos++, cur = cur->next) {
+ 		if (strcmp(devname, cur->namep->bname) == 0)
+ 			return (pos);
+ 	}
+
+ 	return (-1);
+ }
+
+/*
+ * Delete replicas from set.  This happens as a result of:
+ *	- metadb [-s set_name] -d
+ *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
+ *	- metaset -s set_name -d disk
+ *	- metaset -s set_name -b
+ *
+ * For a local set, this routine is run on the local set host.
+ *
+ * For a traditional diskset, this routine is run on the node that
+ * is running the metaset command.
+ *
+ * For a multinode diskset, this routine is run by the node that is
+ * running the metaset command.  This detach routine is sent to all
+ * of the joined nodes in the diskset using commd.  This keeps
+ * the nodes in-sync.
+ *
+ * Arguments:
+ *	sp		set the replicas belong to
+ *	db_nlp		slices whose replicas are to be deleted; must be
+ *			non-NULL and must not name a slice twice
+ *	force_option	MDFORCE_* bits relaxing the safety checks
+ *	sysfilename	passed through to meta_db_patch() for a local set
+ *	ep		receives the error description on failure
+ *
+ * Returns 0 on success.  On failure ep is filled in and either -1 or the
+ * value handed back by the md error routine is returned.
+ */
+int
+meta_db_detach(
+	mdsetname_t	*sp,
+	mdnamelist_t	*db_nlp,
+	mdforceopts_t	force_option,
+	char		*sysfilename,
+	md_error_t	*ep
+)
+{
+	struct mddb_config	c;
+	mdnamelist_t		*nlp;
+	mdname_t		*np;
+	md_drive_desc		*dd = NULL;
+	md_drive_desc		*p;
+	int			replicacount;
+	int			replica_delete_count;
+	int			nr_replica_slices;
+	int			i;
+	int			stop_svmdaemons = 0;
+	int			rval = 0;
+	int			index;
+	int			valid_replicas_nottodelete = 0;
+	int			invalid_replicas_nottodelete = 0;
+	int			invalid_replicas_todelete = 0;
+	int			errored = 0;
+	int			*tag_array;
+	int			fd = -1;
+	md_error_t		status = mdnullerror;
+	md_set_desc		*sd = NULL;
+	int			stale_bool = FALSE;
+	int			flags;
+
+	/*
+	 * Error if we don't get some work to do.
+	 */
+	if (db_nlp == NULL)
+		return (mdsyserror(ep, EINVAL, NULL));
+
+	if (mdnamesareunique(db_nlp, ep) != 0)
+		return (-1);
+
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+
+	/* Don't need device id information from this ioctl */
+	c.c_locator.l_devid = (uint64_t)0;
+	c.c_locator.l_devid_flags = 0;
+
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
+		return (mdstealerror(ep, &c.c_mde));
+
+	/*
+	 * Is current set STALE?
+	 */
+	if (c.c_flags & MDDB_C_STALE) {
+		stale_bool = TRUE;
+	}
+
+	replicacount = c.c_dbcnt;
+
+	assert(db_nlp != NULL);
+
+	/*
+	 * go through and gather how many data bases are on each
+	 * device specified.
+	 */
+
+	nr_replica_slices = deletelist_length(db_nlp);
+	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
+	if (tag_array == NULL)
+		return (mdsyserror(ep, ENOMEM, NULL));
+
+	replica_delete_count = 0;
+	for (i = 0; i < replicacount; i++) {
+		char	*devname;
+		int	found = 0;
+
+		c.c_id = i;
+
+		/* Don't need device id information from this ioctl */
+		c.c_locator.l_devid = (uint64_t)0;
+		c.c_locator.l_devid_flags = 0;
+
+		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+			/* tag_array is ours; don't leak it on the way out */
+			Free(tag_array);
+			return (mdstealerror(ep, &c.c_mde));
+		}
+
+		devname = splicename(&c.c_devname);
+
+		/* tag_array[n] records that list entry n has a replica */
+		if ((index = in_deletelist(devname, db_nlp)) != -1) {
+			found = 1;
+			tag_array[index] = 1;
+			replica_delete_count++;
+		}
+
+		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
+		    MDDB_F_EWRITE | MDDB_F_TOOSMALL |
+		    MDDB_F_EFMT | MDDB_F_EDATA |
+		    MDDB_F_EMASTER);
+
+		/*
+		 * There are four combinations of "errored" and "found"
+		 * and they are used to find the number of
+		 * (a) valid/invalid replicas that are not in the delete
+		 * list and are available in the system.
+		 * (b) valid/invalid replicas that are to be deleted.
+		 */
+
+		if (errored && !found)		/* errored and !found */
+			invalid_replicas_nottodelete++;
+		else if (!found)		/* !errored and !found */
+			valid_replicas_nottodelete++;
+		else if (errored)		/* errored and found */
+			invalid_replicas_todelete++;
+		/*
+		 * else it is !errored and found. This means
+		 * valid_replicas_todelete++; But this variable will not
+		 * be used anywhere
+		 */
+
+		Free(devname);
+	}
+
+	/* Every slice named by the caller must actually hold a replica. */
+	index = 0;
+	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+		np = nlp->namep;
+		if (tag_array[index++] != 1) {
+			Free(tag_array);
+			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
+		}
+	}
+
+	Free(tag_array);
+
+
+	/* if all replicas are deleted stop mdmonitord */
+	if ((replicacount - replica_delete_count) == 0)
+		stop_svmdaemons = 1;
+
+	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
+		if (force_option & MDFORCE_NONE)
+			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
+		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
+			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
+	}
+
+	/*
+	 * The following algorithms are followed to check for deletion:
+	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
+	 *	replicas, then deletion should be allowed.
+	 * (b) Deletion should be allowed only if valid replicas that are "not"
+	 *	to be deleted is always greater than the invalid replicas that
+	 *	are "not" to be deleted.
+	 * (c) If the user uses -f option, then deletion should be allowed.
+	 */
+
+	if ((invalid_replicas_todelete != replica_delete_count) &&
+	    (invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
+	    (force_option != MDFORCE_LOCAL))
+		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
+
+	/*
+	 * go through and tell kernel to delete them
+	 */
+
+	/* Don't need device id information from this ioctl */
+	c.c_locator.l_devid = (uint64_t)0;
+	c.c_locator.l_devid_flags = 0;
+
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
+		return (mdstealerror(ep, &c.c_mde));
+
+	if (! metaislocalset(sp)) {
+		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
+		if (! mdisok(ep))
+			return (-1);
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			/* dd is released at the end on success; do so here */
+			metafreedrivedesc(&dd);
+			return (-1);
+		}
+	}
+
+	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+		np = nlp->namep;
+
+		/*
+		 * If mddb is being deleted from MN diskset and node is
+		 * an owner of the diskset then use rpc.mdcommd
+		 * mechanism to delete mddb(s) so that all nodes stay in sync.
+		 * If set is stale, don't log the message since rpc.mdcommd
+		 * can't write the message to the mddb.
+		 *
+		 * When mddbs are first being added to set, a detach can
+		 * be called before any node has joined the diskset, so
+		 * must check to see if node is an owner of the diskset.
+		 *
+		 * Otherwise, just delete mddb from this node.
+		 */
+
+		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+			md_mn_result_t			*resultp;
+			md_mn_msg_meta_db_detach_t	detach;
+			int				send_rval;
+
+			/*
+			 * The following algorithm is used to detach replicas.
+			 * - META_DB_DETACH message generates submsgs
+			 *	- BLOCK parse (master)
+			 *	- MDDB_DETACH replicas
+			 *	- UNBLOCK parse (master) causing parse
+			 *	information to be sent from master
+			 *	to slaves at a higher class than the
+			 *	unblock so the parse message will
+			 *	reach slaves before unblock message.
+			 */
+			(void) splitname(np->bname, &detach.msg_splitname);
+
+			/* Set devid to NULL until devids are supported */
+			detach.msg_devid[0] = NULL;
+
+			/*
+			 * If reconfig cycle has been started, this node is
+			 * stuck in the return step until this command has
+			 * completed.  If mdcommd is suspended, ask
+			 * send_message to fail (instead of retrying)
+			 * so that metaset can finish allowing the reconfig
+			 * cycle to proceed.
+			 */
+			flags = MD_MSGF_FAIL_ON_SUSPEND;
+			if (stale_bool == TRUE)
+				flags |= MD_MSGF_NO_LOG;
+			send_rval = mdmn_send_message(sp->setno,
+			    MD_MN_MSG_META_DB_DETACH,
+			    flags, (char *)&detach,
+			    sizeof (md_mn_msg_meta_db_detach_t),
+			    &resultp, ep);
+			if (send_rval != 0) {
+				rval = -1;
+				if (resultp == NULL)
+					(void) mddserror(ep,
+					    MDE_DS_COMMD_SEND_FAIL,
+					    sp->setno, NULL, NULL,
+					    sp->setname);
+				else {
+					(void) mdstealerror(ep,
+					    &(resultp->mmr_ep));
+					if (mdisok(ep)) {
+						(void) mddserror(ep,
+						    MDE_DS_COMMD_SEND_FAIL,
+						    sp->setno, NULL, NULL,
+						    sp->setname);
+					}
+					free_result(resultp);
+				}
+				goto out;
+			}
+			if (resultp)
+				free_result(resultp);
+		} else {
+			/*
+			 * Scan the kernel's replica table for entries on
+			 * this slice.  A successful delete leaves "i" alone
+			 * so the same index is examined again on the next
+			 * pass.
+			 */
+			i = 0;
+			while (i < c.c_dbcnt) {
+				char	*devname;
+
+				c.c_id = i;
+
+				/* Don't need devid info from this ioctl */
+				c.c_locator.l_devid = (uint64_t)0;
+				c.c_locator.l_devid_flags = 0;
+
+				if (metaioctl(MD_DB_GETDEV, &c,
+				    &c.c_mde, NULL)) {
+					rval = mdstealerror(ep, &c.c_mde);
+					goto out;
+				}
+
+				devname = splicename(&c.c_devname);
+				if (strcmp(devname, np->bname) != 0) {
+					Free(devname);
+					i++;
+					continue;
+				}
+				Free(devname);
+
+				/* Don't need devid info from this ioctl */
+				c.c_locator.l_devid = (uint64_t)0;
+				c.c_locator.l_devid_flags = 0;
+
+				if (metaioctl(MD_DB_DELDEV, &c,
+				    &c.c_mde, NULL) != 0) {
+					rval = mdstealerror(ep, &c.c_mde);
+					goto out;
+				}
+
+				/* Not incrementing "i" intentionally */
+			}
+		}
+		if (! metaislocalset(sp)) {
+			/* update the dbcnt and size in dd */
+			for (p = dd; p != NULL; p = p->dd_next) {
+				if (p->dd_dnp == np->drivenamep) {
+					p->dd_dbcnt = 0;
+					p->dd_dbsize = 0;
+					break;
+				}
+			}
+
+			/*
+			 * Slam a dummy master block and make it self
+			 * identifying
+			 */
+			if ((fd = open(np->rname, O_RDWR)) >= 0) {
+				meta_mkdummymaster(sp, fd, 16);
+				(void) close(fd);
+			}
+		}
+	}
+out:
+	if (metaislocalset(sp)) {
+		/*
+		 * Stop all the daemons if there are
+		 * no more replicas so that the module can be
+		 * unloaded.
+		 */
+		if (rval == 0 && stop_svmdaemons == 1) {
+			char	buf[MAXPATHLEN];
+			FILE	*pfp;
+
+			for (i = 0; i < DAEMON_COUNT; i++) {
+				(void) snprintf(buf, MAXPATHLEN,
+				    "/usr/bin/pkill -%s -x %s",
+				    svmd_kill_list[i].svmd_kill_val,
+				    svmd_kill_list[i].svmd_name);
+				/* never hand a failed popen() to pclose() */
+				pfp = popen(buf, "w");
+				if (pfp == NULL || pclose(pfp) == -1)
+					md_perror(buf);
+			}
+
+			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
+				mde_perror(&status, "");
+				mdclrerror(&status);
+			}
+		}
+		if (buildconf(sp, &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+			else
+				mdclrerror(&status);
+			return (rval);
+		}
+
+		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+			else
+				mdclrerror(&status);
+		}
+	} else {
+		if (update_dbinfo_on_drives(sp, dd,
+		    (force_option & MDFORCE_SET_LOCKED),
+		    ((force_option & MDFORCE_LOCAL) |
+		    (force_option & MDFORCE_DS)), &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+			else
+				mdclrerror(&status);
+		}
+		metafreedrivedesc(&dd);
+	}
+	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
+		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
+			meta_invalidate_name(nlp->namep);
+		}
+	}
+	return (rval);
+}
+
+/*
+ * Build an md_replica_t describing the replica whose locator record is
+ * held in *c.
+ *
+ *	sp	set being examined
+ *	flags	PRINT_FAST selects metaname_fast() for the name lookup;
+ *		MD_BASICNAME_OK skips the metachkcomp() check
+ *	c	locator record previously filled in by the caller
+ *	ep	receives the error description on failure
+ *
+ * Returns the newly allocated replica, or NULL with ep set.  When the
+ * locator carries a valid devid, a private copy is malloc'ed into
+ * r_devid; metafreereplicalist() releases it with free().
+ */
+static md_replica_t *
+metareplicaname(
+	mdsetname_t		*sp,
+	int			flags,
+	struct mddb_config	*c,
+	md_error_t		*ep
+)
+{
+	md_replica_t	*rp;
+	char		*devname;
+	size_t		sz;
+
+	/* allocate replicaname */
+	rp = Zalloc(sizeof (*rp));
+
+	/* get device name */
+	devname = splicename(&c->c_devname);
+	if (flags & PRINT_FAST)
+		rp->r_namep = metaname_fast(&sp, devname, ep);
+	else
+		rp->r_namep = metaname(&sp, devname, ep);
+	Free(devname);
+	if (rp->r_namep == NULL) {
+		Free(rp);
+		return (NULL);
+	}
+
+	/* make sure it's OK */
+	if ((! (flags & MD_BASICNAME_OK)) &&
+	    (metachkcomp(rp->r_namep, ep) != 0)) {
+		Free(rp);
+		return (NULL);
+	}
+
+	rp->r_blkno = MD_DISKADDR_ERROR;
+	rp->r_nblk = MD_DISKADDR_ERROR;
+	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
+	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
+		sz = devid_sizeof((ddi_devid_t)(c->c_locator.l_devid));
+		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
+		    (ddi_devid_t)NULL) {
+			/*
+			 * Callers treat NULL as "ep describes the failure",
+			 * so record the out-of-memory condition.
+			 */
+			(void) mdsyserror(ep, ENOMEM, NULL);
+			Free(rp);
+			return (NULL);
+		}
+		(void) memcpy((void *)rp->r_devid,
+		    (void *)c->c_locator.l_devid, sz);
+		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
+		rp->r_flags &= ~MDDB_F_NODEVID;
+		/* Overwrite dev derived from name with dev from devid */
+		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
+	}
+	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
+
+	rp->r_blkno = c->c_locator.l_blkno;
+	if (c->c_dbend != 0)
+		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
+
+	/* return replica */
+	return (rp);
+}
+
+/*
+ * Release every node of a replica list, together with the replica each
+ * node points at and that replica's devid copy (if it has one).
+ * A NULL list is accepted and ignored.
+ */
+void
+metafreereplicalist(
+	md_replicalist_t	*rlp
+)
+{
+	while (rlp != NULL) {
+		/* pick up the link before the node goes away */
+		md_replicalist_t	*following = rlp->rl_next;
+
+		if (rlp->rl_repp->r_devid != (ddi_devid_t)0)
+			free(rlp->rl_repp->r_devid);
+		Free(rlp->rl_repp);
+		Free(rlp);
+
+		rlp = following;
+	}
+}
+
+/*
+ * return list of all replicas in set
+ *
+ *	sp	set to query
+ *	flags	handed to metareplicaname() (PRINT_FAST, MD_BASICNAME_OK)
+ *	rlpp	list head that new entries are appended to.  On failure
+ *		the whole list at *rlpp is freed and *rlpp is set to NULL,
+ *		so callers should pass in an empty (NULL) list.
+ *	ep	receives the error description on failure
+ *
+ * Returns the number of replicas appended, or -1 with ep set.
+ */
+int
+metareplicalist(
+	mdsetname_t		*sp,
+	int			flags,
+	md_replicalist_t	**rlpp,
+	md_error_t		*ep
+)
+{
+	md_replicalist_t	**tail = rlpp;
+	int			count = 0;
+	struct mddb_config	c;
+	int			i;
+	/*
+	 * Scratch buffer the kernel copies the devid into.  Ownership is
+	 * tracked through this pointer (NULL == nothing to free) rather
+	 * than through l_devid_flags, which the ioctl rewrites.
+	 */
+	char			*devid = NULL;
+
+	/* for each replica */
+	i = 0;
+	do {
+		md_replica_t	*rp;
+
+		/* get next replica */
+		(void) memset(&c, 0, sizeof (c));
+		c.c_id = i;
+		c.c_setno = sp->setno;
+
+		/* first pass: ask only for the size of the devid */
+		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
+		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
+			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
+				mdclrerror(&c.c_mde);
+				break;	/* handle none at all */
+			}
+			(void) mdstealerror(ep, &c.c_mde);
+			goto out;
+		}
+
+		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
+			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
+				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
+				goto out;
+			}
+			c.c_locator.l_devid = (uintptr_t)devid;
+			/*
+			 * Turn on space and sz flags since 'sz' amount of
+			 * space has been alloc'd.
+			 */
+			c.c_locator.l_devid_flags =
+			    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+		}
+
+		/* second pass: fetch the record, devid included */
+		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
+			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
+				mdclrerror(&c.c_mde);
+				break;	/* handle none at all */
+			}
+			(void) mdstealerror(ep, &c.c_mde);
+			goto out;
+		}
+
+		/*
+		 * Paranoid check - shouldn't happen, but is left as
+		 * a place holder for changes that will be needed after
+		 * dynamic reconfiguration changes are added to SVM (to
+		 * support movement of disks at any point in time).
+		 */
+		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
+			(void) fprintf(stderr,
+			    dgettext(TEXT_DOMAIN,
+			    "Error: Relocation Information "
+			    "(drvnm=%s, mnum=0x%lx) \n"
+			    "relocation information size changed - \n"
+			    "rerun command\n"),
+			    c.c_locator.l_driver, c.c_locator.l_mnum);
+			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
+			goto out;
+		}
+
+		if (c.c_dbcnt == 0)
+			break;	/* handle none at all */
+
+		/* get info */
+		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
+			goto out;
+
+		/* append to list */
+		*tail = Zalloc(sizeof (**tail));
+		(*tail)->rl_repp = rp;
+		tail = &(*tail)->rl_next;
+		++count;
+
+		/* the replica keeps its own copy; drop the scratch buffer */
+		free(devid);
+		devid = NULL;
+
+	} while (++i < c.c_dbcnt);
+
+	/* a "break" above may have left the scratch buffer allocated */
+	free(devid);
+
+	/* return count */
+	return (count);
+
+	/* cleanup, return error */
+out:
+	free(devid);
+	metafreereplicalist(*rlpp);
+	*rlpp = NULL;
+	return (-1);
+}
+
+/*
+ * meta_sync_db_locations - bring the on-disk configuration files back
+ *	in line with the replica list held by the kernel.
+ *
+ * Only the local set is handled; for any other set this is a no-op.
+ * The mddb.cf backup is regenerated first and, if that succeeds, md.conf
+ * is patched.  Errors are reported through ep only.
+ */
+void
+meta_sync_db_locations(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	if (metaislocalset(sp)) {
+		/* Updates backup of configuration file (aka mddb.cf) */
+		if (buildconf(sp, ep) == 0) {
+			/*
+			 * Updates system configuration file (aka md.conf);
+			 * no system or config file name is supplied.
+			 */
+			(void) meta_db_patch(NULL, NULL, 0, ep);
+		}
+	}
+}
+
+/*
+ * meta_setup_db_locations - parse the mddb.cf file and
+ *	tell the driver which db locations to use.
+ *
+ * Each non-comment line supplies a driver name, minor number, block
+ * number, encoded devid (with minor name) and a checksum.  Lines whose
+ * devid cannot be decoded or that lack a minor name are skipped; lines
+ * with a bad checksum are skipped and remembered as MDE_MDDB_CKSUM.
+ * A missing mddb.cf is not an error.
+ *
+ * Returns 0 on success, otherwise the error routine's return value with
+ * ep set.  A stale database is reported as MDE_DB_STALE.
+ */
+int
+meta_setup_db_locations(
+	md_error_t	*ep
+)
+{
+	mddb_config_t	c;
+	FILE		*fp;
+	char		inbuff[1024];
+	char		*buff;
+	uint_t		i;
+	size_t		sz;
+	size_t		drvlen;
+	int		rval = 0;
+	char		*devidp;
+	uint_t		devid_size;
+	char		*minor_name = NULL;
+	ddi_devid_t	devid_decode;
+	int		checksum;
+
+	/* do mddb.cf file */
+	(void) memset(&c, '\0', sizeof (c));
+	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
+		if (errno != ENOENT)
+			return (mdsyserror(ep, errno, META_DBCONF));
+	}
+	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
+	    fp)) != NULL)) {
+
+		/* ignore comments */
+		if (*buff == '#')
+			continue;
+
+		/* parse locator */
+		(void) memset(&c, 0, sizeof (c));
+		c.c_setno = MD_LOCAL_SET;
+
+		/*
+		 * Copy at most sizeof - 1 bytes so that the zeroed buffer
+		 * always stays NUL terminated (the checksum loop below
+		 * depends on that), but step over the whole token.
+		 */
+		drvlen = strcspn(buff, " \t");
+		i = (uint_t)drvlen;
+		if (i > sizeof (c.c_locator.l_driver) - 1)
+			i = sizeof (c.c_locator.l_driver) - 1;
+		(void) strncpy(c.c_locator.l_driver, buff, i);
+		buff += drvlen;
+		c.c_locator.l_dev =
+		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
+		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
+		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
+
+		/* parse out devid */
+		while (isspace((int)(*buff)))
+			buff += 1;
+		i = strcspn(buff, " \t");
+		if ((devidp = (char *)malloc(i+1)) == NULL) {
+			(void) fclose(fp);
+			return (mdsyserror(ep, ENOMEM, META_DBCONF));
+		}
+
+		(void) strncpy(devidp, buff, i);
+		devidp[i] = '\0';
+		/* don't let a previous line's (freed) name be seen again */
+		minor_name = NULL;
+		if (devid_str_decode(devidp, &devid_decode,
+		    &minor_name) == -1) {
+			free(devidp);
+			continue;
+		}
+
+		/* Conf file must have minor name associated with devid */
+		if (minor_name == NULL) {
+			free(devidp);
+			devid_free(devid_decode);
+			continue;
+		}
+
+		sz = devid_sizeof(devid_decode);
+		/* Copy to devid size buffer that ioctl expects */
+		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == 0) {
+			devid_free(devid_decode);
+			free(minor_name);
+			free(devidp);
+			(void) fclose(fp);
+			return (mdsyserror(ep, ENOMEM, META_DBCONF));
+		}
+
+		(void) memcpy((void *)c.c_locator.l_devid,
+		    (void *)devid_decode, sz);
+
+		devid_free(devid_decode);
+
+		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
+			free(minor_name);
+			free(devidp);
+			free((void *)c.c_locator.l_devid);
+			(void) fclose(fp);
+			return (mdsyserror(ep, ENOMEM, META_DBCONF));
+		}
+		(void) strcpy(c.c_locator.l_minor_name, minor_name);
+		free(minor_name);
+		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
+		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+		c.c_locator.l_devid_sz = sz;
+
+		devid_size = strlen(devidp);
+		buff += devid_size;
+
+		/*
+		 * The stored checksum plus every driver-name byte, every
+		 * devid-string byte, the minor number and the block number
+		 * must add up to 42.
+		 */
+		checksum = strtol(buff, &buff, 10);
+		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
+			checksum += c.c_locator.l_driver[i];
+		for (i = 0; i < devid_size; i++) {
+			checksum += devidp[i];
+		}
+		free(devidp);
+
+		checksum += minor(c.c_locator.l_dev);
+		checksum += c.c_locator.l_blkno;
+		if (checksum != 42) {
+			/* overwritten later for more serious problems */
+			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
+			free((void *)c.c_locator.l_devid);
+			continue;
+		}
+		c.c_locator.l_flags = 0;
+
+		/* use db location */
+		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
+			free((void *)c.c_locator.l_devid);
+			(void) fclose(fp);
+			return (mdstealerror(ep, &c.c_mde));
+		}
+
+		/* free up devid if in use */
+		free((void *)c.c_locator.l_devid);
+		c.c_locator.l_devid = (uint64_t)0;
+		c.c_locator.l_devid_flags = 0;
+	}
+	if ((fp) && (fclose(fp) != 0))
+		return (mdsyserror(ep, errno, META_DBCONF));
+
+	/* check for stale database */
+	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
+	c.c_id = 0;
+	c.c_setno = MD_LOCAL_SET;
+
+	/* Don't need device id information from this ioctl */
+	c.c_locator.l_devid = (uint64_t)0;
+	c.c_locator.l_devid_flags = 0;
+
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
+			return (mdstealerror(ep, &c.c_mde));
+		mdclrerror(&c.c_mde);
+	}
+
+	if (c.c_flags & MDDB_C_STALE)
+		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
+		    0, NULL));
+
+	/* success */
+	return (rval);
+}
+
+/*
+ * meta_db_minreplica - report the size (r_nblk) of the smallest replica
+ *	the set currently has.
+ *
+ * Returns -1 when the replica list cannot be obtained or is empty.
+ */
+daddr_t
+meta_db_minreplica(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*head = NULL;
+	md_replicalist_t	*cur;
+	daddr_t			smallest = 0;
+
+	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &head, ep) < 0)
+		return (-1);
+
+	if (head == NULL)
+		return (-1);
+
+	/*
+	 * A running value of 0 means "nothing recorded yet", so the next
+	 * replica's size is taken as is; otherwise keep the lesser one.
+	 */
+	for (cur = head; cur != NULL; cur = cur->rl_next) {
+		if (smallest == 0)
+			smallest = cur->rl_repp->r_nblk;
+		else
+			smallest = min(cur->rl_repp->r_nblk, smallest);
+	}
+
+	metafreereplicalist(head);
+	return (smallest);
+}
+
+/*
+ * meta_get_replica_names
+ *	Append the name of every replica slice of the local set to *nlpp.
+ *
+ * Returns the number of names appended (0 for anything other than the
+ * local set), or -1 if the replica list could not be read.
+ */
+/*ARGSUSED*/
+int
+meta_get_replica_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*replicas = NULL;
+	md_replicalist_t	*cur;
+	mdnamelist_t		**tailpp = nlpp;
+	int			cnt = 0;
+
+	assert(nlpp != NULL);
+
+	if (metaislocalset(sp)) {
+		if (metareplicalist(sp, MD_BASICNAME_OK, &replicas, ep) < 0) {
+			cnt = -1;
+		} else {
+			/*
+			 * Remember where the list currently ends so each
+			 * append is done in place instead of re-walking
+			 * the list from its head.
+			 */
+			for (cur = replicas; cur != NULL; cur = cur->rl_next) {
+				tailpp = meta_namelist_append_wrapper(
+				    tailpp, cur->rl_repp->r_namep);
+				++cnt;
+			}
+		}
+	}
+
+	/* cleanup, return count or error */
+	metafreereplicalist(replicas);
+	return (cnt);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_db_balance.c b/usr/src/lib/lvm/libmeta/common/meta_db_balance.c
new file mode 100644
index 0000000000..2becd5a5a4
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_db_balance.c
@@ -0,0 +1,1215 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Database location balancing code.
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+#include <sdssc.h>
+
+#define MD_MINBALREP 2
+
+/*
+ * Stuff for DB balancing.
+ */
+enum md_ctlr_ops_t {
+ DRV_NOP = 0, /* drive already in the diskset; nothing pending */
+ DRV_ADD = 1, /* drive is being added (not yet in the diskset) */
+ DRV_DEL = 2 /* drive is being deleted; skipped when balancing */
+};
+typedef enum md_ctlr_ops_t md_ctlr_ops_t;
+
+/* drive flag fields */
+#define DRV_F_ERROR 0x1
+#define DRV_F_INDISKSET 0x2
+
+struct md_ctlr_drv_t {
+ md_ctlr_ops_t drv_op; /* pending operation for this drive */
+ int drv_flags; /* DRV_F_* bits */
+ int drv_dbcnt; /* replicas currently recorded on the drive */
+ int drv_new_dbcnt; /* working count used by count_replica_on_ctl() */
+ daddr_t drv_dbsize; /* replica size passed to add_replica() */
+ mddrivename_t *drv_dnp; /* the drive itself */
+ struct md_ctlr_drv_t *drv_next; /* next drive on the same controller */
+};
+typedef struct md_ctlr_drv_t md_ctlr_drv_t;
+
+struct md_ctlr_ctl_t {
+ mdcinfo_t *ctl_cinfop; /* controller info used to match drives */
+ int ctl_dbcnt; /* replicas on drives not marked DRV_DEL */
+ int ctl_drcnt; /* drives on this ctlr not marked DRV_DEL */
+ md_ctlr_drv_t *ctl_drvs; /* list of drives on this controller */
+ struct md_ctlr_ctl_t *ctl_next; /* next controller in the list */
+};
+typedef struct md_ctlr_ctl_t md_ctlr_ctl_t;
+
+/*
+ * Attach dbcnt replicas of dbsize blocks to the replica slice of drive
+ * dnp in set sp.  Returns 0 on success, -1 (with ep set) on failure.
+ */
+static int
+add_replica(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	int		dbcnt,
+	daddr_t		dbsize,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*names = NULL;
+	mdname_t	*slicenp;
+	md_set_desc	*sd;
+	uint_t		slice;
+	int		rval = -1;
+
+	if (meta_replicaslice(dnp, &slice, ep) != 0)
+		return (-1);
+
+	slicenp = metaslicename(dnp, slice, ep);
+	if (slicenp == NULL)
+		return (-1);
+
+	/* meta_db_attach() wants a name list, so build a one entry list */
+	(void) metanamelist_append(&names, slicenp);
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd != NULL &&
+	    meta_db_attach(sp, names, (MDCHK_DRVINSET | MDCHK_SET_LOCKED),
+	    (&sd->sd_ctime), dbcnt, dbsize, NULL, ep) != -1)
+		rval = 0;
+
+	/* the list is released on every path once it has been built */
+	metafreenamelist(names);
+	return (rval);
+}
+
+/*
+ * Detach the replicas held on the replica slice of drive dnp in set sp.
+ * Returns 0 on success, -1 (with ep set) on failure.
+ */
+static int
+del_replica(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*names = NULL;
+	mdname_t	*slicenp;
+	uint_t		slice;
+	int		rval;
+
+	if (meta_replicaslice(dnp, &slice, ep) != 0)
+		return (-1);
+
+	slicenp = metaslicename(dnp, slice, ep);
+	if (slicenp == NULL)
+		return (-1);
+
+	/* meta_db_detach() wants a name list, so build a one entry list */
+	(void) metanamelist_append(&names, slicenp);
+
+	if (meta_db_detach(sp, names, (MDFORCE_DS | MDFORCE_SET_LOCKED),
+	    NULL, ep) == -1)
+		rval = -1;
+	else
+		rval = 0;
+
+	metafreenamelist(names);
+	return (rval);
+}
+
+/*
+ * Returns 1 if any replica in rlp that carries the same cname as np has
+ * one of the read/format/data/master/write error flags set, else 0.
+ */
+static int
+rep_has_err(md_replicalist_t *rlp, mdname_t *np)
+{
+	md_replicalist_t	*cur;
+
+	for (cur = rlp; cur != NULL; cur = cur->rl_next) {
+		md_replica_t	*rep = cur->rl_repp;
+
+		if (strcmp(rep->r_namep->cname, np->cname) == 0 &&
+		    (rep->r_flags & (MDDB_F_EREAD | MDDB_F_EFMT |
+		    MDDB_F_EDATA | MDDB_F_EMASTER | MDDB_F_EWRITE)))
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Record drive dnp in the controller list headed by *clpp.
+ *
+ * The controller entry matching the drive's controller info is located
+ * (or appended to the list if there is none).  If the drive is already
+ * listed under that controller it is marked DRV_DEL and removed from the
+ * controller's counts; otherwise a new drive entry is appended, marked
+ * DRV_NOP when indiskset is set and DRV_ADD when it is not.
+ *
+ * clpp controller list head; must not be NULL
+ * rlp replica list consulted (via rep_has_err) for errored replicas
+ * dnp drive to record
+ * dbcnt, dbsize replica count and size stored in a new drive entry
+ * cinfop controller info for dnp, or NULL to have it looked up here
+ * indiskset non-zero if the drive is already part of the diskset
+ * with_bus non-zero to also require the bus to match
+ * errored non-zero if the drive is already known to be in error
+ * ep error return
+ *
+ * Returns 0 on success (including the case where the drive is left out
+ * because its replica slice cannot be determined) and -1 on failure.
+ */
+static int
+add_drv_to_ctl_lst(
+ md_ctlr_ctl_t **clpp,
+ md_replicalist_t *rlp,
+ mddrivename_t *dnp,
+ int dbcnt,
+ daddr_t dbsize,
+ mdcinfo_t *cinfop,
+ int indiskset,
+ int with_bus,
+ int errored,
+ md_error_t *ep
+)
+{
+ md_ctlr_drv_t **dpp;
+ mdname_t *np;
+ mdcinfo_t *tcinfop;
+ char *cmp_name_1,
+ *cmp_name_2;
+ int not_found;
+
+ /*
+ * The user must pass in a list head.
+ */
+ assert(clpp != NULL);
+
+ if (cinfop == NULL) {
+ uint_t rep_slice;
+
+ if (meta_replicaslice(dnp, &rep_slice, ep) != 0) {
+ /*
+ * A failure to get the slice information can occur
+ * because the drive has failed, if this is the
+ * case then there is nothing that can be done
+ * with this drive, so do not include it in the
+ * list of drives. Clear the error and return.
+ */
+ mdclrerror(ep);
+ return (0);
+ }
+
+ if ((np = metaslicename(dnp, rep_slice, ep)) == NULL)
+ return (-1);
+
+ if ((tcinfop = metagetcinfo(np, ep)) == NULL)
+ return (-1);
+
+ /*
+ * A drive whose vtoc cannot be read, or whose replica is
+ * flagged with an error, is treated as errored.
+ */
+ if (metagetvtoc(np, FALSE, NULL, ep) == NULL)
+ errored = 1;
+
+ if (rep_has_err(rlp, np))
+ errored = 1;
+ } else
+ tcinfop = cinfop;
+
+ /*
+ * clpp is advanced along the list; when the loop ends without a
+ * match it points at the terminating NULL link, which is where a
+ * new controller entry gets hooked in below.
+ */
+ for (/* void */; *clpp != NULL; clpp = &(*clpp)->ctl_next) {
+ /*
+ * Try to locate ctlr.
+ */
+ (void) sdssc_convert_cluster_path(tcinfop->cname, &cmp_name_1);
+ (void) sdssc_convert_cluster_path((*clpp)->ctl_cinfop->cname,
+ &cmp_name_2);
+
+ /* only the first 16 characters of the names are compared */
+ if (tcinfop->ctype != (*clpp)->ctl_cinfop->ctype ||
+ tcinfop->cnum != (*clpp)->ctl_cinfop->cnum ||
+ strncmp(cmp_name_1, cmp_name_2, 16) != 0 ||
+ (with_bus && tcinfop->bus != (*clpp)->ctl_cinfop->bus)) {
+ not_found = 1;
+ } else
+ not_found = 0;
+
+
+ sdssc_convert_path_free(cmp_name_1);
+ sdssc_convert_path_free(cmp_name_2);
+
+ if (not_found)
+ continue;
+
+ /*
+ * Found ctlr, try to locate the drive.
+ */
+ for (dpp = &(*clpp)->ctl_drvs; *dpp != NULL;
+ dpp = &(*dpp)->drv_next) {
+ (void) sdssc_convert_cluster_path(
+ (*dpp)->drv_dnp->cname, &cmp_name_1);
+ (void) sdssc_convert_cluster_path(dnp->cname,
+ &cmp_name_2);
+
+ not_found = strcmp(cmp_name_1, cmp_name_2);
+
+ sdssc_convert_path_free(cmp_name_1);
+ sdssc_convert_path_free(cmp_name_2);
+
+ if (not_found)
+ continue;
+
+ /*
+ * Found drive, must be deleting.
+ */
+ (*dpp)->drv_op = DRV_DEL;
+ if (indiskset)
+ (*dpp)->drv_flags |= DRV_F_INDISKSET;
+ if (errored) {
+ mdclrerror(ep);
+ (*dpp)->drv_flags |= DRV_F_ERROR;
+ }
+ /* a drive being deleted no longer counts for its ctlr */
+ (*clpp)->ctl_dbcnt -= (*dpp)->drv_dbcnt;
+ (*clpp)->ctl_drcnt--;
+ return (0);
+ }
+ /*
+ * The ctlr was found, but not the drive, so add
+ * the drive
+ */
+ (*dpp) = Zalloc(sizeof (**dpp));
+
+
+ if (indiskset) {
+ (*dpp)->drv_op = DRV_NOP;
+ (*dpp)->drv_flags |= DRV_F_INDISKSET;
+ if (errored) {
+ mdclrerror(ep);
+ (*dpp)->drv_flags |= DRV_F_ERROR;
+ }
+ } else {
+ (*dpp)->drv_op = DRV_ADD;
+ if (errored) {
+ /*
+ * NOTE(review): the new entry stays linked in the
+ * list with drv_dnp still unset when we bail out
+ * here - confirm callers discard the list.
+ */
+ (*dpp)->drv_flags |= DRV_F_ERROR;
+ return (-1);
+ }
+ assert(dbsize != 0);
+ }
+ (*dpp)->drv_dbcnt = dbcnt;
+ (*dpp)->drv_dbsize = dbsize;
+ (*dpp)->drv_dnp = dnp;
+ (*clpp)->ctl_dbcnt += dbcnt;
+ (*clpp)->ctl_drcnt++;
+ return (0);
+ }
+ /*
+ * No ctlr was located, so add the ctlr, then recurse to add the
+ * drive to the ctlr.
+ */
+ (*clpp) = Zalloc(sizeof (**clpp));
+
+ (*clpp)->ctl_cinfop = tcinfop;
+
+ /*
+ * tcinfop is passed down so the recursive call skips the lookup
+ * above; clpp now points at the entry just created, so the loop
+ * matches it on its first iteration.
+ */
+ return (add_drv_to_ctl_lst(clpp, rlp, dnp, dbcnt, dbsize, tcinfop,
+ indiskset, with_bus, errored, ep));
+}
+
+/*
+ * Add one replica to some drive on controller c.
+ *
+ * Drives are tried in list order; a drive is eligible when it is not
+ * being deleted, is not flagged in error and holds fewer than maxdb
+ * replicas. maxdb starts at the largest per-drive count on the
+ * controller (at least 1) and is raised by one after each full pass
+ * that finds no eligible drive, until it exceeds minimum_replicas.
+ *
+ * Returns 1 if a replica was added, 0 if none could be added, and -1
+ * on an error that must be reported to the caller (ep is set).
+ */
+static int
+add_replica_to_ctl(
+ mdsetname_t *sp,
+ md_ctlr_ctl_t *c,
+ int minimum_replicas,
+ md_error_t *ep
+)
+{
+ md_ctlr_drv_t *d;
+ int maxdb = 0;
+
+ /*
+ * If this ctrl has no "usable" drives, assert() or just return if
+ * assert()'s are turned off.
+ */
+ if (c->ctl_drcnt == 0) {
+ assert(0);
+ return (0);
+ }
+
+ /*
+ * Determine the largest DB count on a drive.
+ */
+ for (d = c->ctl_drvs; d != NULL; d = d->drv_next)
+ if (d->drv_dbcnt > maxdb && d->drv_op != DRV_DEL)
+ maxdb = d->drv_dbcnt;
+
+ /*
+ * Make sure we start at a reasonable number
+ */
+ if (maxdb == 0)
+ maxdb = 1;
+
+ /*
+ * Add a replica to a drive on this ctrl.
+ */
+ /*CONSTCOND*/
+ while (1) {
+ for (d = c->ctl_drvs; d != NULL; d = d->drv_next) {
+ /*
+ * If this drive is being deleted, skip it.
+ */
+ if (d->drv_op == DRV_DEL)
+ continue;
+
+ if (d->drv_flags & DRV_F_ERROR)
+ continue;
+ /*
+ * Make sure that the replicas are distributed across
+ * the drives.
+ */
+ if (d->drv_dbcnt >= maxdb)
+ continue;
+ /*
+ * See if the drive already has replicas,
+ * if it does, then delete the existing
+ * replica(s) and re-add n+1 replicas to the drive.
+ */
+ /* ==== Vulnerability - no DB's start ==== */
+ if (d->drv_dbcnt > 0) {
+ if (del_replica(sp, d->drv_dnp, ep) == -1) {
+ /*
+ * A failure is only fatal for a drive
+ * that is not yet in the diskset;
+ * otherwise flag it and move on.
+ */
+ d->drv_flags |= DRV_F_ERROR;
+ if (! (d->drv_flags & DRV_F_INDISKSET))
+ return (-1);
+ mdclrerror(ep);
+ continue;
+ }
+ }
+ if (add_replica(sp, d->drv_dnp, (d->drv_dbcnt + 1),
+ d->drv_dbsize, ep) == -1) {
+ /*
+ * The old replicas were deleted above, so the
+ * drive now holds none; fix up the counts.
+ */
+ if (d->drv_dbcnt) {
+ c->ctl_dbcnt -= d->drv_dbcnt;
+ d->drv_dbcnt = 0;
+ }
+
+ if (mdismddberror(ep, MDE_TOOMANY_REPLICAS))
+ return (-1);
+
+ if (mdismddberror(ep, MDE_REPLICA_TOOSMALL))
+ return (-1);
+
+ d->drv_flags |= DRV_F_ERROR;
+ if (! (d->drv_flags & DRV_F_INDISKSET))
+ return (-1);
+ mdclrerror(ep);
+ continue;
+ }
+
+ d->drv_dbcnt++;
+ c->ctl_dbcnt++;
+ /* ==== Vulnerability - no DB's end ==== */
+ return (1);
+ }
+ /* no eligible drive at this level; allow one more per drive */
+ maxdb++;
+ if (maxdb > minimum_replicas)
+ return (0);
+ }
+ /*NOTREACHED*/
+}
+
+/*
+ * Remove one replica from some drive on controller c.
+ *
+ * Drives are tried in list order; a drive is eligible when it is not
+ * being deleted, is not flagged in error and holds at least maxdb
+ * (and more than zero) replicas. maxdb starts at the largest per-drive
+ * count on the controller and is lowered by one after each full pass
+ * that finds no eligible drive. All replicas on the chosen drive are
+ * deleted and one fewer is added back.
+ *
+ * Returns 1 if a replica was removed, 0 if none could be removed, and
+ * -1 on an error that must be reported to the caller (ep is set).
+ */
+static int
+del_replica_from_ctl(
+ mdsetname_t *sp,
+ md_ctlr_ctl_t *c,
+ md_error_t *ep
+)
+{
+ md_ctlr_drv_t *d;
+ int maxdb = 0;
+
+ /*
+ * If this ctrl has no "usable" drives, assert() or just return if
+ * assert()'s are turned off.
+ */
+ if (c->ctl_drcnt == 0) {
+ assert(0);
+ return (0);
+ }
+
+ /*
+ * Determine the largest DB count on a drive.
+ */
+ for (d = c->ctl_drvs; d != NULL; d = d->drv_next)
+ if (d->drv_dbcnt > maxdb && d->drv_op != DRV_DEL)
+ maxdb = d->drv_dbcnt;
+
+ /* nothing on this controller to delete */
+ if (maxdb == 0)
+ return (0);
+
+ /*
+ * Delete a replica from a drive on this ctrl.
+ */
+ /*CONSTCOND*/
+ while (1) {
+ for (d = c->ctl_drvs; d != NULL; d = d->drv_next) {
+ /*
+ * If this drive is being deleted, skip it.
+ */
+ if (d->drv_op == DRV_DEL)
+ continue;
+
+ /*
+ * Make sure that there are replicas on this drive to
+ * delete.
+ */
+ if (d->drv_dbcnt == 0)
+ continue;
+
+ if (d->drv_flags & DRV_F_ERROR)
+ continue;
+
+ /*
+ * We need to keep the DB's distributed across the
+ * drives.
+ */
+ if (d->drv_dbcnt < maxdb)
+ continue;
+
+ /*
+ * Delete all the replicas on the drive.
+ */
+ /* ==== Vulnerability - no DB's start ==== */
+ if (del_replica(sp, d->drv_dnp, ep) == -1) {
+ /*
+ * A failure is only fatal for a drive that is
+ * not yet in the diskset; otherwise flag it and
+ * move on.
+ */
+ d->drv_flags |= DRV_F_ERROR;
+ if (! (d->drv_flags & DRV_F_INDISKSET))
+ return (-1);
+ mdclrerror(ep);
+ continue;
+ }
+ /* the counts drop by one: that is the replica removed */
+ d->drv_dbcnt--;
+ c->ctl_dbcnt--;
+ /*
+ * If there is still a dbcnt for this drive, then add
+ * back the needed DB's.
+ */
+ if (d->drv_dbcnt > 0) {
+ if (add_replica(sp, d->drv_dnp, d->drv_dbcnt,
+ d->drv_dbsize, ep) == -1) {
+ /* the drive now holds no replicas at all */
+ c->ctl_dbcnt -= d->drv_dbcnt;
+ d->drv_dbcnt = 0;
+
+ if (mdismddberror(ep,
+ MDE_TOOMANY_REPLICAS))
+ return (-1);
+
+ d->drv_flags |= DRV_F_ERROR;
+ if (! (d->drv_flags & DRV_F_INDISKSET))
+ return (-1);
+ mdclrerror(ep);
+ continue;
+ }
+ }
+ /* ==== Vulnerability - no DB's end ==== */
+ return (1);
+ }
+ /* no eligible drive at this level; try drives holding fewer */
+ maxdb--;
+ if (maxdb <= 0)
+ return (0);
+ }
+ /*NOTREACHED*/
+}
+
+/*
+ * Detach the replicas of every drive in the controller list that is
+ * either flagged in error or marked for deletion and still has a
+ * non-zero replica count.
+ *
+ * A detach failure is ignored (and ep cleared) for drives that are in
+ * the diskset; for any other drive it ends the walk.  Returns 0 on
+ * success and -1 on failure.
+ */
+static int
+del_replicas(mdsetname_t *sp, md_ctlr_ctl_t *clp, md_error_t *ep)
+{
+	md_ctlr_ctl_t	*ctl;
+	md_ctlr_drv_t	*drv;
+
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		for (drv = ctl->ctl_drvs; drv != NULL; drv = drv->drv_next) {
+			mdnamelist_t	*names = NULL;
+			mdname_t	*slicenp;
+			uint_t		slice;
+			int		doomed;
+			int		failed;
+
+			doomed = (drv->drv_flags & DRV_F_ERROR) ||
+			    (drv->drv_op == DRV_DEL);
+			if (!doomed || drv->drv_dbcnt == 0)
+				continue;
+
+			if (meta_replicaslice(drv->drv_dnp, &slice, ep) != 0)
+				return (-1);
+
+			slicenp = metaslicename(drv->drv_dnp, slice, ep);
+			if (slicenp == NULL)
+				return (-1);
+
+			(void) metanamelist_append(&names, slicenp);
+
+			/*
+			 * Delete the replicas listed.
+			 */
+			failed = (meta_db_detach(sp, names,
+			    (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL,
+			    ep) == -1);
+			metafreenamelist(names);
+
+			if (!failed)
+				continue;
+
+			if (!(drv->drv_flags & DRV_F_INDISKSET))
+				return (-1);
+			mdclrerror(ep);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Release every controller entry in the list, along with the drive
+ * entries hanging off each one, and leave *clpp set to NULL.
+ */
+static void
+free_ctlr_lst(md_ctlr_ctl_t **clpp)
+{
+	md_ctlr_ctl_t	*ctl = *clpp;
+
+	while (ctl != NULL) {
+		md_ctlr_ctl_t	*next_ctl = ctl->ctl_next;
+		md_ctlr_drv_t	*drv = ctl->ctl_drvs;
+
+		while (drv != NULL) {
+			md_ctlr_drv_t	*next_drv = drv->drv_next;
+
+			Free(drv);
+			drv = next_drv;
+		}
+
+		Free(ctl);
+		ctl = next_ctl;
+	}
+
+	*clpp = NULL;
+}
+
+/*
+ * Build the controller list used for replica balancing.
+ *
+ *	sp		set being balanced
+ *	clpp		list head the controller/drive entries are added to
+ *	opdd		drives being operated on (added with a count of 0)
+ *	curdd		drives currently in the set
+ *	with_bus	passed through to add_drv_to_ctl_lst()
+ *	dbsize		replica size recorded for the opdd drives
+ *	ep		error return
+ *
+ * Drives in curdd that report a dbsize of 0 are given a default size.
+ * That default is computed on the first call (smallest existing replica,
+ * else MD_DBSIZE or MD_MN_DBSIZE) and then reused by every later call
+ * because it is kept in a function-level static.
+ *
+ * Returns 0 on success and -1 on failure.  The list at *clpp may have
+ * been partially built when -1 is returned.
+ */
+static int
+build_ctlr_lst(
+	mdsetname_t	*sp,
+	md_ctlr_ctl_t	**clpp,
+	md_drive_desc	*opdd,
+	md_drive_desc	*curdd,
+	int		with_bus,
+	daddr_t		dbsize,
+	md_error_t	*ep
+)
+{
+	md_drive_desc		*d;
+	md_set_desc		*sd;
+	daddr_t			nblks;
+	md_replicalist_t	*rlp = NULL;
+	int			rval = 0;
+	static daddr_t		min_dbsize = 0;
+
+	if (min_dbsize == 0) {
+		if ((nblks = meta_db_minreplica(sp, ep)) < 0) {
+			min_dbsize = MD_DBSIZE;
+
+			if (! metaislocalset(sp)) {
+				if ((sd = metaget_setdesc(sp, ep)) == NULL)
+					return (-1);
+
+				if (MD_MNSET_DESC(sd))
+					min_dbsize = MD_MN_DBSIZE;
+			}
+			mdclrerror(ep);
+		} else
+			min_dbsize = nblks;
+	}
+
+	/* having no replicas, or not owning the set, is not fatal here */
+	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
+		if (! mdismddberror(ep, MDE_DB_NODB) &&
+		    ! mdismddberror(ep, MDE_DB_NOTOWNER))
+			return (-1);
+		mdclrerror(ep);
+	}
+
+	/*
+	 * Add drives currently in the set to the ctlr list.
+	 */
+	for (d = curdd; d != NULL; d = d->dd_next) {
+		daddr_t	this_dbsize = d->dd_dbsize;
+
+		if (this_dbsize == 0)
+			this_dbsize = min_dbsize;
+
+		if (add_drv_to_ctl_lst(clpp, rlp, d->dd_dnp, d->dd_dbcnt,
+		    this_dbsize, NULL, TRUE, with_bus, 0, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Add the drives that are being operated on to the ctlr list.
+	 */
+	for (d = opdd; d != NULL; d = d->dd_next) {
+		if (add_drv_to_ctl_lst(clpp, rlp, d->dd_dnp, 0, dbsize, NULL,
+		    FALSE, with_bus, 0, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+out:
+	/* the replica list is only needed while the ctlr list is built */
+	metafreereplicalist(rlp);
+	return (rval);
+}
+
+/*
+ * count_replica_on_ctl -- model the effect of adding one replica to
+ *	(adding != 0), or deleting one replica from (adding == 0), some
+ *	drive on controller c.  Only the drv_new_dbcnt bookkeeping and
+ *	*db_cnt are changed.
+ *
+ *	c			controller to examine
+ *	adding			non-zero to count an addition, zero to
+ *				un-count a deletion
+ *	db_cnt			running replica count, incremented or
+ *				decremented by one when a drive is chosen
+ *	minimum_replicas	upper bound for the per-drive level that is
+ *				tried when adding
+ *
+ * RETURN
+ *	0	a replica was counted or un-counted, or there was nothing to
+ *		do (the controller has no usable drives, or nothing is left
+ *		to delete)
+ *	-1	no drive on this controller could absorb the change
+ *
+ * NOTE(review): the "nothing to do" returns leave *db_cnt untouched, so
+ * callers that loop until *db_cnt reaches a target depend on those cases
+ * not arising mid-loop -- confirm against the callers.
+ */
+static int
+count_replica_on_ctl(
+	md_ctlr_ctl_t	*c,
+	int		adding,
+	int		*db_cnt,
+	int		minimum_replicas
+)
+{
+	md_ctlr_drv_t	*d;
+	int		maxdb = 0;
+
+	/*
+	 * If this ctrl has no "usable" drives, nothing to do.
+	 */
+	if (c->ctl_drcnt == 0)
+		return (0);
+
+	/*
+	 * Determine the largest DB count on a drive.
+	 */
+	for (d = c->ctl_drvs; d != NULL; d = d->drv_next)
+		if (d->drv_new_dbcnt > maxdb && d->drv_op != DRV_DEL)
+			maxdb = d->drv_new_dbcnt;
+
+	/*
+	 * Make sure we start at a reasonable number
+	 */
+	if (maxdb == 0) {
+		if (!adding)
+			return (0);
+		maxdb = 1;
+	}
+
+	/*
+	 * Count or Un-Count replicas that would be
+	 * added or deleted respectively.
+	 */
+	/*CONSTCOND*/
+	while (1) {
+		for (d = c->ctl_drvs; d != NULL; d = d->drv_next) {
+			/*
+			 * If this drive is being deleted, skip it.
+			 */
+			if (d->drv_op == DRV_DEL)
+				continue;
+
+			/*
+			 * If the drive is errored and adding, skip it.
+			 */
+			if (adding && (d->drv_flags & DRV_F_ERROR))
+				continue;
+
+			/*
+			 * Make sure that the replicas are distributed across
+			 * the drives.
+			 */
+			if (adding) {
+				if (d->drv_new_dbcnt >= maxdb)
+					continue;
+			} else {
+				if (d->drv_new_dbcnt == 0)
+					continue;
+				if (d->drv_new_dbcnt < maxdb)
+					continue;
+			}
+
+			/*
+			 * Count or Un-Count replicas here.
+			 */
+			if (adding) {
+				mdpart_t	*partp;
+				uint_t		rep_slice;
+				md_error_t	mde = mdnullerror;
+
+				/*
+				 * The error is private to this lookup: start
+				 * from a null error and clear it again when
+				 * the lookup fails, since nobody else will
+				 * ever see it.
+				 */
+				if (meta_replicaslice(d->drv_dnp,
+				    &rep_slice, &mde) != 0) {
+					mdclrerror(&mde);
+					continue;
+				}
+
+				partp = &d->drv_dnp->vtoc.parts[rep_slice];
+
+				/*
+				 * Skip the drive when one more replica would
+				 * not fit in the replica slice.
+				 * NOTE(review): if partp->size is unsigned
+				 * and smaller than 16 the subtraction wraps
+				 * -- confirm the field's type.
+				 */
+				if (((d->drv_new_dbcnt + 1) * d->drv_dbsize) >
+				    (partp->size - 16))
+					continue;
+				(*db_cnt)++;
+				d->drv_new_dbcnt++;
+			} else {
+				(*db_cnt)--;
+				d->drv_new_dbcnt--;
+			}
+			return (0);
+		}
+
+		/*
+		 * This should make sure they get spread
+		 * around.  This is to emulate the {add,del}_replica
+		 * routines.
+		 */
+		if (adding) {
+			maxdb++;
+			if (maxdb > minimum_replicas)
+				return (-1);
+		} else {
+			maxdb--;
+			if (maxdb <= 0)
+				return (-1);
+		}
+	}
+	/*NOTREACHED*/
+}
+
+/*
+ * count_replicas -- work out how many replicas the set would hold if every
+ *	usable controller on clp were brought to the min_reps target.
+ *	Nothing is created or deleted; the result is modelled in each
+ *	drive's drv_new_dbcnt, which is first reset to drv_dbcnt.
+ *
+ *	With more than two usable controllers each controller is taken to
+ *	exactly min_reps replicas (counting up or down).  Otherwise each
+ *	controller is only counted up, to min_reps times its drive count.
+ *
+ * RETURN
+ *	>= 0	total modelled replica count over the usable controllers
+ *	-1	a controller could not reach its target
+ */
+static int
+count_replicas(
+	md_ctlr_ctl_t	*clp,
+	int		min_reps
+)
+{
+	md_ctlr_ctl_t	*ctl;
+	md_ctlr_drv_t	*drv;
+	int		usable = 0;
+	int		total = 0;
+	int		cnt;
+
+	/*
+	 * Count the usable controllers and start every one of their drives
+	 * from its current replica count.
+	 */
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		if (!(ctl->ctl_drcnt > 0))
+			continue;
+
+		usable++;
+		for (drv = ctl->ctl_drvs; drv != NULL; drv = drv->drv_next)
+			drv->drv_new_dbcnt = drv->drv_dbcnt;
+	}
+
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		if (ctl->ctl_drcnt == 0)
+			continue;
+
+		cnt = ctl->ctl_dbcnt;
+
+		if (usable > 2) {
+			/* Count the replicas that would be added. */
+			while (cnt < min_reps) {
+				if (count_replica_on_ctl(ctl, TRUE,
+				    &cnt, min_reps) != 0)
+					return (-1);
+			}
+
+			/* Un-count the replicas that would be deleted. */
+			while (cnt > min_reps) {
+				if (count_replica_on_ctl(ctl, FALSE,
+				    &cnt, min_reps) != 0)
+					return (-1);
+			}
+		} else {
+			/* Count the replicas that would be added. */
+			while (cnt < (min_reps * ctl->ctl_drcnt)) {
+				if (count_replica_on_ctl(ctl, TRUE,
+				    &cnt, min_reps) != 0)
+					return (-1);
+			}
+		}
+
+		total += cnt;
+	}
+
+	return (total);
+}
+
+/*
+ * balance_replicas -- choose a controller list layout and a replica target
+ * (*minimum_replicas) for which count_replicas() reports between 1 and
+ * MDDB_NLB replicas.
+ *
+ * The strategies are tried in this order; the first that fits wins:
+ * 1. busses not treated as controllers, used only when more than two
+ * controllers have drives and they are not all the same size with
+ * at least 30 drives each;
+ * 2. busses treated as separate controllers (with a special case when
+ * there are exactly two real controllers);
+ * 3. busses not treated as controllers, starting from MD_MINBALREP.
+ * If nothing fits, *minimum_replicas is set to 1.
+ *
+ * On return *clpp is the controller list that was built last.
+ *
+ * RETURN
+ * 0 success, *minimum_replicas has been set
+ * -1 build_ctlr_lst() failed, ep holds the error
+ *
+ * NOTE(review): on the -1 return *clpp is not released here and may hold
+ * a partly built list -- confirm that the caller frees it.
+ */
+static int
+balance_replicas(
+ mdsetname_t *sp,
+ md_ctlr_ctl_t **clpp,
+ md_drive_desc *opdd,
+ md_drive_desc *curdd,
+ daddr_t dbsize,
+ int *minimum_replicas,
+ md_error_t *ep
+)
+{
+ int n;
+ int rctlrs = 0;
+ int uctlrs;
+ int ructlrs;
+ int octlrs;
+ int save_done;
+ int prevcnt = 0, issame = 1;
+ uint_t drvcnt = ~0U;
+ uint_t save_cnum;
+ mhd_ctlrtype_t save_ctype;
+ char save_cname[16],
+ *cmp_name_1,
+ *cmp_name_2;
+ int reps;
+ md_ctlr_ctl_t *c;
+
+ /*
+ * Build a ctlr list with SSA-100 busses NOT as separate controllers.
+ */
+ if (build_ctlr_lst(sp, clpp, opdd, curdd, FALSE, dbsize, ep) == -1)
+ return (-1);
+
+ /*
+ * Determine what controllers are usable in the sense of being able to
+ * add a replica to a drive on the controller.
+ * Also find the minimum number of drives on a controller, and note
+ * whether every usable controller has the same number of drives.
+ */
+ for (c = *clpp; c != NULL; c = c->ctl_next) {
+ if (c->ctl_drcnt > 0) {
+ rctlrs++;
+ drvcnt = min(drvcnt, c->ctl_drcnt);
+ if (prevcnt == 0)
+ prevcnt = c->ctl_drcnt;
+ else if (prevcnt != c->ctl_drcnt)
+ issame = 0;
+ }
+ }
+
+ if ((rctlrs <= 2) || (issame && (drvcnt >= 30)))
+ goto cont;
+
+ /*
+ * If here: Handling 3 or more controllers most
+ * likely with non-symmetrical number of
+ * disks. The number of replicas will be
+ * the minimum number of disks on a controller.
+ *
+ * The main point is to ensure that a
+ * controller does not have more than half
+ * of the replicas.
+ */
+ drvcnt = min(drvcnt, 12);
+ drvcnt = max(drvcnt, MD_MINBALREP);
+
+ /*
+ * Can we find fewer than the maximum replicas by reducing the
+ * number of replicas per drive.
+ */
+ for (n = drvcnt; n > 0; n--) {
+ reps = count_replicas(*clpp, n);
+ if (reps > 0 && reps <= MDDB_NLB) {
+ *minimum_replicas = n;
+ return (0);
+ }
+ }
+
+cont:
+ free_ctlr_lst(clpp);
+
+ /*
+ * Build a ctlr list with SSA-100 busses as separate controllers.
+ *
+ * If Here: Try to put 2 replicas per controller/bus
+ * If that doesn't work put 1 replica per controller/bus
+ */
+ if (build_ctlr_lst(sp, clpp, opdd, curdd, TRUE, dbsize, ep) == -1)
+ return (-1);
+
+ /*
+ * If the number of "real" controllers is 2, special handling may be
+ * needed. rctlrs still holds the count taken from the first list,
+ * the one built without busses as controllers.
+ */
+ if (rctlrs != 2) {
+ drvcnt = MD_MINBALREP;
+ goto other;
+ }
+
+ /*
+ * Determine what controllers are usable in the sense of being able to
+ * add a replica to a drive on the controller.
+ * Also find the minimum number of drives on a controller.
+ */
+ drvcnt = ~0U;
+ uctlrs = 0;
+ for (c = *clpp; c != NULL; c = c->ctl_next) {
+ if (c->ctl_drcnt > 0) {
+ uctlrs++;
+ drvcnt = min(drvcnt, c->ctl_drcnt);
+ }
+ }
+
+ /*
+ * If the number of controllers is not changed, continue with original
+ * strategy.
+ */
+ if (uctlrs == rctlrs) {
+ drvcnt = MD_MINBALREP;
+ goto other;
+ }
+
+ /*
+ * Check the distribution of bus ctlrs across real controllers.
+ * The first usable entry is remembered; every usable entry is then
+ * counted as "same real controller" (ructlrs) or "other" (octlrs)
+ * by comparing type, number and converted name against it.
+ */
+ ructlrs = 0;
+ octlrs = 0;
+ save_done = 0;
+ for (c = *clpp; c != NULL; c = c->ctl_next) {
+ if (c->ctl_drcnt == 0)
+ continue;
+
+ if (! save_done) {
+ save_cnum = c->ctl_cinfop->cnum;
+ save_ctype = c->ctl_cinfop->ctype;
+ /*
+ * NOTE(review): strncpy() does not terminate
+ * save_cname when cname fills all 16 bytes --
+ * confirm cname is always NUL terminated.
+ */
+ (void) strncpy(save_cname, c->ctl_cinfop->cname, 16);
+ save_done = 1;
+ }
+
+ (void) sdssc_convert_cluster_path(c->ctl_cinfop->cname,
+ &cmp_name_1);
+ (void) sdssc_convert_cluster_path(save_cname, &cmp_name_2);
+
+ if (save_ctype != c->ctl_cinfop->ctype ||
+ save_cnum != c->ctl_cinfop->cnum ||
+ strncmp(cmp_name_1, cmp_name_2, 16) != 0)
+ octlrs++;
+ else
+ ructlrs++;
+
+ sdssc_convert_path_free(cmp_name_1);
+ sdssc_convert_path_free(cmp_name_2);
+ }
+
+ /*
+ * Take the largest of the counts
+ */
+ ructlrs = max(ructlrs, octlrs);
+
+ /*
+ * If the distribution of bus controllers is half of the total, then
+ * this layout strategy will work, so do it.
+ */
+ if ((uctlrs / 2) == ructlrs) {
+ drvcnt = MD_MINBALREP;
+ goto other;
+ }
+
+ /*
+ * If here, there is a distribution of bus controllers that will cause
+ * the real controller distribution to be unbalanced, so a different
+ * strategy is used.
+ */
+ free_ctlr_lst(clpp);
+
+ /*
+ * Build the ctlr list with SSA-100 busses NOT as separate controllers.
+ */
+ if (build_ctlr_lst(sp, clpp, opdd, curdd, FALSE, dbsize, ep) == -1)
+ return (-1);
+
+ /*
+ * Make ctl_drcnt limit the number of replicas. drvcnt is the
+ * smallest drive count seen on the list that had busses as
+ * controllers.
+ */
+ for (c = *clpp; c != NULL; c = c->ctl_next)
+ c->ctl_drcnt = min(drvcnt, c->ctl_drcnt);
+
+ /*
+ * Try at least MD_MINBALREP's per controller after changing ctl_drcnt
+ */
+ drvcnt = MD_MINBALREP;
+
+other:
+ /*
+ * Can we find fewer than the maximum replicas by reducing the number
+ * of replicas per drive.
+ */
+ for (n = drvcnt; n > 0; n--) {
+ reps = count_replicas(*clpp, n);
+ if (reps > 0 && reps <= MDDB_NLB) {
+ *minimum_replicas = n;
+ return (0);
+ }
+ }
+
+ free_ctlr_lst(clpp);
+
+ /*
+ * Build a ctlr list with SSA-100 busses NOT as separate controllers.
+ *
+ * If Here: Try to put 2 replicas per controller (not on busses)
+ * If that doesn't work put 1 replica per controller
+ */
+ if (build_ctlr_lst(sp, clpp, opdd, curdd, FALSE, dbsize, ep) == -1)
+ return (-1);
+
+ /*
+ * Can we find fewer than the maximum replicas by reducing the
+ * number of replicas per drive.
+ */
+ for (n = MD_MINBALREP; n > 0; n--) {
+ reps = count_replicas(*clpp, n);
+ if (reps > 0 && reps <= MDDB_NLB) {
+ *minimum_replicas = n;
+ return (0);
+ }
+ }
+
+ /*
+ * Return a ctrl list that does not include the SSA-100 buses as
+ * separate controllers. This will create fewer separate controllers.
+ */
+ *minimum_replicas = 1;
+ return (0);
+}
+
+/*
+ * morethan2_ctl_balance -- bring every usable controller on clp to exactly
+ *	min_reps replicas, adding where a controller is short and deleting
+ *	where it has too many.
+ *
+ * A helper returning 0 ends the work on that controller without error;
+ * a negative helper return aborts the whole pass.
+ *
+ * RETURN
+ *	0	all controllers were processed
+ *	-1	an add or delete failed, ep holds the error
+ */
+static int
+morethan2_ctl_balance(
+	mdsetname_t	*sp,
+	md_ctlr_ctl_t	*clp,
+	int		min_reps,
+	md_error_t	*ep
+)
+{
+	md_ctlr_ctl_t	*ctl;
+	int		rc;
+
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		/* Controllers without usable drives are left alone. */
+		if (ctl->ctl_drcnt == 0)
+			continue;
+
+		while (ctl->ctl_dbcnt < min_reps) {
+			rc = add_replica_to_ctl(sp, ctl, min_reps, ep);
+			if (rc < 0)
+				return (-1);
+			if (rc == 0)
+				break;
+		}
+
+		while (ctl->ctl_dbcnt > min_reps) {
+			rc = del_replica_from_ctl(sp, ctl, ep);
+			if (rc < 0)
+				return (-1);
+			if (rc == 0)
+				break;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * lessthan3_ctl_balance -- bring every usable controller on clp to
+ *	min_reps replicas per drive (min_reps * ctl_drcnt in total), adding
+ *	where a controller is short and deleting where it has too many.
+ *
+ * A helper returning 0 ends the work on that controller without error;
+ * a negative helper return aborts the whole pass.
+ *
+ * RETURN
+ *	0	all controllers were processed
+ *	-1	an add or delete failed, ep holds the error
+ */
+static int
+lessthan3_ctl_balance(
+	mdsetname_t	*sp,
+	md_ctlr_ctl_t	*clp,
+	int		min_reps,
+	md_error_t	*ep
+)
+{
+	md_ctlr_ctl_t	*ctl;
+	int		rc;
+
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		/* Controllers without usable drives are left alone. */
+		if (ctl->ctl_drcnt == 0)
+			continue;
+
+		while (ctl->ctl_dbcnt < (min_reps * ctl->ctl_drcnt)) {
+			rc = add_replica_to_ctl(sp, ctl, min_reps, ep);
+			if (rc < 0)
+				return (-1);
+			if (rc == 0)
+				break;
+		}
+
+		while (ctl->ctl_dbcnt > (min_reps * ctl->ctl_drcnt)) {
+			rc = del_replica_from_ctl(sp, ctl, ep);
+			if (rc < 0)
+				return (-1);
+			if (rc == 0)
+				break;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * try_again -- decide whether a failed balancing pass is worth repeating.
+ *
+ * RETURN
+ *	TRUE	the error is MDE_TOOMANY_REPLICAS, or no drive on the list
+ *		has DRV_F_ERROR set without DRV_F_INDISKSET
+ *	FALSE	some errored drive is not already in the diskset
+ */
+static int
+try_again(
+	md_ctlr_ctl_t	*clp,
+	md_error_t	*ep
+)
+{
+	md_ctlr_ctl_t	*ctl;
+	md_ctlr_drv_t	*drv;
+
+	if (mdismddberror(ep, MDE_TOOMANY_REPLICAS))
+		return (TRUE);
+
+	/*
+	 * A single errored drive that is not yet part of the diskset is
+	 * enough to rule out a retry.
+	 */
+	ctl = clp;
+	while (ctl != NULL) {
+		drv = ctl->ctl_drvs;
+		while (drv != NULL) {
+			if ((drv->drv_flags & (DRV_F_INDISKSET|DRV_F_ERROR))
+			    == DRV_F_ERROR)
+				return (FALSE);
+			drv = drv->drv_next;
+		}
+		ctl = ctl->ctl_next;
+	}
+
+	return (TRUE);
+}
+
+/*
+ * meta_db_balance -- rebalance the replicas of set sp across controllers.
+ *
+ *	sp	set to balance
+ *	opdd	drives being operated on
+ *	curdd	drives currently in the set
+ *	dbsize	replica size used for the opdd drives
+ *	ep	error return
+ *
+ * balance_replicas() picks the controller list and the replica target.
+ * One balancing pass is then made, replicas are deleted from the drives
+ * marked for it, and -- when the first pass failed in a way try_again()
+ * accepts -- a second balancing pass is made.  MDE_TOOMANY_REPLICAS from
+ * the second pass is not treated as a failure.
+ *
+ * RETURN
+ *	0	success
+ *	-1	failure, ep holds the error
+ */
+int
+meta_db_balance(
+	mdsetname_t	*sp,
+	md_drive_desc	*opdd,
+	md_drive_desc	*curdd,
+	daddr_t		dbsize,
+	md_error_t	*ep
+)
+{
+	int		min_reps;
+	md_ctlr_ctl_t	*c, *cl = NULL;
+	int		uctlrs = 0;
+	int		retry = 0;
+	int		rval = 0;
+
+	if (balance_replicas(sp, &cl, opdd, curdd, dbsize, &min_reps,
+	    ep) == -1) {
+		/*
+		 * The list may have been partly built before the failure;
+		 * free_ctlr_lst() is a no-op on an empty list.
+		 */
+		free_ctlr_lst(&cl);
+		return (-1);
+	}
+
+	/*
+	 * Determine what controllers are usable in the sense of being able to
+	 * add a replica to a drive on the controller.
+	 */
+	for (c = cl; c != NULL; c = c->ctl_next)
+		if (c->ctl_drcnt > 0)
+			uctlrs++;
+
+	/*
+	 * Add replicas to achieve a balance.
+	 */
+	if (uctlrs > 2)
+		rval = morethan2_ctl_balance(sp, cl, min_reps, ep);
+	else
+		rval = lessthan3_ctl_balance(sp, cl, min_reps, ep);
+
+	if (rval) {
+		if ((retry = try_again(cl, ep)) == TRUE) {
+			mdclrerror(ep);
+			rval = 0;
+		}
+	}
+
+	/*
+	 * Delete all the replicas from drives that are so marked.
+	 */
+	if (! rval)
+		rval = del_replicas(sp, cl, ep);
+
+	/*
+	 * NOTE(review): the retry runs even when del_replicas() has just
+	 * failed, and its result replaces that failure -- confirm this is
+	 * intended.
+	 */
+	if (retry) {
+		if (uctlrs > 2)
+			rval = morethan2_ctl_balance(sp, cl, min_reps, ep);
+		else
+			rval = lessthan3_ctl_balance(sp, cl, min_reps, ep);
+
+		if (rval && mdismddberror(ep, MDE_TOOMANY_REPLICAS)) {
+			mdclrerror(ep);
+			rval = 0;
+		}
+	}
+
+	/*
+	 * Free up the ctlr list.
+	 */
+	free_ctlr_lst(&cl);
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_devadm.c b/usr/src/lib/lvm/libmeta/common/meta_devadm.c
new file mode 100644
index 0000000000..a30789a72e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_devadm.c
@@ -0,0 +1,1607 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <sys/fcntl.h>
+#include <sys/types.h>
+#include <devid.h>
+#include <ftw.h>
+#include <string.h>
+#include <mdiox.h>
+#include <sys/lvm/mdio.h>
+#include <meta.h>
+#include <syslog.h>
+#include <sdssc.h>
+#include "meta_set_prv.h"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#define RAW_PATH 0x001 /* rdsk */
+#define BLOCK_PATH 0x002 /* dsk */
+#define DSK_TYPE 0x004 /* normal /dev/[r]dsk */
+#define TEST_TYPE 0x008 /* test driver path */
+#define DID_TYPE 0x010 /* cluster did path */
+#define AP_TYPE 0x020 /* should be obsolete */
+
+typedef struct path_list {
+ char *search_path; /* device directory prefix, e.g. "/dev/rdsk" */
+ char *search_type; /* DEVID_MINOR_NAME_ALL_CHR or _BLK for this prefix */
+ int path_type; /* RAW_PATH or BLOCK_PATH or'ed with one *_TYPE flag */
+} path_list_t;
+
+/*
+ * A table of the supported path types - this should ideally be generated
+ * by reading the /etc/lvm/devpath file
+ */
+static path_list_t plist[] = {
+ {"/dev/rdsk", DEVID_MINOR_NAME_ALL_CHR, RAW_PATH|DSK_TYPE},
+ {"/dev/dsk", DEVID_MINOR_NAME_ALL_BLK, BLOCK_PATH|DSK_TYPE},
+ {"/dev/did/rdsk", DEVID_MINOR_NAME_ALL_CHR, RAW_PATH|DID_TYPE},
+ {"/dev/did/dsk", DEVID_MINOR_NAME_ALL_BLK, BLOCK_PATH|DID_TYPE},
+ {"/dev/td/dsk", DEVID_MINOR_NAME_ALL_BLK, BLOCK_PATH|TEST_TYPE},
+ {"/dev/td/rdsk", DEVID_MINOR_NAME_ALL_CHR, RAW_PATH|TEST_TYPE},
+};
+static int num = sizeof (plist)/sizeof (path_list_t);
+
+static mddevopts_t dev_options = 0;
+
+/* indicate whether to print an error message or not */
+static int firsttime = 1;
+
+#define DEV_MATCH 0x1
+#define NAME_MATCH 0x2
+
+#define DEBUGON 1
+#define DEBUGOFF 2
+
+/*
+ * Debug function: to turn on devadm function debugging include DEVADM
+ * in the MD_DEBUG environment variable: MD_DEBUG=...,DEVADM...
+ */
+/*PRINTFLIKE1*/
+static void
+mda_debug(char *format, ...)
+{
+	static int	debug_set = 0;
+	va_list		ap;
+
+	/* Look at the environment on the first call only; cache the answer. */
+	if (debug_set == 0) {
+		char	*env = getenv("MD_DEBUG");
+
+		if ((env == NULL) || (strstr(env, "DEVADM") == NULL))
+			debug_set = DEBUGOFF;
+		else
+			debug_set = DEBUGON;
+	}
+
+	if (debug_set != DEBUGON)
+		return;
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, format, ap);
+	va_end(ap);
+}
+
+/*
+ * mda_print -- emit a printf-style message: to syslog when DEV_LOG is set
+ * in dev_options, otherwise to stderr.
+ */
+/*PRINTFLIKE1*/
+static void
+mda_print(char *message, ...)
+{
+	va_list	ap;
+
+	va_start(ap, message);
+	if (!(dev_options & DEV_LOG)) {
+		(void) vfprintf(stderr, message, ap);
+	} else {
+		/*
+		 * The program is a daemon in the sense that it
+		 * is a system utility.
+		 */
+		(void) vsyslog((LOG_ERR | LOG_DAEMON), message, ap);
+	}
+	va_end(ap);
+}
+
+/*
+ * mda_findpath -- find the plist[] entry whose search_path is a leading
+ * substring of path, so the matching devid search options can be used.
+ *
+ * RETURN:
+ *	-1	Error, no table entry is a prefix of the path passed in
+ *	>= 0	Index of the first matching entry within the table
+ */
+static int
+mda_findpath(char *path)
+{
+	int	idx = 0;
+
+	while (idx < num) {
+		char	*prefix = plist[idx].search_path;
+
+		if (strncmp(prefix, path, strlen(prefix)) == 0)
+			return (idx);
+		idx++;
+	}
+
+	return (-1);
+}
+
+/*
+ * mda_getpath -- return a newly allocated copy of the directory part of
+ * devname: everything up to and including the last '/'.  The caller
+ * frees the result.
+ *
+ * RETURN
+ *	NULL	devname contains no '/'
+ *	string	the directory part, '/' terminated
+ */
+static char *
+mda_getpath(char *devname)
+{
+	char	*slash = strrchr(devname, '/');
+	char	*dir;
+	size_t	dirlen;
+
+	if (slash == NULL) {
+		mda_debug("Invalid format: %s\n", devname);
+		return (NULL);
+	}
+
+	/* Length of the prefix that ends with the final '/'. */
+	dirlen = strlen(devname) - strlen(slash + 1);
+
+	dir = Malloc(dirlen + 1);
+	(void) strncpy(dir, devname, dirlen);
+	dir[dirlen] = '\0';
+
+	return (dir);
+}
+
+/*
+ * update_locator_namespace -- issue the MD_IOCUPD_LOCNM ioctl, which
+ *	updates the ctds name and pathname (ie. /dev/dsk etc) within the
+ *	locator block namespace.
+ *
+ *	setno, sideno	set and side the entry belongs to
+ *	devname		new ctds name
+ *	dev		new device number
+ *	pname		new pathname
+ *	ep		error return
+ *
+ * RETURN
+ *	METADEVADM_ERR		ioctl failed and ep is updated with the error
+ *	METADEVADM_SUCCESS	success
+ */
+static int
+update_locator_namespace(
+	set_t		setno,
+	side_t		sideno,
+	char		*devname,
+	md_dev64_t	dev,
+	char		*pname,
+	md_error_t	*ep
+)
+{
+	mdnm_params_t	params;
+
+	(void) memset(&params, '\0', sizeof (params));
+	params.mde = mdnullerror;
+	params.setno = setno;
+	params.side = sideno;
+	params.devt = dev;
+	params.devname = (uintptr_t)devname;
+	params.devname_len = strlen(devname);
+	params.pathname = (uintptr_t)pname;
+	params.pathname_len = strlen(pname);
+
+	if (metaioctl(MD_IOCUPD_LOCNM, &params, &params.mde, NULL) == 0)
+		return (METADEVADM_SUCCESS);
+
+	/* Hand the ioctl's error over to the caller. */
+	(void) mdstealerror(ep, &params.mde);
+	return (METADEVADM_ERR);
+}
+
+/*
+ * update_namespace -- issue the MD_IOCUPD_NM ioctl, which updates the
+ *	device name and pathname in the namespace area.
+ *
+ *	setno, sideno	set and side the entry belongs to
+ *	devname		new device name
+ *	dev		new device number; only its minor number is passed on
+ *	key		namespace key of the entry to update
+ *	pname		new pathname
+ *	ep		error return
+ *
+ * RETURN
+ *	METADEVADM_ERR		ioctl failed and ep is updated with the error
+ *	METADEVADM_SUCCESS	success
+ */
+static int
+update_namespace(
+	set_t		setno,
+	side_t		sideno,
+	char		*devname,
+	md_dev64_t	dev,
+	mdkey_t		key,
+	char		*pname,
+	md_error_t	*ep
+)
+{
+	mdnm_params_t	params;
+
+	(void) memset(&params, '\0', sizeof (params));
+	params.mde = mdnullerror;
+	params.setno = setno;
+	params.side = sideno;
+	params.key = key;
+	params.mnum = meta_getminor(dev);
+	params.devname = (uintptr_t)devname;
+	params.devname_len = strlen(devname);
+	params.pathname = (uintptr_t)pname;
+	params.pathname_len = strlen(pname);
+
+	if (metaioctl(MD_IOCUPD_NM, &params, &params.mde, NULL) == 0)
+		return (METADEVADM_SUCCESS);
+
+	/* Hand the ioctl's error over to the caller. */
+	(void) mdstealerror(ep, &params.mde);
+	return (METADEVADM_ERR);
+}
+
+/*
+ * stripS - Strip s<digits> off the end of the ctds name if it exists
+ *
+ * The name is edited in place: trailing digits are skipped backwards and,
+ * if the character in front of them is 's', the string is cut there.  A
+ * name that ends in a bare 's' is cut as well.  An empty name is left
+ * untouched.
+ */
+static void
+stripS(char *name)
+{
+	size_t	len = strlen(name);
+	char	*p;
+
+	/* Nothing to strip, and name[len - 1] would lie before the string. */
+	if (len == 0)
+		return;
+
+	/* gobble number and 's' */
+	p = name + len - 1;
+	for (; (p > name); --p) {
+		/* isdigit() is only defined for unsigned char values. */
+		if (!isdigit((unsigned char)*p))
+			break;
+	}
+
+	if (*p == 's') {
+		*p = '\0';
+	}
+}
+
+/*
+ * getdiskname -- to be used when scanning the input from the -u arg.
+ *	Reduces the input to its cxtxdx form: a leading /dev/dsk/,
+ *	/dev/rdsk/, /dev/ap/dsk/, /dev/ap/rdsk/, /dev/did/dsk/ or
+ *	/dev/did/rdsk/ is dropped when nothing but a single path component
+ *	follows it, and stripS() then removes any slice suffix.  Any other
+ *	input is copied whole before stripS() is applied.  The caller will
+ *	need to free the return value.
+ *
+ * RETURN
+ *	string that has the disk name in it ie. c0t0d0
+ */
+static char *
+getdiskname(
+	char	*name
+)
+{
+	static char	*prefixes[] = {
+		"/dev/dsk/",
+		"/dev/rdsk/",
+		"/dev/ap/dsk/",
+		"/dev/ap/rdsk/",
+		"/dev/did/dsk/",
+		"/dev/did/rdsk/"
+	};
+	char		*start = name;
+	char		*diskname;
+	size_t		plen;
+	int		i;
+
+	for (i = 0; i < (int)(sizeof (prefixes) / sizeof (prefixes[0])); i++) {
+		plen = strlen(prefixes[i]);
+
+		if (strncmp(name, prefixes[i], plen) != 0)
+			continue;
+
+		/* Only a plain leaf name may follow the prefix. */
+		if (strchr(name + plen, '/') != NULL)
+			continue;
+
+		start = name + plen;
+		break;
+	}
+
+	diskname = Strdup(start);
+	stripS(diskname);
+	return (diskname);
+}
+
+/*
+ * has_devid -- look up the device ID recorded for a namespace key by
+ * handing the request to meta_getdidbykey().
+ *
+ * RETURN
+ *	NULL	error
+ *	devid	devid found that corresponds to the given key.
+ */
+static ddi_devid_t
+has_devid(set_t setno, side_t sideno, mdkey_t key, md_error_t *ep)
+{
+	ddi_devid_t	devid;
+
+	devid = meta_getdidbykey(setno, sideno, key, ep);
+	return (devid);
+}
+
+/*
+ * fix_replicanames -- go through the existing list of replicas and check
+ * to see if their disk has moved; if so update the locator namespace.
+ *
+ * For every replica that carries a devid, the devid is translated back
+ * into a list of device names under the replica's search path.  If no
+ * entry matches the stored name and device number, the locator namespace
+ * is updated from the list (unless DEV_NOACTION is set).  Replicas that
+ * cannot be resolved are skipped.
+ *
+ * RETURN
+ *	-1	error (side number or replica list unavailable)
+ *	0	success; individual replica updates may still have failed
+ *
+ * NOTE(review): a failed update_locator_namespace() leaves its error in ep
+ * even though 0 is returned -- confirm callers expect that.
+ */
+static int
+fix_replicanames(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*rlp = NULL;
+	md_replicalist_t	*rl;
+	int			ret = -1;
+	int			match_type = 0;
+	devid_nmlist_t		*disklist = NULL;
+	dev_t			small_dev = (dev_t)NODEV;
+	side_t			sideno;
+	set_t			setno = sp->setno;
+	char			*search_path;
+	int			search_number;
+	char			*ctds_name;
+	char			*path_name;
+	int			i;
+
+	sideno = getmyside(sp, ep);
+	if (sideno == MD_SIDEWILD) {
+		mda_debug("Failed to find the side number\n");
+		return (-1);
+	}
+
+	if (metareplicalist(sp, MD_BASICNAME_OK | PRINT_FAST, &rlp, ep) < 0) {
+		mda_debug("Unable to get a list of replicas\n");
+		return (METADEVADM_ERR);
+	}
+
+	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
+		md_replica_t	*r = rl->rl_repp;
+
+		small_dev = meta_cmpldev(r->r_namep->dev);
+		search_number = mda_findpath(r->r_namep->bname);
+		if (search_number == -1) {
+			mda_debug("replica update: invalid path: %s",
+			    r->r_namep->bname);
+			continue;
+		} else {
+			search_path = plist[search_number].search_path;
+		}
+
+		if (r->r_devid == NULL)
+			continue;
+
+		ret = meta_deviceid_to_nmlist(search_path, r->r_devid,
+		    r->r_minor_name, &disklist);
+
+		mda_debug("replica update: search_path %s\n", search_path);
+
+		if (ret != 0) {
+			/*
+			 * Failed to find the disk, nothing can be done.
+			 * The replica will be marked as bad later.
+			 */
+			mda_debug("replica update: failed to find disk %s\n",
+			    r->r_namep->cname);
+			continue;
+		}
+		mda_debug("replica update: current %s (%p)\n",
+		    r->r_namep->bname, (void *) small_dev);
+
+		/*
+		 * Check to see if the returned disk matches the stored one.
+		 * match_type is reset first so that an empty list cannot
+		 * inherit the result of the previous replica.
+		 */
+		match_type = 0;
+		for (i = 0; disklist[i].dev != NODEV; i++) {
+			match_type = 0;
+
+			mda_debug("replica update: devid list: %s (%p)\n",
+			    disklist[i].devname, (void *) disklist[i].dev);
+
+			if (disklist[i].dev == small_dev) {
+				match_type |= DEV_MATCH;
+			}
+
+			if (strncmp(r->r_namep->bname, disklist[i].devname,
+			    strlen(r->r_namep->bname)) == 0) {
+				match_type |= NAME_MATCH;
+			}
+
+			/*
+			 * break out if some sort of match is found because
+			 * we already match on the devid.
+			 */
+			if (match_type != 0)
+				break;
+		}
+
+		mda_debug("fix_replicanames: match: %x i: %d\n", match_type, i);
+
+		/* An empty list offers nothing to reload from. */
+		if (disklist[0].dev == NODEV) {
+			devid_free_nmlist(disklist);
+			continue;
+		}
+
+		if (match_type == (DEV_MATCH|NAME_MATCH)) {
+			/* no change */
+			mda_debug("replica update: no change %s\n",
+			    disklist[i].devname);
+			devid_free_nmlist(disklist);
+			continue;
+		}
+
+		/* No match found - use the first entry in disklist */
+		if (disklist[i].dev == NODEV)
+			i = 0;
+
+		mda_debug("replica update: reloading %s %p\n",
+		    disklist[i].devname,
+		    (void *) meta_expldev(disklist[i].dev));
+
+		if (firsttime) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Disk movement detected\n"));
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Updating device names in Solaris Volume "
+			    "Manager\n"));
+			firsttime = 0;
+		}
+
+		if (dev_options & DEV_VERBOSE) {
+			char	*devidstr;
+
+			devidstr =
+			    devid_str_encode(r->r_devid, r->r_minor_name);
+			if (devidstr == NULL) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "Failed to encode the devid\n"));
+				devid_free_nmlist(disklist);
+				continue;
+			}
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s changed to %s from device relocation "
+			    "information %s\n"),
+			    (char *)r->r_namep->cname, disklist[i].devname,
+			    devidstr);
+			devid_str_free(devidstr);
+		}
+
+		if (!(dev_options & DEV_NOACTION)) {
+			mda_debug("Updating locator name\n");
+
+			/*
+			 * mda_getpath() fails exactly when the name has no
+			 * '/', so the strrchr() below cannot return NULL
+			 * once it has succeeded.
+			 */
+			path_name = mda_getpath(disklist[i].devname);
+			if (path_name == NULL) {
+				devid_free_nmlist(disklist);
+				continue;
+			}
+			ctds_name = strrchr(disklist[i].devname, '/') + 1;
+
+			if (update_locator_namespace(setno, sideno,
+			    ctds_name, meta_expldev(disklist[i].dev),
+			    path_name, ep) != 0) {
+				mda_debug("replica update: ioctl failed\n");
+				if (dev_options & DEV_VERBOSE) {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "Failed to update locator "
+					    "namespace on change from %s "
+					    "to %s\n"), ctds_name,
+					    disklist[i].devname);
+				}
+			}
+
+			/* Only free what this iteration allocated. */
+			Free(path_name);
+		}
+		devid_free_nmlist(disklist);
+	}
+	metafreereplicalist(rlp);
+	return (0);
+}
+
+/*
+ * pathname_reload - main function for the -r option.  Will reload the
+ *	pathname in both the main namespace and the locator namespace.
+ *	Also, checks both areas for invalid device ID's and prints them
+ *	out.
+ *
+ *	Every namespace key is visited with MD_IOCNXTKEY_NM.  For an entry
+ *	that has a devid, the devid is translated back into device names;
+ *	if none of them matches the stored name and device number, the
+ *	namespace entry is rewritten from the first name found (unless
+ *	DEV_NOACTION is set).  Replica names are then fixed up and the
+ *	kernel is asked for any invalid device ids.
+ *
+ *	If the set is a multi-node diskset that means there are no devid's
+ *	so just return.
+ *
+ *	spp	set to reload
+ *	setno	set number used for the ioctls
+ *	ep	error return
+ *
+ * RETURN
+ *	METADEVADM_ERR		error
+ *	METADEVADM_SUCCESS	success
+ *	METADEVADM_DEVIDINVALID	success, but invalid devids detected
+ *
+ * NOTE(review): the name returned by meta_getnmentbykey() and the string
+ * returned by meta_getdidminorbykey() are never released here -- confirm
+ * whether those helpers hand back allocated memory.
+ */
+int
+pathname_reload(
+	mdsetname_t	**spp,
+	set_t		setno,
+	md_error_t	*ep)
+{
+	char		*drvnmp;
+	minor_t		mnum = 0;
+	md_dev64_t	dev = 0;
+	mdnm_params_t	nm;
+	char		*ctds_name;
+	ddi_devid_t	devidp;
+	md_i_didstat_t	ds;
+	side_t		sideno;
+	char		*search_path = NULL;
+	int		search_number;
+	devid_nmlist_t	*disklist = NULL;
+	char		*minor_name = NULL;
+	char		*devidstr = NULL;
+	char		*path = NULL;
+	int		ret;
+	dev_t		small_dev = (dev_t)NODEV;
+	int		match_type;
+	char		*tmp = NULL;
+	mdsetname_t	*sp = *spp;
+	md_set_desc	*sd;
+	int		i;
+
+	/*
+	 * Check for multi-node diskset and return if it is one.
+	 */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (METADEVADM_ERR);
+
+		if (MD_MNSET_DESC(sd))
+			return (METADEVADM_SUCCESS);
+	}
+
+	/*
+	 * Get the entry of the namespace via the key. To do this
+	 * call MD_IOCNXTKEY until no more.
+	 * For each entry in the namespace we want to check
+	 * for devid and update
+	 */
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.key = MD_KEYWILD;
+
+	sideno = getmyside(*spp, ep);
+	if (sideno == MD_SIDEWILD) {
+		/* failed to find this node in the set */
+		mda_debug("Failed to find the side number\n");
+		return (METADEVADM_ERR);
+	}
+
+	/* LINTED */
+	while (1) {
+		nm.mde = mdnullerror;
+		nm.setno = setno;
+		nm.side = sideno;
+		/* look at each key in the namespace */
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &nm.mde);
+			return (METADEVADM_ERR);
+		}
+
+		if (nm.key == MD_KEYWILD) {
+			/* no more entries */
+			break;
+		}
+
+		/*
+		 * get the nm entry using the key. Then check to see if
+		 * there's a devid associated with this entry
+		 * If not, go onto next key.
+		 */
+		if ((nm.devname = (uintptr_t)meta_getnmentbykey(setno, sideno,
+		    nm.key, &drvnmp, &mnum, &dev, ep)) == 0) {
+			mda_debug("pathname_reload: no name for key: %d\n",
+			    nm.key);
+			continue;
+		}
+
+		mda_debug("pathname_reload: examining %s\n",
+		    (char *)nm.devname);
+
+		if ((devidp = has_devid(setno, sideno, nm.key, ep)) == NULL) {
+			/* metadevices do not have devid's in them */
+			mda_debug("pathname_reload: no devid for %s\n",
+			    (char *)nm.devname);
+			continue;
+		}
+
+		if ((minor_name = meta_getdidminorbykey(setno, sideno,
+		    nm.key, ep)) == NULL) {
+			/*
+			 * In theory this is impossible because if the
+			 * devidp is non-null then the minor_name has
+			 * already been looked up.
+			 */
+			mda_debug("No minor name for %s\n", (char *)nm.devname);
+			free(devidp);
+			continue;
+		}
+		/*
+		 * If there is a devid then we have a real device that
+		 * could have moved.
+		 */
+		devidstr = devid_str_encode(devidp, minor_name);
+		if (devidstr == NULL) {
+			mda_debug("Failed to encode the devid\n");
+			free(devidp);
+			continue;
+		}
+		mda_debug("devid: %s\n", devidstr);
+
+		/*
+		 * Find the search path that should be used. This is an
+		 * optimization to try and prevent a search for the complete
+		 * /dev namespace.
+		 */
+		search_number = mda_findpath((char *)nm.devname);
+		if (search_number == -1) {
+			search_path = "/dev";
+		} else {
+			search_path = plist[search_number].search_path;
+		}
+
+		/* now look for the disk name using the devid */
+		ret = meta_deviceid_to_nmlist(search_path, devidp,
+		    minor_name, &disklist);
+		free(devidp);
+
+		if (ret != 0) {
+			/*
+			 * Failed to find the disk
+			 */
+			devid_str_free(devidstr);
+			continue;
+		}
+
+		small_dev = meta_cmpldev(dev);
+		mda_debug("Old device lookup: %s (%p)\n",
+		    (char *)nm.devname, (void *)small_dev);
+
+		/*
+		 * Check to see if the returned disk matches the stored one.
+		 * match_type is cleared first so that it is defined even
+		 * when the list turns out to be empty.
+		 */
+		match_type = 0;
+		for (i = 0; disklist[i].dev != NODEV; i++) {
+			match_type = 0;
+			mda_debug("From devid lookup: %s (%p)\n",
+			    (char *)disklist[i].devname,
+			    (void *)disklist[i].dev);
+
+			if (disklist[i].dev == small_dev) {
+				match_type |= DEV_MATCH;
+			}
+
+			if (strncmp((char *)nm.devname, disklist[i].devname,
+			    strlen((char *)nm.devname)) == 0) {
+				/* %d wants an int, strlen() gives a size_t */
+				mda_debug("Name match: %s and %s (%d)\n",
+				    disklist[i].devname, (char *)nm.devname,
+				    (int)strlen((char *)nm.devname));
+				match_type |= NAME_MATCH;
+			}
+
+			if (match_type == (DEV_MATCH|NAME_MATCH))
+				break;
+		}
+
+		/* An empty list offers nothing to reload from. */
+		if (disklist[0].dev == NODEV) {
+			devid_str_free(devidstr);
+			devid_free_nmlist(disklist);
+			continue;
+		}
+
+		if (match_type == (DEV_MATCH|NAME_MATCH)) {
+			/* no change */
+			devid_str_free(devidstr);
+			mda_debug("All matched %s\n", disklist[i].devname);
+			devid_free_nmlist(disklist);
+			continue;
+		}
+
+		/* No match found - use the first entry in disklist */
+		i = 0;
+
+		if (firsttime) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Disk movement detected\n"));
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Updating device names in "
+			    "Solaris Volume Manager\n"));
+			firsttime = 0;
+		}
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s changed to %s from device relocation "
+			    "information %s\n"),
+			    (char *)nm.devname, disklist[i].devname,
+			    devidstr);
+		}
+		devid_str_free(devidstr);
+
+		/* need to build up the path of the disk */
+		if ((path = Strdup(disklist[i].devname)) == NULL) {
+			mda_debug("Failed to duplicate path: %s\n",
+			    disklist[i].devname);
+			devid_free_nmlist(disklist);
+			continue;
+		}
+		if ((tmp = strrchr(path, '/')) == NULL) {
+			mda_debug("Failed to parse %s\n", path);
+			devid_free_nmlist(disklist);
+			Free(path);
+			continue;
+		}
+		/* Cut the copy just after the last '/', leaving the directory. */
+		tmp += sizeof (char);
+		*tmp = '\0';
+
+		if ((ctds_name = strrchr(disklist[i].devname, '/')) == NULL) {
+			mda_debug("Failed to parse ctds name: %s\n",
+			    disklist[i].devname);
+			devid_free_nmlist(disklist);
+			Free(path);
+			continue;
+		}
+		ctds_name += sizeof (char);
+
+		mda_debug("Reloading disk %s %s %p\n",
+		    ctds_name, path, (void *) meta_expldev(disklist[i].dev));
+
+		if (!(dev_options & DEV_NOACTION)) {
+			/* Something has changed so update the namespace */
+			if (update_namespace(setno, sideno, ctds_name,
+			    meta_expldev(disklist[i].dev), nm.key, path,
+			    ep) != 0) {
+				mda_debug("Failed to update namespace\n");
+				if (dev_options & DEV_VERBOSE) {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "Failed to update namespace on "
+					    "change from %s to %s\n"),
+					    ctds_name, disklist[i].devname);
+				}
+			}
+		}
+		devid_free_nmlist(disklist);
+		Free(path);
+	}
+
+	if (fix_replicanames(*spp, ep) == -1)
+		mda_debug("Failed to update replicas\n");
+
+	/*
+	 * check for invalid device id's
+	 */
+	(void) memset(&ds, '\0', sizeof (ds));
+	ds.setno = setno;
+	ds.side = sideno;
+	ds.mode = MD_FIND_INVDID;
+	/* get count of number of invalid device id's */
+	if (metaioctl(MD_IOCDID_STAT, &ds, &ds.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &ds.mde);
+		return (METADEVADM_ERR);
+	}
+	if (ds.cnt != 0) {
+		char	*ctdptr, *ctdp;
+		size_t	bufsz = (ds.cnt * ds.maxsz) + 1;
+
+		/*
+		 * we have some invalid device id's so we need to
+		 * print them out
+		 */
+		ds.mode = MD_GET_INVDID;
+		/* malloc buffer for kernel to place devid list into */
+		if ((ctdptr = (char *)Malloc(bufsz)) == 0) {
+			return (METADEVADM_ERR);
+		}
+		/*
+		 * Zero the buffer so the list is terminated even if the
+		 * ioctl fills every entry and writes no terminator itself.
+		 */
+		(void) memset(ctdptr, '\0', bufsz);
+		ds.ctdp = (uintptr_t)ctdptr;
+		/* get actual list of invalid device id's */
+		if (metaioctl(MD_IOCDID_STAT, &ds, &ds.mde, NULL) != 0) {
+			Free(ctdptr);
+			(void) mdstealerror(ep, &ds.mde);
+			return (METADEVADM_ERR);
+		}
+
+		/* print out the invalid devid's */
+		mda_print(dgettext(TEXT_DOMAIN,
+		    "Invalid device relocation information "
+		    "detected in Solaris Volume Manager\n"));
+		mda_print(dgettext(TEXT_DOMAIN,
+		    "Please check the status of the following disk(s):\n"));
+
+		/*
+		 * Entries sit ds.maxsz bytes apart; stop at the first empty
+		 * entry or at the end of the buffer, whichever comes first.
+		 */
+		ctdp = (char *)ds.ctdp;
+		while ((ctdp < (ctdptr + bufsz - 1)) && (*ctdp != '\0')) {
+			mda_print("\t%s\n", ctdp);
+			ctdp += ds.maxsz;
+		}
+		Free(ctdptr);
+		return (METADEVADM_DEVIDINVALID);
+	}
+	return (METADEVADM_SUCCESS);
+}
+
+/*
+ * replica_update_devid - walk the replica list, rlp, and for every
+ * replica whose ctd name starts with the name of the drive dnp ask the
+ * kernel (MD_DB_SETDID) to refresh the device id held for that replica.
+ * Nothing is written when DEV_NOACTION is set in dev_options.
+ *
+ * As a side effect, if *cdevidpp is NULL on entry it is set to the
+ * string form of the device id recorded for the first matching replica,
+ * i.e. the device id as it was before the update.  The string comes from
+ * devid_str_encode() and is left for the caller to release.
+ *
+ * RETURN
+ *	METADEVADM_ERR		error
+ *	METADEVADM_SUCCESS	success
+ */
+static int
+replica_update_devid(
+	md_replicalist_t	*rlp,
+	mddrivename_t		*dnp,
+	set_t			setno,
+	char			**cdevidpp,
+	md_error_t		*ep
+)
+{
+	mddb_config_t		cfg;
+	md_replicalist_t	*cur;
+	ddi_devid_t		new_devid;
+	int			rval = METADEVADM_SUCCESS;
+
+	if (cdevidpp == NULL)
+		return (METADEVADM_ERR);
+
+	/* the drive's devid string must at least decode cleanly */
+	if (devid_str_decode(dnp->devid, &new_devid, NULL) != 0) {
+		mda_debug("Failed to decode %s into a valid devid\n",
+		    dnp->devid);
+		return (METADEVADM_ERR);
+	}
+
+	/* look for replicas that live on the given drive */
+	for (cur = rlp; cur != NULL; cur = cur->rl_next) {
+		md_replica_t	*repp = cur->rl_repp;
+		mdname_t	*namep = repp->r_namep;
+
+		/* prefix compare: the replica name carries a slice suffix */
+		if (strncmp(namep->cname, dnp->cname,
+		    strlen(dnp->cname)) != 0)
+			continue;
+
+		/* remember the old devid the first time we see one */
+		if (*cdevidpp == NULL)
+			*cdevidpp = devid_str_encode(repp->r_devid, NULL);
+		if (*cdevidpp == NULL) {
+			rval = METADEVADM_ERR;
+			break;
+		}
+
+		mda_debug("Updating replica %s, set %d, old devid %s\n",
+		    namep->cname, setno, *cdevidpp);
+
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Updating replica %s of set number %d from "
+			    "device id %s to device id %s\n"),
+			    namep->cname, setno, *cdevidpp, dnp->devid);
+		}
+
+		(void) memset(&cfg, '\0', sizeof (cfg));
+		cfg.c_setno = setno;
+		cfg.c_devt = namep->dev;
+
+		if (dev_options & DEV_NOACTION)
+			continue;
+
+		mda_debug("Updating replica\n");
+
+		/*
+		 * call into kernel to update lb namespace device id
+		 * of given devt
+		 */
+		if (metaioctl(MD_DB_SETDID, &cfg, &cfg.c_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &cfg.c_mde);
+			rval = METADEVADM_ERR;
+			break;
+		}
+	}
+
+	devid_free(new_devid);
+	return (rval);
+}
+
+/*
+ * devid_update -- main routine for the -u option. Will update both the
+ * namespace and the locator block with the correct devid for the
+ * disk specified.
+ *
+ * The work is done in three stages:
+ *   1. if this host owns the set, refresh the devid kept for every
+ *	replica that lives on the disk (replica_update_devid());
+ *   2. for a non-local set, rewrite the disk record that the local set
+ *	keeps for the disk (update_namespace() + MD_SETNMDID);
+ *   3. unless DEV_LOCAL_SET restricts us to the local set, walk the
+ *	namespace of the set and update the first entry found for the
+ *	disk that carries a devid.
+ * With DEV_NOACTION set nothing is written, only reported.
+ *
+ * spp		set being operated on
+ * setno	number of that set
+ * ctd		name of the disk to update
+ * ep		error return
+ *
+ * RETURN
+ *	METADEVADM_ERR		error
+ *	METADEVADM_SUCCESS	success
+ */
+static int
+devid_update(
+	mdsetname_t	**spp,
+	set_t		setno,
+	char		*ctd,
+	md_error_t	*ep
+)
+{
+	md_drive_desc		*dd, *ddp;
+	mddrivename_t		*dnp;
+	mdnm_params_t		nm;
+	ddi_devid_t		devidp;
+	side_t			side;
+	char			*old_cdevidp = NULL;
+	md_replicalist_t	*rlp = NULL;
+	int			rval = METADEVADM_ERR;
+	mdname_t		*np = NULL;
+	uint_t			rep_slice;
+	char			*pathname = NULL;
+	char			*diskname = NULL;
+	int			fd = -1;
+	int			len;
+	char			*fp;
+
+	side = getmyside(*spp, ep);
+	if (side == MD_SIDEWILD) {
+		/* failed to find this node in the set */
+		mda_debug("Failed to find the side number\n");
+		return (METADEVADM_ERR);
+	}
+
+	if ((dnp = metadrivename(spp, ctd, ep)) == NULL) {
+		mda_debug("Failed to create a dnp for %s\n", ctd);
+		return (METADEVADM_ERR);
+	}
+	if (dnp->devid == NULL) {
+		/*
+		 * Disk does not have a devid! So cannot update the
+		 * devid within the replica.
+		 */
+		mda_debug("%s does not have a devid\n", dnp->cname);
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s does not have a device id. Cannot update "
+			    "device id if none exists\n"), ctd);
+		}
+		return (METADEVADM_ERR);
+	}
+
+	mda_debug("Devid update to: %s\n", dnp->devid);
+
+	/*
+	 * Check if we own the set, if we do then do some processing
+	 * on the replicas.
+	 */
+	if (meta_check_ownership(*spp, ep) == 0) {
+		int	rep_rval;
+
+		/* get the replicas */
+		if (metareplicalist(*spp, MD_BASICNAME_OK | PRINT_FAST, &rlp,
+		    ep) < 0)
+			return (METADEVADM_ERR);
+
+		/* update the devids in the replicas if necessary */
+		rep_rval = replica_update_devid(rlp, dnp, setno,
+		    &old_cdevidp, ep);
+		metafreereplicalist(rlp);
+
+		/*
+		 * replica_update_devid() may have filled in old_cdevidp
+		 * before it failed, so leave through the common exit
+		 * which releases it (rval is still METADEVADM_ERR).
+		 */
+		if (rep_rval != METADEVADM_SUCCESS)
+			goto out;
+	}
+
+	/*
+	 * If this is not the LOCAL set then need to update the LOCAL
+	 * replica with the new disk record.
+	 */
+
+	if (setno != MD_LOCAL_SET) {
+		mda_debug("Non-local set: %d side %d\n", setno, side);
+
+		/*
+		 * Need to find the disk record within the set and then
+		 * update it.
+		 */
+		if ((dd =
+		    metaget_drivedesc(*spp, MD_FULLNAME_ONLY, ep)) == NULL) {
+			if (! mdisok(ep))
+				goto out;
+			/* no disks in the set - no point continuing */
+			mda_debug("No disks in diskset\n");
+			rval = METADEVADM_SUCCESS;
+			goto out;
+		}
+
+		for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+			if (strncmp(ddp->dd_dnp->cname, dnp->cname,
+			    strlen(dnp->cname)) == 0)
+				break;
+		}
+
+		if (ddp == NULL) {
+			/* failed to find the disk in the set */
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s not found in set %s. Check your syntax\n"),
+			    ctd, (*spp)->setname);
+			(void) mddserror(ep, MDE_DS_DRIVENOTINSET, setno, NULL,
+			    ctd, (*spp)->setname);
+			goto out;
+		}
+
+		/*
+		 * Now figure out the correct slice, for a diskset the slice
+		 * we care about is always the 'replica' slice.
+		 */
+		if (meta_replicaslice(dnp, &rep_slice, ep) != 0) {
+			mda_debug("Unable to find replica slice for %s\n",
+			    dnp->cname);
+			goto out;
+		}
+
+		mda_debug("slice no: %d disk %s\n", rep_slice, dnp->cname);
+
+		if ((np = metaslicename(dnp, rep_slice, ep)) == NULL) {
+			mda_debug("Unable to build namespace\n");
+			goto out;
+		}
+
+		mda_debug("check: ctdname: %s\n", np->cname);
+		mda_debug("check: ctdname: %s\n", np->rname);
+		mda_debug("check: ctdname: %s\n", np->bname);
+
+		if (!(dev_options & DEV_NOACTION)) {
+
+			mda_debug("Updating record: key %d name %s\n",
+			    ddp->dd_dnp->side_names_key, np->cname);
+
+			pathname = mda_getpath(np->bname);
+
+			if (update_namespace(MD_LOCAL_SET, side + SKEW,
+			    np->cname, np->dev, ddp->dd_dnp->side_names_key,
+			    pathname, ep) != 0) {
+				goto out;
+			}
+
+			/*
+			 * Now update the devid entry as well, this works
+			 * correctly because the prior call to
+			 * update_namespace() above puts the correct dev_t
+			 * in the namespace which will then be resolved
+			 * to the new devid by the ioctl now called.
+			 */
+			nm.mde = mdnullerror;
+			nm.setno = MD_LOCAL_SET;
+			nm.side = side + SKEW;
+			nm.key = ddp->dd_dnp->side_names_key;
+			if (metaioctl(MD_SETNMDID, &nm, &nm.mde, NULL) != 0) {
+				(void) mdstealerror(ep, &nm.mde);
+				goto out;
+			}
+		}
+	}
+
+	if ((dev_options & DEV_LOCAL_SET) && (setno != MD_LOCAL_SET)) {
+		/*
+		 * Only want to update the local set so do not continue.
+		 */
+		rval = METADEVADM_SUCCESS;
+		goto out;
+	}
+
+	/*
+	 * Iterate through all of the metadevices looking for the
+	 * passed in ctd. If found then update the devid
+	 */
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.key = MD_KEYWILD;
+	/* LINTED */
+	while (1) {
+		nm.mde = mdnullerror;
+		nm.setno = setno;
+		nm.side = side;
+
+		/* search each namespace entry */
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &nm.mde);
+			rval = METADEVADM_ERR;
+			goto out;
+		}
+		if (nm.key == MD_KEYWILD) {
+			/* ran off the end of the namespace; rval is ERR */
+			if (setno != MD_LOCAL_SET) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "%s not found in set %s. Check your "
+				    "syntax\n"), ctd, (*spp)->setname);
+				goto out;
+			} else {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "%s not found in local set. "
+				    "Check your syntax\n"), ctd);
+				goto out;
+			}
+		}
+
+		nm.devname = (uintptr_t)meta_getnmentbykey(setno, side, nm.key,
+		    NULL, NULL, NULL, ep);
+		if (nm.devname == NULL) {
+			rval = METADEVADM_ERR;
+			goto out;
+		}
+
+		/*
+		 * Release the disk name built for the previous namespace
+		 * entry; only the last one is freed at 'out'.
+		 */
+		if (diskname != NULL) {
+			Free(diskname);
+			diskname = NULL;
+		}
+		diskname = getdiskname((char *)nm.devname);
+
+		mda_debug("Checking %s with %s\n", diskname, dnp->cname);
+		if (strcmp(diskname, dnp->cname) != 0)
+			continue;
+
+		mda_debug("Updating device %s in namespace\n",
+		    (char *)nm.devname);
+
+		/*
+		 * found disk, does it have a devid within the namespace ?
+		 * It might not because it does not support devid's or was
+		 * put into the namespace when there was no devid support
+		 */
+		if ((devidp = has_devid(setno, side, nm.key, ep)) == NULL) {
+			mda_debug("%s has no devid in the namespace",
+			    (char *)nm.devname);
+			if (dev_options & DEV_VERBOSE) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "SVM has no device id for "
+				    "%s, cannot update.\n"),
+				    (char *)nm.devname);
+			}
+			continue; /* no devid. go on to next */
+		}
+		if (old_cdevidp == NULL) {
+			old_cdevidp = devid_str_encode(devidp, NULL);
+		}
+		free(devidp);
+
+		/*
+		 * has devid so update namespace, note the key has been set
+		 * by the prior MD_IOCNXTKEY_NM ioctl.
+		 */
+		nm.mde = mdnullerror;
+		nm.setno = setno;
+		nm.side = side;
+		if (!(dev_options & DEV_NOACTION)) {
+			/*
+			 * The call below may fail if the -u option is being
+			 * used to update a disk that has been replaced.
+			 * The -u option to metadevadm should not be used
+			 * for this purpose because we trust the dev_t of
+			 * the device in the replica and if we have replaced
+			 * the device and it is a fibre one then the dev_t
+			 * will have changed. This means we end up looking for
+			 * the devid of a non-existent disk and we subsequently
+			 * fail with NODEVID.
+			 */
+			if (metaioctl(MD_SETNMDID, &nm,
+			    &nm.mde, NULL) != 0) {
+				if (dev_options & DEV_VERBOSE) {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "SVM failed to update the device "
+					    "id for %s probably due to both "
+					    "devt and device id changing.\n"),
+					    (char *)nm.devname);
+				}
+				(void) mdstealerror(ep, &nm.mde);
+				mde_perror(ep, "");
+				rval = METADEVADM_ERR;
+				goto out;
+			}
+		}
+		if (old_cdevidp == NULL) {
+			rval = METADEVADM_ERR;
+			goto out;
+		}
+		break;
+	} /* end while */
+
+	mda_print(dgettext(TEXT_DOMAIN,
+	    "Updating Solaris Volume Manager device relocation "
+	    "information for %s\n"), ctd);
+
+	mda_print(dgettext(TEXT_DOMAIN,
+	    "Old device reloc information:\n\t%s\n"), old_cdevidp);
+
+	/* read the devid back from slice 0 of the raw disk and report it */
+	len = strlen(dnp->rname) + strlen("s0");
+	if ((fp = (char *)Malloc(len + 1)) == NULL) {
+		mda_print(dgettext(TEXT_DOMAIN,
+		    "insufficient memory, device Reloc info not "
+		    "available\n"));
+	} else {
+		(void) snprintf(fp, len + 1, "%ss0", dnp->rname);
+		if ((fd = open(fp, O_RDONLY|O_NDELAY)) < 0) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Open of %s failed\n"), fp);
+		} else {
+			int		rc = -1;
+			ddi_devid_t	devid1 = NULL;
+			char		*cdevidp;
+
+			rc = devid_get(fd, &devid1);
+			if (close(fd) < 0) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "Close of %s failed\n"), fp);
+			}
+			if (rc != 0) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "Unable to obtain device "
+				    "Reloc info for %s\n"), fp);
+			} else {
+				cdevidp = devid_str_encode(devid1, NULL);
+				if (cdevidp == NULL) {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "Unable to print "
+					    "device Reloc info for %s\n"), fp);
+				} else {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "New device reloc "
+					    "information:\n\t%s\n"), cdevidp);
+					devid_str_free(cdevidp);
+				}
+				devid_free(devid1);
+			}
+		}
+		Free(fp);
+	}
+
+	rval = METADEVADM_SUCCESS;
+
+out:
+	if (diskname)
+		Free(diskname);
+	if (pathname)
+		Free(pathname);
+	if (old_cdevidp) {
+		devid_str_free(old_cdevidp);
+	}
+	return (rval);
+
+}
+
+/*
+ * Check the ctd name of the disk to see if the disk has moved. If it
+ * has moved then the newname is returned in 'newname', it is up to
+ * the caller to free the memory associated with it.
+ *
+ * The devid stored for the disk is looked up in the device tree and the
+ * result is compared with the name and dev_t recorded in the namespace
+ * for (setno, sideno).  If either differs the namespace entry is
+ * rewritten via update_namespace(), unless DEV_NOACTION is set.
+ * Multi-node disksets are not processed and report success.
+ *
+ * spp		set whose namespace is examined
+ * setno	number of that set
+ * sideno	side number to use within the namespace
+ * dnp		drive to check
+ * newname	if non-NULL, receives a Strdup()'d block device name of
+ *		the disk when it is found under a different name/dev_t
+ * ep		error return
+ *
+ * RETURN
+ *	METADEVADM_ERR		error
+ *	METADEVADM_SUCCESS	success
+ *	METADEVADM_DISKMOVE	success, and the disk has moved
+ *	METADEVADM_DSKNAME_ERR	error creating the disk name structures.
+ */
+int
+meta_upd_ctdnames(
+	mdsetname_t	**spp,
+	set_t		setno,
+	side_t		sideno,
+	mddrivename_t	*dnp,
+	char		**newname,
+	md_error_t	*ep
+)
+{
+	char		*drvnmp;
+	int		i;
+	minor_t		mnum = 0;
+	md_dev64_t	dev = 0;
+	dev_t		small_dev = (dev_t)NODEV;
+	mdnm_params_t	nm;
+	char		*pathname = NULL;	/* only set when acting */
+	char		*minor_name = NULL;
+	ddi_devid_t	devidp;
+	devid_nmlist_t	*disklist = NULL;
+	int		ret = 0;
+	mdsidenames_t	*snp;
+	int		match_type = 0;		/* stays 0 for an empty list */
+	int		search_number = -1;
+	char		*search_type = NULL;
+	char		*search_path = NULL;
+	uint_t		rep_slice;
+	mddrivename_t	*newdnp;
+	mdname_t	*np;
+	mdsetname_t	*sp = *spp;
+	md_set_desc	*sd;
+
+	/*
+	 * setno should always be 0 but we're going to
+	 * check for multi-node diskset and return if it is one.
+	 */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (METADEVADM_ERR);
+
+		if (MD_MNSET_DESC(sd))
+			return (METADEVADM_SUCCESS);
+	}
+
+	if (dnp->devid == NULL) {
+		/* no devid, nothing can be done */
+		mda_debug("meta_upd_ctdnames: %s has no devid\n", dnp->cname);
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s has no devid, cannot detect "
+			    "disk movement for this disk.\n"), dnp->cname);
+		}
+		return (ret);
+	}
+
+	/*
+	 * Find the correct side name for the disk. There is a sidename
+	 * for each host associated with the diskset.
+	 */
+	for (snp = dnp->side_names; snp != NULL; snp = snp->next) {
+		mda_debug("meta_upd_ctdnames: %s %d args: setno %d sideno %d\n",
+		    snp->cname, snp->sideno, setno, sideno);
+		/* only use SKEW for the local replica */
+		if (setno == 0) {
+			if (snp->sideno + SKEW == sideno)
+				break;
+		} else {
+			if (snp->sideno == sideno)
+				break;
+		}
+	}
+
+	if (snp == NULL) {
+		/*
+		 * Failed to find the side name, this should not
+		 * be possible. However if it does happen this is an
+		 * indication of an inconsistent replica - something
+		 * might have gone wrong during an add or a delete of
+		 * a host.
+		 */
+		mda_debug("Unable to find the side information for disk %s",
+		    dnp->cname);
+		(void) mddserror(ep, MDE_DS_HOSTNOSIDE, (*spp)->setno, mynode(),
+		    NULL, dnp->cname);
+		return (METADEVADM_ERR);
+	}
+	/*
+	 * Find the type of device we are to be searching on
+	 */
+	search_number = mda_findpath(snp->cname);
+	if (search_number == -1) {
+		search_path = "/dev";
+		search_type = DEVID_MINOR_NAME_ALL;
+	} else {
+		search_path = plist[search_number].search_path;
+		search_type = plist[search_number].search_type;
+	}
+
+	mda_debug("Search path :%s searth_type: %x\n",
+	    search_path, (int)search_type);
+	(void) memset(&nm, '\0', sizeof (nm));
+
+	nm.mde = mdnullerror;
+	nm.setno = setno;
+	nm.side = sideno;
+
+	/*
+	 * Get the devname from the name space.
+	 */
+	if ((nm.devname = (uintptr_t)meta_getnmentbykey(setno, sideno,
+	    dnp->side_names_key, &drvnmp, &mnum, &dev, ep)) == NULL) {
+		return (METADEVADM_ERR);
+	}
+
+	ret = devid_str_decode(dnp->devid, &devidp, &minor_name);
+	devid_str_free(minor_name);
+
+	if (ret != 0) {
+		/*
+		 * Failed to decode the devid.  No devid was produced, so
+		 * there is nothing to hand to devid_free() here.
+		 */
+		return (METADEVADM_ERR);
+	}
+
+	/*
+	 * Use the stored devid to find the existing device node and check
+	 * to see if the disk has moved. Use the raw devices as the name
+	 * of the disk is stored as the raw device, if this is not done
+	 * then the disk will not be found.
+	 */
+	ret = meta_deviceid_to_nmlist(search_path, devidp,
+	    search_type, &disklist);
+
+	if (ret != 0) {
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Device ID %s last associated with "
+			    "disk %s no longer found in system\n"),
+			    dnp->devid, dnp->cname);
+		}
+		devid_free(devidp);
+		/* the lookup failed, so a list may never have been built */
+		if (disklist != NULL)
+			devid_free_nmlist(disklist);
+		return (METADEVADM_SUCCESS);
+	}
+
+	small_dev = meta_cmpldev(dev);
+	mda_debug("Old device lookup: %s (%p)\n",
+	    (char *)nm.devname, (void *)small_dev);
+	/*
+	 * Check to see if the returned disk matches the stored one
+	 */
+	for (i = 0; disklist[i].dev != NODEV; i++) {
+		match_type = 0;
+		mda_debug("From devid lookup: %s (%p)\n",
+		    disklist[i].devname, (void *)disklist[i].dev);
+
+		if (disklist[i].dev == small_dev) {
+			match_type |= DEV_MATCH;
+		}
+
+		if (strncmp((char *)nm.devname, disklist[i].devname,
+		    strlen((char *)nm.devname)) == 0) {
+			match_type |= NAME_MATCH;
+		}
+
+		if (match_type != 0)
+			break;
+	}
+	devid_free(devidp);
+
+	mda_debug("meta_upd_ctdnames: match: %x i: %d\n", match_type, i);
+
+	if (match_type == (DEV_MATCH|NAME_MATCH)) {
+		/* no change */
+		devid_free_nmlist(disklist);
+		return (METADEVADM_SUCCESS);
+	}
+
+	/* No match found - use the first entry in disklist */
+	if (disklist[i].dev == NODEV)
+		i = 0;
+
+	if (!(match_type & DEV_MATCH)) {
+		/* did not match on the dev, so dev_t has changed */
+		mda_debug("Did not match on dev: %p %p\n",
+		    (void *) small_dev, (void *) disklist[i].dev);
+		dev = meta_expldev(disklist[i].dev);
+	}
+
+	if (!(match_type & NAME_MATCH)) {
+		mda_debug("Did not match on name: %s (%p)\n",
+		    (char *)nm.devname, (void *) disklist[i].dev);
+	}
+
+	/*
+	 * If here, then the name in the disklist is the one we
+	 * want in any case so use it.
+	 */
+	mda_debug("devname: %s\n", disklist[i].devname);
+	/*
+	 * Need to remove the slice as metadrivename() expects a diskname
+	 */
+	stripS(disklist[i].devname);
+	/*
+	 * Build an mddrivename_t to use
+	 */
+	if ((newdnp = metadrivename(spp, disklist[i].devname, ep)) == NULL) {
+		mda_debug("Unable to make a dnp out of %s\n",
+		    disklist[i].devname);
+		devid_free_nmlist(disklist);
+		return (METADEVADM_DSKNAME_ERR);
+	}
+	/*
+	 * Need to find the correct slice used for the replica
+	 */
+	if (meta_replicaslice(newdnp, &rep_slice, ep) != 0) {
+		devid_free_nmlist(disklist);
+		return (METADEVADM_DSKNAME_ERR);
+	}
+
+	if ((np = metaslicename(newdnp, rep_slice, ep)) == NULL) {
+		mda_debug("Failed to build an np for %s\n", dnp->rname);
+		devid_free_nmlist(disklist);
+		return (METADEVADM_DSKNAME_ERR);
+	}
+	mda_debug("check: cname: %s\n", np->cname);
+	mda_debug("check: rname: %s\n", np->rname);
+	mda_debug("check: bname: %s\n", np->bname);
+
+	if (newname != NULL)
+		*newname = Strdup(np->bname);
+
+	if (!(dev_options & DEV_NOACTION)) {
+
+		mda_debug("update namespace\n");
+
+		/* get the block path */
+		pathname = mda_getpath(np->bname);
+
+		if (update_namespace(setno, sideno, np->cname,
+		    dev, dnp->side_names_key, pathname, ep) != 0) {
+			/* finished with the list so return the memory */
+			if (pathname != NULL)
+				Free(pathname);
+			devid_free_nmlist(disklist);
+			return (METADEVADM_ERR);
+		}
+	}
+	/*
+	 * finished with the list so return the memory; pathname is only
+	 * allocated when the namespace was actually updated above.
+	 */
+	if (pathname != NULL)
+		Free(pathname);
+	devid_free_nmlist(disklist);
+	ret = METADEVADM_DISKMOVE;
+	return (ret);
+}
+
+/*
+ * meta_fixdevid -- entry point for fixing up device id information.
+ *
+ * 'options' is OR'ed into the file-wide dev_options and then:
+ *   DEV_RELOAD	for a non-local set, every drive of the set is first
+ *		checked against the local set's namespace for movement
+ *		(meta_upd_ctdnames()); then the namespace of the set is
+ *		reloaded (pathname_reload()).
+ *   DEV_UPDATE	the devid of the disk named by 'diskname' is updated
+ *		(devid_update()).
+ * Multi-node disksets are not processed and report success.
+ *
+ * RETURN
+ *	the result of pathname_reload() or devid_update(),
+ *	METADEVADM_ERR if the preparatory checks fail,
+ *	0 if neither DEV_RELOAD nor DEV_UPDATE is set.
+ */
+int
+meta_fixdevid(
+	mdsetname_t	*sp,
+	mddevopts_t	options,
+	char		*diskname,
+	md_error_t	*ep
+)
+{
+	set_t		setno = sp->setno;
+	int		ret = 0;
+	char		*pathname = NULL;
+	mdsetname_t	*local_sp = NULL;
+	md_drive_desc	*d = NULL;
+	char		*newname = NULL;
+	md_drive_desc	*dd;
+	side_t		sideno;
+	md_set_desc	*sd;
+
+	/* if MN diskset just return */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			return (METADEVADM_ERR);
+		}
+		if (MD_MNSET_DESC(sd))
+			return (METADEVADM_SUCCESS);
+	}
+
+	dev_options |= options;
+	mda_debug("dev_options: %x\n", dev_options);
+	if (dev_options & DEV_RELOAD) {
+		/*
+		 * If it's not the local set we need to check the local
+		 * namespace to see if disks have moved as it contains
+		 * entries for the disks in the set.
+		 */
+		if (setno != MD_LOCAL_SET) {
+			side_t	myside;
+
+			if ((dd = metaget_drivedesc(sp, MD_BASICNAME_OK |
+			    PRINT_FAST, ep)) == NULL) {
+				mde_perror(ep, "");
+				mdclrerror(ep);
+				return (METADEVADM_ERR);
+			}
+
+			/* meta_upd_ctdnames() dereferences the set name */
+			local_sp = metasetname(MD_LOCAL_NAME, ep);
+			if (local_sp == NULL) {
+				mda_debug("Failed to find the local set\n");
+				return (METADEVADM_ERR);
+			}
+
+			/* same check devid_update() makes on getmyside() */
+			myside = getmyside(sp, ep);
+			if (myside == MD_SIDEWILD) {
+				mda_debug("Failed to find the side number\n");
+				return (METADEVADM_ERR);
+			}
+			sideno = myside + SKEW;
+
+			for (d = dd; d != NULL; d = d->dd_next) {
+				/*
+				 * Actually do the check of the disks.
+				 */
+				newname = NULL;
+				ret = meta_upd_ctdnames(&local_sp, 0, sideno,
+				    d->dd_dnp, &newname, ep);
+
+				/*
+				 * The new name is not used here, but it was
+				 * allocated on our behalf and is ours to
+				 * release.
+				 */
+				if (newname != NULL) {
+					Free(newname);
+					newname = NULL;
+				}
+
+				if ((ret == METADEVADM_ERR) ||
+				    (ret == METADEVADM_DSKNAME_ERR)) {
+					/* check failed in unknown manner */
+					mda_debug("meta_upd_ctdnames failed\n");
+					return (METADEVADM_ERR);
+				}
+			}
+		}
+
+		/* do a reload of the devid namespace */
+		ret = pathname_reload(&sp, setno, ep);
+	} else if (dev_options & DEV_UPDATE) {
+		pathname = getdiskname(diskname);
+		ret = devid_update(&sp, setno, pathname, ep);
+		free(pathname);
+	}
+	return (ret);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_devstamp.c b/usr/src/lib/lvm/libmeta/common/meta_devstamp.c
new file mode 100644
index 0000000000..1a3cf3e1ce
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_devstamp.c
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1993-2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * get timestamp from device
+ */
+
+#include <meta.h>
+
+/*
+ * getdevstamp -- read the vtoc of the drive through its MD_SLICE0 raw
+ * device and hand back, in *stamp, the timestamp of the partition that
+ * meta_getvtoc() reports.
+ *
+ * RETURN
+ *	0	success, *stamp is valid
+ *	-1	failure, ep describes the error
+ */
+int
+getdevstamp(
+	mddrivename_t	*dnp,
+	time_t		*stamp,		/* return timestamp here */
+	md_error_t	*ep
+)
+{
+	mdname_t	*slicep;
+	struct vtoc	label;
+	int		part;
+	int		devfd;
+	int		rv;
+
+	slicep = metaslicename(dnp, MD_SLICE0, ep);
+	if (slicep == NULL)
+		return (-1);
+
+	/* open given device */
+	devfd = open(slicep->rname, O_RDONLY | O_NDELAY, 0);
+	if (devfd < 0)
+		return (mdsyserror(ep, errno, slicep->cname));
+
+	/* re-read vtoc, the device is closed whatever the outcome */
+	rv = meta_getvtoc(devfd, slicep->cname, &label, &part, ep);
+	(void) close(devfd);		/* sd/ssd bug */
+	if (rv == -1)
+		return (-1);
+
+	/* return timestamp, success */
+	*stamp = label.timestamp[part];
+	return (0);
+}
+
+/*
+ * setdevstamp -- write the current time into the vtoc timestamp of the
+ * partition reported by meta_getvtoc() for the drive's MD_SLICE0 device,
+ * then read the vtoc back to verify that the timestamp stuck.  The time
+ * written is also returned in *stamp.
+ *
+ * returns
+ *	0 on success,
+ *	ENOTSUP if it's not a device with a vtoc
+ *	-1 on failure
+ */
+int
+setdevstamp(
+	mddrivename_t	*dnp,
+	time_t		*stamp,		/* returned timestamp */
+	md_error_t	*ep
+)
+{
+	time_t		now = time(NULL);
+	mdname_t	*slicep;
+	struct vtoc	label;
+	int		part;
+	int		devfd;
+	int		rv;
+
+	slicep = metaslicename(dnp, MD_SLICE0, ep);
+	if (slicep == NULL)
+		return (-1);
+
+	/* open for vtoc */
+	devfd = open(slicep->rname, O_RDWR | O_NDELAY, 0);
+	if (devfd < 0)
+		return (mdsyserror(ep, errno, slicep->cname));
+
+	if (meta_getvtoc(devfd, slicep->cname, &label, &part, ep) == -1) {
+		(void) close(devfd);
+		return ((part == VT_ENOTSUP) ? ENOTSUP : -1);
+	}
+
+	/* stamp the label and report the value that went into it */
+	label.timestamp[part] = now;
+	*stamp = label.timestamp[part];
+
+	if (meta_setvtoc(devfd, slicep->cname, &label, ep) == -1) {
+		(void) close(devfd);
+		return (-1);
+	}
+
+	/* Clear the timestamp so the re-read below must supply it */
+	label.timestamp[part] = 0;
+
+	rv = meta_getvtoc(devfd, slicep->cname, &label, &part, ep);
+	(void) close(devfd);		/* sd/ssd bug */
+	if (rv == -1)
+		return (-1);
+
+	if (*stamp == label.timestamp[part])
+		return (0);
+
+	return (mddeverror(ep, MDE_CANTVERIFY_VTOC, NODEV64,
+	    slicep->cname));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_error.c b/usr/src/lib/lvm/libmeta/common/meta_error.c
new file mode 100644
index 0000000000..0c359f344b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_error.c
@@ -0,0 +1,2309 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * print metadevice errors
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+
+#include <syslog.h>
+
+/*
+ * mdclrerror -- reset an error structure: release the name, host and
+ * extra strings it holds (if any) and zero the whole structure.
+ */
+void
+mdclrerror(
+	md_error_t	*ep
+)
+{
+	char	*held[3];
+	int	i;
+
+	held[0] = ep->name;
+	held[1] = ep->host;
+	held[2] = ep->extra;
+
+	for (i = 0; i < 3; i++) {
+		if (held[i] != NULL)
+			Free(held[i]);
+	}
+
+	(void) memset(ep, '\0', sizeof (*ep));
+}
+
+/*
+ * cook names
+ *
+ * md_name -- return a freshly allocated name for the metadevice with
+ * minor number mnum.  If get_mdname() knows no name, one of the form
+ * "<set>/d<unit>" is made up from the minor number.
+ */
+static char *
+md_name(
+	minor_t	mnum
+)
+{
+	char	*known = get_mdname(mnum);
+	char	fake[40];
+
+	if (known != NULL)
+		return (Strdup(known));
+
+	/* no name on record, fake one */
+	(void) sprintf(fake, "%lu/d%lu", MD_MIN2SET(mnum),
+	    MD_MIN2UNIT(mnum));
+	return (Strdup(fake));
+}
+
+/*
+ * dev_name -- return a freshly allocated name for device 'dev' of set
+ * 'setno'.  NODEV64 yields the localized text "unknown device"; a device
+ * get_devname() cannot name yields "<major>.<minor>".
+ */
+static char *
+dev_name(
+	set_t		setno,
+	md_dev64_t	dev
+)
+{
+	char	*known;
+	char	fake[40];
+
+	if (dev == NODEV64)
+		return (Strdup(dgettext(TEXT_DOMAIN, "unknown device")));
+
+	known = get_devname(setno, dev);
+	if (known != NULL)
+		return (Strdup(known));
+
+	/* no name on record, fake one */
+	(void) sprintf(fake, "%lu.%lu", meta_getmajor(dev),
+	    meta_getminor(dev));
+	return (Strdup(fake));
+}
+
+/*
+ * hsp_name -- return a freshly allocated name for hot spare pool 'hsp'.
+ * If get_hspname() knows no name, one of the form "<set>/hsp<nnn>" is
+ * made up from the pool id.
+ */
+static char *
+hsp_name(
+	hsp_t	hsp
+)
+{
+	char	*known = get_hspname(hsp);
+	char	fake[40];
+
+	if (known != NULL)
+		return (Strdup(known));
+
+	(void) sprintf(fake, "%u/hsp%03u", HSP_SET(hsp), HSP_ID(hsp));
+	return (Strdup(fake));
+}
+
+/*
+ * set_name -- return a freshly allocated name for set number 'setno',
+ * or NULL for MD_SET_BAD.  If the set cannot be looked up the text
+ * "setno <n>" is returned instead; the lookup error is discarded.
+ */
+static char *
+set_name(
+	set_t	setno
+)
+{
+	md_error_t	lookup_err = mdnullerror;
+	mdsetname_t	*setp;
+	char		fake[40];
+
+	if (setno == MD_SET_BAD)
+		return (NULL);
+
+	setp = metasetnosetname(setno, &lookup_err);
+	if (setp != NULL)
+		return (Strdup(setp->setname));
+
+	/* lookup failed: drop its error and fake a name */
+	mdclrerror(&lookup_err);
+	(void) sprintf(fake, "setno %u", setno);
+	return (Strdup(fake));
+}
+
+/*
+ * fill in all the appropriate md_error_t fields
+ *
+ * Any host and name already held by ep are released and replaced:
+ *   host	'name' for an RPC class error when a non-empty name is
+ *		given (the name is then consumed), otherwise mynode().
+ *   name	'name' if it is (still) non-empty, otherwise a name derived
+ *		from the class specific part of the error: device,
+ *		metadevice, component, hot spare (pool) or set name.  For
+ *		classes with nothing to derive from it stays NULL.
+ */
+static void
+metacookerror(
+	md_error_t	*ep,		/* generic error */
+	char		*name		/* optional name or host */
+)
+{
+	/* get host name */
+	if (ep->host != NULL) {
+		Free(ep->host);
+		ep->host = NULL;
+	}
+	if ((ep->info.errclass == MDEC_RPC) &&
+	    (name != NULL) && (*name != '\0')) {
+		ep->host = Strdup(name);
+		name = NULL;
+	} else
+		ep->host = Strdup(mynode());
+
+	/* get appropriate name */
+	if (ep->name != NULL) {
+		Free(ep->name);
+		ep->name = NULL;
+	}
+	if ((name != NULL) && (*name != '\0')) {
+		ep->name = Strdup(name);
+	} else {
+		switch (ep->info.errclass) {
+
+		/* can't do anything about these */
+		case MDEC_VOID:
+		case MDEC_SYS:
+		case MDEC_RPC:
+		default:
+			break;
+
+		/* device name */
+		case MDEC_DEV:
+		{
+			md_dev_error_t	*ip =
+			    &ep->info.md_error_info_t_u.dev_error;
+
+			ep->name = dev_name(MD_SET_BAD, ip->dev);
+			break;
+		}
+
+		/* device name */
+		case MDEC_USE:
+		{
+			md_use_error_t	*ip =
+			    &ep->info.md_error_info_t_u.use_error;
+
+			ep->name = dev_name(MD_SET_BAD, ip->dev);
+			if (ip->where == NULL) {
+				ip->where = Strdup(dgettext(TEXT_DOMAIN,
+				    "unknown"));
+			}
+			break;
+		}
+
+		/* metadevice name */
+		case MDEC_MD:
+		{
+			md_md_error_t	*ip =
+			    &ep->info.md_error_info_t_u.md_error;
+
+			ep->name = md_name(ip->mnum);
+			break;
+		}
+
+		/* component name */
+		case MDEC_COMP:
+		{
+			md_comp_error_t	*ip =
+			    &ep->info.md_error_info_t_u.comp_error;
+			char		*mdname, *devname;
+			size_t		len;
+
+			/* "<metadevice>: <device>" */
+			mdname = md_name(ip->comp.mnum);
+			devname = dev_name(MD_MIN2SET(ip->comp.mnum),
+			    ip->comp.dev);
+			len = strlen(mdname) + strlen(": ")
+			    + strlen(devname) + 1;
+			ep->name = Malloc(len);
+			(void) snprintf(ep->name, len, "%s: %s",
+			    mdname, devname);
+			Free(mdname);
+			Free(devname);
+			break;
+		}
+
+		/* hotspare pool name */
+		case MDEC_HSP:
+		{
+			md_hsp_error_t	*ip =
+			    &ep->info.md_error_info_t_u.hsp_error;
+
+			ep->name = hsp_name(ip->hsp);
+			break;
+		}
+
+		/* hotspare name */
+		case MDEC_HS:
+		{
+			md_hs_error_t	*ip =
+			    &ep->info.md_error_info_t_u.hs_error;
+			char		*hspname, *devname;
+			size_t		len;
+
+			/* "<hot spare pool>: <device>" */
+			hspname = hsp_name(ip->hs.hsp);
+			devname = dev_name(HSP_SET(ip->hs.hsp), ip->hs.dev);
+			len = strlen(hspname) + strlen(": ")
+			    + strlen(devname) + 1;
+			ep->name = Malloc(len);
+			(void) snprintf(ep->name, len, "%s: %s",
+			    hspname, devname);
+			Free(hspname);
+			Free(devname);
+			break;
+		}
+
+		/* mddb name */
+		case MDEC_MDDB:
+		{
+			md_mddb_error_t	*ip =
+			    &ep->info.md_error_info_t_u.mddb_error;
+
+			/*
+			 * This used to store md_name(ip->mnum) in ep->name
+			 * first (when mnum != NODEV32) and then overwrite
+			 * it unconditionally with the set name, leaking the
+			 * first string.  The set name is what callers have
+			 * always ended up with, so only that is built.
+			 * NOTE(review): an "else" may have been intended,
+			 * i.e. prefer the metadevice name when mnum is
+			 * valid -- confirm before changing the name that
+			 * is reported.
+			 */
+			ep->name = set_name(ip->setno);
+			break;
+		}
+
+		/* set name */
+		case MDEC_DS:
+		{
+			md_ds_error_t	*ip =
+			    &ep->info.md_error_info_t_u.ds_error;
+
+			ep->name = set_name(ip->setno);
+			break;
+		}
+		}
+	}
+}
+
+/*
+ * simple error
+ *
+ * Reset ep and record a void class error 'errnum'; 'name' is the
+ * optional name the error is about.  Always returns -1.
+ */
+int
+mderror(
+	md_error_t	*ep,
+	md_void_errno_t	errnum,
+	char		*name
+)
+{
+	mdclrerror(ep);
+
+	ep->info.errclass = MDEC_VOID;
+	ep->info.md_error_info_t_u.void_error.errnum = errnum;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * system error
+ *
+ * Reset ep and, for a non-zero errno value, record a system class
+ * error.  With errnum == 0 the class is left as mdclrerror() set it.
+ * 'name' is the optional name the error is about.  Always returns -1.
+ */
+int
+mdsyserror(
+	md_error_t	*ep,
+	int		errnum,
+	char		*name
+)
+{
+	mdclrerror(ep);
+
+	if (errnum != 0) {
+		ep->info.md_error_info_t_u.sys_error.errnum = errnum;
+		ep->info.errclass = MDEC_SYS;
+	}
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * RPC error
+ *
+ * Reset ep and record the status of the last call made on client
+ * handle clntp as an RPC class error against 'host', with 'extra' as
+ * additional context.  Always returns -1.
+ */
+int
+mdrpcerror(
+	md_error_t	*ep,
+	CLIENT		*clntp,
+	char		*host,
+	char		*extra
+)
+{
+	struct rpc_err	last_err;
+
+	mdclrerror(ep);
+
+	/* fetch the status of the failed call from the handle */
+	clnt_geterr(clntp, &last_err);
+
+	ep->info.errclass = MDEC_RPC;
+	ep->info.md_error_info_t_u.rpc_error.errnum = last_err.re_status;
+
+	metacookerror(ep, host);
+	mderrorextra(ep, extra);
+	return (-1);
+}
+
+/*
+ * RPC create error
+ *
+ * Reset ep and record the client creation status found in
+ * rpc_createerr as an RPC class error against 'host', with 'extra' as
+ * additional context.  Always returns -1.
+ */
+int
+mdrpccreateerror(
+	md_error_t	*ep,
+	char		*host,
+	char		*extra
+)
+{
+	mdclrerror(ep);
+
+	ep->info.errclass = MDEC_RPC;
+	ep->info.md_error_info_t_u.rpc_error.errnum = rpc_createerr.cf_stat;
+
+	metacookerror(ep, host);
+	mderrorextra(ep, extra);
+	return (-1);
+}
+
+/*
+ * device error
+ *
+ * Reset ep and record device class error 'errnum' for device 'dev';
+ * 'name' is the optional device name.  Always returns -1.
+ */
+int
+mddeverror(
+	md_error_t	*ep,
+	md_dev_errno_t	errnum,
+	md_dev64_t	dev,
+	char		*name
+)
+{
+	mdclrerror(ep);
+
+	ep->info.errclass = MDEC_DEV;
+	ep->info.md_error_info_t_u.dev_error.errnum = errnum;
+	ep->info.md_error_info_t_u.dev_error.dev = dev;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * use error
+ *
+ * Reset ep and record use class error 'errnum' for device 'dev'.
+ * 'where' (required) says where the device is in use and is copied;
+ * 'name' is the optional device name.  Always returns -1.
+ */
+int
+mduseerror(
+	md_error_t	*ep,
+	md_use_errno_t	errnum,
+	md_dev64_t	dev,
+	char		*where,
+	char		*name
+)
+{
+	assert(where != NULL);
+
+	mdclrerror(ep);
+
+	ep->info.errclass = MDEC_USE;
+	ep->info.md_error_info_t_u.use_error.errnum = errnum;
+	ep->info.md_error_info_t_u.use_error.dev = dev;
+	ep->info.md_error_info_t_u.use_error.where = Strdup(where);
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * overlap error
+ *
+ * Reset ep and record overlap class error 'errnum'.  'overlap'
+ * (required) names what is overlapped and 'where' optionally says
+ * where; both are copied.  'name' is the optional name the error is
+ * about.  Always returns -1.
+ */
+int
+mdoverlaperror(
+	md_error_t		*ep,
+	md_overlap_errno_t	errnum,
+	char			*name,
+	char			*where,
+	char			*overlap
+)
+{
+	assert(overlap != NULL);
+
+	mdclrerror(ep);
+
+	ep->info.errclass = MDEC_OVERLAP;
+	ep->info.md_error_info_t_u.overlap_error.errnum = errnum;
+	ep->info.md_error_info_t_u.overlap_error.overlap = Strdup(overlap);
+	ep->info.md_error_info_t_u.overlap_error.where =
+	    (where != NULL) ? Strdup(where) : NULL;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * metadevice error
+ *
+ * Reset ep and record metadevice class error 'errnum' for the
+ * metadevice with minor number 'mnum'; 'name' is the optional
+ * metadevice name.  Always returns -1.
+ */
+int
+mdmderror(
+	md_error_t	*ep,
+	md_md_errno_t	errnum,
+	minor_t		mnum,
+	char		*name
+)
+{
+	mdclrerror(ep);
+
+	ep->info.errclass = MDEC_MD;
+	ep->info.md_error_info_t_u.md_error.errnum = errnum;
+	ep->info.md_error_info_t_u.md_error.mnum = mnum;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * component error
+ *
+ * Reset ep and record component class error 'errnum' for component
+ * 'dev' of the metadevice with minor number 'mnum'; 'name' is the
+ * optional component name.  Always returns -1.
+ */
+int
+mdcomperror(
+	md_error_t	*ep,
+	md_comp_errno_t	errnum,
+	minor_t		mnum,
+	md_dev64_t	dev,
+	char		*name
+)
+{
+	mdclrerror(ep);
+
+	ep->info.errclass = MDEC_COMP;
+	ep->info.md_error_info_t_u.comp_error.errnum = errnum;
+	ep->info.md_error_info_t_u.comp_error.comp.mnum = mnum;
+	ep->info.md_error_info_t_u.comp_error.comp.dev = dev;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * hotspare pool error
+ *
+ * Calls mdclrerror() on ep, marks it as class MDEC_HSP and stores errnum
+ * and hsp in the hsp_error member of ep->info.  name is handed to
+ * metacookerror().  Always returns -1.
+ */
+int
+mdhsperror(md_error_t *ep, md_hsp_errno_t errnum, hsp_t hsp, char *name)
+{
+	md_hsp_error_t	*errinfo;
+
+	mdclrerror(ep);
+
+	errinfo = &ep->info.md_error_info_t_u.hsp_error;
+	ep->info.errclass = MDEC_HSP;
+	errinfo->errnum = errnum;
+	errinfo->hsp = hsp;
+
+	metacookerror(ep, name);
+
+	return (-1);
+}
+
+/*
+ * hotspare error
+ *
+ * Calls mdclrerror() on ep, marks it as class MDEC_HS and stores errnum
+ * together with the hot spare's hsp and dev in the hs_error member of
+ * ep->info.  name is handed to metacookerror().  Always returns -1.
+ */
+int
+mdhserror(md_error_t *ep, md_hs_errno_t errnum, hsp_t hsp, md_dev64_t dev,
+    char *name)
+{
+	md_hs_error_t	*errinfo;
+
+	mdclrerror(ep);
+
+	errinfo = &ep->info.md_error_info_t_u.hs_error;
+	ep->info.errclass = MDEC_HS;
+	errinfo->errnum = errnum;
+	errinfo->hs.hsp = hsp;
+	errinfo->hs.dev = dev;
+
+	metacookerror(ep, name);
+
+	return (-1);
+}
+
+/*
+ * MDDB error
+ *
+ * Calls mdclrerror() on ep, marks it as class MDEC_MDDB and stores errnum,
+ * mnum, setno and size in the mddb_error member of ep->info.  name is
+ * handed to metacookerror().  Always returns -1.
+ */
+int
+mdmddberror(md_error_t *ep, md_mddb_errno_t errnum, minor_t mnum, set_t setno,
+    size_t size, char *name)
+{
+	md_mddb_error_t	*errinfo;
+
+	mdclrerror(ep);
+
+	errinfo = &ep->info.md_error_info_t_u.mddb_error;
+	ep->info.errclass = MDEC_MDDB;
+	errinfo->errnum = errnum;
+	errinfo->mnum = mnum;
+	errinfo->setno = setno;
+	errinfo->size = size;
+
+	metacookerror(ep, name);
+
+	return (-1);
+}
+
+/*
+ * metadevice diskset (ds) error
+ *
+ * Calls mdclrerror() on ep, marks it as class MDEC_DS and stores errnum
+ * and setno in the ds_error member of ep->info.  node and drive are both
+ * optional: each is Strdup()ed when supplied and stored as NULL otherwise.
+ * name is handed to metacookerror().  Always returns -1.
+ */
+int
+mddserror(md_error_t *ep, md_ds_errno_t errnum, set_t setno, char *node,
+    char *drive, char *name)
+{
+	md_ds_error_t	*errinfo;
+
+	mdclrerror(ep);
+
+	errinfo = &ep->info.md_error_info_t_u.ds_error;
+	ep->info.errclass = MDEC_DS;
+	errinfo->errnum = errnum;
+	errinfo->setno = setno;
+
+	errinfo->node = NULL;
+	if (node != NULL)
+		errinfo->node = Strdup(node);
+
+	errinfo->drive = NULL;
+	if (drive != NULL)
+		errinfo->drive = Strdup(drive);
+
+	metacookerror(ep, name);
+
+	return (-1);
+}
+
+/*
+ * clear/attach extra context information
+ *
+ * Any string already held in ep->extra is released with Free().  The field
+ * then receives a Strdup()ed copy of extra, or NULL when extra is NULL.
+ */
+void
+mderrorextra(md_error_t *ep, char *extra)
+{
+	/* drop whatever was attached before */
+	if (ep->extra != NULL)
+		Free(ep->extra);
+
+	ep->extra = (extra != NULL) ? Strdup(extra) : NULL;
+}
+
+/*
+ * steal (copy) an error code safely
+ *
+ * mdclrerror() is called on the destination, the source structure is
+ * copied over it wholesale, and the source is then zero-filled so that only
+ * the destination still refers to whatever the error contained.
+ * Always returns -1.
+ */
+int
+mdstealerror(md_error_t *to, md_error_t *from)
+{
+	mdclrerror(to);
+
+	/* structure copy, then wipe the donor */
+	*to = *from;
+	(void) memset(from, 0, sizeof (md_error_t));
+
+	return (-1);
+}
+
+/*
+ * do an ioctl, cook the error, and return status
+ *
+ * The descriptor comes from open_admin(); if that fails, -1 is returned
+ * with whatever open_admin() left in ep.  Otherwise ep is cleared and
+ * the ioctl issued:
+ *   - ioctl() itself fails: errno is recorded through mdsyserror() and its
+ *     result is returned;
+ *   - ioctl() succeeds but ep is no longer "ok" afterwards: the error is
+ *     cooked with name and -1 is returned;
+ *   - otherwise 0 is returned.
+ * NOTE(review): the post-ioctl mdisok() check presumably relies on ep
+ * pointing into the structure passed as data -- confirm against callers.
+ */
+int
+metaioctl(int cmd, void *data, md_error_t *ep, char *name)
+{
+	int	admin_fd;
+
+	/* open admin device */
+	admin_fd = open_admin(ep);
+	if (admin_fd < 0)
+		return (-1);
+
+	/* do ioctl */
+	mdclrerror(ep);
+	if (ioctl(admin_fd, cmd, data) != 0)
+		return (mdsyserror(ep, errno, name));
+
+	/* return success */
+	if (mdisok(ep))
+		return (0);
+
+	/* the call went through but an error was reported in ep */
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * print void class errors
+ *
+ * Appends the message for the void-class error held in ep to the
+ * NUL-terminated string already in buf, never writing beyond size bytes
+ * in total, and returns buf.  MDE_MED_ERROR appends nothing.  The two
+ * MDE_STRIPE_TRUNC_* messages interpolate ep->extra.
+ */
+static char *
+void_to_str(md_error_t *ep, char *buf, size_t size)
+{
+	md_void_error_t	*ip = &ep->info.md_error_info_t_u.void_error;
+	size_t		used = strlen(buf);
+	char		*tail = buf + used;
+	size_t		room = size - used;
+	char		*msg;
+
+	/*
+	 * Cases that need arguments (or no output at all) finish inside the
+	 * switch; every other case only selects the message to append.
+	 */
+	switch (ip->errnum) {
+	case MDE_NONE:
+		msg = dgettext(TEXT_DOMAIN, "no error");
+		break;
+	case MDE_UNIT_NOT_FOUND:
+		msg = dgettext(TEXT_DOMAIN, "unit not found");
+		break;
+	case MDE_DUPDRIVE:
+		msg = dgettext(TEXT_DOMAIN, "drive specified more than once");
+		break;
+	case MDE_INVAL_HSOP:
+		msg = dgettext(TEXT_DOMAIN, "illegal hot spare operation");
+		break;
+	case MDE_NO_SET:
+		msg = dgettext(TEXT_DOMAIN, "no such set");
+		break;
+	case MDE_SET_DIFF:
+		msg = dgettext(TEXT_DOMAIN, "set name is inconsistent");
+		break;
+	case MDE_BAD_RD_OPT:
+		msg = dgettext(TEXT_DOMAIN, "invalid read option");
+		break;
+	case MDE_BAD_WR_OPT:
+		msg = dgettext(TEXT_DOMAIN, "invalid write option");
+		break;
+	case MDE_BAD_PASS_NUM:
+		msg = dgettext(TEXT_DOMAIN, "invalid pass number");
+		break;
+	case MDE_BAD_RESYNC_OPT:
+		msg = dgettext(TEXT_DOMAIN, "invalid resync option");
+		break;
+	case MDE_BAD_INTERLACE:
+		msg = dgettext(TEXT_DOMAIN, "invalid interlace");
+		break;
+	case MDE_NO_HSPS:
+		msg = dgettext(TEXT_DOMAIN, "no hotspare pools found");
+		break;
+	case MDE_NOTENOUGH_DB:
+		msg = dgettext(TEXT_DOMAIN,
+		    "must have at least 1 database (-f overrides)");
+		break;
+	case MDE_DELDB_NOTALLOWED:
+		msg = dgettext(TEXT_DOMAIN,
+		    "cannot delete the last database replica in the diskset");
+		break;
+	case MDE_DEL_VALIDDB_NOTALLOWED:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Deleting specified valid replicas results in stale "
+		    "state database. Configuration changes with stale "
+		    "database result in panic(-f overrides)");
+		break;
+	case MDE_SYSTEM_FILE:
+		msg = dgettext(TEXT_DOMAIN, "error in system file");
+		break;
+	case MDE_MDDB_FILE:
+		msg = dgettext(TEXT_DOMAIN, "error in mddb.cf file");
+		break;
+	case MDE_MDDB_CKSUM:
+		msg = dgettext(TEXT_DOMAIN, "checksum error in mddb.cf file");
+		break;
+	case MDE_VFSTAB_FILE:
+		msg = dgettext(TEXT_DOMAIN, "error in vfstab file");
+		break;
+	case MDE_NOSLICE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "invalid slice number for drive name");
+		break;
+	case MDE_SYNTAX:
+		msg = dgettext(TEXT_DOMAIN, "syntax error");
+		break;
+	case MDE_OPTION:
+		msg = dgettext(TEXT_DOMAIN, "illegal option");
+		break;
+	case MDE_TAKE_OWN:
+		msg = dgettext(TEXT_DOMAIN, "failed to reserve any drives");
+		break;
+	case MDE_NOT_DRIVENAME:
+		msg = dgettext(TEXT_DOMAIN, "not a valid drive name");
+		break;
+	case MDE_RESERVED:
+		msg = dgettext(TEXT_DOMAIN, "reserved by another host");
+		break;
+	case MDE_DVERSION:
+		msg = dgettext(TEXT_DOMAIN, "driver version mismatch");
+		break;
+	case MDE_MVERSION:
+		msg = dgettext(TEXT_DOMAIN,
+		    "metadevice state database version mismatch");
+		break;
+	case MDE_TESTERROR:
+		msg = dgettext(TEXT_DOMAIN, "TEST ERROR MESSAGE");
+		break;
+	case MDE_BAD_ORIG_NCOL:
+		msg = dgettext(TEXT_DOMAIN, "invalid column count");
+		break;
+	case MDE_RAID_INVALID:
+		msg = dgettext(TEXT_DOMAIN,
+		    "devices were not RAIDed previously or "
+		    "are specified in the wrong order");
+		break;
+	case MDE_MED_ERROR:
+		/* nothing is appended for this code */
+		return (buf);
+	case MDE_TOOMANYMED:
+		msg = dgettext(TEXT_DOMAIN,
+		    "too many mediator hosts requested");
+		break;
+	case MDE_NOMED:
+		msg = dgettext(TEXT_DOMAIN, "no mediator hosts found");
+		break;
+	case MDE_ONLYNODENAME:
+		msg = dgettext(TEXT_DOMAIN,
+		    "only the nodename of a host is required for deletes");
+		break;
+	case MDE_RAID_BAD_PW_CNT:
+		msg = dgettext(TEXT_DOMAIN,
+		    "simultaneous writes out of range");
+		break;
+	case MDE_DEVID_TOOBIG:
+		msg = dgettext(TEXT_DOMAIN,
+		    "relocation information size is greater than reported");
+		break;
+	case MDE_NOPERM:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Permission denied. You must have root privilege "
+		    "to execute this command.");
+		break;
+	case MDE_NODEVID:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Device relocation information not available "
+		    "for this device");
+		break;
+	case MDE_NOROOT:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no root filesystem in /etc/mnttab");
+		break;
+	case MDE_EOF_TRANS:
+		msg = dgettext(TEXT_DOMAIN,
+		    MD_EOF_TRANS_MSG);
+		break;
+	case MDE_NOT_MN:
+		msg = dgettext(TEXT_DOMAIN,
+		    "option only valid within a multi-owner set");
+		break;
+	case MDE_ABR_SET:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Invalid command for mirror with ABR set");
+		break;
+	case MDE_INVAL_MNOP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Invalid operation on multi-owner set");
+		break;
+	case MDE_MNSET_NOTRANS:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Trans metadevice not supported on multi-owner set");
+		break;
+	case MDE_MNSET_NORAID:
+		msg = dgettext(TEXT_DOMAIN,
+		    "RAID-5 metadevice not supported on multi-owner set");
+		break;
+	case MDE_FORCE_DEL_ALL_DRV:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Must specify -f option to delete all drives from set");
+		break;
+	case MDE_STRIPE_TRUNC_SINGLE:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "The necessary rounding would result in data loss. "
+		    "You can avoid this by concatenating additional devices "
+		    "totaling at least %s blocks, or by increasing the size "
+		    "of the specified component by exactly %s blocks."),
+		    ep->extra, ep->extra);
+		return (buf);
+	case MDE_STRIPE_TRUNC_MULTIPLE:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "The necessary rounding would result in data loss. "
+		    "You can avoid this by concatenating additional devices "
+		    "totaling at least %s blocks."), ep->extra);
+		return (buf);
+	case MDE_SMF_FAIL:
+		msg = dgettext(TEXT_DOMAIN,
+		    "failed to enable/disable SVM service");
+		break;
+	case MDE_SMF_NO_SERVICE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "service(s) not online in SMF");
+		break;
+	default:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "unknown void error code %d"), ip->errnum);
+		return (buf);
+	}
+
+	(void) snprintf(tail, room, msg);
+	return (buf);
+}
+
+/*
+ * print sys class errors
+ *
+ * Appends the strerror() text for the errno value held in ep to the
+ * NUL-terminated string already in buf, never writing beyond size bytes
+ * in total, and returns buf.  When strerror() returns NULL a generic
+ * "unknown errno" message carrying the number is appended instead.
+ */
+static char *
+sys_to_str(md_error_t *ep, char *buf, size_t size)
+{
+	md_sys_error_t	*ip = &ep->info.md_error_info_t_u.sys_error;
+	size_t		used = strlen(buf);
+	char		*tail = buf + used;
+	size_t		room = size - used;
+	char		*emsg = strerror(ip->errnum);
+
+	if (emsg != NULL) {
+		(void) snprintf(tail, room, "%s", emsg);
+		return (buf);
+	}
+
+	(void) snprintf(tail, room,
+	    dgettext(TEXT_DOMAIN, "unknown errno %d out of range"),
+	    ip->errnum);
+	return (buf);
+}
+
+/*
+ * print RPC class errors
+ *
+ * Appends the clnt_sperrno() text for the RPC status held in ep to the
+ * NUL-terminated string already in buf, never writing beyond size bytes
+ * in total, and returns buf.
+ */
+static char *
+rpc_to_str(md_error_t *ep, char *buf, size_t size)
+{
+	md_rpc_error_t	*ip = &ep->info.md_error_info_t_u.rpc_error;
+	size_t		used = strlen(buf);
+
+	(void) snprintf(buf + used, size - used, "%s",
+	    clnt_sperrno(ip->errnum));
+
+	return (buf);
+}
+
+/*
+ * print dev class errors
+ *
+ * Appends the message for the dev-class error held in ep to the
+ * NUL-terminated string already in buf, never writing beyond size bytes
+ * in total, and returns buf.
+ */
+static char *
+dev_to_str(md_error_t *ep, char *buf, size_t size)
+{
+	md_dev_error_t	*ip = &ep->info.md_error_info_t_u.dev_error;
+	size_t		used = strlen(buf);
+	char		*tail = buf + used;
+	size_t		room = size - used;
+	char		*msg;
+
+	/*
+	 * Cases that need arguments finish inside the switch; every other
+	 * case only selects the message to append.
+	 */
+	switch (ip->errnum) {
+	case MDE_INVAL_HS:
+		msg = dgettext(TEXT_DOMAIN, "hotspare doesn't exist");
+		break;
+	case MDE_FIX_INVAL_STATE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "cannot enable hotspared device");
+		break;
+	case MDE_FIX_INVAL_HS_STATE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare isn't broken, can't enable");
+		break;
+	case MDE_NOT_META:
+		msg = dgettext(TEXT_DOMAIN, "not a metadevice");
+		break;
+	case MDE_IS_DUMP:
+		msg = dgettext(TEXT_DOMAIN, "is a dump device");
+		break;
+	case MDE_IS_META:
+		msg = dgettext(TEXT_DOMAIN, "is a metadevice");
+		break;
+	case MDE_IS_SWAPPED:
+		msg = dgettext(TEXT_DOMAIN, "is swapped on");
+		break;
+	case MDE_NAME_SPACE:
+		msg = dgettext(TEXT_DOMAIN, "namespace error");
+		break;
+	case MDE_IN_SHARED_SET:
+		msg = dgettext(TEXT_DOMAIN, "device in shared set");
+		break;
+	case MDE_NOT_IN_SET:
+		msg = dgettext(TEXT_DOMAIN, "device not in set");
+		break;
+	case MDE_NOT_DISK:
+		msg = dgettext(TEXT_DOMAIN, "not a disk device");
+		break;
+	case MDE_CANT_CONFIRM:
+		msg = dgettext(TEXT_DOMAIN, "can't confirm device");
+		break;
+	case MDE_INVALID_PART:
+		msg = dgettext(TEXT_DOMAIN, "invalid partition");
+		break;
+	case MDE_HAS_MDDB:
+		msg = dgettext(TEXT_DOMAIN,
+		    "has a metadevice database replica");
+		break;
+	case MDE_NO_DB:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no metadevice database replica on device");
+		break;
+	case MDE_CANTVERIFY_VTOC:
+		msg = dgettext(TEXT_DOMAIN, "unable to verify the vtoc");
+		break;
+	case MDE_NOT_LOCAL:
+		msg = dgettext(TEXT_DOMAIN, "not in local set");
+		break;
+	case MDE_DEVICES_NAME:
+		msg = dgettext(TEXT_DOMAIN, "can't parse /devices name");
+		break;
+	case MDE_REPCOMP_INVAL:
+		msg = dgettext(TEXT_DOMAIN,
+		    "replica slice is not usable as a metadevice component");
+		break;
+	case MDE_REPCOMP_ONLY:
+		msg = dgettext(TEXT_DOMAIN,
+		    "only replica slice is usable for a diskset "
+		    "database replica");
+		break;
+	case MDE_INV_ROOT:
+		msg = dgettext(TEXT_DOMAIN,
+		    "invalid root device for this operation");
+		break;
+	case MDE_MULTNM:
+		msg = dgettext(TEXT_DOMAIN,
+		    "multiple entries for device in Solaris Volume Manager "
+		    "configuration");
+		break;
+	case MDE_TOO_MANY_PARTS:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "Disks with more than %d partitions are not supported "
+		    "in Solaris Volume Manager"), MD_MAX_PARTS);
+		return (buf);
+	case MDE_REPART_REPLICA:
+		msg = dgettext(TEXT_DOMAIN,
+		    "cannot repartition a slice with an existing replica");
+		break;
+	default:
+		(void) snprintf(tail, room,
+		    dgettext(TEXT_DOMAIN, "unknown dev error code %d"),
+		    ip->errnum);
+		return (buf);
+	}
+
+	(void) snprintf(tail, room, msg);
+	return (buf);
+}
+
+/*
+ * print overlap class errors
+ *
+ * Appends the message for the overlap-class error held in ep to the
+ * NUL-terminated string already in buf, never writing beyond size bytes
+ * in total, and returns buf.  The messages name the overlapping device
+ * (ip->overlap) and, for MDE_OVERLAP_MOUNTED, the mount point (ip->where).
+ */
+static char *
+overlap_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_overlap_error_t	*ip =
+	    &ep->info.md_error_info_t_u.overlap_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+
+	switch (ip->errnum) {
+	case MDE_OVERLAP_MOUNTED:
+		/*
+		 * mdoverlaperror() stores a NULL where when the caller
+		 * supplies none, and passing NULL to a %s conversion is
+		 * undefined; substitute an empty string in that case.
+		 */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "overlaps with %s which is mounted as \'%s\'"),
+		    ip->overlap, (ip->where != NULL) ? ip->where : "");
+		break;
+	case MDE_OVERLAP_SWAP:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "overlaps with %s which is a swap device"), ip->overlap);
+		break;
+	case MDE_OVERLAP_DUMP:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "overlaps with %s which is the dump device"), ip->overlap);
+		break;
+	default:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown overlap error code %d"), ip->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * print use class errors
+ *
+ * Appends the message for the use-class error held in ep to the
+ * NUL-terminated string already in buf, never writing beyond size bytes
+ * in total, and returns buf.  ip->where names what the device is already
+ * used by / mounted on.
+ */
+static char *
+use_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_use_error_t	*ip = &ep->info.md_error_info_t_u.use_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+
+	switch (ip->errnum) {
+	case MDE_IS_MOUNTED:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "is mounted on %s"),
+		    ip->where);
+		break;
+	case MDE_ALREADY:
+		/*
+		 * when the object of the error (existing device that
+		 * would be used by SVM) is the metadb then it is necessary
+		 * to explicitly specify the string in the error message so
+		 * that it can be successfully localized for the Asian locales.
+		 */
+		if (strcmp(ip->where, MDB_STR) != 0) {
+			(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+			    "has appeared more than once in the "
+			    "specification of %s"), ip->where);
+		} else {
+			(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+			    "has appeared more than once in the "
+			    "specification of " MDB_STR));
+		}
+		break;
+	case MDE_OVERLAP:
+		/*
+		 * when the object of the error (existing device that
+		 * would overlap) is the metadb then it is necessary
+		 * to explicitly specify the string in the error message so
+		 * that it can be successfully localized for the Asian locales.
+		 */
+		if (strcmp(ip->where, MDB_STR) != 0) {
+			(void) snprintf(p, psize,
+			    dgettext(TEXT_DOMAIN, "overlaps with device in %s"),
+			    ip->where);
+		} else {
+			(void) snprintf(p, psize,
+			    dgettext(TEXT_DOMAIN, "overlaps with device in "
+			    MDB_STR));
+		}
+		break;
+	default:
+		/*
+		 * Name the right error class: this text used to read
+		 * "unknown dev error code", duplicating dev_to_str().  The
+		 * msgid is new, so message catalogs need a matching entry.
+		 */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown use error code %d"), ip->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * print md class errors
+ *
+ * Appends the message for the md-class (metadevice) error held in ep to
+ * the NUL-terminated string already in buf, never writing beyond size
+ * bytes in total, and returns buf.
+ */
+static char *
+md_to_str(md_error_t *ep, char *buf, size_t size)
+{
+	md_md_error_t	*ip = &ep->info.md_error_info_t_u.md_error;
+	size_t		used = strlen(buf);
+	char		*tail = buf + used;
+	size_t		room = size - used;
+	char		*msg;
+
+	/*
+	 * Only the default case needs an argument and finishes inside the
+	 * switch; every other case just selects the message to append.
+	 */
+	switch (ip->errnum) {
+	case MDE_INVAL_UNIT:
+		msg = dgettext(TEXT_DOMAIN, "invalid unit");
+		break;
+	case MDE_UNIT_NOT_SETUP:
+		msg = dgettext(TEXT_DOMAIN, "unit not set up");
+		break;
+	case MDE_UNIT_ALREADY_SETUP:
+		msg = dgettext(TEXT_DOMAIN, "unit already set up");
+		break;
+	case MDE_NOT_MM:
+		msg = dgettext(TEXT_DOMAIN, "unit is not a mirror");
+		break;
+	case MDE_IS_SM:
+		msg = dgettext(TEXT_DOMAIN, "illegal to clear submirror");
+		break;
+	case MDE_IS_OPEN:
+		msg = dgettext(TEXT_DOMAIN, "metadevice is open");
+		break;
+	case MDE_C_WITH_INVAL_SM:
+		msg = dgettext(TEXT_DOMAIN,
+		    "attempted to clear mirror with submirror(s) "
+		    "in invalid state");
+		break;
+	case MDE_RESYNC_ACTIVE:
+		msg = dgettext(TEXT_DOMAIN, "resync in progress");
+		break;
+	case MDE_LAST_SM_RE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "attempt to replace a component on the last "
+		    "running submirror");
+		break;
+	case MDE_MIRROR_FULL:
+		msg = dgettext(TEXT_DOMAIN,
+		    "mirror has maximum number of submirrors");
+		break;
+	case MDE_IN_UNAVAIL_STATE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "component is in unavailable state; run 'metastat -i'");
+		break;
+	case MDE_IN_USE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "metadevice in use");
+		break;
+	case MDE_SM_TOO_SMALL:
+		msg = dgettext(TEXT_DOMAIN, "submirror too small to attach");
+		break;
+	case MDE_NO_LABELED_SM:
+		msg = dgettext(TEXT_DOMAIN,
+		    "can't attach labeled submirror to an unlabeled mirror");
+		break;
+	case MDE_SM_OPEN_ERR:
+		msg = dgettext(TEXT_DOMAIN, "submirror open error");
+		break;
+	case MDE_CANT_FIND_SM:
+		msg = dgettext(TEXT_DOMAIN, "can't find submirror in mirror");
+		break;
+	case MDE_LAST_SM:
+		msg = dgettext(TEXT_DOMAIN,
+		    "attempt to detach last running submirror");
+		break;
+	case MDE_NO_READABLE_SM:
+		msg = dgettext(TEXT_DOMAIN,
+		    "operation would result in no readable submirrors");
+		break;
+	case MDE_SM_FAILED_COMPS:
+		msg = dgettext(TEXT_DOMAIN,
+		    "attempt an operation on a submirror "
+		    "that has erred components");
+		break;
+	case MDE_ILLEGAL_SM_STATE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "attempt operation on a submirror in illegal state");
+		break;
+	case MDE_RR_ALLOC_ERROR:
+		msg = dgettext(TEXT_DOMAIN,
+		    "attach failed, unable to allocate new resync info");
+		break;
+	case MDE_MIRROR_OPEN_FAILURE:
+		msg = dgettext(TEXT_DOMAIN, "insufficient devices to open");
+		break;
+	case MDE_MIRROR_THREAD_FAILURE:
+		msg = dgettext(TEXT_DOMAIN, "mirror thread failure");
+		break;
+	case MDE_GROW_DELAYED:
+		msg = dgettext(TEXT_DOMAIN, "growing of metadevice delayed");
+		break;
+	case MDE_NOT_MT:
+		msg = dgettext(TEXT_DOMAIN, "unit is not a trans");
+		break;
+	case MDE_HS_IN_USE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "can't modify hot spare pool, hot spare in use");
+		break;
+	case MDE_HAS_LOG:
+		msg = dgettext(TEXT_DOMAIN, "already has log");
+		break;
+	case MDE_UNKNOWN_TYPE:
+		msg = dgettext(TEXT_DOMAIN, "unknown metadevice type");
+		break;
+	case MDE_NOT_STRIPE:
+		msg = dgettext(TEXT_DOMAIN, "unit is not a concat/stripe");
+		break;
+	case MDE_NOT_RAID:
+		msg = dgettext(TEXT_DOMAIN, "unit is not a RAID");
+		break;
+	case MDE_NROWS:
+		msg = dgettext(TEXT_DOMAIN, "not enough stripes specified");
+		break;
+	case MDE_NCOMPS:
+		msg = dgettext(TEXT_DOMAIN,
+		    "not enough components specified");
+		break;
+	case MDE_NSUBMIRS:
+		msg = dgettext(TEXT_DOMAIN,
+		    "not enough submirrors specified");
+		break;
+	case MDE_BAD_STRIPE:
+		msg = dgettext(TEXT_DOMAIN, "invalid stripe configuration");
+		break;
+	case MDE_BAD_MIRROR:
+		msg = dgettext(TEXT_DOMAIN, "invalid mirror configuration");
+		break;
+	case MDE_BAD_TRANS:
+		msg = dgettext(TEXT_DOMAIN, "invalid trans configuration");
+		break;
+	case MDE_BAD_RAID:
+		msg = dgettext(TEXT_DOMAIN, "invalid RAID configuration");
+		break;
+	case MDE_RAID_OPEN_FAILURE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "resync unable to open RAID unit");
+		break;
+	case MDE_RAID_THREAD_FAILURE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "attempt to start resync thread failed");
+		break;
+	case MDE_RAID_NEED_FORCE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "operation requires -f (force) flag");
+		break;
+	case MDE_NO_LOG:
+		msg = dgettext(TEXT_DOMAIN, "log has already been detached");
+		break;
+	case MDE_RAID_DOI:
+		msg = dgettext(TEXT_DOMAIN, "only valid action is metaclear");
+		break;
+	case MDE_RAID_LAST_ERRED:
+		msg = dgettext(TEXT_DOMAIN,
+		    "in Last Erred state, "
+		    "errored components must be replaced");
+		break;
+	case MDE_RAID_NOT_OKAY:
+		msg = dgettext(TEXT_DOMAIN,
+		    "all components must be Okay to perform this operation");
+		break;
+	case MDE_RENAME_BUSY:
+		msg = dgettext(TEXT_DOMAIN,
+		    "metadevice is temporarily too busy for renames");
+		break;
+	case MDE_RENAME_SOURCE_BAD:
+		msg = dgettext(TEXT_DOMAIN,
+		    "source metadevice is not able to be renamed");
+		break;
+	case MDE_RENAME_TARGET_BAD:
+		msg = dgettext(TEXT_DOMAIN,
+		    "target metadevice is not able to be renamed");
+		break;
+	case MDE_RENAME_TARGET_UNRELATED:
+		msg = dgettext(TEXT_DOMAIN,
+		    "target metadevice is not related to source metadevice");
+		break;
+	case MDE_RENAME_CONFIG_ERROR:
+		msg = dgettext(TEXT_DOMAIN,
+		    "metadevice driver configuration error; "
+		    "rename can't occur");
+		break;
+	case MDE_RENAME_ORDER:
+		msg = dgettext(TEXT_DOMAIN,
+		    "units may not be renamed in that order");
+		break;
+	case MDE_RECOVER_FAILED:
+		msg = dgettext(TEXT_DOMAIN, "recovery failed");
+		break;
+	case MDE_SP_NOSPACE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "not enough space available for request");
+		break;
+	case MDE_SP_BADWMREAD:
+		msg = dgettext(TEXT_DOMAIN, "error reading extent header");
+		break;
+	case MDE_SP_BADWMWRITE:
+		msg = dgettext(TEXT_DOMAIN, "error writing extent header");
+		break;
+	case MDE_SP_BADWMMAGIC:
+		msg = dgettext(TEXT_DOMAIN,
+		    "bad magic number in extent header");
+		break;
+	case MDE_SP_BADWMCRC:
+		msg = dgettext(TEXT_DOMAIN, "bad checksum in extent header");
+		break;
+	case MDE_NOT_SP:
+		msg = dgettext(TEXT_DOMAIN, "unit is not a soft partition");
+		break;
+	case MDE_SP_OVERLAP:
+		msg = dgettext(TEXT_DOMAIN, "overlapping extents specified");
+		break;
+	case MDE_SP_BAD_LENGTH:
+		msg = dgettext(TEXT_DOMAIN, "bad length specified");
+		break;
+	case MDE_SP_NOSP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no soft partitions on this device");
+		break;
+	case MDE_UNIT_TOO_LARGE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Volume size cannot exceed 1 TByte");
+		break;
+	case MDE_LOG_TOO_LARGE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "Trans log size must be less than 1 TByte");
+		break;
+	default:
+		(void) snprintf(tail, room,
+		    dgettext(TEXT_DOMAIN, "unknown md error code %d"),
+		    ip->errnum);
+		return (buf);
+	}
+
+	(void) snprintf(tail, room, msg);
+	return (buf);
+}
+
+/*
+ * print comp class errors
+ *
+ * Appends the message for the comp-class (component) error held in ep to
+ * the NUL-terminated string already in buf, never writing beyond size
+ * bytes in total, and returns buf.
+ */
+static char *
+comp_to_str(md_error_t *ep, char *buf, size_t size)
+{
+	md_comp_error_t	*ip = &ep->info.md_error_info_t_u.comp_error;
+	size_t		used = strlen(buf);
+	char		*tail = buf + used;
+	size_t		room = size - used;
+	char		*msg;
+
+	/* every known code only selects the text; default formats a number */
+	switch (ip->errnum) {
+	case MDE_CANT_FIND_COMP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "can't find component in unit");
+		break;
+	case MDE_REPL_INVAL_STATE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "component in invalid state to replace - "
+		    "Replace \"Maintenance\" components first");
+		break;
+	case MDE_COMP_TOO_SMALL:
+		msg = dgettext(TEXT_DOMAIN,
+		    "replace failure, new component is too small");
+		break;
+	case MDE_COMP_OPEN_ERR:
+		msg = dgettext(TEXT_DOMAIN,
+		    "unable to open concat/stripe component");
+		break;
+	case MDE_RAID_COMP_ERRED:
+		msg = dgettext(TEXT_DOMAIN,
+		    "must replace errored component first");
+		break;
+	case MDE_MAXIO:
+		msg = dgettext(TEXT_DOMAIN,
+		    "maxtransfer is too small");
+		break;
+	case MDE_SP_COMP_OPEN_ERR:
+		msg = dgettext(TEXT_DOMAIN,
+		    "error opening device under soft partition. Check"
+		    " device status, then use metadevadm(1M).");
+		break;
+	default:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "unknown comp error code %d"), ip->errnum);
+		return (buf);
+	}
+
+	(void) snprintf(tail, room, msg);
+	return (buf);
+}
+
+/*
+ * print hsp class errors
+ *
+ * Appends the message for the hsp-class (hotspare pool) error held in ep
+ * to the NUL-terminated string already in buf, never writing beyond size
+ * bytes in total, and returns buf.
+ */
+static char *
+hsp_to_str(md_error_t *ep, char *buf, size_t size)
+{
+	md_hsp_error_t	*ip = &ep->info.md_error_info_t_u.hsp_error;
+	size_t		used = strlen(buf);
+	char		*tail = buf + used;
+	size_t		room = size - used;
+	char		*msg;
+
+	/* every known code only selects the text; default formats a number */
+	switch (ip->errnum) {
+	case MDE_HSP_CREATE_FAILURE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool database create failure");
+		break;
+	case MDE_HSP_IN_USE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool in use");
+		break;
+	case MDE_INVAL_HSP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "invalid hotspare pool");
+		break;
+	case MDE_HSP_BUSY:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool is busy");
+		break;
+	case MDE_HSP_REF:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool is referenced");
+		break;
+	case MDE_HSP_ALREADY_SETUP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool is already setup");
+		break;
+	case MDE_BAD_HSP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "invalid hotspare pool configuration");
+		break;
+	case MDE_HSP_UNIT_TOO_LARGE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "units in the hotspare pool cannot exceed 1 TByte");
+		break;
+	default:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "unknown hsp error code %d"), ip->errnum);
+		return (buf);
+	}
+
+	(void) snprintf(tail, room, msg);
+	return (buf);
+}
+
+/*
+ * print hs class errors
+ *
+ * Appends the message for the hs-class (hotspare) error held in ep to the
+ * NUL-terminated string already in buf, never writing beyond size bytes
+ * in total, and returns buf.
+ */
+static char *
+hs_to_str(md_error_t *ep, char *buf, size_t size)
+{
+	md_hs_error_t	*ip = &ep->info.md_error_info_t_u.hs_error;
+	size_t		used = strlen(buf);
+	char		*tail = buf + used;
+	size_t		room = size - used;
+	char		*msg;
+
+	/* every known code only selects the text; default formats a number */
+	switch (ip->errnum) {
+	case MDE_HS_RESVD:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare is in use");
+		break;
+	case MDE_HS_CREATE_FAILURE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare database create failure");
+		break;
+	case MDE_HS_INUSE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "add or replace failed, hot spare is already in use");
+		break;
+	case MDE_HS_UNIT_TOO_LARGE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare size cannot exceed 1 TByte");
+		break;
+	default:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "unknown hs error code %d"), ip->errnum);
+		return (buf);
+	}
+
+	(void) snprintf(tail, room, msg);
+	return (buf);
+}
+
+/*
+ * print mddb class errors
+ *
+ * Appends the message for the mddb-class (metadevice state database)
+ * error held in ep to the NUL-terminated string already in buf, never
+ * writing beyond size bytes in total, and returns buf.  A few messages
+ * interpolate ip->size.
+ * NOTE(review): ip->size is printed with %d; confirm that this matches the
+ * field's declared type.
+ */
+static char *
+mddb_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_mddb_error_t	*ip = &ep->info.md_error_info_t_u.mddb_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+
+	switch (ip->errnum) {
+	case MDE_TOOMANY_REPLICAS:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+	"%d metadevice database replicas is too many; the maximum is %d"),
+		    ip->size, MDDB_NLB);
+		break;
+	case MDE_REPLICA_TOOSMALL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+	"device size %d is too small for metadevice database replica"),
+		    ip->size);
+		break;
+	case MDE_NOTVERIFIED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "data not returned correctly from disk"));
+		break;
+	case MDE_DB_INVALID:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "invalid argument"));
+		break;
+	case MDE_DB_EXISTS:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "metadevice database replica exists on device"));
+		break;
+	case MDE_DB_MASTER:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "has bad master block on device"));
+		break;
+	case MDE_DB_TOOSMALL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "device is too small"));
+		break;
+	case MDE_DB_NORECORD:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no such metadevice database record"));
+		break;
+	case MDE_DB_NOSPACE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "metadevice database is full, can't create new records"));
+		break;
+	case MDE_DB_NOTNOW:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "metadevice database has too few replicas, for "
+		    "metadevice database operation"));
+		break;
+	case MDE_DB_NODB:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "there are no existing databases"));
+		break;
+	case MDE_DB_NOTOWNER:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "not owner of metadevice database"));
+		break;
+	case MDE_DB_STALE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "stale databases"));
+		break;
+	case MDE_DB_TOOFEW:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "not enough databases"));
+		break;
+	case MDE_DB_TAGDATA:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "tagged data detected, user intervention required"));
+		break;
+	case MDE_DB_ACCOK:
+		/*
+		 * This text contains literal '%' characters, so it must not
+		 * be used as the format itself ("% r" and "% m" are not
+		 * valid conversions); print it through "%s" instead.  The
+		 * msgid is left untouched so existing catalogs still match.
+		 */
+		(void) snprintf(p, psize, "%s", dgettext(TEXT_DOMAIN,
+		    "50% replicas & 50% mediator hosts available, "
+		    "user intervention required"));
+		break;
+	case MDE_DB_NTAGDATA:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no tagged data available or only one tag found"));
+		break;
+	case MDE_DB_ACCNOTOK:
+		/* literal '%' in the text: print via "%s", as above */
+		(void) snprintf(p, psize, "%s", dgettext(TEXT_DOMAIN,
+		    "50% replicas & 50% mediator hosts not available"));
+		break;
+	case MDE_DB_NOLOCBLK:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no valid locator blocks were found"));
+		break;
+	case MDE_DB_NOLOCNMS:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no valid locator name information was found"));
+		break;
+	case MDE_DB_NODIRBLK:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no valid directory blocks were found"));
+		break;
+	case MDE_DB_NOTAGREC:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no tag record was allocated, so data "
+		    "tagging is disabled"));
+		break;
+	case MDE_DB_NOTAG:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no tag records exist or no matching tag was found"));
+		break;
+	case MDE_DB_BLKRANGE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "logical block number %d out of range"), ip->size);
+		break;
+	default:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown mddb error code %d"), ip->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * print diskset (ds) class errors
+ *
+ * Formats the message for the diskset error stored in ep and appends it
+ * after whatever NUL-terminated text buf already holds. size is the
+ * total capacity of buf; snprintf() truncates anything that does not fit.
+ * Returns buf.
+ */
+static char *
+ds_to_str(
+ md_error_t *ep,
+ char *buf,
+ size_t size
+)
+{
+ md_ds_error_t *ip = &ep->info.md_error_info_t_u.ds_error;
+ /* p/psize describe the unused tail of buf that this call may write */
+ char *p = buf + strlen(buf);
+ size_t psize = size - strlen(buf);
+
+ switch (ip->errnum) {
+ case MDE_DS_DUPHOST:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s is specified more than once"), ip->node);
+ break;
+ case MDE_DS_NOTNODENAME:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "\"%s\" is not a nodename, but a network name"), ip->node);
+ break;
+ case MDE_DS_SELFNOTIN:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "nodename of host %s creating the set must be included"),
+ ip->node);
+ break;
+ case MDE_DS_NODEHASSET:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s already has set"), ip->node);
+ break;
+ case MDE_DS_NODENOSET:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s does not have set"), ip->node);
+ break;
+ case MDE_DS_NOOWNER:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "must be owner of the set for this command"));
+ break;
+ case MDE_DS_NOTOWNER:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "only the current owner %s may operate on this set"),
+ ip->node);
+ break;
+ case MDE_DS_NODEISNOTOWNER:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s is not the owner"), ip->node);
+ break;
+ case MDE_DS_NODEINSET:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s is already in the set"), ip->node);
+ break;
+ case MDE_DS_NODENOTINSET:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s is not in the set"), ip->node);
+ break;
+ case MDE_DS_SETNUMBUSY:
+ /* NOTE(review): %ld assumes ip->setno is long-sized -- confirm */
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s already has a set numbered %ld"),
+ ip->node, ip->setno);
+ break;
+ case MDE_DS_SETNUMNOTAVAIL:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "no available set numbers"));
+ break;
+ case MDE_DS_SETNAMEBUSY:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "set name is in-use or invalid on host %s"), ip->node);
+ break;
+ case MDE_DS_DRIVENOTCOMMON:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "drive %s is not common with host %s"),
+ ip->drive, ip->node);
+ break;
+ case MDE_DS_DRIVEINSET:
+ /* NOTE(review): ip->node is printed as the set name here -- confirm */
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "drive %s is in set %s"), ip->drive, ip->node);
+ break;
+ case MDE_DS_DRIVENOTINSET:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "drive %s is not in set"), ip->drive);
+ break;
+ case MDE_DS_DRIVEINUSE:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "drive %s is in use"), ip->drive);
+ break;
+ case MDE_DS_DUPDRIVE:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "drive %s is specified more than once"), ip->drive);
+ break;
+ case MDE_DS_INVALIDSETNAME:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "set name contains invalid characters"));
+ break;
+ case MDE_DS_HASDRIVES:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "unable to delete set, it still has drives"));
+ break;
+ case MDE_DS_SIDENUMNOTAVAIL:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "maximum number of nodenames exceeded"));
+ break;
+ case MDE_DS_SETNAMETOOLONG:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "set name is too long"));
+ break;
+ case MDE_DS_NODENAMETOOLONG:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host name %s is too long"), ip->node);
+ break;
+ case MDE_DS_OHACANTDELSELF:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+"administrator host %s deletion disallowed in one host admin mode"),
+ ip->node);
+ break;
+ case MDE_DS_HOSTNOSIDE:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "side information missing for host %s"), ip->node);
+ break;
+ case MDE_DS_SETLOCKED:
+ /*
+ * NOTE(review): the host name is taken from ip->drive rather than
+ * ip->node -- confirm against the code that raises this error.
+ */
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s is modifying set - try later or restart rpc.metad"),
+ ip->drive);
+ break;
+ case MDE_DS_ULKSBADKEY:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "set unlock failed - bad key"));
+ break;
+ case MDE_DS_LKSBADKEY:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "set lock failed - bad key"));
+ break;
+ case MDE_DS_WRITEWITHSULK:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "write operation attempted on set with set unlocked"));
+ break;
+ case MDE_DS_SETCLEANUP:
+ /* NOTE(review): ip->node is printed as the set name here -- confirm */
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "set \"%s\" is out of date - cleaning up - take failed"),
+ ip->node);
+ break;
+ case MDE_DS_CANTDELSELF:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+"administrator host %s can't be deleted, other hosts still in set\n"
+"Use -f to override"), ip->node);
+ break;
+ case MDE_DS_HASMED:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "unable to delete set, it still has mediator hosts"));
+ break;
+ case MDE_DS_TOOMANYALIAS:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "%s causes there to be more aliases than allowed"),
+ ip->node);
+ break;
+ case MDE_DS_ISMED:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "%s is already a mediator host"), ip->node);
+ break;
+ case MDE_DS_ISNOTMED:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "%s is not a mediator host"), ip->node);
+ break;
+ case MDE_DS_INVALIDMEDNAME:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "mediator name \"%s\" contains invalid characters"),
+ ip->node);
+ break;
+ case MDE_DS_ALIASNOMATCH:
+ /* NOTE(review): ip->drive is printed as the host name here -- confirm */
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "mediator alias \"%s\" is not an alias for host "
+ "\"%s\""), ip->node, ip->drive);
+ break;
+ case MDE_DS_NOMEDONHOST:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "unable to contact %s on host \"%s\""),
+ MED_SERVNAME, ip->node);
+ break;
+ case MDE_DS_DRIVENOTONHOST:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "drive %s is not present on host %s"),
+ ip->drive, ip->node);
+ break;
+ case MDE_DS_CANTDELMASTER:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "master %s can't be deleted, other hosts still in set"),
+ ip->node);
+ break;
+ case MDE_DS_NOTINMEMBERLIST:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "node %s is not in membership list"),
+ ip->node);
+ break;
+ case MDE_DS_MNCANTDELSELF:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s can't delete self from multi-owner set\n"
+ "while other hosts still in set"),
+ ip->node);
+ break;
+ case MDE_DS_RPCVERSMISMATCH:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "host %s does not support multi-owner diskset"),
+ ip->node);
+ break;
+ case MDE_DS_WITHDRAWMASTER:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "master host %s cannot withdraw from multi-owner diskset "
+ "when other owner nodes are still present in diskset"),
+ ip->node);
+ break;
+ case MDE_DS_CANTRESNARF:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "imported set could not be loaded"));
+ break;
+ case MDE_DS_INSUFQUORUM:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "insufficient replica quorum detected. Use "
+ "-f to force import of the set"));
+ break;
+ case MDE_DS_EXTENDEDNM:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "multiple namespace records detected"));
+ break;
+ case MDE_DS_PARTIALSET:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "partial diskset detected\n"
+ "Please refer to the Solaris Volume Manager documentation,"
+ "\nTroubleshooting section, at http://docs.sun.com or from"
+ "\nyour local copy"));
+ break;
+ case MDE_DS_COMMDCTL_SUSPEND_NYD:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "rpc.mdcommd on host %s is not yet drained during "
+ "suspend operation"),
+ ip->node);
+ break;
+ case MDE_DS_COMMDCTL_SUSPEND_FAIL:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "rpc.mdcommd on host %s failed suspend operation"),
+ ip->node);
+ break;
+ case MDE_DS_COMMDCTL_REINIT_FAIL:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "rpc.mdcommd on host %s failed reinitialization operation"),
+ ip->node);
+ break;
+ case MDE_DS_COMMDCTL_RESUME_FAIL:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "rpc.mdcommd on host %s failed resume operation"),
+ ip->node);
+ break;
+ case MDE_DS_NOTNOW_RECONFIG:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "command terminated, host %s starting reconfig cycle"),
+ ip->node);
+ break;
+ case MDE_DS_NOTNOW_CMD:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "metaset or metadb command already running on diskset "
+ "on host %s"), ip->node);
+ break;
+ case MDE_DS_COMMD_SEND_FAIL:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "rpc.mdcommd on host %s failed operation"),
+ ip->node);
+ break;
+ case MDE_DS_MASTER_ONLY:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "this command must be run on the master node of the set,"
+ " which is currently %s"), ip->node);
+ break;
+ case MDE_DS_SINGLEHOST:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "diskset is auto-take; cannot accept additional hosts"));
+ break;
+ case MDE_DS_AUTONOTSET:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "auto-take is not enabled on diskset"));
+ break;
+ case MDE_DS_INVALIDDEVID:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "Invalid device id on drive %s on host %s"), ip->drive,
+ ip->node);
+ break;
+ case MDE_DS_SETNOTIMP:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "Unable to import set on node %s"), ip->node);
+ break;
+ case MDE_DS_NOTSELFIDENTIFY:
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "Drive %s won't be self identifying"), ip->drive);
+ break;
+ default:
+ /* unrecognised code: report the raw number instead of failing */
+ (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+ "unknown diskset error code %d"), ip->errnum);
+ break;
+ }
+
+ return (buf);
+}
+
+/*
+ * convert error to printable string
+ */
+static char *
+mde_to_str(
+	md_error_t	*ep
+)
+{
+	/* static storage: the returned text is overwritten by the next call */
+	static char	msgbuf[BUFSIZ];
+	size_t		msgsz = sizeof (msgbuf);
+	char		*str;
+
+	/* initialize the buffer to an empty string */
+	msgbuf[0] = '\0';
+
+	/* hand off to the printer for this error class */
+	switch (ep->info.errclass) {
+	case MDEC_VOID:
+		str = void_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_SYS:
+		str = sys_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_RPC:
+		str = rpc_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_DEV:
+		str = dev_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_USE:
+		str = use_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_MD:
+		str = md_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_COMP:
+		str = comp_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_HSP:
+		str = hsp_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_HS:
+		str = hs_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_MDDB:
+		str = mddb_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_DS:
+		str = ds_to_str(ep, msgbuf, msgsz);
+		break;
+	case MDEC_OVERLAP:
+		str = overlap_to_str(ep, msgbuf, msgsz);
+		break;
+	default:
+		/* no printer for this class: report the raw class number */
+		(void) snprintf(msgbuf, msgsz,
+		    dgettext(TEXT_DOMAIN, "unknown error class %d"),
+		    ep->info.errclass);
+		str = msgbuf;
+		break;
+	}
+
+	return (str);
+}
+
+/*
+ * print log prefix
+ */
+void
+md_logpfx(
+	FILE	*fp
+)
+{
+	time_t		t;
+	struct tm	*tm;
+	char		buf[100];
+
+	/*
+	 * Write "<timestamp>: <myname>: " to fp. The timestamp part is
+	 * dropped if the time cannot be obtained or formatted.
+	 *
+	 * strftime() returns 0, leaving buf indeterminate, when the result
+	 * does not fit; it never returns a value >= the size it was given.
+	 * Success is therefore "non-zero", not "< sizeof (buf)".
+	 *
+	 * NOTE(review): the null format relies on the platform's strftime()
+	 * substituting a default format -- confirm for non-Solaris builds.
+	 */
+	if ((time(&t) != (time_t)-1) &&
+	    ((tm = localtime(&t)) != NULL) &&
+	    (strftime(buf, sizeof (buf), (char *)0, tm) != 0)) {
+		(void) fprintf(fp, "%s: ", buf);
+	}
+	(void) fprintf(fp, "%s: ", myname);
+}
+
+/*
+ * varargs sperror()
+ */
+/*PRINTFLIKE2*/
+static char *
+mde_vsperror(
+	md_error_t	*ep,
+	const char	*fmt,
+	va_list		ap
+)
+{
+	/* static storage: the returned text is overwritten by the next call */
+	static char	buf[BUFSIZ];
+	int		has_host = (ep->host != NULL) && (*(ep->host) != '\0');
+	int		has_extra = (ep->extra != NULL) &&
+			    (*(ep->extra) != '\0');
+	int		has_name = (ep->name != NULL) && (*(ep->name) != '\0');
+	size_t		used;
+
+	/* context: each non-empty field is followed by ": " */
+	(void) snprintf(buf, sizeof (buf), "%s%s%s%s%s%s",
+	    has_host ? ep->host : "", has_host ? ": " : "",
+	    has_extra ? ep->extra : "", has_extra ? ": " : "",
+	    has_name ? ep->name : "", has_name ? ": " : "");
+	used = strlen(buf);
+
+	/* user defined part, also followed by ": " */
+	if ((fmt != NULL) && (*fmt != '\0')) {
+		(void) vsnprintf(buf + used, sizeof (buf) - used, fmt, ap);
+		used = strlen(buf);
+		(void) snprintf(buf + used, sizeof (buf) - used, ": ");
+		used = strlen(buf);
+	}
+
+	/* error code text, terminated with a newline */
+	(void) snprintf(buf + used, sizeof (buf) - used, "%s\n",
+	    mde_to_str(ep));
+
+	/* return error message */
+	return (buf);
+}
+
+/*
+ * printf-like sperror()
+ */
+/*PRINTFLIKE2*/
+char *
+mde_sperror(
+	md_error_t	*ep,
+	const char	*fmt,
+	...
+)
+{
+	va_list	args;
+	char	*msg;
+
+	/* collect the variable arguments and let mde_vsperror() format them */
+	va_start(args, fmt);
+	msg = mde_vsperror(ep, fmt, args);
+	va_end(args);
+
+	return (msg);
+}
+
+/*
+ * printf-like perror()
+ */
+/*PRINTFLIKE2*/
+void
+mde_perror(
+	md_error_t	*ep,
+	const char	*fmt,
+	...
+)
+{
+	va_list	ap;
+	char	*emsg;
+
+	/*
+	 * Format the error in ep, prefixed by the caller's printf-style
+	 * text, and report it on stderr, in the metalog file if one is
+	 * open, and through syslog if enabled.
+	 */
+
+	/* get error message */
+	va_start(ap, fmt);
+	emsg = mde_vsperror(ep, fmt, ap);
+	va_end(ap);
+	assert((emsg != NULL) && (*emsg != '\0'));
+
+	/* stderr */
+	(void) fprintf(stderr, "%s: %s\n", myname, emsg);
+	(void) fflush(stderr);
+
+	/* metalog */
+	if (metalogfp != NULL) {
+		md_logpfx(metalogfp);
+		(void) fprintf(metalogfp, "%s\n", emsg);
+		(void) fflush(metalogfp);
+		(void) fsync(fileno(metalogfp));
+	}
+
+	/*
+	 * syslog: emsg is already fully formatted and may contain '%'
+	 * (from host/device names or from the message text), so it must
+	 * be passed as data, never as the format string.
+	 */
+	if (metasyslog) {
+		syslog(LOG_ERR, "%s", emsg);
+	}
+}
+
+/*
+ * printf-like perror()
+ */
+/*PRINTFLIKE1*/
+void
+md_perror(
+	const char	*fmt,
+	...
+)
+{
+	md_error_t	status = mdnullerror;
+	va_list		ap;
+	char		*emsg;
+
+	/*
+	 * Like mde_perror(), but the error reported is the current errno,
+	 * wrapped in a temporary md_error_t.
+	 */
+
+	/* get error message */
+	(void) mdsyserror(&status, errno, NULL);
+	va_start(ap, fmt);
+	emsg = mde_vsperror(&status, fmt, ap);
+	va_end(ap);
+	assert((emsg != NULL) && (*emsg != '\0'));
+	/* emsg lives in mde_vsperror()'s static buffer, not in status */
+	mdclrerror(&status);
+
+	/* stderr */
+	(void) fprintf(stderr, "%s: %s\n", myname, emsg);
+	(void) fflush(stderr);
+
+	/* metalog */
+	if (metalogfp != NULL) {
+		md_logpfx(metalogfp);
+		(void) fprintf(metalogfp, "%s\n", emsg);
+		(void) fflush(metalogfp);
+		(void) fsync(fileno(metalogfp));
+	}
+
+	/*
+	 * syslog: emsg is already fully formatted and may contain '%',
+	 * so it must be passed as data, never as the format string.
+	 */
+	if (metasyslog) {
+		syslog(LOG_ERR, "%s", emsg);
+	}
+}
+
+/*
+ * printf-like log
+ */
+/*PRINTFLIKE1*/
+void
+md_eprintf(
+	const char	*fmt,
+	...
+)
+{
+	va_list	ap;
+
+	/*
+	 * Send one printf-style message to stderr, to the metalog file if
+	 * one is open, and to syslog if enabled.
+	 *
+	 * A va_list is indeterminate once a v*printf() routine has
+	 * consumed it, so it is re-initialized for every consumer rather
+	 * than being shared between them.
+	 */
+
+	/* stderr */
+	(void) fprintf(stderr, "%s: ", myname);
+	va_start(ap, fmt);
+	(void) vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	(void) fflush(stderr);
+
+	/* metalog */
+	if (metalogfp != NULL) {
+		md_logpfx(metalogfp);
+		va_start(ap, fmt);
+		(void) vfprintf(metalogfp, fmt, ap);
+		va_end(ap);
+		(void) fflush(metalogfp);
+		(void) fsync(fileno(metalogfp));
+	}
+
+	/* syslog */
+	if (metasyslog) {
+		va_start(ap, fmt);
+		vsyslog(LOG_ERR, fmt, ap);
+		va_end(ap);
+	}
+}
+
+/*
+ * metaclust timing messages logging routine
+ *
+ * level - The class of the message to be logged. Message will be logged
+ * if this is less than or equal to the verbosity level.
+ */
+void
+meta_mc_log(int level, const char *fmt, ...)
+{
+	va_list	args;
+
+	/*
+	 * Log all messages up to MC_LOG2 to syslog regardless of the
+	 * verbosity level: LOG_ERR for levels up to MC_LOG1, LOG_INFO
+	 * otherwise.
+	 *
+	 * The va_list is started and ended around each consumer because
+	 * it is indeterminate after a v*() routine has used it.
+	 */
+	if (metasyslog && (level <= MC_LOG2)) {
+		va_start(args, fmt);
+		if (level <= MC_LOG1)
+			(void) vsyslog(LOG_ERR, fmt, args);
+		else
+			(void) vsyslog(LOG_INFO, fmt, args);
+		va_end(args);
+	}
+
+	/*
+	 * Print all messages to stderr provided the message level is
+	 * within the verbosity level
+	 */
+	if (level <= verbosity) {
+		(void) fprintf(stderr, "%s: ", myname);
+		va_start(args, fmt);
+		(void) vfprintf(stderr, fmt, args);
+		va_end(args);
+		(void) fprintf(stderr, "\n");
+		(void) fflush(stderr);
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_getdevs.c b/usr/src/lib/lvm/libmeta/common/meta_getdevs.c
new file mode 100644
index 0000000000..af828bd083
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_getdevs.c
@@ -0,0 +1,592 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * get dev_t list
+ */
+
+#include <meta.h>
+
+#include <sys/mhd.h>
+#include <strings.h>
+
+/*
+ * private version of minor(), able to handle 64 bit and 32 bit devices.
+ * print a warning out in case a 32 bit dev is specified.
+ */
+minor_t
+meta_getminor(md_dev64_t dev64)
+{
+	/* a real 64 bit dev has something above the low NBITSMAJOR64 bits */
+	int	is_dev64 = ((dev64 >> NBITSMAJOR64) > 0);
+
+	if (!is_dev64) {
+		/* 32 bit dev: optionally warn, then use the 32 bit mask */
+		if (getenv("META_DEBUG") != NULL) {
+			(void) printf(
+			    "meta_getminor called with 32 bit dev: 0x%llx\n",
+			    dev64);
+		}
+		return ((minor_t)(dev64 & MAXMIN32));
+	}
+
+	return ((minor_t)(dev64 & MAXMIN64));
+}
+
+/*
+ * private version of major(), able to handle 64 bit and 32 bit devices.
+ * print a warning out in case a 32 bit dev is specified.
+ */
+major_t
+meta_getmajor(md_dev64_t dev64)
+{
+	/* a real 64 bit dev has something above the low NBITSMAJOR64 bits */
+	int	is_dev64 = ((dev64 >> NBITSMAJOR64) > 0);
+
+	if (!is_dev64) {
+		/* 32 bit dev: optionally warn, then use the 32 bit layout */
+		if (getenv("META_DEBUG") != NULL) {
+			(void) printf(
+			    "meta_getmajor called with 32 bit dev: 0x%llx\n",
+			    dev64);
+		}
+		return ((major_t)((dev64 >> NBITSMINOR32) & MAXMAJ32));
+	}
+
+	return ((major_t)((dev64 >> NBITSMINOR64) & MAXMAJ64));
+}
+
+/*
+ * private version of cmpldev(), able to handle 64 bit and 32 bit devices.
+ */
+dev32_t
+meta_cmpldev(md_dev64_t dev64)
+{
+	major_t	mjr = (major_t)(dev64 >> NBITSMAJOR64);
+	minor_t	mnr;
+
+	if (mjr != 0) {
+		/* 64 bit dev: repack major and masked minor in 32 bit form */
+		mnr = (dev32_t)dev64 & MAXMIN32;
+		return ((mjr << NBITSMINOR32) | mnr);
+	}
+
+	/* nothing in the upper bits: the value is returned truncated */
+	return ((dev32_t)dev64);
+}
+
+/*
+ * private version of expldev(), able to handle 64 bit and 32 bit devices.
+ */
+md_dev64_t
+meta_expldev(md_dev64_t dev64)
+{
+	major_t		mjr = (major_t)(dev64 >> NBITSMAJOR64);
+	minor_t		mnr;
+	md_dev64_t	shifted;
+
+	/* a 64 bit device was given, return unchanged */
+	if (mjr > 0)
+		return (dev64);
+
+	/* split the 32 bit encoding, then rebuild it with the 64 bit shift */
+	mnr = (minor_t)(dev64) & MAXMIN32;
+	mjr = ((major_t)dev64 >> NBITSMINOR32) & MAXMAJ32;
+	shifted = (md_dev64_t)mjr << NBITSMINOR64;
+
+	return (shifted | mnr);
+}
+
+/*
+ * get underlying devices (recursively)
+ *
+ * Appends to *nlpp the non-metadevice names found beneath namep. A name
+ * that is not a metadevice is appended itself, unless an entry with the
+ * same bname is already on the list. For a metadevice the component
+ * devices are fetched with MD_IOCGET_DEVS and each one is processed by a
+ * recursive call.
+ *
+ * Returns 0 on success and -1 on failure. An ENODEV system error from
+ * the ioctl is treated as success with nothing added.
+ */
+int
+meta_getdevs(
+ mdsetname_t *sp,
+ mdname_t *namep,
+ mdnamelist_t **nlpp,
+ md_error_t *ep
+)
+{
+ char *miscname;
+ md_dev64_t *mydevs = NULL;
+ md_getdevs_params_t mgd;
+ size_t i;
+ int rval = -1;
+ md_sys_error_t *ip;
+
+ /* must have local set */
+ assert(sp != NULL);
+
+ /* just add regular devices */
+ if (! metaismeta(namep)) {
+ mdnamelist_t *p;
+
+ /*
+ * If the dev_t is in the array already
+ * then let's continue.
+ */
+ for (p = *nlpp; (p != NULL); p = p->next) {
+ if (strcmp(namep->bname, p->namep->bname) == 0) {
+ rval = 0;
+ goto out;
+ }
+ }
+
+ /* add to list */
+ (void) metanamelist_append(nlpp, namep);
+ rval = 0;
+ goto out;
+ }
+
+ /* get MD misc module */
+ if ((miscname = metagetmiscname(namep, ep)) == NULL)
+ goto out;
+
+ /* get count of underlying devices */
+ (void) memset(&mgd, '\0', sizeof (mgd));
+ MD_SETDRIVERNAME(&mgd, miscname, sp->setno);
+ mgd.mnum = meta_getminor(namep->dev);
+ /* first pass: no buffer is supplied, only mgd.cnt is wanted back */
+ mgd.cnt = 0;
+ mgd.devs = NULL;
+ if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, namep->cname) != 0) {
+ if (mgd.mde.info.errclass == MDEC_SYS) {
+ ip = &mgd.mde.info.md_error_info_t_u.sys_error;
+ /*
+ * ENODEV is not reported to the caller.
+ * NOTE(review): mgd.mde is not cleared on this path --
+ * confirm it holds nothing that needs freeing.
+ */
+ if (ip->errnum == ENODEV) {
+ rval = 0;
+ goto out;
+ }
+ }
+ (void) mdstealerror(ep, &mgd.mde);
+ goto out;
+ } else if (mgd.cnt <= 0) {
+ /* no component devices: nothing to add */
+ assert(mgd.cnt >= 0);
+ rval = 0;
+ goto out;
+ }
+
+ /* get underlying devices */
+ /* second pass: same ioctl, now with a buffer of mgd.cnt entries */
+ mydevs = Zalloc(sizeof (*mydevs) * mgd.cnt);
+ mgd.devs = (uintptr_t)mydevs;
+ if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, namep->cname) != 0) {
+ if (mgd.mde.info.errclass == MDEC_SYS) {
+ ip = &mgd.mde.info.md_error_info_t_u.sys_error;
+ if (ip->errnum == ENODEV) {
+ rval = 0;
+ goto out;
+ }
+ }
+ (void) mdstealerror(ep, &mgd.mde);
+ goto out;
+ } else if (mgd.cnt <= 0) {
+ assert(mgd.cnt >= 0);
+ rval = 0;
+ goto out;
+ }
+ /* recurse */
+ for (i = 0; (i < mgd.cnt); ++i) {
+ mdname_t *devnp;
+
+ /* skip slots that hold no device */
+ if (mydevs[i] == NODEV64) {
+ continue;
+ }
+ /*
+ * NOTE(review): metadevname() is given the address of sp and so
+ * may change it for later iterations -- confirm.
+ */
+ if ((devnp = metadevname(&sp, mydevs[i], ep)) == NULL) {
+ goto out;
+ }
+ if (meta_getdevs(sp, devnp, nlpp, ep) != 0)
+ goto out;
+ }
+
+ /* success */
+ rval = 0;
+
+ /* cleanup, return error */
+out:
+ if (mydevs != NULL)
+ Free(mydevs);
+ return (rval);
+}
+
+/*
+ * get all dev_t for a set
+ */
+/*
+ * Run meta_getdevs() on every entry of nlp, then free nlp.
+ * Returns -1 if any entry failed, 0 otherwise.
+ */
+static int
+getdevs_from_namelist(
+	mdsetname_t	*sp,
+	mdnamelist_t	*nlp,
+	mdnamelist_t	**nlpp,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*np;
+	int		status = 0;
+
+	for (np = nlp; np != NULL; np = np->next) {
+		if (meta_getdevs(sp, np->namep, nlpp, ep) != 0)
+			status = -1;
+	}
+	metafreenamelist(nlp);
+
+	return (status);
+}
+
+int
+meta_getalldevs(
+	mdsetname_t	*sp,		/* set to look in */
+	mdnamelist_t	**nlpp,		/* returned devices */
+	int		check_db,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*rlp, *rp;
+	mdnamelist_t		*nlp;
+	mdhspnamelist_t		*hspnlp, *hspp;
+	int			rval = 0;
+
+	assert(sp != NULL);
+
+	/*
+	 * Every section below keeps going after a failure and only
+	 * records it in rval, so one bad object does not hide the rest.
+	 */
+
+	/* replicas (only when asked for) */
+	if (check_db == TRUE) {
+		rlp = NULL;
+		if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
+			rval = -1;
+		for (rp = rlp; rp != NULL; rp = rp->rl_next) {
+			if (meta_getdevs(sp, rp->rl_repp->r_namep,
+			    nlpp, ep) != 0)
+				rval = -1;
+		}
+		metafreereplicalist(rlp);
+	}
+
+	/* stripes */
+	nlp = NULL;
+	if (meta_get_stripe_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	if (getdevs_from_namelist(sp, nlp, nlpp, ep) != 0)
+		rval = -1;
+
+	/* mirrors */
+	nlp = NULL;
+	if (meta_get_mirror_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	if (getdevs_from_namelist(sp, nlp, nlpp, ep) != 0)
+		rval = -1;
+
+	/* trans devices */
+	nlp = NULL;
+	if (meta_get_trans_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	if (getdevs_from_namelist(sp, nlp, nlpp, ep) != 0)
+		rval = -1;
+
+	/* hot spare pools: walk each pool's hot spares */
+	hspnlp = NULL;
+	if (meta_get_hsp_names(sp, &hspnlp, 0, ep) < 0)
+		rval = -1;
+	for (hspp = hspnlp; hspp != NULL; hspp = hspp->next) {
+		md_hsp_t	*hsp;
+		uint_t		idx;
+
+		hsp = meta_get_hsp(sp, hspp->hspnamep, ep);
+		if (hsp == NULL) {
+			rval = -1;
+			continue;
+		}
+		for (idx = 0; idx < hsp->hotspares.hotspares_len; ++idx) {
+			md_hs_t	*hs = &hsp->hotspares.hotspares_val[idx];
+
+			if (meta_getdevs(sp, hs->hsnamep, nlpp, ep) != 0)
+				rval = -1;
+		}
+	}
+	metafreehspnamelist(hspnlp);
+
+	/* raids */
+	nlp = NULL;
+	if (meta_get_raid_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	if (getdevs_from_namelist(sp, nlp, nlpp, ep) != 0)
+		rval = -1;
+
+	/* soft partitions */
+	nlp = NULL;
+	if (meta_get_sp_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	if (getdevs_from_namelist(sp, nlp, nlpp, ep) != 0)
+		rval = -1;
+
+	return (rval);
+}
+
+/*
+ * get vtoc from a device already opened.
+ * returns
+ * 0 on success,
+ * -1 on error. If the error was ENOTSUP, partno will be set to
+ * VT_ENOTSUP if possible.
+ */
+int
+meta_getvtoc(
+	int		fd,		/* fd for named device */
+	char		*devname,	/* name of device */
+	struct vtoc	*vtocbufp,	/* vtoc buffer to fill */
+	int		*partno,	/* return partno here */
+	md_error_t	*ep
+)
+{
+	int	slice;
+	int	err;
+
+	(void) memset(vtocbufp, 0, sizeof (*vtocbufp));
+	slice = read_vtoc(fd, vtocbufp);
+
+	if (slice >= 0) {
+		/* Slice number for *p0 partition (whole disk on x86) is 16 */
+		if (slice >= V_NUMPAR)
+			return (mdsyserror(ep, EINVAL, devname));
+
+		if (partno != NULL)
+			*partno = slice;
+		return (0);
+	}
+
+	/* read_vtoc() failed: save errno before the ioctl can change it */
+	err = errno;
+
+	if (ioctl(fd, MHIOCSTATUS, NULL) == 1) {
+		err = EACCES;
+	} else if (slice == VT_EINVAL) {
+		err = EINVAL;
+	} else if (slice == VT_EIO) {
+		err = EIO;
+	} else if ((slice == VT_ENOTSUP) && (partno != NULL)) {
+		/* report "not supported" through partno, leaving ep alone */
+		*partno = VT_ENOTSUP;
+		return (-1);
+	}
+
+	return (mdsyserror(ep, err, devname));
+}
+/*
+ * set mdvtoc for a meta devices
+ */
+int
+meta_setmdvtoc(
+	int		fd,		/* fd for named device */
+	char		*devname,	/* name of device */
+	mdvtoc_t	*mdvtocp,	/* mdvtoc buffer to fill */
+	md_error_t	*ep
+)
+{
+	uint_t	slice;
+	int	have_space = 0;
+
+	/* Sanity-check the mdvtoc; ep is not set for these rejections */
+	if (mdvtocp->nparts > V_NUMPAR)
+		return (-1);
+
+	/*
+	 * since many drivers won't allow opening a device make sure
+	 * all partitions aren't being set to zero. If all are zero then
+	 * we have no way to set them to something else
+	 */
+	for (slice = 0; slice < mdvtocp->nparts; slice++) {
+		if (mdvtocp->parts[slice].size > 0) {
+			have_space = 1;
+			break;
+		}
+	}
+	if (!have_space)
+		return (-1);
+
+	/* Write the mdvtoc */
+	if (ioctl(fd, DKIOCSVTOC, (caddr_t)mdvtocp) == -1)
+		return (mdsyserror(ep, errno, devname));
+
+	return (0);
+}
+
+/*
+ * set vtoc
+ */
+int
+meta_setvtoc(
+	int		fd,		/* fd for named device */
+	char		*devname,	/* name of device */
+	struct vtoc	*vtocbufp,	/* vtoc buffer to fill */
+	md_error_t	*ep
+)
+{
+	int	rc = write_vtoc(fd, vtocbufp);
+	int	err;
+
+	if (rc >= 0)
+		return (0);
+
+	/* map the VT_* codes we know; anything else falls back to errno */
+	err = (rc == VT_EINVAL) ? EINVAL :
+	    ((rc == VT_EIO) ? EIO : errno);
+
+	return (mdsyserror(ep, err, devname));
+}
+
+/*
+ * FUNCTION:	meta_get_names()
+ * INPUT:	drivername - char string containing the driver name
+ *		sp - the set to get the device names from
+ *		options - options from the command line
+ * OUTPUT:	nlpp - list of all device names found
+ *		ep - return error pointer
+ * RETURNS:	int - -1 if error (ep is set), otherwise the number of
+ *		devices reported by the driver (0 if there are none)
+ * PURPOSE:	returns a list of all devices of the given driver in the
+ *		metadb for the specified set. If a name lookup fails
+ *		part way through, the list is freed and *nlpp is set
+ *		to NULL.
+ */
+int
+meta_get_names(
+	char		*drivername,
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_i_getnum_t	gn;		/* MD_IOCGET_NUM params */
+	mdnamelist_t	**tailpp = nlpp;
+	minor_t		*minors = NULL;
+	minor_t		*m_ptr;
+	int		i;
+
+	(void) memset(&gn, '\0', sizeof (gn));
+	MD_SETDRIVERNAME(&gn, drivername, sp->setno);
+
+	/* get number of devices */
+	if (metaioctl(MD_IOCGET_NUM, &gn, &gn.mde, NULL) != 0) {
+		if (mdiserror(&gn.mde, MDE_UNIT_NOT_FOUND)) {
+			/* "unit not found" is not an error here */
+			mdclrerror(&gn.mde);
+		} else {
+			(void) mdstealerror(ep, &gn.mde);
+			return (-1);
+		}
+	}
+
+	if (gn.size > 0) {
+		/* malloc minor number buffer to be filled by ioctl */
+		minors = (minor_t *)malloc(gn.size * sizeof (minor_t));
+		if (minors == NULL) {
+			/*
+			 * Report through ep and return -1 like every
+			 * other failure; a positive errno value would
+			 * be taken for a device count by the caller.
+			 */
+			(void) mdsyserror(ep, ENOMEM, NULL);
+			return (-1);
+		}
+		gn.minors = (uintptr_t)minors;
+		if (metaioctl(MD_IOCGET_NUM, &gn, &gn.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &gn.mde);
+			free(minors);
+			return (-1);
+		}
+		m_ptr = minors;
+		for (i = 0; i < gn.size; i++) {
+			mdname_t	*np;
+
+			/* get name */
+			np = metamnumname(&sp, *m_ptr,
+			    ((options & PRINT_FAST) ? 1 : 0), ep);
+			if (np == NULL)
+				goto out;
+
+			tailpp = meta_namelist_append_wrapper(
+			    tailpp, np);
+
+			/* next device */
+			m_ptr++;
+		}
+		free(minors);
+	}
+	return (gn.size);
+
+out:
+	/* a name lookup failed: drop the buffer and the whole list */
+	if (minors != NULL)
+		free(minors);
+	metafreenamelist(*nlpp);
+	*nlpp = NULL;
+	return (-1);
+}
+
+/*
+ * Wrap lib/libdevid/devid_deviceid_to_nmlist. We want to take the
+ * results from that function and filter out the c[t]dp style names that
+ * we typically see on x86 so that we never see them.
+ */
+int
+meta_deviceid_to_nmlist(char *search_path, ddi_devid_t devid, char *minor_name,
+	devid_nmlist_t **retlist)
+{
+	int		rc;
+	int		nslots = 1;	/* one slot for the list terminator */
+	uint_t		unused;		/* parse_ctd() output, not needed */
+	devid_nmlist_t	*src;
+	devid_nmlist_t	*dst;
+	devid_nmlist_t	*filtered;
+
+	rc = devid_deviceid_to_nmlist(search_path, devid, minor_name, retlist);
+	if (rc != 0)
+		return (rc);
+
+	/* pass 1: count the entries that are not c[t]dp style names */
+	for (src = *retlist; src->dev != NODEV; src++) {
+		if (parse_ctd(basename(src->devname), &unused) != 1)
+			nslots++;
+	}
+
+	/* pass 2: copy those entries into a freshly allocated array */
+	filtered = Malloc(sizeof (devid_nmlist_t) * nslots);
+	dst = filtered;
+	for (src = *retlist; src->dev != NODEV; src++) {
+		/* a c[t]dp style name is simply skipped */
+		if (parse_ctd(basename(src->devname), &unused) == 1)
+			continue;
+
+		dst->dev = src->dev;
+		dst->devname = Strdup(src->devname);
+		dst++;
+	}
+
+	/* terminate the new list, then swap it in for the original */
+	dst->dev = NODEV;
+	dst->devname = NULL;
+	devid_free_nmlist(*retlist);
+	*retlist = filtered;
+
+	return (rc);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_hotspares.c b/usr/src/lib/lvm/libmeta/common/meta_hotspares.c
new file mode 100644
index 0000000000..a76f9f3765
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_hotspares.c
@@ -0,0 +1,1630 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * hotspares utilities
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_hotspares.h>
+#include <sys/lvm/md_convert.h>
+
+
+/*
+ * FUNCTION:	meta_get_hsp_names()
+ * INPUT:	sp	- the set name to get hotspares from
+ *		options	- options from the command line (not used here)
+ * OUTPUT:	hspnlpp	- list of all hotspare pool names
+ *		ep	- return error pointer
+ * RETURNS:	int	- -1 if error, otherwise the number of hotspare
+ *			  pools reported by the driver (0 if there are none)
+ * PURPOSE:	returns a list of all hotspares in the metadb
+ *		for all devices in the specified set
+ *
+ * On a name lookup failure the partially built list is freed and
+ * *hspnlpp is reset to NULL.
+ */
+/*ARGSUSED*/
+int
+meta_get_hsp_names(
+	mdsetname_t	*sp,
+	mdhspnamelist_t	**hspnlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	md_i_getnum_t	gn;		/* MD_IOCGET_NUM params */
+	minor_t		*minors = NULL;
+	minor_t		*m_ptr;
+	int		i;
+
+	/* we must have a set */
+	assert(sp != NULL);
+
+	(void) memset(&gn, 0, sizeof (gn));
+	MD_SETDRIVERNAME(&gn, MD_HOTSPARES, sp->setno);
+
+	/* get number of devices */
+	if (metaioctl(MD_IOCGET_NUM, &gn, &gn.mde, NULL) != 0) {
+		if (mdiserror(&gn.mde, MDE_UNIT_NOT_FOUND)) {
+			/* no hotspare pools at all is not an error */
+			mdclrerror(&gn.mde);
+		} else {
+			(void) mdstealerror(ep, &gn.mde);
+			return (-1);
+		}
+	}
+
+	if (gn.size > 0) {
+		/* malloc minor number buffer to be filled by ioctl */
+		minors = (minor_t *)malloc(gn.size * sizeof (minor_t));
+		if (minors == NULL) {
+			/*
+			 * Callers treat any non-negative return as a pool
+			 * count, so a raw (positive) ENOMEM must not be
+			 * returned; record it in ep and fail with -1.
+			 */
+			(void) mdsyserror(ep, ENOMEM, NULL);
+			return (-1);
+		}
+		gn.minors = (uintptr_t)minors;
+		if (metaioctl(MD_IOCGET_NUM, &gn, &gn.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &gn.mde);
+			free(minors);
+			return (-1);
+		}
+		m_ptr = minors;
+		for (i = 0; i < gn.size; i++) {
+			mdhspname_t	*hspnp;
+
+			/* get name */
+			if ((hspnp = metahsphspname(&sp, *m_ptr, ep))
+			    == NULL)
+				goto out;
+
+			/* append to list */
+			(void) metahspnamelist_append(hspnlpp, hspnp);
+
+			/* next device */
+			m_ptr++;
+		}
+		free(minors);
+	}
+	return (gn.size);
+
+out:
+	if (minors != NULL)
+		free(minors);
+	metafreehspnamelist(*hspnlpp);
+	*hspnlpp = NULL;
+	return (-1);
+}
+
+/*
+ * Fetch the driver's record for one hotspare pool.
+ *
+ * MD_IOCGET is issued twice: the first call (no buffer supplied) yields
+ * the size of the unit structure, the second fills a Zalloc()ed buffer of
+ * that size.  Returns the buffer, which the caller releases with Free(),
+ * or NULL with the ioctl error moved into ep.
+ */
+static get_hsp_t *
+get_hspinfo(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	md_error_t	*ep
+)
+{
+	md_i_get_t	req;
+
+	/* the pool must belong to the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* first pass: ask only for the size */
+	(void) memset(&req, 0, sizeof (req));
+	MD_SETDRIVERNAME(&req, MD_HOTSPARES, sp->setno);
+	req.id = hspnp->hsp;
+	if (metaioctl(MD_IOCGET, &req, &req.mde, hspnp->hspname) != 0) {
+		(void) mdstealerror(ep, &req.mde);
+		return (NULL);
+	}
+
+	/* second pass: fetch the structure itself */
+	assert(req.size > 0);
+	req.mdp = (uintptr_t)Zalloc(req.size);
+	if (metaioctl(MD_IOCGET, &req, &req.mde, hspnp->hspname) == 0)
+		return ((get_hsp_t *)req.mdp);
+
+	(void) mdstealerror(ep, &req.mde);
+	Free((void *)req.mdp);
+	return (NULL);
+}
+
+/*
+ * Release a hotspare pool unit: the hotspare array (when one was
+ * allocated) and then the unit structure itself.
+ */
+void
+meta_free_hsp(
+	md_hsp_t	*hspp
+)
+{
+	md_hs_t	*spares = hspp->hotspares.hotspares_val;
+
+	if (spares != NULL) {
+		/* an allocated array implies a non-zero count */
+		assert(hspp->hotspares.hotspares_len > 0);
+		Free(spares);
+	}
+	Free(hspp);
+}
+
+/*
+ * get hotspare pool unit (common)
+ *
+ * Returns the unit cached on hspnp when there is one.  Otherwise the
+ * pool record is read from the driver, a md_hsp_t is built from it (name,
+ * refcount and, per hotspare, its name, state, size, timestamp and
+ * revision), cached on hspnp->unitp and returned.  "fast" is handed
+ * through to metakeyname().  Returns NULL on failure.
+ */
+md_hsp_t *
+meta_get_hsp_common(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	get_hsp_t	*drv;
+	md_hsp_t	*pool;
+	uint_t		nspares;
+	uint_t		idx;
+
+	/* the pool must belong to the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* nothing to do when the unit is already cached */
+	if (hspnp->unitp != NULL)
+		return (hspnp->unitp);
+
+	/* read the driver's view of the pool */
+	drv = get_hspinfo(sp, hspnp, ep);
+	if (drv == NULL)
+		return (NULL);
+
+	/* build the unit; an empty pool gets no hotspare array */
+	pool = Zalloc(sizeof (*pool));
+	nspares = drv->ghsp_nhotspares;
+	pool->hotspares.hotspares_len = nspares;
+	if (nspares != 0) {
+		pool->hotspares.hotspares_val =
+		    Zalloc(nspares * sizeof (*pool->hotspares.hotspares_val));
+	}
+	pool->hspnamep = hspnp;
+	pool->refcount = drv->ghsp_refcount;
+
+	/* fill in each hotspare */
+	for (idx = 0; idx < nspares; idx++) {
+		md_hs_t		*spare = &pool->hotspares.hotspares_val[idx];
+		mdkey_t		key = drv->ghsp_hs_keys[idx];
+		get_hs_params_t	query;
+
+		/* name, from the namespace key */
+		spare->hsnamep = metakeyname(&sp, key, fast, ep);
+		if (spare->hsnamep == NULL)
+			goto fail;
+
+		/* state and geometry, from the driver */
+		(void) memset(&query, 0, sizeof (query));
+		MD_SETDRIVERNAME(&query, MD_HOTSPARES, sp->setno);
+		query.ghs_key = key;
+		if (metaioctl(MD_IOCGET_HS, &query, &query.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &query.mde);
+			goto fail;
+		}
+		spare->state = query.ghs_state;
+		spare->size = query.ghs_number_blks;
+		spare->timestamp = query.ghs_timestamp;
+		spare->revision = query.ghs_revision;
+	}
+
+	/* done with the driver record; cache and return the unit */
+	Free(drv);
+	hspnp->unitp = pool;
+	return (pool);
+
+fail:
+	Free(drv);
+	meta_free_hsp(pool);
+	return (NULL);
+}
+
+/*
+ * Get a hotspare pool unit; meta_get_hsp_common() with "fast" turned off.
+ */
+md_hsp_t *
+meta_get_hsp(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	md_error_t	*ep
+)
+{
+	md_hsp_t	*unit;
+
+	unit = meta_get_hsp_common(sp, hspnp, 0, ep);
+	return (unit);
+}
+
+/*
+ * Check one hotspare pool against a device range.
+ *
+ * Every hotspare of the pool that is not itself a metadevice is run
+ * through meta_check_overlap() against blocks [slblk, slblk + nblks) of
+ * np.  Returns 0 when the unit could be read and no check failed, -1
+ * otherwise.
+ */
+static int
+in_hsp(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_hsp_t	*pool;
+	uint_t		n;
+
+	/* the pool must belong to the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	pool = meta_get_hsp(sp, hspnp, ep);
+	if (pool == NULL)
+		return (-1);
+
+	for (n = 0; n < pool->hotspares.hotspares_len; n++) {
+		mdname_t	*hsnp = pool->hotspares.hotspares_val[n].hsnamep;
+
+		/* metadevice hotspares are skipped, the rest are checked */
+		if (!metaismeta(hsnp) &&
+		    meta_check_overlap(hspnp->hspname, np, slblk, nblks,
+		    hsnp, 0, -1, ep) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Check a device range against every hotspare pool of the set.
+ *
+ * Returns 0 when in_hsp() succeeds for all pools, -1 as soon as one pool
+ * fails the check or when the pool list cannot be obtained.
+ */
+int
+meta_check_inhsp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t	*pools = NULL;
+	mdhspnamelist_t	*cur;
+	int		status = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	if (meta_get_hsp_names(sp, &pools, 0, ep) < 0)
+		return (-1);
+
+	/* stop at the first pool that fails */
+	for (cur = pools; (cur != NULL) && (status == 0); cur = cur->next) {
+		if (in_hsp(sp, cur->hspnamep, np, slblk, nblks, ep) != 0)
+			status = -1;
+	}
+
+	metafreehspnamelist(pools);
+	return (status);
+}
+
+/*
+ * Vet a device for use as a hotspare.  In order, the device must pass
+ * metachkcomp(), must not be in use (meta_check_inuse() with
+ * MDCHK_INUSE), must belong to the set, and must pass
+ * meta_check_inmeta() with MDCHK_ALLOW_HS.  Returns 0 when every check
+ * passes, -1 at the first one that does not.
+ */
+int
+meta_check_hotspare(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	inmeta_opts = (MDCHK_ALLOW_HS);
+
+	if ((metachkcomp(np, ep) != 0) ||
+	    (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) ||
+	    (meta_check_inset(sp, np, ep) != 0) ||
+	    (meta_check_inmeta(sp, np, inmeta_opts, 0, -1, ep) != 0))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Write a hotspare pool to fp as a single line: the pool name followed
+ * by each hotspare.  Returns 0 on success; on an output error the errno
+ * is recorded against fname in ep and -1 is returned.
+ */
+static int
+hsp_print(
+	md_hsp_t	*hspp,
+	char		*fname,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	uint_t	n;
+
+	/* pool name */
+	if (fprintf(fp, "%s", hspp->hspnamep->hspname) == EOF)
+		goto fail;
+
+	/* hotspares */
+	for (n = 0; n < hspp->hotspares.hotspares_len; n++) {
+		mdname_t	*namep = hspp->hotspares.hotspares_val[n].hsnamep;
+		char		*text;
+
+		/*
+		 * A device under one of the standard paths is written by
+		 * its short cxtxdxsx / dx name, since metainit will assume
+		 * the default; anything else needs its full pathname to
+		 * work as intended.
+		 */
+		if ((strstr(namep->rname, "/dev/rdsk") != NULL) ||
+		    (strstr(namep->rname, "/dev/md/rdsk") != NULL) ||
+		    (strstr(namep->rname, "/dev/td/") != NULL))
+			text = namep->cname;
+		else
+			text = namep->rname;
+
+		if (fprintf(fp, " %s", text) == EOF)
+			goto fail;
+	}
+
+	/* finish the line */
+	if (fprintf(fp, "\n") == EOF)
+		goto fail;
+
+	return (0);
+
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * Map a hotspare's state to its (localized) display name.  When tvp is
+ * not NULL the hotspare's timestamp is copied out through it as well.
+ * HSS_UNUSED and any unrecognized state are reported as "invalid".
+ */
+char *
+hs_state_to_name(
+	md_hs_t		*hsp,
+	md_timeval32_t	*tvp
+)
+{
+	hotspare_states_t	which = hsp->state;
+	char			*label;
+
+	if (tvp != NULL)
+		*tvp = hsp->timestamp;
+
+	if (which == HSS_AVAILABLE)
+		label = dgettext(TEXT_DOMAIN, "Available");
+	else if (which == HSS_RESERVED)
+		label = dgettext(TEXT_DOMAIN, "In use");
+	else if (which == HSS_BROKEN)
+		label = dgettext(TEXT_DOMAIN, "Broken");
+	else
+		label = dgettext(TEXT_DOMAIN, "invalid");
+
+	return (label);
+}
+
+/*
+ * report hsp
+ *
+ * Writes a multi-line report for hotspare pool hspp to fp: a header line
+ * (pool name, number of hotspares and the column titles) followed by one
+ * line per hotspare giving its device name, state and size in blocks.
+ * With PRINT_DEVID each line also says whether a device id was found
+ * for the hotspare ("Yes" / "No "); with PRINT_TIMES the hotspare's
+ * timestamp is appended.
+ *
+ * With PRINT_LARGEDEVICES only hotspares whose revision is
+ * MD_64BIT_META_DEV are listed (each is first passed to meta_getdevs()
+ * together with nlpp), and nothing at all is printed when the pool
+ * contains none.
+ *
+ * Returns 0 on success, -1 on failure.  Every failure that leaves through
+ * the "out" label records the current errno against fname in ep.
+ */
+static int
+hsp_report(
+ md_hsp_t *hspp,
+ mdnamelist_t **nlpp,
+ char *fname,
+ FILE *fp,
+ mdprtopts_t options,
+ md_error_t *ep,
+ mdsetname_t *sp
+)
+{
+ uint_t hsi;
+ int rval = -1;
+ char *devid = "";
+ mdname_t *didnp = NULL;
+ uint_t len;
+ int large_hs_dev_cnt = 0;
+
+ /* count the large (64 bit) hotspares and collect their devices */
+ if (options & PRINT_LARGEDEVICES) {
+ for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) {
+ md_hs_t *hsp = &hspp->hotspares.hotspares_val[hsi];
+ if (hsp->revision == MD_64BIT_META_DEV) {
+ large_hs_dev_cnt += 1;
+ /*
+ * NOTE(review): on failure the "out" path stores
+ * errno in ep, presumably replacing whatever
+ * meta_getdevs() put there - confirm intended.
+ */
+ if (meta_getdevs(sp, hsp->hsnamep, nlpp, ep)
+ != 0)
+ goto out;
+ }
+ }
+
+ /* no large hotspares: nothing to report, and that is success */
+ if (large_hs_dev_cnt == 0) {
+ rval = 0;
+ goto out;
+ }
+ }
+ /* print header */
+ if (hspp->hotspares.hotspares_len == 0) {
+ /* len is left unset here; the print loop below will not run */
+ if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: is empty\n"),
+ hspp->hspnamep->hspname) == EOF) {
+ goto out;
+ }
+ } else if (hspp->hotspares.hotspares_len == 1) {
+
+ /*
+ * This allows the length
+ * of the ctd to vary from small to large without
+ * looking horrible.
+ */
+
+ len = strlen(hspp->hotspares.hotspares_val[0].hsnamep->cname);
+ /*
+ * if the length is too short to print out all of the header
+ * force the matter
+ */
+ len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+ len += 2;
+ if (options & PRINT_LARGEDEVICES) {
+ if (fprintf(fp,
+ "%s: 1 hot spare (1 big device)\n\t%-*.*s "
+ "%-12.12s%-8.6s\t\t%s\n",
+ hspp->hspnamep->hspname, len, len,
+ dgettext(TEXT_DOMAIN, "Device"),
+ dgettext(TEXT_DOMAIN, "Status"),
+ dgettext(TEXT_DOMAIN, "Length"),
+ dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+ goto out;
+ }
+ } else {
+ if (fprintf(fp,
+ "%s: 1 hot spare\n\t%-*.*s %-12.12s%-8.6s\t\t%s\n",
+ hspp->hspnamep->hspname, len, len,
+ dgettext(TEXT_DOMAIN, "Device"),
+ dgettext(TEXT_DOMAIN, "Status"),
+ dgettext(TEXT_DOMAIN, "Length"),
+ dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+ goto out;
+ }
+ }
+ } else {
+ /*
+ * This allows the length
+ * of the ctd to vary from small to large without
+ * looking horrible.
+ */
+ /* the column is as wide as the longest hotspare name */
+ len = 0;
+ for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) {
+ len = max(len, strlen(hspp->
+ hotspares.hotspares_val[hsi].hsnamep->cname));
+ }
+ len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+ len += 2;
+ if (options & PRINT_LARGEDEVICES) {
+ if (fprintf(fp,
+ "%s: %u hot spares (%d big device(s))\n\t%-*.*s "
+ "%-12.12s%-8.6s\t\t%s\n",
+ hspp->hspnamep->hspname,
+ hspp->hotspares.hotspares_len,
+ large_hs_dev_cnt, len, len,
+ dgettext(TEXT_DOMAIN, "Device"),
+ dgettext(TEXT_DOMAIN, "Status"),
+ dgettext(TEXT_DOMAIN, "Length"),
+ dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+ goto out;
+ }
+ } else {
+ if (fprintf(fp, "%s: %u hot spares\n\t%-*.*s "
+ "%-12.12s%-8.6s\t\t%s\n",
+ hspp->hspnamep->hspname,
+ hspp->hotspares.hotspares_len, len, len,
+ dgettext(TEXT_DOMAIN, "Device"),
+ dgettext(TEXT_DOMAIN, "Status"),
+ dgettext(TEXT_DOMAIN, "Length"),
+ dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+ goto out;
+ }
+ }
+ }
+
+ /* print hotspares */
+ for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) {
+ md_hs_t *hsp = &hspp->hotspares.hotspares_val[hsi];
+ char *cname = hsp->hsnamep->cname;
+ char *hs_state;
+ md_timeval32_t tv;
+ char *timep;
+ ddi_devid_t dtp;
+
+ /* populate the key in the name_p structure */
+ /*
+ * This failure returns directly instead of going through "out",
+ * so ep is not overwritten with errno on this path.
+ */
+ if ((didnp = metadevname(&sp, hsp->hsnamep->dev, ep)) == NULL) {
+ return (-1);
+ }
+
+ /* in large-device mode, list only the large hotspares */
+ if (options & PRINT_LARGEDEVICES) {
+ if (hsp->revision != MD_64BIT_META_DEV)
+ continue;
+ }
+ /* determine if devid does NOT exist */
+ if (options & PRINT_DEVID) {
+ if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep),
+ didnp->key, ep)) == NULL)
+ devid = dgettext(TEXT_DOMAIN, "No ");
+ else {
+ devid = dgettext(TEXT_DOMAIN, "Yes");
+ free(dtp);
+ }
+ }
+ /* print hotspare */
+ /* tv receives the hotspare's timestamp along with the state name */
+ hs_state = hs_state_to_name(hsp, &tv);
+ /*
+ * This allows the length
+ * of the ctd to vary from small to large without
+ * looking horrible.
+ */
+ if (! (options & PRINT_TIMES)) {
+ if (fprintf(fp,
+ " %-*s %-12s %lld blocks\t%s\n",
+ len, cname, hs_state,
+ hsp->size, devid) == EOF) {
+ goto out;
+ }
+ } else {
+ timep = meta_print_time(&tv);
+
+ if (fprintf(fp,
+ " %-*s\t %-11s %8lld blocks%s\t%s\n",
+ len, cname, hs_state,
+ hsp->size, devid, timep) == EOF) {
+ goto out;
+ }
+ }
+ }
+
+ /* add extra line */
+ if (fprintf(fp, "\n") == EOF)
+ goto out;
+
+ /* success */
+ rval = 0;
+
+ /* cleanup, return error */
+out:
+ if (rval != 0)
+ (void) mdsyserror(ep, errno, fname);
+ return (rval);
+}
+
+/*
+ * print/report hsp
+ *
+ * With a pool name, prints that pool: hsp_print() for PRINT_SHORT,
+ * hsp_report() otherwise.  With hspnp == NULL, does the same for every
+ * hotspare pool in the set, carrying on after a pool that fails.
+ * Returns 0 on success and -1 when anything failed.
+ */
+int
+meta_hsp_print(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t	*pools = NULL;
+	mdhspnamelist_t	*cur;
+	int		npools;
+	int		status = 0;
+
+	/* a named pool must belong to the set we were given */
+	assert(sp != NULL);
+	assert((hspnp == NULL) || (sp->setno == HSP_SET(hspnp->hsp)));
+
+	/* single pool */
+	if (hspnp != NULL) {
+		md_hsp_t	*unit;
+
+		unit = meta_get_hsp_common(sp, hspnp,
+		    ((options & PRINT_FAST) ? 1 : 0), ep);
+		if (unit == NULL)
+			return (-1);
+
+		if (options & PRINT_SHORT)
+			return (hsp_print(unit, fname, fp, ep));
+		return (hsp_report(unit, nlpp, fname, fp, options, ep, sp));
+	}
+
+	/* every pool in the set */
+	npools = meta_get_hsp_names(sp, &pools, options, ep);
+	if (npools < 0)
+		return (-1);
+	if (npools == 0)
+		return (0);
+
+	for (cur = pools; cur != NULL; cur = cur->next) {
+		if (meta_hsp_print(sp, cur->hspnamep, nlpp, fname, fp,
+		    options, ep) != 0)
+			status = -1;
+	}
+
+	metafreehspnamelist(pools);
+	return (status);
+}
+
+/*
+ * Check that a hotspare pool is valid, i.e. that its unit can be
+ * obtained through meta_get_hsp().  Returns 0 if so, -1 if not.
+ */
+int
+metachkhsp(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	md_error_t	*ep
+)
+{
+	return ((meta_get_hsp(sp, hspnp, ep) == NULL) ? -1 : 0);
+}
+
+/*
+ * Drop the unit cached on a hotspare pool name, if there is one: the
+ * unit is freed and the cache pointer cleared.
+ */
+void
+meta_invalidate_hsp(
+	mdhspname_t	*hspnp
+)
+{
+	md_hsp_t	*cached = hspnp->unitp;
+
+	if (cached != NULL) {
+		meta_free_hsp(cached);
+		hspnp->unitp = NULL;
+	}
+}
+
+/*
+ * add hotspares and/or hotspare pool
+ *
+ * With hsnlp == NULL an empty hotspare pool hspnp is set up (HS_OPT_POOL);
+ * otherwise each device on hsnlp is checked and handed to the driver
+ * with ADD_HOT_SPARE for pool hspnp.
+ *
+ * When MDCMD_DOIT is not set the driver is asked for a dry run
+ * (HS_OPT_DRYRUN) and no name is stored in the namespace.  With
+ * MDCMD_PRINT a confirmation message is written to stdout.
+ *
+ * Returns 0 on success; on failure the value of the failing helper
+ * (-1 in the direct returns below) with the error in ep.  Processing
+ * stops at the first device that fails.
+ */
+int
+meta_hs_add(
+ mdsetname_t *sp,
+ mdhspname_t *hspnp,
+ mdnamelist_t *hsnlp,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ mdnamelist_t *p;
+ set_hs_params_t shs;
+
+ /* should have a set */
+ assert(sp != NULL);
+ assert(sp->setno == HSP_SET(hspnp->hsp));
+
+ /* clear cache */
+ meta_invalidate_hsp(hspnp);
+
+ /* setup hotspare pool info */
+ (void) memset(&shs, 0, sizeof (shs));
+ shs.shs_cmd = ADD_HOT_SPARE;
+ shs.shs_hot_spare_pool = hspnp->hsp;
+ MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+
+ /* add empty hotspare pool */
+ if (hsnlp == NULL) {
+ shs.shs_options = HS_OPT_POOL;
+ /* If DOIT is not set, it's a dryrun */
+ if ((options & MDCMD_DOIT) == 0) {
+ shs.shs_options |= HS_OPT_DRYRUN;
+ }
+ if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde,
+ hspnp->hspname) != 0)
+ return (mdstealerror(ep, &shs.mde));
+ goto success;
+ }
+
+ /* add hotspares */
+ shs.shs_options = HS_OPT_NONE;
+ /* If DOIT is not set, it's a dryrun */
+ if ((options & MDCMD_DOIT) == 0) {
+ shs.shs_options |= HS_OPT_DRYRUN;
+ }
+ for (p = hsnlp; (p != NULL); p = p->next) {
+ mdname_t *hsnp = p->namep;
+ diskaddr_t size, label, start_blk;
+
+ /* should be in same set */
+ assert(sp->setno == HSP_SET(hspnp->hsp));
+
+ /* check it out */
+ if (meta_check_hotspare(sp, hsnp, ep) != 0)
+ return (-1);
+ /* a zero-sized device cannot serve as a hotspare */
+ if ((size = metagetsize(hsnp, ep)) == MD_DISKADDR_ERROR)
+ return (-1);
+ else if (size == 0)
+ return (mdsyserror(ep, ENOSPC, hsnp->cname));
+ if ((label = metagetlabel(hsnp, ep)) == MD_DISKADDR_ERROR)
+ return (-1);
+ if ((start_blk = metagetstart(sp, hsnp, ep))
+ == MD_DISKADDR_ERROR)
+ return (-1);
+
+ shs.shs_size_option = meta_check_devicesize(size);
+
+ /* In dryrun mode (DOIT not set) we must not alter the mddb */
+ if (options & MDCMD_DOIT) {
+ /* store name in namespace */
+ if (add_key_name(sp, hsnp, NULL, ep) != 0)
+ return (-1);
+ }
+
+ /* add hotspare and/or hotspare pool */
+ shs.shs_component_old = hsnp->dev;
+ shs.shs_start_blk = start_blk;
+ shs.shs_has_label = ((label > 0) ? 1 : 0);
+ shs.shs_number_blks = size;
+ shs.shs_key_old = hsnp->key;
+ if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, NULL) != 0) {
+ /* undo the namespace entry stored above before failing */
+ if ((options & MDCMD_DOIT) &&
+ (shs.shs_options != HS_OPT_POOL)) {
+ (void) del_key_name(sp, hsnp, ep);
+ }
+ return (mdstealerror(ep, &shs.mde));
+ }
+ }
+
+ /* print success message */
+success:
+ if (options & MDCMD_PRINT) {
+ /* MDCMD_INIT (pool creation) is reported as a pool setup */
+ if ((options & MDCMD_INIT) || (hsnlp == NULL)) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: Hotspare pool is setup\n"),
+ hspnp->hspname);
+ } else if (hsnlp->next == NULL) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: Hotspare is added\n"),
+ hspnp->hspname);
+ } else {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: Hotspares are added\n"),
+ hspnp->hspname);
+ }
+ (void) fflush(stdout);
+ }
+
+ /* return success */
+ return (0);
+}
+
+/*
+ * delete hotspares from pool
+ *
+ * With hsnlp == NULL the (empty) hotspare pool hspnp itself is deleted
+ * (HS_OPT_POOL); otherwise each device on hsnlp is removed from the pool.
+ * When MDCMD_DOIT is not set the driver is asked for a dry run.  With
+ * MDCMD_PRINT a confirmation message is written to stdout.  Returns 0
+ * on success; on an ioctl failure the error is moved into ep and the
+ * value of mdstealerror() is returned.
+ */
+int
+meta_hs_delete(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdnamelist_t	*hsnlp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*cur;
+	set_hs_params_t	req;
+
+	/* the pool must belong to the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* whatever is cached for the pool is about to go stale */
+	meta_invalidate_hsp(hspnp);
+
+	/* common request setup */
+	(void) memset(&req, 0, sizeof (req));
+	req.shs_hot_spare_pool = hspnp->hsp;
+	MD_SETDRIVERNAME(&req, MD_HOTSPARES, sp->setno);
+	req.shs_cmd = DELETE_HOT_SPARE;
+
+	if (hsnlp == NULL) {
+		/* no devices named: delete the pool itself */
+		req.shs_options = HS_OPT_POOL;
+		if ((options & MDCMD_DOIT) == 0)
+			req.shs_options |= HS_OPT_DRYRUN;
+
+		if (metaioctl(MD_IOCSET_HS, &req, &req.mde,
+		    hspnp->hspname) != 0)
+			return (mdstealerror(ep, &req.mde));
+	} else {
+		/* delete the named hotspares one at a time */
+		req.shs_options = HS_OPT_NONE;
+		if ((options & MDCMD_DOIT) == 0)
+			req.shs_options |= HS_OPT_DRYRUN;
+
+		for (cur = hsnlp; cur != NULL; cur = cur->next) {
+			mdname_t	*hsnp = cur->namep;
+
+			/* should be in same set */
+			assert(sp->setno == HSP_SET(hspnp->hsp));
+
+			req.shs_component_old = hsnp->dev;
+			meta_invalidate_name(hsnp);
+			if (metaioctl(MD_IOCSET_HS, &req, &req.mde,
+			    hsnp->cname) != 0)
+				return (mdstealerror(ep, &req.mde));
+		}
+	}
+
+	/* quiet unless asked to report */
+	if (!(options & MDCMD_PRINT))
+		return (0);
+
+	if (hsnlp == NULL) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Hotspare pool is cleared\n"),
+		    hspnp->hspname);
+	} else if (hsnlp->next == NULL) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Hotspare is deleted\n"),
+		    hspnp->hspname);
+	} else {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Hotspares are deleted\n"),
+		    hspnp->hspname);
+	}
+	(void) fflush(stdout);
+
+	return (0);
+}
+
+/*
+ * replace hotspare in pool
+ *
+ * Replaces hotspare oldnp of pool hspnp with newnp.  oldnp and newnp may
+ * name the same device: if the dev_t recorded in the metadb differs from
+ * the one currently found through the filesystem this is a "rebind" and
+ * the pool entry is updated to the new binding.
+ *
+ * When MDCMD_DOIT is not set the driver is asked for a dry run
+ * (HS_OPT_DRYRUN) and the namespace is not modified.  With MDCMD_PRINT a
+ * confirmation message is written to stdout.
+ *
+ * Returns 0 on success and non-zero (-1) on failure with the error in ep.
+ */
+int
+meta_hs_replace(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdname_t	*oldnp,
+	mdname_t	*newnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	set_hs_params_t	shs;
+	diskaddr_t	size, label, start_blk;
+	md_dev64_t	old_dev, new_dev;
+	diskaddr_t	new_start_blk, new_end_blk;
+	int		rebind;
+	char		*new_devidp = NULL;
+	int		ret;
+	md_set_desc	*sd;
+
+	/* should be in same set */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* save new binding in case this is a rebind where oldnp==newnp */
+	new_dev = newnp->dev;
+	new_start_blk = newnp->start_blk;
+	new_end_blk = newnp->end_blk;
+
+	/* invalidate, then get the hotspare (fill in oldnp from metadb) */
+	meta_invalidate_hsp(hspnp);
+	if (meta_get_hsp(sp, hspnp, ep) == NULL)
+		return (-1);
+
+	/* the old device binding is now established */
+	if ((old_dev = oldnp->dev) == NODEV64)
+		return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+	/*
+	 * check for the case where oldnp and newnp indicate the same
+	 * device, but the dev_t of the device has changed between old
+	 * and new.  This is called a rebind.  On entry the dev_t
+	 * represents the new device binding determined from the
+	 * filesystem (meta_getdev).  After calling meta_get_hsp
+	 * oldnp (and maybe newnp if this is a rebind) is updated based
+	 * to the old binding from the metadb (done by metakeyname).
+	 */
+	if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
+	    (old_dev != new_dev)) {
+		rebind = 1;
+	} else {
+		rebind = 0;
+	}
+	if (rebind) {
+		/* put back the filesystem binding saved on entry */
+		newnp->dev = new_dev;
+		newnp->start_blk = new_start_blk;
+		newnp->end_blk = new_end_blk;
+	}
+
+	/*
+	 * Save a copy of the devid associated with the new disk, the reason
+	 * is that the meta_check_hotspare() call could cause the devid to
+	 * be changed to that of the devid that is currently stored in the
+	 * replica namespace for the disk in question. This devid could be
+	 * stale if we are replacing the disk. The function that overwrites
+	 * the devid is dr2drivedesc().
+	 */
+	if (newnp->drivenamep->devid != NULL)
+		new_devidp = Strdup(newnp->drivenamep->devid);
+
+	/* if it's a multi-node diskset clear new_devidp */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			Free(new_devidp);
+			return (-1);
+		}
+		if (MD_MNSET_DESC(sd)) {
+			Free(new_devidp);
+			new_devidp = NULL;
+		}
+	}
+
+	/* check it out */
+	if (meta_check_hotspare(sp, newnp, ep) != 0) {
+		/* on a rebind the device is expected to be in use already */
+		if ((! rebind) || (! mdisuseerror(ep, MDE_ALREADY))) {
+			Free(new_devidp);
+			return (-1);
+		}
+		mdclrerror(ep);
+	}
+	if ((size = metagetsize(newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if ((label = metagetlabel(newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if ((start_blk = metagetstart(sp, newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if (start_blk >= size) {
+		(void) mdsyserror(ep, ENOSPC, newnp->cname);
+		Free(new_devidp);
+		return (-1);
+	}
+
+	/*
+	 * Copy back the saved devid.  This must happen before the name is
+	 * stored in the namespace below so that the entry is not created
+	 * from a possibly stale devid.  The pointer is always replaced
+	 * (with NULL when nothing was saved) so that it never refers to
+	 * the memory just freed; ownership of the saved copy moves to
+	 * newnp.
+	 */
+	Free(newnp->drivenamep->devid);
+	newnp->drivenamep->devid = new_devidp;
+	new_devidp = NULL;
+
+	/*
+	 * In dryrun mode (DOIT not set) we must not alter the mddb.
+	 * The name is stored exactly once; each failure path below
+	 * removes it again with a single del_key_name().
+	 */
+	if (options & MDCMD_DOIT) {
+		/* store name in namespace */
+		if (add_key_name(sp, newnp, NULL, ep) != 0)
+			return (-1);
+	}
+
+	if (rebind && !metaislocalset(sp)) {
+		/*
+		 * We are 'rebind'ing a disk that is in a diskset so as well
+		 * as updating the diskset's namespace the local set needs
+		 * to be updated because it also contains a reference to the
+		 * disk in question.
+		 */
+		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, newnp->cname,
+		    ep);
+
+		if (ret != METADEVADM_SUCCESS) {
+			md_error_t	xep = mdnullerror;
+
+			/*
+			 * In dryrun mode (DOIT not set) we must not alter
+			 * the mddb
+			 */
+			if (options & MDCMD_DOIT) {
+				/* scratch error keeps ep intact */
+				(void) del_key_name(sp, newnp, &xep);
+				mdclrerror(&xep);
+				return (-1);
+			}
+		}
+	}
+
+	/* replace hotspare */
+	(void) memset(&shs, 0, sizeof (shs));
+
+	shs.shs_size_option = meta_check_devicesize(size);
+
+	shs.shs_cmd = REPLACE_HOT_SPARE;
+	shs.shs_hot_spare_pool = hspnp->hsp;
+	MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+	shs.shs_component_old = old_dev;
+	shs.shs_options = HS_OPT_NONE;
+	/* If DOIT is not set, it's a dryrun */
+	if ((options & MDCMD_DOIT) == 0) {
+		shs.shs_options |= HS_OPT_DRYRUN;
+	}
+	shs.shs_component_new = new_dev;
+	shs.shs_start_blk = start_blk;
+	shs.shs_has_label = ((label > 0) ? 1 : 0);
+	shs.shs_number_blks = size;
+	shs.shs_key_new = newnp->key;
+	if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, NULL) != 0) {
+		/* undo the namespace entry stored above before failing */
+		if (options & MDCMD_DOIT) {
+			(void) del_key_name(sp, newnp, ep);
+		}
+		return (mdstealerror(ep, &shs.mde));
+	}
+
+	/* clear cache */
+	meta_invalidate_name(oldnp);
+	meta_invalidate_name(newnp);
+	meta_invalidate_hsp(hspnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Hotspare %s is replaced with %s\n"),
+		    hspnp->hspname, oldnp->cname, newnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * enable hotspares
+ *
+ * For each device on hsnlp: if a hotspare pool holds the device under a
+ * dev_t that differs from the current filesystem binding, the entry is
+ * rebound through meta_hs_replace(); otherwise the device is checked
+ * and the driver is told to fix (FIX_HOT_SPARE) the hotspare.  For a
+ * non-local set the local namespace's devid is refreshed afterwards; a
+ * failure there is only reported, not treated as fatal.
+ *
+ * When MDCMD_DOIT is not set the driver is asked for a dry run.  With
+ * MDCMD_PRINT a message is printed per enabled hotspare.
+ *
+ * Returns 0 when every device was processed, -1 at the first failure
+ * (error in ep).
+ */
+int
+meta_hs_enable(
+	mdsetname_t	*sp,
+	mdnamelist_t	*hsnlp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t	*hspnlp = NULL;
+	mdhspnamelist_t	*hspnp;
+	set_hs_params_t	shs;
+	/*
+	 * rval stays -1 until the very end so that every "goto out" taken
+	 * on an error reports failure, no matter how many earlier devices
+	 * were handled successfully.
+	 */
+	int		rval = -1;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* setup device info */
+	(void) memset(&shs, 0, sizeof (shs));
+	MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+	shs.shs_cmd = FIX_HOT_SPARE;
+	shs.shs_options = HS_OPT_NONE;
+	/* If DOIT is not set, it's a dryrun */
+	if ((options & MDCMD_DOIT) == 0) {
+		shs.shs_options |= HS_OPT_DRYRUN;
+	}
+
+	/* get the list of hotspare names */
+	if (meta_get_hsp_names(sp, &hspnlp, 0, ep) < 0)
+		goto out;
+
+	/* enable hotspares for each components */
+	for (; (hsnlp != NULL); hsnlp = hsnlp->next) {
+		mdname_t	*hsnp = hsnlp->namep;
+		md_dev64_t	fs_dev;
+		int		rebind = 0;
+		diskaddr_t	size, label, start_blk;
+
+		/*
+		 * get the file_system dev binding; leave through "out" so
+		 * that the pool name list is released on failure
+		 */
+		if (meta_getdev(sp, hsnp, ep) != 0)
+			goto out;
+		fs_dev = hsnp->dev;
+
+		/*
+		 * search for the component in each hotspare pool
+		 * and replace it (instead of enable) if the binding
+		 * has changed.
+		 */
+		for (hspnp = hspnlp; (hspnp != NULL); hspnp = hspnp->next) {
+			/*
+			 * in_hsp will call meta_get_hsp which will fill
+			 * in hspnp with metadb version of component
+			 */
+			meta_invalidate_hsp(hspnp->hspnamep);
+			if (in_hsp(sp, hspnp->hspnamep, hsnp, 0, -1, ep) != 0) {
+				/*
+				 * check for the case where the dev_t has
+				 * changed between the filesystem and the
+				 * metadb.  This is called a rebind, and
+				 * is handled by meta_hs_replace.
+				 */
+				if (fs_dev != hsnp->dev) {
+					/*
+					 * establish file system binding
+					 * with invalid start/end
+					 */
+					rebind++;
+					hsnp->dev = fs_dev;
+					hsnp->start_blk = -1;
+					hsnp->end_blk = -1;
+					if (meta_hs_replace(sp,
+					    hspnp->hspnamep,
+					    hsnp, hsnp, options, ep) != 0)
+						goto out;
+				}
+			}
+		}
+		if (rebind)
+			continue;
+
+		/* enable the component in all hotspares that use it */
+		if (meta_check_hotspare(sp, hsnp, ep) != 0)
+			goto out;
+
+		if ((size = metagetsize(hsnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		if ((label = metagetlabel(hsnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		if ((start_blk = metagetstart(sp, hsnp, ep))
+		    == MD_DISKADDR_ERROR)
+			goto out;
+		if (start_blk >= size) {
+			(void) mdsyserror(ep, ENOSPC, hsnp->cname);
+			goto out;
+		}
+
+		/* enable hotspare */
+		shs.shs_component_old = hsnp->dev;
+		shs.shs_component_new = hsnp->dev;
+		shs.shs_start_blk = start_blk;
+		shs.shs_has_label = ((label > 0) ? 1 : 0);
+		shs.shs_number_blks = size;
+		if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, hsnp->cname) != 0) {
+			(void) mdstealerror(ep, &shs.mde);
+			goto out;
+		}
+
+		/*
+		 * Are we dealing with a non-local set? If so need to update
+		 * the local namespace so that the disk record has the correct
+		 * devid.
+		 */
+		if (!metaislocalset(sp)) {
+			if (meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
+			    hsnp->cname, ep) != METADEVADM_SUCCESS) {
+				/*
+				 * Failed to update the local set. Nothing to
+				 * do here apart from report the error. The
+				 * namespace is most likely broken and some
+				 * form of remedial recovery is going to
+				 * be required.
+				 */
+				mde_perror(ep, "");
+				mdclrerror(ep);
+			}
+		}
+
+		/* clear cache */
+		meta_invalidate_name(hsnp);
+
+		/* let em know */
+		if (options & MDCMD_PRINT) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "hotspare %s is enabled\n"),
+			    hsnp->cname);
+			(void) fflush(stdout);
+		}
+	}
+
+	/* clear whole cache */
+	for (hspnp = hspnlp; (hspnp != NULL); hspnp = hspnp->next) {
+		meta_invalidate_hsp(hspnp->hspnamep);
+	}
+
+	/* return success */
+	rval = 0;
+
+out:
+	if (hspnlp)
+		metafreehspnamelist(hspnlp);
+	return (rval);
+}
+
+/*
+ * Look for duplicates inside a hotspare pool: hotspare number hsi is run
+ * through meta_check_overlap() against every hotspare that precedes it
+ * in the pool.  Returns 0 when none of those checks fails, -1 otherwise.
+ */
+static int
+check_twice(
+	md_hsp_t	*hspp,
+	uint_t		hsi,
+	md_error_t	*ep
+)
+{
+	char		*poolname = hspp->hspnamep->hspname;
+	md_hs_t		*spares = hspp->hotspares.hotspares_val;
+	mdname_t	*candidate = spares[hsi].hsnamep;
+	uint_t		prev;
+
+	for (prev = 0; prev < hsi; prev++) {
+		if (meta_check_overlap(poolname, candidate, 0, -1,
+		    spares[prev].hsnamep, 0, -1, ep) != 0)
+			return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Validate a hotspare pool description.  Each hotspare must pass
+ * meta_check_hotspare(), have a non-zero size, and pass check_twice()
+ * against the hotspares listed before it.  Returns 0 when the whole
+ * pool checks out, non-zero (-1) at the first problem.
+ */
+/*ARGSUSED2*/
+int
+meta_check_hsp(
+	mdsetname_t	*sp,
+	md_hsp_t	*hspp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdhspname_t	*hspnp = hspp->hspnamep;
+	uint_t		idx;
+
+	for (idx = 0; idx < hspp->hotspares.hotspares_len; idx++) {
+		mdname_t	*hsnp = hspp->hotspares.hotspares_val[idx].hsnamep;
+		diskaddr_t	nblks;
+
+		/* the device itself */
+		if (meta_check_hotspare(sp, hsnp, ep) != 0)
+			return (-1);
+
+		/* its size; the ENOSPC error is recorded against the pool */
+		nblks = metagetsize(hsnp, ep);
+		if (nblks == MD_DISKADDR_ERROR)
+			return (-1);
+		if (nblks == 0)
+			return (mdsyserror(ep, ENOSPC, hspnp->hspname));
+
+		/* and the earlier hotspares of this same pool */
+		if (check_twice(hspp, idx, ep) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Create a hotspare pool from its description.  The description is
+ * validated first; without MDCMD_DOIT that is all that happens.
+ * Otherwise the hotspares are gathered into a name list and added
+ * through meta_hs_add() with MDCMD_INIT set.  Returns 0 on success and
+ * -1 (or whatever meta_hs_add() returned) on failure.
+ */
+int
+meta_create_hsp(
+	mdsetname_t	*sp,
+	md_hsp_t	*hspp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdhspname_t	*hspnp = hspp->hspnamep;
+	mdnamelist_t	*names = NULL;
+	uint_t		idx;
+	int		status;
+
+	/* validate hsp */
+	if (meta_check_hsp(sp, hspp, options, ep) != 0)
+		return (-1);
+
+	/* a dry run ends here */
+	if ((options & MDCMD_DOIT) == 0)
+		return (0);
+
+	/* collect the hotspare names in pool order */
+	for (idx = 0; idx < hspp->hotspares.hotspares_len; idx++) {
+		(void) metanamelist_append(&names,
+		    hspp->hotspares.hotspares_val[idx].hsnamep);
+	}
+
+	/* hand them to meta_hs_add() as a pool initialization */
+	options |= MDCMD_INIT;
+	status = meta_hs_add(sp, hspnp, names, options, ep);
+
+	metafreenamelist(names);
+	return (status);
+}
+
+/*
+ * initialize hsp
+ * NOTE: this function is metainit(1m)'s command line parser!
+ */
+int
+meta_init_hsp(
+ mdsetname_t **spp,
+ int argc,
+ char *argv[],
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ char *uname = argv[0];
+ mdhspname_t *hspnp = NULL;
+ md_hsp_t *hspp = NULL;
+ uint_t hsi;
+ int rval = -1;
+
+
+ /* get hsp name */
+ assert(argc > 0);
+ if (argc < 1)
+ goto syntax;
+ if ((hspnp = metahspname(spp, uname, ep)) == NULL)
+ goto out;
+ assert(*spp != NULL);
+ uname = hspnp->hspname;
+
+ if (!(options & MDCMD_NOLOCK)) {
+ /* grab set lock */
+ if (meta_lock(*spp, TRUE, ep))
+ goto out;
+
+ if (meta_check_ownership(*spp, ep) != 0)
+ goto out;
+ }
+
+ /* see if it exists already */
+ if (meta_get_hsp(*spp, hspnp, ep) != NULL) {
+ (void) mdhsperror(ep, MDE_HSP_ALREADY_SETUP, hspnp->hsp, uname);
+ goto out;
+ } else if (! mdishsperror(ep, MDE_INVAL_HSP)) {
+ goto out;
+ } else {
+ mdclrerror(ep);
+ }
+ --argc, ++argv;
+
+ /* parse general options */
+ optind = 0;
+ opterr = 0;
+ if (getopt(argc, argv, "") != -1)
+ goto options;
+
+ /* allocate hsp */
+ hspp = Zalloc(sizeof (*hspp));
+ hspp->hotspares.hotspares_len = argc;
+ if (argc > 0) {
+ hspp->hotspares.hotspares_val =
+ Zalloc(argc * sizeof (*hspp->hotspares.hotspares_val));
+ }
+
+ /* setup pool */
+ hspp->hspnamep = hspnp;
+
+ /* parse hotspares */
+ for (hsi = 0; ((argc > 0) && (hsi < hspp->hotspares.hotspares_len));
+ ++hsi) {
+ md_hs_t *hsp = &hspp->hotspares.hotspares_val[hsi];
+ mdname_t *hsnamep;
+
+ /* parse hotspare name */
+ if ((hsnamep = metaname(spp, argv[0], ep)) == NULL)
+ goto out;
+ hsp->hsnamep = hsnamep;
+ --argc, ++argv;
+ }
+
+ /* we should be at the end */
+ if (argc != 0)
+ goto syntax;
+
+ /* create hotspare pool */
+ if (meta_create_hsp(*spp, hspp, options, ep) != 0)
+ goto out;
+ rval = 0; /* success */
+ goto out;
+
+ /* syntax error */
+syntax:
+ rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
+ goto out;
+
+ /* options error */
+options:
+ rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
+ goto out;
+
+ /* cleanup, return error */
+out:
+ if (hspp != NULL)
+ meta_free_hsp(hspp);
+ return (rval);
+}
+
+/*
+ * reset hotspare pool
+ *
+ * With hspnp == NULL, every hotspare pool that meta_get_hsp_names()
+ * returns for the set is reset in turn, stopping at the first failure.
+ * Otherwise the named pool is cleared: each of its hotspares is deleted
+ * with a forced DELETE_HOT_SPARE ioctl, then the same command is issued
+ * once more with HS_OPT_POOL for the pool itself. When MDCMD_DOIT is not
+ * set in options, HS_OPT_DRYRUN is added to every ioctl.
+ *
+ * Returns 0 on success, non-zero on failure with the error in ep.
+ */
+int
+meta_hsp_reset(
+ mdsetname_t *sp,
+ mdhspname_t *hspnp,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ md_hsp_t *hspp;
+ set_hs_params_t shs;
+ uint_t i;
+ int rval = -1;
+
+ /* should have the same set */
+ assert(sp != NULL);
+ assert((hspnp == NULL) || (sp->setno == HSP_SET(hspnp->hsp)));
+
+ /* reset all hotspares */
+ if (hspnp == NULL) {
+ mdhspnamelist_t *hspnlp = NULL;
+ mdhspnamelist_t *p;
+
+ /* for each hotspare pool */
+ rval = 0;
+ if (meta_get_hsp_names(sp, &hspnlp, 0, ep) < 0)
+ return (-1);
+ for (p = hspnlp; (p != NULL); p = p->next) {
+ /* reset hotspare pool */
+ hspnp = p->hspnamep;
+
+ /*
+ * If this is a multi-node set, we send a series
+ * of individual metaclear commands.
+ */
+ if (meta_is_mn_set(sp, ep)) {
+ if (meta_mn_send_metaclear_command(sp,
+ hspnp->hspname, options, 0, ep) != 0) {
+ rval = -1;
+ break;
+ }
+ } else {
+ /* recurse with a specific pool name */
+ if (meta_hsp_reset(sp, hspnp, options,
+ ep) != 0) {
+ rval = -1;
+ break;
+ }
+ }
+ }
+
+ /* cleanup, return rval (0 unless one of the resets failed) */
+ metafreehspnamelist(hspnlp);
+ return (rval);
+ }
+
+ /* get unit structure */
+ if ((hspp = meta_get_hsp(sp, hspnp, ep)) == NULL)
+ return (-1);
+
+ /* make sure nobody owns us */
+ if (hspp->refcount > 0) {
+ return (mdhsperror(ep, MDE_HSP_IN_USE, hspnp->hsp,
+ hspnp->hspname));
+ }
+
+ /* clear hotspare pool members */
+ (void) memset(&shs, 0, sizeof (shs));
+ MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+ shs.shs_cmd = DELETE_HOT_SPARE;
+ shs.shs_hot_spare_pool = hspnp->hsp;
+ for (i = 0; (i < hspp->hotspares.hotspares_len); ++i) {
+ md_hs_t *hs = &hspp->hotspares.hotspares_val[i];
+ mdname_t *hsnamep = hs->hsnamep;
+
+ /* clear cache */
+ meta_invalidate_name(hsnamep);
+
+ /* clear hotspare */
+ shs.shs_component_old = hsnamep->dev;
+ shs.shs_options = HS_OPT_FORCE;
+ /* If DOIT is not set, it's a dryrun */
+ if ((options & MDCMD_DOIT) == 0) {
+ shs.shs_options |= HS_OPT_DRYRUN;
+ }
+ if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, NULL) != 0) {
+ /* hand the ioctl's error to the caller; rval is still -1 */
+ (void) mdstealerror(ep, &shs.mde);
+ goto out;
+ }
+ }
+
+ /* clear hotspare pool */
+ /* shs_cmd is still DELETE_HOT_SPARE; only the options change */
+ shs.shs_options = HS_OPT_POOL;
+ /* If DOIT is not set, it's a dryrun */
+ if ((options & MDCMD_DOIT) == 0) {
+ shs.shs_options |= HS_OPT_DRYRUN;
+ }
+ if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, hspnp->hspname) != 0) {
+ (void) mdstealerror(ep, &shs.mde);
+ goto out;
+ }
+ rval = 0; /* success */
+
+ /* let em know */
+ if (options & MDCMD_PRINT) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: Hotspare pool is cleared\n"),
+ hspnp->hspname);
+ (void) fflush(stdout);
+ }
+
+ /* clear subdevices (nothing to do) */
+
+ /* cleanup, return rval; the pool is invalidated on success and failure */
+out:
+ meta_invalidate_hsp(hspnp);
+ return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_import.c b/usr/src/lib/lvm/libmeta/common/meta_import.c
new file mode 100644
index 0000000000..ec8819794c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_import.c
@@ -0,0 +1,2179 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <assert.h>
+#include <ctype.h>
+#include <libdevinfo.h>
+#include <mdiox.h>
+#include <meta.h>
+#include "meta_repartition.h"
+#include "meta_set_prv.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/lvm/md_mddb.h>
+#include <sys/lvm/md_names.h>
+#include <sys/lvm/md_crc.h>
+
+/*
+ * Node of a singly linked list of device ids. map_replica_disk() searches
+ * such a list by did_index to find the disk behind a locator block entry.
+ */
+typedef struct did_list {
+ void *rdid; /* real did if replicated set */
+ void *did; /* did stored in lb */
+ char *devname; /* device name; reduced with metadiskname() by users */
+ dev_t dev;
+ uint_t did_index; /* compared with the locator index in the lb */
+ char *minor_name;
+ struct did_list *next; /* next node, NULL at the end of the list */
+} did_list_t;
+
+/*
+ * One old-devid -> new-devid mapping in the global replicated disk list.
+ * replicated_list_insert() stores a private copy of old_devid but keeps
+ * the caller's new_devid pointer as is.
+ */
+typedef struct replicated_disk {
+ void *old_devid;
+ void *new_devid;
+ struct replicated_disk *next; /* next entry in the same bucket */
+} replicated_disk_t;
+
+/*
+ * The current implementation limits the max device id length to 256 bytes.
+ * Should the max device id length be increased, this define would have to
+ * be bumped up accordingly
+ */
+#define MAX_DEVID_LEN 256
+
+/*
+ * We store a global list of all the replicated disks in the system. In
+ * order to prevent us from performing a linear search over all of them,
+ * the disks are bucketed based on the length of their device ids: the
+ * array below is indexed by device id length (0 through MAX_DEVID_LEN),
+ * and each slot heads a linked list of the disks with that length.
+ */
+static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL};
+
+/*
+ * The list of replicated disks is built just once and this flag is set
+ * once it's done
+ */
+static int replicated_disk_list_built = 0;
+
+/*
+ * getphysblk()
+ *
+ * Translate a logical mddb block number into a physical block number using
+ * the block map of a master block.
+ *
+ * Modelled on the routine of the same name in the md kernel module (file
+ * md_mddb.c).  The kernel routine works on in core master blocks
+ * (mddb_mb_ic_t); this one works on the mddb_mb_t as read from the disk.
+ *
+ * Returns the physical block, or (daddr_t)-1 if blk is out of range.
+ */
+static daddr_t
+getphysblk(
+	mddb_block_t	blk,
+	mddb_mb_t	*mbp
+)
+{
+	/*
+	 * A block is only accepted when it lies inside both the block count
+	 * and the consecutive run described by the block map.  Beyond that
+	 * we have to trust that the map in the master block is consistent
+	 * with the block count; there is no reliable way to verify it.
+	 */
+	if (blk < mbp->mb_blkcnt && blk < mbp->mb_blkmap.m_consecutive)
+		return (mbp->mb_blkmap.m_firstblk + blk);
+
+	return ((daddr_t)-1);
+}
+
+
+
+/*
+ * drive_append()
+ *
+ * Allocate a new md_im_drive_info_t, fill it in from the arguments and link
+ * it at the end of the list that *midpp belongs to.  The device ids and the
+ * minor name are copied; dnp and mirp are stored as given.
+ *
+ * devid_sz is the size of the devid that ends up in mid_devid: rdevid when
+ * it is non-NULL (replicated diskset), devid otherwise.
+ *
+ * Returns the new node.
+ */
+static md_im_drive_info_t *
+drive_append(
+	md_im_drive_info_t	**midpp,
+	mddrivename_t		*dnp,
+	void			*devid,
+	void			*rdevid,
+	int			devid_sz,
+	char			*minor_name,
+	md_timeval32_t		timestamp,
+	md_im_replica_info_t	*mirp
+)
+{
+	md_im_drive_info_t	*newp;
+	void			*primary;
+
+	/* find the terminating NULL link */
+	while (*midpp != NULL)
+		midpp = &((*midpp)->mid_next);
+
+	newp = Zalloc(sizeof (md_im_drive_info_t));
+	*midpp = newp;
+
+	newp->mid_dnp = dnp;
+
+	/* the replicated devid, when there is one, is the primary devid */
+	primary = (rdevid != NULL) ? rdevid : devid;
+	newp->mid_devid = (void *)Malloc(devid_sz);
+	(void) memcpy(newp->mid_devid, primary, devid_sz);
+
+	if (rdevid != NULL) {
+		/*
+		 * Replicated diskset: keep the 'other' devid as well.  For
+		 * a regular diskset mid_o_devid stays NULL.
+		 */
+		int	other_sz = devid_sizeof((ddi_devid_t)devid);
+
+		newp->mid_o_devid = (void *)Malloc(other_sz);
+		(void) memcpy(newp->mid_o_devid, devid, other_sz);
+		newp->mid_o_devid_sz = other_sz;
+	}
+
+	newp->mid_devid_sz = devid_sz;
+	newp->mid_setcreatetimestamp = timestamp;
+	(void) strlcpy(newp->mid_minor_name, minor_name, MDDB_MINOR_NAME_MAX);
+	newp->mid_replicas = mirp;
+
+	return (newp);
+}
+
+
+
+/*
+ * drive_append_wrapper()
+ *
+ * Tail-tracking front end for drive_append().  drive_append() always walks
+ * to the end of the list from the link it is handed; passing it the link
+ * returned by the previous call to this wrapper keeps that walk short.
+ *
+ * Returns the link that now points at the newly appended node, which is
+ * also the value to pass as tailpp on the next call.
+ */
+static md_im_drive_info_t **
+drive_append_wrapper(
+	md_im_drive_info_t	**tailpp,
+	mddrivename_t		*dnp,
+	void			*devid,
+	void			*rdevid,
+	int			devid_sz,
+	char			*minor_name,
+	md_timeval32_t		timestamp,
+	md_im_replica_info_t	*mirp
+)
+{
+	md_im_drive_info_t	*oldtail;
+
+	(void) drive_append(tailpp, dnp, devid, rdevid, devid_sz, minor_name,
+	    timestamp, mirp);
+
+	/*
+	 * If the node at *tailpp has a successor, the new node went in
+	 * behind it; otherwise *tailpp itself is the new node.
+	 */
+	oldtail = *tailpp;
+	return ((oldtail->mid_next != NULL) ? &oldtail->mid_next : tailpp);
+}
+
+
+
+/*
+ * replica_append()
+ *
+ * Allocate a new md_im_replica_info_t describing one replica (flags,
+ * offset, length, timestamp) and link it at the end of the list that
+ * *mirpp belongs to.
+ *
+ * Returns the new node.
+ */
+static md_im_replica_info_t *
+replica_append(
+	md_im_replica_info_t	**mirpp,
+	int			flags,
+	daddr32_t		offset,
+	daddr32_t		length,
+	md_timeval32_t		timestamp
+)
+{
+	md_im_replica_info_t	*newp;
+
+	/* find the terminating NULL link */
+	while (*mirpp != NULL)
+		mirpp = &((*mirpp)->mir_next);
+
+	newp = Zalloc(sizeof (md_im_replica_info_t));
+	newp->mir_flags = flags;
+	newp->mir_offset = offset;
+	newp->mir_length = length;
+	newp->mir_timestamp = timestamp;
+
+	*mirpp = newp;
+	return (newp);
+}
+
+
+
+/*
+ * replica_append_wrapper()
+ *
+ * Tail-tracking front end for replica_append().  replica_append() always
+ * walks to the end of the list from the link it is handed; passing it the
+ * link returned by the previous call to this wrapper keeps that walk short.
+ *
+ * Returns the link that now points at the newly appended node.
+ */
+static md_im_replica_info_t **
+replica_append_wrapper(
+	md_im_replica_info_t	**tailpp,
+	int			flags,
+	daddr32_t		offset,
+	daddr32_t		length,
+	md_timeval32_t		timestamp
+)
+{
+	md_im_replica_info_t	*oldtail;
+
+	(void) replica_append(tailpp, flags, offset, length, timestamp);
+
+	/* a successor on the old tail means the new node is behind it */
+	oldtail = *tailpp;
+	return ((oldtail->mir_next != NULL) ? &oldtail->mir_next : tailpp);
+}
+
+/*
+ * map_replica_disk()
+ *
+ * Linear search of a device id list for the node whose did_index equals
+ * the given locator block device id array index.
+ *
+ * Returns the matching did_list node, or NULL when there is none.
+ */
+static did_list_t *
+map_replica_disk(
+	did_list_t	*did_listp,
+	int		did_index
+)
+{
+	did_list_t	*cur;
+
+	for (cur = did_listp; cur != NULL; cur = cur->next) {
+		if (cur->did_index == did_index)
+			break;
+	}
+
+	/* cur is the match, or NULL if we ran off the end */
+	return (cur);
+}
+
+/*
+ * replicated_list_lookup()
+ *
+ * Looks up a replicated disk entry in the global replicated disk list.
+ * devid_len selects the bucket; the entries in that bucket are compared
+ * against old_devid with devid_compare().
+ *
+ * Returns the new device id recorded for the disk, or NULL if there is no
+ * entry for it (or devid_len is larger than MAX_DEVID_LEN).  The returned
+ * pointer refers to the list's own storage: if you store the returned
+ * devid you must create a local copy.
+ */
+static void *
+replicated_list_lookup(
+	uint_t	devid_len,
+	void	*old_devid
+)
+{
+	replicated_disk_t	*cur;
+
+	/*
+	 * The assert documents the expectation; the explicit test keeps
+	 * builds compiled with NDEBUG from indexing past the end of
+	 * replicated_disk_list.
+	 */
+	assert(devid_len <= MAX_DEVID_LEN);
+	if (devid_len > MAX_DEVID_LEN)
+		return (NULL);
+
+	for (cur = replicated_disk_list[devid_len]; cur != NULL;
+	    cur = cur->next) {
+		if (devid_compare((ddi_devid_t)old_devid,
+		    (ddi_devid_t)cur->old_devid) == 0)
+			return (cur->new_devid);
+	}
+
+	return (NULL);
+}
+
+/*
+ * replicated_list_insert()
+ *
+ * Inserts a replicated disk entry at the head of the bucket of the global
+ * replicated disk list selected by old_devid_len.  old_devid is copied
+ * (old_devid_len bytes); new_devid is stored as the pointer passed in, so
+ * it must stay valid for as long as the list is used.
+ *
+ * A length larger than MAX_DEVID_LEN has no bucket; such an entry is not
+ * inserted.
+ */
+static void
+replicated_list_insert(
+	size_t	old_devid_len,
+	void	*old_devid,
+	void	*new_devid
+)
+{
+	replicated_disk_t	*repl_disk;
+
+	/*
+	 * The assert documents the expectation; the explicit test keeps
+	 * builds compiled with NDEBUG from writing past the end of
+	 * replicated_disk_list.
+	 */
+	assert(old_devid_len <= MAX_DEVID_LEN);
+	if (old_devid_len > MAX_DEVID_LEN)
+		return;
+
+	repl_disk = Zalloc(sizeof (replicated_disk_t));
+	repl_disk->old_devid = Zalloc(old_devid_len);
+	(void) memcpy(repl_disk->old_devid, old_devid, old_devid_len);
+	repl_disk->new_devid = new_devid;
+
+	/* push on the front; this also covers the empty-bucket case */
+	repl_disk->next = replicated_disk_list[old_devid_len];
+	replicated_disk_list[old_devid_len] = repl_disk;
+}
+
+/*
+ * get_replica_disks()
+ *
+ * Will step through the locator records in the supplied locator block, and add
+ * each one with an active replica to a supplied list of md_im_drive_info_t, and
+ * add the appropriate replicas to the md_im_replica_info_t contained therein.
+ *
+ * misp - set descriptor whose mis_drives list and mis_active_replicas
+ * count are updated
+ * did_listp - device id list, searched by locator index
+ * mb - master block supplying mb_blkcnt and mb_setcreatetime for every
+ * replica recorded here
+ * lbp - locator block to walk
+ * ep - error return
+ * replicated - non-zero to size the devid from the list node's rdid
+ * rather than its did
+ *
+ * A drive that is not yet on the list is skipped (and its replica not
+ * counted) if its replica slice cannot be determined, has size zero,
+ * cannot be opened, or has no readable master block.
+ */
+static void
+get_replica_disks(
+ md_im_set_desc_t *misp,
+ did_list_t *did_listp,
+ mddb_mb_t *mb,
+ mddb_lb_t *lbp,
+ md_error_t *ep,
+ int replicated
+)
+{
+ mddrivename_t *dnp;
+ int indx, on_list;
+ mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
+ int flags;
+ int devid_sz;
+ char *minor_name;
+ did_list_t *replica_disk;
+ daddr32_t offset;
+ daddr32_t length;
+ md_timeval32_t timestamp;
+ md_im_replica_info_t **mirpp = NULL;
+ md_im_drive_info_t **midpp = &misp->mis_drives;
+ md_im_drive_info_t *midp;
+ void *did;
+
+ for (indx = 0; indx < lbp->lb_loccnt; indx++) {
+
+ on_list = 0;
+ if (lbp->lb_locators[indx].l_flags & MDDB_F_ACTIVE) {
+
+ /*
+ * search the device id list for a
+ * specific ctds based on the locator
+ * block device id array index.
+ */
+ replica_disk = map_replica_disk(did_listp, indx);
+
+ assert(replica_disk != NULL);
+
+
+ /*
+ * metadrivename() can fail for a slice name
+ * if there is not an existing mddrivename_t.
+ * So we use metadiskname() to strip the slice
+ * number.
+ */
+ /*
+ * NOTE(review): dnp is not checked for NULL before it
+ * is handed to meta_replicaslice() and dereferenced
+ * below - confirm metadrivename() cannot fail here.
+ */
+ dnp = metadrivename(&sp,
+ metadiskname(replica_disk->devname), ep);
+
+ /* is this drive already on the list? */
+ for (midp = misp->mis_drives; midp != NULL;
+ midp = midp->mid_next) {
+ if (dnp == midp->mid_dnp) {
+ on_list = 1;
+ mirpp = &midp->mid_replicas;
+ break;
+ }
+ }
+
+ /*
+ * Get the correct devid_sz
+ */
+ if (replicated)
+ did = replica_disk->rdid;
+ else
+ did = replica_disk->did;
+
+ devid_sz = devid_sizeof((ddi_devid_t)did);
+ minor_name = replica_disk->minor_name;
+
+ /*
+ * New on the list so add it
+ */
+ if (!on_list) {
+ mddb_mb_t *mbp;
+ uint_t sliceno;
+ mdname_t *rsp;
+ int fd = -1;
+
+ /* scratch buffer for this drive's own master block */
+ mbp = Malloc(DEV_BSIZE);
+
+ /* determine the replica slice */
+ if (meta_replicaslice(dnp, &sliceno,
+ ep) != 0) {
+ Free(mbp);
+ continue;
+ }
+
+ /*
+ * if the replica slice size is zero,
+ * don't bother opening
+ */
+ if (dnp->vtoc.parts[sliceno].size == 0) {
+ Free(mbp);
+ continue;
+ }
+
+ if ((rsp = metaslicename(dnp, sliceno,
+ ep)) == NULL) {
+ Free(mbp);
+ continue;
+ }
+
+ if ((fd = open(rsp->rname,
+ O_RDONLY| O_NDELAY)) < 0) {
+ Free(mbp);
+ continue;
+ }
+
+ /*
+ * a drive may not have a master block
+ */
+ if (read_master_block(ep, fd, mbp,
+ DEV_BSIZE) <= 0) {
+ mdclrerror(ep);
+ Free(mbp);
+ (void) close(fd);
+ continue;
+ }
+
+ (void) close(fd);
+ /*
+ * The drive's timestamp comes from its own master
+ * block (mbp), not from the mb argument.
+ */
+ midpp = drive_append_wrapper(midpp, dnp,
+ replica_disk->did, replica_disk->rdid,
+ devid_sz, minor_name, mbp->mb_setcreatetime,
+ NULL);
+ /* *midpp is the drive just appended */
+ mirpp = &((*midpp)->mid_replicas);
+ Free(mbp);
+ }
+
+ /*
+ * For either of these assertions to fail, it implies
+ * a NULL return from metadrivename() above. Since
+ * the args came from a presumed valid locator block,
+ * that's Bad.
+ */
+ assert(midpp != NULL);
+ assert(mirpp != NULL);
+
+ /*
+ * Extract the parameters describing this replica.
+ *
+ * The magic "1" in the length calculation accounts
+ * for the length of the master block, in addition to
+ * the block count it describes. (The master block
+ * will always take up one block on the disk, and
+ * there will always only be one master block per
+ * replica, even though much of the code is structured
+ * to handle noncontiguous replicas.)
+ */
+ flags = lbp->lb_locators[indx].l_flags;
+ offset = lbp->lb_locators[indx].l_blkno;
+ length = mb->mb_blkcnt + 1;
+ timestamp = mb->mb_setcreatetime;
+
+ mirpp = replica_append_wrapper(mirpp, flags,
+ offset, length, timestamp);
+
+ /*
+ * If we're here it means -
+ *
+ * a) we had an active copy of the replica, and
+ * b) we've added the disk to the list of
+ * disks as well.
+ *
+ * We need to bump up the number of active
+ * replica count for each such replica so that it
+ * can be used later for replica quorum check.
+ */
+ misp->mis_active_replicas++;
+ }
+ }
+}
+
+
+
+/*
+ * get_nonreplica_disks()
+ *
+ * Extracts the disks without replicas from the locator name space and adds them
+ * to the supplied list of md_im_drive_info_t.
+ *
+ * misp - set descriptor whose mis_drives list is extended
+ * did_nm - mddb record holding the devid_min_rec (minor names)
+ * did_shrnm - mddb record holding the devid_shr_rec (shared device ids)
+ * ep - error return
+ * replicated - non-zero if this is a replicated diskset, in which case the
+ * device id stored in the record is translated through the
+ * global replicated disk list first
+ *
+ * Each minor name entry is matched to its device id by key, the device id
+ * is mapped to a device name, and the drive is appended unless it is a
+ * metadevice, is already on the list, or has no readable master block on
+ * its replica slice.  Entries of a replicated diskset that have no
+ * translation in the replicated disk list are skipped.
+ */
+static void
+get_nonreplica_disks(
+	md_im_set_desc_t	*misp,
+	mddb_rb_t		*did_nm,
+	mddb_rb_t		*did_shrnm,
+	md_error_t		*ep,
+	int			replicated
+)
+{
+	char			*search_path = "/dev";
+	devid_nmlist_t		*nmlist;
+	md_im_drive_info_t	*midp, **midpp = &misp->mis_drives;
+	mddrivename_t		*dnp;
+	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
+	int			on_list;
+	int			devid_sz;
+	struct devid_min_rec	*did_rec;
+	struct devid_shr_rec	*did_shr_rec;
+	struct did_shr_name	*did;
+	struct did_min_name	*min;
+	void			*r_did;	/* NULL if not a replicated diskset */
+	void			*valid_did;
+
+	/*
+	 * We got a pointer to an mddb record, which we expect to contain a
+	 * name record; extract the pointer thereto.
+	 */
+	/* LINTED */
+	did_rec = (struct devid_min_rec *)((caddr_t)(&did_nm->rb_data));
+	/* LINTED */
+	did_shr_rec = (struct devid_shr_rec *)
+	    ((caddr_t)(&did_shrnm->rb_data));
+
+	/*
+	 * Skip the nm_rec_hdr and iterate on the array of struct minor_name
+	 * at the end of the devid_min_rec
+	 */
+	for (min = &did_rec->minor_name[0]; min->min_devid_key != 0;
+	    /* LINTED */
+	    min = (struct did_min_name *)((char *)min + DID_NAMSIZ(min))) {
+
+		on_list = 0;
+		r_did = NULL;
+
+		/*
+		 * For a given DID_NM key, locate the corresponding device
+		 * id from DID_NM_SHR
+		 */
+		for (did = &did_shr_rec->device_id[0]; did->did_key != 0;
+		    /* LINTED */
+		    did = (struct did_shr_name *)
+		    ((char *)did + DID_SHR_NAMSIZ(did))) {
+			/*
+			 * We got a match, this is the device id we're
+			 * looking for
+			 */
+			if (min->min_devid_key == did->did_key)
+				break;
+		}
+
+		if (did->did_key == 0) {
+			/* we didn't find a match */
+			assert(did->did_key != 0);
+			md_exit(NULL, 1);
+		}
+
+		/*
+		 * If replicated diskset
+		 */
+		if (replicated) {
+			size_t	new_devid_len;
+			void	*temp;
+
+			/*
+			 * In this case, did->did_devid will
+			 * be invalid so lookup the real one
+			 */
+			temp = replicated_list_lookup(did->did_size,
+			    did->did_devid);
+			if (temp == NULL) {
+				/*
+				 * No translation is known for this disk, so
+				 * there is no devid to size or copy; skip it.
+				 */
+				continue;
+			}
+			new_devid_len = devid_sizeof((ddi_devid_t)temp);
+			r_did = Zalloc(new_devid_len);
+			(void) memcpy(r_did, temp, new_devid_len);
+			valid_did = r_did;
+		} else {
+			valid_did = did->did_devid;
+		}
+
+		/* Get the ctds mapping for that device id */
+		if (meta_deviceid_to_nmlist(search_path,
+		    (ddi_devid_t)valid_did,
+		    &min->min_name[0], &nmlist) == 0) {
+
+			assert(nmlist->devname != NULL);
+			/* Don't bother with metadevices, but track disks */
+			if (!is_metaname(nmlist->devname)) {
+				dnp = metadrivename(&sp,
+				    metadiskname(nmlist->devname), ep);
+
+				assert(dnp != NULL);
+				/* Is it already on the list? */
+				for (midp = misp->mis_drives; midp != NULL;
+				    midp = midp->mid_next) {
+					if (midp->mid_dnp == dnp) {
+						on_list = 1;
+						break;
+					}
+				}
+
+				devid_sz = devid_sizeof(
+				    (ddi_devid_t)valid_did);
+
+				if (!on_list) {
+					mddb_mb_t	*mbp;
+					uint_t		sliceno;
+					mdname_t	*rsp = NULL;
+					int		fd = -1;
+
+					/*
+					 * Find the replica slice, make sure
+					 * it is not empty, and open it.  Any
+					 * failure just means this drive is
+					 * not added; we still fall through to
+					 * the cleanup below.
+					 */
+					if (meta_replicaslice(dnp, &sliceno,
+					    ep) == 0 &&
+					    dnp->vtoc.parts[sliceno].size
+					    != 0 &&
+					    (rsp = metaslicename(dnp, sliceno,
+					    ep)) != NULL &&
+					    (fd = open(rsp->rname,
+					    O_RDONLY| O_NDELAY)) >= 0) {
+
+						mbp = Malloc(DEV_BSIZE);
+
+						/*
+						 * a drive may not have a
+						 * master block
+						 */
+						if (read_master_block(ep, fd,
+						    mbp, DEV_BSIZE) <= 0) {
+							mdclrerror(ep);
+						} else {
+							/*
+							 * If it is replicated
+							 * diskset, r_did will
+							 * be non-NULL and
+							 * devid_sz will be
+							 * its size
+							 */
+							midpp =
+							    drive_append_wrapper(
+							    midpp, dnp,
+							    &did->did_devid,
+							    r_did, devid_sz,
+							    &min->min_name[0],
+							    mbp->
+							    mb_setcreatetime,
+							    NULL);
+						}
+
+						Free(mbp);
+						(void) close(fd);
+					}
+				}
+			}
+			devid_free_nmlist(nmlist);
+		}
+
+		/*
+		 * drive_append() stores its own copy of the device id, so
+		 * the translated devid allocated above is no longer needed.
+		 */
+		if (r_did != NULL)
+			Free(r_did);
+	}
+}
+
+/*
+ * set_append()
+ *
+ * Allocate a new md_im_set_desc_t, link it at the end of the list that
+ * *mispp belongs to, and populate it: the old set number comes from the
+ * master block, disks with replicas from the locator block, and, when all
+ * three name records are present, disks without replicas from the locator
+ * namespace.
+ *
+ * Any error raised while gathering the disks is left in ep for the caller.
+ *
+ * Returns the new node.
+ */
+static md_im_set_desc_t *
+set_append(
+	md_im_set_desc_t	**mispp,
+	did_list_t		*did_listp,
+	mddb_mb_t		*mb,
+	mddb_lb_t		*lbp,
+	mddb_rb_t		*nm,
+	mddb_rb_t		*did_nm,
+	mddb_rb_t		*did_shrnm,
+	md_error_t		*ep,
+	int			replicated
+)
+{
+	md_im_set_desc_t	*newp;
+	set_t			oldsetno = mb->mb_setno;
+	int			have_names;
+
+	/* find the terminating NULL link */
+	while (*mispp != NULL)
+		mispp = &((*mispp)->mis_next);
+
+	newp = Zalloc(sizeof (md_im_set_desc_t));
+	*mispp = newp;
+
+	if (replicated)
+		newp->mis_flags = MD_IM_SET_REPLICATED;
+	newp->mis_oldsetno = oldsetno;
+
+	/* first the disks that carry replicas ... */
+	get_replica_disks(newp, did_listp, mb, lbp, ep, replicated);
+
+	/* ... then, given a complete namespace, the ones that do not */
+	have_names = (nm != NULL) && (did_nm != NULL) && (did_shrnm != NULL);
+	if (have_names)
+		get_nonreplica_disks(newp, did_nm, did_shrnm, ep, replicated);
+
+	return (newp);
+}
+
+
+
+/*
+ * set_append_wrapper()
+ *
+ * Tail-tracking front end for set_append().  set_append() always walks to
+ * the end of the list from the link it is handed; passing it the link
+ * returned by the previous call to this wrapper keeps that walk short.
+ *
+ * Returns the link that now points at the newly appended node.
+ */
+static md_im_set_desc_t **
+set_append_wrapper(
+	md_im_set_desc_t	**tailpp,
+	did_list_t		*did_listp,
+	mddb_mb_t		*mb,
+	mddb_lb_t		*lbp,
+	mddb_rb_t		*nm,
+	mddb_rb_t		*did_nm,
+	mddb_rb_t		*did_shrnm,
+	md_error_t		*ep,
+	int			replicated
+)
+{
+	(void) set_append(tailpp, did_listp, mb, lbp, nm, did_nm,
+	    did_shrnm, ep, replicated);
+
+	/*
+	 * With no successor, *tailpp is the node just added (it was the
+	 * first item appended through this link), so hand back tailpp.
+	 */
+	if ((*tailpp)->mis_next == NULL)
+		return (tailpp);
+
+	return (&(*tailpp)->mis_next);
+}
+
+
+
+/*
+ * add_disk_names()
+ *
+ * di_walk_minor() callback.  For a character-device minor node whose device
+ * has a device id, and whose device node differs from the one remembered
+ * from the previous accepted call, maps the device id and minor name to a
+ * device name under /dev and appends the corresponding disk name to the
+ * md_im_names_t passed in args.
+ *
+ * Devices without a device id are ignored, since nothing can be done with
+ * them; that covers CD and floppy devices that lack one.
+ *
+ * Always returns DI_WALK_CONTINUE.
+ */
+static int
+add_disk_names(di_node_t node, di_minor_t minor, void *args)
+{
+	static di_node_t	last_node = NULL;
+	md_im_names_t		*names = (md_im_names_t *)args;
+	ddi_devid_t		devid = di_devid(node);
+	char			*minor_name = di_minor_name(minor);
+	devid_nmlist_t		*nmlist;
+	int			slot;
+
+	/* no device id, nothing we can do with it */
+	if (devid == NULL)
+		return (DI_WALK_CONTINUE);
+
+	/* char disk devices only (as opposed to block) */
+	if (di_minor_spectype(minor) != S_IFCHR)
+		return (DI_WALK_CONTINUE);
+
+	/* only the first occurrence of each instance */
+	if (last_node != NULL && node == last_node)
+		return (DI_WALK_CONTINUE);
+	last_node = node;
+
+	if (meta_deviceid_to_nmlist("/dev", devid, minor_name, &nmlist) != 0)
+		return (DI_WALK_CONTINUE);
+
+	/* grow the name array by one and fill in the new slot */
+	slot = names->min_count++;
+	assert(nmlist->devname != NULL);
+	names->min_names = Realloc(names->min_names,
+	    names->min_count * sizeof (char *));
+	assert(names->min_names != NULL);
+	names->min_names[slot] = metadiskname(nmlist->devname);
+
+	devid_free_nmlist(nmlist);
+	return (DI_WALK_CONTINUE);
+}
+
+
+
+/*
+ * meta_list_disks()
+ *
+ * Takes a snapshot of the device tree and walks its DDI_NT_BLOCK minor
+ * nodes with add_disk_names() to collect disk names into cnames, which is
+ * reset to empty first.
+ *
+ * Returns 0 on success, or the value of mdsyserror() if the snapshot could
+ * not be taken.
+ */
+int
+meta_list_disks(md_error_t *ep, md_im_names_t *cnames)
+{
+	di_node_t	snapshot;
+
+	assert(cnames != NULL);
+	cnames->min_names = NULL;
+	cnames->min_count = 0;
+
+	snapshot = di_init("/", DINFOCPYALL|DINFOFORCE);
+	if (snapshot == DI_NODE_NIL)
+		return (mdsyserror(ep, errno, NULL));
+
+	(void) di_walk_minor(snapshot, DDI_NT_BLOCK, 0, cnames,
+	    add_disk_names);
+	di_fini(snapshot);
+
+	return (0);
+}
+
+/*
+ * meta_imp_drvused()
+ *
+ * Decides whether a drive is in use: mounted, swapped on, or - provided
+ * local metadb locations can be set up - in use by SVM or already part of
+ * a set.  When the drive is found to be in use, ep carries the error from
+ * the check that detected it.
+ *
+ * Returns 1 if drive is in use.
+ * Returns 0 if drive is not in use.
+ */
+int
+meta_imp_drvused(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	md_error_t	scratch = mdnullerror;
+
+	if (meta_check_drivemounted(sp, dnp, ep) != 0)
+		return (1);
+
+	if (meta_check_driveswapped(sp, dnp, ep) != 0)
+		return (1);
+
+	/*
+	 * meta_setup_db_locations() only tells us whether any local metadbs
+	 * are present.  Its error goes into a scratch structure that is
+	 * never looked at; without local metadbs the SVM checks are moot.
+	 */
+	if (meta_setup_db_locations(&scratch) != 0)
+		return (0);
+
+	if (meta_check_drive_inuse(sp, dnp, 1, ep) != 0)
+		return (1);
+
+	return ((meta_check_driveinset(sp, dnp, ep) != 0) ? 1 : 0);
+}
+
+/*
+ * meta_prune_cnames()
+ *
+ * Sorts the named disks into two drive name lists - in use and not in use,
+ * as judged by meta_imp_drvused() - prior to further processing.  A name
+ * that cannot be turned into a drive name is reported with mde_perror()
+ * and left out of both lists.
+ *
+ * If err_on_prune is set and at least one disk was in use, ep is set to
+ * MDE_DS_DRIVEINUSE for the first such disk and the list of in-use disks is
+ * returned.  Otherwise the list of disks not in use is returned.  The list
+ * that is not returned is freed.
+ */
+mddrivenamelist_t *
+meta_prune_cnames(
+	md_error_t	*ep,
+	md_im_names_t	*cnames,
+	int		err_on_prune
+)
+{
+	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
+	mddrivenamelist_t	*free_list = NULL;
+	mddrivenamelist_t	**free_tailpp = &free_list;
+	mddrivenamelist_t	*used_list = NULL;
+	mddrivenamelist_t	**used_tailpp = &used_list;
+	int			nused = 0;
+	int			i;
+
+	for (i = 0; i < cnames->min_count; ++i) {
+		char		*cname = cnames->min_names[i];
+		mddrivename_t	*dnp = metadrivename(&sp, cname, ep);
+
+		if (dnp == NULL) {
+			/*
+			 * Worth telling the user about, but not worth
+			 * stopping for.
+			 */
+			mde_perror(ep, cname);
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (!meta_imp_drvused(sp, dnp, ep)) {
+			free_tailpp = meta_drivenamelist_append_wrapper(
+			    free_tailpp, dnp);
+			continue;
+		}
+
+		/* in use: remember it and drop the error describing why */
+		used_tailpp = meta_drivenamelist_append_wrapper(used_tailpp,
+		    dnp);
+		nused++;
+		mdclrerror(ep);
+	}
+
+	if (nused == 0)
+		return (free_list);
+
+	if (!err_on_prune) {
+		metafreedrivenamelist(used_list);
+		return (free_list);
+	}
+
+	(void) mddserror(ep, MDE_DS_DRIVEINUSE, 0,
+	    NULL, used_list->drivenamep->cname, NULL);
+	metafreedrivenamelist(free_list);
+	return (used_list);
+}
+
+/*
+ * read_master_block()
+ *
+ * Reads bsize bytes from disk block 16 of fd into bp and checks that they
+ * look like a master block.
+ *
+ * Returns:
+ *	< 0 for failure (seek or read error; the error is recorded in ep)
+ *	0 for no valid master block
+ *	1 for valid master block
+ *
+ * The supplied buffer will be filled in for EITHER 0 or 1.
+ *
+ * When 0 is returned because of a bad magic number, an MDE_DB_MASTER error
+ * is also recorded in ep; a revision mismatch alone records no error.
+ */
+int
+read_master_block(
+	md_error_t	*ep,
+	int		fd,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_mb_t	*mbp = bp;
+	int		rval = 1;
+	ssize_t		nread;
+
+	assert(bp != NULL);
+
+	if (lseek(fd, (off_t)dbtob(16), SEEK_SET) < 0)
+		return (mdsyserror(ep, errno, NULL));
+
+	nread = read(fd, bp, bsize);
+	if (nread < 0)
+		return (mdsyserror(ep, errno, NULL));
+	if (nread != bsize) {
+		/*
+		 * read() only sets errno when it fails outright; a short
+		 * read would otherwise report a stale errno value.
+		 */
+		return (mdsyserror(ep, EIO, NULL));
+	}
+
+	/*
+	 * The master block magic number can either be MDDB_MAGIC_MB in
+	 * the case of a real master block, or, it can be MDDB_MAGIC_DU
+	 * in the case of a dummy master block
+	 */
+	if ((mbp->mb_magic != MDDB_MAGIC_MB) &&
+	    (mbp->mb_magic != MDDB_MAGIC_DU)) {
+		rval = 0;
+		(void) mdmddberror(ep, MDE_DB_MASTER, 0, 0, 0, NULL);
+	}
+
+	if (mbp->mb_revision != MDDB_REV_MB) {
+		rval = 0;
+	}
+
+	return (rval);
+}
+
+/*
+ * read_locator_block()
+ *
+ * Reads bsize bytes into bp from the first block of the master block's
+ * block map (mb_blkmap.m_firstblk) and checks the locator block magic.
+ *
+ * Returns:
+ *	< 0 for failure (seek or read error; the error is recorded in ep)
+ *	0 for no valid locator block
+ *	1 for valid locator block
+ */
+int
+read_locator_block(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_lb_t	*lbp = bp;
+	ssize_t		nread;
+
+	assert(bp != NULL);
+
+	if (lseek(fd, (off_t)dbtob(mbp->mb_blkmap.m_firstblk), SEEK_SET) < 0)
+		return (mdsyserror(ep, errno, NULL));
+
+	nread = read(fd, bp, bsize);
+	if (nread < 0)
+		return (mdsyserror(ep, errno, NULL));
+	if (nread != bsize) {
+		/*
+		 * read() only sets errno when it fails outright; a short
+		 * read would otherwise report a stale errno value.
+		 */
+		return (mdsyserror(ep, EIO, NULL));
+	}
+
+	return ((lbp->lb_magic == MDDB_MAGIC_LB) ? 1 : 0);
+}
+
+/*
+ * phys_read()
+ *
+ * Reads bcount bytes into bp from the physical location of logical mddb
+ * block blk, as mapped by getphysblk() through the master block mbp.
+ *
+ * Returns:
+ *	< 0 for failure (block out of range, seek or read error; the error
+ *	    is recorded in ep)
+ *	bcount on success
+ */
+int
+phys_read(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	daddr_t		blk,
+	void		*bp,
+	int		bcount
+)
+{
+	daddr_t		pblk;
+	ssize_t		nread;
+
+	if ((pblk = getphysblk(blk, mbp)) < 0)
+		return (mdmddberror(ep, MDE_DB_BLKRANGE, NODEV32,
+		    MD_LOCAL_SET, blk, NULL));
+
+	if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0)
+		return (mdsyserror(ep, errno, NULL));
+
+	nread = read(fd, bp, bcount);
+	if (nread < 0)
+		return (mdsyserror(ep, errno, NULL));
+	if (nread != bcount) {
+		/*
+		 * read() only sets errno when it fails outright; a short
+		 * read would otherwise report a stale errno value.
+		 */
+		return (mdsyserror(ep, EIO, NULL));
+	}
+
+	return (bcount);
+}
+
+/*
+ * read_locator_block_did()
+ *
+ * Reads bsize bytes into bp starting at the locator block's first device
+ * id block (lb_didfirstblk) and checks the device id block magic.
+ *
+ * Returns:
+ *	< 0 for failure
+ *	0 for no valid locator name struct
+ *	1 for valid locator name struct
+ */
+int
+read_locator_block_did(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	mddb_lb_t	*lbp,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_did_blk_t	*didblkp = bp;
+	int		startblk = lbp->lb_didfirstblk;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, startblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (didblkp->blk_magic != MDDB_MAGIC_DI)
+		return (0);
+	return (1);
+}
+
+/*
+ * read_locator_names()
+ *
+ * Reads bsize bytes into bp starting at the locator block's first locator
+ * names block (lb_lnfirstblk) and checks the locator names magic.
+ *
+ * Returns:
+ *	< 0 for failure
+ *	0 for no valid locator name struct
+ *	1 for valid locator name struct
+ */
+int
+read_locator_names(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	mddb_lb_t	*lbp,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_ln_t	*namesp = bp;
+	int		startblk = lbp->lb_lnfirstblk;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, startblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (namesp->ln_magic != MDDB_MAGIC_LN)
+		return (0);
+	return (1);
+}
+
+
+/*
+ * read_database_block()
+ *
+ * Reads bsize bytes into bp from logical block dbblk and checks the
+ * database block magic.
+ *
+ * Returns:
+ *	< 0 for failure
+ *	0 if the block does not carry MDDB_MAGIC_DB
+ *	1 if it does
+ */
+int
+read_database_block(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	int		dbblk,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_db_t	*blockp = bp;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, dbblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (blockp->db_magic != MDDB_MAGIC_DB)
+		return (0);
+	return (1);
+}
+
+/*
+ * read_loc_didblks()
+ *
+ * Reads bsize bytes into bp from logical block didblk and checks the
+ * device id block magic.
+ *
+ * Returns:
+ *	< 0 for failure
+ *	0 if the block does not carry MDDB_MAGIC_DI
+ *	1 if it does
+ */
+int
+read_loc_didblks(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	int		didblk,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_did_blk_t	*blockp = bp;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, didblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (blockp->blk_magic != MDDB_MAGIC_DI)
+		return (0);
+	return (1);
+}
+
+
+/*
+ * read_loc_didinfo()
+ *
+ * Reads bsize bytes into bp from logical block infoblk and tests the
+ * MDDB_DID_EXISTS flag of the device id info found there.
+ *
+ * Returns:
+ *	< 0 for failure
+ *	0 if MDDB_DID_EXISTS is not set
+ *	1 if it is
+ */
+int
+read_loc_didinfo(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	int		infoblk,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_did_info_t	*didinfop = bp;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, infoblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if ((didinfop->info_flags & MDDB_DID_EXISTS) == 0)
+		return (0);
+	return (1);
+}
+
+/*
+ * meta_nm_rec()
+ *
+ * Return the DE corresponding to the requested namespace record type.
+ * Modifies dbp to have a firstentry if one isn't there.
+ */
+static mddb_de_t *
+meta_nm_rec(mddb_db_t *dbp, mddb_type_t rectype)
+{
+ mddb_de_t *dep;
+ int desize;
+
+ /*
+ * Rewrite the entry chain in place so that every link points inside
+ * the buffer that holds *dbp. Presumably the pointer values read from
+ * disk are only meaningful as "there is / is not a next entry" flags
+ * (TODO confirm against the on-disk format).
+ */
+ if (dbp->db_firstentry != NULL) {
+ /* the first entry sits immediately after the db_firstentry field */
+ /* LINTED */
+ dep = (mddb_de_t *)((caddr_t)(&dbp->db_firstentry)
+ + sizeof (dbp->db_firstentry));
+ dbp->db_firstentry = dep;
+ while (dep && dep->de_next) {
+ /*
+ * An entry's size is the fixed part of the structure plus
+ * one daddr_t per block it lists; the next entry begins
+ * right after it.
+ */
+ desize = sizeof (*dep) - sizeof (dep->de_blks) +
+ sizeof (daddr_t) * dep->de_blkcount;
+ /* LINTED */
+ dep->de_next = (mddb_de_t *)
+ ((caddr_t)dep + desize);
+ dep = dep->de_next;
+ }
+ }
+
+ /* Linear search; dep ends up NULL if no entry has the wanted type. */
+ for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) {
+ if (dep->de_type1 == rectype)
+ break;
+ }
+ return (dep);
+}
+
+/*
+ * read_nm_rec()
+ *
+ * Reads the NM, DID_NM or DID_SHR_NM record in the mddb and stores the
+ * configuration data in the buffer 'nm'
+ *
+ * Returns:
+ * < 0 for failure
+ * 0 for no valid NM/DID_NM/DID_SHR_NM record
+ * 1 for valid NM/DID_NM/DID_SHR_NM record
+ *
+ */
+static int
+read_nm_rec(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	mddb_lb_t	*lbp,
+	char		**nm,
+	mddb_type_t	rectype,
+	char		*diskname
+)
+{
+	int		cnt, dbblk, rval = 0;
+	char		db[DEV_BSIZE];
+	/*
+	 * Must start out NULL: when lb_dbfirstblk is 0 the scan loop below
+	 * never runs, and the "never succeeded" test would otherwise read
+	 * an uninitialized pointer.
+	 */
+	mddb_de_t	*dep = NULL;
+	/*LINTED*/
+	mddb_db_t	*dbp = (mddb_db_t *)&db;
+	char		*tmpnm = NULL;
+	daddr_t		pblk;
+
+	/* Walk the chain of database (directory) blocks. */
+	for (dbblk = lbp->lb_dbfirstblk;
+	    dbblk != 0;
+	    dbblk = dbp->db_nextblk) {
+
+		if ((rval = read_database_block(ep, fd, mbp, dbblk, dbp,
+		    sizeof (db))) <= 0)
+			return (rval);
+
+		/*
+		 * Locate NM/DID_NM/DID_SHR_NM record. Normally there is
+		 * only one record per mddb. There is a rare case when we
+		 * can't expand the record. If this is the case then we
+		 * will have multiple NM/DID_NM/DID_SHR_NM records linked
+		 * with r_next_recid.
+		 *
+		 * For now assume the normal case and handle the extended
+		 * namespace in Phase 2.
+		 */
+		if ((dep = meta_nm_rec(dbp, rectype)) != NULL)
+			break;
+	}
+
+	/* If meta_nm_rec() never succeeded, bail out */
+	if (dep == NULL)
+		return (0);
+
+	/*
+	 * Read in the appropriate record and return configurations.
+	 * *nm is set before any block is read, so the caller receives the
+	 * buffer (and must free it) even when a later read fails.
+	 */
+	tmpnm = (char *)Zalloc(dbtob(dep->de_blkcount));
+	*nm = tmpnm;
+
+	for (cnt = 0; cnt < dep->de_blkcount; cnt++) {
+		if ((pblk = getphysblk(dep->de_blks[cnt], mbp)) < 0) {
+			rval = mdmddberror(ep, MDE_DB_BLKRANGE,
+			    NODEV32, MD_LOCAL_SET,
+			    dep->de_blks[cnt], diskname);
+			return (rval);
+		}
+
+		if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) {
+			rval = mdsyserror(ep, errno, diskname);
+			return (rval);
+		}
+
+		if (read(fd, tmpnm, DEV_BSIZE) != DEV_BSIZE) {
+			rval = mdsyserror(ep, errno, diskname);
+			return (rval);
+		}
+
+		/* the record's blocks are stored back to back in the buffer */
+		tmpnm += DEV_BSIZE;
+	}
+	return (1);
+}
+
+/*
+ * is_replicated
+ *
+ * Determines whether a disk has been replicated or not. It checks to see
+ * if the device id stored in the master block is the same as the device id
+ * registered for that disk on the current system. If the two device ids are
+ * different, then we know that the disk has been replicated.
+ *
+ * If need_devid is set and the disk is replicated, fill in the new_devid.
+ * Also, if need_devid is set, this routine allocates memory for the device
+ * ids; the caller of this routine is responsible for free'ing up the memory.
+ *
+ * Returns:
+ * 1 if it's a replicated disk
+ * 0 if it's not a replicated disk
+ */
+static int
+is_replicated(
+	int		fd,
+	mddb_mb_t	*mbp,
+	int		need_devid,
+	void		**new_devid
+)
+{
+	ddi_devid_t	live_devid;
+	size_t		live_len;
+	int		differs;
+
+	/* No device id recorded in the master block: nothing to compare. */
+	if (mbp->mb_devid_magic != MDDB_MAGIC_DE)
+		return (0);
+
+	/* If devid_get() fails for fd, report "not replicated". */
+	if (devid_get(fd, &live_devid) != 0)
+		return (0);
+
+	differs = (devid_compare((ddi_devid_t)mbp->mb_devid, live_devid) != 0);
+
+	/*
+	 * Hand a private copy of the current device id to the caller, but
+	 * only when the disk is replicated and the caller asked for it.
+	 * *new_devid is left untouched otherwise.
+	 */
+	if (differs && need_devid) {
+		live_len = devid_sizeof(live_devid);
+		*new_devid = Zalloc(live_len);
+		(void) memcpy(*new_devid, (void *)live_devid, live_len);
+	}
+
+	devid_free(live_devid);
+	return (differs ? 1 : 0);
+}
+
+/*
+ * free_replicated_disks_list()
+ *
+ * this frees up all the memory allocated by build_replicated_disks_list
+ */
+static void
+free_replicated_disks_list()
+{
+	replicated_disk_t	*node, *next;
+	int			slot;
+
+	/* Every slot 0..MAX_DEVID_LEN heads its own singly linked chain. */
+	for (slot = 0; slot <= MAX_DEVID_LEN; slot++) {
+		for (node = replicated_disk_list[slot]; node != NULL;
+		    node = next) {
+			/* grab the link before the node goes away */
+			next = node->next;
+
+			Free(node->old_devid);
+			Free(node->new_devid);
+			Free(node);
+		}
+		replicated_disk_list[slot] = NULL;
+	}
+}
+
+/*
+ * build_replicated_disks_list()
+ *
+ * Builds a list of disks that have been replicated using either a
+ * remote replication or a point-in-time replication software. The
+ * list is stored as a two dimensional sparse array.
+ *
+ * Returns
+ * 1 on success
+ * 0 on failure
+ */
+static int
+build_replicated_disks_list(
+	md_error_t		*ep,
+	mddrivenamelist_t	*dnlp
+)
+{
+	uint_t			sliceno;
+	int			fd = -1;
+	mddrivenamelist_t	*dp;
+	mdname_t		*rsp;
+	mddb_mb_t		*mbp;
+
+	/* scratch buffer, reused for the master block of every drive */
+	mbp = Malloc(DEV_BSIZE);
+
+	for (dp = dnlp; dp != NULL; dp = dp->next) {
+		mddrivename_t	*dnp;
+		void		*new_devid;
+
+		dnp = dp->drivenamep;
+		/* determine the replica slice */
+		if (meta_replicaslice(dnp, &sliceno, ep) != 0)
+			continue;
+
+		/*
+		 * if the replica slice size is zero, don't bother opening
+		 */
+		if (dnp->vtoc.parts[sliceno].size == 0)
+			continue;
+
+		if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
+			continue;
+
+		if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0) {
+			/*
+			 * Record the error first (Free() could disturb
+			 * errno), then release the scratch buffer, which
+			 * used to leak on this path.
+			 *
+			 * NOTE(review): the header comment promises 0 on
+			 * failure, yet this path returns whatever
+			 * mdsyserror() returns; confirm what callers expect.
+			 */
+			int	rval = mdsyserror(ep, errno, rsp->rname);
+
+			Free(mbp);
+			return (rval);
+		}
+
+		/* a drive may not have a master block so we just continue */
+		if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) {
+			(void) close(fd);
+			mdclrerror(ep);
+			continue;
+		}
+
+		/*
+		 * is_replicated() only fills in new_devid when it returns
+		 * non-zero, which is the only case where it is used here.
+		 */
+		if (is_replicated(fd, mbp, 1, &new_devid)) {
+			replicated_list_insert(mbp->mb_devid_len,
+			    mbp->mb_devid, new_devid);
+		}
+		(void) close(fd);
+	}
+	replicated_disk_list_built = 1;
+
+	Free(mbp);
+	return (1);
+}
+
+/*
+ * free_did_list()
+ *
+ * Frees the did_list allocated as part of build_did_list
+ */
+static void
+free_did_list(
+	did_list_t	*did_listp
+)
+{
+	did_list_t	*node, *next;
+
+	for (node = did_listp; node != NULL; node = next) {
+		/* remember the successor before this node is released */
+		next = node->next;
+
+		/* each node owns its device ids and both name strings */
+		if (node->rdid)
+			Free(node->rdid);
+		if (node->did)
+			Free(node->did);
+		if (node->devname)
+			Free(node->devname);
+		if (node->minor_name)
+			Free(node->minor_name);
+		Free(node);
+	}
+}
+
+/*
+ * build_did_list()
+ *
+ * Build a list of device ids corresponding to disks in the locator block.
+ * Memory is allocated here for the nodes in the did_list. The callers of
+ * this routine must also call free_did_list to free up the memory after
+ * they're done.
+ *
+ * Returns:
+ * < 0 for failure
+ * 0 for no valid locator block device id array
+ * 1 for valid locator block device id array
+ * ENOTSUP partial diskset, not all disks in a diskset on the
+ * system where import is being executed
+ */
+static int
+build_did_list(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mb,
+	mddb_did_blk_t	*lbdidp,
+	did_list_t	**did_listp,
+	int		replicated
+)
+{
+	char		*search_path = "/dev";
+	char		*minor_name;
+	int		rval, cnt;
+	devid_nmlist_t	*nm;
+	uint_t		did_info_length = 0;
+	uint_t		did_info_firstblk = 0;
+	did_list_t	*new = NULL, *head = NULL;
+	char		*bp = NULL, *temp;
+	mddb_did_info_t	*did_info = NULL;
+	void		*did = NULL;
+	size_t		new_devid_len;
+
+	for (cnt = 0; cnt < MDDB_NLB; cnt++) {
+		did_info = &lbdidp->blk_info[cnt];
+
+		if (!(did_info->info_flags & MDDB_DID_EXISTS))
+			continue;
+
+		new = Zalloc(sizeof (did_list_t));
+		new->did = Zalloc(did_info->info_length);
+
+		/*
+		 * If the buffer that has already been read in covers
+		 * exactly the same blocks, just re-use it. Otherwise free
+		 * the previous one and alloc a new one.
+		 *
+		 * The buffer is only reusable when BOTH the first block and
+		 * the length match, so a mismatch in EITHER (or no buffer
+		 * at all yet) forces a fresh read.
+		 */
+		if (bp == NULL ||
+		    dbtob(did_info->info_blkcnt) != did_info_length ||
+		    did_info->info_firstblk != did_info_firstblk) {
+
+			did_info_length = dbtob(did_info->info_blkcnt);
+			did_info_firstblk = did_info->info_firstblk;
+
+			if (bp)
+				Free(bp);
+			bp = Zalloc(did_info_length);
+
+			if ((rval = phys_read(ep, fd, mb, did_info_firstblk,
+			    (void *)bp, did_info_length)) < 0)
+				goto fail;
+		}
+
+		/* the device id lives info_offset bytes into the buffer */
+		temp = bp + did_info->info_offset;
+		(void) memcpy(new->did, temp, did_info->info_length);
+		new->did_index = cnt;
+		minor_name = did_info->info_minor_name;
+
+		/*
+		 * If we are not able to find the ctd mapping corresponding
+		 * to a given device id, it probably means the device id in
+		 * question is not registered with the system.
+		 *
+		 * Highly likely that the only time this happens, we've hit
+		 * a case where not all the disks that are a part of the
+		 * diskset were moved before importing the diskset.
+		 *
+		 * If set is a replicated diskset, then the device id we get
+		 * from 'lb' will be the 'other' did and we need to lookup
+		 * the real one before we call this routine.
+		 */
+		if (replicated) {
+			temp = replicated_list_lookup(did_info->info_length,
+			    new->did);
+			/*
+			 * NOTE(review): assumes the lookup reports "no
+			 * match" as NULL; fail cleanly instead of handing
+			 * NULL to devid_sizeof().
+			 */
+			if (temp == NULL) {
+				rval = -1;
+				goto fail;
+			}
+			new_devid_len = devid_sizeof((ddi_devid_t)temp);
+			new->rdid = Zalloc(new_devid_len);
+			(void) memcpy(new->rdid, temp, new_devid_len);
+			did = new->rdid;
+		} else {
+			did = new->did;
+		}
+
+		if (devid_valid((ddi_devid_t)(did)) == 0) {
+			rval = -1;
+			goto fail;
+		}
+
+		if ((rval = meta_deviceid_to_nmlist(search_path,
+		    (ddi_devid_t)did, minor_name, &nm)) != 0) {
+			(void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD,
+			    mynode(), NULL, NULL);
+			rval = ENOTSUP;
+			goto fail;
+		}
+
+		assert(nm->devname != NULL);
+		new->devname = Strdup(nm->devname);
+		new->dev = nm->dev;
+		new->minor_name = Strdup(minor_name);
+
+		devid_free_nmlist(nm);
+
+		/* the node is complete: push it onto the result list */
+		new->next = head;
+		head = new;
+		new = NULL;
+	}
+
+	/* Free the last bp */
+	if (bp)
+		Free(bp);
+	*did_listp = head;
+	return (1);
+
+fail:
+	/*
+	 * Common failure exit: release the read buffer, the node that was
+	 * still under construction (its next pointer is still zero from
+	 * Zalloc, so it is a one-element list) and every node already
+	 * linked in, so nothing leaks and the caller sees no list.
+	 */
+	if (bp)
+		Free(bp);
+	free_did_list(new);
+	free_did_list(head);
+	*did_listp = NULL;
+	return (rval);
+}
+
+/*
+ * meta_get_set_info
+ *
+ * Scans a given drive for set specific information. If the given drive
+ * has a shared metadb, scans the shared metadb for information pertaining
+ * to the set.
+ *
+ * Returns:
+ * <0 for failure
+ * 0 success but no replicas were found
+ * 1 success and a replica was found
+ * ENOTSUP for partial disksets detected
+ */
+int
+meta_get_set_info(
+	mddrivenamelist_t	*dp,
+	md_im_set_desc_t	**mispp,
+	int			local_mb_ok,
+	md_error_t		*ep
+)
+{
+	uint_t			s;
+	mdname_t		*rsp;
+	int			fd;
+	char			mb[DEV_BSIZE];
+	/*LINTED*/
+	mddb_mb_t		*mbp = (mddb_mb_t *)mb;
+	char			lb[dbtob(MDDB_LBCNT)];
+	/*LINTED*/
+	mddb_lb_t		*lbp = (mddb_lb_t *)lb;
+	mddb_did_blk_t		*lbdidp = NULL;
+	mddb_ln_t		*lnp = NULL;
+	int			lnsize, lbdid_size;
+	int			rval = 0;
+	did_list_t		*did_listp = NULL;
+	mddrivenamelist_t	*dnlp;
+	mddrivename_t		*dnp;
+	md_im_names_t		cnames = { 0, NULL};
+	char			*nm = NULL;
+	char			*did_nm = NULL, *did_shrnm = NULL;
+	struct nm_rec		*nmp;
+	struct devid_shr_rec	*did_shrnmp;
+	struct devid_min_rec	*did_nmp;
+	int			extended_namespace = 0;
+	int			replicated = 0;
+
+	dnp = dp->drivenamep;
+
+	/*
+	 * Determine and open the replica slice
+	 */
+	if (meta_replicaslice(dnp, &s, ep) != 0) {
+		return (-1);
+	}
+
+	/*
+	 * Test for the size of replica slice in question. If
+	 * the size is zero, we know that this is not a disk that was
+	 * part of a set and it should be silently ignored for import.
+	 */
+	if (dnp->vtoc.parts[s].size == 0)
+		return (0);
+
+	if ((rsp = metaslicename(dnp, s, ep)) == NULL) {
+		return (-1);
+	}
+
+	if ((fd = open(rsp->rname, O_RDONLY|O_NDELAY)) < 0)
+		return (mdsyserror(ep, errno, rsp->cname));
+
+	/*
+	 * After the open() succeeds, we should return via the "out"
+	 * label to clean up after ourselves. (Up 'til now, we can
+	 * just return directly, because there are no resources to
+	 * give back.)
+	 */
+
+	if ((rval = read_master_block(ep, fd, mbp, sizeof (mb))) <= 0)
+		goto out;
+
+	/* need_devid is 0 here, so no device id buffer is requested */
+	replicated = is_replicated(fd, mbp, 0, NULL);
+
+	if (!local_mb_ok && mbp->mb_setno == 0) {
+		rval = 0;
+		goto out;
+	}
+
+	if ((rval = read_locator_block(ep, fd, mbp, lbp, sizeof (lb))) <= 0)
+		goto out;
+
+	/*
+	 * Once the locator block has been read, we need to
+	 * check if the locator block commit count is zero.
+	 * If it is zero, we know that the replica we're dealing
+	 * with is on a disk that was deleted from the disk set;
+	 * and, it potentially has stale data. We need to quit
+	 * in that case
+	 */
+	if (lbp->lb_commitcnt == 0) {
+		rval = 0;
+		goto out;
+	}
+
+	/*
+	 * Make sure that the disk being imported has device id
+	 * namespace present for disksets. If a disk doesn't have
+	 * device id namespace, we skip reading the replica on that disk
+	 */
+	if (!(lbp->lb_flags & MDDB_DEVID_STYLE)) {
+		rval = 0;
+		goto out;
+	}
+
+	/*
+	 * Grab the locator block device id array. Allocate memory for the
+	 * array first. It is released at the "out" label.
+	 */
+	lbdid_size = dbtob(lbp->lb_didblkcnt);
+	lbdidp = Zalloc(lbdid_size);
+
+	if ((rval = read_locator_block_did(ep, fd, mbp, lbp, lbdidp,
+	    lbdid_size)) <= 0)
+		goto out;
+
+	/*
+	 * For a disk that has not been replicated, extract the device ids
+	 * stored in the locator block device id array and store them in
+	 * a list.
+	 *
+	 * If the disk has been replicated using replication software such
+	 * as HDS Truecopy/ShadowImage or EMC SRDF/BCV, the device ids in
+	 * the locator block are invalid and we need to build a list of
+	 * replicated disks.
+	 */
+	if (replicated && !replicated_disk_list_built) {
+		/*
+		 * if there's a replicated diskset involved, we need to
+		 * scan the system one more time and build a list of all
+		 * candidate disks that might be part of that replicated set
+		 */
+		if (meta_list_disks(ep, &cnames) != 0) {
+			rval = 0;
+			goto out;
+		}
+		dnlp = meta_prune_cnames(ep, &cnames, 0);
+		rval = build_replicated_disks_list(ep, dnlp);
+		if (rval == 0)
+			goto out;
+	}
+
+	rval = build_did_list(ep, fd, mbp, lbdidp, &did_listp, replicated);
+
+	if ((rval <= 0) || (rval == ENOTSUP))
+		goto out;
+
+	/*
+	 * Until here, we've gotten away with fixed sizes for the
+	 * master block and locator block. The locator names,
+	 * however, are sized (and therefore allocated) dynamically
+	 * according to information in the locator block.
+	 */
+	lnsize = dbtob(lbp->lb_lnblkcnt);
+	lnp = Zalloc(lnsize);
+
+	if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0)
+		goto out;
+
+	/*
+	 * Read in the NM record
+	 * If no NM record was found, it still is a valid configuration
+	 * but it also means that we won't find any corresponding DID_NM
+	 * or DID_SHR_NM.
+	 */
+	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &nm, MDDB_NM, rsp->cname))
+	    < 0)
+		goto out;
+	else if (rval == 0)
+		goto append;
+
+	/*
+	 * At this point, we have read in all of the blocks that form
+	 * the nm_rec. We should at least detect the corner case
+	 * mentioned above, in which r_next_recid links to another
+	 * nm_rec. Extended namespace handling is left for Phase 2.
+	 *
+	 * What this should really be is a loop, each iteration of
+	 * which reads in a nm_rec and calls the set_append_wrapper().
+	 */
+	/*LINTED*/
+	nmp = (struct nm_rec *)(nm + sizeof (mddb_rb_t));
+	if (nmp->r_rec_hdr.r_next_recid != (mddb_recid_t)0) {
+		extended_namespace = 1;
+		rval = 0;
+		goto out;
+	}
+
+	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm,
+	    MDDB_DID_NM, rsp->cname)) < 0)
+		goto out;
+	else if (rval == 0)
+		goto append;
+
+	/*LINTED*/
+	did_nmp = (struct devid_min_rec *)(did_nm + sizeof (mddb_rb_t));
+	if (did_nmp->min_rec_hdr.r_next_recid != (mddb_recid_t)0) {
+		extended_namespace = 1;
+		rval = 0;
+		goto out;
+	}
+
+	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_shrnm,
+	    MDDB_DID_SHR_NM, rsp->cname)) < 0)
+		goto out;
+	else if (rval == 0)
+		goto append;
+
+	/*LINTED*/
+	did_shrnmp = (struct devid_shr_rec *)(did_shrnm + sizeof (mddb_rb_t));
+	if (did_shrnmp->did_rec_hdr.r_next_recid != (mddb_recid_t)0) {
+		extended_namespace = 1;
+		rval = 0;
+		goto out;
+	}
+
+append:
+	/* Finally, we've got what we need to process this replica. */
+	mispp = set_append_wrapper(mispp, did_listp, mbp, lbp,
+	    /*LINTED*/
+	    (mddb_rb_t *)nm, (mddb_rb_t *)did_nm, (mddb_rb_t *)did_shrnm,
+	    ep, replicated);
+
+	/* Return the fact that we found at least one set */
+	rval = 1;
+
+out:
+	if (fd >= 0)
+		(void) close(fd);
+	if (did_listp != NULL)
+		free_did_list(did_listp);
+	/* the locator block device id array used to leak on every call */
+	if (lbdidp != NULL)
+		Free(lbdidp);
+	if (lnp != NULL)
+		Free(lnp);
+	if (nm != NULL)
+		Free(nm);
+	if (did_nm != NULL)
+		Free(did_nm);
+	if (did_shrnm != NULL)
+		Free(did_shrnm);
+
+	/*
+	 * If we are at the end of the list, we must free up
+	 * the replicated list too
+	 */
+	if (dp->next == NULL)
+		free_replicated_disks_list();
+
+	if (extended_namespace)
+		return (mddserror(ep, MDE_DS_EXTENDEDNM, MD_SET_BAD,
+		    mynode(), NULL, NULL));
+
+	return (rval);
+}
+
+/*
+ * Return the minor name associated with a given disk slice
+ */
+static char *
+meta_getminor_name(
+	char		*devname,
+	md_error_t	*ep
+)
+{
+	char	*devid_minor = NULL;
+	char	*result = NULL;
+	int	dev_fd;
+
+	if (devname == NULL)
+		return (NULL);
+
+	/* An open failure is recorded in ep and reported as NULL. */
+	dev_fd = open(devname, O_RDONLY|O_NDELAY, 0);
+	if (dev_fd < 0) {
+		(void) mdsyserror(ep, errno, devname);
+		return (NULL);
+	}
+
+	/*
+	 * Return a Strdup()'d copy and give the original straight back to
+	 * devid_str_free(). If the lookup fails the result stays NULL and
+	 * ep is not touched.
+	 */
+	if (devid_get_minor_name(dev_fd, &devid_minor) == 0) {
+		result = Strdup(devid_minor);
+		devid_str_free(devid_minor);
+	}
+
+	(void) close(dev_fd);
+	return (result);
+}
+
+static int
+meta_replica_quorum(
+	md_im_set_desc_t	*misp,
+	md_error_t		*ep
+)
+{
+	md_im_drive_info_t	*drive;
+	md_im_replica_info_t	*rep;
+	mdname_t		*slice_name;
+	struct stat		sb;
+	uint_t			slice;
+	int			usable = 0;
+
+	for (drive = misp->mis_drives; drive != NULL;
+	    drive = drive->mid_next) {
+
+		/* Drives whose replica slice can't be named are skipped. */
+		slice_name = NULL;
+		if (meta_replicaslice(drive->mid_dnp, &slice, ep) == 0)
+			slice_name = metaslicename(drive->mid_dnp, slice, ep);
+		if (slice_name == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* So are drives whose block device can't be stat()'ed. */
+		if (stat(slice_name->bname, &sb) != 0)
+			continue;
+
+		/* The drive is okay, so every replica on it counts. */
+		for (rep = drive->mid_replicas; rep != NULL;
+		    rep = rep->mir_next) {
+			usable++;
+		}
+	}
+
+	/* -1 when fewer than half (rounded up) of the active replicas */
+	return ((usable < (misp->mis_active_replicas + 1)/2) ? -1 : 0);
+}
+
+static set_t
+meta_imp_setno(
+	md_error_t	*ep
+)
+{
+	set_t	limit, candidate;
+	int	busy;
+
+	limit = get_max_sets(ep);
+	if (limit == 0)
+		return (MD_SET_BAD);
+
+	/*
+	 * This code needs to be expanded when we run in SunCluster
+	 * environment SunCluster obtains setno internally
+	 */
+	for (candidate = 1; candidate < limit; candidate++) {
+		/* a failed query ends the search */
+		if (clnt_setnumbusy(mynode(), candidate, &busy, ep) == -1)
+			return (MD_SET_BAD);
+
+		/* first set number that is not busy wins */
+		if (busy == FALSE)
+			return (candidate);
+	}
+
+	/* every set number below the limit is taken */
+	return ((candidate == limit) ? MD_SET_BAD : candidate);
+}
+
+/*
+ * Release the device id buffers attached to the locator in *cp and zero
+ * the fields, so that the next drive (or a later cleanup) can neither
+ * reuse nor re-free them.
+ */
+static void
+imp_free_locator_devids(struct mddb_config *cp)
+{
+	if (cp->c_locator.l_devid) {
+		Free((void *)(uintptr_t)cp->c_locator.l_devid);
+		cp->c_locator.l_devid = 0;
+	}
+	if (cp->c_locator.l_old_devid) {
+		Free((void *)(uintptr_t)cp->c_locator.l_old_devid);
+		cp->c_locator.l_old_devid = 0;
+		cp->c_locator.l_old_devid_sz = 0;
+	}
+}
+
+/*
+ * meta_imp_set()
+ *
+ * Import the diskset described by misp under the name setname: pick a
+ * free set number, hand every drive's replica locations to the kernel
+ * (MD_DB_USEDEV) and, unless dry_run is set, ask the kernel to load the
+ * set (MD_IOCIMP_LOAD), create the /dev/md/<setname> symlink and have
+ * rpc.metad resnarf the set. The replica quorum check is skipped when
+ * force is non-zero.
+ *
+ * Returns 0 on success (including a successful dry run); otherwise the
+ * value of the md error routine that recorded the failure in ep.
+ */
+int
+meta_imp_set(
+	md_im_set_desc_t	*misp,
+	char			*setname,
+	int			force,
+	bool_t			dry_run,
+	md_error_t		*ep
+)
+{
+	md_timeval32_t		tp;
+	md_im_drive_info_t	*midp;
+	uint_t			rep_slice;
+	mddrivename_t		*dnp;
+	struct mddb_config	c;
+	mdname_t		*np;
+	md_im_replica_info_t	*mirp;
+	char			setnum_link[MAXPATHLEN];
+	char			setname_link[MAXPATHLEN];
+	char			*minor_name = NULL;
+
+	(void) memset(&c, 0, sizeof (c));
+	(void) strlcpy(c.c_setname, setname, sizeof (c.c_setname));
+	c.c_sideno = 0;
+	c.c_flags = MDDB_C_IMPORT;
+
+	/*
+	 * Check to see if the setname that the set is being imported into,
+	 * already exists.
+	 */
+	if (getsetbyname(c.c_setname, ep) != NULL) {
+		return (mddserror(ep, MDE_DS_SETNAMEBUSY, MD_SET_BAD,
+		    mynode(), NULL, c.c_setname));
+	}
+
+	/*
+	 * Find the next available set number
+	 */
+	if ((c.c_setno = meta_imp_setno(ep)) == MD_SET_BAD) {
+		return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
+		    mynode(), NULL, c.c_setname));
+	}
+
+	if (meta_gettimeofday(&tp) == -1) {
+		return (mdsyserror(ep, errno, NULL));
+	}
+	c.c_timestamp = tp;
+
+	/* Check to see if replica quorum requirement is fulfilled */
+	if (!force && meta_replica_quorum(misp, ep) == -1)
+		return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD,
+		    mynode(), NULL, c.c_setname));
+
+	for (midp = misp->mis_drives; midp != NULL;
+	    midp = midp->mid_next) {
+		mdcinfo_t	*cinfo;
+
+		/*
+		 * We pass down the list of the drives in the
+		 * set down to the kernel irrespective of
+		 * whether the drives have a replica or not.
+		 *
+		 * The kernel detects which of the drives don't
+		 * have a replica and accordingly does the
+		 * right thing.
+		 */
+		dnp = midp->mid_dnp;
+		if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
+		    ((np = metaslicename(dnp, rep_slice, ep))
+		    == NULL)) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/*
+		 * NOTE(review): unbounded copy; confirm that l_devname is
+		 * always large enough for np->bname.
+		 */
+		(void) strcpy(c.c_locator.l_devname, np->bname);
+		c.c_locator.l_dev = meta_cmpldev(np->dev);
+		c.c_locator.l_mnum = meta_getminor(np->dev);
+
+		/*
+		 * The device id buffers belong to this drive only. They
+		 * are released on every path out of this iteration; both
+		 * pointer fields are zero on entry (from the initial
+		 * memset or from the previous drive's cleanup).
+		 */
+		c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz);
+		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
+		    midp->mid_devid, midp->mid_devid_sz);
+		c.c_locator.l_devid_sz = midp->mid_devid_sz;
+		c.c_locator.l_devid_flags =
+		    MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+		if (midp->mid_o_devid) {
+			c.c_locator.l_old_devid =
+			    (uintptr_t)Malloc(midp->mid_o_devid_sz);
+			(void) memcpy(
+			    (void *)(uintptr_t)c.c_locator.l_old_devid,
+			    midp->mid_o_devid, midp->mid_o_devid_sz);
+			c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz;
+		}
+
+		/*
+		 * meta_getminor_name() returns NULL when the device can't
+		 * be opened or has no minor name. Skip such a drive, the
+		 * same way the other per-drive lookup failures are
+		 * handled, rather than passing NULL to strncpy().
+		 */
+		minor_name = meta_getminor_name(np->bname, ep);
+		if (minor_name == NULL) {
+			mdclrerror(ep);
+			imp_free_locator_devids(&c);
+			continue;
+		}
+		(void) strncpy(c.c_locator.l_minor_name, minor_name,
+		    sizeof (c.c_locator.l_minor_name));
+		/* the name was Strdup()'d for us and is no longer needed */
+		Free(minor_name);
+		minor_name = NULL;
+
+		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
+			mdclrerror(ep);
+			imp_free_locator_devids(&c);
+			continue;
+		}
+		(void) strncpy(c.c_locator.l_driver, cinfo->dname,
+		    sizeof (c.c_locator.l_driver));
+
+		mirp = midp->mid_replicas;
+
+		/*
+		 * One ioctl per replica; a drive without replicas still
+		 * gets a single "dummy" ioctl.
+		 */
+		do {
+			if (mirp) {
+				c.c_locator.l_flags = 0;
+				c.c_locator.l_blkno = mirp->mir_offset;
+				mirp = mirp->mir_next;
+			} else {
+				/*
+				 * Default offset for dummy is 16
+				 */
+				c.c_locator.l_blkno = 16;
+			}
+
+			if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
+				imp_free_locator_devids(&c);
+				return (mdstealerror(ep, &c.c_mde));
+			}
+		} while (mirp != NULL);
+
+		/* this drive is done; don't carry its buffers forward */
+		imp_free_locator_devids(&c);
+	}
+
+	/*
+	 * If the dry run option was specified, flag success
+	 * and exit out
+	 */
+	if (dry_run == 1) {
+		md_eprintf("%s\n", dgettext(TEXT_DOMAIN,
+		    "import should be successful"));
+		return (0);
+	}
+
+	/*
+	 * Now kernel should have all the information
+	 * regarding the import diskset replica.
+	 * Tell kernel to load them up and import the set
+	 */
+	if (metaioctl(MD_IOCIMP_LOAD, &c.c_setno, &c.c_mde, NULL) != 0) {
+		return (mdstealerror(ep, &c.c_mde));
+	}
+
+	(void) meta_smf_enable(META_SMF_DISKSET, NULL);
+
+	/* The set has now been imported, create the appropriate symlink */
+	(void) snprintf(setname_link, MAXPATHLEN, "/dev/md/%s", setname);
+	(void) snprintf(setnum_link, MAXPATHLEN, "shared/%d", c.c_setno);
+
+	/*
+	 * Since we already verified that the setname was OK, make sure to
+	 * cleanup before proceeding.
+	 */
+	if (unlink(setname_link) == -1) {
+		if (errno != ENOENT)
+			(void) mdsyserror(ep, errno, setname_link);
+	}
+
+	if (symlink(setnum_link, setname_link) == -1)
+		(void) mdsyserror(ep, errno, setname_link);
+
+	/* resnarf the set that has just been imported */
+	if (clnt_resnarf_set(mynode(), c.c_setno, ep) != 0)
+		md_eprintf("%s\n", dgettext(TEXT_DOMAIN, "Please stop and "
+		    "restart rpc.metad"));
+
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_init.c b/usr/src/lib/lvm/libmeta/common/meta_init.c
new file mode 100644
index 0000000000..5775af48bc
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_init.c
@@ -0,0 +1,453 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * initialize metadevices
+ */
+
+#include <meta.h>
+#include <libdevinfo.h>
+
+
+int
+parse_interlace(
+	char		*uname,		/* Meta Device name (eg d0) */
+	char		*str,		/* String to Parse */
+	diskaddr_t	*interlacep,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	value;
+	char		suffix;
+	int		nfields;
+
+	/*
+	 * The string is a number optionally followed by one unit
+	 * character. On success *interlacep receives the interlace in
+	 * DEV_BSIZE blocks and 0 is returned; otherwise the result of
+	 * meta_cook_syntax() is returned.
+	 */
+	nfields = sscanf(str, "%llu%c", &value, &suffix);
+	if (nfields < 1) {
+		return (meta_cook_syntax(ep, MDE_BAD_INTERLACE,
+		    uname, 1, &str));
+	}
+
+	if (nfields == 1) {
+		/* bare number: bytes, which must be a block multiple */
+		if (value & (DEV_BSIZE - 1)) {
+			return (meta_cook_syntax(ep, MDE_BAD_INTERLACE,
+			    uname, 1, &str));
+		}
+		value = lbtodb(value);
+	} else if (suffix == 'b' || suffix == 'B') {
+		/* already in blocks */
+		value *= DEV_BSIZE / DEV_BSIZE;
+	} else if (suffix == 'k' || suffix == 'K') {
+		value *= 1024 / DEV_BSIZE;
+	} else if (suffix == 'm' || suffix == 'M') {
+		value *= 1024 * 1024 / DEV_BSIZE;
+	} else {
+		/* unknown unit; this path reports without the unit name */
+		return (meta_cook_syntax(ep, MDE_BAD_INTERLACE,
+		    NULL, 1, &str));
+	}
+
+	/* return success */
+	*interlacep = value;
+	return (0);
+}
+
+/*
+ * cook up syntax error
+ */
+int
+meta_cook_syntax(
+	md_error_t	*ep,
+	md_void_errno_t	errcode,
+	char		*uname,
+	int		argc,
+	char		*argv[]
+)
+{
+	char	*token;
+	char	*msg;
+	int	status;
+
+	/* Without a usable token the error is reported against uname. */
+	if ((argc <= 0) || (argv[0] == NULL) || (argv[0][0] == '\0'))
+		return (mderror(ep, errcode, uname));
+
+	/*
+	 * Build either "<uname>: "<token>"" or just ""<token>"",
+	 * depending on whether a unit name was supplied.
+	 */
+	token = argv[0];
+	if ((uname == NULL) || (uname[0] == '\0')) {
+		msg = Malloc(1 + strlen(token) + 1 + 1);
+		msg[0] = '\0';
+	} else {
+		msg = Malloc(strlen(uname) + 2
+		    + 1 + strlen(token) + 1 + 1);
+		(void) strcpy(msg, uname);
+		(void) strcat(msg, ": ");
+	}
+	(void) strcat(msg, "\"");
+	(void) strcat(msg, token);
+	(void) strcat(msg, "\"");
+
+	status = mderror(ep, errcode, msg);
+	Free(msg);
+	return (status);
+}
+
+int
+meta_check_devicesize(
+	diskaddr_t	total_blocks
+)
+{
+	/*
+	 * Anything larger than MD_MAX_BLKS_FOR_SMALL_DEVS is reported as
+	 * MD_CRO_64BIT; everything else as MD_CRO_32BIT.
+	 */
+	return ((total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) ?
+	    MD_CRO_64BIT : MD_CRO_32BIT);
+}
+
+
+/*
+ * setup metadevice geometry
+ */
+/*ARGSUSED*/
+int
+meta_setup_geom(
+	md_unit_t	*md,
+	mdname_t	*np,
+	mdgeom_t	*geomp,
+	uint_t		write_reinstruct,
+	uint_t		read_reinstruct,
+	uint_t		round_cyl,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	usable_blocks = md->c.un_actual_tb;
+
+	/*
+	 * When asked to, trim the size down to a whole number of
+	 * cylinders (heads * sectors blocks each).
+	 * NOTE(review): a zero cylinder size would presumably divide by
+	 * zero inside rounddown(); confirm callers never pass one.
+	 */
+	if (round_cyl) {
+		diskaddr_t	cyl_blocks = geomp->nhead * geomp->nsect;
+
+		usable_blocks = rounddown(md->c.un_actual_tb, cyl_blocks);
+	}
+
+	/* copy the geometry and the reinstruct values into the unit */
+	md->c.un_nhead = geomp->nhead;
+	md->c.un_nsect = geomp->nsect;
+	md->c.un_rpm = geomp->rpm;
+	md->c.un_wr_reinstruct = write_reinstruct;
+	md->c.un_rd_reinstruct = read_reinstruct;
+	md->c.un_total_blocks = usable_blocks;
+	return (0);
+}
+
+/*
+ * adjust metadevice geometry
+ */
+/*ARGSUSED*/
+int
+meta_adjust_geom(
+	md_unit_t	*md,
+	mdname_t	*np,
+	uint_t		write_reinstruct,
+	uint_t		read_reinstruct,
+	uint_t		round_cyl,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	usable_blocks = md->c.un_actual_tb;
+
+	/*
+	 * Optionally trim the size to a whole number of cylinders, using
+	 * the geometry already stored in the unit.
+	 * NOTE(review): a zero cylinder size would presumably divide by
+	 * zero inside rounddown(); confirm callers never pass one.
+	 */
+	if (round_cyl) {
+		diskaddr_t	cyl_blocks = md->c.un_nhead * md->c.un_nsect;
+
+		usable_blocks = rounddown(md->c.un_actual_tb, cyl_blocks);
+	}
+	md->c.un_total_blocks = usable_blocks;
+
+	/* the reinstruct values only ever grow */
+	if (md->c.un_wr_reinstruct < write_reinstruct)
+		md->c.un_wr_reinstruct = write_reinstruct;
+	if (md->c.un_rd_reinstruct < read_reinstruct)
+		md->c.un_rd_reinstruct = read_reinstruct;
+	return (0);
+}
+
+/*
+ * Function: meta_init_make_device
+ * Purpose:
+ * Create the device node <uname> by constructing the necessary
+ * md_mkdev_params_t structure. We have to handle relative names
+ * (e.g. "d80") and fully-qualified names (e.g. "/dev/md/red/dsk/d80").
+ * The field that we need is the unit number of the metadevice (80 in
+ * the above examples).
+ * Input: spp set structure
+ * uname unit-name (fully qualified or relative)
+ * Output: ep error return structure
+ * Returns: 0 success
+ * -1 Error. <ep> contains error reason
+ */
+/*
+ * Record <errcode> in ep against the double-quoted unit name and return
+ * whatever mderror() returns.
+ */
+static int
+make_device_name_error(
+	md_error_t	*ep,
+	md_void_errno_t	errcode,
+	char		*uname
+)
+{
+	size_t	qlen = strlen(uname) + 3;	/* two quotes plus the NUL */
+	char	*quoted = Malloc(qlen);
+	int	rval;
+
+	(void) snprintf(quoted, qlen, "\"%s\"", uname);
+	rval = mderror(ep, errcode, quoted);
+	Free(quoted);
+	return (rval);
+}
+
+int
+meta_init_make_device(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	di_devlink_handle_t	hdl;
+	md_mkdev_params_t	params;
+	int			rval = 0;
+	char			*p, *e = uname;
+	size_t			len = strlen(uname);
+
+	e += len;
+	(void) memset(&params, 0, sizeof (params));
+	MD_SETDRIVERNAME(&params, "md", (*spp)->setno);
+
+	/*
+	 * Find the start of the unit within <uname>.
+	 */
+	p = strrchr(uname, '/');
+	if (p == NULL) {
+		/*
+		 * Relative name (e.g. d80). An empty name has no leading
+		 * letter to skip: &uname[1] would point past the
+		 * terminator, so reject it before stepping over it.
+		 */
+		if (len == 0) {
+			return (make_device_name_error(ep,
+			    MDE_NOT_DRIVENAME, uname));
+		}
+		p = &uname[1];
+	} else {
+		/* qualified name (e.g. /dev/md/dsk/d80) */
+		p += 2;
+		if (p >= e) {
+			/* Invalid drive name */
+			return (make_device_name_error(ep,
+			    MDE_NOT_DRIVENAME, uname));
+		}
+	}
+
+	/* strtoul() leaving e == p means no digits were found */
+	e = NULL;
+	params.mnum = strtoul(p, &e, 10);
+	if (e == p) {
+		/* Invalid drive name */
+		return (make_device_name_error(ep, MDE_NOT_DRIVENAME, uname));
+	}
+
+	if (metaioctl(MD_IOCMAKE_DEV, &params, &params.mde, NULL) != 0) {
+		return (mdstealerror(ep, &params.mde));
+	}
+	/*
+	 * Wait until device appears in namespace. di_devlink_init() returns
+	 * once the /dev links have been created. If NULL is returned the
+	 * link operation failed and we haven't got a device to use.
+	 * NOTE: This will take a _long_ time for large numbers of metadevices.
+	 * Change to use the enhanced di_devlink_init() interface when
+	 * available.
+	 */
+	hdl = di_devlink_init("md", DI_MAKE_LINK);
+	if (hdl != NULL) {
+		(void) di_devlink_fini(&hdl);
+	} else {
+		rval = make_device_name_error(ep, MDE_UNIT_NOT_FOUND, uname);
+	}
+	return (rval);
+}
+
+/*
+ * FUNCTION: is_metadb_cmd()
+ * INPUT: argc - number of command line arguments
+ * argv - pointer to array of command line arguments
+ * OUTPUT: none
+ * RETURNS: TRUE if a metadb is to be created, FALSE otherwise
+ * PURPOSE: parses enough of the command line to determine if a metadb
+ * create is being attempted
+ */
+static boolean_t
+is_metadb_cmd(
+	int	argc,
+	char	*argv[]
+)
+{
+	ulong_t	unit;
+	int	consumed;
+
+	if (argc <= 0)
+		return (B_FALSE);
+
+	/* the name must read as "mddb" followed by a number ... */
+	if (sscanf(argv[0], "mddb%lu%n", &unit, &consumed) != 1)
+		return (B_FALSE);
+
+	/* ... with nothing after it, and the number must not be negative */
+	if ((strlen(argv[0]) != consumed) || ((long)unit < 0))
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * FUNCTION: is_stripe_cmd()
+ * INPUT: argc - number of command line arguments
+ * argv - pointer to array of command line arguments
+ * OUTPUT: none
+ * RETURNS: TRUE if a stripe is to be created, FALSE otherwise
+ * PURPOSE: parses enough of the command line to determine if a stripe
+ * create is being attempted
+ */
+static boolean_t
+is_stripe_cmd(
+	int	argc,
+	char	*argv[]
+)
+{
+	uint_t	nrow;
+
+	/*
+	 * A stripe command carries its row count in argv[1]. With no such
+	 * argument it cannot be a stripe; returning here also keeps nrow
+	 * from being tested before it has been assigned, which the old
+	 * "a && b || c" expression allowed.
+	 */
+	if (argc <= 1)
+		return (B_FALSE);
+
+	/* the row count must parse and must not be negative */
+	if ((sscanf(argv[1], "%u", &nrow) != 1) || ((int)nrow < 0))
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * FUNCTION: meta_get_init_type()
+ * INPUT: argc - number of command line arguments
+ * argv - pointer to array of command line arguments
+ * OUTPUT: none
+ * RETURNS: type of metadevice or hot spare pools being initialized
+ * PURPOSE: parses enough of the command line to determine what type
+ * of metainit is being attempted
+ */
+mdinittypes_t
+meta_get_init_type(
+	int	argc,
+	char	*argv[]
+)
+{
+	char	*flag = argv[1];
+
+	/* only a name given: must be a hot spare pool w/o devices */
+	if (argc == 1)
+		return (TAB_HSP);
+
+	if (flag == NULL)
+		return (TAB_UNKNOWN);
+
+	/* explicit type options come first */
+	if (strcmp(flag, "-m") == 0)
+		return (TAB_MIRROR);
+	if (strcmp(flag, "-r") == 0)
+		return (TAB_RAID);
+	if (strcmp(flag, "-p") == 0)
+		return (TAB_SP);
+	if (strcmp(flag, "-t") == 0)
+		return (TAB_TRANS);
+
+	/* then the types recognized from the shape of the arguments */
+	if (is_metadb_cmd(argc, argv))
+		return (TAB_MDDB);
+	if (is_stripe_cmd(argc, argv))
+		return (TAB_STRIPE);
+
+	/* assume that it is a hsp */
+	return (TAB_HSP);
+}
+
+/*
+ * initialize named device or hotspare pool
+ */
+int
+meta_init_name(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdinittypes_t	kind;
+	char		*quoted;
+	int		status;
+	char		*uname = argv[0];
+
+	assert(argc > 0);
+
+	/* determine type of metadevice or hot spare pool being created */
+	kind = meta_get_init_type(argc, argv);
+
+	/* hotspare pool */
+	if (kind == TAB_HSP)
+		return (meta_init_hsp(spp, argc, argv, options, ep));
+
+	/* metadevice */
+	if (argc >= 2 && kind != TAB_UNKNOWN) {
+		md_error_t	t_e = mdnullerror;
+		char		*cname;
+
+		/*
+		 * We need to create the device node if the specified metadevice
+		 * does not already exist in the database. The actual creation
+		 * is undertaken by the md driver and the links propagated by
+		 * devfsadm.
+		 */
+
+		/* initialize the spp properly */
+		cname = meta_name_getname(spp, uname, &t_e);
+		if (cname != NULL)
+			Free(cname);
+		if (! mdisok(&t_e))
+			return (mdstealerror(ep, &t_e));
+
+		/* Create device node */
+		if (meta_init_make_device(spp, uname, &t_e) != 0)
+			return (mdstealerror(ep, &t_e));
+
+		/*
+		 * Dispatch on the type. Types not listed here (for
+		 * example TAB_MDDB) fall through to the syntax error.
+		 */
+		if (kind == TAB_MIRROR)
+			return (meta_init_mirror(spp, argc, argv, options, ep));
+		if (kind == TAB_RAID)
+			return (meta_init_raid(spp, argc, argv, options, ep));
+		if (kind == TAB_SP)
+			return (meta_init_sp(spp, argc, argv, options, ep));
+		if (kind == TAB_TRANS)
+			return (mderror(ep, MDE_EOF_TRANS, NULL));
+		if (kind == TAB_STRIPE)
+			return (meta_init_stripe(spp, argc, argv, options, ep));
+	}
+
+	/* unknown type: report a syntax error against the quoted name */
+	quoted = Malloc(1 + strlen(uname) + 1 + 1);
+	(void) strcpy(quoted, "\"");
+	(void) strcat(quoted, uname);
+	(void) strcat(quoted, "\"");
+	status = mderror(ep, MDE_SYNTAX, quoted);
+	Free(quoted);
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_lib_prv.c b/usr/src/lib/lvm/libmeta/common/meta_lib_prv.c
new file mode 100644
index 0000000000..1b63a2a03e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_lib_prv.c
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1992-2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Private libmeta helpers: a cached, shared stream for the MNTTAB file
+ */
+
+#include "meta_lib_prv.h"
+#include <sys/vfstab.h>
+
+static FILE *mfp = NULL;
+
+/*
+ * Return the cached read-only stream for MNTTAB, opening it on first
+ * use.  An already-open stream is rewound to offset 0 before it is
+ * handed back; if the rewind fails the stream is closed, the cache is
+ * cleared and NULL is returned.  NULL is also returned when the file
+ * cannot be opened.
+ */
+FILE *
+open_mnttab(void)
+{
+	/* first use: open the file and remember the stream */
+	if (mfp == NULL) {
+		mfp = fopen(MNTTAB, "r");
+		return (mfp);
+	}
+
+	/* already open: reposition to the beginning */
+	if (fseeko(mfp, (off_t)0L, SEEK_SET) != -1)
+		return (mfp);
+
+	/* could not rewind: drop the cached stream */
+	(void) fclose(mfp);
+	mfp = NULL;
+	return (NULL);
+}
+
+/*
+ * Close the cached MNTTAB stream, if one is open, and forget it.
+ * Returns 0 when nothing was open, otherwise the result of fclose().
+ */
+int
+close_mnttab(void)
+{
+	FILE	*fp = mfp;
+
+	if (fp == NULL)
+		return (0);
+
+	/* the cache is cleared whether or not the close succeeds */
+	mfp = NULL;
+	return (fclose(fp));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mdcf.c b/usr/src/lib/lvm/libmeta/common/meta_mdcf.c
new file mode 100644
index 0000000000..3af1c3be19
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mdcf.c
@@ -0,0 +1,148 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * patch md.cf file
+ */
+
+#include <meta.h>
+
+/*
+ * Save the metadevice configuration of the local set in md.cf.
+ *
+ * The configuration is first written to METACONFTMP and then renamed
+ * over METACONF.  If the rename fails with EROFS the temporary file is
+ * copied line by line into METACONF instead.
+ *
+ * sp	set to dump; anything but the local set is a no-op
+ * ep	receives the error description on failure
+ *
+ * Returns 0 on success (or when sp is not the local set), otherwise a
+ * non-zero value with ep set.
+ */
+int
+meta_update_md_cf(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	char		*name = METACONF;
+	char		*tname = METACONFTMP;
+	FILE		*tfp = NULL;
+	FILE		*mfp = NULL;
+	mdprtopts_t	options = PRINT_SHORT | PRINT_FAST;
+	struct stat	sbuf;
+	char		line[1000];
+
+	/* If this is not the local set, no need to do anything */
+	if (!metaislocalset(sp))
+		return (0);
+
+	/* open temp file, carrying over mode and owner of the old file */
+	if ((tfp = fopen(tname, "w")) == NULL)
+		return (mdsyserror(ep, errno, tname));
+	if (stat(name, &sbuf) == 0) {
+		(void) fchmod(fileno(tfp), (sbuf.st_mode & 0777));
+		(void) fchown(fileno(tfp), sbuf.st_uid, sbuf.st_gid);
+	}
+
+	/* dump header */
+	if (fputs(dgettext(TEXT_DOMAIN,
+	    "# metadevice configuration file\n"
+	    "# do not hand edit\n"), tfp) == EOF) {
+		(void) mdsyserror(ep, errno, tname);
+		goto errout;
+	}
+
+	/* dump device configuration */
+	if (meta_print_all(sp, tname, NULL, tfp, options, NULL, ep) != 0)
+		goto errout;
+
+	/*
+	 * Close the temp file.  The stream is gone after fclose() even
+	 * when it reports an error, so it must not be closed again in
+	 * errout; remove the temp file here instead.
+	 */
+	if (fclose(tfp) != 0) {
+		(void) mdsyserror(ep, errno, tname);
+		tfp = NULL;
+		(void) unlink(tname);
+		goto errout;
+	}
+	tfp = NULL;
+
+	/*
+	 * Renames don't work in the miniroot since tmpfiles are
+	 * created in /var/tmp. Hence we copy the data out.
+	 */
+	if (rename(tname, name) != 0) {
+		if (errno != EROFS) {
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+
+		if ((tfp = fopen(tname, "r")) == NULL) {
+			(void) mdsyserror(ep, errno, tname);
+			goto errout;
+		}
+		if ((mfp = fopen(name, "w+")) == NULL) {
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+
+		/* fputs() reports failure as EOF, not as 0/NULL */
+		while (fgets(line, sizeof (line), tfp) != NULL) {
+			if (fputs(line, mfp) == EOF) {
+				(void) mdsyserror(ep, errno, name);
+				goto errout;
+			}
+		}
+		/* tell a read error apart from a normal end of file */
+		if (ferror(tfp)) {
+			(void) mdsyserror(ep, errno, tname);
+			goto errout;
+		}
+
+		if (fclose(tfp) != 0) {
+			(void) mdsyserror(ep, errno, tname);
+			tfp = NULL;
+			goto errout;
+		}
+		tfp = NULL;
+
+		/* delete the tempfile */
+		(void) unlink(tname);
+
+		/* push the copy to stable storage before closing it */
+		if (fflush(mfp) != 0) {
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+		if (fsync(fileno(mfp)) != 0) {
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+		if (fclose(mfp) != 0) {
+			(void) mdsyserror(ep, errno, name);
+			mfp = NULL;
+			goto errout;
+		}
+		mfp = NULL;
+	}
+
+	/* success */
+	return (0);
+
+	/* cleanup, return error */
+errout:
+	if (tfp != NULL) {
+		(void) fclose(tfp);
+		(void) unlink(tname);
+	}
+	if (mfp != NULL) {
+		(void) fclose(mfp);
+	}
+	return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_med.c b/usr/src/lib/lvm/libmeta/common/meta_med.c
new file mode 100644
index 0000000000..b11f86a0c1
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_med.c
@@ -0,0 +1,851 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Mediator functions
+ */
+
+#include <meta.h>
+#include <metamed.h>
+#include <dlfcn.h>
+#include <sdssc.h>
+
+/*
+ * There are too many external factors that affect the timing of the
+ * operations, so we set the timeout to a very large value, in this
+ * case 1 day, which should handle HW timeouts, large configurations,
+ * and other potential delays.
+ */
+#define CL_LONG_TMO 86400L /* 1 day */
+#define CL_MEDIUM_TMO 3600L /* 1 hour */
+#define CL_SHORT_TMO 600L /* 10 minutes */
+#define CL_DEF_TMO 10L /* 10 seconds */
+
+static md_timeval32_t def_rpcb_timeout = { MD_CLNT_CREATE_TOUT, 0 };
+
+/*
+ * RPC handle: pairs a client connection with the name of the host it
+ * was opened to.  Built by open_medd() and torn down by close_medd().
+ */
+typedef struct {
+ char *hostname; /* private copy of the host name, freed on close */
+ CLIENT *clntp; /* RPC client handle, destroyed on close */
+} med_handle_t;
+
+/*
+ * Data to be sent from med_clnt_create_timed to med_create_helper via
+ * meta_client_create_retry.  It travels as the opaque "private" pointer
+ * and carries the clnt_create_timed() arguments other than the host name
+ * and the timeout.
+ */
+typedef struct {
+ rpcprog_t mcd_program; /* RPC program designation */
+ rpcvers_t mcd_version; /* RPC version */
+ char *mcd_nettype; /* Type of network to use for RPC */
+} med_create_data_t;
+
+/*
+ * Callback handed to meta_client_create_retry: unpacks the program,
+ * version and network type from the private data and makes a single
+ * clnt_create_timed() attempt with the supplied timeout.
+ */
+static CLIENT *
+med_create_helper(char *hostname, void *private, struct timeval *time_out)
+{
+	med_create_data_t	*args = (med_create_data_t *)private;
+	CLIENT			*clntp;
+
+	clntp = clnt_create_timed(hostname, args->mcd_program,
+	    args->mcd_version, args->mcd_nettype, time_out);
+
+	return (clntp);
+}
+
+/*
+ * Create an RPC client for prog/vers on hostname over nettype.  The
+ * request is packaged for med_create_helper and driven through
+ * meta_client_create_retry, with the seconds part of tp as the time
+ * limit.
+ */
+static CLIENT *
+med_clnt_create_timed(
+	char			*hostname,
+	const ulong_t		prog,
+	const ulong_t		vers,
+	char			*nettype,
+	const md_timeval32_t	*tp
+)
+{
+	med_create_data_t	cd;	/* Create data. */
+	time_t			limit = (time_t)tp->tv_sec;
+
+	cd.mcd_nettype = nettype;
+	cd.mcd_version = vers;
+	cd.mcd_program = prog;
+
+	return (meta_client_create_retry(hostname, med_create_helper,
+	    (void *)&cd, limit, NULL));
+}
+
+/*
+ * Set the call timeout of an RPC client handle to time_out seconds.
+ * Returns 0 on success; on failure the RPC error is recorded in ep and
+ * the result of mdrpcerror() is returned.
+ */
+static int
+cl_sto_medd(
+	CLIENT		*clntp,
+	char		*hostname,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	md_timeval32_t	nto;
+
+	/* whole seconds only; every other field stays zero */
+	(void) memset(&nto, '\0', sizeof (nto));
+	nto.tv_sec = time_out;
+
+	if (clnt_control(clntp, CLSET_TIMEOUT, (char *)&nto) == TRUE)
+		return (0);
+
+	return (mdrpcerror(ep, clntp, hostname,
+	    dgettext(TEXT_DOMAIN, "metad client set timeout")));
+}
+
+/*
+ * Close an RPC connection: destroy the client (and its auth handle),
+ * release the saved host name, then free the handle itself.
+ */
+static void
+close_medd(
+	med_handle_t	*hp
+)
+{
+	assert(hp != NULL);
+
+	if (hp->clntp != NULL) {
+		auth_destroy(hp->clntp->cl_auth);
+		clnt_destroy(hp->clntp);
+	}
+
+	if (hp->hostname != NULL)
+		Free(hp->hostname);
+
+	Free(hp);
+}
+
+/*
+ * Open an RPC connection to rpc.medd.
+ *
+ * hostname	host to contact; NULL or "" selects the local host
+ * time_out	per-call timeout, in seconds, set on the new client
+ * ep		receives the error description on failure
+ *
+ * Returns a handle to be released with close_medd(), or NULL with ep
+ * set.
+ */
+static med_handle_t *
+open_medd(
+	char		*hostname,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	CLIENT		*clntp;
+	med_handle_t	*hp;
+
+	/* default to local host */
+	if ((hostname == NULL) || (*hostname == '\0'))
+		hostname = mynode();
+
+	/* open RPC connection */
+	assert(hostname != NULL);
+	if ((clntp = med_clnt_create_timed(hostname, MED_PROG, MED_VERS,
+	    "tcp", &def_rpcb_timeout)) == NULL) {
+		/* an unregistered program is not reported on stderr */
+		if (rpc_createerr.cf_stat != RPC_PROGNOTREGISTERED)
+			clnt_pcreateerror(hostname);
+		(void) mdrpccreateerror(ep, hostname,
+		    "medd med_clnt_create_timed");
+		return (NULL);
+	}
+
+	/* replace the auth handle the client came with by AUTH_SYS */
+	auth_destroy(clntp->cl_auth);
+	clntp->cl_auth = authsys_create_default();
+	assert(clntp->cl_auth != NULL);
+
+	/*
+	 * If the timeout cannot be set the client is of no use to the
+	 * caller; destroy it here, since no handle is returned through
+	 * which it could be released later.
+	 */
+	if (cl_sto_medd(clntp, hostname, time_out, ep) != 0) {
+		auth_destroy(clntp->cl_auth);
+		clnt_destroy(clntp);
+		return (NULL);
+	}
+
+	/* return connection */
+	hp = Zalloc(sizeof (*hp));
+	hp->hostname = Strdup(hostname);
+	hp->clntp = clntp;
+
+	return (hp);
+}
+
+/*
+ * Convert a med_err_t into an md_error_t and release the med_err_t.
+ *
+ * With a non-zero med_errno a message of the form
+ * "[node: ][misc: ]text" is built and stored in ep as MDE_MED_ERROR,
+ * and -1 is returned; with a zero med_errno ep is left alone and 0 is
+ * returned.  In both cases the strings held by medep are freed and
+ * medep is zeroed.
+ */
+int
+meddstealerror(
+	md_error_t	*ep,
+	med_err_t	*medep
+)
+{
+	char		buf[BUFSIZ];
+	char		*cur = buf;		/* next write position */
+	size_t		room = sizeof (buf);	/* bytes left at cur */
+	char		*emsg;
+	int		rval = 0;
+
+	if (medep->med_errno != 0) {
+		rval = -1;
+
+		/* optional "node: " prefix */
+		if ((medep->med_node != NULL) &&
+		    (medep->med_node[0] != '\0')) {
+			(void) snprintf(cur, room, "%s: ", medep->med_node);
+			cur += strlen(cur);
+			room = sizeof (buf) - (cur - buf);
+		}
+
+		/* optional "misc: " prefix */
+		if ((medep->med_misc != NULL) &&
+		    (medep->med_misc[0] != '\0')) {
+			(void) snprintf(cur, room, "%s: ", medep->med_misc);
+			cur += strlen(cur);
+			room = sizeof (buf) - (cur - buf);
+		}
+
+		/* negative values are mediator errors, positive are errnos */
+		if (medep->med_errno < 0) {
+			emsg = med_errnum_to_str(medep->med_errno);
+			if (emsg == NULL) {
+				(void) snprintf(cur, room,
+				    dgettext(TEXT_DOMAIN,
+				    "unknown mediator errno %d\n"),
+				    medep->med_errno);
+			} else {
+				(void) snprintf(cur, room, "%s", emsg);
+			}
+		} else {
+			emsg = strerror(medep->med_errno);
+			if (emsg == NULL) {
+				(void) snprintf(cur, room,
+				    dgettext(TEXT_DOMAIN,
+				    "errno %d out of range"),
+				    medep->med_errno);
+			} else {
+				(void) snprintf(cur, room, "%s", emsg);
+			}
+		}
+
+		(void) mderror(ep, MDE_MED_ERROR, buf);
+	}
+
+	/* release whatever the med_err_t owned and reset it */
+	if (medep->med_node != NULL)
+		Free(medep->med_node);
+	if (medep->med_misc != NULL)
+		Free(medep->med_misc);
+	(void) memset(medep, 0, sizeof (*medep));
+
+	return (rval);
+}
+
+/*
+ * Open a connection to a mediator host that is known under several
+ * names.  The names are tried from the last one (bounded by
+ * MAX_HOST_ADDRS) down to the first.  An RPC error on any name but the
+ * first is cleared and the next name is tried; any other outcome is
+ * returned to the caller as is.
+ */
+static med_handle_t *
+open_medd_wrap(
+	md_h_t		*mdhp,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	med_handle_t	*hp = NULL;
+	int		idx;
+
+	assert(mdhp && mdhp->a_cnt > 0);
+
+	/* Loop through the hosts listed */
+	for (idx = min(mdhp->a_cnt, MAX_HOST_ADDRS) - 1; idx >= 0; idx--) {
+		hp = open_medd(mdhp->a_nm[idx], time_out, ep);
+		if (hp != NULL)
+			return (hp);
+
+		/* non-RPC failure, or nothing left to try: ep stands */
+		if (!mdanyrpcerror(ep) || idx == 0)
+			return (hp);
+
+		mdclrerror(ep);
+	}
+
+	/* only reached when there was no name to try at all */
+	rpc_createerr.cf_stat = RPC_CANTSEND;
+	rpc_createerr.cf_error.re_status = 0;
+	(void) mdrpccreateerror(ep, mdhp->a_nm[0],
+	    dgettext(TEXT_DOMAIN, "medd open wrap"));
+
+	return (NULL);
+}
+
+/*
+ * Fill in the device numbers of the driver's mediator translation table.
+ *
+ * The table length is queried with MD_MED_GET_TLEN, the table itself is
+ * fetched with MD_MED_GET_T, each entry's name is stat'ed to obtain its
+ * device number, and the result is written back with MD_MED_SET_T.
+ *
+ * Returns 0 on success, otherwise the value produced while recording
+ * the failure in ep.
+ */
+static int
+setup_med_transtab(md_error_t *ep)
+{
+ mddb_med_t_parm_t *tp = NULL;
+ struct stat statb;
+ int i;
+ size_t alloc_size = 0;
+ int err = 0;
+
+
+ if ((tp = Zalloc(sizeof (mddb_med_t_parm_t))) == NULL)
+ return (mdsyserror(ep, ENOMEM, "setup_med_transtab"));
+
+ /* ask for the number of entries (and the setup flag) */
+ if (metaioctl(MD_MED_GET_TLEN, tp, &tp->med_tp_mde, NULL) != 0) {
+ err = mdstealerror(ep, &tp->med_tp_mde);
+ goto out;
+ }
+
+ /* presumably the table has already been set up; nothing to do */
+ if (tp->med_tp_setup == 1)
+ goto out;
+
+ /*
+ * Size of the parameter block with med_tp_nents entries.  One entry
+ * is subtracted first, presumably because the structure already
+ * embeds one - TODO confirm against the structure definition.
+ */
+ alloc_size = (sizeof (mddb_med_t_parm_t) - sizeof (mddb_med_t_ent_t)) +
+ (sizeof (mddb_med_t_ent_t) * tp->med_tp_nents);
+
+ if ((tp = Realloc(tp, alloc_size)) == NULL) {
+ err = mdsyserror(ep, ENOMEM, "setup_med_transtab");
+ goto out;
+ }
+
+ /* fetch the table itself */
+ if (metaioctl(MD_MED_GET_T, tp, &tp->med_tp_mde, NULL) != 0) {
+ err = mdstealerror(ep, &tp->med_tp_mde);
+ goto out;
+ }
+
+ /* a name that cannot be stat'ed is reported and gets NODEV64 */
+ for (i = 0; i < tp->med_tp_nents; i++) {
+ if (meta_stat(tp->med_tp_ents[i].med_te_nm, &statb) == -1) {
+ md_perror("setup_med_transtab(): stat():");
+ tp->med_tp_ents[i].med_te_dev = NODEV64;
+ } else {
+ tp->med_tp_ents[i].med_te_dev =
+ meta_expldev(statb.st_rdev);
+ }
+ }
+
+ /* hand the completed table back */
+ if (metaioctl(MD_MED_SET_T, tp, &tp->med_tp_mde, NULL) != 0)
+ err = mdstealerror(ep, &tp->med_tp_mde);
+
+out:
+ Free(tp);
+ return (err);
+}
+
+/*
+ * Externals
+ */
+
+/*
+ * NULLPROC - just returns a response
+ *
+ * hostname	mediator host to contact; NULL or "" selects the local host
+ * ep		cleared on entry, receives the error description on failure
+ *
+ * Returns 0 if the call succeeded, -1 otherwise.
+ */
+int
+clnt_med_null(
+	char		*hostname,
+	md_error_t	*ep
+)
+{
+	med_handle_t	*hp;
+	med_err_t	res;
+
+	/*
+	 * initialize; res must start out zeroed because it is passed to
+	 * xdr_free() below even when the call never filled it in
+	 */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+
+	/* do it */
+	if ((hp = open_medd(hostname, CL_DEF_TMO, ep)) == NULL)
+		return (-1);
+
+	/* report against the host actually contacted, never NULL or "" */
+	if (med_null_1(NULL, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd nullproc"));
+
+	close_medd(hp);
+
+	xdr_free(xdr_med_err_t, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Update the mediator information on the mediator.
+ * *** This is not normally called from user code, the kernel does this! ***
+ *
+ * mdhp		names of the mediator host to contact
+ * sp		set the mediator data belongs to
+ * meddp	mediator data to store
+ * ep		cleared on entry, receives the error description on failure
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+clnt_med_upd_data(
+	md_h_t		*mdhp,
+	mdsetname_t	*sp,
+	med_data_t	*meddp,
+	md_error_t	*ep
+)
+{
+	med_handle_t		*hp;
+	med_upd_data_args_t	args;
+	med_err_t		res;
+	md_set_desc		*sd;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * In the MN diskset, use a generic nodename, multiowner, as
+	 * the node initiating the RPC request. This allows
+	 * any node to access mediator information.
+	 *
+	 * MN diskset reconfig cycle forces consistent
+	 * view of set/node/drive/mediator information across all nodes
+	 * in the MN diskset. This allows the relaxation of
+	 * node name checking in rpc.metamedd for MN disksets.
+	 *
+	 * In the traditional diskset, only a calling node that is
+	 * in the mediator record's diskset nodelist can access
+	 * mediator data.
+	 */
+	if (MD_MNSET_DESC(sd))
+		args.med.med_caller = Strdup(MED_MN_CALLER);
+	else
+		args.med.med_caller = Strdup(mynode());
+	args.med.med_setname = Strdup(sp->setname);
+	args.med.med_setno = sp->setno;
+	args.med_data = *meddp;
+
+	/* do it; the argument strings must not leak if the open fails */
+	if ((hp = open_medd_wrap(mdhp, CL_DEF_TMO, ep)) == NULL) {
+		xdr_free(xdr_med_upd_data_args_t, (char *)&args);
+		return (-1);
+	}
+
+	if (med_upd_data_1(&args, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd update data"));
+	else
+		(void) meddstealerror(ep, &res);
+
+	close_medd(hp);
+
+	xdr_free(xdr_med_upd_data_args_t, (char *)&args);
+	xdr_free(xdr_med_err_t, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Get the mediator data for this client from the mediator
+ *
+ * mdhp		names of the mediator host to contact
+ * sp		set whose mediator data is wanted
+ * meddp	filled in with the returned data on success
+ * ep		cleared on entry, receives the error description on failure
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+clnt_med_get_data(
+	md_h_t		*mdhp,
+	mdsetname_t	*sp,
+	med_data_t	*meddp,
+	md_error_t	*ep
+)
+{
+	med_handle_t		*hp;
+	med_args_t		args;
+	med_get_data_res_t	res;
+	int			rval = -1;
+	md_set_desc		*sd;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * In the MN diskset, use a generic nodename, multiowner, as
+	 * the node initiating the RPC request. This allows
+	 * any node to access mediator information.
+	 *
+	 * MN diskset reconfig cycle forces consistent
+	 * view of set/node/drive/mediator information across all nodes
+	 * in the MN diskset. This allows the relaxation of
+	 * node name checking in rpc.metamedd for MN disksets.
+	 *
+	 * In the traditional diskset, only a calling node that is
+	 * in the mediator record's diskset nodelist can access
+	 * mediator data.
+	 */
+	if (MD_MNSET_DESC(sd))
+		args.med.med_caller = Strdup(MED_MN_CALLER);
+	else
+		args.med.med_caller = Strdup(mynode());
+	args.med.med_setname = Strdup(sp->setname);
+	args.med.med_setno = sp->setno;
+
+	/* do it; the argument strings must not leak if the open fails */
+	if ((hp = open_medd_wrap(mdhp, CL_DEF_TMO, ep)) == NULL) {
+		xdr_free(xdr_med_args_t, (char *)&args);
+		return (-1);
+	}
+
+	if (med_get_data_1(&args, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd get data"));
+	else
+		(void) meddstealerror(ep, &res.med_status);
+
+	close_medd(hp);
+
+	if (mdisok(ep)) {
+		/* copy the result out before it is released below */
+		(void) memmove(meddp, &res.med_data, sizeof (med_data_t));
+		rval = 0;
+	}
+
+	xdr_free(xdr_med_args_t, (char *)&args);
+	xdr_free(xdr_med_get_data_res_t, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Update the mediator record on the mediator.
+ *
+ * mdhp		names of the mediator host to contact
+ * sp		set the mediator record belongs to
+ * medrp	mediator record to store
+ * ep		cleared on entry, receives the error description on failure
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+clnt_med_upd_rec(
+	md_h_t		*mdhp,
+	mdsetname_t	*sp,
+	med_rec_t	*medrp,
+	md_error_t	*ep
+)
+{
+	med_handle_t		*hp;
+	med_upd_rec_args_t	args;
+	med_err_t		res;
+	md_set_desc		*sd;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * In the MN diskset, use a generic nodename, multiowner, as
+	 * the node initiating the RPC request. This allows
+	 * any node to access mediator information.
+	 *
+	 * MN diskset reconfig cycle forces consistent
+	 * view of set/node/drive/mediator information across all nodes
+	 * in the MN diskset. This allows the relaxation of
+	 * node name checking in rpc.metamedd for MN disksets.
+	 *
+	 * In the traditional diskset, only a calling node that is
+	 * in the mediator record's diskset nodelist can access
+	 * mediator data.
+	 */
+	if (MD_MNSET_DESC(sd))
+		args.med.med_caller = Strdup(MED_MN_CALLER);
+	else
+		args.med.med_caller = Strdup(mynode());
+	args.med.med_setname = Strdup(sp->setname);
+	args.med.med_setno = sp->setno;
+	args.med_flags = 0;
+	args.med_rec = *medrp;			/* structure assignment */
+
+	/* do it; the argument strings must not leak if the open fails */
+	if ((hp = open_medd_wrap(mdhp, CL_DEF_TMO, ep)) == NULL) {
+		xdr_free(xdr_med_upd_rec_args_t, (char *)&args);
+		return (-1);
+	}
+
+	if (med_upd_rec_1(&args, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd update record"));
+	else
+		(void) meddstealerror(ep, &res);
+
+	close_medd(hp);
+
+	xdr_free(xdr_med_upd_rec_args_t, (char *)&args);
+	xdr_free(xdr_med_err_t, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Get the mediator record for this client from the mediator
+ *
+ * mdhp		names of the mediator host to contact
+ * sp		set whose mediator record is wanted
+ * medrp	filled in with the returned record on success
+ * ep		cleared on entry, receives the error description on failure
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+clnt_med_get_rec(
+	md_h_t		*mdhp,
+	mdsetname_t	*sp,
+	med_rec_t	*medrp,
+	md_error_t	*ep
+)
+{
+	med_handle_t		*hp;
+	med_args_t		args;
+	med_get_rec_res_t	res;
+	int			rval = -1;
+	md_set_desc		*sd;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * In the MN diskset, use a generic nodename, multiowner, as
+	 * the node initiating the RPC request. This allows
+	 * any node to access mediator information.
+	 *
+	 * MN diskset reconfig cycle forces consistent
+	 * view of set/node/drive/mediator information across all nodes
+	 * in the MN diskset. This allows the relaxation of
+	 * node name checking in rpc.metamedd for MN disksets.
+	 *
+	 * In the traditional diskset, only a calling node that is
+	 * in the mediator record's diskset nodelist can access
+	 * mediator data.
+	 */
+	if (MD_MNSET_DESC(sd))
+		args.med.med_caller = Strdup(MED_MN_CALLER);
+	else
+		args.med.med_caller = Strdup(mynode());
+	args.med.med_setname = Strdup(sp->setname);
+	args.med.med_setno = sp->setno;
+
+	/* do it; the argument strings must not leak if the open fails */
+	if ((hp = open_medd_wrap(mdhp, CL_DEF_TMO, ep)) == NULL) {
+		xdr_free(xdr_med_args_t, (char *)&args);
+		return (-1);
+	}
+
+	if (med_get_rec_1(&args, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd get record"));
+	else
+		(void) meddstealerror(ep, &res.med_status);
+
+	close_medd(hp);
+
+	if (mdisok(ep)) {
+		/* copy the result out before it is released below */
+		(void) memmove(medrp, &res.med_rec, sizeof (med_rec_t));
+		rval = 0;
+	}
+
+	xdr_free(xdr_med_args_t, (char *)&args);
+	xdr_free(xdr_med_get_rec_res_t, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Get the name of the host from the mediator daemon.
+ *
+ * hostname	mediator host to contact; NULL or "" selects the local host
+ * ret_hostname	if not NULL, receives a copy of the returned name on
+ *		success; the caller releases it with Free()
+ * ep		cleared on entry, receives the error description on failure
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+clnt_med_hostname(
+	char		*hostname,
+	char		**ret_hostname,
+	md_error_t	*ep
+)
+{
+	med_handle_t	*hp;
+	med_hnm_res_t	res;
+	int		rval = -1;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+
+	/* No args */
+
+	/* do it */
+	if ((hp = open_medd(hostname, CL_DEF_TMO, ep)) == NULL)
+		return (-1);
+
+	/* report against the host actually contacted, never NULL or "" */
+	if (med_hostname_1(NULL, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd hostname"));
+	else
+		(void) meddstealerror(ep, &res.med_status);
+
+	close_medd(hp);
+
+	if (mdisok(ep)) {
+		/* copy the name out before the result is released below */
+		rval = 0;
+
+		if (ret_hostname != NULL)
+			*ret_hostname = Strdup(res.med_hnm);
+	}
+
+	xdr_free(xdr_med_hnm_res_t, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Resolve every mediator host name in mp to an IPv4 address and flag
+ * the entry whose first name is the local node.
+ *
+ * mp	host/address array; a_flg and a_ip are (re)written for the first
+ *	get_max_meds() entries
+ * ep	receives the error description on failure
+ *
+ * Returns 0 on success, -1 if the mediator limit is unavailable, or the
+ * result of mdsyserror() when a name cannot be resolved to an AF_INET
+ * address.
+ */
+int
+meta_med_hnm2ip(md_hi_arr_t *mp, md_error_t *ep)
+{
+	int	i, j;
+	int	max_meds;
+
+	if ((max_meds = get_max_meds(ep)) == 0)
+		return (-1);
+
+	for (i = 0; i < max_meds; i++) {
+		mp->n_lst[i].a_flg = 0;
+
+		/* See if this is the local host (strcmp() yields an int) */
+		if (mp->n_lst[i].a_cnt > 0 &&
+		    strcmp(mp->n_lst[i].a_nm[0], mynode()) == 0)
+			mp->n_lst[i].a_flg |= NMIP_F_LOCAL;
+
+		for (j = 0; j < mp->n_lst[i].a_cnt; j++) {
+			struct hostent	*hp;
+			char		*hnm = mp->n_lst[i].a_nm[j];
+
+			/*
+			 * Cluster nodename support
+			 *
+			 * See if the clustering code can give us an IP addr
+			 * for the stored name. If not, find it the old way
+			 * which will use the public interface.
+			 */
+			if (sdssc_get_priv_ipaddr(hnm,
+			    (struct in_addr *)&mp->n_lst[i].a_ip[j]) ==
+			    SDSSC_OKAY)
+				continue;
+
+			if ((hp = gethostbyname(hnm)) == NULL)
+				return (mdsyserror(ep, EADDRNOTAVAIL, hnm));
+
+			/* We only do INET addresses */
+			if (hp->h_addrtype != AF_INET)
+				return (mdsyserror(ep, EPFNOSUPPORT, hnm));
+
+			/* We take the first address only */
+			if (*hp->h_addr_list == NULL)
+				return (mdsyserror(ep, EADDRNOTAVAIL, hnm));
+
+			(void) memmove(&mp->n_lst[i].a_ip[j],
+			    *hp->h_addr_list, sizeof (struct in_addr));
+		}
+	}
+	return (0);
+}
+
+/*
+ * Copy a host-name array (md_h_arr_t) into a host-name/address array
+ * (md_hi_arr_t): the counts and names are copied and every flag word is
+ * cleared, for the first get_max_meds() entries.  Returns 0 on success
+ * and -1 when the mediator limit is unavailable.
+ */
+int
+meta_h2hi(md_h_arr_t *mdhp, md_hi_arr_t *mdhip, md_error_t *ep)
+{
+	int	max_meds;
+	int	host, nm;
+
+	max_meds = get_max_meds(ep);
+	if (max_meds == 0)
+		return (-1);
+
+	mdhip->n_cnt = mdhp->n_cnt;
+
+	for (host = 0; host < max_meds; host++) {
+		mdhip->n_lst[host].a_flg = 0;
+		mdhip->n_lst[host].a_cnt = mdhp->n_lst[host].a_cnt;
+
+		/* an entry with no names simply copies nothing */
+		nm = 0;
+		while (nm < mdhp->n_lst[host].a_cnt) {
+			(void) strcpy(mdhip->n_lst[host].a_nm[nm],
+			    mdhp->n_lst[host].a_nm[nm]);
+			nm++;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Copy a host-name/address array (md_hi_arr_t) into a host-name array
+ * (md_h_arr_t): the counts and names of the first get_max_meds()
+ * entries are copied.  Returns 0 on success and -1 when the mediator
+ * limit is unavailable.
+ */
+int
+meta_hi2h(md_hi_arr_t *mdhip, md_h_arr_t *mdhp, md_error_t *ep)
+{
+	int	max_meds;
+	int	host, nm;
+
+	max_meds = get_max_meds(ep);
+	if (max_meds == 0)
+		return (-1);
+
+	mdhp->n_cnt = mdhip->n_cnt;
+
+	for (host = 0; host < max_meds; host++) {
+		mdhp->n_lst[host].a_cnt = mdhip->n_lst[host].a_cnt;
+
+		/* an entry with no names simply copies nothing */
+		nm = 0;
+		while (nm < mdhip->n_lst[host].a_cnt) {
+			(void) strcpy(mdhp->n_lst[host].a_nm[nm],
+			    mdhip->n_lst[host].a_nm[nm]);
+			nm++;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Fill in the mediator part of a database configuration for a diskset.
+ *
+ * For the local set nothing is done.  Otherwise the translation table
+ * is set up, the set's mediator hosts are copied into cp->c_med and
+ * resolved to addresses, and, unless force is set, each mediator host
+ * is asked for its host name to make sure it still answers.
+ *
+ * Returns 0 on success, otherwise a non-zero value with ep set.
+ */
+int
+setup_med_cfg(
+	mdsetname_t	*sp,
+	mddb_config_t	*cp,
+	int		force,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	int		max_meds;
+	int		idx;
+
+	if (metaislocalset(sp))
+		return (0);
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	if (setup_med_transtab(ep))
+		return (-1);
+
+	if (meta_h2hi(&sd->sd_med, &cp->c_med, ep))
+		return (-1);
+
+	/* Make sure the ip addresses are current */
+	if (meta_med_hnm2ip(&cp->c_med, ep))
+		return (-1);
+
+	/* a forced setup does not probe the mediator hosts */
+	if (force)
+		return (0);
+
+	max_meds = get_max_meds(ep);
+	if (max_meds == 0)
+		return (-1);
+
+	/* Make sure metamedd still running on host - only chk nodename */
+	for (idx = 0; idx < max_meds; idx++) {
+		char	*reported;
+		char	*first_nm;
+
+		if (sd->sd_med.n_lst[idx].a_cnt == 0)
+			continue;
+
+		first_nm = sd->sd_med.n_lst[idx].a_nm[0];
+
+		if (clnt_med_hostname(first_nm, &reported, ep)) {
+			return (mddserror(ep, MDE_DS_NOMEDONHOST, sp->setno,
+			    first_nm, NULL, sp->setname));
+		}
+
+		/* only reachability matters; the name itself is unused */
+		Free(reported);
+	}
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_med_err.c b/usr/src/lib/lvm/libmeta/common/meta_med_err.c
new file mode 100644
index 0000000000..6b83280f89
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_med_err.c
@@ -0,0 +1,97 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1992-2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+#include <metamed.h>
+
+/*
+ * Map a mediator error number (an MDE_MED_* value) to its localized
+ * message text.  Returns NULL for a number that is not listed here.
+ *
+ * NOTE(review): two message strings below contain typos ("record record
+ * size", "offset in not valid").  They are also the message catalog
+ * lookup keys, so they are left untouched here; fix them together with
+ * the catalogs.
+ */
+char *
+med_errnum_to_str(int errnum)
+{
+ switch (errnum) {
+ case MDE_MED_NOERROR:
+ return (dgettext(TEXT_DOMAIN, "No Error"));
+ case MDE_MED_HOSTNOMED:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator host has no mediator data for host"));
+ case MDE_MED_DBNOTINIT:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database is not initialized"));
+ case MDE_MED_DBSZBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database size is not valid"));
+ case MDE_MED_DBKEYADDFAIL:
+ return (dgettext(TEXT_DOMAIN,
+ "unable to add key to mediator database"));
+ case MDE_MED_DBKEYDELFAIL:
+ return (dgettext(TEXT_DOMAIN,
+ "unable to delete key from mediator database"));
+ case MDE_MED_DBHDRSZBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database header record size is not valid"));
+ case MDE_MED_DBHDRMAGBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database header magic is not valid"));
+ case MDE_MED_DBHDRREVBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database header revision is not valid"));
+ case MDE_MED_DBHDRCKSBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database header checksum is not valid"));
+ case MDE_MED_DBRECSZBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database record record size is not valid"));
+ case MDE_MED_DBRECMAGBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database record magic is not valid"));
+ case MDE_MED_DBRECREVBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database record revision is not valid"));
+ case MDE_MED_DBRECCKSBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database record checksum is not valid"));
+ case MDE_MED_DBRECOFFBAD:
+ return (dgettext(TEXT_DOMAIN,
+ "mediator database record offset in not valid"));
+ case MDE_MED_DBRECNOENT:
+ return (dgettext(TEXT_DOMAIN,
+ "no matching mediator record found"));
+ case MDE_MED_DBARGSMISMATCH:
+ return (dgettext(TEXT_DOMAIN, "set number in arguments "
+ "different from set number in data"));
+ default:
+ /* not a known mediator error; the caller picks the wording */
+ return (NULL);
+ }
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mem.c b/usr/src/lib/lvm/libmeta/common/meta_mem.c
new file mode 100644
index 0000000000..d685f57c09
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mem.c
@@ -0,0 +1,250 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 1993, 2000 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <meta.h>
+
+/*
+ * Free: release memory obtained from the allocators in this file.
+ * The debug-malloc build forwards the caller's file and line as well.
+ */
+#ifdef _DEBUG_MALLOC_INC
+
+void
+_Free(
+	char	*file,
+	int	line,
+	void	*p
+)
+{
+	debug_free(file, line, p);
+}
+
+#else /* ! _DEBUG_MALLOC_INC */
+
+void
+Free(
+	void	*p
+)
+{
+	free(p);
+}
+
+#endif /* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Malloc: allocate s bytes.  If the allocation fails, the error is
+ * reported with md_perror() and md_exit(NULL, 1) is called.
+ */
+#ifdef _DEBUG_MALLOC_INC
+
+void *
+_Malloc(
+	char	*file,
+	int	line,
+	size_t	s
+)
+{
+	void	*buf = debug_malloc(file, line, s);
+
+	if (buf != NULL)
+		return (buf);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (buf);
+}
+
+#else /* ! _DEBUG_MALLOC_INC */
+
+void *
+Malloc(
+	size_t	s
+)
+{
+	void	*buf = malloc(s);
+
+	if (buf != NULL)
+		return (buf);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (buf);
+}
+
+#endif /* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Zalloc: allocate s bytes through Malloc and clear them.
+ */
+#ifdef _DEBUG_MALLOC_INC
+
+void *
+_Zalloc(
+	char	*file,
+	int	line,
+	size_t	s
+)
+{
+	void	*buf = _Malloc(file, line, s);
+
+	(void) memset(buf, 0, s);
+	return (buf);
+}
+
+#else /* ! _DEBUG_MALLOC_INC */
+
+void *
+Zalloc(
+	size_t	s
+)
+{
+	void	*buf = Malloc(s);
+
+	(void) memset(buf, 0, s);
+	return (buf);
+}
+
+#endif /* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Realloc: resize the block at p to s bytes.  If the underlying call
+ * returns NULL, the error is reported with md_perror() and
+ * md_exit(NULL, 1) is called.
+ */
+#ifdef _DEBUG_MALLOC_INC
+
+void *
+_Realloc(
+	char	*file,
+	int	line,
+	void	*p,
+	size_t	s
+)
+{
+	/* a NULL block is a fresh allocation in the debug library */
+	if (p != NULL)
+		p = debug_realloc(file, line, p, s);
+	else
+		p = debug_malloc(file, line, s);
+
+	if (p != NULL)
+		return (p);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (p);
+}
+
+#else /* ! _DEBUG_MALLOC_INC */
+
+void *
+Realloc(
+	void	*p,
+	size_t	s
+)
+{
+	p = realloc(p, s);
+	if (p != NULL)
+		return (p);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (p);
+}
+
+#endif /* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Calloc: allocate and clear room for n objects of s bytes each via
+ * Zalloc.  Returns NULL, without allocating, when n * s overflows.
+ */
+#ifdef _DEBUG_MALLOC_INC
+
+void *
+_Calloc(
+	char	*file,
+	int	line,
+	size_t	n,
+	size_t	s
+)
+{
+	unsigned long	total = 0;
+
+	if (n != 0 && s != 0) {
+		total = (unsigned long)n * s;
+		/* dividing back must give s unless the product wrapped */
+		if (total / n != s)
+			return (NULL);
+	}
+	return (_Zalloc(file, line, total));
+}
+
+#else /* ! _DEBUG_MALLOC_INC */
+
+void *
+Calloc(
+	size_t	n,
+	size_t	s
+)
+{
+	unsigned long	total = 0;
+
+	if (n != 0 && s != 0) {
+		total = (unsigned long)n * s;
+		/* dividing back must give s unless the product wrapped */
+		if (total / n != s)
+			return (NULL);
+	}
+	return (Zalloc(total));
+}
+
+#endif /* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Strdup: duplicate the string p.  If the copy cannot be made, the
+ * error is reported with md_perror() and md_exit(NULL, 1) is called.
+ */
+#ifdef _DEBUG_MALLOC_INC
+
+char *
+_Strdup(
+	char	*file,
+	int	line,
+	char	*p
+)
+{
+	char	*copy = DBstrdup(file, line, p);
+
+	if (copy != NULL)
+		return (copy);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (copy);
+}
+
+#else /* ! _DEBUG_MALLOC_INC */
+
+char *
+Strdup(
+	char	*p
+)
+{
+	char	*copy = strdup(p);
+
+	if (copy != NULL)
+		return (copy);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (copy);
+}
+
+#endif /* ! _DEBUG_MALLOC_INC */
diff --git a/usr/src/lib/lvm/libmeta/common/meta_metad.c b/usr/src/lib/lvm/libmeta/common/meta_metad.c
new file mode 100644
index 0000000000..7588843f5c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_metad.c
@@ -0,0 +1,4082 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+#include <metad.h>
+#include <devid.h>
+
+static md_setkey_t *my_cl_sk = NULL;
+
+#define CL_DEF_TMO 30L
+
+/*
+ * Fill a new style (v2) mddrivename_t from an old style (v1)
+ * o_mddrivename_t.  Meant to be used *ONLY* by rpc.metad.
+ *
+ * Every member is copied by plain assignment; nothing is duplicated.
+ * Members that exist only in the new layout (geom.blk_sz, the vtoc
+ * first_lba/last_lba/lbasize values and the final vtoc partition slot)
+ * are set to zero.  v2_dp->parts.parts_val is indexed for each of the
+ * v1_dp->parts.parts_len entries, so the caller must supply an array of
+ * at least that length.
+ */
+void
+meta_conv_drvname_old2new(
+	o_mddrivename_t		*v1_dp,
+	mddrivename_t		*v2_dp
+)
+{
+	const int	last = MD_MAX_PARTS - 1;	/* v2-only vtoc slot */
+	int		i;
+	o_mdname_t	*old_np;
+	mdname_t	*new_np;
+
+	/* members that are laid out the same in both versions */
+	v2_dp->cname = v1_dp->cname;
+	v2_dp->rname = v1_dp->rname;
+	v2_dp->type = v1_dp->type;
+	v2_dp->errnum = v1_dp->errnum;
+	v2_dp->cinfo = v1_dp->cinfo;
+	v2_dp->side_names = v1_dp->side_names;
+	v2_dp->side_names_key = v1_dp->side_names_key;
+	v2_dp->miscname = v1_dp->miscname;
+
+	/* geometry; blk_sz has no v1 counterpart */
+	v2_dp->geom.ncyl = v1_dp->geom.ncyl;
+	v2_dp->geom.nhead = v1_dp->geom.nhead;
+	v2_dp->geom.nsect = v1_dp->geom.nsect;
+	v2_dp->geom.rpm = v1_dp->geom.rpm;
+	v2_dp->geom.write_reinstruct = v1_dp->geom.write_reinstruct;
+	v2_dp->geom.read_reinstruct = v1_dp->geom.read_reinstruct;
+	v2_dp->geom.blk_sz = 0;
+
+	/* vtoc header; the lba members have no v1 counterpart */
+	v2_dp->vtoc.typename = v1_dp->vtoc.typename;
+	v2_dp->vtoc.nparts = v1_dp->vtoc.nparts;
+	v2_dp->vtoc.first_lba = 0;
+	v2_dp->vtoc.last_lba = 0;
+	v2_dp->vtoc.lbasize = 0;
+
+	/* vtoc slots present in both layouts, widened to diskaddr_t */
+	for (i = 0; i < last; i++) {
+		v2_dp->vtoc.parts[i].start =
+		    (diskaddr_t)v1_dp->vtoc.parts[i].start;
+		v2_dp->vtoc.parts[i].size =
+		    (diskaddr_t)v1_dp->vtoc.parts[i].size;
+		v2_dp->vtoc.parts[i].tag = v1_dp->vtoc.parts[i].tag;
+		v2_dp->vtoc.parts[i].flag = v1_dp->vtoc.parts[i].flag;
+		v2_dp->vtoc.parts[i].label =
+		    (diskaddr_t)v1_dp->vtoc.parts[i].label;
+	}
+
+	/* The new style vtoc has 17 partitions; clear the extra one */
+	v2_dp->vtoc.parts[last].start = 0;
+	v2_dp->vtoc.parts[last].size = 0;
+	v2_dp->vtoc.parts[last].tag = 0;
+	v2_dp->vtoc.parts[last].flag = 0;
+	v2_dp->vtoc.parts[last].label = 0;
+
+	/* per-slice name records */
+	v2_dp->parts.parts_len = v1_dp->parts.parts_len;
+	for (i = 0; i < v1_dp->parts.parts_len; i++) {
+		old_np = &v1_dp->parts.parts_val[i];
+		new_np = &v2_dp->parts.parts_val[i];
+
+		/* every slice points back at the converted drive */
+		new_np->drivenamep = v2_dp;
+
+		/*
+		 * A slice without a cname is presumed to have none of
+		 * the other fields either, so there is nothing more to
+		 * copy for it.
+		 */
+		if (old_np->cname == NULL)
+			continue;
+
+		new_np->cname = old_np->cname;
+		new_np->bname = old_np->bname;
+		new_np->rname = old_np->rname;
+		new_np->devicesname = old_np->devicesname;
+		new_np->dev = meta_expldev(old_np->dev);
+		new_np->key = old_np->key;
+		new_np->end_blk = (diskaddr_t)old_np->end_blk;
+		new_np->start_blk = (diskaddr_t)old_np->start_blk;
+	}
+}
+
+/*
+ * Fill an old style (v1) o_mddrivename_t from a new style (v2)
+ * mddrivename_t.  Meant to be used *ONLY* by rpc.metad.
+ *
+ * Every member is copied by plain assignment; nothing is duplicated.
+ * Block addresses are cast to daddr_t and device numbers pass through
+ * meta_cmpldev().  Only the first MD_MAX_PARTS - 1 vtoc slots are
+ * copied.  v1_dp->parts.parts_val is indexed for each of the
+ * v2_dp->parts.parts_len entries, so the caller must supply an array of
+ * at least that length.
+ */
+void
+meta_conv_drvname_new2old(
+	o_mddrivename_t		*v1_dp,
+	mddrivename_t		*v2_dp
+)
+{
+	const int	nslots = MD_MAX_PARTS - 1;	/* slots v1 copies */
+	int		i;
+	o_mdname_t	*old_np;
+	mdname_t	*new_np;
+
+	/* members that are laid out the same in both versions */
+	v1_dp->cname = v2_dp->cname;
+	v1_dp->rname = v2_dp->rname;
+	v1_dp->type = v2_dp->type;
+	v1_dp->errnum = v2_dp->errnum;
+	v1_dp->cinfo = v2_dp->cinfo;
+	v1_dp->side_names = v2_dp->side_names;
+	v1_dp->side_names_key = v2_dp->side_names_key;
+	v1_dp->miscname = v2_dp->miscname;
+
+	/* geometry */
+	v1_dp->geom.ncyl = v2_dp->geom.ncyl;
+	v1_dp->geom.nhead = v2_dp->geom.nhead;
+	v1_dp->geom.nsect = v2_dp->geom.nsect;
+	v1_dp->geom.rpm = v2_dp->geom.rpm;
+	v1_dp->geom.write_reinstruct = v2_dp->geom.write_reinstruct;
+	v1_dp->geom.read_reinstruct = v2_dp->geom.read_reinstruct;
+
+	/* vtoc header and the slots that v1 carries */
+	v1_dp->vtoc.typename = v2_dp->vtoc.typename;
+	v1_dp->vtoc.nparts = v2_dp->vtoc.nparts;
+	for (i = 0; i < nslots; i++) {
+		v1_dp->vtoc.parts[i].start =
+		    (daddr_t)v2_dp->vtoc.parts[i].start;
+		v1_dp->vtoc.parts[i].size =
+		    (daddr_t)v2_dp->vtoc.parts[i].size;
+		v1_dp->vtoc.parts[i].tag = v2_dp->vtoc.parts[i].tag;
+		v1_dp->vtoc.parts[i].flag = v2_dp->vtoc.parts[i].flag;
+		v1_dp->vtoc.parts[i].label =
+		    (daddr_t)v2_dp->vtoc.parts[i].label;
+	}
+
+	/* per-slice name records */
+	v1_dp->parts.parts_len = v2_dp->parts.parts_len;
+	for (i = 0; i < v2_dp->parts.parts_len; i++) {
+		old_np = &v1_dp->parts.parts_val[i];
+		new_np = &v2_dp->parts.parts_val[i];
+
+		/* every slice points back at the converted drive */
+		old_np->drivenamep = v1_dp;
+
+		/*
+		 * A slice without a cname is presumed to have none of
+		 * the remaining fields of a partition either, so there
+		 * is nothing more to copy for it.
+		 */
+		if (new_np->cname == NULL)
+			continue;
+
+		old_np->cname = new_np->cname;
+		old_np->bname = new_np->bname;
+		old_np->rname = new_np->rname;
+		old_np->devicesname = new_np->devicesname;
+		old_np->dev = meta_cmpldev(new_np->dev);
+		old_np->key = new_np->key;
+		old_np->end_blk = (daddr_t)new_np->end_blk;
+		old_np->start_blk = (daddr_t)new_np->start_blk;
+	}
+}
+
+/*
+ * Convert an old style md_drive_desc_t list into a new style
+ * md_drive_desc_t list.  Meant to be used *ONLY* by rpc.metad.
+ *
+ * The two lists are walked in lockstep, driven by the v1 list; the v2
+ * list must already exist and have at least as many nodes, each with a
+ * usable dd_dnp, because no allocation or NULL check is done here.
+ */
+void
+meta_conv_drvdesc_old2new(
+	o_md_drive_desc		*v1_dd,
+	md_drive_desc		*v2_dd
+)
+{
+	o_md_drive_desc		*src = v1_dd;
+	md_drive_desc		*dst = v2_dd;
+
+	while (src != NULL) {
+		dst->dd_ctime = src->dd_ctime;
+		dst->dd_genid = src->dd_genid;
+		dst->dd_flags = src->dd_flags;
+		dst->dd_dbcnt = src->dd_dbcnt;
+		dst->dd_dbsize = src->dd_dbsize;
+		meta_conv_drvname_old2new(src->dd_dnp, dst->dd_dnp);
+
+		src = src->dd_next;
+		dst = dst->dd_next;
+	}
+}
+
+/*
+ * Convert a new style md_drive_desc_t list into an old style
+ * md_drive_desc_t list.  Meant to be used *ONLY* by rpc.metad.
+ *
+ * The two lists are walked in lockstep, driven by the v2 list; the v1
+ * list must already exist and have at least as many nodes, each with a
+ * usable dd_dnp, because no allocation or NULL check is done here.
+ */
+void
+meta_conv_drvdesc_new2old(
+	o_md_drive_desc		*v1_dd,
+	md_drive_desc		*v2_dd
+)
+{
+	md_drive_desc		*src = v2_dd;
+	o_md_drive_desc		*dst = v1_dd;
+
+	while (src != NULL) {
+		dst->dd_ctime = src->dd_ctime;
+		dst->dd_genid = src->dd_genid;
+		dst->dd_flags = src->dd_flags;
+		dst->dd_dbcnt = src->dd_dbcnt;
+		dst->dd_dbsize = src->dd_dbsize;
+		meta_conv_drvname_new2old(dst->dd_dnp, src->dd_dnp);
+
+		src = src->dd_next;
+		dst = dst->dd_next;
+	}
+}
+
+/*
+ * Allocate memory for a v1 drive descriptor list with one node per
+ * drive in the v2 drive descriptor list.
+ *
+ * Each node gets a zeroed o_md_drive_desc, a zeroed o_mddrivename_t and
+ * a parts_val array sized from the matching v2 drive's parts_len.  The
+ * head of the new list (NULL when v2_dd is NULL) is stored in *v1_dd.
+ *
+ * Nodes are appended in the same order as the v2 list.  This matters
+ * because meta_conv_drvdesc_new2old() walks both lists front to back:
+ * building the list by head insertion would pair the i-th v1 node with
+ * a parts_val array sized for a different drive, and the conversion
+ * could then write past the end of it whenever drives differ in
+ * parts_len.
+ */
+void
+alloc_olddrvdesc(
+	o_md_drive_desc		**v1_dd,
+	md_drive_desc		*v2_dd
+)
+{
+	md_drive_desc		*dd;
+	o_md_drive_desc		*new;
+	o_md_drive_desc		*head = NULL;
+	o_md_drive_desc		**tailp = &head;
+
+	for (dd = v2_dd; dd != NULL; dd = dd->dd_next) {
+		new = Zalloc(sizeof (o_md_drive_desc));
+		new->dd_dnp = Zalloc(sizeof (o_mddrivename_t));
+		new->dd_dnp->parts.parts_val = Zalloc(sizeof (o_mdname_t) *
+		    dd->dd_dnp->parts.parts_len);
+
+		/* link at the tail so list order mirrors the source */
+		new->dd_next = NULL;
+		*tailp = new;
+		tailp = &new->dd_next;
+	}
+	*v1_dd = head;
+}
+
+/*
+ * Allocate memory for a v2 drive descriptor list with one node per
+ * drive in the v1 drive descriptor list.
+ *
+ * Each node gets a zeroed md_drive_desc, a zeroed mddrivename_t and a
+ * parts_val array sized from the matching v1 drive's parts_len.  The
+ * head of the new list (NULL when v1_dd is NULL) is stored in *v2_dd.
+ *
+ * Nodes are appended in the same order as the v1 list.  This matters
+ * because meta_conv_drvdesc_old2new() walks both lists front to back:
+ * building the list by head insertion would pair the i-th v2 node with
+ * a parts_val array sized for a different drive, and the conversion
+ * could then write past the end of it whenever drives differ in
+ * parts_len.
+ */
+void
+alloc_newdrvdesc(
+	o_md_drive_desc		*v1_dd,
+	md_drive_desc		**v2_dd
+)
+{
+	o_md_drive_desc		*o_dd;
+	md_drive_desc		*new;
+	md_drive_desc		*head = NULL;
+	md_drive_desc		**tailp = &head;
+
+	for (o_dd = v1_dd; o_dd != NULL; o_dd = o_dd->dd_next) {
+		new = Zalloc(sizeof (md_drive_desc));
+		new->dd_dnp = Zalloc(sizeof (mddrivename_t));
+		new->dd_dnp->parts.parts_val = Zalloc(sizeof (mdname_t) *
+		    o_dd->dd_dnp->parts.parts_len);
+
+		/* link at the tail so list order mirrors the source */
+		new->dd_next = NULL;
+		*tailp = new;
+		tailp = &new->dd_next;
+	}
+	*v2_dd = head;
+}
+
+/*
+ * Release a v1 drive descriptor list: for every node free its
+ * parts_val array, its drive name and then the node itself.  Each
+ * node's dd_dnp is dereferenced without a NULL check.
+ */
+void
+free_olddrvdesc(
+	o_md_drive_desc		*v1_dd
+)
+{
+	o_md_drive_desc		*cur, *next;
+
+	for (cur = v1_dd; cur != NULL; cur = next) {
+		/* remember the successor before the node goes away */
+		next = cur->dd_next;
+		free(cur->dd_dnp->parts.parts_val);
+		free(cur->dd_dnp);
+		free(cur);
+	}
+}
+
+/*
+ * Release a v2 drive descriptor list: for every node free its
+ * parts_val array, its drive name and then the node itself.  Each
+ * node's dd_dnp is dereferenced without a NULL check.
+ */
+void
+free_newdrvdesc(
+	md_drive_desc		*v2_dd
+)
+{
+	md_drive_desc		*cur, *next;
+
+	for (cur = v2_dd; cur != NULL; cur = next) {
+		/* remember the successor before the node goes away */
+		next = cur->dd_next;
+		free(cur->dd_dnp->parts.parts_val);
+		free(cur->dd_dnp);
+		free(cur);
+	}
+}
+
+/*
+ * Return the device id for a given device.
+ *
+ * Opens rname, fetches its device id with devid_get() and returns a
+ * strdup()ed copy of the devid_str_encode() string form.  Returns NULL
+ * if the device cannot be opened, devid_get() fails, the encoding
+ * yields NULL, or strdup() fails.  The copy comes from strdup(), so the
+ * caller releases it with free().
+ */
+char *
+meta_get_devid(
+	char	*rname
+)
+{
+	ddi_devid_t	devid;
+	char		*encoded;
+	char		*result;
+	int		fd;
+	int		got;
+
+	fd = open(rname, O_RDWR | O_NDELAY, 0);
+	if (fd < 0)
+		return (NULL);
+
+	/* the descriptor is only needed for the lookup itself */
+	got = devid_get(fd, &devid);
+	(void) close(fd);
+	if (got == -1)
+		return (NULL);
+
+	encoded = devid_str_encode(devid, NULL);
+	devid_free(devid);
+	if (encoded == NULL)
+		return (NULL);
+
+	/* hand back a plain heap copy and drop the libdevid string */
+	result = strdup(encoded);
+	devid_str_free(encoded);
+	return (result);
+}
+
+/*
+ * Add side names for the diskset drive records
+ * NOTE: these go into the local set's namespace.
+ *
+ * When running inside the daemon and hostname is this node, the
+ * version 2 service routine is called directly.  Otherwise an RPC
+ * handle is opened and the procedure matching the handle's version is
+ * used; for version 1 the set descriptor and its drive list are first
+ * converted to the old layout and released again after the call.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_add_drv_sidenms(
+	char			*hostname,
+	char			*this_host,
+	mdsetname_t		*sp,
+	md_set_desc		*sd,
+	int			node_c,
+	char			**node_v,
+	md_error_t		*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_drv_sidenm_args		v1_args;
+	mdrpc_drv_sidenm_2_args		v2_args;
+	mdrpc_drv_sidenm_2_args_r1	*r1;
+	mdrpc_generic_res		res;
+	int				rpc_stat;
+	int				version;
+	int				side, ch;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* the version 2 arguments are always prepared */
+	r1 = &v2_args.mdrpc_drv_sidenm_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->hostname = this_host;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->sp = sp;
+	r1->sd = sd;
+	r1->node_v.node_v_len = node_c;
+	r1->node_v.node_v_val = node_v;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 procedure directly */
+		svc_ok = mdrpc_add_drv_sidenms_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle tells us which procedure version to use */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* build an old style copy of the set descriptor */
+			v1_args.sd = Zalloc(sizeof (o_md_set_desc));
+			alloc_olddrvdesc(&v1_args.sd->sd_drvs, sd->sd_drvs);
+
+			v1_args.hostname = this_host;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			v1_args.sp = sp;
+
+			v1_args.sd->sd_ctime = sd->sd_ctime;
+			v1_args.sd->sd_genid = sd->sd_genid;
+			v1_args.sd->sd_setno = sd->sd_setno;
+			v1_args.sd->sd_flags = sd->sd_flags;
+			for (side = 0; side < MD_MAXSIDES; side++) {
+				v1_args.sd->sd_isown[side] =
+				    sd->sd_isown[side];
+				for (ch = 0; ch < MD_MAX_NODENAME_PLUS_1;
+				    ch++) {
+					v1_args.sd->sd_nodes[side][ch] =
+					    sd->sd_nodes[side][ch];
+				}
+			}
+			v1_args.sd->sd_med = sd->sd_med;
+			meta_conv_drvdesc_new2old(v1_args.sd->sd_drvs,
+			    sd->sd_drvs);
+			v1_args.node_v.node_v_len = node_c;
+			v1_args.node_v.node_v_val = node_v;
+
+			rpc_stat = mdrpc_add_drv_sidenms_1(&v1_args, &res,
+			    clntp);
+
+			/* the converted copy is not needed past the call */
+			free_olddrvdesc(v1_args.sd->sd_drvs);
+			free(v1_args.sd);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_add_drv_sidenms_2(&v2_args, &res,
+			    clntp);
+		}
+
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad add drive sidenames"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Add drives to disksets.
+ *
+ * When running inside the daemon and hostname is this node, the
+ * version 2 service routine is called directly.  Otherwise an RPC
+ * handle is opened and the procedure matching the handle's version is
+ * used; for version 1 the drive list is first converted to the old
+ * layout and released again after the call.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_adddrvs(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,
+	md_timeval32_t		timestamp,
+	ulong_t			genid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_drives_args	v1_args;
+	mdrpc_drives_2_args	v2_args;
+	mdrpc_drives_2_args_r1	*r1;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* the version 2 arguments are always prepared */
+	r1 = &v2_args.mdrpc_drives_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->drivedescs = dd;
+	r1->timestamp = timestamp;
+	r1->genid = genid;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 procedure directly */
+		svc_ok = mdrpc_adddrvs_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle tells us which procedure version to use */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* old servers need the drive list in the old layout */
+			alloc_olddrvdesc(&v1_args.drivedescs, dd);
+
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			meta_conv_drvdesc_new2old(v1_args.drivedescs, dd);
+			v1_args.timestamp = timestamp;
+			v1_args.genid = genid;
+
+			rpc_stat = mdrpc_adddrvs_1(&v1_args, &res, clntp);
+
+			free_olddrvdesc(v1_args.drivedescs);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_adddrvs_2(&v2_args, &res, clntp);
+		}
+
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad add drives"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Add hosts to disksets.
+ *
+ * Passes the node_c names in node_v to the metad service on hostname.
+ * When running inside the daemon and hostname is this node, the
+ * version 2 service routine is called directly; otherwise an RPC handle
+ * is opened and the version 1 or version 2 procedure is chosen from the
+ * version the handle reports.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_addhosts(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			node_c,
+	char			**node_v,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_host_2_args	v2_args;
+	mdrpc_host_args		*r1;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* the rev1 member of v2_args is also what the v1 call is given */
+	r1 = &v2_args.mdrpc_host_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->hosts.hosts_len = node_c;
+	r1->hosts.hosts_val = node_v;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		svc_ok = mdrpc_addhosts_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle tells us which procedure version to use */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_addhosts_1(r1, &res, clntp);
+		else
+			rpc_stat = mdrpc_addhosts_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad add hosts"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Create disksets.
+ *
+ * Sends the set name, the MD_MAXSIDES node names in nodes, the
+ * timestamp and genid to the metad service on hostname.  When running
+ * inside the daemon and hostname is this node, the version 2 service
+ * routine is called directly; otherwise an RPC handle is opened and the
+ * version 1 or version 2 procedure is chosen from the version the
+ * handle reports.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_createset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_node_nm_arr_t	nodes,
+	md_timeval32_t		timestamp,
+	ulong_t			genid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_createset_2_args	v2_args;
+	mdrpc_createset_args	*r1;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+	int			side;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* the rev1 member of v2_args is also what the v1 call is given */
+	r1 = &v2_args.mdrpc_createset_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->timestamp = timestamp;
+	r1->genid = genid;
+	/*
+	 * NOTE(review): strcpy() relies on every nodes[] entry being
+	 * NUL-terminated within the destination row - confirm.
+	 */
+	side = 0;
+	while (side < MD_MAXSIDES) {
+		(void) strcpy(r1->nodes[side], nodes[side]);
+		side++;
+	}
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		svc_ok = mdrpc_createset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle tells us which procedure version to use */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_createset_1(r1, &res, clntp);
+		else
+			rpc_stat = mdrpc_createset_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad create set"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Create MN disksets.
+ *
+ * There is no version 1 form of this request.  When running inside the
+ * daemon and hostname is this node, the version 2 service routine is
+ * called directly.  Otherwise an RPC handle is opened; if it reports
+ * version 1, MDE_DS_RPCVERSMISMATCH is recorded in ep and -1 is
+ * returned, else the version 2 procedure is called.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_mncreateset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_mnnode_desc		*nodelist,
+	md_timeval32_t		timestamp,
+	ulong_t			genid,
+	md_node_nm_t		master_nodenm,
+	int			master_nodeid,
+	md_error_t		*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_mncreateset_2_args	v2_args;
+	mdrpc_mncreateset_args		*r1;
+	mdrpc_generic_res		res;
+	int				version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* build the version 2 arguments */
+	r1 = &v2_args.mdrpc_mncreateset_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->timestamp = timestamp;
+	r1->genid = genid;
+	(void) strlcpy(r1->master_nodenm, master_nodenm, MD_MAX_NODENAME);
+	r1->master_nodeid = master_nodeid;
+	r1->nodelist = nodelist;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		svc_ok = mdrpc_mncreateset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* a version 1 peer cannot serve this request */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_mncreateset_2(&v2_args, &res, clntp) ==
+		    RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad mncreate set"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Join MN set
+ *
+ * There is no version 1 form of this request.  When running inside the
+ * daemon and hostname is this node, the version 2 service routine is
+ * called directly.  Otherwise an RPC handle is opened; if it reports
+ * version 1, MDE_DS_RPCVERSMISMATCH is recorded in ep and -1 is
+ * returned, else the version 2 procedure is called.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_joinset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			flags,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_flags_2_args	v2_args;
+	mdrpc_sp_flags_args	*r1;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* build the version 2 arguments */
+	r1 = &v2_args.mdrpc_sp_flags_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->flags = flags;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		svc_ok = mdrpc_joinset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* a version 1 peer cannot serve this request */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_joinset_2(&v2_args, &res, clntp) == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad join set"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Withdraw from MN set
+ *
+ * There is no version 1 form of this request.  When running inside the
+ * daemon and hostname is this node, the version 2 service routine is
+ * called directly.  Otherwise an RPC handle is opened; if it reports
+ * version 1, MDE_DS_RPCVERSMISMATCH is recorded in ep and -1 is
+ * returned, else the version 2 procedure is called.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_withdrawset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_sp_args		*r1;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* build the version 2 arguments */
+	r1 = &v2_args.mdrpc_sp_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		svc_ok = mdrpc_withdrawset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* a version 1 peer cannot serve this request */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_withdrawset_2(&v2_args, &res, clntp) ==
+		    RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad withdraw set"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Delete side names for the diskset drive records
+ * NOTE: these are removed from the local set's namespace.
+ *
+ * When running inside the daemon and hostname is this node, the
+ * version 2 service routine is called directly.  Otherwise an RPC
+ * handle is opened, metaget_setdesc() is called for sp, and the version
+ * 1 or version 2 procedure is chosen from the version the handle
+ * reports.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.  The RPC
+ * handle is closed on every path that opened it, including the early
+ * return taken when metaget_setdesc() fails.
+ */
+int
+clnt_del_drv_sidenms(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_args		*args;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args; the rev1 member is also what the v1 call is given */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		bool = mdrpc_del_drv_sidenms_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		if (metaget_setdesc(sp, ep) == NULL) {
+			if (! mdisok(ep)) {
+				/* do not leak the handle opened above */
+				metarpcclose(clntp);
+				return (-1);
+			}
+			mdclrerror(ep);
+		}
+
+		/*
+		 * Check the client handle for the version and invoke
+		 * the appropriate version of the remote procedure
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			if (mdrpc_del_drv_sidenms_1(args, &res, clntp) !=
+			    RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN,
+				    "metad delete drive sidenames"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		} else {
+			if (mdrpc_del_drv_sidenms_2(&v2_args, &res, clntp) !=
+			    RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN,
+				    "metad delete drive sidenames"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * delete drives from the set
+ *
+ * When running inside the daemon and hostname is this node, the
+ * version 2 service routine is called directly.  Otherwise an RPC
+ * handle is opened and the procedure matching the handle's version is
+ * used; for version 1 the drive list is first converted to the old
+ * layout and released again after the call.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_deldrvs(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_drives_args	v1_args;
+	mdrpc_drives_2_args	v2_args;
+	mdrpc_drives_2_args_r1	*r1;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* the version 2 arguments are always prepared */
+	r1 = &v2_args.mdrpc_drives_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->drivedescs = dd;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 procedure directly */
+		svc_ok = mdrpc_deldrvs_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle tells us which procedure version to use */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* old servers need the drive list in the old layout */
+			alloc_olddrvdesc(&v1_args.drivedescs, dd);
+
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			meta_conv_drvdesc_new2old(v1_args.drivedescs, dd);
+
+			rpc_stat = mdrpc_deldrvs_1(&v1_args, &res, clntp);
+
+			free_olddrvdesc(v1_args.drivedescs);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_deldrvs_2(&v2_args, &res, clntp);
+		}
+
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad delete drives"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * delete host(s) from a set.
+ *
+ * Passes the node_c names in node_v to the metad service on hostname.
+ * When running inside the daemon and hostname is this node, the
+ * version 2 service routine is called directly; otherwise an RPC handle
+ * is opened and the version 1 or version 2 procedure is chosen from the
+ * version the handle reports.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_delhosts(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			node_c,
+	char			**node_v,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_host_2_args	v2_args;
+	mdrpc_host_args		*r1;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* the rev1 member of v2_args is also what the v1 call is given */
+	r1 = &v2_args.mdrpc_host_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->hosts.hosts_len = node_c;
+	r1->hosts.hosts_val = node_v;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		svc_ok = mdrpc_delhosts_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle tells us which procedure version to use */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_delhosts_1(r1, &res, clntp);
+		else
+			rpc_stat = mdrpc_delhosts_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad delete hosts"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Delete diskset.
+ *
+ * When running inside the daemon and hostname is this node, the
+ * version 2 service routine is called directly; otherwise an RPC handle
+ * is opened and the version 1 or version 2 procedure is chosen from the
+ * version the handle reports.
+ *
+ * Returns 0 when ep holds no error afterwards, -1 otherwise.
+ */
+int
+clnt_delset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_sp_args		*r1;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* the rev1 member of v2_args is also what the v1 call is given */
+	r1 = &v2_args.mdrpc_sp_2_args_u.rev1;
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		svc_ok = mdrpc_delset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle tells us which procedure version to use */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_delset_1(r1, &res, clntp);
+		else
+			rpc_stat = mdrpc_delset_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad delete set"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * return remote device info
+ *
+ * Asks the metad service on hostname about drive dp in set sp.  When
+ * running inside the daemon and hostname is this node, the version 2
+ * service routine is called directly.  Otherwise an RPC handle is
+ * opened and the procedure matching the handle's version is used; for
+ * version 1 the drive name is converted to the old layout for the call
+ * and the returned device number is passed through meta_expldev().
+ *
+ * On success the device number and timestamp from the reply are stored
+ * through ret_dev and ret_timestamp (either may be NULL to skip it) and
+ * 0 is returned.  On any failure -1 is returned with the error in ep
+ * and the output arguments are left untouched.
+ *
+ * Which result structure is in use is tracked in a local flag rather
+ * than by re-testing "version", which is only set on the RPC path.
+ */
+int
+clnt_devinfo(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mddrivename_t		*dp,
+	md_dev64_t		*ret_dev,
+	time_t			*ret_timestamp,
+	md_error_t		*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_devinfo_args		v1_args;
+	mdrpc_devinfo_2_args		v2_args;
+	mdrpc_devinfo_2_args_r1		*v21_args;
+	mdrpc_devinfo_res		v1_res;
+	mdrpc_devinfo_2_res		v2_res;
+	int				rpc_stat;
+	int				version;
+	int				used_v1 = 0;	/* reply is in v1_res */
+	int				rval = -1;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&v1_res, 0, sizeof (v1_res));
+	(void) memset(&v2_res, 0, sizeof (v2_res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	v21_args = &v2_args.mdrpc_devinfo_2_args_u.rev1;
+	v21_args->sp = sp;
+	v21_args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	v21_args->drivenamep = dp;
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * If the server is local, we call the v2 procedure, so
+		 * the status to pick up is the one in the v2 result.
+		 */
+		bool = mdrpc_devinfo_2_svc(&v2_args, &v2_res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &v2_res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Check the client handle for the version
+		 * and invoke the appropriate version of
+		 * the remote procedure.
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			used_v1 = 1;
+
+			v1_args.drivenamep =
+			    Zalloc(sizeof (o_mddrivename_t));
+			v1_args.drivenamep->parts.parts_val =
+			    Zalloc((sizeof (o_mdname_t)) *
+			    dp->parts.parts_len);
+
+			/* build args */
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno,
+			    sp->setname);
+
+			/*
+			 * Convert v2 arguments to v1 arguments
+			 * before sending over the wire.
+			 */
+			meta_conv_drvname_new2old(v1_args.drivenamep,
+			    v21_args->drivenamep);
+
+			rpc_stat = mdrpc_devinfo_1(&v1_args, &v1_res, clntp);
+
+			free(v1_args.drivenamep->parts.parts_val);
+			free(v1_args.drivenamep);
+
+			if (rpc_stat != RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN, "metad device info"));
+			else
+				(void) mdstealerror(ep, &v1_res.status);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_devinfo_2(&v2_args, &v2_res, clntp);
+			if (rpc_stat != RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN, "metad device info"));
+			else
+				(void) mdstealerror(ep, &v2_res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	if (mdisok(ep)) {
+		/* do something with the results */
+		rval = 0;
+
+		if (ret_dev != NULL) {
+			if (used_v1)
+				*ret_dev = meta_expldev(v1_res.dev);
+			else
+				*ret_dev = v2_res.dev;
+		}
+
+		if (ret_timestamp != NULL) {
+			if (used_v1)
+				*ret_timestamp = v1_res.vtime;
+			else
+				*ret_timestamp = v2_res.vtime;
+		}
+	}
+
+	/* release whichever result structure was actually filled in */
+	if (used_v1)
+		xdr_free(xdr_mdrpc_devinfo_res, (char *)&v1_res);
+	else
+		xdr_free(xdr_mdrpc_devinfo_2_res, (char *)&v2_res);
+
+	return (rval);
+}
+
+/*
+ * Return the devid string (enc_devid in the reply) that metad on hostname
+ * reports for drive dp of set sp.
+ *
+ * When running inside the daemon with hostname naming the local node, the
+ * version 2 service routine is called directly.  Otherwise an RPC client
+ * handle is opened; if the handle reports version 1, no call is made and
+ * MDE_DS_DRIVENOTONHOST is recorded in ep.
+ *
+ * On success, and if ret_encdevid is not NULL, *ret_encdevid receives a
+ * strdup()ed copy of the devid string (NULL if the reply carried none).
+ *
+ * Returns 0 on success and -1 on failure, with the error left in ep.
+ */
+int
+clnt_devid(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mddrivename_t		*dp,
+	char			**ret_encdevid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_devid_args	*args;
+	mdrpc_devid_2_args	v2_args;
+	mdrpc_devid_res		res;
+	int			rpc_stat;
+	int			rval = -1;	/* pessimistic default */
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_devid_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->drivenamep = dp;
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * If the server is local, we call the v2 procedure.
+		 */
+		bool = mdrpc_devid_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Check the client handle for the version
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/*
+		 * If the client is version 1, return error
+		 * otherwise, make the remote procedure call.
+		 */
+		if (version == METAD_VERSION) {	/* version 1 */
+			(void) mddserror(ep, MDE_DS_DRIVENOTONHOST, sp->setno,
+			    hostname, dp->cname, sp->setname);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_devid_2(&v2_args, &res, clntp);
+
+			if (rpc_stat != RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN, "metad devid info"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	if (mdisok(ep)) {
+		/* do something with the results */
+		rval = 0;
+
+		if (ret_encdevid != NULL) {
+			/* strdup(NULL) is undefined; hand back NULL instead */
+			if (res.enc_devid != NULL)
+				*ret_encdevid = strdup(res.enc_devid);
+			else
+				*ret_encdevid = NULL;
+		}
+	}
+
+	xdr_free(xdr_mdrpc_devid_res, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Look up a disk on a remote host by its devid and report back the device
+ * name, the driver name and the dev_t found there.  The devid is the
+ * lookup key because it is unique to the disk.  The device name from the
+ * originating node is sent along as well; if that name turns up in the
+ * devid to namelist translation it is the one used, which keeps device
+ * names as consistent as possible across the nodes.
+ *
+ * The by-devid-and-name procedure is tried first; locally that call
+ * should always succeed.  When going through a CLIENT handle the called
+ * node may not implement it, so if that RPC fails the request is retried
+ * with mdrpc_devinfo_by_devid_2.
+ *
+ * Returns:
+ *	-1	Error
+ *	ENOTSUP	Operation not supported (the client handle reports
+ *		version 1 for the remote node)
+ *	0	Success
+ */
+int
+clnt_devinfo_by_devid(
+	char		*hostname,
+	mdsetname_t	*sp,
+	char		*devidstr,
+	md_dev64_t	*ret_dev,
+	char		*orig_devname,
+	char		**ret_devname,
+	char		**ret_driver,
+	md_error_t	*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_devidstr_args		fallback_args;
+	mdrpc_devid_name_2_args		v2_args;
+	mdrpc_devid_name_args		*name_args;
+	mdrpc_devinfo_2_res		res;
+	int				rpc_stat;
+	int				version;
+
+	/* start with no error and zeroed request/reply */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	name_args = &v2_args.mdrpc_devid_name_2_args_u.rev1;
+	name_args->sp = sp;
+	name_args->enc_devid = devidstr;
+	name_args->orig_devname = orig_devname;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local request: invoke the service routine directly */
+		ok = mdrpc_devinfo_by_devid_name_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		/* open connection */
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* nothing to call on a version 1 handle */
+		if (version == METAD_VERSION) {
+			metarpcclose(clntp);
+			return (ENOTSUP);
+		}
+
+		rpc_stat = mdrpc_devinfo_by_devid_name_2(&v2_args, &res,
+		    clntp);
+
+		if (rpc_stat != RPC_SUCCESS) {
+			/* retry with the devid-only procedure */
+			(void) memset(&fallback_args, 0,
+			    sizeof (fallback_args));
+			(void) memset(&res, 0, sizeof (res));
+
+			fallback_args.enc_devid = devidstr;
+			fallback_args.sp = sp;
+
+			rpc_stat = mdrpc_devinfo_by_devid_2(
+			    &fallback_args, &res, clntp);
+		}
+
+		/* whichever call was made last decides the outcome */
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad devinfo by devid"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	/* copy out whatever the caller asked for */
+	if (mdisok(ep)) {
+		if (ret_dev != NULL)
+			*ret_dev = res.dev;
+
+		if (res.devname != NULL && ret_devname != NULL)
+			*ret_devname = Strdup(res.devname);
+
+		if (res.drivername != NULL && ret_driver != NULL)
+			*ret_driver = Strdup(res.drivername);
+	}
+
+	xdr_free(xdr_mdrpc_devinfo_2_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+
+}
+
+
+/*
+ * Check with metad on hostname whether drive dp of set sp is in use
+ * (e.g. mounted).  The answer comes back through ep.
+ *
+ * Returns 0 if the call completed without error, -1 otherwise.
+ */
+int
+clnt_drvused(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mddrivename_t		*dp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_drvused_args	old_args;
+	mdrpc_drvused_2_args	v2_args;
+	mdrpc_drvused_2_args_r1	*new_args;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&old_args, 0, sizeof (old_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the v2 request is always prepared */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	new_args = &v2_args.mdrpc_drvused_2_args_u.rev1;
+	new_args->sp = sp;
+	new_args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	new_args->drivenamep = dp;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local server: go straight to the v2 service routine */
+		ok = mdrpc_drvused_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		/* open connection */
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle tells us which procedure version to use */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version != METAD_VERSION) {	/* version 2 */
+			rpc_stat = mdrpc_drvused_2(&v2_args, &res, clntp);
+		} else {			/* version 1 */
+			old_args.drivenamep =
+			    Zalloc(sizeof (o_mddrivename_t));
+			old_args.drivenamep->parts.parts_val =
+			    Zalloc((sizeof (o_mdname_t)) *
+			    dp->parts.parts_len);
+
+			old_args.sp = sp;
+			old_args.cl_sk = cl_get_setkey(sp->setno,
+			    sp->setname);
+
+			/* the old procedure wants the old drive name layout */
+			meta_conv_drvname_new2old(old_args.drivenamep,
+			    new_args->drivenamep);
+
+			rpc_stat = mdrpc_drvused_1(&old_args, &res, clntp);
+
+			free(old_args.drivenamep->parts.parts_val);
+			free(old_args.drivenamep);
+		}
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad drive used"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Deep-free a set record.  Does nothing when running inside the daemon.
+ *
+ * The record is wrapped in a dummy result structure of the matching kind
+ * and handed to xdr_free, so that the xdr code also releases the linked
+ * list of drive records hanging off an sr and, for a multi-node record,
+ * the linked list of node records as well.
+ */
+void
+free_sr(md_set_record *sr)
+{
+	if (md_in_daemon)
+		return;
+
+	if (!MD_MNSET_REC(sr)) {
+		mdrpc_getset_res	wrap;
+
+		(void) memset(&wrap, 0, sizeof (wrap));
+		wrap.sr = sr;
+		xdr_free(xdr_mdrpc_getset_res, (char *)&wrap);
+	} else {
+		mdrpc_mngetset_res	mnwrap;
+
+		(void) memset(&mnwrap, 0, sizeof (mnwrap));
+		mnwrap.mnsr = (struct md_mnset_record *)sr;
+		xdr_free(xdr_mdrpc_mngetset_res, (char *)&mnwrap);
+	}
+}
+
+/*
+ * Fill in res->sr (and res->status) without going over RPC: look the set
+ * up by name when args carries one, by set number otherwise.
+ */
+void
+short_circuit_getset(
+	mdrpc_getset_args	*args,
+	mdrpc_getset_res	*res
+)
+{
+	res->sr = (args->setname == NULL) ?
+	    metad_getsetbynum(args->setno, &res->status) :
+	    metad_getsetbyname(args->setname, &res->status);
+}
+
+/*
+ * Fill in res->mnsr (and res->status) without going over RPC.  The set is
+ * looked up by name when args carries one, by set number otherwise.
+ * res->mnsr is set to the record only if it is a multi-node set record;
+ * in every other case, including a lookup that produced no record, it is
+ * set to NULL.
+ */
+void
+short_circuit_mngetset(
+	mdrpc_getset_args	*args,
+	mdrpc_mngetset_res	*res
+)
+{
+	md_set_record		*sr;
+
+	if (args->setname != NULL)
+		sr = metad_getsetbyname(args->setname, &res->status);
+	else
+		sr = metad_getsetbynum(args->setno, &res->status);
+
+	/* never hand a NULL record to MD_MNSET_REC */
+	if (sr != NULL && MD_MNSET_REC(sr)) {
+		res->mnsr = (struct md_mnset_record *)sr;
+	} else {
+		res->mnsr = NULL;
+	}
+}
+
+/*
+ * Return the auto-take lookup result for a set, keyed by name when one is
+ * supplied and by set number otherwise.
+ */
+static int
+is_auto_take_set(char *setname, set_t setno)
+{
+	return ((setname == NULL) ?
+	    metad_isautotakebynum(setno) :
+	    metad_isautotakebyname(setname));
+}
+
+/*
+ * Return the diskset record, and drive records.
+ * If the record is a MNdiskset record, then only the first md_set_record
+ * bytes were copied from the daemon.
+ *
+ * On success the record is stored in *ret_sr when ret_sr is not NULL;
+ * otherwise it is released again (unless running inside the daemon).
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_getset(
+	char			*hostname,
+	char			*setname,
+	set_t			setno,
+	md_set_record		**ret_sr,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_getset_2_args	v2_args;
+	mdrpc_getset_args	*req;
+	mdrpc_getset_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	req = &v2_args.mdrpc_getset_2_args_u.rev1;
+	req->setname = setname;
+	req->setno = setno;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local daemon: no RPC needed */
+		short_circuit_getset(req, &res);
+		(void) mdstealerror(ep, &res.status);
+	} else if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) != NULL) {
+		/*
+		 * The version on the client handle picks the
+		 * procedure to invoke.
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_getset_1(req, &res, clntp);
+		else
+			rpc_stat = mdrpc_getset_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad get set"));
+
+		metarpcclose(clntp);
+	} else if (is_auto_take_set(setname, setno)) {
+		/*
+		 * No connection could be opened.  This has to work during
+		 * the boot up before the rpc.metad can run, so an auto-take
+		 * set is handled as a strictly local diskset.
+		 */
+		mdclrerror(ep);
+		short_circuit_getset(req, &res);
+		res.sr = setdup(res.sr);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		return (-1);
+	}
+
+	if (!mdisok(ep))
+		return (-1);
+
+	/* hand the record over, or drop it if nobody wants it */
+	if (ret_sr != NULL)
+		*ret_sr = res.sr;
+	else if (! md_in_daemon)
+		xdr_free(xdr_mdrpc_getset_res, (char *)&res);
+
+	return (0);
+}
+
+/*
+ * Return the multi-node diskset record, drive records and node records.
+ *
+ * When running inside the daemon with hostname naming the local node the
+ * record is fetched directly.  Otherwise an RPC client handle is opened;
+ * a handle reporting version 1 is rejected with MDE_DS_RPCVERSMISMATCH.
+ *
+ * On success the record is stored in *ret_mnsr when ret_mnsr is not NULL;
+ * otherwise it is released again (unless running inside the daemon).
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_mngetset(
+	char			*hostname,
+	char			*setname,
+	set_t			setno,
+	md_mnset_record		**ret_mnsr,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_getset_args	*args;
+	mdrpc_getset_2_args	v2_args;
+	mdrpc_mngetset_res	res;
+	int			rval = -1;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_getset_2_args_u.rev1;
+	args->setname = setname;
+	args->setno = setno;
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		short_circuit_mngetset(args, &res);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Check the client handle for the version
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/*
+		 * If the client is version 1, return error
+		 * otherwise, make the remote procedure call.
+		 */
+		if (version == METAD_VERSION) {	/* version 1 */
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    setno, hostname, NULL, setname);
+			metarpcclose(clntp);
+			return (-1);
+		} else {
+			if (mdrpc_mngetset_2(&v2_args, &res, clntp)
+			    != RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN, "metad mn get set"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	/* If no ep error and no version mismatch - rpc call worked ok */
+	if (mdisok(ep)) {
+		rval = 0;
+		if (ret_mnsr != NULL)
+			*ret_mnsr = res.mnsr;
+		else
+			if (! md_in_daemon)
+				xdr_free(xdr_mdrpc_mngetset_res, (char *)&res);
+	}
+
+	return (rval);
+}
+
+/*
+ * Set master nodeid and nodename in multi-node set record.
+ *
+ * The request always goes through an RPC client handle opened to
+ * hostname.  A handle reporting version 1 is rejected with
+ * MDE_DS_RPCVERSMISMATCH.
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_mnsetmaster(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_node_nm_t		master_nodenm,
+	int			master_nodeid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_mnsetmaster_args	*args;
+	mdrpc_mnsetmaster_2_args	v2_args;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_mnsetmaster_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	(void) strlcpy(args->master_nodenm, master_nodenm, MD_MAX_NODENAME);
+	args->master_nodeid = master_nodeid;
+
+	/* do it */
+	if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * Check the client handle for the version
+	 */
+	CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+	/*
+	 * If the client is version 1, return error
+	 * otherwise, make the remote procedure call.
+	 */
+	if (version == METAD_VERSION) {	/* version 1 */
+		(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+		    sp->setno, hostname, NULL, sp->setname);
+		metarpcclose(clntp);
+		return (-1);
+	} else {
+		if (mdrpc_mnsetmaster_2(&v2_args, &res, clntp) != RPC_SUCCESS)
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad multi-owner set master"));
+		else
+			(void) mdstealerror(ep, &res.status);
+	}
+
+	metarpcclose(clntp);
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Get the MH timeout values of set sp from metad on hostname.
+ *
+ * On success, and if ret_mhiargs is not NULL, the values from the reply
+ * are copied into *ret_mhiargs.
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_gtimeout(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mhd_mhiargs_t		*ret_mhiargs,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_sp_args		*req;
+	mdrpc_gtimeout_res	res;
+	int			rpc_stat;
+	int			status;
+	int			version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	req = &v2_args.mdrpc_sp_2_args_u.rev1;
+	req->sp = sp;
+	req->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local daemon: call the service routine directly */
+		ok = mdrpc_gtimeout_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle's version picks the procedure */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_gtimeout_1(req, &res, clntp);
+		else
+			rpc_stat = mdrpc_gtimeout_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad get timeout"));
+
+		metarpcclose(clntp);
+	}
+
+	status = mdisok(ep) ? 0 : -1;
+
+	/* copy md_mhiargs_t out of the reply before it is freed */
+	if (status == 0 && ret_mhiargs != NULL)
+		*ret_mhiargs = *res.mhiargsp;
+
+	xdr_free(xdr_mdrpc_gtimeout_res, (char *)&res);
+
+	return (status);
+}
+
+/*
+ * Get real hostname from remote host.
+ *
+ * On success, and if ret_hostname is not NULL, *ret_hostname receives a
+ * Strdup()ed copy of the name in the reply.
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_hostname(
+	char			*hostname,
+	char			**ret_hostname,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_null_args		req;
+	mdrpc_hostname_res	res;
+	int			status;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&req, 0, sizeof (req));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* this request carries no set key */
+	req.cl_sk = NULL;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local daemon: call the service routine directly */
+		ok = mdrpc_hostname_1_svc(&req, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		if (mdrpc_hostname_1(&req, &res, clntp) == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad hostname"));
+
+		metarpcclose(clntp);
+	}
+
+	status = mdisok(ep) ? 0 : -1;
+
+	/* duplicate the name before the reply is freed */
+	if (status == 0 && ret_hostname != NULL)
+		*ret_hostname = Strdup(res.hostname);
+
+	xdr_free(xdr_mdrpc_hostname_res, (char *)&res);
+
+	return (status);
+}
+
+/*
+ * NULLPROC - just returns a response.
+ *
+ * Returns 0 if the call went through without error, -1 otherwise with
+ * the error left in ep.
+ */
+int
+clnt_nullproc(
+	char		*hostname,
+	md_error_t	*ep
+)
+{
+	CLIENT		*clntp;
+
+	mdclrerror(ep);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local daemon: call the service routine directly */
+		ok = mdrpc_nullproc_1_svc(NULL, ep, NULL);
+		assert(ok == TRUE);
+	} else {
+		clntp = metarpcopen(hostname, CL_DEF_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		if (mdrpc_nullproc_1(NULL, ep, clntp) != RPC_SUCCESS) {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad nullproc"));
+		}
+
+		metarpcclose(clntp);
+	}
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Does host own the set?
+ *
+ * On success, and if ret_bool is not NULL, the answer is stored in
+ * *ret_bool.
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_ownset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			*ret_bool,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_sp_args		*req;
+	mdrpc_bool_res		res;
+	int			rpc_stat;
+	int			status;
+	int			version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	req = &v2_args.mdrpc_sp_2_args_u.rev1;
+	req->sp = sp;
+	req->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local daemon: call the service routine directly */
+		ok = mdrpc_ownset_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) != NULL) {
+		/* the handle's version picks the procedure */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_ownset_1(req, &res, clntp);
+		else
+			rpc_stat = mdrpc_ownset_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad own set"));
+
+		metarpcclose(clntp);
+	} else if (is_auto_take_set(sp->setname, sp->setno)) {
+		/*
+		 * No connection could be opened.  This has to work in the
+		 * code path from libpreen which is running within fsck
+		 * before the rpc.metad can run, so an auto-take diskset is
+		 * answered here.  mdrpc_ownset_2_svc can't be called since
+		 * we are not in the daemon.
+		 */
+		mdclrerror(ep);
+		res.value = s_ownset(sp->setno, ep) ? TRUE : FALSE;
+	} else {
+		return (-1);
+	}
+
+	status = mdisok(ep) ? 0 : -1;
+
+	if (status == 0 && ret_bool != NULL)
+		*ret_bool = res.value;
+
+	xdr_free(xdr_mdrpc_bool_res, (char *)&res);
+
+	return (status);
+}
+
+/*
+ * Valid set name?
+ *
+ * Asks metad on hostname about the name of set sp.  On success, and if
+ * ret_bool is not NULL, the answer is stored in *ret_bool.
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_setnameok(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			*ret_bool,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_sp_args		*req;
+	mdrpc_bool_res		res;
+	int			rpc_stat;
+	int			status;
+	int			version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	req = &v2_args.mdrpc_sp_2_args_u.rev1;
+	req->sp = sp;
+	req->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local daemon: call the service routine directly */
+		ok = mdrpc_setnameok_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle's version picks the procedure */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_setnameok_1(req, &res, clntp);
+		else
+			rpc_stat = mdrpc_setnameok_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad setname ok"));
+
+		metarpcclose(clntp);
+	}
+
+	status = mdisok(ep) ? 0 : -1;
+
+	if (status == 0 && ret_bool != NULL)
+		*ret_bool = res.value;
+
+	xdr_free(xdr_mdrpc_bool_res, (char *)&res);
+
+	return (status);
+}
+
+/*
+ * Is set number in-use?
+ *
+ * Asks metad on hostname about set number setno.  On success, and if
+ * ret_bool is not NULL, the answer is stored in *ret_bool.
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_setnumbusy(
+	char			*hostname,
+	set_t			setno,
+	int			*ret_bool,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_setno_2_args	v2_args;
+	mdrpc_setno_args	*req;
+	mdrpc_bool_res		res;
+	int			rpc_stat;
+	int			status;
+	int			version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request; no set key is sent */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	req = &v2_args.mdrpc_setno_2_args_u.rev1;
+	req->setno = setno;
+	req->cl_sk = NULL;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local daemon: call the service routine directly */
+		ok = mdrpc_setnumbusy_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle's version picks the procedure */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_setnumbusy_1(req, &res, clntp);
+		else
+			rpc_stat = mdrpc_setnumbusy_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad setnumber busy"));
+
+		metarpcclose(clntp);
+	}
+
+	status = mdisok(ep) ? 0 : -1;
+
+	if (status == 0 && ret_bool != NULL)
+		*ret_bool = res.value;
+
+	xdr_free(xdr_mdrpc_bool_res, (char *)&res);
+
+	return (status);
+}
+
+/*
+ * Set the timeout values used into the drive records.
+ *
+ * Sends mhiargsp for set sp to metad on hostname.
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_stimeout(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mhd_mhiargs_t		*mhiargsp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_stimeout_2_args	v2_args;
+	mdrpc_stimeout_args	*req;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	req = &v2_args.mdrpc_stimeout_2_args_u.rev1;
+	req->sp = sp;
+	req->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	req->mhiargsp = mhiargsp;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local daemon: call the service routine directly */
+		ok = mdrpc_stimeout_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle's version picks the procedure */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_stimeout_1(req, &res, clntp);
+		else
+			rpc_stat = mdrpc_stimeout_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad set timeout"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Update drive records.
+ *
+ * Sends the drive descriptor list dd for set sp to metad on hostname.
+ * For a version 1 client handle the list is first converted into a
+ * temporary old-format copy.
+ *
+ * Returns 0 on success, -1 on failure with the error left in ep.
+ */
+int
+clnt_upd_dr_dbinfo(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_drives_args	old_args;
+	mdrpc_drives_2_args	v2_args;
+	mdrpc_drives_2_args_r1	*new_args;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&old_args, 0, sizeof (old_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the v2 request is always prepared */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	new_args = &v2_args.mdrpc_drives_2_args_u.rev1;
+	new_args->sp = sp;
+	new_args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	new_args->drivedescs = dd;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local server: go straight to the v2 service routine */
+		ok = mdrpc_upd_dr_dbinfo_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle's version picks the procedure */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version != METAD_VERSION) {	/* version 2 */
+			rpc_stat = mdrpc_upd_dr_dbinfo_2(&v2_args, &res,
+			    clntp);
+		} else {			/* version 1 */
+			alloc_olddrvdesc(&old_args.drivedescs, dd);
+
+			old_args.sp = sp;
+			old_args.cl_sk = cl_get_setkey(sp->setno,
+			    sp->setname);
+			meta_conv_drvdesc_new2old(old_args.drivedescs, dd);
+
+			rpc_stat = mdrpc_upd_dr_dbinfo_1(&old_args, &res,
+			    clntp);
+
+			free_olddrvdesc(old_args.drivedescs);
+		}
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad update drive dbinfo"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Update dr_flags field of drive record.
+ *
+ * Sends new_flags for the drives in dd of set sp to metad on hostname.
+ * For a version 1 client handle the descriptor list is first converted
+ * into a temporary old-format copy.
+ *
+ * Returns 0 on success.  An RPC error is also cleared and reported as 0
+ * when hostname is not the local node.  Every other error returns -1
+ * with the error left in ep.
+ */
+int
+clnt_upd_dr_flags(
+	char				*hostname,
+	mdsetname_t			*sp,
+	md_drive_desc			*dd,
+	uint_t				new_flags,
+	md_error_t			*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_upd_dr_flags_args		old_args;
+	mdrpc_upd_dr_flags_2_args	v2_args;
+	mdrpc_upd_dr_flags_2_args_r1	*new_args;
+	mdrpc_generic_res		res;
+	int				rpc_stat;
+	int				version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&old_args, 0, sizeof (old_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the v2 request is always prepared */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	new_args = &v2_args.mdrpc_upd_dr_flags_2_args_u.rev1;
+	new_args->sp = sp;
+	new_args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	new_args->drivedescs = dd;
+	new_args->new_flags = new_flags;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local server: go straight to the v2 service routine */
+		ok = mdrpc_upd_dr_flags_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle's version picks the procedure */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version != METAD_VERSION) {	/* version 2 */
+			rpc_stat = mdrpc_upd_dr_flags_2(&v2_args, &res,
+			    clntp);
+		} else {			/* version 1 */
+			alloc_olddrvdesc(&old_args.drivedescs, dd);
+
+			old_args.sp = sp;
+			old_args.cl_sk = cl_get_setkey(sp->setno,
+			    sp->setname);
+			meta_conv_drvdesc_new2old(old_args.drivedescs, dd);
+			old_args.new_flags = new_flags;
+
+			rpc_stat = mdrpc_upd_dr_flags_1(&old_args, &res,
+			    clntp);
+
+			free_olddrvdesc(old_args.drivedescs);
+		}
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad update drive flags"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only an RPC error against another node is forgiven */
+	if (! mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Update set record flags.
+ * All of the sr_flags are replaced with new_flags; the caller is relied
+ * upon to "do the right thing" and carry over any existing flags that
+ * should not be reset.
+ *
+ * Returns 0 on success.  An RPC error is also cleared and reported as 0
+ * when hostname is not the local node.  Every other error returns -1
+ * with the error left in ep.
+ */
+static int
+upd_sr_flags_common(
+	char				*hostname,
+	mdsetname_t			*sp,
+	uint_t				new_flags,
+	md_error_t			*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_upd_sr_flags_2_args	v2_args;
+	mdrpc_upd_sr_flags_args		*req;
+	mdrpc_generic_res		res;
+	int				rpc_stat;
+	int				version;
+
+	/* clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	req = &v2_args.mdrpc_upd_sr_flags_2_args_u.rev1;
+	req->sp = sp;
+	req->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	req->new_flags = new_flags;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	ok;
+
+		/* local daemon: call the service routine directly */
+		ok = mdrpc_upd_sr_flags_2_svc(&v2_args, &res, NULL);
+		assert(ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* the handle's version picks the procedure */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_upd_sr_flags_1(req, &res, clntp);
+		else
+			rpc_stat = mdrpc_upd_sr_flags_2(&v2_args, &res,
+			    clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad update set flags"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only an RPC error against another node is forgiven */
+	if (! mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Enable bits in the set record flags field. This just turns on the specified
+ * bits and leaves the other bits alone.
+ */
+int
+clnt_enable_sr_flags(
+ char *hostname,
+ mdsetname_t *sp,
+ uint_t flags,
+ md_error_t *ep
+)
+{
+ uint_t new_flags;
+ md_set_desc *sd;
+
+ mdclrerror(ep);
+
+ /* Get the flags from the current set */
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ /* Turn on the specified bits */
+ new_flags = (sd->sd_flags | flags);
+
+ /* do it */
+ return (upd_sr_flags_common(hostname, sp, new_flags, ep));
+}
+
+/*
+ * Disable bits in the set record flags field. This just turns off the
+ * specified bits and leaves the other bits alone.
+ */
+int
+clnt_disable_sr_flags(
+ char *hostname,
+ mdsetname_t *sp,
+ uint_t flags,
+ md_error_t *ep
+)
+{
+ uint_t new_flags;
+ md_set_desc *sd;
+
+ mdclrerror(ep);
+
+ /* Get the flags from the current set */
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ /* Turn off the specified bits */
+ new_flags = (sd->sd_flags & ~flags);
+
+ /* do it */
+ return (upd_sr_flags_common(hostname, sp, new_flags, ep));
+}
+
+/*
+ * Assign the flags as the new value(s) for the MD_SR_STATE_FLAGS within the
+ * set record flags field. This actually can set any bits but only clears
+ * the bits within the MD_SR_STATE_FLAGS subfield and leaves any other
+ * bits turned on. It can be used to clear (state) and set bits all in one
+ * rpc call.
+ */
+int
+clnt_upd_sr_flags(
+ char *hostname,
+ mdsetname_t *sp,
+ uint_t flags,
+ md_error_t *ep
+)
+{
+ uint_t new_flags;
+ md_set_desc *sd;
+
+ mdclrerror(ep);
+
+ /* Get the flags from the current set */
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ /* clear the existing state flags */
+ sd->sd_flags &= ~MD_SR_STATE_FLAGS;
+
+ /* Or in the new value */
+ new_flags = (sd->sd_flags | flags);
+
+ /* do it */
+ return (upd_sr_flags_common(hostname, sp, new_flags, ep));
+}
+
+ /*
+  * Return the file-level client set key (my_cl_sk), retargeted at the given
+  * set.  The key is allocated on first use, at which point the host name is
+  * taken from mynode().  On later calls only the set number and set name
+  * are refreshed; sk_host and sk_key are left untouched.
+  */
+ md_setkey_t *
+ cl_get_setkey(set_t setno, char *setname)
+ {
+     if (my_cl_sk != NULL) {
+         /* reuse the existing key, swapping in the new set identity */
+         my_cl_sk->sk_setno = setno;
+         if (my_cl_sk->sk_setname != NULL)
+             Free(my_cl_sk->sk_setname);
+         my_cl_sk->sk_setname = Strdup(setname);
+         return (my_cl_sk);
+     }
+
+     /* first call: build a fresh key for this node */
+     my_cl_sk = Zalloc(sizeof (md_setkey_t));
+     my_cl_sk->sk_setno = setno;
+     my_cl_sk->sk_setname = Strdup(setname);
+     my_cl_sk->sk_host = Strdup(mynode());
+
+     return (my_cl_sk);
+ }
+
+ /*
+  * Update the file-level client set key (my_cl_sk) from cl_sk.
+  *
+  *   cl_sk == NULL              free any existing key and leave none.
+  *   cl_sk and key both set     the two must describe the same set and host
+  *                              (asserted); only sk_key is copied over.
+  *   cl_sk set, no key yet      build a new key as a copy of cl_sk.
+  */
+ void
+ cl_set_setkey(md_setkey_t *cl_sk)
+ {
+     if (cl_sk == NULL) {
+         /* caller wants the cached key discarded */
+         if (my_cl_sk != NULL) {
+             if (my_cl_sk->sk_setname != NULL)
+                 Free(my_cl_sk->sk_setname);
+             if (my_cl_sk->sk_host != NULL)
+                 Free(my_cl_sk->sk_host);
+             Free(my_cl_sk);
+         }
+         my_cl_sk = NULL;
+         return;
+     }
+
+     if (my_cl_sk != NULL) {
+         /* same set, same host: just pick up the new key value */
+         assert(my_cl_sk->sk_setno == cl_sk->sk_setno);
+         assert(strcmp(my_cl_sk->sk_setname, cl_sk->sk_setname) == 0);
+         assert(strcmp(my_cl_sk->sk_host, cl_sk->sk_host) == 0);
+         my_cl_sk->sk_key = cl_sk->sk_key;
+         return;
+     }
+
+     /* get here, if set called before get */
+     my_cl_sk = Zalloc(sizeof (md_setkey_t));
+     my_cl_sk->sk_host = Strdup(cl_sk->sk_host);
+     my_cl_sk->sk_setno = cl_sk->sk_setno;
+     my_cl_sk->sk_setname = Strdup(cl_sk->sk_setname);
+     my_cl_sk->sk_key = cl_sk->sk_key;
+ }
+
+/*
+ * Unlock the set after operation is complete.
+ */
+int
+clnt_unlock_set(
+ char *hostname,
+ md_setkey_t *cl_sk,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_null_args args;
+ mdrpc_setlock_res res;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&args, 0, sizeof (args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ args.cl_sk = cl_sk;
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ bool = mdrpc_unlock_set_1_svc(&args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ if (mdrpc_unlock_set_1(&args, &res, clntp) != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN, "metad unlock set"));
+ else
+ (void) mdstealerror(ep, &res.status);
+
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_setlock_res, (char *)&res);
+
+ if (! mdisok(ep)) {
+ if (! mdanyrpcerror(ep))
+ return (-1);
+ if (strcmp(mynode(), hostname) == 0)
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ return (0);
+}
+
+/*
+ * Lock set so that only operators with valid keys are allowed in the daemon.
+ */
+int
+clnt_lock_set(
+ char *hostname,
+ mdsetname_t *sp,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_null_args args;
+ mdrpc_setlock_res res;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&args, 0, sizeof (args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ bool = mdrpc_lock_set_1_svc(&args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ if (mdrpc_lock_set_1(&args, &res, clntp) != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN, "metad lock set"));
+ else
+ (void) mdstealerror(ep, &res.status);
+
+ metarpcclose(clntp);
+ }
+
+ if (mdisok(ep))
+ cl_set_setkey(res.cl_sk);
+
+ xdr_free(xdr_mdrpc_setlock_res, (char *)&res);
+
+ if (! mdisok(ep)) {
+ if (! mdanyrpcerror(ep))
+ return (-1);
+ if (strcmp(mynode(), hostname) == 0)
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ return (0);
+}
+
+/*
+ * Add mediator hosts to disksets.
+ */
+int
+clnt_updmeds(
+ char *hostname,
+ mdsetname_t *sp,
+ md_h_arr_t *medp,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_updmeds_args *args;
+ mdrpc_updmeds_2_args v2_args;
+ mdrpc_generic_res res;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&v2_args, 0, sizeof (v2_args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ v2_args.rev = MD_METAD_ARGS_REV_1;
+ args = &v2_args.mdrpc_updmeds_2_args_u.rev1;
+ args->sp = sp;
+ args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ args->meds = *medp; /* structure assignment */
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ bool = mdrpc_updmeds_2_svc(&v2_args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ * and invoke the appropriate version of the
+ * remote procedure
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ if (version == METAD_VERSION) { /* version 1 */
+ if (mdrpc_updmeds_1(args, &res, clntp) != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN, "metad add hosts"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if (mdrpc_updmeds_2(&v2_args, &res, clntp) !=
+ RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN, "metad add hosts"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep))
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * update nr_flags field of node records based
+ * on given action.
+ */
+int
+clnt_upd_nr_flags(
+ char *hostname,
+ mdsetname_t *sp,
+ md_mnnode_desc *nd,
+ uint_t flag_action,
+ uint_t flags,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_upd_nr_flags_args *args;
+ mdrpc_upd_nr_flags_2_args v2_args;
+ mdrpc_generic_res res;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&v2_args, 0, sizeof (v2_args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ v2_args.rev = MD_METAD_ARGS_REV_1;
+ args = &v2_args.mdrpc_upd_nr_flags_2_args_u.rev1;
+ args->sp = sp;
+ args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ args->nodedescs = nd;
+ args->flag_action = flag_action;
+ args->flags = flags;
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ bool = mdrpc_upd_nr_flags_2_svc(&v2_args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ sp->setno, hostname, NULL, sp->setname);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ if (mdrpc_upd_nr_flags_2(&v2_args, &res, clntp)
+ != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad set node flags"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep)) {
+ if (! mdanyrpcerror(ep))
+ return (-1);
+ if (strcmp(mynode(), hostname) == 0)
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ return (0);
+}
+
+/*
+ * Clear set locks for all MN disksets.
+ * Used during reconfig cycle to recover from failed nodes.
+ */
+int
+clnt_clr_mnsetlock(
+ char *hostname,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_null_args args;
+ mdrpc_generic_res res;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&args, 0, sizeof (args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* do it */
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ NULL, hostname, NULL, NULL);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ if (mdrpc_clr_mnsetlock_2(&args, &res, clntp) != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN, "metad clr mnsetlock"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep))
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * Calls to suspend, resume or reinit the rpc.mdcommd daemon.
+ * This allows a node to remotely suspend, reinit and resume the
+ * rpc.mdcommd daemon on the given hostname node. Used by libmeta
+ * to lock out class 1 messages (metainit, etc) on all nodes when running
+ * metaset and metadb commands on this node.
+ *
+ * When suspending the commd, the suspend request will fail until all
+ * messages have been drained from the rpc.mdcommd. This routine will
+ * spin sending the suspend request until the rpc.mdcommd is drained
+ * or until rpc.mdcommd returns a failure other than MDMNE_SET_NOT_DRAINED.
+ *
+ * Also used to send the rpc.mdcommd daemon a new nodelist by draining all
+ * messages from the mdcommd and sending a reinit command to have mdcommd
+ * get the new nodelist from rpc.metad. Used when nodelist is changed
+ * during:
+ * - addition or deletion of host from diskset
+ * - join or withdrawal of host from diskset
+ * - addition of first disk to diskset (joins all nodes)
+ * - removal of last disk from diskset (withdraws all nodes)
+ */
+int
+clnt_mdcommdctl(
+ char *hostname,
+ int flag_action,
+ mdsetname_t *sp,
+ md_mn_msgclass_t class,
+ uint_t flags,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_mdcommdctl_args *args;
+ mdrpc_mdcommdctl_2_args v2_args;
+ mdrpc_generic_res res;
+ int version;
+ int suspend_spin = 0;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&v2_args, 0, sizeof (v2_args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ v2_args.rev = MD_METAD_ARGS_REV_1;
+ args = &v2_args.mdrpc_mdcommdctl_2_args_u.rev1;
+ args->flag_action = flag_action;
+ args->setno = sp->setno;
+ args->class = class;
+ args->flags = flags;
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ /*
+ * Call v2 procedure directly if rpc.metad on this node is
+ * sending message to itself.
+ */
+ if (flag_action == COMMDCTL_SUSPEND) {
+ suspend_spin = 1;
+ while (suspend_spin) {
+ suspend_spin = 0;
+ bool = mdrpc_mdcommdctl_2_svc(&v2_args, &res,
+ NULL);
+ assert(bool == TRUE);
+ /*
+ * If set not yet drained, wait a second
+ * and try again.
+ */
+ if (mdisdserror(&(res.status),
+ MDE_DS_COMMDCTL_SUSPEND_NYD)) {
+ /* Wait a second and try again */
+ mdclrerror(&(res.status));
+ (void) sleep(1);
+ suspend_spin = 1;
+ }
+ }
+ } else {
+ bool = mdrpc_mdcommdctl_2_svc(&v2_args, &res, NULL);
+ assert(bool == TRUE);
+ }
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ sp->setno, hostname, NULL, sp->setname);
+ metarpcclose(clntp);
+ return (-1);
+ }
+
+ if (flag_action == COMMDCTL_SUSPEND) {
+ suspend_spin = 1;
+ while (suspend_spin) {
+ suspend_spin = 0;
+ if (mdrpc_mdcommdctl_2(&v2_args, &res,
+ clntp) != RPC_SUCCESS) {
+ (void) mdrpcerror(ep, clntp,
+ hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad commd control"));
+ } else {
+ /*
+ * If set not yet drained,
+ * wait a second and
+ * and try again.
+ */
+ if (mdisdserror(&(res.status),
+ MDE_DS_COMMDCTL_SUSPEND_NYD)) {
+ mdclrerror(&(res.status));
+ (void) sleep(1);
+ suspend_spin = 1;
+ } else {
+ (void) mdstealerror(ep,
+ &res.status);
+ }
+ }
+ }
+ } else {
+ if (mdrpc_mdcommdctl_2(&v2_args, &res, clntp)
+ != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad commd control"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep)) {
+ if (! mdanyrpcerror(ep))
+ return (-1);
+ if (strcmp(mynode(), hostname) == 0)
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ return (0);
+}
+
+/*
+ * Is owner node stale?
+ */
+int
+clnt_mn_is_stale(
+ char *hostname,
+ mdsetname_t *sp,
+ int *ret_bool,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_setno_args *args;
+ mdrpc_setno_2_args v2_args;
+ mdrpc_bool_res res;
+ int rval = -1;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&v2_args, 0, sizeof (v2_args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ v2_args.rev = MD_METAD_ARGS_REV_1;
+ args = &v2_args.mdrpc_setno_2_args_u.rev1;
+ args->setno = sp->setno;
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ /*
+ * Call v2 procedure directly if rpc.metad on this node is
+ * sending message to itself.
+ */
+ bool = mdrpc_mn_is_stale_2_svc(&v2_args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ * and invoke the appropriate version of the
+ * remote procedure
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ sp->setno, hostname, NULL, sp->setname);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ if (mdrpc_mn_is_stale_2(&v2_args, &res, clntp) !=
+ RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN, "metad mn is stale"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ if (mdisok(ep)) {
+ /* do something with the results */
+ rval = 0;
+
+ if (ret_bool != NULL)
+ *ret_bool = res.value;
+ }
+
+ xdr_free(xdr_mdrpc_bool_res, (char *)&res);
+
+ return (rval);
+}
+
+/*
+ * Free md_drive_desc linked list of drive descriptors that was alloc'd
+ * from a call to the RPC routine clnt_getdrivedesc. Drive descriptors
+ * are from another node.
+ */
+void
+free_rem_dd(md_drive_desc *dd)
+{
+ mdrpc_getdrivedesc_res res;
+
+ /*
+ * dummy up a result struct, to do a deep free of the dd.
+ * (A deep free means that the xdr_free code will free the
+ * linked list of drive descs.)
+ */
+ (void) memset(&res, 0, sizeof (res));
+ res.dd = (struct md_drive_desc *)dd;
+ xdr_free(xdr_mdrpc_getdrivedesc_res, (char *)&res);
+}
+
+/*
+ * Get a partially filled in drive desc from remote node. Used in MN
+ * disksets during the reconfig cycle to get the diskset drive
+ * information from another host in order to sync up all nodes.
+ * Used when the drive record information isn't good enough
+ * since the drive record doesn't give the name of
+ * the drive, but just a key into that other node's nodespace.
+ * Returned drive desc has the drive name filled in but no other strings
+ * in the drivename structure.
+ *
+ * Returns a 0 if RPC was successful, 1 otherwise.
+ */
+int
+clnt_getdrivedesc(
+ char *hostname,
+ mdsetname_t *sp,
+ md_drive_desc **ret_dd,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_sp_args *args;
+ mdrpc_sp_2_args v2_args;
+ mdrpc_getdrivedesc_res res;
+ int version;
+ int rval = -1;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&v2_args, 0, sizeof (v2_args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ v2_args.rev = MD_METAD_ARGS_REV_1;
+ args = &v2_args.mdrpc_sp_2_args_u.rev1;
+ args->sp = sp;
+ args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ bool = mdrpc_getdrivedesc_2_svc(&v2_args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ sp->setno, hostname, NULL, sp->setname);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ if (mdrpc_getdrivedesc_2(&v2_args, &res, clntp)
+ != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad get drive desc set"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ /* If no ep error and no version mismatch - rpc call worked ok */
+ if (mdisok(ep)) {
+ rval = 0;
+ if (ret_dd != NULL)
+ *ret_dd = res.dd;
+ else
+ xdr_free(xdr_mdrpc_getdrivedesc_res, (char *)&res);
+ }
+
+ return (rval);
+}
+
+/*
+ * update dr_flags field of drive record.
+ * Also sync up genid of drive descriptors and make set
+ * record and node records match the genid.
+ *
+ * Returns a 0 if RPC was successful, 1 otherwise.
+ */
+int
+clnt_upd_dr_reconfig(
+ char *hostname,
+ mdsetname_t *sp,
+ md_drive_desc *dd,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_upd_dr_flags_2_args v2_args;
+ mdrpc_upd_dr_flags_2_args_r1 *v21_args;
+ mdrpc_generic_res res;
+ int rval;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&v2_args, 0, sizeof (v2_args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ v2_args.rev = MD_METAD_ARGS_REV_1;
+ v21_args = &v2_args.mdrpc_upd_dr_flags_2_args_u.rev1;
+ v21_args->sp = sp;
+ v21_args->drivedescs = dd;
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+
+ /*
+ * If the server is local, we call the v2 procedure
+ */
+ bool = mdrpc_upd_dr_reconfig_2_svc(&v2_args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ sp->setno, hostname, NULL, sp->setname);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ rval = mdrpc_upd_dr_reconfig_2(&v2_args, &res, clntp);
+
+ if (rval != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad update drive reconfig"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep)) {
+ if (! mdanyrpcerror(ep))
+ return (-1);
+ if (strcmp(mynode(), hostname) == 0)
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ return (0);
+}
+
+/*
+ * Reset mirror owner(s) if mirror owner(s) is in the list of
+ * node's specified in the array of nodeids.
+ * This is called when a node has been deleted or withdrawn
+ * from the diskset.
+ */
+int
+clnt_reset_mirror_owner(
+ char *hostname,
+ mdsetname_t *sp,
+ int node_c,
+ int node_id[],
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_nodeid_args *args;
+ mdrpc_nodeid_2_args v2_args;
+ mdrpc_generic_res res;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&v2_args, 0, sizeof (v2_args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ v2_args.rev = MD_METAD_ARGS_REV_1;
+ args = &v2_args.mdrpc_nodeid_2_args_u.rev1;
+ args->sp = sp;
+ args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ args->nodeid.nodeid_len = node_c;
+ args->nodeid.nodeid_val = &node_id[0];
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ bool = mdrpc_reset_mirror_owner_2_svc(&v2_args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ * and invoke the appropriate version of the
+ * remote procedure
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ sp->setno, hostname, NULL, sp->setname);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ if (mdrpc_reset_mirror_owner_2(&v2_args, &res, clntp)
+ != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad reset mirror owner"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep))
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * Call to suspend and resume I/O for given diskset(s).
+ * This allows a node to remotely suspend and resume I/O on
+ * a MN diskset. A diskset number of 0 represents all MN disksets.
+ */
+int
+clnt_mn_susp_res_io(
+ char *hostname,
+ set_t setno,
+ int cmd,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_mn_susp_res_io_args *args;
+ mdrpc_mn_susp_res_io_2_args v2_args;
+ mdrpc_generic_res res;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&v2_args, 0, sizeof (v2_args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ v2_args.rev = MD_METAD_ARGS_REV_1;
+ args = &v2_args.mdrpc_mn_susp_res_io_2_args_u.rev1;
+ args->susp_res_cmd = cmd;
+ args->susp_res_setno = setno;
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ /*
+ * Call v2 procedure directly if rpc.metad on this node is
+ * sending message to itself.
+ */
+ bool = mdrpc_mn_susp_res_io_2_svc(&v2_args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ setno, hostname, NULL, NULL);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ if (mdrpc_mn_susp_res_io_2(&v2_args, &res, clntp)
+ != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad mn_susp_res_io control"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep)) {
+ if (! mdanyrpcerror(ep))
+ return (-1);
+ if (strcmp(mynode(), hostname) == 0)
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ return (0);
+}
+
+/*
+ * Resnarf the set after the set has been imported
+ *
+ * We should never be making this procedure call
+ * over the wire, it's sole purpose is to snarf
+ * the imported set on the localhost.
+ */
+int
+clnt_resnarf_set(
+ char *hostname,
+ set_t setno,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_setno_2_args args;
+ mdrpc_generic_res res;
+ int rval = -1;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&args, 0, sizeof (args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ args.rev = MD_METAD_ARGS_REV_1;
+ args.mdrpc_setno_2_args_u.rev1.setno = setno;
+ args.mdrpc_setno_2_args_u.rev1.cl_sk = NULL;
+
+ /* do it */
+ if (strcmp(mynode(), hostname) == 0) {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /* Check the client handle for the version */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /* If the client is version 1, return error */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_CANTRESNARF, MD_SET_BAD,
+ mynode(), NULL, NULL);
+ } else {
+ rval = mdrpc_resnarf_set_2(&args, &res, clntp);
+
+ if (rval != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN, "metad resnarf set"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+
+ } else {
+ (void) mddserror(ep, MDE_DS_CANTRESNARF, MD_SET_BAD,
+ mynode(), NULL, NULL);
+ }
+
+ if (mdisok(ep))
+ rval = 0;
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ return (rval);
+}
+
+/*
+ * Call to start a resync for a given diskset.
+ * Used when a node has been added to a diskset.
+ * Should be called after rpc.mdcommd is resumed.
+ */
+int
+clnt_mn_mirror_resync_all(
+ char *hostname,
+ set_t setno,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_setno_2_args args;
+ mdrpc_generic_res res;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&args, 0, sizeof (args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ args.rev = MD_METAD_ARGS_REV_1;
+ args.mdrpc_setno_2_args_u.rev1.setno = setno;
+ args.mdrpc_setno_2_args_u.rev1.cl_sk = NULL;
+
+ /* do it */
+ if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+ int bool;
+ /*
+ * Call v2 procedure directly if rpc.metad on this node is
+ * sending message to itself.
+ */
+ bool = mdrpc_mn_mirror_resync_all_2_svc(&args, &res, NULL);
+ assert(bool == TRUE);
+ (void) mdstealerror(ep, &res.status);
+ } else {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ setno, hostname, NULL, NULL);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ if (mdrpc_mn_mirror_resync_all_2(&args, &res, clntp)
+ != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad mn_mirror_resync_all"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep)) {
+ if (! mdanyrpcerror(ep))
+ return (-1);
+ if (strcmp(mynode(), hostname) == 0)
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ return (0);
+}
+
+/*
+ * Call to update the ABR state for all soft partitions.
+ * Used when a node has been added to a diskset.
+ * Should be called after rpc.mdcommd is resumed.
+ */
+int
+clnt_mn_sp_update_abr(
+ char *hostname,
+ set_t setno,
+ md_error_t *ep
+)
+{
+ CLIENT *clntp;
+ mdrpc_setno_2_args args;
+ mdrpc_generic_res res;
+ int version;
+
+ /* initialize */
+ mdclrerror(ep);
+ (void) memset(&args, 0, sizeof (args));
+ (void) memset(&res, 0, sizeof (res));
+
+ /* build args */
+ args.rev = MD_METAD_ARGS_REV_1;
+ args.mdrpc_setno_2_args_u.rev1.setno = setno;
+ args.mdrpc_setno_2_args_u.rev1.cl_sk = NULL;
+
+ /*
+ * No need to call function if adding local node as ABR cannot
+ * be set.
+ */
+ if (strcmp(mynode(), hostname) != 0) {
+ if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Check the client handle for the version
+ */
+ CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+ /*
+ * If the client is version 1, return error
+ * otherwise, make the remote procedure call.
+ */
+ if (version == METAD_VERSION) { /* version 1 */
+ (void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+ setno, hostname, NULL, NULL);
+ metarpcclose(clntp);
+ return (-1);
+ } else {
+ if (mdrpc_mn_sp_update_abr_2(&args, &res, clntp)
+ != RPC_SUCCESS)
+ (void) mdrpcerror(ep, clntp, hostname,
+ dgettext(TEXT_DOMAIN,
+ "metad mn_sp_update_abr"));
+ else
+ (void) mdstealerror(ep, &res.status);
+ }
+
+ metarpcclose(clntp);
+ }
+
+ xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+ if (! mdisok(ep)) {
+ if (! mdanyrpcerror(ep))
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_metad_subr.c b/usr/src/lib/lvm/libmeta/common/meta_metad_subr.c
new file mode 100644
index 0000000000..df50a7650e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_metad_subr.c
@@ -0,0 +1,2055 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * interface between user land and the set records
+ */
+
+#include <meta.h>
+#include <metad.h>
+#include <sdssc.h>
+#include <syslog.h>
+#include <sys/cladm.h>
+#include "meta_set_prv.h"
+
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/svm.h>
+
+static md_set_record *setrecords = NULL; /* head of cache linked list */
+/*
+ * Snarf progress: 0 = no snarf done (or cache emptied), 1 = set_snarf()
+ * in progress, 2 = snarf complete, 3 = records validated by sr_validate().
+ */
+static int setsnarfdone = 0;
+
+/* One (side, key) pair; used by sr_sidenms() to record names in use */
+typedef struct key_lst_t {
+ side_t kl_side;
+ mdkey_t kl_key;
+ struct key_lst_t *kl_next;
+} key_lst_t;
+
+/* Singly linked list of mddb user record ids */
+typedef struct ur_recid_lst {
+ mddb_recid_t url_recid;
+ struct ur_recid_lst *url_nx;
+} ur_recid_lst_t;
+
+/* Record ids seen in use during a snarf, and record ids queued for deletion */
+static ur_recid_lst_t *url_used = NULL;
+static ur_recid_lst_t *url_tode = NULL;
+
+/*
+ * Append recid to the record id list at *urlpp unless it is already
+ * present.  Nothing is added if the allocation yields NULL.
+ */
+static void
+url_addl(ur_recid_lst_t **urlpp, mddb_recid_t recid)
+{
+	ur_recid_lst_t	*newp;
+
+	/* walk to the tail link, bailing out on a duplicate */
+	while (*urlpp != NULL) {
+		if ((*urlpp)->url_recid == recid)
+			return;
+		urlpp = &(*urlpp)->url_nx;
+	}
+
+	/* hook the new (zeroed) member onto the tail link */
+	newp = Zalloc(sizeof (*newp));
+	*urlpp = newp;
+	if (newp == NULL)
+		return;
+
+	newp->url_recid = recid;
+}
+
+/*
+ * Return 1 if recid is a member of the list urlp, 0 otherwise.
+ */
+static int
+url_findl(ur_recid_lst_t *urlp, mddb_recid_t recid)
+{
+	ur_recid_lst_t	*cur;
+
+	for (cur = urlp; cur != NULL; cur = cur->url_nx) {
+		if (cur->url_recid == recid)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Free every member of the list at *urlpp and leave the list empty.
+ */
+static void
+url_freel(ur_recid_lst_t **urlpp)
+{
+	ur_recid_lst_t	*cur = *urlpp;
+	ur_recid_lst_t	*next;
+
+	while (cur != NULL) {
+		/* pick up the link before the member goes away */
+		next = cur->url_nx;
+		Free(cur);
+		cur = next;
+	}
+	*urlpp = (ur_recid_lst_t *)NULL;
+}
+
+/*
+ * Check a freshly read set record and, if needed, bring it up to the
+ * current size of md_set_record.
+ *
+ * reqp describes the record as read (ur_data/ur_size/ur_recid); on a
+ * successful conversion these fields are updated to describe the new
+ * record and the old buffer is freed.
+ *
+ *  - Outside the daemon a short record is simply grown in memory and
+ *    zero padded; nothing is written to the database.
+ *  - Inside the daemon a short record is replaced in the database by a
+ *    newly created full size record; the old record is flagged
+ *    MD_SR_CVT and queued on url_tode for deletion.
+ *  - Records that are already the right size, larger than the current
+ *    structure, multi-node records, and records already flagged
+ *    MD_SR_CVT are left untouched.
+ *
+ * Returns 0 on success; otherwise the value of mdstealerror() with the
+ * ioctl error moved into ep.
+ */
+static int
+ckncvt_set_record(mddb_userreq_t *reqp, md_error_t *ep)
+{
+	mddb_userreq_t	req;
+	md_set_record	*sr;
+	int		recs[3];
+
+	if (reqp->ur_size == sizeof (*sr))
+		return (0);
+
+	if (! md_in_daemon) {
+		if (reqp->ur_size >= sizeof (*sr))
+			return (0);
+
+		/* grow the buffer and zero the newly added tail */
+		reqp->ur_data = (uintptr_t)Realloc((void *)reqp->ur_data,
+		    sizeof (*sr));
+		(void) memset(((char *)reqp->ur_data) + reqp->ur_size, '\0',
+		    sizeof (*sr) - reqp->ur_size);
+		reqp->ur_size = sizeof (*sr);
+		return (0);
+	}
+
+	/*
+	 * If here, then the daemon is calling, and so the automatic
+	 * conversion will be performed.
+	 */
+
+	/* shorthand */
+	req = *reqp;			/* structure assignment */
+	sr = (md_set_record *)req.ur_data;
+
+	if (sr->sr_flags & MD_SR_CVT)
+		return (0);
+
+	/* Leave multi-node set records alone */
+	if (MD_MNSET_REC(sr)) {
+		return (0);
+	}
+
+	/*
+	 * A record bigger than the current structure cannot be copied
+	 * into a sizeof (*sr) buffer without overrunning it, so leave it
+	 * alone exactly as the non-daemon path above does.
+	 */
+	if (reqp->ur_size > sizeof (*sr))
+		return (0);
+
+	/* Mark the old record as converted */
+	sr->sr_flags |= MD_SR_CVT;
+
+	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
+		return (mdstealerror(ep, &req.ur_mde));
+
+	/* Create space for the new record */
+	METAD_SETUP_SR(MD_DB_CREATE, 0);
+	req.ur_size = sizeof (*sr);
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
+		return (mdstealerror(ep, &req.ur_mde));
+
+	/* Allocate the new record */
+	sr = Zalloc(sizeof (*sr));
+
+	/* copy all the data from the record being converted */
+	(void) memmove(sr, (void *)reqp->ur_data, reqp->ur_size);
+	sr->sr_flags &= ~MD_SR_CVT;
+
+	/* adjust the selfid to point to the new record */
+	sr->sr_selfid = req.ur_recid;
+
+	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
+	req.ur_size = sizeof (*sr);
+	req.ur_data = (uintptr_t)sr;
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		Free(sr);
+		return (mdstealerror(ep, &req.ur_mde));
+	}
+
+	/* Commit the old and the new; the id list is zero terminated */
+	recs[0] = ((md_set_record *)reqp->ur_data)->sr_selfid;
+	recs[1] = sr->sr_selfid;
+	recs[2] = 0;
+
+	METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
+	req.ur_size = sizeof (recs);
+	req.ur_data = (uintptr_t)recs;
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		Free(sr);
+		return (mdstealerror(ep, &req.ur_mde));
+	}
+
+	/* Add the old record to the list of records to delete */
+	url_addl(&url_tode, ((md_set_record *)reqp->ur_data)->sr_selfid);
+
+	/* Free the old record's space */
+	Free((void *)reqp->ur_data);
+
+	/* Adjust the reqp structure to point to the new record and size */
+	reqp->ur_recid = sr->sr_selfid;
+	reqp->ur_size = sizeof (*sr);
+	reqp->ur_data = (uintptr_t)sr;
+
+	return (0);
+}
+
+/*
+ * Read one record from the metadevice database.
+ *
+ * With MD_UR_GET_NEXT the record following *idp is looked up and *idp is
+ * advanced to it; with MD_UR_GET_WKEY the record whose id is *idp is
+ * read.  Set records (MDDB_USER / MDDB_UR_SR) are additionally passed
+ * through ckncvt_set_record().
+ *
+ * Returns a newly allocated request whose ur_data points at a newly
+ * allocated copy of the record (the caller frees both), or NULL when
+ * there is no such record or an error occurred.  When the size or data
+ * ioctl fails, *idp is zeroed and the error is placed in ep.
+ */
+mddb_userreq_t *
+get_db_rec(
+	md_ur_get_cmd_t	cmd,
+	set_t		setno,
+	mddb_type_t	type,
+	uint_t		type2,
+	mddb_recid_t	*idp,
+	md_error_t	*ep
+)
+{
+	mddb_userreq_t	*reqp;
+
+	reqp = Zalloc(sizeof (*reqp));
+	reqp->ur_type2 = type2;
+	reqp->ur_type = type;
+	reqp->ur_setno = setno;
+
+	if (cmd == MD_UR_GET_NEXT) {
+		/* let the driver step to the record after *idp */
+		reqp->ur_cmd = MD_DB_GETNEXTREC;
+		reqp->ur_recid = *idp;
+		if (metaioctl(MD_DB_USERREQ, reqp, &reqp->ur_mde, NULL)
+		    != 0) {
+			(void) mdstealerror(ep, &reqp->ur_mde);
+			Free(reqp);
+			return (NULL);
+		}
+		*idp = reqp->ur_recid;
+	} else if (cmd == MD_UR_GET_WKEY) {
+		reqp->ur_recid = *idp;
+	}
+
+	/* no (further) record to fetch */
+	if (*idp <= 0) {
+		Free(reqp);
+		return (NULL);
+	}
+
+	/* find out how big the record is ... */
+	reqp->ur_cmd = MD_DB_GETSIZE;
+	if (metaioctl(MD_DB_USERREQ, reqp, &reqp->ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &reqp->ur_mde);
+		Free(reqp);
+
+		*idp = 0;
+		return (NULL);
+	}
+
+	/* ... and then read it into a buffer of that size */
+	reqp->ur_cmd = MD_DB_GETDATA;
+	reqp->ur_data = (uintptr_t)Zalloc(reqp->ur_size);
+	if (metaioctl(MD_DB_USERREQ, reqp, &reqp->ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &reqp->ur_mde);
+		Free((void *)reqp->ur_data);
+		Free(reqp);
+		*idp = 0;
+		return (NULL);
+	}
+
+	/* user set records may need checking/conversion */
+	if (reqp->ur_type == MDDB_USER && reqp->ur_type2 == MDDB_UR_SR &&
+	    ckncvt_set_record(reqp, ep)) {
+		Free((void *)reqp->ur_data);
+		Free(reqp);
+		return (NULL);
+	}
+
+	return (reqp);
+}
+
+/*
+ * Convenience wrapper around get_db_rec() for MDDB_USER records: returns
+ * only the record data (caller frees), or NULL if get_db_rec() returned
+ * NULL.  idp must not be NULL.
+ */
+void *
+get_ur_rec(
+	set_t		setno,
+	md_ur_get_cmd_t	cmd,
+	uint_t		type2,
+	mddb_recid_t	*idp,
+	md_error_t	*ep
+)
+{
+	mddb_userreq_t	*reqp;
+	void		*data = NULL;
+
+	assert(idp != NULL);
+
+	reqp = get_db_rec(cmd, setno, MDDB_USER, type2, idp, ep);
+	if (reqp != NULL) {
+		/* keep the data buffer, discard the request wrapper */
+		data = (void *)reqp->ur_data;
+		Free(reqp);
+	}
+	return (data);
+}
+
+/*
+ * Called by rpc.metad on startup of disksets to cleanup
+ * the host entries associated with a diskset.  This is needed if
+ * a node failed or the metaset command was killed during the addition
+ * of a node to a diskset.
+ *
+ * This is called for all traditional disksets.
+ * This is only called for MN disksets when there is only one node
+ * in all of the MN disksets and this node is not running SunCluster.
+ * (Otherwise, the cleanup of the host entries is handled by a
+ * reconfig cycle that the SunCluster software calls).
+ *
+ * Returns 1 if this node was not found in the set (the set has then
+ * been handed to s_delset()), 0 if the node is a member.
+ */
+static int
+sr_hosts(md_set_record *sr)
+{
+	int			i,
+				nid,
+				self_in_set = FALSE;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_record	*nr;
+	md_mnset_record		*mnsr;
+
+	if (MD_MNSET_REC(sr)) {
+		mnsr = (struct md_mnset_record *)sr;
+		nr = mnsr->sr_nodechain;
+		/*
+		 * Already guaranteed to be only 1 node in set which
+		 * is mynode (done in sr_validate).
+		 * Now, check if node is in the OK state.  If not in
+		 * the OK state, leave self_in_set FALSE so that
+		 * set will be removed.
+		 *
+		 * An empty node chain is treated like a node that is
+		 * not OK rather than being dereferenced.
+		 */
+		if (nr != NULL && (nr->nr_flags & MD_MN_NODE_OK))
+			self_in_set = TRUE;
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sr->sr_nodes[i][0] == '\0')
+				continue;
+
+			/* Make sure we are in the set and skip this node */
+			if (strcmp(sr->sr_nodes[i], mynode()) == 0) {
+				self_in_set = TRUE;
+				break;
+			}
+		}
+	}
+
+	if ((self_in_set == FALSE) && (!(MD_MNSET_REC(sr))) &&
+	    (_cladm(CL_CONFIG, CL_NODEID, &nid) == 0)) {
+
+		/*
+		 * See if we've got a node which has been booted in
+		 * non-cluster mode.  If true the nodeid will match
+		 * one of the sr_nodes values because the conversion
+		 * from nodeid to hostname failed to occur.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			if (sr->sr_nodes[i][0] == 0)
+				continue;
+			if (atoi(sr->sr_nodes[i]) == nid)
+				self_in_set = TRUE;
+		}
+	}
+
+	/* If we aren't in the set, delete the set */
+	if (self_in_set == FALSE) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "Removing set %s from database\n"),
+		    sr->sr_setname);
+		s_delset(sr->sr_setname, &xep);
+		if (! mdisok(&xep))
+			mdclrerror(&xep);
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Delete the drive record recid from the local database and drop it from
+ * the cached drive chain of sr.  Errors from the ownership check and from
+ * the delete request are cleared, not reported.
+ */
+void
+sr_del_drv(md_set_record *sr, mddb_recid_t recid)
+{
+	md_error_t	xep = mdnullerror;
+	mddb_userreq_t	req;
+
+	/*
+	 * The ownership check currently changes nothing that follows; it
+	 * only needs its error (if any) cleaned up.
+	 */
+	if (!s_ownset(sr->sr_setno, &xep) && !mdisok(&xep))
+		mdclrerror(&xep);
+
+	/* delete the replicas? */
+	/* release ownership of the drive? */
+	/* NOTE: We may not have a name, so both of the above are ugly! */
+
+	/* remove the record from the database ... */
+	(void) memset(&req, 0, sizeof (req));
+	METAD_SETUP_DR(MD_DB_DELETE, recid)
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
+		mdclrerror(&req.ur_mde);
+
+	/* ... and from the in-memory cache */
+	dr_cache_del(sr, recid);
+}
+
+/*
+ * Bring the drive records of set sr into a stable state:
+ *  - drives that were in the middle of being added (MD_DR_ADD) are
+ *    deleted;
+ *  - drives lacking a name entry in the local set for a required side
+ *    are deleted (for a multi-node set only this node's side is
+ *    checked, for a traditional set every populated side);
+ *  - every remaining drive that is not flagged MD_DR_OK is set to
+ *    MD_DR_OK.
+ * If anything changed the set is committed; commit errors are cleared.
+ *
+ * An ioctl failure other than ENOENT while looking up a name abandons
+ * the walk immediately, without committing.
+ *
+ * The name lookup uses a static buffer, so this routine is not
+ * reentrant.
+ */
+static void
+sr_drvs(md_set_record *sr)
+{
+	md_drive_record		*dr;
+	md_drive_record		*next_dr;
+	int			i;
+	int			modified = 0;
+	int			sidesok;
+	mdnm_params_t		nm;
+	static char		device_name[MAXPATHLEN];
+	md_error_t		xep = mdnullerror;
+	md_mnnode_record	*nr;
+	md_mnset_record		*mnsr;
+
+	for (dr = sr->sr_drivechain; dr != NULL; dr = next_dr) {
+		/*
+		 * sr_del_drv() releases dr through dr_cache_del(), so the
+		 * successor has to be picked up before dr can go away.
+		 */
+		next_dr = dr->dr_next;
+
+		/* If we were mid-add, cleanup */
+		if ((dr->dr_flags & MD_DR_ADD)) {
+			sr_del_drv(sr, dr->dr_selfid);
+			modified++;
+			continue;
+		}
+
+		sidesok = TRUE;
+		if (MD_MNSET_REC(sr)) {
+			mnsr = (md_mnset_record *)sr;
+			nr = mnsr->sr_nodechain;
+			/*
+			 * MultiNode disksets only have entries for
+			 * their side in the local set.  Verify
+			 * that drive has a name associated with
+			 * this node's side.
+			 */
+			while (nr) {
+				/* Find my node */
+				if (strcmp(mynode(), nr->nr_nodename) != 0) {
+					nr = nr->nr_next;
+					continue;
+				}
+
+				(void) memset(&nm, '\0', sizeof (nm));
+				nm.setno = MD_LOCAL_SET;
+				nm.side = nr->nr_nodeid;
+				nm.key = dr->dr_key;
+				nm.devname = (uint64_t)(uintptr_t)device_name;
+
+				if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
+				    NULL) != 0) {
+					if (! mdissyserror(&nm.mde, ENOENT)) {
+						mdclrerror(&nm.mde);
+						return;
+					}
+				}
+
+				/*
+				 * If entry is found for this node, then
+				 * break out of loop walking through
+				 * node list.  For a multi-node diskset,
+				 * there should only be an entry for
+				 * this node.
+				 */
+				if (nm.key != MD_KEYWILD &&
+				    ! mdissyserror(&nm.mde, ENOENT)) {
+					break;
+				}
+
+				/*
+				 * If entry is not found for this node,
+				 * then delete the drive.  No need to
+				 * continue through the node loop since
+				 * our node has already been found.
+				 */
+				sidesok = FALSE;
+				mdclrerror(&nm.mde);
+
+				/* If we are missing a sidename, cleanup */
+				sr_del_drv(sr, dr->dr_selfid);
+				modified++;
+
+				break;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sr->sr_nodes[i][0] == '\0')
+					continue;
+
+				(void) memset(&nm, '\0', sizeof (nm));
+				nm.setno = MD_LOCAL_SET;
+				nm.side = i + SKEW;
+				nm.key = dr->dr_key;
+				nm.devname = (uint64_t)(uintptr_t)device_name;
+
+				if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
+				    NULL) != 0) {
+					if (! mdissyserror(&nm.mde, ENOENT)) {
+						mdclrerror(&nm.mde);
+						return;
+					}
+				}
+
+				/* name present for this side, try the next */
+				if (nm.key != MD_KEYWILD &&
+				    ! mdissyserror(&nm.mde, ENOENT))
+					continue;
+
+				sidesok = FALSE;
+				mdclrerror(&nm.mde);
+
+				/* If we are missing a sidename, cleanup */
+				sr_del_drv(sr, dr->dr_selfid);
+				modified++;
+
+				break;
+			}
+		}
+
+		/* dr has been deleted; it must not be touched any more */
+		if (sidesok == FALSE)
+			continue;
+
+		/*
+		 * If we got this far, the drive record is either in the OK
+		 * or DEL state, if it is in the DEL state and the sidenames
+		 * all checked out, then we will make it OK.
+		 */
+		if ((dr->dr_flags & MD_DR_OK))
+			continue;
+
+		dr->dr_flags = MD_DR_OK;
+
+		modified++;
+	}
+
+	if (modified) {
+		commitset(sr, FALSE, &xep);
+		if (! mdisok(&xep))
+			mdclrerror(&xep);
+	}
+}
+
+/*
+ * Append a new (side, key) element to the tail of the list at *klpp.
+ * No check for duplicates is made.
+ */
+static void
+add_key_to_lst(key_lst_t **klpp, side_t side, mdkey_t key)
+{
+	key_lst_t	*newp;
+
+	assert(klpp != NULL);
+
+	/* find the link that terminates the list */
+	while (*klpp != NULL)
+		klpp = &(*klpp)->kl_next;
+
+	/* allocate the new (zeroed) element and hang it on that link */
+	newp = Zalloc(sizeof (*newp));
+	*klpp = newp;
+
+	newp->kl_side = side;
+	newp->kl_key = key;
+}
+
+#ifdef DUMPKEYLST
+/*
+ * Debug aid, only compiled when DUMPKEYLST is defined: print the tag
+ * followed by every (side, key) pair of the list via md_eprintf().
+ */
+static void
+pr_key_lst(char *tag, key_lst_t *klp)
+{
+ key_lst_t *tklp;
+
+ md_eprintf("Tag=%s\n", tag);
+ for (tklp = klp; tklp != NULL; tklp = tklp->kl_next)
+ md_eprintf("side=%d, key=%lu\n", tklp->kl_side, tklp->kl_key);
+}
+#endif /* DUMPKEYLST */
+
+/*
+ * Return 1 if the pair (side, key) is on the list klp, 0 otherwise.
+ */
+static int
+key_in_key_lst(key_lst_t *klp, side_t side, mdkey_t key)
+{
+	while (klp != NULL) {
+		if (klp->kl_side == side && klp->kl_key == key)
+			return (1);
+		klp = klp->kl_next;
+	}
+
+	return (0);
+}
+
+/*
+ * Free every element of the list at *klpp and reset the head to NULL.
+ */
+static void
+destroy_key_lst(key_lst_t **klpp)
+{
+	key_lst_t	*cur, *next;
+
+	assert(klpp != NULL);
+
+	for (cur = *klpp; cur != NULL; cur = next) {
+		/* remember the successor before releasing the element */
+		next = cur->kl_next;
+		Free(cur);
+	}
+	*klpp = NULL;
+}
+
+/*
+ * Walk all name keys recorded in the local set for the given side and
+ * remove each one that is not on the in-use list.  ioctl errors are
+ * cleared; a failure to get the next key ends the walk.
+ */
+static void
+sr_prune_side(key_lst_t *use, side_t side)
+{
+	mdnm_params_t	nm;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = MD_LOCAL_SET;
+	nm.side = side;
+	nm.key = MD_KEYWILD;
+
+	for (;;) {
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			mdclrerror(&nm.mde);
+			break;
+		}
+
+		/* no further keys for this side */
+		if (nm.key == MD_KEYWILD)
+			break;
+
+		if (key_in_key_lst(use, nm.side, nm.key))
+			continue;
+
+		/* stray name: remove it, ignoring any failure */
+		if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde, NULL) != 0)
+			mdclrerror(&nm.mde);
+	}
+}
+
+/*
+ * Remove stray side names from the local set: collect the (side, key)
+ * pairs used by the drive records of every cached set, then delete any
+ * name whose pair is not in that collection.
+ */
+static void
+sr_sidenms(void)
+{
+	md_set_record		*sr;
+	md_drive_record		*dr;
+	md_mnnode_record	*nr;
+	key_lst_t		*use = NULL;
+	side_t			myside = 0;
+	int			i;
+
+	/*
+	 * We now go through the list of set and drive records collecting
+	 * the key/side pairs that are being used.
+	 */
+	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
+		/*
+		 * To handle the multi-node diskset case, get the sideno
+		 * associated with this node.  This sideno will be the
+		 * same across all multi-node disksets.
+		 */
+		if ((myside == 0) && (MD_MNSET_REC(sr))) {
+			nr = ((struct md_mnset_record *)sr)->sr_nodechain;
+			for (; nr != NULL; nr = nr->nr_next) {
+				if (strcmp(mynode(), nr->nr_nodename) == 0) {
+					myside = nr->nr_nodeid;
+					break;
+				}
+			}
+			/*
+			 * If this node is not in this MNset -
+			 * then skip this set.
+			 */
+			if (nr == NULL)
+				continue;
+		}
+
+		for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
+			if (MD_MNSET_REC(sr)) {
+				/*
+				 * There are no non-local sidenames in the
+				 * local set for a multi-node diskset.
+				 */
+				add_key_to_lst(&use, myside, dr->dr_key);
+				continue;
+			}
+
+			/* traditional set: one pair per populated slot */
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				if (sr->sr_nodes[i][0] != '\0')
+					add_key_to_lst(&use, i + SKEW,
+					    dr->dr_key);
+			}
+		}
+	}
+
+#ifdef DUMPKEYLST
+	pr_key_lst("use", use);
+#endif /* DUMPKEYLST */
+
+	/*
+	 * We take the list above and get all non-local sidenames, checking
+	 * each to see if they are in use, if they are not used, we delete them.
+	 * Do the check for myside to cover multinode disksets.
+	 * Then do the check for MD_MAXSIDES to cover non-multinode disksets.
+	 * If any multi-node disksets were present, myside would be non-zero.
+	 * myside is the same for all multi-node disksets for this node.
+	 */
+	if (myside)
+		sr_prune_side(use, myside);
+
+	/* Now handle the non-multinode disksets */
+	for (i = 0; i < MD_MAXSIDES; i++)
+		sr_prune_side(use, i + SKEW);
+
+	/* Cleanup */
+	destroy_key_lst(&use);
+}
+
+/*
+ * Validate the set records gathered by set_snarf(): sets that were in
+ * the middle of being added are deleted, sets this node is not a member
+ * of are deleted (see sr_hosts), state flags are normalized to
+ * MD_SR_OK, drive records are stabilized (see sr_drvs) and finally any
+ * stray side names are removed.
+ *
+ * Only records flagged MD_SR_CHECK are examined.  Multi-node sets are
+ * only examined when meta_mn_singlenode() reports a single node
+ * environment and every multi-node set holds exactly one node, which
+ * is this node.
+ *
+ * A snarf must have been done first.  Once complete, setsnarfdone is
+ * set to 3 and later calls return immediately.
+ */
+void
+sr_validate(void)
+{
+	md_set_record		*sr;
+	md_set_record		*next_sr;
+	md_error_t		xep = mdnullerror;
+	int			mnset_single_node;
+	md_mnnode_record	*nr;
+	md_mnset_record		*mnsr;
+
+	assert(setsnarfdone != 0);
+
+	/* We have validated the records already */
+	if (setsnarfdone == 3)
+		return;
+
+	/*
+	 * Check if we are in a single node non-SC3.x environment
+	 */
+	mnset_single_node = meta_mn_singlenode();
+	/*
+	 * If a possible single_node situation, verify that all
+	 * MN disksets have only one node (which is mynode()).
+	 */
+	if (mnset_single_node) {
+		for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
+			if (MD_MNSET_REC(sr)) {
+				mnsr = (struct md_mnset_record *)sr;
+				nr = mnsr->sr_nodechain;
+				/*
+				 * If there is no node at all, or the next
+				 * pointer is non-null (more than one node
+				 * in list), or the single node isn't my
+				 * node - reset single node flag.
+				 */
+				if ((nr == NULL) || (nr->nr_next) ||
+				    (strcmp(nr->nr_nodename, mynode()) != 0)) {
+					mnset_single_node = 0;
+					break;
+				}
+			}
+		}
+	}
+
+	for (sr = setrecords; sr != NULL; sr = next_sr) {
+		/*
+		 * Pick up the successor first.
+		 * NOTE(review): s_delset() is not visible here; it is
+		 * assumed it may drop sr from the cache (sr_cache_del()
+		 * frees the record), in which case sr->sr_next must not
+		 * be read afterwards - confirm.
+		 */
+		next_sr = sr->sr_next;
+
+		/*
+		 * If a MN diskset and not in the single node
+		 * situation, then don't validate the MN set.
+		 * This is done during a reconfig cycle since all
+		 * nodes must take the same action.
+		 */
+		if (MD_MNSET_REC(sr) && (mnset_single_node == 0))
+			continue;
+
+		/* Since we do "partial" snarf's, we only check new entries */
+		if (! (sr->sr_flags & MD_SR_CHECK))
+			continue;
+
+		/* If we were mid-add, cleanup */
+		if ((sr->sr_flags & MD_SR_ADD)) {
+			s_delset(sr->sr_setname, &xep);
+			if (! mdisok(&xep))
+				mdclrerror(&xep);
+			continue;
+		}
+
+		/* Make sure we are in the set. */
+		if (sr_hosts(sr))
+			continue;
+
+		/* Check has been done, clear the flag */
+		if ((sr->sr_flags & MD_SR_CHECK))
+			sr->sr_flags &= ~MD_SR_CHECK;
+
+		/*
+		 * If we got here, we are in the set, make sure the flags make
+		 * sense.
+		 */
+		if (! (sr->sr_flags & MD_SR_OK)) {
+			sr->sr_flags &= ~MD_SR_STATE_FLAGS;
+			sr->sr_flags |= MD_SR_OK;
+			commitset(sr, FALSE, &xep);
+			if (! mdisok(&xep))
+				mdclrerror(&xep);
+		}
+
+		/* Make sure all the drives are in a stable state. */
+		sr_drvs(sr);
+	}
+
+	/* Cleanup any stray sidenames */
+	sr_sidenms();
+
+	setsnarfdone = 3;
+}
+
+/*
+ * Return the cached set record whose record id is recid, or NULL if
+ * the cache holds no such record.
+ */
+static md_set_record *
+sr_in_cache(mddb_recid_t recid)
+{
+	md_set_record	*cur = setrecords;
+
+	while (cur != NULL && cur->sr_selfid != recid)
+		cur = cur->sr_next;
+
+	return (cur);
+}
+
+/*
+ * Load ("snarf") the set, node and drive records of the local set into
+ * the in-memory cache headed by setrecords.
+ *
+ * Records already cached are not reloaded.  New set records are flagged
+ * MD_SR_CHECK so that sr_validate() will examine them.  When called from
+ * the daemon, node ALIVE/OWN flags are refreshed from the membership
+ * list, this node's id is pushed to the driver for each multi-node set,
+ * and user set/drive records not referenced by any valid set (as well
+ * as records left over from a conversion) are deleted.
+ *
+ * setsnarfdone is 1 while the snarf runs, 2 after success and 0 after
+ * a failure (the cache is flushed in that case).
+ *
+ * Returns 0 on success (or if a snarf was already done), -1 on failure
+ * with the error in ep.
+ */
+int
+set_snarf(md_error_t *ep)
+{
+	md_set_record			*sr;
+	md_mnset_record			*mnsr;
+	md_set_record			*tsr;
+	md_drive_record			*dr;
+	mddb_userreq_t			*reqp;
+	ur_recid_lst_t			*urlp;
+	mddb_recid_t			id;
+	mddb_recid_t			*p;
+	md_error_t			xep = mdnullerror;
+	md_mnnode_record		*nr;
+	mddb_set_node_params_t		snp;
+	int				nodecnt;
+	mndiskset_membershiplist_t	*nl, *nl2;
+
+	/* We have done the snarf call */
+	if (setsnarfdone != 0)
+		return (0);
+
+	/* a stale database is tolerated, anything else is fatal */
+	if (meta_setup_db_locations(ep) != 0) {
+		if (! mdismddberror(ep, MDE_DB_STALE))
+			return (-1);
+		mdclrerror(ep);
+	}
+
+	/*
+	 * Get membershiplist from API routine.
+	 * If there's an error, just use a NULL
+	 * nodelist.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+		nodecnt = 0;	/* no nodes are alive */
+		nl = NULL;
+		mdclrerror(ep);
+	}
+
+	/* Let sr_cache_add and dr_cache_add know we are doing the snarf */
+	setsnarfdone = 1;
+
+	/* Go get the set records */
+	id = 0;
+	while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR,
+	    &id, ep)) != NULL) {
+		sr->sr_next = NULL;
+		sr->sr_drivechain = NULL;
+
+		/*
+		 * Cluster nodename support
+		 * Convert nodeid -> nodename
+		 * Don't do this for MN disksets since we've already stored
+		 * both the nodeid and name.
+		 */
+		if (!(MD_MNSET_REC(sr)))
+			sdssc_cm_sr_nid2nm(sr);
+
+		/* If we were mid-cvt, cleanup */
+		if (sr->sr_flags & MD_SR_CVT) {
+			/* If the daemon is calling, cleanup */
+			if (md_in_daemon)
+				url_addl(&url_tode, sr->sr_selfid);
+			/*
+			 * Only the record id was kept; the record itself
+			 * is not cached, so release it here.
+			 */
+			Free(sr);
+			continue;
+		}
+
+		if (md_in_daemon)
+			url_addl(&url_used, sr->sr_selfid);
+
+		/* Skip cached records */
+		tsr = sr_in_cache(sr->sr_selfid);
+		if (tsr != (md_set_record *)NULL) {
+			if (MD_MNSET_REC(sr)) {
+				mnsr = (struct md_mnset_record *)sr;
+				Free(mnsr);
+			} else {
+				Free(sr);
+			}
+			/* the cached set's drives are still in use */
+			if (md_in_daemon)
+				for (dr = tsr->sr_drivechain;
+				    dr != (md_drive_record *)NULL;
+				    dr = dr->dr_next)
+					url_addl(&url_used, dr->dr_selfid);
+			continue;
+		}
+
+		/* Mark the record as one to be checked */
+		sr->sr_flags |= MD_SR_CHECK;
+
+		sr_cache_add(sr);
+
+		/* If MNdiskset, go get the node records */
+		if (MD_MNSET_REC(sr)) {
+			mnsr = (struct md_mnset_record *)sr;
+			mnsr->sr_nodechain = NULL;
+			p = &mnsr->sr_noderec;
+			while ((nr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
+			    MDDB_UR_NR, p, ep)) != NULL) {
+				nr->nr_next = NULL;
+
+				if (md_in_daemon)
+					url_addl(&url_used, nr->nr_selfid);
+
+				/*
+				 * Turn off ALIVE node flag based on member
+				 * list.
+				 * If ALIVE flag is not set, reset OWN flag.
+				 * If this node is mynode, set the OWN flag
+				 * to match the ownership of the diskset.
+				 */
+				if (md_in_daemon) {
+					nr->nr_flags &= ~MD_MN_NODE_ALIVE;
+					nl2 = nl;
+					while (nl2) {
+						/*
+						 * If in member list,
+						 * set alive.
+						 */
+						if (nl2->msl_node_id ==
+						    nr->nr_nodeid) {
+							nr->nr_flags |=
+							    MD_MN_NODE_ALIVE;
+							break;
+						}
+						nl2 = nl2->next;
+					}
+					/*
+					 * If mynode is in member list, then
+					 * check to see if set is snarfed.
+					 * If set snarfed, set own flag;
+					 * otherwise reset it.
+					 * Don't change master even if
+					 * node isn't an owner node, since
+					 * node may be master, but hasn't
+					 * joined the set yet.
+					 */
+					if (nr->nr_flags & MD_MN_NODE_ALIVE) {
+						if (strcmp(nr->nr_nodename,
+						    mynode()) == 0) {
+							if (s_ownset(
+							    mnsr->sr_setno,
+							    ep)) {
+								nr->nr_flags |=
+								    MD_MN_NODE_OWN;
+							} else {
+								nr->nr_flags &=
+								    ~MD_MN_NODE_OWN;
+							}
+						}
+					} else {
+						if (strcmp(nr->nr_nodename,
+						    mynode()) == 0) {
+							/*
+							 * If my node isn't in
+							 * member list then
+							 * reset master.
+							 */
+							mnsr = (struct
+							    md_mnset_record *)sr;
+							mnsr->sr_master_nodeid =
+							    MD_MN_INVALID_NID;
+							mnsr->sr_master_nodenm[0] =
+							    '\0';
+						}
+						nr->nr_flags &= ~MD_MN_NODE_OWN;
+					}
+				}
+
+				/*
+				 * Must grab nr_nextrec now since
+				 * mnnr_cache_add may change it
+				 * (mnnr_cache_add is storing the nodes in
+				 * an ascending nodeid order list in order
+				 * to support reconfig).
+				 *
+				 * NOTE(review): p is a pointer into nr, not
+				 * a copy of the id, so if mnnr_cache_add()
+				 * rewrites nr->nr_nextrec (insert at head or
+				 * in the middle) the next lookup follows the
+				 * rewritten id - confirm this is intended.
+				 */
+				if (nr->nr_nextrec != 0)
+					p = &nr->nr_nextrec;
+				else
+					p = NULL;
+
+				mnnr_cache_add((struct md_mnset_record *)sr,
+				    nr);
+
+				/* tell the driver which node id is ours */
+				if ((md_in_daemon) &&
+				    (strcmp(nr->nr_nodename, mynode()) == 0)) {
+					(void) memset(&snp, 0, sizeof (snp));
+					snp.sn_nodeid = nr->nr_nodeid;
+					snp.sn_setno = mnsr->sr_setno;
+					if (metaioctl(MD_MN_SET_NODEID, &snp,
+					    &snp.sn_mde, NULL) != 0) {
+						(void) mdstealerror(ep,
+						    &snp.sn_mde);
+					}
+				}
+
+				if (p == NULL)
+					break;
+			}
+			if (! mdisok(ep)) {
+				if (! mdissyserror(ep, ENOENT))
+					goto out;
+				mdclrerror(ep);
+			}
+		}
+
+		if (sr->sr_driverec == 0)
+			continue;
+
+		/* Go get the drive records */
+		p = &sr->sr_driverec;
+		while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
+		    MDDB_UR_DR, p, ep)) != NULL) {
+			dr->dr_next = NULL;
+
+			if (md_in_daemon)
+				url_addl(&url_used, dr->dr_selfid);
+
+			dr_cache_add(sr, dr);
+
+			if (dr->dr_nextrec == 0)
+				break;
+
+			p = &dr->dr_nextrec;
+		}
+		if (! mdisok(ep)) {
+			if (! mdissyserror(ep, ENOENT))
+				goto out;
+			mdclrerror(ep);
+			/*
+			 * If dr_nextrec was not valid, or we had some
+			 * problem getting the record, we end up here.
+			 * get_ur_rec() zeroes the recid we passed in,
+			 * if we had a failure getting a record using a key,
+			 * so we simply commit the set record and valid
+			 * drive records, if this fails, we hand an error
+			 * back to the caller.
+			 */
+			commitset(sr, FALSE, ep);
+			if (! mdisok(ep))
+				goto out;
+		}
+	}
+	if (! mdisok(ep)) {
+		if (! mdissyserror(ep, ENOENT))
+			goto out;
+		mdclrerror(ep);
+	}
+
+	/*
+	 * If the daemon called, go through the USER records and cleanup
+	 * any that are not used by valid sets.
+	 */
+	if (md_in_daemon) {
+		id = 0;
+		/* Make a list of records to delete */
+		while ((reqp = get_db_rec(MD_UR_GET_NEXT, MD_LOCAL_SET,
+		    MDDB_USER, 0, &id, ep)) != NULL) {
+			/* only set and drive records are candidates */
+			if (reqp->ur_type2 != MDDB_UR_SR &&
+			    reqp->ur_type2 != MDDB_UR_DR) {
+				Free((void *)reqp->ur_data);
+				Free(reqp);
+				continue;
+			}
+			if (! url_findl(url_used, reqp->ur_recid))
+				url_addl(&url_tode, reqp->ur_recid);
+			Free((void *)reqp->ur_data);
+			Free(reqp);
+		}
+		if (! mdisok(ep)) {
+			if (! mdissyserror(ep, ENOENT))
+				goto out;
+			mdclrerror(ep);
+		}
+
+		/* Delete all the delete listed records */
+		for (urlp = url_tode; urlp != NULL; urlp = urlp->url_nx) {
+			s_delrec(urlp->url_recid, &xep);
+			if (! mdisok(&xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	url_freel(&url_used);
+	url_freel(&url_tode);
+
+	if (nodecnt)
+		meta_free_nodelist(nl);
+
+	/* Mark the snarf complete */
+	setsnarfdone = 2;
+	return (0);
+
+out:
+	url_freel(&url_used);
+	url_freel(&url_tode);
+
+	sr_cache_flush(1);
+
+	if (nodecnt)
+		meta_free_nodelist(nl);
+
+	/* Snarf failed, reset state */
+	setsnarfdone = 0;
+
+	return (-1);
+}
+
+/*
+ * Append the set record sr to the end of the set record cache.
+ * A snarf must be in progress or complete.
+ */
+void
+sr_cache_add(md_set_record *sr)
+{
+	md_set_record	**tailpp = &setrecords;
+
+	assert(setsnarfdone != 0);
+
+	/* advance to the link that terminates the list */
+	while (*tailpp != NULL)
+		tailpp = &(*tailpp)->sr_next;
+
+	*tailpp = sr;
+}
+
+/*
+ * Unlink the set record with id recid from the cache and free it.
+ * If that leaves the cache empty the snarf state is reset to 0.
+ */
+void
+sr_cache_del(mddb_recid_t recid)
+{
+	md_set_record	**linkpp;
+	md_set_record	*victim;
+
+	assert(setsnarfdone != 0);
+
+	for (linkpp = &setrecords; *linkpp != NULL;
+	    linkpp = &(*linkpp)->sr_next) {
+		if ((*linkpp)->sr_selfid != recid)
+			continue;
+
+		/* splice the record out, then release it */
+		victim = *linkpp;
+		*linkpp = victim->sr_next;
+		Free(victim);
+		break;
+	}
+
+	if (setrecords == NULL)
+		setsnarfdone = 0;
+}
+
+/*
+ * Append drive record dr to the end of sr's cached drive chain, keeping
+ * the record id links (sr_driverec / dr_nextrec) in step with the
+ * pointer links.
+ */
+void
+dr_cache_add(md_set_record *sr, md_drive_record *dr)
+{
+	md_drive_record	*tail;
+
+	assert(setsnarfdone != 0);
+
+	assert(sr != NULL);
+
+	tail = sr->sr_drivechain;
+	if (tail == NULL) {
+		/* first drive of the set */
+		sr->sr_drivechain = dr;
+		sr->sr_driverec = dr->dr_selfid;
+		return;
+	}
+
+	while (tail->dr_next != NULL)
+		tail = tail->dr_next;
+
+	tail->dr_next = dr;
+	tail->dr_nextrec = dr->dr_selfid;
+}
+
+/*
+ * Unlink the drive record with id recid from sr's cached drive chain
+ * and free it.  Both the pointer links and the record id links are
+ * repaired.  Nothing happens if no such record is cached.
+ */
+void
+dr_cache_del(md_set_record *sr, mddb_recid_t recid)
+{
+	md_drive_record	*prev = NULL;
+	md_drive_record	*cur;
+
+	assert(setsnarfdone != 0);
+
+	assert(sr != NULL);
+
+	for (cur = sr->sr_drivechain; cur != NULL;
+	    prev = cur, cur = cur->dr_next) {
+		if (cur->dr_selfid != recid)
+			continue;
+
+		if (prev == NULL) {
+			/* removing the head of the chain */
+			sr->sr_drivechain = cur->dr_next;
+			sr->sr_driverec = cur->dr_nextrec;
+		} else {
+			prev->dr_next = cur->dr_next;
+			prev->dr_nextrec = cur->dr_nextrec;
+		}
+		Free(cur);
+		break;
+	}
+}
+
+/*
+ * Insert node record nr into mnsr's cached node chain.
+ *
+ * Nodes must be kept in ascending node id order in order to
+ * support reconfig.
+ *
+ * This routine may change nr->nr_next and nr->nr_nextrec; they are
+ * only rewritten when nr is placed in front of an existing record and
+ * are left as passed in when nr becomes the last (or only) record.
+ */
+void
+mnnr_cache_add(md_mnset_record *mnsr, md_mnnode_record *nr)
+{
+	md_mnnode_record	*prev, *cur;
+
+	assert(mnsr != NULL);
+
+	cur = mnsr->sr_nodechain;
+
+	/*
+	 * New head: either the chain is empty, or
+	 * new_record->nodeid < first_record->nodeid.
+	 */
+	if (cur == NULL || nr->nr_nodeid < cur->nr_nodeid) {
+		if (cur != NULL) {
+			nr->nr_next = cur;
+			nr->nr_nextrec = mnsr->sr_noderec;
+		}
+		mnsr->sr_nodechain = nr;
+		mnsr->sr_noderec = nr->nr_selfid;
+		return;
+	}
+
+	/*
+	 * Walk list looking for the first record with a larger node id;
+	 * prev trails one record behind cur.
+	 */
+	for (prev = cur, cur = cur->nr_next; cur != NULL;
+	    prev = cur, cur = cur->nr_next) {
+		if (nr->nr_nodeid < cur->nr_nodeid)
+			break;
+	}
+
+	/* Insert between prev and cur; at the end of list cur is NULL */
+	if (cur != NULL) {
+		nr->nr_next = cur;
+		nr->nr_nextrec = cur->nr_selfid;
+	}
+	prev->nr_next = nr;
+	prev->nr_nextrec = nr->nr_selfid;
+}
+
+/*
+ * Unlink the node record with id recid from mnsr's cached node chain
+ * and free it, repairing both pointer and record id links.  Nothing
+ * happens if no such record is cached.
+ */
+void
+mnnr_cache_del(md_mnset_record *mnsr, mddb_recid_t recid)
+{
+	md_mnnode_record	*prev = NULL;
+	md_mnnode_record	*cur;
+
+	assert(mnsr != NULL);
+
+	for (cur = mnsr->sr_nodechain; cur != NULL;
+	    prev = cur, cur = cur->nr_next) {
+		if (cur->nr_selfid != recid)
+			continue;
+
+		if (prev == NULL) {
+			/* removing the head of the chain */
+			mnsr->sr_nodechain = cur->nr_next;
+			mnsr->sr_noderec = cur->nr_nextrec;
+		} else {
+			prev->nr_next = cur->nr_next;
+			prev->nr_nextrec = cur->nr_nextrec;
+		}
+		Free(cur);
+		break;
+	}
+}
+
+/*
+ * Return 1 if the cached set named setname has MD_SR_AUTO_TAKE set,
+ * 0 if it does not, if no such set is cached, or if the snarf fails.
+ * Outside the daemon the cache is loaded on demand.
+ */
+int
+metad_isautotakebyname(char *setname)
+{
+	md_set_record	*sr;
+	md_error_t	error = mdnullerror;
+
+	if (md_in_daemon) {
+		assert(setsnarfdone != 0);
+	} else if (set_snarf(&error)) {
+		mdclrerror(&error);
+		return (0);
+	}
+
+	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
+		if (strcmp(setname, sr->sr_setname) != 0)
+			continue;
+
+		/* first match decides */
+		return ((sr->sr_flags & MD_SR_AUTO_TAKE) ? 1 : 0);
+	}
+
+	return (0);
+}
+
+/*
+ * Return 1 if the cached set numbered setno has MD_SR_AUTO_TAKE set,
+ * 0 if it does not, if no such set is cached, or if the snarf fails.
+ * Outside the daemon the cache is loaded on demand.
+ */
+int
+metad_isautotakebynum(set_t setno)
+{
+	md_set_record	*sr;
+	md_error_t	error = mdnullerror;
+
+	if (md_in_daemon) {
+		assert(setsnarfdone != 0);
+	} else if (set_snarf(&error)) {
+		mdclrerror(&error);
+		return (0);
+	}
+
+	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
+		if (setno != sr->sr_setno)
+			continue;
+
+		/* first match decides */
+		return ((sr->sr_flags & MD_SR_AUTO_TAKE) ? 1 : 0);
+	}
+
+	return (0);
+}
+
+/*
+ * Look up a cached set record by set name.  Returns the record, or NULL
+ * with MDE_NO_SET in ep when no cached set has that name.  The cache
+ * must already have been loaded.
+ */
+md_set_record *
+metad_getsetbyname(char *setname, md_error_t *ep)
+{
+	md_set_record	*sr = setrecords;
+	char		buf[100];
+
+	assert(setsnarfdone != 0);
+
+	while (sr != NULL) {
+		if (strcmp(setname, sr->sr_setname) == 0)
+			return (sr);
+		sr = sr->sr_next;
+	}
+
+	/* not found: describe what was being looked for */
+	(void) snprintf(buf, sizeof (buf), "setname \"%s\"", setname);
+	(void) mderror(ep, MDE_NO_SET, buf);
+	return (NULL);
+}
+
+/*
+ * Look up a cached set record by set number.  Outside the daemon the
+ * cache is loaded on demand.  Returns the record, or NULL with the
+ * error in ep (MDE_NO_SET when no cached set has that number, or
+ * whatever set_snarf() reported).
+ */
+md_set_record *
+metad_getsetbynum(set_t setno, md_error_t *ep)
+{
+	md_set_record	*sr;
+	char		buf[100];
+
+	if (md_in_daemon)
+		assert(setsnarfdone != 0);
+	else if (set_snarf(ep))	/* BYPASS DAEMON mode */
+		return (NULL);
+
+	for (sr = setrecords; sr != NULL; sr = sr->sr_next)
+		if (setno == sr->sr_setno)
+			return (sr);
+
+	/* bounded formatting, as in metad_getsetbyname() */
+	(void) snprintf(buf, sizeof (buf), "setno %u", setno);
+	(void) mderror(ep, MDE_NO_SET, buf);
+	return (NULL);
+}
+
+
+/*
+ * Commit the set record and all of its associated records
+ * (drive records, node records for a MNset) to the local mddb.
+ *
+ * Each record's data is first sent to the kernel (MD_DB_SETDATA) and
+ * then all of them are written out together (MD_DB_COMMIT_MANY).  If
+ * inc_genid is non-zero the generation id of every record is bumped
+ * first.  On the first failing request the error is moved into ep and
+ * the remaining steps are skipped.
+ *
+ * For traditional sets the node names are converted to node ids for
+ * the duration of the commit; they are converted back, and the record
+ * id list is released, on every exit path.
+ */
+void
+commitset(md_set_record *sr, int inc_genid, md_error_t *ep)
+{
+	int			drc, nrc, rc;
+	int			*recs = NULL;
+	uint_t			size;
+	md_drive_record		*dr;
+	mddb_userreq_t		req;
+	md_mnset_record		*mnsr = NULL;
+	md_mnnode_record	*nr;
+
+	assert(setsnarfdone != 0);
+
+	/*
+	 * Cluster nodename support
+	 * Convert nodename -> nodeid
+	 * Don't do this for MN disksets since we've already stored
+	 * both the nodeid and name.
+	 */
+	if (!(MD_MNSET_REC(sr)))
+		sdssc_cm_sr_nm2nid(sr);
+
+	/* Send down to kernel the data in mddb USER set record */
+	if (inc_genid)
+		sr->sr_genid++;
+	(void) memset(&req, 0, sizeof (req));
+	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
+	if (MD_MNSET_REC(sr)) {
+		req.ur_size = sizeof (*mnsr);
+	} else {
+		req.ur_size = sizeof (*sr);
+	}
+	req.ur_data = (uintptr_t)sr;
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+		goto out;
+	}
+
+	/*
+	 * Walk through the drive records associated with this set record
+	 * and send down to kernel the data in mddb USER drive record.
+	 */
+	drc = 0;
+	dr = sr->sr_drivechain;
+	while (dr) {
+		if (inc_genid)
+			dr->dr_genid++;
+		METAD_SETUP_DR(MD_DB_SETDATA, dr->dr_selfid)
+		req.ur_size = sizeof (*dr);
+		req.ur_data = (uintptr_t)dr;
+		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &req.ur_mde);
+			goto out;
+		}
+		drc++;
+		dr = dr->dr_next;
+	}
+
+
+	/*
+	 * If this set is a multi-node set -
+	 * walk through the node records associated with this set record
+	 * and send down to kernel the data in mddb USER node record.
+	 */
+	nrc = 0;
+	if (MD_MNSET_REC(sr)) {
+		mnsr = (struct md_mnset_record *)sr;
+		nr = mnsr->sr_nodechain;
+		while (nr) {
+			if (inc_genid)
+				nr->nr_genid++;
+			METAD_SETUP_NR(MD_DB_SETDATA, nr->nr_selfid)
+			req.ur_size = sizeof (*nr);
+			req.ur_data = (uintptr_t)nr;
+			if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL)
+			    != 0) {
+				(void) mdstealerror(ep, &req.ur_mde);
+				goto out;
+			}
+			nrc++;
+			nr = nr->nr_next;
+		}
+	}
+
+	/*
+	 * Set up list of mddb USER recids containing set and drive records
+	 * and node records if a MNset.  Two extra slots hold the set
+	 * record's id and the terminating null recid.
+	 */
+	rc = 0;
+	size = (nrc + drc + 2) * sizeof (int);
+	recs = Zalloc(size);
+	/* First recid in list is the set record's id */
+	recs[rc] = sr->sr_selfid;
+	rc++;
+	dr = sr->sr_drivechain;
+	while (dr) {
+		/* Now, fill in the drive record ids */
+		recs[rc] = dr->dr_selfid;
+		dr = dr->dr_next;
+		rc++;
+	}
+	if (MD_MNSET_REC(sr)) {
+		nr = mnsr->sr_nodechain;
+		while (nr) {
+			/* If a MNset, fill in the node record ids */
+			recs[rc] = nr->nr_selfid;
+			nr = nr->nr_next;
+			rc++;
+		}
+	}
+	/* Set last record to null recid */
+	recs[rc] = 0;
+
+	/* Write out the set and drive and node records to the local mddb */
+	METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
+	req.ur_size = size;
+	req.ur_data = (uintptr_t)recs;
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
+		(void) mdstealerror(ep, &req.ur_mde);
+
+out:
+	/*
+	 * Cluster nodename support
+	 * Convert nodeid -> nodename
+	 * Don't do this for MN disksets since we've already stored
+	 * both the nodeid and name.
+	 *
+	 * This is done on failure as well, so the cached record is never
+	 * left holding node ids in place of node names.
+	 */
+	if (!(MD_MNSET_REC(sr)))
+		sdssc_cm_sr_nid2nm(sr);
+
+	if (recs != NULL)
+		Free(recs);
+}
+
+/*
+ * This routine always returns a md_set_record structure, even
+ * if the set record describes a MN set.  This will allow pre-MN
+ * SVM RPC code to access a MN set record and to display it.
+ *
+ * The MN SVM RPC code detects if the set record returned describes
+ * a MN set and then will copy it using mnsetdup.
+ *
+ * Only sizeof (md_set_record) bytes are copied; every record on the
+ * drive chain is duplicated with drdup() and the copy's sr_next is
+ * cleared.  Returns NULL when sr is NULL or Malloc() yields NULL.
+ */
+md_set_record *
+setdup(md_set_record *sr)
+{
+	md_set_record	*copy;
+	md_drive_record	**linkp;
+
+	if (sr == NULL)
+		return (NULL);
+	if ((copy = Malloc(sizeof (*sr))) == NULL)
+		return (NULL);
+
+	(void) memmove(copy, sr, sizeof (*sr));
+	copy->sr_next = NULL;
+
+	/*
+	 * The copied chain head still points at the original drive
+	 * records; replace each link in turn with a private duplicate.
+	 */
+	for (linkp = &copy->sr_drivechain; *linkp != NULL;
+	    linkp = &(*linkp)->dr_next)
+		*linkp = drdup(*linkp);
+
+	return (copy);
+}
+
+/*
+ * This routine only copies MN set records.  If a NULL pointer or a
+ * non-MN set record is passed in, a NULL pointer will be returned.
+ *
+ * The whole md_mnset_record is copied, the copy's sr_next is cleared,
+ * and both the drive chain (drdup) and the node chain (nrdup) are
+ * duplicated record by record.
+ */
+md_mnset_record *
+mnsetdup(md_mnset_record *mnsr)
+{
+	md_mnset_record		*tmnsr = NULL;
+	md_drive_record		**tdrpp = NULL;
+	md_mnnode_record	**tnrpp = NULL;
+
+	/*
+	 * Check for NULL before MD_MNSET_REC() looks at the record;
+	 * the macro is handed the pointer and inspects the record itself.
+	 */
+	if (mnsr == NULL || !MD_MNSET_REC(mnsr)) {
+		return (NULL);
+	}
+
+	if ((tmnsr = Malloc(sizeof (*mnsr))) != NULL) {
+		(void) memmove(tmnsr, mnsr, sizeof (*mnsr));
+		tmnsr->sr_next = NULL;
+		tdrpp = &tmnsr->sr_drivechain;
+		while (*tdrpp) {
+			*tdrpp = drdup(*tdrpp);
+			tdrpp = &(*tdrpp)->dr_next;
+		}
+		tnrpp = &tmnsr->sr_nodechain;
+		while (*tnrpp) {
+			*tnrpp = nrdup(*tnrpp);
+			tnrpp = &(*tnrpp)->nr_next;
+		}
+	}
+	return (tmnsr);
+}
+
+/*
+ * Return a Malloc()ed byte-for-byte copy of a drive record.  The copy's
+ * dr_next still refers to the original's successor.  Returns NULL when
+ * dr is NULL or Malloc() yields NULL.
+ */
+md_drive_record *
+drdup(md_drive_record *dr)
+{
+	md_drive_record	*copy;
+
+	if (dr == NULL)
+		return (NULL);
+
+	copy = Malloc(sizeof (*dr));
+	if (copy != NULL)
+		(void) memmove(copy, dr, sizeof (*dr));
+
+	return (copy);
+}
+
+/*
+ * Return a Malloc()ed byte-for-byte copy of a MN node record.  The copy's
+ * nr_next still refers to the original's successor.  Returns NULL when
+ * nr is NULL or Malloc() yields NULL.
+ */
+md_mnnode_record *
+nrdup(md_mnnode_record *nr)
+{
+	md_mnnode_record	*copy;
+
+	if (nr == NULL)
+		return (NULL);
+
+	copy = Malloc(sizeof (*nr));
+	if (copy != NULL)
+		(void) memmove(copy, nr, sizeof (*nr));
+
+	return (copy);
+}
+
+/*
+ * Duplicate parts of the drive descriptor list for this node.
+ * Each md_drive_desc is copied and given a fresh, zeroed mddrivename
+ * structure in which only the drive name string (cname) and, when the
+ * original has one, the devid string are duplicated; all other pointers
+ * in the new mddrivename are left NULL.
+ * Returns NULL if dd is NULL or if an allocation fails (everything
+ * allocated so far is released in that case).
+ * Returns pointer (non-NULL) to dup'd list if successful.
+ *
+ * NOTE(review): the duplicated devid is now attached to the copy
+ * (previously it was allocated, filled and then dropped).  Confirm that
+ * whatever frees these lists also releases dd_dnp->devid.
+ */
+md_drive_desc *
+dd_list_dup(md_drive_desc *dd)
+{
+	md_drive_desc	*orig_dd;
+	md_drive_desc	*copy_dd, *copy_dd_prev = NULL;
+	md_drive_desc	*copy_dd_head = NULL;
+	mddrivename_t	*copy_dnp;
+	char		*copy_cname;
+	char		*copy_devid;
+	size_t		cname_sz, devid_sz;
+
+	if (dd == NULL)
+		return (NULL);
+
+	for (orig_dd = dd; orig_dd != NULL; orig_dd = orig_dd->dd_next) {
+		/*
+		 * cname and devid are pointers, so the buffers must be
+		 * sized from the strings, not from sizeof the fields.
+		 */
+		cname_sz = strlen(orig_dd->dd_dnp->cname) + 1;
+		devid_sz = (orig_dd->dd_dnp->devid != NULL) ?
+		    strlen(orig_dd->dd_dnp->devid) + 1 : 0;
+
+		copy_dd = Zalloc(sizeof (*copy_dd));
+		copy_dnp = Zalloc(sizeof (*copy_dnp));
+		copy_cname = Zalloc(cname_sz);
+		copy_devid = (devid_sz != 0) ? Zalloc(devid_sz) : NULL;
+
+		if ((copy_dd == NULL) || (copy_dnp == NULL) ||
+		    (copy_cname == NULL) ||
+		    ((devid_sz != 0) && (copy_devid == NULL))) {
+			/* release this iteration's pieces */
+			if (copy_devid)
+				Free(copy_devid);
+			if (copy_cname)
+				Free(copy_cname);
+			if (copy_dnp)
+				Free(copy_dnp);
+			if (copy_dd)
+				Free(copy_dd);
+
+			/* release the entries completed so far */
+			while (copy_dd_head) {
+				copy_dd = copy_dd_head->dd_next;
+				copy_dnp = copy_dd_head->dd_dnp;
+				if (copy_dnp != NULL) {
+					if (copy_dnp->devid)
+						Free(copy_dnp->devid);
+					if (copy_dnp->cname)
+						Free(copy_dnp->cname);
+					Free(copy_dnp);
+				}
+				Free(copy_dd_head);
+				copy_dd_head = copy_dd;
+			}
+			return (NULL);
+		}
+
+		(void) memmove(copy_dd, orig_dd, sizeof (*orig_dd));
+		(void) strlcpy(copy_cname, orig_dd->dd_dnp->cname, cname_sz);
+		if (copy_devid != NULL) {
+			(void) strlcpy(copy_devid, orig_dd->dd_dnp->devid,
+			    devid_sz);
+		}
+
+		/* detach the copy from the original list and name struct */
+		copy_dd->dd_next = NULL;
+		copy_dd->dd_dnp = copy_dnp;
+		copy_dnp->cname = copy_cname;
+		copy_dnp->devid = copy_devid;
+
+		if (copy_dd_prev == NULL)
+			copy_dd_head = copy_dd;
+		else
+			copy_dd_prev->dd_next = copy_dd;
+		copy_dd_prev = copy_dd;
+	}
+
+	return (copy_dd_head);
+}
+
+/*
+ * Throw away the whole cached list of set records: every drive record,
+ * every node record of a MN set and the set records themselves are
+ * freed, setrecords is emptied and setsnarfdone is reset to 0.
+ * When flushnames is non-zero, metaflushnames(0) is called as well.
+ */
+void
+sr_cache_flush(int flushnames)
+{
+	md_set_record		*cur, *next_sr;
+	md_drive_record		*drv, *next_drv;
+	md_mnnode_record	*node, *next_node;
+
+	for (cur = setrecords; cur != NULL; cur = next_sr) {
+		/* drive records go first */
+		for (drv = cur->sr_drivechain; drv != NULL; drv = next_drv) {
+			next_drv = drv->dr_next;
+			Free(drv);
+		}
+
+		/* pick up the successor before the record is freed */
+		next_sr = cur->sr_next;
+
+		if (MD_MNSET_REC(cur)) {
+			md_mnset_record	*mn = (struct md_mnset_record *)cur;
+
+			for (node = mn->sr_nodechain; node != NULL;
+			    node = next_node) {
+				next_node = node->nr_next;
+				Free(node);
+			}
+			Free(mn);
+		} else {
+			Free(cur);
+		}
+	}
+
+	setrecords = NULL;
+
+	setsnarfdone = 0;
+
+	/* This will cause the other caches to be cleared */
+	if (flushnames)
+		metaflushnames(0);
+}
+
+/*
+ * Remove the cached set record for setno (if any) from setrecords and
+ * free it together with its drive records and, for a MN set, its node
+ * records - the same pieces sr_cache_flush() releases for each record.
+ * Regardless of whether a record was found, setsnarfdone is reset to 0
+ * and metaflushnames(0) is called.
+ */
+void
+sr_cache_flush_setno(set_t setno)
+{
+	md_set_record		*sr, *tsr;
+	md_mnset_record		*mnsr;
+	md_drive_record		*dr, *tdr;
+	md_mnnode_record	*nr, *tnr;
+
+	assert(setsnarfdone != 0);
+
+	/* tsr trails sr so the matching record can be unlinked */
+	for (sr = tsr = setrecords; sr; tsr = sr, sr = sr->sr_next) {
+		if (sr->sr_setno != setno)
+			continue;
+
+		dr = sr->sr_drivechain;
+		while (dr != NULL) {
+			tdr = dr;
+			dr = dr->dr_next;
+			Free(tdr);
+		}
+		if (sr == setrecords)
+			setrecords = sr->sr_next;
+		else
+			tsr->sr_next = sr->sr_next;
+		if (MD_MNSET_REC(sr)) {
+			mnsr = (struct md_mnset_record *)sr;
+			/* node records belong to the set record too */
+			nr = mnsr->sr_nodechain;
+			while (nr != NULL) {
+				tnr = nr;
+				nr = nr->nr_next;
+				Free(tnr);
+			}
+			Free(mnsr);
+		} else {
+			Free(sr);
+		}
+		break;
+	}
+
+	setsnarfdone = 0;
+
+	/* This will cause the other caches to be cleared */
+	metaflushnames(0);
+}
+
+/*
+ * Issue the MD_DB_OWNSET ioctl for setno and return the owns_set value
+ * it reports.  Returns 0 when the ioctl fails (ep is handed to
+ * metaioctl for the error).
+ */
+int
+s_ownset(set_t setno, md_error_t *ep)
+{
+	mddb_ownset_t	arg;
+
+	arg.setno = setno;
+	arg.owns_set = MD_SETOWNER_NONE;
+
+	if (metaioctl(MD_DB_OWNSET, &arg, ep, NULL) == 0)
+		return (arg.owns_set);
+
+	return (0);
+}
+
+/*
+ * Delete the set called setname from the local mddb.
+ *
+ * The set record is looked up with getsetbyname(); the /dev/md/<setname>
+ * entry and the set's lock file are unlinked; then the node records (MN
+ * set), the drive records and finally the set record itself are deleted
+ * with MD_DB_DELETE user requests, with SE_NOTIFY events sent along the
+ * way and the matching cache entries removed.
+ *
+ * If any delete request fails, its error is stolen into ep, the set
+ * record obtained from getsetbyname() is released with free_sr() and the
+ * routine returns at once, leaving the remaining records in place.
+ *
+ * NOTE(review): xep is passed to meta_smf_disable() near the end and is
+ * not cleared afterwards - confirm that nothing can be left in it.
+ */
+void
+s_delset(char *setname, md_error_t *ep)
+{
+ md_set_record *sr;
+ md_set_record *tsr;
+ md_drive_record *dr;
+ md_drive_record *tdr;
+ md_mnnode_record *nr, *tnr;
+ mddb_userreq_t req;
+ char stringbuf[100];
+ int i;
+ mdsetname_t *sp = NULL;
+ mddrivename_t *dn = NULL;
+ mdname_t *np = NULL;
+ md_dev64_t dev;
+ side_t myside = MD_SIDEWILD;
+ md_error_t xep = mdnullerror;
+ md_mnset_record *mnsr;
+ int num_sets = 0;
+ int num_mn_sets = 0;
+
+ (void) memset(&req, 0, sizeof (mddb_userreq_t));
+
+ if ((sr = getsetbyname(setname, ep)) == NULL)
+ return;
+
+ /* sp may be NULL here; any lookup error is deliberately dropped */
+ sp = metasetnosetname(sr->sr_setno, &xep);
+ mdclrerror(&xep);
+
+ if (MD_MNSET_REC(sr)) {
+ /*
+ * If this node is a set owner, halt the set before
+ * deleting the set records. Ignore any errors since
+ * s_ownset and halt_set could fail if panic had occurred
+ * during the add/delete of a node.
+ */
+ if (s_ownset(sr->sr_setno, &xep)) {
+ mdclrerror(&xep);
+ if (halt_set(sp, &xep))
+ mdclrerror(&xep);
+ }
+ }
+
+ /* Remove the set's /dev/md entry and its lock file; failures ignored */
+ (void) snprintf(stringbuf, sizeof (stringbuf), "/dev/md/%s", setname);
+ (void) unlink(stringbuf);
+ (void) unlink(meta_lock_name(sr->sr_setno));
+
+ if (MD_MNSET_REC(sr)) {
+ /* MN set: delete each node record and remember our own node id */
+ mnsr = (struct md_mnset_record *)sr;
+ nr = mnsr->sr_nodechain;
+ while (nr) {
+ /* Setting myside for later use */
+ if (strcmp(mynode(), nr->nr_nodename) == 0)
+ myside = nr->nr_nodeid;
+
+ (void) memset(&req, 0, sizeof (req));
+ METAD_SETUP_NR(MD_DB_DELETE, nr->nr_selfid)
+ if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde,
+ NULL) != 0) {
+ (void) mdstealerror(ep, &req.ur_mde);
+ free_sr(sr);
+ return;
+ }
+ /* advance first: tnr is handed to mnnr_cache_del() below */
+ tnr = nr;
+ nr = nr->nr_next;
+
+ SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
+ sr->sr_setno, tnr->nr_nodeid);
+
+ mnnr_cache_del((struct md_mnset_record *)sr,
+ tnr->nr_selfid);
+ }
+ } else {
+ /* Traditional set: hosts live in sr_nodes[]; nothing to delete */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sr->sr_nodes[i][0] == '\0')
+ continue;
+
+ if (strcmp(mynode(), sr->sr_nodes[i]) == 0)
+ myside = i;
+
+ SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
+ sr->sr_setno, i);
+ }
+ }
+
+ /* Delete every drive record and send the drive notifications */
+ dr = sr->sr_drivechain;
+ while (dr) {
+ (void) memset(&req, 0, sizeof (req));
+ METAD_SETUP_DR(MD_DB_DELETE, dr->dr_selfid)
+ if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+ (void) mdstealerror(ep, &req.ur_mde);
+ free_sr(sr);
+ return;
+ }
+ /* advance first: tdr is handed to dr_cache_del() below */
+ tdr = dr;
+ dr = dr->dr_next;
+
+ /*
+ * Try to resolve the device number of the drive's replica
+ * slice for the notifications; dev stays NODEV64 when our
+ * side or the set name is unknown or a lookup fails.
+ */
+ dev = NODEV64;
+ if (myside != MD_SIDEWILD && sp != NULL) {
+ dn = metadrivename_withdrkey(sp, myside,
+ tdr->dr_key, MD_BASICNAME_OK, &xep);
+ if (dn != NULL) {
+ uint_t rep_slice;
+
+ np = NULL;
+ if (meta_replicaslice(dn, &rep_slice,
+ &xep) == 0) {
+ np = metaslicename(dn, rep_slice, &xep);
+ }
+
+ if (np != NULL)
+ dev = np->dev;
+ else
+ mdclrerror(&xep);
+ } else
+ mdclrerror(&xep);
+ } else
+ mdclrerror(&xep);
+
+ /* Drive is reported removed from this set and added to the local set */
+ SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE,
+ sr->sr_setno, dev);
+ SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE,
+ MD_LOCAL_SET, dev);
+
+ dr_cache_del(sr, tdr->dr_selfid);
+
+ }
+
+ /* Finally delete the set record itself */
+ (void) memset(&req, 0, sizeof (req));
+ METAD_SETUP_SR(MD_DB_DELETE, sr->sr_selfid)
+ if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+ (void) mdstealerror(ep, &req.ur_mde);
+ free_sr(sr);
+ return;
+ }
+
+ SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_SET, sr->sr_setno,
+ NODEV64);
+
+ /* Count the other cached sets (and MN sets) that remain */
+ for (tsr = setrecords; tsr; tsr = tsr->sr_next) {
+ if (tsr == sr)
+ continue;
+
+ num_sets++;
+ if (MD_MNSET_REC(tsr))
+ num_mn_sets++;
+ }
+
+ /* No MN set remains: call meta_smf_disable for META_SMF_MN_DISKSET */
+ if (num_mn_sets == 0)
+ (void) meta_smf_disable(META_SMF_MN_DISKSET, &xep);
+
+ /* The set we just deleted was the last one: no other sets remain */
+ if (num_sets == 0)
+ (void) meta_smf_disable(META_SMF_DISKSET, &xep);
+
+ sr_cache_del(sr->sr_selfid);
+ free_sr(sr);
+
+}
+
+/*
+ * Send a single MD_DB_DELETE user request for record recid.  The request
+ * is set up with METAD_SETUP_SR; on failure the request's error is
+ * stolen into ep.
+ */
+void
+s_delrec(mddb_recid_t recid, md_error_t *ep)
+{
+	mddb_userreq_t	req;
+
+	(void) memset(&req, 0, sizeof (req));
+	METAD_SETUP_SR(MD_DB_DELETE, recid)
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) == 0)
+		return;
+
+	(void) mdstealerror(ep, &req.ur_mde);
+}
+
+/*
+ * resnarf the imported set
+ *
+ * Scans the MDDB_UR_SR user records of the local set for the record
+ * whose sr_setno equals setno, adds it to the set record cache and then
+ * follows its drive record chain (sr_driverec / dr_nextrec), adding each
+ * drive record to the cache as well.
+ *
+ * setsnarfdone is set to 1 while the scan runs, to 2 on success and back
+ * to 0 on failure.  Returns 0 on success; on failure returns -1 after
+ * flushing the set record cache with sr_cache_flush(1).
+ *
+ * A multi-node set record is rejected (treated as failure).
+ *
+ * NOTE(review): records returned by get_ur_rec() that are skipped with
+ * "continue", and the MN record abandoned by "goto out", are not freed
+ * here - confirm whether get_ur_rec() hands over ownership.
+ */
+int
+resnarf_set(
+ set_t setno,
+ md_error_t *ep
+)
+{
+ md_set_record *sr;
+ md_drive_record *dr;
+ mddb_recid_t id, *p;
+
+ /* A stale-database error is tolerated; anything else is fatal */
+ if (meta_setup_db_locations(ep) != 0) {
+ if (! mdismddberror(ep, MDE_DB_STALE))
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ setsnarfdone = 1;
+
+ id = 0;
+ while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR, &id,
+ ep)) != NULL) {
+
+ if (sr->sr_setno != setno)
+ continue;
+
+ /* Don't allow resnarf of a multi-node diskset */
+ if (MD_MNSET_REC(sr))
+ goto out;
+
+ /* in-core links are rebuilt below as records are cached */
+ sr->sr_next = NULL;
+ sr->sr_drivechain = NULL;
+
+ if (md_in_daemon)
+ url_addl(&url_used, sr->sr_selfid);
+
+ sr->sr_flags |= MD_SR_CHECK;
+
+ sr_cache_add(sr);
+
+ /* set has no drive records: done scanning */
+ if (sr->sr_driverec == 0)
+ break;
+
+ /* follow the on-disk chain of drive record ids */
+ p = &sr->sr_driverec;
+ while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
+ MDDB_UR_DR, p, ep)) != NULL) {
+ dr->dr_next = NULL;
+
+ if (md_in_daemon)
+ url_addl(&url_used, dr->dr_selfid);
+
+ dr_cache_add(sr, dr);
+
+ if (dr->dr_nextrec == 0)
+ break;
+
+ p = &dr->dr_nextrec;
+ }
+ /*
+ * An ENOENT while following the drive chain is not fatal:
+ * the error is cleared and the set is committed again
+ * (without bumping generation ids) with what was found.
+ */
+ if (! mdisok(ep)) {
+ if (! mdissyserror(ep, ENOENT))
+ goto out;
+ mdclrerror(ep);
+ commitset(sr, FALSE, ep);
+ if (! mdisok(ep))
+ goto out;
+ }
+ }
+ /* ENOENT from the set record scan is cleared rather than reported */
+ if (! mdisok(ep)) {
+ if (! mdissyserror(ep, ENOENT))
+ goto out;
+ mdclrerror(ep);
+ }
+
+ setsnarfdone = 2;
+
+ url_freel(&url_used);
+ url_freel(&url_tode);
+ return (0);
+
+out:
+ /* failure: drop the bookkeeping lists and the whole record cache */
+ url_freel(&url_used);
+ url_freel(&url_tode);
+
+ sr_cache_flush(1);
+
+ setsnarfdone = 0;
+
+ return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mh.c b/usr/src/lib/lvm/libmeta/common/meta_mh.c
new file mode 100644
index 0000000000..ba0ce10656
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mh.c
@@ -0,0 +1,842 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * MH ioctl functions
+ */
+
+#include <meta.h>
+#include <metamhd.h>
+#include <string.h>
+
+#include "meta_runtime.h"
+
+/*
+ * Default device directory.
+ * NOTE(review): no use of this macro is visible in this part of the
+ * file - confirm it is still needed.
+ */
+#define DEFAULTDEV "/dev/rdsk"
+/*
+ * default timeout values
+ * (units are not stated here - presumably milliseconds, TODO confirm)
+ */
+mhd_mhiargs_t defmhiargs = {
+ 1000, /* failfast */
+ { 6000, 6000, 30000 } /* take ownership */
+};
+
+/*
+ * RPC timeouts, installed on the client handle with
+ * clnt_control(CLSET_TIMEOUT).  tk_own_timeout is also used for the
+ * status and list requests.
+ */
+static md_timeval32_t tk_own_timeout = { 24 * 60 * 60, 0 }; /* 1 day */
+static md_timeval32_t rel_own_timeout = { 24 * 60 * 60, 0 }; /* 1 day */
+
+/*
+ * RPC handle
+ * Created by open_metamhd() and destroyed by close_metamhd().
+ */
+typedef struct {
+ char *hostname; /* Strdup()ed host name; freed by close_metamhd() */
+ CLIENT *clientp; /* RPC client handle for the metamhd connection */
+} mhd_handle_t;
+
+/*
+ * close RPC connection
+ *
+ * Frees the host name, destroys the client's auth handle and the client
+ * itself (when present), then frees the handle.  hp must not be NULL.
+ */
+static void
+close_metamhd(
+	mhd_handle_t	*hp
+)
+{
+	CLIENT	*clnt;
+
+	assert(hp != NULL);
+
+	if (hp->hostname != NULL)
+		Free(hp->hostname);
+
+	clnt = hp->clientp;
+	if (clnt != NULL) {
+		/* the auth handle must go before the client that holds it */
+		auth_destroy(clnt->cl_auth);
+		clnt_destroy(clnt);
+	}
+
+	Free(hp);
+}
+
+/*
+ * open RPC connection to rpc.metamhd
+ *
+ * A NULL or empty hostname selects the local host (mynode()).  The
+ * client is created over "tcp" and its default auth handle is replaced
+ * with one from authsys_create_default().
+ *
+ * Returns a new handle to be released with close_metamhd(), or NULL
+ * with ep set when the client could not be created.
+ */
+static mhd_handle_t *
+open_metamhd(
+	char		*hostname,
+	md_error_t	*ep
+)
+{
+	CLIENT		*clientp;
+	mhd_handle_t	*hp;
+
+	/* default to local host */
+	if ((hostname == NULL) || (*hostname == '\0'))
+		hostname = mynode();
+	assert(hostname != NULL);
+
+	/* open RPC connection */
+	clientp = meta_client_create(hostname, METAMHD, METAMHD_VERSION,
+	    "tcp");
+	if (clientp == NULL) {
+		clnt_pcreateerror(hostname);
+		(void) mdrpccreateerror(ep, hostname, "metamhd clnt_create");
+		return (NULL);
+	}
+
+	/* swap in system-style credentials */
+	auth_destroy(clientp->cl_auth);
+	clientp->cl_auth = authsys_create_default();
+	assert(clientp->cl_auth != NULL);
+
+	/* wrap the connection in a handle */
+	hp = Zalloc(sizeof (*hp));
+	hp->hostname = Strdup(hostname);
+	hp->clientp = clientp;
+	return (hp);
+}
+
+/*
+ * steal and convert mherror_t
+ *
+ * Translates the metamhd error in *mhep into *ep, then frees mhep->name
+ * and zeroes *mhep in every case.
+ * Returns 0 when mhep carried no error (errnum == 0), -1 otherwise.
+ */
+int
+mhstealerror(
+	mhd_error_t	*mhep,
+	md_error_t	*ep
+)
+{
+	int	rval;
+
+	if (mhep->errnum == 0) {
+		/* no error */
+		/* assert(mhep->name == NULL); */
+		rval = 0;
+	} else {
+		/* steal error */
+		rval = -1;
+		switch (mhep->errnum) {
+		case MHD_E_MAJORITY:
+			(void) mderror(ep, MDE_TAKE_OWN, mhep->name);
+			break;
+		case MHD_E_RESERVED:
+			(void) mderror(ep, MDE_RESERVED, mhep->name);
+			break;
+		default:
+			/* anything else is reported as a system error */
+			(void) mdsyserror(ep, mhep->errnum, mhep->name);
+			break;
+		}
+	}
+
+	/* cleanup: the source error is always emptied */
+	if (mhep->name != NULL)
+		Free(mhep->name);
+	(void) memset(mhep, 0, sizeof (*mhep));
+	return (rval);
+}
+
+/*
+ * should we do MHIOCTLs ?
+ *
+ * Returns 1 normally.  When the MD_NOMHIOCTL environment variable is
+ * set, a notice is written to stderr and 0 is returned.
+ */
+static int
+do_mhioctl()
+{
+	if (getenv("MD_NOMHIOCTL") == NULL)
+		return (1);
+
+	(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+	    "NOT doing MH ioctls\n"));
+	(void) fflush(stderr);
+	return (0);
+}
+
+/*
+ * take ownership of drives
+ *
+ *	sname		set name sent with the request
+ *	dnlp		drives to take; their rname strings are sent
+ *	mhiargsp	timeout values copied into the request
+ *	partial_set	non-zero adds MHD_PARTIAL_SET to the options
+ *	ep		error return
+ *
+ * The failfast mode defaults to MHD_FF_DRIVER and can be overridden with
+ * "FAILFAST=<NONE|DRIVER|DEBUG|HALT|PANIC>" in the MD_DEBUG environment
+ * variable (the value must end the string); "NOTHREAD" in MD_DEBUG adds
+ * MHD_SERIAL.
+ *
+ * Returns 0 on success, when owner ioctls are disabled or when the
+ * drive list is empty; returns -1 with ep set on failure.
+ */
+int
+meta_take_own(
+	char			*sname,
+	mddrivenamelist_t	*dnlp,
+	mhd_mhiargs_t		*mhiargsp,
+	int			partial_set,
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	*p;
+	uint_t			ndev = 0;
+	mhd_tkown_args_t	args;
+	mhd_error_t		mherror;
+	mhd_set_t		*mhsp = &args.set;
+	uint_t			i;
+	char			*e;
+	mhd_handle_t		*hp = NULL;
+	int			rval = -1;
+
+	/*
+	 * RFE 4126509.  Check the runtime parameters to see if
+	 * they're set to disable MHIOCTKOWN ioctl() operations
+	 * on the disks.  If so, return immediately without
+	 * performing the operations.
+	 */
+
+	if (do_owner_ioctls() == B_FALSE) {
+		return (0);
+	}
+
+	/* count drives, get set */
+	for (p = dnlp; (p != NULL); p = p->next)
+		++ndev;
+	if (ndev == 0)
+		return (0);
+
+	/* initialize */
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&mherror, 0, sizeof (mherror));
+
+	/* build arguments */
+	mhsp->setname = Strdup(sname);
+	mhsp->drives.drives_len = ndev;
+	mhsp->drives.drives_val
+	    = Calloc(ndev, sizeof (*mhsp->drives.drives_val));
+	for (p = dnlp, i = 0; (i < ndev); p = p->next, ++i) {
+		mhsp->drives.drives_val[i] = Strdup(p->drivenamep->rname);
+	}
+	args.timeouts = *mhiargsp;
+	args.ff_mode = MHD_FF_DRIVER;
+	if (((e = getenv("MD_DEBUG")) != NULL) &&
+	    ((e = strstr(e, "FAILFAST=")) != NULL) &&
+	    ((e = strchr(e, '=')) != NULL)) {
+		++e;
+		if (strcmp(e, "NONE") == 0)
+			args.ff_mode = MHD_FF_NONE;
+		else if (strcmp(e, "DRIVER") == 0)
+			args.ff_mode = MHD_FF_DRIVER;
+		else if (strcmp(e, "DEBUG") == 0)
+			args.ff_mode = MHD_FF_DEBUG;
+		else if (strcmp(e, "HALT") == 0)
+			args.ff_mode = MHD_FF_HALT;
+		else if (strcmp(e, "PANIC") == 0)
+			args.ff_mode = MHD_FF_PANIC;
+	}
+	if (partial_set)
+		args.options |= MHD_PARTIAL_SET;
+	if (((e = getenv("MD_DEBUG")) != NULL) &&
+	    (strstr(e, "NOTHREAD") != NULL)) {
+		args.options |= MHD_SERIAL;
+	}
+
+	/*
+	 * open connection; on failure go through the common cleanup so
+	 * the argument strings built above are released
+	 */
+	if ((hp = open_metamhd(NULL, ep)) == NULL)
+		goto out;
+	clnt_control(hp->clientp, CLSET_TIMEOUT, (char *)&tk_own_timeout);
+
+	/* take ownership */
+	if (mhd_tkown_1(&args, &mherror, hp->clientp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, hp->clientp, hp->hostname,
+		    "metamhd tkown");
+	} else if (mhstealerror(&mherror, ep) == 0) {
+		rval = 0;	/* success */
+	}
+
+	/* cleanup, return status */
+out:
+	xdr_free(xdr_mhd_tkown_args_t, (char *)&args);
+	xdr_free(xdr_mhd_error_t, (char *)&mherror);
+	if (hp != NULL)
+		close_metamhd(hp);
+	return (rval);
+}
+
+/*
+ * take ownership of the drives named by a drive descriptor list
+ *
+ * Builds a temporary drivename list from ddlp, hands it to
+ * meta_take_own() together with sp->setname, frees the temporary list
+ * and returns meta_take_own()'s result.
+ */
+int
+tk_own_bydd(
+	mdsetname_t	*sp,
+	md_drive_desc	*ddlp,
+	mhd_mhiargs_t	*mhiargsp,
+	int		partial_set,
+	md_error_t	*ep
+)
+{
+	mddrivenamelist_t	*names = NULL;
+	mddrivenamelist_t	**endpp = &names;
+	md_drive_desc		*dd = ddlp;
+	int			status;
+
+	/*
+	 * Append each drivename at the tail; endpp tracks the last link
+	 * so the list is never re-walked.
+	 */
+	while (dd != NULL) {
+		endpp = meta_drivenamelist_append_wrapper(endpp, dd->dd_dnp);
+		dd = dd->dd_next;
+	}
+
+	/* take ownership */
+	status = meta_take_own(sp->setname, names, mhiargsp, partial_set,
+	    ep);
+
+	/* cleanup, return status */
+	metafreedrivenamelist(names);
+	return (status);
+}
+
+/*
+ * release ownership of drives
+ *
+ *	sname		set name sent with the request
+ *	dnlp		drives to release; their rname strings are sent
+ *	partial_set	non-zero adds MHD_PARTIAL_SET to the options
+ *	ep		error return
+ *
+ * "NOTHREAD" in the MD_DEBUG environment variable adds MHD_SERIAL.
+ *
+ * Returns 0 on success, when owner ioctls or MH ioctls are disabled, or
+ * when the drive list is empty; returns -1 with ep set on failure.
+ */
+int
+meta_rel_own(
+	char			*sname,
+	mddrivenamelist_t	*dnlp,
+	int			partial_set,
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	*p;
+	uint_t			ndev = 0;
+	mhd_relown_args_t	args;
+	mhd_error_t		mherror;
+	mhd_set_t		*mhsp = &args.set;
+	uint_t			i;
+	char			*e;
+	mhd_handle_t		*hp = NULL;
+	int			rval = -1;
+
+	/*
+	 * RFE 4126509.  Check the runtime parameters to see if
+	 * they're set to disable MHIOCRELEASE and MHIOCENFAILFAST
+	 * ioctl() operations on the disks.  If so, return
+	 * immediately without performing the operations.
+	 */
+
+	if (do_owner_ioctls() == B_FALSE) {
+		return (0);
+	}
+
+	/*
+	 * if not doing ioctls (HK 98/10/28: the following code tests
+	 * an environment variable, and was apparently inserted to
+	 * make testing easier.)
+	 */
+
+	if (! do_mhioctl())
+		return (0);
+
+	/* count drives, get set */
+	for (p = dnlp; (p != NULL); p = p->next)
+		++ndev;
+	if (ndev == 0)
+		return (0);
+
+	/* initialize */
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&mherror, 0, sizeof (mherror));
+
+	/* build arguments */
+	mhsp->setname = Strdup(sname);
+	mhsp->drives.drives_len = ndev;
+	mhsp->drives.drives_val
+	    = Calloc(ndev, sizeof (*mhsp->drives.drives_val));
+	for (p = dnlp, i = 0; (i < ndev); p = p->next, ++i) {
+		mhsp->drives.drives_val[i] = Strdup(p->drivenamep->rname);
+	}
+	if (partial_set)
+		args.options |= MHD_PARTIAL_SET;
+	if (((e = getenv("MD_DEBUG")) != NULL) &&
+	    (strstr(e, "NOTHREAD") != NULL)) {
+		args.options |= MHD_SERIAL;
+	}
+
+	/*
+	 * open connection; on failure go through the common cleanup so
+	 * the argument strings built above are released
+	 */
+	if ((hp = open_metamhd(NULL, ep)) == NULL)
+		goto out;
+	clnt_control(hp->clientp, CLSET_TIMEOUT, (char *)&rel_own_timeout);
+
+	/* release ownership */
+	if (mhd_relown_1(&args, &mherror, hp->clientp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, hp->clientp, hp->hostname,
+		    "metamhd relown");
+	} else if (mhstealerror(&mherror, ep) == 0) {
+		rval = 0;	/* success */
+	}
+
+	/* cleanup, return status */
+out:
+	xdr_free(xdr_mhd_relown_args_t, (char *)&args);
+	xdr_free(xdr_mhd_error_t, (char *)&mherror);
+	if (hp != NULL)
+		close_metamhd(hp);
+	return (rval);
+}
+
+/*
+ * release ownership of the drives named by a drive descriptor list
+ *
+ * Builds a temporary drivename list from ddlp, hands it to
+ * meta_rel_own() together with sp->setname, frees the temporary list
+ * and returns meta_rel_own()'s result.
+ */
+int
+rel_own_bydd(
+	mdsetname_t	*sp,
+	md_drive_desc	*ddlp,
+	int		partial_set,
+	md_error_t	*ep
+)
+{
+	mddrivenamelist_t	*names = NULL;
+	mddrivenamelist_t	**endpp = &names;
+	md_drive_desc		*dd = ddlp;
+	int			status;
+
+	/*
+	 * Append each drivename at the tail; endpp tracks the last link
+	 * so the list is never re-walked.
+	 */
+	while (dd != NULL) {
+		endpp = meta_drivenamelist_append_wrapper(endpp, dd->dd_dnp);
+		dd = dd->dd_next;
+	}
+
+	/* release ownership */
+	status = meta_rel_own(sp->setname, names, partial_set, ep);
+
+	/* cleanup, return status */
+	metafreedrivenamelist(names);
+	return (status);
+}
+
+/*
+ * get status of drives
+ *
+ *	sname		set name sent with the request
+ *	dslp		drives to query; on success the status member of
+ *			each entry whose drive reported an error is filled
+ *	partial_set	non-zero adds MHD_PARTIAL_SET to the options
+ *	ep		error return for the request as a whole
+ *
+ * "NOTHREAD" in the MD_DEBUG environment variable adds MHD_SERIAL.
+ *
+ * Returns 0 on success, when MH ioctls are disabled or when the list is
+ * empty; returns -1 with ep set on failure.
+ */
+int
+meta_status_own(
+	char			*sname,
+	md_disk_status_list_t	*dslp,
+	int			partial_set,
+	md_error_t		*ep
+)
+{
+	md_disk_status_list_t	*p;
+	uint_t			ndev = 0;
+	mhd_status_args_t	args;
+	mhd_status_res_t	results;
+	mhd_error_t		*mhep = &results.status;
+	mhd_set_t		*mhsp = &args.set;
+	uint_t			i;
+	char			*e;
+	mhd_handle_t		*hp = NULL;
+	int			rval = -1;
+
+	/* if not doing ioctls */
+	if (! do_mhioctl())
+		return (0);
+
+	/* count drives, get set */
+	for (p = dslp; (p != NULL); p = p->next)
+		++ndev;
+	if (ndev == 0)
+		return (0);
+
+	/* initialize */
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&results, 0, sizeof (results));
+
+	/* build arguments */
+	mhsp->setname = Strdup(sname);
+	mhsp->drives.drives_len = ndev;
+	mhsp->drives.drives_val
+	    = Calloc(ndev, sizeof (*mhsp->drives.drives_val));
+	for (p = dslp, i = 0; (i < ndev); p = p->next, ++i) {
+		mhsp->drives.drives_val[i] = Strdup(p->drivenamep->rname);
+	}
+	if (partial_set)
+		args.options |= MHD_PARTIAL_SET;
+	if (((e = getenv("MD_DEBUG")) != NULL) &&
+	    (strstr(e, "NOTHREAD") != NULL)) {
+		args.options |= MHD_SERIAL;
+	}
+
+	/*
+	 * open connection; on failure go through the common cleanup so
+	 * the argument strings built above are released
+	 */
+	if ((hp = open_metamhd(NULL, ep)) == NULL)
+		goto out;
+	clnt_control(hp->clientp, CLSET_TIMEOUT, (char *)&tk_own_timeout);
+
+	/* get status */
+	if (mhd_status_1(&args, &results, hp->clientp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, hp->clientp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "metamhd status"));
+		goto out;
+	} else if (mhstealerror(mhep, ep) != 0) {
+		goto out;
+	}
+
+	/* results come back in request order: one entry per drive */
+	assert(results.results.results_len == ndev);
+	for (p = dslp, i = 0; (i < ndev); p = p->next, ++i) {
+		mhd_drive_status_t	*resp = &results.results.results_val[i];
+		mddrivename_t		*dp = p->drivenamep;
+		mhd_error_t		mherror;
+
+		/* make sure we have the right drive */
+		assert(strcmp(dp->rname, resp->drive) == 0);
+
+		/*
+		 * copy status: a per-drive error is converted into the
+		 * list entry's md_error_t (mhstealerror frees the
+		 * duplicated name)
+		 */
+		if (resp->errnum != 0) {
+			(void) memset(&mherror, 0, sizeof (mherror));
+			mherror.errnum = resp->errnum;
+			mherror.name = Strdup(resp->drive);
+			(void) mhstealerror(&mherror, &p->status);
+		}
+	}
+	rval = 0;	/* success */
+
+	/* cleanup, return status */
+out:
+	xdr_free(xdr_mhd_status_args_t, (char *)&args);
+	xdr_free(xdr_mhd_status_res_t, (char *)&results);
+	if (hp != NULL)
+		close_metamhd(hp);
+	return (rval);
+}
+
+/*
+ * build disk status list from drivename list
+ *
+ * One zeroed entry is allocated per drive, in the same order; each
+ * entry refers to (does not copy) the drivename.  Returns the head of
+ * the new list, NULL for an empty input list.
+ */
+md_disk_status_list_t *
+meta_drive_to_disk_status_list(
+	mddrivenamelist_t	*dnlp
+)
+{
+	md_disk_status_list_t	*first = NULL;
+	md_disk_status_list_t	**linkp = &first;
+	mddrivenamelist_t	*dn = dnlp;
+
+	/* copy list, always appending at the tail link */
+	while (dn != NULL) {
+		md_disk_status_list_t	*entry;
+
+		entry = Zalloc(sizeof (*entry));
+		*linkp = entry;
+		linkp = &entry->next;
+		entry->drivenamep = dn->drivenamep;
+
+		dn = dn->next;
+	}
+
+	/* return list */
+	return (first);
+}
+
+/*
+ * free disk status list
+ *
+ * Clears the status error of every entry and frees the entry.  The
+ * drivenames the entries refer to are left alone.
+ */
+void
+meta_free_disk_status_list(
+	md_disk_status_list_t	*dslp
+)
+{
+	while (dslp != NULL) {
+		/* remember the successor before the entry goes away */
+		md_disk_status_list_t	*following = dslp->next;
+
+		mdclrerror(&dslp->status);
+		Free(dslp);
+		dslp = following;
+	}
+}
+
+/*
+ * free drive info list
+ *
+ * Releases the list's XDR-allocated contents and leaves *listp zeroed,
+ * so the structure can be reused or freed again safely.
+ */
+void
+meta_free_drive_info_list(
+	mhd_drive_info_list_t	*listp
+)
+{
+	/* let XDR walk and free whatever the list owns */
+	xdr_free(xdr_mhd_drive_info_list_t, (char *)listp);
+
+	/* reset to the empty state */
+	(void) memset(listp, 0, sizeof (mhd_drive_info_list_t));
+}
+
+/*
+ * Parse the last path component of a drive name of the form c#t#d#s#,
+ * c#d#s#, c#t#d# or c#d# into ctds[0..3] (controller, target, disk,
+ * slice); fields absent from the name are left 0.  When the character
+ * after the last '/' is not 'c', one extra leading character is skipped
+ * before parsing.
+ * Returns 1 when the whole component matched one of the forms, else 0.
+ */
+static int
+mh_parse_ctds(
+	const char	*name,
+	uint_t		ctds[4]
+)
+{
+	const char	*n;
+	uint_t		*c = &ctds[0];
+	uint_t		*t = &ctds[1];
+	uint_t		*d = &ctds[2];
+	uint_t		*s = &ctds[3];
+	int		len = 0;	/* %n stores through an int pointer */
+
+	*c = *t = *d = *s = 0;
+
+	/* nothing after the last '/': don't step past the terminator */
+	if ((n = strrchr(name, '/')) == NULL || n[1] == '\0')
+		return (0);
+	n += (n[1] != 'c') ? 2 : 1;
+
+	if (sscanf(n, "c%ut%ud%us%u%n", c, t, d, s, &len) != 4 &&
+	    sscanf(n, "c%ud%us%u%n", c, d, s, &len) != 3 &&
+	    sscanf(n, "c%ut%ud%u%n", c, t, d, &len) != 3 &&
+	    sscanf(n, "c%ud%u%n", c, d, &len) != 2)
+		return (0);
+
+	/* reject trailing characters after the parsed part */
+	return ((size_t)len == strlen(n));
+}
+
+/*
+ * sort drive info list
+ *
+ * qsort() comparison function for mhd_drive_info_t entries.  When both
+ * names parse as c#t#d#s# style names they are ordered numerically by
+ * controller, target, disk and slice; otherwise the names are compared
+ * with strcmp(), a NULL name being treated as an empty string.
+ */
+static int
+compare_drives(
+	const void	*p1,
+	const void	*p2
+)
+{
+	const mhd_drive_info_t	*di1 = p1;
+	const mhd_drive_info_t	*di2 = p2;
+	const char		*n1 = di1->dif_name;
+	const char		*n2 = di2->dif_name;
+	uint_t			v1[4], v2[4];
+	uint_t			i;
+
+	if (n1 == NULL)
+		n1 = "";
+	if (n2 == NULL)
+		n2 = "";
+
+	/* attempt to sort correctly for c0t1d0s0 .vs. c0t18d0s0 */
+	if (!mh_parse_ctds(n1, v1) || !mh_parse_ctds(n2, v2))
+		return (strcmp(n1, n2));
+
+	for (i = 0; i < sizeof (v1) / sizeof (v1[0]); i++) {
+		if (v1[i] != v2[i])
+			return ((v1[i] > v2[i]) ? 1 : -1);
+	}
+	return (0);
+}
+
+/*
+ * Sort the entries of a drive info list in place with qsort(), using
+ * compare_drives() for the ordering.
+ */
+static void
+sort_drives(
+	mhd_drive_info_list_t	*listp
+)
+{
+	mhd_drive_info_t	*entries = listp->mhd_drive_info_list_t_val;
+
+	qsort(entries, listp->mhd_drive_info_list_t_len, sizeof (*entries),
+	    compare_drives);
+}
+
+/*
+ * return list of all drives
+ *
+ *	hostname	host to ask; NULL or "" means the local host
+ *	path		path sent with the request; when NULL the
+ *			MD_DRIVE_ROOT environment variable is used, and an
+ *			empty path is not sent at all
+ *	flags		passed through in the request
+ *	listp		receives the sorted list on success; release it
+ *			with meta_free_drive_info_list()
+ *	ep		error return
+ *
+ * Returns the number of drives in *listp on success, -1 with ep set on
+ * failure.  When MH ioctls are disabled, returns 0 without touching
+ * *listp.
+ */
+int
+meta_list_drives(
+	char			*hostname,
+	char			*path,
+	mhd_did_flags_t		flags,
+	mhd_drive_info_list_t	*listp,
+	md_error_t		*ep
+)
+{
+	mhd_list_args_t		args;
+	mhd_list_res_t		results;
+	mhd_error_t		*mhep = &results.status;
+	mhd_handle_t		*hp = NULL;
+	int			rval = -1;
+
+	/* if not doing ioctls */
+	if (! do_mhioctl())
+		return (0);
+
+	/* initialize */
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&results, 0, sizeof (results));
+
+	/* build arguments */
+	if (path == NULL)
+		path = getenv("MD_DRIVE_ROOT");
+	if ((path != NULL) && (*path != '\0'))
+		args.path = Strdup(path);
+	args.flags = flags;
+
+	/*
+	 * open connection; on failure go through the common cleanup so
+	 * the duplicated path is released
+	 */
+	if ((hp = open_metamhd(hostname, ep)) == NULL)
+		goto out;
+	clnt_control(hp->clientp, CLSET_TIMEOUT, (char *)&tk_own_timeout);
+
+	/* get list */
+	if (mhd_list_1(&args, &results, hp->clientp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, hp->clientp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "metamhd list"));
+		goto out;
+	} else if (mhstealerror(mhep, ep) != 0) {
+		goto out;
+	}
+
+	/* sort list */
+	sort_drives(&results.results);
+
+	/*
+	 * steal list: hand the array to the caller and empty our copy so
+	 * the xdr_free() below does not release it
+	 */
+	*listp = results.results;
+	results.results.mhd_drive_info_list_t_len = 0;
+	results.results.mhd_drive_info_list_t_val = NULL;
+	rval = listp->mhd_drive_info_list_t_len;	/* success */
+
+	/* cleanup, return status */
+out:
+	xdr_free(xdr_mhd_list_args_t, (char *)&args);
+	xdr_free(xdr_mhd_list_res_t, (char *)&results);
+	if (hp != NULL)
+		close_metamhd(hp);
+	return (rval);
+}
+
+/*
+ * Read the METADEVPATH file and pass every path name found in it to
+ * rpc.metamhd through meta_list_drives().  The returned lists and any
+ * errors are discarded; only the calls themselves matter here.  A
+ * missing or unreadable file is silently ignored.
+ */
+static void
+load_paths_to_metamhd()
+{
+	FILE			*cfp;	/* config file pointer */
+	char			buf[BUFSIZ],
+				*p,
+				*x;
+	mhd_drive_info_list_t	list;
+	md_error_t		ep;
+	mhd_did_flags_t		flags = MHD_DID_SERIAL;
+
+	if ((cfp = fopen(METADEVPATH, "r")) == NULL)
+		return;
+
+	/*
+	 * Read each line from the file.  Lines will be either
+	 * comments or path names to pass to rpc.metamhd.  A path
+	 * line may hold a colon-separated list of names, which
+	 * must be processed one at a time.
+	 */
+	while (fgets(buf, BUFSIZ, cfp) != NULL) {
+		/* Ignore comment lines */
+		if (buf[0] == '#')
+			continue;
+
+		if (strchr(buf, ':') != NULL) {
+			p = buf;
+			while ((x = strchr(p, ':')) != NULL) {
+				*x = '\0';
+				(void) memset(&ep, '\0', sizeof (ep));
+				/*
+				 * meta_list_drives() can return without
+				 * filling in the list, so it must start
+				 * out empty before it is freed.
+				 */
+				(void) memset(&list, '\0', sizeof (list));
+				/*
+				 * NOTE(review): 0 is passed here while
+				 * the final name below uses "flags" -
+				 * confirm the difference is intended.
+				 */
+				(void) meta_list_drives(NULL, p, 0,
+				    &list, &ep);
+				meta_free_drive_info_list(&list);
+				mdclrerror(&ep);
+				p = x + 1;
+			}
+			/*
+			 * We won't pick up the last path name
+			 * because the line ends with a newline
+			 * not a ':'.  So p still points to a valid
+			 * path in this case.  Move that name,
+			 * terminator included, to the beginning of
+			 * buf and let the common code below handle
+			 * it.  The regions overlap, hence memmove.
+			 * NOTE:
+			 * If the line does end with ":\n", p will
+			 * point to the newline.  The common code
+			 * then turns the newline into a null, which
+			 * is okay because meta_list_drives does not
+			 * send an empty path with the request.
+			 */
+			(void) memmove(buf, p, strlen(p) + 1);
+		}
+
+		/* Remove any newlines in the buffer. */
+		if ((p = strchr(buf, '\n')) != NULL)
+			*p = '\0';
+		(void) memset(&ep, '\0', sizeof (ep));
+		(void) memset(&list, '\0', sizeof (list));
+		(void) meta_list_drives(NULL, buf, flags, &list, &ep);
+		meta_free_drive_info_list(&list);
+		mdclrerror(&ep);
+	}
+	(void) fclose(cfp);
+}
+
+/*
+ * build list of all drives in set
+ *
+ * Asks rpc.metamhd for every drive it knows (after feeding it the
+ * configured paths), then appends to *dnlpp each drive that
+ *   - is not flagged MHD_DID_DUPLICATE,
+ *   - can be turned into a drivename, and
+ *   - passes meta_check_inset() for its MD_SLICE0 name.
+ *
+ * Returns the number of drives appended, or -1 on failure with ep set.
+ * Names appended before a failure are left on the list.
+ */
+/*ARGSUSED*/
+int
+meta_get_drive_names(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	**dnlpp,
+	int			options,
+	md_error_t		*ep
+)
+{
+	mhd_did_flags_t		flags = MHD_DID_SERIAL;
+	mhd_drive_info_list_t	drives;
+	mddrivenamelist_t	**endpp = dnlpp;
+	unsigned		found = 0;
+	uint_t			idx = 0;
+	int			rval = -1;
+
+	/* must have a set */
+	assert(sp != NULL);
+
+	load_paths_to_metamhd();
+	(void) memset(&drives, 0, sizeof (drives));
+	if ((meta_list_drives(NULL, NULL, flags, &drives, ep)) < 0)
+		return (-1);
+
+	/* find drives in set */
+	while (idx < drives.mhd_drive_info_list_t_len) {
+		mhd_drive_info_t	*info;
+		mddrivename_t		*dnp;
+		mdname_t		*np;
+
+		info = &drives.mhd_drive_info_list_t_val[idx];
+		++idx;
+
+		if (info->dif_id.did_flags & MHD_DID_DUPLICATE)
+			continue;
+
+		/* quietly skip drives which don't conform */
+		dnp = metadrivename(&sp, info->dif_name, ep);
+		if (dnp == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* check in set */
+		np = metaslicename(dnp, MD_SLICE0, ep);
+		if (np == NULL)
+			goto out;
+		if (meta_check_inset(sp, np, ep) != 0) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/*
+		 * Append at the tail; endpp tracks the last link so
+		 * the list is never re-walked.
+		 */
+		endpp = meta_drivenamelist_append_wrapper(endpp, dnp);
+		++found;
+	}
+	rval = found;
+
+	/* cleanup, return count or error */
+out:
+	meta_free_drive_info_list(&drives);
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mirror.c b/usr/src/lib/lvm/libmeta/common/meta_mirror.c
new file mode 100644
index 0000000000..8be4ada7ae
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mirror.c
@@ -0,0 +1,2762 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * mirror operations
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mirror.h>
+#include <sys/lvm/md_convert.h>
+
+#include <ctype.h>
+#include <stddef.h>
+
+/*
+ * FUNCTION: meta_get_mirror_names()
+ * INPUT: sp - the set name to get mirrors from
+ * options - options from the command line
+ * OUTPUT: nlpp - list of all mirror names
+ * ep - return error pointer
+ * RETURNS: int - the value returned by meta_get_names(); callers in
+ * this file treat a negative value as an error and 0 as
+ * "no mirrors found", so a positive value is presumably
+ * a count of names (confirm against meta_get_names())
+ * PURPOSE: returns a list of all mirrors in the metadb
+ * for all devices in the specified set
+ * NOTE: thin wrapper that calls meta_get_names() with MD_MIRROR as
+ * the driver name and passes every other argument through
+ */
+int
+meta_get_mirror_names(
+ mdsetname_t *sp,
+ mdnamelist_t **nlpp,
+ int options,
+ md_error_t *ep
+)
+{
+ return (meta_get_names(MD_MIRROR, sp, nlpp, options, ep));
+}
+
+/*
+ * free mirror unit
+ *
+ * Hands mirrorp to Free().  Nothing reachable from the structure (for
+ * example the submirror name pointers) is released here.
+ */
+void
+meta_free_mirror(
+ md_mirror_t *mirrorp
+)
+{
+ Free(mirrorp);
+}
+
+/*
+ * Build, or fetch from the name cache, the md_mirror_t for mirror "mirnp".
+ *
+ * When mirnp->drivenamep already carries a unit it is returned unchanged.
+ * Otherwise the driver unit is read with meta_get_mdunit(), its common
+ * fields, mirror options and per-submirror data are copied into a newly
+ * allocated md_mirror_t, and the resync progress is read with the
+ * MD_IOCGETSYNC ioctl.  The result is stored in mirnp->drivenamep->unitp
+ * before it is returned.  "fast" is handed to metakeyname() when the
+ * submirror names are resolved.
+ *
+ * Returns the mirror on success and NULL on failure.
+ */
+static md_mirror_t *
+meta_get_mirror_common(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	mddrivename_t		*drivep = mirnp->drivenamep;
+	md_resync_ioctl_t	sync;
+	md_mirror_t		*newp;
+	mm_unit_t		*unitp;
+	char			*misc;
+	uint_t			slot;
+	uint_t			inuse = 0;
+
+	/* a set is mandatory */
+	assert(sp != NULL);
+
+	/* already cached, nothing to build */
+	if (drivep->unitp != NULL) {
+		assert(drivep->unitp->type == MD_METAMIRROR);
+		return ((md_mirror_t *)drivep->unitp);
+	}
+
+	/* the device has to be driven by the mirror module */
+	misc = metagetmiscname(mirnp, ep);
+	if (misc == NULL)
+		return (NULL);
+	if (strcmp(misc, MD_MIRROR) != 0) {
+		(void) mdmderror(ep, MDE_NOT_MM, meta_getminor(mirnp->dev),
+		    mirnp->cname);
+		return (NULL);
+	}
+
+	/* fetch the driver's unit structure */
+	unitp = (mm_unit_t *)meta_get_mdunit(sp, mirnp, ep);
+	if (unitp == NULL)
+		return (NULL);
+	assert(unitp->c.un_type == MD_METAMIRROR);
+
+	newp = Zalloc(sizeof (*newp));
+
+	/* fields common to all metadevice types */
+	newp->common.namep = mirnp;
+	newp->common.type = unitp->c.un_type;
+	newp->common.state = unitp->c.un_status;
+	newp->common.capabilities = unitp->c.un_capabilities;
+	newp->common.parent = unitp->c.un_parent;
+	newp->common.size = unitp->c.un_total_blocks;
+	newp->common.user_flags = unitp->c.un_user_flags;
+	newp->common.revision = unitp->c.un_revision;
+
+	/* mirror specific options */
+	newp->read_option = unitp->un_read_option;
+	newp->write_option = unitp->un_write_option;
+	newp->pass_num = unitp->un_pass_num;
+
+	/* copy every submirror slot; unused slots only get their state */
+	for (slot = 0; slot < NMIRROR; slot++) {
+		mm_submirror_t	*srcp = &unitp->un_sm[slot];
+		md_submirror_t	*dstp = &newp->submirrors[slot];
+
+		dstp->state = srcp->sm_state;
+		if (dstp->state == SMS_UNUSED)
+			continue;
+		inuse++;
+
+		/* time of last state change, flags, then the name */
+		dstp->timestamp = srcp->sm_timestamp;
+		dstp->flags = srcp->sm_flags;
+		dstp->submirnamep = metakeyname(&sp, srcp->sm_key, fast, ep);
+		if (dstp->submirnamep == NULL)
+			goto fail;
+	}
+	assert(inuse == unitp->un_nsm);
+
+	/* ask the driver how far the resync has got */
+	(void) memset(&sync, 0, sizeof (sync));
+	sync.ri_mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&sync, MD_MIRROR, sp->setno);
+	if (metaioctl(MD_IOCGETSYNC, &sync, &sync.mde, mirnp->cname) != 0) {
+		(void) mdstealerror(ep, &sync.mde);
+		goto fail;
+	}
+	newp->percent_done = sync.ri_percent_done;
+	newp->percent_dirty = sync.ri_percent_dirty;
+
+	/* success: drop the raw unit and cache the cooked one */
+	Free(unitp);
+	drivep->unitp = (md_common_t *)newp;
+	return (newp);
+
+	/* failure: drop both the raw unit and the partial result */
+fail:
+	Free(unitp);
+	meta_free_mirror(newp);
+	return (NULL);
+}
+
+/*
+ * get mirror unit
+ *
+ * Wrapper around meta_get_mirror_common() that always passes 0 for the
+ * "fast" argument and returns whatever that routine returns.
+ */
+md_mirror_t *
+meta_get_mirror(
+ mdsetname_t *sp,
+ mdname_t *mirnp,
+ md_error_t *ep
+)
+{
+ return (meta_get_mirror_common(sp, mirnp, 0, ep));
+}
+
+/*
+ * Check whether the block range [slblk, slblk + nblks) of device "np"
+ * overlaps any non-metadevice submirror of mirror "mirnp".
+ *
+ * Returns 0 when no overlap is reported, -1 when the mirror cannot be
+ * read or meta_check_overlap() reports a problem.
+ */
+static int
+in_mirror(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mp;
+	uint_t		slot;
+
+	/* the mirror has to live in the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	mp = meta_get_mirror(sp, mirnp, ep);
+	if (mp == NULL)
+		return (-1);
+
+	for (slot = 0; slot < NMIRROR; slot++) {
+		md_submirror_t	*subp = &mp->submirrors[slot];
+		mdname_t	*subnamep = subp->submirnamep;
+
+		/* empty slot */
+		if (subnamep == NULL) {
+			assert(subp->state == SMS_UNUSED);
+			continue;
+		}
+
+		/* only submirrors that are not metadevices are compared */
+		if (!metaismeta(subnamep) &&
+		    meta_check_overlap(mirnp->cname, np, slblk, nblks,
+		    subnamep, 0, -1, ep) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Check whether the block range of "np" is used by any mirror in the set.
+ *
+ * Runs in_mirror() against every mirror returned by
+ * meta_get_mirror_names() and stops at the first one that fails.
+ * Returns 0 if every mirror passes, -1 otherwise.
+ */
+int
+meta_check_inmirror(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*mirrors = NULL;
+	mdnamelist_t	*cur;
+	int		result = 0;
+
+	/* a set is mandatory */
+	assert(sp != NULL);
+
+	if (meta_get_mirror_names(sp, &mirrors, 0, ep) < 0)
+		return (-1);
+
+	/* the loop ends as soon as one mirror reports a problem */
+	for (cur = mirrors; cur != NULL && result == 0; cur = cur->next) {
+		if (in_mirror(sp, cur->namep, np, slblk, nblks, ep) != 0)
+			result = -1;
+	}
+
+	/* release the name list on both the success and failure paths */
+	metafreenamelist(mirrors);
+	return (result);
+}
+
+/*
+ * Check to see if the primary mirror is built on top of a
+ * root slice which is mounted. This check is primarily to
+ * account for this case -
+ *
+ * # metainit -f d1 1 1 <root slice>
+ * # metainit d0 -m d1
+ * # metainit d2 1 1 ctds
+ * # metattach d0 d2
+ *
+ * The metattach here needs to fail if the root slice is
+ * being mirrored; otherwise there is a potential for
+ * data corruption.
+ *
+ * Returns 0 when the check passes or does not apply, and -1 when the
+ * current root cannot be determined, the mirror or one of its submirrors
+ * cannot be read, a submirror is not a stripe, or the mounted root slice
+ * is the first component of a submirror (MDE_IS_MOUNTED is set in that
+ * last case).
+ */
+static int
+meta_check_primary_mirror(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	md_error_t	*ep
+)
+{
+	int		smi;
+	char		*curroot;
+	char		*miscname;
+	mdname_t	*rootnp;
+	md_mirror_t	*mirrorp;
+	md_stripe_t	*stripep;
+	md_row_t	*rp;
+	md_comp_t	*cp;
+
+	if ((curroot = meta_get_current_root(ep)) == NULL)
+		return (-1);
+
+	/*
+	 * Get device name of current root metadevice. If root
+	 * is net mounted as happens if we're part of the
+	 * install process, rootnp will be set to NULL and we
+	 * return success.
+	 */
+	if ((rootnp = metaname(&sp, curroot, ep)) == NULL)
+		return (0);
+
+	/*
+	 * The check only applies when the mounted root is a plain
+	 * (non-metadevice) slice and the target is a metadevice.
+	 */
+	if (metaismeta(rootnp) || !metaismeta(mirnp))
+		return (0);
+
+	if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+		return (-1);
+
+	/* Check all submirrors */
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnamep = mdsp->submirnamep;
+
+		/* skip unused submirrors */
+		if (submirnamep == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		/*
+		 * Check if submirror is a stripe or not.  The lookup can
+		 * fail and return NULL, so the result must be tested before
+		 * it is handed to strcmp().
+		 */
+		if ((miscname = metagetmiscname(submirnamep, ep)) == NULL)
+			return (-1);
+		if (strcmp(miscname, MD_STRIPE) != 0)
+			return (-1);
+		if ((stripep = meta_get_stripe(sp, submirnamep, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Examine the first component of the first row and
+		 * check to see if it has a mounted root slice
+		 */
+		rp = &stripep->rows.rows_val[0];
+		cp = &rp->comps.comps_val[0];
+
+		/*
+		 * we just care about the component built on
+		 * top of a raw device
+		 */
+		if (metaismeta(cp->compnamep))
+			continue;
+
+		/*
+		 * If root device is the 1st component of
+		 * the stripe, then fail.
+		 */
+		if (strcmp(rootnp->cname, cp->compnamep->cname) == 0) {
+			(void) mduseerror(ep, MDE_IS_MOUNTED,
+			    rootnp->dev, "/", rootnp->cname);
+			return (-1);
+		}
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Check that "np" may be used as a submirror (of "mirnp", when given).
+ *
+ * The checks run in this order: np must name a metadevice; unless "force"
+ * is set, the primary mirror must not sit on the mounted root slice and np
+ * must not be in use; np must be in the set; finally np must either not be
+ * part of another metadevice (non-metadevice case) or be a unit that can
+ * be parented, can be a submirror and has no parent yet.
+ *
+ * Returns 0 when every check passes, otherwise the failing check's error
+ * return.
+ */
+int
+meta_check_submirror(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdname_t	*mirnp,
+	int		force,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	chkopts = 0;
+	md_common_t	*unitp;
+
+	/* the candidate has to be a metadevice name */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	/* both of these checks are bypassed by "force" */
+	if (!force) {
+		/* refuse when the primary mirror holds the mounted root */
+		if (mirnp != NULL &&
+		    meta_check_primary_mirror(sp, mirnp, ep) != 0)
+			return (-1);
+
+		/* refuse when the candidate is already in use */
+		if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0)
+			return (-1);
+	}
+
+	/* the candidate has to belong to the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	/* non-metadevices: must not already be part of a metadevice */
+	if (!metaismeta(np)) {
+		if (meta_check_inmeta(sp, np, chkopts, 0, -1, ep) != 0)
+			return (-1);
+		return (0);
+	}
+
+	/* metadevices: the unit must be able to take a mirror parent */
+	unitp = meta_get_unit(sp, np, ep);
+	if (unitp == NULL)
+		return (-1);
+
+	if (!((unitp->capabilities & MD_CAN_PARENT) &&
+	    (unitp->capabilities & MD_CAN_SUB_MIRROR) &&
+	    (unitp->parent == MD_NO_PARENT))) {
+		return (mdmderror(ep, MDE_INVAL_UNIT,
+		    meta_getminor(np->dev), np->cname));
+	}
+
+	return (0);
+}
+
+/*
+ * Map a mirror read option to its keyword ("roundrobin", "geometric" or
+ * "first").  Any other value trips an assertion and, if assertions are
+ * compiled out, yields the localized string "invalid".
+ */
+char *
+rd_opt_to_name(
+	mm_rd_opt_t	opt
+)
+{
+	if (opt == RD_LOAD_BAL)
+		return ("roundrobin");
+	if (opt == RD_GEOMETRY)
+		return ("geometric");
+	if (opt == RD_FIRST)
+		return ("first");
+
+	/* not a known read option */
+	assert(0);
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * Map a mirror read option to its command line flag.  RD_LOAD_BAL is the
+ * default and has no flag, so NULL is returned for it.  An unknown value
+ * trips an assertion and otherwise yields the localized string "invalid".
+ */
+static char *
+rd_opt_to_opt(
+	mm_rd_opt_t	opt
+)
+{
+	if (opt == RD_LOAD_BAL)
+		return (NULL);		/* default */
+	if (opt == RD_GEOMETRY)
+		return ("-g");
+	if (opt == RD_FIRST)
+		return ("-r");
+
+	/* not a known read option */
+	assert(0);
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * Convert a read option keyword (compared without regard to case) into
+ * its mm_rd_opt_t value, stored through optp.  Returns 0 on a match;
+ * otherwise returns the result of meta_cook_syntax() for MDE_BAD_RD_OPT.
+ */
+int
+name_to_rd_opt(
+	char		*uname,
+	char		*name,
+	mm_rd_opt_t	*optp,
+	md_error_t	*ep
+)
+{
+	static const struct {
+		const char	*keyword;
+		mm_rd_opt_t	value;
+	} known[] = {
+		{ "roundrobin",	RD_LOAD_BAL },
+		{ "geometric",	RD_GEOMETRY },
+		{ "first",	RD_FIRST }
+	};
+	uint_t	i;
+
+	/* keywords are tried in table order */
+	for (i = 0; i < (sizeof (known) / sizeof (known[0])); i++) {
+		if (strcasecmp(name, known[i].keyword) == 0) {
+			*optp = known[i].value;
+			return (0);
+		}
+	}
+
+	return (meta_cook_syntax(ep, MDE_BAD_RD_OPT, uname, 1, &name));
+}
+
+/*
+ * Map a mirror write option to its keyword ("parallel" or "serial").
+ * Any other value trips an assertion and, if assertions are compiled out,
+ * yields the localized string "invalid".
+ */
+char *
+wr_opt_to_name(
+	mm_wr_opt_t	opt
+)
+{
+	if (opt == WR_PARALLEL)
+		return ("parallel");
+	if (opt == WR_SERIAL)
+		return ("serial");
+
+	/* not a known write option */
+	assert(0);
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * Map a mirror write option to its command line flag.  WR_PARALLEL is the
+ * default and has no flag, so NULL is returned for it.  An unknown value
+ * trips an assertion and otherwise yields the localized string "invalid".
+ */
+static char *
+wr_opt_to_opt(
+	mm_wr_opt_t	opt
+)
+{
+	if (opt == WR_PARALLEL)
+		return (NULL);		/* default */
+	if (opt == WR_SERIAL)
+		return ("-S");
+
+	/* not a known write option */
+	assert(0);
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * Convert a write option keyword (compared without regard to case) into
+ * its mm_wr_opt_t value, stored through optp.  Returns 0 on a match;
+ * otherwise returns the result of meta_cook_syntax() for MDE_BAD_WR_OPT.
+ */
+int
+name_to_wr_opt(
+	char		*uname,
+	char		*name,
+	mm_wr_opt_t	*optp,
+	md_error_t	*ep
+)
+{
+	static const struct {
+		const char	*keyword;
+		mm_wr_opt_t	value;
+	} known[] = {
+		{ "parallel",	WR_PARALLEL },
+		{ "serial",	WR_SERIAL }
+	};
+	uint_t	i;
+
+	/* keywords are tried in table order */
+	for (i = 0; i < (sizeof (known) / sizeof (known[0])); i++) {
+		if (strcasecmp(name, known[i].keyword) == 0) {
+			*optp = known[i].value;
+			return (0);
+		}
+	}
+
+	return (meta_cook_syntax(ep, MDE_BAD_WR_OPT, uname, 1, &name));
+}
+
+/*
+ * Convert a pass number given as text into *passp.
+ *
+ * The text is scanned with "%hd" straight into *passp; the value is
+ * accepted when the scan succeeds and the result lies in the range
+ * 0 .. MD_PASS_MAX.  Returns 0 on success; otherwise returns the result
+ * of meta_cook_syntax() for MDE_BAD_PASS_NUM.
+ */
+int
+name_to_pass_num(
+	char		*uname,
+	char		*name,
+	mm_pass_num_t	*passp,
+	md_error_t	*ep
+)
+{
+	int	scanned = (sscanf(name, "%hd", passp) == 1);
+
+	/* the range is only examined when the scan produced a value */
+	if (scanned && (*passp >= 0) && (*passp <= MD_PASS_MAX))
+		return (0);
+
+	return (meta_cook_syntax(ep, MDE_BAD_PASS_NUM, uname, 1, &name));
+}
+
+/*
+ * Convert the resync option held in "tstate" to a localized description:
+ * "application based" when MD_ABR_CAP is set, "optimized resync"
+ * otherwise.
+ */
+static char *
+resync_opt_to_name(
+	uint_t	tstate
+)
+{
+	return ((tstate & MD_ABR_CAP) ?
+	    dgettext(TEXT_DOMAIN, "application based") :
+	    dgettext(TEXT_DOMAIN, "optimized resync"));
+}
+
+/*
+ * Print a mirror as a single line: name, "-m", the submirror names in
+ * slot order, the non-default read/write option flags and the pass
+ * number.
+ *
+ * With PRINT_LARGEDEVICES set, mirrors whose revision is not
+ * MD_64BIT_META_DEV are skipped and 0 is returned without any output.
+ * Returns 0 on success; when a write fails, records a system error
+ * against "fname" in ep and returns -1.
+ */
+static int
+mirror_print(
+	md_mirror_t	*mirrorp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t	slot;
+	char	*flag;
+
+	/* only 64 bit devices are listed when large devices are asked for */
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (mirrorp->common.revision != MD_64BIT_META_DEV))
+		return (0);
+
+	/* mirror name followed by -m */
+	if (fprintf(fp, "%s -m", mirrorp->common.namep->cname) == EOF)
+		goto fail;
+
+	/* one word per used submirror slot */
+	for (slot = 0; slot < NMIRROR; slot++) {
+		md_submirror_t	*subp = &mirrorp->submirrors[slot];
+		mdname_t	*subnamep = subp->submirnamep;
+
+		if (subnamep == NULL) {
+			assert(subp->state == SMS_UNUSED);
+			continue;
+		}
+		if (fprintf(fp, " %s", subnamep->cname) == EOF)
+			goto fail;
+	}
+
+	/* option flags are only printed when they differ from the default */
+	flag = rd_opt_to_opt(mirrorp->read_option);
+	if ((flag != NULL) && (fprintf(fp, " %s", flag) == EOF))
+		goto fail;
+	flag = wr_opt_to_opt(mirrorp->write_option);
+	if ((flag != NULL) && (fprintf(fp, " %s", flag) == EOF))
+		goto fail;
+
+	if (fprintf(fp, " %u\n", mirrorp->pass_num) == EOF)
+		goto fail;
+
+	return (0);
+
+	/* a write failed: report it against the output file name */
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * Convert a submirror state to a localized, printable name.
+ *
+ * When tvp is not NULL the submirror's timestamp is copied to *tvp.
+ * The returned string is a translated constant, except for an
+ * unrecognized state, which trips an assertion and is otherwise
+ * formatted as hex into a static buffer shared by all calls.
+ */
+char *
+sm_state_to_name(
+	md_submirror_t	*mdsp,
+	md_status_t	mirror_status,
+	md_timeval32_t	*tvp,
+	uint_t		tstate
+)
+{
+	static char	hexbuf[100];
+	sm_state_t	sm = mdsp->state;
+	uint_t		resync_target = mdsp->flags & MD_SM_RESYNC_TARGET;
+	int		resyncing;
+	int		broken;
+
+	/* hand back the time of the last state change */
+	if (tvp != NULL)
+		*tvp = mdsp->timestamp;
+
+	if (sm & SMS_RUNNING) {
+		/*
+		 * "Unavailable" is only reported while the submirror is
+		 * still marked running, i.e. has no flagged error.  Once
+		 * an error is flagged a resync is required and the
+		 * 'Needs maintenance' text below is used instead.
+		 */
+		if (tstate != 0)
+			return (dgettext(TEXT_DOMAIN, "Unavailable"));
+
+		/* fine unless an optimized resync targets this submirror */
+		if (!((mirror_status & MD_UN_OPT_NOT_DONE) && resync_target))
+			return (dgettext(TEXT_DOMAIN, "Okay"));
+	}
+
+	resyncing = ((sm & (SMS_COMP_RESYNC | SMS_ATTACHED_RESYNC |
+	    SMS_OFFLINE_RESYNC)) ||
+	    (mirror_status & MD_UN_OPT_NOT_DONE));
+	broken = ((sm & (SMS_COMP_ERRED | SMS_ATTACHED | SMS_OFFLINE)) != 0);
+
+	/* an active resync is only reported for the resync states */
+	if (resyncing && (mirror_status & MD_UN_RESYNC_ACTIVE))
+		return (dgettext(TEXT_DOMAIN, "Resyncing"));
+
+	/* resync pending or repair needed */
+	if (resyncing || broken) {
+		if (mirror_status & MD_UN_RESYNC_CANCEL)
+			return (dgettext(TEXT_DOMAIN, "Resync cancelled"));
+		return (dgettext(TEXT_DOMAIN, "Needs maintenance"));
+	}
+
+	/* unknown state */
+	assert(0);
+	(void) sprintf(hexbuf, "0x%x", sm);
+	return (hexbuf);
+}
+
+/*
+ * Convert a submirror state to the command an administrator should run.
+ *
+ * *actionp is set to NULL when nothing needs doing, to a command line
+ * held in a static buffer shared by all calls, or to the localized text
+ * "???" when no action is known.  Returns 0 in all of those cases and -1
+ * when the submirror's driver name or its errored component cannot be
+ * determined.
+ */
+int
+sm_state_to_action(
+	mdsetname_t	*sp,
+	md_submirror_t	*mdsp,
+	md_status_t	mirror_status,
+	md_mirror_t	*mirrorp,
+	char		**actionp,
+	md_error_t	*ep
+)
+{
+	static char	cmd[1024];
+	mdname_t	*subnamep = mdsp->submirnamep;
+	sm_state_t	sm = mdsp->state;
+	char		*misc;
+
+	/* nothing to do while resyncing or when everything is fine */
+	*actionp = NULL;
+	if (mirror_status & MD_UN_RESYNC_ACTIVE)
+		return (0);
+	if (!(mirror_status & MD_UN_OPT_NOT_DONE) && (sm == SMS_RUNNING))
+		return (0);
+
+	/* a cancelled resync is finished with metasync */
+	if (mirror_status & MD_UN_RESYNC_CANCEL) {
+		(void) snprintf(cmd, sizeof (cmd),
+		    dgettext(TEXT_DOMAIN, "metasync %s"),
+		    mirrorp->common.namep->cname);
+		*actionp = cmd;
+		return (0);
+	}
+
+	/* an errored component of a stripe submirror gets replaced */
+	if ((metaismeta(subnamep)) && (sm & SMS_COMP_ERRED)) {
+		misc = metagetmiscname(subnamep, ep);
+		if (misc == NULL)
+			return (-1);
+
+		if (strcmp(misc, MD_STRIPE) == 0) {
+			mdname_t	*badnamep;
+			comp_state_t	badstate;
+
+			if (meta_find_erred_comp(sp, subnamep,
+			    &badnamep, &badstate, ep) != 0) {
+				return (-1);
+			}
+
+			if (badstate == CS_LAST_ERRED) {
+				(void) snprintf(cmd, sizeof (cmd),
+				    dgettext(TEXT_DOMAIN,
+				    "after replacing \"Maintenance\" "
+				    "components:\n"
+				    "\t\tmetareplace %s %s <new device>"),
+				    mirrorp->common.namep->cname,
+				    badnamep->cname);
+			} else {
+				(void) snprintf(cmd, sizeof (cmd),
+				    "metareplace %s %s <%s>",
+				    mirrorp->common.namep->cname,
+				    badnamep->cname,
+				    dgettext(TEXT_DOMAIN, "new device"));
+			}
+			*actionp = cmd;
+			return (0);
+		}
+	}
+
+	/* pending or attached submirrors need the mirror resynced */
+	if ((mirror_status & MD_UN_OPT_NOT_DONE) ||
+	    (sm & (SMS_ATTACHED_RESYNC | SMS_OFFLINE_RESYNC |
+	    SMS_COMP_RESYNC | SMS_ATTACHED))) {
+		(void) snprintf(cmd, sizeof (cmd), "metasync %s",
+		    mirrorp->common.namep->cname);
+		*actionp = cmd;
+		return (0);
+	}
+
+	/* an offlined submirror is brought back with metaonline */
+	if (sm & SMS_OFFLINE) {
+		(void) snprintf(cmd, sizeof (cmd), "metaonline %s %s",
+		    mirrorp->common.namep->cname, subnamep->cname);
+		*actionp = cmd;
+		return (0);
+	}
+
+	/* no idea what to suggest */
+	*actionp = dgettext(TEXT_DOMAIN, "???");
+	return (0);
+}
+
+/*
+ * Print the mirror options: pass number, read option, write option and,
+ * when meta_is_mn_set() reports a multi-node set, the resync option
+ * derived from "tstate".
+ *
+ * Returns 0 on success; when a write fails, records a system error
+ * against "fname" in ep and returns -1.
+ */
+int
+meta_print_mirror_options(
+	mm_rd_opt_t	read_option,
+	mm_wr_opt_t	write_option,
+	mm_pass_num_t	pass_num,
+	uint_t		tstate,
+	char		*fname,
+	mdsetname_t	*sp,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	char	*flag;
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Pass: %u\n"),
+	    pass_num) == EOF)
+		goto fail;
+
+	/* read option: keyword plus its flag, or "default" without one */
+	flag = rd_opt_to_opt(read_option);
+	if (flag == NULL)
+		flag = dgettext(TEXT_DOMAIN, "default");
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Read option: %s (%s)\n"),
+	    rd_opt_to_name(read_option), flag) == EOF)
+		goto fail;
+
+	/* write option, same layout */
+	flag = wr_opt_to_opt(write_option);
+	if (flag == NULL)
+		flag = dgettext(TEXT_DOMAIN, "default");
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Write option: %s (%s)\n"),
+	    wr_opt_to_name(write_option), flag) == EOF)
+		goto fail;
+
+	/* the resync option is only shown for a MultiNode set */
+	if (meta_is_mn_set(sp, ep) &&
+	    (fprintf(fp, dgettext(TEXT_DOMAIN, " Resync option: %s\n"),
+	    resync_opt_to_name(tstate)) == EOF))
+		goto fail;
+
+	return (0);
+
+	/* a write failed: report it against the output file name */
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * Look up the name of the node whose id is "nid" in the membership list
+ * read by meta_read_nodelist().
+ *
+ * Returns a Strdup()ed copy of the node name, or of the localized string
+ * "None" when no node matches; the caller frees it.  Returns NULL when
+ * the membership list cannot be read.
+ */
+static char *
+get_node_name(uint_t nid, md_error_t *ep)
+{
+	mndiskset_membershiplist_t	*members, *cur;
+	int				count;
+	char				*result;
+
+	/* get the known membership list */
+	if (meta_read_nodelist(&count, &members, ep))
+		return (NULL);
+
+	/* stop at the first node carrying the wanted id */
+	for (cur = members; cur != NULL; cur = cur->next) {
+		if (cur->msl_node_id == nid)
+			break;
+	}
+
+	if (cur != NULL)
+		result = Strdup(cur->msl_node_name);
+	else
+		result = Strdup(dgettext(TEXT_DOMAIN, "None"));
+
+	meta_free_nodelist(members);
+	return (result);
+}
+
+/*
+ * Print the long (report) form of a mirror.
+ *
+ * The output consists of an optional header, one name/state pair per
+ * submirror, the resync progress if a resync is active or was cancelled,
+ * the mirror options, the owning node for a multi-node set, the size,
+ * optional debug details (PRINT_DEBUG) and finally a detailed section for
+ * every submirror, including the underlying metadevice.
+ *
+ * With PRINT_LARGEDEVICES set and a mirror that is not a 64 bit device,
+ * only the submirrors are descended into and nothing is printed for the
+ * mirror itself.
+ *
+ * Returns 0 on success and -1 on failure.  Paths that leave through the
+ * "out" label with rval still -1 record a system error against "fname";
+ * paths that return -1 directly rely on the callee having filled in ep.
+ */
+static int
+mirror_report(
+	mdsetname_t	*sp,
+	md_mirror_t	*mirrorp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_status_t	status = mirrorp->common.state;
+	uint_t		smi;
+	char		*p;
+	int		rval = -1;
+	uint_t		tstate = 0;
+
+	/*
+	 * check for the -B option. If -B and the metadevice is
+	 * a 64 bit device, get the dev for relocation information
+	 * printout. If not a 64 bit device, just don't print this
+	 * information out but you need to go down to the subdevice
+	 * level and print there if appropriate.
+	 */
+	if (options & PRINT_LARGEDEVICES) {
+		if (mirrorp->common.revision != MD_64BIT_META_DEV) {
+			for (smi = 0; (smi < NMIRROR); ++smi) {
+				md_submirror_t	*mdsp =
+				    &mirrorp->submirrors[smi];
+				mdname_t	*submirnamep =
+				    mdsp->submirnamep;
+				if (submirnamep == NULL) {
+					continue;
+				}
+				if ((metaismeta(submirnamep)) &&
+				    (meta_print_name(sp, submirnamep, nlpp,
+				    fname, fp, options | PRINT_SUBDEVS, NULL,
+				    ep) != 0)) {
+					return (-1);
+				}
+			}
+			rval = 0;
+			goto out;
+		} else {
+			/*
+			 * NOTE(review): a failure here leaves through "out"
+			 * and is therefore reported via mdsyserror() with
+			 * the current errno -- confirm that is intended.
+			 */
+			if (meta_getdevs(sp, mirrorp->common.namep,
+			    nlpp, ep) != 0)
+				goto out;
+		}
+	}
+
+	/* print header */
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Mirror\n"),
+		    mirrorp->common.namep->cname) == EOF) {
+			goto out;
+		}
+	}
+
+	/* print submirrors, adjust status */
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnamep = mdsp->submirnamep;
+		char		*sm_state;
+		md_timeval32_t	tv;
+		char		*timep;
+
+		/* skip unused submirrors */
+		if (submirnamep == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		/* an offline submirror hides the pending optimized resync */
+		if (mdsp->state & SMS_OFFLINE)
+			status &= ~MD_UN_OPT_NOT_DONE;
+
+		/* print submirror */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, " Submirror %u: %s\n"),
+		    smi, submirnamep->cname) == EOF) {
+			goto out;
+		}
+
+		/* print state */
+		if (metaismeta(mdsp->submirnamep)) {
+			if (meta_get_tstate(mdsp->submirnamep->dev, &tstate,
+			    ep) != 0)
+				return (-1);
+		}
+		sm_state = sm_state_to_name(mdsp, status, &tv,
+		    tstate & MD_DEV_ERRORED);
+		if (options & PRINT_TIMES) {
+			timep = meta_print_time(&tv);
+		} else {
+			timep = "";
+		}
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " State: %-12s %s\n"),
+		    sm_state, timep) == EOF) {
+			goto out;
+		}
+	}
+
+	/*
+	 * print resync status; 64 bit devices print percent_done as
+	 * tenths of a percent, all others as a whole percentage
+	 */
+	if (status & MD_UN_RESYNC_CANCEL) {
+		/* Resync was cancelled but is restartable */
+		if (mirrorp->common.revision == MD_64BIT_META_DEV) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Resync cancelled: %2d.%1d %% done\n"),
+			    mirrorp->percent_done/10,
+			    mirrorp->percent_done%10) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Resync cancelled: %d %% done\n"),
+			    mirrorp->percent_done) == EOF) {
+				goto out;
+			}
+		}
+	} else if (status & MD_UN_RESYNC_ACTIVE) {
+		if (mirrorp->common.revision == MD_64BIT_META_DEV) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Resync in progress: %2d.%1d %% done\n"),
+			    mirrorp->percent_done/10,
+			    mirrorp->percent_done%10) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Resync in progress: %d %% done\n"),
+			    mirrorp->percent_done) == EOF) {
+				goto out;
+			}
+		}
+	}
+
+	/* print options */
+	if (meta_get_tstate(mirrorp->common.namep->dev, &tstate, ep) != 0)
+		return (-1);
+
+	if (meta_print_mirror_options(mirrorp->read_option,
+	    mirrorp->write_option, mirrorp->pass_num,
+	    tstate, fname, sp, fp, ep) != 0)
+		return (-1);
+
+	/* print mirror owner for multi-node metadevice */
+	if (meta_is_mn_set(sp, ep)) {
+		md_set_mmown_params_t	ownpar;
+		mdname_t		*mirnp = mirrorp->common.namep;
+		char			*node_name;
+
+		(void) memset(&ownpar, 0, sizeof (ownpar));
+		ownpar.d.mnum = meta_getminor(mirnp->dev);
+		MD_SETDRIVERNAME(&ownpar, MD_MIRROR, sp->setno);
+
+		if (metaioctl(MD_MN_GET_MM_OWNER, &ownpar, ep,
+		    "MD_MN_GET_MM_OWNER") != 0) {
+			return (-1);
+		}
+
+		/* node_name is allocated by get_node_name(); free it here */
+		node_name = get_node_name(ownpar.d.owner, ep);
+		if (node_name == NULL)
+			return (-1);
+		else if (fprintf(fp, dgettext(TEXT_DOMAIN, " Owner: %s\n"),
+		    node_name) == EOF) {
+			Free(node_name);
+			goto out;
+		}
+		Free(node_name);
+
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"),
+	    mirrorp->common.size,
+	    meta_number_to_string(mirrorp->common.size, DEV_BSIZE))
+	    == EOF) {
+		goto out;
+	}
+
+	/* MD_DEBUG stuff */
+	if (options & PRINT_DEBUG) {
+		mdname_t	*mirnp = mirrorp->common.namep;
+		mm_unit_t	*mm;
+		mddb_optloc_t	optloc;
+		uint_t		i;
+
+		/* get real mirror unit; it is freed on every exit below */
+		if ((mm = (mm_unit_t *)meta_get_mdunit(sp, mirnp, ep))
+		    == NULL) {
+			return (-1);
+		}
+		assert(mm->c.un_type == MD_METAMIRROR);
+
+		/* print dirty regions */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+" Regions which are dirty: %d%% (blksize %d num %d)\n"),
+		    mirrorp->percent_dirty, mm->un_rrd_blksize,
+		    mm->un_rrd_num) == EOF) {
+			Free(mm);
+			goto out;
+		}
+
+		/* print optimized resync record locations */
+		(void) memset(&optloc, 0, sizeof (optloc));
+		optloc.recid = mm->un_rr_dirty_recid;
+		if (metaioctl(MD_DB_GETOPTLOC, &optloc, ep,
+		    "MD_DB_GETOPTLOC") != 0) {
+			Free(mm);
+			return (-1);
+		}
+		for (i = 0; (i < ((sizeof optloc.li) / sizeof (optloc.li[0])));
+		    ++i) {
+			mddb_config_t	dbconf;
+			char		*devname;
+
+			(void) memset(&dbconf, 0, sizeof (dbconf));
+			dbconf.c_id = optloc.li[i];
+			dbconf.c_setno = sp->setno;
+			dbconf.c_subcmd = MDDB_CONFIG_ABS;
+			/* Don't need device id information from this ioctl */
+			dbconf.c_locator.l_devid = (uint64_t)0;
+			dbconf.c_locator.l_devid_flags = 0;
+			if (metaioctl(MD_DB_ENDDEV, &dbconf, &dbconf.c_mde,
+			    "MD_DB_ENDDEV") != 0) {
+				Free(mm);
+				return (mdstealerror(ep, &dbconf.c_mde));
+			}
+			if ((devname = splicename(&dbconf.c_devname))
+			    == NULL) {
+				devname = Strdup(dgettext(TEXT_DOMAIN,
+				    "unknown"));
+			}
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Resync record[%u]: %d (%s %d %d)\n"), i,
+			    optloc.li[i], devname, dbconf.c_locator.l_blkno,
+			    (dbconf.c_dbend - dbconf.c_locator.l_blkno + 1))
+			    == EOF) {
+				Free(mm);
+				Free(devname);
+				goto out;
+			}
+			Free(devname);
+		}
+		Free(mm);
+	}
+
+	/* print submirror details */
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnamep = mdsp->submirnamep;
+		char		*sm_state;
+		md_timeval32_t	tv;
+		char		*timep;
+
+		/* skip unused submirrors */
+		if (submirnamep == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		/* add extra line */
+		if (fprintf(fp, "\n") == EOF)
+			goto out;
+
+		/* print submirror */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "%s: Submirror of %s\n"),
+		    submirnamep->cname,
+		    mirrorp->common.namep->cname) == EOF) {
+			goto out;
+		}
+
+		/* print state */
+		if (metaismeta(mdsp->submirnamep)) {
+			if (meta_get_tstate(mdsp->submirnamep->dev, &tstate, ep)
+			    != 0)
+				return (-1);
+		}
+		/*
+		 * The last argument is an integer tstate, so the "no
+		 * transient state" value is 0 rather than NULL.
+		 */
+		sm_state = sm_state_to_name(mdsp, status, &tv, 0);
+		if (options & PRINT_TIMES) {
+			timep = meta_print_time(&tv);
+		} else {
+			timep = "";
+		}
+
+		/* state and repair hint are only shown when not errored */
+		if ((tstate & MD_DEV_ERRORED) == 0) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " State: %-12s %s\n"),
+			    sm_state, timep) == EOF) {
+				goto out;
+			}
+
+			/* print what to do */
+			if (sm_state_to_action(sp, mdsp, status,
+			    mirrorp, &p, ep) != 0)
+				return (-1);
+			if ((p != NULL) &&
+			    (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Invoke: %s\n"), p) == EOF)) {
+				goto out;
+			}
+		}
+
+		/* print underlying metadevice */
+		if ((metaismeta(submirnamep)) &&
+		    (meta_print_name(sp, submirnamep, nlpp, fname, fp,
+		    ((options & ~PRINT_HEADER) | PRINT_SUBDEVS),
+		    NULL, ep) != 0)) {
+			return (-1);
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * Print or report one mirror, or every mirror in the set.
+ *
+ * With mirnp == NULL the routine calls itself for each name returned by
+ * meta_get_mirror_names(); a failure for one mirror does not stop the
+ * others, but makes the overall result -1.  For a single mirror, nothing
+ * is printed when it has a parent and PRINT_SUBDEVS is not set.
+ * PRINT_SHORT selects the one-line form followed by the underlying
+ * metadevices; otherwise the full report is produced.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int
+meta_mirror_print(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mp;
+	uint_t		slot;
+
+	/* a named mirror has to live in the set we were given */
+	assert(sp != NULL);
+	assert((mirnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev))));
+
+	/* no name given: handle every mirror in the set */
+	if (mirnp == NULL) {
+		mdnamelist_t	*names = NULL;
+		mdnamelist_t	*cur;
+		int		total;
+		int		result = 0;
+
+		total = meta_get_mirror_names(sp, &names, options, ep);
+		if (total < 0)
+			return (-1);
+		if (total == 0)
+			return (0);
+
+		/* keep going after a failure, but remember it */
+		for (cur = names; cur != NULL; cur = cur->next) {
+			if (meta_mirror_print(sp, cur->namep, nlpp, fname,
+			    fp, options, ep) != 0)
+				result = -1;
+		}
+
+		metafreenamelist(names);
+		return (result);
+	}
+
+	/* get unit structure */
+	mp = meta_get_mirror_common(sp, mirnp,
+	    ((options & PRINT_FAST) ? 1 : 0), ep);
+	if (mp == NULL)
+		return (-1);
+
+	/* parented mirrors are only shown as part of their parent */
+	if (!(options & PRINT_SUBDEVS) && MD_HAS_PARENT(mp->common.parent))
+		return (0);
+
+	/* long form */
+	if (!(options & PRINT_SHORT))
+		return (mirror_report(sp, mp, nlpp, fname, fp, options, ep));
+
+	/* short form: the mirror line first ... */
+	if (mirror_print(mp, fname, fp, options, ep) != 0)
+		return (-1);
+
+	/* ... then each submirror that is itself a metadevice */
+	for (slot = 0; slot < NMIRROR; slot++) {
+		md_submirror_t	*subp = &mp->submirrors[slot];
+		mdname_t	*subnamep = subp->submirnamep;
+
+		if (subnamep == NULL) {
+			assert(subp->state == SMS_UNUSED);
+			continue;
+		}
+
+		if (metaismeta(subnamep) &&
+		    meta_print_name(sp, subnamep, nlpp, fname, fp,
+		    (options | PRINT_SUBDEVS), NULL, ep) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Bring submirror "submirnp" of mirror "mirnp" online.
+ *
+ * The request is refused with MDE_ABR_SET when the mirror's tstate has
+ * MD_ABR_CAP set.  On success the cached names of the mirror and the
+ * submirror are invalidated and, with MDCMD_PRINT, a message is written
+ * to stdout.  Returns 0 on success and -1 (or the mdstealerror() result)
+ * on failure.
+ */
+int
+meta_mirror_online(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*submirnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_i_off_on_t	req;
+	md_mirror_t	*mp;
+	md_set_desc	*setdesc;
+	uint_t		mirror_tstate;
+
+	/* the mirror has to live in the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	mp = meta_get_mirror(sp, mirnp, ep);
+	if (mp == NULL)
+		return (-1);
+
+	/* Only valid for mirror without ABR set */
+	if (meta_get_tstate(mp->common.namep->dev, &mirror_tstate, ep) != 0)
+		return (-1);
+	if (mirror_tstate & MD_ABR_CAP) {
+		(void) mderror(ep, MDE_ABR_SET, NULL);
+		return (-1);
+	}
+
+	/*
+	 * In a MN set, the master always executes the online command first.
+	 * Before the master executes the IOC_ONLINE ioctl,
+	 * the master sends a message to all nodes to suspend writes to
+	 * this mirror. Then the master executes the IOC_ONLINE ioctl
+	 * which resumes writes to this mirror from the master node.
+	 * As each slave executes the online command, each slave will
+	 * call the IOC_ONLINE ioctl which will resume writes to this mirror
+	 * from that slave node.
+	 */
+	if (!metaislocalset(sp)) {
+		setdesc = metaget_setdesc(sp, ep);
+		if (setdesc == NULL)
+			return (-1);
+		if ((MD_MNSET_DESC(setdesc)) && setdesc->sd_mn_am_i_master &&
+		    (meta_mn_send_suspend_writes(
+		    meta_getminor(mirnp->dev), ep) != 0))
+			return (-1);
+	}
+
+	/* build and issue the online request */
+	(void) memset(&req, 0, sizeof (req));
+	req.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&req, MD_MIRROR, sp->setno);
+	req.submirror = submirnp->dev;
+	if (metaioctl(MD_IOCONLINE, &req, &req.mde, NULL) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* cached state for both names is stale now */
+	meta_invalidate_name(mirnp);
+	meta_invalidate_name(submirnp);
+
+	/* tell the user when asked to */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: submirror %s is onlined\n"),
+		    mirnp->cname, submirnp->cname);
+		(void) fflush(stdout);
+	}
+
+	return (0);
+}
+
+/*
+ * Take submirror "submirnp" of mirror "mirnp" offline.
+ *
+ * MDCMD_FORCE in "options" is passed to the driver as force_offline.
+ * The request is refused with MDE_ABR_SET when the mirror's tstate has
+ * MD_ABR_CAP set.  On success the cached names of the mirror and the
+ * submirror are invalidated and, with MDCMD_PRINT, a message is written
+ * to stdout.  Returns 0 on success and -1 (or the mdstealerror() result)
+ * on failure.
+ */
+int
+meta_mirror_offline(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*submirnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	int		forced = ((options & MDCMD_FORCE) ? 1 : 0);
+	md_i_off_on_t	req;
+	md_mirror_t	*mp;
+	md_set_desc	*setdesc;
+	uint_t		mirror_tstate;
+
+	/* the mirror has to live in the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	mp = meta_get_mirror(sp, mirnp, ep);
+	if (mp == NULL)
+		return (-1);
+
+	/* Only valid for mirror without ABR set */
+	if (meta_get_tstate(mp->common.namep->dev, &mirror_tstate, ep) != 0)
+		return (-1);
+	if (mirror_tstate & MD_ABR_CAP) {
+		(void) mderror(ep, MDE_ABR_SET, NULL);
+		return (-1);
+	}
+
+	/*
+	 * In a MN set, the master always executes the offline command first.
+	 * Before the master executes the IOC_OFFLINE ioctl,
+	 * the master sends a message to all nodes to suspend writes to
+	 * this mirror. Then the master executes the IOC_OFFLINE ioctl
+	 * which resumes writes to this mirror from the master node.
+	 * As each slave executes the offline command, each slave will
+	 * call the IOC_OFFLINE ioctl which will resume writes to this mirror
+	 * from that slave node.
+	 */
+	if (!metaislocalset(sp)) {
+		setdesc = metaget_setdesc(sp, ep);
+		if (setdesc == NULL)
+			return (-1);
+		if ((MD_MNSET_DESC(setdesc)) && setdesc->sd_mn_am_i_master &&
+		    (meta_mn_send_suspend_writes(
+		    meta_getminor(mirnp->dev), ep) != 0))
+			return (-1);
+	}
+
+	/* build and issue the offline request */
+	(void) memset(&req, 0, sizeof (req));
+	req.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&req, MD_MIRROR, sp->setno);
+	req.submirror = submirnp->dev;
+	req.force_offline = forced;
+	if (metaioctl(MD_IOCOFFLINE, &req, &req.mde, NULL) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* cached state for both names is stale now */
+	meta_invalidate_name(mirnp);
+	meta_invalidate_name(submirnp);
+
+	/* tell the user when asked to */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: submirror %s is offlined\n"),
+		    mirnp->cname, submirnp->cname);
+		(void) fflush(stdout);
+	}
+
+	return (0);
+}
+
+/*
+ * meta_mirror_attach - attach a submirror to a mirror, or grow the mirror.
+ *
+ * When submirnp is NULL the mirror is simply grown through
+ * meta_concat_generic().  Otherwise the submirror is checked, its name
+ * is stored in the namespace (only when MDCMD_DOIT is set) and the
+ * MD_IOCATTACH ioctl is issued; without MDCMD_DOIT the ioctl is run with
+ * MDIOCTL_DRYRUN and the mddb is left untouched.
+ *
+ * We never have to worry about crossing a threshold here.  Two cases:
+ * 1) attach: the only way the mirror can be 64 bit is if one of the
+ * submirrors already is.  2) grow: the only way the mirror is 64 bit
+ * is if one of the submirrors already is.
+ *
+ * Returns 0 on success.  On failure *ep describes the error, any key
+ * added to the namespace by this call has been removed again, and the
+ * return value is -1 (or the result of mdstealerror() for a failed
+ * ioctl).
+ */
+int
+meta_mirror_attach(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*submirnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_att_struct_t	att;
+	md_set_desc	*sd;
+	md_error_t	xep = mdnullerror;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/* just grow */
+	if (submirnp == NULL) {
+		return (meta_concat_generic(sp, mirnp, NULL, ep));
+	}
+
+	/* check submirror */
+	if (meta_check_submirror(sp, submirnp, mirnp, 0, ep) != 0)
+		return (-1);
+
+	/* In dryrun mode (DOIT not set) we must not alter the mddb */
+	if (options & MDCMD_DOIT) {
+		/* store name in namespace */
+		if (add_key_name(sp, submirnp, NULL, ep) != 0)
+			return (-1);
+	}
+
+	/*
+	 * In a MN set, the master always executes the attach command first.
+	 * Before the master executes the IOC_ATTACH ioctl, in non-DRYRUN mode
+	 * the master sends a message to all nodes to suspend writes to
+	 * this mirror.  Then the master executes the IOC_ATTACH ioctl
+	 * which resumes writes to this mirror from the master node.
+	 * As each slave executes the attach command, each slave will
+	 * call the IOC_ATTACH ioctl which will resume writes to this mirror
+	 * from that slave node.
+	 *
+	 * A failure here must undo the namespace entry made above, exactly
+	 * as a failed ioctl does.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			goto undo_key;
+		if ((MD_MNSET_DESC(sd)) && (options & MDCMD_DOIT) &&
+		    sd->sd_mn_am_i_master &&
+		    (meta_mn_send_suspend_writes(
+		    meta_getminor(mirnp->dev), ep) != 0))
+			goto undo_key;
+	}
+
+	/* attach submirror */
+	(void) memset(&att, 0, sizeof (att));
+	att.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&att, MD_MIRROR, sp->setno);
+	att.submirror = submirnp->dev;
+	att.key = submirnp->key;
+	/* if the command was issued with -n option, use dryrun mode */
+	if ((options & MDCMD_DOIT) == 0) {
+		att.options = MDIOCTL_DRYRUN;
+	}
+	if (metaioctl(MD_IOCATTACH, &att, &att.mde, NULL) != 0) {
+		/* In dryrun mode (DOIT not set) we must not alter the mddb */
+		if (options & MDCMD_DOIT) {
+			(void) del_key_name(sp, submirnp, ep);
+		}
+		return (mdstealerror(ep, &att.mde));
+	}
+
+	/* In dryrun mode (DOIT not set) we must not alter the mddb */
+	if (options & MDCMD_DOIT) {
+		/* clear cache */
+		meta_invalidate_name(mirnp);
+		meta_invalidate_name(submirnp);
+	}
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: submirror %s %s\n"), mirnp->cname, submirnp->cname,
+		    (options & MDCMD_DOIT) ? "is attached" : "would attach");
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+
+	/*
+	 * Error exit taken after the key may have been stored: remove it
+	 * again.  A scratch error is used so the failure already recorded
+	 * in *ep is what the caller sees.
+	 */
+undo_key:
+	if (options & MDCMD_DOIT) {
+		(void) del_key_name(sp, submirnp, &xep);
+		mdclrerror(&xep);
+	}
+	return (-1);
+}
+
+/*
+ * meta_mirror_detach - detach a submirror from a mirror.
+ *
+ * MDCMD_FORCE in options is passed to the driver as force_detach and
+ * MDCMD_PRINT makes the routine print a confirmation line on stdout.
+ *
+ * Returns 0 on success.  On failure *ep describes the error and the
+ * return value is -1, or whatever mdstealerror() yields when the
+ * MD_IOCDETACH ioctl itself fails.
+ */
+int
+meta_mirror_detach(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*submirnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_detach_params_t	req;
+	md_set_desc		*setdesc;
+
+	/* the mirror must live in the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* validate the mirror name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/*
+	 * Multi-node sets: the master runs the detach command first and,
+	 * before issuing the detach ioctl, tells every node to suspend
+	 * writes to this mirror.  The ioctl then resumes writes on the node
+	 * that executes it, so each slave resumes its own writes when it
+	 * runs the command in turn.
+	 */
+	if (! metaislocalset(sp)) {
+		setdesc = metaget_setdesc(sp, ep);
+		if (setdesc == NULL)
+			return (-1);
+		if ((MD_MNSET_DESC(setdesc)) && setdesc->sd_mn_am_i_master &&
+		    (meta_mn_send_suspend_writes(
+		    meta_getminor(mirnp->dev), ep) != 0))
+			return (-1);
+	}
+
+	/* build the request and hand it to the mirror driver */
+	(void) memset(&req, 0, sizeof (req));
+	req.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&req, MD_MIRROR, sp->setno);
+	req.submirror = submirnp->dev;
+	req.force_detach = ((options & MDCMD_FORCE) ? 1 : 0);
+	if (metaioctl(MD_IOCDETACH, &req, &req.mde, NULL) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* cached information for both names is now stale */
+	meta_invalidate_name(mirnp);
+	meta_invalidate_name(submirnp);
+
+	/* report the result if asked to */
+	if ((options & MDCMD_PRINT) != 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: submirror %s is detached\n"),
+		    mirnp->cname, submirnp->cname);
+		(void) fflush(stdout);
+	}
+
+	return (0);
+}
+
+/*
+ * meta_mirror_get_params - fetch the tunable parameters of a mirror.
+ *
+ * On success *paramsp is zeroed and then filled with the mirror's read
+ * option, write option and pass number, and 0 is returned.  On failure
+ * -1 is returned, *ep describes the error and *paramsp is untouched.
+ */
+int
+meta_mirror_get_params(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mm_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mp;
+
+	/* the mirror must live in the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* validate the name, then look the mirror up */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+	mp = meta_get_mirror(sp, mirnp, ep);
+	if (mp == NULL)
+		return (-1);
+
+	/* copy the three tunables into a freshly cleared structure */
+	(void) memset(paramsp, 0, sizeof (*paramsp));
+	paramsp->pass_num = mp->pass_num;
+	paramsp->write_option = mp->write_option;
+	paramsp->read_option = mp->read_option;
+
+	return (0);
+}
+
+/*
+ * meta_mirror_set_params - change the tunable parameters of a mirror.
+ *
+ * The contents of *paramsp are handed to the mirror driver through the
+ * MD_IOCCHANGE ioctl, after which the cached information for the mirror
+ * is invalidated.
+ *
+ * Returns 0 on success.  On failure *ep describes the error and the
+ * return value is -1, or whatever mdstealerror() yields when the ioctl
+ * fails.
+ */
+int
+meta_mirror_set_params(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mm_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_mirror_params_t	req;
+
+	/* the mirror must live in the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* validate the mirror name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/* build the change request */
+	(void) memset(&req, 0, sizeof (req));
+	MD_SETDRIVERNAME(&req, MD_MIRROR, sp->setno);
+	req.mnum = meta_getminor(mirnp->dev);
+	req.params = *paramsp;
+
+	/* apply it */
+	if (metaioctl(MD_IOCCHANGE, &req, &req.mde, mirnp->cname) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* whatever was cached for the mirror is now stale */
+	meta_invalidate_name(mirnp);
+
+	return (0);
+}
+
+/*
+ * invalidate_submirrors - drop the cached name information of every
+ * submirror of a mirror.
+ *
+ * Returns 0 on success, or -1 with *ep set when the mirror itself
+ * cannot be fetched.
+ */
+static int
+invalidate_submirrors(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mp;
+	uint_t		idx;
+
+	mp = meta_get_mirror(sp, mirnp, ep);
+	if (mp == NULL)
+		return (-1);
+
+	for (idx = 0; idx < NMIRROR; idx++) {
+		md_submirror_t	*slot = &mp->submirrors[idx];
+
+		if (slot->submirnamep != NULL) {
+			meta_invalidate_name(slot->submirnamep);
+		} else {
+			/* a slot without a name must be an unused one */
+			assert(slot->state == SMS_UNUSED);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * meta_mirror_replace - replace a component of a mirror.
+ *
+ * oldnp names the component being replaced and newnp the replacement.
+ * When both name the same device but its dev_t differs between the
+ * filesystem and the metadb, the operation is a "rebind" and a
+ * duplicate-use error (MDE_ALREADY) from meta_check_component() is
+ * tolerated.  Without MDCMD_DOIT the MD_IOCREPLACE ioctl is issued with
+ * MDIOCTL_DRYRUN; MDCMD_PRINT reports success on stdout.
+ *
+ * Returns 0 on success.  On failure *ep describes the error and the
+ * return value is -1 (or the result of mdsyserror()/mdstealerror()).
+ */
+int
+meta_mirror_replace(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*oldnp,
+	mdname_t	*newnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_mirror_t		*mirrorp;
+	uint_t			smi;
+	replace_params_t	params;
+	diskaddr_t		size, label, start_blk;
+	md_dev64_t		old_dev, new_dev;
+	diskaddr_t		new_start_blk, new_end_blk;
+	int			rebind;
+	md_set_desc		*sd = NULL;
+	char			*new_devidp = NULL;
+	int			ret;
+	md_error_t		xep = mdnullerror;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/* save new binding in case this is a rebind where oldnp==newnp */
+	new_dev = newnp->dev;
+	new_start_blk = newnp->start_blk;
+	new_end_blk = newnp->end_blk;
+
+	/* invalidate, then get the mirror (fill in oldnp from metadb) */
+	meta_invalidate_name(mirnp);
+	if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+		return (-1);
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnp = mdsp->submirnamep;
+
+		if (submirnp == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		if (! metaismeta(submirnp))
+			continue;
+
+		meta_invalidate_name(submirnp);
+		if (meta_get_unit(sp, submirnp, ep) == NULL)
+			return (-1);
+	}
+
+	/* the old device binding is now established */
+	if ((old_dev = oldnp->dev) == NODEV64)
+		return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+	/*
+	 * check for the case where oldnp and newnp indicate the same
+	 * device, but the dev_t of the device has changed between old
+	 * and new.  This is called a rebind.  On entry the dev_t
+	 * represents the new device binding determined from the
+	 * filesystem (meta_getdev).  After calling meta_get_unit
+	 * oldnp (and maybe newnp if this is a rebind) is updated based
+	 * to the old binding from the metadb (done by metakeyname).
+	 */
+	rebind = ((strcmp(oldnp->rname, newnp->rname) == 0) &&
+	    (old_dev != new_dev)) ? 1 : 0;
+	if (rebind) {
+		newnp->dev = new_dev;
+		newnp->start_blk = new_start_blk;
+		newnp->end_blk = new_end_blk;
+	}
+
+	/*
+	 * Save a copy of the devid associated with the new disk, the reason
+	 * is that if we are rebinding then the call to meta_check_component()
+	 * will cause the devid of the disk to be overwritten with what is in
+	 * the replica namespace.  The function that actually overwrites the
+	 * devid is dr2drivedesc().
+	 */
+	if (newnp->drivenamep->devid != NULL)
+		new_devidp = Strdup(newnp->drivenamep->devid);
+
+	/*
+	 * If it's a multi-node diskset no devid is carried over.  The saved
+	 * copy has to be released here (and on the lookup failure) rather
+	 * than just forgotten.
+	 */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			goto free_devid;
+		if (MD_MNSET_DESC(sd)) {
+			Free(new_devidp);
+			new_devidp = NULL;
+		}
+	}
+
+	/* check it out (dup on rebind is ok) */
+	if (meta_check_component(sp, newnp, 0, ep) != 0) {
+		if ((! rebind) || (! mdisuseerror(ep, MDE_ALREADY)))
+			goto free_devid;
+		mdclrerror(ep);
+	}
+	if ((size = metagetsize(newnp, ep)) == MD_DISKADDR_ERROR)
+		goto free_devid;
+	if ((label = metagetlabel(newnp, ep)) == MD_DISKADDR_ERROR)
+		goto free_devid;
+	if ((start_blk = metagetstart(sp, newnp, ep)) == MD_DISKADDR_ERROR)
+		goto free_devid;
+	if (start_blk >= size) {
+		(void) mdsyserror(ep, ENOSPC, newnp->cname);
+		goto free_devid;
+	}
+
+	/*
+	 * Copy back the saved devid.  Ownership of the saved string moves
+	 * to the drive name; when nothing was saved the field is reset to
+	 * NULL so it never points at the memory just released.
+	 */
+	Free(newnp->drivenamep->devid);
+	newnp->drivenamep->devid = new_devidp;
+	new_devidp = NULL;
+
+	/*
+	 * store name in namespace, allocate new key
+	 * NOTE(review): unlike meta_mirror_attach() this is done even when
+	 * MDCMD_DOIT is not set - confirm that is intended for dryruns.
+	 */
+	if (add_key_name(sp, newnp, NULL, ep) != 0)
+		return (-1);
+
+	/*
+	 * In a MN set, the master always executes the replace command first.
+	 * Before the master executes the IOC_REPLACE ioctl, in non-DRYRUN mode
+	 * the master sends a message to all nodes to suspend writes to
+	 * this mirror.  Then the master executes the IOC_REPLACE ioctl
+	 * which resumes writes to this mirror from the master node.
+	 * As each slave executes the replace command, each slave will
+	 * call the IOC_REPLACE ioctl which will resume writes to this mirror
+	 * from that slave node.
+	 *
+	 * sd was looked up above for every non-local set.  If the suspend
+	 * request fails the key stored above is removed again.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((MD_MNSET_DESC(sd)) && (options & MDCMD_DOIT) &&
+		    sd->sd_mn_am_i_master &&
+		    (meta_mn_send_suspend_writes(
+		    meta_getminor(mirnp->dev), ep) != 0)) {
+			(void) del_key_name(sp, newnp, &xep);
+			mdclrerror(&xep);
+			return (-1);
+		}
+	}
+
+	if (rebind && !metaislocalset(sp)) {
+		/*
+		 * We are 'rebind'ing a disk that is in a diskset so as well
+		 * as updating the diskset's namespace the local set needs
+		 * to be updated because it also contains a reference to
+		 * the disk in question.
+		 */
+		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
+		    newnp->cname, ep);
+
+		if (ret != METADEVADM_SUCCESS) {
+			(void) del_key_name(sp, newnp, &xep);
+			mdclrerror(&xep);
+			return (-1);
+		}
+	}
+
+	/* replace component */
+	(void) memset(&params, 0, sizeof (params));
+	params.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&params, MD_MIRROR, sp->setno);
+	params.cmd = REPLACE_COMP;
+	params.old_dev = old_dev;
+	params.new_dev = new_dev;
+	params.start_blk = start_blk;
+	params.has_label = ((label > 0) ? 1 : 0);
+	params.number_blks = size;
+	params.new_key = newnp->key;
+	/* Is this just a dryrun ? */
+	if ((options & MDCMD_DOIT) == 0) {
+		params.options |= MDIOCTL_DRYRUN;
+	}
+	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
+		(void) del_key_name(sp, newnp, ep);
+		return (mdstealerror(ep, &params.mde));
+	}
+
+	/* clear cache */
+	meta_invalidate_name(oldnp);
+	meta_invalidate_name(newnp);
+	if (invalidate_submirrors(sp, mirnp, ep) != 0) {
+		meta_invalidate_name(mirnp);
+		return (-1);
+	}
+	meta_invalidate_name(mirnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: device %s is replaced with %s\n"),
+		    mirnp->cname, oldnp->cname, newnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+
+	/* error exit while we still own the saved devid copy */
+free_devid:
+	Free(new_devidp);
+	return (-1);
+}
+
+/*
+ * enable mirror component
+ *
+ * Re-enables component compnp of mirror mirnp through the MD_IOCREPLACE
+ * ioctl with cmd ENABLE_COMP (old and new device are the same). If the
+ * dev_t found in the filesystem differs from the one recorded in the
+ * metadb, the work is handed to meta_mirror_replace() as a rebind.
+ * Without MDCMD_DOIT the ioctl is issued with MDIOCTL_DRYRUN;
+ * MDCMD_PRINT reports success on stdout.
+ *
+ * Returns 0 on success. On failure *ep describes the error and the
+ * return value is -1 (or the result of mdsyserror()/mdstealerror()).
+ */
+int
+meta_mirror_enable(
+ mdsetname_t *sp,
+ mdname_t *mirnp,
+ mdname_t *compnp,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ md_mirror_t *mirrorp;
+ uint_t smi;
+ replace_params_t params;
+ diskaddr_t size, label, start_blk;
+ md_dev64_t fs_dev;
+ md_set_desc *sd;
+ int ret;
+
+ /* should have same set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+ /* check name */
+ if (metachkmeta(mirnp, ep) != 0)
+ return (-1);
+
+ /* get the file_system dev binding */
+ if (meta_getdev(sp, compnp, ep) != 0)
+ return (-1);
+ /* remember it: the lookups below may overwrite compnp->dev */
+ fs_dev = compnp->dev;
+
+ /* get the mirror unit (fill in compnp->dev with metadb version) */
+ meta_invalidate_name(mirnp);
+ if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+ return (-1);
+
+ /* refresh every submirror that is itself a metadevice */
+ for (smi = 0; (smi < NMIRROR); ++smi) {
+ md_submirror_t *mdsp = &mirrorp->submirrors[smi];
+ mdname_t *submirnp = mdsp->submirnamep;
+
+ if (submirnp == NULL) {
+ assert(mdsp->state == SMS_UNUSED);
+ continue;
+ }
+
+ if (! metaismeta(submirnp))
+ continue;
+
+ meta_invalidate_name(submirnp);
+ if (meta_get_unit(sp, submirnp, ep) == NULL)
+ return (-1);
+ }
+
+ /* the metadb device binding is now established */
+ if (compnp->dev == NODEV64)
+ return (mdsyserror(ep, ENODEV, compnp->cname));
+
+ /*
+ * check for the case where the dev_t has changed between the
+ * filesystem and the metadb. This is called a rebind, and
+ * is handled by meta_mirror_replace.
+ */
+ if (fs_dev != compnp->dev) {
+ /* establish file system binding with invalid start/end */
+ compnp->dev = fs_dev;
+ compnp->start_blk = -1;
+ compnp->end_blk = -1;
+ /* same name passed as both old and new component */
+ return (meta_mirror_replace(sp, mirnp,
+ compnp, compnp, options, ep));
+ }
+
+ /* setup mirror info */
+ (void) memset(&params, 0, sizeof (params));
+ params.mnum = meta_getminor(mirnp->dev);
+ MD_SETDRIVERNAME(&params, MD_MIRROR, sp->setno);
+ params.cmd = ENABLE_COMP;
+
+ /* check it out (an "already in use" error is expected and ignored) */
+ if (meta_check_component(sp, compnp, 0, ep) != 0) {
+ if (! mdisuseerror(ep, MDE_ALREADY))
+ return (-1);
+ mdclrerror(ep);
+ }
+
+ /* gather size, label and start block; the start must lie inside */
+ if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR)
+ return (-1);
+ if ((label = metagetlabel(compnp, ep)) == MD_DISKADDR_ERROR)
+ return (-1);
+ if ((start_blk = metagetstart(sp, compnp, ep)) == MD_DISKADDR_ERROR)
+ return (-1);
+ if (start_blk >= size) {
+ (void) mdsyserror(ep, ENOSPC, compnp->cname);
+ return (-1);
+ }
+
+ /*
+ * In a MN set, the master always executes the replace command first.
+ * Before the master executes the IOC_REPLACE ioctl, in non-DRYRUN mode
+ * the master sends a message to all nodes to suspend writes to
+ * this mirror. Then the master executes the IOC_REPLACE ioctl
+ * which resumes writes to this mirror from the master node.
+ * As each slave executes the replace command, each slave will
+ * call the IOC_REPLACE ioctl which will resume writes to this mirror
+ * from that slave node.
+ */
+ if (! metaislocalset(sp)) {
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+ if ((MD_MNSET_DESC(sd)) && (options & MDCMD_DOIT) &&
+ sd->sd_mn_am_i_master)
+ if (meta_mn_send_suspend_writes(
+ meta_getminor(mirnp->dev), ep) != 0)
+ return (-1);
+ }
+
+ /* enable component */
+ params.old_dev = compnp->dev;
+ params.new_dev = compnp->dev;
+ params.start_blk = start_blk;
+ params.has_label = ((label > 0) ? 1 : 0);
+ params.number_blks = size;
+
+ /* Is this just a dryrun ? */
+ if ((options & MDCMD_DOIT) == 0) {
+ params.options |= MDIOCTL_DRYRUN;
+ }
+ if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0)
+ return (mdstealerror(ep, &params.mde));
+
+ /*
+ * Are we dealing with a non-local set? If so need to update the
+ * local namespace so that the disk record has the correct devid.
+ * NOTE(review): this runs even when MDCMD_DOIT is not set - confirm
+ * that updating the local set is intended for a dryrun.
+ */
+ if (!metaislocalset(sp)) {
+ ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, compnp->cname,
+ ep);
+
+ if (ret != METADEVADM_SUCCESS) {
+ /*
+ * Failed to update the local set. Nothing to do here
+ * apart from report the error. The namespace is
+ * most likely broken and some form of remedial
+ * recovery is going to be required.
+ */
+ mde_perror(ep, "");
+ mdclrerror(ep);
+ }
+ }
+
+ /* clear cache */
+ meta_invalidate_name(compnp);
+ if (invalidate_submirrors(sp, mirnp, ep) != 0) {
+ meta_invalidate_name(mirnp);
+ return (-1);
+ }
+ meta_invalidate_name(mirnp);
+
+ /* let em know */
+ if (options & MDCMD_PRINT) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: device %s is enabled\n"),
+ mirnp->cname, compnp->cname);
+ (void) fflush(stdout);
+ }
+
+ /* return success */
+ return (0);
+}
+
+/*
+ * check_twice - make sure submirror number smi does not overlap any of
+ * the submirrors that precede it in the same mirror.
+ *
+ * Returns 0 when meta_check_overlap() reports no overlap, otherwise -1
+ * with *ep set by that routine.
+ */
+static int
+check_twice(
+	md_mirror_t	*mirrorp,
+	uint_t		smi,
+	md_error_t	*ep
+)
+{
+	char		*owner = mirrorp->common.namep->cname;
+	mdname_t	*candidate = mirrorp->submirrors[smi].submirnamep;
+	uint_t		prev;
+
+	for (prev = 0; prev < smi; prev++) {
+		mdname_t	*other = mirrorp->submirrors[prev].submirnamep;
+
+		/* compare only against slots that hold a submirror */
+		if ((other != NULL) &&
+		    (meta_check_overlap(owner, candidate, 0, -1,
+		    other, 0, -1, ep) != 0))
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * meta_check_mirror - validate a mirror description.
+ *
+ * Verifies that the mirror has at least one submirror, that every slot
+ * without a name is marked SMS_UNUSED, that no submirror overlaps an
+ * earlier one, and that the read option, write option and pass number
+ * are legal.  The per-submirror device checks and the non-zero size
+ * check are performed only when MDCMD_DOIT is set; MDCMD_FORCE is
+ * passed through to meta_check_submirror().
+ *
+ * Returns 0 when the mirror is acceptable; otherwise *ep describes the
+ * problem and the result of the error routine (or -1) is returned.
+ */
+int
+meta_check_mirror(
+	mdsetname_t	*sp,
+	md_mirror_t	*mirrorp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*mirnp = mirrorp->common.namep;
+	int		forced = ((options & MDCMD_FORCE) ? 1 : 0);
+	int		live = ((options & MDCMD_DOIT) ? 1 : 0);
+	uint_t		in_use = 0;
+	uint_t		idx;
+
+	/* count the slots that actually name a submirror */
+	for (idx = 0; idx < NMIRROR; idx++) {
+		if (mirrorp->submirrors[idx].submirnamep != NULL)
+			in_use++;
+	}
+	if (in_use < 1) {
+		return (mdmderror(ep, MDE_BAD_MIRROR,
+		    meta_getminor(mirnp->dev), mirnp->cname));
+	}
+
+	/* examine each slot in turn */
+	for (idx = 0; idx < NMIRROR; idx++) {
+		md_submirror_t	*slot = &mirrorp->submirrors[idx];
+		mdname_t	*subnp = slot->submirnamep;
+		diskaddr_t	nblocks;
+
+		/* an empty slot is fine as long as it is marked unused */
+		if (subnp == NULL) {
+			if (slot->state == SMS_UNUSED)
+				continue;
+			return (mdmderror(ep, MDE_BAD_MIRROR,
+			    meta_getminor(mirnp->dev), mirnp->cname));
+		}
+
+		/* device level checks happen only when really doing it */
+		if (live) {
+			if (meta_check_submirror(sp, subnp, NULL, forced,
+			    ep) != 0)
+				return (-1);
+			nblocks = metagetsize(subnp, ep);
+			if (nblocks == MD_DISKADDR_ERROR)
+				return (-1);
+			if (nblocks == 0) {
+				return (mdsyserror(ep, ENOSPC,
+				    subnp->cname));
+			}
+		}
+
+		/* the submirror may not duplicate an earlier one */
+		if (check_twice(mirrorp, idx, ep) != 0)
+			return (-1);
+	}
+
+	/* read option must be one of the known policies */
+	if ((mirrorp->read_option != RD_LOAD_BAL) &&
+	    (mirrorp->read_option != RD_GEOMETRY) &&
+	    (mirrorp->read_option != RD_FIRST))
+		return (mderror(ep, MDE_BAD_RD_OPT, mirnp->cname));
+
+	/* write option must be one of the known policies */
+	if ((mirrorp->write_option != WR_PARALLEL) &&
+	    (mirrorp->write_option != WR_SERIAL))
+		return (mderror(ep, MDE_BAD_WR_OPT, mirnp->cname));
+
+	/* pass number must lie in 0 .. MD_PASS_MAX */
+	if ((mirrorp->pass_num < 0) || (mirrorp->pass_num > MD_PASS_MAX))
+		return (mderror(ep, MDE_BAD_PASS_NUM, mirnp->cname));
+
+	return (0);
+}
+
+/*
+ * mirror_geom - fill in the geometry of a mirror unit.
+ *
+ * The largest write_reinstruct and read_reinstruct values found among
+ * the submirrors are combined with the geometry of the first submirror
+ * and passed to meta_setup_geom().
+ *
+ * Returns 0 on success, -1 with *ep set on failure.
+ */
+static int
+mirror_geom(
+	md_mirror_t	*mirrorp,
+	mm_unit_t	*mm,
+	md_error_t	*ep
+)
+{
+	uint_t		worst_write = 0;
+	uint_t		worst_read = 0;
+	uint_t		round_cyl = 1;
+	mdname_t	*firstnp = NULL;
+	mdgeom_t	*gp;
+	uint_t		idx;
+
+	/* find the largest reinstruct values and the first submirror */
+	for (idx = 0; idx < NMIRROR; idx++) {
+		mdname_t	*subnp = mirrorp->submirrors[idx].submirnamep;
+
+		if (subnp == NULL)
+			continue;
+
+		gp = metagetgeom(subnp, ep);
+		if (gp == NULL)
+			return (-1);
+
+		if (worst_write < gp->write_reinstruct)
+			worst_write = gp->write_reinstruct;
+		if (worst_read < gp->read_reinstruct)
+			worst_read = gp->read_reinstruct;
+
+		if (firstnp == NULL)
+			firstnp = subnp;
+	}
+
+	/* the base geometry comes from the first submirror */
+	assert(firstnp != NULL);
+	gp = metagetgeom(firstnp, ep);
+	if (gp == NULL)
+		return (-1);
+
+	if (meta_setup_geom((md_unit_t *)mm, mirrorp->common.namep, gp,
+	    worst_write, worst_read, round_cyl, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * meta_create_mirror - create a mirror from its description.
+ *
+ * The description is validated with meta_check_mirror(), a mirror unit
+ * is built whose size is that of the smallest submirror, and - only
+ * when MDCMD_DOIT is set - the submirror names are stored in the
+ * namespace and the unit is handed to the driver with MD_IOCSET.
+ * With MDCMD_UPDATE the computed size is written back to
+ * mirrorp->common.size.
+ *
+ * Returns 0 on success.  On failure *ep describes the error, any
+ * namespace keys added by this call are removed again, and -1 (or the
+ * result of mdsyserror()) is returned.
+ */
+int
+meta_create_mirror(
+	mdsetname_t	*sp,
+	md_mirror_t	*mirrorp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*mirnp = mirrorp->common.namep;
+	mm_unit_t	*mm;
+	diskaddr_t	submir_size = MD_DISKADDR_ERROR;
+	ushort_t	nsm = 0;
+	uint_t		smi;
+	mdnamelist_t	*keynlp = NULL;
+	md_set_params_t	set_params;
+	int		rval = -1;
+	md_timeval32_t	creation_time;
+	int		create_flag;
+
+	/* validate mirror */
+	if (meta_check_mirror(sp, mirrorp, options, ep) != 0)
+		return (-1);
+
+	/*
+	 * Get the creation time before anything is allocated, so this
+	 * early error return has nothing to release.
+	 */
+	if (meta_gettimeofday(&creation_time) == -1)
+		return (mdsyserror(ep, errno, NULL));
+
+	/* allocate mirror unit */
+	mm = Zalloc(sizeof (*mm));
+
+	/* do submirrors */
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnp = mdsp->submirnamep;
+		mm_submirror_t	*mmsp = &mm->un_sm[smi];
+		diskaddr_t	size;
+
+		/* skip unused submirrors */
+		if (submirnp == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+		++nsm;
+
+		/* get size */
+		if ((size = metagetsize(submirnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		assert(size > 0);
+
+		/* adjust for smallest submirror */
+		if (submir_size == MD_DISKADDR_ERROR) {
+			submir_size = size;
+		} else if (size < submir_size) {
+			submir_size = size;
+		}
+
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (add_key_name(sp, submirnp, &keynlp, ep) != 0)
+				goto out;
+		}
+
+		/* setup submirror */
+		mmsp->sm_key = submirnp->key;
+		mmsp->sm_dev = submirnp->dev;
+		mmsp->sm_state = SMS_RUNNING;
+		mmsp->sm_timestamp = creation_time;
+	}
+
+	/* setup unit */
+	mm->c.un_type = MD_METAMIRROR;
+	MD_SID(mm) = meta_getminor(mirnp->dev);
+	mm->c.un_actual_tb = submir_size;
+	mm->c.un_size = offsetof(mm_unit_t, un_smic);
+	mm->un_nsm = nsm;
+	mm->un_read_option = mirrorp->read_option;
+	mm->un_write_option = mirrorp->write_option;
+	mm->un_pass_num = mirrorp->pass_num;
+	if (mirror_geom(mirrorp, mm, ep) != 0)
+		goto out;
+
+	/* fill in the size of the mirror */
+	if (options & MDCMD_UPDATE) {
+		mirrorp->common.size = mm->c.un_total_blocks;
+	}
+
+	/* if we're not doing anything, return success */
+	if (! (options & MDCMD_DOIT)) {
+		rval = 0;	/* success */
+		goto out;
+	}
+
+	/* create mirror */
+	(void) memset(&set_params, 0, sizeof (set_params));
+	/* pick the 32 bit or 64 bit unit format from the device size */
+	create_flag = meta_check_devicesize(mm->c.un_total_blocks);
+	if (create_flag == MD_CRO_64BIT) {
+		mm->c.un_revision = MD_64BIT_META_DEV;
+		set_params.options = MD_CRO_64BIT;
+	} else {
+		mm->c.un_revision = MD_32BIT_META_DEV;
+		set_params.options = MD_CRO_32BIT;
+	}
+	set_params.mnum = MD_SID(mm);
+	set_params.size = mm->c.un_size;
+	set_params.mdp = (uintptr_t)mm;
+	MD_SETDRIVERNAME(&set_params, MD_MIRROR, MD_MIN2SET(set_params.mnum));
+	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+	    mirnp->cname) != 0) {
+		(void) mdstealerror(ep, &set_params.mde);
+		goto out;
+	}
+	rval = 0;	/* success */
+
+	/* cleanup, return success */
+out:
+	Free(mm);
+	if (rval != 0) {
+		/* undo whatever namespace entries were made above */
+		(void) del_key_names(sp, keynlp, NULL);
+	}
+	metafreenamelist(keynlp);
+	if ((rval == 0) && (options & MDCMD_DOIT)) {
+		/* the new mirror makes cached submirror info stale */
+		if (invalidate_submirrors(sp, mirnp, ep) != 0)
+			rval = -1;
+		meta_invalidate_name(mirnp);
+	}
+	return (rval);
+}
+
+/*
+ * meta_init_mirror - parse a mirror description and create the mirror.
+ * NOTE: this function is metainit(1m)'s command line parser!
+ *
+ * argv[0] is the mirror name and must be followed by "-m", one or more
+ * submirror names, optional flags (-g, -r, -S) and an optional pass
+ * number.  Unless MDCMD_NOLOCK is set the set lock is taken and set
+ * ownership is checked before anything else is done.
+ *
+ * Returns 0 on success; otherwise *ep describes the error and -1 (or
+ * the result of meta_cook_syntax()) is returned.
+ */
+int
+meta_init_mirror(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*uname = argv[0];
+	mdname_t	*mirnp = NULL;
+	int		old_optind;
+	int		c;
+	md_mirror_t	*mirrorp = NULL;
+	uint_t		smi;
+	int		rval = -1;
+
+	/* get mirror name */
+	assert(argc > 0);
+	if (argc < 1)
+		goto syntax;
+	if ((mirnp = metaname(spp, uname, ep)) == NULL)
+		goto out;
+	assert(*spp != NULL);
+	uname = mirnp->cname;
+	if (metachkmeta(mirnp, ep) != 0)
+		goto out;
+
+	if (!(options & MDCMD_NOLOCK)) {
+		/* grab set lock */
+		if (meta_lock(*spp, TRUE, ep) != 0)
+			goto out;
+
+		if (meta_check_ownership(*spp, ep) != 0)
+			goto out;
+	}
+
+	/* see if it exists already */
+	if (metagetmiscname(mirnp, ep) != NULL) {
+		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+		    meta_getminor(mirnp->dev), uname);
+		goto out;
+	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
+		goto out;
+	} else {
+		mdclrerror(ep);
+	}
+	--argc, ++argv;
+
+	/* grab -m */
+	if ((argc < 1) || (strcmp(argv[0], "-m") != 0))
+		goto syntax;
+	--argc, ++argv;
+
+	if (argc == 0)
+		goto syntax;
+
+	/* parse general options */
+	optind = 0;
+	opterr = 0;
+	if (getopt(argc, argv, "") != -1)
+		goto options;
+
+	/* allocate mirror */
+	mirrorp = Zalloc(sizeof (*mirrorp));
+
+	/* setup common */
+	mirrorp->common.namep = mirnp;
+	mirrorp->common.type = MD_METAMIRROR;
+
+	/*
+	 * parse submirrors: every argument that is neither an option nor
+	 * a number.  isdigit() is only defined for values representable
+	 * as unsigned char (or EOF), hence the cast on the plain char.
+	 */
+	for (smi = 0; ((argc > 0) && (argv[0][0] != '-') &&
+	    (! isdigit((unsigned char)argv[0][0]))); ++smi) {
+		md_submirror_t	*mdsm = &mirrorp->submirrors[smi];
+		mdname_t	*submirnamep;
+
+		/* check for room */
+		if (smi >= NMIRROR) {
+			(void) mdmderror(ep, MDE_MIRROR_FULL,
+			    meta_getminor(mirnp->dev), uname);
+			goto out;
+		}
+
+		/* parse submirror name */
+		if ((submirnamep = metaname(spp, argv[0], ep)) == NULL)
+			goto out;
+		mdsm->submirnamep = submirnamep;
+		--argc, ++argv;
+	}
+	if (smi == 0) {
+		(void) mdmderror(ep, MDE_NSUBMIRS, meta_getminor(mirnp->dev),
+		    uname);
+		goto out;
+	}
+
+	/* dangerous n-way mirror creation */
+	if ((smi > 1) && (options & MDCMD_PRINT)) {
+		md_eprintf(dgettext(TEXT_DOMAIN,
+"%s: WARNING: This form of metainit is not recommended.\n"
+"The submirrors may not have the same data.\n"
+"Please see ERRORS in metainit(1M) for additional information.\n"),
+		    uname);
+	}
+
+	/* parse mirror options */
+	mirrorp->read_option = RD_LOAD_BAL;
+	mirrorp->write_option = WR_PARALLEL;
+	mirrorp->pass_num = MD_PASS_DEFAULT;
+	old_optind = optind = 0;
+	opterr = 0;
+	while ((c = getopt(argc, argv, "grS")) != -1) {
+		switch (c) {
+		case 'g':
+			/* only one read option may be given */
+			if (mirrorp->read_option != RD_LOAD_BAL) {
+				(void) mderror(ep, MDE_BAD_RD_OPT, uname);
+				goto out;
+			}
+			mirrorp->read_option = RD_GEOMETRY;
+			break;
+
+		case 'r':
+			/* only one read option may be given */
+			if (mirrorp->read_option != RD_LOAD_BAL) {
+				(void) mderror(ep, MDE_BAD_RD_OPT, uname);
+				goto out;
+			}
+			mirrorp->read_option = RD_FIRST;
+			break;
+
+		case 'S':
+			/* the write option may be given only once */
+			if (mirrorp->write_option != WR_PARALLEL) {
+				(void) mderror(ep, MDE_BAD_WR_OPT, uname);
+				goto out;
+			}
+			mirrorp->write_option = WR_SERIAL;
+			break;
+
+		default:
+			/* report from the argument that held the bad option */
+			argc -= old_optind;
+			argv += old_optind;
+			goto options;
+		}
+		old_optind = optind;
+	}
+	argc -= optind;
+	argv += optind;
+
+	/* parse pass number */
+	if ((argc > 0) && (isdigit((unsigned char)argv[0][0]))) {
+		if (name_to_pass_num(uname, argv[0],
+		    &mirrorp->pass_num, ep) != 0) {
+			goto out;
+		}
+		--argc, ++argv;
+	}
+
+	/* we should be at the end */
+	if (argc != 0)
+		goto syntax;
+
+	/* create mirror */
+	if (meta_create_mirror(*spp, mirrorp, options, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Mirror is setup\n"),
+		    uname);
+		(void) fflush(stdout);
+	}
+	goto out;
+
+	/* syntax error */
+syntax:
+	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
+	goto out;
+
+	/* options error */
+options:
+	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
+	goto out;
+
+	/* cleanup, return error */
+out:
+	if (mirrorp != NULL)
+		meta_free_mirror(mirrorp);
+	return (rval);
+}
+
+/*
+ * reset mirrors
+ *
+ * With mirnp == NULL every mirror in the set is reset: in a multi-node
+ * set by sending one metaclear command per mirror, otherwise by calling
+ * this routine recursively for each name. With a specific mirnp that
+ * mirror is cleared through meta_reset(), provided it has no parent,
+ * and with MDCMD_RECURSE its metadevice submirrors are cleared as well.
+ * MDCMD_PRINT reports the clear on stdout.
+ *
+ * Returns 0 on success. On failure *ep describes the error and -1 (or
+ * the result of mdmderror()) is returned.
+ */
+int
+meta_mirror_reset(
+ mdsetname_t *sp,
+ mdname_t *mirnp,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ md_mirror_t *mirrorp;
+ uint_t smi;
+ int rval = -1;
+
+ /* should have same set */
+ assert(sp != NULL);
+ assert((mirnp == NULL) ||
+ (sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev))));
+
+ /* reset all mirrors */
+ if (mirnp == NULL) {
+ mdnamelist_t *mirrornlp = NULL;
+ mdnamelist_t *p;
+
+ /* for each mirror */
+ rval = 0;
+ if (meta_get_mirror_names(sp, &mirrornlp, 0, ep) < 0)
+ return (-1);
+ for (p = mirrornlp; (p != NULL); p = p->next) {
+ /* reset mirror */
+ mirnp = p->namep;
+ /*
+ * If this is a multi-node set, we send a series
+ * of individual metaclear commands.
+ */
+ if (meta_is_mn_set(sp, ep)) {
+ if (meta_mn_send_metaclear_command(sp,
+ mirnp->cname, options, 0, ep) != 0) {
+ rval = -1;
+ break;
+ }
+ } else {
+ /* recurse with a specific name; stop at first error */
+ if (meta_mirror_reset(sp, mirnp, options,
+ ep) != 0) {
+ rval = -1;
+ break;
+ }
+ }
+ }
+
+ /* cleanup return success */
+ metafreenamelist(mirrornlp);
+ return (rval);
+ }
+
+ /* check name */
+ if (metachkmeta(mirnp, ep) != 0)
+ return (-1);
+
+ /* get unit structure */
+ if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+ return (-1);
+
+ /* make sure nobody owns us */
+ if (MD_HAS_PARENT(mirrorp->common.parent)) {
+ return (mdmderror(ep, MDE_IN_USE, meta_getminor(mirnp->dev),
+ mirnp->cname));
+ }
+
+ /* clear subdevices cache */
+ if (invalidate_submirrors(sp, mirnp, ep) != 0)
+ return (-1);
+
+ /* clear metadevice */
+ if (meta_reset(sp, mirnp, options, ep) != 0)
+ goto out;
+ rval = 0; /* success */
+
+ /* let em know */
+ if (options & MDCMD_PRINT) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: Mirror is cleared\n"), mirnp->cname);
+ (void) fflush(stdout);
+ }
+
+ /* clear subdevices */
+ if (! (options & MDCMD_RECURSE))
+ goto out;
+ /*
+ * NOTE(review): mirrorp was fetched before meta_reset(); this loop
+ * relies on it still being valid here - confirm against meta_reset.
+ */
+ for (smi = 0; (smi < NMIRROR); ++smi) {
+ md_submirror_t *mdsp = &mirrorp->submirrors[smi];
+ mdname_t *submirnp = mdsp->submirnamep;
+
+ /* skip unused submirrors */
+ if (submirnp == NULL) {
+ assert(mdsp->state == SMS_UNUSED);
+ continue;
+ }
+
+ /* make sure we have a metadevice */
+ if (! metaismeta(submirnp))
+ continue;
+
+ /* clear submirror; keep going but remember any failure */
+ if (meta_reset_by_name(sp, submirnp, options, ep) != 0)
+ rval = -1;
+ }
+
+ /* cleanup, return success */
+out:
+ meta_invalidate_name(mirnp);
+ return (rval);
+}
+
+/*
+ * meta_mirror_anycomp_is_err - report whether any listed mirror has a
+ * problem.
+ *
+ * Returns TRUE as soon as a mirror in mirror_names cannot be fetched or
+ * has a submirror whose state includes SMS_COMP_ERRED, SMS_ATTACHED or
+ * SMS_OFFLINE; returns FALSE otherwise.  Errors raised while fetching
+ * a mirror are cleared before returning and never reach the caller.
+ */
+int
+meta_mirror_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *mirror_names)
+{
+	md_error_t	scratch = mdnullerror;
+	mdnamelist_t	*cur;
+	int		found = FALSE;
+
+	for (cur = mirror_names; cur != NULL; cur = cur->next) {
+		md_mirror_t	*mp;
+		int		idx;
+
+		/* a mirror we cannot even fetch counts as being in error */
+		mp = meta_get_mirror(sp, cur->namep, &scratch);
+		if (mp == NULL) {
+			found |= TRUE;
+			break;
+		}
+
+		/* look for a submirror in one of the states of interest */
+		for (idx = 0; idx < NMIRROR; ++idx) {
+			if (mp->submirrors[idx].state &
+			    (SMS_COMP_ERRED|SMS_ATTACHED|SMS_OFFLINE)) {
+				found |= TRUE;
+				break;
+			}
+		}
+		if (found)
+			break;
+	}
+
+	/* do not leave a pending error behind in the scratch structure */
+	if (!mdisok(&scratch))
+		mdclrerror(&scratch);
+
+	return (found);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mirror_resync.c b/usr/src/lib/lvm/libmeta/common/meta_mirror_resync.c
new file mode 100644
index 0000000000..f833ce5c3e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mirror_resync.c
@@ -0,0 +1,658 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * mirror operations
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mirror.h>
+#include <thread.h>
+
+extern int md_in_daemon;
+extern md_mn_client_list_t *mdmn_clients;
+
+/*
+ * chain of mirrors
+ */
+/* One node per mirror; setup_units() links nodes into per-pass lists. */
+typedef struct mm_unit_list {
+ struct mm_unit_list *next; /* next mirror in the same pass list */
+ mdname_t *namep; /* mirror name (pointer copied from the name list) */
+ mm_pass_num_t pass; /* pass number; also the index of the list it is on */
+ uint_t done; /* non-zero once dispatch failed or resync finished */
+} mm_unit_list_t;
+
+/*
+ * resync mirror
+ * meta_lock for this set should be held on entry.
+ */
+int
+meta_mirror_resync(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	daddr_t		size,
+	md_error_t	*ep,
+	md_resync_cmd_t	cmd	/* Start/Block/Unblock/Kill */
+)
+{
+	md_resync_ioctl_t	sync;
+	char			*drvname;
+
+	/* the caller must hand us the set that owns this minor */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* refuse anything that is not a mirror metadevice */
+	drvname = metagetmiscname(mirnp, ep);
+	if (drvname == NULL)
+		return (-1);
+	if (strcmp(drvname, MD_MIRROR) != 0) {
+		return (mdmderror(ep, MDE_NOT_MM, meta_getminor(mirnp->dev),
+		    mirnp->cname));
+	}
+
+	/* build the request; ri_flags is already 0 after the memset */
+	(void) memset(&sync, 0, sizeof (sync));
+	MD_SETDRIVERNAME(&sync, MD_MIRROR, sp->setno);
+	sync.ri_copysize = size;
+	sync.ri_mnum = meta_getminor(mirnp->dev);
+
+	/* translate the requested command into ioctl flags */
+	switch (cmd) {
+	case MD_RESYNC_START:
+		/* plain start carries no flags */
+		break;
+	case MD_RESYNC_FORCE_MNSTART:
+		sync.ri_flags = MD_RI_RESYNC_FORCE_MNSTART;
+		break;
+	case MD_RESYNC_KILL_NO_WAIT:
+		sync.ri_flags = MD_RI_KILL | MD_RI_NO_WAIT;
+		break;
+	case MD_RESYNC_KILL:
+		sync.ri_flags = MD_RI_KILL;
+		break;
+	case MD_RESYNC_UNBLOCK:
+		sync.ri_flags = MD_RI_UNBLOCK;
+		break;
+	case MD_RESYNC_BLOCK:
+		sync.ri_flags = MD_RI_BLOCK;
+		break;
+	default:
+		/* TODO: Add new error MDE_BAD_RESYNC_FLAGS */
+		return (mderror(ep, MDE_BAD_RESYNC_OPT, mirnp->cname));
+	}
+
+	/* hand the request to the driver, passing any error to the caller */
+	if (metaioctl(MD_IOCSETSYNC, &sync, &sync.mde, mirnp->cname) != 0)
+		return (mdstealerror(ep, &sync.mde));
+
+	return (0);
+}
+
+/*
+ * free units
+ */
+static void
+free_units(
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1]
+)
+{
+	uint_t	slot;
+
+	/* release every per-pass chain and leave each head pointer NULL */
+	for (slot = 0; slot <= MD_PASS_MAX; slot++) {
+		mm_unit_list_t	*node = mirrors[slot];
+
+		while (node != NULL) {
+			mm_unit_list_t	*dead = node;
+
+			/* step forward before the node is released */
+			node = node->next;
+			Free(dead);
+		}
+		mirrors[slot] = NULL;
+	}
+}
+
+/*
+ * setup_units: build lists of units for each pass
+ */
+static int
+setup_units(
+	mdsetname_t	*sp,
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1],
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*names = NULL;
+	mdnamelist_t	*nlp;
+	int		status = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* without the list of mirror names there is nothing to build */
+	if (meta_get_mirror_names(sp, &names, 0, ep) < 0)
+		return (-1);
+
+	for (nlp = names; nlp != NULL; nlp = nlp->next) {
+		md_mirror_t	*mp = meta_get_mirror(sp, nlp->namep, ep);
+		mm_unit_list_t	*unit;
+
+		/* remember the failure but keep collecting other mirrors */
+		if (mp == NULL) {
+			status = -1;
+			continue;
+		}
+
+		/* out-of-range pass numbers land on the last pass */
+		unit = Zalloc(sizeof (*unit));
+		unit->namep = nlp->namep;
+		unit->pass = mp->pass_num;
+		if ((unit->pass < 0) || (unit->pass > MD_PASS_MAX))
+			unit->pass = MD_PASS_MAX;
+
+		/* push onto the head of that pass's chain */
+		unit->next = mirrors[unit->pass];
+		mirrors[unit->pass] = unit;
+	}
+
+	/* the name list nodes are no longer needed */
+	metafreenamelist(names);
+	return (status);
+}
+
+/*
+ * resync all mirrors (in background)
+ */
+int
+meta_mirror_resync_all(
+ mdsetname_t *sp,
+ daddr_t size,
+ md_error_t *ep
+)
+{
+ mm_unit_list_t *mirrors[MD_PASS_MAX + 1];
+ mm_pass_num_t pass, max_pass;
+ int rval = 0, fval;
+
+ /* should have a set */
+ assert(sp != NULL);
+
+ /* get mirrors */
+ /* a setup_units() failure is recorded in rval; whatever was listed is still used */
+ (void) memset(mirrors, 0, sizeof (mirrors));
+ if (setup_units(sp, mirrors, ep) != 0)
+ rval = -1;
+
+ /* fork a process */
+ if ((fval = md_daemonize(sp, ep)) != 0) {
+ /*
+ * md_daemonize will fork off a process. This is the
+ * parent or error.
+ */
+ /* fval > 0: return 0 here regardless of rval; the child does the work */
+ if (fval > 0) {
+ free_units(mirrors);
+ return (0);
+ }
+ /* fval < 0: drop the error and carry on in this process */
+ mdclrerror(ep);
+ }
+ /*
+ * Closing stdin/out/err here.
+ * In case this was called thru rsh, the calling process on the other
+ * side will know, it doesn't have to wait until all the resyncs have
+ * finished.
+ * Also initialise the rpc client pool so that this process will use
+ * a unique pool of clients. If we don't do this, all of the forked
+ * clients will end up using the same pool of clients which can result
+ * in hung clients.
+ */
+ if (meta_is_mn_set(sp, ep)) {
+ (void) close(0);
+ (void) close(1);
+ (void) close(2);
+ mdmn_clients = NULL;
+ }
+ assert((fval == 0) || (fval == -1));
+
+ /*
+ * Determine which pass level is the highest that contains mirrors to
+ * resync. We only need to wait for completion of earlier levels below
+ * this high watermark. If all mirrors are at the same pass level
+ * there is no requirement to wait for completion.
+ */
+
+ max_pass = 1;
+ for (pass = MD_PASS_MAX; pass > 1; --pass) {
+ if (mirrors[pass] != NULL) {
+ max_pass = pass;
+ break;
+ }
+ }
+
+ /*
+ * max_pass now contains the highest pass-level with resyncable mirrors
+ */
+
+ /* do passes */
+ /* the loop starts at 1, so mirrors listed under pass 0 are never dispatched */
+ for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
+ int dispatched = 0;
+ unsigned howlong = 1;
+ mm_unit_list_t *lp;
+
+ /* skip empty passes */
+ if (mirrors[pass] == NULL)
+ continue;
+
+ /* dispatch all resyncs in pass */
+ /* multi-node sets use meta_mn_send_setsync(); others call meta_mirror_resync() directly */
+ for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
+ if (meta_is_mn_set(sp, ep)) {
+ if (meta_mn_send_setsync(sp, lp->namep,
+ size, ep) != 0) {
+ rval = -1;
+ lp->done = 1;
+ } else {
+ ++dispatched;
+ }
+ } else {
+ if (meta_mirror_resync(sp, lp->namep, size, ep,
+ MD_RESYNC_START) != 0) {
+ rval = -1;
+ lp->done = 1;
+ } else {
+ ++dispatched;
+ }
+ }
+ }
+
+ /*
+ * Wait for them to finish iff we are at a level lower than
+ * max_pass. This orders the resyncs into distinct levels.
+ * I.e. level 2 resyncs won't start until all level 1 ones
+ * have completed.
+ */
+ if (pass == max_pass)
+ continue;
+
+ /* poll interval starts at 1 second and grows by one per round up to 10 */
+ howlong = 1;
+ while (dispatched > 0) {
+
+ /* wait a while */
+ (void) sleep(howlong);
+
+ /* see if any finished */
+ for (lp = mirrors[pass]; lp != NULL; lp = lp->next) {
+ md_resync_ioctl_t ri;
+
+ if (lp->done)
+ continue;
+
+ (void) memset(&ri, '\0', sizeof (ri));
+ ri.ri_mnum = meta_getminor(lp->namep->dev);
+ MD_SETDRIVERNAME(&ri, MD_MIRROR, sp->setno);
+ /* a failed status query also ends the wait for this mirror */
+ if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde,
+ lp->namep->cname) != 0) {
+ (void) mdstealerror(ep, &ri.mde);
+ rval = -1;
+ lp->done = 1;
+ --dispatched;
+ } else if (! (ri.ri_flags & MD_RI_INPROGRESS)) {
+ lp->done = 1;
+ --dispatched;
+ }
+ }
+
+ /* wait a little longer next time */
+ if (howlong < 10)
+ ++howlong;
+ }
+ }
+
+ /* cleanup, return success */
+ /* only reached with fval == 0 (child, exits) or fval == -1 (returns rval) */
+ free_units(mirrors);
+ if (fval == 0) /* we are the child process so exit */
+ exit(0);
+ return (rval);
+}
+
+/*
+ * meta_mn_mirror_resync_all:
+ * -------------------------
+ * Resync all mirrors associated with given set (arg). Called when master
+ * node is adding a node to a diskset. Only want to initiate the resync on
+ * the current node.
+ */
+void *
+meta_mn_mirror_resync_all(void *arg)
+{
+	set_t		setno;
+	mdsetname_t	*sp;
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1];
+	mm_pass_num_t	pass, max_pass;
+	md_error_t	mde = mdnullerror;
+	int		fval;
+
+	/*
+	 * Should have a set. Check the argument pointer before reading
+	 * through it, and compare the set number (an integer) against 0
+	 * rather than against the pointer constant NULL.
+	 */
+	assert(arg != NULL);
+	setno = *((set_t *)arg);
+	assert(setno != 0);
+
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	if (!(meta_is_mn_set(sp, &mde))) {
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	/* fork a process */
+	if ((fval = md_daemonize(sp, &mde)) != 0) {
+		/*
+		 * md_daemonize will fork off a process. This is the
+		 * parent or error.
+		 */
+		if (fval > 0) {
+			return (NULL);
+		}
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+	/*
+	 * Child process should never return back to rpc.metad, but
+	 * should exit.
+	 * Flush all internally cached data inherited from parent process
+	 * since cached data will be cleared when parent process RPC request
+	 * has completed (which is possibly before this child process
+	 * can complete).
+	 * Child process can retrieve and cache its own copy of data from
+	 * rpc.metad that won't be changed by the parent process.
+	 *
+	 * Reset md_in_daemon since this child will be a client of rpc.metad
+	 * not part of the rpc.metad daemon itself.
+	 * md_in_daemon is used by rpc.metad so that libmeta can tell if
+	 * this thread is rpc.metad or any other thread. (If this thread
+	 * was rpc.metad it could use some short circuit code to get data
+	 * directly from rpc.metad instead of doing an RPC call to rpc.metad).
+	 */
+	md_in_daemon = 0;
+	metaflushsetname(sp);
+	sr_cache_flush_setno(setno);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		mde_perror(&mde, "");
+		md_exit(sp, 1);
+	}
+
+	if (meta_lock(sp, TRUE, &mde) != 0) {
+		mde_perror(&mde, "");
+		md_exit(sp, 1);
+	}
+
+	/*
+	 * Closing stdin/out/err here.
+	 */
+	(void) close(0);
+	(void) close(1);
+	(void) close(2);
+	assert(fval == 0);
+
+	/* get mirrors */
+	(void) memset(mirrors, 0, sizeof (mirrors));
+	if (setup_units(sp, mirrors, &mde) != 0) {
+		(void) meta_unlock(sp, &mde);
+		md_exit(sp, 1);
+	}
+
+	/*
+	 * Determine which pass level is the highest that contains mirrors to
+	 * resync. We only need to wait for completion of earlier levels below
+	 * this high watermark. If all mirrors are at the same pass level
+	 * there is no requirement to wait for completion.
+	 */
+	max_pass = 1;
+	for (pass = MD_PASS_MAX; pass > 1; --pass) {
+		if (mirrors[pass] != NULL) {
+			max_pass = pass;
+			break;
+		}
+	}
+
+	/*
+	 * max_pass now contains the highest pass-level with resyncable mirrors
+	 */
+	/* do passes */
+	for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
+		int		dispatched = 0;
+		unsigned	howlong = 1;
+		mm_unit_list_t	*lp;
+
+		/* skip empty passes */
+		if (mirrors[pass] == NULL)
+			continue;
+
+		/* dispatch all resyncs in pass */
+		for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
+			if (meta_mirror_resync(sp, lp->namep, 0, &mde,
+			    MD_RESYNC_FORCE_MNSTART) != 0) {
+				/* stdio is closed; drop the error quietly */
+				mdclrerror(&mde);
+				lp->done = 1;
+			} else {
+				++dispatched;
+			}
+		}
+
+		/*
+		 * Wait for them to finish iff we are at a level lower than
+		 * max_pass. This orders the resyncs into distinct levels.
+		 * I.e. level 2 resyncs won't start until all level 1 ones
+		 * have completed.
+		 */
+		if (pass == max_pass)
+			continue;
+
+		howlong = 1;
+		while (dispatched > 0) {
+
+			/* wait a while */
+			(void) sleep(howlong);
+
+			/* see if any finished */
+			for (lp = mirrors[pass]; lp != NULL; lp = lp->next) {
+				md_resync_ioctl_t	ri;
+
+				if (lp->done)
+					continue;
+
+				(void) memset(&ri, '\0', sizeof (ri));
+				ri.ri_mnum = meta_getminor(lp->namep->dev);
+				MD_SETDRIVERNAME(&ri, MD_MIRROR, sp->setno);
+				if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde,
+				    lp->namep->cname) != 0) {
+					/*
+					 * metaioctl() reported into ri.mde,
+					 * so that is the error to release
+					 * (mde is untouched on this path).
+					 */
+					mdclrerror(&ri.mde);
+					lp->done = 1;
+					--dispatched;
+				} else if (! (ri.ri_flags & MD_RI_INPROGRESS)) {
+					lp->done = 1;
+					--dispatched;
+				}
+			}
+
+			/* wait a little longer next time */
+			if (howlong < 10)
+				++howlong;
+		}
+	}
+
+	/* cleanup, exit the child with success */
+	free_units(mirrors);
+	(void) meta_unlock(sp, &mde);
+	md_exit(sp, 0);
+	/*NOTREACHED*/
+}
+
+/*
+ * meta_mirror_resync_process:
+ * --------------------------
+ * Modify any resync that is in progress on this node for the given set.
+ *
+ * Input Parameters:
+ * sp setname to scan for mirrors
+ * cmd action to take:
+ * MD_RESYNC_KILL - kill all resync threads
+ * MD_RESYNC_BLOCK - block all resync threads
+ * MD_RESYNC_UNBLOCK - resume all resync threads
+ * Output Parameters
+ * ep error return structure
+ *
+ * meta_lock for this set should be held on entry.
+ */
+static void
+meta_mirror_resync_process(mdsetname_t *sp, md_error_t *ep, md_resync_cmd_t cmd)
+{
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1];
+	mm_pass_num_t	pass;
+
+	/* Grab all the mirrors from the set (if any) */
+	(void) memset(mirrors, 0, sizeof (mirrors));
+	if (setup_units(sp, mirrors, ep) != 0) {
+		/*
+		 * setup_units() keeps collecting mirrors after a lookup
+		 * failure, so the lists may be partly populated even though
+		 * it reported an error. Release them before bailing out.
+		 */
+		free_units(mirrors);
+		return;
+	}
+
+	/* do passes */
+	for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
+		mm_unit_list_t	*lp;
+
+		/* skip empty passes */
+		if (mirrors[pass] == NULL)
+			continue;
+
+		/* Process all resyncs in pass; failures are not acted upon */
+		for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
+			(void) meta_mirror_resync(sp, lp->namep, 0, ep,
+			    cmd);
+		}
+	}
+
+	/* Clear up mirror units */
+	free_units(mirrors);
+}
+
+/*
+ * meta_mirror_resync_process_all:
+ * ------------------------------
+ * Issue the given resync command to all mirrors contained in all multi-node
+ * sets.
+ *
+ * Input Parameters:
+ * cmd - MD_RESYNC_KILL, MD_RESYNC_BLOCK, MD_RESYNC_UNBLOCK
+ */
+static void
+meta_mirror_resync_process_all(md_resync_cmd_t cmd)
+{
+	set_t		setno, max_sets;
+	md_error_t	mde = mdnullerror;
+	mdsetname_t	*this_sp;
+	md_set_desc	*sd;
+
+	/*
+	 * Traverse all sets looking for multi-node capable ones.
+	 */
+	max_sets = get_max_sets(&mde);
+	for (setno = 1; setno < max_sets; setno++) {
+		/*
+		 * Release whatever the previous iteration (or
+		 * get_max_sets) left behind instead of overwriting the
+		 * structure, which would drop anything it still holds.
+		 */
+		mdclrerror(&mde);
+
+		if ((this_sp = metasetnosetname(setno, &mde)) == NULL)
+			continue;
+		if ((sd = metaget_setdesc(this_sp, &mde)) == NULL)
+			continue;
+
+		/* only multi-node sets are of interest */
+		if (!MD_MNSET_DESC(sd))
+			continue;
+
+		/* the per-set worker expects meta_lock to be held */
+		if (meta_lock(this_sp, TRUE, &mde) != 0)
+			continue;
+
+		meta_mirror_resync_process(this_sp, &mde, cmd);
+		(void) meta_unlock(this_sp, &mde);
+	}
+
+	/* nothing is reported to the caller; drop any remaining error */
+	mdclrerror(&mde);
+}
+
+/*
+ * meta_mirror_resync_kill_all:
+ * ---------------------------
+ * Abort any resync that is in progress on this node. Scan all sets for all
+ * mirrors.
+ * Note: this routine is provided for future use. For example to kill all
+ * resyncs on a node this could be used as long as the
+ * mddoors / rpc.mdcommd tuple is running on all members of the cluster.
+ */
+void
+meta_mirror_resync_kill_all(void)
+{
+ /* issue MD_RESYNC_KILL to every mirror of every multi-node set */
+ meta_mirror_resync_process_all(MD_RESYNC_KILL);
+}
+
+/*
+ * meta_mirror_resync_block_all:
+ * ----------------------------
+ * Block all resyncs that are in progress. This causes the resync state to
+ * freeze on this machine, and can be resumed by calling
+ * meta_mirror_resync_unblock_all.
+ */
+void
+meta_mirror_resync_block_all(void)
+{
+ /* issue MD_RESYNC_BLOCK to every mirror of every multi-node set */
+ meta_mirror_resync_process_all(MD_RESYNC_BLOCK);
+}
+
+/*
+ * meta_mirror_resync_unblock_all:
+ * ------------------------------
+ * Unblock all previously blocked resync threads on this node.
+ */
+void
+meta_mirror_resync_unblock_all(void)
+{
+ /* issue MD_RESYNC_UNBLOCK to every mirror of every multi-node set */
+ meta_mirror_resync_process_all(MD_RESYNC_UNBLOCK);
+}
+
+/*
+ * meta_mirror_resync_unblock:
+ * --------------------------
+ * Unblock any previously blocked resync threads for the given set.
+ * meta_lock for this set should be held on entry.
+ */
+void
+meta_mirror_resync_unblock(mdsetname_t *sp)
+{
+	md_error_t	mde = mdnullerror;
+
+	/* send MD_RESYNC_UNBLOCK to every mirror in the set */
+	meta_mirror_resync_process(sp, &mde, MD_RESYNC_UNBLOCK);
+
+	/* errors are not reported to the caller; release what was recorded */
+	mdclrerror(&mde);
+}
+
+/*
+ * meta_mirror_resync_kill:
+ * -----------------------
+ * Kill any resync threads running on mirrors in the given set.
+ * Called when releasing a set (meta_set_prv.c`halt_set)
+ */
+void
+meta_mirror_resync_kill(mdsetname_t *sp)
+{
+	md_error_t	mde = mdnullerror;
+
+	/* send MD_RESYNC_KILL to every mirror in the set */
+	meta_mirror_resync_process(sp, &mde, MD_RESYNC_KILL);
+
+	/* errors are not reported to the caller; release what was recorded */
+	mdclrerror(&mde);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c b/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c
new file mode 100644
index 0000000000..bd9b5cc508
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c
@@ -0,0 +1,636 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <wait.h>
+#include <sys/time.h>
+#include <meta.h>
+#include <metad.h>
+#include <mdmn_changelog.h>
+#include <syslog.h>
+#include <umem.h>
+
+/*
+ * Number of log entries per set.
+ *
+ * We want at least 4 spares available at all times
+ * in case new classes are added during a live upgrade.
+ *
+ * Allocate the entries in chunks of 16
+ */
+/* records are allocated in multiples of this many */
+#define MDMN_LOGRECS_QUANTA 16
+/* minimum number of records beyond MD_MN_NCLASSES that must be available */
+#define MDMN_LOGRECS_MINSPARES 4
+/* size of one incore changelog record */
+#define MDMN_LOGHDR_SIZE sizeof (mdmn_changelog_record_t)
+/* incore record plus room for a maximum-length message payload */
+#define MDMN_LOGRECSIZE (MDMN_LOGHDR_SIZE + MD_MN_MSG_MAXDATALEN)
+/* size of one ondisk changelog record */
+#define MDMN_LOGRECSIZE_OD sizeof (mdmn_changelog_record_od_t)
+/*
+ * True when rounding MD_MN_NCLASSES up to the next quanta would leave
+ * fewer than MDMN_LOGRECS_MINSPARES spare records.
+ */
+#define MDMN_LOGRECS_TRIMUP ((MD_MN_NCLASSES % MDMN_LOGRECS_QUANTA) > \
+ (MDMN_LOGRECS_QUANTA - MDMN_LOGRECS_MINSPARES))
+
+static int mdmn_commitlog(md_set_desc *, md_error_t *);
+static int mdmn_log_it(set_t, md_error_t *, mdmn_changelog_record_t *lr);
+
+
+/* Global variables */
+
+/* per-set incore changelog table, indexed [setno][class]; NULL until allocated */
+mdmn_changelog_record_t *mdmn_changelog[MD_MAXSETS];
+/* per-set flags; MDMN_CLF_SNARFED is set once the records have been read in */
+int mdmn_changelog_snarfed[MD_MAXSETS];
+
+/* Total number of log records */
+/* starts at NCLASSES rounded down to a quanta plus one quanta; may be raised by mdmn_allocate_changelog() */
+int mdmn_logrecs = (MDMN_LOGRECS_QUANTA +
+ ((MD_MN_NCLASSES/MDMN_LOGRECS_QUANTA) * MDMN_LOGRECS_QUANTA));
+
+#ifdef DEBUG
+/* DEBUG only: log the header fields of one incore changelog record */
+void
+dump_rec(char *fn_name, mdmn_changelog_record_t *lr)
+{
+ /* fn_name is used as the message prefix */
+ syslog(LOG_DEBUG, "%s incore: selfid 0x%x class %d flags %d "
+ "msglen %d\n", fn_name, lr->lr_selfid, lr->lr_class,
+ lr->lr_flags, lr->lr_msglen);
+}
+/* DEBUG only: log the header fields of one ondisk changelog record */
+void
+dump_rec_od(char *fn_name, mdmn_changelog_record_od_t *lr)
+{
+ /* fn_name is used as the message prefix */
+ syslog(LOG_DEBUG, "%s ondisk: selfid 0x%x class %d flags %d "
+ "msglen %d\n", fn_name, lr->lr_selfid, lr->lr_class,
+ lr->lr_flags, lr->lr_msglen);
+}
+
+void
+dump_array(char *fn_name, set_t setno)
+{
+	char	label[80];
+	int	slot;
+
+	/* DEBUG only: dump every incore record of the set, one per slot */
+	for (slot = 0; slot < mdmn_logrecs; slot++) {
+		mdmn_changelog_record_t	*rec = &mdmn_changelog[setno][slot];
+
+		/* prefix each dump with the caller's tag and the slot index */
+		(void) snprintf(label, sizeof (label), "%s class %d ",
+		    fn_name, slot);
+		dump_rec(label, rec);
+	}
+}
+#endif
+
+/*
+ * copy_changelog: copies changelog ondisk<->incore records.
+ * The argument "direction" controls the direction in which the
+ * records are copied. Incore and ondisk changelog structures must be
+ * allocated when calling this routine.
+ *
+ * The purpose of changelog is to store a message that is in progress.
+ * Therefore the changelog structure embeds the message structure.
+ * Incore and ondisk changelog structures are created to handle the
+ * incore and ondisk message formats. The incore message has a pointer
+ * to the payload. The ondisk message format has payload embedded as
+ * part of the message.
+ *
+ * Caveat Emptor: the caller must ensure that the incore and ondisk
+ * structures have their payload buffers correctly allocated.
+ */
+
+static void
+copy_changelog(mdmn_changelog_record_t *incp,
+    mdmn_changelog_record_od_t *odp, int direction)
+{
+	assert(incp != NULL && odp != NULL);
+	assert((direction == MD_MN_COPY_TO_ONDISK) ||
+	    (direction == MD_MN_COPY_TO_INCORE));
+
+	if (direction != MD_MN_COPY_TO_ONDISK) {
+		/* ondisk -> incore: header fields first */
+		incp->lr_msglen = odp->lr_msglen;
+		incp->lr_class = odp->lr_class;
+		incp->lr_selfid = odp->lr_selfid;
+		incp->lr_flags = odp->lr_flags;
+		incp->lr_revision = odp->lr_revision;
+
+		/* an empty source record carries no message to copy */
+		if (odp->lr_msglen == 0)
+			return;
+	} else {
+		/* incore -> ondisk: header fields first */
+		odp->lr_msglen = incp->lr_msglen;
+		odp->lr_class = incp->lr_class;
+		odp->lr_selfid = incp->lr_selfid;
+		odp->lr_flags = incp->lr_flags;
+		odp->lr_revision = incp->lr_revision;
+
+		/* an empty source record carries no message to copy */
+		if (incp->lr_msglen == 0)
+			return;
+	}
+
+	/* the embedded message is copied by the helper, in either direction */
+	copy_msg_1(&incp->lr_msg, &odp->lr_od_msg, direction);
+}
+
+/*
+ * mdmn_allocate_changelog
+ *
+ * Changelog records are allocated on a per multi-node basis.
+ * This routine is called during MN set creation.
+ * It pre-allocates the changelog, as user records
+ * one per message class plus some spares.
+ * Once the records are allocated they are never freed until
+ * the mddb is deleted. The preallocation ensures that all nodes
+ * will have a consistent view of the mddb.
+ *
+ * Each record is large enough to hold a maximum sized message
+ * Return Values:
+ * 0 - success
+ * -1 - fail
+ */
+int
+mdmn_allocate_changelog(mdsetname_t *sp, md_error_t *ep)
+{
+	mddb_userreq_t		req;
+	md_set_desc		*sd;
+	mdmn_changelog_record_t	*tlr;
+	int			i;
+	set_t			setno;
+
+	/* Get a pointer to the incore md_set_desc for this MN set */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+	setno = sd->sd_setno;
+	/*
+	 * Round up the number of changelog records
+	 * to the next value of MDMN_LOGRECS_QUANTA
+	 *
+	 * In all cases, make sure we have at least
+	 * four more entries than the number of classes
+	 * in order to provide space for live upgrades that
+	 * might add classes.
+	 *
+	 * NOTE(review): this adjusts the global on every call, so when
+	 * MDMN_LOGRECS_TRIMUP is true repeated calls in one process keep
+	 * growing mdmn_logrecs -- confirm that is intended.
+	 */
+
+	mdmn_logrecs += (MDMN_LOGRECS_TRIMUP) ? MDMN_LOGRECS_QUANTA : 0;
+
+	mdmn_changelog[setno] = Zalloc(MDMN_LOGHDR_SIZE * mdmn_logrecs);
+
+	for (i = 0; i < mdmn_logrecs; i++) {
+		(void) memset(&req, 0, sizeof (req));
+		METAD_SETUP_LR(MD_DB_CREATE, setno, 0);
+		/* grab a record big enough for max message size */
+		req.ur_size = MDMN_LOGRECSIZE_OD;
+
+		if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+			syslog(LOG_DEBUG, "allocate_log: %s\n",
+			    mde_sperror(ep, ""));
+#endif
+			/*
+			 * Clear the slot as well as freeing it:
+			 * mdmn_snarf_changelog() only allocates when the
+			 * slot is NULL and would otherwise write into the
+			 * freed table.
+			 */
+			Free(mdmn_changelog[setno]);
+			mdmn_changelog[setno] = NULL;
+			return (-1);
+		}
+
+		/* remember the record id and tag the slot with its class */
+		tlr = &mdmn_changelog[setno][i];
+		tlr->lr_selfid = req.ur_recid;
+		tlr->lr_revision = MD_MN_CHANGELOG_RECORD_REVISION;
+		tlr->lr_class = i;
+	}
+
+	/* commit class, and selfid (a commit failure is not reported) */
+	(void) mdmn_commitlog(sd, ep);
+
+	/* the incore copy is dropped; never leave a dangling pointer behind */
+	Free(mdmn_changelog[setno]);
+	mdmn_changelog[setno] = NULL;
+	return (0);
+}
+
+/*
+ * mdmn_reset_changelog
+ *
+ * Called during reconfig step 2.
+ * The only time the changelog is reset is when all nodes in a cluster
+ * are starting up. In this case changelog must be ignored, therefore
+ * it is reset.
+ *
+ * The function frees the incore data structures and zeros out the
+ * records. The ondisk records are never freed.
+ *
+ * Return Values:
+ * 0 - success
+ * -1 - fail
+ */
+int
+mdmn_reset_changelog(mdsetname_t *sp, md_error_t *ep, int flag)
+{
+	md_set_desc		*sd;
+	mdmn_changelog_record_t	*lr;
+	set_t			setno;
+	int			lrc;
+
+	/* Get a pointer to the incore md_set_desc this MN set */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	setno = sd->sd_setno;
+
+	/* nothing snarfed means there is nothing to reset */
+	if (mdmn_snarf_changelog(setno, ep) == 0) {
+		return (0);
+	}
+
+	if (flag & MDMN_CLF_RESETLOG) {
+		for (lrc = 0; lrc < mdmn_logrecs; lrc++) {
+			lr = &mdmn_changelog[setno][lrc];
+			/* same guard as mdmn_unlog_msg(): free only if set */
+			if (lr->lr_msg.msg_event_data)
+				Free(lr->lr_msg.msg_event_data);
+			(void) memset(&lr->lr_msg, 0, sizeof (md_mn_msg_t));
+			lr->lr_msglen = 0;
+			lr->lr_flags = 0;
+		}
+		/* push the emptied records out (a failure is not reported) */
+		(void) mdmn_commitlog(sd, ep);
+#ifdef DEBUG
+		syslog(LOG_DEBUG, "reset_changelog: Log reset\n");
+#endif
+	}
+	/* now zap the array */
+	if (flag & MDMN_CLF_RESETCACHE) {
+#ifdef DEBUG
+		syslog(LOG_DEBUG, "reset_changelog: cache reset\n");
+#endif
+		/*
+		 * Free the allocated record table itself. Passing the
+		 * address of the slot in the global array would hand the
+		 * allocator a pointer it never returned.
+		 */
+		Free(mdmn_changelog[setno]);
+		mdmn_changelog[setno] = NULL;
+		mdmn_changelog_snarfed[setno] = 0;
+	}
+	return (0);
+}
+
+/*
+ * Log a given message in the changelog.
+ * This function is only executed by the master node
+ * Return Values:
+ * MDMNE_NULL:
+ * success, the log slot is free
+ *
+ * MDMNE_ACK:
+ * success,
+ * the log slot is occupied with the same msg from a previous try.
+ *
+ * MDMNE_CLASS_BUSY:
+ * This means the appropriate slot is occupied with a different
+ * message. In that case the stored message needs to be replayed,
+ * while the current message will be rejected with MDMNE_CLASS_BUSY
+ * to the initiator.
+ *
+ * MDMNE_LOG_FAIL:
+ * Bad things happened, cannot continue.
+ */
+int
+mdmn_log_msg(md_mn_msg_t *msg)
+{
+ set_t setno;
+ md_mn_msgclass_t class;
+ mdmn_changelog_record_t *lr;
+ md_error_t err = mdnullerror;
+ md_error_t *ep = &err;
+ int retval = 0;
+
+ /* the message's class selects the log slot; the class value is not range checked here */
+ setno = msg->msg_setno;
+ class = mdmn_get_message_class(msg->msg_type);
+
+ /* if not snarfed, snarf it */
+ /* NOTE(review): this path returns -1, which is not one of the MDMNE_* results listed for this function */
+ if (mdmn_snarf_changelog(setno, ep) <= 0) {
+ syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+ "log_msg: No records snarfed\n"));
+ return (-1);
+ }
+
+
+ /* log entry for the class */
+ lr = &mdmn_changelog[setno][class];
+
+ /* Check if the class is occupied */
+ if (lr->lr_flags & MD_MN_LR_INUSE) {
+ /* a different msgid in the slot means busy; the same msgid means already logged */
+ if (!MSGID_CMP(&(msg->msg_msgid), &(lr->lr_msg.msg_msgid))) {
+ syslog(LOG_DAEMON | LOG_DEBUG, dgettext(TEXT_DOMAIN,
+ "log_msg: id mismatch:\n"
+ " stored : ID = (%d, 0x%llx-%d)"
+ " setno %d class %d type %d\n"
+ " msg to log: ID = (%d, 0x%llx-%d)"
+ " setno %d class %d type %d.\n"),
+ MSGID_ELEMS(lr->lr_msg.msg_msgid), lr->lr_setno,
+ lr->lr_class, lr->lr_msgtype,
+ MSGID_ELEMS(msg->msg_msgid), msg->msg_setno, class,
+ msg->msg_type);
+ return (MDMNE_CLASS_BUSY);
+ } else {
+ syslog(LOG_DAEMON | LOG_DEBUG, dgettext(TEXT_DOMAIN,
+ "log_msg: msgid already logged:\n ID = "
+ " (%d, 0x%llx-%d) setno %d class %d type %d\n"),
+ MSGID_ELEMS(lr->lr_msg.msg_msgid), lr->lr_setno,
+ lr->lr_class, lr->lr_msgtype);
+ return (MDMNE_ACK);
+ }
+ }
+
+ /* slot is free: mark it in use, copy the message in, then commit the record */
+ lr->lr_flags |= MD_MN_LR_INUSE;
+ lr->lr_msglen = MD_MN_MSG_LEN(msg);
+ assert(lr->lr_msg.msg_event_data == NULL);
+ if (msg->msg_event_size)
+ lr->lr_msg.msg_event_data = Zalloc(msg->msg_event_size);
+ (void) copy_msg(msg, &(lr->lr_msg));
+ retval = mdmn_log_it(setno, ep, lr);
+ /* NOTE(review): on failure the incore slot stays marked in use and err is not cleared - confirm intended */
+ if (retval != 0) {
+ syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+ "mdmn_log_msg - failure committing logged msg to disk\n"));
+ return (MDMNE_LOG_FAIL);
+ }
+
+ return (MDMNE_NULL); /* this is good */
+}
+
+/*
+ * mdmn_unlog_msg(md_mn_msg_t *)
+ *
+ * Clear the log entry holding the indicated message.
+ * Only the set master can do this.
+ *
+ * Return Values:
+ * 0 - success
+ * -1 - fail
+ */
+int
+mdmn_unlog_msg(md_mn_msg_t *msg)
+{
+ set_t setno;
+ md_mn_msgclass_t class;
+ md_error_t err = mdnullerror;
+ md_error_t *ep = &err;
+ int retval = 0;
+ mdmn_changelog_record_t *lr = NULL;
+
+ /* the message's class selects the log slot */
+ setno = msg->msg_setno;
+ class = mdmn_get_message_class(msg->msg_type);
+
+ /* Find the log entry holding the indicated message */
+ if (mdmn_snarf_changelog(setno, ep) == 0)
+ return (-1);
+
+ lr = &mdmn_changelog[setno][class];
+
+ /* assert the message is still logged */
+ /* lr is the address of an array element here, so this assert cannot catch an empty slot */
+ assert(lr != NULL);
+ /* refuse to clear a slot that holds a different message id */
+ if (!MSGID_CMP(&(msg->msg_msgid), &(lr->lr_msg.msg_msgid))) {
+ syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+ "unlog_msg: msgid mismatch\n"
+ "\t\tstored: ID = (%d, 0x%llx-%d) setno %d class %d type %d\n"
+ "\t\tattempting to unlog:\n"
+ "\t\tID = (%d, 0x%llx-%d) setno %d class %d type %d.\n"),
+ MSGID_ELEMS(lr->lr_msg.msg_msgid), lr->lr_setno,
+ lr->lr_class, lr->lr_msgtype, MSGID_ELEMS(msg->msg_msgid),
+ msg->msg_setno, class, msg->msg_type);
+ return (-1);
+ }
+ /* mark the slot free and release the payload buffer, if any */
+ lr->lr_msglen = 0;
+ lr->lr_flags &= ~(MD_MN_LR_INUSE);
+ if (lr->lr_msg.msg_event_data) {
+ Free(lr->lr_msg.msg_event_data);
+ lr->lr_msg.msg_event_data = NULL;
+ }
+ /* commit the updated log record to disk */
+ retval = mdmn_log_it(setno, ep, lr);
+#ifdef DEBUG
+ dump_rec("mdmn_unlog_msg: ", lr);
+#endif
+ return (retval);
+}
+
+
+/*
+ * mdmn_get_changelogrec(set_t , md_mn_msgclass_t)
+ * Returns a pointer to incore changelog record.
+ *
+ * Return Values:
+ * non-NULL - success
+ * NULL - fail
+ */
+mdmn_changelog_record_t *
+mdmn_get_changelogrec(set_t setno, md_mn_msgclass_t class)
+{
+	md_error_t	scratch = mdnullerror;
+
+	/* once the log is snarfed, hand back the slot for this class */
+	if (mdmn_snarf_changelog(setno, &scratch) != 0) {
+		assert(mdmn_changelog[setno] != NULL);
+		return (&mdmn_changelog[setno][class]);
+	}
+
+	/* nothing could be snarfed for this set */
+	return (NULL);
+}
+
+/*
+ * mdmn_commitlog(md_set_desc *, md_error_t *)
+ *
+ * Commit the set record and all of the changelog entry records to disk.
+ * Don't bother with other stuff hanging off the set record
+ * (e.g. drive records) since none of that is changing.
+ * Called only at changelog pre-allocation time or when flushing a log.
+ *
+ * Return Values:
+ * 0 - success
+ * non-zero - fail (-1 if not an MN set or not master, otherwise the metaioctl() result)
+ */
+
+static int
+mdmn_commitlog(md_set_desc *sd, md_error_t *ep)
+{
+ int lrc;
+ int *recs;
+ uint_t size;
+ mdmn_changelog_record_t *lr;
+ mdmn_changelog_record_od_t clodrec; /* changelog ondisk record */
+ mddb_userreq_t req;
+ int retval = 0;
+ set_t setno;
+
+ /* Check for master and bounce non-master requests */
+ if (!(MD_MNSET_DESC(sd)) || !sd->sd_mn_am_i_master) {
+ if (!(MD_MNSET_DESC(sd))) {
+ syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+ "mdmn_commitlog - Not MN Set\n"));
+ } else {
+ syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+ "mdmn_commit_log - Not Master\n"));
+ }
+ return (-1);
+ }
+ (void) memset(&req, 0, sizeof (req));
+ /* create the records to commit the info to the mddb */
+
+ /* one record id per log record plus a terminating 0 entry */
+ size = (mdmn_logrecs + 1) * sizeof (int);
+ recs = Zalloc(size);
+ /* Initialize the log entry records for update */
+ setno = sd->sd_setno;
+
+ /* first pass: MD_DB_SETDATA for each record; stop at the first failure */
+ for (lrc = 0; lrc < mdmn_logrecs; lrc++) {
+ lr = &mdmn_changelog[setno][lrc];
+ recs[lrc] = lr->lr_selfid;
+ /* clodrec is not cleared first; with lr_msglen == 0 its message area is sent as-is */
+ copy_changelog(lr, &clodrec, MD_MN_COPY_TO_ONDISK);
+ METAD_SETUP_LR(MD_DB_SETDATA, setno, lr->lr_selfid);
+ req.ur_size = MDMN_LOGRECSIZE_OD;
+ req.ur_data = (uint64_t)&clodrec;
+ if ((retval = metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde,
+ NULL)) != 0) {
+ (void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+ syslog(LOG_DAEMON|LOG_DEBUG,
+ "mdmn_commitlog - metaioctl SETDATA failure\n%s",
+ mde_sperror(ep, ""));
+#endif
+ break;
+ }
+ }
+
+ /* second pass: one MD_DB_COMMIT_MANY request, only if every SETDATA succeeded */
+ if (retval == 0) {
+ /* set last rec to be 0 to indicate completion */
+ recs[lrc] = 0;
+ /* Commit to mddb on disk */
+ METAD_SETUP_LR(MD_DB_COMMIT_MANY, setno,
+ mdmn_changelog[setno][0].lr_selfid);
+ req.ur_size = size;
+ req.ur_data = (uint64_t)recs;
+ if ((retval = metaioctl(MD_MN_DB_USERREQ, &req,
+ &req.ur_mde, NULL)) != 0) {
+ (void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+ syslog(LOG_DAEMON|LOG_DEBUG,
+ "mdmn_commitlog - metaioctl COMMIT_MANY"
+ "Failure\n%s", mde_sperror(ep, ""));
+#endif
+ }
+ }
+
+ /* the id array is released on both outcomes; retval carries the last metaioctl() result */
+ Free(recs);
+ return (retval);
+}
+
+/*
+ * mdmn_log_it(set_t, md_error_t *, mdmn_changelog_record_t *)
+ *
+ * Commit the changed log record to disk.
+ *
+ * Return Values:
+ * 0 - success
+ * -1 - fail
+ */
+static int
+mdmn_log_it(set_t set, md_error_t *ep, mdmn_changelog_record_t *lr)
+{
+	mdmn_changelog_record_od_t	clodrec;
+	mddb_userreq_t			req;
+	uint_t				size;
+	int				*recs;
+	int				status = 0;
+
+	(void) memset(&req, 0, sizeof (req));
+
+	/* step 1: push the record contents with MD_DB_SETDATA */
+	copy_changelog(lr, &clodrec, MD_MN_COPY_TO_ONDISK);
+	METAD_SETUP_LR(MD_DB_SETDATA, set, lr->lr_selfid);
+	req.ur_data = (uint64_t)&clodrec;
+	req.ur_size = MDMN_LOGRECSIZE_OD;
+	if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+		syslog(LOG_DEBUG, "mdmn_log_it: DB_SETDATA failed\n"
+		    "set %d selfid %d, size %d\n%s", set, lr->lr_selfid,
+		    req.ur_size, mde_sperror(ep, ""));
+#endif
+		return (-1);
+	}
+
+	/* step 2: commit that one record; the id list is zero terminated */
+	size = 2 * sizeof (int);
+	recs = Zalloc(size);
+	recs[1] = 0;
+	recs[0] = lr->lr_selfid;
+
+	METAD_SETUP_LR(MD_DB_COMMIT_ONE, set, lr->lr_selfid);
+	req.ur_data = (uint64_t)recs;
+	req.ur_size = size;
+	if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+		syslog(LOG_DEBUG, "mdmn_log_it: DB_COMMIT_ONE failed\n"
+		    "set %d selfid %d, size %d\n%s", set, lr->lr_selfid,
+		    req.ur_size, mde_sperror(ep, ""));
+#endif
+		status = -1;
+	}
+
+	/* the id list is released on both outcomes */
+	Free(recs);
+	return (status);
+}
+
+/*
+ * mdmn_snarf_changelog(set_t, md_error_t *)
+ *
+ * snarf in the changelog entries and allocate incore structures
+ * if required.
+ * If the set's entry in the mdmn_changelog_snarfed array has
+ * MDMN_CLF_SNARFED set, then the records are already snarfed.
+ *
+ * Called from set_snarf(), mdmn_log_msg(), and mdmn_unlog_msg()
+ * Return Values:
+ * non-zero - success
+ * 0 - fail
+ */
+int
+mdmn_snarf_changelog(set_t set, md_error_t *ep)
+{
+	mdmn_changelog_record_t		*tlr;
+	mdmn_changelog_record_od_t	*lr;
+	mddb_recid_t			id;
+	md_mn_msgclass_t		class;
+
+	/* the local set is reported as "nothing snarfed" */
+	if (set == MD_LOCAL_SET)
+		return (0);
+
+	id = 0;
+
+	/* already read in: just report the fixed record count */
+	if (mdmn_changelog_snarfed[set] & MDMN_CLF_SNARFED) {
+		assert(mdmn_changelog[set] != NULL);
+		return (mdmn_logrecs);
+	}
+
+	lr = (mdmn_changelog_record_od_t *)get_ur_rec(set, MD_UR_GET_NEXT,
+	    MDDB_UR_LR, &id, ep);
+	if (lr == NULL)
+		return (0);
+
+	/* only allocate if Log records exist */
+
+	if (mdmn_changelog[set] == NULL) {
+		/* Allocate incore state for the log */
+		mdmn_changelog[set] = Zalloc(MDMN_LOGHDR_SIZE *
+		    mdmn_logrecs);
+	}
+
+	do {
+		class = lr->lr_class;
+		/*
+		 * The class read from the ondisk record is used as an
+		 * index into a table of mdmn_logrecs entries; skip any
+		 * record whose class would fall outside that table
+		 * instead of writing past the allocation.
+		 */
+		if (((int)class >= 0) && ((int)class < mdmn_logrecs)) {
+			tlr = &mdmn_changelog[set][class];
+			copy_changelog(tlr, lr, MD_MN_COPY_TO_INCORE);
+		} else {
+			syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+			    "snarf_changelog: ignoring record with "
+			    "invalid class %d\n"), (int)class);
+		}
+		/* each fetched ondisk record is released before the next */
+		Free(lr);
+		lr = (mdmn_changelog_record_od_t *)get_ur_rec(set,
+		    MD_UR_GET_NEXT, MDDB_UR_LR, &id, ep);
+	} while (lr != NULL);
+
+	/* Since log records counts are fixed return that value */
+	mdmn_changelog_snarfed[set] |= MDMN_CLF_SNARFED;
+	return (mdmn_logrecs);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c b/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c
new file mode 100644
index 0000000000..02ad7bf1e6
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c
@@ -0,0 +1,984 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <wait.h>
+#include <sys/time.h>
+#include <strings.h>
+#include <meta.h>
+#include <syslog.h>
+
+extern md_mn_msg_tbl_entry_t msg_table[];
+
+/*
+ * When contacting the local rpc.mdcommd we always want to do that using
+ * the IPv4 version of localhost.
+ */
+#define LOCALHOST_IPv4 "127.0.0.1"
+
+/* Look up the message class registered in msg_table for msgtype. */
+md_mn_msgclass_t
+mdmn_get_message_class(md_mn_msgtype_t msgtype)
+{
+	md_mn_msgclass_t	msgclass;
+
+	/* msgtype is used unchecked as the index into msg_table */
+	msgclass = msg_table[msgtype].mte_class;
+	return (msgclass);
+}
+
+/* Look up the handler function registered in msg_table for msgtype. */
+void (*
+mdmn_get_handler(md_mn_msgtype_t msgtype))
+	(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *res)
+{
+	void	(*handler)(md_mn_msg_t *, uint_t, md_mn_result_t *);
+
+	/* msgtype is used unchecked as the index into msg_table */
+	handler = msg_table[msgtype].mte_handler;
+	return (handler);
+}
+
+/* Look up the submessage generator registered in msg_table for msgtype. */
+int (*
+mdmn_get_submessage_generator(md_mn_msgtype_t msgtype))
+	(md_mn_msg_t *msg, md_mn_msg_t **msglist)
+{
+	int	(*smgen)(md_mn_msg_t *, md_mn_msg_t **);
+
+	/* msgtype is used unchecked as the index into msg_table */
+	smgen = msg_table[msgtype].mte_smgen;
+	return (smgen);
+}
+
+/* Look up the timeout value registered in msg_table for msgtype. */
+time_t
+mdmn_get_timeout(md_mn_msgtype_t msgtype)
+{
+	time_t	tmo;
+
+	/* msgtype is used unchecked as the index into msg_table */
+	tmo = msg_table[msgtype].mte_timeout;
+	return (tmo);
+}
+
+
+/*
+ * Dump the header fields of a message to stderr, one per line, each line
+ * starting with the caller supplied prefix.
+ */
+void
+ldump_msg(char *prefix, md_mn_msg_t *msg)
+{
+	/*
+	 * Print the address through unsigned long: casting the pointer to
+	 * uint_t would truncate it where pointers are wider than int.
+	 */
+	(void) fprintf(stderr, "%s &msg = 0x%lx\n", prefix,
+	    (unsigned long)msg);
+	(void) fprintf(stderr, "%s ID = (%d, 0x%llx-%d)\n", prefix,
+	    MSGID_ELEMS(msg->msg_msgid));
+	(void) fprintf(stderr, "%s sender = %d\n", prefix, msg->msg_sender);
+	(void) fprintf(stderr, "%s flags = 0x%x\n", prefix, msg->msg_flags);
+	(void) fprintf(stderr, "%s setno = %d\n", prefix, msg->msg_setno);
+	(void) fprintf(stderr, "%s type = %d\n", prefix, msg->msg_type);
+	(void) fprintf(stderr, "%s size = %d\n", prefix, msg->msg_event_size);
+}
+
+
+/* Default timeout can be changed using clnt_control() */
+static struct timeval TIMEOUT = { 25, 0 };
+
+/*
+ * Client stub for the mdmn_send RPC.
+ *
+ * Returns a freshly allocated result structure on success (the caller
+ * owns it), or NULL when the RPC did not return RPC_SUCCESS.
+ */
+md_mn_result_t *
+mdmn_send_1(md_mn_msg_t *argp, CLIENT *clnt)
+{
+	md_mn_result_t *clnt_res = Zalloc(sizeof (md_mn_result_t));
+
+	if (clnt_call(clnt, mdmn_send,
+	    (xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_md_mn_result_t, (caddr_t)clnt_res,
+	    TIMEOUT) != RPC_SUCCESS) {
+		/* the caller only sees NULL, so release the reply buffer */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * Client stub for the mdmn_work RPC.
+ * Returns an allocated int holding the reply, or NULL if the call failed.
+ */
+int *
+mdmn_work_1(md_mn_msg_t *argp, CLIENT *clnt)
+{
+	enum clnt_stat	rpc_stat;
+	int		*reply;
+
+	reply = Zalloc(sizeof (int));
+	rpc_stat = clnt_call(clnt, mdmn_work,
+	    (xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)reply,
+	    TIMEOUT);
+	if (rpc_stat == RPC_SUCCESS)
+		return (reply);
+
+	Free(reply);
+	return (NULL);
+}
+
+/*
+ * Client stub for the mdmn_wakeup_initiator RPC.
+ * Returns an allocated int holding the reply, or NULL if the call failed.
+ */
+int *
+mdmn_wakeup_initiator_1(md_mn_result_t *argp, CLIENT *clnt)
+{
+	enum clnt_stat	rpc_stat;
+	int		*reply;
+
+	reply = Zalloc(sizeof (int));
+	rpc_stat = clnt_call(clnt, mdmn_wakeup_initiator,
+	    (xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)reply,
+	    TIMEOUT);
+	if (rpc_stat == RPC_SUCCESS)
+		return (reply);
+
+	Free(reply);
+	return (NULL);
+}
+
+/*
+ * Client stub for the mdmn_wakeup_master RPC.
+ * Returns an allocated int holding the reply, or NULL if the call failed.
+ */
+int *
+mdmn_wakeup_master_1(md_mn_result_t *argp, CLIENT *clnt)
+{
+	enum clnt_stat	rpc_stat;
+	int		*reply;
+
+	reply = Zalloc(sizeof (int));
+	rpc_stat = clnt_call(clnt, mdmn_wakeup_master,
+	    (xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)reply,
+	    TIMEOUT);
+	if (rpc_stat == RPC_SUCCESS)
+		return (reply);
+
+	Free(reply);
+	return (NULL);
+}
+
+/*
+ * Client stub for the mdmn_comm_lock RPC.
+ * Returns an allocated int holding the reply (owned by the caller),
+ * or NULL if the call did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_lock_1(md_mn_set_and_class_t *argp, CLIENT *clnt)
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_lock,
+	    (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)clnt_res,
+	    TIMEOUT) != RPC_SUCCESS) {
+		/* the caller only sees NULL, so release the reply buffer */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * Client stub for the mdmn_comm_unlock RPC.
+ * Returns an allocated int holding the reply (owned by the caller),
+ * or NULL if the call did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_unlock_1(md_mn_set_and_class_t *argp, CLIENT *clnt)
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_unlock,
+	    (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)clnt_res,
+	    TIMEOUT) != RPC_SUCCESS) {
+		/* the caller only sees NULL, so release the reply buffer */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * Client stub for the mdmn_comm_suspend RPC.
+ * Returns an allocated int holding the reply (owned by the caller),
+ * or NULL if the call did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_suspend_1(md_mn_set_and_class_t *argp, CLIENT *clnt)
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_suspend,
+	    (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)clnt_res,
+	    TIMEOUT) != RPC_SUCCESS) {
+		/* the caller only sees NULL, so release the reply buffer */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * Client stub for the mdmn_comm_resume RPC.
+ * Returns an allocated int holding the reply (owned by the caller),
+ * or NULL if the call did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_resume_1(md_mn_set_and_class_t *argp, CLIENT *clnt)
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_resume,
+	    (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)clnt_res,
+	    TIMEOUT) != RPC_SUCCESS) {
+		/* the caller only sees NULL, so release the reply buffer */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * Client stub for the mdmn_comm_reinit_set RPC.
+ * Returns an allocated int holding the reply (owned by the caller),
+ * or NULL if the call did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_reinit_set_1(set_t *argp, CLIENT *clnt)
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_reinit_set,
+	    (xdrproc_t)xdr_set_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)clnt_res,
+	    TIMEOUT) != RPC_SUCCESS) {
+		/* the caller only sees NULL, so release the reply buffer */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * Client stub for the mdmn_comm_msglock RPC.
+ * Returns an allocated int holding the reply (owned by the caller),
+ * or NULL if the call did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_msglock_1(md_mn_type_and_lock_t *argp, CLIENT *clnt)
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_msglock,
+	    (xdrproc_t)xdr_md_mn_type_and_lock_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)clnt_res,
+	    TIMEOUT) != RPC_SUCCESS) {
+		/* the caller only sees NULL, so release the reply buffer */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+
+#define USECS_PER_TICK 10000
+
+
+/*
+ * Let the kernel create a clusterwide unique message ID
+ *
+ * The ID is written to *msgid. When the ioctl fails, mid_nid is set to
+ * ~0u and mid_time to 0.
+ *
+ * returns 0 on success
+ *         1 on failure (NULL msgid, ioctl failure, or node id not set)
+ */
+
+int
+mdmn_create_msgid(md_mn_msgid_t *msgid)
+{
+	md_error_t mde = mdnullerror;
+
+	if (msgid == NULL) {
+		return (1); /* failure */
+	}
+
+	if (metaioctl(MD_IOCGUNIQMSGID, msgid, &mde, NULL) != 0) {
+		/* the error is not reported further, so release it here */
+		mdclrerror(&mde);
+		msgid->mid_nid = ~0u;
+		msgid->mid_time = 0LL;
+		return (1); /* failure */
+	}
+
+	/*
+	 * mid_smid and mid_oclass are only used for submessages.
+	 * mdmn_create_msgid is never called for submessages, as they inherit
+	 * the message ID from their parent.
+	 * Thus we can safely null out the following fields.
+	 */
+	msgid->mid_smid = 0;
+	msgid->mid_oclass = 0;
+
+	/* if the node_id is not set yet, something seems to be wrong */
+	if (msgid->mid_nid == ~0u) {
+		return (1); /* failure */
+	}
+
+	return (0); /* success */
+}
+
+/*
+ * Allocate a new result structure and copy res into it.
+ *
+ * The scalar fields, the stdout/stderr buffers and the host, extra and
+ * name strings of mmr_ep are duplicated; no other part of mmr_ep is
+ * copied. The copy is meant to be released with free_result().
+ */
+md_mn_result_t *
+copy_result(md_mn_result_t *res)
+{
+	md_mn_result_t *nres;
+
+	nres = Zalloc(sizeof (md_mn_result_t));
+	/* It's MSGID_COPY(from, to); */
+	MSGID_COPY(&(res->mmr_msgid), &(nres->mmr_msgid));
+	nres->mmr_msgtype = res->mmr_msgtype;
+	nres->mmr_setno = res->mmr_setno;
+	nres->mmr_flags = res->mmr_flags;
+	nres->mmr_sender = res->mmr_sender;
+	nres->mmr_failing_node = res->mmr_failing_node;
+	nres->mmr_comm_state = res->mmr_comm_state;
+	nres->mmr_exitval = res->mmr_exitval;
+	nres->mmr_out_size = res->mmr_out_size;
+	nres->mmr_err_size = res->mmr_err_size;
+	if (res->mmr_out_size > 0) {
+		nres->mmr_out = Zalloc(res->mmr_out_size);
+		bcopy(res->mmr_out, nres->mmr_out, res->mmr_out_size);
+	}
+	if (res->mmr_err_size > 0) {
+		nres->mmr_err = Zalloc(res->mmr_err_size);
+		bcopy(res->mmr_err, nres->mmr_err, res->mmr_err_size);
+	}
+	/*
+	 * These are pointers, so test them against NULL (not '\0') and
+	 * duplicate them with the checked Strdup() wrapper.
+	 */
+	if (res->mmr_ep.host != NULL) {
+		nres->mmr_ep.host = Strdup(res->mmr_ep.host);
+	}
+	if (res->mmr_ep.extra != NULL) {
+		nres->mmr_ep.extra = Strdup(res->mmr_ep.extra);
+	}
+	if (res->mmr_ep.name != NULL) {
+		nres->mmr_ep.name = Strdup(res->mmr_ep.name);
+	}
+	return (nres);
+}
+
+/*
+ * Release a result structure together with its stdout/stderr buffers
+ * and the host, extra and name strings of its error structure.
+ * A NULL res is ignored.
+ */
+void
+free_result(md_mn_result_t *res)
+{
+	if (res == NULL) {
+		return;
+	}
+	if (res->mmr_out_size > 0) {
+		Free(res->mmr_out);
+	}
+	if (res->mmr_err_size > 0) {
+		Free(res->mmr_err);
+	}
+	/* these are pointers: compare against NULL, not '\0' */
+	if (res->mmr_ep.host != NULL) {
+		Free(res->mmr_ep.host);
+	}
+	if (res->mmr_ep.extra != NULL) {
+		Free(res->mmr_ep.extra);
+	}
+	if (res->mmr_ep.name != NULL) {
+		Free(res->mmr_ep.name);
+	}
+	Free(res);
+}
+
+
+/*
+ * Copy msg into dest; when dest is NULL a new message is allocated.
+ * An event data buffer of msg_event_size bytes is allocated only if the
+ * destination does not already have one. Returns the destination.
+ */
+md_mn_msg_t *
+copy_msg(md_mn_msg_t *msg, md_mn_msg_t *dest)
+{
+	md_mn_msg_t	*copy = dest;
+
+	if (copy == NULL)
+		copy = Zalloc(sizeof (md_mn_msg_t));
+
+	/* an existing event data buffer in the destination is reused */
+	if (copy->msg_event_data == NULL)
+		copy->msg_event_data = Zalloc(msg->msg_event_size);
+
+	/* It's MSGID_COPY(from, to); */
+	MSGID_COPY(&(msg->msg_msgid), &(copy->msg_msgid));
+	copy->msg_sender = msg->msg_sender;
+	copy->msg_flags = msg->msg_flags;
+	copy->msg_setno = msg->msg_setno;
+	copy->msg_type = msg->msg_type;
+	copy->msg_event_size = msg->msg_event_size;
+
+	if (msg->msg_event_size > 0) {
+		bcopy(msg->msg_event_data, copy->msg_event_data,
+		    msg->msg_event_size);
+	}
+	return (copy);
+}
+
+/*
+ * Convert a message between its incore (msg) and on-disk (msgod) form.
+ * direction must be MD_MN_COPY_TO_ONDISK or MD_MN_COPY_TO_INCORE.
+ * When copying to incore, the event data buffer is allocated if msg has
+ * none yet.
+ */
+void
+copy_msg_1(md_mn_msg_t *msg, md_mn_msg_od_t *msgod, int direction)
+{
+	assert((direction == MD_MN_COPY_TO_ONDISK) ||
+	    (direction == MD_MN_COPY_TO_INCORE));
+
+	if (direction != MD_MN_COPY_TO_ONDISK) {
+		/* on-disk -> incore */
+		MSGID_COPY(&(msgod->msg_msgid), &(msg->msg_msgid));
+		msg->msg_sender = msgod->msg_sender;
+		msg->msg_flags = msgod->msg_flags;
+		msg->msg_setno = msgod->msg_setno;
+		msg->msg_type = msgod->msg_type;
+		msg->msg_event_size = msgod->msg_od_event_size;
+		if (msg->msg_event_data == NULL)
+			msg->msg_event_data = Zalloc(msg->msg_event_size);
+
+		bcopy(&msgod->msg_od_event_data[0],
+		    msg->msg_event_data, msgod->msg_od_event_size);
+		return;
+	}
+
+	/* incore -> on-disk */
+	MSGID_COPY(&(msg->msg_msgid), &(msgod->msg_msgid));
+	msgod->msg_sender = msg->msg_sender;
+	msgod->msg_flags = msg->msg_flags;
+	msgod->msg_setno = msg->msg_setno;
+	msgod->msg_type = msg->msg_type;
+	msgod->msg_od_event_size = msg->msg_event_size;
+	/* only copy event data that is really there */
+	if (msg->msg_event_size != 0 && msg->msg_event_data != NULL) {
+		bcopy(msg->msg_event_data,
+		    &msgod->msg_od_event_data[0], msg->msg_event_size);
+	}
+}
+
+/*
+ * Release a message. The event data buffer is released only when the
+ * message records a non-zero event size.
+ */
+void
+free_msg(md_mn_msg_t *msg)
+{
+	if (msg->msg_event_size > 0)
+		Free(msg->msg_event_data);
+
+	Free(msg);
+}
+
+
+/* The following declarations are only for the next two routines */
+
+md_mn_client_list_t *mdmn_clients;
+
+mutex_t mcl_mutex;
+#define MNGLC_INIT_ONLY 0x0001
+#define MNGLC_FOR_REAL 0x0002
+/*
+ * mdmn_get_local_clnt(flag)
+ * Hand out an RPC client for the local commd: take one from the free
+ * pool if the pool is not empty, otherwise create a new one.
+ * Every multithreaded application that uses mdmn_send_message must call it
+ * single threaded first with special flags so we do the initialization
+ * stuff in a safe environment.
+ *
+ * Input: MNGLC_INIT_ONLY: just initializes the mutex
+ *        MNGLC_FOR_REAL : do real work
+ * Output:
+ *	An rpc client for sending rpc requests to the local commd
+ *	NULL in case of an error (and always for MNGLC_INIT_ONLY)
+ *
+ */
+static CLIENT *
+mdmn_get_local_clnt(uint_t flag)
+{
+	static int		inited = 0;
+	CLIENT			*clnt;
+	md_mn_client_list_t	*head;
+
+	/* first call: set up the mutex protecting the pool */
+	if (!inited) {
+		(void) mutex_init(&mcl_mutex, USYNC_THREAD, NULL);
+		inited = 1;
+	}
+
+	if (flag == MNGLC_INIT_ONLY)
+		return ((CLIENT *)NULL);
+
+	(void) mutex_lock(&mcl_mutex);
+	head = mdmn_clients;
+	if (head != (md_mn_client_list_t *)NULL) {
+		/*
+		 * A previous put operation left an entry: unlink it from
+		 * the head of the list, free the list element and hand
+		 * out the client it carried.
+		 */
+		clnt = head->mcl_clnt;
+		mdmn_clients = head->mcl_next;
+		Free(head);
+	} else {
+		/* the pool is empty, create a client and return it */
+		clnt = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD,
+		    ONE, "tcp");
+	}
+	(void) mutex_unlock(&mcl_mutex);
+
+	if (clnt == (CLIENT *)NULL)
+		clnt_pcreateerror("local_daemon");
+
+	return (clnt);
+}
+
+/*
+ * mdmn_put_local_clnt()
+ * returns a no longer used client to the pool
+ *
+ * Input: an RPC client (a NULL client is ignored)
+ * Output: void
+ */
+static void
+mdmn_put_local_clnt(CLIENT *local_daemon)
+{
+	md_mn_client_list_t *entry;
+
+	/* a NULL client in the pool would later be handed out as an error */
+	if (local_daemon == (CLIENT *)NULL) {
+		return;
+	}
+
+	/*
+	 * Use Zalloc() like the rest of this file rather than an unchecked
+	 * malloc(); the element is released with Free() in
+	 * mdmn_get_local_clnt(). Allocate before taking the lock.
+	 */
+	entry = Zalloc(sizeof (md_mn_client_list_t));
+	entry->mcl_clnt = local_daemon;
+
+	(void) mutex_lock(&mcl_mutex);
+
+	/* push the new element onto the head of the pool */
+	entry->mcl_next = mdmn_clients;
+	mdmn_clients = entry;
+
+	(void) mutex_unlock(&mcl_mutex);
+}
+
+/*
+ * Regular interface for sending a message.
+ * All arguments are handed on unchanged to mdmn_send_message_with_msgid(),
+ * with MD_NULL_MSGID supplied as the message ID.
+ *
+ * Normally a message to be sent has no message ID yet. Only a previously
+ * logged message that is being replayed already carries one; for that case
+ * mdmn_send_message_with_msgid() has to be called directly.
+ *
+ * Return values / CAVEAT EMPTOR: see mdmn_send_message_with_msgid()
+ */
+
+int
+mdmn_send_message(
+	set_t setno,
+	md_mn_msgtype_t type,
+	uint_t flags,
+	char *data,
+	int size,
+	md_mn_result_t **result,
+	md_error_t *ep)
+{
+	int	rval;
+
+	rval = mdmn_send_message_with_msgid(setno, type, flags, data, size,
+	    result, MD_NULL_MSGID, ep);
+	return (rval);
+}
+/*
+ * mdmn_send_message_with_msgid()
+ * Create a message from the given pieces of data and hand it over
+ * to the local commd.
+ * This may fail for various reasons (rpc error / class busy / class locked ...)
+ * Some error types are immediately deadly, others will cause retries
+ * until the request is fulfilled or until the retries are exceeded.
+ *
+ * In case an error is returned it is up to the user to decide what to do.
+ *
+ * Returns:
+ *	0 on success
+ *	1 if retries1 exceeded
+ *	2 if retries2 exceeded
+ *	-1 if connecting to the local daemon failed
+ *	-2 if the RPC call to the local daemon failed
+ *	-3 if this node hasn't yet joined the set
+ *	-4 if any other problem occurred (including an invalid message type)
+ *
+ * CAVEAT EMPTOR:
+ * The caller is responsible for calling free_result() when finished with
+ * the results!
+ */
+int
+mdmn_send_message_with_msgid(
+	set_t setno,
+	md_mn_msgtype_t type,
+	uint_t flags,
+	char *data,
+	int size,
+	md_mn_result_t **result,
+	md_mn_msgid_t *msgid,
+	md_error_t *ep)
+{
+	uint_t retry1, ticks1, retry2, ticks2;
+	int retval;
+
+	CLIENT *local_daemon;
+	struct timeval timeout;
+
+	md_mn_msg_t msg;
+	md_mn_result_t *resp;
+
+	/*
+	 * Special case for multithreaded applications:
+	 * When starting up, the application should call mdmn_send_message
+	 * single threaded with all parameters set to NULL.
+	 * When we detect this we know, we safely can do initialization
+	 * stuff here.
+	 * We only check for set and type being zero
+	 */
+	if ((setno == 0) && (type == 0)) {
+		/* do all needed initializations here */
+		(void) mdmn_get_local_clnt(MNGLC_INIT_ONLY);
+		return (0); /* success */
+	}
+
+
+	/* did the caller specify space to store the result pointer? */
+	if (result == (md_mn_result_t **)NULL) {
+		syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
+		    "FATAL, can not allocate result structure\n"));
+		return (-4);
+	}
+	*result = NULL;
+
+	/*
+	 * type indexes msg_table below (timeout, retries, ticks);
+	 * reject values outside the table instead of reading past it.
+	 */
+	if (type >= MD_MN_NMESSAGES) {
+		syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
+		    "FATAL, invalid message type %d\n"), (int)type);
+		return (-4);
+	}
+
+	/*
+	 * Start from an all-zero message so that fields not set below
+	 * (msg_sender) don't go out over RPC as stack garbage.
+	 */
+	bzero(&msg, sizeof (msg));
+
+	/* Replay messages already have their msgID */
+	if ((flags & MD_MSGF_REPLAY_MSG) == 0) {
+		if (mdmn_create_msgid(&msg.msg_msgid) != 0) {
+			syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
+			    "FATAL, can not create message ID\n"));
+			return (-4);
+		}
+	} else {
+		/* in this case a message ID must be specified */
+		assert(msgid != MD_NULL_MSGID);
+		MSGID_COPY(msgid, &msg.msg_msgid);
+	}
+
+
+	/*
+	 * Fill in the message from the caller's arguments.
+	 * NOTE(review): an earlier comment here said the default flags of
+	 * the message type are applied as well, but only the caller's flags
+	 * are stored - confirm where the defaults get added.
+	 */
+	msg.msg_flags = flags;
+	msg.msg_setno = setno;
+	msg.msg_type = type;
+	msg.msg_event_size = size;
+	msg.msg_event_data = data;
+
+	/*
+	 * For the timeout pick the specific timeout for the message times the
+	 * the maximum number of nodes.
+	 * This is a better estimate than 1 hour or 3 days or never.
+	 */
+	timeout.tv_sec = mdmn_get_timeout(type) * NNODES;
+	timeout.tv_usec = 0;
+
+	if (flags & MD_MSGF_VERBOSE) {
+		syslog(LOG_INFO, "send_message: ID=(%d, 0x%llx-%d)\n",
+		    MSGID_ELEMS(msg.msg_msgid));
+	}
+
+	/* get an RPC client to the local commd */
+	local_daemon = mdmn_get_local_clnt(MNGLC_FOR_REAL);
+	if (local_daemon == (CLIENT *)NULL) {
+		return (-1);
+	}
+	clnt_control(local_daemon, CLSET_TIMEOUT, (char *)&timeout);
+
+	retry1 = msg_table[type].mte_retry1;
+	ticks1 = msg_table[type].mte_ticks1;
+	retry2 = msg_table[type].mte_retry2;
+	ticks2 = msg_table[type].mte_ticks2;
+
+	/*
+	 * run that loop until:
+	 * - commstate is Ok
+	 * - deadly commstate occurred
+	 * - retries1 or retries2 exceeded
+	 */
+	for (; ; ) {
+		*result = mdmn_send_1(&msg, local_daemon);
+		resp = *result;
+		if (resp != (md_mn_result_t *)NULL) {
+			/* Bingo! */
+			if (resp->mmr_comm_state == MDMNE_ACK) {
+				retval = 0;
+				goto out;
+			}
+			/* Hmm... what if there's no handler? */
+			if (resp->mmr_comm_state == MDMNE_NO_HANDLER) {
+				retval = 0;
+				goto out;
+
+			}
+			/*
+			 * This node didn't yet join the disk set. It is not
+			 * supposed to send any messages then.
+			 * This is deadly (no retries)
+			 */
+			if (resp->mmr_comm_state == MDMNE_NOT_JOINED) {
+				retval = -3;
+				goto out;
+
+			}
+			/* these two are deadly too (no retries) */
+			if ((resp->mmr_comm_state == MDMNE_NO_WAKEUP_ENTRY) ||
+			    (resp->mmr_comm_state == MDMNE_LOG_FAIL)) {
+				retval = -4;
+				goto out;
+
+			}
+			/* Class busy? Use retry1 */
+			if (resp->mmr_comm_state == MDMNE_CLASS_BUSY) {
+				if (retry1-- == 0) {
+					retval = 1; /* retry1 exceeded */
+					goto out;
+				}
+				(void) usleep(ticks1 * USECS_PER_TICK);
+				free_result(resp);
+
+				if (flags & MD_MSGF_VERBOSE)
+					(void) printf("#Resend1 ID=(%d, "
+					    "0x%llx-%d)\n",
+					    MSGID_ELEMS(msg.msg_msgid));
+				continue;
+			}
+			if ((resp->mmr_comm_state == MDMNE_CLASS_LOCKED) ||
+			    (resp->mmr_comm_state == MDMNE_ABORT)) {
+				/*
+				 * Be patient, wait for 1 secs and try again.
+				 * It's not likely that the ABORT condition ever
+				 * goes away, but it won't hurt to retry
+				 */
+				free_result(resp);
+				(void) sleep(1);
+				continue;
+			}
+			if (resp->mmr_comm_state == MDMNE_SUSPENDED) {
+				if (flags & MD_MSGF_FAIL_ON_SUSPEND) {
+					/* caller wants us to fail here */
+					(void) mddserror(ep,
+					    MDE_DS_NOTNOW_RECONFIG, setno,
+					    mynode(), mynode(), NULL);
+					retval = -4;
+					goto out;
+				} else {
+					/* wait for 1 secs and try again. */
+					free_result(resp);
+					(void) sleep(1);
+					continue;
+				}
+			}
+		} else {
+			/*
+			 * If we get a NULL back from the rpc call, try to
+			 * reinitialize the client.
+			 * Depending on retries2 we try again, or not.
+			 */
+			syslog(LOG_INFO,
+			    "send_message: ID=(%d, 0x%llx-%d) resp = NULL\n",
+			    MSGID_ELEMS(msg.msg_msgid));
+
+			clnt_destroy(local_daemon);
+			local_daemon = mdmn_get_local_clnt(MNGLC_FOR_REAL);
+
+			if (local_daemon == (CLIENT *)NULL) {
+				return (-1);
+			}
+			clnt_control(local_daemon, CLSET_TIMEOUT,
+			    (char *)&timeout);
+		}
+
+		/*
+		 * If we are here, either resp is zero or resp is non-zero
+		 * but some commstate not mentioned above occurred.
+		 * In either case we use retry2
+		 */
+		if (retry2-- == 0) {
+			syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
+			    "send_message: (%d, 0x%llx-%d) retry2 exceeded\n"),
+			    MSGID_ELEMS(msg.msg_msgid));
+
+			retval = 2; /* retry2 exceeded */
+			goto out;
+		}
+		if (flags & MD_MSGF_VERBOSE) {
+			syslog(LOG_DEBUG, dgettext(TEXT_DOMAIN,
+			    "send_message: (%d, 0x%llx-%d) resend on retry2\n"),
+			    MSGID_ELEMS(msg.msg_msgid));
+		}
+
+		(void) usleep(ticks2 * USECS_PER_TICK);
+
+		if (resp != (md_mn_result_t *)NULL) {
+			free_result(resp);
+		}
+	}
+out:
+	/* hand the client back to the pool for the next sender */
+	mdmn_put_local_clnt(local_daemon);
+	return (retval);
+}
+
+/*
+ * suspend the commd for a given set/class combination.
+ *
+ * Parameter:
+ *	set number or 0 (meaning all sets)
+ *	class number or 0 (meaning all classes)
+ *
+ * Returns:
+ *	0 on success (set is suspended and all messages drained)
+ *	MDE_DS_COMMDCTL_SUSPEND_NYD if set is not yet drained
+ *	MDE_DS_COMMDCTL_SUSPEND_FAIL if any failure occurred
+ */
+int
+mdmn_suspend(set_t setno, md_mn_msgclass_t class)
+{
+	int *resp;
+	int commstate;
+	CLIENT *local_daemon;
+	md_mn_set_and_class_t msc;
+
+	if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES)) {
+		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
+	}
+	local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE,
+	    "tcp");
+	if (local_daemon == (CLIENT *)NULL) {
+		clnt_pcreateerror("local_daemon");
+		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
+	}
+	msc.msc_set = setno;
+	msc.msc_class = class;
+	msc.msc_flags = 0;
+
+	resp = mdmn_comm_suspend_1(&msc, local_daemon);
+	clnt_destroy(local_daemon);
+
+	if (resp == NULL) {
+		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
+	}
+
+	/* the reply buffer is ours: save its value, then release it */
+	commstate = *resp;
+	Free(resp);
+
+	if (commstate == MDMNE_ACK) {
+		/* set successfully drained, no outstanding messages */
+		return (0);
+	}
+	if (commstate != MDMNE_SET_NOT_DRAINED) {
+		/* some error occurred */
+		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
+	}
+
+	/* still outstanding messages, return not yet drained failure */
+	return (MDE_DS_COMMDCTL_SUSPEND_NYD);
+}
+
+/*
+ * resume the commd for a given set/class combination.
+ *
+ * Parameter:
+ *	set number or 0 (meaning all sets)
+ *	class number or 0 (meaning all classes)
+ *	flags, passed on to commd in msc_flags
+ *
+ * Returns:
+ *	0 on success
+ *	MDE_DS_COMMDCTL_RESUME_FAIL on failure
+ */
+int
+mdmn_resume(set_t setno, md_mn_msgclass_t class, uint_t flags)
+{
+	md_mn_set_and_class_t	req;
+	CLIENT			*clnt;
+	int			*reply;
+	int			rval;
+
+	if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES))
+		return (MDE_DS_COMMDCTL_RESUME_FAIL);
+
+	clnt = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, "tcp");
+	if (clnt == (CLIENT *)NULL) {
+		clnt_pcreateerror("local_daemon");
+		return (MDE_DS_COMMDCTL_RESUME_FAIL);
+	}
+
+	req.msc_set = setno;
+	req.msc_class = class;
+	req.msc_flags = flags;
+
+	/* anything but an ACK from commd counts as a failure */
+	rval = MDE_DS_COMMDCTL_RESUME_FAIL;
+	reply = mdmn_comm_resume_1(&req, clnt);
+	if (reply != NULL) {
+		if (*reply == MDMNE_ACK)
+			rval = 0;
+		Free(reply);
+	}
+
+	clnt_destroy(clnt);
+	return (rval);
+}
+
+/*
+ * abort all communication
+ *
+ * Sends an MD_MN_MSG_ABORT message with MD_MSGF_LOCAL_ONLY.
+ *
+ * returns void, because: if *this* gets an error what do you want to do?
+ */
+void
+mdmn_abort(void)
+{
+	/*
+	 * An array rather than a pointer, so that sizeof (dummy) is the
+	 * size of the string including its terminator and not the size of
+	 * a pointer.
+	 */
+	char dummy[] = "abort";
+	md_mn_result_t *resultp = NULL;
+	md_error_t mdne = mdnullerror;
+
+	(void) mdmn_send_message(0, /* No set is needed for this message */
+	    MD_MN_MSG_ABORT,
+	    MD_MSGF_LOCAL_ONLY,
+	    dummy, sizeof (dummy),
+	    &resultp, &mdne);
+
+	/*
+	 * Results of mdmn_send_message() have to be released with
+	 * free_result(), which also frees the buffers hanging off them.
+	 */
+	if (resultp != NULL) {
+		free_result(resultp);
+	}
+}
+
+/*
+ * trigger the reinitialization for a given set.
+ *
+ * Parameter: set number (0 and values >= MD_MAXSETS are rejected)
+ *
+ * Returns:
+ *	0 on success
+ *	1 on failure
+ */
+int
+mdmn_reinit_set(set_t setno)
+{
+	CLIENT	*clnt;
+	int	*reply;
+	int	rval;
+
+	if ((setno == 0) || (setno >= MD_MAXSETS))
+		return (1);
+
+	clnt = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, "tcp");
+	if (clnt == (CLIENT *)NULL) {
+		clnt_pcreateerror("local_daemon");
+		return (1);
+	}
+
+	/* anything but an ACK from commd counts as a failure */
+	rval = 1;
+	reply = mdmn_comm_reinit_set_1(&setno, clnt);
+	if (reply != NULL) {
+		if (*reply == MDMNE_ACK)
+			rval = 0;
+		Free(reply);
+	}
+
+	clnt_destroy(clnt);
+	return (rval);
+}
+
+
+/*
+ * Lock a single message type from being processed on this node
+ *
+ * Parameter: md_mn_msgtype_t msgtype, uint_t locktype
+ *	(msgtype 0 and values >= MD_MN_NMESSAGES are rejected)
+ *
+ * Returns:
+ *	0 on success
+ *	1 on failure
+ */
+int
+mdmn_msgtype_lock(md_mn_msgtype_t msgtype, uint_t locktype)
+{
+	md_mn_type_and_lock_t	req;
+	CLIENT			*clnt;
+	int			*reply;
+	int			rval;
+
+	if ((msgtype == 0) || (msgtype >= MD_MN_NMESSAGES))
+		return (1);
+
+	clnt = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, "tcp");
+	if (clnt == (CLIENT *)NULL) {
+		clnt_pcreateerror("local_daemon");
+		return (1);
+	}
+
+	req.mmtl_type = msgtype;
+	req.mmtl_lock = locktype;
+
+	/* anything but an ACK from commd counts as a failure */
+	rval = 1;
+	reply = mdmn_comm_msglock_1(&req, clnt);
+	if (reply != NULL) {
+		if (*reply == MDMNE_ACK)
+			rval = 0;
+		Free(reply);
+	}
+
+	clnt_destroy(clnt);
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c b/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c
new file mode 100644
index 0000000000..8603aca5ac
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c
@@ -0,0 +1,1957 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <wait.h>
+#include <sys/time.h>
+#include <syslog.h>
+
+#include <meta.h>
+#include <sys/lvm/mdio.h>
+#include <sys/lvm/md_mddb.h>
+#include <sys/lvm/md_mirror.h>
+
+#define MAX_N_ARGS 64
+#define MAX_ARG_LEN 1024
+
+/* we reserve 1024 bytes for stdout and the same for stderr */
+#define MAX_OUT 1024
+#define MAX_ERR 1024
+#define JUNK 128 /* used to flush stdout and stderr */
+
+
+/*
+ * Read one chunk of child output from fd.
+ *
+ * As long as buf (bufsize bytes, *nread of them used) has room left besides
+ * the byte reserved for the terminating '\0', the data is appended to buf
+ * and *nread is advanced. Once buf is full the data is read into a junk
+ * buffer and dropped, just to keep the child from blocking.
+ *
+ * Returns 1 when the stream is finished (end of file or a read error other
+ * than EINTR), 0 when more data may follow.
+ */
+static int
+mdmn_drain_child_fd(int fd, char *buf, int bufsize, int *nread)
+{
+	char	junk[JUNK];
+	ssize_t	cnt;
+	int	room = bufsize - *nread - 1;
+
+	if (room > 0) {
+		cnt = read(fd, buf + *nread, room);
+		if (cnt > 0)
+			*nread += (int)cnt;
+	} else {
+		/* buffer full, read away the output */
+		cnt = read(fd, junk, JUNK);
+	}
+	if ((cnt < 0) && (errno == EINTR))
+		return (0);
+	return (cnt <= 0);
+}
+
+/*
+ * Handler that runs a command line locally.
+ *
+ * msg->msg_event_data holds the command line as one string; it is split at
+ * blanks and tabs into an argument vector. The command is executed in a
+ * child process with ".rpc_call" appended to its argv[0]. Up to
+ * MAX_OUT - 1 bytes of its stdout and MAX_ERR - 1 bytes of its stderr are
+ * returned in resp->mmr_out / resp->mmr_err, its exit status in
+ * resp->mmr_exitval. On any failure resp->mmr_comm_state is set to
+ * MDMNE_HANDLER_FAILED.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_cmd(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+
+	/*
+	 * We are given one string containing all the arguments
+	 * For execvp() we have to regenerate the arguments again
+	 */
+	int arg; /* argument that is currently been built */
+	int index; /* runs through arg above */
+	int i; /* helper for for loop */
+	char *argv[MAX_N_ARGS]; /* argument array for execvp */
+	char *cp; /* runs through the given command line string */
+	char *command = NULL; /* the command we call locally */
+	int pout[2]; /* pipe for stdout */
+	int perr[2]; /* pipe for stderr */
+	pid_t pid; /* process id */
+
+	cp = msg->msg_event_data;
+	arg = 0;
+	index = 0;
+
+	/* init the args array alloc the first one and null out the rest */
+	argv[0] = Malloc(MAX_ARG_LEN);
+	for (i = 1; i < MAX_N_ARGS; i++) {
+		argv[i] = NULL;
+	}
+
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+
+	while (*cp != '\0') {
+		if ((*cp != ' ') && (*cp != '\t')) {
+			/*
+			 * No space or tab: copy char into current
+			 * argv and advance both pointers.
+			 * The last byte of the buffer stays reserved for
+			 * the terminating '\0'.
+			 */
+			if (index >= MAX_ARG_LEN - 1) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "PANIC: argument too long\n"));
+				resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+				goto out;
+			}
+			argv[arg][index] = *cp;
+			cp++; /* next char in command line */
+			index++; /* next char in argument */
+		} else {
+			/*
+			 * space or tab: terminate current argv,
+			 * advance arg, reset pointer into arg,
+			 * advance pointer in command line.
+			 * The last slot of argv stays reserved for the
+			 * terminating NULL pointer.
+			 */
+			if (arg >= MAX_N_ARGS - 2) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "PANIC: too many arguments specified\n"));
+				resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+				goto out;
+			}
+			argv[arg][index] = '\0';
+			arg++; /* next argument */
+			argv[arg] = Malloc(MAX_ARG_LEN);
+			cp++; /* next char in command line */
+			index = 0; /* starts at char 0 */
+		}
+	}
+	/* terminate the last real argument */
+	argv[arg][index] = '\0';
+	/* the last argument is an NULL pointer */
+	argv[++arg] = NULL;
+
+	/* argv[0] gets ".rpc_call" appended below, make sure that fits */
+	if (strlen(argv[0]) + sizeof (".rpc_call") > MAX_ARG_LEN) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "PANIC: argument too long\n"));
+		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		goto out;
+	}
+
+	if (pipe(pout) < 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "PANIC: pipe failed\n"));
+		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		goto out;
+	}
+	if (pipe(perr) < 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "PANIC: pipe failed\n"));
+		(void) close(pout[0]);
+		(void) close(pout[1]);
+		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		goto out;
+	}
+	/* execute the plain command, but tag argv[0] with ".rpc_call" */
+	command = Strdup(argv[0]);
+	(void) strcat(argv[0], ".rpc_call");
+	pid = fork1();
+	if (pid == (pid_t)-1) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "PANIC: fork failed\n"));
+		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		(void) close(pout[0]);
+		(void) close(pout[1]);
+		(void) close(perr[0]);
+		(void) close(perr[1]);
+		goto out;
+	} else if (pid == (pid_t)0) {
+		/*
+		 * child: it must never return into the caller's code,
+		 * so every failure ends in _exit()
+		 */
+		(void) close(0);
+		/* close the reading channels of pout and perr */
+		(void) close(pout[0]);
+		(void) close(perr[0]);
+		/* redirect stdout */
+		if (dup2(pout[1], 1) < 0) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "PANIC: dup2 failed\n"));
+			_exit(1);
+		}
+
+		/* redirect stderr */
+		if (dup2(perr[1], 2) < 0) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "PANIC: dup2 failed\n"));
+			_exit(1);
+		}
+
+		(void) execvp(command, (char *const *)argv);
+		perror("execvp");
+		_exit(1);
+	} else {
+		/* parent process */
+		int stat_loc;
+		int out_done = 0;
+		int err_done = 0;
+		int out_read = 0;
+		int err_read = 0;
+		int maxfd;
+		fd_set rset;
+		pid_t wpid;
+
+
+		/* close the writing channels of pout and perr */
+		(void) close(pout[1]);
+		(void) close(perr[1]);
+		resp->mmr_out = Malloc(MAX_OUT);
+		resp->mmr_err = Malloc(MAX_ERR);
+		resp->mmr_out_size = MAX_OUT;
+		resp->mmr_err_size = MAX_ERR;
+		maxfd = max(pout[0], perr[0]) + 1;
+
+		/*
+		 * Collect the child's stdout and stderr until both are
+		 * closed. Whatever doesn't fit into the MAX_OUT / MAX_ERR
+		 * buffers is read away, just to make the child happy.
+		 */
+		while ((out_done == 0) || (err_done == 0)) {
+			/* select() modifies rset, rebuild it every time */
+			FD_ZERO(&rset);
+			if (out_done == 0)
+				FD_SET(pout[0], &rset);
+			if (err_done == 0)
+				FD_SET(perr[0], &rset);
+
+			if (select(maxfd, &rset, NULL, NULL, NULL) < 0) {
+				if (errno == EINTR)
+					continue;
+				resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+				break;
+			}
+
+			if ((out_done == 0) && FD_ISSET(pout[0], &rset)) {
+				out_done = mdmn_drain_child_fd(pout[0],
+				    resp->mmr_out, MAX_OUT, &out_read);
+			}
+			if ((err_done == 0) && FD_ISSET(perr[0], &rset)) {
+				err_done = mdmn_drain_child_fd(perr[0],
+				    resp->mmr_err, MAX_ERR, &err_read);
+			}
+		}
+		/* out_read / err_read never exceed MAX_OUT - 1 / MAX_ERR - 1 */
+		resp->mmr_out[out_read] = '\0';
+		resp->mmr_err[err_read] = '\0';
+
+		/*
+		 * Close our ends before waiting, so a child that is still
+		 * writing can't block on a pipe nobody reads anymore.
+		 */
+		(void) close(pout[0]);
+		(void) close(perr[0]);
+
+		/* decide by waitpid()'s return value, not by a stale errno */
+		do {
+			wpid = waitpid(pid, &stat_loc, 0);
+		} while ((wpid < 0) && (errno == EINTR));
+
+		if (wpid < 0) {
+			resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		} else {
+			resp->mmr_exitval = WEXITSTATUS(stat_loc);
+		}
+	}
+out:
+	for (i = 0; i < MAX_N_ARGS; i++) {
+		if (argv[i] != NULL) {
+			Free(argv[i]);
+		}
+	}
+	if (command != NULL) {
+		Free(command);
+	}
+}
+
+/*
+ * This is for checking if a metadevice is opened, and for
+ * locking in case it is not and for
+ * unlocking a locked device
+ *
+ * Only MD_MN_MSG_CLU_CHECK is acted upon here; for any other message
+ * type resp is left untouched.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_clu(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_isopen_t	*isopenp;
+	int		rval;
+
+	if (msg->msg_type != MD_MN_MSG_CLU_CHECK)
+		return;
+
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+
+	isopenp = (md_isopen_t *)(void *)msg->msg_event_data;
+	rval = metaioctl(MD_IOCISOPEN, isopenp, &(isopenp->mde), NULL);
+
+	/*
+	 * If the ioctl succeeded, report the open state of the metadevice.
+	 * Otherwise report the error the ioctl produced. As that is not
+	 * zero, no attempt is made to remove/rename the metadevice later.
+	 */
+	if (rval == 0) {
+		resp->mmr_exitval = isopenp->isopen;
+		return;
+	}
+
+	/*
+	 * When doing a metaclear, one node after the other
+	 * does the two steps:
+	 * - check on all nodes if this md is opened.
+	 * - remove the md locally.
+	 * When the 2nd node asks all nodes if the md is
+	 * open it starts with the first node.
+	 * As this already removed the md, the check
+	 * returns MDE_UNIT_NOT_SETUP.
+	 * In order to not keep the 2nd node from proceeding,
+	 * we map this to an Ok.
+	 */
+	if (mdismderror(&(isopenp->mde), MDE_UNIT_NOT_SETUP)) {
+		mdclrerror(&(isopenp->mde));
+		rval = 0;
+	}
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * handler for MD_MN_MSG_REQUIRE_OWNER
+ * Issues MD_MN_SET_MM_OWNER for the mirror/owner named in the message and
+ * stores the final ioctl return value in resp->mmr_exitval.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_req_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_req_owner_t	*req;
+	md_set_mmown_params_t	ownparm;
+	int			rval;
+	int			tries = 0;
+
+	req = (md_mn_req_owner_t *)(void *)msg->msg_event_data;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&ownparm, 0, sizeof (ownparm));
+	MD_SETDRIVERNAME(&ownparm, MD_MIRROR, MD_MIN2SET(req->mnum))
+	ownparm.d.mnum = req->mnum;
+	ownparm.d.owner = req->owner;
+
+	/*
+	 * Keep retrying the ownership change, one second apart, for as
+	 * long as the failure is EAGAIN and the retry budget is not used up.
+	 */
+	for (;;) {
+		rval = metaioctl(MD_MN_SET_MM_OWNER, &ownparm, &ownparm.mde,
+		    NULL);
+		if (rval == 0)
+			break;
+		if (ownparm.mde.info.md_error_info_t_u.sys_error.errnum !=
+		    EAGAIN)
+			break;
+		if (tries++ >= 10)
+			break;
+		(void) sleep(1);
+	}
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * handler for MD_MN_MSG_CHOOSE_OWNER
+ * This is called when a mirror resync has no owner. The master node generates
+ * this message which is not broadcast to the other nodes. The message is
+ * required as the kernel does not have access to the nodelist for the set.
+ *
+ * The chosen node id is sent to all nodes in a MD_MN_MSG_CHANGE_OWNER
+ * message; the return value of that send becomes resp->mmr_exitval.
+ * resp->mmr_exitval is set to 1 when the set cannot be resolved or when no
+ * owner can be selected from the node list.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_choose_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_chowner_t	chownermsg;
+	md_mn_msg_chooseid_t	*d;
+	int			ret = 0;
+	int			nodecnt;
+	int			nodeno;
+	uint_t			nodeid;
+	uint_t			myflags;
+	set_t			setno;
+	mdsetname_t		*sp;
+	md_set_desc		*sd;
+	md_mnnode_desc		*nd;
+	md_error_t		mde = mdnullerror;
+	md_mn_result_t		*resp1 = NULL;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_chooseid_t *)(void *)msg->msg_event_data;
+
+	setno = MD_MIN2SET(d->msg_chooseid_mnum);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_CHOOSE_OWNER: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = 1;
+		return;
+	}
+	if ((sd = metaget_setdesc(sp, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_CHOOSE_OWNER: Invalid set pointer\n"));
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	/*
+	 * If we've been called with msg_chooseid_set_node set TRUE then we
+	 * are simply re-setting the owner id to ensure consistency across
+	 * the cluster.
+	 * If the flag is reset (B_FALSE) we are requesting a new owner to be
+	 * determined.
+	 */
+	if (d->msg_chooseid_set_node) {
+		nodeid = d->msg_chooseid_rcnt;
+	} else {
+		/*
+		 * The node to be chosen will be the resync count for the set
+		 * modulo the number of live nodes in the set.
+		 */
+		nodecnt = 0;
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (nd->nd_flags & MD_MN_NODE_ALIVE)
+				nodecnt++;
+		}
+		if (nodecnt == 0) {
+			/* Nothing to pick from; also avoids a modulo by 0 */
+			syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+			    "MD_MN_MSG_CHOOSE_OWNER: No live nodes in "
+			    "setno %d\n"), setno);
+			resp->mmr_exitval = 1;
+			return;
+		}
+		nodeno = (d->msg_chooseid_rcnt%nodecnt);
+
+		/* scan the nodelist looking for the required node */
+		nodecnt = 0;
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (nd->nd_flags & MD_MN_NODE_ALIVE) {
+				if (nodecnt == nodeno)
+					break;
+				nodecnt++;
+			}
+		}
+		if (nd == NULL) {
+			/*
+			 * The index did not match any live node (e.g. a
+			 * negative remainder); do not dereference NULL.
+			 */
+			syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+			    "MD_MN_MSG_CHOOSE_OWNER: No owner found in "
+			    "setno %d\n"), setno);
+			resp->mmr_exitval = 1;
+			return;
+		}
+		nodeid = nd->nd_nodeid;
+	}
+
+	/* Send message to all nodes to make ownership change */
+	chownermsg.msg_chowner_mnum = d->msg_chooseid_mnum;
+	chownermsg.msg_chowner_nodeid = nodeid;
+	myflags = MD_MSGF_NO_LOG;
+
+	/* inherit some flags from the parent message */
+	myflags |= msg->msg_flags & MD_MSGF_INHERIT_BITS;
+
+	ret = mdmn_send_message(MD_MIN2SET(d->msg_chooseid_mnum),
+	    MD_MN_MSG_CHANGE_OWNER, myflags, (char *)&chownermsg,
+	    sizeof (chownermsg), &resp1, &mde);
+	if (resp1 != NULL)
+		free_result(resp1);
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * Handler for MD_MN_MSG_CHANGE_OWNER
+ * This is called when we are performing a resync and wish to change from
+ * no mirror owner to an owner chosen by the master.
+ * This message is only relevant for the new owner, the message will be
+ * ignored by all other nodes.
+ *
+ * resp->mmr_exitval is 1 if the set cannot be resolved, otherwise the
+ * result of the ownership ioctl (0 on nodes that are not the new owner).
+ */
+/*ARGSUSED*/
+void
+mdmn_do_change_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_set_mmown_params_t	setown;
+	md_mn_msg_chowner_t	*d;
+	int			ret = 0;
+	set_t			setno;
+	mdsetname_t		*sp;
+	md_set_desc		*sd;
+	md_error_t		mde = mdnullerror;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_chowner_t *)(void *)msg->msg_event_data;
+
+	setno = MD_MIN2SET(d->msg_chowner_mnum);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_CHANGE_OWNER: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = 1;
+		return;
+	}
+	if ((sd = metaget_setdesc(sp, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_CHANGE_OWNER: Invalid set pointer\n"));
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	if (d->msg_chowner_nodeid == sd->sd_mn_mynode->nd_nodeid) {
+		/*
+		 * If we are the chosen owner, issue ioctl to make the
+		 * ownership change
+		 */
+		(void) memset(&setown, 0, sizeof (md_set_mmown_params_t));
+		setown.d.mnum = d->msg_chowner_mnum;
+		setown.d.owner = d->msg_chowner_nodeid;
+		setown.d.flags = MD_MN_MM_SPAWN_THREAD;
+		MD_SETDRIVERNAME(&setown, MD_MIRROR,
+		    MD_MIN2SET(d->msg_chowner_mnum));
+
+		/*
+		 * Single shot at changing the owner, if it fails EAGAIN,
+		 * another node must have become the owner while we are in the
+		 * process of making this choice.
+		 */
+		ret = metaioctl(MD_MN_SET_MM_OWNER, &setown,
+		    &(setown.mde), NULL);
+
+		/*
+		 * The REQUIRE_OWNER handler finds EAGAIN in the error
+		 * structure filled in by the ioctl rather than in the return
+		 * value, so look there as well.  The original test on the
+		 * return value is kept for compatibility.
+		 * NOTE(review): like the REQUIRE_OWNER handler this reads
+		 * sys_error without checking the error class - confirm.
+		 */
+		if (ret == EAGAIN || (ret != 0 &&
+		    setown.mde.info.md_error_info_t_u.sys_error.errnum ==
+		    EAGAIN))
+			ret = 0;
+	}
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * handler for MD_MN_MSG_SUSPEND_WRITES
+ * Issues MD_MN_SUSPEND_WRITES for the mirror named in the message and
+ * reports the ioctl return value in resp->mmr_exitval.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_susp_write(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_suspwr_t	*req;
+	md_suspend_wr_params_t	ioc;
+
+	req = (md_mn_msg_suspwr_t *)(void *)msg->msg_event_data;
+
+	/* No stdout/stderr payload is returned by this handler */
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR,
+	    MD_MIN2SET(req->msg_suspwr_mnum));
+	ioc.mnum = req->msg_suspwr_mnum;
+
+	resp->mmr_exitval = metaioctl(MD_MN_SUSPEND_WRITES, &ioc,
+	    &(ioc.mde), NULL);
+}
+
+/*
+ * handler for MD_MN_MSG_STATE_UPDATE_RESWR
+ * Copies the submirror/component state change from the message into a
+ * MD_MN_SET_STATE ioctl request and reports the ioctl return value in
+ * resp->mmr_exitval.  (Per the original header, this updates a submirror
+ * component state and then resumes writes to the mirror.)
+ */
+/*ARGSUSED*/
+void
+mdmn_do_state_upd_reswr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_stch_t	*req;
+	md_set_state_params_t	ioc;
+
+	req = (md_mn_msg_stch_t *)(void *)msg->msg_event_data;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR,
+	    MD_MIN2SET(req->msg_stch_mnum));
+
+	/* Which component, and what it becomes */
+	ioc.mnum = req->msg_stch_mnum;
+	ioc.sm = req->msg_stch_sm;
+	ioc.comp = req->msg_stch_comp;
+	ioc.hs_id = req->msg_stch_hs_id;
+	ioc.state = req->msg_stch_new_state;
+
+	resp->mmr_exitval = metaioctl(MD_MN_SET_STATE, &ioc,
+	    &(ioc.mde), NULL);
+}
+
+/*
+ * submessage generator for MD_MN_MSG_STATE_UPDATE and MD_MN_MSG_STATE_UPDATE2
+ * Two submessages are produced: first a SUSPEND_WRITES, then - depending on
+ * the type of the original message - either STATE_UPDATE_RESWR or
+ * STATE_UPDATE_RESWR2 (which does the same, but runs on a higher class).
+ *
+ * The submessages are stored in msglist[0] and msglist[1]; the return value
+ * is the number of submessages generated (2).
+ */
+int
+mdmn_smgen_state_upd(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
+{
+	md_mn_msg_stch_t	*src;
+	md_mn_msg_t		*susp_msg;
+	md_mn_msg_t		*upd_msg;
+	md_mn_msg_suspwr_t	*susp_data;
+	md_mn_msg_stch_t	*upd_data;
+
+	src = (md_mn_msg_stch_t *)(void *)msg->msg_event_data;
+
+	/* Submessage 1: suspend writes on the mirror */
+	susp_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(susp_msg->msg_msgid));
+	susp_msg->msg_flags = MD_MSGF_NO_LOG;	/* Don't log submessages */
+	susp_msg->msg_setno = msg->msg_setno;
+	susp_msg->msg_type = MD_MN_MSG_SUSPEND_WRITES;
+	susp_msg->msg_event_size = sizeof (md_mn_msg_suspwr_t);
+	susp_msg->msg_event_data = Zalloc(sizeof (md_mn_msg_suspwr_t));
+	susp_data = (md_mn_msg_suspwr_t *)(void *)susp_msg->msg_event_data;
+	susp_data->msg_suspwr_mnum = src->msg_stch_mnum;
+	msglist[0] = susp_msg;
+
+	/* Submessage 2: state update + resume writes */
+	upd_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(upd_msg->msg_msgid));
+	upd_msg->msg_flags = MD_MSGF_NO_LOG;	/* Don't log submessages */
+	upd_msg->msg_setno = msg->msg_setno;
+	upd_msg->msg_type = (msg->msg_type == MD_MN_MSG_STATE_UPDATE2) ?
+	    MD_MN_MSG_STATE_UPDATE_RESWR2 : MD_MN_MSG_STATE_UPDATE_RESWR;
+	upd_msg->msg_event_size = sizeof (md_mn_msg_stch_t);
+	upd_msg->msg_event_data = Zalloc(sizeof (md_mn_msg_stch_t));
+	upd_data = (md_mn_msg_stch_t *)(void *)upd_msg->msg_event_data;
+	upd_data->msg_stch_mnum = src->msg_stch_mnum;
+	upd_data->msg_stch_sm = src->msg_stch_sm;
+	upd_data->msg_stch_comp = src->msg_stch_comp;
+	upd_data->msg_stch_new_state = src->msg_stch_new_state;
+	upd_data->msg_stch_hs_id = src->msg_stch_hs_id;
+	msglist[1] = upd_msg;
+
+	return (2);	/* Return the number of submessages generated */
+}
+
+/*
+ * handler for MD_MN_MSG_ALLOCATE_HOTSPARE and MD_MN_MSG_ALLOCATE_HOTSPARE2
+ * Requests allocation of a hotspare for one mirror component.  The
+ * component is identified by the mnum of the mirror, the submirror index
+ * and the component index carried in the message.  The return value of the
+ * MD_MN_ALLOCATE_HOTSPARE ioctl is reported in resp->mmr_exitval.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_allocate_hotspare(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_allochsp_t	*req;
+	md_alloc_hotsp_params_t	ioc;
+
+	req = (md_mn_msg_allochsp_t *)((void *)(msg->msg_event_data));
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR,
+	    MD_MIN2SET(req->msg_allochsp_mnum));
+
+	/* Identify the component that needs a hotspare */
+	ioc.mnum = req->msg_allochsp_mnum;
+	ioc.sm = req->msg_allochsp_sm;
+	ioc.comp = req->msg_allochsp_comp;
+	ioc.hs_id = req->msg_allochsp_hs_id;
+
+	resp->mmr_exitval = metaioctl(MD_MN_ALLOCATE_HOTSPARE, &ioc,
+	    &(ioc.mde), NULL);
+}
+
+/*
+ * handler for MD_MN_MSG_RESYNC_STARTING, MD_MN_MSG_RESYNC_FIRST,
+ * MD_MN_MSG_RESYNC_NEXT, MD_MN_MSG_RESYNC_FINISH, MD_MN_MSG_RESYNC_PHASE_DONE
+ *
+ * Repackages the resync message (including the message type itself and the
+ * per-submirror state/flags) as a MD_MN_RESYNC ioctl request and reports
+ * the ioctl return value in resp->mmr_exitval.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_resync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_resync_t	*req;
+	md_mn_rs_params_t	rsparm;
+	int			sm;
+
+	req = (md_mn_msg_resync_t *)((void *)(msg->msg_event_data));
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&rsparm, 0, sizeof (rsparm));
+	MD_SETDRIVERNAME(&rsparm, MD_MIRROR,
+	    MD_MIN2SET(req->msg_resync_mnum))
+
+	/* The kernel is told which of the resync messages this is */
+	rsparm.msg_type = (int)msg->msg_type;
+	rsparm.mnum = req->msg_resync_mnum;
+	rsparm.rs_originator = req->msg_originator;
+	rsparm.rs_type = req->msg_resync_type;
+	rsparm.rs_flags = req->msg_resync_flags;
+	rsparm.rs_start = req->msg_resync_start;
+	rsparm.rs_size = req->msg_resync_rsize;
+	rsparm.rs_done = req->msg_resync_done;
+	rsparm.rs_2_do = req->msg_resync_2_do;
+
+	/* Per-submirror state and flags */
+	for (sm = 0; sm < NMIRROR; sm++) {
+		rsparm.rs_sm_state[sm] = req->msg_sm_state[sm];
+		rsparm.rs_sm_flags[sm] = req->msg_sm_flags[sm];
+	}
+
+	resp->mmr_exitval = metaioctl(MD_MN_RESYNC, &rsparm, &rsparm.mde,
+	    NULL);
+}
+
+/*
+ * handler for MD_MN_MSG_SETSYNC
+ * Passes the copysize and flags from the message to the mirror driver via
+ * the MD_MN_SETSYNC ioctl; the ioctl return value is reported in
+ * resp->mmr_exitval.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_setsync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_setsync_t	*req;
+	md_resync_ioctl_t	ioc;
+
+	req = (md_mn_msg_setsync_t *)((void *)(msg->msg_event_data));
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR, MD_MIN2SET(req->setsync_mnum))
+	ioc.ri_mnum = req->setsync_mnum;
+	ioc.ri_flags = req->setsync_flags;
+	ioc.ri_copysize = req->setsync_copysize;
+
+	resp->mmr_exitval = metaioctl(MD_MN_SETSYNC, &ioc, &ioc.mde, NULL);
+}
+
+/*
+ * handler for MD_MN_MSG_SET_CAP. As this handler can deal with both mirrors
+ * and soft partitions, the driver name that is required for the ioctl call
+ * is taken from the message rather than being fixed.
+ * The MD_MN_SET_CAP ioctl return value is reported in resp->mmr_exitval.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_set_cap(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_setcap_t	*req;
+	md_mn_setcap_params_t	ioc;
+	minor_t			unit;
+
+	req = (md_mn_msg_setcap_t *)((void *)(msg->msg_event_data));
+	unit = req->msg_setcap_mnum;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, req->msg_setcap_driver, MD_MIN2SET(unit));
+	ioc.sc_set = req->msg_setcap_set;
+	ioc.mnum = unit;
+
+	resp->mmr_exitval = metaioctl(MD_MN_SET_CAP, &ioc, &ioc.mde, NULL);
+}
+
+/*
+ * Dummy handler for various CLASS0 messages like
+ * MD_MN_MSG_VERBOSITY / MD_MN_MSG_RESUME / MD_MN_MSG_SUSPEND ...
+ * It does no work: the response is an ACK with exit value 0 and no
+ * stdout/stderr payload.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_dummy(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_exitval = 0;
+
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+}
+
+/*
+ * Overall description of mdcommd support that keeps all nodes in-sync
+ * with the ondisk diskset mddbs.
+ *
+ * All configuration changes to the mddb - addition/deletion of metadevices
+ * or replicas must use a CLASS1 message to block out these changes.
+ * Changes to the state of existing replicas do not need to block CLASS1
+ * since there is no conflict when just updating the state of a replica.
+ *
+ * Error encountered when master writes to mddbs:
+ * As the master updates parts of the mddbs, flags are updated describing
+ * what has been written. When all locks are dropped (either in
+ * mddb_setexit or mdioctl), a PARSE message will be generated to all
+ * nodes with an index list of known good mddbs and the parse flags.
+ * The master node ignores the parse message since it sent it.
+ * The slave nodes re-read in the changed part of the mddb using the list
+ * of known good replicas that was passed.
+ * PARSE message does not block CLASS1.
+ * The PARSE message must be the highest class message. Since this
+ * message could be sent on any ioctl, this PARSE message class must
+ * be higher than any other class message that could issue an ioctl.
+ *
+ * Master Slave1 Slave2
+ * Handles_error
+ * PARSE PARSE PARSE
+ *
+ *
+ * Add/Delete mddbs can occur from the following commands:
+ * metadb -s set_name -a/-d
+ * metaset -s set_name -a/-d disk
+ * metaset -s set_name -b
+ *
+ * The metadb/metaset command is run on the node executing the command
+ * and sends an ATTACH/DETACH message to the master node blocking CLASS1
+ * messages on all nodes until this message is finished. The master
+ * node generates 3 submessages of BLOCK, SM_ATTACH/SM_DETACH, UNBLOCK.
+ * The BLOCK message is only run on the master node and will BLOCK
+ * the PARSE messages from being sent to the nodes.
+ * The SM_ATTACH/SM_DETACH message is run on all nodes and actually adds or
+ * removes the replica(s) from the given disk slice.
+ * The UNBLOCK message is only run on the master node and allows the
+ * sending of PARSE messages.
+ *
+ * Master Slave1 Slave2
+ * Add mddb cmd
+ * ATTACH msg to master
+ * BLOCK
+ * ATTACH ATTACH ATTACH
+ * UNBLOCK
+ * PARSE PARSE PARSE
+ * ATTACH msg finished
+ *
+ * Add/Delete host side information from the following commands:
+ * metaset -s set_name -a/-d -h
+ *
+ * The metaset command is run on the node executing the command and
+ * sends a DB_NEWSIDE/DB_DELSIDE message and a MD_NEWSIDE/MD_DELSIDE
+ * message whenever a host is added to or deleted from the diskset.
+ *
+ * The side information contains the major name and minor number
+ * associated with a disk slice from a certain node's perspective
+ * in a (failed) effort to support clustered systems that don't have the
+ * same device name for a physical device. (The original designers of
+ * SVM eventually took the shortcut of assuming that all device names
+ * are the same on all systems, but left the side information in the
+ * mddb and namespace.) The side information is used for disk slices
+ * that contain mddbs and/or are components for metadevices.
+ *
+ * The DB_NEWSIDE/DELSIDE command adds or deletes the side information
+ * for each mddb for the host being added or deleted.
+ * The MD_ADDSIDE/MD_DELSIDE command adds or deletes the side information
+ * for all disk slice components that are in the namespace records for
+ * the host being added or deleted.
+ *
+ * The DB_NEWSIDE/DB_DELSIDE message does not change any mddb records
+ * and only needs to be executed on the master node since the slave
+ * nodes will be brought up to date by the PARSE message that is
+ * generated as a result of a change to the mddb.
+ * The MD_ADDSIDE/MD_DELSIDE message does modify the records in the mddb
+ * and needs to be run on all nodes. The message must block class1
+ * messages so that record changing commands don't interfere.
+ *
+ * Master Slave1 Slave2
+ * Add host
+ * DB_NEWSIDE msg to master
+ * DB_NEWSIDE
+ * PARSE PARSE PARSE
+ * DB_NEWSIDE msg finished
+ * MD_NEWSIDE msg to master
+ * MD_NEWSIDE MD_NEWSIDE MD_NEWSIDE
+ * MD_NEWSIDE msg finished
+ *
+ *
+ * Optimized resync record failure:
+ * When any node sees a failure to write an optimized resync record
+ * that node notifies the master node of the replica that failed.
+ * The master node handles the error and updates the rest of the
+ * nodes using a PARSE message. The PARSE message also calls
+ * fixoptrecord on each slave node causing each node to fix up
+ * the optimized resync records that are owned by that node (the mirror
+ * owner code also sets the optimized resync record owner). The master
+ * node will fix up all optimized resync records that have no owner or
+ * are owned by the master node.
+ *
+ * Master Slave1 Slave2
+ * Optimized Record Failure
+ * OPTRECERR msg to master
+ * Master handles opt rec failure
+ * PARSE PARSE PARSE
+ * OPTRECERR msg finished
+ * Slave rewrites optimized record
+ *
+ */
+
+/*
+ * Handler for MD_MN_MSG_MDDB_PARSE which sends parse messages to the
+ * slave nodes in order to keep the incore view of the mddbs the
+ * same on all nodes.
+ *
+ * Since master node generated the mddb parse message, do nothing
+ * if this is the master node (flags has MD_MSGF_ON_MASTER set); the
+ * response is then an ACK with exit value 0.
+ *
+ * If this is a slave node, send the parse message down to the kernel
+ * where this node will re-read in parts of the mddbs.  On ioctl failure
+ * the error is moved into resp->mmr_ep and the ioctl return value is
+ * reported in resp->mmr_exitval.
+ */
+void
+mdmn_do_mddb_parse(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_mddb_parse_t	*d;
+	mddb_parse_parm_t	mpp;
+	int			ret = 0;
+	int			i;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_mddb_parse_t *)((void *)(msg->msg_event_data));
+
+	if (flags & MD_MSGF_ON_MASTER) {
+		/*
+		 * Every other path sets mmr_exitval; do so here too
+		 * instead of leaving whatever the caller had in it.
+		 */
+		resp->mmr_exitval = ret;
+		return;
+	}
+
+	(void) memset(&mpp, 0, sizeof (mpp));
+	mpp.c_setno = msg->msg_setno;
+	mpp.c_parse_flags = d->msg_parse_flags;
+	/* Pass along the list of replicas the master marked in the message */
+	for (i = 0; i < MDDB_NLB; i++) {
+		mpp.c_lb_flags[i] = d->msg_lb_flags[i];
+	}
+	ret = metaioctl(MD_MN_MDDB_PARSE, &mpp, &mpp.c_mde, NULL);
+	if (ret)
+		(void) mdstealerror(&(resp->mmr_ep), &mpp.c_mde);
+
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * Handler for MD_MN_MSG_MDDB_BLOCK which blocks the generation
+ * of parse messages from this node.
+ *
+ * This is needed when attaching/detaching mddbs on the master and the
+ * slave node is unable to handle a parse message until the slave node
+ * has done the attach/detach of the mddbs. So, master node will block
+ * the parse messages, execute the attach/detach on all nodes and
+ * then unblock the parse messages which causes the parse message to
+ * be sent to all nodes.
+ *
+ * The block flags from the message are passed to the MD_MN_MDDB_BLOCK
+ * ioctl; on failure the ioctl error is moved into resp->mmr_ep.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_mddb_block(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_mddb_block_t	*req;
+	mddb_block_parm_t	blk;
+	int			rval;
+
+	req = (md_mn_msg_mddb_block_t *)((void *)(msg->msg_event_data));
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&blk, 0, sizeof (blk));
+	blk.c_blk_flags = req->msg_block_flags;
+	blk.c_setno = msg->msg_setno;
+
+	rval = metaioctl(MD_MN_MDDB_BLOCK, &blk, &blk.c_mde, NULL);
+	if (rval != 0)
+		(void) mdstealerror(&(resp->mmr_ep), &blk.c_mde);
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * Submessage generator for MD_MN_MSG_META_DB_ATTACH which generates
+ * a BLOCK message on the master node only, a MD_MN_MSG_SM_MDDB_ATTACH
+ * message on all nodes and then an UNBLOCK message on the master only.
+ *
+ * The three submessages are stored in msglist[0..2]; the return value is
+ * the number of submessages generated (3).
+ */
+int
+mdmn_smgen_mddb_attach(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
+{
+	md_mn_msg_meta_db_attach_t	*src;
+	md_mn_msg_t			*blk_msg;
+	md_mn_msg_t			*att_msg;
+	md_mn_msg_t			*unblk_msg;
+	md_mn_msg_mddb_block_t		*blk_data;
+	md_mn_msg_meta_db_attach_t	*att_data;
+
+	src = (md_mn_msg_meta_db_attach_t *)(void *)msg->msg_event_data;
+
+	/* Submessage 1: block PARSE messages (not broadcast) */
+	blk_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(blk_msg->msg_msgid));
+	blk_msg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
+	blk_msg->msg_setno = msg->msg_setno;
+	blk_msg->msg_type = MD_MN_MSG_MDDB_BLOCK;
+	blk_msg->msg_event_size = sizeof (md_mn_msg_mddb_block_t);
+	blk_msg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t));
+	blk_data = (md_mn_msg_mddb_block_t *)(void *)blk_msg->msg_event_data;
+	blk_data->msg_block_flags = MDDB_BLOCK_PARSE;
+	msglist[0] = blk_msg;
+
+	/* Submessage 2: the attach itself, a copy of the caller's request */
+	att_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(att_msg->msg_msgid));
+	/* Don't log submessages and panic on inconsistent results */
+	att_msg->msg_flags = MD_MSGF_NO_LOG |
+	    MD_MSGF_PANIC_WHEN_INCONSISTENT;
+	att_msg->msg_setno = msg->msg_setno;
+	att_msg->msg_type = MD_MN_MSG_SM_MDDB_ATTACH;
+	att_msg->msg_event_size = sizeof (md_mn_msg_meta_db_attach_t);
+	att_msg->msg_event_data = Zalloc(sizeof (md_mn_msg_meta_db_attach_t));
+	att_data = (md_mn_msg_meta_db_attach_t *)
+	    (void *)att_msg->msg_event_data;
+	att_data->msg_l_dev = src->msg_l_dev;
+	att_data->msg_cnt = src->msg_cnt;
+	att_data->msg_dbsize = src->msg_dbsize;
+	(void) strncpy(att_data->msg_dname, src->msg_dname, 16);
+	att_data->msg_splitname = src->msg_splitname;
+	att_data->msg_options = src->msg_options;
+	msglist[1] = att_msg;
+
+	/* Submessage 3: unblock PARSE messages (not broadcast) */
+	unblk_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(unblk_msg->msg_msgid));
+	unblk_msg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
+	unblk_msg->msg_setno = msg->msg_setno;
+	unblk_msg->msg_type = MD_MN_MSG_MDDB_BLOCK;
+	unblk_msg->msg_event_size = sizeof (md_mn_msg_mddb_block_t);
+	unblk_msg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t));
+	blk_data = (md_mn_msg_mddb_block_t *)
+	    (void *)unblk_msg->msg_event_data;
+	blk_data->msg_block_flags = MDDB_UNBLOCK_PARSE;
+	msglist[2] = unblk_msg;
+
+	return (3);	/* Return the number of submessages generated */
+}
+
+/*
+ * Submessage generator for MD_MN_MSG_META_DB_DETACH which generates
+ * a BLOCK message on the master node only, a MD_MN_MSG_SM_MDDB_DETACH
+ * message on all nodes and then an UNBLOCK message on the master only.
+ *
+ * The three submessages are stored in msglist[0..2]; the return value is
+ * the number of submessages generated (3).
+ */
+int
+mdmn_smgen_mddb_detach(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
+{
+	md_mn_msg_meta_db_detach_t	*src;
+	md_mn_msg_t			*blk_msg;
+	md_mn_msg_t			*det_msg;
+	md_mn_msg_t			*unblk_msg;
+	md_mn_msg_mddb_block_t		*blk_data;
+	md_mn_msg_meta_db_detach_t	*det_data;
+
+	src = (md_mn_msg_meta_db_detach_t *)(void *)msg->msg_event_data;
+
+	/* Submessage 1: block PARSE messages (not broadcast) */
+	blk_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(blk_msg->msg_msgid));
+	blk_msg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
+	blk_msg->msg_setno = msg->msg_setno;
+	blk_msg->msg_type = MD_MN_MSG_MDDB_BLOCK;
+	blk_msg->msg_event_size = sizeof (md_mn_msg_mddb_block_t);
+	blk_msg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t));
+	blk_data = (md_mn_msg_mddb_block_t *)(void *)blk_msg->msg_event_data;
+	blk_data->msg_block_flags = MDDB_BLOCK_PARSE;
+	msglist[0] = blk_msg;
+
+	/* Submessage 2: the detach itself */
+	det_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(det_msg->msg_msgid));
+	/* Don't log submessages and panic on inconsistent results */
+	det_msg->msg_flags = MD_MSGF_NO_LOG |
+	    MD_MSGF_PANIC_WHEN_INCONSISTENT;
+	det_msg->msg_setno = msg->msg_setno;
+	det_msg->msg_type = MD_MN_MSG_SM_MDDB_DETACH;
+	det_msg->msg_event_size = sizeof (md_mn_msg_meta_db_detach_t);
+	det_msg->msg_event_data = Zalloc(sizeof (md_mn_msg_meta_db_detach_t));
+	det_data = (md_mn_msg_meta_db_detach_t *)
+	    (void *)det_msg->msg_event_data;
+	det_data->msg_splitname = src->msg_splitname;
+	msglist[1] = det_msg;
+
+	/* Submessage 3: unblock PARSE messages (not broadcast) */
+	unblk_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(unblk_msg->msg_msgid));
+	unblk_msg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
+	unblk_msg->msg_setno = msg->msg_setno;
+	unblk_msg->msg_type = MD_MN_MSG_MDDB_BLOCK;
+	unblk_msg->msg_event_size = sizeof (md_mn_msg_mddb_block_t);
+	unblk_msg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t));
+	blk_data = (md_mn_msg_mddb_block_t *)
+	    (void *)unblk_msg->msg_event_data;
+	blk_data->msg_block_flags = MDDB_UNBLOCK_PARSE;
+	msglist[2] = unblk_msg;
+
+	return (3);	/* Return the number of submessages generated */
+}
+
+/*
+ * Handler for MD_MN_MSG_SM_MDDB_ATTACH which is used to attach mddbs.
+ *
+ * Used when running:
+ *	metadb -s set_name -a
+ *	metaset -s set_name -a/-d disk
+ *	metaset -s set_name -b
+ *
+ * For each of the msg_cnt replicas requested, a MD_DB_NEWDEV ioctl is
+ * issued and the side names are added.  resp->mmr_exitval is 0 on success
+ * and non-zero on failure, in which case the error is moved into
+ * resp->mmr_ep.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_sm_mddb_attach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_db_attach_t	*d;
+	struct mddb_config		c;
+	int				i;
+	int				ret = 0;
+	md_error_t			ep = mdnullerror;
+	char				*name, *add_name;
+	mdname_t			*np;
+	mdsetname_t			*sp;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_meta_db_attach_t *)((void *)(msg->msg_event_data));
+
+	(void) memset(&c, 0, sizeof (c));
+	c.c_setno = msg->msg_setno;
+	c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev);
+	(void) strncpy(c.c_locator.l_driver, d->msg_dname,
+	    sizeof (c.c_locator.l_driver));
+	c.c_devname = d->msg_splitname;
+	c.c_locator.l_mnum = meta_getminor(d->msg_l_dev);
+	c.c_multi_node = 1;
+	if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+	(void) strcpy(c.c_setname, sp->setname);
+	c.c_sideno = getmyside(sp, &ep);
+	if (c.c_sideno == MD_SIDEWILD) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	name = splicename(&d->msg_splitname);
+	if ((np = metaname(&sp, name, &ep)) == NULL) {
+		Free(name);
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+	/*
+	 * All nodes in MN diskset must do meta_check_replica
+	 * since this causes the shared namespace to be
+	 * populated by the md driver names while checking
+	 * to see if this device is already in use as a
+	 * metadevice.
+	 */
+	if (meta_check_replica(sp, np, d->msg_options, 0,
+	    (d->msg_cnt * d->msg_dbsize), &ep)) {
+		/* The spliced name is ours to release on every exit path */
+		Free(name);
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	for (i = 0; i < d->msg_cnt; i++) {
+		c.c_locator.l_blkno = i * d->msg_dbsize + 16;
+		if (setup_med_cfg(sp, &c,
+		    (d->msg_options & MDCHK_SET_FORCE), &ep)) {
+			ret = -1;
+			(void) mdstealerror(&(resp->mmr_ep), &ep);
+			break;
+		}
+		ret = metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL);
+		/* If newdev was successful, continue with attach */
+		if (ret == 0) {
+			if (meta_db_addsidenms(sp, np, c.c_locator.l_blkno,
+			    DB_ADDSIDENMS_NO_BCAST, &ep)) {
+				ret = -1;
+				(void) mdstealerror(&(resp->mmr_ep), &ep);
+				break;
+			}
+		} else {
+			(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
+			break;
+		}
+	}
+
+	/* Look the name up again and invalidate it, whatever the outcome */
+	add_name = splicename(&d->msg_splitname);
+	if ((np = metaname(&sp, add_name, &ep)) != NULL) {
+		meta_invalidate_name(np);
+	} else {
+		ret = -1;
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+	}
+	Free(add_name);
+	/*
+	 * Release the first spliced name as well; it was previously freed
+	 * only when the initial metaname() lookup failed.  It is released
+	 * last, once nothing derived from it is used any more.
+	 */
+	Free(name);
+
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * Handler for MD_MN_MSG_SM_MDDB_DETACH which is used to detach mddbs.
+ *
+ * Used when running:
+ *	metadb -s set_name -d
+ *	metaset -s set_name -a/-d disk
+ *	metaset -s set_name -b
+ *
+ * Walks the replicas of the set and issues MD_DB_DELDEV for each one whose
+ * device name equals the name carried in the message.  resp->mmr_exitval
+ * is 0 on success and -1 on failure, in which case the error is moved into
+ * resp->mmr_ep.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_sm_mddb_detach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_db_detach_t	*req;
+	struct mddb_config		cfg;
+	md_error_t			ep = mdnullerror;
+	mdsetname_t			*sp;
+	mdname_t			*np;
+	char				*target;
+	char				*cur;
+	int				idx;
+	int				same;
+	int				rval = 0;
+
+	req = (md_mn_msg_meta_db_detach_t *)((void *)(msg->msg_event_data));
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	sp = metasetnosetname(msg->msg_setno, &ep);
+	if (sp == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	/* Initial query; the loop below relies on c_dbcnt from here on */
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_setno = msg->msg_setno;
+	if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde, NULL) != 0) {
+		resp->mmr_exitval = -1;
+		(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+		return;
+	}
+
+	target = splicename(&req->msg_splitname);
+	for (idx = 0; idx < cfg.c_dbcnt; /* advanced in the body */) {
+		cfg.c_id = idx;
+		if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde, NULL) != 0) {
+			rval = -1;
+			(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+			break;
+		}
+
+		cur = splicename(&cfg.c_devname);
+		same = (strcmp(cur, target) == 0);
+		Free(cur);
+		if (!same) {
+			idx++;
+			continue;
+		}
+
+		/* Found a match - delete mddb */
+		if (metaioctl(MD_DB_DELDEV, &cfg, &cfg.c_mde, NULL) != 0) {
+			rval = -1;
+			(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+			break;
+		}
+		/* Not incrementing "idx" intentionally (dbcnt is changed) */
+	}
+
+	np = metaname(&sp, target, &ep);
+	if (np == NULL) {
+		rval = -1;
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+	} else {
+		meta_invalidate_name(np);
+	}
+	Free(target);
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * Handler for MD_MN_MSG_META_DB_NEWSIDE which is used to update the
+ * side information for each diskset mddb when a new host has been
+ * added to the diskset. The side information is the /dev/dsk/ctds name
+ * that the new node would use to access each mddb.
+ *
+ * Since this routine makes no changes to the records in the diskset mddb,
+ * this routine only needs to be run on the master node. The master node's
+ * kernel code will detect that portions of the mddb have changed and
+ * will send a parse message to all nodes to re-parse parts of the mddb.
+ *
+ * Used when running:
+ *	metaset -s set_name -a -h new_hostname
+ *
+ * resp->mmr_exitval is -1 if the set cannot be resolved, otherwise the
+ * MD_DB_NEWSIDE ioctl return value; on failure the error is moved into
+ * resp->mmr_ep.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_meta_db_newside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_db_newside_t	*req;
+	struct mddb_config		cfg;
+	md_error_t			ep = mdnullerror;
+	mdsetname_t			*sp;
+	int				rval = 0;
+
+	req = (md_mn_msg_meta_db_newside_t *)((void *)(msg->msg_event_data));
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	/* Describe the replica and how the new side names it */
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_setno = msg->msg_setno;
+	cfg.c_multi_node = 1;
+	cfg.c_devname = req->msg_splitname;
+	cfg.c_locator.l_dev = meta_cmpldev(req->msg_l_dev);
+	cfg.c_locator.l_blkno = req->msg_blkno;
+	cfg.c_locator.l_mnum = req->msg_mnum;
+	(void) strncpy(cfg.c_locator.l_driver, req->msg_dname,
+	    sizeof (cfg.c_locator.l_driver));
+
+	sp = metasetnosetname(cfg.c_setno, &ep);
+	if (sp == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+	(void) strcpy(cfg.c_setname, sp->setname);
+	cfg.c_sideno = req->msg_sideno;
+
+	rval = metaioctl(MD_DB_NEWSIDE, &cfg, &cfg.c_mde, NULL);
+	if (rval != 0)
+		(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * mdmn_do_meta_db_delside
+ *
+ * Handler for MD_MN_MSG_META_DB_DELSIDE which is used to remove the
+ * side information for each diskset mddb when a host has been
+ * deleted from the diskset.  The side information is the /dev/dsk/ctds name
+ * that the node would use to access each mddb.
+ *
+ * Since this routine makes no changes to the records in the diskset mddb,
+ * this routine only needs to be run on the master node.  The master node's
+ * kernel code will detect that portions of the mddb have changed and
+ * will send a parse message to all nodes to re-parse parts of the mddb.
+ *
+ * Used when running:
+ *	metaset -s set_name -d -h hostname
+ *
+ * Parameters:
+ *	msg	- request; msg_setno selects the set and msg_event_data
+ *		  carries a md_mn_msg_meta_db_delside_t naming the replica
+ *		  (device, block number) and the side to remove
+ *	flags	- unused
+ *	resp	- result to fill in.  mmr_exitval is -1 when the set number
+ *		  cannot be mapped to a set name, otherwise it is the return
+ *		  value of the MD_DB_DELSIDE ioctl.  On failure the error is
+ *		  moved into mmr_ep.  No output/error buffers are returned.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_meta_db_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_db_delside_t	*d;
+	mddb_config_t			c;
+	int				ret = 0;
+	mdsetname_t			*sp;
+	md_error_t			ep = mdnullerror;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_meta_db_delside_t *)((void *)(msg->msg_event_data));
+
+	(void) memset(&c, 0, sizeof (c));
+	c.c_setno = msg->msg_setno;
+	c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev);
+	c.c_locator.l_blkno = d->msg_blkno;
+	c.c_multi_node = 1;
+	if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+	/*
+	 * Bounded copy of the set name into the fixed-size ioctl buffer.
+	 * The structure was zeroed above, so copying at most size - 1
+	 * bytes always leaves the name NUL terminated.
+	 */
+	(void) strncpy(c.c_setname, sp->setname, sizeof (c.c_setname) - 1);
+	c.c_sideno = d->msg_sideno;
+
+	if ((ret = metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL)) != 0) {
+		(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
+	}
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * mdmn_do_meta_md_addside
+ *
+ * Handler for MD_MN_MSG_META_MD_ADDSIDE which is used to add the
+ * side information for each diskset metadevice component (if that
+ * component is a disk) when a host has been added to the diskset.
+ * The side information is the /dev/dsk/ctds name that the node would
+ * use to access the metadevice component.
+ *
+ * This routine makes changes to the mddb records and must be run
+ * on all nodes.
+ *
+ * Used when running:
+ *	metaset -s set_name -a -h new_hostname
+ *
+ * The namespace of side msg_otherside is walked with MD_IOCNXTKEY_NM;
+ * for every key found, the matching name for side msg_sideno is added
+ * once per reference.  resp->mmr_exitval is 0 once the walk reaches
+ * MD_KEYWILD and -1 as soon as any step fails.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_meta_md_addside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_md_addside_t	*d;
+	mdnm_params_t			nm;
+	mdsetname_t			*sp;
+	md_error_t			ep = mdnullerror;
+	char				*cname, *dname;
+	minor_t				mnum;
+	int				rval, ref, failed;
+
+	d = (md_mn_msg_meta_md_addside_t *)((void *)(msg->msg_event_data));
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&nm, 0, sizeof (nm));
+	sp = metasetnosetname(msg->msg_setno, &ep);
+	if (sp == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	/* Iterate until MD_IOCNXTKEY_NM hands back a key of MD_KEYWILD */
+	for (;;) {
+		nm.mde = mdnullerror;
+		nm.setno = msg->msg_setno;
+		nm.side = d->msg_otherside;
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			(void) mdstealerror(&(resp->mmr_ep), &nm.mde);
+			resp->mmr_exitval = -1;
+			return;
+		}
+
+		/* End of the namespace: this is the normal way out */
+		if (nm.key == MD_KEYWILD) {
+			resp->mmr_exitval = 0;
+			return;
+		}
+
+		/* Name of this key as seen from the other side */
+		nm.devname = (uint64_t)meta_getnmbykey(msg->msg_setno,
+		    d->msg_otherside, nm.key, &ep);
+		if (nm.devname == NULL) {
+			(void) mdstealerror(&(resp->mmr_ep), &ep);
+			resp->mmr_exitval = -1;
+			return;
+		}
+
+		/* Translate it into device information for the new side */
+		nm.side = d->msg_sideno;
+		rval = meta_getside_devinfo(sp, (char *)nm.devname,
+		    d->msg_sideno, &cname, &dname, &mnum, &ep);
+		if (rval == -1) {
+			(void) mdstealerror(&(resp->mmr_ep), &ep);
+			Free((void *)nm.devname);
+			resp->mmr_exitval = -1;
+			return;
+		}
+		Free((void *)nm.devname);
+
+		/* Any result other than 1 is a failure without error info */
+		failed = (rval != 1);
+
+		/*
+		 * The device reference count can be greater than 1 if
+		 * more than one softpart is configured on top of the
+		 * same device.  If this is the case then we want to
+		 * increment the count to sync up with the other sides.
+		 */
+		for (ref = 0; !failed && ref < nm.ref_count; ref++) {
+			if (add_name(sp, d->msg_sideno, nm.key, dname, mnum,
+			    cname, &ep) == -1) {
+				(void) mdstealerror(&(resp->mmr_ep), &ep);
+				failed = 1;
+			}
+		}
+
+		/* The names are released on success and failure alike */
+		Free(cname);
+		Free(dname);
+		if (failed) {
+			resp->mmr_exitval = -1;
+			return;
+		}
+	}
+
+	/*NOTREACHED*/
+}
+/*
+ * mdmn_do_meta_md_delside
+ *
+ * Handler for MD_MN_MSG_META_MD_DELSIDE which is used to delete the
+ * side information for each diskset metadevice component (if that
+ * component is a disk) when a host has been removed from the diskset.
+ * The side information is the /dev/dsk/ctds name that the node would
+ * use to access the metadevice component.
+ *
+ * This routine makes changes to the mddb records and must be run
+ * on all nodes.
+ *
+ * Used when running:
+ *	metaset -s set_name -d -h hostname
+ *
+ * The namespace is walked with MD_IOCNXTKEY_NM (side MD_SIDEWILD) and
+ * the name for side msg_sideno is deleted once per reference for every
+ * key found.  resp->mmr_exitval is 0 once the walk reaches MD_KEYWILD
+ * and -1 as soon as any step fails.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_meta_md_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_md_delside_t	*d;
+	mdnm_params_t			nm;
+	mdsetname_t			*sp;
+	md_error_t			ep = mdnullerror;
+	int				ref, failed;
+	int				status;
+
+	d = (md_mn_msg_meta_md_delside_t *)((void *)(msg->msg_event_data));
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+
+	sp = metasetnosetname(msg->msg_setno, &ep);
+	if (sp == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	(void) memset(&nm, 0, sizeof (nm));
+	nm.key = MD_KEYWILD;
+	for (;;) {
+		nm.mde = mdnullerror;
+		nm.setno = msg->msg_setno;
+		nm.side = MD_SIDEWILD;
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			(void) mdstealerror(&(resp->mmr_ep), &nm.mde);
+			status = -1;
+			break;
+		}
+
+		/* End of the namespace: this is the normal way out */
+		if (nm.key == MD_KEYWILD) {
+			status = 0;
+			break;
+		}
+
+		/*
+		 * The device reference count can be greater than 1 if
+		 * more than one softpart is configured on top of the
+		 * same device.  If this is the case then we want to
+		 * decrement the count to zero so the entry can be
+		 * actually removed.
+		 */
+		failed = 0;
+		for (ref = 0; !failed && ref < nm.ref_count; ref++) {
+			if (del_name(sp, d->msg_sideno, nm.key, &ep) == -1) {
+				(void) mdstealerror(&(resp->mmr_ep), &ep);
+				failed = 1;
+			}
+		}
+		if (failed) {
+			status = -1;
+			break;
+		}
+	}
+
+	resp->mmr_exitval = status;
+}
+
+/*
+ * mdmn_do_mddb_optrecerr
+ *
+ * Handler for MD_MN_MSG_MDDB_OPTRECERR which is used to notify
+ * the master node that a node has seen an error when attempting to
+ * write to the optimized resync records that reside on 2 of the diskset
+ * mddbs.  Master node will mark the failed replica in error and this
+ * will send a parse message to all nodes to re-read parts of the mddb
+ * and to fix their optimized resync records based on this information.
+ *
+ * The two msg_recerr entries of the message are handed to the
+ * MD_MN_MDDB_OPTRECFIX ioctl; its return value becomes
+ * resp->mmr_exitval and, when non-zero, the error is moved to mmr_ep.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_mddb_optrecerr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_mddb_optrecerr_t	*d;
+	mddb_optrec_parm_t		mop;
+	int				rval;
+
+	d = (md_mn_msg_mddb_optrecerr_t *)((void *)(msg->msg_event_data));
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+
+	(void) memset(&mop, 0, sizeof (mop));
+	mop.c_setno = msg->msg_setno;
+	/* One error indication per optimized resync record replica */
+	mop.c_recerr[0] = d->msg_recerr[0];
+	mop.c_recerr[1] = d->msg_recerr[1];
+
+	if ((rval = metaioctl(MD_MN_MDDB_OPTRECFIX, &mop, &mop.c_mde,
+	    NULL)) != 0) {
+		(void) mdstealerror(&(resp->mmr_ep), &mop.c_mde);
+	}
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * mdmn_smgen_test6
+ *
+ * Submessage generator: expands the incoming message into four
+ * submessages of types TEST2, TEST2, TEST3 and TEST4 (in that order).
+ * Each submessage is freshly allocated, carries a copy of the parent's
+ * message id and set number, is flagged MD_MSGF_NO_LOG and has a
+ * duplicated string as its event data.
+ *
+ * The submessages are stored in msglist[0..3]; the number of generated
+ * submessages (4) is returned.
+ */
+int
+mdmn_smgen_test6(md_mn_msg_t *msg, md_mn_msg_t **msglist)
+{
+	static const struct {
+		int	sm_type;	/* message type of the submessage */
+		char	*sm_data;	/* event data, a C string */
+		size_t	sm_size;	/* size of sm_data including the NUL */
+	} submsgs[] = {
+		{ MD_MN_MSG_TEST2, "test2", sizeof ("test2") },
+		{ MD_MN_MSG_TEST2, "test2", sizeof ("test2") },
+		{ MD_MN_MSG_TEST3, "test3", sizeof ("test3") },
+		{ MD_MN_MSG_TEST4, "test4", sizeof ("test4") }
+	};
+	const int	nsub = (int)(sizeof (submsgs) / sizeof (submsgs[0]));
+	md_mn_msg_t	*sub;
+	int		idx;
+
+	for (idx = 0; idx < nsub; idx++) {
+		sub = Zalloc(sizeof (md_mn_msg_t));
+		MSGID_COPY(&(msg->msg_msgid), &(sub->msg_msgid));
+
+		sub->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */
+		sub->msg_setno = msg->msg_setno;
+		sub->msg_type = submsgs[idx].sm_type;
+		sub->msg_event_size = submsgs[idx].sm_size;
+		sub->msg_event_data = Strdup(submsgs[idx].sm_data);
+		msglist[idx] = sub;
+	}
+
+	return (nsub); /* Return the number of submessages generated */
+}
+
+/*
+ * mdmn_do_iocset
+ *
+ * This is to send an MD_IOCSET ioctl to all nodes to create a soft
+ * partition.
+ *
+ * The set and the metadevice name are derived from the minor number in
+ * the message's iocset_params.  resp->mmr_exitval is 1 (after logging
+ * through syslog) when the set, the minor number or the metadevice name
+ * is rejected; otherwise it is the return value of the MD_IOCSET ioctl.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_iocset(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_iocset_t	*d;
+	md_error_t		mde = mdnullerror;
+	mdsetname_t		*sp;
+	mdname_t		*np;
+	set_t			setno;
+
+	d = (md_mn_msg_iocset_t *)(void *)msg->msg_event_data;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+
+	setno = MD_MIN2SET(d->iocset_params.mnum);
+
+	/* Each check only runs when all the previous ones passed */
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_IOCSET: Invalid setno %d\n"), setno);
+	} else if ((np = metamnumname(&sp, d->iocset_params.mnum, 1,
+	    &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_IOCSET: Invalid mnum %d\n"),
+		    d->iocset_params.mnum);
+	} else if (meta_init_make_device(&sp, np->cname, &mde) == -1) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_IOCSET: Invalid metadevice name %s\n"),
+		    np->cname);
+	} else {
+		/* The unit travels inside the message; point the ioctl at it */
+		d->iocset_params.mdp = (uint64_t)&d->unit;
+		resp->mmr_exitval = metaioctl(MD_IOCSET, &(d->iocset_params),
+		    &mde, np->cname);
+		return;
+	}
+
+	/* One of the validation steps above failed and has been logged */
+	resp->mmr_exitval = 1;
+}
+
+/*
+ * mdmn_do_sp_setstat
+ *
+ * This is to update the status of a softpart.
+ *
+ * Parameters:
+ *	msg	- request; msg_event_data carries a md_mn_msg_sp_setstat_t
+ *		  with the minor number (sp_setstat_mnum) and the new
+ *		  status (sp_setstat_status)
+ *	flags	- unused
+ *	resp	- result to fill in.  mmr_exitval is 1 when the set derived
+ *		  from the minor number cannot be resolved (this is also
+ *		  logged through syslog), otherwise it is the return value
+ *		  of meta_sp_setstatus().  No output/error buffers are
+ *		  returned.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_sp_setstat(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_sp_setstat_t	*d;
+	int			ret;
+	set_t			setno;
+	mdsetname_t		*sp;
+	minor_t			mnum;
+	md_error_t		mde = mdnullerror;
+
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	d = (md_mn_msg_sp_setstat_t *)(void *)msg->msg_event_data;
+
+	mnum = d->sp_setstat_mnum;
+	setno = MD_MIN2SET(mnum);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		/*
+		 * Tag the log entry with this handler's own message name;
+		 * it used to claim MD_MN_MSG_IOCSET, which pointed whoever
+		 * read the log at the wrong handler.
+		 */
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_SP_SETSTAT: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	/* A single minor number is passed, hence the count of 1 */
+	ret = meta_sp_setstatus(sp, &mnum, 1, d->sp_setstat_status, &mde);
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * mdmn_do_addkeyname
+ *
+ * This is to add a key to the namespace.
+ *
+ * The component named by addkeyname_name is looked up in set
+ * addkeyname_setno and add_key_name() is called for it.  On success
+ * resp->mmr_exitval is the component's key; it is -1 when the set or
+ * the name cannot be resolved or when add_key_name() fails.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_addkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_addkeyname_t	*d;
+	md_error_t		mde = mdnullerror;
+	mdsetname_t		*sp;
+	mdname_t		*compnp;
+	set_t			setno;
+
+	d = (md_mn_msg_addkeyname_t *)(void *)msg->msg_event_data;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+
+	/* Assume failure; only a successful add replaces this value */
+	resp->mmr_exitval = -1;
+
+	setno = d->addkeyname_setno;
+	sp = metasetnosetname(setno, &mde);
+	if (sp == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_ADDKEYNAME: Invalid setno %d\n"), setno);
+		return;
+	}
+
+	compnp = metaname(&sp, d->addkeyname_name, &mde);
+	if (compnp == NULL)
+		return;
+
+	/* The key is read back from the name after the add */
+	if (add_key_name(sp, compnp, NULL, &mde) >= 0)
+		resp->mmr_exitval = compnp->key;
+}
+
+/*
+ * mdmn_do_delkeyname
+ *
+ * This is to delete a key from the namespace.
+ *
+ * The component is looked up by device (delkeyname_dev) in set
+ * delkeyname_setno and del_key_name() is called with the key taken
+ * from the message.  resp->mmr_exitval is the return value of
+ * del_key_name(), or -1 when the set or the device cannot be resolved.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_delkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_delkeyname_t	*d;
+	md_error_t		mde = mdnullerror;
+	mdsetname_t		*sp;
+	mdname_t		*compnp;
+	set_t			setno;
+
+	d = (md_mn_msg_delkeyname_t *)(void *)msg->msg_event_data;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+
+	setno = d->delkeyname_setno;
+	sp = metasetnosetname(setno, &mde);
+	if (sp == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_DELKEYNAME: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	compnp = metadevname(&sp, d->delkeyname_dev, &mde);
+	if (compnp == NULL) {
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	/*
+	 * Reset the key value for the name.  This is required because
+	 * any previous call of del_key_name for the same component
+	 * will have resulted in the key value being reset to MD_KEYBAD
+	 * even though there may still be references to this component.
+	 */
+	compnp->key = d->delkeyname_key;
+	resp->mmr_exitval = del_key_name(sp, compnp, &mde);
+}
+
+/*
+ * mdmn_do_get_tstate
+ *
+ * This is to get the value of tstate from the master node.  We use this
+ * to get the ABR state of a metadevice from the master.
+ *
+ * resp->mmr_exitval carries the tstate of device gettstate_dev; when
+ * meta_get_tstate() fails the failure is logged and 0 is reported.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_get_tstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_gettstate_t	*d;
+	md_error_t		mde = mdnullerror;
+	uint_t			tstate;
+
+	d = (md_mn_msg_gettstate_t *)(void *)msg->msg_event_data;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+
+	if (meta_get_tstate(d->gettstate_dev, &tstate, &mde) != 0) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_TSTATE: Invalid dev %llx\n"), d->gettstate_dev);
+		/* Whatever was stored in tstate is not trusted on failure */
+		resp->mmr_exitval = 0;
+		return;
+	}
+
+	resp->mmr_exitval = tstate;
+}
+
+/*
+ * mdmn_do_get_mirstate
+ *
+ * This is to get the mirror ABR state and the state of its submirrors from
+ * the master node.  We need this to ensure consistent output from metastat
+ * when a new node joins the cluster during a resync.  Without this the
+ * submirror status will be incorrect until the whole resync is complete which
+ * may take days for very large metadevices.
+ *
+ * Parameters:
+ *	msg	- request; msg_event_data carries a md_mn_msg_mir_state_t
+ *		  whose mir_state_mnum identifies the mirror
+ *	flags	- unused
+ *	resp	- result to fill in.  On success mmr_exitval is 0 and
+ *		  mmr_out points to an allocated md_mn_msg_mir_state_res_t
+ *		  holding sm_state/sm_flags for all NMIRROR submirrors and
+ *		  the mirror's tstate.  On failure mmr_exitval is
+ *			1 - set cannot be resolved
+ *			2 - no name for the minor number
+ *			3 - unit structure cannot be obtained
+ *			4 - tstate cannot be obtained
+ *		  and the result buffer is released (mmr_out NULL,
+ *		  mmr_out_size 0).
+ */
+/*ARGSUSED*/
+void
+mdmn_do_get_mirstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_mir_state_t		*d;
+	md_mn_msg_mir_state_res_t	*res;		/* Results */
+	set_t				setno;
+	mdsetname_t			*sp;		/* Set name */
+	mdname_t			*mirnp;		/* Mirror name */
+	md_error_t			mde = mdnullerror;
+	mm_unit_t			*mm;		/* Mirror */
+	int				smi;
+	int				failure;	/* Exit value on error */
+	uint_t				tstate;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out_size = sizeof (md_mn_msg_mir_state_res_t);
+	resp->mmr_err_size = 0;
+	resp->mmr_out = Malloc(resp->mmr_out_size);
+	resp->mmr_err = NULL;
+	d = (md_mn_msg_mir_state_t *)(void *)msg->msg_event_data;
+	res = (md_mn_msg_mir_state_res_t *)(void *)resp->mmr_out;
+
+	/*
+	 * Validate step by step; each step only runs when the previous ones
+	 * succeeded.  The minor number is cast explicitly wherever it is
+	 * logged so that the argument matches the conversion used in the
+	 * (unchanged) message text.
+	 */
+	setno = MD_MIN2SET(d->mir_state_mnum);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		/* Validate set information from minor number */
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_MIRROR_STATE: Invalid set %d\n"), setno);
+		failure = 1;
+	} else if ((mirnp = metamnumname(&sp, d->mir_state_mnum, 0,
+	    &mde)) == NULL) {
+		/* Construct mirror name from minor number */
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"),
+		    (unsigned long)d->mir_state_mnum);
+		failure = 2;
+	} else if ((mm = (mm_unit_t *)meta_get_mdunit(sp, mirnp,
+	    &mde)) == NULL) {
+		/* Get common mirror structure */
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_MIRROR_STATE: Invalid mirror minor %x\n"),
+		    (uint_t)d->mir_state_mnum);
+		failure = 3;
+	} else if (meta_get_tstate(d->mir_state_mnum, &tstate, &mde) != 0) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"),
+		    (unsigned long)d->mir_state_mnum);
+		failure = 4;
+	} else {
+		/*
+		 * Fill in the sm_state/sm_flags value in the results
+		 * structure which gets passed back to the message originator
+		 */
+		resp->mmr_exitval = 0;
+		for (smi = 0; (smi < NMIRROR); smi++) {
+			mm_submirror_t	*mmsp = &mm->un_sm[smi];
+
+			res->sm_state[smi] = mmsp->sm_state;
+			res->sm_flags[smi] = mmsp->sm_flags;
+		}
+		/* Return value of tstate for mirror */
+		res->mir_tstate = tstate;
+		return;
+	}
+
+	/*
+	 * Failure: release the result buffer.  The pointer is cleared as
+	 * well so the response never carries a dangling reference to the
+	 * freed memory (all other handlers report "no output" as NULL/0).
+	 */
+	Free(resp->mmr_out);
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_exitval = failure;
+}
+
+/*
+ * mdmn_do_poke_hotspares
+ *
+ * This is to issue an ioctl to call poke_hotspares.
+ *
+ * The request is addressed to the MD_MIRROR driver of set pokehsp_setno
+ * and resp->mmr_exitval is the return value of the
+ * MD_MN_POKE_HOTSPARES ioctl.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_poke_hotspares(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_pokehsp_t	*d;
+	md_mn_poke_hotspares_t	pokehsp;
+	int			rval;
+
+	d = (md_mn_msg_pokehsp_t *)(void *)msg->msg_event_data;
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+
+	/* Start from a zeroed request, then fill in driver name and set */
+	(void) memset(&pokehsp, 0, sizeof (pokehsp));
+	MD_SETDRIVERNAME(&pokehsp, MD_MIRROR, d->pokehsp_setno);
+
+	rval = metaioctl(MD_MN_POKE_HOTSPARES, &pokehsp, &pokehsp.mde, NULL);
+	resp->mmr_exitval = rval;
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c b/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c
new file mode 100644
index 0000000000..a6ba008376
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c
@@ -0,0 +1,690 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <meta.h>
+
+/*
+ * Message handlers referenced by msg_table below.  They are defined
+ * outside of this file; the parameter list is supplied by the
+ * HANDLER_PARMS macro (presumably the message, the flags and the result
+ * to fill in - confirm against the header that defines it).
+ */
+extern void mdmn_do_cmd(HANDLER_PARMS);
+extern void mdmn_do_clu(HANDLER_PARMS);
+extern void mdmn_do_req_owner(HANDLER_PARMS);
+extern void mdmn_do_susp_write(HANDLER_PARMS);
+extern void mdmn_do_state_upd_reswr(HANDLER_PARMS);
+extern void mdmn_do_allocate_hotspare(HANDLER_PARMS);
+extern void mdmn_do_poke_hotspares(HANDLER_PARMS);
+extern void mdmn_do_resync(HANDLER_PARMS);
+extern void mdmn_do_setsync(HANDLER_PARMS);
+extern void mdmn_do_choose_owner(HANDLER_PARMS);
+extern void mdmn_do_change_owner(HANDLER_PARMS);
+extern void mdmn_do_set_cap(HANDLER_PARMS);
+extern void mdmn_do_dummy(HANDLER_PARMS);
+extern void mdmn_do_mddb_parse(HANDLER_PARMS);
+extern void mdmn_do_mddb_block(HANDLER_PARMS);
+extern void mdmn_do_sm_mddb_attach(HANDLER_PARMS);
+extern void mdmn_do_sm_mddb_detach(HANDLER_PARMS);
+extern void mdmn_do_meta_db_newside(HANDLER_PARMS);
+extern void mdmn_do_meta_db_delside(HANDLER_PARMS);
+extern void mdmn_do_meta_md_addside(HANDLER_PARMS);
+extern void mdmn_do_meta_md_delside(HANDLER_PARMS);
+extern void mdmn_do_mddb_optrecerr(HANDLER_PARMS);
+extern void mdmn_do_iocset(HANDLER_PARMS);
+extern void mdmn_do_sp_setstat(HANDLER_PARMS);
+extern void mdmn_do_addkeyname(HANDLER_PARMS);
+extern void mdmn_do_delkeyname(HANDLER_PARMS);
+extern void mdmn_do_get_tstate(HANDLER_PARMS);
+extern void mdmn_do_get_mirstate(HANDLER_PARMS);
+
+/*
+ * Submessage generators referenced by msg_table below.  Each returns an
+ * int; the parameter list is supplied by the SMGEN_PARMS macro
+ * (presumably the parent message and the list receiving the generated
+ * submessages - confirm against the header that defines it).
+ */
+extern int mdmn_smgen_test6(SMGEN_PARMS);
+extern int mdmn_smgen_state_upd(SMGEN_PARMS);
+extern int mdmn_smgen_mddb_attach(SMGEN_PARMS);
+extern int mdmn_smgen_mddb_detach(SMGEN_PARMS);
+
+md_mn_msg_tbl_entry_t msg_table[MD_MN_NMESSAGES] = {
+
+/*
+ * In order to have fast direct access to the table, we use the message type as
+ * an index into it.
+ * Thus the order of the elements in this table MUST match the order of the
+ * message types specified in mdmn_commd.x!
+ * See the definition of md_mn_msg_t.
+ *
+ * Be careful and do not disturb the order of the messages!
+ */
+ {
+ /* MD_MN_MSG_NULL */
+ MD_MSG_CLASS0, /* message class */
+ NULL, /* message handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 0, 0, /* class busy retry / time delta */
+ 0, 0 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_TEST1 */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_dummy, /* message handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 200, 4, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_TEST2 */
+ MD_MSG_CLASS2, /* message class */
+ mdmn_do_dummy, /* message handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 200, 4, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_TEST3 */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_dummy, /* message handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 200, 4, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_TEST4 */
+ MD_MSG_CLASS4, /* message class */
+ mdmn_do_dummy, /* message handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 200, 4, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_TEST5 */
+ MD_MSG_CLASS5, /* message class */
+ mdmn_do_dummy, /* message handler */
+ NULL, /* submessage generator */
+ 4, /* timeout in seconds */
+ 200, 4, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_TEST6 */
+ MD_MSG_CLASS1, /* message class */
+ NULL, /* message handler */
+ mdmn_smgen_test6, /* submessage generator */
+ 1, /* timeout in seconds */
+ 200, 4, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_CMD
+ * Send a command string to all nodes
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_cmd, /* message handler */
+ NULL, /* submessage generator */
+ 90, /* times out in 90 secs */
+ 40, 20, /* class busy retry / time delta */
+ 10, 1000 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_CMD_RETRY
+ * Send a command string to all nodes and retry on busy
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_cmd, /* message handler */
+ NULL, /* submessage generator */
+ 90, /* times out in 90 secs */
+ 100000, 20, /* class busy retry / time delta */
+ 10, 1000 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_CLU_CHECK */
+ MD_MSG_CLASS2, /* message class */
+ mdmn_do_clu, /* message handler */
+ NULL, /* submessage generator */
+ 5, /* timeout in seconds */
+ 10000, 2, /* class busy retry / time delta */
+ 0, 0 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_CLU_LOCK */
+ MD_MSG_CLASS2, /* message class */
+ mdmn_do_clu, /* message handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 10000, 2, /* class busy retry / time delta */
+ 0, 0 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_CLU_UNLOCK */
+ MD_MSG_CLASS2, /* message class */
+ mdmn_do_clu, /* message handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 10000, 2, /* class busy retry / time delta */
+ 0, 0 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_REQUIRE_OWNER */
+ MD_MSG_CLASS5, /* message class */
+ mdmn_do_req_owner, /* message handler */
+ NULL, /* submessage generator */
+ 12, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_CHOOSE_OWNER
+ * Using the current resync count for the set, choose a resync
+ * owner and send a CHANGE_OWNER message to request that node
+ * to make itself the owner
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_choose_owner, /* message handler */
+ NULL, /* submessage generator */
+ 12, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_CHANGE_OWNER
+ * Request a change of ownership to the specified node
+ */
+ MD_MSG_CLASS4, /* message class */
+ mdmn_do_change_owner, /* message handler */
+ NULL, /* submessage generator */
+ 12, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_SUSPEND_WRITES
+ * Suspend all writes to the specified mirror
+ */
+ MD_MSG_CLASS6, /* message class */
+ mdmn_do_susp_write, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ 200, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_STATE_UPDATE_RESWR
+ * Update the state of a mirror component
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_state_upd_reswr, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_STATE_UPDATE
+ * Suspend writes to a mirror and then update the state of a
+ * mirror component
+ */
+ MD_MSG_CLASS1, /* message class */
+ NULL, /* message handler */
+ mdmn_smgen_state_upd, /* submessage generator */
+ 16, /* SUSPEND_WRITES + STATE_UPDATE_RESWR */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_ALLOCATE_HOTSPARE
+ * Allocate a hotspare for a mirror component
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_allocate_hotspare, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_RESYNC_STARTING
+ * Start a resync thread for the specified mirror
+ */
+ MD_MSG_CLASS2, /* message class */
+ mdmn_do_resync, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_RESYNC_NEXT
+ * Send the next region to be resynced to all nodes. For ABR
+ * mirrors, the nodes must suspend all writes to this region until
+ * the next message of this type or a RESYNC_FINISH
+ */
+ MD_MSG_CLASS2, /* message class */
+ mdmn_do_resync, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_RESYNC_FINISH
+ * All resyncs for a mirror are complete, terminate resync thread
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_resync, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_RESYNC_PHASE_DONE
+ * A resync phase, optimized, submirror or component is complete
+ */
+ MD_MSG_CLASS2, /* message class */
+ mdmn_do_resync, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_SET_CAP
+ * Set the specified metadevice capability on all nodes
+ * This is used to propagate the ABR capability
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_set_cap, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ 100000, 10, /* class busy retry/ time delta */
+ 200, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /* MD_MN_MSG_VERBOSITY */
+ MD_MSG_CLASS0, /* special message class */
+ mdmn_do_dummy, /* dummy handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 0, 0, /* No retries for class busy */
+ 0, 0 /* No retries for comm fail */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_MDDB_PARSE
+ * Message cannot fail unless node failure causes node panic
+ */
+ MD_MSG_CLASS7, /* message class */
+ mdmn_do_mddb_parse, /* reparse mddb */
+ NULL, /* submessage generator */
+ 10, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_MDDB_BLOCK
+ * Message cannot fail unless node failure causes node panic
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_mddb_block, /* block/unblock reparse */
+ NULL, /* submessage generator */
+ 5, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_META_DB_ATTACH
+ */
+ MD_MSG_CLASS3, /* message class */
+ NULL, /* message handler */
+ mdmn_smgen_mddb_attach, /* submessage generator */
+ 30, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_SM_MDDB_ATTACH
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_sm_mddb_attach, /* message handler */
+ NULL, /* submessage generator */
+ 20, /* timeout in seconds */
+ /* creates mddbs */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_META_DB_DETACH
+ */
+ MD_MSG_CLASS3, /* message class */
+ NULL, /* detach mddb */
+ mdmn_smgen_mddb_detach, /* submessage generator */
+ 10, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+ {
+
+ /*
+ * MD_MN_MSG_SM_MDDB_DETACH
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_sm_mddb_detach, /* detach mddb */
+ NULL, /* submessage generator */
+ 5, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_META_DB_NEWSIDE
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_meta_db_newside, /* add new mddb side info */
+ NULL, /* submessage generator */
+ 10, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_META_DB_DELSIDE
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_meta_db_delside, /* delete mddb side info */
+ NULL, /* submessage generator */
+ 10, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_META_MD_ADDSIDE
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_meta_md_addside, /* add new md side info */
+ NULL, /* submessage generator */
+ 10, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_META_MD_DELSIDE
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_meta_md_delside, /* delete md side info */
+ NULL, /* submessage generator */
+ 10, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_MDDB_OPTRECERR
+ * Message cannot fail unless node failure causes node panic
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_mddb_optrecerr, /* fix opt rec mddb */
+ NULL, /* submessage generator */
+ 3, /* timeout in seconds */
+ UINT_MAX, 2, /* class busy retry / time delta */
+ 10, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_ABORT
+ */
+ MD_MSG_CLASS0, /* special message class */
+ mdmn_do_dummy, /* dummy handler */
+ NULL, /* submessage generator */
+ 1, /* timeout in seconds */
+ 0, 0, /* No retries for class busy */
+ 0, 0 /* No retries for comm fail */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_STATE_UPDATE_RESWR2
+ * Update the state of a mirror component, called if during the updates
+ * of the watermarks for a softpartition, an IO error on a submirror
+ * occurs. Need to have a class different from CLASS1, otherwise we
+ * deadlock with the command that is currently being processed
+ * (metainit/metaclear/metattach/metarecover)
+ *
+ * And we may actually use a class different than CLASS1 because this
+ * can only happen when a metainit or similar is called, and in that
+ * case all potential metadb or metaset commands are blocked anyway.
+ * Besides the different class it does exactly what
+ * MD_MN_MSG_STATE_UPDATE_RESWR would do
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_state_upd_reswr, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_STATE_UPDATE2
+ * Like MD_MN_MSG_STATE_UPDATE only using a different class.
+ * See comment for MD_MN_MSG_STATE_UPDATE_RESWR2
+ */
+ MD_MSG_CLASS3, /* message class */
+ NULL, /* message handler */
+ mdmn_smgen_state_upd, /* submessage generator */
+ 16, /* SUSPEND_WRITES + STATE_UPDATE_RESWR */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_ALLOCATE_HOTSPARE2
+ * Like MD_MN_MSG_ALLOCATE_HOTSPARE only using a different class.
+ * See comment for MD_MN_MSG_STATE_UPDATE_RESWR2
+ */
+ MD_MSG_CLASS3, /* message class */
+ mdmn_do_allocate_hotspare, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_IOCSET
+ * Send IOCSET ioctl to create a soft part
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_iocset, /* create softpart */
+ NULL, /* submessage generator */
+ 90, /* times out in 90 secs */
+ 10000, 2, /* class busy retry / time delta */
+ 10, 1000 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_SP_SETSTAT
+ * Update the status of a softpart
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_sp_setstat, /* create softpart */
+ NULL, /* submessage generator */
+ 90, /* times out in 90 secs */
+ 10000, 2, /* class busy retry / time delta */
+ 10, 1000 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_ADDKEYNAME
+ * Add a key to the namespace
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_addkeyname, /* add key */
+ NULL, /* submessage generator */
+ 90, /* times out in 90 secs */
+ 10000, 2, /* class busy retry / time delta */
+ 10, 1000 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_SP_DELKEYNAME
+ * Remove a key from the namespace
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_delkeyname, /* delete key */
+ NULL, /* submessage generator */
+ 90, /* times out in 90 secs */
+ 10000, 2, /* class busy retry / time delta */
+ 10, 1000 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_GET_TSTATE
+ * Get ui_tstate for a metadevice from the master. Used to get ABR
+ * state from the master node.
+ */
+ MD_MSG_CLASS2, /* message class */
+ mdmn_do_get_tstate, /* get tstate */
+ NULL, /* submessage generator */
+ 5, /* times out in 5 secs */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_GET_MIRROR_STATE
+ * Get submirror state for specified submirror from master node.
+ * Used to synchronise initial resync state across a cluster.
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_get_mirstate, /* get smstate */
+ NULL, /* submessage generator */
+ 5, /* times out in 5 secs */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_SP_SETSTAT2
+ * Update the status of a softpart. Used for propagating an error from
+ * the soft-part sp_error() routine
+ */
+ MD_MSG_CLASS4, /* message class */
+ mdmn_do_sp_setstat, /* update softpart state */
+ NULL, /* submessage generator */
+ 90, /* times out in 90 secs */
+ 10000, 2, /* class busy retry / time delta */
+ 10, 1000 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_SETSYNC
+ * Start a resync thread for the specified mirror
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_setsync, /* message handler */
+ NULL, /* submessage generator */
+ 90, /* timeout in seconds */
+ 10000, 2, /* class busy retry / time delta */
+ 10, 1000 /* comm fail retry / time delta */
+ },
+
+ {
+ /*
+ * MD_MN_MSG_POKE_HOTSPARES
+ * Call poke_hotspares()
+ */
+ MD_MSG_CLASS1, /* message class */
+ mdmn_do_poke_hotspares, /* message handler */
+ NULL, /* submessage generator */
+ 8, /* timeout in seconds */
+ UINT_MAX, 10, /* class busy retry / time delta */
+ UINT_MAX, 100 /* comm fail retry / time delta */
+ },
+
+};
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c b/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c
new file mode 100644
index 0000000000..582b7d293e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c
@@ -0,0 +1,922 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+#include <sdssc.h>
+#include <arpa/inet.h>
+#include <sys/lvm/md_mddb.h>
+
+#define MAX_LINE_SIZE 1024
+
+/*
+ * Maximum amount of time to spend waiting for an ownership change to complete.
+ */
+static const int OWNER_TIMEOUT = 3;
+
+/*
+ * FUNCTION:	meta_is_mn_set()
+ * INPUT:	sp	- the set name
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 1 if MultiNode set else 0
+ * PURPOSE:	checks if the set is a MultiNode set
+ * NOTES:	A NULL set, a set without a name and the local set are
+ *		never MultiNode.  If the set descriptor cannot be
+ *		obtained, 0 is returned and whatever metaget_setdesc()
+ *		recorded in ep is left for the caller to inspect.
+ */
+int
+meta_is_mn_set(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+
+	/* Local set cannot be MultiNode */
+	if ((sp == NULL) || (sp->setname == NULL) ||
+	    (strcmp(sp->setname, MD_LOCAL_NAME) == 0))
+		return (0);
+
+	/*
+	 * metaget_setdesc() can fail and hand back NULL.  An ASSERT alone
+	 * is not a sufficient guard (assertions are normally compiled out
+	 * of non-debug builds), so treat a missing descriptor as
+	 * "not a MultiNode set" instead of dereferencing NULL.
+	 */
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (0);
+
+	return ((sd->sd_flags & MD_SR_MN) ? 1 : 0);
+}
+
+/*
+ * FUNCTION:	meta_is_mn_name()
+ * INPUT:	spp	- ptr to the set name; if *spp is NULL the set is
+ *			  derived from the metadevice or hot spare pool
+ *			  name (eg set/d10) and stored back through spp
+ *		name	- the metadevice name
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 1 if MultiNode set else 0
+ * PURPOSE:	checks if the metadevice is in a MultiNode set
+ */
+int
+meta_is_mn_name(
+	mdsetname_t	**spp,
+	char		*name,
+	md_error_t	*ep
+)
+{
+	md_error_t	status = mdnullerror;
+	char		*canon;
+
+	/* No set supplied: work it out from the name itself */
+	if (*spp == NULL) {
+		if (is_hspname(name)) {
+			if (metahspname(spp, name, ep) == NULL)
+				return (0);
+		} else if (!is_metaname(name)) {
+			/* neither a hot spare pool nor a metadevice name */
+			return (0);
+		} else {
+			/*
+			 * Only the side effect of filling in *spp is
+			 * wanted; the returned name is discarded.
+			 */
+			canon = meta_name_getname(spp, name, &status);
+			if (canon != NULL)
+				Free(canon);
+			if (!mdisok(&status)) {
+				(void) mdstealerror(ep, &status);
+				return (0);
+			}
+		}
+	}
+
+	/* The local set is never MultiNode */
+	if (strcmp((*spp)->setname, MD_LOCAL_NAME) == 0)
+		return (0);
+
+	/* Need the set descriptor to look at the set flags */
+	if (metaget_setdesc(*spp, ep) == NULL)
+		return (0);
+
+	return (((*spp)->setdesc->sd_flags & MD_SR_MN) ? 1 : 0);
+}
+
+/*
+ * meta_ping_mnset(set_t setno)
+ * Send a test message for this set in order to make commd do some init stuff
+ * Don't bother changelog.
+ * If set is suspended, fail immediately.
+ *
+ * The outcome of the send is deliberately ignored; any result and any
+ * error recorded locally are released before returning.
+ */
+void
+meta_ping_mnset(set_t setno)
+{
+	char		*data = "test";
+	md_error_t	mde = mdnullerror;
+	md_mn_result_t	*resp = NULL;
+
+	/*
+	 * The payload is the string including its terminating NUL.
+	 * sizeof (data) would be the size of the pointer, not of the
+	 * string it points to.
+	 */
+	(void) mdmn_send_message(setno, MD_MN_MSG_TEST2,
+	    MD_MSGF_NO_LOG | MD_MSGF_FAIL_ON_SUSPEND, data,
+	    strlen(data) + 1, &resp, &mde);
+
+	if (resp != NULL)
+		free_result(resp);
+
+	/* nobody looks at the error, so don't leave it populated */
+	if (!mdisok(&mde))
+		mdclrerror(&mde);
+}
+
+/*
+ *
+ * FUNCTION:	print_stderr
+ * INPUT:	errstr	- the error message returned by the command
+ *		context	- the context string from metainit -a
+ * PURPOSE:	called from meta_mn_send_command to print the error message
+ *		to stderr. When context is NO_CONTEXT_STRING, the errstr string
+ *		is output unchanged. When context is a string, it is the context
+ *		string for the metainit -a command and in this case the errstr
+ *		string has to be parsed to extract the command and node name
+ *		and to send a message to stderr in the format
+ *		command: node: context: error message
+ *		If errstr does not have the "command: node: message" shape
+ *		it is reported as an invalid format error message.
+ */
+static void
+print_stderr(
+	char	*errstr,
+	char	*context
+)
+{
+	char	*command;
+	char	*node;
+	char	*message;
+	size_t	length;
+
+	if (context == NO_CONTEXT_STRING) {
+		(void) fprintf(stderr, "%s", errstr);
+		return;
+	}
+
+	/*
+	 * Each scratch buffer is sized to hold the whole of errstr plus
+	 * the terminating NUL.  That is what makes the unbounded %[
+	 * conversions below safe: no single field can be longer than the
+	 * input it was scanned from.
+	 */
+	length = strlen(errstr) + 1;
+	command = Malloc(length);
+	node = Malloc(length);
+	message = Malloc(length);
+
+	if (sscanf(errstr, "%[^:]: %[^:]: %[^\n]", command, node,
+	    message) == 3) {
+		(void) fprintf(stderr, "%s: %s: %s: %s\n", command,
+		    node, context, message);
+	} else {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: Invalid format error message"), errstr);
+	}
+
+	Free(command);
+	Free(node);
+	Free(message);
+}
+
+/*
+ * FUNCTION:	meta_mn_send_command()
+ * INPUT:	sp	- the set name
+ *		argc	- number of arguments
+ *		argv	- arg list
+ *		flags	- some controlling flags
+ *		initall_context	- context string for metainit -a
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	return exitval from mdmn_send_message, or 1 if the message
+ *		could not be built or sent
+ * PURPOSE:	sends the command to the master node for execution
+ * NOTES:	The arguments are joined with single spaces into one
+ *		fixed-size command buffer; empty arguments are dropped.
+ *		A command line that does not fit is rejected (E2BIG in ep)
+ *		rather than being sent truncated.
+ */
+int
+meta_mn_send_command(
+	mdsetname_t	*sp,
+	int		argc,
+	char		*argv[],
+	int		flags,
+	char		*initall_context,
+	md_error_t	*ep
+)
+{
+	const size_t	cmdsize = 1024;	/* size of the message payload */
+	int		a;
+	int		err;
+	int		retval;
+	int		toolong;
+	int		send_message_flags = MD_MSGF_DEFAULT_FLAGS;
+	int		send_message_type;
+	char		*cmd;
+	md_mn_result_t	*resp = NULL;
+
+	/*
+	 * The whole buffer is handed to mdmn_send_message() below, so
+	 * start from zeroed memory instead of sending stale heap contents
+	 * after the terminating NUL.
+	 */
+	cmd = Zalloc(cmdsize);
+	toolong = (strlcpy(cmd, argv[0], cmdsize) >= cmdsize);
+	for (a = 1; !toolong && (a < argc); a++) {
+		/* don't copy empty arguments */
+		if (*argv[a] == '\0') {
+			continue;
+		}
+		if ((strlcat(cmd, " ", cmdsize) >= cmdsize) ||
+		    (strlcat(cmd, argv[a], cmdsize) >= cmdsize)) {
+			toolong = 1;
+		}
+	}
+	if (toolong) {
+		/* never execute a silently truncated command remotely */
+		Free(cmd);
+		(void) mdsyserror(ep, E2BIG, argv[0]);
+		return (1);
+	}
+
+	/*
+	 * in dryrun mode stop on the first error
+	 * use the CMD_RETRY message type if RETRY_BUSY flag set
+	 */
+	if (flags & MD_DRYRUN)
+		send_message_flags |= MD_MSGF_STOP_ON_ERROR;
+	if (flags & MD_NOLOG)
+		send_message_flags |= MD_MSGF_NO_LOG;
+	if (flags & MD_PANIC_WHEN_INCONSISTENT)
+		send_message_flags |= MD_MSGF_PANIC_WHEN_INCONSISTENT;
+	if (flags & MD_RETRY_BUSY) {
+		send_message_type = MD_MN_MSG_BC_CMD_RETRY;
+	} else {
+		send_message_type = MD_MN_MSG_BC_CMD;
+	}
+	err = mdmn_send_message(
+	    sp->setno, send_message_type, send_message_flags,
+	    cmd, cmdsize, &resp, ep);
+
+	Free(cmd);
+
+	if (err == 0) {
+		/*
+		 * stderr may be turned off by IGNORE_STDERR
+		 * In dryrun we only print stderr if the exit_val is non-zero
+		 */
+		if ((resp->mmr_err_size != 0) &&
+		    ((flags & MD_IGNORE_STDERR) == 0)) {
+			if (((flags & MD_DRYRUN) == 0) ||
+			    (resp->mmr_exitval != 0)) {
+				print_stderr(resp->mmr_err, initall_context);
+			}
+		}
+
+		/*
+		 * If dryrun is set, we don't display stdout,
+		 * because the real run has yet to follow.
+		 */
+		if (((flags & MD_DRYRUN) == 0) && (resp->mmr_out_size != 0)) {
+			(void) printf("%s", resp->mmr_out);
+		}
+		retval = resp->mmr_exitval;
+		free_result(resp);
+		return (retval);
+	}
+
+	/* The send failed: explain why, based on the commd state if known */
+	if (resp != NULL) {
+		if (resp->mmr_comm_state == MDMNE_CLASS_BUSY) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "rpc.mdcommd currently busy. "
+			    "Retry operation later.\n"));
+		} else if (resp->mmr_comm_state == MDMNE_NOT_JOINED) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Node %s must join the %s multi-owner diskset to "
+			    "issue commands.\n"
+			    "To join, use: metaset -s %s -j\n"),
+			    mynode(), sp->setname, sp->setname);
+		} else if (resp->mmr_comm_state == MDMNE_LOG_FAIL) {
+			mddb_config_t	c;
+
+			/* find out whether a stale mddb is the reason */
+			(void) memset(&c, 0, sizeof (c));
+			c.c_setno = sp->setno;
+			(void) metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL);
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Command not attempted: Unable to log message "
+			    "in set %s\n"), sp->setname);
+			if (c.c_flags & MDDB_C_STALE) {
+				(void) mdmddberror(ep, MDE_DB_STALE, NODEV64,
+				    sp->setno, 0, NULL);
+				mde_perror(ep, "");
+			}
+		} else {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Command failed: Commd State %d "
+			    "encountered.\n"), resp->mmr_comm_state);
+		}
+		free_result(resp);
+	} else {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "Command failed: mdmn_send_message returned %d.\n"),
+		    err);
+	}
+
+
+	return (1);
+}
+
+/*
+ * FUNCTION:	meta_mn_send_suspend_writes()
+ * INPUT:	mnum	- minor num of mirror
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	return value from mdmn_send_message()
+ * PURPOSE:	sends message to all nodes to suspend writes to the mirror.
+ *		The set is derived from the minor number; any result
+ *		returned by the send is released here.
+ */
+int
+meta_mn_send_suspend_writes(
+	minor_t		mnum,
+	md_error_t	*ep
+)
+{
+	md_mn_msg_suspwr_t	msg;
+	md_mn_result_t		*reply = NULL;
+	int			rc;
+
+	msg.msg_suspwr_mnum = mnum;
+
+	/*
+	 * Nobody issues this message directly, so it is sent with the
+	 * suspend override flag.  Should it turn up while the commd is
+	 * suspended, it must be part of a command being replayed (or
+	 * similar) and must not be blocked.  When the commd is not
+	 * suspended the flag is harmless.
+	 */
+	rc = mdmn_send_message(MD_MIN2SET(mnum),
+	    MD_MN_MSG_SUSPEND_WRITES,
+	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND,
+	    (char *)&msg, sizeof (msg), &reply, ep);
+
+	if (reply != NULL)
+		free_result(reply);
+
+	return (rc);
+}
+
+/*
+ * Parse the multi-node list file
+ *
+ * Return Values:	Zero	 - Success
+ *			Non Zero - Failure
+ *
+ * File content:	The content of the nodelist file should consist of
+ *			triplets of nodeid, nodename and private interconnect
+ *			address separated by one or more white space.
+ * e.g.
+ *			1 node_a 192.168.111.3
+ *			2 node_b 192.168.111.5
+ *
+ *			Any missing fields will result in an error.
+ *
+ * If the nodelist file cannot be opened, a single-entry list describing
+ * this host (node id 1, first IPv4 address) is returned instead.
+ *
+ * On success *nl is the head of the list (to be released with
+ * meta_free_nodelist()) and *nodecnt the number of entries.  On failure
+ * no list is handed back: *nl is NULL and *nodecnt is 0.
+ */
+int
+meta_read_nodelist(
+	int				*nodecnt,
+	mndiskset_membershiplist_t	**nl,
+	md_error_t			*ep
+)
+{
+	FILE				*fp = NULL;
+	char				line[MAX_LINE_SIZE];
+	char				*buf;
+	uint_t				i;
+	int				sz;
+	mndiskset_membershiplist_t	**tailp = nl;
+
+	/* open file */
+	if ((fp = fopen(META_MNSET_NODELIST, "r")) == NULL) {
+		mndiskset_membershiplist_t	*nlp;
+		struct hostent			*hp;
+		int				err = 0;
+
+		/* return this node with id of 1 */
+		nlp = *tailp = Zalloc(sizeof (*nlp));
+		tailp = &nlp->next;
+
+		*nodecnt = 1;
+		nlp->msl_node_id = 1;
+		buf = mynode();
+		sz = min(strlen(buf), sizeof (nlp->msl_node_name) - 1);
+		(void) strncpy(nlp->msl_node_name, buf, sz);
+		nlp->msl_node_name[sz] = '\0';
+
+		/* retrieve info about our host */
+		if ((hp = gethostbyname(buf)) == NULL) {
+			err = EADDRNOTAVAIL;
+		} else if (hp->h_addrtype != AF_INET) {
+			/* We only do IPv4 addresses, for now */
+			err = EPFNOSUPPORT;
+		} else if (*hp->h_addr_list) {
+			/* We take the first address only */
+			struct in_addr in;
+
+			(void) memcpy(&in.s_addr, *hp->h_addr_list,
+			    sizeof (struct in_addr));
+			(void) strncpy(nlp->msl_node_addr, inet_ntoa(in),
+			    MD_MAX_NODENAME);
+		} else {
+			err = EADDRNOTAVAIL;
+		}
+
+		if (err != 0) {
+			/*
+			 * Don't leak the half-built entry and don't leave
+			 * the caller with a list it has no reason to free;
+			 * this mirrors the parse error handling below.
+			 */
+			meta_free_nodelist(*nl);
+			*nl = NULL;
+			*nodecnt = 0;
+			return (mdsyserror(ep, err, buf));
+		}
+
+		return (0);
+	}
+
+	*nl = NULL;
+	*nodecnt = 0;
+
+	while ((fp != NULL) && ((buf = fgets(line, sizeof (line) - 1, fp)) !=
+	    NULL)) {
+		mndiskset_membershiplist_t *nlp;
+
+		/*
+		 * skip leading spaces; on exit i is the length of the
+		 * token buf now points at
+		 */
+		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
+			buf++;
+
+		/* skip comments and blank lines */
+		if (*buf == '\0' || *buf == '#')
+			continue;
+
+		/* allocate memory and set tail pointer */
+		nlp = *tailp = Zalloc(sizeof (*nlp));
+		tailp = &nlp->next;
+
+		/* parse node id */
+		nlp->msl_node_id = strtoul(buf, NULL, 0);
+		buf += i;
+
+		/* skip leading spaces */
+		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
+			buf++;
+
+		/* fields missing, return error */
+		if (*buf == '\0' || *buf == '#') {
+			meta_free_nodelist(*nl);
+			*nl = NULL;
+			*nodecnt = 0;
+
+			/* close file and return */
+			if ((fp) && (fclose(fp) != 0))
+				return (mdsyserror(ep, errno,
+				    META_MNSET_NODELIST));
+
+			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
+		}
+
+		/* parse node name */
+		sz = min(i, sizeof (nlp->msl_node_name) - 1);
+		(void) strncpy(nlp->msl_node_name, buf, sz);
+		nlp->msl_node_name[sz] = '\0';
+		buf += i;
+
+		/* skip leading spaces */
+		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
+			buf++;
+
+		/* fields missing, return error */
+		if (*buf == '\0' || *buf == '#') {
+			meta_free_nodelist(*nl);
+			*nl = NULL;
+			*nodecnt = 0;
+
+			/* close file and return */
+			if ((fp) && (fclose(fp) != 0))
+				return (mdsyserror(ep, errno,
+				    META_MNSET_NODELIST));
+
+			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
+		}
+
+		/* parse node address */
+		sz = min(i, sizeof (nlp->msl_node_addr) - 1);
+		(void) strncpy(nlp->msl_node_addr, buf, sz);
+		nlp->msl_node_addr[sz] = '\0';
+
+		++*nodecnt;
+	}
+
+	/* close file */
+	if ((fp) && (fclose(fp) != 0))
+		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
+
+	return (0);
+}
+
+/*
+ * Populate the multi-node list file from a given list of node id's
+ * The nids must have only one node id in each cell. Range of node
+ * id's in the form 1-n are not allowed.
+ *
+ * One line of "nodeid<TAB>nodename<TAB>private address" is written per
+ * node id.  The file is opened for writing before the ids are checked,
+ * so a failure part way through leaves a partially written file.
+ *
+ * Return Values:	Zero	 - Success
+ *			Non Zero - Failure
+ */
+int
+meta_write_nodelist(
+	int		nodecnt,
+	char		**nids,
+	md_error_t	*ep
+)
+{
+	FILE		*fp = NULL;
+	char		name[MAX_LINE_SIZE], addr[MAX_LINE_SIZE];
+	char		*endp;
+	int		i;
+	uint_t		nid;
+	struct in_addr	ipaddr;
+	int		err = 0;
+
+	/*
+	 * check if we are running on clustering
+	 * NOTE(review): the sdssc status is passed on as if it were an
+	 * errno value - confirm that is intended.
+	 */
+	if ((err = sdssc_bind_library()) != SDSSC_OKAY) {
+		return (mdsyserror(ep, err, META_MNSET_NODELIST));
+	}
+
+	/* open file for writing */
+	if ((fp = fopen(META_MNSET_NODELIST, "w")) == NULL) {
+		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
+	}
+
+	/* signed index: a negative nodecnt simply writes nothing */
+	for (i = 0; i < nodecnt; i++) {
+		/*
+		 * extract the node id; strtoul() is not required to set
+		 * errno when there is nothing to convert, so also check
+		 * that at least one character was consumed
+		 */
+		errno = 0;
+		nid = strtoul(nids[i], &endp, 0);
+		if ((errno != 0) || (endp == nids[i])) {
+			if ((fp) && (fclose(fp) != 0))
+				return (mdsyserror(ep, errno,
+				    META_MNSET_NODELIST));
+
+			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
+		}
+
+		/*
+		 * get node name
+		 * presumably sdssc_cm_nid2nm() rewrites the buffer in
+		 * place from node id to node name - confirm
+		 */
+		(void) snprintf(name, sizeof (name), "%u", nid);
+		sdssc_cm_nid2nm(name);
+
+		/* finally get the private ip address */
+		(void) snprintf(addr, sizeof (addr), "%s", name);
+		if (sdssc_get_priv_ipaddr(addr, &ipaddr) != SDSSC_OKAY) {
+			if ((fp) && (fclose(fp) != 0))
+				return (mdsyserror(ep, errno,
+				    META_MNSET_NODELIST));
+
+			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
+		}
+
+		(void) fprintf(fp, "%u\t%s\t%s\n", nid, name,
+		    inet_ntoa(ipaddr));
+	}
+
+	/* close file */
+	if ((fp) && (fclose(fp) != 0))
+		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
+
+	return (0);
+}
+
+/*
+ * Release every entry of a node list built by meta_read_nodelist().
+ * A NULL list is accepted and is a no-op.
+ */
+void
+meta_free_nodelist(
+	mndiskset_membershiplist_t	*nl
+)
+{
+	while (nl != NULL) {
+		mndiskset_membershiplist_t	*doomed = nl;
+
+		/* step past the entry before it is released */
+		nl = nl->next;
+		Free(doomed);
+	}
+}
+
+/*
+ * FUNCTION:	meta_mn_send_setsync()
+ * INPUT:	sp	- setname
+ *		mirnp	- mirror name
+ *		size	- buffer size, 0 if none
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	non-zero return value from mdmn_send_message() if the
+ *		setsync message fails, otherwise the return value from
+ *		meta_mn_send_resync_starting()
+ * PURPOSE:	Send a setsync command to all nodes to set resync status
+ */
+
+int
+meta_mn_send_setsync(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	daddr_t		size,
+	md_error_t	*ep
+)
+{
+	md_mn_msg_setsync_t	msg;
+	md_mn_result_t		*reply = NULL;
+	int			rc;
+
+	msg.setsync_mnum = meta_getminor(mirnp->dev);
+	msg.setsync_copysize = size;
+	msg.setsync_flags = 0;
+
+	/*
+	 * The metasync command is not logged: it has no effect on the
+	 * underlying metadb state, and after a master change the
+	 * reconfiguration process issues a fresh 'metasync' to all
+	 * affected mirrors anyway, so logging would only cause the
+	 * message to be sent twice.  Skipping the log also reduces the
+	 * processing time required.
+	 */
+	rc = mdmn_send_message(sp->setno, MD_MN_MSG_SETSYNC,
+	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND,
+	    (char *)&msg, sizeof (msg), &reply, ep);
+	if (reply != NULL)
+		free_result(reply);
+
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * In a MN set the metasync command only updates the state on
+	 * all of the nodes; unlike non-MN sets it does not start the
+	 * resync itself.  The resync is kicked off by sending a resync
+	 * starting message for the metadevice.
+	 */
+	return (meta_mn_send_resync_starting(mirnp, ep));
+}
+
+/*
+ * FUNCTION:	meta_mn_send_metaclear_command()
+ * INPUT:	sp	- setname
+ *		name	- metadevice name
+ *		options	- command options
+ *		pflag	- clear all soft partitions for a given device
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	return value from meta_mn_send_command()
+ * PURPOSE:	Send a metaclear command to all nodes with force(-f) and
+ *		recurse(-r) options set if required. For hotspare pool and
+ *		metadevices, the metadevice name is of the form setname/dxx or
+ *		setname/hspxxx so a '-s' argument isn't required. If pflag is
+ *		set the name refers to a metadevice or component and in this
+ *		case a '-s' argument is required to define the set.
+ */
+
+int
+meta_mn_send_metaclear_command(
+	mdsetname_t	*sp,
+	char		*name,
+	mdcmdopts_t	options,
+	int		pflag,
+	md_error_t	*ep
+)
+{
+	char	**args;
+	int	nargs = 0;
+	int	rc;
+
+	/*
+	 * Worst case is seven words:
+	 *	metaclear -s <set> -f -r -p <name>
+	 */
+	args = Calloc(7, sizeof (char *));
+
+	args[nargs++] = "metaclear";
+	if (pflag) {
+		args[nargs++] = "-s";
+		args[nargs++] = sp->setname;
+	}
+	if (options & MDCMD_FORCE)
+		args[nargs++] = "-f";
+	if (options & MDCMD_RECURSE)
+		args[nargs++] = "-r";
+	if (pflag)
+		args[nargs++] = "-p";
+	args[nargs++] = name;
+
+	rc = meta_mn_send_command(sp, nargs, args,
+	    MD_DISP_STDERR, NO_CONTEXT_STRING, ep);
+
+	/* only the vector was allocated here, not the strings in it */
+	free(args);
+	return (rc);
+}
+
+/*
+ * FUNCTION:	meta_mn_send_resync_starting()
+ * INPUT:	mirnp	- mirror name
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	return value from mdmn_send_message()
+ * PURPOSE:	Send a resync starting message to all nodes.
+ *		The set is derived from the mirror's minor number.
+ */
+
+int
+meta_mn_send_resync_starting(
+	mdname_t	*mirnp,
+	md_error_t	*ep
+)
+{
+	minor_t			mnum = meta_getminor(mirnp->dev);
+	md_mn_msg_resync_t	msg;
+	md_mn_result_t		*reply = NULL;
+	int			rc;
+
+	msg.msg_resync_mnum = mnum;
+
+	/*
+	 * Nobody issues this message directly, so it is sent with the
+	 * suspend override flag.  Should it turn up while the commd is
+	 * suspended, it must be part of a command being replayed (or
+	 * similar) and must not be blocked.  When the commd is not
+	 * suspended the flag is harmless.
+	 */
+	rc = mdmn_send_message(MD_MIN2SET(mnum),
+	    MD_MN_MSG_RESYNC_STARTING,
+	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND,
+	    (char *)&msg, sizeof (msg), &reply, ep);
+
+	if (reply != NULL)
+		free_result(reply);
+
+	return (rc);
+}
+
+/*
+ * FUNCTION: meta_mn_change_owner()
+ * INPUT: opp - pointer to parameter block; if *opp is NULL a block is
+ * allocated here, otherwise the caller's block is cleared
+ * and reused
+ * setno - set number of mirror metadevice
+ * mnum - minor number of mirror metadevice
+ * owner - node ID of mirror owner
+ * flags - flag field for ioctl
+ * OUTPUT: opp - parameter block used to send ioctl; *opp is set on every
+ * return path, so the caller owns the block afterwards
+ * RETURNS: int - 0 success, -1 error
+ * (a failing MD_MN_SET_MM_OWNER ioctl is reported with
+ * whatever metaioctl() returned, presumably -1; details are
+ * in the mde member of the returned parameter block)
+ * PURPOSE: issue an ioctl to change the ownership of the specified mirror
+ * to our node ID. We need to be the owner before any watermarks
+ * are committed to the device otherwise we'll enter a deadly
+ * embrace when attempting to write the watermark.
+ * This function can also be used to set the owner on a node to
+ * NULL. In this case the change is only made on the local node.
+ * In addition by setting the MD_MN_MM_CHOOSE_OWNER flag, the
+ * function can also be used to choose a mirror resync owner. This
+ * function should only be called on the master and it will
+ * select the owner and request it to become the owner.
+ */
+int
+meta_mn_change_owner(
+ md_set_mmown_params_t **opp, /* Returned parameter block */
+ set_t setno, /* Mirror set number */
+ uint_t mnum, /* Minor number */
+ uint_t owner, /* Node ID of mirror owner */
+ uint_t flags /* Flags */
+)
+{
+ md_set_mmown_params_t *ownpar = *opp;
+ md_mn_own_status_t *ownstat = NULL;
+ struct timeval tvs, tve;
+ int n = 0;
+ int rval;
+
+ if (ownpar != NULL) {
+ (void) memset(ownpar, 0, sizeof (*ownpar));
+ } else {
+ ownpar = Zalloc(sizeof (*ownpar));
+ }
+ ownstat = Zalloc(sizeof (*ownstat));
+
+ ownpar->d.mnum = mnum;
+ ownpar->d.owner = owner;
+ ownpar->d.flags = flags;
+ MD_SETDRIVERNAME(ownpar, MD_MIRROR, setno);
+ MD_SETDRIVERNAME(ownstat, MD_MIRROR, setno);
+
+ /*
+ * Attempt to change the ownership to the specified node. We retry this
+ * up to 10 times if we receive EAGAIN from the metadevice. This only
+ * happens if the underlying metadevice is busy with outstanding i/o
+ * that requires ownership change.
+ * Any other error, or running out of retries, ends the loop with rval
+ * still non-zero. There is a one second pause between attempts.
+ */
+ while ((rval = metaioctl(MD_MN_SET_MM_OWNER, ownpar, &ownpar->mde,
+ NULL)) != 0) {
+ md_sys_error_t *ip =
+ &ownpar->mde.info.md_error_info_t_u.sys_error;
+ if (ip->errnum != EAGAIN)
+ break;
+ if (n++ >= 10)
+ break;
+ (void) sleep(1);
+ }
+
+ /*
+ * There is no need to wait for the ioctl completion if we are setting
+ * the owner to NULL or requesting the master to choose the owner
+ * NOTE(review): 0 is returned here even if the ioctl above failed;
+ * a caller that cares has to look at the mde member of the returned
+ * parameter block - confirm that this is intended.
+ */
+ if ((owner == 0) || (flags & MD_MN_MM_CHOOSE_OWNER)) {
+ Free(ownstat);
+ *opp = ownpar;
+ return (0);
+ }
+
+ /*
+ * Wait for ioctl completion or a timeout to occur. If we
+ * timeout we fail the i/o request.
+ * The outer loop runs until the status carries MD_MN_MM_RESULT; it is
+ * skipped altogether when the ownership ioctl above failed.
+ * NOTE(review): the OWNER_TIMEOUT check and the one second pause are
+ * only reached when the status ioctl itself fails. While the status
+ * ioctl succeeds without MD_MN_MM_RESULT set, the outer loop polls
+ * again immediately and has no time limit - confirm that the driver
+ * guarantees a result.
+ */
+ ownstat->mnum = ownpar->d.mnum;
+ (void) gettimeofday(&tvs, NULL);
+
+ while ((rval == 0) && !(ownstat->flags & MD_MN_MM_RESULT)) {
+ while ((rval = metaioctl(MD_MN_MM_OWNER_STATUS, ownstat,
+ &ownstat->mde, NULL)) != 0) {
+ (void) gettimeofday(&tve, NULL);
+ if ((tve.tv_sec - tvs.tv_sec) > OWNER_TIMEOUT) {
+ rval = -1;
+ break;
+ }
+ (void) sleep(1);
+ }
+ }
+
+ /* no timeout and no ioctl error: rval reflects the reported result */
+
+ if (rval == 0) {
+ rval = (ownstat->flags & MD_MN_MM_RES_FAIL) ? -1 : 0;
+ }
+
+ Free(ownstat);
+ *opp = ownpar;
+ return (rval);
+}
+/*
+ * Special handling is required when running on a single node
+ * non-SC3.x environment. This function tests for that case.
+ *
+ * Return values:
+ *	0 - no nodes or joined or in a SC3.x env
+ *	1 - 1 node and not in SC3.x env
+ */
+
+int
+meta_mn_singlenode()
+{
+	md_error_t			xep = mdnullerror;
+	mndiskset_membershiplist_t	*nl;
+	int				nodecnt;
+	int				single;
+
+	/*
+	 * When running on SunCluster, MN sets are not validated here;
+	 * that happens during a reconfig cycle because all nodes must
+	 * take the same action.
+	 *
+	 * Cleanup is only wanted in the single node situation when not
+	 * running on SunCluster, i.e. when the nodelist only contains
+	 * this node and the MN setrecords only contain this node.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, &xep) == -1) {
+		/* no nodes are alive; the error itself is of no interest */
+		mdclrerror(&xep);
+		return (0);
+	}
+
+	/*
+	 * Single node means: exactly one entry in the nodelist, that
+	 * entry is this host, and we are not running on SunCluster.
+	 */
+	single = ((nodecnt == 1) &&
+	    (strcmp(nl->msl_node_name, mynode()) == 0) &&
+	    ((sdssc_bind_library()) != SDSSC_OKAY)) ? 1 : 0;
+
+	meta_free_nodelist(nl);
+	return (single);
+}
+
+/*
+ * FUNCTION:	meta_mn_send_get_tstate()
+ * INPUT:	dev	- dev_t of device
+ * OUTPUT:	tstatep - tstate value; the exit value of the reply on
+ *			  success, 0 if the send failed
+ *		ep	- return error pointer
+ * RETURNS:	return value from mdmn_send_message()
+ * PURPOSE:	Send a message to the master to get ui_tstate for a given device.
+ */
+
+int
+meta_mn_send_get_tstate(
+	md_dev64_t	dev,
+	uint_t		*tstatep,
+	md_error_t	*ep
+)
+{
+	minor_t			mnum = meta_getminor(dev);
+	md_mn_msg_gettstate_t	msg;
+	md_mn_result_t		*reply = NULL;
+	int			rc;
+
+	msg.gettstate_dev = dev;
+	rc = mdmn_send_message(MD_MIN2SET(mnum),
+	    MD_MN_MSG_GET_TSTATE,
+	    MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST,
+	    (char *)&msg, sizeof (msg), &reply, ep);
+
+	/* If some error occurred set tstate to 0 */
+	*tstatep = (rc == 0) ? reply->mmr_exitval : 0;
+
+	if (reply != NULL)
+		free_result(reply);
+
+	return (rc);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mount.c b/usr/src/lib/lvm/libmeta/common/meta_mount.c
new file mode 100644
index 0000000000..6d9cf39b4b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mount.c
@@ -0,0 +1,97 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * return mount association with meta device
+ */
+
+#include <meta.h>
+
+#include <sys/mnttab.h>
+
+#include "meta_lib_prv.h"
+
+/*
+ * Return the mount point associated with this mdname_t.
+ *
+ * The mount table is scanned for the first entry whose special device
+ * resolves (through metaname()) to the same dev as np.  The result is a
+ * Strdup()'d copy of the mount point, or NULL if no entry matches or
+ * the mount table cannot be opened (in which case ep is set).
+ */
+char *
+meta_get_mountp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	/* file system types that are never looked at */
+	static const char *const	skip_fstypes[] = {
+		"nfs", "autofs", "proc", "tmpfs",
+		"cachefs", "lofs", "rfs", "fd"
+	};
+	const int	nskip = sizeof (skip_fstypes) /
+	    sizeof (skip_fstypes[0]);
+	FILE		*mfp;
+	struct mnttab	ent;
+	char		mnt_mountp[MNT_LINE_MAX];
+	char		mnt_special[MNT_LINE_MAX];
+	int		found = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/*
+	 * look in mnttab
+	 * NOTE(review): the stream is not closed here; presumably
+	 * open_mnttab() keeps ownership of it - confirm.
+	 */
+	if ((mfp = open_mnttab()) == NULL) {
+		(void) mdsyserror(ep, errno, MNTTAB);
+		return (NULL);
+	}
+
+	while (!found && (getmntent(mfp, &ent) == 0)) {
+		mdname_t	*mnp;
+		int		k;
+
+		/* incomplete entries and non-absolute mount points */
+		if ((ent.mnt_special == NULL) || (ent.mnt_mountp == NULL))
+			continue;
+		if (ent.mnt_mountp[0] != '/')
+			continue;
+
+		/* uninteresting file system types */
+		for (k = 0; k < nskip; k++) {
+			if (strcmp(ent.mnt_fstype, skip_fstypes[k]) == 0)
+				break;
+		}
+		if (k < nskip)
+			continue;
+
+		/* work on private copies of the entry's strings */
+		(void) strcpy(mnt_mountp, ent.mnt_mountp);
+		(void) strcpy(mnt_special, ent.mnt_special);
+
+		/* a special device that is not a valid name is not an error */
+		mnp = metaname(&sp, mnt_special, ep);
+		if (mnp == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (np->dev == mnp->dev)
+			found = 1;
+	}
+
+	/* return success, if found */
+	return (found ? Strdup(mnt_mountp) : NULL);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_name.c b/usr/src/lib/lvm/libmeta/common/meta_name.c
new file mode 100644
index 0000000000..7becd6af2f
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_name.c
@@ -0,0 +1,3289 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <meta.h>
+#include <metad.h>
+
+#include <ctype.h>
+#include <string.h>
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Macros to produce a quoted string containing the value of a
+ * preprocessor macro. For example, if SIZE is defined to be 256,
+ * VAL2STR(SIZE) is "256". This is used to construct format
+ * strings for scanf-family functions below.
+ */
+#define QUOTE(x) #x
+#define VAL2STR(x) QUOTE(x)
+
+extern char *getfullblkname();
+extern char *getfullrawname();
+
+/*
+ * caches
+ */
+static mdsetnamelist_t *setlistp = NULL;
+static mddrivenamelist_t *drivelistp = NULL;
+static mdnamelist_t *fastnmlp = NULL;
+static mdhspnamelist_t *hsplistp = NULL;
+
+/*
+ * leak proof name conversion
+ *
+ * Convert uname to a raw device name with getfullrawname() and sanity
+ * check the pair.
+ *
+ * Returns the string produced by getfullrawname() (the caller releases
+ * it with Free()), or NULL when getfullrawname() returns NULL or an
+ * empty string; the empty string is freed here.
+ *
+ * NOTE: every failed check in the final branch prints a message with
+ * printf() and terminates the process with exit(1); the function does
+ * not return an error for those cases.
+ */
+static char *
+rawname(
+ char *uname
+)
+{
+ char *p;
+ struct stat sbuf1, sbuf2;
+
+ if ((p = getfullrawname(uname)) == NULL) {
+ return (NULL);
+ } else if (*p == '\0') {
+ /* empty result: free it so the caller only ever sees NULL */
+ Free(p);
+ return (NULL);
+ } else {
+ /* both the name as given and the raw name must be stat-able */
+ if (stat(uname, &sbuf1) != 0) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "device to mount in /etc/vfstab is "
+ "invalid for device %s\n"), uname);
+ exit(1);
+ }
+ if (stat(p, &sbuf2) != 0) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "device to fsck in /etc/vfstab is "
+ "invalid for raw device %s\n"), p);
+ exit(1);
+ }
+ /* and they must refer to the same device number */
+ if (sbuf1.st_rdev != sbuf2.st_rdev) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "/etc/vfstab entries inconsistent on "
+ "line containing device %s\n"), uname);
+ exit(1);
+ }
+ /*
+ * NOTE(review): the two checks below mask st_mode with
+ * S_IFBLK / S_IFCHR instead of using S_ISBLK() / S_ISCHR(),
+ * so they only reject modes that share no bit with the mask.
+ * Callers appear to pass raw names as uname too, so confirm
+ * before tightening.
+ */
+ if ((sbuf1.st_mode & S_IFBLK) == 0) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "/etc/vfstab device to mount is not a "
+ "block device for device %s\n"), uname);
+ exit(1);
+ }
+ if ((sbuf2.st_mode & S_IFCHR) == 0) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "/etc/vfstab device to fsck is not a "
+ "raw device for device %s\n"), p);
+ exit(1);
+ }
+ return (p);
+ }
+}
+
+/*
+ * Block-name counterpart of rawname(): return the result of
+ * getfullblkname() for uname, mapping an empty result to NULL (and
+ * freeing it) so callers only have to test for NULL.  A non-NULL
+ * result belongs to the caller.
+ */
+char *
+blkname(
+	char	*uname
+)
+{
+	char	*bname = getfullblkname(uname);
+
+	if (bname == NULL)
+		return (NULL);
+
+	if (*bname == '\0') {
+		Free(bname);
+		return (NULL);
+	}
+
+	return (bname);
+}
+
+/*
+ * parse up metadevice name
+ *
+ * Recognized forms (after runs of '/' are squeezed to a single '/'):
+ *	d<unit>
+ *	/dev/md/dsk/d<unit>		/dev/md/rdsk/d<unit>
+ *	<set>/d<unit>
+ *	/dev/md/<set>/dsk/d<unit>	/dev/md/<set>/rdsk/d<unit>
+ *
+ * uname  - name to parse; not modified
+ * snamep - if not NULL, receives the set name: NULL for the bare
+ *	    d<unit> form and on failure, a Strdup() of MD_LOCAL_NAME for
+ *	    the fully-qualified local forms, otherwise the parsed set
+ *	    name.  A non-NULL value is allocated and owned by the caller.
+ * unitp  - if not NULL, receives the unit number
+ *
+ * Returns 0 when uname is a metadevice name, -1 otherwise.
+ */
+static int
+parse_metadevice(
+ char *uname,
+ char **snamep,
+ unit_t *unitp
+)
+{
+ /* both scratch buffers are as large as uname, so no scan can overrun */
+ char *sname = Malloc(strlen(uname) + 1);
+ char *tname = Malloc(strlen(uname) + 1);
+
+ unit_t unit;
+ int len;
+ char *up;
+ char *tp;
+ int lcws; /* last character was slash */
+
+ /* handle dont cares */
+ if (unitp == NULL)
+ unitp = &unit;
+
+ /* Now copy uname to tname by throwing away any duplicate '/' */
+ for (lcws = 0, tp = tname, up = uname; *up; up++) {
+ if (lcws) {
+ if (*up == '/') {
+ continue;
+ } else {
+ lcws = 0;
+ }
+ }
+ if (*up == '/') {
+ lcws = 1;
+ }
+ *tp++ = *up; /* ++ is done by for loop */
+ }
+ *tp = '\0';
+
+ /*
+ * In every test below, len is only read after sscanf() reported the
+ * expected number of conversions, i.e. after %n has been stored;
+ * comparing it with strlen() rejects trailing garbage.
+ */
+
+ /* without set */
+ if ((sscanf(tname, "d%lu%n", unitp, &len) == 1) &&
+ (strlen(tname) == len) && ((long)*unitp >= 0)) {
+ if (snamep != NULL)
+ *snamep = NULL;
+ Free(sname);
+ Free(tname);
+ return (0);
+ }
+
+ /* fully-qualified without set */
+ if (((sscanf(tname, "/dev/md/dsk/d%lu%n", unitp, &len) == 1) &&
+ (strlen(tname) == len) && ((long)*unitp >= 0)) ||
+ ((sscanf(tname, "/dev/md/rdsk/d%lu%n", unitp, &len) == 1) &&
+ (strlen(tname) == len) && ((long)*unitp >= 0))) {
+ if (snamep != NULL)
+ *snamep = Strdup(MD_LOCAL_NAME);
+ Free(sname);
+ Free(tname);
+ return (0);
+ }
+
+ /* with set */
+ if (((sscanf(tname, "%[^/]/d%lu%n", sname, unitp, &len) == 2) &&
+ (strlen(tname) == len) && ((long)*unitp >= 0)) ||
+ ((sscanf(tname, "/dev/md/%[^/]/dsk/d%lu%n", sname,
+ unitp, &len) == 2) &&
+ (strlen(tname) == len) && ((long)*unitp >= 0)) ||
+ ((sscanf(tname, "/dev/md/%[^/]/rdsk/d%lu%n", sname,
+ unitp, &len) == 2) &&
+ (strlen(tname) == len) && ((long)*unitp >= 0))) {
+ /* hand the scratch buffer itself to the caller */
+ if (snamep != NULL) {
+ *snamep = sname;
+ } else {
+ Free(sname);
+ }
+ Free(tname);
+ return (0);
+ }
+
+ /* no match */
+ if (snamep != NULL)
+ *snamep = NULL;
+ Free(sname);
+ Free(tname);
+ return (-1);
+}
+
+/*
+ * FUNCTION:	parse_device()
+ * INPUT:	sp - pointer to setname struct
+ *		uname - Name of either a hotspare pool or metadevice
+ *			This can either be a fully qualified path or
+ *			in the form [set name/]device
+ * OUTPUT:	setnamep - name of the set that uname is in
+ *		uname - name of the hotspare pools or metadevice
+ *			only contains the name of the device with all
+ *			other path information stripped off.
+ * PURPOSE:	Parse uname and sp into the set name and device name strings.
+ *		If the set name is specified as part of uname then use that
+ *		otherwise attempt to get the set name from sp.
+ *
+ * uname is rewritten in place.  Every result is no longer than the
+ * original string, so the caller's buffer is always large enough.
+ * *setnamep (when setnamep is not NULL) is either NULL or a Strdup()ed
+ * string the caller must free.
+ */
+static void
+parse_device(
+	mdsetname_t	*sp,
+	char		*uname,
+	char		**setnamep /* dynamically alloced - caller must free */
+)
+{
+	char		setname[FILENAME_MAX+1];
+	char		*tname = Malloc(strlen(uname) + 1);
+	size_t		tlen;
+
+	int		len;
+	char *up;
+	char *tp;
+	int lcws;	/* last character was slash */
+
+	/* Now copy uname to tname by throwing away any duplicate '/' */
+	for (lcws = 0, tp = tname, up = uname; *up; up++) {
+		if (lcws) {
+			if (*up == '/') {
+				continue;
+			} else {
+				lcws = 0;
+			}
+		}
+		if (*up == '/') {
+			lcws = 1;
+		}
+		*tp++ = *up; /* ++ is done by for loop */
+	}
+	*tp = '\0';
+	tlen = strlen(tname);
+
+	/*
+	 * In the tests below len is only examined once sscanf() has
+	 * reported the expected number of conversions, which guarantees
+	 * that %n has been stored.
+	 */
+
+	/* fully-qualified - local set */
+	if (((sscanf(tname, "/dev/md/dsk/%" VAL2STR(FILENAME_MAX) "s%n",
+	    uname, &len) == 1) && ((size_t)len == tlen)) ||
+	    ((sscanf(tname, "/dev/md/rdsk/%" VAL2STR(FILENAME_MAX) "s%n",
+	    uname, &len) == 1) && ((size_t)len == tlen))) {
+		if (setnamep != NULL)
+			*setnamep = NULL;
+		Free(tname);
+		return;
+	}
+
+	/*
+	 * with setname specified - either fully qualified or relative spec
+	 *
+	 * The set component is scanned with a bounded %[^/]: a plain %s
+	 * would also consume the '/' and the device name, so the relative
+	 * "set/device" form could never match, and an unbounded scanset
+	 * could overrun setname[].
+	 */
+	if (((sscanf(tname, "%" VAL2STR(FILENAME_MAX) "[^/]/%"
+	    VAL2STR(FILENAME_MAX) "s%n", setname, uname, &len) == 2) &&
+	    ((size_t)len == tlen)) ||
+	    ((sscanf(tname, "/dev/md/%" VAL2STR(FILENAME_MAX) "[^/]/dsk/%"
+	    VAL2STR(FILENAME_MAX) "s%n", setname, uname, &len) == 2) &&
+	    ((size_t)len == tlen)) ||
+	    ((sscanf(tname, "/dev/md/%" VAL2STR(FILENAME_MAX) "[^/]/rdsk/%"
+	    VAL2STR(FILENAME_MAX) "s%n", setname, uname, &len) == 2) &&
+	    ((size_t)len == tlen))) {
+
+		if (setnamep != NULL) {
+			*setnamep = Strdup(setname);
+		}
+		Free(tname);
+		return;
+	}
+
+	/*
+	 * without setname specified
+	 *
+	 * A failed attempt above may already have scribbled on uname, so
+	 * restore it from the squeezed copy.
+	 */
+	(void) strcpy(uname, tname);
+	if (setnamep != NULL) {
+		if (sp != NULL && !metaislocalset(sp))
+			*setnamep = Strdup(sp->setname);
+		else
+			*setnamep = NULL;
+	}
+	Free(tname);
+}
+
+/*
+ * parse up hotspare pool name
+ *
+ * Recognized forms are "hspNNN" and "<set>/hspNNN".
+ *
+ * uname  - name to parse; not modified
+ * snamep - if not NULL, receives the set name: NULL for the form
+ *	    without a set and on failure, otherwise an allocated string
+ *	    that the caller must free
+ * hspp   - if not NULL, receives the hotspare pool number
+ *
+ * Returns 0 when uname is a hotspare pool name, -1 otherwise.
+ */
+static int
+parse_hsp(
+	char		*uname,
+	char		**snamep,
+	hsp_t		*hspp
+)
+{
+	size_t		ulen = strlen(uname);
+	char		*sname = Malloc(ulen + 1);
+	hsp_t		hsp;
+	int		len;
+
+	/* handle dont cares */
+	if (hspp == NULL)
+		hspp = &hsp;
+
+	/*
+	 * len is only read after sscanf() reported the expected number
+	 * of conversions, i.e. after %n has been stored.
+	 */
+
+	/* without set */
+	if ((sscanf(uname, "hsp%03u%n", hspp, &len) == 1) &&
+	    ((size_t)len == ulen) && ((long)*hspp >= 0)) {
+		if (snamep != NULL)
+			*snamep = NULL;
+		Free(sname);
+		return (0);
+	}
+
+	/* with set */
+	if ((sscanf(uname, "%[^/]/hsp%03u%n", sname,
+	    hspp, &len) == 2) &&
+	    ((size_t)len == ulen) && ((long)*hspp >= 0)) {
+		if (snamep != NULL) {
+			*snamep = sname;
+		} else {
+			Free(sname);
+		}
+		return (0);
+	}
+
+	/*
+	 * no match - like parse_metadevice(), never leave the caller's
+	 * set name pointer indeterminate
+	 */
+	if (snamep != NULL)
+		*snamep = NULL;
+	Free(sname);
+	return (-1);
+}
+
+/*
+ * Build the canonical name of a metadevice.
+ *
+ * The result is "d<unit>" when sname is NULL or names the local set,
+ * and "<sname>/d<unit>" otherwise.  The string is allocated with
+ * Malloc() and belongs to the caller.
+ */
+static char *
+canon_metadevice(
+	char	*sname,
+	unit_t	unit
+)
+{
+	char	*canon;
+	size_t	sz;
+	int	local;
+
+	local = (sname == NULL) || (strcmp(sname, MD_LOCAL_NAME) == 0);
+
+	/* 20 characters of room for the unit number */
+	if (local) {
+		sz = strlen("d") + 20 + 1;
+		canon = Malloc(sz);
+		(void) snprintf(canon, sz, "d%lu", unit);
+		return (canon);
+	}
+
+	sz = strlen(sname) + strlen("/d") + 20 + 1;
+	canon = Malloc(sz);
+	(void) snprintf(canon, sz, "%s/d%lu", sname, unit);
+	return (canon);
+}
+
+/*
+ * canonicalize hotspare pool name
+ *
+ * Returns "hspNNN" when sname is NULL or names the local set, and
+ * "<sname>/hspNNN" otherwise.  The string is allocated with Malloc()
+ * and belongs to the caller.
+ *
+ * The buffer leaves room for 20 digits, as canon_metadevice() does, so
+ * a pool number with more than three digits can neither overrun the
+ * buffer nor be silently truncated.  Both branches print the number
+ * with the same conversion that parse_hsp() uses to read it.
+ */
+static char *
+canon_hsp(
+	char	*sname,
+	hsp_t	hsp
+)
+{
+	char	*cname;
+	size_t	len;
+
+	if ((sname == NULL) || (strcmp(sname, MD_LOCAL_NAME) == 0)) {
+		len = strlen("hsp") + 20 + 1;
+		cname = Malloc(len);
+		(void) snprintf(cname, len, "hsp%03u", (uint_t)hsp);
+	} else {
+		len = strlen(sname) + strlen("/hsp") + 20 + 1;
+		cname = Malloc(len);
+		(void) snprintf(cname, len, "%s/hsp%03u", sname,
+		    (uint_t)hsp);
+	}
+
+	return (cname);
+}
+
+/*
+ * Produce the canonical "[set/]device" spelling of uname.
+ *
+ * parse_device() splits the name: uname is rewritten in place to the
+ * bare device name and the set name, if any, comes back separately.
+ * The returned string is allocated and belongs to the caller.
+ *
+ * NOTE: this is really only for use by meta_tab*
+ */
+char *
+meta_canonicalize(
+	mdsetname_t	*sp,
+	char		*uname
+)
+{
+	char	*setnm = NULL;
+	char	*canon;
+	size_t	canon_sz;
+
+	/* return the set name and dev name */
+	parse_device(sp, uname, &setnm);
+
+	/* no set: the device name alone is canonical */
+	if (setnm == NULL)
+		return (Strdup(uname));
+
+	/* room for '/' and the terminating NUL */
+	canon_sz = strlen(uname) + strlen(setnm) + 2;
+	canon = Malloc(canon_sz);
+	(void) snprintf(canon, canon_sz, "%s/%s", setnm, uname);
+	Free(setnm);
+
+	return (canon);
+}
+
+/*
+ * Return 1 if uname parses as a metadevice name (see
+ * parse_metadevice()), 0 otherwise.
+ */
+int
+is_metaname(
+	char	*uname
+)
+{
+	return (parse_metadevice(uname, NULL, NULL) == 0);
+}
+
+/*
+ * Return 1 if uname parses as a hotspare pool name (see parse_hsp()),
+ * 0 otherwise.
+ */
+int
+is_hspname(
+	char	*uname
+)
+{
+	return (parse_hsp(uname, NULL, NULL) == 0);
+}
+
+/*
+ * mdsetname_t stuff
+ */
+
+/*
+ * Reset a setname structure to all zero bytes.  Nothing is freed here;
+ * see metafreesetname() for that.
+ */
+static void
+metainitsetname(
+	mdsetname_t	*sp
+)
+{
+	(void) memset(sp, 0, sizeof (*sp));
+}
+
+/*
+ * Release a set descriptor: the node list (only walked when
+ * MD_MNSET_DESC() says the descriptor is a multi-node one), the drive
+ * descriptors and finally the descriptor itself.
+ */
+static void
+metafreesetdesc(md_set_desc *sd)
+{
+	if (MD_MNSET_DESC(sd)) {
+		md_mnnode_desc	*node;
+
+		/* pop nodes off the head until the list is empty */
+		while ((node = sd->sd_nodelist) != NULL) {
+			sd->sd_nodelist = node->nd_next;
+			Free(node);
+		}
+	}
+
+	metafreedrivedesc(&sd->sd_drvs);
+	Free(sd);
+}
+
+/*
+ * Release everything a setname structure owns (its name and its cached
+ * set descriptor) and reset the structure.  The structure itself is
+ * not freed.
+ */
+static void
+metafreesetname(
+	mdsetname_t	*sp
+)
+{
+	if (sp->setname != NULL) {
+		Free(sp->setname);
+	}
+
+	if (sp->setdesc != NULL) {
+		metafreesetdesc(sp->setdesc);
+	}
+
+	metainitsetname(sp);
+}
+
+/*
+ * Empty the setname cache: free every cached setname together with its
+ * list element and leave setlistp NULL.
+ */
+static void
+metaflushsetnames()
+{
+	mdsetnamelist_t	*cur = setlistp;
+
+	while (cur != NULL) {
+		mdsetnamelist_t	*following = cur->next;
+
+		metafreesetname(cur->sp);
+		Free(cur->sp);
+		Free(cur);
+		cur = following;
+	}
+
+	setlistp = NULL;
+}
+
+/*
+ * Translate a set name into a set number.
+ *
+ * A NULL name or MD_LOCAL_NAME yields set number 0 without any lookup.
+ * Any other name is resolved with getsetbyname().
+ *
+ * Returns 0 with *setnop filled in, or -1 on failure.  On failure ep
+ * holds whatever getsetbyname() reported, except that an
+ * RPC_PROGNOTREGISTERED error is replaced by MDE_NO_SET.
+ */
+static int
+getsetno(
+	char		*sname,
+	set_t		*setnop,
+	md_error_t	*ep
+)
+{
+	md_set_record	*rec;
+
+	/* local set */
+	if ((sname == NULL) || (strcmp(sname, MD_LOCAL_NAME) == 0)) {
+		*setnop = 0;
+		return (0);
+	}
+
+	/* shared set */
+	rec = getsetbyname(sname, ep);
+	if (rec != NULL) {
+		*setnop = rec->sr_setno;
+		free_sr(rec);
+		return (0);
+	}
+
+	if (mdisrpcerror(ep, RPC_PROGNOTREGISTERED)) {
+		size_t	msgsz = strlen(sname) + 30;
+		char	*msg = Malloc(msgsz);
+
+		(void) snprintf(msg, msgsz, "setname \"%s\"", sname);
+		(void) mderror(ep, MDE_NO_SET, msg);
+		Free(msg);
+	}
+
+	return (-1);
+}
+
+/*
+ * find setname from name
+ *
+ * Returns the cached mdsetname_t for sname, creating and caching one
+ * (after resolving the set number with getsetno()) when there is none.
+ * Returns NULL, with ep set by getsetno(), when the name cannot be
+ * resolved.  The returned structure is owned by the cache.
+ *
+ * Cache entries made by metafakesetname() may have no name yet; they
+ * are skipped by the name comparison instead of being handed to
+ * strcmp(), and such an entry is adopted (and given the name) when its
+ * set number turns out to be the one sname resolves to.
+ */
+mdsetname_t *
+metasetname(
+	char		*sname,
+	md_error_t	*ep
+)
+{
+	mdsetnamelist_t	**tail;
+	mdsetnamelist_t	*slp;
+	set_t		setno;
+	mdsetname_t	*sp;
+
+	/* look for cached value first */
+	assert(sname != NULL);
+	for (tail = &setlistp; (*tail != NULL); tail = &(*tail)->next) {
+		sp = (*tail)->sp;
+		if ((sp->setname != NULL) &&
+		    (strcmp(sp->setname, sname) == 0)) {
+			return (sp);
+		}
+	}
+
+	/* setup set */
+	if (getsetno(sname, &setno, ep) != 0)
+		return (NULL);
+
+	/* reuse a nameless entry for the same set rather than duplicate it */
+	for (slp = setlistp; (slp != NULL); slp = slp->next) {
+		sp = slp->sp;
+		if ((sp->setname == NULL) && (sp->setno == setno)) {
+			sp->setname = Strdup(sname);
+			return (sp);
+		}
+	}
+
+	/* allocate new list element and setname */
+	*tail = Zalloc(sizeof (**tail));
+	sp = (*tail)->sp = Zalloc(sizeof (*sp));
+
+	sp->setname = Strdup(sname);
+	sp->setno = setno;
+	sp->lockfd = MD_NO_LOCK;
+
+	return (sp);
+}
+
+/*
+ * Return the cached setname structure for a set number, resolving and
+ * caching it through metasetname() when it is not in the cache yet.
+ * Returns NULL with ep set when the set cannot be resolved.
+ */
+mdsetname_t *
+metasetnosetname(
+	set_t		setno,
+	md_error_t	*ep
+)
+{
+	mdsetnamelist_t	*ent;
+	md_set_record	*rec;
+	mdsetname_t	*found;
+
+	/* look for cached value first */
+	for (ent = setlistp; ent != NULL; ent = ent->next) {
+		if (ent->sp->setno == setno)
+			return (ent->sp);
+	}
+
+	/* local set */
+	if (setno == MD_LOCAL_SET)
+		return (metasetname(MD_LOCAL_NAME, ep));
+
+	/* shared set: fetch the record just to learn the name */
+	rec = getsetbynum(setno, ep);
+	if (rec == NULL)
+		return (NULL);
+
+	found = metasetname(rec->sr_setname, ep);
+	free_sr(rec);
+
+	return (found);
+}
+
+/*
+ * Return the cached setname structure for setno, creating one from the
+ * caller's arguments when none is cached.  No lookup is made, so the
+ * number and name are taken on trust.
+ *
+ * sname may be NULL, in which case a new entry has no name.  An
+ * existing entry without a name picks up sname when one is given.
+ */
+mdsetname_t *
+metafakesetname(
+	set_t		setno,
+	char		*sname
+)
+{
+	mdsetnamelist_t	**slot = &setlistp;
+	mdsetname_t	*sp;
+
+	/* look for cached value first */
+	while (*slot != NULL) {
+		sp = (*slot)->sp;
+		if (sp->setno == setno) {
+			if ((sname != NULL) && (sp->setname == NULL))
+				sp->setname = Strdup(sname);
+			return (sp);
+		}
+		slot = &(*slot)->next;
+	}
+
+	/* allocate new list element and setname at the end of the list */
+	*slot = Zalloc(sizeof (**slot));
+	sp = Zalloc(sizeof (*sp));
+	(*slot)->sp = sp;
+
+	sp->lockfd = MD_NO_LOCK;
+	sp->setno = setno;
+	if (sname != NULL)
+		sp->setname = Strdup(sname);
+
+	return (sp);
+}
+
+
+/*
+ * Build a set descriptor (md_set_desc) from a set record.
+ *
+ * The common fields are copied for every set.  When MD_MNSET_DESC()
+ * says the set is a multi-node one, sr is treated as an
+ * md_mnset_record and the node list is rebuilt from its node chain;
+ * otherwise the sr_nodes[] names are copied.
+ *
+ * The descriptor is allocated here and returned to the caller; this
+ * function does not store it anywhere.  There is no error return: a
+ * failure to read the membership list simply leaves every nd_priv_ic
+ * empty.
+ */
+md_set_desc *
+sr2setdesc(
+	md_set_record	*sr
+)
+{
+	md_set_desc			*sd;
+	int				i;
+	md_mnset_record			*mnsr;
+	md_mnnode_desc			*nd, *nd_prev = 0;
+	md_mnnode_record		*nr;
+	md_error_t			status = mdnullerror;
+	md_error_t			*ep = &status;
+	int				nodecnt, nrcnt;
+	mndiskset_membershiplist_t	*nl, *nl2;
+
+	sd = Zalloc(sizeof (*sd));
+	sd->sd_ctime = sr->sr_ctime;
+	sd->sd_genid = sr->sr_genid;
+	sd->sd_setno = sr->sr_setno;
+	sd->sd_flags = sr->sr_flags;
+
+	if (MD_MNSET_DESC(sd)) {
+		mnsr = (md_mnset_record *)sr;
+		(void) strlcpy(sd->sd_mn_master_nodenm,
+		    mnsr->sr_master_nodenm, sizeof (sd->sd_mn_master_nodenm));
+		sd->sd_mn_master_nodeid = mnsr->sr_master_nodeid;
+		if (strcmp(mnsr->sr_master_nodenm, mynode()) == 0) {
+			sd->sd_mn_am_i_master = 1;
+		}
+
+		/*
+		 * Get membershiplist from API routine.  If there's
+		 * an error, just use a NULL nodelist.  The error is
+		 * local to this function and not reported, so clear it
+		 * here rather than dropping whatever it holds.
+		 */
+		if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+			mdclrerror(ep);
+			nodecnt = 0;  /* no nodes are alive */
+			nl = NULL;
+		}
+		nr = mnsr->sr_nodechain;
+		nrcnt = 0;
+		/*
+		 * Node descriptor node list must be built in
+		 * ascending order of nodeid.  The nodechain
+		 * in the mnset record is in ascending order,
+		 * so just make them the same.
+		 */
+		while (nr) {
+			nd = Zalloc(sizeof (*nd));
+			if (nd_prev) {
+				nd_prev->nd_next = nd;
+			} else {
+				sd->sd_nodelist = nd;
+			}
+			nd->nd_ctime = nr->nr_ctime;
+			nd->nd_genid = nr->nr_genid;
+			nd->nd_flags = nr->nr_flags;
+
+			(void) strlcpy(nd->nd_nodename, nr->nr_nodename,
+			    sizeof (nd->nd_nodename));
+			nd->nd_nodeid = nr->nr_nodeid;
+			if (strcmp(nd->nd_nodename, mynode()) == 0) {
+				sd->sd_mn_mynode = nd;
+			}
+			if (nd->nd_nodeid == sd->sd_mn_master_nodeid) {
+				sd->sd_mn_masternode = nd;
+			}
+
+			/*
+			 * If node is marked ALIVE, then set priv_ic
+			 * from membership list.  During the early part
+			 * of a reconfig cycle, the membership list may
+			 * have been changed, (a node entering or leaving
+			 * the cluster), but rpc.metad hasn't flushed
+			 * its data yet.  So, if node is marked alive, but
+			 * is no longer in the membership list (node has
+			 * left the cluster) then just leave priv_ic to NULL.
+			 */
+			if (nd->nd_flags & MD_MN_NODE_ALIVE) {
+				nl2 = nl;
+				while (nl2) {
+					if (nl2->msl_node_id ==
+					    nd->nd_nodeid) {
+						(void) strlcpy(
+						    nd->nd_priv_ic,
+						    nl2->msl_node_addr,
+						    sizeof (nd->nd_priv_ic));
+						break;
+					}
+					nl2 = nl2->next;
+				}
+			}
+
+			nr = nr->nr_next;
+			nrcnt++;
+			nd_prev = nd;
+		}
+		sd->sd_mn_numnodes = nrcnt;
+		if (nodecnt)
+			meta_free_nodelist(nl);
+
+		/* Just copying to keep consistent view between sr & sd */
+		(void) strlcpy(sd->sd_nodes[0], mnsr->sr_nodes_bw_compat[0],
+		    sizeof (sd->sd_nodes[0]));
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++)
+			(void) strlcpy(sd->sd_nodes[i], sr->sr_nodes[i],
+			    sizeof (sd->sd_nodes[i]));
+	}
+
+	sd->sd_med = sr->sr_med;	/* structure assignment */
+
+	return (sd);
+}
+
+/*
+ * Return the set descriptor for sp, building and caching it in
+ * sp->setdesc on first use.
+ *
+ * The set record is looked up by name first and, if that fails and
+ * the set number is greater than zero, by number.  Returns NULL with
+ * ep set when neither lookup succeeds.  The descriptor is owned by sp;
+ * metaflushsetname() discards it.
+ */
+md_set_desc *
+metaget_setdesc(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_record	*sr;
+
+	if (sp->setdesc != NULL)
+		return (sp->setdesc);
+
+	if (sp->setname != NULL) {
+		if ((sr = getsetbyname(sp->setname, ep)) != NULL) {
+			sp->setdesc = sr2setdesc(sr);
+			free_sr(sr);
+			return (sp->setdesc);
+		}
+	}
+
+	if (sp->setno > 0) {
+		/*
+		 * Drop the error left by the failed lookup by name, so
+		 * that a successful lookup by number does not return
+		 * with a stale error still sitting in ep.
+		 */
+		mdclrerror(ep);
+		if ((sr = getsetbynum(sp->setno, ep)) != NULL) {
+			sp->setdesc = sr2setdesc(sr);
+			free_sr(sr);
+			return (sp->setdesc);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Throw away the set descriptor cached in sp, if any.  The setname
+ * structure itself stays in the cache.  A NULL sp is accepted.
+ */
+void
+metaflushsetname(mdsetname_t *sp)
+{
+	if ((sp != NULL) && (sp->setdesc != NULL)) {
+		metafreesetdesc(sp->setdesc);
+		sp->setdesc = NULL;
+	}
+}
+
+/*
+ * Return 1 if sp names the local set (its name equals MD_LOCAL_NAME),
+ * 0 otherwise.  sp must have a name.  The assertions check that the
+ * set number agrees with the name.
+ */
+int
+metaislocalset(
+	mdsetname_t	*sp
+)
+{
+	int	local;
+
+	assert(sp->setname != NULL);
+
+	local = (strcmp(sp->setname, MD_LOCAL_NAME) == 0);
+	if (local) {
+		assert(sp->setno == MD_LOCAL_SET);
+	} else {
+		assert(sp->setno != MD_LOCAL_SET);
+	}
+
+	return (local);
+}
+
+/*
+ * Return 1 if sp1 and sp2 carry the same set name, 0 otherwise.  The
+ * assertions check that the set numbers agree with that answer.
+ */
+int
+metaissameset(
+	mdsetname_t	*sp1,
+	mdsetname_t	*sp2
+)
+{
+	int	same = (strcmp(sp1->setname, sp2->setname) == 0);
+
+	if (same) {
+		assert(sp1->setno == sp2->setno);
+	} else {
+		assert(sp1->setno != sp2->setno);
+	}
+
+	return (same);
+}
+
+/*
+ * Reconcile a set name with the set the caller is already working in.
+ *
+ * If *spp is NULL it is filled in from sname through metasetname().
+ * If *spp is already set, sname must name the same set.
+ *
+ * Returns 0 on success.  On failure returns -1 with ep set by
+ * metasetname(), or the result of mderror(MDE_SET_DIFF) when the
+ * names differ.
+ */
+static int
+chkset(
+	mdsetname_t	**spp,
+	char		*sname,
+	md_error_t	*ep
+)
+{
+	mdsetname_t	*cur = *spp;
+
+	/* no set yet: store new set name and number */
+	if (cur == NULL) {
+		*spp = metasetname(sname, ep);
+		return ((*spp == NULL) ? -1 : 0);
+	}
+
+	/* we already have a set, make sure it's the same */
+	if ((cur->setname == sname) || (strcmp(cur->setname, sname) == 0))
+		return (0);
+
+	return (mderror(ep, MDE_SET_DIFF, sname));
+}
+
+/*
+ * Like chkset(), but sname may be NULL.  A NULL sname defaults to the
+ * name of *spp when there is one, and to MD_LOCAL_NAME otherwise.
+ */
+static int
+chksetname(
+	mdsetname_t	**spp,
+	char		*sname,
+	md_error_t	*ep
+)
+{
+	if (sname == NULL)
+		sname = (*spp != NULL) ? (*spp)->setname : MD_LOCAL_NAME;
+
+	/* see if changed */
+	return (chkset(spp, sname, ep));
+}
+
+/*
+ * Like chkset(), but the set is given by number.  Set number 0 is
+ * checked as MD_LOCAL_NAME; any other number is turned into a name
+ * with getsetbynum() first.  Returns -1 with ep set when that lookup
+ * fails, otherwise the result of chkset().
+ */
+static int
+chksetno(
+	mdsetname_t	**spp,
+	set_t		setno,
+	md_error_t	*ep
+)
+{
+	md_set_record	*rec;
+	int		status;
+
+	/* local set */
+	if (setno == 0)
+		return (chkset(spp, MD_LOCAL_NAME, ep));
+
+	/* shared set */
+	rec = getsetbynum(setno, ep);
+	if (rec == NULL)
+		return (-1);
+
+	status = chkset(spp, rec->sr_setname, ep);
+	free_sr(rec);
+
+	return (status);
+}
+
+/*
+ * mddrivename_t stuff
+ */
+
+/*
+ * Put a name structure into its pristine state: all zero bytes, except
+ * that the device is NODEV64, the key is MD_KEYBAD and the start and
+ * end blocks are -1.
+ */
+static void
+metainitname(
+	mdname_t	*np
+)
+{
+	(void) memset(np, 0, sizeof (*np));
+
+	np->start_blk = -1;
+	np->end_blk = -1;
+	np->key = MD_KEYBAD;
+	np->dev = NODEV64;
+}
+
+/*
+ * Free the strings owned by a name structure and reset the structure
+ * with metainitname().  The structure itself is not freed.
+ */
+static void
+metafreename(
+	mdname_t	*np
+)
+{
+	if (np->cname != NULL) {
+		Free(np->cname);
+	}
+	if (np->bname != NULL) {
+		Free(np->bname);
+	}
+	if (np->rname != NULL) {
+		Free(np->rname);
+	}
+	if (np->devicesname != NULL) {
+		Free(np->devicesname);
+	}
+
+	metainitname(np);
+}
+
+/*
+ * Put a drive name structure into its pristine state: all zero bytes
+ * with the side names key set to MD_KEYBAD.
+ */
+static void
+metainitdrivename(
+	mddrivename_t	*dnp
+)
+{
+	(void) memset(dnp, 0, sizeof (mddrivename_t));
+	dnp->side_names_key = MD_KEYBAD;
+}
+
+/*
+ * Free the whole side names list of a drive, including the strings
+ * each element owns, and leave dnp->side_names NULL.
+ */
+void
+metaflushsidenames(
+	mddrivename_t	*dnp
+)
+{
+	mdsidenames_t	*cur = dnp->side_names;
+
+	while (cur != NULL) {
+		mdsidenames_t	*following = cur->next;
+
+		if (cur->dname != NULL)
+			Free(cur->dname);
+		if (cur->cname != NULL)
+			Free(cur->cname);
+		Free(cur);
+
+		cur = following;
+	}
+
+	dnp->side_names = NULL;
+}
+
+/*
+ * Release everything a drive name structure owns - names, vtoc, the
+ * per-slice names and their array, side names, misc name and unit -
+ * then reset the structure with metainitdrivename().  The structure
+ * itself is not freed.
+ */
+void
+metafreedrivename(
+	mddrivename_t	*dnp
+)
+{
+	uint_t	i;
+
+	if (dnp->cname != NULL) {
+		Free(dnp->cname);
+	}
+	if (dnp->rname != NULL) {
+		Free(dnp->rname);
+	}
+
+	metafreevtoc(&dnp->vtoc);
+
+	/* each slice name first, then the array holding them */
+	for (i = 0; i < dnp->parts.parts_len; i++) {
+		metafreename(&dnp->parts.parts_val[i]);
+	}
+	if (dnp->parts.parts_val != NULL) {
+		Free(dnp->parts.parts_val);
+	}
+
+	metaflushsidenames(dnp);
+
+	if (dnp->miscname != NULL) {
+		Free(dnp->miscname);
+	}
+
+	meta_free_unit(dnp);
+	metainitdrivename(dnp);
+}
+
+/*
+ * Empty the drive name cache: free every cached drive name together
+ * with its list element and leave drivelistp NULL.
+ */
+static void
+metaflushdrivenames()
+{
+	mddrivenamelist_t	*cur = drivelistp;
+
+	while (cur != NULL) {
+		mddrivenamelist_t	*following = cur->next;
+
+		metafreedrivename(cur->drivenamep);
+		Free(cur->drivenamep);
+		Free(cur);
+		cur = following;
+	}
+
+	drivelistp = NULL;
+}
+
+/*
+ * peel off s%u from name
+ *
+ * Returns a newly allocated string holding the disk part of a slice
+ * name:
+ *	- a metadevice name is returned unchanged;
+ *	- old style /dev/rXXNN[a-h] and /dev/XXNN[a-h] names lose their
+ *	  trailing partition letter;
+ *	- a name ending in <digit>s<digits> loses the s<digits> suffix;
+ *	- anything else is returned unchanged.
+ *
+ * name itself is never written to.  The caller frees the result.
+ */
+char *
+metadiskname(
+	char	*name
+)
+{
+	char	*p, *e;
+	char	onmb[BUFSIZ+1], cnmb[BUFSIZ];
+	uint_t	d = 0;
+	int	l = 0;
+	int	cl = strlen(name);
+
+	if (is_metaname(name))
+		return (Strdup(name));
+
+	/*
+	 * Handle old style names, which are of the form /dev/rXXNN[a-h].
+	 */
+	if (sscanf(name, "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u%*[a-h]%n",
+	    onmb, &d, &l) == 2 && l == cl) {
+		(void) snprintf(cnmb, sizeof (cnmb), "/dev/r%s%u", onmb, d);
+		return (Strdup(cnmb));
+	}
+
+	/*
+	 * Handle old style names, which are of the form /dev/XXNN[a-h].
+	 */
+	if (sscanf(name, "/dev/%" VAL2STR(BUFSIZ) "[^0-9/]%u%*[a-h]%n",
+	    onmb, &d, &l) == 2 && l == cl) {
+		(void) snprintf(cnmb, sizeof (cnmb), "/dev/%s%u", onmb, d);
+		return (Strdup(cnmb));
+	}
+
+	/* nothing to peel off an empty name */
+	if (cl == 0)
+		return (Strdup(name));
+
+	/* gobble number and 's' */
+	p = e = name + cl - 1;
+	for (; (p > name); --p) {
+		if (!isdigit((unsigned char)*p))
+			break;
+	}
+
+	/* no trailing digits at all, or nothing but digits */
+	if ((p == e) || (p <= name))
+		return (Strdup(name));
+
+	/* the digits must be introduced by 's' */
+	if (*p != 's')
+		return (Strdup(name));
+
+	/* and the 's' must follow a digit that is not the first character */
+	if (((p - 1) <= name) || (!isdigit((unsigned char)p[-1])))
+		return (Strdup(name));
+
+	/* cut the copy, not the caller's string, at the 's' */
+	e = Strdup(name);
+	e[p - name] = '\0';
+
+	return (e);
+}
+
+/*
+ * Free the elements of a drive name list.  Only the list elements are
+ * released; the drive names they point to are left alone.
+ */
+void
+metafreedrivenamelist(
+	mddrivenamelist_t	*dnlp
+)
+{
+	while (dnlp != NULL) {
+		mddrivenamelist_t	*following = dnlp->next;
+
+		Free(dnlp);
+		dnlp = following;
+	}
+}
+
+/*
+ * build list of drivenames
+ *
+ * Resolves each of the argc names in argv with metadrivename() and
+ * appends the results, in order, to a new list stored in *dnlpp.
+ *
+ * Returns the number of names processed.  If any name fails to
+ * resolve, the partial list is freed, *dnlpp is set to NULL and -1 is
+ * returned with ep set by metadrivename().
+ */
+int
+metadrivenamelist(
+	mdsetname_t		**spp,
+	mddrivenamelist_t	**dnlpp,
+	int			argc,
+	char			*argv[],
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	**tailpp = dnlpp;
+	int			count = 0;
+
+	for (*dnlpp = NULL; (argc > 0); ++count, --argc, ++argv) {
+		mddrivenamelist_t	*dnlp = Zalloc(sizeof (*dnlp));
+
+		if ((dnlp->drivenamep = metadrivename(spp, argv[0],
+		    ep)) == NULL) {
+			/*
+			 * dnlp is not linked into the list yet, so it
+			 * has to be released separately.
+			 */
+			Free(dnlp);
+			metafreedrivenamelist(*dnlpp);
+			*dnlpp = NULL;
+			return (-1);
+		}
+		*tailpp = dnlp;
+		tailpp = &dnlp->next;
+	}
+	return (count);
+}
+
+/*
+ * Append dnp to the end of the drive name list that starts at *dnlpp.
+ * The list is walked to its end on every call.  Returns dnp.
+ */
+mddrivename_t *
+metadrivenamelist_append(
+	mddrivenamelist_t	**dnlpp,
+	mddrivename_t		*dnp
+)
+{
+	mddrivenamelist_t	*node;
+
+	/* run to end of list */
+	while (*dnlpp != NULL)
+		dnlpp = &(*dnlpp)->next;
+
+	/* allocate new list element and hook it in */
+	node = Zalloc(sizeof (*node));
+	node->drivenamep = dnp;
+	*dnlpp = node;
+
+	return (dnp);
+}
+
+/*
+ * FUNCTION: meta_drivenamelist_append_wrapper()
+ * INPUT: tailpp - pointer to the list tail pointer
+ * dnp - name node to be appended to list
+ * OUTPUT: none
+ * RETURNS: mddrivenamelist_t ** - the tail pointer to pass to the next
+ * call; it addresses the link that holds the element just appended.
+ * PURPOSE: wrapper to metadrivenamelist_append for performance.
+ * metadrivenamelist_append finds the tail each time which slows
+ * down long lists. By keeping track of the tail ourselves
+ * we can change metadrivenamelist_append into a
+ * constant time operation.
+ */
+mddrivenamelist_t **
+meta_drivenamelist_append_wrapper(
+ mddrivenamelist_t **tailpp,
+ mddrivename_t *dnp
+)
+{
+ /* the walk inside starts at *tailpp, so it is at most one step long */
+ (void) metadrivenamelist_append(tailpp, dnp);
+
+ /* If it's the first item in the list, return it instead of the next */
+ if ((*tailpp)->next == NULL)
+ return (tailpp);
+
+ /* otherwise the new element hangs off the old tail element */
+ return (&(*tailpp)->next);
+}
+
+
+/*
+ * mdname_t stuff
+ */
+
+/*
+ * Check the set of a user supplied name and return the name to
+ * compare cached entries against.
+ *
+ * The set named in uname (or the default chosen by chksetname()) is
+ * reconciled with *spp, which is filled in if it was NULL.  For a
+ * metadevice the result is its canonical name within that set; any
+ * other name is returned as a plain copy.
+ *
+ * Returns an allocated string the caller must free, or NULL with ep
+ * set when the set check fails.
+ */
+char *
+meta_name_getname(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	char	*sname = NULL;
+	unit_t	unit;
+	int	ismeta;
+	int	setok;
+
+	/* check set name */
+	ismeta = (parse_metadevice(uname, &sname, &unit) == 0);
+	setok = (chksetname(spp, sname, ep) == 0);
+
+	/* the parsed set name is not needed past this point */
+	if (sname != NULL)
+		Free(sname);
+
+	if (!setok)
+		return (NULL);
+
+	/* return comparison name */
+	if (ismeta)
+		return (canon_metadevice((*spp)->setname, unit));
+
+	return (Strdup(uname));
+}
+
+/*
+ * FUNCTION: getrname()
+ * INPUT: spp - the setname struct
+ * uname - the possibly unqualified device name
+ * OUTPUT: ep - return error pointer
+ * RETURNS: char* - character string containing the fully
+ * qualified raw device name
+ * PURPOSE: Create the fully qualified raw name for the possibly
+ * unqualified device name. If uname is an absolute
+ * path the raw name is derived from the input string.
+ * Otherwise, an attempt is made to get the rawname by
+ * catting "/dev/md/rdsk" and "/dev/rdsk".
+ *
+ * The returned string is allocated; the caller frees it. On failure
+ * NULL is returned and ep is set to ENOENT through mdsyserror().
+ * For a relative uname, *spp is dereferenced without a NULL check.
+ */
+static char *
+getrname(mdsetname_t **spp, char *uname, md_error_t *ep)
+{
+ char *rname,
+ *fname;
+ /* set when the candidate metadevice name starts with 'd' */
+ int constructed = 0;
+
+ assert(uname != NULL);
+ /* if it is an absolute name then just call rawname on the input */
+ if (uname[0] == '/') {
+ if ((rname = rawname(uname)) != NULL)
+ return (rname);
+
+ /* out of luck */
+ (void) mdsyserror(ep, ENOENT, uname);
+ return (NULL);
+ }
+
+ /*
+ * Check for metadevice before physical device.
+ * With the introduction of softpartitions it is more
+ * likely to be a metadevice.
+ */
+
+ /* metadevice short form */
+ if (metaislocalset(*spp)) {
+ fname = Malloc(strlen(uname) + strlen("/dev/md/rdsk/") + 1);
+ (void) strcpy(fname, "/dev/md/rdsk/");
+ (void) strcat(fname, uname);
+ if (*uname == 'd')
+ constructed = 1;
+ } else {
+ char *p;
+ size_t len;
+
+ /* drop anything up to and including the first '/' in uname */
+ if ((p = strchr(uname, '/')) != NULL) {
+ ++p;
+ } else {
+ p = uname;
+ }
+ len = strlen((*spp)->setname) + strlen(p) +
+ strlen("/dev/md//rdsk/") + 1;
+ fname = Malloc(len);
+ (void) snprintf(fname, len, "/dev/md/%s/rdsk/%s",
+ (*spp)->setname, p);
+ if (*p == 'd')
+ constructed = 1;
+ }
+ rname = rawname(fname);
+
+ /*
+ * Handle the case where we have a new metadevice that does not yet
+ * exist in the name-space. In this case we return the constructed
+ * metadevice name as that will exist after the metainit call has
+ * created it.
+ */
+ if ((rname == NULL) && constructed) {
+ rname = Strdup(fname);
+ }
+ Free(fname);
+ if (rname != NULL)
+ return (rname);
+
+ /* next guess: a physical device under /dev/rdsk */
+ fname = Malloc(strlen(uname) + strlen("/dev/rdsk/") + 1);
+ (void) strcpy(fname, "/dev/rdsk/");
+ (void) strcat(fname, uname);
+ rname = rawname(fname);
+ Free(fname);
+ if (rname != NULL)
+ return (rname);
+
+ /*
+ * If all else fails try the straight uname.
+ * NOTE: This check was at the beginning of getrname instead
+ * of here. It was moved to avoid a conflict with SC3.0. If
+ * a diskset was mounted with the same name it would hang
+ * the cluster in a loop. Example:
+ *
+ * fubar/d10 -m fubar/d0 fubar/d1
+ * mount /dev/md/fubar/dsk/d10 /fubar
+ *
+ * When the system was booted DiskSuite would try to take ownership
+ * of diskset fubar. This would cause rawname("fubar/d10") to be
+ * called. rawname() stats the string which caused the cluster
+ * reservation code to try and take ownership which it was already
+ * doing and a deadlock would occur. By moving this final attempt
+ * at resolving the rawname to the end we avoid this deadlock.
+ */
+ /* assignment inside the condition is intentional */
+ if (rname = rawname(uname))
+ return (rname);
+
+ /* out of luck */
+ (void) mdsyserror(ep, ENOENT, uname);
+ return (NULL);
+}
+
+/*
+ * get raw slice and drive names
+ *
+ * Returns the raw name for uname and stores the matching disk name
+ * (from metadiskname()) in *dnamep. Both strings are allocated; the
+ * caller frees them. On failure NULL is returned and *dnamep is NULL.
+ *
+ * getrname() is tried first. If it fails with ENOENT, the raw name is
+ * guessed from the spelling of uname alone, without touching the
+ * device.
+ */
+static char *
+getrawnames(
+ mdsetname_t **spp,
+ char *uname,
+ char **dnamep,
+ md_error_t *ep
+)
+{
+ char *rname;
+ size_t len;
+
+ /* initialize */
+ *dnamep = NULL;
+
+ /* get slice name */
+ if ((rname = getrname(spp, uname, ep)) != NULL) {
+ *dnamep = metadiskname(rname);
+ return (rname);
+ }
+
+ /*
+ * If name cannot be found, it may be because it is not accessible.
+ * If it is an absolute name, try all possible disk name formats and
+ * if it is device name, assume it is /dev/rdsk/...
+ *
+ * NOTE(review): three of the success paths below call mdclrerror()
+ * before returning, but the "old style block names" path and the
+ * ctd path return success with the ENOENT error still in ep.
+ * Confirm whether callers rely on that before making them agree.
+ */
+ if (mdissyserror(ep, ENOENT)) {
+ if (uname[0] == '/') {
+ /* Absolute name */
+ char *p;
+ uint_t d = 0;
+ int l = 0;
+ char onmb[BUFSIZ+1], snm[BUFSIZ+1];
+
+ /*
+ * Handle old style raw names
+ */
+ if (sscanf(uname,
+ "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+ "%" VAL2STR(BUFSIZ) "[a-h]%n",
+ onmb, &d, snm, &l) == 3 && l == strlen(uname)) {
+ mdclrerror(ep);
+ rname = Strdup(uname);
+ *dnamep = metadiskname(rname);
+ return (rname);
+ }
+
+ /*
+ * Handle old style block names
+ */
+ if (sscanf(uname,
+ "/dev/%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+ "%" VAL2STR(BUFSIZ) "[a-h]%n",
+ onmb, &d, snm, &l) == 3 && l == strlen(uname)) {
+ /* one extra byte for the inserted 'r', one for the NUL */
+ len = strlen(uname) + 1 + 1;
+ rname = Malloc(len);
+ (void) snprintf(rname, len, "/dev/r%s%u%s",
+ onmb, d, snm);
+ *dnamep = metadiskname(rname);
+ return (rname);
+ }
+
+ /* /.../dsk/... */
+ if ((p = strstr(uname, "/dsk/")) != NULL) {
+ mdclrerror(ep);
+ /* p now points at "dsk/..."; splice an 'r' in front of it */
+ ++p;
+ rname = Malloc(strlen(uname) + 1 + 1);
+ (void) strncpy(rname, uname, (p - uname));
+ rname[(p - uname)] = 'r';
+ (void) strcpy(&rname[(p - uname) + 1], p);
+ *dnamep = metadiskname(rname);
+ return (rname);
+ }
+
+ /* /.../rdsk/... */
+ else if (strstr(uname, "/rdsk/") != NULL) {
+ mdclrerror(ep);
+ rname = Strdup(uname);
+ *dnamep = metadiskname(rname);
+ return (rname);
+ }
+ } else {
+ /*
+ * If it's not an absolute name but is a valid ctd name,
+ * guess at /dev/rdsk/...
+ */
+ uint_t s;
+ if (parse_ctd(uname, &s) == 0) {
+ len = strlen(uname) + strlen("/dev/rdsk/") + 1;
+ rname = Malloc(len);
+ (void) snprintf(rname, len, "/dev/rdsk/%s",
+ uname);
+ *dnamep = metadiskname(rname);
+ return (rname);
+ }
+ }
+ }
+
+ /* out of luck */
+ return (NULL);
+}
+
+/*
+ * get number of slices for name
+ *
+ * rname  - raw slice name; must be dname followed by "s<number>"
+ * dname  - raw disk name
+ * slicep - receives the slice number parsed from rname
+ *
+ * Returns the number of slices of the disk, which is never less than
+ * V_NUMPAR, or -1 when rname does not have the expected form or its
+ * slice number is larger than that count.
+ */
+static int
+getnslice(
+	char		*rname,
+	char		*dname,
+	uint_t		*slicep
+)
+{
+	char		*srname;
+	uint_t		nslice;
+	size_t		dl = strlen(dname);
+	size_t		rl = strlen(rname);
+	int		l = 0;		/* %n stores an int */
+	size_t		len;
+
+	/*
+	 * get our slice number - works only with names that end in s%u -
+	 * all others return -1.
+	 *
+	 * The scan starts at rname[dl], so a complete match consumes
+	 * exactly the rl - dl characters that follow the disk name.
+	 */
+	if (dl >= rl ||
+	    sscanf(&rname[dl], "s%u%n", slicep, &l) != 1 ||
+	    (size_t)l != (rl - dl) ||
+	    (int)*slicep < 0) {
+		return (-1);
+	}
+
+	/*
+	 * go find how many slices there really are
+	 */
+	len = dl + 20 + 1;
+	srname = Malloc(len);
+	for (nslice = 0; /* void */; ++nslice) {
+		struct stat	statbuf;
+
+		/* build slice name */
+		(void) snprintf(srname, len, "%ss%u", dname, nslice);
+
+		/* see if it's there */
+		if ((meta_stat(srname, &statbuf) != 0) ||
+		    (! S_ISCHR(statbuf.st_mode))) {
+			break;
+		}
+	}
+	Free(srname);
+
+	/* Need to make sure that we at least have V_NUMPAR */
+	nslice = max(nslice, V_NUMPAR);
+
+	/* make sure we have at least our slice */
+	if (nslice < *slicep)
+		return (-1);
+
+	/* return number of slices */
+	return (nslice);
+}
+
+/*
+ * Attempt to parse the input string as a c[t]ds specifier
+ * The target can either be a SCSI target id or if the device
+ * is in a fabric configuration in a fibre channel setup then
+ * the target is a standard WWN (world wide name).
+ *
+ * uname - name to parse; not modified
+ * slice - receives the slice (or partition) number; it may be written
+ * even when the parse fails
+ *
+ * if successful return 0
+ * if c[t]dp name return 1
+ * otherwise return -1
+ */
+int
+parse_ctd(
+ char *uname,
+ uint_t *slice)
+{
+ uint_t channel;
+ uint_t target;
+ uint_t device;
+ int has_target = 1;
+ uint_t cl;
+ uint_t target_str_len;
+ char *partial_ctd_str;
+ char *target_str;
+ char *device_start_pos;
+ /* stays -1 unless a scan gets as far as its %n */
+ int l = -1;
+
+ /* pull off the channel spec and the 't' for the target */
+ if (sscanf(uname, "c%ut%n", &channel, &l) != 1 || l == -1) {
+ /* check for cds style name */
+ if (sscanf(uname, "c%ud%n", &channel, &l) != 1 || l == -1) {
+ return (-1);
+ } else {
+ l--; /* we want to be on the 'd' */
+ has_target = 0;
+ }
+ }
+ /* just past the 't', or on the 'd' when there is no target */
+ partial_ctd_str = uname + l;
+
+ /* find the beginning of the device specifier */
+ device_start_pos = strrchr(partial_ctd_str, 'd');
+ if (device_start_pos == NULL) {
+ return (-1);
+ }
+
+ /* check to see if it is a ctd with a WWN or SCSI target */
+ if (has_target) {
+ /* pull off the target and see if it is a WWN */
+ /* length of the target text plus the two characters of "0X" */
+ target_str_len = device_start_pos - partial_ctd_str + 2;
+ target_str = (char *)Malloc(target_str_len+1);
+ (void) strcpy(target_str, "0X");
+ (void) strncpy(target_str+2, partial_ctd_str,
+ target_str_len - 2);
+ target_str[target_str_len] = '\0';
+ /* the whole target must scan as one hexadecimal number */
+ if (sscanf(target_str, "%x%n", &target, &l) != 1 ||
+ l != target_str_len) {
+ Free(target_str);
+ return (-1);
+ }
+ Free(target_str);
+ }
+
+ /* check the device and slice */
+ cl = strlen(device_start_pos);
+ if (sscanf(device_start_pos, "d%us%u%n", &device, slice, &l) != 2 ||
+ l != cl) {
+ /* check the device and partition */
+ if (sscanf(device_start_pos, "d%up%u%n", &device, slice, &l)
+ == 2 && l == cl) {
+ return (1);
+ }
+ return (-1);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Get the slice number that a device name refers to.
+ *
+ * The number is taken from the name itself when the name has a
+ * recognizable form (metadevice, c[t]ds, mc?t?d?s?, d?s?).  Otherwise the
+ * raw device is opened and the driver is asked via DKIOCINFO.
+ *
+ * On success the slice number is stored in *slicep and also returned.
+ * On failure a negative value is returned; the DKIOCINFO failure paths
+ * additionally record the reason in ep.
+ */
+static int
+uname2sliceno(
+	char		*uname,
+	uint_t		*slicep,
+	md_error_t	*ep
+)
+{
+	uint_t		c = 0, t = 0, d = 0;
+	int		l = 0, cl = 0;
+	int		fd;
+	int		rval;
+	struct dk_cinfo	cinfo;
+	char		*p;
+	char		*rname = NULL;
+
+	/* metadevices only ever have slice 0 */
+	if (is_metaname(uname))
+		return (*slicep = 0);
+
+	/* only the last path component is parsed */
+	if ((p = strrchr(uname, '/')) != NULL)
+		p++;
+	else
+		p = uname;
+
+	cl = strlen(p);
+
+	if (parse_ctd(p, slicep) == 0)
+		return (*slicep);
+	else if (sscanf(p, "mc%ut%ud%us%u%n", &c, &t, &d, slicep, &l) == 4 &&
+	    l == cl)
+		return (*slicep);
+	else if (sscanf(p, "d%us%u%n", &d, slicep, &l) == 2 && l == cl)
+		return (*slicep);
+
+	/*
+	 * If we can't get the slice from the name, then we have to do it the
+	 * hard and expensive way.
+	 */
+	if ((rname = rawname(uname)) == NULL)
+		return (-1);
+
+	/* get controller info */
+	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
+		Free(rname);
+		return (-1);
+	}
+
+	if (ioctl(fd, DKIOCINFO, &cinfo) != 0) {
+		int	save = errno;
+
+		if (save == ENOTTY)
+			(void) mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
+		else
+			(void) mdsyserror(ep, save, rname);
+
+		Free(rname);
+		(void) close(fd);
+		return (-1);
+	}
+	(void) close(fd);	/* sd/ssd bug */
+
+	if (cinfo.dki_partition < V_NUMPAR) {
+		Free(rname);
+		return (*slicep = cinfo.dki_partition);
+	}
+
+	/*
+	 * The driver reported a partition we cannot use.  Record the error
+	 * first and release rname afterwards; the original returned
+	 * straight out of mddeverror() and leaked rname.
+	 */
+	rval = mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
+	Free(rname);
+	return (rval);
+}
+
+/*
+ * get partition info
+ *
+ * Classifies the drive behind rname (recording the result in dnp->type
+ * and, for the failure cases, in dnp->errnum) and reports how many
+ * partitions it has (*npartsp) and which one rname refers to (*partnop).
+ * Three sources are tried in turn: the metadevice shortcut, the label
+ * returned by metagetvtoc(), and finally a guess made by getnslice().
+ * If none of them works the drive is still reported with V_NUMPAR
+ * partitions.
+ *
+ * Returns 0 on success.  Fails with -1 when metagetvtoc() reported
+ * MDE_TOO_MANY_PARTS, and with the result of mdsyserror(ENOENT) when the
+ * partition number does not fit within the partition count.  The output
+ * arguments are only written on success.
+ */
+static int
+getparts(
+ mddrivename_t *dnp,
+ char *rname,
+ char *dname,
+ uint_t *npartsp,
+ uint_t *partnop,
+ md_error_t *ep
+)
+{
+ int nparts;
+ uint_t partno;
+ mdname_t name;
+ mdvtoc_t *vtocp;
+
+ /* metadevice */
+ if (is_metaname(rname)) {
+ dnp->type = MDT_META;
+ nparts = 1; /* a metadevice is handled as one partition, number 0 */
+ partno = 0;
+ goto gotit;
+ }
+
+ /* see how many partitions in drive, this is really tricky */
+ metainitname(&name); /* temporary name, only rname and drivenamep are set */
+ name.rname = rname;
+ name.drivenamep = dnp;
+ if ((vtocp = metagetvtoc(&name, TRUE, &partno, ep)) != NULL) {
+ dnp->type = MDT_COMP;
+ nparts = vtocp->nparts;
+ /* partno already setup */
+ /* dname already setup */
+ goto gotit;
+ }
+
+ if ((ep->info.errclass == MDEC_DEV) &&
+ (ep->info.md_error_info_t_u.dev_error.errnum == MDE_TOO_MANY_PARTS))
+ return (-1); /* hard failure: the error is left in ep for the caller */
+
+ /* fallback and try and guess (used to check for just EACCES here) */
+ if ((dname != NULL) &&
+ ((nparts = getnslice(rname, dname, &partno)) > 0)) {
+ dnp->type = MDT_ACCES;
+ if (mdanysyserror(ep)) { /* remember why metagetvtoc() failed */
+ dnp->errnum =
+ ep->info.md_error_info_t_u.sys_error.errnum;
+ } else {
+ dnp->errnum = ENOENT;
+ }
+ mdclrerror(ep);
+ /* nparts already setup */
+ /* partno already setup */
+ /* dname already setup */
+ nparts = roundup(nparts, V_NUMPAR); /* presumably rounds up to a multiple of V_NUMPAR */
+ goto gotit;
+ }
+
+ /* nothing worked */
+ dnp->type = MDT_UNKNOWN;
+ if (mdissyserror(ep, EACCES))
+ dnp->type = MDT_ACCES;
+
+ if (mdanysyserror(ep)) {
+ dnp->errnum = ep->info.md_error_info_t_u.sys_error.errnum;
+ } else {
+ dnp->errnum = ENOENT;
+ }
+
+ mdclrerror(ep);
+ nparts = V_NUMPAR;
+ if (uname2sliceno(rname, &partno, ep) < 0) { /* last resort: default to partition 0 */
+ mdclrerror(ep);
+ partno = 0;
+ }
+
+ /* return success */
+gotit:
+ assert(nparts > 0);
+
+ if (partno >= nparts) /* requested partition lies beyond the partition count */
+ return (mdsyserror(ep, ENOENT, rname));
+
+ *npartsp = nparts;
+ *partnop = partno;
+ return (0);
+}
+
+/*
+ * Derive the block device name for np from its raw name.
+ *
+ * np->rname must already be set.  On success any previous np->bname is
+ * released and replaced, and 0 is returned.  If blkname() cannot produce
+ * a block name, the result of mdsyserror(ENOENT) is returned and
+ * np->bname is left untouched.
+ */
+static int
+getbname(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char	*blk;
+
+	/* fully qualified */
+	assert(np->rname != NULL);
+
+	blk = blkname(np->rname);
+	if (blk == NULL) {
+		/* out of luck */
+		return (mdsyserror(ep, ENOENT, np->rname));
+	}
+
+	if (np->bname)
+		Free(np->bname);
+	np->bname = blk;
+	return (0);
+}
+
+/*
+ * Set np->cname, the short name for np, from its block name.
+ *
+ *	- a device directly inside /dev/dsk, /dev/ap/dsk or /dev/did/dsk
+ *	  is known by its last path component;
+ *	- any other non-metadevice keeps its full block name;
+ *	- a metadevice is known by its last path component, prefixed with
+ *	  "<setname>/" unless it belongs to the local set.
+ *
+ * Any previous np->cname is released.  np->bname and sp->setname must be
+ * set, and the drive must not be one of the "fast" types.
+ */
+static void
+getcname(
+	mdsetname_t	*sp,
+	mdname_t	*np
+)
+{
+	static const char *const dskdirs[] = {
+		"/dev/dsk/",
+		"/dev/ap/dsk/",
+		"/dev/did/dsk/"
+	};
+	char	*setnm = sp->setname;
+	char	*blk = np->bname;
+	char	*leaf;
+	size_t	plen;
+	size_t	sz;
+	size_t	i;
+
+	assert(setnm != NULL);
+	assert(blk != NULL);
+	assert(np->drivenamep->type != MDT_FAST_COMP &&
+	    np->drivenamep->type != MDT_FAST_META);
+
+	/* regular device: directly inside one of the well-known dirs */
+	for (i = 0; i < sizeof (dskdirs) / sizeof (dskdirs[0]); i++) {
+		plen = strlen(dskdirs[i]);
+		if (strncmp(blk, dskdirs[i], plen) != 0)
+			continue;
+		leaf = blk + plen;
+		if (strchr(leaf, '/') != NULL)
+			continue;
+		if (np->cname)
+			Free(np->cname);
+		np->cname = Strdup(leaf);
+		return;
+	}
+
+	/* anything else but metadevice */
+	if (np->drivenamep->type != MDT_META) {
+		if (np->cname)
+			Free(np->cname);
+		np->cname = Strdup(blk);
+		return;
+	}
+
+	/* metadevice: last component, qualified by a non-local set name */
+	leaf = strrchr(blk, '/');
+	assert(leaf != NULL);
+	++leaf;
+	if (metaislocalset(sp)) {
+		if (np->cname)
+			Free(np->cname);
+		np->cname = Strdup(leaf);
+		return;
+	}
+
+	assert(setnm[0] != '\0');
+	if (np->cname)
+		Free(np->cname);
+	sz = strlen(setnm) + 1 + strlen(leaf) + 1;
+	np->cname = Malloc(sz);
+	(void) snprintf(np->cname, sz, "%s/%s", setnm, leaf);
+}
+
+/*
+ * Look up the device number of np's raw device and store it in np->dev.
+ *
+ * Returns 0 on success.  If the raw name cannot be stat'ed the result of
+ * mdsyserror(errno) is returned; if it is not a character device the
+ * result of mddeverror(MDE_NOT_DISK) is returned.
+ */
+int
+meta_getdev(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	struct stat	sb;
+
+	/* the raw device has to exist ... */
+	if (meta_stat(np->rname, &sb) != 0)
+		return (mdsyserror(ep, errno, np->rname));
+
+	/* ... and has to be a character device */
+	if (! S_ISCHR(sb.st_mode))
+		return (mddeverror(ep, MDE_NOT_DISK, NODEV64, np->rname));
+
+	np->dev = meta_expldev(sb.st_rdev);
+
+	assert(np->drivenamep->type != MDT_FAST_META &&
+	    np->drivenamep->type != MDT_FAST_COMP);
+
+	/* a metadevice must live in the set that its minor number encodes */
+	assert(np->drivenamep->type != MDT_META ||
+	    sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Fill in all names of a slice, starting from its raw name.
+ *
+ * np->rname is replaced by a copy of rname; the block name, the short
+ * name and the device number are then derived from it, in that order.
+ * Returns 0 on success and -1 as soon as one of the steps fails (the
+ * failing step leaves its error in ep).
+ */
+static int
+getnames(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	char		*rname,
+	md_error_t	*ep
+)
+{
+	/* raw name */
+	if (np->rname)
+		Free(np->rname);
+	np->rname = Strdup(rname);
+
+	/* block name; nothing else can be derived without it */
+	if (getbname(np, ep) != 0)
+		return (-1);
+
+	/* short name, then device number */
+	getcname(sp, np);
+	return ((meta_getdev(sp, np, ep) != 0) ? -1 : 0);
+}
+
+/*
+ * Fake up names for a slice whose device could not be examined.
+ *
+ * The raw name is taken as given.  The block name is derived from it
+ * purely textually (no device access): an old style /dev/rXXNN[a-h] name
+ * loses the 'r' after "/dev/", and a name containing "/rdsk/" has that
+ * component turned into "/dsk/".  The short name is then computed from
+ * the block name by getcname().
+ */
+static void
+getfakenames(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	char		*rname
+)
+{
+	char		*p;
+	char		onmb[BUFSIZ+1], snm[BUFSIZ+1];
+	uint_t		d = 0;
+	int		l = 0;
+
+	/* fake names */
+	if (np->rname != NULL)
+		Free(np->rname);
+	np->rname = Strdup(rname);
+
+	if (np->bname != NULL)
+		Free(np->bname);
+	np->bname = Strdup(rname);
+
+	/*
+	 * Fixup old style names.  The rewritten name is one character
+	 * shorter than rname, so it fits in the copy made above.
+	 */
+	if (sscanf(rname, "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+	    "%" VAL2STR(BUFSIZ) "[a-h]%n",
+	    onmb, &d, snm, &l) == 3 && l == strlen(rname))
+		(void) snprintf(np->bname, l, "/dev/%s%u%s", onmb, d, snm);
+
+	/*
+	 * Fixup new style names: drop the 'r' of "/rdsk/" by shifting the
+	 * rest of the string one character to the left.
+	 */
+	if ((p = strstr(np->bname, "/rdsk/")) != NULL) {
+		for (++p; (*(p + 1) != '\0'); ++p)
+			*p = *(p + 1);
+		*p = '\0';
+	}
+
+	/*
+	 * getcname() releases np->cname itself before replacing it, so
+	 * the pointer must not be left dangling here: freeing it without
+	 * clearing it made getcname() free the same memory a second time.
+	 */
+	if (np->cname != NULL) {
+		Free(np->cname);
+		np->cname = NULL;
+	}
+	getcname(sp, np);
+}
+/*
+ * Populate the name structure for slice partno of drive dnp and return
+ * it, or return NULL (with the error in ep) if that is not possible.
+ *
+ * The raw name of the slice is rname when the caller supplies one, dname
+ * itself for a metadevice, and otherwise a name built from dname and
+ * partno.  If the real names cannot be determined, drives of unknown
+ * type (and component drives that simply do not exist) get faked-up
+ * names instead; for any other drive type a second attempt is made with
+ * dname as the raw name.
+ *
+ * uname may be NULL; it is only used to name the device in a system
+ * error that is reported on failure.
+ */
+static mdname_t *
+setup_slice(
+ mdsetname_t *sp,
+ mddrivename_t *dnp,
+ char *uname,
+ char *rname,
+ char *dname,
+ uint_t partno,
+ md_error_t *ep
+)
+{
+ char *srname = NULL;
+ mdname_t *np;
+
+ /* must have a set */
+ assert(sp != NULL);
+ assert(partno < dnp->parts.parts_len);
+ assert(dname != NULL);
+
+ np = &dnp->parts.parts_val[partno];
+
+ if (rname)
+ srname = rname; /* borrowed from the caller, not freed here */
+ else if (is_metaname(dname))
+ srname = dname; /* borrowed from the caller, not freed here */
+ else {
+ char onmb[BUFSIZ+1];
+ uint_t d = 0;
+ int l = 0, cl = strlen(dname);
+ size_t len;
+
+ len = cl + 20 + 1; /* room for dname plus an 's', a number and the NUL */
+ srname = Malloc(len);
+
+ /*
+ * Handle /dev/rXXNN.
+ */
+ if (sscanf(dname, "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u%n",
+ onmb, &d, &l) == 2 && l == cl) {
+ (void) snprintf(srname, len, "/dev/r%s%u%c", onmb, d,
+ 'a' + partno); /* old style: partition is a letter suffix */
+ } else if (sscanf(dname, "/dev/%" VAL2STR(BUFSIZ) "[^0-9/]%u%n",
+ onmb, &d, &l) == 2 && l == cl) {
+ (void) snprintf(srname, len, "/dev/%s%u%c", onmb, d,
+ 'a' + partno);
+ } else {
+ /* build the slice that is wanted */
+ (void) snprintf(srname, len, "%ss%u", dname, partno);
+ }
+ }
+
+ if (getnames(sp, np, srname, ep) != 0) {
+ if (dnp->type == MDT_UNKNOWN) {
+ mdclrerror(ep);
+ getfakenames(sp, np, srname);
+ } else if (dnp->type == MDT_COMP && mdissyserror(ep, ENOENT)) {
+ dnp->type = MDT_UNKNOWN; /* the component is missing: downgrade the drive */
+ if (mdanysyserror(ep)) {
+ dnp->errnum =
+ ep->info.md_error_info_t_u.sys_error.errnum;
+ } else {
+ dnp->errnum = ENOENT;
+ }
+ mdclrerror(ep);
+ getfakenames(sp, np, srname);
+ } else {
+ mdclrerror(ep);
+ if (getnames(sp, np, dname, ep) != 0) { /* second try, with the drive name itself */
+ np = NULL;
+ goto fixup;
+ }
+ }
+ }
+
+out:
+ if ((srname != rname) && (srname != dname)) /* only free what was allocated above */
+ Free(srname);
+
+ /* return name */
+ return (np);
+
+fixup: /* failure: re-report a system error against a short device name */
+ if (mdanysyserror(ep)) {
+ char *p;
+ int errnum = ep->info.md_error_info_t_u.sys_error.errnum;
+
+ mdclrerror(ep);
+ if (uname && *uname) { /* prefer the name the user gave */
+ if ((p = strrchr(uname, '/')) != NULL)
+ (void) mdsyserror(ep, errnum, ++p);
+ else
+ (void) mdsyserror(ep, errnum, uname);
+ } else {
+ if ((p = strrchr(srname, '/')) != NULL)
+ (void) mdsyserror(ep, errnum, ++p);
+ else
+ (void) mdsyserror(ep, errnum, srname);
+ }
+ }
+ goto out;
+}
+
+/*
+ * Release one fast name: the name structure itself, every string it
+ * owns, and the private drive structure hanging off it (including the
+ * drive's strings and unit).  *np is set to NULL afterwards.
+ */
+static void
+metafreefastnm(mdname_t **np)
+{
+	mdname_t	*nm;
+	mddrivename_t	*drv;
+
+	assert(np != NULL && *np != NULL);
+
+	nm = *np;
+
+	/* the drive belongs to this name alone, so it goes first */
+	drv = nm->drivenamep;
+	if (drv != NULL) {
+		if (drv->cname != NULL)
+			Free(drv->cname);
+		if (drv->rname != NULL)
+			Free(drv->rname);
+		if (drv->miscname != NULL)
+			Free(drv->miscname);
+		meta_free_unit(drv);
+		Free(drv);
+	}
+
+	/* then the strings of the name itself */
+	if (nm->cname != NULL)
+		Free(nm->cname);
+	if (nm->bname != NULL)
+		Free(nm->bname);
+	if (nm->rname != NULL)
+		Free(nm->rname);
+	if (nm->devicesname != NULL)
+		Free(nm->devicesname);
+
+	Free(nm);
+	*np = NULL;
+}
+
+/*
+ * Flush the fast name cache: release every cached name together with
+ * its list element and leave the cache empty.
+ */
+static void
+metaflushfastnames()
+{
+	mdnamelist_t	*cur = fastnmlp;
+
+	while (cur != NULL) {
+		/* pick up the successor before the element goes away */
+		mdnamelist_t	*nxt = cur->next;
+
+		metafreefastnm(&cur->namep);
+		Free(cur);
+		cur = nxt;
+	}
+	fastnmlp = NULL;
+}
+/*
+ * Build the raw device name for unm from the text of the name alone.
+ *
+ * Handled forms are metadevice names (with or without a set, short or
+ * fully qualified), old style /dev/[r]XXNN[a-h] names, and paths that
+ * contain "/dsk/" or "/rdsk/".  The result is a newly allocated string.
+ * For anything else NULL is returned and EINVAL is recorded in ep.
+ */
+static char *
+getrname_fast(char *unm, md_error_t *ep)
+{
+ uint_t d = 0;
+ int l = 0;
+ int cl = strlen(unm);
+ char onmb[BUFSIZ+1], snm[BUFSIZ+1], cnmb[BUFSIZ];
+ char *rnm;
+ char *p;
+ size_t len;
+
+ if (is_metaname(unm)) {
+ /* without set */
+ if (sscanf(unm, "d%u%n", &d, &l) == 1 && cl == l) {
+ rnm = Zalloc(14 + cl + 1); /* 14 == strlen("/dev/md/rdsk/d") */
+ (void) sprintf(rnm, "/dev/md/rdsk/d%u", d);
+ return (rnm);
+ }
+
+ /* fully-qualified without set */
+ if ((sscanf(unm, "/dev/md/dsk/d%u%n", &d, &l) == 1 ||
+ sscanf(unm, "/dev/md/rdsk/d%u%n", &d, &l) == 1) &&
+ cl == l) {
+ rnm = Zalloc(14 + cl + 1);
+ (void) sprintf(rnm, "/dev/md/rdsk/d%u", d);
+ return (rnm);
+ }
+
+ /* with set */
+ if ((sscanf(unm,
+ "%" VAL2STR(BUFSIZ) "[^/]/d%u%n", snm, &d, &l) == 2 ||
+ sscanf(unm, "/dev/md/%" VAL2STR(BUFSIZ) "[^/]/dsk/d%u%n",
+ snm, &d, &l) == 2 ||
+ sscanf(unm, "/dev/md/%" VAL2STR(BUFSIZ) "[^/]/rdsk/d%u%n",
+ snm, &d, &l) == 2) && cl == l) {
+ len = 14 + cl + strlen(snm) + 1;
+ rnm = Zalloc(len);
+ (void) snprintf(rnm, len, "/dev/md/%s/rdsk/d%u",
+ snm, d);
+ return (rnm);
+ }
+ }
+
+ /* NOT Fully qualified path, done */
+ if (unm[0] != '/') {
+ (void) mdsyserror(ep, EINVAL, unm);
+ return (NULL);
+ }
+
+ /*
+ * Get slice information from old style names of the form
+ * /dev/rXXNN[a-h] or /dev/XXNN[a-h], must be done before regular
+ * devices, but after metadevices.
+ */
+ if ((sscanf(unm, "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+ "%" VAL2STR(BUFSIZ) "[a-h]%n",
+ onmb, &d, snm, &l) == 3 ||
+ sscanf(unm, "/dev/%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+ "%" VAL2STR(BUFSIZ) "[a-h]%n",
+ onmb, &d, snm, &l) == 3) && l == cl) {
+ if ((p = strchr("abcdefgh", snm[0])) != NULL) {
+ (void) snprintf(cnmb, sizeof (cnmb), "/dev/r%s%u%s",
+ onmb, d, snm); /* the result always carries the 'r' */
+ return (Strdup(cnmb));
+ }
+ }
+
+ if ((p = strstr(unm, "/dsk/")) != NULL) { /* /.../dsk/... */
+ ++p; /* p now points at the 'd' of "dsk/" */
+ rnm = Zalloc(strlen(unm) + 1 + 1); /* one extra byte for the inserted 'r' */
+ (void) strncpy(rnm, unm, (p - unm));
+ rnm[(p - unm)] = 'r';
+ (void) strcpy(&rnm[(p - unm) + 1], p);
+ return (rnm);
+ } else if (strstr(unm, "/rdsk/") != NULL) { /* /.../rdsk/... */
+ return (Strdup(unm)); /* already a raw name */
+ }
+
+ /*
+ * Shouldn't get here but if we do then we have an unrecognized
+ * fully qualified path - error
+ */
+ (void) mdsyserror(ep, EINVAL, unm);
+ return (NULL);
+}
+/*
+ * Return the "fast" name for uname, creating and caching it if needed.
+ *
+ * Fast names live on their own list (fastnmlp), are looked up by block
+ * name, and are built from the text of uname alone: each one gets a
+ * private drive structure of type MDT_FAST_META or MDT_FAST_COMP.
+ * Returns NULL, with the error in ep, when no raw name can be derived
+ * for uname; nothing is left in the cache in that case.
+ */
+static mdname_t *
+metainitfastname(
+ mdsetname_t *sp,
+ char *uname,
+ md_error_t *ep
+)
+{
+ uint_t c = 0, t = 0, d = 0, s = 0;
+ int l = 0;
+ mddrivename_t *dnp;
+ mdname_t *np;
+ mdnamelist_t **fnlpp;
+
+ for (fnlpp = &fastnmlp; (*fnlpp != NULL); fnlpp = &(*fnlpp)->next) {
+ np = (*fnlpp)->namep;
+
+ if (strcmp(np->bname, uname) == 0) /* cache hit */
+ return (np);
+ }
+
+ *fnlpp = Zalloc(sizeof (**fnlpp)); /* fnlpp is at the list tail here: append */
+ np = (*fnlpp)->namep = Zalloc(sizeof (mdname_t));
+ metainitname(np);
+ dnp = np->drivenamep = Zalloc(sizeof (mddrivename_t));
+ metainitdrivename(dnp);
+
+
+ /* Metadevices */
+ if (is_metaname(uname)) {
+ char *p;
+ size_t len;
+
+ if ((p = strrchr(uname, '/')) != NULL)
+ ++p;
+ else
+ p = uname;
+
+ if (metaislocalset(sp)) {
+ if (np->cname)
+ Free(np->cname);
+ np->cname = Strdup(p);
+ } else {
+ if (np->cname)
+ Free(np->cname);
+ len = strlen(sp->setname) + 1 + strlen(p) + 1;
+ np->cname = Zalloc(len);
+ (void) snprintf(np->cname, len, "%s/%s",
+ sp->setname, p); /* qualify with the set name */
+ }
+ dnp->type = MDT_FAST_META;
+ goto done;
+ }
+
+ /* Others */
+ dnp->type = MDT_FAST_COMP;
+
+ if (((sscanf(uname, "/dev/rdsk/c%ut%ud%us%u%n", &c, &t, &d,
+ &s, &l) == 4 ||
+ sscanf(uname, "/dev/dsk/c%ut%ud%us%u%n", &c, &t, &d,
+ &s, &l) == 4 ||
+ sscanf(uname, "/dev/ap/rdsk/mc%ut%ud%us%u%n", &c, &t, &d,
+ &s, &l) == 4 ||
+ sscanf(uname, "/dev/ap/dsk/mc%ut%ud%us%u%n", &c, &t, &d,
+ &s, &l) == 4 ||
+ sscanf(uname, "/dev/did/rdsk/d%us%u%n", &t, &s, &l) == 2 ||
+ sscanf(uname, "/dev/did/dsk/d%us%u%n", &t, &s, &l) == 2||
+ sscanf(uname, "/dev/rdsk/c%ud%us%u%n", &c, &d, &s, &l) == 3 ||
+ sscanf(uname, "/dev/dsk/c%ud%us%u%n", &c, &d, &s, &l) == 3 ||
+ sscanf(uname, "/dev/rdsk/c%ut%ud%u%n", &c, &t, &d, &l) == 3 ||
+ sscanf(uname, "/dev/dsk/c%ut%ud%u%n", &c, &t, &d, &l) == 3 ||
+ sscanf(uname, "/dev/ap/rdsk/mc%ut%ud%u%n", &c, &t, &d, &l) == 3 ||
+ sscanf(uname, "/dev/ap/dsk/mc%ut%ud%u%n", &c, &t, &d, &l) == 3 ||
+ sscanf(uname, "/dev/did/rdsk/d%u%n", &t, &l) == 1 ||
+ sscanf(uname, "/dev/did/dsk/d%u%n", &t, &l) == 1 ||
+ sscanf(uname, "/dev/rdsk/c%ud%u%n", &c, &d, &l) == 2 ||
+ sscanf(uname, "/dev/dsk/c%ud%u%n", &c, &d, &l) == 2) &&
+ l == strlen(uname))) { /* a well-known disk name: use the last component */
+ if ((np->cname = strrchr(uname, '/')) == NULL)
+ np->cname = Strdup(uname);
+ else
+ np->cname = Strdup(++np->cname); /* cname briefly points into uname, then gets its own copy */
+ } else {
+ np->cname = Strdup(uname);
+ }
+
+done:
+ /* Driver always gives us block names */
+ np->bname = Strdup(uname);
+
+ /* canonical disk name */
+ if ((dnp->cname = metadiskname(np->cname)) == NULL)
+ dnp->cname = Strdup(np->cname);
+
+ if ((np->rname = getrname_fast(uname, ep)) != NULL) {
+ if ((dnp->rname = metadiskname(np->rname)) == NULL)
+ dnp->rname = Strdup(np->rname);
+ } else { /* no raw name: undo the cache entry that was just appended */
+ metafreefastnm(&(*fnlpp)->namep);
+ Free(*fnlpp);
+ *fnlpp = NULL;
+ return (NULL);
+ }
+
+ /* cleanup, return success */
+ return (np);
+}
+
+/*
+ * Set up names for a device.
+ *
+ * Resolves uname (checking or choosing the set in *spp) and returns the
+ * name structure of the slice it refers to.  Drives that are already in
+ * the drive cache are reused.  Otherwise, if fast is set, a fast name is
+ * returned instead; if not, a new drive is added to the cache, sized to
+ * at least V_NUMPAR slices, and the requested slice is populated.
+ *
+ * Returns NULL, with the error in ep, on failure; a drive that was added
+ * to the cache by this call is removed again in that case.
+ */
+static mdname_t *
+metaname_common(
+	mdsetname_t	**spp,
+	char		*uname,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	mddrivenamelist_t	**tail;
+	mddrivename_t		*dnp;
+	uint_t			slice;
+	mdname_t		*np;
+	char			*rname = NULL;
+	char			*dname = NULL;
+	char			*cname = NULL;
+	uint_t			nparts, partno;
+
+	assert(uname != NULL);
+
+	/* check setname */
+	if ((cname = meta_name_getname(spp, uname, ep)) == NULL)
+		return (NULL);
+
+	assert(*spp != NULL);
+	Free(cname);
+
+	/* get raw name (rname) of the slice and drive (dname) we have */
+	if ((rname = getrawnames(spp, uname, &dname, ep)) == NULL) {
+		return (NULL);
+	}
+
+	/*
+	 * Look in cache first.  The loop always runs to the end on a miss
+	 * so that tail is left pointing at the place to append to.
+	 */
+	for (tail = &drivelistp; (*tail != NULL); tail = &(*tail)->next) {
+		dnp = (*tail)->drivenamep;
+
+		/*
+		 * Check to see if the drive name is already in the cache.
+		 * The rest of this function allows for dname being NULL,
+		 * so it must not be handed to strcmp() unchecked.
+		 */
+		if ((dname != NULL) && (dnp->rname != NULL) &&
+		    strcmp(dnp->rname, dname) == 0) {
+
+			Free(rname);
+			Free(dname);
+
+			if (uname2sliceno(uname, &partno, ep) < 0)
+				return (NULL);
+
+			return (metaslicename(dnp, partno, ep));
+		}
+	}
+
+	/*
+	 * If a fast names is OK, then get one, and be done.
+	 */
+	if (fast) {
+		Free(rname);
+		if (dname != NULL)
+			Free(dname);
+
+		return (metainitfastname(*spp, uname, ep));
+	}
+
+	/* allocate new list element and drive */
+	*tail = Zalloc(sizeof (**tail));
+	dnp = (*tail)->drivenamep = Zalloc(sizeof (*dnp));
+
+	metainitdrivename(dnp);
+
+	/* get parts info */
+	if (getparts(dnp, rname, dname, &nparts, &partno, ep) != 0)
+		goto out;
+
+	/*
+	 * libmeta needs at least V_NUMPAR partitions.
+	 * If we have an EFI partition with less than V_NUMPAR slices,
+	 * we nevertheless reserve space for V_NUMPAR
+	 */
+	if (nparts < V_NUMPAR) {
+		nparts = V_NUMPAR;
+	}
+
+	/* allocate and link in parts */
+	dnp->parts.parts_len = nparts;
+	dnp->parts.parts_val = Zalloc((sizeof (*dnp->parts.parts_val)) *
+	    dnp->parts.parts_len);
+	for (slice = 0; (slice < nparts); ++slice) {
+		np = &dnp->parts.parts_val[slice];
+		metainitname(np);
+		np->drivenamep = dnp;
+	}
+
+	/* setup name_t (or slice) wanted */
+	if ((np = setup_slice(*spp, dnp, uname, rname, dname, partno, ep))
+	    == NULL)
+		goto out;
+
+	/* canonical disk name */
+	if ((dnp->cname = metadiskname(np->cname)) == NULL)
+		dnp->cname = Strdup(np->cname);
+	if ((dnp->rname = metadiskname(np->rname)) == NULL)
+		dnp->rname = Strdup(np->rname);
+
+	/* cleanup, return success */
+	if (dname != NULL)
+		Free(dname);
+	Free(rname);
+	return (np);
+
+	/* cleanup, return error */
+out:
+	if (dname != NULL)
+		Free(dname);
+	if (rname != NULL)
+		Free(rname);
+
+	/* take the half-built drive back out of the cache */
+	metafreedrivename(dnp);
+	Free(dnp);
+	Free(*tail);
+	*tail = NULL;
+	return (NULL);
+}
+
+/*
+ * Set up names for a device, fully populated (never a fast name).
+ */
+mdname_t *
+metaname(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	mdname_t	*namep;
+
+	namep = metaname_common(spp, uname, 0, ep);
+	return (namep);
+}
+
+/*
+ * Set up names for a device; a fast name is acceptable when the drive
+ * is not already in the drive cache.
+ */
+mdname_t *
+metaname_fast(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	mdname_t	*namep;
+
+	namep = metaname_common(spp, uname, 1, ep);
+	return (namep);
+}
+
+/*
+ * Set up names for a drive.
+ *
+ * Returns the drive structure for uname, from the drive cache when it is
+ * already known.  Otherwise each slice name "<uname>s<N>", for N from 0
+ * to MD_MAX_PARTS - 1, is tried via metaname() until one of them
+ * resolves, and the drive of that slice is returned.
+ *
+ * Returns NULL with the error in ep on failure.  When the last attempt
+ * failed with ENOENT although uname looks like a disk name, the error is
+ * replaced by MDE_NOT_DRIVENAME.
+ */
+mddrivename_t *
+metadrivename(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	char		*slicename;
+	mdname_t	*np;
+
+	char		*cname;
+	mddrivenamelist_t **tail;
+	mddrivename_t	*dnp;
+	char		*dname;
+	int		i;
+	int		mplen;
+	size_t		len;
+
+	/* check setname, get comparison name */
+	assert(uname != NULL);
+	if ((cname = meta_name_getname(spp, uname, ep)) == NULL) {
+		(void) mdsyserror(ep, ENOENT, uname);
+		return (NULL);
+	}
+
+	assert(*spp != NULL);
+
+	if ((dname = metadiskname(cname)) == NULL) {
+		(void) mdsyserror(ep, ENOENT, cname);
+		Free(cname);
+		return (NULL);
+	}
+
+	/* look in cache first */
+	for (tail = &drivelistp; (*tail != NULL); tail = &(*tail)->next) {
+		dnp = (*tail)->drivenamep;
+		if ((dnp->cname != NULL &&
+		    (strcmp(dnp->cname, dname) == 0)) ||
+		    (dnp->rname != NULL &&
+		    (strcmp(dnp->rname, dname) == 0))) {
+			Free(cname);
+			Free(dname);
+			return (dnp);
+		}
+	}
+
+	/*
+	 * The comparison names were only needed for the cache lookup.
+	 * Release them now; the original leaked both on every cache miss.
+	 */
+	Free(cname);
+	Free(dname);
+
+	/* Check each possible slice name based on MD_MAX_PARTS. */
+
+	/*
+	 * Figure out how much string space to reserve to fit
+	 * (MD_MAX_PARTS - 1) into the name string; the loop will
+	 * increment the mplen counter once for each decimal digit in
+	 * (MD_MAX_PARTS - 1).
+	 */
+	for (i = MD_MAX_PARTS - 1, mplen = 0; i; i /= 10, ++mplen)
+		;
+	len = strlen(uname) + mplen + 2;	/* + 's' + NUL */
+	slicename = Malloc(len);
+
+	/* Check for each slice in turn until we find one */
+	for (np = NULL, i = 0; ((np == NULL) && (i < MD_MAX_PARTS)); ++i) {
+		(void) snprintf(slicename, len, "%ss%d", uname, i);
+		np = metaname(spp, slicename, ep);
+	}
+	Free(slicename);
+
+	if (np == NULL) {
+		char	*diskname;
+
+		/* a disk-like name without any usable slice */
+		if ((mdissyserror(ep, ENOENT)) &&
+		    ((diskname = metadiskname(uname)) != NULL)) {
+			Free(diskname);
+			(void) mderror(ep, MDE_NOT_DRIVENAME, uname);
+		}
+		return (NULL);
+	}
+	return (np->drivenamep);
+}
+
+/*
+ * FUNCTION:	metaslicename()
+ * INPUT:	dnp	- the drivename structure
+ *		sliceno	- the slice on the drive to return
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	mdname_t- pointer to the slice name structure, or NULL
+ *		on failure
+ * PURPOSE:	Accessor for the parts array of a drive name structure.
+ *		The slice entries of a drive are filled in on demand, so
+ *		callers should go through this function instead of
+ *		indexing the array themselves: an entry that has not been
+ *		populated yet is populated here before it is returned.
+ */
+mdname_t *
+metaslicename(
+	mddrivename_t	*dnp,
+	uint_t		sliceno,
+	md_error_t	*ep
+)
+{
+	mdsetname_t	*setp = NULL;
+	char		*cmpname;
+	mdname_t	*slicep;
+
+	assert(dnp->type != MDT_FAST_COMP && dnp->type != MDT_FAST_META);
+
+	/* the drive has to have that many slices */
+	if (sliceno >= dnp->parts.parts_len) {
+		(void) mderror(ep, MDE_NOSLICE, dnp->cname);
+		return (NULL);
+	}
+
+	slicep = &dnp->parts.parts_val[sliceno];
+
+	/* a short name marks the entry as populated; otherwise do it now */
+	if (slicep->cname == NULL) {
+		/* this call is what determines the set of the drive */
+		cmpname = meta_name_getname(&setp, dnp->cname, ep);
+		if (cmpname == NULL)
+			return (NULL);
+
+		slicep = setup_slice(setp, dnp, NULL, NULL, dnp->rname,
+		    sliceno, ep);
+
+		Free(cmpname);
+	}
+
+	return (slicep);
+}
+
+/*
+ * Set up the name of a metadevice from its minor number.
+ *
+ * The set is taken from the minor number and checked against *spp; spp
+ * may be NULL when the caller has no interest in the set.  With fast set
+ * a fast name is used, and its device number is filled in here from
+ * mnum.
+ *
+ * Returns NULL, with the error in ep, on failure.
+ */
+mdname_t *
+metamnumname(
+	mdsetname_t	**spp,
+	minor_t		mnum,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	set_t		setno = MD_MIN2SET(mnum);
+	mdsetname_t	*sp = NULL;
+	char		*uname;
+	mdname_t	*np;
+	size_t		len;
+
+	/* check set first */
+	if (spp == NULL)
+		spp = &sp;
+	if (chksetno(spp, setno, ep) != 0)
+		return (NULL);
+	assert(*spp != NULL);
+	sp = *spp;
+
+	/* build corresponding device name */
+	if (metaislocalset(sp)) {
+		uname = Malloc(20);
+		(void) sprintf(uname, "d%lu", MD_MIN2UNIT(mnum));
+	} else {
+		len = strlen(sp->setname) + 1 + 20;
+		uname = Malloc(len);
+		(void) snprintf(uname, len, "%s/d%lu", sp->setname,
+		    MD_MIN2UNIT(mnum));
+	}
+
+	/* setup name */
+	if (fast) {
+		np = metaname_fast(spp, uname, ep);
+		/*
+		 * metaname_fast() can fail; the original dereferenced its
+		 * result unconditionally.
+		 */
+		if (np != NULL)
+			np->dev = metamakedev(mnum);
+	} else {
+		np = metaname(spp, uname, ep);
+	}
+
+	Free(uname);
+	return (np);
+}
+
+/*
+ * Return the short name of the metadevice with minor number mnum, or
+ * NULL if it cannot be determined.  Errors are discarded.  The string
+ * belongs to the name structure it is taken from.
+ */
+char *
+get_mdname(
+	minor_t		mnum
+)
+{
+	md_error_t	err = mdnullerror;
+	mdname_t	*namep;
+
+	namep = metamnumname(NULL, mnum, 0, &err);
+	if (namep != NULL) {
+		assert(meta_getminor(namep->dev) == mnum);
+		return (namep->cname);
+	}
+
+	/* no name: drop whatever error was recorded */
+	mdclrerror(&err);
+	return (NULL);
+}
+
+/*
+ * Return 1 if np names a metadevice (regular or fast), 0 otherwise.
+ */
+int
+metaismeta(
+	mdname_t	*np
+)
+{
+	if (np->drivenamep->type == MDT_META)
+		return (1);
+	return (np->drivenamep->type == MDT_FAST_META);
+}
+
+/*
+ * Require np to be a metadevice: returns 0 if it is, otherwise the
+ * result of recording MDE_NOT_META in ep.
+ */
+int
+metachkmeta(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	if (metaismeta(np))
+		return (0);
+
+	return (mddeverror(ep, MDE_NOT_META, np->dev,
+	    np->cname));
+}
+
+/*
+ * Require np to be something usable as a disk: a metadevice or a
+ * component.  Returns 0 if it is.  For a drive that could not be
+ * accessed or identified, the errno remembered in the drive is reported
+ * through mdsyserror(); any other type is unexpected and reported as
+ * MDE_NOT_DISK.
+ */
+int
+metachkdisk(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*drv = np->drivenamep;
+
+	assert(drv->type != MDT_FAST_COMP && drv->type != MDT_FAST_META);
+
+	/* the good cases */
+	if (metaismeta(np) || (drv->type == MDT_COMP))
+		return (0);
+
+	/* known-bad drive: report the error that was saved for it */
+	if ((drv->type == MDT_ACCES) || (drv->type == MDT_UNKNOWN))
+		return (mdsyserror(ep, drv->errnum, np->bname));
+
+	/* no other type should ever get here */
+	assert(0);
+	return (mddeverror(ep, MDE_NOT_DISK, np->dev,
+	    np->cname));
+}
+
+/*
+ * Require np to be a component, that is, a disk that is not a
+ * metadevice.  A metadevice is rejected with MDE_IS_META; everything
+ * else is judged by metachkdisk().
+ */
+int
+metachkcomp(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	if (! metaismeta(np))
+		return (metachkdisk(np, ep));
+
+	return (mddeverror(ep, MDE_IS_META, np->dev,
+	    np->cname));
+}
+
+/*
+ * Free a list of names.  Only the list elements are released; the names
+ * they point to are left alone.
+ */
+void
+metafreenamelist(
+	mdnamelist_t	*nlp
+)
+{
+	while (nlp != NULL) {
+		mdnamelist_t	*doomed = nlp;
+
+		nlp = nlp->next;
+		Free(doomed);
+	}
+}
+
+/*
+ * Build a list of names from an argument vector.
+ *
+ * Each of the argc strings in argv is resolved with metaname() and
+ * appended, in order, to a new list that is stored in *nlpp.  Returns
+ * the number of names in the list.  If any name fails to resolve the
+ * partial list is freed, *nlpp is set to NULL and -1 is returned, with
+ * the error in ep.
+ */
+int
+metanamelist(
+	mdsetname_t	**spp,
+	mdnamelist_t	**nlpp,
+	int		argc,
+	char		*argv[],
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	**tailpp = nlpp;
+	int		count = 0;
+
+	for (*nlpp = NULL; (argc > 0); ++count, --argc, ++argv) {
+		mdnamelist_t	*nlp;
+		mdname_t	*np;
+
+		/*
+		 * Resolve the name before allocating its list element.
+		 * The original allocated first and leaked the still
+		 * unlinked element when the name did not resolve.
+		 */
+		if ((np = metaname(spp, argv[0], ep)) == NULL) {
+			metafreenamelist(*nlpp);
+			*nlpp = NULL;
+			return (-1);
+		}
+
+		nlp = Zalloc(sizeof (*nlp));
+		nlp->namep = np;
+		*tailpp = nlp;
+		tailpp = &nlp->next;
+	}
+	return (count);
+}
+
+/*
+ * Append np to the end of the name list *nlpp, walking the list from
+ * nlpp to find its end.  Returns np.
+ */
+mdname_t *
+metanamelist_append(
+	mdnamelist_t	**nlpp,
+	mdname_t	*np
+)
+{
+	mdnamelist_t	**slot = nlpp;
+
+	/* find the link that terminates the list */
+	while (*slot != NULL)
+		slot = &(*slot)->next;
+
+	/* hang a new element carrying the name off that link */
+	*slot = Zalloc(sizeof (mdnamelist_t));
+	(*slot)->namep = np;
+	return (np);
+}
+
+/*
+ * FUNCTION:	meta_namelist_append_wrapper()
+ * INPUT:	tailpp	- pointer to the list tail pointer
+ *		np	- name node to be appended to list
+ * OUTPUT:	none
+ * RETURNS:	mdnamelist_t ** - tail pointer to pass to the next call
+ * PURPOSE:	wrapper to metanamelist_append for performance.
+ *		metanamelist_append walks from the pointer it is given
+ *		to the end of the list on every call, which is slow for
+ *		long lists.  A caller that feeds the value returned here
+ *		into its next call keeps that walk down to at most one
+ *		step, making each append a constant time operation.
+ */
+mdnamelist_t **
+meta_namelist_append_wrapper(
+	mdnamelist_t	**tailpp,
+	mdname_t	*np
+)
+{
+	(void) metanamelist_append(tailpp, np);
+
+	/*
+	 * If the element at *tailpp is the one just added (it has no
+	 * successor), stay where we are; otherwise move up by one.
+	 */
+	return (((*tailpp)->next != NULL) ? &(*tailpp)->next : tailpp);
+}
+
+
+/*
+ * mdhspname_t stuff
+ */
+
+/*
+ * Initialize a hotspare pool name: all fields zeroed, no pool id.
+ */
+static void
+metainithspname(
+	mdhspname_t	*hspnamep
+)
+{
+	(void) memset(hspnamep, 0, sizeof (mdhspname_t));
+	hspnamep->hsp = MD_HSP_NONE;
+}
+
+/*
+ * Release what a hotspare pool name owns (its name string and any
+ * cached unit) and put the structure back into its initial state.  The
+ * structure itself is not freed.
+ */
+static void
+metafreehspname(
+	mdhspname_t	*hspnamep
+)
+{
+	char	*nm = hspnamep->hspname;
+
+	if (nm != NULL)
+		Free(nm);
+
+	/* a cached unit is dropped through the invalidate routine */
+	if (hspnamep->unitp != NULL)
+		meta_invalidate_hsp(hspnamep);
+
+	metainithspname(hspnamep);
+}
+
+/*
+ * Clear the hotspare pool name cache: release every cached name with
+ * its list element and leave the cache empty.
+ */
+static void
+metaflushhspnames()
+{
+	mdhspnamelist_t	*cur = hsplistp;
+
+	while (cur != NULL) {
+		/* pick up the successor before the element goes away */
+		mdhspnamelist_t	*nxt = cur->next;
+
+		metafreehspname(cur->hspnamep);
+		Free(cur->hspnamep);
+		Free(cur);
+		cur = nxt;
+	}
+	hsplistp = NULL;
+}
+
+/*
+ * Check the set of a hotspare pool name and get its comparison name.
+ *
+ * uname is split into set name and pool number; the set is checked
+ * against (or chosen into) *spp and the pool number is stored in *hspp.
+ * Returns the canonical pool name built by canon_hsp(), or NULL with the
+ * error in ep.
+ */
+static char *
+gethspname(
+	mdsetname_t	**spp,
+	char		*uname,
+	hsp_t		*hspp,
+	md_error_t	*ep
+)
+{
+	char	*setnm = NULL;
+	int	badset;
+
+	assert(uname != NULL);
+
+	/* split the name up */
+	if (parse_hsp(uname, &setnm, hspp) != 0) {
+		(void) mdsyserror(ep, ENOENT, uname);
+		return (NULL);
+	}
+
+	/* check setname; the parsed set name is not needed afterwards */
+	badset = (chksetname(spp, setnm, ep) != 0);
+	if (setnm != NULL)
+		Free(setnm);
+	if (badset)
+		return (NULL);
+
+	/* return comparison name */
+	return (canon_hsp((*spp)->setname, *hspp));
+}
+
+/*
+ * Set up names for a hotspare pool.
+ *
+ * Returns the cached name structure for the pool that uname refers to,
+ * creating and caching it on first use.  Returns NULL, with the error in
+ * ep, when uname cannot be parsed or names the wrong set.
+ */
+mdhspname_t *
+metahspname(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	char		*cname;
+	hsp_t		hsp;
+	mdhspnamelist_t	**tail;
+	mdhspname_t	*hspnp;
+
+	/* check setname */
+	assert(uname != NULL);
+	if ((cname = gethspname(spp, uname, &hsp, ep)) == NULL)
+		return (NULL);
+	assert(*spp != NULL);
+
+	/* look in cache first */
+	for (tail = &hsplistp; (*tail != NULL); tail = &(*tail)->next) {
+		hspnp = (*tail)->hspnamep;
+		if (strcmp(hspnp->hspname, cname) == 0) {
+			Free(cname);
+			return (hspnp);
+		}
+	}
+
+	/* allocate new list element and hspname */
+	*tail = Zalloc(sizeof (**tail));
+	hspnp = (*tail)->hspnamep = Zalloc(sizeof (*hspnp));
+	metainithspname(hspnp);
+
+	/* save hspname and number; the cache entry now owns cname */
+	hspnp->hspname = cname;
+	hspnp->hsp = MAKE_HSP_ID((*spp)->setno, hsp);
+
+	/*
+	 * Nothing can fail once the entry has been allocated, so there is
+	 * no error path here; the unreachable cleanup code that used to
+	 * follow, behind an unused label, has been removed.
+	 */
+	return (hspnp);
+}
+
+/*
+ * Set up a hotspare pool name from its id.
+ *
+ * The set is taken from the id and checked against *spp; spp may be NULL
+ * when the caller has no interest in the set.  The pool is then looked
+ * up by its textual name, "hspNNN" in the local set and
+ * "<setname>/hspNNN" elsewhere.
+ *
+ * Returns NULL, with the error in ep, on failure.
+ */
+mdhspname_t *
+metahsphspname(
+	mdsetname_t	**spp,
+	hsp_t		hsp,
+	md_error_t	*ep
+)
+{
+	set_t		setno = HSP_SET(hsp);
+	mdsetname_t	*sp = NULL;
+	char		*uname;
+	mdhspname_t	*hspnp;
+	size_t		len;
+
+	/* check set first */
+	if (spp == NULL)
+		spp = &sp;
+	if (chksetno(spp, setno, ep) != 0)
+		return (NULL);
+	assert(*spp != NULL);
+	sp = *spp;
+
+	/*
+	 * Build corresponding hotspare pool name.  The pool number is
+	 * cast to match the conversion explicitly: the original printed
+	 * the same expression with %u in one branch and %lu in the other,
+	 * so one of the two did not match the argument type.
+	 */
+	if (metaislocalset(sp)) {
+		len = 20;
+		uname = Malloc(len);
+		(void) snprintf(uname, len, "hsp%03lu",
+		    (unsigned long)HSP_ID(hsp));
+	} else {
+		len = strlen(sp->setname) + 1 + 20;
+		uname = Malloc(len);
+		(void) snprintf(uname, len, "%s/hsp%03lu", sp->setname,
+		    (unsigned long)HSP_ID(hsp));
+	}
+
+	/* setup name */
+	hspnp = metahspname(spp, uname, ep);
+	Free(uname);
+	return (hspnp);
+}
+
+/*
+ * Return the name of the hotspare pool with id hsp, or NULL if it
+ * cannot be determined.  Errors are discarded.  The string belongs to
+ * the name structure it is taken from.
+ */
+char *
+get_hspname(hsp_t hsp)
+{
+	md_error_t	err = mdnullerror;
+	mdhspname_t	*poolp;
+
+	poolp = metahsphspname(NULL, hsp, &err);
+	if (poolp != NULL)
+		return (poolp->hspname);
+
+	/* no name: drop whatever error was recorded */
+	mdclrerror(&err);
+	return (NULL);
+}
+
+/*
+ * Free a hotspare pool list.  Only the list elements are released; the
+ * pool names they point to are left alone.
+ */
+void
+metafreehspnamelist(mdhspnamelist_t *hspnlp)
+{
+	while (hspnlp != NULL) {
+		mdhspnamelist_t	*doomed = hspnlp;
+
+		hspnlp = hspnlp->next;
+		Free(doomed);
+	}
+}
+
+/*
+ * Build a list of hotspare pool names from an argument vector.
+ *
+ * Each of the argc strings in argv is resolved with metahspname() and
+ * appended, in order, to a new list that is stored in *hspnlpp.  Returns
+ * the number of names in the list.  If any name fails to resolve the
+ * partial list is freed, *hspnlpp is set to NULL and -1 is returned,
+ * with the error in ep.
+ */
+int
+metahspnamelist(
+	mdsetname_t	**spp,
+	mdhspnamelist_t	**hspnlpp,
+	int		argc,
+	char		*argv[],
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t	**tailpp = hspnlpp;
+	int		count = 0;
+
+	for (*hspnlpp = NULL; (argc > 0); ++count, --argc, ++argv) {
+		mdhspnamelist_t	*hspnlp;
+		mdhspname_t	*hspnp;
+
+		/*
+		 * Resolve the name before allocating its list element.
+		 * The original allocated first and leaked the still
+		 * unlinked element when the name did not resolve.
+		 */
+		if ((hspnp = metahspname(spp, argv[0], ep)) == NULL) {
+			metafreehspnamelist(*hspnlpp);
+			*hspnlpp = NULL;
+			return (-1);
+		}
+
+		hspnlp = Zalloc(sizeof (*hspnlp));
+		hspnlp->hspnamep = hspnp;
+		*tailpp = hspnlp;
+		tailpp = &hspnlp->next;
+	}
+	return (count);
+}
+
+/*
+ * Append hspnp to the end of the hotspare pool list *hspnlpp, walking
+ * the list from hspnlpp to find its end.  Returns hspnp.
+ */
+mdhspname_t *
+metahspnamelist_append(mdhspnamelist_t **hspnlpp, mdhspname_t *hspnp)
+{
+	mdhspnamelist_t	**slot = hspnlpp;
+
+	/* find the link that terminates the list */
+	while (*slot != NULL)
+		slot = &(*slot)->next;
+
+	/* hang a new element carrying the pool name off that link */
+	*slot = Zalloc(sizeof (mdhspnamelist_t));
+	(*slot)->hspnamep = hspnp;
+	return (hspnp);
+}
+
+/*
+ * Get the name that belongs to a device number.
+ *
+ * Metadevices are named straight from their minor number.  For any
+ * other device the name is looked up by device number in the namespace
+ * of the set *spp (the local set is used, and stored in *spp, when the
+ * caller has not chosen one) and set up as a fast name, with the
+ * namespace key recorded in it.
+ *
+ * Returns NULL, with the error in ep, on failure.
+ */
+mdname_t *
+metadevname(
+	mdsetname_t	**spp,
+	md_dev64_t	dev,
+	md_error_t	*ep)
+{
+	char		*nsname;
+	mdname_t	*np;
+	mdkey_t		key;
+
+	assert(dev != NODEV64);
+
+	/* short circuit metadevices */
+	if (meta_dev_ismeta(dev))
+		return (metamnumname(spp, meta_getminor(dev), 0, ep));
+
+	/* create local set, if necessary */
+	if ((*spp == NULL) &&
+	    ((*spp = metasetname(MD_LOCAL_NAME, ep)) == NULL))
+		return (NULL);
+
+	/* get name from namespace */
+	nsname = meta_getnmentbydev((*spp)->setno, MD_SIDEWILD,
+	    dev, NULL, NULL, &key, ep);
+	if (nsname == NULL)
+		return (NULL);
+
+	np = metaname_fast(spp, nsname, ep);
+	if (np != NULL)
+		np->key = key;
+
+	Free(nsname);
+	return (np);
+}
+
+/*
+ * Return the short name cached for device number dev, or NULL when no
+ * slice of any drive in the drive cache has that device number.
+ */
+static char *
+metadevtocachename(md_dev64_t dev)
+{
+	mddrivenamelist_t	*dnlp;
+
+	/* every slice of every cached drive is a candidate */
+	for (dnlp = drivelistp; (dnlp != NULL); dnlp = dnlp->next) {
+		mdname_t	*slices = dnlp->drivenamep->parts.parts_val;
+		uint_t		nslices = dnlp->drivenamep->parts.parts_len;
+		uint_t		i;
+
+		for (i = 0; (i < nslices); ++i) {
+			if (slices[i].dev == dev)
+				return (slices[i].cname);
+		}
+	}
+
+	/* not found */
+	return (NULL);
+}
+
+/*
+ * Return the short name of the device with number dev in set setno.
+ *
+ * The name is normally obtained through metadevname(), which asks for
+ * the name stored in the metadevice state database on behalf of the
+ * utilities.  If the set is bad or unknown, or that lookup fails, the
+ * error is discarded and the drive cache is searched instead; NULL is
+ * returned when the device is not found there either.
+ */
+char *
+get_devname(
+	set_t		setno,
+	md_dev64_t	dev)
+{
+	mdsetname_t	*sp;
+	mdname_t	*np;
+	md_error_t	status = mdnullerror;
+
+	/* get name */
+	if (setno != MD_SET_BAD) {
+		sp = metasetnosetname(setno, &status);
+		if (sp != NULL) {
+			np = metadevname(&sp, dev, &status);
+			if (np != NULL)
+				return (np->cname);
+		}
+	}
+
+	/* fall back to what is already cached */
+	mdclrerror(&status);
+	return (metadevtocachename(dev));
+}
+
+/*
+ * Get the name that belongs to a namespace key.
+ *
+ * The name is looked up by key in the namespace of the set *spp (the
+ * local set is used, and stored in *spp, when the caller has not chosen
+ * one) and set up as a fast or a regular name, depending on fast.  The
+ * device number found in the namespace is recorded in the name.
+ *
+ * Returns NULL, with the error in ep, on failure.
+ */
+mdname_t *
+metakeyname(
+	mdsetname_t	**spp,
+	mdkey_t		key,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	char		*nsname;
+	md_dev64_t	dev = NODEV64;
+	mdname_t	*np;
+
+	/* create local set, if necessary */
+	if ((*spp == NULL) &&
+	    ((*spp = metasetname(MD_LOCAL_NAME, ep)) == NULL))
+		return (NULL);
+
+	/* get name from namespace */
+	nsname = meta_getnmentbykey((*spp)->setno, MD_SIDEWILD,
+	    key, NULL, NULL, &dev, ep);
+	if (nsname == NULL)
+		return (NULL);
+
+	np = fast ? metaname_fast(spp, nsname, ep) :
+	    metaname(spp, nsname, ep);
+
+	/* a successful lookup is expected to have supplied the device */
+	assert(dev != NODEV64);
+	if (np != NULL)
+		np->dev = dev;
+
+	Free(nsname);
+	return (np);
+}
+
+/*
+ * completely flush the caches
+ *
+ * Flushes, in this order, the hotspare pool names, drive names, set
+ * names, controller cache, fast names and stat cache.  The sr cache is
+ * flushed as well, but only when flush_sr_cache is non-zero.
+ */
+void
+metaflushnames(int flush_sr_cache)
+{
+ metaflushhspnames();
+ metaflushdrivenames();
+ metaflushsetnames();
+ metaflushctlrcache();
+ metaflushfastnames();
+ metaflushstatcache();
+ if (flush_sr_cache) /* optional: the caller decides */
+ sr_cache_flush(0);
+}
+
+/*
+ * meta_get_hotspare_names
+ *	Appends the name of every hot spare, of every hotspare pool that
+ *	meta_get_hsp_names() reports for sp, to the name list *nlpp.
+ *
+ *	Returns the number of names appended, or -1 on failure.  On
+ *	failure ep always holds an error: if the failing call did not set
+ *	one, a generic MDE_NO_HSPS error is recorded.  Names appended
+ *	before the failure stay on the list.
+ */
+
+int
+meta_get_hotspare_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t	*pools = NULL;
+	mdhspnamelist_t	*cur;
+	int		total = 0;
+
+	assert(nlpp != NULL);
+
+	/* get hotspare names */
+	if (meta_get_hsp_names(sp, &pools, options, ep) < 0) {
+		total = -1;
+	} else {
+		/* build name list, one pool at a time */
+		for (cur = pools; (cur != NULL); cur = cur->next) {
+			md_hsp_t	*hsp;
+			int		i;
+
+			hsp = meta_get_hsp(sp, cur->hspnamep, ep);
+			if (hsp == NULL) {
+				total = -1;
+				break;
+			}
+			for (i = 0; (i < hsp->hotspares.hotspares_len); i++) {
+				md_hs_t	*hs = &hsp->hotspares.hotspares_val[i];
+
+				(void) metanamelist_append(nlpp, hs->hsnamep);
+				++total;
+			}
+		}
+	}
+
+	/* cleanup and return count or error */
+	metafreehspnamelist(pools);
+	if ((total == -1) && mdisok(ep)) {
+		/*
+		 * At least try to give some sort of meaningful error
+		 */
+		(void) mderror(ep, MDE_NO_HSPS, "Generic Hotspare Error");
+	}
+
+	return (total);
+}
+/*
+ * meta_create_non_dup_list
+ *	INPUT:	mdnp		name to add to the list, if it is new
+ *		ldevidpp	list of non-duplicate names
+ *	OUTPUT:	ldevidpp	list of non-duplicate names
+ *
+ * Takes the short name of mdnp, cuts it off at the last 's' of its last
+ * path component (which strips the slice from a name such as c0t0d0s0),
+ * and adds the result to the end of the list unless an entry with the
+ * same name is already there.  A new entry records the key of mdnp.
+ * Nothing is done when mdnp is NULL.
+ * User needs to free allocated memory.
+ */
+void
+meta_create_non_dup_list(
+	mdname_t	*mdnp,
+	mddevid_t	**ldevidpp
+)
+{
+	char		*diskname;
+	char		*leaf;
+	char		*slicep;
+	mddevid_t	**linkp;
+	mddevid_t	*entry;
+
+	if (mdnp == NULL)
+		return;
+
+	/*
+	 * Grab the name of the device and strip off slice information
+	 */
+	diskname = Strdup(mdnp->cname);
+	if (diskname == NULL) {
+		return;
+	}
+	leaf = strrchr(diskname, '/');
+	slicep = strrchr((leaf != NULL) ? leaf : diskname, 's');
+	if (slicep != NULL)
+		*slicep = '\0';
+
+	/* walk the list; the loop ends on the link to append to */
+	for (linkp = ldevidpp; (*linkp != NULL); linkp = &(*linkp)->next) {
+		if (strcmp((*linkp)->ctdname, diskname) == 0) {
+			/* already there so just return */
+			Free(diskname);
+			return;
+		}
+	}
+
+	/* new name: the entry takes over the string */
+	entry = Zalloc(sizeof (mddevid_t));
+	entry->ctdname = diskname;
+	entry->key = mdnp->key;
+	*linkp = entry;
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c b/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c
new file mode 100644
index 0000000000..337b48f98e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c
@@ -0,0 +1,1267 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <dlfcn.h>
+#include <meta.h>
+#include <metadyn.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <devid.h>
+#include <sys/param.h>
+#include <sys/scsi/impl/uscsi.h>
+#include <sys/scsi/generic/commands.h>
+#include <sys/scsi/generic/inquiry.h>
+#include <sys/efi_partition.h>
+
+/*
+ * Fixed geometry values reported for EFI labeled devices; they are used
+ * by meta_efi_to_mdgeom() to fill in an mdgeom_t from a struct dk_gpt.
+ */
+#define MD_EFI_FG_HEADS 128
+#define MD_EFI_FG_SECTORS 256
+#define MD_EFI_FG_RPM 7200
+#define MD_EFI_FG_WRI 1
+#define MD_EFI_FG_RRI 1
+
+
+/*
+ * One entry of the singly linked controller cache, mapping a controller
+ * name to a controller type (see ctlr_cache_add() / ctlr_cache_look()).
+ */
+typedef struct ctlr_cache {
+ char *ctlr_nm; /* controller name, private copy owned by the entry */
+ int ctlr_ty; /* controller type stored for that name */
+ struct ctlr_cache *ctlr_nx; /* next entry, NULL at end of list */
+} ctlr_cache_t;
+
+/* head of the controller cache list, NULL while the cache is empty */
+static ctlr_cache_t *ctlr_cache = NULL;
+
+
+/*
+ * Return the set a device belongs to.
+ *
+ * For a metadevice the set is derived from the minor number of the
+ * device.  For any other device the sets are searched for its drive; a
+ * drive found in no set is reported as belonging to the local set.
+ * NULL is returned if the search fails.
+ */
+mdsetname_t *
+metagetset(
+	mdname_t	*np,
+	int		bypass_daemon,
+	md_error_t	*ep
+)
+{
+	mdsetname_t	*setp;
+
+	if (metaismeta(np)) {
+		/* metadevice: the set number is encoded in the minor */
+		return (metasetnosetname(
+		    MD_MIN2SET(meta_getminor(np->dev)), ep));
+	}
+
+	/* regular device: ask which set, if any, owns the drive */
+	if (meta_is_drive_in_anyset(np->drivenamep, &setp, bypass_daemon,
+	    ep) != 0)
+		return (NULL);
+
+	return ((setp != NULL) ? setp : metasetnosetname(MD_LOCAL_SET, ep));
+}
+
+/*
+ * Translate a system disk geometry (struct dk_geom) into the md
+ * geometry type.  The target is cleared first; the block size is always
+ * set to DEV_BSIZE.
+ */
+static void
+meta_geom_to_md(
+	struct dk_geom	*gp,
+	mdgeom_t	*mdgp
+)
+{
+	/* start from an all-zero structure */
+	(void) memset(mdgp, 0, sizeof (mdgeom_t));
+
+	/* fixed value */
+	mdgp->blk_sz = DEV_BSIZE;
+
+	/* values taken over one to one from the system geometry */
+	mdgp->read_reinstruct = gp->dkg_read_reinstruct;
+	mdgp->write_reinstruct = gp->dkg_write_reinstruct;
+	mdgp->rpm = gp->dkg_rpm;
+	mdgp->nsect = gp->dkg_nsect;
+	mdgp->nhead = gp->dkg_nhead;
+	mdgp->ncyl = gp->dkg_ncyl;
+}
+
+/*
+ * Build an md geometry for an EFI labeled device.
+ *
+ * Heads, sectors, rpm and the reinstruct values are the MD_EFI_FG_*
+ * constants; the cylinder count is the usable LBA range of the label
+ * divided by the size of one such cylinder.
+ */
+static void
+meta_efi_to_mdgeom(struct dk_gpt *gpt, mdgeom_t *mdgp)
+{
+	(void) memset(mdgp, 0, sizeof (mdgeom_t));
+
+	/* constant part of the geometry */
+	mdgp->blk_sz = DEV_BSIZE;
+	mdgp->read_reinstruct = MD_EFI_FG_RRI;
+	mdgp->write_reinstruct = MD_EFI_FG_WRI;
+	mdgp->rpm = MD_EFI_FG_RPM;
+	mdgp->nsect = MD_EFI_FG_SECTORS;
+	mdgp->nhead = MD_EFI_FG_HEADS;
+
+	/* usable blocks divided by blocks per cylinder */
+	mdgp->ncyl = (gpt->efi_last_u_lba - gpt->efi_first_u_lba) /
+	    (MD_EFI_FG_HEADS * MD_EFI_FG_SECTORS);
+}
+
+/*
+ * Convert an EFI label (struct dk_gpt) into an md vtoc.
+ *
+ * *mdvp is cleared first.  If the label holds more than MD_MAX_PARTS
+ * partitions only nparts is filled in and the function returns early;
+ * the caller is expected to check nparts against MD_MAX_PARTS.
+ *
+ * mdvp->typename, if set, is allocated here and must be released by the
+ * caller (see metafreevtoc()).
+ */
+static void
+meta_efi_to_mdvtoc(struct dk_gpt *gpt, mdvtoc_t *mdvp)
+{
+	char		typename[EFI_PART_NAME_LEN];
+	uint_t		i;
+
+	(void) memset(mdvp, '\0', sizeof (*mdvp));
+	mdvp->nparts = gpt->efi_nparts;
+	if (mdvp->nparts > MD_MAX_PARTS)
+		return;
+
+	mdvp->first_lba = gpt->efi_first_u_lba;
+	mdvp->last_lba = gpt->efi_last_u_lba;
+	mdvp->lbasize = gpt->efi_lbasize;
+
+	for (i = 0; (i < gpt->efi_nparts); ++i) {
+		mdvp->parts[i].start = gpt->efi_parts[i].p_start;
+		mdvp->parts[i].size = gpt->efi_parts[i].p_size;
+		mdvp->parts[i].tag = gpt->efi_parts[i].p_tag;
+		mdvp->parts[i].flag = gpt->efi_parts[i].p_flag;
+
+		/*
+		 * Due to the lack of a label for the entire partition table,
+		 * we use p_name of the reserved partition.  p_name is an
+		 * array member, so it needs no NULL check.
+		 */
+		if (gpt->efi_parts[i].p_tag != V_RESERVED)
+			continue;
+
+		(void) strlcpy(typename, gpt->efi_parts[i].p_name,
+		    EFI_PART_NAME_LEN);
+
+		/*
+		 * Stop at first (if any) space or tab.  strcspn() is used
+		 * rather than strtok(): it really cuts at the first
+		 * delimiter, even a leading one, and does not disturb a
+		 * strtok() scan a caller may have in progress.
+		 */
+		typename[strcspn(typename, " \t")] = '\0';
+
+		/* a later reserved partition replaces an earlier name */
+		if (mdvp->typename != NULL)
+			Free(mdvp->typename);
+		mdvp->typename = Strdup(typename);
+	}
+}
+
+/*
+ * Convert an md vtoc into a freshly allocated EFI label.
+ *
+ * The label is sized to hold all partitions up to and including the last
+ * one that is in use (non-zero start or size, or tagged V_RESERVED); if
+ * none is in use a label with a single partition is built.
+ *
+ * On return *gpt points to the label, which the caller releases with
+ * free(), or is NULL if the allocation failed.
+ */
+static void
+meta_mdvtoc_to_efi(mdvtoc_t *mdvp, struct dk_gpt **gpt)
+{
+	uint_t		i;
+	uint_t		lastpart = 0;	/* index of last partition in use */
+	size_t		size;
+
+	/* first we count how many partitions we have to send */
+	for (i = 0; i < MD_MAX_PARTS; i++) {
+		if ((mdvp->parts[i].start == 0) &&
+		    (mdvp->parts[i].size == 0) &&
+		    (mdvp->parts[i].tag != V_RESERVED)) {
+			continue;
+		}
+		/* if we are here, we know the partition is really used */
+		lastpart = i;
+	}
+
+	/* struct dk_gpt already contains room for one partition */
+	size = sizeof (struct dk_gpt) + (sizeof (struct dk_part) * lastpart);
+	*gpt = calloc(size, sizeof (char));
+	if (*gpt == NULL)
+		return;
+
+	(*gpt)->efi_nparts = lastpart + 1;
+	(*gpt)->efi_first_u_lba = mdvp->first_lba;
+	(*gpt)->efi_last_u_lba = mdvp->last_lba;
+	(*gpt)->efi_lbasize = mdvp->lbasize;
+	for (i = 0; (i < (*gpt)->efi_nparts); ++i) {
+		(*gpt)->efi_parts[i].p_start = mdvp->parts[i].start;
+		(*gpt)->efi_parts[i].p_size = mdvp->parts[i].size;
+		(*gpt)->efi_parts[i].p_tag = mdvp->parts[i].tag;
+		(*gpt)->efi_parts[i].p_flag = mdvp->parts[i].flag;
+		/*
+		 * Due to the lack of a label for the entire partition table,
+		 * we use p_name of the reserved partition.  The name comes
+		 * from the md vtoc being converted.
+		 */
+		if (((*gpt)->efi_parts[i].p_tag == V_RESERVED) &&
+		    (mdvp->typename != NULL)) {
+			(void) strlcpy((*gpt)->efi_parts[i].p_name,
+			    mdvp->typename, EFI_PART_NAME_LEN);
+		}
+	}
+}
+
+
+/*
+ * Remember the type of a controller.
+ *
+ * If the cache already holds an entry for nm nothing is changed;
+ * otherwise a new entry with a private copy of nm is appended to the
+ * end of the list.
+ */
+void
+ctlr_cache_add(char *nm, int ty)
+{
+	ctlr_cache_t	**linkp = &ctlr_cache;
+	ctlr_cache_t	*entry;
+
+	/* walk to the end of the list, bailing out on a name match */
+	while ((entry = *linkp) != NULL) {
+		if (strcmp(entry->ctlr_nm, nm) == 0)
+			return;
+		linkp = &entry->ctlr_nx;
+	}
+
+	/* linkp now addresses the terminating NULL link */
+	entry = Zalloc(sizeof (ctlr_cache_t));
+	entry->ctlr_nm = Strdup(nm);
+	entry->ctlr_ty = ty;
+	*linkp = entry;
+}
+
+/*
+ * Look up the cached type of the controller named nm.
+ * Returns the stored type, or -1 if the name is not in the cache.
+ */
+int
+ctlr_cache_look(char *nm)
+{
+	ctlr_cache_t	*entry = ctlr_cache;
+
+	while (entry != NULL) {
+		if (strcmp(entry->ctlr_nm, nm) == 0)
+			return (entry->ctlr_ty);
+		entry = entry->ctlr_nx;
+	}
+
+	/* not cached */
+	return (-1);
+}
+
+
+/*
+ * Empty the controller cache, releasing every entry together with the
+ * name it holds.
+ */
+void
+metaflushctlrcache(void)
+{
+	ctlr_cache_t	*victim;
+
+	/* unlink the head entry and free it until the list is empty */
+	while ((victim = ctlr_cache) != NULL) {
+		ctlr_cache = victim->ctlr_nx;
+		Free(victim->ctlr_nm);
+		Free(victim);
+	}
+}
+
+/*
+ * getdrvnode -- return the driver name based on mdname_t->bname
+ *
+ * The name is the last path element of the devices name of np, cut at
+ * its last '@'.  The cached devices name of np is consumed in the
+ * process: it is modified in place, freed, and np->devicesname is reset
+ * to NULL.
+ *
+ * Returns a newly allocated string the caller needs to free when
+ * finished, or NULL if the devices name cannot be determined.
+ */
+char *
+getdrvnode(mdname_t *np, md_error_t *ep)
+{
+	char	*devicespath,
+		*drvnode,
+		*cp;
+
+	if ((devicespath = metagetdevicesname(np, ep)) == NULL)
+		return (NULL);
+
+	/*
+	 * At this point devicespath should be like the following
+	 * "/devices/<unknown_and_dont_care>/xxxx@vvvv"
+	 *
+	 * Neither a missing '/' nor a missing '@' is treated as an error;
+	 * the mapping is deliberately kept open so as to not restrict
+	 * possible future projects.
+	 */
+	if ((drvnode = strrchr(devicespath, '/')) != NULL) {
+		/*
+		 * drvnode now just "xxxx@vvvv"
+		 */
+		drvnode++;
+	} else {
+		/*
+		 * No '/' at all: use the whole string instead of handing
+		 * a NULL pointer to strrchr() below.
+		 */
+		drvnode = devicespath;
+	}
+
+	if ((cp = strrchr(drvnode, '@')) != NULL) {
+		/*
+		 * Now drvnode is just the driver name "xxxx"
+		 */
+		*cp = '\0';
+	}
+
+	cp = Strdup(drvnode);
+
+	/* the cached devices name was truncated above, so drop it */
+	Free(devicespath);
+	np->devicesname = NULL;
+
+	return (cp);
+}
+
+/*
+ * meta_load_dl -- open the dynamic library "lib<driver>.so.1" matching
+ * the driver node of np.
+ *
+ * Library search algorithm:
+ *	1) The bare library name is handed to dlopen(), leaving the
+ *	   search to the runtime linker.
+ *	2) If that fails, the library is looked for in the directory
+ *	   named by the environment variable METALDPATH (mainly used for
+ *	   debugging), or in METALDPATH_DEFAULT if the variable is not
+ *	   set.
+ *
+ * Returns the dlopen() handle, or NULL if the driver name could not be
+ * determined or no library was found.  A common failure is simply not
+ * finding a libXX.so.1 such as libsd.so.1; some controllers will not
+ * have a library because there's no enclosure or name translation
+ * required.
+ */
+static void *
+meta_load_dl(mdname_t *np, md_error_t *ep)
+{
+	char	libpath[MAXPATHLEN];
+	char	*driver;
+	char	*dir;
+	void	*handle;
+
+	if ((driver = getdrvnode(np, ep)) == NULL)
+		return (NULL);
+
+	/* step 1: non-absolute name */
+	(void) snprintf(libpath, sizeof (libpath), "lib%s.so.1", driver);
+	handle = dlopen(libpath, RTLD_LAZY);
+
+	if (handle == NULL) {
+		/* step 2: explicit directory */
+		dir = getenv("METALDPATH");
+		if (dir == NULL)
+			dir = METALDPATH_DEFAULT;
+		(void) snprintf(libpath, sizeof (libpath),
+		    "%s/lib%s.so.1", dir, driver);
+		handle = dlopen(libpath, RTLD_LAZY);
+	}
+
+	Free(driver);
+	return (handle);
+}
+
+/*
+ * meta_match_names -- possibly convert the driver names returned by CINFO
+ *
+ * If a driver specific library can be loaded for np and it exports
+ * "convert_path", that function is called with the controller info.
+ * Its return value is ignored.
+ */
+static void
+meta_match_names(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
+    md_error_t *ep)
+{
+	typedef meta_convert_e (*convert_path_t)(mdname_t *,
+	    struct dk_cinfo *, mdcinfo_t *, md_error_t *);
+	convert_path_t	convert;
+	void		*handle;
+
+	/* no library, nothing to convert */
+	handle = meta_load_dl(np, ep);
+	if (handle == NULL)
+		return;
+
+	convert = (convert_path_t)dlsym(handle, "convert_path");
+	if (convert != NULL)
+		(void) (*convert)(np, cp, mdcp, ep);
+
+	(void) dlclose(handle);
+}
+
+/*
+ * meta_match_enclosure -- return any enclosure info if found
+ *
+ * If a driver specific library can be loaded for np and it exports
+ * "get_enclosure", that function is asked to fill in mdcp.
+ *
+ * Returns 1 if the library reported Enclosure_Error, 0 in all other
+ * cases (no library, no such symbol, Enclosure_Okay, Enclosure_Noop).
+ * The library is closed again on every path.
+ */
+int
+meta_match_enclosure(mdname_t *np, mdcinfo_t *mdcp, md_error_t *ep)
+{
+	meta_enclosure_e	e,
+				((*fptr)(mdname_t *, mdcinfo_t *,
+				    md_error_t *));
+	void			*cookie;
+	int			rval = 0;
+
+	if ((cookie = meta_load_dl(np, ep)) == NULL)
+		return (0);
+
+	fptr = (meta_enclosure_e (*)(mdname_t *, mdcinfo_t *,
+	    md_error_t *))dlsym(cookie, "get_enclosure");
+	if (fptr != NULL) {
+		e = (*fptr)(np, mdcp, ep);
+		switch (e) {
+		case Enclosure_Error:
+			/*
+			 * Looks like this library wanted to handle
+			 * our device and had an internal error.
+			 */
+			rval = 1;
+			break;
+
+		case Enclosure_Okay:
+			/*
+			 * Found a library to handle the request so
+			 * just return with data provided.
+			 */
+			break;
+
+		case Enclosure_Noop:
+			/*
+			 * The library did not handle the request.
+			 */
+			break;
+		}
+	}
+
+	/* release the handle on success and failure alike */
+	(void) dlclose(cookie);
+	return (rval);
+}
+
+/*
+ * Convert the controller info returned by DKIOCINFO into the md type.
+ *
+ * The plain values are copied first with the controller type defaulting
+ * to MHD_CTLR_GENERIC; afterwards a driver specific library, if any,
+ * gets the chance to adjust the names and to add enclosure info.
+ *
+ * Returns 0 on success, -1 if the enclosure lookup reported an error.
+ */
+static int
+meta_cinfo_to_md(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
+    md_error_t *ep)
+{
+	/* default; the zero fill also terminates both name strings */
+	(void) memset(mdcp, 0, sizeof (mdcinfo_t));
+	mdcp->ctype = MHD_CTLR_GENERIC;
+	mdcp->cnum = cp->dki_cnum;
+	mdcp->unit = cp->dki_unit;
+	mdcp->maxtransfer = cp->dki_maxtransfer;
+
+	/* names: never copy more than fits, leaving room for the NUL */
+	(void) strncpy(mdcp->cname, cp->dki_cname,
+	    min((sizeof (mdcp->cname) - 1), sizeof (cp->dki_cname)));
+	(void) strncpy(mdcp->dname, cp->dki_dname,
+	    min((sizeof (mdcp->dname) - 1), sizeof (cp->dki_dname)));
+
+	/*
+	 * See if the driver name returned from DKIOCINFO
+	 * is valid or not. In some cases, such as the ap_dmd
+	 * driver, we need to modify the name that's returned
+	 * for everything to work.
+	 */
+	meta_match_names(np, cp, mdcp, ep);
+
+	return ((meta_match_enclosure(np, mdcp, ep) != 0) ? -1 : 0);
+}
+
+/*
+ * Convert a system vtoc into the md vtoc type.
+ *
+ * The type name is the ascii label of the vtoc up to its first space or
+ * tab; it is allocated here.  Every partition starting at block 0 with
+ * a non-zero size gets the size of the disk label recorded as its label
+ * size.
+ */
+static void
+meta_vtoc_to_md(
+	struct vtoc	*vp,
+	mdvtoc_t	*mdvp
+)
+{
+	char		typename[sizeof (vp->v_asciilabel) + 1];
+	char		*cp;
+	uint_t		partno;
+
+	(void) memset(mdvp, 0, sizeof (mdvtoc_t));
+
+	/* NUL terminated copy of the label, cut at the first blank */
+	(void) strncpy(typename, vp->v_asciilabel,
+	    sizeof (vp->v_asciilabel));
+	typename[sizeof (typename) - 1] = '\0';
+	for (cp = typename; *cp != '\0'; cp++) {
+		if ((*cp == ' ') || (*cp == '\t')) {
+			*cp = '\0';
+			break;
+		}
+	}
+	mdvp->typename = Strdup(typename);
+
+	/* partitions */
+	mdvp->nparts = vp->v_nparts;
+	for (partno = 0; partno < vp->v_nparts; partno++) {
+		mdvp->parts[partno].start = vp->v_part[partno].p_start;
+		mdvp->parts[partno].size = vp->v_part[partno].p_size;
+		mdvp->parts[partno].tag = vp->v_part[partno].p_tag;
+		mdvp->parts[partno].flag = vp->v_part[partno].p_flag;
+
+		/* a slice covering block 0 contains the disk label */
+		if ((vp->v_part[partno].p_start == 0) &&
+		    (vp->v_part[partno].p_size > 0)) {
+			mdvp->parts[partno].label = btodb(DK_LABEL_SIZE);
+		}
+	}
+}
+
+/*
+ * Release the allocations held by an md vtoc (its type name) and clear
+ * the whole structure.  The structure itself is not freed.
+ */
+void
+metafreevtoc(
+	mdvtoc_t	*vtocp
+)
+{
+	char	*name = vtocp->typename;
+
+	if (name != NULL)
+		Free(name);
+
+	/* leaves typename NULL and nparts 0 */
+	(void) memset(vtocp, 0, sizeof (mdvtoc_t));
+}
+
+/*
+ * Return the vtoc of the drive np belongs to, in md format.
+ *
+ * Unless nocache is set, a vtoc already stored in the drive name is
+ * returned right away.  Otherwise the raw device is opened and
+ *	- the device id and minor name are recorded,
+ *	- geometry and vtoc are read, either through DKIOCGGEOM and
+ *	  meta_getvtoc() or, if DKIOCGGEOM yields ENOTSUP, from the EFI
+ *	  label,
+ *	- both are stored in the drive name in md format.
+ *
+ * If partnop is not NULL the partition number of np is stored there.
+ *
+ * Returns a pointer to the vtoc stored in the drive name, or NULL with
+ * ep set on failure.
+ */
+mdvtoc_t *
+metagetvtoc(
+	mdname_t	*np,	/* only rname, drivenamep, are setup */
+	int		nocache,
+	uint_t		*partnop,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*dnp = np->drivenamep;
+	struct dk_geom	geom;
+	char		*minor_name = NULL;
+	char		*rname = np->rname;
+	int		fd;
+	int		partno;
+	int		err = 0;	/* saves errno from ioctl */
+	ddi_devid_t	devid;
+	char		*p;
+
+	/* short circuit */
+	if ((! nocache) && (dnp->vtoc.nparts != 0)) {
+		if (partnop != NULL) {
+			/*
+			 * the following assignment works because the
+			 * mdname_t structs are always created as part
+			 * of the drivenamep struct.  When a user
+			 * creates an mdname_t struct it either
+			 * uses an existing drivenamep struct or creates
+			 * a new one and then adds the mdname_t struct
+			 * as part of its parts_val array.  So what is
+			 * being computed below is the slice offset in
+			 * the parts_val array.
+			 */
+			*partnop = np - np->drivenamep->parts.parts_val;
+			assert(*partnop < dnp->parts.parts_len);
+		}
+		return (&dnp->vtoc);
+	}
+
+	/* can't get vtoc */
+	if (! nocache) {
+		switch (dnp->type) {
+		case MDT_ACCES:
+		case MDT_UNKNOWN:
+			(void) mdsyserror(ep, dnp->errnum, rname);
+			return (NULL);
+		}
+	}
+
+	/* get all the info */
+	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
+		(void) mdsyserror(ep, errno, rname);
+		return (NULL);
+	}
+
+	/*
+	 * The disk is open so this is a good point to get the devid
+	 * otherwise it will need to be done at another time which
+	 * means reopening it.
+	 */
+	if (devid_get(fd, &devid) != 0) {
+		/* there is no devid for the disk */
+		if (((p = getenv("MD_DEBUG")) != NULL) &&
+		    (strstr(p, "DEVID") != NULL)) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "%s has no device id\n"), np->rname);
+		}
+		np->minor_name = (char *)NULL;
+		dnp->devid = NULL;
+	} else {
+		(void) devid_get_minor_name(fd, &minor_name);
+		/*
+		 * The minor name could be NULL if the underlying
+		 * device driver does not support 'minor names'.
+		 * This means we do not use devid's for this device.
+		 * SunCluster did driver does not support minor names.
+		 */
+		if (minor_name != NULL) {
+			np->minor_name = Strdup(minor_name);
+			devid_str_free(minor_name);
+			dnp->devid = devid_str_encode(devid, NULL);
+		} else {
+			np->minor_name = (char *)NULL;
+			dnp->devid = NULL;
+
+			if (((p = getenv("MD_DEBUG")) != NULL) &&
+			    (strstr(p, "DEVID") != NULL)) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s no minor name (no devid)\n"),
+				    np->rname);
+			}
+		}
+		devid_free(devid);
+	}
+
+	/*
+	 * if our drivenamep points to a device not supporting DKIOCGGEOM,
+	 * it's likely to have an EFI label.
+	 */
+	(void) memset(&geom, 0, sizeof (geom));
+	if (ioctl(fd, DKIOCGGEOM, &geom) != 0) {
+		err = errno;
+		if (err == ENOTTY) {
+			(void) mddeverror(ep, MDE_NOT_DISK, NODEV, rname);
+			(void) close(fd);
+			return (NULL);
+		} else if (err != ENOTSUP) {
+			(void) mdsyserror(ep, err, rname);
+			(void) close(fd);
+			return (NULL);
+		}
+
+	}
+	/*
+	 * If we are here, there was either no failure on DKIOCGGEOM or
+	 * the failure was ENOTSUP
+	 */
+	if (err == ENOTSUP) {
+		/* DKIOCGGEOM yielded ENOTSUP => try efi_alloc_and_read */
+		struct dk_gpt	*gpt;
+		int		save_errno;
+
+		/* this also sets errno */
+		partno = efi_alloc_and_read(fd, &gpt);
+		save_errno = errno;
+		(void) close(fd);
+		if (partno < 0) {
+			efi_free(gpt);
+			(void) mdsyserror(ep, save_errno, rname);
+			return (NULL);
+		}
+		if (partno >= gpt->efi_nparts) {
+			efi_free(gpt);
+			(void) mddeverror(ep, MDE_INVALID_PART, NODEV64,
+			    rname);
+			return (NULL);
+		}
+
+		/* convert to our format */
+		metafreevtoc(&dnp->vtoc);
+		meta_efi_to_mdvtoc(gpt, &dnp->vtoc);
+		if (dnp->vtoc.nparts > MD_MAX_PARTS) {
+			/*
+			 * Release the label and wipe the partially
+			 * converted vtoc.  Leaving nparts non-zero would
+			 * make the short circuit above hand out this
+			 * unusable vtoc on the next call.
+			 */
+			efi_free(gpt);
+			metafreevtoc(&dnp->vtoc);
+			(void) mddeverror(ep, MDE_TOO_MANY_PARTS, NODEV64,
+			    rname);
+			return (NULL);
+		}
+		/*
+		 * libmeta needs at least V_NUMPAR partitions.
+		 * If we have an EFI partition with less than V_NUMPAR slices,
+		 * we nevertheless reserve space for V_NUMPAR
+		 */
+
+		if (dnp->vtoc.nparts < V_NUMPAR) {
+			dnp->vtoc.nparts = V_NUMPAR;
+		}
+		meta_efi_to_mdgeom(gpt, &dnp->geom);
+		efi_free(gpt);
+	} else {
+		/* no error on DKIOCGGEOM, try meta_getvtoc */
+		struct vtoc	vtoc;
+
+		if (meta_getvtoc(fd, np->cname, &vtoc, &partno, ep) < 0) {
+			(void) close(fd);
+			return (NULL);
+		}
+		(void) close(fd);
+
+		/* convert to our format */
+		meta_geom_to_md(&geom, &dnp->geom);
+		metafreevtoc(&dnp->vtoc);
+		meta_vtoc_to_md(&vtoc, &dnp->vtoc);
+	}
+
+	/* fix up any drives which are now accessible */
+	if ((nocache) && (dnp->type == MDT_ACCES) &&
+	    (dnp->vtoc.nparts == dnp->parts.parts_len)) {
+		dnp->type = MDT_COMP;
+		dnp->errnum = 0;
+	}
+
+	/* save partno */
+	assert(partno < dnp->vtoc.nparts);
+	if (partnop != NULL)
+		*partnop = partno;
+
+	/* return info */
+	return (&dnp->vtoc);
+}
+
+/*
+ * Copy the partition table of an md vtoc into a system vtoc.
+ *
+ * Only v_nparts and v_part of *vp are written; start and size of each
+ * partition are narrowed to daddr32_t.
+ */
+static void
+meta_mdvtoc_to_vtoc(
+	mdvtoc_t	*mdvp,
+	struct vtoc	*vp
+)
+{
+	uint_t		partno = 0;
+
+	/* wipe the whole partition array before filling it */
+	(void) memset(&vp->v_part, 0, sizeof (vp->v_part));
+
+	vp->v_nparts = (ushort_t)mdvp->nparts;
+	while (partno < mdvp->nparts) {
+		vp->v_part[partno].p_flag = mdvp->parts[partno].flag;
+		vp->v_part[partno].p_tag = mdvp->parts[partno].tag;
+		vp->v_part[partno].p_size =
+		    (daddr32_t)mdvp->parts[partno].size;
+		vp->v_part[partno].p_start =
+		    (daddr32_t)mdvp->parts[partno].start;
+		partno++;
+	}
+}
+
+/*
+ * Set the vtoc, but use the cached copy to get the info from.
+ * We write np->drivenamep->vtoc to disk.
+ *
+ * The kind of label is chosen by probing the device with DKIOCGGEOM:
+ *	- if the ioctl succeeds, the current vtoc is read with
+ *	  meta_getvtoc(), its partition table is replaced by the cached
+ *	  one (converted from mdvtoc_t to struct vtoc) and the result is
+ *	  written with meta_setvtoc();
+ *	- if it fails with ENOTSUP, the cached vtoc is converted into an
+ *	  EFI label and written with efi_write();
+ *	- any other failure is reported as a system error.
+ *
+ * Returns 0 on success; otherwise ep is set and the error value is
+ * returned.
+ */
+int
+metasetvtoc(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char		*rname = np->rname;
+	mddrivename_t	*dnp = np->drivenamep;
+	int		fd;
+	int		err;
+	int		save_errno;
+	struct dk_geom	geom;
+
+	if ((fd = open(rname, (O_RDONLY | O_NDELAY), 0)) < 0)
+		return (mdsyserror(ep, errno, rname));
+
+	err = ioctl(fd, DKIOCGGEOM, &geom);
+	save_errno = errno;
+	if (err == 0) {
+		struct vtoc	vtoc;
+
+		if (meta_getvtoc(fd, np->cname, &vtoc, NULL, ep) < 0) {
+			(void) close(fd);
+			return (-1);
+		}
+
+		meta_mdvtoc_to_vtoc(&dnp->vtoc, &vtoc);
+
+		if (meta_setvtoc(fd, np->cname, &vtoc, ep) < 0) {
+			(void) close(fd);
+			return (-1);
+		}
+	} else if (save_errno == ENOTSUP) {
+		struct dk_gpt	*gpt = NULL;
+		int		ret;
+
+		/* allocation of gpt is done in meta_mdvtoc_to_efi */
+		meta_mdvtoc_to_efi(&dnp->vtoc, &gpt);
+		if (gpt == NULL) {
+			/* never hand efi_write() a label we don't have */
+			(void) close(fd);
+			return (mdsyserror(ep, ENOMEM, rname));
+		}
+
+		ret = efi_write(fd, gpt);
+		save_errno = errno;	/* free() may change errno */
+		free(gpt);
+		if (ret != 0) {
+			(void) close(fd);
+			return (mdsyserror(ep, save_errno, rname));
+		}
+	} else {
+		(void) close(fd);
+		return (mdsyserror(ep, save_errno, rname));
+	}
+
+	(void) close(fd);
+
+	return (0);
+}
+
+/*
+ * Return the geometry of the drive np belongs to.
+ *
+ * The geometry is filled in as a side effect of getting the vtoc, so the
+ * (cached) vtoc is requested first.  Returns NULL if that fails.
+ */
+mdgeom_t *
+metagetgeom(
+	mdname_t	*np,	/* only rname, drivenamep, are setup */
+	md_error_t	*ep
+)
+{
+	mdvtoc_t	*vtocp = metagetvtoc(np, FALSE, NULL, ep);
+
+	return ((vtocp == NULL) ? NULL : &np->drivenamep->geom);
+}
+
+/*
+ * Return the controller info of the drive np belongs to.
+ *
+ * Info already stored in the drive name (non-empty controller name) is
+ * returned as is; otherwise the raw device is queried with DKIOCINFO
+ * and the result is converted and stored.  Returns NULL with ep set on
+ * failure.
+ */
+mdcinfo_t *
+metagetcinfo(
+	mdname_t	*np,	/* only rname, drivenamep, are setup */
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*dnp = np->drivenamep;
+	char		*rname = np->rname;
+	struct dk_cinfo	cinfo;
+	int		fd;
+	int		rval;
+	int		save;
+
+	/* short circuit */
+	if (dnp->cinfo.cname[0] != '\0')
+		return (&dnp->cinfo);
+
+	/* get controller info */
+	fd = open(rname, (O_RDONLY|O_NDELAY), 0);
+	if (fd < 0) {
+		(void) mdsyserror(ep, errno, rname);
+		return (NULL);
+	}
+	rval = ioctl(fd, DKIOCINFO, &cinfo);
+	save = errno;		/* keep errno safe across close() */
+	(void) close(fd);	/* sd/ssd bug */
+
+	if (rval != 0) {
+		if (save == ENOTTY)
+			(void) mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
+		else
+			(void) mdsyserror(ep, save, rname);
+		return (NULL);
+	}
+
+	/* convert to our format */
+	if (meta_cinfo_to_md(np, &cinfo, &dnp->cinfo, ep) != 0)
+		return (NULL);
+
+	/* return info */
+	return (&dnp->cinfo);
+}
+
+/*
+ * Return the partition number of np, or -1 with ep set if the vtoc of
+ * its drive cannot be obtained.
+ */
+int
+metagetpartno(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	uint_t		partno;
+	mdvtoc_t	*vtocp;
+
+	vtocp = metagetvtoc(np, FALSE, &partno, ep);
+	if (vtocp == NULL)
+		return (-1);
+
+	assert(partno < vtocp->nparts);
+	return (partno);
+}
+
+/*
+ * Return the size recorded in the vtoc for the partition of np, or
+ * MD_DISKADDR_ERROR with ep set if the vtoc cannot be obtained.
+ */
+diskaddr_t
+metagetsize(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	uint_t		partno;
+	mdvtoc_t	*vtocp;
+
+	vtocp = metagetvtoc(np, FALSE, &partno, ep);
+	if (vtocp == NULL)
+		return (MD_DISKADDR_ERROR);
+
+	assert(partno < vtocp->nparts);
+	return (vtocp->parts[partno].size);
+}
+
+/*
+ * Return the label size recorded in the vtoc for the partition of np,
+ * or MD_DISKADDR_ERROR with ep set if the vtoc cannot be obtained.
+ */
+diskaddr_t
+metagetlabel(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	uint_t		partno;
+	mdvtoc_t	*vtocp;
+
+	vtocp = metagetvtoc(np, FALSE, &partno, ep);
+	if (vtocp == NULL)
+		return (MD_DISKADDR_ERROR);
+
+	assert(partno < vtocp->nparts);
+	return (vtocp->parts[partno].label);
+}
+
+/*
+ * find out where database replicas end
+ *
+ * *endblkp is set to the highest last block of all replicas of set sp
+ * that live on the device np, or to 0 if there is none (a metadevice
+ * never has any).  A set without a database is not treated as an error.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+static int
+mddb_getendblk(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	*endblkp,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*replicas = NULL;
+	md_replicalist_t	*cur;
+
+	/* make sure we have a component */
+	*endblkp = 0;
+	if (metaismeta(np))
+		return (0);
+
+	/* get replicas, quit if none */
+	if (metareplicalist(sp, MD_BASICNAME_OK | PRINT_FAST, &replicas,
+	    ep) < 0) {
+		if (mdismddberror(ep, MDE_DB_NODB)) {
+			/* no database at all is fine */
+			mdclrerror(ep);
+			return (0);
+		}
+		return (-1);
+	}
+	if (replicas == NULL)
+		return (0);
+
+	/* go through all the replicas, keeping the largest end block */
+	for (cur = replicas; cur != NULL; cur = cur->rl_next) {
+		md_replica_t	*rp = cur->rl_repp;
+		diskaddr_t	dbend;
+
+		/* only replicas on our device count */
+		if (rp->r_namep->dev != np->dev)
+			continue;
+
+		dbend = rp->r_blkno + rp->r_nblk - 1;
+		if (dbend > *endblkp)
+			*endblkp = dbend;
+	}
+
+	/* cleanup, return success */
+	metafreereplicalist(replicas);
+	return (0);
+}
+
+/*
+ * return cached end block
+ *
+ * This is the last block used by database replicas on np (0 if there
+ * are none).  The value is computed on first use and remembered in
+ * np->end_blk.  Returns MD_DISKADDR_ERROR with ep set on failure.
+ */
+static diskaddr_t
+metagetend(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	dbend = MD_DISKADDR_ERROR;
+
+	if (np->end_blk == MD_DISKADDR_ERROR) {
+		/* not cached yet: look for database locations */
+		if (mddb_getendblk(sp, np, &dbend, ep) != 0)
+			return (MD_DISKADDR_ERROR);
+		np->end_blk = dbend;
+	}
+
+	return (np->end_blk);
+}
+
+/*
+ * does device have a metadb
+ *
+ * Returns 1 if database replicas end beyond block 0 on np, 0 if not and
+ * -1 with ep set if this cannot be determined.
+ */
+int
+metahasmddb(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	/* on success this also fills in np->end_blk */
+	if (metagetend(sp, np, ep) == MD_DISKADDR_ERROR)
+		return (-1);
+
+	return ((np->end_blk > 0) ? 1 : 0);
+}
+
+/*
+ * return cached start block
+ *
+ * The start block is the end of the database replicas on np or, if
+ * there are none, the size of the label, rounded up to the next
+ * cylinder boundary unless it is 0.  The value is computed on first use
+ * and remembered in np->start_blk.  Returns MD_DISKADDR_ERROR with ep
+ * set on failure.
+ */
+diskaddr_t
+metagetstart(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	blk;
+	mdgeom_t	*geomp;
+
+	/* short circuit */
+	if (np->start_blk != MD_DISKADDR_ERROR)
+		return (np->start_blk);
+
+	/* look for database locations */
+	blk = metagetend(sp, np, ep);
+	if (blk == MD_DISKADDR_ERROR)
+		return (MD_DISKADDR_ERROR);
+
+	/* no database here, check for label */
+	if (blk == 0) {
+		blk = metagetlabel(np, ep);
+		if (blk == MD_DISKADDR_ERROR)
+			return (MD_DISKADDR_ERROR);
+	}
+
+	/* roundup to next cylinder */
+	if (blk != 0) {
+		geomp = metagetgeom(np, ep);
+		if (geomp == NULL)
+			return (MD_DISKADDR_ERROR);
+		blk = roundup(blk, (geomp->nhead * geomp->nsect));
+	}
+
+	/* success */
+	np->start_blk = blk;
+	return (blk);
+}
+
+/*
+ * return cached devices name
+ *
+ * The devices name is the target of the symbolic link np->bname with
+ * everything up to and including the first "/devices" removed.  It is
+ * computed on first use and remembered in np->devicesname.  Returns
+ * NULL with ep set on failure.
+ */
+char *
+metagetdevicesname(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char	linkbuf[MAXPATHLEN + 1];
+	int	linklen;
+	int	off;
+
+	/* short circuit */
+	if (np->devicesname != NULL)
+		return (np->devicesname);
+
+	/* follow symlink, leaving room for the terminating NUL */
+	linklen = readlink(np->bname, linkbuf, (sizeof (linkbuf) - 1));
+	if (linklen < 0) {
+		(void) mdsyserror(ep, errno, np->bname);
+		return (NULL);
+	}
+	if (linklen >= sizeof (linkbuf)) {
+		(void) mdsyserror(ep, ENAMETOOLONG, np->bname);
+		return (NULL);
+	}
+	linkbuf[linklen] = '\0';
+
+	/* locate the devices tree within the link target */
+	off = strfind(linkbuf, "/devices/");
+	if (off < 0) {
+		(void) mddeverror(ep, MDE_DEVICES_NAME, np->dev, np->bname);
+		return (NULL);
+	}
+
+	/* keep what follows "/devices", including the leading '/' */
+	np->devicesname = Strdup(linkbuf + off + strlen("/devices"));
+	return (np->devicesname);
+}
+
+/*
+ * get metadevice misc name
+ *
+ * The name is obtained from the driver with MD_IOCGET_DRVNM on first
+ * use and remembered in the drive name.  Returns NULL with ep set if np
+ * does not pass metachkmeta() or the ioctl fails.
+ */
+char *
+metagetmiscname(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mddrivename_t		*dnp = np->drivenamep;
+	md_i_driverinfo_t	drvinfo;
+
+	/* short circuit */
+	if (dnp->miscname != NULL)
+		return (dnp->miscname);
+
+	if (metachkmeta(np, ep) != 0)
+		return (NULL);
+
+	/* get misc module from driver */
+	(void) memset(&drvinfo, 0, sizeof (drvinfo));
+	drvinfo.mnum = meta_getminor(np->dev);
+	if (metaioctl(MD_IOCGET_DRVNM, &drvinfo, &drvinfo.mde,
+	    np->cname) != 0) {
+		(void) mdstealerror(ep, &drvinfo.mde);
+		return (NULL);
+	}
+
+	/* remember and return a private copy of the name */
+	dnp->miscname = Strdup(MD_PNTDRIVERNAME(&drvinfo));
+	return (dnp->miscname);
+}
+
+/*
+ * get unit structure from driver
+ *
+ * MD_IOCGET is issued twice: the first call yields the size of the unit
+ * structure, the second one fills a buffer of that size.  The buffer is
+ * allocated here and handed to the caller.  Returns NULL with ep set on
+ * failure.
+ */
+md_unit_t *
+meta_get_mdunit(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char		*modname = NULL;
+	md_i_get_t	req;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* need a metadevice and the name of its module */
+	if (metachkmeta(np, ep) != 0)
+		return (NULL);
+	modname = metagetmiscname(np, ep);
+	if (modname == NULL)
+		return (NULL);
+
+	/* first pass: get size of unit structure */
+	(void) memset(&req, 0, sizeof (req));
+	MD_SETDRIVERNAME(&req, modname, sp->setno);
+	req.id = meta_getminor(np->dev);
+	if (metaioctl(MD_IOCGET, &req, &req.mde, np->cname) != 0) {
+		(void) mdstealerror(ep, &req.mde);
+		return (NULL);
+	}
+
+	/* second pass: get actual unit structure */
+	assert(req.size > 0);
+	req.mdp = (uintptr_t)Zalloc(req.size);
+	if (metaioctl(MD_IOCGET, &req, &req.mde, np->cname) != 0) {
+		(void) mdstealerror(ep, &req.mde);
+		Free((void *)req.mdp);
+		return (NULL);
+	}
+
+	return ((md_unit_t *)req.mdp);
+}
+
+/*
+ * free metadevice unit
+ *
+ * The unit stored in the drive name, if any, is released with the free
+ * routine matching its type and the pointer is reset to NULL.
+ */
+void
+meta_free_unit(
+	mddrivename_t	*dnp
+)
+{
+	/* nothing cached, nothing to do */
+	if (dnp->unitp == NULL)
+		return;
+
+	switch (dnp->unitp->type) {
+	case MD_DEVICE:
+		meta_free_stripe((md_stripe_t *)dnp->unitp);
+		break;
+
+	case MD_METAMIRROR:
+		meta_free_mirror((md_mirror_t *)dnp->unitp);
+		break;
+
+	case MD_METATRANS:
+		meta_free_trans((md_trans_t *)dnp->unitp);
+		break;
+
+	case MD_METARAID:
+		meta_free_raid((md_raid_t *)dnp->unitp);
+		break;
+
+	case MD_METASP:
+		meta_free_sp((md_sp_t *)dnp->unitp);
+		break;
+
+	default:
+		/* unknown unit type */
+		assert(0);
+		break;
+	}
+
+	dnp->unitp = NULL;
+}
+
+/*
+ * free metadevice name info
+ *
+ * Everything cached for namep and for the drive it belongs to is thrown
+ * away: devices name, key, start and end block, geometry, controller
+ * info, vtoc, side names, misc name and unit.
+ */
+void
+meta_invalidate_name(
+	mdname_t	*namep
+)
+{
+	mddrivename_t	*dnp = namep->drivenamep;
+
+	/* get rid of cached name info */
+	namep->end_blk = -1;
+	namep->start_blk = -1;
+	namep->key = MD_KEYBAD;
+	if (namep->devicesname != NULL) {
+		Free(namep->devicesname);
+		namep->devicesname = NULL;
+	}
+
+	/* get rid of cached drivename info: plain data first ... */
+	(void) memset(&dnp->geom, 0, sizeof (dnp->geom));
+	(void) memset(&dnp->cinfo, 0, sizeof (dnp->cinfo));
+
+	/* ... then everything that owns allocations */
+	metafreevtoc(&dnp->vtoc);
+	metaflushsidenames(dnp);
+	dnp->side_names_key = MD_KEYBAD;
+	if (dnp->miscname != NULL) {
+		Free(dnp->miscname);
+		dnp->miscname = NULL;
+	}
+	meta_free_unit(dnp);
+}
+
+/*
+ * get metadevice unit
+ *
+ * A unit already stored in the drive name is returned as is.  Otherwise
+ * the misc name of the metadevice selects the type specific get routine
+ * whose result is returned.  Returns NULL with ep set on failure, which
+ * includes a misc name matching none of the known types.
+ */
+md_common_t *
+meta_get_unit(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char	*modname;
+
+	/* short circuit */
+	if (np->drivenamep->unitp != NULL)
+		return (np->drivenamep->unitp);
+
+	if (metachkmeta(np, ep) != 0)
+		return (NULL);
+
+	modname = metagetmiscname(np, ep);
+	if (modname == NULL)
+		return (NULL);
+
+	/* dispatch on the module name */
+	if (strcmp(modname, MD_STRIPE) == 0)
+		return ((md_common_t *)meta_get_stripe(sp, np, ep));
+	if (strcmp(modname, MD_MIRROR) == 0)
+		return ((md_common_t *)meta_get_mirror(sp, np, ep));
+	if (strcmp(modname, MD_TRANS) == 0)
+		return ((md_common_t *)meta_get_trans(sp, np, ep));
+	if (strcmp(modname, MD_RAID) == 0)
+		return ((md_common_t *)meta_get_raid(sp, np, ep));
+	if (strcmp(modname, MD_SP) == 0)
+		return ((md_common_t *)meta_get_sp(sp, np, ep));
+
+	/* none of the types we know */
+	(void) mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname);
+	return (NULL);
+}
+
+
+/*
+ * Tell whether the metadevice np is open.
+ *
+ * The local driver is asked first.  If the device is closed locally,
+ * options contains MDCMD_MN_OPEN_CHECK and sp is a multi-node set
+ * (MD_SR_MN), the other nodes are asked as well by means of an
+ * MD_MN_MSG_CLU_CHECK message.
+ *
+ * Returns the open state (non-zero if open), or the error value with ep
+ * set if np does not pass metachkmeta() or the local ioctl fails.  If
+ * the set description is unavailable or the message cannot be
+ * delivered, the device is reported as open.
+ */
+int
+meta_isopen(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep,
+	mdcmdopts_t	options
+)
+{
+	md_isopen_t	d;
+
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	(void) memset(&d, '\0', sizeof (d));
+	d.dev = np->dev;
+	if (metaioctl(MD_IOCISOPEN, &d, &d.mde, np->cname) != 0)
+		return (mdstealerror(ep, &d.mde));
+
+	/*
+	 * shortcut: if the device is open, no need to check on other nodes,
+	 * even in case of a mn metadevice
+	 * Also return in case we're told not to check on other nodes.
+	 */
+	if ((d.isopen != 0) || ((options & MDCMD_MN_OPEN_CHECK) == 0)) {
+		return (d.isopen);
+	}
+
+	/*
+	 * If the device is closed locally, but it's a mn device,
+	 * check on all other nodes, too
+	 */
+	if (sp->setno != MD_LOCAL_SET) {
+		(void) metaget_setdesc(sp, ep); /* not supposed to fail */
+		if (sp->setdesc == NULL) {
+			/*
+			 * It failed nevertheless.  We cannot tell what
+			 * kind of set this is, so we better say the
+			 * device is open.
+			 */
+			return (1);
+		}
+		if (sp->setdesc->sd_flags & MD_SR_MN) {
+			int		err = 0;
+			md_mn_result_t	*resp = NULL;
+			/*
+			 * This message is never directly issued.
+			 * So we launch it with a suspend override flag.
+			 * If the commd is suspended, and this message comes
+			 * along it must be sent due to replaying a metainit or
+			 * similar. In that case we don't want this message to
+			 * be blocked.
+			 * If the commd is not suspended, the flag does no harm.
+			 * Additionally we don't want the result of the message
+			 * cached in the MCT, because we want uptodate results,
+			 * and the message doesn't need being logged either.
+			 * Hence NO_LOG and NO_MCT
+			 */
+			err = mdmn_send_message(
+			    sp->setno,
+			    MD_MN_MSG_CLU_CHECK,
+			    MD_MSGF_NO_MCT | MD_MSGF_STOP_ON_ERROR |
+			    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND,
+			    (char *)&d, sizeof (md_isopen_t),
+			    &resp, ep);
+			if ((err == 0) && (resp != NULL)) {
+				d.isopen = resp->mmr_exitval;
+			} else {
+				/*
+				 * in case some error occurred,
+				 * we better say the device is open
+				 */
+				d.isopen = 1;
+			}
+			/*
+			 * resp starts out as NULL, so this is safe even if
+			 * the call above never stored a result.
+			 */
+			if (resp != (md_mn_result_t *)NULL) {
+				free_result(resp);
+			}
+
+		}
+	}
+
+	return (d.isopen);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_namespace.c b/usr/src/lib/lvm/libmeta/common/meta_namespace.c
new file mode 100644
index 0000000000..eb21cbbdd3
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_namespace.c
@@ -0,0 +1,601 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * namespace utilities
+ */
+
+#include <meta.h>
+
+/*
+ * Description of one device as seen from one side of a set: its block
+ * name, the name of its driver and its minor number.
+ */
+typedef struct deviceinfo {
+ char *bname; /* block name of the device */
+ char *dname; /* driver for the device */
+ minor_t mnum; /* minor number for the device */
+} deviceinfo_t;
+
+/*
+ * Scratch table indexed by side number. It is filled by
+ * add_to_devicelist() and released by empty_devicelist(). Being
+ * file-scope static storage, it is shared by every caller in the process.
+ */
+static deviceinfo_t devlist[MD_MNMAXSIDES];
+
+/*
+ * Look up a namespace entry by key.
+ *
+ * Asks the driver (MD_IOCGET_NM) for the device name, driver name and
+ * minor number that are stored in the metadevice state database
+ * (on behalf of the utilities) under `key' for the given set and side.
+ *
+ * drvnm, mnum and dev are optional out-parameters; any of them may be
+ * NULL.  *drvnm and the returned device name are Strdup()ed copies.
+ * If the ioctl fails, the error is moved into ep and NULL is returned.
+ */
+char *
+meta_getnmentbykey(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	char		**drvnm,
+	minor_t		*mnum,
+	md_dev64_t	*dev,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	static char		device_name[MAXPATHLEN];
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = key;
+	/*
+	 * Convert through uintptr_t, as add_name() does, so the address is
+	 * widened as an unsigned integer.  A direct pointer-to-uint64_t
+	 * cast may sign-extend the address in a 32-bit build.
+	 */
+	nm.devname = (uintptr_t)device_name;
+
+	if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	if (drvnm != NULL)
+		*drvnm = Strdup(nm.drvnm);
+
+	if (mnum != NULL)
+		*mnum = nm.mnum;
+
+	if (dev != NULL)
+		*dev = meta_expldev(makedevice(nm.major, nm.mnum));
+
+	return (Strdup(device_name));
+}
+
+/*
+ * Look up a device id minor name by key.
+ *
+ * Asks the driver (MD_IOCGET_DIDMIN) for the minor name stored in the
+ * metadevice state database under `key' for the given set and side.
+ *
+ * Returns a Strdup()ed copy of the minor name.  If the ioctl fails, the
+ * error is moved into ep and NULL is returned.
+ */
+char *
+meta_getdidminorbykey(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	static char		minorname[MAXPATHLEN];
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = key;
+	/*
+	 * Convert through uintptr_t, as add_name() does, so the address is
+	 * widened as an unsigned integer.  A direct pointer-to-uint64_t
+	 * cast may sign-extend the address in a 32-bit build.
+	 */
+	nm.minorname = (uintptr_t)minorname;
+
+	if (metaioctl(MD_IOCGET_DIDMIN, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	return (Strdup(minorname));
+}
+
+/*
+ * Look up a device id by key.
+ *
+ * Asks the driver (MD_IOCGET_DID) for the device id which has been
+ * stored in the metadevice state database (on behalf of the utilities)
+ * under `key' for the given set and side.
+ *
+ * Returns a malloc()ed device id which the caller must free, or NULL on
+ * failure.  When an ioctl fails, its error is moved into ep; when the
+ * buffer cannot be allocated, ep is set to ENOMEM.
+ */
+ddi_devid_t
+meta_getdidbykey(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	void			*devid;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = key;
+
+	/*
+	 * First ask the driver for the size of the device id string.  This is
+	 * signaled by passing the driver a devid_size of zero.
+	 */
+	nm.devid_size = 0;
+	if (metaioctl(MD_IOCGET_DID, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	/*
+	 * If the devid_size is still zero then something is wrong.
+	 */
+	if (nm.devid_size == 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	/*
+	 * Now go get the actual device id string.  Test the pointer itself
+	 * (not its integer conversion) and report the failure through ep so
+	 * that a NULL return is never silent.
+	 */
+	if ((devid = malloc(nm.devid_size)) == NULL) {
+		(void) mdsyserror(ep, ENOMEM, NULL);
+		return (NULL);
+	}
+	nm.devid = (uintptr_t)devid;
+
+	if (metaioctl(MD_IOCGET_DID, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		free(devid);
+		return (NULL);
+	}
+
+	return (devid);
+}
+
+/*
+ * Record the devid for a namespace key.
+ *
+ * Issues MD_IOCSET_DID for (setno, sideno, key).  For the local set the
+ * same request is then repeated for every other side.
+ *
+ * Returns 0 on success; on failure the ioctl error is moved into ep and
+ * -1 is returned.
+ */
+int
+meta_setdid(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	int			side;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = key;
+
+	/* the caller's side goes in first so the correct devid is used */
+	if (metaioctl(MD_IOCSET_DID, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (-1);
+	}
+
+	/* only the local set needs the per-side reference counts below */
+	if (setno != MD_LOCAL_SET)
+		return (0);
+
+	/*
+	 * In the local set we are adding the devids for the disks in the
+	 * diskset, which means adding a reference count for each side.
+	 * This is done after the initial add so that the correct devid is
+	 * picked up: the key is the key of the drive record, so the minor
+	 * number of the device is what is used to get the devid, and using
+	 * the wrong side first could put the wrong devid in the namespace.
+	 *
+	 * The ioctl can simply be repeated for each remaining side; a side
+	 * that does not exist makes it fail without incrementing the usage
+	 * count on the devid.
+	 * NOTE(review): earlier commentary named ENOENT as the errno for a
+	 * nonexistent side, but the code tolerates ENODEV; confirm against
+	 * the driver.
+	 */
+	for (side = 0 + SKEW; side < MD_MAXSIDES; side++) {
+		nm.side = (side_t)side;
+
+		/* already handled above */
+		if (nm.side == sideno)
+			continue;
+
+		if (metaioctl(MD_IOCSET_DID, &nm, &nm.mde, NULL) == 0)
+			continue;
+
+		if (!mdissyserror(&nm.mde, ENODEV)) {
+			(void) mdstealerror(ep, &nm.mde);
+			return (-1);
+		}
+		mdclrerror(&nm.mde);
+	}
+
+	return (0);
+}
+/*
+ * Look up only the device name stored under `key' for the given set and
+ * side.  Convenience wrapper around meta_getnmentbykey() that asks for
+ * none of the optional outputs; the result and error handling are those
+ * of meta_getnmentbykey().
+ */
+char *
+meta_getnmbykey(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	char	*name;
+
+	name = meta_getnmentbykey(setno, sideno, key, NULL, NULL, NULL, ep);
+	return (name);
+}
+
+/*
+ * Look up a namespace entry by device number.
+ *
+ * Asks the driver (MD_IOCGET_NM, with a wildcard key) for the device
+ * name, driver name, minor number and key that are stored in the
+ * metadevice state database (on behalf of the utilities) for `dev' in
+ * the given set and side.  dev must not be NODEV64.
+ *
+ * drvnm, mnum and key are optional out-parameters; any of them may be
+ * NULL.  *drvnm and the returned device name are Strdup()ed copies.
+ * If the ioctl fails, the error is moved into ep and NULL is returned.
+ */
+char *
+meta_getnmentbydev(
+	set_t		setno,
+	side_t		sideno,
+	md_dev64_t	dev,
+	char		**drvnm,
+	minor_t		*mnum,
+	mdkey_t		*key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	static char		device_name[MAXPATHLEN];
+
+	/* must have a dev */
+	assert(dev != NODEV64);
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = MD_KEYWILD;
+	nm.major = meta_getmajor(dev);
+	nm.mnum = meta_getminor(dev);
+	/*
+	 * Convert through uintptr_t, as add_name() does, so the address is
+	 * widened as an unsigned integer.  A direct pointer-to-uint64_t
+	 * cast may sign-extend the address in a 32-bit build.
+	 */
+	nm.devname = (uintptr_t)device_name;
+
+	if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	if (drvnm != NULL)
+		*drvnm = Strdup(nm.drvnm);
+
+	if (mnum != NULL)
+		*mnum = nm.mnum;
+
+	if (key != NULL)
+		*key = nm.retkey;
+
+	return (Strdup(device_name));
+}
+
+/*
+ * Add a device name to the namespace of a set.
+ *
+ * Issues MD_IOCSET_NM for (sp->setno, sideno, key) with the given driver
+ * name, minor number and block device name.
+ *
+ * Returns the key reported back by the driver in nm.key.  If the ioctl
+ * fails, the error is moved into ep and the value of mdstealerror() is
+ * returned; callers in this file treat -1 as failure.
+ */
+int
+add_name(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	mdkey_t		key,
+	char		*dname,
+	minor_t		mnum,
+	char		*bname,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = sp->setno;
+	nm.side = sideno;
+	nm.key = key;
+	nm.mnum = mnum;
+	/*
+	 * Copy at most size - 1 bytes: nm was zeroed above, so the last
+	 * byte stays '\0' and drvnm is always a terminated string even if
+	 * dname is too long for the field.
+	 */
+	(void) strncpy(nm.drvnm, dname, sizeof (nm.drvnm) - 1);
+	nm.devname_len = strlen(bname) + 1;
+	nm.devname = (uintptr_t)bname;
+
+	if (metaioctl(MD_IOCSET_NM, &nm, &nm.mde, bname) < 0)
+		return (mdstealerror(ep, &nm.mde));
+
+	return (nm.key);
+}
+
+/*
+ * Remove the namespace entry stored under `key' for the given side of
+ * the set (MD_IOCREM_NM).
+ *
+ * Returns 0 on success.  If the ioctl fails, the error is moved into ep
+ * and the value of mdstealerror() is returned.
+ */
+int
+del_name(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	req;
+
+	(void) memset(&req, '\0', sizeof (req));
+	req.setno = sp->setno;
+	req.side = sideno;
+	req.key = key;
+
+	if (metaioctl(MD_IOCREM_NM, &req, &req.mde, NULL) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &req.mde));
+}
+
+/*
+ * Release every entry of devlist and return the table to its empty
+ * state.
+ *
+ * The name pointers are reset to NULL after being freed.  devlist is
+ * static and outlives each call, so leaving the freed pointers in place
+ * would make the next call free them a second time and would defeat the
+ * "dname == NULL" tests made by add_key_name().
+ */
+static void
+empty_devicelist()
+{
+	side_t	sideno;
+
+	for (sideno = 0; sideno < MD_MNMAXSIDES; sideno++) {
+		if (devlist[sideno].bname != (char *)NULL) {
+			Free(devlist[sideno].bname);
+			Free(devlist[sideno].dname);
+			devlist[sideno].bname = (char *)NULL;
+			devlist[sideno].dname = (char *)NULL;
+			devlist[sideno].mnum = NODEV;
+		}
+	}
+}
+
+/*
+ * Store private copies of the block name and driver name, together with
+ * the minor number, in the devlist slot for side `sideno'.  Whatever the
+ * slot held before is overwritten.
+ */
+static void
+add_to_devicelist(
+	side_t	sideno,
+	char	*bname,
+	char	*dname,
+	minor_t	mnum
+)
+{
+	deviceinfo_t	*slot = &devlist[sideno];
+
+	slot->bname = Strdup(bname);
+	slot->dname = Strdup(dname);
+	slot->mnum = mnum;
+}
+
+/*
+ * Fill devlist with the names of np's device on each side of the set.
+ *
+ * Walks the sides with meta_getnextside_devinfo(), starting from
+ * MD_SIDEWILD, and records each result in the slot for its side number.
+ * Returns 0 once the walk is complete, or -1 if the walk fails; in that
+ * case the caller is expected to tidy up the entries already placed in
+ * devlist.
+ */
+static int
+build_sidenamelist(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	side_t	side = MD_SIDEWILD;
+	minor_t	minor = NODEV;
+	char	*blkname = NULL;
+	char	*drvname = NULL;
+	int	rc;
+
+	for (;;) {
+		rc = meta_getnextside_devinfo(sp, np->bname, &side,
+		    &blkname, &drvname, &minor, ep);
+
+		if (rc == -1)
+			return (-1);
+
+		/* no more sides */
+		if (rc == 0)
+			return (0);
+
+		/* the side number is the index into the array */
+		add_to_devicelist(side, blkname, drvname, minor);
+	}
+	/*NOTREACHED*/
+}
+
+/*
+ * add name key
+ * the meta_create* functions should be the only ones using this. The
+ * adding of a name to the namespace must be done in a particular order
+ * to support devids for the disksets. The order is: add the 'local' side
+ * first of all, so the devid lookup in the kernel will use the correct
+ * device information and then add in the other sides.
+ *
+ * On success the new key is stored in np->key, np is appended to *nlpp
+ * when nlpp is not NULL, and 0 is returned. On failure -1 is returned.
+ * devlist is emptied again on every path that returns after it has been
+ * built.
+ */
+int
+add_key_name(
+ mdsetname_t *sp,
+ mdname_t *np,
+ mdnamelist_t **nlpp,
+ md_error_t *ep
+)
+{
+ int err;
+ side_t sideno = MD_SIDEWILD;
+ side_t thisside;
+ mdkey_t key = MD_KEYWILD;
+ md_set_desc *sd;
+ int maxsides;
+
+ /* should have a set */
+ assert(sp != NULL);
+
+ /* sd is only assigned (and only used below) for a non-local set */
+ if (! metaislocalset(sp)) {
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ return (-1);
+ }
+ }
+
+ if (build_sidenamelist(sp, np, ep) == -1) {
+ empty_devicelist();
+ return (-1);
+ }
+
+ /*
+ * When a disk is added into the namespace the local information for
+ * that disk is added in first of all. For the local set this is not
+ * a concern and for the host that owns the diskset it is not a concern
+ * but when a disk is added in the remote namespace we *must* use the
+ * local information for that disk first of all. This is because when
+ * in the kernel (md_setdevname) the passed in dev_t is used to find
+ * the devid of the disk. This means we have to cater for the following:
+ *
+ * - a disk on the remote host having the dev_t that has been passed
+ * into the kernel and this disk is not actually the disk that is
+ * being added into the diskset.
+ * - the dev_t does not exist on this node
+ *
+ * So putting in the local information first of all makes sure that the
+ * dev_t passed into the kernel is correct with respect to that node
+ * and then any further additions for that name match on the key
+ * passed back.
+ */
+ /*
+ * NOTE(review): the value returned by getmyside() is used directly as
+ * an index into devlist; confirm that it cannot report failure here.
+ */
+ thisside = getmyside(sp, ep);
+
+ if (devlist[thisside].dname == NULL ||
+ strlen(devlist[thisside].dname) == 0) {
+ /*
+ * Did not find the disk information for the disk. This can
+ * be because of an inconsistency in the namespace: that is the
+ * devid we have in the namespace does not exist on the
+ * system and thus when looking up the disk information
+ * using this devid we fail to find anything.
+ */
+ (void) mdcomperror(ep, MDE_SP_COMP_OPEN_ERR, 0, np->dev,
+ np->cname);
+ empty_devicelist();
+ return (-1);
+ }
+
+ /* key is still MD_KEYWILD here; add_name() returns the key to use */
+ if ((err = add_name(sp, thisside, key, devlist[thisside].dname,
+ devlist[thisside].mnum, devlist[thisside].bname, ep)) == -1) {
+ empty_devicelist();
+ return (-1);
+ }
+
+ /* We now have a 'key' so add in the other sides */
+ key = (mdkey_t)err;
+
+ /* the local set gets no entries for other sides */
+ if (metaislocalset(sp))
+ goto done;
+
+ if (MD_MNSET_DESC(sd))
+ maxsides = MD_MNMAXSIDES;
+ else
+ maxsides = MD_MAXSIDES;
+
+ for (sideno = 0; sideno < maxsides; sideno++) {
+ /* ignore thisside, as it has been added above */
+ if (sideno == thisside)
+ continue;
+
+ /* only sides that build_sidenamelist() filled in are added */
+ if (devlist[sideno].dname != NULL) {
+ err = add_name(sp, sideno, key, devlist[sideno].dname,
+ devlist[sideno].mnum, devlist[sideno].bname, ep);
+ if (err == -1) {
+ empty_devicelist();
+ return (-1);
+ }
+ }
+ }
+
+done:
+ empty_devicelist();
+ /* save key, return success */
+ np->key = key;
+ if (nlpp != NULL)
+ (void) metanamelist_append(nlpp, np);
+ return (0);
+}
+
+/*
+ * delete name key
+ * the meta_create* functions should be the only ones using this.  The
+ * names must be removed in a particular order: the entries for the
+ * other sides first, and the entry for this side last.
+ *
+ * The entry for this side is removed only if every other removal
+ * succeeded.  Returns 0 on success and -1 on failure.  np->key is set to
+ * MD_KEYBAD unless walking the sides itself fails.
+ */
+int
+del_key_name(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	side_t	sideno = MD_SIDEWILD;
+	side_t	myside;
+	int	rc;
+	int	failed = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* should have a key */
+	assert((np->key != MD_KEYWILD) && (np->key != MD_KEYBAD));
+
+	myside = getmyside(sp, ep);
+
+	/* first pass: every side except our own */
+	while ((rc = meta_getnextside_devinfo(sp, np->bname, &sideno,
+	    NULL, NULL, NULL, ep)) != 0) {
+		if (rc == -1)
+			return (-1);
+
+		if (sideno != myside &&
+		    del_name(sp, sideno, np->key, ep) == -1)
+			failed = 1;
+	}
+
+	/* our own side goes last, and only if nothing failed so far */
+	if (!failed && del_name(sp, myside, np->key, ep) == -1)
+		failed = 1;
+
+	np->key = MD_KEYBAD;
+	return (failed ? -1 : 0);
+}
+
+/*
+ * delete namelist keys
+ * the meta_create* functions should be the only ones using this
+ *
+ * Calls del_key_name() for every name on the list, continuing past
+ * failures.  ep may be NULL when the caller does not want the error.
+ * Returns 0 if every deletion succeeded and -1 otherwise.
+ */
+int
+del_key_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	*nlp,
+	md_error_t	*ep
+)
+{
+	md_error_t	scratch = mdnullerror;
+	md_error_t	*errp = (ep != NULL) ? ep : &scratch;
+	mdnamelist_t	*cur;
+	int		failed = 0;
+
+	for (cur = nlp; cur != NULL; cur = cur->next) {
+		if (del_key_name(sp, cur->namep, errp) != 0)
+			failed = 1;
+	}
+
+	/* errors collected only in our private scratch copy are discarded */
+	if (errp == &scratch)
+		mdclrerror(&scratch);
+
+	return (failed ? -1 : 0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_notify.c b/usr/src/lib/lvm/libmeta/common/meta_notify.c
new file mode 100644
index 0000000000..5f66758f76
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_notify.c
@@ -0,0 +1,692 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1995-2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * libmeta wrappers for event notification
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_notify.h>
+
+#if defined(DEBUG)
+#include <assert.h>
+#endif /* DEBUG */
+
+/*
+ * Mapping between the driver's object tags (md_tags_t) and the library's
+ * object types (ev_obj_t). Searched linearly in either direction by
+ * tagdrv_2_objlib() and objlib_2_tagdrv(); the TAG_LAST entry terminates
+ * the table and must stay last.
+ */
+struct tag2obj_type {
+ md_tags_t tag;
+ ev_obj_t obj;
+} tag2obj_typetab[] =
+{
+ { TAG_EMPTY, EVO_EMPTY },
+ { TAG_METADEVICE, EVO_METADEV },
+ { TAG_REPLICA, EVO_REPLICA },
+ { TAG_HSP, EVO_HSP },
+ { TAG_HS, EVO_HS },
+ { TAG_SET, EVO_SET },
+ { TAG_DRIVE, EVO_DRIVE },
+ { TAG_HOST, EVO_HOST },
+ { TAG_MEDIATOR, EVO_MEDIATOR },
+ { TAG_UNK, EVO_UNSPECIFIED },
+
+ { TAG_LAST, EVO_LAST }
+};
+
+/*
+ * Mapping between the driver's event types (md_event_type_t) and the
+ * library's event ids (evid_t). Searched linearly in either direction by
+ * evdrv_2_evlib() and evlib_2_evdrv(); the EQ_LAST entry terminates the
+ * table and must stay last.
+ */
+struct evdrv2evlib_type {
+ md_event_type_t drv;
+ evid_t lib;
+} evdrv2evlib_typetab[] =
+{
+ { EQ_EMPTY, EV_EMPTY },
+ { EQ_CREATE, EV_CREATE },
+ { EQ_DELETE, EV_DELETE },
+ { EQ_ADD, EV_ADD },
+ { EQ_REMOVE, EV_REMOVE },
+ { EQ_REPLACE, EV_REPLACE },
+ { EQ_MEDIATOR_ADD, EV_MEDIATOR_ADD },
+ { EQ_MEDIATOR_DELETE, EV_MEDIATOR_DELETE },
+ { EQ_HOST_ADD, EV_HOST_ADD },
+ { EQ_HOST_DELETE, EV_HOST_DELETE },
+ { EQ_DRIVE_ADD, EV_DRIVE_ADD },
+ { EQ_DRIVE_DELETE, EV_DRIVE_DELETE },
+ { EQ_RENAME_SRC, EV_RENAME_SRC },
+ { EQ_RENAME_DST, EV_RENAME_DST },
+ { EQ_INIT_START, EV_INIT_START },
+ { EQ_INIT_FAILED, EV_INIT_FAILED },
+ { EQ_INIT_FATAL, EV_INIT_FATAL },
+ { EQ_INIT_SUCCESS, EV_INIT_SUCCESS },
+ { EQ_IOERR, EV_IOERR },
+ { EQ_ERRED, EV_ERRED },
+ { EQ_LASTERRED, EV_LASTERRED },
+ { EQ_OK, EV_OK },
+ { EQ_ENABLE, EV_ENABLE },
+ { EQ_RESYNC_START, EV_RESYNC_START },
+ { EQ_RESYNC_FAILED, EV_RESYNC_FAILED },
+ { EQ_RESYNC_SUCCESS, EV_RESYNC_SUCCESS },
+ { EQ_RESYNC_DONE, EV_RESYNC_DONE },
+ { EQ_HOTSPARED, EV_HOTSPARED },
+ { EQ_HS_FREED, EV_HS_FREED },
+ { EQ_TAKEOVER, EV_TAKEOVER },
+ { EQ_RELEASE, EV_RELEASE },
+ { EQ_OPEN_FAIL, EV_OPEN_FAIL },
+ { EQ_OFFLINE, EV_OFFLINE },
+ { EQ_ONLINE, EV_ONLINE },
+ { EQ_GROW, EV_GROW },
+ { EQ_DETACH, EV_DETACH },
+ { EQ_DETACHING, EV_DETACHING },
+ { EQ_ATTACH, EV_ATTACH },
+ { EQ_ATTACHING, EV_ATTACHING },
+ { EQ_CHANGE, EV_CHANGE },
+ { EQ_EXCHANGE, EV_EXCHANGE },
+ { EQ_REGEN_START, EV_REGEN_START },
+ { EQ_REGEN_DONE, EV_REGEN_DONE },
+ { EQ_REGEN_FAILED, EV_REGEN_FAILED },
+ { EQ_USER, EV_USER },
+ { EQ_NOTIFY_LOST, EV_NOTIFY_LOST },
+
+ { EQ_LAST, EV_LAST }
+};
+
+/*
+ * Refine the generic EVO_METADEV object type for a metadevice.
+ *
+ * Resolves the device to a name within set `setno', asks for its misc
+ * (driver module) name and maps that to EVO_STRIPE, EVO_MIRROR,
+ * EVO_RAID5 or EVO_TRANS.  If any lookup fails, or the misc name is none
+ * of those, EVO_METADEV is returned; lookup errors are left in ep.
+ */
+static ev_obj_t
+dev2tag(md_dev64_t dev, set_t setno, md_error_t *ep)
+{
+	mdsetname_t	*sp;
+	mdname_t	*np;
+	char		*miscname;
+
+	sp = metasetnosetname(setno, ep);
+	if (sp == NULL)
+		return (EVO_METADEV);
+
+	np = metamnumname(&sp, meta_getminor(dev), 0, ep);
+	if (np == NULL)
+		return (EVO_METADEV);
+
+	/* need to invalidate name in case rename or delete/create done */
+	meta_invalidate_name(np);
+
+	miscname = metagetmiscname(np, ep);
+	if (miscname == NULL)
+		return (EVO_METADEV);
+
+	if (strcmp(miscname, MD_STRIPE) == 0)
+		return (EVO_STRIPE);
+	if (strcmp(miscname, MD_MIRROR) == 0)
+		return (EVO_MIRROR);
+	if (strcmp(miscname, MD_RAID) == 0)
+		return (EVO_RAID5);
+	if (strcmp(miscname, MD_TRANS) == 0)
+		return (EVO_TRANS);
+
+	return (EVO_METADEV);
+}
+
+/*
+ * Translate a driver object tag into the library object type.
+ * Returns EVO_UNSPECIFIED for a tag that is not in tag2obj_typetab.
+ */
+static ev_obj_t
+tagdrv_2_objlib(md_tags_t tag)
+{
+	struct tag2obj_type	*tp;
+
+	for (tp = tag2obj_typetab; tp->tag != TAG_LAST; tp++) {
+		if (tp->tag == tag)
+			return (tp->obj);
+	}
+	return (EVO_UNSPECIFIED);
+}
+
+/*
+ * Translate a library object type into the driver object tag.
+ * Returns TAG_UNK for an object type that is not in tag2obj_typetab.
+ */
+static md_tags_t
+objlib_2_tagdrv(ev_obj_t obj)
+{
+	struct tag2obj_type	*tp;
+
+	for (tp = tag2obj_typetab; tp->tag != TAG_LAST; tp++) {
+		if (tp->obj == obj)
+			return (tp->tag);
+	}
+	return (TAG_UNK);
+}
+
+
+/*
+ * Translate a driver event type into the library event id.
+ * Returns EV_UNK for an event type that is not in evdrv2evlib_typetab.
+ */
+static evid_t
+evdrv_2_evlib(md_event_type_t drv_ev)
+{
+	struct evdrv2evlib_type	*mp;
+
+	for (mp = evdrv2evlib_typetab; mp->drv != EQ_LAST; mp++) {
+		if (mp->drv == drv_ev)
+			return (mp->lib);
+	}
+	return (EV_UNK);
+}
+
+/*
+ * Translate a library event id into the driver event type.
+ * Returns EQ_EMPTY for an event id that is not in evdrv2evlib_typetab.
+ */
+static md_event_type_t
+evlib_2_evdrv(evid_t lib_ev)
+{
+	struct evdrv2evlib_type	*mp;
+
+	for (mp = evdrv2evlib_typetab; mp->drv != EQ_LAST; mp++) {
+		if (mp->lib == lib_ev)
+			return (mp->drv);
+	}
+	return (EQ_EMPTY);
+}
+
+
+/*
+ * meta_event
+ *	- validates the request and hands it to the notify driver through
+ *	  MD_IOCNOTIFY
+ *	- returns 0 on success or < 0 to indicate error;
+ *	  abs(return code) = errno
+ *	- a queue name is required for every command except EQ_PUT and
+ *	  must be shorter than MD_NOTIFY_NAME_SIZE
+ */
+static int
+meta_event(md_event_ioctl_t *evctl, md_error_t *ep)
+{
+	int	namelen;
+
+	if (evctl == NULL || ep == NULL)
+		return (-EINVAL);
+
+	namelen = strlen(evctl->mdn_name);
+	if (namelen >= MD_NOTIFY_NAME_SIZE)
+		return (-EINVAL);
+	if (namelen == 0 && evctl->mdn_cmd != EQ_PUT)
+		return (-EINVAL);
+
+	MD_SETDRIVERNAME(evctl, MD_NOTIFY, 0);
+	mdclrerror(ep);
+	errno = 0;
+
+	if (metaioctl(MD_IOCNOTIFY, evctl, ep, evctl->mdn_name) == 0)
+		return (0);
+
+	/* the ioctl failed: make sure both errno and ep say so */
+	if (errno == 0)
+		errno = EINVAL;
+	if (mdisok(ep))
+		(void) mdsyserror(ep, errno, evctl->mdn_name);
+
+	return (-errno);
+}
+
+/*
+ * Fill in a notify ioctl request.
+ *
+ * The request is zeroed, stamped with the event magic number and
+ * revision, and loaded with the supplied values.  qname may be NULL, in
+ * which case the queue name is left empty; a longer name is truncated to
+ * MD_NOTIFY_NAME_SIZE - 1 characters.
+ */
+static void
+init_evctl(char *qname,
+		md_tags_t	tag,
+		md_event_type_t	ev,
+		uint_t		flags,
+		set_t		set,
+		md_dev64_t	dev,
+		md_event_cmds_t	cmd,
+		u_longlong_t	udata,
+		md_event_ioctl_t *evctlp)
+{
+	assert(evctlp);
+
+	/* zeroing everything also leaves mdn_name empty and terminated */
+	(void) memset(evctlp, 0, sizeof (md_event_ioctl_t));
+
+	if (qname != NULL) {
+		(void) strncpy(evctlp->mdn_name, qname,
+		    MD_NOTIFY_NAME_SIZE-1);
+	}
+
+	evctlp->mdn_magic = MD_EVENT_ID;
+	evctlp->mdn_rev = MD_NOTIFY_REVISION;
+	evctlp->mdn_cmd = cmd;
+	evctlp->mdn_tag = tag;
+	evctlp->mdn_event = ev;
+	evctlp->mdn_flags = flags;
+	evctlp->mdn_set = set;
+	evctlp->mdn_dev = dev;
+	evctlp->mdn_user = udata;
+}
+
+/*
+ * meta_notify_createq
+ *	- creates an eventq
+ *	- returns 0 on success or errno (a positive value) and sets ep
+ *	- an already existing queue is an error only when the caller
+ *	  passes EVFLG_EXISTERR
+ *	- EVFLG_PERMANENT requests a permanent queue (EQ_Q_PERM)
+ */
+int
+meta_notify_createq(char *qname, ulong_t flags, md_error_t *ep)
+{
+	md_event_ioctl_t	evctl;
+	int			err = 0;
+
+	mdclrerror(ep);
+	if (!qname || strlen(qname) == 0) {
+		(void) mdsyserror(ep, EINVAL,
+		    dgettext(TEXT_DOMAIN,
+		    "null or zero-length queue name"));
+		return (EINVAL);
+	}
+
+	init_evctl(qname,
+	    TAG_EMPTY,
+	    EQ_EMPTY,
+	    (flags & EVFLG_PERMANENT) != 0? EQ_Q_PERM: 0,
+	    /* set */ 0,
+	    /* dev */ 0,
+	    EQ_ON,
+	    /* user-defined event data */ 0,
+	    &evctl);
+
+	/* from here until the final return, err is 0 or -errno */
+	err = meta_event(&evctl, ep);
+
+	if (err == -EEXIST && !(flags & EVFLG_EXISTERR)) {
+		err = 0;
+		mdclrerror(ep);
+	}
+	if (!mdisok(ep) && mdanysyserror(ep)) {
+		/*
+		 * Prefer the errno recorded in ep.  It is stored as a
+		 * positive number, so negate it to keep the sign convention
+		 * used by err; otherwise the return below would hand the
+		 * caller -errno instead of the documented errno.
+		 */
+		err = -((ep)->info.md_error_info_t_u.ds_error.errnum);
+	}
+	return (-err);
+}
+
+/*
+ * meta_notify_deleteq
+ *	- deletes an eventq by sending EQ_OFF for the named queue
+ *	- returns 0 on success or errno and sets ep
+ */
+int
+meta_notify_deleteq(char *qname, md_error_t *ep)
+{
+	md_event_ioctl_t	req;
+
+	init_evctl(qname, TAG_EMPTY, EQ_EMPTY,
+	    /* flags */ 0,
+	    /* set */ 0,
+	    /* dev */ 0,
+	    EQ_OFF,
+	    /* user-defined event data */ 0,
+	    &req);
+
+	/* meta_event() reports -errno; callers of this routine get errno */
+	return (-meta_event(&req, ep));
+}
+
+/*
+ * meta_notify_validq
+ *	- verifies that the queue exists
+ *	- returns true or false, ep may be changed as a side-effect
+ *	- the check is made by sending the same EQ_ON request that
+ *	  meta_notify_createq() uses and treating EEXIST as "exists"
+ *	  NOTE(review): presumably this creates the queue when it did not
+ *	  exist before; confirm against the notify driver.
+ */
+bool_t
+meta_notify_validq(char *qname, md_error_t *ep)
+{
+	md_event_ioctl_t	req;
+	int			rc;
+
+	init_evctl(qname, TAG_EMPTY, EQ_EMPTY,
+	    /* flags */ 0,
+	    /* set */ 0,
+	    /* dev */ 0,
+	    EQ_ON,
+	    /* user-defined event data */ 0,
+	    &req);
+
+	rc = meta_event(&req, ep);
+	return (rc == -EEXIST);
+}
+
+/*
+ * meta_notify_listq
+ *	- intended to return the number of (currently) active queues and
+ *	  to hand back an allocated vector of their names through qnames
+ *	- not implemented: qnames is left untouched, ep is set to
+ *	  EOPNOTSUPP and -EOPNOTSUPP is returned
+ */
+int
+meta_notify_listq(char ***qnames, md_error_t *ep)
+{
+	const int	unsupported = EOPNOTSUPP;
+
+#ifdef	lint
+	qnames = qnames;
+#endif	/* lint */
+
+	mdclrerror(ep);
+	(void) mdsyserror(ep, unsupported, "EOPNOTSUPP");
+
+	return (-unsupported);
+}
+
+/*
+ * meta_notify_flushq
+ *	- intended to have the notify driver flush all events from the
+ *	  named queue
+ *	- not implemented: qname is ignored, ep is set to EOPNOTSUPP and
+ *	  EOPNOTSUPP is returned
+ */
+int
+meta_notify_flushq(char *qname, md_error_t *ep)
+{
+	const int	unsupported = EOPNOTSUPP;
+
+#ifdef	lint
+	qname = qname;
+#endif	/* lint */
+
+	mdclrerror(ep);
+	(void) mdsyserror(ep, unsupported, "EOPNOTSUPP");
+
+	return (unsupported);
+}
+
+/*
+ * Convert a driver event (md_event_ioctl_t) into the library's md_ev_t.
+ *
+ * Tags and event types are translated through the lookup tables.  A
+ * generic metadevice object is refined further by dev2tag(), which may
+ * record an error in ep.
+ */
+static void
+cook_ev(md_event_ioctl_t *evctlp, md_ev_t *evp, md_error_t *ep)
+{
+	ev_obj_t	type;
+
+	assert(evctlp);
+	assert(evp);
+
+	type = tagdrv_2_objlib(evctlp->mdn_tag);
+	if (type == EVO_METADEV)
+		type = dev2tag(evctlp->mdn_dev, evctlp->mdn_set, ep);
+
+	evp->obj_type = type;
+	evp->setno = evctlp->mdn_set;
+	evp->ev = evdrv_2_evlib(evctlp->mdn_event);
+	evp->obj = evctlp->mdn_dev;
+	evp->uev = evctlp->mdn_user;
+}
+
+/*
+ * meta_notify_getev
+ *	- collects up to 1 event and stores it into md_ev_t
+ *	- returns number of events found (0 or 1) on success or -errno
+ *	- flags governs whether an empty queue is waited upon (EVFLG_WAIT)
+ *	- on entry evp->setno and evp->obj select the set and object of
+ *	  interest (EV_ALLSETS / EV_ALLOBJS for any)
+ */
+int
+meta_notify_getev(char *qname, ulong_t flags, md_ev_t *evp, md_error_t *ep)
+{
+	md_event_ioctl_t	req;
+	int			rc;
+
+	if (evp == NULL)
+		return (-EINVAL);
+
+	init_evctl(qname,
+	    TAG_EMPTY,
+	    EQ_EMPTY,
+	    /* flags (unused in get) */ 0,
+	    (evp->setno == EV_ALLSETS)? MD_ALLSETS: evp->setno,
+	    (evp->obj == EV_ALLOBJS)? MD_ALLDEVS: evp->obj,
+	    (flags & EVFLG_WAIT) != 0? EQ_GET_WAIT: EQ_GET_NOWAIT,
+	    /* user-defined event data */ 0,
+	    &req);
+
+	rc = meta_event(&req, ep);
+
+	/* anything other than "got one" or "queue empty" is an error */
+	if (rc != 0 && rc != -EAGAIN)
+		return (rc);
+
+	/*
+	 * EAGAIN is trapped so that EV_EMPTY events get returned, with a
+	 * count of 0 so that users who just watch the count also work.
+	 */
+	cook_ev(&req, evp, ep);
+	return ((rc == 0) ? 1 : 0);
+}
+
+
+/*
+ * meta_notify_getevlist
+ *	- collects all pending events in the named queue and allocates
+ *	  an md_evlist_t * to return them
+ *	- returns the number of events found (may be 0 if !WAIT) on success
+ *	  or -errno and sets ep as necessary
+ *	- on success the list is handed back through *evpp_arg; it is
+ *	  released with meta_notify_freeevlist()
+ *	- on failure any partially built list is freed and *evpp_arg is
+ *	  left untouched
+ */
+int
+meta_notify_getevlist(char *qname,
+		ulong_t		flags,
+		md_evlist_t	**evpp_arg,
+		md_error_t	*ep)
+{
+	md_ev_t		*evp = NULL;	/* event not yet owned by the list */
+	md_evlist_t	*evlp = NULL;	/* tail of the list */
+	md_evlist_t	*evlp_head = NULL;
+	md_evlist_t	*new = NULL;
+	int		n_ev = 0;
+	int		err = -EINVAL;
+	int		more;
+
+	mdclrerror(ep);
+	if (!evpp_arg) {
+		(void) mdsyserror(ep, EINVAL, dgettext(TEXT_DOMAIN,
+		    "No event list pointer"));
+		goto out;
+	}
+
+	if (!qname || strlen(qname) == 0) {
+		(void) mdsyserror(ep, EINVAL, dgettext(TEXT_DOMAIN,
+		    "Null or zero-length queue name"));
+		goto out;
+	}
+
+	do {
+		more = 0;
+
+		if (!(evp = (md_ev_t *)Malloc(sizeof (md_ev_t)))) {
+			(void) mdsyserror(ep, ENOMEM, qname);
+			break;
+		}
+		evp->obj_type = EVO_EMPTY;
+		evp->setno = EV_ALLSETS;
+		evp->ev = EV_EMPTY;
+		evp->obj = EV_ALLOBJS;
+		evp->uev = 0ULL;
+
+		err = meta_notify_getev(qname, flags, evp, ep);
+
+		if (evp->ev != EV_EMPTY) {
+			new = (md_evlist_t *)Zalloc(sizeof (md_evlist_t));
+			if (evlp_head == NULL) {
+				evlp = evlp_head = new;
+			} else {
+				evlp->next = new;
+				evlp = new;
+			}
+			evlp->evp = evp;
+			evp = NULL;	/* the list owns it now */
+			n_ev++;
+
+			/* keep draining while the queue yields events */
+			more = (err >= 0);
+		}
+	} while (more);
+out:
+	/*
+	 * An event buffer that never made it onto the list (empty queue,
+	 * or a failed fetch) is not reachable through evlp_head, so it
+	 * has to be released here on every exit path.
+	 */
+	if (evp != NULL) {
+		Free(evp);
+		evp = NULL;
+	}
+
+	if (err == -EAGAIN) {
+		err = 0;
+	}
+
+	if (err < 0) {
+		meta_notify_freeevlist(evlp_head);
+		evlp_head = NULL;
+		return (err);
+	}
+
+	if (evpp_arg) {
+		*evpp_arg = evlp_head;
+	}
+
+	return (n_ev);
+}
+
+
+/*
+ * meta_notify_doputev
+ *	- common back end of meta_notify_putev() and meta_notify_sendev()
+ *	- translates the event into a driver request and sends it with
+ *	  EQ_PUT
+ *	- returns 0 on success or errno
+ *
+ * meta_notify_putev() is intended for general use by user-level code,
+ * such as the GUI, to send user-defined events.
+ *
+ * meta_notify_sendev() is for "user-level driver" code, such as
+ * set manipulation and the multi-host daemon to generate events.
+ *
+ * Note- only convention enforces this usage.
+ */
+int
+meta_notify_doputev(md_ev_t *evp, md_error_t *ep)
+{
+	md_event_ioctl_t	req;
+	set_t			set;
+	md_dev64_t		dev;
+
+	if (evp == NULL || ep == NULL)
+		return (EINVAL);
+
+	set = (evp->setno == EV_ALLSETS)? MD_ALLSETS: evp->setno;
+	dev = (evp->obj == EV_ALLOBJS)? MD_ALLDEVS: evp->obj;
+
+	init_evctl(/* qname (unused in put) */ NULL,
+	    objlib_2_tagdrv(evp->obj_type),
+	    evlib_2_evdrv(evp->ev),
+	    /* flags (unused in put) */ 0,
+	    set,
+	    dev,
+	    EQ_PUT,
+	    evp->uev,
+	    &req);
+
+	return (-meta_event(&req, ep));
+}
+
+/*
+ * meta_notify_putev
+ *	- sends a user event down to the notify driver (hence, all queues)
+ *	- the event type in *evp is overwritten with EV_USER
+ *	- returns 0 on success or errno
+ */
+int
+meta_notify_putev(md_ev_t *evp, md_error_t *ep)
+{
+	if (evp == NULL || ep == NULL)
+		return (EINVAL);
+
+	/* users may only put events of type EV_USER, by definition */
+	evp->ev = EV_USER;
+
+	return (meta_notify_doputev(evp, ep));
+}
+
+/*
+ * meta_notify_sendev
+ *	- alternate put event entry point which allows more control of
+ *	  event innards (for use by md "user-level drivers")
+ *	- returns 0 on success or errno
+ *
+ * Since this routine isn't for use by clients, the user event data
+ * is always forced to be 0.  That is only meaningful for events
+ * of type EQ_USER (and those go through meta_notify_putev()), so
+ * this is consistent.
+ */
+int
+meta_notify_sendev(
+	ev_obj_t	tag,
+	set_t		set,
+	md_dev64_t	dev,
+	evid_t		ev)
+{
+	md_error_t	status = mdnullerror;
+	md_error_t	*ep = &status;
+	md_ev_t		ev_packet;
+	int		rc;
+
+	ev_packet.obj_type = tag;
+	ev_packet.setno = set;
+	ev_packet.obj = dev;
+	ev_packet.ev = ev;
+	ev_packet.uev = 0ULL;
+
+	rc = meta_notify_doputev(&ev_packet, ep);
+
+	/* an error left in ep counts as a failure even if rc says success */
+	if (0 == rc && !mdisok(ep)) {
+		rc = EINVAL;
+	}
+
+	/*
+	 * status is private to this call and is never handed back, so it
+	 * is cleared on every path, not only when rc was 0.
+	 */
+	mdclrerror(ep);
+
+	return (rc);
+}
+
+/*
+ * meta_notify_putevlist
+ *	- sends the events in the event list, in order, stopping at the
+ *	  first one that cannot be sent
+ *	- returns number of events sent (>= 0) on success or -errno
+ *	- an empty (NULL) list is not an error; 0 is returned
+ */
+int
+meta_notify_putevlist(md_evlist_t *evlp, md_error_t *ep)
+{
+	md_evlist_t	*evlpi;
+	int		n_ev = 0;
+	int		err;
+
+	for (evlpi = evlp; evlpi != NULL; evlpi = evlpi->next) {
+		err = meta_notify_putev(evlpi->evp, ep);
+		if (err != 0) {
+			/*
+			 * meta_notify_putev() reports failure as a positive
+			 * errno.  Hand it back negated so that it cannot be
+			 * mistaken for an event count.
+			 */
+			return ((err > 0) ? -err : err);
+		}
+		n_ev++;
+	}
+
+	return (n_ev);
+}
+
+/*
+ * meta_notify_freeevlist
+ *	- frees every node of the event list together with the event
+ *	  attached to it
+ *	- a NULL list is accepted; nothing is returned
+ */
+void
+meta_notify_freeevlist(md_evlist_t *evlp)
+{
+	md_evlist_t	*node = evlp;
+	md_evlist_t	*next;
+
+	while (node != NULL) {
+		/* pick up the link before the node goes away */
+		next = node->next;
+
+		if (node->evp != NULL) {
+			Free(node->evp);
+			node->evp = NULL;
+		}
+		Free(node);
+
+		node = next;
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_patch.c b/usr/src/lib/lvm/libmeta/common/meta_patch.c
new file mode 100644
index 0000000000..7c0ff549f1
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_patch.c
@@ -0,0 +1,299 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * patch /etc/vfstab file
+ */
+#include <meta.h>
+#include <string.h>
+
+/*
+ * patch filesystem lines into vfstab file, return tempfilename
+ *
+ * Reads vname line by line.  Every line that is not the wanted entry is
+ * copied unchanged; the wanted entry is rewritten so that it names the
+ * block (and, for filesystems, raw) device of fsnp.  An entry is "wanted"
+ * when it has all seven vfstab fields, is not a comment, its mount point
+ * (or its fstype, when patching swap) equals cmpname and, if old_bdevname
+ * was given, its block device contains old_bdevname.
+ *
+ * When doit is set the result is written to "<vname>.tmp", created with
+ * the mode and ownership of vname; otherwise nothing is written.  The
+ * temp file is NOT renamed over vname here, that is left to the caller.
+ *
+ * Returns 0 on success with *tname set to the malloc'ed temp file name,
+ * which the caller must Free().  Returns -1 on failure with ep set; in
+ * that case the temp file (if we created one) has been removed, the name
+ * has been freed and *tname is NULL.
+ */
+int
+meta_patch_vfstab(
+	char		*cmpname,	/* filesystem mount point or */
+					/* "swap" if updating swap partition */
+	mdname_t	*fsnp,		/* filesystem device name */
+	char		*vname,		/* vfstab file name */
+	char		*old_bdevname,	/* old name of block device, needed */
+					/* for deciding which of multiple */
+					/* swap file entries to change */
+					/* if NULL then not changing swap */
+	int		doit,		/* really patch file */
+	int		verbose,	/* show what we're doing */
+	char		**tname,	/* returned temp file name */
+	md_error_t	*ep		/* returned error */
+)
+{
+	char		*chrname = fsnp->rname;
+	char		*blkname = fsnp->bname;
+	FILE		*fp = NULL;
+	FILE		*tfp = NULL;
+	struct stat	sbuf;
+	char		buf[512];
+	char		cdev[512];
+	char		bdev[512];
+	char		mntpt[512];
+	char		fstype[512];
+	char		fsckpass[512];
+	char		mntboot[512];
+	char		mntopt[512];
+	int		gotfs = 0;
+	int		nfields;
+	int		rc;
+	char		*cmpstr = &mntpt[0];	/* compare against mntpnt if */
+						/* fs, or fstype if swap */
+	char		*char_device = chrname;
+
+	/* check names */
+	assert(vname != NULL);
+	assert(tname != NULL);
+
+	/* get temp names */
+	*tname = Malloc(strlen(vname) + strlen(".tmp") + 1);
+	(void) strcpy(*tname, vname);
+	(void) strcat(*tname, ".tmp");
+
+	/*
+	 * check if going to update swap entry in file
+	 * if so then compare against file system type, and keep whatever
+	 * raw device column the entry already has
+	 */
+	if ((old_bdevname != NULL) && (strcmp("swap", cmpname) == 0)) {
+		cmpstr = &fstype[0];
+		char_device = &cdev[0];
+	}
+
+	/* copy vfstab file, replace filesystem line */
+	if ((fp = fopen(vname, "r")) == NULL) {
+		(void) mdsyserror(ep, errno, vname);
+		goto out;
+	}
+	if (fstat(fileno(fp), &sbuf) != 0) {
+		(void) mdsyserror(ep, errno, vname);
+		goto out;
+	}
+	if (doit) {
+		if ((tfp = fopen(*tname, "w")) == NULL) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+		if (fchmod(fileno(tfp), (sbuf.st_mode & 0777)) != 0) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+		if (fchown(fileno(tfp), sbuf.st_uid, sbuf.st_gid) != 0) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+	}
+	while (fgets(buf, sizeof (buf), fp) != NULL) {
+
+		/*
+		 * check that have all required params from vfstab file
+		 * or that the line isnt a comment
+		 * or that the fstype/mntpoint match what was passed in
+		 * or that the block device matches if changing swap
+		 * the last check is needed since there may be multiple
+		 * entries of swap in the file, and so the fstype is not
+		 * a sufficient check
+		 *
+		 * The field width is one less than the buffer size: a
+		 * %Ns conversion stores up to N characters plus the
+		 * terminating null.
+		 */
+		nfields = sscanf(buf,
+		    "%511s %511s %511s %511s %511s %511s %511s",
+		    bdev, cdev, mntpt, fstype, fsckpass, mntboot, mntopt);
+		if ((nfields != 7) ||
+		    (bdev[0] == '#') || (strcmp(cmpstr, cmpname) != 0) ||
+		    ((old_bdevname != NULL) &&
+		    (strstr(bdev, old_bdevname) == NULL))) {
+			if (doit) {
+				if (fputs(buf, tfp) == EOF) {
+					(void) mdsyserror(ep, errno, *tname);
+					goto out;
+				}
+			}
+			continue;
+		}
+
+		if (verbose) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Delete the following line from %s:\n\n"),
+			    vname);
+			(void) printf("%s\n", buf);
+			(void) printf(
+			    dgettext(TEXT_DOMAIN,
+			    "Add the following line to %s:\n\n"),
+			    vname);
+			(void) printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\n\n",
+			    blkname, char_device, mntpt, fstype, fsckpass,
+			    mntboot, mntopt);
+		}
+		if (doit) {
+			/* fprintf reports failure as a negative value */
+			if (fprintf(tfp, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
+			    blkname, char_device, mntpt, fstype, fsckpass,
+			    mntboot, mntopt) < 0) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+		}
+
+		gotfs = 1;
+	}
+	if (! feof(fp)) {
+		(void) mdsyserror(ep, errno, vname);
+		goto out;
+	}
+	if (! gotfs) {
+		(void) mderror(ep, MDE_VFSTAB_FILE, vname);
+		goto out;
+	}
+
+	/*
+	 * The stream is gone once fclose() has been called, whether or
+	 * not it succeeded, so forget it before looking at the result;
+	 * otherwise the error path would close it a second time.
+	 */
+	rc = fclose(fp);
+	fp = NULL;
+	if (rc != 0) {
+		(void) mdsyserror(ep, errno, vname);
+		goto out;
+	}
+	if (doit) {
+		if ((fflush(tfp) != 0) ||
+		    (fsync(fileno(tfp)) != 0)) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+		rc = fclose(tfp);
+		tfp = NULL;
+		if (rc != 0) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+	}
+
+	/* return success */
+	return (0);
+
+	/* cleanup, return error */
+out:
+	if (fp != NULL)
+		(void) fclose(fp);
+	if (tfp != NULL)
+		(void) fclose(tfp);
+	if (*tname != NULL) {
+		/* only remove the temp file if we could have created it */
+		if (doit)
+			(void) unlink(*tname);
+		Free(*tname);
+		/* don't leave the caller holding a freed pointer */
+		*tname = NULL;
+	}
+	return (-1);
+}
+
+
+/*
+ * set filesystem device name in vfstab
+ *
+ * Rewrites the vfstab entry whose mount point is fsname so that it refers
+ * to fsnp, by building a patched copy with meta_patch_vfstab() and then
+ * renaming it over vname (default "/etc/vfstab").
+ *
+ * Returns 0 on success, -1 on failure with ep set.
+ */
+int
+meta_patch_fsdev(
+	char		*fsname,	/* filesystem mount point */
+	mdname_t	*fsnp,		/* filesystem device */
+	char		*vname,		/* vfstab file name */
+	md_error_t	*ep		/* returned error */
+)
+{
+	const int	doit = 1;
+	const int	verbose = 0;
+	char		*tvname = NULL;
+
+	/* check names */
+	assert(fsname != NULL);
+	if (vname == NULL)
+		vname = "/etc/vfstab";
+
+	/*
+	 * replace lines in vfstab
+	 *
+	 * On failure meta_patch_vfstab() has already unlinked the temp
+	 * file and freed its name, so tvname must not be touched again.
+	 */
+	if (meta_patch_vfstab(fsname, fsnp, vname, NULL, doit, verbose,
+	    &tvname, ep) != 0) {
+		return (-1);
+	}
+
+	/* rename temp file on top of real one */
+	if (rename(tvname, vname) != 0) {
+		(void) mdsyserror(ep, errno, vname);
+		(void) unlink(tvname);
+		Free(tvname);
+		return (-1);
+	}
+
+	Free(tvname);
+	return (0);
+}
+
+
+/*
+ * set swap device name in vfstab
+ *
+ * Rewrites the swap entry of vname (default "/etc/vfstab") whose block
+ * device contains old_bdevname so that it refers to fsnp, by building a
+ * patched copy with meta_patch_vfstab() and renaming it over vname.
+ *
+ * Returns 0 on success, -1 on failure with ep set.
+ */
+int
+meta_patch_swapdev(
+	mdname_t	*fsnp,		/* filesystem device */
+	char		*vname,		/* vfstab file name */
+	char		*old_bdevname,	/* block device name to change */
+	md_error_t	*ep		/* returned error */
+)
+{
+	const int	doit = 1;
+	const int	verbose = 0;
+	char		*tvname = NULL;
+
+	/* check names */
+	if (vname == NULL)
+		vname = "/etc/vfstab";
+
+	/*
+	 * replace lines in vfstab
+	 *
+	 * On failure meta_patch_vfstab() has already unlinked the temp
+	 * file and freed its name, so tvname must not be touched again.
+	 */
+	if (meta_patch_vfstab("swap", fsnp, vname, old_bdevname, doit,
+	    verbose, &tvname, ep) != 0) {
+		return (-1);
+	}
+
+	/* rename temp file on top of real one */
+	if (rename(tvname, vname) != 0) {
+		(void) mdsyserror(ep, errno, vname);
+		(void) unlink(tvname);
+		Free(tvname);
+		return (-1);
+	}
+
+	Free(tvname);
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_patch_root.c b/usr/src/lib/lvm/libmeta/common/meta_patch_root.c
new file mode 100644
index 0000000000..ac3f4b04d9
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_patch_root.c
@@ -0,0 +1,171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1992-1994, 2000-2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * patch /etc/system file for the root device
+ */
+
+#include <dlfcn.h>
+#include <meta.h>
+
+
+/*
+ * set root device name in the system file and vfstab, patch mddb
+ * locations into md.conf
+ *
+ * Three files are rebuilt as temporary copies and, when doit is set,
+ * renamed over the originals:
+ *	sname	(default /etc/system)		forceload and rootdev lines
+ *	dbname	(default /kernel/drv/md.conf)	bootlist lines, from cname
+ *	vname	(default /etc/vfstab)		the "/" entry
+ *
+ * rootnp must belong to the local set.  The three renames are not atomic
+ * as a group: if a later one fails the earlier ones stay in effect.
+ *
+ * Returns 0 on success, -1 on failure with ep set.
+ */
+int
+meta_patch_rootdev(
+	mdname_t	*rootnp,	/* root device */
+	char		*sname,		/* system file name */
+	char		*vname,		/* vfstab file name */
+	char		*cname,		/* mddb.cf file name */
+	char		*dbname,	/* md.conf file name */
+	int		doit,		/* really patch files */
+	int		verbose,	/* show what we're doing */
+	md_error_t	*ep		/* returned error */
+)
+{
+	mdsetname_t	*sp;
+	int		ismeta = metaismeta(rootnp);
+	char		*tsname = NULL;
+	FILE		*tsfp = NULL;
+	char		*dbtname = NULL;
+	FILE		*dbtfp = NULL;
+	char		*tvname = NULL;
+	int		rval = -1;
+	int		rc;
+
+	/* check names */
+	if (sname == NULL)
+		sname = "/etc/system";
+	if (vname == NULL)
+		vname = "/etc/vfstab";
+	if (cname == NULL)
+		cname = META_DBCONF;
+	if (dbname == NULL)
+		dbname = "/kernel/drv/md.conf";
+
+	/* make sure we have a local name */
+	if ((sp = metagetset(rootnp, TRUE, ep)) == NULL)
+		return (-1);
+
+	if (! metaislocalset(sp)) {
+		return (mddeverror(ep, MDE_NOT_LOCAL, rootnp->dev,
+		    rootnp->cname));
+	}
+
+	/* replace forceload and rootdev lines in system */
+	if (meta_systemfile_copy(sname, 1, 0, doit, verbose, &tsname, &tsfp,
+	    ep) != 0) {
+		goto out;
+	}
+	if (meta_systemfile_append_mdroot(rootnp, sname,
+	    tsname, tsfp, ismeta, doit, verbose, ep) != 0) {
+		goto out;
+	}
+
+	/* replace bootlist lines in /kernel/drv/md.conf */
+	if (meta_systemfile_copy(dbname, 0, 1, doit, verbose, &dbtname,
+	    &dbtfp, ep) != 0) {
+		goto out;
+	}
+	if (meta_systemfile_append_mddb(cname, dbname, dbtname, dbtfp, doit,
+	    verbose, ep) != 0) {
+		goto out;
+	}
+
+	/*
+	 * force the file contents out to disk
+	 *
+	 * A stream is gone once fclose() has been called on it, even if
+	 * the call failed, so each pointer is cleared before the result
+	 * is examined; the cleanup code must not close it again.
+	 */
+	if (doit) {
+		if ((fflush(tsfp) != 0) ||
+		    (fsync(fileno(tsfp)) != 0)) {
+			(void) mdsyserror(ep, errno, tsname);
+			goto out;
+		}
+		rc = fclose(tsfp);
+		tsfp = NULL;
+		if (rc != 0) {
+			(void) mdsyserror(ep, errno, tsname);
+			goto out;
+		}
+
+		if ((fflush(dbtfp) != 0) ||
+		    (fsync(fileno(dbtfp)) != 0)) {
+			(void) mdsyserror(ep, errno, dbtname);
+			goto out;
+		}
+		rc = fclose(dbtfp);
+		dbtfp = NULL;
+		if (rc != 0) {
+			(void) mdsyserror(ep, errno, dbtname);
+			goto out;
+		}
+	}
+
+	/* replace lines in vfstab */
+	if (meta_patch_vfstab("/", rootnp, vname, NULL, doit, verbose, &tvname,
+	    ep) != 0) {
+		/*
+		 * meta_patch_vfstab() has already removed its temp file
+		 * and freed the name; make sure we don't do it again.
+		 */
+		tvname = NULL;
+		goto out;
+	}
+
+	/* rename files, better hope both work */
+	if (doit) {
+		if (rename(tsname, sname) != 0) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		Free(tsname);
+		tsname = NULL;
+		if (rename(dbtname, dbname) != 0) {
+			(void) mdsyserror(ep, errno, dbname);
+			goto out;
+		}
+		Free(dbtname);
+		dbtname = NULL;
+		if (rename(tvname, vname) != 0) {
+			(void) mdsyserror(ep, errno, vname);
+			goto out;
+		}
+		Free(tvname);
+		tvname = NULL;
+	}
+	rval = 0;
+
+	/* cleanup (also reached on success), return result */
+out:
+	if (tsfp != NULL)
+		(void) fclose(tsfp);
+	if (tsname != NULL) {
+		if (doit)
+			(void) unlink(tsname);
+		Free(tsname);
+	}
+	if (tvname != NULL) {
+		if (doit)
+			(void) unlink(tvname);
+		Free(tvname);
+	}
+
+	/* free the temporary files for md.conf */
+	if (dbtfp != NULL)
+		(void) fclose(dbtfp);
+	if (dbtname != NULL) {
+		if (doit)
+			(void) unlink(dbtname);
+		Free(dbtname);
+	}
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_print.c b/usr/src/lib/lvm/libmeta/common/meta_print.c
new file mode 100644
index 0000000000..a539628685
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_print.c
@@ -0,0 +1,439 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * report metadevice status
+ */
+
+#include <meta.h>
+
+/*
+ * print named metadevice
+ *
+ * Looks up the driver (misc) name of namep and hands the request to the
+ * matching meta_*_print() routine, returning its result.  Returns -1 if
+ * the misc name cannot be obtained, and the value of mdmderror() with
+ * MDE_UNKNOWN_TYPE if no print routine matches.
+ */
+int
+meta_print_name(
+	mdsetname_t	*sp,
+	mdname_t	*namep,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	mdnamelist_t	**lognlpp,
+	md_error_t	*ep
+)
+{
+	char	*type;
+	int	rval;
+
+	/* must have set */
+	assert(sp != NULL);
+
+	/* find out what kind of metadevice this is */
+	type = metagetmiscname(namep, ep);
+	if (type == NULL)
+		return (-1);
+
+	/* hand off to the printer for that kind */
+	if (strcmp(type, MD_TRANS) == 0) {
+		rval = meta_trans_print(sp, namep, nlpp, fname, fp,
+		    options, NULL, lognlpp, ep);
+	} else if (strcmp(type, MD_MIRROR) == 0) {
+		rval = meta_mirror_print(sp, namep, nlpp, fname, fp,
+		    options, ep);
+	} else if (strcmp(type, MD_RAID) == 0) {
+		rval = meta_raid_print(sp, namep, nlpp, fname, fp,
+		    options, ep);
+	} else if (strcmp(type, MD_STRIPE) == 0) {
+		rval = meta_stripe_print(sp, namep, nlpp, fname, fp,
+		    options, ep);
+	} else if (strcmp(type, MD_SP) == 0) {
+		rval = meta_sp_print(sp, namep, nlpp, fname, fp,
+		    options, ep);
+	} else {
+		/* unknown type */
+		rval = mdmderror(ep, MDE_UNKNOWN_TYPE,
+		    meta_getminor(namep->dev), namep->cname);
+	}
+
+	return (rval);
+}
+
+/*
+ * print all metadevices
+ *
+ * Runs every per-type print routine in turn (trans, logs, mirror, raid,
+ * stripe, soft partition, hot spare pool).  A failing routine does not
+ * stop the others: the first failure is reported through the caller's
+ * ep, and from then on a scratch error structure is passed instead so
+ * that the first error is not overwritten.
+ *
+ * Returns 0 if every routine succeeded, -1 if any of them failed.
+ */
+int
+meta_print_all(
+	mdsetname_t	*sp,
+	char		*fname,
+	mdnamelist_t	**nlpp,
+	FILE		*fp,
+	mdprtopts_t	options,
+	int		*meta_print_trans_msgp,
+	md_error_t	*ep
+)
+{
+	md_error_t	status = mdnullerror;
+	md_error_t	*cur_ep = ep;	/* where the next error should go */
+	mdnamelist_t	*lognlp = NULL;
+	int		rval = 0;
+	int		rc;
+
+	/* print various types (save first error) */
+	rc = meta_trans_print(sp, NULL, nlpp, fname, fp, options,
+	    meta_print_trans_msgp, &lognlp, cur_ep);
+	if (rc != 0) {
+		rval = -1;
+		cur_ep = &status;
+	}
+
+	rc = meta_logs_print(sp, lognlp, nlpp, fname, fp, options, cur_ep);
+	if (rc != 0) {
+		rval = -1;
+		cur_ep = &status;
+	}
+	metafreenamelist(lognlp);
+
+	rc = meta_mirror_print(sp, NULL, nlpp, fname, fp, options, cur_ep);
+	if (rc != 0) {
+		rval = -1;
+		cur_ep = &status;
+	}
+
+	rc = meta_raid_print(sp, NULL, nlpp, fname, fp, options, cur_ep);
+	if (rc != 0) {
+		rval = -1;
+		cur_ep = &status;
+	}
+
+	rc = meta_stripe_print(sp, NULL, nlpp, fname, fp, options, cur_ep);
+	if (rc != 0) {
+		rval = -1;
+		cur_ep = &status;
+	}
+
+	rc = meta_sp_print(sp, NULL, nlpp, fname, fp, options, cur_ep);
+	if (rc != 0) {
+		rval = -1;
+		cur_ep = &status;
+	}
+
+	rc = meta_hsp_print(sp, NULL, nlpp, fname, fp, options, cur_ep);
+	if (rc != 0) {
+		rval = -1;
+		cur_ep = &status;
+	}
+
+	/* discard further errors */
+	mdclrerror(&status);
+
+	return (rval);
+}
+
+/*
+ * format timestamp
+ *
+ * Returns "" for a NULL tvp, a localized "(invalid time)" string when the
+ * timestamp is all zero or cannot be converted, and otherwise a pointer
+ * to the formatted local time.
+ *
+ * The formatted result lives in a static buffer, so it is overwritten by
+ * the next call and must not be freed.
+ */
+char *
+meta_print_time(
+	md_timeval32_t	*tvp
+)
+{
+	static char	buf[128];
+	struct tm	*tmp;
+	char		*dcmsg;
+	time_t		secs;
+
+	if (tvp == NULL)
+		return ("");
+
+	/*
+	 * TRANSLATION_NOTE_LC_TIME
+	 * This message is the format of file
+	 * timestamps written with the -C and
+	 * -c options.
+	 * %a -- locale's abbreviated weekday name
+	 * %b -- locale's abbreviated month name
+	 * %e -- day of month [1,31]
+	 * %T -- Time as %H:%M:%S
+	 * %Y -- Year, including the century
+	 */
+	dcmsg = dcgettext(TEXT_DOMAIN, "%a %b %e %T %Y", LC_TIME);
+
+	if ((tvp->tv_sec == 0) && (tvp->tv_usec == 0))
+		return (dgettext(TEXT_DOMAIN, "(invalid time)"));
+
+	/*
+	 * Copy the seconds into a real time_t instead of casting the
+	 * address of tv_sec: the field is presumably 32 bits wide (going
+	 * by the type name) and localtime() would read past it wherever
+	 * time_t is wider.
+	 */
+	secs = (time_t)tvp->tv_sec;
+	if (((tmp = localtime(&secs)) == NULL) ||
+	    (strftime(buf, sizeof (buf), dcmsg, tmp) == 0)) {
+		return (dgettext(TEXT_DOMAIN, "(invalid time)"));
+	}
+	return (buf);
+}
+
+/*
+ * format high resolution time into a tuple of seconds:milliseconds:microseconds
+ *
+ * The argument is divided by 1000 three times, i.e. it is treated as a
+ * count of nanoseconds despite its name; the sub-microsecond part is
+ * dropped.  The result lives in a static buffer that is overwritten by
+ * the next call.
+ */
+char *
+meta_print_hrtime(
+	hrtime_t	secs
+)
+{
+	static char	buf[128];
+	long long	total_usec = secs / 1000;
+	long long	total_msec = total_usec / 1000;
+
+	(void) snprintf(buf, sizeof (buf), "%4lld:%03lld:%03lld",
+	    total_msec / 1000, total_msec % 1000, total_usec % 1000);
+	return (buf);
+}
+
+/*
+ * Routine to print 32 bit bitmasks
+ *
+ * Takes:
+ *	fp - the stdio stream to write to
+ *	fmt - optional text, written verbatim ahead of the value
+ *		(it is NOT interpreted as a printf format)
+ *	ul - the bit vector, passed as a 32 bit quantity
+ *	bitfmt - special string to map bits to words.
+ *		bitfmt is layed out as follows:
+ *		byte 0 is the output base (8, 16, anything else is 10).
+ *		byte 1 a bit position, 1 through 32
+ *		byte 2-n text for position in byte 1
+ *		byte n+1 another bit position
+ *		byte n+2-m text for position in byte n+1
+ *		.
+ *		.
+ *		.
+ *		A NULL or empty bitfmt prints the value in decimal with
+ *		no names.
+ *
+ * Eg. - "\020\001DOG\002CAT\003PIG"
+ *	Print the bitmask in hex.
+ *	If bit 1 (0x0001) is set print "<DOG>"
+ *	If bit 2 (0x0002) is set print "<CAT>"
+ *	If bit 3 (0x0004) is set print "<PIG>"
+ *	If bit 4 (0x0008) is set nothing is printed.
+ *	If bit 1 and bit 2 (0x0003) are set print <DOG,CAT>
+ *
+ * Returns 0 on OK
+ *	EOF on error
+ *
+ * Outputs on fp
+ *
+ */
+
+int
+meta_prbits(FILE *fp, const char *fmt, ...)
+{
+	va_list			ap;
+	unsigned long		ul;
+	const unsigned char	*p;
+	int			base = 10;
+	int			set = 0;
+	int			n;
+	int			rc;
+
+	/*
+	 * Collect both variable arguments up front so that va_end() is
+	 * reached no matter which return is taken below.  The mask is
+	 * fetched as unsigned so that bit 31 is not sign-extended when
+	 * it is widened to unsigned long.
+	 */
+	va_start(ap, fmt);
+	ul = va_arg(ap, unsigned int);
+	p = (const unsigned char *)va_arg(ap, char *);
+	va_end(ap);
+
+	/* the leading text is data, never hand it to printf as a format */
+	if ((fmt != NULL) && (*fmt != '\0')) {
+		if (fputs(fmt, fp) == EOF)
+			return (EOF);
+	}
+
+	/* byte 0 selects the base; don't step over a lone terminator */
+	if ((p != NULL) && (*p != '\0'))
+		base = *p++;
+
+	switch (base) {
+	case 8:
+		rc = fprintf(fp, "0%lo", ul);
+		break;
+
+	case 16:
+		rc = fprintf(fp, "0x%lx", ul);
+		break;
+
+	default:
+		rc = fprintf(fp, "%lu", ul);
+		break;
+	}
+	if (rc < 0)
+		return (EOF);
+
+	if ((ul == 0) || (p == NULL))
+		return (0);
+
+	/*
+	 * Each entry is a bit position byte (<= ' ') followed by its
+	 * name (bytes > ' ').  Positions outside 1..32 cannot name a bit
+	 * of a 32 bit mask and are treated as not set.
+	 */
+	while ((n = *p) != '\0') {
+		p++;
+		if ((n <= 32) && ((ul & (1UL << (n - 1))) != 0)) {
+			if (fputc(set ? ',' : '<', fp) == EOF)
+				return (EOF);
+			for (/* void */; *p > ' '; ++p) {
+				if (fputc(*p, fp) == EOF)
+					return (EOF);
+			}
+			set = 1;
+		} else {
+			/* bit clear: skip over its name */
+			while (*p > ' ')
+				++p;
+		}
+	}
+	if (set) {
+		if (fputc('>', fp) == EOF)
+			return (EOF);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Convert a number of blocks to a string representation
+ * Input: 64 bit wide number of blocks and the size of a block in bytes
+ * Output: string like "199 MB" or "27 TB" or "3.5 GB"; a value below
+ *	1024 bytes is printed with a blank in place of the unit prefix
+ * Returns a pointer to the buffer, which is static and is overwritten
+ * by the next call.
+ */
+char *
+meta_number_to_string(diskaddr_t number, u_longlong_t blk_sz)
+{
+	static char	buf[64];
+	/* blank, kilo, mega, giga, tera, peta, exa */
+	char		*units = " KMGTPE";
+	int		idx = 0;	/* unit currently in effect */
+	diskaddr_t	prev = 0;	/* value before the last scaling step */
+	u_longlong_t	bytes;
+
+	/* convert from blocks to bytes */
+	bytes = number * blk_sz;
+
+	/*
+	 * Stop scaling when we reached exa bytes, then something is
+	 * probably wrong with our number.
+	 */
+	for (/* void */; (bytes >= 1024) && (units[idx] != 'E');
+	    bytes = bytes / 1024) {
+		idx++;
+		prev = bytes;
+	}
+
+	/* single digit results get one decimal place after the point */
+	if ((prev != 0) && ((prev / 1024) < 10)) {
+		/* sprintf() will round for us */
+		float	scaled = (float)prev / 1024;
+
+		(void) sprintf(buf, "%1.1f %cB", scaled, units[idx]);
+	} else {
+		(void) sprintf(buf, "%llu %cB", bytes, units[idx]);
+	}
+	return (buf);
+}
+
+/*
+ * meta_get_tstate: get the transient state bits from the kernel.
+ * this is for use with printing out the state field in metastat.
+ * INPUT:	dev64 -- devt of the metadevice
+ *		tstatep -- return for tstate
+ *		ep -- error
+ * RETURN:	0 for success, with *tstatep filled in
+ *		otherwise the value of mdstealerror(), after the ioctl's
+ *		error has been moved into ep; *tstatep is left untouched
+ */
+int
+meta_get_tstate(md_dev64_t dev64, uint_t *tstatep, md_error_t *ep)
+{
+	md_i_get_tstate_t	req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.id = meta_getminor(dev64);
+
+	if (metaioctl(MD_IOCGET_TSTATE, &req, &req.mde, NULL) == 0) {
+		*tstatep = req.tstate;
+		return (0);
+	}
+
+	return (mdstealerror(ep, &req.mde));
+}
+
+/*
+ * meta_print_devid: print out the devid information, given a mddevid_t list.
+ * INPUT:	mdsetname_t set we're looking at
+ *		FILE where to print to
+ *		mddevid_t list to print from.
+ *		md_error_t error
+ * RETURN:	-1 if writing to fp failed
+ *		0 for success
+ *
+ * A device for which no device id can be found is listed with a "-" and
+ * is not treated as a failure.
+ * NOTE(review): any error the lookup leaves in ep in that case is
+ * neither cleared nor reported here -- confirm callers expect that.
+ */
+int
+meta_print_devid(
+	mdsetname_t	*sp,
+	FILE		*fp,
+	mddevid_t	*mddevidp,
+	md_error_t	*ep
+)
+{
+	mddevid_t	*dp;
+	int		len = 0;
+
+	/* print header */
+	if (fprintf(fp, gettext("Device Relocation Information:\n")) < 0)
+		return (-1);
+
+	/*
+	 * The first column is sized to the longest ctd name so that
+	 * the table lines up whether the names are short or long.
+	 */
+	for (dp = mddevidp; dp != NULL; dp = dp->next) {
+		int	namelen = (int)strlen(dp->ctdname);
+
+		if (namelen > len)
+			len = namelen;
+	}
+
+	if (fprintf(fp, "%-*s %-5s\t%s\n", len + 2,
+	    gettext("Device   "),
+	    gettext("Reloc"),
+	    gettext("Device ID")) < 0)
+		return (-1);
+
+	/* print ctd's and devids */
+	for (dp = mddevidp; dp != NULL; dp = dp->next) {
+		ddi_devid_t	did;
+		char		*encoded = NULL;
+		char		*devid = "-";
+		char		*reloc;
+		int		rc;
+
+		did = (ddi_devid_t)
+		    meta_getdidbykey(sp->setno, getmyside(sp, ep),
+		    dp->key, ep);
+
+		if (did == (ddi_devid_t)NULL) {
+			reloc = gettext("No ");
+		} else {
+			reloc = gettext("Yes");
+			encoded = devid_str_encode(did, NULL);
+			Free(did);
+			/* fall back to "-" if the id could not be encoded */
+			if (encoded != NULL)
+				devid = encoded;
+		}
+
+		rc = fprintf(fp, "%-*s %-5s\t%s\n", len + 2, dp->ctdname,
+		    reloc, devid);
+
+		/* release the encoded string before acting on the result */
+		if (encoded != NULL)
+			devid_str_free(encoded);
+		if (rc < 0)
+			return (-1);
+	}
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_raid.c b/usr/src/lib/lvm/libmeta/common/meta_raid.c
new file mode 100644
index 0000000000..cce31ad3fa
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_raid.c
@@ -0,0 +1,2784 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * RAID operations
+ */
+
+#include <stdlib.h>
+#include <meta.h>
+#include <sys/lvm/md_raid.h>
+#include <sys/lvm/mdvar.h>
+#include <sys/lvm/md_convert.h>
+#include <stddef.h>
+
+/*
+ * FUNCTION:	meta_get_raid_names()
+ * INPUT:	sp	- the set name to get raid from
+ *		options	- options from the command line
+ * OUTPUT:	nlpp	- list of all raid names
+ *		ep	- return error pointer
+ * RETURNS:	int	- whatever meta_get_names() returns for MD_RAID
+ *			  (callers in this file treat < 0 as an error)
+ * PURPOSE:	returns a list of all raid in the metadb
+ *		for all devices in the specified set
+ */
+int
+meta_get_raid_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	int	rval;
+
+	rval = meta_get_names(MD_RAID, sp, nlpp, options, ep);
+	return (rval);
+}
+
+/*
+ * free raid unit
+ *
+ * Releases the column array, if there is one, and then the md_raid_t
+ * itself.  raidp must not be NULL.
+ */
+void
+meta_free_raid(
+	md_raid_t	*raidp
+)
+{
+	md_raidcol_t	*colv = raidp->cols.cols_val;
+
+	if (colv != NULL) {
+		/* a column array without columns would be inconsistent */
+		assert(raidp->cols.cols_len > 0);
+		Free(colv);
+	}
+	Free(raidp);
+}
+
+/*
+ * get raid (common)
+ *
+ * Builds an md_raid_t describing the RAID metadevice raidnp from the unit
+ * structure returned by meta_get_mdunit() plus the resync information
+ * returned by the MD_IOCGETSYNC ioctl.
+ *
+ * The result is cached in raidnp->drivenamep->unitp; if a unit is already
+ * cached there it is returned as is and nothing else is done.
+ *
+ * sp - set the metadevice belongs to (must match the minor's set)
+ * raidnp - name of the RAID metadevice
+ * fast - passed straight through to metakeyname(); its meaning is
+ * defined there
+ * ep - returned error
+ *
+ * Returns the md_raid_t, or NULL on failure.
+ */
+md_raid_t *
+meta_get_raid_common(
+ mdsetname_t *sp,
+ mdname_t *raidnp,
+ int fast,
+ md_error_t *ep
+)
+{
+ mddrivename_t *dnp = raidnp->drivenamep;
+ char *miscname;
+ mr_unit_t *mr;
+ md_raid_t *raidp;
+ uint_t ncol;
+ uint_t col;
+ md_resync_ioctl_t ri;
+
+ /* must have set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+ /* short circuit: reuse the unit already cached on the drive name */
+ if (dnp->unitp != NULL) {
+ assert(dnp->unitp->type == MD_METARAID);
+ return ((md_raid_t *)dnp->unitp);
+ }
+
+ /* get miscname and unit; anything that is not a raid is an error */
+ if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
+ return (NULL);
+ if (strcmp(miscname, MD_RAID) != 0) {
+ (void) mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
+ raidnp->cname);
+ return (NULL);
+ }
+ if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+ return (NULL);
+ assert(mr->c.un_type == MD_METARAID);
+
+ /* allocate raid (zero filled) */
+ raidp = Zalloc(sizeof (*raidp));
+
+ /* allocate columns, one entry per column of the unit */
+ ncol = mr->un_totalcolumncnt;
+ assert(ncol >= MD_RAID_MIN);
+ raidp->cols.cols_len = ncol;
+ raidp->cols.cols_val = Zalloc(raidp->cols.cols_len *
+ sizeof (*raidp->cols.cols_val));
+
+ /* get common info */
+ raidp->common.namep = raidnp;
+ raidp->common.type = mr->c.un_type;
+ raidp->common.state = mr->c.un_status;
+ raidp->common.capabilities = mr->c.un_capabilities;
+ raidp->common.parent = mr->c.un_parent;
+ raidp->common.size = mr->c.un_total_blocks;
+ raidp->common.user_flags = mr->c.un_user_flags;
+ raidp->common.revision = mr->c.un_revision;
+
+ /* get options */
+ raidp->state = mr->un_state;
+ raidp->timestamp = mr->un_timestamp;
+ raidp->interlace = mr->un_segsize;
+ raidp->orig_ncol = mr->un_origcolumncnt;
+ raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
+ raidp->pw_count = mr->un_pwcnt;
+ assert(raidp->orig_ncol <= ncol);
+ if ((mr->un_hsp_id != MD_HSP_NONE) &&
+ ((raidp->hspnamep = metahsphspname(&sp, mr->un_hsp_id,
+ ep)) == NULL)) {
+ goto out;
+ }
+
+ /* get columns, update unit state */
+ for (col = 0; (col < ncol); ++col) {
+ mr_column_t *rcp = &mr->un_column[col]; /* column as stored in the unit */
+ md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; /* column being filled in */
+
+ /* get column name */
+ mdrcp->colnamep = metakeyname(&sp, rcp->un_orig_key, fast, ep);
+ if (mdrcp->colnamep == NULL)
+ goto out;
+
+ /* override any start_blk with the one recorded in the unit */
+#ifdef DEBUG
+ if (metagetstart(sp, mdrcp->colnamep, ep) !=
+ MD_DISKADDR_ERROR) {
+ assert(mdrcp->colnamep->start_blk <=
+ rcp->un_orig_devstart);
+ } else {
+ mdclrerror(ep);
+ }
+#endif /* DEBUG */
+ mdrcp->colnamep->start_blk = rcp->un_orig_devstart;
+
+ /* if hotspared */
+ if (HOTSPARED(mr, col)) {
+ /* get hotspare name */
+ mdrcp->hsnamep = metakeyname(&sp, rcp->un_hs_key,
+ fast, ep);
+ if (mdrcp->hsnamep == NULL)
+ goto out;
+
+ if (getenv("META_DEBUG_START_BLK") != NULL) { /* optional diagnostics only */
+ if (metagetstart(sp, mdrcp->hsnamep, ep) ==
+ MD_DISKADDR_ERROR)
+ mdclrerror(ep);
+
+ if ((mdrcp->hsnamep->start_blk == 0) &&
+ (rcp->un_hs_pwstart != 0))
+ md_eprintf(dgettext(TEXT_DOMAIN,
+ "%s: suspected bad start block,"
+ " seems labelled [raid]\n"),
+ mdrcp->hsnamep->cname);
+
+ if ((mdrcp->hsnamep->start_blk > 0) &&
+ (rcp->un_hs_pwstart == 0))
+ md_eprintf(dgettext(TEXT_DOMAIN,
+ "%s: suspected bad start block, "
+ " seems unlabelled [raid]\n"),
+ mdrcp->hsnamep->cname);
+ }
+
+ /* override any start_blk with the one recorded in the unit */
+ mdrcp->hsnamep->start_blk = rcp->un_hs_devstart;
+ }
+
+ /* get state, flags, and timestamp */
+ mdrcp->state = rcp->un_devstate;
+ mdrcp->flags = rcp->un_devflags;
+ mdrcp->timestamp = rcp->un_devtimestamp;
+ }
+
+ /* get resync info from the driver */
+ (void) memset(&ri, 0, sizeof (ri));
+ ri.ri_mnum = meta_getminor(raidnp->dev);
+ MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
+ if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, raidnp->cname) != 0) {
+ (void) mdstealerror(ep, &ri.mde);
+ goto out;
+ }
+ raidp->resync_flags = ri.ri_flags;
+ raidp->percent_dirty = ri.ri_percent_dirty;
+ raidp->percent_done = ri.ri_percent_done;
+
+ /* cleanup, cache the result on the drive name, return success */
+ Free(mr);
+ dnp->unitp = (md_common_t *)raidp;
+ return (raidp);
+
+ /*
+ * cleanup, return error
+ * NOTE(review): the names obtained above are not released here;
+ * presumably they are owned elsewhere -- confirm.
+ */
+out:
+ Free(mr);
+ meta_free_raid(raidp);
+ return (NULL);
+}
+
+/*
+ * get raid
+ *
+ * Convenience form of meta_get_raid_common() with the "fast" argument
+ * fixed at 0.
+ */
+md_raid_t *
+meta_get_raid(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp;
+
+	raidp = meta_get_raid_common(sp, raidnp, 0, ep);
+	return (raidp);
+}
+
+/*
+ * check raid for dev
+ *
+ * Walks the columns of raidnp.  For every column for which
+ * meta_check_samedrive() gives a positive answer against np, the block
+ * range (slblk, nblks) is checked with meta_check_overlap() against that
+ * column.
+ *
+ * Returns 0 if no check failed, -1 (with ep set by the failing routine)
+ * as soon as the unit cannot be read or any check reports a problem.
+ */
+static int
+in_raid(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp;
+	uint_t		idx;
+
+	/* should be in the same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* get unit */
+	raidp = meta_get_raid(sp, raidnp, ep);
+	if (raidp == NULL)
+		return (-1);
+
+	/* look in columns */
+	for (idx = 0; idx < raidp->cols.cols_len; idx++) {
+		mdname_t	*colnp = raidp->cols.cols_val[idx].colnamep;
+		diskaddr_t	col_sblk;
+		int		same;
+
+		/* check same drive since metagetstart() can fail */
+		same = meta_check_samedrive(np, colnp, ep);
+		if (same < 0)
+			return (-1);
+		if (same == 0)
+			continue;
+
+		/* check overlap */
+		col_sblk = metagetstart(sp, colnp, ep);
+		if (col_sblk == MD_DISKADDR_ERROR)
+			return (-1);
+		if (meta_check_overlap(raidnp->cname, np, slblk, nblks,
+		    colnp, col_sblk, -1, ep) != 0)
+			return (-1);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * check to see if we're in a raid
+ *
+ * Runs in_raid() for np against every RAID metadevice of the set and
+ * stops at the first one that reports a problem.
+ *
+ * Returns 0 if none did, -1 otherwise (or if the list of RAID names
+ * could not be obtained).
+ */
+int
+meta_check_inraid(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*raidnlp = NULL;
+	mdnamelist_t	*cur;
+	int		rval = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* for each raid */
+	if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
+		return (-1);
+
+	cur = raidnlp;
+	while ((cur != NULL) && (rval == 0)) {
+		/* check raid */
+		if (in_raid(sp, cur->namep, np, slblk, nblks, ep) != 0)
+			rval = -1;
+		else
+			cur = cur->next;
+	}
+
+	/* cleanup, return result */
+	metafreenamelist(raidnlp);
+	return (rval);
+}
+
+/*
+ * check column
+ *
+ * Decides whether np may be used as a RAID column.  The checks run in
+ * this order and the first failure ends the sequence:
+ *	- when meta_sp_issp() returns non-zero for np, metachkcomp()
+ *	  must accept it (make sure we have a disk)
+ *	- it must not already be in use (meta_check_inuse)
+ *	- it must be in the set (meta_check_inset)
+ *	- it must not be part of a metadevice (meta_check_inmeta);
+ *	  MDCHK_ALLOW_MDDB is passed as the option for this check
+ *
+ * Returns 0 if every check passed, -1 otherwise.
+ */
+int
+meta_check_column(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	inmeta_opts = (MDCHK_ALLOW_MDDB);
+
+	/* check for soft partitions, make sure we have a disk */
+	if ((meta_sp_issp(sp, np, ep) != 0) && (metachkcomp(np, ep) != 0))
+		return (-1);
+
+	/*
+	 * check to ensure that it is not already in use,
+	 * make sure it is in the set,
+	 * make sure its not in a metadevice
+	 */
+	if ((meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) ||
+	    (meta_check_inset(sp, np, ep) != 0) ||
+	    (meta_check_inmeta(sp, np, inmeta_opts, 0, -1, ep) != 0))
+		return (-1);
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * print raid
+ *
+ * Writes one line to fp describing raidp in the form
+ *	<name> -r <columns...> -k -i <interlace>b [-w n] [-h hsp] [-o n]
+ * When PRINT_LARGEDEVICES is set in options, units whose revision is not
+ * MD_64BIT_META_DEV are silently skipped.
+ *
+ * Returns 0 on success (or skip); on a write failure returns -1 after
+ * recording errno against fname in ep.
+ */
+static int
+raid_print(
+	md_raid_t	*raidp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t	idx;
+
+	/* only large devices wanted, and this isn't one: nothing to do */
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (raidp->common.revision != MD_64BIT_META_DEV))
+		return (0);
+
+	/* print name and -r */
+	if (fprintf(fp, "%s -r", raidp->common.namep->cname) == EOF)
+		goto fail;
+
+	/* print columns */
+	for (idx = 0; idx < raidp->cols.cols_len; idx++) {
+		mdname_t	*colnp = raidp->cols.cols_val[idx].colnamep;
+		char		*shown;
+
+		/*
+		 * If the path is our standard /dev/rdsk or /dev/md/rdsk
+		 * then just print out the cxtxdxsx or the dx, metainit
+		 * will assume the default, otherwise we need the full
+		 * pathname to make sure this works as we intend.
+		 */
+		if ((strstr(colnp->rname, "/dev/rdsk") != NULL) ||
+		    (strstr(colnp->rname, "/dev/md/rdsk") != NULL) ||
+		    (strstr(colnp->rname, "/dev/td/") != NULL)) {
+			/* standard path so print ctd or d number */
+			shown = colnp->cname;
+		} else {
+			/* not standard path, print full pathname */
+			shown = colnp->rname;
+		}
+		if (fprintf(fp, " %s", shown) == EOF)
+			goto fail;
+	}
+
+	if (fprintf(fp, " -k") == EOF)
+		goto fail;
+
+	/* print options */
+	if (fprintf(fp, " -i %lldb", raidp->interlace) == EOF)
+		goto fail;
+
+	if (raidp->pw_count != PWCNT_MIN) {
+		if (fprintf(fp, " -w %d", raidp->pw_count) == EOF)
+			goto fail;
+	}
+
+	if (raidp->hspnamep != NULL) {
+		if (fprintf(fp, " -h %s", raidp->hspnamep->hspname) == EOF)
+			goto fail;
+	}
+
+	if (raidp->orig_ncol != raidp->cols.cols_len) {
+		assert(raidp->orig_ncol < raidp->cols.cols_len);
+		if (fprintf(fp, " -o %u", raidp->orig_ncol) == EOF)
+			goto fail;
+	}
+
+	/* terminate last line */
+	if (fprintf(fp, "\n") == EOF)
+		goto fail;
+
+	/* success */
+	return (0);
+
+	/* a write failed: report it against the output file name */
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * Return the index of the first column whose state has RCS_RESYNC set,
+ * or -1 when no column is resyncing.
+ */
+static int
+find_resyncing_column(
+	md_raid_t	*raidp
+)
+{
+	int	idx = 0;
+
+	while (idx < raidp->cols.cols_len) {
+		if (raidp->cols.cols_val[idx].state & RCS_RESYNC)
+			return (idx);
+		++idx;
+	}
+
+	return (-1);
+}
+
+/*
+ * Translate the state of a raid into a localized, printable name.
+ *
+ * When tvp is not NULL it receives the raid's timestamp.  tstate carries
+ * the errored transient-state flags of the unit.
+ *
+ * Precedence: MD_INACCESSIBLE in tstate ("Unavailable"), then any
+ * resyncing column ("Resyncing"), then the unit state itself.
+ */
+char *
+raid_state_to_name(
+	md_raid_t	*raidp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate	/* Errored tstate flags */
+)
+{
+	char	*name;
+
+	/* hand back the time stamp if asked */
+	if (tvp != NULL)
+		*tvp = raidp->timestamp;
+
+	/*
+	 * A device with a transient error state (DR'ed or failed) that
+	 * has seen no I/O is still marked 'Okay', so neither its real
+	 * state nor the right action is known: report 'Unavailable'.
+	 * A later I/O makes the 'Okay' status go away if the device is
+	 * really gone, and the proper status is printed from then on.
+	 * MD_INACCESSIBLE is only set on the raid at open or probe time;
+	 * once the raid is open only regular error status remains.
+	 */
+	if (tstate & MD_INACCESSIBLE)
+		return (dgettext(TEXT_DOMAIN, "Unavailable"));
+
+	/* a resync in any column takes precedence over the unit state */
+	if (find_resyncing_column(raidp) >= 0)
+		return (dgettext(TEXT_DOMAIN, "Resyncing"));
+
+	switch (raidp->state) {
+	case RUS_INIT:
+		name = dgettext(TEXT_DOMAIN, "Initializing");
+		break;
+	case RUS_OKAY:
+		name = dgettext(TEXT_DOMAIN, "Okay");
+		break;
+	case RUS_ERRED:
+	case RUS_LAST_ERRED:
+		name = dgettext(TEXT_DOMAIN, "Needs Maintenance");
+		break;
+	case RUS_DOI:
+		name = dgettext(TEXT_DOMAIN, "Initialization Failed");
+		break;
+	case RUS_REGEN:
+		name = dgettext(TEXT_DOMAIN, "Regen");
+		break;
+	default:
+		name = dgettext(TEXT_DOMAIN, "invalid");
+		break;
+	}
+
+	return (name);
+}
+
+/*
+ * Return the index of the first column whose state shares a bit with
+ * the given state mask, or -1 when no column matches.
+ */
+static int
+find_erred_column(md_raid_t *raidp, rcs_state_t state)
+{
+	int	idx = 0;
+
+	while (idx < raidp->cols.cols_len) {
+		if (raidp->cols.cols_val[idx].state & state)
+			return (idx);
+		++idx;
+	}
+
+	return (-1);
+}
+
+/*
+ * Convert raid state to a suggested repair command.
+ *
+ * Returns NULL when nothing needs doing.  Otherwise returns a pointer to
+ * a static buffer holding the command line, so the result is overwritten
+ * by the next call and must not be freed.
+ *
+ *   RUS_DOI set in the unit state	-> "metaclear -f <raid>"
+ *   an errored column			-> "metareplace [-f ]<raid> <col> <new device>"
+ *
+ * Columns in RCS_ERRED or RCS_INIT_ERRED are preferred over a column in
+ * RCS_LAST_ERRED.  "-f " is added when the unit state is exactly
+ * RUS_LAST_ERRED.
+ */
+char *
+raid_state_to_action(md_raid_t *raidp)
+{
+	static char	emsg[1024];
+	mdname_t	*raidnp = raidp->common.namep;
+	mdname_t	*colnp;
+	int		err_col;
+
+	/* first check for full init failure */
+	if (raidp->state & RUS_DOI) {
+		(void) snprintf(emsg, sizeof (emsg),
+		    "metaclear -f %s", raidnp->cname);
+		return (emsg);
+	}
+
+	/* errored or init errored column first, last errored otherwise */
+	err_col = find_erred_column(raidp, (RCS_ERRED | RCS_INIT_ERRED));
+	if (err_col < 0)
+		err_col = find_erred_column(raidp, RCS_LAST_ERRED);
+
+	/* OK */
+	if (err_col < 0)
+		return (NULL);
+
+	/*
+	 * Both kinds of errored column get the same command.  The optional
+	 * flag carries its own trailing blank, so the format must not add
+	 * another separator after it.
+	 */
+	assert(err_col < raidp->cols.cols_len);
+	colnp = raidp->cols.cols_val[err_col].colnamep;
+	(void) snprintf(emsg, sizeof (emsg),
+	    "metareplace %s%s %s <%s>",
+	    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
+	    raidnp->cname, colnp->cname,
+	    dgettext(TEXT_DOMAIN, "new device"));
+	return (emsg);
+}
+
+/*
+ * Translate the state of a single raid column into a localized,
+ * printable name.
+ *
+ * When tvp is not NULL it receives the column's timestamp.  Any non-zero
+ * tstate yields "Unavailable" regardless of the column state.
+ */
+char *
+raid_col_state_to_name(
+	md_raidcol_t	*colp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate
+)
+{
+	char	*name;
+
+	/* hand back the time stamp if asked */
+	if (tvp != NULL)
+		*tvp = colp->timestamp;
+
+	/* transient state overrides the recorded column state */
+	if (tstate != 0)
+		return (dgettext(TEXT_DOMAIN, "Unavailable"));
+
+	switch (colp->state) {
+	case RCS_INIT:
+		name = dgettext(TEXT_DOMAIN, "Initializing");
+		break;
+	case RCS_OKAY:
+		name = dgettext(TEXT_DOMAIN, "Okay");
+		break;
+	case RCS_INIT_ERRED:
+	case RCS_ERRED:
+		name = dgettext(TEXT_DOMAIN, "Maintenance");
+		break;
+	case RCS_LAST_ERRED:
+		name = dgettext(TEXT_DOMAIN, "Last Erred");
+		break;
+	case RCS_RESYNC:
+		name = dgettext(TEXT_DOMAIN, "Resyncing");
+		break;
+	default:
+		name = dgettext(TEXT_DOMAIN, "Unknown");
+		break;
+	}
+
+	return (name);
+}
+
+/*
+ * Print one row of the raid column table.
+ *
+ *	sp		set the raid lives in
+ *	colp		column to describe
+ *	fname, fp	output file name (for error reports) and stream
+ *	options		PRINT_DEVID fills the Reloc field, PRINT_TIMES
+ *			selects the row layout that ends with a time stamp
+ *	print_len	width of the device name field
+ *	top_tstate	errored tstate flags of the raid above this column
+ *	ep		error return
+ *
+ * Start block and metadb presence are reported for the hot spare when
+ * one is present, otherwise for the column itself; the name printed is
+ * always the column name.
+ *
+ * Returns 0 on success, -1 on failure.  Output failures are recorded in
+ * ep as a system error against fname; failures of the lookup helpers
+ * return -1 directly after handing ep to the helper.
+ */
+static int
+display_raid_device_info(
+	mdsetname_t	*sp,
+	md_raidcol_t	*colp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	int		print_len,
+	uint_t		top_tstate, /* Errored tstate flags */
+	md_error_t	*ep
+)
+{
+	mdname_t	*namep = ((colp->hsnamep != NULL) ?
+			    colp->hsnamep : colp->colnamep);
+	char		*devid = "";
+	char		*cname = colp->colnamep->cname;
+	diskaddr_t	start_blk;
+	int		has_mddb;
+	char		*has_mddb_str;
+	char		*col_state;
+	/*
+	 * Seed the time stamp up front: the "-" state below never calls
+	 * raid_col_state_to_name(), and the PRINT_TIMES row would
+	 * otherwise format an uninitialized value.
+	 */
+	md_timeval32_t	tv = colp->timestamp;
+	char		*hsname = ((colp->hsnamep != NULL) ?
+			    colp->hsnamep->cname : "");
+	int		rval = -1;
+	mdname_t	*didnp = NULL;
+	ddi_devid_t	dtp;
+	uint_t		tstate = 0;
+
+	/* get info */
+	if ((start_blk = metagetstart(sp, namep, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	if ((has_mddb = metahasmddb(sp, namep, ep)) < 0)
+		return (-1);
+	if (has_mddb)
+		has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+	else
+		has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+	if (metaismeta(namep)) {
+		/* a metadevice column carries its own transient state */
+		if (meta_get_tstate(namep->dev, &tstate, ep) != 0)
+			return (-1);
+		col_state = raid_col_state_to_name(colp, &tv,
+		    tstate & MD_DEV_ERRORED);
+	} else {
+		/*
+		 * if top_tstate is set, that implies that you have
+		 * a ctd type device with an unavailable metadevice
+		 * on top of it. If so, print a - for its state
+		 */
+		if (top_tstate != 0)
+			col_state = "-";
+		else
+			col_state = raid_col_state_to_name(colp, &tv, tstate);
+	}
+
+	/* populate the key in the name_p structure */
+	if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL)
+		return (-1);
+
+	/* determine if devid does NOT exist */
+	if (options & PRINT_DEVID) {
+		if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep),
+		    didnp->key, ep)) == NULL)
+			devid = dgettext(TEXT_DOMAIN, "No ");
+		else {
+			devid = dgettext(TEXT_DOMAIN, "Yes");
+			free(dtp);
+		}
+	}
+
+	/*
+	 * print column; the name field width is passed in by the caller
+	 * so that the length of the ctd can vary from small to large
+	 * without looking horrible.
+	 */
+	if (! (options & PRINT_TIMES)) {
+		if (fprintf(fp,
+		    "\t%-*.*s %8lld %5.5s %12.12s %5.5s %s\n",
+		    print_len, print_len, cname, start_blk, has_mddb_str,
+		    col_state, devid, hsname) == EOF) {
+			goto out;
+		}
+	} else {
+		char	*timep = meta_print_time(&tv);
+
+		if (fprintf(fp,
+		    "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
+		    print_len, cname, start_blk, has_mddb_str,
+		    col_state, devid, hsname, timep) == EOF) {
+			goto out;
+		}
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+
+	return (rval);
+}
+
+/*
+ * Print the "Hot spare pool" line of a raid report.
+ *
+ * A NULL hspnamep is shown as the localized word "none".  Returns 0 on
+ * success; on an output failure the errno is recorded in ep against
+ * fname and -1 is returned.
+ */
+int
+meta_print_raid_options(
+	mdhspname_t	*hspnamep,
+	char		*fname,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	char	*poolname;
+
+	if (hspnamep != NULL)
+		poolname = hspnamep->hspname;
+	else
+		poolname = dgettext(TEXT_DOMAIN, "none");
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN,
+	    " Hot spare pool: %s\n"), poolname) == EOF) {
+		(void) mdsyserror(ep, errno, fname);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Print the long (metastat style) report for one raid.
+ *
+ *	sp		set the raid lives in
+ *	raidp		raid to report on
+ *	fname, fp	output file name (for error reports) and stream
+ *	options		PRINT_LARGEDEVICES skips units that are not
+ *			MD_64BIT_META_DEV, PRINT_HEADER adds the title
+ *			line, PRINT_TIMES adds time stamps, PRINT_DEBUG
+ *			adds the prewrite figures
+ *	ep		error return
+ *
+ * The report lists the columns the raid was created with ("Original
+ * device") and, when columns were attached later, those under
+ * "Concatenated Devices".
+ *
+ * Returns 0 on success, -1 on failure.  Output failures are recorded in
+ * ep as a system error against fname; failures of the helpers return -1
+ * directly after handing ep to the helper.
+ *
+ * NOTE(review): the header formats of the two column tables differ
+ * slightly from each other and from the row format used by
+ * display_raid_device_info(); they are reproduced unchanged here.
+ */
+static int
+raid_report(
+	mdsetname_t	*sp,
+	md_raid_t	*raidp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*p;
+	uint_t		ncol = raidp->cols.cols_len;
+	uint_t		orig_ncol = raidp->orig_ncol;
+	diskaddr_t	column_size = raidp->column_size;
+	char		*raid_state;
+	md_timeval32_t	tv;
+	char		*timep;
+	uint_t		col;
+	int		rval = -1;
+	int		len = 0;
+	uint_t		tstate = 0;
+
+	if (options & PRINT_LARGEDEVICES) {
+		if (raidp->common.revision != MD_64BIT_META_DEV) {
+			rval = 0;
+			goto out;
+		}
+	}
+
+	/* print header */
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: RAID\n"),
+		    raidp->common.namep->cname) == EOF) {
+			goto out;
+		}
+
+	}
+
+	/* print state */
+	if (metaismeta(raidp->common.namep)) {
+		if (meta_get_tstate(raidp->common.namep->dev, &tstate, ep) != 0)
+			return (-1);
+	}
+	tstate &= MD_DEV_ERRORED; /* extract the errored tstate bits */
+	raid_state = raid_state_to_name(raidp, &tv, tstate);
+	if (options & PRINT_TIMES) {
+		timep = meta_print_time(&tv);
+	} else {
+		timep = "";
+	}
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " State: %-12s %s\n"),
+	    raid_state, timep) == EOF) {
+		goto out;
+	}
+
+	/*
+	 * Display recovery action if we're marked in the Unavailable state.
+	 */
+	if ((tstate == 0) || (tstate & MD_INACCESSIBLE)) {
+		/* print what to do */
+		if (tstate & MD_INACCESSIBLE) {
+			char sname[MD_MAX_SETNAME + 3]; /* 3 = sizeof("-s ") */
+
+			if (metaislocalset(sp)) {
+				sname[0] = '\0';
+			} else {
+				(void) snprintf(sname, MD_MAX_SETNAME + 3,
+				    "-s %s", sp->setname);
+			}
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Invoke: metastat -i %s\n"), sname) == EOF) {
+				goto out;
+			}
+		} else if ((p = raid_state_to_action(raidp)) != NULL) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Invoke: %s\n"), p) == EOF) {
+				goto out;
+			}
+		}
+
+		/* resync status; percent_done is kept in tenths of a percent */
+		if (raidp->resync_flags & MD_RI_INPROGRESS) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Resync in progress: %2d.%1d%% done\n"),
+			    raidp->percent_done/10,
+			    raidp->percent_done % 10) == EOF) {
+				goto out;
+			}
+		} else if (raidp->resync_flags & MD_GROW_INPROGRESS) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Initialization in progress: %2d.%1d%% "
+			    "done\n"),
+			    raidp->percent_done/10,
+			    raidp->percent_done % 10) == EOF) {
+				goto out;
+			}
+		} else if (raidp->state & RUS_REGEN) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Parity regeneration in progress: %2d.%1d%% "
+			    "done\n"),
+			    raidp->percent_done/10,
+			    raidp->percent_done % 10) == EOF) {
+				goto out;
+			}
+		}
+	}
+
+	/* print hotspare pool */
+	if (raidp->hspnamep != NULL) {
+		if (meta_print_raid_options(raidp->hspnamep,
+		    fname, fp, ep) != 0) {
+			return (-1);
+		}
+	}
+
+	/* print interlace */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Interlace: %lld blocks\n"),
+	    raidp->interlace) == EOF) {
+		goto out;
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"),
+	    raidp->common.size,
+	    meta_number_to_string(raidp->common.size, DEV_BSIZE)) == EOF) {
+		goto out;
+	}
+
+	/* MD_DEBUG stuff */
+	if (options & PRINT_DEBUG) {
+		mdname_t	*raidnp = raidp->common.namep;
+		mr_unit_t	*mr;
+
+		/* get additional info */
+		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+			return (-1);
+		assert(mr->c.un_type == MD_METARAID);
+
+		/* print prewrite count and size */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " Prewrite Count: %u slots\n"),
+		    mr->un_pwcnt) == EOF) {
+			Free(mr);
+			goto out;
+		}
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " Prewrite Slot Size: %u blocks\n"),
+		    (mr->un_pwsize / mr->un_pwcnt)) == EOF) {
+			Free(mr);
+			goto out;
+		}
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " Prewrite Total Size: %u blocks\n"),
+		    mr->un_pwsize) == EOF) {
+			Free(mr);
+			goto out;
+		}
+		Free(mr);
+	}
+
+	/* print original devices */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "Original device:\n")) == EOF)
+		goto out;
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"),
+	    column_size * (orig_ncol - 1),
+	    meta_number_to_string(column_size * (orig_ncol - 1), DEV_BSIZE))
+	    == EOF) {
+		goto out;
+	}
+
+	/*
+	 * Work out the width of the device name field so that the length
+	 * of the ctd can vary from small to large without looking
+	 * horrible.  Every column is measured, the concatenated ones
+	 * included: both tables are printed with this one width, and the
+	 * non-PRINT_TIMES row format also uses it as the precision, so a
+	 * longer concatenated name would otherwise be cut short.
+	 */
+	for (col = 0; (col < ncol); ++col) {
+		len = max(len,
+		    strlen(raidp->cols.cols_val[col].colnamep->cname));
+	}
+
+	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+	len += 2;
+
+	if (! (options & PRINT_TIMES)) {
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "State"),
+		    dgettext(TEXT_DOMAIN, "Reloc"),
+		    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
+			goto out;
+		}
+	} else {
+		if (fprintf(fp,
+		    "\t%-*s %5s %-5s %-11s %-5s %-9s %s\n",
+		    len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "State"),
+		    dgettext(TEXT_DOMAIN, "Reloc"),
+		    dgettext(TEXT_DOMAIN, "Hot Spare"),
+		    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
+			goto out;
+		}
+	}
+	for (col = 0; (col < orig_ncol); ++col) {
+		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
+
+		if (display_raid_device_info(sp, mdrcp, fname, fp, options,
+		    len, tstate, ep) != 0) {
+			return (-1);
+		}
+	}
+
+	/* print concatenated devices */
+	if (col < ncol) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "Concatenated Devices:\n")) == EOF) {
+			goto out;
+		}
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " Size: %lld blocks (%s)\n"),
+		    column_size * (ncol - orig_ncol),
+		    meta_number_to_string(column_size * (ncol - orig_ncol),
+		    DEV_BSIZE))
+		    == EOF) {
+			goto out;
+		}
+		/* same name field width as the table above */
+		if (! (options & PRINT_TIMES)) {
+			if (fprintf(fp,
+			    "\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n",
+			    len, len,
+			    dgettext(TEXT_DOMAIN, "Device"),
+			    dgettext(TEXT_DOMAIN, "Start Block"),
+			    dgettext(TEXT_DOMAIN, "Dbase"),
+			    dgettext(TEXT_DOMAIN, "State"),
+			    dgettext(TEXT_DOMAIN, "Reloc"),
+			    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp,
+			    "\t%-*s %5s %-5s %-11s %-9s %s\t%s\n",
+			    len,
+			    dgettext(TEXT_DOMAIN, "Device"),
+			    dgettext(TEXT_DOMAIN, "Start"),
+			    dgettext(TEXT_DOMAIN, "Dbase"),
+			    dgettext(TEXT_DOMAIN, "State"),
+			    dgettext(TEXT_DOMAIN, "Reloc"),
+			    dgettext(TEXT_DOMAIN, "Hot Spare"),
+			    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
+				goto out;
+			}
+		}
+		assert(col == orig_ncol);
+		for (/* void */; (col < ncol); col++) {
+			md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
+
+			if (display_raid_device_info(sp, mdrcp, fname, fp,
+			    options, len, tstate, ep) != 0) {
+				return (-1);
+			}
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * Print or report a raid.
+ *
+ * With raidnp == NULL every raid returned by meta_get_raid_names() for
+ * the set is printed in turn; the result is -1 if any of them failed.
+ *
+ * Otherwise the named raid is printed in short form (PRINT_SHORT) or as
+ * a full report, followed by each of its columns that is itself a
+ * metadevice.  A raid that has a parent is skipped unless PRINT_SUBDEVS
+ * is set.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int
+meta_raid_print(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp;
+	int		status;
+	int		idx;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert((raidnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
+
+	/* no name given: walk every raid in the set */
+	if (raidnp == NULL) {
+		mdnamelist_t	*names = NULL;
+		mdnamelist_t	*cur;
+		int		nraid;
+
+		nraid = meta_get_raid_names(sp, &names, options, ep);
+		if (nraid < 0)
+			return (-1);
+		if (nraid == 0)
+			return (0);
+
+		/* keep going after a failure, but remember it */
+		status = 0;
+		for (cur = names; cur != NULL; cur = cur->next) {
+			if (meta_raid_print(sp, cur->namep, nlpp, fname, fp,
+			    options, ep) != 0)
+				status = -1;
+		}
+
+		metafreenamelist(names);
+		return (status);
+	}
+
+	/* get unit structure */
+	raidp = meta_get_raid_common(sp, raidnp,
+	    ((options & PRINT_FAST) ? 1 : 0), ep);
+	if (raidp == NULL)
+		return (-1);
+
+	/* parented raids are only shown as sub devices */
+	if (!(options & PRINT_SUBDEVS) &&
+	    MD_HAS_PARENT(raidp->common.parent))
+		return (0);
+
+	/* print appropriate detail */
+	if (options & PRINT_SHORT)
+		status = raid_print(raidp, fname, fp, options, ep);
+	else
+		status = raid_report(sp, raidp, fname, fp, options, ep);
+	if (status != 0)
+		return (-1);
+
+	/* descend into the columns that are metadevices */
+	for (idx = 0; idx < raidp->cols.cols_len; idx++) {
+		mdname_t	*compnp = raidp->cols.cols_val[idx].colnamep;
+
+		if (!metaismeta(compnp))
+			continue;
+		if (meta_print_name(sp, compnp, nlpp, fname, fp,
+		    (options | PRINT_HEADER | PRINT_SUBDEVS),
+		    NULL, ep) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Fold the geometry of a column into the raid unit: fetch the column's
+ * geometry with metagetgeom() and pass its read/write reinstruct values
+ * to meta_adjust_geom() with a round_cyl of 1.
+ *
+ * Returns 0 on success, -1 when either helper fails.
+ */
+static int
+adjust_geom(
+	mdname_t	*raidnp,
+	mdname_t	*colnp,
+	mr_unit_t	*mr,
+	md_error_t	*ep
+)
+{
+	uint_t		round_cyl = 1;
+	mdgeom_t	*geomp = metagetgeom(colnp, ep);
+
+	/* no geometry, nothing to adjust with */
+	if (geomp == NULL)
+		return (-1);
+
+	if (meta_adjust_geom((md_unit_t *)mr, raidnp,
+	    geomp->write_reinstruct, geomp->read_reinstruct,
+	    round_cyl, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Fill in the next column slot of a raid unit structure.
+ *
+ *	mr	unit being built; un_actual_tb and un_totalcolumncnt are
+ *		updated on success
+ *	mdc	column slot to fill in
+ *	colnp	device that becomes the column
+ *	state	initial column state
+ *	keynlpp	list that collects the namespace keys that get added
+ *	options	the name is only stored in the namespace with MDCMD_DOIT
+ *
+ * The data area of the column starts un_pwsize blocks after the start
+ * of the device (the prewrite area sits in front of it), and what is
+ * left must hold at least one full column.
+ *
+ * Returns 0 on success and non-zero with ep set on failure.  On failure
+ * mdc, and possibly mr->c.un_actual_tb, may already have been modified.
+ */
+static int
+attach_raid_col(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mr_unit_t	*mr,
+	mr_column_t	*mdc,
+	mdname_t	*colnp,
+	rcs_state_t	state,
+	mdnamelist_t	**keynlpp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	colblks = mr->un_segsize * mr->un_segsincolumn;
+	diskaddr_t	devblks;
+	mdcinfo_t	*ctlr;
+	md_timeval32_t	now;
+
+	/* state first, then stamp it with the current time */
+	mdc->un_devstate = state;
+	if (meta_gettimeofday(&now) == -1)
+		return (mdsyserror(ep, errno, NULL));
+	mdc->un_devtimestamp = now;
+
+	/* where the device starts, how big it is, what it can transfer */
+	mdc->un_orig_devstart = metagetstart(sp, colnp, ep);
+	if (mdc->un_orig_devstart == MD_DISKADDR_ERROR)
+		return (-1);
+	devblks = metagetsize(colnp, ep);
+	if (devblks == MD_DISKADDR_ERROR)
+		return (-1);
+	ctlr = metagetcinfo(colnp, ep);
+	if (ctlr == NULL)
+		return (-1);
+
+	/* the prewrite area goes in front of the data */
+	mdc->un_orig_pwstart = mdc->un_orig_devstart;
+	mdc->un_orig_devstart += mr->un_pwsize;
+
+	/* a full column has to fit behind it */
+	if ((mdc->un_orig_devstart >= devblks) ||
+	    ((devblks - mdc->un_orig_devstart) < colblks))
+		return (mdsyserror(ep, ENOSPC, colnp->cname));
+
+	/* the column must handle transfers as large as the raid issues */
+	if (ctlr->maxtransfer < mr->un_maxio)
+		return (mdcomperror(ep, MDE_MAXIO,
+		    meta_getminor(raidnp->dev), colnp->dev, colnp->cname));
+
+	/* store name in namespace, but only when really doing it */
+	if ((options & MDCMD_DOIT) &&
+	    (add_key_name(sp, colnp, keynlpp, ep) != 0))
+		return (-1);
+
+	/* fill in the rest of the column */
+	mdc->un_orig_dev = colnp->dev;
+	mdc->un_orig_key = colnp->key;
+	mdc->un_dev = colnp->dev;
+	mdc->un_pwstart = mdc->un_orig_pwstart;
+	mdc->un_devstart = mdc->un_orig_devstart;
+	mdc->un_alt_dev = NODEV64;
+	mdc->un_alt_pwstart = 0;
+	mdc->un_alt_devstart = 0;
+	mdc->un_hs_id = 0;
+
+	/* account for the part of the device that gets used */
+	mr->c.un_actual_tb += colblks;
+
+	/* adjust geometry */
+	if (adjust_geom(raidnp, colnp, mr, ep) != 0)
+		return (-1);
+
+	/* one more column */
+	mr->un_totalcolumncnt++;
+
+	return (0);
+}
+
+/*
+ * Invalidate the cached name of every column of a raid.
+ *
+ * Returns 0 on success, -1 when the raid cannot be fetched.
+ */
+static int
+invalidate_columns(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp = meta_get_raid(sp, raidnp, ep);
+	uint_t		idx;
+
+	if (raidp == NULL)
+		return (-1);
+
+	for (idx = 0; idx < raidp->cols.cols_len; idx++)
+		meta_invalidate_name(raidp->cols.cols_val[idx].colnamep);
+
+	return (0);
+}
+
+/*
+ * attach columns to raid
+ *
+ * Grows the raid raidnp by the devices on colnlp.  Each new device is
+ * checked with meta_check_column() and against the devices that follow
+ * it on the list with meta_check_overlap().  A copy of the current unit
+ * structure with room for the extra columns is then built and handed to
+ * the driver with MD_IOCGROW, the cached names are invalidated and any
+ * parents are grown with meta_concat_parent().
+ *
+ * MDCMD_PRINT in options prints a confirmation on stdout.  MDCMD_DOIT
+ * is always forced on before the columns are built.
+ *
+ * Returns 0 on success, -1 on failure with ep set.
+ */
+int
+meta_raid_attach(
+ mdsetname_t *sp,
+ mdname_t *raidnp,
+ mdnamelist_t *colnlp,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ uint_t concat_cnt = 0;
+ mdnamelist_t *p;
+ mr_unit_t *old_mr;
+ mr_unit_t *new_mr;
+ size_t old_rusize;
+ size_t new_rusize;
+ mdnamelist_t *keynlp = NULL;
+ md_grow_params_t mgp;
+ int rval = -1;
+ int create_flag = MD_CRO_32BIT;
+
+ /* should have a set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+ /* check type */
+ if (metachkmeta(raidnp, ep) != 0)
+ return (-1);
+
+ /* check and count new columns */
+ for (p = colnlp; (p != NULL); p = p->next) {
+ mdname_t *np = p->namep;
+ mdnamelist_t *p2;
+
+ /* check against existing devices */
+ if (meta_check_column(sp, np, ep) != 0)
+ return (-1);
+
+ /* check against ourselves */
+ for (p2 = p->next; (p2 != NULL); p2 = p2->next) {
+ if (meta_check_overlap(np->cname, np, 0, -1,
+ p2->namep, 0, -1, ep) != 0) {
+ return (-1);
+ }
+ }
+
+ /* count */
+ ++concat_cnt;
+ }
+
+ /* get old unit */
+ if ((old_mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+ return (-1);
+
+ /*
+ * calculate the size needed for the new raid unit and allocate
+ * the appropriate structure. allocate new unit.
+ *
+ * sizeof (mr_unit_t) already includes one un_column[] entry, hence
+ * the subtraction before the real column count is added back in.
+ */
+ old_rusize = sizeof (*old_mr) - sizeof (old_mr->un_column[0]);
+ old_rusize += old_mr->un_totalcolumncnt * sizeof (old_mr->un_column[0]);
+ new_rusize = sizeof (*new_mr) - sizeof (new_mr->un_column[0]);
+ new_rusize += (old_mr->un_totalcolumncnt + concat_cnt)
+ * sizeof (new_mr->un_column[0]);
+ new_mr = Zalloc(new_rusize);
+ (void) memcpy(new_mr, old_mr, old_rusize);
+
+ /* We always want a do-it, this is for attach_raid_col below */
+ options |= MDCMD_DOIT;
+
+ /* build new unit structure */
+ for (p = colnlp; (p != NULL); p = p->next) {
+ mdname_t *colnp = p->namep;
+ mr_column_t *mdc;
+
+ /*
+ * attach column; attach_raid_col bumps un_totalcolumncnt on
+ * success, so this always addresses the next free slot
+ */
+ mdc = &new_mr->un_column[new_mr->un_totalcolumncnt];
+ if (attach_raid_col(sp, raidnp, new_mr, mdc, colnp,
+ RCS_INIT, &keynlp, options, ep) != 0) {
+ goto out;
+ }
+ }
+ assert(new_mr->un_totalcolumncnt
+ == (old_mr->un_totalcolumncnt + concat_cnt));
+
+
+ /* pick the 32 or 64 bit flavour from the unit's block count */
+ create_flag = meta_check_devicesize(new_mr->c.un_total_blocks);
+
+ /* grow raid */
+ (void) memset(&mgp, 0, sizeof (mgp));
+ mgp.mnum = MD_SID(new_mr);
+ MD_SETDRIVERNAME(&mgp, MD_RAID, sp->setno);
+ mgp.size = new_rusize;
+ mgp.mdp = (uintptr_t)new_mr;
+
+ if (create_flag == MD_CRO_32BIT) {
+ mgp.options = MD_CRO_32BIT;
+ new_mr->c.un_revision = MD_32BIT_META_DEV;
+ } else {
+ mgp.options = MD_CRO_64BIT;
+ new_mr->c.un_revision = MD_64BIT_META_DEV;
+ }
+ if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) {
+ (void) mdstealerror(ep, &mgp.mde);
+ goto out;
+ }
+
+ /* clear cache */
+ if (invalidate_columns(sp, raidnp, ep) != 0)
+ goto out;
+ meta_invalidate_name(raidnp);
+
+ /* let em know */
+ if (options & MDCMD_PRINT) {
+ if (concat_cnt == 1) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: component is attached\n"),
+ raidnp->cname);
+ } else {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: components are attached\n"),
+ raidnp->cname);
+ }
+ (void) fflush(stdout);
+ }
+
+
+ /* grow any parents */
+ if (meta_concat_parent(sp, raidnp, ep) != 0)
+ goto out;
+ rval = 0; /* success */
+
+ /*
+ * cleanup, return error
+ *
+ * Both unit copies are freed on every path.  On failure the keys
+ * collected in keynlp are removed from the namespace again.
+ *
+ * NOTE(review): failures that happen after MD_IOCGROW succeeded
+ * (invalidate_columns, meta_concat_parent) also get here with
+ * rval != 0 and delete the keys of columns the driver has
+ * presumably already taken over -- confirm that this is intended.
+ */
+out:
+ Free(old_mr);
+ Free(new_mr);
+ if (rval != 0)
+ (void) del_key_names(sp, keynlp, NULL);
+ metafreenamelist(keynlp);
+ return (rval);
+}
+
+/*
+ * Fetch the settable parameters of a raid.
+ *
+ * *paramsp is zeroed and its hsp_id is set to the raid's hot spare
+ * pool, or to MD_HSP_NONE when the raid has none.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+meta_raid_get_params(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mr_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* the name has to be a metadevice */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* fetch the raid */
+	raidp = meta_get_raid(sp, raidnp, ep);
+	if (raidp == NULL)
+		return (-1);
+
+	/* start from a clean slate and fill in what we know */
+	(void) memset(paramsp, 0, sizeof (*paramsp));
+	if (raidp->hspnamep != NULL)
+		paramsp->hsp_id = raidp->hspnamep->hsp;
+	else
+		paramsp->hsp_id = MD_HSP_NONE;
+
+	return (0);
+}
+
+/*
+ * Change the settable parameters of a raid.
+ *
+ * *paramsp is passed to the raid driver with MD_IOCCHANGE; on success
+ * the cached name of the raid is invalidated.
+ *
+ * Returns 0 on success, non-zero with ep set on failure.
+ */
+int
+meta_raid_set_params(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mr_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_raid_params_t	req;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* the name has to be a metadevice */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* build the request */
+	(void) memset(&req, 0, sizeof (req));
+	MD_SETDRIVERNAME(&req, MD_RAID, sp->setno);
+	req.mnum = meta_getminor(raidnp->dev);
+	req.params = *paramsp;
+
+	/* and hand it to the driver */
+	if (metaioctl(MD_IOCCHANGE, &req, &req.mde, raidnp->cname) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* what we have cached is stale now */
+	meta_invalidate_name(raidnp);
+
+	return (0);
+}
+
+/*
+ * validate raid replace column
+ *
+ * Checks that colnp can stand in for a column of raidnp and fills in
+ * number_blks, has_label and start_blk of *paramsp from it.
+ *
+ * With dup_ok set, an MDE_ALREADY in-use error from meta_check_column()
+ * is cleared and tolerated.
+ *
+ * The device must be able to hold a full column behind its start block
+ * (ENOSPC otherwise) and its controller must handle transfers of at
+ * least the raid's un_maxio (MDE_MAXIO otherwise).
+ *
+ * Returns 0 on success, -1 on failure with ep set.
+ */
+static int
+validate_new_raid(
+	mdsetname_t		*sp,
+	mdname_t		*raidnp,
+	mdname_t		*colnp,
+	replace_params_t	*paramsp,
+	int			dup_ok,
+	md_error_t		*ep
+)
+{
+	mr_unit_t	*mr;
+	diskaddr_t	column_size;
+	diskaddr_t	label;
+	mdcinfo_t	*cinfop;
+	int		rval = -1;
+
+	/* get raid unit */
+	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+		return (-1);
+	column_size = mr->un_segsize * mr->un_segsincolumn;
+
+	/* check it out */
+	if (meta_check_column(sp, colnp, ep) != 0) {
+		if ((! dup_ok) || (! mdisuseerror(ep, MDE_ALREADY)))
+			goto out;
+		mdclrerror(ep);
+	}
+	if ((paramsp->number_blks = metagetsize(colnp, ep)) ==
+	    MD_DISKADDR_ERROR)
+		goto out;
+	if ((label = metagetlabel(colnp, ep)) == MD_DISKADDR_ERROR)
+		goto out;
+	paramsp->has_label = ((label > 0) ? 1 : 0);
+	if ((paramsp->start_blk = metagetstart(sp, colnp, ep)) ==
+	    MD_DISKADDR_ERROR)
+		goto out;
+
+	/*
+	 * Make sure a full column fits.  The start block is compared with
+	 * the size first, as attach_raid_col() does, so that the
+	 * subtraction cannot wrap when the start lies at or past the end
+	 * of the device.
+	 */
+	if ((paramsp->start_blk >= paramsp->number_blks) ||
+	    ((paramsp->number_blks - paramsp->start_blk) < column_size)) {
+		(void) mdsyserror(ep, ENOSPC, colnp->cname);
+		goto out;
+	}
+	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
+		goto out;
+	if (cinfop->maxtransfer < mr->un_maxio) {
+		(void) mdcomperror(ep, MDE_MAXIO, meta_getminor(raidnp->dev),
+		    colnp->dev, colnp->cname);
+		goto out;
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	Free(mr);
+	return (rval);
+}
+
+/*
+ * replace raid column
+ *
+ * Replaces column oldnp of raidnp with newnp.  oldnp and newnp may name
+ * the same device: when the raw names match but the device numbers
+ * differ the operation is a "rebind" of the column to its new dev_t.
+ *
+ *	options	MDCMD_FORCE selects FORCE_REPLACE_COMP,
+ *		MDCMD_CLUSTER_REPLACE skips validation of the new device
+ *		and asks the driver not to resync,
+ *		MDCMD_PRINT prints a confirmation on stdout
+ *
+ * Returns 0 on success, non-zero on failure with ep set.
+ */
+int
+meta_raid_replace(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mdname_t	*oldnp,
+	mdname_t	*newnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
+	replace_params_t	params;
+	md_dev64_t		old_dev, new_dev;
+	diskaddr_t		new_start_blk, new_end_blk;
+	/* only the non-cluster path below can turn this on */
+	int			rebind = 0;
+	mr_unit_t		*mr;
+	char			*new_devidp = NULL;
+	md_error_t		xep = mdnullerror;
+	int			ret;
+	md_set_desc		*sd;
+	uint_t			tstate;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* check name */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* save new binding incase this is a rebind where oldnp==newnp */
+	new_dev = newnp->dev;
+	new_start_blk = newnp->start_blk;
+	new_end_blk = newnp->end_blk;
+
+	/* invalidate, then get the raid (fill in oldnp from metadb) */
+	meta_invalidate_name(raidnp);
+	if (meta_get_raid(sp, raidnp, ep) == NULL)
+		return (-1);
+
+	/* can't replace a component if the raid inaccessible */
+	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
+		return (-1);
+	}
+	if (tstate & MD_INACCESSIBLE) {
+		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
+		    meta_getminor(raidnp->dev), raidnp->cname));
+	}
+
+	/* the old device binding is now established */
+	if ((old_dev = oldnp->dev) == NODEV64)
+		return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+
+	/* setup raid info */
+	(void) memset(&params, 0, sizeof (params));
+	params.mnum = meta_getminor(raidnp->dev);
+	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
+	params.old_dev = old_dev;
+	params.cmd = force ? FORCE_REPLACE_COMP : REPLACE_COMP;
+
+	if (options & MDCMD_CLUSTER_REPLACE) {
+		diskaddr_t	label;
+
+		/* make sure the unit can be fetched; this is a failure */
+		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+			return (-1);
+		Free(mr);
+
+		/*
+		 * NOTE(review): errors from metagetsize/metagetstart are
+		 * not checked on this path, and a label lookup failure is
+		 * treated as "no label" -- confirm this is intended for
+		 * cluster replace.
+		 */
+		params.options = MDIOCTL_NO_RESYNC_RAID;
+		params.number_blks = metagetsize(newnp, ep);
+		label = metagetlabel(newnp, ep);
+		if ((label == MD_DISKADDR_ERROR) || (label == 0))
+			params.has_label = 0;
+		else
+			params.has_label = 1;
+		params.start_blk = metagetstart(sp, newnp, ep);
+	} else {
+		if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
+		    (old_dev != new_dev)) {
+			rebind = 1;
+		} else {
+			rebind = 0;
+		}
+		if (rebind) {
+			/* meta_get_raid overwrote these; put them back */
+			newnp->dev = new_dev;
+			newnp->start_blk = new_start_blk;
+			newnp->end_blk = new_end_blk;
+		}
+
+		/*
+		 * Save a copy of the devid associated with the new disk, the
+		 * reason is that the checks for the column (meta_check_column)
+		 * via validate_new_raid(), could cause the disk's devid to be
+		 * changed to that of the devid that is currently stored in the
+		 * replica namespace for the disk in question. This devid could
+		 * be stale if we are replacing the disk. The actual function
+		 * that overwrites the devid is dr2drivedesc().
+		 */
+
+		/* don't setup new_devid if no devid's or MN diskset */
+		if (newnp->drivenamep->devid != NULL)
+			new_devidp = Strdup(newnp->drivenamep->devid);
+
+		if (!metaislocalset(sp)) {
+			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+				/* don't lose the saved copy */
+				Free(new_devidp);
+				return (-1);
+			}
+			if (MD_MNSET_DESC(sd)) {
+				/* release the copy instead of dropping it */
+				Free(new_devidp);
+				new_devidp = NULL;
+			}
+		}
+
+		/* check out new (sets up start_blk, has_label, number_blks) */
+		if (validate_new_raid(sp, raidnp, newnp, &params, rebind,
+		    ep) != 0) {
+			Free(new_devidp);
+			return (-1);
+		}
+
+		/*
+		 * Copy back the saved devid.  Ownership of the saved copy
+		 * moves to the drive name; when nothing was saved the field
+		 * ends up NULL rather than pointing at freed memory.
+		 */
+		Free(newnp->drivenamep->devid);
+		newnp->drivenamep->devid = new_devidp;
+		new_devidp = NULL;
+	}
+
+	/* store name in namespace, allocate new key */
+	if (add_key_name(sp, newnp, NULL, ep) != 0)
+		return (-1);
+
+	if (rebind && !metaislocalset(sp)) {
+		/*
+		 * We are 'rebind'ing a disk that is in a diskset so as well
+		 * as updating the diskset's namespace the local set needs
+		 * to be updated because it also contains a reference to the
+		 * disk in question.
+		 */
+		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
+		    newnp->cname, ep);
+
+		if (ret != METADEVADM_SUCCESS) {
+			/* keep the meta_fixdevid error in ep */
+			(void) del_key_name(sp, newnp, &xep);
+			return (-1);
+		}
+	}
+
+	/* replace column */
+	params.new_dev = new_dev;
+	params.new_key = newnp->key;
+	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
+		(void) del_key_name(sp, newnp, ep);
+		return (mdstealerror(ep, &params.mde));
+	}
+
+	/* clear cache */
+	meta_invalidate_name(oldnp);
+	meta_invalidate_name(newnp);
+	meta_invalidate_name(raidnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: device %s is replaced with %s\n"),
+		    raidnp->cname, oldnp->cname, newnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * enable raid column
+ *
+ * Re-enables column colnp of raidnp.  When the device number the file
+ * system reports for colnp differs from the one recorded in the metadb
+ * the work is handed to meta_raid_replace() as a rebind of the column
+ * onto itself; otherwise the driver is asked to enable the column with
+ * MD_IOCREPLACE.
+ *
+ *	options	MDCMD_FORCE selects FORCE_ENABLE_COMP,
+ *		MDCMD_PRINT prints a confirmation on stdout
+ *
+ * Returns 0 on success, non-zero on failure with ep set.
+ */
+int
+meta_raid_enable(
+ mdsetname_t *sp,
+ mdname_t *raidnp,
+ mdname_t *colnp,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ int force = ((options & MDCMD_FORCE) ? 1 : 0);
+ replace_params_t params;
+ md_dev64_t fs_dev, del_dev;
+ int err = 0;
+ char *devnm;
+ int ret;
+ uint_t tstate;
+
+ /* should have same set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+ /* check name */
+ if (metachkmeta(raidnp, ep) != 0)
+ return (-1);
+
+ /* get the file_system dev binding */
+ if (meta_getdev(sp, colnp, ep) != 0)
+ return (-1);
+ fs_dev = colnp->dev;
+
+ /* get the raid unit (fill in colnp->dev with metadb version) */
+ meta_invalidate_name(raidnp);
+ if (meta_get_raid(sp, raidnp, ep) == NULL)
+ return (-1);
+
+ /* enabling a component can't work if the raid inaccessible */
+ if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
+ return (-1);
+ }
+ if (tstate & MD_INACCESSIBLE) {
+ return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
+ meta_getminor(raidnp->dev), raidnp->cname));
+ }
+
+ /* the metadb device binding is now established */
+ if (colnp->dev == NODEV64)
+ return (mdsyserror(ep, ENODEV, colnp->cname));
+
+ /*
+ * check for the case where the dev_t has changed between the
+ * filesystem and the metadb. This is called a rebind, and
+ * is handled by meta_raid_replace.
+ */
+ if (fs_dev != colnp->dev) {
+ /*
+ * Save the devt of mddb version
+ */
+ del_dev = colnp->dev;
+
+ /* establish file system binding with invalid start/end */
+ colnp->dev = fs_dev;
+ colnp->start_blk = -1;
+ colnp->end_blk = -1;
+ err = meta_raid_replace(sp, raidnp, colnp, colnp, options, ep);
+
+ /*
+ * Don't do it if meta_raid_replace returns an error
+ *
+ * Otherwise look up the namespace entry that still refers to
+ * the old dev_t (this stores its key in colnp->key) and, if
+ * there is one, delete it.
+ */
+ if (!err && (devnm = meta_getnmentbydev(sp->setno, MD_SIDEWILD,
+ del_dev, NULL, NULL, &colnp->key, ep)) != NULL) {
+ (void) del_key_name(sp, colnp, ep);
+ Free(devnm);
+ }
+ return (err);
+ }
+
+ /* setup raid info; old and new device are the same when enabling */
+ (void) memset(&params, 0, sizeof (params));
+ params.mnum = meta_getminor(raidnp->dev);
+ MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
+ params.old_dev = params.new_dev = colnp->dev;
+ if (force)
+ params.cmd = FORCE_ENABLE_COMP;
+ else
+ params.cmd = ENABLE_COMP;
+
+ /* check it out (dup_ok: the column is already part of this raid) */
+ if (validate_new_raid(sp, raidnp, colnp, &params, 1, ep) != 0)
+ return (-1);
+
+ /* enable column */
+ if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0)
+ return (mdstealerror(ep, &params.mde));
+
+ /*
+ * are we dealing with a non-local set? If so need to update the
+ * local namespace so that the disk record has the correct devid.
+ */
+ if (!metaislocalset(sp)) {
+ ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, colnp->cname,
+ ep);
+
+ if (ret != METADEVADM_SUCCESS) {
+ /*
+ * Failed to update the local set. Nothing to do here
+ * apart from report the error. The namespace is
+ * most likely broken and some form of remedial
+ * recovery is going to be required.
+ */
+ mde_perror(ep, "");
+ mdclrerror(ep);
+ }
+ }
+
+ /* clear cache */
+ meta_invalidate_name(colnp);
+ meta_invalidate_name(raidnp);
+
+ /* let em know */
+ if (options & MDCMD_PRINT) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: device %s is enabled\n"),
+ raidnp->cname, colnp->cname);
+ (void) fflush(stdout);
+ }
+
+ /* return success */
+ return (0);
+}
+
+/*
+ * Check column "col" of the raid against every lower-numbered column
+ * of the same raid for overlap (i.e. the same device used twice).
+ *
+ * Returns 0 if meta_check_overlap() accepts every pairing, -1 as soon
+ * as one is rejected (ep is filled in by meta_check_overlap()).
+ */
+static int
+check_twice(
+	md_raid_t	*raidp,
+	uint_t		col,
+	md_error_t	*ep
+)
+{
+	mdname_t	*raidnp = raidp->common.namep;
+	md_raidcol_t	*colv = raidp->cols.cols_val;
+	mdname_t	*thisnp = colv[col].colnamep;
+	uint_t		prev;
+
+	/* compare against the columns that precede this one */
+	for (prev = 0; prev < col; prev++) {
+		mdname_t	*othernp = colv[prev].colnamep;
+
+		if (meta_check_overlap(raidnp->cname, thisnp, 0, -1,
+		    othernp, 0, -1, ep) != 0)
+			return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Return the default raid interlace: 16k expressed via btodb(), bumped
+ * up if that falls below lbtodb(MININTERLACE).
+ *
+ * NOTE(review): the roundup() call passes MININTERLACE itself rather
+ * than lbtodb(MININTERLACE), while the comparison uses the converted
+ * value; looks like a unit mismatch -- confirm before relying on the
+ * fallback branch.
+ */
+diskaddr_t
+meta_default_raid_interlace(void)
+{
+	diskaddr_t	dflt = btodb(16 * 1024);
+
+	/* 16k is already large enough: use it as is */
+	if (dflt >= lbtodb(MININTERLACE))
+		return (dflt);
+
+	/* otherwise round up as necessary */
+	dflt = roundup(MININTERLACE, dflt);
+	return (dflt);
+}
+
+/*
+ * Range-check a raid interlace value.
+ *
+ * Returns 0 when interlace lies within
+ * [btodb(RAID_MIN_INTERLACE), btodb(MAXINTERLACE)]; otherwise sets
+ * MDE_BAD_INTERLACE for uname in ep and returns mderror()'s result.
+ */
+int
+meta_raid_check_interlace(
+	diskaddr_t	interlace,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	if ((interlace >= btodb(RAID_MIN_INTERLACE)) &&
+	    (interlace <= btodb(MAXINTERLACE)))
+		return (0);
+
+	return (mderror(ep, MDE_BAD_INTERLACE, uname));
+}
+
+/*
+ * Validate a raid description before it is created.
+ *
+ *	sp	- set the raid belongs to
+ *	raidp	- raid description; interlace is defaulted if zero and
+ *		  every column's state is (re)initialized here
+ *	options	- MDCMD_DOIT enables the hot spare pool check,
+ *		  MDCMD_UPDATE suppresses the per-column device checks
+ *	ep	- error return
+ *
+ * Returns 0 if the description is acceptable; otherwise -1 or the
+ * md*error() helper's result, with the detail in ep.
+ */
+int
+meta_check_raid(
+	mdsetname_t	*sp,
+	md_raid_t	*raidp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*raidnp = raidp->common.namep;
+	minor_t		mnum = meta_getminor(raidnp->dev);
+	uint_t		ncol = raidp->cols.cols_len;
+	uint_t		col;
+
+	/* need enough columns, and no more originals than columns */
+	if ((ncol < MD_RAID_MIN) || (raidp->orig_ncol > ncol))
+		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
+
+	/* fill in the default interlace when none was given */
+	if (raidp->interlace == 0)
+		raidp->interlace = meta_default_raid_interlace();
+
+	/* only the INIT and OKAY unit states are acceptable */
+	if ((raidp->state != RUS_INIT) && (raidp->state != RUS_OKAY))
+		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
+
+	/* interlace must be in range */
+	if (meta_raid_check_interlace(raidp->interlace, raidnp->cname, ep) != 0)
+		return (-1);
+
+	/* the hot spare pool name is only checked when really doing it */
+	if ((options & MDCMD_DOIT) && (raidp->hspnamep != NULL) &&
+	    (metachkhsp(sp, raidp->hspnamep, ep) != 0))
+		return (-1);
+
+	/* now look at each column in turn */
+	for (col = 0; col < ncol; col++) {
+		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = mdcp->colnamep;
+
+		/* column state follows the unit state */
+		mdcp->state = (raidp->state == RUS_INIT) ? RCS_INIT : RCS_OKAY;
+
+		/* device checks are skipped for an update */
+		if (!(options & MDCMD_UPDATE)) {
+			diskaddr_t	start_blk;
+			diskaddr_t	size;
+
+			if (meta_check_column(sp, colnp, ep) != 0)
+				return (-1);
+
+			start_blk = metagetstart(sp, colnp, ep);
+			if (start_blk == MD_DISKADDR_ERROR)
+				return (-1);
+			size = metagetsize(colnp, ep);
+			if (size == MD_DISKADDR_ERROR)
+				return (-1);
+
+			/* something must remain past the start block ... */
+			if (start_blk >= size)
+				return (mdsyserror(ep, ENOSPC, colnp->cname));
+
+			/* ... and it must hold at least one interlace */
+			size -= start_blk;
+			size = rounddown(size, raidp->interlace);
+			if (size == 0)
+				return (mdsyserror(ep, ENOSPC, colnp->cname));
+		}
+
+		/* the column must not overlap an earlier column */
+		if (check_twice(raidp, col, ep) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Set up the geometry of raid unit "mr".
+ *
+ * The write/read reinstruct values are the largest found on any column;
+ * everything else is derived from the geometry of the first column via
+ * meta_setup_geom(), which is asked to round to cylinders (round_cyl = 1).
+ *
+ * Returns 0 on success, -1 on failure (ep filled in by the callee).
+ */
+static int
+raid_geom(
+	md_raid_t	*raidp,
+	mr_unit_t	*mr,
+	md_error_t	*ep
+)
+{
+	uint_t		worst_write = 0;
+	uint_t		worst_read = 0;
+	uint_t		round_cyl = 1;
+	uint_t		ncol = raidp->cols.cols_len;
+	uint_t		i;
+	mdgeom_t	*geomp;
+
+	/* find the worst (largest) reinstruct values over all columns */
+	for (i = 0; i < ncol; i++) {
+		geomp = metagetgeom(raidp->cols.cols_val[i].colnamep, ep);
+		if (geomp == NULL)
+			return (-1);
+
+		if (worst_write < geomp->write_reinstruct)
+			worst_write = geomp->write_reinstruct;
+		if (worst_read < geomp->read_reinstruct)
+			worst_read = geomp->read_reinstruct;
+	}
+
+	/* the first column supplies the base geometry */
+	assert(raidp->cols.cols_len > 0);
+	geomp = metagetgeom(raidp->cols.cols_val[0].colnamep, ep);
+	if (geomp == NULL)
+		return (-1);
+
+	return ((meta_setup_geom((md_unit_t *)mr, raidp->common.namep, geomp,
+	    worst_write, worst_read, round_cyl, ep) != 0) ? -1 : 0);
+}
+
+/*
+ * Count the columns of raid unit "mr" whose un_devstate has at least
+ * one of the bits in "state" set.
+ */
+int
+meta_raid_state_cnt(mr_unit_t *mr, rcs_state_t state)
+{
+	int	matches = 0;
+	int	i;
+
+	for (i = 0; i < mr->un_totalcolumncnt; i++) {
+		if ((mr->un_column[i].un_devstate & state) != 0)
+			matches++;
+	}
+
+	return (matches);
+}
+/*
+ * Validate that a raid device being created with the -k flag is a real
+ * raid device.
+ *
+ * For every column the first DEV_BSIZE bytes of its pre-write area
+ * (at un_pwstart) are read from the raw device and interpreted as a
+ * pre-write header.  The header must name the expected column number,
+ * every column must name the same unit, and -- when the header carries
+ * the RAID_PWMAGIC extension -- its layout fields must match "mr".
+ *
+ *	raidp	- raid description (supplies the column device names)
+ *	mr	- unit structure to validate against
+ *
+ * Returns 0 if every column checks out, -1 otherwise (including any
+ * open/seek/read failure or short read).
+ */
+int
+meta_raid_valid(md_raid_t *raidp, mr_unit_t *mr)
+{
+	long long	buf[DEV_BSIZE / sizeof (long long)];
+	raid_pwhdr_t	pwhdr;
+	raid_pwhdr_t	*rpw = &pwhdr;
+	minor_t		mnum = 0;	/* set from column 0's header */
+	int		col;
+	int		fd = -1;	/* -1 whenever no column is open */
+
+	for (col = 0; col < mr->un_totalcolumncnt; col++) {
+		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = cp->colnamep;
+
+		if ((fd = open(colnp->rname, O_RDONLY)) < 0)
+			goto error_exit;
+
+		if (lseek64(fd,
+		    (mr->un_column[col].un_pwstart * DEV_BSIZE), SEEK_SET) < 0)
+			goto error_exit;
+
+		/*
+		 * A short read would leave part of buf uninitialized, so
+		 * anything other than a full block is treated as failure.
+		 */
+		if (read(fd, buf, DEV_BSIZE) != DEV_BSIZE)
+			goto error_exit;
+
+		/*
+		 * If our raid device is a 64 bit device, we can accept the
+		 * pw header we just read in.
+		 * Otherwise it's of type raid_pwhdr32_od_t and has to
+		 * be converted.
+		 */
+		if (mr->c.un_revision == MD_64BIT_META_DEV) {
+			rpw = (raid_pwhdr_t *)buf;
+		} else {
+			RAID_CONVERT_RPW((raid_pwhdr32_od_t *)buf, rpw);
+		}
+
+		/* the header must belong to this column ... */
+		if (rpw->rpw_column != col)
+			goto error_exit;
+
+		/* ... and all columns must agree on the unit */
+		if (col == 0)
+			mnum = rpw->rpw_unit;
+
+		if (rpw->rpw_unit != mnum)
+			goto error_exit;
+
+		if (rpw->rpw_magic_ext == RAID_PWMAGIC) {
+			/* 4.1 prewrite header */
+			if ((rpw->rpw_origcolumncnt != mr->un_origcolumncnt) ||
+			    (rpw->rpw_totalcolumncnt
+			    != mr->un_totalcolumncnt) ||
+			    (rpw->rpw_segsize != mr->un_segsize) ||
+			    (rpw->rpw_segsincolumn != mr->un_segsincolumn) ||
+			    (rpw->rpw_pwcnt != mr->un_pwcnt) ||
+			    (rpw->rpw_pwstart !=
+			    mr->un_column[col].un_pwstart) ||
+			    (rpw->rpw_devstart !=
+			    mr->un_column[col].un_devstart) ||
+			    (rpw->rpw_pwsize != mr->un_pwsize))
+				goto error_exit;
+		}
+		/*
+		 * this is an old prewrite header (4.0) the unit structure
+		 * will have to be trusted.
+		 */
+		(void) close(fd);
+		fd = -1;
+	}
+
+	return (0);
+
+error_exit:
+	/* only close a descriptor that was actually opened */
+	if (fd >= 0)
+		(void) close(fd);
+	return (-1);
+}
+
+/*
+ * Create a raid metadevice from the description in raidp.
+ *
+ *	sp	- set the raid belongs to
+ *	raidp	- raid description (name, columns, interlace, state, ...)
+ *	options	- MDCMD_DOIT: really create it (store column names in the
+ *		  namespace and issue MD_IOCSET); without it this is a
+ *		  dry run.  MDCMD_UPDATE: write the computed size and
+ *		  column size back into raidp.
+ *	ep	- error return
+ *
+ * Returns 0 on success; -1 (or the md*error() helper's result) on
+ * failure, with the detail in ep.  On failure any column names added
+ * to the namespace here are removed again.
+ */
+int
+meta_create_raid(
+	mdsetname_t	*sp,
+	md_raid_t	*raidp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*raidnp = raidp->common.namep;
+	uint_t		ncol = raidp->cols.cols_len;
+	uint_t		orig_ncol = raidp->orig_ncol;
+	size_t		rdsize;
+	mr_unit_t	*mr;
+	uint_t		col;
+	diskaddr_t	disk_size = 0;
+	uint_t		disk_maxio = 0;
+	uint_t		pwes;
+	diskaddr_t	non_pw_blks, column_size;
+	mdnamelist_t	*keynlp = NULL;
+	md_set_params_t	set_params;
+	int		rval = -1;
+	md_timeval32_t	creation_time;
+	int		create_flag = MD_CRO_32BIT;
+
+	/* validate raid */
+	if (meta_check_raid(sp, raidp, options, ep) != 0)
+		return (-1);
+
+	/*
+	 * Take the creation timestamp before allocating the unit so that
+	 * a failure here can simply return without leaking the unit.
+	 */
+	if (meta_gettimeofday(&creation_time) == -1)
+		return (mdsyserror(ep, errno, NULL));
+
+	/* allocate raid unit; mr_unit_t already contains one column */
+	rdsize = sizeof (*mr) - sizeof (mr->un_column[0]);
+	rdsize += ncol * sizeof (mr->un_column[0]);
+	mr = Zalloc(rdsize);
+
+	/*
+	 * initialize the top level mr_unit_t structure
+	 * setup the unit state to indicate whether to retain
+	 * any data currently on the metadevice or to clear it
+	 */
+	mr->c.un_type = MD_METARAID;
+	MD_SID(mr) = meta_getminor(raidnp->dev);
+	mr->c.un_size = rdsize;
+	mr->un_magic = RAID_UNMAGIC;
+	mr->un_state = raidp->state;
+	mr->un_timestamp = creation_time;
+	mr->un_origcolumncnt = orig_ncol;
+	mr->un_segsize = (uint_t)raidp->interlace;
+	if (raidp->hspnamep != NULL) {
+		mr->un_hsp_id = raidp->hspnamep->hsp;
+	} else {
+		mr->un_hsp_id = MD_HSP_NONE;
+	}
+	/*
+	 * setup original columns, saving start_block and
+	 * finding smallest size and maxio
+	 */
+	for (col = 0; (col < orig_ncol); ++col) {
+		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = cp->colnamep;
+		mr_column_t	*mdc = &mr->un_column[col];
+		diskaddr_t	size;
+		uint_t		maxio;
+		mdcinfo_t	*cinfop;
+
+		/* setup state */
+		mdc->un_devstate = cp->state;
+
+		/* setup creation time */
+		mdc->un_devtimestamp = creation_time;
+
+		/* get start, size, and maxio */
+		if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
+		    MD_DISKADDR_ERROR)
+			goto out;
+		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		size -= mdc->un_orig_devstart;
+		if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
+			goto out;
+		maxio = cinfop->maxtransfer;
+
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (add_key_name(sp, colnp, &keynlp, ep) != 0)
+				goto out;
+		}
+
+		/*
+		 * setup column; the pwstart/devstart values set here are
+		 * provisional and are recomputed in the pre-write pass
+		 * further down
+		 */
+		mdc->un_orig_key = colnp->key;
+		mdc->un_orig_dev = colnp->dev;
+		mdc->un_dev = mdc->un_orig_dev;
+		mdc->un_pwstart = mdc->un_orig_pwstart;
+		mdc->un_devstart = mdc->un_orig_devstart;
+		mdc->un_alt_dev = NODEV64;
+		mdc->un_alt_pwstart = 0;
+		mdc->un_alt_devstart = 0;
+		mdc->un_hs_id = 0;
+		if (mr->un_state == RUS_INIT)
+			mdc->un_devstate = RCS_INIT;
+		else
+			mdc->un_devstate = RCS_OKAY;
+
+		/* adjust for smallest disk */
+		if (disk_size == 0) {
+			disk_size = size;
+		} else if (size < disk_size) {
+			disk_size = size;
+		}
+		if (disk_maxio == 0) {
+			disk_maxio = maxio;
+		} else if (maxio < disk_maxio) {
+			disk_maxio = maxio;
+		}
+	}
+	assert(col == mr->un_origcolumncnt);
+
+	/*
+	 * before processing any of the attached column(s)
+	 * set up the composition of the metadevice for column
+	 * sizes and pre-write information
+	 */
+	mr->un_maxio = disk_maxio;	/* smallest maxio */
+	mr->un_iosize = min(mr->un_maxio, (mr->un_segsize + 1));
+	pwes = mr->un_iosize;
+	if (raidp->pw_count)
+		mr->un_pwcnt = raidp->pw_count;
+	else
+		mr->un_pwcnt = PWCNT_MIN;
+	if ((mr->un_pwcnt < PWCNT_MIN) || (mr->un_pwcnt > PWCNT_MAX)) {
+		(void) mderror(ep, MDE_RAID_BAD_PW_CNT, raidnp->cname);
+		goto out;
+	}
+	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
+
+	/* now calculate the number of segments per column */
+	non_pw_blks = disk_size - mr->un_pwsize;	/* smallest disk */
+	if ((mr->un_pwsize > disk_size) ||
+	    (non_pw_blks < (diskaddr_t)mr->un_segsize)) {
+		(void) mdsyserror(ep, ENOSPC, raidnp->cname);
+		goto out;
+	}
+	mr->un_segsincolumn = non_pw_blks / mr->un_segsize;
+	column_size = mr->un_segsize * mr->un_segsincolumn;
+
+	/*
+	 * adjust the pw_cnt, pw_size, to fit into any fragmentation
+	 * left over after column_size has been computed
+	 */
+	mr->un_pwsize = rounddown(((uint_t)(disk_size - column_size)), 2);
+	mr->un_pwcnt = mr->un_pwsize / pwes;
+	assert(mr->un_pwcnt >= PWCNT_MIN);
+	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
+	assert((mr->un_pwsize + column_size) <= disk_size);
+
+	/*
+	 * calculate the actual block count available based on the
+	 * segment size and the number of segments per column ...
+	 * ... and adjust for the number of parity segments
+	 */
+	mr->c.un_actual_tb = column_size * (mr->un_origcolumncnt - 1);
+
+	if (raid_geom(raidp, mr, ep) != 0)
+		goto out;
+
+	create_flag = meta_check_devicesize(mr->c.un_total_blocks);
+
+	/*
+	 * now calculate the pre-write offset and update the column
+	 * structures to include the address of the individual pre-write
+	 * areas
+	 */
+	for (col = 0; (col < orig_ncol); ++col) {
+		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = cp->colnamep;
+		mr_column_t	*mdc = &mr->un_column[col];
+		diskaddr_t	size;
+
+		/* get size */
+		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+
+		/* adjust start and size by prewrite */
+		mdc->un_orig_pwstart = mdc->un_orig_devstart;
+		mdc->un_orig_devstart += mr->un_pwsize;
+		mdc->un_pwstart = mdc->un_orig_pwstart;
+		mdc->un_devstart = mdc->un_orig_devstart;
+
+		assert(size >= mdc->un_orig_devstart);
+		size -= mdc->un_orig_devstart;
+
+		/* make sure we still have something left */
+		assert(size >= column_size);
+	}
+
+	/* do concat cols */
+	mr->un_totalcolumncnt = mr->un_origcolumncnt;
+	assert(col == mr->un_origcolumncnt);
+	for (col = orig_ncol; (col < ncol); ++col) {
+		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = cp->colnamep;
+		mr_column_t	*mdc = &mr->un_column[col];
+
+		/* attach column */
+		if (attach_raid_col(sp, raidnp, mr, mdc, colnp,
+		    cp->state, &keynlp, options, ep) != 0) {
+			goto out;
+		}
+	}
+	assert(mr->un_totalcolumncnt == ncol);
+
+	/* fill in the size of the raid */
+	if (options & MDCMD_UPDATE) {
+		raidp->common.size = mr->c.un_total_blocks;
+		raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
+	}
+
+	/* if we're not doing anything, return success */
+	if (! (options & MDCMD_DOIT)) {
+		rval = 0;	/* success */
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): meta_raid_valid() consults mr->c.un_revision,
+	 * which is only assigned below -- confirm the intended ordering.
+	 */
+	if ((mr->un_state & RUS_OKAY) &&
+	    (meta_raid_valid(raidp, mr) != 0)) {
+		(void) mderror(ep, MDE_RAID_INVALID, raidnp->cname);
+		goto out;
+	}
+
+	/* create raid */
+	(void) memset(&set_params, 0, sizeof (set_params));
+	/* did the user tell us to generate a large device? */
+	if (create_flag == MD_CRO_64BIT) {
+		mr->c.un_revision = MD_64BIT_META_DEV;
+		set_params.options = MD_CRO_64BIT;
+	} else {
+		mr->c.un_revision = MD_32BIT_META_DEV;
+		set_params.options = MD_CRO_32BIT;
+	}
+	set_params.mnum = MD_SID(mr);
+	set_params.size = mr->c.un_size;
+	set_params.mdp = (uintptr_t)mr;
+	MD_SETDRIVERNAME(&set_params, MD_RAID, MD_MIN2SET(set_params.mnum));
+	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+	    raidnp->cname) != 0) {
+		(void) mdstealerror(ep, &set_params.mde);
+		goto out;
+	}
+	rval = 0;	/* success */
+
+	/* cleanup, return success */
+out:
+	Free(mr);
+	if (rval != 0) {
+		/* back out any names we added to the namespace */
+		(void) del_key_names(sp, keynlp, NULL);
+	}
+	metafreenamelist(keynlp);
+	if ((rval == 0) && (options & MDCMD_DOIT)) {
+		if (invalidate_columns(sp, raidnp, ep) != 0)
+			rval = -1;
+		meta_invalidate_name(raidnp);
+	}
+	return (rval);
+}
+
+/*
+ * initialize raid
+ * NOTE: this function is metainit(1m)'s command line parser!
+ *
+ * The argument vector is expected to look like:
+ *	name -r column... [-h hsp] [-i interlace] [-k] [-o orig_ncol]
+ *	    [-w pw_count]
+ * where the column list runs up to the first argument starting with '-'.
+ *
+ * spp     - in/out set pointer, resolved by metaname() from the raid name
+ * argc    - number of entries in argv (must be at least 1)
+ * argv    - argument vector described above
+ * options - MDCMD_NOLOCK skips taking the set lock and the ownership
+ *           check; MDCMD_PRINT reports success on stdout; the rest is
+ *           passed through to meta_create_raid()
+ * ep      - error return
+ *
+ * Returns 0 on success.  On failure returns -1, or the result of
+ * meta_cook_syntax()/mderror(), with the detail in ep.
+ */
+int
+meta_init_raid(
+ mdsetname_t **spp,
+ int argc,
+ char *argv[],
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ char *uname = argv[0];
+ mdname_t *raidnp = NULL;
+ int old_optind;
+ int c;
+ md_raid_t *raidp = NULL;
+ uint_t ncol, col;
+ int rval = -1;
+ md_set_desc *sd;
+
+ /* get raid name */
+ assert(argc > 0);
+ if (argc < 1)
+ goto syntax;
+ if ((raidnp = metaname(spp, uname, ep)) == NULL)
+ goto out;
+ assert(*spp != NULL);
+
+ /*
+ * Raid metadevice not allowed on multi-node diskset.
+ */
+ if (! metaislocalset(*spp)) {
+ if ((sd = metaget_setdesc(*spp, ep)) == NULL)
+ goto out;
+ if (MD_MNSET_DESC(sd)) {
+ rval = meta_cook_syntax(ep, MDE_MNSET_NORAID, uname,
+ argc, argv);
+ goto out;
+ }
+ }
+
+ /* from here on report errors against the canonical name */
+ uname = raidnp->cname;
+ if (metachkmeta(raidnp, ep) != 0)
+ goto out;
+
+ if (!(options & MDCMD_NOLOCK)) {
+ /* grab set lock */
+ if (meta_lock(*spp, TRUE, ep) != 0)
+ goto out;
+
+ if (meta_check_ownership(*spp, ep) != 0)
+ goto out;
+ }
+
+ /*
+ * see if it exists already; the only acceptable outcome is the
+ * lookup failing with MDE_UNIT_NOT_SETUP, which is then cleared
+ */
+ if (metagetmiscname(raidnp, ep) != NULL) {
+ (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+ meta_getminor(raidnp->dev), uname);
+ goto out;
+ } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
+ goto out;
+ } else {
+ mdclrerror(ep);
+ }
+ --argc, ++argv;
+
+ /* grab -r */
+ if ((argc < 1) || (strcmp(argv[0], "-r") != 0))
+ goto syntax;
+ --argc, ++argv;
+
+ /*
+ * parse general options: the option string is empty, so any
+ * option found at this position is reported as an option error
+ */
+ optind = 0;
+ opterr = 0;
+ if (getopt(argc, argv, "") != -1)
+ goto options;
+
+ /* allocate raid */
+ raidp = Zalloc(sizeof (*raidp));
+
+ /* setup common */
+ raidp->common.namep = raidnp;
+ raidp->common.type = MD_METARAID;
+ raidp->state = RUS_INIT;
+
+ /* allocate and parse cols: count arguments up to the first option */
+ for (ncol = 0; ((ncol < argc) && (argv[ncol][0] != '-')); ++ncol)
+ ;
+ raidp->cols.cols_len = ncol;
+ if (ncol != 0) {
+ raidp->cols.cols_val =
+ Zalloc(ncol * sizeof (*raidp->cols.cols_val));
+ }
+ for (col = 0; ((argc > 0) && (col < ncol)); ++col) {
+ md_raidcol_t *mdc = &raidp->cols.cols_val[col];
+ mdname_t *colnp;
+
+ /* parse column name */
+ if ((colnp = metaname(spp, argv[0], ep)) == NULL)
+ goto out;
+ /* check for soft partitions */
+ if (meta_sp_issp(*spp, colnp, ep) != 0) {
+ /* check disks */
+ if (metachkcomp(colnp, ep) != 0)
+ goto out;
+ }
+ mdc->colnamep = colnp;
+ --argc, ++argv;
+ }
+
+ /* parse raid options */
+ old_optind = optind = 0;
+ opterr = 0;
+ while ((c = getopt(argc, argv, "h:i:ko:w:")) != -1) {
+ switch (c) {
+ /* -h: hot spare pool name */
+ case 'h':
+ if ((raidp->hspnamep = metahspname(spp, optarg,
+ ep)) == NULL) {
+ goto out;
+ }
+ break;
+
+ /* -i: interlace, parsed and then range-checked */
+ case 'i':
+ if (parse_interlace(uname, optarg, &raidp->interlace,
+ ep) != 0) {
+ goto out;
+ }
+ if (meta_raid_check_interlace(raidp->interlace,
+ uname, ep))
+ goto out;
+ break;
+
+ /* -k: start in RUS_OKAY instead of the default RUS_INIT */
+ case 'k':
+ raidp->state = RUS_OKAY;
+ break;
+
+ /* -o: number of original columns, MD_RAID_MIN..ncol */
+ case 'o':
+ if ((sscanf(optarg, "%u", &raidp->orig_ncol) != 1) ||
+ ((int)raidp->orig_ncol < 0)) {
+ goto syntax;
+ }
+ if ((raidp->orig_ncol < MD_RAID_MIN) ||
+ (raidp->orig_ncol > ncol)) {
+ rval = mderror(ep, MDE_BAD_ORIG_NCOL, uname);
+ goto out;
+ }
+ break;
+ /* -w: pre-write count, PWCNT_MIN..PWCNT_MAX */
+ case 'w':
+ if ((sscanf(optarg, "%d", &raidp->pw_count) != 1) ||
+ ((int)raidp->pw_count < 0))
+ goto syntax;
+ if (((int)raidp->pw_count < PWCNT_MIN) ||
+ ((int)raidp->pw_count > PWCNT_MAX)) {
+ rval = mderror(ep, MDE_RAID_BAD_PW_CNT, uname);
+ goto out;
+ }
+ break;
+ /*
+ * NOTE(review): this grows argc and moves argv backwards by
+ * old_optind, the opposite of the adjustment made after the
+ * loop -- confirm this is the intended context for the error.
+ */
+ default:
+ argc += old_optind;
+ argv -= old_optind;
+ goto options;
+ }
+ old_optind = optind;
+ }
+ argc -= optind;
+ argv += optind;
+
+ /* we should be at the end */
+ if (argc != 0)
+ goto syntax;
+
+ /* default to all original columns */
+ if (raidp->orig_ncol == 0)
+ raidp->orig_ncol = ncol;
+
+ /* create raid */
+ if (meta_create_raid(*spp, raidp, options, ep) != 0)
+ goto out;
+ rval = 0; /* success */
+
+ /* let em know */
+ if (options & MDCMD_PRINT) {
+ (void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is setup\n"),
+ uname);
+ (void) fflush(stdout);
+ }
+ goto out;
+
+ /* syntax error */
+syntax:
+ rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
+ goto out;
+
+ /* options error */
+options:
+ rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
+ goto out;
+
+ /* cleanup (shared by the success and error paths), return rval */
+out:
+ if (raidp != NULL)
+ meta_free_raid(raidp);
+ return (rval);
+}
+
+/*
+ * Reset (clear) one raid, or every raid in the set.
+ *
+ *	sp	- set to operate on; must not be NULL
+ *	raidnp	- raid to clear, or NULL to clear all raids in sp
+ *	options	- MDCMD_PRINT reports each cleared raid on stdout;
+ *		  MDCMD_RECURSE also resets columns that are themselves
+ *		  metadevices; the rest is passed to meta_reset()
+ *	ep	- error return
+ *
+ * Returns 0 on success; -1 (or mdmderror()'s result when the raid has
+ * a parent) on failure.  When clearing all raids the walk stops at the
+ * first failure.
+ */
+int
+meta_raid_reset(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp;
+	int		rval = -1;
+	int		col;
+
+	/* the raid, if named, has to be in the set we were given */
+	assert(sp != NULL);
+	assert((raidnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
+
+	/* no name given: walk every raid in the set */
+	if (raidnp == NULL) {
+		mdnamelist_t	*raidnlp = NULL;
+		mdnamelist_t	*p;
+
+		if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
+			return (-1);
+
+		rval = 0;
+		for (p = raidnlp; (p != NULL) && (rval == 0); p = p->next) {
+			if (meta_raid_reset(sp, p->namep, options, ep) != 0)
+				rval = -1;
+		}
+
+		metafreenamelist(raidnlp);
+		return (rval);
+	}
+
+	/* must be a metadevice name */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* fetch the unit structure */
+	raidp = meta_get_raid(sp, raidnp, ep);
+	if (raidp == NULL)
+		return (-1);
+
+	/* refuse to clear a raid that is part of another metadevice */
+	if (MD_HAS_PARENT(raidp->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(raidnp->dev),
+		    raidnp->cname));
+	}
+
+	/* flush cached information about the columns */
+	if (invalidate_columns(sp, raidnp, ep) != 0)
+		return (-1);
+
+	/* clear the metadevice itself; rval stays -1 if that fails */
+	if (meta_reset(sp, raidnp, options, ep) == 0) {
+		rval = 0;
+
+		/* tell the user */
+		if (options & MDCMD_PRINT) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: RAID is cleared\n"),
+			    raidnp->cname);
+			(void) fflush(stdout);
+		}
+
+		/* optionally clear columns that are metadevices too */
+		if (options & MDCMD_RECURSE) {
+			for (col = 0; col < raidp->cols.cols_len; col++) {
+				mdname_t	*colnp =
+				    raidp->cols.cols_val[col].colnamep;
+
+				if (metaismeta(colnp) &&
+				    (meta_reset_by_name(sp, colnp, options,
+				    ep) != 0))
+					rval = -1;
+			}
+		}
+	}
+
+	/* always drop the cached name before returning */
+	meta_invalidate_name(raidnp);
+	return (rval);
+}
+
+/*
+ * Report TRUE if any raid in raid_names cannot be fetched or is in a
+ * state other than RUS_OKAY / RUS_INIT; FALSE otherwise.  The scan
+ * stops at the first such raid.  Errors raised while looking are
+ * cleared and not passed back to the caller.
+ */
+int
+meta_raid_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *raid_names)
+{
+	md_error_t	status = mdnullerror;
+	md_error_t	*ep = &status;
+	mdnamelist_t	*nlp;
+	int		any_errs = FALSE;
+
+	for (nlp = raid_names; nlp != NULL; nlp = nlp->next) {
+		md_raid_t	*raidp = meta_get_raid(sp, nlp->namep, ep);
+
+		if ((raidp == NULL) ||
+		    ((raidp->state != RUS_OKAY) &&
+		    (raidp->state != RUS_INIT))) {
+			any_errs = TRUE;
+			break;
+		}
+	}
+
+	/* the error detail is private to this function */
+	if (!mdisok(ep))
+		mdclrerror(ep);
+
+	return (any_errs);
+}
+/*
+ * Regenerate parity on a raid by issuing MD_IOCSETREGEN.
+ *
+ *	sp	- set containing raidnp; must not be NULL
+ *	raidnp	- raid metadevice
+ *	size	- stored in the request's ri_copysize
+ *	ep	- error return
+ *
+ * Returns 0 on success; -1 or the md*error() helper's result otherwise.
+ */
+int
+meta_raid_regen_byname(mdsetname_t *sp, mdname_t *raidnp, diskaddr_t size,
+	md_error_t *ep)
+{
+	md_resync_ioctl_t	regen;
+	char			*drvname;
+
+	/* the raid has to be in the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* the driver behind the name has to be the raid driver */
+	drvname = metagetmiscname(raidnp, ep);
+	if (drvname == NULL)
+		return (-1);
+	if (strcmp(drvname, MD_RAID) != 0) {
+		return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
+		    raidnp->cname));
+	}
+
+	/* build the request and kick off the regen */
+	(void) memset(&regen, 0, sizeof (regen));
+	MD_SETDRIVERNAME(&regen, MD_RAID, sp->setno);
+	regen.ri_mnum = meta_getminor(raidnp->dev);
+	regen.ri_copysize = size;
+	if (metaioctl(MD_IOCSETREGEN, &regen, &regen.mde, raidnp->cname) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &regen.mde));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_raid_resync.c b/usr/src/lib/lvm/libmeta/common/meta_raid_resync.c
new file mode 100644
index 0000000000..061299022f
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_raid_resync.c
@@ -0,0 +1,130 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1994-2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * raid operations
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mirror.h>
+
+/*
+ * Start a resync of a raid by issuing MD_IOCSETSYNC.
+ *
+ *	sp	- set containing raidnp; must not be NULL
+ *	raidnp	- raid metadevice
+ *	size	- stored in the request's ri_copysize
+ *	ep	- error return
+ *
+ * Returns 0 on success; -1 or the md*error() helper's result otherwise.
+ */
+int
+meta_raid_resync(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	daddr_t		size,
+	md_error_t	*ep
+)
+{
+	md_resync_ioctl_t	req;
+	char			*drvname;
+
+	/* the raid has to be in the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* the driver behind the name has to be the raid driver */
+	drvname = metagetmiscname(raidnp, ep);
+	if (drvname == NULL)
+		return (-1);
+	if (strcmp(drvname, MD_RAID) != 0) {
+		return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
+		    raidnp->cname));
+	}
+
+	/* build the request and start the resync */
+	(void) memset(&req, 0, sizeof (req));
+	MD_SETDRIVERNAME(&req, MD_RAID, sp->setno);
+	req.ri_mnum = meta_getminor(raidnp->dev);
+	req.ri_copysize = size;
+	if (metaioctl(MD_IOCSETSYNC, &req, &req.mde, raidnp->cname) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &req.mde));
+}
+
+/*
+ * NAME:	meta_raid_resync_all
+ * DESCRIPTION: loop through the RAID devices in a set, resyncing each
+ * PARAMETERS:	mdsetname_t *sp	- the set to resync
+ *		daddr_t size	- resync size
+ *		md_error_t *ep	- return error info
+ * RETURNS:	-1 if the raid names cannot be fetched; 0 in the parent
+ *		once md_daemonize() has forked off the worker.  The
+ *		forked worker exits with status 0 instead of returning.
+ *		If md_daemonize() fails the resyncs run in the calling
+ *		process and -1 is returned if any of them failed.
+ */
+int
+meta_raid_resync_all(
+	mdsetname_t	*sp,
+	daddr_t		size,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*raidnlp = NULL;
+	mdnamelist_t	*cur;
+	int		status = 0;
+	int		fval;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* collect the raids in the set */
+	if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
+		return (-1);
+
+	/*
+	 * md_daemonize forks off a process to do the work.  A positive
+	 * result means this is the parent, which has nothing more to do.
+	 */
+	fval = md_daemonize(sp, ep);
+	if (fval > 0) {
+		if (raidnlp != NULL)
+			metafreenamelist(raidnlp);
+		return (0);
+	}
+
+	/* any other non-zero result is an error: forget it and carry on */
+	if (fval != 0)
+		mdclrerror(ep);
+
+	assert((fval == 0) || (fval == -1));
+
+	/* resync every raid, remembering whether any attempt failed */
+	for (cur = raidnlp; cur != NULL; cur = cur->next) {
+		if (meta_raid_resync(sp, cur->namep, size, ep) != 0)
+			status = -1;
+	}
+
+	/* cleanup; the forked worker exits rather than returning */
+	if (raidnlp != NULL)
+		metafreenamelist(raidnlp);
+	if (fval == 0)
+		exit(0);
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_rename.c b/usr/src/lib/lvm/libmeta/common/meta_rename.c
new file mode 100644
index 0000000000..617b3f3694
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_rename.c
@@ -0,0 +1,539 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * change the identity of a metadevice
+ * These are the "do it" functions for the metarename command.
+ */
+
+#include <string.h>
+#include <meta.h>
+#include <sys/lvm/md_rename.h>
+
+/* private */
+#define FORCE (0x00000001)
+#define NOISY (0x00000010)
+#define NOFLIP (0x00000020)
+#define DRYRUN (0x00000040)
+
+#define OP_STR(op) \
+ ((op) == MDRNOP_EXCHANGE? "exchange": \
+ (op) == MDRNOP_RENAME? "rename": \
+ (op) == MDRNOP_UNK? "<unknown>": "garbage")
+
+
+/*
+ * Find out whether from_np is currently open.
+ *
+ * Returns 0 when meta_isopen() reports that it is not open.
+ * Returns -1 when it is open (ep is set to MDE_RENAME_BUSY unless it
+ * already carries an error) or when meta_isopen() itself failed.
+ */
+static int
+check_open(
+	mdsetname_t	*sp,
+	mdname_t	*from_np,
+	md_error_t	*ep)
+{
+	int	open_state = meta_isopen(sp, from_np, ep, (mdcmdopts_t)0);
+
+	/* not open: nothing to complain about */
+	if (open_state == 0)
+		return (0);
+
+	/* the query failed; ep must already describe why */
+	if (open_state < 0) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	/* open: flag it as busy unless an error is already recorded */
+	if (mdisok(ep)) {
+		(void) mdmderror(ep, MDE_RENAME_BUSY,
+		    meta_getminor(from_np->dev),
+		    from_np->cname);
+	}
+	return (-1);
+}
+
+/*
+ * meta_swap is the common code used by the
+ * meta_rename() and meta_exchange() entry points
+ *
+ * It makes sure both names carry a namespace key (calling
+ * add_key_name() when the key is MD_KEYWILD or MD_KEYBAD), fills in
+ * an md_rename_t for the requested op and issues MD_IOCRENAME.
+ * Of the flags only NOISY is examined here; it dumps the transaction
+ * to stderr before the ioctl.
+ *
+ * Returns 0 on success. On failure ep is set and the result is -1 or
+ * whatever mdstealerror() returns.
+ */
+
+static int
+meta_swap(
+ mdsetname_t *sp,
+ mdname_t *from_np,
+ mdname_t *to_np,
+ md_renop_t op,
+ int flags,
+ md_error_t *ep)
+{
+ md_rename_t txn;
+
+ /*
+ * If the device exists a key may already exist so need to find it
+ * otherwise we'll end up adding the key in again which will lead
+ * to an inconsistent n_count for the namespace record.
+ * The result of the lookup is deliberately ignored; only the key
+ * stored into from_np->key matters for the test that follows.
+ */
+ if (from_np->dev != NODEV) {
+ (void) meta_getnmentbydev(sp->setno, MD_SIDEWILD, from_np->dev,
+ NULL, NULL, &from_np->key, ep);
+ }
+
+ if ((from_np->key == MD_KEYWILD) || (from_np->key == MD_KEYBAD)) {
+ if (add_key_name(sp, from_np, NULL, ep) != 0) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+ }
+
+ (void) memset(&txn, 0, sizeof (txn));
+
+ txn.op = op;
+ txn.revision = MD_RENAME_VERSION;
+ txn.flags = 0;
+ txn.from.mnum = meta_getminor(from_np->dev);
+ txn.from.key = from_np->key;
+
+ if ((txn.from.key == MD_KEYBAD) || (txn.from.key == MD_KEYWILD)) {
+ (void) mdmderror(ep, MDE_RENAME_SOURCE_BAD, txn.from.mnum,
+ from_np->cname);
+ return (-1);
+ }
+
+ if ((to_np->key == MD_KEYWILD) || (to_np->key == MD_KEYBAD)) {
+ if (add_key_name(sp, to_np, NULL, ep) != 0) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+ }
+
+ txn.to.mnum = meta_getminor(to_np->dev);
+ txn.to.key = to_np->key;
+
+ if ((txn.to.key == MD_KEYBAD) || (txn.to.key == MD_KEYWILD)) {
+ (void) mdmderror(ep, MDE_RENAME_TARGET_BAD, txn.to.mnum,
+ to_np->cname);
+ return (-1);
+ }
+
+ if (flags & NOISY) {
+ (void) fprintf(stderr, "\top: %s\n", OP_STR(txn.op));
+ (void) fprintf(stderr, "\trevision: %d, flags: %d\n",
+ txn.revision, txn.flags);
+ (void) fprintf(stderr,
+ "\tfrom(mnum,key): %ld, %d\tto: %ld, %d\n",
+ txn.from.mnum, txn.from.key,
+ txn.to.mnum, txn.to.key);
+ }
+
+ mdclrerror(ep);
+ if (metaioctl(MD_IOCRENAME, &txn, &txn.mde, from_np->cname) != 0) {
+ (void) del_key_name(sp, to_np, ep); /* NOTE(review): removes the to_np key even if it was not added by this call - confirm intended */
+ return (mdstealerror(ep, &txn.mde));
+ }
+
+ /* force the name cache to re-read device state */
+ meta_invalidate_name(from_np);
+ meta_invalidate_name(to_np);
+
+ return (0);
+}
+
+/*
+ * rename a metadevice
+ *
+ * from_np must be an existing metadevice and to_np must not yet have a
+ * unit (MDE_UNIT_ALREADY_SETUP otherwise). Unless MDCMD_FORCE is given
+ * the rename is refused while from_np is open. Without MDCMD_DOIT only
+ * these checks are run (dry run) and 0 is returned.
+ *
+ * For the real work a namespace key is added for the new name - one
+ * record for the local set, one per node/side for other sets - and
+ * meta_swap() is called with MDRNOP_RENAME. With MDCMD_PRINT a
+ * confirmation is written to stdout on success.
+ *
+ * Setting MD_DEBUG to a string containing "RENAME" turns on the NOISY
+ * output of meta_swap().
+ *
+ * Returns 0 on success, otherwise -1 (or the meta_swap() result) with
+ * ep set.
+ */
+int
+meta_rename(
+ mdsetname_t *sp,
+ mdname_t *from_np,
+ mdname_t *to_np,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ int flags = (options & MDCMD_FORCE)? FORCE: 0;
+ int rc = 0;
+ mdcinfo_t *cinfop;
+ char *p;
+ md_set_desc *sd;
+ mdkey_t side_key = MD_KEYWILD;
+ md_error_t dummy_ep = mdnullerror;
+ int i, j;
+ md_mnnode_desc *nd, *nd_del;
+
+ /* must have a set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(from_np->dev)));
+
+ mdclrerror(ep);
+
+ if (((p = getenv("MD_DEBUG")) != NULL) &&
+ (strstr(p, "RENAME") != NULL)) {
+ flags |= NOISY;
+ }
+ /* if DOIT is not set, we are in dryrun mode */
+ if ((options & MDCMD_DOIT) == 0) {
+ flags |= DRYRUN;
+ }
+
+
+ if (metachkmeta(from_np, ep) != 0) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+
+ mdclrerror(ep);
+
+ if (meta_get_mdunit(sp, from_np, ep) == NULL) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+
+ if (meta_get_mdunit(sp, to_np, ep) != NULL) {
+ if (mdisok(ep)) {
+ (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+ meta_getminor(to_np->dev),
+ to_np->cname);
+ }
+ return (-1);
+ }
+ mdclrerror(ep); /* the failed lookup of to_np is the expected case */
+
+ /* If FORCE is not set, check if metadevice is open */
+ if (!(flags & FORCE)) {
+ if (check_open(sp, from_np, ep) != 0) {
+ return (-1);
+ }
+ }
+
+ /*
+ * All checks are done, now we do the real work.
+ * If we are in dryrun mode, we're done.
+ */
+ if (flags & DRYRUN) {
+ return (0); /* success */
+ }
+
+ /*
+ * add key for new name to the namespace
+ */
+ if ((cinfop = metagetcinfo(from_np, ep)) == NULL) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+
+ if (metaislocalset(sp)) {
+ to_np->key = add_name(sp, MD_SIDEWILD, MD_KEYWILD,
+ cinfop->dname, meta_getminor(to_np->dev), to_np->bname, ep);
+ } else {
+ /*
+ * As this is not the local set we have to create a namespace
+ * record for each side (host) in the set. We cannot use
+ * add_key_names() because the destination device (to_np)
+ * should not exist and so the subsequent metagetcinfo()
+ * call will fail when it tries to open the device, so we
+ * have to use the information from the source device (from_np)
+ */
+ if ((sd = metaget_setdesc(sp, ep)) == (md_set_desc *)NULL) {
+ return (-1);
+ }
+ to_np->key = MD_KEYWILD;
+
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ side_key = add_name(sp, (side_t)nd->nd_nodeid,
+ to_np->key, cinfop->dname,
+ meta_getminor(to_np->dev),
+ to_np->bname, ep);
+ /*
+ * Break out if failed to add the key,
+ * but delete any name space records that
+ * were added.
+ */
+ if (side_key == MD_KEYBAD ||
+ side_key == MD_KEYWILD) {
+ /*
+ * If we have a valid to_np->key then
+ * a record was added correctly but
+ * we do not know for which side, so
+ * we need to try to delete all of them.
+ * Only the nodes that precede the
+ * failing one are visited; errors from
+ * del_name() go to dummy_ep so that ep
+ * keeps the add_name() error.
+ */
+
+ if (to_np->key != MD_KEYBAD &&
+ to_np->key != MD_KEYWILD) {
+ nd_del = sd->sd_nodelist;
+ while ((nd_del != nd) &&
+ (nd_del != NULL)) {
+ (void) del_name(sp,
+ (side_t)nd_del->nd_nodeid,
+ to_np->key, &dummy_ep);
+ nd_del = nd_del->nd_next;
+ }
+ /* preserve error key state */
+ to_np->key = side_key;
+ }
+ break;
+ }
+ to_np->key = side_key;
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ if (sd->sd_nodes[i][0] != '\0') {
+ side_key = add_name(sp, (side_t)i,
+ to_np->key, cinfop->dname,
+ meta_getminor(to_np->dev),
+ to_np->bname, ep);
+ /*
+ * Break out if failed to add the key,
+ * but delete any name space records
+ * that were added.
+ */
+ if (side_key == MD_KEYBAD ||
+ side_key == MD_KEYWILD) {
+ /*
+ * If we have a valid
+ * to_np->key then a record was
+ * added correctly but we do
+ * not know for which side, so
+ * we need to try to delete
+ * all of them.
+ * Every side below i is tried,
+ * including unused ones.
+ */
+ if (to_np->key != MD_KEYBAD &&
+ to_np->key != MD_KEYWILD) {
+ for (j = 0; j < i;
+ j++) {
+ (void) del_name(sp,
+ (side_t)j,
+ to_np->key,
+ &dummy_ep);
+ }
+ /*
+ * preserve err
+ * key state
+ */
+ to_np->key = side_key;
+ }
+ break;
+ }
+ to_np->key = side_key;
+ }
+ }
+ }
+ }
+
+ if (to_np->key == MD_KEYBAD || to_np->key == MD_KEYWILD) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+
+ rc = meta_swap(sp, from_np, to_np, MDRNOP_RENAME, flags, ep);
+
+ if (rc == 0) {
+ if (options & MDCMD_PRINT) {
+ (void) fprintf(stdout, dgettext(TEXT_DOMAIN,
+ "%s: has been renamed to %s\n"),
+ from_np->cname, to_np->cname);
+ }
+ }
+
+ return (rc);
+}
+
+/*
+ * Decide whether <from> and <to> have to trade places before an
+ * exchange: returns TRUE when, in the order given, <from> could not
+ * act in the role of <self>; FALSE otherwise.
+ */
+static bool_t
+meta_exchange_need_to_flip(
+	md_common_t	*from_mdp,
+	md_common_t	*to_mdp
+)
+{
+	assert(from_mdp);
+	assert(to_mdp);
+
+	/*
+	 * Case 1: <to> has a parent of its own and is the parent of <from>
+	 *
+	 *	?
+	 *	 \
+	 *	 <to>
+	 *	   \
+	 *	  <from>
+	 */
+	if (MD_HAS_PARENT(from_mdp->parent) &&
+	    MD_HAS_PARENT(to_mdp->parent) &&
+	    (from_mdp->parent == meta_getminor(to_mdp->namep->dev)))
+		return (TRUE);
+
+	/*
+	 * Case 2: <to> has a parent and is itself able to have
+	 * metadevice children
+	 *
+	 *	<from>
+	 *	  \
+	 *	  <to>
+	 *	    \
+	 *	     ?
+	 */
+	if (MD_HAS_PARENT(to_mdp->parent) &&
+	    (to_mdp->capabilities & MD_CAN_META_CHILD))
+		return (TRUE);
+
+	/*
+	 * Case 3: <to> is the parent of a <from> that cannot have
+	 * metadevice children
+	 *
+	 *	<to>
+	 *	  \
+	 *	 <from>
+	 */
+	if (MD_HAS_PARENT(from_mdp->parent) &&
+	    (from_mdp->parent == meta_getminor(to_mdp->namep->dev)) &&
+	    !(from_mdp->capabilities & MD_CAN_META_CHILD))
+		return (TRUE);
+
+	/*
+	 * Everything else can be handled in the order given:
+	 *
+	 *	<from>	or	<to>
+	 *	  \		  \
+	 *	  <to>		 <from>
+	 *			    \
+	 *			     ?
+	 */
+	return (FALSE);
+}
+
+/*
+ * exchange the names of two metadevices
+ *
+ * Both from_np and to_np must pass metachkmeta() and have a unit.
+ * Unless MDCMD_FORCE is given the exchange is refused while from_np is
+ * open. Without MDCMD_DOIT only the checks are run (dry run) and 0 is
+ * returned.
+ *
+ * The two names are handed to meta_swap() with MDRNOP_EXCHANGE, in
+ * reversed order when meta_exchange_need_to_flip() says so. With
+ * MDCMD_PRINT a confirmation is written to stdout on success.
+ *
+ * MD_DEBUG containing "EXCHANGE" turns on NOISY output; if the text
+ * following "EXCHANGE=" is exactly "NOFLIP" (to the end of the string)
+ * the flip is suppressed as well.
+ *
+ * Returns 0 on success, otherwise -1 (or the meta_swap() result) with
+ * ep set.
+ */
+int
+meta_exchange(
+ mdsetname_t *sp,
+ mdname_t *from_np,
+ mdname_t *to_np,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ int flags = (options & MDCMD_FORCE)? FORCE: 0;
+ md_common_t *from_mdp, *to_mdp;
+ int rc;
+ char *p, *p2;
+
+ /* must have a set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(from_np->dev)));
+ assert(sp->setno == MD_MIN2SET(meta_getminor(to_np->dev)));
+
+ if (metachkmeta(from_np, ep) != 0) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+
+ if (metachkmeta(to_np, ep) != 0) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+
+ if ((options & MDCMD_DOIT) == 0) {
+ flags |= DRYRUN;
+ }
+
+ if ((p = getenv("MD_DEBUG")) != NULL) {
+ if ((p2 = strstr(p, "EXCHANGE=")) != NULL) {
+ flags |= NOISY;
+ if ((p2 = strchr(p2, '=')) != NULL) {
+ if (strcmp((p2+1), "NOFLIP") == 0) { /* must be the tail of MD_DEBUG */
+ flags |= NOFLIP;
+ }
+ }
+ } else if (strstr(p, "EXCHANGE") != NULL) {
+ flags |= NOISY;
+ }
+ }
+
+ if ((from_mdp = meta_get_unit(sp, from_np, ep)) == NULL) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+
+ if ((to_mdp = meta_get_unit(sp, to_np, ep)) == NULL) {
+ assert(!mdisok(ep));
+ return (-1);
+ }
+ assert(mdisok(ep));
+
+ /* If FORCE is not set, check if metadevice is open */
+ if (!(flags & FORCE)) {
+ if (check_open(sp, from_np, ep) != 0) {
+ return (-1);
+ }
+ }
+
+ /*
+ * All checks are done, now we do the real work.
+ * If we are in dryrun mode, we're done.
+ */
+ if (flags & DRYRUN) {
+ return (0); /* success */
+ }
+
+ /*
+ * NOFLIP is used only for debugging; the driver
+ * will catch this and return MDE_RENAME_ORDER, if necessary
+ *
+ * When a flip is needed the two names simply swap argument
+ * positions in the meta_swap() call.
+ */
+ if (((flags & NOFLIP) == 0) &&
+ meta_exchange_need_to_flip(from_mdp, to_mdp)) {
+
+ rc = meta_swap(sp, to_np, from_np, MDRNOP_EXCHANGE, flags, ep);
+
+ } else {
+ rc = meta_swap(sp, from_np, to_np, MDRNOP_EXCHANGE, flags, ep);
+ }
+
+ if (rc == 0) {
+ if (options & MDCMD_PRINT) {
+ (void) fprintf(stdout, dgettext(TEXT_DOMAIN,
+ "%s and %s have exchanged identities\n"),
+ from_np->cname, to_np->cname);
+ }
+ }
+
+ return (rc);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_repartition.c b/usr/src/lib/lvm/libmeta/common/meta_repartition.c
new file mode 100644
index 0000000000..16bf7ea597
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_repartition.c
@@ -0,0 +1,415 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <meta.h>
+#include "meta_repartition.h"
+
+
+
+/*
+ * FUNCTION: meta_replicaslice()
+ * INPUT: dnp - the name of the drive to check
+ * OUTPUT: slicep - pointer to slice number
+ * ep - pointer to an md_error_t structure in which
+ * to return errors to the caller
+ * RETURNS: int - 0 - value pointed to by slicep is valid
+ * -1 - otherwise
+ *
+ * PURPOSE: Determine which slice of the specified drive to
+ * reserve, presumably for metadb replica usage.
+ *
+ * NOTE: If slicep is NULL, the return code will indicate
+ * whether or not the slice number could be determined
+ */
+int
+meta_replicaslice(
+ mddrivename_t *dnp,
+ uint_t *slicep,
+ md_error_t *ep
+)
+{
+ int err = 0;
+ int ioctl_return;
+ int fd;
+ char *rname;
+ struct dk_geom geom;
+
+ rname = dnp->rname;
+ if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
+ char *n;
+ int open_errno;
+ size_t len;
+
+ if (errno != ENOENT) /* only ENOENT leads to the "s0" retry below */
+ return (mdsyserror(ep, errno, rname));
+
+ len = strlen(rname) + 3; /* rname + "s0" + terminating NUL */
+ n = Zalloc(len);
+ (void) snprintf(n, len, "%ss0", rname);
+ fd = open(n, (O_RDONLY|O_NDELAY), 0);
+ open_errno = errno; /* captured before Free() runs */
+ Free(n);
+ if (fd < 0) {
+ return (mdsyserror(ep, open_errno, rname));
+ }
+ }
+
+ /*
+ * if our drivenamep points to a device not supporting
+ * DKIOCGGEOM, we have an EFI label.
+ * errno is copied into err right after the ioctl so that the
+ * close() below cannot disturb it.
+ */
+ errno = 0;
+ ioctl_return = ioctl(fd, DKIOCGGEOM, &geom);
+ err = errno;
+
+ (void) close(fd);
+
+ /*
+ * If the DKIOCGGEOM ioctl succeeded, then the device has a
+ * VTOC style label. In this case, we use slice 7.
+ */
+ if (ioctl_return == 0) {
+ if (slicep != NULL) {
+ *slicep = MD_SLICE7;
+ }
+ return (0);
+ }
+
+ /*
+ * ENOTSUP indicates an EFI style label, in which case slice 7
+ * cannot be used because its minor number is reserved. In
+ * this case, use slice 6.
+ */
+ if (err == ENOTSUP) {
+ if (slicep != NULL) {
+ *slicep = MD_SLICE6;
+ }
+ return (0);
+ }
+
+ /*
+ * Those are the only two cases we know how to deal with;
+ * either the drivenamep didn't point to a disk, or the ioctl
+ * failed for some other reason.
+ * ENOTTY is reported as MDE_NOT_DISK, anything else as a
+ * system error carrying the ioctl's errno.
+ */
+ if (err == ENOTTY) {
+ return (mddeverror(ep, MDE_NOT_DISK, NODEV, rname));
+ }
+
+ return (mdsyserror(ep, err, rname));
+}
+
+
+
+/*
+ * FUNCTION: meta_repartition_drive()
+ * INPUT: sp - the set name for the device to check
+ * dnp - the name of the drive to partition
+ * options - options (see NOTES)
+ * OUTPUT: vtocp - pointer to an mdvtoc_t structure in which
+ * to return the new VTOC to the caller
+ * ep - pointer to an md_error_t structure in which
+ * to return errors to the caller
+ * RETURNS: int - 0 - drive was or can be repartitioned
+ * -1 - drive could not or should not be
+ * repartitioned
+ * PURPOSE: Repartition a disk for use in a disk set or in order
+ * to create soft partitions on it. Alternatively,
+ * return the VTOC that the disk would have if it were
+ * repartitioned without actually repartitioning it.
+ *
+ * NOTES:
+ *
+ * This routine will repartition a drive to make it suitable for
+ * inclusion in a diskset. Specifically, it will create a
+ * proposed VTOC that specifies a replica slice that begins at the
+ * first valid lba, is large enough to hold a label and a metadb
+ * replica, does not overlap any other slices, and is unmountable.
+ * If the current replica slice already satisfies those criteria,
+ * the routine will neither create a proposed VTOC nor repartition
+ * the drive unless the MD_REPART_FORCE flag is passed into the
+ * routine in the options argument. If the routine does create a
+ * proposed VTOC, it will return the proposed VTOC in *vtocp if
+ * vtocp isn't NULL.
+ *
+ * The slice to be used as the replica slice is determined by the
+ * function meta_replicaslice().
+ *
+ * If the replica slice does not satisfy the above criteria or the
+ * MD_REPART_FORCE flag is set, the proposed VTOC will specify a
+ * replica slice that satisfies the above criteria, a slice zero
+ * that contains the remaining space on the disk, and no other
+ * slices. If that repartitioning would cause the replica slice
+ * to move or shrink, and the MD_REPART_LEAVE_REP option is set,
+ * the routine will return -1 without creating or returning a
+ * proposed vtoc, and without repartitioning the disk. Otherwise
+ * the routine will repartition the disk unless the
+ * MD_REPART_DONT_LABEL flag is set in the options argument.
+ *
+ * If the MD_REPART_DONT_LABEL flag is set in the options argument,
+ * but the routine would otherwise repartition the drive, the
+ * routine won't repartition the drive, but will create a proposed
+ * VTOC that satisfies the criteria defined above and return it
+ * in *vtocp if vtocp isn't NULL. The MD_REPART_DONT_LABEL
+ * option allows calling routines to determine what the contents of
+ * the drive's VTOC would be if the drive were repartitioned without
+ * actually repartitioning the drive.
+ */
+int
+meta_repartition_drive(
+ mdsetname_t *sp,
+ mddrivename_t *dnp,
+ int options,
+ mdvtoc_t *vtocp,
+ md_error_t *ep
+)
+{
+ uint_t replicaslice;
+ diskaddr_t first_lba, last_lba;
+ int round_sizes = 1;
+ unsigned long long cylsize;
+ unsigned long long drvsize;
+ int i;
+ mdgeom_t *mdgp;
+ mdvtoc_t *mdvp;
+ mdvtoc_t proposed_vtoc;
+ uint_t reservedcyl;
+ ushort_t resflag;
+ mdname_t *resnp;
+ unsigned long long ressize;
+ md_set_desc *sd;
+ daddr_t dbsize;
+ diskaddr_t replica_start;
+ diskaddr_t replica_size;
+ diskaddr_t replica_end;
+ diskaddr_t data_start;
+ diskaddr_t data_size;
+
+ if (meta_replicaslice(dnp, &replicaslice, ep) != 0) {
+ return (-1);
+ }
+
+ /* Don't round for EFI disks (meta_replicaslice() picks slice 6 for them) */
+ if (replicaslice == MD_SLICE6)
+ round_sizes = 0;
+
+ /*
+ * We took as argument a drive name pointer, but we need a
+ * slice name pointer to retrieve vtoc information. So get
+ * the name pointer for slice zero first, then use it to get
+ * the vtoc info for the disk.
+ */
+ if ((resnp = metaslicename(dnp, MD_SLICE0, ep)) == NULL)
+ return (-1);
+
+ if ((mdvp = metagetvtoc(resnp, FALSE, NULL, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Determine the metadb size.
+ * MD_DBSIZE is the default; a set whose descriptor satisfies
+ * MD_MNSET_DESC() uses MD_MN_DBSIZE instead.
+ */
+ dbsize = MD_DBSIZE;
+ if (!metaislocalset(sp)) {
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ if (MD_MNSET_DESC(sd))
+ dbsize = MD_MN_DBSIZE;
+ }
+
+ /* If we've got an efi disk, we better have lba info */
+ first_lba = mdvp->first_lba;
+ last_lba = mdvp->last_lba;
+ ASSERT((round_sizes != 0) || (last_lba > 0));
+
+ /*
+ * At this point, ressize is used as a minimum value. Later
+ * it will be rounded up to a cylinder boundary if
+ * appropriate. ressize is in units of disk sectors.
+ */
+ ressize = dbsize + VTOC_SIZE;
+ resflag = V_UNMNT;
+
+ /*
+ * If we're forcing the repartition, we can skip the replica
+ * slice and overlap tests.
+ */
+ if (options & MD_REPART_FORCE) {
+ goto do_repartition;
+ }
+
+ /*
+ * Replica slice tests: it must begin at first_lba, be long
+ * enough, have the right flags, and not overlap any other
+ * slices. If any of these conditions is violated, we need to
+ * repartition the disk.
+ */
+ if (mdvp->parts[replicaslice].start != first_lba) {
+ goto do_repartition;
+ }
+
+ if (mdvp->parts[replicaslice].size < ressize) {
+ goto do_repartition;
+ }
+
+ if (mdvp->parts[replicaslice].flag != resflag) {
+ goto do_repartition;
+ }
+
+ /*
+ * Check for overlap: this test should use the actual size of
+ * the replica slice, as contained in the vtoc, and NOT the
+ * minimum size calculated above.
+ * Because the replica slice is known to start at first_lba
+ * here, any other non-empty slice starting below replica_end
+ * overlaps it.
+ */
+ replica_end = first_lba + mdvp->parts[replicaslice].size;
+ for (i = 0; i < mdvp->nparts; i++) {
+ if (i != replicaslice) {
+ if ((mdvp->parts[i].size > 0) &&
+ (mdvp->parts[i].start < replica_end)) {
+ goto do_repartition;
+ }
+ }
+ }
+
+ /*
+ * If we passed the above tests, then the disk is already
+ * partitioned appropriately, and we're not being told to
+ * force a change.
+ * Note that *vtocp is left untouched on this path.
+ */
+ return (0);
+
+do_repartition:
+
+ /* Retrieve disk geometry info and round to cylinder sizes */
+ if (round_sizes != 0) {
+
+ if ((mdgp = metagetgeom(resnp, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Both cylsize and drvsize are in units of disk
+ * sectors.
+ *
+ * The intended results are of type unsigned long
+ * long. Since each operand of the first
+ * multiplication is of type unsigned int, we risk
+ * overflow by multiplying and then converting the
+ * result. Therefore we explicitly cast (at least)
+ * one of the operands, forcing conversion BEFORE
+ * multiplication, and avoiding overflow. The second
+ * assignment is OK, since one of the operands is
+ * already of the desired type.
+ */
+ cylsize =
+ ((unsigned long long)mdgp->nhead) * mdgp->nsect;
+ drvsize = cylsize * mdgp->ncyl;
+
+ /*
+ * How many cylinders must we reserve for the replica
+ * slice to ensure that it meets the previously
+ * calculated minimum size?
+ */
+ reservedcyl = (ressize + cylsize - 1) / cylsize;
+ ressize = reservedcyl * cylsize;
+ } else {
+ drvsize = last_lba - first_lba;
+ }
+
+ /* Would this require a forbidden change? */
+ if (options & MD_REPART_LEAVE_REP) {
+ if ((mdvp->parts[replicaslice].start != first_lba) ||
+ (mdvp->parts[replicaslice].size < ressize)) {
+ return (mddeverror(ep, MDE_REPART_REPLICA,
+ resnp->dev, NULL));
+ }
+ }
+
+ /*
+ * It seems unlikely that someone would pass us too small a
+ * disk, but it's still worth checking for...
+ * reservedcyl and mdgp are only assigned when round_sizes != 0;
+ * the && below keeps them from being read in the other case.
+ */
+ if (((round_sizes != 0) && (reservedcyl >= (int)mdgp->ncyl)) ||
+ ((round_sizes == 0) && (ressize + first_lba >= last_lba))) {
+ return (mdmddberror(ep, MDE_DB_TOOSMALL,
+ meta_getminor(resnp->dev), sp->setno, 0, NULL));
+ }
+
+ replica_start = first_lba;
+ replica_size = ressize;
+ data_start = first_lba + ressize;
+ data_size = drvsize - ressize;
+
+ /*
+ * Create the proposed VTOC. First copy the current VTOC
+ * into the proposed VTOC to duplicate the values that don't
+ * need to change. Then change the partition table and set
+ * the flag value for the replica slice to resflag to reserve it
+ * for metadata.
+ */
+ proposed_vtoc = *mdvp;
+ /* We need at least replicaslice partitions in the proposed vtoc */
+ if (replicaslice >= proposed_vtoc.nparts) {
+ proposed_vtoc.nparts = replicaslice + 1;
+ }
+ for (i = 0; i < proposed_vtoc.nparts; i++) {
+ /* don't change the reserved partition of an EFI device; the data slice is cut short to end where it starts */
+ if (proposed_vtoc.parts[i].tag == V_RESERVED)
+ data_size = proposed_vtoc.parts[i].start - data_start;
+ else
+ (void) memset(&proposed_vtoc.parts[i], '\0',
+ sizeof (proposed_vtoc.parts[i]));
+ }
+
+ proposed_vtoc.parts[MD_SLICE0].start = data_start;
+ proposed_vtoc.parts[MD_SLICE0].size = data_size;
+ proposed_vtoc.parts[MD_SLICE0].tag = V_USR;
+ proposed_vtoc.parts[replicaslice].start = replica_start;
+ proposed_vtoc.parts[replicaslice].size = replica_size;
+ proposed_vtoc.parts[replicaslice].flag = resflag;
+ proposed_vtoc.parts[replicaslice].tag = V_USR;
+
+ if (!(options & MD_REPART_DONT_LABEL)) {
+ /*
+ * Label the disk with the proposed VTOC.
+ * The proposal is first stored through mdvp, the
+ * pointer metagetvtoc() returned for resnp, and then
+ * metasetvtoc() is called for resnp.
+ */
+ *mdvp = proposed_vtoc;
+ if (metasetvtoc(resnp, ep) != 0) {
+ return (-1);
+ }
+ }
+
+ if (vtocp != NULL) {
+ /*
+ * Return the proposed VTOC.
+ */
+ *vtocp = proposed_vtoc;
+ }
+
+ return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_replace.c b/usr/src/lib/lvm/libmeta/common/meta_replace.c
new file mode 100644
index 0000000000..3165bd0d53
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_replace.c
@@ -0,0 +1,144 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * replace components in metadevices
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_stripe.h>
+
+/*
+ * Replace component oldnp with newnp.
+ *
+ * When uname names a hot spare pool the replacement is carried out in
+ * that pool; otherwise it is carried out in the metadevice metanp,
+ * which must be a RAID, trans or stripe device (MDE_UNKNOWN_TYPE is
+ * returned for anything else).
+ *
+ * Returns 0 on success and -1 with ep set on failure.  A failed hot
+ * spare replacement is reported as -1; it used to be returned as 0
+ * with the error only visible in ep.
+ */
+int
+meta_replace(mdsetname_t *sp, mdname_t *metanp, mdname_t *oldnp,
+    mdname_t *newnp, char *uname, mdcmdopts_t options, md_error_t *ep)
+{
+	char	*miscname;
+
+	assert(sp != NULL);
+
+	if (is_hspname(uname)) {
+		mdhspname_t	*hspnp;
+
+		/* metahspname() may fill in sp, hence the second assert */
+		if ((hspnp = metahspname(&sp, uname, ep)) == NULL)
+			return (-1);
+		assert(sp != NULL);
+
+		/* propagate a failure instead of claiming success */
+		if (meta_hs_replace(sp, hspnp, oldnp, newnp, options,
+		    ep) != 0)
+			return (-1);
+		return (0);
+	}
+
+	assert(sp->setno == MD_MIN2SET(meta_getminor(metanp->dev)));
+	if (metachkmeta(metanp, ep) != 0)
+		return (-1);
+
+	if ((miscname = metagetmiscname(metanp, ep)) == NULL)
+		return (-1);
+
+	/* dispatch on the driver name of the metadevice */
+	if (strcmp(miscname, MD_RAID) == 0) {
+		return (meta_raid_replace(sp, metanp, oldnp, newnp,
+		    options, ep));
+	} else if (strcmp(miscname, MD_TRANS) == 0) {
+		return (meta_trans_replace(sp, metanp, oldnp, newnp,
+		    options, ep));
+	} else if (strcmp(miscname, MD_STRIPE) == 0) {
+		return (meta_stripe_replace(sp, metanp, oldnp, newnp,
+		    options, ep));
+	}
+
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(metanp->dev),
+	    metanp->cname));
+}
+/*
+ * Replace component oldnp with newnp in the metadevice np, handing the
+ * request to the RAID or mirror code according to the driver name of
+ * np.  Any other driver name yields MDE_UNKNOWN_TYPE.
+ */
+int
+meta_replace_byname(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdname_t	*oldnp,
+	mdname_t	*newnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char	*drvname;
+
+	/* a set is required and np must belong to it */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* np must pass metachkmeta() before its type is looked up */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* hand off to the matching implementation */
+	if (strcmp(drvname, MD_RAID) == 0)
+		return (meta_raid_replace(sp, np, oldnp, newnp, options, ep));
+
+	if (strcmp(drvname, MD_MIRROR) == 0)
+		return (meta_mirror_replace(sp, np, oldnp, newnp, options, ep));
+
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname));
+}
+
+/*
+ * Enable component compnp of the metadevice np, handing the request to
+ * the RAID or mirror code according to the driver name of np.  Any
+ * other driver name yields MDE_UNKNOWN_TYPE.
+ */
+int
+meta_enable_byname(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char	*drvname;
+
+	/* a set is required and np must belong to it */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* np must pass metachkmeta() before its type is looked up */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* hand off to the matching implementation */
+	if (strcmp(drvname, MD_RAID) == 0)
+		return (meta_raid_enable(sp, np, compnp, options, ep));
+
+	if (strcmp(drvname, MD_MIRROR) == 0)
+		return (meta_mirror_enable(sp, np, compnp, options, ep));
+
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_reset.c b/usr/src/lib/lvm/libmeta/common/meta_reset.c
new file mode 100644
index 0000000000..ae04edce5d
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_reset.c
@@ -0,0 +1,146 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * clear metadevices
+ */
+
+#include <meta.h>
+
+/*
+ * clear a metadevice.
+ *
+ * Looks up the driver name of np and issues MD_IOCRESET for it; the
+ * force field of the request mirrors MDCMD_FORCE in options.  The
+ * device is not cleared while meta_isopen() reports it as open
+ * (MDE_IS_OPEN).
+ *
+ * Returns 0 on success, otherwise a non-zero value with ep set.
+ */
+int
+meta_reset(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*miscname;
+	md_i_reset_t	mir;
+	int		open_rc;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* clear device */
+	if ((miscname = metagetmiscname(np, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * A negative result means the open check itself failed.  If it
+	 * recorded an error, hand that error back rather than replacing
+	 * it with a misleading MDE_IS_OPEN.
+	 */
+	open_rc = meta_isopen(sp, np, ep, options);
+	if ((open_rc < 0) && !mdisok(ep))
+		return (-1);
+	if (open_rc != 0) {
+		return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev),
+		    np->cname));
+	}
+
+	(void) memset(&mir, '\0', sizeof (mir));
+	MD_SETDRIVERNAME(&mir, miscname, sp->setno);
+	mir.mnum = meta_getminor(np->dev);
+	mir.force = (options & MDCMD_FORCE) ? 1 : 0;
+	if (metaioctl(MD_IOCRESET, &mir, &mir.mde, np->cname) != 0)
+		return (mdstealerror(ep, &mir.mde));
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Reset every metadevice and hot spare pool of the set, with
+ * MDCMD_RECURSE added to the caller's options.  Stops at the first
+ * reset routine that fails and returns -1; returns 0 when all of them
+ * succeed.
+ */
+int
+meta_reset_all(
+	mdsetname_t	*sp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	options |= MDCMD_RECURSE;
+
+	/*
+	 * Soft partitions may sit at the top as well as at the bottom
+	 * of the stack, which is why meta_sp_reset appears both near
+	 * the start and at the very end of the sequence.  The ||
+	 * chain runs the routines in order and quits at the first
+	 * failure.
+	 */
+	if ((meta_trans_reset(sp, NULL, options, ep) != 0) ||
+	    (meta_sp_reset(sp, NULL, options, ep) != 0) ||
+	    (meta_raid_reset(sp, NULL, options, ep) != 0) ||
+	    (meta_mirror_reset(sp, NULL, options, ep) != 0) ||
+	    (meta_stripe_reset(sp, NULL, options, ep) != 0) ||
+	    (meta_hsp_reset(sp, NULL, options, ep) != 0) ||
+	    (meta_sp_reset(sp, NULL, options, ep) != 0))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Reset the metadevice np by calling the reset routine that matches
+ * its driver name (stripe, mirror, trans, RAID or soft partition).
+ * Any other driver name yields MDE_UNKNOWN_TYPE.
+ */
+int
+meta_reset_by_name(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char	*drvname;
+
+	/* a set is required and np must belong to it */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* np must pass metachkmeta() before its type is looked up */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* hand off to the matching implementation */
+	if (strcmp(drvname, MD_STRIPE) == 0)
+		return (meta_stripe_reset(sp, np, options, ep));
+
+	if (strcmp(drvname, MD_MIRROR) == 0)
+		return (meta_mirror_reset(sp, np, options, ep));
+
+	if (strcmp(drvname, MD_TRANS) == 0)
+		return (meta_trans_reset(sp, np, options, ep));
+
+	if (strcmp(drvname, MD_RAID) == 0)
+		return (meta_raid_reset(sp, np, options, ep));
+
+	if (strcmp(drvname, MD_SP) == 0)
+		return (meta_sp_reset(sp, np, options, ep));
+
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_resync.c b/usr/src/lib/lvm/libmeta/common/meta_resync.c
new file mode 100644
index 0000000000..b57dfb1197
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_resync.c
@@ -0,0 +1,108 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * mirror operations
+ */
+
+#include <meta.h>
+#include <sdssc.h>
+
+/*
+ * Resync the single metadevice named by np.
+ *
+ * Only RAID and mirror metadevices can be resynced; the cmd argument
+ * is forwarded to the mirror routine only.  Returns -1 when np is not
+ * a metadevice or its driver name cannot be obtained, and the result
+ * of mdmderror(MDE_UNKNOWN_TYPE) for any other driver name.
+ */
+int
+meta_resync_byname(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	daddr_t		size,
+	md_error_t	*ep,
+	md_resync_cmd_t	cmd	/* action to perform */
+)
+{
+	char	*drvname;
+
+	/* the caller must hand in the set that owns the device's minor */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* work out what kind of metadevice this is */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* hand off to the matching resync routine */
+	if (strcmp(drvname, MD_RAID) == 0)
+		return (meta_raid_resync(sp, np, size, ep));
+	if (strcmp(drvname, MD_MIRROR) == 0)
+		return (meta_mirror_resync(sp, np, size, ep, cmd));
+
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname));
+}
+
+/*
+ * Resync every mirror and RAID metadevice in the set.
+ *
+ * Returns 0 without doing anything when there are no state databases
+ * (MDE_DB_NODB is cleared from ep).  Any other database setup failure
+ * is remembered but does not stop the resync attempts; the final
+ * result is -1 if that setup or either resync pass failed.
+ */
+int
+meta_resync_all(
+	mdsetname_t	*sp,
+	daddr_t		size,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*setdesc;
+	int		status = 0;
+
+	/* nothing to resync when no databases exist */
+	if (meta_setup_db_locations(ep) != 0) {
+		if (mdismddberror(ep, MDE_DB_NODB)) {
+			mdclrerror(ep);
+			return (0);
+		}
+		status = -1;
+	}
+
+	if (!(metaislocalset(sp))) {
+		setdesc = metaget_setdesc(sp, ep);
+		if (setdesc == NULL)
+			return (-1);
+
+		/* MN disksets don't use DCS clustering services. */
+		if (!(MD_MNSET_DESC(setdesc)))
+			sdssc_notify_service(NULL, Shutdown_Services);
+	}
+
+	/* both passes always run; a failure in either one is reported */
+	if (meta_mirror_resync_all(sp, size, ep) != 0)
+		status = -1;
+	if (meta_raid_resync_all(sp, size, ep) != 0)
+		status = -1;
+
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_runtime.c b/usr/src/lib/lvm/libmeta/common/meta_runtime.c
new file mode 100644
index 0000000000..f9c5915088
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_runtime.c
@@ -0,0 +1,301 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Return the values of runtime parameters stored in
+ * /etc/lvm/runtime.cf, converting them to data
+ * types appropriate for use by functions whose behavior
+ * is affected by those values.
+ */
+
+/*
+ * system include files
+ */
+
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+/*
+ * SUNWmd include files
+ */
+
+#include <meta.h> /* for MDD_DOMAIN */
+#include <meta_runtime.h> /* external interface definition */
+#include <sdssc.h>
+
+/*
+ * The following lines define the runtime parameter configuration file.
+ */
+
+static const char *param_file_namep = "/etc/lvm/runtime.cf";
+
+/*
+ * The runtime parameter configuration file is an ascii text file.
+ * Each text line in the file has a maximum length of 80 four-byte
+ * wide characters. The line buffer size defined below accommodates
+ * the maximum line length plus the newline character at the end of
+ * the line and the null character that fgets() adds at the end of
+ * the line when it writes the line to the buffer.
+ */
+
+static const int line_buffer_size = 325;
+
+/*
+ * The format for parameter entries in the file is "name=value".
+ * Each "name=value" string must begin a line of the file.
+ * The "name" and "value" tokens may be preceded or followed by
+ * spaces. Lines beginning with "#" are comment lines.
+ */
+
+static const char *token_separator_listp = " =";
+
+/*
+ * If a runtime parameter that can be set in the file is not set,
+ * or is set to an invalid value, or if the file can't be opened,
+ * the parameter takes on the default value given in the comments
+ * below.
+ */
+
+/*
+ * The following string constant declarations name the runtime
+ * configuration parameters that can be set in the runtime parameter
+ * configuration file. The allowed values of parameters that
+ * range over small sets of discrete values are also declared below
+ * as string constants.
+ *
+ * CAUTION: When adding new runtime parameters to the runtime
+ * parameter configuration file, declare their names
+ * as string constants below, and check for conflicts
+ * with the names of existing parameters.
+ */
+
+static const char *ownerioctls_namep = "ownerioctls";
+
+/*
+ * allowed values:
+ */
+
+static const char *ownerioctls_onp = "on"; /* default value */
+static const char *ownerioctls_offp = "off";
+
+/*
+ * The "ownerioctls" parameter controls whether the metaset -t and
+ * metaset -r commands issue the MHIOCTKOWN, MHIOCRELEASE, and
+ * MHIOCENFAILFAST ioctls when taking or releasing ownership of disksets.
+ * The allowed parameter values are "on" and "off".
+ *
+ * If the line "ownerioctls=off" appears in the runtime configuration file,
+ * the metaset -t command doesn't issue the MHIOCTKOWN ioctl when taking
+ * ownership of disksets, and the metaset -r command doesn't issue the
+ * MHIOCRELEASE and MHIOCENFAILFAST ioctls when releasing ownership of
+ * disksets.
+ *
+ * If the line "ownerioctls=on" appears in the file, the metaset -t
+ * command issues the MHIOCTKOWN ioctl when taking ownership of disksets,
+ * and the metaset -r command issues the MHIOCRELEASE AND MHIOCENFAILFAST
+ * ioctls when releasing ownership of disksets.
+ *
+ * The default value of "ownerioctls" is "on".
+ */
+
+/*
+ * The following lines make forward declarations of private functions.
+ */
+
+static
+char *
+meta_get_rt_param(const char *param_namep, boolean_t warn_if_not_found);
+
+/*
+ * The following lines define public functions.
+ */
+
+/*
+ * Decide whether the ownership ioctls should be issued.
+ *
+ * Returns B_FALSE when sdssc_version() succeeds and reports a major
+ * version of 3 or higher, or when the "ownerioctls" runtime parameter
+ * is "off".  Returns B_TRUE in every other case, including a missing
+ * parameter and an unrecognized value (which is also reported on
+ * stderr and through syslog).
+ */
+boolean_t
+do_owner_ioctls(void)
+{
+	const char	*function_namep = "do_owner_ioctls()";
+	sdssc_version_t	version;
+	char		*valuep;
+	boolean_t	issue_ioctls = B_TRUE;	/* default behavior */
+
+	/*
+	 * If we're bound to a cluster machine never do ioctls.
+	 * The SC3.0 cluster code will always deal with disk
+	 * reservation.
+	 */
+	if ((sdssc_version(&version) == SDSSC_OKAY) && (version.major >= 3))
+		return (B_FALSE);
+
+	valuep = meta_get_rt_param(ownerioctls_namep, B_TRUE);
+	if (valuep == NULL)
+		return (issue_ioctls);
+
+	if (strcmp(valuep, ownerioctls_offp) == 0) {
+		issue_ioctls = B_FALSE;
+	} else if (strcmp(valuep, ownerioctls_onp) != 0) {
+		/* neither "on" nor "off": complain, keep the default */
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: illegal value for %s: %s.\n"),
+		    function_namep, ownerioctls_namep, valuep);
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "%s: illegal value for %s: %s.\n"),
+		    function_namep, ownerioctls_namep, valuep);
+	}
+
+	free(valuep);
+	return (issue_ioctls);
+}
+
+/*
+ * Fetch the rpc.mdcommd verbosity level ("commd_verbosity") from the
+ * runtime parameter file.  The value is parsed as a hexadecimal
+ * number.  When the parameter is absent, 0 is returned and no
+ * "not found" warning is requested from the lookup.
+ */
+uint_t
+commd_get_verbosity(void)
+{
+	char	*valuep = meta_get_rt_param("commd_verbosity", B_FALSE);
+	uint_t	level;
+
+	if (valuep == NULL)
+		return (0);
+
+	level = (uint_t)strtol(valuep, NULL, 16);
+	free(valuep);
+	return (level);
+}
+
+/*
+ * Fetch the rpc.mdcommd debug output file name ("commd_out_file")
+ * from the runtime parameter file.  No "not found" warning is
+ * requested from the lookup.  A non-NULL result is owned by the
+ * caller, who must free() it.
+ */
+char *
+commd_get_outfile(void)
+{
+	char	*outfilep;
+
+	outfilep = meta_get_rt_param("commd_out_file", B_FALSE);
+	return (outfilep);
+}
+
+/*
+ * The following lines define private functions
+ */
+
+/*
+ * Look up the value of runtime parameter param_namep in the runtime
+ * parameter configuration file.
+ *
+ * Each line is split on the characters in token_separator_listp; the
+ * first line whose first token equals param_namep and which has a
+ * second token supplies the value.
+ *
+ * Returns a strdup()ed copy of the value, which the caller must
+ * free(), or NULL if the parameter was not found, the file could not
+ * be opened, or memory could not be allocated.  Allocation and open
+ * failures are always reported on stderr and through syslog; a
+ * missing parameter is reported only when warn_if_not_found is B_TRUE.
+ *
+ * Tokenizing uses strtok_r() so that this library routine neither
+ * depends on nor disturbs any strtok() scan its caller may have in
+ * progress.
+ */
+static char *
+meta_get_rt_param(const char *param_namep, boolean_t warn_if_not_found)
+{
+	const char *function_namep = "meta_get_rt_param()";
+	char *line_bufferp = NULL;
+	char *newlinep = NULL;
+	FILE *param_filep = NULL;
+	char *param_name_tokenp = NULL;
+	char *param_valuep = NULL;
+	char *param_value_tokenp = NULL;
+	char *token_statep = NULL;	/* strtok_r() scan position */
+
+	line_bufferp = (char *)malloc(line_buffer_size);
+	if (line_bufferp == NULL) {
+		(void) fprintf(stderr,
+		    dgettext(TEXT_DOMAIN, "%s: malloc failed\n"),
+		    function_namep);
+		syslog(LOG_ERR,
+		    dgettext(TEXT_DOMAIN, "%s: malloc failed\n"),
+		    function_namep);
+		return (param_valuep);
+	}
+	param_filep = fopen(param_file_namep, "r");
+	if (param_filep == NULL) {
+		(void) fprintf(stderr,
+		    dgettext(TEXT_DOMAIN, "%s: can't open %s\n"),
+		    function_namep, param_file_namep);
+		syslog(LOG_ERR,
+		    dgettext(TEXT_DOMAIN, "%s: can't open %s\n"),
+		    function_namep, param_file_namep);
+		free(line_bufferp);
+		return (param_valuep);
+	}
+
+	/* stop reading as soon as a value has been captured */
+	while ((param_valuep == NULL) &&
+	    (fgets(line_bufferp, line_buffer_size, param_filep) != NULL)) {
+
+		/* drop the trailing newline so it can't end up in a token */
+		newlinep = strchr(line_bufferp, '\n');
+		if (newlinep != NULL) {
+			*newlinep = '\0';
+			newlinep = NULL;
+		}
+		param_name_tokenp = strtok_r(line_bufferp,
+		    token_separator_listp, &token_statep);
+		if ((param_name_tokenp != NULL) &&
+		    (strcmp(param_namep, param_name_tokenp) == 0)) {
+
+			param_value_tokenp = strtok_r(NULL,
+			    token_separator_listp, &token_statep);
+		}
+		if (param_value_tokenp != NULL) {
+			/* the token points into line_bufferp; copy it out */
+			param_valuep = strdup(param_value_tokenp);
+			if (param_valuep == NULL) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: strdup failed\n"),
+				    function_namep);
+				syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+				    "%s: strdup failed\n"),
+				    function_namep);
+				free(line_bufferp);
+				(void) fclose(param_filep);
+				return (param_valuep);
+			}
+		}
+	}
+	if ((param_valuep == NULL) && (warn_if_not_found == B_TRUE)) {
+		(void) fprintf(stderr,
+		    dgettext(TEXT_DOMAIN,
+		    "%s: value of %s not set or error in %s\n"),
+		    function_namep,
+		    param_namep,
+		    param_file_namep);
+		syslog(LOG_ERR,
+		    dgettext(TEXT_DOMAIN,
+		    "%s: value of %s not set or error in %s\n"),
+		    function_namep,
+		    param_namep,
+		    param_file_namep);
+	}
+	free(line_bufferp);
+	(void) fclose(param_filep);
+	return (param_valuep);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_se_notify.c b/usr/src/lib/lvm/libmeta/common/meta_se_notify.c
new file mode 100644
index 0000000000..7ee231aa42
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_se_notify.c
@@ -0,0 +1,399 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdlib.h>
+#include <meta.h>
+#include <libsysevent.h>
+#include <libnvpair.h>
+#include <sys/sysevent/svm.h>
+#include <sys/sysevent/eventdefs.h>
+#include <dlfcn.h>
+
+/*
+ * Build a printable name for the object an SVM event refers to.
+ *
+ * tag selects the naming scheme, setno identifies the diskset and dev
+ * supplies the minor number used for the unit, hot spare pool or host
+ * slot.  Returns a strdup()ed string that the caller must free(), or
+ * NULL when no name could be produced or it would not fit in the
+ * local buffer.
+ */
+char *
+obj2devname(uint32_t tag, set_t setno, md_dev64_t dev)
+{
+	char		*setname;
+	char		name[MD_MAX_CTDLEN];
+	mdsetname_t	*sp;
+	md_error_t	status = mdnullerror;
+	md_set_record	*md_sr;
+	minor_t		mnum = meta_getminor(dev);
+	int		rtn = 0;
+
+	setname = NULL;
+	if ((setno != MD_SET_BAD) &&
+	    ((sp = metasetnosetname(setno, &status)) != NULL)) {
+		setname = sp->setname;
+	}
+
+	name[0] = '\0';
+	switch (tag) {
+	case SVM_TAG_HS:
+	case SVM_TAG_METADEVICE:
+	case SVM_TAG_MIRROR:
+	case SVM_TAG_RAID5:
+	case SVM_TAG_STRIPE:
+	case SVM_TAG_TRANS:
+		/* "dN" in set 0, "setname/dN" in any other set */
+		if (setno == 0) {
+			rtn = snprintf(name, sizeof (name), "d%u",
+			    (unsigned)MD_MIN2UNIT(mnum));
+		} else if (setname != NULL) {
+			rtn = snprintf(name, sizeof (name), "%s/d%u", setname,
+			    (unsigned)MD_MIN2UNIT(mnum));
+		}
+		break;
+	case SVM_TAG_HSP:
+		/* "hspN" in set 0, "setname/hspN" in any other set */
+		if (setno == 0) {
+			rtn = snprintf(name, sizeof (name), "hsp%u",
+			    (unsigned)MD_MIN2UNIT(mnum));
+		} else if (setname != NULL) {
+			rtn = snprintf(name, sizeof (name), "%s/hsp%u",
+			    setname, (unsigned)MD_MIN2UNIT(mnum));
+		}
+		break;
+	case SVM_TAG_DRIVE:
+		(void) sprintf(name, "drive");
+		break;
+	case SVM_TAG_HOST:
+		md_sr = NULL;
+		if (setname != NULL) {
+			md_sr = getsetbyname(setname, &status);
+		}
+		if (md_sr != NULL) {
+			/*
+			 * NOTE(review): mnum indexes sr_nodes[] without a
+			 * bounds check; confirm callers only pass valid
+			 * node slots here.
+			 */
+			if (md_sr->sr_nodes[mnum] != NULL) {
+				/*
+				 * Get the host data from the node array.
+				 */
+				rtn = snprintf(name, sizeof (name), "%s",
+				    md_sr->sr_nodes[mnum]);
+			}
+			/*
+			 * The name has been copied out, so release the
+			 * set record instead of leaking it.
+			 */
+			free_sr(md_sr);
+		}
+		/* fall back to a generic label if nothing usable was found */
+		if ((name[0] == '\0') || (rtn >= sizeof (name))) {
+			(void) sprintf(name, "host");
+			rtn = 0;
+		}
+		break;
+	case SVM_TAG_SET:
+		if (setname == NULL) {
+			(void) sprintf(name, "diskset");
+		} else {
+			rtn = snprintf(name, sizeof (name), "%s", setname);
+		}
+		break;
+	default:
+		if ((setname = get_devname(setno, dev)) != NULL) {
+			rtn = snprintf(name, sizeof (name), "%s", setname);
+		}
+		break;
+	}
+	mdclrerror(&status);
+
+	/* Check if we got any rubbish for any of the snprintf's */
+	if ((name[0] == '\0') || (rtn >= sizeof (name))) {
+		return (NULL);
+	}
+
+	return (strdup(name));
+}
+
+/*
+ * Sysevent subclass and mdnotify event type pairs; one entry of the
+ * translation table searched by do_mdnotify().
+ */
+struct node {
+ char *se_ev; /* sysevent subclass string; key compared by ev_compare() */
+ evid_t md_ev; /* mdnotify event used when se_ev matches */
+};
+
+/*
+ * Table must be sorted in ascending order.
+ *
+ * do_mdnotify() searches this table with bsearch() and ev_compare(),
+ * which orders entries by strcmp() on se_ev.  The required order is
+ * therefore that of the ESC_SVM_* string values, not of the macro
+ * names; keep that in mind when adding an entry.
+ */
+static struct node ev_table[] = {
+ { ESC_SVM_ADD, EV_ADD },
+ { ESC_SVM_ATTACH, EV_ATTACH },
+ { ESC_SVM_ATTACHING, EV_ATTACHING },
+ { ESC_SVM_CHANGE, EV_CHANGE },
+ { ESC_SVM_CREATE, EV_CREATE },
+ { ESC_SVM_DELETE, EV_DELETE },
+ { ESC_SVM_DETACH, EV_DETACH },
+ { ESC_SVM_DETACHING, EV_DETACHING },
+ { ESC_SVM_DRIVE_ADD, EV_DRIVE_ADD },
+ { ESC_SVM_DRIVE_DELETE, EV_DRIVE_DELETE },
+ { ESC_SVM_ENABLE, EV_ENABLE },
+ { ESC_SVM_ERRED, EV_ERRED },
+ { ESC_SVM_EXCHANGE, EV_EXCHANGE },
+ { ESC_SVM_GROW, EV_GROW },
+ { ESC_SVM_HS_CHANGED, EV_HS_CHANGED },
+ { ESC_SVM_HS_FREED, EV_HS_FREED },
+ { ESC_SVM_HOST_ADD, EV_HOST_ADD },
+ { ESC_SVM_HOST_DELETE, EV_HOST_DELETE },
+ { ESC_SVM_HOTSPARED, EV_HOTSPARED },
+ { ESC_SVM_INIT_FAILED, EV_INIT_FAILED },
+ { ESC_SVM_INIT_FATAL, EV_INIT_FATAL },
+ { ESC_SVM_INIT_START, EV_INIT_START },
+ { ESC_SVM_INIT_SUCCESS, EV_INIT_SUCCESS },
+ { ESC_SVM_IOERR, EV_IOERR },
+ { ESC_SVM_LASTERRED, EV_LASTERRED },
+ { ESC_SVM_MEDIATOR_ADD, EV_MEDIATOR_ADD },
+ { ESC_SVM_MEDIATOR_DELETE, EV_MEDIATOR_DELETE },
+ { ESC_SVM_OFFLINE, EV_OFFLINE },
+ { ESC_SVM_OK, EV_OK },
+ { ESC_SVM_ONLINE, EV_ONLINE },
+ { ESC_SVM_OPEN_FAIL, EV_OPEN_FAIL },
+ { ESC_SVM_REGEN_DONE, EV_REGEN_DONE },
+ { ESC_SVM_REGEN_FAILED, EV_REGEN_FAILED },
+ { ESC_SVM_REGEN_START, EV_REGEN_START },
+ { ESC_SVM_RELEASE, EV_RELEASE },
+ { ESC_SVM_REMOVE, EV_REMOVE },
+ { ESC_SVM_RENAME_DST, EV_RENAME_DST },
+ { ESC_SVM_RENAME_SRC, EV_RENAME_SRC },
+ { ESC_SVM_REPLACE, EV_REPLACE },
+ { ESC_SVM_RESYNC_DONE, EV_RESYNC_DONE },
+ { ESC_SVM_RESYNC_FAILED, EV_RESYNC_FAILED },
+ { ESC_SVM_RESYNC_START, EV_RESYNC_START },
+ { ESC_SVM_RESYNC_SUCCESS, EV_RESYNC_SUCCESS },
+ { ESC_SVM_TAKEOVER, EV_TAKEOVER }
+};
+
+/*
+ * mdnotify object types, indexed by the tag value that do_mdnotify()
+ * receives.  A tag at or beyond the end of this array is reported as
+ * EVO_UNSPECIFIED there.
+ * NOTE(review): the entry order presumably mirrors the numbering of
+ * the SVM_TAG_* constants -- confirm against <sys/sysevent/svm.h>.
+ */
+static ev_obj_t md_tags[] = {
+ EVO_UNSPECIFIED,
+ EVO_METADEV,
+ EVO_MIRROR,
+ EVO_STRIPE,
+ EVO_RAID5,
+ EVO_TRANS,
+ EVO_REPLICA,
+ EVO_HSP,
+ EVO_HS,
+ EVO_SET,
+ EVO_DRIVE,
+ EVO_HOST,
+ EVO_MEDIATOR
+};
+
+/*
+ * bsearch() comparison routine for ev_table: node1 is the subclass
+ * string being looked up, node2 points at a table entry.
+ */
+static int
+ev_compare(const void *node1, const void *node2)
+{
+	const char		*wanted = (const char *)node1;
+	const struct node	*entry = (const struct node *)node2;
+
+	return (strcmp(wanted, entry->se_ev));
+}
+
+/*
+ * Raise the mdnotify event that corresponds to a sysevent.
+ *
+ * se_subclass is translated through ev_table and tag through md_tags.
+ * A subclass that is not in the table is sent as EV_EMPTY, and a tag
+ * outside md_tags as EVO_UNSPECIFIED.
+ */
+void
+do_mdnotify(char *se_subclass, uint32_t tag, set_t setno, md_dev64_t devid)
+{
+	struct node	*match;
+	evid_t		event = EV_EMPTY;
+	ev_obj_t	object = EVO_UNSPECIFIED;
+
+	/* Translate sysevent into mdnotify event */
+	match = bsearch(se_subclass, ev_table,
+	    (sizeof (ev_table) / sizeof (ev_table[0])),
+	    sizeof (ev_table[0]), ev_compare);
+	if (match != NULL)
+		event = match->md_ev;
+
+	/* only tags that index into md_tags are translated */
+	if (tag < (sizeof (md_tags) / sizeof (md_tags[0])))
+		object = md_tags[tag];
+
+	NOTIFY_MD(object, setno, devid, event);
+}
+
+/*
+ * External symbols from libsysevent and libnvpair which are not
+ * available in static forms
+ *
+ * The two handles come from dlopen() and the function pointers from
+ * dlsym(); load_sev_lib() fills them in and sets the function
+ * pointers back to NULL when loading fails.
+ */
+static void *se_handle = NULL, *nv_handle = NULL;
+static int (*_sysevent_post_event)(char *, char *, char *, char *,
+ nvlist_t *, sysevent_id_t *) = NULL;
+static int (*_nvlist_alloc)(nvlist_t **, uint_t, int) = NULL;
+static void (*_nvlist_free)(nvlist_t *) = NULL;
+static int (*_nvlist_add_uint32)(nvlist_t *, char *, uint32_t) = NULL;
+static int (*_nvlist_add_uint64)(nvlist_t *, char *, uint64_t) = NULL;
+static int (*_nvlist_add_string)(nvlist_t *, char *, char *) = NULL;
+
+/*
+ * Make sure the libsysevent and libnvpair entry points are loaded.
+ *
+ * Libraries and symbols that are already loaded are reused.  Returns
+ * 0 when every function pointer is usable and 1 otherwise.  Except
+ * when libsysevent itself cannot be opened, a failure closes the
+ * libraries and clears all the function pointers so that a later call
+ * starts over.
+ */
+static int
+load_sev_lib()
+{
+	/* Try to load the sysevent symbol */
+	if (se_handle == NULL)
+		se_handle = dlopen("/usr/lib/libsysevent.so.1", RTLD_LAZY);
+	if (se_handle == NULL)
+		return (1);
+
+	if (_sysevent_post_event == NULL) {
+		_sysevent_post_event = (int (*)(char *, char *, char *,
+		    char *, nvlist_t *, sysevent_id_t *))
+		    dlsym(se_handle, "sysevent_post_event");
+		if (_sysevent_post_event == NULL)
+			goto out;
+	}
+
+	/* Try to load the nvpair symbols */
+	if (nv_handle == NULL)
+		nv_handle = dlopen("/usr/lib/libnvpair.so.1", RTLD_LAZY);
+	if (nv_handle == NULL)
+		goto out;
+
+	if (_nvlist_alloc == NULL) {
+		_nvlist_alloc = (int (*)(nvlist_t **, uint_t, int))
+		    dlsym(nv_handle, "nvlist_alloc");
+		if (_nvlist_alloc == NULL)
+			goto out;
+	}
+	if (_nvlist_free == NULL) {
+		_nvlist_free = (void (*)(nvlist_t *))
+		    dlsym(nv_handle, "nvlist_free");
+		if (_nvlist_free == NULL)
+			goto out;
+	}
+	if (_nvlist_add_uint32 == NULL) {
+		_nvlist_add_uint32 = (int (*)(nvlist_t *, char *, uint32_t))
+		    dlsym(nv_handle, "nvlist_add_uint32");
+		if (_nvlist_add_uint32 == NULL)
+			goto out;
+	}
+	if (_nvlist_add_uint64 == NULL) {
+		_nvlist_add_uint64 = (int (*)(nvlist_t *, char *, uint64_t))
+		    dlsym(nv_handle, "nvlist_add_uint64");
+		if (_nvlist_add_uint64 == NULL)
+			goto out;
+	}
+	if (_nvlist_add_string == NULL) {
+		_nvlist_add_string = (int (*)(nvlist_t *, char *, char *))
+		    dlsym(nv_handle, "nvlist_add_string");
+		if (_nvlist_add_string == NULL)
+			goto out;
+	}
+
+	return (0);
+
+out:
+	/* a handle is forgotten only if dlclose() actually succeeded */
+	if ((se_handle != NULL) && (dlclose(se_handle) == 0))
+		se_handle = NULL;
+	if ((nv_handle != NULL) && (dlclose(nv_handle) == 0))
+		nv_handle = NULL;
+
+	_sysevent_post_event = NULL;
+	_nvlist_alloc = NULL;
+	_nvlist_free = NULL;
+	_nvlist_add_uint32 = NULL;
+	_nvlist_add_uint64 = NULL;
+	_nvlist_add_string = NULL;
+
+	return (1);
+}
+
+/*
+ * Log an SVM sysevent.
+ *
+ * The matching mdnotify event is always raised first.  The sysevent
+ * itself is posted only if the sysevent/nvpair libraries can be
+ * loaded and every attribute (version, tag, set number, device id and
+ * device name) can be added to the attribute list; otherwise the
+ * function returns quietly.  The result of the post is ignored.
+ */
+void
+meta_svm_sysevent(
+	char		*se_class,
+	char		*se_subclass,
+	uint32_t	tag,
+	set_t		setno,
+	md_dev64_t	devid
+)
+{
+	sysevent_id_t	eid;
+	nvlist_t	*attr_list;
+	char		*devname;
+	int		err;
+
+	/* Raise the mdnotify event before anything else */
+	do_mdnotify(se_subclass, tag, setno, devid);
+
+	/* Just get out if the sysevent symbol can't be loaded */
+	if (load_sev_lib())
+		return;
+
+	/* without an attribute list there is nothing to post or free */
+	if ((*_nvlist_alloc)(&attr_list, NV_UNIQUE_NAME, 0) != 0)
+		return;
+
+	/* version number, tag, set number and device id, in that order */
+	if (((*_nvlist_add_uint32)(attr_list, SVM_VERSION_NO,
+	    (uint32_t)SVM_VERSION) != 0) ||
+	    ((*_nvlist_add_uint32)(attr_list, SVM_TAG,
+	    (uint32_t)tag) != 0) ||
+	    ((*_nvlist_add_uint32)(attr_list, SVM_SET_NO,
+	    (uint32_t)setno) != 0) ||
+	    ((*_nvlist_add_uint64)(attr_list, SVM_DEV_ID,
+	    (uint64_t)devid) != 0))
+		goto done;
+
+	/* device name, or a placeholder when none can be worked out */
+	devname = obj2devname(tag, setno, devid);
+	if (devname == NULL) {
+		err = (*_nvlist_add_string)(attr_list, SVM_DEV_NAME,
+		    "unspecified");
+	} else {
+		err = (*_nvlist_add_string)(attr_list, SVM_DEV_NAME,
+		    devname);
+		free(devname);
+	}
+
+	/* Attempt to post event */
+	if (err == 0) {
+		(void) (*_sysevent_post_event)(se_class, se_subclass,
+		    SUNW_VENDOR, EP_SVM, attr_list, &eid);
+	}
+
+done:
+	(*_nvlist_free)(attr_list);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set.c b/usr/src/lib/lvm/libmeta/common/meta_set.c
new file mode 100644
index 0000000000..7634779ce5
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set.c
@@ -0,0 +1,5918 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <meta.h>
+#include <metad.h>
+#include <mdmn_changelog.h>
+#include <sys/lvm/md_crc.h>
+#include <sys/utsname.h>
+#include <sdssc.h>
+
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/svm.h>
+extern char *blkname(char *);
+
+/*
+ * Build a drive descriptor list from the drive records of a set.
+ *
+ * With MD_BYPASS_DAEMON set in flags the set record is fetched with
+ * metad_getsetbynum(), the side number is recomputed for this node
+ * and a fake set name is used; otherwise the record comes from
+ * getsetbyname() and is released with free_sr() before returning.
+ *
+ * Returns the head of the new list, or NULL when the set has no
+ * drives or an error occurred; ep tells the two apart.
+ */
+static md_drive_desc *
+dr2drivedesc(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	int		flags,
+	md_error_t	*ep
+)
+{
+	md_set_record	*sr;
+	md_drive_record	*dr;
+	mddrivename_t	*dnp;
+	md_drive_desc	*dd_head = NULL;
+	md_set_desc	*sd;
+
+	if (flags & MD_BYPASS_DAEMON) {
+		if ((sr = metad_getsetbynum(sp->setno, ep)) == NULL)
+			return (NULL);
+		/*
+		 * Without a set descriptor the side number can't be
+		 * determined, so fail here rather than hand a NULL
+		 * descriptor to getnodeside().
+		 */
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (NULL);
+		sideno = getnodeside(mynode(), sd);
+		sp = metafakesetname(sp->setno, sr->sr_setname);
+	} else {
+		if ((sr = getsetbyname(sp->setname, ep)) == NULL)
+			return (NULL);
+	}
+
+	assert(sideno != MD_SIDEWILD);
+
+	/*
+	 * WARNING:
+	 * The act of getting the dnp from the namespace means that we
+	 * will get the devid of the disk as recorded in the namespace.
+	 * This devid has the potential to be stale if the disk is being
+	 * replaced via a rebind, this means that any code that relies
+	 * on any of the dnp information should take the appropriate action
+	 * to preserve that information. For example in the rebind code the
+	 * devid of the new disk is saved off and then copied back in once
+	 * the code that has called this function has completed.
+	 */
+	for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
+		if ((dnp = metadrivename_withdrkey(sp, sideno, dr->dr_key,
+		    flags, ep)) == NULL) {
+			/* undo the partial list before reporting failure */
+			if (!(flags & MD_BYPASS_DAEMON))
+				free_sr(sr);
+			metafreedrivedesc(&dd_head);
+			return (NULL);
+		}
+
+		(void) metadrivedesc_append(&dd_head, dnp, dr->dr_dbcnt,
+		    dr->dr_dbsize, dr->dr_ctime, dr->dr_genid, dr->dr_flags);
+	}
+
+	if (!(flags & MD_BYPASS_DAEMON)) {
+		free_sr(sr);
+	}
+	return (dd_head);
+}
+
+/*
+ * Rebuild the side name list of drive dnp from the local set's
+ * namespace.
+ *
+ * Any existing side names are flushed first.  For a multi-node set
+ * only this node's side is looked up; for a traditional set every
+ * non-empty host slot is looked up, and a side that does not exist
+ * yet (ENOENT) is skipped.  Returns 0 on success and -1 on error.
+ */
+static int
+get_sidenmlist(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	mdsidenames_t	*sn, **sn_next;
+	int		i;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	metaflushsidenames(dnp);
+	sn_next = &dnp->side_names;
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * Only get sidenames for this node since
+		 * that is the only side information stored in
+		 * the local mddb for a multi-node diskset.
+		 */
+		if (sd->sd_mn_mynode) {
+			sn = Zalloc(sizeof (*sn));
+			sn->sideno = sd->sd_mn_mynode->nd_nodeid;
+			if ((sn->cname = meta_getnmentbykey(MD_LOCAL_SET,
+			    sn->sideno, dnp->side_names_key, &sn->dname,
+			    &sn->mnum, NULL, ep)) == NULL) {
+				if (sn->dname != NULL)
+					Free(sn->dname);
+				Free(sn);
+				return (-1);
+			}
+
+			/* Add to the end of the linked list */
+			assert(*sn_next == NULL);
+			*sn_next = sn;
+			sn_next = &sn->next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			sn = Zalloc(sizeof (*sn));
+			sn->sideno = i;
+			if ((sn->cname = meta_getnmentbykey(MD_LOCAL_SET,
+			    i+SKEW, dnp->side_names_key, &sn->dname,
+			    &sn->mnum, NULL, ep)) == NULL) {
+				/*
+				 * Whatever happens next, this entry is
+				 * discarded, so release a driver name the
+				 * lookup may have filled in before Free(sn).
+				 */
+				if (sn->dname != NULL)
+					Free(sn->dname);
+
+				/*
+				 * It is possible that during the add of a
+				 * host to have a 'missing' side as the side
+				 * for this disk will be added later. So ignore
+				 * the error. The 'missing' side will be added
+				 * once the addhosts process has completed.
+				 */
+				if (mdissyserror(ep, ENOENT)) {
+					mdclrerror(ep);
+					Free(sn);
+					continue;
+				}
+
+				Free(sn);
+				return (-1);
+			}
+
+			/* Add to the end of the linked list */
+			assert(*sn_next == NULL);
+			*sn_next = sn;
+			sn_next = &sn->next;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Turn a replica list into a drive descriptor list.
+ *
+ * Each distinct drive (compared by cname) that holds a replica gets
+ * one descriptor whose dd_dbcnt counts the replicas on that drive.
+ * Every descriptor is given the size of the smallest replica in the
+ * list, or the default database size for the set type when no
+ * positive size was found.  Returns the head of the new list; NULL
+ * when the replica list is empty or the set descriptor can't be
+ * obtained.
+ */
+static md_drive_desc *
+rl_to_dd(
+	mdsetname_t		*sp,
+	md_replicalist_t	*rlp,
+	md_error_t		*ep
+)
+{
+	md_replicalist_t	*rl;
+	md_replica_t		*r;
+	md_drive_desc		*dd = NULL;
+	md_drive_desc		*d;
+	int			found;
+	md_set_desc		*sd;
+	daddr_t			nblks = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (NULL);
+
+	/* find the smallest existing replica */
+	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+		r = rl->rl_repp;
+		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
+	}
+
+	if (nblks <= 0)
+		nblks = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE;
+
+	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+		r = rl->rl_repp;
+
+		found = 0;
+		for (d = dd; d != NULL; d = d->dd_next) {
+			if (strcmp(r->r_namep->drivenamep->cname,
+			    d->dd_dnp->cname) == 0) {
+				found = 1;
+				/*
+				 * Count the replica against the drive that
+				 * matched (d), not against the list head.
+				 */
+				d->dd_dbcnt++;
+				break;
+			}
+		}
+
+		/* first replica seen on this drive: start a new descriptor */
+		if (! found)
+			(void) metadrivedesc_append(&dd, r->r_namep->drivenamep,
+			    1, nblks, sd->sd_ctime, sd->sd_genid, MD_DR_OK);
+	}
+
+	return (dd);
+}
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Return the number of sets reported by the MD_IOCGETNSET ioctl.
+ * A non-zero answer is remembered and returned by later calls without
+ * another ioctl.  Returns 0 when the ioctl fails.
+ */
+set_t
+get_max_sets(md_error_t *ep)
+{
+	static set_t	nsets = 0;
+
+	/* a previous call already obtained the answer */
+	if (nsets != 0)
+		return (nsets);
+
+	if (metaioctl(MD_IOCGETNSET, &nsets, ep, NULL) != 0)
+		return (0);
+
+	return (nsets);
+}
+
+/*
+ * Return the number of mediators reported by the MD_MED_GET_NMED
+ * ioctl.  A non-zero answer is remembered and returned by later calls
+ * without another ioctl.  Returns 0 when the ioctl fails.
+ */
+int
+get_max_meds(md_error_t *ep)
+{
+	static int	nmeds = 0;
+
+	/* a previous call already obtained the answer */
+	if (nmeds != 0)
+		return (nmeds);
+
+	if (metaioctl(MD_MED_GET_NMED, &nmeds, ep, NULL) != 0)
+		return (0);
+
+	return (nmeds);
+}
+
+/*
+ * Return the side number of this node in set sp.
+ *
+ * Set number 0 always yields side 0.  MD_SIDEWILD is returned when
+ * the set descriptor can't be obtained; when this node has no side in
+ * the set, the result of mddserror(MDE_DS_HOSTNOSIDE) is returned.
+ */
+side_t
+getmyside(mdsetname_t *sp, md_error_t *ep)
+{
+	md_set_desc	*setdesc;
+	char		*nodename = NULL;
+	side_t		side;
+
+	if (sp->setno == 0)
+		return (0);
+
+	setdesc = metaget_setdesc(sp, ep);
+	if (setdesc == NULL)
+		return (MD_SIDEWILD);
+
+	nodename = mynode();
+	assert(nodename != NULL);
+
+	side = getnodeside(nodename, setdesc);
+	if (side == MD_SIDEWILD) {
+		return (mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, nodename,
+		    NULL, nodename));
+	}
+
+	return (side);
+}
+
+/*
+ * Get the set record for the set called setname.
+ *
+ * For a multi-node set the record from clnt_getset() is discarded and
+ * replaced by the one from clnt_mngetset(), returned through a cast.
+ * Returns NULL with ep set when a client call fails or when no such
+ * set exists (MDE_NO_SET).
+ */
+md_set_record *
+getsetbyname(char *setname, md_error_t *ep)
+{
+	md_set_record	*setrec = NULL;
+	md_mnset_record	*mnsetrec = NULL;
+	char		*msg;
+	size_t		msglen;
+
+	/* get set info from daemon */
+	if (clnt_getset(mynode(), setname, MD_SET_BAD, &setrec, ep) == -1)
+		return (NULL);
+
+	/* a traditional (non-multi-node) record is returned as it is */
+	if ((setrec != NULL) && !(MD_MNSET_REC(setrec)))
+		return (setrec);
+
+	if (setrec != NULL) {
+		/*
+		 * Record is for a multi-node set. Reissue call
+		 * to get mnset information. Need to free
+		 * record as if a non-multi-node set record since
+		 * that is what clnt_getset gave us. If in
+		 * the daemon, don't free since this is a pointer
+		 * into the setrecords array.
+		 */
+		if (! md_in_daemon) {
+			setrec->sr_flags &= ~MD_SR_MN;
+			free_sr(setrec);
+		}
+		if (clnt_mngetset(mynode(), setname, MD_SET_BAD, &mnsetrec,
+		    ep) == -1)
+			return (NULL);
+		if (mnsetrec != NULL)
+			return ((struct md_set_record *)mnsetrec);
+	}
+
+	/* no such set */
+	msglen = strlen(setname) + 30;
+	msg = Malloc(msglen);
+	(void) snprintf(msg, msglen, "setname \"%s\"", setname);
+	(void) mderror(ep, MDE_NO_SET, msg);
+	Free(msg);
+	return (NULL);
+}
+
+/*
+ * Get the set record for set number setno.
+ *
+ * For a multi-node set the record from clnt_getset() is discarded and
+ * replaced by the one from clnt_mngetset(), returned through a cast.
+ * Returns NULL with ep set when a client call fails or when no such
+ * set exists (MDE_NO_SET).
+ */
+md_set_record *
+getsetbynum(set_t setno, md_error_t *ep)
+{
+	/* start from NULL, as getsetbyname() does */
+	md_set_record	*sr = NULL;
+	md_mnset_record	*mnsr = NULL;
+	char		buf[100];
+
+	if (clnt_getset(mynode(), NULL, setno, &sr, ep) == -1)
+		return (NULL);
+
+	if (sr != NULL) {
+		/*
+		 * Returned record could be for a multi-node set or a
+		 * non-multi-node set.
+		 */
+		if (MD_MNSET_REC(sr)) {
+			/*
+			 * Record is for a multi-node set. Reissue call
+			 * to get mnset information. Need to free
+			 * record as if a non-multi-node set record since
+			 * that is what clnt_getset gave us. If in
+			 * the daemon, don't free since this is a pointer
+			 * into the setrecords array.
+			 */
+			if (! md_in_daemon) {
+				sr->sr_flags &= ~MD_SR_MN;
+				free_sr(sr);
+			}
+			if (clnt_mngetset(mynode(), NULL, setno, &mnsr,
+			    ep) == -1)
+				return (NULL);
+			if (mnsr != NULL)
+				return ((struct md_set_record *)mnsr);
+		} else {
+			return (sr);
+		}
+	}
+
+	/* no such set */
+	(void) snprintf(buf, sizeof (buf), "setno %u", setno);
+	(void) mderror(ep, MDE_NO_SET, buf);
+	return (NULL);
+}
+
+/*
+ * Check whether drive dnp is used by any device in set sp.
+ *
+ * Returns -1 if the device list can't be obtained, the result of
+ * mddserror(MDE_DS_DRIVEINUSE) if a device sits on the drive, and 0
+ * otherwise.
+ */
+int
+meta_check_drive_inuse(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	int		check_db,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*devlist = NULL;
+	mdnamelist_t	*cur;
+	int		result = 0;
+
+	/* get all underlying partitions */
+	if (meta_getalldevs(sp, &devlist, check_db, ep) != 0)
+		return (-1);
+
+	/* the first device found on the drive settles it */
+	for (cur = devlist; cur != NULL; cur = cur->next) {
+		if (strcmp(dnp->cname, cur->namep->drivenamep->cname) != 0)
+			continue;
+
+		result = mddserror(ep, MDE_DS_DRIVEINUSE, sp->setno,
+		    NULL, dnp->cname, sp->setname);
+		break;
+	}
+
+	/* release the list whether or not the drive was found */
+	metafreenamelist(devlist);
+	return (result);
+}
+
+/*
+ * simple check for ownership
+ *
+ * Returns 0 for the local set, for a set this host owns, and for a
+ * set that has no drive descriptors.  Returns -1 when the ownership
+ * query, the set descriptor or the drive descriptor lookup fails, and
+ * the result of mddserror(MDE_DS_NOOWNER) when the set has drives but
+ * this host does not own it.
+ *
+ * When a set without drive descriptors nevertheless shows some
+ * ownership, that ownership is cleaned up on a best-effort basis
+ * before 0 is returned.
+ */
+int
+meta_check_ownership(mdsetname_t *sp, md_error_t *ep)
+{
+ int ownset;
+ md_set_desc *sd;
+ md_drive_desc *dd;
+ md_replicalist_t *rlp = NULL;
+ md_error_t xep = mdnullerror; /* scratch error for the cleanup path */
+
+ /* the local set needs no ownership check */
+ if (metaislocalset(sp))
+ return (0);
+
+ ownset = own_set(sp, NULL, TRUE, ep);
+ if (! mdisok(ep))
+ return (-1);
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
+ if (! mdisok(ep))
+ return (-1);
+
+ /* If we have no drive descriptors, check for no ownership */
+ if (dd == NULL) {
+ if (ownset == MD_SETOWNER_NONE)
+ return (0);
+
+ /* If ownership somehow has come to exist, we must clean up */
+
+ /*
+ * Every step below reports into xep, which is cleared again
+ * after a failure, so the caller's ep is left untouched.
+ */
+ if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp,
+ &xep) < 0)
+ mdclrerror(&xep);
+
+ /* derive a drive list from whatever replicas were found */
+ if ((dd = rl_to_dd(sp, rlp, &xep)) == NULL)
+ if (! mdisok(&xep))
+ mdclrerror(&xep);
+
+ /* drives are released only for non-MN, non-AT sets */
+ if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+ if (rel_own_bydd(sp, dd, TRUE, &xep))
+ mdclrerror(&xep);
+ }
+
+ if (halt_set(sp, &xep))
+ mdclrerror(&xep);
+
+ metafreereplicalist(rlp);
+
+ metafreedrivedesc(&dd);
+
+ return (0);
+ }
+
+ /*
+ * NOTE(review): presumably dd is the list that metaget_drivedesc()
+ * stored in sd->sd_drvs, so this frees the list fetched above --
+ * confirm.
+ */
+ metafreedrivedesc(&sd->sd_drvs);
+
+ if (ownset == MD_SETOWNER_YES)
+ return (0);
+
+ return (mddserror(ep, MDE_DS_NOOWNER, sp->setno, NULL, NULL,
+ sp->setname));
+}
+
+/*
+ * Check that host "hostname" owns diskset sp.
+ *
+ * The local set is always considered owned.  The host must be a member
+ * of the set; its ownership is then queried with clnt_ownset().
+ *
+ * Returns:
+ *	0	- host owns the set, or the set has no drive descriptors
+ *	-1	- a lookup or the ownership query failed (ep is set)
+ *	otherwise the value of mddserror() with MDE_DS_NODENOTINSET or
+ *	MDE_DS_NODEISNOTOWNER in ep
+ */
+int
+meta_check_ownership_on_host(mdsetname_t *sp, char *hostname, md_error_t *ep)
+{
+	md_set_desc	*setdesc;
+	md_drive_desc	*drives;
+	int		is_owner;
+
+	if (metaislocalset(sp))
+		return (0);
+
+	setdesc = metaget_setdesc(sp, ep);
+	if (setdesc == NULL)
+		return (-1);
+
+	/* the host has to be one of the nodes of the set */
+	if (getnodeside(hostname, setdesc) == MD_SIDEWILD) {
+		return (mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
+		    hostname, NULL, sp->setname));
+	}
+
+	drives = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
+	if (! mdisok(ep))
+		return (-1);
+
+	if (clnt_ownset(hostname, sp, &is_owner, ep) == -1)
+		return (-1);
+
+	/* without drives there is nothing that could be owned */
+	if (drives == NULL)
+		return (0);
+
+	metafreedrivedesc(&setdesc->sd_drvs);
+
+	if (is_owner == TRUE)
+		return (0);
+
+	return (mddserror(ep, MDE_DS_NODEISNOTOWNER, sp->setno, hostname, NULL,
+	    sp->setname));
+}
+
+/*
+ * Tell whether a node appears in a multinode diskset membership list.
+ *
+ * The node is identified by node_id when that is non-zero, otherwise by
+ * node_name; when neither is supplied the answer is "not a member".
+ * nl is the list as returned from meta_read_nodelist.  Allocation and
+ * release of the list stay with the caller, because this routine is
+ * expected to be called many times against the same list.
+ *
+ * Returns:
+ *	1 - node is a member
+ *	0 - node is not a member
+ */
+int
+meta_is_member(
+	char				*node_name,
+	md_mn_nodeid_t			node_id,
+	mndiskset_membershiplist_t	*nl
+)
+{
+	mndiskset_membershiplist_t	*entry;
+	int				match_by_name;
+
+	/* nothing to look for */
+	if (node_id == 0 && node_name == NULL)
+		return (0);
+
+	/* a non-zero node id takes precedence over the name */
+	match_by_name = (node_id == 0);
+
+	for (entry = nl; entry != NULL; entry = entry->next) {
+		if (match_by_name) {
+			if (strcmp(entry->msl_node_name, node_name) == 0)
+				return (1);
+		} else if (entry->msl_node_id == node_id) {
+			return (1);
+		}
+	}
+
+	/* ran off the end of the list without a match */
+	return (0);
+}
+
+/*
+ * meta_getside_devinfo should go to the host that has the device, to
+ * return the device name, driver name and minor number.
+ * We can take the big cheat for now, since it is a requirement
+ * that the device names and device numbers are the same, and
+ * just get the info locally.
+ *
+ * This routine is very similar to meta_getnextside_devinfo except
+ * that the specific side to be used is being passed in (and is
+ * currently not consulted, hence ARGSUSED).
+ *
+ * Each of ret_bname, ret_dname and ret_mnum may be NULL when the caller
+ * is not interested in that value.  Returned strings are allocated with
+ * Strdup().  On failure no string is handed back: *ret_bname and
+ * *ret_dname are left NULL and *ret_mnum is left NODEV32.
+ *
+ * Exit status:
+ *	1 - Side info has been returned
+ *	-1 - An error has been detected (ep is set)
+ * (0, "no more side info", is never returned by this routine.)
+ */
+/*ARGSUSED*/
+int
+meta_getside_devinfo(
+	mdsetname_t	*sp,		/* for this set */
+	char		*bname,		/* local block name (myside) */
+	side_t		sideno,		/* sideno */
+	char		**ret_bname,	/* block device name of returned side */
+	char		**ret_dname,	/* driver name of returned side */
+	minor_t		*ret_mnum,	/* minor number of returned side */
+	md_error_t	*ep
+)
+{
+	mdname_t	*np;
+	mdcinfo_t	*cinfo = NULL;
+
+	if (ret_bname != NULL)
+		*ret_bname = NULL;
+	if (ret_dname != NULL)
+		*ret_dname = NULL;
+	if (ret_mnum != NULL)
+		*ret_mnum = NODEV32;
+
+	if ((np = metaname(&sp, bname, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * NOTE (future) - There will be more work here once devids are
+	 * integrated into disksets.  Then the side should be used to find
+	 * the correct host and the b/d names should be gotten from that
+	 * host.
+	 */
+
+	/*
+	 * Do the one lookup that can fail before anything is allocated,
+	 * so that an error return never leaves a duplicated block name
+	 * behind in *ret_bname.
+	 */
+	if (ret_dname != NULL) {
+		if ((cinfo = metagetcinfo(np, ep)) == NULL)
+			return (-1);
+	}
+
+	/*
+	 * Return the side info.
+	 */
+	if (ret_bname != NULL)
+		*ret_bname = Strdup(np->bname);
+
+	if (ret_dname != NULL)
+		*ret_dname = Strdup(cinfo->dname);
+
+	if (ret_mnum != NULL)
+		*ret_mnum = meta_getminor(np->dev);
+
+	return (1);
+}
+
+/*
+ * Get the information on the device from the remote node using the devid
+ * of the disk.
+ *
+ * *sideno is both input and output: on entry it holds the previously
+ * returned side (MD_SIDEWILD for the first call), on return the side the
+ * information belongs to.  Each of ret_bname, ret_dname and ret_mnum may
+ * be NULL when the caller is not interested in that value; returned
+ * strings are allocated with Strdup().
+ *
+ * For the local set, metadevices, drives without a devid, multi-node
+ * sets, a drive whose minor name cannot be determined, and remote nodes
+ * that do not support devids (ENOTSUP), the local information is simply
+ * copied.
+ *
+ * Exit status:
+ *	0 - No more side info to return
+ *	1 - More side info's to return
+ *	-1 - An error has been detected
+ */
+int
+meta_getnextside_devinfo(
+	mdsetname_t	*sp,		/* for this set */
+	char		*bname,		/* local block name (myside) */
+	side_t		*sideno,	/* previous sideno & returned sideno */
+	char		**ret_bname,	/* block device name of returned side */
+	char		**ret_dname,	/* driver name of returned side */
+	minor_t		*ret_mnum,	/* minor number of returned side */
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd = NULL;
+	int		i;
+	mdname_t	*np;
+	mddrivename_t	*dnp;
+	mdcinfo_t	*cinfo = NULL;
+	char		*devidstr = NULL;
+	int		devidstrlen;
+	md_dev64_t	retdev = NODEV64;
+	char		*ret_devname = NULL;
+	char		*ret_blkdevname = NULL;
+	char		*ret_driver = NULL;
+	char		*nodename = NULL;
+	int		fd;
+	int		ret = -1;
+	char		*minor_name = NULL;
+	md_mnnode_desc	*nd;
+
+	if (ret_bname != NULL)
+		*ret_bname = NULL;
+	if (ret_dname != NULL)
+		*ret_dname = NULL;
+	if (ret_mnum != NULL)
+		*ret_mnum = NODEV32;
+
+	if (metaislocalset(sp)) {
+		/* no more sides - we are done */
+		if (*sideno != MD_SIDEWILD)
+			return (0);
+
+		/* First time through - set up return sideno */
+		*sideno = 0;
+	} else {
+
+		/*
+		 * Find the next sideno, starting after the one given.
+		 */
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			if ((*sideno == MD_SIDEWILD) &&
+			    (nd != (struct md_mnnode_desc *)NULL)) {
+				*sideno = nd->nd_nodeid;
+			} else {
+				while (nd) {
+					/*
+					 * Found given sideno, now find
+					 * next sideno, if there is one.
+					 */
+					if ((*sideno == nd->nd_nodeid) &&
+					    (nd->nd_next !=
+					    (struct md_mnnode_desc *)NULL)) {
+						*sideno =
+						    nd->nd_next->nd_nodeid;
+						break;
+					}
+					nd = nd->nd_next;
+				}
+				if (nd == NULL) {
+					return (0);
+				}
+			}
+			if (*sideno == MD_SIDEWILD)
+				return (0);
+		} else {
+			/* Find next full slot */
+			for (i = (*sideno)+1; i < MD_MAXSIDES; i++) {
+				if (sd->sd_nodes[i][0] != '\0')
+					break;
+			}
+
+			/* No more sides - we are done */
+			if (i == MD_MAXSIDES)
+				return (0);
+
+			/* Set up the return sideno */
+			*sideno = i;
+			nodename = (char *)sd->sd_nodes[i];
+		}
+	}
+
+	/*
+	 * Need to pass the node the devid of the disk and get it to
+	 * send back the details of the disk from that side.
+	 */
+	if ((np = metaname(&sp, bname, ep)) == NULL)
+		return (-1);
+
+	dnp = np->drivenamep;
+
+	/*
+	 * Do the one lookup that can fail before anything is allocated,
+	 * so that an error return never leaves a duplicated block name
+	 * behind in *ret_bname.
+	 */
+	if (ret_dname != NULL) {
+		if ((cinfo = metagetcinfo(np, ep)) == NULL)
+			return (-1);
+	}
+
+	/*
+	 * By default, set up the parameters so that they are copied out.
+	 */
+	if (ret_bname != NULL)
+		*ret_bname = Strdup(np->bname);
+
+	if (ret_dname != NULL)
+		*ret_dname = Strdup(cinfo->dname);
+
+	if (ret_mnum != NULL)
+		*ret_mnum = meta_getminor(np->dev);
+
+	/*
+	 * Try some optimization.  If this is the local set or the device
+	 * is a metadevice then just copy the information.  If the device
+	 * does not have a devid (due to not having a minor name) then
+	 * fall back to the pre-devid behaviour of copying the information
+	 * on the device: this is okay because the sanity checks before this
+	 * call would have found any issues with the device.  If it's a
+	 * multi-node diskset also just return ie. copy.
+	 * (sd is only examined when the set is not the local set, in
+	 * which case it was filled in above.)
+	 */
+	if (metaislocalset(sp) || metaismeta(np) || (dnp->devid == NULL) ||
+	    (MD_MNSET_DESC(sd)))
+		return (1);
+
+	if (np->minor_name == (char *)NULL) {
+		/*
+		 * Have to get the minor name then.  The slice should exist
+		 * on the disk because it will have already been repartitioned
+		 * up prior to getting to this point.
+		 */
+		if ((fd = open(np->bname, (O_RDONLY|O_NDELAY), 0)) < 0) {
+			(void) mdsyserror(ep, errno, np->bname);
+			return (-1);
+		}
+		if (devid_get_minor_name(fd, &minor_name) != 0 ||
+		    minor_name == NULL) {
+			/*
+			 * No minor name could be obtained.  Treat this like
+			 * a device without a devid and keep the copied
+			 * information rather than handing a NULL string
+			 * to Strdup()/strlen() below.
+			 */
+			(void) close(fd);
+			return (1);
+		}
+		np->minor_name = Strdup(minor_name);
+		devid_str_free(minor_name);
+		(void) close(fd);
+	}
+
+	/* allocate extra space for "/" and NULL hence +2 */
+	devidstrlen = strlen(dnp->devid) + strlen(np->minor_name) + 2;
+	devidstr = (char *)Malloc(devidstrlen);
+
+	/*
+	 * As a minor name is supplied then the ret_devname will be
+	 * appropriate to that minor_name and in this case it will be
+	 * a block device ie /dev/dsk.
+	 */
+	(void) snprintf(devidstr, devidstrlen,
+	    "%s/%s", dnp->devid, np->minor_name);
+
+	ret = clnt_devinfo_by_devid(nodename, sp, devidstr, &retdev,
+	    np->bname, &ret_devname, &ret_driver, ep);
+
+	Free(devidstr);
+
+	/*
+	 * If the other side is not running device id in disksets,
+	 * 'ret' is set to ENOTSUP in which case we fallback to
+	 * the existing behaviour
+	 */
+	if (ret == ENOTSUP)
+		return (1);
+	else if (ret == -1)
+		return (-1);
+
+	/*
+	 * ret_devname comes from the rpc call and is a
+	 * raw device name.  We need to make this into a
+	 * block device via blkname for further processing.
+	 * Unfortunately, when our device id isn't found in
+	 * the system, the rpc call will return a " " in
+	 * ret_devname in which case we need to fill that in
+	 * as ret_blkname because blkname of " " returns NULL.
+	 *
+	 * The default copies made above are released before they are
+	 * replaced so that they are not leaked.
+	 */
+	if (ret_bname != NULL && ret_devname != NULL) {
+		ret_blkdevname = blkname(ret_devname);
+		if (*ret_bname != NULL)
+			Free(*ret_bname);
+		if (ret_blkdevname == NULL)
+			*ret_bname = Strdup(ret_devname);
+		else
+			*ret_bname = Strdup(ret_blkdevname);
+	}
+
+	if (ret_dname != NULL && ret_driver != NULL) {
+		if (*ret_dname != NULL)
+			Free(*ret_dname);
+		*ret_dname = Strdup(ret_driver);
+	}
+
+	if (ret_mnum != NULL)
+		*ret_mnum = meta_getminor(retdev);
+
+	return (1);
+}
+
+/*
+ * Search the disksets numbered 1 .. get_max_sets() - 1 for drive dnp.
+ *
+ * *spp is set to the set containing the drive, or to NULL when no set
+ * contains it.  With bypass_daemon set, set names are fabricated with
+ * metafakesetname() instead of being looked up with metasetnosetname().
+ * Sets that do not exist (MDE_NO_SET) are skipped; a missing database
+ * (MDE_DB_NODB) ends the search without error.
+ *
+ * Returns 0 when the search completed (found or not), -1 on error.
+ */
+int
+meta_is_drive_in_anyset(
+	mddrivename_t	*dnp,
+	mdsetname_t	**spp,
+	int		bypass_daemon,
+	md_error_t	*ep
+)
+{
+	set_t		nsets;
+	set_t		cur;
+	mdsetname_t	*candidate;
+	int		found;
+
+	nsets = get_max_sets(ep);
+	if (nsets == 0)
+		return (-1);
+
+	assert(spp != NULL);
+	*spp = NULL;
+
+	for (cur = 1; cur < nsets; cur++) {
+		if (bypass_daemon) {
+			candidate = metafakesetname(cur, NULL);
+		} else {
+			candidate = metasetnosetname(cur, ep);
+			if (candidate == NULL) {
+				/* no database at all: nothing can match */
+				if (mdismddberror(ep, MDE_DB_NODB)) {
+					mdclrerror(ep);
+					return (0);
+				}
+				/* anything but "no such set" is fatal */
+				if (! mdiserror(ep, MDE_NO_SET))
+					return (-1);
+				mdclrerror(ep);
+				continue;
+			}
+		}
+
+		found = meta_is_drive_in_thisset(candidate, dnp,
+		    bypass_daemon, ep);
+		if (found == -1) {
+			if (! mdiserror(ep, MDE_NO_SET))
+				return (-1);
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (found) {
+			*spp = candidate;
+			break;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Tell whether drive dnp is one of the drives of diskset sp.
+ *
+ * With bypass_daemon set the drive list is built directly with
+ * dr2drivedesc(); otherwise the list cached in the set descriptor is
+ * obtained through metaget_drivedesc().
+ *
+ * Returns:
+ *	1	- drive is in the set
+ *	0	- drive is not in the set
+ *	-1	- the drive list could not be obtained (ep is set)
+ */
+int
+meta_is_drive_in_thisset(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	int		bypass_daemon,
+	md_error_t	*ep
+)
+{
+	md_drive_desc	*dd, *p;
+	int		found = 0;
+
+	if (bypass_daemon)
+		dd = dr2drivedesc(sp, MD_SIDEWILD,
+		    (MD_BASICNAME_OK | MD_BYPASS_DAEMON), ep);
+	else
+		dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+
+	/* an empty list is only an error when ep says so */
+	if (dd == NULL)
+		return (mdisok(ep) ? 0 : -1);
+
+	for (p = dd; p != NULL; p = p->dd_next) {
+		if (strcmp(p->dd_dnp->cname, dnp->cname) == 0) {
+			found = 1;
+			break;
+		}
+	}
+
+	/*
+	 * The list obtained through metaget_drivedesc() stays cached in
+	 * the set descriptor and must not be freed here.  The list built
+	 * for the bypass case belongs to this call only, so release it
+	 * instead of leaking it.
+	 */
+	if (bypass_daemon)
+		metafreedrivedesc(&dd);
+
+	return (found);
+}
+
+int
+meta_set_balance(
+ mdsetname_t *sp, /* diskset whose replicas are to be balanced */
+ md_error_t *ep /* receives the error on failure */
+)
+{ /*
+ * Rebalance the database replicas of diskset sp over the drives that
+ * are currently in the set.
+ *
+ * Sequence: check ownership, block signals, lock the set on its nodes
+ * (for a multi-node set only on nodes flagged MD_MN_NODE_ALIVE, followed
+ * by a suspend of class 1 rpc.mdcommd messages), call meta_db_balance(),
+ * then resume, unlock, restore signals and flush the cached set name.
+ *
+ * Returns the value of meta_db_balance() (0 on success), or -1 when a
+ * check, lock, suspend, resume or unlock step failed.
+ */
+ md_set_desc *sd;
+ md_drive_desc *dd, *curdd;
+ daddr_t dbsize;
+ daddr_t nblks;
+ int i;
+ int rval = 0;
+ sigset_t oldsigs;
+ md_setkey_t *cl_sk;
+ md_error_t xep = mdnullerror;
+ md_mnnode_desc *nd;
+ int suspend1_flag = 0;
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ dbsize = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE;
+
+ /* Make sure we own the set */
+ if (meta_check_ownership(sp, ep) != 0)
+ return (-1);
+
+ /* END CHECK CODE */
+
+ /*
+ * Get drive descriptors for the drives that are currently in the set.
+ */
+ curdd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
+
+ if (! mdisok(ep))
+ return (-1);
+
+ /*
+ * Find the minimum replica size in use, or fall back to the default
+ * chosen above when it cannot be determined.
+ */
+ if ((nblks = meta_db_minreplica(sp, ep)) < 0)
+ mdclrerror(ep);
+ else
+ dbsize = nblks; /* adjust replica size */
+
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ /*
+ * Lock the set on current set members.
+ * For MN diskset lock_set and SUSPEND are used to protect against
+ * other meta* commands running on the other nodes.
+ *
+ * NOTE(review): when a lock or suspend fails part way through, the
+ * code at "out" still visits every node, including nodes that were
+ * never locked - confirm that unlocking those is harmless.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ nd = nd->nd_next;
+ }
+ /*
+ * Lock out other meta* commands by suspending
+ * class 1 messages across the diskset.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename,
+ COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+ MD_MSCF_NO_FLAGS, ep)) {
+ rval = -1;
+ goto out;
+ }
+ suspend1_flag = 1;
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0') continue;
+
+ if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ }
+ }
+
+ /* We are not adding or deleting any drives, just balancing */
+ dd = NULL;
+
+ /*
+ * Balance the DB's according to the list of existing drives and the
+ * list of added drives (none here, dd is NULL).
+ */
+ if ((rval = meta_db_balance(sp, dd, curdd, dbsize, ep)) == -1)
+ goto out;
+
+out:
+ /*
+ * Unlock diskset by resuming class 1 messages across the diskset.
+ * Just resume all classes so that resume is the same whether
+ * just one class was locked or all classes were locked.
+ * (The resume below is issued with MD_MSG_CLASS0.)
+ */
+ if (suspend1_flag) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+ sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+ /*
+ * We are here because we failed to resume
+ * rpc.mdcommd. However we potentially have
+ * an error from the previous call
+ * (meta_db_balance). If the previous call
+ * did fail, we keep that error and discard
+ * the resume error; otherwise the resume
+ * error is moved into ep. Either way a
+ * perror is generated with the string,
+ * "Unable to resume...".
+ * Setting rval to -1 ensures that in the
+ * next iteration of the loop, ep is not
+ * clobbered.
+ */
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ else
+ mdclrerror(&xep);
+ rval = -1;
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to resume rpc.mdcommd."));
+ }
+ nd = nd->nd_next;
+ }
+ }
+
+ /*
+ * Unlock the set. Only the first unlock failure is reported
+ * through ep; later ones just keep rval at -1.
+ *
+ * NOTE(review): unlike the multi-node loop, the traditional loop
+ * below does not clear xep when an earlier error is already being
+ * reported - confirm whether that is intentional.
+ */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ else
+ mdclrerror(&xep);
+ rval = -1;
+ }
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ }
+ }
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ cl_set_setkey(NULL);
+
+ metaflushsetname(sp);
+
+ return (rval);
+}
+
+int
+meta_set_destroy(
+ mdsetname_t *sp, /* traditional diskset to remove from this node */
+ int lock_set, /* TRUE: lock the set on this node, unlock on exit */
+ md_error_t *ep /* receives the error on failure */
+)
+{ /*
+ * Remove a traditional (non multi-node) diskset from the local node.
+ *
+ * The drive records are deleted on the local node, ownership of each
+ * drive is released, and - only when this node is the last user of the
+ * set - the replicas are detached and the mediator hosts removed.
+ * Finally the set is halted, the mediator hosts are updated and the set
+ * record is deleted locally. Most failures of individual steps are
+ * deliberately ignored (their error is cleared) so that as much as
+ * possible gets torn down.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+ int i;
+ med_rec_t medr;
+ md_set_desc *sd;
+ md_drive_desc *dd, *p, *p1;
+ mddrivename_t *dnp;
+ mdname_t *np;
+ mdnamelist_t *nlp = NULL;
+ int num_users = 0;
+ int has_set;
+ side_t mysideno;
+ sigset_t oldsigs;
+ md_error_t xep = mdnullerror;
+ md_setkey_t *cl_sk;
+ int rval = 0;
+ int delete_end = 1;
+
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, ep) < 0)
+ return (-1);
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ if (! mdisok(ep))
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * meta_set_destroy should not be called for a MN diskset.
+ * This routine destroys a set without communicating this information
+ * to the other nodes which would lead to an inconsistency in
+ * the MN diskset.
+ * No error is recorded in ep on this path; only -1 is returned.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ rval = -1;
+ goto out;
+ }
+
+ /* Continue if a traditional diskset */
+
+ /*
+ * Check to see who has the set. If we are not the last user of the
+ * set, we will not touch the replicas.
+ * A node counts as a user unless nodehasset() returns a negative
+ * value for it.
+ */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ has_set = nodehasset(sp, sd->sd_nodes[i], NHS_NST_EQ,
+ ep);
+
+ if (has_set < 0) {
+ mdclrerror(ep);
+ } else
+ num_users++;
+ }
+
+ if ((dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep)) == NULL) {
+ if (! mdisok(ep)) {
+ rval = -1;
+ goto out;
+ }
+ }
+
+ if (setup_db_bydd(sp, dd, TRUE, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+
+ if (lock_set == TRUE) {
+ /* Lock the set on our side */
+ if (clnt_lock_set(mynode(), sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /*
+ * A traditional diskset has no diskset stale information to send
+ * since there can only be one owner node at a time.
+ */
+ if (snarf_set(sp, FALSE, ep))
+ mdclrerror(ep);
+
+ if (dd != NULL) {
+ /*
+ * Make sure that no drives are in use as parts of metadrives
+ * or hot spare pools, this is one of the few error conditions
+ * that will stop this routine, unless the environment has
+ * META_DESTROY_SET_OK set, in which case, the operation will
+ * proceed.
+ */
+ if (getenv("META_DESTROY_SET_OK") == NULL) {
+ for (p = dd; p != NULL; p = p->dd_next) {
+ dnp = p->dd_dnp;
+
+ i = meta_check_drive_inuse(sp, dnp, FALSE, ep);
+ if (i == -1) {
+ /* need xep - wire calls clear error */
+ i = metaget_setownership(sp, &xep);
+ if (i == -1) {
+ rval = -1;
+ goto out;
+ }
+
+ mysideno = getmyside(sp, &xep);
+
+ if (mysideno == MD_SIDEWILD) {
+ rval = -1;
+ goto out;
+ }
+
+ if (sd->sd_isown[mysideno] == FALSE)
+ if (halt_set(sp, &xep)) {
+ rval = -1;
+ goto out;
+ }
+
+ rval = -1;
+ goto out;
+ }
+ }
+ }
+
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip non local nodes */
+ if (strcmp(mynode(), sd->sd_nodes[i]) != 0)
+ continue;
+
+ if (clnt_deldrvs(sd->sd_nodes[i], sp, dd, ep))
+ mdclrerror(ep);
+ }
+
+ /*
+ * Go thru each drive and individually delete the replicas.
+ * This way we can ignore individual errors.
+ */
+ for (p = dd; p != NULL; p = p->dd_next) {
+ uint_t rep_slice;
+
+ dnp = p->dd_dnp;
+ if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
+ (((np = metaslicename(dnp, rep_slice, ep))
+ == NULL) &&
+ ((np = metaslicename(dnp, MD_SLICE0, ep))
+ == NULL))) {
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * Same lookup again, this time clearing the error left
+ * behind when the replica slice was not found and
+ * slice 0 was used instead.
+ */
+ if ((np = metaslicename(dnp,
+ rep_slice, ep)) == NULL) {
+ if ((np = metaslicename(dnp,
+ MD_SLICE0, ep)) == NULL) {
+ rval = -1;
+ goto out;
+ }
+ mdclrerror(ep);
+ }
+
+ /*
+ * Yes this is UGLY!!! The drive is temporarily cut
+ * out of the list so that rel_own_bydd() sees a
+ * one-element list; the link is restored afterwards.
+ */
+ p1 = p->dd_next;
+ p->dd_next = NULL;
+ if (rel_own_bydd(sp, p, FALSE, ep))
+ mdclrerror(ep);
+ p->dd_next = p1;
+
+ if (p->dd_dbcnt == 0)
+ continue;
+
+ /*
+ * Skip the replica removal if we are not the last user
+ */
+ if (num_users != 1)
+ continue;
+
+ nlp = NULL;
+ (void) metanamelist_append(&nlp, np);
+ if (meta_db_detach(sp, nlp,
+ (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL, ep))
+ mdclrerror(ep);
+ metafreenamelist(nlp);
+ }
+ }
+
+ if (halt_set(sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+
+ /* Setup the mediator record */
+ (void) memset(&medr, '\0', sizeof (med_rec_t));
+ medr.med_rec_mag = MED_REC_MAGIC;
+ medr.med_rec_rev = MED_REC_REV;
+ medr.med_rec_fl = 0;
+ medr.med_rec_sn = sp->setno;
+ (void) strcpy(medr.med_rec_snm, sp->setname);
+ medr.med_rec_meds = sd->sd_med; /* structure assignment */
+ (void) memset(&medr.med_rec_data, '\0', sizeof (med_data_t));
+ medr.med_rec_foff = 0;
+
+ /*
+ * If we are the last remaining user, then remove the mediator hosts
+ */
+ if (num_users == 1) {
+ for (i = 0; i < MED_MAX_HOSTS; i++) {
+ if (medr.med_rec_meds.n_lst[i].a_cnt != 0)
+ SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE,
+ SVM_TAG_MEDIATOR, sp->setno, i);
+ (void) memset(&medr.med_rec_meds.n_lst[i], '\0',
+ sizeof (md_h_t));
+ }
+ medr.med_rec_meds.n_cnt = 0;
+ } else { /* Remove this host from the mediator node list. */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Copy non local node */
+ if (strcmp(mynode(), sd->sd_nodes[i]) != 0) {
+ (void) strcpy(medr.med_rec_nodes[i],
+ sd->sd_nodes[i]);
+ continue;
+ }
+
+ /* Clear local node */
+ (void) memset(&medr.med_rec_nodes[i], '\0',
+ sizeof (md_node_nm_t));
+ }
+ }
+
+ crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+ /*
+ * If the client is part of a cluster put the DCS service
+ * into a deleting state.
+ *
+ * NOTE(review): when this fails for a set that is not an autotake
+ * set, the error is cleared and control goes to "out" with rval
+ * still 0, so the caller sees success although the set record was
+ * not deleted - confirm that this is intended.
+ */
+ if (sdssc_delete_begin(sp->setname) == SDSSC_ERROR) {
+ if (metad_isautotakebyname(sp->setname)) {
+ delete_end = 0;
+ } else {
+ mdclrerror(ep);
+ goto out;
+ }
+ }
+
+ /* Inform the mediator hosts of the new information */
+ for (i = 0; i < MED_MAX_HOSTS; i++) {
+ if (sd->sd_med.n_lst[i].a_cnt == 0)
+ continue;
+
+ if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
+ mdclrerror(ep);
+ }
+
+ /* Delete the set locally */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip non local nodes */
+ if (strcmp(mynode(), sd->sd_nodes[i]) != 0)
+ continue;
+
+ if (clnt_delset(sd->sd_nodes[i], sp, ep) == -1)
+ mdclrerror(ep);
+ }
+ if (delete_end &&
+ sdssc_delete_end(sp->setname, SDSSC_COMMIT) == SDSSC_ERROR)
+ rval = -1;
+
+out:
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+
+ /*
+ * NOTE(review): the unlock below runs whenever lock_set is TRUE,
+ * also on paths that jumped here before the lock was taken -
+ * confirm that unlocking an unlocked set is harmless.
+ */
+ if (lock_set == TRUE) {
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ cl_set_setkey(NULL);
+ }
+
+ metaflushsetname(sp);
+ return (rval);
+}
+
+int
+meta_set_purge(
+ mdsetname_t *sp, /* diskset to purge from this host */
+ int bypass_cluster, /* non-zero: skip the sdssc_delete_* calls */
+ int forceflg, /* non-zero: ignore RPC errors on lock/unlock */
+ md_error_t *ep /* receives the error on failure */
+)
+{ /*
+ * Purge diskset sp from this host: lock the set on the hosts that know
+ * it, ask the other hosts to drop this host from the set, delete the
+ * set record on this host and unlock again.
+ *
+ * Returns:
+ * 0 - done (see the NOTE(review) at the clnt_delset calls)
+ * 1 - the set description could not be found
+ * 2 - the set could not be locked on one of its hosts
+ * 3 - sdssc_delete_begin failed for a set that is not autotake
+ * 4 - sdssc_delete_end failed
+ * 5 - the set could not be unlocked on one of its hosts
+ */
+ char *thishost = mynode();
+ md_set_desc *sd;
+ md_setkey_t *cl_sk;
+ md_error_t xep = mdnullerror;
+ int rval = 0;
+ int i, num_hosts = 0;
+ int has_set = 0;
+ int max_node = 0;
+ int delete_end = 1;
+ md_mnnode_desc *nd;
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ /* unable to find set description */
+ rval = 1;
+ return (rval);
+ }
+
+ if (MD_MNSET_DESC(sd)) {
+ /*
+ * Get a count of the hosts in the set and also lock the set
+ * on those hosts that know about it.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ has_set = nodehasset(sp, nd->nd_nodename,
+ NHS_NST_EQ, ep);
+
+ /*
+ * The host is not aware of this set (has_set < 0) or
+ * the set does not match (has_set == 0). This check
+ * prevents the code getting confused by an apparent
+ * inconsistency in the set's state, this is in the
+ * purge code so something is broken in any case and
+ * this is just trying to fix the brokenness.
+ *
+ * NOTE(review): MD_MN_NODE_NOSET is set here but is
+ * not tested by the later loops of this routine, so
+ * such nodes are still contacted below - confirm.
+ */
+ if (has_set <= 0) {
+ mdclrerror(ep);
+ nd->nd_flags |= MD_MN_NODE_NOSET;
+ } else {
+ num_hosts++;
+ if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+ /*
+ * If the force flag is set then
+ * ignore any RPC failures because we
+ * are only really interested with
+ * the set on local node.
+ */
+ if (forceflg && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ } else {
+ /*
+ * set max_node so that in the
+ * unlock code nodes in the
+ * set that have not been
+ * locked are not unlocked.
+ */
+ max_node = nd->nd_nodeid;
+ rval = 2;
+ goto out1;
+ }
+ }
+
+ }
+ nd = nd->nd_next;
+ }
+ max_node = 0; /* presumably never a valid nodeid - TODO confirm */
+ } else {
+ /*
+ * Get a count of the hosts in the set and also lock the set
+ * on those hosts that know about it.
+ */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ has_set = nodehasset(sp, sd->sd_nodes[i],
+ NHS_NST_EQ, ep);
+
+ /*
+ * The host is not aware of this set (has_set < 0) or
+ * the set does not match (has_set == 0). This check
+ * prevents the code getting confused by an apparent
+ * inconsistency in the set's state, this is in the
+ * purge code so something is broken in any case and
+ * this is just trying to fix the brokenness.
+ */
+ if (has_set <= 0) {
+ mdclrerror(ep);
+ /*
+ * Blank the node name (mark the slot empty)
+ * to prevent further requests to this
+ * unresponsive node.
+ */
+ sd->sd_nodes[i][0] = '\0';
+ } else {
+ num_hosts++;
+ if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+ /*
+ * If the force flag is set then
+ * ignore any RPC failures because we
+ * are only really interested with
+ * the set on local node.
+ */
+ if (forceflg && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ } else {
+ rval = 2;
+ /*
+ * set max_node so that in the
+ * unlock code nodes in the
+ * set that have not been
+ * locked are not unlocked.
+ */
+ max_node = i;
+ goto out1;
+ }
+ }
+ }
+ }
+ max_node = i; /* now MD_MAXSIDES */
+ }
+ if (!bypass_cluster) {
+ /*
+ * If there is only one host associated with the
+ * set then remove the set from the cluster.
+ */
+ if (num_hosts == 1) {
+ if (sdssc_delete_begin(sp->setname) == SDSSC_ERROR) {
+ if (metad_isautotakebyname(sp->setname)) {
+ delete_end = 0;
+ } else {
+ mdclrerror(ep);
+ rval = 3;
+ goto out1;
+ }
+ }
+ }
+ }
+
+ if (MD_MNSET_DESC(sd)) {
+ /*
+ * Ask every other live node to remove this host from the
+ * set, and delete the set record on this host itself.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (nd->nd_nodeid != sd->sd_mn_mynode->nd_nodeid) {
+ /*
+ * Tell the remote node to remove this node
+ */
+ if (clnt_delhosts(nd->nd_nodename, sp, 1,
+ &thishost, ep) == -1) {
+ /*
+ * If we fail to delete ourselves
+ * from the remote host it does not
+ * really matter because the set is
+ * being "purged" from this node. The
+ * set can be purged from the other
+ * node at a later time.
+ */
+ mdclrerror(ep);
+ }
+ nd = nd->nd_next;
+ continue;
+ }
+ /*
+ * remove the set from this host
+ *
+ * NOTE(review): on failure the error is printed and
+ * cleared and rval is left unchanged, so the routine
+ * can still return 0 - confirm that this is intended.
+ */
+ if (clnt_delset(nd->nd_nodename, sp, ep) == -1) {
+ md_perror(dgettext(TEXT_DOMAIN, "delset"));
+ if (!bypass_cluster && num_hosts == 1)
+ (void) sdssc_delete_end(sp->setname,
+ SDSSC_CLEANUP);
+ mdclrerror(ep);
+ goto out1;
+ }
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+ if (strcmp(thishost, sd->sd_nodes[i]) != 0) {
+ /*
+ * Tell the remote node to remove this node
+ */
+ if (clnt_delhosts(sd->sd_nodes[i], sp, 1,
+ &thishost, ep) == -1) {
+ /*
+ * If we fail to delete ourselves
+ * from the remote host it does not
+ * really matter because the set is
+ * being "purged" from this node. The
+ * set can be purged from the other
+ * node at a later time.
+ */
+ mdclrerror(ep);
+ }
+ continue;
+ }
+
+ /*
+ * remove the set from this host
+ *
+ * NOTE(review): as in the multi-node branch, a
+ * failure here leaves rval unchanged - confirm.
+ */
+ if (clnt_delset(sd->sd_nodes[i], sp, ep) == -1) {
+ md_perror(dgettext(TEXT_DOMAIN, "delset"));
+ if (!bypass_cluster && num_hosts == 1)
+ (void) sdssc_delete_end(sp->setname,
+ SDSSC_CLEANUP);
+ mdclrerror(ep);
+ goto out1;
+ }
+ }
+ }
+
+ if (!bypass_cluster && num_hosts == 1) {
+ if (delete_end && sdssc_delete_end(sp->setname, SDSSC_COMMIT) ==
+ SDSSC_ERROR) {
+ rval = 4;
+ }
+ }
+
+out1:
+
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+ /*
+ * Remove the set lock on those nodes that had the set locked
+ * max_node will either be MD_MAXSIDES or array index of the last
+ * node contacted (or rather failed to contact) for traditional
+ * diskset. For a MN diskset, max_node is the node_id of the node
+ * that failed the lock (0 when every lock succeeded).
+ * Only the first unlock failure is moved into ep.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (nd->nd_nodeid == max_node)
+ break;
+ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+ if (forceflg && mdanyrpcerror(&xep)) {
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ continue;
+ }
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = 5;
+ }
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < max_node; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+ if (forceflg && mdanyrpcerror(&xep)) {
+ mdclrerror(&xep);
+ continue;
+ }
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = 5;
+ }
+ }
+ }
+
+ cl_set_setkey(NULL);
+
+ return (rval);
+}
+
+/*
+ * Collect the data tags of diskset sp.
+ *
+ * Tags are fetched one at a time with the MD_MED_GET_TAG ioctl and
+ * appended to the list at *dtlpp, each in a Zalloc()ed element.  The loop
+ * ends when the ioctl reports MDE_DB_NOTAG while *dtlpp is non-NULL.
+ *
+ * Returns 0 in that case; any other ioctl failure, or MDE_DB_NOTAG while
+ * *dtlpp is still NULL, hands the error to ep and returns the value of
+ * mdstealerror().
+ */
+int
+meta_set_query(
+	mdsetname_t	*sp,
+	mddb_dtag_lst_t	**dtlpp,
+	md_error_t	*ep
+)
+{
+	mddb_dtag_get_parm_t	parm;
+
+	(void) memset(&parm, '\0', sizeof (mddb_dtag_get_parm_t));
+	parm.dtgp_setno = sp->setno;
+
+	for (;;) {
+		if (metaioctl(MD_MED_GET_TAG, &parm, &parm.dtgp_mde,
+		    NULL) != 0) {
+			/* "no more tags" after at least one entry: done */
+			if (mdismddberror(&parm.dtgp_mde, MDE_DB_NOTAG) &&
+			    *dtlpp != NULL)
+				break;
+
+			return (mdstealerror(ep, &parm.dtgp_mde));
+		}
+
+		/* advance to the terminating link of the list */
+		while (*dtlpp != NULL)
+			dtlpp = &(*dtlpp)->dtl_nx;
+
+		/* hang a new element off it and copy the tag in */
+		*dtlpp = Zalloc(sizeof (mddb_dtag_lst_t));
+		(void) memmove(&(*dtlpp)->dtl_dt, &parm.dtgp_dt,
+		    sizeof (mddb_dtag_t));
+
+		/* ask for the next tag id on the following round */
+		parm.dtgp_dt.dt_id++;
+	}
+
+	return (0);
+}
+
+/*
+ * Return the drivename that belongs to namespace key "key" on side
+ * "sideno" of diskset sp.
+ *
+ * The name is looked up in the local set namespace (the side is offset
+ * by SKEW unless the set is a multi-node set), turned into a drive name
+ * and, for drives that are neither did devices nor part of a multi-node
+ * set, its device id string is filled in.  Unless MD_BYPASS_DAEMON is
+ * given in flags, the side name list of the drive is fetched as well.
+ *
+ * Returns the drivename, or NULL on error (ep is set).
+ */
+mddrivename_t *
+metadrivename_withdrkey(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	mdkey_t		key,
+	int		flags,
+	md_error_t	*ep
+)
+{
+	char		*devname;
+	mdname_t	*namep;
+	mddrivename_t	*drive;
+	ddi_devid_t	did;
+	md_set_desc	*setdesc;
+	side_t		nmside;
+
+	setdesc = metaget_setdesc(sp, ep);
+	if (setdesc == NULL)
+		return (NULL);
+
+	/* get namespace info; only traditional sets skew the side */
+	if (MD_MNSET_DESC(setdesc))
+		nmside = sideno;
+	else
+		nmside = sideno+SKEW;
+
+	devname = meta_getnmbykey(MD_LOCAL_SET, nmside, key, ep);
+	if (devname == NULL)
+		return (NULL);
+
+	/* get device name; the string is not needed afterwards */
+	if (flags & PRINT_FAST)
+		namep = metaname_fast(&sp, devname, ep);
+	else
+		namep = metaname(&sp, devname, ep);
+	Free(devname);
+	if (namep == NULL)
+		return (NULL);
+
+	/* make sure it's OK */
+	if ((! (flags & MD_BASICNAME_OK)) && (metachkcomp(namep, ep) != 0))
+		return (NULL);
+
+	/* get drivename */
+	drive = namep->drivenamep;
+	drive->side_names_key = key;
+
+	/*
+	 * The devid handling is skipped for did devices: the device id is
+	 * disabled for did devices due to the lack of minor name support
+	 * in the did driver, and the devid code path could set and
+	 * propagate an error that eventually prevents did disks from
+	 * being added to the diskset under SunCluster systems.
+	 * It is skipped for multi-node disksets too, they have no devids.
+	 */
+	if (strncmp(drive->rname, "/dev/did/", strlen("/dev/did/")) != 0 &&
+	    !(MD_MNSET_DESC(setdesc))) {
+		/*
+		 * Get the devid associated with the key.
+		 *
+		 * If a devid was returned, it MUST be valid even in
+		 * the case where a device id has been "updated".  The
+		 * "update" of the device id may have occurred due to
+		 * a firmware upgrade.
+		 */
+		did = meta_getdidbykey(MD_LOCAL_SET, sideno+SKEW, key, ep);
+		if (did != NULL) {
+			drive->devid = devid_str_encode(did, NULL);
+			free(did);
+		} else if (mdissyserror(ep, MDDB_F_NODEVID)) {
+			/* It is okay if replica is not in devid mode */
+			mdclrerror(ep);
+		} else {
+			/*
+			 * devid is missing so this means that we have
+			 * just upgraded from a configuration where
+			 * devid's were not used so try to add in
+			 * the devid and requery.
+			 */
+			if (meta_setdid(MD_LOCAL_SET, sideno + SKEW, key,
+			    ep) < 0)
+				return (NULL);
+
+			did = (ddi_devid_t)meta_getdidbykey(MD_LOCAL_SET,
+			    sideno+SKEW, key, ep);
+			if (did == NULL)
+				return (NULL);
+
+			drive->devid = devid_str_encode(did, NULL);
+			devid_free(did);
+		}
+	}
+
+	/* without the daemon there are no side names to collect */
+	if (flags & MD_BYPASS_DAEMON)
+		return (drive);
+
+	if (get_sidenmlist(sp, drive, ep))
+		return (NULL);
+
+	/* return success */
+	return (drive);
+}
+
+/*
+ * Free every element of the drive descriptor list at *dd and set *dd to
+ * NULL.  Only the list elements themselves are released.
+ */
+void
+metafreedrivedesc(md_drive_desc **dd)
+{
+	md_drive_desc	*cur = *dd;
+
+	while (cur != NULL) {
+		/* pick up the link before the element goes away */
+		md_drive_desc	*following = cur->dd_next;
+
+		Free(cur);
+		cur = following;
+	}
+
+	*dd = NULL;
+}
+
+/*
+ * Return the drive descriptor list of set sp for this host's side.
+ * The side number is obtained from getmyside(); when that reports
+ * MD_SIDEWILD the lookup is abandoned and NULL is returned.
+ * MD_BYPASS_DAEMON must not be set in flags (asserted).
+ */
+md_drive_desc *
+metaget_drivedesc(
+	mdsetname_t	*sp,
+	int		flags,
+	md_error_t	*ep
+)
+{
+	side_t		myside;
+
+	assert(! (flags & MD_BYPASS_DAEMON));
+
+	myside = getmyside(sp, ep);
+	if (myside == MD_SIDEWILD)
+		return (NULL);
+
+	return (metaget_drivedesc_sideno(sp, myside, flags, ep));
+}
+
+/*
+ * Build a new drive descriptor list with one entry per element of nlp.
+ * Each entry is appended with the drive name of the list element, the
+ * creation time and generation id of the set descriptor of sp, and the
+ * MD_DR_ADD flag.  Returns NULL when the set descriptor cannot be
+ * obtained; the result of each individual append is ignored.
+ */
+md_drive_desc *
+metaget_drivedesc_fromnamelist(
+	mdsetname_t	*sp,
+	mdnamelist_t	*nlp,
+	md_error_t	*ep
+)
+{
+	md_drive_desc	*head = NULL;
+	md_set_desc	*setdesc;
+	mdnamelist_t	*cur;
+
+	setdesc = metaget_setdesc(sp, ep);
+	if (setdesc == NULL)
+		return (NULL);
+
+	cur = nlp;
+	while (cur != NULL) {
+		(void) metadrivedesc_append(&head, cur->namep->drivenamep,
+		    0, 0, setdesc->sd_ctime, setdesc->sd_genid, MD_DR_ADD);
+		cur = cur->next;
+	}
+
+	return (head);
+}
+
+/*
+ * Return the drive descriptor list of set sp for the given side.
+ * The list is kept in the set descriptor (sd_drvs): it is built with
+ * dr2drivedesc() only while that field is still empty and is returned
+ * as-is on later calls.  Returns NULL when the set descriptor is not
+ * available or dr2drivedesc() produces no list.
+ * MD_BYPASS_DAEMON must not be set in flags (asserted).
+ */
+md_drive_desc *
+metaget_drivedesc_sideno(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	int		flags,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*setdesc;
+
+	assert(! (flags & MD_BYPASS_DAEMON));
+
+	setdesc = metaget_setdesc(sp, ep);
+	if (setdesc == NULL)
+		return (NULL);
+
+	/* Populate the cached list on first use only. */
+	if (setdesc->sd_drvs == NULL)
+		setdesc->sd_drvs = dr2drivedesc(sp, sideno, flags, ep);
+
+	return (setdesc->sd_drvs);
+}
+
+/*
+ * Refresh the ownership information cached in the set descriptor of sp.
+ *
+ * Multinode set: every node in sd_nodelist gets its MD_MN_NODE_OWN flag
+ * set or cleared.  A node that is not ALIVE, or whose rpc.metad cannot
+ * be reached, is marked as not owning.  This branch always returns 0.
+ * NOTE(review): an error left in ep by a failed clnt_ownset() is not
+ * cleared before returning 0 - confirm callers expect that.
+ *
+ * Traditional set: sd_isown[] is cleared for all sides and the entry for
+ * this host's side is set when this host owns the set.
+ *
+ * Returns 0 on success, -1 when the set descriptor, the ownership query
+ * for this host, or this host's side number cannot be obtained.
+ */
+int
+metaget_setownership(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	int		isowner;
+	int		i;
+	md_mnnode_desc	*nd;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (MD_MNSET_DESC(sd)) {
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			/* If node isn't alive, can't own diskset */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+				continue;
+			}
+			/*
+			 * If can't communicate with rpc.metad, then mark
+			 * this node as not an owner.  That node may
+			 * in fact, be an owner, but without rpc.metad running
+			 * that node can't do much.
+			 */
+			if (clnt_ownset(nd->nd_nodename, sp, &isowner,
+			    ep) == -1) {
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+			} else if (isowner == TRUE) {
+				nd->nd_flags |= MD_MN_NODE_OWN;
+			} else {
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+			}
+		}
+		return (0);
+	}
+
+	/* Rest of code handles traditional disksets */
+
+	for (i = 0; i < MD_MAXSIDES; i++)
+		sd->sd_isown[i] = 0;
+
+	if (clnt_ownset(mynode(), sp, &isowner, ep) == -1)
+		return (-1);
+
+	if (isowner == TRUE) {
+		side_t	myside = getmyside(sp, ep);
+
+		/*
+		 * getmyside() reports failure with MD_SIDEWILD, which is
+		 * not a valid sd_isown[] index; never store through it.
+		 */
+		if (myside == MD_SIDEWILD)
+			return (-1);
+		sd->sd_isown[myside] = 1;
+	}
+
+	return (0);
+}
+
+/*
+ * Return this host's node name as reported by uname().
+ * The utsname buffer is static and is filled in on the first call only;
+ * later calls return the same buffer without calling uname() again.
+ * A uname() failure is reported through md_perror() and trips an
+ * assertion.
+ */
+char *
+mynode(void)
+{
+	static struct utsname	cached_uname;
+	static int		have_uname = 0;
+
+	if (have_uname)
+		return (cached_uname.nodename);
+
+	if (uname(&cached_uname) == -1) {
+		md_perror(dgettext(TEXT_DOMAIN, "uname"));
+		assert(0);
+	}
+	have_uname = 1;
+
+	return (cached_uname.nodename);
+}
+
+/*
+ * Report whether str occurs in the first cnt entries of lst.
+ * Entries are compared with strcmp().  Returns TRUE on the first exact
+ * match, FALSE when no entry matches or cnt is not positive.
+ */
+int
+strinlst(char *str, int cnt, char **lst)
+{
+	char	**entry = lst;
+	int	remaining;
+
+	for (remaining = cnt; remaining > 0; remaining--, entry++) {
+		if (strcmp(*entry, str) == 0)
+			return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+/*
+ * meta_get_reserved_names
+ *	Append to *nlpp the names of slices that are in use without
+ *	necessarily showing up as metadevices: for every drive of the set
+ *	the slice that holds the replica, and the log device of every
+ *	trans metadevice in the set.
+ *
+ *	Nothing is appended for the local set.
+ *
+ *	Returns the number of replica slices appended (log devices are
+ *	appended but not counted), or -1 on failure.
+ */
+
+/*ARGSUSED*/
+int
+meta_get_reserved_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep)
+{
+	int		nreserved = 0;
+	mdname_t	*slicenp = NULL;
+	mdnamelist_t	*translist = NULL;
+	mdnamelist_t	**appendpp = nlpp;
+	mdnamelist_t	*cur;
+	md_drive_desc	*drives, *drv;
+
+	if (metaislocalset(sp))
+		goto out;
+
+	/* An empty drive list is only a failure if an error was recorded. */
+	drives = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+	if (drives == NULL && !mdisok(ep)) {
+		nreserved = -1;
+		goto out;
+	}
+
+	/*
+	 * Replica slice of every drive in the set.  appendpp tracks the
+	 * tail of the list so appending never has to walk it again.
+	 * The walk stops as soon as one drive cannot be handled.
+	 */
+	for (drv = drives; drv != NULL && nreserved >= 0;
+	    drv = drv->dd_next) {
+		uint_t	rep_slice;
+		int	appended = 0;
+
+		if (drv->dd_dnp != NULL &&
+		    meta_replicaslice(drv->dd_dnp, &rep_slice, ep) == 0) {
+			slicenp = metaslicename(drv->dd_dnp, rep_slice, ep);
+			if (slicenp != NULL) {
+				appendpp = meta_namelist_append_wrapper(
+				    appendpp, slicenp);
+				if (appendpp != NULL)
+					appended = 1;
+			}
+		}
+
+		if (appended)
+			nreserved++;
+		else
+			nreserved = -1;
+	}
+
+	/* Log devices of the trans metadevices in the set. */
+	if (meta_get_trans_names(sp, &translist, 0, ep) < 0) {
+		nreserved = -1;
+		goto out;
+	}
+
+	cur = translist;
+	while (cur != NULL) {
+		md_trans_t	*transp = meta_get_trans(sp, cur->namep, ep);
+
+		if (transp == NULL) {
+			nreserved = -1;
+			goto out;
+		}
+		if (transp->lognamep) {
+			/* Same tail-pointer append as for the replicas. */
+			appendpp = meta_namelist_append_wrapper(
+			    appendpp, transp->lognamep);
+		}
+		cur = cur->next;
+	}
+out:
+	metafreenamelist(translist);
+	return (nreserved);
+}
+
+/*
+ * Entry point to join a node to MultiNode diskset.
+ *
+ * Validate host in diskset.
+ * - Should be in membership list from API
+ * - Should not already be joined into diskset.
+ * - Set must have drives
+ * Assume valid configuration is stored in the set/drive/node records
+ * in the local mddb since no node or drive can be added to the MNset
+ * unless all drives and nodes are available. Reconfig steps will
+ * resync all ALIVE nodes in case of panic in critical areas.
+ *
+ * Lock down the set.
+ * Verify host is a member of this diskset.
+ * If drives exist in the configuration, load the mddbs.
+ * Set this node to active by notifying master if one exists.
+ * If this is the first node active in the diskset, this node
+ * becomes the master.
+ * Unlock the set.
+ *
+ * Mirror Resync:
+ * If this node is the last node to join the set and clustering
+ * isn't running, then start the 'metasync -r' type resync
+ * on all mirrors in this diskset.
+ * If clustering is running, this resync operation will
+ * be handled by the reconfig steps and should NOT
+ * be handled during a join operation.
+ *
+ * There are multiple return values in order to assist
+ * the join operation of all sets in the metaset command.
+ *
+ * Return values:
+ * 0 - Node successfully joined to set.
+ * -1 - Join attempted but failed
+ * - any failure from libmeta calls
+ * - node not in the member list
+ * -2 - Join not attempted since
+ * - this set had no drives in set
+ * - this node already joined to set
+ * - set is not a multinode set
+ * -3 - Node joined to STALE set.
+ */
+extern int
+meta_set_join(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*dd;
+	md_mnnode_desc		*nd, *nd2, my_nd;
+	int			rval = 0;
+	md_setkey_t		*cl_sk;
+	md_error_t		xep = mdnullerror;	/* scratch error */
+	md_error_t		ep_snarf = mdnullerror;	/* STALE snarf error */
+	int			master_flag = 0;	/* 1: first owner */
+	md_mnset_record		*mas_mnsr = NULL;
+	int			clear_nr_flags = 0;	/* 1: undo JOIN flags */
+	md_mnnode_record	*nr;
+	int			stale_set = 0;
+	int			rb_flags = 0;		/* 1: halt_set on error */
+	int			stale_bool = FALSE;
+	int			suspendall_flag = 0;
+	int			suspend1_flag = 0;
+	sigset_t		oldsigs;
+	int			send_reinit = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		return (-1);
+	}
+
+	/* Must be a multinode diskset */
+	if (!MD_MNSET_DESC(sd)) {
+		(void) mderror(ep, MDE_NOT_MN, sp->setname);
+		return (-2);
+	}
+
+	/* Verify that the node is ALIVE (i.e. is in the API membership list) */
+	if (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_ALIVE)) {
+		(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, sp->setno,
+		    sd->sd_mn_mynode->nd_nodename, NULL,
+		    sp->setname);
+		return (-1);
+	}
+
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	/*
+	 * Lock the set on current set members.
+	 * For MN diskset lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * Lock out other meta* commands by suspending
+	 * class 1 messages across the diskset.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+		    sp, MD_MSG_CLASS1, MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspend1_flag = 1;
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * Verify that this host is a member (in the host list) of the set.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (strcmp(mynode(), nd->nd_nodename) == 0) {
+			break;
+		}
+		nd = nd->nd_next;
+	}
+	if (!nd) {
+		(void) mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
+		    sd->sd_mn_mynode->nd_nodename, NULL,
+		    sp->setname);
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Need to return failure if host is already 'joined'
+	 * into the set.  This is done so that if later the user
+	 * issues a command to join all sets and a failure is
+	 * encountered - that the resulting cleanup effort
+	 * (withdrawing from all sets that were joined
+	 * during that command) won't withdraw from this set.
+	 *
+	 * Jump to out2 rather than out: nothing has been changed yet,
+	 * so the rollback code must not run.
+	 */
+	if (nd->nd_flags & MD_MN_NODE_OWN) {
+		rval = -2;
+		goto out2;
+	}
+
+	/*
+	 * Call metaget_setownership that calls each node in diskset and
+	 * marks in set descriptor if node is an owner of the set or not.
+	 * metaget_setownership checks to see if a node is an owner by
+	 * checking to see if that node's kernel has the mddb loaded.
+	 * If a node had panic'd during a reconfig or an
+	 * add/delete/join/withdraw operation, the other nodes' node
+	 * records may not reflect the current state of the diskset,
+	 * so calling metaget_setownership is the safest thing to do.
+	 */
+	if (metaget_setownership(sp, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	/* If first active member of diskset, become the master. */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (nd->nd_flags & MD_MN_NODE_OWN)
+			break;
+		nd = nd->nd_next;
+	}
+	if (nd == NULL)
+		master_flag = 1;
+
+	/*
+	 * If not first active member of diskset, then get the
+	 * master information from a node that is already joined
+	 * and set the master information for this node.  Be sure
+	 * that this node (the already joined node) has its own
+	 * join flag set.  If not, then this diskset isn't currently
+	 * consistent and shouldn't allow a node to join.  This diskset
+	 * inconsistency should only occur when a node has panic'd in
+	 * the set while doing a metaset operation and the sysadmin is
+	 * attempting to join a node into the set.  This inconsistency
+	 * will be fixed during a reconfig cycle which should be occurring
+	 * soon since a node panic'd.
+	 *
+	 * If unable to get this information from an owning node, then
+	 * this diskset isn't currently consistent and shouldn't
+	 * allow a node to join.
+	 */
+	if (!master_flag) {
+		/*
+		 * get master information from an owner (joined) node;
+		 * nd is the first owner found by the loop above.
+		 */
+		if (clnt_mngetset(nd->nd_nodename, sp->setname,
+		    sp->setno, &mas_mnsr, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		/* Verify that owner (joined) node has its own JOIN flag set */
+		nr = mas_mnsr->sr_nodechain;
+		while (nr) {
+			if ((nd->nd_nodeid == nr->nr_nodeid) &&
+			    ((nr->nr_flags & MD_MN_NODE_OWN) == 0)) {
+				(void) mddserror(ep, MDE_DS_NODENOSET,
+				    sp->setno, nd->nd_nodename, NULL,
+				    nd->nd_nodename);
+				free_sr((md_set_record *)mas_mnsr);
+				rval = -1;
+				goto out;
+			}
+			nr = nr->nr_next;
+		}
+
+		/*
+		 * Does master have set marked as STALE?
+		 * If so, need to pass this down to kernel when
+		 * this node snarfs the set.
+		 */
+		if (clnt_mn_is_stale(nd->nd_nodename, sp,
+		    &stale_bool, ep) == -1) {
+			/* Release the set record like every other exit. */
+			free_sr((md_set_record *)mas_mnsr);
+			rval = -1;
+			goto out;
+		}
+
+		/* set master information in my rpc.metad's set record */
+		if (clnt_mnsetmaster(mynode(), sp, mas_mnsr->sr_master_nodenm,
+		    mas_mnsr->sr_master_nodeid, ep)) {
+			free_sr((md_set_record *)mas_mnsr);
+			rval = -1;
+			goto out;
+		}
+
+		/* set master information in my cached set desc */
+		(void) strcpy(sd->sd_mn_master_nodenm,
+		    mas_mnsr->sr_master_nodenm);
+		sd->sd_mn_master_nodeid = mas_mnsr->sr_master_nodeid;
+		nd2 = sd->sd_nodelist;
+		while (nd2) {
+			if (nd2->nd_nodeid == mas_mnsr->sr_master_nodeid) {
+				sd->sd_mn_masternode = nd2;
+				break;
+			}
+			nd2 = nd2->nd_next;
+		}
+		free_sr((md_set_record *)mas_mnsr);
+
+		/*
+		 * Set the node flags in mynode's rpc.metad node records for
+		 * the nodes that are in the diskset.  Can use my sd
+		 * since earlier call to metaget_setownership set the
+		 * owner flags based on whether that node had snarfed
+		 * the MN diskset mddb.  Reconfig steps guarantee that
+		 * return of metaget_setownership will match the owning
+		 * node's owner list except in the case where a node
+		 * has just panic'd and in this case, a reconfig will
+		 * be starting immediately and the owner lists will
+		 * be sync'd up by the reconfig.
+		 *
+		 * Flag of SET means to take no action except to
+		 * set the node flags as given in the nodelist linked list.
+		 */
+		if (clnt_upd_nr_flags(mynode(), sp, sd->sd_nodelist,
+		    MD_NR_SET, NULL, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Read in the mddb if there are drives in the set.
+	 */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		/* No drives in list */
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out;
+		}
+		rval = -2;
+		goto out;
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Start by suspending rpc.mdcommd (which drains it of all messages),
+	 * then change the nodelist followed by a reinit and resume.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+
+		if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND, sp,
+		    MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspendall_flag = 1;
+		nd = nd->nd_next;
+	}
+
+	/* Set master in my set record in rpc.metad */
+	if (master_flag) {
+		if (clnt_mnsetmaster(mynode(), sp,
+		    sd->sd_mn_mynode->nd_nodename,
+		    sd->sd_mn_mynode->nd_nodeid, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+	/* Causes mddbs to be loaded in kernel */
+	if (setup_db_bydd(sp, dd, 0, ep) == -1) {
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Host not able to start diskset."));
+		rval = -1;
+		goto out;
+	}
+
+	if (! mdisok(ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Set rollback flags to 1 so that halt_set is called if a failure
+	 * is seen after this point.  If snarf_set fails, still need to
+	 * call halt_set to cleanup the diskset.
+	 */
+	rb_flags = 1;
+
+	/* Starts the set */
+	if (snarf_set(sp, stale_bool, ep) != 0) {
+		if (mdismddberror(ep, MDE_DB_STALE)) {
+			/*
+			 * Don't fail join, STALE means that set has
+			 * < 50% mddbs.
+			 */
+			(void) mdstealerror(&ep_snarf, ep);
+			stale_set = 1;
+		} else if (mdisok(ep)) {
+			/* If snarf failed, but no error was set - set it */
+			(void) mdmddberror(ep, MDE_DB_NOTNOW, NODEV64,
+			    sp->setno, 0, NULL);
+			rval = -1;
+			goto out;
+		} else if (!(mdismddberror(ep, MDE_DB_ACCOK))) {
+			/*
+			 * Don't fail join if ACCOK; ACCOK means that mediator
+			 * provided extra vote.
+			 */
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* Did set really get snarfed? */
+	if (own_set(sp, NULL, TRUE, ep) == MD_SETOWNER_NO) {
+		if (mdisok(ep)) {
+			/* If snarf failed, but no error was set - set it */
+			(void) mdmddberror(ep, MDE_DB_NOTNOW, NODEV64,
+			    sp->setno, 0, NULL);
+		}
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Host not able to start diskset."));
+		rval = -1;
+		goto out;
+	}
+
+	/* Change to nodelist so need to send reinit to rpc.mdcommd */
+	send_reinit = 1;
+
+	/* If first node to enter set, setup master and clear change log */
+	if (master_flag) {
+		/* Set master in my locally cached set descriptor */
+		(void) strcpy(sd->sd_mn_master_nodenm,
+		    sd->sd_mn_mynode->nd_nodename);
+		sd->sd_mn_master_nodeid = sd->sd_mn_mynode->nd_nodeid;
+		sd->sd_mn_am_i_master = 1;
+
+		/*
+		 * If first node to join set, then clear out change log
+		 * entries.  Change log entries are only needed when a
+		 * change of master is occurring in a diskset that has
+		 * multiple owners.  Since this node is the first owner
+		 * of the diskset, clear the entries.
+		 *
+		 * Only do this if we are in a single node non-SC3.x
+		 * situation.
+		 */
+		if (meta_mn_singlenode() &&
+		    mdmn_reset_changelog(sp, ep, MDMN_CLF_RESETLOG) != 0) {
+			mde_perror(ep, dgettext(TEXT_DOMAIN,
+			    "Unable to reset changelog."));
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* Set my locally cached flag */
+	sd->sd_mn_mynode->nd_flags |= MD_MN_NODE_OWN;
+
+	/*
+	 * Set this node's own flag on all joined nodes in the set
+	 * (including my node).
+	 */
+	clear_nr_flags = 1;
+
+	my_nd = *(sd->sd_mn_mynode);	/* structure copy */
+	my_nd.nd_next = NULL;
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp, &my_nd,
+		    MD_NR_JOIN, NULL, ep)) {
+			rval = -1;
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+
+out:
+	if (rval != 0) {
+		/*
+		 * If rollback flag is 1, then node was joined to set.
+		 * Since an error occurred, withdraw node from set in
+		 * order to rollback to before command was run.
+		 * Need to preserve ep so that calling function can
+		 * get error information.
+		 */
+		if (rb_flags == 1) {
+			if (halt_set(sp, &xep)) {
+				mdclrerror(&xep);
+			}
+		}
+
+		/*
+		 * If error, reset master to INVALID.
+		 * Ignore error since (next) first node to successfully join
+		 * will set master on all nodes.
+		 */
+		(void) clnt_mnsetmaster(mynode(), sp, "",
+		    MD_MN_INVALID_NID, &xep);
+		mdclrerror(&xep);
+		/* Reset master in my locally cached set descriptor */
+		sd->sd_mn_master_nodeid = MD_MN_INVALID_NID;
+		sd->sd_mn_am_i_master = 0;
+
+		/*
+		 * If nr flags set on other nodes, reset them.
+		 * my_nd is valid here: it is filled in before
+		 * clear_nr_flags is set.
+		 */
+		if (clear_nr_flags) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				(void) clnt_upd_nr_flags(nd->nd_nodename, sp,
+				    &my_nd, MD_NR_WITHDRAW, NULL, &xep);
+				mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+			/* Reset my locally cached flag */
+			sd->sd_mn_mynode->nd_flags &= ~MD_MN_NODE_OWN;
+		}
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (send_reinit) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * The reinit of rpc.mdcommd failed and
+				 * xep holds that failure.  ep may already
+				 * hold an error from earlier in the join.
+				 * If nothing failed before (rval == 0),
+				 * move the reinit error into ep; otherwise
+				 * keep the earlier error and drop this one.
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd."));
+			}
+			nd = nd->nd_next;
+		}
+
+	}
+
+out2:
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * We are here because we failed to resume
+				 * rpc.mdcommd.  However we potentially have
+				 * an error from the previous call.
+				 * If the previous call did fail, we keep
+				 * that error and generate a perror with
+				 * the string, "Unable to resume...".
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd."));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/*
+	 * Unlock set.  This flushes the caches on the servers.
+	 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+			if (rval == 0)
+				(void) mdstealerror(ep, &xep);
+			else
+				mdclrerror(&xep);
+			rval = -1;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * If this node is the last to join the diskset and clustering isn't
+	 * running, then resync the mirrors in the diskset.  We have to wait
+	 * until all nodes are joined so that the status gets propagated to
+	 * all of the members of the set.
+	 * Ignore any error from the resync as the join function shouldn't fail
+	 * because the mirror resync had a problem.
+	 *
+	 * Don't start resync if set is stale.
+	 */
+	if ((rval == 0) && (sdssc_bind_library() != SDSSC_OKAY) &&
+	    (stale_set != 1)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_OWN))
+				break;
+			nd = nd->nd_next;
+		}
+		/*
+		 * nd set to NULL means that we have no nodes in the set that
+		 * haven't joined.  In this case we start the resync.
+		 */
+		if (nd == NULL) {
+			(void) meta_mirror_resync_all(sp, 0, &xep);
+			mdclrerror(&xep);
+		}
+	}
+
+	/* Update ABR state for all soft partitions */
+	(void) meta_sp_update_abr(sp, &xep);
+	mdclrerror(&xep);
+
+	/*
+	 * call metaflushsetname to reset local cache for master and
+	 * node information.
+	 */
+	metaflushsetname(sp);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	/*
+	 * If no error and stale_set is set, then set ep back
+	 * to ep from snarf_set call and return -3.  If another error
+	 * occurred and rval is not 0, then that error would have
+	 * caused the node to be withdrawn from the set and would
+	 * have set ep to that error information.
+	 */
+	if ((rval == 0) && (stale_set)) {
+		(void) mdstealerror(ep, &ep_snarf);
+		return (-3);
+	}
+
+	return (rval);
+}
+
+/*
+ * Entry point to withdraw a node from MultiNode diskset.
+ *
+ * Validate host in diskset.
+ * - Should be joined into diskset.
+ * Assume valid configuration is stored in the set/drive/node records
+ * in the local mddb since no node or drive can be added to the MNset
+ * unless all drives and nodes are available. Reconfig steps will
+ * resync all ALIVE nodes in case of panic in critical areas.
+ *
+ * Lock down the set.
+ * Verify that drives exist in configuration.
+ * Verify host is a member of this diskset.
+ * Verify host is an owner of the diskset (host is joined to diskset).
+ * Only allow withdrawal of master node if master node is the only joined
+ * in the diskset.
+ * Halt the diskset on this node.
+ * Reset Master on this node.
+ * Update node flags to show that this node has withdrawn.
+ * Unlock the set.
+ *
+ * Return values:
+ * 0 - Node successfully withdrew from set.
+ * -1 - Withdrawal attempted but failed
+ * - any failure from libmeta calls
+ * - node not in the member list
+ * - set is not a multinode set
+ * -2 - Withdrawal not attempted since
+ * - this set had no drives in set
+ * - this node not joined to set
+ */
+extern int
+meta_set_withdraw(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	md_drive_desc	*dd = NULL;
+	md_mnnode_desc	*nd, my_nd;
+	int		rval = 0;
+	md_setkey_t	*cl_sk;
+	md_error_t	xep = mdnullerror;	/* scratch error */
+	int		set_halted = 0;		/* 1: rejoin on rollback */
+	int		suspendall_flag = 0;
+	int		suspend1_flag = 0;
+	bool_t		stale_bool = FALSE;
+	mddb_config_t	c;
+	int		node_id_list[1];
+	sigset_t	oldsigs;
+	int		send_reinit = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		return (-1);
+	}
+
+	/* Must be a multinode diskset */
+	if (!MD_MNSET_DESC(sd)) {
+		(void) mderror(ep, MDE_NOT_MN, sp->setname);
+		return (-1);
+	}
+
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	/*
+	 * Lock the set on current set members.
+	 * For MN diskset lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+	/*
+	 * Lock out other meta* commands by suspending
+	 * class 1 messages across the diskset.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+		    sp, MD_MSG_CLASS1, MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspend1_flag = 1;
+		nd = nd->nd_next;
+	}
+
+	/* Get list of drives - needed in case of failure */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		/* Error getting drives in list */
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out2;
+		}
+		/* no drives in list */
+		rval = -2;
+		goto out2;
+	}
+
+	/*
+	 * Verify that this host is a member (in the host list) of the set.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (strcmp(mynode(), nd->nd_nodename) == 0) {
+			break;
+		}
+		nd = nd->nd_next;
+	}
+	if (!nd) {
+		(void) mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
+		    sd->sd_mn_mynode->nd_nodename, NULL,
+		    sp->setname);
+		rval = -1;
+		goto out2;
+	}
+
+	/*
+	 * Call metaget_setownership that calls each node in diskset and
+	 * marks in set descriptor if node is an owner of the set or not.
+	 * metaget_setownership checks to see if a node is an owner by
+	 * checking to see if that node's kernel has the mddb loaded.
+	 * If a node had panic'd during a reconfig or an
+	 * add/delete/join/withdraw operation, the other nodes' node
+	 * records may not reflect the current state of the diskset,
+	 * so calling metaget_setownership is the safest thing to do.
+	 */
+	if (metaget_setownership(sp, ep) == -1) {
+		rval = -1;
+		goto out2;
+	}
+
+	/*
+	 * Verify that this node is joined
+	 * to diskset (i.e. is an owner of the diskset).
+	 */
+	if (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+		rval = -2;
+		goto out2;
+	}
+
+	/*
+	 * For a MN diskset, only withdraw master if it is
+	 * the only joined node.
+	 */
+	if (sd->sd_mn_master_nodeid == sd->sd_mn_mynode->nd_nodeid) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip my node since checking for other owners */
+			if (nd->nd_nodeid == sd->sd_mn_master_nodeid) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/* If another owner node is found, error */
+			if (nd->nd_flags & MD_MN_NODE_OWN) {
+				(void) mddserror(ep, MDE_DS_WITHDRAWMASTER,
+				    sp->setno,
+				    sd->sd_mn_mynode->nd_nodename, NULL,
+				    sp->setname);
+				rval = -1;
+				goto out2;
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Is current set STALE?
+	 * Remembered so that a rollback can re-snarf the set in the
+	 * same state.
+	 */
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &c.c_mde);
+		rval = -1;
+		goto out;
+	}
+	if (c.c_flags & MDDB_C_STALE) {
+		stale_bool = TRUE;
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Start by suspending rpc.mdcommd (which drains it of all messages),
+	 * then change the nodelist followed by a reinit and resume.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+
+		if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+		    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspendall_flag = 1;
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * Withdraw the set - halt set.
+	 * This will fail if any I/O is occurring to any metadevice which
+	 * includes a resync to a mirror metadevice.
+	 */
+	set_halted = 1;
+	if (halt_set(sp, ep)) {
+		/* Was set actually halted? */
+		if (own_set(sp, NULL, TRUE, ep) == MD_SETOWNER_YES) {
+			set_halted = 0;
+		}
+		rval = -1;
+		goto out;
+	}
+
+	/* Change to nodelist so need to send reinit to rpc.mdcommd */
+	send_reinit = 1;
+
+	/* Reset master on withdrawn node */
+	if (clnt_mnsetmaster(sd->sd_mn_mynode->nd_nodename, sp, "",
+	    MD_MN_INVALID_NID, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Mark my node as withdrawn and send to other nodes */
+	nd = sd->sd_nodelist;
+	my_nd = *(sd->sd_mn_mynode);	/* structure copy */
+	my_nd.nd_next = NULL;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp, &my_nd,
+		    MD_NR_WITHDRAW, NULL, ep)) {
+			rval = -1;
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * If withdrawn node is a mirror owner, reset mirror owner
+	 * to NULL.  If an error occurs, print a warning and continue.
+	 * Don't fail metaset because of mirror owner reset problem since
+	 * next node to grab mirror will resolve this issue.
+	 * Before next node grabs mirrors, metaset will show the withdrawn
+	 * node as owner which is why an attempt to reset the mirror owner
+	 * is made.
+	 */
+	node_id_list[0] = sd->sd_mn_mynode->nd_nodeid;	/* Setup my nodeid */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		/*
+		 * Failure is tested against -1 like the other clnt_*
+		 * calls in this function; the earlier comparison with
+		 * the octal constant 01 looked like a typo for -1.
+		 * NOTE(review): confirm clnt_reset_mirror_owner()
+		 * returns -1 on failure.
+		 */
+		if (clnt_reset_mirror_owner(nd->nd_nodename, sp,
+		    1, &node_id_list[0], &xep) == -1) {
+			mde_perror(&xep, dgettext(TEXT_DOMAIN,
+			    "Unable to reset mirror owner on node %s"),
+			    nd->nd_nodename);
+			mdclrerror(&xep);
+		}
+		nd = nd->nd_next;
+	}
+
+out:
+	if (rval == -1) {
+		/* Rejoin node - Mark node as joined and send to other nodes */
+		nd = sd->sd_nodelist;
+		my_nd = *(sd->sd_mn_mynode);	/* structure copy */
+		my_nd.nd_next = NULL;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp, &my_nd,
+			    MD_NR_JOIN, NULL, &xep)) {
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		/* Set master on withdrawn node */
+		if (clnt_mnsetmaster(sd->sd_mn_mynode->nd_nodename, sp,
+		    sd->sd_mn_master_nodenm,
+		    sd->sd_mn_master_nodeid, &xep)) {
+			mdclrerror(&xep);
+		}
+
+		/* Join set if halt_set had succeeded */
+		if (set_halted) {
+			if (setup_db_bydd(sp, dd, 0, &xep) == -1) {
+				mdclrerror(&xep);
+			}
+			/* If set previously stale - make it so at re-join */
+			if (snarf_set(sp, stale_bool, &xep) != 0) {
+				mdclrerror(&xep);
+				(void) halt_set(sp, &xep);
+				mdclrerror(&xep);
+			}
+		}
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (send_reinit) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * The reinit of rpc.mdcommd failed and
+				 * xep holds that failure.  ep may already
+				 * hold an error from earlier in the
+				 * withdraw.  If nothing failed before
+				 * (rval == 0), move the reinit error into
+				 * ep; otherwise keep the earlier error and
+				 * drop this one.
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd."));
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+out2:
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * We are here because we failed to resume
+				 * rpc.mdcommd.  However we potentially have
+				 * an error from the previous call.
+				 * If the previous call did fail, we keep
+				 * that error and generate a perror with
+				 * the string, "Unable to resume...".
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd."));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/*
+	 * Unlock set.  This flushes the caches on the servers.
+	 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+			if (rval == 0)
+				(void) mdstealerror(ep, &xep);
+			else
+				mdclrerror(&xep);
+			rval = -1;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * call metaflushsetname to reset local cache for master and
+	 * node information.
+	 */
+	metaflushsetname(sp);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	return (rval);
+
+}
+
+/*
+ * Update nodelist with cluster member information.
+ * A node not in the member list will be marked
+ * as not ALIVE and not OWN.
+ * A node in the member list will be marked ALIVE, but
+ * the OWN bit will not be changed.
+ *
+ * An empty membership list (nl == NULL) is handled like any other
+ * list: no node is a member, so every node in the nodelist is marked
+ * as not ALIVE and not OWN.
+ *
+ * If mynode isn't in the membership list, fail causing
+ * another reconfig cycle to be started since a non-member
+ * node shouldn't be taking part in the reconfig cycle.
+ *
+ * Parameters:
+ *	sp - set whose nodelist is being updated.
+ *	nl - cluster membership list (linked through ->next).
+ *	sd - set descriptor; sd->sd_nodelist flags are updated in place.
+ *	ep - filled in on failure.
+ *
+ * Return values:
+ *	0 - No problem.
+ *	1 - Any failure including RPC failure to my node.
+ */
+int
+meta_reconfig_update_nodelist(
+	mdsetname_t			*sp,
+	mndiskset_membershiplist_t	*nl,
+	md_set_desc			*sd,
+	md_error_t			*ep
+)
+{
+	mndiskset_membershiplist_t	*nl2;
+	md_mnnode_desc			*nd;
+	md_error_t			xep = mdnullerror;
+	int				rval = 0;
+
+	/*
+	 * Walk through nodelist, checking to see if each
+	 * node is in the member list.
+	 * If node is not a member, reset ALIVE and OWN node flag.
+	 * If node is a member, set ALIVE.
+	 * If mynode's OWN flag gets reset, then halt the diskset on this node.
+	 */
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		/* Look for this node's id in the membership list */
+		for (nl2 = nl; nl2 != NULL; nl2 = nl2->next) {
+			if (nl2->msl_node_id == nd->nd_nodeid)
+				break;
+		}
+
+		/* If node is in member list, set ALIVE (OWN is untouched) */
+		if (nl2 != NULL) {
+			nd->nd_flags |= MD_MN_NODE_ALIVE;
+			continue;
+		}
+
+		/*
+		 * node is not in member list, mark !ALIVE and !OWN.
+		 * This is done after the search (not inside it) so that
+		 * it also happens when the membership list is empty.
+		 */
+
+		/* If node is mynode, then halt set if needed */
+		if (strcmp(mynode(), nd->nd_nodename) == 0) {
+			/*
+			 * This shouldn't happen, but just
+			 * in case...  Any node not in the
+			 * membership list should be dead and
+			 * not running reconfig step1.
+			 */
+			if (nd->nd_flags & MD_MN_NODE_OWN) {
+				if (halt_set(sp, &xep)) {
+					mde_perror(&xep, "");
+					mdclrerror(&xep);
+				}
+			}
+			/*
+			 * Return failure since this node
+			 * (mynode) is not in the membership
+			 * list, but process the rest of the
+			 * nodelist first so that rpc.metad
+			 * can be updated with the latest
+			 * membership information.
+			 */
+			(void) mddserror(ep,
+			    MDE_DS_NOTINMEMBERLIST,
+			    sp->setno, nd->nd_nodename, NULL,
+			    sp->setname);
+			rval = 1;
+		}
+		nd->nd_flags &= ~MD_MN_NODE_ALIVE;
+		nd->nd_flags &= ~MD_MN_NODE_OWN;
+	}
+
+	/* Send this information to rpc.metad */
+	if (clnt_upd_nr_flags(mynode(), sp, sd->sd_nodelist,
+	    MD_NR_SET, MNSET_IN_RECONFIG, &xep)) {
+		/* Return failure if can't send node flags to rpc.metad */
+		if (rval == 0) {
+			(void) mdstealerror(ep, &xep);
+			rval = 1;
+		} else {
+			/*
+			 * ep already describes an earlier failure; drop
+			 * this one rather than leaving it set in xep.
+			 */
+			mdclrerror(&xep);
+		}
+	}
+	return (rval);
+}
+
+/*
+ * Choose master determines the master for a diskset.
+ * Each node determines the master on its own and
+ * adds this information to its local rpc.metad nodelist
+ * and also sends it to the kernel.
+ *
+ * Nodelist in set descriptor (sd) is sorted in
+ * monotonically increasing sequence of nodeid.
+ *
+ * If no master can be chosen, all information about the set is
+ * removed from this node (see the delete_set label below).
+ *
+ * Parameters:
+ *	sp - set for which a master is being chosen.
+ *	sd - set descriptor; OWN (and, as scratch state, DEL) flags in
+ *	     sd->sd_nodelist are modified while choosing.
+ *	ep - filled in on failure.
+ *
+ * Return values:
+ *	0 - No problem.
+ *	205 - There was an RPC problem to another node.
+ *	-1 - There was an error.  This could be an RPC error to my node.
+ *		This is a catastrophic failure causing node to panic.
+ */
+int
+meta_reconfig_choose_master_for_set(
+	mdsetname_t	*sp,
+	md_set_desc	*sd,
+	md_error_t	*ep
+)
+{
+	int			is_owner;
+	md_mnset_record		*mnsr = NULL;
+	int			lowest_alive_nodeid = 0;
+	int			set_being_added = 0;
+	int			set_being_deleted = 0;
+	uint_t			master_nodeid;
+	md_mnnode_desc		*nd, *nd2;
+	md_mnnode_record	*nr;
+	md_drive_desc		*dd;
+	md_setkey_t		*cl_sk;
+	int			rval = 0;
+	md_error_t		xep = mdnullerror;
+	mddb_setflags_config_t	sf;
+
+	/*
+	 * Is current node joined to diskset?
+	 * Don't trust flags, really check to see if mddb is snarfed.
+	 */
+	if (s_ownset(sp->setno, ep) == MD_SETOWNER_YES) {
+		/*
+		 * If a node is joined to the diskset, this node checks
+		 * to see if the current master of the diskset is valid and
+		 * is still in the membership list (ALIVE) and is
+		 * still joined (OWN).  Need to verify if master is
+		 * really joined - don't trust the flags.  (Can trust
+		 * ALIVE since set during earlier part of reconfig cycle.)
+		 * If the current master is valid, still in the membership
+		 * list and joined, then master is not changed on this node.
+		 * Just return.
+		 *
+		 * Verify that nodeid is valid before accessing masternode.
+		 */
+		if ((sd->sd_mn_master_nodeid != MD_MN_INVALID_NID) &&
+		    (sd->sd_mn_masternode->nd_flags & MD_MN_NODE_ALIVE)) {
+			if (clnt_ownset(sd->sd_mn_master_nodenm, sp,
+			    &is_owner, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    sd->sd_mn_master_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			} else {
+				if (is_owner == TRUE) {
+
+					meta_mc_log(MC_LOG5, dgettext(
+					    TEXT_DOMAIN, "Set %s previous "
+					    "master chosen %s (%d): %s"),
+					    sp->setname,
+					    sd->sd_mn_master_nodenm,
+					    sd->sd_mn_master_nodeid,
+					    meta_print_hrtime(gethrtime() -
+					    start_time));
+
+					/* Previous master is ok - done */
+					return (0);
+				}
+			}
+		}
+
+		/*
+		 * If current master is no longer in the membership list or
+		 * is no longer joined, then this node uses the following
+		 * algorithm:
+		 * - node calls RPC routine clnt_ownset to get latest
+		 *	information on which nodes are owners of diskset.
+		 *	clnt_ownset checks on each node to see if its kernel
+		 *	has that diskset snarfed.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_ownset(nd->nd_nodename, sp,
+			    &is_owner, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+
+			/*
+			 * Set owner flag for each node based on whether
+			 * that node really has a diskset mddb snarfed in
+			 * or not.
+			 */
+			if (is_owner == TRUE)
+				nd->nd_flags |= MD_MN_NODE_OWN;
+			else
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - node walks through nodelist looking for nodes that are
+		 *	owners of the diskset that are in the membership list.
+		 * - for each owner, node calls RPC routine clnt_getset to
+		 *	see if that node has its node record set to OK.
+		 * - If so, master is chosen to be this owner node.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Don't consider a node that isn't an owner */
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Does node has its own node record set to OK? */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (nd->nd_nodeid == nr->nr_nodeid) {
+					if (nr->nr_flags & MD_MN_NODE_OK) {
+						/* Found a master */
+						free_sr(
+						    (md_set_record *)mnsr);
+						goto found_master;
+					}
+				}
+				nr = nr->nr_next;
+			}
+			free_sr((md_set_record *)mnsr);
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - If no owner node has its own node record on its own node
+		 *	set to OK, then this node checks all of the non-owner
+		 *	nodes that are in the membership list.
+		 * - for each non-owner, node calls RPC routine clnt_getset to
+		 *	see if that node has its node record set to OK.
+		 * - If set doesn't exist, don't choose node for master.
+		 * - If so, master is chosen to be this non-owner node.
+		 *
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Only checking non-owner nodes this time around */
+			if (nd->nd_flags & MD_MN_NODE_OWN) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Does node has its own node record set to OK? */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/*
+				 * If set doesn't exist on non-owner node,
+				 * don't consider this node for master.
+				 * This is an expected condition, so clear
+				 * it rather than leaving it set in ep for
+				 * the calls that follow.
+				 */
+				if (mdiserror(ep, MDE_NO_SET)) {
+					mdclrerror(ep);
+					nd = nd->nd_next;
+					continue;
+				} else if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					/* RPC failure to another node */
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (nd->nd_nodeid == nr->nr_nodeid) {
+					if (nr->nr_flags & MD_MN_NODE_OK) {
+						/* Found a master */
+						free_sr(
+						    (md_set_record *)mnsr);
+						goto found_master;
+					}
+				}
+				nr = nr->nr_next;
+			}
+			free_sr((md_set_record *)mnsr);
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - If no node can be found that has its own node record on
+		 *	its node to be set to OK, then all alive nodes
+		 *	were in the process of being added to or deleted
+		 *	from set.  Each alive node will remove all
+		 *	information pertaining to this set from its node.
+		 *
+		 * If all nodes in set are ALIVE, then call sdssc end routines
+		 * since set was truly being initially created or destroyed.
+		 */
+		goto delete_set;
+	} else {
+
+		/*
+		 * If node is not joined to diskset, then this
+		 * node uses the following algorithm:
+		 * - If unjoined node doesn't have a node record for itself,
+		 *	just delete the diskset since diskset was in the
+		 *	process of being created.
+		 * - node needs to find master of diskset before
+		 *	reconfig cycle, if a master existed.
+		 * - node calls RPC routine clnt_ownset to get latest
+		 *	information on which nodes are owners of diskset.
+		 *	clnt_ownset checks on each node to see if its
+		 *	kernel has that diskset snarfed.
+		 */
+
+		/*
+		 * Is my node in the set description?
+		 * If not, delete the set from this node.
+		 * sr2setdesc sets sd_mn_mynode pointer to the node
+		 * descriptor for this node if there was a node
+		 * record for this node.
+		 *
+		 */
+		if (sd->sd_mn_mynode == NULL) {
+			goto delete_set;
+		}
+
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_ownset(nd->nd_nodename, sp,
+			    &is_owner, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+
+			/*
+			 * Set owner flag for each node based on whether
+			 * that node really has a diskset mddb snarfed in
+			 * or not.
+			 */
+			if (is_owner == TRUE)
+				nd->nd_flags |= MD_MN_NODE_OWN;
+			else
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - node walks through nodelist looking for nodes that
+		 *	are owners of the diskset that are in
+		 *	the membership list.
+		 * - for each owner, node calls RPC routine clnt_getset to
+		 *	see if that node has a master set and to get the
+		 *	diskset description.
+		 * - If the owner node has a set description that doesn't
+		 *	include the non-joined node in the nodelist, this node
+		 *	removes its set description of that diskset
+		 *	(i.e. removes the set from its local mddbs).  This is
+		 *	handling the case of when a node was removed from a
+		 *	diskset while it was not in the cluster membership
+		 *	list.
+		 * - If that node has a master set and the master is in the
+		 *	membership list and is an owner, then either this was
+		 *	the master from before the reconfig cycle or this
+		 *	node has already chosen a new master - either way,
+		 *	the master value is valid as long as it is in the
+		 *	membership list and is an owner
+		 * - master is chosen to be owner node's master
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Don't consider a node that isn't an owner */
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Get owner node's set record */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+
+			/* Is this node in the owner node's set record */
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (sd->sd_mn_mynode->nd_nodeid ==
+				    nr->nr_nodeid) {
+					break;
+				}
+				nr = nr->nr_next;
+			}
+			if (nr == NULL) {
+				/* my node not found - delete set */
+				free_sr((md_set_record *)mnsr);
+				goto delete_set;
+			}
+
+			/* Is owner's node's master valid? */
+			master_nodeid = mnsr->sr_master_nodeid;
+			free_sr((md_set_record *)mnsr);
+			if (master_nodeid == MD_MN_INVALID_NID) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			nd2 = sd->sd_nodelist;
+			while (nd2) {
+				if ((nd2->nd_nodeid == master_nodeid) &&
+				    (nd2->nd_flags & MD_MN_NODE_ALIVE) &&
+				    (nd2->nd_flags & MD_MN_NODE_OWN)) {
+					nd = nd2;
+					goto found_master;
+				}
+				nd2 = nd2->nd_next;
+			}
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - If no owner node has a valid master, then follow
+		 *	algorithm of when a node is joined to the diskset.
+		 * - node walks through nodelist looking for nodes that are
+		 *	owners of the diskset that are in the membership list.
+		 * - for each owner, node calls RPC routine clnt_getset to
+		 *	see if that node has its node record set to OK.
+		 * - If so, master is chosen to be this owner node.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Don't consider a node that isn't an owner */
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Does node has its own node record set to OK? */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (nd->nd_nodeid == nr->nr_nodeid) {
+					if (nr->nr_flags & MD_MN_NODE_OK) {
+						/* Found a master */
+						free_sr(
+						    (md_set_record *)mnsr);
+						goto found_master;
+					}
+				}
+				nr = nr->nr_next;
+			}
+			free_sr((md_set_record *)mnsr);
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - If no owner node has its own node record on its own node
+		 *	set to OK, then this node checks all of the non-owner
+		 *	nodes that are in the membership list.
+		 * - for each non-owner, node calls RPC routine clnt_getset to
+		 *	see if that node has its node record set to OK.
+		 * - If set doesn't exist, don't choose node for master.
+		 * - If this node doesn't exist in the nodelist on any of the
+		 *	non-owner nodes, this node removes its set description
+		 *	of that diskset (i.e. removes the set from its local
+		 *	mddbs).  This is handling the case of when a node was
+		 *	removed from a diskset while it was not in the
+		 *	cluster membership list.
+		 * - If non-owner node has its node record set to OK and if
+		 *	this node hasn't removed this diskset (step directly
+		 *	before this one), then the master is chosen to be this
+		 *	non-owner node.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd->nd_flags |= MD_MN_NODE_DEL;
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Don't consider owner nodes since none are OK */
+			if (nd->nd_flags & MD_MN_NODE_OWN) {
+				nd->nd_flags |= MD_MN_NODE_DEL;
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/*
+			 * Don't need to get nodelist from my node since
+			 * this is where sd_nodelist was obtained.
+			 */
+			if (sd->sd_mn_mynode->nd_nodeid == nd->nd_nodeid) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/*
+			 * If node has already been decided against for
+			 * master, then skip it.
+			 */
+			if (nd->nd_flags & MD_MN_NODE_DEL) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/*
+			 * Does node in my nodelist have its own node
+			 * record marked OK on its node?  And does node
+			 * in my nodelist exist on all other nodes?
+			 * Don't want to choose a node for master unless
+			 * that node is marked OK on its own node and that
+			 * node exists on all other alive nodes.
+			 *
+			 * This is guarding against the case when several
+			 * nodes are down and one of the downed nodes is
+			 * deleted from the diskset.  When the down nodes
+			 * are rebooted into the cluster, you don't want
+			 * any node to pick the deleted node as the master.
+			 */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/*
+				 * If set doesn't exist on non-owner node,
+				 * don't consider this node for master.
+				 * This is an expected condition, so clear
+				 * it rather than leaving it set in ep for
+				 * the calls that follow.
+				 */
+				if (mdiserror(ep, MDE_NO_SET)) {
+					mdclrerror(ep);
+					nd->nd_flags |= MD_MN_NODE_DEL;
+					nd = nd->nd_next;
+					continue;
+				} else if (mdanyrpcerror(ep)) {
+					/* RPC failure to another node */
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+			/*
+			 * Is my node in the nodelist gotten from the other
+			 * node?  If not, then remove the set from my node
+			 * since set was deleted from my node while my node
+			 * was out of the cluster.
+			 */
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (sd->sd_mn_mynode->nd_nodeid ==
+				    nr->nr_nodeid) {
+					break;
+				}
+				nr = nr->nr_next;
+			}
+			if (nr == NULL) {
+				/* my node not found - delete set */
+				free_sr((md_set_record *)mnsr);
+				goto delete_set;
+			}
+
+			/* Is node being checked marked OK on its own node? */
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (nd->nd_nodeid == nr->nr_nodeid) {
+					if (!(nr->nr_flags & MD_MN_NODE_OK)) {
+						nd->nd_flags |= MD_MN_NODE_DEL;
+					}
+					break;
+				}
+				nr = nr->nr_next;
+			}
+			/*
+			 * If node being checked doesn't exist on its
+			 * own node - don't choose it as master.
+			 */
+			if (nr == NULL) {
+				nd->nd_flags |= MD_MN_NODE_DEL;
+			}
+
+			/*
+			 * Check every node in my node's nodelist against
+			 * the nodelist gotten from the other node.
+			 * If a node in my node's nodelist is not found in the
+			 * other node's nodelist, then set the DEL flag.
+			 */
+			nd2 = sd->sd_nodelist;
+			while (nd2) {
+				nr = mnsr->sr_nodechain;
+				while (nr) {
+					if (nd2->nd_nodeid == nr->nr_nodeid) {
+						break;
+					}
+					nr = nr->nr_next;
+				}
+				/* nd2 not found in other node's nodelist */
+				if (nr == NULL) {
+					nd2->nd_flags |= MD_MN_NODE_DEL;
+				}
+				nd2 = nd2->nd_next;
+			}
+
+			free_sr((md_set_record *)mnsr);
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * Rescan list look for node that has not been marked DEL.
+		 * First node found is the master.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_DEL)) {
+				break;
+			}
+			nd = nd->nd_next;
+		}
+		if (nd) {
+			/* Found a master */
+			goto found_master;
+		}
+
+		/*
+		 * - If no node can be found that has its own node record on
+		 *	its node to be set to OK, then all alive nodes
+		 *	were in the process of being added to or deleted
+		 *	from set.  Each alive node will remove all
+		 *	information pertaining to this set from its node.
+		 *
+		 * If all nodes in set are ALIVE, then call sdssc end routines
+		 * since set was truly being initially created or destroyed.
+		 */
+		goto delete_set;
+	}
+
+found_master:
+	/* nd is the node chosen as master */
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Set %s master chosen %s (%d): %s"),
+	    sp->setname, nd->nd_nodename, nd->nd_nodeid,
+	    meta_print_hrtime(gethrtime() - start_time));
+
+	if (clnt_lock_set(mynode(), sp, ep) == -1) {
+		return (-1);
+	}
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (clnt_mnsetmaster(mynode(), sp,
+	    nd->nd_nodename, nd->nd_nodeid, ep)) {
+		rval = -1;
+	} else if (sd->sd_mn_mynode->nd_nodeid == nd->nd_nodeid) {
+		/* If this node is new master, set flag in this node's kernel */
+		(void) memset(&sf, 0, sizeof (sf));
+		sf.sf_setno = sp->setno;
+		sf.sf_setflags = MD_SET_MN_NEWMAS_RC;
+		/* Use magic to help protect ioctl against attack. */
+		sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+		sf.sf_flags = MDDB_NM_SET;
+
+		meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+		    "Setting new master flag for set %s: %s"),
+		    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+		/*
+		 * Fail reconfig cycle if ioctl fails since it is critical
+		 * to set new master flag.  The ioctl result is an integer
+		 * status, so it is compared against 0 (not NULL).
+		 */
+		if (metaioctl(MD_MN_SET_SETFLAGS, &sf, &sf.sf_mde,
+		    NULL) != 0) {
+			(void) mdstealerror(ep, &sf.sf_mde);
+			rval = -1;
+		}
+	}
+
+	if (clnt_unlock_set(mynode(), cl_sk, &xep) == -1) {
+		if (rval == 0) {
+			(void) mdstealerror(ep, &xep);
+			rval = -1;
+		}
+	}
+
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	return (rval);
+
+delete_set:
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Master not chosen, deleting set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	/*
+	 * Remove all set information from this node:
+	 *	- node records for this set
+	 *	- drive records for this set
+	 *	- set record for this set
+	 * (Only do this on this node since each node
+	 * will do it for its own local mddb.)
+	 *
+	 * If all nodes in set are ALIVE, then
+	 * the lowest numbered ALIVE nodeid in set
+	 * (regardless of whether an owner node or not) will
+	 * call the DCS service to cleanup for create/delete of set.
+	 *	sdssc_create_end(cleanup) if set was being created or
+	 *	sdssc_delete_end(cleanup) if set was being deleted.
+	 * A node record with flag ADD denotes a set being
+	 * created.  A node record with flag DEL denotes a
+	 * set being deleted.
+	 *
+	 * This path is also taken when this node has no node record of
+	 * its own (sd_mn_mynode == NULL).  In that case there is no
+	 * nodeid to compare against and no ADD/DEL flag to consult, so
+	 * this node never acts as the lowest numbered node.
+	 */
+	if (sd->sd_mn_mynode != NULL) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Found a node that isn't alive */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				break;
+
+			/* Is my node the lowest numbered ALIVE node? */
+			if (nd->nd_nodeid < sd->sd_mn_mynode->nd_nodeid) {
+				break;
+			}
+			nd = nd->nd_next;
+		}
+		if (nd == NULL) {
+			/* All nodes ALIVE and this is the lowest nodeid */
+			lowest_alive_nodeid = 1;
+		}
+	}
+
+	if (clnt_lock_set(mynode(), sp, ep) == -1) {
+		return (-1);
+	}
+
+
+	/*
+	 * If this node had been joined, withdraw and reset master.
+	 *
+	 * This could happen if a node was being added to or removed
+	 * from a diskset and the node doing the add/delete operation and
+	 * all other nodes in the diskset have left the cluster.
+	 */
+	if (sd->sd_mn_mynode) {
+		nd = sd->sd_mn_mynode;
+		if (nd->nd_flags & MD_MN_NODE_OWN) {
+			if (clnt_withdrawset(mynode(), sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			if (clnt_mnsetmaster(mynode(), sp, "",
+			    MD_MN_INVALID_NID, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * Remove side records for this node (side) from local mddb
+	 * (clnt_deldrvs does this) if there are drives in the set.
+	 *
+	 * Don't need to mark this node as DEL since already marked as
+	 * ADD or DEL (or this node would have been chosen as master).
+	 * Don't need to mark other node records, drive records or
+	 * set records as DEL.  If a panic occurs during clnt_delset,
+	 * these records will be deleted the next time this node
+	 * becomes a member and goes through the reconfig cycle.
+	 */
+	/* Get the drive descriptors for this set */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		if (! mdisok(ep)) {
+			/*
+			 * Ignore and clear out any failures from
+			 * metaget_drivedesc since a panic could have
+			 * occurred when a node was partially added to a set.
+			 */
+			mdclrerror(ep);
+		}
+	} else {
+		if (clnt_deldrvs(mynode(), sp, dd, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Now, delete the set - this removes the node, drive
+	 * and set records from the local mddb.
+	 */
+	if (clnt_delset(mynode(), sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+out:
+	/*
+	 * Record whether this node's record is marked ADD or DEL before
+	 * the set name is flushed below.
+	 * NOTE(review): metaflushsetname() presumably releases the cached
+	 * set description that sd points into - confirm.  Reading the
+	 * flags here is correct either way.
+	 * lowest_alive_nodeid is only set when sd_mn_mynode is non-NULL.
+	 */
+	if (lowest_alive_nodeid) {
+		nd = sd->sd_mn_mynode;
+		set_being_added = ((nd->nd_flags & MD_MN_NODE_ADD) != 0);
+		set_being_deleted = ((nd->nd_flags & MD_MN_NODE_DEL) != 0);
+	}
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	/*
+	 * Ignore errors from unlock of set since set is no longer
+	 * known (if clnt_delset worked).
+	 */
+	if (clnt_unlock_set(mynode(), cl_sk, &xep) == -1) {
+		mdclrerror(&xep);
+	}
+
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	/*
+	 * If this node is the lowest numbered nodeid then
+	 * call sdssc_create/delete_end depending on whether
+	 * this node is marked as ADD or DEL in the node record.
+	 */
+	if (set_being_added)
+		sdssc_create_end(sp->setname, SDSSC_CLEANUP);
+	else if (set_being_deleted)
+		sdssc_delete_end(sp->setname, SDSSC_CLEANUP);
+
+	/* Finished with this set -- return */
+	return (rval);
+}
+
+/*
+ * Reconfig step to choose a new master for all MN disksets.
+ *
+ * For every multi-node diskset known to this node: update the nodelist
+ * from the cluster membership list, choose a master, and send the new
+ * nodelist to rpc.mdcommd.  Finally resume I/O on all MN disksets.
+ * The membership list is freed on every exit path once it has been
+ * read.
+ *
+ * Parameters:
+ *	ep - filled in (and printed) on failure.
+ *
+ * Return values:
+ *	0 - Everything is great.
+ *	1 - This node failed to reconfig.
+ *	205 - Cause another reconfig due to a nodelist problem
+ *		or RPC failure to another node
+ */
+int
+meta_reconfig_choose_master(
+	md_error_t	*ep
+)
+{
+	set_t				max_sets, setno;
+	int				nodecnt;
+	mndiskset_membershiplist_t	*nl;
+	md_set_desc			*sd;
+	mdsetname_t			*sp;
+	int				rval = 0;
+	int				set_rval;
+	mddb_setflags_config_t		sf;
+	int				start_node_delayed = 0;
+
+	if ((max_sets = get_max_sets(ep)) == 0) {
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Unable to get number of sets"));
+		return (1);
+	}
+
+	/*
+	 * Get membershiplist from API routine.  If there's
+	 * an error, return a 205 to cause another reconfig.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+		mde_perror(ep, "");
+		return (205);
+	}
+
+	/*
+	 * From here on the membership list is held; every exit goes
+	 * through "out" so that it is freed.
+	 */
+	for (setno = 1; setno < max_sets; setno++) {
+		if ((sp = metasetnosetname(setno, ep)) == NULL) {
+			if (mdiserror(ep, MDE_NO_SET)) {
+				/* No set for this setno - continue */
+				mdclrerror(ep);
+				continue;
+			} else {
+				/*
+				 * If encountered an RPC error from my node,
+				 * then immediately fail.
+				 */
+				if (mdanyrpcerror(ep)) {
+					mde_perror(ep, "");
+					rval = 1;
+					goto out;
+				}
+				/* Can't get set information */
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to get information for "
+				    "set number %d"), setno);
+				mdclrerror(ep);
+				continue;
+			}
+		}
+
+		/* If setname is there, set desc should exist. */
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			/*
+			 * If encountered an RPC error from my node,
+			 * then immediately fail.
+			 */
+			if (mdanyrpcerror(ep)) {
+				mde_perror(ep, "");
+				rval = 1;
+				goto out;
+			}
+			mde_perror(ep, dgettext(TEXT_DOMAIN,
+			    "Unable to get set %s desc information"),
+			    sp->setname);
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* Only reconfig MN disksets */
+		if (!MD_MNSET_DESC(sd)) {
+			continue;
+		}
+
+		meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+		    "Begin choose master for set %s: %s"),
+		    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+		/* Update nodelist with member information. */
+		if (meta_reconfig_update_nodelist(sp, nl, sd, ep)) {
+			/*
+			 * If encountered an RPC error from my node,
+			 * then immediately fail.
+			 */
+			if (mdanyrpcerror(ep)) {
+				mde_perror(ep, "");
+				rval = 1;
+				goto out;
+			}
+			mde_perror(ep, "");
+			mdclrerror(ep);
+			continue;
+		}
+
+		/*
+		 * If all nodes in a cluster are starting, then
+		 * all nodes will attempt to contact all other nodes
+		 * to determine a master node.  This can lead to a
+		 * problem where node 1 is trying to contact the rpc.metad
+		 * node 2 and node 2 is trying to contact the rpc.metad
+		 * on node 1 -- and this causes the rpc call to fail
+		 * on both nodes and causes a new reconfig cycle.
+		 *
+		 * In order to break this problem, a newly starting node
+		 * will delay a small amount of time (nodeid mod 4 seconds)
+		 * and will then run the code to choose a master for the
+		 * first set.  Delay will only be done once regardless of the
+		 * number of sets.
+		 *
+		 * The delay is skipped if this node has no node descriptor
+		 * in the set (sd_mn_mynode == NULL), since there is then no
+		 * nodeid to derive the delay from.
+		 */
+		if (start_node_delayed == 0) {
+			(void) memset(&sf, 0, sizeof (sf));
+			sf.sf_setno = sp->setno;
+			sf.sf_flags = MDDB_NM_GET;
+			/* Use magic to help protect ioctl against attack. */
+			sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+			if ((metaioctl(MD_MN_GET_SETFLAGS, &sf,
+			    &sf.sf_mde, NULL) == 0) &&
+			    ((sf.sf_setflags & MD_SET_MN_START_RC) ==
+			    MD_SET_MN_START_RC) &&
+			    (sd->sd_mn_mynode != NULL)) {
+				(void) sleep(sd->sd_mn_mynode->nd_nodeid % 4);
+			}
+			start_node_delayed = 1;
+		}
+
+		/* Choose master for this set */
+		set_rval = meta_reconfig_choose_master_for_set(sp, sd, ep);
+		if (set_rval == -1) {
+			mde_perror(ep, "");
+			rval = 1;
+			goto out;
+		} else if (set_rval == 205) {
+			mde_perror(ep, "");
+			rval = 205;
+			goto out;
+		}
+
+		/* Send new nodelist to rpc.mdcommd */
+		(void) mdmn_reinit_set(sp->setno);
+
+		meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+		    "Choose master for set %s completed: %s"),
+		    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+	}
+
+	/*
+	 * Each node turns on I/Os for all MN disksets.
+	 * This is to recover from the situation where the master died
+	 * during a previous reconfig cycle when I/Os were suspended
+	 * for a MN diskset.
+	 * If a failure occurs return a 1 which will force this node to
+	 * panic.  Cannot leave node in the situation where I/Os are
+	 * not resumed.
+	 */
+	setno = 0;	/* 0 means all MN sets */
+	if (metaioctl(MD_MN_RESUME_SET, &setno, ep, NULL)) {
+		mde_perror(ep, "");
+		rval = 1;
+	}
+
+out:
+	/* Free the nodelist */
+	if (nodecnt)
+		meta_free_nodelist(nl);
+
+	return (rval);
+}
+
+/*
+ * meta_mnsync_user_records will synchronize the diskset user records across
+ * all nodes in the diskset. The diskset user records are stored in
+ * each node's local set mddb.
+ *
+ * This needs to be done even if there is no master change during the
+ * reconfig cycle since this routine should clean up any mess left by
+ * the untimely termination of a metaset or metadb command (due to a
+ * node panic or to user intervention).
+ *
+ * Caller is the Master node.
+ *
+ * Returns 0 - Success
+ * 205 - Failure during RPC to another node
+ * -1 - Any other failure and ep is filled in.
+ */
+int
+meta_mnsync_user_records(
+ mdsetname_t *sp,
+ md_error_t *ep
+)
+{
+ md_set_desc *sd;
+ md_mnnode_desc *master_nodelist, *nd, *nd2, *ndtail;
+ md_mnset_record *mnsr;
+ md_mnsr_node_t *master_mnsr_node = NULL, *mnsr_node = NULL;
+ md_mnnode_record *nr;
+ md_drive_record *dr;
+ int dr_cnt, dd_cnt;
+ int found_my_nr;
+ md_drive_desc *dd, *dd_prev, *master_dd, *other_dd;
+ int all_drives_ok;
+ int rval = 0;
+ int max_genid = 0;
+ int num_alive_nodes, num_alive_nodes_del = 0;
+ int set_locked = 0;
+ md_setkey_t *cl_sk;
+ md_error_t xep = mdnullerror;
+ char *anode[1];
+ mddb_setflags_config_t sf;
+
+ /*
+ * Sync up node records first.
+ * Construct a master nodelist using the nodelist from this
+ * node's rpc.metad node records and then setting the state of each
+ * node following these rules:
+ * - If a node record is marked OK on its node, mark it OK
+ * in the master nodelist (and later OK on all nodes)
+ * If a node record is also marked OWN on its node,
+ * mark it OWN in the master nodelist.
+ * - If a node record is not marked OK on its node, then mark
+ * it as DEL in the master list (later deleting it)
+ * - If node record doesn't exist on that node, then mark it DEL
+ * (later deleting it)
+ * - If set record doesn't exist on that node, mark node as DEL
+ * - If a node record doesn't exist on all nodes, then mark it DEL
+ * - If a node is not ALIVE, then
+ * - If that node marked DEL on any node - mark it DEL
+ * in master list but leave in nodelist
+ * - If that node is marked as ADD on any node, mark it
+ * ADD in the master list but leave in nodelist
+ * - When that node returns to the living, the DEL
+ * node record will be removed and the ADD node
+ * record may be removed if marked ADD on that
+ * node.
+ * The key rule is to not remove a node from the nodelist until
+ * that node record is removed from its own node. Do not want to
+ * remove a node's record from all other nodes and then have
+ * that node have its own record marked OK so that a node will pick
+ * a different master than the other nodes.
+ *
+ * Next,
+ * If node is ALIVE and node record is marked DEL in master nodelist,
+ * remove node from set.
+ * If node is ALIVE and node record is marked OK in master nodelist,
+ * mark it OK on all other nodes.
+ * If node is not ALIVE and node record is marked DEL in master
+ * nodelist, mark it DEL on all other nodes.
+ * If node is not ALIVE and node record is marked ADD in master,
+ * nodelist, mark it ADD on all other nodes.
+ */
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ return (-1);
+ }
+ master_nodelist = sd->sd_nodelist;
+
+ /*
+ * Walk through nodelist creating a master nodelist.
+ */
+ num_alive_nodes = 0;
+ nd = master_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ num_alive_nodes++;
+ if (clnt_mngetset(nd->nd_nodename, sp->setname,
+ MD_SET_BAD, &mnsr, ep) == -1) {
+ if (mdiserror(ep, MDE_NO_SET)) {
+ /* set doesn't exist, mark node as DEL */
+ nd->nd_flags &= ~MD_MN_NODE_OK;
+ nd->nd_flags &= ~MD_MN_NODE_ADD;
+ nd->nd_flags |= MD_MN_NODE_DEL;
+ nd->nd_flags |= MD_MN_NODE_NOSET;
+ nd = nd->nd_next;
+ continue;
+ } else {
+ /* If RPC failure to another node return 205 */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ }
+ /* Find biggest genid in records for this diskset */
+ if (mnsr->sr_genid > max_genid)
+ max_genid = mnsr->sr_genid;
+
+ dr = mnsr->sr_drivechain;
+ while (dr) {
+ /* Find biggest genid in records for this diskset */
+ if (dr->dr_genid > max_genid) {
+ max_genid = dr->dr_genid;
+ }
+ dr = dr->dr_next;
+ }
+
+ found_my_nr = 0;
+ nr = mnsr->sr_nodechain;
+ /* nr is the list of node recs from nd_nodename node */
+ while (nr) {
+ /* Find biggest genid in records for this diskset */
+ if (nr->nr_genid > max_genid)
+ max_genid = nr->nr_genid;
+ nd2 = master_nodelist;
+ ndtail = NULL;
+ /* For each node record, is it in master list? */
+ while (nd2) {
+ if (nd2->nd_nodeid == nr->nr_nodeid)
+ break;
+ if (nd2->nd_next == NULL)
+ ndtail = nd2;
+ nd2 = nd2->nd_next;
+ }
+ /*
+ * Found node record not in master list -- add it
+ * to list marking it as DEL since node record
+ * should exist on all nodes unless a panic occurred
+ * during addition or deletion of host to diskset.
+ */
+ if (nd2 == NULL) {
+ nd2 = Zalloc(sizeof (*nd2));
+ (void) strcpy(nd2->nd_nodename,
+ nr->nr_nodename);
+ nd2->nd_flags = nr->nr_flags;
+ nd2->nd_flags |= MD_MN_NODE_DEL;
+ nd2->nd_nodeid = nr->nr_nodeid;
+ nd2->nd_next = NULL;
+ ndtail->nd_next = nd2;
+ nd2 = NULL;
+ nr = nr->nr_next;
+ continue;
+ }
+ /*
+ * Is this the node record for the node that
+ * we requested the set desc from?
+ * If so, check if node has its own node record
+ * marked OK. If marked OK, check for the OWN bit.
+ */
+ if (nr->nr_nodeid == nd->nd_nodeid) {
+ found_my_nr = 1;
+ if (nr->nr_flags & MD_MN_NODE_OK) {
+ /*
+ * If node record is marked OK
+ * on its own node, then mark it OK
+ * in the master list. Node record
+ * would have to exist on all nodes
+ * in the ADD state before it could
+ * be put into the OK state.
+ */
+ nd->nd_flags |= MD_MN_NODE_OK;
+ nd->nd_flags &=
+ ~(MD_MN_NODE_ADD | MD_MN_NODE_DEL);
+ /*
+ * Mark own in master list as marked
+ * on own node.
+ */
+ if (nr->nr_flags & MD_MN_NODE_OWN)
+ nd->nd_flags |= MD_MN_NODE_OWN;
+ else
+ nd->nd_flags &= ~MD_MN_NODE_OWN;
+ } else {
+ /* Otherwise, mark node as DEL */
+ nd->nd_flags &= ~MD_MN_NODE_OK;
+ nd->nd_flags &= ~MD_MN_NODE_ADD;
+ nd->nd_flags |= MD_MN_NODE_DEL;
+ }
+ }
+ /*
+ * If node is not ALIVE and marked DEL
+ * on any node, make it DEL in master list.
+ * If node is not ALIVE and marked ADD
+ * on any node, make it ADD in master list
+ * unless node record has already been marked DEL.
+ */
+ if (!(nr->nr_flags & MD_MN_NODE_ALIVE)) {
+ if (nr->nr_flags & MD_MN_NODE_ADD) {
+ if (!(nd->nd_flags & MD_MN_NODE_DEL)) {
+ /* If not DEL - mark it ADD */
+ nd->nd_flags |= MD_MN_NODE_ADD;
+ nd->nd_flags &= ~MD_MN_NODE_OK;
+ }
+ }
+ if (nr->nr_flags & MD_MN_NODE_DEL) {
+ nd->nd_flags |= MD_MN_NODE_DEL;
+ nd->nd_flags &= ~MD_MN_NODE_OK;
+ /* Could already be ADD - make it DEL */
+ nd->nd_flags &= ~MD_MN_NODE_ADD;
+ }
+ }
+ nr = nr->nr_next;
+ }
+ /*
+ * If a node record doesn't exist on its own node,
+ * then mark node as DEL.
+ */
+ if (found_my_nr == 0) {
+ nd->nd_flags &= ~MD_MN_NODE_OK;
+ nd->nd_flags |= MD_MN_NODE_DEL;
+ }
+
+ /*
+ * If node is OK - put mnsr onto master_mnsr_node list for
+ * later use when syncing up the drive records in the set.
+ */
+ if (nd->nd_flags & MD_MN_NODE_OK) {
+ mnsr_node = Zalloc(sizeof (*mnsr_node));
+ mnsr_node->mmn_mnsr = mnsr;
+ (void) strncpy(mnsr_node->mmn_nodename,
+ nd->nd_nodename, MD_MAX_MNNODENAME_PLUS_1);
+ mnsr_node->mmn_next = master_mnsr_node;
+ master_mnsr_node = mnsr_node;
+ } else {
+ free_sr((struct md_set_record *)mnsr);
+ }
+
+ nd = nd->nd_next;
+ }
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Master nodelist created for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ /*
+ * Send master nodelist to the rpc.metad on all nodes (including
+ * myself) and each node will update itself. This will set the
+ * ADD and DEL flags on each node as setup in the master nodelist.
+ * Don't send nodelist to node where set doesn't exist.
+ */
+ nd = master_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE) ||
+ (nd->nd_flags & MD_MN_NODE_NOSET)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+ master_nodelist, MD_NR_SET, MNSET_IN_RECONFIG, ep)) {
+ /* If RPC failure to another node return 205 */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ nd = nd->nd_next;
+ }
+
+ /*
+ * Now, delete nodes that need to be deleted.
+ */
+ if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+ ep)) == NULL) {
+ if (! mdisok(ep)) {
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /*
+ * May be doing lots of RPC commands to the nodes, so lock the
+ * ALIVE members of the set since most of the rpc.metad routines
+ * require this for security reasons.
+ */
+ nd = master_nodelist;
+ while (nd) {
+ /* Skip non-alive nodes and node without set */
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE) ||
+ (nd->nd_flags & MD_MN_NODE_NOSET)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+ /* If RPC failure to another node return 205 */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ set_locked = 1;
+ nd = nd->nd_next;
+ }
+
+ nd = master_nodelist;
+ while (nd) {
+ /* Skip non-alive nodes */
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (nd->nd_flags & MD_MN_NODE_DEL) {
+ num_alive_nodes_del++;
+ /*
+ * Delete this node rec from all ALIVE nodes in diskset.
+ */
+ nd2 = master_nodelist;
+ while (nd2) {
+ /* Skip non-alive nodes and node without set */
+ if (!(nd2->nd_flags & MD_MN_NODE_ALIVE) ||
+ (nd2->nd_flags & MD_MN_NODE_NOSET)) {
+ nd2 = nd2->nd_next;
+ continue;
+ }
+
+ /* This is a node being deleted from set */
+ if (nd2->nd_nodeid == nd->nd_nodeid) {
+ /* Mark set record as DEL */
+ if (clnt_upd_sr_flags(nd->nd_nodename,
+ sp, MD_SR_DEL, ep)) {
+ /* RPC failure to !my node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->
+ nd_nodeid
+ != nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ if (clnt_deldrvs(nd->nd_nodename, sp,
+ dd, ep)) {
+ /* RPC failure to !my node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->
+ nd_nodeid
+ != nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ if (clnt_delset(nd->nd_nodename, sp,
+ ep) == -1) {
+ /* RPC failure to !my node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->
+ nd_nodeid
+ != nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ } else {
+ /*
+ * Delete host from sets on hosts
+ * not being deleted.
+ */
+ anode[0] = Strdup(nd->nd_nodename);
+ if (clnt_delhosts(nd2->nd_nodename, sp,
+ 1, anode, ep) == -1) {
+ Free(anode[0]);
+ /* RPC failure to !my node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->
+ nd_nodeid
+ != nd2->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+
+ meta_mc_log(MC_LOG5,
+ dgettext(TEXT_DOMAIN,
+ "Deleted node %s (%d) on node %s "
+ "from set %s: %s"),
+ nd->nd_nodename, nd->nd_nodeid,
+ nd2->nd_nodename,
+ sp->setname,
+ meta_print_hrtime(
+ gethrtime() - start_time));
+
+ Free(anode[0]);
+ }
+ nd2 = nd2->nd_next;
+ }
+ }
+ nd = nd->nd_next;
+ }
+
+ nd = master_nodelist;
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ while (nd) {
+ /* Skip non-alive nodes and node without set */
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE) ||
+ (nd->nd_flags & MD_MN_NODE_NOSET)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_unlock_set(nd->nd_nodename, cl_sk, ep)) {
+ /* If RPC failure to another node return 205 */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ nd = nd->nd_next;
+ }
+ cl_set_setkey(NULL);
+ set_locked = 0;
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Nodelist syncronization complete for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ metaflushsetname(sp);
+
+ /*
+ * If all alive nodes have been deleted from set, just
+ * return since nothing else can be done until non-alive
+ * nodes (if there are any) rejoin the cluster.
+ */
+ if (num_alive_nodes == num_alive_nodes_del) {
+ rval = 0;
+ goto out;
+ }
+
+ /*
+ * Sync up drive records.
+ *
+ * If a node panic'd (or metaset command was killed) during the
+ * addition or deletion of a drive to the diskset, the nodes
+ * may have a different view of the drive list. During cleanup
+ * of the drive list during reconfig, a drive will be deleted
+ * from the list if the master node sees that the drive has been
+ * marked in the ADD state on any node or is marked in the DEL state
+ * on all nodes.
+ * This cleanup must occur even if all nodes in the cluster are
+ * not part of the cluster so that all nodes have the same view
+ * of the drivelist.
+ * Then if the entire cluster goes down and comes back up, the
+ * new master node could be a node that wasn't in the cluster when
+ * the node was deleted. This could lead to a situation where the
+ * master node thinks that a drive is OK, but this drive isn't
+ * known to the other nodes.
+ * This situation can also occur during the addition of a drive
+ * where a node has the drive marked OK, but the node executing the
+ * metaset command encountered a failure before marking that drive OK
+ * on the rest of the nodes. If the node with the OK drive then
+ * panics, then rest of the nodes will remove that drive marked ADD
+ * and when the node with the OK drive rejoins the cluster, it will
+ * have a drive marked OK that is unknown by the other nodes.
+ *
+ * There are 2 situations to consider:
+ * A) Master knows about a drive that other nodes don't know about.
+ * B) At least one slave node knows about a drive that the master
+ * node doesn't know about.
+ *
+ * To handle these situations the following steps are followed:
+ * 1) Count number of drives known by this master node and the
+ * other slave nodes.
+ * If all nodes have the same number of drives and the master has
+ * all drives marked OK, then skip to step4.
+ *
+ * 2) If a node has less drives listed than the master, the master
+ * must get the drive descriptor list from that node so that
+ * master can determine which drive it needs to delete from that
+ * node. Master must get the drive descriptor list since the
+ * drive record list does not contain the name of the drive, but
+ * only a key and the key can only be interpreted on that other
+ * node.
+ *
+ * 3) The master will then create the master drive list by doing:
+ * - Master starts with drive list known by master.
+ * - Any drive marked ADD will be removed from the list.
+ * - Any drive not known by another node (from step2) will be
+ * removed from the drive list.
+ * - If a drive is marked DEL on the master, the master must
+ * verify that the drive record is marked DEL on all nodes.
+ * If any node has the drive record marked OK, mark it OK
+ * on the master. (The reason why is described below).
+ *
+ * 4) The master sends out the master drive list and the slave
+ * nodes will force their drive lists to match the master
+ * drive list by deleting drives, if necessary and by changing
+ * the drive record states from ADD->OK if master has drive
+ * marked OK and slave has drive marked ADD.
+ *
+ * Interesting scenarios:
+ *
+ * 1) System has 4 nodes with node 1 as the master. Node 3 starts
+ * to delete a drive record (drive record on node 1 is marked DEL),
+ * but is stopped when node 3 panics. Node 1 also panics.
+ * During reconfig cycle, node 2 is picked as master and the drive
+ * record is left alone since all nodes in the cluster have it
+ * marked OK. User now sees drive as part of diskset.
+ * Now, entire cluster is rebooted and node 1 rejoins the cluster.
+ * Node 1 is picked as the master and node 1 has drive record
+ * marked DEL. Node 1 contacts all other nodes in the cluster
+ * and since at least one node has the drive record marked OK,
+ * the master marks the drive record OK.
+ * User continues to see the drive as part of the diskset.
+ */
+
+ /* Reget set descriptor since flushed above */
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ rval = -1;
+ goto out;
+ }
+
+ /* Has side effect of setting sd->sd_drvs to same as master_dd */
+ if ((master_dd = metaget_drivedesc_sideno(sp,
+ sd->sd_mn_mynode->nd_nodeid,
+ (MD_BASICNAME_OK | PRINT_FAST), ep)) == NULL) {
+ /* No drives in list */
+ if (!mdisok(ep)) {
+ /*
+ * Can't get drive list for this node, so
+ * return -1 causing this node to be removed from
+ * cluster config and fixed.
+ */
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /* Count the number of drives for all nodes */
+ mnsr_node = master_mnsr_node;
+ while (mnsr_node) {
+ dr_cnt = 0;
+ dr = mnsr_node->mmn_mnsr->sr_drivechain;
+ while (dr) {
+ dr_cnt++;
+ dr = dr->dr_next;
+ }
+ mnsr_node->mmn_numdrives = dr_cnt;
+ mnsr_node = mnsr_node->mmn_next;
+ }
+
+ /* Count the number of drives for the master; also check flags */
+ all_drives_ok = 1;
+ dd_cnt = 0;
+ dd = master_dd;
+ while (dd) {
+ dd_cnt++;
+ if (!(dd->dd_flags & MD_DR_OK))
+ all_drives_ok = 0;
+ dd = dd->dd_next;
+ }
+
+ /* If all drives are ok, do quick check against number of drives */
+ if (all_drives_ok) {
+ /* If all nodes have same number of drives, almost done */
+ mnsr_node = master_mnsr_node;
+ while (mnsr_node) {
+ if (mnsr_node->mmn_numdrives != dd_cnt)
+ break;
+ mnsr_node = mnsr_node->mmn_next;
+ }
+ /* All nodes have same number of drives, just send flags */
+ if (mnsr_node == NULL) {
+ goto send_drive_list;
+ }
+ }
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Begin detailed drive synchronization for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ /* Detailed check required */
+ mnsr_node = master_mnsr_node;
+ while (mnsr_node) {
+ /* Does slave node have less drives than master? */
+ if (mnsr_node->mmn_numdrives < dd_cnt) {
+ /* Yes - must determine which drive is missing */
+ if (clnt_getdrivedesc(mnsr_node->mmn_nodename, sp,
+ &other_dd, ep)) {
+ /* RPC failure to !my node */
+ if ((mdanyrpcerror(ep)) &&
+ (strcmp(mynode(), mnsr_node->mmn_nodename)
+ != 0)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Master node %s unable to "
+ "retrieve drive list from node %s"),
+ mynode(), mnsr_node->mmn_nodename);
+ goto out;
+ }
+ mnsr_node->mmn_dd = other_dd;
+ dd = master_dd;
+ while (dd) {
+ if (!(dd->dd_flags & MD_DR_OK)) {
+ dd = dd->dd_next;
+ continue;
+ }
+ other_dd = mnsr_node->mmn_dd;
+ while (other_dd) {
+ /* Convert to devids, when available */
+ if (strcmp(other_dd->dd_dnp->cname,
+ dd->dd_dnp->cname) == 0) {
+ break;
+ }
+ other_dd = other_dd->dd_next;
+ }
+ /*
+ * dd not found on slave so mark it
+ * ADD for later deletion (drives in ADD
+ * state are deleted later in this routine).
+ */
+ if (other_dd == NULL) {
+ dd->dd_flags = MD_DR_ADD;
+ }
+ dd = dd->dd_next;
+ }
+
+ }
+ mnsr_node = mnsr_node->mmn_next;
+ }
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Drive check completed for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ dd = master_dd;
+ dd_prev = 0;
+ while (dd) {
+ /* Remove any ADD drives from list */
+ if (dd->dd_flags & MD_DR_ADD) {
+ if (dd_prev) {
+ dd_prev->dd_next = dd->dd_next;
+ dd->dd_next = NULL;
+ metafreedrivedesc(&dd);
+ dd = dd_prev->dd_next;
+ } else {
+ /*
+ * If removing drive descriptor from head
+ * of linked list, also change sd->sd_drvs.
+ */
+ master_dd = sd->sd_drvs = dd->dd_next;
+ dd->dd_next = NULL;
+ metafreedrivedesc(&dd);
+ dd = master_dd;
+ }
+ /* dd setup in if/else above */
+ continue;
+ }
+ /*
+ * If drive is marked DEL, check all other nodes.
+ * If drive on another node is marked OK, mark drive OK
+ * in master list. If drive is marked DEL or doesn't exist
+ * on all nodes, remove drive from list.
+ */
+ if (dd->dd_flags & MD_DR_DEL) {
+ mnsr_node = master_mnsr_node;
+ while (mnsr_node) {
+ if (mnsr_node->mmn_dd == NULL) {
+ if (clnt_getdrivedesc(
+ mnsr_node->mmn_nodename, sp,
+ &other_dd, ep)) {
+ /* RPC failure to !my node */
+ if ((mdanyrpcerror(ep)) &&
+ (strcmp(mynode(),
+ mnsr_node->mmn_nodename)
+ != 0)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Master node %s unable "
+ "to retrieve drive list from "
+ "node %s"), mynode(),
+ mnsr_node->mmn_nodename);
+ goto out;
+ }
+ mnsr_node->mmn_dd = other_dd;
+ }
+ other_dd = mnsr_node->mmn_dd;
+ while (other_dd) {
+ /* Found drive (OK) from other node */
+ if (strcmp(dd->dd_dnp->cname,
+ other_dd->dd_dnp->cname)
+ == 0) {
+ /* Drive marked OK */
+ if (other_dd->dd_flags &
+ MD_DR_OK) {
+ dd->dd_flags = MD_DR_OK;
+ }
+ break;
+ }
+ other_dd = other_dd->dd_next;
+ }
+ if (dd->dd_flags == MD_DR_OK)
+ break;
+
+ mnsr_node = mnsr_node->mmn_next;
+ }
+ /*
+ * If no node had this drive marked OK, delete it.
+ */
+ if (dd->dd_flags & MD_DR_DEL) {
+ if (dd_prev) {
+ dd_prev->dd_next = dd->dd_next;
+ dd->dd_next = NULL;
+ metafreedrivedesc(&dd);
+ dd = dd_prev->dd_next;
+ } else {
+ /*
+ * If removing drive descriptor from
+ * head of linked list, also change
+ * sd->sd_drvs.
+ */
+ master_dd = sd->sd_drvs = dd->dd_next;
+ dd->dd_next = NULL;
+ metafreedrivedesc(&dd);
+ dd = master_dd;
+ }
+ /* dd setup in if/else above */
+ continue;
+ }
+ }
+ dd_prev = dd;
+ dd = dd->dd_next;
+ }
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Setting drive states completed for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+send_drive_list:
+ /*
+ * Set genid on all drives to be the highest value seen.
+ */
+ dd = master_dd;
+ while (dd) {
+ dd->dd_genid = max_genid;
+ dd = dd->dd_next;
+ }
+ /*
+ * Send updated drive list to all alive nodes.
+ * Will also set genid on set and node records to have same
+ * as the drive records.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip non-alive nodes */
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_upd_dr_reconfig(nd->nd_nodename, sp, master_dd, ep)) {
+ /* RPC failure to another node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid != nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ nd = nd->nd_next;
+ }
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Sent drive list to all nodes for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ /*
+ * If no drive records left in set and nodes had been joined,
+ * withdraw the nodes. Always reset the master and mark
+ * all nodes as withdrawn on all nodes.
+ */
+ if (master_dd == NULL) {
+ /* Reset new master flag since no longer master */
+ (void) memset(&sf, 0, sizeof (sf));
+ sf.sf_setno = sp->setno;
+ sf.sf_setflags = MD_SET_MN_NEWMAS_RC;
+ sf.sf_flags = MDDB_NM_RESET;
+ /* Use magic to help protect ioctl against attack. */
+ sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+ /* Ignore failure, failure to reset flag isn't catastrophic */
+ (void) metaioctl(MD_MN_SET_SETFLAGS, &sf,
+ &sf.sf_mde, NULL);
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Reset new master flag for " "set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip non-alive nodes */
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+ /* RPC failure to another node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ set_locked = 1;
+
+ /* Withdraw node from set if owner */
+ if ((nd->nd_flags & MD_MN_NODE_OWN) &&
+ (clnt_withdrawset(nd->nd_nodename, sp, ep))) {
+ /* RPC failure to another node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+
+ /* Mark all nodes as withdrawn on this node */
+ if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+ sd->sd_nodelist, MD_NR_WITHDRAW, NULL, ep)) {
+ /* RPC failure to another node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+
+ /* Resets master to no-master on this node */
+ if (clnt_mnsetmaster(nd->nd_nodename, sp,
+ "", MD_MN_INVALID_NID, ep)) {
+ /* RPC failure to another node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (clnt_unlock_set(nd->nd_nodename, cl_sk, ep)) {
+ /* RPC failure to another node */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ rval = 205;
+ } else {
+ /* Any other failure */
+ rval = -1;
+ }
+ goto out;
+ }
+ set_locked = 0;
+ nd = nd->nd_next;
+ }
+ }
+
+out:
+ /*
+ * If got here and set is still locked, then an error has
+ * occurred and master_nodelist is still valid.
+ * If error is not an RPC error, then unlock.
+ * If error is an RPC error, skip unlocks since this could cause
+ * yet another RPC timeout if a node has failed.
+ * Ignore failures in unlock since unlock is just trying to
+ * clean things up.
+ */
+ if ((set_locked) && !(mdanyrpcerror(ep))) {
+ nd = master_nodelist;
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ while (nd) {
+ /* Skip non-alive nodes */
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /*
+ * If clnt_unlock fails, just break out since next
+ * reconfig cycle will reset the locks anyway.
+ */
+ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+ break;
+ }
+ nd = nd->nd_next;
+ }
+ cl_set_setkey(NULL);
+ }
+ /* Free master_mnsr and drive descs */
+ mnsr_node = master_mnsr_node;
+ while (mnsr_node) {
+ master_mnsr_node = mnsr_node->mmn_next;
+ free_sr((md_set_record *)mnsr_node->mmn_mnsr);
+ free_rem_dd(mnsr_node->mmn_dd);
+ Free(mnsr_node);
+ mnsr_node = master_mnsr_node;
+ }
+
+ /* Frees sd->sd_drvs (which is also master_dd) */
+ metaflushsetname(sp);
+ return (rval);
+}
+
+/*
+ * meta_mnsync_diskset_mddbs
+ * Calling node is guaranteed to be an owner node.
+ * Calling node is the master node.
+ *
+ * Master node verifies that ondisk mddb format matches its incore format.
+ * If no nodes are joined to set, remove the change log entries.
+ * If a node is joined to set, play the change log.
+ *
+ * Returns 0 - Success
+ * 1 - Master unable to join to set.
+ * 205 - Failure during RPC to another node
+ * -1 - Any other failure and ep is filled in.
+ * -1 return will eventually cause node to panic
+ * in a SunCluster environment.
+ */
+int
+meta_mnsync_diskset_mddbs(
+ mdsetname_t *sp,
+ md_error_t *ep
+)
+{
+ md_set_desc *sd;
+ mddb_config_t c;
+ md_mn_msgclass_t class;
+ mddb_setflags_config_t sf;
+ md_mnnode_desc *nd, *nd2;
+ md_error_t xep = mdnullerror;
+ int stale_set = 0;
+
+ /* If setname is there, set desc should exist. */
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to get set %s desc information"), sp->setname);
+ return (-1);
+ }
+
+ /* Are there drives in the set? */
+ if (metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+ ep) == NULL) {
+ if (! mdisok(ep)) {
+ return (-1);
+ }
+ /* No drives in set -- nothing to sync up */
+ return (0);
+ }
+
+ /*
+ * Is master node (which is this node) joined to set?
+ * If master node isn't joined (which means that no nodes
+ * are joined to diskset), remove the change log entries
+ * since no need to replay them - all nodes will have same
+ * view of mddbs since all nodes are reading in the mddbs
+ * from disk.
+ * There is also no need to sync up the master and ondisk mddbs
+ * since master has no incore knowledge.
+ * Need to join master to set in order to flush the change
+ * log entries. Don't need to block I/O during join of master
+ * to set since no other nodes are joined to set and so no I/O
+ * can be occurring.
+ */
+ if (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+ /* Join master to set */
+ if (clnt_joinset(mynode(), sp,
+ MNSET_IN_RECONFIG, ep)) {
+ if (mdismddberror(ep, MDE_DB_STALE)) {
+ /*
+ * If STALE, print message and continue on.
+ * Don't do any writes or reads to mddbs
+ * so don't clear change log.
+ */
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Join of master node to STALE set %s"),
+ sp->setname);
+ stale_set = 1;
+ mdclrerror(ep);
+ } else if (mdismddberror(ep, MDE_DB_ACCOK)) {
+ /* ACCOK means mediator provided extra vote */
+ mdclrerror(ep);
+ } else {
+ /*
+ * If master is unable to join set, print an
+ * error message. Don't return failure or node
+ * will panic during cluster reconfig cycle.
+ * Also, withdraw node from set in order to
+ * cleanup from failed join attempt.
+ */
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Join of master node in set %s failed"),
+ sp->setname);
+ if (clnt_withdrawset(mynode(), sp, &xep))
+ mdclrerror(&xep);
+ return (1);
+ }
+ }
+ /*
+ * Master node successfully joined.
+ * Set local copy of flags to OWN and
+ * send owner flag to rpc.metad. If not stale,
+ * flush the change log.
+ */
+ sd->sd_mn_mynode->nd_flags |= MD_MN_NODE_OWN;
+ if (clnt_upd_nr_flags(mynode(), sp, sd->sd_nodelist, MD_NR_SET,
+ MNSET_IN_RECONFIG, ep)) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Flag update of master node join in set %s failed"),
+ sp->setname);
+ return (-1);
+ }
+
+ if (!stale_set) {
+ if (mdmn_reset_changelog(sp, ep,
+ MDMN_CLF_RESETLOG) != 0) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to reset changelog."));
+ return (-1);
+ }
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Removed changelog entries for set %s: %s"),
+ sp->setname,
+ meta_print_hrtime(gethrtime() - start_time));
+ }
+ /* Reset new master flag before return */
+ (void) memset(&sf, 0, sizeof (sf));
+ sf.sf_setno = sp->setno;
+ sf.sf_setflags = MD_SET_MN_NEWMAS_RC;
+ sf.sf_flags = MDDB_NM_RESET;
+ /* Use magic to help protect ioctl against attack. */
+ sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+ /* Ignore failure, failure to reset flag isn't catastrophic */
+ (void) metaioctl(MD_MN_SET_SETFLAGS, &sf,
+ &sf.sf_mde, NULL);
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Reset new master flag for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ return (0);
+ }
+
+ /*
+ * Is master already joined to STALE set (< 50% mddbs avail)?
+ * If so, can make no config changes to mddbs so don't check or play
+ * changelog and don't sync master node to ondisk mddbs.
+ * To get out of the stale state all nodes must be withdrawn
+ * from set. Then as nodes are re-joined, all nodes will
+ * have same view of mddbs since all nodes are reading the
+ * mddbs from disk.
+ */
+ (void) memset(&c, 0, sizeof (c));
+ c.c_id = 0;
+ c.c_setno = sp->setno;
+ if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+ (void) mdstealerror(ep, &c.c_mde);
+ return (-1);
+ }
+ if (c.c_flags & MDDB_C_STALE) {
+ return (0);
+ }
+
+ /*
+ * If this node is NOT a newly chosen master, then there's
+ * nothing else to do since the change log should be empty and
+ * the ondisk and incore mddbs are already consistent.
+ *
+ * A newly chosen master is a node that was not the master
+ * at the beginning of the reconfig cycle. If a node is a new
+ * master, then the new master state is reset after the ondisk
+ * and incore mddbs are consistent and the change log has
+ * been replayed.
+ */
+ (void) memset(&sf, 0, sizeof (sf));
+ sf.sf_setno = sp->setno;
+ sf.sf_flags = MDDB_NM_GET;
+ /* Use magic to help protect ioctl against attack. */
+ sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+ if ((metaioctl(MD_MN_GET_SETFLAGS, &sf, &sf.sf_mde, NULL) == 0) &&
+ ((sf.sf_setflags & MD_SET_MN_NEWMAS_RC) == 0)) {
+ return (0);
+ }
+
+ /*
+ * Now, sync up incore master view to ondisk mddbs.
+ * This is needed in the case where a master node
+ * had made a change to the mddb, but this change
+ * may not have been relayed to the slaves yet.
+ * So, the new master needs to verify that the ondisk
+ * mddbs match what the new master has incore -
+ * if different, new master rewrites all of the mddbs.
+ * Then the new master will replay the changelog and the
+ * new master will then execute what the old master had
+ * done.
+ *
+ * Block all I/Os to disks in this diskset on all nodes in
+ * the diskset. This will allow the rewriting of the mddbs
+ * (if needed), to proceed in a timely manner.
+ *
+ * If block of I/Os fail, return a -1.
+ */
+
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip non-alive and non-owner nodes */
+ if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) ||
+ (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mn_susp_res_io(nd->nd_nodename, sp->setno,
+ MN_SUSP_IO, ep)) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to suspend I/O on node %s in set %s"),
+ nd->nd_nodename, sp->setname);
+
+ /*
+ * Resume all other nodes that had been suspended.
+ * (Reconfig return step also resumes I/Os
+ * for all sets.)
+ */
+ nd2 = sd->sd_nodelist;
+ while (nd2) {
+ /* Stop when reaching failed node */
+ if (nd2->nd_nodeid == nd->nd_nodeid)
+ break;
+ /* Skip non-alive and non-owner nodes */
+ if ((!(nd2->nd_flags & MD_MN_NODE_ALIVE)) ||
+ (!(nd2->nd_flags & MD_MN_NODE_OWN))) {
+ nd2 = nd2->nd_next;
+ continue;
+ }
+ (void) (clnt_mn_susp_res_io(nd2->nd_nodename,
+ sp->setno, MN_RES_IO, &xep));
+ nd2 = nd2->nd_next;
+ }
+
+ /*
+ * If an RPC failure on another node, return a 205.
+ * Otherwise, exit with failure.
+ */
+ if ((mdanyrpcerror(ep)) &&
+ (sd->sd_mn_mynode->nd_nodeid !=
+ nd->nd_nodeid)) {
+ return (205);
+ } else {
+ return (-1);
+ }
+
+ }
+ nd = nd->nd_next;
+ }
+
+ (void) memset(&c, 0, sizeof (c));
+ c.c_id = 0;
+ c.c_setno = sp->setno;
+ /* Master can't sync up to ondisk mddbs? Kick it out of cluster */
+ if (metaioctl(MD_MN_CHK_WRT_MDDB, &c, &c.c_mde, NULL) != 0)
+ return (-1);
+
+ /*
+ * Resume I/Os that were suspended above.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip non-alive and non-owner nodes */
+ if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) ||
+ (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mn_susp_res_io(nd->nd_nodename, sp->setno,
+ MN_RES_IO, ep)) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to resume I/O on node %s in set %s"),
+ nd->nd_nodename, sp->setname);
+
+ /*
+ * If an RPC failure then don't do any
+ * more RPC calls, since one timeout is enough
+ * to endure. If RPC failure to another node, return
+ * 205. If RPC failure to my node, return -1.
+ * If not an RPC failure, continue resuming the
+ * rest of the nodes and then return -1.
+ */
+ if (mdanyrpcerror(ep)) {
+ if (sd->sd_mn_mynode->nd_nodeid ==
+ nd->nd_nodeid) {
+ return (-1);
+ } else {
+ return (205);
+ }
+ }
+
+ /*
+ * If not an RPC error, continue resuming rest of
+ * nodes, ignoring any failures except for an
+ * RPC failure which constitutes an immediate exit.
+ * Start in middle of list with failing node.
+ */
+ nd2 = nd->nd_next;
+ while (nd2) {
+ /* Skip non-alive and non-owner nodes */
+ if ((!(nd2->nd_flags & MD_MN_NODE_ALIVE)) ||
+ (!(nd2->nd_flags & MD_MN_NODE_OWN))) {
+ nd2 = nd2->nd_next;
+ continue;
+ }
+ (void) (clnt_mn_susp_res_io(nd2->nd_nodename,
+ sp->setno, MN_RES_IO, &xep));
+ if (mdanyrpcerror(&xep)) {
+ return (-1);
+ }
+ nd2 = nd2->nd_next;
+ }
+ }
+ nd = nd->nd_next;
+ }
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN, "Master node has completed "
+ "checking/writing the mddb for set %s: %s"), sp->setname,
+ meta_print_hrtime(gethrtime() - start_time));
+
+ /*
+ * Send (aka replay) all messages we find in the changelog.
+ * Flag the messages with
+ * MD_MSGF_REPLAY_MSG, so no new message ID is generated for them
+ * MD_MSGF_OVERRIDE_SUSPEND so they can pass the suspended commd.
+ */
+ for (class = MD_MN_NCLASSES - 1; class > 0; class--) {
+ mdmn_changelog_record_t *lr;
+ md_error_t xep = mdnullerror;
+ md_mn_result_t *resultp = NULL;
+ int ret;
+
+ lr = mdmn_get_changelogrec(sp->setno, class);
+ if ((lr->lr_flags & MD_MN_LR_INUSE) == 0) {
+ /* no entry for this class */
+ continue;
+ }
+
+ meta_mc_log(MC_LOG1, dgettext(TEXT_DOMAIN,
+ "replaying message ID=(%d, 0x%llx-%d)\n"),
+ MSGID_ELEMS(lr->lr_msg.msg_msgid));
+
+ ret = mdmn_send_message_with_msgid(
+ lr->lr_msg.msg_setno,
+ lr->lr_msg.msg_type,
+ lr->lr_msg.msg_flags | MD_MSGF_REPLAY_MSG |
+ MD_MSGF_OVERRIDE_SUSPEND,
+ lr->lr_msg.msg_event_data,
+ lr->lr_msg.msg_event_size,
+ &resultp,
+ &lr->lr_msg.msg_msgid,
+ &xep);
+
+ meta_mc_log(MC_LOG1, dgettext(TEXT_DOMAIN,
+ "mdmn_send_message returned %d\n"), ret);
+
+ if (resultp)
+ free_result(resultp);
+ }
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Playing changelog completed for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ /*
+ * Now that new master has ondisk and incore mddbs in sync, reset
+ * this node's new master kernel flag (for this set). If this node
+ * re-enters another reconfig cycle before the completion of this
+ * reconfig cycle, this master node won't need to check if the ondisk
+ * and incore mddbs are in sync since this node won't be considered
+ * a new master (since this flag is being reset here in the middle of
+ * step2). This will save time during any subsequent reconfig
+ * cycles as long as this node continues to be master.
+ */
+ (void) memset(&sf, 0, sizeof (sf));
+ sf.sf_setno = sp->setno;
+ sf.sf_setflags = MD_SET_MN_NEWMAS_RC;
+ sf.sf_flags = MDDB_NM_RESET;
+ /* Use magic to help protect ioctl against attack. */
+ sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+ /* Ignore failure, since failure to reset flag isn't catastrophic */
+ (void) metaioctl(MD_MN_SET_SETFLAGS, &sf, &sf.sf_mde, NULL);
+
+ meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+ "Reset new master flag for set %s: %s"),
+ sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+ return (0);
+}
+
+/*
+ * meta_mnjoin_all will join all starting nodes in the diskset.
+ * A starting node is considered to be any node that is not
+ * an owner of the set but is a member of the cluster.
+ * Master node is already joined to set (done in meta_mnsync_diskset_mddbs).
+ *
+ * Caller is the Master node.
+ *
+ * Processing order:
+ *	1. If the set is not STALE and at least one ALIVE node is not yet an
+ *	   owner, suspend I/O on every ALIVE owner node.
+ *	2. Join every ALIVE non-owner node (clnt_joinset) and mark it as an
+ *	   owner in the cached node list.
+ *	3. Resume I/O on the owner nodes if it was suspended in step 1.
+ *	4. Push the node record flags to every owner node (clnt_upd_nr_flags).
+ *
+ * Returns	0 - Success
+ *		205 - Failure during RPC to another node
+ *		-1 - Any other failure and ep is filled in.
+ */
+int
+meta_mnjoin_all(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	md_mnnode_desc		*nd, *nd2;
+	int			rval = 0;
+	int			stale_flag = 0;
+	mddb_config_t		c;
+	int			susp_res_flag = 0;
+	md_error_t		xep = mdnullerror;
+	hrtime_t		start_time;
+
+	/*
+	 * Reference point for the elapsed time reported by the completion
+	 * log entry at the end of this function.
+	 */
+	start_time = gethrtime();
+
+	/* If setname is there, set desc should exist. */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Unable to get set %s desc information"), sp->setname);
+		return (-1);
+	}
+
+	/* Are there drives in the set? */
+	if (metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep) == NULL) {
+		if (! mdisok(ep)) {
+			return (-1);
+		}
+		/* No drives in set -- nothing to join */
+		return (0);
+	}
+
+	/*
+	 * Is set currently stale?
+	 */
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+	/* Ignore failure since master node may not be joined yet */
+	(void) metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL);
+	if (c.c_flags & MDDB_C_STALE) {
+		stale_flag = MNSET_IS_STALE;
+	}
+
+	/*
+	 * If any nodes are going to be joined to diskset, then
+	 * suspend I/O to all disks in diskset so that nodes can join
+	 * (read in mddbs) in a reasonable amount of time even under
+	 * high I/O load.  Don't need to do this if set is STALE since
+	 * no I/O can be occurring to a STALE set.
+	 */
+	if (stale_flag != MNSET_IS_STALE) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Found a node that will be joined to diskset */
+			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
+			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+				/* Set flag that diskset should be suspended */
+				susp_res_flag = 1;
+				break;
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	if (susp_res_flag) {
+		/*
+		 * Block all I/Os to disks in this diskset on all joined
+		 * nodes in the diskset.
+		 * If block of I/Os fails due to an RPC failure on another
+		 * node, return 205; otherwise, return -1.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip non-alive and non-owner nodes */
+			if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) ||
+			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mn_susp_res_io(nd->nd_nodename, sp->setno,
+			    MN_SUSP_IO, ep)) {
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to suspend I/O on node %s"
+				    " in set %s"), nd->nd_nodename,
+				    sp->setname);
+				/*
+				 * Resume other nodes that had been suspended.
+				 * (Reconfig return step also resumes I/Os
+				 * for all sets.)
+				 */
+				nd2 = sd->sd_nodelist;
+				while (nd2) {
+					/* Stop when reaching failed node */
+					if (nd2->nd_nodeid == nd->nd_nodeid)
+						break;
+					/* Skip non-alive/non-owner nodes */
+					if ((!(nd2->nd_flags &
+					    MD_MN_NODE_ALIVE)) ||
+					    (!(nd2->nd_flags &
+					    MD_MN_NODE_OWN))) {
+						nd2 = nd2->nd_next;
+						continue;
+					}
+					(void) (clnt_mn_susp_res_io(
+					    nd2->nd_nodename, sp->setno,
+					    MN_RES_IO, &xep));
+					nd2 = nd2->nd_next;
+				}
+
+				/*
+				 * If the suspend failed due to an
+				 * RPC failure on another node, return
+				 * a 205.
+				 * Otherwise, exit with failure.
+				 * The return reconfig step will resume
+				 * I/Os for all disksets.
+				 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					return (-1);
+				}
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	nd = sd->sd_nodelist;
+	while (nd) {
+		/*
+		 * If a node is in the membership list but isn't joined
+		 * to the set, try to join the node.
+		 */
+		if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
+		    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+			if (clnt_joinset(nd->nd_nodename, sp,
+			    (MNSET_IN_RECONFIG | stale_flag), ep)) {
+				/*
+				 * If RPC failure to another node
+				 * then exit without attempting anything else.
+				 * (Reconfig return step will resume I/Os
+				 * for all sets.)
+				 */
+				if (mdanyrpcerror(ep)) {
+					mde_perror(ep, "");
+					return (205);
+				}
+				/*
+				 * STALE and ACCOK failures aren't true
+				 * failures.  STALE means that <50% mddbs
+				 * are available. ACCOK means that the
+				 * mediator provided the extra vote.
+				 * If a true failure, then print message
+				 * and withdraw node from set in order to
+				 * cleanup from failed join attempt.
+				 */
+				if ((!mdismddberror(ep, MDE_DB_STALE)) &&
+				    (!mdismddberror(ep, MDE_DB_ACCOK))) {
+					mde_perror(ep,
+					    "WARNING: Unable to join node %s "
+					    "to set %s", nd->nd_nodename,
+					    sp->setname);
+					mdclrerror(ep);
+					if (clnt_withdrawset(nd->nd_nodename,
+					    sp, &xep))
+						mdclrerror(&xep);
+					nd = nd->nd_next;
+					continue;
+				}
+			}
+			/* Set owner flag even if STALE or ACCOK */
+			nd->nd_flags |= MD_MN_NODE_OWN;
+		}
+		nd = nd->nd_next;
+	}
+	/*
+	 * Resume I/Os if suspended above.
+	 */
+	if (susp_res_flag) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/*
+			 * Skip non-alive and non-owner nodes.
+			 * Nodes joined by the loop above now carry
+			 * MD_MN_NODE_OWN, so they are sent the resume
+			 * request as well.
+			 */
+			if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) ||
+			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mn_susp_res_io(nd->nd_nodename, sp->setno,
+			    MN_RES_IO, ep)) {
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume I/O on node %s"
+				    " in set %s"), nd->nd_nodename,
+				    sp->setname);
+
+				/*
+				 * If an RPC failure then don't do any
+				 * more RPC calls, since one timeout is enough
+				 * to endure.  If RPC failure to another node,
+				 * return 205.  If RPC failure to my node,
+				 * return -1.
+				 * (Reconfig return step will resume I/Os
+				 * for all sets.)
+				 * If not an RPC failure, continue resuming the
+				 * rest of the nodes and then return -1.
+				 */
+				if (mdanyrpcerror(ep)) {
+					if (sd->sd_mn_mynode->nd_nodeid ==
+					    nd->nd_nodeid) {
+						return (-1);
+					} else {
+						return (205);
+					}
+				}
+
+				/*
+				 * If not an RPC error, continue resuming rest
+				 * of nodes, ignoring any failures except for
+				 * an RPC failure which constitutes an
+				 * immediate exit.
+				 * Start in middle of list with failing node.
+				 */
+				nd2 = nd->nd_next;
+				while (nd2) {
+					/* Skip non-alive/non-owner nodes */
+					if ((!(nd2->nd_flags &
+					    MD_MN_NODE_ALIVE)) ||
+					    (!(nd2->nd_flags &
+					    MD_MN_NODE_OWN))) {
+						nd2 = nd2->nd_next;
+						continue;
+					}
+					(void) (clnt_mn_susp_res_io(
+					    nd2->nd_nodename, sp->setno,
+					    MN_RES_IO, &xep));
+					if (mdanyrpcerror(&xep)) {
+						return (-1);
+					}
+					nd2 = nd2->nd_next;
+				}
+
+				/*
+				 * Every remaining node has been given its
+				 * resume request.  Report the original
+				 * failure (still held in ep) instead of
+				 * falling back into the outer loop, which
+				 * would resume the same nodes a second time
+				 * and could end up returning success.
+				 */
+				return (-1);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		/*
+		 * If 1 node fails - go ahead and update the rest except
+		 * in the case of an RPC failure, fail immediately.
+		 */
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+		    sd->sd_nodelist, MD_NR_SET, MNSET_IN_RECONFIG, ep)) {
+			/* RPC failure to another node */
+			if (mdanyrpcerror(ep)) {
+				return (205);
+			}
+			nd = nd->nd_next;
+			rval = -1;
+			continue;
+		}
+		nd = nd->nd_next;
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Join of all nodes completed for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_drv.c b/usr/src/lib/lvm/libmeta/common/meta_set_drv.c
new file mode 100644
index 0000000000..5fad53ad7b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_drv.c
@@ -0,0 +1,1948 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include <meta.h>
+#include <mdmn_changelog.h>
+#include "meta_set_prv.h"
+#include "meta_repartition.h"
+
+/*
+ * Run checkdrive_onnode() for every drive in dnlp against every host of
+ * the set sp.
+ *
+ * For a multi-node set the hosts are taken from sd_nodelist and nodes
+ * without MD_MN_NODE_ALIVE are skipped; otherwise the hosts are the
+ * non-empty entries of the sd_nodes table.
+ *
+ * Returns 0 when every check passes, -1 when the set descriptor cannot be
+ * obtained or as soon as one checkdrive_onnode() call reports failure
+ * (ep is whatever the failing call left in it).
+ */
+static int
+check_setnodes_againstdrivelist(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	*dnlp,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd = metaget_setdesc(sp, ep);
+	mddrivenamelist_t	*drv;
+	md_mnnode_desc		*node;
+	int			side;
+
+	if (sd == NULL)
+		return (-1);
+
+	if (!(MD_MNSET_DESC(sd))) {
+		/* Traditional set: walk the fixed-size host table. */
+		for (side = 0; side < MD_MAXSIDES; side++) {
+			/* An empty name marks an unused slot. */
+			if (sd->sd_nodes[side][0] == '\0')
+				continue;
+
+			for (drv = dnlp; drv != NULL; drv = drv->next) {
+				if (checkdrive_onnode(sp, drv->drivenamep,
+				    sd->sd_nodes[side], ep) != 0)
+					return (-1);
+			}
+		}
+		return (0);
+	}
+
+	/* Multi-node set: walk the node list, ignoring non-ALIVE nodes. */
+	for (node = sd->sd_nodelist; node != NULL; node = node->nd_next) {
+		if ((node->nd_flags & MD_MN_NODE_ALIVE) == 0)
+			continue;
+
+		for (drv = dnlp; drv != NULL; drv = drv->next) {
+			if (checkdrive_onnode(sp, drv->drivenamep,
+			    node->nd_nodename, ep) != 0)
+				return (-1);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Make sure no drive is named twice in dnlp.
+ *
+ * Every entry is compared (by cname) with each entry that follows it.
+ * On the first match an MDE_DS_DUPDRIVE error naming the drive and the
+ * set is recorded through mddserror() and that call's result is returned.
+ * Returns 0 when all cnames are distinct (including an empty list).
+ */
+static int
+drvsuniq(mdsetname_t *sp, mddrivenamelist_t *dnlp, md_error_t *ep)
+{
+	mddrivenamelist_t	*outer, *inner;
+
+	for (outer = dnlp; outer != NULL; outer = outer->next) {
+		mddrivename_t	*candidate = outer->drivenamep;
+
+		for (inner = outer->next; inner != NULL; inner = inner->next) {
+			mddrivename_t	*other = inner->drivenamep;
+
+			if (strcmp(candidate->cname, other->cname) == 0) {
+				return (mddserror(ep, MDE_DS_DUPDRIVE,
+				    sp->setno, NULL, candidate->cname,
+				    sp->setname));
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Build a drive descriptor list from a drive name list.
+ *
+ * One descriptor is appended per entry of dnlp, in list order, each
+ * stamped with the set's sd_ctime and sd_genid and the caller's flags.
+ *
+ * Returns the head of the new list; NULL if dnlp is empty or if the set
+ * descriptor cannot be obtained.
+ */
+static md_drive_desc *
+metaget_drivedesc_fromdrivelist(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	*dnlp,
+	uint_t			flags,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd = metaget_setdesc(sp, ep);
+	md_drive_desc		*head = NULL;
+	mddrivenamelist_t	*cur = dnlp;
+
+	if (sd == NULL)
+		return (NULL);
+
+	while (cur != NULL) {
+		(void) metadrivedesc_append(&head, cur->drivenamep, 0, 0,
+		    sd->sd_ctime, sd->sd_genid, flags);
+		cur = cur->next;
+	}
+
+	return (head);
+}
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Rebuild the per-side name list (dnp->side_names) of a drive.
+ *
+ * The drive's replica slice is looked up, side_names_key is reset to
+ * MD_KEYWILD, any existing side names are flushed, and then
+ * meta_getnextside_devinfo() is called repeatedly, starting from
+ * MD_SIDEWILD, appending one entry per side it reports.
+ *
+ * Returns 0 once meta_getnextside_devinfo() reports no more sides,
+ * -1 if any of the lookups fails.
+ */
+int
+meta_make_sidenmlist(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	mdsidenames_t	*entry;
+	mdsidenames_t	**tailp;
+	mdname_t	*slicenp;
+	side_t		side = MD_SIDEWILD;
+	uint_t		slice;
+	int		status;
+
+	if (meta_replicaslice(dnp, &slice, ep) != 0)
+		return (-1);
+
+	dnp->side_names_key = MD_KEYWILD;
+
+	slicenp = metaslicename(dnp, slice, ep);
+	if (slicenp == NULL)
+		return (-1);
+
+	metaflushsidenames(dnp);
+	tailp = &dnp->side_names;
+
+	for (;;) {
+		entry = Zalloc(sizeof (*entry));
+
+		status = meta_getnextside_devinfo(sp, slicenp->bname,
+		    &side, &entry->cname, &entry->dname, &entry->mnum, ep);
+
+		/* -1 is an error, 0 means the sides are exhausted. */
+		if (status == -1 || status == 0) {
+			Free(entry);
+			return (status);
+		}
+
+		entry->sideno = side;
+
+		/* Link the new entry in at the tail of the list. */
+		assert(*tailp == NULL);
+		*tailp = entry;
+		tailp = &entry->next;
+	}
+	/*NOTREACHED*/
+}
+
+/*
+ * meta_set_adddrives - add the drives in dnlp to diskset sp.
+ *
+ * sp		- set the drives are added to; the caller must own it
+ * dnlp		- drives to add; must not contain duplicates
+ * dbsize	- handed to meta_db_balance() when the replicas are balanced
+ * force_label	- TRUE passes MD_REPART_FORCE to meta_repartition_drive()
+ * ep		- error return
+ *
+ * Outline:
+ *	- validate (ownership, membership for MN sets, duplicates, drive
+ *	  visible on every node, drive not already in a set)
+ *	- for MN sets: lock the set and suspend class 1 messages on all nodes
+ *	- repartition each drive and build its side names
+ *	- for traditional sets: lock the set on all hosts
+ *	- add the drive records (MD_DR_ADD), take ownership (traditional
+ *	  sets only), balance the replicas, then mark the drives MD_DR_OK
+ *	- first drive of an MN set additionally sets the master, allocates
+ *	  the changelog and joins all nodes
+ *
+ * Two exit paths exist: "out" releases suspends/locks only, "rollback"
+ * additionally undoes the work according to rb_level (1 = drive records
+ * added, 2 = ownership taken, 3 = replicas balanced).
+ *
+ * Returns -1 on failure with ep set.  On success the value returned is
+ * the one handed back by meta_db_balance() (presumably 0 - TODO confirm).
+ */
+int
+meta_set_adddrives(
+ mdsetname_t *sp,
+ mddrivenamelist_t *dnlp,
+ daddr_t dbsize,
+ int force_label,
+ md_error_t *ep
+)
+{
+ md_set_desc *sd;
+ md_drive_desc *dd = NULL, *curdd = NULL, *ddp;
+ int i;
+ mddrivenamelist_t *p;
+ mhd_mhiargs_t mhiargs;
+ int rval = 0;
+ md_timeval32_t now;
+ sigset_t oldsigs;
+ ulong_t genid;
+ ulong_t max_genid = 0;
+ md_setkey_t *cl_sk;
+ int rb_level = 0;
+ md_error_t xep = mdnullerror;
+ md_mnnode_desc *nd;
+ int suspendall_flag = 0;
+ int suspend1_flag = 0;
+ int lock_flag = 0;
+ int flush_set_onerr = 0;
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ /* Make sure we own the set */
+ if (meta_check_ownership(sp, ep) != 0)
+ return (-1);
+
+ /*
+ * The drive and node records are stored in the local mddbs of each
+ * node in the diskset. Each node's rpc.metad daemon reads in the set,
+ * drive and node records from that node's local mddb and caches them
+ * internally. Any process needing diskset information contacts its
+ * local rpc.metad to get this information. Since each node in the
+ * diskset is independently reading the set information from its local
+ * mddb, the set, drive and node records in the local mddbs must stay
+ * in-sync, so that all nodes have a consistent view of the diskset.
+ *
+ * For a multinode diskset, explicitly verify that all nodes in the
+ * diskset are ALIVE (i.e. are in the API membership list). Otherwise,
+ * fail this operation since all nodes must be ALIVE in order to add
+ * the new drive record to their local mddb. If a panic of this node
+ * leaves the local mddbs set, node and drive records out-of-sync, the
+ * reconfig cycle will fix the local mddbs and force them back into
+ * synchronization.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+ sp->setno,
+ nd->nd_nodename, NULL, sp->setname);
+ return (-1);
+ }
+ nd = nd->nd_next;
+ }
+ }
+
+ if (drvsuniq(sp, dnlp, ep) == -1)
+ return (-1);
+
+ /*
+ * Lock the set on current set members.
+ * Set locking done much earlier for MN diskset than for traditional
+ * diskset since lock_set and SUSPEND are used to protect against
+ * other meta* commands running on the other nodes.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ /*
+ * Set after the first successful lock so that the exit
+ * path unlocks even when a later node fails to lock.
+ */
+ lock_flag = 1;
+ nd = nd->nd_next;
+ }
+ /*
+ * Lock out other meta* commands by suspending
+ * class 1 messages across the diskset.
+ */
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_mdcommdctl(nd->nd_nodename,
+ COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+ MD_MSCF_NO_FLAGS, ep)) {
+ rval = -1;
+ goto out;
+ }
+ suspend1_flag = 1;
+ nd = nd->nd_next;
+ }
+ }
+
+ if (check_setnodes_againstdrivelist(sp, dnlp, ep)) {
+ rval = -1;
+ goto out;
+ }
+
+ /* Refuse any drive that already belongs to some set. */
+ for (p = dnlp; p != NULL; p = p->next) {
+ mdsetname_t *tmp;
+
+ if (meta_is_drive_in_anyset(p->drivenamep, &tmp, FALSE,
+ ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+
+ if (tmp != NULL) {
+ (void) mddserror(ep, MDE_DS_DRIVEINSET, sp->setno,
+ tmp->setname, p->drivenamep->cname, sp->setname);
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /* END CHECK CODE */
+
+ /*
+ * This is a separate loop (from above) so that we validate all the
+ * drives handed to us before we repartition any one drive.
+ */
+ for (p = dnlp; p != NULL; p = p->next) {
+ if (meta_repartition_drive(sp,
+ p->drivenamep,
+ force_label == TRUE ? MD_REPART_FORCE : 0,
+ NULL, /* Don't return the VTOC. */
+ ep) != 0) {
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * Create the names for the drives we are adding per side.
+ */
+ if (meta_make_sidenmlist(sp, p->drivenamep, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /*
+ * Get the list of drives descriptors that we are adding.
+ */
+ dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_ADD, ep);
+
+ if (! mdisok(ep)) {
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * Slam a dummy master block on all the disks that we are adding
+ * Used by diskset import if the disksets are remotely replicated
+ * Best effort: lookup and open failures are ignored here.
+ */
+ for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+ uint_t rep_slice;
+ int fd = -1;
+ mdname_t *np = NULL;
+
+ if (meta_replicaslice(ddp->dd_dnp, &rep_slice, &xep) != 0) {
+ mdclrerror(&xep);
+ continue;
+ }
+
+ if ((np = metaslicename(ddp->dd_dnp, rep_slice, &xep))
+ == NULL) {
+ mdclrerror(&xep);
+ continue;
+ }
+
+ if ((fd = open(np->rname, O_RDWR)) >= 0) {
+ meta_mkdummymaster(sp, fd, 16);
+ (void) close(fd);
+ }
+ }
+
+ /*
+ * Get the set timeout information.
+ */
+ (void) memset(&mhiargs, '\0', sizeof (mhiargs));
+ if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * Get timestamp and generation id for new records
+ */
+ now = sd->sd_ctime;
+ genid = sd->sd_genid;
+
+
+ /* At this point, in case of error, set should be flushed. */
+ flush_set_onerr = 1;
+
+ /* Lock the set on current set members */
+ if (!(MD_MNSET_DESC(sd))) {
+ md_rb_sig_handling_on();
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ lock_flag = 1;
+ }
+ }
+
+ /*
+ * Get drive descriptors for the drives that are currently in the set.
+ * A NULL curdd with no error means this is the first drive of the set.
+ */
+ curdd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
+ if (! mdisok(ep))
+ goto rollback;
+
+ /*
+ * If first drive being added to set, set the mastership
+ * of the multinode diskset to be this node.
+ * Only set it on this node. If all goes well
+ * and there are no errors, the mastership of this node will be set
+ * on all nodes in user space and in the kernel.
+ */
+ if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
+ if (clnt_mnsetmaster(mynode(), sp,
+ sd->sd_mn_mynode->nd_nodename,
+ sd->sd_mn_mynode->nd_nodeid, ep)) {
+ goto rollback;
+ }
+ /*
+ * Set this up in my local cache of the set desc so that
+ * the set descriptor won't have to be gotten again from
+ * rpc.metad. If it is flushed and gotten again, these
+ * values will be set in sr2setdesc.
+ */
+ sd->sd_mn_master_nodeid = sd->sd_mn_mynode->nd_nodeid;
+ (void) strcpy(sd->sd_mn_master_nodenm,
+ sd->sd_mn_mynode->nd_nodename);
+ sd->sd_mn_am_i_master = 1;
+ }
+
+ RB_TEST(1, "adddrives", ep)
+
+ RB_PREEMPT;
+ rb_level = 1; /* level 1 */
+
+ RB_TEST(2, "adddrives", ep)
+
+ /*
+ * Add the drive records for the drives that we are adding to
+ * each host in the set. Marks the drive as MD_DR_ADD.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_adddrvs(nd->nd_nodename, sp, dd, now, genid,
+ ep) == -1)
+ goto rollback;
+
+ RB_TEST(3, "adddrives", ep)
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_adddrvs(sd->sd_nodes[i], sp, dd, now, genid,
+ ep) == -1)
+ goto rollback;
+
+ RB_TEST(3, "adddrives", ep)
+ }
+ }
+
+ RB_TEST(4, "adddrives", ep)
+
+ RB_PREEMPT;
+ rb_level = 2; /* level 2 */
+
+ RB_TEST(5, "adddrives", ep)
+
+ /*
+ * Take ownership of the added drives.
+ */
+ if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+ if (tk_own_bydd(sp, dd, &mhiargs, TRUE, ep))
+ goto rollback;
+ }
+
+ RB_TEST(6, "adddrives", ep)
+
+ RB_PREEMPT;
+ rb_level = 3; /* level 3 */
+
+ RB_TEST(7, "adddrives", ep)
+
+ /*
+ * Balance the DB's according to the list of existing drives and the
+ * list of added drives.
+ * rval keeps meta_db_balance()'s result and becomes the return value
+ * of this function unless a later step fails.
+ */
+ if ((rval = meta_db_balance(sp, dd, curdd, dbsize, ep)) == -1)
+ goto rollback;
+
+ if ((curdd == NULL) && (MD_MNSET_DESC(sd))) {
+ /*
+ * Notify rpc.mdcommd on all nodes of a nodelist change.
+ * Start by suspending rpc.mdcommd (which drains it of all
+ * messages), then change the nodelist followed by a reinit
+ * and resume.
+ */
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+ sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+ /*
+ * NOTE(review): this leaves through "out", not
+ * "rollback", although rb_level is already 3, so
+ * the drive records and replicas added above are
+ * not undone on this path - confirm intended.
+ */
+ rval = -1;
+ goto out;
+ }
+ suspendall_flag = 1;
+ nd = nd->nd_next;
+ }
+ }
+
+ /*
+ * If a MN diskset and this is the first disk(s) being added
+ * to set, then pre-allocate change log records here.
+ * When the other nodes are joined into the MN diskset, the
+ * USER records will just be snarfed in.
+ */
+ if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
+ if (mdmn_allocate_changelog(sp, ep) != 0)
+ goto rollback;
+ }
+
+ /*
+ * Mark the drives MD_DR_OK.
+ * If first drive being added to MN diskset, then set
+ * master on all nodes to be this node and then join
+ * all alive nodes (nodes in membership list) to set.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ /* don't set master on this node - done earlier */
+ if ((curdd == NULL) && (nd->nd_nodeid !=
+ sd->sd_mn_mynode->nd_nodeid)) {
+ /*
+ * Set master on all alive nodes since
+ * all alive nodes will become joined nodes.
+ */
+ if (clnt_mnsetmaster(nd->nd_nodename, sp,
+ sd->sd_mn_mynode->nd_nodename,
+ sd->sd_mn_mynode->nd_nodeid, ep)) {
+ goto rollback;
+ }
+ }
+
+ if (curdd == NULL) {
+ /*
+ * No special flags for join set. Since
+ * all nodes are joining if 1st drive is being
+ * added to set then all nodes will be either
+ * STALE or non-STALE and each node can
+ * determine this on its own.
+ */
+ if (clnt_joinset(nd->nd_nodename, sp,
+ NULL, ep)) {
+ goto rollback;
+ }
+ /* Sets join node flag on all nodes in list */
+ if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+ sd->sd_nodelist, MD_NR_JOIN, NULL, ep)) {
+ goto rollback;
+ }
+ }
+
+ /*
+ * Set MD_DR_OK as last thing before unlock.
+ * In case of panic on this node, recovery
+ * code can check for MD_DR_OK to determine
+ * status of diskset.
+ */
+ if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
+ MD_DR_OK, ep) == -1)
+ goto rollback;
+
+
+ RB_TEST(8, "adddrives", ep)
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd, MD_DR_OK,
+ ep) == -1)
+ goto rollback;
+
+ RB_TEST(8, "adddrives", ep)
+ }
+ }
+
+ RB_TEST(9, "adddrives", ep)
+
+ /*
+ * Common exit for success and for failures that need no rollback:
+ * reinit/resume rpc.mdcommd, unlock the set, free dd, flush the
+ * cached set and restore signal handling.  From here on the first
+ * error encountered is kept in ep; later ones only force rval to -1.
+ */
+out:
+ /*
+ * Notify rpc.mdcommd on all nodes of a nodelist change.
+ * Send reinit command to mdcommd which forces it to get
+ * fresh set description.
+ */
+ if (suspendall_flag) {
+ /* Send reinit */
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ /* Class is ignored for REINIT */
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+ sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to reinit rpc.mdcommd.\n"));
+ }
+ nd = nd->nd_next;
+ }
+ }
+ /*
+ * Unlock diskset by resuming messages across the diskset.
+ * Just resume all classes so that resume is the same whether
+ * just one class was locked or all classes were locked.
+ */
+ if ((suspend1_flag) || (suspendall_flag)) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+ sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to resume rpc.mdcommd.\n"));
+ }
+ nd = nd->nd_next;
+ }
+ meta_ping_mnset(sp->setno);
+ }
+
+ if (lock_flag) {
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_unlock_set(nd->nd_nodename,
+ cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i],
+ cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ }
+ }
+ cl_set_setkey(NULL);
+ }
+
+ metafreedrivedesc(&dd);
+
+ if (flush_set_onerr) {
+ metaflushsetname(sp);
+ /*
+ * NOTE(review): sd was obtained from sp before the flush
+ * above and is dereferenced again below - confirm that
+ * metaflushsetname() leaves the set descriptor valid.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+ }
+
+ if (MD_MNSET_DESC(sd)) {
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ }
+
+ return (rval);
+
+ /*
+ * Failure exit that undoes completed work, highest rb_level first.
+ * Errors during rollback are cleared so that ep keeps the error that
+ * caused the rollback.
+ */
+rollback:
+ /* all signals already blocked for MN diskset */
+ if (!(MD_MNSET_DESC(sd))) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ }
+
+ rval = -1;
+
+ max_genid = sd->sd_genid;
+
+ /* level 3 */
+ if (rb_level > 2) {
+ /*
+ * Since the add drive operation is failing, need
+ * to reset config back to the way it was
+ * before the add drive operation.
+ * If a MN diskset and this is the first drive being added,
+ * then reset master on all ALIVE nodes (which is all nodes)
+ * since the master would have not been set previously.
+ * Don't reset master on this node, since this
+ * is done later.
+ * This is ok to fail since next node to add first
+ * disk to diskset will also set the master on all nodes.
+ *
+ * Also, if this is the first drive being added,
+ * need to have each node withdraw itself from the set.
+ */
+ if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ /*
+ * Be careful with ordering in case of
+ * panic between the steps and the
+ * effect on recovery during reconfig.
+ */
+ if (clnt_withdrawset(nd->nd_nodename, sp, &xep))
+ mdclrerror(&xep);
+
+ /* Sets withdraw flag on all nodes in list */
+ if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+ sd->sd_nodelist, MD_NR_WITHDRAW,
+ NULL, &xep)) {
+ mdclrerror(&xep);
+ }
+
+ /* Skip this node */
+ if (nd->nd_nodeid ==
+ sd->sd_mn_mynode->nd_nodeid) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /* Reset master on all of the other nodes. */
+ if (clnt_mnsetmaster(nd->nd_nodename, sp,
+ "", MD_MN_INVALID_NID, &xep))
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ }
+ }
+ }
+
+ /*
+ * Send resume command to mdcommd. Don't send reinit command
+ * since nodelist should not have changed.
+ * If suspendall_flag is set, then user would have been adding
+ * first drives to set. Since this failed, there is certainly
+ * no reinit message to send to rpc.commd since no nodes will
+ * be joined to set at the end of this metaset command.
+ */
+ if (suspendall_flag) {
+ /* Send resume */
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ /*
+ * Resume all classes but class 1 so that lock is held
+ * against meta* commands.
+ * To later resume class1, must issue a class0 resume.
+ */
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+ sp, MD_MSG_CLASS0,
+ MD_MSCF_DONT_RESUME_CLASS1, &xep)) {
+ mde_perror(&xep, dgettext(TEXT_DOMAIN,
+ "Unable to resume rpc.mdcommd.\n"));
+ mdclrerror(&xep);
+ }
+ nd = nd->nd_next;
+ }
+ meta_ping_mnset(sp->setno);
+ }
+
+ /* level 3 */
+ if (rb_level > 2) {
+ mdnamelist_t *nlp;
+ mdname_t *np;
+
+ /* Detach the replica on the replica slice of each added drive. */
+ for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+ uint_t rep_slice;
+
+ if ((meta_replicaslice(ddp->dd_dnp,
+ &rep_slice, &xep) != 0) ||
+ ((np = metaslicename(ddp->dd_dnp, rep_slice,
+ &xep)) == NULL)) {
+ mdclrerror(&xep);
+ continue;
+ }
+ nlp = NULL;
+ (void) metanamelist_append(&nlp, np);
+
+ if (meta_db_detach(sp, nlp,
+ (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL, &xep))
+ mdclrerror(&xep);
+
+ metafreenamelist(nlp);
+ }
+
+ /* Re-balance */
+ if (meta_db_balance(sp, NULL, curdd, 0, &xep) == -1)
+ mdclrerror(&xep);
+
+ /* Only if we are adding the first drive */
+ /* Handled MN diskset above. */
+ if ((curdd == NULL) && !(MD_MNSET_DESC(sd))) {
+ if (clnt_stimeout(mynode(), sp, &defmhiargs,
+ &xep) == -1)
+ mdclrerror(&xep);
+
+ /* This is needed because of a corner case */
+ if (halt_set(sp, &xep))
+ mdclrerror(&xep);
+ }
+ max_genid++;
+ }
+
+ /* level 2 */
+ if (rb_level > 1) {
+ if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+ if (rel_own_bydd(sp, dd, TRUE, &xep))
+ mdclrerror(&xep);
+ }
+ }
+
+ /* level 1 */
+ if (rb_level > 0) {
+ /* Remove the drive records that were added on every host. */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_deldrvs(nd->nd_nodename, sp, dd,
+ &xep) == -1)
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_deldrvs(sd->sd_nodes[i], sp, dd,
+ &xep) == -1)
+ mdclrerror(&xep);
+ }
+ }
+ max_genid += 2;
+ resync_genid(sp, sd, max_genid, 0, NULL);
+ }
+
+ if ((suspend1_flag) || (suspendall_flag)) {
+ /* Send resume */
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ /*
+ * Just resume all classes so that resume is the
+ * same whether just one class was locked or all
+ * classes were locked.
+ */
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+ sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+ mdclrerror(&xep);
+ }
+ nd = nd->nd_next;
+ }
+ meta_ping_mnset(sp->setno);
+ }
+
+ /* level 0 */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ /* Don't test lock flag since guaranteed to be set if in rollback */
+ if (MD_MNSET_DESC(sd)) {
+ /*
+ * Since the add drive operation is failing, need
+ * to reset config back to the way it was
+ * before the add drive operation.
+ * If a MN diskset and this is the first drive being
+ * added, then reset master on this node since
+ * the master would have not been set previously.
+ * This is ok to fail since next node to add first
+ * disk to diskset will also set the master on all nodes.
+ */
+ if (curdd == NULL) {
+ /* Reset master on mynode */
+ if (clnt_mnsetmaster(mynode(), sp, "",
+ MD_MN_INVALID_NID, &xep))
+ mdclrerror(&xep);
+ }
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+ mdclrerror(&xep);
+ }
+ }
+ cl_set_setkey(NULL);
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ metafreedrivedesc(&dd);
+
+ if (flush_set_onerr) {
+ metaflushsetname(sp);
+ /*
+ * NOTE(review): sd is dereferenced after the flush of sp
+ * above - confirm the set descriptor is still valid here.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+ }
+
+ return (rval);
+}
+
+/*
+ * meta_set_deletedrives()
+ *
+ * Delete the drives named in dnlp from the diskset sp on every host
+ * (traditional diskset) or node (multi-node diskset) in the set.
+ *
+ * Inputs:
+ *	sp		diskset the drives are deleted from
+ *	dnlp		list of drives to delete
+ *	forceflg	must not be FALSE when the request removes every
+ *			drive that is currently in the set
+ *	ep		error return
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * The work is done in numbered stages.  rb_level is set to the stage
+ * number just before the stage is attempted, so that the rollback code
+ * undoes only what may have been started:
+ *	1 - drives marked MD_DR_DEL on all hosts
+ *	2 - replicas balanced away from / detached from the deleted drives
+ *	3 - drive records deleted on all hosts
+ *	4 - ownership of the drives released (neither MN nor MD_ATSET_DESC)
+ *	5 - set timeouts reset to defmhiargs (only when all drives go)
+ *	6 - set halted / nodes withdrawn (only when all drives go)
+ */
+int
+meta_set_deletedrives(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	*dnlp,
+	int			forceflg,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*ddp, *dd = NULL, *curdd = NULL;
+	md_replicalist_t	*rlp = NULL, *rl;
+	mddrivenamelist_t	*p;
+	int			deldrvcnt = 0;
+	int			rval = 0;
+	mhd_mhiargs_t		mhiargs;
+	int			i;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	ulong_t			max_genid = 0;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	int			has_set;
+	int			current_drv_cnt = 0;
+	int			suspendall_flag = 0, suspendall_flag_rb = 0;
+	int			suspend1_flag = 0;
+	int			lock_flag = 0;
+	bool_t			stale_bool = FALSE;
+	int			flush_set_onerr = 0;
+	mdnamelist_t		*nlp;
+	mdname_t		*np;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	if (drvsuniq(sp, dnlp, ep) == -1)
+		return (-1);
+
+	/*
+	 * Check and see if all the nodes have the set.
+	 *
+	 * The drive and node records are stored in the local mddbs of each
+	 * node in the diskset. Each node's rpc.metad daemon reads in the set,
+	 * drive and node records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information. Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set, drive and node records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list). Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to delete
+	 * a drive record from their local mddb. If a panic of this node
+	 * leaves the local mddbs set, node and drive records out-of-sync, the
+	 * reconfig cycle will fix the local mddbs and force them back into
+	 * synchronization.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+				    sp->setno,
+				    nd->nd_nodename, NULL, sp->setname);
+				return (-1);
+			}
+			nd = nd->nd_next;
+		}
+
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+
+		/*
+		 * Lock the set on current set members.
+		 * Set locking done much earlier for MN diskset than for
+		 * traditional diskset since lock_set and SUSPEND are used
+		 * to protect against other meta* commands running on the
+		 * other nodes.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (strcmp(nd->nd_nodename, mynode()) == 0) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			has_set = nodehasset(sp, nd->nd_nodename,
+			    NHS_NSTG_EQ, ep);
+			if (has_set < 0) {
+				rval = -1;
+				goto out;
+			}
+
+			if (! has_set) {
+				(void) mddserror(ep, MDE_DS_NODENOSET,
+				    sp->setno, nd->nd_nodename,
+				    NULL, sp->setname);
+				rval = -1;
+				goto out;
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (strcmp(sd->sd_nodes[i], mynode()) == 0)
+				continue;
+
+			has_set = nodehasset(sp, sd->sd_nodes[i], NHS_NSTG_EQ,
+			    ep);
+			if (has_set < 0) {
+				/*
+				 * Can directly return since !MN diskset;
+				 * nothing to unlock.
+				 */
+				return (-1);
+			}
+
+			if (! has_set) {
+				/*
+				 * Can directly return since !MN diskset;
+				 * nothing to unlock.
+				 */
+				return (mddserror(ep, MDE_DS_NODENOSET,
+				    sp->setno, sd->sd_nodes[i], NULL,
+				    sp->setname));
+			}
+		}
+	}
+
+	/* Every requested drive must be in this set and pass the inuse check */
+	for (p = dnlp; p != NULL; p = p->next) {
+		int		is_it;
+		mddrivename_t	*dnp;
+
+		dnp = p->drivenamep;
+
+		if ((is_it = meta_is_drive_in_thisset(sp, dnp, FALSE, ep))
+		    == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		if (! is_it) {
+			(void) mddserror(ep, MDE_DS_DRIVENOTINSET, sp->setno,
+			    NULL, dnp->cname, sp->setname);
+			rval = -1;
+			goto out;
+		}
+
+		if ((meta_check_drive_inuse(sp, dnp, FALSE, ep)) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		deldrvcnt++;
+	}
+	/* Remember the number of drives requested; deldrvcnt is reused below */
+	current_drv_cnt = deldrvcnt;
+
+	/*
+	 * Get drive descriptors for the drives that are currently in the set.
+	 */
+	curdd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+	if (! mdisok(ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Decrement the delete drive count for each drive currently in the
+	 * set.  A result of zero means every drive in the set is being
+	 * deleted.
+	 */
+	for (ddp = curdd; ddp != NULL; ddp = ddp->dd_next)
+		deldrvcnt--;
+
+	/*
+	 * If the count of drives we are deleting is equal to the drives in the
+	 * set, and we haven't specified forceflg, return an error
+	 */
+	if (deldrvcnt == 0 && forceflg == FALSE) {
+		(void) mderror(ep, MDE_FORCE_DEL_ALL_DRV, NULL);
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Get the list of drive descriptors that we are deleting.
+	 */
+	dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_DEL, ep);
+	if (! mdisok(ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Get the set timeout information in case we have to roll back.
+	 */
+	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
+	if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	/* At this point, in case of error, set should be flushed. */
+	flush_set_onerr = 1;
+
+	/* END CHECK CODE */
+
+	/* Lock the set on current set members */
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_on();
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	if ((deldrvcnt == 0) && (MD_MNSET_DESC(sd))) {
+		mddb_config_t	c;
+		/*
+		 * Is current set STALE?
+		 */
+		(void) memset(&c, 0, sizeof (c));
+		c.c_id = 0;
+		c.c_setno = sp->setno;
+		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &c.c_mde);
+			rval = -1;
+			goto out;
+		}
+		if (c.c_flags & MDDB_C_STALE) {
+			stale_bool = TRUE;
+		}
+	}
+
+	RB_TEST(1, "deletedrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "deletedrives", ep)
+
+	/*
+	 * Mark the drives MD_DR_DEL
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
+			    MD_DR_DEL, ep) == -1)
+				goto rollback;
+
+			RB_TEST(3, "deletedrives", ep)
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
+			    MD_DR_DEL, ep) == -1)
+				goto rollback;
+
+			RB_TEST(3, "deletedrives", ep)
+		}
+	}
+
+	RB_TEST(4, "deletedrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(5, "deletedrives", ep)
+
+	/*
+	 * Balance the DB's according to the list of existing drives and the
+	 * list of deleted drives.
+	 */
+	if (meta_db_balance(sp, dd, curdd, 0, ep) == -1)
+		goto rollback;
+
+	/*
+	 * If the drive(s) to be deleted cannot be accessed,
+	 * they haven't really been deleted yet. Check and delete now
+	 * if need be.
+	 */
+	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) >= 0) {
+		nlp = NULL;
+		for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+			char	*delete_name;
+
+			delete_name = ddp->dd_dnp->cname;
+
+			for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+				char	*cur_name;
+
+				cur_name =
+				    rl->rl_repp->r_namep->drivenamep->cname;
+
+				if (strcmp(delete_name, cur_name) == 0) {
+					/* put it on the delete list */
+					np = rl->rl_repp->r_namep;
+					(void) metanamelist_append(&nlp, np);
+
+				}
+			}
+		}
+
+		if (nlp != NULL) {
+			int	detach_rval;
+
+			detach_rval = meta_db_detach(sp, nlp,
+			    (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL,
+			    ep);
+			metafreenamelist(nlp);
+			if (detach_rval == -1) {
+				/* Don't leak the replica list on failure */
+				metafreereplicalist(rlp);
+				rlp = NULL;
+				goto rollback;
+			}
+		}
+
+		/*
+		 * The replica list is only needed to build the detach list
+		 * above; release it now that the name list is gone.
+		 */
+		metafreereplicalist(rlp);
+		rlp = NULL;
+	}
+
+	RB_TEST(6, "deletedrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(7, "deletedrives", ep)
+
+	/*
+	 * Cannot suspend set until after meta_db_balance since
+	 * meta_db_balance uses META_DB_ATTACH/DETACH messages.
+	 */
+	if ((deldrvcnt == 0) && (MD_MNSET_DESC(sd))) {
+		/*
+		 * Notify rpc.mdcommd on all nodes of a nodelist change.
+		 * Start by suspending rpc.mdcommd (which drains it of all
+		 * messages), then change the nodelist followed by a reinit
+		 * and resume.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/*
+			 * NOTE(review): a failure here leaves through "out"
+			 * rather than "rollback" although rb_level is 3;
+			 * confirm that this is intended.
+			 */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspendall_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Remove the drive records for the drives that were deleted from
+	 * each host in the set. This removes the record and dr_flags.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_deldrvs(nd->nd_nodename, sp, dd, ep) == -1)
+				goto rollback;
+
+			RB_TEST(8, "deletedrives", ep)
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_deldrvs(sd->sd_nodes[i], sp, dd, ep) == -1)
+				goto rollback;
+
+			RB_TEST(8, "deletedrives", ep)
+		}
+	}
+
+	RB_TEST(9, "deletedrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 4;	/* level 4 */
+
+	RB_TEST(10, "deletedrives", ep)
+
+	if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+		if (rel_own_bydd(sp, dd, TRUE, ep))
+			goto rollback;
+	}
+
+	/* If we deleted all the drives, then we need to halt the set. */
+	if (deldrvcnt == 0) {
+		RB_TEST(11, "deletedrives", ep)
+
+		RB_PREEMPT;
+		rb_level = 5;	/* level 5 */
+
+		RB_TEST(12, "deletedrives", ep)
+
+		if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1)
+			goto rollback;
+
+		RB_TEST(13, "deletedrives", ep)
+
+		RB_PREEMPT;
+		rb_level = 6;	/* level 6 */
+
+		RB_TEST(14, "deletedrives", ep)
+
+		/* Halt MN diskset on all nodes by having node withdraw */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/* Only withdraw nodes that are joined */
+				if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				/*
+				 * Going to set locally cached node flags to
+				 * rollback join so in case of error, the
+				 * rollback code knows which nodes to re-join.
+				 */
+				nd->nd_flags |= MD_MN_NODE_RB_JOIN;
+
+				/*
+				 * Be careful in ordering of following steps
+				 * so that recovery from a panic between
+				 * the steps is viable.
+				 * Only reset master info in rpc.metad -
+				 * don't reset local cached information
+				 * which will be used to set master information
+				 * back in case of failure (rollback).
+				 */
+				if (clnt_withdrawset(nd->nd_nodename, sp, ep))
+					goto rollback;
+				/* Sets withdraw flag on all nodes in list */
+				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+				    sd->sd_nodelist, MD_NR_WITHDRAW,
+				    NULL, ep)) {
+					goto rollback;
+				}
+				if (clnt_mnsetmaster(nd->nd_nodename, sp,
+				    "", MD_MN_INVALID_NID, ep)) {
+					goto rollback;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			if (halt_set(sp, ep))
+				goto rollback;
+		}
+
+		RB_TEST(15, "deletedrives", ep)
+	}
+
+	RB_TEST(16, "deletedrives", ep)
+
+out:
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (suspendall_flag) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				/* Keep the first error; don't overwrite it */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	if (lock_flag) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	metafreedrivedesc(&dd);
+
+	if (flush_set_onerr) {
+		metaflushsetname(sp);
+		if (!(MD_MNSET_DESC(sd))) {
+			md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+		}
+	}
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	max_genid = sd->sd_genid;
+
+	/* Set the master on all nodes first thing */
+	if (rb_level > 5) {
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/*
+				 * Always step to the next node, including
+				 * when skipping one that was not withdrawn;
+				 * otherwise this loop never terminates.
+				 */
+				if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				/*
+				 * Set master on all re-joining nodes to be
+				 * my cached view of master.
+				 */
+				if (clnt_mnsetmaster(nd->nd_nodename, sp,
+				    sd->sd_mn_master_nodenm,
+				    sd->sd_mn_master_nodeid, &xep)) {
+					mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+		}
+	}
+
+	/* level 3 */
+	if (rb_level > 2) {
+		md_set_record	*sr;
+		md_mnset_record	*mnsr;
+		md_drive_record	*dr;
+		int		sr_drive_cnt;
+
+		/*
+		 * See if we have to re-add the drives specified.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/*
+				 * Must get current set record from each
+				 * node to see what else must be done
+				 * to recover.
+				 * Record should be for a multi-node diskset.
+				 */
+				if (clnt_mngetset(nd->nd_nodename, sp->setname,
+				    MD_SET_BAD, &mnsr, &xep) == -1) {
+					mdclrerror(&xep);
+					nd = nd->nd_next;
+					continue;
+				}
+
+				/*
+				 * If all drives are already there, skip
+				 * to next node.
+				 */
+				sr_drive_cnt = 0;
+				dr = mnsr->sr_drivechain;
+				while (dr) {
+					sr_drive_cnt++;
+					dr = dr->dr_next;
+				}
+				if (sr_drive_cnt == current_drv_cnt) {
+					free_sr((md_set_record *)mnsr);
+					nd = nd->nd_next;
+					continue;
+				}
+
+				/* Readd all drives */
+				if (clnt_adddrvs(nd->nd_nodename, sp, dd,
+				    mnsr->sr_ctime, mnsr->sr_genid, &xep) == -1)
+					mdclrerror(&xep);
+
+				free_sr((struct md_set_record *)mnsr);
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				/* Record should be for a non-multi-node set */
+				if (clnt_getset(sd->sd_nodes[i], sp->setname,
+				    MD_SET_BAD, &sr, &xep) == -1) {
+					mdclrerror(&xep);
+					continue;
+				}
+
+				/*
+				 * Set record structure was allocated from RPC
+				 * routine getset so this structure is only of
+				 * size md_set_record even if the MN flag is
+				 * set. So, clear the flag so that the free
+				 * code doesn't attempt to free a structure
+				 * the size of md_mnset_record.
+				 */
+				if (MD_MNSET_REC(sr)) {
+					sr->sr_flags &= ~MD_SR_MN;
+					free_sr(sr);
+					continue;
+				}
+
+				/* Drive already added, skip to next node */
+				if (sr->sr_drivechain != NULL) {
+					free_sr(sr);
+					continue;
+				}
+
+				if (clnt_adddrvs(sd->sd_nodes[i], sp, dd,
+				    sr->sr_ctime, sr->sr_genid, &xep) == -1)
+					mdclrerror(&xep);
+
+				free_sr(sr);
+			}
+		}
+		max_genid += 2;
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * At this point in time, don't know which nodes are joined
+	 * to the set. So, send a reinit command to mdcommd
+	 * which forces it to get fresh set description. Then send resume.
+	 *
+	 * Later, this code will use rpc.mdcommd messages to reattach disks
+	 * and then rpc.mdcommd may be suspended again, rest of the nodes
+	 * joined, rpc.mdcommd reinited and then resumed.
+	 */
+	if (suspendall_flag) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/*
+			 * Resume all classes but class 1 so that lock is held
+			 * against meta* commands.
+			 * To later resume class1, must issue a class0 resume.
+			 */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0,
+			    MD_MSCF_DONT_RESUME_CLASS1, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		mdnamelist_t	*nlp;
+		mdname_t	*np;
+
+		/* Put a replica back on each drive that was being deleted */
+		for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+			uint_t	rep_slice;
+
+			if ((meta_replicaslice(ddp->dd_dnp,
+			    &rep_slice, &xep) != 0) ||
+			    ((np = metaslicename(ddp->dd_dnp, rep_slice,
+			    &xep)) == NULL)) {
+				mdclrerror(&xep);
+				continue;
+			}
+			nlp = NULL;
+			(void) metanamelist_append(&nlp, np);
+
+			if (meta_db_attach(sp, nlp,
+			    (MDCHK_DRVINSET | MDCHK_SET_LOCKED),
+			    &sd->sd_ctime, ddp->dd_dbcnt, ddp->dd_dbsize,
+			    NULL, &xep) == -1)
+				mdclrerror(&xep);
+
+			metafreenamelist(nlp);
+		}
+		/* Re-balance */
+		if (meta_db_balance(sp, NULL, curdd, 0, &xep) == -1)
+			mdclrerror(&xep);
+	}
+
+	/* level 4 */
+	if (rb_level > 3) {
+		if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+			if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 5 */
+	if (rb_level > 4) {
+		/* Restore the timeouts saved before the delete started */
+		if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1)
+			mdclrerror(&xep);
+	}
+
+	/*
+	 * If at least one node needs to be rejoined to MN diskset,
+	 * then suspend commd again.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			break;
+		}
+		if (nd) {
+			/*
+			 * Found node that will be rejoined so
+			 * notify rpc.mdcommd on all nodes of a nodelist change.
+			 * Start by suspending rpc.mdcommd (which drains it of
+			 * all messages), then change the nodelist followed by
+			 * a reinit and resume.
+			 */
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (clnt_mdcommdctl(nd->nd_nodename,
+				    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS0,
+				    MD_MSCF_NO_FLAGS, &xep)) {
+					mdclrerror(&xep);
+				}
+				suspendall_flag_rb = 1;
+				nd = nd->nd_next;
+			}
+		}
+	}
+
+
+
+	/* level 6 */
+	if (rb_level > 5) {
+		if (MD_MNSET_DESC(sd)) {
+			int	join_flags = 0;
+
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/* Only rejoin nodes that were joined before */
+				if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				/*
+				 * Rejoin nodes to same state as before -
+				 * either STALE or non-STALE.
+				 */
+				if (stale_bool == TRUE)
+					join_flags = MNSET_IS_STALE;
+				if (clnt_joinset(nd->nd_nodename, sp,
+				    join_flags, &xep))
+					mdclrerror(&xep);
+				/* Sets OWN flag on all nodes in list */
+				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+				    sd->sd_nodelist, MD_NR_JOIN, NULL, &xep)) {
+					mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			if (setup_db_bydd(sp, dd, TRUE, &xep) == -1)
+				mdclrerror(&xep);
+
+			/* No special flag for traditional diskset */
+			if (snarf_set(sp, NULL, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		/*
+		 * Mark the drives as OK.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/*
+				 * Must be last action before unlock.
+				 * In case of panic, recovery code checks
+				 * for MD_DR_OK to know that drive
+				 * and possible master are fully added back.
+				 */
+				if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
+				    MD_DR_OK, &xep) == -1)
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
+				    MD_DR_OK, &xep) == -1)
+					mdclrerror(&xep);
+
+			}
+		}
+		max_genid += 2;
+		resync_genid(sp, sd, max_genid, 0, NULL);
+	}
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send a reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (suspendall_flag_rb) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag_rb) || (suspendall_flag)) {
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	metafreedrivedesc(&dd);
+
+	if (flush_set_onerr) {
+		metaflushsetname(sp);
+		if (!(MD_MNSET_DESC(sd))) {
+			md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+		}
+	}
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_hst.c b/usr/src/lib/lvm/libmeta/common/meta_set_hst.c
new file mode 100644
index 0000000000..d5e5f43ed1
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_hst.c
@@ -0,0 +1,5688 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <meta.h>
+#include <sys/lvm/md_crc.h>
+#include <sys/time.h>
+#include <sdssc.h>
+
+/*
+ * Walk every replica of the diskset sp and add its side names through
+ * meta_db_addsidenms().  Stops at the first failure.
+ *
+ * Returns 0 on success, -1 if the replica list could not be obtained or
+ * a side name could not be added; the replica list is always released.
+ */
+static int
+add_db_sidenms(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*replicas = NULL;
+	md_replicalist_t	*cur;
+	int			status = 0;
+
+	if (metareplicalist(sp, MD_FULLNAME_ONLY, &replicas, ep) < 0)
+		return (-1);
+
+	for (cur = replicas; cur != NULL && status == 0; cur = cur->rl_next) {
+		md_replica_t	*repp = cur->rl_repp;
+
+		/*
+		 * This is not the first replica being added to the
+		 * diskset so call with ADDSIDENMS_BCAST.  If this
+		 * is a traditional diskset, the bcast flag is ignored
+		 * since traditional disksets don't use the rpc.mdcommd.
+		 */
+		if (meta_db_addsidenms(sp, repp->r_namep, repp->r_blkno,
+		    DB_ADDSIDENMS_BCAST, ep) != 0)
+			status = -1;
+	}
+
+	metafreereplicalist(replicas);
+	return (status);
+}
+
+/*
+ * Add the drive records of diskset sp to each of the node_c hosts named
+ * in node_v, using the set's creation time and (genid - 1).
+ *
+ * Returns 0 on success (including when the set has no drives) and -1 on
+ * the first failure.
+ */
+static int
+add_drvs_to_hosts(
+	mdsetname_t	*sp,
+	int		node_c,
+	char		**node_v,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*setdesc;
+	md_drive_desc	*drives;
+	md_timeval32_t	stamp;
+	ulong_t		gen;
+	int		n;
+
+	setdesc = metaget_setdesc(sp, ep);
+	if (setdesc == NULL)
+		return (-1);
+
+	drives = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
+	if (drives == NULL) {
+		/* No drives is fine unless an error was reported */
+		if (mdisok(ep))
+			return (0);
+		return (-1);
+	}
+
+	stamp = setdesc->sd_ctime;
+	gen = setdesc->sd_genid - 1;
+
+	n = 0;
+	while (n < node_c) {
+		if (clnt_adddrvs(node_v[n], sp, drives, stamp, gen, ep) == -1)
+			return (-1);
+		n++;
+	}
+
+	return (0);
+}
+
+/*
+ * Add, for side sideno, a name entry for every namespace key that exists
+ * on side otherside of the set sp.
+ *
+ * For a multi-node diskset on which this node has the MD_MN_NODE_OWN flag
+ * set, the work is handed to rpc.mdcommd with a
+ * MD_MN_MSG_META_MD_ADDSIDE message.  Otherwise the keys of otherside are
+ * walked with MD_IOCNXTKEY_NM and each one is added locally through
+ * add_name().
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+add_md_sidenms(mdsetname_t *sp, side_t sideno, side_t otherside, md_error_t *ep)
+{
+	mdnm_params_t	nm;
+	char		*cname, *dname;
+	side_t		tmp_sideno;
+	minor_t		mnum;
+	int		done, i;
+	int		rval = 0;
+	/* Stays NULL for the local set, which has no set descriptor here */
+	md_set_desc	*sd = NULL;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.key = MD_KEYWILD;
+
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+	}
+	/* Use rpc.mdcommd to add md side info from all nodes */
+	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+		md_mn_result_t			*resultp = NULL;
+		md_mn_msg_meta_md_addside_t	md_as;
+		int				send_rval;
+
+		md_as.msg_sideno = sideno;
+		md_as.msg_otherside = otherside;
+		/*
+		 * If reconfig cycle has been started, this node is stuck in
+		 * in the return step until this command has completed.  If
+		 * mdcommd is suspended, ask send_message to fail (instead of
+		 * retrying) so that metaset can finish allowing the
+		 * reconfig cycle to proceed.
+		 */
+		send_rval = mdmn_send_message(sp->setno,
+		    MD_MN_MSG_META_MD_ADDSIDE,
+		    MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT,
+		    (char *)&md_as, sizeof (md_mn_msg_meta_md_addside_t),
+		    &resultp, ep);
+		if (send_rval != 0) {
+			/*
+			 * The send can fail without producing a result;
+			 * only pull the error out of the result (and free
+			 * it) when there is one.
+			 */
+			if (resultp != NULL) {
+				(void) mdstealerror(ep, &(resultp->mmr_ep));
+				free_result(resultp);
+			}
+			return (-1);
+		}
+		if (resultp)
+			free_result(resultp);
+		return (0);
+	} else {
+		/*CONSTCOND*/
+		while (1) {
+			nm.mde = mdnullerror;
+			nm.setno = sp->setno;
+			nm.side = otherside;
+			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0)
+				return (mdstealerror(ep, &nm.mde));
+
+			/* MD_KEYWILD back from the ioctl: no more keys */
+			if (nm.key == MD_KEYWILD)
+				return (0);
+
+			nm.devname = (uintptr_t)meta_getnmbykey(sp->setno,
+			    otherside, nm.key, ep);
+			if (nm.devname == NULL)
+				return (-1);
+
+			nm.side = sideno;
+			/*
+			 * sd is NULL for the local set; treat that case like
+			 * a traditional (non multi-node) set instead of
+			 * dereferencing an unset pointer.
+			 */
+			if (sd != NULL && MD_MNSET_DESC(sd)) {
+				tmp_sideno = sideno;
+			} else {
+				tmp_sideno = sideno - 1;
+			}
+
+			if ((done = meta_getnextside_devinfo(sp,
+			    (char *)nm.devname, &tmp_sideno,
+			    &cname, &dname, &mnum, ep)) == -1) {
+				Free((void *)nm.devname);
+				return (-1);
+			}
+
+			assert(done == 1);
+			Free((void *)nm.devname);
+
+			/*
+			 * The device reference count can be greater than 1 if
+			 * more than one softpart is configured on top of the
+			 * same device.  If this is the case then we want to
+			 * increment the count to sync up with the other sides.
+			 */
+			for (i = 0; i < nm.ref_count; i++) {
+				if (add_name(sp, sideno, nm.key, dname, mnum,
+				    cname, ep) == -1)
+					rval = -1;
+			}
+
+			Free(cname);
+			Free(dname);
+
+			if (rval != 0)
+				return (rval);
+		}
+	}
+
+	/*NOTREACHED*/
+}
+
+/*
+ * Run checkdrive_onnode() against host "node" for every drive in the
+ * diskset sp.
+ *
+ * Returns 0 when every drive passes (or the set has no drives) and -1 if
+ * the drive list could not be read or any drive fails the check.
+ */
+static int
+check_setdrvs_againstnode(mdsetname_t *sp, char *node, md_error_t *ep)
+{
+	md_drive_desc	*cur;
+
+	cur = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
+	if (cur == NULL && !mdisok(ep))
+		return (-1);
+
+	while (cur != NULL) {
+		if (checkdrive_onnode(sp, cur->dd_dnp, node, ep))
+			return (-1);
+		cur = cur->dd_next;
+	}
+
+	return (0);
+}
+
+/*
+ * Create the multi-node diskset sp on each of the node_c hosts in node_v.
+ *
+ * When new_set is non-zero a temporary set descriptor is built here: its
+ * node list holds the hosts in node_v in ascending nodeid order, and the
+ * master is left invalid.  Otherwise the existing set descriptor is used
+ * and, once the set has been created on the new hosts, the drive records
+ * are added to them with add_drvs_to_hosts().
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+create_multinode_set_on_hosts(
+	mdsetname_t	*sp,
+	int		node_c,		/* Number of new nodes */
+	char		**node_v,	/* Nodes which are being added */
+	int		new_set,
+	md_error_t	*ep
+)
+{
+	int				idx;
+	md_set_desc			*sd;
+	md_timeval32_t			now;
+	ulong_t				genid;
+	int				rval = 0;
+	md_mnnode_desc			*nd;
+	md_mnnode_desc			*first_added = NULL;
+	md_mnnode_desc			**linkp;
+	int				nodecnt;
+	mndiskset_membershiplist_t	*members, *mp;
+
+	if (new_set) {
+		sd = Zalloc(sizeof (*sd));
+
+		if (meta_gettimeofday(&now) == -1) {
+			(void) mdsyserror(ep, errno,
+			    dgettext(TEXT_DOMAIN, "meta_gettimeofday()"));
+			rval = -1;
+			goto out;
+		}
+
+		/*
+		 * Put the new entries into the set.
+		 * Get membershiplist from API routine.  If there's
+		 * an error, fail to create set and pass back error.
+		 */
+		if (meta_read_nodelist(&nodecnt, &members, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		/*
+		 * meta_set_addhosts has already verified that
+		 * this node list is in the membership list
+		 * so set ALIVE flag.
+		 * Since this is a new set, all hosts being
+		 * added are new to the set, so also set ADD flag.
+		 */
+		for (idx = 0; idx < node_c; idx++) {
+			nd = Zalloc(sizeof (*nd));
+			(void) strcpy(nd->nd_nodename, node_v[idx]);
+			nd->nd_ctime = now;
+			nd->nd_flags = (MD_MN_NODE_ALIVE |
+			    MD_MN_NODE_ADD);
+
+			/* Copy nodeid and address from the membership entry */
+			for (mp = members; mp != NULL; mp = mp->next) {
+				if (strcmp(mp->msl_node_name,
+				    node_v[idx]) != 0)
+					continue;
+				nd->nd_nodeid = mp->msl_node_id;
+				(void) strcpy(nd->nd_priv_ic,
+				    mp->msl_node_addr);
+				break;
+			}
+
+			/*
+			 * Nodelist must be kept in ascending nodeid order.
+			 * Search for the place to add it: the first link
+			 * whose node has a larger nodeid, or the end of
+			 * the list.
+			 */
+			linkp = &sd->sd_nodelist;
+			while (*linkp != NULL &&
+			    (*linkp)->nd_nodeid <= nd->nd_nodeid)
+				linkp = &(*linkp)->nd_next;
+			nd->nd_next = *linkp;
+			*linkp = nd;
+
+			/* Set master to be first node added */
+			if (first_added == NULL)
+				first_added = nd;
+		}
+
+		meta_free_nodelist(members);
+		/*
+		 * Creating mnset for first time.
+		 * Set master to be invalid until first drive is
+		 * in set.
+		 */
+		(void) strcpy(sd->sd_mn_master_nodenm, "");
+		sd->sd_mn_master_nodeid = MD_MN_INVALID_NID;
+		sd->sd_mn_masternode = first_added;
+		sd->sd_ctime = now;
+		genid = sd->sd_genid = 0;
+	} else {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+		now = sd->sd_ctime;
+		genid = sd->sd_genid - 1;
+		if (sd->sd_drvs)
+			genid--;
+	}
+
+	/*
+	 * Create the set on each new node.  If the set already
+	 * exists, then the node list being created on each new node
+	 * is the current node list from before the new nodes
+	 * were added.  If the set doesn't exist, then the node
+	 * list being created on each new node is the entire
+	 * new node list.
+	 */
+	idx = 0;
+	while (idx < node_c) {
+		if (clnt_mncreateset(node_v[idx], sp, sd->sd_nodelist,
+		    now, genid, sd->sd_mn_master_nodenm,
+		    sd->sd_mn_master_nodeid, ep) == -1) {
+			rval = -1;
+			break;
+		}
+		idx++;
+	}
+
+out:
+	/* The descriptor built for a new set is private to this call */
+	if (new_set) {
+		while ((nd = sd->sd_nodelist) != NULL) {
+			sd->sd_nodelist = nd->nd_next;
+			Free(nd);
+		}
+		Free(sd);
+	}
+
+	if (rval != 0 || new_set)
+		return (rval);
+
+	/*
+	 * Add the drive records to the new sets
+	 * and names for the new sides.
+	 */
+	return (add_drvs_to_hosts(sp, node_c, node_v, ep));
+}
+
+
+/*
+ * Create a traditional (non multi-node) diskset on every host in node_v.
+ *
+ * new_set != 0: rejects more than MD_MAXSIDES hosts, then builds a scratch
+ * set descriptor holding the host names and the current time; the scratch
+ * descriptor is freed before returning.
+ *
+ * new_set == 0: uses the existing set descriptor and, once the set has
+ * been created on the new hosts, pushes the drive records via
+ * add_drvs_to_hosts().
+ *
+ * Returns 0 on success; otherwise -1 or the value returned by
+ * mddserror()/add_drvs_to_hosts().
+ */
+ static int
+ create_traditional_set_on_hosts(
+ mdsetname_t *sp,
+ int node_c, /* Number of new nodes */
+ char **node_v, /* Nodes which are being added */
+ int new_set,
+ md_error_t *ep
+)
+{
+ int idx;
+ int status = 0;
+ ulong_t genid;
+ md_timeval32_t now;
+ md_set_desc *sd;
+
+ if (new_set) {
+ if (node_c > MD_MAXSIDES)
+ return (mddserror(ep, MDE_DS_SIDENUMNOTAVAIL,
+ sp->setno, NULL, NULL, sp->setname));
+
+ sd = Zalloc(sizeof (*sd));
+
+ /* Record the host names in the scratch descriptor */
+ idx = 0;
+ while (idx < node_c) {
+ (void) strcpy(sd->sd_nodes[idx], node_v[idx]);
+ idx++;
+ }
+
+ if (meta_gettimeofday(&now) == -1) {
+ (void) mdsyserror(ep, errno, "meta_gettimeofday()");
+ status = -1;
+ goto out;
+ }
+
+ sd->sd_ctime = now;
+ genid = sd->sd_genid = 0;
+ } else {
+ sd = metaget_setdesc(sp, ep);
+ if (sd == NULL)
+ return (-1);
+
+ now = sd->sd_ctime;
+ genid = sd->sd_genid;
+ if (sd->sd_drvs)
+ genid--;
+ }
+
+ /* Create the set on each new host; stop at the first failure */
+ for (idx = 0; idx < node_c; idx++) {
+ if (clnt_createset(node_v[idx], sp, sd->sd_nodes, now, genid,
+ ep) == -1) {
+ status = -1;
+ break;
+ }
+ }
+
+out:
+ if (new_set)
+ Free(sd);
+
+ /*
+ * For an existing set that was created cleanly, add the drive
+ * records to the new sets and names for the new sides.
+ */
+ if (status == 0 && !new_set)
+ status = add_drvs_to_hosts(sp, node_c, node_v, ep);
+
+ return (status);
+}
+
+/*
+ * Dispatch set creation to the multi-node or the traditional
+ * implementation, passing all other arguments through unchanged.
+ */
+ static int
+ create_set_on_hosts(
+ mdsetname_t *sp,
+ int multi_node, /* Multi_node diskset or not? */
+ int node_c, /* Number of new nodes */
+ char **node_v, /* Nodes which are being added */
+ int new_set,
+ md_error_t *ep
+)
+{
+ if (!multi_node)
+ return (create_traditional_set_on_hosts(sp, node_c, node_v,
+ new_set, ep));
+
+ return (create_multinode_set_on_hosts(sp, node_c, node_v,
+ new_set, ep));
+}
+
+/*
+ * Create a brand new diskset named sp->setname on the hosts in node_v.
+ *
+ * Steps, in order:
+ * - check that this host is in node_v and that auto_take is only used
+ * with a single host;
+ * - obtain a set number, either from sdssc_get_index() or by probing
+ * the hosts for the first number none of them reports busy;
+ * - lock the set on every host (before the name checks for a multi-node
+ * set, after them for a traditional set);
+ * - create the set on the hosts and mark the set record (and, for a
+ * multi-node set, the node records) OK.
+ *
+ * On failure after the rollback point the "rollback" path undoes what was
+ * done according to rb_level. Failures before that leave through "out",
+ * which only releases locks and signal handling.
+ *
+ * Returns 0 on success, -1 on failure with ep describing the error.
+ */
+ static int
+ create_set(
+ mdsetname_t *sp,
+ int multi_node, /* Multi-node diskset or not? */
+ int node_c,
+ char **node_v,
+ int auto_take,
+ md_error_t *ep
+)
+{
+ int i;
+ int rval = 0;
+ set_t max_sets;
+ set_t setno;
+ int bool;
+ uint_t sr_flags;
+ sigset_t oldsigs;
+ md_setkey_t *cl_sk;
+ int rb_level = 0;
+ md_error_t xep = mdnullerror;
+ rval_e sdssc_rval;
+ int lock_flag = 0;
+ int sig_flag = 0;
+
+ if ((max_sets = get_max_sets(ep)) == 0)
+ return (-1);
+
+ /* We must be a member of the set we are creating */
+ if (! strinlst(mynode(), node_c, node_v))
+ return (mddserror(ep, MDE_DS_SELFNOTIN,
+ sp->setno, mynode(), NULL, sp->setname));
+
+ /*
+ * If auto_take then we must be the only member of the set
+ * that we are creating.
+ */
+ if (auto_take && node_c > 1)
+ return (mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
+ sp->setname));
+
+ /*
+ * If we're part of SC3.0 we'll already have allocated the
+ * set number so we can skip the allocation algorithm used.
+ * Set number is unique across traditional and MN disksets.
+ */
+ if ((sdssc_rval = sdssc_get_index(sp->setname, &setno))
+ == SDSSC_NOT_BOUND) {
+
+ for (i = 0; i < node_c; i++) {
+ int has_set;
+
+ /* Skip my node */
+ if (strcmp(mynode(), node_v[i]) == 0)
+ continue;
+
+ /*
+ * Make sure this set name is not used on the
+ * other hosts
+ */
+ has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
+ if (has_set < 0) {
+ if (! mdiserror(ep, MDE_NO_SET)) {
+ rval = -1;
+ goto out;
+ }
+ mdclrerror(ep);
+ continue;
+ }
+
+ if (has_set) {
+ (void) mddserror(ep, MDE_DS_NODEHASSET,
+ sp->setno, node_v[i], NULL, sp->setname);
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /* Pick the first set number that no host reports as busy */
+ for (setno = 1; setno < max_sets; setno++) {
+ for (i = 0; i < node_c; i++) {
+ if (clnt_setnumbusy(node_v[i], setno,
+ &bool, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+
+ if (bool == TRUE)
+ break;
+ }
+ if (i == node_c)
+ break;
+ }
+ } else if (sdssc_rval != SDSSC_OKAY) {
+ (void) mddserror(ep, MDE_DS_SETNUMNOTAVAIL, MD_SET_BAD, NULL,
+ NULL, sp->setname);
+ rval = -1;
+ goto out;
+ }
+
+ if (setno == max_sets) {
+ (void) mddserror(ep, MDE_DS_SETNUMNOTAVAIL, MD_SET_BAD, NULL,
+ NULL, sp->setname);
+ rval = -1;
+ goto out;
+ }
+
+ sp->setno = setno;
+
+ /*
+ * Lock the set on current set members.
+ * Set locking done much earlier for MN diskset than for traditional
+ * diskset since lock_set is used to protect against
+ * other meta* commands running on the other nodes.
+ * Don't issue mdcommd SUSPEND command since there is nothing
+ * to suspend since there currently is no set.
+ */
+ if (multi_node) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ sig_flag = 1;
+
+ /* Lock the set on new set members */
+ for (i = 0; i < node_c; i++) {
+ if (clnt_lock_set(node_v[i], sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ lock_flag = 1;
+ }
+ /*
+ * Now have the diskset locked, verify set number is still ok.
+ * NOTE(review): only an RPC failure is treated as an error
+ * here; the busy flag returned in bool is not examined.
+ * Confirm whether that is intended.
+ */
+ for (i = 0; i < node_c; i++) {
+ if (clnt_setnumbusy(node_v[i], setno,
+ &bool, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+ }
+ }
+
+
+ if (meta_set_checkname(sp->setname, ep)) {
+ rval = -1;
+ goto out;
+ }
+
+ for (i = 0; i < node_c; i++) {
+ if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+ if (bool == FALSE) {
+ (void) mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
+ node_v[i], NULL, sp->setname);
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /* END CHECK CODE */
+
+ /* Lock the set on new set members */
+ if (!multi_node) {
+ md_rb_sig_handling_on();
+ sig_flag = 1;
+ for (i = 0; i < node_c; i++) {
+ if (clnt_lock_set(node_v[i], sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ lock_flag = 1;
+ }
+ }
+
+ RB_TEST(1, "create_set", ep)
+
+ RB_PREEMPT;
+ rb_level = 1; /* level 1 */
+
+ RB_TEST(2, "create_set", ep)
+
+ if ((rval = create_set_on_hosts(sp, multi_node, node_c, node_v,
+ 1, ep)) == -1)
+ goto rollback;
+
+ RB_TEST(3, "create_set", ep)
+
+ if (auto_take)
+ sr_flags = MD_SR_OK | MD_SR_AUTO_TAKE;
+ else
+ sr_flags = MD_SR_OK;
+
+ /*
+ * Mark the set record MD_SR_OK
+ */
+ for (i = 0; i < node_c; i++)
+ if (clnt_upd_sr_flags(node_v[i], sp, sr_flags, ep))
+ goto rollback;
+
+ rb_level = 2; /* level 2 */
+
+ /*
+ * For MN diskset:
+ * On each added node, set the node record for that node
+ * to OK. Then set all node records for the newly added
+ * nodes on all nodes to ok.
+ *
+ * By setting a node's own node record to ok first, even if
+ * the node adding the hosts panics, the rest of the nodes can
+ * determine the same node list during the choosing of the master
+ * during reconfig. So, only nodes considered for mastership
+ * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set
+ * on that node's rpc.metad. If all nodes have MD_SR_OK set,
+ * but no node has its own MD_MN_NODE_OK set, then the set will
+ * be removed during reconfig since a panic occurred during the
+ * creation of the initial diskset.
+ */
+
+ if (multi_node) {
+ md_mnnode_desc *nd, *saved_nd_next;
+ md_set_desc *sd;
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ goto rollback;
+ }
+
+ for (i = 0; i < node_c; i++) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ /* Something wrong, will pick this up in next loop */
+ if (nd == NULL)
+ continue;
+
+ /*
+ * Only changing my local cache of node list: detach
+ * the entry so that a one-element list is sent.
+ */
+ saved_nd_next = nd->nd_next;
+ nd->nd_next = NULL;
+
+ /* Set node record for added host to ok on that host */
+ if (clnt_upd_nr_flags(node_v[i], sp,
+ nd, MD_NR_OK, NULL, ep)) {
+ nd->nd_next = saved_nd_next;
+ goto rollback;
+ }
+ nd->nd_next = saved_nd_next;
+ }
+
+ /* Now set all node records on all nodes to be ok */
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+ sd->sd_nodelist, MD_NR_OK, NULL, ep)) {
+ goto rollback;
+ }
+ nd = nd->nd_next;
+ }
+ }
+
+ RB_TEST(4, "create_set", ep)
+
+out:
+ if ((rval == 0) && multi_node) {
+ /*
+ * Set successfully created.
+ * Notify rpc.mdcommd on all nodes of a nodelist change.
+ * Send reinit command to mdcommd which forces it to get
+ * fresh set description. Then send resume.
+ * Resume on class 0 will resume all classes.
+ */
+ for (i = 0; i < node_c; i++) {
+ /* Class is ignored for REINIT */
+ if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT,
+ sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to reinit rpc.mdcommd.\n"));
+ }
+ }
+ for (i = 0; i < node_c; i++) {
+ if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME,
+ sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to resume rpc.mdcommd.\n"));
+ }
+ }
+ meta_ping_mnset(sp->setno);
+ }
+ if (lock_flag) {
+ /* Keep the first unlock error only if nothing failed earlier */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ for (i = 0; i < node_c; i++) {
+ if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ }
+ cl_set_setkey(NULL);
+ }
+
+ if (sig_flag) {
+ if (multi_node) {
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ } else {
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+ }
+
+ return (rval);
+
+rollback:
+ /* all signals already blocked for MN diskset */
+ if (!multi_node) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ }
+
+ rval = -1;
+
+ /*
+ * For MN diskset:
+ * On each added node (which is now each node to be deleted),
+ * set the node record for that node to DEL. Then set all
+ * node records for the newly added (soon to be deleted) nodes
+ * on all nodes to DEL.
+ *
+ * By setting a node's own node record to DEL first, even if
+ * the node doing the rollback panics, the rest of the nodes can
+ * determine the same node list during the choosing of the master
+ * during reconfig.
+ */
+
+ /* level 3 */
+ if ((rb_level > 1) && (multi_node)) {
+ md_mnnode_desc *nd, *saved_nd_next;
+ md_set_desc *sd;
+
+ /*
+ * Rollback is best effort. If the set descriptor cannot be
+ * fetched there is no node list to walk, so the node record
+ * updates are skipped (sd must not be dereferenced) and the
+ * rollback continues with the set record and set deletion.
+ */
+ if ((sd = metaget_setdesc(sp, &xep)) == NULL) {
+ mdclrerror(&xep);
+ }
+
+ for (i = 0; (sd != NULL) && (i < node_c); i++) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ /* Something wrong, will pick this up in next loop */
+ if (nd == NULL)
+ continue;
+
+ /* Only changing my local cache of node list */
+ saved_nd_next = nd->nd_next;
+ nd->nd_next = NULL;
+
+ /* Set node record for added host to DEL on that host */
+ if (clnt_upd_nr_flags(node_v[i], sp,
+ nd, MD_NR_DEL, NULL, &xep)) {
+ mdclrerror(&xep);
+ }
+ nd->nd_next = saved_nd_next;
+ }
+
+ /* Now set all node records on all nodes to be DEL */
+ nd = (sd != NULL) ? sd->sd_nodelist : NULL;
+ /* All nodes are guaranteed to be ALIVE */
+ while (nd) {
+ if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+ sd->sd_nodelist, MD_NR_DEL, NULL, &xep)) {
+ mdclrerror(&xep);
+ }
+ nd = nd->nd_next;
+ }
+
+ /* Mark set record on all hosts to be DELETED */
+ for (i = 0; i < node_c; i++) {
+ if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, &xep)) {
+ mdclrerror(&xep);
+ }
+ }
+ }
+ /* level 1 */
+ if (rb_level > 0) {
+ for (i = 0; i < node_c; i++) {
+ if (clnt_delset(node_v[i], sp, &xep) == -1)
+ mdclrerror(&xep);
+ }
+ }
+
+ /* level 0 */
+ /* Don't test lock flag since guaranteed to be set if in rollback */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ for (i = 0; i < node_c; i++) {
+ if (clnt_unlock_set(node_v[i], cl_sk, &xep))
+ mdclrerror(&xep);
+ }
+ cl_set_setkey(NULL);
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ if ((sig_flag) && (!multi_node))
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+ return (rval);
+}
+
+/*
+ * Remove the side name for side "sideno" from every replica in the
+ * set's replica list. Stops at the first failure. The replica list is
+ * always freed before returning.
+ *
+ * Returns 0 on success, -1 if the list could not be read or a delete
+ * failed.
+ */
+ static int
+ del_db_sidenms(
+ mdsetname_t *sp,
+ side_t sideno,
+ md_error_t *ep
+)
+{
+ md_replicalist_t *replicas = NULL;
+ md_replicalist_t *cur;
+ md_replica_t *rep;
+ int status = 0;
+
+ if (metareplicalist(sp, MD_BASICNAME_OK, &replicas, ep) < 0)
+ return (-1);
+
+ cur = replicas;
+ while (cur != NULL) {
+ rep = cur->rl_repp;
+
+ if (meta_db_delsidenm(sp, sideno, rep->r_namep,
+ rep->r_blkno, ep)) {
+ status = -1;
+ break;
+ }
+ cur = cur->rl_next;
+ }
+
+ metafreereplicalist(replicas);
+ return (status);
+}
+
+/*
+ * Delete the drive records described by dd from each host in node_v.
+ *
+ * sp - set being modified
+ * sd - set descriptor (decides MN vs traditional handling)
+ * dd - drives to delete
+ * node_c - number of entries in node_v
+ * node_v - host names
+ * oha - TRUE when running in OHA mode
+ * ep - error return
+ *
+ * MN set in OHA mode: hosts whose node entry is not flagged ALIVE are
+ * skipped, and a host missing from the node list is an error.
+ * MN set otherwise: every host is sent the delete.
+ * Traditional set: every host is sent the delete; in OHA mode an RPC
+ * error is cleared and the host skipped.
+ *
+ * Returns 0 on success, -1 (or the mddserror() value) on failure.
+ */
+ static int
+ del_drvs_from_hosts(
+ mdsetname_t *sp,
+ md_set_desc *sd,
+ md_drive_desc *dd,
+ int node_c,
+ char **node_v,
+ int oha,
+ md_error_t *ep
+)
+{
+ int i;
+ md_mnnode_desc *nd;
+
+ for (i = 0; i < node_c; i++) {
+ if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ if (nd == NULL) {
+ /*
+ * nd is NULL here, so name the host from
+ * node_v[i] rather than through nd.
+ */
+ return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+ sp->setno, node_v[i],
+ NULL, sp->setname));
+ }
+
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ continue;
+ }
+ if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
+ return (-1);
+ }
+ } else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
+ /*
+ * All nodes should be alive in non-oha mode.
+ */
+ if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
+ return (-1);
+ }
+ } else {
+ /*
+ * For traditional diskset, issue the RPC and
+ * ignore RPC failure if in OHA mode.
+ */
+ if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ return (-1);
+ }
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Remove the single host named by anode from this node's copy of the set.
+ *
+ * The set must be owned by this node. The set is locked locally, the
+ * host is deleted locally and, for a traditional set that has drives,
+ * the drive side names are deleted as well. The unlock and the flush of
+ * the cached set name are attempted on every path that gets past the
+ * ownership check.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+ static int
+ del_host_noset(
+ mdsetname_t *sp,
+ char **anode,
+ md_error_t *ep
+)
+{
+ int status = 0;
+ md_setkey_t *cl_sk;
+ md_drive_desc *dd;
+ md_error_t xep = mdnullerror;
+ md_set_desc *sd;
+
+ sd = metaget_setdesc(sp, ep);
+ if (sd == NULL)
+ return (-1);
+
+ /* Make sure we own the set */
+ if (meta_check_ownership(sp, ep) != 0)
+ return (-1);
+
+ /* Lock the set on our side, then drop the host locally */
+ if (clnt_lock_set(mynode(), sp, ep) ||
+ clnt_delhosts(mynode(), sp, 1, anode, ep)) {
+ status = -1;
+ } else if (!MD_MNSET_DESC(sd)) {
+ dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
+ if (dd == NULL) {
+ /* No drives is fine; a recorded error is not */
+ if (! mdisok(ep))
+ status = -1;
+ } else if (clnt_del_drv_sidenms(mynode(), sp, ep)) {
+ /* We have drives but their side names could not go */
+ status = -1;
+ }
+ }
+
+ /* Unlock; report the unlock error only if nothing failed before */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+ if (status == 0)
+ (void) mdstealerror(ep, &xep);
+ status = -1;
+ }
+ cl_set_setkey(NULL);
+
+ metaflushsetname(sp);
+
+ return (status);
+}
+
+/*
+ * Delete the metadevice names recorded for side "sideno".
+ *
+ * For a MN diskset on which this node has the OWN flag set, a
+ * MD_MN_MSG_META_MD_DELSIDE message is sent through rpc.mdcommd.
+ * Otherwise the namespace is walked with MD_IOCNXTKEY_NM and each key is
+ * deleted for the side, once per reference.
+ *
+ * Returns 0 on success; on failure -1 or the value of mdstealerror().
+ */
+ static int
+del_md_sidenms(mdsetname_t *sp, side_t sideno, md_error_t *ep)
+{
+ mdnm_params_t nm;
+ md_set_desc *sd = NULL;
+ int i;
+
+ if (!metaislocalset(sp)) {
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+ }
+ /* Use rpc.mdcommd to delete md side info on all nodes */
+ if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+ (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+ md_mn_result_t *resultp = NULL;
+ md_mn_msg_meta_md_delside_t md_ds;
+ int send_rval;
+
+ md_ds.msg_sideno = sideno;
+ /*
+ * If reconfig cycle has been started, this node is stuck in
+ * in the return step until this command has completed. If
+ * mdcommd is suspended, ask send_message to fail (instead of
+ * retrying) so that metaset can finish allowing the
+ * reconfig cycle to proceed.
+ */
+ send_rval = mdmn_send_message(sp->setno,
+ MD_MN_MSG_META_MD_DELSIDE,
+ MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT,
+ (char *)&md_ds, sizeof (md_mn_msg_meta_md_delside_t),
+ &resultp, ep);
+ if (send_rval != 0) {
+ /*
+ * Only look inside the result if one came back.
+ * With no result, ep is left as mdmn_send_message()
+ * left it.
+ */
+ if (resultp != NULL) {
+ (void) mdstealerror(ep, &(resultp->mmr_ep));
+ free_result(resultp);
+ }
+ return (-1);
+ }
+ if (resultp)
+ free_result(resultp);
+ } else {
+ (void) memset(&nm, '\0', sizeof (nm));
+ nm.key = MD_KEYWILD;
+
+ /*CONSTCOND*/
+ while (1) {
+ nm.mde = mdnullerror;
+ nm.setno = sp->setno;
+ nm.side = MD_SIDEWILD;
+ if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0)
+ return (mdstealerror(ep, &nm.mde));
+
+ /* A wild key coming back ends the walk */
+ if (nm.key == MD_KEYWILD)
+ return (0);
+
+ /*
+ * The device reference count can be greater than 1 if
+ * more than one softpart is configured on top of the
+ * same device. If this is the case then we want to
+ * decrement the count to zero so the entry can be
+ * actually removed.
+ */
+ for (i = 0; i < nm.ref_count; i++) {
+ if (del_name(sp, sideno, nm.key, ep) == -1)
+ return (-1);
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Best-effort re-creation of the set on every member host for which
+ * nodehasset() reports an error (negative return). All errors are
+ * cleared and nothing is reported to the caller.
+ *
+ * MN diskset: walks the node list, skipping nodes not flagged ALIVE.
+ * Traditional diskset: walks the sd_nodes slots, skipping empty ones.
+ */
+ static void
+ recreate_set(
+ mdsetname_t *sp,
+ md_set_desc *sd
+)
+{
+ int slot;
+ md_error_t xep = mdnullerror;
+ md_mnnode_desc *nd;
+
+ if (MD_MNSET_DESC(sd)) {
+ for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+ continue;
+
+ /* Only hosts where the lookup failed get the set back */
+ if (nodehasset(sp, nd->nd_nodename,
+ NHS_NST_EQ, &xep) >= 0)
+ continue;
+
+ mdclrerror(&xep);
+
+ if (clnt_mncreateset(nd->nd_nodename, sp,
+ sd->sd_nodelist,
+ sd->sd_ctime, sd->sd_genid,
+ sd->sd_mn_master_nodenm,
+ sd->sd_mn_master_nodeid, &xep) == -1)
+ mdclrerror(&xep);
+ }
+ return;
+ }
+
+ for (slot = 0; slot < MD_MAXSIDES; slot++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[slot][0] == '\0')
+ continue;
+
+ /* Only hosts where the lookup failed get the set back */
+ if (nodehasset(sp, sd->sd_nodes[slot],
+ NHS_NST_EQ, &xep) >= 0)
+ continue;
+
+ mdclrerror(&xep);
+
+ if (clnt_createset(sd->sd_nodes[slot], sp, sd->sd_nodes,
+ sd->sd_ctime, sd->sd_genid, &xep) == -1)
+ mdclrerror(&xep);
+ }
+}
+
+/*
+ * Delete a diskset that has no drives from the hosts in node_v.
+ *
+ * The set record is first marked MD_SR_DEL on each host, then the set is
+ * deleted on each host. If a step fails, the "rollback" path re-creates
+ * the set (when deletion had begun) and resynchronises the genid.
+ *
+ * oha is TRUE in OHA mode. For a MN diskset in OHA mode, hosts not
+ * flagged ALIVE are skipped and a host missing from the node list is an
+ * error. For a traditional diskset in OHA mode, RPC errors are ignored.
+ *
+ * If a MN diskset, set is already locked on all nodes via clnt_lock_set.
+ * For a traditional diskset this routine locks and unlocks the set.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+ static int
+ del_set_nodrives(
+ mdsetname_t *sp,
+ int node_c,
+ char **node_v,
+ int oha,
+ md_error_t *ep
+)
+{
+ md_set_desc *sd;
+ int i;
+ sigset_t oldsigs;
+ md_setkey_t *cl_sk;
+ int rb_level = 0;
+ ulong_t max_genid = 0;
+ int rval = 0;
+ md_error_t xep = mdnullerror;
+ md_mnnode_desc *nd;
+ int delete_end = 1;
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ if (MD_MNSET_DESC(sd)) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ } else {
+ md_rb_sig_handling_on();
+ }
+
+ /*
+ * Lock the set on current set members for traditional disksets.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ for (i = 0; i < node_c; i++) {
+ /*
+ * For traditional diskset, issue the RPC and
+ * ignore RPC failure if in OHA mode.
+ */
+ if (clnt_lock_set(node_v[i], sp, ep)) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ rval = -1;
+ goto out;
+ }
+ }
+ }
+
+
+ RB_TEST(1, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 1; /* level 1 */
+
+ RB_TEST(2, "deletehosts", ep)
+
+ /*
+ * Mark the set record MD_SR_DEL
+ */
+ for (i = 0; i < node_c; i++) {
+
+ RB_TEST(3, "deletehosts", ep)
+
+ if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ if (nd == NULL) {
+ /*
+ * nd is NULL here, so name the host from
+ * node_v[i] rather than through nd.
+ */
+ (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+ sp->setno, node_v[i],
+ NULL, sp->setname);
+ goto rollback;
+ }
+
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ continue;
+ }
+
+ if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+ goto rollback;
+ }
+ } else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
+ /*
+ * All nodes should be alive in non-oha mode.
+ */
+ if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+ goto rollback;
+ }
+ } else {
+ /*
+ * For traditional diskset, issue the RPC and
+ * ignore RPC failure if in OHA mode.
+ */
+ if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ goto rollback;
+ }
+ }
+
+ RB_TEST(4, "deletehosts", ep)
+ }
+
+ RB_TEST(5, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 2; /* level 2 */
+
+ RB_TEST(6, "deletehosts", ep)
+
+ /*
+ * A failed delete_begin is tolerated only when
+ * metad_isautotakebyname() is true for the set; in that case the
+ * matching delete_end call is suppressed.
+ */
+ if (sdssc_delete_begin(sp->setname) == SDSSC_ERROR) {
+ if (metad_isautotakebyname(sp->setname))
+ delete_end = 0;
+ else
+ goto rollback;
+ }
+
+ /* The set is OK to delete, make it so. */
+ for (i = 0; i < node_c; i++) {
+
+ RB_TEST(7, "deletehosts", ep)
+
+ if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ if (nd == NULL) {
+ /*
+ * nd is NULL here, so name the host from
+ * node_v[i] rather than through nd.
+ */
+ (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+ sp->setno, node_v[i],
+ NULL, sp->setname);
+ goto rollback;
+ }
+
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ continue;
+ }
+
+ if (clnt_delset(node_v[i], sp, ep) == -1) {
+ goto rollback;
+ }
+ } else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
+ /*
+ * All nodes should be alive in non-oha mode.
+ */
+ if (clnt_delset(node_v[i], sp, ep) == -1) {
+ goto rollback;
+ }
+ } else {
+ /*
+ * For traditional diskset, issue the RPC and
+ * ignore RPC failure if in OHA mode.
+ */
+ if (clnt_delset(node_v[i], sp, ep) == -1) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ goto rollback;
+ }
+ }
+
+ RB_TEST(8, "deletehosts", ep)
+ }
+
+ RB_TEST(9, "deletehosts", ep)
+
+out:
+ /*
+ * Unlock the set on current set members
+ * for traditional disksets.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ for (i = 0; i < node_c; i++) {
+ /*
+ * For traditional diskset, issue the RPC and
+ * ignore RPC failure if in OHA mode.
+ */
+ if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
+ if (oha == TRUE && mdanyrpcerror(&xep)) {
+ mdclrerror(&xep);
+ continue;
+ }
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ }
+ cl_set_setkey(NULL);
+ }
+
+ /*
+ * A MN diskset has the clnt_locks held by meta_set_deletehosts so
+ * don't flush that data until meta_set_deletehosts has finished
+ * with it. meta_set_deletehosts will handle the flush of the
+ * setname.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ metaflushsetname(sp);
+ }
+
+ /*
+ * NOTE(review): this is also reached when the lock step above
+ * failed, i.e. before sdssc_delete_begin() was called, while
+ * delete_end is still 1. Confirm that committing is intended then.
+ */
+ if (delete_end &&
+ sdssc_delete_end(sp->setname, SDSSC_COMMIT) == SDSSC_ERROR)
+ rval = -1;
+
+ if (MD_MNSET_DESC(sd)) {
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ } else {
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+
+ return (rval);
+
+rollback:
+ /* all signals already blocked for MN diskset */
+ if (!(MD_MNSET_DESC(sd))) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ }
+
+ rval = -1;
+
+ max_genid = sd->sd_genid;
+
+ /* level 2 */
+ if (rb_level > 1) {
+ recreate_set(sp, sd);
+ max_genid++;
+
+ if (delete_end)
+ (void) sdssc_delete_end(sp->setname, SDSSC_CLEANUP);
+ }
+
+ /* level 1 */
+ if (rb_level > 0) {
+ max_genid++;
+ resync_genid(sp, sd, max_genid, node_c, node_v);
+ }
+
+ /* level 0 */
+ /*
+ * Unlock the set on current set members
+ * for traditional disksets.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ for (i = 0; i < node_c; i++) {
+ /* Rollback is best effort: every unlock error is dropped */
+ if (clnt_unlock_set(node_v[i], cl_sk, &xep))
+ mdclrerror(&xep);
+ }
+ cl_set_setkey(NULL);
+ }
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ /*
+ * A MN diskset has the clnt_locks held by meta_set_deletehosts so
+ * don't flush that data until meta_set_deletehosts has finished
+ * with it. meta_set_deletehosts will handle the flush of the
+ * setname.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ metaflushsetname(sp);
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+
+ return (rval);
+}
+
+/*
+ * On entry:
+ * procsigs already called for MN diskset.
+ * md_rb_sig_handling already called for traditional diskset.
+ */
+static int
+del_set_on_hosts(
+ mdsetname_t *sp,
+ md_set_desc *sd,
+ md_drive_desc *dd,
+ int node_c, /* Number of nodes */
+ char **node_v, /* Nodes being deleted */
+ int oha,
+ md_error_t *ep
+)
+{
+ int i;
+ int j;
+ side_t sideno;
+ md_replicalist_t *rlp = NULL;
+ sigset_t oldsigs;
+ md_setkey_t *cl_sk;
+ ulong_t max_genid = 0;
+ int rb_level = 1; /* This is a special case */
+ md_error_t xep = mdnullerror;
+ md_mnnode_desc *nd;
+
+ RB_PREEMPT;
+
+ RB_TEST(7, "deletehosts", ep)
+
+ if (dd != NULL) {
+ /*
+ * May need this to re-add sidenames on roll back.
+ */
+ if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp,
+ ep) < 0)
+ goto rollback;
+
+ RB_TEST(8, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 2; /* level 2 */
+
+ RB_TEST(9, "deletehosts", ep)
+
+ if (del_drvs_from_hosts(sp, sd, dd, node_c, node_v, oha, ep))
+ goto rollback;
+
+ RB_TEST(10, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 3; /* level 3 */
+
+ RB_TEST(11, "deletehosts", ep)
+
+ /*
+ * Delete the db replica sides
+ * This is done before the next loop, so that
+ * the db does not get unloaded before we are finished
+ * deleting the sides.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip hosts not being deleted */
+ if (! strinlst(nd->nd_nodename, node_c,
+ node_v)) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ if (del_db_sidenms(sp, nd->nd_nodeid, ep))
+ goto rollback;
+
+ RB_TEST(12, "deletehosts", ep)
+ nd = nd->nd_next;
+ }
+ } else {
+ for (sideno = 0; sideno < MD_MAXSIDES; sideno++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[sideno][0] == '\0')
+ continue;
+
+ /* Skip hosts not being deleted */
+ if (! strinlst(sd->sd_nodes[sideno], node_c,
+ node_v))
+ continue;
+
+ if (del_db_sidenms(sp, sideno, ep))
+ goto rollback;
+
+ RB_TEST(12, "deletehosts", ep)
+ }
+ }
+
+ RB_TEST(13, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 4; /* level 4 */
+
+ RB_TEST(14, "deletehosts", ep)
+
+ /* Delete the names from the namespace */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip hosts not being deleted */
+ if (! strinlst(nd->nd_nodename, node_c,
+ node_v)) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ if (del_md_sidenms(sp, nd->nd_nodeid, ep))
+ goto rollback;
+
+ RB_TEST(15, "deletehosts", ep)
+ nd = nd->nd_next;
+ }
+ } else {
+ for (sideno = 0; sideno < MD_MAXSIDES; sideno++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[sideno][0] == '\0')
+ continue;
+
+ /* Skip hosts not being deleted */
+ if (! strinlst(sd->sd_nodes[sideno], node_c,
+ node_v))
+ continue;
+
+ if (del_md_sidenms(sp, sideno, ep))
+ goto rollback;
+
+ RB_TEST(15, "deletehosts", ep)
+ }
+ }
+ }
+
+ RB_TEST(16, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 5; /* level 6 */
+
+ RB_TEST(17, "deletehosts", ep)
+
+ for (i = 0; i < node_c; i++) {
+ if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ if (nd == NULL) {
+ (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+ sp->setno, nd->nd_nodename,
+ NULL, sp->setname);
+ goto rollback;
+ }
+
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ continue;
+ }
+
+ if (clnt_delset(node_v[i], sp, ep) == -1) {
+ goto rollback;
+ }
+ } else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
+ /*
+ * All nodes should be alive in non-oha mode.
+ */
+ if (clnt_delset(node_v[i], sp, ep) == -1) {
+ goto rollback;
+ }
+ } else {
+ /*
+ * For traditional diskset, issue the RPC and
+ * ignore RPC failure if in OHA mode.
+ */
+ if (clnt_delset(node_v[i], sp, ep) == -1) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ goto rollback;
+ }
+ }
+
+ RB_TEST(18, "deletehosts", ep)
+ }
+
+ metafreereplicalist(rlp);
+
+ if (MD_MNSET_DESC(sd)) {
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ } else {
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+
+ return (0);
+
+rollback:
+ /* all signals already blocked for MN disket */
+ if (!(MD_MNSET_DESC(sd))) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ }
+
+ max_genid = sd->sd_genid;
+
+ /* level 5 */
+ if (rb_level > 4) {
+ recreate_set(sp, sd);
+ max_genid++;
+ }
+
+ /* level 2 */
+ if (rb_level > 1 && dd != NULL) {
+ /*
+ * See if we have to re-add the drives specified.
+ */
+ for (i = 0; i < node_c; i++) {
+ md_set_record *sr;
+
+ if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i])
+ == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ if (nd == NULL)
+ continue;
+
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+ continue;
+ }
+
+ /* Don't care if set record is MN or not */
+ if (clnt_getset(node_v[i], sp->setname,
+ MD_SET_BAD, &sr, &xep) == -1) {
+ mdclrerror(&xep);
+ continue;
+ }
+
+ /* Drive already added, skip to next node */
+ if (sr->sr_drivechain != NULL) {
+ /*
+ * Set record structure was allocated from RPC
+ * routine getset so this structure is only of
+ * size md_set_record even if the MN flag is
+ * set. So, clear the flag so that the free
+ * code doesn't attempt to free a structure
+ * the size of md_mnset_record.
+ */
+ sr->sr_flags &= ~MD_SR_MN;
+ free_sr(sr);
+ continue;
+ }
+
+ if (clnt_adddrvs(node_v[i], sp, dd,
+ sr->sr_ctime, sr->sr_genid, &xep) == -1)
+ mdclrerror(&xep);
+
+ if (clnt_upd_dr_flags(node_v[i], sp, dd,
+ MD_DR_OK, &xep) == -1)
+ mdclrerror(&xep);
+
+ /*
+ * Set record structure was allocated from RPC routine
+ * getset so this structure is only of size
+ * md_set_record even if the MN flag is set. So,
+ * clear the flag so that the free code doesn't
+ * attempt to free a structure the size of
+ * md_mnset_record.
+ */
+ sr->sr_flags &= ~MD_SR_MN;
+ free_sr(sr);
+ }
+ max_genid += 3;
+ }
+
+ /* level 3 */
+ if (rb_level > 2 && dd != NULL) {
+ md_replicalist_t *rl;
+
+ for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+ md_replica_t *r = rl->rl_repp;
+
+ /*
+ * This is not the first replica being added to the
+ * diskset so call with ADDSIDENMS_BCAST. If this
+ * is a traditional diskset, the bcast flag is ignored
+ * since traditional disksets don't use the rpc.mdcommd.
+ */
+ if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno,
+ DB_ADDSIDENMS_BCAST, &xep))
+ mdclrerror(&xep);
+ }
+ }
+
+ /* level 4 */
+ if (rb_level > 3 && dd != NULL) {
+ int nodeid_addsides = 0;
+ /*
+ * Add the device names for the new sides into the namespace,
+ * on all hosts not being deleted.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Find a node that is not being deleted */
+ if (! strinlst(nd->nd_nodename, node_c,
+ node_v)) {
+ nodeid_addsides = nd->nd_nodeid;
+ break;
+ }
+ nd = nd->nd_next;
+ }
+ } else {
+ for (j = 0; j < MD_MAXSIDES; j++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[j][0] == '\0')
+ continue;
+
+ /* Find a node that is not being deleted */
+ if (! strinlst(sd->sd_nodes[j], node_c,
+ node_v))
+ break;
+ }
+ nodeid_addsides = j;
+ }
+
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip nodes not being deleted */
+ if (!strinlst(nd->nd_nodename, node_c,
+ node_v)) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ /* this side was just created, add the names */
+ if (add_md_sidenms(sp, nd->nd_nodeid,
+ nodeid_addsides, &xep))
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes not being deleted */
+ if (!strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ /* this side was just created, add the names */
+ if (add_md_sidenms(sp, i, nodeid_addsides,
+ &xep))
+ mdclrerror(&xep);
+ }
+ }
+ }
+
+ /* level 1 */
+ if (rb_level > 0) {
+ max_genid++;
+ resync_genid(sp, sd, max_genid, node_c, node_v);
+ }
+
+ /* level 0 */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+ continue;
+ /* To balance lock/unlock; can send to dead node */
+ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+ mdclrerror(&xep);
+ }
+ }
+ cl_set_setkey(NULL);
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ metafreereplicalist(rlp);
+
+ if (!(MD_MNSET_DESC(sd))) {
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+
+ return (-1);
+}
+
+/*
+ * Build a side-name entry for side `sideno' of drive `dnp' and append it
+ * to the drive's side_names list.
+ *
+ * The device information for the new entry is looked up from the block
+ * device name of the drive's replica slice.  Returns 0 when the entry has
+ * been linked onto the list and -1 (with ep filled in by the failing
+ * helper) otherwise; nothing is added to the list on failure.
+ */
+static int
+make_sideno_sidenm(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	side_t		sideno,
+	md_error_t	*ep
+)
+{
+	mdsidenames_t	*tail;
+	mdsidenames_t	*new_sn;
+	mdsidenames_t	**linkp;
+	md_set_desc	*sd;
+	mdname_t	*np;
+	uint_t		rep_slice;
+	int		rv;
+
+	assert(dnp->side_names_key != MD_KEYWILD);
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	/* Walk to the last element; the new entry is hung off its next link */
+	tail = dnp->side_names;
+	while (tail->next != NULL)
+		tail = tail->next;
+	linkp = &tail->next;
+
+	if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
+		return (-1);
+
+	np = metaslicename(dnp, rep_slice, ep);
+	if (np == NULL)
+		return (-1);
+
+	new_sn = Zalloc(sizeof (*new_sn));
+	new_sn->sideno = sideno;
+
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * In a multi-node diskset the side number is not an index
+		 * into the array of nodes, so look the side up directly
+		 * rather than asking for the "next" side.
+		 */
+		rv = meta_getside_devinfo(sp, np->bname, sideno,
+		    &new_sn->cname, &new_sn->dname, &new_sn->mnum, ep);
+	} else {
+		/* Step back one so that "next side" yields the wanted side */
+		sideno--;
+		rv = meta_getnextside_devinfo(sp, np->bname, &sideno,
+		    &new_sn->cname, &new_sn->dname, &new_sn->mnum, ep);
+	}
+
+	if (rv == -1) {
+		Free(new_sn);
+		return (-1);
+	}
+	assert(new_sn->sideno == sideno);
+
+	/* Link the finished entry onto the tail of the list */
+	*linkp = new_sn;
+	return (0);
+}
+
+/*
+ * Confirm that every name in node_v is short enough to be a node name and
+ * that the host reached under that name reports exactly the same name for
+ * itself.
+ *
+ * Returns 0 when all node_c names pass.  On the first failure returns
+ * either the result of mddserror() (name too long / name mismatch) or -1
+ * when the remote hostname query itself fails.
+ */
+static int
+validate_nodes(
+	mdsetname_t	*sp,
+	int		node_c,
+	char		**node_v,
+	md_error_t	*ep
+)
+{
+	int	idx;
+
+	for (idx = 0; idx < node_c; idx++) {
+		char	*given = node_v[idx];
+		char	*reported;
+		int	same;
+
+		if (strlen(given) > (size_t)MD_MAX_NODENAME)
+			return (mddserror(ep, MDE_DS_NODENAMETOOLONG,
+			    sp->setno, given, NULL, sp->setname));
+
+		if (clnt_hostname(given, &reported, ep))
+			return (-1);
+
+		/* Compare first, then release the reply on every path */
+		same = (strcmp(given, reported) == 0);
+		Free(reported);
+
+		if (!same)
+			return (mddserror(ep, MDE_DS_NOTNODENAME, sp->setno,
+			    given, NULL, sp->setname));
+	}
+	return (0);
+}
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Check the given disk set name for syntactic correctness.
+ *
+ * A name is rejected when it is longer than MD_MAX_SETNAME characters, or
+ * when any character is non-printable or appears in INVALID_IN_NAMES.
+ *
+ * Returns 0 if the name is acceptable; otherwise returns the result of
+ * mddserror() after recording MDE_DS_SETNAMETOOLONG or
+ * MDE_DS_INVALIDSETNAME in ep.
+ */
+int
+meta_set_checkname(char *setname, md_error_t *ep)
+{
+	char	*cp;
+
+	if (strlen(setname) > (size_t)MD_MAX_SETNAME)
+		return (mddserror(ep, MDE_DS_SETNAMETOOLONG,
+		    MD_SET_BAD, NULL, NULL, setname));
+
+	for (cp = setname; *cp; cp++) {
+		/*
+		 * The <ctype.h> classifiers are only defined for EOF and
+		 * values representable as unsigned char; a plain char with
+		 * the high bit set may be negative, so convert it first.
+		 */
+		if (!isprint((unsigned char)*cp) ||
+		    strchr(INVALID_IN_NAMES, *cp) != NULL)
+			return (mddserror(ep, MDE_DS_INVALIDSETNAME,
+			    MD_SET_BAD, NULL, NULL, setname));
+	}
+	return (0);
+}
+
+/*
+ * Add host(s) to the multi-node diskset provided in sp.
+ *	- create set if non-existent.
+ *
+ * Parameters:
+ *	sp		set to which the hosts are added
+ *	multi_node	passed through to create_set()/create_set_on_hosts()
+ *	node_c, node_v	count and names of the hosts being added
+ *	auto_take	non-zero is rejected for an already existing set
+ *	ep		receives the error describing a failure
+ *
+ * Outline:
+ *	1. Check the new hosts against the membership list, for duplicates
+ *	   and for matching hostnames.  If the set does not exist yet, hand
+ *	   off to create_set() and return its result.
+ *	2. Lock the set and suspend class 1 messages on the current members,
+ *	   lock the set on the new hosts and run the per-host checks.
+ *	3. Create the set on the new hosts, merge them into the local copy
+ *	   of the node list and push side names, host records, mediator
+ *	   data, master/join state and OK flags out to the nodes.  rb_level
+ *	   records how far this got.
+ *	4. "out" undoes the suspend/lock state; "rollback" additionally
+ *	   undoes the work of step 3 according to rb_level.
+ *
+ * Returns 0 on success.  On failure returns -1 (or the result of
+ * mddserror() on the early-exit paths) with ep set.
+ */
+static int
+meta_multinode_set_addhosts(
+	mdsetname_t	*sp,
+	int		multi_node,
+	int		node_c,
+	char		**node_v,
+	int		auto_take,
+	md_error_t	*ep
+)
+{
+	md_set_desc			*sd;
+	md_drive_desc			*dd, *p;
+	int				rval = 0;
+	int				bool;
+	int				nodeindex;
+	int				i;
+	int				has_set;
+	sigset_t			oldsigs;
+	md_setkey_t			*cl_sk;
+	int				rb_level = 0;
+	md_error_t			xep = mdnullerror;
+	md_mnnode_desc			*nd, *nd_curr, *nd_prev;
+	md_timeval32_t			now;
+	int				nodecnt;
+	mndiskset_membershiplist_t	*nl, *nl2;
+	int				suspendall_flag = 0;
+	int				suspend1_flag = 0;
+	int				lock_flag = 0;
+	int				stale_flag = 0;
+	md_mnnode_desc			*saved_nd_next;
+	int				remote_sets_created = 0;
+
+	/*
+	 * Check membershiplist first. If there's
+	 * an error, fail to create set and pass back error.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+		return (-1);
+	}
+	/* Verify that all nodes are in member list */
+	for (i = 0; i < node_c; i++) {
+		/*
+		 * If node in list isn't a member of the membership,
+		 * just return error.
+		 */
+		if (meta_is_member(node_v[i], NULL, nl) == 0) {
+			meta_free_nodelist(nl);
+			return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+			    sp->setno, node_v[i], NULL, sp->setname));
+		}
+	}
+	/*
+	 * Node list is needed later, but there is a lot of error
+	 * checking and possible failures between here and there, so
+	 * just re-get the list later if there are no errors.
+	 */
+	meta_free_nodelist(nl);
+	nl = NULL;
+
+	/*
+	 * Verify that list of nodes being added contains no
+	 * duplicates.
+	 */
+	if (nodesuniq(sp, node_c, node_v, ep))
+		return (-1);
+
+	/*
+	 * Verify that each node being added thinks that its nodename
+	 * is the same as the nodename given.
+	 */
+	if (validate_nodes(sp, node_c, node_v, ep))
+		return (-1);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		if (! mdiserror(ep, MDE_NO_SET))
+			return (-1);
+		mdclrerror(ep);
+		return (create_set(sp, multi_node, node_c, node_v, auto_take,
+		    ep));
+	} else {
+		/*
+		 * If this node and another node were both attempting to
+		 * create the same setname at the same time, and the other
+		 * node has just created the set on this node then sd would
+		 * be non-NULL, but sp->setno would still be zero (setno is
+		 * filled in by the create_set).  If this is true, then fail
+		 * since the other node has already won this race.
+		 *
+		 * setno is an integer set number, so test it against 0 and
+		 * hand that same number to mddserror() rather than using
+		 * the pointer constant NULL for either.
+		 */
+		if (sp->setno == 0) {
+			return (mddserror(ep, MDE_DS_NODEINSET,
+			    sp->setno, mynode(), NULL, sp->setname));
+		}
+	}
+
+	/* The auto_take behavior is inconsistent with multiple hosts. */
+	if (auto_take || sd->sd_flags & MD_SR_AUTO_TAKE) {
+		(void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
+		    sp->setname);
+		return (-1);
+	}
+
+	/*
+	 * We already have the set.
+	 */
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/*
+	 * The drive and node records are stored in the local mddbs of each
+	 * node in the diskset.  Each node's rpc.metad daemon reads in the set,
+	 * drive and node records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set, drive and node records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to add
+	 * the new node record to their local mddb.  If a panic of this node
+	 * leaves the local mddbs set, node and drive records out-of-sync, the
+	 * reconfig cycle will fix the local mddbs and force them back into
+	 * synchronization.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+			    sp->setno, nd->nd_nodename, NULL,
+			    sp->setname));
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * Check if node is already in set.
+	 */
+	for (i = 0; i < node_c; i++) {
+		/* Is node already in set? */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+				break;
+			nd = nd->nd_next;
+		}
+		if (nd) {
+			return (mddserror(ep, MDE_DS_NODEINSET,
+			    sp->setno, node_v[i], NULL,
+			    sp->setname));
+		}
+	}
+
+	/*
+	 * Lock the set on current set members.
+	 * Set locking done much earlier for MN diskset than for traditional
+	 * diskset since lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+		lock_flag = 1;
+		nd = nd->nd_next;
+	}
+	/*
+	 * Lock out other meta* commands by suspending
+	 * class 1 messages across the diskset.
+	 */
+	nd = sd->sd_nodelist;
+	/* Send suspend to nodes in nodelist before addhosts call */
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_mdcommdctl(nd->nd_nodename,
+		    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+		    MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspend1_flag = 1;
+		nd = nd->nd_next;
+	}
+
+	/* Lock the set on new set members */
+	for (i = 0; i < node_c; i++) {
+		/* Already verified to be alive */
+		if (clnt_lock_set(node_v[i], sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+		lock_flag = 1;
+	}
+
+	/*
+	 * Perform the required checks for new hosts
+	 */
+	for (i = 0; i < node_c; i++) {
+		/* Make sure this set name is not used on the other hosts */
+		has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
+		if (has_set < 0) {
+			if (! mdiserror(ep, MDE_NO_SET)) {
+				rval = -1;
+				goto out;
+			}
+			/* Keep on truck'n */
+			mdclrerror(ep);
+		} else if (has_set) {
+			(void) mddserror(ep, MDE_DS_NODEHASSET, sp->setno,
+			    node_v[i], NULL, sp->setname);
+			rval = -1;
+			goto out;
+		}
+
+		if (clnt_setnumbusy(node_v[i], sp->setno, &bool, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		if (bool == TRUE) {
+			(void) mddserror(ep, MDE_DS_SETNUMBUSY, sp->setno,
+			    node_v[i], NULL, sp->setname);
+			rval = -1;
+			goto out;
+		}
+
+		if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		if (bool == FALSE) {
+			(void) mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
+			    node_v[i], NULL, sp->setname);
+			rval = -1;
+			goto out;
+		}
+
+		if (check_setdrvs_againstnode(sp, node_v[i], ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* Get drive descriptors for the set */
+	if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL) {
+		/* NULL with no error recorded means the set has no drives */
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* END CHECK CODE */
+
+	RB_TEST(1, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "addhosts", ep)
+
+	/*
+	 * Create the set where needed
+	 */
+	if (create_set_on_hosts(sp, multi_node, node_c, node_v, 0, ep)) {
+		goto rollback;
+	}
+
+	/*
+	 * Send suspend to rpc.mdcommd on nodes where a set has been
+	 * created since rpc.mdcommd must now be running on the remote nodes.
+	 */
+	remote_sets_created = 1;
+	for (i = 0; i < node_c; i++) {
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		if (clnt_mdcommdctl(node_v[i],
+		    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+		    MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto rollback;
+		}
+	}
+
+	/*
+	 * Merge the new entries into the set with the existing sides.
+	 * Get membershiplist from API routine.  If there's
+	 * an error, fail to create set and pass back error.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+		goto rollback;
+	}
+	if (meta_gettimeofday(&now) == -1) {
+		meta_free_nodelist(nl);
+		(void) mdsyserror(ep, errno,
+		    dgettext(TEXT_DOMAIN, "meta_gettimeofday()"));
+		goto rollback;
+	}
+	for (nodeindex = 0; nodeindex < node_c; nodeindex++) {
+		nd = Zalloc(sizeof (*nd));
+		(void) strcpy(nd->nd_nodename, node_v[nodeindex]);
+		nd->nd_ctime = now;
+		/* Pick up node id and private address from the membership */
+		nl2 = nl;
+		while (nl2) {
+			if (strcmp(nl2->msl_node_name,
+			    node_v[nodeindex]) == 0) {
+				nd->nd_nodeid = nl2->msl_node_id;
+				(void) strcpy(nd->nd_priv_ic,
+				    nl2->msl_node_addr);
+				break;
+			}
+			nl2 = nl2->next;
+		}
+
+		/*
+		 * Nodelist must be kept in ascending nodeid order.
+		 */
+		if (sd->sd_nodelist == NULL) {
+			/* Nothing in list, just add it */
+			sd->sd_nodelist = nd;
+		} else if (nd->nd_nodeid <
+		    sd->sd_nodelist->nd_nodeid) {
+			/* Add to head of list */
+			nd->nd_next = sd->sd_nodelist;
+			sd->sd_nodelist = nd;
+		} else {
+			nd_curr = sd->sd_nodelist->nd_next;
+			nd_prev = sd->sd_nodelist;
+			/* Search for place to add it */
+			while (nd_curr) {
+				if (nd->nd_nodeid < nd_curr->nd_nodeid) {
+					/* Add before nd_curr */
+					nd->nd_next = nd_curr;
+					nd_prev->nd_next = nd;
+					break;
+				}
+				nd_prev = nd_curr;
+				nd_curr = nd_curr->nd_next;
+			}
+			/* Add to end of list */
+			if (nd_curr == NULL) {
+				nd_prev->nd_next = nd;
+			}
+
+		}
+		/* Node already verified to be in membership */
+		nd->nd_flags |= MD_MN_NODE_ALIVE;
+	}
+	meta_free_nodelist(nl);
+
+	/* If we have drives */
+	if (dd != NULL) {
+		/*
+		 * For all the hosts being added, create a sidename structure
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			for (p = dd; p != NULL; p = p->dd_next) {
+				if (make_sideno_sidenm(sp, p->dd_dnp,
+				    nd->nd_nodeid, ep) != 0)
+					goto rollback;
+			}
+			nd = nd->nd_next;
+		}
+
+		RB_PREEMPT;
+		rb_level = 2;	/* level 2 */
+
+		RB_TEST(4, "addhosts", ep)
+
+		/*
+		 * Add the new sidename for each drive to all the hosts
+		 *
+		 * If a multi-node diskset, each host only stores
+		 * the side information for itself.  So, only send
+		 * side information to the new hosts where each host
+		 * will add the appropriate side information to its
+		 * local mddb.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c,
+			    node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Add side info to new hosts */
+			if (clnt_add_drv_sidenms(nd->nd_nodename,
+			    mynode(), sp, sd, node_c, node_v, ep))
+				goto rollback;
+
+			nd = nd->nd_next;
+		}
+
+		RB_TEST(5, "addhosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 3;	/* level 3 */
+
+		RB_TEST(6, "addhosts", ep)
+
+		/*
+		 * Add the device names for the new sides into the namespace
+		 * for all hosts being added.  This is adding the side
+		 * names to the diskset's mddb so add sidenames for all
+		 * of the new hosts.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* this side was just created, add the names */
+			if (add_md_sidenms(sp, nd->nd_nodeid,
+			    MD_SIDEWILD, ep))
+				goto rollback;
+
+			nd = nd->nd_next;
+		}
+
+		RB_TEST(7, "addhosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 4;	/* level 4 */
+
+		RB_TEST(8, "addhosts", ep)
+
+		if (add_db_sidenms(sp, ep))
+			goto rollback;
+
+	} else {
+		/* No drives: nothing to do for levels 2 through 4 */
+		RB_PREEMPT;
+		rb_level = 4;
+	}
+
+	RB_TEST(9, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 5;	/* level 5 */
+
+	RB_TEST(10, "addhosts", ep)
+
+	if (dd != NULL) {
+		/*
+		 * Notify rpc.mdcommd on all nodes of a nodelist change.
+		 * Start by suspending rpc.mdcommd (which drains it of all
+		 * messages), then change the nodelist followed by a reinit
+		 * and resume.
+		 */
+		nd = sd->sd_nodelist;
+		/* Send suspend_all to nodes in nodelist (existing + new) */
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto rollback;
+			}
+			suspendall_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	/* Add the node(s) to each host that is currently in the set */
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_addhosts(nd->nd_nodename, sp, node_c, node_v, ep)) {
+			goto rollback;
+		}
+		nd = nd->nd_next;
+	}
+
+	RB_TEST(11, "addhosts", ep)
+
+	if (dd != NULL) {
+		/*
+		 * Mark the drives MD_DR_OK.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
+			    MD_DR_OK, ep) == -1)
+				goto rollback;
+			nd = nd->nd_next;
+		}
+	}
+
+	RB_TEST(12, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 6;	/* level 6 */
+
+	RB_TEST(13, "addhosts", ep)
+
+
+	/* Add the mediator information to all hosts in the set. */
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep))
+			goto rollback;
+		nd = nd->nd_next;
+	}
+
+	RB_TEST(14, "addhosts", ep)
+
+	/*
+	 * If a MN diskset and there are drives in the set,
+	 * set the master on the new nodes and
+	 * automatically join the new nodes into the set.
+	 */
+	if (dd != NULL) {
+		mddb_config_t	c;
+		/*
+		 * Is current set STALE?
+		 */
+		(void) memset(&c, 0, sizeof (c));
+		c.c_id = 0;
+		c.c_setno = sp->setno;
+		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+			/*
+			 * NOTE(review): this is the only failure after
+			 * rb_level 1 that leaves through "out" rather than
+			 * "rollback", so the hosts added above are not
+			 * backed out here - confirm this is intended.
+			 */
+			(void) mdstealerror(ep, &c.c_mde);
+			rval = -1;
+			goto out;
+		}
+		if (c.c_flags & MDDB_C_STALE) {
+			stale_flag = MNSET_IS_STALE;
+		}
+
+		/* Set master on newly added nodes */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_mnsetmaster(node_v[i], sp,
+			    sd->sd_mn_master_nodenm,
+			    sd->sd_mn_master_nodeid, ep)) {
+				goto rollback;
+			}
+		}
+		/* Join newly added nodes to diskset and set OWN flag */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_joinset(node_v[i], sp, stale_flag, ep))
+				goto rollback;
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
+					nd->nd_flags |= MD_MN_NODE_OWN;
+					/*
+					 * Also set ADD flag since this flag
+					 * is already set in rpc.metad - it's
+					 * just not in the local copy.
+					 * Could flush local cache and call
+					 * metaget_setdesc, but this just
+					 * adds time.  Since this node knows
+					 * the state of the node flags in
+					 * rpc.metad, just set the ADD
+					 * flag and save time.
+					 */
+					nd->nd_flags |= MD_MN_NODE_ADD;
+					break;
+				}
+				nd = nd->nd_next;
+			}
+		}
+
+		/* Send new node flag list to all Owner nodes */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * Will effectively set OWN flag in records kept
+			 * cached in rpc.metad.  The ADD flag would have
+			 * already been set by the call to clnt_addhosts.
+			 */
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+			    sd->sd_nodelist, MD_NR_SET, NULL, ep)) {
+				goto rollback;
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Mark the set record MD_SR_OK
+	 */
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_upd_sr_flags(nd->nd_nodename, sp, MD_SR_OK,
+		    ep)) {
+			goto rollback;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * For MN diskset:
+	 * On each newly added node, set the node record for that node
+	 * to OK.  Then set all node records for the newly added
+	 * nodes on all nodes to ok.
+	 *
+	 * By setting a node's own node record to ok first, even if
+	 * the node adding the hosts panics, the rest of the nodes can
+	 * determine the same node list during the choosing of the master
+	 * during reconfig.  So, only nodes considered for mastership
+	 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set
+	 * on that node's rpc.metad.  If all nodes have MD_SR_OK set,
+	 * but no node has its own MD_MN_NODE_OK set, then the set will
+	 * be removed during reconfig since a panic occurred during the
+	 * creation of the initial diskset.
+	 */
+
+	for (i = 0; i < node_c; i++) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+				break;
+			nd = nd->nd_next;
+		}
+		/* Something wrong, will pick this up in next loop */
+		if (nd == NULL)
+			continue;
+
+		/*
+		 * Only changing my local cache of node list: temporarily
+		 * cut the list after nd so that just this one node record
+		 * is sent, then restore the link.
+		 */
+		saved_nd_next = nd->nd_next;
+		nd->nd_next = NULL;
+
+		/* Set node record for added host to ok on that host */
+		if (clnt_upd_nr_flags(node_v[i], sp,
+		    nd, MD_NR_OK, NULL, ep)) {
+			nd->nd_next = saved_nd_next;
+			goto rollback;
+		}
+		nd->nd_next = saved_nd_next;
+	}
+
+	/* Now set all node records on all nodes to be ok */
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+		    sd->sd_nodelist, MD_NR_OK, NULL, ep)) {
+			goto rollback;
+		}
+		nd = nd->nd_next;
+	}
+
+	RB_TEST(15, "addhosts", ep)
+out:
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.  Then send resume.
+	 * Resume on class 0 will resume all classes, so can skip
+	 * doing an explicit resume of class1 (ignore suspend1_flag).
+	 */
+	if (suspendall_flag) {
+		/*
+		 * Don't know if nodelist contains the nodes being added
+		 * or not, so do reinit to nodes not being added (by skipping
+		 * any nodes in the nodelist being added) and then do
+		 * reinit to nodes being added if remote_sets_created is 1.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Skip nodes being added - handled later */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				/* Only the first error is handed to caller */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		/*
+		 * Send reinit to added nodes that had a set created since
+		 * rpc.mdcommd is running on the nodes with a set.
+		 */
+		if (remote_sets_created == 1) {
+			for (i = 0; i < node_c; i++) {
+				if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT,
+				    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+					mde_perror(ep, dgettext(TEXT_DOMAIN,
+					    "Unable to reinit rpc.mdcommd.\n"));
+				}
+			}
+		}
+	}
+	if ((suspend1_flag) || (suspendall_flag)) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 *
+		 * Don't know if nodelist contains the nodes being added
+		 * or not, so do resume_all to nodes not being added (by
+		 * skipping any nodes in the nodelist being added) and then do
+		 * resume_all to nodes being added if remote_sets_created is 1.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Skip nodes being added - handled later */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		/*
+		 * Send resume to added nodes that had a set created since
+		 * rpc.mdcommd is running on the nodes with a set.
+		 */
+		if (remote_sets_created == 1) {
+			for (i = 0; i < node_c; i++) {
+				/* Already verified to be alive */
+				if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME,
+				    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS,
+				    &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+					mde_perror(ep, dgettext(TEXT_DOMAIN,
+					    "Unable to resume rpc.mdcommd.\n"));
+				}
+			}
+		}
+		meta_ping_mnset(sp->setno);
+		/*
+		 * Start a resync thread on the newly added nodes
+		 * if set is not stale. Also start a thread to update the
+		 * abr state of all soft partitions
+		 */
+		if (stale_flag != MNSET_IS_STALE) {
+			for (i = 0; i < node_c; i++) {
+				if (clnt_mn_mirror_resync_all(node_v[i],
+				    sp->setno, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+					mde_perror(ep, dgettext(TEXT_DOMAIN,
+					    "Unable to start resync "
+					    "thread.\n"));
+				}
+				if (clnt_mn_sp_update_abr(node_v[i],
+				    sp->setno, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+					mde_perror(ep, dgettext(TEXT_DOMAIN,
+					    "Unable to start sp update "
+					    "thread.\n"));
+				}
+			}
+		}
+	}
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/*
+	 * Don't know if nodelist contains the nodes being added
+	 * or not, so do clnt_unlock_set to nodes not being added (by
+	 * skipping any nodes in the nodelist being added) and then do
+	 * clnt_unlock_set to nodes being added.
+	 */
+	if (lock_flag) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Skip hosts we get in the next loop */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+			nd = nd->nd_next;
+		}
+		for (i = 0; i < node_c; i++) {
+			/* Already verified to be alive */
+			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	}
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	return (rval);
+
+rollback:
+	/*
+	 * Undo, in reverse order, whatever was done up to rb_level.
+	 * Errors during rollback are reported through xep and discarded so
+	 * that ep keeps the error that triggered the rollback.
+	 */
+	rval = -1;
+
+	/* level 6 */
+	if (rb_level > 5) {
+		/*
+		 * For each node being deleted, set DEL flag and
+		 * reset OK flag on that node first.
+		 * Until a node has turned off its own
+		 * rpc.metad's NODE_OK flag, that node could be
+		 * considered for master during a reconfig.
+		 */
+		for (i = 0; i < node_c; i++) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			/* Something wrong, handle this in next loop */
+			if (nd == NULL)
+				continue;
+
+			/* Only changing my local cache of node list */
+			saved_nd_next = nd->nd_next;
+			nd->nd_next = NULL;
+
+			/* Set flags for del host to DEL on that host */
+			if (clnt_upd_nr_flags(node_v[i], sp,
+			    nd, MD_NR_DEL, NULL, &xep)) {
+				mdclrerror(&xep);
+			}
+			nd->nd_next = saved_nd_next;
+		}
+
+		for (i = 0; i < node_c; i++) {
+			if (dd != NULL) {
+				/* Reset master on newly added node */
+				if (clnt_mnsetmaster(node_v[i], sp, "",
+				    MD_MN_INVALID_NID, &xep))
+					mdclrerror(&xep);
+				/* Withdraw set on newly added node */
+				if (clnt_withdrawset(node_v[i], sp, &xep))
+					mdclrerror(&xep);
+			}
+			/*
+			 * Turn off owner flag in nodes to be deleted
+			 * if there are drives in the set.
+			 * Also, turn off NODE_OK and turn on NODE_DEL
+			 * for nodes to be deleted.
+			 * These flags are used to set the node
+			 * record flags in all nodes in the set.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
+					if (dd != NULL) {
+						nd->nd_flags &= ~MD_MN_NODE_OWN;
+					}
+					nd->nd_flags |= MD_MN_NODE_DEL;
+					nd->nd_flags &= ~MD_MN_NODE_OK;
+					break;
+				}
+				nd = nd->nd_next;
+			}
+		}
+
+		/*
+		 * Now, reset owner and set delete flags for the deleted
+		 * nodes on all nodes.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+			    sd->sd_nodelist, MD_NR_SET, NULL, &xep)) {
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * On each node being deleted, set the set record
+		 * to be in DEL state.
+		 */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, &xep)) {
+				mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 5 */
+	if (rb_level > 4) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_delhosts(nd->nd_nodename, sp, node_c,
+			    node_v, &xep) == -1)
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.  Then send resume.
+	 * Nodelist contains all nodes (existing + added).
+	 */
+	if (suspendall_flag) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		/* Send reinit to nodes in nodelist before addhosts call */
+		while (nd) {
+			/*
+			 * Skip nodes being added if remote sets were not
+			 * created since rpc.mdcommd may not be running
+			 * on the remote nodes.
+			 */
+			if ((remote_sets_created == 0) &&
+			    (strinlst(nd->nd_nodename, node_c, node_v))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/*
+			 * Skip nodes being added if remote sets were not
+			 * created since rpc.mdcommd may not be running
+			 * on the remote nodes.
+			 */
+			if ((remote_sets_created == 0) &&
+			    (strinlst(nd->nd_nodename, node_c, node_v))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * Resume all classes but class 1 so that lock is held
+			 * against meta* commands.
+			 * Send resume_all_but_1 to nodes in nodelist
+			 * before addhosts call.
+			 */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_DONT_RESUME_CLASS1,
+			    &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/* level 4 */
+	/* Nodelist may or may not contain nodes being added. */
+	if (rb_level > 3 && dd != NULL) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (del_db_sidenms(sp, nd->nd_nodeid, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	}
+
+	/* level 3 */
+	/* Nodelist may or may not contain nodes being added. */
+	if (rb_level > 2 && dd != NULL) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (del_md_sidenms(sp, nd->nd_nodeid, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		if (dd != NULL) {
+			/* delete the drive records */
+			for (i = 0; i < node_c; i++) {
+				if (clnt_deldrvs(node_v[i], sp, dd, &xep) == -1)
+					mdclrerror(&xep);
+			}
+		}
+
+		/* delete the set record */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_delset(node_v[i], sp, &xep) == -1)
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	/* Nodelist may or may not contain nodes being added. */
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		/* All nodes are guaranteed to be ALIVE */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/*
+			 * Skip nodes being added since remote sets
+			 * were either created and then deleted or
+			 * were never created.  Either way - rpc.mdcommd
+			 * may not be running on the remote node.
+			 */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
+			    MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		/* Skip hosts we get in the next loop */
+		if (strinlst(nd->nd_nodename, node_c, node_v)) {
+			nd = nd->nd_next;
+			continue;
+		}
+
+		if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+			mdclrerror(&xep);
+		nd = nd->nd_next;
+	}
+
+	for (i = 0; i < node_c; i++)
+		if (clnt_unlock_set(node_v[i], cl_sk, &xep))
+			mdclrerror(&xep);
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	metaflushsetname(sp);
+
+	return (rval);
+}
+
+/*
+ * Add host(s) to the traditional diskset provided in sp.
+ * - create set if non-existent.
+ *
+ * Arguments:
+ *	sp		set to which the hosts are added
+ *	multi_node	passed through unchanged to create_set() and
+ *			create_set_on_hosts()
+ *	node_c, node_v	count and names of the hosts to add
+ *	auto_take	passed to create_set() when the set does not exist
+ *			yet; for an existing set a non-zero value (or a set
+ *			already flagged MD_SR_AUTO_TAKE) fails the request
+ *			with MDE_DS_SINGLEHOST
+ *	ep		error return
+ *
+ * Returns 0 on success.  On failure returns -1 (or the value returned by
+ * mddserror(), presumably also -1) with the reason left in ep.
+ *
+ * The work is done in numbered steps; rb_level records the last step that
+ * completed so that the rollback code at the end of the function undoes
+ * only what was actually done, in reverse order.
+ */
+static int
+meta_traditional_set_addhosts(
+ mdsetname_t *sp,
+ int multi_node,
+ int node_c,
+ char **node_v,
+ int auto_take,
+ md_error_t *ep
+)
+{
+ md_set_desc *sd;
+ md_drive_desc *dd, *p;
+ med_rec_t medr;
+ med_rec_t rb_medr;
+ int rval = 0;
+ int bool;
+ int nodeindex;
+ int i;
+ int has_set;
+ int numsides;
+ int new_locked = 0; /* # of new hosts the set is locked on */
+ sigset_t oldsigs;
+ md_setkey_t *cl_sk;
+ int rb_level = 0;
+ md_error_t xep = mdnullerror;
+ int max_meds;
+
+ if (nodesuniq(sp, node_c, node_v, ep))
+ return (-1);
+
+ if (validate_nodes(sp, node_c, node_v, ep))
+ return (-1);
+
+ /* No such set yet: any error other than MDE_NO_SET is fatal */
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ if (! mdiserror(ep, MDE_NO_SET))
+ return (-1);
+ mdclrerror(ep);
+ return (create_set(sp, multi_node, node_c, node_v, auto_take,
+ ep));
+ }
+
+ /* The auto_take behavior is inconsistent with multiple hosts. */
+ if (auto_take || sd->sd_flags & MD_SR_AUTO_TAKE) {
+ (void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
+ sp->setname);
+ return (-1);
+ }
+
+ /*
+ * We already have the set.
+ */
+
+ /* Make sure we own the set */
+ if (meta_check_ownership(sp, ep) != 0)
+ return (-1);
+
+ /*
+ * Perform the required checks for new hosts
+ */
+ for (i = 0; i < node_c; i++) {
+ if (getnodeside(node_v[i], sd) != MD_SIDEWILD)
+ return (mddserror(ep, MDE_DS_NODEINSET, sp->setno,
+ node_v[i], NULL, sp->setname));
+
+ /* Make sure this set name is not used on the other hosts */
+ has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
+ if (has_set < 0) {
+ if (! mdiserror(ep, MDE_NO_SET))
+ return (-1);
+ /* Keep on truck'n */
+ mdclrerror(ep);
+ } else if (has_set)
+ return (mddserror(ep, MDE_DS_NODEHASSET, sp->setno,
+ node_v[i], NULL, sp->setname));
+
+ if (clnt_setnumbusy(node_v[i], sp->setno, &bool, ep) == -1)
+ return (-1);
+
+ if (bool == TRUE)
+ return (mddserror(ep, MDE_DS_SETNUMBUSY, sp->setno,
+ node_v[i], NULL, sp->setname));
+
+ if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1)
+ return (-1);
+
+ if (bool == FALSE)
+ return (mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
+ node_v[i], NULL, sp->setname));
+
+ if (check_setdrvs_againstnode(sp, node_v[i], ep))
+ return (-1);
+ }
+
+ /* Count the number of occupied slots */
+ numsides = 0;
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Count occupied slots */
+ if (sd->sd_nodes[i][0] != '\0')
+ numsides++;
+ }
+
+ /* Make sure we have space to add the new sides */
+ if ((numsides + node_c) > MD_MAXSIDES) {
+ (void) mddserror(ep, MDE_DS_SIDENUMNOTAVAIL, sp->setno, NULL,
+ NULL, sp->setname);
+ return (-1);
+ }
+
+ /*
+ * Get drive descriptors for the set.  A NULL result with a clean ep
+ * is not an error; it is treated below as "set has no drives".
+ */
+ if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL)
+ if (! mdisok(ep))
+ return (-1);
+
+ /*
+ * Setup the mediator record roll-back structure.  It captures the
+ * node list as it is before any host is added.
+ */
+ (void) memset(&rb_medr, '\0', sizeof (med_rec_t));
+ rb_medr.med_rec_mag = MED_REC_MAGIC;
+ rb_medr.med_rec_rev = MED_REC_REV;
+ rb_medr.med_rec_fl = 0;
+ rb_medr.med_rec_sn = sp->setno;
+ (void) strcpy(rb_medr.med_rec_snm, sp->setname);
+ for (i = 0; i < MD_MAXSIDES; i++)
+ (void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]);
+ rb_medr.med_rec_meds = sd->sd_med; /* structure assignment */
+ (void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
+ rb_medr.med_rec_foff = 0;
+ crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+ if ((max_meds = get_max_meds(ep)) == 0)
+ return (-1);
+
+ /* END CHECK CODE */
+
+ md_rb_sig_handling_on();
+
+ /* Lock the set on current set members */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /*
+ * Lock the set on new set members.  Keep count of the hosts locked
+ * so far so that a failure part way through releases exactly those
+ * locks at "out".
+ */
+ for (i = 0; i < node_c; i++) {
+ if (clnt_lock_set(node_v[i], sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+ new_locked++;
+ }
+
+ RB_TEST(1, "addhosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 1; /* level 1 */
+
+ RB_TEST(2, "addhosts", ep)
+
+ /*
+ * Add the new hosts to the existing set record on the existing hosts
+ */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_addhosts(sd->sd_nodes[i], sp, node_c, node_v, ep))
+ goto rollback;
+ }
+
+ RB_PREEMPT;
+ rb_level = 2; /* level 2 */
+
+ RB_TEST(3, "addhosts", ep);
+
+ /*
+ * Merge the new entries into the set with the existing sides.
+ * From here on the local sd->sd_nodes[] holds both old and new hosts.
+ */
+ nodeindex = 0;
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip full slots */
+ if (sd->sd_nodes[i][0] != '\0')
+ continue;
+
+ (void) strcpy(sd->sd_nodes[i], node_v[nodeindex++]);
+ if (nodeindex == node_c)
+ break;
+ }
+
+ /* If we have drives */
+ if (dd != NULL) {
+ /*
+ * For all the hosts being added, create a sidename structure
+ */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes not being added */
+ if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ for (p = dd; p != NULL; p = p->dd_next) {
+ if (make_sideno_sidenm(sp, p->dd_dnp, i,
+ ep) != 0)
+ goto rollback;
+ }
+ }
+
+ /*
+ * Add the new sidename for each drive to the existing hosts
+ */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes being added */
+ if (strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (clnt_add_drv_sidenms(sd->sd_nodes[i], mynode(), sp,
+ sd, node_c, node_v, ep)) {
+ goto rollback;
+ }
+ }
+
+ RB_TEST(4, "addhosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 3; /* level 3 */
+
+ RB_TEST(5, "addhosts", ep)
+
+ if (add_db_sidenms(sp, ep)) {
+ goto rollback;
+ }
+
+ } else {
+ /* No drives: nothing to do for this step, just advance */
+ RB_PREEMPT;
+ rb_level = 3;
+ }
+
+ RB_TEST(6, "addhosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 4; /* level 4 */
+
+ RB_TEST(7, "addhosts", ep)
+
+
+ /* create the set on the new nodes, this adds the drives as well */
+ if (create_set_on_hosts(sp, multi_node, node_c, node_v, 0, ep)) {
+ goto rollback;
+ }
+
+ RB_TEST(8, "addhosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 5; /* level 5 */
+
+ RB_TEST(9, "addhosts", ep)
+
+ if (dd != NULL) {
+
+ /*
+ * Add the device entries for the new sides into the namespace.
+ */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes not being added */
+ if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (add_md_sidenms(sp, i, MD_SIDEWILD, ep))
+ goto rollback;
+ }
+ }
+
+ RB_TEST(10, "addhosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 6; /* level 6 */
+
+ RB_TEST(11, "addhosts", ep);
+
+ if (dd != NULL) {
+ /*
+ * Mark the drives MD_DR_OK.
+ */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
+ MD_DR_OK, ep) == -1) {
+ goto rollback;
+ }
+ }
+ }
+
+ RB_TEST(12, "addhosts", ep)
+
+ /* Bring the mediator record up to date with the set record */
+ medr = rb_medr; /* structure assignment */
+ for (i = 0; i < MD_MAXSIDES; i++)
+ (void) strcpy(medr.med_rec_nodes[i], sd->sd_nodes[i]);
+ crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+ /* Inform the mediator hosts of the new node list */
+ for (i = 0; i < max_meds; i++) {
+ if (sd->sd_med.n_lst[i].a_cnt == 0)
+ continue;
+
+ if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
+ goto rollback;
+ }
+
+ /* Add the mediator information to all hosts in the set */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep))
+ goto rollback;
+ }
+
+ RB_TEST(13, "addhosts", ep)
+
+ /*
+ * Mark the set record MD_SR_OK
+ */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_upd_sr_flags(sd->sd_nodes[i], sp, MD_SR_OK, ep))
+ goto rollback;
+ }
+
+ RB_TEST(14, "addhosts", ep)
+
+out:
+ /*
+ * Reached on success and when taking the set lock failed (rval is
+ * already -1 and ep already holds the reason in that case).
+ * The first unlock failure is reported through ep only if no earlier
+ * error exists; later failures are cleared so xep never carries a
+ * stale error out of this function.
+ */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip hosts we get in the next loop */
+ if (strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ else
+ mdclrerror(&xep);
+ rval = -1;
+ }
+ }
+
+ /*
+ * Release the lock on every new host it was taken on, regardless of
+ * rval, so that neither a partial lock failure nor an unlock failure
+ * on an existing member leaves a new host locked.
+ */
+ for (i = 0; i < new_locked; i++) {
+ if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ else
+ mdclrerror(&xep);
+ rval = -1;
+ }
+ }
+ cl_set_setkey(NULL);
+
+ metaflushsetname(sp);
+
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+ return (rval);
+
+rollback:
+ /*
+ * Undo the completed steps in reverse order.  Errors hit while
+ * rolling back go to xep and are discarded so that ep keeps the
+ * error that triggered the rollback.
+ */
+
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ rval = -1;
+
+ /* level 6 */
+ if (rb_level > 5) {
+ /* Put the pre-change node list back on the mediator hosts */
+ for (i = 0; i < max_meds; i++) {
+ if (sd->sd_med.n_lst[i].a_cnt == 0)
+ continue;
+
+ if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
+ &rb_medr, &xep))
+ mdclrerror(&xep);
+ }
+ if (dd != NULL) {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes not being added */
+ if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (del_md_sidenms(sp, i, &xep))
+ mdclrerror(&xep);
+ }
+ }
+ }
+
+ /* level 5 */
+ if (rb_level > 4) {
+ if (dd != NULL) {
+ /* delete the drive records */
+ for (i = 0; i < node_c; i++) {
+ if (clnt_deldrvs(node_v[i], sp, dd, &xep) == -1)
+ mdclrerror(&xep);
+ }
+ }
+ /* delete the set record on the 'new' hosts */
+ for (i = 0; i < node_c; i++) {
+ if (clnt_delset(node_v[i], sp, &xep) == -1)
+ mdclrerror(&xep);
+ }
+ }
+
+ /* level 4 */
+ if (rb_level > 3 && dd != NULL) {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes not being added */
+ if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (del_db_sidenms(sp, i, &xep))
+ mdclrerror(&xep);
+ }
+ }
+
+ /*
+ * level 3
+ * NOTE(review): the forward step issued clnt_add_drv_sidenms() to
+ * the hosts NOT being added, while this loop calls
+ * clnt_del_drv_sidenms() on the hosts being added - confirm that
+ * this asymmetry is intended.
+ */
+ if (rb_level > 2 && dd != NULL) {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes not being added */
+ if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (clnt_del_drv_sidenms(sd->sd_nodes[i], sp,
+ &xep) == -1)
+ mdclrerror(&xep);
+ }
+ }
+
+ /* level 2 */
+ if (rb_level > 1) {
+ /* Take the new hosts back out of every host's set record */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_delhosts(sd->sd_nodes[i], sp, node_c, node_v,
+ &xep) == -1)
+ mdclrerror(&xep);
+ }
+ }
+
+ /* level 1 */
+ if (rb_level > 0) {
+ /* Every lock was taken before level 1, so release them all */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip hosts we get in the next loop */
+ if (strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+ mdclrerror(&xep);
+ }
+
+ for (i = 0; i < node_c; i++)
+ if (clnt_unlock_set(node_v[i], cl_sk, &xep))
+ mdclrerror(&xep);
+ cl_set_setkey(NULL);
+ }
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ metaflushsetname(sp);
+
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+ return (rval);
+}
+
+/*
+ * Add host(s) to the diskset provided in sp.
+ * - create set if non-existent.
+ *
+ * This is only a dispatcher: a non-zero multi_node hands the request to
+ * meta_multinode_set_addhosts(), otherwise it goes to
+ * meta_traditional_set_addhosts().  All arguments are forwarded unchanged
+ * and the callee's return value is returned as is.
+ */
+int
+meta_set_addhosts(
+ mdsetname_t *sp,
+ int multi_node,
+ int node_c,
+ char **node_v,
+ int auto_take,
+ md_error_t *ep
+)
+{
+ /* Traditional diskset */
+ if (!multi_node) {
+ return (meta_traditional_set_addhosts(sp, multi_node, node_c,
+ node_v, auto_take, ep));
+ }
+
+ /* Multinode diskset */
+ return (meta_multinode_set_addhosts(sp, multi_node, node_c,
+ node_v, auto_take, ep));
+}
+
+/*
+ * Delete host(s) from the diskset provided in sp.
+ * - destroy set if last host in set is removed.
+ */
+int
+meta_set_deletehosts(
+ mdsetname_t *sp,
+ int node_c,
+ char **node_v,
+ int forceflg,
+ md_error_t *ep
+)
+{
+ md_set_desc *sd;
+ md_drive_desc *dd;
+ med_rec_t medr;
+ med_rec_t rb_medr;
+ int i, j;
+ int has_set;
+ int numsides = 0;
+ int oha = FALSE;
+ sigset_t oldsigs;
+ mhd_mhiargs_t mhiargs;
+ md_replicalist_t *rlp = NULL;
+ md_setkey_t *cl_sk;
+ ulong_t max_genid = 0;
+ int rval = 0;
+ int rb_level = 0;
+ int max_meds = 0;
+ md_error_t xep = mdnullerror;
+ md_mnnode_desc *nd;
+ md_mnnode_record *nr;
+ int delete_master = 0;
+ int suspendall_flag = 0, suspendall_flag_rb = 0;
+ int suspend1_flag = 0;
+ int lock_flag = 0;
+ int stale_flag = 0;
+ int *node_id_list = NULL;
+ int remote_sets_deleted = 0;
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ /*
+ * Verify that list of nodes being deleted contains no
+ * duplicates.
+ */
+ if (nodesuniq(sp, node_c, node_v, ep))
+ return (-1);
+
+ /* Make sure we own the set */
+ if (meta_check_ownership(sp, ep) != 0)
+ return (-1);
+
+ /*
+ * The drive and node records are stored in the local mddbs of each
+ * node in the diskset. Each node's rpc.metad daemon reads in the set,
+ * drive and node records from that node's local mddb and caches them
+ * internally. Any process needing diskset information contacts its
+ * local rpc.metad to get this information. Since each node in the
+ * diskset is independently reading the set information from its local
+ * mddb, the set, drive and node records in the local mddbs must stay
+ * in-sync, so that all nodes have a consistent view of the diskset.
+ *
+ * For a multinode diskset, explicitly verify that all nodes in the
+ * diskset are ALIVE (i.e. are in the API membership list) if the
+ * forceflag is FALSE. (The case of forceflag being TRUE is handled
+ * in the OHA check below.)
+ *
+ * If forceflag is FALSE and a node in the diskset is not in
+ * the membership list, then fail this operation since all nodes must
+ * be ALIVE in order to delete the node record from their local mddb.
+ * If a panic of this node leaves the local mddbs set, node and drive
+ * records out-of-sync, the reconfig cycle will fix the local mddbs
+ * and force them back into synchronization.
+ */
+ if ((forceflg == FALSE) && (MD_MNSET_DESC(sd))) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+ sp->setno, nd->nd_nodename,
+ NULL, sp->setname));
+ }
+ nd = nd->nd_next;
+ }
+ }
+
+
+ /*
+ * Lock the set on current set members.
+ * Set locking done much earlier for MN diskset than for traditional
+ * diskset since lock_set and SUSPEND are used to protect against
+ * other meta* commands running on the other nodes.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+ rval = -1;
+ goto out2;
+ }
+ lock_flag = 1;
+ nd = nd->nd_next;
+ }
+ /*
+ * Lock out other meta* commands by suspending
+ * class 1 messages across the diskset.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename,
+ COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+ MD_MSCF_NO_FLAGS, ep)) {
+ rval = -1;
+ goto out2;
+ }
+ suspend1_flag = 1;
+ nd = nd->nd_next;
+ }
+ }
+
+ for (i = 0; i < node_c; i++)
+ if (getnodeside(node_v[i], sd) == MD_SIDEWILD) {
+ (void) mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
+ node_v[i], NULL, sp->setname);
+ rval = -1;
+ goto out2;
+ }
+
+ /*
+ * Count the number of nodes currently in the set.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ numsides++;
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++)
+ /* Count full slots */
+ if (sd->sd_nodes[i][0] != '\0')
+ numsides++;
+ }
+
+ /*
+ * OHA mode == -f -h <hostname>
+ * OHA is One Host Administration that occurs when the forceflag (-f)
+ * is set and at least one host in the diskset isn't responding
+ * to RPC requests.
+ *
+ * When in OHA mode, a node cannot delete itself from a diskset.
+ * When in OHA mode, a node can delete a list of nodes from a diskset
+ * even if some of the nodes in the diskset are unresponsive.
+ *
+ * For multinode diskset, only allow OHA mode when the nodes that
+ * aren't responding in the diskset are not in the membership list
+ * (i.e. nodes that aren't responding are not marked ALIVE).
+ * Nodes that aren't in the membership list will be rejoining
+ * the diskset through a reconfig cycle and the local mddb set
+ * and node records can be reconciled during the reconfig cycle.
+ *
+ * If a node isn't responding, but is still in the membership list,
+ * fail the request since the node may not be responding because
+ * rpc.metad died and is restarting. In this case, no reconfig
+ * cycle will be started, so there's no way to recover if
+ * the host delete operation was allowed.
+ *
+ * NOTE: if nodes that weren't in the membership when the OHA host
+ * delete occurred are now the only nodes in membership list,
+ * those nodes will see the old view of the diskset. As soon as
+ * a node re-enters the cluster that was present in the cluster
+ * during the host deletion, the diskset will reflect the host
+ * deletion on all nodes presently in the cluster.
+ */
+ if (forceflg == TRUE) {
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /*
+ * If a node isn't ALIVE (in member list),
+ * then allow a force-able delete in OHA mode.
+ */
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ oha = TRUE;
+ break;
+ }
+ /*
+ * Don't test for clnt_nullproc since already
+ * tested the RPC connections by clnt_lock_set.
+ */
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_nullproc(sd->sd_nodes[i], ep) == -1) {
+ /*
+ * If we timeout to at least one
+ * client, then we can allow OHA mode,
+ * otherwise, we are in normal mode.
+ */
+ if (mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ if (strinlst(sd->sd_nodes[i],
+ node_c, node_v)) {
+ oha = TRUE;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Don't allow this for MN diskset since meta_set_destroy of 1 node
+ * does NOT remove this node's node record from the other node's set
+ * records in their local mddb. This leaves a MN diskset in a very
+ * messed up state.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ /* Destroy set */
+ if (forceflg == TRUE && node_c == 1 &&
+ strcmp(mynode(), node_v[0]) == 0) {
+ /* Can return since !MN diskset so nothing to unlock */
+ return (meta_set_destroy(sp, TRUE, ep));
+ }
+ }
+
+
+ /*
+ * In multinode diskset, can only delete self if this
+ * is the last node in the set or if all nodes in
+ * the set are being deleted. The traditional diskset code
+ * allows a node to delete itself (when there are other nodes
+ * in the diskset) when using the force flag, but that code
+ * path doesn't have the node remove itself from
+ * the set node list on the other nodes. Since this isn't
+ * satisfactory for the multinode diskset, just don't
+ * allow this operation.
+ */
+ if (MD_MNSET_DESC(sd) && (numsides > 1) && (node_c != numsides) &&
+ strinlst(mynode(), node_c, node_v)) {
+ (void) mddserror(ep, MDE_DS_MNCANTDELSELF, sp->setno,
+ mynode(), NULL, sp->setname);
+ rval = -1;
+ goto out2;
+ }
+
+ /*
+ * In multinode diskset, don't allow deletion of master node unless
+ * this is the only node left or unless all nodes are being
+ * deleted since there is no way to switch
+ * master ownership (unless via a cluster reconfig cycle).
+ */
+ delete_master = strinlst(sd->sd_mn_master_nodenm, node_c, node_v);
+ if (MD_MNSET_DESC(sd) && (numsides > 1) && (node_c != numsides) &&
+ delete_master) {
+ (void) mddserror(ep, MDE_DS_CANTDELMASTER, sp->setno,
+ sd->sd_mn_master_nodenm, NULL, sp->setname);
+ rval = -1;
+ goto out2;
+ }
+
+
+ /* Deleting self w/o forceflg */
+ if (forceflg == FALSE && numsides > 1 &&
+ strinlst(mynode(), node_c, node_v)) {
+ (void) mddserror(ep, MDE_DS_CANTDELSELF, sp->setno,
+ mynode(), NULL, sp->setname);
+ rval = -1;
+ goto out2;
+ }
+
+ /*
+ * Setup the mediator record roll-back structure for a trad diskset.
+ *
+ * For a MN diskset, the deletion of a host in the diskset
+ * does not cause an update of the mediator record. If the
+ * host deletion will cause the diskset to be removed (this is
+ * the last host being removed or all hosts are being removed)
+ * then the mediator record must have already been removed by the
+ * user or this delete host operation will fail (a check for
+ * this is done later in this routine).
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ (void) memset(&rb_medr, '\0', sizeof (med_rec_t));
+ rb_medr.med_rec_mag = MED_REC_MAGIC;
+ rb_medr.med_rec_rev = MED_REC_REV;
+ rb_medr.med_rec_fl = 0;
+ rb_medr.med_rec_sn = sp->setno;
+ (void) strcpy(rb_medr.med_rec_snm, sp->setname);
+ for (i = 0; i < MD_MAXSIDES; i++)
+ (void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]);
+ rb_medr.med_rec_meds = sd->sd_med; /* structure assigment */
+ (void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
+ rb_medr.med_rec_foff = 0;
+ crcgen(&rb_medr, &rb_medr.med_rec_cks,
+ sizeof (med_rec_t), NULL);
+
+ /* Bring the mediator record up to date with the set record */
+ medr = rb_medr; /* structure assignment */
+
+ if ((max_meds = get_max_meds(ep)) == 0) {
+ rval = -1;
+ goto out2;
+ }
+ }
+
+ /*
+ * For traditional diskset:
+ * Check to see if all the hosts we are trying to delete the set from
+ * have a set "setname" that is the same as ours, i.e. - same name,
+ * same time stamp, same genid. We only do this if forceflg is not
+ * specified or we are in OHA mode.
+ */
+ if (!(MD_MNSET_DESC(sd)) && (forceflg == FALSE || oha == TRUE)) {
+ int fix_node_v = FALSE;
+ int j;
+
+ for (i = 0; i < node_c; i++) {
+ /* We skip this side */
+ if (strcmp(mynode(), node_v[i]) == 0)
+ continue;
+
+ has_set = nodehasset(sp, node_v[i], NHS_NSTG_EQ, ep);
+
+ if (has_set < 0) {
+ char *anode[1];
+
+ /*
+ * Can't talk to the host only allowed in OHA
+ * mode.
+ */
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+
+ /*
+ * We got an error we do not, or are not,
+ * prepared to handle.
+ */
+ if (! mdiserror(ep, MDE_NO_SET) &&
+ ! mdismddberror(ep, MDE_DB_NODB)) {
+ rval = -1;
+ goto out2;
+ }
+ mdclrerror(ep);
+
+ /*
+ * If we got here: both hosts are up; a host in
+ * our set record does not have the set. So we
+ * delete the host from our set and invalidate
+ * the node.
+ */
+ anode[0] = Strdup(node_v[i]);
+
+ rval = del_host_noset(sp, anode, ep);
+
+ /*
+ * If we delete a host, make sure the mediator
+ * hosts are made aware of this.
+ */
+ for (j = 0; j < MD_MAXSIDES; j++) {
+ if (strcmp(medr.med_rec_nodes[j],
+ node_v[i]) != 0)
+ continue;
+ (void) memset(&medr.med_rec_nodes[j],
+ '\0', sizeof (md_node_nm_t));
+ }
+ crcgen(&medr, &medr.med_rec_cks,
+ sizeof (med_rec_t), NULL);
+
+ rb_medr = medr; /* struct assignment */
+
+ Free(anode[0]);
+
+ if (rval == -1)
+ goto out2;
+
+ node_v[i][0] = '\0';
+ fix_node_v = TRUE;
+ continue;
+ }
+
+ /*
+ * If we can talk to the host, and they do not have the
+ * exact set, then we disallow the operation.
+ */
+ if (has_set == FALSE) {
+ (void) mddserror(ep, MDE_DS_NODENOSET,
+ sp->setno, node_v[i], NULL, sp->setname);
+ rval = -1;
+ goto out2;
+ }
+ }
+
+ /*
+ * Here we prune the node_v's that were invalidated above.
+ */
+ if (fix_node_v == TRUE) {
+ i = 0;
+ while (i < node_c) {
+ if (node_v[i][0] == '\0') {
+ for (j = i; (j + 1) < node_c; j++)
+ node_v[j] = node_v[j + 1];
+ node_c--;
+ }
+ i++;
+ }
+ /*
+ * If we are left with no nodes, then we have
+ * completed the operation.
+ */
+ if (node_c == 0) {
+ /*
+ * Inform the mediator hosts of the new node
+ * list
+ */
+ for (i = 0; i < max_meds; i++) {
+ if (sd->sd_med.n_lst[i].a_cnt == 0)
+ continue;
+
+ if (clnt_med_upd_rec(
+ &sd->sd_med.n_lst[i], sp, &medr,
+ ep))
+ mdclrerror(ep);
+ }
+ rval = 0;
+ goto out2;
+ }
+ }
+ }
+
+ /*
+ * For multinode diskset:
+ * If forceflag is FALSE then check to see if all the hosts we
+ * are trying to delete the set from have a set "setname" that
+ * is the same as ours, i.e. - same name, same time stamp, same genid.
+ * If forceflag is TRUE, then we don't care if the hosts being
+ * deleted have the same set information or not since user is forcing
+ * those hosts to be deleted.
+ */
+ if ((MD_MNSET_DESC(sd)) && (forceflg == FALSE)) {
+ for (i = 0; i < node_c; i++) {
+ /* We skip this node since comparing against it */
+ if (strcmp(mynode(), node_v[i]) == 0)
+ continue;
+
+ has_set = nodehasset(sp, node_v[i], NHS_NSTG_EQ, ep);
+
+ if (has_set < 0) {
+ rval = -1;
+ goto out2;
+ }
+
+ /*
+ * If we can talk to the host, and they do not have the
+ * exact set, then we disallow the operation.
+ */
+ if (has_set == FALSE) {
+ (void) mddserror(ep, MDE_DS_NODENOSET,
+ sp->setno, node_v[i], NULL, sp->setname);
+ rval = -1;
+ goto out2;
+ }
+ }
+ }
+
+ /*
+ * For traditional diskset:
+ * Can't allow user to delete their node (without deleting all nodes)
+ * out of a set in OHA mode, would leave a real mess.
+ * This action was already failed above for a MN diskset.
+ */
+ if (!(MD_MNSET_DESC(sd)) && (oha == TRUE) &&
+ strinlst(mynode(), node_c, node_v)) {
+ /* Can directly return since !MN diskset; nothing to unlock */
+ return (mddserror(ep, MDE_DS_OHACANTDELSELF, sp->setno,
+ mynode(), NULL, sp->setname));
+ }
+
+
+ /* Get the drive descriptors for this set */
+ if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+ ep)) == NULL) {
+ if (! mdisok(ep)) {
+ rval = -1;
+ goto out2;
+ }
+ }
+
+ /*
+ * We have been asked to delete all the hosts in the set, i.e. - delete
+ * the whole set.
+ */
+ if (node_c == numsides) {
+ /*
+ * This is only a valid operation if all drives have been
+ * removed first.
+ */
+
+ if (dd != NULL) {
+ (void) mddserror(ep, MDE_DS_HASDRIVES, sp->setno,
+ NULL, NULL, sp->setname);
+ rval = -1;
+ goto out2;
+ }
+
+ /*
+ * If a mediator is currently associated with this set,
+ * fail the deletion of the last host(s).
+ */
+ if (sd->sd_med.n_cnt != 0) {
+ (void) mddserror(ep, MDE_DS_HASMED, sp->setno,
+ NULL, NULL, sp->setname);
+ rval = -1;
+ goto out2;
+ }
+
+ if (! mdisok(ep)) {
+ rval = -1;
+ goto out2;
+ }
+
+ rval = del_set_nodrives(sp, node_c, node_v, oha, ep);
+ remote_sets_deleted = 1;
+ goto out2;
+ }
+
+ /*
+ * Get timeout values in case we need to roll back
+ */
+ (void) memset(&mhiargs, '\0', sizeof (mhiargs));
+ if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0) {
+ rval = -1;
+ goto out2;
+ }
+
+ if (dd != NULL) {
+ /*
+ * We need this around for re-adding DB side names later.
+ */
+ if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
+ rval = -1;
+ goto out2;
+ }
+
+ /*
+ * Alloc nodeid list if drives are present in diskset.
+ * nodeid list is used to reset mirror owners if the
+ * owner is a deleted node.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ node_id_list = Zalloc(sizeof (int) * node_c);
+ }
+ }
+
+ /* Lock the set on current set members */
+ if (!(MD_MNSET_DESC(sd))) {
+ md_rb_sig_handling_on();
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ rval = -1;
+ goto out2;
+ }
+ lock_flag = 1;
+ }
+ }
+
+ RB_TEST(1, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 1; /* level 1 */
+
+ RB_TEST(2, "deletehosts", ep)
+
+ if (MD_MNSET_DESC(sd)) {
+ md_mnnode_desc *saved_nd_next;
+ mddb_config_t c;
+
+ if (dd != NULL) {
+ /*
+ * Notify rpc.mdcommd on all nodes of a nodelist change.
+ * Start by suspending rpc.mdcommd (which drains it of
+ * all messages), then change the nodelist followed
+ * by a reinit and resume.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename,
+ COMMDCTL_SUSPEND, sp,
+ MD_MSG_CLASS0,
+ MD_MSCF_NO_FLAGS, ep)) {
+ rval = -1;
+ goto out2;
+ }
+ suspendall_flag = 1;
+ nd = nd->nd_next;
+ }
+ /*
+ * Is current set STALE?
+ * Need to know this if delete host fails and node
+ * is re-joined to diskset.
+ */
+ (void) memset(&c, 0, sizeof (c));
+ c.c_id = 0;
+ c.c_setno = sp->setno;
+ if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+ (void) mdstealerror(ep, &c.c_mde);
+ rval = -1;
+ goto out2;
+ }
+ if (c.c_flags & MDDB_C_STALE) {
+ stale_flag = MNSET_IS_STALE;
+ }
+ }
+
+ /*
+ * For each node being deleted, set DEL flag and
+ * reset OK flag on that node first.
+ * Until a node has turned off its own
+ * rpc.metad's NODE_OK flag, that node could be
+ * considered for master during a reconfig.
+ */
+ for (i = 0; i < node_c; i++) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ /* Something wrong, handle this in next loop */
+ if (nd == NULL)
+ continue;
+
+ /* If node_id_list is alloc'd, fill in for later use */
+ if (node_id_list)
+ node_id_list[i] = nd->nd_nodeid;
+
+ /* All nodes are guaranteed to be ALIVE unless OHA */
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ continue;
+ }
+
+ /* Only changing my local cache of node list */
+ saved_nd_next = nd->nd_next;
+ nd->nd_next = NULL;
+
+ /* Set flags for del host to DEL on that host */
+ if (clnt_upd_nr_flags(node_v[i], sp,
+ nd, MD_NR_DEL, NULL, ep)) {
+ nd->nd_next = saved_nd_next;
+ goto rollback;
+ }
+ nd->nd_next = saved_nd_next;
+ }
+ for (i = 0; i < node_c; i++) {
+ /*
+ * Turn off owner flag in nodes to be deleted
+ * if this node has been joined.
+ * Also, turn off NODE_OK and turn on NODE_DEL
+ * for nodes to be deleted.
+ * These flags are used to set the node
+ * record flags in all nodes in the set.
+ * Only withdraw nodes that are joined.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /*
+ * Don't communicate with non-ALIVE node if
+ * in OHA - but set flags in master list so
+ * alive nodes are updated correctly.
+ */
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd->nd_flags |= MD_MN_NODE_DEL;
+ nd->nd_flags &= ~MD_MN_NODE_OK;
+ nd = nd->nd_next;
+ continue;
+ }
+ if (nd->nd_flags & MD_MN_NODE_OWN) {
+ /*
+ * Going to set locally cached node
+ * flags to rollback join so in case
+ * of error, the rollback code knows
+ * which nodes to re-join.
+ * rpc.metad ignores the RB_JOIN flag.
+ */
+ nd->nd_flags |= MD_MN_NODE_RB_JOIN;
+ nd->nd_flags &= ~MD_MN_NODE_OWN;
+
+ /*
+ * Be careful in ordering of following
+ * steps so that recovery from a panic
+ * between the steps is viable.
+ * Only reset master info in rpc.metad
+ * - don't reset local cached info
+ * which will be used to set master
+ * info back if failure (rollback).
+ */
+ if (clnt_withdrawset(nd->nd_nodename,
+ sp, ep))
+ goto rollback;
+
+ /* Reset master on deleted node */
+ if (clnt_mnsetmaster(node_v[i], sp, "",
+ MD_MN_INVALID_NID, ep))
+ goto rollback;
+ }
+
+ nd->nd_flags |= MD_MN_NODE_DEL;
+ nd->nd_flags &= ~MD_MN_NODE_OK;
+ }
+ nd = nd->nd_next;
+ }
+ }
+
+ /*
+ * Now, reset owner and set delete flags for the
+ * deleted nodes on all nodes.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip non-ALIVE node if in OHA */
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+ sd->sd_nodelist, MD_NR_SET, NULL, ep)) {
+ goto rollback;
+ }
+ nd = nd->nd_next;
+ }
+ /*
+ * Notify rpc.mdcommd on all nodes of a nodelist change.
+ * Send reinit command to mdcommd which forces it to get
+ * fresh set description.
+ */
+ if (suspendall_flag) {
+ /* Send reinit */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /* Class is ignored for REINIT */
+ if (clnt_mdcommdctl(nd->nd_nodename,
+ COMMDCTL_REINIT,
+ sp, NULL, MD_MSCF_NO_FLAGS, ep)) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to reinit rpc.mdcommd.\n"));
+ goto rollback;
+ }
+ nd = nd->nd_next;
+ }
+ /* Send resume */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename,
+ COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
+ MD_MSCF_DONT_RESUME_CLASS1, ep)) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to resume rpc.mdcommd.\n"));
+ goto rollback;
+ }
+ nd = nd->nd_next;
+ }
+ meta_ping_mnset(sp->setno);
+ }
+ }
+
+
+ /*
+ * Mark the set record MD_SR_DEL on the hosts we are deleting
+ * If a MN diskset and OHA mode, don't issue RPC to nodes that
+ * are not ALIVE.
+ * If a MN diskset and not in OHA mode, then all nodes must respond
+ * to RPC (be alive) or this routine will return failure.
+ * If a traditional diskset, ignore all RPC failures if in OHA mode.
+ */
+ for (i = 0; i < node_c; i++) {
+
+ RB_TEST(3, "deletehosts", ep)
+
+ if ((MD_MNSET_DESC(sd)) && (oha == TRUE)) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
+ break;
+ }
+ nd = nd->nd_next;
+ }
+ if (nd == NULL) {
+ (void) mddserror(ep, MDE_DS_NODENOTINSET,
+ sp->setno, node_v[i], NULL, sp->setname);
+ goto rollback;
+ } else if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ /* Skip non-ALIVE node if in OHA mode */
+ continue;
+ } else {
+ if (clnt_upd_sr_flags(node_v[i], sp,
+ MD_SR_DEL, ep)) {
+ goto rollback;
+ }
+ }
+ } else if ((MD_MNSET_DESC(sd)) && (oha == FALSE)) {
+ /*
+ * All nodes should be alive in non-oha mode.
+ */
+ if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+ goto rollback;
+ }
+ } else {
+ /*
+ * For traditional diskset, issue the RPC and
+ * ignore RPC failure if in OHA mode.
+ */
+ if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ goto rollback;
+ }
+ }
+
+ RB_TEST(4, "deletehosts", ep)
+ }
+
+ RB_TEST(5, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 2; /* level 2 */
+
+ RB_TEST(6, "deletehosts", ep)
+
+ /* Delete the set on the hosts we are deleting */
+ if (del_set_on_hosts(sp, sd, dd, node_c, node_v, oha, ep)) {
+ if (node_id_list)
+ Free(node_id_list);
+ /*
+ * Failure during del_set_on_hosts would have recreated
+ * the diskset on the remote hosts, but for multi-owner
+ * disksets need to set node flags properly and REINIT and
+ * RESUME rpc.mdcommd, so just let the rollback code
+ * do this.
+ */
+ if (MD_MNSET_DESC(sd))
+ goto rollback;
+ return (-1);
+ }
+ remote_sets_deleted = 1;
+
+ RB_TEST(19, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 3; /* level 3 */
+
+ RB_TEST(20, "deletehosts", ep)
+
+ /* Delete the host from sets on hosts not being deleted */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ /* All nodes are guaranteed to be ALIVE unless in oha mode */
+ while (nd) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ /* Skip nodes being deleted */
+ if (strinlst(nd->nd_nodename, node_c, node_v)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_delhosts(nd->nd_nodename, sp, node_c, node_v,
+ ep) == -1) {
+ goto rollback;
+ }
+
+ RB_TEST(21, "deletehosts", ep)
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes being deleted */
+ if (strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (clnt_delhosts(sd->sd_nodes[i], sp, node_c, node_v,
+ ep) == -1) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ goto rollback;
+ }
+
+ RB_TEST(21, "deletehosts", ep)
+ }
+ }
+
+ /* We have drives */
+ if (dd != NULL) {
+ RB_TEST(22, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 4; /* level 4 */
+
+ RB_TEST(23, "deletehosts", ep)
+
+ /*
+ * Delete the old sidename for each drive on all the hosts.
+ * If a multi-node diskset, each host only stores
+ * the side information for itself. So, a multi-node
+ * diskset doesn't delete the old sidename for
+ * an old host.
+ *
+ * If a MN diskset, reset owners of mirrors that are
+ * owned by the deleted nodes.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes being deleted */
+ if (strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ if (clnt_del_drv_sidenms(sd->sd_nodes[i], sp,
+ ep)) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ metaflushsetname(sp);
+ goto rollback;
+ }
+
+ RB_TEST(24, "deletehosts", ep)
+ }
+ } else {
+ nd = sd->sd_nodelist;
+ /* All nodes guaranteed to be ALIVE unless in oha mode */
+ while (nd) {
+ /*
+ * If mirror owner was set to a deleted node, then
+ * each existing node resets mirror owner to NULL.
+ *
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ /* Skip nodes being deleted */
+ if (strinlst(nd->nd_nodename, node_c, node_v)) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ /*
+ * If mirror owner is a deleted node, reset mirror
+ * owners to NULL. If an error occurs, print a
+ * warning and continue. Don't fail metaset
+ * because of mirror owner reset problem since next
+ * node to grab mirror will resolve this issue.
+ * Before next node grabs mirrors, metaset will show
+ * the deleted node as owner which is why an attempt
+ * to reset the mirror owner is made.
+ */
+ if (clnt_reset_mirror_owner(nd->nd_nodename, sp,
+ node_c, &node_id_list[0], &xep) == -1) {
+ mde_perror(&xep, dgettext(TEXT_DOMAIN,
+ "Unable to reset mirror owner on"
+ " node %s\n"), nd->nd_nodename);
+ mdclrerror(&xep);
+ }
+
+ RB_TEST(21, "deletehosts", ep)
+ nd = nd->nd_next;
+ }
+ }
+ }
+
+ RB_TEST(25, "deletehosts", ep)
+
+ RB_PREEMPT;
+ rb_level = 4; /* level 4 */
+
+ RB_TEST(26, "deletehosts", ep)
+
+ /*
+ * Bring the mediator record up to date with the set record for
+ * traditional diskset.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ medr = rb_medr; /* structure assignment */
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ if (strinlst(sd->sd_nodes[i], node_c, node_v))
+ (void) memset(&medr.med_rec_nodes[i],
+ '\0', sizeof (md_node_nm_t));
+ else
+ (void) strcpy(medr.med_rec_nodes[i],
+ sd->sd_nodes[i]);
+ }
+ crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+ /* Inform the mediator hosts of the new node list */
+ for (i = 0; i < max_meds; i++) {
+ if (sd->sd_med.n_lst[i].a_cnt == 0)
+ continue;
+
+ if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
+ &medr, ep)) {
+ if (oha == TRUE && mdanyrpcerror(ep)) {
+ mdclrerror(ep);
+ continue;
+ }
+ goto rollback;
+ }
+ }
+ }
+
+ RB_TEST(27, "deletehosts", ep)
+
+ /*
+ * For traditional diskset:
+ * We are deleting ourselves out of the set and we have drives to
+ * consider; so we need to halt the set, release the drives and
+ * reset the timeout. **** THIS IS A ONE WAY TICKET, NO ROLL BACK
+ * IS POSSIBLE AS SOON AS THE HALT SET COMPLETES, SO THIS IS DONE
+ * WITH ALL SIGNALS BLOCKED AND LAST ****
+ *
+ * This situation cannot occur in a MN diskset since a node can't
+ * delete itself unless all nodes are being deleted and a diskset
+ * cannot contain any drives if all nodes are being deleted.
+ * So, don't even test for this if a MN diskset.
+ */
+ if (!(MD_MNSET_DESC(sd)) && (dd != NULL) &&
+ strinlst(mynode(), node_c, node_v)) {
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, ep) < 0) {
+ rval = -1;
+ goto out1;
+ }
+
+ if (halt_set(sp, ep)) {
+ rval = -1;
+ goto out1;
+ }
+
+ if (rel_own_bydd(sp, dd, FALSE, ep))
+ rval = -1;
+
+out1:
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ }
+
+out2:
+ /*
+ * Unlock diskset by resuming messages across the diskset.
+ * Just resume all classes so that resume is the same whether
+ * just one class was locked or all classes were locked.
+ */
+ if ((suspend1_flag) || (suspendall_flag)) {
+ /* Send resume */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /*
+ * Skip nodes being deleted if remote set
+ * was deleted since rpc.mdcommd may no longer
+ * be running on remote node.
+ */
+ if ((remote_sets_deleted == 1) &&
+ (strinlst(nd->nd_nodename, node_c, node_v))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+ sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to resume rpc.mdcommd.\n"));
+ }
+ nd = nd->nd_next;
+ }
+ meta_ping_mnset(sp->setno);
+ }
+
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (lock_flag) {
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_unlock_set(nd->nd_nodename,
+ cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i],
+ cl_sk, &xep)) {
+ if (oha == TRUE &&
+ mdanyrpcerror(&xep)) {
+ mdclrerror(&xep);
+ continue;
+ }
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ }
+ }
+ }
+ cl_set_setkey(NULL);
+
+out3:
+ metafreereplicalist(rlp);
+ if (node_id_list)
+ Free(node_id_list);
+
+ metaflushsetname(sp);
+
+ if (MD_MNSET_DESC(sd)) {
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ } else {
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+
+
+ return (rval);
+
+rollback:
+ /* all signals already blocked for MN diskset */
+ if (!(MD_MNSET_DESC(sd))) {
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+ }
+
+ rval = -1;
+
+ max_genid = sd->sd_genid;
+
+
+ /*
+ * Send reinit command to rpc.mdcommd which forces it to get
+ * fresh set description and resume all classes but class 0.
+ * Don't send any commands to rpc.mdcommd if set on that node
+ * has been removed.
+ */
+ if (suspendall_flag) {
+ /* Send reinit */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /*
+ * If the remote set was deleted, rpc.mdcommd
+ * may no longer be running so send nothing to it.
+ */
+ if ((remote_sets_deleted == 1) &&
+ (strinlst(nd->nd_nodename, node_c, node_v))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /* Class is ignored for REINIT */
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+ sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+ mde_perror(&xep, dgettext(TEXT_DOMAIN,
+ "Unable to reinit rpc.mdcommd.\n"));
+ mdclrerror(&xep);
+ }
+ nd = nd->nd_next;
+ }
+ /* Send resume */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /*
+ * If the remote set was deleted, rpc.mdcommd
+ * may no longer be running so send nothing to it.
+ */
+ if ((remote_sets_deleted == 1) &&
+ (strinlst(nd->nd_nodename, node_c, node_v))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+ sp, MD_MSG_CLASS0, MD_MSCF_DONT_RESUME_CLASS1,
+ &xep)) {
+ mde_perror(&xep, dgettext(TEXT_DOMAIN,
+ "Unable to resume rpc.mdcommd.\n"));
+ mdclrerror(&xep);
+ }
+ nd = nd->nd_next;
+ }
+ meta_ping_mnset(sp->setno);
+ }
+
+ /* level 2 */
+ if (rb_level > 1) {
+ md_set_record *sr;
+ md_replicalist_t *rl;
+
+ recreate_set(sp, sd);
+
+ /*
+ * Lock out other meta* commands on nodes with the newly
+ * re-created sets by suspending class 1 messages
+ * across the diskset.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip nodes not being deleted */
+ if (!(strinlst(nd->nd_nodename, node_c, node_v))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /* Suspend commd on nodes with re-created sets */
+ if (clnt_mdcommdctl(nd->nd_nodename,
+ COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+ MD_MSCF_NO_FLAGS, &xep)) {
+ mde_perror(&xep, dgettext(TEXT_DOMAIN,
+ "Unable to suspend rpc.mdcommd.\n"));
+ mdclrerror(&xep);
+ }
+ nd = nd->nd_next;
+ }
+
+ max_genid++;
+
+ /*
+ * See if we have to re-add the drives specified.
+ */
+ for (i = 0; i < node_c; i++) {
+ if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i])
+ == 0) {
+ break;
+ }
+ nd = nd->nd_next;
+ }
+ if (nd == 0)
+ continue;
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+ continue;
+ }
+
+ /* Don't care if set record is MN or not */
+ if (clnt_getset(node_v[i], sp->setname, MD_SET_BAD, &sr,
+ &xep) == -1) {
+ mdclrerror(&xep);
+ continue;
+ }
+
+ /* Drive already added, skip to next node */
+ if (sr->sr_drivechain != NULL) {
+ /*
+ * Set record structure was allocated from RPC
+ * routine getset so this structure is only of
+ * size md_set_record even if the MN flag is
+ * set. So, clear the flag so that the free
+ * code doesn't attempt to free a structure
+ * the size of md_mnset_record.
+ */
+ sr->sr_flags &= ~MD_SR_MN;
+ free_sr(sr);
+ continue;
+ }
+
+ if (clnt_adddrvs(node_v[i], sp, dd, sr->sr_ctime,
+ sr->sr_genid, &xep) == -1)
+ mdclrerror(&xep);
+
+ if (clnt_upd_dr_flags(node_v[i], sp, dd, MD_DR_OK,
+ &xep) == -1)
+ mdclrerror(&xep);
+
+ /*
+ * Set record structure was allocated from RPC routine
+ * getset so this structure is only of size
+ * md_set_record even if the MN flag is set. So,
+ * clear the flag so that the free code doesn't
+ * attempt to free a structure the size of
+ * md_mnset_record.
+ */
+ sr->sr_flags &= ~MD_SR_MN;
+ free_sr(sr);
+ }
+ max_genid += 3;
+
+ for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+ md_replica_t *r = rl->rl_repp;
+ /*
+ * This is not the first replica being added to the
+ * diskset so call with ADDSIDENMS_BCAST. If this
+ * is a traditional diskset, the bcast flag is ignored
+ * since traditional disksets don't use the rpc.mdcommd.
+ */
+ if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno,
+ DB_ADDSIDENMS_BCAST, &xep))
+ mdclrerror(&xep);
+ }
+
+ /*
+ * Add the device names for the new sides into the namespace,
+ * on all hosts not being deleted.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Find a node that is not being deleted */
+ if (!strinlst(nd->nd_nodename, node_c,
+ node_v)) {
+ j = nd->nd_nodeid;
+ break;
+ }
+ nd = nd->nd_next;
+ }
+ } else {
+ for (j = 0; j < MD_MAXSIDES; j++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[j][0] == '\0')
+ continue;
+
+ /* Find a node that is not being deleted */
+ if (!strinlst(sd->sd_nodes[j], node_c, node_v))
+ break;
+ }
+ }
+
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /* Skip nodes not being deleted */
+ if (!strinlst(nd->nd_nodename, node_c,
+ node_v)) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ /* this side was just created, add the names */
+ if (add_md_sidenms(sp, nd->nd_nodeid, j, &xep))
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Skip nodes not being deleted */
+ if (!strinlst(sd->sd_nodes[i], node_c, node_v))
+ continue;
+
+ /* this side was just created, add the names */
+ if (add_md_sidenms(sp, i, j, &xep))
+ mdclrerror(&xep);
+ }
+ }
+ }
+
+ /* level 4 */
+ if (rb_level > 3 && dd != NULL) {
+ /*
+ * Add the new sidename for each drive to all the hosts
+ * Multi-node disksets only store the sidename for
+ * that host, so there is nothing to re-add.
+ */
+ if (!(MD_MNSET_DESC(sd))) {
+ for (j = 0; j < MD_MAXSIDES; j++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[j][0] == '\0')
+ continue;
+
+ /* Skip nodes not being deleted */
+ if (!strinlst(sd->sd_nodes[j], node_c, node_v))
+ break;
+ }
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_add_drv_sidenms(sd->sd_nodes[i],
+ sd->sd_nodes[j], sp, sd, node_c, node_v,
+ &xep))
+ mdclrerror(&xep);
+ }
+ }
+
+ }
+
+ /* level 5 */
+ if ((rb_level > 4) && (!(MD_MNSET_DESC(sd)))) {
+ /* rollback the mediator record */
+ for (i = 0; i < max_meds; i++) {
+ if (sd->sd_med.n_lst[i].a_cnt == 0)
+ continue;
+
+ if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
+ &rb_medr, &xep))
+ mdclrerror(&xep);
+ }
+ }
+
+ /* level 3 */
+ if (rb_level > 2) {
+ md_set_record *sr;
+ md_mnset_record *mnsr;
+
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ while (nd) {
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ /* Record should be for a multi-node diskset */
+ if (clnt_mngetset(nd->nd_nodename, sp->setname,
+ MD_SET_BAD, &mnsr, &xep) == -1) {
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ continue;
+ }
+
+ has_set = 1;
+
+ nr = mnsr->sr_nodechain;
+ while (nr) {
+ if (nd->nd_nodeid == nr->nr_nodeid) {
+ break;
+ }
+ nr = nr->nr_next;
+ }
+ if (nr == NULL)
+ has_set = 0;
+
+ free_sr((struct md_set_record *)mnsr);
+ if (has_set) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ if (clnt_addhosts(nd->nd_nodename, sp, node_c,
+ node_v, &xep) == -1)
+ mdclrerror(&xep);
+
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ /* Record should be for a non-multi-node set */
+ if (clnt_getset(sd->sd_nodes[i], sp->setname,
+ MD_SET_BAD, &sr, &xep) == -1) {
+ mdclrerror(&xep);
+ continue;
+ }
+
+ /*
+ * Set record structure was allocated from RPC
+ * routine getset so this structure is only of
+ * size md_set_record even if the MN flag is
+ * set. So, clear the flag so that the free
+ * code doesn't attempt to free a structure
+ * the size of md_mnset_record.
+ */
+ if (MD_MNSET_REC(sr)) {
+ sr->sr_flags &= ~MD_SR_MN;
+ free_sr(sr);
+ continue;
+ }
+
+ has_set = 1;
+ for (j = 0; j < MD_MAXSIDES; j++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[j][0] == '\0')
+ continue;
+
+ if (sr->sr_nodes[j][0] == '\0') {
+ has_set = 0;
+ break;
+ }
+ }
+
+ free_sr(sr);
+ if (has_set)
+ continue;
+
+ if (clnt_addhosts(sd->sd_nodes[i], sp, node_c,
+ node_v, &xep) == -1)
+ mdclrerror(&xep);
+ }
+ }
+ max_genid++;
+ }
+
+ /* level 1 */
+ if (rb_level > 0) {
+ max_genid++;
+ /* Sets MD_SR_OK on given nodes. */
+ resync_genid(sp, sd, max_genid, node_c, node_v);
+
+ /*
+ * For MN diskset:
+ * On each newly re-added node, set the node record for that
+ * node to OK. Then set all node records for the newly added
+ * nodes on all nodes to ok.
+ *
+ * By setting a node's own node record to ok first, even if
+ * the node re-adding the hosts panics, the rest of the nodes
+ * can determine the same node list during the choosing of the
+ * master during reconfig. So, only nodes considered for
+ * mastership are nodes that have both MD_MN_NODE_OK and
+ * MD_SR_OK set on that node's rpc.metad. If all nodes have
+ * MD_SR_OK set, but no node has its own MD_MN_NODE_OK set,
+ * then the set will be removed during reconfig since a panic
+ * occurred during the re-creation of the deletion of
+ * the initial diskset.
+ */
+ if (MD_MNSET_DESC(sd)) {
+ md_mnnode_desc *saved_nd_next;
+ if (dd != NULL) {
+ /*
+ * Notify rpc.mdcommd on all nodes of a
+ * nodelist change. Start by suspending
+ * rpc.mdcommd (which drains it of all
+ * messages), then change the nodelist
+ * followed by a reinit and resume.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags &
+ MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename,
+ COMMDCTL_SUSPEND, sp,
+ MD_MSG_CLASS0,
+ MD_MSCF_NO_FLAGS, &xep)) {
+ mde_perror(&xep,
+ dgettext(TEXT_DOMAIN,
+ "Unable to suspend "
+ "rpc.mdcommd.\n"));
+ mdclrerror(&xep);
+ }
+ suspendall_flag_rb = 1;
+ nd = nd->nd_next;
+ }
+ }
+ for (i = 0; i < node_c; i++) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i])
+ == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ /* Something wrong, finish this in next loop */
+ if (nd == NULL)
+ continue;
+
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ continue;
+ }
+
+ if (dd != NULL) {
+ /* Set master on re-joining node. */
+ if (clnt_mnsetmaster(node_v[i], sp,
+ sd->sd_mn_master_nodenm,
+ sd->sd_mn_master_nodeid, &xep)) {
+ mdclrerror(&xep);
+ }
+
+ /*
+ * Re-join set to same state as
+ * before - stale or non-stale.
+ */
+ if (clnt_joinset(node_v[i], sp,
+ stale_flag, &xep)) {
+ mdclrerror(&xep);
+ }
+ }
+
+ /* Only changing my local cache of node list */
+ saved_nd_next = nd->nd_next;
+ nd->nd_next = NULL;
+
+ /* Set record for host to ok on that host */
+ if (clnt_upd_nr_flags(node_v[i], sp,
+ nd, MD_NR_OK, NULL, &xep)) {
+ mdclrerror(&xep);
+ }
+ nd->nd_next = saved_nd_next;
+ }
+
+ /* Now set all node records on all nodes to be ok */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+ sd->sd_nodelist, MD_NR_OK, NULL, &xep)) {
+ mdclrerror(&xep);
+ }
+ nd = nd->nd_next;
+ }
+ }
+ }
+
+ /*
+ * Notify rpc.mdcommd on all nodes of a nodelist change.
+ * Send reinit command to mdcommd which forces it to get
+ * fresh set description.
+ */
+ if (suspendall_flag_rb) {
+ /* Send reinit */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+
+ /* Class is ignored for REINIT */
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+ sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+ mde_perror(&xep, dgettext(TEXT_DOMAIN,
+ "Unable to reinit rpc.mdcommd.\n"));
+ mdclrerror(&xep);
+ }
+ nd = nd->nd_next;
+ }
+ }
+
+ /*
+ * Unlock diskset by resuming messages across the diskset.
+ * Just resume all classes so that resume is the same whether
+ * just one class was locked or all classes were locked.
+ */
+ if ((suspend1_flag) || (suspendall_flag) || (suspendall_flag_rb)) {
+ /* Send resume */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+ sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+ mde_perror(&xep, dgettext(TEXT_DOMAIN,
+ "Unable to resume rpc.mdcommd.\n"));
+ }
+ nd = nd->nd_next;
+ }
+ meta_ping_mnset(sp->setno);
+ }
+
+ /*
+ * Start a resync thread on the re-added nodes
+ * if set is not stale. Also start a thread to update the
+ * abr state of all soft partitions
+ */
+ if (stale_flag != MNSET_IS_STALE) {
+ for (i = 0; i < node_c; i++) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ nd = sd->sd_nodelist;
+ while (nd) {
+ if (strcmp(nd->nd_nodename, node_v[i])
+ == 0)
+ break;
+ nd = nd->nd_next;
+ }
+ if (nd == NULL)
+ continue;
+
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ continue;
+ }
+
+ if (dd != 0) {
+ if (clnt_mn_mirror_resync_all(node_v[i],
+ sp->setno, &xep)) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to start resync "
+ "thread.\n"));
+ }
+ if (clnt_mn_sp_update_abr(node_v[i],
+ sp->setno, &xep)) {
+ mde_perror(ep, dgettext(TEXT_DOMAIN,
+ "Unable to start sp update "
+ "thread.\n"));
+ }
+ }
+ }
+ }
+
+ /* level 0 */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ /* Don't test lock flag since guaranteed to be set if in rollback */
+ if (MD_MNSET_DESC(sd)) {
+ nd = sd->sd_nodelist;
+ while (nd) {
+ /*
+ * During OHA mode, don't issue RPCs to
+ * non-alive nodes since there is no reason to
+ * wait for RPC timeouts.
+ */
+ if ((oha == TRUE) &&
+ (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+ nd = nd->nd_next;
+ continue;
+ }
+ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+ mdclrerror(&xep);
+ nd = nd->nd_next;
+ }
+ } else {
+ for (i = 0; i < MD_MAXSIDES; i++) {
+ /* Skip empty slots */
+ if (sd->sd_nodes[i][0] == '\0')
+ continue;
+
+ if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+ mdclrerror(&xep);
+ }
+ }
+ cl_set_setkey(NULL);
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ metafreereplicalist(rlp);
+ if (node_id_list)
+ Free(node_id_list);
+
+ metaflushsetname(sp);
+
+ if (!(MD_MNSET_DESC(sd))) {
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+
+ return (rval);
+}
+
+/*
+ * Look up the drive descriptors of a set on behalf of meta_set_auto_take().
+ *
+ * PRINT_FAST is added to the lookup flags when the set descriptor has
+ * MD_SR_MB_DEVID set.  Any error raised by metaget_drivedesc() is
+ * discarded; the caller only looks at the returned pointer, which it
+ * must be prepared to find NULL.
+ */
+static md_drive_desc *
+auto_take_drivedesc(mdsetname_t *sp, md_set_desc *sd)
+{
+	md_error_t	xep = mdnullerror;
+	md_drive_desc	*dd;
+
+	if (sd->sd_flags & MD_SR_MB_DEVID)
+		dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST,
+		    &xep);
+	else
+		dd = metaget_drivedesc(sp, MD_BASICNAME_OK, &xep);
+
+	if (! mdisok(&xep))
+		mdclrerror(&xep);
+
+	return (dd);
+}
+
+/*
+ * Enable or disable the MD_SR_AUTO_TAKE flag of a diskset.
+ *
+ * sp		- set to operate on
+ * take_val	- non-zero to enable auto-take, zero to disable it
+ * ep		- receives the error of the step that failed
+ *
+ * Enabling is a no-op (returns 0) when the flag is already set.
+ * Otherwise every non-empty slot of sd_nodes must name this host, else
+ * MDE_DS_SINGLEHOST is raised.  After the flag is updated the set's
+ * drives are released with rel_own_bydd().
+ *
+ * Disabling raises MDE_DS_AUTONOTSET when the flag is not set.
+ * Otherwise the flag is cleared and the drives are taken with
+ * tk_own_bydd() using a copy of defmhiargs.
+ *
+ * Failures of the drive lookup and of the release/take calls are
+ * deliberately ignored (best effort); they do not affect the return
+ * value.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int
+meta_set_auto_take(
+	mdsetname_t	*sp,
+	int		take_val,
+	md_error_t	*ep
+)
+{
+	int		i;
+	md_set_desc	*sd;
+	int		rval = 0;
+	md_setkey_t	*cl_sk;
+	md_error_t	xep = mdnullerror;
+	char		*hostname;
+	md_drive_desc	*dd;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	hostname = mynode();
+
+	/* Lock the set on our side */
+	if (clnt_lock_set(hostname, sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	if (take_val) {
+		/* enable auto_take but only if it is not already set */
+		if (! (sd->sd_flags & MD_SR_AUTO_TAKE)) {
+			/* verify that we're the only host in the set */
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				if (sd->sd_nodes[i] == NULL ||
+				    sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (strcmp(sd->sd_nodes[i], hostname) != 0) {
+					(void) mddserror(ep,
+					    MDE_DS_SINGLEHOST, sp->setno,
+					    NULL, NULL, sp->setname);
+					rval = -1;
+					goto out;
+				}
+			}
+
+			/*
+			 * NOTE(review): a failure here is recorded but the
+			 * reservations are still released below; confirm
+			 * that this is the intended behaviour.
+			 */
+			if (clnt_enable_sr_flags(hostname, sp,
+			    MD_SR_AUTO_TAKE, ep))
+				rval = -1;
+
+			/* Disable SCSI reservations */
+			dd = auto_take_drivedesc(sp, sd);
+			if (dd != NULL) {
+				if (rel_own_bydd(sp, dd, TRUE, &xep))
+					mdclrerror(&xep);
+			}
+		}
+
+	} else {
+		/* disable auto_take, if set, or error */
+		if (sd->sd_flags & MD_SR_AUTO_TAKE) {
+			if (clnt_disable_sr_flags(hostname, sp,
+			    MD_SR_AUTO_TAKE, ep))
+				rval = -1;
+
+			/* Enable SCSI reservations */
+			dd = auto_take_drivedesc(sp, sd);
+			if (dd != NULL) {
+				mhd_mhiargs_t	mhiargs = defmhiargs;
+
+				if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep))
+					mdclrerror(&xep);
+			}
+
+		} else {
+			(void) mddserror(ep, MDE_DS_AUTONOTSET, sp->setno,
+			    NULL, NULL, sp->setname);
+			rval = -1;
+		}
+	}
+
+out:
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(hostname, cl_sk, &xep)) {
+		/*
+		 * Report the unlock failure only if nothing failed earlier;
+		 * otherwise keep the first error in ep and release the
+		 * secondary one so it is not dropped uncleared on return.
+		 */
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		else
+			mdclrerror(&xep);
+		rval = -1;
+	}
+	cl_set_setkey(NULL);
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_med.c b/usr/src/lib/lvm/libmeta/common/meta_set_med.c
new file mode 100644
index 0000000000..02b39d39ee
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_med.c
@@ -0,0 +1,1253 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <sys/lvm/md_crc.h>
+#include <sys/lvm/mdmed.h>
+
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/svm.h>
+
+#define MALSIZ 32
+
+/*
+ * Append a private copy (Strdup) of item to the NULL-terminated string
+ * vector *listp and return the number of entries now in the vector.
+ *
+ * The vector is created on first use with MALSIZ slots and is grown by
+ * another MALSIZ slots every time the entry count reaches a multiple of
+ * MALSIZ; the freshly added slots are set to NULL so that the vector
+ * stays NULL-terminated.  The initial slots are presumably zeroed by
+ * Zalloc() (not visible here -- confirm).
+ */
+static int
+add_lst(char ***listp, char *item)
+{
+	char	**lst = *listp;
+	int	cnt = 0;
+	int	k;
+
+	if (lst == NULL) {
+		/* first entry: create the vector */
+		lst = (char **)Zalloc(MALSIZ * sizeof (char *));
+		*listp = lst;
+	} else {
+		/* find the first free (NULL) slot */
+		while (lst[cnt] != NULL)
+			cnt++;
+	}
+
+	lst[cnt++] = Strdup(item);
+
+	/* still room for the terminator in the current chunk */
+	if ((cnt % MALSIZ) != 0)
+		return (cnt);
+
+	/* chunk is full: add MALSIZ empty slots behind the last entry */
+	lst = (char **)Realloc((void *)lst, (cnt + MALSIZ) * sizeof (char *));
+	*listp = lst;
+	for (k = 0; k < MALSIZ; k++)
+		lst[cnt + k] = (char *)NULL;
+
+	return (cnt);
+}
+
+/*
+ * Release a string vector built by add_lst(): free every entry up to the
+ * terminating NULL, free the vector itself and reset *listp to NULL.
+ *
+ * Returns 1 if there was a vector to free, 0 if *listp was already NULL.
+ */
+static int
+del_lst(char ***listp)
+{
+	char	**entry;
+
+	if (*listp == NULL)
+		return (0);
+
+	for (entry = *listp; *entry != NULL; entry++)
+		free(*entry);
+
+	free(*listp);
+	*listp = NULL;
+	return (1);
+}
+
+
+/*
+ * Validate the mediator host names held in mhp.
+ *
+ * For every populated host slot, each name (a_nm[0] plus any aliases):
+ *   - must consist only of printable characters, none of which appear in
+ *     INVALID_IN_NAMES (else MDE_DS_INVALIDMEDNAME);
+ *   - is passed to clnt_med_hostname(), which hands back a hostname
+ *     string that is released here with Free().
+ * The first name of a slot must be identical to the hostname returned
+ * for it (else MDE_DS_NOTNODENAME); every further name must yield that
+ * same first name (else MDE_DS_ALIASNOMATCH).
+ *
+ * Returns 0 if all names are acceptable, otherwise a nonzero value with
+ * the error recorded in ep.
+ */
+static int
+validate_med_nodes(
+	mdsetname_t	*sp,
+	md_h_arr_t	*mhp,
+	md_error_t	*ep
+)
+{
+	char	*hostname;
+	char	*nodename = NULL;
+	char	*nm;
+	char	*cp;
+	int	i, j;
+
+
+	for (i = 0; i < MED_MAX_HOSTS; i++) {
+		if (mhp->n_lst[i].a_cnt == 0)
+			continue;
+
+		for (j = 0; j < mhp->n_lst[i].a_cnt; j++) {
+			nm = mhp->n_lst[i].a_nm[j];
+
+			/*
+			 * The <ctype.h> classifiers are only defined for
+			 * values representable as unsigned char (or EOF), so
+			 * cast to keep 8-bit characters from being passed as
+			 * negative ints.
+			 */
+			for (cp = nm; *cp; cp++)
+				if (!isprint((unsigned char)*cp) ||
+				    strchr(INVALID_IN_NAMES, *cp) != NULL)
+					return (mddserror(ep,
+					    MDE_DS_INVALIDMEDNAME,
+					    sp->setno, nm, NULL, sp->setname));
+
+			if (clnt_med_hostname(nm, &hostname, ep))
+				return (-1);
+
+			if (j == 0) {
+				/* first entry must be the node name itself */
+				if (strcmp(nm, hostname) != 0) {
+					Free(hostname);
+					return (mddserror(ep,
+					    MDE_DS_NOTNODENAME, sp->setno, nm,
+					    NULL, sp->setname));
+				}
+				nodename = nm;
+			} else {
+				/* aliases must belong to that same node */
+				if (strcmp(nodename, hostname) != 0) {
+					Free(hostname);
+					return (mddserror(ep,
+					    MDE_DS_ALIASNOMATCH, sp->setno, nm,
+					    nodename, sp->setname));
+				}
+			}
+			Free(hostname);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * meta_set_addmeds - add mediator hosts to diskset sp.
+ *
+ * node_v[0 .. node_c-1] each name one mediator host as a comma separated
+ * list: the node name followed by optional aliases.  The strings are
+ * split in place with strtok(), so the caller's buffers are modified.
+ *
+ * The work is done in stages tracked by rb_level so that a failure can
+ * be undone at the rollback label:
+ *   level 1 - set is locked; set records on the set's hosts may have
+ *	       been updated with the new mediator list (clnt_updmeds)
+ *   level 2 - all hosts updated; drive descriptors are being fetched
+ *   level 3 - mediator hosts are being sent the new mediator record
+ *   level 4 - kernel mediator list is being updated (traditional set)
+ *
+ * Returns 0 on success.  On failure a nonzero value is returned and the
+ * reason is recorded in ep.
+ */
+int
+meta_set_addmeds(
+	mdsetname_t	*sp,
+	int		node_c,
+	char		**node_v,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd = NULL;
+	md_drive_desc		*dd = NULL;
+	mddb_med_parm_t		mp;
+	mddb_med_upd_parm_t	mup;
+	md_h_arr_t		t;
+	md_h_arr_t		rb_t;
+	med_rec_t		medr;
+	med_rec_t		rb_medr;
+	char			*cp;
+	char			**n_l = NULL;
+	int			n_c = 0;
+	int			i, j;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	int			rval = 0;
+	int			max_meds;
+	md_mnnode_desc		*nd;
+	int			suspend1_flag = 0;
+	int			lock_flag = 0;
+
+	/* Initialize */
+	(void) memset(&t, '\0', sizeof (t));
+	t.n_cnt = node_c;
+	mdclrerror(ep);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	if ((max_meds = get_max_meds(ep)) == 0)
+		return (-1);
+
+	/*
+	 * The mediator information (which is part of the set record) is
+	 * stored in the local mddbs of each node in the diskset.
+	 * Each node's rpc.metad daemon reads in the set
+	 * records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to add
+	 * the mediator information to the set record in their local mddb.
+	 * If a panic of this node leaves the local mddbs set records
+	 * out-of-sync, the reconfig cycle will fix the local mddbs and
+	 * force them back into synchronization.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+				    sp->setno,
+				    nd->nd_nodename, NULL, sp->setname);
+				return (-1);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * t.n_lst[] has room for MED_MAX_HOSTS entries only.  Refuse a
+	 * longer request here instead of letting the parse loop below
+	 * write past the end of the array.
+	 */
+	if (t.n_cnt > MED_MAX_HOSTS)
+		return (mderror(ep, MDE_TOOMANYMED, NULL));
+
+	/* Parse the command line into the md_h_arr_t structure */
+	for (i = 0; i < t.n_cnt; i++) {
+		cp = strtok(node_v[i], ",");
+		j = 0;
+		while (cp) {
+			if (strlen(cp) > (size_t)MD_MAX_NODENAME)
+				return (mddserror(ep, MDE_DS_NODENAMETOOLONG,
+				    sp->setno, cp, NULL, sp->setname));
+			if (j >= MAX_HOST_ADDRS)
+				return (mddserror(ep, MDE_DS_TOOMANYALIAS,
+				    sp->setno, cp, NULL, sp->setname));
+
+			/* length was checked against MD_MAX_NODENAME above */
+			(void) strcpy(t.n_lst[i].a_nm[j], cp);
+
+			j++;
+
+			cp = strtok(NULL, ",");
+		}
+		t.n_lst[i].a_cnt = j;
+	}
+
+	/* Make a list of nodes to check */
+	for (i = 0; i < t.n_cnt; i++)
+		for (j = 0; j < t.n_lst[i].a_cnt; j++)
+			n_c = add_lst(&n_l, t.n_lst[i].a_nm[j]);
+
+	/* Make sure that there are no redundant nodes */
+	rval = nodesuniq(sp, n_c, n_l, ep);
+
+	(void) del_lst(&n_l);
+
+	if (rval != 0)
+		return (rval);
+
+	/*
+	 * Lock the set on current set members.
+	 * Set locking done much earlier for MN diskset than for traditional
+	 * diskset since lock_set and SUSPEND are used to protect against
+	 * other metaset commands running on the other nodes.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	if (validate_med_nodes(sp, &t, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Check existing mediators against new, if any */
+	if (sd->sd_med.n_cnt > 0) {
+		for (i = 0; i < max_meds; i++)
+			if (sd->sd_med.n_lst[i].a_cnt > 0)
+				n_c = add_lst(&n_l,
+				    sd->sd_med.n_lst[i].a_nm[0]);
+
+		for (i = 0; i < t.n_cnt; i++) {
+			if (strinlst(t.n_lst[i].a_nm[0], n_c, n_l)) {
+				(void) del_lst(&n_l);
+				(void) mddserror(ep, MDE_DS_ISMED, sp->setno,
+				    t.n_lst[i].a_nm[0], NULL,
+				    sp->setname);
+				rval = -1;
+				goto out;
+			}
+		}
+		(void) del_lst(&n_l);
+	}
+
+	if ((t.n_cnt + sd->sd_med.n_cnt) > max_meds) {
+		(void) mderror(ep, MDE_TOOMANYMED, NULL);
+		rval = -1;
+		goto out;
+	}
+
+	/* Copy the current mediator list for rollback */
+	rb_t = sd->sd_med;			/* structure assignment */
+
+	/* Setup the mediator record roll-back structure */
+	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
+	rb_medr.med_rec_mag = MED_REC_MAGIC;
+	rb_medr.med_rec_rev = MED_REC_REV;
+	rb_medr.med_rec_fl  = 0;
+	rb_medr.med_rec_sn  = sp->setno;
+	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * For a MN diskset the mediator is not given a list of
+		 * hosts in the set.  Instead a generic name (multiowner) is
+		 * given to the mediator which will allow any node to access
+		 * the mediator data as long as it provides the correct
+		 * setname and set number.  In a MN diskset, the mediator
+		 * data is only used when a first node joins the diskset
+		 * and becomes the master of the MN diskset.
+		 *
+		 * The traditional diskset code keeps the host list in
+		 * the mediator record up to date with respect to the host
+		 * list in the traditional diskset.  This keeps an unauthorized
+		 * node in the traditional diskset from accessing the data
+		 * in the mediator record and being able to 'take' the
+		 * diskset.
+		 *
+		 * This additional check is needed in the traditional diskset
+		 * since a panic during the metaset command can leave
+		 * the diskset with some nodes thinking that an
+		 * action has occurred and other nodes thinking the opposite.
+		 * A node may have really been removed from a diskset, but
+		 * that node doesn't realize this so this node must be
+		 * blocked from using the mediator data when attempting
+		 * to 'take' the diskset.
+		 * (Traditional diskset code has each node's rpc.metad
+		 * cleaning up from an inconsistent state without any
+		 * knowledge from the other nodes in the diskset).
+		 *
+		 * In the MN diskset, the reconfig steps force a consistent
+		 * state across all nodes in the diskset, so no node
+		 * needs to be blocked from accessing the mediator data.
+		 * This allows the MN diskset to use a common 'nodename'
+		 * in the mediator record.  This allows the mediator
+		 * daemon to remain unchanged even though a large number of
+		 * nodes are supported by the MN diskset.
+		 */
+		(void) strlcpy(rb_medr.med_rec_nodes[0], MED_MN_CALLER,
+		    MD_MAX_NODENAME_PLUS_1);
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++)
+			(void) strcpy(rb_medr.med_rec_nodes[i],
+			    sd->sd_nodes[i]);
+	}
+	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
+	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
+	rb_medr.med_rec_foff = 0;
+	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* Merge new mediators into the first free slots of the set record */
+	for (i = 0; i < t.n_cnt; i++) {
+		for (j = 0; j < max_meds; j++) {
+			if (sd->sd_med.n_lst[j].a_cnt > 0)
+				continue;
+			sd->sd_med.n_lst[j] = t.n_lst[i];
+			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_MEDIATOR,
+			    sp->setno, j);
+			sd->sd_med.n_cnt++;
+			break;
+		}
+	}
+
+	/*
+	 * Setup the kernel mediator list, which also validates that the
+	 * hosts have valid IP addresses
+	 */
+	(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
+	mp.med_setno = sp->setno;
+
+	/* Copy the hostnames */
+	if (meta_h2hi(&sd->sd_med, &mp.med, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Resolve the IP addresses for the host list */
+	if (meta_med_hnm2ip(&mp.med, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Bring the mediator record up to date with the set record */
+	medr = rb_medr;				/* structure assignment */
+	medr.med_rec_meds = sd->sd_med;		/* structure assignment */
+	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* END CHECK CODE */
+
+	/* Lock the set on current set members */
+	if (!(MD_MNSET_DESC(sd))) {
+		/* all signals already blocked for MN diskset */
+		md_rb_sig_handling_on();
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	RB_TEST(1, "meta_set_addmeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "meta_set_addmeds", ep)
+
+	/*
+	 * Add the new mediator information to all hosts in the set.
+	 * For MN diskset, each node sends mediator list to its kernel.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* All nodes are guaranteed to be ALIVE */
+			if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep))
+				goto rollback;
+			nd = nd->nd_next;
+		}
+	} else  {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep))
+				goto rollback;
+		}
+	}
+
+	RB_TEST(3, "meta_set_addmeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(4, "meta_set_addmeds", ep)
+
+	/* a NULL drive list is only a failure if an error was recorded */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		if (! mdisok(ep))
+			goto rollback;
+	}
+
+	RB_TEST(5, "meta_set_addmeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(6, "meta_set_addmeds", ep)
+
+	/* Inform the mediator hosts of the new information */
+	for (i = 0; i < max_meds; i++) {
+		if (sd->sd_med.n_lst[i].a_cnt == 0)
+			continue;
+
+		/* medr contains new mediator node list */
+		if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
+			goto rollback;
+	}
+
+	RB_TEST(7, "meta_set_addmeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 4;	/* level 4 */
+
+	RB_TEST(8, "meta_set_addmeds", ep)
+
+	/* In MN diskset, mediator list updated in clnt_updmeds call */
+	if (dd != NULL) {
+		if (!(MD_MNSET_DESC(sd))) {
+			if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde,
+			    NULL) != 0) {
+				(void) mdstealerror(ep, &mp.med_mde);
+				goto rollback;
+			}
+		}
+
+		/*
+		 * If only 50% mddbs available, mediator will be
+		 * golden by this ioctl on a traditional diskset.
+		 *
+		 * On a MN diskset, this only happens if the mediator
+		 * add operation is executed on the master node.
+		 * If a slave node is adding the mediator, the mediator
+		 * won't be marked golden until the next mddb change.
+		 */
+		(void) memset(&mup, '\0', sizeof (mddb_med_upd_parm_t));
+		mup.med_setno = sp->setno;
+		/* best effort: a failure here is deliberately ignored */
+		if (metaioctl(MD_MED_UPD_MED, &mup, &mup.med_mde, NULL) != 0)
+			mdclrerror(&mup.med_mde);
+	}
+
+out:
+	if (suspend1_flag) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	if (lock_flag) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	metafreedrivedesc(&dd);
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	} else {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	/*
+	 * level 4
+	 * In MN diskset, mediator list updated in clnt_updmeds call
+	 */
+	if (rb_level > 3 && (dd != NULL) && (!(MD_MNSET_DESC(sd)))) {
+		/* put the saved mediator list back into the kernel */
+		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
+		mp.med_setno = sp->setno;
+		(void) meta_h2hi(&rb_t, &mp.med, &xep);
+		mdclrerror(&xep);
+		(void) meta_med_hnm2ip(&mp.med, &xep);
+		mdclrerror(&xep);
+		(void) metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL);
+	}
+
+	/* level 3 */
+	if (rb_level > 2) {
+		for (i = 0; i < max_meds; i++) {
+			if (sd->sd_med.n_lst[i].a_cnt == 0)
+				continue;
+
+			/*
+			 * rb_medr contains the rollback mediator node list.
+			 * Send the rollback mediator information to the
+			 * new mediator node list.  If a node had this RPC
+			 * called, but its node is not in the mediator node
+			 * list, rpc.metamedd will delete the mediator
+			 * record on that node.
+			 */
+			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
+			    &rb_medr, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		metafreedrivedesc(&dd);
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		/* Restore the saved mediator list on all hosts in the set */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_updmeds(nd->nd_nodename, sp, &rb_t,
+				    &xep))
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else  {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_updmeds(sd->sd_nodes[i], sp, &rb_t,
+				    &xep))
+					mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 0 */
+	if (suspend1_flag) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * Report the resume failure itself (xep)
+				 * before discarding it; ep still carries
+				 * the error that caused the rollback.
+				 */
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	if (lock_flag) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					mdclrerror(&xep);
+				}
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+}
+
+/*
+ * meta_set_deletemeds - remove mediator hosts from diskset sp.
+ *
+ * node_v[0 .. node_c-1] are the node names of the mediators to remove;
+ * alias lists (names containing ',') are rejected with MDE_ONLYNODENAME.
+ * When forceflg is set, lock/update/unlock failures on hosts other than
+ * the local node (mynode()) are ignored, and any RPC failure while
+ * updating a mediator host is ignored.
+ *
+ * The work is done in stages tracked by rb_level so that a failure can
+ * be undone at the rollback label:
+ *   level 1 - set is locked; set records on the set's hosts may have
+ *	       been updated with the reduced mediator list
+ *   level 2 - all hosts updated; drive descriptors are being fetched
+ *   level 3 - kernel mediator list is being updated
+ *   level 4 - mediator hosts are being sent the new mediator record
+ *
+ * Returns 0 on success.  On failure a nonzero value is returned and the
+ * reason is recorded in ep.
+ */
+int
+meta_set_deletemeds(
+	mdsetname_t	*sp,
+	int		node_c,
+	char		**node_v,
+	int		forceflg,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd = NULL;
+	md_drive_desc		*dd = NULL;
+	mddb_med_parm_t		mp;
+	md_h_arr_t		rb_t;
+	med_rec_t		medr;
+	med_rec_t		rb_medr;
+	int			i, j;
+	char			**n_l = NULL;
+	int			n_c = 0;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	int			rval = 0;
+	int			max_meds;
+	md_mnnode_desc		*nd;
+	int			suspend1_flag = 0;
+	int			lock_flag = 0;
+
+	mdclrerror(ep);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/* only plain node names are accepted, no alias lists */
+	for (i = 0; i < node_c; i++)
+		if (strchr(node_v[i], ',') != NULL)
+			return (mderror(ep, MDE_ONLYNODENAME, node_v[i]));
+
+	if (nodesuniq(sp, node_c, node_v, ep))
+		return (-1);
+
+	if ((max_meds = get_max_meds(ep)) == 0)
+		return (-1);
+
+	/*
+	 * The mediator information (which is part of the set record) is
+	 * stored in the local mddbs of each node in the diskset.
+	 * Each node's rpc.metad daemon reads in the set
+	 * records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to delete
+	 * the mediator information from the set record in their local mddb.
+	 * If a panic of this node leaves the local mddbs set records
+	 * out-of-sync, the reconfig cycle will fix the local mddbs and
+	 * force them back into synchronization.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+				    sp->setno,
+				    nd->nd_nodename, NULL, sp->setname);
+				return (-1);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	if (sd->sd_med.n_cnt == 0)
+		return (mderror(ep, MDE_NOMED, NULL));
+
+	/* Make a list of nodes to check */
+	for (i = 0; i < max_meds; i++)
+		if (sd->sd_med.n_lst[i].a_cnt > 0)
+			n_c = add_lst(&n_l, sd->sd_med.n_lst[i].a_nm[0]);
+
+	/* every requested node must currently be a mediator */
+	for (i = 0; i < node_c; i++) {
+		if (! strinlst(node_v[i], n_c, n_l)) {
+			(void) del_lst(&n_l);
+			return (mddserror(ep, MDE_DS_ISNOTMED, sp->setno,
+			    node_v[i], NULL, sp->setname));
+		}
+	}
+
+	(void) del_lst(&n_l);
+
+	/* Save a copy of the current mediator information */
+	rb_t = sd->sd_med;			/* structure assignment */
+
+	/* Setup the mediator record for rollback */
+	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
+	rb_medr.med_rec_mag = MED_REC_MAGIC;
+	rb_medr.med_rec_rev = MED_REC_REV;
+	rb_medr.med_rec_fl  = 0;
+	rb_medr.med_rec_sn  = sp->setno;
+	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * In MN diskset, use a generic nodename, multiowner, in the
+		 * mediator record which allows any node to access mediator
+		 * information.  MN diskset reconfig cycle forces consistent
+		 * view of set/node/drive/mediator information across all nodes
+		 * in the MN diskset.  This allows the relaxation of
+		 * node name checking in rpc.metamedd for MN disksets.
+		 *
+		 * In the traditional diskset, only a node that is in the
+		 * mediator record's diskset nodelist can access mediator
+		 * data.
+		 */
+		(void) strlcpy(rb_medr.med_rec_nodes[0], MED_MN_CALLER,
+		    MD_MAX_NODENAME_PLUS_1);
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++)
+			(void) strcpy(rb_medr.med_rec_nodes[i],
+			    sd->sd_nodes[i]);
+	}
+	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
+	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
+	rb_medr.med_rec_foff = 0;
+	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* Delete the mediators requested from the set */
+	for (i = 0; i < node_c; i++) {
+		for (j = 0; j < max_meds; j++) {
+			if (sd->sd_med.n_lst[j].a_cnt == 0)
+				continue;
+			if (strcmp(node_v[i],
+			    sd->sd_med.n_lst[j].a_nm[0]) != 0)
+				continue;
+			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE,
+			    SVM_TAG_MEDIATOR, sp->setno, j);
+			(void) memset(&sd->sd_med.n_lst[j], '\0',
+			    sizeof (md_h_t));
+			sd->sd_med.n_cnt--;
+			break;
+		}
+	}
+
+	/* new mediator record: rollback record with the reduced list */
+	medr = rb_medr;				/* structure assignment */
+	medr.med_rec_meds = sd->sd_med;		/* structure assignment */
+	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* END CHECK CODE */
+
+	/* Lock the set on current set members */
+	if (MD_MNSET_DESC(sd)) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+		/*
+		 * Lock the set on current set members.
+		 * lock_set and SUSPEND are used to protect against
+		 * other metaset commands running on the other nodes.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* All nodes are guaranteed to be ALIVE */
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				if (forceflg && strcmp(mynode(),
+				    nd->nd_nodename) != 0) {
+					mdclrerror(ep);
+					nd = nd->nd_next;
+					continue;
+				}
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+	} else {
+		md_rb_sig_handling_on();
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				if (forceflg &&
+				    strcmp(mynode(), sd->sd_nodes[i]) != 0) {
+					mdclrerror(ep);
+					continue;
+				}
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	RB_TEST(1, "meta_set_deletemeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "meta_set_deletemeds", ep)
+
+	/* Update the mediator information on all hosts in the set */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* All nodes are guaranteed to be ALIVE */
+			if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med,
+			    ep)) {
+				if (forceflg && strcmp(mynode(),
+				    nd->nd_nodename) != 0) {
+					mdclrerror(ep);
+					/*
+					 * Step to the next node before
+					 * continuing; otherwise the same
+					 * failing node is retried forever.
+					 */
+					nd = nd->nd_next;
+					continue;
+				}
+				goto rollback;
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med,
+			    ep)) {
+				if (forceflg && strcmp(mynode(),
+				    sd->sd_nodes[i]) != 0) {
+					mdclrerror(ep);
+					continue;
+				}
+				goto rollback;
+			}
+		}
+	}
+
+	RB_TEST(3, "meta_set_deletemeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(4, "meta_set_deletemeds", ep)
+
+	/* a NULL drive list is only a failure if an error was recorded */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		if (! mdisok(ep))
+			goto rollback;
+	}
+
+	RB_TEST(5, "meta_set_deletemeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(6, "meta_set_deletemeds", ep)
+
+	if (dd != NULL) {
+		/*
+		 * Set up the parameters to the call to update the
+		 * kernel mediator list
+		 */
+		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
+		mp.med_setno = sp->setno;
+		if (meta_h2hi(&sd->sd_med, &mp.med, ep))
+			goto rollback;
+
+		/* Resolve the IP addresses for the host list */
+		if (meta_med_hnm2ip(&mp.med, ep))
+			goto rollback;
+
+		if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &mp.med_mde);
+			goto rollback;
+		}
+	}
+
+	RB_TEST(7, "meta_set_deletemeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 4;	/* level 4 */
+
+	RB_TEST(8, "meta_set_deletemeds", ep)
+
+	/* Inform the mediator hosts of the new status */
+	for (i = 0; i < max_meds; i++) {
+		if (rb_t.n_lst[i].a_cnt == 0)
+			continue;
+
+		/*
+		 * medr contains the new mediator node list.
+		 * Send the new mediator information to every host of the
+		 * original mediator node list.  If a node had this RPC
+		 * called, but its node is no longer in the new mediator
+		 * node list, rpc.metamedd will delete the mediator
+		 * record on that node.
+		 */
+		if (clnt_med_upd_rec(&rb_t.n_lst[i], sp, &medr, ep)) {
+			if ((forceflg && mdanyrpcerror(ep)) ||
+			    mdisrpcerror(ep, RPC_PROGNOTREGISTERED)) {
+				mdclrerror(ep);
+				continue;
+			}
+			goto rollback;
+		}
+	}
+
+out:
+	if (dd)
+		metafreedrivedesc(&dd);
+
+	if (suspend1_flag) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (lock_flag) {
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					if (forceflg &&
+					    strcmp(mynode(),
+					    nd->nd_nodename) != 0) {
+						/*
+						 * Discard the unlock error
+						 * (xep, not the caller's ep)
+						 * and move on to the next
+						 * node.
+						 */
+						mdclrerror(&xep);
+						nd = nd->nd_next;
+						continue;
+					}
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					if (forceflg &&
+					    strcmp(mynode(),
+					    sd->sd_nodes[i]) != 0) {
+						/* drop xep, keep ep intact */
+						mdclrerror(&xep);
+						continue;
+					}
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+			}
+		}
+	}
+	cl_set_setkey(NULL);
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	} else {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	(void) del_lst(&n_l);
+
+	/*
+	 * level 4
+	 * rb_level never exceeds 4, so the test must be "> 3" for this
+	 * step to run when the mediator host update fails part way.
+	 */
+	if (rb_level > 3) {
+		for (i = 0; i < max_meds; i++) {
+			if (rb_t.n_lst[i].a_cnt == 0)
+				continue;
+
+			/*
+			 * rb_medr contains the rollback mediator node list.
+			 * Send the rollback mediator information to the
+			 * original mediator node list.  This will recreate
+			 * the mediator record on all nodes where the mediator
+			 * record had been removed.
+			 */
+			if (clnt_med_upd_rec(&rb_t.n_lst[i], sp, &rb_medr,
+			    &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 3 */
+	if (rb_level > 2 && dd != NULL) {
+		/* put the saved mediator list back into the kernel */
+		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
+		mp.med_setno = sp->setno;
+		(void) meta_h2hi(&rb_t, &mp.med, &xep);
+		mdclrerror(&xep);
+		(void) meta_med_hnm2ip(&mp.med, &xep);
+		mdclrerror(&xep);
+		(void) metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL);
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		metafreedrivedesc(&dd);
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		/* Restore the saved mediator list on all hosts in the set */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_updmeds(nd->nd_nodename, sp, &rb_t,
+				    &xep))
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_updmeds(sd->sd_nodes[i], sp, &rb_t,
+				    &xep))
+					mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/* Unlock the set */
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 */
+		if (suspend1_flag) {
+			/* All nodes are guaranteed to be ALIVE */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (clnt_mdcommdctl(nd->nd_nodename,
+				    COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
+				    MD_MSCF_NO_FLAGS, &xep)) {
+					mde_perror(&xep, dgettext(TEXT_DOMAIN,
+					    "Unable to resume rpc.mdcommd.\n"));
+					mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+			meta_ping_mnset(sp->setno);
+		}
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_prv.c b/usr/src/lib/lvm/libmeta/common/meta_set_prv.c
new file mode 100644
index 0000000000..8b615d9af0
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_prv.c
@@ -0,0 +1,818 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+#include <sys/cladm.h>
+#include <devid.h>
+#include <sys/lvm/md_convert.h>
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Check that drive dnp can be matched to a drive on host `node' and, if so,
+ * issue clnt_drvused() for it on that host.
+ *
+ * When the drive already belongs to this set, ownership is released (only
+ * for sets where neither MD_MNSET_DESC nor MD_ATSET_DESC holds) while the
+ * check runs and is taken back at `out' before returning.  Every failure
+ * after the release must therefore leave through `out'.
+ *
+ * Matching is attempted by device id first (non multi-node sets whose drive
+ * has a devid); if the remote side answers ENOTSUP, or no devid is
+ * available, the older dev_t/timestamp (VTOC) or dev_t/devid (EFI)
+ * comparison is used instead.
+ *
+ * Returns 0 on success, non-zero on failure (-1, or the value returned by
+ * mddserror() for MDE_DS_DRIVENOTCOMMON).
+ */
+int
+checkdrive_onnode(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	char		*node,
+	md_error_t	*ep)
+{
+	time_t			mystamp, otherstamp;
+	md_dev64_t		otherdev;
+	mdname_t		*np, *remote_np;
+	mddrivename_t		*remote_dnp = NULL;
+	int			release = 0;
+	md_drive_desc		dd;
+	int			rval = 0;
+	int			ret = -1;
+	mhd_mhiargs_t		mhiargs;
+	md_set_desc		*sd;
+	int			is_efi = 0;
+	int			do_fallback = 0;
+
+	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (meta_is_drive_in_thisset(sp, dnp, FALSE, ep)) {
+		release = 1;
+		dd.dd_next = NULL;
+		dd.dd_dbcnt = 0;
+		dd.dd_dbsize = 0;
+		dd.dd_dnp = dnp;
+		/* Nothing has been released yet, so a plain return is safe */
+		if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0)
+			return (-1);
+		if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+			if (rel_own_bydd(sp, &dd, TRUE, ep))
+				return (-1);
+		}
+	}
+	if ((np = metaslicename(dnp, MD_SLICE0, ep)) == NULL) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * First try and operate assuming the other side
+	 * is running a SVM version that supports device id
+	 * in disksets i.e. is running SVM RPC version 2.
+	 *
+	 * If this call fails due to the other side running
+	 * a SVM version that does not support device id
+	 * in disksets i.e. is running SVM RPC version 1, we
+	 * fallback to the old behaviour.
+	 */
+	if ((dnp->devid != NULL) && (!(MD_MNSET_DESC(sd)))) {
+		char		*rname = NULL;
+		md_dev64_t	dev = NODEV64;
+
+		/*
+		 * If the disk is connected to the remote node then the
+		 * only thing we can be certain of is that the disk will
+		 * have the same devid on that node, it may not have the
+		 * same minor number nor the same ctd name. But if it
+		 * does have the same ctd name then use it.  In most cases
+		 * there will only be a single entry returned but if the
+		 * system has multi-path disks with MPXIO turned off there
+		 * will be multiple entries. Attempting to choose the same
+		 * name will give the user as consistent a view across the
+		 * nodes as possible.
+		 */
+		ret = clnt_devinfo_by_devid(node, sp, dnp->devid, &dev,
+		    np->rname, &rname, NULL, ep);
+
+		/*
+		 * If the return value was ENOTSUP, we know the
+		 * other side is not running a SVM version that
+		 * supports device id in disksets. We fallback
+		 * to the previous behaviour in that case.
+		 */
+		if (ret == ENOTSUP) {
+			do_fallback++;
+			goto fallback;
+		} else if (ret == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		/*
+		 * If the device does not exist on the remote node then
+		 * the returned dev should indicate this (NODEV64) but
+		 * we also check to make sure the returned name is not
+		 * empty to make sure that the namespace does not get
+		 * created with a NULL/empty entry (should not be possible
+		 * but being paranoid).
+		 */
+		if (dev == NODEV64 || rname == (char *)NULL ||
+		    strcmp(rname, "") == 0) {
+			rval = mddserror(ep, MDE_DS_DRIVENOTCOMMON, sp->setno,
+			    node, dnp->cname, sp->setname);
+			goto out;
+		}
+
+		/*
+		 * The rname returned from the remote node maybe different
+		 * to the rname on this node, therefore we need to build up
+		 * a dnp for this new rname.
+		 */
+		if (strcmp(np->rname, rname) != 0) {
+			/* different rname */
+			remote_np = metaname_fast(&sp, rname, ep);
+			if (remote_np == NULL) {
+				/*
+				 * Without a name there is no drive to hand
+				 * to clnt_drvused(); do not carry on with
+				 * an unset remote_dnp.
+				 */
+				rval = -1;
+				goto out;
+			}
+			remote_dnp = remote_np->drivenamep;
+		} else {
+			remote_dnp = dnp;
+		}
+	} else {
+		do_fallback++;
+	}
+
+fallback:
+	if (do_fallback) {
+		ret = setdevstamp(dnp, &mystamp, ep);
+		/*
+		 * Check if the disk in question is an EFI disk.
+		 */
+		if (ret == ENOTSUP) {
+			is_efi++;
+		} else if (ret == -1) {
+			/* leave via `out' so ownership is taken back */
+			rval = -1;
+			goto out;
+		}
+
+		if ((np = metaslicename(dnp, MD_SLICE0, ep)) == NULL) {
+			rval = -1;
+			goto out;
+		}
+
+		if (is_efi) {
+			/*
+			 * For EFI disks, we compare the device
+			 * id for the disks in question.
+			 */
+			ddi_devid_t	thisdevid, otherdevid;
+			char		*encoded_otherdevid = NULL;
+			char		*encoded_thisdevid = NULL;
+
+			if (clnt_devinfo(node, sp, dnp, &otherdev, NULL, ep)
+			    == -1) {
+				rval = -1;
+				goto out;
+			}
+			if (np->dev != otherdev) {
+				rval = mddserror(ep, MDE_DS_DRIVENOTCOMMON,
+				    sp->setno, node, dnp->cname, sp->setname);
+				goto out;
+			}
+
+			if (clnt_devid(node, sp, dnp, &encoded_otherdevid,
+			    ep) == -1) {
+				rval = -1;
+				goto out;
+			}
+			if (encoded_otherdevid == NULL) {
+				rval = -1;
+				goto out;
+			}
+			/*
+			 * ret still holds ENOTSUP here; it is only replaced
+			 * when the remote devid decodes, so a decode failure
+			 * is reported as "drive not common" below.
+			 */
+			if (devid_str_decode(encoded_otherdevid, &otherdevid,
+			    NULL) == 0) {
+				/*
+				 * If we are here, it means that dnp->devid
+				 * is NULL. This will typically happen if
+				 * we are dealing with SunCluster DID devices.
+				 *
+				 * We want to explicitly get the device id
+				 * for such a disk
+				 */
+				encoded_thisdevid = meta_get_devid(dnp->rname);
+				ret = devid_str_decode(encoded_thisdevid,
+				    &thisdevid, NULL);
+				if (ret == 0) {
+					ret = devid_compare(thisdevid,
+					    otherdevid);
+					devid_free(thisdevid);
+				}
+				devid_free(otherdevid);
+				if (encoded_thisdevid)
+					Free(encoded_thisdevid);
+			}
+
+			Free(encoded_otherdevid);
+			if (ret != 0) {
+				rval = mddserror(ep, MDE_DS_DRIVENOTCOMMON,
+				    sp->setno, node, dnp->cname, sp->setname);
+				goto out;
+			}
+		} else {
+			/*
+			 * For VTOC disks, we compare the dev_t and
+			 * timestamp for the disks in question.
+			 */
+			if (clnt_devinfo(node, sp, dnp, &otherdev,
+			    &otherstamp, ep) == -1) {
+				rval = -1;
+				goto out;
+			}
+			if ((mystamp != otherstamp) || (np->dev != otherdev)) {
+				rval = mddserror(ep, MDE_DS_DRIVENOTCOMMON,
+				    sp->setno, node, dnp->cname, sp->setname);
+				goto out;
+			}
+		}
+		remote_dnp = dnp;
+	}
+
+	if (clnt_drvused(node, sp, remote_dnp, ep) == -1)
+		rval = -1;
+
+out:
+	/* Take back the ownership that was released on entry */
+	if (release && !(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+		if (tk_own_bydd(sp, &dd, &mhiargs, TRUE, ep))
+			rval = -1;
+	}
+
+	return (rval);
+}
+
+/*
+ * Translate a host name into its side number for the set described by sd.
+ *
+ * Multi-node sets: the node list is searched by name and the matching
+ * entry's nd_nodeid is returned.
+ * Other sets: the index of the matching sd_nodes[] slot is returned; if no
+ * slot matches by name, the slots are re-read as numeric cluster node ids
+ * and compared with the id reported by _cladm().
+ *
+ * Returns MD_SIDEWILD when no match is found.
+ */
+side_t
+getnodeside(char *node, md_set_desc *sd)
+{
+	md_mnnode_desc	*mnp;
+	side_t		s;
+	int		my_nid;
+
+	if (MD_MNSET_DESC(sd)) {
+		for (mnp = sd->sd_nodelist; mnp != NULL; mnp = mnp->nd_next) {
+			if (strcmp(mnp->nd_nodename, node) == 0)
+				return (mnp->nd_nodeid);
+		}
+		return (MD_SIDEWILD);
+	}
+
+	/* Regular diskset: look for the host name in the populated slots */
+	for (s = 0; s < MD_MAXSIDES; s++) {
+		if (sd->sd_nodes[s] != NULL &&
+		    sd->sd_nodes[s][0] != '\0' &&
+		    strcmp(sd->sd_nodes[s], node) == 0)
+			return (s);
+	}
+
+	/*
+	 * No name matched.  This host may be configured as a cluster member
+	 * without running in cluster mode, in which case sd_nodes[] holds
+	 * node ids rather than host names; try to match on our node id.
+	 */
+	if (_cladm(CL_CONFIG, CL_NODEID, &my_nid) != 0)
+		return (MD_SIDEWILD);
+
+	for (s = 0; s < MD_MAXSIDES; s++) {
+		if (sd->sd_nodes[s] != NULL &&
+		    sd->sd_nodes[s][0] != '\0' &&
+		    atoi(sd->sd_nodes[s]) == my_nid)
+			return (s);
+	}
+
+	return (MD_SIDEWILD);
+}
+
+/*
+ * Release set sp via the MD_RELEASE_SET ioctl if s_ownset() reports that
+ * this host owns it; running mirror resyncs in the set are killed first.
+ *
+ * Returns 0 on success (including when the set is not owned), -1 if our
+ * side cannot be determined, or the mdstealerror() result if the ioctl
+ * fails.
+ */
+int
+halt_set(mdsetname_t *sp, md_error_t *ep)
+{
+	mddb_config_t	cfg;
+
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_setno = sp->setno;
+	cfg.c_sideno = getmyside(sp, ep);
+	if (cfg.c_sideno == MD_SIDEWILD)
+		return (-1);
+
+	/* Nothing to release unless we are the owner */
+	if (s_ownset(sp->setno, ep) != MD_SETOWNER_YES)
+		return (0);
+
+	/* This ioctl does not need any device id information */
+	cfg.c_locator.l_devid = (uint64_t)0;
+	cfg.c_locator.l_devid_flags = 0;
+
+	/* Stop resyncs running on mirrors in this set before letting go */
+	meta_mirror_resync_kill(sp);
+
+	if (metaioctl(MD_RELEASE_SET, &cfg, &cfg.c_mde, NULL) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &cfg.c_mde));
+}
+
+/*
+ * Allocate a zeroed drive descriptor, fill it from the arguments and link
+ * it onto the tail of the list whose head pointer is *dd.
+ *
+ * Returns the new element.
+ */
+md_drive_desc *
+metadrivedesc_append(
+	md_drive_desc	**dd,
+	mddrivename_t	*dnp,
+	int		dbcnt,
+	int		dbsize,
+	md_timeval32_t	timestamp,
+	ulong_t		genid,
+	uint_t		flags
+)
+{
+	md_drive_desc	**tailp = dd;
+	md_drive_desc	*newdd;
+
+	/* walk to the terminating NULL link */
+	while (*tailp != NULL)
+		tailp = &(*tailp)->dd_next;
+
+	/* build the new element and hook it in */
+	newdd = Zalloc(sizeof (*newdd));
+	newdd->dd_dnp = dnp;
+	newdd->dd_dbcnt = dbcnt;
+	newdd->dd_dbsize = dbsize;
+	newdd->dd_ctime = timestamp;
+	newdd->dd_genid = genid;
+	newdd->dd_flags = flags;
+	*tailp = newdd;
+
+	return (newdd);
+}
+
+/*
+ * Ask `node' for its record of the set named sp->setname and compare it
+ * with our own set descriptor at the strictness selected by match_flag:
+ *
+ *	NHS_N_EQ	 name exists on the node
+ *	NHS_NS_EQ	 ... and set number matches
+ *	NHS_NST_EQ	 ... and creation timestamp matches
+ *	NHS_NSTG_EQ	 ... and genid matches
+ *	NHS_NST_EQ_G_GT	 name/setno/timestamp match and the node's genid is
+ *			 greater than ours
+ *
+ * Returns 1 on a match, 0 on no match (or no such set on the node), and
+ * -1 on error.
+ */
+int
+nodehasset(
+	mdsetname_t	*sp,
+	char		*node,
+	uint_t		match_flag,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	md_set_record	*sr;
+	int		matched = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Don't care if set record is MN or not */
+	if (clnt_getset(node, sp->setname, MD_SET_BAD, &sr, ep))
+		return (-1);
+
+	if (sr == NULL)
+		return (mdisok(ep) ? 0 : -1);
+
+	if ((match_flag & NHS_N_EQ) == NHS_N_EQ) {
+		/* a record with this name exists; that is all that is asked */
+		matched = 1;
+	} else if (sd->sd_setno == sr->sr_setno) {
+		if ((match_flag & NHS_NS_EQ) == NHS_NS_EQ) {
+			/* name and setno agree */
+			matched = 1;
+		} else if (sd->sd_ctime.tv_sec == sr->sr_ctime.tv_sec &&
+		    sd->sd_ctime.tv_usec == sr->sr_ctime.tv_usec) {
+			if ((match_flag & NHS_NST_EQ) == NHS_NST_EQ) {
+				/* name, setno and timestamp agree */
+				matched = 1;
+			} else if (sd->sd_genid == sr->sr_genid) {
+				/* name, setno, timestamp and genid agree */
+				if ((match_flag & NHS_NSTG_EQ) == NHS_NSTG_EQ)
+					matched = 1;
+			} else if (sd->sd_genid < sr->sr_genid) {
+				/* the other host's genid is ahead of ours */
+				if ((match_flag & NHS_NST_EQ_G_GT) ==
+				    NHS_NST_EQ_G_GT)
+					matched = 1;
+			}
+		}
+	}
+
+	/*
+	 * The record came from the getset RPC, so it is only as large as
+	 * md_set_record even when the MN flag is set.  Clear the flag so
+	 * the free code does not treat it as an md_mnset_record.
+	 */
+	sr->sr_flags &= ~MD_SR_MN;
+	free_sr(sr);
+
+	return (matched);
+}
+
+/*
+ * Verify that the cnt host names in strings[] are pairwise distinct.
+ *
+ * Returns 0 if they are; otherwise records MDE_DS_DUPHOST for the first
+ * duplicated name in ep and returns the mddserror() result.
+ */
+int
+nodesuniq(mdsetname_t *sp, int cnt, char **strings, md_error_t *ep)
+{
+	int	a, b;
+
+	for (a = 0; a < cnt; a++) {
+		for (b = a + 1; b < cnt; b++) {
+			if (strcmp(strings[a], strings[b]) != 0)
+				continue;
+
+			return (mddserror(ep, MDE_DS_DUPHOST,
+			    sp->setno, strings[a], NULL, sp->setname));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Determine who owns set sp.
+ *
+ * Returns MD_SETOWNER_YES if this host owns it, MD_SETOWNER_NO if another
+ * host does, MD_SETOWNER_NONE if nobody does, and -1 on error.  The local
+ * set is always owned by this host.
+ *
+ * For non multi-node sets, when owner_of_set is not NULL it receives a
+ * Strdup()'d copy of the owner's name, or NULL when there is no owner.
+ * For multi-node sets *owner_of_set is left untouched.
+ *
+ * With forceflg == TRUE the other hosts are not queried: the answer is
+ * either MD_SETOWNER_YES or MD_SETOWNER_NONE.
+ */
+int
+own_set(mdsetname_t *sp, char **owner_of_set, int forceflg, md_error_t *ep)
+{
+	md_set_desc	*sd;
+	int		is_owner;
+	int		side;
+	char		*host;
+
+	if (metaislocalset(sp)) {
+		if (owner_of_set != NULL)
+			*owner_of_set = Strdup(mynode());
+		return (MD_SETOWNER_YES);
+	}
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	if (clnt_ownset(mynode(), sp, &is_owner, ep) == -1)
+		return (-1);
+
+	/* Multi-node set: only our own answer matters */
+	if (MD_MNSET_DESC(sd))
+		return ((is_owner == TRUE) ? MD_SETOWNER_YES : MD_SETOWNER_NO);
+
+	/* We are the owner, forced or not */
+	if (is_owner == TRUE) {
+		if (owner_of_set != NULL)
+			*owner_of_set = Strdup(mynode());
+		return (MD_SETOWNER_YES);
+	}
+
+	/* Unless forced, ask every other populated side */
+	if (forceflg != TRUE) {
+		for (side = 0; side < MD_MAXSIDES; side++) {
+			host = sd->sd_nodes[side];
+
+			/* Skip empty slots, and my own slot */
+			if (host[0] == '\0' || strcmp(host, mynode()) == 0)
+				continue;
+
+			if (clnt_ownset(host, sp, &is_owner, ep) == -1)
+				return (-1);
+
+			if (is_owner == TRUE) {
+				if (owner_of_set != NULL)
+					*owner_of_set = Strdup(host);
+				return (MD_SETOWNER_NO);
+			}
+		}
+	}
+
+	/* Nobody claimed the set */
+	if (owner_of_set != NULL)
+		*owner_of_set = NULL;
+	return (MD_SETOWNER_NONE);
+}
+
+/*
+ * Bring the set record genid on every node of the set up to max_genid by
+ * repeatedly marking the record MD_SR_OK with clnt_upd_sr_flags().
+ * (Presumably each such update advances the remote genid by one; that is
+ * not visible from here -- TODO confirm.)
+ *
+ * If node_v names any nodes, their records are first marked MD_SR_OK and
+ * max_genid is bumped by one to account for that update.
+ *
+ * All RPC errors are deliberately ignored (best effort): a node that
+ * cannot be reached, or that has no record for the set, is skipped.
+ */
+void
+resync_genid(
+	mdsetname_t	*sp,
+	md_set_desc	*sd,
+	ulong_t		max_genid,
+	int		node_c,
+	char		**node_v
+)
+{
+	int		i, j;
+	ulong_t		cur_genid[MD_MAXSIDES];
+	md_set_record	*sr;
+	md_error_t	xep = mdnullerror;
+	md_mnnode_desc	*nd;
+	md_mnset_record	*mnsr;
+
+	if (node_c > 0 && node_v && *node_v) {
+		/*
+		 * Mark the set record MD_SR_OK.
+		 */
+		for (i = 0; i < node_c; i++)
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_OK, &xep))
+				mdclrerror(&xep);
+		max_genid++;
+	}
+
+	if (MD_MNSET_DESC(sd)) {
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+
+			/* Will only return a multi-node diskset record */
+			mnsr = NULL;
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, &xep) == -1) {
+				mdclrerror(&xep);
+				continue;
+			}
+
+			/* The call can succeed without returning a record */
+			if (mnsr == NULL) {
+				mdclrerror(&xep);
+				continue;
+			}
+
+			for (j = mnsr->sr_genid; j < max_genid; j++) {
+				if (clnt_upd_sr_flags(nd->nd_nodename, sp,
+				    MD_SR_OK, &xep))
+					mdclrerror(&xep);
+			}
+			free_sr((struct md_set_record *)mnsr);
+		}
+		return;
+	}
+
+	/*
+	 * Get current genid for each node.
+	 */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		cur_genid[i] = 0;
+
+		/* Skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		/* Should be a non-multinode diskset */
+		sr = NULL;
+		if (clnt_getset(sd->sd_nodes[i], sp->setname,
+		    MD_SET_BAD, &sr, &xep) == -1) {
+			mdclrerror(&xep);
+			continue;
+		}
+
+		/*
+		 * A node that does not know the set returns no record;
+		 * leave its genid at 0 rather than dereferencing NULL.
+		 */
+		if (sr == NULL) {
+			mdclrerror(&xep);
+			continue;
+		}
+
+		if (MD_MNSET_REC(sr)) {
+			/*
+			 * Set record structure was allocated from RPC routine
+			 * getset so this structure is only of size
+			 * md_set_record even if the MN flag is set.  So,
+			 * clear the flag so that the free code doesn't
+			 * attempt to free a structure the size of
+			 * md_mnset_record.
+			 */
+			sr->sr_flags &= ~MD_SR_MN;
+			free_sr(sr);
+			continue;
+		}
+
+		cur_genid[i] = sr->sr_genid;
+
+		free_sr(sr);
+	}
+
+	/*
+	 * Mark the set record MD_SR_OK
+	 */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		for (j = cur_genid[i]; j < max_genid; j++)
+			if (clnt_upd_sr_flags(sd->sd_nodes[i], sp, MD_SR_OK,
+			    &xep))
+				mdclrerror(&xep);
+	}
+}
+
+/*
+ * For every drive in dd that carries database replicas (dd_dbcnt != 0),
+ * issue one MD_DB_USEDEV ioctl per replica for set sp.
+ *
+ * The locator is built from this side's side-name entry when the drive
+ * has one, otherwise from the drive's replica slice.  For non multi-node
+ * sets whose drive has a device id, the devid (with the minor name
+ * appended) is passed along as well.
+ *
+ * Drives whose replica slice, device number or controller info cannot be
+ * obtained are silently skipped.
+ *
+ * Returns 0 on success, -1 on a local failure, or the mdstealerror()
+ * result when the ioctl fails.
+ */
+int
+setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd, int force, md_error_t *ep)
+{
+	md_drive_desc		*p;
+	struct mddb_config	c;
+	int			i;
+	md_set_desc		*sd;
+	/*
+	 * NOTE(review): use_devid is never set back to 1, so once one drive
+	 * lacks a devid no later drive uses one either.  Left as-is; confirm
+	 * whether that is intended.
+	 */
+	int			use_devid = 1;
+	ddi_devid_t		devidp;
+	char			*minor_name;
+	size_t			sz;
+	char			*devid_str;
+	void			*devid_buf;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	(void) memset(&c, 0, sizeof (c));
+
+	c.c_setno = sp->setno;
+	(void) strcpy(c.c_setname, sp->setname);
+	if ((c.c_sideno = getmyside(sp, ep)) == MD_SIDEWILD)
+		return (-1);
+
+	c.c_timestamp = sd->sd_ctime;
+
+	if (setup_med_cfg(sp, &c, force, ep))
+		return (-1);
+
+	for (p = dd; p != NULL; p = p->dd_next) {
+		mddrivename_t	*dnp;
+		mdname_t	*np;
+		mdcinfo_t	*cinfo;
+		mdsidenames_t	*sn = NULL;
+
+		if (p->dd_dbcnt == 0)
+			continue;
+
+		/*
+		 * Per-drive allocations start out empty so that nothing
+		 * left over from the previous drive is reused or freed
+		 * a second time.
+		 */
+		minor_name = NULL;
+		devid_str = NULL;
+		devid_buf = NULL;
+
+		dnp = p->dd_dnp;
+
+		assert(dnp != NULL);
+
+		for (sn = dnp->side_names; sn != NULL; sn = sn->next) {
+			if (sn->sideno == c.c_sideno)
+				break;
+		}
+
+		/*
+		 * The disk has no side name information
+		 */
+		if (sn == NULL) {
+			uint_t	rep_slice;
+
+			if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
+			    ((np = metaslicename(dnp, rep_slice, ep))
+			    == NULL)) {
+				mdclrerror(ep);
+				continue;
+			}
+
+			if (np->dev == NODEV64)
+				continue;
+
+			c.c_locator.l_dev = meta_cmpldev(np->dev);
+			c.c_locator.l_mnum = meta_getminor(np->dev);
+
+			if (!MD_MNSET_DESC(sd)) {
+				/*
+				 * minor_name will be NULL if dnp->devid == NULL
+				 * - see metagetvtoc()
+				 */
+				if (np->minor_name != NULL) {
+					minor_name = Strdup(np->minor_name);
+				}
+			}
+
+			if ((cinfo = metagetcinfo(np, ep)) == NULL) {
+				mdclrerror(ep);
+				/* don't leak the copy made just above */
+				Free(minor_name);
+				continue;
+			}
+
+			(void) strncpy(c.c_locator.l_driver, cinfo->dname,
+			    sizeof (c.c_locator.l_driver));
+		} else {
+			c.c_locator.l_dev = NODEV32;
+			c.c_locator.l_mnum = sn->mnum;
+			(void) strncpy(c.c_locator.l_driver, sn->dname,
+			    sizeof (c.c_locator.l_driver));
+
+			if (!MD_MNSET_DESC(sd)) {
+				if (dnp->devid != NULL) {
+					minor_name = meta_getdidminorbykey(
+					    MD_LOCAL_SET, sn->sideno + SKEW,
+					    dnp->side_names_key, ep);
+				}
+			}
+		}
+
+		if ((dnp->devid == NULL) || MD_MNSET_DESC(sd)) {
+			use_devid = 0;
+		}
+
+		if (use_devid) {
+			size_t	len;
+
+			/*
+			 * The minor name is required to build the full
+			 * devid; check it before it is dereferenced.
+			 */
+			if (minor_name == NULL) {
+				/* ERROR fix up */
+				return (-1);
+			}
+
+			/*
+			 * The devid associated with the dnp does not have
+			 * a minor name and so we must add it in.
+			 */
+			len = strlen(dnp->devid) + strlen(minor_name) + 2;
+			devid_str = (char *)Malloc(len);
+			(void) snprintf(devid_str, len, "%s/%s", dnp->devid,
+			    minor_name);
+			if (devid_str_decode(devid_str, &devidp, NULL) != 0) {
+				/* devidp is not valid; don't size/copy it */
+				Free(devid_str);
+				Free(minor_name);
+				return (-1);
+			}
+
+			sz = devid_sizeof(devidp);
+			devid_buf = malloc(sz);
+			if (devid_buf == NULL) {
+				devid_free(devidp);
+				Free(devid_str);
+				Free(minor_name);
+				return (-1);
+			}
+			(void) memcpy(devid_buf, devidp, sz);
+			/* the ioctl gets the copy; the decoded devid is done */
+			devid_free(devidp);
+
+			c.c_locator.l_devid = (uintptr_t)devid_buf;
+			c.c_locator.l_devid_sz = sz;
+			(void) strcpy(c.c_locator.l_minor_name, minor_name);
+			c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
+			    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+		} else {
+			/*
+			 * Don't need device id information from
+			 * this ioctl
+			 */
+			c.c_locator.l_devid = (uint64_t)0;
+			c.c_locator.l_devid_flags = 0;
+		}
+
+		/* One ioctl per replica; replicas start at block 16 */
+		for (i = 0; i < p->dd_dbcnt; i++) {
+			c.c_locator.l_flags = 0;
+			c.c_locator.l_blkno = 16 + i * p->dd_dbsize;
+
+			if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
+				free(devid_buf);
+				Free(devid_str);
+				Free(minor_name);
+				return (mdstealerror(ep, &c.c_mde));
+			}
+		}
+
+		/* release this drive's allocations (all may be NULL) */
+		free(devid_buf);
+		Free(devid_str);
+		Free(minor_name);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Grab set sp with the MD_GRAB_SET ioctl, passing MDDB_C_STALE in when
+ * stale_bool is TRUE.
+ *
+ * Returns 0 on success, -1 if our side cannot be determined, the
+ * mdstealerror() result if the ioctl fails, or the mdmddberror() result
+ * for MDE_DB_STALE when MDDB_C_STALE is set in the flags afterwards.
+ */
+int
+snarf_set(mdsetname_t *sp, bool_t stale_bool, md_error_t *ep)
+{
+	mddb_config_t	cfg;
+
+	(void) memset(&cfg, '\0', sizeof (cfg));
+
+	cfg.c_setno = sp->setno;
+	cfg.c_sideno = getmyside(sp, ep);
+	if (cfg.c_sideno == MD_SIDEWILD)
+		return (-1);
+
+	/* This ioctl does not need any device id information */
+	cfg.c_locator.l_devid = (uint64_t)0;
+	cfg.c_locator.l_devid_flags = 0;
+
+	if (stale_bool == TRUE)
+		cfg.c_flags = MDDB_C_STALE;
+
+	if (metaioctl(MD_GRAB_SET, &cfg, &cfg.c_mde, NULL) != 0)
+		return (mdstealerror(ep, &cfg.c_mde));
+
+	if (!(cfg.c_flags & MDDB_C_STALE))
+		return (0);
+
+	return (mdmddberror(ep, MDE_DB_STALE, NODEV64, sp->setno,
+	    0, NULL));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c b/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c
new file mode 100644
index 0000000000..b13c483af0
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c
@@ -0,0 +1,1079 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <sys/lvm/md_crc.h>
+
+
+/*
+ * Recompute the replica count and size for each drive in dd from the
+ * replica list rlp, then push the result to the set's nodes with
+ * clnt_upd_dr_dbinfo().
+ *
+ * dd_dbsize is the size of the smallest replica in rlp (or the default
+ * MD_MN_DBSIZE/MD_DBSIZE when rlp yields none) for drives that hold at
+ * least one replica, and 0 otherwise.
+ *
+ * The set is locked on the other members for the duration of the update
+ * and unlocked again before returning; this node's lock is managed by the
+ * caller.  When forceflg is set, only this node is updated and no other
+ * node is locked.
+ *
+ * Returns 0 on success, -1 on failure with ep filled in.
+ */
+static int
+upd_dr_dbinfo(
+	mdsetname_t		*sp,
+	md_set_desc		*sd,
+	md_drive_desc		*dd,
+	md_replicalist_t	*rlp,
+	int			forceflg,
+	md_error_t		*ep
+)
+{
+	md_drive_desc		*p;
+	md_replica_t		*r;
+	md_replicalist_t	*rl;
+	int			i;
+	int			dbcnt;
+	int			rval = 0;
+	daddr_t			nblks = 0;
+	md_setkey_t		*cl_sk;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	ddi_devid_t		devid;
+
+	/* find the smallest existing replica */
+	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+		r = rl->rl_repp;
+		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
+	}
+
+	if (nblks <= 0)
+		nblks = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE;
+
+	for (p = dd; p != NULL; p = p->dd_next) {
+		dbcnt = 0;
+		for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+			int	same_drive;
+
+			r = rl->rl_repp;
+
+			/*
+			 * A replica belongs to this drive if the ctd names
+			 * match.  If we're running with device ids in
+			 * disksets, a matching device id counts as well,
+			 * which covers a disk that has moved (its ctd name
+			 * no longer matches).  The ctd comparison also
+			 * covers the possibility of the device id having
+			 * changed.
+			 */
+			same_drive = (strcmp(r->r_namep->drivenamep->cname,
+			    p->dd_dnp->cname) == 0);
+
+			if (!same_drive && (p->dd_dnp->devid != NULL) &&
+			    (r->r_devid != NULL) && (!MD_MNSET_DESC(sd))) {
+				/*
+				 * Only compare (and free) the devid if the
+				 * string actually decoded; otherwise devid
+				 * is not valid and the ctd result stands.
+				 */
+				if (devid_str_decode(p->dd_dnp->devid,
+				    &devid, NULL) == 0) {
+					if (devid_compare(devid,
+					    r->r_devid) == 0)
+						same_drive = 1;
+					devid_free(devid);
+				}
+			}
+
+			if (same_drive)
+				dbcnt++;
+		}
+		p->dd_dbcnt = dbcnt;
+		p->dd_dbsize = dbcnt > 0 ? nblks : 0;
+	}
+
+	/* Lock the set on current set members */
+	if (MD_MNSET_DESC(sd)) {
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			/* If this is forced, don't lock other sides */
+			if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0)
+				continue;
+
+			/* We already locked this side in the caller */
+			if (strcmp(mynode(), nd->nd_nodename) == 0)
+				continue;
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* If this is forced, don't lock other sides */
+			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
+				continue;
+
+			/* We already locked this side in the caller */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * Push the new drive database info.  A node that reports
+	 * MDE_NO_SET or MDE_DB_NODB is tolerated; any other error stops
+	 * the update.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			/* If this is forced, then only care about this node */
+			if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0)
+				continue;
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+
+			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd,
+			    ep) == -1) {
+				if (! mdiserror(ep, MDE_NO_SET) &&
+				    ! mdismddberror(ep, MDE_DB_NODB)) {
+					rval = -1;
+					break;
+				}
+				mdclrerror(ep);
+			}
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* If this is forced, then only care about this node */
+			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
+				continue;
+
+			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd,
+			    ep) == -1) {
+				if (! mdiserror(ep, MDE_NO_SET) &&
+				    ! mdismddberror(ep, MDE_DB_NODB)) {
+					rval = -1;
+					break;
+				}
+				mdclrerror(ep);
+			}
+		}
+	}
+
+out:
+	/*
+	 * Unlock the other members.  Unlock errors go to a scratch error
+	 * so that they only reach ep when no earlier error is pending.
+	 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (MD_MNSET_DESC(sd)) {
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			/* If this is forced, don't unlock other sides */
+			if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0)
+				continue;
+
+			/* We will unlock this side in the caller */
+			if (strcmp(mynode(), nd->nd_nodename) == 0)
+				continue;
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* If this is forced, don't unlock other sides */
+			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
+				continue;
+
+			/* We will unlock this side in the caller */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	}
+	/* Do not clear the key, via cl_set_setkey(NULL) this is nested */
+
+	return (rval);
+}
+
+/*
+ * Issue the MD_MED_USE_TAG ioctl for set setno with data tag id usetag.
+ *
+ * Returns 0 on success, otherwise the mdstealerror() result with the
+ * ioctl's error moved into ep.
+ */
+static int
+usetag_take(set_t setno, int usetag, md_error_t *ep)
+{
+	mddb_dtag_use_parm_t	parm;
+
+	(void) memset(&parm, '\0', sizeof (mddb_dtag_use_parm_t));
+	parm.dtup_setno = setno;
+	parm.dtup_id = usetag;
+
+	if (metaioctl(MD_MED_USE_TAG, &parm, &parm.dtup_mde, NULL) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &parm.dtup_mde));
+}
+
+/*
+ * Issue the MD_MED_ACCEPT ioctl for set setno.
+ *
+ * Returns 0 on success, otherwise the mdstealerror() result with the
+ * ioctl's error moved into ep.
+ */
+static int
+useit_take(set_t setno, md_error_t *ep)
+{
+	mddb_accept_parm_t	parm;
+
+	(void) memset(&parm, '\0', sizeof (mddb_accept_parm_t));
+	parm.accp_setno = setno;
+
+	if (metaioctl(MD_MED_ACCEPT, &parm, &parm.accp_mde, NULL) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &parm.accp_mde));
+}
+
+/*
+ * Update the master block with the device id information for the disks
+ * in the diskset. The device id information will be consumed by the
+ * diskset import code in case of remotely replicated disksets.
+ *
+ * For the drives that have a valid diskset mddb on them, we add the
+ * device id for the drive to the unused portion of the mddb.
+ *
+ * For the drives that don't have a diskset mddb on them, we add a dummy
+ * master block that contains the device id for the drive. A dummy master
+ * block is signified by changing the master block magic number, mb_magic,
+ * to MDDB_MAGIC_DU.
+ *
+ * This code is responsible primarily for adding the appropriate device id
+ * information to diskset disks that didn't have the information. This would
+ * typically occur when the OS has been upgraded from an OS release prior to
+ * Solaris 10
+ *
+ * The error path in this routine is defined as - if an error occurs while
+ * updating the mddb for one disk in the diskset, don't bother updating *any*
+ * of the mddbs because it's game over anyways as far as disaster recovery for
+ * that diskset is concerned.
+ *
+ * This code will need to be revisited if and when support for importing
+ * partial disksets is added.
+ *
+ * NOTE: This code relies heavily on the meta_repartition() working correctly
+ * and reformatting a drive, so that there's enough room for a dummy master
+ * block, every time a drive is added to a diskset. Should
+ * the meta_repartition() code change in future, this code will have to be
+ * revisited.
+ *
+ * Returns 0 on success and -1 on failure
+ */
+int
+meta_update_mb(mdsetname_t *sp, md_drive_desc *drivedesc, md_error_t *ep)
+{
+	uint_t		sliceno, offset;
+	void		*mb;
+	mddb_mb_t	*mbp;
+	int		fd = -1;
+	ddi_devid_t	devid = NULL;
+	md_drive_desc	*dd;
+	mddrivename_t	*dnp;
+	mdname_t	*rsp;
+	int		dbcnt;
+	int		dbsize;
+	size_t		len;
+	md_set_desc	*sd;
+	int		rval = -1;
+
+	/*
+	 * Don't do anything for MN diskset for now.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+
+		if (MD_MNSET_DESC(sd))
+			return (0);
+	}
+
+	/* One block of scratch space, reused for every master block */
+	mb = Malloc(DEV_BSIZE);
+	mbp = (mddb_mb_t *)mb;
+
+	/*
+	 * For every drive in the drive descriptor, iterate through all
+	 * the mddbs present on it and check to see if mb_devid_magic is
+	 * set. If it isn't, then update the master block with the correct
+	 * device id information
+	 */
+	for (dd = drivedesc; dd != NULL; dd = dd->dd_next) {
+		int	i = 0;
+
+		dnp = dd->dd_dnp;
+		dbcnt = dd->dd_dbcnt;
+		dbsize = dd->dd_dbsize;
+
+		/*
+		 * When the import support for remotely replicated
+		 * disksets gets implemented, we probably want to
+		 * inform the user that the disks won't be self
+		 * identifying if any of these calls fails
+		 */
+		if (meta_replicaslice(dnp, &sliceno, ep) != 0)
+			goto cleanup;
+
+		if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
+			goto cleanup;
+
+		if ((fd = open(rsp->rname, O_RDWR)) < 0)
+			goto cleanup;
+
+		/* if devid_str_decode fails, make sure devid is null */
+		if (devid_str_decode(dnp->devid, &devid, NULL) != 0) {
+			devid = NULL;
+		}
+
+		/*
+		 * Visit each master block on this drive.  The loop body
+		 * runs at least once so that a drive without replicas
+		 * (dbcnt == 0) still gets its dummy master block.
+		 */
+		do {
+			int	push = 0;
+
+			/* replicas start at block 16, dbsize blocks apart */
+			offset = (i * dbsize + 16);
+			++i;
+
+			if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0)
+				goto cleanup;
+
+			if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
+				goto cleanup;
+
+			if (crcchk((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum,
+			    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
+				goto cleanup;
+
+			/*
+			 * If the disk is one of the ones that doesn't
+			 * have a shared mddb on it, we put a dummy
+			 * master block on it.
+			 */
+			if (mbp->mb_devid_magic != MDDB_MAGIC_DE) {
+				if (dbcnt == 0) {
+					meta_mkdummymaster(sp, fd, 16);
+					break;
+				}
+			}
+
+			/*
+			 * if mb_setcreatetime is 0, this field was never
+			 * filled in so do it now.
+			 */
+			if ((mbp->mb_setcreatetime.tv_sec == 0) &&
+			    (mbp->mb_setcreatetime.tv_usec == 0)) {
+				mbp->mb_setcreatetime =
+				    meta_get_lb_inittime(sp, ep);
+				push = 1;
+			}
+
+			/*
+			 * If MDDB_MAGIC_DE is set in the
+			 * mb_devid_magic field then we know we
+			 * have a valid device id and we don't
+			 * need to add it to the master block.
+			 *
+			 * This would have to be revisited if device
+			 * ids change as a result of device id
+			 * algorithms changing or somesuch.
+			 */
+			if (mbp->mb_devid_magic != MDDB_MAGIC_DE) {
+				if (devid != NULL) {
+					len = devid_sizeof(devid);
+					if (len <= (DEV_BSIZE -
+					    sizeof (mddb_mb_t))) {
+						/*
+						 * there's enough space to
+						 * store the devid
+						 */
+						mbp->mb_devid_magic =
+						    MDDB_MAGIC_DE;
+						mbp->mb_devid_len = len;
+						(void) memcpy(mbp->mb_devid,
+						    (char *)devid, len);
+						push = 1;
+					}
+				}
+			}
+
+			/*
+			 * write out (push) any changes we have to the mb
+			 */
+			if (push) {
+				crcgen((uchar_t *)mbp,
+				    (uint_t *)&mbp->mb_checksum,
+				    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL);
+
+				if (lseek(fd, (off_t)dbtob(offset), SEEK_SET)
+				    < 0)
+					goto cleanup;
+
+				if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
+					goto cleanup;
+			}
+		} while (i < dbcnt);
+
+		/*
+		 * The devid is shared by every master block on this drive,
+		 * so it is released only once the drive is done, and the
+		 * pointer is cleared so cleanup cannot free it again.
+		 */
+		if (devid != NULL) {
+			devid_free(devid);
+			devid = NULL;
+		}
+		(void) close(fd);
+		fd = -1;
+	}
+	/* success */
+	rval = 0;
+
+cleanup:
+	/* Common exit: release whatever is still held */
+	if (fd != -1)
+		(void) close(fd);
+	if (devid != NULL)
+		devid_free(devid);
+	Free(mb);
+	return (rval);
+}
+
+/*
+ * Exported Entry Points
+ */
+/*
+ * Take ownership of the diskset `sp' for this node.
+ *
+ * flags may carry TAKE_USETAG / TAKE_USEIT (handled first through
+ * usetag_take() / useit_take()) and TAKE_FORCE (handed to own_set(),
+ * setup_db_bydd() and upd_dr_dbinfo()).  mhiargsp supplies the values
+ * passed to tk_own_bydd() and clnt_stimeout().
+ *
+ * Once the preliminary checks pass the set is locked on this node and the
+ * take proceeds in stages.  rb_level records the stage being attempted so
+ * that the rollback path can undo the work in reverse order:
+ *	1 - taking the drives (tk_own_bydd)
+ *	2 - setting timeouts and setting up the replicas
+ *	3 - snarfing the set
+ *	4 - the other nodes are locked for the master block update
+ *
+ * Returns 0 on success (this includes "already the owner" and "set has no
+ * drives"), -1 on failure.
+ */
+int
+meta_set_take(
+	mdsetname_t	*sp,
+	mhd_mhiargs_t	*mhiargsp,
+	int		flags,
+	int		usetag,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*dd;
+	md_drive_desc		*d = NULL;
+	char			*owner = NULL;
+	int			rval = 0;
+	int			i;
+	int			has_set;
+	int			matches = 0;
+	int			numsides = 0;
+	md_replicalist_t	*rlp = NULL;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	mdsetname_t		*local_sp = NULL;
+	side_t			side = MD_KEYWILD;
+	int			ret = 0;
+	char			*newname = NULL;
+	mdkey_t			side_names_key;
+
+	if ((flags & TAKE_USETAG) || (flags & TAKE_USEIT)) {
+		if (flags & TAKE_USETAG) {
+			if (usetag_take(sp->setno, usetag, ep))
+				return (-1);
+		} else {
+			if (useit_take(sp->setno, ep))
+				return (-1);
+		}
+
+		/* a failed resync here is not fatal */
+		if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, ep) != 0)
+			mdclrerror(ep);
+	}
+
+	/* Do we own the set? */
+	i = own_set(sp, &owner, (flags & TAKE_FORCE), ep);
+	if (! mdisok(ep)) {
+		if (owner != NULL)
+			Free(owner);
+		return (-1);
+	}
+
+	if (i == MD_SETOWNER_NO) {
+		(void) mddserror(ep, MDE_DS_NOTOWNER, sp->setno, owner, NULL,
+		    sp->setname);
+		if (owner != NULL)
+			Free(owner);
+		return (-1);
+	}
+
+	if (owner != NULL) {
+		Free(owner);
+		owner = NULL;
+	}
+
+	/* We already own it, we are done. */
+	if (i == MD_SETOWNER_YES)
+		return (0);
+
+	/*
+	 * The lookup reports into xep; hand that error to the caller
+	 * instead of returning -1 with an empty *ep.
+	 */
+	if ((sd = metaget_setdesc(sp, &xep)) == NULL) {
+		(void) mdstealerror(ep, &xep);
+		return (-1);
+	}
+
+	/* You can not take ownership of a set that has no drives */
+	if (sd->sd_flags & MD_SR_MB_DEVID)
+		dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, ep);
+	else
+		dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+
+	if (dd == NULL) {
+		if (! mdisok(ep))
+			return (-1);
+		return (0);
+	}
+
+	/* END CHECK CODE */
+
+	md_rb_sig_handling_on();
+
+	/* Lock the set on our side */
+	if (clnt_lock_set(mynode(), sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+	/*
+	 * Get the current side number - do not use getmyside()
+	 * as this code is essentially getnodeside() and this saves
+	 * some instructions.
+	 */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+		if (strcmp(sd->sd_nodes[i], mynode()) == 0) {
+			/*
+			 * SKEW is required for the local set
+			 * as side 0 in this set is the node
+			 * associated with it (this node).
+			 */
+			side = i + SKEW;
+			break;
+		}
+	}
+	if (side == MD_KEYWILD) {
+		/*
+		 * The set lock is held and rollback signal handling is
+		 * on, so leave through "out" rather than returning here.
+		 */
+		(void) mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, mynode(),
+		    NULL, mynode());
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Check the local devid namespace to see if the disks
+	 * have been moved. Use the local set first of all as this contains
+	 * entries for the disks in the set.
+	 *
+	 * This is being done before the tk_own_bydd because the disks
+	 * in the dd list could be wrong! But it should be done with the lock
+	 * held for the set.
+	 */
+	local_sp = metasetname(MD_LOCAL_NAME, ep);
+	for (d = dd; d != NULL; d = d->dd_next) {
+		/*
+		 * Actually do the check of the disks.
+		 */
+		ret = meta_upd_ctdnames(&local_sp, 0, side, d->dd_dnp,
+		    &newname, ep);
+
+		if ((ret == METADEVADM_ERR) ||
+		    (ret == METADEVADM_DSKNAME_ERR)) {
+			/* check failed in some unknown manner */
+			rval = -1;
+			goto out;
+		}
+
+		if ((ret != METADEVADM_DISKMOVE) || (newname == NULL))
+			continue;
+
+		/*
+		 * Update the dd namelist so that the rpc.metamhd
+		 * gets the correct disks to reserve - it is the rname
+		 * we are interested in.
+		 *
+		 * Need to save the side names key as this points to the
+		 * namespace entry that will need to be updated. In addition
+		 * the call to meta_make_sidenmlist does not actually set the
+		 * namespace key.
+		 */
+		side_names_key = d->dd_dnp->side_names_key;
+		metafreedrivename(d->dd_dnp);
+		d->dd_dnp = metadrivename(&sp, metadiskname(newname), ep);
+		Free(newname);
+		/* null newname so we are reset for next time through */
+		newname = NULL;
+
+		/*
+		 * NOTE(review): presumably metadrivename() returns NULL on
+		 * failure; do not dereference the result in that case.
+		 */
+		if (d->dd_dnp == NULL) {
+			rval = -1;
+			goto out;
+		}
+
+		ret = meta_make_sidenmlist(sp, d->dd_dnp, ep);
+		d->dd_dnp->side_names_key = side_names_key;
+		if (ret == -1) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+
+	RB_TEST(1, "take", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "take", ep)
+
+	if (!MD_ATSET_DESC(sd)) {
+		if (tk_own_bydd(sp, dd, mhiargsp, FALSE, ep))
+			goto rollback;
+	}
+
+	RB_TEST(3, "take", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(4, "take", ep)
+
+	if (clnt_stimeout(mynode(), sp, mhiargsp, ep) == -1)
+		goto rollback;
+
+	if (setup_db_bydd(sp, dd, (flags & TAKE_FORCE), ep) == -1) {
+		if (! mdismddberror(ep, MDE_DB_ACCOK) &&
+		    ! mdismddberror(ep, MDE_DB_TAGDATA))
+			goto rollback;
+		mdclrerror(ep);
+	}
+
+	RB_TEST(5, "take", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(6, "take", ep)
+
+	/* Snarf set of traditional diskset doesn't use stale information */
+	if (snarf_set(sp, FALSE, ep)) {
+		if (mdismddberror(ep, MDE_DB_STALE) ||
+		    mdismddberror(ep, MDE_DB_ACCOK) ||
+		    mdismddberror(ep, MDE_DB_TAGDATA)) {
+			rval = -1;
+			goto out;
+		}
+
+		if (! mdismddberror(ep, MDE_DB_NODB) &&
+		    ! mdismddberror(ep, MDE_DB_NOTOWNER))
+			goto rollback;
+
+		/*
+		 * Look at the set on all other hosts, if every other host
+		 * has the same set with a larger genid, then we destroy this
+		 * copy.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip this node */
+			if (strcmp(sd->sd_nodes[i], mynode()) == 0)
+				continue;
+
+			numsides++;
+
+			has_set = nodehasset(sp, sd->sd_nodes[i],
+			    NHS_NST_EQ_G_GT, &xep);
+
+			if (has_set < 0) {
+				if (! mdiserror(&xep, MDE_NO_SET) &&
+				    ! mdismddberror(&xep, MDE_DB_NODB))
+					goto rollback;
+				matches++;
+				mdclrerror(&xep);
+				continue;
+			}
+
+			if (has_set)
+				matches++;
+		}
+
+		/* Destroy the set */
+		if (numsides > 0 && (numsides - matches) == 0) {
+			if (meta_set_destroy(sp, FALSE, &xep))
+				mdclrerror(&xep);
+			(void) mddserror(ep, MDE_DS_SETCLEANUP, sp->setno,
+			    sp->setname, NULL, mynode());
+			rval = -1;
+			goto out;
+		}
+		goto rollback;
+	}
+
+	rval = pathname_reload(&sp, sp->setno, ep);
+	if ((rval == METADEVADM_ERR) || (rval == METADEVADM_DSKNAME_ERR)) {
+		goto rollback;
+	}
+
+
+	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
+		goto rollback;
+
+	if (upd_dr_dbinfo(sp, sd, dd, rlp, (flags & TAKE_FORCE), ep) < 0) {
+		metafreereplicalist(rlp);
+		goto rollback;
+	}
+
+	metafreereplicalist(rlp);
+
+	/*
+	 * If the set doesn't have the MD_SR_MB_DEVID bit set, i.e
+	 * the drives in the set don't have the device id information,
+	 * then stick it in if possible.
+	 *
+	 * If updating the master block fails for whatever reason, it's
+	 * okay. It just means the disk(s) in the diskset won't be self
+	 * identifying.
+	 */
+	if (!(sd->sd_flags & MD_SR_MB_DEVID)) {
+		/* Lock the set on current set members */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* We already locked this side */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+		rb_level = 4;	/* level 4 */
+
+		if (meta_update_mb(sp, dd, ep) == 0) {
+			/* update the sr_flags on all hosts */
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_upd_sr_flags(sd->sd_nodes[i],
+				    sp, (sd->sd_flags | MD_SR_MB_DEVID), ep))
+					goto rollback;
+			}
+		}
+
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Unlocking of this side is done later */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	}
+
+	/*
+	 * If we get here, we need to unlock the set before the resync
+	 * gets called, otherwise the "daemon" will hold the set lock
+	 * until the resync is done!
+	 */
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+	cl_set_setkey(NULL);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	/* We try to get things resync'ed, but this can fail */
+	mdclrerror(&xep);
+	if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, &xep) != 0) {
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+
+	RB_TEST(7, "take", ep)
+
+	return (rval);
+
+out:
+	/* failure exit without rollback: just drop the set lock(s) */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+	if (!(sd->sd_flags & MD_SR_MB_DEVID) && (rb_level > 2)) {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* We already unlocked this side */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	}
+	cl_set_setkey(NULL);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+
+rollback:
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	rval = -1;
+
+	/* level 4 */
+	if (rb_level > 3) {
+		if (sd->sd_flags & MD_SR_MB_DEVID) {
+			/* update the sr_flags on all hosts */
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_upd_sr_flags(sd->sd_nodes[i], sp,
+				    (sd->sd_flags & ~MD_SR_MB_DEVID), &xep))
+					mdclrerror(&xep);
+			}
+		}
+
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* We will unlock this side below */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 3 */
+	if (rb_level > 2) {
+		if (halt_set(sp, &xep))
+			mdclrerror(&xep);
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		if (clnt_stimeout(mynode(), sp, &defmhiargs, &xep) == -1)
+			mdclrerror(&xep);
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		if (!MD_ATSET_DESC(sd)) {
+			if (rel_own_bydd(sp, dd, FALSE, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep))
+		mdclrerror(&xep);
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+}
+
+int
+meta_set_release(
+ mdsetname_t *sp,
+ md_error_t *ep
+)
+{
+ int rval = 0;
+ md_drive_desc *dd;
+ mhd_mhiargs_t mhiargs;
+ sigset_t oldsigs;
+ md_setkey_t *cl_sk;
+ int rb_level = 0;
+ md_error_t xep = mdnullerror;
+
+ /*
+ * Overview: release this node's ownership of set sp. After the
+ * checks below the set is locked on this node and then, in order,
+ * halted (halt_set), its drives are released (rel_own_bydd) and the
+ * timeouts are reset to defmhiargs. rb_level records the step being
+ * attempted so the rollback path can undo the earlier steps in
+ * reverse order. Returns 0 on success and -1 on failure.
+ *
+ * Make sure we own the set
+ */
+ if (meta_check_ownership(sp, ep) != 0)
+ return (-1);
+
+ /*
+ * Get the drive descriptors. A NULL list with no error in ep is
+ * not treated as a failure; dd is then passed on as NULL.
+ */
+ if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+ ep)) == NULL)
+ if (! mdisok(ep))
+ return (-1);
+
+ /* Get timeout values in case we need to roll back this release */
+ (void) memset(&mhiargs, '\0', sizeof (mhiargs));
+ if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0)
+ return (-1);
+
+ /* END CHECK CODE */
+
+ md_rb_sig_handling_on();
+
+ /* Lock the set on our side */
+ if (clnt_lock_set(mynode(), sp, ep)) {
+ rval = -1;
+ goto out;
+ }
+
+ RB_TEST(1, "release", ep)
+
+ RB_PREEMPT;
+ rb_level = 1; /* level 1: about to halt the set */
+
+ RB_TEST(2, "release", ep)
+
+ if (halt_set(sp, ep))
+ goto rollback;
+
+ RB_TEST(3, "release", ep)
+
+ RB_PREEMPT;
+ rb_level = 2; /* level 2: about to release the drives */
+
+ RB_TEST(4, "release", ep)
+
+ if (rel_own_bydd(sp, dd, FALSE, ep))
+ goto rollback;
+
+ RB_TEST(5, "release", ep)
+
+ RB_PREEMPT;
+ rb_level = 3; /* level 3: about to reset the timeouts */
+
+ RB_TEST(6, "release", ep)
+
+ if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1)
+ goto rollback;
+
+ RB_TEST(7, "release", ep)
+
+out:
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+ if (rval == 0)
+ (void) mdstealerror(ep, &xep);
+ rval = -1;
+ }
+ cl_set_setkey(NULL);
+
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+ return (rval);
+
+rollback:
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ rval = -1;
+
+ /* level 3: put back the timeout values saved in mhiargs */
+ if (rb_level > 2) {
+ if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1)
+ mdclrerror(&xep);
+ }
+
+ /* level 2: take the drives again */
+ if (rb_level > 1) {
+ if (tk_own_bydd(sp, dd, &mhiargs, FALSE, &xep))
+ mdclrerror(&xep);
+ }
+
+ /* level 1: set up the replicas and snarf the set again */
+ if (rb_level > 0) {
+ if (setup_db_bydd(sp, dd, TRUE, &xep) == -1)
+ mdclrerror(&xep);
+
+ /* Snarf set of trad diskset doesn't use stale information */
+ if (snarf_set(sp, FALSE, &xep))
+ mdclrerror(&xep);
+ }
+
+ /* level 0: drop the set lock taken on this node */
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (clnt_unlock_set(mynode(), cl_sk, &xep))
+ mdclrerror(&xep);
+ cl_set_setkey(NULL);
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+ return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_setup.c b/usr/src/lib/lvm/libmeta/common/meta_setup.c
new file mode 100644
index 0000000000..64bdc73c3c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_setup.c
@@ -0,0 +1,897 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * setup utility
+ */
+
+#include "meta_set_prv.h"
+#include <sys/resource.h>
+#include <syslog.h>
+
+
+/* globals */
+char *myname = "";
+FILE *metalogfp = NULL;
+int metasyslog = 0;
+uint_t verbosity = 0;
+hrtime_t start_time = 0;
+sigset_t allsigs;
+
+/* locals */
+static int rb_signal_handling = FALSE;
+static int rb_signal_caught = FALSE;
+static int rb_signal_which = 0;
+static size_t metansig = 0;
+static struct sigaction *metahandlers = NULL;
+#ifdef _DEBUG_MALLOC_INC
+static ulong_t malloc_histid_begin;
+static ulong_t malloc_histid_end;
+static ulong_t malloc_inuse_begin;
+static ulong_t malloc_inuse_end;
+#endif /* _DEBUG_MALLOC_INC */
+
+/* forwards */
+static void md_catcher(int sig);
+
+/*
+ * push/pop signal handlers
+ */
+/*
+ * Install `handler' for signal `sig', saving the previous disposition in
+ * metahandlers[sig] so that md_popsig() can restore it.
+ *
+ * metansig is the highest index for which metahandlers[] has storage.
+ * Returns 0 on success; on failure returns the value of mdsyserror(),
+ * which records errno in *ep.
+ */
+static int
+md_pushsig(
+	unsigned	sig,
+	void		(*handler)(int sig),
+	md_error_t	*ep
+)
+{
+	struct sigaction	newhandler;
+
+	/* expand vector as necessary */
+	if (metahandlers == NULL) {
+		metahandlers = Zalloc((sig + 1) * sizeof (metahandlers[0]));
+		metansig = sig;
+	} else if (sig > metansig) {
+		metahandlers = Realloc(metahandlers,
+		    ((sig + 1) * sizeof (metahandlers[0])));
+		/*
+		 * Zero only the slots that were just added, i.e.
+		 * metansig + 1 through sig.  Slot metansig already holds
+		 * a saved disposition and must not be wiped.
+		 */
+		(void) memset(&metahandlers[metansig + 1], 0,
+		    ((sig - metansig) * sizeof (metahandlers[0])));
+		metansig = sig;
+	}
+
+	/*
+	 * Every signal is blocked while the handler runs (full sa_mask).
+	 * NOTE(review): sa_flags is 0, so no alternate signal stack is
+	 * requested here.
+	 */
+	newhandler.sa_flags = 0;
+	if (sigfillset(&newhandler.sa_mask) < 0)
+		return (mdsyserror(ep, errno,
+		    "sigfillset(&newhandler.sa_mask)"));
+	newhandler.sa_handler = handler;
+
+	/* push handler; the old disposition lands in metahandlers[sig] */
+	if (sigaction(sig, &newhandler, &metahandlers[sig]) < 0)
+		return (mdsyserror(ep, errno, "sigaction(&newhandler)"));
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Restore the disposition saved for `sig' in metahandlers[].
+ * Returns 0 on success, otherwise the value of mdsyserror().
+ */
+static int
+md_popsig(
+	unsigned	sig,
+	md_error_t	*ep
+)
+{
+	struct sigaction	*saved;
+
+	/* can't pop what isn't pushed */
+	assert(sig <= metansig);
+	saved = &metahandlers[sig];
+	assert(saved->sa_handler != md_catcher);
+
+	/* pop handler */
+	if (sigaction(sig, saved, NULL) >= 0)
+		return (0);
+
+	return (mdsyserror(ep, errno, "sigaction(&metahandlers)"));
+}
+
+/*
+ * Build the lock file name for set `setno': METALOCK itself for the local
+ * set, otherwise METALOCK with ".<setno>" appended.
+ *
+ * Returns a strdup()'d string (callers in this file free() it), or
+ * whatever strdup() returns on failure.
+ */
+char *
+meta_lock_name(
+	set_t	setno
+)
+{
+	char	lockname[30];
+
+	if (setno == MD_LOCAL_SET)
+		return (strdup(METALOCK));
+
+	/* cast so the argument matches the %ld conversion */
+	(void) snprintf(lockname, sizeof (lockname), "%s.%ld", METALOCK,
+	    (long)setno);
+	return (strdup(lockname));
+}
+
+#define META_LOCK_FD(sp) ((sp)->lockfd)
+#define META_LOCK_NAME(sp) (meta_lock_name((sp)->setno))
+
+/*
+ * Make sure the lock file for set `sp' is open.
+ *
+ * If the set already has a descriptor it is kept.  Otherwise the file is
+ * opened write-only, and created with mode 0644 if it does not exist.  A
+ * read-only filesystem (EROFS) is not an error: the set is left with
+ * MD_NO_LOCK.  Returns 0 on success, -1 with errno recorded in *ep on
+ * failure.
+ */
+static int
+meta_lock_open(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	int	fd = META_LOCK_FD(sp);
+	char	*path = META_LOCK_NAME(sp);
+	int	rval = 0;
+
+	/* only open when not already open */
+	if (fd < 0) {
+		assert(fd == MD_NO_LOCK);
+
+		fd = open(path, O_WRONLY, 0);
+		if (fd < 0) {
+			if (errno == EROFS) {
+				/* no locking on a read-only filesystem */
+				fd = MD_NO_LOCK;
+			} else if (errno != ENOENT) {
+				(void) mdsyserror(ep, errno, path);
+				rval = -1;
+			} else {
+				/* lock file is missing: create it */
+				fd = open(path, (O_WRONLY|O_CREAT), 0644);
+				if ((fd < 0) || (fchmod(fd, 0644) != 0)) {
+					(void) mdsyserror(ep, errno, path);
+					rval = -1;
+				}
+			}
+		}
+	}
+
+	if (path != NULL)
+		free(path);
+
+	if (rval == 0) {
+		/* return success */
+		META_LOCK_FD(sp) = fd;
+	} else if (fd >= 0) {
+		/* flag failure */
+		(void) close(fd);
+	}
+	return (rval);
+}
+
+/*
+ * Close the lock file descriptor of set `sp' and mark the set as having
+ * no lock (MD_NO_LOCK).  Returns 0, or -1 if close() failed; in that case
+ * errno is recorded in *ep when ep is not NULL.
+ */
+static int
+meta_lock_close(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	char	*path;
+	int	failed = (close(META_LOCK_FD(sp)) != 0);
+
+	if (failed && (ep != NULL)) {
+		path = META_LOCK_NAME(sp);
+		(void) mdsyserror(ep, errno, path);
+		if (path != NULL)
+			free(path);
+	}
+
+	META_LOCK_FD(sp) = MD_NO_LOCK;
+	return (failed ? -1 : 0);
+}
+
+/*
+ * unlock
+ *
+ * Drop the lockf() lock on the set's lock file and close it.  A set with
+ * no lock descriptor (MD_NO_LOCK) is left alone.  Returns 0 on success and
+ * -1 on failure.
+ */
+int
+meta_unlock(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	int	fd = META_LOCK_FD(sp);
+
+	/* ignore read-only filesystem */
+	if (fd == MD_NO_LOCK)
+		return (0);
+
+	assert(fd >= 0);
+
+	/* unlock and discard */
+	if (lockf(fd, F_ULOCK, 0) == 0)
+		return (meta_lock_close(sp, ep));
+
+	/* keep the lockf() error; the close result is not reported */
+	(void) mdsyserror(ep, errno, METALOCK);
+	(void) meta_lock_close(sp, NULL);
+	return (-1);
+}
+
+/*
+ * lock
+ *
+ * Take the lockf() lock on the lock file of set `sp', waiting for it if
+ * another holder has it.  When print_status is set, a "waiting on" message
+ * is written to stderr before blocking.  A set left with MD_NO_LOCK by
+ * meta_lock_open() (read-only filesystem) counts as success.
+ *
+ * Returns 0 on success, -1 on failure with the error in *ep; on failure
+ * the lock file is closed again.
+ */
+int
+meta_lock(
+	mdsetname_t	*sp,
+	int		print_status,
+	md_error_t	*ep
+)
+{
+	/* initialised because the failure path tests it */
+	int	lockfd = MD_NO_LOCK;
+	char	*lockname = NULL;
+
+	/* open lock file */
+	if (meta_lock_open(sp, ep) != 0) {
+		assert(META_LOCK_FD(sp) == MD_NO_LOCK);
+		goto failure;
+	}
+
+	/* ignore read-only filesystem */
+	if ((lockfd = META_LOCK_FD(sp)) == MD_NO_LOCK)
+		goto success;
+	assert(lockfd >= 0);
+
+	lockname = META_LOCK_NAME(sp);
+
+	/* grab lock: try without blocking first, then wait */
+	if (lockf(lockfd, F_TLOCK, 0) != 0) {
+		if ((errno != EACCES) && (errno != EAGAIN)) {
+			(void) mdsyserror(ep, errno, lockname);
+			goto failure;
+		}
+		if (print_status)
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "%s: waiting on %s\n"),
+			    myname, lockname);
+		if (lockf(lockfd, F_LOCK, 0) != 0) {
+			(void) mdsyserror(ep, errno, lockname);
+			goto failure;
+		}
+	}
+
+	/* return success */
+success:
+	if (lockname != NULL)
+		free(lockname);
+	return (0);
+
+	/* flag failure */
+failure:
+	if (lockname != NULL)
+		free(lockname);
+	/*
+	 * *ep already describes the failure; pass NULL so that a close
+	 * error cannot replace it (as meta_unlock() does).
+	 */
+	if (lockfd >= 0)
+		(void) meta_lock_close(sp, NULL);
+	return (-1);
+}
+
+/*
+ * Like meta_lock(), but never waits: if the lock is held elsewhere
+ * (lockf() fails with EACCES or EAGAIN) the call fails with EAGAIN
+ * recorded in *ep.
+ *
+ * Returns 0 on success, -1 on failure; on failure the lock file is closed
+ * again.
+ */
+int
+meta_lock_nowait(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	/* initialised because the failure path tests it */
+	int	lockfd = MD_NO_LOCK;
+	char	*lockname = NULL;
+
+	/* open lock file */
+	if (meta_lock_open(sp, ep) != 0) {
+		assert(META_LOCK_FD(sp) == MD_NO_LOCK);
+		goto failure;
+	}
+
+	/* ignore read-only filesystem */
+	if ((lockfd = META_LOCK_FD(sp)) == MD_NO_LOCK)
+		goto success;
+	assert(lockfd >= 0);
+
+	lockname = META_LOCK_NAME(sp);
+
+	/* grab lock */
+	if (lockf(lockfd, F_TLOCK, 0) != 0) {
+		if ((errno != EACCES) && (errno != EAGAIN)) {
+			(void) mdsyserror(ep, errno, lockname);
+			goto failure;
+		}
+		/* lock is busy: always reported as EAGAIN */
+		(void) mdsyserror(ep, EAGAIN, lockname);
+		goto failure;
+	}
+
+	/* return success */
+success:
+	if (lockname != NULL)
+		free(lockname);
+	return (0);
+
+	/* flag failure */
+failure:
+	if (lockname != NULL)
+		free(lockname);
+	/*
+	 * *ep already describes the failure; pass NULL so that a close
+	 * error cannot replace it (as meta_unlock() does).
+	 */
+	if (lockfd >= 0)
+		(void) meta_lock_close(sp, NULL);
+	return (-1);
+}
+
+/*
+ * lock status
+ *
+ * Open the set's lock file if necessary and probe it with
+ * lockf(F_TEST).  Returns 0 when the probe succeeds or the set has no
+ * lock file descriptor (MD_NO_LOCK); returns -1 with the error in *ep
+ * otherwise.
+ */
+int
+meta_lock_status(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	int	fd;
+	char	*path;
+
+	/* open lock file */
+	if (meta_lock_open(sp, ep) != 0) {
+		assert(META_LOCK_FD(sp) == MD_NO_LOCK);
+		return (-1);
+	}
+
+	/* ignore read-only filesystem */
+	fd = META_LOCK_FD(sp);
+	if (fd == MD_NO_LOCK)
+		return (0);
+	assert(fd >= 0);
+
+	/* test lock */
+	if (lockf(fd, F_TEST, 0) == 0)
+		return (0);
+
+	path = META_LOCK_NAME(sp);
+	(void) mdsyserror(ep, errno, path);
+	if (path != NULL)
+		free(path);
+	return (-1);
+}
+
+/*
+ * setup for syslog daemon output
+ *
+ * Opens the system log under `name' ("md" when name is NULL or empty)
+ * and sets metasyslog.
+ */
+static void
+md_syslog(
+	char	*name	/* name of program */
+)
+{
+	char	*ident = name;
+
+	if ((ident == NULL) || (ident[0] == '\0'))
+		ident = "md";
+
+	openlog(ident, LOG_CONS, LOG_DAEMON);
+	metasyslog = 1;
+}
+
+/*
+ * daemonize: put in background
+ *
+ * Forks. The parent returns the child's pid (non-zero) straight away.
+ * The child closes the admin device and the RPC connections, drops the
+ * lock of set sp, closes its remaining descriptors, calls setsid(),
+ * switches to syslog output and returns 0. On failure the value of
+ * mdsyserror() or -1 is returned. When MD_DEBUG contains "NODAEMON"
+ * nothing is done and 0 is returned.
+ */
+int
+md_daemonize(
+ mdsetname_t *sp,
+ md_error_t *ep
+)
+{
+ char *p;
+ struct rlimit rlim;
+ pid_t pid;
+ int i;
+
+ /* debug */
+ if (((p = getenv("MD_DEBUG")) != NULL) &&
+ (strstr(p, "NODAEMON") != NULL)) {
+ return (0); /* do nothing */
+ }
+
+ /* get number of file descriptors */
+ if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
+ return (mdsyserror(ep, errno, "getrlimit(RLIMIT_NOFILE)"));
+ }
+
+ /* fork; the parent only returns the child's pid to its caller */
+ if ((pid = fork()) == -1)
+ return (mdsyserror(ep, errno, "fork"));
+ else if (pid != 0)
+ return (pid);
+
+ /*
+ * We need to close the admin device and reset the specialfd to force
+ * the child process to reopen it, since the loop below closes every
+ * descriptor under the RLIMIT_NOFILE soft limit other than stdout,
+ * stderr and metalogfp in the child.
+ */
+ if (close_admin(ep) != 0)
+ return (-1);
+
+ /* close RPC connections */
+ metarpccloseall();
+
+ /* drop lock */
+ if (meta_unlock(sp, ep) != 0)
+ return (-1);
+
+ if (rlim.rlim_cur != RLIM_INFINITY) {
+ /*
+ * close all but stdout, stderr, and metalogfp
+ * (nothing is closed when the soft limit is RLIM_INFINITY)
+ */
+
+ for (i = 0; (i < rlim.rlim_cur); ++i) {
+ if ((i == fileno(stdout)) ||
+ (i == fileno(stderr)) ||
+ ((metalogfp != NULL) &&
+ (i == fileno(metalogfp)))) {
+ continue;
+ }
+ (void) close(i);
+ }
+ }
+
+ /* put in own process group */
+ if (setsid() == -1)
+ return (mdsyserror(ep, errno, "setsid"));
+
+ /* setup syslog */
+ md_syslog(myname);
+
+ /* return success */
+ return (0);
+}
+
+/*
+ * flush and sync fp
+ *
+ * fflush() the stream, then fsync() its descriptor.  The results of both
+ * calls are ignored.
+ */
+static void
+flushfp(
+	FILE	*fp
+)
+{
+	int	fd;
+
+	(void) fflush(fp);
+	fd = fileno(fp);
+	(void) fsync(fd);
+}
+
+/*
+ * reset and exit utility
+ *
+ * Closes the RPC connections, drops the lock of set sp (when sp is not
+ * NULL), logs the exit value to metalogfp and/or syslog, and calls
+ * exit(eval). If the unlock fails the error is printed and an eval of 0
+ * is changed to 1. Does not return.
+ */
+void
+md_exit(
+ mdsetname_t *sp,
+ int eval
+)
+{
+ md_error_t status = mdnullerror;
+ md_error_t *ep = &status;
+
+
+ /* close RPC connections */
+ metarpccloseall();
+
+ if (sp != NULL) {
+ if (meta_unlock(sp, ep) != 0) {
+ mde_perror(ep, "");
+ mdclrerror(ep);
+ if (eval == 0)
+ eval = 1;
+ }
+ }
+
+ /* flush name caches (DEBUG builds only) */
+#ifdef DEBUG
+ metaflushnames(1);
+#endif /* DEBUG */
+
+ /* log exit; the log file is flushed, synced and closed */
+ if (metalogfp != NULL) {
+ md_logpfx(metalogfp);
+ (void) fprintf(metalogfp, dgettext(TEXT_DOMAIN,
+ "exiting with %d\n"), eval);
+ flushfp(metalogfp);
+ (void) fclose(metalogfp);
+ metalogfp = NULL;
+ }
+ if ((metasyslog) && (eval != 0)) {
+ syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+ "exiting with %d\n"), eval);
+ closelog();
+ metasyslog = 0;
+ }
+
+ /*
+ * check arena, print malloc usage
+ * (only when built with _DEBUG_MALLOC_INC; the usage report further
+ * requires MD_DEBUG to contain "MALLOC")
+ */
+#ifdef _DEBUG_MALLOC_INC
+ (void) malloc_chain_check(1);
+ {
+ char *p;
+
+ if (((p = getenv("MD_DEBUG")) != NULL) &&
+ (strstr(p, "MALLOC") != NULL)) {
+ malloc_inuse_end = malloc_inuse(&malloc_histid_end);
+ (void) fprintf(stderr, "%s: end malloc_inuse %lu\n",
+ myname, malloc_inuse_end);
+ if (malloc_inuse_end != malloc_inuse_begin) {
+ malloc_list(fileno(stderr),
+ malloc_histid_begin, malloc_histid_end);
+ }
+ }
+ }
+#endif /* _DEBUG_MALLOC_INC */
+
+ /* exit with value */
+ exit(eval);
+}
+
+/*
+ * signal catcher
+ *
+ * Handler installed by md_init() through md_pushsig(). Logs the signal,
+ * then either records it for deferred handling (rollback critical
+ * section, see below) or restores the previous disposition and re-sends
+ * the signal to this process.
+ *
+ * NOTE(review): this calls strsignal(), snprintf(), dgettext() and
+ * md_eprintf() from signal context - confirm that is acceptable here.
+ */
+static void
+md_catcher(
+ int sig
+)
+{
+ char buf[128];
+ char *msg;
+ md_error_t status = mdnullerror;
+ md_error_t *ep = &status;
+ struct sigaction defhandler;
+
+ /* log signal */
+ if ((msg = strsignal(sig)) == NULL) {
+ (void) snprintf(buf, sizeof (buf),
+ dgettext(TEXT_DOMAIN, "unknown signal %d"), sig);
+ msg = buf;
+ }
+ md_eprintf("%s\n", msg);
+
+ /*
+ * In roll_back critical section handling, the first instance of a user
+ * generated signal is caught, a flag is set to allow preemption at a
+ * "convenient" point and md_catcher returns. If the user continues to
+ * generate the signal, the second instance will invoke the default
+ * handler and exit. SIGABRT, SIGBUS and SIGSEGV are never deferred.
+ */
+ if (rb_signal_handling == TRUE) {
+ if (sig != SIGABRT && sig != SIGBUS && sig != SIGSEGV) {
+ if (rb_signal_caught == FALSE) {
+ rb_signal_caught = TRUE;
+ rb_signal_which = sig;
+ return;
+ }
+ }
+ }
+
+ /*
+ * let the default handler do its thing: restore the saved
+ * disposition, or fall back to SIG_DFL if that fails
+ */
+ if (md_popsig(sig, ep) != 0) {
+ mde_perror(ep, "");
+ mdclrerror(ep);
+ defhandler.sa_flags = 0;
+ if (sigfillset(&defhandler.sa_mask) < 0) {
+ (void) mdsyserror(ep, errno,
+ "sigfillset(&defhandler.sa_mask)");
+ mde_perror(ep, "");
+ md_exit(NULL, 1);
+ }
+ defhandler.sa_handler = SIG_DFL;
+ if (sigaction(sig, &defhandler, NULL) < 0) {
+ (void) mdsyserror(ep, errno, "sigaction(&defhandler)");
+ mde_perror(ep, "");
+ md_exit(NULL, 1);
+ }
+ }
+
+ md_post_sig(sig);
+}
+
+/*
+ * Send signal `sig' to this process.  If kill() fails the error is
+ * printed and md_exit() is called with -sig.
+ */
+void
+md_post_sig(int sig)
+{
+	if (kill(getpid(), sig) == 0)
+		return;
+
+	md_perror("kill(getpid())");
+	md_exit(NULL, -sig);
+}
+
+/*
+ * Report whether md_catcher() has deferred a signal (rb_signal_caught).
+ */
+int
+md_got_sig(void)
+{
+	return (rb_signal_caught);
+}
+
+/*
+ * Return the number of the signal md_catcher() deferred
+ * (rb_signal_which; 0 after md_rb_sig_handling_off()).
+ */
+int
+md_which_sig(void)
+{
+	return (rb_signal_which);
+}
+
+/*
+ * Enter a rollback critical section: from now on md_catcher() defers the
+ * first deferrable signal instead of acting on it.
+ */
+void
+md_rb_sig_handling_on(void)
+{
+	rb_signal_handling = TRUE;
+}
+
+/*
+ * Leave a rollback critical section.  The deferred-signal state is reset
+ * first; then, if the caller says a signal was seen, `sig' is re-posted
+ * to this process.
+ */
+void
+md_rb_sig_handling_off(int sig_seen, int sig)
+{
+	rb_signal_which = 0;
+	rb_signal_caught = FALSE;
+	rb_signal_handling = FALSE;
+
+	if (!sig_seen)
+		return;
+
+	md_post_sig(sig);
+}
+
+/*
+ * setup metaclust variables
+ *
+ * Records the start time (gethrtime()) and stores `level' as the
+ * verbosity.
+ */
+void
+setup_mc_log(
+	uint_t	level
+)
+{
+	/* initialise externals */
+	start_time = gethrtime();
+	verbosity = level;
+}
+
+/*
+ * initialize utility
+ *
+ * Runs md_init_nosig() and then installs md_catcher() for the common
+ * signals, after filling allsigs.  Returns the non-zero result of
+ * md_init_nosig() if that fails, the value of mdsyserror() if sigfillset()
+ * fails, -1 if a handler cannot be installed, and 0 on success.
+ */
+int
+md_init(
+	int		argc,
+	char		*argv[],
+	int		dosyslog,
+	int		doadmin,
+	md_error_t	*ep
+)
+{
+	/* catch common signals */
+	static const int	caught[] = {
+		SIGHUP, SIGINT, SIGQUIT, SIGABRT,
+		SIGBUS, SIGSEGV, SIGPIPE, SIGTERM
+	};
+	size_t	idx;
+	int	ret;
+
+	/* initialize everything but the signals */
+	ret = md_init_nosig(argc, argv, dosyslog, doadmin, ep);
+	if (ret != 0)
+		return (ret);
+
+	if (sigfillset(&allsigs) < 0)
+		return (mdsyserror(ep, errno, "sigfillset(&allsigs)"));
+
+	/* stop at the first handler that cannot be installed */
+	for (idx = 0; idx < sizeof (caught) / sizeof (caught[0]); idx++) {
+		if (md_pushsig(caught[idx], md_catcher, ep) != 0)
+			return (-1);
+	}
+
+	/* return success */
+	return (0);
+}
+
+
+/*
+ * initialize utility without setting up sighandlers
+ * setting up signal handlers in libmeta can affect other
+ * programs that link with libmeta but have their own handlers
+ *
+ * Sets myname to the last path component of argv[0] (it points into
+ * argv[0], nothing is copied), optionally opens syslog, appends the
+ * command line to METALOG when METALOGENV is set in the environment,
+ * optionally opens the admin device, and flushes the name caches.
+ * Returns 0 on success and -1 if the admin device cannot be opened.
+ * The TEXT_DOMAIN guard inside the body repeats the one at the top of
+ * the file.
+ */
+int
+md_init_nosig(
+ int argc,
+ char *argv[],
+ int dosyslog,
+ int doadmin,
+ md_error_t *ep
+)
+{
+ /* setup myname */
+ if ((myname = strrchr(argv[0], '/')) != NULL)
+ ++myname;
+ else
+ myname = argv[0];
+
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+ /* print malloc usage */
+#ifdef _DEBUG_MALLOC_INC
+ {
+ char *p;
+
+ if (((p = getenv("MD_DEBUG")) != NULL) &&
+ (strstr(p, "MALLOC") != NULL)) {
+ malloc_inuse_begin =
+ malloc_inuse(&malloc_histid_begin);
+ (void) fprintf(stderr, "%s: begin malloc_inuse %lu\n",
+ myname, malloc_inuse_begin);
+ }
+ }
+#endif /* _DEBUG_MALLOC_INC */
+
+ /* open syslog */
+ if (dosyslog)
+ md_syslog(myname);
+
+ /* log command; failure to open the log file is silently ignored */
+ if (getenv(METALOGENV) != NULL) {
+ if ((metalogfp = fopen(METALOG, "a")) != NULL) {
+ int i;
+
+ (void) fchmod(fileno(metalogfp), 0664);
+ md_logpfx(metalogfp);
+ for (i = 1; (i < argc); ++i)
+ (void) fprintf(metalogfp, " %s", argv[i]);
+ (void) fprintf(metalogfp, "\n");
+ flushfp(metalogfp);
+ }
+ }
+
+ /* make sure we can open the admin device before we do anything else */
+ if (doadmin)
+ if (open_admin(ep) < 0)
+ return (-1);
+
+ /* flush name caches */
+ metaflushnames(1);
+
+ /* return success */
+ return (0);
+}
+
+/*
+ * (re)initialize daemon
+ *
+ * The first successful call runs md_init() with `name' as the only
+ * argument and with both syslog and the admin device enabled; later
+ * calls do nothing.  Returns 0 on success and -1 if md_init() fails.
+ */
+int
+md_init_daemon(
+	char		*name,
+	md_error_t	*ep
+)
+{
+	static int	already = 0;
+
+	/* setup has been done before */
+	if (already)
+		return (0);
+
+	/* setup: dosyslog = 1, doadmin = 1 */
+	if (md_init(1, &name, 1, 1, ep) != 0)
+		return (-1);
+	already = 1;
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Roll back functions for handling sync and async cleanup.
+ */
+
+/*
+ * Block or restore signals around rollback processing.
+ *
+ * block == TRUE:  add the signals in allsigs to the blocked set and save
+ *		   the previous mask in *oldsigs.
+ * otherwise:	   reinstate the mask held in *oldsigs.
+ *
+ * Returns 0 on success, -1 with errno recorded in *ep on failure.
+ */
+int
+procsigs(int block, sigset_t *oldsigs, md_error_t *ep)
+{
+	if (block != TRUE) {
+		if (sigprocmask(SIG_SETMASK, oldsigs, NULL) >= 0)
+			return (0);
+		(void) mdsyserror(ep, errno,
+		    "sigprocmask(SIG_SETMASK)");
+		return (-1);
+	}
+
+	if (sigprocmask(SIG_BLOCK, &allsigs, oldsigs) >= 0)
+		return (0);
+	(void) mdsyserror(ep, errno, "sigprocmask(SIG_BLOCK)");
+	return (-1);
+}
+
+#ifdef DEBUG
+/*
+ * Rollback test point (DEBUG builds).
+ *
+ * The test point fires when the environment variable META_RBT_TPT, taken
+ * as an absolute value, equals rbt_sel_tpt and - if both META_RBT_TAG and
+ * rbt_sel_tag are given - the two tags are equal.  A negative
+ * META_RBT_TPT additionally makes the test point wait in sigsuspend() for
+ * a signal before failing.
+ *
+ * Returns 0 when the test point does not fire and -1 when it does.  *ep is
+ * set to MDE_TESTERROR unless the wait ended with a signal that
+ * md_got_sig() reports.
+ */
+int
+rb_test(
+	int		rbt_sel_tpt,
+	char		*rbt_sel_tag,
+	md_error_t	*ep
+)
+{
+	char		*rbt_env_tpt = getenv("META_RBT_TPT");
+	char		*rbt_env_tag = getenv("META_RBT_TAG");
+	int		sig = 0;
+	int		rbt_int_tpt;
+	int		rbt_tag_match = 1;
+	sigset_t	curmask;
+	md_error_t	xep = mdnullerror;
+
+	if (rbt_env_tpt == NULL)
+		return (0);
+
+	/* a negative test point number asks for the signal variant */
+	rbt_int_tpt = atoi(rbt_env_tpt);
+	if (rbt_int_tpt < 0) {
+		sig = 1;
+		rbt_int_tpt = -1 * rbt_int_tpt;
+	}
+
+	assert(rbt_sel_tpt != 0);
+
+	if (rbt_int_tpt == 0)
+		return (0);
+
+	if (rbt_env_tag && rbt_sel_tag)
+		if (strcmp(rbt_env_tag, rbt_sel_tag) != 0)
+			rbt_tag_match = 0;
+
+	if (rbt_int_tpt != rbt_sel_tpt || !rbt_tag_match)
+		return (0);
+
+	md_eprintf(
+	    "******************** RB_TEST(%s, %d, sig=%s)\n",
+	    rbt_sel_tag, rbt_sel_tpt,
+	    (sig != 0) ? "True" : "False");
+	if (sig) {
+		md_eprintf("********** sigsuspend()\n");
+		/*
+		 * Only fetch the current mask.  With a NULL set the
+		 * "how" argument is not acted on, but it is an int and
+		 * must not be given a pointer constant.
+		 */
+		if (sigprocmask(SIG_BLOCK, NULL, &curmask) < 0) {
+			(void) mdsyserror(&xep, errno, NULL);
+			mde_perror(&xep, "sigprocmask(GET)");
+			md_exit(NULL, 1);
+		}
+
+		/*
+		 * sigsuspend() only ever returns -1; EINTR is the normal
+		 * "a handler ran" outcome and not a failure.
+		 */
+		if (sigsuspend(&curmask) < 0 && errno != EINTR) {
+			(void) mdsyserror(&xep, errno, NULL);
+			mde_perror(&xep,
+			    "sigsuspend(&curmask)");
+			md_exit(NULL, 1);
+		}
+
+		if (md_got_sig())
+			return (-1);
+	}
+	(void) mderror(ep, MDE_TESTERROR,
+	    "********** rb_test()");
+	md_eprintf("******************** rollback\n");
+	return (-1);
+}
+#else
+/*
+ * Non-DEBUG stand-in for the rollback test point: always records
+ * MDE_TESTERROR in *ep and returns -1.
+ */
+/* ARGSUSED */
+int
+rb_test(
+	int		rbt_sel_tpt,
+	char		*rbt_sel_tag,
+	md_error_t	*ep
+)
+{
+	(void) mderror(ep, MDE_TESTERROR, "******** rb_test:Not supported\n");
+	return (-1);
+}
+#endif /* DEBUG */
diff --git a/usr/src/lib/lvm/libmeta/common/meta_smf.c b/usr/src/lib/lvm/libmeta/common/meta_smf.c
new file mode 100644
index 0000000000..204691a1a3
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_smf.c
@@ -0,0 +1,351 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Service Management Facility (SMF) interfaces.
+ */
+
+#include <stdio.h>
+#include <libscf.h>
+#include <meta.h>
+
+static void enable(char *svc_names[], md_error_t *ep);
+static void disable(char *svc_names[], md_error_t *ep);
+static int enabled(char *svc_name);
+static int online(char *svc_names[], char **names);
+static void wait_online(char *svc_names[]);
+static int is_online(char *svc_name);
+
+/*
+ * NULL-terminated tables of SMF service instance names, one table per
+ * service class.  The meta_smf_*() entry points below pick a table
+ * according to the META_SMF_* flag bits they are given.
+ */
+
+/* Services selected by META_SMF_CORE. */
+static char
+*svm_core_svcs[] = {
+ "system/metainit:default",
+ "system/mdmonitor:default",
+ "network/rpc/meta:default",
+ NULL
+};
+
+/* Services selected by META_SMF_DISKSET. */
+static char
+*svm_diskset_svcs[] = {
+ "network/rpc/metamed:default",
+ "network/rpc/metamh:default",
+ NULL
+};
+
+/* Services selected by META_SMF_MN_DISKSET. */
+static char
+*svm_mn_diskset_svcs[] = {
+ "network/rpc/mdcomm:default",
+ NULL
+};
+
+/*
+ * Enable, through the SMF, each class of SVM services selected by the
+ * META_SMF_* bits in flags, waiting (bounded) after each class for its
+ * services to come online.
+ *
+ * Returns 0 when ep is NULL or carries no error afterwards, -1 when ep
+ * recorded a failure.
+ */
+int
+meta_smf_enable(uint_t flags, md_error_t *ep)
+{
+	if ((flags & META_SMF_CORE) != 0) {
+		enable(svm_core_svcs, ep);
+		wait_online(svm_core_svcs);
+	}
+
+	if ((flags & META_SMF_DISKSET) != 0) {
+		enable(svm_diskset_svcs, ep);
+		wait_online(svm_diskset_svcs);
+	}
+
+	if ((flags & META_SMF_MN_DISKSET) != 0) {
+		enable(svm_mn_diskset_svcs, ep);
+		wait_online(svm_mn_diskset_svcs);
+	}
+
+	/* without an error structure there is nothing to report */
+	if (ep == NULL)
+		return (0);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Disable, through the SMF, each class of SVM services selected by the
+ * META_SMF_* bits in flags.
+ *
+ * Returns 0 when ep is NULL or carries no error afterwards, -1 when ep
+ * recorded a failure.
+ */
+int
+meta_smf_disable(uint_t flags, md_error_t *ep)
+{
+	if ((flags & META_SMF_CORE) != 0)
+		disable(svm_core_svcs, ep);
+
+	if ((flags & META_SMF_DISKSET) != 0)
+		disable(svm_diskset_svcs, ep);
+
+	if ((flags & META_SMF_MN_DISKSET) != 0)
+		disable(svm_mn_diskset_svcs, ep);
+
+	/* without an error structure there is nothing to report */
+	if (ep == NULL)
+		return (0);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Report whether every service in the classes selected by flags is
+ * online.  Returns 1 when they all are.  Otherwise returns 0 and
+ * records MDE_SMF_NO_SERVICE in ep together with the list of services
+ * that were found not to be online.
+ */
+
+int
+meta_smf_isonline(uint_t flags, md_error_t *ep)
+{
+	char	*offline = NULL;	/* accumulated "\n\t<name>" entries */
+	int	all_up = 1;
+
+	/* every selected class is checked so that the list is complete */
+	if ((flags & META_SMF_CORE) && !online(svm_core_svcs, &offline))
+		all_up = 0;
+
+	if ((flags & META_SMF_DISKSET) && !online(svm_diskset_svcs, &offline))
+		all_up = 0;
+
+	if ((flags & META_SMF_MN_DISKSET) &&
+	    !online(svm_mn_diskset_svcs, &offline))
+		all_up = 0;
+
+	if (!all_up) {
+		(void) mderror(ep, MDE_SMF_NO_SERVICE, offline);
+		Free(offline);
+	}
+
+	return (all_up);
+}
+
+/*
+ * Work out which META_SMF_* service classes should be online for the
+ * current SVM configuration and return them as a bitmask:
+ *
+ *	- nothing, if the local set has no metadbs (or they cannot be
+ *	  queried);
+ *	- META_SMF_CORE once local metadbs exist;
+ *	- META_SMF_DISKSET if any diskset can be looked up;
+ *	- META_SMF_MN_DISKSET if one of those is a multi-node set.
+ */
+int
+meta_smf_getmask()
+{
+	md_error_t	status = mdnullerror;
+	md_error_t	*ep = &status;
+	mddb_config_t	cfg;
+	mdsetname_t	*setp;
+	md_set_desc	*descp;
+	int		svcs;
+	int		nsets;
+	int		setno;
+
+	/* no usable local metadbs: no services are required at all */
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_setno = MD_LOCAL_SET;
+	if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde, NULL) != 0)
+		return (0);
+	if (cfg.c_dbcnt == 0)
+		return (0);
+
+	svcs = META_SMF_CORE;
+
+	nsets = get_max_sets(ep);
+	if (nsets <= 0)
+		return (svcs);
+
+	/* walk the non-local sets looking for disksets / multi-node sets */
+	mdclrerror(ep);
+	for (setno = 1; setno < nsets; setno++) {
+		setp = metasetnosetname(setno, ep);
+
+		if (setp == NULL) {
+			/*
+			 * The lookup failed.  Stop scanning when the error
+			 * is none of the ones tolerated here; otherwise
+			 * forget it and try the next set number.
+			 */
+			if (!mdisok(ep) && !mdiserror(ep, MDE_NO_SET) &&
+			    !mdismddberror(ep, MDE_NOTENOUGH_DB) &&
+			    !mdiserror(ep, MDE_SMF_NO_SERVICE) &&
+			    ep->info.errclass != MDEC_RPC)
+				break;
+
+			mdclrerror(ep);
+			continue;
+		}
+
+		svcs |= META_SMF_DISKSET;
+
+		descp = metaget_setdesc(setp, ep);
+		if (descp != NULL && MD_MNSET_DESC(descp)) {
+			/* every class is now set; no need to look further */
+			svcs |= META_SMF_MN_DISKSET;
+			break;
+		}
+
+		mdclrerror(ep);
+	}
+
+	return (svcs);
+}
+
+/*
+ * Enable every service in the NULL-terminated svc_names table that is
+ * not already enabled.  A failure is recorded in ep (when ep is not
+ * NULL) as MDE_SMF_FAIL with the service name; the remaining services
+ * are still attempted.
+ */
+static void
+enable(char *svc_names[], md_error_t *ep)
+{
+	char	**svcp;
+
+	for (svcp = svc_names; *svcp != NULL; svcp++) {
+		if (enabled(*svcp))
+			continue;
+
+		if (smf_enable_instance(*svcp, 0) == 0)
+			continue;
+
+		if (ep != NULL)
+			(void) mderror(ep, MDE_SMF_FAIL, *svcp);
+	}
+}
+
+/*
+ * Disable every service in the NULL-terminated svc_names table that is
+ * currently enabled.  A failure is recorded in ep (when ep is not
+ * NULL) as MDE_SMF_FAIL with the service name; the remaining services
+ * are still attempted.
+ */
+static void
+disable(char *svc_names[], md_error_t *ep)
+{
+	char	**svcp;
+
+	for (svcp = svc_names; *svcp != NULL; svcp++) {
+		if (!enabled(*svcp))
+			continue;
+
+		if (smf_disable_instance(*svcp, 0) == 0)
+			continue;
+
+		if (ep != NULL)
+			(void) mderror(ep, MDE_SMF_FAIL, *svcp);
+	}
+}
+
+/*
+ * Return 1 if the general/enabled property of the service instance
+ * svc_name holds exactly one value and that value is true.  Return 0
+ * otherwise, including when the property cannot be read.
+ */
+static int
+enabled(char *svc_name)
+{
+	scf_simple_prop_t	*prop;
+	uint8_t			*valp;
+	int			rval = 0;
+
+	prop = scf_simple_prop_get(NULL, svc_name, SCF_PG_GENERAL,
+	    SCF_PROPERTY_ENABLED);
+
+	/* property (or service) not available: treat as not enabled */
+	if (prop == NULL)
+		return (0);
+
+	if (scf_simple_prop_numvalues(prop) == 1) {
+		/*
+		 * The accessor yields NULL when no value can be returned
+		 * (e.g. the property is not a boolean); never dereference
+		 * it blindly.
+		 */
+		valp = scf_simple_prop_next_boolean(prop);
+		if (valp != NULL && *valp != 0)
+			rval = 1;
+	}
+
+	scf_simple_prop_free(prop);
+
+	return (rval);
+}
+
+/*
+ * The RPC services may take a moment to start.  Poll until every
+ * service in svc_names is online, sleeping one second after each
+ * unsuccessful check, and give up silently after 15 checks (about
+ * 15 seconds).
+ */
+static void
+wait_online(char *svc_names[])
+{
+	char	*offline = NULL;	/* filled in by online(), unused here */
+	int	tries = 0;
+
+	while (tries < 15 && !online(svc_names, &offline)) {
+		(void) sleep(1);
+		tries++;
+	}
+
+	if (offline != NULL)
+		Free(offline);
+}
+
+/*
+ * Check to see if all services in the svc_names are online.  If they are
+ * all online 1 is returned, otherwise 0 is returned.
+ *
+ * For every service found not to be online, "\n\t<service name>" is
+ * appended to the string *names.  *names must be NULL or a string
+ * obtained from Malloc(); it is replaced by a newly allocated string
+ * (the previous one is freed) and the caller is responsible for
+ * freeing the final value with Free().
+ */
+
+static int
+online(char *svc_names[], char **names)
+{
+	int	i;
+	int	rv = 1;
+
+	for (i = 0; svc_names[i]; i++) {
+		if (is_online(svc_names[i]) == 0) {
+			int	sz;
+			char	*p;
+
+			/*
+			 * Need space for the name, the new line, the
+			 * tab and the null terminator.
+			 */
+			sz = strlen(svc_names[i]) + 3;
+
+			if (*names == NULL) {
+				p = Malloc(sz);
+				(void) snprintf(p, sz, "\n\t%s", svc_names[i]);
+
+			} else {
+				/* Add space for existing names */
+				sz += strlen(*names);
+				p = Malloc(sz);
+				(void) snprintf(p, sz, "%s\n\t%s", *names,
+				    svc_names[i]);
+				/*
+				 * Release the old string, not the caller's
+				 * pointer variable that holds it.
+				 */
+				Free(*names);
+			}
+
+			*names = p;
+			rv = 0;
+		}
+	}
+	return (rv);
+}
+
+/*
+ * Return 1 if smf_get_state() reports the state "online" for svc_name;
+ * return 0 for any other state or when the state cannot be obtained.
+ */
+static int
+is_online(char *svc_name)
+{
+	char	*state = smf_get_state(svc_name);
+	int	up;
+
+	if (state == NULL)
+		return (0);
+
+	up = (strcmp(state, "online") == 0) ? 1 : 0;
+	free(state);
+
+	return (up);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_sp.c b/usr/src/lib/lvm/libmeta/common/meta_sp.c
new file mode 100644
index 0000000000..ce3965489f
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_sp.c
@@ -0,0 +1,6652 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * soft partition operations
+ *
+ * Soft Partitions provide a virtual disk mechanism which is used to
+ * divide a large volume into many small pieces, each appearing as a
+ * separate device. A soft partition consists of a series of extents,
+ * each having an offset and a length. The extents are logically
+ * contiguous, so where the first extent leaves off the second extent
+ * picks up. Which extent a given "virtual offset" belongs to is
+ * dependent on the size of all the previous extents in the soft
+ * partition.
+ *
+ * Soft partitions are represented in memory by an extent node
+ * (sp_ext_node_t) which contains all of the information necessary to
+ * create a unit structure and update the on-disk format, called
+ * "watermarks". These extent nodes are typically kept in a doubly
+ * linked list and are manipulated by list manipulation routines. A
+ * list of extents may represent all of the soft partitions on a volume,
+ * a single soft partition, or perhaps just a set of extents that need
+ * to be updated. Extent lists may be sorted by offset or by name/seq#,
+ * depending on which compare function is used. Most of the routines
+ * require the list be sorted by offset to work, and that's the typical
+ * configuration.
+ *
+ * In order to do an allocation, knowledge of all soft partitions on the
+ * volume is required. Then free space is determined from the space
+ * that is not allocated, and new allocations can be made from the free
+ * space. Once the new allocations are made, a unit structure is created
+ * and the watermarks are updated. The status is then changed to "okay"
+ * on the unit structure to commit the transaction. If updating the
+ * watermarks fails, the unit structure is in an intermediate state and
+ * the driver will not allow access to the device.
+ *
+ * A typical sequence of events is:
+ * 1. Fetch the list of names for all soft partitions on a volume
+ * meta_sp_get_by_component()
+ * 2. Construct an extent list from the name list
+ * meta_sp_extlist_from_namelist()
+ * 3. Fill the gaps in the extent list with free extents
+ * meta_sp_list_freefill()
+ * 4. Allocate from the free extents
+ * meta_sp_alloc_by_len()
+ * meta_sp_alloc_by_list()
+ * 5. Create the unit structure from the extent list
+ * meta_sp_createunit()
+ * meta_sp_updateunit()
+ * 6. Write out the watermarks
+ * meta_sp_update_wm()
+ * 7. Set the status to "Okay"
+ * meta_sp_setstatus()
+ *
+ */
+
+#include <stdio.h>
+#include <meta.h>
+#include "meta_repartition.h"
+#include <sys/lvm/md_sp.h>
+#include <sys/lvm/md_crc.h>
+#include <strings.h>
+#include <sys/lvm/md_mirror.h>
+#include <sys/bitmap.h>
+
+extern int md_in_daemon;
+
+/*
+ * In-memory description of one extent.  Nodes are chained through
+ * ext_next/ext_prev into a doubly linked extent list; the ordering of
+ * a list is determined by the compare function used when inserting
+ * (see meta_sp_cmp_by_offset() and meta_sp_cmp_by_nameseq()).
+ */
+typedef struct sp_ext_node {
+ struct sp_ext_node *ext_next; /* next element */
+ struct sp_ext_node *ext_prev; /* previous element */
+ sp_ext_type_t ext_type; /* type of extent */
+ sp_ext_offset_t ext_offset; /* starting offset */
+ sp_ext_length_t ext_length; /* length of this node */
+ uint_t ext_flags; /* extent flags */
+ uint32_t ext_seq; /* watermark seq no */
+ mdname_t *ext_namep; /* name pointer */
+ mdsetname_t *ext_setp; /* set pointer */
+} sp_ext_node_t;
+
+/* extent flags */
+#define EXTFLG_UPDATE (1)
+
+/* Extent node compare function for list sorting */
+typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *);
+
+
+/* Function Prototypes */
+
+/* Debugging Functions */
+static void meta_sp_debug(char *format, ...);
+static void meta_sp_printunit(mp_unit_t *mp);
+
+/* Misc Support Functions */
+int meta_sp_parsesize(char *s, sp_ext_length_t *szp);
+static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp);
+static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp,
+ md_error_t *ep);
+static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp,
+ mdnamelist_t **nlpp, int force, md_error_t *ep);
+static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp,
+ mdname_t *compnp, md_error_t *ep);
+
+/* Extent List Manipulation Functions */
+static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2);
+static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2);
+static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np,
+ sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length,
+ sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare);
+static void meta_sp_list_free(sp_ext_node_t **head);
+static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext);
+static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head,
+ sp_ext_type_t exttype, int exclude_wm);
+static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head,
+ sp_ext_offset_t offset);
+static void meta_sp_list_freefill(sp_ext_node_t **extlist,
+ sp_ext_length_t size);
+static void meta_sp_list_dump(sp_ext_node_t *head);
+static int meta_sp_list_overlaps(sp_ext_node_t *head);
+
+/* Extent List Query Functions */
+static boolean_t meta_sp_enough_space(int desired_number_of_sps,
+ blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp,
+ sp_ext_length_t alignment);
+static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep,
+ mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp,
+ md_error_t *ep);
+static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep,
+ mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp);
+
+
+/* Extent Allocation Functions */
+static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np,
+ sp_ext_node_t **extlist, sp_ext_node_t *free_ext,
+ sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq);
+static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np,
+ sp_ext_node_t **extlist, sp_ext_length_t *lp,
+ sp_ext_offset_t last_off, sp_ext_length_t alignment);
+static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np,
+ sp_ext_node_t **extlist, sp_ext_node_t *oblist);
+
+/* Extent List Population Functions */
+static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp,
+ sp_ext_node_t **extlist, md_error_t *ep);
+static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp,
+ sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep);
+
+/* Print (metastat) Functions */
+static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp,
+ mdprtopts_t options, md_error_t *ep);
+static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate);
+static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp,
+ char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep);
+
+/* Watermark Manipulation Functions */
+static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp,
+ sp_ext_node_t *extlist, md_error_t *ep);
+static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep);
+static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp,
+ mp_watermark_t *wm, sp_ext_offset_t offset, md_error_t *ep);
+static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp,
+ md_error_t *ep);
+
+/* Unit Structure Manipulation Functions */
+static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist);
+static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp,
+ sp_ext_node_t *extlist, int numexts, sp_ext_length_t len,
+ sp_status_t status, md_error_t *ep);
+static mp_unit_t *meta_sp_updateunit(mdname_t *np, mp_unit_t *old_un,
+ sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts,
+ md_error_t *ep);
+static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist,
+ mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep);
+static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options,
+ int *repart_options, md_error_t *ep);
+
+/* Reset (metaclear) Functions */
+static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp,
+ md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep);
+
+/* Recovery (metarecover) Functions */
+static void meta_sp_display_exthdr(void);
+static void meta_sp_display_ext(sp_ext_node_t *ext);
+static int meta_sp_checkseq(sp_ext_node_t *extlist);
+static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *,
+ mdname_t **, md_error_t *);
+static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np,
+ mdcmdopts_t options, md_error_t *ep);
+static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp,
+ mdcmdopts_t options, md_error_t *ep);
+static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np,
+ mdcmdopts_t options, md_error_t *ep);
+static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext,
+ sp_ext_node_t *unitext, md_error_t *ep);
+static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp,
+ mdcmdopts_t options, md_error_t *ep);
+static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np,
+ mdcmdopts_t options, md_error_t *ep);
+
+/*
+ * Private Constants
+ */
+
+/*
+ * Named argument values, used instead of bare literals at call sites.
+ * NOTE(review): FORCE_RELOAD_CACHE is presumably the "force" argument
+ * of meta_sp_get_by_component() -- confirm against the callers.
+ */
+static const int FORCE_RELOAD_CACHE = 1;
+static const uint_t NO_FLAGS = 0;
+static const sp_ext_offset_t NO_OFFSET = 0ULL;
+static const uint_t NO_SEQUENCE_NUMBER = 0;
+static const int ONE_SOFT_PARTITION = 1;
+
+/*
+ * Bitmap sized from MD_MAXUNITS via BT_BITOUL(); presumably one bit per
+ * unit, tracking which parents have already been printed -- confirm
+ * against the print routines.
+ */
+static unsigned long sp_parent_printed[BT_BITOUL(MD_MAXUNITS)];
+
+/* Placeholder (NULL) name and set pointers. */
+#define TEST_SOFT_PARTITION_NAMEP NULL
+#define TEST_SETNAMEP NULL
+
+/*
+ * Presumably the values for the exclude_wm argument of
+ * meta_sp_list_size() -- confirm.
+ */
+#define EXCLUDE_WM (1)
+#define INCLUDE_WM (0)
+
+/* Alignment value meaning "no default alignment". */
+#define SP_UNALIGNED (0LL)
+
+/*
+ * **************************************************************************
+ * Debugging Functions *
+ * **************************************************************************
+ */
+
+/*
+ * printf-style debug output to stderr.  Output is produced only when
+ * the environment variable named by META_SP_DEBUG is set; the
+ * environment is consulted once, on the first call, and the answer is
+ * remembered for all later calls.
+ */
+/*PRINTFLIKE1*/
+static void
+meta_sp_debug(char *format, ...)
+{
+	static int	debug_on = -1;	/* -1: environment not yet checked */
+	va_list		args;
+
+	if (debug_on < 0)
+		debug_on = (getenv(META_SP_DEBUG) != NULL) ? 1 : 0;
+
+	if (!debug_on)
+		return;
+
+	va_start(args, format);
+	(void) vfprintf(stderr, format, args);
+	va_end(args);
+}
+
+/*
+ * Dump the interesting fields of a soft partition unit structure, and
+ * one line per extent, to stderr.  A NULL unit is silently ignored.
+ */
+static void
+meta_sp_printunit(mp_unit_t *mp)
+{
+	FILE	*out = stderr;
+	int	i = 0;
+
+	if (mp == NULL)
+		return;
+
+	/* fields common to all unit structures */
+	(void) fprintf(out, "\tmp->c.un_type: %d\n", mp->c.un_type);
+	(void) fprintf(out, "\tmp->c.un_size: %u\n", mp->c.un_size);
+	(void) fprintf(out, "\tmp->c.un_self_id: %lu\n", MD_SID(mp));
+
+	/* fields specific to soft partitions */
+	(void) fprintf(out, "\tmp->un_status: %u\n", mp->un_status);
+	(void) fprintf(out, "\tmp->un_numexts: %u\n", mp->un_numexts);
+	(void) fprintf(out, "\tmp->un_length: %llu\n", mp->un_length);
+	(void) fprintf(out, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev);
+	(void) fprintf(out, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev);
+	(void) fprintf(out, "\tmp->un_key: %d\n", mp->un_key);
+
+	/* extent table: index, virtual offset, physical offset, length */
+	(void) fprintf(out, "\tExt#\tvoff\t\tpoff\t\tLen\n");
+	while (i < mp->un_numexts) {
+		(void) fprintf(out, "\t%d\t%llu\t\t%llu\t\t%llu\n", i,
+		    mp->un_ext[i].un_voff, mp->un_ext[i].un_poff,
+		    mp->un_ext[i].un_len);
+		i++;
+	}
+}
+
+/*
+ * FUNCTION:	meta_sp_parsesize()
+ * INPUT:	s	- the string to parse
+ * OUTPUT:	*szp	- disk block count (0 for "all")
+ * RETURNS:	-1 for error, 0 for success
+ * PURPOSE:	parses the command line parameter that specifies the
+ *		requested size of a soft partition.  The literal "all"
+ *		(in any letter case) yields a block count of 0; every
+ *		other string is handed to meta_sp_parsesizestring(),
+ *		which defines the accepted number-plus-unit syntax.
+ */
+int
+meta_sp_parsesize(char *s, sp_ext_length_t *szp)
+{
+	if (s == NULL || szp == NULL)
+		return (-1);
+
+	/* anything other than "all" is a numeric size specification */
+	if (strcasecmp(s, "all") != 0)
+		return (meta_sp_parsesizestring(s, szp));
+
+	*szp = 0;
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_parsesizestring()
+ * INPUT:	s	- the string to parse
+ * OUTPUT:	*szp	- disk block count
+ * RETURNS:	-1 for error, 0 for success
+ * PURPOSE:	parses a string that specifies size. The input string is a
+ *		numeric value followed by a single character, b for disk
+ *		blocks, k for kilobytes, m for megabytes, g for gigabytes,
+ *		or t for terabytes. p for petabytes and e for exabytes are
+ *		accepted as undocumented features for future expansion.
+ *		For example, 100m is 100 megabytes, while 50g is 50
+ *		gigabytes. All values are rounded up to the nearest block
+ *		size. A value of zero, a missing or unknown unit, or a
+ *		size whose byte count does not fit in 64 bits is an error;
+ *		*szp is left untouched on error.
+ */
+static int
+meta_sp_parsesizestring(char *s, sp_ext_length_t *szp)
+{
+	sp_ext_length_t	len = 0;
+	sp_ext_length_t	unit;		/* bytes per unit */
+	char		len_type[2];
+
+	if (s == NULL || szp == NULL) {
+		return (-1);
+	}
+
+	/*
+	 * The scanset must list every unit letter handled by the switch
+	 * below, otherwise that case can never be reached.
+	 */
+	if ((sscanf(s, "%llu%1[BbKkMmGgTtPpEe]", &len, len_type) != 2) ||
+	    (len == 0LL) ||
+	    (len > (1LL << (64 - DEV_BSHIFT))))
+		return (-1);
+
+	switch (len_type[0]) {
+	case 'B':
+	case 'b':
+		unit = DEV_BSIZE;
+		break;
+	case 'K':
+	case 'k':
+		unit = 1024ULL;
+		break;
+	case 'M':
+	case 'm':
+		unit = 1024ULL*1024ULL;
+		break;
+	case 'G':
+	case 'g':
+		unit = 1024ULL*1024ULL*1024ULL;
+		break;
+	case 'T':
+	case 't':
+		unit = 1024ULL*1024ULL*1024ULL*1024ULL;
+		break;
+	case 'P':
+	case 'p':
+		unit = 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL;
+		break;
+	case 'E':
+	case 'e':
+		unit = 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL;
+		break;
+	default:
+		/* error */
+		return (-1);
+	}
+
+	/*
+	 * Make sure neither the conversion to bytes nor the rounding up
+	 * to a block boundary can wrap past 2^64; a wrapped value would
+	 * silently turn a huge request into a small (or zero) one.
+	 */
+	if (len > (~0ULL - (DEV_BSIZE - 1)) / unit)
+		return (-1);
+
+	*szp = lbtodb(roundup(len * unit, DEV_BSIZE));
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_setgeom()
+ * INPUT:	np	- the device name handed to meta_setup_geom()
+ *			  together with the unit structure
+ *		compnp	- the underlying device whose geometry is
+ *			  looked up with metagetgeom()
+ *		mp	- the unit structure to set the geometry for
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 otherwise
+ * PURPOSE:	establishes geometry information for a device
+ */
+static int
+meta_sp_setgeom(
+	mdname_t	*np,
+	mdname_t	*compnp,
+	mp_unit_t	*mp,
+	md_error_t	*ep
+)
+{
+	uint_t		round_cyl = 0;
+	mdgeom_t	*geomp = metagetgeom(compnp, ep);
+
+	if (geomp == NULL)
+		return (-1);
+
+	/* the reinstruct values are taken over from the component */
+	return ((meta_setup_geom((md_unit_t *)mp, np, geomp,
+	    geomp->write_reinstruct, geomp->read_reinstruct, round_cyl,
+	    ep) != 0) ? -1 : 0);
+}
+
+/*
+ * FUNCTION:	meta_sp_setstatus()
+ * INPUT:	sp	- the set name for the devices to set the status on
+ *		minors	- an array of minor numbers of devices to set
+ *			  status on (must not be NULL)
+ *		num_units - number of entries in the array
+ *		status	- status value to set all units to
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 success
+ * PURPOSE:	sets the status of one or more soft partitions to the
+ *		requested value with a single MD_IOC_SPSTATUS ioctl
+ */
+int
+meta_sp_setstatus(
+	mdsetname_t	*sp,
+	minor_t		*minors,
+	int		num_units,
+	sp_status_t	status,
+	md_error_t	*ep
+)
+{
+	md_sp_statusset_t	args;
+
+	assert(minors != NULL);
+
+	/* describe the minor array and the new status to the driver */
+	(void) memset(&args, 0, sizeof (args));
+	args.minors = (uintptr_t)minors;
+	args.size = num_units * sizeof (minor_t);
+	args.num_units = num_units;
+	args.new_status = status;
+	MD_SETDRIVERNAME(&args, MD_SP, sp->setno);
+
+	if (metaioctl(MD_IOC_SPSTATUS, &args, &args.mde, NULL) == 0)
+		return (0);
+
+	/* hand the driver's error over to the caller */
+	(void) mdstealerror(ep, &args.mde);
+	return (-1);
+}
+
+/*
+ * FUNCTION:	meta_get_sp_names()
+ * INPUT:	sp	- the set name to get soft partitions from
+ *		options	- options from the command line
+ * OUTPUT:	nlpp	- list of all soft partition names
+ *		ep	- return error pointer
+ * RETURNS:	int	- whatever meta_get_names() returns for the MD_SP
+ *			  driver
+ * PURPOSE:	thin wrapper that asks meta_get_names() for the names
+ *		belonging to the soft partition (MD_SP) driver in the
+ *		specified set
+ */
+int
+meta_get_sp_names(mdsetname_t *sp, mdnamelist_t **nlpp, int options,
+    md_error_t *ep)
+{
+	int	rval;
+
+	rval = meta_get_names(MD_SP, sp, nlpp, options, ep);
+
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_by_component()
+ * INPUT:	sp	- the set name to get soft partitions from
+ *		compnp	- the name of the device containing the soft
+ *			  partitions that will be returned
+ *		force	- 0 - reads cached namelist if available,
+ *			  1 - reloads cached namelist, frees old namelist
+ * OUTPUT:	nlpp	- list of all soft partition names
+ *		ep	- return error pointer
+ * RETURNS:	int	- -1 error, otherwise the number of soft partitions
+ *			  found on the component (0 = none found).
+ * PURPOSE:	returns a list of all soft partitions on a given device
+ *		from the metadb information
+ * NOTES:	The result of the last scan is kept in a function-static
+ *		cache that is not keyed by set or component; with force
+ *		== 0 a copy of that cached list is appended at *nlpp
+ *		whatever sp and compnp are.  The returned list belongs to
+ *		the caller, who releases it with metafreenamelist().
+ */
+static int
+meta_sp_get_by_component(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdnamelist_t	**nlpp,
+	int		force,
+	md_error_t	*ep
+)
+{
+	static mdnamelist_t	*cached_list = NULL;	/* cached namelist */
+	static int		cached_count = 0;	/* cached count */
+	mdnamelist_t		*spnlp = NULL;		/* all sp names */
+	mdnamelist_t		*namep;			/* list iterator */
+	mdnamelist_t		**tailpp = nlpp;	/* namelist tail */
+	mdnamelist_t		**cachetailpp;		/* cache tail */
+	md_sp_t			*msp;			/* unit structure */
+	int			count = 0;		/* count of sp's */
+	int			err;
+	mdname_t		*curnp;
+
+	if ((cached_list != NULL) && (!force)) {
+		/* return a copy of the cached list */
+		for (namep = cached_list; namep != NULL; namep = namep->next)
+			tailpp = meta_namelist_append_wrapper(tailpp,
+			    namep->namep);
+		return (cached_count);
+	}
+
+	/* free the cache and reset values to zeros to prepare for a new list */
+	metafreenamelist(cached_list);
+	cached_count = 0;
+	cached_list = NULL;
+	cachetailpp = &cached_list;
+	*nlpp = NULL;
+
+	/* get all the softpartitions first of all */
+	if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) {
+		metafreenamelist(spnlp);
+		return (-1);
+	}
+
+	/*
+	 * Now for each sp, see if it resides on the component we
+	 * are interested in, if so then add it to our list
+	 */
+	for (namep = spnlp; namep != NULL; namep = namep->next) {
+		curnp = namep->namep;
+
+		/* get the unit structure */
+		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
+			continue;
+
+		/*
+		 * If the current soft partition is not on the same
+		 * component, continue the search.  If it is on the same
+		 * component, add it to our namelist.
+		 */
+		err = meta_check_samedrive(compnp, msp->compnamep, ep);
+		if (err <= 0) {
+			/* not on the same device, check the next one */
+			continue;
+		}
+
+		/* it's on the same drive */
+
+		/*
+		 * Check for overlapping partitions if the component is not
+		 * a metadevice.
+		 */
+		if (!metaismeta(msp->compnamep)) {
+			/*
+			 * if they're on the same drive, neither
+			 * should be a metadevice if one isn't
+			 */
+			assert(!metaismeta(compnp));
+
+			if (meta_check_overlap(msp->compnamep->cname,
+			    compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0)
+				continue;
+
+			/* in this case it's not an error for them to overlap */
+			mdclrerror(ep);
+		}
+
+		/* Component is on the same device, add to the used list */
+		tailpp = meta_namelist_append_wrapper(tailpp, curnp);
+		cachetailpp = meta_namelist_append_wrapper(cachetailpp,
+		    curnp);
+
+		++count;
+		++cached_count;
+	}
+
+	/*
+	 * The full list of soft partition names was only needed for the
+	 * scan above; release it so that it is not leaked on every
+	 * reload.  This assumes metafreenamelist() releases only the
+	 * list nodes, which is how callers already free the returned
+	 * list while the cache keeps referring to the same names.
+	 */
+	metafreenamelist(spnlp);
+
+	assert(count == cached_count);
+	return (count);
+}
+
+/*
+ * FUNCTION: meta_sp_get_default_alignment()
+ * INPUT: sp - the pertinent set name
+ * compnp - the name of the underlying component
+ * OUTPUT: ep - return error pointer
+ * RETURNS: sp_ext_length_t =0: no default alignment
+ * >0: default alignment
+ * PURPOSE: returns the default alignment for soft partitions to
+ * be built on top of the specified component or
+ * metadevice
+ */
+static sp_ext_length_t
+meta_sp_get_default_alignment(
+ mdsetname_t *sp,
+ mdname_t *compnp,
+ md_error_t *ep
+)
+{
+ sp_ext_length_t a = SP_UNALIGNED;
+ char *mname;
+
+ assert(compnp != NULL);
+
+ /*
+ * We treat raw devices as opaque, and assume nothing about
+ * their alignment requirements.
+ *
+ * Note that this early return bypasses the debug output and the
+ * final assert at the "out" label below.
+ */
+ if (!metaismeta(compnp))
+ return (SP_UNALIGNED);
+
+ /*
+ * We already know it's a metadevice from the previous test;
+ * metagetmiscname() will tell us which metadevice type we
+ * have
+ */
+ mname = metagetmiscname(compnp, ep);
+ if (mname == NULL)
+ goto out;
+
+ /*
+ * For a mirror, we want to deal with the stripe that is the
+ * primary side. If it happens to be asymmetrically
+ * configured, there is no simple way to fake a universal
+ * alignment. There's a chance that the least common
+ * denominator of the set of interlaces from all stripes of
+ * all submirrors would do it, but nobody that really cared
+ * that much about this issue would create an asymmetric
+ * config to start with.
+ *
+ * If the component underlying the soft partition is a mirror,
+ * then at the exit of this loop, compnp will have been
+ * updated to describe the first active submirror.
+ */
+ if (strcmp(mname, MD_MIRROR) == 0) {
+ md_mirror_t *mp;
+ int smi;
+ md_submirror_t *smp;
+
+ mp = meta_get_mirror(sp, compnp, ep);
+ if (mp == NULL)
+ goto out;
+
+ for (smi = 0; smi < NMIRROR; smi++) {
+
+ smp = &mp->submirrors[smi];
+ if (smp->state == SMS_UNUSED)
+ continue;
+
+ /* first submirror slot that is in use: switch to it */
+ compnp = smp->submirnamep;
+ assert(compnp != NULL);
+
+ mname = metagetmiscname(compnp, ep);
+ if (mname == NULL)
+ goto out;
+
+ break;
+ }
+
+ /*
+ * Every submirror slot was SMS_UNUSED; leave with the
+ * alignment still SP_UNALIGNED.
+ */
+ if (smi == NMIRROR)
+ goto out;
+ }
+
+ /*
+ * Handle stripes and submirrors identically; just return the
+ * interlace of the first row.
+ */
+ if (strcmp(mname, MD_STRIPE) == 0) {
+ md_stripe_t *stp;
+
+ stp = meta_get_stripe(sp, compnp, ep);
+ if (stp == NULL)
+ goto out;
+
+ a = stp->rows.rows_val[0].interlace;
+ goto out;
+ }
+
+ /*
+ * Raid is even more straightforward; the interlace applies to
+ * the entire device.
+ */
+ if (strcmp(mname, MD_RAID) == 0) {
+ md_raid_t *rp;
+
+ rp = meta_get_raid(sp, compnp, ep);
+ if (rp == NULL)
+ goto out;
+
+ a = rp->interlace;
+ goto out;
+ }
+
+ /*
+ * If we have arrived here with the alignment still not set,
+ * then we expect the error to have been set by one of the
+ * routines we called. If neither is the case, something has
+ * really gone wrong above. (Probably the submirror walk
+ * failed to produce a valid submirror, but that would be
+ * really bad...)
+ *
+ * A metadevice whose type is neither stripe nor raid also falls
+ * through to here with the alignment unset.
+ */
+out:
+ meta_sp_debug("meta_sp_get_default_alignment: miscname %s, "
+ "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a);
+
+ /* in debug mode, also show any error recorded on the way here */
+ if (getenv(META_SP_DEBUG) && !mdisok(ep)) {
+ mde_perror(ep, NULL);
+ }
+
+ /* either an alignment was found or an error must have been recorded */
+ assert((a > 0) || (!mdisok(ep)));
+
+ return (a);
+}
+
+
+
+/*
+ * FUNCTION:	meta_check_insp()
+ * INPUT:	sp	- the set name for the device to check
+ *		np	- the name of the device to check
+ *		slblk	- the starting offset of the device to check
+ *			  (unused)
+ *		nblks	- the number of blocks in the device to check
+ *			  (unused)
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	-  0 - no soft partitions were found on the device
+ *			  -1 - the lookup of soft partitions failed
+ *			  otherwise the result of mduseerror(), after an
+ *			  MDE_ALREADY use error naming the first soft
+ *			  partition found has been recorded in ep
+ * PURPOSE:	determines whether a device contains any soft partitions
+ */
+/* ARGSUSED */
+int
+meta_check_insp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*found = NULL;	/* soft partitions on the component */
+	int		nfound;
+	int		rval = 0;
+
+	/* check set pointer */
+	assert(sp != NULL);
+
+	/* find all soft partitions on the component */
+	nfound = meta_sp_get_by_component(sp, np, &found, 0, ep);
+
+	if (nfound == -1)
+		rval = -1;
+	else if (nfound > 0)
+		rval = mduseerror(ep, MDE_ALREADY, np->dev,
+		    found->namep->cname, np->cname);
+
+	metafreenamelist(found);
+	return (rval);
+}
+
+/*
+ * **************************************************************************
+ * Extent List Manipulation Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION: meta_sp_cmp_by_nameseq()
+ * INPUT: e1 - first node to compare
+ * e2 - second node to compare
+ * OUTPUT: none
+ * RETURNS: int - =0 - nodes are equal
+ * <0 - e1 should go before e2
+ * >0 - e1 should go after e2
+ * PURPOSE: used for sorted list inserts to build a list sorted by
+ * name first and sequence number second.
+ */
+static int
+meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2)
+{
+	int	name_order;
+
+	/*
+	 * A node without a name sorts after a named one.  e1 is tested
+	 * first, so two unnamed nodes compare as "e1 after e2".
+	 */
+	if (e1->ext_namep == NULL)
+		return (1);
+	if (e2->ext_namep == NULL)
+		return (-1);
+
+	/* primary key: the component name */
+	name_order = strcmp(e1->ext_namep->cname, e2->ext_namep->cname);
+	if (name_order != 0)
+		return (name_order);
+
+	/* secondary key: the sequence number */
+	if (e1->ext_seq == e2->ext_seq)
+		return (0);
+
+	return ((e1->ext_seq > e2->ext_seq) ? 1 : -1);
+}
+
+/*
+ * FUNCTION: meta_sp_cmp_by_offset()
+ * INPUT: e1 - first node to compare
+ * e2 - second node to compare
+ * OUTPUT: none
+ * RETURNS: int - =0 - nodes are equal
+ * <0 - e1 should go before e2
+ * >0 - e1 should go after e2
+ * PURPOSE: used for sorted list inserts to build a list sorted by offset
+ */
+static int
+meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2)
+{
+	/* identical offsets compare equal */
+	if (e1->ext_offset == e2->ext_offset)
+		return (0);
+
+	/* otherwise the node with the smaller offset goes first */
+	return ((e1->ext_offset > e2->ext_offset) ? 1 : -1);
+}
+
+/*
+ * FUNCTION: meta_sp_list_insert()
+ * INPUT: sp - the set name for the device the node belongs to
+ * np - the name of the device the node belongs to
+ * head - the head of the list, must be NULL for empty list
+ * offset - the physical offset of this extent in sectors
+ * length - the length of this extent in sectors
+ * type - the type of the extent being inserted
+ * seq - the sequence number of the extent being inserted
+ * flags - extent flags (eg. whether it needs to be updated)
+ * compare - the compare function to use
+ * OUTPUT: head - points to the new head if a node was inserted
+ * at the beginning
+ * RETURNS: void
+ * PURPOSE: inserts an extent node into a sorted doubly linked list.
+ * The sort order is determined by the compare function.
+ * Memory is allocated for the node in this function and it
+ * is up to the caller to free it, possibly using
+ * meta_sp_list_free(). If a node is inserted at the
+ * beginning of the list, the head pointer is updated to
+ * point to the new first node.
+ */
+static void
+meta_sp_list_insert(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	sp_ext_node_t	**head,
+	sp_ext_offset_t	offset,
+	sp_ext_length_t	length,
+	sp_ext_type_t	type,
+	uint_t		seq,
+	uint_t		flags,
+	ext_cmpfunc_t	compare
+)
+{
+	sp_ext_node_t	*node;		/* the node being inserted */
+	sp_ext_node_t	*before = NULL;	/* node that will precede it */
+	sp_ext_node_t	*after;		/* node that will follow it */
+
+	assert(head != NULL);
+
+	/* zero length extents are never recorded */
+	if (length == 0ULL)
+		return;
+
+	/* build the new node */
+	node = Zalloc(sizeof (*node));
+
+	node->ext_offset = offset;
+	node->ext_length = length;
+	node->ext_flags = flags;
+	node->ext_type = type;
+	node->ext_seq = seq;
+	node->ext_setp = sp;
+	node->ext_namep = np;
+
+	/*
+	 * Locate the insertion point: the new node goes in front of
+	 * the first existing node that does not compare less than it,
+	 * or at the tail if there is no such node.
+	 */
+	after = *head;
+	if (after != NULL) {
+		if ((*compare)(after, node) >= 0) {
+			/* new node becomes the head of the list */
+			assert(after->ext_prev == NULL);
+		} else {
+			do {
+				before = after;
+				after = after->ext_next;
+			} while ((after != NULL) &&
+			    ((*compare)(after, node) < 0));
+		}
+	}
+
+	/* splice the node in between "before" and "after" */
+	node->ext_prev = before;
+	node->ext_next = after;
+
+	if (after != NULL)
+		after->ext_prev = node;
+
+	if (before != NULL)
+		before->ext_next = node;
+	else
+		*head = node;
+}
+
+/*
+ * FUNCTION: meta_sp_list_free()
+ * INPUT: head - the head of the list, must be NULL for empty list
+ * OUTPUT: head - points to NULL on return
+ * RETURNS: void
+ * PURPOSE: walks a double linked extent list and frees each node
+ */
+static void
+meta_sp_list_free(sp_ext_node_t **head)
+{
+	sp_ext_node_t	*node;
+
+	assert(head != NULL);
+
+	/* release nodes front to back, stepping off each one first */
+	node = *head;
+	while (node != NULL) {
+		sp_ext_node_t	*doomed = node;
+
+		node = node->ext_next;
+		Free(doomed);
+	}
+
+	/* leave the caller with an empty list */
+	*head = NULL;
+}
+
+/*
+ * FUNCTION: meta_sp_list_remove()
+ * INPUT: head - the head of the list, must be NULL for empty list
+ * ext - the extent to remove, must be a member of the list
+ * OUTPUT: head - points to the new head of the list
+ * RETURNS: void
+ * PURPOSE: unlinks the node specified by ext from the list and
+ * frees it, possibly moving the head pointer forward if
+ * the head is the node being removed.
+ */
+static void
+meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext)
+{
+	sp_ext_node_t	*before;
+	sp_ext_node_t	*after;
+
+	assert(head != NULL);
+	assert(*head != NULL);
+
+	before = ext->ext_prev;
+	after = ext->ext_next;
+
+	/* removing the first node moves the head forward */
+	if (ext == *head)
+		*head = after;
+
+	/* bridge the neighbours over the departing node */
+	if (before != NULL)
+		before->ext_next = after;
+	if (after != NULL)
+		after->ext_prev = before;
+
+	Free(ext);
+}
+
+/*
+ * FUNCTION: meta_sp_list_size()
+ * INPUT: head - the head of the list, must be NULL for empty list
+ * exttype - the type of the extents to sum
+ * exclude_wm - subtract space for extent headers from total
+ * OUTPUT: none
+ * RETURNS: sp_ext_length_t - the sum of all of the lengths
+ * PURPOSE: sums the lengths of all extents in the list matching the
+ * specified type. This could be used for computing the
+ * amount of free or used space, for example.
+ */
+static sp_ext_length_t
+meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm)
+{
+	sp_ext_node_t	*node = head;
+	sp_ext_length_t	total = 0LL;
+
+	while (node != NULL) {
+		if (node->ext_type == exttype) {
+			total += node->ext_length;
+
+			/* optionally discount the watermark of each extent */
+			if (exclude_wm)
+				total -= MD_SP_WMSIZE;
+		}
+		node = node->ext_next;
+	}
+
+	return (total);
+}
+
+/*
+ * FUNCTION: meta_sp_list_find()
+ * INPUT: head - the head of the list, must be NULL for empty list
+ * offset - the offset contained by the node to find
+ * OUTPUT: none
+ * RETURNS: sp_ext_node_t * - the node containing the requested offset
+ * or NULL if no such nodes were found.
+ * PURPOSE: finds a node in a list containing the requested offset
+ * (inclusive). If multiple nodes contain this offset then
+ * only the first will be returned, though typically these
+ * lists are managed with non-overlapping nodes.
+ *
+ * *The list MUST be sorted by offset for this function to work.*
+ */
+static sp_ext_node_t *
+meta_sp_list_find(
+	sp_ext_node_t	*head,
+	sp_ext_offset_t	offset
+)
+{
+	sp_ext_node_t	*node;
+
+	for (node = head; node != NULL; node = node->ext_next) {
+		/* the extent starts beyond the requested offset */
+		if (offset < node->ext_offset)
+			continue;
+
+		/* first extent whose range covers the offset wins */
+		if (offset < node->ext_offset + node->ext_length)
+			return (node);
+	}
+
+	/* no extent contains the offset */
+	return (NULL);
+}
+
+/*
+ * FUNCTION: meta_sp_list_freefill()
+ * INPUT: head - the head of the list, must be NULL for empty list
+ * size - the size of the volume this extent list is
+ * representing
+ * OUTPUT: head - the new head of the list
+ * RETURNS: void
+ * PURPOSE: finds gaps in the extent list and fills them with a free
+ * node. If there is a gap at the beginning the head
+ * pointer will be changed to point to the new free node.
+ * If there is free space at the end, the last free extent
+ * will extend all the way out to the size specified.
+ *
+ * *The list MUST be sorted by offset for this function to work.*
+ */
+static void
+meta_sp_list_freefill(
+	sp_ext_node_t	**head,
+	sp_ext_length_t	size
+)
+{
+	sp_ext_node_t	*node = *head;
+	sp_ext_offset_t	covered = 0LL;	/* end of the space seen so far */
+
+	while (node != NULL) {
+		/* a hole precedes this extent; describe it as free space */
+		if (node->ext_offset > covered) {
+			meta_sp_list_insert(NULL, NULL, head,
+			    covered, node->ext_offset - covered,
+			    EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
+		}
+
+		covered = node->ext_offset + node->ext_length;
+		node = node->ext_next;
+	}
+
+	/* whatever is left up to the end of the volume is free as well */
+	if (size > covered) {
+		meta_sp_list_insert(NULL, NULL, head, covered, size - covered,
+		    EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_list_freefill: Extent list with "
+		    "holes freefilled:\n");
+		meta_sp_list_dump(*head);
+	}
+}
+
+/*
+ * FUNCTION: meta_sp_list_dump()
+ * INPUT: head - the head of the list, must be NULL for empty list
+ * OUTPUT: none
+ * RETURNS: void
+ * PURPOSE: dumps the entire extent list to stdout for easy debugging
+ */
+static void
+meta_sp_list_dump(sp_ext_node_t *head)
+{
+	sp_ext_node_t	*node;
+	const char	*type_name;
+
+	/* banner and column headings */
+	meta_sp_debug("meta_sp_list_dump: dumping extent list:\n");
+	meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name",
+	    "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev",
+	    "Next");
+
+	/* one row per extent */
+	for (node = head; node != NULL; node = node->ext_next) {
+		meta_sp_debug("%5s", (node->ext_namep != NULL) ?
+		    node->ext_namep->cname : "NONE");
+
+		meta_sp_debug("%10p %5u ", (void *) node, node->ext_seq);
+
+		/* translate the extent type into its column label */
+		switch (node->ext_type) {
+		case EXTTYP_ALLOC:
+			type_name = "ALLOC";
+			break;
+		case EXTTYP_FREE:
+			type_name = "FREE";
+			break;
+		case EXTTYP_END:
+			type_name = "END";
+			break;
+		case EXTTYP_RESERVED:
+			type_name = "RESV";
+			break;
+		default:
+			type_name = "INVLD";
+			break;
+		}
+		meta_sp_debug("%7s ", type_name);
+
+		meta_sp_debug("%10llu %10llu %5u %10p %10p\n",
+		    node->ext_offset, node->ext_length,
+		    node->ext_flags, (void *) node->ext_prev,
+		    (void *) node->ext_next);
+	}
+	meta_sp_debug("\n");
+}
+
+/*
+ * FUNCTION:	meta_sp_list_overlaps()
+ * INPUT:	head	- the head of the list, must be NULL for empty list
+ * OUTPUT:	none
+ * RETURNS:	int	- 1 if extents overlap, 0 if ok (an empty list
+ *			  never overlaps)
+ * PURPOSE:	checks a list for overlaps.  Only adjacent nodes are
+ *		compared, so the list MUST be sorted by offset for this
+ *		function to work properly.
+ */
+static int
+meta_sp_list_overlaps(sp_ext_node_t *head)
+{
+	sp_ext_node_t	*ext;
+
+	/*
+	 * An empty list is represented by a NULL head; there is nothing
+	 * to compare and nothing to dereference.
+	 */
+	if (head == NULL)
+		return (0);
+
+	for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) {
+		/* this extent runs past the start of its successor */
+		if (ext->ext_offset + ext->ext_length >
+		    ext->ext_next->ext_offset)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * **************************************************************************
+ * Extent Allocation Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION: meta_sp_alloc_by_ext()
+ * INPUT: sp - the set name for the device the node belongs to
+ * np - the name of the device the node belongs to
+ * head - the head of the list, must be NULL for empty list
+ * free_ext - the free extent being allocated from
+ * alloc_offset - the offset of the allocation
+ * alloc_length - the length of the allocation
+ * seq - the sequence number of the allocation
+ * OUTPUT: head - the new head pointer
+ * RETURNS: void
+ * PURPOSE: allocates a portion of the free extent free_ext. The
+ * allocated portion starts at alloc_offset and is
+ * alloc_length long. Both (alloc_offset) and (alloc_offset +
+ * alloc_length) must be contained within the free extent.
+ *
+ * The free extent is split into as many as 3 pieces - a
+ * free extent containing [ free_offset .. alloc_offset ), an
+ * allocated extent containing the range [ alloc_offset ..
+ * alloc_end ], and another free extent containing the
+ * range ( alloc_end .. free_end ]. If either of the two
+ * new free extents would be zero length, they are not created.
+ *
+ * Finally, the original free extent is removed. All newly
+ * created extents have the EXTFLG_UPDATE flag set.
+ */
+static void
+meta_sp_alloc_by_ext(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	sp_ext_node_t	**head,
+	sp_ext_node_t	*free_ext,
+	sp_ext_offset_t	alloc_offset,
+	sp_ext_length_t	alloc_length,
+	uint_t		seq
+)
+{
+	/*
+	 * Snapshot the bounds of the free extent up front; the node
+	 * itself is unlinked and freed before the pieces are inserted.
+	 */
+	const sp_ext_offset_t	hole_start = free_ext->ext_offset;
+	const sp_ext_offset_t	hole_end = hole_start + free_ext->ext_length;
+	const sp_ext_offset_t	used_end = alloc_offset + alloc_length;
+
+	/* the allocation has to fit inside the free extent */
+	assert(hole_start <= alloc_offset);
+	assert(hole_end >= used_end);
+
+	meta_sp_list_remove(head, free_ext);
+
+	/* leading remainder, if any, stays free */
+	if (alloc_offset > hole_start) {
+		meta_sp_list_insert(NULL, NULL, head, hole_start,
+		    (alloc_offset - hole_start), EXTTYP_FREE, 0,
+		    EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+	}
+
+	/* trailing remainder, if any, stays free */
+	if (used_end < hole_end) {
+		meta_sp_list_insert(NULL, NULL, head, used_end,
+		    (hole_end - used_end), EXTTYP_FREE, 0, EXTFLG_UPDATE,
+		    meta_sp_cmp_by_offset);
+	}
+
+	/* the allocated piece itself */
+	meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length,
+	    EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n");
+		meta_sp_list_dump(*head);
+	}
+}
+
+/*
+ * FUNCTION: meta_sp_alloc_by_len()
+ * INPUT: sp - the set name for the device the node belongs to
+ * np - the name of the device the node belongs to
+ * head - the head of the list, must be NULL for empty list
+ * *lp - the requested length to allocate
+ * last_off - the last offset already allocated.
+ * alignment - the desired extent alignment
+ * OUTPUT: head - the new head pointer
+ * *lp - the length allocated
+ * RETURNS: int - -1 if error, the number of new extents on success
+ * PURPOSE: allocates extents from free space to satisfy the requested
+ * length. If requested length is zero, allocates all
+ * remaining free space. This function provides the meat
+ * of the extent allocation algorithm. Allocation is a
+ * three tier process:
+ *
+ * 1. If last_off is nonzero and there is free space following
+ * that node, then it is extended to allocate as much of that
+ * free space as possible. This is useful for metattach.
+ * 2. If a free extent can be found to satisfy the remaining
+ * requested space, then satisfy the rest of the request
+ * from that extent.
+ * 3. Start allocating space from any remaining free extents until
+ * the remainder of the request is satisfied.
+ *
+ * If alignment is non-zero, then every extent modified
+ * or newly allocated will be aligned modulo alignment,
+ * with a length that is an integer multiple of
+ * alignment.
+ *
+ * The EXTFLG_UPDATE flag is set for all nodes (free and
+ * allocated) that require updated watermarks.
+ *
+ * This algorithm may have a negative impact on fragmentation
+ * in pathological cases and may be improved if it turns out
+ * to be a problem. This may be exacerbated by particularly
+ * large alignments.
+ *
+ * NOTE: It's confusing, so it demands an explanation:
+ * - len is used to represent requested data space; it
+ * does not include room for a watermark. On each full
+ * or partial allocation, len will be decremented by
+ * alloc_len (see next paragraph) until it reaches
+ * zero.
+ * - alloc_len is used to represent data space allocated
+ * from a particular extent; it does not include space
+ * for a watermark. In the rare event that a_length
+ * (see next paragraph) is equal to MD_SP_WMSIZE,
+ * alloc_len will be zero and the resulting MD_SP_WMSIZE
+ * fragment of space will be utterly unusable.
+ * - a_length is used to represent all space to be
+ * allocated from a particular extent; it DOES include
+ * space for a watermark.
+ */
+static int
+meta_sp_alloc_by_len(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	sp_ext_node_t	**head,
+	sp_ext_length_t	*lp,
+	sp_ext_offset_t	last_off,
+	sp_ext_offset_t	alignment
+)
+{
+	sp_ext_node_t	*free_ext;
+	sp_ext_node_t	*next_ext;	/* successor saved before an alloc */
+	sp_ext_node_t	*alloc_ext;
+	uint_t		last_seq = 0;
+	uint_t		numexts = 0;
+	sp_ext_length_t	freespace;
+	sp_ext_length_t	alloc_len;
+	sp_ext_length_t	len;
+
+	/* We're DOA if we can't read *lp */
+	assert(lp != NULL);
+	len = *lp;
+
+	/*
+	 * Process the nominal case first: we've been given an actual
+	 * size argument, rather than the literal "all"
+	 */
+
+	if (len != 0) {
+
+		/*
+		 * Short circuit the check for free space.  This may
+		 * tell us we have enough space when we really don't
+		 * because each extent loses space to a watermark, but
+		 * it will always tell us there isn't enough space
+		 * correctly.  Worst case we do some extra work.
+		 */
+		freespace = meta_sp_list_size(*head, EXTTYP_FREE,
+		    INCLUDE_WM);
+
+		if (freespace < len)
+			return (-1);
+
+		/*
+		 * First see if we can extend the last extent for an
+		 * attach.
+		 */
+		if (last_off != 0LL) {
+			int	align = 0;
+
+			alloc_ext =
+			    meta_sp_list_find(*head, last_off);
+			assert(alloc_ext != NULL);
+
+			/*
+			 * The offset test reflects the
+			 * inclusion of the watermark in the extent
+			 */
+			align = (alignment > 0) &&
+			    (((alloc_ext->ext_offset + MD_SP_WMSIZE) %
+			    alignment) == 0);
+
+			/*
+			 * If we decided not to align here, we should
+			 * also reset "alignment" so we don't bother
+			 * later, either.
+			 */
+			if (!align) {
+				alignment = 0;
+			}
+
+			last_seq = alloc_ext->ext_seq;
+
+			free_ext = meta_sp_list_find(*head,
+			    alloc_ext->ext_offset +
+			    alloc_ext->ext_length);
+
+			/*
+			 * If a free extent follows our last allocated
+			 * extent, then remove the last allocated
+			 * extent and increase the size of the free
+			 * extent to overlap it, then allocate the
+			 * total space from the new free extent.
+			 */
+			if (free_ext != NULL &&
+			    free_ext->ext_type == EXTTYP_FREE) {
+				assert(free_ext->ext_offset ==
+				    alloc_ext->ext_offset +
+				    alloc_ext->ext_length);
+
+				alloc_len =
+				    MIN(len, free_ext->ext_length);
+
+				if (align && (alloc_len < len)) {
+					/* No watermark space needed */
+					alloc_len -= alloc_len % alignment;
+				}
+
+				if (alloc_len > 0) {
+					free_ext->ext_offset -=
+					    alloc_ext->ext_length;
+					free_ext->ext_length +=
+					    alloc_ext->ext_length;
+
+					meta_sp_alloc_by_ext(sp, np, head,
+					    free_ext, free_ext->ext_offset,
+					    alloc_ext->ext_length + alloc_len,
+					    last_seq);
+
+					/*
+					 * now remove the original allocated
+					 * node.  We may have overlapping
+					 * extents for a short time before
+					 * this node is removed.
+					 */
+					meta_sp_list_remove(head, alloc_ext);
+					len -= alloc_len;
+				}
+			}
+			last_seq++;
+		}
+
+		if (len == 0LL)
+			goto out;
+
+		/*
+		 * Next, see if we can find a single allocation for
+		 * the remainder.  This may make fragmentation worse
+		 * in some cases, but there's no good way to allocate
+		 * that doesn't have a highly fragmented corner case.
+		 */
+		for (free_ext = *head; free_ext != NULL;
+		    free_ext = free_ext->ext_next) {
+			sp_ext_offset_t	a_offset;
+			sp_ext_offset_t	a_length;
+
+			if (free_ext->ext_type != EXTTYP_FREE)
+				continue;
+
+			/*
+			 * The length test should include space for
+			 * the watermark
+			 */
+
+			a_offset = free_ext->ext_offset;
+			a_length = free_ext->ext_length;
+
+			if (alignment > 0) {
+
+				/*
+				 * Shortcut for extents that have been
+				 * previously added to pad out the
+				 * data space
+				 */
+				if (a_length < alignment) {
+					continue;
+				}
+
+				/*
+				 * Round up so the data space begins
+				 * on a properly aligned boundary.
+				 */
+				a_offset += alignment -
+				    (a_offset % alignment) - MD_SP_WMSIZE;
+
+				/*
+				 * This is only necessary in case the
+				 * watermark size is ever greater than
+				 * one.  It'll never happen, of
+				 * course; we'll get rid of watermarks
+				 * before we make 'em bigger.
+				 */
+				if (a_offset < free_ext->ext_offset) {
+					a_offset += alignment;
+				}
+
+				/*
+				 * Adjust the length to account for
+				 * the space lost above (if any)
+				 */
+				a_length -=
+				    (a_offset - free_ext->ext_offset);
+			}
+
+			if (a_length >= len + MD_SP_WMSIZE) {
+				/*
+				 * free_ext is consumed (freed) by this
+				 * call; the break below leaves the loop
+				 * without touching it again.
+				 */
+				meta_sp_alloc_by_ext(sp, np, head,
+				    free_ext, a_offset,
+				    len + MD_SP_WMSIZE, last_seq);
+
+				len = 0LL;
+				numexts++;
+				break;
+			}
+		}
+
+		if (len == 0LL)
+			goto out;
+
+
+		/*
+		 * If the request could not be satisfied by extending
+		 * the last extent or by a single extent, then put
+		 * multiple smaller extents together until the request
+		 * is satisfied.
+		 */
+		for (free_ext = *head; (free_ext != NULL) && (len > 0);
+		    free_ext = next_ext) {
+			sp_ext_offset_t	a_offset;
+			sp_ext_length_t	a_length;
+
+			/*
+			 * meta_sp_alloc_by_ext() unlinks and frees
+			 * free_ext, so its successor has to be picked
+			 * up before the node can be consumed.  The
+			 * successor itself is never removed by the
+			 * allocation, so it stays valid.
+			 */
+			next_ext = free_ext->ext_next;
+
+			if (free_ext->ext_type != EXTTYP_FREE)
+				continue;
+
+			a_offset = free_ext->ext_offset;
+			a_length = free_ext->ext_length;
+
+			if (alignment > 0) {
+
+				/*
+				 * Shortcut for extents that have been
+				 * previously added to pad out the
+				 * data space
+				 */
+				if (a_length < alignment) {
+					continue;
+				}
+
+				/*
+				 * Round up so the data space begins
+				 * on a properly aligned boundary.
+				 */
+				a_offset += alignment -
+				    (a_offset % alignment) - MD_SP_WMSIZE;
+
+				/*
+				 * This is only necessary in case the
+				 * watermark size is ever greater than
+				 * one.  It'll never happen, of
+				 * course; we'll get rid of watermarks
+				 * before we make 'em bigger.
+				 */
+				if (a_offset < free_ext->ext_offset) {
+					a_offset += alignment;
+				}
+
+				/*
+				 * Adjust the length to account for
+				 * the space lost above (if any)
+				 */
+				a_length -=
+				    (a_offset - free_ext->ext_offset);
+
+				/*
+				 * Adjust the length to be properly
+				 * aligned if it is NOT to be the
+				 * last extent in the soft partition.
+				 */
+				if ((a_length - MD_SP_WMSIZE) < len)
+					a_length -=
+					    (a_length - MD_SP_WMSIZE)
+					    % alignment;
+			}
+
+			alloc_len = MIN(len, a_length - MD_SP_WMSIZE);
+			if (alloc_len == 0)
+				continue;
+
+			/*
+			 * meta_sp_alloc_by_ext() expects the
+			 * allocation length to include the watermark
+			 * size, which is why we don't simply pass in
+			 * alloc_len here.
+			 */
+			meta_sp_alloc_by_ext(sp, np, head, free_ext,
+			    a_offset, MIN(len + MD_SP_WMSIZE, a_length),
+			    last_seq);
+
+			len -= alloc_len;
+			numexts++;
+			last_seq++;
+		}
+
+
+		/*
+		 * If there was not enough space we can throw it all
+		 * away since no real work has been done yet.
+		 */
+		if (len != 0) {
+			meta_sp_list_free(head);
+			return (-1);
+		}
+	}
+
+	/*
+	 * Otherwise, the literal "all" was specified: allocate all
+	 * available free space.  Don't bother with alignment.
+	 */
+	else {
+		/* First, extend the last extent if this is a grow */
+		if (last_off != 0LL) {
+			alloc_ext =
+			    meta_sp_list_find(*head, last_off);
+			assert(alloc_ext != NULL);
+
+			last_seq = alloc_ext->ext_seq;
+
+			free_ext = meta_sp_list_find(*head,
+			    alloc_ext->ext_offset +
+			    alloc_ext->ext_length);
+
+			/*
+			 * If a free extent follows our last allocated
+			 * extent, then remove the last allocated
+			 * extent and increase the size of the free
+			 * extent to overlap it, then allocate the
+			 * total space from the new free extent.
+			 */
+			if (free_ext != NULL &&
+			    free_ext->ext_type == EXTTYP_FREE) {
+				assert(free_ext->ext_offset ==
+				    alloc_ext->ext_offset +
+				    alloc_ext->ext_length);
+
+				len = alloc_len =
+				    free_ext->ext_length;
+
+				free_ext->ext_offset -=
+				    alloc_ext->ext_length;
+				free_ext->ext_length +=
+				    alloc_ext->ext_length;
+
+				meta_sp_alloc_by_ext(sp, np, head,
+				    free_ext, free_ext->ext_offset,
+				    alloc_ext->ext_length + alloc_len,
+				    last_seq);
+
+				/*
+				 * now remove the original allocated
+				 * node.  We may have overlapping
+				 * extents for a short time before
+				 * this node is removed.
+				 */
+				meta_sp_list_remove(head, alloc_ext);
+			}
+
+			last_seq++;
+		}
+
+		/* Next, grab all remaining free space */
+		for (free_ext = *head; free_ext != NULL;
+		    free_ext = next_ext) {
+
+			/*
+			 * Save the successor first: the allocation
+			 * below frees free_ext.
+			 */
+			next_ext = free_ext->ext_next;
+
+			if (free_ext->ext_type == EXTTYP_FREE) {
+				alloc_len =
+				    free_ext->ext_length - MD_SP_WMSIZE;
+				if (alloc_len == 0)
+					continue;
+
+				/*
+				 * meta_sp_alloc_by_ext() expects the
+				 * allocation length to include the
+				 * watermark size, which is why we
+				 * don't simply pass in alloc_len
+				 * here.
+				 */
+				meta_sp_alloc_by_ext(sp, np, head,
+				    free_ext, free_ext->ext_offset,
+				    free_ext->ext_length,
+				    last_seq);
+
+				len += alloc_len;
+				numexts++;
+				last_seq++;
+			}
+		}
+	}
+
+out:
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_alloc_by_len: Extent list after "
+		    "allocation:\n");
+		meta_sp_list_dump(*head);
+	}
+
+	/* "all" was requested: report how much was actually allocated */
+	if (*lp == 0) {
+		*lp = len;
+
+		/*
+		 * Make sure the callers hit a no space error if we
+		 * didn't actually find anything.
+		 */
+		if (len == 0) {
+			return (-1);
+		}
+	}
+
+	return (numexts);
+}
+
+/*
+ * FUNCTION: meta_sp_alloc_by_list()
+ * INPUT: sp - the set name for the device the node belongs to
+ * np - the name of the device the node belongs to
+ * head - the head of the list, must be NULL for empty list
+ * oblist - an extent list containing requested nodes to allocate
+ * OUTPUT: head - the new head pointer
+ * RETURNS: int - -1 if error, the number of new extents on success
+ * PURPOSE: allocates extents from free space to satisfy the requested
+ * extent list. This is primarily used for the -o/-b options
+ * where the user may specifically request extents to allocate.
+ * Each extent in the oblist must be a subset (inclusive) of a
+ * free extent and may not overlap each other. This
+ * function sets the EXTFLG_UPDATE flag for each node that
+ * requires a watermark update after allocating.
+ */
+static int
+meta_sp_alloc_by_list(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	sp_ext_node_t	**head,
+	sp_ext_node_t	*oblist
+)
+{
+	sp_ext_node_t	*want;		/* requested extent */
+	sp_ext_node_t	*host;		/* free extent that must hold it */
+	uint_t		nalloc = 0;
+
+	for (want = oblist; want != NULL; want = want->ext_next) {
+		/* the watermark sits immediately in front of the data */
+		sp_ext_offset_t	wm_start = want->ext_offset - MD_SP_WMSIZE;
+
+		host = meta_sp_list_find(*head, wm_start);
+
+		/* nothing covers the start of the request */
+		if (host == NULL)
+			return (-1);
+
+		/* the request runs off the end of the covering extent */
+		if (want->ext_offset + want->ext_length >
+		    host->ext_offset + host->ext_length)
+			return (-1);
+
+		/* the covering extent is not free space */
+		if (host->ext_type != EXTTYP_FREE)
+			return (-1);
+
+		/* carve out watermark plus data */
+		meta_sp_alloc_by_ext(sp, np, head, host, wm_start,
+		    want->ext_length + MD_SP_WMSIZE, want->ext_seq);
+
+		nalloc++;
+	}
+
+	assert(meta_sp_list_overlaps(*head) == 0);
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_alloc_by_list: Extent list after "
+		    "allocation:\n");
+		meta_sp_list_dump(*head);
+	}
+
+	return (nalloc);
+}
+
+/*
+ * **************************************************************************
+ * Extent List Population Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION: meta_sp_extlist_from_namelist()
+ * INPUT: sp - the set name for the device the node belongs to
+ * spnplp - the namelist of soft partitions to build a list from
+ * OUTPUT: extlist - the extent list built from the SPs in the namelist
+ * ep - return error pointer
+ * RETURNS: int - -1 if error, 0 on success
+ * PURPOSE: builds an extent list representing the soft partitions
+ * specified in the namelist. Each extent in each soft
+ * partition is added to the list with the type EXTTYP_ALLOC.
+ * The EXTFLG_UPDATE flag is not set on any nodes. Each
+ * extent in the list includes the space occupied by the
+ * watermark, which is not included in the unit structures.
+ */
+static int
+meta_sp_extlist_from_namelist(
+	mdsetname_t	*sp,
+	mdnamelist_t	*spnlp,
+	sp_ext_node_t	**extlist,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*nlp;
+
+	assert(sp != NULL);
+
+	/* every soft partition contributes one node per extent */
+	for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
+		mdname_t	*spnp = nlp->namep;
+		md_sp_t		*unit;	/* unit structure of this sp */
+		int		i;
+
+		unit = meta_get_sp_common(sp, spnp, 0, ep);
+		if (unit == NULL)
+			return (-1);
+
+		for (i = 0; (i < unit->ext.ext_len); i++) {
+			md_sp_ext_t	*xp = &unit->ext.ext_val[i];
+
+			/*
+			 * The unit structure describes only the data
+			 * area; widen the node backwards so that it
+			 * also covers the watermark in front of it.
+			 */
+			meta_sp_list_insert(sp, spnp, extlist,
+			    xp->poff - MD_SP_WMSIZE, xp->len + MD_SP_WMSIZE,
+			    EXTTYP_ALLOC, i, 0, meta_sp_cmp_by_offset);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_extlist_from_wm()
+ * INPUT:	sp	- the set name for the device the node belongs to
+ *		compnp	- the name of the device to scan watermarks on
+ *		compare	- the compare function used to order the list
+ * OUTPUT:	extlist	- the extent list built from the watermarks
+ *		ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 on success
+ * PURPOSE:	builds an extent list by walking the chain of watermarks
+ *		on the component, starting at the offset returned by
+ *		meta_sp_get_start() and stopping after the watermark of
+ *		type EXTTYP_END.  One node is inserted per watermark; its
+ *		type and sequence number are taken from the watermark,
+ *		its length includes the space occupied by the watermark
+ *		itself, and the EXTFLG_UPDATE flag is set on every node.
+ *		Watermarks carrying the local set name or the free
+ *		watermark name leave the corresponding set or name
+ *		pointer of the node NULL.
+ */
+static int
+meta_sp_extlist_from_wm(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	sp_ext_node_t	**extlist,
+	ext_cmpfunc_t	compare,
+	md_error_t	*ep
+)
+{
+	mp_watermark_t	wm;
+	mdname_t	*np = NULL;
+	mdsetname_t	*spsetp = NULL;
+	sp_ext_offset_t	cur_off;
+
+	if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+
+	for (;;) {
+		if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) {
+			return (-1);
+		}
+
+		/* get the set and name pointers */
+		if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) {
+			if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) {
+				return (-1);
+			}
+		}
+
+		if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) {
+			if (meta_init_make_device(&sp, wm.wm_mdname, ep) != 0)
+				return (-1);
+			np = metaname(&spsetp, wm.wm_mdname, ep);
+			if (np == NULL) {
+				return (-1);
+			}
+		}
+
+		/* insert watermark into extent list */
+		meta_sp_list_insert(spsetp, np, extlist, cur_off,
+		    wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq,
+		    EXTFLG_UPDATE, compare);
+
+		/* if we see the end watermark, we're done */
+		if (wm.wm_type == EXTTYP_END)
+			break;
+
+		/*
+		 * Step over the data area and its watermark.  This is
+		 * the same span that was just recorded for the extent,
+		 * so the next read starts exactly where this extent
+		 * ends.
+		 */
+		cur_off += wm.wm_length + MD_SP_WMSIZE;
+
+		/* clear out set and name pointers for next iteration */
+		np = NULL;
+		spsetp = NULL;
+	}
+
+	return (0);
+}
+
+/*
+ * **************************************************************************
+ * Print (metastat) Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_short_print()
+ * INPUT:	msp	- the unit structure to display
+ *		fname	- name handed to mdsyserror() when a write to fp
+ *			  fails (presumably the output file name)
+ *		fp	- the file pointer to send output to
+ *		options	- print options from the command line processor
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 on success (including when PRINT_LARGEDEVICES
+ *			  is set and the unit is not a 64 bit device, in
+ *			  which case nothing is printed); otherwise the
+ *			  value returned by mdsyserror()
+ * PURPOSE:	display a short report of the soft partition in md.tab
+ *		form, primarily used for metastat -p.
+ */
+static int
+meta_sp_short_print(
+	md_sp_t		*msp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	int	extn;
+
+	if (options & PRINT_LARGEDEVICES) {
+		if (msp->common.revision != MD_64BIT_META_DEV)
+			return (0);
+	}
+
+	/*
+	 * fprintf() reports failure with a negative return value, which
+	 * is not guaranteed to be EOF, so every write is checked with
+	 * "< 0".
+	 */
+
+	/* print name and -p */
+	if (fprintf(fp, "%s -p", msp->common.namep->cname) < 0)
+		return (mdsyserror(ep, errno, fname));
+
+	/* print the component */
+	/*
+	 * If the path is our standard /dev/rdsk or /dev/md/rdsk
+	 * then just print out the cxtxdxsx or the dx, metainit
+	 * will assume the default, otherwise we need the full
+	 * pathname to make sure this works as we intend.
+	 */
+	if ((strstr(msp->compnamep->rname, "/dev/rdsk") == NULL) &&
+	    (strstr(msp->compnamep->rname, "/dev/md/rdsk") == NULL) &&
+	    (strstr(msp->compnamep->rname, "/dev/td/") == NULL)) {
+		/* not standard path so print full pathname */
+		if (fprintf(fp, " %s", msp->compnamep->rname) < 0)
+			return (mdsyserror(ep, errno, fname));
+	} else {
+		/* standard path so print ctds or d number */
+		if (fprintf(fp, " %s", msp->compnamep->cname) < 0)
+			return (mdsyserror(ep, errno, fname));
+	}
+
+	/* print out each extent */
+	for (extn = 0; (extn < msp->ext.ext_len); extn++) {
+		md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
+
+		if (fprintf(fp, " -o %llu -b %llu ", extp->poff,
+		    extp->len) < 0)
+			return (mdsyserror(ep, errno, fname));
+	}
+
+	if (fprintf(fp, "\n") < 0)
+		return (mdsyserror(ep, errno, fname));
+
+	/* success */
+	return (0);
+}
+
+/*
+ * FUNCTION: meta_sp_status_to_name()
+ * INPUT: xsp_status - the status value to convert to a string
+ * tstate - transient errored device state. If set the
+ * device is Unavailable
+ * OUTPUT: none
+ * RETURNS: char * - a pointer to the string representing the status value
+ * PURPOSE: return an internationalized string representing the
+ * status value for a soft partition. The strings are
+ * strdup'd and must be freed by the caller.
+ */
+static char *
+meta_sp_status_to_name(
+	xsp_status_t	xsp_status,
+	uint_t		tstate
+)
+{
+	char	*label = NULL;
+	char	*name = NULL;
+
+	/*
+	 * MD_INACCESSIBLE is the only tstate bit that yields "Unavailable".
+	 * Other bits may be set for unrelated multi-node reasons (e.g. ABR)
+	 * and must not influence the reported state.
+	 */
+	if (tstate & MD_INACCESSIBLE)
+		return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable")));
+
+	/* select the localized label for each recognized status value */
+	switch (xsp_status) {
+	case MD_SP_CREATEPEND:
+		label = dgettext(TEXT_DOMAIN, "Creating");
+		break;
+	case MD_SP_GROWPEND:
+		label = dgettext(TEXT_DOMAIN, "Growing");
+		break;
+	case MD_SP_DELPEND:
+		label = dgettext(TEXT_DOMAIN, "Deleting");
+		break;
+	case MD_SP_OK:
+		label = dgettext(TEXT_DOMAIN, "Okay");
+		break;
+	case MD_SP_ERR:
+		label = dgettext(TEXT_DOMAIN, "Errored");
+		break;
+	case MD_SP_RECOVER:
+		label = dgettext(TEXT_DOMAIN, "Recovering");
+		break;
+	}
+
+	/* the caller owns (and must free) the duplicated string */
+	if (label != NULL)
+		name = Strdup(label);
+
+	/* anything not matched above is reported as "Invalid" */
+	if (name == NULL)
+		name = Strdup(dgettext(TEXT_DOMAIN, "Invalid"));
+
+	return (name);
+}
+
+/*
+ * FUNCTION:	meta_sp_report()
+ * INPUT:	sp	- the set name for the unit being displayed
+ *		msp	- the unit structure to display
+ *		nlpp	- pass back the large devs
+ *		fname	- name handed to mdsyserror() when writing to fp fails
+ *		fp	- the file pointer to send output to
+ *		options	- print options from the command line processor
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 on success
+ * PURPOSE:	print a full report of the device specified
+ */
+static int
+meta_sp_report(
+	mdsetname_t	*sp,
+	md_sp_t		*msp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t		extn;
+	char		*status;
+	char		*devid = "";
+	mdname_t	*didnp = NULL;
+	ddi_devid_t	dtp;
+	int		len;
+	int		rc;
+	int		saved_errno;
+	uint_t		tstate = 0;
+
+	/*
+	 * With PRINT_LARGEDEVICES only 64-bit metadevices are reported,
+	 * and their underlying devices are collected into nlpp.
+	 */
+	if (options & PRINT_LARGEDEVICES) {
+		if (msp->common.revision != MD_64BIT_META_DEV) {
+			return (0);
+		} else {
+			if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
+				return (-1);
+		}
+	}
+
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"),
+		    msp->common.namep->cname) == EOF)
+			return (mdsyserror(ep, errno, fname));
+	}
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Device: %s\n"),
+	    msp->compnamep->cname) == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/* Determine if device is available before displaying status */
+	if (metaismeta(msp->common.namep)) {
+		if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0)
+			return (-1);
+	}
+	status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED);
+
+	/* print out "State" to be consistent with other metadevices */
+	if (tstate & MD_ABR_CAP) {
+		rc = fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " State: %s - Application Based Recovery (ABR)\n"),
+		    status);
+	} else {
+		rc = fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " State: %s\n"), status);
+	}
+
+	/*
+	 * The status string came from Strdup(), so release it with the
+	 * matching Free() on every path.  errno is captured first so that
+	 * the release cannot disturb the value reported for a failed write.
+	 */
+	saved_errno = errno;
+	Free(status);
+	if (rc == EOF)
+		return (mdsyserror(ep, saved_errno, fname));
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %llu blocks (%s)\n"),
+	    msp->common.size,
+	    meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/* print component details (only when the component is not a metadevice) */
+	if (! metaismeta(msp->compnamep)) {
+		diskaddr_t	start_blk;
+		int		has_mddb;
+		char		*has_mddb_str;
+
+		/* print header */
+		/*
+		 * The field width is computed at run time and passed to
+		 * (f)printf via '*'.  This allows the length of the ctd
+		 * to vary from small to large without looking horrible.
+		 */
+		len = strlen(msp->compnamep->cname);
+		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+		len += 2;
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %-5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+			return (mdsyserror(ep, errno, fname));
+		}
+
+		/* get info */
+		if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) ==
+		    MD_DISKADDR_ERROR)
+			return (-1);
+
+		if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0)
+			return (-1);
+
+		if (has_mddb)
+			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+		else
+			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+		/* populate the key in the name_p structure */
+		didnp = metadevname(&sp, msp->compnamep->dev, ep);
+		if (didnp == NULL) {
+			return (-1);
+		}
+
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL)
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info, using the same run-time width as the header */
+		if (fprintf(fp, "\t%-*s %8lld %-5.5s %s\n",
+		    len, msp->compnamep->cname,
+		    start_blk, has_mddb_str, devid) == EOF) {
+			return (mdsyserror(ep, errno, fname));
+		}
+		(void) fprintf(fp, "\n");
+	}
+
+	/* print the headers */
+	if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n",
+	    dgettext(TEXT_DOMAIN, "Extent"),
+	    dgettext(TEXT_DOMAIN, "Start Block"),
+	    dgettext(TEXT_DOMAIN, "Block count")) == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/* print out each extent */
+	for (extn = 0; (extn < msp->ext.ext_len); extn++) {
+		md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
+
+		/* If PRINT_TIMES option is ever supported, add output here */
+		if (fprintf(fp, "\t%6u %24llu %24llu\n",
+		    extn, extp->poff, extp->len) == EOF)
+			return (mdsyserror(ep, errno, fname));
+	}
+
+	/* separate records with a newline */
+	(void) fprintf(fp, "\n");
+	return (0);
+}
+
+/*
+ * FUNCTION: meta_sp_print()
+ * INPUT: sp - the set name for the unit being displayed
+ * np - the name of the device to print
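+ *		nlpp	- name list handed on to the per-device print routines
+ *		fname	- name handed on to the print routines, which use it
+ *			  when reporting output errors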
+ * fp - the file pointer to send output to
+ * options - print options from the command line processor
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - -1 if error, 0 on success
+ * PURPOSE: print a full report of the device specified by metastat.
+ * This is the main entry point for printing.
+ */
+int
+meta_sp_print(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_sp_t		*unitp;
+	md_unit_t	*subunitp;
+	int		result = 0;
+
+	/* should always have the same set */
+	assert(sp != NULL);
+
+	/* no name given: walk every soft partition in the set */
+	if (np == NULL) {
+		mdnamelist_t	*names = NULL;
+		mdnamelist_t	*cur;
+		int		count;
+
+		count = meta_get_sp_names(sp, &names, options, ep);
+		if (count < 0)
+			return (-1);
+		if (count == 0)
+			return (0);
+
+		/*
+		 * Recursively print each one.  Only the result of the last
+		 * device survives, so an earlier failure is "lost" once the
+		 * next device is printed; that is accepted because we want
+		 * to print them all anyway.
+		 */
+		cur = names;
+		while (cur != NULL) {
+			result = meta_sp_print(sp, cur->namep, nlpp, fname,
+			    fp, options, ep);
+			cur = cur->next;
+		}
+
+		/* clean up and hand back the last result */
+		metafreenamelist(names);
+		return (result);
+	}
+
+	/* get the unit structure */
+	unitp = meta_get_sp_common(sp, np,
+	    ((options & PRINT_FAST) ? 1 : 0), ep);
+	if (unitp == NULL)
+		return (-1);
+
+	/* parented units are only shown when sub-devices were requested */
+	if ((! (options & PRINT_SUBDEVS)) &&
+	    (MD_HAS_PARENT(unitp->common.parent)))
+		return (0);
+
+	/* print appropriate detail */
+	if (options & PRINT_SHORT)
+		result = meta_sp_short_print(unitp, fname, fp, options, ep);
+	else
+		result = meta_sp_report(sp, unitp, nlpp, fname, fp, options,
+		    ep);
+	if (result != 0)
+		return (-1);
+
+	/*
+	 * Print the underlying metadevice if it is parented to us and
+	 * its info has not been printed already.
+	 */
+	if (!metaismeta(unitp->compnamep))
+		return (0);
+
+	/* get the unit structure for the subdevice */
+	subunitp = meta_get_mdunit(sp, unitp->compnamep, ep);
+	if (subunitp == NULL)
+		return (-1);
+
+	/* already printed: nothing more to do */
+	if (BT_TEST(sp_parent_printed, MD_MIN2UNIT(MD_SID(subunitp))))
+		return (0);
+
+	if (meta_print_name(sp, unitp->compnamep, nlpp, fname, fp,
+	    (options | PRINT_HEADER | PRINT_SUBDEVS),
+	    NULL, ep) != 0)
+		return (-1);
+
+	/* remember that this subdevice has now been printed */
+	BT_SET(sp_parent_printed, MD_MIN2UNIT(MD_SID(subunitp)));
+	return (0);
+}
+
+/*
+ * **************************************************************************
+ * Watermark Manipulation Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_get_start()
+ * INPUT:	sp	- the operating set
+ *		np	- device upon which the sp is being built
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	diskaddr_t - MD_DISKADDR_ERROR if error, otherwise the
+ *			     start block
+ * PURPOSE:	Encapsulate the determination of the start block of the
+ *		device upon which the sp is built or being built.
+ *		This is done to hide the ugliness of the algorithm.  In
+ *		the case where a sp is being built upon a stripe of > 1
+ *		TB that is made up of a set of disks in which the first
+ *		has a VTOC label the result returned from the call to
+ *		metagetstart is incorrect.  The reason being that a > 1
+ *		TB metadevice will manufacture an EFI label in which the
+ *		start address is zero.  This is irrespective of the
+ *		underlying devices.  The long term fix for this is to fix
+ *		meta_efi_to_mdvtoc and meta_efi_to_mdgeom so that they
+ *		return values that are indicative of the first underlying
+ *		device in the metadevice.
+ */
+static diskaddr_t
+meta_sp_get_start(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	/*
+	 * Hold the value in the same type that this function returns and
+	 * that MD_DISKADDR_ERROR is compared against, so that neither the
+	 * error sentinel nor a large start block passes through a
+	 * narrower daddr_t on the way.
+	 */
+	diskaddr_t	start_block;
+
+	if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR) {
+		start_block += MD_SP_START;
+		/*
+		 * In the case that the device upon which the sp is being
+		 * created is a metadevice then ensure that in the case that
+		 * the first underlying device has a vtoc label that it is
+		 * not overwritten with a watermark by setting the start block
+		 * to point just past the vtoc label
+		 */
+		if (start_block < VTOC_SIZE && metaismeta(np))
+			start_block = VTOC_SIZE;
+	}
+
+	/* on failure the error sentinel is handed back unchanged */
+	return (start_block);
+}
+
+/*
+ * FUNCTION:	meta_sp_update_wm()
+ * INPUT:	sp	- the operating set
+ *		msp	- a pointer to the XDR unit structure
+ *		extlist	- the extent list specifying watermarks to update
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 on success
+ * PURPOSE:	steps backwards through the extent list updating
+ *		watermarks for all extents with the EXTFLG_UPDATE flag
+ *		set.  Writing the watermarks guarantees consistency when
+ *		extents must be broken into pieces since the original
+ *		watermark will be the last to be updated, and will be
+ *		changed to point to a new watermark that is already
+ *		known to be consistent.  If one of the writes fails, the
+ *		original watermark stays intact and none of the changes
+ *		are realized.
+ */
+static int
+meta_sp_update_wm(
+	mdsetname_t	*sp,
+	md_sp_t		*msp,
+	sp_ext_node_t	*extlist,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t	*ext;
+	sp_ext_node_t	*tail = NULL;	/* stays NULL for an empty list */
+	mp_watermark_t	*wmp, *watermarks;
+	xsp_offset_t	*osp, *offsets;
+	int		update_count = 0;
+	int		rval = 0;
+	md_unit_t	*mdp;
+	md_sp_update_wm_t	update_params;
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n");
+		meta_sp_list_dump(extlist);
+	}
+
+	/*
+	 * find the last node so we can write the watermarks backwards
+	 * and count watermarks to update so we can allocate space
+	 */
+	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
+		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
+			update_count++;
+		}
+
+		if (ext->ext_next == NULL) {
+			tail = ext;
+		}
+	}
+	ext = tail;
+
+	/* size each array from its own element type */
+	wmp = watermarks =
+	    Zalloc(update_count * sizeof (*watermarks));
+	osp = offsets =
+	    Zalloc(update_count * sizeof (*offsets));
+
+	/* walk from the tail towards the head, filling the arrays in order */
+	while (ext != NULL) {
+		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
+			/* update watermark */
+			wmp->wm_magic = MD_SP_MAGIC;
+			wmp->wm_version = MD_SP_VERSION;
+			wmp->wm_type = ext->ext_type;
+			wmp->wm_seq = ext->ext_seq;
+			wmp->wm_length = ext->ext_length - MD_SP_WMSIZE;
+
+			/* fill in the volume name and set name */
+			if (ext->ext_namep != NULL)
+				(void) strcpy(wmp->wm_mdname,
+				    ext->ext_namep->cname);
+			else
+				(void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME);
+			if (ext->ext_setp != NULL &&
+			    ext->ext_setp->setno != MD_LOCAL_SET)
+				(void) strcpy(wmp->wm_setname,
+				    ext->ext_setp->setname);
+			else
+				(void) strcpy(wmp->wm_setname,
+				    MD_SP_LOCALSETNAME);
+
+			/* Generate the checksum (field zeroed beforehand) */
+			wmp->wm_checksum = 0;
+			crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum,
+			    sizeof (*wmp), NULL);
+
+			/* record the extent offset */
+			*osp = ext->ext_offset;
+
+			/* Advance the placeholders */
+			osp++; wmp++;
+		}
+		ext = ext->ext_prev;
+	}
+
+	mdp = meta_get_mdunit(sp, msp->common.namep, ep);
+	if (mdp == NULL) {
+		rval = -1;
+		goto out;
+	}
+
+	/* hand both arrays to the driver in a single ioctl */
+	(void) memset(&update_params, 0, sizeof (update_params));
+	update_params.mnum = MD_SID(mdp);
+	update_params.count = update_count;
+	update_params.wmp = (uintptr_t)watermarks;
+	update_params.osp = (uintptr_t)offsets;
+	MD_SETDRIVERNAME(&update_params, MD_SP,
+	    MD_MIN2SET(update_params.mnum));
+
+	if (metaioctl(MD_IOC_SPUPDATEWM, &update_params,
+	    &update_params.mde, msp->common.namep->cname) != 0) {
+		(void) mdstealerror(ep, &update_params.mde);
+		rval = -1;
+		goto out;
+	}
+
+out:
+	Free(watermarks);
+	Free(offsets);
+
+	return (rval);
+}
+
+/*
+ * FUNCTION: meta_sp_clear_wm()
+ * INPUT: sp - the operating set
+ * msp - the unit structure for the soft partition to clear
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - -1 if error, 0 on success
+ * PURPOSE: steps through the extents for a soft partition unit and
+ * creates an extent list designed to mark all of the
+ * watermarks for those extents as free. The extent list
+ * is then passed to meta_sp_update_wm() to actually write
+ * the watermarks out.
+ */
+static int
+meta_sp_clear_wm(
+	mdsetname_t	*sp,
+	md_sp_t		*msp,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t	*freelist = NULL;
+	md_sp_ext_t	*extp;
+	uint_t		idx;
+	int		status;
+
+	/*
+	 * Build one EXTTYP_FREE node per extent.  Each node is widened to
+	 * take in the watermark that sits immediately before the extent's
+	 * data, and is flagged so that its watermark gets rewritten.
+	 */
+	for (idx = 0; idx < msp->ext.ext_len; idx++) {
+		extp = &msp->ext.ext_val[idx];
+		meta_sp_list_insert(NULL, NULL, &freelist,
+		    extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE,
+		    EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+	}
+
+	/* write the watermarks out, then drop the temporary list */
+	status = meta_sp_update_wm(sp, msp, freelist, ep);
+	meta_sp_list_free(&freelist);
+
+	return (status);
+}
+
+/*
+ * FUNCTION: meta_sp_read_wm()
+ * INPUT: sp - setname for component
+ * compnp - mdname_t for component
+ * offset - the offset of the watermark to read (sectors)
+ * OUTPUT: wm - the watermark structure to read into
+ * ep - return error pointer
+ * RETURNS: int - -1 if error, 0 on success
+ * PURPOSE: seeks out to the requested offset and reads a watermark.
+ * It then verifies that the magic number is correct and
+ * that the checksum is valid, returning an error if either
+ * is wrong.
+ */
+static int
+meta_sp_read_wm(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mp_watermark_t	*wm,
+	sp_ext_offset_t	offset,
+	md_error_t	*ep
+)
+{
+	md_sp_read_wm_t	req;
+
+	/*
+	 * The block offset must not overflow 2^64 bytes, and the watermark
+	 * structure must be a whole number of device blocks.
+	 */
+	assert(offset <= (1LL << (64 - DEV_BSHIFT)));
+	/* LINTED */
+	assert((sizeof (*wm) % DEV_BSIZE) == 0);
+
+	/* start from a clean request and a clean output buffer */
+	(void) memset(&req, 0, sizeof (req));
+	(void) memset(wm, 0, sizeof (*wm));
+
+	req.offset = offset;
+	req.wmp = (uintptr_t)wm;
+	req.rdev = compnp->dev;
+	MD_SETDRIVERNAME(&req, MD_SP, sp->setno);
+
+	/* the read itself is carried out by the driver ioctl */
+	if (metaioctl(MD_IOC_SPREADWM, &req, &req.mde,
+	    compnp->cname) != 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "Extent header read failed, block %llu.\n"), offset);
+		return (mdstealerror(ep, &req.mde));
+	}
+
+	/* reject anything that does not carry the watermark magic */
+	if (wm->wm_magic != MD_SP_MAGIC) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "found incorrect magic number %x, expected %x.\n"),
+		    wm->wm_magic, MD_SP_MAGIC);
+		/*
+		 * The watermark contents are not valid here, so no device
+		 * name is passed along with the error.
+		 */
+		return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL));
+	}
+
+	/* a non-zero result from crcchk() is treated as a bad checksum */
+	if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum,
+	    sizeof (*wm), NULL) != 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "found incorrect checksum %x.\n"),
+		    wm->wm_checksum);
+		return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname));
+	}
+
+	return (0);
+}
+
+/*
+ * **************************************************************************
+ * Query Functions
+ * **************************************************************************
+ */
+
+/*
+ * IMPORTANT NOTE: This is a static function that assumes that
+ * its input parameters have been checked and
+ * have valid values that lie within acceptable
+ * ranges.
+ *
+ * FUNCTION: meta_sp_enough_space()
+ * INPUT: desired_number_of_sps - the number of soft partitions desired;
+ * must be > 0
+ * desired_sp_size - the desired soft partition size in blocks;
+ * must be > 0
+ * extent_listpp - a reference to a reference to an extent
+ * list that lists the extents on a device;
+ * must be a reference to a reference to a
+ * valid extent list
+ * alignment - the desired data space alignment for the sp's
+ * OUTPUT: boolean_t return value
+ * RETURNS: boolean_t - B_TRUE if there's enough space in the extent
+ * list to create the desired soft partitions,
+ * B_FALSE if there's not enough space
+ * PURPOSE: determines whether there's enough free space in an extent
+ * list to allow creation of a set of soft partitions
+ */
+static boolean_t
+meta_sp_enough_space(
+	int		desired_number_of_sps,
+	blkcnt_t	desired_sp_size,
+	sp_ext_node_t	**extent_listpp,
+	sp_ext_length_t	alignment
+)
+{
+	sp_ext_length_t	wanted_length = desired_sp_size;
+	int		allocated;
+
+	/*
+	 * Trial-allocate one soft partition per iteration using the same
+	 * extent allocation routine that a real creation would use,
+	 * meta_sp_alloc_by_len(), against the list at *extent_listpp.
+	 * No soft partition is actually created.  The first allocation
+	 * that reports -1 means the list cannot hold all of the requested
+	 * soft partitions.
+	 */
+	for (allocated = 0; allocated < desired_number_of_sps; allocated++) {
+		if (meta_sp_alloc_by_len(TEST_SETNAMEP,
+		    TEST_SOFT_PARTITION_NAMEP, extent_listpp,
+		    &wanted_length, NO_OFFSET, alignment) == -1)
+			return (B_FALSE);
+	}
+
+	/* every requested soft partition found room */
+	return (B_TRUE);
+}
+
+/*
+ * IMPORTANT NOTE: This is a static function that calls other functions
+ *		   that check its mdsetnamep and device_mdnamep
+ *		   input parameters, but expects extent_listpp to
+ *		   be initialized to a valid address to which
+ *		   it can write a reference to the extent list that
+ *		   it creates.
+ *
+ * FUNCTION:	meta_sp_get_extent_list()
+ * INPUT:	mdsetnamep	- a reference to the mdsetname_t structure
+ *				  for the set containing the device for
+ *				  which the extents are to be listed
+ *		device_mdnamep	- a reference to the mdname_t structure
+ *				  for the device for which the extents
+ *				  are to be listed
+ * OUTPUT:	*extent_listpp	- a reference to the extent list for
+ *				  the device; NULL if the function fails
+ *		*ep		- the libmeta error encountered, if any
+ * RETURNS:	boolean_t	- B_TRUE if the function call was successful,
+ *				  B_FALSE if not
+ * PURPOSE:	gets the extent list for a device
+ */
+static boolean_t
+meta_sp_get_extent_list(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*device_mdnamep,
+	sp_ext_node_t	**extent_listpp,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	device_size_in_blocks;
+	mdnamelist_t	*sp_name_listp;
+	diskaddr_t	start_block_address_in_blocks;
+
+	*extent_listpp = NULL;
+	sp_name_listp = NULL;
+
+	start_block_address_in_blocks = meta_sp_get_start(mdsetnamep,
+	    device_mdnamep, ep);
+	if (start_block_address_in_blocks == MD_DISKADDR_ERROR) {
+		if (getenv(META_SP_DEBUG)) {
+			mde_perror(ep,
+			    "meta_sp_get_extent_list:meta_sp_get_start");
+		}
+		return (B_FALSE);
+	}
+
+	device_size_in_blocks = metagetsize(device_mdnamep, ep);
+	if (device_size_in_blocks == MD_DISKADDR_ERROR) {
+		if (getenv(META_SP_DEBUG)) {
+			mde_perror(ep,
+			    "meta_sp_get_extent_list:metagetsize");
+		}
+		return (B_FALSE);
+	}
+
+	/*
+	 * Sanity check: the start block will have skipped an integer
+	 * number of cylinders, C.  C will usually be zero.  If (C > 0),
+	 * and the disk slice happens to only be C cylinders in total
+	 * size, we'll fail this check.
+	 */
+	if (device_size_in_blocks <=
+	    (start_block_address_in_blocks + MD_SP_WMSIZE)) {
+		(void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname);
+		return (B_FALSE);
+	}
+
+	/*
+	 * After this point, we will have allocated resources, so any
+	 * failure returns must be through the "fail" label
+	 * to properly deallocate things.
+	 */
+
+	/*
+	 * Seed the extent list with a reserved extent that covers
+	 * everything before the start block, and an end extent that
+	 * holds the final watermark of the device.
+	 */
+	meta_sp_list_insert(TEST_SETNAMEP,
+	    TEST_SOFT_PARTITION_NAMEP,
+	    extent_listpp,
+	    NO_OFFSET,
+	    (sp_ext_length_t)start_block_address_in_blocks,
+	    EXTTYP_RESERVED,
+	    NO_SEQUENCE_NUMBER,
+	    NO_FLAGS,
+	    meta_sp_cmp_by_offset);
+	meta_sp_list_insert(TEST_SETNAMEP,
+	    TEST_SOFT_PARTITION_NAMEP,
+	    extent_listpp,
+	    (sp_ext_offset_t)(device_size_in_blocks -
+	    MD_SP_WMSIZE),
+	    MD_SP_WMSIZE,
+	    EXTTYP_END,
+	    NO_SEQUENCE_NUMBER,
+	    NO_FLAGS,
+	    meta_sp_cmp_by_offset);
+
+	/*
+	 * Get the list of soft partitions that are already on the
+	 * device.  Only a negative return is treated as a failure: a
+	 * device that carries no soft partitions yet simply has all of
+	 * its usable space free, which is handled below by leaving the
+	 * name list empty.
+	 * NOTE(review): this assumes meta_sp_get_by_component() returns a
+	 * negative value only on error and the number of soft partitions
+	 * otherwise -- confirm against its definition.
+	 */
+	if (meta_sp_get_by_component(mdsetnamep, device_mdnamep,
+	    &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 0) {
+		if (getenv(META_SP_DEBUG)) {
+			mde_perror(ep,
+			    "meta_sp_get_extent_list:meta_sp_get_by_component");
+		}
+		goto fail;
+	}
+
+	if (sp_name_listp != NULL) {
+		/*
+		 * If there are soft partitions on the device, add the
+		 * extents used in them to the extent list.
+		 */
+		if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp,
+		    extent_listpp, ep) == -1) {
+			if (getenv(META_SP_DEBUG)) {
+				mde_perror(ep, "meta_sp_get_extent_list:"
+				    "meta_sp_extlist_from_namelist");
+			}
+			goto fail;
+		}
+		metafreenamelist(sp_name_listp);
+	}
+
+	/*
+	 * Add free extents to the extent list to represent
+	 * the remaining regions of free space on the
+	 * device.
+	 */
+	meta_sp_list_freefill(extent_listpp, device_size_in_blocks);
+	return (B_TRUE);
+
+fail:
+	if (sp_name_listp != NULL) {
+		metafreenamelist(sp_name_listp);
+	}
+
+	if (*extent_listpp != NULL) {
+		/*
+		 * meta_sp_list_free sets *extent_listpp to NULL.
+		 */
+		meta_sp_list_free(extent_listpp);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * IMPORTANT NOTE: This is a static function that calls other functions
+ *		   that check its mdsetnamep and mddrivenamep
+ *		   input parameters, but expects extent_listpp to
+ *		   be initialized to a valid address to which
+ *		   it can write a reference to the extent list that
+ *		   it creates.
+ *
+ * FUNCTION:	meta_sp_get_extent_list_for_drive()
+ * INPUT:	mdsetnamep	- a reference to the mdsetname_t structure
+ *				  for the set containing the drive for
+ *				  which the extents are to be listed
+ *		mddrivenamep	- a reference to the mddrivename_t structure
+ *				  for the drive for which the extents
+ *				  are to be listed
+ * OUTPUT:	*extent_listpp	- a reference to the extent list for
+ *				  the drive; NULL if the function fails
+ * RETURNS:	boolean_t	- B_TRUE if the function call was successful,
+ *				  B_FALSE if not
+ * PURPOSE:	gets the extent list for a drive when the entire drive
+ *		is to be soft partitioned
+ */
+static boolean_t
+meta_sp_get_extent_list_for_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep,
+	sp_ext_node_t	**extent_listpp
+)
+{
+	diskaddr_t	free_space;
+	md_error_t	mderror;
+	mdvtoc_t	proposed_vtoc;
+	int		repartition_options;
+	md_sp_t		test_sp_struct;
+
+	*extent_listpp = NULL;
+
+	/*
+	 * Only compnamep is filled in below; zero the rest so that
+	 * meta_check_sp() is never handed indeterminate fields.
+	 */
+	(void) memset(&test_sp_struct, 0, sizeof (test_sp_struct));
+
+	mderror = mdnullerror;
+	test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0,
+	    &mderror);
+	if (test_sp_struct.compnamep == NULL)
+		goto fail;
+
+	/* can slice zero of this drive be used for soft partitions at all? */
+	mderror = mdnullerror;
+	repartition_options = 0;
+	if (meta_check_sp(mdsetnamep, &test_sp_struct,
+	    MDCMD_USE_WHOLE_DISK, &repartition_options, &mderror) != 0)
+		goto fail;
+
+	/* compute the proposed VTOC; MD_REPART_DONT_LABEL is requested */
+	mderror = mdnullerror;
+	repartition_options = repartition_options |
+	    (MD_REPART_FORCE | MD_REPART_DONT_LABEL);
+	if (meta_repartition_drive(mdsetnamep, mddrivenamep,
+	    repartition_options, &proposed_vtoc, &mderror) != 0)
+		goto fail;
+
+	/* slice zero must have room beyond the reserved start and one WM */
+	free_space = proposed_vtoc.parts[MD_SLICE0].size;
+	if (free_space <= (MD_SP_START + MD_SP_WMSIZE))
+		goto fail;
+
+	/*
+	 * Create an extent list that starts with a reserved extent that
+	 * ends at the start of the usable space on slice zero of the
+	 * proposed VTOC, ends with an extent that reserves space for a
+	 * watermark at the end of slice zero, and contains a single free
+	 * extent that occupies the rest of the space on the slice.
+	 *
+	 * NOTE:
+	 *
+	 * Don't use metagetstart() or metagetsize() to find the usable
+	 * space.  They query the mdname_t structure that represents an
+	 * actual device to determine the amount of space on the device
+	 * that contains metadata and the total amount of space on the
+	 * device.  Since this function creates a proposed extent list
+	 * that doesn't reflect the state of an actual device, there's no
+	 * mdname_t structure to be queried.
+	 *
+	 * When a drive is reformatted to prepare for soft partitioning,
+	 * all of slice seven is reserved for metadata, all of slice zero
+	 * is available for soft partitioning, and all other slices on
+	 * the drive are empty.  The proposed extent list for the drive
+	 * therefore contains only three extents: the reserved extent,
+	 * the single free extent, and the ending extent.
+	 */
+	meta_sp_list_insert(TEST_SETNAMEP,
+	    TEST_SOFT_PARTITION_NAMEP,
+	    extent_listpp,
+	    NO_OFFSET,
+	    (sp_ext_length_t)(MD_SP_START),
+	    EXTTYP_RESERVED,
+	    NO_SEQUENCE_NUMBER,
+	    NO_FLAGS,
+	    meta_sp_cmp_by_offset);
+	meta_sp_list_insert(TEST_SETNAMEP,
+	    TEST_SOFT_PARTITION_NAMEP,
+	    extent_listpp,
+	    (sp_ext_offset_t)(free_space - MD_SP_WMSIZE),
+	    MD_SP_WMSIZE,
+	    EXTTYP_END,
+	    NO_SEQUENCE_NUMBER,
+	    NO_FLAGS,
+	    meta_sp_cmp_by_offset);
+	meta_sp_list_freefill(extent_listpp, free_space);
+	return (B_TRUE);
+
+fail:
+	/*
+	 * The error is local and never reported to the caller, so it is
+	 * cleared here rather than left set when the function returns.
+	 */
+	mdclrerror(&mderror);
+	return (B_FALSE);
+}
+
+/*
+ * FUNCTION:	meta_sp_can_create_sps()
+ * INPUT:	mdsetnamep	- a reference to the mdsetname_t structure
+ *				  for the set containing the device for
+ *				  which the extents are to be listed
+ *		mdnamep		- a reference to the mdname_t of the device
+ *				  on which the soft partitions are to be
+ *				  created
+ *		number_of_sps	- the desired number of soft partitions
+ *		sp_size		- the desired soft partition size
+ * OUTPUT:	boolean_t return value
+ * RETURNS:	boolean_t	- B_TRUE if the soft partitions can be created,
+ *				  B_FALSE if not
+ * PURPOSE:	determines whether a set of soft partitions can be created
+ *		on a device
+ */
+boolean_t
+meta_sp_can_create_sps(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*mdnamep,
+	int		number_of_sps,
+	blkcnt_t	sp_size
+)
+{
+	sp_ext_node_t	*extent_listp = NULL;
+	boolean_t	succeeded = B_FALSE;
+	md_error_t	mde;
+	sp_ext_length_t	alignment;
+
+	/*
+	 * Start from a well-defined (empty) error structure; it is handed
+	 * to callees that may record an error in it.
+	 */
+	mde = mdnullerror;
+
+	/* a non-positive count or size can never be satisfied */
+	if ((number_of_sps > 0) && (sp_size > 0)) {
+		succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
+		    &extent_listp, &mde);
+	}
+
+	if (succeeded == B_TRUE) {
+		/*
+		 * We don't really care about an error return from the
+		 * alignment call; that will just result in passing zero,
+		 * which will be interpreted as no alignment.
+		 */
+		alignment = meta_sp_get_default_alignment(mdsetnamep,
+		    mdnamep, &mde);
+		succeeded = meta_sp_enough_space(number_of_sps,
+		    sp_size, &extent_listp, alignment);
+		meta_sp_list_free(&extent_listp);
+	}
+
+	/* errors are not reported to the caller; clear before returning */
+	mdclrerror(&mde);
+	return (succeeded);
+}
+
+/*
+ * FUNCTION: meta_sp_can_create_sps_on_drive()
+ * INPUT: mdsetnamep - a reference to the mdsetname_t structure
+ * for the set containing the drive for
+ * which the extents are to be listed
+ * mddrivenamep - a reference to the mddrivename_t of the drive
+ *				  on which the soft partitions are to be created
+ * number_of_sps - the desired number of soft partitions
+ * sp_size - the desired soft partition size
+ * OUTPUT: boolean_t return value
+ * RETURNS:	boolean_t	- B_TRUE if the soft partitions can be created,
+ * B_FALSE if not
+ * PURPOSE: determines whether a set of soft partitions can be created
+ * on a drive if the entire drive is soft partitioned
+ */
+boolean_t
+meta_sp_can_create_sps_on_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep,
+	int		number_of_sps,
+	blkcnt_t	sp_size
+)
+{
+	sp_ext_node_t	*drive_extents;
+	boolean_t	fits;
+
+	/* a non-positive count or size can never be satisfied */
+	if ((number_of_sps <= 0) || (sp_size <= 0))
+		return (B_FALSE);
+
+	/* build the proposed extent list for the whole drive */
+	fits = meta_sp_get_extent_list_for_drive(mdsetnamep, mddrivenamep,
+	    &drive_extents);
+	if (fits != B_TRUE)
+		return (fits);
+
+	/*
+	 * Alignment is deliberately ignored for the space check: we are
+	 * dealing with a bare drive, which has no inherent alignment.
+	 */
+	fits = meta_sp_enough_space(number_of_sps, sp_size,
+	    &drive_extents, SP_UNALIGNED);
+	meta_sp_list_free(&drive_extents);
+
+	return (fits);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_free_space()
+ * INPUT:	mdsetnamep	- a reference to the mdsetname_t structure
+ *				  for the set containing the device for
+ *				  which the free space is to be returned
+ *		mdnamep		- a reference to the mdname_t of the device
+ *				  for which the free space is to be returned
+ * OUTPUT:	blkcnt_t return value
+ * RETURNS:	blkcnt_t	- the number of blocks of free space on the
+ *				  device; 0 if the extent list cannot be
+ *				  obtained
+ * PURPOSE:	returns the number of blocks of free space on a device
+ */
+blkcnt_t
+meta_sp_get_free_space(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*mdnamep
+)
+{
+	sp_ext_node_t	*extent_listp;
+	sp_ext_length_t	free_blocks;
+	boolean_t	succeeded;
+	md_error_t	mde;
+
+	extent_listp = NULL;
+	free_blocks = 0;
+
+	/*
+	 * Start from a well-defined (empty) error structure; it is handed
+	 * to a callee that may record an error in it, and is cleared
+	 * below on failure.
+	 */
+	mde = mdnullerror;
+
+	succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
+	    &extent_listp, &mde);
+	if (succeeded == B_TRUE) {
+		free_blocks = meta_sp_list_size(extent_listp,
+		    EXTTYP_FREE, INCLUDE_WM);
+		meta_sp_list_free(&extent_listp);
+		if (free_blocks > (10 * MD_SP_WMSIZE)) {
+			/*
+			 * Subtract a safety margin for watermarks when
+			 * computing the number of blocks available for
+			 * use.  The actual number of watermarks can't
+			 * be calculated without knowing the exact numbers
+			 * and sizes of both the free extents and the soft
+			 * partitions to be created.  The calculation is
+			 * highly complex and error-prone even if those
+			 * quantities are known.  The approximate value
+			 * 10 * MD_SP_WMSIZE is within a few blocks of the
+			 * correct value in all practical cases.
+			 */
+			free_blocks = free_blocks - (10 * MD_SP_WMSIZE);
+		} else {
+			/* too small to be usable once the margin is taken */
+			free_blocks = 0;
+		}
+	} else {
+		/* the error is not reported to the caller; discard it */
+		mdclrerror(&mde);
+	}
+
+	return (free_blocks);
+}
+
+/*
+ * FUNCTION: meta_sp_get_free_space_on_drive()
+ * INPUT: mdsetnamep - a reference to the mdsetname_t structure
+ * for the set containing the drive for
+ * which the free space is to be returned
+ * mddrivenamep - a reference to the mddrivename_t of the drive
+ * for which the free space is to be returned
+ * OUTPUT: blkcnt_t return value
+ * RETURNS: blkcnt_t - the number of blocks of free space on the drive
+ * PURPOSE: returns the number of blocks of space usable for soft
+ * partitions on an entire drive, if the entire drive is
+ * soft partitioned
+ */
+blkcnt_t
+meta_sp_get_free_space_on_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep
+)
+{
+	sp_ext_node_t	*drive_extents = NULL;
+	sp_ext_length_t	usable;
+
+	/* without a proposed extent list there is nothing to report */
+	if (meta_sp_get_extent_list_for_drive(mdsetnamep, mddrivenamep,
+	    &drive_extents) != B_TRUE)
+		return (0);
+
+	/* total the free extents, watermark space included */
+	usable = meta_sp_list_size(drive_extents, EXTTYP_FREE, INCLUDE_WM);
+	meta_sp_list_free(&drive_extents);
+
+	/*
+	 * Hold back a safety margin for watermarks.  The real number of
+	 * watermarks depends on the exact numbers and sizes of both the
+	 * free extents and the soft partitions that will be created, and
+	 * working it out is complex and error-prone even when those are
+	 * known.  The approximation 10 * MD_SP_WMSIZE is within a few
+	 * blocks of the correct value in all practical cases.  If the
+	 * free space does not exceed the margin, nothing is usable.
+	 */
+	if (usable <= (10 * MD_SP_WMSIZE))
+		return (0);
+
+	return (usable - (10 * MD_SP_WMSIZE));
+}
+
+/*
+ * FUNCTION:	meta_sp_get_number_of_possible_sps()
+ * INPUT:	mdsetnamep	- a reference to the mdsetname_t structure
+ *				for the set containing the device for
+ *				which the number of possible soft partitions
+ *				is to be returned
+ *		mdnamep	- a reference to the mdname_t of the device
+ *				for which the number of possible soft partitions
+ *				is to be returned
+ *		sp_size	- the size in blocks of the proposed soft partitions;
+ *				values <= 0 yield a result of 0
+ * OUTPUT:	int return value
+ * RETURNS:	int	- the number of soft partitions of the desired size
+ *			  that can be created on the device
+ * PURPOSE:	returns the number of soft partitions of a given size
+ *		that can be created on a device
+ */
+int
+meta_sp_get_number_of_possible_sps(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*mdnamep,
+	blkcnt_t	sp_size
+)
+{
+	sp_ext_node_t	*extent_listp = NULL;
+	int		number_of_possible_sps = 0;
+	boolean_t	succeeded = B_FALSE;
+	md_error_t	mde;
+	sp_ext_length_t	alignment = 0;
+
+	/*
+	 * mde is a stack object handed to several helpers and to
+	 * mdclrerror(); zero it so none of them sees stack garbage.
+	 * NOTE(review): assumes an all-zero md_error_t is the "no error"
+	 * state -- confirm against the md_error_t definition.
+	 */
+	(void) memset(&mde, 0, sizeof (mde));
+
+	if (sp_size > 0) {
+		succeeded = meta_sp_get_extent_list(mdsetnamep,
+		    mdnamep, &extent_listp, &mde);
+		if (succeeded == B_FALSE)
+			mdclrerror(&mde);
+	}
+
+	if (succeeded == B_TRUE) {
+		alignment = meta_sp_get_default_alignment(mdsetnamep,
+		    mdnamep, &mde);
+		/*
+		 * This function has no way to report an error, so drop
+		 * anything the alignment lookup may have recorded rather
+		 * than leaving it attached to a local that goes out of
+		 * scope.
+		 */
+		mdclrerror(&mde);
+	}
+
+	while (succeeded == B_TRUE) {
+		/*
+		 * Keep allocating space from the extent list
+		 * for soft partitions of the desired size until
+		 * there's not enough free space left in the list
+		 * for another soft partition of that size.
+		 * Add one to the number of possible soft partitions
+		 * for each soft partition for which there is
+		 * enough free space left.
+		 */
+		succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION,
+		    sp_size, &extent_listp, alignment);
+		if (succeeded == B_TRUE) {
+			number_of_possible_sps++;
+		}
+	}
+	if (extent_listp != NULL) {
+		meta_sp_list_free(&extent_listp);
+	}
+	return (number_of_possible_sps);
+}
+
+/*
+ * FUNCTION: meta_sp_get_number_of_possible_sps_on_drive()
+ * INPUT: mdsetnamep - a reference to the mdsetname_t structure
+ * for the set containing the drive for
+ * which the number of possible soft partitions
+ * is to be returned
+ * mddrivenamep - a reference to the mddrivename_t of the drive
+ * for which the number of possible soft partitions
+ * is to be returned
+ * sp_size - the size in blocks of the proposed soft partitions
+ * OUTPUT: int return value
+ * RETURNS: int - the number of soft partitions of the desired size
+ * that can be created on the drive
+ * PURPOSE: returns the number of soft partitions of a given size
+ * that can be created on a drive, if the entire drive is
+ * soft partitioned
+ */
+int
+meta_sp_get_number_of_possible_sps_on_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep,
+	blkcnt_t	sp_size
+)
+{
+	sp_ext_node_t	*drive_extents = NULL;
+	int		fit_count = 0;
+	boolean_t	more_room = B_FALSE;
+
+	/* a non-positive size can never be satisfied; skip the lookup */
+	if (sp_size > 0) {
+		more_room = meta_sp_get_extent_list_for_drive(mdsetnamep,
+		    mddrivenamep, &drive_extents);
+	}
+
+	/*
+	 * Carve one soft partition of sp_size blocks out of the extent
+	 * list per pass, counting each success, and stop at the first
+	 * pass that no longer fits.  This is a drive rather than a
+	 * metadevice, so no alignment is assumed (SP_UNALIGNED).
+	 */
+	while (more_room == B_TRUE) {
+		more_room = meta_sp_enough_space(ONE_SOFT_PARTITION,
+		    sp_size, &drive_extents, SP_UNALIGNED);
+		if (more_room != B_TRUE)
+			break;
+		fit_count++;
+	}
+
+	if (drive_extents != NULL) {
+		meta_sp_list_free(&drive_extents);
+	}
+	return (fit_count);
+}
+
+/*
+ * FUNCTION: meta_sp_get_possible_sp_size()
+ * INPUT: mdsetnamep - a reference to the mdsetname_t structure
+ * for the set containing the device for
+ * which the possible soft partition size
+ * is to be returned
+ * mdnamep - a reference to the mdname_t of the device
+ * for which the possible soft partition size
+ * is to be returned
+ * number_of_sps - the desired number of soft partitions
+ * OUTPUT: blkcnt_t return value
+ * RETURNS: blkcnt_t - the possible soft partition size in blocks
+ * PURPOSE: returns the maximum possible size of each of a given number of
+ * soft partitions of equal size that can be created on a device
+ */
+blkcnt_t
+meta_sp_get_possible_sp_size(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*mdnamep,
+	int		number_of_sps
+)
+{
+	blkcnt_t	available;
+	blkcnt_t	candidate;
+
+	/* nothing sensible can be computed for a non-positive count */
+	if (number_of_sps <= 0)
+		return (0);
+
+	/* first guess: an even split of the reported free space */
+	available = meta_sp_get_free_space(mdsetnamep, mdnamep);
+	candidate = available / number_of_sps;
+
+	/*
+	 * Test the candidate size; while it does not fit and is still
+	 * positive, shrink it by number_of_sps blocks to compensate for
+	 * space that may have been occupied by watermarks, then test
+	 * again.
+	 */
+	for (;;) {
+		if (meta_sp_can_create_sps(mdsetnamep, mdnamep,
+		    number_of_sps, candidate) != B_FALSE)
+			break;
+		if (candidate <= 0)
+			break;
+		candidate = candidate - ((blkcnt_t)number_of_sps);
+	}
+
+	/* the last shrink step may have gone below zero */
+	return ((candidate < 0) ? 0 : candidate);
+}
+
+/*
+ * FUNCTION: meta_sp_get_possible_sp_size_on_drive()
+ * INPUT: mdsetnamep - a reference to the mdsetname_t structure
+ * for the set containing the drive for
+ * which the possible soft partition size
+ * is to be returned
+ * mddrivenamep - a reference to the mddrivename_t of the drive
+ * for which the possible soft partition size
+ * is to be returned
+ * number_of_sps - the desired number of soft partitions
+ * OUTPUT: blkcnt_t return value
+ * RETURNS: blkcnt_t - the possible soft partition size in blocks
+ * PURPOSE: returns the maximum possible size of each of a given number of
+ * soft partitions of equal size that can be created on a drive
+ * if the entire drive is soft partitioned
+ */
+blkcnt_t
+meta_sp_get_possible_sp_size_on_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep,
+	int		number_of_sps
+)
+{
+	blkcnt_t	trial;
+	boolean_t	fits;
+
+	if (number_of_sps <= 0)
+		return (0);
+
+	/* start from an even split of the drive's reported free space */
+	trial = meta_sp_get_free_space_on_drive(mdsetnamep, mddrivenamep) /
+	    number_of_sps;
+
+	/*
+	 * Probe the trial size; each time it fails while still positive,
+	 * give back number_of_sps blocks (room for watermarks) and probe
+	 * again.
+	 */
+	for (fits = meta_sp_can_create_sps_on_drive(mdsetnamep,
+	    mddrivenamep, number_of_sps, trial);
+	    (fits == B_FALSE) && (trial > 0);
+	    fits = meta_sp_can_create_sps_on_drive(mdsetnamep,
+	    mddrivenamep, number_of_sps, trial)) {
+		trial = trial - ((blkcnt_t)number_of_sps);
+	}
+
+	/* never report a negative size */
+	if (trial < 0)
+		trial = 0;
+
+	return (trial);
+}
+
+/*
+ * **************************************************************************
+ * Unit Structure Manipulation Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION: meta_sp_fillextarray()
+ * INPUT: mp - the unit structure to fill
+ * extlist - the list of extents to fill with
+ * OUTPUT: none
+ * RETURNS: void
+ * PURPOSE: fills in the unit structure extent list with the extents
+ * specified by extlist. Only extents in extlist with the
+ * EXTFLG_UPDATE flag are changed in the unit structure,
+ * and the index into the unit structure is the sequence
+ * number in the extent list. After all of the nodes have
+ * been updated the virtual offsets in the unit structure
+ * are updated to reflect the new lengths.
+ */
+static void
+meta_sp_fillextarray(
+	mp_unit_t	*mp,
+	sp_ext_node_t	*extlist
+)
+{
+	sp_ext_node_t	*node;
+	struct mp_ext	*slot;
+	sp_ext_offset_t	running_voff = 0LL;
+	int		i;
+
+	assert(mp != NULL);
+
+	/*
+	 * Pass 1: copy every allocated extent that is flagged for update
+	 * into the unit's extent array, indexed by its sequence number.
+	 * The watermark at the front of each extent is skipped: the
+	 * physical offset moves past it and the length shrinks by it.
+	 */
+	for (node = extlist; node != NULL; node = node->ext_next) {
+		if (node->ext_type != EXTTYP_ALLOC)
+			continue;
+		if ((node->ext_flags & EXTFLG_UPDATE) == 0)
+			continue;
+
+		slot = &mp->un_ext[node->ext_seq];
+		slot->un_poff = node->ext_offset + MD_SP_WMSIZE;
+		slot->un_len = node->ext_length - MD_SP_WMSIZE;
+	}
+
+	/*
+	 * Pass 2: recompute the virtual offsets so that each extent
+	 * starts where the previous one ended.
+	 */
+	for (i = 0; i < mp->un_numexts; i++) {
+		assert(mp->un_ext[i].un_poff != 0);
+		assert(mp->un_ext[i].un_len != 0);
+
+		slot = &mp->un_ext[i];
+		slot->un_voff = running_voff;
+		running_voff += slot->un_len;
+	}
+}
+
+/*
+ * FUNCTION: meta_sp_createunit()
+ * INPUT: np - the name of the device to create a unit structure for
+ * compnp - the name of the device the soft partition is on
+ * extlist - the extent list to populate the new unit with
+ * numexts - the number of extents in the extent list
+ * len - the total size of the soft partition (sectors)
+ * status - the initial status of the unit structure
+ * OUTPUT: ep - return error pointer
+ * RETURNS: mp_unit_t * - the new unit structure.
+ * PURPOSE: allocates and fills in a new soft partition unit
+ * structure to be passed to the soft partitioning driver
+ * for creation.
+ */
+static mp_unit_t *
+meta_sp_createunit(
+	mdname_t	*np,
+	mdname_t	*compnp,
+	sp_ext_node_t	*extlist,
+	int		numexts,
+	sp_ext_length_t	len,
+	sp_status_t	status,
+	md_error_t	*ep
+)
+{
+	mp_unit_t	*unit;
+	uint_t		unit_bytes;
+
+	/*
+	 * mp_unit_t already carries one extent slot, so size the
+	 * allocation as the fixed part plus numexts slots.
+	 */
+	unit_bytes = (sizeof (*unit) - sizeof (unit->un_ext[0])) +
+	    (numexts * sizeof (unit->un_ext[0]));
+	unit = Zalloc(unit_bytes);
+
+	/* common unit header */
+	unit->c.un_type = MD_METASP;
+	unit->c.un_size = unit_bytes;
+	MD_SID(unit) = meta_getminor(np->dev);
+	unit->c.un_total_blocks = len;
+	unit->c.un_actual_tb = len;
+
+	/* geometry; the result is deliberately not checked here */
+	(void) meta_sp_setgeom(np, compnp, unit, ep);
+
+	/* a soft partition built on a metadevice cannot itself parent */
+	MD_CAPAB(unit) = metaismeta(compnp) ? MD_CANT_PARENT : MD_CAN_PARENT;
+
+	/* soft partition specific fields */
+	unit->un_dev = compnp->dev;
+	unit->un_key = compnp->key;
+	/* mdname_t start_blk field is not 64-bit! */
+	unit->un_start_blk = (sp_ext_offset_t)compnp->start_blk;
+	unit->un_status = status;
+	unit->un_numexts = numexts;
+	unit->un_length = len;
+
+	/* finally, populate the extent array from the extent list */
+	meta_sp_fillextarray(unit, extlist);
+
+	return (unit);
+}
+
+/*
+ * FUNCTION: meta_sp_updateunit()
+ * INPUT: np - name structure for the metadevice being updated
+ * old_un - the original unit structure that is being updated
+ * extlist - the extent list to populate the new unit with
+ * grow_len - the amount by which the partition is being grown
+ * numexts - the number of extents in the extent list
+ * ep - return error pointer
+ * OUTPUT: none
+ * RETURNS: mp_unit_t * - the updated unit structure
+ * PURPOSE: allocates and fills in a new soft partition unit structure to
+ * be passed to the soft partitioning driver for creation. The
+ * old unit structure is first copied in, and then the updated
+ * extents are changed in the new unit structure. This is
+ * typically used when the size of an existing unit is changed.
+ */
+static mp_unit_t *
+meta_sp_updateunit(
+	mdname_t	*np,
+	mp_unit_t	*old_un,
+	sp_ext_node_t	*extlist,
+	sp_ext_length_t	grow_len,
+	int		numexts,
+	md_error_t	*ep
+)
+{
+	mp_unit_t	*grown;
+	sp_ext_length_t	total_len;
+	uint_t		grown_bytes;
+
+	assert(old_un != NULL);
+	assert(extlist != NULL);
+
+	/* size the replacement for the old extents plus the new ones */
+	total_len = old_un->un_length + grow_len;
+	grown_bytes = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) +
+	    ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0]));
+
+	/* start from a copy of the existing unit */
+	grown = Zalloc(grown_bytes);
+	bcopy(old_un, grown, old_un->c.un_size);
+
+	/* record the new size and length everywhere they are kept */
+	grown->c.un_size = grown_bytes;
+	grown->c.un_total_blocks = total_len;
+	grown->c.un_actual_tb = total_len;
+	grown->un_length = total_len;
+
+	/* recompute the geometry; on success finish filling in the unit */
+	if (meta_adjust_geom((md_unit_t *)grown, np,
+	    old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct,
+	    0, ep) == 0) {
+		grown->un_numexts += numexts;
+		meta_sp_fillextarray(grown, extlist);
+		return (grown);
+	}
+
+	/* geometry adjustment failed: discard the half-built unit */
+	Free(grown);
+	return (NULL);
+}
+
+/*
+ * FUNCTION: meta_get_sp()
+ * INPUT: sp - the set name for the device to get
+ * np - the name of the device to get
+ * OUTPUT: ep - return error pointer
+ * RETURNS: md_sp_t * - the XDR unit structure for the soft partition
+ * PURPOSE: interface to the rest of libmeta for fetching a unit structure
+ * for the named device. Just a wrapper for meta_get_sp_common().
+ */
+md_sp_t *
+meta_get_sp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	md_sp_t	*unit;
+
+	/* delegate to the common routine with the "fast" argument off */
+	unit = meta_get_sp_common(sp, np, 0, ep);
+
+	return (unit);
+}
+
+/*
+ * FUNCTION: meta_get_sp_common()
+ * INPUT: sp - the set name for the device to get
+ * np - the name of the device to get
+ * fast - whether to use the cache or not (NOT IMPLEMENTED!)
+ * OUTPUT: ep - return error pointer
+ * RETURNS: md_sp_t * - the XDR unit structure for the soft partition,
+ * NULL if np is not a soft partition
+ * PURPOSE: common routine for fetching a soft partition unit structure
+ */
+md_sp_t *
+meta_get_sp_common(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*dnp = np->drivenamep;
+	char		*drvname;
+	mp_unit_t	*mp;
+	md_sp_t		*msp;
+	struct mp_ext	*src;
+	md_sp_ext_t	*dst;
+	int		i;
+
+	/* must have set */
+	assert(sp != NULL);
+
+	/*
+	 * A unit already attached to the drive name answers the request
+	 * directly: hand it back if it is a soft partition, otherwise
+	 * report "not a soft partition" with NULL.
+	 */
+	if (dnp->unitp != NULL) {
+		return ((dnp->unitp->type == MD_METASP) ?
+		    (md_sp_t *)dnp->unitp : NULL);
+	}
+
+	/* the driver name must identify a soft partition */
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (NULL);
+	if (strcmp(drvname, MD_SP) != 0) {
+		(void) mdmderror(ep, MDE_NOT_SP, 0, np->cname);
+		return (NULL);
+	}
+
+	/* fetch the raw unit structure */
+	mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep);
+	if (mp == NULL)
+		return (NULL);
+
+	assert(mp->c.un_type == MD_METASP);
+
+	/* build the md_sp_t, starting with the common header fields */
+	msp = Zalloc(sizeof (*msp));
+	msp->common.namep = np;
+	msp->common.type = mp->c.un_type;
+	msp->common.state = mp->c.un_status;
+	msp->common.capabilities = mp->c.un_capabilities;
+	msp->common.parent = mp->c.un_parent;
+	msp->common.size = mp->c.un_total_blocks;
+	msp->common.user_flags = mp->c.un_user_flags;
+	msp->common.revision = mp->c.un_revision;
+
+	/* resolve the component name from its key */
+	msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep);
+	if (msp->compnamep == NULL) {
+		/* clean up and return error */
+		Free(mp);
+		Free(msp);
+		return (NULL);
+	}
+
+	/*
+	 * Record the key and start block on the component name.  The
+	 * unit's start block is 64 bits wide while the name structure
+	 * only holds 32.
+	 */
+	msp->compnamep->key = mp->un_key;
+	msp->compnamep->start_blk = mp->un_start_blk;
+
+	msp->status = mp->un_status;
+
+	/* copy the extent array */
+	msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val));
+	msp->ext.ext_len = mp->un_numexts;
+	for (i = 0; i < mp->un_numexts; i++) {
+		src = &mp->un_ext[i];
+		dst = &msp->ext.ext_val[i];
+
+		dst->voff = src->un_voff;
+		dst->poff = src->un_poff;
+		dst->len = src->un_len;
+	}
+
+	/* the raw unit is no longer needed; cache and return the result */
+	Free(mp);
+	dnp->unitp = (md_common_t *)msp;
+	return (msp);
+}
+
+
+/*
+ * FUNCTION: meta_init_sp()
+ * INPUT: spp - the set name for the new device
+ * argc - the remaining argument count for the metainit cmdline
+ * argv - the remainder of the unparsed command line
+ * options - global options parsed by metainit
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - -1 failure, 0 success
+ * PURPOSE: provides the command line parsing and name management overhead
+ * for creating a new soft partition. Ultimately this calls
+ * meta_create_sp() which does the real work of allocating space
+ * for the new soft partition.
+ */
+int
+meta_init_sp(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*compname = NULL;
+	mdname_t	*spcompnp = NULL;	/* name of component volume */
+	char		*devname = argv[0];	/* unit name */
+	mdname_t	*np = NULL;		/* name of soft partition */
+	md_sp_t		*msp = NULL;
+	int		c;
+	int		old_optind;
+	sp_ext_length_t	len = 0LL;
+	int		rval = -1;
+	uint_t		seq;
+	int		oflag;
+	int		failed;
+	mddrivename_t	*dnp = NULL;
+	sp_ext_length_t	alignment = 0LL;
+	sp_ext_node_t	*extlist = NULL;
+	/*
+	 * offset is written while handling "-o" and read while handling
+	 * the following "-b", i.e. in a LATER iteration of the getopt()
+	 * loop.  It therefore has to live at function scope: an object
+	 * declared inside the loop body becomes indeterminate again each
+	 * time the body is re-entered.
+	 */
+	sp_ext_offset_t	offset = 0LL;
+
+	assert(argc > 0);
+
+	/* expect sp name, -p, optional -e, compname, and size parameters */
+	/* grab soft partition name */
+	if ((np = metaname(spp, devname, ep)) == NULL)
+		goto out;
+
+	/* see if it exists already */
+	if (metagetmiscname(np, ep) != NULL) {
+		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+		    meta_getminor(np->dev), devname);
+		goto out;
+	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
+		goto out;
+	} else {
+		/* "unit not set up" is the expected answer; carry on */
+		mdclrerror(ep);
+	}
+	--argc, ++argv;
+
+	if (argc == 0)
+		goto syntax;
+
+	/* grab -p */
+	if (strcmp(argv[0], "-p") != 0)
+		goto syntax;
+	--argc, ++argv;
+
+	if (argc == 0)
+		goto syntax;
+
+	/* see if -e is there */
+	if (strcmp(argv[0], "-e") == 0) {
+		/* use the whole disk */
+		options |= MDCMD_USE_WHOLE_DISK;
+		--argc, ++argv;
+	}
+
+	if (argc == 0)
+		goto syntax;
+
+	/*
+	 * get component name
+	 *
+	 * NOTE(review): this duplicate is never freed; compname is
+	 * repointed at spcompnp->cname further down and the early
+	 * "goto out" paths do not release it either.  It is left alone
+	 * here because whether metadrivename()/metaname() keep a
+	 * reference to the string cannot be determined from this
+	 * function -- confirm and free it if they do not.
+	 */
+	compname = Strdup(argv[0]);
+
+	if (options & MDCMD_USE_WHOLE_DISK) {
+		if ((dnp = metadrivename(spp, compname, ep)) == NULL) {
+			goto out;
+		}
+		if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) {
+			goto out;
+		}
+	} else if ((spcompnp = metaname(spp, compname, ep)) == NULL) {
+		goto out;
+	}
+	assert(*spp != NULL);
+
+	if (!(options & MDCMD_NOLOCK)) {
+		/* grab set lock */
+		if (meta_lock(*spp, TRUE, ep))
+			goto out;
+
+		if (meta_check_ownership(*spp, ep) != 0)
+			goto out;
+	}
+
+	/* allocate the soft partition */
+	msp = Zalloc(sizeof (*msp));
+
+	/* setup common */
+	msp->common.namep = np;
+	msp->common.type = MD_METASP;
+
+	compname = spcompnp->cname;
+
+	assert(spcompnp->rname != NULL);
+	--argc, ++argv;
+
+	if (argc == 0) {
+		goto syntax;
+	}
+
+	if (*argv[0] == '-') {
+		/*
+		 * parse any other command line options, this includes
+		 * the recovery options -o and -b. The special thing
+		 * with these options is that the len needs to be
+		 * kept track of otherwise when the geometry of the
+		 * "device" is built it will create an invalid geometry
+		 */
+		old_optind = optind = 0;
+		opterr = 0;
+		oflag = 0;
+		seq = 0;
+		failed = 0;
+		while ((c = getopt(argc, argv, "A:o:b:")) != -1) {
+			sp_ext_length_t	length;
+			longlong_t	tmp_size;
+
+			switch (c) {
+			case 'A':	/* data alignment */
+				if (meta_sp_parsesizestring(optarg,
+				    &alignment) == -1) {
+					failed = 1;
+				}
+				break;
+			case 'o':	/* offset in the partition */
+				if (oflag == 1) {
+					/* two -o in a row, no -b between */
+					failed = 1;
+				} else {
+					tmp_size = atoll(optarg);
+					if (tmp_size <= 0) {
+						failed = 1;
+					} else {
+						oflag = 1;
+						options |= MDCMD_DIRECT;
+
+						/* kept for the next -b */
+						offset = tmp_size;
+					}
+				}
+
+				break;
+			case 'b':	/* number of blocks */
+				if (oflag == 0) {
+					/* -b without a preceding -o */
+					failed = 1;
+				} else {
+					tmp_size = atoll(optarg);
+					if (tmp_size <= 0) {
+						failed = 1;
+					} else {
+						oflag = 0;
+
+						length = tmp_size;
+
+						/* we have a pair of values */
+						meta_sp_list_insert(*spp, np,
+						    &extlist, offset,
+						    length, EXTTYP_ALLOC,
+						    seq++, EXTFLG_UPDATE,
+						    meta_sp_cmp_by_offset);
+						len += length;
+					}
+				}
+
+				break;
+			default:
+				argc -= old_optind;
+				argv += old_optind;
+				goto options;
+			}
+
+			if (failed) {
+				argc -= old_optind;
+				argv += old_optind;
+				goto syntax;
+			}
+
+			old_optind = optind;
+		}
+		argc -= optind;
+		argv += optind;
+
+		/*
+		 * Must have matching pairs of -o and -b flags
+		 */
+		if (oflag != 0)
+			goto syntax;
+
+		/*
+		 * Can't specify both layout (indicated indirectly by
+		 * len being set by the -o/-b cases above) AND
+		 * alignment
+		 */
+		if ((len > 0LL) && (alignment > 0LL))
+			goto syntax;
+
+		/*
+		 * sanity check the allocation list
+		 */
+		if ((extlist != NULL) && meta_sp_list_overlaps(extlist))
+			goto syntax;
+	}
+
+	/* no -o/-b layout given: the next argument must be the size */
+	if (len == 0LL) {
+		if (argc == 0)
+			goto syntax;
+		if (meta_sp_parsesize(argv[0], &len) == -1)
+			goto syntax;
+		--argc, ++argv;
+	}
+
+	msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val));
+	msp->ext.ext_val->len = len;
+	msp->compnamep = spcompnp;
+
+	/* we should be at the end */
+	if (argc != 0)
+		goto syntax;
+
+	/* create soft partition */
+	if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0)
+		goto out;
+	rval = 0;
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partition is setup\n"),
+		    devname);
+		(void) fflush(stdout);
+	}
+	goto out;
+
+syntax:
+	/* syntax error */
+	rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv);
+	goto out;
+
+options:
+	/* options error */
+	rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv);
+	goto out;
+
+out:
+	/* release the scratch md_sp_t and its single extent entry */
+	if (msp != NULL) {
+		if (msp->ext.ext_val != NULL) {
+			Free(msp->ext.ext_val);
+		}
+		Free(msp);
+	}
+
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_free_sp()
+ * INPUT:	msp	- the soft partition unit to free (may be NULL)
+ * OUTPUT:	none
+ * RETURNS:	void
+ * PURPOSE:	provides an interface from the rest of libmeta for freeing a
+ *		soft partition unit, including the extent array that
+ *		meta_get_sp_common() allocates separately from the unit
+ *		itself
+ */
+void
+meta_free_sp(md_sp_t *msp)
+{
+	if (msp == NULL)
+		return;
+
+	/*
+	 * The extent array is its own allocation (see the Zalloc() of
+	 * ext.ext_val in meta_get_sp_common()), so freeing only msp
+	 * would leak it.  This mirrors the cleanup at the end of
+	 * meta_init_sp().
+	 */
+	if (msp->ext.ext_val != NULL) {
+		Free(msp->ext.ext_val);
+	}
+	Free(msp);
+}
+
+/*
+ * FUNCTION: meta_sp_issp()
+ * INPUT: sp - the set name to check
+ * np - the name to check
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - 0 means sp,np is a soft partition
+ * 1 means sp,np is not a soft partition
+ * PURPOSE: determines whether the given device is a soft partition
+ * device. This is called by other metadevice check routines.
+ */
+int
+meta_sp_issp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	md_sp_t	*unit = meta_get_sp_common(sp, np, 0, ep);
+
+	/* 0 when a soft partition unit was obtained, 1 otherwise */
+	return ((unit == NULL) ? 1 : 0);
+}
+
+/*
+ * FUNCTION: meta_check_sp()
+ * INPUT: sp - the set name to check
+ * msp - the unit structure to check
+ * options - creation options
+ * OUTPUT: repart_options - options to be passed to
+ * meta_repartition_drive()
+ * ep - return error pointer
+ * RETURNS: int - 0 ok to create on this component
+ * -1 error or not ok to create on this component
+ * PURPOSE: Checks to determine whether the rules for creation of
+ * soft partitions allow creation of a soft partition on
+ * the device described by the mdname_t structure referred
+ * to by msp->compnamep.
+ *
+ * NOTE: Does NOT check to determine whether the extents
+ * described in the md_sp_t structure referred to by
+ * msp will fit on the device described by the mdname_t
+ * structure located at msp->compnamep.
+ */
+static int
+meta_check_sp(
+ mdsetname_t *sp,
+ md_sp_t *msp,
+ mdcmdopts_t options,
+ int *repart_options,
+ md_error_t *ep
+)
+{
+ md_common_t *mdp;
+ mdname_t *compnp = msp->compnamep;
+ uint_t slice;
+ mddrivename_t *dnp;
+ mdname_t *slicenp;
+ mdvtoc_t *vtocp;
+
+ /* make sure it is in the set */
+ if (meta_check_inset(sp, compnp, ep) != 0)
+ return (-1);
+
+ /*
+ * Whole-disk case: every slice of the drive is examined, and this
+ * branch finishes by jumping straight to meta_check_sp_ok, so none
+ * of the per-component checks further down are applied.
+ */
+ if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
+ uint_t rep_slice;
+
+ /*
+ * check to make sure we can partition this drive.
+ * we cannot continue if any of the following are
+ * true:
+ * The drive is a metadevice.
+ * The drive contains a mounted slice.
+ * The drive contains a slice being swapped to.
+ * The drive contains slices which are part of other
+ * metadevices.
+ * The drive contains a metadb.
+ */
+ if (metaismeta(compnp))
+ return (mddeverror(ep, MDE_IS_META, compnp->dev,
+ compnp->cname));
+
+ assert(compnp->drivenamep != NULL);
+
+ /*
+ * ensure that we have slice 0 since the disk will be
+ * repartitioned in the USE_WHOLE_DISK case. this check
+ * is redundant unless the user incorrectly specifies
+ * a fully qualified drive AND slice name (i.e.,
+ * /dev/dsk/cXtXdXsX), which will be incorrectly
+ * recognized as a drive name by the metaname code.
+ */
+
+ if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL)
+ return (-1);
+ if (slice != MD_SLICE0)
+ return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname));
+
+ /* find out which slice is the replica slice on this drive */
+ dnp = compnp->drivenamep;
+ if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
+ return (-1);
+
+ for (slice = 0; slice < vtocp->nparts; slice++) {
+
+ /* only check if the slice really exists */
+ if (vtocp->parts[slice].size == 0)
+ continue;
+
+ slicenp = metaslicename(dnp, slice, ep);
+ if (slicenp == NULL)
+ return (-1);
+
+ /* check to ensure that it is not already in use */
+ if (meta_check_inuse(sp,
+ slicenp, MDCHK_INUSE, ep) != 0) {
+ return (-1);
+ }
+
+ /*
+ * Up to this point, tests are applied to all
+ * slices uniformly.
+ */
+
+ if (slice == rep_slice) {
+ /*
+ * Tests inside the body of this
+ * conditional are applied only to
+ * the replica slice (rep_slice, as
+ * reported by meta_replicaslice()).
+ */
+ if (meta_check_inmeta(sp, slicenp,
+ options | MDCHK_ALLOW_MDDB |
+ MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0)
+ return (-1);
+
+ /*
+ * For the replica slice, a metadb is
+ * NOT an automatic failure. It merely
+ * means that we're not allowed to muck
+ * about with the partitioning of that
+ * slice. We indicate this by masking
+ * in the MD_REPART_LEAVE_REP flag.
+ */
+ if (metahasmddb(sp, slicenp, ep)) {
+ assert(repart_options !=
+ NULL);
+ *repart_options |=
+ MD_REPART_LEAVE_REP;
+ }
+
+ /*
+ * Skip the remaining tests for the
+ * replica slice
+ */
+ continue;
+ }
+
+ /*
+ * Tests below this point will be applied to
+ * all slices EXCEPT for the replica slice.
+ */
+
+
+ /* check if component is in a metadevice */
+ if (meta_check_inmeta(sp, slicenp, options, 0,
+ -1, ep) != 0)
+ return (-1);
+
+ /* check to see if component has a metadb */
+ if (metahasmddb(sp, slicenp, ep))
+ return (mddeverror(ep, MDE_HAS_MDDB,
+ slicenp->dev, slicenp->cname));
+ }
+ /*
+ * This should be all of the testing necessary when
+ * the MDCMD_USE_WHOLE_DISK flag is set; the rest of
+ * meta_check_sp() is oriented towards component
+ * arguments instead of disks.
+ */
+ goto meta_check_sp_ok;
+
+ }
+
+ /* check to ensure that it is not already in use */
+ if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) {
+ return (-1);
+ }
+
+ if (!metaismeta(compnp)) { /* handle non-metadevices */
+
+ /*
+ * The component can have one or more soft partitions on it
+ * already, but can't be part of any other type of metadevice,
+ * so if it is used for a metadevice, but the metadevice
+ * isn't a soft partition, return failure.
+ *
+ * Failure requires BOTH calls below to agree: a non-zero
+ * result from meta_check_inmeta() alone is tolerated when
+ * meta_check_insp() returns non-zero. Any error that the
+ * tolerated call left in ep is cleared at meta_check_sp_ok.
+ * NOTE(review): this presumes a non-zero meta_check_insp()
+ * result means "already used by a soft partition" -- confirm
+ * against that function.
+ */
+
+ if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 &&
+ meta_check_insp(sp, compnp, 0, -1, ep) == 0) {
+ return (-1);
+ }
+ } else { /* handle metadevices */
+ /* get underlying unit & check capabilities */
+ if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL)
+ return (-1);
+
+ /* the underlying metadevice must allow both parenting and SPs */
+ if ((! (mdp->capabilities & MD_CAN_PARENT)) ||
+ (! (mdp->capabilities & MD_CAN_SP)))
+ return (mdmderror(ep, MDE_INVAL_UNIT,
+ meta_getminor(compnp->dev), compnp->cname));
+ }
+
+meta_check_sp_ok:
+ /* success: drop any error left behind by a tolerated check above */
+ mdclrerror(ep);
+ return (0);
+}
+
+/*
+ * FUNCTION: meta_create_sp()
+ * INPUT: sp - the set name to create in
+ * msp - the unit structure to create
+ * oblist - an optional list of requested extents (-o/-b options)
+ * options - creation options
+ * alignment - data alignment
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - 0 success, -1 error
+ * PURPOSE: does most of the work for creating a soft partition. If
+ * metainit -p -e was used, first partition the drive. Then
+ * create an extent list based on the existing soft partitions
+ * and assume all space not used by them is free. Storage for
+ * the new soft partition is allocated from the free extents
+ * based on the length specified on the command line or the
+ * oblist passed in. The unit structure is then committed and
+ * the watermarks are updated. Finally, the status is changed to
+ * Okay and the process is complete.
+ */
+static int
+meta_create_sp(
+ mdsetname_t *sp,
+ md_sp_t *msp,
+ sp_ext_node_t *oblist,
+ mdcmdopts_t options,
+ sp_ext_length_t alignment,
+ md_error_t *ep
+)
+{
+ mdname_t *np = msp->common.namep;
+ mdname_t *compnp = msp->compnamep;
+ mp_unit_t *mp = NULL;
+ mdnamelist_t *keynlp = NULL, *spnlp = NULL;
+ md_set_params_t set_params;
+ int rval = -1;
+ diskaddr_t comp_size;
+ diskaddr_t sp_start;
+ sp_ext_node_t *extlist = NULL;
+ int numexts = 0; /* number of extents */
+ int count = 0;
+ int committed = 0;
+ int repart_options = MD_REPART_FORCE;
+ int create_flag = MD_CRO_32BIT;
+
+ md_set_desc *sd;
+ mm_unit_t *mm;
+ md_set_mmown_params_t *ownpar = NULL;
+ int comp_is_mirror = 0;
+
+ /* validate soft partition */
+ if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0)
+ return (-1);
+
+ if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
+ if ((options & MDCMD_DOIT) != 0) {
+ if (meta_repartition_drive(sp,
+ compnp->drivenamep,
+ repart_options,
+ NULL, /* Don't return the VTOC */
+ ep) != 0)
+
+ return (-1);
+ } else {
+ /*
+ * If -n and -e are both specified, it doesn't make
+ * sense to continue without actually partitioning
+ * the drive.
+ */
+ return (0);
+ }
+ }
+
+ /* populate the start_blk field of the component name */
+ if ((sp_start = meta_sp_get_start(sp, compnp, ep)) ==
+ MD_DISKADDR_ERROR) {
+ rval = -1;
+ goto out;
+ }
+
+ if (options & MDCMD_DOIT) {
+ /* store name in namespace */
+ if (add_key_name(sp, compnp, &keynlp, ep) != 0) {
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /*
+ * Get a list of the soft partitions that currently reside on
+ * the component. We should ALWAYS force reload the cache,
+ * because if this is a single creation, there will not BE a
+ * cached list, and if we're using the md.tab, we must rebuild
+ * the list because it won't contain the previous (if any)
+ * soft partition.
+ */
+ count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
+ if (count < 0) {
+ /* error occured */
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * get the size of the underlying device. if the size is smaller
+ * than or equal to the watermark size, we know there isn't
+ * enough space.
+ */
+ if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) {
+ rval = -1;
+ goto out;
+ } else if (comp_size <= MD_SP_WMSIZE) {
+ (void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname);
+ rval = -1;
+ goto out;
+ }
+ /*
+ * seed extlist with reserved space at the beginning of the volume and
+ * enough space for the end watermark. The end watermark always gets
+ * updated, but if the underlying device changes size it may not be
+ * pointed to until the extent before it is updated. Since the
+ * end of the reserved space is where the first watermark starts,
+ * the reserved extent should never be marked for updating.
+ */
+
+ meta_sp_list_insert(NULL, NULL, &extlist,
+ 0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
+ meta_sp_list_insert(NULL, NULL, &extlist,
+ (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE,
+ EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+ if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+
+ metafreenamelist(spnlp);
+
+ if (getenv(META_SP_DEBUG)) {
+ meta_sp_debug("meta_create_sp: list of used extents:\n");
+ meta_sp_list_dump(extlist);
+ }
+
+ meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
+
+ /* get extent list from -o/-b options or from free space */
+ if (options & MDCMD_DIRECT) {
+ if (getenv(META_SP_DEBUG)) {
+ meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n");
+ meta_sp_list_dump(oblist);
+ }
+
+ numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist);
+ if (numexts == -1) {
+ (void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname);
+ rval = -1;
+ goto out;
+ }
+ } else {
+ numexts = meta_sp_alloc_by_len(sp, np, &extlist,
+ &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment :
+ meta_sp_get_default_alignment(sp, compnp, ep));
+ if (numexts == -1) {
+ (void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname);
+ rval = -1;
+ goto out;
+ }
+ }
+
+ assert(extlist != NULL);
+
+ /* create soft partition */
+ mp = meta_sp_createunit(msp->common.namep, msp->compnamep,
+ extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep);
+
+ create_flag = meta_check_devicesize(mp->c.un_total_blocks);
+
+ /* if we're not doing anything (metainit -n), return success */
+ if (! (options & MDCMD_DOIT)) {
+ rval = 0; /* success */
+ goto out;
+ }
+
+ (void) memset(&set_params, 0, sizeof (set_params));
+
+ if (create_flag == MD_CRO_64BIT) {
+ mp->c.un_revision = MD_64BIT_META_DEV;
+ set_params.options = MD_CRO_64BIT;
+ } else {
+ mp->c.un_revision = MD_32BIT_META_DEV;
+ set_params.options = MD_CRO_32BIT;
+ }
+
+ if (getenv(META_SP_DEBUG)) {
+ meta_sp_debug("meta_create_sp: printing unit structure\n");
+ meta_sp_printunit(mp);
+ }
+
+ /*
+ * Check to see if we're trying to create a partition on a mirror. If so
+ * we may have to enforce an ownership change before writing the
+ * watermark out.
+ */
+ if (metaismeta(compnp)) {
+ char *miscname;
+
+ miscname = metagetmiscname(compnp, ep);
+ if (miscname != NULL)
+ comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0);
+ else
+ comp_is_mirror = 0;
+ } else {
+ comp_is_mirror = 0;
+ }
+
+ /*
+ * For a multi-node environment we have to ensure that the master
+ * node owns an underlying mirror before we issue the MD_IOCSET ioctl.
+ * If the master does not own the device we will deadlock as the
+ * implicit write of the watermarks (in sp_ioctl.c) will cause an
+ * ownership change that will block as the MD_IOCSET is still in
+ * progress. To close this window we force an owner change to occur
+ * before issuing the MD_IOCSET. We cannot simply open the device and
+ * write to it as this will only work for the first soft-partition
+ * creation.
+ */
+
+ if (comp_is_mirror && !metaislocalset(sp)) {
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ rval = -1;
+ goto out;
+ }
+ if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) {
+ mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep);
+ if (mm == NULL) {
+ rval = -1;
+ goto out;
+ } else {
+ rval = meta_mn_change_owner(&ownpar, sp->setno,
+ meta_getminor(compnp->dev),
+ sd->sd_mn_mynode->nd_nodeid,
+ MD_MN_MM_PREVENT_CHANGE |
+ MD_MN_MM_SPAWN_THREAD);
+ if (rval == -1)
+ goto out;
+ }
+ }
+ }
+
+ set_params.mnum = MD_SID(mp);
+ set_params.size = mp->c.un_size;
+ set_params.mdp = (uintptr_t)mp;
+ MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum));
+
+ /* first phase of commit. */
+ if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+ np->cname) != 0) {
+ (void) mdstealerror(ep, &set_params.mde);
+ rval = -1;
+ goto out;
+ }
+
+ /* we've successfully committed the record */
+ committed = 1;
+
+ /* write watermarks */
+ if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * Allow mirror ownership to change. If we don't succeed in this
+ * ioctl it isn't fatal, but the cluster will probably hang fairly
+ * soon as the mirror owner won't change. However, we have
+ * successfully written the watermarks out to the device so the
+ * softpart creation has succeeded
+ */
+ if (ownpar) {
+ (void) meta_mn_change_owner(&ownpar, sp->setno, ownpar->d.mnum,
+ ownpar->d.owner,
+ MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD);
+ }
+
+ /* second phase of commit, set status to MD_SP_OK */
+ if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) {
+ rval = -1;
+ goto out;
+ }
+ rval = 0;
+out:
+ Free(mp);
+ if (ownpar)
+ Free(ownpar);
+
+ if (extlist != NULL)
+ meta_sp_list_free(&extlist);
+
+ if (rval != 0 && keynlp != NULL && committed != 1)
+ (void) del_key_names(sp, keynlp, NULL);
+
+ metafreenamelist(keynlp);
+
+ return (rval);
+}
+
+/*
+ * **************************************************************************
+ * Reset (metaclear) Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_reset_common()
+ * INPUT:	sp	- the set name of the device to reset
+ *		np	- the name of the device to reset
+ *		msp	- the unit structure to reset
+ *		reset_params - reset parameters prepared by the caller
+ *			  (passed by value); the driver name, mnum and
+ *			  force fields are filled in here
+ *		options	- metaclear options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 success, -1 error
+ * PURPOSE:	Deletes ("resets") a single soft partition.  The partition
+ *		is refused if it has a parent or is open.  Otherwise it is
+ *		moved to the "delete pending" state, its watermarks are
+ *		cleared, and finally MD_IOCRESET is issued so the driver
+ *		drops the unit.  With MDCMD_RECURSE the underlying
+ *		metadevice (if any) is cleared afterwards as well.
+ */
+static int
+meta_sp_reset_common(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_sp_t		*msp,
+	md_sp_reset_t	reset_params,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	const int	forced = ((options & MDCMD_FORCE) != 0);
+	char		*drvname;
+	int		open_state;
+	int		status = -1;
+
+	/* a parented soft partition is in use by another metadevice */
+	if (MD_HAS_PARENT(msp->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev),
+		    np->cname));
+	}
+
+	/* refuse to touch an open soft partition */
+	open_state = meta_isopen(sp, np, ep, options);
+	if (open_state < 0)
+		return (-1);
+	if (open_state != 0) {
+		return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev),
+		    np->cname));
+	}
+
+	/* the driver (misc) name is needed to address the ioctl */
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* complete the reset parameters handed in by the caller */
+	MD_SETDRIVERNAME(&reset_params, drvname, sp->setno);
+	reset_params.mnum = meta_getminor(np->dev);
+	reset_params.force = forced ? 1 : 0;
+
+	/*
+	 * Phase one: mark the soft partition "delete pending".
+	 */
+	if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND,
+	    ep) < 0)
+		return (-1);
+
+	/*
+	 * Wipe the watermarks.  A failure here is only tolerated when
+	 * the force flag is set (the on-disk format may then be left
+	 * corrupt); without force the soft partition stays in the
+	 * "delete pending" state.
+	 */
+	if (meta_sp_clear_wm(sp, msp, ep) < 0 && !forced)
+		goto done;
+
+	/*
+	 * Phase two: have the driver remove the unit from the metadb
+	 * and zero its incore copy.
+	 */
+	if (metaioctl(MD_IOCRESET, &reset_params, &reset_params.mde,
+	    np->cname) != 0) {
+		(void) mdstealerror(ep, &reset_params.mde);
+		goto done;
+	}
+	status = 0;
+
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partition is cleared\n"),
+		    np->cname);
+		(void) fflush(stdout);
+	}
+
+	/*
+	 * When recursing, also clear the metadevice we were built on;
+	 * a failure there turns the overall result into an error.
+	 */
+	if (options & MDCMD_RECURSE) {
+		if (metaismeta(msp->compnamep)) {
+			if (meta_reset_by_name(sp, msp->compnamep,
+			    options, ep) != 0)
+				status = -1;
+		}
+	}
+
+done:
+	meta_invalidate_name(np);
+	return (status);
+}
+
+/*
+ * FUNCTION:	meta_sp_reset()
+ * INPUT:	sp	- the set name of the device to reset
+ *		np	- the name of the device to reset, or NULL for all
+ *		options	- metaclear options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 success, -1 error
+ * PURPOSE:	Entry point used by the rest of libmeta to delete soft
+ *		partitions.  With np == NULL every unparented soft
+ *		partition in the set is deleted (one level of the stack
+ *		per call).  With a name, only that soft partition is
+ *		deleted.  If the soft partition sits on a metadevice,
+ *		that metadevice is reparented to another soft partition
+ *		on the same component, or deparented if this is the
+ *		last one.
+ */
+int
+meta_sp_reset(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_sp_t		*msp;
+	int		rval = -1;
+	mdnamelist_t	*spnlp = NULL, *nlp = NULL;
+	md_sp_reset_t	reset_params;
+	int		num_sp;
+
+	assert(sp != NULL);
+
+	/* reset/delete all soft partitions */
+	if (np == NULL) {
+		/*
+		 * meta_reset_all sets MDCMD_RECURSE, which is wrong
+		 * for soft partitions: all soft partitions at one
+		 * level of the metadevice stack must be cleared
+		 * before moving to the next level, so the flag is
+		 * dropped here.
+		 */
+		options &= ~MDCMD_RECURSE;
+
+		rval = (meta_get_sp_names(sp, &spnlp, 0, ep) < 0) ? -1 : 0;
+
+		nlp = spnlp;
+		while (nlp != NULL) {
+			int	failed;
+
+			np = nlp->namep;
+			msp = meta_get_sp(sp, np, ep);
+			if (msp == NULL) {
+				rval = -1;
+				break;
+			}
+
+			/*
+			 * meta_reset_all calls us twice to catch soft
+			 * partitions at the top and the bottom of the
+			 * stack, so a parented one is simply left for
+			 * the next call.
+			 */
+			if (!MD_HAS_PARENT(msp->common.parent)) {
+				/*
+				 * Multi-node sets get one metaclear
+				 * command per soft partition; other
+				 * sets are handled by recursing.
+				 */
+				if (meta_is_mn_set(sp, ep)) {
+					failed =
+					    (meta_mn_send_metaclear_command(
+					    sp, np->cname, options, 0,
+					    ep) != 0);
+				} else {
+					failed = (meta_sp_reset(sp, np,
+					    options, ep) != 0);
+				}
+				if (failed) {
+					rval = -1;
+					break;
+				}
+			}
+			nlp = nlp->next;
+		}
+
+		metafreenamelist(spnlp);
+		return (rval);
+	}
+
+	/* single soft partition: check the name */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	/* get the unit structure */
+	msp = meta_get_sp(sp, np, ep);
+	if (msp == NULL)
+		return (-1);
+
+	/* start from clean reset parameters */
+	(void) memset(&reset_params, 0, sizeof (reset_params));
+
+	/* a metadevice underneath us must be deparented/reparented */
+	if (metaismeta(msp->compnamep)) {
+		num_sp = meta_sp_get_by_component(sp, msp->compnamep,
+		    &spnlp, 1, ep);
+
+		/* no sp's on this device. error! */
+		if (num_sp <= 0)
+			return (-1);
+
+		if (num_sp == 1) {
+			/* last sp on this device, so we deparent */
+			reset_params.new_parent = MD_NO_PARENT;
+		} else {
+			/*
+			 * Hand the metadevice to the first soft
+			 * partition on it that is not the one being
+			 * deleted.
+			 */
+			for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
+				if (meta_getminor(nlp->namep->dev) !=
+				    meta_getminor(np->dev)) {
+					reset_params.new_parent =
+					    meta_getminor(nlp->namep->dev);
+					break;
+				}
+			}
+		}
+		metafreenamelist(spnlp);
+	}
+
+	return ((meta_sp_reset_common(sp, np, msp, reset_params,
+	    options, ep) != 0) ? -1 : 0);
+}
+
+/*
+ * FUNCTION:	meta_sp_reset_component()
+ * INPUT:	sp	- the set name of the device to reset
+ *		name	- the string name of the device to reset
+ *		options	- metaclear options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 success, -1 error
+ * PURPOSE:	provides the ability to delete all soft partitions on a
+ *		specified device (metaclear -p).  It first gets all of the
+ *		soft partitions on the component and then deletes each one
+ *		individually, in list order.
+ */
+int
+meta_sp_reset_component(
+	mdsetname_t	*sp,
+	char		*name,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*compnp, *np;
+	mdnamelist_t	*spnlp = NULL;
+	mdnamelist_t	*nlp = NULL;
+	md_sp_t		*msp;
+	int		count;
+	int		rval = -1;
+	md_sp_reset_t	reset_params;
+
+	if ((compnp = metaname(&sp, name, ep)) == NULL)
+		return (-1);
+
+	/* If we're starting out with no soft partitions, it's an error */
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
+	if (count == 0)
+		return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname));
+	else if (count < 0)
+		return (-1);
+
+	/*
+	 * clear all soft partitions on this component.
+	 * NOTE: we reparent underlying metadevices as we go so that
+	 * things stay sane.  Also, if we encounter an error, we stop
+	 * and go no further in case recovery might be needed.
+	 */
+	for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
+		/* clear out reset parameters */
+		(void) memset(&reset_params, 0, sizeof (reset_params));
+
+		/* check the name */
+		np = nlp->namep;
+		if (metachkmeta(np, ep) != 0)
+			goto out;
+
+		/* get the unit structure */
+		if ((msp = meta_get_sp(sp, np, ep)) == NULL)
+			goto out;
+
+		/*
+		 * have to deparent/reparent metadevices.  The new parent
+		 * must be a soft partition that still exists once this
+		 * one is gone, i.e. the entry following the current one;
+		 * the last entry deparents the component.  (Using the
+		 * second list entry unconditionally would hand the
+		 * component to the soft partition being deleted as soon
+		 * as the loop reaches that entry.)
+		 */
+		if (metaismeta(compnp)) {
+			if (nlp->next == NULL)
+				reset_params.new_parent = MD_NO_PARENT;
+			else
+				reset_params.new_parent =
+				    meta_getminor(nlp->next->namep->dev);
+		}
+
+		/* clear soft partition */
+		if (meta_sp_reset_common(sp, np, msp, reset_params,
+		    options, ep) < 0)
+			goto out;
+	}
+	rval = 0;
+
+out:
+	metafreenamelist(spnlp);
+	return (rval);
+}
+
+/*
+ * **************************************************************************
+ * Grow (metattach) Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_attach()
+ * INPUT:	sp	- the set name of the device to attach to
+ *		np	- the name of the device to attach to
+ *		addsize	- the unparsed string holding the amount of space to add
+ *		options	- metattach options
+ *		alignment - data alignment
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 success, -1 error
+ * PURPOSE:	grows a soft partition by reading in the existing unit
+ *		structure, allocating more space (similar to
+ *		meta_create_sp()), issuing MD_IOCGROW with the enlarged
+ *		unit, updating the watermarks, and then setting the unit
+ *		to the Okay state.  All temporary allocations (old and
+ *		new unit structure, name list, extent list) are released
+ *		on every exit path.
+ */
+int
+meta_sp_attach(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	char		*addsize,
+	mdcmdopts_t	options,
+	sp_ext_length_t	alignment,
+	md_error_t	*ep
+)
+{
+	md_grow_params_t	grow_params;
+	sp_ext_length_t		grow_len;	/* amount to grow */
+	sp_ext_length_t		comp_size;	/* size of component */
+	mp_unit_t		*mp = NULL, *new_un = NULL;
+	mdname_t		*compnp = NULL;
+
+	sp_ext_node_t		*extlist = NULL;
+	int			numexts;
+	mdnamelist_t		*spnlp = NULL;
+	int			count;
+	md_sp_t			*msp;
+	daddr_t			start_block;
+	int			rval = -1;
+
+	/* should have the same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* check name */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	if (meta_sp_parsesize(addsize, &grow_len) == -1) {
+		return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname));
+	}
+
+	if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL)
+		return (-1);
+
+	/* make sure we don't have a parent */
+	if (MD_HAS_PARENT(mp->c.un_parent)) {
+		rval = mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname);
+		goto out;
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_attach: Unit structure before new "
+		    "space:\n");
+		meta_sp_printunit(mp);
+	}
+
+	/*
+	 * NOTE: the fast option to metakeyname is 0 as opposed to 1
+	 * If this was not the case we would suffer the following
+	 * assertion failure:
+	 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP
+	 * file meta_check.x, line 315
+	 * I guess this is because we have not "seen" this drive before
+	 * and hence hit the failure - this is of course the attach routine
+	 */
+	if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL)
+		goto out;
+
+	/* metakeyname does not fill in the key. */
+	compnp->key = mp->un_key;
+
+	/* work out the space on the component that we are dealing with */
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
+
+	/*
+	 * see if the component has been soft partitioned yet, or if an
+	 * error occurred.
+	 */
+	if (count == 0) {
+		rval = mdmderror(ep, MDE_NOT_SP, 0, np->cname);
+		goto out;
+	} else if (count < 0) {
+		goto out;
+	}
+
+	/*
+	 * seed extlist with reserved space at the beginning of the volume and
+	 * enough space for the end watermark.  The end watermark always gets
+	 * updated, but if the underlying device changes size it may not be
+	 * pointed to until the extent before it is updated.  Since the
+	 * end of the reserved space is where the first watermark starts,
+	 * the reserved extent should never be marked for updating.
+	 */
+	if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
+	    MD_DISKADDR_ERROR)
+		goto out;
+
+	/*
+	 * Fetch the component size once and fail cleanly if it cannot
+	 * be determined, rather than seeding the list with an offset
+	 * derived from the error value.
+	 */
+	if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR)
+		goto out;
+
+	meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
+	    EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
+	meta_sp_list_insert(NULL, NULL, &extlist,
+	    comp_size - MD_SP_WMSIZE, MD_SP_WMSIZE,
+	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1)
+		goto out;
+
+	/* the name list is no longer needed once the extents are known */
+	metafreenamelist(spnlp);
+	spnlp = NULL;
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_attach: list of used extents:\n");
+		meta_sp_list_dump(extlist);
+	}
+
+	meta_sp_list_freefill(&extlist, comp_size);
+
+	assert(mp->un_numexts >= 1);
+	numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len,
+	    mp->un_ext[mp->un_numexts - 1].un_poff,
+	    (alignment > 0) ? alignment :
+	    meta_sp_get_default_alignment(sp, compnp, ep));
+
+	if (numexts == -1) {
+		rval = mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname);
+		goto out;
+	}
+
+	/* allocate new unit structure and copy in old unit */
+	if ((new_un = meta_sp_updateunit(np, mp, extlist,
+	    grow_len, numexts, ep)) == NULL)
+		goto out;
+
+	/* If running in dryrun mode (-n option), we're done here */
+	if ((options & MDCMD_DOIT) == 0) {
+		if (options & MDCMD_PRINT) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Soft Partition would grow\n"),
+			    np->cname);
+			(void) fflush(stdout);
+		}
+		rval = 0;
+		goto out;
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_attach: updated unit structure:\n");
+		meta_sp_printunit(new_un);
+	}
+
+	(void) memset(&grow_params, 0, sizeof (grow_params));
+	if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
+		grow_params.options = MD_CRO_64BIT;
+		new_un->c.un_revision = MD_64BIT_META_DEV;
+	} else {
+		grow_params.options = MD_CRO_32BIT;
+		new_un->c.un_revision = MD_32BIT_META_DEV;
+	}
+	grow_params.mnum = MD_SID(new_un);
+	grow_params.size = new_un->c.un_size;
+	grow_params.mdp = (uintptr_t)new_un;
+	MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum));
+
+	/* first phase of commit: hand the enlarged unit to the driver */
+	if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde,
+	    np->cname) != 0) {
+		(void) mdstealerror(ep, &grow_params.mde);
+		goto out;
+	}
+
+	/* update all watermarks */
+	if ((msp = meta_get_sp(sp, np, ep)) == NULL)
+		goto out;
+	if (meta_sp_update_wm(sp, msp, extlist, ep) < 0)
+		goto out;
+
+	/* second phase of commit, set status to MD_SP_OK */
+	if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0)
+		goto out;
+
+	meta_invalidate_name(np);
+
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partition has been grown\n"),
+		    np->cname);
+		(void) fflush(stdout);
+	}
+
+	rval = 0;
+
+out:
+	/* release everything allocated on the way, on every exit path */
+	if (spnlp != NULL)
+		metafreenamelist(spnlp);
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+	if (new_un != NULL)
+		Free(new_un);
+	if (mp != NULL)
+		Free(mp);
+
+	return (rval);
+}
+
+/*
+ * **************************************************************************
+ * Recovery (metarecover) Functions *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_recover_sp()
+ * INPUT:	sp	- the name of the set we are recovering on
+ *		compnp	- name pointer for device we are recovering on
+ *		argc	- argument count
+ *		argv	- left over arguments not parsed by metarecover command
+ *		options	- metarecover options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	Interprets the soft partition specific metarecover
+ *		arguments (none, "-d" or "-m") and runs the matching
+ *		validation/recovery sequence.  Anything else, or more
+ *		than one argument, is reported as a syntax error.
+ */
+int
+meta_recover_sp(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+
+	/* at most one soft partition specific argument is accepted */
+	if (argc > 1) {
+		(void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname,
+		    argc, argv);
+		return (-1);
+	}
+
+	/*
+	 * In a MN set only the master may run this, since the master
+	 * is the node responsible for maintaining the watermarks.
+	 */
+	if (!metaislocalset(sp)) {
+		sd = metaget_setdesc(sp, ep);
+		if (sd == NULL)
+			return (-1);
+		if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) {
+			(void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno,
+			    sd->sd_mn_master_nodenm, NULL, NULL);
+			return (-1);
+		}
+	}
+
+	if (argc == 0) {
+		/*
+		 * No argument: validate the on-disk structures, the
+		 * metadb structures, and finally that the two are
+		 * consistent with each other.  The first failure ends
+		 * the sequence.
+		 */
+		if (meta_sp_validate_wm(sp, compnp, options, ep) < 0 ||
+		    meta_sp_validate_unit(sp, compnp, options, ep) < 0 ||
+		    meta_sp_validate_wm_and_unit(sp, compnp, options,
+		    ep) < 0)
+			return (-1);
+		return (0);
+	}
+
+	if (strcmp(argv[0], "-d") == 0) {
+		/*
+		 * A valid metadb record already exists for this
+		 * soft partition, so there is nothing to recover.
+		 */
+		if (meta_sp_validate_unit(sp, compnp, options, ep) == 0)
+			return (-1);
+
+		/* validate, then recover from, the on-disk structures */
+		if (meta_sp_validate_wm(sp, compnp, options, ep) < 0 ||
+		    meta_sp_recover_from_wm(sp, compnp, options, ep) < 0)
+			return (-1);
+		return (0);
+	}
+
+	if (strcmp(argv[0], "-m") == 0) {
+		/* validate, then recover from, the metadb structures */
+		if (meta_sp_validate_unit(sp, compnp, options, ep) < 0 ||
+		    meta_sp_recover_from_unit(sp, compnp, options, ep) < 0)
+			return (-1);
+		return (0);
+	}
+
+	/* unrecognised argument: syntax error */
+	(void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, argc, argv);
+	return (-1);
+}
+
+/*
+ * FUNCTION:	meta_sp_display_exthdr()
+ * INPUT:	none
+ * OUTPUT:	none
+ * RETURNS:	void
+ * PURPOSE:	Prints the localized column headings that go with the
+ *		per-extent lines produced by meta_sp_display_ext().
+ */
+static void
+meta_sp_display_exthdr(void)
+{
+	char	*hdr_name = dgettext(TEXT_DOMAIN, "Name");
+	char	*hdr_seq = dgettext(TEXT_DOMAIN, "Seq#");
+	char	*hdr_type = dgettext(TEXT_DOMAIN, "Type");
+	char	*hdr_offset = dgettext(TEXT_DOMAIN, "Offset");
+	char	*hdr_length = dgettext(TEXT_DOMAIN, "Length");
+
+	/* column widths match those used by meta_sp_display_ext() */
+	(void) printf("%20s %5s %7s %20s %20s\n",
+	    hdr_name, hdr_seq, hdr_type, hdr_offset, hdr_length);
+}
+
+
+/*
+ * FUNCTION:	meta_sp_display_ext()
+ * INPUT:	ext	- extent to display
+ * OUTPUT:	none
+ * RETURNS:	void
+ * PURPOSE:	Prints one line for an sp_ext_node_t: owning name (or
+ *		"NONE"), sequence number, extent type, offset and length.
+ */
+static void
+meta_sp_display_ext(sp_ext_node_t *ext)
+{
+	const char	*owner;
+	const char	*kind;
+
+	/* extents without a name are shown as "NONE" */
+	owner = (ext->ext_namep != NULL) ? ext->ext_namep->cname : "NONE";
+	(void) printf("%20s ", owner);
+
+	(void) printf("%5u ", ext->ext_seq);
+
+	/* map the extent type to its short label */
+	switch (ext->ext_type) {
+	case EXTTYP_ALLOC:
+		kind = "ALLOC";
+		break;
+	case EXTTYP_FREE:
+		kind = "FREE";
+		break;
+	case EXTTYP_RESERVED:
+		kind = "RESV";
+		break;
+	case EXTTYP_END:
+		kind = "END";
+		break;
+	default:
+		kind = "INVLD";
+		break;
+	}
+	(void) printf("%7s ", kind);
+
+	(void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length);
+}
+
+
+/*
+ * FUNCTION:	meta_sp_checkseq()
+ * INPUT:	extlist	- list of extents to be checked
+ * OUTPUT:	none
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	Checks soft partition sequence numbers.  The list is
+ *		expected to hold the extents of one or more soft
+ *		partitions sorted by name and sequence number.  Walking
+ *		adjacent pairs, the check stops at the first successor
+ *		that is not an allocated extent; within one soft
+ *		partition each sequence number must be exactly one
+ *		greater than its predecessor's.
+ */
+static int
+meta_sp_checkseq(sp_ext_node_t *extlist)
+{
+	sp_ext_node_t	*cur;
+	sp_ext_node_t	*nxt;
+
+	assert(extlist != NULL);
+
+	cur = extlist;
+	while ((nxt = cur->ext_next) != NULL &&
+	    nxt->ext_type == EXTTYP_ALLOC) {
+		/*
+		 * A pair belongs to the same soft partition unless the
+		 * successor carries a name that differs from ours.
+		 */
+		int	same_sp = (nxt->ext_namep == NULL ||
+		    strcmp(nxt->ext_namep->cname,
+		    cur->ext_namep->cname) == 0);
+
+		if (same_sp && nxt->ext_seq != cur->ext_seq + 1) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "%s: sequence numbers are "
+			    "incorrect: %d should be %d\n"),
+			    nxt->ext_namep->cname,
+			    nxt->ext_seq, cur->ext_seq + 1);
+			return (-1);
+		}
+		cur = nxt;
+	}
+	return (0);
+}
+
+
+/*
+ * FUNCTION:	meta_sp_resolve_name_conflict()
+ * INPUT:	sp	- name of set we are recovering in.
+ *		old_np	- name pointer of soft partition we found on disk.
+ * OUTPUT:	new_np	- name pointer for new soft partition name.
+ *		ep	- error pointer returned.
+ * RETURNS:	int	- 0 - name not replaced, 1 - name replaced, -1 - error
+ *			  (also returned when the user declines or when
+ *			  input ends while a new name is being read)
+ * PURPOSE:	Check to see if the name of one of the soft partitions we found
+ *		on disk already exists in the metadb.  If so, prompt for a new
+ *		name.  In addition, we keep a static array of names that
+ *		will be recovered from this device since these names don't
+ *		exist in the configuration at this point but cannot be
+ *		recovered more than once.
+ */
+static int
+meta_sp_resolve_name_conflict(
+	mdsetname_t	*sp,
+	mdname_t	*old_np,
+	mdname_t	**new_np,
+	md_error_t	*ep
+)
+{
+	char		yesno[255];
+	char		*yes;
+	char		newname[MD_SP_MAX_DEVNAME_PLUS_1];
+	size_t		len;
+	/*
+	 * nunits must persist together with used_names: it is only
+	 * computed when the array is first allocated, yet it bounds
+	 * the array on every later call.
+	 */
+	static int	nunits = 0;
+	static int	*used_names = NULL;
+
+	assert(old_np != NULL);
+
+	if (used_names == NULL) {
+		if ((nunits = meta_get_nunits(ep)) < 0)
+			return (-1);
+		used_names = Zalloc(nunits * sizeof (int));
+	}
+
+	/* see if it exists already */
+	if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 &&
+	    metagetmiscname(old_np, ep) == NULL) {
+		if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
+			return (-1);
+		else {
+			used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1;
+			mdclrerror(ep);
+			return (0);
+		}
+	}
+
+	/* name exists, ask the user for a new one */
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "WARNING: A soft partition named %s was found in the extent\n"
+	    "headers, but this name already exists in the metadb "
+	    "configuration.\n"
+	    "In order to continue recovery you must supply\n"
+	    "a new name for this soft partition.\n"), old_np->cname);
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Would you like to continue and supply a new name? (yes/no) "));
+
+	(void) fflush(stdout);
+	/* no input, or just a newline, counts as "no" */
+	if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
+	    (strlen(yesno) == 1))
+		(void) snprintf(yesno, sizeof (yesno), "%s\n",
+		    dgettext(TEXT_DOMAIN, "no"));
+	yes = dgettext(TEXT_DOMAIN, "yes");
+	if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
+		return (-1);
+	}
+
+	(void) fflush(stdin);
+
+	/* get the new name */
+	for (;;) {
+		(void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name "
+		    "for this soft partition (dXXXX) "));
+		(void) fflush(stdout);
+
+		/*
+		 * End of input (or a read error) can never yield a valid
+		 * name; give up instead of prompting forever.
+		 */
+		if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL)
+			return (-1);
+
+		/* remove newline character, if there is one */
+		len = strlen(newname);
+		if (len > 0 && newname[len - 1] == '\n')
+			newname[len - 1] = '\0';
+
+		if (!(is_metaname(newname)) ||
+		    (meta_init_make_device(&sp, newname, ep) != 0)) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Invalid metadevice name\n"));
+			(void) fflush(stderr);
+			continue;
+		}
+
+		if ((*new_np = metaname(&sp, newname, ep)) == NULL) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Invalid metadevice name\n"));
+			(void) fflush(stderr);
+			continue;
+		}
+
+		assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits);
+		/* make sure the name isn't already being used */
+		if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] ||
+		    metagetmiscname(*new_np, ep) != NULL) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "That name already exists\n"));
+			continue;
+		} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
+			return (-1);
+
+		break;
+	}
+
+	/* got a new name, place in used array and return */
+	used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1;
+	mdclrerror(ep);
+	return (1);
+}
+
+/*
+ * FUNCTION:	meta_sp_validate_wm()
+ * INPUT:	sp	- set name we are recovering in
+ *		compnp	- name pointer for device we are recovering from
+ *		options	- metarecover options
+ * OUTPUT:	ep	- error pointer returned
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	validate and display watermark configuration.  walk the
+ *		on-disk watermark structures and validate the information
+ *		found within.  since a watermark configuration is
+ *		"self-defining", the act of traversing the watermarks
+ *		is part of the validation process.  The extent list built
+ *		for the check is released before returning.
+ */
+static int
+meta_sp_validate_wm(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t	*extlist = NULL;
+	sp_ext_node_t	*ext;
+	int		num_sps = 0;
+	int		rval;
+	int		status = -1;
+
+	if ((options & MDCMD_VERBOSE) != 0)
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Verifying on-disk structures on %s.\n"),
+		    compnp->cname);
+
+	/*
+	 * for each watermark, build an ext_node, place on list.
+	 */
+	rval = meta_sp_extlist_from_wm(sp, compnp, &extlist,
+	    meta_sp_cmp_by_nameseq, ep);
+
+	if ((options & MDCMD_VERBOSE) != 0) {
+		/* print out what we found */
+		if (extlist == NULL)
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "No extent headers found on %s.\n"),
+			    compnp->cname);
+		else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "The following extent headers were found on %s.\n"),
+			    compnp->cname);
+			meta_sp_display_exthdr();
+		}
+		for (ext = extlist; ext != NULL; ext = ext->ext_next)
+			meta_sp_display_ext(ext);
+	}
+
+	if (rval < 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: On-disk structures invalid or "
+		    "no soft partitions found.\n"),
+		    compnp->cname);
+		goto out;
+	}
+
+	assert(extlist != NULL);
+
+	/*
+	 * count number of soft partitions: the list is sorted by name
+	 * and sequence number, so a soft partition is counted at the
+	 * last of its consecutive same-named allocated extents.
+	 */
+	for (ext = extlist;
+	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
+	    ext = ext->ext_next) {
+		if (ext->ext_next != NULL &&
+		    ext->ext_next->ext_namep != NULL &&
+		    strcmp(ext->ext_next->ext_namep->cname,
+		    ext->ext_namep->cname) == 0)
+			continue;
+		num_sps++;
+	}
+
+	if ((options & MDCMD_VERBOSE) != 0)
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Found %d soft partition(s) on %s.\n"), num_sps,
+		    compnp->cname);
+
+	if (num_sps == 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: No soft partitions.\n"), compnp->cname);
+		status = mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname);
+		goto out;
+	}
+
+	/* check sequence numbers */
+	if ((options & MDCMD_VERBOSE) != 0)
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Checking sequence numbers.\n"));
+
+	if (meta_sp_checkseq(extlist) != 0) {
+		status = mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname);
+		goto out;
+	}
+
+	status = 0;
+
+out:
+	/* the extent list is only needed for the checks above */
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+
+	return (status);
+}
+
+/*
+ * FUNCTION:	meta_sp_validate_unit()
+ * INPUT:	sp	- name of set we are recovering in
+ *		compnp	- name of component we are recovering from
+ *		options	- metarecover options
+ * OUTPUT:	ep	- error pointer returned
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	validate and display metadb configuration.  begin by getting
+ *		all soft partitions built on the specified component.  get
+ *		the unit structure for each one and validate the fields
+ *		within: the status must be in range, the virtual offsets of
+ *		the extents must be contiguous starting at 0, and no extent
+ *		may run past the end of the component.
+ */
+static int
+meta_sp_validate_unit(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_sp_t		*msp;
+	mdnamelist_t	*spnlp = NULL;
+	mdnamelist_t	*namep = NULL;
+	int		count;
+	int		status = -1;
+	uint_t		extn;
+	sp_ext_length_t	size;
+
+	if ((options & MDCMD_VERBOSE) != 0)
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Validating soft partition metadb entries.\n"),
+		    compnp->cname);
+
+	if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+
+	/* get all soft partitions on component */
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
+
+	if (count == 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: No soft partitions.\n"), compnp->cname);
+		status = mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname);
+		goto out;
+	} else if (count < 0) {
+		goto out;
+	}
+
+	/* Now go through the soft partitions and check each one */
+	for (namep = spnlp; namep != NULL; namep = namep->next) {
+		mdname_t	*curnp = namep->namep;
+		sp_ext_offset_t	curvoff;
+
+		/* get the unit structure */
+		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
+			goto out;
+
+		/* verify generic unit structure parameters */
+		if ((options & MDCMD_VERBOSE) != 0)
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "\nVerifying device %s.\n"),
+			    curnp->cname);
+
+		/*
+		 * MD_SP_LAST is an invalid state and is always the
+		 * highest numbered.
+		 */
+		if (msp->status >= MD_SP_LAST) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: status value %u is out of range.\n"),
+			    curnp->cname, msp->status);
+			status = mdmderror(ep, MDE_RECOVER_FAILED,
+			    0, curnp->cname);
+			goto out;
+		} else if ((options & MDCMD_VERBOSE) != 0) {
+			uint_t	tstate = 0;
+
+			if (metaismeta(msp->compnamep)) {
+				if (meta_get_tstate(msp->common.namep->dev,
+				    &tstate, ep) != 0)
+					goto out;
+			}
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Status \"%s\" is valid.\n"),
+			    curnp->cname, meta_sp_status_to_name(msp->status,
+			    tstate & MD_DEV_ERRORED));
+		}
+
+		/* Now verify each extent */
+		if ((options & MDCMD_VERBOSE) != 0)
+			(void) printf("%14s %21s %21s %21s\n",
+			    dgettext(TEXT_DOMAIN, "Extent Number"),
+			    dgettext(TEXT_DOMAIN, "Virtual Offset"),
+			    dgettext(TEXT_DOMAIN, "Physical Offset"),
+			    dgettext(TEXT_DOMAIN, "Length"));
+
+		/* virtual offsets must be contiguous, starting at 0 */
+		curvoff = 0ULL;
+		for (extn = 0; extn < msp->ext.ext_len; extn++) {
+			md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
+
+			if ((options & MDCMD_VERBOSE) != 0)
+				(void) printf("%14u %21llu %21llu %21llu\n",
+				    extn, extp->voff, extp->poff, extp->len);
+
+			if (extp->voff != curvoff) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: virtual offset for extent %u "
+				    "is inconsistent, expected %llu, "
+				    "got %llu.\n"), curnp->cname, extn,
+				    curvoff, extp->voff);
+				status = mdmderror(ep, MDE_RECOVER_FAILED,
+				    0, compnp->cname);
+				goto out;
+			}
+
+			/*
+			 * make sure extent does not drop off the end:
+			 * anything ending beyond the device size is bad
+			 * (an equality test would let an overrunning
+			 * extent through).
+			 */
+			if ((extp->poff + extp->len) > size) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: extent %u at offset %llu, "
+				    "length %llu exceeds the size of the "
+				    "device, %llu.\n"), curnp->cname,
+				    extn, extp->poff, extp->len, size);
+				status = mdmderror(ep, MDE_RECOVER_FAILED,
+				    0, compnp->cname);
+				goto out;
+			}
+
+			curvoff += extp->len;
+		}
+	}
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partition metadb configuration is valid\n"),
+		    compnp->cname);
+	}
+	status = 0;
+
+out:
+	/* release the list of soft partition names on every path */
+	if (spnlp != NULL)
+		metafreenamelist(spnlp);
+
+	return (status);
+}
+
+/*
+ * FUNCTION: meta_sp_validate_wm_and_unit()
+ * INPUT: sp - name of set we are recovering in
+ * compnp - name of device we are recovering from
+ * options - metarecover options
+ * OUTPUT: ep - error pointer returned
+ * RETURNS: int - 0 - success, -1 error
+ * PURPOSE: cross-validate and display watermarks and metadb records.
+ * get both the unit structures for the soft partitions built
+ * on the specified component and the watermarks found on that
+ * component and check to make sure they are consistent with
+ * each other.
+ */
+static int
+meta_sp_validate_wm_and_unit(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t	*wmlist = NULL;
+	sp_ext_node_t	*unitlist = NULL;
+	sp_ext_node_t	*unitext;
+	sp_ext_node_t	*wmext;
+	sp_ext_offset_t	tmpunitoff;
+	sp_ext_length_t	size;
+	mdnamelist_t	*spnlp = NULL;
+	int		count;
+	int		rval = 0;
+	int		verbose = (options & MDCMD_VERBOSE);
+
+	/* get unit structure list */
+	count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep);
+	if (count <= 0)
+		return (-1);
+
+	/*
+	 * Look the component size up once and fail cleanly if it cannot
+	 * be determined, rather than doing arithmetic on the error value.
+	 */
+	if ((size = metagetsize(np, ep)) == MD_DISKADDR_ERROR) {
+		metafreenamelist(spnlp);
+		return (-1);
+	}
+
+	/* seed the unit list with the end-of-device watermark extent */
+	meta_sp_list_insert(NULL, NULL, &unitlist,
+	    size - MD_SP_WMSIZE, MD_SP_WMSIZE,
+	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) {
+		metafreenamelist(spnlp);
+		rval = -1;
+		goto out;
+	}
+
+	metafreenamelist(spnlp);
+
+	meta_sp_list_freefill(&unitlist, size);
+
+	if (meta_sp_extlist_from_wm(sp, np, &wmlist,
+	    meta_sp_cmp_by_offset, ep) < 0) {
+		rval = -1;
+		goto out;
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n");
+		meta_sp_list_dump(unitlist);
+		meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n");
+		meta_sp_list_dump(wmlist);
+	}
+
+	/*
+	 * step through both lists and compare allocated nodes. Free
+	 * nodes and end watermarks may differ between the two but
+	 * that's generally ok, and if they're wrong will typically
+	 * cause misplaced allocated extents.
+	 */
+	if (verbose)
+		(void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb "
+		    "allocations match extent headers.\n"), np->cname);
+
+	unitext = unitlist;
+	wmext = wmlist;
+	while ((wmext != NULL) && (unitext != NULL)) {
+		/* find next allocated extents in each list */
+		while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC)
+			wmext = wmext->ext_next;
+
+		while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC)
+			unitext = unitext->ext_next;
+
+		if (wmext == NULL || unitext == NULL)
+			break;
+
+		if (verbose) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Metadb extent:\n"));
+			meta_sp_display_exthdr();
+			meta_sp_display_ext(unitext);
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Extent header extent:\n"));
+			meta_sp_display_exthdr();
+			meta_sp_display_ext(wmext);
+			(void) printf("\n");
+		}
+
+		/* keep going after a mismatch so every problem is reported */
+		if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0)
+			rval = -1;
+
+		/*
+		 * if the offsets aren't equal, only increment the
+		 * lowest one in hopes of getting the lists back in sync.
+		 * The unit offset is saved first because unitext may be
+		 * advanced before the second comparison.
+		 */
+		tmpunitoff = unitext->ext_offset;
+		if (unitext->ext_offset <= wmext->ext_offset)
+			unitext = unitext->ext_next;
+		if (wmext->ext_offset <= tmpunitoff)
+			wmext = wmext->ext_next;
+	}
+
+	/*
+	 * if both lists aren't at the end then there are extra
+	 * allocated nodes in one of them.
+	 */
+	if (wmext != NULL) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: extent headers contain allocations not in "
+		    "the metadb\n\n"), np->cname);
+		rval = -1;
+	}
+
+	if (unitext != NULL) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: metadb contains allocations not in the extent "
+		    "headers\n\n"), np->cname);
+		rval = -1;
+	}
+
+	if (options & MDCMD_PRINT) {
+		if (rval == 0) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Soft Partition metadb matches extent "
+			    "header configuration\n"), np->cname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Soft Partition metadb does not match extent "
+			    "header configuration\n"), np->cname);
+		}
+	}
+
+out:
+	/* both extent lists are private to this function; release them */
+	if (unitlist != NULL)
+		meta_sp_list_free(&unitlist);
+	if (wmlist != NULL)
+		meta_sp_list_free(&wmlist);
+
+	return (rval);
+}
+
+/*
+ * FUNCTION: meta_sp_validate_exts()
+ * INPUT: compnp - name pointer for device we are recovering from
+ * wmext - extent node representing watermark
+ * unitext - extent node from unit structure
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - 0 - success, mdmderror return code - error
+ * PURPOSE: Takes two extent nodes and checks them against each other.
+ * offset, length, sequence number, set, and name are compared.
+ */
+static int
+meta_sp_validate_exts(
+	mdname_t	*compnp,
+	sp_ext_node_t	*wmext,
+	sp_ext_node_t	*unitext,
+	md_error_t	*ep
+)
+{
+	/* localized format of the first mismatch found, if any */
+	const char	*complaint = NULL;
+	int		unit_has_set = (unitext->ext_setp != NULL);
+	int		wm_has_set = (wmext->ext_setp != NULL);
+	int		unit_has_name = (unitext->ext_namep != NULL);
+	int		wm_has_name = (wmext->ext_namep != NULL);
+
+	/*
+	 * Compare the two extents field by field, in a fixed order:
+	 * offset, length, sequence number, type, set, name.  Only the
+	 * first difference is reported.  For set and name, a pointer
+	 * present on one side only is a mismatch; both absent matches;
+	 * both present are compared as strings.
+	 */
+	if (unitext->ext_offset != wmext->ext_offset) {
+		complaint = dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header offsets differ.\n");
+	} else if (unitext->ext_length != wmext->ext_length) {
+		complaint = dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header lengths differ.\n");
+	} else if (unitext->ext_seq != wmext->ext_seq) {
+		complaint = dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header sequence numbers "
+		    "differ.\n");
+	} else if (unitext->ext_type != wmext->ext_type) {
+		complaint = dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header types differ.\n");
+	} else if (unit_has_set != wm_has_set) {
+		complaint = dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header set values "
+		    "differ.\n");
+	} else if (unit_has_set &&
+	    strcmp(unitext->ext_setp->setname,
+	    wmext->ext_setp->setname) != 0) {
+		complaint = dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header set names "
+		    "differ.\n");
+	} else if (unit_has_name != wm_has_name) {
+		complaint = dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header name values "
+		    "differ.\n");
+	} else if (unit_has_name &&
+	    strcmp(wmext->ext_namep->cname,
+	    unitext->ext_namep->cname) != 0) {
+		complaint = dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header names "
+		    "differ.\n");
+	}
+
+	/* nothing differed: the extents agree */
+	if (complaint == NULL)
+		return (0);
+
+	(void) fprintf(stderr, complaint, compnp->cname);
+	return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
+}
+
+/*
+ * FUNCTION: update_sp_status()
+ * INPUT: sp - name of set we are recovering in
+ * minors - pointer to an array of soft partition minor numbers
+ * num_sps - number of minor numbers in array
+ * status - new status to be applied to all soft parts in array
+ * mn_set - set if current set is a multi-node set
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - 0 - success, -1 - error
+ * PURPOSE: update status of soft partitions to new status. minors is an
+ * array of minor numbers to apply the new status to.
+ * If mn_set is set, a message is sent to all nodes in the
+ * cluster to update the status locally.
+ */
+static int
+update_sp_status(
+	mdsetname_t	*sp,
+	minor_t		*minors,
+	int		num_sps,
+	sp_status_t	status,
+	bool_t		mn_set,
+	md_error_t	*ep
+)
+{
+	int	i;
+	int	err = 0;
+
+	if (mn_set) {
+		/*
+		 * Multi-node set: send one SP_SETSTAT message per soft
+		 * partition.  A failure on one does not stop the
+		 * remaining messages from being sent; the error is
+		 * remembered and returned at the end.
+		 */
+		md_mn_msg_sp_setstat_t	sp_setstat_params;
+		int			result;
+		md_mn_result_t		*resp;
+
+		for (i = 0; i < num_sps; i++) {
+			sp_setstat_params.sp_setstat_mnum = minors[i];
+			sp_setstat_params.sp_setstat_status = status;
+
+			/*
+			 * Start every send with a NULL result pointer.
+			 * The previous iteration's result has already
+			 * been freed, so a stale value here could be
+			 * freed a second time if this send does not
+			 * store a new result.
+			 */
+			resp = NULL;
+			result = mdmn_send_message(sp->setno,
+			    MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS,
+			    (char *)&sp_setstat_params,
+			    sizeof (sp_setstat_params),
+			    &resp, ep);
+			if (resp != NULL) {
+				if (resp->mmr_exitval != 0)
+					err = -1;
+				free_result(resp);
+			}
+			if (result != 0) {
+				err = -1;
+			}
+		}
+	} else {
+		/* single-node set: update all minors with one call */
+		if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0)
+			err = -1;
+	}
+	if (err < 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "Error updating status on recovered soft "
+		    "partitions.\n"));
+	}
+	return (err);
+}
+
+/*
+ * FUNCTION: meta_sp_recover_from_wm()
+ * INPUT: sp - name of set we are recovering in
+ * compnp - name pointer for component we are recovering from
+ * options - metarecover options
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - 0 - success, -1 - error
+ * PURPOSE: update metadb records to match watermarks. begin by getting
+ * an extlist representing all soft partitions on the component.
+ * then build a unit structure for each soft partition.
+ * notify user of changes, then commit each soft partition to
+ * the metadb one at a time in the "recovering" state. update
+ * any watermarks that may need it (to reflect possible name
+ * changes), and, finally, set the status of all recovered
+ * partitions to the "OK" state at once.
+ */
+static int
+meta_sp_recover_from_wm(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t		*extlist = NULL;
+	sp_ext_node_t		*sp_list = NULL;
+	sp_ext_node_t		*update_list = NULL;
+	sp_ext_node_t		*ext;
+	sp_ext_node_t		*sp_ext;
+	mp_unit_t		*mp;
+	mp_unit_t		**un_array;
+	int			numexts = 0, num_sps = 0, i = 0;
+	int			err = 0;
+	int			not_recovered = 0;
+	int			committed = 0;
+	sp_ext_length_t		sp_length = 0LL;
+	mdnamelist_t		*keynlp = NULL;
+	mdname_t		*np;
+	mdname_t		*new_np;
+	int			new_name;
+	md_set_params_t		set_params;
+	minor_t			*minors = NULL;
+	char			yesno[255];
+	char			*yes;
+	bool_t			mn_set = 0;
+	md_set_desc		*sd;
+	mm_unit_t		*mm;
+	md_set_mmown_params_t	*ownpar = NULL;
+	int			comp_is_mirror = 0;
+
+	/*
+	 * if this component appears in another metadevice already, do
+	 * NOT recover from it.
+	 */
+	if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0)
+		return (-1);
+
+	/* set flag if dealing with a MN set */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			return (-1);
+		}
+		if (MD_MNSET_DESC(sd))
+			mn_set = 1;
+	}
+	/*
+	 * for each watermark, build an ext_node, place on list.
+	 */
+	if (meta_sp_extlist_from_wm(sp, compnp, &extlist,
+	    meta_sp_cmp_by_nameseq, ep) < 0)
+		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
+
+	assert(extlist != NULL);
+
+	/*
+	 * count number of soft partitions.  The list is ordered by
+	 * name and sequence, so a partition boundary is wherever the
+	 * next node's name differs from the current one.
+	 */
+	for (ext = extlist;
+	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
+	    ext = ext->ext_next) {
+		if (ext->ext_next != NULL &&
+		    ext->ext_next->ext_namep != NULL &&
+		    strcmp(ext->ext_next->ext_namep->cname,
+		    ext->ext_namep->cname) == 0)
+			continue;
+		num_sps++;
+	}
+
+	/* allocate array of unit structure pointers */
+	un_array = Zalloc(num_sps * sizeof (mp_unit_t *));
+
+	/*
+	 * build unit structures from list of ext_nodes.
+	 */
+	for (ext = extlist;
+	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
+	    ext = ext->ext_next) {
+		meta_sp_list_insert(ext->ext_setp, ext->ext_namep,
+		    &sp_list, ext->ext_offset, ext->ext_length,
+		    ext->ext_type, ext->ext_seq, ext->ext_flags,
+		    meta_sp_cmp_by_nameseq);
+
+		numexts++;
+		sp_length += ext->ext_length - MD_SP_WMSIZE;
+
+		if (ext->ext_next != NULL &&
+		    ext->ext_next->ext_namep != NULL &&
+		    strcmp(ext->ext_next->ext_namep->cname,
+		    ext->ext_namep->cname) == 0)
+			continue;
+
+		/*
+		 * if we made it here, we are at a soft partition
+		 * boundary in the list.
+		 */
+		if (getenv(META_SP_DEBUG)) {
+			meta_sp_debug("meta_recover_from_wm: dumping wm "
+			    "list:\n");
+			meta_sp_list_dump(sp_list);
+		}
+
+		assert(sp_list != NULL);
+		assert(sp_list->ext_namep != NULL);
+
+		if ((new_name = meta_sp_resolve_name_conflict(sp,
+		    sp_list->ext_namep, &new_np, ep)) < 0) {
+			err = 1;
+			goto out;
+		} else if (new_name) {
+			for (sp_ext = sp_list;
+			    sp_ext != NULL;
+			    sp_ext = sp_ext->ext_next) {
+				/*
+				 * insert into the update list for
+				 * watermark update.
+				 */
+				meta_sp_list_insert(sp_ext->ext_setp,
+				    new_np, &update_list, sp_ext->ext_offset,
+				    sp_ext->ext_length, sp_ext->ext_type,
+				    sp_ext->ext_seq, EXTFLG_UPDATE,
+				    meta_sp_cmp_by_offset);
+			}
+
+		}
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (mn_set) {
+				/* send message to all nodes to return key */
+				md_mn_msg_addkeyname_t	*send_params;
+				int			result;
+				md_mn_result_t		*resp = NULL;
+				int			message_size;
+
+				message_size = sizeof (*send_params) +
+				    strlen(compnp->cname) + 1;
+				send_params = Zalloc(message_size);
+				send_params->addkeyname_setno = sp->setno;
+				(void) strcpy(&send_params->addkeyname_name[0],
+				    compnp->cname);
+				result = mdmn_send_message(sp->setno,
+				    MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS,
+				    (char *)send_params, message_size, &resp,
+				    ep);
+				Free(send_params);
+				if (resp != NULL) {
+					if (resp->mmr_exitval >= 0) {
+						compnp->key =
+						    (mdkey_t)resp->mmr_exitval;
+					} else {
+						err = 1;
+						free_result(resp);
+						goto out;
+					}
+					free_result(resp);
+				}
+				if (result != 0) {
+					err = 1;
+					goto out;
+				}
+				(void) metanamelist_append(&keynlp, compnp);
+			} else {
+				if (add_key_name(sp, compnp, &keynlp,
+				    ep) != 0) {
+					err = 1;
+					goto out;
+				}
+			}
+		}
+
+		/* create the unit structure */
+		if ((mp = meta_sp_createunit(
+		    (new_name) ? new_np : sp_list->ext_namep, compnp,
+		    sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) {
+			err = 1;
+			goto out;
+		}
+
+		if (getenv(META_SP_DEBUG)) {
+			meta_sp_debug("meta_sp_recover_from_wm: "
+			    "printing newly created unit structure");
+			meta_sp_printunit(mp);
+		}
+
+		/* place in unit structure array */
+		un_array[i++] = mp;
+
+		/* free sp_list */
+		meta_sp_list_free(&sp_list);
+		sp_list = NULL;
+		numexts = 0;
+		sp_length = 0LL;
+	}
+
+	/* display configuration updates */
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "The following soft partitions were found and will be added to\n"
+	    "your metadevice configuration.\n"));
+	(void) printf("%5s %15s %18s\n",
+	    dgettext(TEXT_DOMAIN, "Name"),
+	    dgettext(TEXT_DOMAIN, "Size"),
+	    dgettext(TEXT_DOMAIN, "No. of Extents"));
+	for (i = 0; i < num_sps; i++) {
+		(void) printf("%5s%lu %15llu %9d\n", "d",
+		    MD_MIN2UNIT(MD_SID(un_array[i])),
+		    un_array[i]->un_length, un_array[i]->un_numexts);
+	}
+
+	/* without MDCMD_DOIT this is a dry run: report and stop */
+	if (!(options & MDCMD_DOIT)) {
+		not_recovered = 1;
+		goto out;
+	}
+
+	/* ask user for confirmation */
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "WARNING: You are about to add one or more soft partition\n"
+	    "metadevices to your metadevice configuration.  If there\n"
+	    "appears to be an error in the soft partition(s) displayed\n"
+	    "above, do NOT proceed with this recovery operation.\n"));
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Are you sure you want to do this (yes/no)? "));
+
+	(void) fflush(stdout);
+	/* EOF or an empty line is treated as "no" */
+	if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
+	    (strlen(yesno) == 1))
+		(void) snprintf(yesno, sizeof (yesno), "%s\n",
+		    dgettext(TEXT_DOMAIN, "no"));
+	yes = dgettext(TEXT_DOMAIN, "yes");
+	if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
+		not_recovered = 1;
+		goto out;
+	}
+
+	/* commit records one at a time */
+	for (i = 0; i < num_sps; i++) {
+		(void) memset(&set_params, 0, sizeof (set_params));
+		set_params.mnum = MD_SID(un_array[i]);
+		set_params.size = (un_array[i])->c.un_size;
+		set_params.mdp = (uintptr_t)(un_array[i]);
+		set_params.options =
+		    meta_check_devicesize(un_array[i]->un_length);
+		if (set_params.options == MD_CRO_64BIT) {
+			un_array[i]->c.un_revision = MD_64BIT_META_DEV;
+		} else {
+			un_array[i]->c.un_revision = MD_32BIT_META_DEV;
+		}
+		MD_SETDRIVERNAME(&set_params, MD_SP,
+		    MD_MIN2SET(set_params.mnum));
+
+		/*
+		 * np is dereferenced below for ioctl and error messages,
+		 * so a failed lookup has to stop the recovery here.
+		 */
+		np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep);
+		if (np == NULL) {
+			err = 1;
+			goto out;
+		}
+
+		/*
+		 * If this is an MN set, send the MD_IOCSET ioctl to all nodes
+		 */
+		if (mn_set) {
+			md_mn_msg_iocset_t	*send_params;
+			int			result;
+			md_mn_result_t		*resp = NULL;
+			int			mess_size;
+
+			/*
+			 * Calculate message size. md_mn_msg_iocset_t only
+			 * contains one extent, so increment the size to
+			 * include all extents
+			 */
+			mess_size = sizeof (*send_params) -
+			    sizeof (mp_ext_t) +
+			    (un_array[i]->un_numexts * sizeof (mp_ext_t));
+
+			/*
+			 * The message is allocated at its full computed
+			 * size.  A fixed-size md_mn_msg_iocset_t has room
+			 * for a single extent only, so copying a unit with
+			 * more extents into one would write past its end.
+			 */
+			send_params = Zalloc(mess_size);
+			send_params->iocset_params = set_params;
+			(void) memcpy(&send_params->unit, un_array[i],
+			    sizeof (*un_array[i]) - sizeof (mp_ext_t) +
+			    (un_array[i]->un_numexts * sizeof (mp_ext_t)));
+			result = mdmn_send_message(sp->setno,
+			    MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS,
+			    (char *)send_params, mess_size, &resp,
+			    ep);
+			Free(send_params);
+			if (resp != NULL) {
+				if (resp->mmr_exitval != 0)
+					err = 1;
+				free_result(resp);
+			}
+			if (result != 0) {
+				err = 1;
+			}
+		} else {
+			if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+			    np->cname) != 0) {
+				err = 1;
+			}
+		}
+
+		if (err == 1) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "%s: Error committing record to metadb.\n"),
+			    np->cname);
+			goto out;
+		}
+
+		/* note that we've committed a record */
+		if (!committed)
+			committed = 1;
+
+		/* update any watermarks that need it */
+		if (update_list != NULL) {
+			md_sp_t *msp;
+
+			/*
+			 * Check to see if we're trying to create a partition
+			 * on a mirror. If so we may have to enforce an
+			 * ownership change before writing the watermark out.
+			 */
+			if (metaismeta(compnp)) {
+				char *miscname;
+
+				miscname = metagetmiscname(compnp, ep);
+				if (miscname != NULL)
+					comp_is_mirror = (strcmp(miscname,
+					    MD_MIRROR) == 0);
+				else
+					comp_is_mirror = 0;
+			}
+			/*
+			 * If this is a MN set and the component is a mirror,
+			 * change ownership to this node in order to write the
+			 * watermarks
+			 */
+			if (mn_set && comp_is_mirror) {
+				mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep);
+				if (mm == NULL) {
+					err = 1;
+					goto out;
+				} else {
+					err = meta_mn_change_owner(&ownpar,
+					    sp->setno,
+					    meta_getminor(compnp->dev),
+					    sd->sd_mn_mynode->nd_nodeid,
+					    MD_MN_MM_PREVENT_CHANGE |
+					    MD_MN_MM_SPAWN_THREAD);
+					if (err != 0)
+						goto out;
+				}
+			}
+
+			if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
+				err = 1;
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: Error updating extent headers.\n"),
+				    np->cname);
+				goto out;
+			}
+			if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) {
+				err = 1;
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: Error updating extent headers "
+				    "on disk.\n"), np->cname);
+				goto out;
+			}
+		}
+		/*
+		 * If we have changed ownership earlier and prevented any
+		 * ownership changes, we can now allow ownership changes
+		 * again.
+		 */
+		if (ownpar) {
+			(void) meta_mn_change_owner(&ownpar, sp->setno,
+			    ownpar->d.mnum,
+			    ownpar->d.owner,
+			    MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD);
+		}
+	}
+
+	/* update status of all soft partitions to OK */
+	minors = Zalloc(num_sps * sizeof (minor_t));
+	for (i = 0; i < num_sps; i++)
+		minors[i] = MD_SID(un_array[i]);
+
+	err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep);
+	if (err != 0)
+		goto out;
+
+	if (options & MDCMD_PRINT)
+		(void) printf(dgettext(TEXT_DOMAIN, "%s: "
+		    "Soft Partitions recovered from device.\n"),
+		    compnp->cname);
+out:
+	/* free memory */
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+	if (sp_list != NULL)
+		meta_sp_list_free(&sp_list);
+	if (update_list != NULL)
+		meta_sp_list_free(&update_list);
+	if (un_array != NULL) {
+		for (i = 0; i < num_sps; i++)
+			Free(un_array[i]);
+		Free(un_array);
+	}
+	if (minors != NULL)
+		Free(minors);
+	if (ownpar != NULL)
+		Free(ownpar);
+	(void) fflush(stdout);
+
+	if ((keynlp != NULL) && (committed != 1)) {
+		/*
+		 * if we haven't committed any softparts, either because of an
+		 * error or because the user decided not to proceed, delete
+		 * namelist key for the component
+		 */
+		if (mn_set) {
+			mdnamelist_t	*p;
+
+			for (p = keynlp; (p != NULL); p = p->next) {
+				mdname_t		*np = p->namep;
+				md_mn_msg_delkeyname_t	send_params;
+				md_mn_result_t		*resp = NULL;
+
+				send_params.delkeyname_dev = np->dev;
+				send_params.delkeyname_setno = sp->setno;
+				send_params.delkeyname_key = np->key;
+				(void) mdmn_send_message(sp->setno,
+				    MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS,
+				    (char *)&send_params, sizeof (send_params),
+				    &resp, ep);
+				if (resp != NULL) {
+					free_result(resp);
+				}
+			}
+		} else {
+			(void) del_key_names(sp, keynlp, NULL);
+		}
+	}
+
+	metafreenamelist(keynlp);
+
+	if (err)
+		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
+
+	if (not_recovered)
+		if (options & MDCMD_PRINT)
+			(void) printf(dgettext(TEXT_DOMAIN, "%s: "
+			    "Soft Partitions NOT recovered from device.\n"),
+			    compnp->cname);
+	return (0);
+}
+
+/*
+ * FUNCTION: meta_sp_recover_from_unit()
+ * INPUT: sp - name of set we are recovering in
+ * compnp - name of component we are recovering from
+ * options - metarecover options
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - 0 - success, -1 - error
+ * PURPOSE: update watermarks to match metadb records. begin by getting
+ * a namelist representing all soft partitions on the specified
+ * component. then, build an extlist representing the soft
+ * partitions, filling in the freespace extents. notify user
+ * of changes, place all soft partitions into the "recovering"
+ * state and update the watermarks. finally, return all soft
+ * partitions to the "OK" state.
+ */
+static int
+meta_sp_recover_from_unit(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*spnlp = NULL;
+	mdnamelist_t	*nlp = NULL;
+	sp_ext_node_t	*ext = NULL;
+	sp_ext_node_t	*extlist = NULL;
+	int		count;
+	char		yesno[255];
+	char		*yes;
+	int		rval = 0;
+	minor_t		*minors = NULL;
+	int		i;
+	md_sp_t		*msp;
+	md_set_desc	*sd;
+	bool_t		mn_set = 0;
+	daddr_t		start_block;
+
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
+	if (count <= 0)
+		return (-1);
+
+	/*
+	 * From here on spnlp (and later extlist) are owned by this
+	 * function, so every failure goes through "out" to release them.
+	 */
+
+	/* set flag if dealing with a MN set */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			rval = -1;
+			goto out;
+		}
+		if (MD_MNSET_DESC(sd))
+			mn_set = 1;
+	}
+	/*
+	 * Save the XDR unit structure for one of the soft partitions;
+	 * we'll use this later to provide metadevice context to
+	 * update the watermarks so the device can be resolved by
+	 * devid instead of dev_t.
+	 */
+	if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) {
+		rval = -1;
+		goto out;
+	}
+
+	if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
+	    MD_DISKADDR_ERROR) {
+		rval = -1;
+		goto out;
+	}
+
+	/* reserved space at the front, end watermark at the back */
+	meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
+	    EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
+	meta_sp_list_insert(NULL, NULL, &extlist,
+	    metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
+	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	assert(extlist != NULL);
+	if ((options & MDCMD_VERBOSE) != 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Updating extent headers on device %s from metadb.\n\n"),
+		    compnp->cname);
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "The following extent headers will be written:\n"));
+		meta_sp_display_exthdr();
+	}
+
+	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
+
+	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
+
+		/* mark every node for updating except the reserved space */
+		if (ext->ext_type != EXTTYP_RESERVED) {
+			ext->ext_flags |= EXTFLG_UPDATE;
+
+			/* print extent information */
+			if ((options & MDCMD_VERBOSE) != 0)
+				meta_sp_display_ext(ext);
+		}
+	}
+
+	/* request verification and then update all watermarks */
+	if ((options & MDCMD_DOIT) != 0) {
+
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "\nWARNING: You are about to overwrite portions of %s\n"
+		    "with soft partition metadata. The extent headers will be\n"
+		    "written to match the existing metadb configuration.  If\n"
+		    "the device was not previously setup with this\n"
+		    "configuration, data loss may result.\n\n"),
+		    compnp->cname);
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Are you sure you want to do this (yes/no)? "));
+
+		(void) fflush(stdout);
+		/* EOF or an empty line is treated as "no" */
+		if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
+		    (strlen(yesno) == 1))
+			(void) snprintf(yesno, sizeof (yesno),
+			    "%s\n", dgettext(TEXT_DOMAIN, "no"));
+		yes = dgettext(TEXT_DOMAIN, "yes");
+		if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) {
+			/* place soft partitions into recovering state */
+			minors = Zalloc(count * sizeof (minor_t));
+			for (nlp = spnlp, i = 0;
+			    nlp != NULL && i < count;
+			    nlp = nlp->next, i++) {
+				assert(nlp->namep != NULL);
+				minors[i] = meta_getminor(nlp->namep->dev);
+			}
+			if (update_sp_status(sp, minors, count,
+			    MD_SP_RECOVER, mn_set, ep) != 0) {
+				rval = -1;
+				goto out;
+			}
+
+			/* update the watermarks */
+			if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
+				rval = -1;
+				goto out;
+			}
+
+			if (options & MDCMD_PRINT) {
+				(void) printf(dgettext(TEXT_DOMAIN, "%s: "
+				    "Soft Partitions recovered from metadb\n"),
+				    compnp->cname);
+			}
+
+			/* return soft partitions to the OK state */
+			if (update_sp_status(sp, minors, count,
+			    MD_SP_OK, mn_set, ep) != 0) {
+				rval = -1;
+				goto out;
+			}
+
+			rval = 0;
+			goto out;
+		}
+	}
+
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partitions NOT recovered from metadb\n"),
+		    compnp->cname);
+	}
+
+out:
+	if (minors != NULL)
+		Free(minors);
+	metafreenamelist(spnlp);
+	/* extlist is still NULL when an early failure jumps here */
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+	(void) fflush(stdout);
+	return (rval);
+}
+
+
+/*
+ * FUNCTION: meta_sp_update_abr()
+ * INPUT: sp - name of set we are recovering in
+ * OUTPUT: ep - return error pointer
+ * RETURNS: int - 0 - success, -1 - error
+ * PURPOSE: update the ABR state for all soft partitions in the set. This
+ * is called when joining a set. It sends a message to the master
+ * node for each soft partition to get the value of tstate and
+ * then sets ABR, if required, by opening the sp, setting ABR
+ * and then closing the sp. This approach is taken rather than
+ * just issuing the MD_MN_SET_CAP ioctl, in order to deal with
+ * the case when we have another node simultaneously unsetting ABR.
+ */
+int
+meta_sp_update_abr(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*splist = NULL;
+	mdnamelist_t	*cur;
+	char		devpath[MAXPATHLEN];
+	int		rval = -1;
+
+	if (meta_get_sp_names(sp, &splist, 0, ep) < 0) {
+		return (-1);
+	}
+
+	/* No soft partitions in this set: nothing to update */
+	if (splist == NULL)
+		return (0);
+
+	/* Walk every soft partition in the set */
+	for (cur = splist; cur != NULL; cur = cur->next) {
+		mdname_t	*spnp = cur->namep;
+		md_unit_t	*unit;
+		volcap_t	volcap;
+		uint_t		tstate;
+		int		has_parent;
+		int		mnum;
+		int		fd;
+
+		/* only top level metadevices are processed */
+		unit = meta_get_mdunit(sp, spnp, ep);
+		if (unit == NULL)
+			goto done;
+		has_parent = MD_HAS_PARENT(MD_PARENT(unit)) ? 1 : 0;
+		Free(unit);
+		if (has_parent)
+			continue;
+
+		/* Ask the master for this device's tstate */
+		if (meta_mn_send_get_tstate(spnp->dev, &tstate, ep) != 0) {
+			mdname_t	*np;
+
+			np = metamnumname(&sp, meta_getminor(spnp->dev), 0,
+			    ep);
+			if (np) {
+				md_perror(dgettext(TEXT_DOMAIN,
+				    "Unable to get tstate for %s"), np->cname);
+			}
+			continue;
+		}
+
+		/* ABR not set on the master, so leave this one alone */
+		if ((tstate & MD_ABR_CAP) == 0)
+			continue;
+
+		mnum = meta_getminor(spnp->dev);
+		(void) snprintf(devpath, MAXPATHLEN, "/dev/md/%s/rdsk/d%u",
+		    sp->setname, (unsigned)MD_MIN2UNIT(mnum));
+		fd = open(devpath, O_RDWR, 0);
+		if (fd < 0) {
+			md_perror(dgettext(TEXT_DOMAIN,
+			    "Could not open device %s"), devpath);
+			continue;
+		}
+
+		/*
+		 * Set ABR state.  A failure to read the capabilities
+		 * skips this device; a failure to set them abandons
+		 * the whole update.
+		 */
+		volcap.vc_info = 0;
+		volcap.vc_set = 0;
+		if (ioctl(fd, DKIOCGETVOLCAP, &volcap) < 0) {
+			(void) close(fd);
+			continue;
+		}
+
+		volcap.vc_set = DKV_ABR_CAP;
+		if (ioctl(fd, DKIOCSETVOLCAP, &volcap) < 0) {
+			(void) close(fd);
+			goto done;
+		}
+
+		(void) close(fd);
+	}
+	rval = 0;
+
+done:
+	metafreenamelist(splist);
+	return (rval);
+}
+
+/*
+ * FUNCTION: meta_mn_sp_update_abr()
+ * INPUT: arg - Given set.
+ * PURPOSE: update the ABR state for all soft partitions in the set by
+ * forking a process to call meta_sp_update_abr()
+ * This function is only called via rpc.metad when adding a node
+ * to a set, i.e. this node is being joined to the set by another
+ * node.
+ */
+void *
+meta_mn_sp_update_abr(void *arg)
+{
+	md_error_t	mde = mdnullerror;
+	set_t		setno = *((set_t *)arg);
+	mdsetname_t	*sp;
+	int		daemon_rv;
+
+	/* should have a set */
+	assert(setno != NULL);
+
+	sp = metasetnosetname(setno, &mde);
+	if (sp == NULL) {
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	if (!(meta_is_mn_set(sp, &mde))) {
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	/*
+	 * md_daemonize forks off a process.  A non-zero result means
+	 * this is either the parent (positive) or an error (negative);
+	 * both return to the caller, and only the error is reported.
+	 */
+	daemon_rv = md_daemonize(sp, &mde);
+	if (daemon_rv != 0) {
+		if (daemon_rv < 0)
+			mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	/*
+	 * Child process should never return back to rpc.metad, but
+	 * should exit.
+	 * Flush all internally cached data inherited from parent process
+	 * since cached data will be cleared when parent process RPC request
+	 * has completed (which is possibly before this child process
+	 * can complete).
+	 * Child process can retrieve and cache its own copy of data from
+	 * rpc.metad that won't be changed by the parent process.
+	 *
+	 * Reset md_in_daemon since this child will be a client of rpc.metad
+	 * not part of the rpc.metad daemon itself.
+	 * md_in_daemon is used by rpc.metad so that libmeta can tell if
+	 * this thread is rpc.metad or any other thread.  (If this thread
+	 * was rpc.metad it could use some short circuit code to get data
+	 * directly from rpc.metad instead of doing an RPC call to rpc.metad).
+	 */
+	md_in_daemon = 0;
+	metaflushsetname(sp);
+	sr_cache_flush_setno(setno);
+	sp = metasetnosetname(setno, &mde);
+	if (sp == NULL) {
+		mde_perror(&mde, "");
+		md_exit(sp, 1);
+	}
+
+	/*
+	 * Closing stdin/out/err here.
+	 */
+	(void) close(0);
+	(void) close(1);
+	(void) close(2);
+	assert(daemon_rv == 0);
+
+	(void) meta_sp_update_abr(sp, &mde);
+
+	md_exit(sp, 0);
+	/*NOTREACHED*/
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_stat.c b/usr/src/lib/lvm/libmeta/common/meta_stat.c
new file mode 100644
index 0000000000..90844f9148
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_stat.c
@@ -0,0 +1,103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 1993, 1994, 2000 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Caching stat function
+ */
+
+#include <meta.h>
+
+#define MD_NUM_STAT_HEAD 16 /* number of hash buckets in statcache_head[] */
+
+struct statcache { /* one cached stat result, linked into a hash bucket */
+ struct statcache *sc_next; /* next entry in the same bucket */
+ struct stat sc_stat; /* copy of the stat result for sc_filename */
+ char *sc_filename; /* strdup()ed copy of the looked-up path */
+};
+
+static struct statcache *statcache_head[MD_NUM_STAT_HEAD] = /* bucket heads, all empty initially */
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+/*
+ * meta_stat() - stat() a device node, remembering the answer.
+ *
+ * filename	path to look up; also the cache key (exact string match)
+ * sbp		filled in with the stat result on success
+ *
+ * Returns 0 on success (from the cache or from stat()), -1 if stat()
+ * fails or the file is neither a block nor a character device.
+ * Only successful device lookups are cached; entries stay until
+ * metaflushstatcache() is called.  Failing to allocate a cache entry
+ * is not an error, the result is simply not remembered.
+ */
+int
+meta_stat(const char *filename, struct stat *sbp)
+{
+	struct statcache	*scp;
+	const unsigned char	*cp;
+	unsigned int		hash = 0;
+
+	/*
+	 * Additive hash over the path bytes, reduced by the real table
+	 * size so the index can never run past statcache_head[] even if
+	 * MD_NUM_STAT_HEAD is changed.
+	 */
+	for (cp = (const unsigned char *)filename; *cp != '\0'; cp++)
+		hash += *cp;
+	hash %= MD_NUM_STAT_HEAD;
+
+	/* cache hit: hand back the saved result */
+	for (scp = statcache_head[hash]; scp != NULL; scp = scp->sc_next) {
+		if (strcmp(filename, scp->sc_filename) == 0) {
+			*sbp = scp->sc_stat;
+			return (0);
+		}
+	}
+
+	if (stat(filename, sbp) != 0)
+		return (-1);
+
+	/* only block and character devices are accepted (and cached) */
+	if (!S_ISBLK(sbp->st_mode) && !S_ISCHR(sbp->st_mode))
+		return (-1);
+
+	/* caching is best effort; the caller already has its answer */
+	scp = malloc(sizeof (*scp));
+	if (scp == NULL)
+		return (0);
+	scp->sc_filename = strdup(filename);
+	if (scp->sc_filename == NULL) {
+		free(scp);
+		return (0);
+	}
+	scp->sc_stat = *sbp;
+
+	/* push onto the front of the bucket */
+	scp->sc_next = statcache_head[hash];
+	statcache_head[hash] = scp;
+	return (0);
+}
+
+/*
+ * Empty the stat cache: release every entry (and its filename) in every
+ * bucket and leave all bucket heads NULL.
+ */
+void
+metaflushstatcache(void)
+{
+	int	bucket;
+
+	for (bucket = 0; bucket < MD_NUM_STAT_HEAD; bucket++) {
+		struct statcache	*entry = statcache_head[bucket];
+
+		while (entry != NULL) {
+			/* grab the link before the entry goes away */
+			struct statcache	*following = entry->sc_next;
+
+			Free(entry->sc_filename);
+			Free(entry);
+			entry = following;
+		}
+		statcache_head[bucket] = NULL;
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_stripe.c b/usr/src/lib/lvm/libmeta/common/meta_stripe.c
new file mode 100644
index 0000000000..237afcd60b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_stripe.c
@@ -0,0 +1,2496 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * stripe operations
+ */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <meta.h>
+#include <sys/lvm/md_stripe.h>
+#include <sys/lvm/md_convert.h>
+
+#define QUOTE(x) #x /* stringify the argument exactly as written */
+#define VAL2STR(x) QUOTE(x) /* macro-expand x first, then stringify the result */
+
+/*
+ * replace stripe/concat
+ *
+ * Replaces component oldnp of the stripe/concat stripenp with newnp by
+ * issuing an MD_IOCREPLACE ioctl with cmd REPLACE_COMP.
+ *
+ * options: with MDCMD_DOIT the name key for newnp is added before the
+ * ioctl and deleted again if the ioctl fails; without it the request is
+ * flagged MDIOCTL_DRYRUN and no key is added.  MDCMD_PRINT prints a
+ * confirmation line on success.
+ *
+ * Returns 0 on success.  On failure the error is recorded in ep and
+ * either -1 or the value of the md*error() call is returned.
+ */
+int
+meta_stripe_replace(
+ mdsetname_t *sp,
+ mdname_t *stripenp,
+ mdname_t *oldnp,
+ mdname_t *newnp,
+ mdcmdopts_t options,
+ md_error_t *ep
+)
+{
+ replace_params_t params;
+ md_dev64_t old_dev,
+ new_dev;
+ diskaddr_t new_start_blk,
+ new_end_blk,
+ label,
+ size,
+ start_blk;
+
+ /* should have same set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+ new_dev = newnp->dev; /* save newnp's binding before the invalidate below */
+ new_start_blk = newnp->start_blk;
+ new_end_blk = newnp->end_blk;
+
+ meta_invalidate_name(stripenp);
+
+ /* the old device binding is now established */
+ if ((old_dev = oldnp->dev) == NODEV64)
+ return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+ if (((strcmp(oldnp->rname, newnp->rname) == 0) && /* same raw name ... */
+ (old_dev != new_dev))) { /* ... but a different dev: restore the saved values */
+ newnp->dev = new_dev;
+ newnp->start_blk = new_start_blk;
+ newnp->end_blk = new_end_blk;
+ }
+
+ if ((size = metagetsize(newnp, ep)) == MD_DISKADDR_ERROR)
+ return (-1);
+ if ((label = metagetlabel(newnp, ep)) == MD_DISKADDR_ERROR)
+ return (-1);
+ if ((start_blk = metagetstart(sp, newnp, ep)) == MD_DISKADDR_ERROR)
+ return (-1);
+ if (start_blk >= size) { /* start block at or past the end: no room */
+ (void) mdsyserror(ep, ENOSPC, newnp->cname);
+ return (-1);
+ }
+
+ /* In dryrun mode (DOIT not set) we must not alter the mddb */
+ if (options & MDCMD_DOIT) {
+ if (add_key_name(sp, newnp, NULL, ep) != 0)
+ return (-1);
+ }
+
+ /*
+ * There is no need to call meta_fixdevid() here as this function is
+ * only called by the metareplace -c command which actually does
+ * nothing (in terms of a resync) and thus does nothing with the devid.
+ */
+
+ (void) memset(&params, 0, sizeof (params));
+ params.mnum = meta_getminor(stripenp->dev);
+ MD_SETDRIVERNAME(&params, MD_STRIPE, sp->setno);
+
+ params.cmd = REPLACE_COMP;
+ params.old_dev = old_dev;
+ params.new_dev = new_dev;
+ params.new_key = newnp->key; /* NOTE(review): in a dryrun no key was added above -- confirm the value is meaningful */
+ params.start_blk = newnp->start_blk;
+ params.number_blks = size;
+ /* Is this just a dryrun ? */
+ if ((options & MDCMD_DOIT) == 0) {
+ params.options |= MDIOCTL_DRYRUN;
+ }
+ if (label == 0) /* has_label is just "label size is non-zero" */
+ params.has_label = 0;
+ else
+ params.has_label = 1;
+ if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
+ if (options & MDCMD_DOIT) /* undo the key added before the ioctl */
+ (void) del_key_name(sp, newnp, ep);
+ return (mdstealerror(ep, &params.mde));
+ }
+ meta_invalidate_name(oldnp);
+ meta_invalidate_name(newnp);
+ meta_invalidate_name(stripenp);
+
+ if (options & MDCMD_PRINT) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: device %s is replaced with %s\n"),
+ stripenp->cname, oldnp->cname, newnp->cname);
+
+ }
+ return (0);
+}
+
+
+/*
+ * FUNCTION: meta_get_stripe_names()
+ * INPUT: sp - the set name to get stripes from
+ * options - options from the command line
+ * OUTPUT: nlpp - list of all stripe names
+ * ep - return error pointer
+ * RETURNS: int - -1 if error, 0 success
+ * PURPOSE: returns a list of all stripes in the metadb
+ * for all devices in the specified set
+ */
+int
+meta_get_stripe_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	int	result;
+
+	/* the generic name lookup does the work, restricted to MD_STRIPE */
+	result = meta_get_names(MD_STRIPE, sp, nlpp, options, ep);
+	return (result);
+}
+
+/*
+ * Release a stripe descriptor: each row's component array, then the
+ * row array, then the descriptor itself.
+ */
+void
+meta_free_stripe(
+	md_stripe_t	*stripep
+)
+{
+	uint_t	r;
+
+	for (r = 0; r < stripep->rows.rows_len; r++) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+
+		/* rows that never got a component array need no work */
+		if (rowp->comps.comps_val == NULL)
+			continue;
+		assert(rowp->comps.comps_len > 0);
+		Free(rowp->comps.comps_val);
+	}
+
+	if (stripep->rows.rows_val != NULL) {
+		assert(stripep->rows.rows_len > 0);
+		Free(stripep->rows.rows_val);
+	}
+
+	Free(stripep);
+}
+
+
+/*
+ * get stripe (common)
+ *
+ * Returns the md_stripe_t describing stripenp.  If the drive name
+ * already carries a unit (dnp->unitp) that one is returned as is.
+ * Otherwise the ms_unit_t is fetched with meta_get_mdunit(), converted
+ * row by row and component by component into a freshly allocated
+ * md_stripe_t, and the result is stored in dnp->unitp on success.
+ * "fast" is handed to metakeyname() for the component and hot spare
+ * name lookups.
+ *
+ * Returns NULL on failure; the partially built stripe and the fetched
+ * unit are freed first.
+ */
+md_stripe_t *
+meta_get_stripe_common(
+ mdsetname_t *sp,
+ mdname_t *stripenp,
+ int fast,
+ md_error_t *ep
+)
+{
+ mddrivename_t *dnp = stripenp->drivenamep;
+ char *miscname;
+ ms_unit_t *ms;
+ md_stripe_t *stripep;
+ uint_t row;
+
+ /* must have set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+ /* short circuit */
+ if (dnp->unitp != NULL) { /* already converted earlier: reuse it */
+ assert(dnp->unitp->type == MD_DEVICE);
+ return ((md_stripe_t *)dnp->unitp);
+ }
+
+ /* get miscname and unit */
+ if ((miscname = metagetmiscname(stripenp, ep)) == NULL)
+ return (NULL);
+ if (strcmp(miscname, MD_STRIPE) != 0) { /* the name is not a stripe/concat */
+ (void) mdmderror(ep, MDE_NOT_STRIPE,
+ meta_getminor(stripenp->dev), stripenp->cname);
+ return (NULL);
+ }
+ if ((ms = (ms_unit_t *)meta_get_mdunit(sp, stripenp, ep)) == NULL)
+ return (NULL);
+ assert(ms->c.un_type == MD_DEVICE);
+
+ /* allocate stripe */
+ stripep = Zalloc(sizeof (*stripep));
+
+ /* allocate rows */
+ assert(ms->un_nrows > 0);
+ stripep->rows.rows_len = ms->un_nrows;
+ stripep->rows.rows_val = Zalloc(stripep->rows.rows_len *
+ sizeof (*stripep->rows.rows_val));
+
+ /* get common info */
+ stripep->common.namep = stripenp;
+ stripep->common.type = ms->c.un_type;
+ stripep->common.state = ms->c.un_status;
+ stripep->common.capabilities = ms->c.un_capabilities;
+ stripep->common.parent = ms->c.un_parent;
+ stripep->common.size = ms->c.un_total_blocks;
+ stripep->common.user_flags = ms->c.un_user_flags;
+ stripep->common.revision = ms->c.un_revision;
+
+ /* get options */
+ if ((ms->un_hsp_id != MD_HSP_NONE) && /* only look up a pool when one is set */
+ ((stripep->hspnamep = metahsphspname(&sp, ms->un_hsp_id,
+ ep)) == NULL)) {
+ goto out;
+ }
+
+ /* get rows */
+ for (row = 0; (row < ms->un_nrows); ++row) {
+ struct ms_row *mdr = &ms->un_row[row];
+ struct ms_comp *mdcomp = (void *)&((char *)ms)[ms->un_ocomp]; /* component array sits un_ocomp bytes into the unit */
+ md_row_t *rp = &stripep->rows.rows_val[row];
+ uint_t comp, c;
+
+ /* get interlace */
+ rp->interlace = mdr->un_interlace;
+
+ /* allocate comps */
+ assert(mdr->un_ncomp > 0);
+ rp->comps.comps_len = mdr->un_ncomp;
+ rp->comps.comps_val = Zalloc(rp->comps.comps_len *
+ sizeof (*rp->comps.comps_val));
+
+ /* get components */
+ for (comp = 0, c = mdr->un_icomp; (comp < mdr->un_ncomp); /* comp: index within this row */
+ ++comp, ++c) { /* c: index into the unit-wide array, starting at un_icomp */
+ struct ms_comp *mdc = &mdcomp[c];
+ diskaddr_t comp_start_blk = mdc->un_start_block;
+ md_comp_t *cp = &rp->comps.comps_val[comp];
+
+ /* get the component name */
+ cp->compnamep = metakeyname(&sp, mdc->un_key, fast, ep);
+ if (cp->compnamep == NULL)
+ goto out;
+
+ /* if hotspared */
+ if (mdc->un_mirror.ms_hs_id != 0) {
+ diskaddr_t hs_start_blk = mdc->un_start_block; /* here un_start_block is used as the hot spare's start */
+
+ /* get the hotspare name */
+ cp->hsnamep = metakeyname(&sp,
+ mdc->un_mirror.ms_hs_key, fast, ep);
+ if (cp->hsnamep == NULL)
+ goto out;
+
+ if (getenv("META_DEBUG_START_BLK") != NULL) { /* optional sanity check, warnings only */
+ if (metagetstart(sp, cp->hsnamep,
+ ep) == MD_DISKADDR_ERROR)
+ mdclrerror(ep);
+
+ if ((cp->hsnamep->start_blk == 0) &&
+ (hs_start_blk != 0))
+ md_eprintf(dgettext(TEXT_DOMAIN,
+ "%s: suspected bad start block,"
+ " seems labelled [stripe/hs]\n"),
+ cp->hsnamep->cname);
+
+ if ((cp->hsnamep->start_blk > 0) &&
+ (hs_start_blk == 0) &&
+ ! ((row == 0) && (comp == 0)))
+ md_eprintf(dgettext(TEXT_DOMAIN,
+ "%s: suspected bad start block, "
+ "seems unlabelled [stripe/hs]\n"),
+ cp->hsnamep->cname);
+ }
+ /* override any start_blk */
+ cp->hsnamep->start_blk = hs_start_blk;
+
+ /* get the right component start_blk */
+ comp_start_blk = mdc->un_mirror.ms_orig_blk;
+ } else {
+ if (getenv("META_DEBUG_START_BLK") != NULL) { /* same sanity check for a non-hotspared component */
+ if (metagetstart(sp, cp->compnamep,
+ ep) == MD_DISKADDR_ERROR)
+ mdclrerror(ep);
+
+ if ((cp->compnamep->start_blk == 0) &&
+ (comp_start_blk != 0))
+ md_eprintf(dgettext(TEXT_DOMAIN,
+ "%s: suspected bad start block,"
+ " seems labelled [stripe]"), /* NOTE(review): no trailing newline, unlike the [stripe/hs] messages -- confirm intent */
+ cp->compnamep->cname);
+
+ if ((cp->compnamep->start_blk > 0) &&
+ (comp_start_blk == 0) &&
+ ! ((row == 0) && (comp == 0)))
+ md_eprintf(dgettext(TEXT_DOMAIN,
+ "%s: suspected bad start block, "
+ "seems unlabelled [stripe]"), /* NOTE(review): no trailing newline here either -- confirm intent */
+ cp->compnamep->cname);
+ }
+ }
+
+ /* override any start_blk */
+ cp->compnamep->start_blk = comp_start_blk;
+
+ /* get state */
+ cp->state = mdc->un_mirror.ms_state;
+
+ /* get time of last state change */
+ cp->timestamp = mdc->un_mirror.ms_timestamp;
+
+ /* get lasterr count */
+ cp->lasterrcnt = mdc->un_mirror.ms_lasterrcnt;
+ }
+ }
+
+ /* cleanup, return success */
+ Free(ms);
+ dnp->unitp = (md_common_t *)stripep; /* remember the result for the short circuit above */
+ return (stripep);
+
+ /* cleanup, return error */
+out:
+ Free(ms);
+ meta_free_stripe(stripep);
+ return (NULL);
+}
+
+/*
+ * get stripe: meta_get_stripe_common() with the "fast" flag off
+ */
+md_stripe_t *
+meta_get_stripe(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	md_error_t	*ep
+)
+{
+	int	fast = 0;
+
+	return (meta_get_stripe_common(sp, stripenp, fast, ep));
+}
+
+/*
+ * Check one stripe for a clash with a device.
+ *
+ * Every component of stripenp that is on the same drive as np is
+ * handed to meta_check_overlap() together with the slblk/nblks range.
+ * Returns 0 if nothing clashes, -1 on a clash or on any lookup failure.
+ */
+static int
+in_stripe(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	uint_t		r;
+
+	/* caller must supply the set */
+	assert(sp != NULL);
+
+	stripep = meta_get_stripe(sp, stripenp, ep);
+	if (stripep == NULL)
+		return (-1);
+
+	for (r = 0; r < stripep->rows.rows_len; r++) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+		uint_t		c;
+
+		for (c = 0; c < rowp->comps.comps_len; c++) {
+			mdname_t	*compnp;
+			diskaddr_t	comp_start;
+			int		same;
+
+			compnp = rowp->comps.comps_val[c].compnamep;
+
+			/*
+			 * Compare drives first since metagetstart()
+			 * can fail; other drives cannot overlap.
+			 */
+			same = meta_check_samedrive(np, compnp, ep);
+			if (same < 0)
+				return (-1);
+			if (same == 0)
+				continue;
+
+			comp_start = metagetstart(sp, compnp, ep);
+			if (comp_start == MD_DISKADDR_ERROR)
+				return (-1);
+
+			if (meta_check_overlap(stripenp->cname, np,
+			    slblk, nblks, compnp, comp_start, -1,
+			    ep) != 0)
+				return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Check whether np (block range slblk/nblks) clashes with any stripe
+ * of the set.  Returns 0 if not, -1 on a clash or error.
+ */
+int
+meta_check_instripe(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*stripes = NULL;
+	mdnamelist_t	*cur;
+	int		status = 0;
+
+	/* caller must supply the set */
+	assert(sp != NULL);
+
+	if (meta_get_stripe_names(sp, &stripes, 0, ep) < 0)
+		return (-1);
+
+	/* stop at the first stripe that reports a problem */
+	cur = stripes;
+	while (cur != NULL) {
+		if (in_stripe(sp, cur->namep, np, slblk, nblks, ep) != 0) {
+			status = -1;
+			break;
+		}
+		cur = cur->next;
+	}
+
+	/* the name list is released on both outcomes */
+	metafreenamelist(stripes);
+	return (status);
+}
+
+/*
+ * Check that np may be used as a stripe component.
+ *
+ * A soft partition must be able to have a parent and must not have one
+ * yet; anything else must pass metachkcomp().  Unless "force" is set
+ * the device must not be in use.  It must also be in the set and not
+ * already part of a metadevice.  Returns 0 if all checks pass.
+ */
+int
+meta_check_component(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	int		force,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	chkopts = (MDCHK_ALLOW_MDDB);
+
+	/* meta_sp_issp() returns 0 when np IS a soft partition */
+	if (meta_sp_issp(sp, np, ep) == 0) {
+		md_common_t	*unitp = meta_get_unit(sp, np, ep);
+
+		if (unitp == NULL) {
+			return (mdmderror(ep, MDE_INVAL_UNIT, NULL,
+			    np->cname));
+		}
+		if (unitp->capabilities == MD_CANT_PARENT) {
+			return (mdmderror(ep, MDE_INVAL_UNIT, NULL,
+			    np->cname));
+		}
+		if (MD_HAS_PARENT(unitp->parent)) {
+			mdname_t	*parentnp;
+
+			/* already owned: report who the owner is */
+			parentnp = metamnumname(&sp, unitp->parent, 0, ep);
+			if (parentnp == NULL)
+				return (-1);
+			return (mduseerror(ep, MDE_ALREADY, np->dev,
+			    parentnp->cname, np->cname));
+		}
+	} else if (metachkcomp(np, ep) != 0) {
+		/* not a soft partition, and not a usable disk either */
+		return (-1);
+	}
+
+	/* the in-use check is the only one "force" switches off */
+	if (!force && (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0))
+		return (-1);
+
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	if (meta_check_inmeta(sp, np, chkopts, 0, -1, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Print a stripe on a single logical line: name, row count, then for
+ * each row its component count, the components and (for more than one
+ * component) "-i <interlace>b", and finally "-h <pool>" if there is a
+ * hot spare pool.
+ *
+ * With PRINT_LARGEDEVICES set, stripes whose revision is not
+ * MD_64BIT_META_DEV are silently skipped.  Returns 0 on success; on an
+ * output error a system error for fname is recorded in ep and -1 is
+ * returned.
+ */
+static int
+stripe_print(
+	md_stripe_t	*stripep,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t	nrows = stripep->rows.rows_len;
+	uint_t	r;
+
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (stripep->common.revision != MD_64BIT_META_DEV))
+		return (0);
+
+	/* name and number of rows */
+	if (fprintf(fp, "%s %u",
+	    stripep->common.namep->cname, nrows) == EOF)
+		goto fail;
+
+	for (r = 0; r < nrows; r++) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+		uint_t		c;
+
+		/* number of components in this row */
+		if (fprintf(fp, " %u", rowp->comps.comps_len) == EOF)
+			goto fail;
+
+		for (c = 0; c < rowp->comps.comps_len; c++) {
+			mdname_t	*compnp;
+			char		*shown;
+
+			compnp = rowp->comps.comps_val[c].compnamep;
+
+			/*
+			 * Names under the standard /dev/rdsk,
+			 * /dev/md/rdsk or /dev/td/ paths are printed in
+			 * their short form, which metainit resolves by
+			 * default; anything else needs the full
+			 * pathname to work as intended.
+			 */
+			if ((strstr(compnp->rname, "/dev/rdsk") != NULL) ||
+			    (strstr(compnp->rname, "/dev/md/rdsk") != NULL) ||
+			    (strstr(compnp->rname, "/dev/td/") != NULL))
+				shown = compnp->cname;
+			else
+				shown = compnp->rname;
+
+			if (fprintf(fp, " %s", shown) == EOF)
+				goto fail;
+		}
+
+		/* an interlace only makes sense with several components */
+		if ((rowp->comps.comps_len > 1) &&
+		    (fprintf(fp, " -i %lldb", rowp->interlace) == EOF))
+			goto fail;
+
+		/* every row but the last ends in a continuation */
+		if ((r != (nrows - 1)) &&
+		    (fprintf(fp, " \\\n\t") == EOF))
+			goto fail;
+	}
+
+	if ((stripep->hspnamep != NULL) &&
+	    (fprintf(fp, " -h %s", stripep->hspnamep->hspname) == EOF))
+		goto fail;
+
+	if (fprintf(fp, "\n") == EOF)
+		goto fail;
+
+	return (0);
+
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * Map a component's state to its (localized) display name.
+ *
+ * If tvp is not NULL it receives the component's timestamp.  A
+ * non-zero tstate overrides the stored state with "Unavailable".
+ */
+char *
+comp_state_to_name(
+	md_comp_t	*mdcp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate	/* Errored tstate flags */
+)
+{
+	/* hand out the timestamp whatever the state turns out to be */
+	if (tvp != NULL)
+		*tvp = mdcp->timestamp;
+
+	if (tstate != 0)
+		return (dgettext(TEXT_DOMAIN, "Unavailable"));
+
+	if (mdcp->state == CS_OKAY)
+		return (dgettext(TEXT_DOMAIN, "Okay"));
+	if (mdcp->state == CS_ERRED)
+		return (dgettext(TEXT_DOMAIN, "Maintenance"));
+	if (mdcp->state == CS_LAST_ERRED)
+		return (dgettext(TEXT_DOMAIN, "Last Erred"));
+	if (mdcp->state == CS_RESYNC)
+		return (dgettext(TEXT_DOMAIN, "Resyncing"));
+
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * print subdevice stripe row
+ *
+ * Writes a column header and then one line per component of row rp to
+ * fp: device name, start block, whether it holds a state database
+ * replica, state, devid ("Reloc") status and hot spare name; with
+ * PRINT_TIMES the time of the last state change is appended.
+ *
+ * top_tstate is the errored tstate of the stripe itself; when it is
+ * set, components that are not metadevices show "-" as their state.
+ *
+ * Returns 0 on success.  Lookup failures return -1 with the error the
+ * failing routine left in ep; output failures record a system error
+ * for fname in ep and return -1.
+ */
+static int
+subdev_row_report(
+	mdsetname_t	*sp,
+	md_row_t	*rp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	uint_t		top_tstate,	/* Errored tstate flags */
+	md_error_t	*ep
+)
+{
+	uint_t		comp;
+	int		rval = -1;
+	ddi_devid_t	dtp;
+	int		len = 0;
+
+	/*
+	 * Size the device column to the longest component name (or the
+	 * column title) so really long ctd names still line up.
+	 */
+	for (comp = 0; (comp < rp->comps.comps_len); ++comp) {
+		md_comp_t	*cp = &rp->comps.comps_val[comp];
+		char		*cname = cp->compnamep->cname;
+
+		len = max(len, strlen(cname));
+	}
+
+	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+	len += 2;
+
+	/* print header */
+	if (! (options & PRINT_TIMES)) {
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %5.5s %12.12s %5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "State"),
+		    dgettext(TEXT_DOMAIN, "Reloc"),
+		    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
+			goto out;
+		}
+	} else {
+		if (fprintf(fp,
+		    "\t%-*s %5s %5s %-11s %-5s %-9s %s\n",
+		    len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "State"),
+		    dgettext(TEXT_DOMAIN, "Reloc"),
+		    dgettext(TEXT_DOMAIN, "Hot Spare"),
+		    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
+			goto out;
+		}
+	}
+
+	/* print components */
+	for (comp = 0; (comp < rp->comps.comps_len); ++comp) {
+		md_comp_t	*cp = &rp->comps.comps_val[comp];
+		mdname_t	*namep = cp->compnamep;
+		char		*cname = namep->cname;
+		diskaddr_t	start_blk;
+		int		has_mddb;
+		char		*has_mddb_str;
+		char		*comp_state;
+		/*
+		 * Start from the component's own timestamp: the "-"
+		 * state path below never calls comp_state_to_name(),
+		 * which would otherwise leave tv uninitialized when it
+		 * is printed for PRINT_TIMES.
+		 */
+		md_timeval32_t	tv = cp->timestamp;
+		char		*hsname = ((cp->hsnamep != NULL) ?
+				    cp->hsnamep->cname : "");
+		char		*devid = " ";
+		mdname_t	*didnp = NULL;
+		uint_t		tstate = 0;
+
+		/* get info */
+		if ((start_blk = metagetstart(sp, namep, ep)) ==
+		    MD_DISKADDR_ERROR) {
+			return (-1);
+		}
+		if ((has_mddb = metahasmddb(sp, namep, ep)) < 0) {
+			return (-1);
+		}
+		if (has_mddb)
+			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+		else
+			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+		/*
+		 * If the component is a metadevice, print out either
+		 * unavailable or the state of the metadevice, if not
+		 * a metadevice, print nothing if the state of the
+		 * stripe is unavailable
+		 */
+		if (metaismeta(namep)) {
+			if (meta_get_tstate(namep->dev, &tstate, ep) != 0)
+				return (-1);
+			comp_state = comp_state_to_name(cp, &tv, tstate &
+			    MD_DEV_ERRORED);
+		} else {
+			/*
+			 * if top_tstate is set, that implies that you have
+			 * a ctd type device with an unavailable metadevice
+			 * on top of it. If so, print a - for its state
+			 */
+			if (top_tstate != 0)
+				comp_state = "-";
+			else
+				comp_state = comp_state_to_name(cp, &tv,
+				    tstate & MD_DEV_ERRORED);
+		}
+
+		/* populate the key in the name_p structure */
+		if ((didnp = metadevname(&sp, namep->dev, ep))
+		    == NULL) {
+			return (-1);
+		}
+
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL)
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info, using the column width computed above */
+		if (! (options & PRINT_TIMES)) {
+			if (fprintf(fp,
+			    "\t%-*s %8lld %-5.5s %12.12s %5.5s %s\n",
+			    len, cname, start_blk,
+			    has_mddb_str, comp_state, devid, hsname) == EOF) {
+				goto out;
+			}
+		} else {
+			char	*timep = meta_print_time(&tv);
+
+			if (fprintf(fp,
+			    "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
+			    len, cname, start_blk,
+			    has_mddb_str, comp_state, devid, hsname,
+			    timep) == EOF) {
+				goto out;
+			}
+		}
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * Print one row of a top-level stripe: a column header followed by one
+ * line per component giving its name, start block, whether it holds a
+ * state database replica, and its devid ("Reloc") status.
+ *
+ * Returns 0 on success.  Lookup failures return -1 with whatever the
+ * failing routine left in ep; output failures record a system error
+ * for fname in ep and return -1.
+ */
+/*ARGSUSED4*/
+static int
+toplev_row_report(
+	mdsetname_t	*sp,
+	md_row_t	*rp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	char	*reloc = " ";
+	int	width = 0;
+	uint_t	i;
+
+	/*
+	 * The device column is as wide as the longest component name
+	 * (or the column title), which copes with very long ctd names.
+	 */
+	for (i = 0; i < rp->comps.comps_len; i++) {
+		width = max(width,
+		    strlen(rp->comps.comps_val[i].compnamep->cname));
+	}
+	width = max(width, strlen(dgettext(TEXT_DOMAIN, "Device")));
+	width += 2;
+
+	/* column titles */
+	if (fprintf(fp,
+	    "\t%-*.*s %-12.12s %-5.5s\t%s\n",
+	    width, width,
+	    dgettext(TEXT_DOMAIN, "Device"),
+	    dgettext(TEXT_DOMAIN, "Start Block"),
+	    dgettext(TEXT_DOMAIN, "Dbase"),
+	    dgettext(TEXT_DOMAIN, "Reloc")) == EOF)
+		goto fail;
+
+	/* one line per component */
+	for (i = 0; i < rp->comps.comps_len; i++) {
+		mdname_t	*compnp = rp->comps.comps_val[i].compnamep;
+		mdname_t	*keynp;
+		diskaddr_t	first_blk;
+		int		replica;
+		char		*replica_str;
+
+		first_blk = metagetstart(sp, compnp, ep);
+		if (first_blk == MD_DISKADDR_ERROR)
+			return (-1);
+
+		replica = metahasmddb(sp, compnp, ep);
+		if (replica < 0)
+			return (-1);
+		replica_str = replica ? dgettext(TEXT_DOMAIN, "Yes") :
+		    dgettext(TEXT_DOMAIN, "No");
+
+		/* the name looked up by dev supplies the key used below */
+		keynp = metadevname(&sp, compnp->dev, ep);
+		if (keynp == NULL)
+			return (-1);
+
+		/* the devid itself is not needed, only whether one exists */
+		if (options & PRINT_DEVID) {
+			ddi_devid_t	did;
+
+			did = meta_getdidbykey(sp->setno, getmyside(sp, ep),
+			    keynp->key, ep);
+			if (did == NULL) {
+				reloc = dgettext(TEXT_DOMAIN, "No ");
+			} else {
+				reloc = dgettext(TEXT_DOMAIN, "Yes");
+				free(did);
+			}
+		}
+
+		if (fprintf(fp,
+		    "\t%-*s %8lld %-5.5s\t%s\n", width,
+		    compnp->cname, first_blk, replica_str, reloc) == EOF)
+			goto fail;
+	}
+
+	return (0);
+
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * Print the stripe's options, i.e. its hot spare pool ("none" when
+ * hspnamep is NULL).  Returns 0 on success; on an output error a
+ * system error for fname is recorded in ep and -1 is returned.
+ */
+int
+meta_print_stripe_options(
+	mdhspname_t	*hspnamep,
+	char		*fname,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	char	*pool;
+
+	if (hspnamep != NULL)
+		pool = hspnamep->hspname;
+	else
+		pool = dgettext(TEXT_DOMAIN, "none");
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN,
+	    " Hot spare pool: %s\n"), pool) == EOF) {
+		(void) mdsyserror(ep, errno, fname);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * report stripe
+ *
+ * Writes the long-form description of a stripe to fp: optional header
+ * line, hot spare pool, an "Unavailable" notice if the stripe's tstate
+ * is errored, the size, and then every row via subdev_row_report()
+ * (stripe has a parent) or toplev_row_report() (it does not).
+ *
+ * With PRINT_LARGEDEVICES set, stripes whose revision is not
+ * MD_64BIT_META_DEV are silently skipped, and for the others the
+ * underlying devices are collected into nlpp with meta_getdevs().
+ *
+ * Returns 0 on success.  When a helper fails, -1 is returned with the
+ * helper's error left untouched in ep; when writing to fp fails, a
+ * system error for fname is recorded in ep and -1 is returned.
+ */
+static int
+stripe_report(
+	mdsetname_t	*sp,
+	md_stripe_t	*stripep,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t		row;
+	int		rval = -1;
+	uint_t		tstate = 0;
+
+	/*
+	 * if the -B option has been specified check to see if the
+	 * metadevice is a "big" one and print if so, also if a
+	 * big device we need to store the ctd involved for use in
+	 * printing out the relocation information.
+	 */
+	if (options & PRINT_LARGEDEVICES) {
+		if (stripep->common.revision != MD_64BIT_META_DEV) {
+			rval = 0;
+			goto out;
+		} else {
+			/*
+			 * Return directly: going through "out" would
+			 * replace the error meta_getdevs() recorded in
+			 * ep with an unrelated errno-based one.
+			 */
+			if (meta_getdevs(sp, stripep->common.namep,
+			    nlpp, ep) != 0)
+				return (-1);
+		}
+	}
+
+	/* print header */
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, "%s: Concat/Stripe\n",
+		    stripep->common.namep->cname) == EOF) {
+			goto out;
+		}
+
+	}
+
+	/* print hotspare pool */
+	if (stripep->hspnamep != NULL) {
+		if (meta_print_stripe_options(stripep->hspnamep,
+		    fname, fp, ep) != 0) {
+			return (-1);
+		}
+	}
+
+	/* tstate stays 0 unless the stripe's own name is a metadevice */
+	if (metaismeta(stripep->common.namep)) {
+		if (meta_get_tstate(stripep->common.namep->dev, &tstate, ep)
+		    != 0)
+			return (-1);
+	}
+	if ((tstate & MD_DEV_ERRORED) != 0) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " State: Unavailable\n"
+		    " Reconnect disk and invoke: metastat -i\n")) == EOF) {
+			goto out;
+		}
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"),
+	    stripep->common.size,
+	    meta_number_to_string(stripep->common.size, DEV_BSIZE))
+	    == EOF) {
+		goto out;
+	}
+
+	/* print rows */
+	for (row = 0; (row < stripep->rows.rows_len); ++row) {
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+
+		/* print stripe and interlace */
+		if (rp->comps.comps_len > 1) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Stripe %u: (interlace: %lld blocks)\n"),
+			    row, rp->interlace) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Stripe %u:\n"),
+			    row) == EOF) {
+				goto out;
+			}
+		}
+
+		/* print components appropriately */
+		if (MD_HAS_PARENT(stripep->common.parent)) {
+			if (subdev_row_report(sp, rp, fname, fp, options,
+			    tstate & MD_DEV_ERRORED, ep) != 0) {
+				return (-1);
+			}
+		} else {
+			if (toplev_row_report(sp, rp, fname, fp, options,
+			    ep) != 0) {
+				return (-1);
+			}
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error (output failures only) */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * Print or report a stripe.
+ *
+ * With stripenp == NULL every stripe of the set is handled in turn; a
+ * failure on one does not stop the others but makes the call return
+ * -1.  Otherwise the named stripe is printed in short (PRINT_SHORT) or
+ * long form, followed by any of its components that are themselves
+ * metadevices.  A stripe that has a parent is skipped unless
+ * PRINT_SUBDEVS is set.
+ */
+int
+meta_stripe_print(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	int		r, c;
+	int		status;
+
+	/* the set must be given and must match the stripe's minor */
+	assert(sp != NULL);
+	assert((stripenp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev))));
+
+	/* no name given: walk all stripes of the set */
+	if (stripenp == NULL) {
+		mdnamelist_t	*all = NULL;
+		mdnamelist_t	*cur;
+		int		found;
+
+		found = meta_get_stripe_names(sp, &all, options, ep);
+		if (found < 0)
+			return (-1);
+		if (found == 0)
+			return (0);
+
+		status = 0;
+		for (cur = all; cur != NULL; cur = cur->next) {
+			if (meta_stripe_print(sp, cur->namep, nlpp, fname,
+			    fp, options, ep) != 0)
+				status = -1;
+		}
+
+		metafreenamelist(all);
+		return (status);
+	}
+
+	/* fetch the unit, with fast name lookups if PRINT_FAST is set */
+	stripep = meta_get_stripe_common(sp, stripenp,
+	    ((options & PRINT_FAST) ? 1 : 0), ep);
+	if (stripep == NULL)
+		return (-1);
+
+	/* parented stripes are only shown as somebody's subdevice */
+	if (!(options & PRINT_SUBDEVS) &&
+	    MD_HAS_PARENT(stripep->common.parent))
+		return (0);
+
+	/* short or long form */
+	if (options & PRINT_SHORT)
+		status = stripe_print(stripep, fname, fp, options, ep);
+	else
+		status = stripe_report(sp, stripep, nlpp, fname, fp,
+		    options, ep);
+	if (status != 0)
+		return (-1);
+
+	/* descend into components that are metadevices themselves */
+	for (r = 0; r < stripep->rows.rows_len; r++) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+
+		for (c = 0; c < rowp->comps.comps_len; c++) {
+			mdname_t	*compnp;
+
+			compnp = rowp->comps.comps_val[c].compnamep;
+			if (!metaismeta(compnp))
+				continue;
+
+			if (meta_print_name(sp, compnp, nlpp, fname, fp,
+			    (options | PRINT_HEADER | PRINT_SUBDEVS),
+			    NULL, ep) != 0)
+				return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Find the stripe component to replace.
+ *
+ * Two passes are made over all components: the first considers those
+ * in state CS_ERRED, the second those in state CS_LAST_ERRED.  In both
+ * passes a component becomes the candidate if there is none yet or if
+ * its lasterrcnt is lower than the current candidate's.
+ *
+ * *compnpp is set to the candidate's name (NULL if there is none) and
+ * *compstate to its state (left untouched if there is none).
+ * Returns 0, or -1 if the stripe cannot be fetched.
+ */
+int
+meta_find_erred_comp(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdname_t	**compnpp,
+	comp_state_t	*compstate,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	md_comp_t	*best = NULL;
+	uint_t		lowest = 0;
+	comp_state_t	wanted[2];
+	int		pass;
+
+	wanted[0] = CS_ERRED;
+	wanted[1] = CS_LAST_ERRED;
+
+	/* no candidate until one is found */
+	*compnpp = NULL;
+
+	stripep = meta_get_stripe_common(sp, stripenp, 1, ep);
+	if (stripep == NULL)
+		return (-1);
+
+	for (pass = 0; pass < 2; pass++) {
+		uint_t	r;
+
+		for (r = 0; r < stripep->rows.rows_len; r++) {
+			md_row_t	*rowp = &stripep->rows.rows_val[r];
+			uint_t		c;
+
+			for (c = 0; c < rowp->comps.comps_len; c++) {
+				md_comp_t	*cp = &rowp->comps.comps_val[c];
+
+				if (cp->state != wanted[pass])
+					continue;
+				if ((best != NULL) &&
+				    (cp->lasterrcnt >= lowest))
+					continue;
+
+				best = cp;
+				lowest = cp->lasterrcnt;
+			}
+		}
+	}
+
+	if (best != NULL) {
+		*compnpp = best->compnamep;
+		*compstate = best->state;
+	}
+
+	return (0);
+}
+
+/*
+ * Invalidate the name of every component of a stripe.
+ * Returns 0, or -1 if the stripe cannot be fetched.
+ */
+static int
+invalidate_components(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep = meta_get_stripe(sp, stripenp, ep);
+	uint_t		r, c;
+
+	if (stripep == NULL)
+		return (-1);
+
+	for (r = 0; r < stripep->rows.rows_len; r++) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+
+		for (c = 0; c < rowp->comps.comps_len; c++)
+			meta_invalidate_name(rowp->comps.comps_val[c].compnamep);
+	}
+
+	return (0);
+}
+
+/*
+ * attach components to stripe
+ *
+ * The components named in nlp are appended to the metadevice stripenp
+ * as one additional row.  A new unit structure, sized for the existing
+ * rows plus the new row, is allocated; only the new row and its
+ * components are filled in here before the structure is handed to the
+ * driver with the MD_IOCGROW ioctl.
+ *
+ *	sp		set the stripe lives in (asserted non-NULL)
+ *	stripenp	name of the stripe to grow
+ *	nlp		list of components to add (asserted non-NULL)
+ *	interlace	interlace of the new row; 0 inherits the interlace
+ *			of the last existing row
+ *	options		MDCMD_DOIT really attaches (without it this is a
+ *			dry run that leaves the namespace alone);
+ *			MDCMD_PRINT reports the outcome on stdout
+ *	ep		error return
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int
+meta_stripe_attach(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdnamelist_t	*nlp,
+	diskaddr_t	interlace,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*lp;
+	ms_unit_t	*old_un, *new_un;
+	struct ms_row	*mdr, *new_mdr;
+	uint_t		newcomps, ncomps, icomp;
+	uint_t		row;
+	size_t		mdsize, first_comp;
+	diskaddr_t	new_blks;
+	diskaddr_t	limit;
+	diskaddr_t	disk_size = 0;
+	ms_comp_t	*mdcomp, *new_comp;
+	uint_t		write_reinstruct = 0;
+	uint_t		read_reinstruct = 0;
+	mdnamelist_t	*keynlp = NULL;
+	uint_t		round_cyl = 1;
+	minor_t		parent;
+	md_grow_params_t	mgp;
+	int		rval = -1;
+	md_timeval32_t	creation_time;
+	int		create_flag = MD_CRO_32BIT;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+	/* check type */
+	if (metachkmeta(stripenp, ep) != 0)
+		return (-1);
+
+	/* check and count components */
+	assert(nlp != NULL);
+	newcomps = 0;
+	for (lp = nlp; (lp != NULL); lp = lp->next) {
+		mdname_t	*np = lp->namep;
+		mdnamelist_t	*p;
+
+		/* check against existing devices */
+		if (meta_check_component(sp, np, 0, ep) != 0)
+			return (-1);
+
+		/* check against ourselves (the rest of the new list) */
+		for (p = lp->next; (p != NULL); p = p->next) {
+			if (meta_check_overlap(np->cname, np, 0, -1,
+			    p->namep, 0, -1, ep) != 0) {
+				return (-1);
+			}
+		}
+
+		/* count */
+		++newcomps;
+	}
+
+	/* get old unit; it is released with Free() at "out" */
+	if ((old_un = (ms_unit_t *)meta_get_mdunit(sp, stripenp, ep)) == NULL)
+		return (-1);
+
+	/* if zero, inherit the last rows interlace value */
+	if (interlace == 0) {
+		mdr = &old_un->un_row[old_un->un_nrows - 1];
+		interlace = mdr->un_interlace;
+	}
+
+	/*
+	 * calculate size of new unit structure
+	 */
+
+	/* unit + rows (one more row than the old unit) */
+	mdsize = sizeof (ms_unit_t) - sizeof (struct ms_row);
+	mdsize += sizeof (struct ms_row) * (old_un->un_nrows + 1);
+
+	/* number of new components being added */
+	ncomps = newcomps;
+
+	/* count the # of components in the old unit */
+	mdr = &old_un->un_row[0];
+	for (row = 0; (row < old_un->un_nrows); row++)
+		ncomps += mdr[row].un_ncomp;
+
+	/* the component array starts at the next long long boundary */
+	first_comp = roundup(mdsize, sizeof (long long));
+	mdsize += sizeof (ms_comp_t) * ncomps + (first_comp - mdsize);
+
+	/* allocate new unit */
+	new_un = Zalloc(mdsize);
+	new_un->un_ocomp = first_comp;
+
+	/* compute new data: the new row goes right after the old rows */
+	new_mdr = &new_un->un_row[old_un->un_nrows];
+	new_mdr->un_icomp = ncomps - newcomps;
+	new_mdr->un_ncomp = newcomps;
+	new_mdr->un_blocks = 0;
+	new_mdr->un_cum_blocks =
+	    old_un->un_row[old_un->un_nrows - 1].un_cum_blocks;
+	new_mdr->un_interlace = interlace;
+
+	/* for each new device */
+	mdcomp = (struct ms_comp *)(void *)&((char *)new_un)[new_un->un_ocomp];
+	icomp = new_mdr->un_icomp;
+	if (meta_gettimeofday(&creation_time) == -1) {
+		/*
+		 * Both unit buffers are already allocated, so leave
+		 * through "out" to release them instead of returning
+		 * directly.
+		 */
+		(void) mdsyserror(ep, errno, NULL);
+		goto out;
+	}
+	for (lp = nlp; (lp != NULL); lp = lp->next) {
+		mdname_t	*np = lp->namep;
+		diskaddr_t	size, start_blk;
+		mdgeom_t	*geomp;
+
+		/* figure out how big */
+		if ((size = metagetsize(np, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		if ((start_blk = metagetstart(sp, np, ep)) ==
+		    MD_DISKADDR_ERROR)
+			goto out;
+		if (start_blk >= size) {
+			(void) mdsyserror(ep, ENOSPC, np->cname);
+			goto out;
+		}
+		size -= start_blk;
+
+		/* only a multi-component row is rounded to the interlace */
+		if (newcomps > 1)
+			size = rounddown(size, interlace);
+
+		/* adjust for smallest disk */
+		if (disk_size == 0) {
+			disk_size = size;
+		} else if (size < disk_size) {
+			disk_size = size;
+		}
+
+		/* get worst reinstructs */
+		if ((geomp = metagetgeom(np, ep)) == NULL)
+			goto out;
+		if (geomp->write_reinstruct > write_reinstruct)
+			write_reinstruct = geomp->write_reinstruct;
+		if (geomp->read_reinstruct > read_reinstruct)
+			read_reinstruct = geomp->read_reinstruct;
+
+		/* In dryrun mode (DOIT not set) we must not alter the mddb */
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (add_key_name(sp, np, &keynlp, ep) != 0)
+				goto out;
+		}
+
+		/* build new component */
+		new_comp = &mdcomp[icomp++];
+		new_comp->un_key = np->key;
+		new_comp->un_dev = np->dev;
+		new_comp->un_start_block = start_blk;
+		new_comp->un_mirror.ms_state = CS_OKAY;
+		new_comp->un_mirror.ms_timestamp = creation_time;
+	}
+
+	limit = LLONG_MAX;
+
+	/* compute new size, clamping the cumulative size at the limit */
+	new_mdr->un_blocks = new_mdr->un_ncomp * disk_size;
+	new_blks = new_mdr->un_cum_blocks + new_mdr->un_blocks;
+	if (new_blks > limit) {
+		new_mdr->un_cum_blocks = limit;
+		new_blks = limit;
+		md_eprintf(dgettext(TEXT_DOMAIN,
+		    "unit size overflow, limit is %lld blocks\n"),
+		    limit);
+	} else {
+		new_mdr->un_cum_blocks += new_mdr->un_blocks;
+	}
+	new_un->c.un_actual_tb = new_mdr->un_cum_blocks;
+	new_un->un_nrows = old_un->un_nrows + 1;
+
+	/* adjust geometry, starting from the old unit's values */
+	new_un->c.un_nhead = old_un->c.un_nhead;
+	new_un->c.un_nsect = old_un->c.un_nsect;
+	new_un->c.un_rpm = old_un->c.un_rpm;
+	new_un->c.un_wr_reinstruct = old_un->c.un_wr_reinstruct;
+	new_un->c.un_rd_reinstruct = old_un->c.un_rd_reinstruct;
+	if (meta_adjust_geom((md_unit_t *)new_un, stripenp,
+	    write_reinstruct, read_reinstruct, round_cyl, ep) != 0)
+		goto out;
+
+	/* if in dryrun mode, we are done here. */
+	if ((options & MDCMD_DOIT) == 0) {
+		if (options & MDCMD_PRINT) {
+			if (newcomps == 1) {
+				(void) printf(dgettext(TEXT_DOMAIN,
+				    "%s: attaching component would suceed\n"),
+				    stripenp->cname);
+			} else {
+				(void) printf(dgettext(TEXT_DOMAIN,
+				    "%s: attaching components would suceed\n"),
+				    stripenp->cname);
+			}
+		}
+		rval = 0;	/* success */
+		goto out;
+	}
+
+	create_flag = meta_check_devicesize(new_un->c.un_total_blocks);
+
+	/* grow stripe */
+	(void) memset(&mgp, 0, sizeof (mgp));
+	mgp.mnum = MD_SID(old_un);
+	MD_SETDRIVERNAME(&mgp, MD_STRIPE, sp->setno);
+	mgp.size = mdsize;
+	mgp.mdp = (uintptr_t)new_un;
+	mgp.nrows = old_un->un_nrows;
+	if (create_flag == MD_CRO_32BIT) {
+		mgp.options = MD_CRO_32BIT;
+		new_un->c.un_revision = MD_32BIT_META_DEV;
+	} else {
+		mgp.options = MD_CRO_64BIT;
+		new_un->c.un_revision = MD_64BIT_META_DEV;
+	}
+
+	/* pass along a single parent, if the old unit has exactly one */
+	if ((MD_HAS_PARENT(old_un->c.un_parent)) &&
+	    (old_un->c.un_parent != MD_MULTI_PARENT)) {
+		mgp.npar = 1;
+		parent = old_un->c.un_parent;
+		mgp.par = (uintptr_t)(&parent);
+	}
+
+	if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &mgp.mde);
+		goto out;
+	}
+
+	/*
+	 * clear cache
+	 *
+	 * NOTE(review): a failure here leaves through "out" with rval
+	 * still -1, which deletes the key names even though the grow
+	 * ioctl above has already succeeded -- confirm this is intended.
+	 */
+	if (invalidate_components(sp, stripenp, ep) != 0)
+		goto out;
+	meta_invalidate_name(stripenp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		if (newcomps == 1) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: component is attached\n"), stripenp->cname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: components are attached\n"), stripenp->cname);
+		}
+		(void) fflush(stdout);
+	}
+
+	/* grow any parents */
+	if (meta_concat_parent(sp, stripenp, ep) != 0) {
+		/*
+		 * The components are attached at this point, so their
+		 * keys are kept (as before); only release the memory
+		 * that a bare return used to leak.
+		 */
+		Free(old_un);
+		Free(new_un);
+		metafreenamelist(keynlp);
+		return (-1);
+	}
+
+	rval = 0;	/* success */
+
+	/* cleanup, return error */
+out:
+	Free(old_un);
+	Free(new_un);
+	if (options & MDCMD_DOIT) {
+		/* on failure, back out the names added to the namespace */
+		if (rval != 0)
+			(void) del_key_names(sp, keynlp, NULL);
+		metafreenamelist(keynlp);
+	}
+	return (rval);
+}
+
+/*
+ * Fetch the settable parameters of a stripe.
+ *
+ * On success *paramsp is zeroed and its hsp_id is set to the hsp of the
+ * stripe's hot spare pool name, or to MD_HSP_NONE when the stripe has no
+ * hot spare pool name.
+ *
+ * Returns 0 on success, -1 when the name check or the stripe lookup
+ * fails.
+ */
+int
+meta_stripe_get_params(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	ms_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+
+	/* the caller must pass the set the stripe's minor belongs to */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+	/* check the name first, then fetch the stripe */
+	if (metachkmeta(stripenp, ep) != 0)
+		return (-1);
+	stripep = meta_get_stripe(sp, stripenp, ep);
+	if (stripep == NULL)
+		return (-1);
+
+	/* start from a clean structure and fill in the hot spare pool */
+	(void) memset(paramsp, 0, sizeof (*paramsp));
+	if (stripep->hspnamep != NULL)
+		paramsp->hsp_id = stripep->hspnamep->hsp;
+	else
+		paramsp->hsp_id = MD_HSP_NONE;
+
+	return (0);
+}
+
+/*
+ * Change the settable parameters of a stripe.
+ *
+ * *paramsp is copied into an md_stripe_params_t and sent to the stripe
+ * driver with the MD_IOCCHANGE ioctl.  When the ioctl succeeds the cached
+ * name of the stripe is invalidated.
+ *
+ * Returns 0 on success, -1 when the name check fails, or the result of
+ * mdstealerror() when the ioctl fails.
+ */
+int
+meta_stripe_set_params(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	ms_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_stripe_params_t	msp;
+
+	/* the caller must pass the set the stripe's minor belongs to */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+	if (metachkmeta(stripenp, ep) != 0)
+		return (-1);
+
+	/* build the ioctl argument */
+	(void) memset(&msp, 0, sizeof (msp));
+	MD_SETDRIVERNAME(&msp, MD_STRIPE, sp->setno);
+	msp.mnum = meta_getminor(stripenp->dev);
+	msp.params = *paramsp;
+
+	if (metaioctl(MD_IOCCHANGE, &msp, &msp.mde, stripenp->cname) == 0) {
+		/* the cached view of the stripe is stale now */
+		meta_invalidate_name(stripenp);
+		return (0);
+	}
+
+	/* hand the driver's error back to the caller */
+	return (mdstealerror(ep, &msp.mde));
+}
+
+/*
+ * Check one component of a stripe for duplicates inside the stripe.
+ *
+ * The component at (row, comp) is compared, using meta_check_overlap(),
+ * with every component that comes before it: all components of the
+ * earlier rows, and the components of its own row with a lower index.
+ *
+ * Returns 0 when no overlap is reported, -1 as soon as
+ * meta_check_overlap() returns non-zero.
+ */
+static int
+check_twice(
+	md_stripe_t	*stripep,
+	uint_t		row,
+	uint_t		comp,
+	md_error_t	*ep
+)
+{
+	md_row_t	*rows = stripep->rows.rows_val;
+	mdname_t	*stripenp = stripep->common.namep;
+	mdname_t	*thisnp = rows[row].comps.comps_val[comp].compnamep;
+	uint_t		r, c;
+
+	for (r = 0; r <= row; r++) {
+		md_row_t	*rowp = &rows[r];
+		/* in our own row stop just short of ourselves */
+		uint_t		last = (r != row) ? rowp->comps.comps_len : comp;
+
+		for (c = 0; c < last; c++) {
+			mdname_t	*othernp =
+			    rowp->comps.comps_val[c].compnamep;
+
+			if (meta_check_overlap(stripenp->cname, thisnp, 0, -1,
+			    othernp, 0, -1, ep) != 0)
+				return (-1);
+		}
+	}
+	return (0);
+}
+
+/*
+ * default stripe interlace
+ *
+ * Returns the default interlace as a btodb() block count: 16k, raised
+ * to a multiple of itself if that is below the minimum interlace.
+ *
+ * MININTERLACE is wrapped in btodb() wherever this file compares it to
+ * an interlace, so it is converted here as well before rounding; the
+ * previous code rounded the unconverted value into a block count.  With
+ * a 16k default the branch is only taken if MININTERLACE exceeds 16k.
+ */
+diskaddr_t
+meta_default_stripe_interlace(void)
+{
+	diskaddr_t	interlace;
+
+	/* default to 16k, round up if necessary */
+	interlace = btodb(16 * 1024);
+	if (interlace < btodb(MININTERLACE))
+		interlace = roundup(btodb(MININTERLACE), interlace);
+	return (interlace);
+}
+
+/*
+ * Validate an interlace value.
+ *
+ * The interlace is accepted when it lies between btodb(MININTERLACE)
+ * and btodb(MAXINTERLACE), both ends included.
+ *
+ * Returns 0 when the value is in range; otherwise returns the result of
+ * mderror() after recording MDE_BAD_INTERLACE against uname in ep.
+ */
+int
+meta_stripe_check_interlace(
+	diskaddr_t	interlace,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	int	in_range;
+
+	in_range = (interlace >= btodb(MININTERLACE)) &&
+	    (interlace <= btodb(MAXINTERLACE));
+	if (in_range)
+		return (0);
+
+	return (mderror(ep, MDE_BAD_INTERLACE, uname));
+}
+
+
+/*
+ * Validate a stripe description before it is created.
+ *
+ * Checks, in this order and stopping at the first failure:
+ *   - the stripe has at least one row and every row has at least one
+ *     component (MDE_BAD_STRIPE otherwise);
+ *   - each row's interlace is acceptable; a row interlace of 0 is first
+ *     replaced by meta_default_stripe_interlace();
+ *   - unless MDCMD_UPDATE is set, each component passes
+ *     meta_check_component() and still has space left after its start
+ *     block is subtracted and the size is rounded down to the row
+ *     interlace (ENOSPC otherwise);
+ *   - no component overlaps an earlier component of the same stripe;
+ *   - when MDCMD_DOIT is set and a hot spare pool is named, the pool
+ *     passes metachkhsp().
+ *
+ * Returns 0 on success; on failure -1 or the return value of the
+ * error-setting routine.
+ */
+int
+meta_check_stripe(
+	mdsetname_t	*sp,
+	md_stripe_t	*stripep,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*stripenp = stripep->common.namep;
+	int		force = ((options & MDCMD_FORCE) ? 1 : 0);
+	uint_t		r;
+
+	/* need at least one row */
+	if (stripep->rows.rows_len < 1) {
+		return (mdmderror(ep, MDE_BAD_STRIPE,
+		    meta_getminor(stripenp->dev), stripenp->cname));
+	}
+
+	for (r = 0; r < stripep->rows.rows_len; r++) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+		uint_t		c;
+
+		/* need at least one component per row */
+		if (rowp->comps.comps_len < 1) {
+			return (mdmderror(ep, MDE_BAD_STRIPE,
+			    meta_getminor(stripenp->dev), stripenp->cname));
+		}
+
+		/* fill in the default interlace, then range check it */
+		if (rowp->interlace == 0)
+			rowp->interlace = meta_default_stripe_interlace();
+		if (meta_stripe_check_interlace(rowp->interlace,
+		    stripenp->cname, ep) != 0)
+			return (-1);
+
+		for (c = 0; c < rowp->comps.comps_len; c++) {
+			mdname_t	*compnp =
+			    rowp->comps.comps_val[c].compnamep;
+
+			/* device checks are skipped when updating */
+			if (!(options & MDCMD_UPDATE)) {
+				diskaddr_t	start_blk, size;
+
+				if (meta_check_component(sp, compnp,
+				    force, ep) != 0)
+					return (-1);
+
+				start_blk = metagetstart(sp, compnp, ep);
+				if (start_blk == MD_DISKADDR_ERROR)
+					return (-1);
+				size = metagetsize(compnp, ep);
+				if (size == MD_DISKADDR_ERROR)
+					return (-1);
+
+				/* something must be left past the start */
+				if (start_blk >= size) {
+					return (mdsyserror(ep, ENOSPC,
+					    compnp->cname));
+				}
+				size -= start_blk;
+				if (rounddown(size, rowp->interlace) == 0) {
+					return (mdsyserror(ep, ENOSPC,
+					    compnp->cname));
+				}
+			}
+
+			/* no duplicates inside the stripe itself */
+			if (check_twice(stripep, r, c, ep) != 0)
+				return (-1);
+		}
+	}
+
+	/* the hot spare pool name is only checked when really doing it */
+	if ((options & MDCMD_DOIT) && (stripep->hspnamep != NULL) &&
+	    (metachkhsp(sp, stripep->hspnamep, ep) != 0))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * setup stripe geometry
+ *
+ * Collects the largest write_reinstruct and read_reinstruct values over
+ * every component of stripep, then calls meta_setup_geom() on the unit
+ * ms using the geometry of the first component of the first row.
+ *
+ * When that first component's misc name is MD_SP (a soft partition, per
+ * the comment below), the unit's un_total_blocks is also compared with
+ * the size of the first row; if it is smaller, the call fails with
+ * MDE_STRIPE_TRUNC_SINGLE (one row) or MDE_STRIPE_TRUNC_MULTIPLE (more
+ * than one row).
+ *
+ * Returns 0 on success, -1 on failure with the error in ep.
+ */
+static int
+stripe_geom(
+ md_stripe_t *stripep,
+ ms_unit_t *ms,
+ md_error_t *ep
+)
+{
+ uint_t nrow = stripep->rows.rows_len;
+ uint_t write_reinstruct = 0;
+ uint_t read_reinstruct = 0;
+ uint_t round_cyl = 1;
+ uint_t row;
+ mdgeom_t *geomp;
+ diskaddr_t first_row_size = 0;
+ char *miscname;
+ int is_sp = 0;
+
+ /* get worst reinstructs */
+ for (row = 0; (row < nrow); ++row) {
+ md_row_t *rp = &stripep->rows.rows_val[row];
+ uint_t ncomp = rp->comps.comps_len;
+ uint_t comp;
+
+ for (comp = 0; (comp < ncomp); ++comp) {
+ md_comp_t *cp = &rp->comps.comps_val[comp];
+ mdname_t *compnp = cp->compnamep;
+
+ if ((geomp = metagetgeom(compnp, ep)) == NULL)
+ return (-1);
+ if (geomp->write_reinstruct > write_reinstruct)
+ write_reinstruct = geomp->write_reinstruct;
+ if (geomp->read_reinstruct > read_reinstruct)
+ read_reinstruct = geomp->read_reinstruct;
+ }
+ }
+
+ /* from here on geomp is the geometry of the very first component */
+ if ((geomp = metagetgeom(
+ stripep->rows.rows_val[0].comps.comps_val[0].compnamep,
+ ep)) == NULL) {
+ return (-1);
+ }
+ /*
+ * Figure out if the first component is a softpartition as the
+ * truncation check only occurs on them.
+ *
+ * NOTE(review): an MDE_NOT_META failure from metagetmiscname() is
+ * tolerated here (presumably: the component is not a metadevice),
+ * but ep is not cleared before carrying on -- confirm that
+ * meta_setup_geom() and the callers cope with a pre-set ep.
+ */
+ if ((miscname = metagetmiscname(
+ stripep->rows.rows_val[0].comps.comps_val[0].compnamep,
+ ep)) == NULL) {
+ if (!mdisdeverror(ep, MDE_NOT_META))
+ return (-1);
+ } else if (strcmp(miscname, MD_SP) == 0) {
+ is_sp = 1;
+ }
+
+
+ /* setup geometry from first device */
+ if (meta_setup_geom((md_unit_t *)ms, stripep->common.namep, geomp,
+ write_reinstruct, read_reinstruct, round_cyl, ep) != 0)
+ return (-1);
+
+ /*
+ * Here we want to make sure that any truncation did not
+ * result in lost data (or, more appropriately, inaccessible
+ * data).
+ *
+ * This is mainly a danger for (1, 1) concats, but it is
+ * mathematically possible for other somewhat contrived
+ * arrangements where in the sum of the lengths of each row
+ * beyond the first is smaller than the cylinder size of the
+ * only component in the first row.
+ *
+ * It is tempting to simply test for truncation here, by
+ * (md->c.un_total_blocks < md->c.un_actual_tb). That does
+ * not tell us, however, if rounding resulted in data loss,
+ * rather only that it occurred. The somewhat less obvious
+ * test below covers both the obvious (1, 1) case and the
+ * aforementioned corner case.
+ */
+ first_row_size = ms->un_row[0].un_blocks;
+ if (is_sp == 1) {
+ md_unit_t *md = (md_unit_t *)ms;
+
+ if (md->c.un_total_blocks < first_row_size) {
+ /* scratch buffer initialized from the text of ULLONG_MAX */
+ char buf[] = VAL2STR(ULLONG_MAX);
+
+ /*
+ * The only difference here is the text of the error
+ * message, since the remediation is slightly
+ * different in the one-component versus
+ * multiple-component cases.
+ */
+ if (nrow == 1) {
+ (void) mderror(ep, MDE_STRIPE_TRUNC_SINGLE,
+ stripep->common.namep->cname);
+ } else {
+ (void) mderror(ep, MDE_STRIPE_TRUNC_MULTIPLE,
+ stripep->common.namep->cname);
+ }
+
+ /*
+ * By the size comparison above and the initialization
+ * of buf[] in terms of ULLONG_MAX, we guarantee that
+ * the value arg is non-negative and that we won't
+ * overflow the container.
+ *
+ * The value attached to the error is
+ * (un_total_blocks + nhead * nsect) - first_row_size;
+ * ulltostr() is handed a pointer to the last byte of buf.
+ */
+ mderrorextra(ep, ulltostr((md->c.un_total_blocks +
+ (geomp->nhead * geomp->nsect))
+ - first_row_size, &buf[sizeof (buf) - 1]));
+
+ return (-1);
+ }
+ }
+
+ /* return success */
+ return (0);
+}
+
+/*
+ * create stripe
+ *
+ * Validates the description in stripep with meta_check_stripe(), builds
+ * the ms_unit_t for it (rows followed by the component array), sets up
+ * its geometry with stripe_geom() and, when MDCMD_DOIT is set, hands it
+ * to the driver with the MD_IOCSET ioctl.
+ *
+ *	sp		set the stripe is created in
+ *	stripep		description of the stripe (name, rows, components,
+ *			optional hot spare pool)
+ *	options		MDCMD_DOIT	really create; otherwise nothing is
+ *					added to the namespace and no ioctl
+ *					is issued
+ *			MDCMD_UPDATE	copy the computed sizes back into
+ *					stripep
+ *			MDCMD_FORCE, MDCMD_ALLOPTION
+ *					downgrade a truncation error from
+ *					stripe_geom() to a warning
+ *	ep		error return
+ *
+ * Returns 0 on success and -1 on failure.  On failure the names that
+ * were added to the namespace are deleted again.
+ */
+int
+meta_create_stripe(
+	mdsetname_t	*sp,
+	md_stripe_t	*stripep,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*stripenp = stripep->common.namep;
+	int		force = ((options & MDCMD_FORCE) ? 1 : 0);
+	int		doall = ((options & MDCMD_ALLOPTION) ? 1 : 0);
+	uint_t		nrow = stripep->rows.rows_len;
+	uint_t		ncomp = 0;
+	uint_t		icomp = 0;
+	diskaddr_t	cum_blocks = 0;
+	diskaddr_t	limit;
+	size_t		mdsize, first_comp;
+	uint_t		row;
+	ms_unit_t	*ms;
+	ms_comp_t	*mdcomp;
+	mdnamelist_t	*keynlp = NULL;
+	md_set_params_t	set_params;
+	int		rval = -1;
+	md_timeval32_t	creation_time;
+	int		create_flag = MD_CRO_32BIT;
+
+	/* validate stripe */
+	if (meta_check_stripe(sp, stripep, options, ep) != 0)
+		return (-1);
+
+	/* allocate stripe unit: header, rows, then the component array */
+	mdsize = sizeof (*ms) - sizeof (ms->un_row[0]);
+	mdsize += sizeof (ms->un_row) * nrow;
+	for (row = 0; (row < nrow); ++row) {
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+
+		ncomp += rp->comps.comps_len;
+	}
+	/* the component array starts at the next long long boundary */
+	first_comp = roundup(mdsize, sizeof (long long));
+	mdsize += (first_comp - mdsize) + (ncomp * sizeof (ms_comp_t));
+	ms = Zalloc(mdsize);
+	ms->un_ocomp = first_comp;
+	if (meta_gettimeofday(&creation_time) == -1) {
+		/*
+		 * ms is already allocated, so leave through "out" to
+		 * release it instead of returning directly.
+		 */
+		(void) mdsyserror(ep, errno, NULL);
+		goto out;
+	}
+
+	/* do rows */
+	mdcomp = (ms_comp_t *)(void *)&((char *)ms)[ms->un_ocomp];
+	for (row = 0; (row < nrow); ++row) {
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+		uint_t		row_ncomp = rp->comps.comps_len;
+		struct ms_row	*mdr = &ms->un_row[row];
+		diskaddr_t	disk_size = 0;
+		uint_t		comp;
+
+		/* setup component count and offset */
+		mdr->un_icomp = icomp;
+		mdr->un_ncomp = row_ncomp;
+
+		/* do components */
+		for (comp = 0; (comp < row_ncomp); ++comp) {
+			md_comp_t	*cp = &rp->comps.comps_val[comp];
+			mdname_t	*compnp = cp->compnamep;
+			ms_comp_t	*mdc = &mdcomp[icomp++];
+			diskaddr_t	size, start_blk;
+
+			/*
+			 * get start and size
+			 * if first component is labelled, include label
+			 */
+			if ((size = metagetsize(compnp, ep)) ==
+			    MD_DISKADDR_ERROR)
+				goto out;
+			if ((start_blk = metagetstart(sp, compnp, ep)) ==
+			    MD_DISKADDR_ERROR)
+				goto out;
+			if ((row == 0) && (comp == 0)) {
+				diskaddr_t	label;
+				int		has_db;
+
+				if ((has_db = metahasmddb(sp, compnp, ep)) < 0)
+					goto out;
+				if ((label = metagetlabel(compnp, ep)) ==
+				    MD_DISKADDR_ERROR)
+					goto out;
+				/* labelled and no mddb: start at block 0 */
+				if ((has_db == 0) && (label != 0)) {
+					ms->c.un_flag |= MD_LABELED;
+					start_blk = compnp->start_blk = 0;
+				}
+			}
+			/* make sure we still have something left */
+			if (start_blk >= size) {
+				(void) mdsyserror(ep, ENOSPC, compnp->cname);
+				goto out;
+			}
+			size -= start_blk;
+
+			/*
+			 * round down by interlace: this only applies
+			 * if this row is a stripe, as indicated by
+			 * (row_ncomp > 1)
+			 */
+			if (row_ncomp > 1)
+				size = rounddown(size, rp->interlace);
+
+			if (size == 0) {
+				(void) mdsyserror(ep, ENOSPC, compnp->cname);
+				goto out;
+			}
+
+			/*
+			 * adjust for smallest disk: for a concat (any
+			 * row with only one component), this will
+			 * never hit the second conditional.
+			 */
+			if (disk_size == 0) {
+				disk_size = size;
+			} else if (size < disk_size) {
+				disk_size = size;
+			}
+
+			if (options & MDCMD_DOIT) {
+				/* store name in namespace */
+				if (add_key_name(sp, compnp, &keynlp, ep) != 0)
+					goto out;
+			}
+
+			/* setup component */
+			mdc->un_key = compnp->key;
+			mdc->un_dev = compnp->dev;
+			mdc->un_start_block = start_blk;
+			mdc->un_mirror.ms_state = CS_OKAY;
+			mdc->un_mirror.ms_timestamp = creation_time;
+		}
+		limit = LLONG_MAX;
+
+		/* setup row, clamping the cumulative size at the limit */
+		mdr->un_blocks = mdr->un_ncomp * disk_size;
+		cum_blocks += mdr->un_blocks;
+		if (cum_blocks > limit) {
+			cum_blocks = limit;
+			md_eprintf(dgettext(TEXT_DOMAIN,
+			    "unit size overflow, limit is %lld blocks\n"),
+			    limit);
+		}
+		mdr->un_cum_blocks = cum_blocks;
+		mdr->un_interlace = rp->interlace;
+	}
+
+	/* setup unit */
+	ms->c.un_type = MD_DEVICE;
+	MD_SID(ms) = meta_getminor(stripenp->dev);
+	ms->c.un_actual_tb = cum_blocks;
+	ms->c.un_size = mdsize;
+	if (stripep->hspnamep != NULL)
+		ms->un_hsp_id = stripep->hspnamep->hsp;
+	else
+		ms->un_hsp_id = MD_HSP_NONE;
+	ms->un_nrows = nrow;
+
+	/*
+	 * fill in the size of the stripe
+	 *
+	 * NOTE(review): ms->c.un_total_blocks is read here before
+	 * stripe_geom() has run, and nothing above assigns it after the
+	 * Zalloc() -- confirm whether common.size is meant to be taken
+	 * after the geometry has been set up.
+	 */
+	if (options & MDCMD_UPDATE) {
+		stripep->common.size = ms->c.un_total_blocks;
+		for (row = 0; (row < nrow); ++row) {
+			stripep->rows.rows_val[row].row_size =
+			    ms->un_row[row].un_blocks;
+		}
+	}
+
+	if (stripe_geom(stripep, ms, ep) != 0) {
+		/*
+		 * If the device is being truncated then only allow this
+		 * if the user is aware (using the -f option) or they
+		 * are in a recovery/complete build situation (using the -a
+		 * option).
+		 */
+		if ((mdiserror(ep, MDE_STRIPE_TRUNC_SINGLE) ||
+		    mdiserror(ep, MDE_STRIPE_TRUNC_MULTIPLE)) &&
+		    (force || doall)) {
+			md_eprintf(dgettext(TEXT_DOMAIN,
+"%s: WARNING: This form of metainit is not recommended.\n"
+"The stripe is truncating the size of the underlying device.\n"
+"Please see ERRORS in metainit(1M) for additional information.\n"),
+			    stripenp->cname);
+			mdclrerror(ep);
+		} else {
+			goto out;
+		}
+	}
+
+	create_flag = meta_check_devicesize(ms->c.un_total_blocks);
+
+	/* if we're not doing anything, return success */
+	if (! (options & MDCMD_DOIT)) {
+		rval = 0;	/* success */
+		goto out;
+	}
+
+	/* create stripe */
+	(void) memset(&set_params, 0, sizeof (set_params));
+
+	/* did the user tell us to generate a large device? */
+	if (create_flag == MD_CRO_64BIT) {
+		ms->c.un_revision = MD_64BIT_META_DEV;
+		set_params.options = MD_CRO_64BIT;
+	} else {
+		ms->c.un_revision = MD_32BIT_META_DEV;
+		set_params.options = MD_CRO_32BIT;
+	}
+
+	set_params.mnum = MD_SID(ms);
+	set_params.size = ms->c.un_size;
+	set_params.mdp = (uintptr_t)ms;
+	MD_SETDRIVERNAME(&set_params, MD_STRIPE, MD_MIN2SET(set_params.mnum));
+	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+	    stripenp->cname) != 0) {
+		(void) mdstealerror(ep, &set_params.mde);
+		goto out;
+	}
+	rval = 0;	/* success */
+
+	/* cleanup, return success */
+out:
+	Free(ms);
+	/* on failure, back out the names added to the namespace */
+	if (rval != 0) {
+		(void) del_key_names(sp, keynlp, NULL);
+	}
+
+	metafreenamelist(keynlp);
+	/* a really created stripe makes the cached names stale */
+	if ((rval == 0) && (options & MDCMD_DOIT)) {
+		if (invalidate_components(sp, stripenp, ep) != 0)
+			rval = -1;
+		meta_invalidate_name(stripenp);
+	}
+	return (rval);
+}
+
+/*
+ * initialize stripe
+ * NOTE: this function is metainit(1m)'s command line parser!
+ *
+ * The argument vector is parsed as
+ *
+ *	name nrow { ncomp component... [-i interlace] }... [-h hotspare_pool]
+ *
+ * with one "{ ... }" group per row.  The parsed description is handed
+ * to meta_create_stripe().
+ *
+ *	spp	in/out set pointer, passed to metaname()/metahspname()
+ *	argc	number of entries in argv, starting at the stripe name
+ *	argv	the arguments
+ *	options	MDCMD_NOLOCK skips taking the set lock and the ownership
+ *		check; MDCMD_PRINT reports success on stdout; the whole
+ *		value is passed on to meta_create_stripe()
+ *	ep	error return
+ *
+ * Returns 0 on success, -1 on failure, or the result of
+ * meta_cook_syntax() for syntax and option errors.
+ */
+int
+meta_init_stripe(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*uname = argv[0];
+	mdname_t	*stripenp = NULL;
+	int		old_optind;
+	int		c;
+	md_stripe_t	*stripep = NULL;
+	uint_t		nrow, row;
+	int		rval = -1;
+
+	/* get stripe name */
+	assert(argc > 0);
+	if (argc < 1)
+		goto syntax;
+
+	if ((stripenp = metaname(spp, uname, ep)) == NULL)
+		goto out;
+	assert(*spp != NULL);
+	uname = stripenp->cname;
+	if (metachkmeta(stripenp, ep) != 0)
+		goto out;
+
+	if (!(options & MDCMD_NOLOCK)) {
+		/* grab set lock */
+		if (meta_lock(*spp, TRUE, ep))
+			goto out;
+
+		if (meta_check_ownership(*spp, ep) != 0)
+			goto out;
+	}
+
+	/* see if it exists already */
+	if (metagetmiscname(stripenp, ep) != NULL) {
+		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+		    meta_getminor(stripenp->dev), uname);
+		goto out;
+	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
+		goto out;
+	} else {
+		/* "not set up" is the expected answer */
+		mdclrerror(ep);
+	}
+	--argc, ++argv;
+
+	/* parse general options (none are accepted) */
+	optind = 0;
+	opterr = 0;
+	if (getopt(argc, argv, "") != -1)
+		goto options;
+
+	/* allocate stripe */
+	stripep = Zalloc(sizeof (*stripep));
+
+	/* setup common */
+	stripep->common.namep = stripenp;
+	stripep->common.type = MD_DEVICE;
+
+	/* allocate and parse rows */
+	if (argc < 1) {
+		(void) mdmderror(ep, MDE_NROWS, meta_getminor(stripenp->dev),
+		    uname);
+		goto out;
+	} else if ((sscanf(argv[0], "%u", &nrow) != 1) || ((int)nrow < 0)) {
+		goto syntax;
+	} else if (nrow < 1) {
+		(void) mdmderror(ep, MDE_NROWS, meta_getminor(stripenp->dev),
+		    uname);
+		goto out;
+	}
+	--argc, ++argv;
+	stripep->rows.rows_len = nrow;
+	stripep->rows.rows_val =
+	    Zalloc(nrow * sizeof (*stripep->rows.rows_val));
+	for (row = 0; (row < nrow); ++row) {
+		md_row_t	*mdr = &stripep->rows.rows_val[row];
+		uint_t		ncomp, comp;
+
+		/* allocate and parse components */
+		if (argc < 1) {
+			(void) mdmderror(ep, MDE_NROWS,
+			    meta_getminor(stripenp->dev), uname);
+			goto out;
+		} else if ((sscanf(argv[0], "%u", &ncomp) != 1) ||
+		    ((int)ncomp < 0)) {
+			goto syntax;
+		} else if (ncomp < 1) {
+			(void) mdmderror(ep, MDE_NCOMPS,
+			    meta_getminor(stripenp->dev), uname);
+			goto out;
+		}
+		--argc, ++argv;
+		mdr->comps.comps_len = ncomp;
+		mdr->comps.comps_val =
+		    Zalloc(ncomp * sizeof (*mdr->comps.comps_val));
+		for (comp = 0; (comp < ncomp); ++comp) {
+			md_comp_t	*mdc = &mdr->comps.comps_val[comp];
+			mdname_t	*compnp;
+
+			/* parse component name */
+			if (argc < 1) {
+				(void) mdmderror(ep, MDE_NCOMPS,
+				    meta_getminor(stripenp->dev), uname);
+				goto out;
+			}
+			if ((compnp = metaname(spp, argv[0], ep)) == NULL) {
+				goto out;
+			}
+			/* check for soft partition */
+			if (meta_sp_issp(*spp, compnp, ep) != 0) {
+				/* check disk */
+				if (metachkcomp(compnp, ep) != 0) {
+					goto out;
+				}
+			}
+			mdc->compnamep = compnp;
+			--argc, ++argv;
+		}
+
+		/* parse row options */
+		old_optind = optind = 0;
+		opterr = 0;
+		while ((c = getopt(argc, argv, "i:")) != -1) {
+			switch (c) {
+			case 'i':
+				if (parse_interlace(uname, optarg,
+				    &mdr->interlace, ep) != 0) {
+					goto out;
+				}
+				if (meta_stripe_check_interlace(mdr->interlace,
+				    uname, ep))
+					goto out;
+				break;
+
+			default:
+				optind = old_optind;	/* bomb out later */
+				goto done_row_opts;
+			}
+			old_optind = optind;
+		}
+done_row_opts:
+		/* step over the row options that were consumed */
+		argc -= optind;
+		argv += optind;
+	}
+
+	/* parse stripe options */
+	old_optind = optind = 0;
+	opterr = 0;
+	while ((c = getopt(argc, argv, "h:")) != -1) {
+		switch (c) {
+		case 'h':
+			if ((stripep->hspnamep = metahspname(spp, optarg,
+			    ep)) == NULL) {
+				goto out;
+			}
+			break;
+
+		default:
+			/*
+			 * Step over the options already accepted so the
+			 * error is reported from the offending one.  The
+			 * count has to shrink as the vector advances (it
+			 * used to grow, leaving argc larger than what is
+			 * left in argv).
+			 */
+			argc -= old_optind;
+			argv += old_optind;
+			goto options;
+		}
+		old_optind = optind;
+	}
+	argc -= optind;
+	argv += optind;
+
+	/* we should be at the end */
+	if (argc != 0)
+		goto syntax;
+
+	/* create stripe */
+	if (meta_create_stripe(*spp, stripep, options, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Concat/Stripe is setup\n"),
+		    uname);
+		(void) fflush(stdout);
+	}
+	goto out;
+
+	/* syntax error */
+syntax:
+	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
+	goto out;
+
+	/* options error */
+options:
+	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
+	goto out;
+
+	/* cleanup, return error */
+out:
+	if (stripep != NULL)
+		meta_free_stripe(stripep);
+	return (rval);
+}
+
+/*
+ * Reset (clear) one stripe, or every stripe of a set.
+ *
+ * With stripenp == NULL every name returned by meta_get_stripe_names()
+ * is reset in turn: through meta_mn_send_metaclear_command() when
+ * meta_is_mn_set() says the set is multi-node, through a recursive call
+ * otherwise.  The walk stops at the first failure.
+ *
+ * With a stripe name, the stripe must not have a parent (MDE_IN_USE
+ * otherwise).  Its component name cache is invalidated, the metadevice
+ * is cleared with meta_reset(), and with MDCMD_RECURSE every component
+ * that is itself a metadevice is reset by name as well.  MDCMD_PRINT
+ * reports the clear on stdout.
+ *
+ * Returns 0 on success, -1 (or the mdmderror() result) on failure.
+ */
+int
+meta_stripe_reset(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	int		rval = -1;
+	int		row, comp;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert((stripenp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev))));
+
+	/* no name given: reset all stripes of the set */
+	if (stripenp == NULL) {
+		mdnamelist_t	*stripenlp = NULL;
+		mdnamelist_t	*p;
+
+		if (meta_get_stripe_names(sp, &stripenlp, 0, ep) < 0)
+			return (-1);
+
+		rval = 0;
+		for (p = stripenlp; p != NULL; p = p->next) {
+			int	failed;
+
+			stripenp = p->namep;
+
+			/*
+			 * A multi-node set gets a series of individual
+			 * metaclear commands; any other set is handled
+			 * by recursing on the single stripe.
+			 */
+			if (meta_is_mn_set(sp, ep)) {
+				failed = (meta_mn_send_metaclear_command(sp,
+				    stripenp->cname, options, 0, ep) != 0);
+			} else {
+				failed = (meta_stripe_reset(sp, stripenp,
+				    options, ep) != 0);
+			}
+			if (failed) {
+				rval = -1;
+				break;
+			}
+		}
+
+		metafreenamelist(stripenlp);
+		return (rval);
+	}
+
+	/* check name */
+	if (metachkmeta(stripenp, ep) != 0)
+		return (-1);
+
+	/* get unit structure */
+	stripep = meta_get_stripe(sp, stripenp, ep);
+	if (stripep == NULL)
+		return (-1);
+
+	/* refuse while something else is layered on top of us */
+	if (MD_HAS_PARENT(stripep->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(stripenp->dev),
+		    stripenp->cname));
+	}
+
+	/* clear subdevices cache */
+	if (invalidate_components(sp, stripenp, ep) != 0)
+		return (-1);
+
+	/* clear metadevice */
+	if (meta_reset(sp, stripenp, options, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Concat/Stripe is cleared\n"),
+		    stripenp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* clear subdevices, but only when asked to recurse */
+	if (options & MDCMD_RECURSE) {
+		for (row = 0; row < stripep->rows.rows_len; ++row) {
+			md_row_t	*rp = &stripep->rows.rows_val[row];
+
+			for (comp = 0; comp < rp->comps.comps_len; ++comp) {
+				mdname_t	*compnp =
+				    rp->comps.comps_val[comp].compnamep;
+
+				/* only recurse on metadevices */
+				if (metaismeta(compnp) &&
+				    (meta_reset_by_name(sp, compnp, options,
+				    ep) != 0))
+					rval = -1;
+			}
+		}
+	}
+
+	/* cleanup: the cached stripe is stale whatever the outcome */
+out:
+	meta_invalidate_name(stripenp);
+	return (rval);
+}
+
+/*
+ * Report whether any component of the listed stripes is in error.
+ *
+ * Returns TRUE as soon as a stripe on stripe_names cannot be fetched
+ * with meta_get_stripe(), or a component whose state is not CS_OKAY is
+ * found; returns FALSE otherwise.  Errors are collected in a local
+ * md_error_t and cleared before returning.
+ */
+int
+meta_stripe_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *stripe_names)
+{
+	mdnamelist_t	*nlp;
+	md_error_t	status = mdnullerror;
+	md_error_t	*ep = &status;
+	int		any_errs = FALSE;
+
+	for (nlp = stripe_names; (nlp != NULL) && (any_errs == FALSE);
+	    nlp = nlp->next) {
+		md_stripe_t	*stripep = meta_get_stripe(sp, nlp->namep, ep);
+		int		row;
+
+		/* a stripe we cannot look at counts as an error */
+		if (stripep == NULL) {
+			any_errs = TRUE;
+			break;
+		}
+
+		for (row = 0; (row < stripep->rows.rows_len) &&
+		    (any_errs == FALSE); ++row) {
+			md_row_t	*rp = &stripep->rows.rows_val[row];
+			uint_t		comp;
+
+			for (comp = 0; comp < rp->comps.comps_len; ++comp) {
+				if (rp->comps.comps_val[comp].state !=
+				    CS_OKAY) {
+					any_errs = TRUE;
+					break;
+				}
+			}
+		}
+	}
+
+	/* the error itself is of no interest to the caller */
+	if (!mdisok(ep))
+		mdclrerror(ep);
+
+	return (any_errs);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_systemfile.c b/usr/src/lib/lvm/libmeta/common/meta_systemfile.c
new file mode 100644
index 0000000000..9e5e20f057
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_systemfile.c
@@ -0,0 +1,475 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * patch /kernel/drv/md.conf file
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+
+/*
+ * magic strings in system
+ */
+#define BEGROOTSTR "* Begin MDD root info (do not edit)\n"
+#define ENDROOTSTR "* End MDD root info (do not edit)\n"
+#define BEGMDDBSTR "# Begin MDD database info (do not edit)\n"
+#define ENDMDDBSTR "# End MDD database info (do not edit)\n"
+
+/*
+ * copy system file, yank root and database lines
+ *
+ * Opens sname for reading (a file that does not exist is treated as empty)
+ * and, when doit is set, creates a temp file named "<sname>.tmp" -- or a
+ * tempnam() file under /var/tmp if that open fails with EROFS -- which
+ * receives every line of sname except the MDD root section (when doroot is
+ * set) and the MDD database section (when domddb is set), marker lines
+ * included.  With verbose set the dropped lines are echoed to stdout.
+ *
+ * Returns 0 on success with *tname holding the temp file name and, when
+ * doit is set, *tfp holding the flushed, still open temp stream.
+ * Returns -1 on failure with ep filled in; the temp file is unlinked, its
+ * name freed and its stream closed, and *tname / *tfp are reset to NULL so
+ * the caller is never handed a stale pointer.
+ */
+int
+meta_systemfile_copy(
+	char		*sname,		/* system file name */
+	int		doroot,		/* remove mdd root stuff */
+	int		domddb,		/* remove mdd database stuff */
+	int		doit,		/* really copy file */
+	int		verbose,	/* show what we're doing */
+	char		**tname,	/* returned temp file name */
+	FILE		**tfp,		/* returned open FILE */
+	md_error_t	*ep		/* returned error */
+)
+{
+	FILE		*fp;
+	struct stat	sbuf;
+	char		buf[MDDB_BOOTLIST_MAX_LEN];
+	int		delroot = 0;	/* inside the root section */
+	int		delmddb = 0;	/* inside the database section */
+
+	/* check names */
+	assert(sname != NULL);
+	assert(tname != NULL);
+	assert(tfp != NULL);
+
+	/* get temp name */
+	*tfp = NULL;
+	*tname = Malloc(strlen(sname) + strlen(".tmp") + 1);
+	(void) strcpy(*tname, sname);
+	(void) strcat(*tname, ".tmp");
+
+	/* copy system file, yank stuff */
+	if (((fp = fopen(sname, "r")) == NULL) ||
+	    (fstat(fileno(fp), &sbuf) != 0)) {
+		/* a missing system file is fine, anything else is fatal */
+		if (errno != ENOENT) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+	}
+	if (doit) {
+		if ((*tfp = fopen(*tname, "w")) == NULL) {
+			/*
+			 * If we are on the miniroot we need to create
+			 * files in /var/tmp. Opening a writable file
+			 * in the miniroot result is EROFS error.
+			 */
+			if (errno != EROFS) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+			Free(*tname);
+			*tname = tempnam("/var/tmp", "svm_");
+			if (*tname == NULL) {
+				(void) mdsyserror(ep, errno, NULL);
+				goto out;
+			}
+			if ((*tfp = fopen(*tname, "w")) == NULL) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+		}
+		/* give the copy the mode and ownership of the original */
+		if (fp != NULL) {
+			if ((fchmod(fileno(*tfp), (sbuf.st_mode & 0777))
+			    != 0) ||
+			    (fchown(fileno(*tfp), sbuf.st_uid, sbuf.st_gid)
+			    != 0)) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+		}
+	}
+	if (verbose) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Delete the following lines from %s:\n\n"), sname);
+	}
+	while ((fp != NULL) && (fgets(buf, sizeof (buf), fp) != NULL)) {
+		if ((doroot) && (strcmp(buf, BEGROOTSTR) == 0)) {
+			delroot = 1;
+			if (verbose)
+				(void) printf("%s", buf);
+			continue;
+		}
+		if (delroot) {
+			if (strcmp(buf, ENDROOTSTR) == 0)
+				delroot = 0;
+			if (verbose)
+				(void) printf("%s", buf);
+			continue;
+		}
+		if ((domddb) && (strcmp(buf, BEGMDDBSTR) == 0)) {
+			delmddb = 1;
+			if (verbose)
+				(void) printf("%s", buf);
+			continue;
+		}
+		if (delmddb) {
+			if (strcmp(buf, ENDMDDBSTR) == 0)
+				delmddb = 0;
+			if (verbose)
+				(void) printf("%s", buf);
+			continue;
+		}
+		if (doit) {
+			if (fputs(buf, *tfp) == EOF) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+		}
+	}
+	if (fp != NULL) {
+		/* the loop must have stopped at end of file, not on error */
+		if (! feof(fp)) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		if (fclose(fp) != 0) {
+			/*
+			 * The stream is gone even when fclose() fails;
+			 * forget it so the cleanup code does not close
+			 * it a second time.
+			 */
+			fp = NULL;
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		fp = NULL;
+	}
+	if (verbose)
+		(void) printf("\n");
+
+	/* make sure we didn't stop mid-delete */
+	if ((delroot) || (delmddb)) {
+		(void) mderror(ep, MDE_SYSTEM_FILE, sname);
+		goto out;
+	}
+
+	/* flush stuff */
+	if (doit) {
+		if ((fflush(*tfp) != 0) ||
+		    (fsync(fileno(*tfp)) != 0)) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+	}
+
+	/* return success */
+	return (0);
+
+	/* cleanup, return error */
+out:
+	if (fp != NULL)
+		(void) fclose(fp);
+	if (*tname != NULL) {
+		(void) unlink(*tname);
+		Free(*tname);
+		*tname = NULL;
+	}
+	if (*tfp != NULL) {
+		(void) fclose(*tfp);
+		*tfp = NULL;
+	}
+	return (-1);
+}
+
+/*
+ * append root on MD lines to system
+ *
+ * Emits the "MDD root info" section: the begin marker, a "rootdev:" line
+ * carrying the /devices name of rootnp (only when ismeta is set) and the
+ * end marker.  With verbose set the section is echoed to stdout; with doit
+ * set it is written to tfp, which is then flushed and fsync'ed.
+ *
+ * Returns 0 on success, -1 if the /devices name cannot be obtained, or the
+ * value returned by mdsyserror() (with ep filled in) when a write fails.
+ */
+int
+meta_systemfile_append_mdroot(
+	mdname_t	*rootnp,	/* root device name */
+	char		*sname,		/* system file name */
+	char		*tname,		/* temp file name */
+	FILE		*tfp,		/* temp FILE */
+	int		ismeta,		/* is a metadevice */
+	int		doit,		/* really patch file */
+	int		verbose,	/* show what we're doing */
+	md_error_t	*ep
+)
+{
+	char		*longblkname;
+
+	/* check names */
+	assert(sname != NULL);
+	assert(tname != NULL);
+	assert(!doit || tfp != NULL);
+
+	/* get root /devices name */
+	if ((longblkname = metagetdevicesname(rootnp, ep)) == NULL)
+		return (-1);
+
+	/* add header */
+	if (verbose) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Add the following lines to %s:\n\n"), sname);
+		(void) printf("%s", BEGROOTSTR);
+	}
+	if (doit) {
+		/* fprintf() signals failure with any negative value */
+		if (fprintf(tfp, "%s", BEGROOTSTR) < 0) {
+			return (mdsyserror(ep, errno, tname));
+		}
+	}
+
+	/* add rootdev */
+	if (ismeta) {
+		if (verbose)
+			(void) printf("rootdev:%s\n", longblkname);
+		if (doit) {
+			if (fprintf(tfp, "rootdev:%s\n", longblkname) < 0) {
+				return (mdsyserror(ep, errno, tname));
+			}
+		}
+	}
+
+	/* add trailer (stdout gets an extra blank line after it) */
+	if (verbose) {
+		(void) printf("%s\n", ENDROOTSTR);
+	}
+	if (doit) {
+		if (fprintf(tfp, "%s", ENDROOTSTR) < 0) {
+			return (mdsyserror(ep, errno, tname));
+		}
+	}
+
+	/* flush stuff */
+	if (doit) {
+		if ((fflush(tfp) != 0) ||
+		    (fsync(fileno(tfp)) != 0)) {
+			return (mdsyserror(ep, errno, tname));
+		}
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * parse mddb.cf line
+ *
+ * A line has the form "driver minor block devid checksum", separated by
+ * blanks or tabs.  The line is accepted only when the sum of the driver
+ * name characters, the minor number, the block offset, the devid
+ * characters and the trailing checksum field equals 42.
+ *
+ * The line is modified in place: the driver name is NUL terminated and
+ * *driver points into it.
+ *
+ * Returns 0 on success and -1 for comment lines, lines without a driver
+ * name, lines that end before the minor number, and lines whose checksum
+ * does not match.
+ *
+ * Caller of this routine needs to free the device id string that
+ * is passed back during a successful return.  On a checksum failure the
+ * string is freed here and *devid_char_pp is reset to NULL; on the other
+ * failures *devid_char_pp is left untouched.
+ */
+static int
+confline(
+	char		*line,		/* line in file */
+	char		**driver,	/* returned driver name */
+	minor_t		*mnump,		/* returned minor number */
+	daddr_t		*block,		/* returned block offset */
+	char		**devid_char_pp	/* returned device id string */
+)
+{
+	char		*p = line;
+	int		chksum = 0;
+	int		i;
+	uint_t		devid_size;
+
+	if (*p == '#') {
+		return (-1);
+	}
+	*driver = p;
+	/* stop at the end of the string too, never scan past the buffer */
+	while ((*p != '\0') && (*p != ' ') && (*p != '\t'))
+		chksum += *p++;
+	if ((*driver == p) || (*p == '\0')) {
+		return (-1);
+	}
+	*p++ = '\0';
+	*mnump = strtoul(p, &p, 10);
+	chksum += *mnump;
+	*block = strtol(p, &p, 10);
+	chksum += *block;
+
+	/* parse out devid */
+	while ((*p == ' ') || (*p == '\t')) {
+		p++;
+	}
+	i = strcspn(p, " \t");
+	*devid_char_pp = Malloc(i+1);
+	(void) strncpy(*devid_char_pp, p, i);
+	(*devid_char_pp)[i] = '\0';
+	devid_size = i;
+	p += devid_size;
+	for (i = 0; i < devid_size; i++) {
+		chksum += (*devid_char_pp)[i];
+	}
+
+	chksum += strtol(p, &p, 10);
+	if (chksum != 42) {
+		Free(*devid_char_pp);
+		/* clear the caller's pointer, not our local copy of it */
+		*devid_char_pp = NULL;
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * append MDDB lines to system
+ *
+ * Reads the mddb.cf style file cname and emits the "MDD database info"
+ * section: the begin marker, up to MDDB_MAX_PATCH "mddb_bootlist<N>=..."
+ * lines (each packed with as many valid entries as fit in
+ * MDDB_BOOTLIST_MAX_LEN) and the end marker.  With verbose set the section
+ * is echoed to stdout; with doit set it is written to tfp, which is then
+ * flushed and fsync'ed.  Lines of cname rejected by confline() are skipped.
+ *
+ * Returns 0 on success, -1 with ep filled in on failure.
+ */
+int
+meta_systemfile_append_mddb(
+	char		*cname,		/* mddb.cf file name */
+	char		*sname,		/* system file name */
+	char		*tname,		/* temp file name */
+	FILE		*tfp,		/* temp FILE */
+	int		doit,		/* really patch file */
+	int		verbose,	/* show what we're doing */
+	md_error_t	*ep		/* returned error */
+)
+{
+	FILE		*cfp = NULL;
+	char		buf[1024];
+	char		*p;
+	int		i;
+	char		*driver;
+	minor_t		mnum;
+	daddr_t		block;
+	char		line[MDDB_BOOTLIST_MAX_LEN];
+	char		entry[MDDB_BOOTLIST_MAX_LEN];
+	char		*devid_char_p = NULL;
+	struct stat	statbuf;
+
+	/* check names */
+	assert(cname != NULL);
+	assert(sname != NULL);
+	assert(tname != NULL);
+	assert(!doit || tfp != NULL);
+
+	/* open database conf file */
+	if ((cfp = fopen(cname, "r")) == NULL) {
+		(void) mdsyserror(ep, errno, cname);
+		goto out;
+	}
+	/*
+	 * Check that it is an ordinary file.  Ask about the stream we
+	 * actually opened so the answer cannot refer to a different file.
+	 */
+	if (fstat(fileno(cfp), &statbuf) != 0) {
+		(void) mdsyserror(ep, errno, cname);
+		goto out;
+	}
+	if ((statbuf.st_mode & S_IFMT) != S_IFREG) {
+		(void) mderror(ep, MDE_MDDB_FILE, cname);
+		goto out;
+	}
+
+	/* add header */
+	if (verbose) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Add the following lines to %s:\n\n"), sname);
+		(void) printf("%s", BEGMDDBSTR);
+	}
+	if (doit) {
+		if (fprintf(tfp, "%s", BEGMDDBSTR) < 0) {
+			(void) mdsyserror(ep, errno, tname);
+			goto out;
+		}
+	}
+
+	/*
+	 * append database lines
+	 *
+	 * devid_char_p is always NULL when confline() is called.  When
+	 * confline() fails it either never allocated the devid or has
+	 * already freed it, so the pointer is reset here rather than being
+	 * trusted.
+	 */
+	while ((p = fgets(buf, sizeof (buf), cfp)) != NULL) {
+		if (confline(buf, &driver, &mnum, &block,
+		    &devid_char_p) == 0)
+			break;
+		devid_char_p = NULL;
+	}
+	for (i = 1; ((p != NULL) && (i <= MDDB_MAX_PATCH)); ++i) {
+		/* start a new bootlist line with the pending entry */
+		(void) snprintf(line, sizeof (line),
+		    "mddb_bootlist%d=\"%s:%lu:%ld:%s",
+		    i, driver, mnum, block, devid_char_p);
+		if (devid_char_p != NULL) {
+			free(devid_char_p);
+			devid_char_p = NULL;
+		}
+
+		while ((p = fgets(buf, sizeof (buf), cfp)) != NULL) {
+			if (confline(buf, &driver, &mnum, &block,
+			    &devid_char_p) != 0) {
+				devid_char_p = NULL;
+				continue;
+			}
+			(void) snprintf(entry, sizeof (entry), " %s:%lu:%ld:%s",
+			    driver, mnum, block, devid_char_p);
+
+			/* entry does not fit: it opens the next line */
+			if ((strlen(line) + strlen(entry) + 4) > sizeof (line))
+				break;
+			(void) strcat(line, entry);
+			if (devid_char_p != NULL) {
+				free(devid_char_p);
+				devid_char_p = NULL;
+			}
+		}
+		if (verbose)
+			/* CSTYLED */
+			(void) printf("%s\";\n", line);
+		if (doit) {
+			/* CSTYLED */
+			if (fprintf(tfp, "%s\";\n", line) < 0) {
+				(void) mdsyserror(ep, errno, tname);
+				goto out;
+			}
+		}
+	}
+
+	if (devid_char_p != NULL) {
+		free(devid_char_p);
+		devid_char_p = NULL;
+	}
+
+	/* add trailer (stdout gets an extra blank line after it) */
+	if (verbose)
+		(void) printf("%s\n", ENDMDDBSTR);
+	if (doit) {
+		if (fprintf(tfp, "%s", ENDMDDBSTR) < 0) {
+			(void) mdsyserror(ep, errno, tname);
+			goto out;
+		}
+	}
+
+	/* close database conf file */
+	if (fclose(cfp) != 0) {
+		cfp = NULL;
+		(void) mdsyserror(ep, errno, cname);
+		goto out;
+	}
+	cfp = NULL;
+
+	/* flush stuff */
+	if (doit) {
+		if ((fflush(tfp) != 0) ||
+		    (fsync(fileno(tfp)) != 0)) {
+			(void) mdsyserror(ep, errno, tname);
+			goto out;
+		}
+	}
+
+	/* return success */
+	return (0);
+
+	/* cleanup, return error */
+out:
+	/* drop a devid that was parsed but never written out */
+	if (devid_char_p != NULL)
+		free(devid_char_p);
+	if (cfp != NULL)
+		(void) fclose(cfp);
+	return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_tab.c b/usr/src/lib/lvm/libmeta/common/meta_tab.c
new file mode 100644
index 0000000000..7e1ed32a6b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_tab.c
@@ -0,0 +1,342 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+
+#include <ctype.h>
+
+/*
+ * Release an md.tab structure: the file name, the raw data buffer, every
+ * parsed line (context string, canonical name and argv array) and finally
+ * the structure itself.  The argv entries themselves are not freed here.
+ */
+void
+meta_tab_free(
+	md_tab_t	*tabp
+)
+{
+	Free(tabp->filename);
+	Free(tabp->data);
+
+	if (tabp->lines != NULL) {
+		md_tab_line_t	*linep;
+		md_tab_line_t	*endp;
+
+		assert(tabp->alloc > 0);
+
+		/* walk the used part of the line array by pointer */
+		endp = &tabp->lines[tabp->nlines];
+		for (linep = tabp->lines; linep < endp; ++linep) {
+			if (linep->context != NULL)
+				Free(linep->context);
+			if (linep->cname != NULL)
+				Free(linep->cname);
+			if (linep->argv == NULL)
+				continue;
+			assert(linep->alloc > 0);
+			Free(linep->argv);
+		}
+		Free(tabp->lines);
+	}
+
+	Free(tabp);
+}
+
+/*
+ * (re)allocate argv array
+ *
+ * Makes sure linep->argv has room for at least argc entries.  The array
+ * grows in multiples of TAB_ARG_ALLOC; newly added slots are zeroed and
+ * linep->alloc is updated.  Nothing is done when the current allocation
+ * is already large enough.
+ */
+static void
+realloc_argv(
+	md_tab_line_t	*linep,
+	size_t		argc
+)
+{
+	/* allocate in chunks */
+	argc = roundup(argc, TAB_ARG_ALLOC);
+
+	/* an exact fit needs no reallocation either */
+	if (argc <= linep->alloc)
+		return;
+
+	/* (re)allocate */
+	if (linep->alloc == 0) {
+		linep->argv = Malloc(argc * sizeof (*linep->argv));
+	} else {
+		assert(linep->argv != NULL);
+		linep->argv =
+		    Realloc(linep->argv, (argc * sizeof (*linep->argv)));
+	}
+
+	/* zero out new stuff */
+	(void) memset(&linep->argv[linep->alloc], 0,
+	    ((argc - linep->alloc) * sizeof (*linep->argv)));
+
+	/* adjust for new size */
+	linep->alloc = argc;
+}
+
+/*
+ * (re)allocate line array
+ *
+ * Makes sure tabp->lines has room for at least nlines entries.  The array
+ * grows in multiples of TAB_LINE_ALLOC; newly added entries are zeroed and
+ * tabp->alloc is updated.  Nothing is done when the current allocation is
+ * already large enough.
+ */
+static void
+realloc_lines(
+	md_tab_t	*tabp,
+	size_t		nlines
+)
+{
+	/* allocate in chunks */
+	nlines = roundup(nlines, TAB_LINE_ALLOC);
+
+	/* an exact fit needs no reallocation either */
+	if (nlines <= tabp->alloc)
+		return;
+
+	/* (re)allocate */
+	if (tabp->alloc == 0) {
+		assert(tabp->lines == NULL);
+		tabp->lines = Malloc(nlines * sizeof (*tabp->lines));
+	} else {
+		assert(tabp->lines != NULL);
+		tabp->lines =
+		    Realloc(tabp->lines, (nlines * sizeof (*tabp->lines)));
+	}
+
+	/* zero out new stuff */
+	(void) memset(&tabp->lines[tabp->alloc], 0,
+	    ((nlines - tabp->alloc) * sizeof (*tabp->lines)));
+
+	/* adjust for new size */
+	tabp->alloc = nlines;
+}
+
+/*
+ * parse up md.tab struct
+ *
+ * Splits tabp->data in place into lines and blank/tab separated tokens.
+ * The buffer must end in "\n\0"; the scanning loops rely on that newline
+ * as a sentinel.  For every non-empty line an md_tab_line_t is filled in
+ * with its argv (pointers into tabp->data), a "file line N" context
+ * string, its init type and the canonical name of its first token.
+ *
+ * Lines whose first character is '#' are skipped.  A backslash directly
+ * before a newline joins the line with the next one (both characters are
+ * overwritten with blanks).
+ */
+static void
+parse_tab(
+	md_tab_t	*tabp
+)
+{
+	uint_t		lineno = 1;
+	char		*p = tabp->data;
+	char		*e = tabp->data + tabp->total - 1;
+	char		*context;
+	size_t		len;
+
+	/* we can count on '\n\0' as the last characters */
+	assert(tabp->total >= 2);
+	assert(tabp->data[tabp->total - 2] == '\n');
+	assert(tabp->data[tabp->total - 1] == '\0');
+
+	/* allocate context buffer "file line XXX" */
+	assert(tabp->filename != NULL);
+	len = strlen(tabp->filename) +
+	    strlen(dgettext(TEXT_DOMAIN, "%s line %u")) + 20 + 1;
+	context = Malloc(len);
+
+	/* parse lines */
+	while (p < e) {
+		md_tab_line_t	*linep;
+		char		*t;
+
+		/* allocate new line */
+		realloc_lines(tabp, (tabp->nlines + 1));
+		linep = &tabp->lines[tabp->nlines];
+		(void) snprintf(context, len,
+		    dgettext(TEXT_DOMAIN, "%s line %u"), tabp->filename,
+		    lineno);
+
+		/* comments */
+		if (*p == '#') {
+			while (*p != '\n')
+				++p;
+		}
+
+		/* coalesce \ continuations */
+		t = p;
+		while (*t != '\n') {
+			if ((*t == '\\') && (*(t + 1) == '\n')) {
+				*t++ = ' ';
+				*t = ' ';
+				++lineno;
+			}
+			++t;
+		}
+
+		/*
+		 * leading whitespace
+		 *
+		 * The <ctype.h> classifiers are only defined for values
+		 * representable as unsigned char, hence the casts.
+		 */
+		while ((*p != '\n') && (isspace((unsigned char)*p)))
+			++p;
+
+		/* count lines */
+		if (*p == '\n') {
+			++p;
+			++lineno;
+			continue;
+		}
+
+		/* tokenize line */
+		while ((p < e) && (*p != '\n')) {
+			char	**argvp;
+
+			/* allocate new token */
+			realloc_argv(linep, (linep->argc + 1));
+			argvp = &linep->argv[linep->argc++];
+
+			/* find end of token */
+			*argvp = p;
+			while ((*p != '\n') && (! isspace((unsigned char)*p)))
+				++p;
+
+			/* terminate */
+			if (*p == '\n') {
+				*p++ = '\0';
+				++lineno;
+				break;
+			}
+
+			/* eat white space */
+			*p++ = '\0';
+			while ((p < e) && (*p != '\n') &&
+			    (isspace((unsigned char)*p)))
+				++p;
+		}
+		tabp->nlines++;
+
+		/* fill in the rest */
+		assert((linep->argc > 0) && (linep->argv != NULL) &&
+		    (linep->argv[0][0] != '\0') &&
+		    (! isspace((unsigned char)linep->argv[0][0])));
+		linep->context = Strdup(context);
+		linep->type = meta_get_init_type(linep->argc, linep->argv);
+		/*
+		 * meta_canonicalize() hands back a string the caller owns
+		 * (meta_tab_find() frees it), so keep it as is instead of
+		 * duplicating it and leaking the original.
+		 */
+		linep->cname = meta_canonicalize(NULL, linep->argv[0]);
+		assert(linep->cname != NULL);
+	}
+
+	/* cleanup */
+	Free(context);
+}
+
+/*
+ * read in md.tab file and return struct
+ *
+ * Reads the whole of filename (METATAB when filename is NULL) into memory,
+ * appends "\n\0" and hands the buffer to parse_tab().
+ *
+ * Returns the parsed table, to be released with meta_tab_free(), or NULL
+ * with ep filled in.  A file that turns out to be shorter than fstat()
+ * reported is rejected with MDE_SYNTAX.
+ */
+md_tab_t *
+meta_tab_parse(
+	char		*filename,
+	md_error_t	*ep
+)
+{
+	md_tab_t	*tabp = NULL;
+	int		fd = -1;
+	struct stat	statbuf;
+	size_t		sofar;
+	char		*p;
+
+	/* open tab file */
+	if (filename == NULL)
+		filename = METATAB;
+	if ((fd = open(filename, O_RDONLY, 0)) < 0) {
+		(void) mdsyserror(ep, errno, filename);
+		goto out;
+	}
+	if (fstat(fd, &statbuf) != 0) {
+		(void) mdsyserror(ep, errno, filename);
+		goto out;
+	}
+
+	/* allocate table */
+	tabp = Zalloc(sizeof (*tabp));
+	tabp->filename = Strdup(filename);
+	tabp->total = statbuf.st_size + 2;	/* terminating "\n\0" */
+	tabp->data = Malloc(tabp->total);
+
+	/* read in data */
+	sofar = 0;
+	p = tabp->data;
+	while (sofar < statbuf.st_size) {
+		size_t	want = statbuf.st_size - sofar;
+		ssize_t	cnt;
+
+		/*
+		 * Read in chunks of at most 8192 bytes, but never ask for
+		 * more than is left of the buffer: the file may have grown
+		 * since the fstat() above.
+		 */
+		if (want > 8192)
+			want = 8192;
+
+		if ((cnt = read(fd, p, want)) < 0) {
+			(void) mdsyserror(ep, errno, filename);
+			goto out;
+		} else if (cnt == 0) {
+			/* file is shorter than fstat() said */
+			(void) mderror(ep, MDE_SYNTAX, filename);
+			goto out;
+		}
+		sofar += cnt;
+		p += cnt;
+	}
+	tabp->data[tabp->total - 2] = '\n';
+	tabp->data[tabp->total - 1] = '\0';
+
+	/* close file */
+	if (close(fd) != 0) {
+		(void) mdsyserror(ep, errno, filename);
+		fd = -1;
+		goto out;
+	}
+	fd = -1;
+
+	/* parse it up */
+	parse_tab(tabp);
+
+	/* return success */
+	return (tabp);
+
+	/* cleanup, return error */
+out:
+	if (fd >= 0)
+		(void) close(fd);
+	if (tabp != NULL)
+		meta_tab_free(tabp);
+	return (NULL);
+}
+
+/*
+ * Look up a line in a parsed md.tab.
+ *
+ * name is canonicalized relative to set sp; the first line whose type has
+ * a bit in common with type and whose canonical name matches is returned.
+ * Returns NULL when no line matches.  The returned pointer refers into
+ * tabp and is not a copy.
+ */
+md_tab_line_t *
+meta_tab_find(
+	mdsetname_t	*sp,
+	md_tab_t	*tabp,
+	char		*name,
+	mdinittypes_t	type
+)
+{
+	md_tab_line_t	*match = NULL;
+	char		*cname;
+	size_t		idx;
+
+	cname = meta_canonicalize(sp, name);
+
+	for (idx = 0; idx < tabp->nlines; ++idx) {
+		md_tab_line_t	*linep = &tabp->lines[idx];
+
+		assert((linep->argc > 0) && (linep->argv[0] != NULL));
+
+		/* wrong kind of entry */
+		if ((linep->type & type) == 0)
+			continue;
+		/* wrong name */
+		if (strcmp(linep->cname, cname) != 0)
+			continue;
+
+		match = linep;
+		break;
+	}
+
+	/* the canonical name was only needed for the comparison */
+	Free(cname);
+	return (match);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_time.c b/usr/src/lib/lvm/libmeta/common/meta_time.c
new file mode 100644
index 0000000000..ace6483a08
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_time.c
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * 32-bit only version of gettimeofday
+ */
+
+#include <sys/time.h>
+#include <sys/types32.h>
+#include <meta.h>
+
+int
+meta_gettimeofday(md_timeval32_t *tv32)
+{
+	struct timeval	now;
+	int		rc;
+
+	/* nothing to fill in; this is reported as success */
+	if (tv32 == NULL)
+		return (0);
+
+	/* pass a gettimeofday() failure straight back */
+	rc = gettimeofday(&now, NULL);
+	if (rc != 0)
+		return (rc);
+
+	/* narrow both fields to their 32-bit representation */
+	tv32->tv_sec = (time32_t)now.tv_sec;
+	tv32->tv_usec = (int32_t)now.tv_usec;
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_trans.c b/usr/src/lib/lvm/libmeta/common/meta_trans.c
new file mode 100644
index 0000000000..e350e2d2d5
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_trans.c
@@ -0,0 +1,1761 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * trans operations
+ */
+
+#include <meta.h>
+#include <meta_basic.h>
+#include <sys/lvm/md_trans.h>
+#include <sys/wait.h>
+#include <sys/mnttab.h>
+#include <stddef.h>
+
+extern char *getfullblkname();
+
+/*
+ * replace trans
+ *
+ * Replace component oldnp of the trans metadevice transnp with newnp by
+ * issuing an MD_IOCREPLACE ioctl with the REPLACE_COMP command.
+ *
+ * A key is added for newnp before the ioctl; if the ioctl fails that key
+ * is deleted again and the ioctl's error is moved into ep.  On success the
+ * cached information for oldnp, newnp and transnp is invalidated and, when
+ * MDCMD_PRINT is set in options, a confirmation is printed to stdout.
+ *
+ * The dev, start_blk and end_blk of newnp are saved on entry and written
+ * back when oldnp and newnp share the same raw name but carry different
+ * device numbers.
+ * NOTE(review): presumably this guards against meta_invalidate_name()
+ * disturbing newnp's cached values -- confirm against meta_name.c.
+ *
+ * Returns 0 on success; otherwise -1 or the value returned by
+ * mdsyserror() / mdstealerror(), with ep filled in.
+ */
+
+int
+meta_trans_replace(mdsetname_t *sp, mdname_t *transnp, mdname_t *oldnp,
+ mdname_t *newnp, mdcmdopts_t options, md_error_t *ep)
+{
+ replace_params_t params;
+ md_dev64_t old_dev,
+ new_dev;
+ daddr_t new_start_blk,
+ new_end_blk;
+
+ /* should have same set: transnp's minor must belong to set sp */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(transnp->dev)));
+
+ new_dev = newnp->dev;
+ new_start_blk = newnp->start_blk;
+ new_end_blk = newnp->end_blk;
+
+ meta_invalidate_name(transnp);
+ /* the old device binding is now established; it must name a device */
+ if ((old_dev = oldnp->dev) == NODEV64)
+ return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+ if (((strcmp(oldnp->rname, newnp->rname) == 0) &&
+ (old_dev != new_dev))) {
+ newnp->dev = new_dev;
+ newnp->start_blk = new_start_blk;
+ newnp->end_blk = new_end_blk;
+ }
+
+ if (add_key_name(sp, newnp, NULL, ep) != 0)
+ return (-1);
+
+ (void) memset(&params, 0, sizeof (params));
+ params.mnum = meta_getminor(transnp->dev);
+ MD_SETDRIVERNAME(&params, MD_TRANS, sp->setno);
+
+ params.cmd = REPLACE_COMP;
+ params.old_dev = old_dev;
+ params.new_dev = new_dev;
+ params.new_key = newnp->key;
+ if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
+ (void) del_key_name(sp, newnp, ep);
+ return (mdstealerror(ep, &params.mde));
+ }
+ meta_invalidate_name(oldnp);
+ meta_invalidate_name(newnp);
+ meta_invalidate_name(transnp);
+
+ if (options & MDCMD_PRINT) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "%s: device %s is replaced with %s\n"),
+ transnp->cname, oldnp->cname, newnp->cname);
+ }
+ return (0);
+}
+
+
+
+/*
+ * FUNCTION: meta_get_trans_names()
+ * INPUT: sp - the set name to get trans from
+ * options - options from the command line, passed through unchanged
+ * OUTPUT: nlpp - list of all trans names
+ * ep - return error pointer
+ * RETURNS: int - whatever meta_get_names() returns for MD_TRANS;
+ * callers in this file treat a negative value as failure
+ * PURPOSE: returns a list of all trans in the metadb
+ * for all devices in the specified set; this is a thin
+ * wrapper around meta_get_names(MD_TRANS, ...)
+ */
+int
+meta_get_trans_names(
+ mdsetname_t *sp,
+ mdnamelist_t **nlpp,
+ int options,
+ md_error_t *ep
+)
+{
+ return (meta_get_names(MD_TRANS, sp, nlpp, options, ep));
+}
+
+/*
+ * free trans unit
+ *
+ * Releases the md_trans_t itself with Free().  The master and log names it
+ * points to are not freed here.
+ */
+void
+meta_free_trans(
+ md_trans_t *transp
+)
+{
+ Free(transp);
+}
+
+/*
+ * get trans (common)
+ *
+ * Return the md_trans_t describing trans metadevice transnp in set sp.
+ *
+ * If the drive name of transnp already caches a unit, that unit is
+ * returned directly.  Otherwise the driver's misc name is checked to be
+ * MD_TRANS (MDE_NOT_MT is raised if it is not), the unit is fetched with
+ * meta_get_mdunit() and a new md_trans_t is filled in from it: the common
+ * fields, the master name and -- unless TRANS_DETACHED is set in the unit
+ * flags -- the log name, followed by the flags, timestamps, log error, log
+ * size and debug fields.  The log name's start_blk is overwritten with
+ * un_l_pwsblk + un_l_maxtransfer.  The result is cached in the drive name
+ * before it is returned.
+ *
+ * fast is handed through unchanged to metakeyname().
+ *
+ * When the META_DEBUG_START_BLK environment variable is set, the start
+ * block reported by metagetstart() is compared with the computed one and a
+ * warning is printed if it is larger.
+ *
+ * Returns NULL with ep filled in on failure.
+ */
+md_trans_t *
+meta_get_trans_common(
+ mdsetname_t *sp,
+ mdname_t *transnp,
+ int fast,
+ md_error_t *ep
+)
+{
+ mddrivename_t *dnp = transnp->drivenamep;
+ char *miscname;
+ mt_unit_t *mt;
+ md_trans_t *transp;
+ int gotlog;
+
+ /* must have set */
+ assert(sp != NULL);
+ assert(sp->setno == MD_MIN2SET(meta_getminor(transnp->dev)));
+
+ /* short circuit: a unit is already cached on the drive name */
+ if (dnp->unitp != NULL) {
+ assert(dnp->unitp->type == MD_METATRANS);
+ return ((md_trans_t *)dnp->unitp);
+ }
+
+ /* get miscname and unit */
+ if ((miscname = metagetmiscname(transnp, ep)) == NULL)
+ return (NULL);
+ if (strcmp(miscname, MD_TRANS) != 0) {
+ (void) mdmderror(ep, MDE_NOT_MT,
+ meta_getminor(transnp->dev), transnp->cname);
+ return (NULL);
+ }
+ if ((mt = (mt_unit_t *)meta_get_mdunit(sp, transnp, ep)) == NULL)
+ return (NULL);
+ assert(mt->c.un_type == MD_METATRANS);
+
+ /* allocate trans */
+ transp = Zalloc(sizeof (*transp));
+
+ /* get common info */
+ transp->common.namep = transnp;
+ transp->common.type = mt->c.un_type;
+ transp->common.state = mt->c.un_status;
+ transp->common.capabilities = mt->c.un_capabilities;
+ transp->common.parent = mt->c.un_parent;
+ transp->common.size = mt->c.un_total_blocks;
+ transp->common.user_flags = mt->c.un_user_flags;
+ transp->common.revision = mt->c.un_revision;
+
+ /* get master */
+ transp->masternamep = metakeyname(&sp, mt->un_m_key, fast, ep);
+ if (transp->masternamep == NULL)
+ goto out;
+
+ /* get log, which exists unless the unit is flagged TRANS_DETACHED */
+ gotlog = ((mt->un_flags & TRANS_DETACHED) == 0);
+ if (gotlog) {
+ daddr_t sblk;
+
+ transp->lognamep = metakeyname(&sp, mt->un_l_key, fast, ep);
+ if (transp->lognamep == NULL)
+ goto out;
+
+ /* calculate the kernels start block */
+ sblk = mt->un_l_pwsblk + mt->un_l_maxtransfer;
+
+ if (getenv("META_DEBUG_START_BLK") != NULL) {
+ if (metagetstart(sp, transp->lognamep, ep) ==
+ MD_DISKADDR_ERROR)
+ mdclrerror(ep);
+
+ if (transp->lognamep->start_blk > sblk)
+ md_eprintf(dgettext(TEXT_DOMAIN,
+ "%s: suspected bad start block [trans]\n"),
+ transp->lognamep->cname);
+ }
+
+ /* override any start_blk */
+ transp->lognamep->start_blk = sblk;
+ }
+
+ /* get flags, etc. */
+ transp->flags = mt->un_flags;
+ transp->timestamp = mt->un_timestamp;
+ transp->log_error = mt->un_l_error;
+ transp->log_timestamp = mt->un_l_timestamp;
+ transp->log_size = mt->un_l_nblks;
+ transp->debug = mt->un_debug;
+
+ /* cleanup, return success; the new unit is cached on the drive name */
+ Free(mt);
+ dnp->unitp = (md_common_t *)transp;
+ return (transp);
+
+ /* cleanup, return error */
+out:
+ Free(mt);
+ meta_free_trans(transp);
+ return (NULL);
+}
+
+/*
+ * get trans
+ *
+ * Convenience wrapper: meta_get_trans_common() with fast set to 0.
+ * Returns the trans unit for transnp, or NULL with ep filled in.
+ */
+md_trans_t *
+meta_get_trans(
+ mdsetname_t *sp,
+ mdname_t *transnp,
+ md_error_t *ep
+)
+{
+ return (meta_get_trans_common(sp, transnp, 0, ep));
+}
+
+/*
+ * Check whether the range [slblk, slblk + nblks) of device np collides
+ * with the master or log device of trans transnp.
+ *
+ * The master is checked unless it is itself a metadevice.  The log is
+ * checked only when the trans has one, MDCHK_ALLOW_LOG is not set in
+ * options, the log is not a metadevice and it lives on the same drive
+ * as np.
+ *
+ * Returns 0 when there is no conflict, -1 (with ep filled in by the
+ * failing helper) otherwise.
+ */
+static int
+in_trans(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	mdname_t	*np,
+	mdchkopts_t	options,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_trans_t	*unitp;
+	mdname_t	*mnp;
+	mdname_t	*lnp;
+	daddr_t		lstart;
+	int		samedrive;
+
+	/* should be in the same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(transnp->dev)));
+
+	/* fetch the trans unit */
+	unitp = meta_get_trans(sp, transnp, ep);
+	if (unitp == NULL)
+		return (-1);
+
+	/* master: only plain devices can overlap */
+	mnp = unitp->masternamep;
+	if (! metaismeta(mnp)) {
+		if (meta_check_overlap(transnp->cname, np, slblk, nblks,
+		    mnp, 0, -1, ep) != 0)
+			return (-1);
+	}
+
+	/* log: nothing to do in any of these cases */
+	lnp = unitp->lognamep;
+	if (lnp == NULL)
+		return (0);
+	if (options & MDCHK_ALLOW_LOG)
+		return (0);
+	if (metaismeta(lnp))
+		return (0);
+
+	/* check same drive since metagetstart() can fail */
+	samedrive = meta_check_samedrive(np, lnp, ep);
+	if (samedrive < 0)
+		return (-1);
+	if (samedrive == 0)
+		return (0);
+
+	/* same drive: compare against the log's real start block */
+	lstart = metagetstart(sp, lnp, ep);
+	if (lstart == MD_DISKADDR_ERROR)
+		return (-1);
+	if (meta_check_overlap(transnp->cname, np, slblk, nblks,
+	    lnp, lstart, -1, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Check whether the range [slblk, slblk + nblks) of device np is used by
+ * any trans metadevice in set sp.
+ *
+ * Returns 0 when no trans conflicts, -1 with ep filled in when the list
+ * of trans devices cannot be obtained or a conflict (or lookup failure) is
+ * reported for one of them.  The scan stops at the first failure.
+ */
+int
+meta_check_intrans(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdchkopts_t	options,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*translist = NULL;
+	mdnamelist_t	*cur;
+	int		result = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* collect every trans in the set */
+	if (meta_get_trans_names(sp, &translist, 0, ep) < 0)
+		return (-1);
+
+	/* test them one by one */
+	cur = translist;
+	while (cur != NULL) {
+		if (in_trans(sp, cur->namep, np, options, slblk, nblks,
+		    ep) != 0) {
+			result = -1;
+			break;
+		}
+		cur = cur->next;
+	}
+
+	/* the list is released on success and failure alike */
+	metafreenamelist(translist);
+	return (result);
+}
+
+/*
+ * Check that np may be used as the master device of a trans.
+ *
+ * np must be a disk, must not be in use (skipped when force is set) and
+ * must belong to set sp.  A plain device must not be part of another
+ * metadevice.  A metadevice must either be a parentless soft partition,
+ * or be able to take a parent and not have one yet.
+ *
+ * Returns 0 when np is acceptable; otherwise -1 or the value returned by
+ * mdmderror(), with ep filled in.
+ */
+int
+meta_check_master(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	int		force,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	chkopts = 0;
+	md_common_t	*unitp;
+
+	/* make sure we have a disk */
+	if (metachkdisk(np, ep) != 0)
+		return (-1);
+
+	/* check to ensure that it is not already in use */
+	if (!force) {
+		if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0)
+			return (-1);
+	}
+
+	/* make sure it is in the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	/* plain device: it must not be part of any metadevice */
+	if (! metaismeta(np)) {
+		if (meta_check_inmeta(sp, np, chkopts, 0, -1, ep) != 0)
+			return (-1);
+		return (0);
+	}
+
+	/* metadevice from here on */
+	unitp = meta_get_unit(sp, np, ep);
+	if (unitp == NULL)
+		return (-1);
+
+	/*
+	 * Soft partitions can sit at the top or the bottom of a metadevice
+	 * stack, so they are judged on their own: one without a parent is
+	 * fine as a master.  MD_CAN_PARENT cannot be used for this, because
+	 * a soft partition built on a metadevice clears that flag to
+	 * prevent nested configurations.
+	 */
+	if ((meta_sp_issp(sp, np, ep) == 0) &&
+	    (unitp->parent == MD_NO_PARENT))
+		return (0);
+
+	/* anything else must accept a parent and not have one yet */
+	if ((unitp->capabilities & MD_CAN_PARENT) &&
+	    (unitp->parent == MD_NO_PARENT))
+		return (0);
+
+	return (mdmderror(ep, MDE_INVAL_UNIT,
+	    meta_getminor(np->dev), np->cname));
+}
+
+/*
+ * Check that np may be used as the log device of a trans.
+ *
+ * np must be a disk, must not be in use and must belong to set sp.  A
+ * plain device must not be part of another metadevice, where replicas and
+ * existing logs are tolerated (MDCHK_ALLOW_MDDB | MDCHK_ALLOW_LOG).  A
+ * metadevice must either be a parentless soft partition, or be able to
+ * take a parent and have either no parent or MD_MULTI_PARENT.
+ *
+ * Returns 0 when np is acceptable; otherwise -1 or the value returned by
+ * mdmderror(), with ep filled in.
+ */
+int
+meta_check_log(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	chkopts = (MDCHK_ALLOW_MDDB | MDCHK_ALLOW_LOG);
+	md_common_t	*unitp;
+
+	/* make sure we have a disk */
+	if (metachkdisk(np, ep) != 0)
+		return (-1);
+
+	/* check to ensure that it is not already in use */
+	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0)
+		return (-1);
+
+	/* make sure it is in the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	/* plain device: it must not be part of any metadevice */
+	if (! metaismeta(np)) {
+		if (meta_check_inmeta(sp, np, chkopts, 0, -1, ep) != 0)
+			return (-1);
+		return (0);
+	}
+
+	/* metadevice from here on */
+	unitp = meta_get_unit(sp, np, ep);
+	if (unitp == NULL)
+		return (-1);
+
+	/*
+	 * Soft partitions can sit at the top or the bottom of a metadevice
+	 * stack, so they are judged on their own: one without a parent is
+	 * fine as a log.  MD_CAN_PARENT cannot be used for this, because a
+	 * soft partition built on a metadevice clears that flag to prevent
+	 * nested configurations.
+	 */
+	if ((meta_sp_issp(sp, np, ep) == 0) &&
+	    (unitp->parent == MD_NO_PARENT))
+		return (0);
+
+	/* anything else must accept a parent and be free or multi-parent */
+	if ((unitp->capabilities & MD_CAN_PARENT) &&
+	    ((unitp->parent == MD_NO_PARENT) ||
+	    (unitp->parent == MD_MULTI_PARENT)))
+		return (0);
+
+	return (mdmderror(ep, MDE_INVAL_UNIT,
+	    meta_getminor(np->dev), np->cname));
+}
+
+/*
+ * print trans
+ *
+ * Writes one line of the form "<trans> -t <master> [<log>]" to fp.  The
+ * master and log are printed by their short name when their raw name
+ * contains /dev/rdsk, /dev/md/rdsk or /dev/td/, and by their full raw name
+ * otherwise.
+ *
+ * Returns 0 on success.  On a write failure returns -1 and records errno
+ * against fname in ep.
+ */
+static int
+trans_print(
+	md_trans_t	*transp,
+	char		*fname,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	int		rval = -1;
+
+	/*
+	 * fprintf() reports failure with a negative value, which need not
+	 * be EOF, so every write below is tested with "< 0".
+	 */
+
+	/* print name and -t */
+	if (fprintf(fp, "%s -t", transp->common.namep->cname) < 0)
+		goto out;
+
+	/* print master */
+	/*
+	 * If the path is our standard /dev/rdsk or /dev/md/rdsk
+	 * then just print out the cxtxdxsx or the dx, metainit
+	 * will assume the default, otherwise we need the full
+	 * pathname to make sure this works as we intend.
+	 */
+	if ((strstr(transp->masternamep->rname, "/dev/rdsk") == NULL) &&
+	    (strstr(transp->masternamep->rname, "/dev/md/rdsk") == NULL) &&
+	    (strstr(transp->masternamep->rname, "/dev/td/") == NULL)) {
+		/* not standard path, print full pathname */
+		if (fprintf(fp, " %s", transp->masternamep->rname) < 0)
+			goto out;
+	} else {
+		/* standard path, print ctds or d number */
+		if (fprintf(fp, " %s", transp->masternamep->cname) < 0)
+			goto out;
+	}
+
+
+	/* print log */
+	if (transp->lognamep != NULL) {
+		/*
+		 * If the path is our standard /dev/rdsk or /dev/md/rdsk
+		 * then just print out the cxtxdxsx or the dx, metainit
+		 * will assume the default, otherwise we need the full
+		 * pathname to make sure this works as we intend.
+		 */
+		if ((strstr(transp->lognamep->rname, "/dev/rdsk") == NULL) &&
+		    (strstr(transp->lognamep->rname, "/dev/md/rdsk") == NULL) &&
+		    (strstr(transp->lognamep->rname, "/dev/td/") == NULL)) {
+			/* not standard path, print full pathname */
+			if (fprintf(fp, " %s", transp->lognamep->rname) < 0)
+				goto out;
+		} else {
+			/* standard path */
+			if (fprintf(fp, " %s", transp->lognamep->cname) < 0)
+				goto out;
+		}
+	}
+
+	/* print terminating newline */
+	if (fprintf(fp, "\n") < 0)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * convert flags to repair action
+ *
+ * Returns a Zalloc()ed copy of the localized "To Fix" hint when the
+ * trans reports a log error (LDL_ANYERROR); the caller must Free() it.
+ * Returns NULL when transp is NULL, when the trans is detached,
+ * detaching or attaching (the log_error word is not yet meaningful in
+ * those states), when there is no log error, when the localized message
+ * is empty, or when the allocation fails.
+ */
+
+char *
+mt_flags_to_action(
+	md_trans_t *transp
+)
+{
+	char	*hint;
+	char	*copyp;
+	int	hintlen;
+
+	if (transp == NULL)
+		return (NULL);
+
+	/* log_error is not (yet) meaningful in any of these states */
+	if (transp->flags & (TRANS_DETACHED|TRANS_DETACHING|TRANS_ATTACHING))
+		return (NULL);
+
+	/* nothing to repair */
+	if (!(transp->log_error & LDL_ANYERROR))
+		return (NULL);
+
+	hint = dgettext(TEXT_DOMAIN,
+	    " To Fix: Please refer to the log device's status.\n");
+	hintlen = strlen(hint);
+	if (hintlen <= 0)
+		return (NULL);
+
+	copyp = Zalloc(hintlen + 1);
+	if (copyp == NULL)
+		return (NULL);
+
+	/* hintlen + 1 bytes copies the terminating NUL as well */
+	(void) strncpy(copyp, hint, hintlen + 1);
+	return (copyp);
+}
+
+/*
+ * convert log state to repair action
+ *
+ * Builds the command sequence to run for every trans in transnlp that
+ * is in a log-error state (LDL_ANYERROR) and whose log device is
+ * lognamep: a list of umount commands for the mounted ones, a single
+ * fsck command naming each affected raw device, and a list of mount
+ * commands to bring the mounted ones back.
+ *
+ * Returns a Zalloc()ed string that the caller must Free(), or NULL when
+ * no trans qualifies, when a lookup fails (ep then holds whatever the
+ * failing routine stored), or when the allocation fails.
+ *
+ * The messages are built by appending at the current end of each
+ * buffer.  snprintf() must never be handed its own destination as a
+ * source argument: copying between overlapping objects is undefined.
+ */
+char *
+mt_l_error_to_action(
+	mdsetname_t	*sp,
+	mdnamelist_t	*transnlp,
+	mdname_t	*lognamep,
+	md_error_t	*ep
+)
+{
+	char		umnt_msg[1024];
+	char		fsck_msg[1024];
+	char		mnt_msg[1024];
+	mdnamelist_t	*p;
+	md_trans_t	*tp;
+	int		rc;
+	int		len = 0;
+	size_t		used;
+	char		*rmsg = NULL;
+	char		*mp = NULL;
+	bool_t		is_mounted = FALSE;
+	bool_t		any_in_error = FALSE;
+	int		only_fsck = TRUE;
+
+	(void) memset(umnt_msg, 0, sizeof (umnt_msg));
+	(void) memset(fsck_msg, 0, sizeof (fsck_msg));
+	(void) memset(mnt_msg, 0, sizeof (mnt_msg));
+
+	/*
+	 * If the trans devices listed in transnlp contain
+	 * devices which are in error and are sub-mount points
+	 * of each other, then the list would need to be reverse sorted.
+	 * When this actually occurs, and customers find the usage
+	 * message insufficiently clear, then we should take the
+	 * hit to sort it.
+	 */
+
+	/*
+	 * This preliminary loop is necessary to keep the
+	 * fsck message greppable, if possible: it stops at the first
+	 * qualifying trans that is mounted.
+	 */
+	for (p = transnlp; ((p != NULL) && (only_fsck == TRUE)); p = p->next) {
+
+		if ((tp = meta_get_trans(sp, p->namep, ep)) == NULL) {
+			goto out;
+		}
+
+		/* skip trans devices without a log error */
+		if (!(tp->log_error & LDL_ANYERROR)) {
+			continue;
+		}
+
+		/* skip trans devices that use a different log */
+		if ((tp->lognamep == NULL) ||
+		    (strcmp(lognamep->bname, tp->lognamep->bname) != 0)) {
+			continue;
+		}
+
+		mdclrerror(ep);
+		is_mounted = (meta_check_inuse(sp,
+		    p->namep, MDCHK_MOUNTED, ep) != 0);
+
+		if (!mdisok(ep) && mdiserror(ep, MDE_IS_MOUNTED)) {
+			goto out;
+		}
+
+		mdclrerror(ep);
+		mp = meta_get_mountp(sp, p->namep, ep);
+
+		if (!mdisok(ep)) {
+			goto out;
+		}
+
+		if (is_mounted) {
+			if (!mp) {
+				goto out;
+			}
+			only_fsck = FALSE;
+
+			/*
+			 * not greppable; there must be multiple commands, so
+			 * add preliminary newline so the formatting is uniform
+			 */
+			rc = snprintf(umnt_msg, sizeof (umnt_msg), "\n");
+			if (rc < 0) {
+				goto out;
+			}
+		}
+
+		if (mp) {
+			Free(mp);
+			mp = NULL;
+		}
+	}
+
+	/*
+	 * although the log may either be in error or hard-error
+	 * states, the action is the same; unmount, fsck and remount
+	 * all fs associated with this log
+	 */
+	for (p = transnlp; (p != NULL); p = p->next) {
+
+		if ((tp = meta_get_trans(sp, p->namep, ep)) == NULL) {
+			goto out;
+		}
+
+		if (!(tp->log_error & LDL_ANYERROR)) {
+			continue;
+		}
+
+		if ((tp->lognamep == NULL) ||
+		    (strcmp(lognamep->bname, tp->lognamep->bname) != 0)) {
+			continue;
+		}
+
+		mdclrerror(ep);
+		is_mounted = (meta_check_inuse(sp,
+		    p->namep, MDCHK_MOUNTED, ep) != 0);
+
+		if (!mdisok(ep) && mdiserror(ep, MDE_IS_MOUNTED)) {
+			goto out;
+		}
+
+		mdclrerror(ep);
+		mp = meta_get_mountp(sp, p->namep, ep);
+
+		if (!mdisok(ep)) {
+			goto out;
+		}
+
+		if (is_mounted) {
+			/* a mounted device must have a mount point */
+			if (!mp) {
+				goto out;
+			}
+
+			/* append this device's umount command */
+			used = strlen(umnt_msg);
+			rc = snprintf(umnt_msg + used,
+			    sizeof (umnt_msg) - used, " umount %s\n", mp);
+			if (rc < 0) {
+				goto out;
+			}
+		}
+
+		if (!any_in_error) {
+			/* first device: start the fsck command line */
+			rc = snprintf(fsck_msg, sizeof (fsck_msg), "%s %s",
+			    ((only_fsck) ? "fsck" : " fsck"),
+			    p->namep->rname);
+		} else {
+			/* later devices: add another fsck argument */
+			used = strlen(fsck_msg);
+			rc = snprintf(fsck_msg + used,
+			    sizeof (fsck_msg) - used, " %s",
+			    p->namep->rname);
+		}
+		if (rc < 0) {
+			goto out;
+		}
+
+		if (is_mounted) {
+			/* append this device's mount command */
+			used = strlen(mnt_msg);
+			rc = snprintf(mnt_msg + used,
+			    sizeof (mnt_msg) - used, " mount %s %s\n",
+			    p->namep->bname, mp);
+			if (rc < 0) {
+				goto out;
+			}
+		}
+
+		if (mp) {
+			Free(mp);
+			mp = NULL;
+		}
+
+		any_in_error = TRUE;
+	}
+
+	if (!any_in_error) {
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): the extra byte is reserved when only_fsck is set,
+	 * while the "\n" separator below is emitted when it is NOT set, so
+	 * in the multi-command case snprintf() cuts off the last character
+	 * (the final newline of mnt_msg).  The sizing is kept as it was
+	 * because callers may rely on the resulting text - confirm intent
+	 * before changing it.
+	 */
+	len = strlen(umnt_msg) + strlen(fsck_msg) + strlen(mnt_msg) +
+	    (only_fsck? 1: 0) + 1;
+	if (!(rmsg = Zalloc(len))) {
+		goto out;
+	}
+	(void) snprintf(rmsg, len, "%s%s%s%s", umnt_msg, fsck_msg,
+	    !only_fsck? "\n": "", mnt_msg);
+
+out:
+	/* mp is still held when a loop was left through an error path */
+	if (mp) {
+		Free(mp);
+		mp = NULL;
+	}
+
+	return (rmsg);
+}
+
+/*
+ * printable log state
+ *
+ * Stores the log timestamp in *tvp when tvp is not NULL, then returns
+ * the localized state name: "Unavailable" when tstate is non-zero,
+ * otherwise "Error", "Hard Error" or "Okay" according to the log_error
+ * bits (LDL_ERROR is tested before LDL_HERROR).
+ */
+char *
+mt_l_error_to_name(
+	md_trans_t	*transp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate	/* Errored tstate flags */
+)
+{
+	/* hand back the time of the last log state change */
+	if (tvp != NULL) {
+		*tvp = transp->log_timestamp;
+	}
+
+	/* an errored tstate overrides whatever the log reports */
+	if (tstate != 0)
+		return (dgettext(TEXT_DOMAIN, "Unavailable"));
+
+	if (transp->log_error & LDL_ERROR)
+		return (dgettext(TEXT_DOMAIN, "Error"));
+
+	if (transp->log_error & LDL_HERROR)
+		return (dgettext(TEXT_DOMAIN, "Hard Error"));
+
+	return (dgettext(TEXT_DOMAIN, "Okay"));
+}
+
+/*
+ * printable trans state
+ *
+ * Stores the trans timestamp in *tvp when tvp is not NULL, then returns
+ * the localized state name: "Unavailable" when tstate is non-zero,
+ * "Detached", "Detaching" or "Attaching" when the matching flag is set
+ * (tested in that order), and otherwise whatever mt_l_error_to_name()
+ * reports for the log (that call also rewrites *tvp).
+ */
+char *
+mt_flags_to_name(
+	md_trans_t	*transp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate	/* Errored tstate flags */
+)
+{
+	char	*statep;
+
+	/* hand back the time of the last trans state change */
+	if (tvp != NULL) {
+		*tvp = transp->timestamp;
+	}
+
+	if (tstate != 0) {
+		statep = dgettext(TEXT_DOMAIN, "Unavailable");
+	} else if (transp->flags & TRANS_DETACHED) {
+		statep = dgettext(TEXT_DOMAIN, "Detached");
+	} else if (transp->flags & TRANS_DETACHING) {
+		statep = dgettext(TEXT_DOMAIN, "Detaching");
+	} else if (transp->flags & TRANS_ATTACHING) {
+		statep = dgettext(TEXT_DOMAIN, "Attaching");
+	} else {
+		/* log is attached: its state is the state of the trans */
+		statep = mt_l_error_to_name(transp, tvp, tstate);
+	}
+
+	return (statep);
+}
+
+/*
+ * report trans
+ *
+ * Writes the long-form description of a trans to fp: optional header,
+ * state (with timestamp when PRINT_TIMES is set), repair hint, debug
+ * modes, size, master and log device names and, when the master is not
+ * a metadevice, a one-row table describing the master.
+ *
+ * Returns 0 on success and -1 on failure.  A failed fprintf() is
+ * reported in ep through mdsyserror(errno, fname).  When a libmeta
+ * lookup fails, ep is left as that routine set it; such paths return
+ * directly so that the errno-based error does not overwrite it.
+ */
+static int
+trans_report(
+	mdsetname_t	*sp,
+	md_trans_t	*transp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*mt_state;
+	md_timeval32_t	tv;
+	char		*timep;
+	int		rval = -1;
+	char		*actionp = NULL;
+	char		*devid = "";
+	mdname_t	*didnp = NULL;
+	ddi_devid_t	dtp;
+	uint_t		tstate = 0;
+
+	/* print header */
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Trans"
+		    " (Feature replaced see message below)\n"),
+		    transp->common.namep->cname) == EOF) {
+			goto out;
+		}
+	}
+
+	/* print state */
+	if (metaismeta(transp->common.namep)) {
+		/*
+		 * Return directly, as log_report() does for the same call:
+		 * the out: path would replace the error stored in ep with
+		 * one built from errno.
+		 */
+		if (meta_get_tstate(transp->common.namep->dev, &tstate, ep)
+		    != 0)
+			return (-1);
+	}
+	mt_state = mt_flags_to_name(transp, &tv, tstate & MD_DEV_ERRORED);
+	if (options & PRINT_TIMES) {
+		timep = meta_print_time(&tv);
+	} else {
+		timep = "";
+	}
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " State: %-12s %s\n"),
+	    mt_state, timep) == EOF) {
+		goto out;
+	}
+
+	/* print repair hint; actionp is released at out: on failure */
+	if ((tstate & MD_DEV_ERRORED) == 0) {
+		actionp = mt_flags_to_action(transp);
+		if (actionp) {
+			if (fprintf(fp, "%s", actionp) == EOF) {
+				goto out;
+			}
+			Free(actionp);
+			actionp = NULL;
+		}
+	}
+
+	/* debug stuff */
+	if (transp->debug) {
+		if (fprintf(fp,
+		    " Debug Modes:%s%s%s%s%s%s%s%s%s%s%s\n",
+		    (transp->debug & MT_TRANSACT) ? " TRANSACT" : "",
+		    (transp->debug & MT_MATAMAP) ? " METADATA" : "",
+		    (transp->debug & MT_WRITE_CHECK) ? " WRITES" : "",
+		    (transp->debug & MT_LOG_WRITE_CHECK) ? " LOGWRITES" : "",
+		    (transp->debug & MT_CHECK_MAP) ? " MAP" : "",
+		    (transp->debug & MT_TRACE) ? " TRACE" : "",
+		    (transp->debug & MT_SIZE) ? " SIZE" : "",
+		    (transp->debug & MT_NOASYNC) ? " NOASYNC" : "",
+		    (transp->debug & MT_FORCEROLL) ? " FORCEROLL" : "",
+		    (transp->debug & MT_SCAN) ? " SCAN" : "",
+		    (transp->debug & MT_PREWRITE) ? " PREWRITE" : "")
+		    == EOF) {
+			goto out;
+		}
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"),
+	    transp->common.size,
+	    meta_number_to_string(transp->common.size, DEV_BSIZE)) == EOF) {
+		goto out;
+	}
+
+
+	/* print master */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Master Device: %s\n"),
+	    transp->masternamep->cname) == EOF) {
+		goto out;
+	}
+
+	/* print log */
+	if (transp->lognamep != NULL) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " Logging Device: %s\n"),
+		    transp->lognamep->cname) == EOF) {
+			goto out;
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* print master details if regular device */
+	if (! metaismeta(transp->masternamep)) {
+		daddr_t	start_blk = 0;
+		char	*has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+		int	len;
+
+		/*
+		 * The column width is computed at run time and passed to
+		 * (f)printf through '*'.  This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		len = strlen(transp->masternamep->cname) + 2;
+		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Master Device")));
+
+		/* print header */
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %-5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Master Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+			goto out;
+		}
+
+		/* populate the key in the name_p structure */
+		if ((didnp = metadevname(&sp,
+		    transp->masternamep->dev, ep)) == NULL) {
+			return (-1);
+		}
+
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL) {
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			} else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info, using the same column width as the header */
+		if (fprintf(fp, "\t%-*s %8ld %-5.5s %s\n", len,
+		    transp->masternamep->cname,
+		    start_blk, has_mddb_str, devid) == EOF) {
+			goto out;
+		}
+		/* add extra line */
+		if (fprintf(fp, "\n") == EOF)
+			goto out;
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	/* read errno before anything else gets a chance to change it */
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	/* still set only when printing the repair hint failed */
+	if (actionp != NULL)
+		Free(actionp);
+	return (rval);
+}
+
+/*
+ * print/report trans
+ *
+ * With transnp == NULL every trans in the set is processed by calling
+ * this routine once per name; *meta_print_trans_msgp is then set to 1
+ * (when the pointer is not NULL) and the result is -1 if any of those
+ * calls failed.
+ *
+ * With a specific transnp the trans is looked up, its log device is
+ * added to *lognlpp unless a name with the same block name is already
+ * there, and the trans is printed in short (PRINT_SHORT) or long form.
+ * A trans that has a parent is not printed unless PRINT_SUBDEVS is set,
+ * and nothing is printed for the trans itself under
+ * PRINT_LARGEDEVICES.  A metadevice master is printed through
+ * meta_print_name(); the log is printed later by the caller.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+meta_trans_print(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	mdnamelist_t	**nlistpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	int		*meta_print_trans_msgp, /* NULL if transnp != NULL */
+	mdnamelist_t	**lognlpp,
+	md_error_t	*ep
+)
+{
+	md_trans_t	*transp;
+	mdname_t	*lognamep;
+	int		status;
+
+	/* should have same set */
+	assert(sp != NULL);
+
+	/* no name given: walk every trans in the set */
+	if (transnp == NULL) {
+		mdnamelist_t	*all = NULL;
+		mdnamelist_t	*cur;
+		int		total;
+
+		total = meta_get_trans_names(sp, &all, options, ep);
+		if (total < 0)
+			return (-1);
+		if (total == 0)
+			return (0);
+
+		/* keep going after a failure, but remember it */
+		status = 0;
+		cur = all;
+		while (cur != NULL) {
+			if (meta_trans_print(sp, cur->namep, nlistpp, fname,
+			    fp, options, meta_print_trans_msgp, lognlpp,
+			    ep) != 0) {
+				status = -1;
+			}
+			cur = cur->next;
+		}
+
+		if (meta_print_trans_msgp != NULL)
+			*meta_print_trans_msgp = 1;
+
+		metafreenamelist(all);
+		return (status);
+	}
+
+	/* get unit structure */
+	transp = meta_get_trans_common(sp, transnp,
+	    ((options & PRINT_FAST) ? 1 : 0), ep);
+	if (transp == NULL)
+		return (-1);
+
+	/* remember the log, once per distinct block name */
+	if ((lognlpp != NULL) &&
+	    ((lognamep = transp->lognamep) != NULL)) {
+		mdnamelist_t	*seen = *lognlpp;
+
+		while ((seen != NULL) &&
+		    (strcmp(lognamep->bname, seen->namep->bname) != 0)) {
+			seen = seen->next;
+		}
+		if (seen == NULL)
+			(void) metanamelist_append(lognlpp, lognamep);
+	}
+
+	/* a parented trans is only shown when subdevices are wanted */
+	if ((MD_HAS_PARENT(transp->common.parent)) &&
+	    (! (options & PRINT_SUBDEVS))) {
+		return (0);
+	}
+
+	/* can't have a large trans */
+	if (!(options & PRINT_LARGEDEVICES)) {
+		if (options & PRINT_SHORT)
+			status = trans_print(transp, fname, fp, ep);
+		else
+			status = trans_report(sp, transp, fname, fp,
+			    options, ep);
+		if (status != 0)
+			return (-1);
+	}
+
+	/* print underlying metadevices, log is later */
+	if (metaismeta(transp->masternamep)) {
+		status = meta_print_name(sp, transp->masternamep, nlistpp,
+		    fname, fp, (options | PRINT_HEADER | PRINT_SUBDEVS),
+		    NULL, ep);
+		if (status != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * print log
+ *
+ * Short-form output for a log device.  A log that is itself a
+ * metadevice is printed through meta_print_name(); nothing is printed
+ * for a regular device.  Returns the meta_print_name() result, or 0.
+ */
+static int
+log_print(
+	mdsetname_t	*sp,
+	mdname_t	*lognamep,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	/* name list handed to meta_print_name(), private to this call */
+	mdnamelist_t	*printed = NULL;
+
+	/* regular device: nothing to show */
+	if (! metaismeta(lognamep))
+		return (0);
+
+	/* metadevice info */
+	return (meta_print_name(sp, lognamep, &printed, fname, fp,
+	    options, NULL, ep));
+}
+
+/*
+ * report log
+ *
+ * Writes the long-form description of a log device to fp: a header
+ * naming a trans that uses the log, the log state (with timestamp when
+ * PRINT_TIMES is set), the repair commands, the log size, optional
+ * debug statistics (PRINT_DEBUG) and then either the underlying
+ * metadevice or a one-row table describing the regular device.
+ *
+ * transnlp is the list of trans devices to search; the last trans in
+ * it that uses lognamep supplies the state and size shown.  With
+ * PRINT_LARGEDEVICES, a log no larger than MD_MAX_BLKS_FOR_SMALL_DEVS
+ * is skipped.
+ *
+ * Returns 0 on success and -1 on failure.  A failed fprintf() is
+ * reported in ep through mdsyserror(errno, fname); when a libmeta
+ * lookup fails, ep is left as that routine set it.
+ */
+static int
+log_report(
+	mdsetname_t	*sp,
+	mdname_t	*lognamep,
+	mdnamelist_t	**nlistpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	mdnamelist_t	*transnlp,
+	md_error_t	*ep
+)
+{
+	md_trans_t	*transp = NULL;
+	mdnamelist_t	*p;
+	char		*ml_state;
+	md_timeval32_t	tv;
+	char		*timep;
+	char		*actionp = NULL;
+	int		rval = -1;
+	char		*devid = " ";
+	mdname_t	*didnp = NULL;
+	ddi_devid_t	dtp;
+	uint_t		tstate = 0;
+
+	/* find a trans that uses this log (the last match wins) */
+	for (p = transnlp; (p != NULL); p = p->next) {
+		md_trans_t	*tp;
+
+		if ((tp = meta_get_trans(sp, p->namep, ep)) == NULL)
+			return (-1);
+		if ((tp->lognamep != NULL) &&
+		    (strcmp(lognamep->bname, tp->lognamep->bname) == 0)) {
+			transp = tp;	/* save any parent trans */
+		}
+	}
+
+	/* we must have at least one trans */
+	assert(transp != NULL);
+	if (transp == NULL) {
+		rval = 0;
+		goto out;
+	}
+
+	/* only large logs are wanted and this one is small */
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (transp->log_size <= MD_MAX_BLKS_FOR_SMALL_DEVS)) {
+		rval = 0;
+		goto out;
+	}
+
+	/* print header and trans devices, collect log_error and size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Logging device for"),
+	    lognamep->cname) == EOF) {
+		goto out;
+	}
+
+	if ((transp->lognamep != NULL) &&
+	    (strcmp(lognamep->bname, transp->lognamep->bname) == 0)) {
+		if (fprintf(fp, " %s", transp->common.namep->cname)
+		    == EOF) {
+			goto out;
+		}
+	}
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* print state */
+	if (metaismeta(transp->lognamep)) {
+		if (meta_get_tstate(transp->lognamep->dev, &tstate, ep) != 0)
+			return (-1);
+	}
+	ml_state = mt_l_error_to_name(transp, &tv, tstate & MD_DEV_ERRORED);
+	if (options & PRINT_TIMES) {
+		timep = meta_print_time(&tv);
+	} else {
+		timep = "";
+	}
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " State: %-12s %s\n"),
+	    ml_state, timep) == EOF) {
+		goto out;
+	}
+
+	/* print repair commands; actionp is released at out: on failure */
+	if ((tstate & MD_DEV_ERRORED) == 0) {
+		actionp = mt_l_error_to_action(sp, transnlp, lognamep, ep);
+		if (actionp) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    " Invoke: %s\n"), actionp) == EOF) {
+				goto out;
+			}
+			Free(actionp);
+			actionp = NULL;
+		}
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %ld blocks (%s)\n"),
+	    transp->log_size,
+	    meta_number_to_string(transp->log_size, DEV_BSIZE)) == EOF) {
+		goto out;
+	}
+
+	/* MD_DEBUG stuff */
+	if (options & PRINT_DEBUG) {
+		mdname_t	*transnp = transp->common.namep;
+		mt_unit_t	*mt;
+		daddr_t		blksinuse, head, tail, nblks, eblk, sblk;
+		int		percent;
+
+		if ((mt = (mt_unit_t *)meta_get_mdunit(sp, transnp, ep))
+		    == NULL) {
+			return (-1);
+		}
+		assert(mt->c.un_type == MD_METATRANS);
+
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " Transfer Size: %d blocks\n"),
+		    mt->un_l_maxtransfer) == EOF) {
+			Free(mt);
+			goto out;
+		}
+
+		/* blocks between head and tail, allowing for wrap-around */
+		head = mt->un_l_head;
+		tail = mt->un_l_tail;
+		sblk = mt->un_l_sblk;
+		nblks = mt->un_l_nblks;
+		eblk = sblk + nblks;
+		if (head <= tail)
+			blksinuse = tail - head;
+		else
+			blksinuse = (eblk - head) + (tail - sblk);
+
+		/* report 0% rather than dividing by an empty log */
+		if (nblks != 0)
+			percent = ((u_longlong_t)blksinuse * 100) / nblks;
+		else
+			percent = 0;
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " Full: %d%% (%ld of %ld blocks)\n"),
+		    percent, blksinuse, nblks) == EOF) {
+			Free(mt);
+			goto out;
+		}
+
+		/* same guard for an unset reservation limit */
+		if (mt->un_l_maxresv != 0)
+			percent = ((u_longlong_t)mt->un_l_resv * 100) /
+			    mt->un_l_maxresv;
+		else
+			percent = 0;
+		/*
+		 * The conversions are %u: the former "%ud" printed the
+		 * number followed by a literal 'd'.
+		 */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    " Reserved: %d%% (%u of %u bytes)\n"),
+		    percent, mt->un_l_resv, mt->un_l_maxresv) == EOF) {
+			Free(mt);
+			goto out;
+		}
+		Free(mt);
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* print log details */
+	if (metaismeta(lognamep)) {
+		if (meta_print_name(sp, lognamep, nlistpp, fname, fp,
+		    options, NULL, ep) != 0) {
+			return (-1);
+		}
+	} else {
+		daddr_t	start_blk;
+		int	has_mddb;
+		char	*has_mddb_str;
+		int	len;
+
+		/*
+		 * The column width is computed at run time and passed to
+		 * (f)printf through '*'.  This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		len = strlen(lognamep->cname) + 2;
+		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Logging Device")));
+		/* print header */
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %-5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Logging Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+			goto out;
+		}
+		/* get info */
+		if ((start_blk = metagetstart(sp, lognamep, ep)) ==
+		    MD_DISKADDR_ERROR) {
+			return (-1);
+		}
+		if ((has_mddb = metahasmddb(sp, lognamep, ep)) < 0) {
+			return (-1);
+		}
+		if (has_mddb)
+			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+		else
+			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+		/* populate the key in the name_p structure */
+		if ((didnp = metadevname(&sp, lognamep->dev, ep)) == NULL) {
+			return (-1);
+		}
+
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL) {
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			} else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info, using the same column width as the header */
+		if (fprintf(fp, "\t%-*s %8ld %-5.5s %s\n",
+		    len, lognamep->cname, start_blk,
+		    has_mddb_str, devid) == EOF) {
+			goto out;
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	/* read errno before anything else gets a chance to change it */
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	/* still set only when printing the repair commands failed */
+	if (actionp != NULL)
+		Free(actionp);
+	return (rval);
+}
+
+/*
+ * print/report logs
+ *
+ * Prints every log device in lognlp, in short form (PRINT_SHORT) via
+ * log_print() or in long form via log_report().  The long form needs
+ * the names of all trans devices in the set, which are fetched here
+ * and released before returning.  PRINT_SUBDEVS is forced on for the
+ * per-log calls.
+ *
+ * Returns 0 when lognlp is empty or every log printed cleanly, and -1
+ * when the trans names cannot be fetched or any log fails to print
+ * (the remaining logs are still attempted).
+ */
+int
+meta_logs_print(
+	mdsetname_t	*sp,
+	mdnamelist_t	*lognlp,
+	mdnamelist_t	**nlistpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*transnlp = NULL;
+	mdnamelist_t	*p;
+	int		rval = 0;
+
+	/* must have a set */
+	assert(sp != NULL);
+
+	/* nothing to print */
+	if (lognlp == NULL)
+		return (0);
+
+	/* get trans devices, only needed for the long form */
+	if (! (options & PRINT_SHORT)) {
+		if (meta_get_trans_names(sp, &transnlp, options, ep) < 0)
+			return (-1);
+	}
+
+	/* print all logs */
+	options |= PRINT_SUBDEVS;
+	for (p = lognlp; (p != NULL); p = p->next) {
+		mdname_t	*lognamep = p->namep;
+
+		/* print appropriate detail */
+		if (options & PRINT_SHORT) {
+			if (log_print(sp, lognamep, fname, fp, options,
+			    ep) != 0) {
+				rval = -1;
+			}
+		} else {
+			if (log_report(sp, lognamep, nlistpp, fname, fp,
+			    options, transnlp, ep) != 0) {
+				rval = -1;
+			}
+		}
+	}
+
+	/* cleanup, return status */
+	metafreenamelist(transnlp);
+	return (rval);
+}
+
+/*
+ * meta_lockfs_common -- common lock and unlock code
+ *
+ * Looks the device up in the mount table and, when it is mounted, runs
+ * /usr/sbin/lockfs on its mount point: "-w" when lockit is non-zero,
+ * "-u" otherwise.  *cookie is set to 1 once lockfs has been started
+ * and to 0 when nothing was done, so that the unlock side can tell
+ * whether there is anything to undo.
+ *
+ * Normally this routine will return a 0 for success, even if
+ * lockfs wasn't able to lock down the filesystem.  The reason
+ * for this is that the master device can be in an errored state
+ * and the lock can't be obtained.  We don't want to prevent
+ * possible recovery in this case and it's not likely any activity
+ * will be occurring.  If the filesystem is healthy with activity
+ * lockfs will successfully lock the filesystem and return an
+ * error code of 0.
+ *
+ * A non-zero value is returned when fork() fails, or when we can't
+ * determine the outcome of the lockfs.  The latter should
+ * never occur because we don't catch signals that could cause
+ * waitpid() to prematurely return.
+ */
+static int
+meta_lockfs_common(mdname_t *fs, void **cookie, int lockit)
+{
+	char		*blkname;
+	FILE		*m;
+	struct mnttab	tab_wildcard, tab_match;
+	pid_t		pid;
+	int		lock_exit;
+	int		mounted;
+
+	(void) memset(&tab_wildcard, 0, sizeof (tab_wildcard));
+	(void) memset(&tab_match, 0, sizeof (tab_match));
+
+	/*
+	 * NOTE(review): a name returned by getfullblkname() is never
+	 * released here - confirm whether it is allocated and, if so,
+	 * who owns it.
+	 */
+	if ((blkname = fs->bname) == NULL)
+		blkname = getfullblkname(fs->cname);
+
+	tab_wildcard.mnt_special = blkname;
+
+	if ((m = fopen(MNTTAB, "r")) == NULL) {
+		/*
+		 * No mnttab means nothing is mounted
+		 */
+		*cookie = 0;
+		return (0);
+	}
+
+	/* close the table on both outcomes so the stream never leaks */
+	mounted = (getmntany(m, &tab_match, &tab_wildcard) == 0);
+	(void) fclose(m);
+
+	if (!mounted) {
+		/*
+		 * No match in mnttab so we're not mounted ... at least
+		 * nothing better be mounted.
+		 */
+		*cookie = 0;
+		return (0);
+	}
+
+	switch (pid = fork()) {
+	case -1:
+		/*
+		 * We've got some major trouble here and shouldn't
+		 * continue. The user needs to clear up the problems
+		 * that the system currently has before proceeding
+		 * to detach the log.
+		 */
+		(void) printf(dgettext(TEXT_DOMAIN, "failed to fork lockfs\n"));
+		*cookie = 0;
+		return (1);
+
+	case 0:
+		/*
+		 * The argument list must end with a null pointer of type
+		 * char *; a bare 0 is passed as an int.
+		 */
+		(void) execl("/usr/sbin/lockfs", "lockfs", lockit ? "-w" : "-u",
+		    "-c", "Solaris Volume Manager detach lock",
+		    tab_match.mnt_mountp, (char *)NULL);
+		/*
+		 * Shouldn't reach here, but if this code is run on
+		 * a release that doesn't have lockfs return an error
+		 * code so that the -f (force) option could be used
+		 * by metadetach.  Use _exit() so the child does not
+		 * flush stdio buffers inherited from the parent or run
+		 * its exit handlers.
+		 */
+		_exit(1);
+		/* NOTREACHED */
+
+	default:
+		if (waitpid(pid, &lock_exit, 0) != pid) {
+			/*
+			 * We couldn't get status regarding the
+			 * outcome of the lockfs command. We should
+			 * attempt to unlock the filesystem though.
+			 * Return an error code so that if the user
+			 * is trying to force the detach make them
+			 * clear up this problem first.
+			 */
+			*cookie = (void *)1;
+			return (1);
+		}
+
+		/* the exit status of lockfs is deliberately not examined */
+		*cookie = (void *)1;
+		return (0);
+	}
+}
+
+/*
+ * meta_lockfs - if mounted, lock a given device against writes
+ *
+ * Thin wrapper that asks meta_lockfs_common() for the lock operation
+ * and passes its result straight back; see the comment on that routine
+ * for the meaning of the return value and of *cookie.
+ */
+static int
+meta_lockfs(mdname_t *fs, void **cookie)
+{
+	int	status;
+
+	/* third argument non-zero selects "lock" */
+	status = meta_lockfs_common(fs, cookie, 1);
+	return (status);
+}
+
+/*
+ * meta_unlockfs - if mounted, unlock the filesystem if previously locked
+ *
+ * Does nothing unless *cookie holds the value 1 that
+ * meta_lockfs_common() stores after starting lockfs; see the comment
+ * on that routine.  The result of the unlock attempt is ignored.
+ */
+static void
+meta_unlockfs(mdname_t *fs, void **cookie)
+{
+	/*
+	 * Simple time saver.  We could always try to unlock
+	 * the filesystem, but that takes time and resources.
+	 */
+	if (*cookie != (void *)1)
+		return;
+
+	/* third argument zero selects "unlock" */
+	(void) meta_lockfs_common(fs, cookie, 0);
+}
+
+/*
+ * meta_trans_detach -- detach log from trans device
+ *
+ * Issues MD_IOC_TRANS_DETACH for transnp.  When MDCMD_FORCE is set the
+ * filesystem on the trans is write-locked around the ioctl, and an
+ * EBUSY result is treated as a delayed detach: *delayed is set to 1
+ * and the call still succeeds.  With MDCMD_PRINT a message describing
+ * the outcome is written to stdout.
+ *
+ * Returns 0 on success.  On failure ep is filled in and the value of
+ * the error-setting routine (mdmderror() or mdstealerror()) is
+ * returned, or -1 when the name check or the trans lookup fails.
+ * *delayed is only written once the ioctl is about to be issued.
+ */
+int
+meta_trans_detach(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	mdcmdopts_t	options,
+	int		*delayed,
+	md_error_t	*ep
+)
+{
+	int		force = ((options & MDCMD_FORCE) ? 1 : 0);
+	md_i_get_t	detach;
+	md_trans_t	*transp;
+	mdname_t	*lognp;
+	/*
+	 * Must start out as "nothing locked": meta_lockfs() is only
+	 * called when force is set, yet meta_unlockfs() inspects the
+	 * cookie on every path below.
+	 */
+	void		*lock_cookie = NULL;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(transnp->dev)));
+
+	/* check name */
+	if (metachkmeta(transnp, ep) != 0)
+		return (-1);
+
+	/* save log name */
+	if ((transp = meta_get_trans(sp, transnp, ep)) == NULL)
+		return (-1);
+	if ((lognp = transp->lognamep) == NULL)
+		return (mdmderror(ep, MDE_NO_LOG, meta_getminor(transnp->dev),
+		    transnp->cname));
+
+	/*
+	 * If trans device is mounted lock the filesystem
+	 * against writes and mod time updates.
+	 */
+	if (force && meta_lockfs(transnp, &lock_cookie)) {
+		/*
+		 * This device is mounted and we were unable to
+		 * lock the device.  Data corruption can occur
+		 * if we don't lock the device before removing
+		 * the log so bail out here.
+		 * NOTE: There's one case where the exit status
+		 * of lockfs could have been lost yet the command
+		 * could have run.  We should try to unlock the filesystem
+		 * before returning.
+		 */
+		meta_unlockfs(transnp, &lock_cookie);
+		return (mdmderror(ep, MDE_UNKNOWN_TYPE,
+		    meta_getminor(transnp->dev), transnp->cname));
+	}
+
+	/* detach log */
+	*delayed = 0;
+	(void) memset(&detach, 0, sizeof (detach));
+	detach.id = meta_getminor(transnp->dev);
+	MD_SETDRIVERNAME(&detach, MD_TRANS, sp->setno);
+	detach.size = force;	/* the size field carries the force flag */
+	if (metaioctl(MD_IOC_TRANS_DETACH, &detach, &detach.mde, NULL) != 0) {
+		/* delayed detach */
+		if ((force) && (mdissyserror(&detach.mde, EBUSY))) {
+			*delayed = 1;
+			mdclrerror(&detach.mde);
+		} else {
+			meta_unlockfs(transnp, &lock_cookie);
+			return (mdstealerror(ep, &detach.mde));
+		}
+	}
+
+	/*
+	 * Unlock the filesystem
+	 */
+	meta_unlockfs(transnp, &lock_cookie);
+
+	/* clear cache */
+	meta_invalidate_name(lognp);
+	meta_invalidate_name(transnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		if (*delayed) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+"%s: logging device %s will be detached at unmount or reboot\n"),
+			    transnp->cname, lognp->cname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: logging device %s is detached\n"),
+			    transnp->cname, lognp->cname);
+		}
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * reset trans
+ *
+ * With transnp == NULL every trans in the set is reset in turn,
+ * stopping at the first failure.  With a specific transnp the trans
+ * is cleared through meta_reset() provided it has no parent; with
+ * MDCMD_PRINT a confirmation is written to stdout, and with
+ * MDCMD_RECURSE a metadevice master is reset as well.  A
+ * (multi-parented) log is left for the caller to clear later.
+ *
+ * Returns 0 on success; on failure -1 or the value of mdmderror().
+ */
+int
+meta_trans_reset(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_trans_t	*transp;
+	int		status = -1;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert((transnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(transnp->dev))));
+
+	/* no name given: reset every trans in the set */
+	if (transnp == NULL) {
+		mdnamelist_t	*all = NULL;
+		mdnamelist_t	*cur;
+
+		if (meta_get_trans_names(sp, &all, 0, ep) < 0)
+			return (-1);
+
+		status = 0;
+		cur = all;
+		while (cur != NULL) {
+			if (meta_trans_reset(sp, cur->namep, options,
+			    ep) != 0) {
+				/* give up on the first failure */
+				status = -1;
+				break;
+			}
+			cur = cur->next;
+		}
+
+		metafreenamelist(all);
+		return (status);
+	}
+
+	/* check name */
+	if (metachkmeta(transnp, ep) != 0)
+		return (-1);
+
+	/* get unit structure */
+	transp = meta_get_trans(sp, transnp, ep);
+	if (transp == NULL)
+		return (-1);
+
+	/* make sure nobody owns us */
+	if (MD_HAS_PARENT(transp->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(transnp->dev),
+		    transnp->cname));
+	}
+
+	/* clear subdevices cache */
+	meta_invalidate_name(transp->masternamep);
+	if (transp->lognamep != NULL)
+		meta_invalidate_name(transp->lognamep);
+
+	/* clear metadevice; everything else depends on this working */
+	if (meta_reset(sp, transnp, options, ep) == 0) {
+		status = 0;
+
+		/* let em know */
+		if (options & MDCMD_PRINT) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Trans is cleared\n"),
+			    transnp->cname);
+			(void) fflush(stdout);
+		}
+
+		/* clear a metadevice master when asked to recurse */
+		if ((options & MDCMD_RECURSE) &&
+		    (metaismeta(transp->masternamep))) {
+			if (meta_reset_by_name(sp, transp->masternamep,
+			    options, ep) != 0) {
+				status = -1;
+			}
+		}
+		/* (multi-parented) log will be cleared later */
+	}
+
+	/* the cached name is dropped on success and failure alike */
+	meta_invalidate_name(transnp);
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_userflags.c b/usr/src/lib/lvm/libmeta/common/meta_userflags.c
new file mode 100644
index 0000000000..6ac028625d
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_userflags.c
@@ -0,0 +1,98 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1993-2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * get/set user flags for the metadevices (FOR GUI USE ONLY)
+ */
+
+#include <meta.h>
+
+/*
+ * get user flags stored in the common unit structure.
+ *
+ * Looks up the unit for np and copies its user_flags word into
+ * *userflags.  Returns 0 on success, or -1 when the unit cannot be
+ * obtained, in which case *userflags is left untouched.
+ */
+int
+meta_getuserflags(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	uint_t		*userflags,
+	md_error_t	*ep
+)
+{
+	md_common_t	*unitp;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	unitp = meta_get_unit(sp, np, ep);
+	if (unitp == NULL) {
+		return (-1);
+	}
+
+	*userflags = unitp->user_flags;
+	return (0);
+}
+
+
+/*
+ * set user flags, stored in the common unit structure.
+ *
+ * Validates np, determines which driver owns it, and hands the new
+ * flags to that driver through the MD_IOCSET_FLAGS ioctl; the cached
+ * information for np is dropped afterwards.  Returns 0 on success.
+ * On failure returns -1, or the value of mdstealerror() after moving
+ * the ioctl error into ep.
+ */
+int
+meta_setuserflags(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	uint_t		userflags,
+	md_error_t	*ep
+)
+{
+	md_set_userflags_t	req;
+	char			*drvname;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* check name */
+	if (metachkmeta(np, ep) != 0) {
+		return (-1);
+	}
+
+	/* get misc name */
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL) {
+		return (-1);
+	}
+
+	/* build the request from a zeroed structure */
+	(void) memset(&req, 0, sizeof (req));
+	MD_SETDRIVERNAME(&req, drvname, sp->setno);
+	req.mnum = meta_getminor(np->dev);
+	req.userflags = userflags;
+
+	if (metaioctl(MD_IOCSET_FLAGS, &req, &req.mde, np->cname) != 0) {
+		return (mdstealerror(ep, &req.mde));
+	}
+
+	/* clear cache */
+	meta_invalidate_name(np);
+
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c b/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c
new file mode 100644
index 0000000000..32be258ab3
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c
@@ -0,0 +1,825 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <meta.h>
+#include <metad.h>
+
+/*
+ * Every public mdrpc_*_svc name below is declared as a weak alias of the
+ * underscore-prefixed stub of the same name defined later in this file.
+ * The first group covers the version 1 routines, the second group the
+ * version 2 routines.
+ * NOTE(review): presumably a program that provides its own (strong)
+ * mdrpc_*_svc definitions overrides these stubs at link time -- confirm
+ * against the consumers of this library.
+ */
+#pragma weak mdrpc_nullproc_1_svc = _mdrpc_nullproc_1_svc
+#pragma weak mdrpc_hostname_1_svc = _mdrpc_hostname_1_svc
+#pragma weak mdrpc_addhosts_1_svc = _mdrpc_addhosts_1_svc
+#pragma weak mdrpc_delhosts_1_svc = _mdrpc_delhosts_1_svc
+#pragma weak mdrpc_createset_1_svc = _mdrpc_createset_1_svc
+#pragma weak mdrpc_delset_1_svc = _mdrpc_delset_1_svc
+#pragma weak mdrpc_getset_1_svc = _mdrpc_getset_1_svc
+#pragma weak mdrpc_setnumbusy_1_svc = _mdrpc_setnumbusy_1_svc
+#pragma weak mdrpc_setnameok_1_svc = _mdrpc_setnameok_1_svc
+#pragma weak mdrpc_ownset_1_svc = _mdrpc_ownset_1_svc
+#pragma weak mdrpc_adddrvs_1_svc = _mdrpc_adddrvs_1_svc
+#pragma weak mdrpc_deldrvs_1_svc = _mdrpc_deldrvs_1_svc
+#pragma weak mdrpc_upd_dr_dbinfo_1_svc = _mdrpc_upd_dr_dbinfo_1_svc
+#pragma weak mdrpc_devinfo_1_svc = _mdrpc_devinfo_1_svc
+#pragma weak mdrpc_drvused_1_svc = _mdrpc_drvused_1_svc
+#pragma weak mdrpc_add_drv_sidenms_1_svc = _mdrpc_add_drv_sidenms_1_svc
+#pragma weak mdrpc_del_drv_sidenms_1_svc = _mdrpc_del_drv_sidenms_1_svc
+#pragma weak mdrpc_gtimeout_1_svc = _mdrpc_gtimeout_1_svc
+#pragma weak mdrpc_stimeout_1_svc = _mdrpc_stimeout_1_svc
+#pragma weak mdrpc_upd_dr_flags_1_svc = _mdrpc_upd_dr_flags_1_svc
+#pragma weak mdrpc_upd_sr_flags_1_svc = _mdrpc_upd_sr_flags_1_svc
+#pragma weak mdrpc_unlock_set_1_svc = _mdrpc_unlock_set_1_svc
+#pragma weak mdrpc_lock_set_1_svc = _mdrpc_lock_set_1_svc
+#pragma weak mdrpc_updmeds_1_svc = _mdrpc_updmeds_1_svc
+
+#pragma weak mdrpc_nullproc_2_svc = _mdrpc_nullproc_2_svc
+#pragma weak mdrpc_hostname_2_svc = _mdrpc_hostname_2_svc
+#pragma weak mdrpc_addhosts_2_svc = _mdrpc_addhosts_2_svc
+#pragma weak mdrpc_delhosts_2_svc = _mdrpc_delhosts_2_svc
+#pragma weak mdrpc_createset_2_svc = _mdrpc_createset_2_svc
+#pragma weak mdrpc_delset_2_svc = _mdrpc_delset_2_svc
+#pragma weak mdrpc_getset_2_svc = _mdrpc_getset_2_svc
+#pragma weak mdrpc_setnumbusy_2_svc = _mdrpc_setnumbusy_2_svc
+#pragma weak mdrpc_setnameok_2_svc = _mdrpc_setnameok_2_svc
+#pragma weak mdrpc_ownset_2_svc = _mdrpc_ownset_2_svc
+#pragma weak mdrpc_adddrvs_2_svc = _mdrpc_adddrvs_2_svc
+#pragma weak mdrpc_deldrvs_2_svc = _mdrpc_deldrvs_2_svc
+#pragma weak mdrpc_upd_dr_dbinfo_2_svc = _mdrpc_upd_dr_dbinfo_2_svc
+#pragma weak mdrpc_devinfo_2_svc = _mdrpc_devinfo_2_svc
+#pragma weak mdrpc_devid_2_svc = _mdrpc_devid_2_svc
+#pragma weak mdrpc_devinfo_by_devid_2_svc = _mdrpc_devinfo_by_devid_2_svc
+#pragma weak mdrpc_devinfo_by_devid_name_2_svc =\
+ _mdrpc_devinfo_by_devid_name_2_svc
+#pragma weak mdrpc_drvused_2_svc = _mdrpc_drvused_2_svc
+#pragma weak mdrpc_add_drv_sidenms_2_svc = _mdrpc_add_drv_sidenms_2_svc
+#pragma weak mdrpc_del_drv_sidenms_2_svc = _mdrpc_del_drv_sidenms_2_svc
+#pragma weak mdrpc_gtimeout_2_svc = _mdrpc_gtimeout_2_svc
+#pragma weak mdrpc_stimeout_2_svc = _mdrpc_stimeout_2_svc
+#pragma weak mdrpc_upd_dr_flags_2_svc = _mdrpc_upd_dr_flags_2_svc
+#pragma weak mdrpc_upd_sr_flags_2_svc = _mdrpc_upd_sr_flags_2_svc
+#pragma weak mdrpc_unlock_set_2_svc = _mdrpc_unlock_set_2_svc
+#pragma weak mdrpc_lock_set_2_svc = _mdrpc_lock_set_2_svc
+#pragma weak mdrpc_updmeds_2_svc = _mdrpc_updmeds_2_svc
+#pragma weak mdrpc_mncreateset_2_svc = _mdrpc_mncreateset_2_svc
+#pragma weak mdrpc_mngetset_2_svc = _mdrpc_mngetset_2_svc
+#pragma weak mdrpc_mnsetmaster_2_svc = _mdrpc_mnsetmaster_2_svc
+#pragma weak mdrpc_joinset_2_svc = _mdrpc_joinset_2_svc
+#pragma weak mdrpc_withdrawset_2_svc = _mdrpc_withdrawset_2_svc
+#pragma weak mdrpc_upd_nr_flags_2_svc = _mdrpc_upd_nr_flags_2_svc
+#pragma weak mdrpc_mn_is_stale_2_svc = _mdrpc_mn_is_stale_2_svc
+#pragma weak mdrpc_mdcommdctl_2_svc = _mdrpc_mdcommdctl_2_svc
+#pragma weak mdrpc_upd_dr_reconfig_2_svc = _mdrpc_upd_dr_reconfig_2_svc
+#pragma weak mdrpc_getdrivedesc_2_svc = _mdrpc_getdrivedesc_2_svc
+#pragma weak mdrpc_reset_mirror_owner_2_svc = _mdrpc_reset_mirror_owner_2_svc
+#pragma weak mdrpc_mn_susp_res_io_2_svc = _mdrpc_mn_susp_res_io_2_svc
+#pragma weak mdrpc_resnarf_set_2_svc = _mdrpc_resnarf_set_2_svc
+#pragma weak mdrpc_mn_mirror_resync_all_2_svc = \
+ _mdrpc_mn_mirror_resync_all_2_svc
+
+/*
+ * Stub behind the weak symbol mdrpc_nullproc_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_nullproc_1_svc(mdrpc_null_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_hostname_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_hostname_1_svc(mdrpc_null_args *a, mdrpc_hostname_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_addhosts_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_addhosts_1_svc(mdrpc_host_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_delhosts_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_delhosts_1_svc(mdrpc_host_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_createset_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_createset_1_svc(mdrpc_createset_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_delset_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_delset_1_svc(mdrpc_sp_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_getset_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_getset_1_svc(mdrpc_getset_args *a, mdrpc_getset_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_setnumbusy_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_setnumbusy_1_svc(mdrpc_setno_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_setnameok_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_setnameok_1_svc(mdrpc_sp_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_ownset_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_ownset_1_svc(mdrpc_sp_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_adddrvs_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_adddrvs_1_svc(mdrpc_drives_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_deldrvs_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_deldrvs_1_svc(mdrpc_drives_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_upd_dr_dbinfo_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_dbinfo_1_svc(mdrpc_drives_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_devinfo_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_devinfo_1_svc(mdrpc_devinfo_args *a, mdrpc_devinfo_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_drvused_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_drvused_1_svc(mdrpc_drvused_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_add_drv_sidenms_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_add_drv_sidenms_1_svc(mdrpc_drv_sidenm_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_del_drv_sidenms_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_del_drv_sidenms_1_svc(mdrpc_sp_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_gtimeout_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_gtimeout_1_svc(mdrpc_sp_args *a, mdrpc_gtimeout_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_stimeout_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_stimeout_1_svc(mdrpc_stimeout_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_upd_dr_flags_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_flags_1_svc(mdrpc_upd_dr_flags_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_upd_sr_flags_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_sr_flags_1_svc(mdrpc_upd_sr_flags_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_unlock_set_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_unlock_set_1_svc(mdrpc_null_args *a, mdrpc_setlock_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_lock_set_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_lock_set_1_svc(mdrpc_null_args *a, mdrpc_setlock_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_updmeds_1_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_updmeds_1_svc(mdrpc_updmeds_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+
+/*
+ * Stub behind the weak symbol mdrpc_nullproc_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_nullproc_2_svc(mdrpc_null_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_hostname_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_hostname_2_svc(mdrpc_null_args *a, mdrpc_hostname_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_addhosts_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_addhosts_2_svc(mdrpc_host_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_delhosts_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_delhosts_2_svc(mdrpc_host_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_createset_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_createset_2_svc(mdrpc_createset_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_delset_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_delset_2_svc(mdrpc_sp_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_getset_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_getset_2_svc(mdrpc_getset_args *a, mdrpc_getset_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_setnumbusy_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_setnumbusy_2_svc(mdrpc_setno_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_setnameok_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_setnameok_2_svc(mdrpc_sp_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_ownset_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_ownset_2_svc(mdrpc_sp_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_adddrvs_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_adddrvs_2_svc(mdrpc_drives_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_deldrvs_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_deldrvs_2_svc(mdrpc_drives_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_upd_dr_dbinfo_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_dbinfo_2_svc(mdrpc_drives_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_devinfo_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_devinfo_2_svc(mdrpc_devinfo_2_args *a, mdrpc_devinfo_2_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_devid_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_devid_2_svc(mdrpc_devid_args *a, mdrpc_devid_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_devinfo_by_devid_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_devinfo_by_devid_2_svc(mdrpc_devidstr_args *a,
+    mdrpc_devinfo_2_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_devinfo_by_devid_name_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_devinfo_by_devid_name_2_svc(mdrpc_devid_name_2_args *a,
+    mdrpc_devinfo_2_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+
+/*
+ * Stub behind the weak symbol mdrpc_drvused_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_drvused_2_svc(mdrpc_drvused_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_add_drv_sidenms_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_add_drv_sidenms_2_svc(mdrpc_drv_sidenm_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_del_drv_sidenms_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_del_drv_sidenms_2_svc(mdrpc_sp_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_gtimeout_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_gtimeout_2_svc(mdrpc_sp_args *a, mdrpc_gtimeout_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_stimeout_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_stimeout_2_svc(mdrpc_stimeout_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_upd_dr_flags_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_flags_2_svc(mdrpc_upd_dr_flags_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_upd_sr_flags_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_sr_flags_2_svc(mdrpc_upd_sr_flags_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_unlock_set_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_unlock_set_2_svc(mdrpc_null_args *a, mdrpc_setlock_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_lock_set_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_lock_set_2_svc(mdrpc_null_args *a, mdrpc_setlock_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_updmeds_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_updmeds_2_svc(mdrpc_updmeds_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_mncreateset_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_mncreateset_2_svc(mdrpc_mncreateset_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_mngetset_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_mngetset_2_svc(mdrpc_getset_2_args *a, mdrpc_mngetset_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_mnsetmaster_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_mnsetmaster_2_svc(mdrpc_mnsetmaster_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_joinset_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_joinset_2_svc(mdrpc_sp_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_withdrawset_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_withdrawset_2_svc(mdrpc_sp_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_upd_nr_flags_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_nr_flags_2_svc(mdrpc_upd_nr_flags_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_mn_is_stale_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_mn_is_stale_2_svc(mdrpc_setno_2_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_mdcommdctl_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_mdcommdctl_2_svc(mdrpc_mdcommdctl_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_resnarf_set_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_resnarf_set_2_svc(mdrpc_setno_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_upd_dr_reconfig_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_reconfig_2_svc(mdrpc_upd_dr_flags_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_getdrivedesc_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_getdrivedesc_2_svc(mdrpc_sp_2_args *a, mdrpc_getdrivedesc_res *b,
+    struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_reset_mirror_owner_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_reset_mirror_owner_2_svc(mdrpc_nodeid_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_mn_susp_res_io_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_mn_susp_res_io_2_svc(mdrpc_mn_susp_res_io_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
+
+/*
+ * Stub behind the weak symbol mdrpc_mn_mirror_resync_all_2_svc.
+ * Trips assert(0) when called; with assertions compiled out it just
+ * returns TRUE.
+ */
+/*ARGSUSED*/
+bool_t
+_mdrpc_mn_mirror_resync_all_2_svc(mdrpc_setno_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	assert(0);
+	return (TRUE);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/metagetroot.c b/usr/src/lib/lvm/libmeta/common/metagetroot.c
new file mode 100644
index 0000000000..3891c6bd74
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/metagetroot.c
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * get root device
+ */
+
+#include <meta.h>
+#include "meta_lib_prv.h"
+
+#include <sys/mnttab.h>
+
+/*
+ * Look up the mnttab entry mounted on "/" and return its mnt_special
+ * field, i.e. the current root filesystem block device name.
+ *
+ * Returns NULL with ep set if the mnttab cannot be opened (system error
+ * against MNTTAB) or if no entry for "/" is found (MDE_NOROOT).
+ */
+void *
+meta_get_current_root(
+	md_error_t	*ep
+)
+{
+	struct mnttab	entry;
+	FILE		*mntfp;
+
+	mntfp = open_mnttab();
+	if (mntfp == NULL) {
+		(void) mdsyserror(ep, errno, MNTTAB);
+		return (NULL);
+	}
+
+	/* walk the table until getmntent() stops returning entries */
+	for (;;) {
+		if (getmntent(mntfp, &entry) != 0)
+			break;
+		if (strcmp(entry.mnt_mountp, "/") != 0)
+			continue;
+		return (entry.mnt_special);
+	}
+
+	(void) mderror(ep, MDE_NOROOT, NULL);
+	return (NULL);
+}
+
+/*
+ * Return the name of the device underneath the current root filesystem.
+ * This is only valid when root is either a slice, a stripe or a mirror.
+ *
+ *	- root is not a metadevice: its own name is returned.
+ *	- root is a mirror: the first submirror slot that is in use is
+ *	  examined in place of the mirror.
+ *	- root (or that submirror) is a stripe: the first component of the
+ *	  first row is returned, provided it passes metachkcomp().
+ *
+ * Anything else yields NULL with ep set (MDE_INV_ROOT unless an earlier
+ * step returned early with its own error).
+ */
+mdname_t *
+meta_get_current_root_dev(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	md_mirror_t	*mirrorp;
+	md_row_t	*rp;
+	md_comp_t	*cp;
+	mdname_t	*rootnp;
+	void		*curroot;
+	char		*miscname;
+	int		smi;
+
+	if ((curroot = meta_get_current_root(ep)) == NULL)
+		return (NULL);
+	if ((rootnp = metaname(&sp, curroot, ep)) == NULL)
+		return (NULL);
+
+	/* a root that is not a metadevice is returned unchanged */
+	if (!metaismeta(rootnp))
+		return (rootnp);
+
+	if ((miscname = metagetmiscname(rootnp, ep)) == NULL)
+		return (NULL);
+
+	if ((strcmp(miscname, MD_MIRROR) == 0) &&
+	    ((mirrorp = meta_get_mirror(sp, rootnp, ep)) != NULL)) {
+		for (smi = 0; smi < NMIRROR; smi++) {
+			md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+			mdname_t	*subnp = mdsp->submirnamep;
+
+			/* skip unused submirrors */
+			if (subnp == NULL) {
+				assert(mdsp->state == SMS_UNUSED);
+				continue;
+			}
+
+			/*
+			 * Only replace rootnp once a real submirror name is
+			 * in hand.  If every slot turns out to be unused,
+			 * rootnp keeps naming the mirror, so the error
+			 * report at the bottom never dereferences NULL.
+			 */
+			rootnp = subnp;
+			if ((miscname = metagetmiscname(rootnp, ep)) == NULL) {
+				(void) mdmderror(ep, MDE_UNKNOWN_TYPE,
+				    meta_getminor(rootnp->dev),
+				    rootnp->cname);
+				return (NULL);
+			}
+			break;
+		}
+	}
+
+	if ((strcmp(miscname, MD_STRIPE) == 0) &&
+	    ((stripep = meta_get_stripe(sp, rootnp, ep)) != NULL)) {
+		rp = &stripep->rows.rows_val[0];
+		cp = &rp->comps.comps_val[0];
+		if (metachkcomp(cp->compnamep, ep) == 0)
+			return (cp->compnamep);
+	}
+
+	/* Root is not a single stripe metadevice */
+	(void) mddeverror(ep, MDE_INV_ROOT, rootnp->dev, rootnp->cname);
+	return (NULL);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/metarpcopen.c b/usr/src/lib/lvm/libmeta/common/metarpcopen.c
new file mode 100644
index 0000000000..bd0f4232f5
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/metarpcopen.c
@@ -0,0 +1,422 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+#include <metad.h>
+
+/*
+ * Maximum age, in seconds, of a cached client handle.  metarpcopen()
+ * releases a cached handle whose cc_ttl is older than this.
+ */
+#define CC_TTL_MAX 20
+
+/* One cached RPC client handle. */
+typedef struct {
+ char *cc_node; /* host name the handle was created for */
+ struct timeval cc_ttl; /* time of day at which cc_add stored cc_clp */
+ CLIENT *cc_clp; /* client handle, NULL when none is held */
+} client_cache_t;
+
+/* The cache itself: a NULL terminated array of entries plus its lock. */
+typedef struct client_header {
+ client_cache_t **ch_cache; /* array of clients. */
+ mutex_t ch_mutex; /* lock access to ch_cache */
+} client_header_t;
+
+/*
+ * This structure is used to pass data from meta_client_create to
+ * client_create_helper via meta_client_create_retry.
+ */
+typedef struct clnt_data {
+ rpcprog_t cd_prognum; /* RPC program number */
+ rpcvers_t cd_version; /* Desired interface version */
+ char *cd_nettype; /* Type of network to use */
+} clnt_data_t;
+
+/* Number of ch_cache slots cc_add allocates at a time. */
+#define MALLOC_BLK_SIZE 10
+/* The single client cache used by metarpcopen and metarpccloseall. */
+static client_header_t client_header = {(client_cache_t **)NULL, DEFAULTMUTEX};
+
+/*
+ * Record clntp as the client handle for node in the cache of header.
+ *
+ * An existing entry for node that holds no handle is reused; otherwise a
+ * new entry is appended, growing the array by MALLOC_BLK_SIZE slots
+ * whenever the last free slot has just been consumed so that the array
+ * always ends in a NULL slot.  If the time of day cannot be read, ep is
+ * set and nothing is cached.
+ */
+static void
+cc_add(
+	client_header_t	*header,
+	char		*node,
+	CLIENT		*clntp,
+	md_error_t	*ep
+)
+{
+	client_cache_t	**tbl;
+	client_cache_t	*entry;
+	struct timeval	stamp;
+	int		slot = 0;
+	int		k;
+
+	if (gettimeofday(&stamp, NULL) == -1) {
+		(void) mdsyserror(ep, errno, "gettimeofday()");
+		return;
+	}
+
+	(void) mutex_lock(&header->ch_mutex);
+
+	tbl = header->ch_cache;
+	if (tbl == NULL) {
+		/* first use: start with one block of empty slots */
+		tbl = Calloc(MALLOC_BLK_SIZE, sizeof (*tbl));
+		header->ch_cache = tbl;
+	} else {
+		/* look for an entry of this node with no handle in it */
+		while ((entry = tbl[slot]) != NULL) {
+			if (entry->cc_clp == NULL &&
+			    strcmp(entry->cc_node, node) == 0) {
+				entry->cc_clp = clntp;
+				entry->cc_ttl = stamp;
+				(void) mutex_unlock(&header->ch_mutex);
+				return;
+			}
+			slot++;
+		}
+	}
+
+	/* slot now indexes the first free (NULL) position */
+	entry = Zalloc(sizeof (*entry));
+	entry->cc_node = Strdup(node);
+	entry->cc_clp = clntp;
+	entry->cc_ttl = stamp;
+	tbl[slot] = entry;
+
+	/* the block is full: add another one and clear its slots */
+	slot++;
+	if ((slot % MALLOC_BLK_SIZE) == 0) {
+		tbl = Realloc(tbl, (slot + MALLOC_BLK_SIZE) * sizeof (*tbl));
+		for (k = 0; k < MALLOC_BLK_SIZE; k++)
+			tbl[slot + k] = NULL;
+		header->ch_cache = tbl;
+	}
+
+	(void) mutex_unlock(&header->ch_mutex);
+}
+
+/*
+ * Drop the client handle held by a cache entry, if any: destroy its auth
+ * handle and the client itself.  The entry is left with cc_clp == NULL.
+ */
+static void
+rel_clntp(client_cache_t *cachep)
+{
+	CLIENT	*handle = cachep->cc_clp;
+
+	cachep->cc_clp = NULL;
+	if (handle == NULL)
+		return;
+
+	auth_destroy(handle->cl_auth);
+	clnt_destroy(handle);
+}
+
+/*
+ * Tear down the whole client cache of header: release every entry (node
+ * name, client handle, the entry itself), free the array and reset
+ * ch_cache to NULL.  Done with ch_mutex held.
+ */
+static void
+cc_destroy(client_header_t *header)
+{
+	client_cache_t	**tbl;
+	client_cache_t	**slotp;
+
+	(void) mutex_lock(&header->ch_mutex);
+
+	tbl = header->ch_cache;
+	if (tbl != NULL) {
+		for (slotp = tbl; *slotp != NULL; slotp++) {
+			client_cache_t	*entry = *slotp;
+
+			Free(entry->cc_node);
+			rel_clntp(entry);
+			Free(entry);
+		}
+		Free(tbl);
+		header->ch_cache = NULL;
+	}
+
+	(void) mutex_unlock(&header->ch_mutex);
+}
+
+/*
+ * Apply a timeout of time_out seconds to the client handle clntp.
+ *
+ * Returns 0 when clnt_control() accepts the new timeout; otherwise the
+ * failure is recorded in ep via mdrpcerror() (hostname identifies the
+ * peer) and that function's return value is passed back.
+ */
+static int
+cl_sto(
+	CLIENT		*clntp,
+	char		*hostname,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	struct timeval	limit;
+
+	/* whole seconds only; everything else stays zero */
+	(void) memset(&limit, '\0', sizeof (limit));
+	limit.tv_sec = time_out;
+
+	if (clnt_control(clntp, CLSET_TIMEOUT, (char *)&limit) == TRUE)
+		return (0);
+
+	return (mdrpcerror(ep, clntp, hostname,
+	    dgettext(TEXT_DOMAIN, "metad client set timeout")));
+}
+
+/*
+ * Helper for meta_client_create_retry, used when the interface version
+ * has to be selected: creates a "tcp" client for the METAD program on
+ * hostname, accepting any version from METAD_VERSION through
+ * METAD_VERSION_DEVID.  The private data argument is not used, and the
+ * version that was actually selected is discarded.
+ */
+
+/* ARGSUSED */
+static CLIENT *
+client_create_vers_retry(char *hostname, void *ignore, struct timeval *tout)
+{
+	rpcvers_t	selected;	/* filled in by the call, not needed */
+
+	return (clnt_create_vers_timed(hostname, METAD, &selected,
+	    METAD_VERSION, METAD_VERSION_DEVID, "tcp", tout));
+}
+
+/*
+ * Helper for meta_client_create_retry, used for a plain client create:
+ * private points at a clnt_data_t that supplies the program number, the
+ * version and the network type handed to clnt_create_timed().
+ */
+static CLIENT *
+client_create_helper(char *hostname, void *private, struct timeval *time_out)
+{
+	clnt_data_t	*req = private;
+
+	return (clnt_create_timed(hostname, req->cd_prognum,
+	    req->cd_version, req->cd_nettype, time_out));
+}
+
+/*
+ * meta_client_create_retry is a general function to assist in creating RPC
+ * clients. This function handles retrying if the attempt to create a
+ * client fails. meta_client_create_retry itself does not actually create
+ * the client. Instead it calls the helper function, func, to do that job.
+ *
+ * With the help of func, meta_client_create_retry will create an RPC
+ * connection allowing up to tout seconds to complete the task. If the
+ * connection creation fails for RPC_RPCBFAILURE, RPC_CANTRECV or
+ * RPC_PROGNOTREGISTERED and tout seconds have not passed,
+ * meta_client_create_retry will try again. The reason retries are
+ * important is that when the inet daemon is being refreshed, it can take
+ * 15-20 seconds for it to start responding again.
+ *
+ * Arguments:
+ *
+ * hostname - Name of remote host
+ *
+ * func - Pointer to the helper function, that will
+ * actually try to create the client.
+ *
+ * data - Private data to be passed on to func.
+ * meta_client_create_retry treats this as an opaque
+ * pointer.
+ *
+ * tout - Number of seconds to allow for the connection
+ * attempt.
+ *
+ * ep - Standard SVM error pointer. May be NULL.
+ */
+CLIENT *
+meta_client_create_retry(
+	char			*hostname,
+	clnt_create_func_t	func,
+	void			*data,
+	time_t			tout,
+	md_error_t		*ep
+)
+{
+	static int	debug;		/* print debugging info */
+	static int	debug_set = 0;
+
+	CLIENT		*clnt = (CLIENT *) NULL;
+	struct timeval	begin;
+	struct timeval	cur;
+	struct timeval	remain;
+	char		*env;
+	int		retryable;
+
+	/* MD_DEBUG is consulted once; "RPC" in it turns tracing on */
+	if (debug_set == 0) {
+		env = getenv("MD_DEBUG");
+		debug = (env != NULL && strstr(env, "RPC") != NULL) ? 1 : 0;
+		debug_set = 1;
+	}
+
+	remain.tv_usec = 0;
+	if (gettimeofday(&begin, NULL) == -1) {
+		if (ep != (md_error_t *)NULL)
+			(void) mdsyserror(ep, errno, "gettimeofday()");
+		return (clnt);
+	}
+
+	for (cur = begin; (cur.tv_sec - begin.tv_sec) < tout; ) {
+		/* Use remaining time as the timeout value. */
+		remain.tv_sec = tout - (cur.tv_sec - begin.tv_sec);
+		clnt = (*func)(hostname, data, &remain);
+		if (clnt != (CLIENT *) NULL)
+			break;
+
+		retryable =
+		    (rpc_createerr.cf_stat == RPC_RPCBFAILURE) ||
+		    (rpc_createerr.cf_stat == RPC_PROGNOTREGISTERED) ||
+		    (rpc_createerr.cf_stat == RPC_CANTRECV);
+		if (!retryable) {
+			/* Not a recoverable error. */
+			break;
+		}
+
+		if (debug)
+			clnt_pcreateerror("meta_client_create_retry");
+
+		/* If error might be fixed in time, sleep & try again */
+		(void) sleep(2);
+		if (gettimeofday(&cur, NULL) == -1) {
+			if (ep != (md_error_t *)NULL)
+				(void) mdsyserror(ep, errno, "gettimeofday()");
+			return (clnt);
+		}
+	}
+
+	/* report the create failure only when the caller asked for errors */
+	if ((clnt == (CLIENT *) NULL) && (ep != (md_error_t *)NULL)) {
+		(void) mdrpccreateerror(ep, hostname,
+		    "meta_client_create_retry");
+	}
+	return (clnt);
+}
+
+/*
+ * meta_client_create is intended to be used within SVM as a replacement
+ * for calls to clnt_create. meta_client_create invokes the retry
+ * mechanism of meta_client_create_retry.
+ */
+CLIENT *
+meta_client_create(
+	char		*host,
+	rpcprog_t	prognum,
+	rpcvers_t	version,
+	char		*nettype
+)
+{
+	clnt_data_t	req;
+
+	/* bundle the create parameters for client_create_helper */
+	req.cd_nettype = nettype;
+	req.cd_version = version;
+	req.cd_prognum = prognum;
+
+	/* no error structure is supplied, so failures are not recorded */
+	return (meta_client_create_retry(host, client_create_helper,
+	    (void *)&req, MD_CLNT_CREATE_TOUT, (md_error_t *)NULL));
+}
+
+/*
+ * Create and return an RPC connection to the metad service on hostname,
+ * with its timeout set to time_out seconds.
+ *
+ * A cached handle for hostname that is no older than CC_TTL_MAX seconds
+ * is reused; older ones are released.  Otherwise a new client is created,
+ * given default AUTH_SYS credentials and added to the cache.  Returns
+ * NULL with ep set on failure.
+ */
+CLIENT *
+metarpcopen(
+	char		*hostname,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	CLIENT		*clntp = NULL;
+	client_cache_t	**slotp;
+	client_cache_t	*entry;
+	struct timeval	now;
+	long		age;
+
+	if (gettimeofday(&now, NULL) == -1) {
+		(void) mdsyserror(ep, errno, "gettimeofday()");
+		return (NULL);
+	}
+
+	/*
+	 * Before trying to create the client, make sure that the core SVM
+	 * services are enabled by the Service Management Facility, so that
+	 * we do not suffer the 60 second timeout when they are not even
+	 * enabled.  The check only covers this host, whichever host the
+	 * caller wants to reach, but without the local services our RPC
+	 * stuff is not going to work as expected anyway.
+	 */
+	if (meta_smf_isonline(META_SMF_CORE, ep) == 0)
+		return (NULL);
+
+	/* first choice: a cached handle that is still fresh */
+	(void) mutex_lock(&client_header.ch_mutex);
+	slotp = client_header.ch_cache;
+	for (; slotp != NULL && *slotp != NULL; slotp++) {
+		entry = *slotp;
+		if (strcmp(entry->cc_node, hostname) != 0)
+			continue;
+
+		clntp = entry->cc_clp;
+		if (clntp == NULL)
+			continue;
+
+		age = now.tv_sec - entry->cc_ttl.tv_sec;
+		if (age > CC_TTL_MAX) {
+			/* too old: drop it and keep looking */
+			rel_clntp(entry);
+			continue;
+		}
+
+		if (cl_sto(clntp, hostname, time_out, ep) != 0)
+			clntp = NULL;
+		(void) mutex_unlock(&client_header.ch_mutex);
+		return (clntp);
+	}
+	(void) mutex_unlock(&client_header.ch_mutex);
+
+	/*
+	 * Nothing usable was cached.  Create a new handle, letting
+	 * client_create_vers_retry pick the interface version: version 2
+	 * by default, version 1 if the other side is version 1.
+	 */
+	clntp = meta_client_create_retry(hostname, client_create_vers_retry,
+	    (void *)NULL, MD_CLNT_CREATE_TOUT, ep);
+	if (clntp == NULL) {
+		(void) mdrpccreateerror(ep, hostname,
+		    dgettext(TEXT_DOMAIN, "metad client create"));
+		cc_add(&client_header, hostname, NULL, ep);
+		return (NULL);
+	}
+
+	/* replace the handle's authentication with the default one */
+	auth_destroy(clntp->cl_auth);
+	clntp->cl_auth = authsys_create_default();
+	assert(clntp->cl_auth != NULL);
+
+	cc_add(&client_header, hostname, clntp, ep);
+
+	return ((cl_sto(clntp, hostname, time_out, ep) != 0) ? NULL : clntp);
+}
+
+/*
+ * metarpcclose - intentionally empty place holder, so that code using
+ *		  metarpcopen does not appear to leave dangling opens.
+ *		  Open counts could be decremented here at some later
+ *		  time, if needed.
+ */
+/*ARGSUSED*/
+void
+metarpcclose(
+	CLIENT	*clntp
+)
+{
+}
+
+/*
+ * Release every cached client handle by destroying the client cache.
+ */
+void
+metarpccloseall(
+	void
+)
+{
+	cc_destroy(&client_header);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/metasplitname.c b/usr/src/lib/lvm/libmeta/common/metasplitname.c
new file mode 100644
index 0000000000..84634a109e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/metasplitname.c
@@ -0,0 +1,77 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 1993, 2000 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * split and splice name
+ */
+
+#include <meta.h>
+
+/*
+ * Split name at its last '/' into a prefix (everything before the slash)
+ * and a suffix (everything after it) and store both, with their lengths,
+ * in spn.  The slash itself is dropped, and neither part is NUL
+ * terminated in spn.  A name without any '/' has an empty prefix and is
+ * stored entirely as the suffix.
+ *
+ * Returns 0 on success, 1 if the prefix is longer than MD_MAXPREFIX or
+ * the suffix is longer than MD_MAXSUFFIX (spn is left untouched then).
+ */
+int
+splitname(char *name, md_splitname *spn)
+{
+	size_t	prefixlen;
+	size_t	suffixlen;
+	char	*lastslash;
+	char	*suffix;
+
+	lastslash = strrchr(name, '/');
+	if (lastslash != NULL) {
+		prefixlen = lastslash - name;
+		suffix = lastslash + 1;		/* slash dropped */
+	} else {
+		/*
+		 * No slash at all: the suffix is the name itself.  It must
+		 * not be derived from lastslash, which is NULL here.
+		 */
+		prefixlen = 0;
+		suffix = name;
+	}
+	suffixlen = strlen(suffix);
+
+	if (prefixlen > MD_MAXPREFIX ||
+	    suffixlen > MD_MAXSUFFIX)
+		return (1);
+
+	(void) memcpy(SPN_PREFIX(spn).pre_data, name, prefixlen);
+	SPN_PREFIX(spn).pre_len = prefixlen;
+	(void) memcpy(SPN_SUFFIX(spn).suf_data, suffix, suffixlen);
+	SPN_SUFFIX(spn).suf_len = suffixlen;
+	return (0);
+}
+
+/*
+ * Build "<prefix>/<suffix>" from the two parts stored in spn and return
+ * it as a NUL terminated string in memory obtained from Malloc().  The
+ * '/' is always inserted, also when the prefix is empty.
+ */
+char *
+splicename(md_splitname *spn)
+{
+	size_t	plen = SPN_PREFIX(spn).pre_len;
+	size_t	slen = SPN_SUFFIX(spn).suf_len;
+	char	*buf;
+	char	*cursor;
+
+	/* room for both parts, the separating slash and the terminator */
+	buf = Malloc(plen + slen + 2);
+	cursor = buf;
+
+	(void) memcpy(cursor, SPN_PREFIX(spn).pre_data, plen);
+	cursor += plen;
+	*cursor++ = '/';
+	(void) memcpy(cursor, SPN_SUFFIX(spn).suf_data, slen);
+	cursor[slen] = 0;
+
+	return (buf);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/sdssc_bind.c b/usr/src/lib/lvm/libmeta/common/sdssc_bind.c
new file mode 100644
index 0000000000..c8e1f8c3ee
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/sdssc_bind.c
@@ -0,0 +1,205 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
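+/*
+ * Run-time binding of the optional cluster support library (SDSSC_PATH).
+ * sdssc_bind_library() dlopen()s the library and stores the address of
+ * each entry point named in dl_table in the matching sdssc_* function
+ * pointer; when the library cannot be used, local stub routines are
+ * installed instead.
+ */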
+
+#include <dlfcn.h>
+#include <meta.h>
+#include <metadyn.h>
+#include <sdssc.h>
+
+#define SDSSC_PATH SDSSC_CL_LIBDIR "/sc/libsds_sc.so"
+
+/*
+ * dl_table -- one entry per cluster interface routine.  Each entry pairs
+ * the symbol name that is looked up with dlsym() with the address of the
+ * sdssc_* function pointer that receives the result.  The table ends with
+ * an entry whose members are both null; set_common_routine() and
+ * sdssc_bind_library() walk it until they reach a null fptr.
+ */
+static func_table_t dl_table[] = {
+ { "_sdssc_version", (void **)&sdssc_version },
+ { "_sdssc_create_begin", (void **)&sdssc_create_begin },
+ { "_sdssc_mo_create_begin", (void **)&sdssc_mo_create_begin },
+ { "_sdssc_create_end", (void **)&sdssc_create_end },
+ { "_sdssc_delete_begin", (void **)&sdssc_delete_begin },
+ { "_sdssc_delete_end", (void **)&sdssc_delete_end },
+ { "_sdssc_get_index", (void **)&sdssc_get_index },
+ { "_sdssc_add_hosts", (void **)&sdssc_add_hosts },
+ { "_sdssc_delete_hosts", (void **)&sdssc_delete_hosts },
+ { "_sdssc_get_primary_host", (void **)&sdssc_get_primary_host },
+ { "_sdssc_cmd_proxy", (void **)&sdssc_cmd_proxy },
+ { "_sdssc_getnodelist", (void **)&sdssc_getnodelist },
+ { "_sdssc_freenodelist", (void **)&sdssc_freenodelist },
+ { "_sdssc_binddevs", (void **)&sdssc_binddevs },
+ { "_sdssc_bindclusterdevs", (void **)&sdssc_bindclusterdevs },
+ { "_sdssc_gettransportbynode", (void **)&sdssc_gettransportbynode },
+ { "_sdssc_free_mdcerr_list", (void **)&sdssc_free_mdcerr_list },
+ { "_sdssc_property_get", (void **)&sdssc_property_get },
+ { "_sdssc_property_set", (void **)&sdssc_property_set },
+ { "_sdssc_get_services", (void **)&sdssc_get_services },
+ { "_sdssc_get_services_free", (void **)&sdssc_get_services_free },
+ { "_sdssc_suspend", (void **)&sdssc_suspend },
+ { "_sdssc_convert_cluster_path",
+ (void **)&sdssc_convert_cluster_path },
+ { "_sdssc_convert_ctd_path",
+ (void **)&sdssc_convert_ctd_path },
+ { "_sdssc_convert_path_free",
+ (void **)&sdssc_convert_path_free },
+ { "_sdssc_notify_service", (void **)&sdssc_notify_service },
+ { "_sdssc_cm_nm2nid", (void **)&sdssc_cm_nm2nid },
+ { "_sdssc_cm_sr_nm2nid", (void **)&sdssc_cm_sr_nm2nid },
+ { "_sdssc_cm_nid2nm", (void **)&sdssc_cm_nid2nm },
+ { "_sdssc_cm_sr_nid2nm", (void **)&sdssc_cm_sr_nid2nm },
+ { "_sdssc_get_priv_ipaddr", (void **)&sdssc_get_priv_ipaddr },
+ /* end-of-table marker */
+ { (char *)0, (void **)0 }
+};
+
+/*
+ * just_dup_string -- stand-in path conversion routine: the "converted"
+ * path is simply a strdup() copy of source, stored through dest.
+ *
+ * Returns SDSSC_OKAY on success.  If strdup() cannot allocate the copy,
+ * *dest is set to NULL and SDSSC_ERROR is returned so that the caller
+ * does not mistake a NULL result for a valid path.
+ */
+static rval_e
+just_dup_string(const char *source, char **dest)
+{
+	if ((*dest = strdup(source)) == NULL)
+		return (SDSSC_ERROR);
+	return (SDSSC_OKAY);
+}
+
+/*
+ * free_dup_string -- release a string with free(); the counterpart of
+ * just_dup_string(), which allocates with strdup().
+ */
+static void
+free_dup_string(char *source)
+{
+	char	*dup = source;
+
+	free(dup);
+}
+
+/*
+ * not_bound -- stub that unconditionally reports SDSSC_NOT_BOUND.
+ * set_common_routine() installs it for cluster interface routines that
+ * have no local stand-in, and sdssc_bind_library() compares sdssc_version
+ * against its address to recognize the "not bound" state.
+ */
+static rval_e
+not_bound(void)
+{
+	const rval_e	status = SDSSC_NOT_BOUND;
+
+	return (status);
+}
+
+/*
+ * not_bound_error -- stub that unconditionally reports
+ * SDSSC_NOT_BOUND_ERROR.  sdssc_bind_library() substitutes it for an
+ * entry point that dlsym() cannot resolve.  It is meant for the case of
+ * an older libsdssc that does not support MN disksets: calling an MN
+ * specific routine (such as sdssc_mo_create_begin) then yields
+ * SDSSC_NOT_BOUND_ERROR.
+ */
+static rval_e
+not_bound_error(void)
+{
+	const rval_e	status = SDSSC_NOT_BOUND_ERROR;
+
+	return (status);
+}
+
+
+/*
+ * set_common_routine -- point every cluster interface routine in dl_table
+ * at a local stub.
+ *
+ * The two path helpers get working stand-ins so that path handling keeps
+ * functioning without the cluster library:
+ *	_sdssc_convert_cluster_path	-> just_dup_string()
+ *	_sdssc_convert_path_free	-> free_dup_string()
+ * Every other entry is set to not_bound(), which returns SDSSC_NOT_BOUND.
+ *
+ * The names compared here must match the fname strings in dl_table
+ * exactly; a name that is not in the table is silently never matched.
+ */
+static void
+set_common_routine()
+{
+	func_table_p	f;
+
+	for (f = dl_table; f->fptr != (void *)0; f++) {
+		if (strcmp(f->fname, "_sdssc_convert_cluster_path") == 0) {
+			*f->fptr = (void *)&just_dup_string;
+		} else if (strcmp(f->fname, "_sdssc_convert_path_free") == 0) {
+			/* frees what just_dup_string() allocated */
+			*f->fptr = (void *)&free_dup_string;
+		} else {
+			*f->fptr = (void *)&not_bound;
+		}
+	}
+}
+
+/*
+ * sdssc_bind_library -- entry point which resolves all cluster interface pts.
+ *
+ * Returns:
+ *	SDSSC_OKAY	the library is (or already was) bound
+ *	SDSSC_NOT_BOUND	the library could not be opened, its "_bind_library"
+ *			hook returned non-zero, or an earlier call already
+ *			fell back to the local stubs
+ *	SDSSC_ERROR	the library was opened but lacks a required entry
+ *			point; the local stubs are installed in that case
+ *
+ * The dlopen() handle is never closed here: on success the resolved
+ * function pointers refer into the loaded library.
+ */
+rval_e
+sdssc_bind_library(void)
+{
+	void		*dp;
+	int		(*lb)();
+	func_table_p	ftp;
+
+	/*
+	 * If already bound then just return okay so this routine
+	 * becomes idempotent. If this check isn't made then we'll
+	 * fail when calling the "_bind_library" function because
+	 * dcs_initialize() can only be called once.
+	 */
+	if (sdssc_version != 0) {
+		if ((void *)sdssc_version == (void *)not_bound)
+			return (SDSSC_NOT_BOUND);
+		return (SDSSC_OKAY);
+	}
+
+	if ((dp = dlopen(SDSSC_PATH, RTLD_LAZY)) == NULL) {
+		set_common_routine();
+		return (SDSSC_NOT_BOUND);
+	}
+
+	/*
+	 * Allow the binding library to initialize state if
+	 * necessary. Currently this calls the DCS initialize()
+	 * routine which checks to see if we're part of a cluster.
+	 * The hook is optional; a library without it is still bound.
+	 */
+	if ((lb = (int (*)())dlsym(dp, "_bind_library")) != NULL) {
+		if (lb() != 0) {
+			set_common_routine();
+			return (SDSSC_NOT_BOUND);
+		}
+	}
+
+	/*
+	 * Load 'em up. Pick up the function address and store
+	 * the values in the global pointers for other routines
+	 * to use.
+	 */
+	for (ftp = dl_table; ftp->fptr != (void *)0; ftp++) {
+		if ((*ftp->fptr = dlsym(dp, ftp->fname)) != NULL)
+			continue;
+
+		/*
+		 * If old libsdssc library is there, then
+		 * sdssc_mo_create_begin is not yet supported.
+		 * The name must match the dl_table entry exactly,
+		 * including the leading underscore.
+		 */
+		if (strcmp(ftp->fname, "_sdssc_mo_create_begin") == 0) {
+			*ftp->fptr = (void *)&not_bound_error;
+			continue;
+		}
+
+		/*
+		 * If this routine fails to find a single
+		 * entry point that it's expecting
+		 * (except sdssc_mo_create_begin) then
+		 * set up the non-sdssc stub routines
+		 * as function pointers.
+		 */
+		set_common_routine();
+		return (SDSSC_ERROR);
+	}
+
+	return (SDSSC_OKAY);
+}
diff --git a/usr/src/lib/lvm/libmeta/i386/Makefile b/usr/src/lib/lvm/libmeta/i386/Makefile
new file mode 100644
index 0000000000..17c519db5c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/i386/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Everything except the targets below comes from the makefile shared by
+# the architecture directories.
+include ../Makefile.com
+
+# Both "install" and "debug" depend on "all" plus the ROOTLIBS, ROOTLINT
+# and ROOTLINKS lists (defined outside this file).
+install debug: all $(ROOTLIBS) $(ROOTLINT) $(ROOTLINKS)
diff --git a/usr/src/lib/lvm/libmeta/sparc/Makefile b/usr/src/lib/lvm/libmeta/sparc/Makefile
new file mode 100644
index 0000000000..75eec28afb
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/sparc/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 1998-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Everything except the targets below comes from the makefile shared by
+# the architecture directories.
+include ../Makefile.com
+
+# Both "install" and "debug" depend on "all" plus the ROOTLIBS, ROOTLINT
+# and ROOTLINKS lists (defined outside this file).
+install debug: all $(ROOTLIBS) $(ROOTLINT) $(ROOTLINKS)
diff --git a/usr/src/lib/lvm/libmeta/spec/Makefile b/usr/src/lib/lvm/libmeta/spec/Makefile
new file mode 100644
index 0000000000..5e88f3ac7e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/Makefile
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/Makefile
+
+include $(SRC)/lib/Makefile.spec.arch
diff --git a/usr/src/lib/lvm/libmeta/spec/Makefile.targ b/usr/src/lib/lvm/libmeta/spec/Makefile.targ
new file mode 100644
index 0000000000..40ffb28073
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/Makefile.targ
@@ -0,0 +1,36 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/Makefile.targ
+
+# Settings shared by the per-architecture spec makefiles, which include
+# this file before $(SRC)/lib/Makefile.lib and $(SRC)/lib/Makefile.spec.
+
+# Library name and version suffix.
+LIBRARY = libmeta.a
+VERS = .1
+
+# Common object list; the per-architecture makefiles may append to it.
+OBJECTS = meta.o
+
+# Deliberately empty.
+TRANSCPP =
+
+# Extra include directories, relative to the architecture directory --
+# presumably passed to the preprocessor when processing the spec; confirm
+# in Makefile.spec.
+SPECCPP = -I.. -I../../inc
diff --git a/usr/src/lib/lvm/libmeta/spec/amd64/Makefile b/usr/src/lib/lvm/libmeta/spec/amd64/Makefile
new file mode 100644
index 0000000000..c7d89e007c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/amd64/Makefile
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+# (DISABLE_APPTRACE is expanded in front of the install rule at the end of
+# this file; presumably POUND_SIGN is '#', turning that rule into a comment.)
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+# Common library name, version and object list.
+include ../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#amd64_C_PICFLAGS = $(amd64_C_BIGPICFLAGS)
+
+include $(SRC)/lib/Makefile.spec
+
+# Install rule for the 64-bit ABI library; only active when
+# DISABLE_APPTRACE expands to nothing.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB64)
diff --git a/usr/src/lib/lvm/libmeta/spec/i386/Makefile b/usr/src/lib/lvm/libmeta/spec/i386/Makefile
new file mode 100644
index 0000000000..3fe06d99af
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/i386/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/i386/Makefile
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+# (DISABLE_APPTRACE is expanded in front of the install rule at the end of
+# this file; presumably POUND_SIGN is '#', turning that rule into a comment.)
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+# Common library name, version and object list.
+include ../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#i386_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+# Install rule for the ABI library; only active when DISABLE_APPTRACE
+# expands to nothing.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libmeta/spec/meta.spec b/usr/src/lib/lvm/libmeta/spec/meta.spec
new file mode 100644
index 0000000000..48d7d2b30e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/meta.spec
@@ -0,0 +1,3699 @@
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# lib/lvm/libmeta/spec/meta.spec
+
+function meta_smf_enable
+version SUNWprivate_1.1
+end
+
+function meta_smf_disable
+version SUNWprivate_1.1
+end
+
+function meta_smf_getmask
+version SUNWprivate_1.1
+end
+
+function meta_smf_isonline
+version SUNWprivate_1.1
+end
+
+function meta_svm_sysevent
+version SUNWprivate_1.1
+end
+
+function close_admin
+version SUNWprivate_1.1
+end
+
+function meta_dev_ismeta
+version SUNWprivate_1.1
+end
+
+function meta_get_nunits
+version SUNWprivate_1.1
+end
+
+function metamakedev
+version SUNWprivate_1.1
+end
+
+function meta_get_tstate
+version SUNWprivate_1.1
+end
+
+function meta_expldev
+version SUNWprivate_1.1
+end
+
+function meta_cmpldev
+version SUNWprivate_1.1
+end
+
+function meta_getmajor
+version SUNWprivate_1.1
+end
+
+function meta_getminor
+version SUNWprivate_1.1
+end
+
+function open_admin
+version SUNWprivate_1.1
+end
+
+function meta_concat_generic
+version SUNWprivate_1.1
+end
+
+function meta_concat_parent
+version SUNWprivate_1.1
+end
+
+function meta_check_driveinset
+version SUNWprivate_1.1
+end
+
+function meta_check_drivemounted
+version SUNWprivate_1.1
+end
+
+function meta_check_driveswapped
+version SUNWprivate_1.1
+end
+
+function meta_check_inmeta
+version SUNWprivate_1.1
+end
+
+function meta_check_inset
+version SUNWprivate_1.1
+end
+
+function meta_check_root
+version SUNWprivate_1.1
+end
+
+function meta_check_inuse
+version SUNWprivate_1.1
+end
+
+function meta_imp_drvused
+version SUNWprivate_1.1
+end
+
+function meta_check_overlap
+version SUNWprivate_1.1
+end
+
+function meta_check_samedrive
+version SUNWprivate_1.1
+end
+
+function meta_check_inreplica
+version SUNWprivate_1.1
+end
+
+function meta_check_replica
+version SUNWprivate_1.1
+end
+
+function meta_db_addsidenms
+version SUNWprivate_1.1
+end
+
+function meta_db_attach
+version SUNWprivate_1.1
+end
+
+function meta_db_delsidenm
+version SUNWprivate_1.1
+end
+
+function meta_db_detach
+version SUNWprivate_1.1
+end
+
+function meta_db_minreplica
+version SUNWprivate_1.1
+end
+
+function meta_db_patch
+version SUNWprivate_1.1
+end
+
+function meta_get_replica_names
+version SUNWprivate_1.1
+end
+
+function meta_setup_db_locations
+version SUNWprivate_1.1
+end
+
+function meta_sync_db_locations
+version SUNWprivate_1.1
+end
+
+function meta_getdidminorbykey
+version SUNWprivate_1.1
+end
+
+function meta_getdidbykey
+version SUNWprivate_1.1
+end
+
+function meta_setdid
+version SUNWprivate_1.1
+end
+
+function metafreereplicalist
+version SUNWprivate_1.1
+end
+
+function metareplicalist
+version SUNWprivate_1.1
+end
+
+function meta_db_balance
+version SUNWprivate_1.1
+end
+
+function meta_create_non_dup_list
+version SUNWprivate_1.1
+end
+
+function sdssc_add_hosts
+version SUNWprivate_1.1
+end
+
+function sdssc_bind_library
+version SUNWprivate_1.1
+end
+
+function sdssc_bindclusterdevs
+version SUNWprivate_1.1
+end
+
+function sdssc_binddevs
+version SUNWprivate_1.1
+end
+
+function sdssc_clnt_bind_devs
+version SUNWprivate_1.1
+end
+
+function sdssc_clnt_proxy_cmd
+version SUNWprivate_1.1
+end
+
+function sdssc_cm_nid2nm
+version SUNWprivate_1.1
+end
+
+function sdssc_cm_nm2nid
+version SUNWprivate_1.1
+end
+
+function sdssc_cm_sr_nid2nm
+version SUNWprivate_1.1
+end
+
+function sdssc_cm_sr_nm2nid
+version SUNWprivate_1.1
+end
+
+function sdssc_cmd_proxy
+version SUNWprivate_1.1
+end
+
+function sdssc_convert_cluster_path
+version SUNWprivate_1.1
+end
+
+function sdssc_convert_ctd_path
+version SUNWprivate_1.1
+end
+
+function sdssc_convert_path_free
+version SUNWprivate_1.1
+end
+
+function sdssc_create_begin
+version SUNWprivate_1.1
+end
+
+function sdssc_mo_create_begin
+version SUNWprivate_1.1
+end
+
+function sdssc_create_end
+version SUNWprivate_1.1
+end
+
+function sdssc_delete_begin
+version SUNWprivate_1.1
+end
+
+function sdssc_delete_end
+version SUNWprivate_1.1
+end
+
+function sdssc_delete_hosts
+version SUNWprivate_1.1
+end
+
+function sdssc_free_mdcerr_list
+version SUNWprivate_1.1
+end
+
+function sdssc_freenodelist
+version SUNWprivate_1.1
+end
+
+function sdssc_get_index
+version SUNWprivate_1.1
+end
+
+function sdssc_get_primary_host
+version SUNWprivate_1.1
+end
+
+function sdssc_get_priv_ipaddr
+version SUNWprivate_1.1
+end
+
+function sdssc_get_services
+version SUNWprivate_1.1
+end
+
+function sdssc_get_services_free
+version SUNWprivate_1.1
+end
+
+function sdssc_getnodelist
+version SUNWprivate_1.1
+end
+
+function sdssc_gettransportbynode
+version SUNWprivate_1.1
+end
+
+function sdssc_notify_service
+version SUNWprivate_1.1
+end
+
+function sdssc_property_get
+version SUNWprivate_1.1
+end
+
+function sdssc_property_set
+version SUNWprivate_1.1
+end
+
+function sdssc_suspend
+version SUNWprivate_1.1
+end
+
+function sdssc_version
+version SUNWprivate_1.1
+end
+
+function getdevstamp
+version SUNWprivate_1.1
+end
+
+function setdevstamp
+version SUNWprivate_1.1
+end
+
+function md_eprintf
+version SUNWprivate_1.1
+end
+
+function meta_mc_log
+version SUNWprivate_1.1
+end
+
+function md_logpfx
+version SUNWprivate_1.1
+end
+
+function md_perror
+version SUNWprivate_1.1
+end
+
+function mdclrerror
+version SUNWprivate_1.1
+end
+
+function mdcomperror
+version SUNWprivate_1.1
+end
+
+function mddeverror
+version SUNWprivate_1.1
+end
+
+function mddserror
+version SUNWprivate_1.1
+end
+
+function mde_perror
+version SUNWprivate_1.1
+end
+
+function mde_sperror
+version SUNWprivate_1.1
+end
+
+function mderror
+version SUNWprivate_1.1
+end
+
+function mderrorextra
+version SUNWprivate_1.1
+end
+
+function mdhserror
+version SUNWprivate_1.1
+end
+
+function mdhsperror
+version SUNWprivate_1.1
+end
+
+function mdmddberror
+version SUNWprivate_1.1
+end
+
+function mdmderror
+version SUNWprivate_1.1
+end
+
+function mdrpccreateerror
+version SUNWprivate_1.1
+end
+
+function mdrpcerror
+version SUNWprivate_1.1
+end
+
+function mdstealerror
+version SUNWprivate_1.1
+end
+
+function mdsyserror
+version SUNWprivate_1.1
+end
+
+function mduseerror
+version SUNWprivate_1.1
+end
+
+function metaioctl
+version SUNWprivate_1.1
+end
+
+function meta_getalldevs
+version SUNWprivate_1.1
+end
+
+function meta_getdevs
+version SUNWprivate_1.1
+end
+
+function meta_getvtoc
+version SUNWprivate_1.1
+end
+
+function meta_setvtoc
+version SUNWprivate_1.1
+end
+
+function hs_state_to_name
+version SUNWprivate_1.1
+end
+
+function meta_check_hotspare
+version SUNWprivate_1.1
+end
+
+function meta_check_hsp
+version SUNWprivate_1.1
+end
+
+function meta_check_inhsp
+version SUNWprivate_1.1
+end
+
+function meta_create_hsp
+version SUNWprivate_1.1
+end
+
+function meta_free_hsp
+version SUNWprivate_1.1
+end
+
+function meta_get_hsp
+version SUNWprivate_1.1
+end
+
+function meta_get_hsp_common
+version SUNWprivate_1.1
+end
+
+function meta_get_hsp_names
+version SUNWprivate_1.1
+end
+
+function meta_hs_add
+version SUNWprivate_1.1
+end
+
+function meta_hs_delete
+version SUNWprivate_1.1
+end
+
+function meta_hs_enable
+version SUNWprivate_1.1
+end
+
+function meta_hs_replace
+version SUNWprivate_1.1
+end
+
+function meta_hsp_print
+version SUNWprivate_1.1
+end
+
+function meta_hsp_reset
+version SUNWprivate_1.1
+end
+
+function meta_init_hsp
+version SUNWprivate_1.1
+end
+
+function meta_invalidate_hsp
+version SUNWprivate_1.1
+end
+
+function metachkhsp
+version SUNWprivate_1.1
+end
+
+function meta_adjust_geom
+version SUNWprivate_1.1
+end
+
+function meta_cook_syntax
+version SUNWprivate_1.1
+end
+
+function meta_init_name
+version SUNWprivate_1.1
+end
+
+function meta_init_make_device
+version SUNWprivate_1.1
+end
+
+function meta_setup_geom
+version SUNWprivate_1.1
+end
+
+function parse_interlace
+version SUNWprivate_1.1
+end
+
+function close_mnttab
+version SUNWprivate_1.1
+end
+
+function open_mnttab
+version SUNWprivate_1.1
+end
+
+function meta_update_md_cf
+version SUNWprivate_1.1
+end
+
+function med_errnum_to_str
+version SUNWprivate_1.1
+end
+
+function Calloc
+version SUNWprivate_1.1
+end
+
+function Free
+version SUNWprivate_1.1
+end
+
+function Malloc
+version SUNWprivate_1.1
+end
+
+function Realloc
+version SUNWprivate_1.1
+end
+
+function Strdup
+version SUNWprivate_1.1
+end
+
+function Zalloc
+version SUNWprivate_1.1
+end
+
+function cl_get_setkey
+version SUNWprivate_1.1
+end
+
+function cl_set_setkey
+version SUNWprivate_1.1
+end
+
+function clnt_add_drv_sidenms
+version SUNWprivate_1.1
+end
+
+function clnt_adddrvs
+version SUNWprivate_1.1
+end
+
+function clnt_addhosts
+version SUNWprivate_1.1
+end
+
+function clnt_createset
+version SUNWprivate_1.1
+end
+
+function clnt_del_drv_sidenms
+version SUNWprivate_1.1
+end
+
+function clnt_deldrvs
+version SUNWprivate_1.1
+end
+
+function clnt_delhosts
+version SUNWprivate_1.1
+end
+
+function clnt_delset
+version SUNWprivate_1.1
+end
+
+function clnt_devinfo
+version SUNWprivate_1.1
+end
+
+function clnt_drvused
+version SUNWprivate_1.1
+end
+
+function clnt_devinfo_by_devid
+version SUNWprivate_1.1
+end
+
+function clnt_getset
+version SUNWprivate_1.1
+end
+
+function clnt_mngetset
+version SUNWprivate_1.1
+end
+
+function clnt_gtimeout
+version SUNWprivate_1.1
+end
+
+function clnt_hostname
+version SUNWprivate_1.1
+end
+
+function clnt_lock_set
+version SUNWprivate_1.1
+end
+
+function clnt_nullproc
+version SUNWprivate_1.1
+end
+
+function clnt_ownset
+version SUNWprivate_1.1
+end
+
+function clnt_setnameok
+version SUNWprivate_1.1
+end
+
+function clnt_setnumbusy
+version SUNWprivate_1.1
+end
+
+function clnt_stimeout
+version SUNWprivate_1.1
+end
+
+function clnt_unlock_set
+version SUNWprivate_1.1
+end
+
+function clnt_upd_dr_dbinfo
+version SUNWprivate_1.1
+end
+
+function clnt_upd_dr_flags
+version SUNWprivate_1.1
+end
+
+function clnt_upd_sr_flags
+version SUNWprivate_1.1
+end
+
+function clnt_upd_nr_flags
+version SUNWprivate_1.1
+end
+
+function clnt_updmeds
+version SUNWprivate_1.1
+end
+
+function meta_conv_drvdesc_new2old
+version SUNWprivate_1.1
+end
+
+function meta_conv_drvdesc_old2new
+version SUNWprivate_1.1
+end
+
+function meta_conv_drvname_new2old
+version SUNWprivate_1.1
+end
+
+function meta_conv_drvname_old2new
+version SUNWprivate_1.1
+end
+
+function alloc_olddrvdesc
+version SUNWprivate_1.1
+end
+
+function alloc_newdrvdesc
+version SUNWprivate_1.1
+end
+
+function free_olddrvdesc
+version SUNWprivate_1.1
+end
+
+function free_newdrvdesc
+version SUNWprivate_1.1
+end
+
+function meta_get_devid
+version SUNWprivate_1.1
+end
+
+function meta_print_devid
+version SUNWprivate_1.1
+end
+
+function clnt_mncreateset
+version SUNWprivate_1.1
+end
+
+function clnt_joinset
+version SUNWprivate_1.1
+end
+
+function clnt_mnsetmaster
+version SUNWprivate_1.1
+end
+
+function clnt_mn_mirror_resync_all
+version SUNWprivate_1.1
+end
+
+function clnt_mn_sp_update_abr
+version SUNWprivate_1.1
+end
+
+function free_sr
+version SUNWprivate_1.1
+end
+
+function short_circuit_getset
+version SUNWprivate_1.1
+end
+
+function commitset
+version SUNWprivate_1.1
+end
+
+function dr_cache_add
+version SUNWprivate_1.1
+end
+
+function dr_cache_del
+version SUNWprivate_1.1
+end
+
+function mnnr_cache_add
+version SUNWprivate_1.1
+end
+
+function mnnr_cache_del
+version SUNWprivate_1.1
+end
+
+function drdup
+version SUNWprivate_1.1
+end
+
+function get_db_rec
+version SUNWprivate_1.1
+end
+
+function get_ur_rec
+version SUNWprivate_1.1
+end
+
+function metad_getsetbyname
+version SUNWprivate_1.1
+end
+
+function metad_getsetbynum
+version SUNWprivate_1.1
+end
+
+function resnarf_set
+version SUNWprivate_1.1
+end
+
+function metad_isautotakebyname
+version SUNWprivate_1.1
+end
+
+function metad_isautotakebynum
+version SUNWprivate_1.1
+end
+
+function s_delrec
+version SUNWprivate_1.1
+end
+
+function s_delset
+version SUNWprivate_1.1
+end
+
+function s_ownset
+version SUNWprivate_1.1
+end
+
+function set_snarf
+version SUNWprivate_1.1
+end
+
+function setdup
+version SUNWprivate_1.1
+end
+
+function mnsetdup
+version SUNWprivate_1.1
+end
+
+function sr_cache_add
+version SUNWprivate_1.1
+end
+
+function sr_cache_del
+version SUNWprivate_1.1
+end
+
+function sr_cache_flush
+version SUNWprivate_1.1
+end
+
+function sr_cache_flush_setno
+version SUNWprivate_1.1
+end
+
+function sr_validate
+version SUNWprivate_1.1
+end
+
+function sr_del_drv
+version SUNWprivate_1.1
+end
+
+function clnt_med_get_data
+version SUNWprivate_1.1
+end
+
+function clnt_med_get_rec
+version SUNWprivate_1.1
+end
+
+function clnt_med_hostname
+version SUNWprivate_1.1
+end
+
+function clnt_med_null
+version SUNWprivate_1.1
+end
+
+function clnt_med_upd_data
+version SUNWprivate_1.1
+end
+
+function clnt_med_upd_rec
+version SUNWprivate_1.1
+end
+
+function meddstealerror
+version SUNWprivate_1.1
+end
+
+function meta_h2hi
+version SUNWprivate_1.1
+end
+
+function meta_hi2h
+version SUNWprivate_1.1
+end
+
+function meta_med_hnm2ip
+version SUNWprivate_1.1
+end
+
+function setup_med_cfg
+version SUNWprivate_1.1
+end
+
+function defmhiargs
+version SUNWprivate_1.1
+end
+
+function meta_drive_to_disk_status_list
+version SUNWprivate_1.1
+end
+
+function meta_free_disk_status_list
+version SUNWprivate_1.1
+end
+
+function meta_free_drive_info_list
+version SUNWprivate_1.1
+end
+
+function meta_get_drive_names
+version SUNWprivate_1.1
+end
+
+function meta_list_disks
+version SUNWprivate_1.1
+end
+
+function meta_imp_set
+version SUNWprivate_1.1
+end
+
+function meta_list_drives
+version SUNWprivate_1.1
+end
+
+function meta_get_set_info
+version SUNWprivate_1.1
+end
+
+function meta_prune_cnames
+version SUNWprivate_1.1
+end
+
+function meta_rel_own
+version SUNWprivate_1.1
+end
+
+function meta_status_own
+version SUNWprivate_1.1
+end
+
+function meta_take_own
+version SUNWprivate_1.1
+end
+
+function mhstealerror
+version SUNWprivate_1.1
+end
+
+function rel_own_bydd
+version SUNWprivate_1.1
+end
+
+function tk_own_bydd
+version SUNWprivate_1.1
+end
+
+function meta_check_inmirror
+version SUNWprivate_1.1
+end
+
+function meta_check_mirror
+version SUNWprivate_1.1
+end
+
+function meta_check_submirror
+version SUNWprivate_1.1
+end
+
+function meta_create_mirror
+version SUNWprivate_1.1
+end
+
+function meta_free_mirror
+version SUNWprivate_1.1
+end
+
+function meta_get_mirror
+version SUNWprivate_1.1
+end
+
+function meta_get_mirror_names
+version SUNWprivate_1.1
+end
+
+function meta_init_mirror
+version SUNWprivate_1.1
+end
+
+function meta_mirror_anycomp_is_err
+version SUNWprivate_1.1
+end
+
+function meta_mirror_attach
+version SUNWprivate_1.1
+end
+
+function meta_mirror_detach
+version SUNWprivate_1.1
+end
+
+function meta_mirror_enable
+version SUNWprivate_1.1
+end
+
+function meta_mirror_get_params
+version SUNWprivate_1.1
+end
+
+function meta_mirror_offline
+version SUNWprivate_1.1
+end
+
+function meta_mirror_online
+version SUNWprivate_1.1
+end
+
+function meta_mirror_print
+version SUNWprivate_1.1
+end
+
+function meta_mirror_replace
+version SUNWprivate_1.1
+end
+
+function meta_mirror_reset
+version SUNWprivate_1.1
+end
+
+function meta_mirror_set_params
+version SUNWprivate_1.1
+end
+
+function meta_print_mirror_options
+version SUNWprivate_1.1
+end
+
+function name_to_pass_num
+version SUNWprivate_1.1
+end
+
+function name_to_rd_opt
+version SUNWprivate_1.1
+end
+
+function name_to_wr_opt
+version SUNWprivate_1.1
+end
+
+function rd_opt_to_name
+version SUNWprivate_1.1
+end
+
+function sm_state_to_action
+version SUNWprivate_1.1
+end
+
+function sm_state_to_name
+version SUNWprivate_1.1
+end
+
+function wr_opt_to_name
+version SUNWprivate_1.1
+end
+
+function meta_mirror_resync
+version SUNWprivate_1.1
+end
+
+function meta_mirror_resync_all
+version SUNWprivate_1.1
+end
+
+function meta_mn_mirror_resync_all
+version SUNWprivate_1.1
+end
+
+function meta_mirror_resync_kill_all
+version SUNWprivate_1.1
+end
+
+function meta_mirror_resync_block_all
+version SUNWprivate_1.1
+end
+
+function meta_mirror_resync_unblock_all
+version SUNWprivate_1.1
+end
+
+function meta_mirror_resync_unblock
+version SUNWprivate_1.1
+end
+
+function meta_mirror_resync_kill
+version SUNWprivate_1.1
+end
+
+function meta_get_mountp
+version SUNWprivate_1.1
+end
+
+function blkname
+version SUNWprivate_1.1
+end
+
+function get_devname
+version SUNWprivate_1.1
+end
+
+function get_hspname
+version SUNWprivate_1.1
+end
+
+function get_mdname
+version SUNWprivate_1.1
+end
+
+function is_hspname
+version SUNWprivate_1.1
+end
+
+function sr2setdesc
+version SUNWprivate_1.1
+end
+
+function is_metaname
+version SUNWprivate_1.1
+end
+
+function meta_canonicalize
+version SUNWprivate_1.1
+end
+
+function meta_get_hotspare_names
+version SUNWprivate_1.1
+end
+
+function meta_getdev
+version SUNWprivate_1.1
+end
+
+function metachkcomp
+version SUNWprivate_1.1
+end
+
+function metachkdisk
+version SUNWprivate_1.1
+end
+
+function metachkmeta
+version SUNWprivate_1.1
+end
+
+function metadevname
+version SUNWprivate_1.1
+end
+
+function metadiskname
+version SUNWprivate_1.1
+end
+
+function metadrivename
+version SUNWprivate_1.1
+end
+
+function metadrivenamelist
+version SUNWprivate_1.1
+end
+
+function metadrivenamelist_append
+version SUNWprivate_1.1
+end
+
+function meta_drivenamelist_append_wrapper
+version SUNWprivate_1.1
+end
+
+function metafakesetname
+version SUNWprivate_1.1
+end
+
+function metaflushnames
+version SUNWprivate_1.1
+end
+
+function metaflushsetname
+version SUNWprivate_1.1
+end
+
+function metaflushsidenames
+version SUNWprivate_1.1
+end
+
+function metafreedrivename
+version SUNWprivate_1.1
+end
+
+function metafreedrivenamelist
+version SUNWprivate_1.1
+end
+
+function metafreehspnamelist
+version SUNWprivate_1.1
+end
+
+function metafreenamelist
+version SUNWprivate_1.1
+end
+
+function metaget_setdesc
+version SUNWprivate_1.1
+end
+
+function metahsphspname
+version SUNWprivate_1.1
+end
+
+function metahspname
+version SUNWprivate_1.1
+end
+
+function metahspnamelist
+version SUNWprivate_1.1
+end
+
+function metahspnamelist_append
+version SUNWprivate_1.1
+end
+
+function metaislocalset
+version SUNWprivate_1.1
+end
+
+function metaismeta
+version SUNWprivate_1.1
+end
+
+function metaissameset
+version SUNWprivate_1.1
+end
+
+function metakeyname
+version SUNWprivate_1.1
+end
+
+function metamnumname
+version SUNWprivate_1.1
+end
+
+function metaname
+version SUNWprivate_1.1
+end
+
+function metaname_fast
+version SUNWprivate_1.1
+end
+
+function metanamelist
+version SUNWprivate_1.1
+end
+
+function metanamelist_append
+version SUNWprivate_1.1
+end
+
+function metasetname
+version SUNWprivate_1.1
+end
+
+function metasetnosetname
+version SUNWprivate_1.1
+end
+
+function metaslicename
+version SUNWprivate_1.1
+end
+
+function ctlr_cache_add
+version SUNWprivate_1.1
+end
+
+function ctlr_cache_look
+version SUNWprivate_1.1
+end
+
+function getdrvnode
+version SUNWprivate_1.1
+end
+
+function meta_free_unit
+version SUNWprivate_1.1
+end
+
+function meta_get_mdunit
+version SUNWprivate_1.1
+end
+
+function meta_get_unit
+version SUNWprivate_1.1
+end
+
+function meta_invalidate_name
+version SUNWprivate_1.1
+end
+
+function meta_isopen
+version SUNWprivate_1.1
+end
+
+function meta_match_enclosure
+version SUNWprivate_1.1
+end
+
+function metaflushctlrcache
+version SUNWprivate_1.1
+end
+
+function metafreevtoc
+version SUNWprivate_1.1
+end
+
+function metagetcinfo
+version SUNWprivate_1.1
+end
+
+function metagetdevicesname
+version SUNWprivate_1.1
+end
+
+function metagetgeom
+version SUNWprivate_1.1
+end
+
+function metagetlabel
+version SUNWprivate_1.1
+end
+
+function metagetmiscname
+version SUNWprivate_1.1
+end
+
+function metagetpartno
+version SUNWprivate_1.1
+end
+
+function metagetset
+version SUNWprivate_1.1
+end
+
+function metagetsize
+version SUNWprivate_1.1
+end
+
+function metagetstart
+version SUNWprivate_1.1
+end
+
+function metagetvtoc
+version SUNWprivate_1.1
+end
+
+function metahasmddb
+version SUNWprivate_1.1
+end
+
+function metasetvtoc
+version SUNWprivate_1.1
+end
+
+function add_key_name
+version SUNWprivate_1.1
+end
+
+function add_name
+version SUNWprivate_1.1
+end
+
+function del_key_name
+version SUNWprivate_1.1
+end
+
+function del_key_names
+version SUNWprivate_1.1
+end
+
+function del_name
+version SUNWprivate_1.1
+end
+
+function meta_getnmbykey
+version SUNWprivate_1.1
+end
+
+function meta_getnmentbydev
+version SUNWprivate_1.1
+end
+
+function meta_getnmentbykey
+version SUNWprivate_1.1
+end
+
+function evdrv2evlib_typetab
+version SUNWprivate_1.1
+end
+
+function meta_notify_createq
+version SUNWprivate_1.1
+end
+
+function meta_notify_deleteq
+version SUNWprivate_1.1
+end
+
+function meta_notify_doputev
+version SUNWprivate_1.1
+end
+
+function meta_notify_flushq
+version SUNWprivate_1.1
+end
+
+function meta_notify_freeevlist
+version SUNWprivate_1.1
+end
+
+function meta_notify_getev
+version SUNWprivate_1.1
+end
+
+function meta_notify_getevlist
+version SUNWprivate_1.1
+end
+
+function meta_notify_listq
+version SUNWprivate_1.1
+end
+
+function meta_notify_putev
+version SUNWprivate_1.1
+end
+
+function meta_notify_putevlist
+version SUNWprivate_1.1
+end
+
+function meta_notify_sendev
+version SUNWprivate_1.1
+end
+
+function meta_notify_validq
+version SUNWprivate_1.1
+end
+
+function tag2obj_typetab
+version SUNWprivate_1.1
+end
+
+function meta_patch_fsdev
+version SUNWprivate_1.1
+end
+
+function meta_patch_swapdev
+version SUNWprivate_1.1
+end
+
+function meta_patch_vfstab
+version SUNWprivate_1.1
+end
+
+function meta_patch_rootdev
+version SUNWprivate_1.1
+end
+
+function meta_prbits
+version SUNWprivate_1.1
+end
+
+function meta_print_all
+version SUNWprivate_1.1
+end
+
+function meta_print_name
+version SUNWprivate_1.1
+end
+
+function meta_print_time
+version SUNWprivate_1.1
+end
+
+function meta_print_hrtime
+version SUNWprivate_1.1
+end
+
+function meta_check_column
+version SUNWprivate_1.1
+end
+
+function meta_check_inraid
+version SUNWprivate_1.1
+end
+
+function meta_check_raid
+version SUNWprivate_1.1
+end
+
+function meta_create_raid
+version SUNWprivate_1.1
+end
+
+function meta_default_raid_interlace
+version SUNWprivate_1.1
+end
+
+function meta_free_raid
+version SUNWprivate_1.1
+end
+
+function meta_get_raid_common
+version SUNWprivate_1.1
+end
+
+function meta_get_raid
+version SUNWprivate_1.1
+end
+
+function meta_get_raid_names
+version SUNWprivate_1.1
+end
+
+function meta_init_raid
+version SUNWprivate_1.1
+end
+
+function meta_print_raid_options
+version SUNWprivate_1.1
+end
+
+function meta_raid_anycomp_is_err
+version SUNWprivate_1.1
+end
+
+function meta_raid_attach
+version SUNWprivate_1.1
+end
+
+function meta_raid_check_interlace
+version SUNWprivate_1.1
+end
+
+function meta_raid_enable
+version SUNWprivate_1.1
+end
+
+function meta_raid_get_params
+version SUNWprivate_1.1
+end
+
+function meta_raid_print
+version SUNWprivate_1.1
+end
+
+function meta_raid_regen_byname
+version SUNWprivate_1.1
+end
+
+function meta_raid_replace
+version SUNWprivate_1.1
+end
+
+function meta_raid_reset
+version SUNWprivate_1.1
+end
+
+function meta_raid_set_params
+version SUNWprivate_1.1
+end
+
+function meta_raid_state_cnt
+version SUNWprivate_1.1
+end
+
+function meta_raid_valid
+version SUNWprivate_1.1
+end
+
+function raid_col_state_to_name
+version SUNWprivate_1.1
+end
+
+function raid_state_to_action
+version SUNWprivate_1.1
+end
+
+function raid_state_to_name
+version SUNWprivate_1.1
+end
+
+function meta_raid_resync
+version SUNWprivate_1.1
+end
+
+function meta_raid_resync_all
+version SUNWprivate_1.1
+end
+
+function meta_exchange
+version SUNWprivate_1.1
+end
+
+function meta_rename
+version SUNWprivate_1.1
+end
+
+function meta_enable_byname
+version SUNWprivate_1.1
+end
+
+function meta_replace
+version SUNWprivate_1.1
+end
+
+function meta_replace_byname
+version SUNWprivate_1.1
+end
+
+function meta_reset
+version SUNWprivate_1.1
+end
+
+function meta_reset_all
+version SUNWprivate_1.1
+end
+
+function meta_reset_by_name
+version SUNWprivate_1.1
+end
+
+function meta_resync_all
+version SUNWprivate_1.1
+end
+
+function meta_resync_byname
+version SUNWprivate_1.1
+end
+
+function do_owner_ioctls
+version SUNWprivate_1.1
+end
+
+function commd_get_verbosity
+version SUNWprivate_1.1
+end
+
+function commd_get_outfile
+version SUNWprivate_1.1
+end
+
+function get_max_meds
+version SUNWprivate_1.1
+end
+
+function get_max_sets
+version SUNWprivate_1.1
+end
+
+function getmyside
+version SUNWprivate_1.1
+end
+
+function getsetbyname
+version SUNWprivate_1.1
+end
+
+function getsetbynum
+version SUNWprivate_1.1
+end
+
+function meta_check_drive_inuse
+version SUNWprivate_1.1
+end
+
+function meta_check_ownership
+version SUNWprivate_1.1
+end
+
+function meta_check_ownership_on_host
+version SUNWprivate_1.1
+end
+
+function meta_get_reserved_names
+version SUNWprivate_1.1
+end
+
+function meta_getnextside_devinfo
+version SUNWprivate_1.1
+end
+
+function meta_is_drive_in_anyset
+version SUNWprivate_1.1
+end
+
+function meta_is_drive_in_thisset
+version SUNWprivate_1.1
+end
+
+function meta_set_balance
+version SUNWprivate_1.1
+end
+
+function meta_set_destroy
+version SUNWprivate_1.1
+end
+
+function meta_set_purge
+version SUNWprivate_1.1
+end
+
+function meta_set_query
+version SUNWprivate_1.1
+end
+
+function metadrivename_withdrkey
+version SUNWprivate_1.1
+end
+
+function metafreedrivedesc
+version SUNWprivate_1.1
+end
+
+function metaget_drivedesc
+version SUNWprivate_1.1
+end
+
+function metaget_drivedesc_fromnamelist
+version SUNWprivate_1.1
+end
+
+function metaget_drivedesc_sideno
+version SUNWprivate_1.1
+end
+
+function metaget_setownership
+version SUNWprivate_1.1
+end
+
+function mynode
+version SUNWprivate_1.1
+end
+
+function strinlst
+version SUNWprivate_1.1
+end
+
+function meta_make_sidenmlist
+version SUNWprivate_1.1
+end
+
+function meta_set_adddrives
+version SUNWprivate_1.1
+end
+
+function meta_set_deletedrives
+version SUNWprivate_1.1
+end
+
+function meta_set_checkname
+version SUNWprivate_1.1
+end
+
+function meta_set_addhosts
+version SUNWprivate_1.1
+end
+
+function meta_set_deletehosts
+version SUNWprivate_1.1
+end
+
+function meta_set_addmeds
+version SUNWprivate_1.1
+end
+
+function meta_set_deletemeds
+version SUNWprivate_1.1
+end
+
+function meta_set_auto_take
+version SUNWprivate_1.1
+end
+
+function checkdrive_onnode
+version SUNWprivate_1.1
+end
+
+function getnodeside
+version SUNWprivate_1.1
+end
+
+function halt_set
+version SUNWprivate_1.1
+end
+
+function metadrivedesc_append
+version SUNWprivate_1.1
+end
+
+function nodehasset
+version SUNWprivate_1.1
+end
+
+function nodesuniq
+version SUNWprivate_1.1
+end
+
+function own_set
+version SUNWprivate_1.1
+end
+
+function resync_genid
+version SUNWprivate_1.1
+end
+
+function setup_db_bydd
+version SUNWprivate_1.1
+end
+
+function snarf_set
+version SUNWprivate_1.1
+end
+
+function meta_set_release
+version SUNWprivate_1.1
+end
+
+function meta_set_take
+version SUNWprivate_1.1
+end
+
+function meta_set_join
+version SUNWprivate_1.1
+end
+
+function meta_set_withdraw
+version SUNWprivate_1.1
+end
+
+function meta_update_mb
+version SUNWprivate_1.1
+end
+
+function allsigs
+version SUNWprivate_1.1
+end
+
+function md_daemonize
+version SUNWprivate_1.1
+end
+
+function md_exit
+version SUNWprivate_1.1
+end
+
+function md_got_sig
+version SUNWprivate_1.1
+end
+
+function setup_mc_log
+version SUNWprivate_1.1
+end
+
+function md_init
+version SUNWprivate_1.1
+end
+
+function md_init_nosig
+version SUNWprivate_1.1
+end
+
+function md_init_daemon
+version SUNWprivate_1.1
+end
+
+function md_post_sig
+version SUNWprivate_1.1
+end
+
+function md_rb_sig_handling_off
+version SUNWprivate_1.1
+end
+
+function md_rb_sig_handling_on
+version SUNWprivate_1.1
+end
+
+function md_which_sig
+version SUNWprivate_1.1
+end
+
+function meta_lock
+version SUNWprivate_1.1
+end
+
+function meta_lock_name
+version SUNWprivate_1.1
+end
+
+function meta_lock_nowait
+version SUNWprivate_1.1
+end
+
+function meta_lock_status
+version SUNWprivate_1.1
+end
+
+function meta_unlock
+version SUNWprivate_1.1
+end
+
+function metalogfp
+version SUNWprivate_1.1
+end
+
+function metasyslog
+version SUNWprivate_1.1
+end
+
+function verbosity
+version SUNWprivate_1.1
+end
+
+function start_time
+version SUNWprivate_1.1
+end
+
+function myname
+version SUNWprivate_1.1
+end
+
+function procsigs
+version SUNWprivate_1.1
+end
+
+function rb_test
+version SUNWprivate_1.1
+end
+
+function meta_stat
+version SUNWprivate_1.1
+end
+
+function metaflushstatcache
+version SUNWprivate_1.1
+end
+
+function comp_state_to_name
+version SUNWprivate_1.1
+end
+
+function meta_check_component
+version SUNWprivate_1.1
+end
+
+function meta_check_instripe
+version SUNWprivate_1.1
+end
+
+function meta_check_stripe
+version SUNWprivate_1.1
+end
+
+function meta_create_stripe
+version SUNWprivate_1.1
+end
+
+function meta_default_stripe_interlace
+version SUNWprivate_1.1
+end
+
+function meta_find_erred_comp
+version SUNWprivate_1.1
+end
+
+function meta_free_stripe
+version SUNWprivate_1.1
+end
+
+function meta_get_stripe_common
+version SUNWprivate_1.1
+end
+
+function meta_get_stripe
+version SUNWprivate_1.1
+end
+
+function meta_get_stripe_names
+version SUNWprivate_1.1
+end
+
+function meta_init_stripe
+version SUNWprivate_1.1
+end
+
+function meta_print_stripe_options
+version SUNWprivate_1.1
+end
+
+function meta_recover_sp
+version SUNWprivate_1.1
+end
+
+function meta_sp_issp
+version SUNWprivate_1.1
+end
+
+function meta_sp_reset_component
+version SUNWprivate_1.1
+end
+
+function meta_sp_attach
+version SUNWprivate_1.1
+end
+
+function meta_sp_update_abr
+version SUNWprivate_1.1
+end
+
+function meta_mn_sp_update_abr
+version SUNWprivate_1.1
+end
+
+function meta_get_sp_common
+version SUNWprivate_1.1
+end
+
+function meta_get_sp
+version SUNWprivate_1.1
+end
+
+function meta_free_sp
+version SUNWprivate_1.1
+end
+
+function meta_get_sp_names
+version SUNWprivate_1.1
+end
+
+function meta_sp_can_create_sps
+version SUNWprivate_1.1
+end
+
+function meta_sp_can_create_sps_on_drive
+version SUNWprivate_1.1
+end
+
+function meta_sp_get_free_space
+version SUNWprivate_1.1
+end
+
+function meta_sp_get_free_space_on_drive
+version SUNWprivate_1.1
+end
+
+function meta_sp_get_number_of_possible_sps
+version SUNWprivate_1.1
+end
+
+function meta_sp_get_number_of_possible_sps_on_drive
+version SUNWprivate_1.1
+end
+
+function meta_sp_get_possible_sp_size
+version SUNWprivate_1.1
+end
+
+function meta_sp_get_possible_sp_size_on_drive
+version SUNWprivate_1.1
+end
+
+function meta_sp_parsesize
+version SUNWprivate_1.1
+end
+
+function meta_stripe_anycomp_is_err
+version SUNWprivate_1.1
+end
+
+function meta_stripe_attach
+version SUNWprivate_1.1
+end
+
+function meta_stripe_check_interlace
+version SUNWprivate_1.1
+end
+
+function meta_stripe_get_params
+version SUNWprivate_1.1
+end
+
+function meta_stripe_print
+version SUNWprivate_1.1
+end
+
+function meta_stripe_replace
+version SUNWprivate_1.1
+end
+
+function meta_stripe_reset
+version SUNWprivate_1.1
+end
+
+function meta_stripe_set_params
+version SUNWprivate_1.1
+end
+
+function meta_systemfile_append_mddb
+version SUNWprivate_1.1
+end
+
+function meta_systemfile_append_mdroot
+version SUNWprivate_1.1
+end
+
+function meta_systemfile_copy
+version SUNWprivate_1.1
+end
+
+function meta_tab_find
+version SUNWprivate_1.1
+end
+
+function meta_tab_free
+version SUNWprivate_1.1
+end
+
+function meta_tab_parse
+version SUNWprivate_1.1
+end
+
+function meta_check_intrans
+version SUNWprivate_1.1
+end
+
+function meta_check_log
+version SUNWprivate_1.1
+end
+
+function meta_check_master
+version SUNWprivate_1.1
+end
+
+function meta_free_trans
+version SUNWprivate_1.1
+end
+
+function meta_get_trans
+version SUNWprivate_1.1
+end
+
+function meta_get_trans_common
+version SUNWprivate_1.1
+end
+
+function meta_get_trans_names
+version SUNWprivate_1.1
+end
+
+function meta_logs_print
+version SUNWprivate_1.1
+end
+
+function meta_trans_detach
+version SUNWprivate_1.1
+end
+
+function meta_trans_print
+version SUNWprivate_1.1
+end
+
+function meta_trans_replace
+version SUNWprivate_1.1
+end
+
+function meta_trans_reset
+version SUNWprivate_1.1
+end
+
+function mt_flags_to_action
+version SUNWprivate_1.1
+end
+
+function mt_flags_to_name
+version SUNWprivate_1.1
+end
+
+function mt_l_error_to_action
+version SUNWprivate_1.1
+end
+
+function mt_l_error_to_name
+version SUNWprivate_1.1
+end
+
+function transstats
+version SUNWprivate_1.1
+end
+
+function meta_getuserflags
+version SUNWprivate_1.1
+end
+
+function meta_setuserflags
+version SUNWprivate_1.1
+end
+
+function metarpcclose
+version SUNWprivate_1.1
+end
+
+function metarpccloseall
+version SUNWprivate_1.1
+end
+
+function metarpcopen
+version SUNWprivate_1.1
+end
+
+function splicename
+version SUNWprivate_1.1
+end
+
+function splitname
+version SUNWprivate_1.1
+end
+
+function crcfreetab
+version SUNWprivate_1.1
+end
+
+function crcfunc
+version SUNWprivate_1.1
+end
+
+function mdnullerror
+version SUNWprivate_1.1
+end
+
+function xdr_comp_state_t
+version SUNWprivate_1.1
+end
+
+function xdr_comp_t
+version SUNWprivate_1.1
+end
+
+function xdr_diskaddr_t
+version SUNWprivate_1.1
+end
+
+function xdr_hotspare_states_t
+version SUNWprivate_1.1
+end
+
+function xdr_hs_t
+version SUNWprivate_1.1
+end
+
+function xdr_hsp_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_common_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_comp_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_comp_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_comp_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_dev_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_dev_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_drive_desc
+version SUNWprivate_1.1
+end
+
+function xdr_md_drive_record
+version SUNWprivate_1.1
+end
+
+function xdr_md_ds_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_ds_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_errclass_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_error_info_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_hs_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_hs_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_hs_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_hsp_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_hsp_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_hsp_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_md_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_md_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_mddb_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_mddb_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_mirror_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_name_prefix
+version SUNWprivate_1.1
+end
+
+function xdr_md_name_suffix
+version SUNWprivate_1.1
+end
+
+function xdr_md_parent_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_raid_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_raidcol_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_replica_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_replica_recerr_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_replicalist_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_riflags_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_row_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_rpc_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_set_desc
+version SUNWprivate_1.1
+end
+
+function xdr_md_set_record
+version SUNWprivate_1.1
+end
+
+function xdr_md_setkey_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_shared_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_splitname
+version SUNWprivate_1.1
+end
+
+function xdr_md_stackcap_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_status_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_stripe_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_submirror_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_sys_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_trans_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_types_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_ur_get_cmd_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_use_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_use_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_void_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_void_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdcinfo_t
+version SUNWprivate_1.1
+end
+
+function xdr_mddb_cfgcmd_t
+version SUNWprivate_1.1
+end
+
+function xdr_mddb_recstatus_t
+version SUNWprivate_1.1
+end
+
+function xdr_mddb_type_t
+version SUNWprivate_1.1
+end
+
+function xdr_mddb_usercmd_t
+version SUNWprivate_1.1
+end
+
+function xdr_mddb_userrec_t
+version SUNWprivate_1.1
+end
+
+function xdr_mddrivename_t
+version SUNWprivate_1.1
+end
+
+function xdr_mddrivenamelist_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdgeom_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdhspname_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdhspnamelist_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdname_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdnamelist_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdnmtype_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdpart_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdsetname_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdsetnamelist_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdsidenames_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdvtoc_t
+version SUNWprivate_1.1
+end
+
+function xdr_minor_or_hsp_t
+version SUNWprivate_1.1
+end
+
+function xdr_mm_params_t
+version SUNWprivate_1.1
+end
+
+function xdr_mm_pass_num_t
+version SUNWprivate_1.1
+end
+
+function xdr_mm_rd_opt_t
+version SUNWprivate_1.1
+end
+
+function xdr_mm_wr_opt_t
+version SUNWprivate_1.1
+end
+
+function xdr_mr_params_t
+version SUNWprivate_1.1
+end
+
+function xdr_ms_params_t
+version SUNWprivate_1.1
+end
+
+function xdr_mt_debug_t
+version SUNWprivate_1.1
+end
+
+function xdr_mt_flags_t
+version SUNWprivate_1.1
+end
+
+function xdr_mt_l_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_rcs_flags_t
+version SUNWprivate_1.1
+end
+
+function xdr_rcs_state_t
+version SUNWprivate_1.1
+end
+
+function xdr_replica_flags_t
+version SUNWprivate_1.1
+end
+
+function xdr_rus_state_t
+version SUNWprivate_1.1
+end
+
+function xdr_sm_flags_t
+version SUNWprivate_1.1
+end
+
+function xdr_sm_state_t
+version SUNWprivate_1.1
+end
+
+function xdr_unit_t
+version SUNWprivate_1.1
+end
+
+function xdr_clnt_stat
+version SUNWprivate_1.1
+end
+
+function xdr_md_timeval32_t
+version SUNWprivate_1.1
+end
+
+function xdr_daddr_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_dev64_t
+version SUNWprivate_1.1
+end
+
+function xdr_dev_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_alias_ip_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_alias_nm_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_h_arr_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_h_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_hi_arr_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_hi_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_node_nm_arr_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_node_nm_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_set_nm_t
+version SUNWprivate_1.1
+end
+
+function xdr_mddb_recid_t
+version SUNWprivate_1.1
+end
+
+function xdr_mdkey_t
+version SUNWprivate_1.1
+end
+
+function xdr_minor_t
+version SUNWprivate_1.1
+end
+
+function xdr_off_t
+version SUNWprivate_1.1
+end
+
+function xdr_set_t
+version SUNWprivate_1.1
+end
+
+function xdr_side_t
+version SUNWprivate_1.1
+end
+
+function xdr_size_t
+version SUNWprivate_1.1
+end
+
+function xdr_timeval
+version SUNWprivate_1.1
+end
+
+function md_in_daemon
+version SUNWprivate_1.1
+end
+
+function mdrpc_add_drv_sidenms_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_adddrvs_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_addhosts_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_createset_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_del_drv_sidenms_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_deldrvs_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_delhosts_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_delset_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_drvused_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_flush_internal_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_getset_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_gtimeout_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_hostname_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_lock_set_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_nullproc_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_ownset_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_setnameok_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_setnumbusy_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_stimeout_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_unlock_set_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_upd_dr_dbinfo_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_upd_dr_flags_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_upd_sr_flags_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_updmeds_1
+version SUNWprivate_1.1
+end
+
+function mdrpc_add_drv_sidenms_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_adddrvs_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_addhosts_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_createset_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_del_drv_sidenms_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_deldrvs_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_delhosts_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_delset_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_devinfo_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_drvused_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_flush_internal_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_getset_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_mngetset_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_gtimeout_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_hostname_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_lock_set_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_nullproc_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_ownset_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_setnameok_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_setnumbusy_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_stimeout_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_unlock_set_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_upd_dr_dbinfo_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_upd_dr_flags_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_upd_sr_flags_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_upd_nr_flags_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_updmeds_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_mncreateset_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_mnsetmaster_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_mn_mirror_resync_all_2
+version SUNWprivate_1.1
+end
+
+function mdrpc_mn_sp_update_abr_2
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_bool_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_createset_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_createset_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_mncreateset_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devinfo_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devidstr_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devid_name_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devinfo_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devinfo_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devinfo_2_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devid_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devid_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_devid_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_drives_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_drives_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_drv_sidenm_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_drv_sidenm_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_drvused_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_drvused_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_generic_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_getset_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_getset_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_getset_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_mngetset_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_gtimeout_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_host_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_host_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_hostname_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_null_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_setlock_res
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_setno_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_setno_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_sp_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_sp_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_stimeout_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_stimeout_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_upd_dr_flags_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_upd_dr_flags_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_upd_sr_flags_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_upd_sr_flags_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_upd_nr_flags_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_updmeds_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_updmeds_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_mnsetmaster_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_stringarray
+version SUNWprivate_1.1
+end
+
+function med_get_data_1
+version SUNWprivate_1.1
+end
+
+function med_get_rec_1
+version SUNWprivate_1.1
+end
+
+function med_hostname_1
+version SUNWprivate_1.1
+end
+
+function med_null_1
+version SUNWprivate_1.1
+end
+
+function med_upd_data_1
+version SUNWprivate_1.1
+end
+
+function med_upd_rec_1
+version SUNWprivate_1.1
+end
+
+function md_med_def_timeout
+version SUNWprivate_1.1
+end
+
+function md_med_pmap_timeout
+version SUNWprivate_1.1
+end
+
+function med_null_err
+version SUNWprivate_1.1
+end
+
+function xdr_md_med_errno_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_args_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_data_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_err_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_get_data_res_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_get_rec_res_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_hnm_res_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_med_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_rec_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_res_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_upd_data_args_t
+version SUNWprivate_1.1
+end
+
+function xdr_med_upd_rec_args_t
+version SUNWprivate_1.1
+end
+
+function mhd_list_1
+version SUNWprivate_1.1
+end
+
+function mhd_relown_1
+version SUNWprivate_1.1
+end
+
+function mhd_status_1
+version SUNWprivate_1.1
+end
+
+function mhd_tkown_1
+version SUNWprivate_1.1
+end
+
+function mhd_null_error
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_drive_status_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_drivename_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_error_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_ff_mode_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_list_args_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_list_res_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_opts_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_relown_args_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_set_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_status_args_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_status_res_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_tkown_args_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_cinfo_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_ctlrtype_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_did_flags_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_drive_id_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_drive_info_list_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_drive_info_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_mhiargs_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_serial_t
+version SUNWprivate_1.1
+end
+
+function xdr_mhd_mhioctkown_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_mn_msg_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_mn_nodeid_t
+version SUNWprivate_1.1
+end
+
+function meta_get_current_root
+version SUNWprivate_1.1
+end
+
+function meta_get_current_root_dev
+version SUNWprivate_1.1
+end
+
+function meta_gettimeofday
+version SUNWprivate_1.1
+end
+
+function meta_replicaslice
+version SUNWprivate_1.1
+end
+
+function meta_get_tstate
+version SUNWprivate_1.1
+end
+
+function meta_setmdvtoc
+version SUNWprivate_1.1
+end
+
+function meta_check_devicesize
+version SUNWprivate_1.1
+end
+
+function clnt_devid
+version SUNWprivate_1.1
+end
+
+function meta_number_to_string
+version SUNWprivate_1.1
+end
+
+function meta_repartition_drive
+version SUNWprivate_1.1
+end
+
+function mdmn_send_message
+version SUNWprivate_1.1
+end
+
+function copy_result
+version SUNWprivate_1.1
+end
+
+function free_result
+version SUNWprivate_1.1
+end
+
+function copy_msg
+version SUNWprivate_1.1
+end
+
+function copy_msg_1
+version SUNWprivate_1.1
+end
+
+function free_msg
+version SUNWprivate_1.1
+end
+
+function mdmn_get_handler
+version SUNWprivate_1.1
+end
+
+function mdmn_get_submessage_generator
+version SUNWprivate_1.1
+end
+
+function mdmn_get_message_class
+version SUNWprivate_1.1
+end
+
+function mdmn_get_timeout
+version SUNWprivate_1.1
+end
+
+function meta_read_nodelist
+version SUNWprivate_1.1
+end
+
+function meta_write_nodelist
+version SUNWprivate_1.1
+end
+
+function meta_free_nodelist
+version SUNWprivate_1.1
+end
+
+function meta_is_mn_set
+version SUNWprivate_1.1
+end
+
+function meta_ping_mnset
+version SUNWprivate_1.1
+end
+
+function meta_mn_send_command
+version SUNWprivate_1.1
+end
+
+function meta_mn_send_suspend_writes
+version SUNWprivate_1.1
+end
+
+function meta_mn_send_setsync
+version SUNWprivate_1.1
+end
+
+function meta_mn_send_metaclear_command
+version SUNWprivate_1.1
+end
+
+function meta_mn_send_resync_starting
+version SUNWprivate_1.1
+end
+
+function meta_mn_change_owner
+version SUNWprivate_1.1
+end
+
+function meta_is_mn_name
+version SUNWprivate_1.1
+end
+
+function meta_reconfig_choose_master
+version SUNWprivate_1.1
+end
+
+function meta_mnsync_user_records
+version SUNWprivate_1.1
+end
+
+function meta_mnsync_diskset_mddbs
+version SUNWprivate_1.1
+end
+
+function meta_mnjoin_all
+version SUNWprivate_1.1
+end
+
+function mdmn_create_msgid
+version SUNWprivate_1.1
+end
+
+function mdmn_suspend
+version SUNWprivate_1.1
+end
+
+function mdmn_resume
+version SUNWprivate_1.1
+end
+
+function mdmn_reinit_set
+version SUNWprivate_1.1
+end
+
+function mdmn_msgtype_lock
+version SUNWprivate_1.1
+end
+
+function mdmn_abort
+version SUNWprivate_1.1
+end
+
+function mdmn_send_1
+version SUNWprivate_1.1
+end
+
+function mdmn_work_1
+version SUNWprivate_1.1
+end
+
+function mdmn_wakeup_initiator_1
+version SUNWprivate_1.1
+end
+
+function mdmn_wakeup_master_1
+version SUNWprivate_1.1
+end
+
+function mdmn_comm_lock_1
+version SUNWprivate_1.1
+end
+
+function mdmn_comm_unlock_1
+version SUNWprivate_1.1
+end
+
+function mdmn_comm_suspend_1
+version SUNWprivate_1.1
+end
+
+function mdmn_comm_resume_1
+version SUNWprivate_1.1
+end
+
+function mdmn_comm_reinit_set_1
+version SUNWprivate_1.1
+end
+
+function mdmn_comm_msglock_1
+version SUNWprivate_1.1
+end
+
+function clnt_mdcommdctl
+version SUNWprivate_1.1
+end
+
+function mdrpc_mdcommdctl_2
+version SUNWprivate_1.1
+end
+
+function clnt_mn_is_stale
+version SUNWprivate_1.1
+end
+
+function mdrpc_mn_is_stale_2
+version SUNWprivate_1.1
+end
+
+function clnt_clr_mnsetlock
+version SUNWprivate_1.1
+end
+
+function mdrpc_clr_mnsetlock_2
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_sp_flags_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_sp_flags_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_mdcommdctl_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_mdcommdctl_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_getdrivedesc_res
+version SUNWprivate_1.1
+end
+
+function dd_list_dup
+version SUNWprivate_1.1
+end
+
+function mdmn_allocate_changelog
+version SUNWprivate_1.1
+end
+
+function mdmn_reset_changelog
+version SUNWprivate_1.1
+end
+
+function mdmn_log_msg
+version SUNWprivate_1.1
+end
+
+function mdmn_unlog_msg
+version SUNWprivate_1.1
+end
+
+function mdmn_snarf_changelog
+version SUNWprivate_1.1
+end
+
+function mdmn_get_changelogrec
+version SUNWprivate_1.1
+end
+
+function clnt_reset_mirror_owner
+version SUNWprivate_1.1
+end
+
+function mdrpc_reset_mirror_owner_2
+version SUNWprivate_1.1
+end
+
+function clnt_mn_susp_res_io
+version SUNWprivate_1.1
+end
+
+function mdrpc_mn_susp_res_io_2
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_mn_susp_res_io_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_mn_susp_res_io_2_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_nodeid_args
+version SUNWprivate_1.1
+end
+
+function xdr_mdrpc_nodeid_2_args
+version SUNWprivate_1.1
+end
+
+function meta_is_member
+version SUNWprivate_1.1
+end
+
+function meta_mn_singlenode
+version SUNWprivate_1.1
+end
+
+function meta_sp_setstatus
+version SUNWprivate_1.1
+end
+
+function xdr_mp_unit_t
+version SUNWprivate_1.1
+end
+
+function xdr_md_set_params_t
+version SUNWprivate_1.1
+end
+
+function meta_fixdevid
+version SUNWprivate_1.1
+end
+
+function meta_upd_ctdnames
+version SUNWprivate_1.1
+end
+
+function pathname_reload
+version SUNWprivate_1.1
+end
+
+function meta_deviceid_to_nmlist
+version SUNWprivate_1.1
+end
+
+function meta_mn_send_get_tstate
+version SUNWprivate_1.1
+end
+
+function meta_client_create_retry
+version SUNWprivate_1.1
+end
+
+function meta_client_create
+version SUNWprivate_1.1
+end
+
+function read_master_block
+version SUNWprivate_1.1
+end
diff --git a/usr/src/lib/lvm/libmeta/spec/sparc/Makefile b/usr/src/lib/lvm/libmeta/spec/sparc/Makefile
new file mode 100644
index 0000000000..8d93c87287
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/sparc/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/sparc/Makefile
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+sparc_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libmeta/spec/sparcv9/Makefile b/usr/src/lib/lvm/libmeta/spec/sparcv9/Makefile
new file mode 100644
index 0000000000..5a7be1d65b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/sparcv9/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/sparcv9/Makefile
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+sparc_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(ROOTABILIB64)
diff --git a/usr/src/lib/lvm/libmeta/spec/versions b/usr/src/lib/lvm/libmeta/spec/versions
new file mode 100644
index 0000000000..523cb927d5
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/versions
@@ -0,0 +1,31 @@
+#pragma ident "%Z%%M% %I% %E% SMI"
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+i386 {
+ SUNWprivate_1.1;
+}
+sparc {
+ SUNWprivate_1.1;
+}
diff --git a/usr/src/lib/lvm/libpreen/Makefile b/usr/src/lib/lvm/libpreen/Makefile
new file mode 100644
index 0000000000..ca1bb50937
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/Makefile
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 1998-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include $(SRC)/lib/Makefile.lib
+
+SUBDIRS = $(MACH)
+
+all := TARGET= all
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+debug := TARGET= debug
+
+.KEEP_STATE:
+
+all clean clobber debug install: spec .WAIT $(SUBDIRS)
+
+lint: $(SUBDIRS)
+
+spec $(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/lvm/libpreen/Makefile.com b/usr/src/lib/lvm/libpreen/Makefile.com
new file mode 100644
index 0000000000..fdfee4627d
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/Makefile.com
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LIBRARY= preen_md.a
+VERS= .1
+OBJECTS= mdpreen.o
+
+include $(SRC)/lib/lvm/Makefile.lvm
+
+ROOTLIBDIR= $(ROOT)/usr/lib/drv
+LIBS= $(DYNLIB) # don't build a static lib
+CPPFLAGS += -D_FILE_OFFSET_BITS=64
+LDLIBS += -lmeta -lc
+ZDEFS=
+
+MAPDIR= $(SRC)/lib/lvm/libpreen/spec/$(TRANSMACH)
+SPECMAPFILE= $(MAPDIR)/mapfile
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+include $(SRC)/lib/lvm/Makefile.targ
+
+$(ROOTLIBDIR)/$(DYNLIB) := FILEMODE= 555
diff --git a/usr/src/lib/lvm/libpreen/common/mdpreen.c b/usr/src/lib/lvm/libpreen/common/mdpreen.c
new file mode 100644
index 0000000000..a28a6c2560
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/common/mdpreen.c
@@ -0,0 +1,335 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * preenlib interface for SVM.
+ *
+ * On startup fsck attempts to check filesystems in parallel. However
+ * running multiple fscks on the same disk at the same time
+ * significantly degrades the performance. fsck code avoids such
+ * behavior. To analyse such patterns it needs the physical disk
+ * instance. preen_build_devs provides that information for
+ * filesystems that are on top of metadevices.
+ */
+
+#include <meta.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <sdssc.h>
+
+#define MAX_N2M_ALIAS_LINE (2*FILENAME_MAX + 1)
+#define NAME_TO_MAJOR "/etc/name_to_major"
+#define MD_MODULE "md"
+
+/*
+ * Macros to produce a quoted string containing the value of a
+ * preprocessor macro. For example, if SIZE is defined to be 256,
+ * VAL2STR(SIZE) is "256". This is used to construct format
+ * strings for scanf-family functions below.
+ */
+#define QUOTE(x) #x
+#define VAL2STR(x) QUOTE(x)
+
+extern void preen_addunit(void *cookie, char *dname, int (*cf)(),
+ void *datap, uint_t unit);
+extern int preen_subdev(char *name, struct dk_cinfo *dkiop, void *dp);
+
+/*
+ * Look up the major number assigned to driver `modname' in
+ * NAME_TO_MAJOR.
+ *
+ * Each line is parsed as "<driver-name> <major-number>".  On a match the
+ * number is stored through `major' and 0 is returned.  -1 is returned,
+ * and *major is left untouched, if the file cannot be opened, if a line
+ * that cannot be parsed is reached before a match, or if no line
+ * matches.
+ */
+static int
+get_major_from_n2m(char *modname, int *major)
+{
+	FILE *fp;
+	char drv[FILENAME_MAX + 1];
+	int entry;
+	char line[MAX_N2M_ALIAS_LINE];
+	int status = -1;	/* stays -1 until a match is found */
+
+	if ((fp = fopen(NAME_TO_MAJOR, "r")) == NULL) {
+		return (-1);
+	}
+
+	while (fgets(line, sizeof (line), fp) != NULL) {
+		/*
+		 * Give up at the first malformed line.  drv and entry hold
+		 * valid data only when both fields converted, so neither
+		 * may be compared or copied out after a failed parse.
+		 */
+		if (sscanf(line, "%" VAL2STR(FILENAME_MAX) "s %d",
+		    drv, &entry) != 2) {
+			break;
+		}
+		if (strcmp(drv, modname) == 0) {
+			*major = entry;
+			status = 0;
+			break;
+		}
+	}
+
+	(void) fclose(fp);
+	return (status);
+}
+
+/*
+ * Return a newly allocated copy of the diskset name encoded in dev_path.
+ * The dev_path can be either a md pathname /dev/md/rdsk/d0, which yields
+ * MD_LOCAL_NAME, or a path name that contains a diskset such as
+ * /dev/md/red/rdsk/d0, which yields "red".
+ *
+ * A path that does not contain "/rdsk/d", or that has no '/' in front of
+ * the component preceding it, cannot be parsed and also yields
+ * MD_LOCAL_NAME rather than reading outside the string.
+ */
+
+static char *
+parse_path(char *dev_path)
+{
+	char *rdsk;	/* start of the "/rdsk/d" part of the path */
+	char *slash;	/* '/' opening the component just before rdsk */
+	char *setname;
+	size_t size;
+
+	/*
+	 * paths are /dev/md/rdsk/dx or /dev/md/<setname>/rdsk/dx
+	 * rdsk points to /rdsk/dx. Scan back to the previous slash.
+	 * If the three characters in front of that slash are "dev",
+	 * then path is a local set.
+	 *
+	 * The /rdsk/d pattern in strstr is used so that users with
+	 * a twisted mind can create a diskset called "rdsk" and
+	 * would still want everything to work!!
+	 */
+	rdsk = strstr(dev_path, "/rdsk/d");
+	if (rdsk == NULL || rdsk == dev_path)
+		return (Strdup(MD_LOCAL_NAME));
+
+	/* never step in front of the first character of dev_path */
+	for (slash = rdsk - 1; *slash != '/'; slash--) {
+		if (slash == dev_path)
+			return (Strdup(MD_LOCAL_NAME));
+	}
+
+	/* only look at the 3 preceding characters if they exist */
+	if ((slash - dev_path) >= 3 &&
+	    strncmp(slash - 3, "dev", strlen("dev")) == 0)
+		return (Strdup(MD_LOCAL_NAME));
+
+	/*
+	 * extract the setname from the path; it is the text between
+	 * slash and rdsk, and size leaves room for the terminating NUL.
+	 */
+	size = (size_t)(rdsk - slash);
+	setname = (char *)Malloc(size);
+	(void) strlcpy(setname, (const char *)(slash + 1), size);
+
+	return (setname);
+}
+
+/*
+ * This routine is called from preenlib the first time. It is then
+ * recursively called through preen_subdev.
+ *
+ * The argument passed in (uname) starts with the special device from
+ * /etc/vfstab. Recursive calls pass in the underlying physical device
+ * names.
+ */
+void
+preen_build_devs(
+	char *uname,		/* name of metadevice */
+	struct dk_cinfo *dkiop,	/* associated controller info */
+	void *dp		/* magic info */
+)
+{
+	char *setname = NULL;
+	char *dup_devname = NULL;	/* Strdup'ed devname still to be freed */
+	mdsetname_t *sp;
+	mdname_t *namep;		/* metadevice name */
+	mdnamelist_t *nlp = NULL;	/* list of real devices */
+	mdnamelist_t *p;
+	devid_nmlist_t *nm_list = NULL;
+	md_error_t status = mdnullerror;
+	md_error_t *ep = &status;
+	int ep_valid = 0;	/* does ep contain a real error */
+	struct stat statb;
+	static int md_major = -1;
+	side_t sideno;
+
+	if (stat(uname, &statb) != 0)
+		return;
+
+	/* md_major is static, so the lookup is only done until it succeeds */
+	if (md_major == -1 &&
+	    get_major_from_n2m(MD_MODULE, &md_major) != 0)
+		return;
+
+	/*
+	 * If the path passed in is not a metadevice, then add that
+	 * device to the list (preen_addunit) since it has to be a
+	 * physical device.
+	 */
+
+	if (major(statb.st_rdev) != md_major) {
+		preen_addunit(dp, dkiop->dki_dname, NULL, NULL,
+		    dkiop->dki_unit);
+		return;
+	}
+	/*
+	 * Bind to the cluster library
+	 */
+
+	if (sdssc_bind_library() == SDSSC_ERROR)
+		return;
+
+	if (md_init_daemon("fsck", ep) != 0) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	/*
+	 * parse the path name to get the diskset name.
+	 */
+
+	setname = parse_path(uname);
+	if ((sp = metasetname(setname, ep)) == NULL) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	/* check for ownership */
+	if (meta_check_ownership(sp, ep) != 0) {
+		/*
+		 * Don't own the set but we are here implies
+		 * that this is a clustered proxy device. Simply add
+		 * the unit.
+		 */
+		preen_addunit(dp, dkiop->dki_dname, NULL, NULL,
+		    dkiop->dki_unit);
+		ep_valid = 1;
+		goto out;
+	}
+
+	/*
+	 * get list of underlying physical devices.
+	 */
+	if ((namep = metaname(&sp, uname, ep)) == NULL) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	if (namep->dev == NODEV64) {
+		goto out;
+	}
+
+	if (meta_getdevs(sp, namep, &nlp, ep) != 0) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	/* gather and add the underlying devs */
+	for (p = nlp; (p != NULL); p = p->next) {
+		mdname_t *devnp = p->namep;
+		int fd;
+		struct dk_cinfo cinfo;
+		ddi_devid_t md_did;
+		char *devname;
+		char *minor_name = NULL;
+		char mname[MAXPATHLEN];
+
+		/*
+		 * we don't want to use the rname anymore because
+		 * that may have changed. Use the device id information
+		 * to find the correct ctd name and open based on that.
+		 * If there isn't a devid or we have a did device, then
+		 * use the rname. In clustering, it's corrected for us.
+		 * If no devid it's at least worth a try.
+		 */
+		if (((md_did = meta_getdidbykey(sp->setno, sideno,
+		    devnp->key, ep)) == NULL) || ((minor_name =
+		    meta_getdidminorbykey(sp->setno, sideno,
+		    devnp->key, ep)) == NULL)) {
+			/* rname belongs to devnp; it must not be freed here */
+			devname = devnp->rname;
+			if (md_did)
+				Free(md_did);
+		} else {
+			if (strstr(minor_name, ",raw") == NULL) {
+				(void) snprintf(mname, MAXPATHLEN, "%s,raw",
+				    minor_name);
+			} else {
+				(void) snprintf(mname, MAXPATHLEN, "%s",
+				    minor_name);
+			}
+
+			/*
+			 * We need to make sure we call this with a specific
+			 * mname (raw mname) so that we get the exact slice
+			 * with the given device id. Otherwise we could try
+			 * to open a slice that doesn't really exist.
+			 */
+			if (meta_deviceid_to_nmlist("/dev", md_did,
+			    mname, &nm_list) != 0) {
+				(void) mdsyserror(ep, errno, devnp->rname);
+				ep_valid = 1;
+				Free(md_did);
+				Free(minor_name);
+				goto out;
+			}
+			/*
+			 * The name is copied because nm_list is released
+			 * right below.  Remember the copy in dup_devname so
+			 * that it is freed on every path out of this
+			 * iteration.
+			 */
+			dup_devname = Strdup(nm_list->devname);
+			devname = dup_devname;
+			Free(md_did);
+			Free(minor_name);
+			devid_free_nmlist(nm_list);
+		}
+		/* get device name and (real) cinfo */
+		if ((fd = open(devname, O_RDONLY, 0)) < 0) {
+			(void) mdsyserror(ep, errno, devname);
+			ep_valid = 1;
+			goto out;
+		}
+
+		if (ioctl(fd, DKIOCINFO, &cinfo) != 0) {
+			(void) mdsyserror(ep, errno, devname);
+			(void) close(fd);
+			ep_valid = 1;
+			goto out;
+		}
+		(void) close(fd);	/* sd/ssd bug */
+
+		/*
+		 * preen_subdev fails when the device name has been
+		 * resolved to the physical layer. Hence it is added
+		 * to preen_addunit.
+		 */
+		if (preen_subdev(devname, &cinfo, dp) != 0) {
+			preen_addunit(dp, cinfo.dki_dname, NULL, NULL,
+			    cinfo.dki_unit);
+		}
+
+		/* done with this device; drop the copied name, if any */
+		if (dup_devname != NULL) {
+			Free(dup_devname);
+			dup_devname = NULL;
+		}
+	}
+
+	/* cleanup; any error recorded in status is reported here */
+out:
+	if (setname != NULL)
+		Free(setname);
+	if (ep_valid != 0) {
+		mde_perror(&status, "");
+		mdclrerror(&status);
+	}
+	metafreenamelist(nlp);
+	/*
+	 * Released last, after the error has been printed, in case the
+	 * error record still refers to the name handed to mdsyserror.
+	 */
+	if (dup_devname != NULL)
+		Free(dup_devname);
+}
diff --git a/usr/src/lib/lvm/libpreen/i386/Makefile b/usr/src/lib/lvm/libpreen/i386/Makefile
new file mode 100644
index 0000000000..bb9355b10a
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/i386/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+install debug: all $(ROOTLIBDIR) $(ROOTLIBS)
diff --git a/usr/src/lib/lvm/libpreen/sparc/Makefile b/usr/src/lib/lvm/libpreen/sparc/Makefile
new file mode 100644
index 0000000000..eff30f413e
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/sparc/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+include ../Makefile.com
+
+install debug: all $(ROOTLIBDIR) $(ROOTLIBS)
diff --git a/usr/src/lib/lvm/libpreen/spec/Makefile b/usr/src/lib/lvm/libpreen/spec/Makefile
new file mode 100644
index 0000000000..4f28d95836
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/Makefile
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/Makefile
+
+include $(SRC)/lib/Makefile.spec.arch
diff --git a/usr/src/lib/lvm/libpreen/spec/Makefile.targ b/usr/src/lib/lvm/libpreen/spec/Makefile.targ
new file mode 100644
index 0000000000..582a2c4653
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/Makefile.targ
@@ -0,0 +1,36 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/Makefile.targ
+
+LIBRARY = libpreen.a
+VERS = .1
+
+OBJECTS = preen.o
+
+TRANSCPP =
+
+SPECCPP = -I.. -I../../inc
diff --git a/usr/src/lib/lvm/libpreen/spec/amd64/Makefile b/usr/src/lib/lvm/libpreen/spec/amd64/Makefile
new file mode 100644
index 0000000000..c7d89e007c
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/amd64/Makefile
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#amd64_C_PICFLAGS = $(amd64_C_BIGPICFLAGS)
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(ROOTABILIB64)
diff --git a/usr/src/lib/lvm/libpreen/spec/i386/Makefile b/usr/src/lib/lvm/libpreen/spec/i386/Makefile
new file mode 100644
index 0000000000..a50dd5cd36
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/i386/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/i386/Makefile
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#i386_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libpreen/spec/preen.spec b/usr/src/lib/lvm/libpreen/spec/preen.spec
new file mode 100644
index 0000000000..839c0fcafe
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/preen.spec
@@ -0,0 +1,31 @@
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# lib/lvm/libpreen/spec/preen.spec
+
+function preen_build_devs
+version SUNWprivate_1.1
+end
diff --git a/usr/src/lib/lvm/libpreen/spec/sparc/Makefile b/usr/src/lib/lvm/libpreen/spec/sparc/Makefile
new file mode 100644
index 0000000000..f3d039672f
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/sparc/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/sparc/Makefile
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# PIC flags are enabled here; comment out the following line to disable them
+sparc_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libpreen/spec/sparcv9/Makefile b/usr/src/lib/lvm/libpreen/spec/sparcv9/Makefile
new file mode 100644
index 0000000000..520e996ffb
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/sparcv9/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/sparcv9/Makefile
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# PIC flags enabled. NOTE(review): sets sparc_C_PICFLAGS under sparcv9 - confirm
+sparc_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(ROOTABILIB64)
diff --git a/usr/src/lib/lvm/libpreen/spec/versions b/usr/src/lib/lvm/libpreen/spec/versions
new file mode 100644
index 0000000000..523cb927d5
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/versions
@@ -0,0 +1,31 @@
+#pragma ident "%Z%%M% %I% %E% SMI"
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+i386 {
+ SUNWprivate_1.1;
+}
+sparc {
+ SUNWprivate_1.1;
+}
diff --git a/usr/src/lib/lvm/libsvm/Makefile b/usr/src/lib/lvm/libsvm/Makefile
new file mode 100644
index 0000000000..1917939812
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/Makefile
@@ -0,0 +1,58 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Shared library makefile definitions; included first so the macros
+# below are set after it.
+include $(SRC)/lib/Makefile.lib
+
+# Header for this library and the directory it lives in.
+HDRS = libsvm.h
+HDRDIR = common/hdrs
+# The only per-architecture build directory is the one named by $(MACH).
+SUBDIRS = $(MACH)
+
+# Conditional macros: each top-level target sets TARGET to its own name.
+# The recursive rule at the bottom passes $(TARGET) to the sub-make.
+all := TARGET= all
+install := TARGET= install
+check := TARGET= check
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+debug := TARGET= debug
+
+.KEEP_STATE:
+
+# spec is listed ahead of .WAIT so that it is processed before $(SUBDIRS).
+all clean clobber debug install: spec .WAIT $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+# lint only descends into $(SUBDIRS); spec is not a dependency here.
+lint: $(SUBDIRS)
+
+# Change into the named directory, print it, and run the requested target
+# there. The FRC dependency has no rule body of its own.
+spec $(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/lvm/libsvm/Makefile.com b/usr/src/lib/lvm/libsvm/Makefile.com
new file mode 100644
index 0000000000..3a87715a12
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/Makefile.com
@@ -0,0 +1,64 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Library name, version suffix, and the objects it is built from.
+LIBRARY= libsvm.a
+VERS= .1
+OBJECTS= check_svm.o \
+ getdrvname.o \
+ metaconf.o \
+ metainterfaces.o \
+ modops.o \
+ start_svm.o \
+ debug.o \
+ update_mdconf.o
+
+include $(SRC)/lib/lvm/Makefile.lvm
+
+# Set after the include above so this value is the one in effect.
+ROOTLIBDIR= $(ROOT)/usr/snadm/lib
+
+LIBS = $(DYNLIB) # don't build a static lib
+LDLIBS += -lmeta -ldevid -lc
+#
+# XXX There isn't a lint library for libspmicommon. For now, we work
+# around this by only using the library when we build (as opposed to lint).
+#
+all debug install := LDLIBS += -L/usr/snadm/lib -lspmicommon
+
+# Runtime search path matching the -L directory used for libspmicommon.
+DYNFLAGS += -R/usr/snadm/lib
+CPPFLAGS += -D_FILE_OFFSET_BITS=64
+# Private headers (svm.h, libsvm.h) live under common/hdrs.
+CPPFLAGS += -I$(SRC)/lib/lvm/libsvm/common/hdrs
+# ZDEFS is cleared here. NOTE(review): presumably to relax the
+# undefined-symbol check at link time - confirm against Makefile.lib.
+ZDEFS =
+
+# Mapfile location under the spec directory for $(TRANSMACH).
+MAPDIR= $(SRC)/lib/lvm/libsvm/spec/$(TRANSMACH)
+SPECMAPFILE= $(MAPDIR)/mapfile
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+include $(SRC)/lib/lvm/Makefile.targ
diff --git a/usr/src/lib/lvm/libsvm/common/check_svm.c b/usr/src/lib/lvm/libsvm/common/check_svm.c
new file mode 100644
index 0000000000..5c92ac2788
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/check_svm.c
@@ -0,0 +1,169 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <meta.h>
+#include <sys/types.h>
+#include <sys/mkdev.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include <svm.h>
+
+/*
+ * FUNCTION: valid_bootlist
+ *	Scan an open file for a line that contains "mddb_bootlist" and
+ *	is not commented out (its first character is neither '*' nor '#').
+ *
+ * INPUT: file pointer, maximum line length (size of the read buffer)
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS (an active mddb_bootlist line was found)
+ *	-1 - FAIL (no such line, bad length, no memory, or read error)
+ *
+ */
+
+int
+valid_bootlist(FILE *fp, int line_len)
+{
+	char	*line;
+	int	found = 0;
+	int	read_failed;
+
+	if (line_len <= 0)
+		return (RET_ERROR);
+	if ((line = malloc(line_len)) == NULL)
+		return (RET_ERROR);
+
+	while (fgets(line, line_len, fp) != NULL) {
+		/* lines that do not mention the bootlist are irrelevant */
+		if (strstr(line, "mddb_bootlist") == NULL)
+			continue;
+		/* a leading '*' or '#' marks the line as commented out */
+		if (*line != '*' && *line != '#') {
+			found = 1;
+			break;
+		}
+	}
+
+	/*
+	 * Use an explicit flag rather than the last strstr() result: a
+	 * commented-out bootlist on the final line of the file must not
+	 * be reported as a valid one.  Read failures are detected with
+	 * the stream error indicator, since errno may be left non-zero
+	 * by library calls that succeeded.
+	 */
+	read_failed = ferror(fp);
+	free(line);
+
+	if (!found || read_failed)
+		return (RET_ERROR);
+
+	return (RET_SUCCESS);
+}
+
+/*
+ * FUNCTION: svm_check
+ *	Check the existence of DiskSuite or SVM by looking for an active
+ *	mddb_bootlist entry, first in <rootpath>/kernel/drv/md.conf and,
+ *	if none is found there, in <rootpath>/etc/system.
+ *
+ * INPUT: rootpath
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS
+ *	-1 - FAIL (no bootlist found, a file could not be opened, or the
+ *		resulting path name does not fit in PATH_MAX)
+ */
+
+int
+svm_check(char *path)
+{
+	static const char *const candidates[] = { MD_CONF, SYSTEM_FILE };
+	const int	ncandidates =
+	    sizeof (candidates) / sizeof (candidates[0]);
+	char		tmppath[PATH_MAX];
+	FILE		*fp;
+	int		rval = RET_ERROR;
+	int		len;
+	int		i;
+
+	if (path == NULL)
+		return (RET_ERROR);
+
+	for (i = 0; i < ncandidates; i++) {
+		/* bounded path construction; reject instead of overflowing */
+		len = snprintf(tmppath, sizeof (tmppath), "%s%s",
+		    path, candidates[i]);
+		if (len < 0 || (size_t)len >= sizeof (tmppath))
+			return (RET_ERROR);
+
+		/*
+		 * A file that cannot be opened ends the check with an
+		 * error.  Return directly so that fclose() is never
+		 * handed a NULL stream.
+		 */
+		if ((fp = fopen(tmppath, "r")) == NULL)
+			return (RET_ERROR);
+
+		rval = valid_bootlist(fp, MDDB_BOOTLIST_MAX_LEN);
+		(void) fclose(fp);
+
+		debug_printf("svm_check(): valid bootlist in %s. status %d\n",
+		    tmppath, rval);
+
+		/* found in this file: no need to look at the next one */
+		if (rval == RET_SUCCESS)
+			break;
+	}
+
+	return (rval);
+}
+
+/*
+ * FUNCTION: svm_is_md
+ *	Check if the given device name has an md driver.
+ * INPUT: special device name (/dev/dsk/c0t0d0s0 or /dev/md/dsk/d10)
+ *
+ * RETURN:
+ *	1 - if it is a metadevice.
+ *	0 - if it is not a metadevice, or the driver name lookup failed.
+ *	-1 - if device_name is NULL or cannot be stat'ed.
+ */
+
+int
+svm_is_md(char *device_name)
+{
+	/*
+	 * get_drv_name() copies a name of up to FILENAME_MAX characters
+	 * into this buffer, so it must be at least that large.
+	 */
+	char		buf[FILENAME_MAX + 1];
+	struct stat	sbuf;
+	int		rval = 0;
+
+	if (device_name == NULL)
+		return (RET_ERROR);
+
+	(void) memset(buf, 0, sizeof (buf));
+
+	debug_printf("svm_is_md(): device %s\n", device_name);
+	if (stat(device_name, &sbuf) != 0)
+		return (RET_ERROR);
+
+	/* map the device's major number to a driver name */
+	if (get_drv_name(major(sbuf.st_rdev), "/", buf) == RET_ERROR) {
+		debug_printf("svm_is_md(): device get_drv_name failed: %s\n",
+		    device_name);
+		return (0);
+	}
+	if (strcmp(buf, MD_MODULE) == 0) {
+		debug_printf("svm_is_md(): device %s succeed\n", device_name);
+		rval = 1;
+	}
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/debug.c b/usr/src/lib/lvm/libsvm/common/debug.c
new file mode 100644
index 0000000000..38f7ae56cc
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/debug.c
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+/* The following defines are for tracing output (from libspmicommon) */
+
+#define	LOG	0x1	/* write message to log file */
+#define	SCR	0x2	/* write message to the screen */
+/* parenthesized so the value is safe inside any larger expression */
+#define	LOGSCR	(LOG|SCR)	/* write message to the log and screen */
+#define	LEVEL0	0x0001	/* message level 0 */
+#define	LEVEL1	0x0002	/* message level 1 */
+#define	LEVEL2	0x0004	/* message level 2 */
+#define	LEVEL3	0x0010	/* message level 3 */
+
+extern int get_trace_level(void);
+extern int write_status(unsigned char, unsigned int, char *, ...);
+
+const char libsvm_str[] = "LIB_SVM: ";
+const int libsvm_len = sizeof (libsvm_str);
+
+/*
+ * Format a trace message, prefix it with "LIB_SVM: " and hand it to
+ * write_status().  Nothing is done unless get_trace_level() is above 5.
+ * The message, including the prefix, is truncated to PATH_MAX - 1
+ * characters.
+ */
+/*PRINTFLIKE1*/
+void
+debug_printf(char *fmt, ...)
+{
+	va_list	ap;
+	char	*buf;
+	/* libsvm_len counts the terminating NUL of the prefix; drop it */
+	size_t	prefix_len = libsvm_len - 1;
+	int	n;
+
+	if (get_trace_level() <= 5)
+		return;
+
+	if ((buf = calloc(PATH_MAX, sizeof (char))) == NULL)
+		return;
+	(void) strcpy(buf, libsvm_str);
+
+	va_start(ap, fmt);
+	n = vsnprintf(buf + prefix_len, PATH_MAX - prefix_len, fmt, ap);
+	va_end(ap);
+
+	if (n >= 0) {
+		/*
+		 * The text is already fully formatted, so it is passed as
+		 * an argument and not as the format: a '%' in a device or
+		 * path name must not be interpreted a second time.
+		 * NOTE(review): assumes write_status() treats its third
+		 * argument as a printf-style format - confirm against
+		 * libspmicommon.
+		 */
+		(void) write_status(LOGSCR, LEVEL0, "%s", buf);
+	}
+	free(buf);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/getdrvname.c b/usr/src/lib/lvm/libsvm/common/getdrvname.c
new file mode 100644
index 0000000000..9bef7fa115
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/getdrvname.c
@@ -0,0 +1,90 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <svm.h>
+
+/*
+ * Macros to produce a quoted string containing the value of a
+ * preprocessor macro. For example, if SIZE is defined to be 256,
+ * VAL2STR(SIZE) is "256". This is used to construct format
+ * strings for scanf-family functions below.
+ */
+#define QUOTE(x) #x
+#define VAL2STR(x) QUOTE(x)
+
+/*
+ * FUNCTION: get_drv_name
+ *	Return the driver name for a major number
+ *
+ * INPUT: major number, mount point for name_to_major file, pointer
+ *	to a valid buffer.  The buffer must have room for a name of up
+ *	to FILENAME_MAX characters plus the terminating NUL.
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS - buf contain the driver name.
+ *	-1 - FAIL - the file could not be opened, a line could not be
+ *		parsed, or no entry with that major number exists.
+ *		buf is left untouched in this case.
+ *
+ */
+
+int
+get_drv_name(major_t major, char *mnt, char *buf)
+{
+	FILE	*fp;
+	char	drv[FILENAME_MAX + 1];
+	char	entry[FILENAME_MAX + 1];
+	char	line[MAX_N2M_ALIAS_LINE];
+	char	fname[PATH_MAX];
+	int	status = RET_ERROR;	/* until a matching entry is found */
+
+	(void) snprintf(fname, sizeof (fname), "%s%s", mnt, NAME_TO_MAJOR);
+
+	if ((fp = fopen(fname, "r")) == NULL) {
+		return (RET_ERROR);
+	}
+
+	while (fgets(line, sizeof (line), fp) != NULL) {
+		/*
+		 * Each line is "<driver name> <major number>".  A line
+		 * that does not parse ends the search with an error;
+		 * entry is not examined because it was never filled in.
+		 */
+		if (sscanf(line,
+		    "%" VAL2STR(FILENAME_MAX) "s %" VAL2STR(FILENAME_MAX) "s",
+		    drv, entry) != 2)
+			break;
+
+		if ((major_t)atoi(entry) == major) {
+			/* only a real match is copied out and reported */
+			(void) strcpy(buf, drv);
+			status = RET_NOERROR;
+			break;
+		}
+	}
+
+	(void) fclose(fp);
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/hdrs/libsvm.h b/usr/src/lib/lvm/libsvm/common/hdrs/libsvm.h
new file mode 100644
index 0000000000..98c13a2684
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/hdrs/libsvm.h
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSVM_H
+#define _LIBSVM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * The following declarations are for libsvm which provides
+ * Solaris Install with a set of interfaces required to upgrade
+ * mirrored roots. These are controlled by a Contract PSARC 2000/049
+ * and should not be changed without informing Install.
+ */
+
+/*
+ * Description of a root metadevice and its components.
+ * NOTE(review): md_comps is declared with one element; presumably the
+ * structure is allocated with room for 'count' entries - confirm
+ * against svm_alloc().
+ */
+typedef struct {
+ char *root_md; /* metaroot device name */
+ int count; /* number of components in the metadevice */
+ char *md_comps[1]; /* array of "ctds" component names */
+} svm_info_t;
+
+/* MDDB conversion flags */
+#define SVM_DONT_CONV 0x01 /* Don't convert MDDB to devid mode */
+#define SVM_CONV 0x02 /* Convert MDDB to devid mode */
+
+
+/* svm_check: 0 on success, -1 on failure */
+extern int svm_check(char *rootpath);
+extern int svm_start(char *rootpath, svm_info_t **svm_infopp,
+ int repl_state_flag);
+extern int svm_stop();
+extern void svm_free(svm_info_t *svm_infop);
+/* svm_is_md: 1 if a metadevice, 0 if not, -1 if the device cannot be stat'ed */
+extern int svm_is_md(char *device_name);
+extern int svm_get_components(char *root_md_device, svm_info_t **svmpp);
+extern svm_info_t *svm_alloc();
+extern int get_mdcomponents(char *devname, svm_info_t **pp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBSVM_H */
diff --git a/usr/src/lib/lvm/libsvm/common/hdrs/svm.h b/usr/src/lib/lvm/libsvm/common/hdrs/svm.h
new file mode 100644
index 0000000000..cb5d60f30f
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/hdrs/svm.h
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#ifndef _SVM_H
+#define _SVM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define RET_SUCCESS 0
+#define RET_ERROR -1
+#define RET_NOERROR RET_SUCCESS
+
+
+#define PROP_KEEP_REPL_STATE "md_keep_repl_state"
+#define PROP_DEVID_DESTROY "md_devid_destroy"
+
+#define MD_CONF "/kernel/drv/md.conf"
+#define MD_CONF_ORIG "/tmp/md.conf.orig"
+#define SYSTEM_FILE "/etc/system"
+#define NAME_TO_MAJOR "/etc/name_to_major"
+#define VFSTAB "/etc/vfstab"
+
+#define MD_MODULE "md"
+#define ROOT_MNTPT "/"
+#define ROOT_METADEVICE "/dev/md/dsk/"
+
+
+/* Progress of a bootlist conversion. */
+typedef enum {
+ MD_STR_NOTFOUND, /* bootlist not found */
+ MD_STR_START, /* bootlist found, conversion started */
+ MD_STR_DONE /* bootlist conversion done */
+} convflag_t;
+
+/*
+ * The following defines have been taken from addrem.h.
+ * Every arithmetic definition is parenthesized so that it keeps its
+ * value when used inside a larger expression (e.g. 2 * MAX_PERM_ENTRY).
+ */
+#define	MAX_CMD_LINE		256
+#define	MAX_N2M_ALIAS_LINE	(FILENAME_MAX + FILENAME_MAX + 1)
+#define	MAXLEN_NAM_TO_MAJ_ENT	(FILENAME_MAX + MAX_STR_MAJOR + 1)
+#define	OPT_LEN			128
+#define	CADDR_HEX_STR		16
+#define	UINT_STR		10
+#define	MODLINE_ENT_MAX		((4 * UINT_STR) + CADDR_HEX_STR + MODMAXNAMELEN)
+#define	MAX_STR_MAJOR		UINT_STR
+#define	STR_LONG		10
+#define	PERM_STR		4
+#define	MAX_PERM_ENTRY		((2 * STR_LONG) + PERM_STR + \
+				    (2 * FILENAME_MAX) + 1)
+#define	MAX_DBFILE_ENTRY	MAX_PERM_ENTRY
+
+extern void create_diskset_links();
+extern int copyfile(char *from, char *to);
+extern int get_drv_name(major_t major, char *file_name, char *buf);
+extern int mod_unload(char *modname);
+extern int valid_bootlist(FILE *fp, int line_size);
+extern int convert_bootlist(char *systemfile, char *mdconf, char **tmpfilename);
+extern int write_xlate_to_mdconf(char *rootpath);
+extern int write_targ_nm_table(char *rootpath);
+extern int get_rootmetadevice(char *rootpath, char **devname);
+extern void set_upgrade_prop(char *prop_name, int val);
+extern int is_upgrade_prop(char *prop_name);
+extern int create_in_file_prop(char *prop_name, char *fname);
+extern void debug_printf(char *fmt, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SVM_H */
diff --git a/usr/src/lib/lvm/libsvm/common/metaconf.c b/usr/src/lib/lvm/libsvm/common/metaconf.c
new file mode 100644
index 0000000000..504f38ba73
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/metaconf.c
@@ -0,0 +1,195 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <stdio.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/mkdev.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <limits.h>
+#include <string.h>
+#include <libsvm.h>
+#include <svm.h>
+#include <errno.h>
+
+
+#define VERSION "1.0"
+#define DISK_DIR "/dev/rdsk"
+
+extern int _map_to_effective_dev();
+
+/*
+ * Return 1 if buf consists only of white-space characters (or is
+ * empty), 0 as soon as any other character is found.
+ */
+int
+is_blankline(char *buf)
+{
+	for (; *buf != 0; buf++) {
+		/*
+		 * isspace() is only defined for values representable as
+		 * unsigned char (or EOF); plain char may be negative.
+		 */
+		if (!isspace((unsigned char)*buf))
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * FUNCTION: write_targ_nm_table
+ *	creates a tuple table of <driver name, major number > in md.conf
+ *	from the non-blank lines of <rootpath>/etc/name_to_major.
+ *	MD_CONF itself is opened as-is (not prefixed with rootpath) and
+ *	is appended to.
+ * INPUT: rootpath
+ *
+ * RETURN VALUES:
+ *	RET_SUCCESS
+ *	RET_ERROR - a file could not be opened or a write failed
+ */
+
+int
+write_targ_nm_table(char *path)
+{
+	FILE	*targfp = NULL;
+	FILE	*mdfp = NULL;
+	char	buf[PATH_MAX], *cp;
+	int	retval = RET_SUCCESS;
+	int	first_entry = 1;
+
+	if ((mdfp = fopen(MD_CONF, "a")) == NULL)
+		return (RET_ERROR);
+
+	(void) snprintf(buf, sizeof (buf), "%s%s", path, NAME_TO_MAJOR);
+
+	if ((targfp = fopen(buf, "r")) == NULL) {
+		(void) fclose(mdfp);
+		return (RET_ERROR);
+	}
+
+	while (fgets(buf, sizeof (buf), targfp) != NULL &&
+	    (retval == RET_SUCCESS)) {
+		/*
+		 * Strip the trailing newline.  It is absent when the last
+		 * line of the file is unterminated or a line is longer
+		 * than the buffer, so the pointer must be checked.
+		 */
+		if ((cp = strrchr(buf, '\n')) != NULL)
+			*cp = 0;
+		if (is_blankline(buf))
+			continue;
+		if (first_entry) {
+			if (fprintf(mdfp, "md_targ_nm_table=\"%s\"", buf) < 0)
+				retval = RET_ERROR;
+			first_entry = 0;
+		}
+		/*
+		 * NOTE(review): the first entry is also written by this
+		 * statement, so it appears twice in the table.  Kept
+		 * as-is because the consumer of md.conf is not visible
+		 * here - confirm whether the duplicate is intended.
+		 */
+		if (fprintf(mdfp, ",\"%s\"", buf) < 0)
+			retval = RET_ERROR;
+	}
+	if (!first_entry)
+		if (fprintf(mdfp, ";\n") < 0)
+			retval = RET_ERROR;
+	/* buffered data is flushed on close; a failure here is a lost write */
+	if (fclose(mdfp) != 0)
+		retval = RET_ERROR;
+	(void) fclose(targfp);
+	return (retval);
+}
+
+/*
+ * FUNCTION: write_xlate_to_mdconf
+ *	Appends the md_xlate_ver property and an md_xlate table of
+ *	<miniroot devt, target devt> tuples to md.conf.  One tuple is
+ *	written for each entry under <rootpath>/dev/rdsk that can be
+ *	stat'ed and mapped to an effective device; entries that cannot
+ *	are silently skipped.
+ * INPUT: rootpath
+ *
+ * RETURN VALUES:
+ *	RET_SUCCESS
+ *	RET_ERROR
+ */
+
+int
+write_xlate_to_mdconf(char *path)
+{
+	char		rdsk_dir[PATH_MAX];
+	char		target[PATH_MAX];
+	char		effective[PATH_MAX];
+	struct stat	st_target;
+	struct stat	st_effective;
+	struct dirent	*ent;
+	DIR		*dir;
+	FILE		*out;
+	char		*rel;
+	int		need_header = 1;
+	int		ret = RET_SUCCESS;
+
+	out = fopen(MD_CONF, "a");
+	if (out == NULL)
+		return (RET_ERROR);
+
+	(void) snprintf(rdsk_dir, sizeof (rdsk_dir), "%s%s", path, DISK_DIR);
+	dir = opendir(rdsk_dir);
+	if (dir == NULL) {
+		(void) fclose(out);
+		return (RET_ERROR);
+	}
+
+	for (;;) {
+		/* stop at end of directory or after the first write error */
+		ent = readdir(dir);
+		if (ent == (struct dirent *)0 || ret == RET_ERROR)
+			break;
+
+		if (strcmp(ent->d_name, ".") == 0 ||
+		    strcmp(ent->d_name, "..") == 0)
+			continue;
+
+		/* skip names that would not fit in the path buffer */
+		if ((strlen(rdsk_dir) + strlen(ent->d_name) + 2) > PATH_MAX)
+			continue;
+
+		(void) snprintf(target, sizeof (target), "%s/%s",
+		    rdsk_dir, ent->d_name);
+
+		/*
+		 * The target device must exist, its name from DISK_DIR
+		 * onwards must map to an effective device, and that
+		 * device must exist too.  The checks run in this order
+		 * and the first failure skips the entry.
+		 */
+		if (stat(target, &st_target) != 0 ||
+		    (rel = strstr(target, DISK_DIR)) == NULL ||
+		    _map_to_effective_dev(rel, effective) != 0 ||
+		    stat(effective, &st_effective) != 0)
+			continue;
+
+		/* special case to write the first tuple in the table */
+		if (need_header) {
+			if (fprintf(out, "md_xlate_ver=\"%s\";\n"
+			    "md_xlate=%lu,%lu", VERSION,
+			    st_effective.st_rdev, st_target.st_rdev) < 0)
+				ret = RET_ERROR;
+			need_header = 0;
+		}
+		if (fprintf(out, ",%lu,%lu", st_effective.st_rdev,
+		    st_target.st_rdev) < 0)
+			ret = RET_ERROR;
+	}
+
+	/* terminate the property only if at least one tuple was written */
+	if (!need_header && fprintf(out, ";\n") < 0)
+		ret = RET_ERROR;
+
+	(void) fclose(out);
+	(void) closedir(dir);
+	return (ret);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/metainterfaces.c b/usr/src/lib/lvm/libsvm/common/metainterfaces.c
new file mode 100644
index 0000000000..20746d4b58
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/metainterfaces.c
@@ -0,0 +1,490 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/vfstab.h>
+#include <meta.h>
+#include <libsvm.h>
+#include <svm.h>
+#include <sdssc.h>
+
+
+extern int mod_unload(char *modname); /* defined in modops.c */
+static int inited = 0; /* nonzero once init_metalib() has succeeded */
+
+/*
+ * FUNCTION: init_metalib
+ *	One-time libmeta setup.  Once a call has succeeded, later calls
+ *	return immediately until reset_metalib() clears the flag.
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS (already initialized, or initialized now)
+ *	-1 - FAIL (md_init_nosig() or meta_check_root() reported an error)
+ */
+
+static int
+init_metalib()
+{
+	char		*progname = "libsvm";
+	md_error_t	err = mdnullerror;
+
+	if (inited)
+		return (RET_SUCCESS);
+
+	/* a single-element argument vector holding just the name */
+	if (md_init_nosig(1, &progname, 0, 1, &err) != 0)
+		return (-1);
+
+	if (meta_check_root(&err) != 0)
+		return (-1);
+
+	inited = 1;
+	return (RET_SUCCESS);
+}
+
+/*
+ * FUNCTION: reset_metalib
+ * Clear the "inited" flag so that the next init_metalib() call runs
+ * the initialization again, then call close_admin().
+ *
+ * INPUT: ptr to md_error_t, handed on to close_admin()
+ *
+ * RETURN VALUES: none; the result of close_admin() is discarded.
+ */
+
+static void
+reset_metalib(md_error_t *ep)
+{
+ inited = 0; /* next init_metalib() starts from scratch */
+ (void) close_admin(ep);
+}
+
+/*
+ * FUNCTION: metahalt
+ *	Issue MD_HALT for the local set while holding the meta lock, then
+ *	reset the library state.  Failures are not reported to the caller;
+ *	a failing MD_HALT ioctl is only logged through debug_printf().
+ */
+
+static void
+metahalt()
+{
+	md_error_t	err = mdnullerror;
+	mdsetname_t	*localset;
+
+	(void) init_metalib();
+
+	localset = metasetname(MD_LOCAL_NAME, &err);
+	if (localset == NULL)
+		return;
+
+	if (meta_lock(localset, TRUE, &err) != 0)
+		return;
+
+	if (metaioctl(MD_HALT, NULL, &err, NULL) != 0) {
+		debug_printf("metahalt(): errno %d\n",
+		    err.info.md_error_info_t_u.sys_error.errnum);
+	}
+
+	/* unlock and reset regardless of the ioctl result */
+	(void) meta_unlock(localset, &err);
+	reset_metalib(&err);
+}
+
+/*
+ * FUNCTION: svm_stop
+ * Halt the SDS/SVM configuration and unload md module.
+ *
+ * metahalt() is called once, then mod_unload(MD_MODULE) is attempted.
+ * If that first attempt fails the routine sleeps and retries; after
+ * each failed retry it sleeps again and re-issues metahalt().  Retries
+ * stop once the accumulated sleep time reaches MAX_TIMEOUT seconds.
+ *
+ * RETURN VALUES:
+ * 0 - SUCCESS
+ * RET_ERROR - the module could not be unloaded within MAX_TIMEOUT
+ */
+
+#define MAX_TIMEOUT 1800 /* seconds; compared against total sleep time */
+int
+svm_stop()
+{
+ int rval = RET_SUCCESS;
+ int timeval = 0; /* seconds slept so far */
+ int sleep_int = 5; /* seconds between unload attempts */
+
+ metahalt();
+
+ if ((rval = mod_unload(MD_MODULE)) != 0) {
+ timeval += sleep_int;
+ (void) sleep(sleep_int);
+ while (timeval < MAX_TIMEOUT) {
+ if ((rval = mod_unload(MD_MODULE)) == 0) {
+ debug_printf("svm_stop(): mod_unload succeeded."
+ " Time %d\n", timeval);
+
+ break;
+ }
+
+ debug_printf("svm_stop(): mod_unload failed. Trying "
+ "in %d s (%d)\n", sleep_int, timeval);
+
+ timeval += sleep_int;
+ (void) sleep(sleep_int);
+ metahalt(); /* halt again before the next unload attempt */
+ }
+
+ if (rval != 0) { /* still failing after the last retry */
+ rval = RET_ERROR;
+ debug_printf("svm_stop(): mod_unload FAILED!\n");
+ }
+ }
+
+ return (rval);
+}
+
+/*
+ * FUNCTION: get_rootmetadevice
+ *	Look up the ROOT_MNTPT entry in <mntpath>VFSTAB and, when its
+ *	special device contains ROOT_METADEVICE, return the text that
+ *	follows that prefix.
+ *
+ * INPUT:
+ *	mntpath - mount point prepended to VFSTAB
+ *	mdname - pointer to string pointer that will contain the
+ *		metadevice name. Caller must free the allocated space.
+ * RETURN VALUES:
+ *	mdname - md root device name; set to NULL on every failure
+ *	0 - SUCCESS
+ *	!0 - FAIL
+ *		> 0 errno (EINVAL, ENOMEM, fopen() errno) or a positive
+ *		    getvfsfile() result
+ *		RET_ERROR - root is not on a metadevice; a nonzero
+ *		    getvfsfile() result is also returned unchanged
+ */
+
+int
+get_rootmetadevice(char *mntpath, char **mdname)
+{
+	struct vfstab	v;
+	FILE		*fp;
+	int		rval = RET_SUCCESS;
+	char		*cp;
+	char		vfstab_name[PATH_MAX + 1];
+
+	if (mdname == NULL)
+		return (EINVAL);
+
+	*mdname = NULL;
+
+	if (snprintf(vfstab_name, PATH_MAX + 1, "%s%s", mntpath, VFSTAB) < 0)
+		return (ENOMEM);
+
+	debug_printf("get_rootmetadevice(): mntpath %s %s\n", mntpath,
+	    vfstab_name);
+
+	if ((fp = fopen(vfstab_name, "r")) == NULL) {
+		rval = errno;
+		return (rval);
+	}
+
+	if ((rval = getvfsfile(fp, &v, ROOT_MNTPT)) != 0) {
+		goto out;
+	}
+
+	/* an entry without a special device cannot name a metadevice */
+	if (v.vfs_special == NULL) {
+		rval = RET_ERROR;
+		goto out;
+	}
+
+	debug_printf("get_rootmetadevice(): vfs_special %s\n", v.vfs_special);
+	if ((cp = strstr(v.vfs_special, ROOT_METADEVICE)) == NULL) {
+		/* md device not found */
+		rval = RET_ERROR;
+		goto out;
+	}
+
+	/*
+	 * Skip the prefix starting from where it was actually found.  The
+	 * old code skipped from the start of vfs_special, which is only
+	 * right when the prefix is the very first thing in the string.
+	 */
+	cp += strlen(ROOT_METADEVICE);
+
+	*mdname = (char *)malloc(strlen(cp) + 1);
+
+	if (*mdname == NULL) {
+		rval = ENOMEM;
+		goto out;
+	}
+	(void) strcpy(*mdname, cp);
+	debug_printf("get_rootmetadevice(): *mdname %s rval %d\n",
+	    *mdname, rval);
+out:
+	(void) fclose(fp);
+	return (rval);
+}
+
+/*
+ * FUNCTION: create_diskset_links
+ *	For every set record that can be fetched, create the symlink
+ *	/dev/md/<setname> -> shared/<setnumber>.  rpc.metad normally makes
+ *	these links when a set is created, but those live on the real
+ *	system disk while we are running from the devfs built when the
+ *	install image booted.
+ */
+
+void
+create_diskset_links()
+{
+	md_error_t	err = mdnullerror;
+	int		nsets;
+	int		setno;
+
+	/*
+	 * Resolve the function pointers for libsds_sc so that the set
+	 * records can be read.
+	 */
+	(void) sdssc_bind_library();
+	(void) init_metalib();
+
+	nsets = get_max_sets(&err);
+	if (nsets == 0) {
+		debug_printf("create_diskset_links(): get_max_sets failed\n");
+		mdclrerror(&err);
+		return;
+	}
+
+	/* set number 0 is not visited */
+	for (setno = 1; setno < nsets; setno++) {
+		char		linkpath[MAXPATHLEN];
+		char		linktarget[MAXPATHLEN];
+		md_set_record	*rec;
+
+		rec = metad_getsetbynum(setno, &err);
+		if (rec == NULL) {
+			mdclrerror(&err);
+			continue;
+		}
+
+		(void) snprintf(linkpath, sizeof (linkpath), "/dev/md/%s",
+		    rec->sr_setname);
+		(void) snprintf(linktarget, sizeof (linktarget), "shared/%d",
+		    setno);
+
+		/*
+		 * A failing symlink() is deliberately ignored: suninstall
+		 * is restartable, so the link may already exist.
+		 */
+		(void) symlink(linktarget, linkpath);
+	}
+}
+
+/*
+ * FUNCTION: svm_alloc
+ *	Allocate one zero-filled svm_info_t.
+ *
+ * RETURN VALUES:
+ *	Non null - SUCCESS
+ *	NULL - FAIL (calloc() failed)
+ */
+
+svm_info_t *
+svm_alloc()
+{
+	svm_info_t	*infop;
+
+	infop = (svm_info_t *)calloc(1, sizeof (svm_info_t));
+	return (infop);
+}
+
+/*
+ * FUNCTION: svm_free
+ *	Release an svm_info_t: the first "count" component strings, the
+ *	root_md string and the structure itself.  NULL is accepted and
+ *	ignored.
+ *
+ * INPUT: pointer to struct svm_info
+ */
+
+void
+svm_free(svm_info_t *svmp)
+{
+	int	idx;
+
+	if (svmp != NULL) {
+		idx = 0;
+		while (idx < svmp->count) {
+			free(svmp->md_comps[idx]);
+			idx++;
+		}
+		free(svmp->root_md);
+		free(svmp);
+	}
+}
+
+/*
+ * FUNCTION: get_mdcomponents
+ *	Given "uname" metadevice, return the physical components
+ *	of that metadevice.
+ *
+ * INPUT:
+ *	uname - metadevice name
+ *	svmpp - address of the caller's svm_info_t pointer.  The structure
+ *		is resized with realloc(), so *svmpp may change.
+ *
+ * RETURN VALUES:
+ *	svmp - structure containing md name and components.  If filling
+ *		the structure fails after it was resized, it is freed and
+ *		*svmpp is set to NULL; on earlier failures *svmpp is left
+ *		untouched.
+ *	RET_SUCCESS
+ *	RET_ERROR
+ *
+ */
+
+int
+get_mdcomponents(char *uname, svm_info_t **svmpp)
+{
+
+	svm_info_t	*svmp;
+	svm_info_t	*newp;
+	md_error_t	status, *ep;
+	mdname_t	*namep;
+	mdnamelist_t	*nlp = NULL;
+	mdnamelist_t	*p;
+	mdsetname_t	*sp = NULL;
+	char		*strp = NULL;
+	size_t		extra;
+	int		rval, cnt;
+
+	/* nothing can be looked up or returned without both arguments */
+	if (uname == NULL || svmpp == NULL)
+		return (RET_ERROR);
+
+	rval = RET_SUCCESS;
+	cnt = 0;
+	status = mdnullerror;
+	ep = &status;
+	svmp = *svmpp;
+
+	(void) init_metalib();
+
+	debug_printf("get_mdcomponents(): Enter unit name %s\n", uname);
+
+	if (((namep = metaname(&sp, uname, ep)) == NULL) ||
+	    (metachkmeta(namep, ep) != 0)) {
+		debug_printf("get_mdcomponents(): "
+		    "metaname or metachkmeta failed\n");
+		mdclrerror(ep);
+		return (RET_ERROR);
+	}
+
+	debug_printf("get_mdcomponents(): meta_getdevs %s\n", namep->cname);
+
+	if ((meta_getdevs(sp, namep, &nlp, ep)) < 0) {
+		debug_printf("get_mdcomponents(): "
+		    "comp %s - meta_getdevs failed\n", uname);
+		metafreenamelist(nlp);
+		mdclrerror(ep);
+		return (RET_ERROR);
+	}
+
+	/* compute the number of devices */
+
+	for (p = nlp, cnt = 0; p != NULL; p = p->next, cnt++)
+		;
+
+	/*
+	 * Need to add n - 1 components since svmp already has space
+	 * for one device.  With an empty list nothing is added; the old
+	 * "cnt - 1" arithmetic wrapped in that case and asked realloc()
+	 * for less than sizeof (svm_info_t).
+	 */
+	extra = (cnt > 1) ? (size_t)(cnt - 1) : 0;
+
+	newp = (svm_info_t *)realloc(svmp, sizeof (svm_info_t) +
+	    (sizeof (char *) * extra));
+
+	if (newp == NULL) {
+		/* the caller's block is still valid and still in *svmpp */
+		debug_printf("get_mdcomponents(): realloc of svmp failed\n");
+		metafreenamelist(nlp);
+		return (RET_ERROR);
+	}
+	svmp = newp;
+
+	for (p = nlp, cnt = 0; p != NULL; p = p->next, cnt++) {
+		mdname_t *devnp = p->namep;
+
+		if ((strp = strdup(devnp->cname)) == NULL) {
+			rval = RET_ERROR;
+			break;
+		}
+		svmp->md_comps[cnt] = strp;
+	}
+
+	/*
+	 * count is the number of component strings actually stored, so
+	 * that svm_free() below releases exactly those on failure.
+	 */
+
+	svmp->count = cnt;
+	svmp->root_md = strdup(uname);
+	if (rval == RET_SUCCESS && svmp->root_md != NULL) {
+		debug_printf("get_mdcomponents(): root_md %s count %d \n",
+		    svmp->root_md, svmp->count);
+		for (cnt = 0; cnt < svmp->count; cnt++)
+			debug_printf("get_mdcomponents(): %s\n",
+			    svmp->md_comps[cnt]);
+	} else {
+		rval = RET_ERROR;
+		svm_free(svmp);
+		svmp = NULL;
+		debug_printf("get_mdcomponents(): malloc failed\n");
+
+	}
+
+
+	metafreenamelist(nlp);
+	*svmpp = svmp;
+	return (rval);
+}
+
+
+/*
+ * FUNCTION: svm_get_components
+ *	Return, through svmpp, the components of a metadevice.
+ *
+ * INPUT:
+ *	md_device - eg. /dev/md/dsk/d10, /dev/md/foo/dsk/d10, or
+ *		/dev/md/shared/1/dsk/d10
+ *
+ * RETURN:
+ *	0 - SUCCESS
+ *	!0 - FAIL
+ */
+
+int
+svm_get_components(char *md_device, svm_info_t **svmpp)
+{
+	char		mdname[MAXPATHLEN];
+	md_error_t	error = mdnullerror;
+	mdsetname_t	*sp;
+	char		*numstart;
+	char		*sep;
+	int		prefixlen;
+	int		numlen;
+	int		setnum;
+
+	/*
+	 * Names that do not begin with /dev/md/shared/ (for example
+	 * /dev/md/dsk/d10, /dev/md/foo/dsk/d10, d10 or foo/d10) are passed
+	 * to get_mdcomponents() unchanged.
+	 */
+	prefixlen = strlen("/dev/md/shared/");
+	if (strncmp(md_device, "/dev/md/shared/", prefixlen) != 0) {
+		debug_printf("svm_get_components(): md_device %s\n", md_device);
+		return (get_mdcomponents(md_device, svmpp));
+	}
+
+	/*
+	 * A shared name such as /dev/md/shared/1/dsk/d10 is turned into
+	 * <setname>/<metadevice> (e.g. foo/d10) first.
+	 */
+	numstart = md_device + prefixlen;
+
+	sep = strstr(numstart, "/");
+	if (sep == NULL)
+		return (RET_ERROR);
+
+	numlen = sep - numstart;
+	if (numlen >= MAXPATHLEN - 1)
+		return (RET_ERROR);
+
+	/* mdname temporarily holds the digits of the set number */
+	(void) strlcpy(mdname, numstart, numlen + 1);
+	setnum = atoi(mdname);
+
+	sp = metasetnosetname(setnum, &error);
+	if (sp == NULL || !mdisok(&error))
+		return (RET_ERROR);
+
+	/* step over the set number, then over the "dsk" path element */
+	numstart = sep + 1;
+	sep = strstr(numstart, "/");
+	if (sep == NULL)
+		return (RET_ERROR);
+
+	(void) snprintf(mdname, MAXPATHLEN, "%s/%s", sp->setname,
+	    sep + 1);
+
+	debug_printf("svm_get_components(): mdname %s\n", mdname);
+	return (get_mdcomponents(mdname, svmpp));
+}
diff --git a/usr/src/lib/lvm/libsvm/common/modops.c b/usr/src/lib/lvm/libsvm/common/modops.c
new file mode 100644
index 0000000000..78914a9069
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/modops.c
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/errno.h>
+#include <sys/modctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <svm.h>
+
+/*
+ * FUNCTION: get modid
+ *	Query module information repeatedly until an entry whose name
+ *	equals modname is found or the MODINFO request fails.
+ *
+ * INPUT: module name
+ *
+ * RETURN VALUES:
+ *	mi_id of the matching module on SUCCESS
+ *	RET_ERROR on FAIL (a MODINFO request failed before any match)
+ */
+
+static int
+get_modid(char *modname)
+{
+	struct modinfo	mi;
+	int		startid = -1;	/* look for all modules */
+
+	mi.mi_id = mi.mi_nextid = startid;
+	mi.mi_info = MI_INFO_ALL | MI_INFO_NOBASE;
+
+	/*
+	 * NOTE(review): the id argument never changes, so progress relies
+	 * on modctl() updating the modinfo structure between calls --
+	 * confirm against the MODINFO implementation.
+	 */
+	for (;;) {
+		if (modctl(MODINFO, startid, &mi) < 0)
+			return (RET_ERROR);
+
+		/* make sure the name is terminated before comparing */
+		mi.mi_name[MODMAXNAMELEN - 1] = '\0';
+		if (strcmp(mi.mi_name, modname) == 0)
+			return (mi.mi_id);
+	}
+}
+
+/*
+ * FUNCTION: mod_unload
+ * unload a module.
+ *
+ * INPUT: module name
+ *
+ * RETURN VALUES:
+ * 0 - SUCCESS
+ * !0 - FAIL
+ * > 0 errno
+ * -1
+ * NOTE: If we fail to get the module id because the module is not
+ * currently loaded we still want to try to force a reload of the
+ * .conf file when it does load.
+ *
+ * A MODUNLOAD failure is remembered and returned only if the later
+ * requests succeed; when MODGETMAJBIND, MODUNLOADDRVCONF or
+ * MODLOADDRVCONF fails, the errno of that request is returned instead.
+ */
+int
+mod_unload(char *modname)
+{
+ int id;
+ major_t major;
+ int rval = RET_SUCCESS;
+
+ id = get_modid(modname);
+
+ if (id != -1) { /* only a module that was found is unloaded */
+ if (modctl(MODUNLOAD, id) < 0) {
+ rval = errno; /* keep going; the .conf reload is still tried */
+ }
+ }
+
+ if ((modctl(MODGETMAJBIND, modname, strlen(modname) + 1,
+ &major)) != 0) {
+ return (errno);
+ }
+
+ if ((modctl(MODUNLOADDRVCONF, major) != 0) ||
+ (modctl(MODLOADDRVCONF, major) != 0)) {
+ return (errno);
+ }
+
+ return (rval);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/start_svm.c b/usr/src/lib/lvm/libsvm/common/start_svm.c
new file mode 100644
index 0000000000..f423d4f418
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/start_svm.c
@@ -0,0 +1,284 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <ctype.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <limits.h>
+#include <meta.h>
+#include <svm.h>
+#include <libsvm.h>
+
+#define MODEBITS (S_ISUID|S_ISGID|S_ISVTX|S_IRWXU|S_IRWXG|S_IRWXO) /* mode bits copyfile() carries over */
+#define ISREG(A) (((A).st_mode & S_IFMT) == S_IFREG) /* A is a struct stat; true for a regular file */
+#define DEFAULT_ROOTDIR "/a" /* root used by svm_start() when mntpnt is NULL */
+
+
+/*
+ * FUNCTION: svm_start
+ *	starts SDS/SVM configuration. If root mirroring exists then the
+ *	components of the root mirror are returned in svmpp.
+ *
+ *	MD_CONF is first saved to MD_CONF_ORIG.  Failures that reach the
+ *	common error exit copy the saved file back over MD_CONF.
+ *
+ * INPUT: mntpnt - root mount point (DEFAULT_ROOTDIR when NULL)
+ *	svmpp - prealloced structure to return components
+ *	repl_state_flag - SVM_CONV/SVM_DONT_CONV
+ *
+ * RETURN:
+ *	0 - SUCCESS
+ *	!0 - ERROR
+ *		if > 0 errno
+ */
+
+int
+svm_start(char *mntpnt, svm_info_t **svmpp, int repl_state_flag)
+{
+	char *rootdir, *tf;
+	char *mdevnamep = NULL;
+	char system_file[PATH_MAX];
+	char mdconf[PATH_MAX];
+	int rval = 0;
+
+	if (mntpnt == NULL)
+		rootdir = DEFAULT_ROOTDIR;
+	else
+		rootdir = mntpnt;
+
+	if ((rval = snprintf(system_file, PATH_MAX, "%s%s",
+	    rootdir, SYSTEM_FILE)) < 0) {
+		return (RET_ERROR);
+	}
+
+	if ((rval = snprintf(mdconf, PATH_MAX, "%s%s",
+	    rootdir, MD_CONF)) < 0) {
+		return (RET_ERROR);
+	}
+
+	debug_printf("svm_start(): repl_state_flag %s\n",
+	    (repl_state_flag == SVM_DONT_CONV) ? "SVM_DONT_CONV":
+	    "SVM_CONV");
+
+	/* keep a copy so that the error exit can put md.conf back */
+	if (copyfile(MD_CONF, MD_CONF_ORIG))
+		return (RET_ERROR);
+
+	switch (rval = convert_bootlist(system_file, mdconf, &tf)) {
+	case 0:
+	case -1:	/* found in etc/system flag */
+		break;
+	default:	/* convert bootlist failed */
+		debug_printf("svm_start(): convert_bootlist failed."
+		    "rval %d\n", rval);
+		goto errout;
+	}
+
+	if (repl_state_flag == SVM_DONT_CONV) {
+		rval = create_in_file_prop(PROP_KEEP_REPL_STATE, tf);
+		if (rval != 0)
+			goto errout;
+	}
+
+	if (is_upgrade_prop(PROP_DEVID_DESTROY)) {
+		rval = create_in_file_prop(PROP_DEVID_DESTROY, tf);
+		/*
+		 * For idempotent behavior, reset the internal
+		 * flag in case we have to return due to errors
+		 */
+		set_upgrade_prop(PROP_DEVID_DESTROY, 0);
+		if (rval != 0)
+			goto errout;
+	}
+
+
+	/*
+	 * Since svm_start is called only after svm_check,
+	 * we can assume that there is a valid metadb. If the mddb_bootlist
+	 * is not found in etc/system, then it must be in md.conf which
+	 * we copied to temporary file pointed to by tf
+	 */
+	if (copyfile(tf, MD_CONF)) {
+		debug_printf("svm_start(): copy of %s to %s failed\n", tf,
+		    MD_CONF);
+		/*
+		 * rval may still be 0 from the steps above; without this
+		 * assignment the error exit below reported success and
+		 * skipped the md.conf restore.
+		 */
+		rval = RET_ERROR;
+		goto errout;
+	}
+
+	if ((rval = write_xlate_to_mdconf(rootdir)) != 0) {
+		debug_printf("svm_start(): write_xlate_to_mdconf(%s) failed\n",
+		    rootdir);
+		goto errout;
+	}
+
+	if ((rval = write_targ_nm_table(rootdir)) != 0) {
+		goto errout;
+	}
+
+	/* run devfsadm to create the devices specified in md.conf */
+	if ((rval = system("/usr/sbin/devfsadm -r /tmp -p "
+	    "/tmp/root/etc/path_to_inst -i md")) != 0) {
+		debug_printf("svm_start(): devfsadm -i md failed: %d\n", rval);
+		goto errout;
+	}
+
+	/*
+	 * We have to unload md after the devfsadm run so that when metainit
+	 * loads things it gets the right information from md.conf.
+	 *
+	 * NOTE(review): this is the only failure after the md.conf backup
+	 * that returns directly instead of going through errout, so
+	 * md.conf is not restored here -- confirm that this is intended.
+	 */
+	if ((rval = svm_stop()) != 0) {
+		debug_printf("svm_start(): svm_stop failed.\n");
+		return (RET_ERROR);
+	}
+
+	if ((rval = system("/usr/sbin/metainit -r")) != 0) {
+		debug_printf("svm_start(): metainit -r failed: %d\n", rval);
+		goto errout;
+	}
+
+	create_diskset_links();
+
+	if ((rval = system("/usr/sbin/metasync -r")) != 0) {
+		debug_printf("svm_start(): metasync -r failed: %d\n", rval);
+		goto errout;
+	}
+
+	/*
+	 * We ignore failures from metadevadm, since it can fail if
+	 * miniroot dev_t's don't match target dev_ts. But it still
+	 * will update md.conf with device Id information which is
+	 * why we are calling it here.
+	 */
+
+	(void) system("/usr/sbin/metadevadm -r");
+
+	/*
+	 * check to see if we have a root metadevice and if so
+	 * get its components.
+	 */
+
+	if ((rval = get_rootmetadevice(rootdir, &mdevnamep)) == 0) {
+		if ((rval = get_mdcomponents(mdevnamep, svmpp)) != 0) {
+			debug_printf("svm_start(): get_mdcomponents(%s,..)"
+			    "failed %d\n", mdevnamep, rval);
+			goto errout;
+		}
+
+	} else {
+		rval = 0; /* not a mirrored root */
+		debug_printf("svm_start(): get_rootmetadevice(%s,..) "
+		    "No root mirrors! ", rootdir);
+	}
+errout:
+	free(mdevnamep);
+	if (rval != 0) {
+		struct stat sbuf;
+
+		/* put the saved md.conf back if the backup exists */
+		if (stat(MD_CONF_ORIG, &sbuf) == 0)
+			(void) copyfile(MD_CONF_ORIG, MD_CONF);
+		debug_printf("svm_start(): svm_start failed: %d\n", rval);
+	} else {
+		int i;
+
+		/* the trace is skipped when the caller gave no structure */
+		if (svmpp != NULL && *svmpp != NULL) {
+			if ((*svmpp)->count > 0) {
+				debug_printf("svmpp: ");
+				debug_printf(" root_md: %s",
+				    (*svmpp)->root_md);
+				debug_printf(" count: %d", (*svmpp)->count);
+				for (i = 0; i < (*svmpp)->count; i++) {
+					debug_printf(" md_comps[%d]: %s", i,
+					    (*svmpp)->md_comps[i]);
+				}
+				debug_printf(" \n");
+			} else {
+				if ((*svmpp)->count == 0)
+					debug_printf("svm_start(): "
+					    "no mirrored root\n");
+			}
+		}
+		debug_printf("svm_start(): svm_start succeeded.\n");
+	}
+	return (rval);
+}
+
+/*
+ * FUNCTION: copyfile
+ *	Copy the regular file "from" to "to".  "to" is created or
+ *	truncated and given the mode bits (MODEBITS) of "from".  If the
+ *	copy fails part way, "to" is removed.
+ *
+ * INPUT: from - path of the file to read; must be a regular file
+ *	to - path of the file to write
+ *
+ * RETURN:
+ *	RET_SUCCESS
+ *	RET_ERROR
+ */
+int
+copyfile(char *from, char *to)
+{
+	int		fromfd, tofd;
+	char		buf[1024];
+	ssize_t		rbytes;
+	ssize_t		wbytes;
+	ssize_t		done;
+	struct stat	fromstat;
+
+	if ((fromfd = open(from, O_RDONLY | O_NDELAY)) < 0)
+		return (RET_ERROR);
+
+	if ((fstat(fromfd, &fromstat) < 0) || ! ISREG(fromstat)) {
+		(void) close(fromfd);
+		return (RET_ERROR);
+	}
+
+	if ((tofd = open(to, O_CREAT | O_WRONLY | O_TRUNC,
+	    (fromstat.st_mode & MODEBITS))) < 0) {
+		(void) close(fromfd);
+		return (RET_ERROR);
+	}
+
+	/*
+	 * in case the file exists then perm is forced by this chmod
+	 */
+	(void) fchmod(tofd, fromstat.st_mode & MODEBITS);
+
+	for (;;) {
+		rbytes = read(fromfd, buf, sizeof (buf));
+		if (rbytes < 0) {
+			/*
+			 * A successful stat does not guarantee that reads
+			 * succeed.  Retry an interrupted read; treat any
+			 * other error as a failed copy instead of passing
+			 * -1 to write() as a length.
+			 */
+			if (errno == EINTR)
+				continue;
+			break;
+		}
+		if (rbytes == 0)
+			break;
+
+		/* write() may accept fewer bytes than asked; finish them */
+		done = 0;
+		while (done < rbytes) {
+			wbytes = write(tofd, buf + done, rbytes - done);
+			if (wbytes > 0) {
+				done += wbytes;
+			} else if (wbytes < 0 && errno == EINTR) {
+				continue;
+			} else {
+				break;
+			}
+		}
+		if (done < rbytes) {
+			rbytes = -1;
+			break;
+		}
+	}
+
+	(void) close(fromfd);
+	(void) close(tofd);
+	if (rbytes < 0) {
+		(void) unlink(to);
+		return (RET_ERROR);
+	}
+	return (RET_SUCCESS);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/update_mdconf.c b/usr/src/lib/lvm/libsvm/common/update_mdconf.c
new file mode 100644
index 0000000000..f757648911
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/update_mdconf.c
@@ -0,0 +1,379 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <devid.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <meta.h>
+#include <libsvm.h>
+#include <svm.h>
+
+/*
+ * marker strings that delimit the MDD database section in the system
+ * file ('*' comments) and in md.conf ('#' comments)
+ */
+#define BEGMDDBSTR "* Begin MDD database info (do not edit)\n"
+#define ENDMDDBSTR "* End MDD database info (do not edit)\n"
+#define NEW_BEGMDDBSTR "# Begin MDD database info (do not edit)\n"
+#define NEW_ENDMDDBSTR "# End MDD database info (do not edit)\n"
+
+#define MDDBBOOTLIST "mddb_bootlist"
+
+#define SYS_COMMENTCHAR '*'
+#define CONF_COMMENTCHAR '#'
+
+typedef struct {
+ char *prop_name; /* name written out by create_in_file_prop() */
+ int prop_val; /* is_upgrade_prop() reports true only for 1 */
+} md_prop_t;
+
+typedef enum {
+ MDDB_SYS_FILE, /* bootlist is read from the system file */
+ MDDB_MDCONF_FILE /* bootlist is read from md.conf */
+} ftype_t;
+
+static md_prop_t upgrade_props[] = { /* the NULL-named entry ends the table */
+ { PROP_KEEP_REPL_STATE, 0 },
+ { PROP_DEVID_DESTROY, 0},
+ { NULL, 0}
+};
+
+/*
+ * The following functions manage upgrade properties
+ */
+
+/*
+ * Store val for the upgrade property called prop_name.  Names that are
+ * not in upgrade_props[] are silently ignored.
+ */
+void
+set_upgrade_prop(char *prop_name, int val)
+{
+	int	i;
+
+	for (i = 0; upgrade_props[i].prop_name != NULL; i++) {
+		if (strcmp(upgrade_props[i].prop_name, prop_name) != 0)
+			continue;
+		upgrade_props[i].prop_val = val;
+		return;
+	}
+}
+
+/*
+ * Return 1 when the upgrade property called prop_name currently has
+ * the value 1; return 0 for any other value or an unknown name.
+ */
+int
+is_upgrade_prop(char *prop_name)
+{
+	int	i;
+
+	for (i = 0; upgrade_props[i].prop_name != NULL; i++) {
+		if (strcmp(upgrade_props[i].prop_name, prop_name) != 0)
+			continue;
+		return (upgrade_props[i].prop_val == 1);
+	}
+	return (0);
+}
+
+/*
+ * Append the line "<prop_name> = 1;" to the file fname when prop_name
+ * is one of the known upgrade properties.
+ *
+ * RETURN:
+ *	RET_SUCCESS - the property is known and the line was issued
+ *	RET_ERROR - prop_name is not in upgrade_props[]
+ *	errno - fname could not be opened for appending
+ */
+int
+create_in_file_prop(char *prop_name, char *fname)
+{
+	FILE	*outfp;
+	int	i;
+	int	rval = RET_ERROR;
+
+	/* the file is opened (and so created) before the name is checked */
+	outfp = fopen(fname, "a");
+	if (outfp == NULL)
+		return (errno);
+
+	for (i = 0; upgrade_props[i].prop_name != NULL; i++) {
+		if (strcmp(upgrade_props[i].prop_name, prop_name) != 0)
+			continue;
+		(void) fprintf(outfp, "%s = 1;\n",
+		    upgrade_props[i].prop_name);
+		rval = RET_SUCCESS;
+		break;
+	}
+
+	(void) fclose(outfp);
+	return (rval);
+}
+
+/*
+ * Return nonzero when str contains exactly three ':' characters, which
+ * is the shape of a bootlist entry that already carries a devid
+ * (<drivername>:<minor_number>:<offset>:<devid>).
+ */
+static int
+is_devid_added(char *str)
+{
+	int	ncolons = 0;
+	char	*colon;
+
+	/* stop counting at four; anything beyond three is a mismatch */
+	while (ncolons < 4 && (colon = strchr(str, ':')) != NULL) {
+		str = colon + 1;
+		ncolons++;
+	}
+	return (ncolons == 3);
+}
+
+/*
+ * FUNCTION: parse_bootlist
+ *	Parse the bootlist and add the extra field to each mddb_bootlist
+ *	entry to conform to devid changes.
+ *
+ *	Old format: <drivername>:<minor_number>:<offset>
+ *	New format: <drivername>:<minor_number>:<offset>:<devid>
+ *	Devid of id0 implies no device id.
+ *
+ *	When an entry without a devid is seen, PROP_DEVID_DESTROY is set
+ *	through set_upgrade_prop().
+ *
+ * INPUT: *line - contains the mddb_bootlist; it is modified by strtok()
+ *	*tfp - File pointer to the md.conf.tmp file.
+ *
+ * RETURN:
+ *	0 - Success
+ *	> 0 - Failure. Errno returned (EINVAL for a line with no property
+ *		name, E2BIG when the converted line does not fit in the
+ *		output buffer, ENOMEM when the "no devid" string cannot be
+ *		obtained, or the errno of a failed write).
+ */
+
+static int
+parse_bootlist(char *line, FILE *tfp)
+{
+	char	output[1024];
+	char	*cp;
+	char	*nodevid;
+	size_t	used;
+	int	nentries = 0;
+	int	retval = RET_SUCCESS;
+
+	(void) memset(output, 0, sizeof (output));
+
+	/* a commented-out system line stays commented out in md.conf */
+	if (line[0] == SYS_COMMENTCHAR) {
+		output[0] = CONF_COMMENTCHAR;
+	}
+	/* move the line start to mddb_bootlist */
+	cp = strstr(line, MDDBBOOTLIST);
+	if (cp != NULL)
+		line = cp;
+
+	/* grab the "mddb_bootlist" word; a line with no token is rejected */
+	if ((cp = strtok(line, "= ")) == NULL)
+		return (EINVAL);
+
+	/* add back the EQUAL and QUOTE chars */
+	if (strlcat(output, cp, sizeof (output)) >= sizeof (output) ||
+	    strlcat(output, "=\042", sizeof (output)) >= sizeof (output))
+		return (E2BIG);
+
+	/*
+	 * The line passed in is for example,
+	 * mddb_bootlist1="sd:7:16:id1,sd@SIBM_DDRS34560SUN4.2G2N9688_____/h";
+	 * At this point mddb_bootlist and "=" have been parsed out.
+	 * The remaining string consists of driver name, colon separator and
+	 * the device id(if it exists) within quotes.
+	 * The deviceid string can contain upper and lower letters, digits
+	 * and +-.=_~. Quotes, spaces and \n and \t are not
+	 * allowed. They are converted to either _ or their ascii value.
+	 * So using space,\n,;and quotes as a separator is safe.
+	 */
+
+	while ((cp = strtok(NULL, " \n\042;")) != NULL) {
+		if (strlcat(output, cp, sizeof (output)) >= sizeof (output))
+			return (E2BIG);
+		if (!is_devid_added(cp)) {
+			/*
+			 * append :id0 for devid; the encoded string is
+			 * allocated by libdevid and must be given back
+			 */
+			if ((nodevid = devid_str_encode(NULL, NULL)) == NULL)
+				return (ENOMEM);
+			(void) strlcat(output, ":", sizeof (output));
+			used = strlcat(output, nodevid, sizeof (output));
+			devid_str_free(nodevid);
+			if (used >= sizeof (output))
+				return (E2BIG);
+
+			/* no devid => SDS->SLVM migration. Set the flag */
+			set_upgrade_prop(PROP_DEVID_DESTROY, 1);
+		}
+		/* leave space between entries */
+		if (strlcat(output, " ", sizeof (output)) >= sizeof (output))
+			return (E2BIG);
+		nentries++;
+	}
+
+	/*
+	 * remove the extra space at the end; with no entries there is no
+	 * such space and the opening quote must be left alone
+	 */
+	if (nentries > 0)
+		output[strlen(output) - 1] = 0;
+	if (strlcat(output, "\042;\n", sizeof (output)) >= sizeof (output))
+		return (E2BIG);
+	if (fprintf(tfp, "%s", output) < 0) {
+		retval = errno;
+	}
+	return (retval);
+}
+
+/*
+ * FUNCTION: snarf_n_modify_bootlist
+ * This function stuffs the mddb_bootlist from either etc/system
+ * or kernel/drv/md.conf of the target system into a temporary file tname.
+ * The bootlist in the temporary file is in device ID format.
+ *
+ * The input is read from the start.  Lines between the begin and end
+ * markers are converted with parse_bootlist(); the markers themselves
+ * are always written in the md.conf ('#') form.  For MDDB_SYS_FILE
+ * reading stops at the end marker and lines outside the section are
+ * dropped; for MDDB_MDCONF_FILE lines outside the section are copied.
+ *
+ * INPUT: *fp - file pointer that contains the mddb_bootlist.
+ * *tname - file that the modified bootlist is appended to.
+ * *buf - buffer handed by upper level routine for reading in contents.
+ * bufsz - size of the buffer.
+ * mddb_file - MDDB_SYS_FILE or MDDB_MDCONF_FILE, selecting which
+ * marker strings are searched for.
+ *
+ * RETURN:
+ * 0 - Success
+ * > 0 - Failure. Errno returned.
+ * RET_ERROR - begin marker never seen, or end marker missing.
+ */
+
+static int
+snarf_n_modify_bootlist(
+ FILE *fp, /* File pointer to snarf from */
+ char *tname, /* name of the temporary file */
+ char *buf, /* Buffer to read into */
+ int bufsz, /* buffer size */
+ ftype_t mddb_file /* flag to indicate if its /etc/system or md.conf */
+)
+{
+ FILE *tfp;
+ int rval = RET_SUCCESS;
+ char *fname = SYSTEM_FILE; /* used only in the debug message */
+ char *mddb_start = BEGMDDBSTR;
+ char *mddb_end = ENDMDDBSTR;
+ convflag_t cstatus = MD_STR_NOTFOUND; /* progress through the section */
+
+ if (mddb_file == MDDB_MDCONF_FILE) {
+ fname = MD_CONF;
+ mddb_start = NEW_BEGMDDBSTR;
+ mddb_end = NEW_ENDMDDBSTR;
+ }
+
+ if ((tfp = fopen(tname, "a")) == NULL)
+ return (errno);
+ debug_printf("Convert from %s\n", fname);
+
+ rewind(fp);
+ while (fgets(buf, bufsz, fp) != NULL) {
+ if (strcmp(buf, mddb_start) == 0) {
+ cstatus = MD_STR_START;
+ if (fprintf(tfp, "%s", NEW_BEGMDDBSTR) < 0) {
+ rval = errno;
+ break;
+ }
+ continue;
+ }
+ if (cstatus == MD_STR_START) {
+ if (strcmp(buf, mddb_end) == 0) {
+ cstatus = MD_STR_DONE;
+ if (fprintf(tfp, "%s", NEW_ENDMDDBSTR) < 0) {
+ rval = errno;
+ break;
+ }
+
+ if (mddb_file == MDDB_MDCONF_FILE)
+ continue; /* keep copying the rest of md.conf */
+ else
+ break; /* nothing after the section is wanted */
+ }
+
+ rval = parse_bootlist(buf, tfp);
+ if (rval == RET_SUCCESS)
+ continue;
+ else
+ break;
+ }
+ if (mddb_file == MDDB_MDCONF_FILE) {
+ if (fprintf(tfp, "%s\n", buf) < 0) { /* NOTE(review): fgets() keeps the newline, so this adds a second one - confirm intended */
+ rval = errno;
+ break;
+ }
+ }
+
+ } /* while (fgets */
+
+ if (cstatus == MD_STR_NOTFOUND || cstatus == MD_STR_START)
+ rval = RET_ERROR; /* section missing or never terminated */
+ (void) fclose(tfp);
+ return (rval);
+}
+
+
+/*
+ * FUNCTION: convert_bootlist
+ *	Get the bootlist from $ROOT/etc/system and add modified bootlist to
+ *	md.conf.
+ *	The function converts the mddb_bootlist format from that in
+ *	/etc/system to md.conf. Also new fields are added to handle the
+ *	devid id format.
+ *	A copy of md.conf is created and the new entries are added to it.
+ *	The name of the new file is returned to the calling program.
+ *	If the system file holds no valid bootlist, md.conf itself is
+ *	used as the source.
+ *
+ * Input: system file name
+ *	md.conf file name
+ *	pointer to temp file name.
+ * RETURN:
+ *	*tname - name of the file that has md.conf + new mddb_bootlist
+ *		entries.  The string comes from tmpnam(NULL): it is not
+ *		allocated, must not be freed, and is overwritten by the
+ *		next tmpnam(NULL) call.  On failure the file is removed.
+ *	0 - success
+ *	-1 - mddb_bootlist not found
+ *	> 0 - errno
+ *
+ */
+
+int
+convert_bootlist(
+	char *sname,	/* system file name */
+	char *mdconf,	/* md.conf file name */
+	char **tname	/* temp file name */
+)
+{
+	FILE	*fp;
+	char	cmd_buf[MDDB_BOOTLIST_MAX_LEN];
+	int	retval = RET_SUCCESS;
+
+	/* check names */
+	assert(sname != NULL);
+	assert(tname != NULL);
+
+	/* get temp name */
+	*tname = tmpnam(NULL);
+	if (*tname == NULL) {
+		/* no name could be generated; nothing to clean up yet */
+		debug_printf("convert_bootlist: tmpnam failed\n");
+		return (ENOENT);
+	}
+
+	if ((fp = fopen(sname, "r")) == NULL) {
+		retval = errno;
+		goto out;
+	}
+	if (valid_bootlist(fp, MDDB_BOOTLIST_MAX_LEN) == RET_SUCCESS) {
+		/* start from a copy of md.conf, then append the bootlist */
+		if ((retval = copyfile(mdconf, *tname)) == RET_ERROR) {
+			debug_printf("convert_bootlist: copy %s %s failed\n",
+			    mdconf, *tname);
+			goto out;
+		}
+		retval = snarf_n_modify_bootlist(fp, *tname, cmd_buf,
+		    MDDB_BOOTLIST_MAX_LEN, MDDB_SYS_FILE);
+	} else {
+		(void) fclose(fp);	/* close system file */
+		if ((fp = fopen(mdconf, "r")) == NULL) {
+			retval = errno;
+			goto out;
+		}
+		if (valid_bootlist(fp, MDDB_BOOTLIST_MAX_LEN) == RET_ERROR) {
+			retval = RET_ERROR;
+			goto out;
+		}
+		retval = snarf_n_modify_bootlist(fp, *tname, cmd_buf,
+		    MDDB_BOOTLIST_MAX_LEN, MDDB_MDCONF_FILE);
+	}
+out:
+	debug_printf("convert_bootlist: retval %d\n", retval);
+	if (fp != NULL)
+		(void) fclose(fp);
+
+	/*
+	 * Remove a partly written temp file.  The name itself lives in
+	 * tmpnam()'s internal static buffer, so it is not passed to free()
+	 * as the previous code did.
+	 */
+	if ((retval != RET_SUCCESS) && (*tname != NULL)) {
+		(void) unlink(*tname);
+	}
+	return (retval);
+}
diff --git a/usr/src/lib/lvm/libsvm/i386/Makefile b/usr/src/lib/lvm/libsvm/i386/Makefile
new file mode 100644
index 0000000000..4fc9526b2a
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/i386/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Definitions come from the Makefile.com in the parent directory.
+include ../Makefile.com
+
+# "install" and "debug" share one prerequisite list; the macros are
+# presumably defined by ../Makefile.com or the files it includes.
+install debug: $(ROOTLIBDIR) $(ROOTLIBS) $(ROOTLINKS)
diff --git a/usr/src/lib/lvm/libsvm/sparc/Makefile b/usr/src/lib/lvm/libsvm/sparc/Makefile
new file mode 100644
index 0000000000..4fc9526b2a
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/sparc/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+# install and debug share one prerequisite list; no recipe is given in this file.
+install debug: $(ROOTLIBDIR) $(ROOTLIBS) $(ROOTLINKS)
diff --git a/usr/src/lib/lvm/libsvm/spec/Makefile b/usr/src/lib/lvm/libsvm/spec/Makefile
new file mode 100644
index 0000000000..7256a09a20
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/Makefile
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+include $(SRC)/lib/Makefile.spec.arch
diff --git a/usr/src/lib/lvm/libsvm/spec/Makefile.targ b/usr/src/lib/lvm/libsvm/spec/Makefile.targ
new file mode 100644
index 0000000000..3a5ed0eb26
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/Makefile.targ
@@ -0,0 +1,41 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+LIBRARY = libsvm.a
+VERS = .1
+
+OBJECTS = svm.o
+
+TRANSCPP =
+
+SPECCPP = -I.. -I../../inc
+
+#
+# usr/snadm/lib/abi targets (SNADMINLIB_ABI is assigned after its first reference below)
+#
+SVMLIB_ABILIB= $(SNADMINLIB_ABI)/$(ABILIB)
+SNADMINLIB_ABI=$(ROOT)/usr/snadm/lib/abi
diff --git a/usr/src/lib/lvm/libsvm/spec/amd64/Makefile b/usr/src/lib/lvm/libsvm/spec/amd64/Makefile
new file mode 100644
index 0000000000..a3a067f7a0
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/amd64/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line (it prefixes every line of the install rule below)
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here (none are added for amd64)
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#amd64_C_PICFLAGS = $(amd64_C_BIGPICFLAGS)
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(SPECMAP) $(ABILIB)
+$(DISABLE_APPTRACE) $(INS) -s -d -m $(DIRMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE) -g $(GROUP) $(SNADMINLIB_ABI)/amd64
+$(DISABLE_APPTRACE) $(INS) -s -m $(FILEMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE) -g $(GROUP) -f $(SNADMINLIB_ABI)/amd64 $(ABILIB)
diff --git a/usr/src/lib/lvm/libsvm/spec/i386/Makefile b/usr/src/lib/lvm/libsvm/spec/i386/Makefile
new file mode 100644
index 0000000000..23807a39c5
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/i386/Makefile
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line (it prefixes the install target at the end of this file)
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here (none are added for i386)
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#i386_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+ROOTABILIB= $(SNADMINLIB_ABI)/$(ABILIB)
+
+$(ROOTABILIB): $(SNADMINLIB_ABI) $(SPECMAP)
+
+$(ROOTABILIB): $(ABILIB)
+ $(INS.file)
+
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libsvm/spec/sparc/Makefile b/usr/src/lib/lvm/libsvm/spec/sparc/Makefile
new file mode 100644
index 0000000000..19aecc452c
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/sparc/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line (it prefixes every line of the install rule below)
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here (none are added for sparc)
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#sparc_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(SPECMAP) $(ABILIB)
+$(DISABLE_APPTRACE) $(INS) -s -d -m $(DIRMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE) -g $(GROUP) $(SNADMINLIB_ABI)
+$(DISABLE_APPTRACE) $(INS) -s -m $(FILEMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE) -g $(GROUP) -f $(SNADMINLIB_ABI) $(ABILIB)
diff --git a/usr/src/lib/lvm/libsvm/spec/sparcv9/Makefile b/usr/src/lib/lvm/libsvm/spec/sparcv9/Makefile
new file mode 100644
index 0000000000..704f35f988
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/sparcv9/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line (it prefixes every line of the install rule below)
+DISABLE_APPTRACE= $(POUND_SIGN)
+
+include ../Makefile.targ
+
+# Add arch specific objects here (none are added for sparcv9)
+OBJECTS +=
+
+include $(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#sparc_C_PICFLAGS = -K PIC
+
+include $(SRC)/lib/Makefile.spec
+
+$(DISABLE_APPTRACE)install: $(SPECMAP) $(ABILIB)
+$(DISABLE_APPTRACE) $(INS) -s -d -m $(DIRMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE) -g $(GROUP) $(SNADMINLIB_ABI)/sparcv9
+$(DISABLE_APPTRACE) $(INS) -s -m $(FILEMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE) -g $(GROUP) -f $(SNADMINLIB_ABI)/sparcv9 $(ABILIB)
diff --git a/usr/src/lib/lvm/libsvm/spec/svm.spec b/usr/src/lib/lvm/libsvm/spec/svm.spec
new file mode 100644
index 0000000000..ae381eaae5
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/svm.spec
@@ -0,0 +1,55 @@
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+# The seven functions declared below are all assigned to version SUNWprivate_1.1.

+function svm_check
+version SUNWprivate_1.1
+end
+
+function svm_start
+version SUNWprivate_1.1
+end
+
+function svm_stop
+version SUNWprivate_1.1
+end
+
+function svm_is_md
+version SUNWprivate_1.1
+end
+
+function svm_get_components
+version SUNWprivate_1.1
+end
+
+function svm_alloc
+version SUNWprivate_1.1
+end
+
+function svm_free
+version SUNWprivate_1.1
+end
+
diff --git a/usr/src/lib/lvm/libsvm/spec/versions b/usr/src/lib/lvm/libsvm/spec/versions
new file mode 100644
index 0000000000..523cb927d5
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/versions
@@ -0,0 +1,31 @@
+#pragma ident "%Z%%M% %I% %E% SMI"
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# One version block per architecture; i386 and sparc each list only SUNWprivate_1.1.
+i386 {
+ SUNWprivate_1.1;
+}
+sparc {
+ SUNWprivate_1.1;
+}