OpenSolaris Launch

author: stevel@tonic-gate <none@none> 2005-06-14 00:00:00 -0700
committer: stevel@tonic-gate <none@none> 2005-06-14 00:00:00 -0700
commit: 7c478bd95313f5f23a4c958a745db2134aa03244 (patch)
tree: c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/lib/lvm
download: illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz
120 files changed, 82196 insertions, 0 deletions
diff --git a/usr/src/lib/lvm/Makefile b/usr/src/lib/lvm/Makefile
new file mode 100644
index 0000000000..15d40f66b7
--- /dev/null
+++ b/usr/src/lib/lvm/Makefile
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 1998-2002 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include ../Makefile.lib
+
+#
+# Directories visited by each recursing target.  The .WAIT entry makes
+# make finish libmeta before it starts libpreen and libsvm.
+# NOTE(review): presumably because those two link against libmeta - confirm.
+# Only libsvm is visited for check/install_h, and only libmeta for the
+# message-catalogue targets (_dc, _msg).
+#
+SUBDIRS =	libmeta .WAIT libpreen libsvm
+HDRSUBDIRS = 	libsvm
+DCSUBDIRS =	libmeta
+MSGSUBDIRS =	libmeta
+
+#
+# Conditional macros: while one of these targets is being built, TARGET
+# holds the goal that the $(SUBDIRS) rule below hands to the sub-make.
+#
+all	:= TARGET = all
+clean	:= TARGET = clean
+clobber	:= TARGET = clobber
+check	:= TARGET = check
+debug	:= TARGET = debug
+install	:= TARGET = install
+install_h := TARGET = install_h
+lint	:= TARGET = lint
+_dc	:= TARGET = _dc
+_msg	:= TARGET = _msg
+
+.KEEP_STATE:
+
+.PARALLEL: $(SUBDIRS)
+
+all clean clobber debug lint: $(SUBDIRS)
+
+#
+# NOTE(review): ROOTDIRS is assigned in lib/lvm/Makefile.lvm, which this
+# file does not include; unless ../Makefile.lib supplies a value it
+# expands to nothing here - confirm.
+#
+install: $(ROOTDIRS) $(SUBDIRS)
+
+_dc: $(DCSUBDIRS)
+
+check install_h: $(HDRSUBDIRS)
+
+_msg: $(MSGSUBDIRS)
+
+# Recurse into one subdirectory, printing its path first.
+$(SUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+# Empty target with no dependencies: forces the rule above to run every time.
+FRC:
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/lvm/Makefile.lvm b/usr/src/lib/lvm/Makefile.lvm
new file mode 100644
index 0000000000..99a4d94727
--- /dev/null
+++ b/usr/src/lib/lvm/Makefile.lvm
@@ -0,0 +1,39 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+#
+# Settings shared by the lvm library makefiles (libmeta/Makefile.com
+# includes this file).
+#
+include $(SRC)/lib/Makefile.lib
+
+# On sparc, compile position-independent code with -K PIC.
+sparc_C_PICFLAGS =	-K PIC
+
+# Library sources live in the per-library common directory.
+SRCDIR =		../common
+
+# base target directories
+ROOTDIRS = $(ROOT)/usr $(ROOTHDRDIR) $(ROOTLIBDIR) 
+
+# Default rpcgen options: -C (ANSI C output) and -M (MT-safe stubs).
+RPCGENFLAGS =	-C -M
+CFLAGS +=	$(CCVERBOSE)
diff --git a/usr/src/lib/lvm/Makefile.targ b/usr/src/lib/lvm/Makefile.targ
new file mode 100644
index 0000000000..41665529c2
--- /dev/null
+++ b/usr/src/lib/lvm/Makefile.targ
@@ -0,0 +1,42 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 1998-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+#
+# Targets shared by the lvm library makefiles (libmeta/Makefile.com
+# includes this file last).
+#
+include $(SRC)/lib/Makefile.targ
+
+# basic target directories
+$(ROOTDIRS):
+	$(INS.dir)
+
+# "debug" goal: build with -g instead of the normal optimisation flags.
+debug	:=	COPTFLAG = -g
+debug	:=	COPTFLAG64 = -g
+debug	:=	DYNFLAGS += -g
+
+# Run the C style checker over whatever the including makefile put in SRCS.
+cstyle:
+	cstyle -pP $(SRCS)
+
+# lintcheck is not defined in this file.
+# NOTE(review): presumably supplied by $(SRC)/lib/Makefile.targ - confirm.
+lint:	lintcheck
diff --git a/usr/src/lib/lvm/libmeta/Makefile b/usr/src/lib/lvm/libmeta/Makefile
new file mode 100644
index 0000000000..ab4fe9337c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/Makefile
@@ -0,0 +1,78 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include $(SRC)/lib/Makefile.lib
+
+# The library itself is built in the per-architecture directory.
+SUBDIRS =	$(MACH) 
+
+#
+# Message-catalogue inputs.  MSGSRCS is the shell expansion of every .c
+# and .h file one directory below this one; MSGFILES rewrites only the
+# .c names to .i, so any .h name in MSGSRCS is carried over unchanged.
+# NOTE(review): "clean" removes $(MSGFILES); if a header ever matches
+# */*.h it would be deleted from the source tree - confirm none does.
+#
+MSGSRCS :sh= echo */*.[ch]
+MSGFILES = $(MSGSRCS:%.c=%.i)
+POFILE = libmeta.po	
+
+DCFILES = common/meta_print.po
+DCFILE = libmeta.dc
+
+#
+# Conditional macros: TARGET is the goal handed to the sub-make by the
+# "spec $(MACH)" rule below.  "all" is set explicitly so that the
+# sub-make does not depend on "all" being the default target of every
+# subdirectory makefile.
+#
+all :=		TARGET= all
+install :=	TARGET= install
+clean :=	TARGET= clean
+clobber :=	TARGET= clobber
+lint :=		TARGET= lint
+test :=		TARGET= test
+debug :=	TARGET= debug
+
+CPPFLAGS += -I$(SRC)/lib/lvm/libmeta/common/hdrs
+
+.KEEP_STATE:
+
+# spec is finished (.WAIT) before the architecture directory is started.
+all debug install: spec .WAIT $(SUBDIRS)
+
+clean: spec $(SUBDIRS)
+	$(RM) $(MSGFILES) $(DCFILES)
+
+clobber: spec $(SUBDIRS)
+	$(RM) $(POFILE) $(DCFILE)
+
+lint: $(SUBDIRS)
+
+$(DCFILE):= XGETFLAGS = -c TRANSLATION_NOTE_LC_TIME -t
+
+# The .dc catalogue is the concatenation of the per-file .po inputs.
+$(DCFILE): $(DCFILES)
+	$(CAT) $(DCFILES) > $(DCFILE)
+
+$(POFILE): $(MSGFILES)
+	$(BUILDPO.msgfiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+_dc: $(DCMSGDOMAINPOFILE)
+
+# Recurse into one subdirectory, printing its path first.
+spec $(MACH): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+# Empty target with no dependencies: forces the rule above to run every time.
+FRC:
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/lvm/libmeta/Makefile.com b/usr/src/lib/lvm/libmeta/Makefile.com
new file mode 100644
index 0000000000..ad747e2331
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/Makefile.com
@@ -0,0 +1,189 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+LIBRARY=       	libmeta.a 
+VERS=          	.1 
+# Directory holding sources that CMN_OBJS is built from (see CMN_SRCS).
+COMMON =	$(SRC)/common/lvm
+
+CMN_OBJS = md_crc.o
+
+#
+# Objects whose .c files are not checked in: each one is generated by an
+# rpcgen rule further down in this file.
+#
+DERIVED_OBJS = \
+	mdiox_xdr.o \
+	meta_basic_xdr.o \
+	metad_clnt.o \
+	metad_xdr.o \
+	metamed_clnt.o \
+	metamed_xdr.o \
+	metamhd_clnt.o \
+	metamhd_xdr.o \
+	mdmn_commd_xdr.o \
+	mhdx_xdr.o
+
+# Objects built from hand-written sources in ../common (see LOCAL_SRCS).
+LOCAL_OBJS=        \
+	metad_svc_stubs.o \
+	meta_admin.o \
+	meta_attach.o \
+	meta_db.o \
+	meta_db_balance.o \
+	meta_devadm.o \
+	meta_devstamp.o \
+	meta_error.o \
+	meta_getdevs.o \
+	meta_hotspares.o \
+	meta_import.o \
+	meta_init.o \
+	meta_lib_prv.o \
+	meta_mdcf.o \
+	meta_med_err.o \
+	meta_mem.o \
+	meta_metad.o \
+	meta_metad_subr.o \
+	meta_med.o \
+	meta_mh.o \
+	meta_mirror.o \
+	meta_mirror_resync.o \
+	meta_mn_comm.o \
+	meta_mn_changelog.o \
+	meta_mn_handlers.o \
+	meta_mn_msg_table.o \
+	meta_mn_subr.o \
+	meta_mount.o \
+	meta_name.o \
+	meta_nameinfo.o \
+	meta_namespace.o \
+	meta_notify.o \
+	meta_se_notify.o \
+	meta_patch.o \
+	meta_patch_root.o \
+	meta_print.o \
+	meta_raid.o \
+	meta_raid_resync.o \
+	meta_rename.o \
+	meta_repartition.o \
+	meta_replace.o \
+	meta_reset.o \
+	meta_resync.o \
+	meta_runtime.o \
+	meta_set.o \
+	meta_set_drv.o \
+	meta_set_hst.o \
+	meta_set_med.o \
+	meta_set_prv.o \
+	meta_set_tkr.o \
+	meta_setup.o \
+	meta_smf.o \
+	meta_stat.o \
+	meta_sp.o \
+	meta_stripe.o \
+	meta_systemfile.o \
+	meta_tab.o \
+	meta_time.o \
+	meta_trans.o \
+	meta_userflags.o \
+	metarpcopen.o \
+	metasplitname.o \
+	metagetroot.o \
+	sdssc_bind.o
+
+#
+# meta_check.o is kept out of LOCAL_OBJS: it is part of OBJECTS but not
+# of OBJECTS64, so it does not receive the large-file CPPFLAGS below.
+#
+SPC_OBJS= meta_check.o
+
+# Map each object list back to the source files it is built from.
+CMN_SRCS =	$(CMN_OBJS:%.o=$(COMMON)/%.c)
+LOCAL_SRCS =	$(LOCAL_OBJS:%.o=../common/%.c)
+DERIVED_SRCS =	$(DERIVED_OBJS:%.o=%.c)
+SPC_SRCS = 	$(SPC_OBJS:%.o=../common/%.c)
+
+include ../../../Makefile.lib
+
+MAPDIR=         $(SRC)/lib/lvm/libmeta/spec/$(TRANSMACH)
+SPECMAPFILE =	$(MAPDIR)/mapfile
+#
+# OBJECTS64 is the subset of objects compiled with 64-bit file offsets
+# (see BIG_TARGETS below); OBJECTS is everything that goes into the library.
+#
+OBJECTS64 =	$(LOCAL_OBJS) $(DERIVED_OBJS) $(CMN_OBJS)
+OBJECTS =	$(OBJECTS64) $(SPC_OBJS)
+
+include $(SRC)/lib/lvm/Makefile.lvm
+
+# install this library in the root filesystem
+include ../../../Makefile.rootfs
+
+LIBS =		$(DYNLIB) $(LINTLIB)
+#
+# Default SRCS leaves out SPC_SRCS; for the lint library it is the lint
+# stub source, and for the lint goal it swaps the generated sources for
+# SPC_SRCS.
+#
+SRCS =		$(CMN_SRCS) $(LOCAL_SRCS) $(DERIVED_SRCS)
+$(LINTLIB) :=	SRCS = $(SRCDIR)/$(LINTSRC)
+lint :=		SRCS = $(CMN_SRCS) $(LOCAL_SRCS) $(SPC_SRCS)
+CPPFLAGS +=     -I$(SRC)/lib/lvm/libmeta/common/hdrs
+LDLIBS += 	-lnsl -lc -ladm -ldevid -lgen -lefi -ldevinfo -lscf
+# The rpcgen output is removed by clean.
+CLEANFILES += 	$(DERIVED_SRCS)
+
+.KEEP_STATE:
+
+# PIC objects that are compiled with the large-file interfaces enabled.
+BIG_TARGETS = $(OBJECTS64:%=pics/%)
+
+$(BIG_TARGETS) := CPPFLAGS += -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
+
+$(LINTLIB) := CPPFLAGS += -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
+
+all: $(LIBS)
+
+# Objects whose source lives in $(COMMON) rather than ../common.
+objs/%.o profs/%.o pics/%.o: $(COMMON)/%.c
+	$(COMPILE.c) -o $@ $<
+	$(POST_PROCESS_O)
+
+#
+# Generated sources.  rpcgen -c emits XDR routines and -l emits
+# client-side stubs.  For .x files under uts/common/sys/lvm the output
+# is piped through nawk, which replaces the first "uts/common/sys/lvm"
+# on each line with "head".
+# NOTE(review): presumably this redirects the generated #include of the
+# matching header to $(SRC)/head - confirm.
+#
+
+# -i 100 sets rpcgen's inline-code threshold for this file.
+mdiox_xdr.c: $(SRC)/uts/common/sys/lvm/mdiox.x
+	$(RPCGEN) $(RPCGENFLAGS) -c -i 100 $(SRC)/uts/common/sys/lvm/mdiox.x | \
+	nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+meta_basic_xdr.c: $(SRC)/uts/common/sys/lvm/meta_basic.x
+	$(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/uts/common/sys/lvm/meta_basic.x | \
+	nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+metad_clnt.c: $(SRC)/head/metad.x 
+	$(RPCGEN) $(RPCGENFLAGS) -l $(SRC)/head/metad.x -o $@
+
+metad_xdr.c: $(SRC)/head/metad.x
+	$(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/head/metad.x -o $@
+
+metamed_clnt.c: $(SRC)/uts/common/sys/lvm/metamed.x
+	$(RPCGEN) $(RPCGENFLAGS) -l $(SRC)/uts/common/sys/lvm/metamed.x | \
+	nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+metamed_xdr.c: $(SRC)/uts/common/sys/lvm/metamed.x 
+	$(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/uts/common/sys/lvm/metamed.x | \
+	nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+metamhd_clnt.c: $(SRC)/head/metamhd.x 
+	$(RPCGEN) $(RPCGENFLAGS) -l $(SRC)/head/metamhd.x -o $@
+
+metamhd_xdr.c: $(SRC)/head/metamhd.x 
+	$(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/head/metamhd.x -o $@
+
+mhdx_xdr.c: $(SRC)/uts/common/sys/lvm/mhdx.x
+	$(RPCGEN) $(RPCGENFLAGS) -c $(SRC)/uts/common/sys/lvm/mhdx.x | \
+	nawk '{sub(/uts\/common\/sys\/lvm/, "head"); print $$0}' >$@
+
+#
+# NOTE(review): unlike every other rule here this one passes neither
+# $(RPCGENFLAGS) nor the nawk path rewrite - confirm that is intended.
+#
+mdmn_commd_xdr.c: $(SRC)/uts/common/sys/lvm/mdmn_commd.x
+	$(RPCGEN) -c $(SRC)/uts/common/sys/lvm/mdmn_commd.x -o $@
+
+include $(SRC)/lib/lvm/Makefile.targ
diff --git a/usr/src/lib/lvm/libmeta/common/hdrs/meta_lib_prv.h b/usr/src/lib/lvm/libmeta/common/hdrs/meta_lib_prv.h
new file mode 100644
index 0000000000..e35cd8c07e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/hdrs/meta_lib_prv.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1992, 1993, 1994, 2000 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+/*
+ * Private libmeta declarations for meta_lib_prv.c.
+ *
+ * The include guard is named after this header so that it cannot
+ * collide with the guard of any other libmeta private header that is
+ * included in the same translation unit.
+ */
+#ifndef	_META_LIB_PRV_H
+#define	_META_LIB_PRV_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <meta.h>
+#include <ctype.h>
+#include <sys/mnttab.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * meta_lib_prv.c
+ *
+ * NOTE(review): presumably these open and close a handle on the mount
+ * table (see <sys/mnttab.h>); confirm against meta_lib_prv.c.
+ */
+extern	FILE		*open_mnttab(void);
+extern	int		close_mnttab(void);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _META_LIB_PRV_H */
diff --git a/usr/src/lib/lvm/libmeta/common/hdrs/meta_repartition.h b/usr/src/lib/lvm/libmeta/common/hdrs/meta_repartition.h
new file mode 100644
index 0000000000..f5053acccd
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/hdrs/meta_repartition.h
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2001, 2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_META_REPARTITION_H
+#define	_META_REPARTITION_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <meta.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * meta_repartition_drive() option flags
+ *
+ * Each flag is a distinct bit, so several can be combined in the
+ * "options" argument.  NOTE(review): the meaning of each flag is
+ * defined by meta_repartition.c - confirm there before relying on it.
+ */
+#define	MD_REPART_FORCE		0x01
+#define	MD_REPART_LEAVE_REP	0x02
+#define	MD_REPART_DONT_LABEL	0x04
+
+/* meta_repartition.c */
+extern	int meta_repartition_drive(mdsetname_t *sp,
+    mddrivename_t *dnp, int options, mdvtoc_t *vtocp, md_error_t *ep);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _META_REPARTITION_H */
diff --git a/usr/src/lib/lvm/libmeta/common/hdrs/meta_set_prv.h b/usr/src/lib/lvm/libmeta/common/hdrs/meta_set_prv.h
new file mode 100644
index 0000000000..6f63b161e1
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/hdrs/meta_set_prv.h
@@ -0,0 +1,96 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Private libmeta declarations for the diskset code (meta_setup.c,
+ * meta_set_prv.c).
+ *
+ * The include guard is named after this header so that it cannot
+ * collide with the guard of any other libmeta private header that is
+ * included in the same translation unit.
+ */
+#ifndef	_META_SET_PRV_H
+#define	_META_SET_PRV_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <meta.h>
+#include <ctype.h>
+#include <sys/lvm/md_convert.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Rollback helpers.  Both expand to a "goto rollback", so they may only
+ * be used inside a function that defines a "rollback" label.
+ *
+ * RB_PREEMPT jumps when md_got_sig() is non-zero; the caller supplies
+ * the terminating semicolon.  RB_TEST carries its own semicolon in
+ * DEBUG builds and expands to nothing otherwise.  Both are bare "if"
+ * statements, so do not use them as the body of an unbraced if/else.
+ */
+#define	RB_PREEMPT	if (md_got_sig()) goto rollback
+#ifdef DEBUG
+#define	RB_TEST(tstpt, tag, ep)	if (rb_test(tstpt, tag, (ep)) < 0) \
+					goto rollback;
+#else	/* !DEBUG */
+#define	RB_TEST(tstpt, tag, ep)
+#endif	/* DEBUG */
+
+/* meta_setup.c */
+extern int	procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
+
+#ifdef DEBUG
+extern int	rb_test(int rbt_sel_tpt, char *rbt_sel_tag, md_error_t *ep);
+#endif	/* DEBUG */
+
+/*
+ * Flag values used by the nodehasset() function.
+ */
+#define	NHS_N_EQ	0x00000001	/* name == */
+#define	NHS_NS_EQ	0x00000002	/* name, setno == */
+#define	NHS_NST_EQ	0x00000004	/* name, setno, TS == */
+#define	NHS_NSTG_EQ	0x00000008	/* name, setno, TS, genid == */
+#define	NHS_NST_EQ_G_GT	0x00000010	/* name, setno, TS ==, genid > */
+
+/*
+ * Node, set, and mediator names can be any printable characters
+ * (isprint()) except for the characters in the #define that follows.
+ */
+#define	INVALID_IN_NAMES	" *?/"
+
+/* meta_set_prv.c */
+extern	int		checkdrive_onnode(mdsetname_t *sp, mddrivename_t *dnp,
+			    char *node, md_error_t *ep);
+extern	side_t		getnodeside(char *node, md_set_desc *sd);
+extern	int		halt_set(mdsetname_t *sp, md_error_t *ep);
+extern	md_drive_desc	*metadrivedesc_append(md_drive_desc **dd,
+			    mddrivename_t *dnp, int dbcnt, int dbsize,
+			    md_timeval32_t timestamp, ulong_t genid,
+			    uint_t flags);
+extern	int		nodehasset(mdsetname_t *sp, char *node,
+			    uint_t match_flag, md_error_t *ep);
+extern	int		nodesuniq(mdsetname_t *sp, int cnt, char **strings,
+			    md_error_t *ep);
+extern	int		own_set(mdsetname_t *sp, char **owner_of_set,
+			    int forceflg, md_error_t *ep);
+extern	void		resync_genid(mdsetname_t *sp, md_set_desc *sd,
+			    ulong_t max_genid, int node_c, char **node_v);
+extern	int		setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd,
+			    int force, md_error_t *ep);
+extern	int		snarf_set(mdsetname_t *sp, bool_t stale_bool,
+				md_error_t *ep);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _META_SET_PRV_H */
diff --git a/usr/src/lib/lvm/libmeta/common/inc.flg b/usr/src/lib/lvm/libmeta/common/inc.flg
new file mode 100644
index 0000000000..58651e7f09
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/inc.flg
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 1995, 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+
+echo_file usr/src/common/lvm/md_crc.c
diff --git a/usr/src/lib/lvm/libmeta/common/llib-lmeta b/usr/src/lib/lvm/libmeta/common/llib-lmeta
new file mode 100644
index 0000000000..747bc0f9ae
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/llib-lmeta
@@ -0,0 +1,33 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2001 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <meta_lib_prv.h>
+#include <meta_set_prv.h>
diff --git a/usr/src/lib/lvm/libmeta/common/meta_admin.c b/usr/src/lib/lvm/libmeta/common/meta_admin.c
new file mode 100644
index 0000000000..9b3e13c10f
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_admin.c
@@ -0,0 +1,133 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1992-1994, 2000-2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+/*
+ * miscellaneous utilities
+ */
+
+#include <meta.h>
+
+/* cached descriptor for ADMSPECIAL; negative while the device is not open */
+static	int	meta_fd = -1;
+/* major number of ADMSPECIAL, filled in by open_admin() from fstat() */
+static	major_t	meta_major;
+
+/*
+ * open administrative device
+ *
+ * Opens ADMSPECIAL on first use, records its major number in
+ * meta_major and checks that the driver reports MD_DVERSION.  The
+ * descriptor is cached in meta_fd and handed back on later calls.
+ *
+ * Returns the descriptor (>= 0) on success.  On failure the error is
+ * recorded in *ep and the value of mdsyserror()/mderror(), or -1, is
+ * returned.  A descriptor that fails the fstat() or version check is
+ * closed and forgotten, so a later call repeats the checks instead of
+ * handing back a descriptor that never passed them.
+ */
+int
+open_admin(
+	md_error_t	*ep
+)
+{
+	struct stat	buf;
+	ulong_t		dversion = 0;
+	int		err;
+
+	/* already open and validated */
+	if (meta_fd >= 0)
+		return (meta_fd);
+
+	/* try read/write, fall back to read-only */
+	if ((meta_fd = open(ADMSPECIAL, O_RDWR, 0)) < 0) {
+		if (errno != EACCES)
+			return (mdsyserror(ep, errno, ADMSPECIAL));
+		if ((meta_fd = open(ADMSPECIAL, O_RDONLY, 0)) < 0)
+			return (mdsyserror(ep, errno, ADMSPECIAL));
+	}
+
+	/* get major */
+	if (fstat(meta_fd, &buf) != 0) {
+		err = errno;	/* close() may overwrite errno */
+		(void) close(meta_fd);
+		meta_fd = -1;
+		return (mdsyserror(ep, err, ADMSPECIAL));
+	}
+	meta_major = major(buf.st_rdev);
+
+	/*
+	 * check driver version
+	 *
+	 * meta_fd is deliberately still set while metaioctl() runs.
+	 * NOTE(review): metaioctl() presumably obtains the descriptor
+	 * through open_admin() - confirm.
+	 */
+	if (metaioctl(MD_IOCGVERSION, &dversion, ep, NULL) != 0) {
+		(void) close(meta_fd);
+		meta_fd = -1;
+		return (-1);
+	}
+	if (dversion != MD_DVERSION) {
+		(void) close(meta_fd);
+		meta_fd = -1;
+		return (mderror(ep, MDE_DVERSION, NULL));
+	}
+
+	/* return fd */
+	return (meta_fd);
+}
+
+/*
+ * close administrative device
+ *
+ * Closes the descriptor cached by open_admin(), if there is one.
+ * Returns 0 on success or when nothing was open; if close() fails the
+ * error is recorded in *ep and the value of mdsyserror() is returned.
+ */
+int
+close_admin(
+	md_error_t	*ep
+)
+{
+	int	fd = meta_fd;
+
+	if (fd < 0)
+		return (0);
+
+	/*
+	 * Forget the descriptor before closing it: after a failed
+	 * close() its state is unspecified, so it must never be handed
+	 * out again by open_admin() or closed a second time.
+	 */
+	meta_fd = -1;
+	if (close(fd) == -1)
+		return (mdsyserror(ep, errno, ADMSPECIAL));
+
+	return (0);
+}
+
+/*
+ * Tell whether "dev" is a metadevice: non-zero when its major number
+ * equals the major number of the administrative device, zero otherwise.
+ *
+ * NOTE(review): failure of open_admin() is only caught by assert(); in
+ * a build with NDEBUG the comparison is made regardless.
+ */
+int
+meta_dev_ismeta(
+	md_dev64_t	dev
+)
+{
+	md_error_t	status = mdnullerror;
+	int		fd = open_admin(&status);
+	int		ismeta;
+
+	/* open_admin() is what fills in meta_major */
+	assert(fd >= 0);
+
+	ismeta = (meta_getmajor(dev) == meta_major);
+	return (ismeta);
+}
+
+
+/*
+ * Return the value the driver reports for MD_IOCGETNUNITS, or -1 (with
+ * *ep set by metaioctl()) if the ioctl fails.  A non-zero answer is
+ * remembered, so the driver is asked again only while the stored value
+ * is still zero.
+ */
+int
+meta_get_nunits(md_error_t *ep)
+{
+	static set_t		cached_nunits = 0;
+
+	/* answer already known */
+	if (cached_nunits != 0)
+		return (cached_nunits);
+
+	if (metaioctl(MD_IOCGETNUNITS, &cached_nunits, ep, NULL) != 0)
+		return (-1);
+
+	return (cached_nunits);
+}
+
+/*
+ * Build the 64-bit device number for metadevice minor "mnum": the
+ * administrative device's major number shifted left by NBITSMINOR64,
+ * combined with the minor number.
+ *
+ * NOTE(review): failure of open_admin() is only caught by assert().
+ */
+md_dev64_t
+metamakedev(minor_t mnum)
+{
+	md_error_t	status = mdnullerror;
+	int		fd = open_admin(&status);
+	md_dev64_t	dev;
+
+	/* open_admin() is what fills in meta_major */
+	assert(fd >= 0);
+
+	dev = (md_dev64_t)meta_major << NBITSMINOR64;
+	dev |= mnum;
+
+	return (dev);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_attach.c b/usr/src/lib/lvm/libmeta/common/meta_attach.c
new file mode 100644
index 0000000000..275640d927
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_attach.c
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 1992-2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+/*
+ * attach operations
+ */
+
+#include <meta.h>
+
+/*
+ * Grow the metadevice "namep" by issuing MD_IOCGROW to its driver.
+ *
+ * "big_or_little" selects the create option: MD_CRO_64BIT when it
+ * equals MD_64BIT_META_DEV, MD_CRO_32BIT for any other value.
+ *
+ * Returns 0 on success.  Returns -1 if the driver name cannot be
+ * determined; if the ioctl fails, the error is moved into *ep and the
+ * value of mdstealerror() is returned.
+ */
+int
+meta_concat_generic(
+	mdsetname_t		*sp,
+	mdname_t		*namep,
+	u_longlong_t		big_or_little,
+	md_error_t		*ep
+)
+{
+	md_grow_params_t	grow;
+	char			*drvname;
+
+	/* the device must belong to the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(namep->dev)));
+
+	/* which driver owns this device? */
+	drvname = metagetmiscname(namep, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* fill in the request */
+	(void) memset(&grow, 0, sizeof (grow));
+	grow.options = (big_or_little == MD_64BIT_META_DEV) ?
+	    MD_CRO_64BIT : MD_CRO_32BIT;
+	grow.mnum = meta_getminor(namep->dev);
+	MD_SETDRIVERNAME(&grow, drvname, sp->setno);
+
+	/* ask the driver to grow the device */
+	if (metaioctl(MD_IOCGROW, &grow, &grow.mde, namep->cname) != 0)
+		return (mdstealerror(ep, &grow.mde));
+
+	/* cached information about the device is now stale */
+	meta_invalidate_name(namep);
+
+	return (0);
+}
+
+/*
+ * Grow every ancestor of "childnp", one level at a time.
+ *
+ * At each level the walk stops with 0 when the device has no parent,
+ * has MD_MULTI_PARENT as its parent, or when the parent is not a
+ * metadevice or is a soft partition.  Otherwise the parent is grown
+ * with meta_concat_generic() and becomes the child for the next level.
+ * Returns -1 as soon as any lookup or grow fails.
+ */
+int
+meta_concat_parent(
+	mdsetname_t	*sp,
+	mdname_t	*childnp,
+	md_error_t	*ep
+)
+{
+	for (;;) {
+		md_common_t	*mdp;
+		md_unit_t	*mup;
+		mdname_t	*parentnp;
+
+		/* should have a set */
+		assert(sp != NULL);
+		assert(sp->setno == MD_MIN2SET(meta_getminor(childnp->dev)));
+
+		/* look at the child's unit to find its parent */
+		mdp = meta_get_unit(sp, childnp, ep);
+		if (mdp == NULL)
+			return (-1);
+
+		/* no parent, or more than one: nothing to grow */
+		if (! MD_HAS_PARENT(mdp->parent) ||
+		    mdp->parent == MD_MULTI_PARENT)
+			return (0);
+
+		/* single parent; metamnumname() may update sp */
+		parentnp = metamnumname(&sp, mdp->parent, 0, ep);
+		if (parentnp == NULL)
+			return (-1);
+
+		/* don't grow non-metadevices ... */
+		if (! metaismeta(parentnp))
+			return (0);
+		/* ... or soft partitions (meta_sp_issp() returns 0) */
+		if (meta_sp_issp(sp, parentnp, ep) == 0)
+			return (0);
+
+		mup = meta_get_mdunit(sp, childnp, ep);
+		if (mup == NULL)
+			return (-1);
+
+		/* grow parent, passing the child's un_revision along */
+		if (meta_concat_generic(sp, parentnp, mup->c.un_revision,
+		    ep) != 0)
+			return (-1);
+
+		/* move up one level and check for parents of parents */
+		childnp = parentnp;
+	}
+	/* NOTREACHED */
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_check.c b/usr/src/lib/lvm/libmeta/common/meta_check.c
new file mode 100644
index 0000000000..94c103e0aa
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_check.c
@@ -0,0 +1,874 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * check components
+ */
+
+#include <meta.h>
+#include "meta_lib_prv.h"
+
+#include <sys/mnttab.h>
+#include <sys/swap.h>
+
+#include "meta_lib_prv.h"
+#include <devid.h>
+#include <sys/dumpadm.h>
+
+/*
+ * static list(s)
+ *
+ * dev_list_t is one node of a singly linked cache that pairs a device
+ * name with the device id obtained for it.  New nodes are pushed on the
+ * front of devnamelist by add_to_devname_list(), and the list is searched
+ * by name in meta_check_samedrive() so that a device does not have to be
+ * opened again just to fetch its devid.
+ */
+typedef struct dev_list {
+	char			*dev_name;	/* copy of the device name */
+	ddi_devid_t		devid;		/* cached devid; may be NULL */
+	struct dev_list		*dev_nxt;	/* next node, NULL at the end */
+} dev_list_t;
+
+/* head of the name/devid cache; NULL while the cache is empty */
+static dev_list_t	*devnamelist = NULL;
+
+/*
+ * Release a swap table built by get_swapinfo(): every non-NULL path
+ * buffer hanging off the entries is freed first, then the table itself.
+ * A NULL table is accepted and ignored.
+ */
+static void
+free_swapinfo(
+	struct swaptable	*swtp
+)
+{
+	int			idx = 0;
+
+	if (swtp != NULL) {
+		/* drop the per-entry path buffers */
+		while (idx < swtp->swt_n) {
+			if (swtp->swt_ent[idx].ste_path != NULL)
+				Free(swtp->swt_ent[idx].ste_path);
+			idx++;
+		}
+
+		/* and finally the table */
+		Free(swtp);
+	}
+}
+
+/*
+ * get swap info
+ *
+ * Asks swapctl(SC_GETNSWP) for the number of swap entries, allocates a
+ * swaptable with that many entries (each with a MAXPATHLEN path buffer)
+ * and fills it in with swapctl(SC_LIST).
+ *
+ * On success returns 0; *swtpp is the table (to be released with
+ * free_swapinfo()) and *nswap is the value returned by SC_LIST.
+ * On failure returns nonzero with the error recorded in ep and *swtpp
+ * set to NULL, so the caller never sees a pointer to freed memory.
+ */
+static int
+get_swapinfo(
+	struct swaptable	**swtpp,
+	int			*nswap,
+	md_error_t		*ep
+)
+{
+	int			i;
+	size_t			swtsize;
+
+	*swtpp = NULL;
+
+	/* get number of entries */
+	if ((*nswap = swapctl(SC_GETNSWP, NULL)) < 0) {
+		return (mdsyserror(ep, errno, "swapctl(SC_GETNSWP)"));
+	}
+
+	/*
+	 * allocate structure
+	 *
+	 * Size the table from the whole struct rather than from swt_n
+	 * alone: sizeof (swt_n) does not account for any alignment padding
+	 * the compiler places between swt_n and swt_ent[], which could
+	 * leave the last entry short.  This is the sizing used in the
+	 * swapctl(2) example; it may over-allocate slightly, never under.
+	 */
+	swtsize = sizeof (struct swaptable) +
+	    ((*nswap) * sizeof ((*swtpp)->swt_ent[0]));
+	*swtpp = (struct swaptable *)Zalloc(swtsize);
+	(*swtpp)->swt_n = *nswap;
+	for (i = 0; (i < (*nswap)); ++i)
+		(*swtpp)->swt_ent[i].ste_path = Zalloc(MAXPATHLEN);
+
+	/* get info */
+	if (((*nswap) = swapctl(SC_LIST, (*swtpp))) < 0) {
+		/* record errno before anything else can disturb it */
+		(void) mdsyserror(ep, errno, "swapctl(SC_LIST)");
+		free_swapinfo(*swtpp);
+		*swtpp = NULL;		/* don't hand back a dangling pointer */
+		return (-1);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Check whether a device is in use as swap.
+ *
+ * Each entry of the current swap table is resolved to a name; entries
+ * that cannot be resolved are skipped (and the error cleared).  If np is
+ * the very same device as a swap entry, MDE_IS_SWAPPED is reported.
+ * Otherwise the two are run through meta_check_overlap() and, when that
+ * returns nonzero, MDE_OVERLAP_SWAP is recorded.  Scanning stops at the
+ * first nonzero result.
+ *
+ * Returns 0 when nothing was found, -1 if the swap table could not be
+ * read, otherwise the nonzero result of the failed check.
+ */
+static int
+meta_check_swapped(
+	mdsetname_t		*sp,
+	mdname_t		*np,
+	md_error_t		*ep
+)
+{
+	struct swaptable	*swtp;
+	int			nswap;
+	int			i;
+	int			rval = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* get swap info */
+	if (get_swapinfo(&swtp, &nswap, ep) != 0)
+		return (-1);
+
+	/* look for match */
+	for (i = 0; i < nswap; ++i) {
+		mdname_t	*snp;
+
+		snp = metaname(&sp, swtp->swt_ent[i].ste_path, ep);
+		if (snp == NULL) {
+			/* unresolvable swap entry: ignore it */
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (np->dev != snp->dev) {
+			/* not swap - does it overlap */
+			rval = meta_check_overlap(snp->cname, np, 0, -1,
+			    snp, 0, -1, ep);
+			if (rval != 0) {
+				(void) mdoverlaperror(ep, MDE_OVERLAP_SWAP,
+					np->cname, NULL, snp->cname);
+			}
+		} else {
+			rval = mddeverror(ep, MDE_IS_SWAPPED,
+			    np->dev, np->cname);
+		}
+
+		/* first problem found ends the scan */
+		if (rval != 0)
+			break;
+	}
+	free_swapinfo(swtp);
+
+	/* 0 if no conflict was found */
+	return (rval);
+}
+
+/*
+ * Is a drive currently swapped on?
+ *
+ * Every swap table entry that resolves to a name has its drive name
+ * compared with dnp->cname; each match records MDE_IS_SWAPPED for the
+ * drive.  The whole table is always walked.
+ *
+ * Returns 0 when no swap entry lives on the drive, -1 if the swap table
+ * could not be read, otherwise the result of mddeverror().
+ */
+int
+meta_check_driveswapped(
+	mdsetname_t		*sp,
+	mddrivename_t		*dnp,
+	md_error_t		*ep
+)
+{
+	struct swaptable	*swtp;
+	int			nswap;
+	int			i;
+	int			rval = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* get swap info */
+	if (get_swapinfo(&swtp, &nswap, ep) != 0)
+		return (-1);
+
+	/* look for match */
+	for (i = 0; i < nswap; ++i) {
+		mdname_t	*snp;
+
+		snp = metaname(&sp, swtp->swt_ent[i].ste_path, ep);
+		if (snp == NULL) {
+			/* unresolvable swap entry: ignore it */
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* a different drive is of no interest */
+		if (strcmp(dnp->cname, snp->drivenamep->cname) != 0)
+			continue;
+
+		rval = mddeverror(ep, MDE_IS_SWAPPED, NODEV64, dnp->cname);
+	}
+	free_swapinfo(swtp);
+
+	/* 0 if the drive carries no swap */
+	return (rval);
+}
+
+/*
+ * Check whether a device is the dump device.
+ *
+ * The configured dump device is read from /dev/dump with DIOCGETDEV.
+ * If that ioctl fails, or the returned path cannot be resolved to a
+ * name, the device is treated as not in use.  If np is the dump device,
+ * MDE_IS_DUMP is reported; otherwise the two are run through
+ * meta_check_overlap() and MDE_OVERLAP_DUMP is recorded when that
+ * returns nonzero.
+ *
+ * Returns 0 when there is no conflict, otherwise a nonzero error result
+ * (including the case where /dev/dump cannot be opened).
+ */
+static int
+meta_check_dump(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char		device[MAXPATHLEN];
+	mdname_t	*dump_np;
+	int		dump_fd;
+	int		rval;
+
+	dump_fd = open("/dev/dump", O_RDONLY);
+	if (dump_fd < 0)
+		return (mdsyserror(ep, errno, "/dev/dump"));
+
+	/* no dump device information available: nothing to check */
+	if (ioctl(dump_fd, DIOCGETDEV, device) == -1) {
+		(void) close(dump_fd);
+		return (0);
+	}
+
+	/* a dump device we cannot name is ignored */
+	dump_np = metaname(&sp, device, ep);
+	if (dump_np == NULL) {
+		mdclrerror(ep);
+		(void) close(dump_fd);
+		return (0);
+	}
+
+	if (np->dev != dump_np->dev) {
+		/* not a dump device - but does it overlap? */
+		rval = meta_check_overlap(dump_np->cname, np, 0, -1,
+		    dump_np, 0, -1, ep);
+		if (rval != 0) {
+			(void) mdoverlaperror(ep, MDE_OVERLAP_DUMP,
+				np->cname, NULL, dump_np->cname);
+		}
+	} else {
+		rval = mddeverror(ep, MDE_IS_DUMP,
+		    np->dev, np->cname);
+	}
+
+	(void) close(dump_fd);
+	return (rval);
+}
+
+/*
+ * Check whether a device is mounted.
+ *
+ * Walks the mount table obtained from open_mnttab().  Entries without a
+ * special device or mount point, with a mount point that is not an
+ * absolute path, or with one of the file system types listed in
+ * skip_fstypes[] are ignored, as are entries whose special device cannot
+ * be resolved to a name.  If np is the mounted device, MDE_IS_MOUNTED is
+ * reported; otherwise the two are run through meta_check_overlap() and
+ * MDE_OVERLAP_MOUNTED is recorded when that returns nonzero.
+ *
+ * The stream returned by open_mnttab() is not closed here.
+ * NOTE(review): presumably open_mnttab() owns that stream - confirm.
+ *
+ * Returns 0 when no conflict is found, otherwise a nonzero error result.
+ */
+static int
+meta_check_mounted(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	/* file system types that are never checked */
+	static const char	*skip_fstypes[] = {
+		"nfs", "autofs", "proc", "tmpfs", "cachefs",
+		"lofs", "rfs", "fd", "mntfs", "devfs", NULL
+	};
+	FILE		*mfp;
+	struct mnttab	m;
+	int		rval = 0;
+	char		mountp[MNT_LINE_MAX];
+	char		mnt_special[MNT_LINE_MAX];
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* look in mnttab */
+	mfp = open_mnttab();
+	if (mfp == NULL)
+		return (mdsyserror(ep, errno, MNTTAB));
+
+	while ((getmntent(mfp, &m) == 0) && (rval == 0)) {
+		mdname_t	*mnp;
+		int		skip = 0;
+		int		t;
+
+		/* incomplete entry, or mount point not an absolute path */
+		if ((m.mnt_special == NULL) || (m.mnt_mountp == NULL) ||
+		    (m.mnt_mountp[0] != '/'))
+			continue;
+
+		/* uninteresting file system type? */
+		for (t = 0; skip_fstypes[t] != NULL; t++) {
+			if (strcmp(m.mnt_fstype, skip_fstypes[t]) == 0) {
+				skip = 1;
+				break;
+			}
+		}
+		if (skip)
+			continue;
+
+		/* work on private copies of the mnttab strings */
+		(void) strcpy(mountp, m.mnt_mountp);
+		(void) strcpy(mnt_special, m.mnt_special);
+
+		mnp = metaname(&sp, mnt_special, ep);
+		if (mnp == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (np->dev != mnp->dev) {
+			/* device isn't in mnttab - does it overlap? */
+			rval = meta_check_overlap(mnp->cname, np, 0, -1,
+			    mnp, 0, -1, ep);
+			if (rval != 0) {
+				(void) mdoverlaperror(ep, MDE_OVERLAP_MOUNTED,
+					np->cname, mountp, mnp->cname);
+			}
+		} else {
+			rval = mduseerror(ep, MDE_IS_MOUNTED,
+			    np->dev, mountp, np->cname);
+		}
+	}
+
+	/* 0 if no conflict was found */
+	return (rval);
+}
+
+
+/*
+ * Is a file system currently mounted on this disk drive?
+ *
+ * Walks the mount table obtained from open_mnttab().  Entries without a
+ * special device or mount point, with a mount point that is not an
+ * absolute path, or with one of the file system types listed in
+ * skip_fstypes[] are ignored, as are entries whose special device cannot
+ * be resolved to a name.  When the drive of a mounted device has the
+ * same name as dnp, MDE_IS_MOUNTED is reported for the drive.
+ *
+ * The stream returned by open_mnttab() is not closed here.
+ * NOTE(review): presumably open_mnttab() owns that stream - confirm.
+ *
+ * Returns 0 when nothing is mounted from the drive, otherwise a nonzero
+ * error result.
+ */
+int
+meta_check_drivemounted(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	/* file system types that are never checked */
+	static const char	*skip_fstypes[] = {
+		"nfs", "autofs", "proc", "tmpfs",
+		"cachefs", "lofs", "rfs", "fd", NULL
+	};
+	FILE		*mfp;
+	struct mnttab	m;
+	int		rval = 0;
+	char		mountp[MNT_LINE_MAX];
+	char		mnt_special[MNT_LINE_MAX];
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* look in mnttab */
+	mfp = open_mnttab();
+	if (mfp == NULL)
+		return (mdsyserror(ep, errno, MNTTAB));
+
+	while ((getmntent(mfp, &m) == 0) && (rval == 0)) {
+		mdname_t	*mnp;
+		int		skip = 0;
+		int		t;
+
+		/* incomplete entry, or mount point not an absolute path */
+		if ((m.mnt_special == NULL) || (m.mnt_mountp == NULL) ||
+		    (m.mnt_mountp[0] != '/'))
+			continue;
+
+		/* uninteresting file system type? */
+		for (t = 0; skip_fstypes[t] != NULL; t++) {
+			if (strcmp(m.mnt_fstype, skip_fstypes[t]) == 0) {
+				skip = 1;
+				break;
+			}
+		}
+		if (skip)
+			continue;
+
+		/* work on private copies of the mnttab strings */
+		(void) strcpy(mountp, m.mnt_mountp);
+		(void) strcpy(mnt_special, m.mnt_special);
+
+		mnp = metaname(&sp, mnt_special, ep);
+		if (mnp == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* mounted from some other drive */
+		if (strcmp(dnp->cname, mnp->drivenamep->cname) != 0)
+			continue;
+
+		rval = mduseerror(ep, MDE_IS_MOUNTED, NODEV64,
+		    mountp, dnp->cname);
+	}
+
+	/* 0 if nothing is mounted from the drive */
+	return (rval);
+}
+
+/*
+ * Check whether the named device is already in use, or overlaps a
+ * device that is.  Depending on the bits set in inuse_flags the device
+ * is checked, in this order, against mounted file systems
+ * (MDCHK_MOUNTED), swap devices (MDCHK_SWAP) and the dump device
+ * (MDCHK_DUMP).  Each of those checks also looks for an overlap when
+ * the device itself is not in use.
+ *
+ * Returns 0 if every requested check passed, otherwise the nonzero
+ * result of the first check that failed; later checks are not run.
+ */
+int
+meta_check_inuse(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdinuseopts_t	inuse_flags,
+	md_error_t	*ep
+)
+{
+	int			rval;
+
+	if (inuse_flags & MDCHK_MOUNTED) {
+		rval = meta_check_mounted(sp, np, ep);
+		if (rval != 0)
+			return (rval);
+	}
+
+	if (inuse_flags & MDCHK_SWAP) {
+		rval = meta_check_swapped(sp, np, ep);
+		if (rval != 0)
+			return (rval);
+	}
+
+	if (inuse_flags & MDCHK_DUMP) {
+		rval = meta_check_dump(sp, np, ep);
+		if (rval != 0)
+			return (rval);
+	}
+
+	/* nothing requested found the device in use */
+	return (0);
+}
+
+/*
+ * Check whether a drive already belongs to a set other than sp.
+ *
+ * Set numbers 1 .. max_sets - 1 are visited, skipping sp's own number.
+ * A set number for which no set exists (MDE_NO_SET) is skipped.  If the
+ * lookup fails with MDE_DB_NODB the search is abandoned and 0 returned.
+ * For every other set found, its cached state is flushed and
+ * meta_is_drive_in_thisset() is asked about the drive.
+ *
+ * Returns 0 if the drive was not found in another set, -1 on error, or
+ * the result of mddserror(MDE_DS_DRIVEINSET) if it was.
+ */
+int
+meta_check_driveinset(mdsetname_t *sp, mddrivename_t *dn, md_error_t *ep)
+{
+	set_t		max_sets;
+	set_t		setno;
+
+	max_sets = get_max_sets(ep);
+	if (max_sets == 0)
+		return (-1);
+
+	for (setno = 1; setno < max_sets; setno++) {
+		mdsetname_t	*other;
+		int		found;
+
+		/* our own set does not count */
+		if (setno == sp->setno)
+			continue;
+
+		other = metasetnosetname(setno, ep);
+		if (other == NULL) {
+			/* no database: give up looking, report no conflict */
+			if (mdismddberror(ep, MDE_DB_NODB)) {
+				mdclrerror(ep);
+				return (0);
+			}
+			/* anything but "no such set" is a real error */
+			if (! mdiserror(ep, MDE_NO_SET))
+				return (-1);
+			mdclrerror(ep);
+			continue;
+		}
+
+		metaflushsetname(other);
+
+		found = meta_is_drive_in_thisset(other, dn, FALSE, ep);
+		if (found == -1)
+			return (-1);
+
+		if (found) {
+			return (mddserror(ep, MDE_DS_DRIVEINSET, sp->setno,
+			    other->setname, dn->cname, sp->setname));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Push a device name/device id pair onto the front of the devname
+ * cache.  The name is duplicated; the devid value is stored as passed.
+ */
+static void
+add_to_devname_list(
+	char 		*device_name,		/* fully qualified dev name */
+	ddi_devid_t	devid			/* device id */
+)
+{
+	dev_list_t	*entry = Zalloc(sizeof (dev_list_t));
+
+	/* the new node becomes the head of the list */
+	entry->dev_nxt = devnamelist;
+	entry->devid = devid;
+	entry->dev_name = Strdup(device_name);
+	devnamelist = entry;
+}
+
+/*
+ * check for same drive
+ *
+ * Decides whether two component names refer to the same physical drive.
+ *
+ * Returns 1 if they are the same drive, 0 if they are not (or if either
+ * name is NULL, has no block name, or cannot be opened), and -1 if it
+ * cannot be determined.
+ */
+int
+meta_check_samedrive(
+	mdname_t	*np1,		/* first comp */
+	mdname_t	*np2,		/* second comp */
+	md_error_t	*ep
+)
+{
+
+	mdcinfo_t	*cinfop1, *cinfop2;
+	mdnmtype_t	type1;
+	mdnmtype_t	type2;
+	int		l = 0;
+
+	char		*name1 = NULL;
+	char		*name2 = NULL;
+
+	int		retval = -1;
+	int		fd1 = -1;
+	int		fd2 = -1;
+	int		rc1 = -2, rc2 = -2;
+	uint_t		strl1 = 0, strl2 = 0;
+	int		devid1_found = 0;
+	int		devid2_found = 0;
+
+	ddi_devid_t	devid1 = NULL;
+	ddi_devid_t	devid2 = NULL;
+	dev_list_t	*dnlp = NULL;
+
+	/*
+	 * The process of determining if 2 names are the same drive is
+	 * as follows:
+	 *
+	 * Case 1 - The filenames are identical
+	 *
+	 * Case 2 - Either name is a metadevice name.  If so then they
+	 *	are not the same drive.
+	 *
+	 * Case 3 - Both devices have a devid
+	 * 	get and compare the devids for the devices. If both
+	 * 	devices have a devid then the compare is all that is
+	 *	needed and we are done.
+	 *
+	 * Case 4 - One or more devices does not have a devid
+	 *	start by doing a simple compare of the name, if they
+	 *	are the same just return.
+	 *
+	 *	If the names differ then keep going and see if they
+	 *	may be the same underlying device.  First check to
+	 *	see if the sd name is the same (old code).
+	 *
+	 *	Then check the major and minor numbers to see if
+	 *	they are the same.  If they are then return (old code).
+	 *
+	 *	Next compare the raw name and the component name and
+	 *	if they are the same then return.
+	 *
+	 *	All else has failed so use the component name (cname)
+	 *	component number and unit number.  If they all are
+	 *	equal then call them the same drive.
+	 *
+	 */
+
+	/*
+	 * This test has to come before anything looks inside the names;
+	 * only after it is it safe to read the drive types.
+	 */
+	if ((np1 == NULL) || (np2 == NULL))
+		return (0);
+
+	type1 = np1->drivenamep->type;
+	type2 = np2->drivenamep->type;
+
+	assert(type1 != MDT_FAST_META && type1 != MDT_FAST_COMP);
+	assert(type2 != MDT_FAST_META && type2 != MDT_FAST_COMP);
+
+	/* if the name structs are the same then the drives must be */
+	if (np1 == np2)
+		return (1);
+
+	name1 = np1->bname;
+	name2 = np2->bname;
+
+	if ((name1 == NULL) || ((strl1 = strlen(name1)) == 0) ||
+	    (name2 == NULL) || ((strl2 = strlen(name2)) == 0))
+		return (0);
+
+	if ((strl1 == strl2) && (strcmp(name1, name2) == 0)) {
+		/* names are identical */
+		return (1);
+	}
+
+	if (is_metaname(name1) || is_metaname(name2))
+		return (0);
+
+	/*
+	 * Check to see if the devicename is in the static list.  If so,
+	 * use its devid.  Otherwise do the expensive operations
+	 * of opening the device, getting the devid, and closing the
+	 * device.  Add the result into the static list.
+	 *
+	 * The case where this list will be useful is when there are soft
+	 * partitions on multiple drives and a new soft partition is being
+	 * created.  In that situation the underlying physical device name
+	 * for the new soft partition would be compared against each of the
+	 * existing soft partitions.  Without this static list that would
+	 * involve 2 opens, closes, and devid gets for each existing soft
+	 * partition
+	 */
+	for (dnlp = devnamelist;
+			(dnlp != NULL) && !(devid1_found && devid2_found);
+			dnlp = dnlp->dev_nxt) {
+		if (!devid1_found && (strcmp(dnlp->dev_name, name1) == 0)) {
+			devid1_found = 1;
+			devid1 = dnlp->devid;
+			/* a cached NULL devid means "has no devid" */
+			if (devid1 == NULL)
+				rc1 = 1;
+			else
+				rc1 = 0;
+			continue;
+		}
+		if (!devid2_found && (strcmp(dnlp->dev_name, name2) == 0)) {
+			devid2_found = 1;
+			devid2 = dnlp->devid;
+			/* a cached NULL devid means "has no devid" */
+			if (devid2 == NULL)
+				rc2 = 1;
+			else
+				rc2 = 0;
+			continue;
+		}
+	}
+
+	/*
+	 * Start by checking if the device has a device id, and if they
+	 * are equal.  If they are there is no question there is a match.
+	 *
+	 * The process here is open each disk, get the devid for each
+	 * disk.  If they both have a devid compare them and return
+	 * the results.
+	 */
+	if (!devid1_found) {
+		if ((fd1 = open(name1, O_RDONLY | O_NDELAY)) < 0) {
+			return (0);
+		}
+		rc1 = devid_get(fd1, &devid1);
+		(void) close(fd1);
+
+		/* add the name and devid to the cache */
+		add_to_devname_list(name1, devid1);
+	}
+
+	if (!devid2_found) {
+		if ((fd2 = open(name2, O_RDONLY | O_NDELAY)) < 0) {
+			return (0);
+		}
+		rc2 = devid_get(fd2, &devid2);
+		(void) close(fd2);
+
+		/* add the name and devid to the cache */
+		add_to_devname_list(name2, devid2);
+	}
+
+
+	if ((rc1 == 0) && (rc2 == 0)) {
+		if (devid_compare(devid1, devid2) == 0)
+			retval = 1; /* same drive */
+		else
+			retval = 0; /* different drives */
+
+	}
+
+	if (retval >= 0) {
+		return (retval);
+	}
+
+	/*
+	 * At this point in time one of the two drives did not have a
+	 * device ID.  Do not make the assumption that if one drive
+	 * did have a device id and the other did not that they are not
+	 * the same.  One drive could be covered by a device and still
+	 * be the same drive.  This is a general flaw in the system at
+	 * this time.
+	 */
+
+	/*
+	 * The optimization can not happen if we are given an old style name
+	 * in the form /dev/XXNN[a-h], since the name caches differently and
+	 * allows overlaps to happen.
+	 */
+	if (! ((sscanf(np1->bname, "/dev/%*[^0-9/]%*u%*[a-h]%n", &l) == 0 &&
+	    l == strlen(np1->bname)) ||
+	    (sscanf(np2->bname, "/dev/%*[^0-9/]%*u%*[a-h]%n", &l) == 0 &&
+	    l == strlen(np2->bname))) &&
+	    ((type1 == MDT_COMP) || (type1 == MDT_META)) &&
+	    ((type2 == MDT_COMP) || (type2 == MDT_META)))
+		return (np1->drivenamep == np2->drivenamep);
+
+	/* check for same drive */
+	if (meta_getmajor(np1->dev) != meta_getmajor(np2->dev))
+		return (0);		/* not same drive */
+
+	if (((cinfop1 = metagetcinfo(np1, ep)) == NULL) ||
+	    ((cinfop2 = metagetcinfo(np2, ep)) == NULL)) {
+		if ((strcmp(np1->drivenamep->cname,
+		    np2->drivenamep->cname) != 0) &&
+		    (strcmp(np1->drivenamep->rname,
+		    np2->drivenamep->rname) != 0)) {
+			mdclrerror(ep);
+			return (0);	/* not same drive */
+		} else {
+			return (-1);	/* can't tell */
+		}
+	} else if ((strncmp(cinfop1->cname, cinfop2->cname,
+	    sizeof (cinfop1->cname)) != 0) ||
+	    (cinfop1->cnum != cinfop2->cnum) ||
+	    (cinfop1->unit != cinfop2->unit)) {
+		return (0);		/* not same drive */
+	}
+
+	/* same drive */
+	return (1);
+}
+
+/*
+ * Check two components for overlap.
+ *
+ * Each component is described by a name, a starting block relative to
+ * the start of its partition, and a block count; a count of -1 stands
+ * for "through the end of the partition".  Components on different
+ * drives never overlap.  For components on the same drive, the absolute
+ * block ranges are derived from the vtoc and compared.
+ *
+ * Returns 0 if there is no overlap, -1 if it cannot be determined, or
+ * the result of mduseerror() - MDE_ALREADY when both names are the same
+ * device, MDE_OVERLAP otherwise - when the ranges intersect.
+ */
+int
+meta_check_overlap(
+	char		*uname,		/* user supplied name for errors */
+	mdname_t	*np1,		/* first comp */
+	diskaddr_t	slblk1,		/* first comp - start logical block */
+	diskaddr_t	nblks1,		/* first comp - # of blocks */
+	mdname_t	*np2,		/* second comp */
+	diskaddr_t	slblk2,		/* second comp - start logical block */
+	diskaddr_t	nblks2,		/* second comp - # of blocks */
+	md_error_t	*ep
+)
+{
+	diskaddr_t	sblk1, sblk2;	/* absolute first block */
+	diskaddr_t	eblk1, eblk2;	/* absolute block past the end */
+	mdvtoc_t	*vtocp1, *vtocp2;
+	uint_t		partno1, partno2;
+	mdpart_t	*partp1, *partp2;
+	int		same;
+
+	/* verify args */
+	if (slblk1 == MD_DISKADDR_ERROR) {
+		assert(0);
+		return (mdsyserror(ep, EINVAL, np1->cname));
+	}
+	if (slblk2 == MD_DISKADDR_ERROR) {
+		assert(0);
+		return (mdsyserror(ep, EINVAL, np2->cname));
+	}
+
+	/* check for same drive */
+	same = meta_check_samedrive(np1, np2, ep);
+	if (same < 0)
+		return (-1);			/* can't tell */
+	if (same == 0)
+		return (0);			/* not same drive */
+
+	/* both partition tables are needed; without them we can't tell */
+	vtocp1 = metagetvtoc(np1, FALSE, &partno1, ep);
+	if (vtocp1 == NULL)
+		return (-1);
+	vtocp2 = metagetvtoc(np2, FALSE, &partno2, ep);
+	if (vtocp2 == NULL)
+		return (-1);
+
+	/* absolute extent of the first component */
+	partp1 = &vtocp1->parts[partno1];
+	sblk1 = partp1->start + slblk1;
+	if (nblks1 == -1)
+		nblks1 = partp1->size - slblk1;
+	eblk1 = sblk1 + nblks1;
+
+	/* absolute extent of the second component */
+	partp2 = &vtocp2->parts[partno2];
+	sblk2 = partp2->start + slblk2;
+	if (nblks2 == -1)
+		nblks2 = partp2->size - slblk2;
+	eblk2 = sblk2 + nblks2;
+
+	/* neither range starts inside the other: no overlap */
+	if (! (((sblk1 >= sblk2) && (sblk1 < eblk2)) ||
+	    ((sblk2 >= sblk1) && (sblk2 < eblk1))))
+		return (0);
+
+	/* slice in use */
+	if (np1->dev == np2->dev) {
+		return (mduseerror(ep, MDE_ALREADY, np1->dev,
+		    uname, np1->cname));
+	}
+
+	/* slice overlaps */
+	return (mduseerror(ep, MDE_OVERLAP, np1->dev, uname, np1->cname));
+}
+
+/*
+ * Check to see if a device is in a metadevice.
+ *
+ * The checks run in this order, stopping at the first failure:
+ *   - unless MDCHK_ALLOW_REPSLICE is given or sp is the local set, the
+ *     component may not be the drive's replica slice;
+ *   - state database replicas: an MDE_ALREADY use error is forgiven
+ *     when MDCHK_ALLOW_MDDB is given and reported as MDE_HAS_MDDB
+ *     otherwise; any other failure is an error;
+ *   - stripes, mirrors, trans devices, soft partitions, hot spare
+ *     pools (skipped when MDCHK_ALLOW_HS is given) and RAID devices.
+ *
+ * Returns 0 if the device passed every check, nonzero otherwise.
+ */
+int
+meta_check_inmeta(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdchkopts_t	options,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	uint_t		partno;
+	uint_t		rep_slice;
+
+	/* see if replica slice is ok, only applies to disks in sets */
+	if (! (options & MDCHK_ALLOW_REPSLICE) && ! metaislocalset(sp)) {
+		if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
+			return (-1);
+		if (meta_replicaslice(np->drivenamep, &rep_slice, ep) != 0)
+			return (-1);
+		if (partno == rep_slice) {
+			return (mddeverror(ep, MDE_REPCOMP_INVAL, np->dev,
+			    np->cname));
+		}
+	}
+
+	/* check for databases */
+	if (meta_check_inreplica(sp, np, slblk, nblks, ep) != 0) {
+		/* only "already in use" can be handled here */
+		if (! mdisuseerror(ep, MDE_ALREADY))
+			return (-1);
+		if (! (options & MDCHK_ALLOW_MDDB)) {
+			return (mddeverror(ep, MDE_HAS_MDDB,
+			    np->dev, np->cname));
+		}
+		/* the caller tolerates a replica on this device */
+		mdclrerror(ep);
+	}
+
+	/* check metadevices */
+	if ((meta_check_instripe(sp, np, slblk, nblks, ep) != 0) ||
+	    (meta_check_inmirror(sp, np, slblk, nblks, ep) != 0) ||
+	    (meta_check_intrans(sp, np, options, slblk, nblks, ep) != 0) ||
+	    (meta_check_insp(sp, np, slblk, nblks, ep) != 0))
+		return (-1);
+
+	/* hot spare pools, unless the caller allows them */
+	if (! (options & MDCHK_ALLOW_HS) &&
+	    (meta_check_inhsp(sp, np, slblk, nblks, ep) != 0))
+		return (-1);
+
+	if (meta_check_inraid(sp, np, slblk, nblks, ep) != 0)
+		return (-1);
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Check to see if a device is in its set.
+ *
+ * The set owning np is looked up with metagetset(); for the local set
+ * the lookup is asked to bypass the daemon.  If the lookup fails with
+ * MDE_DB_NODB for a non-metadevice while sp is the local set, the
+ * device is taken to belong to sp.
+ *
+ * Returns 0 if np belongs to sp, -1 if its set cannot be determined, or
+ * the result of mddeverror() - MDE_IN_SHARED_SET when sp is the local
+ * set, MDE_NOT_IN_SET otherwise.
+ */
+int
+meta_check_inset(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mdsetname_t	*npsp;
+	int		bypass_daemon;
+
+	/* check devices set */
+	bypass_daemon = metaislocalset(sp) ? TRUE : FALSE;
+	npsp = metagetset(np, bypass_daemon, ep);
+	if (npsp == NULL) {
+		/*
+		 * Only a missing database, for a plain device looked up
+		 * against the local set, is tolerated.
+		 */
+		if (metaismeta(np) ||
+		    (! metaislocalset(sp)) ||
+		    (! mdismddberror(ep, MDE_DB_NODB)))
+			return (-1);
+
+		mdclrerror(ep);
+		npsp = sp;
+	}
+
+	/* check set */
+	if (metaissameset(sp, npsp))
+		return (0);
+
+	/* return appropriate error */
+	if (! metaislocalset(sp))
+		return (mddeverror(ep, MDE_NOT_IN_SET, np->dev, np->cname));
+
+	return (mddeverror(ep, MDE_IN_SHARED_SET, np->dev, np->cname));
+}
+
+/*
+ * Check to see if the current user is root.
+ *
+ * Returns 0 when the effective user id is 0; otherwise records
+ * MDE_NOPERM in ep and returns -1.
+ */
+int
+meta_check_root(md_error_t *ep)
+{
+	if (geteuid() == 0)
+		return (0);
+
+	(void) mderror(ep, MDE_NOPERM, "");
+	return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_db.c b/usr/src/lib/lvm/libmeta/common/meta_db.c
new file mode 100644
index 0000000000..e30eb58c06
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_db.c
@@ -0,0 +1,2517 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Metadevice database interfaces.
+ */
+
+#define	MDDB
+
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+#include <sys/lvm/md_crc.h>
+#include <sys/lvm/mdio.h>
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+
+/*
+ * Pairs a daemon name with a "kill value" string.
+ * NOTE(review): the kill value looks like a signal name to be sent to
+ * the daemon - confirm against the code that walks svmd_kill_list.
+ */
+struct svm_daemon {
+	char *svmd_name;	/* name of the daemon */
+	char *svmd_kill_val;	/* kill value string, e.g. "HUP" */
+};
+
+/* table of daemons and the kill value associated with each */
+struct svm_daemon svmd_kill_list[] = {
+		{"mdmonitord", "HUP"},
+		{"mddoors", "KILL"},
+	};
+
+/* number of entries in svmd_kill_list */
+#define	DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon))
+/* full path of the mdmonitord binary */
+#define	MDMONITORD	"/usr/sbin/mdmonitord"
+
+/* defined elsewhere; not declared by the headers included above */
+extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
+
+/*
+ * meta_get_lb_inittime asks the kernel, through the MD_DB_LBINITTIME
+ * ioctl, for the lb_inittime of the set identified by sp->setno.
+ *
+ * The c_timestamp member of the request is returned whether or not the
+ * ioctl succeeded; the request is zeroed before the call.  On failure
+ * the ioctl's error is moved into ep.
+ */
+md_timeval32_t
+meta_get_lb_inittime(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	mddb_config_t	cfg;
+
+	/* start from a clean request; only the set number is filled in */
+	(void) memset(&cfg, 0, sizeof (mddb_config_t));
+	cfg.c_setno = sp->setno;
+
+	if (metaioctl(MD_DB_LBINITTIME, &cfg, &cfg.c_mde, NULL) == 0)
+		return (cfg.c_timestamp);
+
+	/* hand the ioctl's error over to the caller */
+	(void) mdstealerror(ep, &cfg.c_mde);
+	return (cfg.c_timestamp);
+}
+
+/*
+ * mkmasterblks writes out the master blocks of the mddb to the replica.
+ *
+ * In a MN diskset, this is called by the node that is adding this replica
+ * to the diskset.
+ *
+ * sp		set the replica belongs to
+ * np		name of the replica device (rname is used in errors)
+ * fd		open descriptor for the replica device
+ * firstblk	block number of the master block
+ * dbsize	size of the mddb region in blocks, master block included
+ * inittime	set creation time, stored for non-local sets
+ * ep		error return
+ *
+ * The whole region is first written with zeros and read back, then the
+ * master block is built, checksummed, written and verified.
+ *
+ * Returns 0 on success, otherwise the result of mdsyserror() or
+ * mdmddberror().
+ */
+
+#define	MDDB_VERIFY_SIZE	8192
+
+static int
+mkmasterblks(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	int		fd,
+	daddr_t		firstblk,
+	int		dbsize,
+	md_timeval32_t	inittime,
+	md_error_t	*ep
+)
+{
+	int		consecutive;
+	md_timeval32_t	tp;
+	struct mddb_mb	*mb;
+	char		*buffer;
+	int		iosize;
+	md_set_desc	*sd;
+	int		mn_set = 0;
+	daddr_t		startblk;
+	int		cnt;
+	ddi_devid_t	devid;
+
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+
+		if (MD_MNSET_DESC(sd)) {
+			mn_set = 1;		/* Used later */
+		}
+	}
+
+	/*
+	 * Loop to verify the entire mddb region on disk is read/writable.
+	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
+	 * chunks.
+	 *
+	 * A side-effect of this loop is to zero out the entire mddb region
+	 *
+	 * Byte offsets are computed by widening the block number to off_t
+	 * before multiplying, so that the product is not truncated to the
+	 * width of daddr_t.
+	 */
+	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
+		return (mdsyserror(ep, ENOMEM, np->rname));
+
+	startblk = firstblk;
+	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
+
+		if (cnt > MDDB_VERIFY_SIZE)
+			consecutive = MDDB_VERIFY_SIZE;
+		else
+			consecutive = cnt;
+
+		if (lseek(fd, (off_t)startblk * DEV_BSIZE, SEEK_SET) < 0) {
+			Free(buffer);
+			return (mdsyserror(ep, errno, np->rname));
+		}
+
+		iosize = DEV_BSIZE * consecutive;
+		if (write(fd, buffer, iosize) != iosize) {
+			Free(buffer);
+			return (mdsyserror(ep, errno, np->rname));
+		}
+
+		/* go back and make sure the chunk can be read as well */
+		if (lseek(fd, (off_t)startblk * DEV_BSIZE, SEEK_SET) < 0) {
+			Free(buffer);
+			return (mdsyserror(ep, errno, np->rname));
+		}
+
+		if (read(fd, buffer, iosize) != iosize) {
+			Free(buffer);
+			return (mdsyserror(ep, errno, np->rname));
+		}
+
+		startblk += consecutive;
+	}
+
+	Free(buffer);
+	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
+		return (mdsyserror(ep, ENOMEM, np->rname));
+
+	if (meta_gettimeofday(&tp) == -1) {
+		Free(mb);
+		return (mdsyserror(ep, errno, np->rname));
+	}
+
+	mb->mb_magic = MDDB_MAGIC_MB;
+	/*
+	 * If a MN diskset, set master block revision for a MN set.
+	 * Even though the master block structure is no different
+	 * for a MN set, setting the revision field to a different
+	 * number keeps any pre-MN_diskset code from accessing
+	 * this diskset.  It also allows for an early determination
+	 * of a MN diskset when reading in from disk so that the
+	 * proper size locator block and locator names structure
+	 * can be read in thus saving time on diskset startup.
+	 */
+	if (mn_set)
+		mb->mb_revision = MDDB_REV_MNMB;
+	else
+		mb->mb_revision = MDDB_REV_MB;
+	mb->mb_timestamp = tp;
+	mb->mb_setno = sp->setno;
+	mb->mb_blkcnt = dbsize - 1;
+	mb->mb_blkno = firstblk;
+	mb->mb_nextblk = 0;
+
+	/* the blocks following the master block make up the block map */
+	mb->mb_blkmap.m_firstblk = firstblk + 1;
+	mb->mb_blkmap.m_consecutive = dbsize - 1;
+	if (! metaislocalset(sp)) {
+		mb->mb_setcreatetime = inittime;
+	}
+
+	/*
+	 * We try to save the disks device ID into the remaining bytes in
+	 * the master block. The saved devid is used to provide a mapping
+	 * between this disk's devid and the devid stored into the master
+	 * block. This allows the disk image to be self-identifying
+	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
+	 * when we try to import these disks on the remote copied image.
+	 * If we cannot save the disks device ID onto the master block that is
+	 * ok.  The disk is just not self-identifying and won't be importable
+	 * in the remote copy scenario.
+	 */
+	if (devid_get(fd, &devid) == 0) {
+		size_t len;
+
+		len = devid_sizeof(devid);
+		if (len <= DEV_BSIZE - sizeof (*mb)) {
+			/* there is enough space to store the devid */
+			mb->mb_devid_magic = MDDB_MAGIC_DE;
+			mb->mb_devid_len = len;
+			(void) memcpy(mb->mb_devid, devid, len);
+		}
+		devid_free(devid);
+	}
+
+	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
+	    (crc_skip_t *)NULL);
+
+	if (lseek(fd, (off_t)firstblk * DEV_BSIZE, SEEK_SET) < 0) {
+		Free(mb);
+		return (mdsyserror(ep, errno, np->rname));
+	}
+
+	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
+		Free(mb);
+		return (mdsyserror(ep, errno, np->rname));
+	}
+
+	/* read the master block back and verify its checksum */
+	if (lseek(fd, (off_t)firstblk * DEV_BSIZE, SEEK_SET) < 0) {
+		Free(mb);
+		return (mdsyserror(ep, errno, np->rname));
+	}
+
+	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
+		Free(mb);
+		return (mdsyserror(ep, errno, np->rname));
+	}
+
+	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
+		(uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
+		Free(mb);
+		return (mdmddberror(ep, MDE_NOTVERIFIED,
+			meta_getminor(np->dev), sp->setno, 0, np->rname));
+	}
+
+	Free(mb);
+	return (0);
+}
+
+/*
+ * meta_mkdummymaster writes a dummy master block (magic MDDB_MAGIC_DU)
+ * at block firstblk of the device open on fd.  The block carries the set
+ * number, the set's lb_inittime, the current time and, when it fits,
+ * the device id of the disk.
+ *
+ * This is best effort: nothing is returned, and no error is reported if
+ * the block cannot be written or verified.
+ */
+void
+meta_mkdummymaster(
+	mdsetname_t	*sp,
+	int		fd,
+	daddr_t		firstblk
+)
+{
+	md_timeval32_t	tp;
+	struct mddb_mb	*mb;
+	ddi_devid_t	devid;
+	md_set_desc	*sd;
+	md_error_t	ep = mdnullerror;
+	md_timeval32_t	inittime;
+	off_t		mb_offset;
+
+	/*
+	 * No dummy master blocks are written for a MN diskset since devids
+	 * are not supported in MN disksets.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, &ep)) == NULL) {
+			/* the error is not reported; don't leak it */
+			mdclrerror(&ep);
+			return;
+		}
+
+		if (MD_MNSET_DESC(sd))
+			return;
+	}
+
+	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
+		return;
+
+	mb->mb_magic = MDDB_MAGIC_DU;
+	mb->mb_revision = MDDB_REV_MB;
+	mb->mb_setno = sp->setno;
+	inittime = meta_get_lb_inittime(sp, &ep);
+	/* any error from the lookup is ignored; release what it holds */
+	mdclrerror(&ep);
+	mb->mb_setcreatetime = inittime;
+
+	if (meta_gettimeofday(&tp) != -1)
+		mb->mb_timestamp = tp;
+
+	/*
+	 * We try to save the disks device ID into the remaining bytes in
+	 * the master block.  This allows the disk image to be self-identifying
+	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
+	 * when we try to import these disks on the remote copied image.
+	 * If we cannot save the disks device ID onto the master block that is
+	 * ok.  The disk is just not self-identifying and won't be importable
+	 * in the remote copy scenario.
+	 */
+	if (devid_get(fd, &devid) == 0) {
+		size_t len;
+
+		len = devid_sizeof(devid);
+		if (len <= DEV_BSIZE - sizeof (*mb)) {
+			/* there is enough space to store the devid */
+			mb->mb_devid_magic = MDDB_MAGIC_DE;
+			mb->mb_devid_len = len;
+			(void) memcpy(mb->mb_devid, (char *)devid, len);
+		}
+		devid_free(devid);
+	}
+
+	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
+	    (crc_skip_t *)NULL);
+
+	/*
+	 * Widen the block number before multiplying so that the byte
+	 * offset is not truncated to the width of daddr_t.
+	 */
+	mb_offset = (off_t)firstblk * DEV_BSIZE;
+
+	/*
+	 * If any of these operations fail, we need to inform the
+	 * user that the disk won't be self identifying. When support
+	 * for importing remotely replicated disksets is added, we
+	 * want to add the error messages here.
+	 */
+	if (lseek(fd, mb_offset, SEEK_SET) < 0)
+		goto out;
+
+	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
+		goto out;
+
+	if (lseek(fd, mb_offset, SEEK_SET) < 0)
+		goto out;
+
+	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
+		goto out;
+
+	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
+	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
+		goto out;
+
+out:
+	Free(mb);
+}
+
+/*
+ * Rewrite the mddb location file (META_DBCONF) from the replica list of
+ * the local set.
+ *
+ * The new contents are first written to a temporary file.  Normally that
+ * file is META_DBCONFTMP and it is rename(2)d over META_DBCONF.  If
+ * META_DBCONFTMP cannot be created because of EROFS we assume we are in
+ * the miniroot: the temporary file is created in /var/tmp instead and its
+ * contents are copied into META_DBCONF line by line.
+ *
+ *	sp	set to process; anything but the local set is a no-op
+ *	ep	error packet, filled in on failure
+ *
+ * Returns 0 on success (or when there is nothing to do), -1 if the
+ * replica list cannot be obtained, otherwise the value returned by
+ * mdsyserror().
+ */
+static int
+buildconf(mdsetname_t *sp, md_error_t *ep)
+{
+	md_replicalist_t	*rlp = NULL;
+	md_replicalist_t	*rl;
+	FILE			*cfp = NULL;
+	FILE			*mfp = NULL;
+	struct stat		sbuf;
+	int			rval = 0;
+	int			in_miniroot = 0;
+	char			line[MDDB_BOOTLIST_MAX_LEN];
+	char			*tname = NULL;
+
+	/* get list of local replicas */
+	if (! metaislocalset(sp))
+		return (0);
+
+	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
+		return (-1);
+
+	/* open tempfile, copy permissions of original file */
+	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
+		/*
+		 * On the miniroot tmp files must be created in /var/tmp.
+		 * If we get a EROFS error, we assume that we are in the
+		 * miniroot.
+		 */
+		if (errno != EROFS)
+			goto error;
+		in_miniroot = 1;
+		errno = 0;
+		tname = tempnam("/var/tmp", "slvm_");
+		if (tname == NULL) {
+			if (errno == EROFS) {
+				/*
+				 * If we are booted on a read-only root
+				 * because of mddb quorum problems we don't
+				 * want to emit any scary error messages.
+				 */
+				errno = 0;
+				goto out;
+			}
+			/*
+			 * Any other tempnam() failure is a real error;
+			 * never hand a NULL file name to fopen().
+			 */
+			if (errno == 0)
+				errno = ENOMEM;
+			goto error;
+		}
+
+		/* open tempfile, copy permissions of original file */
+		if ((cfp = fopen(tname, "w+")) == NULL)
+			goto error;
+	}
+	if (stat(META_DBCONF, &sbuf) == 0) {
+		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
+			goto error;
+		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
+			goto error;
+	}
+
+	/* print header */
+	if (fprintf(cfp, "#metadevice database location file ") < 0)
+		goto error;
+	if (fprintf(cfp, "do not hand edit\n") < 0)
+		goto error;
+	if (fprintf(cfp,
+		"#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
+		goto error;
+
+	/* dump replicas */
+	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
+		md_replica_t	*r = rl->rl_repp;
+		int		checksum = 42;
+		int		i;
+		char		*devidp;
+		char		*cp;
+		minor_t		min;
+
+		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
+		/* If devid code can't encode devidp - skip entry */
+		if (devidp == NULL) {
+			continue;
+		}
+
+		/*
+		 * compute checksum; test the bound before touching the
+		 * byte so an unterminated driver name is never over-read
+		 */
+		for (i = 0; ((i < sizeof (r->r_driver_name)) &&
+		    (r->r_driver_name[i] != '\0')); i++) {
+			checksum -= r->r_driver_name[i];
+		}
+		min = meta_getminor(r->r_namep->dev);
+		checksum -= min;
+		checksum -= r->r_blkno;
+
+		for (cp = devidp; *cp != '\0'; cp++) {
+			checksum -= *cp;
+		}
+		/* print info */
+		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
+		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
+			/* release the devid string, keep errno for report */
+			int	serrno = errno;
+
+			devid_str_free(devidp);
+			errno = serrno;
+			goto error;
+		}
+
+		devid_str_free(devidp);
+	}
+
+	/* close and rename to real file */
+	if (fflush(cfp) != 0)
+		goto error;
+	if (fsync(fileno(cfp)) != 0)
+		goto error;
+	if (fclose(cfp) != 0) {
+		cfp = NULL;
+		goto error;
+	}
+	cfp = NULL;
+
+	/*
+	 * Renames don't work in the miniroot since tmpfiles are
+	 * created in /var/tmp. Hence we copy the data out.
+	 */
+
+	if (! in_miniroot) {
+		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
+			goto error;
+	} else {
+		if ((cfp = fopen(tname, "r")) == NULL)
+			goto error;
+		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
+			goto error;
+		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
+			/* fputs() reports failure with EOF, not NULL */
+			if (fputs(line, mfp) == EOF)
+				goto error;
+		}
+		(void) fclose(cfp);
+		cfp = NULL;
+		if (fflush(mfp) != 0)
+			goto error;
+		if (fsync(fileno(mfp)) != 0)
+			goto error;
+		if (fclose(mfp) != 0) {
+			mfp = NULL;
+			goto error;
+		}
+		mfp = NULL;
+		/* delete the tempfile */
+		(void) unlink(tname);
+	}
+	/* success */
+	rval = 0;
+	goto out;
+
+	/* tempfile error */
+error:
+	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
+				mdsyserror(ep, errno, META_DBCONFTMP);
+
+
+	/* cleanup, return success */
+out:
+	if (rlp != NULL)
+		metafreereplicalist(rlp);
+	/* the copy target is only still open if we bailed out mid-copy */
+	if (mfp != NULL)
+		(void) fclose(mfp);
+	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
+		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
+					mdsyserror(ep, errno, META_DBCONFTMP);
+	}
+	free(tname);
+	return (rval);
+}
+
+/*
+ * Test a block range of component np against one replica.
+ *
+ * The replica's extent is taken from rp->r_blkno / rp->r_nblk.  If either
+ * of those is MD_DISKADDR_ERROR the extent is widened to start at block 0
+ * with a length of MD_DISKADDR_ERROR before the comparison is handed to
+ * meta_check_overlap().
+ *
+ * Returns 0 if meta_check_overlap() reports no problem, -1 otherwise.
+ */
+static int
+in_replica(
+	mdsetname_t	*sp,
+	md_replica_t	*rp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	start = rp->r_blkno;
+	diskaddr_t	len = rp->r_nblk;
+	int		bad_extent;
+
+	/* caller must supply a set */
+	assert(sp != NULL);
+
+	/* an unreadable master block means: treat the whole partition */
+	bad_extent = (start == MD_DISKADDR_ERROR) ||
+	    (len == MD_DISKADDR_ERROR);
+	if (bad_extent) {
+		start = 0;
+		len = MD_DISKADDR_ERROR;
+	}
+
+	/* collapse the overlap verdict to 0 / -1 */
+	return ((meta_check_overlap(MDB_STR, np, slblk, nblks, rp->r_namep,
+	    start, len, ep) != 0) ? -1 : 0);
+}
+
+/*
+ * Run in_replica() for the given block range of np against every replica
+ * returned by metareplicalist() for set sp, stopping at the first one
+ * that fails the check.
+ *
+ * Returns 0 if no replica fails, -1 if one does or if the replica list
+ * cannot be obtained.
+ */
+int
+meta_check_inreplica(
+	mdsetname_t		*sp,
+	mdname_t		*np,
+	diskaddr_t		slblk,
+	diskaddr_t		nblks,
+	md_error_t		*ep
+)
+{
+	md_replicalist_t	*replicas = NULL;
+	md_replicalist_t	*cur;
+	int			rval = 0;
+
+	/* caller must supply a set */
+	assert(sp != NULL);
+
+	if (metareplicalist(sp, MD_BASICNAME_OK, &replicas, ep) < 0)
+		return (-1);
+
+	/* walk the list until it ends or a replica is hit */
+	cur = replicas;
+	while ((cur != NULL) && (rval == 0)) {
+		if (in_replica(sp, cur->rl_repp, np, slblk, nblks, ep) != 0)
+			rval = -1;
+		cur = cur->rl_next;
+	}
+
+	/* release the list regardless of the outcome */
+	metafreereplicalist(replicas);
+	return (rval);
+}
+
+/*
+ * Validate a component that is about to hold a replica.
+ *
+ * The checks are made in this order and the first failure ends the
+ * routine with -1:
+ *	1. metachkcomp()
+ *	2. meta_check_inuse() with MDCHK_INUSE
+ *	3. meta_check_inset()
+ *	4. meta_check_inmeta() with MDCHK_ALLOW_REPSLICE
+ * Steps 3 and 4 are skipped (and 0 is returned) when options contains
+ * MDCHK_ALLOW_NODBS or MDCHK_DRVINSET.
+ */
+int
+meta_check_replica(
+	mdsetname_t	*sp,		/* set to check against */
+	mdname_t	*np,		/* component to check against */
+	mdchkopts_t	options,	/* option flags */
+	diskaddr_t	slblk,		/* start logical block */
+	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
+	md_error_t	*ep		/* error packet */
+)
+{
+	/* must be a disk component that is not already in use */
+	if ((metachkcomp(np, ep) != 0) ||
+	    (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0))
+		return (-1);
+
+	/* either option short-circuits the set and metadevice checks */
+	if ((options & (MDCHK_ALLOW_NODBS | MDCHK_DRVINSET)) != 0)
+		return (0);
+
+	/* component has to belong to the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	/* and must not be part of a metadevice */
+	return ((meta_check_inmeta(sp, np, MDCHK_ALLOW_REPSLICE, slblk,
+	    nblks, ep) != 0) ? -1 : 0);
+}
+
+/*
+ * Call clnt_upd_dr_dbinfo() with drive list dd on the nodes of set sp.
+ *
+ * Node selection is the same in every phase:
+ *	- if force is non-zero, only the node whose name equals mynode()
+ *	- for multi-node sets (MD_MNSET_DESC), only nodes flagged
+ *	  MD_MN_NODE_ALIVE
+ *	- for other sets, only non-empty sd_nodes[] slots
+ *
+ * When set_locked is zero the update is bracketed by clnt_lock_set() and
+ * clnt_unlock_set() on the same nodes.  A lock failure returns -1
+ * immediately; an update or unlock failure stops that phase and makes
+ * the routine return -1.
+ */
+static int
+update_dbinfo_on_drives(
+	mdsetname_t	*sp,
+	md_drive_desc	*dd,
+	int		set_locked,
+	int		force,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	md_mnnode_desc		*nd;
+	md_setkey_t		*cl_sk;
+	int			side;
+	int			rval = 0;
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	/*
+	 * Phase 1: take the set lock on every selected node.
+	 */
+	if (! set_locked) {
+		if (MD_MNSET_DESC(sd)) {
+			md_error_t xep = mdnullerror;
+			sigset_t sigs;
+
+			/* Make sure we are blocking all signals */
+			if (procsigs(TRUE, &sigs, &xep) < 0)
+				mdclrerror(&xep);
+
+			for (nd = sd->sd_nodelist; nd; nd = nd->nd_next) {
+				if ((force && strcmp(nd->nd_nodename,
+				    mynode()) != 0) ||
+				    !(nd->nd_flags & MD_MN_NODE_ALIVE))
+					continue;
+
+				if (clnt_lock_set(nd->nd_nodename, sp, ep))
+					return (-1);
+			}
+		} else {
+			for (side = 0; side < MD_MAXSIDES; side++) {
+				/* empty slot, or not us while forcing */
+				if ((sd->sd_nodes[side][0] == '\0') ||
+				    (force && strcmp(sd->sd_nodes[side],
+				    mynode()) != 0))
+					continue;
+
+				if (clnt_lock_set(sd->sd_nodes[side], sp, ep))
+					return (-1);
+			}
+		}
+	}
+
+	/*
+	 * Phase 2: push the drive database info to every selected node.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		for (nd = sd->sd_nodelist; nd; nd = nd->nd_next) {
+			if ((force &&
+			    strcmp(nd->nd_nodename, mynode()) != 0) ||
+			    !(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+
+			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
+			    == -1) {
+				rval = -1;
+				break;
+			}
+		}
+	} else {
+		for (side = 0; side < MD_MAXSIDES; side++) {
+			/* empty slot, or not us while forcing */
+			if ((sd->sd_nodes[side][0] == '\0') ||
+			    (force &&
+			    strcmp(sd->sd_nodes[side], mynode()) != 0))
+				continue;
+
+			if (clnt_upd_dr_dbinfo(sd->sd_nodes[side], sp, dd, ep)
+			    == -1) {
+				rval = -1;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * Phase 3: drop the set lock again, even if phase 2 failed.
+	 */
+	if (! set_locked) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (MD_MNSET_DESC(sd)) {
+			for (nd = sd->sd_nodelist; nd; nd = nd->nd_next) {
+				if ((force &&
+				    strcmp(nd->nd_nodename, mynode()) != 0) ||
+				    !(nd->nd_flags & MD_MN_NODE_ALIVE))
+					continue;
+
+				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
+				    ep)) {
+					rval = -1;
+					break;
+				}
+			}
+		} else {
+			for (side = 0; side < MD_MAXSIDES; side++) {
+				/* empty slot, or not us while forcing */
+				if ((sd->sd_nodes[side][0] == '\0') ||
+				    (force &&
+				    strcmp(sd->sd_nodes[side], mynode()) != 0))
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[side], cl_sk,
+				    ep)) {
+					rval = -1;
+					break;
+				}
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	return (rval);
+}
+
+/*
+ * Register the side names of the replica at block blkno on component np.
+ *
+ * meta_getnextside_devinfo() is called repeatedly, starting from
+ * MD_SIDEWILD, until it returns 0 (done) or -1 (error).  For every side
+ * it yields, the side name is either
+ *	- broadcast with an MD_MN_MSG_META_DB_NEWSIDE message, when sp is a
+ *	  multi-node diskset and bcast is DB_ADDSIDENMS_BCAST, or
+ *	- handed to the kernel with the MD_DB_NEWSIDE ioctl otherwise.
+ *
+ * Returns 0 on success; on failure -1 or the value of mdstealerror(),
+ * with ep filled in.
+ */
+int
+meta_db_addsidenms(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	daddr_t		blkno,
+	int		bcast,
+	md_error_t	*ep
+)
+{
+	side_t		sideno;
+	char		*bname = NULL;
+	char		*dname = NULL;
+	minor_t		mnum;
+	mddb_config_t	c;
+	int		done;
+	int		rval = 0;
+	md_set_desc	*sd = NULL;	/* only looked up for disksets */
+
+	sideno = MD_SIDEWILD;
+	/*CONSTCOND*/
+	while (1) {
+		/* drop the names handed out by the previous iteration */
+		if (bname != NULL) {
+			Free(bname);
+			bname = NULL;
+		}
+		if (dname != NULL) {
+			Free(dname);
+			dname = NULL;
+		}
+		if ((done = meta_getnextside_devinfo(sp, np->bname,
+		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
+			rval = -1;
+			break;
+		}
+
+		if (done == 0)
+			break;
+
+		if (! metaislocalset(sp)) {
+			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+				rval = -1;
+				break;
+			}
+		}
+
+		/*
+		 * Send addsidenms to all nodes using rpc.mdcommd if
+		 * sidename is being added to MN diskset.
+		 *
+		 *   It's ok to broadcast this call to other nodes.
+		 *
+		 *   Note: The broadcast to other nodes isn't needed during
+		 *   the addition of the first mddbs to the set since the
+		 *   other nodes haven't been joined to the set yet.  All
+		 *   nodes in a MN diskset are (implicitly) joined to the set
+		 *   on the addition of the first mddb.
+		 */
+		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+		    (bcast == DB_ADDSIDENMS_BCAST)) {
+			md_mn_result_t			*resultp = NULL;
+			md_mn_msg_meta_db_newside_t	db_ns;
+			int				send_rval;
+
+			/* don't ship uninitialized stack bytes to peers */
+			(void) memset(&db_ns, 0, sizeof (db_ns));
+
+			db_ns.msg_l_dev = np->dev;
+			db_ns.msg_sideno = sideno;
+			db_ns.msg_blkno = blkno;
+			(void) strncpy(db_ns.msg_dname, dname,
+			    sizeof (db_ns.msg_dname));
+			(void) splitname(np->bname, &db_ns.msg_splitname);
+			db_ns.msg_mnum = mnum;
+
+			/* Empty devid string until devids are supported */
+			db_ns.msg_devid[0] = '\0';
+
+			/*
+			 * If reconfig cycle has been started, this node is
+			 * stuck in the return step until this command has
+			 * completed.  If mdcommd is suspended, ask
+			 * send_message to fail (instead of retrying)
+			 * so that metaset can finish allowing the reconfig
+			 * cycle to proceed.
+			 */
+			send_rval = mdmn_send_message(sp->setno,
+			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
+			    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns,
+			    sizeof (md_mn_msg_meta_db_newside_t),
+			    &resultp, ep);
+			if (send_rval != 0) {
+				rval = -1;
+				if (resultp == NULL)
+					(void) mddserror(ep,
+					    MDE_DS_COMMD_SEND_FAIL,
+					    sp->setno, NULL, NULL,
+					    sp->setname);
+				else {
+					(void) mdstealerror(ep,
+					    &(resultp->mmr_ep));
+					/* make sure some error is reported */
+					if (mdisok(ep)) {
+						(void) mddserror(ep,
+						    MDE_DS_COMMD_SEND_FAIL,
+						    sp->setno, NULL, NULL,
+						    sp->setname);
+					}
+					free_result(resultp);
+				}
+				break;
+			}
+			if (resultp)
+				free_result(resultp);
+		} else {
+			/*
+			 * Let this side's  device name, minor # and driver name
+			 * be known to the database replica.
+			 */
+			(void) memset(&c, 0, sizeof (c));
+
+			/* Fill in device/replica info */
+			c.c_locator.l_dev = meta_cmpldev(np->dev);
+			c.c_locator.l_blkno = blkno;
+			(void) strncpy(c.c_locator.l_driver, dname,
+			    sizeof (c.c_locator.l_driver));
+			(void) splitname(bname, &c.c_devname);
+			c.c_locator.l_mnum = mnum;
+
+			/* Fill in setno, setname, and sideno */
+			c.c_setno = sp->setno;
+			(void) strncpy(c.c_setname, sp->setname,
+				sizeof (c.c_setname));
+			c.c_sideno = sideno;
+
+			/*
+			 * Don't need device id information from this ioctl
+			 * Kernel determines device id from dev_t, which
+			 * is just what this code would do.
+			 */
+			c.c_locator.l_devid = (uint64_t)0;
+			c.c_locator.l_devid_flags = 0;
+
+			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
+				rval = mdstealerror(ep, &c.c_mde);
+				break;
+			}
+		}
+	}
+
+	/* cleanup, return success */
+	if (bname != NULL) {
+		Free(bname);
+		bname = NULL;
+	}
+	if (dname != NULL) {
+		Free(dname);
+		dname = NULL;
+	}
+	return (rval);
+}
+
+
+/*
+ * Remove the side name of side sideno for the replica at block blkno on
+ * component np.
+ *
+ * If sp is a multi-node diskset and this node has MD_MN_NODE_OWN set,
+ * an MD_MN_MSG_META_DB_DELSIDE message is sent through
+ * mdmn_send_message(); otherwise the MD_DB_DELSIDE ioctl is issued
+ * directly.
+ *
+ * Returns 0 on success; on failure -1 or the value of mdstealerror(),
+ * with ep filled in.
+ */
+int
+meta_db_delsidenm(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	mdname_t	*np,
+	daddr_t		blkno,
+	md_error_t	*ep
+)
+{
+	mddb_config_t	c;
+	md_set_desc	*sd = NULL;	/* only looked up for disksets */
+
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+	}
+	/* Use rpc.mdcommd to delete mddb side from all nodes */
+	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+		md_mn_result_t			*resultp = NULL;
+		md_mn_msg_meta_db_delside_t	db_ds;
+		int				send_rval;
+
+		/* don't ship uninitialized stack bytes to peers */
+		(void) memset(&db_ds, 0, sizeof (db_ds));
+
+		db_ds.msg_l_dev = np->dev;
+		db_ds.msg_blkno = blkno;
+		db_ds.msg_sideno = sideno;
+
+		/* Empty devid string until devids are supported */
+		db_ds.msg_devid[0] = '\0';
+
+		/*
+		 * If reconfig cycle has been started, this node is
+		 * stuck in the return step until this command has
+		 * completed.  If mdcommd is suspended, ask
+		 * send_message to fail (instead of retrying)
+		 * so that metaset can finish allowing the reconfig
+		 * cycle to proceed.
+		 */
+		send_rval = mdmn_send_message(sp->setno,
+		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
+		    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds,
+		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
+		if (send_rval != 0) {
+			if (resultp == NULL)
+				(void) mddserror(ep,
+				    MDE_DS_COMMD_SEND_FAIL,
+				    sp->setno, NULL, NULL,
+				    sp->setname);
+			else {
+				(void) mdstealerror(ep, &(resultp->mmr_ep));
+				/* make sure some error is reported */
+				if (mdisok(ep)) {
+					(void) mddserror(ep,
+					    MDE_DS_COMMD_SEND_FAIL,
+					    sp->setno, NULL, NULL,
+					    sp->setname);
+				}
+				free_result(resultp);
+			}
+			return (-1);
+		}
+		if (resultp)
+			free_result(resultp);
+
+	} else {
+		/*
+		 * Tell the kernel which replica (device and block) and
+		 * which side the name is to be deleted for.
+		 */
+		(void) memset(&c, 0, sizeof (c));
+
+		/* Fill in device/replica info */
+		c.c_locator.l_dev = meta_cmpldev(np->dev);
+		c.c_locator.l_blkno = blkno;
+
+		/* Fill in setno, setname, and sideno */
+		c.c_setno = sp->setno;
+		/* bounded copy, same as meta_db_addsidenms() */
+		(void) strncpy(c.c_setname, sp->setname,
+		    sizeof (c.c_setname));
+		c.c_sideno = sideno;
+
+		/*
+		 * Don't need device id information from this ioctl
+		 * Kernel determines device id from dev_t, which
+		 * is just what this code would do.
+		 */
+		c.c_locator.l_devid = (uint64_t)0;
+		c.c_locator.l_devid_flags = 0;
+
+		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
+			return (mdstealerror(ep, &c.c_mde));
+	}
+	return (0);
+}
+
+
+/*
+ * Verify that no two entries of name list nlp share the same cname.
+ *
+ * Every entry is compared (strcmp) with each entry that follows it.
+ * Returns 0 if all names differ; on the first duplicate returns the
+ * value of mderror() for MDE_DUPDRIVE, naming the earlier entry.
+ */
+static int
+mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
+{
+	mdnamelist_t		*outer;
+	mdnamelist_t		*inner;
+
+	outer = nlp;
+	while (outer != NULL) {
+		inner = outer->next;
+		while (inner != NULL) {
+			/* same name seen twice: report and stop */
+			if (strcmp(outer->namep->cname,
+			    inner->namep->cname) == 0) {
+				return (mderror(ep, MDE_DUPDRIVE,
+				    outer->namep->cname));
+			}
+			inner = inner->next;
+		}
+		outer = outer->next;
+	}
+	return (0);
+}
+
+
+/*
+ * Compare the contents of two files.
+ *
+ *	tsname	first file
+ *	sname	second file
+ *
+ * Returns 0 if both files can be read completely and hold the same
+ * bytes, else 1.  Any failure (stat, allocation, open, short read) is
+ * reported as "different".
+ */
+static int
+filediff(char *tsname, char *sname)
+{
+	int		ret = 1;
+	int		fd;
+	size_t		tsz, sz;
+	ssize_t		nread;
+	struct stat	sbuf;
+	char		*tbuf = NULL;
+	char		*buf = NULL;
+
+	if (stat(tsname, &sbuf) != 0)
+		return (1);
+	tsz = (size_t)sbuf.st_size;
+	if (stat(sname, &sbuf) != 0)
+		return (1);
+	sz = (size_t)sbuf.st_size;
+	if (tsz != sz)
+		return (1);
+
+	/*
+	 * Two empty files are identical.  Decide that here instead of
+	 * depending on whether malloc(0) happens to return NULL.
+	 */
+	if (tsz == 0)
+		return (0);
+
+	/* allocate memory and read both files into buffer */
+	tbuf = malloc(tsz);
+	buf = malloc(sz);
+	if (tbuf == NULL || buf == NULL)
+		goto out;
+
+	fd = open(tsname, O_RDONLY);
+	if (fd == -1)
+		goto out;
+	nread = read(fd, tbuf, tsz);
+	(void) close(fd);
+	/* read error or short read: treat as different */
+	if (nread < 0 || (size_t)nread != tsz)
+		goto out;
+
+	fd = open(sname, O_RDONLY);
+	if (fd == -1)
+		goto out;
+	nread = read(fd, buf, tsz);
+	(void) close(fd);
+	if (nread < 0 || (size_t)nread != tsz)
+		goto out;
+
+	/* compare content, normalised to the documented 0/1 */
+	ret = (memcmp(tbuf, buf, tsz) != 0) ? 1 : 0;
+out:
+	/* free(NULL) is a no-op */
+	free(tbuf);
+	free(buf);
+	return (ret);
+}
+
+/*
+ * patch md.conf file with mddb locations
+ *
+ * A temporary copy of the system file is made with
+ * meta_systemfile_copy(), the mddb information from cname is appended
+ * with meta_systemfile_append_mddb(), and the result replaces sname
+ * unless filediff() finds the two files identical.  The replacement is
+ * done with rename(2); if that fails with EXDEV (we are in the miniroot)
+ * the temporary file is copied into sname instead.
+ *
+ *	sname	system file; NULL selects "md.conf" when patch is
+ *		non-zero, "/kernel/drv/md.conf" otherwise
+ *	cname	mddb.cf file; NULL selects META_DBCONF
+ *	patch	patching locally
+ *	ep	error packet
+ *
+ * Returns 0 on success (including the read-only-root case, which is
+ * silently ignored), -1 on failure.
+ */
+int
+meta_db_patch(
+	char		*sname,		/* system file name */
+	char		*cname,		/* mddb.cf file name */
+	int		patch,		/* patching locally */
+	md_error_t	*ep
+)
+{
+	char		*tsname = NULL;
+	char		line[MDDB_BOOTLIST_MAX_LEN];
+	FILE		*tsfp = NULL;
+	FILE		*mfp = NULL;
+	int		rval = -1;
+
+	/* check names */
+	if (sname == NULL) {
+		if (patch)
+			sname = "md.conf";
+		else
+			sname = "/kernel/drv/md.conf";
+	}
+	if (cname == NULL)
+		cname = META_DBCONF;
+
+	/*
+	 * edit file
+	 */
+	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
+		if (mdissyserror(ep, EROFS)) {
+			/*
+			 * If we are booted on a read-only root because
+			 * of mddb quorum problems we don't want to emit
+			 * any scary error messages.
+			 */
+			mdclrerror(ep);
+			rval = 0;
+		}
+		goto out;
+	}
+
+	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0,
+	    ep) != 0)
+		goto out;
+
+	/*
+	 * Push buffered data out first so that filediff(), which reads
+	 * the file by name, sees the complete temporary file.
+	 */
+	if (fflush(tsfp) != 0) {
+		(void) mdsyserror(ep, errno, tsname);
+		goto out;
+	}
+
+	/* if file content is identical, skip rename */
+	if (filediff(tsname, sname) == 0) {
+		rval = 0;
+		goto out;
+	}
+
+	if (fsync(fileno(tsfp)) != 0) {
+		(void) mdsyserror(ep, errno, tsname);
+		goto out;
+	}
+	if (fclose(tsfp) != 0) {
+		/* the stream is gone either way; never close it twice */
+		tsfp = NULL;
+		(void) mdsyserror(ep, errno, tsname);
+		goto out;
+	}
+	tsfp = NULL;
+
+	/*
+	 * rename file. If we get a Cross Device error then it
+	 * is because we are in the miniroot.  errno is only meaningful
+	 * when rename() has actually failed.
+	 */
+	if (rename(tsname, sname) != 0) {
+		if (errno != EXDEV) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+
+		/* cross-device: copy the data out instead */
+		if ((tsfp = fopen(tsname, "r")) == NULL) {
+			(void) mdsyserror(ep, errno, tsname);
+			goto out;
+		}
+		if ((mfp = fopen(sname, "w+")) == NULL) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		while (fgets(line, sizeof (line), tsfp) != NULL) {
+			/* fputs() reports failure with EOF, not NULL */
+			if (fputs(line, mfp) == EOF) {
+				(void) mdsyserror(ep, errno, sname);
+				goto out;
+			}
+		}
+		if (ferror(tsfp)) {
+			(void) mdsyserror(ep, errno, tsname);
+			goto out;
+		}
+		(void) fclose(tsfp);
+		tsfp = NULL;
+		if ((fflush(mfp) != 0) || (fsync(fileno(mfp)) != 0)) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		if (fclose(mfp) != 0) {
+			mfp = NULL;
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		mfp = NULL;
+
+		/* the copy is in place, the tempfile is no longer needed */
+		(void) unlink(tsname);
+	}
+
+	Free(tsname);
+	tsname = NULL;
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (tsfp != NULL)
+		(void) fclose(tsfp);
+	if (mfp != NULL)
+		(void) fclose(mfp);
+	if (tsname != NULL) {
+		(void) unlink(tsname);
+		Free(tsname);
+	}
+	return (rval);
+}
+
+/*
+ * Add replicas to set.  This happens as a result of:
+ *	- metadb [-s set_name] -a
+ *	- metaset -s set_name -a disk
+ *	- metaset -s set_name -d disk	 (causes a rebalance of mddbs)
+ *	- metaset -s set_name -b
+ *
+ * For a local set, this routine is run on the local set host.
+ *
+ * For a traditional diskset, this routine is run on the node that
+ * is running the metaset command.
+ *
+ * For a multinode diskset, this routine is run by the node that is
+ * running the metaset command.  If this is the first mddb added to
+ * the MN diskset, then no communication is made to other nodes via commd
+ * since the other nodes will be in-sync with respect to the mddbs when
+ * those other nodes join the set and snarf in the newly created mddb.
+ * If this is not the first mddb added to the MN diskset, then this
+ * attach command is sent to all of the nodes using commd.  This keeps
+ * the nodes in-sync.
+ *
+ *	sp		set the replicas are added to
+ *	db_nlp		components to place replicas on (must not be NULL)
+ *	options		MDCHK_* option flags
+ *	timeval		if not NULL, copied into c_timestamp of each
+ *			MD_DB_NEWDEV request
+ *	dbcnt		number of replicas per component
+ *	dbsize		size of each replica in blocks
+ *	sysfilename	system file handed to meta_db_patch() (local set)
+ *	ep		error packet
+ *
+ * Returns 0 on success, non-zero with ep filled in on failure.
+ */
+int
+meta_db_attach(
+	mdsetname_t		*sp,
+	mdnamelist_t		*db_nlp,
+	mdchkopts_t		options,
+	md_timeval32_t		*timeval,
+	int			dbcnt,
+	int			dbsize,
+	char			*sysfilename,
+	md_error_t		*ep
+)
+{
+	struct mddb_config	c;
+	mdnamelist_t		*nlp;
+	mdname_t		*np;
+	md_drive_desc		*dd = NULL;
+	md_drive_desc		*p;
+	int			i;
+	int			fd;
+	side_t			sideno;
+	daddr_t			blkno;
+	int			replicacount = 0;
+	int			start_mdmonitord = 0;
+	int			rval = 0;
+	md_error_t		status = mdnullerror;
+	md_set_desc		*sd = NULL;	/* only set for disksets */
+	int			stale_bool = FALSE;
+	int			flags;
+	int			firstmddb = 1;
+	md_timeval32_t		inittime = {0, 0};
+
+	/*
+	 * Error if we don't get some work to do.
+	 */
+	if (db_nlp == NULL)
+		return (mdsyserror(ep, EINVAL, NULL));
+
+	if (mdnamesareunique(db_nlp, ep) != 0)
+		return (-1);
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+
+	/* Don't need device id information from this ioctl */
+	c.c_locator.l_devid = (uint64_t)0;
+	c.c_locator.l_devid_flags = 0;
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+		if (metaislocalset(sp)) {
+			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
+				mdclrerror(&c.c_mde);
+			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
+			    (! (options & MDCHK_ALLOW_NODBS)))
+				return (mdstealerror(ep, &c.c_mde));
+		} else {
+			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
+				return (mdstealerror(ep, &c.c_mde));
+		}
+		mdclrerror(&c.c_mde);
+	}
+	/*
+	 * Is current set STALE?
+	 */
+	if (c.c_flags & MDDB_C_STALE) {
+		stale_bool = TRUE;
+	}
+
+	assert(db_nlp != NULL);
+
+	/* if creating the metadbs for the first time start mdmonitord */
+	if (c.c_dbcnt == 0)
+		start_mdmonitord = 1;
+
+	/*
+	 * check to see if we will go over the total possible number
+	 * of data bases
+	 */
+	nlp = db_nlp;
+	while (nlp) {
+		replicacount += dbcnt;
+		nlp = nlp->next;
+	}
+
+	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
+		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
+		    sp->setno, c.c_dbcnt + replicacount, NULL));
+
+	/*
+	 * go through and check to make sure all locations specified
+	 * are legal also pick out driver name;
+	 */
+	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+		diskaddr_t devsize;
+
+		np = nlp->namep;
+
+		if (! metaislocalset(sp)) {
+			uint_t	partno;
+			uint_t	rep_partno;
+			mddrivename_t	*dnp = np->drivenamep;
+
+			/*
+			 * make sure that non-local database replicas
+			 * are always on the replica slice.
+			 */
+			if (meta_replicaslice(dnp,
+			    &rep_partno, ep) != 0)
+				return (-1);
+			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
+				return (-1);
+			if (partno != rep_partno)
+				return (mddeverror(ep, MDE_REPCOMP_ONLY,
+				    np->dev, sp->setname));
+		}
+
+		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
+		    ep)) {
+			return (-1);
+		}
+
+		if ((devsize = metagetsize(np, ep)) == -1)
+			return (-1);
+
+		/* replicas start at block 16, see the loops below */
+		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
+			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
+			    meta_getminor(np->dev), sp->setno, devsize,
+			    np->cname));
+	}
+
+	/*
+	 * If first disk in set we don't have lb_inittime yet for use as
+	 * mb_setcreatetime so don't go looking for it. We'll come back
+	 * later and update after the locator block has been created.
+	 * If this isn't the first disk in the set, we have a locator
+	 * block and thus we have lb_inittime. Set mb_setcreatetime to
+	 * lb_inittime.
+	 */
+	if (! metaislocalset(sp)) {
+		if (c.c_dbcnt != 0) {
+			firstmddb = 0;
+			inittime = meta_get_lb_inittime(sp, ep);
+		}
+	}
+
+	/*
+	 * go through and write all master blocks
+	 */
+
+	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+		np = nlp->namep;
+
+		if ((fd = open(np->rname, O_RDWR)) < 0)
+			return (mdsyserror(ep, errno, np->rname));
+
+		for (i = 0; i < dbcnt; i++) {
+			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
+			    inittime, ep)) {
+				(void) close(fd);
+				return (-1);
+			}
+		}
+		(void) close(fd);
+	}
+
+	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
+		return (-1);
+
+	if (! metaislocalset(sp)) {
+		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
+		if (! mdisok(ep))
+			return (-1);
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			/* don't leak the drive list built just above */
+			metafreedrivedesc(&dd);
+			return (-1);
+		}
+
+	}
+
+	/*
+	 * go through and tell kernel to add them
+	 */
+	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+		mdcinfo_t	*cinfo;
+
+		np = nlp->namep;
+
+		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
+			rval = -1;
+			goto out;
+		}
+
+		/*
+		 * If mddb is being added to MN diskset and there already
+		 * exists a valid mddb in the set (which equates to this
+		 * node being an owner of the set) then use rpc.mdcommd
+		 * mechanism to add mddb(s) so that all nodes stay in sync.
+		 * If set is stale, don't log the message since rpc.mdcommd
+		 * can't write the message to the mddb.
+		 *
+		 * Otherwise, just add mddb to this node.
+		 */
+		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+			md_mn_result_t			*resultp = NULL;
+			md_mn_msg_meta_db_attach_t	attach;
+			int 				send_rval;
+
+			/*
+			 * In a scenario where new replicas had been added on
+			 * the master, and then all of the old replicas failed
+			 * before the slaves had knowledge of the new replicas,
+			 * the slaves are unable to re-parse in the mddb
+			 * from the new replicas since the slaves have no
+			 * knowledge of the new replicas.  The following
+			 * algorithm solves this problem:
+			 * 	- META_DB_ATTACH message generates submsgs
+			 * 		- BLOCK parse (master)
+			 * 		- MDDB_ATTACH new replicas
+			 * 		- UNBLOCK parse (master) causing parse
+			 *		information to be sent from master
+			 *		to slaves at a higher class than the
+			 *		unblock so the parse message will
+			 *		reach slaves before unblock message.
+			 */
+
+			/* don't ship uninitialized stack bytes to peers */
+			(void) memset(&attach, 0, sizeof (attach));
+
+			attach.msg_l_dev = np->dev;
+			attach.msg_cnt = dbcnt;
+			attach.msg_dbsize = dbsize;
+			(void) strncpy(attach.msg_dname, cinfo->dname,
+			    sizeof (attach.msg_dname));
+			(void) splitname(np->bname, &attach.msg_splitname);
+			attach.msg_options = options;
+
+			/* Empty devid string until devids are supported */
+			attach.msg_devid[0] = '\0';
+
+			/*
+			 * If reconfig cycle has been started, this node is
+			 * stuck in the return step until this command has
+			 * completed.  If mdcommd is suspended, ask
+			 * send_message to fail (instead of retrying)
+			 * so that metaset can finish allowing the reconfig
+			 * cycle to proceed.
+			 */
+			flags = MD_MSGF_FAIL_ON_SUSPEND;
+			if (stale_bool == TRUE)
+				flags |= MD_MSGF_NO_LOG;
+			send_rval = mdmn_send_message(sp->setno,
+				MD_MN_MSG_META_DB_ATTACH,
+				flags, (char *)&attach,
+				sizeof (md_mn_msg_meta_db_attach_t),
+				&resultp, ep);
+			if (send_rval != 0) {
+				rval = -1;
+				if (resultp == NULL)
+					(void) mddserror(ep,
+					    MDE_DS_COMMD_SEND_FAIL,
+					    sp->setno, NULL, NULL,
+					    sp->setname);
+				else {
+					(void) mdstealerror(ep,
+					    &(resultp->mmr_ep));
+					/* make sure some error is reported */
+					if (mdisok(ep)) {
+						(void) mddserror(ep,
+						    MDE_DS_COMMD_SEND_FAIL,
+						    sp->setno, NULL, NULL,
+						    sp->setname);
+					}
+					free_result(resultp);
+				}
+				goto out;
+			}
+			if (resultp)
+				free_result(resultp);
+		} else {
+		    /* Adding mddb(s) to just this node */
+		    for (i = 0; i < dbcnt; i++) {
+			(void) memset(&c, 0, sizeof (c));
+			/* Fill in device/replica info */
+			c.c_locator.l_dev = meta_cmpldev(np->dev);
+			c.c_locator.l_blkno = i * dbsize + 16;
+			blkno = c.c_locator.l_blkno;
+			(void) strncpy(c.c_locator.l_driver, cinfo->dname,
+			    sizeof (c.c_locator.l_driver));
+			(void) splitname(np->bname, &c.c_devname);
+			c.c_locator.l_mnum = meta_getminor(np->dev);
+
+			/* Fill in setno, setname, and sideno */
+			c.c_setno = sp->setno;
+			if (! metaislocalset(sp)) {
+				if (MD_MNSET_DESC(sd)) {
+					c.c_multi_node = 1;
+				}
+			}
+			/* bounded copy, same as meta_db_addsidenms() */
+			(void) strncpy(c.c_setname, sp->setname,
+			    sizeof (c.c_setname));
+			c.c_sideno = sideno;
+
+			/*
+			 * Don't need device id information from this ioctl
+			 * Kernel determines device id from dev_t, which
+			 * is just what this code would do.
+			 */
+			c.c_locator.l_devid = (uint64_t)0;
+			c.c_locator.l_devid_flags = 0;
+
+			if (timeval != NULL)
+				c.c_timestamp = *timeval;
+
+			if (setup_med_cfg(sp, &c, (options & MDCHK_SET_FORCE),
+			    ep)) {
+				rval = -1;
+				goto out;
+			}
+
+			if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL) != 0) {
+				rval = mdstealerror(ep, &c.c_mde);
+				goto out;
+			}
+			/*
+			 * This is either a traditional diskset OR this
+			 * is the first replica added to a MN diskset.
+			 * In either case, set broadcast to NO_BCAST so
+			 * that message won't go through rpc.mdcommd.
+			 * If this is a traditional diskset, the bcast
+			 * flag is ignored since traditional disksets
+			 * don't use the rpc.mdcommd.
+			 */
+			if (meta_db_addsidenms(sp, np, blkno,
+			    DB_ADDSIDENMS_NO_BCAST, ep)) {
+				/* report the failure instead of masking it */
+				rval = -1;
+				goto out;
+			}
+		    }
+		}
+		if (! metaislocalset(sp)) {
+			/* update the dbcnt and size in dd */
+			for (p = dd; p != NULL; p = p->dd_next)
+				if (p->dd_dnp == np->drivenamep) {
+					p->dd_dbcnt = dbcnt;
+					p->dd_dbsize  = dbsize;
+					break;
+				}
+		}
+
+		/*
+		 * If this was the first addition of disks to the
+		 * diskset you now need to update the mb_setcreatetime
+		 * which needed lb_inittime which wasn't there until now.
+		 */
+		if (firstmddb) {
+			if (meta_update_mb(sp, dd, ep) != 0) {
+				/* run the common cleanup, like other errors */
+				rval = -1;
+				goto out;
+			}
+		}
+		/*
+		 * No close(fd) here: every descriptor opened for the
+		 * master blocks was already closed in the loop above.
+		 */
+	}
+
+out:
+	if (metaislocalset(sp)) {
+
+		/* everything looks fine. Start mdmonitord */
+		/* Note: popen/pclose is the MT-safe replacement for system */
+		if (rval == 0 && start_mdmonitord  == 1) {
+			if (pclose(popen(MDMONITORD, "w")) == -1)
+				md_perror(MDMONITORD);
+
+			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
+				mde_perror(&status, "");
+				mdclrerror(&status);
+			}
+		}
+
+		if (buildconf(sp, &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+			return (rval);
+		}
+
+		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+		}
+	} else {
+		if (update_dbinfo_on_drives(sp, dd,
+		    (options & MDCHK_SET_LOCKED),
+		    (options & MDCHK_SET_FORCE),
+		    &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+			else
+				mdclrerror(&status);
+		}
+		metafreedrivedesc(&dd);
+	}
+	/*
+	 * For MN disksets that already had nodes joined
+	 * before the attach of this mddb(s), the name invalidation is
+	 * done by the commd handler routine.  Otherwise, if this
+	 * is the first attach of a MN diskset mddb, the invalidation
+	 * must be done here since the first attach cannot be sent
+	 * via the commd since there are no nodes joined to the set yet.
+	 */
+	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
+	    (MD_MNSET_DESC(sd) &&
+	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
+		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
+			meta_invalidate_name(nlp->namep);
+		}
+	}
+	return (rval);
+}
+
+/*
+ * deletelist_length
+ *
+ *	Count the entries of db_nlp, i.e. the slices named for deletion on
+ *	the metadb command line.  This is a slice count, not a replica
+ *	count: one slice may carry several replicas.
+ */
+static int
+deletelist_length(mdnamelist_t *db_nlp)
+{
+	int			count = 0;
+
+	/* db_nlp is our own copy of the pointer, so walk it directly */
+	while (db_nlp != NULL) {
+		count++;
+		db_nlp = db_nlp->next;
+	}
+
+	return (count);
+}
+
+/*
+ * Look devname up in name list db_nlp by comparing it (strcmp) with the
+ * bname of every entry.
+ *
+ * Returns the zero-based position of the first matching entry, or -1 if
+ * there is none.
+ */
+static int
+in_deletelist(char *devname, mdnamelist_t *db_nlp)
+{
+	mdnamelist_t		*cur = db_nlp;
+	int			pos;
+
+	for (pos = 0; cur != NULL; pos++, cur = cur->next) {
+		if (strcmp(devname, cur->namep->bname) == 0)
+			return (pos);
+	}
+
+	return (-1);
+}
+
+/*
+ * Delete replicas from set.  This happens as a result of:
+ *	- metadb [-s set_name] -d
+ *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
+ *	- metaset -s set_name -d disk
+ *	- metaset -s set_name -b
+ *
+ * For a local set, this routine is run on the local set host.
+ *
+ * For a traditional diskset, this routine is run on the node that
+ * is running the metaset command.
+ *
+ * For a multinode diskset, this routine is run by the node that is
+ * running the metaset command.  This detach routine is sent to all
+ * of the joined nodes in the diskset using commd.  This keeps
+ * the nodes in-sync.
+ *
+ * Arguments:
+ *	sp		set the replicas are removed from
+ *	db_nlp		slices whose replicas are to be deleted; must be
+ *			non-empty and free of duplicates
+ *	force_option	MDFORCE_* bits that relax the safety checks below
+ *	sysfilename	handed to meta_db_patch() when sp is the local set
+ *	ep		error return
+ *
+ * Returns 0 on success, otherwise a non-zero value; the failure is
+ * normally recorded in ep.
+ */
+int
+meta_db_detach(
+	mdsetname_t		*sp,
+	mdnamelist_t		*db_nlp,
+	mdforceopts_t		force_option,
+	char			*sysfilename,
+	md_error_t		*ep
+)
+{
+	struct mddb_config	c;
+	mdnamelist_t		*nlp;
+	mdname_t		*np;
+	md_drive_desc		*dd = NULL;
+	md_drive_desc		*p;
+	int			replicacount;
+	int			replica_delete_count;
+	int			nr_replica_slices;
+	int			i;
+	int			stop_svmdaemons = 0;
+	int			rval = 0;
+	int			index;
+	int			valid_replicas_nottodelete = 0;
+	int			invalid_replicas_nottodelete = 0;
+	int			invalid_replicas_todelete = 0;
+	int			errored = 0;
+	int			*tag_array;
+	int			fd = -1;
+	md_error_t		status = mdnullerror;
+	md_set_desc		*sd;
+	int			stale_bool = FALSE;
+	int			flags;
+
+	/*
+	 * Error if we don't get some work to do.
+	 */
+	if (db_nlp == NULL)
+		return (mdsyserror(ep, EINVAL, NULL));
+
+	if (mdnamesareunique(db_nlp, ep) != 0)
+		return (-1);
+
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+
+	/* Don't need device id information from this ioctl */
+	c.c_locator.l_devid = (uint64_t)0;
+	c.c_locator.l_devid_flags = 0;
+
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
+		return (mdstealerror(ep, &c.c_mde));
+
+	/*
+	 * Is current set STALE?
+	 */
+	if (c.c_flags & MDDB_C_STALE) {
+		stale_bool = TRUE;
+	}
+
+	replicacount = c.c_dbcnt;
+
+	assert(db_nlp != NULL);
+
+	/*
+	 * go through and gather how many data bases are on each
+	 * device specified.
+	 */
+
+	nr_replica_slices = deletelist_length(db_nlp);
+	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
+	if (tag_array == NULL)
+		return (mdsyserror(ep, ENOMEM, NULL));
+
+	replica_delete_count = 0;
+	for (i = 0; i < replicacount; i++) {
+		char	*devname;
+		int	found = 0;
+
+		c.c_id = i;
+
+		/* Don't need device id information from this ioctl */
+		c.c_locator.l_devid = (uint64_t)0;
+		c.c_locator.l_devid_flags = 0;
+
+		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+			/* don't leak the tag array on the error path */
+			Free(tag_array);
+			return (mdstealerror(ep, &c.c_mde));
+		}
+
+		devname = splicename(&c.c_devname);
+
+		/* tag_array[n] records that delete-list entry n has a replica */
+		if ((index = in_deletelist(devname, db_nlp)) != -1) {
+			found = 1;
+			tag_array[index] = 1;
+			replica_delete_count++;
+		}
+
+		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
+				MDDB_F_EWRITE | MDDB_F_TOOSMALL |
+				MDDB_F_EFMT | MDDB_F_EDATA |
+				MDDB_F_EMASTER);
+
+		/*
+		 * There are four combinations of "errored" and "found"
+		 * and they are used to find the number of
+		 * (a) valid/invalid replicas that are not in the delete
+		 * list and are available in the system.
+		 * (b) valid/invalid replicas that are to be deleted.
+		 */
+
+		if (errored && !found)		/* errored and !found */
+			invalid_replicas_nottodelete++;
+		else if (!found)		/* !errored and !found */
+			valid_replicas_nottodelete++;
+		else if (errored)		/* errored and found */
+			invalid_replicas_todelete++;
+		/*
+		 * else it is !errored and found. This means
+		 * valid_replicas_todelete++; But this variable will not
+		 * be used anywhere
+		 */
+
+		Free(devname);
+	}
+
+	/* every slice named by the caller must actually hold a replica */
+	index = 0;
+	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+		np = nlp->namep;
+		if (tag_array[index++] != 1) {
+			Free(tag_array);
+			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
+		}
+	}
+
+	Free(tag_array);
+
+
+	/* if all replicas are deleted stop mdmonitord */
+	if ((replicacount - replica_delete_count) == 0)
+		stop_svmdaemons = 1;
+
+	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
+		/*
+		 * NOTE(review): this is a bitwise test against
+		 * MDFORCE_NONE; if that constant is 0 the branch can
+		 * never be taken - confirm the intent before changing.
+		 */
+		if (force_option & MDFORCE_NONE)
+			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
+		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
+			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
+	}
+
+	/*
+	 * The following algorithms are followed to check for deletion:
+	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
+	 * replicas, then deletion should be allowed.
+	 * (b) Deletion should be allowed only if valid replicas that are "not"
+	 * to be deleted is always greater than the invalid replicas that
+	 * are "not" to be deleted.
+	 * (c) If the user uses -f option, then deletion should be allowed.
+	 */
+
+	if ((invalid_replicas_todelete != replica_delete_count) &&
+		(invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
+				(force_option != MDFORCE_LOCAL))
+		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
+
+	/*
+	 * go through and tell kernel to delete them
+	 */
+
+	/* Don't need device id information from this ioctl */
+	c.c_locator.l_devid = (uint64_t)0;
+	c.c_locator.l_devid_flags = 0;
+
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
+		return (mdstealerror(ep, &c.c_mde));
+
+	if (! metaislocalset(sp)) {
+		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
+		if (! mdisok(ep))
+			return (-1);
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			/* release the drive descriptors obtained above */
+			metafreedrivedesc(&dd);
+			return (-1);
+		}
+	}
+
+	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
+		np = nlp->namep;
+
+		/*
+		 * If mddb is being deleted from MN diskset and node is
+		 * an owner of the diskset then use rpc.mdcommd
+		 * mechanism to delete mddb(s) so that all nodes stay in
+		 * sync.  If set is stale, don't log the message since
+		 * rpc.mdcommd can't write the message to the mddb.
+		 *
+		 * When mddbs are first being added to set, a detach can
+		 * be called before any node has joined the diskset, so
+		 * must check to see if node is an owner of the diskset.
+		 *
+		 * Otherwise, just delete mddb from this node.
+		 */
+
+		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+			md_mn_result_t			*resultp;
+			md_mn_msg_meta_db_detach_t	detach;
+			int				send_rval;
+
+			/*
+			 * The following algorithm is used to detach replicas.
+			 * 	- META_DB_DETACH message generates submsgs
+			 * 		- BLOCK parse (master)
+			 * 		- MDDB_DETACH replicas
+			 * 		- UNBLOCK parse (master) causing parse
+			 *		information to be sent from master
+			 *		to slaves at a higher class than the
+			 *		unblock so the parse message will
+			 *		reach slaves before unblock message.
+			 */
+			(void) splitname(np->bname, &detach.msg_splitname);
+
+			/* Set devid to NULL until devids are supported */
+			detach.msg_devid[0] = NULL;
+
+			/*
+			 * If reconfig cycle has been started, this node is
+			 * stuck in the return step until this command has
+			 * completed.  If mdcommd is suspended, ask
+			 * send_message to fail (instead of retrying)
+			 * so that metaset can finish allowing the reconfig
+			 * cycle to proceed.
+			 */
+			flags = MD_MSGF_FAIL_ON_SUSPEND;
+			if (stale_bool == TRUE)
+				flags |= MD_MSGF_NO_LOG;
+			send_rval = mdmn_send_message(sp->setno,
+				MD_MN_MSG_META_DB_DETACH,
+				flags, (char *)&detach,
+				sizeof (md_mn_msg_meta_db_detach_t),
+				&resultp, ep);
+			if (send_rval != 0) {
+				rval = -1;
+				if (resultp == NULL)
+					(void) mddserror(ep,
+					    MDE_DS_COMMD_SEND_FAIL,
+					    sp->setno, NULL, NULL,
+					    sp->setname);
+				else {
+					(void) mdstealerror(ep,
+					    &(resultp->mmr_ep));
+					if (mdisok(ep)) {
+						(void) mddserror(ep,
+						    MDE_DS_COMMD_SEND_FAIL,
+						    sp->setno, NULL, NULL,
+						    sp->setname);
+					}
+					free_result(resultp);
+				}
+				goto out;
+			}
+			if (resultp)
+				free_result(resultp);
+		} else {
+			/*
+			 * Walk the kernel's replica table and delete every
+			 * entry that lives on this slice.
+			 */
+			i = 0;
+			while (i < c.c_dbcnt) {
+				char	*devname;
+
+				c.c_id = i;
+
+				/* Don't need devid info from this ioctl */
+				c.c_locator.l_devid = (uint64_t)0;
+				c.c_locator.l_devid_flags = 0;
+
+				if (metaioctl(MD_DB_GETDEV, &c,
+				    &c.c_mde, NULL)) {
+					rval = mdstealerror(ep, &c.c_mde);
+					goto out;
+				}
+
+				devname = splicename(&c.c_devname);
+				if (strcmp(devname, np->bname) != 0) {
+					Free(devname);
+					i++;
+					continue;
+				}
+				Free(devname);
+
+				/* Don't need devid info from this ioctl */
+				c.c_locator.l_devid = (uint64_t)0;
+				c.c_locator.l_devid_flags = 0;
+
+				if (metaioctl(MD_DB_DELDEV, &c,
+				    &c.c_mde, NULL) != 0) {
+					rval = mdstealerror(ep, &c.c_mde);
+					goto out;
+				}
+
+				/* Not incrementing "i" intentionally */
+			}
+		}
+		if (! metaislocalset(sp)) {
+			/* update the dbcnt and size in dd */
+			for (p = dd; p != NULL; p = p->dd_next) {
+				if (p->dd_dnp == np->drivenamep) {
+					p->dd_dbcnt = 0;
+					p->dd_dbsize  = 0;
+					break;
+				}
+			}
+
+			/*
+			 * Slam a dummy master block and make it self
+			 * identifying
+			 */
+			if ((fd = open(np->rname, O_RDWR)) >= 0) {
+				meta_mkdummymaster(sp, fd, 16);
+				(void) close(fd);
+			}
+		}
+	}
+out:
+	if (metaislocalset(sp)) {
+		/*
+		 * Stop all the daemons if there are
+		 * no more replicas so that the module can be
+		 * unloaded.
+		 */
+		if (rval == 0 && stop_svmdaemons == 1) {
+			char buf[MAXPATHLEN];
+			int i;
+
+			for (i = 0; i < DAEMON_COUNT; i++) {
+				FILE	*pp;
+
+				(void) snprintf(buf, MAXPATHLEN,
+					"/usr/bin/pkill -%s -x %s",
+					svmd_kill_list[i].svmd_kill_val,
+					svmd_kill_list[i].svmd_name);
+				/* popen() may fail; never pclose(NULL) */
+				if ((pp = popen(buf, "w")) == NULL ||
+				    pclose(pp) == -1)
+					md_perror(buf);
+			}
+
+			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
+				mde_perror(&status, "");
+				mdclrerror(&status);
+			}
+		}
+		if (buildconf(sp, &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+			else
+				mdclrerror(&status);
+			return (rval);
+		}
+
+		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+			else
+				mdclrerror(&status);
+		}
+	} else {
+		if (update_dbinfo_on_drives(sp, dd,
+		    (force_option & MDFORCE_SET_LOCKED),
+		    ((force_option & MDFORCE_LOCAL) |
+		    (force_option & MDFORCE_DS)), &status)) {
+			/* Don't mask any previous errors */
+			if (rval == 0)
+				rval = mdstealerror(ep, &status);
+			else
+				mdclrerror(&status);
+		}
+		metafreedrivedesc(&dd);
+	}
+	/* sd is only examined for a diskset, where it was set above */
+	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
+		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
+			meta_invalidate_name(nlp->namep);
+		}
+	}
+	return (rval);
+}
+
+/*
+ * Build an md_replica_t describing the replica reported in *c.
+ *
+ *	sp	set the replica belongs to
+ *	flags	PRINT_FAST selects metaname_fast() for the name lookup;
+ *		MD_BASICNAME_OK skips the metachkcomp() check
+ *	c	config structure filled in by a replica ioctl
+ *	ep	error return
+ *
+ * Returns a newly allocated replica (the caller frees it, together with
+ * r_devid when set), or NULL with ep set on failure.
+ */
+static md_replica_t *
+metareplicaname(
+	mdsetname_t		*sp,
+	int			flags,
+	struct mddb_config	*c,
+	md_error_t		*ep
+)
+{
+	md_replica_t	*rp;
+	char		*devname;
+	size_t		sz;
+
+	/* allocate replicaname */
+	rp = Zalloc(sizeof (*rp));
+
+	/* get device name */
+	devname = splicename(&c->c_devname);
+	if (flags & PRINT_FAST) {
+		if ((rp->r_namep = metaname_fast(&sp, devname, ep)) == NULL) {
+			Free(devname);
+			Free(rp);
+			return (NULL);
+		}
+	} else {
+		if ((rp->r_namep = metaname(&sp, devname, ep)) == NULL) {
+			Free(devname);
+			Free(rp);
+			return (NULL);
+		}
+	}
+	Free(devname);
+
+	/* make sure it's OK */
+	if ((! (flags & MD_BASICNAME_OK)) &&
+	    (metachkcomp(rp->r_namep, ep) != 0)) {
+		Free(rp);
+		return (NULL);
+	}
+
+	rp->r_blkno = MD_DISKADDR_ERROR;
+	rp->r_nblk = MD_DISKADDR_ERROR;
+	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
+	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
+		/* keep a private copy of the devid; the caller owns c's */
+		sz = devid_sizeof((ddi_devid_t)(c->c_locator.l_devid));
+		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
+		    (ddi_devid_t)NULL) {
+			/* report the failure rather than returning silently */
+			(void) mdsyserror(ep, ENOMEM, NULL);
+			Free(rp);
+			return (NULL);
+		}
+		(void) memcpy((void *)rp->r_devid,
+		    (void *)c->c_locator.l_devid, sz);
+		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
+		rp->r_flags &= ~MDDB_F_NODEVID;
+		/* Overwrite dev derived from name with dev from devid */
+		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
+	}
+	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
+
+	rp->r_blkno = c->c_locator.l_blkno;
+	/* r_nblk stays MD_DISKADDR_ERROR when the end block is unknown */
+	if (c->c_dbend != 0)
+		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
+
+	/* return replica */
+	return (rp);
+}
+
+/*
+ * Release a replica list: every list node, the replica it refers to,
+ * and the device id attached to that replica (when there is one).
+ */
+void
+metafreereplicalist(
+	md_replicalist_t	*rlp
+)
+{
+	while (rlp != NULL) {
+		md_replicalist_t	*next = rlp->rl_next;
+		md_replica_t		*repp = rlp->rl_repp;
+
+		/* the devid was obtained with malloc(), so plain free() */
+		if (repp->r_devid != (ddi_devid_t)0)
+			free(repp->r_devid);
+
+		Free(repp);
+		Free(rlp);
+		rlp = next;
+	}
+}
+
+/*
+ * return list of all replicas in set
+ *
+ *	sp	set to query
+ *	flags	passed through to metareplicaname()
+ *	rlpp	where the list is built; on failure the list is freed and
+ *		*rlpp is set to NULL
+ *	ep	error return
+ *
+ * Returns the number of replicas found, or -1 with ep set.
+ */
+int
+metareplicalist(
+	mdsetname_t		*sp,
+	int			flags,
+	md_replicalist_t	**rlpp,
+	md_error_t		*ep
+)
+{
+	md_replicalist_t	**tail = rlpp;
+	int			count = 0;
+	struct mddb_config	c;
+	int			i;
+	/*
+	 * Buffer handed to the ioctl for the devid.  Ownership is tracked
+	 * through this pointer (NULL == nothing to free) rather than
+	 * through l_devid_flags, which the ioctl is free to rewrite.
+	 */
+	char			*devid = NULL;
+
+	/* for each replica */
+	i = 0;
+	do {
+		md_replica_t	*rp;
+
+		/* get next replica */
+		(void) memset(&c, 0, sizeof (c));
+		c.c_id = i;
+		c.c_setno = sp->setno;
+
+		/* first pass: only ask how large the devid is */
+		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
+		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
+			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
+				mdclrerror(&c.c_mde);
+				break;	/* handle none at all */
+			}
+			(void) mdstealerror(ep, &c.c_mde);
+			goto out;
+		}
+
+		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
+			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
+				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
+				goto out;
+			}
+			c.c_locator.l_devid = (uintptr_t)devid;
+			/*
+			 * Turn on space and sz flags since 'sz' amount of
+			 * space has been alloc'd.
+			 */
+			c.c_locator.l_devid_flags =
+				MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+		}
+
+		/* second pass: fetch the replica data (and devid, if any) */
+		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
+			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
+				mdclrerror(&c.c_mde);
+				break;	/* handle none at all */
+			}
+			(void) mdstealerror(ep, &c.c_mde);
+			goto out;
+		}
+
+		/*
+		 * Paranoid check - shouldn't happen, but is left as
+		 * a place holder for changes that will be needed after
+		 * dynamic reconfiguration changes are added to SVM (to
+		 * support movement of disks at any point in time).
+		 */
+		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
+			(void) fprintf(stderr,
+			    dgettext(TEXT_DOMAIN,
+				"Error: Relocation Information "
+				"(drvnm=%s, mnum=0x%lx) \n"
+				"relocation information size changed - \n"
+				"rerun command\n"),
+			    c.c_locator.l_driver, c.c_locator.l_mnum);
+			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
+			goto out;
+		}
+
+		if (c.c_dbcnt == 0)
+			break;		/* handle none at all */
+
+		/* get info */
+		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
+			goto out;
+
+		/* append to list */
+		*tail = Zalloc(sizeof (**tail));
+		(*tail)->rl_repp = rp;
+		tail = &(*tail)->rl_next;
+		++count;
+
+		/* metareplicaname() copied the devid; drop our buffer */
+		if (devid != NULL) {
+			free(devid);
+			devid = NULL;
+			c.c_locator.l_devid_flags = 0;
+		}
+
+	} while (++i < c.c_dbcnt);
+
+	/* a "break" above may have left a buffer behind */
+	free(devid);
+
+	/* return count */
+	return (count);
+
+	/* cleanup, return error */
+out:
+	free(devid);
+	metafreereplicalist(*rlpp);
+	*rlpp = NULL;
+	return (-1);
+}
+
+/*
+ * meta_sync_db_locations - bring the on-disk configuration files in
+ *	line with the replica list held by the kernel.  The backup
+ *	configuration file (mddb.cf) is rebuilt first and, if that
+ *	works, the system configuration file (md.conf) is patched.
+ *
+ *	Only the local set is handled; for any other set this is a
+ *	no-op.  Failures are reported through ep.
+ */
+void
+meta_sync_db_locations(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	/*
+	 * No explicit system or config file name is supplied to
+	 * meta_db_patch(); both are passed as null pointers.
+	 */
+	if (metaislocalset(sp) && buildconf(sp, ep) == 0)
+		(void) meta_db_patch(NULL, NULL, 0, ep);
+}
+
+/*
+ * meta_setup_db_locations - parses the mddb.cf file and tells the
+ *			driver which db locations to use.
+ *
+ * Each non-comment line is read as: driver name, minor number, block
+ * number, encoded devid and checksum.  A line whose devid cannot be
+ * decoded or has no minor name is skipped; a line with a bad checksum
+ * is skipped and remembered as MDE_MDDB_CKSUM.  A missing mddb.cf is
+ * not an error.
+ *
+ * Returns 0 on success, otherwise non-zero with ep set (including
+ * MDE_DB_STALE when the local set's database is stale).
+ */
+int
+meta_setup_db_locations(
+	md_error_t	*ep
+)
+{
+	mddb_config_t	c;
+	FILE		*fp;
+	char		inbuff[1024];
+	char		*buff;
+	uint_t		i;
+	size_t		sz;
+	int		rval = 0;
+	char		*devidp;
+	uint_t		devid_size;
+	char		*minor_name = NULL;
+	ddi_devid_t	devid_decode;
+	void		*devid_buf;
+	int		checksum;
+
+	/* do mddb.cf file */
+	(void) memset(&c, '\0', sizeof (c));
+	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
+		if (errno != ENOENT)
+			return (mdsyserror(ep, errno, META_DBCONF));
+	}
+	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
+	    fp)) != NULL)) {
+
+		/* ignore comments */
+		if (*buff == '#')
+			continue;
+
+		/* parse locator */
+		(void) memset(&c, 0, sizeof (c));
+		c.c_setno = MD_LOCAL_SET;
+		i = strcspn(buff, " \t");
+		/* leave room for the terminating NUL (c was zeroed above) */
+		if (i >= sizeof (c.c_locator.l_driver))
+			i = sizeof (c.c_locator.l_driver) - 1;
+		(void) strncpy(c.c_locator.l_driver, buff, i);
+		buff += i;
+		c.c_locator.l_dev =
+		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
+		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
+		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
+
+		/* parse out devid */
+		while (isspace((int)(*buff)))
+			buff += 1;
+		i = strcspn(buff, " \t");
+		if ((devidp = (char *)malloc(i+1)) == NULL) {
+			(void) fclose(fp);
+			return (mdsyserror(ep, ENOMEM, META_DBCONF));
+		}
+
+		(void) strncpy(devidp, buff, i);
+		devidp[i] = '\0';
+		/* don't let a pointer freed on an earlier line linger */
+		minor_name = NULL;
+		if (devid_str_decode(devidp, &devid_decode,
+		    &minor_name) == -1) {
+			free(devidp);
+			continue;
+		}
+
+		/* Conf file must have minor name associated with devid */
+		if (minor_name == NULL) {
+			free(devidp);
+			devid_free(devid_decode);
+			continue;
+		}
+
+		sz = devid_sizeof(devid_decode);
+		/* Copy to devid size buffer that ioctl expects */
+		if ((devid_buf = malloc(sz)) == NULL) {
+			devid_free(devid_decode);
+			free(minor_name);
+			free(devidp);
+			(void) fclose(fp);
+			return (mdsyserror(ep, ENOMEM, META_DBCONF));
+		}
+		c.c_locator.l_devid = (uintptr_t)devid_buf;
+
+		(void) memcpy(devid_buf, (void *)devid_decode, sz);
+
+		devid_free(devid_decode);
+
+		/* the name plus its NUL must fit in l_minor_name */
+		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX ||
+		    strlen(minor_name) >= sizeof (c.c_locator.l_minor_name)) {
+			free(minor_name);
+			free(devidp);
+			free(devid_buf);
+			(void) fclose(fp);
+			return (mdsyserror(ep, ENOMEM, META_DBCONF));
+		}
+		(void) strcpy(c.c_locator.l_minor_name, minor_name);
+		free(minor_name);
+		minor_name = NULL;
+		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
+			MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+		c.c_locator.l_devid_sz = sz;
+
+		devid_size = strlen(devidp);
+		buff += devid_size;
+
+		/*
+		 * The stored checksum plus the driver name bytes, the
+		 * devid string bytes, the minor number and the block
+		 * number must add up to 42.
+		 */
+		checksum = strtol(buff, &buff, 10);
+		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
+			checksum += c.c_locator.l_driver[i];
+		for (i = 0; i < devid_size; i++) {
+			checksum += devidp[i];
+		}
+		free(devidp);
+
+		checksum += minor(c.c_locator.l_dev);
+		checksum += c.c_locator.l_blkno;
+		if (checksum != 42) {
+			/* overwritten later for more serious problems */
+			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
+			free(devid_buf);
+			continue;
+		}
+		c.c_locator.l_flags = 0;
+
+		/* use db location */
+		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
+			free(devid_buf);
+			(void) fclose(fp);
+			return (mdstealerror(ep, &c.c_mde));
+		}
+
+		/* free up devid if in use */
+		free(devid_buf);
+		c.c_locator.l_devid = (uint64_t)0;
+		c.c_locator.l_devid_flags = 0;
+	}
+	if ((fp) && (fclose(fp) != 0))
+		return (mdsyserror(ep, errno, META_DBCONF));
+
+	/* check for stale database */
+	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
+	c.c_id = 0;
+	c.c_setno = MD_LOCAL_SET;
+
+	/* Don't need device id information from this ioctl */
+	c.c_locator.l_devid = (uint64_t)0;
+	c.c_locator.l_devid_flags = 0;
+
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
+			return (mdstealerror(ep, &c.c_mde));
+		mdclrerror(&c.c_mde);
+	}
+
+	if (c.c_flags & MDDB_C_STALE)
+		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
+		    0, NULL));
+
+	/* success */
+	return (rval);
+}
+
+/*
+ * meta_db_minreplica - report the block count of the smallest replica
+ *	currently in use in the set.
+ *
+ *	Returns -1 when the replica list cannot be obtained or is empty.
+ */
+daddr_t
+meta_db_minreplica(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*head = NULL;
+	md_replicalist_t	*cur;
+	daddr_t			smallest = 0;
+
+	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &head, ep) < 0)
+		return (-1);
+
+	if (head == NULL)
+		return (-1);
+
+	/*
+	 * A running value of 0 means "nothing recorded yet", so the
+	 * next replica's size is taken unconditionally.
+	 */
+	for (cur = head; cur != NULL; cur = cur->rl_next) {
+		md_replica_t	*repp = cur->rl_repp;
+
+		if (smallest == 0 || repp->r_nblk < smallest)
+			smallest = repp->r_nblk;
+	}
+
+	metafreereplicalist(head);
+	return (smallest);
+}
+
+/*
+ * meta_get_replica_names
+ *  appends the name of every replica slice in the local set to *nlpp
+ *  and returns how many were appended.  For a set other than the local
+ *  set nothing is appended and 0 is returned; -1 is returned when the
+ *  replica list cannot be read.
+ */
+/*ARGSUSED*/
+int
+meta_get_replica_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*replicas = NULL;
+	md_replicalist_t	*cur;
+	mdnamelist_t		**endpp = nlpp;
+	int			nfound = 0;
+
+	assert(nlpp != NULL);
+
+	if (metaislocalset(sp)) {
+		if (metareplicalist(sp, MD_BASICNAME_OK, &replicas, ep) < 0) {
+			nfound = -1;
+		} else {
+			/*
+			 * endpp always refers to the end of the name
+			 * list, so each append is done without walking
+			 * the list again.
+			 */
+			for (cur = replicas; cur != NULL; cur = cur->rl_next) {
+				endpp = meta_namelist_append_wrapper(
+					endpp, cur->rl_repp->r_namep);
+				nfound++;
+			}
+		}
+	}
+
+	/* the replica list is only scaffolding; the names outlive it */
+	metafreereplicalist(replicas);
+	return (nfound);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_db_balance.c b/usr/src/lib/lvm/libmeta/common/meta_db_balance.c
new file mode 100644
index 0000000000..2becd5a5a4
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_db_balance.c
@@ -0,0 +1,1215 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Database location balancing code.
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+#include <sdssc.h>
+
+#define	MD_MINBALREP	2
+
+/*
+ * Stuff for DB balancing.
+ *
+ * md_ctlr_ops_t is the operation recorded for a drive in the
+ * controller list (the drv_op field of md_ctlr_drv_t).
+ */
+enum md_ctlr_ops_t {
+	DRV_NOP = 0,	/* drive already in the diskset; nothing pending */
+	DRV_ADD = 1,	/* drive is being added */
+	DRV_DEL = 2	/* drive is being deleted */
+};
+typedef enum md_ctlr_ops_t md_ctlr_ops_t;
+
+/* drive flag fields (bits of md_ctlr_drv_t.drv_flags) */
+#define	DRV_F_ERROR	0x1	/* drive or its replica is in error */
+#define	DRV_F_INDISKSET	0x2	/* drive is already part of the diskset */
+
+/*
+ * One drive hanging off a controller entry (md_ctlr_ctl_t).
+ */
+struct md_ctlr_drv_t {
+	md_ctlr_ops_t drv_op;		/* DRV_NOP, DRV_ADD or DRV_DEL */
+	int drv_flags;			/* DRV_F_* bits */
+	int drv_dbcnt;			/* replicas on this drive */
+	int drv_new_dbcnt;		/* NOTE(review): unused here - confirm */
+	daddr_t drv_dbsize;		/* size used when adding replicas */
+	mddrivename_t *drv_dnp;		/* the drive itself */
+	struct md_ctlr_drv_t *drv_next;	/* next drive on the same ctlr */
+};
+typedef struct md_ctlr_drv_t md_ctlr_drv_t;
+
+/*
+ * One controller in the balancing list, with the drives found on it.
+ */
+struct md_ctlr_ctl_t {
+	mdcinfo_t *ctl_cinfop;		/* controller info used for matching */
+	int ctl_dbcnt;			/* replicas on the counted drives */
+	int ctl_drcnt;			/* drives counted (not being deleted) */
+	md_ctlr_drv_t *ctl_drvs;	/* list of drives on this ctlr */
+	struct md_ctlr_ctl_t *ctl_next;	/* next controller */
+};
+typedef struct md_ctlr_ctl_t md_ctlr_ctl_t;
+
+/*
+ * Attach dbcnt replicas of dbsize blocks to the replica slice of the
+ * drive dnp in set sp.  Returns 0 on success, -1 on failure.
+ */
+static int
+add_replica(
+	mdsetname_t		*sp,
+	mddrivename_t		*dnp,
+	int			dbcnt,
+	daddr_t			dbsize,
+	md_error_t		*ep
+)
+{
+	mdnamelist_t		*slices = NULL;
+	mdname_t		*slicenp;
+	md_set_desc		*sd;
+	uint_t			slice;
+	int			rval = -1;
+
+	/* find the slice on this drive that holds replicas */
+	if (meta_replicaslice(dnp, &slice, ep) != 0)
+		return (-1);
+
+	slicenp = metaslicename(dnp, slice, ep);
+	if (slicenp == NULL)
+		return (-1);
+
+	/* meta_db_attach() takes a name list, so wrap the one slice */
+	(void) metanamelist_append(&slices, slicenp);
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd != NULL &&
+	    meta_db_attach(sp, slices, (MDCHK_DRVINSET | MDCHK_SET_LOCKED),
+	    (&sd->sd_ctime), dbcnt, dbsize, NULL, ep) != -1)
+		rval = 0;
+
+	metafreenamelist(slices);
+	return (rval);
+}
+
+/*
+ * Detach the replicas held on the replica slice of drive dnp in set
+ * sp.  Returns 0 on success, -1 on failure.
+ */
+static int
+del_replica(
+	mdsetname_t		*sp,
+	mddrivename_t		*dnp,
+	md_error_t		*ep
+)
+{
+	mdnamelist_t		*slices = NULL;
+	mdname_t		*slicenp;
+	uint_t			slice;
+	int			rval;
+
+	/* find the slice on this drive that holds replicas */
+	if (meta_replicaslice(dnp, &slice, ep) != 0)
+		return (-1);
+
+	slicenp = metaslicename(dnp, slice, ep);
+	if (slicenp == NULL)
+		return (-1);
+
+	/* meta_db_detach() takes a name list, so wrap the one slice */
+	(void) metanamelist_append(&slices, slicenp);
+
+	rval = (meta_db_detach(sp, slices, (MDFORCE_DS | MDFORCE_SET_LOCKED),
+	    NULL, ep) == -1) ? -1 : 0;
+
+	metafreenamelist(slices);
+	return (rval);
+}
+
+/*
+ * Report whether any replica in rlp that sits on the component named
+ * by np carries one of the read, format, data, master-block or write
+ * error flags.  Returns 1 if so, 0 otherwise.
+ */
+static int
+rep_has_err(md_replicalist_t *rlp, mdname_t *np)
+{
+	for (/* void */; rlp != NULL; rlp = rlp->rl_next) {
+		md_replica_t	*repp = rlp->rl_repp;
+
+		/* only replicas on the requested component are of interest */
+		if (strcmp(repp->r_namep->cname, np->cname) == 0 &&
+		    (repp->r_flags & (MDDB_F_EREAD | MDDB_F_EFMT |
+		    MDDB_F_EDATA | MDDB_F_EMASTER | MDDB_F_EWRITE)))
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Record a drive in the controller list, creating the controller entry
+ * if it does not exist yet.
+ *
+ *	clpp		head of the controller list (must not be NULL)
+ *	rlp		current replica list, consulted for replica errors
+ *	dnp		drive to record
+ *	dbcnt		number of replicas on the drive
+ *	dbsize		replica size to remember for the drive
+ *	cinfop		controller info for the drive; when NULL it is
+ *			looked up here, and the drive is also probed for
+ *			errors (vtoc unreadable or replica error flags)
+ *	indiskset	non-zero when the drive is already in the diskset
+ *	with_bus	non-zero to require the bus to match as well
+ *	errored		non-zero when the caller already knows the drive
+ *			is in error
+ *	ep		error return
+ *
+ * A drive that is already present in the list is marked DRV_DEL and
+ * removed from the controller's counts.  A drive not yet present is
+ * appended as DRV_NOP (indiskset) or DRV_ADD (otherwise).
+ *
+ * Returns 0 on success, and also when the drive is silently skipped
+ * because its replica slice could not be determined.  Returns -1 when
+ * a lookup fails or when an errored drive is being added.
+ */
+static int
+add_drv_to_ctl_lst(
+	md_ctlr_ctl_t		**clpp,
+	md_replicalist_t	*rlp,
+	mddrivename_t		*dnp,
+	int			dbcnt,
+	daddr_t			dbsize,
+	mdcinfo_t		*cinfop,
+	int			indiskset,
+	int			with_bus,
+	int			errored,
+	md_error_t		*ep
+)
+{
+	md_ctlr_drv_t		**dpp;
+	mdname_t		*np;
+	mdcinfo_t		*tcinfop;
+	char			*cmp_name_1,
+				*cmp_name_2;
+	int			not_found;
+
+	/*
+	 * The user must pass in a list head.
+	 */
+	assert(clpp != NULL);
+
+	if (cinfop == NULL) {
+		uint_t	rep_slice;
+
+		if (meta_replicaslice(dnp, &rep_slice, ep) != 0) {
+			/*
+			 * A failure to get the slice information can occur
+			 * because the drive has failed, if this is the
+			 * case then there is nothing that can be done
+			 * with this drive, so do not include it in the
+			 * list of drives. Clear the error and return.
+			 */
+			mdclrerror(ep);
+			return (0);
+		}
+
+		if ((np = metaslicename(dnp, rep_slice, ep)) == NULL)
+			return (-1);
+
+		if ((tcinfop = metagetcinfo(np, ep)) == NULL)
+			return (-1);
+
+		/* an unreadable vtoc marks the drive as errored */
+		if (metagetvtoc(np, FALSE, NULL, ep) == NULL)
+			errored = 1;
+
+		/* so does an error flag on one of its replicas */
+		if (rep_has_err(rlp, np))
+			errored = 1;
+	} else
+		tcinfop = cinfop;
+
+	for (/* void */; *clpp != NULL; clpp = &(*clpp)->ctl_next) {
+		/*
+		 * Try to locate ctlr.
+		 */
+		(void) sdssc_convert_cluster_path(tcinfop->cname, &cmp_name_1);
+		(void) sdssc_convert_cluster_path((*clpp)->ctl_cinfop->cname,
+		    &cmp_name_2);
+
+		/*
+		 * Controllers match on type, number and the first 16
+		 * characters of the converted name, plus the bus when
+		 * with_bus is set.
+		 */
+		if (tcinfop->ctype != (*clpp)->ctl_cinfop->ctype ||
+		    tcinfop->cnum != (*clpp)->ctl_cinfop->cnum ||
+		    strncmp(cmp_name_1, cmp_name_2, 16) != 0 ||
+		    (with_bus && tcinfop->bus != (*clpp)->ctl_cinfop->bus)) {
+			not_found = 1;
+		} else
+			not_found = 0;
+
+
+		sdssc_convert_path_free(cmp_name_1);
+		sdssc_convert_path_free(cmp_name_2);
+
+		if (not_found)
+			continue;
+
+		/*
+		 * Found ctlr, try to locate the drive.
+		 */
+		for (dpp = &(*clpp)->ctl_drvs; *dpp != NULL;
+		    dpp = &(*dpp)->drv_next) {
+			(void) sdssc_convert_cluster_path(
+			    (*dpp)->drv_dnp->cname, &cmp_name_1);
+			(void) sdssc_convert_cluster_path(dnp->cname,
+			    &cmp_name_2);
+
+			not_found = strcmp(cmp_name_1, cmp_name_2);
+
+			sdssc_convert_path_free(cmp_name_1);
+			sdssc_convert_path_free(cmp_name_2);
+
+			if (not_found)
+			    continue;
+
+			/*
+			 * Found drive, must be deleting.
+			 */
+			(*dpp)->drv_op = DRV_DEL;
+			if (indiskset)
+				(*dpp)->drv_flags |= DRV_F_INDISKSET;
+			if (errored) {
+				mdclrerror(ep);
+				(*dpp)->drv_flags |= DRV_F_ERROR;
+			}
+			/* a drive being deleted no longer counts */
+			(*clpp)->ctl_dbcnt -= (*dpp)->drv_dbcnt;
+			(*clpp)->ctl_drcnt--;
+			return (0);
+		}
+		/*
+		 * The ctlr was found, but not the drive, so add
+		 * the drive
+		 */
+		(*dpp) = Zalloc(sizeof (**dpp));
+
+
+		if (indiskset) {
+			(*dpp)->drv_op = DRV_NOP;
+			(*dpp)->drv_flags |= DRV_F_INDISKSET;
+			if (errored) {
+				mdclrerror(ep);
+				(*dpp)->drv_flags |= DRV_F_ERROR;
+			}
+		} else {
+			(*dpp)->drv_op = DRV_ADD;
+			/*
+			 * An errored drive cannot be added.  The entry
+			 * just allocated stays linked in the list but is
+			 * not filled in or counted.
+			 */
+			if (errored) {
+				(*dpp)->drv_flags |= DRV_F_ERROR;
+				return (-1);
+			}
+			assert(dbsize != 0);
+		}
+		(*dpp)->drv_dbcnt = dbcnt;
+		(*dpp)->drv_dbsize = dbsize;
+		(*dpp)->drv_dnp = dnp;
+		(*clpp)->ctl_dbcnt += dbcnt;
+		(*clpp)->ctl_drcnt++;
+		return (0);
+	}
+	/*
+	 * No ctlr was located, so add the ctlr, then recurse to add the
+	 * drive to the ctlr.
+	 */
+	(*clpp) = Zalloc(sizeof (**clpp));
+
+	(*clpp)->ctl_cinfop = tcinfop;
+
+	/*
+	 * clpp now points at the new entry and tcinfop is non-NULL, so
+	 * the recursive call matches it at once and does not repeat the
+	 * lookups above.
+	 */
+	return (add_drv_to_ctl_lst(clpp, rlp, dnp, dbcnt, dbsize, tcinfop,
+	    indiskset, with_bus, errored, ep));
+}
+
+/*
+ * Add one replica to some drive on controller c, preferring the drive
+ * that currently holds the fewest replicas.
+ *
+ *	sp			set being balanced
+ *	c			controller to add a replica to
+ *	minimum_replicas	upper bound on the per-drive replica count
+ *				tried before giving up
+ *	ep			error return
+ *
+ * Returns 1 when a replica was added, 0 when no drive could take one,
+ * and -1 on an error that should stop the balancing.
+ */
+static int
+add_replica_to_ctl(
+	mdsetname_t		*sp,
+	md_ctlr_ctl_t		*c,
+	int			minimum_replicas,
+	md_error_t		*ep
+)
+{
+	md_ctlr_drv_t		*d;
+	int			maxdb = 0;
+
+	/*
+	 * If this ctrl has no "usable" drives, assert() or just return if
+	 * assert()'s are turned off.
+	 */
+	if (c->ctl_drcnt == 0) {
+		assert(0);
+		return (0);
+	}
+
+	/*
+	 * Determine the largest DB count on a drive.
+	 */
+	for (d = c->ctl_drvs; d != NULL; d = d->drv_next)
+		if (d->drv_dbcnt > maxdb && d->drv_op != DRV_DEL)
+			maxdb = d->drv_dbcnt;
+
+	/*
+	 * Make sure we start at a reasonable number
+	 */
+	if (maxdb == 0)
+		maxdb = 1;
+
+	/*
+	 * Add a replica to a drive on this ctrl.
+	 *
+	 * Each pass only considers drives holding fewer than maxdb
+	 * replicas; when none qualifies maxdb is raised and the drives
+	 * are scanned again.
+	 */
+	/*CONSTCOND*/
+	while (1) {
+		for (d = c->ctl_drvs; d != NULL; d = d->drv_next) {
+			/*
+			 * If this drive is being deleted, skip it.
+			 */
+			if (d->drv_op == DRV_DEL)
+				continue;
+
+			if (d->drv_flags & DRV_F_ERROR)
+				continue;
+			/*
+			 * Make sure that the replicas are distributed across
+			 * the drives.
+			 */
+			if (d->drv_dbcnt >= maxdb)
+				continue;
+			/*
+			 * See if the drive already has replicas,
+			 * if it does, then delete the existing
+			 * replica(s) and re-add n+1 replicas to the drive.
+			 */
+			/* ==== Vulnerability - no DB's start ==== */
+			if (d->drv_dbcnt > 0) {
+				if (del_replica(sp, d->drv_dnp, ep) == -1) {
+					d->drv_flags |= DRV_F_ERROR;
+					/* fatal unless drive is in the set */
+					if (! (d->drv_flags & DRV_F_INDISKSET))
+						return (-1);
+					mdclrerror(ep);
+					continue;
+				}
+			}
+			if (add_replica(sp, d->drv_dnp, (d->drv_dbcnt + 1),
+			    d->drv_dbsize, ep) == -1) {
+				/* the old replicas are gone; fix counts */
+				if (d->drv_dbcnt) {
+					c->ctl_dbcnt -= d->drv_dbcnt;
+					d->drv_dbcnt = 0;
+				}
+
+				if (mdismddberror(ep, MDE_TOOMANY_REPLICAS))
+					return (-1);
+
+				if (mdismddberror(ep, MDE_REPLICA_TOOSMALL))
+					return (-1);
+
+				d->drv_flags |= DRV_F_ERROR;
+				if (! (d->drv_flags & DRV_F_INDISKSET))
+					return (-1);
+				mdclrerror(ep);
+				continue;
+			}
+
+			d->drv_dbcnt++;
+			c->ctl_dbcnt++;
+			/* ==== Vulnerability - no DB's end ==== */
+			return (1);
+		}
+		maxdb++;
+		if (maxdb > minimum_replicas)
+			return (0);
+	}
+	/*NOTREACHED*/
+}
+
+/*
+ * Remove one replica from some drive on controller c, preferring the
+ * drive that currently holds the most replicas.
+ *
+ *	sp	set being balanced
+ *	c	controller to remove a replica from
+ *	ep	error return
+ *
+ * Returns 1 when a replica was removed, 0 when there was nothing that
+ * could be removed, and -1 on an error that should stop the balancing.
+ */
+static int
+del_replica_from_ctl(
+	mdsetname_t		*sp,
+	md_ctlr_ctl_t		*c,
+	md_error_t		*ep
+)
+{
+	md_ctlr_drv_t		*d;
+	int			maxdb = 0;
+
+	/*
+	 * If this ctrl has no "usable" drives, assert() or just return if
+	 * assert()'s are turned off.
+	 */
+	if (c->ctl_drcnt == 0) {
+		assert(0);
+		return (0);
+	}
+
+	/*
+	 * Determine the largest DB count on a drive.
+	 */
+	for (d = c->ctl_drvs; d != NULL; d = d->drv_next)
+		if (d->drv_dbcnt > maxdb && d->drv_op != DRV_DEL)
+			maxdb = d->drv_dbcnt;
+
+	if (maxdb == 0)
+		return (0);
+
+	/*
+	 * Delete a replica from a drive on this ctrl.
+	 *
+	 * Each pass only considers drives holding at least maxdb
+	 * replicas; when none qualifies maxdb is lowered and the drives
+	 * are scanned again.
+	 */
+	/*CONSTCOND*/
+	while (1) {
+		for (d = c->ctl_drvs; d != NULL; d = d->drv_next) {
+			/*
+			 * If this drive is being deleted, skip it.
+			 */
+			if (d->drv_op == DRV_DEL)
+				continue;
+
+			/*
+			 * Make sure that there are replicas on this drive to
+			 * delete.
+			 */
+			if (d->drv_dbcnt == 0)
+				continue;
+
+			if (d->drv_flags & DRV_F_ERROR)
+				continue;
+
+			/*
+			 * We need to keep the DB's distributed across the
+			 * drives.
+			 */
+			if (d->drv_dbcnt < maxdb)
+				continue;
+
+			/*
+			 * Delete all the replicas on the drive.
+			 */
+			/* ==== Vulnerability - no DB's start ==== */
+			if (del_replica(sp, d->drv_dnp, ep) == -1) {
+				d->drv_flags |= DRV_F_ERROR;
+				/* fatal unless the drive is in the set */
+				if (! (d->drv_flags & DRV_F_INDISKSET))
+					return (-1);
+				mdclrerror(ep);
+				continue;
+			}
+			d->drv_dbcnt--;
+			c->ctl_dbcnt--;
+			/*
+			 * If there is still a dbcnt for this drive, then add
+			 * back the needed DB's.
+			 */
+			if (d->drv_dbcnt > 0) {
+				if (add_replica(sp, d->drv_dnp, d->drv_dbcnt,
+				    d->drv_dbsize, ep) == -1) {
+					/* nothing left on the drive now */
+					c->ctl_dbcnt -= d->drv_dbcnt;
+					d->drv_dbcnt = 0;
+
+					if (mdismddberror(ep,
+					    MDE_TOOMANY_REPLICAS))
+						return (-1);
+
+					d->drv_flags |= DRV_F_ERROR;
+					if (! (d->drv_flags & DRV_F_INDISKSET))
+						return (-1);
+					mdclrerror(ep);
+					continue;
+				}
+			}
+			/* ==== Vulnerability - no DB's end ==== */
+			return (1);
+		}
+		maxdb--;
+		if (maxdb <= 0)
+			return (0);
+	}
+	/*NOTREACHED*/
+}
+
+/*
+ * Detach the replicas from every drive in the controller list that is
+ * either in error or marked for deletion.  A detach failure is ignored
+ * for a drive that is in the diskset and fatal otherwise.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+del_replicas(mdsetname_t *sp, md_ctlr_ctl_t *clp, md_error_t *ep)
+{
+	md_ctlr_ctl_t		*ctl;
+	md_ctlr_drv_t		*drv;
+
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		for (drv = ctl->ctl_drvs; drv != NULL; drv = drv->drv_next) {
+			mdnamelist_t	*slices = NULL;
+			mdname_t	*slicenp;
+			uint_t		slice;
+			int		detached;
+
+			/* only errored or to-be-deleted drives qualify */
+			if ((drv->drv_flags & DRV_F_ERROR) == 0 &&
+			    drv->drv_op != DRV_DEL)
+				continue;
+
+			/* nothing to remove from this drive */
+			if (drv->drv_dbcnt == 0)
+				continue;
+
+			if (meta_replicaslice(drv->drv_dnp, &slice, ep) != 0)
+				return (-1);
+
+			slicenp = metaslicename(drv->drv_dnp, slice, ep);
+			if (slicenp == NULL)
+				return (-1);
+
+			(void) metanamelist_append(&slices, slicenp);
+
+			/*
+			 * Delete the replicas listed.
+			 */
+			detached = meta_db_detach(sp, slices,
+			    (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL, ep);
+			metafreenamelist(slices);
+
+			if (detached != -1)
+				continue;
+
+			if (! (drv->drv_flags & DRV_F_INDISKSET))
+				return (-1);
+			mdclrerror(ep);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * free_ctlr_lst -- release every drive entry and every controller entry
+ * on the list, then reset the caller's list head to NULL.
+ */
+static void
+free_ctlr_lst(md_ctlr_ctl_t **clpp)
+{
+	md_ctlr_ctl_t		*ctl = *clpp;
+
+	while (ctl != NULL) {
+		md_ctlr_ctl_t	*next_ctl = ctl->ctl_next;
+		md_ctlr_drv_t	*drv = ctl->ctl_drvs;
+
+		/* pick up the successor before the node is released */
+		while (drv != NULL) {
+			md_ctlr_drv_t	*next_drv = drv->drv_next;
+
+			Free(drv);
+			drv = next_drv;
+		}
+		Free(ctl);
+		ctl = next_ctl;
+	}
+	*clpp = NULL;
+}
+
+/*
+ * build_ctlr_lst -- populate *clpp with the drives currently in the set
+ * (curdd) followed by the drives being operated on (opdd).
+ *
+ * sp		set being balanced
+ * clpp		controller list to add to
+ * opdd		drives being operated on; added with a replica count of 0
+ *		and the caller supplied dbsize
+ * curdd	drives already in the set; added with their own replica
+ *		count and size (the minimum replica size is substituted
+ *		when a drive records a size of 0)
+ * with_bus	passed through to add_drv_to_ctl_lst()
+ * dbsize	replica size used for the opdd drives
+ * ep		error return
+ *
+ * The minimum replica size is worked out on the first call and cached in
+ * a function-static for all later calls.
+ *
+ * RETURN
+ *	 0	success
+ *	-1	error, ep is set; *clpp may hold a partially built list
+ *		which the caller remains responsible for freeing
+ */
+static int
+build_ctlr_lst(
+	mdsetname_t		*sp,
+	md_ctlr_ctl_t		**clpp,
+	md_drive_desc		*opdd,
+	md_drive_desc		*curdd,
+	int			with_bus,
+	daddr_t			dbsize,
+	md_error_t		*ep
+)
+{
+	md_drive_desc			*d;
+	md_set_desc			*sd;
+	daddr_t				nblks;
+	md_replicalist_t		*rlp = NULL;
+	int				rval = -1;
+	static	daddr_t			min_dbsize = 0;
+
+	if (min_dbsize == 0) {
+		if ((nblks = meta_db_minreplica(sp, ep)) < 0) {
+			min_dbsize = MD_DBSIZE;
+
+			if (! metaislocalset(sp)) {
+				if ((sd = metaget_setdesc(sp, ep)) == NULL)
+					return (-1);
+
+				if (MD_MNSET_DESC(sd))
+					min_dbsize = MD_MN_DBSIZE;
+			}
+			mdclrerror(ep);
+		} else
+			min_dbsize = nblks;
+	}
+
+	/*
+	 * Having no replicas, or not owning the set, is not fatal here;
+	 * the list is then built with rlp left as NULL.
+	 */
+	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
+		if (! mdismddberror(ep, MDE_DB_NODB) &&
+		    ! mdismddberror(ep, MDE_DB_NOTOWNER))
+			return (-1);
+		mdclrerror(ep);
+	}
+
+	/*
+	 * Add drives currently in the set to the ctlr list.
+	 */
+	for (d = curdd; d != NULL; d = d->dd_next) {
+		daddr_t	this_dbsize = d->dd_dbsize;
+
+		if (this_dbsize == 0)
+			this_dbsize = min_dbsize;
+
+		if (add_drv_to_ctl_lst(clpp, rlp, d->dd_dnp, d->dd_dbcnt,
+		    this_dbsize, NULL, TRUE, with_bus, 0, ep) == -1)
+			goto out;
+	}
+
+	/*
+	 * Add the drives that are being operated on to the ctlr list.
+	 */
+	for (d = opdd; d != NULL; d = d->dd_next)
+		if (add_drv_to_ctl_lst(clpp, rlp, d->dd_dnp, 0, dbsize, NULL,
+		    FALSE, with_bus, 0, ep) == -1)
+			goto out;
+
+	rval = 0;
+
+out:
+	/*
+	 * Single exit once the replica list may exist, so that it is
+	 * released on the failure paths as well as on success.
+	 */
+	metafreereplicalist(rlp);
+	return (rval);
+}
+
+/*
+ * count_replica_on_ctl -- account for ONE replica that would be added to
+ * (adding != 0) or removed from (adding == 0) a drive on controller c.
+ * Only the drv_new_dbcnt working counters and *db_cnt are changed; no
+ * replica is created or deleted here.
+ *
+ * c			controller whose drives are examined
+ * adding		TRUE to count an addition, FALSE a deletion
+ * db_cnt		running replica count for the controller
+ * minimum_replicas	ceiling for the per-drive count while adding
+ *
+ * RETURN
+ *	 0	one replica was counted, or there was nothing to do
+ *	-1	no drive on the controller could take the change
+ */
+static int
+count_replica_on_ctl(
+	md_ctlr_ctl_t		*c,
+	int			adding,
+	int			*db_cnt,
+	int			minimum_replicas
+)
+{
+	md_ctlr_drv_t		*d;
+	int			maxdb = 0;
+
+	/*
+	 * If this ctrl has no "usable" drives, nothing to do.
+	 */
+	if (c->ctl_drcnt == 0)
+		return (0);
+
+	/*
+	 * Determine the largest DB count on a drive.
+	 */
+	for (d = c->ctl_drvs; d != NULL; d = d->drv_next)
+		if (d->drv_new_dbcnt > maxdb && d->drv_op != DRV_DEL)
+			maxdb = d->drv_new_dbcnt;
+
+	/*
+	 * Make sure we start at a reasonable number
+	 */
+	if (maxdb == 0) {
+		if (!adding)
+			return (0);
+		maxdb = 1;
+	}
+
+	/*
+	 * Count or Un-Count replicas that would be
+	 * added or deleted respectively.
+	 */
+	/*CONSTCOND*/
+	while (1) {
+		for (d = c->ctl_drvs; d != NULL; d = d->drv_next) {
+			/*
+			 * If this drive is being deleted, skip it.
+			 */
+			if (d->drv_op == DRV_DEL)
+				continue;
+
+			/*
+			 * If the drive is errored and adding, skip it.
+			 */
+			if (adding && (d->drv_flags & DRV_F_ERROR))
+				continue;
+
+			/*
+			 * Make sure that the replicas are distributed across
+			 * the drives.
+			 */
+			if (adding) {
+				if (d->drv_new_dbcnt >= maxdb)
+					continue;
+			} else {
+				if (d->drv_new_dbcnt == 0)
+					continue;
+				if (d->drv_new_dbcnt < maxdb)
+					continue;
+			}
+
+			/*
+			 * Count or Un-Count replicas here.
+			 */
+			if (adding) {
+				mdpart_t	*partp;
+				uint_t		rep_slice;
+				md_error_t	mde = mdnullerror;
+
+				/*
+				 * mde is a scratch error that is never
+				 * reported.  It must start out as a null
+				 * error and be cleared again on failure so
+				 * that nothing stored in it is left behind.
+				 */
+				if (meta_replicaslice(d->drv_dnp,
+				    &rep_slice, &mde) != 0) {
+					mdclrerror(&mde);
+					continue;
+				}
+
+				partp = &d->drv_dnp->vtoc.parts[rep_slice];
+				if (! partp)
+					continue;
+
+				/* one more replica must fit in the slice */
+				if (((d->drv_new_dbcnt + 1) * d->drv_dbsize) >
+				    (partp->size - 16))
+					continue;
+				(*db_cnt)++;
+				d->drv_new_dbcnt++;
+			} else {
+				(*db_cnt)--;
+				d->drv_new_dbcnt--;
+			}
+			return (0);
+		}
+
+		/*
+		 * This should make sure they get spread
+		 * around.  This is to emulate the {add,del}_replica
+		 * routines.
+		 */
+		if (adding) {
+			maxdb++;
+			if (maxdb > minimum_replicas)
+				return (-1);
+		} else {
+			maxdb--;
+			if (maxdb <= 0)
+				return (-1);
+		}
+	}
+	/*NOTREACHED*/
+}
+
+/*
+ * count_replicas -- dry run of a balance: work out how many replicas the
+ * controller list would hold if each usable controller were brought to
+ * min_reps.  Only the drv_new_dbcnt working counters are modified.
+ *
+ * With more than two usable controllers every controller is taken to
+ * exactly min_reps replicas.  With two or fewer each controller is only
+ * grown, up to min_reps times its drive count.
+ *
+ * RETURN
+ *	>= 0	total replica count over the usable controllers
+ *	-1	a controller could not reach its target
+ */
+static int
+count_replicas(
+	md_ctlr_ctl_t		*clp,
+	int			min_reps
+)
+{
+	md_ctlr_ctl_t		*ctl;
+	md_ctlr_drv_t		*drv;
+	int			cnt;
+	int			uctlrs = 0;
+	int			total_cnt = 0;
+
+	/*
+	 * Count the usable controllers and reset the working replica
+	 * count of each of their drives to the real count.
+	 */
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		if (ctl->ctl_drcnt <= 0)
+			continue;
+
+		uctlrs++;
+		for (drv = ctl->ctl_drvs; drv != NULL; drv = drv->drv_next)
+			drv->drv_new_dbcnt = drv->drv_dbcnt;
+	}
+
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		if (ctl->ctl_drcnt == 0)
+			continue;
+
+		cnt = ctl->ctl_dbcnt;
+
+		if (uctlrs > 2) {
+			/*
+			 * Count the replicas that would be added.
+			 */
+			while (cnt < min_reps) {
+				if (count_replica_on_ctl(ctl, TRUE,
+				    &cnt, min_reps))
+					return (-1);
+			}
+
+			/*
+			 * Un-Count the replicas that would be deleted.
+			 */
+			while (cnt > min_reps) {
+				if (count_replica_on_ctl(ctl, FALSE,
+				    &cnt, min_reps))
+					return (-1);
+			}
+		} else {
+			/*
+			 * Count the replicas that would be added.
+			 */
+			while (cnt < (min_reps * ctl->ctl_drcnt)) {
+				if (count_replica_on_ctl(ctl, TRUE,
+				    &cnt, min_reps))
+					return (-1);
+			}
+		}
+
+		total_cnt += cnt;
+	}
+
+	return (total_cnt);
+}
+
+/*
+ * balance_replicas -- choose a controller list layout and a per-controller
+ * replica target for the set.
+ *
+ * The controller list is (re)built into *clpp, first without and then,
+ * if needed, with busses treated as separate controllers.  For each
+ * layout a decreasing series of candidate targets n is tried, and the
+ * first n for which count_replicas() yields a total in the range
+ * 1..MDDB_NLB is stored in *minimum_replicas.  If no candidate fits any
+ * layout, *minimum_replicas is set to 1 and the last list built (busses
+ * not separate) is returned.
+ *
+ * RETURN
+ *	 0	*clpp and *minimum_replicas are set
+ *	-1	build_ctlr_lst() failed, ep is set; *clpp may hold a
+ *		partially built list
+ */
+static int
+balance_replicas(
+	mdsetname_t		*sp,
+	md_ctlr_ctl_t		**clpp,
+	md_drive_desc		*opdd,
+	md_drive_desc		*curdd,
+	daddr_t			dbsize,
+	int			*minimum_replicas,
+	md_error_t		*ep
+)
+{
+	int			n;
+	int			rctlrs = 0;
+	int			uctlrs;
+	int			ructlrs;
+	int			octlrs;
+	int			save_done;
+	int			prevcnt = 0, issame = 1;
+	uint_t			drvcnt = ~0U;
+	uint_t			save_cnum;
+	mhd_ctlrtype_t		save_ctype;
+	char			save_cname[16],
+				*cmp_name_1,
+				*cmp_name_2;
+	int			reps;
+	md_ctlr_ctl_t		*c;
+
+	/*
+	 * Build a ctlr list with SSA-100 busses NOT as separate controllers.
+	 */
+	if (build_ctlr_lst(sp, clpp, opdd, curdd, FALSE, dbsize, ep) == -1)
+		return (-1);
+
+	/*
+	 * Determine what controllers are usable in the sense of being able to
+	 * add a replica to a drive on the controller.
+	 * Also find the minimum number of drives on a controller.
+	 */
+	for (c = *clpp; c != NULL; c = c->ctl_next) {
+		if (c->ctl_drcnt > 0) {
+			rctlrs++;
+			drvcnt = min(drvcnt, c->ctl_drcnt);
+			/* issame stays 1 only if every count is equal */
+			if (prevcnt == 0)
+				prevcnt = c->ctl_drcnt;
+			else if (prevcnt != c->ctl_drcnt)
+				issame = 0;
+		}
+	}
+
+	if ((rctlrs <= 2) || (issame && (drvcnt >= 30)))
+		goto cont;
+
+	/*
+	 * If here: Handling 3 or more controllers most
+	 *	    likely with non-symmetrical number of
+	 *	    disks. The number of replicas will be
+	 *	    the minimum number of disks on a controller.
+	 *
+	 *	    The main point is to ensure that a
+	 *	    controller does not have more than half
+	 *	    of the replicas.
+	 */
+	drvcnt = min(drvcnt, 12);
+	drvcnt = max(drvcnt, MD_MINBALREP);
+
+	/*
+	 * Can we find fewer than the maximum replicas by reducing the
+	 * number of replicas per drive.
+	 */
+	for (n = drvcnt; n > 0; n--) {
+		reps = count_replicas(*clpp, n);
+		if (reps > 0 && reps <= MDDB_NLB) {
+			*minimum_replicas = n;
+			return (0);
+		}
+	}
+
+cont:
+	free_ctlr_lst(clpp);
+
+	/*
+	 * Build a ctlr list with SSA-100 busses as separate controllers.
+	 *
+	 * If Here: Try to put 2 replicas per controller/bus
+	 *	    If that doesn't work put 1 replica per controller/bus
+	 */
+	if (build_ctlr_lst(sp, clpp, opdd, curdd, TRUE, dbsize, ep) == -1)
+		return (-1);
+
+	/*
+	 * If the number of "real" controllers is 2, special handling may be
+	 * needed.
+	 */
+	if (rctlrs != 2) {
+		drvcnt = MD_MINBALREP;
+		goto other;
+	}
+
+	/*
+	 * Determine what controllers are usable in the sense of being able to
+	 * add a replica to a drive on the controller.
+	 * Also find the minimum number of drives on a controller.
+	 */
+	drvcnt = ~0U;
+	uctlrs = 0;
+	for (c = *clpp; c != NULL; c = c->ctl_next) {
+		if (c->ctl_drcnt > 0) {
+			uctlrs++;
+			drvcnt = min(drvcnt, c->ctl_drcnt);
+		}
+	}
+
+	/*
+	 * If the number of controllers is not changed, continue with original
+	 * strategy.
+	 */
+	if (uctlrs == rctlrs) {
+		drvcnt = MD_MINBALREP;
+		goto other;
+	}
+
+	/*
+	 * Check the distribution of bus ctlrs across real controllers.
+	 */
+	ructlrs = 0;
+	octlrs = 0;
+	save_done = 0;
+	for (c = *clpp; c != NULL; c = c->ctl_next) {
+		if (c->ctl_drcnt == 0)
+			continue;
+
+		/* the first usable entry is the one all others compare to */
+		if (! save_done) {
+			save_cnum = c->ctl_cinfop->cnum;
+			save_ctype = c->ctl_cinfop->ctype;
+			(void) strncpy(save_cname, c->ctl_cinfop->cname, 16);
+			save_done = 1;
+		}
+
+		(void) sdssc_convert_cluster_path(c->ctl_cinfop->cname,
+		    &cmp_name_1);
+		(void) sdssc_convert_cluster_path(save_cname, &cmp_name_2);
+
+		/* octlrs: differs from the saved entry; ructlrs: matches */
+		if (save_ctype != c->ctl_cinfop->ctype ||
+		    save_cnum != c->ctl_cinfop->cnum ||
+		    strncmp(cmp_name_1, cmp_name_2, 16) != 0)
+			octlrs++;
+		else
+			ructlrs++;
+
+		sdssc_convert_path_free(cmp_name_1);
+		sdssc_convert_path_free(cmp_name_2);
+	}
+
+	/*
+	 * Take the largest of the counts
+	 */
+	ructlrs = max(ructlrs, octlrs);
+
+	/*
+	 * If the distribution of bus controllers is half of the total, then
+	 * this layout strategy will work, do it.
+	 */
+	if ((uctlrs / 2) == ructlrs) {
+		drvcnt = MD_MINBALREP;
+		goto other;
+	}
+
+	/*
+	 * If here, there is a distribution of bus controllers that will cause
+	 * the real controller distribution to be unbalanced, so a different
+	 * strategy is used.
+	 */
+	free_ctlr_lst(clpp);
+
+	/*
+	 * Build the ctlr list with SSA-100 busses NOT as separate controllers.
+	 */
+	if (build_ctlr_lst(sp, clpp, opdd, curdd, FALSE, dbsize, ep) == -1)
+		return (-1);
+
+	/*
+	 * Make ctl_drcnt limit the number of replicas
+	 */
+	for (c = *clpp; c != NULL; c = c->ctl_next)
+		c->ctl_drcnt = min(drvcnt, c->ctl_drcnt);
+
+	/*
+	 * Try at least MD_MINBALREP's per controller after changing ctl_drcnt
+	 */
+	drvcnt = MD_MINBALREP;
+
+other:
+	/*
+	 * Can we find fewer than the maximum replicas by reducing the number
+	 * of replicas per drive.
+	 */
+	for (n = drvcnt; n > 0; n--) {
+		reps = count_replicas(*clpp, n);
+		if (reps > 0 && reps <= MDDB_NLB) {
+			*minimum_replicas = n;
+			return (0);
+		}
+	}
+
+	free_ctlr_lst(clpp);
+
+	/*
+	 * Build a ctlr list with SSA-100 busses NOT as separate controllers.
+	 *
+	 * If Here: Try to put 2 replicas per controller (not on busses)
+	 *	    If that doesn't work put 1 replica per controller
+	 */
+	if (build_ctlr_lst(sp, clpp, opdd, curdd, FALSE, dbsize, ep) == -1)
+		return (-1);
+
+	/*
+	 * Can we find fewer than the maximum replicas by reducing the
+	 * number of replicas per drive.
+	 */
+	for (n = MD_MINBALREP; n > 0; n--) {
+		reps = count_replicas(*clpp, n);
+		if (reps > 0 && reps <= MDDB_NLB) {
+			*minimum_replicas = n;
+			return (0);
+		}
+	}
+
+	/*
+	 * Return a ctrl list that does not include the SSA-100 buses as
+	 * separate controllers.  This will create fewer separate controllers.
+	 */
+	*minimum_replicas = 1;
+	return (0);
+}
+
+/*
+ * morethan2_ctl_balance -- bring every usable controller on the list to
+ * exactly min_reps replicas, adding first and then deleting.  An add or
+ * delete step that returns 0 ends that phase for the controller.
+ *
+ * RETURN
+ *	 0	all controllers processed
+ *	-1	an add or delete step failed, ep is set
+ */
+static int
+morethan2_ctl_balance(
+	mdsetname_t		*sp,
+	md_ctlr_ctl_t		*clp,
+	int			min_reps,
+	md_error_t		*ep
+)
+{
+	md_ctlr_ctl_t		*ctl;
+	int			rv;
+
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		if (ctl->ctl_drcnt == 0)
+			continue;
+
+		/* grow towards the target */
+		while (ctl->ctl_dbcnt < min_reps) {
+			rv = add_replica_to_ctl(sp, ctl, min_reps, ep);
+			if (rv < 0)
+				return (-1);
+			if (rv == 0)
+				break;
+		}
+
+		/* shrink towards the target */
+		while (ctl->ctl_dbcnt > min_reps) {
+			rv = del_replica_from_ctl(sp, ctl, ep);
+			if (rv < 0)
+				return (-1);
+			if (rv == 0)
+				break;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * lessthan3_ctl_balance -- bring every usable controller on the list to
+ * min_reps replicas per drive (min_reps * ctl_drcnt in total), adding
+ * first and then deleting.  An add or delete step that returns 0 ends
+ * that phase for the controller.
+ *
+ * RETURN
+ *	 0	all controllers processed
+ *	-1	an add or delete step failed, ep is set
+ */
+static int
+lessthan3_ctl_balance(
+	mdsetname_t		*sp,
+	md_ctlr_ctl_t		*clp,
+	int			min_reps,
+	md_error_t		*ep
+)
+{
+	md_ctlr_ctl_t		*ctl;
+	int			rv;
+
+	for (ctl = clp; ctl != NULL; ctl = ctl->ctl_next) {
+		if (ctl->ctl_drcnt == 0)
+			continue;
+
+		/* grow towards the target */
+		while (ctl->ctl_dbcnt < (min_reps * ctl->ctl_drcnt)) {
+			rv = add_replica_to_ctl(sp, ctl, min_reps, ep);
+			if (rv < 0)
+				return (-1);
+			if (rv == 0)
+				break;
+		}
+
+		/* shrink towards the target */
+		while (ctl->ctl_dbcnt > (min_reps * ctl->ctl_drcnt)) {
+			rv = del_replica_from_ctl(sp, ctl, ep);
+			if (rv < 0)
+				return (-1);
+			if (rv == 0)
+				break;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * try_again -- decide whether a failed balance pass is worth retrying.
+ *
+ * RETURN
+ *	TRUE	the error is MDE_TOOMANY_REPLICAS, or every drive marked
+ *		DRV_F_ERROR is also marked DRV_F_INDISKSET
+ *	FALSE	some errored drive is not marked DRV_F_INDISKSET
+ */
+static int
+try_again(
+	md_ctlr_ctl_t	*clp,
+	md_error_t	*ep
+)
+{
+	md_ctlr_ctl_t	*ctl;
+	md_ctlr_drv_t	*drv;
+
+	if (mdismddberror(ep, MDE_TOOMANY_REPLICAS))
+		return (TRUE);
+
+	/*
+	 * Look for a drive that is in error without being in the diskset;
+	 * a single one is enough to rule out a retry.
+	 */
+	ctl = clp;
+	while (ctl != NULL) {
+		drv = ctl->ctl_drvs;
+		while (drv != NULL) {
+			if ((drv->drv_flags &
+			    (DRV_F_INDISKSET|DRV_F_ERROR)) == DRV_F_ERROR)
+				return (FALSE);
+			drv = drv->drv_next;
+		}
+		ctl = ctl->ctl_next;
+	}
+	return (TRUE);
+}
+
+/*
+ * meta_db_balance -- balance the replicas of a set across its controllers.
+ *
+ * sp		set being balanced
+ * opdd		drives being operated on
+ * curdd	drives currently in the set
+ * dbsize	replica size used for the opdd drives
+ * ep		error return
+ *
+ * A layout and per-controller target are chosen by balance_replicas(),
+ * replicas are added/deleted to reach the target, and then the replicas
+ * on drives marked for deletion or in error are removed.  If the first
+ * balance pass failed in a way try_again() considers recoverable, a
+ * second pass is made after that removal.
+ *
+ * RETURN
+ *	 0	success
+ *	-1	error, ep is set
+ */
+int
+meta_db_balance(
+	mdsetname_t		*sp,
+	md_drive_desc		*opdd,
+	md_drive_desc		*curdd,
+	daddr_t			dbsize,
+	md_error_t		*ep
+)
+{
+	int			min_reps;
+	md_ctlr_ctl_t		*c, *cl = NULL;
+	int			uctlrs = 0;
+	int			retry = 0;
+	int			rval = 0;
+
+	if (balance_replicas(sp, &cl, opdd, curdd, dbsize, &min_reps,
+	    ep) == -1) {
+		/*
+		 * A failed build can leave a partially built list behind;
+		 * do not leak it.
+		 */
+		free_ctlr_lst(&cl);
+		return (-1);
+	}
+
+	/*
+	 * Determine what controllers are usable in the sense of being able to
+	 * add a replica to a drive on the controller.
+	 */
+	for (c = cl; c != NULL; c = c->ctl_next)
+		if (c->ctl_drcnt > 0)
+			uctlrs++;
+
+	/*
+	 * Add replicas to achieve a balance.
+	 */
+	if (uctlrs > 2)
+		rval = morethan2_ctl_balance(sp, cl, min_reps, ep);
+	else
+		rval = lessthan3_ctl_balance(sp, cl, min_reps, ep);
+
+	if (rval) {
+		if ((retry = try_again(cl, ep)) == TRUE) {
+			mdclrerror(ep);
+			rval = 0;
+		}
+	}
+
+	/*
+	 * Delete all the replicas from drives that are so marked.
+	 */
+	if (! rval)
+		rval = del_replicas(sp, cl, ep);
+
+	/*
+	 * Second balance pass.  It is skipped when del_replicas() failed:
+	 * running it anyway would overwrite that failure in rval and hand
+	 * an ep that already holds an error to the balance routines.
+	 */
+	if (retry && ! rval) {
+		if (uctlrs > 2)
+			rval = morethan2_ctl_balance(sp, cl, min_reps, ep);
+		else
+			rval = lessthan3_ctl_balance(sp, cl, min_reps, ep);
+
+		if (rval && mdismddberror(ep, MDE_TOOMANY_REPLICAS)) {
+			mdclrerror(ep);
+			rval = 0;
+		}
+	}
+
+	/*
+	 * Free up the ctlr list.
+	 */
+	free_ctlr_lst(&cl);
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_devadm.c b/usr/src/lib/lvm/libmeta/common/meta_devadm.c
new file mode 100644
index 0000000000..a30789a72e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_devadm.c
@@ -0,0 +1,1607 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include	<stdio.h>
+#include	<stdarg.h>
+#include	<ctype.h>
+#include	<sys/fcntl.h>
+#include	<sys/types.h>
+#include	<devid.h>
+#include	<ftw.h>
+#include	<string.h>
+#include	<mdiox.h>
+#include	<sys/lvm/mdio.h>
+#include 	<meta.h>
+#include 	<syslog.h>
+#include	<sdssc.h>
+#include	"meta_set_prv.h"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN	"SYS_TEST"
+#endif
+
+/*
+ * Bits combined in path_list_t.path_type: one of RAW_PATH/BLOCK_PATH
+ * together with one of the *_TYPE values.
+ */
+#define	RAW_PATH		0x001	/* rdsk */
+#define	BLOCK_PATH		0x002	/* dsk */
+#define	DSK_TYPE		0x004	/* normal /dev/[r]dsk */
+#define	TEST_TYPE		0x008	/* test driver path */
+#define	DID_TYPE		0x010	/* cluster did path */
+#define	AP_TYPE			0x020	/* should be obsolete */
+
+/*
+ * One supported device directory: the directory prefix, the minor name
+ * selector associated with it, and its path_type bits.
+ */
+typedef struct path_list {
+	char			*search_path;
+	char			*search_type;
+	int			path_type;
+} path_list_t;
+
+/*
+ * A table of the supported path types - this should ideally be generated
+ * by reading the /etc/lvm/devpath file
+ */
+static path_list_t plist[] = {
+	{"/dev/rdsk", DEVID_MINOR_NAME_ALL_CHR, RAW_PATH|DSK_TYPE},
+	{"/dev/dsk", DEVID_MINOR_NAME_ALL_BLK, BLOCK_PATH|DSK_TYPE},
+	{"/dev/did/rdsk", DEVID_MINOR_NAME_ALL_CHR, RAW_PATH|DID_TYPE},
+	{"/dev/did/dsk", DEVID_MINOR_NAME_ALL_BLK, BLOCK_PATH|DID_TYPE},
+	{"/dev/td/dsk", DEVID_MINOR_NAME_ALL_BLK, BLOCK_PATH|TEST_TYPE},
+	{"/dev/td/rdsk", DEVID_MINOR_NAME_ALL_CHR, RAW_PATH|TEST_TYPE},
+};
+/* number of entries in plist */
+static int num = sizeof (plist)/sizeof (path_list_t);
+
+/* option bits tested by the routines below (DEV_LOG, DEV_VERBOSE, ...) */
+static mddevopts_t	dev_options = 0;
+
+/* indicate whether to print an error message or not */
+static int	firsttime = 1;
+
+/* match_type bits: what a devid lookup result agreed with */
+#define	DEV_MATCH	0x1
+#define	NAME_MATCH	0x2
+
+/* values of the cached debug state kept by mda_debug() */
+#define	DEBUGON		1
+#define	DEBUGOFF	2
+
+/*
+ * Debug function: to turn on devadm function debugging include DEVADM
+ * in the MD_DEBUG environment variable: MD_DEBUG=...,DEVADM...
+ *
+ * The environment is examined on the first call only; the outcome is
+ * remembered for all later calls.  Output goes to stderr.
+ */
+/*PRINTFLIKE1*/
+static void
+mda_debug(char *format, ...)
+{
+	static int	state = 0;	/* 0: MD_DEBUG not yet examined */
+	va_list		args;
+
+	if (state == 0) {
+		char	*env = getenv("MD_DEBUG");
+
+		state = DEBUGOFF;
+		if ((env != NULL) && (strstr(env, "DEVADM") != NULL))
+			state = DEBUGON;
+	}
+
+	if (state != DEBUGON)
+		return;
+
+	va_start(args, format);
+	(void) vfprintf(stderr, format, args);
+	va_end(args);
+}
+
+/*
+ * Print an error message: to syslog when DEV_LOG is set in dev_options,
+ * otherwise to stderr.
+ */
+/*PRINTFLIKE1*/
+static void
+mda_print(char *message, ...)
+{
+	va_list	args;
+
+	va_start(args, message);
+	if ((dev_options & DEV_LOG) == 0) {
+		(void) vfprintf(stderr, message, args);
+	} else {
+		/*
+		 * The program is a daemon in the sense that it
+		 * is a system utility.
+		 */
+		(void) vsyslog((LOG_ERR | LOG_DAEMON), message, args);
+	}
+	va_end(args);
+}
+
+/*
+ * Utility to find the correct options to use for the devid search
+ * based upon the path of the device.  The first plist entry whose
+ * search_path is a leading substring of path wins.
+ *
+ * RETURN:
+ *	-1 	Error, the path passed in is not in the table
+ *      >= 0    The element number for the options within the table
+ */
+static int
+mda_findpath(char *path)
+{
+	int	idx = 0;
+
+	while (idx < num) {
+		char	*prefix = plist[idx].search_path;
+
+		if (strncmp(prefix, path, strlen(prefix)) == 0)
+			return (idx);
+		idx++;
+	}
+	return (-1);
+}
+
+/*
+ * Utility to get the path of a device: everything in devname up to and
+ * including the last '/'.
+ *
+ * RETURN
+ *	NULL	devname contains no '/'
+ *	path	newly allocated string, to be released by the caller
+ */
+static char *
+mda_getpath(char *devname)
+{
+	char	*slash;
+	char	*dirname;
+	size_t	dirlen;
+
+	slash = strrchr(devname, '/');
+	if (slash == NULL) {
+		mda_debug("Invalid format: %s\n", devname);
+		return (NULL);
+	}
+
+	/* keep the trailing '/' as part of the directory */
+	dirlen = (size_t)(slash - devname) + 1;
+	dirname = Malloc(dirlen + 1);
+	(void) memcpy(dirname, devname, dirlen);
+	dirname[dirlen] = '\0';
+	return (dirname);
+}
+
+/*
+ * update_locator_namespace -- Contains the ioctl call that will update
+ *		the ctds and pathname (ie. /dev/dsk etc) within the
+ *		locator block namespace.
+ *
+ * RETURN
+ *	METADEVADM_ERR		ioctl failed and ep is updated with the error
+ *	METADEVADM_SUCCESS	success
+ */
+static int
+update_locator_namespace(
+	set_t		setno,
+	side_t		sideno,
+	char		*devname,
+	md_dev64_t	dev,
+	char		*pname,
+	md_error_t	*ep
+)
+{
+	mdnm_params_t	params;
+
+	(void) memset(&params, '\0', sizeof (params));
+	params.mde = mdnullerror;
+
+	/* which namespace */
+	params.setno = setno;
+	params.side = sideno;
+
+	/* new device name and device number */
+	params.devname = (uintptr_t)devname;
+	params.devname_len = strlen(devname);
+	params.devt = dev;
+
+	/* new directory part */
+	params.pathname = (uintptr_t)pname;
+	params.pathname_len = strlen(pname);
+
+	if (metaioctl(MD_IOCUPD_LOCNM, &params, &params.mde, NULL) == 0)
+		return (METADEVADM_SUCCESS);
+
+	(void) mdstealerror(ep, &params.mde);
+	return (METADEVADM_ERR);
+}
+
+/*
+ * update_namespace -- Contains the ioctl call that will update the
+ * 	device name and pathname in the namespace area.
+ *
+ * RETURN
+ *	METADEVADM_ERR		ioctl failed and ep is updated with the error
+ *	METADEVADM_SUCCESS	success
+ */
+static int
+update_namespace(
+	set_t		setno,
+	side_t		sideno,
+	char		*devname,
+	md_dev64_t	dev,
+	mdkey_t		key,
+	char		*pname,
+	md_error_t	*ep
+)
+{
+	mdnm_params_t	params;
+
+	(void) memset(&params, '\0', sizeof (params));
+	params.mde = mdnullerror;
+
+	/* which namespace entry */
+	params.setno = setno;
+	params.side = sideno;
+	params.key = key;
+
+	/* new device name and minor number */
+	params.devname = (uintptr_t)devname;
+	params.devname_len = strlen(devname);
+	params.mnum = meta_getminor(dev);
+
+	/* new directory part */
+	params.pathname = (uintptr_t)pname;
+	params.pathname_len = strlen(pname);
+
+	if (metaioctl(MD_IOCUPD_NM, &params, &params.mde, NULL) == 0)
+		return (METADEVADM_SUCCESS);
+
+	(void) mdstealerror(ep, &params.mde);
+	return (METADEVADM_ERR);
+}
+
+/*
+ * stripS - Strip s<digits> off the end of the ctds name if it exists
+ *
+ * The string is modified in place.  Trailing digits are skipped, and if
+ * the character in front of them is 's' the string is cut there.  An
+ * empty string is left untouched.
+ */
+static void
+stripS(char *name)
+{
+	size_t	len = strlen(name);
+	char	*p;
+
+	/* with nothing to scan, name + len - 1 would point before name */
+	if (len == 0)
+		return;
+
+	/* gobble number and 's' */
+	p = name + len - 1;
+	for (; (p > name); --p) {
+		/* isdigit() is only defined for unsigned char values */
+		if (!isdigit((unsigned char)*p))
+			break;
+	}
+
+	if (*p == 's') {
+		*p = '\0';
+	}
+}
+
+/*
+ * getdiskname -- to be used when scanning the input from the -u arg.
+ * 	This routine will strip off input that is anything but cxtxdx.
+ *	ie. it will call stripS to get rid of slice info. Will also
+ *	strip off /dev/dsk, /dev/rdsk, /dev/ap/dsk, /dev/ap/rdsk,
+ *	/dev/did/dsk, or /dev/did/rdsk. The caller will need to free
+ *	the return value.
+ *
+ *	A directory prefix is only removed when nothing but a leaf name
+ *	follows it; otherwise the whole input is kept.
+ *
+ * RETURN
+ *	 string that has the disk name in it ie. c0t0d0
+ */
+static char *
+getdiskname(
+	char	*name
+)
+{
+	static const char	*dev_dirs[] = {
+		"/dev/dsk/",
+		"/dev/rdsk/",
+		"/dev/ap/dsk/",
+		"/dev/ap/rdsk/",
+		"/dev/did/dsk/",
+		"/dev/did/rdsk/"
+	};
+	size_t	ndirs = sizeof (dev_dirs) / sizeof (dev_dirs[0]);
+	char	*leaf = name;
+	char	*diskname;
+	size_t	idx;
+
+	for (idx = 0; idx < ndirs; idx++) {
+		size_t	dirlen = strlen(dev_dirs[idx]);
+
+		if (strncmp(name, dev_dirs[idx], dirlen) != 0)
+			continue;
+
+		/*
+		 * At most one directory can match, so the search ends
+		 * here whether or not the remainder is a plain leaf.
+		 */
+		if (strchr(name + dirlen, '/') == NULL)
+			leaf = name + dirlen;
+		break;
+	}
+
+	diskname = Strdup(leaf);
+	stripS(diskname);
+	return (diskname);
+}
+
+/*
+ * has_devid -- return the device ID for a given key
+ *
+ * Thin wrapper around meta_getdidbykey().
+ *
+ * RETURN
+ *	NULL	error
+ *	devid	devid found that corresponds to the given key.
+ */
+static ddi_devid_t
+has_devid(set_t setno, side_t sideno,  mdkey_t key, md_error_t *ep)
+{
+	ddi_devid_t	devid;
+
+	devid = meta_getdidbykey(setno, sideno, key, ep);
+	return (devid);
+}
+
+/*
+ * Go through the existing list of replicas and check to see
+ * if their disk has moved, if so update the replica list
+ *
+ * For every replica that carries a devid, the devid is looked up under
+ * the replica's device directory.  When neither the device number nor the
+ * name of any lookup result fully agrees with what is recorded, the
+ * locator namespace is updated with the matching (or else first) result,
+ * unless DEV_NOACTION is set.
+ *
+ * RETURN
+ *	-1	error
+ *	 0	success
+ */
+static int
+fix_replicanames(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*rlp = NULL;
+	md_replicalist_t	*rl;
+	int			ret = -1;
+	int			match_type = 0;
+	devid_nmlist_t		*disklist = NULL;
+	dev_t			small_dev = (dev_t)NODEV;
+	side_t			sideno;
+	set_t			setno = sp->setno;
+	char			*search_path;
+	int			search_number;
+	char			*ctds_name;
+	char			*path_name;
+	int			i;
+
+	sideno = getmyside(sp, ep);
+	if (sideno == MD_SIDEWILD) {
+		mda_debug("Failed to find the side number\n");
+		return (-1);
+	}
+
+	if (metareplicalist(sp, MD_BASICNAME_OK | PRINT_FAST, &rlp, ep) < 0) {
+		mda_debug("Unable to get a list of replicas\n");
+		return (METADEVADM_ERR);
+	}
+
+	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
+		md_replica_t	*r = rl->rl_repp;
+
+		small_dev = meta_cmpldev(r->r_namep->dev);
+		search_number = mda_findpath(r->r_namep->bname);
+		if (search_number == -1) {
+			mda_debug("replica update: invalid path: %s",
+			    r->r_namep->bname);
+			continue;
+		} else {
+			search_path = plist[search_number].search_path;
+		}
+
+		if (r->r_devid == NULL)
+			continue;
+
+		disklist = NULL;
+		ret = meta_deviceid_to_nmlist(search_path, r->r_devid,
+		    r->r_minor_name, &disklist);
+
+		mda_debug("replica update: search_path %s\n", search_path);
+
+		if (ret != 0) {
+			/*
+			 * Failed to find the disk, nothing can be done.
+			 * The replica will be marked as bad later.
+			 */
+			mda_debug("replica update: failed to find disk %s\n",
+			    r->r_namep->cname);
+			continue;
+		}
+		mda_debug("replica update: current %s (%p)\n",
+			r->r_namep->bname, (void *) small_dev);
+
+		/*
+		 * Check to see if the returned disk matches the stored one
+		 */
+		for (i = 0; disklist[i].dev != NODEV; i++) {
+			match_type = 0;
+
+			mda_debug("replica update: devid list: %s (%p)\n",
+			    disklist[i].devname, (void *) disklist[i].dev);
+
+			if (disklist[i].dev == small_dev) {
+				match_type |= DEV_MATCH;
+			}
+
+			if (strncmp(r->r_namep->bname, disklist[i].devname,
+			    strlen(r->r_namep->bname)) == 0) {
+				match_type |= NAME_MATCH;
+			}
+
+			/*
+			 * break out if some sort of match is found because
+			 * we already match on the devid.
+			 */
+			if (match_type != 0)
+				break;
+		}
+
+		mda_debug("fix_replicanames: match: %x i: %d\n", match_type, i);
+
+		if (match_type == (DEV_MATCH|NAME_MATCH)) {
+			/* no change */
+			mda_debug("replica update: no change %s\n",
+			    disklist[i].devname);
+			devid_free_nmlist(disklist);
+			continue;
+		}
+
+		/* No match found - use the first entry in disklist */
+		if (disklist[i].dev == NODEV)
+			i = 0;
+
+		mda_debug("replica update: reloading %s %p\n",
+		    disklist[i].devname,
+		    (void *) meta_expldev(disklist[i].dev));
+
+		if (firsttime) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Disk movement detected\n"));
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Updating device names in Solaris Volume "
+			    "Manager\n"));
+			firsttime = 0;
+		}
+
+		if (dev_options & DEV_VERBOSE) {
+			char	*devidstr;
+
+			devidstr =
+			    devid_str_encode(r->r_devid, r->r_minor_name);
+			if (devidstr == NULL) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "Failed to encode the devid\n"));
+				/* do not leak the lookup result */
+				devid_free_nmlist(disklist);
+				continue;
+			}
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s changed to %s from device relocation "
+			    "information %s\n"),
+			    (char *)r->r_namep->cname, disklist[i].devname,
+			    devidstr);
+			/* the encoded string was only needed for the message */
+			devid_str_free(devidstr);
+		}
+
+		if (!(dev_options & DEV_NOACTION)) {
+			mda_debug("Updating locator name\n");
+			/*
+			 * mda_getpath() fails exactly when the name holds
+			 * no '/', so after it succeeds the strrchr() below
+			 * cannot return NULL.
+			 */
+			if ((path_name = mda_getpath(disklist[i].devname))
+			    == NULL) {
+				devid_free_nmlist(disklist);
+				continue;
+			}
+			ctds_name = strrchr(disklist[i].devname, '/');
+			ctds_name++;
+			if (update_locator_namespace(setno, sideno,
+			    ctds_name, meta_expldev(disklist[i].dev),
+			    path_name, ep) != 0) {
+				mda_debug("replica update: ioctl failed\n");
+				if (dev_options & DEV_VERBOSE) {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "Failed to update locator "
+					    "namespace on change from %s "
+					    "to %s\n"), ctds_name,
+					    disklist[i].devname);
+				}
+			}
+			/*
+			 * path_name only exists on this branch; freeing it
+			 * outside would act on an unset or stale pointer
+			 * when DEV_NOACTION is in effect.
+			 */
+			Free(path_name);
+		}
+		devid_free_nmlist(disklist);
+	}
+	metafreereplicalist(rlp);
+	return (0);
+}
+
+/*
+ * pathname_reload - main function for the -r option. Will reload the
+ *	pathname in both the main namespace and the locator namespace.
+ *	Also, checks both areas for invalid device ID's and prints them
+ *	out.
+ *
+ *    If the set is a multi-node diskset that means there are no devid's
+ *    so just return.
+ *
+ * RETURN
+ *	METADEVADM_ERR		error
+ *	METADEVADM_SUCCESS 	success
+ *	METADEVADM_DEVIDINVALID	success, but invalid devids detected
+ */
+int
+pathname_reload(
+	mdsetname_t		**spp,
+	set_t			setno,
+	md_error_t		*ep)
+{
+	char			*drvnmp;
+	minor_t			mnum = 0;
+	md_dev64_t		dev = 0;
+	mdnm_params_t		nm;
+	char			*ctds_name;
+	ddi_devid_t		devidp;
+	md_i_didstat_t		ds;
+	side_t			sideno;
+	char			*search_path = NULL;
+	int			search_number;
+	devid_nmlist_t		*disklist = NULL;
+	char			*minor_name = NULL;
+	char			*devidstr = NULL;
+	char			*path = NULL;
+	int			ret;
+	dev_t			small_dev = (dev_t)NODEV;
+	int			match_type;
+	char			*tmp = NULL;
+	mdsetname_t		*sp = *spp;
+	md_set_desc		*sd;
+	int			i;
+
+	/*
+	 * Check for multi-node diskset and return if it is one.
+	 */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (METADEVADM_ERR);
+
+		if (MD_MNSET_DESC(sd))
+			return (METADEVADM_SUCCESS);
+	}
+
+	/*
+	 * Get the entry of the namespace via the key. To do this
+	 * call MD_IOCNXTKEY until no more.
+	 * For each entry in the namespace we want to check
+	 * for devid and update
+	 */
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.key = MD_KEYWILD;
+
+	sideno = getmyside(*spp, ep);
+	if (sideno == MD_SIDEWILD) {
+		/* failed to find this node in the set */
+		mda_debug("Failed to find the side number\n");
+		return (METADEVADM_ERR);
+	}
+
+	/* LINTED */
+	while (1) {
+		nm.mde	= mdnullerror;
+		nm.setno = setno;
+		nm.side = sideno;
+		/* look at each key in the namespace */
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &nm.mde);
+			return (METADEVADM_ERR);
+		}
+
+		if (nm.key == MD_KEYWILD) {
+			/* no more entries */
+			break;
+		}
+
+		/*
+		 * get the nm entry using the key. Then check to see if
+		 * there's a devid associated with this entry
+		 * If not, go onto next key.
+		 */
+		if ((nm.devname = (uintptr_t)meta_getnmentbykey(setno, sideno,
+		    nm.key, &drvnmp, &mnum, &dev, ep)) == NULL) {
+			mda_debug("pathname_reload: no name for key: %d\n",
+			    nm.key);
+			continue;
+		}
+
+		mda_debug("pathname_reload: examining %s\n",
+		    (char *)nm.devname);
+
+		if ((devidp = has_devid(setno, sideno, nm.key, ep)) == NULL) {
+			/* metadevices do not have devid's in them */
+			mda_debug("pathname_reload: no devid for %s\n",
+			    (char *)nm.devname);
+			continue;
+		}
+
+		if ((minor_name = meta_getdidminorbykey(setno, sideno,
+		    nm.key, ep)) == NULL) {
+			/*
+			 * In theory this is impossible because if the
+			 * devidp is non-null then the minor_name has
+			 * already been looked up.
+			 */
+			mda_debug("No minor name for %s\n", (char *)nm.devname);
+			free(devidp);
+			continue;
+		}
+		/*
+		 * If there is a devid then we have a real device that
+		 * could have moved.
+		 */
+		devidstr = devid_str_encode(devidp, minor_name);
+		if (devidstr == NULL) {
+			mda_debug("Failed to encode the devid\n");
+			free(devidp);
+			continue;
+		}
+		mda_debug("devid: %s\n", devidstr);
+
+		/*
+		 * Find the search path that should be used. This is an
+		 * optimization to try and prevent a search for the complete
+		 * /dev namespace.
+		 */
+		search_number = mda_findpath((char *)nm.devname);
+		if (search_number == -1) {
+			search_path = "/dev";
+		} else {
+			search_path = plist[search_number].search_path;
+		}
+
+		/* now look for the disk name using the devid */
+		ret = meta_deviceid_to_nmlist(search_path, devidp,
+		    minor_name, &disklist);
+		free(devidp);
+
+		if (ret != 0) {
+			/*
+			 * Failed to find the disk
+			 */
+			devid_str_free(devidstr);
+			continue;
+		}
+
+		small_dev = meta_cmpldev(dev);
+		mda_debug("Old device lookup: %s (%p)\n",
+				(char *)nm.devname, (void *)small_dev);
+
+		/*
+		 * Check to see if the returned disk matches the stored one
+		 */
+		for (i = 0; disklist[i].dev != NODEV; i++) {
+			match_type = 0;
+			mda_debug("From devid lookup: %s (%p)\n",
+				(char *)disklist[i].devname,
+				(void *)disklist[i].dev);
+
+			if (disklist[i].dev == small_dev) {
+				match_type |= DEV_MATCH;
+			}
+
+			if (strncmp((char *)nm.devname, disklist[i].devname,
+			    strlen((char *)nm.devname)) == 0) {
+				mda_debug("Name match: %s and %s (%d)\n",
+				    disklist[i].devname, (char *)nm.devname,
+				    strlen((char *)nm.devname));
+				match_type |= NAME_MATCH;
+			}
+
+			if (match_type == (DEV_MATCH|NAME_MATCH))
+				break;
+		}
+
+		if (match_type == (DEV_MATCH|NAME_MATCH)) {
+			/* no change */
+			devid_str_free(devidstr);
+			mda_debug("All matched %s\n", disklist[i].devname);
+			devid_free_nmlist(disklist);
+			continue;
+		}
+
+		/* No match found - use the first entry in disklist */
+		i = 0;
+
+		if (firsttime) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Disk movement detected\n"));
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Updating device names in "
+			    "Solaris Volume Manager\n"));
+			firsttime = 0;
+		}
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s changed to %s from device relocation "
+			    "information %s\n"),
+			    (char *)nm.devname, disklist[i].devname,
+			    devidstr);
+		}
+		devid_str_free(devidstr);
+
+		/* need to build up the path of the disk */
+		if ((path = Strdup(disklist[i].devname)) == NULL) {
+			mda_debug("Failed to duplicate path: %s\n",
+			    disklist[i].devname);
+			devid_free_nmlist(disklist);
+			continue;
+		}
+		if ((tmp = strrchr(path, '/')) == NULL) {
+			mda_debug("Failed to parse %s\n", path);
+			devid_free_nmlist(disklist);
+			Free(path);
+			continue;
+		}
+		tmp += sizeof (char);
+		*tmp = '\0';
+
+		if ((ctds_name = strrchr(disklist[i].devname, '/')) == NULL) {
+			mda_debug("Failed to parse ctds name: %s\n",
+			    disklist[i].devname);
+			devid_free_nmlist(disklist);
+			Free(path);
+			continue;
+		}
+		ctds_name += sizeof (char);
+
+		mda_debug("Reloading disk %s %s %p\n",
+		    ctds_name, path, (void *) meta_expldev(disklist[i].dev));
+
+		if (!(dev_options & DEV_NOACTION)) {
+			/* Something has changed so update the namespace */
+			if (update_namespace(setno, sideno, ctds_name,
+			    meta_expldev(disklist[i].dev), nm.key, path,
+			    ep) != 0) {
+				mda_debug("Failed to update namespace\n");
+				if (dev_options & DEV_VERBOSE) {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "Failed to update namespace on "
+					    "change from %s to %s\n"),
+					    ctds_name, disklist[i].devname);
+				}
+			}
+		}
+		devid_free_nmlist(disklist);
+		Free(path);
+	}
+
+	if (fix_replicanames(*spp, ep) == -1)
+		mda_debug("Failed to update replicas\n");
+
+	/*
+	 * check for invalid device id's
+	 */
+	(void) memset(&ds, '\0', sizeof (ds));
+	ds.setno = setno;
+	ds.side = sideno;
+	ds.mode = MD_FIND_INVDID;
+	/* get count of number of invalid device id's */
+	if (metaioctl(MD_IOCDID_STAT, &ds, &ds.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &ds.mde);
+		return (METADEVADM_ERR);
+	}
+	if (ds.cnt != 0) {
+		char	*ctdptr, *ctdp;
+		/*
+		 * we have some invalid device id's so we need to
+		 * print them out
+		 */
+		ds.mode = MD_GET_INVDID;
+		/* malloc buffer for kernel to place devid list into */
+		if ((ctdptr = (char *)Malloc((ds.cnt * ds.maxsz) + 1)) == 0) {
+			return (METADEVADM_ERR);
+		}
+		ds.ctdp = (uintptr_t)ctdptr;
+		/* get actual list of invalid device id's */
+		if (metaioctl(MD_IOCDID_STAT, &ds, &ds.mde, NULL) != 0) {
+			Free(ctdptr);
+			(void) mdstealerror(ep, &ds.mde);
+			return (METADEVADM_ERR);
+		}
+
+		/* print out the invalid devid's */
+		mda_print(dgettext(TEXT_DOMAIN,
+		    "Invalid device relocation information "
+		    "detected in Solaris Volume Manager\n"));
+		mda_print(dgettext(TEXT_DOMAIN,
+		    "Please check the status of the following disk(s):\n"));
+		ctdp = (char *)ds.ctdp;
+		while (*ctdp != NULL) {
+			mda_print("\t%s\n", ctdp);
+			ctdp += ds.maxsz;
+		}
+		Free(ctdptr);
+		return (METADEVADM_DEVIDINVALID);
+	}
+	return (METADEVADM_SUCCESS);
+}
+
+/*
+ * replica_update_devid - walk the replica list, rlp, and update the
+ *  device ids of all of the replicas that are on the drive described
+ *  by dnp. A side effect is to set *cdevidpp, if it is not already
+ *  set, to the character representation of the matching replica's
+ *  device id as it was before the update.
+ *
+ *  dnp->devid is decoded only to check that it is well formed; the
+ *  ioctl is handed just the set number and the replica's devt.
+ *
+ * RETURN
+ *	METADEVADM_ERR		error
+ *	METADEVADM_SUCCESS	success
+ */
+static int
+replica_update_devid(
+	md_replicalist_t *rlp,
+	mddrivename_t	*dnp,
+	set_t		setno,
+	char		**cdevidpp,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*cur;
+	ddi_devid_t		new_devid;
+	mddb_config_t		cfg;
+
+	if (cdevidpp == NULL)
+		return (METADEVADM_ERR);
+
+	/* the drive's devid string must decode before anything is touched */
+	if (devid_str_decode(dnp->devid, &new_devid, NULL) != 0) {
+		mda_debug("Failed to decode %s into a valid devid\n",
+		    dnp->devid);
+		return (METADEVADM_ERR);
+	}
+
+	for (cur = rlp; cur != NULL; cur = cur->rl_next) {
+		md_replica_t	*repp = cur->rl_repp;
+		mdname_t	*repnamep = repp->r_namep;
+
+		/* skip replicas that are not on the requested drive */
+		if (strncmp(repnamep->cname, dnp->cname,
+		    strlen(dnp->cname)) != 0)
+			continue;
+
+		/* found the replica, grab its devid unless we have one */
+		if (*cdevidpp == NULL)
+			*cdevidpp = devid_str_encode(repp->r_devid, NULL);
+
+		if (*cdevidpp == NULL) {
+			devid_free(new_devid);
+			return (METADEVADM_ERR);
+		}
+
+		mda_debug("Updating replica %s, set %d, old devid %s\n",
+		    repnamep->cname, setno, *cdevidpp);
+
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Updating replica %s of set number %d from "
+			    "device id %s to device id %s\n"),
+			    repnamep->cname, setno, *cdevidpp, dnp->devid);
+		}
+
+		(void) memset(&cfg, '\0', sizeof (cfg));
+		cfg.c_setno = setno;
+		cfg.c_devt = repnamep->dev;
+
+		/* DEV_NOACTION: stop short of the ioctl */
+		if (dev_options & DEV_NOACTION)
+			continue;
+
+		mda_debug("Updating replica\n");
+
+		/*
+		 * call into the kernel to update the lb namespace
+		 * device id of the given devt
+		 */
+		if (metaioctl(MD_DB_SETDID, &cfg,
+		    &cfg.c_mde, NULL) != 0) {
+			devid_free(new_devid);
+			(void) mdstealerror(ep, &cfg.c_mde);
+			return (METADEVADM_ERR);
+		}
+	}
+
+	devid_free(new_devid);
+	return (METADEVADM_SUCCESS);
+}
+
+/*
+ * devid_update -- main routine for the -u option. Will update both the
+ * 	namespace and the locator block with the correct devid for the
+ * 	disk specified (ctd). Once the replica list has been fetched,
+ * 	every exit goes through "out", which releases old_cdevidp,
+ * 	pathname and diskname.
+ *
+ * RETURN
+ *	METADEVADM_ERR		error
+ *	METADEVADM_SUCCESS	success
+ */
+static int
+devid_update(
+	mdsetname_t	**spp,
+	set_t		setno,
+	char		*ctd,
+	md_error_t	*ep
+)
+{
+	md_drive_desc		*dd, *ddp;
+	mddrivename_t		*dnp;
+	mdnm_params_t		nm;
+	ddi_devid_t		devidp;
+	side_t			side;
+	char			*old_cdevidp = NULL;
+	md_replicalist_t	*rlp = NULL;
+	int			rval = METADEVADM_ERR;
+	mdname_t		*np = NULL;
+	uint_t			rep_slice;
+	char			*pathname = NULL;
+	char			*diskname = NULL;
+	int			fd = -1;
+	int			len;
+	char			*fp;
+
+	side = getmyside(*spp, ep);
+	if (side == MD_SIDEWILD) {
+		/* failed to find this node in the set */
+		mda_debug("Failed to find the side number\n");
+		return (METADEVADM_ERR);
+	}
+
+	if ((dnp = metadrivename(spp, ctd, ep)) == NULL) {
+		mda_debug("Failed to create a dnp for %s\n", ctd);
+		return (METADEVADM_ERR);
+	}
+	if (dnp->devid == NULL) {
+		/*
+		 * Disk does not have a devid! So cannot update the
+		 * devid within the replica.
+		 */
+		mda_debug("%s does not have a devid\n", dnp->cname);
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s does not have a device id. Cannot update "
+			    "device id if none exists\n"), ctd);
+		}
+		return (METADEVADM_ERR);
+	}
+
+	mda_debug("Devid update to: %s\n", dnp->devid);
+
+	/*
+	 * Check if we own the set, if we do then do some processing
+	 * on the replicas.
+	 */
+	if (meta_check_ownership(*spp, ep) == 0) {
+
+		/* get the replicas */
+		if (metareplicalist(*spp, MD_BASICNAME_OK | PRINT_FAST, &rlp,
+		    ep) < 0)
+			return (METADEVADM_ERR);
+
+		/* update the devids in the replicas if necessary */
+		if (replica_update_devid(rlp, dnp, setno, &old_cdevidp,
+		    ep) != METADEVADM_SUCCESS) {
+			metafreereplicalist(rlp);
+			goto out;	/* frees old_cdevidp if it was set */
+		}
+
+		metafreereplicalist(rlp);
+	}
+
+	/*
+	 * If this is not the LOCAL set then need to update the LOCAL
+	 * replica with the new disk record.
+	 */
+
+	if (setno != MD_LOCAL_SET) {
+		mda_debug("Non-local set: %d side %d\n", setno, side);
+
+		/*
+		 * Need to find the disk record within the set and then
+		 * update it.
+		 */
+		if ((dd =
+		    metaget_drivedesc(*spp, MD_FULLNAME_ONLY, ep)) == NULL) {
+			if (! mdisok(ep))
+				goto out;
+			/* no disks in the set - no point continuing */
+			mda_debug("No disks in diskset\n");
+			rval = METADEVADM_SUCCESS;
+			goto out;
+		}
+
+		for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+			if (strncmp(ddp->dd_dnp->cname, dnp->cname,
+			    strlen(dnp->cname)) == 0)
+				break;
+		}
+
+		if (ddp == NULL) {
+			/* failed to find the disk in the set */
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s not found in set %s. Check your syntax\n"),
+			    ctd, (*spp)->setname);
+			(void) mddserror(ep, MDE_DS_DRIVENOTINSET, setno, NULL,
+			    ctd, (*spp)->setname);
+			goto out;
+		}
+
+		/*
+		 * Now figure out the correct slice, for a diskset the slice
+		 * we care about is always the 'replica' slice.
+		 */
+		if (meta_replicaslice(dnp, &rep_slice, ep) != 0) {
+			mda_debug("Unable to find replica slice for %s\n",
+			    dnp->cname);
+			goto out;
+		}
+
+		mda_debug("slice no: %d disk %s\n", rep_slice, dnp->cname);
+
+		if ((np = metaslicename(dnp, rep_slice, ep)) == NULL) {
+			mda_debug("Unable to build namespace\n");
+			goto out;
+		}
+
+		mda_debug("check: ctdname: %s\n", np->cname);
+		mda_debug("check: ctdname: %s\n", np->rname);
+		mda_debug("check: ctdname: %s\n", np->bname);
+
+		if (!(dev_options & DEV_NOACTION)) {
+
+			mda_debug("Updating record: key %d name %s\n",
+			    ddp->dd_dnp->side_names_key, np->cname);
+
+			pathname = mda_getpath(np->bname);
+
+			if (update_namespace(MD_LOCAL_SET, side + SKEW,
+			    np->cname, np->dev, ddp->dd_dnp->side_names_key,
+			    pathname, ep) != 0) {
+				goto out;
+			}
+
+			/*
+			 * Now update the devid entry as well, this works
+			 * correctly because the prior call to
+			 * update_namespace() above puts the correct dev_t
+			 * in the namespace which will then be resolved
+			 * to the new devid by the ioctl now called.
+			 */
+			nm.mde = mdnullerror;
+			nm.setno = MD_LOCAL_SET;
+			nm.side = side + SKEW;
+			nm.key = ddp->dd_dnp->side_names_key;
+			if (metaioctl(MD_SETNMDID, &nm, &nm.mde, NULL) != 0) {
+				(void) mdstealerror(ep, &nm.mde);
+				goto out;
+			}
+		}
+	}
+
+	if ((dev_options & DEV_LOCAL_SET) && (setno != MD_LOCAL_SET)) {
+		/*
+		 * Only want to update the local set so do not continue.
+		 */
+		rval = METADEVADM_SUCCESS;
+		goto out;
+	}
+
+	/*
+	 * Iterate through all of the metadevices looking for the
+	 * passed in ctd.  If found then update the devid
+	 */
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.key = MD_KEYWILD;
+	/* LINTED */
+	while (1) {
+		nm.mde = mdnullerror;
+		nm.setno = setno;
+		nm.side = side;
+
+		/* search each namespace entry */
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &nm.mde);
+			rval = METADEVADM_ERR;
+			goto out;
+		}
+		if (nm.key == MD_KEYWILD) {
+			if (setno != MD_LOCAL_SET) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "%s not found in set %s. Check your "
+				    "syntax\n"), ctd, (*spp)->setname);
+				goto out;
+			} else {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "%s not found in local set. "
+				    "Check your syntax\n"), ctd);
+				goto out;
+			}
+		}
+
+		nm.devname = (uintptr_t)meta_getnmentbykey(setno, side, nm.key,
+		    NULL, NULL, NULL, ep);
+		if (nm.devname == NULL) {
+			rval = METADEVADM_ERR;
+			goto out;
+		}
+		/* free the last pass's name; "continue" would leak it */
+		if (diskname != NULL)
+			Free(diskname);
+		diskname = getdiskname((char *)nm.devname);
+		mda_debug("Checking %s with %s\n", diskname, dnp->cname);
+		if (strcmp(diskname, dnp->cname) != 0)
+			continue;
+		mda_debug("Updating device %s in namespace\n",
+		    (char *)nm.devname);
+
+		/*
+		 * found disk, does it have a devid within the namespace ?
+		 * It might not because it does not support devid's or was
+		 * put into the namespace when there was no devid support
+		 */
+		if ((devidp = has_devid(setno, side, nm.key, ep)) == NULL) {
+			mda_debug("%s has no devid in the namespace",
+			    (char *)nm.devname);
+			if (dev_options & DEV_VERBOSE) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				"SVM has no device id for "
+				"%s, cannot update.\n"), (char *)nm.devname);
+			}
+			continue; /* no devid. go on to next */
+		}
+		if (old_cdevidp == NULL) {
+			old_cdevidp = devid_str_encode(devidp, NULL);
+		}
+		free(devidp);
+
+		/*
+		 * has devid so update namespace, note the key has been set
+		 * by the prior MD_IOCNXTKEY_NM ioctl.
+		 */
+		nm.mde = mdnullerror;
+		nm.setno = setno;
+		nm.side = side;
+		if (!(dev_options & DEV_NOACTION)) {
+			/*
+			 * The call below may fail if the -u option is being
+			 * used to update a disk that has been replaced.
+			 * The -u option to metadevadm should not be used
+			 * for this purpose because we trust the dev_t of
+			 * the device in the replica and if we have replaced
+			 * the device and it is a fibre one then the dev_t
+			 * will have changed. This means we end up looking for
+			 * the devid of a non-existent disk and we subsequently
+			 * fail with NODEVID.
+			 */
+			if (metaioctl(MD_SETNMDID, &nm,
+					&nm.mde, NULL) != 0) {
+				if (dev_options & DEV_VERBOSE) {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "SVM failed to update the device "
+					    "id for %s probably due to both "
+					    "devt and device id changing.\n"),
+					    (char *)nm.devname);
+				}
+				(void) mdstealerror(ep, &nm.mde);
+				mde_perror(ep, "");
+				rval = METADEVADM_ERR;
+				goto out;
+			}
+		}
+		if (old_cdevidp == NULL) {
+			rval = METADEVADM_ERR;
+			goto out;
+		}
+		break;
+	} /* end while */
+
+	mda_print(dgettext(TEXT_DOMAIN,
+		    "Updating Solaris Volume Manager device relocation "
+		    "information for %s\n"), ctd);
+
+	mda_print(dgettext(TEXT_DOMAIN,
+	    "Old device reloc information:\n\t%s\n"), old_cdevidp);
+
+	len = strlen(dnp->rname) + strlen("s0");
+	if ((fp = (char *)Malloc(len + 1)) == NULL) {
+		mda_print(dgettext(TEXT_DOMAIN,
+		    "insufficient memory, device Reloc info not "
+		    "available\n"));
+	} else {
+		(void) snprintf(fp, len + 1, "%ss0", dnp->rname);
+		if ((fd = open(fp, O_RDONLY|O_NDELAY)) < 0) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Open of %s failed\n"), fp);
+		} else {
+			int		rc = -1;
+			ddi_devid_t	devid1 = NULL;
+			char		*cdevidp;
+
+			rc = devid_get(fd, &devid1);
+			if (close(fd) < 0) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "Close of %s failed\n"), fp);
+			}
+			if (rc != 0) {
+				mda_print(dgettext(TEXT_DOMAIN,
+				    "Unable to obtain device "
+				    "Reloc info for %s\n"), fp);
+			} else {
+				cdevidp = devid_str_encode(devid1, NULL);
+				if (cdevidp == NULL) {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "Unable to print "
+					    "device Reloc info for %s\n"), fp);
+				} else {
+					mda_print(dgettext(TEXT_DOMAIN,
+					    "New device reloc "
+					    "information:\n\t%s\n"), cdevidp);
+					devid_str_free(cdevidp);
+				}
+				devid_free(devid1);
+			}
+		}
+		Free(fp);
+	}
+
+	rval = METADEVADM_SUCCESS;
+
+out:
+	if (diskname)
+		Free(diskname);
+	if (pathname)
+		Free(pathname);
+	if (old_cdevidp)
+		devid_str_free(old_cdevidp);
+	return (rval);
+}
+
+/*
+ * Check the ctd name of the disk to see if the disk has moved. If it
+ * has moved then a copy of the new name is returned in 'newname'
+ * (when 'newname' is not NULL); the caller must free that memory.
+ *
+ * RETURN
+ *	METADEVADM_ERR		error
+ *	METADEVADM_SUCCESS	success
+ *	METADEVADM_DISKMOVE	success, and the disk has moved
+ *	METADEVADM_DSKNAME_ERR	error creating the disk name structures.
+ */
+int
+meta_upd_ctdnames(
+	mdsetname_t	**spp,
+	set_t		setno,
+	side_t		sideno,
+	mddrivename_t	*dnp,
+	char		**newname,
+	md_error_t	*ep
+)
+{
+	char		*drvnmp;
+	int		i;
+	minor_t		mnum = 0;
+	md_dev64_t	dev = 0;
+	dev_t		small_dev = (dev_t)NODEV;
+	mdnm_params_t	nm;
+	char		*pathname = NULL;
+	char		*minor_name = NULL;
+	ddi_devid_t	devidp = NULL;
+	devid_nmlist_t	*disklist = NULL;
+	int		ret = 0;
+	mdsidenames_t	*snp;
+	int		match_type = 0;
+	int		search_number = -1;
+	char		*search_type = NULL;
+	char		*search_path = NULL;
+	uint_t		rep_slice;
+	mddrivename_t	*newdnp;
+	mdname_t	*np;
+	mdsetname_t	*sp = *spp;
+	md_set_desc	*sd;
+
+	/*
+	 * setno should always be 0 but we're going to
+	 * check for multi-node diskset and return if it is one.
+	 */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (METADEVADM_ERR);
+
+		if (MD_MNSET_DESC(sd))
+			return (METADEVADM_SUCCESS);
+	}
+
+	if (dnp->devid == NULL) {
+		/* no devid, nothing can be done */
+		mda_debug("meta_upd_ctdnames: %s has no devid\n", dnp->cname);
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "%s has no devid, cannot detect "
+			    "disk movement for this disk.\n"), dnp->cname);
+		}
+		return (ret);
+	}
+
+	/*
+	 * Find the correct side name for the disk. There is a sidename
+	 * for each host associated with the diskset.
+	 */
+	for (snp = dnp->side_names; snp != NULL; snp = snp->next) {
+		mda_debug("meta_upd_ctdnames: %s %d args: setno %d sideno %d\n",
+		    snp->cname, snp->sideno, setno, sideno);
+		/* only use SKEW for the local replica */
+		if (setno == 0) {
+			if (snp->sideno + SKEW == sideno)
+				break;
+		} else {
+			if (snp->sideno == sideno)
+				break;
+		}
+	}
+
+	if (snp == NULL) {
+		/*
+		 * Failed to find the side name, this should not
+		 * be possible. However if it does happen this is an
+		 * indication of an inconsistent replica - something
+		 * might have gone wrong during an add or a delete of
+		 * a host.
+		 */
+		mda_debug("Unable to find the side information for disk %s",
+		    dnp->cname);
+		(void) mddserror(ep, MDE_DS_HOSTNOSIDE, (*spp)->setno, mynode(),
+		    NULL, dnp->cname);
+		return (METADEVADM_ERR);
+	}
+	/*
+	 * Find the type of device we are to be searching on
+	 */
+	search_number = mda_findpath(snp->cname);
+	if (search_number == -1) {
+		search_path = "/dev";
+		search_type = DEVID_MINOR_NAME_ALL;
+	} else {
+		search_path = plist[search_number].search_path;
+		search_type = plist[search_number].search_type;
+	}
+
+	mda_debug("Search path :%s searth_type: %x\n",
+	    search_path, (int)search_type);
+	(void) memset(&nm, '\0', sizeof (nm));
+
+	nm.mde = mdnullerror;
+	nm.setno = setno;
+	nm.side = sideno;
+
+	/*
+	 * Get the devname from the name space.
+	 */
+	if ((nm.devname = (uintptr_t)meta_getnmentbykey(setno, sideno,
+	    dnp->side_names_key, &drvnmp, &mnum, &dev, ep)) == NULL) {
+		return (METADEVADM_ERR);
+	}
+
+	ret = devid_str_decode(dnp->devid, &devidp, &minor_name);
+	devid_str_free(minor_name);
+
+	if (ret != 0) {
+		/*
+		 * Failed to decode the devid, so devidp does not hold
+		 * a devid that could be freed here.
+		 */
+		return (METADEVADM_ERR);
+	}
+
+	/*
+	 * Use the stored devid to find the existing device node and check
+	 * to see if the disk has moved. Use the raw devices as the name
+	 * of the disk is stored as the raw device, if this is not done
+	 * then the disk will not be found.
+	 */
+	ret = meta_deviceid_to_nmlist(search_path, devidp,
+	    search_type, &disklist);
+
+	if (ret != 0) {
+		if (dev_options & DEV_VERBOSE) {
+			mda_print(dgettext(TEXT_DOMAIN,
+			    "Device ID %s last associated with "
+			    "disk %s no longer found in system\n"),
+			    dnp->devid, dnp->cname);
+		}
+		devid_free(devidp);
+		devid_free_nmlist(disklist);
+		return (METADEVADM_SUCCESS);
+	}
+
+	small_dev = meta_cmpldev(dev);
+	mda_debug("Old device lookup: %s (%p)\n",
+			(char *)nm.devname, (void *)small_dev);
+	/*
+	 * Check to see if the returned disk matches the stored one
+	 */
+	for (i = 0; disklist[i].dev != NODEV; i++) {
+		match_type = 0;
+		mda_debug("From devid lookup: %s (%p)\n",
+				disklist[i].devname, (void *)disklist[i].dev);
+
+		if (disklist[i].dev == small_dev) {
+			match_type |= DEV_MATCH;
+		}
+
+		if (strncmp((char *)nm.devname, disklist[i].devname,
+		    strlen((char *)nm.devname)) == 0) {
+			match_type |= NAME_MATCH;
+		}
+
+		if (match_type != 0)
+			break;
+	}
+	devid_free(devidp);
+
+	mda_debug("meta_upd_ctdnames: match: %x i: %d\n", match_type, i);
+
+	if (match_type == (DEV_MATCH|NAME_MATCH)) {
+		/* no change */
+		devid_free_nmlist(disklist);
+		return (METADEVADM_SUCCESS);
+	}
+
+	/* No match found - use the first entry in disklist */
+	if (disklist[i].dev == NODEV)
+		i = 0;
+
+	if (!(match_type & DEV_MATCH)) {
+		/* did not match on the dev, so dev_t has changed */
+		mda_debug("Did not match on dev: %p %p\n",
+		    (void *) small_dev, (void *) disklist[i].dev);
+		dev = meta_expldev(disklist[i].dev);
+	}
+
+	if (!(match_type & NAME_MATCH)) {
+		mda_debug("Did not match on name: %s (%p)\n",
+		    (char *)nm.devname, (void *) disklist[i].dev);
+	}
+
+	/*
+	 * If here, then the name in the disklist is the one we
+	 * want in any case so use it.
+	 */
+	mda_debug("devname: %s\n", disklist[i].devname);
+	/*
+	 * Need to remove the slice as metadrivename() expects a diskname
+	 */
+	stripS(disklist[i].devname);
+	/*
+	 * Build an mddrivename_t to use
+	 */
+	if ((newdnp = metadrivename(spp, disklist[i].devname, ep)) == NULL) {
+		mda_debug("Unable to make a dnp out of %s\n",
+		    disklist[i].devname);
+		devid_free_nmlist(disklist);	/* do not leak the list */
+		return (METADEVADM_DSKNAME_ERR);
+	}
+	/*
+	 * Need to find the correct slice used for the replica
+	 */
+	if (meta_replicaslice(newdnp, &rep_slice, ep) != 0) {
+		devid_free_nmlist(disklist);
+		return (METADEVADM_DSKNAME_ERR);
+	}
+
+	if ((np = metaslicename(newdnp, rep_slice, ep)) == NULL) {
+		mda_debug("Failed to build an np for %s\n", dnp->rname);
+		devid_free_nmlist(disklist);
+		return (METADEVADM_DSKNAME_ERR);
+	}
+	mda_debug("check: cname: %s\n", np->cname);
+	mda_debug("check: rname: %s\n", np->rname);
+	mda_debug("check: bname: %s\n", np->bname);
+
+	if (newname != NULL)
+		*newname = Strdup(np->bname);
+
+	ret = METADEVADM_DISKMOVE;
+	if (!(dev_options & DEV_NOACTION)) {
+		mda_debug("update namespace\n");
+
+		/* get the block path */
+		pathname = mda_getpath(np->bname);
+
+		if (update_namespace(setno, sideno, np->cname,
+		    dev, dnp->side_names_key, pathname, ep) != 0)
+			ret = METADEVADM_ERR;
+	}
+	/* pathname is still NULL if DEV_NOACTION skipped the update */
+	if (pathname != NULL)
+		Free(pathname);
+	/* finished with the list so return the memory */
+	devid_free_nmlist(disklist);
+	return (ret);
+}
+
+/*
+ * meta_fixdevid - OR options into dev_options, then for DEV_RELOAD
+ * re-check the set's disks for movement, or for DEV_UPDATE update the
+ * device id of diskname. Multi-node disksets are left untouched.
+ */
+int
+meta_fixdevid(
+	mdsetname_t	*sp,
+	mddevopts_t	options,
+	char		*diskname,
+	md_error_t	*ep
+)
+{
+	set_t		setno = sp->setno;
+	int		ret = 0;
+	char		*pathname = NULL;
+	mdsetname_t	*local_sp = NULL;
+	md_drive_desc	*dd, *d;
+	side_t		sideno;
+	md_set_desc	*sd;
+
+	/* if MN diskset just return */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (METADEVADM_ERR);
+		if (MD_MNSET_DESC(sd))
+			return (METADEVADM_SUCCESS);
+	}
+
+	dev_options |= options;
+	mda_debug("dev_options: %x\n", dev_options);
+	if (dev_options & DEV_RELOAD) {
+		/*
+		 * If it's not the local set we need to check the local
+		 * namespace to see if disks have moved as it contains
+		 * entries for the disks in the set.
+		 */
+		if (setno != MD_LOCAL_SET) {
+			if ((dd = metaget_drivedesc(sp, MD_BASICNAME_OK |
+			    PRINT_FAST, ep)) == NULL) {
+				mde_perror(ep, "");
+				mdclrerror(ep);
+				return (METADEVADM_ERR);
+			}
+			local_sp = metasetname(MD_LOCAL_NAME, ep);
+			if (local_sp == NULL)
+				return (METADEVADM_ERR);
+			sideno = getmyside(sp, ep) + SKEW;
+			for (d = dd; d != NULL; d = d->dd_next) {
+				/* name unused; NULL avoids leaking a copy */
+				ret = meta_upd_ctdnames(&local_sp, 0, sideno,
+				    d->dd_dnp, NULL, ep);
+				if ((ret == METADEVADM_ERR) ||
+				    (ret == METADEVADM_DSKNAME_ERR)) {
+					/* check failed in unknown manner */
+					mda_debug("meta_upd_ctdnames failed\n");
+					return (METADEVADM_ERR);
+				}
+			}
+		}
+		/* do a reload of the devid namespace */
+		ret = pathname_reload(&sp, setno, ep);
+	} else if (dev_options & DEV_UPDATE) {
+		pathname = getdiskname(diskname);
+		ret = devid_update(&sp, setno, pathname, ep);
+		free(pathname);
+	}
+	return (ret);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_devstamp.c b/usr/src/lib/lvm/libmeta/common/meta_devstamp.c
new file mode 100644
index 0000000000..1a3cf3e1ce
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_devstamp.c
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1993-2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * get timestamp from device
+ */
+
+#include <meta.h>
+
+/*
+ * getdevstamp - read the vtoc through slice 0 of the drive dnp and
+ * hand back, in *stamp, the timestamp entry for the partition number
+ * that meta_getvtoc() reports.
+ *
+ * Returns 0 on success, -1 if the slice name or the vtoc cannot be
+ * obtained, or the result of mdsyserror() if the open fails.
+ */
+int
+getdevstamp(
+	mddrivename_t	*dnp,
+	time_t		*stamp,		/* return timestamp here */
+	md_error_t	*ep
+)
+{
+	mdname_t	*slicep = metaslicename(dnp, MD_SLICE0, ep);
+	struct vtoc	label;
+	int		part, devfd, rval;
+
+	if (slicep == NULL)
+		return (-1);
+
+	devfd = open(slicep->rname, O_RDONLY | O_NDELAY, 0);
+	if (devfd < 0)
+		return (mdsyserror(ep, errno, slicep->cname));
+
+	/* read the label, then close the device whatever the outcome */
+	rval = meta_getvtoc(devfd, slicep->cname, &label, &part, ep);
+	(void) close(devfd);	/* sd/ssd bug */
+	if (rval == -1)
+		return (-1);
+
+	*stamp = label.timestamp[part];
+	return (0);
+}
+
+/*
+ * setdevstamp - stamp the vtoc of the drive dnp with the current time,
+ * verify it by re-reading the vtoc, and return the stamp in *stamp.
+ * returns
+ *	0 on success,
+ *	ENOTSUP if it's not a device with a vtoc
+ *	-1 on failure
+ */
+int
+setdevstamp(
+	mddrivename_t	*dnp,
+	time_t		*stamp,		/* returned timestamp */
+	md_error_t	*ep
+)
+{
+	time_t		now = time(NULL);
+	mdname_t	*slicep;
+	struct vtoc	label;
+	int		part, devfd;
+	int		rval = -1;
+
+	slicep = metaslicename(dnp, MD_SLICE0, ep);
+	if (slicep == NULL)
+		return (-1);
+
+	/* open for vtoc */
+	devfd = open(slicep->rname, O_RDWR | O_NDELAY, 0);
+	if (devfd < 0)
+		return (mdsyserror(ep, errno, slicep->cname));
+
+	if (meta_getvtoc(devfd, slicep->cname, &label, &part, ep) == -1) {
+		/* a device without a vtoc is reported as ENOTSUP */
+		if (part == VT_ENOTSUP)
+			rval = ENOTSUP;
+		goto done;
+	}
+
+	/* stamp the label and write it out */
+	label.timestamp[part] = now;
+	*stamp = label.timestamp[part];
+	if (meta_setvtoc(devfd, slicep->cname, &label, ep) == -1)
+		goto done;
+
+	/* zero our copy so the re-read must supply the value compared */
+	label.timestamp[part] = 0;
+	if (meta_getvtoc(devfd, slicep->cname, &label, &part, ep) == -1)
+		goto done;
+	(void) close(devfd);	/* sd/ssd bug */
+	if (*stamp != label.timestamp[part])
+		return (mddeverror(ep, MDE_CANTVERIFY_VTOC, NODEV64,
+		    slicep->cname));
+	return (0);
+
+done:
+	(void) close(devfd);
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_error.c b/usr/src/lib/lvm/libmeta/common/meta_error.c
new file mode 100644
index 0000000000..0c359f344b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_error.c
@@ -0,0 +1,2309 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * print metadevice errors
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+
+#include <syslog.h>
+
+/*
+ * clear error
+ *
+ * Frees the name, host and extra strings that are set, then zeroes
+ * the whole structure.
+ */
+void
+mdclrerror(
+	md_error_t	*ep
+)
+{
+	char	*strs[3];
+	int	i;
+
+	strs[0] = ep->name;
+	strs[1] = ep->host;
+	strs[2] = ep->extra;
+
+	for (i = 0; i < 3; i++) {
+		if (strs[i] != NULL)
+			Free(strs[i]);
+	}
+
+	(void) memset(ep, 0, sizeof (*ep));
+}
+
+/*
+ * cook names
+ *
+ * md_name() returns a Strdup()ed name for the metadevice minor number
+ * mnum: the string from get_mdname() when there is one, otherwise a
+ * made-up "<set>/d<unit>" string.
+ */
+static char *
+md_name(
+	minor_t	mnum
+)
+{
+	char	*name;
+
+	/* get name, or fake it */
+	if ((name = get_mdname(mnum)) == NULL) {
+		char	buf[40];
+
+		/*
+		 * Two %lu fields plus "/d" can need more than 40 bytes,
+		 * so bound the write; an oversized name is truncated
+		 * rather than overrunning buf.
+		 */
+		(void) snprintf(buf, sizeof (buf), "%lu/d%lu",
+		    MD_MIN2SET(mnum), MD_MIN2UNIT(mnum));
+		return (Strdup(buf));
+	}
+	return (Strdup(name));
+}
+
+/*
+ * Return a Strdup()ed name for device dev: a translated
+ * "unknown device" for NODEV64, the string from get_devname(setno, dev)
+ * when there is one, otherwise a made-up "<major>.<minor>" string.
+ */
+static char *
+dev_name(
+	set_t	setno,
+	md_dev64_t dev
+)
+{
+	char	*name;
+
+	/* get name or fake it */
+	if (dev == NODEV64)
+		return (Strdup(dgettext(TEXT_DOMAIN, "unknown device")));
+	if ((name = get_devname(setno, dev)) == NULL) {
+		char	buf[40];
+
+		/*
+		 * Two %lu fields plus "." can need more than 40 bytes,
+		 * so bound the write; an oversized name is truncated
+		 * rather than overrunning buf.
+		 */
+		(void) snprintf(buf, sizeof (buf), "%lu.%lu",
+		    meta_getmajor(dev), meta_getminor(dev));
+		return (Strdup(buf));
+	}
+	return (Strdup(name));
+}
+
+/*
+ * Return a Strdup()ed name for hot spare pool hsp: the string from
+ * get_hspname() when there is one, otherwise a made-up
+ * "<set>/hsp<id>" string.
+ */
+static char *
+hsp_name(
+	hsp_t	hsp
+)
+{
+	char	fake[40];
+	char	*known = get_hspname(hsp);
+
+	if (known != NULL)
+		return (Strdup(known));
+
+	(void) sprintf(fake, "%u/hsp%03u", HSP_SET(hsp), HSP_ID(hsp));
+	return (Strdup(fake));
+}
+
+/*
+ * Return a Strdup()ed name for set number setno, or NULL when setno is
+ * MD_SET_BAD.  When metasetnosetname() cannot supply a set name, its
+ * error is discarded and a made-up "setno <n>" string is returned.
+ */
+static char *
+set_name(
+	set_t		setno
+)
+{
+	md_error_t	scratch = mdnullerror;
+	mdsetname_t	*setp;
+	char		fake[40];
+
+	if (setno == MD_SET_BAD)
+		return (NULL);
+
+	setp = metasetnosetname(setno, &scratch);
+	if (setp != NULL)
+		return (Strdup(setp->setname));
+
+	/* lookup failed: drop its error and fake a name */
+	mdclrerror(&scratch);
+	(void) sprintf(fake, "setno %u", setno);
+	return (Strdup(fake));
+}
+
+/*
+ * fill in all the appropriate md_error_t fields
+ *
+ * ep->host is always replaced: for an MDEC_RPC error with a non-empty
+ * name, the name is taken to be the host (and is then not used as the
+ * error name); otherwise the host is mynode().
+ *
+ * ep->name is always replaced: by a copy of the caller's name when a
+ * non-empty one is given, otherwise by a name derived from the
+ * class-specific error information.  Classes with nothing to derive a
+ * name from leave ep->name NULL.
+ */
+static void
+metacookerror(
+	md_error_t	*ep,		/* generic error */
+	char		*name		/* optional name or host */
+)
+{
+	/* get host name */
+	if (ep->host != NULL) {
+		Free(ep->host);
+		ep->host = NULL;
+	}
+	if ((ep->info.errclass == MDEC_RPC) &&
+	    (name != NULL) && (*name != '\0')) {
+		ep->host = Strdup(name);
+		name = NULL;	/* consumed as the host, not the name */
+	} else
+		ep->host = Strdup(mynode());
+
+	/* get appropriate name */
+	if (ep->name != NULL) {
+		Free(ep->name);
+		ep->name = NULL;
+	}
+	if ((name != NULL) && (*name != '\0')) {
+		ep->name = Strdup(name);
+	} else {
+		switch (ep->info.errclass) {
+
+		/* can't do anything about these */
+		case MDEC_VOID:
+		case MDEC_SYS:
+		case MDEC_RPC:
+		default:
+			break;
+
+		/* device name */
+		case MDEC_DEV:
+		{
+			md_dev_error_t	*ip =
+					&ep->info.md_error_info_t_u.dev_error;
+
+			ep->name = dev_name(MD_SET_BAD, ip->dev);
+			break;
+		}
+
+		/* device name */
+		case MDEC_USE:
+		{
+			md_use_error_t	*ip =
+					&ep->info.md_error_info_t_u.use_error;
+
+			ep->name = dev_name(MD_SET_BAD, ip->dev);
+			/* make sure the printers always have a "where" */
+			if (ip->where == NULL) {
+				ip->where = Strdup(dgettext(TEXT_DOMAIN,
+				    "unknown"));
+			}
+			break;
+		}
+
+		/* metadevice name */
+		case MDEC_MD:
+		{
+			md_md_error_t	*ip =
+					&ep->info.md_error_info_t_u.md_error;
+
+			ep->name = md_name(ip->mnum);
+			break;
+		}
+
+		/* component name */
+		case MDEC_COMP:
+		{
+			md_comp_error_t	*ip =
+					&ep->info.md_error_info_t_u.comp_error;
+			char		*mdname, *devname;
+			size_t 		len;
+
+			/* build "<metadevice>: <device>" */
+			mdname = md_name(ip->comp.mnum);
+			devname = dev_name(MD_MIN2SET(ip->comp.mnum),
+			    ip->comp.dev);
+			len = strlen(mdname) + strlen(": ")
+			    + strlen(devname) + 1;
+			ep->name = Malloc(len);
+			(void) snprintf(ep->name, len, "%s: %s",
+			    mdname, devname);
+			Free(mdname);
+			Free(devname);
+			break;
+		}
+
+		/* hotspare pool name */
+		case MDEC_HSP:
+		{
+			md_hsp_error_t	*ip =
+					&ep->info.md_error_info_t_u.hsp_error;
+
+			ep->name = hsp_name(ip->hsp);
+			break;
+		}
+
+		/* hotspare name */
+		case MDEC_HS:
+		{
+			md_hs_error_t	*ip =
+					&ep->info.md_error_info_t_u.hs_error;
+			char		*hspname, *devname;
+			size_t 		len;
+
+			/* build "<hotspare pool>: <device>" */
+			hspname = hsp_name(ip->hs.hsp);
+			devname = dev_name(HSP_SET(ip->hs.hsp), ip->hs.dev);
+			len = strlen(hspname) + strlen(": ")
+			    + strlen(devname) + 1;
+			ep->name = Malloc(len);
+			(void) snprintf(ep->name, len, "%s: %s",
+			    hspname, devname);
+			Free(hspname);
+			Free(devname);
+			break;
+		}
+
+		/* mddb name */
+		case MDEC_MDDB:
+		{
+			md_mddb_error_t	*ip =
+					&ep->info.md_error_info_t_u.mddb_error;
+
+			/*
+			 * Use the metadevice name when a minor number was
+			 * recorded, the set name otherwise.  The two
+			 * assignments must be exclusive: without the else
+			 * the md_name() string is overwritten and leaked.
+			 */
+			if (ip->mnum != NODEV32)
+				ep->name = md_name(ip->mnum);
+			else
+				ep->name = set_name(ip->setno);
+			break;
+		}
+
+		/* set name */
+		case MDEC_DS:
+		{
+			md_ds_error_t	*ip =
+			    &ep->info.md_error_info_t_u.ds_error;
+
+			ep->name = set_name(ip->setno);
+			break;
+		}
+		}
+	}
+}
+
+/*
+ * simple error
+ *
+ * Resets ep, records a void-class error with code errnum, cooks the
+ * host/name strings and returns -1.
+ */
+int
+mderror(
+	md_error_t	*ep,
+	md_void_errno_t	errnum,
+	char		*name
+)
+{
+	/* start from a cleared error, then record class and code */
+	mdclrerror(ep);
+	ep->info.errclass = MDEC_VOID;
+	ep->info.md_error_info_t_u.void_error.errnum = errnum;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * system error
+ *
+ * Resets ep and, for a non-zero errnum, records a sys-class error
+ * carrying that errno value.  The host/name strings are cooked and -1
+ * is returned in either case.
+ */
+int
+mdsyserror(
+	md_error_t	*ep,
+	int		errnum,
+	char		*name
+)
+{
+	mdclrerror(ep);
+
+	/* a zero errnum leaves the freshly cleared class and code alone */
+	if (errnum != 0) {
+		ep->info.errclass = MDEC_SYS;
+		ep->info.md_error_info_t_u.sys_error.errnum = errnum;
+	}
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * RPC error
+ *
+ * Resets ep, records an RPC-class error whose code is the re_status
+ * that clnt_geterr() reports for clntp, cooks the strings with host,
+ * attaches extra and returns -1.
+ */
+int
+mdrpcerror(
+	md_error_t	*ep,
+	CLIENT		*clntp,
+	char		*host,
+	char		*extra
+)
+{
+	struct rpc_err	status;
+
+	mdclrerror(ep);
+
+	/* pull the status of the last call out of the client handle */
+	clnt_geterr(clntp, &status);
+	ep->info.errclass = MDEC_RPC;
+	ep->info.md_error_info_t_u.rpc_error.errnum = status.re_status;
+
+	metacookerror(ep, host);
+	mderrorextra(ep, extra);
+	return (-1);
+}
+
+/*
+ * RPC create error
+ *
+ * Resets ep, records an RPC-class error whose code is taken from
+ * rpc_createerr.cf_stat, cooks the strings with host, attaches extra
+ * and returns -1.
+ */
+int
+mdrpccreateerror(
+	md_error_t	*ep,
+	char		*host,
+	char		*extra
+)
+{
+	mdclrerror(ep);
+	ep->info.errclass = MDEC_RPC;
+	ep->info.md_error_info_t_u.rpc_error.errnum = rpc_createerr.cf_stat;
+
+	metacookerror(ep, host);
+	mderrorextra(ep, extra);
+	return (-1);
+}
+
+/*
+ * device error
+ *
+ * Resets ep, records a dev-class error with code errnum for device
+ * dev, cooks the host/name strings and returns -1.
+ */
+int
+mddeverror(
+	md_error_t	*ep,
+	md_dev_errno_t	errnum,
+	md_dev64_t	dev,
+	char		*name
+)
+{
+	md_dev_error_t	*devp;
+
+	mdclrerror(ep);
+
+	devp = &ep->info.md_error_info_t_u.dev_error;
+	ep->info.errclass = MDEC_DEV;
+	devp->errnum = errnum;
+	devp->dev = dev;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * use error
+ *
+ * Resets ep, records a use-class error with code errnum for device dev
+ * together with a Strdup()ed copy of where, cooks the host/name
+ * strings and returns -1.  where must not be NULL (asserted).
+ */
+int
+mduseerror(
+	md_error_t	*ep,
+	md_use_errno_t	errnum,
+	md_dev64_t	dev,
+	char		*where,
+	char		*name
+)
+{
+	md_use_error_t	*usep;
+
+	assert(where != NULL);
+	mdclrerror(ep);
+
+	usep = &ep->info.md_error_info_t_u.use_error;
+	ep->info.errclass = MDEC_USE;
+	usep->errnum = errnum;
+	usep->dev = dev;
+	usep->where = Strdup(where);
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * overlap error
+ *
+ * Resets ep and records an overlap-class error with code errnum.
+ * overlap must not be NULL (asserted); where is optional.  Strdup()ed
+ * copies of both strings are stored.  Returns -1.
+ */
+int
+mdoverlaperror(
+	md_error_t		*ep,
+	md_overlap_errno_t	errnum,
+	char			*name,
+	char			*where,
+	char			*overlap
+)
+{
+	md_overlap_error_t	*ovp;
+
+	assert(overlap != NULL);
+	mdclrerror(ep);
+
+	ovp = &ep->info.md_error_info_t_u.overlap_error;
+	ep->info.errclass = MDEC_OVERLAP;
+	ovp->errnum = errnum;
+	ovp->overlap = Strdup(overlap);
+	ovp->where = (where != NULL) ? Strdup(where) : NULL;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * metadevice error
+ *
+ * Resets ep, records an md-class error with code errnum for minor
+ * number mnum, cooks the host/name strings and returns -1.
+ */
+int
+mdmderror(
+	md_error_t	*ep,
+	md_md_errno_t	errnum,
+	minor_t		mnum,
+	char		*name
+)
+{
+	md_md_error_t	*mdp;
+
+	mdclrerror(ep);
+
+	mdp = &ep->info.md_error_info_t_u.md_error;
+	ep->info.errclass = MDEC_MD;
+	mdp->errnum = errnum;
+	mdp->mnum = mnum;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * component error
+ *
+ * Resets ep, records a comp-class error with code errnum for the
+ * component identified by minor number mnum and device dev, cooks the
+ * host/name strings and returns -1.
+ */
+int
+mdcomperror(
+	md_error_t	*ep,
+	md_comp_errno_t	errnum,
+	minor_t		mnum,
+	md_dev64_t	dev,
+	char		*name
+)
+{
+	md_comp_error_t	*compp;
+
+	mdclrerror(ep);
+
+	compp = &ep->info.md_error_info_t_u.comp_error;
+	ep->info.errclass = MDEC_COMP;
+	compp->errnum = errnum;
+	compp->comp.mnum = mnum;
+	compp->comp.dev = dev;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * hotspare pool error
+ *
+ * Resets ep, records an hsp-class error with code errnum for hot spare
+ * pool hsp, cooks the host/name strings and returns -1.
+ */
+int
+mdhsperror(
+	md_error_t	*ep,
+	md_hsp_errno_t	errnum,
+	hsp_t		hsp,
+	char		*name
+)
+{
+	md_hsp_error_t	*poolp;
+
+	mdclrerror(ep);
+
+	poolp = &ep->info.md_error_info_t_u.hsp_error;
+	ep->info.errclass = MDEC_HSP;
+	poolp->errnum = errnum;
+	poolp->hsp = hsp;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * hotspare error
+ *
+ * Resets ep, records an hs-class error with code errnum for the hot
+ * spare identified by pool hsp and device dev, cooks the host/name
+ * strings and returns -1.
+ */
+int
+mdhserror(
+	md_error_t	*ep,
+	md_hs_errno_t	errnum,
+	hsp_t		hsp,
+	md_dev64_t	dev,
+	char		*name
+)
+{
+	md_hs_error_t	*sparep;
+
+	mdclrerror(ep);
+
+	sparep = &ep->info.md_error_info_t_u.hs_error;
+	ep->info.errclass = MDEC_HS;
+	sparep->errnum = errnum;
+	sparep->hs.hsp = hsp;
+	sparep->hs.dev = dev;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * MDDB error
+ *
+ * Resets ep, records an mddb-class error with code errnum along with
+ * the given minor number, set number and size, cooks the host/name
+ * strings and returns -1.
+ */
+int
+mdmddberror(
+	md_error_t	*ep,
+	md_mddb_errno_t	errnum,
+	minor_t		mnum,
+	set_t		setno,
+	size_t		size,
+	char		*name
+)
+{
+	md_mddb_error_t	*dbp;
+
+	mdclrerror(ep);
+
+	dbp = &ep->info.md_error_info_t_u.mddb_error;
+	ep->info.errclass = MDEC_MDDB;
+	dbp->errnum = errnum;
+	dbp->mnum = mnum;
+	dbp->setno = setno;
+	dbp->size = size;
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * metadevice diskset (ds) error
+ *
+ * Resets ep, records a ds-class error with code errnum for set setno,
+ * stores Strdup()ed copies of node and drive (each may be NULL), cooks
+ * the host/name strings and returns -1.
+ */
+int
+mddserror(
+	md_error_t	*ep,
+	md_ds_errno_t	errnum,
+	set_t		setno,
+	char		*node,
+	char		*drive,
+	char		*name
+)
+{
+	md_ds_error_t	*dsp;
+
+	mdclrerror(ep);
+
+	dsp = &ep->info.md_error_info_t_u.ds_error;
+	ep->info.errclass = MDEC_DS;
+	dsp->errnum = errnum;
+	dsp->setno = setno;
+
+	/* optional strings: copy when given, NULL otherwise */
+	dsp->node = NULL;
+	if (node != NULL)
+		dsp->node = Strdup(node);
+	dsp->drive = NULL;
+	if (drive != NULL)
+		dsp->drive = Strdup(drive);
+
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * clear/attach extra context information
+ *
+ * Any existing ep->extra is freed; it is then set to a Strdup()ed copy
+ * of extra, or to NULL when extra is NULL.
+ */
+void
+mderrorextra(
+	md_error_t	*ep,
+	char		*extra
+)
+{
+	if (ep->extra != NULL)
+		Free(ep->extra);
+
+	ep->extra = (extra != NULL) ? Strdup(extra) : NULL;
+}
+
+/*
+ * steal (copy) an error code safely
+ *
+ * "to" is cleared and then takes over the entire contents of "from",
+ * including its string pointers; "from" is zeroed afterwards so those
+ * pointers are held in one place only.  Returns -1.
+ */
+int
+mdstealerror(
+	md_error_t	*to,
+	md_error_t	*from
+)
+{
+	/* drop whatever "to" held before overwriting it */
+	mdclrerror(to);
+	*to = *from;
+
+	/* zero without freeing: the strings now belong to "to" */
+	(void) memset(from, 0, sizeof (*from));
+	return (-1);
+}
+
+/*
+ * do an ioctl, cook the error, and return status
+ *
+ * Returns 0 when the ioctl succeeds and ep is still ok afterwards,
+ * -1 otherwise.
+ */
+int
+metaioctl(
+	int		cmd,
+	void		*data,
+	md_error_t	*ep,
+	char		*name
+)
+{
+	int		adminfd;
+
+	/* open admin device */
+	adminfd = open_admin(ep);
+	if (adminfd < 0)
+		return (-1);
+
+	/* do ioctl, starting from a cleared error */
+	mdclrerror(ep);
+	if (ioctl(adminfd, cmd, data) != 0)
+		return (mdsyserror(ep, errno, name));
+
+	/* ioctl itself succeeded: return success if ep is untouched */
+	if (mdisok(ep))
+		return (0);
+
+	/* ep holds an error: fill in its host and name */
+	metacookerror(ep, name);
+	return (-1);
+}
+
+/*
+ * print void class errors
+ *
+ * Appends the (translated) message for ep's void-class error code to
+ * the string already in buf, which is size bytes in total, and returns
+ * buf.  Codes without a case of their own get a generic message that
+ * includes the numeric code.
+ */
+static char *
+void_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_void_error_t	*ip = &ep->info.md_error_info_t_u.void_error;
+	/* p/psize: where the existing text ends and the room left after it */
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+
+	switch (ip->errnum) {
+	case MDE_NONE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "no error"));
+		break;
+	case MDE_UNIT_NOT_FOUND:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unit not found"));
+		break;
+	case MDE_DUPDRIVE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "drive specified more than once"));
+		break;
+	case MDE_INVAL_HSOP:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "illegal hot spare operation"));
+		break;
+	case MDE_NO_SET:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "no such set"));
+		break;
+	case MDE_SET_DIFF:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "set name is inconsistent"));
+		break;
+	case MDE_BAD_RD_OPT:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid read option"));
+		break;
+	case MDE_BAD_WR_OPT:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid write option"));
+		break;
+	case MDE_BAD_PASS_NUM:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid pass number"));
+		break;
+	case MDE_BAD_RESYNC_OPT:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid resync option"));
+		break;
+	case MDE_BAD_INTERLACE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid interlace"));
+		break;
+	case MDE_NO_HSPS:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "no hotspare pools found"));
+		break;
+	case MDE_NOTENOUGH_DB:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+			"must have at least 1 database (-f overrides)"));
+		break;
+	case MDE_DELDB_NOTALLOWED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "cannot delete the last database replica in the diskset"));
+		break;
+	case MDE_DEL_VALIDDB_NOTALLOWED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Deleting specified valid replicas results in stale "
+		    "state database. Configuration changes with stale "
+		    "database result in panic(-f overrides)"));
+		break;
+	case MDE_SYSTEM_FILE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "error in system file"));
+		break;
+	case MDE_MDDB_FILE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "error in mddb.cf file"));
+		break;
+	case MDE_MDDB_CKSUM:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "checksum error in mddb.cf file"));
+		break;
+	case MDE_VFSTAB_FILE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "error in vfstab file"));
+		break;
+	case MDE_NOSLICE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "invalid slice number for drive name"));
+		break;
+	case MDE_SYNTAX:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "syntax error"));
+		break;
+	case MDE_OPTION:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "illegal option"));
+		break;
+	case MDE_TAKE_OWN:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "failed to reserve any drives"));
+		break;
+	case MDE_NOT_DRIVENAME:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "not a valid drive name"));
+		break;
+	case MDE_RESERVED:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "reserved by another host"));
+		break;
+	case MDE_DVERSION:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "driver version mismatch"));
+		break;
+	case MDE_MVERSION:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "metadevice state database version mismatch"));
+		break;
+	case MDE_TESTERROR:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "TEST ERROR MESSAGE"));
+		break;
+	case MDE_BAD_ORIG_NCOL:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid column count"));
+		break;
+	case MDE_RAID_INVALID:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "devices were not RAIDed previously or "
+		    "are specified in the wrong order"));
+		break;
+	case MDE_MED_ERROR:
+		/* nothing is appended for this code */
+		break;
+	case MDE_TOOMANYMED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "too many mediator hosts requested"));
+		break;
+	case MDE_NOMED:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "no mediator hosts found"));
+		break;
+	case MDE_ONLYNODENAME:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "only the nodename of a host is required for deletes"));
+		break;
+	case MDE_RAID_BAD_PW_CNT:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "simultaneous writes out of range"));
+		break;
+	case MDE_DEVID_TOOBIG:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "relocation information size is greater than reported"));
+		break;
+	case MDE_NOPERM:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Permission denied.  You must have root privilege "
+		    "to execute this command."));
+		break;
+	case MDE_NODEVID:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Device relocation information not available "
+		    "for this device"));
+		break;
+	case MDE_NOROOT:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no root filesystem in /etc/mnttab"));
+		break;
+	case MDE_EOF_TRANS:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    MD_EOF_TRANS_MSG));
+		break;
+	case MDE_NOT_MN:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "option only valid within a multi-owner set"));
+		break;
+	case MDE_ABR_SET:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Invalid command for mirror with ABR set"));
+		break;
+	case MDE_INVAL_MNOP:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Invalid operation on multi-owner set"));
+		break;
+	case MDE_MNSET_NOTRANS:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Trans metadevice not supported on multi-owner set"));
+		break;
+	case MDE_MNSET_NORAID:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "RAID-5 metadevice not supported on multi-owner set"));
+		break;
+	case MDE_FORCE_DEL_ALL_DRV:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Must specify -f option to delete all drives from set"));
+		break;
+	case MDE_STRIPE_TRUNC_SINGLE:
+		/* both block counts in the message come from ep->extra */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "The necessary rounding would result in data loss.  "
+		    "You can avoid this by concatenating additional devices "
+		    "totaling at least %s blocks, or by increasing the size "
+		    "of the specified component by exactly %s blocks."),
+		    ep->extra, ep->extra);
+		break;
+	case MDE_STRIPE_TRUNC_MULTIPLE:
+		/* the block count in the message comes from ep->extra */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "The necessary rounding would result in data loss.  "
+		    "You can avoid this by concatenating additional devices "
+		    "totaling at least %s blocks."), ep->extra);
+		break;
+	case MDE_SMF_FAIL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "failed to enable/disable SVM service"));
+		break;
+	case MDE_SMF_NO_SERVICE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "service(s) not online in SMF"));
+		break;
+	default:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown void error code %d"), ip->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * print sys class errors
+ *
+ * Appends the strerror() text for ep's errno value to the string
+ * already in buf (size bytes in total), or a generic message naming
+ * the number when strerror() returns NULL.  Returns buf.
+ */
+static char *
+sys_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_sys_error_t	*sysp = &ep->info.md_error_info_t_u.sys_error;
+	size_t		used = strlen(buf);
+	char		*msg = strerror(sysp->errnum);
+
+	if (msg != NULL) {
+		(void) snprintf(buf + used, size - used, "%s", msg);
+	} else {
+		(void) snprintf(buf + used, size - used,
+		    dgettext(TEXT_DOMAIN, "unknown errno %d out of range"),
+		    sysp->errnum);
+	}
+
+	return (buf);
+}
+
+/*
+ * print RPC class errors
+ *
+ * Appends the clnt_sperrno() text for ep's RPC status to the string
+ * already in buf (size bytes in total) and returns buf.
+ */
+static char *
+rpc_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_rpc_error_t	*rpcp = &ep->info.md_error_info_t_u.rpc_error;
+	size_t		used = strlen(buf);
+
+	(void) snprintf(buf + used, size - used, "%s",
+	    clnt_sperrno(rpcp->errnum));
+	return (buf);
+}
+
+/*
+ * print dev class errors
+ *
+ * Appends the (translated) message for ep's dev-class error code to
+ * the string already in buf, which is size bytes in total, and returns
+ * buf.  Codes without a case of their own get a generic message that
+ * includes the numeric code.
+ */
+static char *
+dev_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_dev_error_t	*ip = &ep->info.md_error_info_t_u.dev_error;
+	/* p/psize: where the existing text ends and the room left after it */
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+
+	switch (ip->errnum) {
+	case MDE_INVAL_HS:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "hotspare doesn't exist"));
+		break;
+	case MDE_FIX_INVAL_STATE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "cannot enable hotspared device"));
+		break;
+	case MDE_FIX_INVAL_HS_STATE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "hotspare isn't broken, can't enable"));
+		break;
+	case MDE_NOT_META:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "not a metadevice"));
+		break;
+	case MDE_IS_DUMP:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "is a dump device"));
+		break;
+	case MDE_IS_META:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "is a metadevice"));
+		break;
+	case MDE_IS_SWAPPED:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "is swapped on"));
+		break;
+	case MDE_NAME_SPACE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "namespace error"));
+		break;
+	case MDE_IN_SHARED_SET:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "device in shared set"));
+		break;
+	case MDE_NOT_IN_SET:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "device not in set"));
+		break;
+	case MDE_NOT_DISK:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "not a disk device"));
+		break;
+	case MDE_CANT_CONFIRM:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "can't confirm device"));
+		break;
+	case MDE_INVALID_PART:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid partition"));
+		break;
+	case MDE_HAS_MDDB:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "has a metadevice database replica"));
+		break;
+	case MDE_NO_DB:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no metadevice database replica on device"));
+		break;
+	case MDE_CANTVERIFY_VTOC:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unable to verify the vtoc"));
+		break;
+	case MDE_NOT_LOCAL:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "not in local set"));
+		break;
+	case MDE_DEVICES_NAME:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "can't parse /devices name"));
+		break;
+	case MDE_REPCOMP_INVAL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "replica slice is not usable as a metadevice component"));
+		break;
+	case MDE_REPCOMP_ONLY:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "only replica slice is usable for a diskset "
+		    "database replica"));
+		break;
+	case MDE_INV_ROOT:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "invalid root device for this operation"));
+		break;
+	case MDE_MULTNM:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "multiple entries for device in Solaris Volume Manager "
+		    "configuration"));
+		break;
+	case MDE_TOO_MANY_PARTS:
+		/* the limit quoted in the message is MD_MAX_PARTS */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Disks with more than %d partitions are not supported "
+		    "in Solaris Volume Manager"), MD_MAX_PARTS);
+		break;
+	case MDE_REPART_REPLICA:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "cannot repartition a slice with an existing replica"));
+		break;
+	default:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unknown dev error code %d"),
+		    ip->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * print overlap class errors
+ *
+ * Appends the (translated) message for ep's overlap-class error code
+ * to the string already in buf (size bytes in total) and returns buf.
+ */
+static char *
+overlap_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_overlap_error_t	*ovp =
+			&ep->info.md_error_info_t_u.overlap_error;
+	size_t		used = strlen(buf);
+	char		*tail = buf + used;
+	size_t		room = size - used;
+
+	switch (ovp->errnum) {
+	case MDE_OVERLAP_MOUNTED:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "overlaps with %s which is mounted as \'%s\'"),
+		    ovp->overlap, ovp->where);
+		break;
+	case MDE_OVERLAP_SWAP:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "overlaps with %s which is a swap device"),
+		    ovp->overlap);
+		break;
+	case MDE_OVERLAP_DUMP:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "overlaps with %s which is the dump device"),
+		    ovp->overlap);
+		break;
+	default:
+		(void) snprintf(tail, room, dgettext(TEXT_DOMAIN,
+		    "unknown overlap error code %d"), ovp->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * print use class errors
+ *
+ * Appends the (translated) message for ep's use-class error code to
+ * the string already in buf (size bytes in total) and returns buf.
+ * Codes without a case of their own get a generic message that
+ * includes the numeric code.
+ */
+static char *
+use_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_use_error_t	*ip = &ep->info.md_error_info_t_u.use_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+	char		*where = ip->where;
+
+	/*
+	 * metacookerror() only supplies a default "where" when it is not
+	 * handed a name, so a NULL can still arrive here.  Substitute the
+	 * same default rather than passing NULL to strcmp() or to a %s
+	 * conversion.
+	 */
+	if (where == NULL)
+		where = dgettext(TEXT_DOMAIN, "unknown");
+
+	switch (ip->errnum) {
+	case MDE_IS_MOUNTED:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "is mounted on %s"),
+		    where);
+		break;
+	case MDE_ALREADY:
+		/*
+		 * when the object of the error (existing device that
+		 * would be used by SVM) is the metadb then it is necessary
+		 * to explicitly specify the string in the error message so
+		 * that it can be successfully localized for the Asian locales.
+		 */
+		if (strcmp(where, MDB_STR) != 0) {
+			(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+				"has appeared more than once in the "
+				"specification of %s"), where);
+		} else {
+			(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+				"has appeared more than once in the "
+				"specification of " MDB_STR));
+		}
+		break;
+	case MDE_OVERLAP:
+		/*
+		 * when the object of the error (existing device that
+		 * would overlap) is the metadb then it is necessary
+		 * to explicitly specify the string in the error message so
+		 * that it can be successfully localized for the Asian locales.
+		 */
+		if (strcmp(where, MDB_STR) != 0) {
+			(void) snprintf(p, psize,
+			    dgettext(TEXT_DOMAIN, "overlaps with device in %s"),
+			    where);
+		} else {
+			(void) snprintf(p, psize,
+			    dgettext(TEXT_DOMAIN, "overlaps with device in "
+			    MDB_STR));
+		}
+		break;
+	default:
+		/* name the right class: this is the use-class printer */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown use error code %d"), ip->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * print md class errors
+ */
+static char *
+md_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_md_error_t	*ip = &ep->info.md_error_info_t_u.md_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+
+	switch (ip->errnum) {
+	case MDE_INVAL_UNIT:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid unit"));
+		break;
+	case MDE_UNIT_NOT_SETUP:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unit not set up"));
+		break;
+	case MDE_UNIT_ALREADY_SETUP:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unit already set up"));
+		break;
+	case MDE_NOT_MM:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unit is not a mirror"));
+		break;
+	case MDE_IS_SM:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "illegal to clear submirror"));
+		break;
+	case MDE_IS_OPEN:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "metadevice is open"));
+		break;
+	case MDE_C_WITH_INVAL_SM:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "attempted to clear mirror with submirror(s) "
+		    "in invalid state"));
+		break;
+	case MDE_RESYNC_ACTIVE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "resync in progress"));
+		break;
+	case MDE_LAST_SM_RE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "attempt to replace a component on the last "
+		    "running submirror"));
+		break;
+	case MDE_MIRROR_FULL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "mirror has maximum number of submirrors"));
+		break;
+	case MDE_IN_UNAVAIL_STATE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "component is in unavailable state; run 'metastat -i'"));
+		break;
+	case MDE_IN_USE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "metadevice in use"));
+		break;
+	case MDE_SM_TOO_SMALL:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "submirror too small to attach"));
+		break;
+	case MDE_NO_LABELED_SM:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "can't attach labeled submirror to an unlabeled mirror"));
+		break;
+	case MDE_SM_OPEN_ERR:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "submirror open error"));
+		break;
+	case MDE_CANT_FIND_SM:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "can't find submirror in mirror"));
+		break;
+	case MDE_LAST_SM:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+			"attempt to detach last running submirror"));
+		break;
+	case MDE_NO_READABLE_SM:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "operation would result in no readable submirrors"));
+		break;
+	case MDE_SM_FAILED_COMPS:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "attempt an operation on a submirror "
+		    "that has erred components"));
+		break;
+	case MDE_ILLEGAL_SM_STATE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "attempt operation on a submirror in illegal state"));
+		break;
+	case MDE_RR_ALLOC_ERROR:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "attach failed, unable to allocate new resync info"));
+		break;
+	case MDE_MIRROR_OPEN_FAILURE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "insufficient devices to open"));
+		break;
+	case MDE_MIRROR_THREAD_FAILURE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "mirror thread failure"));
+		break;
+	case MDE_GROW_DELAYED:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "growing of metadevice delayed"));
+		break;
+	case MDE_NOT_MT:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unit is not a trans"));
+		break;
+	case MDE_HS_IN_USE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "can't modify hot spare pool, hot spare in use"));
+		break;
+	case MDE_HAS_LOG:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "already has log"));
+		break;
+	case MDE_UNKNOWN_TYPE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unknown metadevice type"));
+		break;
+	case MDE_NOT_STRIPE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unit is not a concat/stripe"));
+		break;
+	case MDE_NOT_RAID:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unit is not a RAID"));
+		break;
+	case MDE_NROWS:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "not enough stripes specified"));
+		break;
+	case MDE_NCOMPS:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "not enough components specified"));
+		break;
+	case MDE_NSUBMIRS:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "not enough submirrors specified"));
+		break;
+	case MDE_BAD_STRIPE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid stripe configuration"));
+		break;
+	case MDE_BAD_MIRROR:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid mirror configuration"));
+		break;
+	case MDE_BAD_TRANS:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid trans configuration"));
+		break;
+	case MDE_BAD_RAID:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "invalid RAID configuration"));
+		break;
+	case MDE_RAID_OPEN_FAILURE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "resync unable to open RAID unit"));
+		break;
+	case MDE_RAID_THREAD_FAILURE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "attempt to start resync thread failed"));
+		break;
+	case MDE_RAID_NEED_FORCE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "operation requires -f (force) flag"));
+		break;
+	case MDE_NO_LOG:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "log has already been detached"));
+		break;
+	case MDE_RAID_DOI:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "only valid action is metaclear"));
+		break;
+	case MDE_RAID_LAST_ERRED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "in Last Erred state, "
+		    "errored components must be replaced"));
+		break;
+	case MDE_RAID_NOT_OKAY:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "all components must be Okay to perform this operation"));
+		break;
+	case MDE_RENAME_BUSY:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "metadevice is temporarily too busy for renames"));
+		break;
+	case MDE_RENAME_SOURCE_BAD:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "source metadevice is not able to be renamed"));
+		break;
+	case MDE_RENAME_TARGET_BAD:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "target metadevice is not able to be renamed"));
+		break;
+	case MDE_RENAME_TARGET_UNRELATED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "target metadevice is not related to source metadevice"));
+		break;
+	case MDE_RENAME_CONFIG_ERROR:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "metadevice driver configuration error; "
+		    "rename can't occur"));
+		break;
+	case MDE_RENAME_ORDER:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "units may not be renamed in that order"));
+		break;
+	case MDE_RECOVER_FAILED:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "recovery failed"));
+		break;
+	case MDE_SP_NOSPACE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "not enough space available for request"));
+		break;
+	case MDE_SP_BADWMREAD:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "error reading extent header"));
+		break;
+	case MDE_SP_BADWMWRITE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "error writing extent header"));
+		break;
+	case MDE_SP_BADWMMAGIC:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "bad magic number in extent header"));
+		break;
+	case MDE_SP_BADWMCRC:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "bad checksum in extent header"));
+		break;
+	case MDE_NOT_SP:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unit is not a soft partition"));
+		break;
+	case MDE_SP_OVERLAP:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "overlapping extents specified"));
+		break;
+	case MDE_SP_BAD_LENGTH:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "bad length specified"));
+		break;
+	case MDE_SP_NOSP:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "no soft partitions on this device"));
+		break;
+	case MDE_UNIT_TOO_LARGE:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "Volume size cannot exceed 1 TByte"));
+		break;
+	case MDE_LOG_TOO_LARGE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Trans log size must be less than 1 TByte"));
+		break;
+	default:
+		(void) snprintf(p, psize,
+		    dgettext(TEXT_DOMAIN, "unknown md error code %d"),
+		    ip->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * Append the text for a component class (MDEC_COMP) error to buf.
+ *
+ * The message is written after the string already held in buf and is
+ * limited to what is left of the size-byte buffer.  Returns buf.
+ */
+static char *
+comp_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_comp_error_t	*ip = &ep->info.md_error_info_t_u.comp_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+	char		*msg;
+
+	switch (ip->errnum) {
+	case MDE_CANT_FIND_COMP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "can't find component in unit");
+		break;
+	case MDE_REPL_INVAL_STATE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "component in invalid state to replace - "
+		    "Replace \"Maintenance\" components first");
+		break;
+	case MDE_COMP_TOO_SMALL:
+		msg = dgettext(TEXT_DOMAIN,
+		    "replace failure, new component is too small");
+		break;
+	case MDE_COMP_OPEN_ERR:
+		msg = dgettext(TEXT_DOMAIN,
+		    "unable to open concat/stripe component");
+		break;
+	case MDE_RAID_COMP_ERRED:
+		msg = dgettext(TEXT_DOMAIN,
+		    "must replace errored component first");
+		break;
+	case MDE_MAXIO:
+		msg = dgettext(TEXT_DOMAIN,
+		    "maxtransfer is too small");
+		break;
+	case MDE_SP_COMP_OPEN_ERR:
+		msg = dgettext(TEXT_DOMAIN,
+		    "error opening device under soft partition. Check"
+		    " device status, then use metadevadm(1M).");
+		break;
+	default:
+		/* the only message here that takes an argument */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown comp error code %d"), ip->errnum);
+		return (buf);
+	}
+
+	/*
+	 * Fixed messages are printed through "%s" so that a '%' in a
+	 * translated string is never interpreted as a conversion.
+	 */
+	(void) snprintf(p, psize, "%s", msg);
+
+	return (buf);
+}
+
+/*
+ * Append the text for a hotspare pool class (MDEC_HSP) error to buf.
+ *
+ * The message is written after the string already held in buf and is
+ * limited to what is left of the size-byte buffer.  Returns buf.
+ */
+static char *
+hsp_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_hsp_error_t	*ip = &ep->info.md_error_info_t_u.hsp_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+	char		*msg;
+
+	switch (ip->errnum) {
+	case MDE_HSP_CREATE_FAILURE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool database create failure");
+		break;
+	case MDE_HSP_IN_USE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool in use");
+		break;
+	case MDE_INVAL_HSP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "invalid hotspare pool");
+		break;
+	case MDE_HSP_BUSY:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool is busy");
+		break;
+	case MDE_HSP_REF:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool is referenced");
+		break;
+	case MDE_HSP_ALREADY_SETUP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare pool is already setup");
+		break;
+	case MDE_BAD_HSP:
+		msg = dgettext(TEXT_DOMAIN,
+		    "invalid hotspare pool configuration");
+		break;
+	case MDE_HSP_UNIT_TOO_LARGE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "units in the hotspare pool cannot exceed 1 TByte");
+		break;
+	default:
+		/* the only message here that takes an argument */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown hsp error code %d"), ip->errnum);
+		return (buf);
+	}
+
+	/*
+	 * Fixed messages are printed through "%s" so that a '%' in a
+	 * translated string is never interpreted as a conversion.
+	 */
+	(void) snprintf(p, psize, "%s", msg);
+
+	return (buf);
+}
+
+/*
+ * Append the text for a hotspare class (MDEC_HS) error to buf.
+ *
+ * The message is written after the string already held in buf and is
+ * limited to what is left of the size-byte buffer.  Returns buf.
+ */
+static char *
+hs_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_hs_error_t	*ip = &ep->info.md_error_info_t_u.hs_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+	char		*msg;
+
+	switch (ip->errnum) {
+	case MDE_HS_RESVD:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare is in use");
+		break;
+	case MDE_HS_CREATE_FAILURE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare database create failure");
+		break;
+	case MDE_HS_INUSE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "add or replace failed, hot spare is already in use");
+		break;
+	case MDE_HS_UNIT_TOO_LARGE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "hotspare size cannot exceed 1 TByte");
+		break;
+	default:
+		/* the only message here that takes an argument */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown hs error code %d"), ip->errnum);
+		return (buf);
+	}
+
+	/*
+	 * Fixed messages are printed through "%s" so that a '%' in a
+	 * translated string is never interpreted as a conversion.
+	 */
+	(void) snprintf(p, psize, "%s", msg);
+
+	return (buf);
+}
+
+/*
+ * Append the text for a metadevice database class (MDEC_MDDB) error
+ * to buf.
+ *
+ * The message is written after the string already held in buf and is
+ * limited to what is left of the size-byte buffer.  Returns buf.
+ *
+ * Messages that take arguments are formatted in place and return
+ * directly.  Fixed messages are collected in msg and printed through
+ * "%s": two of them contain a literal "50% ", which must not be parsed
+ * as a printf conversion.  The message ids are left untouched so
+ * existing catalog entries still match.
+ */
+static char *
+mddb_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_mddb_error_t	*ip = &ep->info.md_error_info_t_u.mddb_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+	char		*msg;
+
+	switch (ip->errnum) {
+	case MDE_TOOMANY_REPLICAS:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "%d metadevice database replicas is too many; "
+		    "the maximum is %d"), ip->size, MDDB_NLB);
+		return (buf);
+	case MDE_REPLICA_TOOSMALL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "device size %d is too small for "
+		    "metadevice database replica"), ip->size);
+		return (buf);
+	case MDE_DB_BLKRANGE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "logical block number %d out of range"), ip->size);
+		return (buf);
+	case MDE_NOTVERIFIED:
+		msg = dgettext(TEXT_DOMAIN,
+		    "data not returned correctly from disk");
+		break;
+	case MDE_DB_INVALID:
+		msg = dgettext(TEXT_DOMAIN,
+		    "invalid argument");
+		break;
+	case MDE_DB_EXISTS:
+		msg = dgettext(TEXT_DOMAIN,
+		    "metadevice database replica exists on device");
+		break;
+	case MDE_DB_MASTER:
+		msg = dgettext(TEXT_DOMAIN,
+		    "has bad master block on device");
+		break;
+	case MDE_DB_TOOSMALL:
+		msg = dgettext(TEXT_DOMAIN,
+		    "device is too small");
+		break;
+	case MDE_DB_NORECORD:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no such metadevice database record");
+		break;
+	case MDE_DB_NOSPACE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "metadevice database is full, can't create new records");
+		break;
+	case MDE_DB_NOTNOW:
+		msg = dgettext(TEXT_DOMAIN,
+		    "metadevice database has too few replicas, for "
+		    "metadevice database operation");
+		break;
+	case MDE_DB_NODB:
+		msg = dgettext(TEXT_DOMAIN,
+		    "there are no existing databases");
+		break;
+	case MDE_DB_NOTOWNER:
+		msg = dgettext(TEXT_DOMAIN,
+		    "not owner of metadevice database");
+		break;
+	case MDE_DB_STALE:
+		msg = dgettext(TEXT_DOMAIN,
+		    "stale databases");
+		break;
+	case MDE_DB_TOOFEW:
+		msg = dgettext(TEXT_DOMAIN,
+		    "not enough databases");
+		break;
+	case MDE_DB_TAGDATA:
+		msg = dgettext(TEXT_DOMAIN,
+		    "tagged data detected, user intervention required");
+		break;
+	case MDE_DB_ACCOK:
+		/* contains literal '%'; must not be used as a format */
+		msg = dgettext(TEXT_DOMAIN,
+		    "50% replicas & 50% mediator hosts available, "
+		    "user intervention required");
+		break;
+	case MDE_DB_NTAGDATA:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no tagged data available or only one tag found");
+		break;
+	case MDE_DB_ACCNOTOK:
+		/* contains literal '%'; must not be used as a format */
+		msg = dgettext(TEXT_DOMAIN,
+		    "50% replicas & 50% mediator hosts not available");
+		break;
+	case MDE_DB_NOLOCBLK:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no valid locator blocks were found");
+		break;
+	case MDE_DB_NOLOCNMS:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no valid locator name information was found");
+		break;
+	case MDE_DB_NODIRBLK:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no valid directory blocks were found");
+		break;
+	case MDE_DB_NOTAGREC:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no tag record was allocated, so data "
+		    "tagging is disabled");
+		break;
+	case MDE_DB_NOTAG:
+		msg = dgettext(TEXT_DOMAIN,
+		    "no tag records exist or no matching tag was found");
+		break;
+	default:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown mddb error code %d"), ip->errnum);
+		return (buf);
+	}
+
+	(void) snprintf(p, psize, "%s", msg);
+
+	return (buf);
+}
+
+/*
+ * print diskset (ds) class errors
+ *
+ * Appends the message selected by the error number to the string
+ * already held in buf, limited to what is left of the size-byte
+ * buffer, and returns buf.  Messages are filled in from the node,
+ * drive and setno members of the diskset error record.
+ */
+static char *
+ds_to_str(
+	md_error_t	*ep,
+	char		*buf,
+	size_t		size
+)
+{
+	md_ds_error_t	*ip = &ep->info.md_error_info_t_u.ds_error;
+	char		*p = buf + strlen(buf);
+	size_t		psize = size - strlen(buf);
+
+	switch (ip->errnum) {
+	case MDE_DS_DUPHOST:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s is specified more than once"), ip->node);
+		break;
+	case MDE_DS_NOTNODENAME:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "\"%s\" is not a nodename, but a network name"), ip->node);
+		break;
+	case MDE_DS_SELFNOTIN:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "nodename of host %s creating the set must be included"),
+		    ip->node);
+		break;
+	case MDE_DS_NODEHASSET:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s already has set"), ip->node);
+		break;
+	case MDE_DS_NODENOSET:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s does not have set"), ip->node);
+		break;
+	case MDE_DS_NOOWNER:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "must be owner of the set for this command"));
+		break;
+	case MDE_DS_NOTOWNER:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "only the current owner %s may operate on this set"),
+		    ip->node);
+		break;
+	case MDE_DS_NODEISNOTOWNER:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s is not the owner"), ip->node);
+		break;
+	case MDE_DS_NODEINSET:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s is already in the set"), ip->node);
+		break;
+	case MDE_DS_NODENOTINSET:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s is not in the set"), ip->node);
+		break;
+	case MDE_DS_SETNUMBUSY:
+		/*
+		 * NOTE(review): "%ld" assumes ip->setno is a long; its
+		 * declaration is not visible here - confirm.
+		 */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s already has a set numbered %ld"),
+		    ip->node, ip->setno);
+		break;
+	case MDE_DS_SETNUMNOTAVAIL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "no available set numbers"));
+		break;
+	case MDE_DS_SETNAMEBUSY:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "set name is in-use or invalid on host %s"), ip->node);
+		break;
+	case MDE_DS_DRIVENOTCOMMON:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "drive %s is not common with host %s"),
+		    ip->drive, ip->node);
+		break;
+	case MDE_DS_DRIVEINSET:
+		/* here the node member is printed as the set name */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "drive %s is in set %s"), ip->drive, ip->node);
+		break;
+	case MDE_DS_DRIVENOTINSET:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "drive %s is not in set"), ip->drive);
+		break;
+	case MDE_DS_DRIVEINUSE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "drive %s is in use"), ip->drive);
+		break;
+	case MDE_DS_DUPDRIVE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "drive %s is specified more than once"), ip->drive);
+		break;
+	case MDE_DS_INVALIDSETNAME:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "set name contains invalid characters"));
+		break;
+	case MDE_DS_HASDRIVES:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unable to delete set, it still has drives"));
+		break;
+	case MDE_DS_SIDENUMNOTAVAIL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "maximum number of nodenames exceeded"));
+		break;
+	case MDE_DS_SETNAMETOOLONG:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "set name is too long"));
+		break;
+	case MDE_DS_NODENAMETOOLONG:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host name %s is too long"), ip->node);
+		break;
+	case MDE_DS_OHACANTDELSELF:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+"administrator host %s deletion disallowed in one host admin mode"),
+		    ip->node);
+		break;
+	case MDE_DS_HOSTNOSIDE:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "side information missing for host %s"), ip->node);
+		break;
+	case MDE_DS_SETLOCKED:
+		/*
+		 * NOTE(review): the host name is taken from ip->drive, not
+		 * ip->node as in the other "host %s" messages; presumably
+		 * the caller stores the locking host there - confirm.
+		 */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+	    "host %s is modifying set - try later or restart rpc.metad"),
+		    ip->drive);
+		break;
+	case MDE_DS_ULKSBADKEY:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "set unlock failed - bad key"));
+		break;
+	case MDE_DS_LKSBADKEY:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "set lock failed - bad key"));
+		break;
+	case MDE_DS_WRITEWITHSULK:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "write operation attempted on set with set unlocked"));
+		break;
+	case MDE_DS_SETCLEANUP:
+		/* here the node member is printed as the set name */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "set \"%s\" is out of date - cleaning up - take failed"),
+		    ip->node);
+		break;
+	case MDE_DS_CANTDELSELF:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+"administrator host %s can't be deleted, other hosts still in set\n"
+"Use -f to override"), ip->node);
+		break;
+	case MDE_DS_HASMED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unable to delete set, it still has mediator hosts"));
+		break;
+	case MDE_DS_TOOMANYALIAS:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "%s causes there to be more aliases than allowed"),
+		    ip->node);
+		break;
+	case MDE_DS_ISMED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "%s is already a mediator host"), ip->node);
+		break;
+	case MDE_DS_ISNOTMED:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "%s is not a mediator host"), ip->node);
+		break;
+	case MDE_DS_INVALIDMEDNAME:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "mediator name \"%s\" contains invalid characters"),
+		    ip->node);
+		break;
+	case MDE_DS_ALIASNOMATCH:
+		/* node is printed as the alias, drive as the host name */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "mediator alias \"%s\" is not an alias for host "
+		    "\"%s\""), ip->node, ip->drive);
+		break;
+	case MDE_DS_NOMEDONHOST:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unable to contact %s on host \"%s\""),
+		    MED_SERVNAME, ip->node);
+		break;
+	case MDE_DS_DRIVENOTONHOST:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "drive %s is not present on host %s"),
+		    ip->drive, ip->node);
+		break;
+	case MDE_DS_CANTDELMASTER:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "master %s can't be deleted, other hosts still in set"),
+		    ip->node);
+		break;
+	case MDE_DS_NOTINMEMBERLIST:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "node %s is not in membership list"),
+		    ip->node);
+		break;
+	case MDE_DS_MNCANTDELSELF:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s can't delete self from multi-owner set\n"
+		    "while other hosts still in set"),
+		    ip->node);
+		break;
+	case MDE_DS_RPCVERSMISMATCH:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "host %s does not support multi-owner diskset"),
+		    ip->node);
+		break;
+	case MDE_DS_WITHDRAWMASTER:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "master host %s cannot withdraw from multi-owner diskset "
+		    "when other owner nodes are still present in diskset"),
+		    ip->node);
+		break;
+	case MDE_DS_CANTRESNARF:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "imported set could not be loaded"));
+		break;
+	case MDE_DS_INSUFQUORUM:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "insufficient replica quorum detected. Use "
+		    "-f to force import of the set"));
+		break;
+	case MDE_DS_EXTENDEDNM:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "multiple namespace records detected"));
+		break;
+	case MDE_DS_PARTIALSET:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "partial diskset detected\n"
+		    "Please refer to the Solaris Volume Manager documentation,"
+		    "\nTroubleshooting section, at http://docs.sun.com or from"
+		    "\nyour local copy"));
+		break;
+	case MDE_DS_COMMDCTL_SUSPEND_NYD:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "rpc.mdcommd on host %s is not yet drained during "
+		    "suspend operation"),
+		    ip->node);
+		break;
+	case MDE_DS_COMMDCTL_SUSPEND_FAIL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "rpc.mdcommd on host %s failed suspend operation"),
+		    ip->node);
+		break;
+	case MDE_DS_COMMDCTL_REINIT_FAIL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "rpc.mdcommd on host %s failed reinitialization operation"),
+		    ip->node);
+		break;
+	case MDE_DS_COMMDCTL_RESUME_FAIL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "rpc.mdcommd on host %s failed resume operation"),
+		    ip->node);
+		break;
+	case MDE_DS_NOTNOW_RECONFIG:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "command terminated, host %s starting reconfig cycle"),
+		    ip->node);
+		break;
+	case MDE_DS_NOTNOW_CMD:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "metaset or metadb command already running on diskset "
+		    "on host %s"), ip->node);
+		break;
+	case MDE_DS_COMMD_SEND_FAIL:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "rpc.mdcommd on host %s failed operation"),
+		    ip->node);
+		break;
+	case MDE_DS_MASTER_ONLY:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "this command must be run on the master node of the set,"
+		    " which is currently %s"), ip->node);
+		break;
+	case MDE_DS_SINGLEHOST:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "diskset is auto-take; cannot accept additional hosts"));
+		break;
+	case MDE_DS_AUTONOTSET:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "auto-take is not enabled on diskset"));
+		break;
+	case MDE_DS_INVALIDDEVID:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Invalid device id on drive %s on host %s"), ip->drive,
+		    ip->node);
+		break;
+	case MDE_DS_SETNOTIMP:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Unable to import set on node %s"), ip->node);
+		break;
+	case MDE_DS_NOTSELFIDENTIFY:
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "Drive %s won't be self identifying"), ip->drive);
+		break;
+	default:
+		/* unrecognized code: report the raw number */
+		(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+		    "unknown diskset error code %d"), ip->errnum);
+		break;
+	}
+
+	return (buf);
+}
+
+/*
+ * convert error to printable string
+ *
+ * Hands the error to the converter for its class and returns the
+ * resulting text.  The text lives in a static buffer that is reset on
+ * every call, so a result must be consumed before the next call.
+ */
+static char *
+mde_to_str(
+	md_error_t	*ep
+)
+{
+	static char	buf[BUFSIZ];
+	size_t		bufsz = sizeof (buf);
+	char		*str;
+
+	/* initialize buf; the class converters append to it */
+	buf[0] = '\0';
+
+	/* class specific */
+	switch (ep->info.errclass) {
+	case MDEC_VOID:
+		str = void_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_SYS:
+		str = sys_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_RPC:
+		str = rpc_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_DEV:
+		str = dev_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_USE:
+		str = use_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_MD:
+		str = md_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_COMP:
+		str = comp_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_HSP:
+		str = hsp_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_HS:
+		str = hs_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_MDDB:
+		str = mddb_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_DS:
+		str = ds_to_str(ep, buf, bufsz);
+		break;
+	case MDEC_OVERLAP:
+		str = overlap_to_str(ep, buf, bufsz);
+		break;
+	default:
+		/* no converter for this class: report the raw number */
+		(void) snprintf(buf, bufsz,
+		    dgettext(TEXT_DOMAIN, "unknown error class %d"),
+		    ep->info.errclass);
+		str = buf;
+		break;
+	}
+
+	return (str);
+}
+
+/*
+ * print log prefix
+ *
+ * Writes "<timestamp>: <myname>: " to fp.  The timestamp is left out
+ * when the current time cannot be obtained or formatted.
+ */
+void
+md_logpfx(
+	FILE		*fp
+)
+{
+	time_t		t;
+	struct tm	*tm;
+	char		buf[100];
+
+	/*
+	 * strftime() returns 0 when the result does not fit, and the
+	 * buffer contents are then unspecified, so only a non-zero
+	 * return may be printed.  The null format relies on the platform
+	 * strftime() substituting its default format.
+	 */
+	if ((time(&t) != (time_t)-1) &&
+	    ((tm = localtime(&t)) != NULL) &&
+	    (strftime(buf, sizeof (buf), (char *)0, tm) != 0)) {
+		(void) fprintf(fp, "%s: ", buf);
+	}
+	(void) fprintf(fp, "%s: ", myname);
+}
+
+/*
+ * varargs sperror()
+ *
+ * Builds "<host>: <extra>: <name>: <caller text>: <error text>\n" in a
+ * static buffer and returns it.  Context fields that are NULL or empty
+ * are left out together with their separator, as is the caller text
+ * when fmt is NULL or empty.  The buffer is overwritten by every call.
+ */
+/*PRINTFLIKE2*/
+static char *
+mde_vsperror(
+	md_error_t	*ep,
+	const char	*fmt,
+	va_list		ap
+)
+{
+	static char	buf[BUFSIZ];
+	char		*host = "";
+	char		*hostsep = "";
+	char		*extra = "";
+	char		*extrasep = "";
+	char		*name = "";
+	char		*namesep = "";
+	size_t		len;
+
+	/* a context field that is present is followed by ": " */
+	if ((ep->host != NULL) && (ep->host[0] != '\0')) {
+		host = ep->host;
+		hostsep = ": ";
+	}
+	if ((ep->extra != NULL) && (ep->extra[0] != '\0')) {
+		extra = ep->extra;
+		extrasep = ": ";
+	}
+	if ((ep->name != NULL) && (ep->name[0] != '\0')) {
+		name = ep->name;
+		namesep = ": ";
+	}
+
+	/* context */
+	(void) snprintf(buf, sizeof (buf), "%s%s%s%s%s%s",
+	    host, hostsep, extra, extrasep, name, namesep);
+	len = strlen(buf);
+
+	/* user defined part, followed by its own separator */
+	if ((fmt != NULL) && (fmt[0] != '\0')) {
+		(void) vsnprintf(buf + len, sizeof (buf) - len, fmt, ap);
+		len = strlen(buf);
+		(void) snprintf(buf + len, sizeof (buf) - len, ": ");
+		len = strlen(buf);
+	}
+
+	/* error code */
+	(void) snprintf(buf + len, sizeof (buf) - len, "%s\n",
+	    mde_to_str(ep));
+
+	/* return error message */
+	return (buf);
+}
+
+/*
+ * printf-like sperror()
+ *
+ * Collects the variable arguments and hands them to mde_vsperror(),
+ * returning the message string it produces.
+ */
+/*PRINTFLIKE2*/
+char *
+mde_sperror(
+	md_error_t	*ep,
+	const char	*fmt,
+	...
+)
+{
+	char		*msg;
+	va_list		args;
+
+	va_start(args, fmt);
+	msg = mde_vsperror(ep, fmt, args);
+	va_end(args);
+
+	return (msg);
+}
+
+/*
+ * printf-like perror()
+ *
+ * Formats the error held in ep, prefixed by the caller's text, and
+ * reports it on stderr, in the metalog file when one is open, and
+ * through syslog when metasyslog is set.
+ */
+/*PRINTFLIKE2*/
+void
+mde_perror(
+	md_error_t	*ep,
+	const char	*fmt,
+	...
+)
+{
+	va_list		ap;
+	char		*emsg;
+
+	/* get error message */
+	va_start(ap, fmt);
+	emsg = mde_vsperror(ep, fmt, ap);
+	va_end(ap);
+	assert((emsg != NULL) && (*emsg != '\0'));
+
+	/* stderr */
+	(void) fprintf(stderr, "%s: %s\n", myname, emsg);
+	(void) fflush(stderr);
+
+	/* metalog */
+	if (metalogfp != NULL) {
+		md_logpfx(metalogfp);
+		(void) fprintf(metalogfp, "%s\n", emsg);
+		(void) fflush(metalogfp);
+		(void) fsync(fileno(metalogfp));
+	}
+
+	/*
+	 * syslog: emsg is finished text that may contain '%' (from
+	 * names or from the messages themselves), so it must be passed
+	 * as an argument and never as the format.
+	 */
+	if (metasyslog) {
+		syslog(LOG_ERR, "%s", emsg);
+	}
+}
+
+/*
+ * printf-like perror()
+ *
+ * Wraps the current errno in a temporary error record, formats it
+ * with the caller's text as prefix, and reports it on stderr, in the
+ * metalog file when one is open, and through syslog when metasyslog
+ * is set.
+ */
+/*PRINTFLIKE1*/
+void
+md_perror(
+	const char	*fmt,
+	...
+)
+{
+	md_error_t	status = mdnullerror;
+	va_list		ap;
+	char		*emsg;
+
+	/* get error message */
+	(void) mdsyserror(&status, errno, NULL);
+	va_start(ap, fmt);
+	emsg = mde_vsperror(&status, fmt, ap);
+	va_end(ap);
+	assert((emsg != NULL) && (*emsg != '\0'));
+	mdclrerror(&status);
+
+	/* stderr */
+	(void) fprintf(stderr, "%s: %s\n", myname, emsg);
+	(void) fflush(stderr);
+
+	/* metalog */
+	if (metalogfp != NULL) {
+		md_logpfx(metalogfp);
+		(void) fprintf(metalogfp, "%s\n", emsg);
+		(void) fflush(metalogfp);
+		(void) fsync(fileno(metalogfp));
+	}
+
+	/*
+	 * syslog: emsg is finished text that may contain '%', so it
+	 * must be passed as an argument and never as the format.
+	 */
+	if (metasyslog) {
+		syslog(LOG_ERR, "%s", emsg);
+	}
+}
+
+/*
+ * printf-like log
+ *
+ * Writes the formatted message to stderr, to the metalog file when
+ * one is open, and to syslog when metasyslog is set.
+ *
+ * A va_list is indeterminate once a v*printf()-style function has
+ * consumed it, so the argument list is restarted for every consumer
+ * instead of being shared between them.
+ */
+/*PRINTFLIKE1*/
+void
+md_eprintf(
+	const char	*fmt,
+	...
+)
+{
+	va_list		ap;
+
+	/* stderr */
+	(void) fprintf(stderr, "%s: ", myname);
+	va_start(ap, fmt);
+	(void) vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	(void) fflush(stderr);
+
+	/* metalog */
+	if (metalogfp != NULL) {
+		md_logpfx(metalogfp);
+		va_start(ap, fmt);
+		(void) vfprintf(metalogfp, fmt, ap);
+		va_end(ap);
+		(void) fflush(metalogfp);
+		(void) fsync(fileno(metalogfp));
+	}
+
+	/* syslog */
+	if (metasyslog) {
+		va_start(ap, fmt);
+		vsyslog(LOG_ERR, fmt, ap);
+		va_end(ap);
+	}
+}
+
+/*
+ * metaclust timing messages logging routine
+ *
+ * level	- The class of the message to be logged. Message will be logged
+ *		  if this is less than or equal to the verbosity level.
+ * fmt, ...	- printf-style format and its arguments.
+ *
+ * A va_list is indeterminate once a v*printf()-style function has
+ * consumed it, so the argument list is restarted for each of the two
+ * destinations instead of being shared between them.
+ */
+void
+meta_mc_log(int level, const char *fmt, ...)
+{
+	va_list	args;
+
+	/*
+	 * Log all messages up to MC_LOG2 to syslog regardless of the
+	 * verbosity level
+	 */
+	if (metasyslog && (level <= MC_LOG2)) {
+		va_start(args, fmt);
+		if (level <= MC_LOG1)
+			(void) vsyslog(LOG_ERR, fmt, args);
+		else
+			(void) vsyslog(LOG_INFO, fmt, args);
+		va_end(args);
+	}
+	/*
+	 * Print all messages to stderr provided the message level is
+	 * within the verbosity level
+	 */
+	if (level <= verbosity) {
+		(void) fprintf(stderr, "%s: ", myname);
+		va_start(args, fmt);
+		(void) vfprintf(stderr, fmt, args);
+		va_end(args);
+		(void) fprintf(stderr, "\n");
+		(void) fflush(stderr);
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_getdevs.c b/usr/src/lib/lvm/libmeta/common/meta_getdevs.c
new file mode 100644
index 0000000000..af828bd083
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_getdevs.c
@@ -0,0 +1,592 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * get dev_t list
+ */
+
+#include <meta.h>
+
+#include <sys/mhd.h>
+#include <strings.h>
+
+/*
+ * Local replacement for minor() that copes with both the 64 bit and the
+ * 32 bit device encodings.  When META_DEBUG is set in the environment a
+ * note is printed for every 32 bit dev that is passed in.
+ */
+minor_t
+meta_getminor(md_dev64_t dev64)
+{
+	/* any bit set above the low NBITSMAJOR64 bits: real 64 bit dev */
+	if ((dev64 >> NBITSMAJOR64) > 0)
+		return ((minor_t)(dev64 & MAXMIN64));
+
+	/* otherwise decode it as a 32 bit dev */
+	if (getenv("META_DEBUG")) {
+		(void) printf(
+		    "meta_getminor called with 32 bit dev: 0x%llx\n",
+		    dev64);
+	}
+	return ((minor_t)(dev64 & MAXMIN32));
+}
+
+/*
+ * Local replacement for major() that copes with both the 64 bit and the
+ * 32 bit device encodings.  When META_DEBUG is set in the environment a
+ * note is printed for every 32 bit dev that is passed in.
+ */
+major_t
+meta_getmajor(md_dev64_t dev64)
+{
+	/* any bit set above the low NBITSMAJOR64 bits: real 64 bit dev */
+	if ((dev64 >> NBITSMAJOR64) > 0)
+		return ((major_t)((dev64 >> NBITSMINOR64) & MAXMAJ64));
+
+	/* otherwise decode it as a 32 bit dev */
+	if (getenv("META_DEBUG")) {
+		(void) printf(
+		    "meta_getmajor called with 32 bit dev: 0x%llx\n",
+		    dev64);
+	}
+	return ((major_t)((dev64 >> NBITSMINOR32) & MAXMAJ32));
+}
+
+/*
+ * Local replacement for cmpldev(): squeeze a device number into the
+ * 32 bit encoding.  Accepts 64 bit as well as 32 bit input.
+ */
+dev32_t
+meta_cmpldev(md_dev64_t dev64)
+{
+	major_t	mjr = (major_t)(dev64 >> NBITSMAJOR64);
+	minor_t	mnr;
+
+	/* nothing above the low NBITSMAJOR64 bits: just narrow the value */
+	if (mjr == 0)
+		return ((dev32_t)dev64);
+
+	/* rebuild the value from the upper word and the masked low bits */
+	mnr = (dev32_t)dev64 & MAXMIN32;
+	return ((mjr << NBITSMINOR32) | mnr);
+}
+
+/*
+ * Local replacement for expldev(): widen a device number to the 64 bit
+ * encoding.  Accepts 64 bit as well as 32 bit input.
+ */
+md_dev64_t
+meta_expldev(md_dev64_t dev64)
+{
+	major_t	mjr;
+	minor_t	mnr;
+
+	/* bits set above NBITSMAJOR64: already 64 bit, hand it back as is */
+	if ((major_t)(dev64 >> NBITSMAJOR64) > 0)
+		return (dev64);
+
+	/* pick the 32 bit value apart and reassemble it in 64 bit layout */
+	mnr = (minor_t)(dev64) & MAXMIN32;
+	mjr = ((major_t)dev64 >> NBITSMINOR32) & MAXMAJ32;
+	return (((md_dev64_t)mjr << NBITSMINOR64) | mnr);
+}
+
+/*
+ * get underlying devices (recursively)
+ *
+ * sp		- set the device belongs to; must not be NULL
+ * namep	- device to expand
+ * nlpp		- name list the resulting devices are appended to
+ * ep		- error return
+ *
+ * If namep is not a metadevice it is appended to *nlpp, unless an entry
+ * with the same bname is already on the list.  If it is a metadevice, its
+ * driver is asked for the component devices via MD_IOCGET_DEVS and each
+ * of them is expanded in turn by calling this function again.
+ *
+ * Returns 0 on success and -1 on error.  An ENODEV system error from the
+ * ioctl is treated as success with nothing added.
+ */
+int
+meta_getdevs(
+	mdsetname_t		*sp,
+	mdname_t		*namep,
+	mdnamelist_t		**nlpp,
+	md_error_t		*ep
+)
+{
+	char			*miscname;
+	md_dev64_t		*mydevs = NULL;
+	md_getdevs_params_t	mgd;
+	size_t			i;
+	int			rval = -1;
+	md_sys_error_t		*ip;
+
+	/* must have local set */
+	assert(sp != NULL);
+
+	/* just add regular devices */
+	if (! metaismeta(namep)) {
+		mdnamelist_t	*p;
+
+		/*
+		 * If a device with the same block name is on the list
+		 * already there is nothing to add.
+		 */
+		for (p = *nlpp; (p != NULL); p = p->next) {
+			if (strcmp(namep->bname, p->namep->bname) == 0) {
+				rval = 0;
+				goto out;
+			}
+		}
+
+		/* add to list */
+		(void) metanamelist_append(nlpp, namep);
+		rval = 0;
+		goto out;
+	}
+
+	/* get MD misc module */
+	if ((miscname = metagetmiscname(namep, ep)) == NULL)
+		goto out;
+
+	/*
+	 * get count of underlying devices: the first call is made with
+	 * no buffer (cnt == 0, devs == NULL) and mgd.cnt is read back
+	 */
+	(void) memset(&mgd, '\0', sizeof (mgd));
+	MD_SETDRIVERNAME(&mgd, miscname, sp->setno);
+	mgd.mnum = meta_getminor(namep->dev);
+	mgd.cnt = 0;
+	mgd.devs = NULL;
+	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, namep->cname) != 0) {
+		if (mgd.mde.info.errclass == MDEC_SYS) {
+			ip = &mgd.mde.info.md_error_info_t_u.sys_error;
+			/* ENODEV is not reported as a failure */
+			if (ip->errnum == ENODEV) {
+				rval = 0;
+				goto out;
+			}
+		}
+		(void) mdstealerror(ep, &mgd.mde);
+		goto out;
+	} else if (mgd.cnt <= 0) {
+		/* no components: nothing to add */
+		assert(mgd.cnt >= 0);
+		rval = 0;
+		goto out;
+	}
+
+	/* get underlying devices, this time with a buffer of mgd.cnt slots */
+	mydevs = Zalloc(sizeof (*mydevs) * mgd.cnt);
+	mgd.devs = (uintptr_t)mydevs;
+	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, namep->cname) != 0) {
+		if (mgd.mde.info.errclass == MDEC_SYS) {
+			ip = &mgd.mde.info.md_error_info_t_u.sys_error;
+			/* ENODEV is not reported as a failure */
+			if (ip->errnum == ENODEV) {
+				rval = 0;
+				goto out;
+			}
+		}
+		(void) mdstealerror(ep, &mgd.mde);
+		goto out;
+	} else if (mgd.cnt <= 0) {
+		assert(mgd.cnt >= 0);
+		rval = 0;
+		goto out;
+	}
+	/* recurse */
+	for (i = 0; (i < mgd.cnt); ++i) {
+		mdname_t	*devnp;
+
+		/* skip slots that hold no device */
+		if (mydevs[i] == NODEV64) {
+			continue;
+		}
+		if ((devnp = metadevname(&sp, mydevs[i], ep)) == NULL) {
+			goto out;
+		}
+		if (meta_getdevs(sp, devnp, nlpp, ep) != 0)
+			goto out;
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return rval (still -1 unless a success path set it) */
+out:
+	if (mydevs != NULL)
+		Free(mydevs);
+	return (rval);
+}
+
+/*
+ * get all dev_t for a set
+ *
+ * Walks the replicas (only when check_db is TRUE), stripes, mirrors,
+ * trans devices, hot spare pools, raids and soft partitions of the set
+ * and runs meta_getdevs() on each of them, accumulating the results in
+ * *nlpp.
+ *
+ * A failure in one step does not stop the walk: the remaining steps are
+ * still attempted and -1 is returned at the end.  Returns 0 when every
+ * step succeeded.
+ */
+int
+meta_getalldevs(
+	mdsetname_t		*sp,		/* set to look in */
+	mdnamelist_t		**nlpp,		/* returned devices */
+	int			check_db,
+	md_error_t		*ep
+)
+{
+	md_replicalist_t	*rlp, *rp;
+	mdnamelist_t		*nlp, *np;
+	mdhspnamelist_t		*hspnlp, *hspp;
+	int			rval = 0;
+
+	assert(sp != NULL);
+
+	/*
+	 * Get a replica namelist,
+	 * and then get all the devs within the replicas.
+	 */
+	if (check_db == TRUE) {
+		rlp = NULL;
+		if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
+			rval = -1;
+		for (rp = rlp; (rp != NULL); rp = rp->rl_next) {
+			if (meta_getdevs(sp, rp->rl_repp->r_namep,
+			    nlpp, ep) != 0)
+				rval = -1;
+		}
+		metafreereplicalist(rlp);
+	}
+
+	/*
+	 * Get a stripe namelist,
+	 * and then get all the devs within the stripes.
+	 */
+	nlp = NULL;
+	if (meta_get_stripe_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	for (np = nlp; (np != NULL); np = np->next) {
+		if (meta_getdevs(sp, np->namep, nlpp, ep) != 0)
+			rval = -1;
+	}
+	metafreenamelist(nlp);
+
+	/*
+	 * Get a mirror namelist,
+	 * and then get all the devs within the mirrors.
+	 */
+	nlp = NULL;
+	if (meta_get_mirror_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	for (np = nlp; (np != NULL); np = np->next) {
+		if (meta_getdevs(sp, np->namep, nlpp, ep) != 0)
+			rval = -1;
+	}
+	metafreenamelist(nlp);
+
+	/*
+	 * Get a trans namelist,
+	 * and then get all the devs within the trans.
+	 */
+	nlp = NULL;
+
+	if (meta_get_trans_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	for (np = nlp; (np != NULL); np = np->next) {
+		if (meta_getdevs(sp, np->namep, nlpp, ep) != 0)
+			rval = -1;
+	}
+	metafreenamelist(nlp);
+
+	/*
+	 * Get a hot spare pool namelist,
+	 * and then get all the devs within the hot spare pools.
+	 */
+	hspnlp = NULL;
+	if (meta_get_hsp_names(sp, &hspnlp, 0, ep) < 0)
+		rval = -1;
+	for (hspp = hspnlp; (hspp != NULL); hspp = hspp->next) {
+		md_hsp_t	*hsp;
+		uint_t		i;
+
+		/* a pool that cannot be read is noted; the walk goes on */
+		if ((hsp = meta_get_hsp(sp, hspp->hspnamep, ep)) == NULL)
+			rval = -1;
+		else for (i = 0; (i < hsp->hotspares.hotspares_len); ++i) {
+			md_hs_t	*hs = &hsp->hotspares.hotspares_val[i];
+
+			if (meta_getdevs(sp, hs->hsnamep, nlpp, ep) != 0)
+				rval = -1;
+		}
+	}
+	metafreehspnamelist(hspnlp);
+
+	/*
+	 * Get a raid namelist,
+	 * and then get all the devs within the raids.
+	 */
+	nlp = NULL;
+	if (meta_get_raid_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	for (np = nlp; (np != NULL); np = np->next) {
+		if (meta_getdevs(sp, np->namep, nlpp, ep) != 0)
+			rval = -1;
+	}
+	metafreenamelist(nlp);
+
+	/*
+	 * Get a soft partition namelist,
+	 * and then get all the devs within the softpartitions
+	 */
+	nlp = NULL;
+	if (meta_get_sp_names(sp, &nlp, 0, ep) < 0)
+		rval = -1;
+	for (np = nlp; (np != NULL); np = np->next) {
+		if (meta_getdevs(sp, np->namep, nlpp, ep) != 0)
+			rval = -1;
+	}
+	metafreenamelist(nlp);
+
+	return (rval);
+}
+
+/*
+ * Read the vtoc of a device that has already been opened.
+ *
+ * On success the vtoc is stored in *vtocbufp, the slice index returned
+ * by read_vtoc() is stored in *partno (when partno is not NULL) and 0 is
+ * returned.
+ *
+ * On failure the result of mdsyserror() is returned, except when
+ * read_vtoc() reported VT_ENOTSUP and partno is not NULL: in that case
+ * *partno is set to VT_ENOTSUP and -1 is returned without touching ep.
+ */
+int
+meta_getvtoc(
+	int		fd,		/* fd for named device */
+	char		*devname,	/* name of device */
+	struct vtoc	*vtocbufp,	/* vtoc buffer to fill */
+	int		*partno,	/* return partno here */
+	md_error_t	*ep
+)
+{
+	int		slice;
+	int		syserr;
+
+	(void) memset(vtocbufp, 0, sizeof (*vtocbufp));
+	slice = read_vtoc(fd, vtocbufp);
+
+	if (slice >= 0) {
+		/* Slice number for *p0 partition (whole disk on x86) is 16 */
+		if (slice >= V_NUMPAR)
+			return (mdsyserror(ep, EINVAL, devname));
+
+		if (partno)
+			*partno = slice;
+		return (0);
+	}
+
+	/* remember errno before the status ioctl can overwrite it */
+	syserr = errno;
+
+	if (ioctl(fd, MHIOCSTATUS, NULL) == 1) {
+		syserr = EACCES;
+	} else if (slice == VT_EINVAL) {
+		syserr = EINVAL;
+	} else if (slice == VT_EIO) {
+		syserr = EIO;
+	} else if ((slice == VT_ENOTSUP) && partno) {
+		*partno = VT_ENOTSUP;
+		return (-1);
+	}
+	return (mdsyserror(ep, syserr, devname));
+}
+/*
+ * Write an mdvtoc to a metadevice.
+ *
+ * Returns 0 on success, -1 (ep untouched) when the mdvtoc fails the
+ * sanity checks, and the result of mdsyserror() when the ioctl fails.
+ */
+int
+meta_setmdvtoc(
+	int		fd,		/* fd for named device */
+	char		*devname,	/* name of device */
+	mdvtoc_t	*mdvtocp,	/* mdvtoc buffer to fill */
+	md_error_t	*ep
+)
+{
+	uint_t	slice;
+	int	nonzero = 0;
+
+	/* sanity check: no more partitions than a vtoc can hold */
+	if (mdvtocp->nparts > V_NUMPAR)
+		return (-1);
+
+	/*
+	 * Many drivers refuse to open a device whose partitions are all
+	 * of size zero, and once that has happened there is no way to
+	 * set them to anything else.  So insist on at least one
+	 * partition with a size.
+	 */
+	slice = 0;
+	while ((slice < mdvtocp->nparts) && !nonzero) {
+		if (mdvtocp->parts[slice].size > 0)
+			nonzero = 1;
+		slice++;
+	}
+	if (!nonzero)
+		return (-1);
+
+	/* hand the mdvtoc to the driver */
+	if (ioctl(fd, DKIOCSVTOC, (caddr_t)mdvtocp) == -1)
+		return (mdsyserror(ep, errno, devname));
+
+	return (0);
+}
+
+/*
+ * Write a vtoc to a device.
+ *
+ * Returns 0 on success.  When write_vtoc() fails, VT_EINVAL and VT_EIO
+ * are reported as EINVAL and EIO, anything else as the current errno,
+ * and the result of mdsyserror() is returned.
+ */
+int
+meta_setvtoc(
+	int		fd,		/* fd for named device */
+	char		*devname,	/* name of device */
+	struct vtoc	*vtocbufp,	/* vtoc buffer to fill */
+	md_error_t	*ep
+)
+{
+	int		rc = write_vtoc(fd, vtocbufp);
+	int		syserr;
+
+	if (rc >= 0)
+		return (0);
+
+	syserr = (rc == VT_EINVAL) ? EINVAL :
+	    ((rc == VT_EIO) ? EIO : errno);
+	return (mdsyserror(ep, syserr, devname));
+}
+
+/*
+ * FUNCTION:	meta_get_names()
+ * INPUT:	drivername - char string containing the driver name
+ *		sp	- the set to get the device names from
+ *		options	- options from the command line; PRINT_FAST is
+ *			  passed on to metamnumname()
+ * OUTPUT:	nlpp	- list the names of the devices are appended to
+ *		ep	- return error pointer
+ * RETURNS:	int	- -1 if error, otherwise the number of devices the
+ *			  driver reported (0 if there are none)
+ * PURPOSE:	returns a list of all devices of the named driver
+ *		in the specified set
+ * NOTE:	if a name lookup fails part way through, the whole list at
+ *		*nlpp is freed and *nlpp is set to NULL.
+ */
+int
+meta_get_names(
+	char		*drivername,
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_i_getnum_t	gn;		/* MD_IOCGET_NUM params */
+	mdnamelist_t	**tailpp = nlpp;
+	minor_t		*minors = NULL;
+	minor_t		*m_ptr;
+	int		i;
+
+	(void) memset(&gn, '\0', sizeof (gn));
+	MD_SETDRIVERNAME(&gn, drivername, sp->setno);
+
+	/* get number of devices */
+	if (metaioctl(MD_IOCGET_NUM, &gn, &gn.mde, NULL) != 0) {
+		/* "unit not found" just means there are no devices */
+		if (mdiserror(&gn.mde, MDE_UNIT_NOT_FOUND)) {
+			mdclrerror(&gn.mde);
+		} else {
+			(void) mdstealerror(ep, &gn.mde);
+			return (-1);
+		}
+	}
+
+	if (gn.size > 0) {
+		/* malloc minor number buffer to be filled by ioctl */
+		if ((minors = (minor_t *)malloc(
+		    gn.size * sizeof (minor_t))) == NULL) {
+			/*
+			 * Report the failure through ep and return -1.
+			 * Returning ENOMEM itself would look like a
+			 * device count to callers that only test for a
+			 * negative result.
+			 */
+			(void) mdsyserror(ep, ENOMEM, drivername);
+			return (-1);
+		}
+		gn.minors = (uintptr_t)minors;
+		if (metaioctl(MD_IOCGET_NUM, &gn, &gn.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &gn.mde);
+			free(minors);
+			return (-1);
+		}
+		m_ptr = minors;
+		for (i = 0; i < gn.size; i++) {
+			mdname_t	*np;
+
+			/* get name */
+			np = metamnumname(&sp, *m_ptr,
+			    ((options & PRINT_FAST) ? 1 : 0), ep);
+			if (np == NULL)
+				goto out;
+
+			/* append, remembering the new tail of the list */
+			tailpp = meta_namelist_append_wrapper(
+			    tailpp, np);
+
+			/* next device */
+			m_ptr++;
+		}
+		free(minors);
+	}
+	return (gn.size);
+
+	/* cleanup, return error */
+out:
+	if (minors != NULL)
+		free(minors);
+	metafreenamelist(*nlpp);
+	*nlpp = NULL;
+	return (-1);
+}
+
+/*
+ * Front end for devid_deviceid_to_nmlist() from libdevid.  The list
+ * that function produces is replaced by a copy from which the c[t]dp
+ * style names, as typically seen on x86, have been dropped.
+ *
+ * Returns the result of devid_deviceid_to_nmlist(); when that is not 0
+ * nothing else is done.
+ */
+int
+meta_deviceid_to_nmlist(char *search_path, ddi_devid_t devid, char *minor_name,
+	devid_nmlist_t	**retlist)
+{
+	int		res;
+	int		slots = 1;	/* room for the list terminator */
+	devid_nmlist_t	*src;
+	devid_nmlist_t	*dst;
+	devid_nmlist_t	*filtered;
+	uint_t		slice;
+
+	res = devid_deviceid_to_nmlist(search_path, devid, minor_name, retlist);
+	if (res != 0)
+		return (res);
+
+	/* pass 1: count the entries that are not c[t]dp style names */
+	for (src = *retlist; src->dev != NODEV; src++) {
+		if (parse_ctd(basename(src->devname), &slice) != 1)
+			slots++;
+	}
+
+	/* pass 2: copy those entries into a list of exactly that size */
+	filtered = Malloc(sizeof (devid_nmlist_t) * slots);
+	dst = filtered;
+	for (src = *retlist; src->dev != NODEV; src++) {
+		/* c[t]dp style name: leave it out */
+		if (parse_ctd(basename(src->devname), &slice) == 1)
+			continue;
+
+		dst->dev = src->dev;
+		dst->devname = Strdup(src->devname);
+		dst++;
+	}
+
+	/* terminate the copy the same way the original list is terminated */
+	dst->dev = NODEV;
+	dst->devname = NULL;
+
+	/* swap the filtered copy in for the original */
+	devid_free_nmlist(*retlist);
+	*retlist = filtered;
+	return (res);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_hotspares.c b/usr/src/lib/lvm/libmeta/common/meta_hotspares.c
new file mode 100644
index 0000000000..a76f9f3765
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_hotspares.c
@@ -0,0 +1,1630 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * hotspares utilities
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_hotspares.h>
+#include <sys/lvm/md_convert.h>
+
+
+/*
+ * FUNCTION:	meta_get_hsp_names()
+ * INPUT:	sp	- the set name to get hotspares from
+ *		options	- options from the command line (unused)
+ * OUTPUT:	hspnlpp	- list the hotspare pool names are appended to
+ *		ep	- return error pointer
+ * RETURNS:	int	- -1 if error, otherwise the number of hotspare
+ *			  pools the driver reported (0 if there are none)
+ * PURPOSE:	returns a list of all hotspare pools in the metadb
+ *		for the specified set
+ * NOTE:	if a name lookup fails part way through, the whole list at
+ *		*hspnlpp is freed and *hspnlpp is set to NULL.
+ */
+/*ARGSUSED*/
+int
+meta_get_hsp_names(
+	mdsetname_t	*sp,
+	mdhspnamelist_t	**hspnlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	md_i_getnum_t	gn;		/* MD_IOCGET_NUM params */
+	minor_t		*minors = NULL;
+	minor_t		*m_ptr;
+	int		i;
+
+	/* we must have a set */
+	assert(sp != NULL);
+
+	(void) memset(&gn, 0, sizeof (gn));
+	MD_SETDRIVERNAME(&gn, MD_HOTSPARES, sp->setno);
+
+	/* get number of devices */
+	if (metaioctl(MD_IOCGET_NUM, &gn, &gn.mde, NULL) != 0) {
+		/* "unit not found" just means there are no pools */
+		if (mdiserror(&gn.mde, MDE_UNIT_NOT_FOUND)) {
+			mdclrerror(&gn.mde);
+		} else {
+			(void) mdstealerror(ep, &gn.mde);
+			return (-1);
+		}
+	}
+
+	if (gn.size > 0) {
+		/* malloc minor number buffer to be filled by ioctl */
+		if ((minors = (minor_t *)malloc(
+		    gn.size * sizeof (minor_t))) == NULL) {
+			/*
+			 * Report the failure through ep and return -1.
+			 * Returning ENOMEM itself would look like a pool
+			 * count to callers that only test for a negative
+			 * result.
+			 */
+			(void) mdsyserror(ep, ENOMEM, MD_HOTSPARES);
+			return (-1);
+		}
+		gn.minors = (uintptr_t)minors;
+		if (metaioctl(MD_IOCGET_NUM, &gn, &gn.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &gn.mde);
+			free(minors);
+			return (-1);
+		}
+		m_ptr = minors;
+		for (i = 0; i < gn.size; i++) {
+			mdhspname_t	*hspnp;
+
+			/* get name */
+			if ((hspnp = metahsphspname(&sp, *m_ptr, ep))
+			    == NULL)
+				goto out;
+
+			/* append to list */
+			(void) metahspnamelist_append(hspnlpp, hspnp);
+
+			/* next device */
+			m_ptr++;
+		}
+		free(minors);
+	}
+	return (gn.size);
+
+	/* cleanup, return error */
+out:
+	if (minors != NULL)
+		free(minors);
+	metafreehspnamelist(*hspnlpp);
+	*hspnlpp = NULL;
+	return (-1);
+}
+
+/*
+ * Fetch the driver's record for one hotspare pool.
+ *
+ * MD_IOCGET is issued twice: once to learn the size of the record and a
+ * second time with a buffer of that size.  The buffer is returned to the
+ * caller; NULL is returned, with the error moved into ep, when either
+ * ioctl fails.
+ */
+static get_hsp_t *
+get_hspinfo(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	md_error_t	*ep
+)
+{
+	md_i_get_t	mig;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* first pass: no buffer, just ask for the size */
+	(void) memset(&mig, 0, sizeof (mig));
+	MD_SETDRIVERNAME(&mig, MD_HOTSPARES, sp->setno);
+	mig.id = hspnp->hsp;
+	if (metaioctl(MD_IOCGET, &mig, &mig.mde, hspnp->hspname) != 0) {
+		(void) mdstealerror(ep, &mig.mde);
+		return (NULL);
+	}
+
+	/* second pass: same request, now with room for the record */
+	assert(mig.size > 0);
+	mig.mdp = (uintptr_t)Zalloc(mig.size);
+	if (metaioctl(MD_IOCGET, &mig, &mig.mde, hspnp->hspname) == 0)
+		return ((get_hsp_t *)mig.mdp);
+
+	/* failed: pass the error on and release the buffer */
+	(void) mdstealerror(ep, &mig.mde);
+	Free((void *)mig.mdp);
+	return (NULL);
+}
+
+/*
+ * Release a hotspare pool unit: the hotspare array, if there is one,
+ * and then the unit structure itself.
+ */
+void
+meta_free_hsp(
+	md_hsp_t	*hspp
+)
+{
+	md_hs_t		*spares = hspp->hotspares.hotspares_val;
+
+	if (spares != NULL) {
+		/* an array is only ever expected together with a count */
+		assert(hspp->hotspares.hotspares_len > 0);
+		Free(spares);
+	}
+	Free(hspp);
+}
+
+/*
+ * get hotspare pool unit (common)
+ *
+ * sp		- set the pool belongs to; must not be NULL
+ * hspnp	- name of the hotspare pool
+ * fast		- passed through to metakeyname()
+ * ep		- error return
+ *
+ * Returns the unit cached in hspnp->unitp if there is one.  Otherwise
+ * the pool is read from the driver, a unit is built from it (name,
+ * state, size, timestamp and revision of every hotspare), cached in
+ * hspnp->unitp and returned.  Returns NULL on error.
+ */
+md_hsp_t *
+meta_get_hsp_common(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	get_hsp_t	*ghsp;
+	md_hsp_t	*hspp;
+	uint_t		hsi;
+
+	/* must have set */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* short circuit */
+	if (hspnp->unitp != NULL)
+		return (hspnp->unitp);
+
+	/* get unit */
+	if ((ghsp = get_hspinfo(sp, hspnp, ep)) == NULL)
+		return (NULL);
+
+	/* allocate hsp */
+	hspp = Zalloc(sizeof (*hspp));
+
+	/* allocate hotspares */
+	hspp->hotspares.hotspares_len = ghsp->ghsp_nhotspares;
+
+	/* an empty pool gets no hotspare array; hotspares_val stays NULL */
+	if (hspp->hotspares.hotspares_len != 0)
+		hspp->hotspares.hotspares_val =
+		    Zalloc(hspp->hotspares.hotspares_len *
+		    sizeof (*hspp->hotspares.hotspares_val));
+
+	/* get name, refcount */
+	hspp->hspnamep = hspnp;
+	hspp->refcount = ghsp->ghsp_refcount;
+
+	/* get hotspares */
+	for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) {
+		mdkey_t		hs_key = ghsp->ghsp_hs_keys[hsi];
+		md_hs_t		*hsp = &hspp->hotspares.hotspares_val[hsi];
+		get_hs_params_t	ghs;
+
+		/* get hotspare name */
+		hsp->hsnamep = metakeyname(&sp, hs_key, fast, ep);
+		if (hsp->hsnamep == NULL)
+			goto out;
+
+		/* get hotspare state */
+		(void) memset(&ghs, 0, sizeof (ghs));
+		MD_SETDRIVERNAME(&ghs, MD_HOTSPARES, sp->setno);
+		ghs.ghs_key = hs_key;
+		if (metaioctl(MD_IOCGET_HS, &ghs, &ghs.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &ghs.mde);
+			goto out;
+		}
+		hsp->state = ghs.ghs_state;
+		hsp->size = ghs.ghs_number_blks;
+		hsp->timestamp = ghs.ghs_timestamp;
+		hsp->revision = ghs.ghs_revision;
+	}
+
+	/* cleanup, cache the unit in the name, return success */
+	Free(ghsp);
+	hspnp->unitp = hspp;
+	return (hspp);
+
+	/* cleanup, return error; the half built unit is not cached */
+out:
+	Free(ghsp);
+	meta_free_hsp(hspp);
+	return (NULL);
+}
+
+/*
+ * get hotspare pool unit
+ *
+ * Convenience wrapper around meta_get_hsp_common() that always passes 0
+ * for its "fast" argument.  Returns whatever meta_get_hsp_common()
+ * returns.
+ */
+md_hsp_t *
+meta_get_hsp(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	md_error_t	*ep
+)
+{
+	return (meta_get_hsp_common(sp, hspnp, 0, ep));
+}
+
+/*
+ * Check one hotspare pool for a device.
+ *
+ * Every hotspare of the pool that is not itself a metadevice is run
+ * through meta_check_overlap() against np (slblk, nblks).  Returns 0
+ * when no check fails, -1 when the pool cannot be read or a check
+ * fails.
+ */
+static int
+in_hsp(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_hsp_t	*pool;
+	uint_t		idx;
+
+	/* should be in the same set */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* get unit */
+	pool = meta_get_hsp(sp, hspnp, ep);
+	if (pool == NULL)
+		return (-1);
+
+	/* look in hotspares */
+	for (idx = 0; idx < pool->hotspares.hotspares_len; idx++) {
+		mdname_t	*sparenp;
+
+		sparenp = pool->hotspares.hotspares_val[idx].hsnamep;
+
+		/* only hotspares that are not metadevices are checked */
+		if (!metaismeta(sparenp) &&
+		    (meta_check_overlap(hspnp->hspname, np, slblk, nblks,
+		    sparenp, 0, -1, ep) != 0))
+			return (-1);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Check a device against every hotspare pool of the set.
+ *
+ * Returns 0 when in_hsp() succeeds for all pools, -1 when the list of
+ * pools cannot be obtained or in_hsp() fails for one of them (the
+ * remaining pools are then not looked at).
+ */
+int
+meta_check_inhsp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t	*pools = NULL;
+	mdhspnamelist_t	*cur;
+	int		rval = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	if (meta_get_hsp_names(sp, &pools, 0, ep) < 0)
+		return (-1);
+
+	/* walk the pools until one of them fails the check */
+	for (cur = pools; (cur != NULL) && (rval == 0); cur = cur->next) {
+		if (in_hsp(sp, cur->hspnamep, np, slblk, nblks, ep) != 0)
+			rval = -1;
+	}
+
+	/* cleanup, return result */
+	metafreehspnamelist(pools);
+	return (rval);
+}
+
+/*
+ * Check whether a device may be used as a hotspare.
+ *
+ * The checks are made in this order, and the first one that fails ends
+ * the sequence:
+ *	metachkcomp()		- we have a disk
+ *	meta_check_inuse()	- it is not already in use
+ *	meta_check_inset()	- it is in the set
+ *	meta_check_inmeta()	- it is not in a metadevice
+ *
+ * Returns 0 when all checks pass, -1 otherwise.
+ */
+int
+meta_check_hotspare(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	chkopts = (MDCHK_ALLOW_HS);
+
+	if ((metachkcomp(np, ep) != 0) ||
+	    (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) ||
+	    (meta_check_inset(sp, np, ep) != 0) ||
+	    (meta_check_inmeta(sp, np, chkopts, 0, -1, ep) != 0))
+		return (-1);
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Print a hotspare pool on a single line: the pool name followed by its
+ * hotspares.
+ *
+ * Returns 0 on success.  When a write fails, the error is recorded with
+ * mdsyserror() against fname and -1 is returned.
+ */
+static int
+hsp_print(
+	md_hsp_t	*hspp,
+	char		*fname,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	uint_t		idx;
+	int		rval = -1;
+
+	/* print name */
+	if (fprintf(fp, "%s", hspp->hspnamep->hspname) == EOF)
+		goto out;
+
+	/* print hotspares */
+	for (idx = 0; idx < hspp->hotspares.hotspares_len; idx++) {
+		mdname_t	*snp;
+		int		standard;
+
+		snp = hspp->hotspares.hotspares_val[idx].hsnamep;
+
+		/*
+		 * A raw name under one of our standard paths is printed
+		 * in its short cxtxdxsx or dx form, since metainit
+		 * assumes the default path.  Anything else needs the
+		 * full pathname to make sure this works as we intend.
+		 */
+		standard = (strstr(snp->rname, "/dev/rdsk") != NULL) ||
+		    (strstr(snp->rname, "/dev/md/rdsk") != NULL) ||
+		    (strstr(snp->rname, "/dev/td/") != NULL);
+
+		if (fprintf(fp, " %s",
+		    (standard ? snp->cname : snp->rname)) == EOF)
+			goto out;
+	}
+
+	/* terminate last line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * Translate the state of a hotspare into a localized name.
+ *
+ * When tvp is not NULL the timestamp of the hotspare is copied to *tvp.
+ * HSS_UNUSED and any state not listed here yield "invalid".
+ */
+char *
+hs_state_to_name(
+	md_hs_t			*hsp,
+	md_timeval32_t		*tvp
+)
+{
+	hotspare_states_t	state = hsp->state;
+
+	/* grab time */
+	if (tvp != NULL)
+		*tvp = hsp->timestamp;
+
+	if (state == HSS_AVAILABLE)
+		return (dgettext(TEXT_DOMAIN, "Available"));
+	if (state == HSS_RESERVED)
+		return (dgettext(TEXT_DOMAIN, "In use"));
+	if (state == HSS_BROKEN)
+		return (dgettext(TEXT_DOMAIN, "Broken"));
+
+	/* HSS_UNUSED and everything else */
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * report hsp
+ *
+ * hspp		- hotspare pool unit to report on
+ * nlpp		- with PRINT_LARGEDEVICES, the devices underneath every
+ *		  hotspare of revision MD_64BIT_META_DEV are added here
+ * fname	- name used when a write error is recorded
+ * fp		- stream the report is written to
+ * options	- PRINT_LARGEDEVICES, PRINT_DEVID and PRINT_TIMES are
+ *		  looked at
+ * ep		- error return
+ * sp		- set the pool belongs to
+ *
+ * Prints a header line followed by one line per hotspare.  With
+ * PRINT_LARGEDEVICES only hotspares of revision MD_64BIT_META_DEV are
+ * listed, and nothing at all is printed when the pool has none.
+ *
+ * Returns 0 on success and -1 on error.  Write errors are recorded with
+ * mdsyserror(); errors reported by meta_getdevs() and metadevname() are
+ * already in ep and are returned as they are.
+ */
+static int
+hsp_report(
+	md_hsp_t	*hspp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep,
+	mdsetname_t	*sp
+)
+{
+	uint_t		hsi;
+	int		rval = -1;
+	char		*devid = "";
+	mdname_t	*didnp = NULL;
+	uint_t		len;
+	int		large_hs_dev_cnt = 0;
+
+	if (options & PRINT_LARGEDEVICES) {
+		for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) {
+			md_hs_t	*hsp = &hspp->hotspares.hotspares_val[hsi];
+			if (hsp->revision == MD_64BIT_META_DEV) {
+				large_hs_dev_cnt += 1;
+				/*
+				 * meta_getdevs() has filled in ep already.
+				 * Return directly, as is done for
+				 * metadevname() below; going through "out"
+				 * would replace that error with one built
+				 * from a stale errno.
+				 */
+				if (meta_getdevs(sp, hsp->hsnamep, nlpp, ep)
+				    != 0)
+					return (-1);
+			}
+		}
+
+		/* no large devices in this pool: nothing to report */
+		if (large_hs_dev_cnt == 0) {
+			rval = 0;
+			goto out;
+		}
+	}
+	/* print header */
+	if (hspp->hotspares.hotspares_len == 0) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: is empty\n"),
+		    hspp->hspnamep->hspname) == EOF) {
+			goto out;
+		}
+	} else if (hspp->hotspares.hotspares_len == 1) {
+
+		/*
+		 * This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+
+		len = strlen(hspp->hotspares.hotspares_val[0].hsnamep->cname);
+		/*
+		 * if the length is too short to print out all of the header
+		 * force the matter
+		 */
+		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+		len += 2;
+		if (options & PRINT_LARGEDEVICES) {
+			if (fprintf(fp,
+			    "%s: 1 hot spare (1 big device)\n\t%-*.*s  "
+			    "%-12.12s%-8.6s\t\t%s\n",
+			    hspp->hspnamep->hspname, len, len,
+			    dgettext(TEXT_DOMAIN, "Device"),
+			    dgettext(TEXT_DOMAIN, "Status"),
+			    dgettext(TEXT_DOMAIN, "Length"),
+			    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp,
+			    "%s: 1 hot spare\n\t%-*.*s %-12.12s%-8.6s\t\t%s\n",
+			    hspp->hspnamep->hspname, len, len,
+			    dgettext(TEXT_DOMAIN, "Device"),
+			    dgettext(TEXT_DOMAIN, "Status"),
+			    dgettext(TEXT_DOMAIN, "Length"),
+			    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+				goto out;
+			}
+		}
+	} else {
+		/*
+		 * This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		len = 0;
+		for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) {
+			len = max(len, strlen(hspp->
+			    hotspares.hotspares_val[hsi].hsnamep->cname));
+		}
+		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+		len += 2;
+		if (options & PRINT_LARGEDEVICES) {
+			if (fprintf(fp,
+			    "%s: %u hot spares (%d big device(s))\n\t%-*.*s "
+			    "%-12.12s%-8.6s\t\t%s\n",
+			    hspp->hspnamep->hspname,
+			    hspp->hotspares.hotspares_len,
+			    large_hs_dev_cnt, len, len,
+			    dgettext(TEXT_DOMAIN, "Device"),
+			    dgettext(TEXT_DOMAIN, "Status"),
+			    dgettext(TEXT_DOMAIN, "Length"),
+			    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp, "%s: %u hot spares\n\t%-*.*s "
+			    "%-12.12s%-8.6s\t\t%s\n",
+			    hspp->hspnamep->hspname,
+			    hspp->hotspares.hotspares_len, len, len,
+			    dgettext(TEXT_DOMAIN, "Device"),
+			    dgettext(TEXT_DOMAIN, "Status"),
+			    dgettext(TEXT_DOMAIN, "Length"),
+			    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+				goto out;
+			}
+		}
+	}
+
+	/* print hotspares */
+	for (hsi = 0; (hsi < hspp->hotspares.hotspares_len); ++hsi) {
+		md_hs_t		*hsp = &hspp->hotspares.hotspares_val[hsi];
+		char		*cname = hsp->hsnamep->cname;
+		char		*hs_state;
+		md_timeval32_t	tv;
+		char		*timep;
+		ddi_devid_t	dtp;
+
+		/* populate the key in the name_p structure */
+		if ((didnp = metadevname(&sp, hsp->hsnamep->dev, ep)) == NULL) {
+			return (-1);
+		}
+
+		/* with PRINT_LARGEDEVICES only large devices are listed */
+		if (options & PRINT_LARGEDEVICES) {
+			if (hsp->revision != MD_64BIT_META_DEV)
+				continue;
+		}
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL)
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+		/* print hotspare */
+		hs_state = hs_state_to_name(hsp, &tv);
+		/*
+		 * This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		if (! (options & PRINT_TIMES)) {
+			if (fprintf(fp,
+			    "        %-*s %-12s %lld blocks\t%s\n",
+			    len, cname, hs_state,
+			    hsp->size, devid) == EOF) {
+				goto out;
+			}
+		} else {
+			timep = meta_print_time(&tv);
+
+			if (fprintf(fp,
+			    "        %-*s\t    %-11s %8lld blocks%s\t%s\n",
+			    len, cname, hs_state,
+			    hsp->size, devid, timep) == EOF) {
+				goto out;
+			}
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, record write errors against fname */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * Print or report hotspare pools.
+ *
+ * With hspnp == NULL every hotspare pool of the set is handled in turn
+ * by calling this function again; a failure for one pool does not stop
+ * the others from being printed.  Otherwise the one pool named by hspnp
+ * is printed: in the one line form with PRINT_SHORT, as a full report
+ * without it.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int
+meta_hsp_print(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_hsp_t	*hspp;
+	int		fast;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert((hspnp == NULL) || (sp->setno == HSP_SET(hspnp->hsp)));
+
+	/* print all hsps */
+	if (hspnp == NULL) {
+		mdhspnamelist_t	*pools = NULL;
+		mdhspnamelist_t	*cur;
+		int		found;
+		int		rval = 0;
+
+		found = meta_get_hsp_names(sp, &pools, options, ep);
+		if (found < 0)
+			return (-1);
+		if (found == 0)
+			return (0);
+
+		/* recurse, once per pool */
+		for (cur = pools; cur != NULL; cur = cur->next) {
+			if (meta_hsp_print(sp, cur->hspnamep, nlpp, fname,
+			    fp, options, ep) != 0)
+				rval = -1;
+		}
+
+		/* cleanup, return result */
+		metafreehspnamelist(pools);
+		return (rval);
+	}
+
+	/* get unit structure */
+	fast = (options & PRINT_FAST) ? 1 : 0;
+	hspp = meta_get_hsp_common(sp, hspnp, fast, ep);
+	if (hspp == NULL)
+		return (-1);
+
+	/* print appropriate detail */
+	if (options & PRINT_SHORT)
+		return (hsp_print(hspp, fname, fp, ep));
+
+	return (hsp_report(hspp, nlpp, fname, fp, options, ep, sp));
+}
+
+/*
+ * Check for a valid hotspare pool: 0 when meta_get_hsp() can get the
+ * unit, -1 when it cannot.
+ */
+int
+metachkhsp(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	md_error_t	*ep
+)
+{
+	return ((meta_get_hsp(sp, hspnp, ep) == NULL) ? -1 : 0);
+}
+
+/*
+ * Throw away the unit cached for a hotspare pool name, if there is one,
+ * so that it is read again the next time it is asked for.
+ */
+void
+meta_invalidate_hsp(
+	mdhspname_t	*hspnp
+)
+{
+	if (hspnp->unitp != NULL) {
+		/* free it up and clear the cache */
+		meta_free_hsp(hspnp->unitp);
+		hspnp->unitp = NULL;
+	}
+}
+
+/*
+ * add hotspares and/or hotspare pool
+ *
+ * sp		- set the pool belongs to; must not be NULL
+ * hspnp	- hotspare pool to add to
+ * hsnlp	- hotspares to add; NULL adds an empty hotspare pool
+ * options	- MDCMD_DOIT: really do it (without it the request is
+ *		  sent to the driver flagged as a dryrun and no name is
+ *		  stored in the namespace)
+ *		  MDCMD_PRINT: print a message on success
+ *		  MDCMD_INIT: selects the "pool is setup" message
+ * ep		- error return
+ *
+ * Returns 0 on success.  On failure the first error ends the operation;
+ * hotspares added before it are left in place.
+ */
+int
+meta_hs_add(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdnamelist_t	*hsnlp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*p;
+	set_hs_params_t	shs;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* clear cache */
+	meta_invalidate_hsp(hspnp);
+
+	/* setup hotspare pool info */
+	(void) memset(&shs, 0, sizeof (shs));
+	shs.shs_cmd = ADD_HOT_SPARE;
+	shs.shs_hot_spare_pool = hspnp->hsp;
+	MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+
+	/* add empty hotspare pool */
+	if (hsnlp == NULL) {
+		shs.shs_options = HS_OPT_POOL;
+		/* If DOIT is not set, it's a dryrun */
+		if ((options & MDCMD_DOIT) == 0) {
+			shs.shs_options |= HS_OPT_DRYRUN;
+		}
+		if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde,
+		    hspnp->hspname) != 0)
+			return (mdstealerror(ep, &shs.mde));
+		goto success;
+	}
+
+	/* add hotspares */
+	shs.shs_options = HS_OPT_NONE;
+	/* If DOIT is not set, it's a dryrun */
+	if ((options & MDCMD_DOIT) == 0) {
+		shs.shs_options |= HS_OPT_DRYRUN;
+	}
+	for (p = hsnlp; (p != NULL); p = p->next) {
+		mdname_t	*hsnp = p->namep;
+		diskaddr_t	size, label, start_blk;
+
+		/* should be in same set */
+		assert(sp->setno == HSP_SET(hspnp->hsp));
+
+		/* check it out */
+		if (meta_check_hotspare(sp, hsnp, ep) != 0)
+			return (-1);
+		if ((size = metagetsize(hsnp, ep)) == MD_DISKADDR_ERROR)
+			return (-1);
+		else if (size == 0)
+			return (mdsyserror(ep, ENOSPC, hsnp->cname));
+		if ((label = metagetlabel(hsnp, ep)) == MD_DISKADDR_ERROR)
+			return (-1);
+		if ((start_blk = metagetstart(sp, hsnp, ep))
+		    == MD_DISKADDR_ERROR)
+			return (-1);
+
+		shs.shs_size_option = meta_check_devicesize(size);
+
+		/* In dryrun mode (DOIT not set) we must not alter the mddb */
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (add_key_name(sp, hsnp, NULL, ep) != 0)
+				return (-1);
+		}
+
+		/* add hotspare and/or hotspare pool */
+		shs.shs_component_old = hsnp->dev;
+		shs.shs_start_blk = start_blk;
+		shs.shs_has_label = ((label > 0) ? 1 : 0);
+		shs.shs_number_blks = size;
+		shs.shs_key_old = hsnp->key;
+		if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, NULL) != 0) {
+			/* take the name stored above out of the namespace */
+			if ((options & MDCMD_DOIT) &&
+			    (shs.shs_options != HS_OPT_POOL)) {
+				(void) del_key_name(sp, hsnp, ep);
+			}
+			return (mdstealerror(ep, &shs.mde));
+		}
+	}
+
+	/* print success message */
+success:
+	if (options & MDCMD_PRINT) {
+		if ((options & MDCMD_INIT) || (hsnlp == NULL)) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Hotspare pool is setup\n"),
+			    hspnp->hspname);
+		} else if (hsnlp->next == NULL) {
+			/* exactly one hotspare was named */
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Hotspare is added\n"),
+			    hspnp->hspname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Hotspares are added\n"),
+			    hspnp->hspname);
+		}
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * delete hotspares from pool
+ *
+ * sp		set the pool belongs to (must match the set encoded in
+ *		hspnp->hsp; both are asserted)
+ * hspnp	hotspare pool to operate on
+ * hsnlp	hotspares to remove; NULL means "delete the pool itself"
+ *		(issued with HS_OPT_POOL)
+ * options	MDCMD_DOIT  - really perform the change; when clear the
+ *			      ioctl is issued with HS_OPT_DRYRUN
+ *		MDCMD_PRINT - print a success message on stdout
+ * ep		error return
+ *
+ * Returns 0 on success, otherwise the value handed back by
+ * mdstealerror() with ep describing the failed ioctl.  Processing stops
+ * at the first hotspare that cannot be deleted.
+ */
+int
+meta_hs_delete(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdnamelist_t	*hsnlp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*p;
+	set_hs_params_t	shs;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* clear cache */
+	meta_invalidate_hsp(hspnp);
+
+	/* setup hotspare pool info */
+	(void) memset(&shs, 0, sizeof (shs));
+	shs.shs_hot_spare_pool = hspnp->hsp;
+	MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+	shs.shs_cmd = DELETE_HOT_SPARE;
+
+	/* delete empty hotspare pool */
+	if (hsnlp == NULL) {
+		shs.shs_options = HS_OPT_POOL;
+		/* If DOIT is not set, it's a dryrun */
+		if ((options & MDCMD_DOIT) == 0) {
+			shs.shs_options |= HS_OPT_DRYRUN;
+		}
+		if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde,
+		    hspnp->hspname) != 0)
+			return (mdstealerror(ep, &shs.mde));
+		goto success;
+	}
+
+	/* delete hotspares */
+	shs.shs_options = HS_OPT_NONE;
+	/* If DOIT is not set, it's a dryrun */
+	if ((options & MDCMD_DOIT) == 0) {
+		shs.shs_options |= HS_OPT_DRYRUN;
+	}
+	for (p = hsnlp; (p != NULL); p = p->next) {
+		mdname_t	*hsnp = p->namep;
+
+		/* should be in same set */
+		assert(sp->setno == HSP_SET(hspnp->hsp));
+
+		/* delete hotspare */
+		/* grab the dev before the cached name info is dropped */
+		shs.shs_component_old = hsnp->dev;
+		/*
+		 * NOTE(review): hsnp->cname is still used after this
+		 * call; presumably meta_invalidate_name() leaves cname
+		 * intact - confirm.
+		 */
+		meta_invalidate_name(hsnp);
+		if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, hsnp->cname) != 0)
+			return (mdstealerror(ep, &shs.mde));
+	}
+
+	/* print success message */
+success:
+	if (options & MDCMD_PRINT) {
+		/* wording depends on whole pool vs. one vs. many spares */
+		if (hsnlp == NULL) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Hotspare pool is cleared\n"),
+			    hspnp->hspname);
+		} else if (hsnlp->next == NULL) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Hotspare is deleted\n"),
+			    hspnp->hspname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Hotspares are deleted\n"),
+			    hspnp->hspname);
+		}
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * replace hotspare in pool
+ *
+ * sp		set the pool belongs to (must match the set encoded in
+ *		hspnp->hsp; both are asserted)
+ * hspnp	hotspare pool containing oldnp
+ * oldnp	hotspare to be replaced
+ * newnp	replacement; may name the same device as oldnp, in which
+ *		case a changed dev_t is treated as a "rebind"
+ * options	MDCMD_DOIT  - really perform the change; when clear the
+ *			      ioctl is issued with HS_OPT_DRYRUN and the
+ *			      namespace is left alone
+ *		MDCMD_PRINT - print a success message on stdout
+ * ep		error return
+ *
+ * Returns 0 on success.  On failure returns -1 or the value handed
+ * back by mdsyserror()/mdstealerror(), with ep describing the problem.
+ *
+ * The name of the new device is stored in the namespace exactly once,
+ * and only after the devid saved on entry has been copied back, so
+ * that the key is never created from a devid that
+ * meta_check_hotspare() may have overwritten.
+ */
+int
+meta_hs_replace(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdname_t	*oldnp,
+	mdname_t	*newnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	set_hs_params_t	shs;
+	diskaddr_t	size, label, start_blk;
+	md_dev64_t	old_dev, new_dev;
+	diskaddr_t	new_start_blk, new_end_blk;
+	int		rebind;
+	char		*new_devidp = NULL;
+	int		ret;
+	md_set_desc	*sd;
+
+	/* should be in same set */
+	assert(sp != NULL);
+	assert(sp->setno == HSP_SET(hspnp->hsp));
+
+	/* save new binding in case this is a rebind where oldnp==newnp */
+	new_dev = newnp->dev;
+	new_start_blk = newnp->start_blk;
+	new_end_blk = newnp->end_blk;
+
+	/* invalidate, then get the hotspare (fill in oldnp from metadb) */
+	meta_invalidate_hsp(hspnp);
+	if (meta_get_hsp(sp, hspnp, ep) == NULL)
+		return (-1);
+
+	/* the old device binding is now established */
+	if ((old_dev = oldnp->dev) == NODEV64)
+		return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+	/*
+	 * check for the case where oldnp and newnp indicate the same
+	 * device, but the dev_t of the device has changed between old
+	 * and new.  This is called a rebind.  On entry the dev_t
+	 * represents the new device binding determined from the
+	 * filesystem (meta_getdev). After calling meta_get_hsp
+	 * oldnp (and maybe newnp if this is a rebind) is updated based
+	 * to the old binding from the metadb (done by metakeyname).
+	 */
+	if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
+	    (old_dev != new_dev)) {
+		rebind = 1;
+	} else {
+		rebind = 0;
+	}
+	if (rebind) {
+		/* put back the filesystem binding saved on entry */
+		newnp->dev = new_dev;
+		newnp->start_blk = new_start_blk;
+		newnp->end_blk = new_end_blk;
+	}
+
+	/*
+	 * Save a copy of the devid associated with the new disk, the reason
+	 * is that the meta_check_hotspare() call could cause the devid to
+	 * be changed to that of the devid that is currently stored in the
+	 * replica namespace for the disk in question. This devid could be
+	 * stale if we are replacing the disk. The function that overwrites
+	 * the devid is dr2drivedesc().
+	 */
+	if (newnp->drivenamep->devid != NULL)
+		new_devidp = Strdup(newnp->drivenamep->devid);
+
+	/* if it's a multi-node diskset clear new_devidp */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			Free(new_devidp);
+			return (-1);
+		}
+		if (MD_MNSET_DESC(sd)) {
+			Free(new_devidp);
+			new_devidp = NULL;
+		}
+	}
+
+	/* check it out */
+	if (meta_check_hotspare(sp, newnp, ep) != 0) {
+		/* "already in use" is expected (and ignored) for a rebind */
+		if ((! rebind) || (! mdisuseerror(ep, MDE_ALREADY))) {
+			Free(new_devidp);
+			return (-1);
+		}
+		mdclrerror(ep);
+	}
+	if ((size = metagetsize(newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if ((label = metagetlabel(newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if ((start_blk = metagetstart(sp, newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if (start_blk >= size) {
+		(void) mdsyserror(ep, ENOSPC, newnp->cname);
+		Free(new_devidp);
+		return (-1);
+	}
+
+	/*
+	 * Copy back the saved devid.  From here on new_devidp is either
+	 * NULL or owned by newnp->drivenamep, so the error paths below
+	 * have nothing left to free.
+	 *
+	 * NOTE(review): when new_devidp is NULL (no devid on entry, or a
+	 * multi-node set) the old devid is freed but the pointer is not
+	 * reset; confirm whether it should be set to NULL here.
+	 */
+	Free(newnp->drivenamep->devid);
+	if (new_devidp != NULL) {
+		newnp->drivenamep->devid = new_devidp;
+		new_devidp = NULL;
+	}
+
+	/* In dryrun mode (DOIT not set) we must not alter the mddb */
+	if (options & MDCMD_DOIT) {
+		/* store name in namespace */
+		if (add_key_name(sp, newnp, NULL, ep) != 0)
+			return (-1);
+	}
+
+	if (rebind && !metaislocalset(sp)) {
+		/*
+		 * We are 'rebind'ing a disk that is in a diskset so as well
+		 * as updating the diskset's namespace the local set needs
+		 * to be updated because it also contains a reference to the
+		 * disk in question.
+		 */
+		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, newnp->cname,
+		    ep);
+
+		if (ret != METADEVADM_SUCCESS) {
+			md_error_t	xep = mdnullerror;
+
+			/*
+			 * In dryrun mode (DOIT not set) we must not alter
+			 * the mddb
+			 */
+			if (options & MDCMD_DOIT) {
+				(void) del_key_name(sp, newnp, &xep);
+				mdclrerror(&xep);
+				return (-1);
+			}
+		}
+	}
+
+	/* replace hotspare */
+	(void) memset(&shs, 0, sizeof (shs));
+
+	shs.shs_size_option = meta_check_devicesize(size);
+
+	shs.shs_cmd = REPLACE_HOT_SPARE;
+	shs.shs_hot_spare_pool = hspnp->hsp;
+	MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+	shs.shs_component_old = old_dev;
+	shs.shs_options = HS_OPT_NONE;
+	/* If DOIT is not set, it's a dryrun */
+	if ((options & MDCMD_DOIT) == 0) {
+		shs.shs_options |= HS_OPT_DRYRUN;
+	}
+	shs.shs_component_new = new_dev;
+	shs.shs_start_blk = start_blk;
+	shs.shs_has_label = ((label > 0) ? 1 : 0);
+	shs.shs_number_blks = size;
+	shs.shs_key_new = newnp->key;
+	if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, NULL) != 0) {
+		/* take back the key stored above (DOIT only) */
+		if (options & MDCMD_DOIT) {
+			(void) del_key_name(sp, newnp, ep);
+		}
+		return (mdstealerror(ep, &shs.mde));
+	}
+
+	/* clear cache */
+	meta_invalidate_name(oldnp);
+	meta_invalidate_name(newnp);
+	meta_invalidate_hsp(hspnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Hotspare %s is replaced with %s\n"),
+		    hspnp->hspname, oldnp->cname, newnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * enable hotspares
+ *
+ * sp		set to operate in (asserted non-NULL)
+ * hsnlp	hotspare components to enable
+ * options	MDCMD_DOIT  - really perform the change; when clear the
+ *			      ioctl is issued with HS_OPT_DRYRUN
+ *		MDCMD_PRINT - print a message for each enabled hotspare
+ * ep		error return
+ *
+ * For every component the filesystem dev_t is compared with the one
+ * recorded for it in each hotspare pool of the set.  If they differ
+ * the component is "rebound" through meta_hs_replace(); otherwise it
+ * is enabled with a FIX_HOT_SPARE ioctl.
+ *
+ * Returns 0 on success and a non-zero value (normally -1) on failure
+ * with ep describing the problem.  The list of hotspare pool names is
+ * released on every exit path.
+ */
+int
+meta_hs_enable(
+	mdsetname_t	*sp,
+	mdnamelist_t	*hsnlp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t	*hspnlp = NULL;
+	mdhspnamelist_t	*hspnp;
+	set_hs_params_t	shs;
+	int		rval = -1;	/* stays -1 until all is done */
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* setup device info */
+	(void) memset(&shs, 0, sizeof (shs));
+	MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+	shs.shs_cmd = FIX_HOT_SPARE;
+	shs.shs_options = HS_OPT_NONE;
+	/* If DOIT is not set, it's a dryrun */
+	if ((options & MDCMD_DOIT) == 0) {
+		shs.shs_options |= HS_OPT_DRYRUN;
+	}
+
+	/* get the list of hotspare names */
+	if (meta_get_hsp_names(sp, &hspnlp, 0, ep) < 0)
+		goto out;
+
+	/* enable hotspares for each component */
+	for (; (hsnlp != NULL); hsnlp = hsnlp->next) {
+		mdname_t	*hsnp = hsnlp->namep;
+		md_dev64_t	fs_dev;
+		int		rebind = 0;
+		int		ret;	/* intermediate results only */
+		diskaddr_t	size, label, start_blk;
+
+		/*
+		 * get the file_system dev binding; leave through 'out'
+		 * so that the hotspare pool name list is not leaked
+		 */
+		if (meta_getdev(sp, hsnp, ep) != 0)
+			goto out;
+		fs_dev = hsnp->dev;
+
+		/*
+		 * search for the component in each hotspare pool
+		 * and replace it (instead of enable) if the binding
+		 * has changed.
+		 */
+		for (hspnp = hspnlp; (hspnp != NULL); hspnp = hspnp->next) {
+			/*
+			 * in_hsp will call meta_get_hsp which will fill
+			 * in hspnp with metadb version of component
+			 */
+			meta_invalidate_hsp(hspnp->hspnamep);
+			if (in_hsp(sp, hspnp->hspnamep, hsnp, 0, -1, ep) != 0) {
+				/*
+				 * check for the case where the dev_t has
+				 * changed between the filesystem and the
+				 * metadb.  This is called a rebind, and
+				 * is handled by meta_hs_replace.
+				 */
+				if (fs_dev != hsnp->dev) {
+					/*
+					 * establish file system binding
+					 * with invalid start/end
+					 */
+					rebind++;
+					hsnp->dev = fs_dev;
+					hsnp->start_blk = -1;
+					hsnp->end_blk = -1;
+					ret = meta_hs_replace(sp,
+					    hspnp->hspnamep,
+					    hsnp, hsnp, options, ep);
+					if (ret != 0) {
+						rval = ret;
+						goto out;
+					}
+				}
+			}
+		}
+		if (rebind)
+			continue;
+
+		/* enable the component in all hotspares that use it */
+		if (meta_check_hotspare(sp, hsnp, ep) != 0)
+			goto out;
+
+		if ((size = metagetsize(hsnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		if ((label = metagetlabel(hsnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		if ((start_blk = metagetstart(sp, hsnp, ep))
+		    == MD_DISKADDR_ERROR)
+			goto out;
+		if (start_blk >= size) {
+			(void) mdsyserror(ep, ENOSPC, hsnp->cname);
+			goto out;
+		}
+
+		/* enable hotspare */
+		shs.shs_component_old = hsnp->dev;
+		shs.shs_component_new = hsnp->dev;
+		shs.shs_start_blk = start_blk;
+		shs.shs_has_label = ((label > 0) ? 1 : 0);
+		shs.shs_number_blks = size;
+		if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, hsnp->cname) != 0) {
+			rval = mdstealerror(ep, &shs.mde);
+			goto out;
+		}
+
+		/*
+		 * Are we dealing with a non-local set? If so need to update
+		 * the local namespace so that the disk record has the correct
+		 * devid.
+		 */
+		if (!metaislocalset(sp)) {
+			ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
+			    hsnp->cname, ep);
+
+			if (ret != METADEVADM_SUCCESS) {
+				/*
+				 * Failed to update the local set. Nothing to
+				 * do here apart from report the error. The
+				 * namespace is most likely broken and some
+				 * form of remedial recovery is going to
+				 * be required.
+				 */
+				mde_perror(ep, "");
+				mdclrerror(ep);
+			}
+		}
+
+		/* clear cache */
+		meta_invalidate_name(hsnp);
+
+		/* let em know */
+		if (options & MDCMD_PRINT) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "hotspare %s is enabled\n"),
+			    hsnp->cname);
+			(void) fflush(stdout);
+		}
+	}
+
+	/* clear whole cache */
+	for (hspnp = hspnlp; (hspnp != NULL); hspnp = hspnp->next) {
+		meta_invalidate_hsp(hspnp->hspnamep);
+	}
+
+
+	/* return success */
+	rval = 0;
+
+out:
+	if (hspnlp)
+		metafreehspnamelist(hspnlp);
+	return (rval);
+}
+
+/*
+ * check for dups in the hsp itself
+ *
+ * Hotspare number hsi of the pool is compared, via
+ * meta_check_overlap(), against every hotspare that precedes it in the
+ * pool.  Returns -1 as soon as one comparison reports a problem (ep is
+ * set by meta_check_overlap()), 0 otherwise.
+ */
+static int
+check_twice(
+	md_hsp_t	*hspp,
+	uint_t		hsi,
+	md_error_t	*ep
+)
+{
+	mdhspname_t	*poolnp = hspp->hspnamep;
+	mdname_t	*candidate = hspp->hotspares.hotspares_val[hsi].hsnamep;
+	uint_t		prev;
+
+	for (prev = 0; prev < hsi; prev++) {
+		mdname_t	*earlier =
+		    hspp->hotspares.hotspares_val[prev].hsnamep;
+
+		if (meta_check_overlap(poolnp->hspname, candidate, 0, -1,
+		    earlier, 0, -1, ep) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * check hsp
+ *
+ * Every hotspare of the pool must pass meta_check_hotspare(), have a
+ * non-zero size and must not clash with a hotspare listed before it
+ * in the same pool.  Returns 0 if all hotspares pass; otherwise -1 or
+ * the value handed back by mdsyserror(), with ep set.
+ */
+/*ARGSUSED2*/
+int
+meta_check_hsp(
+	mdsetname_t	*sp,
+	md_hsp_t	*hspp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdhspname_t	*poolnp = hspp->hspnamep;
+	uint_t		idx;
+
+	for (idx = 0; idx < hspp->hotspares.hotspares_len; idx++) {
+		mdname_t	*sparenp =
+		    hspp->hotspares.hotspares_val[idx].hsnamep;
+		diskaddr_t	nblks;
+
+		/* is it usable as a hotspare at all? */
+		if (meta_check_hotspare(sp, sparenp, ep) != 0)
+			return (-1);
+
+		/* it has to have some space */
+		nblks = metagetsize(sparenp, ep);
+		if (nblks == MD_DISKADDR_ERROR)
+			return (-1);
+		if (nblks == 0)
+			return (mdsyserror(ep, ENOSPC, poolnp->hspname));
+
+		/* and it must not collide with an earlier entry */
+		if (check_twice(hspp, idx, ep) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * create hsp
+ *
+ * The pool description is validated with meta_check_hsp() first.
+ * Without MDCMD_DOIT nothing more happens and 0 is returned.
+ * Otherwise the hotspares of the description are collected into a
+ * name list and handed to meta_hs_add() with MDCMD_INIT added to the
+ * options; its result is returned.
+ */
+int
+meta_create_hsp(
+	mdsetname_t	*sp,
+	md_hsp_t	*hspp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdhspname_t	*poolnp = hspp->hspnamep;
+	mdnamelist_t	*spares = NULL;
+	uint_t		idx;
+	int		status;
+
+	if (meta_check_hsp(sp, hspp, options, ep) != 0)
+		return (-1);
+
+	/* dry run: validation was all there is to do */
+	if ((options & MDCMD_DOIT) == 0)
+		return (0);
+
+	/* gather the hotspare names in pool order */
+	for (idx = 0; idx < hspp->hotspares.hotspares_len; idx++) {
+		(void) metanamelist_append(&spares,
+		    hspp->hotspares.hotspares_val[idx].hsnamep);
+	}
+
+	options |= MDCMD_INIT;
+	status = meta_hs_add(sp, poolnp, spares, options, ep);
+
+	/* the list was only needed for the call above */
+	metafreenamelist(spares);
+	return (status);
+}
+
+/*
+ * initialize hsp
+ * NOTE: this function is metainit(1m)'s command line parser!
+ *
+ * spp		in/out set pointer handed to metahspname()/metaname();
+ *		asserted non-NULL once the pool name has been resolved
+ * argc/argv	argv[0] is the hotspare pool name, every following
+ *		argument is the name of a hotspare; no options are
+ *		accepted
+ * options	passed on to meta_create_hsp(); MDCMD_NOLOCK skips taking
+ *		the set lock and the ownership check
+ * ep		error return
+ *
+ * Returns 0 on success.  On failure returns -1, or the value handed
+ * back by meta_cook_syntax() for syntax/option errors, with ep set.
+ */
+int
+meta_init_hsp(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*uname = argv[0];
+	mdhspname_t	*hspnp = NULL;
+	md_hsp_t	*hspp = NULL;
+	uint_t		hsi;
+	int		rval = -1;
+
+
+	/* get hsp name */
+	assert(argc > 0);
+	if (argc < 1)
+		goto syntax;
+	if ((hspnp = metahspname(spp, uname, ep)) == NULL)
+		goto out;
+	assert(*spp != NULL);
+	/* from here on report errors against the resolved pool name */
+	uname = hspnp->hspname;
+
+	if (!(options & MDCMD_NOLOCK)) {
+		/* grab set lock */
+		if (meta_lock(*spp, TRUE, ep))
+			goto out;
+
+		if (meta_check_ownership(*spp, ep) != 0)
+			goto out;
+	}
+
+	/* see if it exists already */
+	if (meta_get_hsp(*spp, hspnp, ep) != NULL) {
+		(void) mdhsperror(ep, MDE_HSP_ALREADY_SETUP, hspnp->hsp, uname);
+		goto out;
+	} else if (! mdishsperror(ep, MDE_INVAL_HSP)) {
+		/* lookup failed for some reason other than "no such hsp" */
+		goto out;
+	} else {
+		/* "no such hsp" is exactly what we want to see here */
+		mdclrerror(ep);
+	}
+	/* step over the pool name */
+	--argc, ++argv;
+
+	/* parse general options */
+	/* the option string is empty, so any option found is an error */
+	optind = 0;
+	opterr = 0;
+	if (getopt(argc, argv, "") != -1)
+		goto options;
+
+	/* allocate hsp */
+	hspp = Zalloc(sizeof (*hspp));
+	hspp->hotspares.hotspares_len = argc;
+	if (argc > 0) {
+		hspp->hotspares.hotspares_val =
+		    Zalloc(argc * sizeof (*hspp->hotspares.hotspares_val));
+	}
+
+	/* setup pool */
+	hspp->hspnamep = hspnp;
+
+	/* parse hotspares */
+	for (hsi = 0; ((argc > 0) && (hsi < hspp->hotspares.hotspares_len));
+	    ++hsi) {
+		md_hs_t		*hsp = &hspp->hotspares.hotspares_val[hsi];
+		mdname_t	*hsnamep;
+
+		/* parse hotspare name */
+		if ((hsnamep = metaname(spp, argv[0], ep)) == NULL)
+			goto out;
+		hsp->hsnamep = hsnamep;
+		--argc, ++argv;
+	}
+
+	/* we should be at the end */
+	if (argc != 0)
+		goto syntax;
+
+	/* create hotspare pool */
+	if (meta_create_hsp(*spp, hspp, options, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+	goto out;
+
+	/* syntax error */
+syntax:
+	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
+	goto out;
+
+	/* options error */
+options:
+	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
+	goto out;
+
+	/* cleanup, return error */
+	/* (the success path comes through here as well, with rval == 0) */
+out:
+	if (hspp != NULL)
+		meta_free_hsp(hspp);
+	return (rval);
+}
+
+/*
+ * reset hotspare pool
+ *
+ * sp		set to operate in (asserted non-NULL)
+ * hspnp	pool to clear, or NULL to clear every hotspare pool
+ *		returned by meta_get_hsp_names() for the set
+ * options	MDCMD_DOIT  - really perform the change; when clear the
+ *			      ioctls are issued with HS_OPT_DRYRUN
+ *		MDCMD_PRINT - print a message once the pool is cleared
+ * ep		error return
+ *
+ * For a single pool all of its hotspares are deleted (HS_OPT_FORCE)
+ * and then the pool itself (HS_OPT_POOL).  A pool with a non-zero
+ * reference count is refused with MDE_HSP_IN_USE.
+ *
+ * Returns 0 on success; otherwise -1 or the value handed back by
+ * mdhsperror(), with ep set.
+ */
+int
+meta_hsp_reset(
+	mdsetname_t	*sp,
+	mdhspname_t	*hspnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_hsp_t	*hspp;
+	set_hs_params_t	shs;
+	uint_t		i;
+	int		rval = -1;
+
+	/* should have the same set */
+	assert(sp != NULL);
+	assert((hspnp == NULL) || (sp->setno == HSP_SET(hspnp->hsp)));
+
+	/* reset all hotspares */
+	if (hspnp == NULL) {
+		mdhspnamelist_t	*hspnlp = NULL;
+		mdhspnamelist_t	*p;
+
+		/* for each hotspare pool */
+		rval = 0;
+		if (meta_get_hsp_names(sp, &hspnlp, 0, ep) < 0)
+			return (-1);
+		for (p = hspnlp; (p != NULL); p = p->next) {
+			/* reset hotspare pool */
+			hspnp = p->hspnamep;
+
+			/*
+			 * If this is a multi-node set, we send a series
+			 * of individual metaclear commands.
+			 */
+			if (meta_is_mn_set(sp, ep)) {
+				if (meta_mn_send_metaclear_command(sp,
+				    hspnp->hspname, options, 0, ep) != 0) {
+					rval = -1;
+					break;
+				}
+			} else {
+				/* recurse with a specific pool */
+				if (meta_hsp_reset(sp, hspnp, options,
+				    ep) != 0) {
+					rval = -1;
+					break;
+				}
+			}
+		}
+
+		/* cleanup, return success */
+		/* (or -1 if the loop above stopped at a failing pool) */
+		metafreehspnamelist(hspnlp);
+		return (rval);
+	}
+
+	/* get unit structure */
+	if ((hspp = meta_get_hsp(sp, hspnp, ep)) == NULL)
+		return (-1);
+
+	/* make sure nobody owns us */
+	if (hspp->refcount > 0) {
+		return (mdhsperror(ep, MDE_HSP_IN_USE, hspnp->hsp,
+		    hspnp->hspname));
+	}
+
+	/* clear hotspare pool members */
+	(void) memset(&shs, 0, sizeof (shs));
+	MD_SETDRIVERNAME(&shs, MD_HOTSPARES, sp->setno);
+	shs.shs_cmd = DELETE_HOT_SPARE;
+	shs.shs_hot_spare_pool = hspnp->hsp;
+	for (i = 0; (i < hspp->hotspares.hotspares_len); ++i) {
+		md_hs_t		*hs = &hspp->hotspares.hotspares_val[i];
+		mdname_t	*hsnamep = hs->hsnamep;
+
+		/* clear cache */
+		meta_invalidate_name(hsnamep);
+
+		/* clear hotspare */
+		shs.shs_component_old = hsnamep->dev;
+		shs.shs_options = HS_OPT_FORCE;
+		/* If DOIT is not set, it's a dryrun */
+		if ((options & MDCMD_DOIT) == 0) {
+			shs.shs_options |= HS_OPT_DRYRUN;
+		}
+		if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, NULL) != 0) {
+			(void) mdstealerror(ep, &shs.mde);
+			goto out;
+		}
+	}
+
+	/* clear hotspare pool */
+	shs.shs_options = HS_OPT_POOL;
+	/* If DOIT is not set, it's a dryrun */
+	if ((options & MDCMD_DOIT) == 0) {
+		shs.shs_options |= HS_OPT_DRYRUN;
+	}
+	if (metaioctl(MD_IOCSET_HS, &shs, &shs.mde, hspnp->hspname) != 0) {
+		(void) mdstealerror(ep, &shs.mde);
+		goto out;
+	}
+	rval = 0;	/* success */
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Hotspare pool is cleared\n"),
+		    hspnp->hspname);
+		(void) fflush(stdout);
+	}
+
+	/* clear subdevices (nothing to do) */
+
+	/* cleanup, return success */
+	/* (failed ioctls also land here, with rval still -1) */
+out:
+	meta_invalidate_hsp(hspnp);
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_import.c b/usr/src/lib/lvm/libmeta/common/meta_import.c
new file mode 100644
index 0000000000..ec8819794c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_import.c
@@ -0,0 +1,2179 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <assert.h>
+#include <ctype.h>
+#include <libdevinfo.h>
+#include <mdiox.h>
+#include <meta.h>
+#include "meta_repartition.h"
+#include "meta_set_prv.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/lvm/md_mddb.h>
+#include <sys/lvm/md_names.h>
+#include <sys/lvm/md_crc.h>
+
+/*
+ * One node of a singly linked list of device ids.  In this file
+ * map_replica_disk() searches such a list by did_index and
+ * get_replica_disks() uses the node it finds.
+ */
+typedef struct did_list {
+	void		*rdid;	/* real did if replicated set */
+	void		*did;	/* did stored in lb */
+	char		*devname;	/* handed to metadiskname() */
+	dev_t		dev;
+	uint_t		did_index;	/* matched against locator index */
+	char		*minor_name;	/* copied into the drive info */
+	struct did_list	*next;
+} did_list_t;
+
+/*
+ * One entry of the global replicated disk table: maps the old device
+ * id of a disk to its new device id.  Entries hang off
+ * replicated_disk_list[] as singly linked lists.
+ */
+typedef struct replicated_disk {
+	void			*old_devid;	/* private copy, used as key */
+	void 			*new_devid;	/* caller's pointer, not copied */
+	struct replicated_disk	*next;
+} replicated_disk_t;
+
+/*
+ * The current implementation limits the max device id length to 256 bytes.
+ * Should the max device id length be increased, this define would have to
+ * be bumped up accordingly
+ */
+#define	MAX_DEVID_LEN		256
+
+/*
+ * We store a global list of all the replicated disks in the system. In
+ * order to prevent us from performing a linear search over all of them,
+ * the disks are kept in an array of linked lists (buckets) that is
+ * indexed by the length of the disk's device id, so only disks whose
+ * device ids have the same length share a bucket.
+ */
+static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL};
+
+/*
+ * The list of replicated disks is built just once and this flag is set
+ * once it's done
+ */
+static int replicated_disk_list_built = 0;
+
+/*
+ * Map logical blk to physical
+ *
+ * Modelled on the routine of the same name in the md kernel module
+ * (file md_mddb.c).  The difference is the master block it works on:
+ * the kernel routine takes the in core form (mddb_mb_ic_t), this one
+ * takes the mddb_mb_t as read directly from the disk.
+ *
+ * Returns the physical block for blk, or (daddr_t)-1 if blk is out of
+ * range.
+ */
+static daddr_t
+getphysblk(
+	mddb_block_t	blk,
+	mddb_mb_t	*mbp
+)
+{
+	/*
+	 * The only sanity check possible is on the range of the block.
+	 * When it is in range the block range map of the master block is
+	 * taken to be valid and consistent with the block count; there is
+	 * no reliable way to validate that assumption.
+	 */
+	if (blk < mbp->mb_blkcnt && blk < mbp->mb_blkmap.m_consecutive)
+		return (mbp->mb_blkmap.m_firstblk + blk);
+
+	return ((daddr_t)-1);
+}
+
+
+
+/*
+ * drive_append()
+ *
+ * Adds a freshly allocated md_im_drive_info_t to the end of the list
+ * that *midpp is part of, filling it in from private copies of the
+ * arguments.
+ *
+ * The new node is returned.
+ */
+static md_im_drive_info_t *
+drive_append(
+	md_im_drive_info_t	**midpp,
+	mddrivename_t		*dnp,
+	void			*devid,
+	void			*rdevid,
+	int			devid_sz,
+	char			*minor_name,
+	md_timeval32_t		timestamp,
+	md_im_replica_info_t	*mirp
+)
+{
+	md_im_drive_info_t	*node;
+
+	/* advance to the NULL link that terminates the list */
+	while (*midpp != NULL)
+		midpp = &((*midpp)->mid_next);
+
+	node = Zalloc(sizeof (md_im_drive_info_t));
+	*midpp = node;
+
+	node->mid_dnp = dnp;
+
+	/*
+	 * A non-NULL rdevid tells us this is the replicated diskset
+	 * case.  'devid_sz' is always the size of the valid devid, be
+	 * that 'devid' or 'rdevid'.
+	 */
+	node->mid_devid = (void *)Malloc(devid_sz);
+
+	if (rdevid == NULL) {
+		/*
+		 * Regular diskset: node->mid_o_devid stays a NULL
+		 * pointer.
+		 */
+		(void) memcpy(node->mid_devid, devid, devid_sz);
+	} else {
+		int	other_sz;
+
+		(void) memcpy(node->mid_devid, rdevid, devid_sz);
+
+		/* the 'other' devid is remembered as well */
+		other_sz = devid_sizeof((ddi_devid_t)devid);
+		node->mid_o_devid = (void *)Malloc(other_sz);
+		(void) memcpy(node->mid_o_devid, devid, other_sz);
+		node->mid_o_devid_sz = other_sz;
+	}
+
+	node->mid_devid_sz = devid_sz;
+	node->mid_setcreatetimestamp = timestamp;
+	(void) strlcpy(node->mid_minor_name, minor_name, MDDB_MINOR_NAME_MAX);
+	node->mid_replicas = mirp;
+
+	return (node);
+}
+
+
+
+/*
+ * drive_append_wrapper()
+ *
+ * Lets a caller that keeps track of the tail append without walking
+ * the whole list: drive_append() always walks from the pointer it is
+ * given, so it is handed the tail and only has the tail node to step
+ * over.  The tail pointer to use for the next append is returned.
+ */
+static md_im_drive_info_t **
+drive_append_wrapper(
+	md_im_drive_info_t	**tailpp,
+	mddrivename_t		*dnp,
+	void 			*devid,
+	void			*rdevid,
+	int			devid_sz,
+	char			*minor_name,
+	md_timeval32_t		timestamp,
+	md_im_replica_info_t	*mirp
+)
+{
+	md_im_drive_info_t	*anchor;
+
+	(void) drive_append(tailpp, dnp, devid, rdevid, devid_sz, minor_name,
+	    timestamp, mirp);
+
+	/*
+	 * If the node at the old tail has a successor the new node went
+	 * in behind it and the tail moves on; otherwise the new node is
+	 * the one at the old tail and the tail stays where it is.
+	 */
+	anchor = *tailpp;
+	return ((anchor->mid_next != NULL) ? &(anchor->mid_next) : tailpp);
+}
+
+
+
+/*
+ * replica_append()
+ *
+ * Adds a freshly allocated md_im_replica_info_t, filled in from the
+ * arguments, to the end of the list that *mirpp is part of.
+ *
+ * The new node is returned.
+ */
+static md_im_replica_info_t *
+replica_append(
+	md_im_replica_info_t	**mirpp,
+	int			flags,
+	daddr32_t		offset,
+	daddr32_t		length,
+	md_timeval32_t		timestamp
+)
+{
+	md_im_replica_info_t	*node;
+
+	/* advance to the NULL link that terminates the list */
+	while (*mirpp != NULL)
+		mirpp = &((*mirpp)->mir_next);
+
+	node = Zalloc(sizeof (md_im_replica_info_t));
+	*mirpp = node;
+
+	node->mir_flags = flags;
+	node->mir_offset = offset;
+	node->mir_length = length;
+	node->mir_timestamp = timestamp;
+
+	return (node);
+}
+
+
+
+/*
+ * replica_append_wrapper()
+ *
+ * Lets a caller that keeps track of the tail append without walking
+ * the whole list: replica_append() always walks from the pointer it is
+ * given, so it is handed the tail and only has the tail node to step
+ * over.  The tail pointer to use for the next append is returned.
+ */
+static md_im_replica_info_t **
+replica_append_wrapper(
+	md_im_replica_info_t	**tailpp,
+	int			flags,
+	daddr32_t		offset,
+	daddr32_t		length,
+	md_timeval32_t		timestamp
+)
+{
+	md_im_replica_info_t	*anchor;
+
+	(void) replica_append(tailpp, flags, offset, length, timestamp);
+
+	/* move the tail on only if the new node went in behind *tailpp */
+	anchor = *tailpp;
+	return ((anchor->mir_next != NULL) ? &anchor->mir_next : tailpp);
+}
+
+/*
+ * map_replica_disk()
+ *
+ * Walks the device id list looking for the disk whose did_index
+ * equals the given locator block device id array index.
+ *
+ * Returns the matching did_list node, or NULL if the list holds no
+ * such node.
+ */
+static did_list_t *
+map_replica_disk(
+	did_list_t	*did_listp,
+	int		did_index
+)
+{
+	did_list_t	*cur;
+
+	for (cur = did_listp; cur != NULL; cur = cur->next) {
+		if (cur->did_index == did_index)
+			return (cur);
+	}
+
+	/* ran off the end without a match */
+	return (NULL);
+}
+
+/*
+ * replicated_list_lookup()
+ *
+ * Searches the bucket of the global replicated disk list selected by
+ * devid_len for an entry whose old device id compares equal to
+ * old_devid.  Returns that entry's new device id, or NULL if there is
+ * no such entry.
+ * The returned devid belongs to the list; make a local copy if you
+ * need to store it.
+ */
+static void *
+replicated_list_lookup(
+	uint_t	devid_len,
+	void	*old_devid
+)
+{
+	replicated_disk_t	*entry;
+
+	assert(devid_len <= MAX_DEVID_LEN);
+
+	for (entry = replicated_disk_list[devid_len]; entry != NULL;
+	    entry = entry->next) {
+		if (devid_compare((ddi_devid_t)old_devid,
+		    (ddi_devid_t)entry->old_devid) == 0)
+			return (entry->new_devid);
+	}
+
+	return (NULL);
+}
+
+/*
+ * replicated_list_insert()
+ *
+ * Creates an entry for a replicated disk and puts it at the front of
+ * the bucket of the global replicated disk list selected by
+ * old_devid_len.  The old device id is copied; the new device id
+ * pointer is stored as given.
+ */
+static void
+replicated_list_insert(
+	size_t	old_devid_len,
+	void	*old_devid,
+	void	*new_devid
+)
+{
+	replicated_disk_t	*entry;
+	replicated_disk_t	**bucket;
+
+	assert(old_devid_len <= MAX_DEVID_LEN);
+
+	entry = Zalloc(sizeof (replicated_disk_t));
+	entry->old_devid = Zalloc(old_devid_len);
+	(void) memcpy(entry->old_devid, (void *)old_devid, old_devid_len);
+	entry->new_devid = new_devid;
+
+	/*
+	 * Push onto the front of the bucket.  For an empty bucket this
+	 * leaves entry->next NULL, just as Zalloc() set it.
+	 */
+	bucket = &replicated_disk_list[old_devid_len];
+	entry->next = *bucket;
+	*bucket = entry;
+}
+
+/*
+ * get_replica_disks()
+ *
+ * Will step through the locator records in the supplied locator block, and add
+ * each one with an active replica to a supplied list of md_im_drive_info_t, and
+ * add the appropriate replicas to the md_im_replica_info_t contained therein.
+ *
+ * misp		set description whose mis_drives list is extended and whose
+ *		mis_active_replicas count is bumped once per active replica
+ * did_listp	device id list, searched by locator index
+ * mb		master block supplying the replica length and timestamp
+ * lbp		locator block to walk
+ * ep		error return for the helper routines called
+ * replicated	non-zero: size the devid from the list entry's rdid,
+ *		otherwise from its did
+ *
+ * A disk that is not on the list yet is skipped (and its replica not
+ * counted) if its replica slice cannot be determined, is of size zero,
+ * cannot be opened, or has no readable master block.
+ */
+static void
+get_replica_disks(
+	md_im_set_desc_t	*misp,
+	did_list_t		*did_listp,
+	mddb_mb_t		*mb,
+	mddb_lb_t		*lbp,
+	md_error_t		*ep,
+	int			replicated
+)
+{
+	mddrivename_t		*dnp;
+	int			indx, on_list;
+	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
+	int			flags;
+	int			devid_sz;
+	char			*minor_name;
+	did_list_t		*replica_disk;
+	daddr32_t		offset;
+	daddr32_t		length;
+	md_timeval32_t		timestamp;
+	md_im_replica_info_t	**mirpp = NULL;
+	md_im_drive_info_t	**midpp = &misp->mis_drives;
+	md_im_drive_info_t	*midp;
+	void			*did;
+
+	for (indx = 0; indx < lbp->lb_loccnt; indx++) {
+
+		on_list = 0;
+		if (lbp->lb_locators[indx].l_flags & MDDB_F_ACTIVE) {
+
+			/*
+			 * search the device id list for a
+			 * specific ctds based on the locator
+			 * block device id array index.
+			 */
+			replica_disk = map_replica_disk(did_listp, indx);
+
+			assert(replica_disk != NULL);
+
+
+			/*
+			 * metadrivename() can fail for a slice name
+			 * if there is not an existing mddrivename_t.
+			 * So we use metadiskname() to strip the slice
+			 * number.
+			 */
+			dnp = metadrivename(&sp,
+			    metadiskname(replica_disk->devname), ep);
+
+			/* is this drive on the list already? */
+			for (midp = misp->mis_drives; midp != NULL;
+				midp = midp->mid_next) {
+				if (dnp == midp->mid_dnp) {
+					on_list = 1;
+					/* replicas go onto this drive */
+					mirpp = &midp->mid_replicas;
+					break;
+				}
+			}
+
+			/*
+			 * Get the correct devid_sz
+			 */
+			if (replicated)
+				did = replica_disk->rdid;
+			else
+				did = replica_disk->did;
+
+			devid_sz = devid_sizeof((ddi_devid_t)did);
+			minor_name = replica_disk->minor_name;
+
+			/*
+			 * New on the list so add it
+			 */
+			if (!on_list) {
+				mddb_mb_t	*mbp;
+				uint_t		sliceno;
+				mdname_t	*rsp;
+				int		fd = -1;
+
+				/* scratch buffer for this drive's master blk */
+				mbp = Malloc(DEV_BSIZE);
+
+				/* determine the replica slice */
+				if (meta_replicaslice(dnp, &sliceno,
+				    ep) != 0) {
+					Free(mbp);
+					continue;
+				}
+
+				/*
+				 * if the replica slice size is zero,
+				 * don't bother opening
+				 */
+				if (dnp->vtoc.parts[sliceno].size == 0) {
+					Free(mbp);
+					continue;
+				}
+
+				if ((rsp = metaslicename(dnp, sliceno,
+				    ep)) == NULL) {
+					Free(mbp);
+					continue;
+				}
+
+				if ((fd = open(rsp->rname,
+				    O_RDONLY| O_NDELAY)) < 0) {
+					Free(mbp);
+					continue;
+				}
+
+				/*
+				 * a drive may not have a master block
+				 */
+				if (read_master_block(ep, fd, mbp,
+				    DEV_BSIZE) <= 0) {
+					mdclrerror(ep);
+					Free(mbp);
+					(void) close(fd);
+					continue;
+				}
+
+				(void) close(fd);
+				/*
+				 * append the drive, stamped with the set
+				 * creation time from its own master block
+				 */
+				midpp = drive_append_wrapper(midpp, dnp,
+				    replica_disk->did, replica_disk->rdid,
+				    devid_sz, minor_name, mbp->mb_setcreatetime,
+				    NULL);
+				mirpp = &((*midpp)->mid_replicas);
+				Free(mbp);
+			}
+
+			/*
+			 * For either of these assertions to fail, it implies
+			 * a NULL return from metadrivename() above.  Since
+			 * the args came from a presumed valid locator block,
+			 * that's Bad.
+			 */
+			assert(midpp != NULL);
+			assert(mirpp != NULL);
+
+			/*
+			 * Extract the parameters describing this replica.
+			 *
+			 * The magic "1" in the length calculation accounts
+			 * for the length of the master block, in addition to
+			 * the block count it describes.  (The master block
+			 * will always take up one block on the disk, and
+			 * there will always only be one master block per
+			 * replica, even though much of the code is structured
+			 * to handle noncontiguous replicas.)
+			 */
+			flags = lbp->lb_locators[indx].l_flags;
+			offset = lbp->lb_locators[indx].l_blkno;
+			length = mb->mb_blkcnt + 1;
+			timestamp = mb->mb_setcreatetime;
+
+			mirpp = replica_append_wrapper(mirpp, flags,
+				offset, length, timestamp);
+
+			/*
+			 * If we're here it means -
+			 *
+			 * a) we had an active copy of the replica, and
+			 * b) we've added the disk to the list of
+			 *    disks as well.
+			 *
+			 * We need to bump up the number of active
+			 * replica count for each such replica so that it
+			 * can be used later for replica quorum check.
+			 */
+			misp->mis_active_replicas++;
+		}
+	}
+}
+
+
+
+/*
+ * read_drive_master_block()
+ *
+ * Helper for get_nonreplica_disks(): determines the replica slice of the
+ * given drive, opens it read-only and reads its master block into mbp,
+ * which must point to a buffer of at least DEV_BSIZE bytes.
+ *
+ * An error left in ep by read_master_block() is cleared, because a drive
+ * without a master block is not an error for the caller.  Errors recorded
+ * by meta_replicaslice() or metaslicename() are left in ep untouched.
+ *
+ * Returns:
+ *	1 if a master block was read into mbp
+ *	0 if the drive should be skipped
+ */
+static int
+read_drive_master_block(
+	mddrivename_t	*dnp,
+	mddb_mb_t	*mbp,
+	md_error_t	*ep
+)
+{
+	uint_t		sliceno;
+	mdname_t	*rsp;
+	int		fd;
+
+	/* determine the replica slice */
+	if (meta_replicaslice(dnp, &sliceno, ep) != 0)
+		return (0);
+
+	/* if the replica slice size is zero, don't bother opening */
+	if (dnp->vtoc.parts[sliceno].size == 0)
+		return (0);
+
+	if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
+		return (0);
+
+	if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0)
+		return (0);
+
+	/* a drive may not have a master block */
+	if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) {
+		mdclrerror(ep);
+		(void) close(fd);
+		return (0);
+	}
+
+	(void) close(fd);
+	return (1);
+}
+
+/*
+ * get_nonreplica_disks()
+ *
+ * Walks the minor-name entries of the DID_NM record, pairs each one with
+ * its device id from the DID_SHR_NM record, maps that device id to a ctds
+ * name and, for every disk that is not a metadevice and is not already on
+ * misp->mis_drives, appends it to that list.  A disk is only appended if
+ * its master block can be read.
+ *
+ * Arguments:
+ *	misp		set descriptor whose mis_drives list is extended
+ *	did_nm		mddb record holding the devid_min_rec
+ *	did_shrnm	mddb record holding the devid_shr_rec
+ *	ep		error structure
+ *	replicated	non-zero if the set is a replicated diskset; the
+ *			device ids stored in the record are then looked up
+ *			in the replicated disk list to find the real ones
+ *
+ * Exits the process through md_exit() if a minor-name entry has no
+ * matching device id entry.
+ */
+static void
+get_nonreplica_disks(
+	md_im_set_desc_t	*misp,
+	mddb_rb_t		*did_nm,
+	mddb_rb_t		*did_shrnm,
+	md_error_t		*ep,
+	int			replicated
+)
+{
+	char			*search_path = "/dev";
+	devid_nmlist_t		*nmlist;
+	md_im_drive_info_t	*midp, **midpp = &misp->mis_drives;
+	mddrivename_t		*dnp;
+	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
+	int			on_list;
+	int			devid_sz;
+	struct devid_min_rec	*did_rec;
+	struct devid_shr_rec	*did_shr_rec;
+	struct did_shr_name	*did;
+	struct did_min_name	*min;
+	void			*r_did;	/* NULL if not a replicated diskset */
+	void			*valid_did;
+
+	/*
+	 * We got a pointer to an mddb record, which we expect to contain a
+	 * name record; extract the pointer thereto.
+	 */
+	/* LINTED */
+	did_rec = (struct devid_min_rec *)((caddr_t)(&did_nm->rb_data));
+	/* LINTED */
+	did_shr_rec = (struct devid_shr_rec *)
+	    ((caddr_t)(&did_shrnm->rb_data));
+
+	/*
+	 * Skip the nm_rec_hdr and iterate on the array of struct minor_name
+	 * at the end of the devid_min_rec
+	 */
+	for (min = &did_rec->minor_name[0]; min->min_devid_key != 0;
+	    /* LINTED */
+	    min = (struct did_min_name *)((char *)min + DID_NAMSIZ(min))) {
+
+		on_list = 0;
+		r_did = NULL;
+
+		/*
+		 * For a given DID_NM key, locate the corresponding device
+		 * id from DID_NM_SHR
+		 */
+		for (did = &did_shr_rec->device_id[0]; did->did_key != 0;
+		    /* LINTED */
+		    did = (struct did_shr_name *)
+		    ((char *)did + DID_SHR_NAMSIZ(did))) {
+			/*
+			 * We got a match, this is the device id we're
+			 * looking for
+			 */
+			if (min->min_devid_key == did->did_key)
+				break;
+		}
+
+		if (did->did_key == 0) {
+			/* we didn't find a match */
+			assert(did->did_key != 0);
+			md_exit(NULL, 1);
+		}
+
+		/*
+		 * If replicated diskset
+		 */
+		if (replicated) {
+			size_t		new_devid_len;
+			char		*temp;
+			/*
+			 * In this case, did->did_devid will
+			 * be invalid so lookup the real one
+			 */
+			temp = replicated_list_lookup(did->did_size,
+			    did->did_devid);
+			new_devid_len = devid_sizeof((ddi_devid_t)temp);
+			r_did = Zalloc(new_devid_len);
+			(void) memcpy(r_did, temp, new_devid_len);
+			valid_did = r_did;
+		} else {
+			valid_did = did->did_devid;
+		}
+
+		/* Get the ctds mapping for that device id */
+		if (meta_deviceid_to_nmlist(search_path,
+		    (ddi_devid_t)valid_did,
+		    &min->min_name[0], &nmlist) == 0) {
+
+			assert(nmlist->devname != NULL);
+			/* Don't bother with metadevices, but track disks */
+			if (!is_metaname(nmlist->devname)) {
+				dnp = metadrivename(&sp,
+				    metadiskname(nmlist->devname), ep);
+
+				assert(dnp != NULL);
+				/* Is it already on the list? */
+				for (midp = misp->mis_drives; midp != NULL;
+				    midp = midp->mid_next) {
+					if (midp->mid_dnp == dnp) {
+						on_list = 1;
+						break;
+					}
+				}
+
+				if (!on_list) {
+					mddb_mb_t	*mbp;
+
+					mbp = Malloc(DEV_BSIZE);
+					devid_sz = devid_sizeof(
+					    (ddi_devid_t)valid_did);
+
+					/*
+					 * If it is replicated diskset,
+					 * r_did will be non-NULL and
+					 * devid_sz will be its size
+					 */
+					if (read_drive_master_block(dnp,
+					    mbp, ep)) {
+						midpp = drive_append_wrapper(
+						    midpp, dnp,
+						    &did->did_devid, r_did,
+						    devid_sz,
+						    &min->min_name[0],
+						    mbp->mb_setcreatetime,
+						    NULL);
+					}
+					Free(mbp);
+				}
+			}
+
+			/*
+			 * Always reached now: skipping a drive no longer
+			 * bypasses the release of the name list.
+			 */
+			devid_free_nmlist(nmlist);
+		}
+
+		/*
+		 * The drive list cannot be holding on to r_did: the other
+		 * device id pointers handed to drive_append_wrapper() refer
+		 * to buffers that the callers free right after the set has
+		 * been appended, so the device ids must be copied there.
+		 * Release our private copy.
+		 */
+		if (r_did != NULL)
+			Free(r_did);
+	}
+}
+
+/*
+ * set_append()
+ *
+ * Allocates a zeroed md_im_set_desc_t, links it in at the tail of the list
+ * headed by *mispp and populates it: disks with replicas are taken from the
+ * locator block, and, when all three namespace records are present, disks
+ * without replicas are taken from the device id namespace.
+ *
+ * Returns a pointer to the new node.  Any error raised while gathering the
+ * disks is left in ep for the caller to deal with.
+ */
+static md_im_set_desc_t *
+set_append(
+	md_im_set_desc_t	**mispp,
+	did_list_t		*did_listp,
+	mddb_mb_t		*mb,
+	mddb_lb_t		*lbp,
+	mddb_rb_t		*nm,
+	mddb_rb_t		*did_nm,
+	mddb_rb_t		*did_shrnm,
+	md_error_t		*ep,
+	int			replicated
+)
+{
+	md_im_set_desc_t	**linkpp = mispp;
+	md_im_set_desc_t	*newset;
+	set_t			oldsetno = mb->mb_setno;
+
+	/* find the NULL link that terminates the list */
+	while (*linkpp != NULL)
+		linkpp = &(*linkpp)->mis_next;
+
+	/* hang a fresh, zeroed element off that link */
+	newset = Zalloc(sizeof (md_im_set_desc_t));
+	*linkpp = newset;
+
+	if (replicated)
+		newset->mis_flags = MD_IM_SET_REPLICATED;
+
+	newset->mis_oldsetno = oldsetno;
+
+	/* first the disks that carry replicas ... */
+	get_replica_disks(newset, did_listp, mb, lbp, ep, replicated);
+
+	/* ... then the ones that don't, if the namespace is complete */
+	if (nm != NULL && did_nm != NULL && did_shrnm != NULL)
+		get_nonreplica_disks(newset, did_nm, did_shrnm, ep,
+		    replicated);
+
+	return (newset);
+}
+
+
+
+/*
+ * set_append_wrapper()
+ *
+ * Tail-tracking front end to set_append().  The caller hands in the link it
+ * got back from the previous call, so set_append() has at most one node to
+ * walk over before it finds the end of the list.
+ *
+ * Returns the link to pass in on the next call: tailpp itself when the node
+ * it points to is the last one (the list was empty on entry), otherwise the
+ * address of that node's mis_next.
+ */
+static md_im_set_desc_t **
+set_append_wrapper(
+	md_im_set_desc_t	**tailpp,
+	did_list_t		*did_listp,
+	mddb_mb_t		*mb,
+	mddb_lb_t		*lbp,
+	mddb_rb_t		*nm,
+	mddb_rb_t		*did_nm,
+	mddb_rb_t		*did_shrnm,
+	md_error_t		*ep,
+	int			replicated
+)
+{
+	md_im_set_desc_t	**nextpp;
+
+	(void) set_append(tailpp, did_listp, mb, lbp, nm, did_nm,
+	    did_shrnm, ep, replicated);
+
+	nextpp = &(*tailpp)->mis_next;
+
+	/* first item in the list: hand back the same link */
+	if (*nextpp == NULL)
+		return (tailpp);
+
+	return (nextpp);
+}
+
+
+
+/*
+ * add_disk_names()
+ *
+ * Callback for di_walk_minor().  For a character-device minor whose node
+ * has a device id, and whose node differs from the one recorded by the
+ * previous accepted call, maps the device id and minor name to a device
+ * name and appends the corresponding disk name to the md_im_names_t passed
+ * in through args.
+ *
+ * The last accepted node is remembered in a function-static variable, so
+ * that state carries over from one call to the next.
+ *
+ * Always returns DI_WALK_CONTINUE.
+ */
+static int
+add_disk_names(di_node_t node, di_minor_t minor, void *args)
+{
+	char			*search_path = "/dev";
+	ddi_devid_t		devid = di_devid(node);
+	devid_nmlist_t		*nm;
+	char			*min = di_minor_name(minor);
+	md_im_names_t		*cnames = (md_im_names_t *)args;
+	static di_node_t	save_node = NULL;
+	int			slot;
+
+	/*
+	 * Without a device id there is nothing we can do with the device,
+	 * so leave it off the list.  This takes care of CD and floppy
+	 * devices that have no device id.
+	 */
+	if (devid == NULL)
+		return (DI_WALK_CONTINUE);
+
+	/* only char disk devices are of interest, not block ones */
+	if (di_minor_spectype(minor) != S_IFCHR)
+		return (DI_WALK_CONTINUE);
+
+	/* one entry per node: ignore further minors of the saved node */
+	if (save_node != NULL && node == save_node)
+		return (DI_WALK_CONTINUE);
+
+	save_node = node;
+
+	if (meta_deviceid_to_nmlist(search_path, devid, min, &nm) != 0)
+		return (DI_WALK_CONTINUE);
+
+	/* claim the next slot, then grow the array to hold it */
+	slot = cnames->min_count++;
+
+	assert(nm->devname != NULL);
+	cnames->min_names = Realloc(cnames->min_names,
+	    cnames->min_count * sizeof (char *));
+
+	assert(cnames->min_names != NULL);
+	cnames->min_names[slot] = metadiskname(nm->devname);
+	devid_free_nmlist(nm);
+
+	return (DI_WALK_CONTINUE);
+}
+
+
+
+/*
+ * meta_list_disks()
+ *
+ * Takes a snapshot of the device tree and walks its DDI_NT_BLOCK minor
+ * nodes with add_disk_names(), which collects disk names into cnames.
+ * cnames is reset to an empty list before the walk.
+ *
+ * Returns 0 on success, or the result of mdsyserror() if the snapshot
+ * could not be taken.
+ */
+int
+meta_list_disks(md_error_t *ep, md_im_names_t *cnames)
+{
+	di_node_t	snapshot;
+
+	assert(cnames != NULL);
+	cnames->min_names = NULL;
+	cnames->min_count = 0;
+
+	snapshot = di_init("/", DINFOCPYALL|DINFOFORCE);
+	if (snapshot == DI_NODE_NIL)
+		return (mdsyserror(ep, errno, NULL));
+
+	(void) di_walk_minor(snapshot, DDI_NT_BLOCK, 0, cnames,
+	    add_disk_names);
+
+	di_fini(snapshot);
+
+	return (0);
+}
+
+/*
+ * meta_imp_drvused
+ *
+ * Decides whether a drive has to be left alone by import.  The checks are
+ * made in this order and stop at the first one that trips:
+ *	- drive is mounted
+ *	- drive is swapped on
+ *	- if meta_setup_db_locations() succeeds: drive is in use, or
+ *	  drive is part of a set
+ *
+ * ep carries the error of the check that tripped.
+ *
+ * Returns 1 if drive is in use.
+ * Returns 0 if drive is not in use.
+ */
+int
+meta_imp_drvused(
+	mdsetname_t		*sp,
+	mddrivename_t		*dnp,
+	md_error_t		*ep
+)
+{
+	md_error_t		db_status = mdnullerror;
+
+	if (meta_check_drivemounted(sp, dnp, ep) != 0)
+		return (1);
+
+	if (meta_check_driveswapped(sp, dnp, ep) != 0)
+		return (1);
+
+	/*
+	 * meta_setup_db_locations() gets a throw-away error structure:
+	 * the error itself is never looked at, only whether the call
+	 * succeeded.  If it did not, the remaining two checks are
+	 * skipped.
+	 */
+	if (meta_setup_db_locations(&db_status) != 0)
+		return (0);
+
+	if (meta_check_drive_inuse(sp, dnp, 1, ep) != 0)
+		return (1);
+
+	if (meta_check_driveinset(sp, dnp, ep) != 0)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * meta_prune_cnames()
+ *
+ * Sorts the disks named in cnames into two drive name lists: those that
+ * meta_imp_drvused() reports as in use, and the rest.  Names that cannot
+ * be turned into a drive name are reported with mde_perror() and skipped.
+ *
+ * Return value:
+ *	- no disk was in use, or err_on_prune is not set:
+ *	  the list of usable disks (the in-use list is freed)
+ *	- at least one disk was in use and err_on_prune is set:
+ *	  the list of in-use disks (the usable list is freed), with
+ *	  MDE_DS_DRIVEINUSE set in ep for the first disk on it
+ */
+mddrivenamelist_t *
+meta_prune_cnames(
+	md_error_t *ep,
+	md_im_names_t *cnames,
+	int err_on_prune
+)
+{
+	int			i;
+	int			nused = 0;
+	mddrivenamelist_t	*avail = NULL;
+	mddrivenamelist_t	**availpp = &avail;
+	mddrivenamelist_t	*used = NULL;
+	mddrivenamelist_t	**usedpp = &used;
+	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
+
+	for (i = 0; i < cnames->min_count; i++) {
+		mddrivename_t	*dnp;
+
+		dnp = metadrivename(&sp, cnames->min_names[i], ep);
+		if (dnp == NULL) {
+			/*
+			 * Let the user know what went wrong with this
+			 * name, then carry on with the others.
+			 */
+			mde_perror(ep, cnames->min_names[i]);
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (!meta_imp_drvused(sp, dnp, ep)) {
+			availpp = meta_drivenamelist_append_wrapper(availpp,
+			    dnp);
+			continue;
+		}
+
+		/* drive is in use: remember it and drop the error */
+		usedpp = meta_drivenamelist_append_wrapper(usedpp, dnp);
+		nused++;
+		mdclrerror(ep);
+	}
+
+	if (nused == 0)
+		return (avail);
+
+	if (!err_on_prune) {
+		metafreedrivenamelist(used);
+		return (avail);
+	}
+
+	(void) mddserror(ep, MDE_DS_DRIVEINUSE, 0,
+	    NULL, used->drivenamep->cname, NULL);
+	metafreedrivenamelist(avail);
+
+	return (used);
+}
+
+/*
+ * read_master_block()
+ *
+ * Seeks to disk block 16 of fd and reads bsize bytes into bp, then checks
+ * the magic number and revision of what was read as a master block.
+ *
+ * Returns:
+ *	the result of mdsyserror() if the seek or the read fails
+ *	0 if the magic number or the revision is not the expected one
+ *	1 for valid master block
+ *
+ * A bad magic number also records MDE_DB_MASTER in ep; a bad revision on
+ * its own does not.  The buffer is filled in for both the 0 and 1 cases.
+ */
+int
+read_master_block(
+	md_error_t	*ep,
+	int		fd,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_mb_t	*mbp = bp;
+	int		valid;
+
+	assert(bp != NULL);
+
+	if (lseek(fd, (off_t)dbtob(16), SEEK_SET) < 0)
+		return (mdsyserror(ep, errno, NULL));
+
+	if (read(fd, bp, bsize) != bsize)
+		return (mdsyserror(ep, errno, NULL));
+
+	valid = 1;
+
+	/*
+	 * Two magic numbers are acceptable: MDDB_MAGIC_MB marks a real
+	 * master block, MDDB_MAGIC_DU a dummy one.
+	 */
+	if (!((mbp->mb_magic == MDDB_MAGIC_MB) ||
+	    (mbp->mb_magic == MDDB_MAGIC_DU))) {
+		valid = 0;
+		(void) mdmddberror(ep, MDE_DB_MASTER, 0, 0, 0, NULL);
+	}
+
+	if (mbp->mb_revision != MDDB_REV_MB)
+		valid = 0;
+
+	return (valid);
+}
+
+/*
+ * read_locator_block()
+ *
+ * Reads bsize bytes into bp from the first block of the master block's
+ * block map and checks them for the locator block magic number.
+ *
+ * Returns:
+ *	the result of mdsyserror() if the seek or the read fails
+ *	0 for no valid locator block
+ *	1 for valid locator block
+ */
+int
+read_locator_block(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_lb_t	*lbp = bp;
+
+	assert(bp != NULL);
+
+	if (lseek(fd, (off_t)dbtob(mbp->mb_blkmap.m_firstblk), SEEK_SET) < 0)
+		return (mdsyserror(ep, errno, NULL));
+
+	if (read(fd, bp, bsize) != bsize)
+		return (mdsyserror(ep, errno, NULL));
+
+	if (lbp->lb_magic != MDDB_MAGIC_LB)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * phys_read()
+ *
+ * Translates the replica block number blk with getphysblk(), seeks fd to
+ * the resulting block and reads bcount bytes into bp.
+ *
+ * Returns:
+ *	bcount on success
+ *	the result of mdmddberror() (MDE_DB_BLKRANGE) if the block number
+ *	cannot be translated
+ *	the result of mdsyserror() if the seek or the read fails
+ */
+int
+phys_read(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	daddr_t		blk,
+	void		*bp,
+	int		bcount
+)
+{
+	daddr_t		pblk;
+
+	pblk = getphysblk(blk, mbp);
+	if (pblk < 0) {
+		return (mdmddberror(ep, MDE_DB_BLKRANGE, NODEV32,
+		    MD_LOCAL_SET, blk, NULL));
+	}
+
+	if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0)
+		return (mdsyserror(ep, errno, NULL));
+
+	if (read(fd, bp, bcount) != bcount)
+		return (mdsyserror(ep, errno, NULL));
+
+	return (bcount);
+}
+
+/*
+ * read_locator_block_did()
+ *
+ * Reads bsize bytes into bp, starting at the block the locator block names
+ * as lb_didfirstblk, and checks them for the device id block magic number.
+ *
+ * Returns:
+ * 	< 0 if phys_read() fails
+ *	  0 if the magic number is not MDDB_MAGIC_DI
+ *	  1 if the magic number is MDDB_MAGIC_DI
+ */
+int
+read_locator_block_did(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	mddb_lb_t	*lbp,
+	void		*bp,
+	int		bsize
+)
+{
+	int		firstblk = lbp->lb_didfirstblk;
+	mddb_did_blk_t	*didblkp = bp;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, firstblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (didblkp->blk_magic != MDDB_MAGIC_DI)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * read_locator_names()
+ *
+ * Reads bsize bytes into bp, starting at the block the locator block names
+ * as lb_lnfirstblk, and checks them for the locator names magic number.
+ *
+ * Returns:
+ *	< 0 if phys_read() fails
+ *	  0 for no valid locator name struct
+ *	  1 for valid locator name struct
+ */
+int
+read_locator_names(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	mddb_lb_t	*lbp,
+	void		*bp,
+	int		bsize
+)
+{
+	int		firstblk = lbp->lb_lnfirstblk;
+	mddb_ln_t	*namesp = bp;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, firstblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (namesp->ln_magic != MDDB_MAGIC_LN)
+		return (0);
+
+	return (1);
+}
+
+
+/*
+ * read_database_block()
+ *
+ * Reads bsize bytes of replica block dbblk into bp and checks them for the
+ * database block magic number.
+ *
+ * Returns:
+ *	< 0 if phys_read() fails
+ *	  0 if the magic number is not MDDB_MAGIC_DB
+ *	  1 if the magic number is MDDB_MAGIC_DB
+ */
+int
+read_database_block(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	int		dbblk,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_db_t	*blockp = bp;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, dbblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (blockp->db_magic != MDDB_MAGIC_DB)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * read_loc_didblks()
+ *
+ * Reads bsize bytes of replica block didblk into bp and checks them for
+ * the device id block magic number.
+ *
+ * Returns:
+ *	< 0 if phys_read() fails
+ *	  0 if the magic number is not MDDB_MAGIC_DI
+ *	  1 if the magic number is MDDB_MAGIC_DI
+ */
+int
+read_loc_didblks(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	int		didblk,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_did_blk_t	*blockp = bp;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, didblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (blockp->blk_magic != MDDB_MAGIC_DI)
+		return (0);
+
+	return (1);
+}
+
+
+/*
+ * read_loc_didinfo()
+ *
+ * Reads bsize bytes of replica block infoblk into bp and treats the start
+ * of the buffer as a mddb_did_info_t.
+ *
+ * Returns:
+ *	< 0 if phys_read() fails
+ *	  0 if MDDB_DID_EXISTS is not set in info_flags
+ *	  1 if MDDB_DID_EXISTS is set in info_flags
+ */
+int
+read_loc_didinfo(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mbp,
+	int		infoblk,
+	void		*bp,
+	int		bsize
+)
+{
+	mddb_did_info_t	*didinfop = bp;
+	int		nread;
+
+	assert(bp != NULL);
+
+	nread = phys_read(ep, fd, mbp, infoblk, bp, bsize);
+	if (nread < 0)
+		return (nread);
+
+	if (didinfop->info_flags & MDDB_DID_EXISTS)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * meta_nm_rec()
+ *
+ * Return the DE (directory entry) in the database block dbp whose de_type1
+ * matches the requested namespace record type, or NULL if there is none.
+ *
+ * Side effect: if dbp->db_firstentry is non-NULL, the entry pointers in
+ * dbp are rewritten in place before the search.  db_firstentry is set to
+ * the address immediately following the db_firstentry field itself, and
+ * the de_next of every entry that has a non-NULL de_next is set to the
+ * address immediately following that entry.  The returned pointer refers
+ * to memory inside dbp.
+ *
+ * NOTE(review): presumably the pointer values found in the block are stale
+ * in-core addresses saved with the on-disk image, and only their being
+ * NULL or not is meaningful here -- confirm against the mddb format.
+ */
+static mddb_de_t *
+meta_nm_rec(mddb_db_t *dbp, mddb_type_t rectype)
+{
+	mddb_de_t *dep;
+	int	desize;
+
+	if (dbp->db_firstentry != NULL) {
+		/* the first entry starts right behind the db_firstentry field */
+		/* LINTED */
+		dep = (mddb_de_t *)((caddr_t)(&dbp->db_firstentry)
+				    + sizeof (dbp->db_firstentry));
+		dbp->db_firstentry = dep;
+		/*
+		 * de_next is tested before it is overwritten: a non-NULL
+		 * value means "another entry follows", a NULL one ends
+		 * the chain and is left alone.
+		 */
+		while (dep && dep->de_next) {
+			/*
+			 * Size of this entry: the fixed part of the
+			 * structure without its de_blks member, plus one
+			 * daddr_t for each of its de_blkcount blocks.
+			 */
+			desize = sizeof (*dep) - sizeof (dep->de_blks) +
+				sizeof (daddr_t) * dep->de_blkcount;
+			/* LINTED */
+			dep->de_next = (mddb_de_t *)
+				((caddr_t)dep + desize);
+			dep = dep->de_next;
+		}
+	}
+
+	/* walk the (now usable) chain; dep ends up NULL if nothing matches */
+	for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) {
+		if (dep->de_type1 == rectype)
+			break;
+	}
+	return (dep);
+}
+
+/*
+ * read_nm_rec()
+ *
+ * Scans the chain of database blocks that starts at lbp->lb_dbfirstblk for
+ * a directory entry of type rectype (NM, DID_NM or DID_SHR_NM).  When one
+ * is found, a buffer big enough for all of the record's blocks is allocated,
+ * stored in *nm, and filled by reading the blocks one by one.
+ *
+ * Arguments:
+ *	ep		error structure
+ *	fd		open descriptor for the replica slice
+ *	mbp		master block of the replica
+ *	lbp		locator block of the replica
+ *	nm		out: address of the allocated record buffer
+ *	rectype		namespace record type to look for
+ *	diskname	name used in error reports
+ *
+ * Returns:
+ *	< 0 for failure
+ *	  0 for no valid NM/DID_NM/DID_NM_SHR record
+ *	  1 for valid NM/DID_NM/DID_NM_SHR record
+ *
+ * *nm is set as soon as the buffer has been allocated, so it may be
+ * non-NULL on a failure return as well; the caller owns the buffer and
+ * has to free it in either case.
+ */
+static int
+read_nm_rec(
+	md_error_t 	*ep,
+	int 		fd,
+	mddb_mb_t	*mbp,
+	mddb_lb_t	*lbp,
+	char		**nm,
+	mddb_type_t	rectype,
+	char		*diskname
+)
+{
+	int		cnt, dbblk, rval = 0;
+	char		db[DEV_BSIZE];
+	/*
+	 * Must start out NULL: if lb_dbfirstblk is 0 the scan loop below
+	 * never runs, and the "not found" test would otherwise look at an
+	 * uninitialized pointer.
+	 */
+	mddb_de_t	*dep = NULL;
+	/*LINTED*/
+	mddb_db_t	*dbp = (mddb_db_t *)&db;
+	char 		*tmpnm = NULL;
+	daddr_t		pblk;
+
+	for (dbblk = lbp->lb_dbfirstblk;
+	    dbblk != 0;
+	    dbblk = dbp->db_nextblk) {
+
+		if ((rval = read_database_block(ep, fd, mbp, dbblk, dbp,
+		    sizeof (db))) <= 0)
+			return (rval);
+
+		/*
+		 * Locate NM/DID_NM/DID_NM_SHR record. Normally there is
+		 * only one record per mddb. There is a rare case when we
+		 * can't expand the record. If this is the case then we
+		 * will have multiple NM/DID_NM/DID_NM_SHR records linked
+		 * with r_next_recid.
+		 *
+		 * For now assume the normal case and handle the extended
+		 * namespace in Phase 2.
+		 */
+		if ((dep = meta_nm_rec(dbp, rectype)) != NULL)
+			break;
+	}
+
+	/* If meta_nm_rec() never succeeded, bail out */
+	if (dep == NULL)
+		return (0);
+
+	/*
+	 * Read in the appropriate record and return configurations.
+	 * dep points into the local db buffer, which stays untouched
+	 * from here on.
+	 */
+	tmpnm = (char *)Zalloc(dbtob(dep->de_blkcount));
+	*nm = tmpnm;
+
+	for (cnt = 0; cnt < dep->de_blkcount; cnt++) {
+		if ((pblk = getphysblk(dep->de_blks[cnt], mbp)) < 0) {
+			rval = mdmddberror(ep, MDE_DB_BLKRANGE,
+			    NODEV32, MD_LOCAL_SET,
+			    dep->de_blks[cnt], diskname);
+			return (rval);
+		}
+
+		if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) {
+			rval = mdsyserror(ep, errno, diskname);
+			return (rval);
+		}
+
+		if (read(fd, tmpnm, DEV_BSIZE) != DEV_BSIZE) {
+			rval = mdsyserror(ep, errno, diskname);
+			return (rval);
+		}
+
+		/* next block of the record goes right behind this one */
+		tmpnm += DEV_BSIZE;
+	}
+	return (1);
+}
+
+/*
+ * is_replicated
+ *
+ * Tells whether the disk open on fd has been replicated, by comparing the
+ * device id recorded in its master block with the device id the system
+ * currently reports for it.  Differing device ids mean a replicated disk.
+ *
+ * A master block without the device id magic number, or a disk for which
+ * no device id can be obtained, counts as not replicated.
+ *
+ * If need_devid is set and the disk is replicated, *new_devid is pointed at
+ * a freshly allocated copy of the current device id, which the caller has
+ * to free.  In all other cases new_devid is not touched.
+ *
+ * Returns:
+ * 	1	if it's a replicated disk
+ * 	0 	if it's not a replicated disk
+ */
+static int
+is_replicated(
+	int fd,
+	mddb_mb_t *mbp,
+	int need_devid,
+	void **new_devid
+)
+{
+	ddi_devid_t	cur_devid;
+	size_t		cur_devid_len;
+
+	if (mbp->mb_devid_magic != MDDB_MAGIC_DE)
+		return (0);
+
+	if (devid_get(fd, &cur_devid) != 0)
+		return (0);
+
+	/* same device id as in the master block: not replicated */
+	if (devid_compare((ddi_devid_t)mbp->mb_devid, cur_devid) == 0) {
+		devid_free(cur_devid);
+		return (0);
+	}
+
+	if (need_devid) {
+		cur_devid_len = devid_sizeof(cur_devid);
+		*new_devid = Zalloc(cur_devid_len);
+		(void) memcpy(*new_devid, (void *)cur_devid, cur_devid_len);
+	}
+
+	devid_free(cur_devid);
+
+	return (1);
+}
+
+/*
+ * free_replicated_disks_list()
+ *
+ * Empties every slot of replicated_disk_list, indices 0 through
+ * MAX_DEVID_LEN inclusive: each element on a slot's chain is freed together
+ * with its old and new device ids, and the slot is left NULL.
+ */
+static void
+free_replicated_disks_list()
+{
+	replicated_disk_t	*cur, *follow;
+	int			slot;
+
+	for (slot = 0; slot <= MAX_DEVID_LEN; slot++) {
+		for (cur = replicated_disk_list[slot]; cur != NULL;
+		    cur = follow) {
+			/* pick up the link before the element goes away */
+			follow = cur->next;
+
+			Free(cur->old_devid);
+			Free(cur->new_devid);
+			Free(cur);
+		}
+		replicated_disk_list[slot] = NULL;
+	}
+}
+
+/*
+ * build_replicated_disks_list()
+ *
+ * Builds a list of disks that have been replicated using either a
+ * remote replication or a point-in-time replication software. The
+ * list is stored as a two dimensional sparse array.
+ *
+ * Every drive on dnlp that has a non-empty replica slice and a readable
+ * master block is checked with is_replicated(); for those that are
+ * replicated, the device id from the master block and the current device
+ * id are entered into the list with replicated_list_insert().  Drives
+ * without a usable replica slice or master block are skipped.
+ *
+ * On success replicated_disk_list_built is set.
+ *
+ * Returns
+ * 	1	on success
+ * 	0 	on failure (a replica slice could not be opened; the
+ *		error is recorded in ep)
+ */
+static int
+build_replicated_disks_list(
+	md_error_t *ep,
+	mddrivenamelist_t *dnlp
+)
+{
+	uint_t			sliceno;
+	int			fd = -1;
+	mddrivenamelist_t	*dp;
+	mdname_t		*rsp;
+	mddb_mb_t		*mbp;
+
+	mbp = Malloc(DEV_BSIZE);
+
+	for (dp = dnlp; dp != NULL; dp = dp->next) {
+		mddrivename_t *dnp;
+		void *new_devid;
+
+		dnp = dp->drivenamep;
+		/* determine the replica slice */
+		if (meta_replicaslice(dnp, &sliceno, ep) != 0)
+			continue;
+
+		/*
+		 * if the replica slice size is zero, don't bother opening
+		 */
+		if (dnp->vtoc.parts[sliceno].size == 0)
+			continue;
+
+		if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
+			continue;
+
+		if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0) {
+			/*
+			 * Record the error, but report the failure as 0:
+			 * that is the value this routine is documented
+			 * to return and the one the caller tests for.
+			 * Don't leave the master block buffer behind.
+			 */
+			(void) mdsyserror(ep, errno, rsp->rname);
+			Free(mbp);
+			return (0);
+		}
+
+		/* a drive may not have a master block so we just continue */
+		if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) {
+			(void) close(fd);
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* new_devid is only set when is_replicated() returns 1 */
+		if (is_replicated(fd, mbp, 1, &new_devid)) {
+			replicated_list_insert(mbp->mb_devid_len,
+			    mbp->mb_devid, new_devid);
+		}
+		(void) close(fd);
+	}
+	replicated_disk_list_built = 1;
+
+	Free(mbp);
+	return (1);
+}
+
+/*
+ * free_did_list()
+ *
+ * Releases a did_list_t chain, such as the one build_did_list() hands out:
+ * for every node, whichever of rdid, did, devname and minor_name are set
+ * are freed, followed by the node itself.  A NULL list is fine.
+ */
+static void
+free_did_list(
+	did_list_t	*did_listp
+)
+{
+	did_list_t	*node, *follow;
+
+	for (node = did_listp; node != NULL; node = follow) {
+		/* pick up the link before the node goes away */
+		follow = node->next;
+
+		if (node->rdid)
+			Free(node->rdid);
+		if (node->did)
+			Free(node->did);
+		if (node->devname)
+			Free(node->devname);
+		if (node->minor_name)
+			Free(node->minor_name);
+		Free(node);
+	}
+}
+
+/*
+ * build_did_list()
+ *
+ * Build a list of device ids corresponding to disks in the locator block.
+ * Memory is allocated here for the nodes in the did_list. The callers of
+ * this routine must also call free_did_list to free up the memory after
+ * they're done.
+ *
+ * For each entry of the locator block device id array that is flagged as
+ * existing, the device id is copied out of the replica, mapped to a device
+ * name, and a node carrying device id, array index, device name, dev_t and
+ * minor name is put at the head of the list.  For a replicated diskset the
+ * device id used for the mapping is the one found by looking up the stored
+ * device id in the replicated disk list; it is kept in the node's rdid.
+ *
+ * Returns:
+ *	< 0 		for failure
+ *	  1 		for valid locator block device id array
+ *	  ENOTSUP	partial diskset, not all disks in a diskset on the
+ *			system where import is being executed
+ *
+ * *did_listp is the new list on a return of 1 and NULL otherwise; on
+ * every failure return, whatever had been allocated so far is released.
+ */
+static int
+build_did_list(
+	md_error_t	*ep,
+	int		fd,
+	mddb_mb_t	*mb,
+	mddb_did_blk_t	*lbdidp,
+	did_list_t	**did_listp,
+	int		replicated
+)
+{
+	char 		*search_path = "/dev";
+	char		*minor_name;
+	int		rval, cnt;
+	devid_nmlist_t	*nm;
+	uint_t		did_info_length = 0;
+	uint_t		did_info_firstblk = 0;
+	did_list_t	*new = NULL, *head = NULL;
+	char		*bp = NULL, *temp;
+	mddb_did_info_t	*did_info = NULL;
+	void		*did = NULL;
+	size_t		new_devid_len;
+
+	for (cnt = 0; cnt < MDDB_NLB; cnt++) {
+		did_info = &lbdidp->blk_info[cnt];
+
+		if (!(did_info->info_flags & MDDB_DID_EXISTS))
+			continue;
+
+		new = Zalloc(sizeof (did_list_t));
+		new->did = Zalloc(did_info->info_length);
+
+		/*
+		 * The buffer read in for the previous entry can only be
+		 * re-used if this entry lives in exactly the same blocks,
+		 * i.e. both the first block and the length are the same.
+		 * If either one differs (or nothing has been read yet),
+		 * free the previous buffer and read a new one.
+		 */
+		if (bp == NULL ||
+		    dbtob(did_info->info_blkcnt) != did_info_length ||
+		    did_info->info_firstblk != did_info_firstblk) {
+
+			did_info_length = dbtob(did_info->info_blkcnt);
+			did_info_firstblk = did_info->info_firstblk;
+
+			if (bp)
+				Free(bp);
+			bp = Zalloc(did_info_length);
+
+			if ((rval = phys_read(ep, fd, mb, did_info_firstblk,
+			    (void *)bp, did_info_length)) < 0)
+				goto fail;
+		}
+
+		temp = bp + did_info->info_offset;
+		(void) memcpy(new->did, temp, did_info->info_length);
+		new->did_index = cnt;
+		minor_name = did_info->info_minor_name;
+
+		/*
+		 * If we are not able to find the ctd mapping corresponding
+		 * to a given device id, it probably means the device id in
+		 * question is not registered with the system.
+		 *
+		 * Highly likely that the only time this happens, we've hit
+		 * a case where not all the disks that are a part of the
+		 * diskset were moved before importing the diskset.
+		 *
+		 * If set is a replicated diskset, then the device id we get
+		 * from 'lb' will be the 'other' did and we need to lookup
+		 * the real one before we call this routine.
+		 */
+		if (replicated) {
+			temp = replicated_list_lookup(did_info->info_length,
+			    new->did);
+			new_devid_len = devid_sizeof((ddi_devid_t)temp);
+			new->rdid = Zalloc(new_devid_len);
+			(void) memcpy(new->rdid, temp, new_devid_len);
+			did = new->rdid;
+		} else {
+			did = new->did;
+		}
+
+		if (devid_valid((ddi_devid_t)(did)) == 0) {
+			rval = -1;
+			goto fail;
+		}
+
+		if (meta_deviceid_to_nmlist(search_path,
+		    (ddi_devid_t)did, minor_name, &nm) != 0) {
+			(void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD,
+			    mynode(), NULL, NULL);
+			rval = ENOTSUP;
+			goto fail;
+		}
+
+		assert(nm->devname != NULL);
+		new->devname = Strdup(nm->devname);
+		new->dev = nm->dev;
+		new->minor_name = Strdup(minor_name);
+
+		devid_free_nmlist(nm);
+
+		/* node is complete: the list owns it from here on */
+		new->next = head;
+		head = new;
+		new = NULL;
+	}
+
+	/* Free the last bp */
+	if (bp)
+		Free(bp);
+	*did_listp = head;
+	return (1);
+
+fail:
+	/*
+	 * Hook the node that was being worked on onto the list so that a
+	 * single free_did_list() disposes of everything; its unset members
+	 * are NULL since the node was zero-allocated.
+	 */
+	if (new != NULL) {
+		new->next = head;
+		head = new;
+	}
+	free_did_list(head);
+	if (bp)
+		Free(bp);
+	*did_listp = NULL;
+	return (rval);
+}
+
+/*
+ * meta_get_set_info
+ *
+ * Scans a given drive for set specific information. If the given drive
+ * has a shared metadb, scans the shared metadb for information pertaining
+ * to the set.
+ *
+ * The replica slice of the first drive on dp is opened and, in turn, the
+ * master block, the locator block, the locator block device id array, the
+ * locator names and the NM, DID_NM and DID_SHR_NM namespace records are
+ * read from it.  If all of that checks out, a set descriptor built from
+ * the information is appended to the list at mispp.
+ *
+ * Arguments:
+ *	dp		drive name list; only the first drive is scanned
+ *	mispp		list of set descriptors to append to
+ *	local_mb_ok	if zero, a master block with set number 0 makes
+ *			the drive be ignored
+ *	ep		error structure
+ *
+ * Returns:
+ * 	<0 	for failure
+ *	0	success but no replicas were found
+ *	1	success and a replica was found
+ *	ENOTSUP for partial disksets detected
+ *
+ * When dp is the last element of its list and the replica slice could be
+ * opened, the replicated disk list is freed on the way out.
+ */
+int
+meta_get_set_info(
+	mddrivenamelist_t *dp,
+	md_im_set_desc_t **mispp,
+	int local_mb_ok,
+	md_error_t *ep
+)
+{
+	uint_t			s;
+	mdname_t		*rsp;
+	int			fd;
+	char			mb[DEV_BSIZE];
+				/*LINTED*/
+	mddb_mb_t		*mbp = (mddb_mb_t *)mb;
+	char			lb[dbtob(MDDB_LBCNT)];
+				/*LINTED*/
+	mddb_lb_t		*lbp = (mddb_lb_t *)lb;
+	mddb_did_blk_t		*lbdidp = NULL;
+	mddb_ln_t		*lnp = NULL;
+	int			lnsize, lbdid_size;
+	int			rval = 0;
+	did_list_t		*did_listp = NULL;
+	mddrivenamelist_t	*dnlp;
+	mddrivename_t 		*dnp;
+	md_im_names_t		cnames = { 0, NULL};
+	char			*nm = NULL;
+	char			*did_nm = NULL, *did_shrnm = NULL;
+	struct nm_rec		*nmp;
+	struct devid_shr_rec	*did_shrnmp;
+	struct devid_min_rec	*did_nmp;
+	int			extended_namespace = 0;
+	int			replicated = 0;
+
+	dnp = dp->drivenamep;
+
+	/*
+	 * Determine and open the replica slice
+	 */
+	if (meta_replicaslice(dnp, &s, ep) != 0) {
+		return (-1);
+	}
+
+	/*
+	 * Test for the size of replica slice in question. If
+	 * the size is zero, we know that this is not a disk that was
+	 * part of a set and it should be silently ignored for import.
+	 */
+	if (dnp->vtoc.parts[s].size == 0)
+		return (0);
+
+	if ((rsp = metaslicename(dnp, s, ep)) == NULL) {
+		return (-1);
+	}
+
+	if ((fd = open(rsp->rname, O_RDONLY|O_NDELAY)) < 0)
+		return (mdsyserror(ep, errno, rsp->cname));
+
+	/*
+	 * After the open() succeeds, we should return via the "out"
+	 * label to clean up after ourselves.  (Up 'til now, we can
+	 * just return directly, because there are no resources to
+	 * give back.)
+	 */
+
+	if ((rval = read_master_block(ep, fd, mbp, sizeof (mb))) <= 0)
+		goto out;
+
+	replicated = is_replicated(fd, mbp, 0, NULL);
+
+	if (!local_mb_ok && mbp->mb_setno == 0) {
+		rval = 0;
+		goto out;
+	}
+
+	if ((rval = read_locator_block(ep, fd, mbp, lbp, sizeof (lb))) <= 0)
+		goto out;
+
+	/*
+	 * Once the locator block has been read, we need to
+	 * check if the locator block commit count is zero.
+	 * If it is zero, we know that the replica we're dealing
+	 * with is on a disk that was deleted from the disk set;
+	 * and, it potentially has stale data. We need to quit
+	 * in that case
+	 */
+	if (lbp->lb_commitcnt == 0) {
+		rval = 0;
+		goto out;
+	}
+
+	/*
+	 * Make sure that the disk being imported has device id
+	 * namespace present for disksets. If a disk doesn't have
+	 * device id namespace, we skip reading the replica on that disk
+	 */
+	if (!(lbp->lb_flags & MDDB_DEVID_STYLE)) {
+		rval = 0;
+		goto out;
+	}
+
+	/*
+	 * Grab the locator block device id array. Allocate memory for the
+	 * array first.
+	 */
+	lbdid_size = dbtob(lbp->lb_didblkcnt);
+	lbdidp = Zalloc(lbdid_size);
+
+	if ((rval = read_locator_block_did(ep, fd, mbp, lbp, lbdidp,
+	    lbdid_size)) <= 0)
+		goto out;
+
+	/*
+	 * For a disk that has not been replicated, extract the device ids
+	 * stored in the locator block device id array and store them in
+	 * a list.
+	 *
+	 * If the disk has been replicated using replication software such
+	 * as HDS Truecopy/ShadowImage or EMC SRDF/BCV, the device ids in
+	 * the locator block are invalid and we need to build a list of
+	 * replicated disks.
+	 */
+	if (replicated && !replicated_disk_list_built) {
+		/*
+		 * if there's a replicated diskset involved, we need to
+		 * scan the system one more time and build a list of all
+		 * candidate disks that might be part of that replicated set
+		 */
+		if (meta_list_disks(ep, &cnames) != 0) {
+			rval = 0;
+			goto out;
+		}
+		dnlp = meta_prune_cnames(ep, &cnames, 0);
+		rval = build_replicated_disks_list(ep, dnlp);
+
+		/*
+		 * The drive name list was only needed to build the
+		 * replicated disk list; give it back.
+		 *
+		 * NOTE(review): cnames.min_names is not released here,
+		 * since it can't be told from this routine whether the
+		 * names are still referenced elsewhere -- confirm.
+		 */
+		metafreedrivenamelist(dnlp);
+
+		/* anything but a positive return is a failure */
+		if (rval <= 0)
+			goto out;
+	}
+
+	rval = build_did_list(ep, fd, mbp, lbdidp, &did_listp, replicated);
+
+	if ((rval <= 0) || (rval == ENOTSUP))
+		goto out;
+
+	/*
+	 * Until here, we've gotten away with fixed sizes for the
+	 * master block and locator block.  The locator names,
+	 * however, are sized (and therefore allocated) dynamically
+	 * according to information in the locator block.
+	 */
+	lnsize = dbtob(lbp->lb_lnblkcnt);
+	lnp = Zalloc(lnsize);
+
+	if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0)
+		goto out;
+
+	/*
+	 * Read in the NM record
+	 * If no NM record was found, it still is a valid configuration
+	 * but it also means that we won't find any corresponding DID_NM
+	 * or DID_SHR_NM.
+	 */
+	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &nm, MDDB_NM, rsp->cname))
+	    < 0)
+		goto out;
+	else if (rval == 0)
+		goto append;
+
+	/*
+	 * At this point, we have read in all of the blocks that form
+	 * the nm_rec.  We should at least detect the corner case
+	 * mentioned above, in which r_next_recid links to another
+	 * nm_rec. Extended namespace handling is left for Phase 2.
+	 *
+	 * What this should really be is a loop, each iteration of
+	 * which reads in a nm_rec and calls the set_append_wrapper().
+	 */
+	/*LINTED*/
+	nmp = (struct nm_rec *)(nm + sizeof (mddb_rb_t));
+	if (nmp->r_rec_hdr.r_next_recid != (mddb_recid_t)0) {
+		extended_namespace = 1;
+		rval = 0;
+		goto out;
+	}
+
+	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm,
+	    MDDB_DID_NM, rsp->cname)) < 0)
+		goto out;
+	else if (rval == 0)
+		goto append;
+
+	/*LINTED*/
+	did_nmp = (struct devid_min_rec *)(did_nm + sizeof (mddb_rb_t));
+	if (did_nmp->min_rec_hdr.r_next_recid != (mddb_recid_t)0) {
+		extended_namespace = 1;
+		rval = 0;
+		goto out;
+	}
+
+	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_shrnm,
+	    MDDB_DID_SHR_NM, rsp->cname)) < 0)
+		goto out;
+	else if (rval == 0)
+		goto append;
+
+	/*LINTED*/
+	did_shrnmp = (struct devid_shr_rec *)(did_shrnm + sizeof (mddb_rb_t));
+	if (did_shrnmp->did_rec_hdr.r_next_recid != (mddb_recid_t)0) {
+		extended_namespace = 1;
+		rval = 0;
+		goto out;
+	}
+
+append:
+	/* Finally, we've got what we need to process this replica. */
+	mispp = set_append_wrapper(mispp, did_listp, mbp, lbp,
+	    /*LINTED*/
+	    (mddb_rb_t *)nm, (mddb_rb_t *)did_nm, (mddb_rb_t *)did_shrnm,
+	    ep, replicated);
+
+	/* Return the fact that we found at least one set */
+	rval = 1;
+
+out:
+	if (fd >= 0)
+		(void) close(fd);
+	if (did_listp != NULL)
+		free_did_list(did_listp);
+	/* the device id array is ours alone, so it goes back here too */
+	if (lbdidp != NULL)
+		Free(lbdidp);
+	if (lnp != NULL)
+		Free(lnp);
+	if (nm != NULL)
+		Free(nm);
+	if (did_nm != NULL)
+		Free(did_nm);
+	if (did_shrnm != NULL)
+		Free(did_shrnm);
+
+	/*
+	 * If we are at the end of the list, we must free up
+	 * the replicated list too
+	 */
+	if (dp->next == NULL)
+		free_replicated_disks_list();
+
+	if (extended_namespace)
+		return (mddserror(ep, MDE_DS_EXTENDEDNM, MD_SET_BAD,
+		    mynode(), NULL, NULL));
+
+	return (rval);
+}
+
+/*
+ * Look up the devid minor name of the disk slice <devname>.
+ *
+ * The slice is opened read-only and non-blocking just long enough to
+ * call devid_get_minor_name().  On success a Strdup()'ed copy of the
+ * minor name is returned.  NULL is returned when <devname> is NULL, when
+ * the open fails (the system error is recorded in <ep>), or when
+ * devid_get_minor_name() fails (<ep> is left untouched in that case).
+ */
+static char *
+meta_getminor_name(
+	char *devname,
+	md_error_t *ep
+)
+{
+	char	*devid_minor = NULL;
+	char	*result = NULL;
+	int	slice_fd;
+
+	if (devname == NULL)
+		return (NULL);
+
+	slice_fd = open(devname, O_RDONLY|O_NDELAY, 0);
+	if (slice_fd < 0) {
+		(void) mdsyserror(ep, errno, devname);
+		return (NULL);
+	}
+
+	if (devid_get_minor_name(slice_fd, &devid_minor) != 0) {
+		/* no minor name available for this slice */
+		(void) close(slice_fd);
+		return (NULL);
+	}
+
+	/* hand back our own copy and release the libdevid string */
+	result = Strdup(devid_minor);
+	devid_str_free(devid_minor);
+
+	(void) close(slice_fd);
+	return (result);
+}
+
+/*
+ * Check whether enough replicas of the set described by <misp> are
+ * reachable for an import.
+ *
+ * Every replica that lives on a drive whose replica slice can be named
+ * and stat()'ed is counted.  Returns 0 when that count is at least
+ * (mis_active_replicas + 1) / 2, otherwise -1.  Per-drive lookup errors
+ * are cleared from <ep>; such drives simply do not contribute.
+ */
+static int
+meta_replica_quorum(
+	md_im_set_desc_t *misp,
+	md_error_t *ep
+)
+{
+	md_im_drive_info_t	*drive;
+	md_im_replica_info_t    *rep;
+	mdname_t		*slice_np;
+	struct stat		sb;
+	uint_t			slice;
+	int			usable = 0;
+
+	for (drive = misp->mis_drives; drive != NULL;
+	    drive = drive->mid_next) {
+
+		/* work out which slice would hold the replicas */
+		if (meta_replicaslice(drive->mid_dnp, &slice, ep) != 0) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		slice_np = metaslicename(drive->mid_dnp, slice, ep);
+		if (slice_np == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* a drive whose block device is missing contributes nothing */
+		if (stat(slice_np->bname, &sb) != 0)
+			continue;
+
+		/*
+		 * The drive is okay, so every replica on it counts
+		 */
+		rep = drive->mid_replicas;
+		while (rep != NULL) {
+			usable++;
+			rep = rep->mir_next;
+		}
+	}
+
+	if (usable < (misp->mis_active_replicas + 1)/2)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Pick the set number to use for an imported diskset.
+ *
+ * Set numbers 1 .. get_max_sets() - 1 are probed in ascending order with
+ * clnt_setnumbusy() on the local node and the first one reported as not
+ * busy is returned.  MD_SET_BAD is returned when the maximum cannot be
+ * determined, when a probe fails, or when every number is busy.
+ */
+static set_t
+meta_imp_setno(
+	md_error_t *ep
+)
+{
+	set_t	limit, candidate;
+	int	busy;
+
+	limit = get_max_sets(ep);
+	if (limit == 0)
+		return (MD_SET_BAD);
+
+	/*
+	 * This code needs to be expanded when we run in SunCluster
+	 * environment SunCluster obtains setno internally
+	 */
+	for (candidate = 1; candidate < limit; candidate++) {
+		if (clnt_setnumbusy(mynode(), candidate, &busy, ep) == -1)
+			return (MD_SET_BAD);
+
+		/*
+		 * found one available
+		 */
+		if (busy == FALSE)
+			return (candidate);
+	}
+
+	/* every set number is already taken */
+	return (MD_SET_BAD);
+}
+
+/*
+ * Release the device id buffers currently attached to the locator of
+ * <cp> and reset the fields so the buffers can neither be freed twice
+ * nor be handed to the kernel again for a different drive.
+ */
+static void
+imp_free_locator_devids(
+	struct mddb_config	*cp
+)
+{
+	if (cp->c_locator.l_devid != 0)
+		Free((void *)(uintptr_t)cp->c_locator.l_devid);
+	if (cp->c_locator.l_old_devid != 0)
+		Free((void *)(uintptr_t)cp->c_locator.l_old_devid);
+
+	cp->c_locator.l_devid = 0;
+	cp->c_locator.l_devid_sz = 0;
+	cp->c_locator.l_old_devid = 0;
+	cp->c_locator.l_old_devid_sz = 0;
+}
+
+/*
+ * Import the diskset described by <misp> under the name <setname>.
+ *
+ * Steps:
+ *   1. fail if <setname> is already known (MDE_DS_SETNAMEBUSY) or no set
+ *	number is free (MDE_DS_SETNOTIMP);
+ *   2. unless <force> is set, require a replica quorum
+ *	(MDE_DS_INSUFQUORUM);
+ *   3. describe every drive of the set, and every replica on it, to the
+ *	kernel with MD_DB_USEDEV.  Drives for which the replica slice, the
+ *	minor name or the controller info cannot be determined are skipped;
+ *   4. if <dry_run> is set, report that the import should work and stop;
+ *   5. otherwise load the set with MD_IOCIMP_LOAD, enable the diskset SMF
+ *	services, create the /dev/md/<setname> symlink and ask rpc.metad to
+ *	resnarf the set.
+ *
+ * Returns 0 on success (symlink problems are only recorded in <ep>),
+ * otherwise the value produced by the md error routine that recorded the
+ * failure in <ep>.
+ */
+int
+meta_imp_set(
+	md_im_set_desc_t *misp,
+	char		*setname,
+	int		force,
+	bool_t		dry_run,
+	md_error_t	*ep
+)
+{
+	md_timeval32_t		tp;
+	md_im_drive_info_t	*midp;
+	uint_t			rep_slice;
+	mddrivename_t		*dnp;
+	struct mddb_config	c;
+	mdname_t		*np;
+	md_im_replica_info_t	*mirp;
+	char			setnum_link[MAXPATHLEN];
+	char			setname_link[MAXPATHLEN];
+	char			*minor_name = NULL;
+
+	(void) memset(&c, 0, sizeof (c));
+	(void) strlcpy(c.c_setname, setname, sizeof (c.c_setname));
+	c.c_sideno = 0;
+	c.c_flags = MDDB_C_IMPORT;
+
+	/*
+	 * Check to see if the setname that the set is being imported into,
+	 * already exists.
+	 */
+	if (getsetbyname(c.c_setname, ep) != NULL) {
+		return (mddserror(ep, MDE_DS_SETNAMEBUSY, MD_SET_BAD,
+		    mynode(), NULL, c.c_setname));
+	}
+
+	/*
+	 * Find the next available set number
+	 */
+	if ((c.c_setno = meta_imp_setno(ep)) == MD_SET_BAD) {
+		return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
+		    mynode(), NULL, c.c_setname));
+	}
+
+	if (meta_gettimeofday(&tp) == -1) {
+		return (mdsyserror(ep, errno, NULL));
+	}
+	c.c_timestamp = tp;
+
+	/* Check to see if replica quorum requirement is fulfilled */
+	if (!force && meta_replica_quorum(misp, ep) == -1)
+		return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD,
+		    mynode(), NULL, c.c_setname));
+
+	for (midp = misp->mis_drives; midp != NULL;
+	    midp = midp->mid_next) {
+		mdcinfo_t	*cinfo;
+
+		/*
+		 * We pass down the list of the drives in the
+		 * set down to the kernel irrespective of
+		 * whether the drives have a replica or not.
+		 *
+		 * The kernel detects which of the drives don't
+		 * have a replica and accordingly does the
+		 * right thing.
+		 */
+		dnp = midp->mid_dnp;
+		if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
+		    ((np = metaslicename(dnp, rep_slice, ep))
+		    == NULL)) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/*
+		 * Resolve everything that can fail for this drive before
+		 * any buffer is attached to the locator, so that skipping
+		 * the drive cannot leak memory.  A drive without a minor
+		 * name is skipped like any other drive we cannot describe.
+		 */
+		minor_name = meta_getminor_name(np->bname, ep);
+		if (minor_name == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
+			mdclrerror(ep);
+			Free(minor_name);
+			minor_name = NULL;
+			continue;
+		}
+
+		(void) strlcpy(c.c_locator.l_devname, np->bname,
+		    sizeof (c.c_locator.l_devname));
+		c.c_locator.l_dev = meta_cmpldev(np->dev);
+		c.c_locator.l_mnum = meta_getminor(np->dev);
+		(void) strncpy(c.c_locator.l_minor_name, minor_name,
+		    sizeof (c.c_locator.l_minor_name));
+		Free(minor_name);
+		minor_name = NULL;
+		(void) strncpy(c.c_locator.l_driver, cinfo->dname,
+		    sizeof (c.c_locator.l_driver));
+
+		c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz);
+		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
+		    midp->mid_devid, midp->mid_devid_sz);
+		c.c_locator.l_devid_sz = midp->mid_devid_sz;
+		c.c_locator.l_devid_flags =
+		    MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+
+		/* never let a previous drive's old devid leak into this one */
+		c.c_locator.l_old_devid = 0;
+		c.c_locator.l_old_devid_sz = 0;
+		if (midp->mid_o_devid) {
+			c.c_locator.l_old_devid =
+			    (uintptr_t)Malloc(midp->mid_o_devid_sz);
+			(void) memcpy(
+			    (void *)(uintptr_t)c.c_locator.l_old_devid,
+			    midp->mid_o_devid, midp->mid_o_devid_sz);
+			c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz;
+		}
+
+		mirp = midp->mid_replicas;
+
+		/*
+		 * One MD_DB_USEDEV per replica; a drive without replicas
+		 * is still announced once with a dummy offset.
+		 */
+		do {
+			if (mirp) {
+				c.c_locator.l_flags = 0;
+				c.c_locator.l_blkno = mirp->mir_offset;
+				mirp = mirp->mir_next;
+			} else {
+				/*
+				 * Default offset for dummy is 16
+				 */
+				c.c_locator.l_blkno = 16;
+			}
+
+			if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
+				imp_free_locator_devids(&c);
+				return (mdstealerror(ep, &c.c_mde));
+			}
+		} while (mirp != NULL);
+
+		/*
+		 * The devid buffers are only needed while the ioctls for
+		 * this drive run (presumably the driver copies them in
+		 * during the call), so release them before moving on.
+		 */
+		imp_free_locator_devids(&c);
+	}
+
+	/*
+	 * If the dry run option was specified, flag success
+	 * and exit out
+	 */
+	if (dry_run == 1) {
+		md_eprintf("%s\n", dgettext(TEXT_DOMAIN,
+		    "import should be successful"));
+		return (0);
+	}
+
+	/*
+	 * Now kernel should have all the information
+	 * regarding the import diskset replica.
+	 * Tell kernel to load them up and import the set
+	 */
+	if (metaioctl(MD_IOCIMP_LOAD, &c.c_setno, &c.c_mde, NULL) != 0)
+		return (mdstealerror(ep, &c.c_mde));
+
+	(void) meta_smf_enable(META_SMF_DISKSET, NULL);
+
+	/* The set has now been imported, create the appropriate symlink */
+	(void) snprintf(setname_link, MAXPATHLEN, "/dev/md/%s", setname);
+	(void) snprintf(setnum_link, MAXPATHLEN, "shared/%d", c.c_setno);
+
+	/*
+	 * Since we already verified that the setname was OK, make sure to
+	 * cleanup before proceeding.
+	 */
+	if (unlink(setname_link) == -1) {
+		if (errno != ENOENT)
+			(void) mdsyserror(ep, errno, setname_link);
+	}
+
+	if (symlink(setnum_link, setname_link) == -1)
+		(void) mdsyserror(ep, errno, setname_link);
+
+	/* resnarf the set that has just been imported */
+	if (clnt_resnarf_set(mynode(), c.c_setno, ep) != 0)
+		md_eprintf("%s\n", dgettext(TEXT_DOMAIN, "Please stop and "
+		    "restart rpc.metad"));
+
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_init.c b/usr/src/lib/lvm/libmeta/common/meta_init.c
new file mode 100644
index 0000000000..5775af48bc
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_init.c
@@ -0,0 +1,453 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * initialize metadevices
+ */
+
+#include <meta.h>
+#include <libdevinfo.h>
+
+
+/*
+ * Parse the interlace specification <str> into a number of disk blocks.
+ *
+ * Accepted forms are a decimal number optionally followed by one unit
+ * character:
+ *	<n>		bytes; must be a multiple of DEV_BSIZE
+ *	<n>b or <n>B	disk blocks
+ *	<n>k or <n>K	kilobytes
+ *	<n>m or <n>M	megabytes
+ * Anything after the unit character is not examined.
+ *
+ * On success the block count is stored in <*interlacep> and 0 is
+ * returned.  Otherwise MDE_BAD_INTERLACE is recorded in <ep> through
+ * meta_cook_syntax() and its return value is passed back.
+ */
+int
+parse_interlace(
+	char		*uname,		/* Meta Device name (eg d0) */
+	char		*str,		/* String to Parse		 */
+	diskaddr_t	*interlacep,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	value;
+	char		unit;
+	int		nfields;
+
+	/* parse interlace */
+	nfields = sscanf(str, "%llu%c", &value, &unit);
+	if (nfields < 1) {
+		return (meta_cook_syntax(ep, MDE_BAD_INTERLACE,
+		    uname, 1, &str));
+	}
+
+	if (nfields == 1) {
+		/* plain byte count: only whole blocks are acceptable */
+		if ((value & (DEV_BSIZE - 1)) != 0) {
+			return (meta_cook_syntax(ep, MDE_BAD_INTERLACE,
+			    uname, 1, &str));
+		}
+		*interlacep = lbtodb(value);
+		return (0);
+	}
+
+	/* number plus unit: scale to disk blocks */
+	switch (unit) {
+	case 'b':
+	case 'B':
+		value *= DEV_BSIZE / DEV_BSIZE;
+		break;
+	case 'k':
+	case 'K':
+		value *= 1024 / DEV_BSIZE;
+		break;
+	case 'm':
+	case 'M':
+		value *= 1024 * 1024 / DEV_BSIZE;
+		break;
+	default:
+		return (meta_cook_syntax(ep, MDE_BAD_INTERLACE,
+		    NULL, 1, &str));
+	}
+
+	/* return success */
+	*interlacep = value;
+	return (0);
+}
+
+/*
+ * cook up syntax error
+ *
+ * Records <errcode> in <ep> and returns the result of mderror().  The
+ * error text is built from <uname> and the first token of <argv>:
+ *	token and uname present		uname: "token"
+ *	token only			"token"
+ *	no (or empty) token		uname, passed through unchanged
+ */
+int
+meta_cook_syntax(
+	md_error_t	*ep,
+	md_void_errno_t	errcode,
+	char		*uname,
+	int		argc,
+	char		*argv[]
+)
+{
+	char		*token = NULL;
+	char		*msg;
+	size_t		msgsz;
+	int		have_uname;
+	int		rval;
+
+	if (argc > 0)
+		token = argv[0];
+
+	/* without a token there is nothing to decorate */
+	if ((token == NULL) || (token[0] == '\0'))
+		return (mderror(ep, errcode, uname));
+
+	have_uname = ((uname != NULL) && (uname[0] != '\0'));
+
+	/* two quotes and the terminator, plus "<uname>: " when present */
+	msgsz = strlen(token) + 3;
+	if (have_uname)
+		msgsz += strlen(uname) + 2;
+
+	msg = Malloc(msgsz);
+	if (have_uname)
+		(void) snprintf(msg, msgsz, "%s: \"%s\"", uname, token);
+	else
+		(void) snprintf(msg, msgsz, "\"%s\"", token);
+
+	rval = mderror(ep, errcode, msg);
+	Free(msg);
+
+	return (rval);
+}
+
+/*
+ * Select the creation option matching a device of <total_blocks> blocks:
+ * MD_CRO_64BIT when the size exceeds MD_MAX_BLKS_FOR_SMALL_DEVS,
+ * MD_CRO_32BIT otherwise.
+ */
+int
+meta_check_devicesize(
+	diskaddr_t	total_blocks
+)
+{
+	if (total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS)
+		return (MD_CRO_64BIT);
+
+	return (MD_CRO_32BIT);
+}
+
+
+/*
+ * setup metadevice geometry
+ *
+ * Copies heads, sectors and rpm from <geomp> and the two reinstruct
+ * values into the common part of <md>, and sets un_total_blocks to
+ * un_actual_tb, rounded down to a whole number of cylinders
+ * (nhead * nsect blocks) when <round_cyl> is non-zero.
+ * <np> and <ep> are unused.  Always returns 0.
+ */
+/*ARGSUSED*/
+int
+meta_setup_geom(
+	md_unit_t	*md,
+	mdname_t	*np,
+	mdgeom_t	*geomp,
+	uint_t		write_reinstruct,
+	uint_t		read_reinstruct,
+	uint_t		round_cyl,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	cyl_blocks = geomp->nhead * geomp->nsect;
+	diskaddr_t	usable = md->c.un_actual_tb;
+
+	if (round_cyl)
+		usable = rounddown(md->c.un_actual_tb, cyl_blocks);
+
+	/* size first, then the geometry taken over from the component */
+	md->c.un_total_blocks = usable;
+	md->c.un_nhead = geomp->nhead;
+	md->c.un_nsect = geomp->nsect;
+	md->c.un_rpm = geomp->rpm;
+	md->c.un_wr_reinstruct = write_reinstruct;
+	md->c.un_rd_reinstruct = read_reinstruct;
+
+	return (0);
+}
+
+/*
+ * adjust metadevice geometry
+ *
+ * Recomputes un_total_blocks from un_actual_tb, rounded down to a whole
+ * number of cylinders (un_nhead * un_nsect blocks) when <round_cyl> is
+ * non-zero, and raises the stored reinstruct values to the supplied
+ * ones when those are larger.  <np> and <ep> are unused.
+ * Always returns 0.
+ */
+/*ARGSUSED*/
+int
+meta_adjust_geom(
+	md_unit_t	*md,
+	mdname_t	*np,
+	uint_t		write_reinstruct,
+	uint_t		read_reinstruct,
+	uint_t		round_cyl,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	cyl_blocks = md->c.un_nhead * md->c.un_nsect;
+	diskaddr_t	usable = md->c.un_actual_tb;
+
+	if (round_cyl)
+		usable = rounddown(md->c.un_actual_tb, cyl_blocks);
+	md->c.un_total_blocks = usable;
+
+	/* reinstruct values only ever grow */
+	if (md->c.un_wr_reinstruct < write_reinstruct)
+		md->c.un_wr_reinstruct = write_reinstruct;
+	if (md->c.un_rd_reinstruct < read_reinstruct)
+		md->c.un_rd_reinstruct = read_reinstruct;
+
+	return (0);
+}
+
+/*
+ * Record <errcode> in <ep> against the double-quoted unit name <uname>
+ * and return the result of mderror().
+ */
+static int
+init_make_device_error(
+	md_error_t	*ep,
+	md_void_errno_t	errcode,
+	char		*uname
+)
+{
+	size_t	qlen = strlen(uname) + 3;	/* two quotes and NUL */
+	char	*quoted = Malloc(qlen);
+	int	rval;
+
+	(void) snprintf(quoted, qlen, "\"%s\"", uname);
+	rval = mderror(ep, errcode, quoted);
+	Free(quoted);
+	return (rval);
+}
+
+/*
+ * Function: meta_init_make_device
+ * Purpose:
+ * 	Create the device node <uname> by constructing the necessary
+ * 	md_mkdev_params_t structure. We have to handle relative names
+ *	(e.g. "d80") and fully-qualified names (e.g. "/dev/md/red/dsk/d80").
+ *	The field that we need is the unit number of the metadevice (80 in
+ *	the above examples).  The character in front of the number is
+ *	skipped without being examined, and anything following the number
+ *	is ignored.
+ * Input:	spp	set structure
+ *		uname	unit-name (fully qualified or relative)
+ * Output:	ep	error return structure
+ * Returns:	0	success
+ *		other	Error. <ep> contains error reason:
+ *			MDE_NOT_DRIVENAME if no unit number can be found in
+ *			<uname> (this includes an empty <uname>), the
+ *			driver's error if MD_IOCMAKE_DEV fails, or
+ *			MDE_UNIT_NOT_FOUND if the /dev links could not be
+ *			created.
+ */
+int
+meta_init_make_device(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	di_devlink_handle_t	hdl;
+	md_mkdev_params_t	params;
+	char			*p, *e = uname;
+	size_t			len = strlen(uname);
+
+	e += len;
+	(void) memset(&params, 0, sizeof (params));
+	MD_SETDRIVERNAME(&params, "md", (*spp)->setno);
+
+	/*
+	 * Find the start of the unit within <uname>.
+	 */
+	p = strrchr(uname, '/');
+	if (p == NULL) {
+		/*
+		 * Relative name (e.g. d80).  An empty name has no
+		 * character to skip; looking at uname[1] would read
+		 * past the terminator.
+		 */
+		if (len == 0)
+			return (init_make_device_error(ep,
+			    MDE_NOT_DRIVENAME, uname));
+		p = &uname[1];
+	} else {
+		/* qualified name (e.g. /dev/md/dsk/d80) */
+		p += 2;
+		if (p >= e) {
+			/* Invalid drive name */
+			return (init_make_device_error(ep,
+			    MDE_NOT_DRIVENAME, uname));
+		}
+	}
+
+	e = NULL;
+	params.mnum = strtoul(p, &e, 10);
+	if (e == p) {
+		/* Invalid drive name: no digits where the unit should be */
+		return (init_make_device_error(ep, MDE_NOT_DRIVENAME, uname));
+	}
+
+	if (metaioctl(MD_IOCMAKE_DEV, &params, &params.mde, NULL) != 0) {
+		return (mdstealerror(ep, &params.mde));
+	}
+
+	/*
+	 * Wait until device appears in namespace. di_devlink_init() returns
+	 * once the /dev links have been created. If NULL is returned the
+	 * link operation failed and we haven't got a device to use.
+	 * NOTE: This will take a _long_ time for large numbers of metadevices.
+	 *	 Change to use the enhanced di_devlink_init() interface when
+	 *	 available.
+	 */
+	hdl = di_devlink_init("md", DI_MAKE_LINK);
+	if (hdl == NULL)
+		return (init_make_device_error(ep, MDE_UNIT_NOT_FOUND, uname));
+
+	(void) di_devlink_fini(&hdl);
+	return (0);
+}
+
+/*
+ * FUNCTION:	is_metadb_cmd()
+ * INPUT:	argc	- number of command line arguments
+ *		argv	- pointer to array of command line arguments
+ * OUTPUT:	none
+ * RETURNS:	TRUE if a metadb is to be created, FALSE otherwise
+ * PURPOSE:	parses enough of the command line to determine if a metadb
+ *		create is being attempted: the first argument must consist
+ *		of "mddb" followed by a number and nothing else
+ */
+static boolean_t
+is_metadb_cmd(
+	int	argc,
+	char	*argv[]
+)
+{
+	ulong_t	unit;
+	int	consumed;
+
+	if (argc <= 0)
+		return (B_FALSE);
+
+	/* look for match */
+	if (sscanf(argv[0], "mddb%lu%n", &unit, &consumed) != 1)
+		return (B_FALSE);
+
+	/* the number has to be the end of the name */
+	if (strlen(argv[0]) != consumed)
+		return (B_FALSE);
+
+	if ((long)unit < 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * FUNCTION:	is_stripe_cmd()
+ * INPUT:	argc	- number of command line arguments
+ *		argv	- pointer to array of command line arguments
+ * OUTPUT:	none
+ * RETURNS:	TRUE if a stripe is to be created, FALSE otherwise
+ * PURPOSE:	parses enough of the command line to determine if a stripe
+ *		create is being attempted: the second argument must start
+ *		with a row count that is not negative when viewed as an int.
+ *		FALSE is returned when there is no second argument.
+ */
+static boolean_t
+is_stripe_cmd(
+	int	argc,
+	char	*argv[]
+)
+{
+	uint_t	nrow = 0;
+
+	/* without a second argument there is no row count to look at */
+	if (argc < 2 || argv[1] == NULL)
+		return (B_FALSE);
+
+	/*
+	 * The two tests are kept apart on purpose: nrow may only be
+	 * examined once sscanf() has actually stored a value in it.
+	 */
+	if (sscanf(argv[1], "%u", &nrow) != 1)
+		return (B_FALSE);
+
+	if ((int)nrow < 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * FUNCTION:	meta_get_init_type()
+ * INPUT:	argc	- number of command line arguments
+ *		argv	- pointer to array of command line arguments
+ * OUTPUT:	none
+ * RETURNS:	type of metadevice or hot spare pool being initialized;
+ *		TAB_UNKNOWN if there is no usable second argument
+ * PURPOSE:	parses enough of the command line to determine what type
+ *		of metainit is being attempted.  A lone name is a hot spare
+ *		pool without devices; otherwise the second argument (or,
+ *		for a metadb, the name itself) decides.
+ */
+mdinittypes_t
+meta_get_init_type(
+	int 	argc,
+	char	*argv[]
+)
+{
+	char		*arg;
+
+	if (argc == 1) /* must be a hot spare pool w/o devices */
+		return (TAB_HSP);
+
+	/* argv[1] may only be looked at when it is known to exist */
+	if (argc < 2)
+		return (TAB_UNKNOWN);
+
+	arg = argv[1];
+	if (arg == NULL)
+		return (TAB_UNKNOWN);
+
+	if (strcmp(arg, "-m") == 0)
+		return (TAB_MIRROR);
+	if (strcmp(arg, "-r") == 0)
+		return (TAB_RAID);
+	if (strcmp(arg, "-p") == 0)
+		return (TAB_SP);
+	if (strcmp(arg, "-t") == 0)
+		return (TAB_TRANS);
+	if (is_metadb_cmd(argc, argv))
+		return (TAB_MDDB);
+	if (is_stripe_cmd(argc, argv))
+		return (TAB_STRIPE);
+
+	/* assume that it is a hsp */
+	return (TAB_HSP);
+}
+
+/*
+ * initialize named device or hotspare pool
+ *
+ * argv[0] is the name being initialized and the remaining arguments
+ * describe it; the kind of object is determined by meta_get_init_type().
+ *
+ * Hot spare pools are handed straight to meta_init_hsp().  For a
+ * metadevice the set is resolved from the name, the device node is
+ * created, and then the type-specific initializer is called; its return
+ * value is passed back.  Trans devices are rejected with MDE_EOF_TRANS.
+ * Everything else is reported as MDE_SYNTAX against the quoted name.
+ *
+ * NOTE(review): init types without a case in the switch below (e.g.
+ * TAB_MDDB) drop out of it and are reported as MDE_SYNTAX, after the
+ * device node has already been created.
+ */
+int
+meta_init_name(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdinittypes_t	init_type;
+	char		*p;
+	int		rval;
+	char		*uname = argv[0];
+
+	assert(argc > 0);
+
+	/* determine type of metadevice or hot spare pool being created */
+	init_type = meta_get_init_type(argc, argv);
+
+	/* hotspare pool */
+	if (init_type == TAB_HSP)
+		return (meta_init_hsp(spp, argc, argv, options, ep));
+
+	/* metadevice */
+	if (argc >= 2 && init_type != TAB_UNKNOWN) {
+		md_error_t	t_e = mdnullerror;
+		char	*cname;
+
+		/*
+		 * We need to create the device node if the specified metadevice
+		 * does not already exist in the database. The actual creation
+		 * is undertaken by the md driver and the links propagated by
+		 * devfsadm.
+		 */
+
+		/*
+		 * initialize the spp properly; the name itself is not
+		 * needed, the call is made for its effect on <spp>
+		 */
+		if ((cname = meta_name_getname(spp, uname, &t_e)) != NULL)
+			Free(cname);
+		if (! mdisok(&t_e))
+			return (mdstealerror(ep, &t_e));
+
+		/* Create device node */
+		if (meta_init_make_device(spp, uname, &t_e) != 0) {
+			return (mdstealerror(ep, &t_e));
+		}
+
+		/* every case returns; the breaks are never reached */
+		switch (init_type) {
+		case TAB_MIRROR:
+			return (meta_init_mirror(spp, argc, argv, options, ep));
+			break;
+		case TAB_RAID:
+			return (meta_init_raid(spp, argc, argv, options, ep));
+			break;
+		case TAB_SP:
+			return (meta_init_sp(spp, argc, argv, options, ep));
+			break;
+		case TAB_TRANS:
+			return (mderror(ep, MDE_EOF_TRANS, NULL));
+			break;
+		case TAB_STRIPE:
+			return (meta_init_stripe(spp, argc, argv, options, ep));
+			break;
+		}
+	}
+
+	/* unknown type: report the quoted name as a syntax error */
+	p = Malloc(1 + strlen(uname) + 1 + 1);
+	(void) strcpy(p, "\"");
+	(void) strcat(p, uname);
+	(void) strcat(p, "\"");
+	rval = mderror(ep, MDE_SYNTAX, p);
+	Free(p);
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_lib_prv.c b/usr/src/lib/lvm/libmeta/common/meta_lib_prv.c
new file mode 100644
index 0000000000..1b63a2a03e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_lib_prv.c
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1992-2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_lib_prv.h"
+#include <sys/vfstab.h>
+
+/* stream on MNTTAB shared by open_mnttab() and close_mnttab() */
+static 	FILE	*mfp = NULL;
+
+/*
+ * Return a stream on MNTTAB positioned at the start of the file.
+ *
+ * The stream is opened on first use and kept for later calls, which
+ * merely rewind it.  If rewinding fails the stream is closed and
+ * forgotten.  Returns NULL when the file cannot be opened or rewound.
+ */
+FILE *
+open_mnttab(void)
+{
+	if (mfp == NULL) {
+		/* first use, or the previous stream has been given up */
+		mfp = fopen(MNTTAB, "r");
+		return (mfp);
+	}
+
+	if (fseeko(mfp, (off_t)0L, SEEK_SET) != -1)
+		return (mfp);
+
+	/* the cached stream is unusable, drop it */
+	(void) fclose(mfp);
+	mfp = NULL;
+	return (NULL);
+}
+
+/*
+ * Close the stream kept by open_mnttab(), if there is one.
+ * Returns 0 when no stream was open, otherwise the result of fclose().
+ */
+int
+close_mnttab(void)
+{
+	int	status;
+
+	if (mfp == NULL)
+		return (0);
+
+	status = fclose(mfp);
+	mfp = NULL;	/* forget the stream whether or not the close worked */
+
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mdcf.c b/usr/src/lib/lvm/libmeta/common/meta_mdcf.c
new file mode 100644
index 0000000000..3af1c3be19
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mdcf.c
@@ -0,0 +1,148 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * patch md.cf file
+ */
+
+#include <meta.h>
+
+/*
+ * save metadevice configuration in md.cf
+ *
+ * Only the local set is recorded; for any other set this is a no-op that
+ * returns 0.  The configuration is written to METACONFTMP (given the
+ * mode and ownership of an existing METACONF) and then renamed over
+ * METACONF.  If the rename fails with EROFS the temporary file is copied
+ * into METACONF line by line instead and then removed.
+ *
+ * Returns 0 on success.  On failure an error is recorded in <ep> and a
+ * non-zero value is returned.
+ */
+int
+meta_update_md_cf(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	char		*name = METACONF;
+	char		*tname = METACONFTMP;
+	FILE		*tfp = NULL;
+	FILE		*mfp = NULL;
+	mdprtopts_t	options = PRINT_SHORT | PRINT_FAST;
+	struct stat	sbuf;
+	char		line[1000];
+
+	/* If this is not the local set, no need to do anything */
+	if (!metaislocalset(sp))
+		return (0);
+
+	/* open temp file */
+	if ((tfp = fopen(tname, "w")) == NULL)
+		return (mdsyserror(ep, errno, tname));
+	if (stat(name, &sbuf) == 0) {
+		(void) fchmod(fileno(tfp), (sbuf.st_mode & 0777));
+		(void) fchown(fileno(tfp), sbuf.st_uid, sbuf.st_gid);
+	}
+
+	/* dump header */
+	if (fputs(dgettext(TEXT_DOMAIN,
+	    "# metadevice configuration file\n"
+	    "# do not hand edit\n"), tfp) == EOF) {
+		(void) mdsyserror(ep, errno, tname);
+		goto errout;
+	}
+
+	/* dump device configuration */
+	if (meta_print_all(sp, tname, NULL, tfp, options, NULL, ep) != 0)
+		goto errout;
+
+	/*
+	 * close and rename file.  The stream is gone after fclose() even
+	 * when it reports an error, so forget it before bailing out or
+	 * errout would close it a second time.
+	 */
+	if (fclose(tfp) != 0) {
+		tfp = NULL;
+		(void) mdsyserror(ep, errno, tname);
+		(void) unlink(tname);
+		goto errout;
+	}
+	tfp = NULL;
+
+	/*
+	 * Renames don't work in the miniroot since tmpfiles are
+	 * created in /var/tmp. Hence we copy the data out.
+	 */
+
+	if (rename(tname, name) != 0) {
+		if (errno != EROFS) {
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+
+		if ((tfp = fopen(tname, "r")) == NULL) {
+			(void) mdsyserror(ep, errno, tname);
+			goto errout;
+		}
+		if ((mfp = fopen(name, "w+")) == NULL) {
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+
+		/* fputs() signals failure with EOF, not with 0 */
+		while (fgets(line, sizeof (line), tfp) != NULL) {
+			if (fputs(line, mfp) == EOF) {
+				(void) mdsyserror(ep, errno, name);
+				goto errout;
+			}
+		}
+		/* tell end of file apart from a read error */
+		if (ferror(tfp)) {
+			(void) mdsyserror(ep, errno, tname);
+			goto errout;
+		}
+
+		if (fclose(tfp) != 0) {
+			tfp = NULL;
+			(void) mdsyserror(ep, errno, tname);
+			goto errout;
+		}
+		tfp = NULL;
+		/* delete the tempfile */
+		(void) unlink(tname);
+
+		/* make sure the copy has reached the disk */
+		if (fflush(mfp) != 0) {
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+		if (fsync(fileno(mfp)) != 0) {
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+		if (fclose(mfp) != 0) {
+			mfp = NULL;
+			(void) mdsyserror(ep, errno, name);
+			goto errout;
+		}
+		mfp = NULL;
+	}
+
+	/* success */
+	return (0);
+
+	/* cleanup, return error */
+errout:
+	if (tfp != NULL) {
+		(void) fclose(tfp);
+		(void) unlink(tname);
+	}
+	if (mfp != NULL) {
+		(void) fclose(mfp);
+	}
+	return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_med.c b/usr/src/lib/lvm/libmeta/common/meta_med.c
new file mode 100644
index 0000000000..b11f86a0c1
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_med.c
@@ -0,0 +1,851 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Mediator functions
+ */
+
+#include <meta.h>
+#include <metamed.h>
+#include <dlfcn.h>
+#include <sdssc.h>
+
+/*
+ * Timeouts, in seconds, for calls to the mediator daemon.
+ *
+ * There are too many external factors that affect the timing of the
+ * operations, so the longest timeout is set to a very large value, in
+ * this case 1 day, which should handle HW timeouts, large configurations,
+ * and other potential delays.  The shorter values are for calls that
+ * are expected to finish sooner.
+ */
+#define	CL_LONG_TMO	86400L			/* 1 day */
+#define	CL_MEDIUM_TMO	3600L			/* 1 hour */
+#define	CL_SHORT_TMO	600L			/* 10 minutes */
+#define	CL_DEF_TMO	10L			/* 10 seconds */
+
+/* time allowed for creating the client handle in open_medd() */
+static	md_timeval32_t def_rpcb_timeout =  { MD_CLNT_CREATE_TOUT, 0 };
+
+/*
+ * RPC handle
+ *
+ * Built by open_medd() and torn down, together with both members, by
+ * close_medd().
+ */
+typedef struct {
+	char	*hostname;	/* private copy of the host's name */
+	CLIENT	*clntp;		/* client handle for the connection */
+} med_handle_t;
+
+/*
+ * Data to be sent from med_clnt_create_timed to med_create_helper via
+ * meta_client_create_retry.
+ */
+typedef struct {
+	rpcprog_t	mcd_program;	/* RPC program designation */
+	rpcvers_t	mcd_version;	/* RPC version */
+	char		*mcd_nettype;	/* Type of network to use for RPC */
+} med_create_data_t;
+
+/*
+ * Creation callback handed to meta_client_create_retry(): performs the
+ * actual clnt_create_timed() for <hostname>, using the program, version
+ * and network type carried in <private> (a med_create_data_t).
+ */
+static CLIENT *
+med_create_helper(char *hostname, void *private, struct timeval *time_out)
+{
+	med_create_data_t	*create_args = private;
+	CLIENT			*handle;
+
+	handle = clnt_create_timed(hostname, create_args->mcd_program,
+	    create_args->mcd_version, create_args->mcd_nettype, time_out);
+
+	return (handle);
+}
+
+/*
+ * Create a client handle for program <prog>, version <vers> on
+ * <hostname> over <nettype>.  The work is delegated to
+ * meta_client_create_retry(), which calls med_create_helper() and is
+ * given the seconds part of <tp> as its time limit.
+ */
+static CLIENT *
+med_clnt_create_timed(
+	char *hostname,
+	const ulong_t prog,
+	const ulong_t vers,
+	char *nettype,
+	const md_timeval32_t *tp
+)
+{
+	med_create_data_t	create_args;	/* passed to the helper */
+	time_t			tmo_secs = (time_t)tp->tv_sec;
+
+	create_args.mcd_nettype = nettype;
+	create_args.mcd_version = vers;
+	create_args.mcd_program = prog;
+
+	return (meta_client_create_retry(hostname, med_create_helper,
+	    (void *)&create_args, tmo_secs, NULL));
+}
+
+/*
+ * Set the timeout value for this client handle.
+ *
+ * <time_out> is in seconds.  Returns 0 on success; on failure the RPC
+ * error is recorded in <ep> against <hostname> and the result of
+ * mdrpcerror() is returned.
+ */
+static int
+cl_sto_medd(
+	CLIENT		*clntp,
+	char		*hostname,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	/*
+	 * CLSET_TIMEOUT takes a struct timeval.  A fixed-width 32-bit
+	 * timeval only has the same layout in a 32-bit process, so use
+	 * the native type.
+	 */
+	struct timeval	nto;
+
+	(void) memset(&nto, '\0', sizeof (nto));
+
+	nto.tv_sec = time_out;
+
+	if (clnt_control(clntp, CLSET_TIMEOUT, (char *)&nto) != TRUE)
+		return (mdrpcerror(ep, clntp, hostname,
+		    dgettext(TEXT_DOMAIN, "metad client set timeout")));
+
+	return (0);
+}
+
+/*
+ * close RPC connection
+ *
+ * Releases everything hanging off <hp> (host name, authentication
+ * handle and client handle) and then <hp> itself.
+ */
+static void
+close_medd(
+	med_handle_t	*hp
+)
+{
+	CLIENT	*clnt;
+
+	assert(hp != NULL);
+
+	if (hp->hostname != NULL)
+		Free(hp->hostname);
+
+	clnt = hp->clntp;
+	if (clnt != NULL) {
+		/* the auth handle goes first, it hangs off the client */
+		auth_destroy(clnt->cl_auth);
+		clnt_destroy(clnt);
+	}
+
+	Free(hp);
+}
+
+/*
+ * open RPC connection to rpc.medd
+ *
+ * <hostname> defaults to the local node when NULL or empty.  The client
+ * handle is created over "tcp", given default AUTH_SYS credentials and
+ * a call timeout of <time_out> seconds.
+ *
+ * Returns a handle to be released with close_medd(), or NULL with the
+ * reason recorded in <ep>.
+ */
+static med_handle_t *
+open_medd(
+	char		*hostname,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	CLIENT		*clntp;
+	med_handle_t	*hp;
+
+	/* default to local host */
+	if ((hostname == NULL) || (*hostname == '\0'))
+		hostname = mynode();
+
+	/* open RPC connection */
+	assert(hostname != NULL);
+	if ((clntp = med_clnt_create_timed(hostname, MED_PROG, MED_VERS,
+	    "tcp", &def_rpcb_timeout)) == NULL) {
+		if (rpc_createerr.cf_stat != RPC_PROGNOTREGISTERED)
+			clnt_pcreateerror(hostname);
+		(void) mdrpccreateerror(ep, hostname,
+		    "medd med_clnt_create_timed");
+		return (NULL);
+	} else {
+		/* replace the handle's auth with default AUTH_SYS */
+		auth_destroy(clntp->cl_auth);
+		clntp->cl_auth = authsys_create_default();
+		assert(clntp->cl_auth != NULL);
+	}
+
+	if (cl_sto_medd(clntp, hostname, time_out, ep) != 0) {
+		/* nobody else knows about the client yet, so release it */
+		auth_destroy(clntp->cl_auth);
+		clnt_destroy(clntp);
+		return (NULL);
+	}
+
+	/* return connection */
+	hp = Zalloc(sizeof (*hp));
+	hp->hostname = Strdup(hostname);
+	hp->clntp = clntp;
+
+	return (hp);
+}
+
+/*
+ * steal and convert med_err_t
+ *
+ * Turns the mediator error in <medep> into an MDE_MED_ERROR recorded in
+ * <ep>.  The message is assembled as "<node>: <misc>: <text>", leaving
+ * out the parts that are missing or empty.  <text> comes from
+ * med_errnum_to_str() for negative error numbers and from strerror()
+ * otherwise.
+ *
+ * Whether or not an error was present, the strings held by <medep> are
+ * freed and the structure is zeroed.
+ *
+ * Returns 0 if <medep> held no error (med_errno == 0), -1 otherwise.
+ */
+int
+meddstealerror(
+	md_error_t	*ep,
+	med_err_t	*medep
+)
+{
+	char		buf[BUFSIZ];
+	char		*p = buf;
+	size_t		psize = BUFSIZ;
+	char		*emsg;
+	int		rval = -1;
+
+	/* no error */
+	if (medep->med_errno == 0) {
+		/* assert(medep->name == NULL); */
+		rval = 0;
+		goto out;
+	}
+
+	/* steal error */
+	/* after each prefix, p and psize are moved to the unused tail of buf */
+	if ((medep->med_node != NULL) && (medep->med_node[0] != '\0')) {
+		(void) snprintf(p, psize, "%s: ", medep->med_node);
+		p = &buf[strlen(buf)];
+		psize = buf + BUFSIZ - p;
+	}
+
+	if ((medep->med_misc != NULL) && (medep->med_misc[0] != '\0')) {
+		(void) snprintf(p, psize, "%s: ", medep->med_misc);
+		p = &buf[strlen(buf)];
+		psize = buf + BUFSIZ - p;
+	}
+
+	/* negative numbers are mediator errors, the rest are errno values */
+	if (medep->med_errno < 0) {
+		if ((emsg = med_errnum_to_str(medep->med_errno)) != NULL)
+			(void) snprintf(p, psize, "%s", emsg);
+		else
+			(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+			    "unknown mediator errno %d\n"), medep->med_errno);
+	} else {
+		if ((emsg = strerror(medep->med_errno)) != NULL)
+			(void) snprintf(p, psize, "%s", emsg);
+		else
+			(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
+			    "errno %d out of range"), medep->med_errno);
+	}
+	(void) mderror(ep, MDE_MED_ERROR, buf);
+
+	/* cleanup; rval is still -1 when an error was converted */
+out:
+	if (medep->med_node != NULL)
+		Free(medep->med_node);
+	if (medep->med_misc != NULL)
+		Free(medep->med_misc);
+	(void) memset(medep, 0, sizeof (*medep));
+	return (rval);
+}
+
+/*
+ * Open a connection to rpc.medd on the host described by <mdhp>, trying
+ * each of its names in turn.
+ *
+ * The names are tried from the highest index (at most MAX_HOST_ADDRS of
+ * them) down to index 0.  When an attempt fails with an RPC error and
+ * names remain, the error is cleared and the next name is tried.  Any
+ * other outcome ends the search: the handle is returned on success, and
+ * NULL with the error left in <ep> on failure.
+ */
+static med_handle_t *
+open_medd_wrap(
+	md_h_t		*mdhp,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	med_handle_t		*hp = NULL;
+	int			i;
+	char    		*hnm;
+
+	assert(mdhp && mdhp->a_cnt > 0);
+
+	/* Loop through the hosts listed */
+	i = min(mdhp->a_cnt, MAX_HOST_ADDRS) - 1;
+	for (; i >= 0; i--) {
+		hnm = mdhp->a_nm[i];
+
+		if ((hp = open_medd(hnm, time_out, ep)) == NULL) {
+			/* only RPC failures are worth retrying elsewhere */
+			if (mdanyrpcerror(ep) && i != 0) {
+				mdclrerror(ep);
+				continue;
+			}
+		}
+		return (hp);
+	}
+
+	/*
+	 * Only reached if the loop body never ran, i.e. there was no
+	 * name to try, which the assertion above rules out.
+	 */
+	rpc_createerr.cf_stat = RPC_CANTSEND;
+	rpc_createerr.cf_error.re_status = 0;
+	(void) mdrpccreateerror(ep, mdhp->a_nm[0],
+	    dgettext(TEXT_DOMAIN, "medd open wrap"));
+
+	return (NULL);
+}
+
+/*
+ * Fill in the device numbers of the mediator translation table obtained
+ * through metaioctl().
+ *
+ * The table header is fetched with MD_MED_GET_TLEN; when it reports
+ * med_tp_setup == 1 nothing more is done.  Otherwise the buffer is grown
+ * to hold med_tp_nents entries, the table is fetched with MD_MED_GET_T,
+ * every entry's med_te_dev is set from a meta_stat() of its med_te_nm
+ * (NODEV64 when the stat fails) and the table is written back with
+ * MD_MED_SET_T.
+ *
+ * Returns 0 on success, otherwise the value produced by mdsyserror() or
+ * mdstealerror() after the error has been placed in ep.
+ */
+static int
+setup_med_transtab(md_error_t *ep)
+{
+	mddb_med_t_parm_t	*parm;
+	struct stat		sb;
+	size_t			full_size;
+	int			rval = 0;
+	int			n;
+
+	parm = Zalloc(sizeof (mddb_med_t_parm_t));
+	if (parm == NULL)
+		return (mdsyserror(ep, ENOMEM, "setup_med_transtab"));
+
+	/* header only: tells us the entry count and the setup state */
+	if (metaioctl(MD_MED_GET_TLEN, parm, &parm->med_tp_mde, NULL) != 0) {
+		rval = mdstealerror(ep, &parm->med_tp_mde);
+		goto done;
+	}
+
+	if (parm->med_tp_setup == 1)
+		goto done;
+
+	/* the parameter structure already embeds one entry */
+	full_size = sizeof (mddb_med_t_parm_t) - sizeof (mddb_med_t_ent_t);
+	full_size += sizeof (mddb_med_t_ent_t) * parm->med_tp_nents;
+
+	parm = Realloc(parm, full_size);
+	if (parm == NULL) {
+		rval = mdsyserror(ep, ENOMEM, "setup_med_transtab");
+		goto done;
+	}
+
+	if (metaioctl(MD_MED_GET_T, parm, &parm->med_tp_mde, NULL) != 0) {
+		rval = mdstealerror(ep, &parm->med_tp_mde);
+		goto done;
+	}
+
+	n = 0;
+	while (n < parm->med_tp_nents) {
+		if (meta_stat(parm->med_tp_ents[n].med_te_nm, &sb) != -1) {
+			parm->med_tp_ents[n].med_te_dev =
+			    meta_expldev(sb.st_rdev);
+		} else {
+			/* report the failure but keep going */
+			md_perror("setup_med_transtab(): stat():");
+			parm->med_tp_ents[n].med_te_dev = NODEV64;
+		}
+		n++;
+	}
+
+	if (metaioctl(MD_MED_SET_T, parm, &parm->med_tp_mde, NULL) != 0)
+		rval = mdstealerror(ep, &parm->med_tp_mde);
+
+done:
+	Free(parm);
+	return (rval);
+}
+
+/*
+ * Externals
+ */
+
+/*
+ * NULLPROC - just returns a response
+ *
+ * Opens a connection to the mediator daemon on hostname, issues the null
+ * procedure and closes the connection again.
+ *
+ * Returns 0 when ep is still clean afterwards, -1 otherwise (open failure
+ * or RPC failure, with the error recorded in ep).
+ */
+int
+clnt_med_null(
+	char			*hostname,
+	md_error_t		*ep
+)
+{
+	med_handle_t		*hp;
+	med_err_t		res;
+
+	/* initialize */
+	mdclrerror(ep);
+	/*
+	 * res must start out zeroed: it is handed to the RPC stub as the
+	 * result buffer and to xdr_free() below even when the call fails
+	 * and never fills it in.
+	 */
+	(void) memset(&res, 0, sizeof (res));
+
+	/* do it */
+	if ((hp = open_medd(hostname, CL_DEF_TMO, ep)) == NULL)
+		return (-1);
+
+	if (med_null_1(NULL, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hostname,
+		    dgettext(TEXT_DOMAIN, "medd nullproc"));
+
+	close_medd(hp);
+
+	xdr_free(xdr_med_err_t, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Update the mediator information on the mediator.
+ * *** This is not normally called from user code, the kernel does this! ***
+ *
+ * mdhp lists the names to try for the mediator host, sp identifies the
+ * set and *meddp is the data sent.  Returns 0 on success and -1 on
+ * failure with the error recorded in ep.
+ */
+int
+clnt_med_upd_data(
+	md_h_t			*mdhp,
+	mdsetname_t		*sp,
+	med_data_t		*meddp,
+	md_error_t		*ep
+)
+{
+	med_handle_t		*hp;
+	med_upd_data_args_t	args;
+	med_err_t		res;
+	md_set_desc		*sd;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (MD_MNSET_DESC(sd))
+		/*
+		 * In the MN diskset, use a generic nodename, multiowner, as
+		 * the node initiating the RPC request.  This allows
+		 * any node to access mediator information.
+		 *
+		 * MN diskset reconfig cycle forces consistent
+		 * view of set/node/drive/mediator information across all nodes
+		 * in the MN diskset.  This allows the relaxation of
+		 * node name checking in rpc.metamedd for MN disksets.
+		 *
+		 * In the traditional diskset, only a calling node that is
+		 * in the mediator record's diskset nodelist can access
+		 * mediator data.
+		 */
+		args.med.med_caller = Strdup(MED_MN_CALLER);
+	else
+		args.med.med_caller = Strdup(mynode());
+	args.med.med_setname = Strdup(sp->setname);
+	args.med.med_setno = sp->setno;
+	args.med_data = *meddp;
+
+	/* do it */
+	if ((hp = open_medd_wrap(mdhp, CL_DEF_TMO, ep)) == NULL) {
+		/* don't leak the strings duplicated into args above */
+		xdr_free(xdr_med_upd_data_args_t, (char *)&args);
+		return (-1);
+	}
+
+	if (med_upd_data_1(&args, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd update data"));
+	else
+		(void) meddstealerror(ep, &res);
+
+	close_medd(hp);
+
+	xdr_free(xdr_med_upd_data_args_t, (char *)&args);
+	xdr_free(xdr_med_err_t, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Get the mediator data for this client from the mediator
+ *
+ * mdhp lists the names to try for the mediator host and sp identifies the
+ * set.  On success the returned data is copied into *meddp and 0 is
+ * returned; on failure *meddp is left untouched, the error is recorded in
+ * ep and -1 is returned.
+ */
+int
+clnt_med_get_data(
+	md_h_t			*mdhp,
+	mdsetname_t		*sp,
+	med_data_t		*meddp,
+	md_error_t		*ep
+)
+{
+	med_handle_t		*hp;
+	med_args_t		args;
+	med_get_data_res_t	res;
+	int			rval = -1;
+	md_set_desc		*sd;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (MD_MNSET_DESC(sd))
+		/*
+		 * In the MN diskset, use a generic nodename, multiowner, as
+		 * the node initiating the RPC request.  This allows
+		 * any node to access mediator information.
+		 *
+		 * MN diskset reconfig cycle forces consistent
+		 * view of set/node/drive/mediator information across all nodes
+		 * in the MN diskset.  This allows the relaxation of
+		 * node name checking in rpc.metamedd for MN disksets.
+		 *
+		 * In the traditional diskset, only a calling node that is
+		 * in the mediator record's diskset nodelist can access
+		 * mediator data.
+		 */
+		args.med.med_caller = Strdup(MED_MN_CALLER);
+	else
+		args.med.med_caller = Strdup(mynode());
+	args.med.med_setname = Strdup(sp->setname);
+	args.med.med_setno = sp->setno;
+
+	/* do it */
+	if ((hp = open_medd_wrap(mdhp, CL_DEF_TMO, ep)) == NULL) {
+		/* don't leak the strings duplicated into args above */
+		xdr_free(xdr_med_args_t, (char *)&args);
+		return (-1);
+	}
+
+	if (med_get_data_1(&args, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd get data"));
+	else
+		(void) meddstealerror(ep, &res.med_status);
+
+	close_medd(hp);
+
+	if (mdisok(ep)) {
+		/* do something with the results */
+		(void) memmove(meddp, &res.med_data, sizeof (med_data_t));
+		rval = 0;
+	}
+
+	xdr_free(xdr_med_args_t, (char *)&args);
+	xdr_free(xdr_med_get_data_res_t, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Update the mediator record on the mediator.
+ *
+ * mdhp lists the names to try for the mediator host, sp identifies the
+ * set and *medrp is the record sent (with med_flags of 0).  Returns 0 on
+ * success and -1 on failure with the error recorded in ep.
+ */
+int
+clnt_med_upd_rec(
+	md_h_t			*mdhp,
+	mdsetname_t		*sp,
+	med_rec_t		*medrp,
+	md_error_t		*ep
+)
+{
+	med_handle_t		*hp;
+	med_upd_rec_args_t	args;
+	med_err_t		res;
+	md_set_desc		*sd;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (MD_MNSET_DESC(sd))
+		/*
+		 * In the MN diskset, use a generic nodename, multiowner, as
+		 * the node initiating the RPC request.  This allows
+		 * any node to access mediator information.
+		 *
+		 * MN diskset reconfig cycle forces consistent
+		 * view of set/node/drive/mediator information across all nodes
+		 * in the MN diskset.  This allows the relaxation of
+		 * node name checking in rpc.metamedd for MN disksets.
+		 *
+		 * In the traditional diskset, only a calling node that is
+		 * in the mediator record's diskset nodelist can access
+		 * mediator data.
+		 */
+		args.med.med_caller = Strdup(MED_MN_CALLER);
+	else
+		args.med.med_caller = Strdup(mynode());
+	args.med.med_setname = Strdup(sp->setname);
+	args.med.med_setno = sp->setno;
+	args.med_flags = 0;
+	args.med_rec = *medrp;			/* structure assignment */
+
+	/* do it */
+	if ((hp = open_medd_wrap(mdhp, CL_DEF_TMO, ep)) == NULL) {
+		/* don't leak the strings duplicated into args above */
+		xdr_free(xdr_med_upd_rec_args_t, (char *)&args);
+		return (-1);
+	}
+
+	if (med_upd_rec_1(&args, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd update record"));
+	else
+		(void) meddstealerror(ep, &res);
+
+	close_medd(hp);
+
+	xdr_free(xdr_med_upd_rec_args_t, (char *)&args);
+	xdr_free(xdr_med_err_t, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Get the mediator record for this client from the mediator
+ *
+ * mdhp lists the names to try for the mediator host and sp identifies the
+ * set.  On success the returned record is copied into *medrp and 0 is
+ * returned; on failure *medrp is left untouched, the error is recorded in
+ * ep and -1 is returned.
+ */
+int
+clnt_med_get_rec(
+	md_h_t			*mdhp,
+	mdsetname_t		*sp,
+	med_rec_t		*medrp,
+	md_error_t		*ep
+)
+{
+	med_handle_t		*hp;
+	med_args_t		args;
+	med_get_rec_res_t	res;
+	int			rval = -1;
+	md_set_desc		*sd;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (MD_MNSET_DESC(sd))
+		/*
+		 * In the MN diskset, use a generic nodename, multiowner, as
+		 * the node initiating the RPC request.  This allows
+		 * any node to access mediator information.
+		 *
+		 * MN diskset reconfig cycle forces consistent
+		 * view of set/node/drive/mediator information across all nodes
+		 * in the MN diskset.  This allows the relaxation of
+		 * node name checking in rpc.metamedd for MN disksets.
+		 *
+		 * In the traditional diskset, only a calling node that is
+		 * in the mediator record's diskset nodelist can access
+		 * mediator data.
+		 */
+		args.med.med_caller = Strdup(MED_MN_CALLER);
+	else
+		args.med.med_caller = Strdup(mynode());
+	args.med.med_setname = Strdup(sp->setname);
+	args.med.med_setno = sp->setno;
+
+	/* do it */
+	if ((hp = open_medd_wrap(mdhp, CL_DEF_TMO, ep)) == NULL) {
+		/* don't leak the strings duplicated into args above */
+		xdr_free(xdr_med_args_t, (char *)&args);
+		return (-1);
+	}
+
+	if (med_get_rec_1(&args, &res, hp->clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, hp->clntp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "medd get record"));
+	else
+		(void) meddstealerror(ep, &res.med_status);
+
+	close_medd(hp);
+
+	if (mdisok(ep)) {
+		/* do something with the results */
+		(void) memmove(medrp, &res.med_rec, sizeof (med_rec_t));
+		rval = 0;
+	}
+
+	xdr_free(xdr_med_args_t, (char *)&args);
+	xdr_free(xdr_med_get_rec_res_t, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Get the name of the host from the mediator daemon.
+ *
+ * Contacts the daemon on hostname.  On success 0 is returned and, when
+ * ret_hostname is not NULL, *ret_hostname receives a Strdup() copy of the
+ * reported name.  On failure -1 is returned, the error is recorded in ep
+ * and *ret_hostname is not written.
+ */
+int
+clnt_med_hostname(
+	char			*hostname,
+	char			**ret_hostname,
+	md_error_t		*ep
+)
+{
+	med_handle_t		*handle;
+	med_hnm_res_t		reply;
+	int			status;
+
+	mdclrerror(ep);
+	(void) memset(&reply, 0, sizeof (reply));
+
+	/* the procedure takes no arguments */
+	handle = open_medd(hostname, CL_DEF_TMO, ep);
+	if (handle == NULL)
+		return (-1);
+
+	if (med_hostname_1(NULL, &reply, handle->clntp) == RPC_SUCCESS)
+		(void) meddstealerror(ep, &reply.med_status);
+	else
+		(void) mdrpcerror(ep, handle->clntp, hostname,
+		    dgettext(TEXT_DOMAIN, "medd hostname"));
+
+	close_medd(handle);
+
+	if (! mdisok(ep)) {
+		status = -1;
+	} else {
+		status = 0;
+		if (ret_hostname != NULL)
+			*ret_hostname = Strdup(reply.med_hnm);
+	}
+
+	xdr_free(xdr_med_hnm_res_t, (char *)&reply);
+
+	return (status);
+}
+
+/*
+ * Resolve every name stored in the mediator host array mp to an IP
+ * address, storing the result in the matching a_ip slot.
+ *
+ * For each of the get_max_meds() slots the flags are reset and
+ * NMIP_F_LOCAL is set when the slot's first name equals mynode().  Each
+ * name is resolved through sdssc_get_priv_ipaddr() first and, when that
+ * does not return SDSSC_OKAY, through gethostbyname(); only AF_INET is
+ * accepted and only the first address is used.
+ *
+ * Returns 0 on success, -1 when get_max_meds() yields 0, or the value of
+ * mdsyserror() (EADDRNOTAVAIL / EPFNOSUPPORT recorded in ep) when a name
+ * cannot be resolved.
+ */
+int
+meta_med_hnm2ip(md_hi_arr_t *mp, md_error_t *ep)
+{
+	int		i, j;
+	int		max_meds;
+
+	if ((max_meds = get_max_meds(ep)) == 0)
+		return (-1);
+
+	for (i = 0; i < max_meds; i++) {
+		mp->n_lst[i].a_flg = 0;
+		/*
+		 * See if this is the local host.  strcmp() returns an int,
+		 * so it is compared against 0, not the pointer constant NULL.
+		 */
+		if (mp->n_lst[i].a_cnt > 0 &&
+		    strcmp(mp->n_lst[i].a_nm[0], mynode()) == 0)
+			mp->n_lst[i].a_flg |= NMIP_F_LOCAL;
+
+		for (j = 0; j < mp->n_lst[i].a_cnt; j++) {
+			struct hostent	*hp;
+			char		*hnm = mp->n_lst[i].a_nm[j];
+
+			/*
+			 * Cluster nodename support
+			 *
+			 * See if the clustering code can give us an IP addr
+			 * for the stored name. If not, find it the old way
+			 * which will use the public interface.
+			 */
+			if (sdssc_get_priv_ipaddr(mp->n_lst[i].a_nm[j],
+			    (struct in_addr *)&mp->n_lst[i].a_ip[j]) !=
+					SDSSC_OKAY) {
+				if ((hp = gethostbyname(hnm)) == NULL)
+					return (mdsyserror(ep, EADDRNOTAVAIL,
+					    hnm));
+
+				/* We only do INET addresses */
+				if (hp->h_addrtype != AF_INET)
+					return (mdsyserror(ep, EPFNOSUPPORT,
+					    hnm));
+
+				/* We take the first address only */
+				if (*hp->h_addr_list) {
+					(void) memmove(&mp->n_lst[i].a_ip[j],
+					    *hp->h_addr_list,
+					    sizeof (struct in_addr));
+				} else
+					return (mdsyserror(ep, EADDRNOTAVAIL,
+					    hnm));
+			}
+
+		}
+	}
+	return (0);
+}
+
+/*
+ * Copy the host names of a md_h_arr_t into a md_hi_arr_t.
+ *
+ * n_cnt and, for each of the get_max_meds() slots, a_cnt and the a_cnt
+ * names are copied; a_flg of every destination slot is reset to 0.
+ * Returns 0 on success and -1 when get_max_meds() yields 0.
+ */
+int
+meta_h2hi(md_h_arr_t *mdhp, md_hi_arr_t *mdhip, md_error_t *ep)
+{
+	int			nmeds;
+	int			slot;
+	int			nm;
+
+	nmeds = get_max_meds(ep);
+	if (nmeds == 0)
+		return (-1);
+
+	mdhip->n_cnt = mdhp->n_cnt;
+
+	for (slot = 0; slot < nmeds; slot++) {
+		mdhip->n_lst[slot].a_flg = 0;
+		mdhip->n_lst[slot].a_cnt = mdhp->n_lst[slot].a_cnt;
+
+		/* an empty slot simply copies no names */
+		nm = 0;
+		while (nm < mdhp->n_lst[slot].a_cnt) {
+			(void) strcpy(mdhip->n_lst[slot].a_nm[nm],
+			    mdhp->n_lst[slot].a_nm[nm]);
+			nm++;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Copy the host names of a md_hi_arr_t into a md_h_arr_t.
+ *
+ * n_cnt and, for each of the get_max_meds() slots, a_cnt and the a_cnt
+ * names are copied.  Returns 0 on success and -1 when get_max_meds()
+ * yields 0.
+ */
+int
+meta_hi2h(md_hi_arr_t *mdhip, md_h_arr_t *mdhp, md_error_t *ep)
+{
+	int			nmeds;
+	int			slot;
+	int			nm;
+
+	nmeds = get_max_meds(ep);
+	if (nmeds == 0)
+		return (-1);
+
+	mdhp->n_cnt = mdhip->n_cnt;
+
+	for (slot = 0; slot < nmeds; slot++) {
+		mdhp->n_lst[slot].a_cnt = mdhip->n_lst[slot].a_cnt;
+
+		/* an empty slot simply copies no names */
+		nm = 0;
+		while (nm < mdhip->n_lst[slot].a_cnt) {
+			(void) strcpy(mdhp->n_lst[slot].a_nm[nm],
+			    mdhip->n_lst[slot].a_nm[nm]);
+			nm++;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Fill in the mediator part (c_med) of the configuration cp for set sp.
+ *
+ * Nothing is done for the local set.  Otherwise the translation table is
+ * set up, the set's mediator host names are copied into cp->c_med and
+ * resolved to IP addresses.  Unless force is non-zero, the first name of
+ * every populated mediator slot is then queried with clnt_med_hostname();
+ * a failing query is reported as MDE_DS_NOMEDONHOST.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+setup_med_cfg(
+	mdsetname_t		*sp,
+	mddb_config_t		*cp,
+	int			force,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd;
+	char			*reported;
+	char			*med_host;
+	int			nmeds;
+	int			m;
+
+	/* nothing to do for the local set */
+	if (metaislocalset(sp))
+		return (0);
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	/*
+	 * Translation table, then the host names, then make sure the
+	 * ip addresses are current; stop at the first step that fails.
+	 */
+	if (setup_med_transtab(ep) ||
+	    meta_h2hi(&sd->sd_med, &cp->c_med, ep) ||
+	    meta_med_hnm2ip(&cp->c_med, ep))
+		return (-1);
+
+	if (force)
+		return (0);
+
+	nmeds = get_max_meds(ep);
+	if (nmeds == 0)
+		return (-1);
+
+	/* Make sure metamedd still running on host - only chk nodename */
+	for (m = 0; m < nmeds; m++) {
+		if (sd->sd_med.n_lst[m].a_cnt != 0) {
+			med_host = sd->sd_med.n_lst[m].a_nm[0];
+
+			if (clnt_med_hostname(med_host, &reported, ep))
+				return (mddserror(ep, MDE_DS_NOMEDONHOST,
+				    sp->setno, med_host, NULL, sp->setname));
+			Free(reported);
+		}
+	}
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_med_err.c b/usr/src/lib/lvm/libmeta/common/meta_med_err.c
new file mode 100644
index 0000000000..6b83280f89
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_med_err.c
@@ -0,0 +1,97 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1992-2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+#include <metamed.h>
+
+/*
+ * Map a mediator error number (MDE_MED_*) to its message, looked up in
+ * TEXT_DOMAIN through dgettext().  Returns NULL for a number that is not
+ * one of the cases below.
+ *
+ * NOTE(review): the message strings double as gettext msgids; the two
+ * corrected strings (record size / record offset) need matching updates
+ * in any existing message catalogs.
+ */
+char *
+med_errnum_to_str(int errnum)
+{
+	switch (errnum) {
+	    case MDE_MED_NOERROR:
+		return (dgettext(TEXT_DOMAIN, "No Error"));
+	    case MDE_MED_HOSTNOMED:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator host has no mediator data for host"));
+	    case MDE_MED_DBNOTINIT:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database is not initialized"));
+	    case MDE_MED_DBSZBAD:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database size is not valid"));
+	    case MDE_MED_DBKEYADDFAIL:
+		return (dgettext(TEXT_DOMAIN,
+		    "unable to add key to mediator database"));
+	    case MDE_MED_DBKEYDELFAIL:
+		return (dgettext(TEXT_DOMAIN,
+		    "unable to delete key from mediator database"));
+	    case MDE_MED_DBHDRSZBAD:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database header record size is not valid"));
+	    case MDE_MED_DBHDRMAGBAD:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database header magic is not valid"));
+	    case MDE_MED_DBHDRREVBAD:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database header revision is not valid"));
+	    case MDE_MED_DBHDRCKSBAD:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database header checksum is not valid"));
+	    case MDE_MED_DBRECSZBAD:
+		/* was "record record size": duplicated word removed */
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database record size is not valid"));
+	    case MDE_MED_DBRECMAGBAD:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database record magic is not valid"));
+	    case MDE_MED_DBRECREVBAD:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database record revision is not valid"));
+	    case MDE_MED_DBRECCKSBAD:
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database record checksum is not valid"));
+	    case MDE_MED_DBRECOFFBAD:
+		/* was "offset in not valid": typo corrected */
+		return (dgettext(TEXT_DOMAIN,
+		    "mediator database record offset is not valid"));
+	    case MDE_MED_DBRECNOENT:
+		return (dgettext(TEXT_DOMAIN,
+		    "no matching mediator record found"));
+	    case MDE_MED_DBARGSMISMATCH:
+		return (dgettext(TEXT_DOMAIN, "set number in arguments "
+		    "different from set number in data"));
+	    default:
+		return (NULL);
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mem.c b/usr/src/lib/lvm/libmeta/common/meta_mem.c
new file mode 100644
index 0000000000..d685f57c09
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mem.c
@@ -0,0 +1,250 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 1993, 2000 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <meta.h>
+
+/*
+ * free
+ */
+#ifdef	_DEBUG_MALLOC_INC
+
+/*
+ * Debug-malloc flavour of Free(): hands p to debug_free() together with
+ * the caller's source file and line.
+ */
+void
+_Free(char *file, int line, void *p)
+{
+	debug_free(file, line, p);
+}
+
+#else	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Release memory through free(); the counterpart of the allocation
+ * wrappers in this file.
+ */
+void
+Free(void *p)
+{
+	free(p);
+}
+
+#endif	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * malloc
+ */
+#ifdef	_DEBUG_MALLOC_INC
+
+/*
+ * Debug-malloc flavour of Malloc(): allocates s bytes with debug_malloc(),
+ * tagging the allocation with the caller's file and line.  On failure
+ * md_perror("") and md_exit(NULL, 1) are called (presumably md_exit() does
+ * not return - confirm against its definition).
+ */
+void *
+_Malloc(char *file, int line, size_t s)
+{
+	void *mem = debug_malloc(file, line, s);
+
+	if (mem != NULL)
+		return (mem);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (NULL);
+}
+
+#else	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Allocate s bytes with malloc().  On failure md_perror("") and
+ * md_exit(NULL, 1) are called (presumably md_exit() does not return -
+ * confirm against its definition).
+ */
+void *
+Malloc(size_t s)
+{
+	void *mem = malloc(s);
+
+	if (mem != NULL)
+		return (mem);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (NULL);
+}
+
+#endif	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * zalloc
+ */
+#ifdef	_DEBUG_MALLOC_INC
+
+/*
+ * Debug-malloc flavour of Zalloc(): s bytes from _Malloc(), cleared to
+ * zero.
+ */
+void *
+_Zalloc(char *file, int line, size_t s)
+{
+	void *mem = _Malloc(file, line, s);
+
+	(void) memset(mem, 0, s);
+	return (mem);
+}
+
+#else	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Allocate s bytes with Malloc() and clear them to zero.
+ */
+void *
+Zalloc(size_t s)
+{
+	void *mem = Malloc(s);
+
+	(void) memset(mem, 0, s);
+	return (mem);
+}
+
+#endif	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * realloc
+ */
+#ifdef	_DEBUG_MALLOC_INC
+
+/*
+ * Debug-malloc flavour of Realloc(): resizes p to s bytes with
+ * debug_realloc(), or allocates afresh with debug_malloc() when p is
+ * NULL.  On failure md_perror("") and md_exit(NULL, 1) are called.
+ */
+void *
+_Realloc(char *file, int line, void *p, size_t s)
+{
+	void *mem;
+
+	mem = (p == NULL) ? debug_malloc(file, line, s) :
+	    debug_realloc(file, line, p, s);
+
+	if (mem != NULL)
+		return (mem);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (NULL);
+}
+
+#else	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Resize p to s bytes with realloc().  On failure md_perror("") and
+ * md_exit(NULL, 1) are called.
+ */
+void *
+Realloc(void *p, size_t s)
+{
+	void *mem = realloc(p, s);
+
+	if (mem != NULL)
+		return (mem);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (NULL);
+}
+
+#endif	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * calloc
+ */
+#ifdef	_DEBUG_MALLOC_INC
+
+/*
+ * Debug-malloc flavour of Calloc(): zeroed storage for n items of s bytes
+ * via _Zalloc().  Returns NULL, without allocating, when n * s does not
+ * survive the division check below.
+ */
+void *
+_Calloc(char *file, int line, size_t n, size_t s)
+{
+	unsigned long nbytes = 0;
+
+	if (n != 0 && s != 0) {
+		nbytes = (unsigned long)n * s;
+		/* a wrapped product no longer divides back to s */
+		if (nbytes / n != s)
+			return (NULL);
+	}
+	return (_Zalloc(file, line, nbytes));
+}
+
+#else	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Zeroed storage for n items of s bytes via Zalloc().  Returns NULL,
+ * without allocating, when n * s does not survive the division check
+ * below.
+ */
+void *
+Calloc(size_t n, size_t s)
+{
+	unsigned long nbytes = 0;
+
+	if (n != 0 && s != 0) {
+		nbytes = (unsigned long)n * s;
+		/* a wrapped product no longer divides back to s */
+		if (nbytes / n != s)
+			return (NULL);
+	}
+	return (Zalloc(nbytes));
+}
+
+#endif	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * strdup
+ */
+#ifdef	_DEBUG_MALLOC_INC
+
+/*
+ * Debug-malloc flavour of Strdup(): duplicates p with DBstrdup(), tagging
+ * the allocation with the caller's file and line.  On failure
+ * md_perror("") and md_exit(NULL, 1) are called.
+ */
+char *
+_Strdup(char *file, int line, char *p)
+{
+	char *copy = DBstrdup(file, line, p);
+
+	if (copy != NULL)
+		return (copy);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (NULL);
+}
+
+#else	/* ! _DEBUG_MALLOC_INC */
+
+/*
+ * Duplicate the string p with strdup().  On failure md_perror("") and
+ * md_exit(NULL, 1) are called.
+ */
+char *
+Strdup(char *p)
+{
+	char *copy = strdup(p);
+
+	if (copy != NULL)
+		return (copy);
+
+	md_perror("");
+	md_exit(NULL, 1);
+	return (NULL);
+}
+
+#endif	/* ! _DEBUG_MALLOC_INC */
diff --git a/usr/src/lib/lvm/libmeta/common/meta_metad.c b/usr/src/lib/lvm/libmeta/common/meta_metad.c
new file mode 100644
index 0000000000..7588843f5c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_metad.c
@@ -0,0 +1,4082 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+#include <metad.h>
+#include <devid.h>
+
+static md_setkey_t	*my_cl_sk = NULL;
+
+#define	CL_DEF_TMO	30L
+
+/*
+ * Convert an old style mddrivename_t into a new style
+ * mddrivename_t. Meant to be used *ONLY* by rpc.metad
+ *
+ * Members are copied by plain assignment, so pointer members (such as a
+ * partition's cname) end up shared between v1_dp and v2_dp.  geom.blk_sz,
+ * the vtoc first_lba/last_lba/lbasize members and the last vtoc partition
+ * are not taken from v1_dp; they are set to 0.  v2_dp->parts.parts_val
+ * must already have room for v1_dp->parts.parts_len entries.
+ */
+void
+meta_conv_drvname_old2new(
+	o_mddrivename_t		*v1_dp,
+	mddrivename_t		*v2_dp
+)
+{
+	const int	last_part = MD_MAX_PARTS - 1;
+	int		idx;
+	o_mdname_t	*src_np;
+	mdname_t	*dst_np;
+
+	/* members carried over unchanged */
+	v2_dp->cname = v1_dp->cname;
+	v2_dp->rname = v1_dp->rname;
+	v2_dp->type = v1_dp->type;
+	v2_dp->errnum = v1_dp->errnum;
+	v2_dp->cinfo = v1_dp->cinfo;
+	v2_dp->side_names = v1_dp->side_names;
+	v2_dp->side_names_key = v1_dp->side_names_key;
+	v2_dp->miscname = v1_dp->miscname;
+
+	/* geometry; blk_sz is not taken from v1_dp */
+	v2_dp->geom.ncyl = v1_dp->geom.ncyl;
+	v2_dp->geom.nhead = v1_dp->geom.nhead;
+	v2_dp->geom.nsect = v1_dp->geom.nsect;
+	v2_dp->geom.rpm = v1_dp->geom.rpm;
+	v2_dp->geom.write_reinstruct = v1_dp->geom.write_reinstruct;
+	v2_dp->geom.read_reinstruct = v1_dp->geom.read_reinstruct;
+	v2_dp->geom.blk_sz = 0;
+
+	/* vtoc header; the LBA members are not taken from v1_dp */
+	v2_dp->vtoc.typename = v1_dp->vtoc.typename;
+	v2_dp->vtoc.nparts = v1_dp->vtoc.nparts;
+	v2_dp->vtoc.first_lba = 0;
+	v2_dp->vtoc.last_lba = 0;
+	v2_dp->vtoc.lbasize = 0;
+
+	/* vtoc partitions, all but the last one */
+	idx = 0;
+	while (idx < last_part) {
+		v2_dp->vtoc.parts[idx].start =
+		    (diskaddr_t)v1_dp->vtoc.parts[idx].start;
+		v2_dp->vtoc.parts[idx].size =
+		    (diskaddr_t)v1_dp->vtoc.parts[idx].size;
+		v2_dp->vtoc.parts[idx].tag = v1_dp->vtoc.parts[idx].tag;
+		v2_dp->vtoc.parts[idx].flag = v1_dp->vtoc.parts[idx].flag;
+		v2_dp->vtoc.parts[idx].label =
+		    (diskaddr_t)v1_dp->vtoc.parts[idx].label;
+		idx++;
+	}
+
+	/* The new style vtoc has 17 partitions; the extra one is empty */
+	v2_dp->vtoc.parts[last_part].start = 0;
+	v2_dp->vtoc.parts[last_part].size = 0;
+	v2_dp->vtoc.parts[last_part].tag = 0;
+	v2_dp->vtoc.parts[last_part].flag = 0;
+	v2_dp->vtoc.parts[last_part].label = 0;
+
+	/* partition information */
+	v2_dp->parts.parts_len = v1_dp->parts.parts_len;
+	for (idx = 0; idx < v1_dp->parts.parts_len; idx++) {
+		src_np = &v1_dp->parts.parts_val[idx];
+		dst_np = &v2_dp->parts.parts_val[idx];
+
+		/* every partition points back at its new style drive */
+		dst_np->drivenamep = v2_dp;
+
+		/*
+		 * We speculate that if cname for a particular
+		 * partition does not exist, the other fields
+		 * don't exist either. In such a case, we don't
+		 * need to do anything more for that partition.
+		 */
+		if (src_np->cname == NULL)
+			continue;
+
+		dst_np->cname = src_np->cname;
+		dst_np->bname = src_np->bname;
+		dst_np->rname = src_np->rname;
+		dst_np->devicesname = src_np->devicesname;
+		dst_np->dev = meta_expldev(src_np->dev);
+		dst_np->key = src_np->key;
+		dst_np->end_blk = (diskaddr_t)src_np->end_blk;
+		dst_np->start_blk = (diskaddr_t)src_np->start_blk;
+	}
+}
+
+/*
+ * Convert a new style mddrivename_t into an old style
+ * mddrivename_t. Meant to be used *ONLY* by rpc.metad
+ *
+ * Members are copied by plain assignment, so pointer members (such as a
+ * partition's cname) end up shared between v2_dp and v1_dp.  Only the
+ * first MD_MAX_PARTS - 1 vtoc partitions are copied, and their start,
+ * size and label values are cast to daddr_t.  v1_dp->parts.parts_val must
+ * already have room for v2_dp->parts.parts_len entries.
+ */
+void
+meta_conv_drvname_new2old(
+	o_mddrivename_t		*v1_dp,
+	mddrivename_t		*v2_dp
+)
+{
+	const int	old_nparts = MD_MAX_PARTS - 1;
+	int		idx;
+	o_mdname_t	*dst_np;
+	mdname_t	*src_np;
+
+	/* members carried over unchanged */
+	v1_dp->cname = v2_dp->cname;
+	v1_dp->rname = v2_dp->rname;
+	v1_dp->type = v2_dp->type;
+	v1_dp->errnum = v2_dp->errnum;
+	v1_dp->cinfo = v2_dp->cinfo;
+	v1_dp->side_names = v2_dp->side_names;
+	v1_dp->side_names_key = v2_dp->side_names_key;
+	v1_dp->miscname = v2_dp->miscname;
+
+	/* geometry information */
+	v1_dp->geom.ncyl = v2_dp->geom.ncyl;
+	v1_dp->geom.nhead = v2_dp->geom.nhead;
+	v1_dp->geom.nsect = v2_dp->geom.nsect;
+	v1_dp->geom.rpm = v2_dp->geom.rpm;
+	v1_dp->geom.write_reinstruct = v2_dp->geom.write_reinstruct;
+	v1_dp->geom.read_reinstruct = v2_dp->geom.read_reinstruct;
+
+	/* vtoc information */
+	v1_dp->vtoc.typename = v2_dp->vtoc.typename;
+	v1_dp->vtoc.nparts = v2_dp->vtoc.nparts;
+
+	idx = 0;
+	while (idx < old_nparts) {
+		v1_dp->vtoc.parts[idx].start =
+		    (daddr_t)v2_dp->vtoc.parts[idx].start;
+		v1_dp->vtoc.parts[idx].size =
+		    (daddr_t)v2_dp->vtoc.parts[idx].size;
+		v1_dp->vtoc.parts[idx].tag = v2_dp->vtoc.parts[idx].tag;
+		v1_dp->vtoc.parts[idx].flag = v2_dp->vtoc.parts[idx].flag;
+		v1_dp->vtoc.parts[idx].label =
+		    (daddr_t)v2_dp->vtoc.parts[idx].label;
+		idx++;
+	}
+
+	/* partition information */
+	v1_dp->parts.parts_len = v2_dp->parts.parts_len;
+	for (idx = 0; idx < v2_dp->parts.parts_len; idx++) {
+		dst_np = &v1_dp->parts.parts_val[idx];
+		src_np = &v2_dp->parts.parts_val[idx];
+
+		/* every partition points back at its old style drive */
+		dst_np->drivenamep = v1_dp;
+
+		/*
+		 * We speculate that if cname for a particular
+		 * partition does not exist then the rest of
+		 * the fields of that partition don't exist either.
+		 * In such a case, we don't need to do anything
+		 * more for that partition.
+		 */
+		if (src_np->cname == NULL)
+			continue;
+
+		dst_np->cname = src_np->cname;
+		dst_np->bname = src_np->bname;
+		dst_np->rname = src_np->rname;
+		dst_np->devicesname = src_np->devicesname;
+		dst_np->dev = meta_cmpldev(src_np->dev);
+		dst_np->key = src_np->key;
+		dst_np->end_blk = (daddr_t)src_np->end_blk;
+		dst_np->start_blk = (daddr_t)src_np->start_blk;
+	}
+}
+
+/*
+ * Convert an old style md_drive_desc_t list into a new style
+ * md_drive_desc_t list. Meant to be used *ONLY* by rpc.metad
+ *
+ * The two lists are walked in step; v2_dd must already hold at least as
+ * many elements as v1_dd, each with its dd_dnp in place.
+ */
+void
+meta_conv_drvdesc_old2new(
+	o_md_drive_desc		*v1_dd,
+	md_drive_desc		*v2_dd
+)
+{
+	o_md_drive_desc	*src = v1_dd;
+	md_drive_desc	*dst = v2_dd;
+
+	while (src != NULL) {
+		dst->dd_ctime = src->dd_ctime;
+		dst->dd_genid = src->dd_genid;
+		dst->dd_flags = src->dd_flags;
+		meta_conv_drvname_old2new(src->dd_dnp, dst->dd_dnp);
+		dst->dd_dbcnt = src->dd_dbcnt;
+		dst->dd_dbsize = src->dd_dbsize;
+
+		src = src->dd_next;
+		dst = dst->dd_next;
+	}
+}
+
+/*
+ * Convert a new style md_drive_desc_t list into an old style
+ * md_drive_desc_t list. Meant to be used *ONLY* by rpc.metad
+ *
+ * The two lists are walked in step; v1_dd must already hold at least as
+ * many elements as v2_dd, each with its dd_dnp in place.
+ */
+void
+meta_conv_drvdesc_new2old(
+	o_md_drive_desc		*v1_dd,
+	md_drive_desc		*v2_dd
+)
+{
+	md_drive_desc	*src = v2_dd;
+	o_md_drive_desc	*dst = v1_dd;
+
+	while (src != NULL) {
+		dst->dd_ctime = src->dd_ctime;
+		dst->dd_genid = src->dd_genid;
+		dst->dd_flags = src->dd_flags;
+		meta_conv_drvname_new2old(dst->dd_dnp, src->dd_dnp);
+		dst->dd_dbcnt = src->dd_dbcnt;
+		dst->dd_dbsize = src->dd_dbsize;
+
+		src = src->dd_next;
+		dst = dst->dd_next;
+	}
+}
+
+/*
+ * Allocate memory for v1 drive descriptor
+ * depending upon the number of drives in the
+ * v2 drive descriptor
+ *
+ * One zeroed element is created per v2 element, each with a zeroed
+ * dd_dnp.  Because elements are pushed onto the front of the list, the
+ * k-th element of the result gets a parts_val array sized from the k-th
+ * from last v2 element.  The list head is stored in *v1_dd (NULL when
+ * v2_dd is empty).
+ */
+void
+alloc_olddrvdesc(
+	o_md_drive_desc		**v1_dd,
+	md_drive_desc		*v2_dd
+)
+{
+	o_md_drive_desc	*list = NULL;
+	o_md_drive_desc	*node;
+	md_drive_desc	*cur = v2_dd;
+
+	while (cur != NULL) {
+		node = Zalloc(sizeof (o_md_drive_desc));
+		node->dd_dnp = Zalloc(sizeof (o_mddrivename_t));
+		node->dd_dnp->parts.parts_val = Zalloc(sizeof (o_mdname_t) *
+		    cur->dd_dnp->parts.parts_len);
+
+		/* push onto the front of the list */
+		node->dd_next = list;
+		list = node;
+
+		cur = cur->dd_next;
+	}
+	*v1_dd = list;
+}
+
+/*
+ * Allocate memory for v2 drive descriptor
+ * depending upon the number of drives in the
+ * v1 drive descriptor
+ *
+ * One zeroed element is created per v1 element, each with a zeroed
+ * dd_dnp.  Because elements are pushed onto the front of the list, the
+ * k-th element of the result gets a parts_val array sized from the k-th
+ * from last v1 element.  The list head is stored in *v2_dd (NULL when
+ * v1_dd is empty).
+ */
+void
+alloc_newdrvdesc(
+	o_md_drive_desc		*v1_dd,
+	md_drive_desc		**v2_dd
+)
+{
+	md_drive_desc	*list = NULL;
+	md_drive_desc	*node;
+	o_md_drive_desc	*cur = v1_dd;
+
+	while (cur != NULL) {
+		node = Zalloc(sizeof (md_drive_desc));
+		node->dd_dnp = Zalloc(sizeof (mddrivename_t));
+		node->dd_dnp->parts.parts_val = Zalloc(sizeof (mdname_t) *
+		    cur->dd_dnp->parts.parts_len);
+
+		/* push onto the front of the list */
+		node->dd_next = list;
+		list = node;
+
+		cur = cur->dd_next;
+	}
+	*v2_dd = list;
+}
+
+/*
+ * Free an old style drive descriptor list: for every element the
+ * parts_val array, the dd_dnp and the element itself are released.
+ * A NULL list is a no-op.
+ */
+void
+free_olddrvdesc(
+	o_md_drive_desc		*v1_dd
+)
+{
+	o_md_drive_desc	*cur, *next;
+
+	for (cur = v1_dd; cur != NULL; cur = next) {
+		/* grab the link before the element goes away */
+		next = cur->dd_next;
+		free(cur->dd_dnp->parts.parts_val);
+		free(cur->dd_dnp);
+		free(cur);
+	}
+}
+
+/*
+ * Free a new style drive descriptor list: for every element the
+ * parts_val array, the dd_dnp and the element itself are released.
+ * A NULL list is a no-op.
+ */
+void
+free_newdrvdesc(
+	md_drive_desc		*v2_dd
+)
+{
+	md_drive_desc	*cur, *next;
+
+	for (cur = v2_dd; cur != NULL; cur = next) {
+		/* grab the link before the element goes away */
+		next = cur->dd_next;
+		free(cur->dd_dnp->parts.parts_val);
+		free(cur->dd_dnp);
+		free(cur);
+	}
+}
+
+/*
+ * Return the device id for a given device
+ *
+ * rname is opened O_RDWR | O_NDELAY, its devid is fetched and encoded as
+ * a string.  The result is a strdup() copy of that string, or NULL when
+ * the open, devid_get(), devid_str_encode() or strdup() fails.
+ */
+char *
+meta_get_devid(
+	char	*rname
+)
+{
+	ddi_devid_t	devid;
+	int		fd;
+	int		got;
+	char		*encoded;
+	char		*result;
+
+	fd = open(rname, O_RDWR | O_NDELAY, 0);
+	if (fd < 0)
+		return (NULL);
+
+	/* the descriptor is only needed for the lookup itself */
+	got = devid_get(fd, &devid);
+	(void) close(fd);
+	if (got == -1)
+		return (NULL);
+
+	encoded = devid_str_encode(devid, NULL);
+	devid_free(devid);
+	if (encoded == NULL)
+		return (NULL);
+
+	result = strdup(encoded);
+	devid_str_free(encoded);
+
+	return (result);
+}
+
+/*
+ * Ask metad on hostname to add side names for the drive records of
+ * diskset sp.
+ * NOTE: these go into the local set's namespace.
+ *
+ * this_host, sd and the node_c/node_v list are handed to the remote
+ * procedure.  When running inside the daemon with hostname being this
+ * node, the version 2 service routine is called directly.  Otherwise the
+ * call goes over RPC, using the procedure that matches the version the
+ * client handle reports; for a version 1 peer the set descriptor and its
+ * drive list are converted to the old layout for the duration of the call.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_add_drv_sidenms(
+	char			*hostname,
+	char			*this_host,
+	mdsetname_t		*sp,
+	md_set_desc		*sd,
+	int			node_c,
+	char			**node_v,
+	md_error_t		*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_drv_sidenm_args		v1_args;
+	mdrpc_drv_sidenm_2_args		v2_args;
+	mdrpc_drv_sidenm_2_args_r1	*r1;
+	mdrpc_generic_res		res;
+	o_md_set_desc			*old_sd;
+	int				rpc_stat;
+	int				version;
+	int				side, ch;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the version 2 arguments are always prepared */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1 = &v2_args.mdrpc_drv_sidenm_2_args_u.rev1;
+	r1->hostname = this_host;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->sp = sp;
+	r1->sd = sd;
+	r1->node_v.node_v_len = node_c;
+	r1->node_v.node_v_val = node_v;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_add_drv_sidenms_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure from the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* temporary old-format set descriptor */
+			old_sd = Zalloc(sizeof (o_md_set_desc));
+			v1_args.sd = old_sd;
+			alloc_olddrvdesc(&old_sd->sd_drvs, sd->sd_drvs);
+
+			v1_args.hostname = this_host;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			v1_args.sp = sp;
+
+			/* copy the set descriptor field by field */
+			old_sd->sd_ctime = sd->sd_ctime;
+			old_sd->sd_genid = sd->sd_genid;
+			old_sd->sd_setno = sd->sd_setno;
+			old_sd->sd_flags = sd->sd_flags;
+			for (side = 0; side < MD_MAXSIDES; side++)
+				old_sd->sd_isown[side] = sd->sd_isown[side];
+			for (side = 0; side < MD_MAXSIDES; side++) {
+				for (ch = 0; ch < MD_MAX_NODENAME_PLUS_1; ch++)
+					old_sd->sd_nodes[side][ch] =
+					    sd->sd_nodes[side][ch];
+			}
+			old_sd->sd_med = sd->sd_med;
+			meta_conv_drvdesc_new2old(old_sd->sd_drvs,
+			    sd->sd_drvs);
+
+			v1_args.node_v.node_v_len = node_c;
+			v1_args.node_v.node_v_val = node_v;
+
+			rpc_stat = mdrpc_add_drv_sidenms_1(&v1_args, &res,
+			    clntp);
+
+			/* the converted copies are no longer needed */
+			free_olddrvdesc(old_sd->sd_drvs);
+			free(old_sd);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_add_drv_sidenms_2(&v2_args, &res,
+			    clntp);
+		}
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad add drive sidenames"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Add the drives described by dd to diskset sp on host hostname,
+ * passing along the given timestamp and genid.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC; a
+ * version 1 peer receives a temporary old-format copy of the drive list.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_adddrvs(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,
+	md_timeval32_t		timestamp,
+	ulong_t			genid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_drives_args	v1_args;
+	mdrpc_drives_2_args	v2_args;
+	mdrpc_drives_2_args_r1	*r1;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the version 2 arguments are always prepared */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1 = &v2_args.mdrpc_drives_2_args_u.rev1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->drivedescs = dd;
+	r1->timestamp = timestamp;
+	r1->genid = genid;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_adddrvs_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure from the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* temporary old-format copy of the drive list */
+			alloc_olddrvdesc(&v1_args.drivedescs, dd);
+
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			meta_conv_drvdesc_new2old(v1_args.drivedescs, dd);
+			v1_args.timestamp = timestamp;
+			v1_args.genid = genid;
+
+			rpc_stat = mdrpc_adddrvs_1(&v1_args, &res, clntp);
+
+			free_olddrvdesc(v1_args.drivedescs);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_adddrvs_2(&v2_args, &res, clntp);
+		}
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad add drives"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Add the node_c hosts named in node_v to diskset sp on host hostname.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC using
+ * the procedure that matches the version the client handle reports.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_addhosts(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			node_c,
+	char			**node_v,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_host_args		*args;
+	mdrpc_host_2_args	v2_args;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* args doubles as the version 1 argument block */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_host_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->hosts.hosts_len = node_c;
+	args->hosts.hosts_val = node_v;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_addhosts_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure from the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_addhosts_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_addhosts_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad add hosts"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Create diskset sp on host hostname with the given node list,
+ * timestamp and genid.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC using
+ * the procedure that matches the version the client handle reports.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_createset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_node_nm_arr_t	nodes,
+	md_timeval32_t		timestamp,
+	ulong_t			genid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_createset_args	*args;
+	mdrpc_createset_2_args	v2_args;
+	mdrpc_generic_res	res;
+	int			side;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* args doubles as the version 1 argument block */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_createset_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->timestamp = timestamp;
+	args->genid = genid;
+	for (side = 0; side < MD_MAXSIDES; side++)
+		(void) strcpy(args->nodes[side], nodes[side]);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_createset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure from the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_createset_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_createset_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad create set"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Create MN diskset sp on host hostname with the given node list,
+ * timestamp, genid and master node name/id.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC; this
+ * operation has no version 1 procedure, so a version 1 client handle
+ * yields MDE_DS_RPCVERSMISMATCH.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_mncreateset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_mnnode_desc		*nodelist,
+	md_timeval32_t		timestamp,
+	ulong_t			genid,
+	md_node_nm_t		master_nodenm,
+	int			master_nodeid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_mncreateset_args	*args;
+	mdrpc_mncreateset_2_args v2_args;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_mncreateset_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->timestamp = timestamp;
+	args->genid = genid;
+	(void) strlcpy(args->master_nodenm, master_nodenm, MD_MAX_NODENAME);
+	args->master_nodeid = master_nodeid;
+	args->nodelist = nodelist;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_mncreateset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* a version 1 handle cannot carry this request */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_mncreateset_2(&v2_args, &res, clntp) == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad mncreate set"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Ask host hostname to join MN set sp, passing flags through to the
+ * remote procedure.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC; this
+ * operation has no version 1 procedure, so a version 1 client handle
+ * yields MDE_DS_RPCVERSMISMATCH.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_joinset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			flags,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_flags_args	*args;
+	mdrpc_sp_flags_2_args	v2_args;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_flags_2_args_u.rev1;
+	args->sp = sp;
+	args->flags = flags;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_joinset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* a version 1 handle cannot carry this request */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_joinset_2(&v2_args, &res, clntp) == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad join set"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Ask host hostname to withdraw from MN set sp.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC; this
+ * operation has no version 1 procedure, so a version 1 client handle
+ * yields MDE_DS_RPCVERSMISMATCH.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_withdrawset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_args		*args;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_withdrawset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* a version 1 handle cannot carry this request */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_withdrawset_2(&v2_args, &res, clntp) == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad withdraw set"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Delete side names for the diskset drive records of sp on host hostname.
+ * NOTE: these are removed from the local set's namespace.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC using
+ * the procedure that matches the version the client handle reports.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_del_drv_sidenms(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_args		*args;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args; args doubles as the version 1 argument block */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/* local server: call the v2 service routine directly */
+		bool = mdrpc_del_drv_sidenms_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * The set descriptor itself is not used here; only a
+		 * lookup failure that reports an error aborts the call.
+		 * NOTE(review): presumably this primes the cached set
+		 * descriptor -- confirm against metaget_setdesc().
+		 */
+		if (metaget_setdesc(sp, ep) == NULL) {
+			if (! mdisok(ep)) {
+				/* do not leak the open client handle */
+				metarpcclose(clntp);
+				return (-1);
+			}
+			mdclrerror(ep);
+		}
+
+		/*
+		 * Check the client handle for the version and invoke
+		 * the appropriate version of the remote procedure
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			if (mdrpc_del_drv_sidenms_1(args, &res, clntp) !=
+			    RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN,
+				    "metad delete drive sidenames"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		} else {
+			if (mdrpc_del_drv_sidenms_2(&v2_args, &res, clntp) !=
+			    RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN,
+				    "metad delete drive sidenames"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Delete the drives described by dd from diskset sp on host hostname.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC; a
+ * version 1 peer receives a temporary old-format copy of the drive list.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_deldrvs(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_drives_args	v1_args;
+	mdrpc_drives_2_args	v2_args;
+	mdrpc_drives_2_args_r1	*r1;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the version 2 arguments are always prepared */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1 = &v2_args.mdrpc_drives_2_args_u.rev1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->drivedescs = dd;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_deldrvs_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure from the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* temporary old-format copy of the drive list */
+			alloc_olddrvdesc(&v1_args.drivedescs, dd);
+
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			meta_conv_drvdesc_new2old(v1_args.drivedescs, dd);
+
+			rpc_stat = mdrpc_deldrvs_1(&v1_args, &res, clntp);
+
+			free_olddrvdesc(v1_args.drivedescs);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_deldrvs_2(&v2_args, &res, clntp);
+		}
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad delete drives"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Delete the node_c hosts named in node_v from diskset sp on host
+ * hostname.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC using
+ * the procedure that matches the version the client handle reports.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_delhosts(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			node_c,
+	char			**node_v,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_host_args		*args;
+	mdrpc_host_2_args	v2_args;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* args doubles as the version 1 argument block */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_host_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->hosts.hosts_len = node_c;
+	args->hosts.hosts_val = node_v;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_delhosts_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure from the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_delhosts_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_delhosts_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad delete hosts"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Delete diskset sp on host hostname.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC using
+ * the procedure that matches the version the client handle reports.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_delset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_args		*args;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* args doubles as the version 1 argument block */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_delset_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure from the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_delset_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_delset_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad delete set"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Return remote device info for drive dp in diskset sp as seen by host
+ * hostname.
+ *
+ * On success *ret_dev receives the device number and *ret_timestamp the
+ * time value from the reply; either pointer may be NULL when the caller
+ * does not want that value.  A version 1 reply has its device number
+ * expanded with meta_expldev().
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC; a
+ * version 1 peer receives a temporary old-format copy of the drive name.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_devinfo(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mddrivename_t		*dp,
+	md_dev64_t		*ret_dev,
+	time_t			*ret_timestamp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_devinfo_args	v1_args;
+	mdrpc_devinfo_2_args	v2_args;
+	mdrpc_devinfo_2_args_r1	*v21_args;
+	mdrpc_devinfo_res	v1_res;
+	mdrpc_devinfo_2_res	v2_res;
+	int			rval, version;
+	int			used_v1 = 0;	/* reply is in v1_res */
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&v1_res,  0, sizeof (v1_res));
+	(void) memset(&v2_res, 	0, sizeof (v2_res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	v21_args = &v2_args.mdrpc_devinfo_2_args_u.rev1;
+	v21_args->sp = sp;
+	v21_args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	v21_args->drivenamep = dp;
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * If the server is local, we call the v2 procedure.
+		 * Its status lives in v2_res, so that is what is stolen.
+		 */
+		bool = mdrpc_devinfo_2_svc(&v2_args, &v2_res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &v2_res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Check the client handle for the version
+		 * and invoke the appropriate version of
+		 * the remote procedure.
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			used_v1 = 1;
+
+			v1_args.drivenamep =
+			    Zalloc(sizeof (o_mddrivename_t));
+			v1_args.drivenamep->parts.parts_val =
+			    Zalloc((sizeof (o_mdname_t)) *
+			    dp->parts.parts_len);
+
+			/* build args */
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno,
+			    sp->setname);
+
+			/*
+			 * Convert v2 arguments to v1 arguments
+			 * before sending over the wire.
+			 */
+			meta_conv_drvname_new2old(v1_args.drivenamep,
+			    v21_args->drivenamep);
+
+			rval = mdrpc_devinfo_1(&v1_args, &v1_res, clntp);
+
+			free(v1_args.drivenamep->parts.parts_val);
+			free(v1_args.drivenamep);
+
+			if (rval != RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN, "metad device info"));
+			else
+				(void) mdstealerror(ep, &v1_res.status);
+		} else {			/* version 2 */
+			rval = mdrpc_devinfo_2(&v2_args, &v2_res, clntp);
+			if (rval != RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN, "metad device info"));
+			else
+				(void) mdstealerror(ep, &v2_res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	/*
+	 * The outcome is decided by ep alone: the RPC status is not a
+	 * usable return code, and the local path never sets one.
+	 */
+	if (mdisok(ep)) {
+		/* do something with the results */
+		rval = 0;
+
+		if (ret_dev != NULL) {
+			if (used_v1)
+				*ret_dev = meta_expldev(v1_res.dev);
+			else
+				*ret_dev = v2_res.dev;
+		}
+
+		if (ret_timestamp != NULL) {
+			if (used_v1)
+				*ret_timestamp = v1_res.vtime;
+			else
+				*ret_timestamp = v2_res.vtime;
+		}
+	} else {
+		rval = -1;
+	}
+
+	/* free whichever result structure was actually filled in */
+	if (used_v1)
+		xdr_free(xdr_mdrpc_devinfo_res, (char *)&v1_res);
+	else
+		xdr_free(xdr_mdrpc_devinfo_2_res, (char *)&v2_res);
+
+	return (rval);
+}
+
+/*
+ * Return the encoded device id of drive dp in diskset sp as seen by host
+ * hostname.
+ *
+ * On success, and when ret_encdevid is not NULL, *ret_encdevid receives a
+ * strdup()ed copy of the encoded device id from the reply, or NULL when
+ * the reply carries none.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC; a
+ * version 1 client handle has no such procedure and yields
+ * MDE_DS_DRIVENOTONHOST.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+clnt_devid(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mddrivename_t		*dp,
+	char			**ret_encdevid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_devid_args	*args;
+	mdrpc_devid_2_args	v2_args;
+	mdrpc_devid_res		res;
+	int			rval;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_devid_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->drivenamep = dp;
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * If the server is local, we call the v2 procedure.
+		 */
+		bool = mdrpc_devid_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Check the client handle for the version
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/*
+		 * If the client is version 1, return error
+		 * otherwise, make the remote procedure call.
+		 */
+		if (version == METAD_VERSION) {	/* version 1 */
+			(void) mddserror(ep, MDE_DS_DRIVENOTONHOST, sp->setno,
+			    hostname, dp->cname, sp->setname);
+		} else {			/* version 2 */
+			if (mdrpc_devid_2(&v2_args, &res, clntp) !=
+			    RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN, "metad devid info"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	/*
+	 * The outcome is decided by ep alone, so every failure path
+	 * (including the version 1 one) yields a defined -1.
+	 */
+	if (mdisok(ep)) {
+		/* do something with the results */
+		rval = 0;
+
+		/* never hand a NULL string to strdup() */
+		if (ret_encdevid != NULL) {
+			*ret_encdevid = (res.enc_devid != NULL) ?
+			    strdup(res.enc_devid) : NULL;
+		}
+	} else {
+		rval = -1;
+	}
+
+	xdr_free(xdr_mdrpc_devid_res, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Get the device information of a disk on a remote host: the device's
+ * name, the associated driver and the device number.  The lookup key is
+ * the encoded devid of the disk (devidstr).  The device name on the
+ * originating node (orig_devname) is passed along with the request.
+ *
+ * Locally mdrpc_devinfo_by_devid_name_2_svc() is called directly.  Over a
+ * CLIENT handle mdrpc_devinfo_by_devid_name_2() is tried first; if that
+ * RPC fails, the request is retried with mdrpc_devinfo_by_devid_2(),
+ * which takes only the devid string and the set.
+ *
+ * On success *ret_dev, *ret_devname and *ret_driver are filled in; each
+ * pointer may be NULL, and the two strings are only set (to Strdup()ed
+ * copies) when the reply contains them.
+ *
+ * Returns:
+ * 	-1 	Error
+ * 	ENOTSUP Operation not supported, i.e. the client handle is
+ * 		version 1
+ * 	0	Success
+ */
+int
+clnt_devinfo_by_devid(
+	char		*hostname,
+	mdsetname_t	*sp,
+	char		*devidstr,
+	md_dev64_t	*ret_dev,
+	char		*orig_devname,
+	char		**ret_devname,
+	char		**ret_driver,
+	md_error_t	*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_devidstr_args	devid_args;
+	mdrpc_devid_name_args	*args;
+	mdrpc_devid_name_2_args	v2_args;
+	mdrpc_devinfo_2_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* fill in the request */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_devid_name_2_args_u.rev1;
+	args->enc_devid = devidstr;
+	args->orig_devname = orig_devname;
+	args->sp = sp;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the service routine directly */
+		svc_ok = mdrpc_devinfo_by_devid_name_2_svc(&v2_args, &res,
+		    NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* nothing to call on a version 1 handle */
+		if (version == METAD_VERSION) {
+			metarpcclose(clntp);
+			return (ENOTSUP);
+		}
+
+		rpc_stat = mdrpc_devinfo_by_devid_name_2(&v2_args, &res,
+		    clntp);
+
+		if (rpc_stat == RPC_SUCCESS) {
+			(void) mdstealerror(ep, &res.status);
+		} else {
+			/* retry with the devid-only procedure */
+			(void) memset(&devid_args, 0, sizeof (devid_args));
+			(void) memset(&res, 0, sizeof (res));
+
+			devid_args.enc_devid = devidstr;
+			devid_args.sp = sp;
+
+			rpc_stat = mdrpc_devinfo_by_devid_2(&devid_args,
+			    &res, clntp);
+
+			if (rpc_stat == RPC_SUCCESS)
+				(void) mdstealerror(ep, &res.status);
+			else
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN,
+				    "metad devinfo by devid"));
+		}
+		metarpcclose(clntp);
+	}
+
+	/* hand the results to the caller only when all went well */
+	if (mdisok(ep)) {
+		if (ret_dev != NULL)
+			*ret_dev = res.dev;
+
+		if (ret_devname != NULL && res.devname != NULL)
+			*ret_devname = Strdup(res.devname);
+
+		if (ret_driver != NULL && res.drivername != NULL)
+			*ret_driver = Strdup(res.drivername);
+	}
+
+	xdr_free(xdr_mdrpc_devinfo_2_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+
+/*
+ * Ask host hostname whether drive dp of diskset sp is in use.  The
+ * answer is carried in ep: the remote status is stolen into it.
+ *
+ * Inside the daemon, with hostname being this node, the version 2 service
+ * routine is called directly.  Otherwise the request goes over RPC; a
+ * version 1 peer receives a temporary old-format copy of the drive name.
+ *
+ * Returns 0 when ep is clean afterwards, -1 otherwise.
+ */
+int
+clnt_drvused(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mddrivename_t		*dp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_drvused_args	v1_args;
+	mdrpc_drvused_2_args	v2_args;
+	mdrpc_drvused_2_args_r1	*r1;
+	mdrpc_generic_res	res;
+	o_mddrivename_t		*old_dnp;
+	int			rpc_stat;
+	int			version;
+
+	/* start from a clean slate */
+	mdclrerror(ep);
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the version 2 arguments are always prepared */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	r1 = &v2_args.mdrpc_drvused_2_args_u.rev1;
+	r1->sp = sp;
+	r1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	r1->drivenamep = dp;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	svc_ok;
+
+		/* local server: call the v2 service routine directly */
+		svc_ok = mdrpc_drvused_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure from the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* temporary old-format drive name */
+			old_dnp = Zalloc(sizeof (o_mddrivename_t));
+			v1_args.drivenamep = old_dnp;
+			old_dnp->parts.parts_val =
+			    Zalloc((sizeof (o_mdname_t)) *
+			    dp->parts.parts_len);
+
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+			meta_conv_drvname_new2old(old_dnp, r1->drivenamep);
+
+			rpc_stat = mdrpc_drvused_1(&v1_args, &res, clntp);
+
+			free(old_dnp->parts.parts_val);
+			free(old_dnp);
+		} else {			/* version 2 */
+			rpc_stat = mdrpc_drvused_2(&v2_args, &res, clntp);
+		}
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad drive used"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Deep-free a set record obtained outside the daemon.  Inside the daemon
+ * this is a no-op.
+ *
+ * The record is wrapped in a dummy result structure and handed to
+ * xdr_free(), so that the xdr code also frees the linked list of drive
+ * records for the sr and the linked list of node records for the mnsr.
+ */
+void
+free_sr(md_set_record *sr)
+{
+	if (md_in_daemon)
+		return;
+
+	if (!MD_MNSET_REC(sr)) {
+		mdrpc_getset_res	wrap;
+
+		(void) memset(&wrap, 0, sizeof (wrap));
+		wrap.sr = sr;
+		xdr_free(xdr_mdrpc_getset_res, (char *)&wrap);
+	} else {
+		mdrpc_mngetset_res	mnwrap;
+
+		(void) memset(&mnwrap, 0, sizeof (mnwrap));
+		mnwrap.mnsr = (struct md_mnset_record *)sr;
+		xdr_free(xdr_mdrpc_mngetset_res, (char *)&mnwrap);
+	}
+}
+
+/*
+ * Answer a "get set" request locally instead of over RPC.
+ *
+ * The set record is looked up by name when args->setname is non-NULL and
+ * by args->setno otherwise.  The record is stored in res->sr, and
+ * res->status is passed to the lookup routine for error reporting.
+ */
+void
+short_circuit_getset(
+	mdrpc_getset_args	*args,
+	mdrpc_getset_res	*res
+)
+{
+	if (args->setname == NULL) {
+		res->sr = metad_getsetbynum(args->setno, &res->status);
+		return;
+	}
+
+	res->sr = metad_getsetbyname(args->setname, &res->status);
+}
+
+/*
+ * Answer a multi-node "get set" request locally instead of over RPC.
+ *
+ * The set record is looked up by name when args->setname is non-NULL and
+ * by args->setno otherwise; res->status is passed to the lookup routine
+ * for error reporting.  res->mnsr is set to the record only when a record
+ * was found and MD_MNSET_REC() identifies it as a multi-node set record;
+ * in every other case res->mnsr is NULL.
+ */
+void
+short_circuit_mngetset(
+	mdrpc_getset_args	*args,
+	mdrpc_mngetset_res	*res
+)
+{
+	md_set_record		*sr;
+
+	if (args->setname != NULL)
+		sr = metad_getsetbyname(args->setname, &res->status);
+	else
+		sr = metad_getsetbynum(args->setno, &res->status);
+
+	/*
+	 * The lookup may not produce a record (presumably NULL on failure),
+	 * so only let MD_MNSET_REC() inspect sr when there is one.
+	 */
+	if (sr != NULL && MD_MNSET_REC(sr)) {
+		res->mnsr = (struct md_mnset_record *)sr;
+	} else {
+		res->mnsr = NULL;
+	}
+}
+
+/*
+ * Ask whether a set is an auto-take set.  The query is made by name when
+ * setname is non-NULL, otherwise by set number; the result of the
+ * underlying metad_isautotakeby*() call is returned unchanged.
+ */
+static int
+is_auto_take_set(char *setname, set_t setno)
+{
+	return ((setname != NULL) ?
+	    metad_isautotakebyname(setname) :
+	    metad_isautotakebynum(setno));
+}
+
+/*
+ * return the diskset record, and drive records.
+ * If record is a MNdiskset record, then only the first md_set_record
+ * bytes were copied from the daemon.
+ *
+ * The set is selected by setname when that is non-NULL, otherwise by
+ * setno.  Three paths are possible:
+ *   - in the daemon on the target host: the record is looked up directly;
+ *   - a connection to hostname can be opened: the version 1 or version 2
+ *     remote procedure is called, depending on the client handle;
+ *   - no connection can be opened, but the set is an auto-take set: the
+ *     record is looked up directly and duplicated with setdup().
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).  On
+ * success *ret_sr receives the record when ret_sr is non-NULL; with a
+ * NULL ret_sr the result is released again unless running in the daemon.
+ */
+int
+clnt_getset(
+	char			*hostname,
+	char			*setname,
+	set_t			setno,
+	md_set_record		**ret_sr,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_getset_args	*args;
+	mdrpc_getset_2_args	v2_args;
+	mdrpc_getset_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the rev1 arm of the v2 arguments is also used as the v1 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_getset_2_args_u.rev1;
+	args->setname = setname;
+	args->setno   = setno;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* we are the daemon on the target host; no RPC needed */
+		short_circuit_getset(args, &res);
+		(void) mdstealerror(ep, &res.status);
+	} else if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) != NULL) {
+		/*
+		 * Ask the client handle which version it speaks and call
+		 * the matching remote procedure.
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_getset_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_getset_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad get set"));
+
+		metarpcclose(clntp);
+	} else if (is_auto_take_set(setname, setno)) {
+		/*
+		 * This has to work during the boot up before the rpc.metad
+		 * can run, so an auto-take set is handled as a strictly
+		 * local diskset.  The connection error is discarded first.
+		 */
+		mdclrerror(ep);
+		short_circuit_getset(args, &res);
+		res.sr = setdup(res.sr);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		/* no connection and not auto-take: ep has the open error */
+		return (-1);
+	}
+
+	if (! mdisok(ep))
+		return (-1);
+
+	if (ret_sr != NULL)
+		*ret_sr = res.sr;
+	else if (! md_in_daemon)
+		xdr_free(xdr_mdrpc_getset_res, (char *)&res);
+
+	return (0);
+}
+
+/*
+ * return the multi-node diskset record, drive records and node records.
+ *
+ * The set is selected by setname when that is non-NULL, otherwise by
+ * setno.  In the daemon on the target host the record is looked up
+ * directly; otherwise a connection to hostname is opened.  There is no
+ * version 1 form of this procedure, so a version 1 client handle results
+ * in an MDE_DS_RPCVERSMISMATCH error.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).  On
+ * success *ret_mnsr receives the record when ret_mnsr is non-NULL; with a
+ * NULL ret_mnsr the result is released again unless running in the daemon.
+ */
+int
+clnt_mngetset(
+	char			*hostname,
+	char			*setname,
+	set_t			setno,
+	md_mnset_record		**ret_mnsr,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_getset_args	*args;
+	mdrpc_getset_2_args	v2_args;
+	mdrpc_mngetset_res	res;
+	int			rval = -1;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_getset_2_args_u.rev1;
+	args->setname = setname;
+	args->setno   = setno;
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* we are the daemon on the target host; no RPC needed */
+		short_circuit_mngetset(args, &res);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Check the client handle for the version
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/*
+		 * If the client is version 1, return error
+		 * otherwise, make the remote procedure call.
+		 */
+		if (version == METAD_VERSION) { /* version 1 */
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    setno, hostname, NULL, setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_mngetset_2(&v2_args, &res, clntp) != RPC_SUCCESS)
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad mn get set"));
+		else
+			(void) mdstealerror(ep, &res.status);
+
+		metarpcclose(clntp);
+	}
+
+	/* If no ep error and no version mismatch - rpc call worked ok */
+	if (mdisok(ep)) {
+		rval = 0;
+		if (ret_mnsr != NULL)
+			*ret_mnsr = res.mnsr;
+		else if (! md_in_daemon)
+			xdr_free(xdr_mdrpc_mngetset_res, (char *)&res);
+	}
+
+	return (rval);
+}
+
+/*
+ * Set master nodeid and nodename in multi-node set record.
+ *
+ * The request is always sent over RPC to hostname; this function has no
+ * in-daemon short cut.  There is no version 1 form of the procedure, so a
+ * version 1 client handle results in an MDE_DS_RPCVERSMISMATCH error.
+ * master_nodenm is copied into the arguments with strlcpy(), limited to
+ * MD_MAX_NODENAME bytes.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).
+ */
+int
+clnt_mnsetmaster(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_node_nm_t		master_nodenm,
+	int			master_nodeid,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_mnsetmaster_args	*args;
+	mdrpc_mnsetmaster_2_args	v2_args;
+	mdrpc_generic_res	res;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_mnsetmaster_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	(void) strlcpy(args->master_nodenm, master_nodenm, MD_MAX_NODENAME);
+	args->master_nodeid = master_nodeid;
+
+	/* do it */
+	if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * Check the client handle for the version
+	 */
+	CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+	/*
+	 * If the client is version 1, return error
+	 * otherwise, make the remote procedure call.
+	 * (res is still all zeroes on the error path, so it is not freed.)
+	 */
+	if (version == METAD_VERSION) { /* version 1 */
+		(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+		    sp->setno, hostname, NULL, sp->setname);
+		metarpcclose(clntp);
+		return (-1);
+	}
+
+	if (mdrpc_mnsetmaster_2(&v2_args, &res, clntp) != RPC_SUCCESS)
+		(void) mdrpcerror(ep, clntp, hostname,
+		    dgettext(TEXT_DOMAIN, "metad multi-owner set master"));
+	else
+		(void) mdstealerror(ep, &res.status);
+
+	metarpcclose(clntp);
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Get the MH timeout values.
+ *
+ * In the daemon on the target host the version 2 service routine is
+ * called directly; otherwise a connection to hostname is opened and the
+ * version 1 or version 2 remote procedure is used, depending on the
+ * client handle.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).  On
+ * success the structure pointed to by the result is copied into
+ * *ret_mhiargs when ret_mhiargs is non-NULL.
+ */
+int
+clnt_gtimeout(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mhd_mhiargs_t		*ret_mhiargs,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_args		*args;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_gtimeout_res	res;
+	int			rpc_stat;
+	int			version;
+	int			rval;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the rev1 arm of the v2 arguments is also used as the v1 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the v2 service routine directly */
+		int	svc_ok = mdrpc_gtimeout_2_svc(&v2_args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_gtimeout_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_gtimeout_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad get timeout"));
+
+		metarpcclose(clntp);
+	}
+
+	if (mdisok(ep)) {
+		rval = 0;
+
+		/* copy md_mhiargs_t out before the result is freed */
+		if (ret_mhiargs != NULL)
+			*ret_mhiargs = *res.mhiargsp;
+	} else {
+		rval = -1;
+	}
+
+	xdr_free(xdr_mdrpc_gtimeout_res, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * get real hostname from remote host
+ *
+ * In the daemon on the target host the service routine is called
+ * directly; otherwise the version 1 remote procedure is called over a
+ * connection to hostname.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).  On
+ * success, and when ret_hostname is non-NULL, *ret_hostname receives a
+ * Strdup()ed copy of the name taken before the result is freed.
+ */
+int
+clnt_hostname(
+	char			*hostname,
+	char			**ret_hostname,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_null_args		args;
+	mdrpc_hostname_res	res;
+	int			rval;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* this request carries no set key */
+	args.cl_sk = NULL;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the service routine directly */
+		int	svc_ok = mdrpc_hostname_1_svc(&args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		if (mdrpc_hostname_1(&args, &res, clntp) == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad hostname"));
+
+		metarpcclose(clntp);
+	}
+
+	if (mdisok(ep)) {
+		rval = 0;
+
+		if (ret_hostname != NULL)
+			*ret_hostname = Strdup(res.hostname);
+	} else {
+		rval = -1;
+	}
+
+	xdr_free(xdr_mdrpc_hostname_res, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * NULLPROC - just returns a response
+ *
+ * ep itself is used as the result of the procedure.  In the daemon on the
+ * target host the service routine is called directly; otherwise the
+ * remote procedure is called over a connection opened with CL_DEF_TMO.
+ *
+ * Returns 0 when ep is clean afterwards, -1 otherwise.
+ */
+int
+clnt_nullproc(
+	char			*hostname,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+
+	/* start with a clean error */
+	mdclrerror(ep);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the service routine directly */
+		int	svc_ok = mdrpc_nullproc_1_svc(NULL, ep, NULL);
+
+		assert(svc_ok == TRUE);
+	} else {
+		clntp = metarpcopen(hostname, CL_DEF_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		if (mdrpc_nullproc_1(NULL, ep, clntp) != RPC_SUCCESS)
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad nullproc"));
+
+		metarpcclose(clntp);
+	}
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * does host own the set?
+ *
+ * Three paths are possible:
+ *   - in the daemon on the target host: the version 2 service routine is
+ *     called directly;
+ *   - a connection to hostname can be opened: the version 1 or version 2
+ *     remote procedure is called, depending on the client handle;
+ *   - no connection can be opened, but the set is an auto-take set:
+ *     ownership is determined locally with s_ownset().
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).  On
+ * success the answer is stored in *ret_bool when ret_bool is non-NULL.
+ */
+int
+clnt_ownset(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			*ret_bool,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_args		*args;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_bool_res		res;
+	int			rpc_stat;
+	int			version;
+	int			rval;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the rev1 arm of the v2 arguments is also used as the v1 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the v2 service routine directly */
+		int	svc_ok = mdrpc_ownset_2_svc(&v2_args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) != NULL) {
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_ownset_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_ownset_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad own set"));
+
+		metarpcclose(clntp);
+	} else if (is_auto_take_set(sp->setname, sp->setno)) {
+		/*
+		 * This has to work in the code path from libpreen which is
+		 * running within fsck before the rpc.metad can run, so an
+		 * auto-take diskset is answered locally.  We can't call
+		 * mdrpc_ownset_2_svc since we are not in the daemon.
+		 */
+		mdclrerror(ep);
+		res.value = s_ownset(sp->setno, ep) ? TRUE : FALSE;
+	} else {
+		/* no connection and not auto-take: ep has the open error */
+		return (-1);
+	}
+
+	if (mdisok(ep)) {
+		rval = 0;
+
+		if (ret_bool != NULL)
+			*ret_bool = res.value;
+	} else {
+		rval = -1;
+	}
+
+	xdr_free(xdr_mdrpc_bool_res, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Valid set name.
+ *
+ * In the daemon on the target host the version 2 service routine is
+ * called directly; otherwise a connection to hostname is opened and the
+ * version 1 or version 2 remote procedure is used, depending on the
+ * client handle.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).  On
+ * success the answer is stored in *ret_bool when ret_bool is non-NULL.
+ */
+int
+clnt_setnameok(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			*ret_bool,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_args		*args;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_bool_res		res;
+	int			rpc_stat;
+	int			version;
+	int			rval;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the rev1 arm of the v2 arguments is also used as the v1 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the v2 service routine directly */
+		int	svc_ok = mdrpc_setnameok_2_svc(&v2_args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_setnameok_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_setnameok_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad setname ok"));
+
+		metarpcclose(clntp);
+	}
+
+	if (mdisok(ep)) {
+		rval = 0;
+
+		if (ret_bool != NULL)
+			*ret_bool = res.value;
+	} else {
+		rval = -1;
+	}
+
+	xdr_free(xdr_mdrpc_bool_res, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Is set number in-use?
+ *
+ * In the daemon on the target host the version 2 service routine is
+ * called directly; otherwise a connection to hostname is opened and the
+ * version 1 or version 2 remote procedure is used, depending on the
+ * client handle.  No set key is sent with this request.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).  On
+ * success the answer is stored in *ret_bool when ret_bool is non-NULL.
+ */
+int
+clnt_setnumbusy(
+	char			*hostname,
+	set_t			setno,
+	int			*ret_bool,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_setno_args	*args;
+	mdrpc_setno_2_args	v2_args;
+	mdrpc_bool_res		res;
+	int			rpc_stat;
+	int			version;
+	int			rval;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the rev1 arm of the v2 arguments is also used as the v1 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_setno_2_args_u.rev1;
+	args->setno = setno;
+	args->cl_sk = NULL;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the v2 service routine directly */
+		int	svc_ok = mdrpc_setnumbusy_2_svc(&v2_args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_setnumbusy_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_setnumbusy_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad setnumber busy"));
+
+		metarpcclose(clntp);
+	}
+
+	if (mdisok(ep)) {
+		rval = 0;
+
+		if (ret_bool != NULL)
+			*ret_bool = res.value;
+	} else {
+		rval = -1;
+	}
+
+	xdr_free(xdr_mdrpc_bool_res, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Set the timeout values used into the drive records.
+ *
+ * In the daemon on the target host the version 2 service routine is
+ * called directly; otherwise a connection to hostname is opened and the
+ * version 1 or version 2 remote procedure is used, depending on the
+ * client handle.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).
+ */
+int
+clnt_stimeout(
+	char			*hostname,
+	mdsetname_t		*sp,
+	mhd_mhiargs_t		*mhiargsp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_stimeout_args	*args;
+	mdrpc_stimeout_2_args	v2_args;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the rev1 arm of the v2 arguments is also used as the v1 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_stimeout_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->mhiargsp = mhiargsp;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the v2 service routine directly */
+		int	svc_ok = mdrpc_stimeout_2_svc(&v2_args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_stimeout_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_stimeout_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad set timeout"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * update drive records
+ *
+ * In the daemon on the target host the version 2 service routine is
+ * called directly.  Otherwise a connection to hostname is opened; for a
+ * version 1 handle the drive descriptors are first converted into a
+ * temporary old-format copy, which is released again right after the
+ * call, and for any other version the version 2 procedure is used as is.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).
+ */
+int
+clnt_upd_dr_dbinfo(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_drives_args	v1_args;
+	mdrpc_drives_2_args	v2_args;
+	mdrpc_drives_2_args_r1	*v21_args;
+	mdrpc_generic_res	res;
+	int			rval;
+	int			version;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build the version 2 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	v21_args = &v2_args.mdrpc_drives_2_args_u.rev1;
+	v21_args->sp = sp;
+	v21_args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	v21_args->drivedescs = dd;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* If the server is local, we call the v2 procedure */
+		int	svc_ok;
+
+		svc_ok = mdrpc_upd_dr_dbinfo_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* build v1 args around an old-format descriptor copy */
+			alloc_olddrvdesc(&v1_args.drivedescs, dd);
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			meta_conv_drvdesc_new2old(v1_args.drivedescs, dd);
+
+			rval = mdrpc_upd_dr_dbinfo_1(&v1_args, &res, clntp);
+
+			free_olddrvdesc(v1_args.drivedescs);
+		} else {			/* version 2 */
+			rval = mdrpc_upd_dr_dbinfo_2(&v2_args, &res, clntp);
+		}
+
+		if (rval == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad update drive dbinfo"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * update dr_flags field of drive record.
+ *
+ * In the daemon on the target host the version 2 service routine is
+ * called directly.  Otherwise a connection to hostname is opened; for a
+ * version 1 handle the drive descriptors are first converted into a
+ * temporary old-format copy, which is released again right after the
+ * call, and for any other version the version 2 procedure is used as is.
+ *
+ * Returns 0 on success.  An error that mdanyrpcerror() reports as an RPC
+ * error is forgiven (ep is cleared and 0 returned) when hostname is not
+ * this node; every other error returns -1 with ep set.
+ */
+int
+clnt_upd_dr_flags(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,
+	uint_t			new_flags,
+	md_error_t		*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_upd_dr_flags_args		v1_args;
+	mdrpc_upd_dr_flags_2_args	v2_args;
+	mdrpc_upd_dr_flags_2_args_r1	*v21_args;
+	mdrpc_generic_res		res;
+	int				rval;
+	int				version;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v1_args, 0, sizeof (v1_args));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build the version 2 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	v21_args = &v2_args.mdrpc_upd_dr_flags_2_args_u.rev1;
+	v21_args->sp = sp;
+	v21_args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	v21_args->drivedescs = dd;
+	v21_args->new_flags = new_flags;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* If the server is local, we call the v2 procedure */
+		int	svc_ok;
+
+		svc_ok = mdrpc_upd_dr_flags_2_svc(&v2_args, &res, NULL);
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION) {	/* version 1 */
+			/* build v1 args around an old-format descriptor copy */
+			alloc_olddrvdesc(&v1_args.drivedescs, dd);
+			v1_args.sp = sp;
+			v1_args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			meta_conv_drvdesc_new2old(v1_args.drivedescs, dd);
+			v1_args.new_flags = new_flags;
+
+			rval = mdrpc_upd_dr_flags_1(&v1_args, &res, clntp);
+
+			free_olddrvdesc(v1_args.drivedescs);
+		} else {			/* version 2 */
+			rval = mdrpc_upd_dr_flags_2(&v2_args, &res, clntp);
+		}
+
+		if (rval == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad update drive flags"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC errors against some other host are tolerated */
+	if (! mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * update set record flags
+ * This replaces all of the sr_flags with the new_flags.  It relies on the
+ * caller to "do the right thing" to preserve the existing flags that should
+ * not be reset.
+ *
+ * In the daemon on the target host the version 2 service routine is
+ * called directly; otherwise a connection to hostname is opened and the
+ * version 1 or version 2 remote procedure is used, depending on the
+ * client handle.
+ *
+ * Returns 0 on success.  An error that mdanyrpcerror() reports as an RPC
+ * error is forgiven (ep is cleared and 0 returned) when hostname is not
+ * this node; every other error returns -1 with ep set.
+ */
+static int
+upd_sr_flags_common(
+	char			*hostname,
+	mdsetname_t		*sp,
+	uint_t			new_flags,
+	md_error_t		*ep
+)
+{
+	CLIENT				*clntp;
+	mdrpc_upd_sr_flags_args		*args;
+	mdrpc_upd_sr_flags_2_args	v2_args;
+	mdrpc_generic_res		res;
+	int				rpc_stat;
+	int				version;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the rev1 arm of the v2 arguments is also used as the v1 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_upd_sr_flags_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->new_flags = new_flags;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the v2 service routine directly */
+		int	svc_ok = mdrpc_upd_sr_flags_2_svc(&v2_args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_upd_sr_flags_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_upd_sr_flags_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad update set flags"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC errors against some other host are tolerated */
+	if (! mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Enable bits in the set record flags field.  This just turns on the specified
+ * bits and leaves the other bits alone.
+ *
+ * The current flags are read from the set descriptor returned by
+ * metaget_setdesc(); the descriptor itself is not modified here.
+ * Returns -1 if the descriptor cannot be obtained, otherwise the result
+ * of upd_sr_flags_common().
+ */
+int
+clnt_enable_sr_flags(
+	char			*hostname,
+	mdsetname_t		*sp,
+	uint_t			flags,
+	md_error_t		*ep
+)
+{
+	md_set_desc	*sd;
+
+	mdclrerror(ep);
+
+	/* the current flags come from the set descriptor */
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	/* send the current flags with the requested bits switched on */
+	return (upd_sr_flags_common(hostname, sp, sd->sd_flags | flags, ep));
+}
+
+/*
+ * Disable bits in the set record flags field.  This just turns off the
+ * specified bits and leaves the other bits alone.
+ *
+ * The current flags are read from the set descriptor returned by
+ * metaget_setdesc(); the descriptor itself is not modified here.
+ * Returns -1 if the descriptor cannot be obtained, otherwise the result
+ * of upd_sr_flags_common().
+ */
+int
+clnt_disable_sr_flags(
+	char			*hostname,
+	mdsetname_t		*sp,
+	uint_t			flags,
+	md_error_t		*ep
+)
+{
+	md_set_desc	*sd;
+
+	mdclrerror(ep);
+
+	/* the current flags come from the set descriptor */
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	/* send the current flags with the requested bits switched off */
+	return (upd_sr_flags_common(hostname, sp, sd->sd_flags & ~flags, ep));
+}
+
+/*
+ * Assign the flags as the new value(s) for the MD_SR_STATE_FLAGS within the
+ * set record flags field.  This actually can set any bits but only clears
+ * the bits within the MD_SR_STATE_FLAGS subfield and leaves any other
+ * bits turned on.  It can be used to clear (state) and set bits all in one
+ * rpc call.
+ *
+ * Note that the MD_SR_STATE_FLAGS bits are cleared directly in the set
+ * descriptor returned by metaget_setdesc(), before the rpc call is made
+ * and regardless of its outcome.
+ *
+ * Returns -1 if the descriptor cannot be obtained, otherwise the result
+ * of upd_sr_flags_common().
+ */
+int
+clnt_upd_sr_flags(
+	char			*hostname,
+	mdsetname_t		*sp,
+	uint_t			flags,
+	md_error_t		*ep
+)
+{
+	md_set_desc	*sd;
+
+	mdclrerror(ep);
+
+	/* the current flags come from the set descriptor */
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	/* drop the old state bits in the descriptor itself */
+	sd->sd_flags &= ~MD_SR_STATE_FLAGS;
+
+	/* send what is left with the new value or'ed in */
+	return (upd_sr_flags_common(hostname, sp, sd->sd_flags | flags, ep));
+}
+
+/*
+ * Return the client set key (my_cl_sk), refreshed for the given set.
+ *
+ * The key is allocated on first use, at which point its host name is set
+ * to a copy of mynode().  On every call the set number is overwritten and
+ * the set name is replaced by a fresh copy of setname.  The other fields
+ * of an already existing key are left untouched.
+ */
+md_setkey_t *
+cl_get_setkey(set_t setno, char *setname)
+{
+	if (my_cl_sk == NULL) {
+		/* first use: create the key and record our host name */
+		my_cl_sk = Zalloc(sizeof (md_setkey_t));
+		my_cl_sk->sk_host = Strdup(mynode());
+	} else if (my_cl_sk->sk_setname != NULL) {
+		/* the previous set name is about to be replaced */
+		Free(my_cl_sk->sk_setname);
+	}
+
+	my_cl_sk->sk_setno = setno;
+	my_cl_sk->sk_setname = Strdup(setname);
+
+	return (my_cl_sk);
+}
+
+/*
+ * Update the client set key (my_cl_sk) from cl_sk.
+ *
+ *   - No key yet and cl_sk given: a new key is created as a copy of cl_sk
+ *     (this is the "set called before get" case).
+ *   - Key exists and cl_sk given: set number, set name and host are
+ *     asserted to match, and only sk_key is taken over.
+ *   - Key exists and cl_sk is NULL: the key is freed and my_cl_sk reset
+ *     to NULL.
+ *   - Neither exists: nothing to do.
+ */
+void
+cl_set_setkey(md_setkey_t *cl_sk)
+{
+	if (my_cl_sk == NULL) {
+		if (cl_sk == NULL)
+			return;
+
+		/* get here, if set called before get */
+		my_cl_sk = Zalloc(sizeof (md_setkey_t));
+		my_cl_sk->sk_host = Strdup(cl_sk->sk_host);
+		my_cl_sk->sk_setno = cl_sk->sk_setno;
+		my_cl_sk->sk_setname = Strdup(cl_sk->sk_setname);
+		my_cl_sk->sk_key = cl_sk->sk_key;
+		return;
+	}
+
+	if (cl_sk != NULL) {
+		/* same set and host expected; just pick up the new key */
+		assert(my_cl_sk->sk_setno == cl_sk->sk_setno);
+		assert(strcmp(my_cl_sk->sk_setname, cl_sk->sk_setname) == 0);
+		assert(strcmp(my_cl_sk->sk_host, cl_sk->sk_host) == 0);
+		my_cl_sk->sk_key = cl_sk->sk_key;
+		return;
+	}
+
+	/* no replacement supplied: throw the current key away */
+	if (my_cl_sk->sk_setname != NULL)
+		Free(my_cl_sk->sk_setname);
+	if (my_cl_sk->sk_host != NULL)
+		Free(my_cl_sk->sk_host);
+	Free(my_cl_sk);
+
+	my_cl_sk = NULL;
+}
+
+/*
+ * Unlock the set after operation is complete.
+ *
+ * cl_sk is sent as the set key.  In the daemon on the target host the
+ * service routine is called directly; otherwise the version 1 remote
+ * procedure is called over a connection to hostname.
+ *
+ * Returns 0 on success.  An error that mdanyrpcerror() reports as an RPC
+ * error is forgiven (ep is cleared and 0 returned) when hostname is not
+ * this node; every other error returns -1 with ep set.
+ */
+int
+clnt_unlock_set(
+	char			*hostname,
+	md_setkey_t		*cl_sk,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_null_args		args;
+	mdrpc_setlock_res	res;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the only argument is the caller's set key */
+	args.cl_sk = cl_sk;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the service routine directly */
+		int	svc_ok = mdrpc_unlock_set_1_svc(&args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		if (mdrpc_unlock_set_1(&args, &res, clntp) == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad unlock set"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_setlock_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC errors against some other host are tolerated */
+	if (! mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Lock set so that only operators with valid keys are allowed in the daemon.
+ *
+ * The set key from cl_get_setkey() is sent with the request.  In the
+ * daemon on the target host the service routine is called directly;
+ * otherwise the version 1 remote procedure is called over a connection to
+ * hostname.  On success the key in the result is stored with
+ * cl_set_setkey() before the result is freed.
+ *
+ * Returns 0 on success.  An error that mdanyrpcerror() reports as an RPC
+ * error is forgiven (ep is cleared and 0 returned) when hostname is not
+ * this node; every other error returns -1 with ep set.
+ */
+int
+clnt_lock_set(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_null_args		args;
+	mdrpc_setlock_res	res;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the only argument is our set key for this set */
+	args.cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the service routine directly */
+		int	svc_ok = mdrpc_lock_set_1_svc(&args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		if (mdrpc_lock_set_1(&args, &res, clntp) == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad lock set"));
+
+		metarpcclose(clntp);
+	}
+
+	/* remember the returned key while the result is still valid */
+	if (mdisok(ep))
+		cl_set_setkey(res.cl_sk);
+
+	xdr_free(xdr_mdrpc_setlock_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC errors against some other host are tolerated */
+	if (! mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Add mediator hosts to disksets.
+ *
+ * *medp is copied by value into the arguments.  In the daemon on the
+ * target host the version 2 service routine is called directly; otherwise
+ * a connection to hostname is opened and the version 1 or version 2
+ * remote procedure is used, depending on the client handle.
+ *
+ * Returns 0 on success and -1 on failure (ep holds the error).
+ */
+int
+clnt_updmeds(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_h_arr_t		*medp,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_updmeds_args	*args;
+	mdrpc_updmeds_2_args	v2_args;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* start with a clean error and zeroed request/result structures */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* the rev1 arm of the v2 arguments is also used as the v1 arguments */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_updmeds_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->meds = *medp;			/* structure assignment */
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		/* local server: call the v2 service routine directly */
+		int	svc_ok = mdrpc_updmeds_2_svc(&v2_args, &res, NULL);
+
+		assert(svc_ok == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* pick the remote procedure matching the handle's version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		if (version == METAD_VERSION)	/* version 1 */
+			rpc_stat = mdrpc_updmeds_1(args, &res, clntp);
+		else
+			rpc_stat = mdrpc_updmeds_2(&v2_args, &res, clntp);
+
+		if (rpc_stat == RPC_SUCCESS)
+			(void) mdstealerror(ep, &res.status);
+		else
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad add hosts"));
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Update the nr_flags field of node records according to flag_action.
+ *
+ * Inside the daemon a request addressed to this node calls the version 2
+ * service routine directly.  Otherwise the call is made over RPC; a peer
+ * that only speaks version 1 is rejected with MDE_DS_RPCVERSMISMATCH.
+ *
+ * Returns -1 for any non-RPC error, and for any error at all when
+ * hostname is this node.  An RPC error while talking to another node is
+ * cleared from ep and 0 is returned.
+ */
+int
+clnt_upd_nr_flags(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_mnnode_desc		*nd,
+	uint_t			flag_action,
+	uint_t			flags,
+	md_error_t		*ep
+)
+{
+	mdrpc_upd_nr_flags_2_args	v2_args;
+	mdrpc_upd_nr_flags_args		*args;
+	mdrpc_generic_res		res;
+	CLIENT				*clntp;
+	int				version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* fill in the rev1 payload of the versioned argument wrapper */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_upd_nr_flags_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->nodedescs = nd;
+	args->flag_action = flag_action;
+	args->flags = flags;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/* in the daemon, talking to ourselves: no RPC needed */
+		bool = mdrpc_upd_nr_flags_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* ask the handle which program version it is bound to */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* this procedure does not exist in version 1 */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_upd_nr_flags_2(&v2_args, &res, clntp) !=
+		    RPC_SUCCESS) {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad set node flags"));
+		} else {
+			(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC failures against some other node are forgiven */
+	if (!mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Clear set locks for all MN disksets.
+ * Used during reconfig cycle to recover from failed nodes.
+ *
+ * Always goes over RPC to hostname; a peer that only speaks version 1 is
+ * rejected with MDE_DS_RPCVERSMISMATCH.
+ *
+ * Returns 0 when ep is clean afterwards, -1 otherwise.
+ */
+int
+clnt_clr_mnsetlock(
+	char			*hostname,
+	md_error_t		*ep
+)
+{
+	mdrpc_generic_res	res;
+	mdrpc_null_args		args;
+	CLIENT			*clntp;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&args, 0, sizeof (args));
+
+	clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+	if (clntp == NULL)
+		return (-1);
+
+	/* ask the handle which program version it is bound to */
+	CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+	/* this procedure does not exist in version 1 */
+	if (version == METAD_VERSION) {
+		(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+		    NULL, hostname, NULL, NULL);
+		metarpcclose(clntp);
+		return (-1);
+	}
+
+	if (mdrpc_clr_mnsetlock_2(&args, &res, clntp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, clntp, hostname,
+		    dgettext(TEXT_DOMAIN, "metad clr mnsetlock"));
+	} else {
+		(void) mdstealerror(ep, &res.status);
+	}
+
+	metarpcclose(clntp);
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Suspend, resume or reinit the rpc.mdcommd daemon on the given hostname
+ * node.  Used by libmeta to lock out class 1 messages (metainit, etc) on
+ * all nodes when running metaset and metadb commands on this node.
+ *
+ * A suspend request fails until all messages have been drained from
+ * rpc.mdcommd.  For COMMDCTL_SUSPEND this routine therefore keeps
+ * re-sending the request, one second apart, for as long as the reply
+ * status is MDE_DS_COMMDCTL_SUSPEND_NYD; any other outcome ends the loop.
+ *
+ * Also used to send the rpc.mdcommd daemon a new nodelist by draining all
+ * messages from the mdcommd and sending a reinit command to have mdcommd
+ * get the new nodelist from rpc.metad.  Used when the nodelist changes
+ * during:
+ *	- addition or deletion of host from diskset
+ *	- join or withdrawal of host from diskset
+ *	- addition of first disk to diskset (joins all nodes)
+ *	- removal of last disk from diskset (withdraws all nodes)
+ *
+ * Returns -1 for any non-RPC error, and for any error at all when
+ * hostname is this node.  An RPC error while talking to another node is
+ * cleared from ep and 0 is returned.
+ */
+int
+clnt_mdcommdctl(
+	char			*hostname,
+	int			flag_action,
+	mdsetname_t		*sp,
+	md_mn_msgclass_t	class,
+	uint_t			flags,
+	md_error_t		*ep
+)
+{
+	mdrpc_mdcommdctl_2_args		v2_args;
+	mdrpc_mdcommdctl_args		*args;
+	mdrpc_generic_res		res;
+	CLIENT				*clntp;
+	int				version;
+	int				suspend_spin = 0;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* fill in the rev1 payload of the versioned argument wrapper */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_mdcommdctl_2_args_u.rev1;
+	args->flag_action = flag_action;
+	args->setno = sp->setno;
+	args->class = class;
+	args->flags = flags;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * rpc.metad on this node is sending the message to itself,
+		 * so call the v2 procedure directly.
+		 */
+		if (flag_action != COMMDCTL_SUSPEND) {
+			bool = mdrpc_mdcommdctl_2_svc(&v2_args, &res, NULL);
+			assert(bool == TRUE);
+		} else {
+			do {
+				suspend_spin = 0;
+				bool = mdrpc_mdcommdctl_2_svc(&v2_args, &res,
+				    NULL);
+				assert(bool == TRUE);
+
+				/* not drained yet: pause, then ask again */
+				if (mdisdserror(&(res.status),
+				    MDE_DS_COMMDCTL_SUSPEND_NYD)) {
+					mdclrerror(&(res.status));
+					(void) sleep(1);
+					suspend_spin = 1;
+				}
+			} while (suspend_spin);
+		}
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* ask the handle which program version it is bound to */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* this procedure does not exist in version 1 */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (flag_action != COMMDCTL_SUSPEND) {
+			if (mdrpc_mdcommdctl_2(&v2_args, &res, clntp) !=
+			    RPC_SUCCESS) {
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN,
+				    "metad commd control"));
+			} else {
+				(void) mdstealerror(ep, &res.status);
+			}
+		} else {
+			do {
+				suspend_spin = 0;
+
+				/* a transport failure ends the loop */
+				if (mdrpc_mdcommdctl_2(&v2_args, &res,
+				    clntp) != RPC_SUCCESS) {
+					(void) mdrpcerror(ep, clntp,
+					    hostname,
+					    dgettext(TEXT_DOMAIN,
+					    "metad commd control"));
+					continue;
+				}
+
+				/*
+				 * If the set is not yet drained, wait a
+				 * second and try again; otherwise take
+				 * whatever status came back.
+				 */
+				if (mdisdserror(&(res.status),
+				    MDE_DS_COMMDCTL_SUSPEND_NYD)) {
+					mdclrerror(&(res.status));
+					(void) sleep(1);
+					suspend_spin = 1;
+				} else {
+					(void) mdstealerror(ep,
+					    &res.status);
+				}
+			} while (suspend_spin);
+		}
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC failures against some other node are forgiven */
+	if (!mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Is owner node stale?
+ *
+ * Asks the metad service on hostname about set sp and, on success, stores
+ * the boolean answer through ret_bool (when ret_bool is not NULL).  A peer
+ * that only speaks version 1 is rejected with MDE_DS_RPCVERSMISMATCH.
+ *
+ * Returns 0 when ep is clean afterwards, -1 otherwise.
+ */
+int
+clnt_mn_is_stale(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			*ret_bool,
+	md_error_t		*ep
+)
+{
+	mdrpc_setno_2_args	v2_args;
+	mdrpc_setno_args	*args;
+	mdrpc_bool_res		res;
+	CLIENT			*clntp;
+	int			version;
+	int			rval;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* fill in the rev1 payload of the versioned argument wrapper */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_setno_2_args_u.rev1;
+	args->setno = sp->setno;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * rpc.metad on this node is sending the message to itself,
+		 * so call the v2 procedure directly.
+		 */
+		bool = mdrpc_mn_is_stale_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* ask the handle which program version it is bound to */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* this procedure does not exist in version 1 */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_mn_is_stale_2(&v2_args, &res, clntp) !=
+		    RPC_SUCCESS) {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN, "metad mn is stale"));
+		} else {
+			(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	if (!mdisok(ep)) {
+		rval = -1;
+	} else {
+		/* hand the answer back before the result is released */
+		if (ret_bool != NULL)
+			*ret_bool = res.value;
+		rval = 0;
+	}
+
+	xdr_free(xdr_mdrpc_bool_res, (char *)&res);
+
+	return (rval);
+}
+
+/*
+ * Release a linked list of md_drive_desc drive descriptors that was
+ * alloc'd by a call to the RPC routine clnt_getdrivedesc (i.e. drive
+ * descriptors that came from another node).
+ */
+void
+free_rem_dd(md_drive_desc *dd)
+{
+	mdrpc_getdrivedesc_res	dummy;
+
+	/*
+	 * Wrap the list in an otherwise empty result struct so that
+	 * xdr_free performs a deep free, i.e. walks and frees the whole
+	 * linked list of drive descs.
+	 */
+	(void) memset(&dummy, 0, sizeof (dummy));
+	dummy.dd = (struct md_drive_desc *)dd;
+
+	xdr_free(xdr_mdrpc_getdrivedesc_res, (char *)&dummy);
+}
+
+/*
+ * Get a partially filled in drive desc from remote node.  Used in MN
+ * disksets during the reconfig cycle to get the diskset drive
+ * information from another host in order to sync up all nodes.
+ * Used when the drive record information isn't good enough
+ * since the drive record doesn't give the name of
+ * the drive, but just a key into that other node's nodespace.
+ * Returned drive desc has the drive name filled in but no other strings
+ * in the drivename structure.
+ *
+ * On success with a non-NULL ret_dd, the drive descriptor list is handed
+ * to the caller through *ret_dd (free_rem_dd releases such a list).  In
+ * every other case the decoded result is released here.
+ *
+ * Returns 0 if the call was successful, -1 otherwise (reason left in ep).
+ */
+int
+clnt_getdrivedesc(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		**ret_dd,
+	md_error_t		*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_sp_args		*args;
+	mdrpc_sp_2_args		v2_args;
+	mdrpc_getdrivedesc_res	res;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_sp_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	/* do it */
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/* in the daemon, talking to ourselves: no RPC needed */
+		bool = mdrpc_getdrivedesc_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Check the client handle for the version
+		 */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/*
+		 * If the client is version 1, return error
+		 * otherwise, make the remote procedure call.
+		 */
+		if (version == METAD_VERSION) { /* version 1 */
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		} else {
+			if (mdrpc_getdrivedesc_2(&v2_args, &res, clntp)
+			    != RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN,
+				    "metad get drive desc set"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	/*
+	 * If no ep error and no version mismatch - rpc call worked ok.
+	 * When the caller wants the list, ownership moves to the caller
+	 * and res must not be freed here.
+	 */
+	if (mdisok(ep) && ret_dd != NULL) {
+		*ret_dd = res.dd;
+		return (0);
+	}
+
+	/*
+	 * Either the call failed or the caller does not want the list.
+	 * Release whatever was decoded into res, as the other clnt_*
+	 * routines do, so a failed call cannot leak a partial result.
+	 */
+	xdr_free(xdr_mdrpc_getdrivedesc_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Update the dr_flags field of drive records.
+ * Also sync up genid of drive descriptors and make set
+ * record and node records match the genid.
+ *
+ * Returns -1 for any non-RPC error, and for any error at all when
+ * hostname is this node.  An RPC error while talking to another node is
+ * cleared from ep and 0 is returned, as it is on success.
+ */
+int
+clnt_upd_dr_reconfig(
+	char			*hostname,
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,
+	md_error_t		*ep
+)
+{
+	mdrpc_upd_dr_flags_2_args_r1	*v21_args;
+	mdrpc_upd_dr_flags_2_args	v2_args;
+	mdrpc_generic_res		res;
+	CLIENT				*clntp;
+	int				version;
+	int				rpc_stat;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* fill in the rev1 payload of the versioned argument wrapper */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	v21_args = &v2_args.mdrpc_upd_dr_flags_2_args_u.rev1;
+	v21_args->sp = sp;
+	v21_args->drivedescs = dd;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/* the server is local, so call the v2 procedure directly */
+		bool = mdrpc_upd_dr_reconfig_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* ask the handle which program version it is bound to */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* this procedure does not exist in version 1 */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		rpc_stat = mdrpc_upd_dr_reconfig_2(&v2_args, &res, clntp);
+		if (rpc_stat != RPC_SUCCESS) {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad update drive reconfig"));
+		} else {
+			(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC failures against some other node are forgiven */
+	if (!mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Reset mirror owner(s) if mirror owner(s) is in the list of
+ * nodes specified in the array of nodeids.
+ * This is called when a node has been deleted or withdrawn
+ * from the diskset.
+ *
+ * node_c is the number of entries in node_id.  A peer that only speaks
+ * version 1 is rejected with MDE_DS_RPCVERSMISMATCH.
+ *
+ * Returns 0 when ep is clean afterwards, -1 otherwise.
+ */
+int
+clnt_reset_mirror_owner(
+	char			*hostname,
+	mdsetname_t		*sp,
+	int			node_c,
+	int			node_id[],
+	md_error_t		*ep
+)
+{
+	mdrpc_nodeid_2_args	v2_args;
+	mdrpc_nodeid_args	*args;
+	mdrpc_generic_res	res;
+	CLIENT			*clntp;
+	int			version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* fill in the rev1 payload of the versioned argument wrapper */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_nodeid_2_args_u.rev1;
+	args->sp = sp;
+	args->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	args->nodeid.nodeid_len = node_c;
+	args->nodeid.nodeid_val = &node_id[0];
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/* in the daemon, talking to ourselves: no RPC needed */
+		bool = mdrpc_reset_mirror_owner_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* ask the handle which program version it is bound to */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* this procedure does not exist in version 1 */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, sp->setname);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_reset_mirror_owner_2(&v2_args, &res, clntp) !=
+		    RPC_SUCCESS) {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad reset mirror owner"));
+		} else {
+			(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Suspend or resume I/O for the given diskset(s).
+ * This allows a node to remotely suspend and resume I/O on
+ * a MN diskset.  A diskset number of 0 represents all MN disksets.
+ *
+ * Returns -1 for any non-RPC error, and for any error at all when
+ * hostname is this node.  An RPC error while talking to another node is
+ * cleared from ep and 0 is returned.
+ */
+int
+clnt_mn_susp_res_io(
+	char			*hostname,
+	set_t			setno,
+	int			cmd,
+	md_error_t		*ep
+)
+{
+	mdrpc_mn_susp_res_io_2_args		v2_args;
+	mdrpc_mn_susp_res_io_args		*args;
+	mdrpc_generic_res			res;
+	CLIENT					*clntp;
+	int					version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&v2_args, 0, sizeof (v2_args));
+
+	/* fill in the rev1 payload of the versioned argument wrapper */
+	v2_args.rev = MD_METAD_ARGS_REV_1;
+	args = &v2_args.mdrpc_mn_susp_res_io_2_args_u.rev1;
+	args->susp_res_cmd = cmd;
+	args->susp_res_setno = setno;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * rpc.metad on this node is sending the message to itself,
+		 * so call the v2 procedure directly.
+		 */
+		bool = mdrpc_mn_susp_res_io_2_svc(&v2_args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* ask the handle which program version it is bound to */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* this procedure does not exist in version 1 */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    setno, hostname, NULL, NULL);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_mn_susp_res_io_2(&v2_args, &res, clntp) !=
+		    RPC_SUCCESS) {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad mn_susp_res_io control"));
+		} else {
+			(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC failures against some other node are forgiven */
+	if (!mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Resnarf the set after the set has been imported
+ *
+ * We should never be making this procedure call
+ * over the wire, its sole purpose is to snarf
+ * the imported set on the localhost.
+ *
+ * hostname must therefore be this node; any other hostname, or a local
+ * rpc.metad that only speaks version 1, fails with MDE_DS_CANTRESNARF.
+ *
+ * Returns 0 when ep is clean afterwards, -1 on any failure (RPC
+ * transport failure, error status reported by the server, or one of the
+ * refusals above).
+ */
+int
+clnt_resnarf_set(
+	char		*hostname,
+	set_t		setno,
+	md_error_t	*ep
+)
+{
+	CLIENT			*clntp;
+	mdrpc_setno_2_args	args;
+	mdrpc_generic_res	res;
+	int			rpc_stat;
+	int			version;
+
+	/* initialize */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&res, 0, sizeof (res));
+
+	/* build args */
+	args.rev = MD_METAD_ARGS_REV_1;
+	args.mdrpc_setno_2_args_u.rev1.setno = setno;
+	args.mdrpc_setno_2_args_u.rev1.cl_sk = NULL;
+
+	/* do it */
+	if (strcmp(mynode(), hostname) == 0) {
+		if ((clntp = metarpcopen(hostname, CL_LONG_TMO, ep)) == NULL)
+			return (-1);
+
+		/* Check the client handle for the version */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* If the client is version 1, return error */
+		if (version == METAD_VERSION) { /* version 1 */
+			(void) mddserror(ep, MDE_DS_CANTRESNARF, MD_SET_BAD,
+			    mynode(), NULL, NULL);
+		} else {
+			/*
+			 * Keep the RPC status in its own variable.  It must
+			 * not double as the function's return value: a
+			 * successful RPC whose reply carries an error would
+			 * then be reported as success, and a failed RPC
+			 * would leak a clnt_stat code to the caller.
+			 */
+			rpc_stat = mdrpc_resnarf_set_2(&args, &res, clntp);
+
+			if (rpc_stat != RPC_SUCCESS)
+				(void) mdrpcerror(ep, clntp, hostname,
+				    dgettext(TEXT_DOMAIN, "metad resnarf set"));
+			else
+				(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+
+	} else {
+		(void) mddserror(ep, MDE_DS_CANTRESNARF, MD_SET_BAD,
+		    mynode(), NULL, NULL);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	/* the outcome is decided solely by what ended up in ep */
+	return (mdisok(ep) ? 0 : -1);
+}
+
+/*
+ * Start a resync for the given diskset.
+ * Used when a node has been added to a diskset.
+ * Should be called after rpc.mdcommd is resumed.
+ *
+ * Returns -1 for any non-RPC error, and for any error at all when
+ * hostname is this node.  An RPC error while talking to another node is
+ * cleared from ep and 0 is returned.
+ */
+int
+clnt_mn_mirror_resync_all(
+	char			*hostname,
+	set_t			setno,
+	md_error_t		*ep
+)
+{
+	mdrpc_generic_res			res;
+	mdrpc_setno_2_args			args;
+	CLIENT					*clntp;
+	int					version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&args, 0, sizeof (args));
+
+	/* only the set number is passed; no set key accompanies it */
+	args.rev = MD_METAD_ARGS_REV_1;
+	args.mdrpc_setno_2_args_u.rev1.setno = setno;
+	args.mdrpc_setno_2_args_u.rev1.cl_sk = NULL;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * rpc.metad on this node is sending the message to itself,
+		 * so call the v2 procedure directly.
+		 */
+		bool = mdrpc_mn_mirror_resync_all_2_svc(&args, &res, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &res.status);
+	} else {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* ask the handle which program version it is bound to */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* this procedure does not exist in version 1 */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    setno, hostname, NULL, NULL);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_mn_mirror_resync_all_2(&args, &res, clntp) !=
+		    RPC_SUCCESS) {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad mn_mirror_resync_all"));
+		} else {
+			(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* only RPC failures against some other node are forgiven */
+	if (!mdanyrpcerror(ep) || strcmp(mynode(), hostname) == 0)
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * Update the ABR state for all soft partitions.
+ * Used when a node has been added to a diskset.
+ * Should be called after rpc.mdcommd is resumed.
+ *
+ * Nothing is sent when hostname is this node.  Returns -1 for any
+ * non-RPC error; an RPC error is cleared from ep and 0 is returned.
+ */
+int
+clnt_mn_sp_update_abr(
+	char			*hostname,
+	set_t			setno,
+	md_error_t		*ep
+)
+{
+	mdrpc_generic_res			res;
+	mdrpc_setno_2_args			args;
+	CLIENT					*clntp;
+	int					version;
+
+	/* start from a clean error and zeroed argument/result buffers */
+	mdclrerror(ep);
+	(void) memset(&res, 0, sizeof (res));
+	(void) memset(&args, 0, sizeof (args));
+
+	/* only the set number is passed; no set key accompanies it */
+	args.rev = MD_METAD_ARGS_REV_1;
+	args.mdrpc_setno_2_args_u.rev1.setno = setno;
+	args.mdrpc_setno_2_args_u.rev1.cl_sk = NULL;
+
+	/*
+	 * No need to call function if adding local node as ABR cannot
+	 * be set.
+	 */
+	if (strcmp(mynode(), hostname) != 0) {
+		clntp = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (clntp == NULL)
+			return (-1);
+
+		/* ask the handle which program version it is bound to */
+		CLNT_CONTROL(clntp, CLGET_VERS, (char *)&version);
+
+		/* this procedure does not exist in version 1 */
+		if (version == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    setno, hostname, NULL, NULL);
+			metarpcclose(clntp);
+			return (-1);
+		}
+
+		if (mdrpc_mn_sp_update_abr_2(&args, &res, clntp) !=
+		    RPC_SUCCESS) {
+			(void) mdrpcerror(ep, clntp, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad mn_sp_update_abr"));
+		} else {
+			(void) mdstealerror(ep, &res.status);
+		}
+
+		metarpcclose(clntp);
+	}
+
+	xdr_free(xdr_mdrpc_generic_res, (char *)&res);
+
+	if (mdisok(ep))
+		return (0);
+
+	/* anything other than an RPC failure is reported to the caller */
+	if (!mdanyrpcerror(ep))
+		return (-1);
+
+	mdclrerror(ep);
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_metad_subr.c b/usr/src/lib/lvm/libmeta/common/meta_metad_subr.c
new file mode 100644
index 0000000000..df50a7650e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_metad_subr.c
@@ -0,0 +1,2055 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * interface between user land and the set records
+ */
+
+#include <meta.h>
+#include <metad.h>
+#include <sdssc.h>
+#include <syslog.h>
+#include <sys/cladm.h>
+#include "meta_set_prv.h"
+
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/svm.h>
+
+static	md_set_record	*setrecords = NULL; /* head of cache linked list */
+static	int		setsnarfdone = 0;
+
+/*
+ * Singly linked list node pairing a side number with a key.
+ * NOTE(review): no user of this type is visible in this part of the
+ * file; presumably used by key handling code further down - confirm.
+ */
+typedef struct key_lst_t {
+	side_t			kl_side;
+	mdkey_t			kl_key;
+	struct key_lst_t	*kl_next;	/* next node, NULL at end */
+} key_lst_t;
+
+/*
+ * Singly linked list node holding one database record id.
+ * Lists of these are maintained with url_addl(), url_findl() and
+ * url_freel().
+ */
+typedef struct ur_recid_lst {
+	mddb_recid_t		url_recid;
+	struct	ur_recid_lst	*url_nx;	/* next node, NULL at end */
+} ur_recid_lst_t;
+
+/*
+ * url_tode collects ids of records to delete; ckncvt_set_record() adds
+ * the old record to it after a conversion.
+ * NOTE(review): url_used presumably tracks record ids that are in use -
+ * its users are not visible here, confirm.
+ */
+static ur_recid_lst_t		*url_used = NULL;
+static ur_recid_lst_t		*url_tode = NULL;
+
+/*
+ * Append recid to the list headed at *urlpp, unless the list already
+ * contains it.
+ */
+static void
+url_addl(ur_recid_lst_t **urlpp, mddb_recid_t recid)
+{
+	ur_recid_lst_t	*newp;
+
+	/* walk to the tail link, bailing out on a duplicate */
+	while (*urlpp != NULL) {
+		if ((*urlpp)->url_recid == recid)
+			return;
+		urlpp = &(*urlpp)->url_nx;
+	}
+
+	/* hang a fresh, zeroed node off the tail link */
+	newp = Zalloc(sizeof (*newp));
+	if (newp == NULL)
+		return;
+
+	newp->url_recid = recid;
+	*urlpp = newp;
+}
+
+/*
+ * Return 1 if recid is present in the list starting at urlp, else 0.
+ */
+static int
+url_findl(ur_recid_lst_t *urlp, mddb_recid_t recid)
+{
+	ur_recid_lst_t	*curp;
+
+	for (curp = urlp; curp != NULL; curp = curp->url_nx) {
+		if (curp->url_recid == recid)
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Free every node of the list headed at *urlpp and reset the head to
+ * NULL.
+ */
+static void
+url_freel(ur_recid_lst_t **urlpp)
+{
+	ur_recid_lst_t	*curp = *urlpp;
+	ur_recid_lst_t	*nextp;
+
+	while (curp != NULL) {
+		/* grab the link before the node goes away */
+		nextp = curp->url_nx;
+		Free(curp);
+		curp = nextp;
+	}
+
+	*urlpp = (ur_recid_lst_t *)NULL;
+}
+
+/*
+ * Check a set record that was just read into reqp and, if its size does
+ * not match the current md_set_record, bring it up to the current size.
+ *
+ * Outside the daemon a short record is simply grown in memory and zero
+ * padded.  Inside the daemon a new database record of the current size
+ * is created, the old contents are copied into it, both records are
+ * committed, and the old record id is queued on url_tode for deletion.
+ * On a successful conversion reqp is updated to describe the new record.
+ *
+ * Returns 0 on success (including "nothing to do"); otherwise the value
+ * returned by mdstealerror() after moving the ioctl error into ep.
+ */
+static int
+ckncvt_set_record(mddb_userreq_t *reqp, md_error_t *ep)
+{
+	mddb_userreq_t	req;
+	md_set_record	*sr;
+	int		recs[3];
+
+	/* Record is already the current size, nothing to convert */
+	if (reqp->ur_size == sizeof (*sr))
+		return (0);
+
+	if (! md_in_daemon) {
+		/* Larger than the current record: leave it untouched */
+		if (reqp->ur_size >= sizeof (*sr))
+			return (0);
+
+		/* Grow the in-memory copy only and zero the new tail */
+		reqp->ur_data = (uintptr_t)Realloc((void *)reqp->ur_data,
+		    sizeof (*sr));
+		(void) memset(((char *)reqp->ur_data) + reqp->ur_size, '\0',
+		    sizeof (*sr) - reqp->ur_size);
+		reqp->ur_size = sizeof (*sr);
+		return (0);
+	}
+
+	/*
+	 * If here, then the daemon is calling, and so the automatic
+	 * conversion will be performed.
+	 */
+
+	/* shorthand */
+	req = *reqp;			/* structure assignment */
+	sr = (md_set_record *)req.ur_data;
+
+	/* Already marked as converted, nothing more to do */
+	if (sr->sr_flags & MD_SR_CVT)
+		return (0);
+
+	/* Leave multi-node set records alone */
+	if (MD_MNSET_REC(sr)) {
+		return (0);
+	}
+
+	/* Mark the old record as converted */
+	sr->sr_flags |= MD_SR_CVT;
+
+	/*
+	 * NOTE(review): the METAD_SETUP_* macros are defined elsewhere;
+	 * presumably they fill in the local `req' for the given command
+	 * and record id - confirm against their definition.
+	 */
+	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
+		return (mdstealerror(ep, &req.ur_mde));
+
+	/* Create space for the new record */
+	METAD_SETUP_SR(MD_DB_CREATE, 0);
+	req.ur_size = sizeof (*sr);
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
+		return (mdstealerror(ep, &req.ur_mde));
+
+	/* Allocate the new record */
+	sr = Zalloc(sizeof (*sr));
+
+	/* copy all the data from the record being converted */
+	(void) memmove(sr, (void *)reqp->ur_data, reqp->ur_size);
+	/* the copy carried the CVT mark along; the new record is not old */
+	sr->sr_flags &= ~MD_SR_CVT;
+
+	/* adjust the selfid to point to the new record */
+	sr->sr_selfid = req.ur_recid;
+
+	/* Write the converted contents into the new record */
+	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
+	req.ur_size = sizeof (*sr);
+	req.ur_data = (uintptr_t)sr;
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		Free(sr);
+		return (mdstealerror(ep, &req.ur_mde));
+	}
+
+	/*
+	 * Commit the old and the new
+	 * (the trailing 0 presumably terminates the id list - confirm)
+	 */
+	recs[0] = ((md_set_record *)reqp->ur_data)->sr_selfid;
+	recs[1] = sr->sr_selfid;
+	recs[2] = 0;
+
+	METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
+	req.ur_size = sizeof (recs);
+	req.ur_data = (uintptr_t)recs;
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		Free(sr);
+		return (mdstealerror(ep, &req.ur_mde));
+	}
+
+	/* Add the old record to the list of records to delete */
+	url_addl(&url_tode, ((md_set_record *)reqp->ur_data)->sr_selfid);
+
+	/* Free the old records space */
+	Free((void *)reqp->ur_data);
+
+	/* Adjust the reqp structure to point to the new record and size */
+	reqp->ur_recid = sr->sr_selfid;
+	reqp->ur_size = sizeof (*sr);
+	reqp->ur_data = (uintptr_t)sr;
+
+	return (0);
+}
+
+/*
+ * Read one database record through the MD_DB_USERREQ ioctl.
+ *
+ * For MD_UR_GET_NEXT the record following *idp is looked up and *idp is
+ * updated to its id; for MD_UR_GET_WKEY the record *idp itself is used.
+ * The record size and then its data are fetched, and user set records
+ * are passed through ckncvt_set_record().
+ *
+ * Returns an allocated request whose ur_data points at the allocated
+ * record data, or NULL.  NULL with ep untouched means the resulting
+ * *idp was not a positive record id.
+ */
+mddb_userreq_t *
+get_db_rec(
+	md_ur_get_cmd_t	cmd,
+	set_t		setno,
+	mddb_type_t	type,
+	uint_t		type2,
+	mddb_recid_t	*idp,
+	md_error_t	*ep
+)
+{
+	mddb_userreq_t	*reqp;
+
+	reqp = Zalloc(sizeof (*reqp));
+	reqp->ur_setno = setno;
+	reqp->ur_type = type;
+	reqp->ur_type2 = type2;
+
+	if (cmd == MD_UR_GET_NEXT) {
+		/* step from *idp to the id of the next record */
+		reqp->ur_cmd = MD_DB_GETNEXTREC;
+		reqp->ur_recid = *idp;
+		if (metaioctl(MD_DB_USERREQ, reqp, &reqp->ur_mde, NULL)
+		    != 0) {
+			(void) mdstealerror(ep, &reqp->ur_mde);
+			Free(reqp);
+			return (NULL);
+		}
+		*idp = reqp->ur_recid;
+	} else if (cmd == MD_UR_GET_WKEY) {
+		/* the caller named the record directly */
+		reqp->ur_recid = *idp;
+	}
+
+	/* no usable record id: nothing to fetch */
+	if (*idp <= 0) {
+		Free(reqp);
+		return (NULL);
+	}
+
+	/* learn how large the record is ... */
+	reqp->ur_cmd = MD_DB_GETSIZE;
+	if (metaioctl(MD_DB_USERREQ, reqp, &reqp->ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &reqp->ur_mde);
+		Free(reqp);
+
+		*idp = 0;
+		return (NULL);
+	}
+
+	/* ... then read it into a buffer of that size */
+	reqp->ur_data = (uintptr_t)Zalloc(reqp->ur_size);
+	reqp->ur_cmd = MD_DB_GETDATA;
+	if (metaioctl(MD_DB_USERREQ, reqp, &reqp->ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &reqp->ur_mde);
+		Free((void *)reqp->ur_data);
+		Free(reqp);
+		*idp = 0;
+		return (NULL);
+	}
+
+	/* user set records may need checking/converting */
+	if (reqp->ur_type == MDDB_USER && reqp->ur_type2 == MDDB_UR_SR) {
+		if (ckncvt_set_record(reqp, ep)) {
+			Free((void *)reqp->ur_data);
+			Free(reqp);
+			return (NULL);
+		}
+	}
+
+	return (reqp);
+}
+
+/*
+ * Fetch a user (MDDB_USER) record of sub-type type2 via get_db_rec()
+ * and return just its data buffer; the request wrapper is freed here.
+ * idp must not be NULL.  Returns NULL when get_db_rec() returns NULL.
+ */
+void *
+get_ur_rec(
+	set_t		setno,
+	md_ur_get_cmd_t	cmd,
+	uint_t		type2,
+	mddb_recid_t	*idp,
+	md_error_t	*ep
+)
+{
+	mddb_userreq_t	*reqp;
+	void		*datap;
+
+	assert(idp != NULL);
+
+	reqp = get_db_rec(cmd, setno, MDDB_USER, type2, idp, ep);
+	if (reqp == NULL)
+		return (NULL);
+
+	/* keep the record data, drop the request that carried it */
+	datap = (void *)reqp->ur_data;
+	Free(reqp);
+
+	return (datap);
+}
+
+/*
+ * Called by rpc.metad on startup of disksets to clean up
+ * the host entries associated with a diskset.  This is needed if
+ * a node failed or the metaset command was killed during the addition
+ * of a node to a diskset.
+ *
+ * This is called for all traditional disksets.
+ * For MN disksets it is only called when there is only one node
+ * in all of the MN disksets and this node is not running SunCluster.
+ * (Otherwise, the cleanup of the host entries is handled by a
+ * reconfig cycle that the SunCluster software calls).
+ *
+ * Returns 1 if this node was found not to belong to the set and the
+ * set was removed, 0 if the set was left in place.
+ */
+static int
+sr_hosts(md_set_record *sr)
+{
+	md_mnset_record		*mnsr;
+	md_mnnode_record	*nr;
+	md_error_t		xep = mdnullerror;
+	int			self_in_set = FALSE;
+	int			nid;
+	int			i;
+
+	if (MD_MNSET_REC(sr)) {
+		/*
+		 * Already guaranteed to be only 1 node in set which
+		 * is mynode (done in sr_validate).
+		 * So only the node's state matters: unless it is OK,
+		 * self_in_set stays FALSE and the set gets removed.
+		 */
+		mnsr = (struct md_mnset_record *)sr;
+		nr = mnsr->sr_nodechain;
+		if (nr->nr_flags & MD_MN_NODE_OK)
+			self_in_set = TRUE;
+	} else {
+		/* look for our own name among the occupied slots */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			if (sr->sr_nodes[i][0] != '\0' &&
+			    strcmp(sr->sr_nodes[i], mynode()) == 0) {
+				self_in_set = TRUE;
+				break;
+			}
+		}
+	}
+
+	if ((self_in_set == FALSE) && (!(MD_MNSET_REC(sr))) &&
+	    (_cladm(CL_CONFIG, CL_NODEID, &nid) == 0)) {
+
+		/*
+		 * See if we've got a node which has been booted in
+		 * non-cluster mode. If true the nodeid will match
+		 * one of the sr_nodes values because the conversion
+		 * from nodeid to hostname failed to occur.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			if (sr->sr_nodes[i][0] != 0 &&
+			    atoi(sr->sr_nodes[i]) == nid)
+				self_in_set = TRUE;
+		}
+	}
+
+	/* we belong to the set, so it stays */
+	if (self_in_set != FALSE)
+		return (0);
+
+	/* we aren't in the set: delete it, ignoring any deletion error */
+	syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+	    "Removing set %s from database\n"),
+	    sr->sr_setname);
+	s_delset(sr->sr_setname, &xep);
+	if (! mdisok(&xep))
+		mdclrerror(&xep);
+
+	return (1);
+}
+
+/*
+ * Delete the drive record `recid' of set `sr': issue MD_DB_DELETE for
+ * the record and then drop it from the in-core drive chain with
+ * dr_cache_del().  Errors from either ioctl are cleared, not reported.
+ */
+void
+sr_del_drv(md_set_record *sr, mddb_recid_t recid)
+{
+	md_error_t		err = mdnullerror;
+	mddb_userreq_t		req;
+
+	/*
+	 * Ownership is probed, but nothing is currently done when the set
+	 * is owned; only an error from a failed probe is discarded.
+	 */
+	if (!s_ownset(sr->sr_setno, &err) && !mdisok(&err))
+		mdclrerror(&err);
+
+	/* delete the replicas? */
+	/* release ownership of the drive? */
+	/* NOTE: We may not have a name, so both of the above are ugly! */
+
+	(void) memset(&req, 0, sizeof (req));
+	METAD_SETUP_DR(MD_DB_DELETE, recid)
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
+		mdclrerror(&req.ur_mde);
+
+	dr_cache_del(sr, recid);
+}
+
+/*
+ * Bring the drive records of set `sr' into a stable state.
+ *
+ *  - a drive still flagged MD_DR_ADD is deleted;
+ *  - a drive for which MD_IOCGET_NM finds no name on a required side
+ *    (this node's side for a MN set, every populated side otherwise)
+ *    is deleted;
+ *  - any remaining drive not flagged MD_DR_OK has its flags set to
+ *    MD_DR_OK.
+ *
+ * If anything changed the set is committed; commit errors are cleared.
+ * The function returns early, without committing, when MD_IOCGET_NM
+ * fails with anything other than ENOENT.
+ */
+static void
+sr_drvs(md_set_record *sr)
+{
+	md_drive_record		*dr;
+	md_drive_record		*nextdr;
+	int			i;
+	int			modified = 0;
+	int			sidesok;
+	mdnm_params_t		nm;
+	static	char		device_name[MAXPATHLEN];
+	md_error_t		xep = mdnullerror;
+	md_mnnode_record	*nr;
+	md_mnset_record		*mnsr;
+
+	for (dr = sr->sr_drivechain; dr != NULL; dr = nextdr) {
+		/*
+		 * sr_del_drv() ends in dr_cache_del(), which frees the
+		 * record.  Pick up the successor now so the loop never
+		 * reads dr->dr_next from freed memory.
+		 */
+		nextdr = dr->dr_next;
+
+		/* If we were mid-add, cleanup */
+		if ((dr->dr_flags & MD_DR_ADD)) {
+			sr_del_drv(sr, dr->dr_selfid);
+			modified++;
+			continue;
+		}
+
+		sidesok = TRUE;
+		if (MD_MNSET_REC(sr)) {
+			mnsr = (md_mnset_record *)sr;
+			nr = mnsr->sr_nodechain;
+			/*
+			 * MultiNode disksets only have entries for
+			 * their side in the local set.  Verify
+			 * that drive has a name associated with
+			 * this node's side.
+			 */
+			while (nr) {
+				/* Find my node */
+				if (strcmp(mynode(), nr->nr_nodename) != 0) {
+					nr = nr->nr_next;
+					continue;
+				}
+
+				(void) memset(&nm, '\0', sizeof (nm));
+				nm.setno = MD_LOCAL_SET;
+				nm.side = nr->nr_nodeid;
+				nm.key = dr->dr_key;
+				nm.devname = (uint64_t)device_name;
+
+				if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
+				    NULL) != 0) {
+					if (! mdissyserror(&nm.mde, ENOENT)) {
+						mdclrerror(&nm.mde);
+						return;
+					}
+				}
+
+				/*
+				 * If entry is found for this node, then
+				 * break out of loop walking through
+				 * node list.  For a multi-node diskset,
+				 * there should only be an entry for
+				 * this node.
+				 */
+				if (nm.key != MD_KEYWILD &&
+				    ! mdissyserror(&nm.mde, ENOENT)) {
+					break;
+				}
+
+				/*
+				 * If entry is not found for this node,
+				 * then delete the drive.  No need to
+				 * continue through the node loop since
+				 * our node has already been found.
+				 */
+				sidesok = FALSE;
+				mdclrerror(&nm.mde);
+
+				/* If we are missing a sidename, cleanup */
+				sr_del_drv(sr, dr->dr_selfid);
+				modified++;
+
+				break;
+			}
+		} else  {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sr->sr_nodes[i][0] == '\0')
+					continue;
+
+				(void) memset(&nm, '\0', sizeof (nm));
+				nm.setno = MD_LOCAL_SET;
+				nm.side = i + SKEW;
+				nm.key = dr->dr_key;
+				nm.devname = (uint64_t)device_name;
+
+				if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
+				    NULL) != 0) {
+					if (! mdissyserror(&nm.mde, ENOENT)) {
+						mdclrerror(&nm.mde);
+						return;
+					}
+				}
+
+				/* Name present on this side, check the next */
+				if (nm.key != MD_KEYWILD &&
+				    ! mdissyserror(&nm.mde, ENOENT))
+					continue;
+
+				sidesok = FALSE;
+				mdclrerror(&nm.mde);
+
+				/* If we are missing a sidename, cleanup */
+				sr_del_drv(sr, dr->dr_selfid);
+				modified++;
+
+				break;
+			}
+		}
+
+		/* dr has been deleted (and freed) above; do not touch it */
+		if (sidesok == FALSE)
+			continue;
+
+		/*
+		 * If we got this far, the drive record is either in the OK
+		 * or DEL state, if it is in the DEL state and the sidenames
+		 * all checked out, then we will make it OK.
+		 */
+		if ((dr->dr_flags & MD_DR_OK))
+			continue;
+
+		dr->dr_flags = MD_DR_OK;
+
+		modified++;
+	}
+
+	if (modified) {
+		commitset(sr, FALSE, &xep);
+		if (! mdisok(&xep))
+			mdclrerror(&xep);
+	}
+}
+
+/*
+ * Append a new (side, key) element, obtained from Zalloc(), to the end
+ * of the list whose head pointer is *klpp.
+ */
+static void
+add_key_to_lst(key_lst_t **klpp, side_t side, mdkey_t key)
+{
+	key_lst_t	**tailpp;
+	key_lst_t	*newp;
+
+	assert(klpp != NULL);
+
+	/* Advance to the link that currently terminates the list */
+	tailpp = klpp;
+	while (*tailpp != NULL)
+		tailpp = &(*tailpp)->kl_next;
+
+	newp = Zalloc(sizeof (*newp));
+	*tailpp = newp;
+
+	newp->kl_side = side;
+	newp->kl_key = key;
+}
+
+#ifdef DUMPKEYLST
+/*
+ * Debug aid (only built with DUMPKEYLST): print `tag' followed by one
+ * line per element of the key list using md_eprintf().
+ */
+static void
+pr_key_lst(char *tag, key_lst_t *klp)
+{
+	md_eprintf("Tag=%s\n", tag);
+
+	while (klp != NULL) {
+		md_eprintf("side=%d, key=%lu\n", klp->kl_side, klp->kl_key);
+		klp = klp->kl_next;
+	}
+}
+#endif	/* DUMPKEYLST */
+
+/*
+ * Return 1 if the list holds an element matching both `side' and `key',
+ * 0 otherwise (including for an empty list).
+ */
+static int
+key_in_key_lst(key_lst_t *klp, side_t side, mdkey_t key)
+{
+	while (klp != NULL) {
+		if (klp->kl_key == key && klp->kl_side == side)
+			return (1);
+		klp = klp->kl_next;
+	}
+
+	return (0);
+}
+
+/*
+ * Free every element of the key list and reset the caller's head
+ * pointer to NULL.
+ */
+static void
+destroy_key_lst(key_lst_t **klpp)
+{
+	key_lst_t	*cur, *next;
+
+	assert(klpp != NULL);
+
+	for (cur = *klpp; cur != NULL; cur = next) {
+		/* Read the link before the element goes away */
+		next = cur->kl_next;
+		Free(cur);
+	}
+
+	*klpp = NULL;
+}
+
+/*
+ * Walk all name keys that MD_IOCNXTKEY_NM reports for `side' in the
+ * local set and remove (MD_IOCREM_NM) every one whose (side, key) pair
+ * is not on the `use' list.  ioctl errors are cleared; an error from
+ * MD_IOCNXTKEY_NM ends the walk.
+ */
+static void
+sr_prune_side(side_t side, key_lst_t *use)
+{
+	mdnm_params_t		nm;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = MD_LOCAL_SET;
+	nm.side = side;
+	nm.key = MD_KEYWILD;
+
+	/*CONSTCOND*/
+	while (1) {
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			mdclrerror(&nm.mde);
+			break;
+		}
+
+		/* No further key on this side */
+		if (nm.key == MD_KEYWILD)
+			break;
+
+		if (key_in_key_lst(use, nm.side, nm.key))
+			continue;
+
+		/* Stray name: remove it, ignoring a failure to do so */
+		if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde, NULL) != 0)
+			mdclrerror(&nm.mde);
+	}
+}
+
+/*
+ * Remove side names from the local set that no cached set/drive record
+ * refers to.  First the (side, key) pairs in use are collected from
+ * setrecords, then each candidate side is pruned against that list.
+ */
+static void
+sr_sidenms(void)
+{
+	md_drive_record		*dr;
+	md_set_record		*sr;
+	key_lst_t		*use = NULL;
+	int			i;
+	md_mnset_record		*mnsr;
+	md_mnnode_record	*nr;
+	side_t			myside = 0;
+
+	/*
+	 * We now go through the list of set and drive records collecting
+	 * the key/side pairs that are being used.
+	 */
+	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
+		/*
+		 * To handle the multi-node diskset case, get the sideno
+		 * associated with this node.  This sideno will be the
+		 * same across all multi-node disksets.
+		 */
+		if ((myside == 0) && (MD_MNSET_REC(sr))) {
+			mnsr = (struct md_mnset_record *)sr;
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (strcmp(mynode(), nr->nr_nodename) == 0) {
+					myside = nr->nr_nodeid;
+					break;
+				}
+				nr = nr->nr_next;
+			}
+			/*
+			 * If this node is not in this MNset -
+			 * then skip this set.
+			 */
+			if (!nr) {
+				continue;
+			}
+		}
+
+		for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
+			if (MD_MNSET_REC(sr)) {
+				/*
+				 * There are no non-local sidenames in the
+				 * local set for a multi-node diskset.
+				 */
+				add_key_to_lst(&use, myside, dr->dr_key);
+			} else {
+				for (i = 0; i < MD_MAXSIDES; i++) {
+					/* Skip empty slots */
+					if (sr->sr_nodes[i][0] == '\0')
+						continue;
+
+					add_key_to_lst(&use, i + SKEW,
+						dr->dr_key);
+				}
+			}
+		}
+	}
+
+#ifdef DUMPKEYLST
+	pr_key_lst("use", use);
+#endif	/* DUMPKEYLST */
+
+	/*
+	 * We take the list above and get all non-local sidenames, checking
+	 * each to see if they are in use, if they are not used, we delete them.
+	 * Do the check for myside to cover multinode disksets.
+	 * Then do the check for MD_MAXSIDES to cover non-multinode disksets.
+	 * If any multi-node disksets were present, myside would be non-zero.
+	 * myside is the same for all multi-node disksets for this node.
+	 */
+	if (myside)
+		sr_prune_side(myside, use);
+
+	/* Now handle the non-multinode disksets */
+	for (i = 0; i < MD_MAXSIDES; i++)
+		sr_prune_side(i + SKEW, use);
+
+	/* Cleanup */
+	destroy_key_lst(&use);
+}
+
+/*
+ * Validate the set records that set_snarf() flagged with MD_SR_CHECK.
+ *
+ * For each such record: a set left in MD_SR_ADD state is deleted, a set
+ * this host does not belong to is deleted (sr_hosts), otherwise the
+ * state flags are forced to MD_SR_OK and the drive records are cleaned
+ * up (sr_drvs).  MN disksets are only validated here in the single-node
+ * situation.  Finally stray side names are removed and setsnarfdone is
+ * set to 3 so the work is not repeated.
+ */
+void
+sr_validate(void)
+{
+	md_set_record			*sr;
+	md_set_record			*nextsr;
+	md_error_t			xep = mdnullerror;
+	int				mnset_single_node;
+	md_mnnode_record		*nr;
+	md_mnset_record			*mnsr;
+
+	assert(setsnarfdone != 0);
+
+	/* We have validated the records already */
+	if (setsnarfdone == 3)
+		return;
+
+	/*
+	 * Check if we are in a single node non-SC3.x environment
+	 */
+	mnset_single_node = meta_mn_singlenode();
+	/*
+	 * If a possible single_node situation, verify that all
+	 * MN disksets have only one node (which is mynode()).
+	 */
+	if (mnset_single_node) {
+		for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
+			if (MD_MNSET_REC(sr)) {
+				mnsr = (struct md_mnset_record *)sr;
+				nr = mnsr->sr_nodechain;
+				/*
+				 * If next pointer is non-null (more than
+				 * one node in list) or if the single node
+				 * isn't my node - reset single node flag.
+				 * A set without any node record does not
+				 * satisfy "exactly one node, which is
+				 * mine" either, and must not be
+				 * dereferenced.
+				 */
+				if ((nr == NULL) || (nr->nr_next) ||
+				    (strcmp(nr->nr_nodename, mynode()) != 0)) {
+					mnset_single_node = 0;
+					break;
+				}
+			}
+		}
+	}
+
+	for (sr = setrecords; sr != NULL; sr = nextsr) {
+		/*
+		 * Fetch the successor before doing any work.
+		 * NOTE(review): s_delset() (called here and from
+		 * sr_hosts()) presumably removes sr from the cache;
+		 * confirm.  Reading the link first is harmless either way.
+		 */
+		nextsr = sr->sr_next;
+
+		/*
+		 * If a MN diskset and not in the single node
+		 * situation, then don't validate the MN set.
+		 * This is done during a reconfig cycle since all
+		 * nodes must take the same action.
+		 */
+		if (MD_MNSET_REC(sr) && (mnset_single_node == 0))
+			continue;
+
+		/* Since we do "partial" snarf's, we only check new entries */
+		if (! (sr->sr_flags & MD_SR_CHECK))
+			continue;
+
+		/* If we were mid-add, cleanup */
+		if ((sr->sr_flags & MD_SR_ADD)) {
+			s_delset(sr->sr_setname, &xep);
+			if (! mdisok(&xep))
+				mdclrerror(&xep);
+			continue;
+		}
+
+		/* Make sure we are in the set. */
+		if (sr_hosts(sr))
+			continue;
+
+		/* Check has been done, clear the flag (known to be set) */
+		sr->sr_flags &= ~MD_SR_CHECK;
+
+		/*
+		 * If we got here, we are in the set, make sure the flags make
+		 * sense.
+		 */
+		if (! (sr->sr_flags & MD_SR_OK)) {
+			sr->sr_flags &= ~MD_SR_STATE_FLAGS;
+			sr->sr_flags |= MD_SR_OK;
+			commitset(sr, FALSE, &xep);
+			if (! mdisok(&xep))
+				mdclrerror(&xep);
+		}
+
+		/* Make sure all the drives are in a stable state. */
+		sr_drvs(sr);
+	}
+
+	/* Cleanup any stray sidenames */
+	sr_sidenms();
+
+	setsnarfdone = 3;
+}
+
+/*
+ * Look up the cached set record whose sr_selfid equals `recid'.
+ * Returns the record, or NULL if none is cached.
+ */
+static md_set_record *
+sr_in_cache(mddb_recid_t recid)
+{
+	md_set_record *sr = setrecords;
+
+	while (sr != NULL && sr->sr_selfid != recid)
+		sr = sr->sr_next;
+
+	return (sr);
+}
+
+/*
+ * Read ("snarf") the set, node and drive records of the local set into
+ * the in-core cache headed by setrecords.
+ *
+ * Records already cached are skipped; newly read set records are marked
+ * MD_SR_CHECK.  When called from the daemon (md_in_daemon), the record
+ * ids seen are tracked and SR/DR user records not referenced by any set
+ * read here are deleted afterwards.
+ *
+ * setsnarfdone is 1 while the snarf runs, 2 after success and 0 after a
+ * failure (in which case the cache is flushed).
+ *
+ * Returns 0 on success or if the snarf was already done, -1 on failure
+ * with the error in *ep.
+ */
+int
+set_snarf(md_error_t *ep)
+{
+	md_set_record			*sr;
+	md_mnset_record			*mnsr;
+	md_set_record			*tsr;
+	md_drive_record			*dr;
+	mddb_userreq_t			*reqp;
+	ur_recid_lst_t			*urlp;
+	mddb_recid_t			id;
+	mddb_recid_t			*p;
+	md_error_t			xep = mdnullerror;
+	md_mnnode_record		*nr;
+	mddb_set_node_params_t		snp;
+	int				nodecnt;
+	mndiskset_membershiplist_t	 *nl, *nl2;
+
+	/* We have done the snarf call */
+	if (setsnarfdone != 0)
+		return (0);
+
+	if (meta_setup_db_locations(ep) != 0) {
+		/* A stale database is tolerated, anything else is fatal */
+		if (! mdismddberror(ep, MDE_DB_STALE))
+			return (-1);
+		mdclrerror(ep);
+	}
+
+	/*
+	 * Get membershiplist from API routine.
+	 * If there's an error, just use a NULL
+	 * nodelist.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+		nodecnt = 0;  /* no nodes are alive */
+		nl = NULL;
+		mdclrerror(ep);
+	}
+
+	/* Let sr_cache_add and dr_cache_add know we are doing the snarf */
+	setsnarfdone = 1;
+
+	/* Go get the set records */
+	id = 0;
+	while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR,
+							&id, ep)) != NULL) {
+		sr->sr_next = NULL;
+		sr->sr_drivechain = NULL;
+
+		/*
+		 * Cluster nodename support
+		 * Convert nodeid -> nodename
+		 * Don't do this for MN disksets since we've already stored
+		 * both the nodeid and name.
+		 */
+		if (!(MD_MNSET_REC(sr)))
+			sdssc_cm_sr_nid2nm(sr);
+
+		/* If we were mid-cvt, cleanup */
+		if (sr->sr_flags & MD_SR_CVT) {
+			/* If the daemon is calling, cleanup */
+			if (md_in_daemon)
+				url_addl(&url_tode, sr->sr_selfid);
+			/*
+			 * This record never enters the cache and only its
+			 * recid was noted above, so release the buffer
+			 * get_ur_rec() handed us.
+			 */
+			Free(sr);
+			continue;
+		}
+
+		if (md_in_daemon)
+			url_addl(&url_used, sr->sr_selfid);
+
+		/* Skip cached records */
+		tsr = sr_in_cache(sr->sr_selfid);
+		if (tsr != (md_set_record *)NULL) {
+			if (MD_MNSET_REC(sr)) {
+				mnsr = (struct md_mnset_record *)sr;
+				Free(mnsr);
+			} else {
+				Free(sr);
+			}
+			/* The cached copy's drives are still in use */
+			if (md_in_daemon)
+				for (dr = tsr->sr_drivechain;
+				    dr != (md_drive_record *)NULL;
+				    dr = dr->dr_next)
+					url_addl(&url_used, dr->dr_selfid);
+			continue;
+		}
+
+		/* Mark the record as one to be checked */
+		sr->sr_flags |= MD_SR_CHECK;
+
+		sr_cache_add(sr);
+
+		/* If MNdiskset, go get the node records */
+		if (MD_MNSET_REC(sr)) {
+			mnsr = (struct md_mnset_record *)sr;
+			mnsr->sr_nodechain = NULL;
+			p = &mnsr->sr_noderec;
+			while ((nr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
+					MDDB_UR_NR, p, ep)) != NULL) {
+				nr->nr_next = NULL;
+
+				if (md_in_daemon)
+					url_addl(&url_used, nr->nr_selfid);
+
+				/*
+				 * Turn off ALIVE node flag based on member
+				 * list.
+				 * If ALIVE flag is not set, reset OWN flag.
+				 * If this node is mynode, set the OWN flag
+				 * to match the ownership of the diskset.
+				 */
+				if (md_in_daemon) {
+					nr->nr_flags &= ~MD_MN_NODE_ALIVE;
+					nl2 = nl;
+					while (nl2) {
+						/*
+						 * If in member list,
+						 * set alive.
+						 */
+						if (nl2->msl_node_id ==
+						    nr->nr_nodeid) {
+							nr->nr_flags |=
+							    MD_MN_NODE_ALIVE;
+							break;
+						}
+						nl2 = nl2->next;
+					}
+					/*
+					 * If mynode is in member list, then
+					 * check to see if set is snarfed.
+					 * If set snarfed, set own flag;
+					 * otherwise reset it.
+					 * Don't change master even if
+					 * node isn't an owner node, since
+					 * node may be master, but hasn't
+					 * joined the set yet.
+					 */
+					if (nr->nr_flags & MD_MN_NODE_ALIVE) {
+					    if (strcmp(nr->nr_nodename,
+						mynode()) == 0) {
+						    if (s_ownset(
+							mnsr->sr_setno, ep)) {
+							nr->nr_flags |=
+							    MD_MN_NODE_OWN;
+						    } else {
+							nr->nr_flags &=
+							    ~MD_MN_NODE_OWN;
+						    }
+					    }
+					} else {
+					    if (strcmp(nr->nr_nodename,
+						mynode()) == 0) {
+						/*
+						 * If my node isn't in member
+						 * list then reset master.
+						 */
+						mnsr = (struct
+						    md_mnset_record *)sr;
+						mnsr->sr_master_nodeid =
+							MD_MN_INVALID_NID;
+						mnsr->sr_master_nodenm[0] =
+							'\0';
+					    }
+					    nr->nr_flags &= ~MD_MN_NODE_OWN;
+					}
+				}
+
+				/*
+				 * Must grab nr_nextrec now since
+				 * mnnr_cache_add may change it
+				 * (mnnr_cache_add is storing the nodes in
+				 * an ascending nodeid order list in order
+				 * to support reconfig).
+				 *
+				 * NOTE(review): p aliases nr->nr_nextrec
+				 * rather than holding a copy of it, so a
+				 * change made by mnnr_cache_add() is still
+				 * what the next get_ur_rec() reads; confirm
+				 * this is intended.
+				 */
+				if (nr->nr_nextrec != 0)
+					p = &nr->nr_nextrec;
+				else
+					p = NULL;
+
+				mnnr_cache_add((struct md_mnset_record *)sr,
+					nr);
+
+				/* Tell the kernel which nodeid is ours */
+				if ((md_in_daemon) &&
+				    (strcmp(nr->nr_nodename, mynode()) == 0)) {
+					(void) memset(&snp, 0, sizeof (snp));
+					snp.sn_nodeid = nr->nr_nodeid;
+					snp.sn_setno = mnsr->sr_setno;
+					if (metaioctl(MD_MN_SET_NODEID, &snp,
+					    &snp.sn_mde, NULL) != 0) {
+						(void) mdstealerror(ep,
+							&snp.sn_mde);
+					}
+				}
+
+				if (p == NULL)
+					break;
+			}
+			if (! mdisok(ep)) {
+				if (! mdissyserror(ep, ENOENT))
+					goto out;
+				mdclrerror(ep);
+			}
+		}
+
+		if (sr->sr_driverec == 0)
+			continue;
+
+		/* Go get the drive records */
+		p = &sr->sr_driverec;
+		while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
+				MDDB_UR_DR, p, ep)) != NULL) {
+			dr->dr_next = NULL;
+
+			if (md_in_daemon)
+				url_addl(&url_used, dr->dr_selfid);
+
+			dr_cache_add(sr, dr);
+
+			if (dr->dr_nextrec == 0)
+				break;
+
+			p = &dr->dr_nextrec;
+		}
+		if (! mdisok(ep)) {
+			if (! mdissyserror(ep, ENOENT))
+				goto out;
+			mdclrerror(ep);
+			/*
+			 * If dr_nextrec was not valid, or we had some
+			 * problem getting the record, we end up here.
+			 * get_ur_rec() zeroes the recid we passed in,
+			 * if we had a failure getting a record using a key,
+			 * so we simply commit the set record and valid
+			 * drive records, if this fails, we hand an error
+			 * back to the caller.
+			 */
+			commitset(sr, FALSE, ep);
+			if (! mdisok(ep))
+				goto out;
+		}
+	}
+	if (! mdisok(ep)) {
+		if (! mdissyserror(ep, ENOENT))
+			goto out;
+		mdclrerror(ep);
+	}
+
+	/*
+	 * If the daemon called, go through the USER records and cleanup
+	 * any that are not used by valid sets.
+	 */
+	if (md_in_daemon) {
+		id = 0;
+		/* Make a list of records to delete */
+		while ((reqp = get_db_rec(MD_UR_GET_NEXT, MD_LOCAL_SET,
+		    MDDB_USER, 0, &id, ep)) != NULL) {
+			/* Only set and drive records are candidates */
+			if (reqp->ur_type2 != MDDB_UR_SR &&
+			    reqp->ur_type2 != MDDB_UR_DR) {
+				Free((void *)reqp->ur_data);
+				Free(reqp);
+				continue;
+			}
+			if (! url_findl(url_used, reqp->ur_recid))
+				url_addl(&url_tode, reqp->ur_recid);
+			Free((void *)reqp->ur_data);
+			Free(reqp);
+		}
+		if (! mdisok(ep)) {
+			if (! mdissyserror(ep, ENOENT))
+				goto out;
+			mdclrerror(ep);
+		}
+
+		/* Delete all the delete listed records */
+		for (urlp = url_tode; urlp != NULL; urlp = urlp->url_nx) {
+			s_delrec(urlp->url_recid, &xep);
+			if (! mdisok(&xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	url_freel(&url_used);
+	url_freel(&url_tode);
+
+	if (nodecnt)
+		meta_free_nodelist(nl);
+
+	/* Mark the snarf complete */
+	setsnarfdone = 2;
+	return (0);
+
+out:
+	url_freel(&url_used);
+	url_freel(&url_tode);
+
+	sr_cache_flush(1);
+
+	if (nodecnt)
+		meta_free_nodelist(nl);
+
+	/* Snarf failed, reset state */
+	setsnarfdone = 0;
+
+	return (-1);
+}
+
+/*
+ * Append set record `sr' to the end of the cache list headed by
+ * setrecords.  sr->sr_next is left as the caller set it.
+ */
+void
+sr_cache_add(md_set_record *sr)
+{
+	md_set_record **linkp;
+
+	assert(setsnarfdone != 0);
+
+	/* Find the link that ends the list (the head itself if empty) */
+	linkp = &setrecords;
+	while (*linkp != NULL)
+		linkp = &(*linkp)->sr_next;
+
+	*linkp = sr;
+}
+
+/*
+ * Unlink and free the cached set record whose sr_selfid is `recid'
+ * (only the set record structure itself is freed here).  When the cache
+ * ends up empty, setsnarfdone is reset to 0.
+ */
+void
+sr_cache_del(mddb_recid_t recid)
+{
+	md_set_record	**linkp;
+	md_set_record	*victim;
+	md_mnset_record	*mnsr;
+
+	assert(setsnarfdone != 0);
+
+	linkp = &setrecords;
+	while ((victim = *linkp) != NULL) {
+		if (victim->sr_selfid == recid) {
+			/* Splice the record out, then release it */
+			*linkp = victim->sr_next;
+			if (MD_MNSET_REC(victim)) {
+				mnsr = (struct md_mnset_record *)victim;
+				Free(mnsr);
+			} else {
+				Free(victim);
+			}
+			break;
+		}
+		linkp = &victim->sr_next;
+	}
+
+	if (setrecords == NULL)
+		setsnarfdone = 0;
+}
+
+/*
+ * Append drive record `dr' to the drive chain of `sr', keeping the
+ * record-id links (sr_driverec / dr_nextrec) in step with the pointer
+ * links.
+ */
+void
+dr_cache_add(md_set_record *sr, md_drive_record *dr)
+{
+	md_drive_record	*tail;
+
+	assert(setsnarfdone != 0);
+
+	assert(sr != NULL);
+
+	tail = sr->sr_drivechain;
+	if (tail == NULL) {
+		/* First drive of the set */
+		sr->sr_drivechain = dr;
+		sr->sr_driverec = dr->dr_selfid;
+		return;
+	}
+
+	while (tail->dr_next != NULL)
+		tail = tail->dr_next;
+
+	tail->dr_next = dr;
+	tail->dr_nextrec = dr->dr_selfid;
+}
+
+/*
+ * Remove the drive record with dr_selfid `recid' from the drive chain
+ * of `sr' and free it.  Both the pointer link and the record-id link of
+ * the predecessor (or of the set record) are updated.
+ */
+void
+dr_cache_del(md_set_record *sr, mddb_recid_t recid)
+{
+	md_drive_record *prev = NULL;
+	md_drive_record *cur;
+
+	assert(setsnarfdone != 0);
+
+	assert(sr != NULL);
+
+	cur = sr->sr_drivechain;
+	while (cur != NULL && cur->dr_selfid != recid) {
+		prev = cur;
+		cur = cur->dr_next;
+	}
+
+	/* Nothing to do when the record is not on the chain */
+	if (cur == NULL)
+		return;
+
+	if (prev == NULL) {
+		sr->sr_drivechain = cur->dr_next;
+		sr->sr_driverec = cur->dr_nextrec;
+	} else {
+		prev->dr_next = cur->dr_next;
+		prev->dr_nextrec = cur->dr_nextrec;
+	}
+	Free(cur);
+}
+
+/*
+ * Insert node record `nr' into the node chain of `mnsr' so that the
+ * chain stays sorted by ascending nr_nodeid (needed to support
+ * reconfig).  A record whose nodeid equals an existing one is placed
+ * after it.
+ *
+ * nr->nr_next and nr->nr_nextrec are rewritten when the record is put in
+ * front of another one; they are left untouched when it becomes the
+ * only or the last element.
+ */
+void
+mnnr_cache_add(md_mnset_record *mnsr, md_mnnode_record *nr)
+{
+	md_mnnode_record	*prev, *cur;
+
+	assert(mnsr != NULL);
+
+	cur = mnsr->sr_nodechain;
+
+	/* Empty chain, or the new record sorts before the current head */
+	if (cur == NULL || nr->nr_nodeid < cur->nr_nodeid) {
+		if (cur != NULL) {
+			nr->nr_next = cur;
+			nr->nr_nextrec = mnsr->sr_noderec;
+		}
+		mnsr->sr_nodechain = nr;
+		mnsr->sr_noderec = nr->nr_selfid;
+		return;
+	}
+
+	/*
+	 * Stop at the first element with a larger nodeid; prev is then
+	 * the element the new record has to follow.
+	 */
+	for (prev = cur, cur = cur->nr_next; cur != NULL;
+	    prev = cur, cur = cur->nr_next) {
+		if (nr->nr_nodeid < cur->nr_nodeid)
+			break;
+	}
+
+	if (cur != NULL) {
+		/* Goes between prev and cur */
+		nr->nr_next = cur;
+		nr->nr_nextrec = cur->nr_selfid;
+	}
+	prev->nr_next = nr;
+	prev->nr_nextrec = nr->nr_selfid;
+}
+
+/*
+ * Remove the node record with nr_selfid `recid' from the node chain of
+ * `mnsr' and free it, fixing up both the pointer and the record-id
+ * links.  Does nothing if no such record is on the chain.
+ */
+void
+mnnr_cache_del(md_mnset_record *mnsr, mddb_recid_t recid)
+{
+	md_mnnode_record *prev = NULL;
+	md_mnnode_record *cur;
+
+	assert(mnsr != NULL);
+
+	for (cur = mnsr->sr_nodechain; cur != NULL; cur = cur->nr_next) {
+		if (cur->nr_selfid == recid)
+			break;
+		prev = cur;
+	}
+
+	if (cur == NULL)
+		return;
+
+	if (prev == NULL) {
+		/* Removing the head of the chain */
+		mnsr->sr_nodechain = cur->nr_next;
+		mnsr->sr_noderec = cur->nr_nextrec;
+	} else {
+		prev->nr_next = cur->nr_next;
+		prev->nr_nextrec = cur->nr_nextrec;
+	}
+	Free(cur);
+}
+
+/*
+ * Report whether the cached set named `setname' has MD_SR_AUTO_TAKE set.
+ * Outside the daemon the records are snarfed first; a snarf failure is
+ * cleared and reported as "no".
+ *
+ * Returns 1 if the flag is set, 0 otherwise (also when no such set).
+ */
+int
+metad_isautotakebyname(char *setname)
+{
+	md_set_record	*sr;
+	md_error_t	err = mdnullerror;
+
+	if (!md_in_daemon) {
+		if (set_snarf(&err)) {
+			mdclrerror(&err);
+			return (0);
+		}
+	} else {
+		assert(setsnarfdone != 0);
+	}
+
+	/* Locate the first record carrying this name */
+	sr = setrecords;
+	while (sr != NULL && strcmp(setname, sr->sr_setname) != 0)
+		sr = sr->sr_next;
+
+	if (sr == NULL)
+		return (0);
+
+	return ((sr->sr_flags & MD_SR_AUTO_TAKE) ? 1 : 0);
+}
+
+/*
+ * Report whether the cached set with number `setno' has MD_SR_AUTO_TAKE
+ * set.  Outside the daemon the records are snarfed first; a snarf
+ * failure is cleared and reported as "no".
+ *
+ * Returns 1 if the flag is set, 0 otherwise (also when no such set).
+ */
+int
+metad_isautotakebynum(set_t setno)
+{
+	md_set_record	*sr;
+	md_error_t	err = mdnullerror;
+
+	if (!md_in_daemon) {
+		if (set_snarf(&err)) {
+			mdclrerror(&err);
+			return (0);
+		}
+	} else {
+		assert(setsnarfdone != 0);
+	}
+
+	/* Locate the first record carrying this set number */
+	sr = setrecords;
+	while (sr != NULL && sr->sr_setno != setno)
+		sr = sr->sr_next;
+
+	if (sr == NULL)
+		return (0);
+
+	return ((sr->sr_flags & MD_SR_AUTO_TAKE) ? 1 : 0);
+}
+
+/*
+ * Return the cached set record named `setname'.  If there is none,
+ * MDE_NO_SET is recorded in *ep and NULL is returned.
+ */
+md_set_record *
+metad_getsetbyname(char *setname, md_error_t *ep)
+{
+	md_set_record	*sr;
+	char		msg[100];
+
+	assert(setsnarfdone != 0);
+
+	sr = setrecords;
+	while (sr != NULL) {
+		if (strcmp(setname, sr->sr_setname) == 0)
+			return (sr);
+		sr = sr->sr_next;
+	}
+
+	/* Not cached: describe what was looked for in the error */
+	(void) snprintf(msg, sizeof (msg), "setname \"%s\"", setname);
+	(void) mderror(ep, MDE_NO_SET, msg);
+	return (NULL);
+}
+
+/*
+ * Return the cached set record with set number `setno'.
+ *
+ * In the daemon the cache must already be populated; otherwise the
+ * records are snarfed first and NULL is returned if that fails (error
+ * left in *ep).  If no record matches, MDE_NO_SET is recorded in *ep
+ * and NULL is returned.
+ */
+md_set_record *
+metad_getsetbynum(set_t setno, md_error_t *ep)
+{
+	md_set_record	*sr;
+	char		buf[100];
+
+	if (md_in_daemon)
+		assert(setsnarfdone != 0);
+	else if (set_snarf(ep))		/* BYPASS DAEMON mode */
+		return (NULL);
+
+	for (sr = setrecords; sr != NULL; sr = sr->sr_next)
+		if (setno == sr->sr_setno)
+			return (sr);
+
+	/* Bounded formatting, matching metad_getsetbyname() */
+	(void) snprintf(buf, sizeof (buf), "setno %u", setno);
+	(void) mderror(ep, MDE_NO_SET, buf);
+	return (NULL);
+}
+
+
+/*
+ * Commit the set record and all of its associated records
+ * (drive records, node records for a MNset) to the local mddb.
+ *
+ * Each record is first pushed to the kernel with MD_DB_SETDATA, then
+ * all record ids are written out together with MD_DB_COMMIT_MANY.
+ * When inc_genid is non-zero the generation id of every record is
+ * incremented before it is sent.
+ *
+ * On failure the ioctl error is moved into *ep and the function
+ * returns at once.
+ * NOTE(review): on those early returns the nodename -> nodeid
+ * conversion done at the top is not converted back; confirm whether
+ * callers depend on that.
+ */
+void
+commitset(md_set_record *sr, int inc_genid, md_error_t *ep)
+{
+	int		drc, nrc, rc;
+	int		*recs;
+	uint_t		size;
+	md_drive_record	*dr;
+	mddb_userreq_t	req;
+	md_mnset_record	*mnsr;
+	md_mnnode_record	*nr;
+
+	assert(setsnarfdone != 0);
+
+	/*
+	 * Cluster nodename support
+	 * Convert nodename -> nodeid
+	 * Don't do this for MN disksets since we've already stored
+	 * both the nodeid and name.
+	 */
+	if (!(MD_MNSET_REC(sr)))
+		sdssc_cm_sr_nm2nid(sr);
+
+	/* Send down to kernel the data in mddb USER set record */
+	if (inc_genid)
+		sr->sr_genid++;
+	(void) memset(&req, 0, sizeof (req));
+	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
+	if (MD_MNSET_REC(sr)) {
+		req.ur_size = sizeof (*mnsr);
+	} else {
+		req.ur_size = sizeof (*sr);
+	}
+	req.ur_data = (uintptr_t)sr;
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+		return;
+	}
+
+	/*
+	 * Walk through the drive records associated with this set record
+	 * and send down to kernel the data in mddb USER drive record.
+	 */
+	drc = 0;
+	dr = sr->sr_drivechain;
+	while (dr) {
+		if (inc_genid)
+			dr->dr_genid++;
+		METAD_SETUP_DR(MD_DB_SETDATA, dr->dr_selfid)
+		req.ur_size = sizeof (*dr);
+		req.ur_data = (uintptr_t)dr;
+		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &req.ur_mde);
+			return;
+		}
+		drc++;
+		dr = dr->dr_next;
+	}
+
+
+	/*
+	 * If this set is a multi-node set -
+	 * walk through the node records associated with this set record
+	 * and send down to kernel the data in mddb USER node record.
+	 */
+	nrc = 0;
+	if (MD_MNSET_REC(sr)) {
+		mnsr = (struct md_mnset_record *)sr;
+		nr = mnsr->sr_nodechain;
+		while (nr) {
+			if (inc_genid)
+				nr->nr_genid++;
+			METAD_SETUP_NR(MD_DB_SETDATA, nr->nr_selfid)
+			req.ur_size = sizeof (*nr);
+			req.ur_data = (uint64_t)nr;
+			if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL)
+			    != 0) {
+				(void) mdstealerror(ep, &req.ur_mde);
+				return;
+			}
+			nrc++;
+			nr = nr->nr_next;
+		}
+	}
+
+	/*
+	 * Set up list of mddb USER recids containing set and drive records
+	 * and node records if a MNset.
+	 * The list needs one slot for the set record and one for the
+	 * terminating null recid, hence the "+ 2".
+	 */
+	rc = 0;
+	size = (nrc + drc + 2) * sizeof (int);
+	recs = Zalloc(size);
+	/* First recid in list is the set record's id */
+	recs[rc] = sr->sr_selfid;
+	rc++;
+	dr = sr->sr_drivechain;
+	while (dr) {
+		/* Now, fill in the drive record ids */
+		recs[rc] = dr->dr_selfid;
+		dr = dr->dr_next;
+		rc++;
+	}
+	if (MD_MNSET_REC(sr)) {
+		nr = mnsr->sr_nodechain;
+		while (nr) {
+			/* If a MNset, fill in the node record ids */
+			recs[rc] = nr->nr_selfid;
+			nr = nr->nr_next;
+			rc++;
+		}
+	}
+	/* Set last record to null recid */
+	recs[rc] = 0;
+
+	/* Write out the set and drive and node records to the local mddb */
+	METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
+	req.ur_size = size;
+	req.ur_data = (uintptr_t)recs;
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+		/* The recid list is ours; do not leak it on failure */
+		Free(recs);
+		return;
+	}
+
+	/*
+	 * Cluster nodename support
+	 * Convert nodeid -> nodename
+	 * Don't do this for MN disksets since we've already stored
+	 * both the nodeid and name.
+	 */
+	if (!(MD_MNSET_REC(sr)))
+		sdssc_cm_sr_nid2nm(sr);
+
+	Free(recs);
+}
+
+/*
+ * Duplicate a set record together with its drive chain.
+ *
+ * Only sizeof (md_set_record) bytes are copied, even if the record
+ * describes a MN set.  This allows pre-MN SVM RPC code to access a MN
+ * set record and to display it.
+ *
+ * The MN SVM RPC code detects if the set record returned describes
+ * a MN set and then will copy it using mnsetdup.
+ *
+ * Returns the copy (sr_next cleared), or NULL if sr is NULL or the
+ * allocation returned NULL.
+ */
+md_set_record *
+setdup(md_set_record *sr)
+{
+	md_set_record		*copy;
+	md_drive_record		**linkp;
+
+	if (sr == NULL)
+		return (NULL);
+
+	copy = Malloc(sizeof (*sr));
+	if (copy == NULL)
+		return (NULL);
+
+	(void) memmove(copy, sr, sizeof (*sr));
+	copy->sr_next = NULL;
+
+	/* Replace each inherited drive pointer by a private copy */
+	for (linkp = &copy->sr_drivechain; *linkp != NULL;
+	    linkp = &(*linkp)->dr_next)
+		*linkp = drdup(*linkp);
+
+	return (copy);
+}
+
+/*
+ * Duplicate a MN set record together with its drive chain and its node
+ * chain.
+ *
+ * This routine only copies MN set records.  If a NULL pointer or a
+ * non-MN set record is passed in, a NULL pointer is returned.
+ */
+md_mnset_record *
+mnsetdup(md_mnset_record *mnsr)
+{
+	md_mnset_record		*tmnsr = NULL;
+	md_drive_record		**tdrpp = NULL;
+	md_mnnode_record	**tnrpp = NULL;
+
+	/* Test for NULL before MD_MNSET_REC() looks at the record */
+	if (mnsr == NULL || !MD_MNSET_REC(mnsr)) {
+		return (NULL);
+	}
+
+	if ((tmnsr = Malloc(sizeof (*mnsr))) != NULL) {
+		(void) memmove(tmnsr, mnsr, sizeof (*mnsr));
+		tmnsr->sr_next = NULL;
+		/* Give the copy its own drive records ... */
+		tdrpp = &tmnsr->sr_drivechain;
+		while (*tdrpp) {
+			*tdrpp = drdup(*tdrpp);
+			tdrpp = &(*tdrpp)->dr_next;
+		}
+		/* ... and its own node records */
+		tnrpp = &tmnsr->sr_nodechain;
+		while (*tnrpp) {
+			*tnrpp = nrdup(*tnrpp);
+			tnrpp = &(*tnrpp)->nr_next;
+		}
+	}
+	return (tmnsr);
+}
+
+/*
+ * Return a byte-for-byte copy of drive record `dr' (its dr_next still
+ * refers to the original's successor), or NULL if dr is NULL or the
+ * allocation returned NULL.
+ */
+md_drive_record *
+drdup(md_drive_record *dr)
+{
+	md_drive_record		*copy;
+
+	if (dr == NULL)
+		return (NULL);
+
+	copy = Malloc(sizeof (*dr));
+	if (copy != NULL)
+		(void) memmove(copy, dr, sizeof (*dr));
+
+	return (copy);
+}
+
+/*
+ * Return a byte-for-byte copy of node record `nr' (its nr_next still
+ * refers to the original's successor), or NULL if nr is NULL or the
+ * allocation returned NULL.
+ */
+md_mnnode_record *
+nrdup(md_mnnode_record *nr)
+{
+	md_mnnode_record	*copy;
+
+	if (nr == NULL)
+		return (NULL);
+
+	copy = Malloc(sizeof (*nr));
+	if (copy != NULL)
+		(void) memmove(copy, nr, sizeof (*nr));
+
+	return (copy);
+}
+
+/*
+ * Duplicate parts of the drive descriptor list for this node.
+ * Only duplicate the drive name string in the mddrivename structure, don't
+ * need to copy any other pointers since only interested in the flags and
+ * the drive name (i.e. other pointers will be set to NULL).
+ *	Returns NULL if dd is NULL or if an allocation fails (everything
+ *	allocated so far is released).
+ *	Returns pointer (non-NULL) to dup'd list if successful.
+ *
+ * The devid string is not duplicated: the earlier code allocated a copy
+ * but never attached it to the new mddrivename, which only leaked it.
+ */
+md_drive_desc *
+dd_list_dup(md_drive_desc *dd)
+{
+	md_drive_desc	*orig_dd;
+	md_drive_desc	*copy_dd;
+	md_drive_desc	*copy_dd_head = NULL;
+	md_drive_desc	**copy_tailpp = &copy_dd_head;
+	mddrivename_t	*copy_dnp;
+	char		*copy_cname;
+	size_t		cname_len;
+
+	for (orig_dd = dd; orig_dd != NULL; orig_dd = orig_dd->dd_next) {
+		/*
+		 * cname is a pointer, so its sizeof is the pointer size,
+		 * not the string length; size the copy by the string.
+		 */
+		cname_len = strlen(orig_dd->dd_dnp->cname) + 1;
+
+		copy_dd = Zalloc(sizeof (*copy_dd));
+		copy_dnp = Zalloc(sizeof (mddrivename_t));
+		copy_cname = Zalloc(cname_len);
+
+		if ((copy_dd == NULL) || (copy_dnp == NULL) ||
+		    (copy_cname == NULL)) {
+			/* Release the pieces of the element in progress */
+			if (copy_cname)
+				Free(copy_cname);
+			if (copy_dnp)
+				Free(copy_dnp);
+			if (copy_dd)
+				Free(copy_dd);
+			/* ... and every completed element with its name */
+			while (copy_dd_head) {
+				copy_dd = copy_dd_head->dd_next;
+				Free(copy_dd_head->dd_dnp->cname);
+				Free(copy_dd_head->dd_dnp);
+				Free(copy_dd_head);
+				copy_dd_head = copy_dd;
+			}
+			return (NULL);
+		}
+
+		(void) memmove(copy_dd, orig_dd, sizeof (*orig_dd));
+		(void) strlcpy(copy_cname, orig_dd->dd_dnp->cname, cname_len);
+
+		/* Detach the copy from the original's pointers */
+		copy_dd->dd_next = NULL;
+		copy_dd->dd_dnp = copy_dnp;
+		copy_dd->dd_dnp->cname = copy_cname;
+
+		/* Link at the tail to preserve the original order */
+		*copy_tailpp = copy_dd;
+		copy_tailpp = &copy_dd->dd_next;
+	}
+
+	return (copy_dd_head);
+}
+
+/*
+ * Free the whole set record cache: every set record, its drive records
+ * and, for MN sets, its node records.  setrecords and setsnarfdone are
+ * reset, and when `flushnames' is non-zero metaflushnames(0) is called
+ * as well.
+ */
+void
+sr_cache_flush(int flushnames)
+{
+	md_set_record		*sr, *next_sr;
+	md_mnset_record		*mnsr;
+	md_drive_record		*dr, *next_dr;
+	md_mnnode_record	*nr, *next_nr;
+
+	for (sr = setrecords; sr != NULL; sr = next_sr) {
+		for (dr = sr->sr_drivechain; dr != NULL; dr = next_dr) {
+			next_dr = dr->dr_next;
+			Free(dr);
+		}
+
+		/* Remember the successor before the record is released */
+		next_sr = sr->sr_next;
+
+		if (MD_MNSET_REC(sr)) {
+			mnsr = (struct md_mnset_record *)sr;
+			for (nr = mnsr->sr_nodechain; nr != NULL;
+			    nr = next_nr) {
+				next_nr = nr->nr_next;
+				Free(nr);
+			}
+			Free(mnsr);
+		} else {
+			Free(sr);
+		}
+	}
+
+	setrecords = NULL;
+
+	setsnarfdone = 0;
+
+	/* This will cause the other caches to be cleared */
+	if (flushnames)
+		metaflushnames(0);
+}
+
+/*
+ * Remove the cached set record for setno, if there is one, along with
+ * everything hanging off it, then reset the snarf state and flush the
+ * name caches.  The set records must have been snarfed beforehand.
+ */
+void
+sr_cache_flush_setno(set_t setno)
+{
+	md_set_record	*sr, *tsr;
+	md_mnset_record	*mnsr;
+	md_drive_record *dr, *tdr;
+	md_mnnode_record *nr, *tnr;
+
+	assert(setsnarfdone != 0);
+
+	/* tsr trails sr by one record so the match can be unlinked */
+	for (sr = tsr = setrecords; sr; tsr = sr, sr = sr->sr_next) {
+		if (sr->sr_setno != setno)
+			continue;
+
+		dr = sr->sr_drivechain;
+		while (dr != NULL) {
+			tdr = dr;
+			dr = dr->dr_next;
+			Free(tdr);
+		}
+
+		if (sr == setrecords)
+			setrecords = sr->sr_next;
+		else
+			tsr->sr_next = sr->sr_next;
+
+		if (MD_MNSET_REC(sr)) {
+			mnsr = (struct md_mnset_record *)sr;
+			/*
+			 * Release the node records too, exactly as
+			 * sr_cache_flush() does; leaving them behind leaked
+			 * the whole chain for multi-node sets.
+			 */
+			nr = mnsr->sr_nodechain;
+			while (nr != NULL) {
+				tnr = nr;
+				nr = nr->nr_next;
+				Free(tnr);
+			}
+			Free(mnsr);
+		} else {
+			Free(sr);
+		}
+		break;
+	}
+
+	setsnarfdone = 0;
+
+	/* This will cause the other caches to be cleared */
+	metaflushnames(0);
+}
+
+/*
+ * Query set ownership for setno through the MD_DB_OWNSET ioctl.
+ * Returns the owns_set value reported back, or 0 when the ioctl fails
+ * (the failure is left in ep).
+ */
+int
+s_ownset(set_t setno, md_error_t *ep)
+{
+	mddb_ownset_t		query;
+
+	query.setno = setno;
+	query.owns_set = MD_SETOWNER_NONE;
+
+	return ((metaioctl(MD_DB_OWNSET, &query, ep, NULL) == 0) ?
+	    query.owns_set : 0);
+}
+
+/*
+ * Delete the named set: remove its /dev/md link and lock file, delete its
+ * node, drive and set records through MD_DB_USERREQ, post the matching
+ * sysevent notifications and drop the records from the caches.
+ *
+ * Any failing record delete is handed to ep; the set record is then
+ * released with free_sr() and the function returns with the remaining
+ * records untouched.
+ */
+void
+s_delset(char *setname, md_error_t *ep)
+{
+	md_set_record		*sr;
+	md_set_record		*tsr;
+	md_drive_record		*dr;
+	md_drive_record		*tdr;
+	md_mnnode_record	*nr, *tnr;
+	mddb_userreq_t		req;
+	char			stringbuf[100];
+	int			i;
+	mdsetname_t		*sp = NULL;
+	mddrivename_t		*dn = NULL;
+	mdname_t		*np = NULL;
+	md_dev64_t		dev;
+	side_t			myside = MD_SIDEWILD;
+	md_error_t		xep = mdnullerror;
+	md_mnset_record		*mnsr;
+	int			num_sets = 0;
+	int			num_mn_sets = 0;
+
+	(void) memset(&req, 0, sizeof (mddb_userreq_t));
+
+	/* nothing to do if the set is unknown */
+	if ((sr = getsetbyname(setname, ep)) == NULL)
+		return;
+
+	/*
+	 * sp may come back NULL; the error is discarded and the drive
+	 * loop below checks sp before using it.
+	 */
+	sp = metasetnosetname(sr->sr_setno, &xep);
+	mdclrerror(&xep);
+
+	if (MD_MNSET_REC(sr)) {
+		/*
+		 * If this node is a set owner, halt the set before
+		 * deleting the set records.  Ignore any errors since
+		 * s_ownset and halt_set could fail if panic had occurred
+		 * during the add/delete of a node.
+		 */
+		if (s_ownset(sr->sr_setno, &xep)) {
+			mdclrerror(&xep);
+			if (halt_set(sp, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* remove the set's /dev/md entry and its lock file; failures ignored */
+	(void) snprintf(stringbuf, sizeof (stringbuf), "/dev/md/%s", setname);
+	(void) unlink(stringbuf);
+	(void) unlink(meta_lock_name(sr->sr_setno));
+
+	if (MD_MNSET_REC(sr)) {
+		/* multi-node set: delete every node record on the chain */
+		mnsr = (struct md_mnset_record *)sr;
+		nr = mnsr->sr_nodechain;
+		while (nr) {
+			/* Setting myside for later use */
+			if (strcmp(mynode(), nr->nr_nodename) == 0)
+				myside = nr->nr_nodeid;
+
+			(void) memset(&req, 0, sizeof (req));
+			/* presumably fills in the local req; it is not passed */
+			METAD_SETUP_NR(MD_DB_DELETE, nr->nr_selfid)
+			if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde,
+			    NULL) != 0) {
+				(void) mdstealerror(ep, &req.ur_mde);
+				free_sr(sr);
+				return;
+			}
+			/* step past the record before it leaves the cache */
+			tnr = nr;
+			nr = nr->nr_next;
+
+			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
+			    sr->sr_setno, tnr->nr_nodeid);
+
+			mnnr_cache_del((struct md_mnset_record *)sr,
+			    tnr->nr_selfid);
+		}
+	} else {
+		/*
+		 * Traditional set: hosts live in the sr_nodes array, so
+		 * there are no node records to delete; only find our own
+		 * side and post a notification per host.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sr->sr_nodes[i][0] == '\0')
+				continue;
+
+			if (strcmp(mynode(), sr->sr_nodes[i]) == 0)
+				myside = i;
+
+			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
+			    sr->sr_setno, i);
+		}
+	}
+
+	/* delete every drive record of the set */
+	dr = sr->sr_drivechain;
+	while (dr) {
+		(void) memset(&req, 0, sizeof (req));
+		METAD_SETUP_DR(MD_DB_DELETE, dr->dr_selfid)
+		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &req.ur_mde);
+			free_sr(sr);
+			return;
+		}
+		/* step past the record before it leaves the cache */
+		tdr = dr;
+		dr = dr->dr_next;
+
+		/*
+		 * Work out the device to report in the notifications.  It
+		 * stays NODEV64 unless our side is known, the set name was
+		 * resolved and the drive's replica slice can be named;
+		 * lookup errors are dropped.
+		 */
+		dev = NODEV64;
+		if (myside != MD_SIDEWILD && sp != NULL) {
+			dn = metadrivename_withdrkey(sp, myside,
+			    tdr->dr_key, MD_BASICNAME_OK, &xep);
+			if (dn != NULL) {
+				uint_t	rep_slice;
+
+				np = NULL;
+				if (meta_replicaslice(dn, &rep_slice,
+				    &xep) == 0) {
+					np = metaslicename(dn, rep_slice, &xep);
+				}
+
+				if (np != NULL)
+					dev = np->dev;
+				else
+					mdclrerror(&xep);
+			} else
+				mdclrerror(&xep);
+		} else
+			mdclrerror(&xep);
+
+		/* reported as a removal from this set and an add to the local set */
+		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE,
+		    sr->sr_setno, dev);
+		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE,
+		    MD_LOCAL_SET, dev);
+
+		dr_cache_del(sr, tdr->dr_selfid);
+
+	}
+
+	/* finally delete the set record itself */
+	(void) memset(&req, 0, sizeof (req));
+	METAD_SETUP_SR(MD_DB_DELETE, sr->sr_selfid)
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+		free_sr(sr);
+		return;
+	}
+
+	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_SET, sr->sr_setno,
+	    NODEV64);
+
+	/* count the cached sets that remain, not counting this one */
+	for (tsr = setrecords; tsr; tsr = tsr->sr_next) {
+		if (tsr == sr)
+			continue;
+
+		num_sets++;
+		if (MD_MNSET_REC(tsr))
+			num_mn_sets++;
+	}
+
+	/* no multi-node sets remain: disable that service; result ignored */
+	if (num_mn_sets == 0)
+		(void) meta_smf_disable(META_SMF_MN_DISKSET, &xep);
+
+	/* The set we just deleted is the only one left */
+	if (num_sets == 0)
+		(void) meta_smf_disable(META_SMF_DISKSET, &xep);
+
+	/* drop the record from the cache, then release our copy */
+	sr_cache_del(sr->sr_selfid);
+	free_sr(sr);
+
+}
+
+/*
+ * Ask the driver to delete the record identified by recid.  On failure
+ * the error from the request is moved into ep.
+ */
+void
+s_delrec(mddb_recid_t recid, md_error_t *ep)
+{
+	/*
+	 * The setup macro is not handed the request, so it presumably
+	 * works on a local with exactly this name; keep it called req.
+	 */
+	mddb_userreq_t		req;
+
+	(void) memset(&req, 0, sizeof (req));
+	METAD_SETUP_SR(MD_DB_DELETE, recid)
+
+	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) == 0)
+		return;
+
+	(void) mdstealerror(ep, &req.ur_mde);
+}
+
+/*
+ * resnarf the imported set
+ *
+ * Reads the set record for setno, and the drive records chained from it,
+ * out of the local set's user records and adds them to the set record
+ * cache.  Multi-node sets are refused.
+ *
+ * setsnarfdone is 1 while the records are being read, 2 on success and
+ * 0 on failure.  Returns 0 on success.  On failure returns -1 after
+ * flushing the whole set record cache (and the name caches).
+ */
+int
+resnarf_set(
+	set_t			setno,
+	md_error_t		*ep
+)
+{
+	md_set_record	*sr;
+	md_drive_record	*dr;
+	mddb_recid_t	id, *p;
+
+	/* a stale database is tolerated; any other setup error is fatal */
+	if (meta_setup_db_locations(ep) != 0) {
+		if (! mdismddberror(ep, MDE_DB_STALE))
+			return (-1);
+		mdclrerror(ep);
+	}
+
+	setsnarfdone = 1;
+
+	/* walk every set record stored in the local set */
+	id = 0;
+	while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR, &id,
+	    ep)) != NULL) {
+
+		/*
+		 * NOTE(review): a record for another set is skipped without
+		 * being released here; whether get_ur_rec() hands back
+		 * storage the caller owns is not visible from this function.
+		 */
+		if (sr->sr_setno != setno)
+			continue;
+
+		/* Don't allow resnarf of a multi-node diskset */
+		/* NOTE(review): no error is stored in ep on this path */
+		if (MD_MNSET_REC(sr))
+			goto out;
+
+		sr->sr_next = NULL;
+		sr->sr_drivechain = NULL;
+
+		if (md_in_daemon)
+			url_addl(&url_used, sr->sr_selfid);
+
+		sr->sr_flags |= MD_SR_CHECK;
+
+		sr_cache_add(sr);
+
+		/* no drive records: this leaves the set record loop as well */
+		if (sr->sr_driverec == 0)
+			break;
+
+		/* follow the drive record chain by record id */
+		p = &sr->sr_driverec;
+		while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
+		    MDDB_UR_DR, p, ep)) != NULL) {
+			dr->dr_next = NULL;
+
+			if (md_in_daemon)
+				url_addl(&url_used, dr->dr_selfid);
+
+			dr_cache_add(sr, dr);
+
+			if (dr->dr_nextrec == 0)
+				break;
+
+			p = &dr->dr_nextrec;
+		}
+		/*
+		 * A drive record that could not be found (ENOENT) is not
+		 * fatal: the error is cleared and the set is committed with
+		 * what was read.  Anything else aborts the resnarf.
+		 */
+		if (! mdisok(ep)) {
+			if (! mdissyserror(ep, ENOENT))
+				goto out;
+			mdclrerror(ep);
+			commitset(sr, FALSE, ep);
+			if (! mdisok(ep))
+				goto out;
+		}
+	}
+	/* an ENOENT left in ep by the set record walk is cleared, not reported */
+	if (! mdisok(ep)) {
+		if (! mdissyserror(ep, ENOENT))
+			goto out;
+		mdclrerror(ep);
+	}
+
+	setsnarfdone = 2;
+
+	url_freel(&url_used);
+	url_freel(&url_tode);
+	return (0);
+
+	/* failure: drop the bookkeeping lists and everything cached so far */
+out:
+	url_freel(&url_used);
+	url_freel(&url_tode);
+
+	sr_cache_flush(1);
+
+	setsnarfdone = 0;
+
+	return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mh.c b/usr/src/lib/lvm/libmeta/common/meta_mh.c
new file mode 100644
index 0000000000..ba0ce10656
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mh.c
@@ -0,0 +1,842 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * MH ioctl functions
+ */
+
+#include <meta.h>
+#include <metamhd.h>
+#include <string.h>
+
+#include "meta_runtime.h"
+
+/* NOTE(review): nothing in this file appears to reference DEFAULTDEV */
+#define	DEFAULTDEV "/dev/rdsk"
+/*
+ * default timeout values
+ *
+ * The units are not stated here -- presumably milliseconds; confirm
+ * against the definition of mhd_mhiargs_t.
+ */
+mhd_mhiargs_t	defmhiargs = {
+	1000,			/* failfast */
+	{ 6000, 6000, 30000 }	/* take ownership */
+};
+
+/*
+ * RPC timeouts
+ *
+ * tk_own_timeout is installed on the client handle for the take
+ * ownership, status and list requests; rel_own_timeout for the release
+ * request.
+ */
+static md_timeval32_t	tk_own_timeout  = { 24 * 60 * 60, 0 };	/* 1 day */
+static md_timeval32_t	rel_own_timeout = { 24 * 60 * 60, 0 };	/* 1 day */
+
+/*
+ * RPC handle
+ *
+ * Built by open_metamhd() and torn down by close_metamhd().
+ */
+typedef struct {
+	char	*hostname;	/* private copy, freed by close_metamhd() */
+	CLIENT	*clientp;	/* client handle for the metamhd service */
+} mhd_handle_t;
+
+/*
+ * Tear down a handle from open_metamhd(): release the saved host name,
+ * destroy the client's auth handle and the client, then free the handle.
+ */
+static void
+close_metamhd(
+	mhd_handle_t	*hp
+)
+{
+	CLIENT	*clnt;
+
+	assert(hp != NULL);
+
+	if (hp->hostname != NULL)
+		Free(hp->hostname);
+
+	clnt = hp->clientp;
+	if (clnt != NULL) {
+		/* the auth handle has to go before the client that holds it */
+		auth_destroy(clnt->cl_auth);
+		clnt_destroy(clnt);
+	}
+
+	Free(hp);
+}
+
+/*
+ * Connect to rpc.metamhd on hostname over tcp.  A NULL or empty hostname
+ * means the local node.  Returns a new handle, or NULL with ep set when
+ * the client cannot be created (the RPC create error is also printed).
+ */
+static mhd_handle_t *
+open_metamhd(
+	char		*hostname,
+	md_error_t	*ep
+)
+{
+	CLIENT		*clientp;
+	mhd_handle_t	*hp;
+
+	/* fall back to this node when no host was named */
+	if ((hostname == NULL) || (*hostname == '\0'))
+		hostname = mynode();
+	assert(hostname != NULL);
+
+	clientp = meta_client_create(hostname, METAMHD, METAMHD_VERSION,
+	    "tcp");
+	if (clientp == NULL) {
+		clnt_pcreateerror(hostname);
+		(void) mdrpccreateerror(ep, hostname, "metamhd clnt_create");
+		return (NULL);
+	}
+
+	/* swap the auth handle the client came with for a default sys one */
+	auth_destroy(clientp->cl_auth);
+	clientp->cl_auth = authsys_create_default();
+	assert(clientp->cl_auth != NULL);
+
+	/* wrap the connection together with a private copy of the name */
+	hp = Zalloc(sizeof (*hp));
+	hp->clientp = clientp;
+	hp->hostname = Strdup(hostname);
+	return (hp);
+}
+
+/*
+ * Convert an mhd_error_t into an md_error_t and consume it.
+ *
+ * Returns 0 when mhep carries no error, -1 after translating one into
+ * ep.  Either way the name attached to mhep is freed and mhep is zeroed.
+ */
+int
+mhstealerror(
+	mhd_error_t	*mhep,
+	md_error_t	*ep
+)
+{
+	int		rval = 0;
+
+	if (mhep->errnum != 0) {
+		rval = -1;
+
+		/* two codes have dedicated errors; the rest are errnos */
+		if (mhep->errnum == MHD_E_MAJORITY)
+			(void) mderror(ep, MDE_TAKE_OWN, mhep->name);
+		else if (mhep->errnum == MHD_E_RESERVED)
+			(void) mderror(ep, MDE_RESERVED, mhep->name);
+		else
+			(void) mdsyserror(ep, mhep->errnum, mhep->name);
+	}
+
+	/* a name may be attached even without an error; release it too */
+	if (mhep->name != NULL)
+		Free(mhep->name);
+	(void) memset(mhep, 0, sizeof (*mhep));
+
+	return (rval);
+}
+
+/*
+ * Decide whether MH ioctls should be issued.  Returns 0, after saying so
+ * on stderr, when MD_NOMHIOCTL is present in the environment; 1
+ * otherwise.
+ */
+static int
+do_mhioctl()
+{
+	if (getenv("MD_NOMHIOCTL") == NULL)
+		return (1);
+
+	(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+	    "NOT doing MH ioctls\n"));
+	(void) fflush(stderr);
+	return (0);
+}
+
+/*
+ * take ownership of drives
+ *
+ * Asks rpc.metamhd on the local node to take ownership of every drive in
+ * dnlp on behalf of set sname.
+ *
+ *	sname		set name sent with the request
+ *	dnlp		drives to take; an empty list is a successful no-op
+ *	mhiargsp	timeout values, copied into the request
+ *	partial_set	non-zero adds MHD_PARTIAL_SET to the options
+ *	ep		error return
+ *
+ * Returns 0 on success, including when owner ioctls are disabled by the
+ * runtime parameters, and -1 with ep set on failure.
+ */
+int
+meta_take_own(
+	char			*sname,
+	mddrivenamelist_t	*dnlp,
+	mhd_mhiargs_t		*mhiargsp,
+	int			partial_set,
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	*p;
+	uint_t			ndev = 0;
+	mhd_tkown_args_t	args;
+	mhd_error_t		mherror;
+	mhd_set_t		*mhsp = &args.set;
+	uint_t			i;
+	char			*e;
+	mhd_handle_t		*hp = NULL;
+	int			rval = -1;
+
+	/*
+	 * RFE 4126509.  Check the runtime parameters to see if
+	 * they're set to disable MHIOCTKOWN ioctl() operations
+	 * on the disks.  If so, return immediately without
+	 * performing the operations.
+	 */
+
+	if (do_owner_ioctls() == B_FALSE) {
+		return (0);
+	}
+
+	/* count drives, get set */
+	for (p = dnlp; (p != NULL); p = p->next)
+		++ndev;
+	if (ndev == 0)
+		return (0);
+
+	/* initialize */
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&mherror, 0, sizeof (mherror));
+
+	/* build arguments: everything below is released by xdr_free() */
+	mhsp->setname = Strdup(sname);
+	mhsp->drives.drives_len = ndev;
+	mhsp->drives.drives_val
+	    = Calloc(ndev, sizeof (*mhsp->drives.drives_val));
+	for (p = dnlp, i = 0; (i < ndev); p = p->next, ++i) {
+		mhsp->drives.drives_val[i] = Strdup(p->drivenamep->rname);
+	}
+	args.timeouts = *mhiargsp;
+	args.ff_mode = MHD_FF_DRIVER;
+
+	/*
+	 * MD_DEBUG may override the failfast mode with FAILFAST=<mode>.
+	 * The mode is matched with strcmp(), so it only takes effect when
+	 * it is the last thing in MD_DEBUG; unknown modes are ignored.
+	 */
+	if (((e = getenv("MD_DEBUG")) != NULL) &&
+	    ((e = strstr(e, "FAILFAST=")) != NULL) &&
+	    ((e = strchr(e, '=')) != NULL)) {
+		++e;
+		if (strcmp(e, "NONE") == 0)
+			args.ff_mode = MHD_FF_NONE;
+		else if (strcmp(e, "DRIVER") == 0)
+			args.ff_mode = MHD_FF_DRIVER;
+		else if (strcmp(e, "DEBUG") == 0)
+			args.ff_mode = MHD_FF_DEBUG;
+		else if (strcmp(e, "HALT") == 0)
+			args.ff_mode = MHD_FF_HALT;
+		else if (strcmp(e, "PANIC") == 0)
+			args.ff_mode = MHD_FF_PANIC;
+	}
+	if (partial_set)
+		args.options |= MHD_PARTIAL_SET;
+	if (((e = getenv("MD_DEBUG")) != NULL) &&
+	    (strstr(e, "NOTHREAD") != NULL)) {
+		args.options |= MHD_SERIAL;
+	}
+
+	/*
+	 * open connection; on failure go through the common cleanup so the
+	 * request built above is not leaked
+	 */
+	if ((hp = open_metamhd(NULL, ep)) == NULL)
+		goto out;
+	clnt_control(hp->clientp, CLSET_TIMEOUT, (char *)&tk_own_timeout);
+
+	/* take ownership */
+	if (mhd_tkown_1(&args, &mherror, hp->clientp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, hp->clientp, hp->hostname,
+		    "metamhd tkown");
+	} else if (mhstealerror(&mherror, ep) == 0) {
+		rval = 0;	/* success */
+	}
+
+	/* cleanup, return status */
+out:
+	xdr_free(xdr_mhd_tkown_args_t, (char *)&args);
+	xdr_free(xdr_mhd_error_t, (char *)&mherror);
+	if (hp != NULL)
+		close_metamhd(hp);
+	return (rval);
+}
+
+/*
+ * Take ownership of the drives named by a drive descriptor list: collect
+ * the descriptors' drive names into a drivename list and hand that to
+ * meta_take_own().  Returns whatever meta_take_own() returns.
+ */
+int
+tk_own_bydd(
+	mdsetname_t		*sp,
+	md_drive_desc		*ddlp,
+	mhd_mhiargs_t		*mhiargsp,
+	int			partial_set,
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	*drives = NULL;
+	mddrivenamelist_t	**endpp = &drives;
+	md_drive_desc		*dd = ddlp;
+	int			status;
+
+	/*
+	 * Appending goes through a pointer to the end of the list, so
+	 * the list is never re-walked to find its tail.
+	 */
+	while (dd != NULL) {
+		endpp = meta_drivenamelist_append_wrapper(endpp, dd->dd_dnp);
+		dd = dd->dd_next;
+	}
+
+	status = meta_take_own(sp->setname, drives, mhiargsp, partial_set,
+	    ep);
+
+	/* the temporary list is no longer needed once the call returns */
+	metafreedrivenamelist(drives);
+	return (status);
+}
+
+/*
+ * release ownership of drives
+ *
+ * Asks rpc.metamhd on the local node to release ownership of every drive
+ * in dnlp on behalf of set sname.
+ *
+ *	sname		set name sent with the request
+ *	dnlp		drives to release; an empty list is a successful no-op
+ *	partial_set	non-zero adds MHD_PARTIAL_SET to the options
+ *	ep		error return
+ *
+ * Returns 0 on success, including when the ioctls are disabled by the
+ * runtime parameters or by MD_NOMHIOCTL, and -1 with ep set on failure.
+ */
+int
+meta_rel_own(
+	char			*sname,
+	mddrivenamelist_t	*dnlp,
+	int			partial_set,
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	*p;
+	uint_t			ndev = 0;
+	mhd_relown_args_t	args;
+	mhd_error_t		mherror;
+	mhd_set_t		*mhsp = &args.set;
+	uint_t			i;
+	char			*e;
+	mhd_handle_t		*hp = NULL;
+	int			rval = -1;
+
+	/*
+	 * RFE 4126509.  Check the runtime parameters to see if
+	 * they're set to disable MHIOCRELEASE and MHIOCENFAILFAST
+	 * ioctl() operations on the disks.  If so, return
+	 * immediately without performing the operations.
+	 */
+
+	if (do_owner_ioctls() == B_FALSE) {
+		return (0);
+	}
+
+	/*
+	 * if not doing ioctls (HK 98/10/28: the following code tests
+	 * an environment variable, and was apparently inserted to
+	 * make testing easier.)
+	 */
+
+	if (! do_mhioctl())
+		return (0);
+
+	/* count drives, get set */
+	for (p = dnlp; (p != NULL); p = p->next)
+		++ndev;
+	if (ndev == 0)
+		return (0);
+
+	/* initialize */
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&mherror, 0, sizeof (mherror));
+
+	/* build arguments: everything below is released by xdr_free() */
+	mhsp->setname = Strdup(sname);
+	mhsp->drives.drives_len = ndev;
+	mhsp->drives.drives_val
+	    = Calloc(ndev, sizeof (*mhsp->drives.drives_val));
+	for (p = dnlp, i = 0; (i < ndev); p = p->next, ++i) {
+		mhsp->drives.drives_val[i] = Strdup(p->drivenamep->rname);
+	}
+	if (partial_set)
+		args.options |= MHD_PARTIAL_SET;
+	if (((e = getenv("MD_DEBUG")) != NULL) &&
+	    (strstr(e, "NOTHREAD") != NULL)) {
+		args.options |= MHD_SERIAL;
+	}
+
+	/*
+	 * open connection; on failure go through the common cleanup so the
+	 * request built above is not leaked
+	 */
+	if ((hp = open_metamhd(NULL, ep)) == NULL)
+		goto out;
+	clnt_control(hp->clientp, CLSET_TIMEOUT, (char *)&rel_own_timeout);
+
+	/* release ownership */
+	if (mhd_relown_1(&args, &mherror, hp->clientp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, hp->clientp, hp->hostname,
+		    "metamhd relown");
+	} else if (mhstealerror(&mherror, ep) == 0) {
+		rval = 0;	/* success */
+	}
+
+	/* cleanup, return status */
+out:
+	xdr_free(xdr_mhd_relown_args_t, (char *)&args);
+	xdr_free(xdr_mhd_error_t, (char *)&mherror);
+	if (hp != NULL)
+		close_metamhd(hp);
+	return (rval);
+}
+
+/*
+ * Release ownership of the drives named by a drive descriptor list:
+ * collect the descriptors' drive names into a drivename list and hand
+ * that to meta_rel_own().  Returns whatever meta_rel_own() returns.
+ */
+int
+rel_own_bydd(
+	mdsetname_t		*sp,
+	md_drive_desc		*ddlp,
+	int			partial_set,
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	*drives = NULL;
+	mddrivenamelist_t	**endpp = &drives;
+	md_drive_desc		*dd = ddlp;
+	int			status;
+
+	/*
+	 * Appending goes through a pointer to the end of the list, so
+	 * the list is never re-walked to find its tail.
+	 */
+	while (dd != NULL) {
+		endpp = meta_drivenamelist_append_wrapper(endpp, dd->dd_dnp);
+		dd = dd->dd_next;
+	}
+
+	status = meta_rel_own(sp->setname, drives, partial_set, ep);
+
+	/* the temporary list is no longer needed once the call returns */
+	metafreedrivenamelist(drives);
+	return (status);
+}
+
+/*
+ * get status of drives
+ *
+ * Asks rpc.metamhd on the local node for the status of every drive in
+ * dslp and records each drive's error, if it has one, in that entry's
+ * status field.
+ *
+ *	sname		set name sent with the request
+ *	dslp		drives to query; an empty list is a successful no-op
+ *	partial_set	non-zero adds MHD_PARTIAL_SET to the options
+ *	ep		error return for the request as a whole
+ *
+ * Returns 0 on success, including when MD_NOMHIOCTL disables the
+ * ioctls, and -1 with ep set when the request itself fails.
+ */
+int
+meta_status_own(
+	char			*sname,
+	md_disk_status_list_t	*dslp,
+	int			partial_set,
+	md_error_t		*ep
+)
+{
+	md_disk_status_list_t	*p;
+	uint_t			ndev = 0;
+	mhd_status_args_t	args;
+	mhd_status_res_t	results;
+	mhd_error_t		*mhep = &results.status;
+	mhd_set_t		*mhsp = &args.set;
+	uint_t			i;
+	char			*e;
+	mhd_handle_t		*hp = NULL;
+	int			rval = -1;
+
+	/* if not doing ioctls */
+	if (! do_mhioctl())
+		return (0);
+
+	/* count drives, get set */
+	for (p = dslp; (p != NULL); p = p->next)
+		++ndev;
+	if (ndev == 0)
+		return (0);
+
+	/* initialize */
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&results, 0, sizeof (results));
+
+	/* build arguments: everything below is released by xdr_free() */
+	mhsp->setname = Strdup(sname);
+	mhsp->drives.drives_len = ndev;
+	mhsp->drives.drives_val
+	    = Calloc(ndev, sizeof (*mhsp->drives.drives_val));
+	for (p = dslp, i = 0; (i < ndev); p = p->next, ++i) {
+		mhsp->drives.drives_val[i] = Strdup(p->drivenamep->rname);
+	}
+	if (partial_set)
+		args.options |= MHD_PARTIAL_SET;
+	if (((e = getenv("MD_DEBUG")) != NULL) &&
+	    (strstr(e, "NOTHREAD") != NULL)) {
+		args.options |= MHD_SERIAL;
+	}
+
+	/*
+	 * open connection; on failure go through the common cleanup so the
+	 * request built above is not leaked
+	 */
+	if ((hp = open_metamhd(NULL, ep)) == NULL)
+		goto out;
+	clnt_control(hp->clientp, CLSET_TIMEOUT, (char *)&tk_own_timeout);
+
+	/* get status */
+	if (mhd_status_1(&args, &results, hp->clientp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, hp->clientp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "metamhd status"));
+		goto out;
+	} else if (mhstealerror(mhep, ep) != 0) {
+		goto out;
+	}
+
+	/* results are expected back one per drive, in request order */
+	assert(results.results.results_len == ndev);
+	for (p = dslp, i = 0; (i < ndev); p = p->next, ++i) {
+		mhd_drive_status_t	*resp = &results.results.results_val[i];
+		mddrivename_t		*dp = p->drivenamep;
+		mhd_error_t		mherror;
+
+		/* make sure we have the right drive */
+		assert(strcmp(dp->rname, resp->drive) == 0);
+
+		/*
+		 * copy status: wrap the drive's errnum in a scratch
+		 * mhd_error_t so mhstealerror() can convert it into the
+		 * list entry's status (it also frees the duplicated name)
+		 */
+		if (resp->errnum != 0) {
+			(void) memset(&mherror, 0, sizeof (mherror));
+			mherror.errnum = resp->errnum;
+			mherror.name = Strdup(resp->drive);
+			(void) mhstealerror(&mherror, &p->status);
+		}
+	}
+	rval = 0;		/* success */
+
+	/* cleanup, return status */
+out:
+	xdr_free(xdr_mhd_status_args_t, (char *)&args);
+	xdr_free(xdr_mhd_status_res_t, (char *)&results);
+	if (hp != NULL)
+		close_metamhd(hp);
+	return (rval);
+}
+
+/*
+ * Build a disk status list with one entry per drive in dnlp, in the
+ * same order.  Each entry points at the same drivename as its source
+ * element and is otherwise zero filled.  Returns NULL for an empty
+ * list.
+ */
+md_disk_status_list_t *
+meta_drive_to_disk_status_list(
+	mddrivenamelist_t	*dnlp
+)
+{
+	md_disk_status_list_t	*first = NULL;
+	md_disk_status_list_t	**linkp = &first;
+	mddrivenamelist_t	*dn = dnlp;
+
+	while (dn != NULL) {
+		md_disk_status_list_t	*node = Zalloc(sizeof (*node));
+
+		node->drivenamep = dn->drivenamep;
+
+		/* hook the node in where the previous one left off */
+		*linkp = node;
+		linkp = &node->next;
+
+		dn = dn->next;
+	}
+
+	return (first);
+}
+
+/*
+ * Release a disk status list: clear the error held in each entry, then
+ * free the entry.  The drivenames the entries point at are left alone.
+ */
+void
+meta_free_disk_status_list(
+	md_disk_status_list_t	*dslp
+)
+{
+	while (dslp != NULL) {
+		md_disk_status_list_t	*doomed = dslp;
+
+		/* step forward first; the entry is about to disappear */
+		dslp = doomed->next;
+
+		mdclrerror(&doomed->status);
+		Free(doomed);
+	}
+}
+
+/*
+ * Release the contents of a drive info list through its XDR routine and
+ * zero the list header.  The header itself belongs to the caller.
+ */
+void
+meta_free_drive_info_list(
+	mhd_drive_info_list_t	*listp
+)
+{
+	char	*objp = (char *)listp;
+
+	xdr_free(xdr_mhd_drive_info_list_t, objp);
+
+	/* leave the header in a known-empty state */
+	(void) memset(listp, 0, sizeof (*listp));
+}
+
+/*
+ * sort drive info list
+ */
+
+/*
+ * Split the last component of a drive path into controller, target,
+ * disk and slice numbers.  The forms cNtNdNsN, cNdNsN, cNtNdN and cNdN
+ * are recognised; numbers a form does not carry are left at 0.
+ *
+ * Returns 0 when one of the forms matches the component from start to
+ * end, -1 otherwise (no '/', nothing after the last '/', or a name
+ * that does not fit).
+ */
+static int
+parse_ctds(
+	const char	*name,
+	uint_t		*cp,
+	uint_t		*tp,
+	uint_t		*dp,
+	uint_t		*sp
+)
+{
+	const char	*base;
+	int		used = 0;	/* %n stores through an int pointer */
+
+	*cp = *tp = *dp = *sp = 0;
+
+	if ((base = strrchr(name, '/')) == NULL)
+		return (-1);
+
+	/* nothing after the last '/': do not step past the terminator */
+	if (base[1] == '\0')
+		return (-1);
+
+	/* skip the '/', and one leading character when it is not the 'c' */
+	base += (base[1] != 'c') ? 2 : 1;
+
+	if (sscanf(base, "c%ut%ud%us%u%n", cp, tp, dp, sp, &used) != 4 &&
+	    sscanf(base, "c%ud%us%u%n", cp, dp, sp, &used) != 3 &&
+	    sscanf(base, "c%ut%ud%u%n", cp, tp, dp, &used) != 3 &&
+	    sscanf(base, "c%ud%u%n", cp, dp, &used) != 2)
+		return (-1);
+
+	/* trailing characters mean this is not really a ctds name */
+	return (((size_t)used == strlen(base)) ? 0 : -1);
+}
+
+/*
+ * qsort() comparison routine for mhd_drive_info_t entries.
+ *
+ * Names that both parse as ctds names are ordered numerically by
+ * controller, target, disk and slice, so that c0t1d0s0 sorts before
+ * c0t18d0s0.  Otherwise the names are compared as strings, with a
+ * missing name treated as an empty string.
+ */
+static int
+compare_drives(
+	const void		*p1,
+	const void		*p2
+)
+{
+	const mhd_drive_info_t	*di1 = p1;
+	const mhd_drive_info_t	*di2 = p2;
+	const char		*n1 = di1->dif_name;
+	const char		*n2 = di2->dif_name;
+	uint_t			c1, t1, d1, s1;
+	uint_t			c2, t2, d2, s2;
+
+	if (n1 == NULL)
+		n1 = "";
+	if (n2 == NULL)
+		n2 = "";
+
+	/*
+	 * Fall back to the NULL-safe names here; comparing dif_name
+	 * directly would hand a NULL pointer to strcmp().
+	 */
+	if ((parse_ctds(n1, &c1, &t1, &d1, &s1) != 0) ||
+	    (parse_ctds(n2, &c2, &t2, &d2, &s2) != 0))
+		return (strcmp(n1, n2));
+
+	if (c1 != c2)
+		return ((c1 > c2) ? 1 : -1);
+	if (t1 != t2)
+		return ((t1 > t2) ? 1 : -1);
+	if (d1 != d2)
+		return ((d1 > d2) ? 1 : -1);
+	if (s1 != s2)
+		return ((s1 > s2) ? 1 : -1);
+	return (0);
+}
+
+/*
+ * Sort the entries of a drive info list in place, using
+ * compare_drives() for the ordering.
+ */
+static void
+sort_drives(
+	mhd_drive_info_list_t	*listp
+)
+{
+	mhd_drive_info_t	*entries = listp->mhd_drive_info_list_t_val;
+	uint_t			nentries = listp->mhd_drive_info_list_t_len;
+
+	qsort(entries, nentries, sizeof (*entries), compare_drives);
+}
+
+/*
+ * return list of all drives
+ *
+ * Asks rpc.metamhd on hostname for its drive list and hands the sorted
+ * result to the caller.
+ *
+ *	hostname	host to ask; NULL or "" means the local node
+ *	path		path to send with the request; when NULL the
+ *			MD_DRIVE_ROOT environment variable is used, and a
+ *			missing or empty path is simply not sent
+ *	flags		passed through in the request
+ *	listp		receives the list; the caller releases it with
+ *			meta_free_drive_info_list()
+ *	ep		error return
+ *
+ * Returns the number of drives in *listp on success and -1 with ep set
+ * on failure.  Returns 0 without touching *listp when MD_NOMHIOCTL
+ * disables the ioctls.
+ */
+int
+meta_list_drives(
+	char			*hostname,
+	char			*path,
+	mhd_did_flags_t		flags,
+	mhd_drive_info_list_t	*listp,
+	md_error_t		*ep
+)
+{
+	mhd_list_args_t		args;
+	mhd_list_res_t		results;
+	mhd_error_t		*mhep = &results.status;
+	mhd_handle_t		*hp = NULL;
+	int			rval = -1;
+
+	/* if not doing ioctls */
+	if (! do_mhioctl())
+		return (0);
+
+	/* initialize */
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&results, 0, sizeof (results));
+
+	/* build arguments */
+	if (path == NULL)
+		path = getenv("MD_DRIVE_ROOT");
+	if ((path != NULL) && (*path != '\0'))
+		args.path = Strdup(path);
+	args.flags = flags;
+
+	/*
+	 * open connection; on failure go through the common cleanup so the
+	 * duplicated path is not leaked
+	 */
+	if ((hp = open_metamhd(hostname, ep)) == NULL)
+		goto out;
+	clnt_control(hp->clientp, CLSET_TIMEOUT, (char *)&tk_own_timeout);
+
+	/* get list */
+	if (mhd_list_1(&args, &results, hp->clientp) != RPC_SUCCESS) {
+		(void) mdrpcerror(ep, hp->clientp, hp->hostname,
+		    dgettext(TEXT_DOMAIN, "metamhd list"));
+		goto out;
+	} else if (mhstealerror(mhep, ep) != 0) {
+		goto out;
+	}
+
+	/* sort list */
+	sort_drives(&results.results);
+
+	/*
+	 * steal list: move it into *listp and empty it in the results so
+	 * the xdr_free() below does not release what the caller now owns
+	 */
+	*listp = results.results;
+	results.results.mhd_drive_info_list_t_len = 0;
+	results.results.mhd_drive_info_list_t_val = NULL;
+	rval = listp->mhd_drive_info_list_t_len;	/* success */
+
+	/* cleanup, return status */
+out:
+	xdr_free(xdr_mhd_list_args_t, (char *)&args);
+	xdr_free(xdr_mhd_list_res_t, (char *)&results);
+	if (hp != NULL)
+		close_metamhd(hp);
+	return (rval);
+}
+
+/*
+ * Pass every path listed in METADEVPATH to rpc.metamhd by requesting a
+ * drive list for it.  The lists that come back, and any errors, are
+ * thrown away.  A file that cannot be opened is silently ignored.
+ */
+static void
+load_paths_to_metamhd()
+{
+	FILE			*cfp;		/* config file pointer */
+	char			buf[BUFSIZ],
+				*p,
+				*x;
+	mhd_drive_info_list_t	list;
+	md_error_t		ep;
+	mhd_did_flags_t		flags = MHD_DID_SERIAL;
+
+	if ((cfp = fopen(METADEVPATH, "r")) == NULL)
+		return;
+
+	/*
+	 * Read each line from the file.  A line is either a comment or
+	 * a path name to pass to rpc.metamhd.  A path line may hold a
+	 * colon-separated list of names, which are processed one at a
+	 * time.
+	 */
+	while (fgets(buf, BUFSIZ, cfp) != NULL) {
+		/* Ignore comment lines */
+		if (buf[0] == '#')
+			continue;
+
+		if (strchr(buf, ':') != NULL) {
+			p = buf;
+			while ((x = strchr(p, ':')) != NULL) {
+				*x = '\0';
+				/*
+				 * Start from an empty list every time:
+				 * meta_list_drives() leaves it untouched
+				 * when it fails, and freeing stack garbage
+				 * would be fatal.
+				 */
+				(void) memset(&ep, '\0', sizeof (ep));
+				(void) memset(&list, '\0', sizeof (list));
+				/*
+				 * NOTE(review): these calls pass 0 where
+				 * the last path on the line gets "flags";
+				 * confirm whether that is intended.
+				 */
+				(void) meta_list_drives(NULL, p, 0,
+				    &list, &ep);
+				meta_free_drive_info_list(&list);
+				mdclrerror(&ep);
+				p = x + 1;
+			}
+			/*
+			 * The last name on the line is followed by the
+			 * newline rather than a ':', so p still points at
+			 * a path.  Slide it, terminator included, to the
+			 * front of buf and let the common code below deal
+			 * with it.  The regions overlap, hence memmove().
+			 * NOTE:
+			 * If the line ends in ":\n", p points at the
+			 * newline; the code below then turns buf into an
+			 * empty string, which meta_list_drives() treats
+			 * as "no path given".
+			 */
+			(void) memmove(buf, p, strlen(p) + 1);
+		}
+
+		/* Remove any newline from the buffer. */
+		if ((p = strchr(buf, '\n')) != NULL)
+			*p = '\0';
+
+		(void) memset(&ep, '\0', sizeof (ep));
+		(void) memset(&list, '\0', sizeof (list));
+		(void) meta_list_drives(NULL, buf, flags, &list, &ep);
+		meta_free_drive_info_list(&list);
+		/* release whatever a failed call stored in the error */
+		mdclrerror(&ep);
+	}
+	(void) fclose(cfp);
+}
+
+/*
+ * Append to *dnlpp every drive reported by the local rpc.metamhd that
+ * passes the meta_check_inset() test for set sp.  Drives flagged as
+ * duplicates and drives whose names cannot be resolved are skipped
+ * quietly.
+ *
+ * Returns the number of drives appended, or -1 with ep set when the
+ * drive list cannot be obtained or a drive's slice 0 cannot be named.
+ * In the latter case drives appended before the failure stay on the
+ * list.
+ */
+/*ARGSUSED*/
+int
+meta_get_drive_names(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	**dnlpp,
+	int			options,
+	md_error_t		*ep
+)
+{
+	mhd_drive_info_list_t	drives;
+	mddrivenamelist_t	**appendpp = dnlpp;
+	unsigned		found = 0;
+	uint_t			idx;
+
+	/* must have a set */
+	assert(sp != NULL);
+
+	/* let the daemon see the configured paths before asking for drives */
+	load_paths_to_metamhd();
+
+	(void) memset(&drives, 0, sizeof (drives));
+	if (meta_list_drives(NULL, NULL, MHD_DID_SERIAL, &drives, ep) < 0)
+		return (-1);
+
+	for (idx = 0; idx < drives.mhd_drive_info_list_t_len; ++idx) {
+		mhd_drive_info_t	*info;
+		mddrivename_t		*drv;
+		mdname_t		*slice0;
+
+		info = &drives.mhd_drive_info_list_t_val[idx];
+		if (info->dif_id.did_flags & MHD_DID_DUPLICATE)
+			continue;
+
+		/* names that cannot be resolved are not an error */
+		drv = metadrivename(&sp, info->dif_name, ep);
+		if (drv == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* failing to name slice 0 ends the scan with an error */
+		slice0 = metaslicename(drv, MD_SLICE0, ep);
+		if (slice0 == NULL) {
+			meta_free_drive_info_list(&drives);
+			return (-1);
+		}
+
+		/* drives that fail the set check are skipped quietly */
+		if (meta_check_inset(sp, slice0, ep) != 0) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* append through the end pointer; no list walk is needed */
+		appendpp = meta_drivenamelist_append_wrapper(appendpp, drv);
+		++found;
+	}
+
+	meta_free_drive_info_list(&drives);
+	return ((int)found);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mirror.c b/usr/src/lib/lvm/libmeta/common/meta_mirror.c
new file mode 100644
index 0000000000..8be4ada7ae
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mirror.c
@@ -0,0 +1,2762 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * mirror operations
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mirror.h>
+#include <sys/lvm/md_convert.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+/*
+ * FUNCTION:    meta_get_mirror_names()
+ * INPUT:       sp      - the set name to get mirrors from
+ *              options - options from the command line
+ * OUTPUT:      nlpp    - list of all mirror names
+ *              ep      - return error pointer
+ * RETURNS:     int     - the value returned by meta_get_names(); callers
+ *                        in this file treat a negative value as an error,
+ *                        and meta_mirror_print() treats a non-negative
+ *                        value as the number of mirrors found
+ * PURPOSE:     returns a list of all mirrors in the metadb
+ *              for all devices in the specified set
+ */
+int
+meta_get_mirror_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	/* thin wrapper: select the mirror driver by its misc name */
+	return (meta_get_names(MD_MIRROR, sp, nlpp, options, ep));
+}
+
+/*
+ * free mirror unit
+ *
+ * Releases the md_mirror_t itself with Free().  Nothing reachable
+ * through the structure (for example the submirror name pointers) is
+ * released here.
+ */
+void
+meta_free_mirror(
+	md_mirror_t	*mirrorp
+)
+{
+	Free(mirrorp);
+}
+
+/*
+ * get mirror unit
+ *
+ * Returns the md_mirror_t describing the mirror named by mirnp.
+ *
+ * If the drive name already carries a unit (dnp->unitp), that cached
+ * structure is returned unchanged.  Otherwise the driver's unit
+ * structure is fetched with meta_get_mdunit(), its fields are copied
+ * into a freshly allocated md_mirror_t, the submirror names are
+ * resolved with metakeyname() (the fast flag is passed through to
+ * it), and the resync progress is read with the MD_IOCGETSYNC ioctl.
+ * On success the new structure is stored in dnp->unitp.
+ *
+ * sp	- set the mirror belongs to; must not be NULL
+ * mirnp	- name of the mirror
+ * fast	- forwarded to metakeyname()
+ * ep	- error return
+ *
+ * Returns NULL on failure.
+ */
+static md_mirror_t *
+meta_get_mirror_common(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*dnp = mirnp->drivenamep;
+	char		*miscname;
+	mm_unit_t	*mm;
+	md_mirror_t	*mirrorp;
+	uint_t		smi, nsm;
+	md_resync_ioctl_t ri;
+
+	/* must have set */
+	assert(sp != NULL);
+
+	/* short circuit: reuse the unit cached on the drive name */
+	if (dnp->unitp != NULL) {
+		assert(dnp->unitp->type == MD_METAMIRROR);
+		return ((md_mirror_t *)dnp->unitp);
+	}
+
+	/* get miscname and unit; reject anything that is not a mirror */
+	if ((miscname = metagetmiscname(mirnp, ep)) == NULL)
+		return (NULL);
+	if (strcmp(miscname, MD_MIRROR) != 0) {
+		(void) mdmderror(ep, MDE_NOT_MM, meta_getminor(mirnp->dev),
+		    mirnp->cname);
+		return (NULL);
+	}
+	if ((mm = (mm_unit_t *)meta_get_mdunit(sp, mirnp, ep)) == NULL)
+		return (NULL);
+	assert(mm->c.un_type == MD_METAMIRROR);
+
+	/* allocate mirror */
+	mirrorp = Zalloc(sizeof (*mirrorp));
+
+	/* get common info */
+	mirrorp->common.namep = mirnp;
+	mirrorp->common.type = mm->c.un_type;
+	mirrorp->common.state = mm->c.un_status;
+	mirrorp->common.capabilities = mm->c.un_capabilities;
+	mirrorp->common.parent = mm->c.un_parent;
+	mirrorp->common.size = mm->c.un_total_blocks;
+	mirrorp->common.user_flags = mm->c.un_user_flags;
+	mirrorp->common.revision = mm->c.un_revision;
+
+	/* get options */
+	mirrorp->read_option = mm->un_read_option;
+	mirrorp->write_option = mm->un_write_option;
+	mirrorp->pass_num = mm->un_pass_num;
+
+	/* get submirrors; nsm counts the slots that are in use */
+	for (smi = 0, nsm = 0; (smi < NMIRROR); ++smi) {
+		mm_submirror_t	*mmsp = &mm->un_sm[smi];
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+
+		/* get submirror state */
+		mdsp->state = mmsp->sm_state;
+		if (mdsp->state == SMS_UNUSED)
+			continue;
+		++nsm;
+
+		/* get submirror time of last state change */
+		mdsp->timestamp = mmsp->sm_timestamp;
+
+		/* get submirror flags */
+		mdsp->flags = mmsp->sm_flags;
+
+		/* get submirror name */
+		mdsp->submirnamep = metakeyname(&sp, mmsp->sm_key, fast, ep);
+		if (mdsp->submirnamep == NULL)
+			goto out;
+	}
+	assert(nsm == mm->un_nsm);
+
+	/* get resync info */
+	(void) memset(&ri, 0, sizeof (ri));
+	ri.ri_mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&ri, MD_MIRROR, sp->setno);
+	if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, mirnp->cname) != 0) {
+		/* move the ioctl's error into the caller's error struct */
+		(void) mdstealerror(ep, &ri.mde);
+		goto out;
+	}
+	mirrorp->percent_done = ri.ri_percent_done;
+	mirrorp->percent_dirty = ri.ri_percent_dirty;
+
+	/* cleanup, return success; cache the result on the drive name */
+	Free(mm);
+	dnp->unitp = (md_common_t *)mirrorp;
+	return (mirrorp);
+
+	/* cleanup, return error */
+out:
+	Free(mm);
+	meta_free_mirror(mirrorp);
+	return (NULL);
+}
+
+/*
+ * get mirror unit
+ *
+ * Public entry point: calls meta_get_mirror_common() with the fast
+ * flag set to 0.  Returns the mirror unit, or NULL on failure.
+ */
+md_mirror_t *
+meta_get_mirror(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	md_error_t	*ep
+)
+{
+	return (meta_get_mirror_common(sp, mirnp, 0, ep));
+}
+
+/*
+ * Check whether the block range [slblk, slblk + nblks) of np overlaps
+ * any submirror of the mirror mirnp.
+ *
+ * Only submirrors that are not themselves metadevices are handed to
+ * meta_check_overlap(); metadevice submirrors are skipped here.
+ *
+ * Returns 0 when no overlap is reported, -1 if the mirror cannot be
+ * read or meta_check_overlap() fails.
+ */
+static int
+in_mirror(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mirrorp;
+	uint_t		idx;
+
+	/* the mirror must live in the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	mirrorp = meta_get_mirror(sp, mirnp, ep);
+	if (mirrorp == NULL)
+		return (-1);
+
+	for (idx = 0; idx < NMIRROR; idx++) {
+		md_submirror_t	*smp = &mirrorp->submirrors[idx];
+		mdname_t	*smnp = smp->submirnamep;
+
+		/* an empty slot has no name and must be marked unused */
+		if (smnp == NULL) {
+			assert(smp->state == SMS_UNUSED);
+			continue;
+		}
+
+		/* overlap is only tested against non-metadevices */
+		if (!metaismeta(smnp) &&
+		    (meta_check_overlap(mirnp->cname, np, slblk, nblks,
+		    smnp, 0, -1, ep) != 0))
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Check whether the given block range of np is in use by any mirror
+ * in the set.
+ *
+ * Each mirror returned by meta_get_mirror_names() is tested with
+ * in_mirror(); the scan stops at the first failure.
+ *
+ * Returns 0 if no mirror objects, -1 otherwise (including failure to
+ * obtain the list of mirrors).
+ */
+int
+meta_check_inmirror(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*mirrors = NULL;
+	mdnamelist_t	*cur;
+	int		status = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	if (meta_get_mirror_names(sp, &mirrors, 0, ep) < 0)
+		return (-1);
+
+	cur = mirrors;
+	while (cur != NULL) {
+		if (in_mirror(sp, cur->namep, np, slblk, nblks, ep) != 0) {
+			status = -1;
+			break;
+		}
+		cur = cur->next;
+	}
+
+	/* the name list is released on both the success and error paths */
+	metafreenamelist(mirrors);
+	return (status);
+}
+
+/*
+ * Check to see if the primary mirror is built on top of a
+ * root slice which is mounted. This check is primarily to
+ * account for this case -
+ *
+ * # metainit -f d1 1 1 <root slice>
+ * # metainit d0 -m d1
+ * # metainit d2 1 1 ctds
+ * # metattach d0 d2
+ *
+ * The metattach here needs to fail if the root slice is
+ * being mirrored; otherwise there is a potential for
+ * data corruption.
+ *
+ * sp	- set containing the mirror
+ * mirnp	- name of the mirror being attached to
+ * ep	- error return
+ *
+ * Returns 0 if the attach may proceed, -1 otherwise.  When the root
+ * slice is found as the first component of a submirror, ep is set to
+ * MDE_IS_MOUNTED.
+ */
+static int
+meta_check_primary_mirror(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	md_error_t	*ep
+)
+{
+	int		smi;
+	char		*curroot;
+	char		*miscname;
+	mdname_t	*rootnp;
+	md_mirror_t	*mirrorp;
+	md_stripe_t	*stripep;
+	md_row_t	*rp;
+	md_comp_t	*cp;
+
+	if ((curroot = meta_get_current_root(ep)) == NULL)
+		return (-1);
+	/*
+	 * Get device name of current root metadevice. If root
+	 * is net mounted as happens if we're part of the
+	 * install process, rootnp will be set to NULL and we
+	 * return success.
+	 */
+	if ((rootnp = metaname(&sp, curroot, ep)) == NULL) {
+		/*
+		 * We are reporting success, so do not hand the caller
+		 * the error left behind by the failed name lookup.
+		 */
+		mdclrerror(ep);
+		return (0);
+	}
+	/*
+	 * If the currently mounted root slice is not a
+	 * ctds, we don't bother checking
+	 */
+	if ((!metaismeta(rootnp)) && metaismeta(mirnp)) {
+		if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+			return (-1);
+
+		for (smi = 0; (smi < NMIRROR); ++smi) {
+			/* Check all submirrors */
+			md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+			mdname_t	*submirnamep = mdsp->submirnamep;
+
+			/* skip unused submirrors */
+			if (submirnamep == NULL) {
+				assert(mdsp->state == SMS_UNUSED);
+				continue;
+			}
+
+			/*
+			 * check if submirror is a stripe or not;
+			 * metagetmiscname() can fail, so its result
+			 * must not be passed to strcmp() unchecked.
+			 */
+			if ((miscname = metagetmiscname(submirnamep, ep))
+			    == NULL)
+				return (-1);
+			/*
+			 * NOTE(review): this failure path does not set
+			 * an error in ep itself - confirm callers cope.
+			 */
+			if (strcmp(miscname, MD_STRIPE) != 0)
+				return (-1);
+			if ((stripep = meta_get_stripe(sp, submirnamep, ep))
+			    == NULL)
+				return (-1);
+
+			/*
+			 * Examine the first component of the first row and
+			 * check to see if it has a mounted root slice
+			 */
+			rp = &stripep->rows.rows_val[0];
+			cp = &rp->comps.comps_val[0];
+			/*
+			 * we just care about the component built on
+			 * top of a raw device
+			 */
+			if (!metaismeta(cp->compnamep)) {
+				/*
+				 * If root device is the 1st component of
+				 * the stripe, then fail.
+				 */
+				if (strcmp(rootnp->cname, cp->compnamep->cname)
+				    == 0) {
+					(void) mduseerror(ep, MDE_IS_MOUNTED,
+					    rootnp->dev, "/", rootnp->cname);
+					return (-1);
+				}
+			}
+		}
+	}
+	/* return success */
+	return (0);
+}
+
+/*
+ * Validate np as a candidate submirror.
+ *
+ * sp	- set the devices belong to
+ * np	- proposed submirror
+ * mirnp	- mirror it would be attached to; may be NULL
+ * force	- non-zero skips the mounted-root and in-use checks
+ * ep	- error return
+ *
+ * The checks run in this order: np must pass metachkmeta(); unless
+ * forced, the mirror (if given) must pass meta_check_primary_mirror()
+ * and np must not be in use; np must be in the set; finally either np
+ * must not be inside a metadevice (non-metadevice names) or its unit
+ * must have the MD_CAN_PARENT and MD_CAN_SUB_MIRROR capabilities and
+ * no parent (metadevice names).
+ *
+ * Returns 0 if every check passes, otherwise the failing check's
+ * non-zero result.
+ */
+int
+meta_check_submirror(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdname_t	*mirnp,
+	int		force,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	chkopts = 0;
+	md_common_t	*unitp;
+	int		parentable;
+
+	/* make sure we have a metadevice disk */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	if (!force) {
+		/* refuse to attach to a mirror of the mounted root */
+		if (mirnp && (meta_check_primary_mirror(sp, mirnp, ep) != 0))
+			return (-1);
+
+		/* refuse a device that is already in use */
+		if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0)
+			return (-1);
+	}
+
+	/* make sure it is in the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	if (!metaismeta(np)) {
+		/* non-metadevices: must not already be in a metadevice */
+		if (meta_check_inmeta(sp, np, chkopts, 0, -1, ep) != 0)
+			return (-1);
+		return (0);
+	}
+
+	/* metadevices: must be able to take a mirror as parent */
+	unitp = meta_get_unit(sp, np, ep);
+	if (unitp == NULL)
+		return (-1);
+
+	parentable = (unitp->capabilities & MD_CAN_PARENT) &&
+	    (unitp->capabilities & MD_CAN_SUB_MIRROR) &&
+	    (unitp->parent == MD_NO_PARENT);
+	if (!parentable) {
+		return (mdmderror(ep, MDE_INVAL_UNIT,
+		    meta_getminor(np->dev), np->cname));
+	}
+
+	return (0);
+}
+
+/*
+ * Map a mirror read option to its keyword.
+ *
+ * Returns "roundrobin", "geometric" or "first" for the three known
+ * options.  Any other value trips an assert and, when asserts are
+ * compiled out, yields the translated string "invalid".
+ */
+char *
+rd_opt_to_name(
+	mm_rd_opt_t	opt
+)
+{
+	if (opt == RD_LOAD_BAL)
+		return ("roundrobin");
+	if (opt == RD_GEOMETRY)
+		return ("geometric");
+	if (opt == RD_FIRST)
+		return ("first");
+
+	/* not a known read option */
+	assert(0);
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * Map a mirror read option to the command line flag that selects it.
+ *
+ * Returns NULL for RD_LOAD_BAL (the default, which needs no flag),
+ * "-g" for RD_GEOMETRY and "-r" for RD_FIRST.  Any other value trips
+ * an assert and, when asserts are compiled out, yields the translated
+ * string "invalid".
+ */
+static char *
+rd_opt_to_opt(
+	mm_rd_opt_t	opt
+)
+{
+	char	*flag;
+
+	switch (opt) {
+	case RD_LOAD_BAL:
+		flag = NULL;	/* default */
+		break;
+	case RD_GEOMETRY:
+		flag = "-g";
+		break;
+	case RD_FIRST:
+		flag = "-r";
+		break;
+	default:
+		assert(0);
+		flag = dgettext(TEXT_DOMAIN, "invalid");
+		break;
+	}
+	return (flag);
+}
+
+/*
+ * Parse a read option keyword.
+ *
+ * uname	- name passed through to meta_cook_syntax() on error
+ * name	- keyword to parse; compared case-insensitively against
+ *	  "roundrobin", "geometric" and "first"
+ * optp	- receives the matching option
+ * ep	- error return
+ *
+ * Returns 0 on a match; otherwise returns the result of
+ * meta_cook_syntax() for MDE_BAD_RD_OPT and leaves *optp untouched.
+ */
+int
+name_to_rd_opt(
+	char		*uname,
+	char		*name,
+	mm_rd_opt_t	*optp,
+	md_error_t	*ep
+)
+{
+	static const struct {
+		const char	*optname;
+		mm_rd_opt_t	optval;
+	} rd_opts[] = {
+		{ "roundrobin",	RD_LOAD_BAL },
+		{ "geometric",	RD_GEOMETRY },
+		{ "first",	RD_FIRST }
+	};
+	uint_t	i;
+
+	for (i = 0; i < (sizeof (rd_opts) / sizeof (rd_opts[0])); i++) {
+		if (strcasecmp(name, rd_opts[i].optname) == 0) {
+			*optp = rd_opts[i].optval;
+			return (0);
+		}
+	}
+
+	return (meta_cook_syntax(ep, MDE_BAD_RD_OPT, uname, 1, &name));
+}
+
+/*
+ * Map a mirror write option to its keyword.
+ *
+ * Returns "parallel" or "serial" for the two known options.  Any
+ * other value trips an assert and, when asserts are compiled out,
+ * yields the translated string "invalid".
+ */
+char *
+wr_opt_to_name(
+	mm_wr_opt_t	opt
+)
+{
+	if (opt == WR_PARALLEL)
+		return ("parallel");
+	if (opt == WR_SERIAL)
+		return ("serial");
+
+	/* not a known write option */
+	assert(0);
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * Map a mirror write option to the command line flag that selects it.
+ *
+ * Returns NULL for WR_PARALLEL (the default, which needs no flag) and
+ * "-S" for WR_SERIAL.  Any other value trips an assert and, when
+ * asserts are compiled out, yields the translated string "invalid".
+ */
+static char *
+wr_opt_to_opt(
+	mm_wr_opt_t	opt
+)
+{
+	if (opt == WR_PARALLEL)
+		return (NULL);	/* default */
+	if (opt == WR_SERIAL)
+		return ("-S");
+
+	/* not a known write option */
+	assert(0);
+	return (dgettext(TEXT_DOMAIN, "invalid"));
+}
+
+/*
+ * Parse a write option keyword.
+ *
+ * uname	- name passed through to meta_cook_syntax() on error
+ * name	- keyword to parse; compared case-insensitively against
+ *	  "parallel" and "serial"
+ * optp	- receives the matching option
+ * ep	- error return
+ *
+ * Returns 0 on a match; otherwise returns the result of
+ * meta_cook_syntax() for MDE_BAD_WR_OPT and leaves *optp untouched.
+ */
+int
+name_to_wr_opt(
+	char		*uname,
+	char		*name,
+	mm_wr_opt_t	*optp,
+	md_error_t	*ep
+)
+{
+	if (strcasecmp(name, "parallel") == 0)
+		*optp = WR_PARALLEL;
+	else if (strcasecmp(name, "serial") == 0)
+		*optp = WR_SERIAL;
+	else
+		return (meta_cook_syntax(ep, MDE_BAD_WR_OPT, uname, 1, &name));
+
+	return (0);
+}
+
+/*
+ * convert pass numbers
+ *
+ * uname	- name passed through to meta_cook_syntax() on error
+ * name	- decimal text to parse
+ * passp	- receives the pass number on success
+ * ep	- error return
+ *
+ * The whole of name must be a decimal integer in the range
+ * 0..MD_PASS_MAX.  Empty strings, trailing characters after the
+ * number and values that overflow a long are rejected.
+ *
+ * Returns 0 on success; otherwise returns the result of
+ * meta_cook_syntax() for MDE_BAD_PASS_NUM and leaves *passp
+ * untouched.
+ */
+int
+name_to_pass_num(
+	char		*uname,
+	char		*name,
+	mm_pass_num_t	*passp,
+	md_error_t	*ep
+)
+{
+	char	*endp;
+	long	val;
+
+	/*
+	 * strtol() rather than sscanf("%hd"): it reports overflow via
+	 * errno, exposes where parsing stopped so trailing garbage can
+	 * be rejected, and does not depend on the width of
+	 * mm_pass_num_t.
+	 */
+	errno = 0;
+	val = strtol(name, &endp, 10);
+	if ((endp == name) || (*endp != '\0') || (errno != 0) ||
+	    (val < 0) || (val > MD_PASS_MAX)) {
+		return (meta_cook_syntax(ep, MDE_BAD_PASS_NUM,
+		    uname, 1, &name));
+	}
+
+	*passp = (mm_pass_num_t)val;
+	return (0);
+}
+
+/*
+ * Describe the resync option encoded in a mirror's tstate word.
+ *
+ * Returns the translated string "application based" when the
+ * MD_ABR_CAP bit is set and "optimized resync" otherwise.
+ */
+
+static char *
+resync_opt_to_name(
+	uint_t	tstate
+)
+{
+	char	*msgid;
+
+	msgid = (tstate & MD_ABR_CAP) ?
+	    "application based" : "optimized resync";
+	return (dgettext(TEXT_DOMAIN, msgid));
+}
+
+/*
+ * Print a mirror as a single line: the mirror name, "-m", the names
+ * of the submirrors in use, the read and write option flags when they
+ * are not the defaults, and the pass number.
+ *
+ * mirrorp	- mirror to print
+ * fname	- name recorded in the error if writing fails
+ * fp	- output stream
+ * options	- with PRINT_LARGEDEVICES set, mirrors whose revision is
+ *	  not MD_64BIT_META_DEV print nothing
+ * ep	- error return
+ *
+ * Returns 0 on success.  If any fprintf() fails, a system error built
+ * from errno and fname is stored in ep and -1 is returned.
+ */
+static int
+mirror_print(
+	md_mirror_t	*mirrorp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t		idx;
+	char		*flag;
+
+	/* large-device listing: nothing to show for other revisions */
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (mirrorp->common.revision != MD_64BIT_META_DEV))
+		return (0);
+
+	/* print name and -m */
+	if (fprintf(fp, "%s -m", mirrorp->common.namep->cname) == EOF)
+		goto fail;
+
+	/* print the name of every submirror slot that is in use */
+	for (idx = 0; idx < NMIRROR; idx++) {
+		md_submirror_t	*smp = &mirrorp->submirrors[idx];
+
+		if (smp->submirnamep == NULL) {
+			assert(smp->state == SMS_UNUSED);
+			continue;
+		}
+		if (fprintf(fp, " %s", smp->submirnamep->cname) == EOF)
+			goto fail;
+	}
+
+	/* print option flags; a NULL flag means the default is in effect */
+	flag = rd_opt_to_opt(mirrorp->read_option);
+	if ((flag != NULL) && (fprintf(fp, " %s", flag) == EOF))
+		goto fail;
+
+	flag = wr_opt_to_opt(mirrorp->write_option);
+	if ((flag != NULL) && (fprintf(fp, " %s", flag) == EOF))
+		goto fail;
+
+	if (fprintf(fp, " %u\n", mirrorp->pass_num) == EOF)
+		goto fail;
+
+	return (0);
+
+	/* a write failed: report it against fname */
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * convert submirror state to name
+ *
+ * mdsp		- submirror whose state and flags are described
+ * mirror_status	- status word of the owning mirror
+ * tvp		- if not NULL, receives the submirror's timestamp
+ * tstate	- non-zero requests "Unavailable" for a submirror
+ *		  whose state has SMS_RUNNING set
+ *
+ * Returns a translated description of the state.  For a state that
+ * matches none of the known cases the function asserts and, when
+ * asserts are compiled out, returns the state formatted in hex in a
+ * static buffer, which is overwritten by the next such call.
+ */
+char *
+sm_state_to_name(
+	md_submirror_t	*mdsp,
+	md_status_t	mirror_status,
+	md_timeval32_t	*tvp,
+	uint_t		tstate
+)
+{
+	static char	state_to_str[100];
+	sm_state_t	state = mdsp->state;
+	uint_t		is_target = mdsp->flags & MD_SM_RESYNC_TARGET;
+
+	/* grab time */
+	if (tvp != NULL)
+		*tvp = mdsp->timestamp;
+
+	/*
+	 * Only return Unavailable if there is no flagged error on the
+	 * submirror. If the mirror has received any writes since the submirror
+	 * went into Unavailable state a resync is required. To alert the
+	 * administrator to this we return a 'Needs maintenance' message.
+	 */
+	if ((tstate != 0) && (state & SMS_RUNNING)) {
+		return (dgettext(TEXT_DOMAIN, "Unavailable"));
+	}
+
+	/*
+	 * all is well: running, and either no optimized resync is
+	 * pending or this submirror is not the resync target
+	 */
+	if (state & SMS_RUNNING) {
+		if (!(mirror_status & MD_UN_OPT_NOT_DONE) ||
+		    ((mirror_status & MD_UN_OPT_NOT_DONE) && !is_target)) {
+			return (dgettext(TEXT_DOMAIN, "Okay"));
+		}
+	}
+
+	/* resyncing, needs repair */
+	if ((state & (SMS_COMP_RESYNC | SMS_ATTACHED_RESYNC |
+	    SMS_OFFLINE_RESYNC)) ||
+	    (mirror_status & MD_UN_OPT_NOT_DONE)) {
+		if (mirror_status & MD_UN_RESYNC_ACTIVE) {
+			return (dgettext(TEXT_DOMAIN, "Resyncing"));
+		}
+		if (mirror_status & MD_UN_RESYNC_CANCEL) {
+			return (dgettext(TEXT_DOMAIN, "Resync cancelled"));
+		}
+		return (dgettext(TEXT_DOMAIN, "Needs maintenance"));
+	}
+
+	/* needs repair */
+	if (state & (SMS_COMP_ERRED | SMS_ATTACHED | SMS_OFFLINE)) {
+		if (mirror_status & MD_UN_RESYNC_CANCEL) {
+			return (dgettext(TEXT_DOMAIN, "Resync cancelled"));
+		}
+		return (dgettext(TEXT_DOMAIN, "Needs maintenance"));
+	}
+
+	/* unknown: fall back to the raw state value */
+	assert(0);
+	(void) sprintf(state_to_str, "0x%x", state);
+	return (state_to_str);
+}
+
+/*
+ * convert submirror state to repair action
+ *
+ * sp		- set containing the mirror
+ * mdsp		- submirror being examined
+ * mirror_status	- status word of the owning mirror
+ * mirrorp	- owning mirror (its name appears in the command text)
+ * actionp	- receives the suggested command, or NULL when no
+ *		  action is needed
+ * ep		- error return
+ *
+ * The command text is built in a static buffer, so the string handed
+ * back through actionp is overwritten by the next call.
+ *
+ * Returns 0 on success (including "no action needed"), -1 if the
+ * submirror's driver name or erred component cannot be determined.
+ */
+int
+sm_state_to_action(
+	mdsetname_t	*sp,
+	md_submirror_t	*mdsp,
+	md_status_t	mirror_status,
+	md_mirror_t	*mirrorp,
+	char		**actionp,
+	md_error_t	*ep
+)
+{
+	static char	buf[1024];
+	mdname_t	*submirnamep = mdsp->submirnamep;
+	sm_state_t	state = mdsp->state;
+	char		*miscname;
+
+	/* all is well: a resync is running, or nothing is pending */
+	*actionp = NULL;
+	if (mirror_status & MD_UN_RESYNC_ACTIVE)
+		return (0);
+	if ((state == SMS_RUNNING) && !(mirror_status & MD_UN_OPT_NOT_DONE))
+		return (0);
+
+	/* complete cancelled resync */
+	if (mirror_status & MD_UN_RESYNC_CANCEL) {
+		(void) snprintf(buf, sizeof (buf),
+		    dgettext(TEXT_DOMAIN, "metasync %s"),
+		    mirrorp->common.namep->cname);
+		*actionp = buf;
+		return (0);
+	}
+
+	/* replace stripe component */
+	if ((metaismeta(submirnamep)) && (state & SMS_COMP_ERRED)) {
+		if ((miscname = metagetmiscname(submirnamep, ep)) == NULL)
+			return (-1);
+		if (strcmp(miscname, MD_STRIPE) == 0) {
+			mdname_t	*compnamep;
+			comp_state_t	compstate;
+
+			if (meta_find_erred_comp(sp, submirnamep,
+			    &compnamep, &compstate, ep) != 0) {
+				return (-1);
+			}
+			/*
+			 * A CS_LAST_ERRED component gets the longer
+			 * message asking for the "Maintenance"
+			 * components to be replaced first.
+			 */
+			if (compstate != CS_LAST_ERRED)
+				(void) snprintf(buf, sizeof (buf),
+				    "metareplace %s %s <%s>",
+				    mirrorp->common.namep->cname,
+				    compnamep->cname,
+				    dgettext(TEXT_DOMAIN, "new device"));
+			else
+				(void) snprintf(buf, sizeof (buf),
+				    dgettext(TEXT_DOMAIN,
+				    "after replacing \"Maintenance\" "
+				    "components:\n"
+				    "\t\tmetareplace %s %s <new device>"),
+				    mirrorp->common.namep->cname,
+				    compnamep->cname);
+			*actionp = buf;
+			return (0);
+		}
+	}
+
+	/* resync mirror */
+	if ((state & (SMS_ATTACHED_RESYNC | SMS_OFFLINE_RESYNC |
+	    SMS_COMP_RESYNC | SMS_ATTACHED)) ||
+	    (mirror_status & MD_UN_OPT_NOT_DONE)) {
+		(void) snprintf(buf, sizeof (buf), "metasync %s",
+		    mirrorp->common.namep->cname);
+		*actionp = buf;
+		return (0);
+	}
+
+	/* online submirror */
+	if (state & SMS_OFFLINE) {
+		(void) snprintf(buf, sizeof (buf), "metaonline %s %s",
+		    mirrorp->common.namep->cname, submirnamep->cname);
+		*actionp = buf;
+		return (0);
+	}
+
+	/* unknown action */
+	*actionp = dgettext(TEXT_DOMAIN, "???");
+	return (0);
+}
+
+/*
+ * Print the option lines of a mirror report: pass number, read
+ * option, write option and, when meta_is_mn_set() reports a
+ * multi-node set, the resync option.
+ *
+ * read_option	- mirror read option
+ * write_option	- mirror write option
+ * pass_num	- mirror pass number
+ * tstate	- state word used to pick the resync option text
+ * fname	- name recorded in the error if writing fails
+ * sp		- set the mirror belongs to
+ * fp		- output stream
+ * ep		- error return
+ *
+ * Returns 0 on success.  If any fprintf() fails, a system error built
+ * from errno and fname is stored in ep and -1 is returned.
+ */
+int
+meta_print_mirror_options(
+	mm_rd_opt_t	read_option,
+	mm_wr_opt_t	write_option,
+	mm_pass_num_t	pass_num,
+	uint_t		tstate,
+	char		*fname,
+	mdsetname_t	*sp,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	char		*flag;
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Pass: %u\n"),
+	    pass_num) == EOF)
+		goto fail;
+
+	/* a NULL flag means the option is the default */
+	flag = rd_opt_to_opt(read_option);
+	if (flag == NULL)
+		flag = dgettext(TEXT_DOMAIN, "default");
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Read option: %s (%s)\n"),
+	    rd_opt_to_name(read_option), flag) == EOF)
+		goto fail;
+
+	flag = wr_opt_to_opt(write_option);
+	if (flag == NULL)
+		flag = dgettext(TEXT_DOMAIN, "default");
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Write option: %s (%s)\n"),
+	    wr_opt_to_name(write_option), flag) == EOF)
+		goto fail;
+
+	/* Display resync option for mirror, if MultiNode set */
+	if (meta_is_mn_set(sp, ep) &&
+	    (fprintf(fp, dgettext(TEXT_DOMAIN, "    Resync option: %s\n"),
+	    resync_opt_to_name(tstate)) == EOF))
+		goto fail;
+
+	return (0);
+
+	/* a write failed: report it against fname */
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * Look up the name of the node with id nid in the membership list
+ * returned by meta_read_nodelist().
+ *
+ * Returns a copy (made with Strdup) of the node name, or a copy of
+ * the translated string "None" when no node has that id.  Returns
+ * NULL if the membership list cannot be read.
+ */
+static char *
+get_node_name(uint_t nid, md_error_t *ep)
+{
+	mndiskset_membershiplist_t	*nodes, *cur;
+	int				nnodes;
+	char				*result;
+
+	/* get the known membership list */
+	if (meta_read_nodelist(&nnodes, &nodes, ep))
+		return (NULL);
+
+	/* advance to the first entry with a matching id, if any */
+	cur = nodes;
+	while ((cur != NULL) && (cur->msl_node_id != nid))
+		cur = cur->next;
+
+	if (cur != NULL)
+		result = Strdup(cur->msl_node_name);
+	else
+		result = Strdup(dgettext(TEXT_DOMAIN, "None"));
+
+	meta_free_nodelist(nodes);
+	return (result);
+}
+
+/*
+ * report mirror
+ *
+ * Prints the multi-line description of a mirror: an optional header,
+ * one "Submirror"/"State" pair per submirror in use, resync progress,
+ * the mirror options, the owner (for multi-node sets), the size,
+ * optional debug details and finally a section for each submirror
+ * including the underlying metadevice.
+ *
+ * sp	- set the mirror belongs to
+ * mirrorp	- mirror to report on
+ * nlpp	- name list passed on to meta_getdevs()/meta_print_name()
+ * fname	- name recorded in the error if writing fails
+ * fp	- output stream
+ * options	- PRINT_* flags selecting what is shown
+ * ep	- error return
+ *
+ * Returns 0 on success and -1 on failure.  Failures of fprintf() are
+ * reported as a system error built from errno and fname; failures of
+ * the helper routines return -1 directly, leaving the error they
+ * stored in ep.
+ */
+static int
+mirror_report(
+	mdsetname_t	*sp,
+	md_mirror_t	*mirrorp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_status_t	status = mirrorp->common.state;
+	uint_t		smi;
+	char		*p;
+	int		rval = -1;
+	uint_t		tstate = 0;
+
+	/*
+	 * check for the -B option. If -B and the metadevice is
+	 * a 64 bit device, get the dev for relocation information
+	 * printout. If not a 64 bit device, just don't print this
+	 * information out but you need to go down to the subdevice
+	 * level and print there if appropriate.
+	 */
+	if (options & PRINT_LARGEDEVICES) {
+		if (mirrorp->common.revision != MD_64BIT_META_DEV) {
+			for (smi = 0; (smi < NMIRROR); ++smi) {
+				md_submirror_t	*mdsp =
+				    &mirrorp->submirrors[smi];
+				mdname_t	*submirnamep =
+				    mdsp->submirnamep;
+				if (submirnamep == NULL) {
+					continue;
+				}
+				if ((metaismeta(submirnamep)) &&
+				    (meta_print_name(sp, submirnamep, nlpp,
+				    fname, fp, options | PRINT_SUBDEVS, NULL,
+				    ep) != 0)) {
+					return (-1);
+				}
+			}
+			rval = 0;
+			goto out;
+		} else {
+			if (meta_getdevs(sp, mirrorp->common.namep,
+			    nlpp, ep) != 0)
+				goto out;
+		}
+	}
+
+	/* print header */
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Mirror\n"),
+		    mirrorp->common.namep->cname) == EOF) {
+			goto out;
+		}
+	}
+
+	/* print submirrors, adjust status */
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnamep = mdsp->submirnamep;
+		char		*sm_state;
+		md_timeval32_t	tv;
+		char		*timep;
+
+		/* skip unused submirrors */
+		if (submirnamep == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		if (mdsp->state & SMS_OFFLINE)
+			status &= ~MD_UN_OPT_NOT_DONE;
+
+		/* print submirror */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Submirror %u: %s\n"),
+		    smi, submirnamep->cname) == EOF) {
+			goto out;
+		}
+
+		/*
+		 * print state; start from a clean tstate so a value
+		 * fetched for a previous submirror is never reused
+		 */
+		tstate = 0;
+		if (metaismeta(mdsp->submirnamep)) {
+			if (meta_get_tstate(mdsp->submirnamep->dev, &tstate,
+			    ep) != 0)
+				return (-1);
+		}
+		sm_state = sm_state_to_name(mdsp, status, &tv,
+		    tstate & MD_DEV_ERRORED);
+		if (options & PRINT_TIMES) {
+			timep = meta_print_time(&tv);
+		} else {
+			timep = "";
+		}
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "      State: %-12s %s\n"),
+		    sm_state, timep) == EOF) {
+			goto out;
+		}
+	}
+
+	/*
+	 * print resync status; 64 bit metadevices print percent_done
+	 * as tenths of a percent, others as a whole percentage
+	 */
+	if (status & MD_UN_RESYNC_CANCEL) {
+		/* Resync was cancelled but is restartable */
+		if (mirrorp->common.revision == MD_64BIT_META_DEV) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Resync cancelled: %2d.%1d %% done\n"),
+			    mirrorp->percent_done/10,
+			    mirrorp->percent_done%10) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Resync cancelled: %d %% done\n"),
+			    mirrorp->percent_done) == EOF) {
+				goto out;
+			}
+		}
+	} else if (status & MD_UN_RESYNC_ACTIVE) {
+		if (mirrorp->common.revision == MD_64BIT_META_DEV) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Resync in progress: %2d.%1d %% done\n"),
+			    mirrorp->percent_done/10,
+			    mirrorp->percent_done%10) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Resync in progress: %d %% done\n"),
+			    mirrorp->percent_done) == EOF) {
+				goto out;
+			}
+		}
+	}
+
+	/* print options, using the tstate of the mirror itself */
+	if (meta_get_tstate(mirrorp->common.namep->dev, &tstate, ep) != 0)
+		return (-1);
+
+	if (meta_print_mirror_options(mirrorp->read_option,
+	    mirrorp->write_option, mirrorp->pass_num,
+	    tstate, fname, sp, fp, ep) != 0)
+		return (-1);
+
+	/* print mirror owner for multi-node metadevice */
+	if (meta_is_mn_set(sp, ep)) {
+		md_set_mmown_params_t	ownpar;
+		mdname_t		*mirnp = mirrorp->common.namep;
+		char			*node_name;
+
+		(void) memset(&ownpar, 0, sizeof (ownpar));
+		ownpar.d.mnum = meta_getminor(mirnp->dev);
+		MD_SETDRIVERNAME(&ownpar, MD_MIRROR, sp->setno);
+
+		if (metaioctl(MD_MN_GET_MM_OWNER, &ownpar, ep,
+		    "MD_MN_GET_MM_OWNER") != 0) {
+			return (-1);
+		}
+
+		/* node_name is an allocated copy and must be freed */
+		node_name = get_node_name(ownpar.d.owner, ep);
+		if (node_name == NULL)
+			return (-1);
+		else if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Owner: %s\n"),
+		    node_name) == EOF) {
+			Free(node_name);
+			goto out;
+		}
+		Free(node_name);
+
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
+	    mirrorp->common.size,
+	    meta_number_to_string(mirrorp->common.size, DEV_BSIZE))
+	    == EOF) {
+		goto out;
+	}
+
+	/* MD_DEBUG stuff */
+	if (options & PRINT_DEBUG) {
+		mdname_t	*mirnp = mirrorp->common.namep;
+		mm_unit_t	*mm;
+		mddb_optloc_t	optloc;
+		uint_t		i;
+
+		/* get real mirror unit; freed on every exit below */
+		if ((mm = (mm_unit_t *)meta_get_mdunit(sp, mirnp, ep))
+		    == NULL) {
+			return (-1);
+		}
+		assert(mm->c.un_type == MD_METAMIRROR);
+
+		/* print dirty regions */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+"    Regions which are dirty: %d%% (blksize %d num %d)\n"),
+		    mirrorp->percent_dirty, mm->un_rrd_blksize,
+		    mm->un_rrd_num) == EOF) {
+			Free(mm);
+			goto out;
+		}
+
+		/* print optimized resync record locations */
+		(void) memset(&optloc, 0, sizeof (optloc));
+		optloc.recid = mm->un_rr_dirty_recid;
+		if (metaioctl(MD_DB_GETOPTLOC, &optloc, ep,
+		    "MD_DB_GETOPTLOC") != 0) {
+			Free(mm);
+			return (-1);
+		}
+		for (i = 0; (i < ((sizeof optloc.li) / sizeof (optloc.li[0])));
+		    ++i) {
+			mddb_config_t	dbconf;
+			char		*devname;
+
+			(void) memset(&dbconf, 0, sizeof (dbconf));
+			dbconf.c_id = optloc.li[i];
+			dbconf.c_setno = sp->setno;
+			dbconf.c_subcmd = MDDB_CONFIG_ABS;
+			/* Don't need device id information from this ioctl */
+			dbconf.c_locator.l_devid = (uint64_t)0;
+			dbconf.c_locator.l_devid_flags = 0;
+			if (metaioctl(MD_DB_ENDDEV, &dbconf, &dbconf.c_mde,
+			    "MD_DB_ENDDEV") != 0) {
+				Free(mm);
+				return (mdstealerror(ep, &dbconf.c_mde));
+			}
+			if ((devname = splicename(&dbconf.c_devname))
+			    == NULL) {
+				devname = Strdup(dgettext(TEXT_DOMAIN,
+				    "unknown"));
+			}
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Resync record[%u]: %d (%s %d %d)\n"), i,
+			    optloc.li[i], devname, dbconf.c_locator.l_blkno,
+			    (dbconf.c_dbend - dbconf.c_locator.l_blkno + 1))
+			    == EOF) {
+				Free(mm);
+				Free(devname);
+				goto out;
+			}
+			Free(devname);
+		}
+		Free(mm);
+	}
+
+	/* print submirror details */
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnamep = mdsp->submirnamep;
+		char		*sm_state;
+		md_timeval32_t	tv;
+		char		*timep;
+
+		/* skip unused submirrors */
+		if (submirnamep == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		/* add extra line */
+		if (fprintf(fp, "\n") == EOF)
+			goto out;
+
+		/* print submirror */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "%s: Submirror of %s\n"),
+		    submirnamep->cname,
+		    mirrorp->common.namep->cname) == EOF) {
+			goto out;
+		}
+
+		/*
+		 * print state; start from a clean tstate so neither
+		 * the mirror's own value nor one fetched for a
+		 * previous submirror is reused
+		 */
+		tstate = 0;
+		if (metaismeta(mdsp->submirnamep)) {
+			if (meta_get_tstate(mdsp->submirnamep->dev, &tstate, ep)
+			    != 0)
+				return (-1);
+		}
+		/* tstate argument is a uint_t: pass 0, not NULL */
+		sm_state = sm_state_to_name(mdsp, status, &tv, 0);
+		if (options & PRINT_TIMES) {
+			timep = meta_print_time(&tv);
+		} else {
+			timep = "";
+		}
+
+		/* state and action are shown only for non-errored devices */
+		if ((tstate & MD_DEV_ERRORED) == 0) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    State: %-12s %s\n"),
+			    sm_state, timep) == EOF) {
+				goto out;
+			}
+
+			/* print what to do */
+			if (sm_state_to_action(sp, mdsp, status,
+			    mirrorp, &p, ep) != 0)
+				return (-1);
+			if ((p != NULL) &&
+			    (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Invoke: %s\n"), p) == EOF)) {
+				goto out;
+			}
+		}
+
+		/* print underlying metadevice */
+		if ((metaismeta(submirnamep)) &&
+		    (meta_print_name(sp, submirnamep, nlpp, fname, fp,
+		    ((options & ~PRINT_HEADER) | PRINT_SUBDEVS),
+		    NULL, ep) != 0)) {
+			return (-1);
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * Print or report one mirror, or every mirror in the set.
+ *
+ * sp	- set to operate on; must not be NULL
+ * mirnp	- mirror to print, or NULL to print all mirrors in sp
+ * nlpp	- name list passed on to the lower level print routines
+ * fname	- name recorded in the error if writing fails
+ * fp	- output stream
+ * options	- PRINT_* flags: PRINT_SHORT selects the one line form,
+ *	  PRINT_FAST is passed on when reading the unit, and a
+ *	  mirror that has a parent is printed only when
+ *	  PRINT_SUBDEVS is set
+ * ep	- error return
+ *
+ * Returns 0 on success, -1 on failure.  When printing all mirrors
+ * every mirror is attempted even if an earlier one fails.
+ */
+int
+meta_mirror_print(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mirrorp;
+	uint_t		idx;
+	int		fast;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert((mirnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev))));
+
+	/* no mirror named: recurse over every mirror in the set */
+	if (mirnp == NULL) {
+		mdnamelist_t	*mirrors = NULL;
+		mdnamelist_t	*cur;
+		int		nmirrors;
+		int		status = 0;
+
+		nmirrors = meta_get_mirror_names(sp, &mirrors, options, ep);
+		if (nmirrors < 0)
+			return (-1);
+		if (nmirrors == 0)
+			return (0);
+
+		for (cur = mirrors; cur != NULL; cur = cur->next) {
+			if (meta_mirror_print(sp, cur->namep, nlpp, fname,
+			    fp, options, ep) != 0)
+				status = -1;
+		}
+
+		metafreenamelist(mirrors);
+		return (status);
+	}
+
+	/* get unit structure */
+	fast = (options & PRINT_FAST) ? 1 : 0;
+	mirrorp = meta_get_mirror_common(sp, mirnp, fast, ep);
+	if (mirrorp == NULL)
+		return (-1);
+
+	/* a parented mirror is shown only when subdevices are wanted */
+	if ((!(options & PRINT_SUBDEVS)) &&
+	    (MD_HAS_PARENT(mirrorp->common.parent)))
+		return (0);
+
+	/* long form */
+	if (!(options & PRINT_SHORT)) {
+		return (mirror_report(sp, mirrorp, nlpp, fname, fp,
+		    options, ep));
+	}
+
+	/* short form: the mirror line, then each metadevice submirror */
+	if (mirror_print(mirrorp, fname, fp, options, ep) != 0)
+		return (-1);
+
+	for (idx = 0; idx < NMIRROR; idx++) {
+		md_submirror_t	*smp = &mirrorp->submirrors[idx];
+		mdname_t	*smnp = smp->submirnamep;
+
+		/* skip unused submirrors */
+		if (smnp == NULL) {
+			assert(smp->state == SMS_UNUSED);
+			continue;
+		}
+
+		if (metaismeta(smnp) &&
+		    (meta_print_name(sp, smnp, nlpp, fname, fp,
+		    (options | PRINT_SUBDEVS), NULL, ep) != 0))
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * online submirror
+ *
+ * sp		- set containing the mirror; must not be NULL
+ * mirnp	- mirror that owns the submirror
+ * submirnp	- submirror to bring online
+ * options	- MDCMD_PRINT prints a confirmation to stdout
+ * ep		- error return
+ *
+ * Fails with MDE_ABR_SET if the mirror's tstate has MD_ABR_CAP set.
+ * On success the cached names for both the mirror and the submirror
+ * are invalidated.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+meta_mirror_online(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*submirnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_i_off_on_t	mio;
+	md_mirror_t	*mirrorp;
+	md_set_desc	*sd;
+	uint_t		tstate;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+		return (-1);
+
+	/* Only valid for mirror without ABR set */
+	if (meta_get_tstate(mirrorp->common.namep->dev, &tstate, ep) != 0)
+		return (-1);
+	if (tstate & MD_ABR_CAP) {
+		(void) mderror(ep, MDE_ABR_SET, NULL);
+		return (-1);
+	}
+
+	/*
+	 * In a MN set, the master always executes the online command first.
+	 * Before the master executes the IOC_ONLINE ioctl,
+	 * the master sends a message to all nodes to suspend writes to
+	 * this mirror.  Then the master executes the IOC_ONLINE ioctl
+	 * which resumes writes to this mirror from the master node.
+	 * As each slave executes the online command, each slave will
+	 * call the IOC_ONLINE ioctl which will resume writes to this mirror
+	 * from that slave node.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+		/* only the master of a multi-node set sends the suspend */
+		if ((MD_MNSET_DESC(sd)) && sd->sd_mn_am_i_master)
+			if (meta_mn_send_suspend_writes(
+			    meta_getminor(mirnp->dev), ep) != 0)
+				return (-1);
+	}
+
+	/* online submirror */
+	(void) memset(&mio, 0, sizeof (mio));
+	mio.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&mio, MD_MIRROR, sp->setno);
+	mio.submirror = submirnp->dev;
+	if (metaioctl(MD_IOCONLINE, &mio, &mio.mde, NULL) != 0)
+		return (mdstealerror(ep, &mio.mde));
+
+	/* clear cache */
+	meta_invalidate_name(mirnp);
+	meta_invalidate_name(submirnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: submirror %s is onlined\n"),
+		    mirnp->cname, submirnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Take a submirror of the given mirror offline.
+ *
+ * MDCMD_FORCE in options is passed to the driver as force_offline.  The
+ * request is refused with MDE_ABR_SET when the mirror's tstate has
+ * MD_ABR_CAP set.  After a successful ioctl the cached names of the
+ * mirror and the submirror are invalidated and, when MDCMD_PRINT is set,
+ * a confirmation is written to stdout.
+ *
+ * Returns 0 on success.  A failed ioctl returns the value of
+ * mdstealerror(); every other failure returns -1.
+ */
+int
+meta_mirror_offline(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*submirnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mp;
+	md_set_desc	*setdesc;
+	md_i_off_on_t	req;
+	uint_t		ts;
+
+	/* the mirror has to belong to the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* validate the name, then fetch the mirror it refers to */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+	mp = meta_get_mirror(sp, mirnp, ep);
+	if (mp == NULL)
+		return (-1);
+
+	/* offlining is only permitted while ABR is not set on the mirror */
+	if (meta_get_tstate(mp->common.namep->dev, &ts, ep) != 0)
+		return (-1);
+	if ((ts & MD_ABR_CAP) != 0) {
+		(void) mderror(ep, MDE_ABR_SET, NULL);
+		return (-1);
+	}
+
+	/*
+	 * Multi-node sets: the master runs the offline command first and,
+	 * before issuing its ioctl, tells every node to suspend writes to
+	 * this mirror.  The ioctl on each node (master first, then each
+	 * slave as it runs the command) resumes writes from that node.
+	 */
+	if (! metaislocalset(sp)) {
+		setdesc = metaget_setdesc(sp, ep);
+		if (setdesc == NULL)
+			return (-1);
+		if ((MD_MNSET_DESC(setdesc)) && setdesc->sd_mn_am_i_master &&
+		    (meta_mn_send_suspend_writes(meta_getminor(mirnp->dev),
+		    ep) != 0))
+			return (-1);
+	}
+
+	/* build the request and hand it to the mirror driver */
+	(void) memset(&req, 0, sizeof (req));
+	req.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&req, MD_MIRROR, sp->setno);
+	req.submirror = submirnp->dev;
+	req.force_offline = ((options & MDCMD_FORCE) ? 1 : 0);
+	if (metaioctl(MD_IOCOFFLINE, &req, &req.mde, NULL) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* whatever was cached for these two names is now stale */
+	meta_invalidate_name(mirnp);
+	meta_invalidate_name(submirnp);
+
+	/* report the result if the caller asked for output */
+	if ((options & MDCMD_PRINT) != 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: submirror %s is offlined\n"),
+		    mirnp->cname, submirnp->cname);
+		(void) fflush(stdout);
+	}
+
+	return (0);
+}
+
+/*
+ * attach submirror to mirror
+ * we actually never have to worry about crossing a threshold here.
+ * 2 cases 1) attach and the only way the mirror can be 64 bit is if
+ * one of the submirrors already is. 2) grow and the only way the mirror
+ * is 64 bit is if one of the submirrors already is.
+ *
+ * When submirnp is NULL the mirror is simply grown through
+ * meta_concat_generic().  Without MDCMD_DOIT the attach is a dry run:
+ * the namespace is left alone and the ioctl is issued with
+ * MDIOCTL_DRYRUN.
+ *
+ * Returns 0 on success.  A failed ioctl returns the value of
+ * mdstealerror(); every other failure returns -1.  In MDCMD_DOIT mode
+ * any failure after the submirror key has been stored removes that key
+ * again so that no unreferenced entry is left in the namespace.
+ */
+int
+meta_mirror_attach(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdname_t	*submirnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_att_struct_t	att;
+	md_set_desc		*sd;
+	int			doit = ((options & MDCMD_DOIT) ? 1 : 0);
+	md_error_t		xep = mdnullerror;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/* just grow */
+	if (submirnp == NULL) {
+		return (meta_concat_generic(sp, mirnp, NULL, ep));
+	}
+
+	/* check submirror */
+	if (meta_check_submirror(sp, submirnp, mirnp, 0, ep) != 0)
+		return (-1);
+
+	/* In dryrun mode (DOIT not set) we must not alter the mddb */
+	if (doit) {
+		/* store name in namespace */
+		if (add_key_name(sp, submirnp, NULL, ep) != 0)
+			return (-1);
+	}
+
+	/*
+	 * In a MN set, the master always executes the attach command first.
+	 * Before the master executes the IOC_ATTACH ioctl, in non-DRYRUN mode
+	 * the master sends a message to all nodes to suspend writes to
+	 * this mirror.  Then the master executes the IOC_ATTACH ioctl
+	 * which resumes writes to this mirror from the master node.
+	 * As each slave executes the attach command, each slave will
+	 * call the IOC_ATTACH ioctl which will resume writes to this mirror
+	 * from that slave node.
+	 *
+	 * From here on the key may already be in the namespace, so failures
+	 * leave through undo_key rather than returning directly.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			goto undo_key;
+		if ((MD_MNSET_DESC(sd)) && doit &&
+		    sd->sd_mn_am_i_master)
+			if (meta_mn_send_suspend_writes(
+			    meta_getminor(mirnp->dev), ep) != 0)
+				goto undo_key;
+	}
+
+	/* attach submirror */
+	(void) memset(&att, 0, sizeof (att));
+	att.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&att, MD_MIRROR, sp->setno);
+	att.submirror = submirnp->dev;
+	att.key = submirnp->key;
+	/* if the command was issued with -n option, use dryrun mode */
+	if (! doit) {
+		att.options = MDIOCTL_DRYRUN;
+	}
+	if (metaioctl(MD_IOCATTACH, &att, &att.mde, NULL) != 0) {
+		/* In dryrun mode (DOIT not set) we must not alter the mddb */
+		if (doit) {
+			(void) del_key_name(sp, submirnp, ep);
+		}
+		return (mdstealerror(ep, &att.mde));
+	}
+
+	/* In dryrun mode (DOIT not set) we must not alter the mddb */
+	if (doit) {
+		/* clear cache */
+		meta_invalidate_name(mirnp);
+		meta_invalidate_name(submirnp);
+	}
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: submirror %s %s\n"), mirnp->cname, submirnp->cname,
+		    doit ? "is attached" : "would attach");
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+
+undo_key:
+	/*
+	 * Back out the key stored above.  A scratch error is used so the
+	 * failure already recorded in ep is what the caller sees.
+	 */
+	if (doit) {
+		(void) del_key_name(sp, submirnp, &xep);
+	}
+	return (-1);
+}
+
+/*
+ * Detach a submirror from the given mirror.
+ *
+ * MDCMD_FORCE in options is passed to the driver as force_detach.  After
+ * a successful ioctl the cached names of the mirror and the submirror
+ * are invalidated and, when MDCMD_PRINT is set, a confirmation is
+ * written to stdout.
+ *
+ * Returns 0 on success.  A failed ioctl returns the value of
+ * mdstealerror(); every other failure returns -1.
+ */
+int
+meta_mirror_detach(
+	mdsetname_t		*sp,
+	mdname_t		*mirnp,
+	mdname_t		*submirnp,
+	mdcmdopts_t		options,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*setdesc;
+	md_detach_params_t	req;
+
+	/* the mirror has to belong to the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* validate the mirror name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/*
+	 * Multi-node sets: the master runs the detach command first and,
+	 * before issuing its ioctl, tells every node to suspend writes to
+	 * this mirror.  The ioctl on each node (master first, then each
+	 * slave as it runs the command) resumes writes from that node.
+	 */
+	if (! metaislocalset(sp)) {
+		setdesc = metaget_setdesc(sp, ep);
+		if (setdesc == NULL)
+			return (-1);
+		if ((MD_MNSET_DESC(setdesc)) && setdesc->sd_mn_am_i_master &&
+		    (meta_mn_send_suspend_writes(meta_getminor(mirnp->dev),
+		    ep) != 0))
+			return (-1);
+	}
+
+	/* build the request and hand it to the mirror driver */
+	(void) memset(&req, 0, sizeof (req));
+	req.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&req, MD_MIRROR, sp->setno);
+	req.submirror = submirnp->dev;
+	req.force_detach = ((options & MDCMD_FORCE) ? 1 : 0);
+	if (metaioctl(MD_IOCDETACH, &req, &req.mde, NULL) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* whatever was cached for these two names is now stale */
+	meta_invalidate_name(mirnp);
+	meta_invalidate_name(submirnp);
+
+	/* report the result if the caller asked for output */
+	if ((options & MDCMD_PRINT) != 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: submirror %s is detached\n"),
+		    mirnp->cname, submirnp->cname);
+		(void) fflush(stdout);
+	}
+
+	return (0);
+}
+
+/*
+ * Fetch the tunable parameters of a mirror.
+ *
+ * *paramsp is zeroed and then filled with the mirror's read option,
+ * write option and pass number.  Returns 0 on success, -1 (with ep set
+ * by the failing call) if the name is not valid or the mirror cannot be
+ * obtained.
+ */
+int
+meta_mirror_get_params(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mm_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mp;
+
+	/* the mirror has to belong to the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* validate the name, then look up the mirror */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+	mp = meta_get_mirror(sp, mirnp, ep);
+	if (mp == NULL)
+		return (-1);
+
+	/* start from a clean structure and copy the three settings */
+	(void) memset(paramsp, 0, sizeof (*paramsp));
+	paramsp->pass_num = mp->pass_num;
+	paramsp->write_option = mp->write_option;
+	paramsp->read_option = mp->read_option;
+
+	return (0);
+}
+
+/*
+ * Change the tunable parameters of a mirror.
+ *
+ * *paramsp is copied into an MD_IOCCHANGE request for the mirror.  On
+ * success the cached name is invalidated and 0 is returned.  A failed
+ * ioctl returns the value of mdstealerror(); an invalid name returns -1.
+ */
+int
+meta_mirror_set_params(
+	mdsetname_t		*sp,
+	mdname_t		*mirnp,
+	mm_params_t		*paramsp,
+	md_error_t		*ep
+)
+{
+	md_mirror_params_t	chg;
+
+	/* the mirror has to belong to the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* validate the mirror name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/* build the change request and hand it to the mirror driver */
+	(void) memset(&chg, 0, sizeof (chg));
+	MD_SETDRIVERNAME(&chg, MD_MIRROR, sp->setno);
+	chg.mnum = meta_getminor(mirnp->dev);
+	chg.params = *paramsp;
+	if (metaioctl(MD_IOCCHANGE, &chg, &chg.mde, mirnp->cname) != 0)
+		return (mdstealerror(ep, &chg.mde));
+
+	/* whatever was cached for the mirror is now stale */
+	meta_invalidate_name(mirnp);
+
+	return (0);
+}
+
+/*
+ * Invalidate the cached name of every submirror of a mirror.
+ *
+ * Returns 0 on success, -1 (with ep set by meta_get_mirror()) when the
+ * mirror cannot be obtained.
+ */
+static int
+invalidate_submirrors(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mp = meta_get_mirror(sp, mirnp, ep);
+	uint_t		i;
+
+	if (mp == NULL)
+		return (-1);
+
+	for (i = 0; i < NMIRROR; i++) {
+		md_submirror_t	*smp = &mp->submirrors[i];
+
+		if (smp->submirnamep != NULL) {
+			meta_invalidate_name(smp->submirnamep);
+		} else {
+			/* a slot without a name must be an unused slot */
+			assert(smp->state == SMS_UNUSED);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * replace mirror component
+ *
+ * Replaces component oldnp of mirror mirnp with newnp.  oldnp and newnp
+ * may name the same device whose dev_t has changed (a "rebind"); in that
+ * case an MDE_ALREADY "in use" complaint from meta_check_component() is
+ * tolerated.  Without MDCMD_DOIT the ioctl is issued with
+ * MDIOCTL_DRYRUN.
+ *
+ * The devid of the new drive is saved across meta_check_component()
+ * (which may overwrite it) and restored afterwards; for a multi-node
+ * set the saved copy is dropped and the devid ends up NULL.
+ *
+ * Returns 0 on success.  A failed ioctl returns the value of
+ * mdstealerror(), an unbound old device returns the value of
+ * mdsyserror(); every other failure returns -1.
+ */
+int
+meta_mirror_replace(
+	mdsetname_t		*sp,
+	mdname_t		*mirnp,
+	mdname_t		*oldnp,
+	mdname_t		*newnp,
+	mdcmdopts_t		options,
+	md_error_t		*ep
+)
+{
+	md_mirror_t		*mirrorp;
+	uint_t			smi;
+	replace_params_t	params;
+	diskaddr_t		size, label, start_blk;
+	md_dev64_t		old_dev, new_dev;
+	diskaddr_t		new_start_blk, new_end_blk;
+	int			rebind;
+	md_set_desc		*sd = NULL;
+	char			*new_devidp = NULL;
+	int			ret;
+	md_error_t		xep = mdnullerror;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/* save new binding incase this is a rebind where oldnp==newnp */
+	new_dev = newnp->dev;
+	new_start_blk = newnp->start_blk;
+	new_end_blk = newnp->end_blk;
+
+	/* invalidate, then get the mirror (fill in oldnp from metadb) */
+	meta_invalidate_name(mirnp);
+	if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+		return (-1);
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnp = mdsp->submirnamep;
+
+		if (submirnp == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		if (! metaismeta(submirnp))
+			continue;
+
+		meta_invalidate_name(submirnp);
+		if (meta_get_unit(sp, submirnp, ep) == NULL)
+			return (-1);
+	}
+
+	/* the old device binding is now established */
+	if ((old_dev = oldnp->dev) == NODEV64)
+		return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+	/*
+	 * check for the case where oldnp and newnp indicate the same
+	 * device, but the dev_t of the device has changed between old
+	 * and new.  This is called a rebind.  On entry the dev_t
+	 * represents the new device binding determined from the
+	 * filesystem (meta_getdev). After calling meta_get_unit
+	 * oldnp (and maybe newnp if this is a rebind) is updated based
+	 * to the old binding from the metadb (done by metakeyname).
+	 */
+	if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
+	    (old_dev != new_dev)) {
+		rebind = 1;
+	} else {
+		rebind = 0;
+	}
+	if (rebind) {
+		newnp->dev = new_dev;
+		newnp->start_blk = new_start_blk;
+		newnp->end_blk = new_end_blk;
+	}
+
+	/*
+	 * Save a copy of the devid associated with the new disk, the reason
+	 * is that if we are rebinding then the call to meta_check_component()
+	 * will cause the devid of the disk to be overwritten with what is in
+	 * the replica namespace. The function that actually overwrites the
+	 * devid is dr2drivedesc().
+	 */
+	if (newnp->drivenamep->devid != NULL)
+		new_devidp = Strdup(newnp->drivenamep->devid);
+
+	/* if it's a multi-node diskset clear new_devidp */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			/* don't leak the saved copy on the way out */
+			Free(new_devidp);
+			return (-1);
+		}
+		if (MD_MNSET_DESC(sd)) {
+			/* release the copy instead of just forgetting it */
+			Free(new_devidp);
+			new_devidp = NULL;
+		}
+	}
+
+	/* check it out (dup on rebind is ok) */
+	if (meta_check_component(sp, newnp, 0, ep) != 0) {
+		if ((! rebind) || (! mdisuseerror(ep, MDE_ALREADY))) {
+			Free(new_devidp);
+			return (-1);
+		}
+		mdclrerror(ep);
+	}
+	if ((size = metagetsize(newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if ((label = metagetlabel(newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if ((start_blk = metagetstart(sp, newnp, ep)) == MD_DISKADDR_ERROR) {
+		Free(new_devidp);
+		return (-1);
+	}
+	if (start_blk >= size) {
+		(void) mdsyserror(ep, ENOSPC, newnp->cname);
+		Free(new_devidp);
+		return (-1);
+	}
+
+	/*
+	 * Copy back the saved devid.  Ownership of the saved copy moves to
+	 * the drive name; when there is no saved copy the field becomes
+	 * NULL rather than being left pointing at the memory just freed.
+	 */
+	Free(newnp->drivenamep->devid);
+	newnp->drivenamep->devid = new_devidp;
+	new_devidp = NULL;
+
+	/* store name in namespace, allocate new key */
+	if (add_key_name(sp, newnp, NULL, ep) != 0)
+		return (-1);
+
+	/*
+	 * In a MN set, the master always executes the replace command first.
+	 * Before the master executes the IOC_REPLACE ioctl, in non-DRYRUN mode
+	 * the master sends a message to all nodes to suspend writes to
+	 * this mirror.  Then the master executes the IOC_REPLACE ioctl
+	 * which resumes writes to this mirror from the master node.
+	 * As each slave executes the replace command, each slave will
+	 * call the IOC_REPLACE ioctl which will resume writes to this mirror
+	 * from that slave node.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((MD_MNSET_DESC(sd)) && (options & MDCMD_DOIT) &&
+		    sd->sd_mn_am_i_master)
+			if (meta_mn_send_suspend_writes(
+			    meta_getminor(mirnp->dev), ep) != 0) {
+				/* back out the key stored just above */
+				(void) del_key_name(sp, newnp, &xep);
+				return (-1);
+			}
+	}
+
+	if (rebind && !metaislocalset(sp)) {
+		/*
+		 * We are 'rebind'ing a disk that is in a diskset so as well
+		 * as updating the diskset's namespace the local set needs
+		 * to be updated because it also contains a reference to
+		 * the disk in question.
+		 */
+		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
+		    newnp->cname, ep);
+
+		if (ret != METADEVADM_SUCCESS) {
+			(void) del_key_name(sp, newnp, &xep);
+			return (-1);
+		}
+	}
+
+	/* replace component */
+	(void) memset(&params, 0, sizeof (params));
+	params.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&params, MD_MIRROR, sp->setno);
+	params.cmd = REPLACE_COMP;
+	params.old_dev = old_dev;
+	params.new_dev = new_dev;
+	params.start_blk = start_blk;
+	params.has_label = ((label > 0) ? 1 : 0);
+	params.number_blks = size;
+	params.new_key = newnp->key;
+	/* Is this just a dryrun ? */
+	if ((options & MDCMD_DOIT) == 0) {
+		params.options |= MDIOCTL_DRYRUN;
+	}
+	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
+		(void) del_key_name(sp, newnp, ep);
+		return (mdstealerror(ep, &params.mde));
+	}
+
+	/* clear cache */
+	meta_invalidate_name(oldnp);
+	meta_invalidate_name(newnp);
+	if (invalidate_submirrors(sp, mirnp, ep) != 0) {
+		meta_invalidate_name(mirnp);
+		return (-1);
+	}
+	meta_invalidate_name(mirnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: device %s is replaced with %s\n"),
+		    mirnp->cname, oldnp->cname, newnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * enable mirror component
+ *
+ * Re-enables component compnp of mirror mirnp by issuing MD_IOCREPLACE
+ * with cmd ENABLE_COMP and the same device as both old and new.  If the
+ * dev_t seen through the filesystem differs from the one recorded in the
+ * metadb (a rebind) the work is handed to meta_mirror_replace() with
+ * compnp as both the old and the new component.  Without MDCMD_DOIT the
+ * ioctl is issued with MDIOCTL_DRYRUN.
+ *
+ * Returns 0 on success.  A failed ioctl returns the value of
+ * mdstealerror(), an unbound component the value of mdsyserror(), a
+ * rebind the value of meta_mirror_replace(); other failures return -1.
+ */
+int
+meta_mirror_enable(
+	mdsetname_t		*sp,
+	mdname_t		*mirnp,
+	mdname_t		*compnp,
+	mdcmdopts_t		options,
+	md_error_t		*ep
+)
+{
+	md_mirror_t		*mirrorp;
+	uint_t			smi;
+	replace_params_t	params;
+	diskaddr_t		size, label, start_blk;
+	md_dev64_t		fs_dev;
+	md_set_desc		*sd;
+	int			ret;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/* get the file_system dev binding */
+	if (meta_getdev(sp, compnp, ep) != 0)
+		return (-1);
+	/* remember it: compnp->dev is compared against it after the reload */
+	fs_dev = compnp->dev;
+
+	/* get the mirror unit (fill in compnp->dev with metadb version) */
+	meta_invalidate_name(mirnp);
+	if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+		return (-1);
+
+	/* drop and re-read every submirror that is itself a metadevice */
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnp = mdsp->submirnamep;
+
+		if (submirnp == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		if (! metaismeta(submirnp))
+			continue;
+
+		meta_invalidate_name(submirnp);
+		if (meta_get_unit(sp, submirnp, ep) == NULL)
+			return (-1);
+	}
+
+	/* the metadb device binding is now established */
+	if (compnp->dev == NODEV64)
+		return (mdsyserror(ep, ENODEV, compnp->cname));
+
+	/*
+	 * check for the case where the dev_t has changed between the
+	 * filesystem and the metadb.  This is called a rebind, and
+	 * is handled by meta_mirror_replace.
+	 */
+	if (fs_dev != compnp->dev) {
+		/* establish file system binding with invalid start/end */
+		compnp->dev = fs_dev;
+		compnp->start_blk = -1;
+		compnp->end_blk = -1;
+		return (meta_mirror_replace(sp, mirnp,
+		    compnp, compnp, options, ep));
+	}
+
+	/* setup mirror info */
+	(void) memset(&params, 0, sizeof (params));
+	params.mnum = meta_getminor(mirnp->dev);
+	MD_SETDRIVERNAME(&params, MD_MIRROR, sp->setno);
+	params.cmd = ENABLE_COMP;
+
+	/* check it out (an MDE_ALREADY in-use error is acceptable here) */
+	if (meta_check_component(sp, compnp, 0, ep) != 0) {
+		if (! mdisuseerror(ep, MDE_ALREADY))
+			return (-1);
+		mdclrerror(ep);
+	}
+
+	/* gather size, label and start block; start must lie inside size */
+	if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	if ((label = metagetlabel(compnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	if ((start_blk = metagetstart(sp, compnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	if (start_blk >= size) {
+		(void) mdsyserror(ep, ENOSPC, compnp->cname);
+		return (-1);
+	}
+
+	/*
+	 * In a MN set, the master always executes the enable command first
+	 * (it is carried out with the IOC_REPLACE ioctl).
+	 * Before the master executes the IOC_REPLACE ioctl, in non-DRYRUN mode
+	 * the master sends a message to all nodes to suspend writes to
+	 * this mirror.  Then the master executes the IOC_REPLACE ioctl
+	 * which resumes writes to this mirror from the master node.
+	 * As each slave executes the enable command, each slave will
+	 * call the IOC_REPLACE ioctl which will resume writes to this mirror
+	 * from that slave node.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+		if ((MD_MNSET_DESC(sd)) && (options & MDCMD_DOIT) &&
+		    sd->sd_mn_am_i_master)
+			if (meta_mn_send_suspend_writes(
+			    meta_getminor(mirnp->dev), ep) != 0)
+				return (-1);
+	}
+
+	/* enable component: old and new device are the same */
+	params.old_dev = compnp->dev;
+	params.new_dev = compnp->dev;
+	params.start_blk = start_blk;
+	params.has_label = ((label > 0) ? 1 : 0);
+	params.number_blks = size;
+
+	/* Is this just a dryrun ? */
+	if ((options & MDCMD_DOIT) == 0) {
+		params.options |= MDIOCTL_DRYRUN;
+	}
+	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0)
+		return (mdstealerror(ep, &params.mde));
+
+	/*
+	 * Are we dealing with a non-local set? If so need to update the
+	 * local namespace so that the disk record has the correct devid.
+	 */
+	if (!metaislocalset(sp)) {
+		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, compnp->cname,
+		    ep);
+
+		if (ret != METADEVADM_SUCCESS) {
+			/*
+			 * Failed to update the local set. Nothing to do here
+			 * apart from report the error. The namespace is
+			 * most likely broken and some form of remedial
+			 * recovery is going to be required.
+			 */
+			mde_perror(ep, "");
+			mdclrerror(ep);
+		}
+	}
+
+	/* clear cache (the mirror name is invalidated on both paths) */
+	meta_invalidate_name(compnp);
+	if (invalidate_submirrors(sp, mirnp, ep) != 0) {
+		meta_invalidate_name(mirnp);
+		return (-1);
+	}
+	meta_invalidate_name(mirnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: device %s is enabled\n"),
+		    mirnp->cname, compnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Check submirror slot smi of a mirror against every lower-numbered
+ * slot for overlap.
+ *
+ * Returns 0 when meta_check_overlap() accepts every pairing, -1 as soon
+ * as one is rejected (ep is filled in by meta_check_overlap()).
+ */
+static int
+check_twice(
+	md_mirror_t	*mirrorp,
+	uint_t		smi,
+	md_error_t	*ep
+)
+{
+	mdname_t	*ownernp = mirrorp->common.namep;
+	mdname_t	*candnp = mirrorp->submirrors[smi].submirnamep;
+	uint_t		i;
+
+	for (i = 0; i < smi; i++) {
+		mdname_t	*earliernp = mirrorp->submirrors[i].submirnamep;
+
+		/* empty slots have nothing to overlap with */
+		if ((earliernp != NULL) &&
+		    (meta_check_overlap(ownernp->cname, candnp, 0, -1,
+		    earliernp, 0, -1, ep) != 0))
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Validate a mirror description.
+ *
+ * Checks, in order: that at least one submirror is named; that every
+ * unnamed slot is SMS_UNUSED; when MDCMD_DOIT is set, that each
+ * submirror passes meta_check_submirror() (MDCMD_FORCE is passed on as
+ * the force argument) and has a non-zero size; that no submirror
+ * overlaps an earlier one; and that the read option, write option and
+ * pass number are legal.
+ *
+ * Returns 0 when everything checks out; otherwise -1 or the value of
+ * the md*error() routine that recorded the problem in ep.
+ */
+int
+meta_check_mirror(
+	mdsetname_t	*sp,
+	md_mirror_t	*mirrorp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*mirnp = mirrorp->common.namep;
+	int		have_sm = 0;
+	uint_t		i;
+
+	/* a mirror needs at least one submirror */
+	for (i = 0; i < NMIRROR; i++) {
+		if (mirrorp->submirrors[i].submirnamep != NULL) {
+			have_sm = 1;
+			break;
+		}
+	}
+	if (! have_sm) {
+		return (mdmderror(ep, MDE_BAD_MIRROR,
+		    meta_getminor(mirnp->dev), mirnp->cname));
+	}
+
+	/* look at each slot in turn */
+	for (i = 0; i < NMIRROR; i++) {
+		md_submirror_t	*smp = &mirrorp->submirrors[i];
+		mdname_t	*smnp = smp->submirnamep;
+
+		/* an unnamed slot is only acceptable if marked unused */
+		if (smnp == NULL) {
+			if (smp->state != SMS_UNUSED) {
+				return (mdmderror(ep, MDE_BAD_MIRROR,
+				    meta_getminor(mirnp->dev), mirnp->cname));
+			}
+			continue;
+		}
+
+		/* the device itself is only examined when really doing it */
+		if ((options & MDCMD_DOIT) != 0) {
+			diskaddr_t	nblks;
+
+			if (meta_check_submirror(sp, smnp, NULL,
+			    ((options & MDCMD_FORCE) ? 1 : 0), ep) != 0)
+				return (-1);
+			nblks = metagetsize(smnp, ep);
+			if (nblks == MD_DISKADDR_ERROR)
+				return (-1);
+			if (nblks == 0) {
+				return (mdsyserror(ep, ENOSPC,
+					smnp->cname));
+			}
+		}
+
+		/* must not overlap a submirror in an earlier slot */
+		if (check_twice(mirrorp, i, ep) != 0)
+			return (-1);
+	}
+
+	/* read option has to be one of the three known policies */
+	if ((mirrorp->read_option != RD_LOAD_BAL) &&
+	    (mirrorp->read_option != RD_GEOMETRY) &&
+	    (mirrorp->read_option != RD_FIRST))
+		return (mderror(ep, MDE_BAD_RD_OPT, mirnp->cname));
+
+	/* write option has to be parallel or serial */
+	if ((mirrorp->write_option != WR_PARALLEL) &&
+	    (mirrorp->write_option != WR_SERIAL))
+		return (mderror(ep, MDE_BAD_WR_OPT, mirnp->cname));
+
+	/* pass number has to lie within 0 .. MD_PASS_MAX */
+	if (! ((mirrorp->pass_num >= 0) &&
+	    (mirrorp->pass_num <= MD_PASS_MAX)))
+		return (mderror(ep, MDE_BAD_PASS_NUM, mirnp->cname));
+
+	return (0);
+}
+
+/*
+ * Fill in the geometry of a mirror unit.
+ *
+ * The largest write_reinstruct and read_reinstruct found among the
+ * submirrors are combined with the geometry of the first named
+ * submirror and passed to meta_setup_geom().
+ *
+ * Returns 0 on success, -1 if a geometry cannot be obtained or
+ * meta_setup_geom() fails.
+ */
+static int
+mirror_geom(
+	md_mirror_t	*mirrorp,
+	mm_unit_t	*mm,
+	md_error_t	*ep
+)
+{
+	mdname_t	*firstnp = NULL;
+	mdgeom_t	*gp;
+	uint_t		worst_wr = 0;
+	uint_t		worst_rd = 0;
+	uint_t		cyl_round = 1;
+	uint_t		i;
+
+	/* scan the submirrors for the largest reinstruct values */
+	for (i = 0; i < NMIRROR; i++) {
+		mdname_t	*smnp = mirrorp->submirrors[i].submirnamep;
+
+		if (smnp == NULL)
+			continue;
+
+		gp = metagetgeom(smnp, ep);
+		if (gp == NULL)
+			return (-1);
+		if (worst_wr < gp->write_reinstruct)
+			worst_wr = gp->write_reinstruct;
+		if (worst_rd < gp->read_reinstruct)
+			worst_rd = gp->read_reinstruct;
+
+		/* remember the first submirror we came across */
+		if (firstnp == NULL)
+			firstnp = smnp;
+	}
+
+	/* the base geometry is taken from that first submirror */
+	assert(firstnp != NULL);
+	gp = metagetgeom(firstnp, ep);
+	if (gp == NULL)
+		return (-1);
+	if (meta_setup_geom((md_unit_t *)mm, mirrorp->common.namep, gp,
+	    worst_wr, worst_rd, cyl_round, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * create mirror
+ *
+ * Validates mirrorp with meta_check_mirror(), builds an mm_unit_t from
+ * it (one SMS_RUNNING entry per named submirror, size of the smallest
+ * submirror, geometry from mirror_geom()) and, when MDCMD_DOIT is set,
+ * stores the submirror names in the namespace and creates the unit with
+ * MD_IOCSET.  Without MDCMD_DOIT nothing is stored or created and the
+ * function returns success once the unit has been built.  With
+ * MDCMD_UPDATE the computed block count is written back to
+ * mirrorp->common.size.
+ *
+ * Returns 0 on success.  On failure returns -1, or the value of
+ * mdsyserror() when the creation time cannot be read; names added to
+ * the namespace by this call are removed again.
+ */
+int
+meta_create_mirror(
+	mdsetname_t	*sp,
+	md_mirror_t	*mirrorp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*mirnp = mirrorp->common.namep;
+	mm_unit_t	*mm;
+	diskaddr_t	submir_size = MD_DISKADDR_ERROR;
+	ushort_t	nsm = 0;
+	uint_t		smi;
+	mdnamelist_t	*keynlp = NULL;
+	md_set_params_t	set_params;
+	int		rval = -1;
+	md_timeval32_t	creation_time;
+	int		create_flag = MD_CRO_32BIT;
+
+	/* validate mirror */
+	if (meta_check_mirror(sp, mirrorp, options, ep) != 0)
+		return (-1);
+
+	/*
+	 * Read the creation time before allocating anything, so that a
+	 * failure here can return directly without leaking the unit.
+	 */
+	if (meta_gettimeofday(&creation_time) == -1)
+		return (mdsyserror(ep, errno, NULL));
+
+	/* allocate mirror unit */
+	mm = Zalloc(sizeof (*mm));
+
+	/* do submirrors */
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnp = mdsp->submirnamep;
+		mm_submirror_t	*mmsp = &mm->un_sm[smi];
+		diskaddr_t	size;
+
+		/* skip unused submirrors */
+		if (submirnp == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+		++nsm;
+
+		/* get size */
+		if ((size = metagetsize(submirnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		assert(size > 0);
+
+		/* adjust for smallest submirror */
+		if (submir_size == MD_DISKADDR_ERROR) {
+			submir_size = size;
+		} else if (size < submir_size) {
+			submir_size = size;
+		}
+
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (add_key_name(sp, submirnp, &keynlp, ep) != 0)
+				goto out;
+		}
+
+		/* setup submirror */
+		mmsp->sm_key = submirnp->key;
+		mmsp->sm_dev = submirnp->dev;
+		mmsp->sm_state = SMS_RUNNING;
+		mmsp->sm_timestamp = creation_time;
+	}
+
+	/* setup unit */
+	mm->c.un_type = MD_METAMIRROR;
+	MD_SID(mm) = meta_getminor(mirnp->dev);
+	mm->c.un_actual_tb = submir_size;
+	mm->c.un_size = offsetof(mm_unit_t, un_smic);
+	mm->un_nsm = nsm;
+	mm->un_read_option = mirrorp->read_option;
+	mm->un_write_option = mirrorp->write_option;
+	mm->un_pass_num = mirrorp->pass_num;
+	if (mirror_geom(mirrorp, mm, ep) != 0)
+		goto out;
+
+	/* fill in the size of the mirror */
+	if (options & MDCMD_UPDATE) {
+		mirrorp->common.size = mm->c.un_total_blocks;
+	}
+
+	/* if we're not doing anything, return success */
+	if (! (options & MDCMD_DOIT)) {
+		rval = 0;	/* success */
+		goto out;
+	}
+
+	/* create mirror */
+	(void) memset(&set_params, 0, sizeof (set_params));
+	/* did the user tell us to generate a large device? */
+	create_flag = meta_check_devicesize(mm->c.un_total_blocks);
+	if (create_flag == MD_CRO_64BIT) {
+		mm->c.un_revision = MD_64BIT_META_DEV;
+		set_params.options = MD_CRO_64BIT;
+	} else {
+		mm->c.un_revision = MD_32BIT_META_DEV;
+		set_params.options = MD_CRO_32BIT;
+	}
+	set_params.mnum = MD_SID(mm);
+	set_params.size = mm->c.un_size;
+	set_params.mdp = (uintptr_t)mm;
+	MD_SETDRIVERNAME(&set_params, MD_MIRROR, MD_MIN2SET(set_params.mnum));
+	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+	    mirnp->cname) != 0) {
+		(void) mdstealerror(ep, &set_params.mde);
+		goto out;
+	}
+	rval = 0;	/* success */
+
+	/* cleanup, return success */
+out:
+	Free(mm);
+	/* on failure take back any names stored in the namespace above */
+	if (rval != 0) {
+		(void) del_key_names(sp, keynlp, NULL);
+	}
+	metafreenamelist(keynlp);
+	/* a real, successful create makes the cached names stale */
+	if ((rval == 0) && (options & MDCMD_DOIT)) {
+		if (invalidate_submirrors(sp, mirnp, ep) != 0)
+			rval = -1;
+		meta_invalidate_name(mirnp);
+	}
+	return (rval);
+}
+
+/*
+ * initialize mirror
+ * NOTE: this function is metainit(1m)'s command line parser!
+ *
+ * Expected argument vector:
+ *	mirror -m submirror... [-g | -r] [-S] [pass_num]
+ * -g selects RD_GEOMETRY and -r selects RD_FIRST (they exclude each
+ * other; the default is RD_LOAD_BAL), -S selects WR_SERIAL (default
+ * WR_PARALLEL), and a trailing number is parsed as the pass number
+ * (default MD_PASS_DEFAULT).  Unless MDCMD_NOLOCK is set the set lock is
+ * taken and ownership is checked before anything else is done.  The
+ * parsed description is handed to meta_create_mirror().
+ *
+ * Returns 0 on success; otherwise -1, or the value of
+ * meta_cook_syntax() for syntax and option errors.
+ */
+int
+meta_init_mirror(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*uname = argv[0];
+	mdname_t	*mirnp = NULL;
+	int		old_optind;
+	int		c;
+	md_mirror_t	*mirrorp = NULL;
+	uint_t		smi;
+	int		rval = -1;
+
+	/* get mirror name */
+	assert(argc > 0);
+	if (argc < 1)
+		goto syntax;
+	if ((mirnp = metaname(spp, uname, ep)) == NULL)
+		goto out;
+	assert(*spp != NULL);
+	uname = mirnp->cname;
+	if (metachkmeta(mirnp, ep) != 0)
+		goto out;
+
+	if (!(options & MDCMD_NOLOCK)) {
+		/* grab set lock */
+		if (meta_lock(*spp, TRUE, ep) != 0)
+			goto out;
+
+		if (meta_check_ownership(*spp, ep) != 0)
+			goto out;
+	}
+
+	/* see if it exists already */
+	if (metagetmiscname(mirnp, ep) != NULL) {
+		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+		    meta_getminor(mirnp->dev), uname);
+		goto out;
+	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
+		goto out;
+	} else {
+		mdclrerror(ep);
+	}
+	--argc, ++argv;
+
+	/* grab -m */
+	if ((argc < 1) || (strcmp(argv[0], "-m") != 0))
+		goto syntax;
+	--argc, ++argv;
+
+	if (argc == 0)
+		goto syntax;
+
+	/* parse general options (none are accepted at this position) */
+	optind = 0;
+	opterr = 0;
+	if (getopt(argc, argv, "") != -1)
+		goto options;
+
+	/* allocate mirror */
+	mirrorp = Zalloc(sizeof (*mirrorp));
+
+	/* setup common */
+	mirrorp->common.namep = mirnp;
+	mirrorp->common.type = MD_METAMIRROR;
+
+	/*
+	 * parse submirrors: every leading argument that starts with neither
+	 * '-' nor a digit.  isdigit() is only defined for values that fit
+	 * in an unsigned char (or EOF), so the character is cast before the
+	 * call; a plain char may be negative for non-ASCII bytes.
+	 */
+	for (smi = 0; ((argc > 0) && (argv[0][0] != '-') &&
+	    (! isdigit((unsigned char)argv[0][0]))); ++smi) {
+		md_submirror_t	*mdsm = &mirrorp->submirrors[smi];
+		mdname_t	*submirnamep;
+
+		/* check for room */
+		if (smi >= NMIRROR) {
+			(void) mdmderror(ep, MDE_MIRROR_FULL,
+			    meta_getminor(mirnp->dev), uname);
+			goto out;
+		}
+
+		/* parse submirror name */
+		if ((submirnamep = metaname(spp, argv[0], ep)) == NULL)
+			goto out;
+		mdsm->submirnamep = submirnamep;
+		--argc, ++argv;
+	}
+	if (smi == 0) {
+		(void) mdmderror(ep, MDE_NSUBMIRS, meta_getminor(mirnp->dev),
+					uname);
+		goto out;
+	}
+
+	/* dangerous n-way mirror creation */
+	if ((smi > 1) && (options & MDCMD_PRINT)) {
+		md_eprintf(dgettext(TEXT_DOMAIN,
+"%s: WARNING: This form of metainit is not recommended.\n"
+"The submirrors may not have the same data.\n"
+"Please see ERRORS in metainit(1M) for additional information.\n"),
+		    uname);
+	}
+
+	/* parse mirror options */
+	mirrorp->read_option = RD_LOAD_BAL;
+	mirrorp->write_option = WR_PARALLEL;
+	mirrorp->pass_num = MD_PASS_DEFAULT;
+	old_optind = optind = 0;
+	opterr = 0;
+	while ((c = getopt(argc, argv, "grS")) != -1) {
+		switch (c) {
+		case 'g':
+			/* a read option may only be chosen once */
+			if (mirrorp->read_option != RD_LOAD_BAL) {
+				(void) mderror(ep, MDE_BAD_RD_OPT, uname);
+				goto out;
+			}
+			mirrorp->read_option = RD_GEOMETRY;
+			break;
+
+		case 'r':
+			/* a read option may only be chosen once */
+			if (mirrorp->read_option != RD_LOAD_BAL) {
+				(void) mderror(ep, MDE_BAD_RD_OPT, uname);
+				goto out;
+			}
+			mirrorp->read_option = RD_FIRST;
+			break;
+
+		case 'S':
+			/* the write option may only be chosen once */
+			if (mirrorp->write_option != WR_PARALLEL) {
+				(void) mderror(ep, MDE_BAD_WR_OPT, uname);
+				goto out;
+			}
+			mirrorp->write_option = WR_SERIAL;
+			break;
+
+		default:
+			/* report from the argument that held the bad option */
+			argc -= old_optind;
+			argv += old_optind;
+			goto options;
+		}
+		old_optind = optind;
+	}
+	argc -= optind;
+	argv += optind;
+
+	/* parse pass number (same unsigned char rule for isdigit()) */
+	if ((argc > 0) && (isdigit((unsigned char)argv[0][0]))) {
+		if (name_to_pass_num(uname, argv[0],
+		    &mirrorp->pass_num, ep) != 0) {
+			goto out;
+		}
+		--argc, ++argv;
+	}
+
+	/* we should be at the end */
+	if (argc != 0)
+		goto syntax;
+
+	/* create mirror */
+	if (meta_create_mirror(*spp, mirrorp, options, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Mirror is setup\n"),
+		    uname);
+		(void) fflush(stdout);
+	}
+	goto out;
+
+	/* syntax error */
+syntax:
+	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
+	goto out;
+
+	/* options error */
+options:
+	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
+	goto out;
+
+	/* cleanup, return error */
+out:
+	if (mirrorp != NULL)
+		meta_free_mirror(mirrorp);
+	return (rval);
+}
+
+/*
+ * reset mirrors
+ *
+ * With mirnp == NULL every mirror in the set is reset: in a multi-node
+ * set a metaclear command is sent for each one, otherwise this function
+ * calls itself for each one; the first failure stops the walk.
+ *
+ * With a specific mirnp the mirror is cleared through meta_reset()
+ * unless it has a parent (MDE_IN_USE).  When MDCMD_RECURSE is set, each
+ * submirror that is itself a metadevice is then cleared with
+ * meta_reset_by_name(); a failure there is recorded in the return value
+ * but the remaining submirrors are still attempted.
+ *
+ * Returns 0 on success, otherwise -1 or the value of mdmderror().
+ */
+int
+meta_mirror_reset(
+	mdsetname_t	*sp,
+	mdname_t	*mirnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_mirror_t	*mirrorp;
+	uint_t		smi;
+	int		rval = -1;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert((mirnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev))));
+
+	/* reset all mirrors */
+	if (mirnp == NULL) {
+		mdnamelist_t	*mirrornlp = NULL;
+		mdnamelist_t	*p;
+
+		/* for each mirror */
+		rval = 0;
+		if (meta_get_mirror_names(sp, &mirrornlp, 0, ep) < 0)
+			return (-1);
+		for (p = mirrornlp; (p != NULL); p = p->next) {
+			/* reset mirror */
+			mirnp = p->namep;
+			/*
+			 * If this is a multi-node set, we send a series
+			 * of individual metaclear commands.
+			 */
+			if (meta_is_mn_set(sp, ep)) {
+				if (meta_mn_send_metaclear_command(sp,
+				    mirnp->cname, options, 0, ep) != 0) {
+					rval = -1;
+					break;
+				}
+			} else {
+				if (meta_mirror_reset(sp, mirnp, options,
+				    ep) != 0) {
+					rval = -1;
+					break;
+				}
+			}
+		}
+
+		/* cleanup return success */
+		metafreenamelist(mirrornlp);
+		return (rval);
+	}
+
+	/* check name */
+	if (metachkmeta(mirnp, ep) != 0)
+		return (-1);
+
+	/* get unit structure */
+	if ((mirrorp = meta_get_mirror(sp, mirnp, ep)) == NULL)
+		return (-1);
+
+	/* make sure nobody owns us */
+	if (MD_HAS_PARENT(mirrorp->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(mirnp->dev),
+		    mirnp->cname));
+	}
+
+	/* clear subdevices cache */
+	if (invalidate_submirrors(sp, mirnp, ep) != 0)
+		return (-1);
+
+	/* clear metadevice */
+	if (meta_reset(sp, mirnp, options, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Mirror is cleared\n"), mirnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* clear subdevices (only when a recursive clear was requested) */
+	if (! (options & MDCMD_RECURSE))
+		goto out;
+	for (smi = 0; (smi < NMIRROR); ++smi) {
+		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
+		mdname_t	*submirnp = mdsp->submirnamep;
+
+		/* skip unused submirrors */
+		if (submirnp == NULL) {
+			assert(mdsp->state == SMS_UNUSED);
+			continue;
+		}
+
+		/* make sure we have a metadevice */
+		if (! metaismeta(submirnp))
+			continue;
+
+		/* clear submirror; note the failure but keep going */
+		if (meta_reset_by_name(sp, submirnp, options, ep) != 0)
+			rval = -1;
+	}
+
+	/* cleanup, return success (or the failure recorded in rval) */
+out:
+	meta_invalidate_name(mirnp);
+	return (rval);
+}
+
+/*
+ * Report whether any mirror in the list needs attention.
+ *
+ * Returns TRUE as soon as a mirror cannot be fetched or one of its
+ * submirrors has SMS_COMP_ERRED, SMS_ATTACHED or SMS_OFFLINE set in its
+ * state; returns FALSE when the whole list was walked without a hit.
+ * Errors raised while fetching a mirror are cleared, not reported.
+ */
+int
+meta_mirror_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *mirror_names)
+{
+	md_error_t	scratch = mdnullerror;
+	md_error_t	*ep = &scratch;
+	mdnamelist_t	*cur;
+	int		found = FALSE;
+
+	for (cur = mirror_names; (cur != NULL) && !found; cur = cur->next) {
+		md_mirror_t	*mp = meta_get_mirror(sp, cur->namep, ep);
+		int		i;
+
+		/* a mirror we cannot even fetch counts as a hit */
+		if (mp == NULL) {
+			found = TRUE;
+			break;
+		}
+
+		/* look for a submirror in one of the flagged states */
+		for (i = 0; i < NMIRROR; i++) {
+			if ((mp->submirrors[i].state &
+			    (SMS_COMP_ERRED|SMS_ATTACHED|SMS_OFFLINE)) != 0) {
+				found = TRUE;
+				break;
+			}
+		}
+	}
+
+	/* drop whatever error the lookups may have left behind */
+	if (!mdisok(ep))
+		mdclrerror(ep);
+
+	return (found);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mirror_resync.c b/usr/src/lib/lvm/libmeta/common/meta_mirror_resync.c
new file mode 100644
index 0000000000..f833ce5c3e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mirror_resync.c
@@ -0,0 +1,658 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * mirror operations
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mirror.h>
+#include <thread.h>
+
+extern	int	md_in_daemon;
+extern md_mn_client_list_t *mdmn_clients;
+
+/*
+ * Singly linked chain of mirrors.  The functions in this file keep one
+ * chain per resync pass number, in arrays of MD_PASS_MAX + 1 list heads
+ * (see setup_units() and free_units()).
+ */
+typedef struct mm_unit_list {
+	struct mm_unit_list	*next;	/* next in chain */
+	mdname_t		*namep;	/* mirror name */
+	mm_pass_num_t		pass;	/* pass number (index of its chain) */
+	uint_t			done;	/* nonzero: no longer polled */
+} mm_unit_list_t;
+
+/*
+ * meta_mirror_resync:
+ * ------------------
+ * Issue an MD_IOCSETSYNC request against a single mirror.
+ *
+ * Input Parameters:
+ *	sp	set owning the mirror (asserted to match the mirror's minor)
+ *	mirnp	name of the mirror
+ *	size	handed to the driver as ri_copysize
+ *	cmd	MD_RESYNC_START, MD_RESYNC_FORCE_MNSTART, MD_RESYNC_BLOCK,
+ *		MD_RESYNC_UNBLOCK, MD_RESYNC_KILL or MD_RESYNC_KILL_NO_WAIT
+ * Output Parameters
+ *	ep	error return structure
+ *
+ * Returns 0 on success.  On failure returns -1 if the driver name cannot
+ * be determined, otherwise whatever mdmderror(), mderror() or
+ * mdstealerror() return after recording the error in ep.
+ *
+ * meta_lock for this set should be held on entry.
+ */
+int
+meta_mirror_resync(
+	mdsetname_t		*sp,
+	mdname_t		*mirnp,
+	daddr_t			size,
+	md_error_t		*ep,
+	md_resync_cmd_t		cmd	/* Start/Block/Unblock/Kill */
+)
+{
+	md_resync_ioctl_t	args;
+	char			*drvname;
+
+	/* the caller must hand us the set that owns this mirror */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
+
+	/* refuse anything that is not a mirror */
+	drvname = metagetmiscname(mirnp, ep);
+	if (drvname == NULL)
+		return (-1);
+	if (strcmp(drvname, MD_MIRROR) != 0) {
+		return (mdmderror(ep, MDE_NOT_MM, meta_getminor(mirnp->dev),
+		    mirnp->cname));
+	}
+
+	/* build the request */
+	(void) memset(&args, 0, sizeof (args));
+	MD_SETDRIVERNAME(&args, MD_MIRROR, sp->setno);
+	args.ri_mnum = meta_getminor(mirnp->dev);
+	args.ri_copysize = size;
+
+	/* translate the command into driver flags */
+	if (cmd == MD_RESYNC_FORCE_MNSTART) {
+		args.ri_flags |= MD_RI_RESYNC_FORCE_MNSTART;
+	} else if (cmd == MD_RESYNC_START) {
+		args.ri_flags = 0;
+	} else if (cmd == MD_RESYNC_BLOCK) {
+		args.ri_flags = MD_RI_BLOCK;
+	} else if (cmd == MD_RESYNC_UNBLOCK) {
+		args.ri_flags = MD_RI_UNBLOCK;
+	} else if (cmd == MD_RESYNC_KILL) {
+		args.ri_flags = MD_RI_KILL;
+	} else if (cmd == MD_RESYNC_KILL_NO_WAIT) {
+		args.ri_flags = MD_RI_KILL | MD_RI_NO_WAIT;
+	} else {
+		/* TODO: Add new error MDE_BAD_RESYNC_FLAGS */
+		return (mderror(ep, MDE_BAD_RESYNC_OPT, mirnp->cname));
+	}
+
+	/* hand it to the driver */
+	if (metaioctl(MD_IOCSETSYNC, &args, &args.mde, mirnp->cname) != 0)
+		return (mdstealerror(ep, &args.mde));
+
+	return (0);
+}
+
+/*
+ * free_units:	release every unit on every per-pass chain and leave all
+ *		MD_PASS_MAX + 1 list heads set to NULL.
+ */
+static void
+free_units(
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1]
+)
+{
+	uint_t		pass = 0;
+
+	while (pass <= MD_PASS_MAX) {
+		mm_unit_list_t	*cur = mirrors[pass];
+
+		while (cur != NULL) {
+			mm_unit_list_t	*dead = cur;
+
+			/* step forward before the node goes away */
+			cur = cur->next;
+			Free(dead);
+		}
+		mirrors[pass++] = NULL;
+	}
+}
+
+/*
+ * setup_units:	build lists of units for each pass
+ *
+ * Every mirror named in the set gets a unit pushed onto the head of
+ * mirrors[pass]; a pass number outside 0..MD_PASS_MAX is filed under
+ * MD_PASS_MAX.  The caller must have zeroed mirrors[] beforehand.
+ *
+ * Returns -1 straight away if the mirror names cannot be obtained.
+ * Otherwise a mirror whose unit cannot be fetched is skipped and makes the
+ * final result -1, but the remaining mirrors are still queued, so the
+ * lists may be partly populated even on failure.
+ */
+static int
+setup_units(
+	mdsetname_t	*sp,
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1],
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*names = NULL;
+	mdnamelist_t	*cur;
+	int		status = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	if (meta_get_mirror_names(sp, &names, 0, ep) < 0)
+		return (-1);
+
+	cur = names;
+	while (cur != NULL) {
+		mdname_t	*np = cur->namep;
+		md_mirror_t	*mp;
+		mm_unit_list_t	*unit;
+
+		cur = cur->next;
+
+		/* remember the failure, then carry on with the others */
+		mp = meta_get_mirror(sp, np, ep);
+		if (mp == NULL) {
+			status = -1;
+			continue;
+		}
+
+		unit = Zalloc(sizeof (*unit));
+		unit->namep = np;
+		unit->pass = mp->pass_num;
+		if (!((unit->pass >= 0) && (unit->pass <= MD_PASS_MAX)))
+			unit->pass = MD_PASS_MAX;
+
+		/* push onto the chain for its pass */
+		unit->next = mirrors[unit->pass];
+		mirrors[unit->pass] = unit;
+	}
+
+	metafreenamelist(names);
+	return (status);
+}
+
+/*
+ * resync all mirrors (in background)
+ *
+ * Builds per-pass lists of the set's mirrors, calls md_daemonize() and then
+ * starts the resyncs pass by pass: multi-node sets go through
+ * meta_mn_send_setsync(), other sets through meta_mirror_resync() with
+ * MD_RESYNC_START.  Every populated pass other than the highest one is
+ * polled with MD_IOCGETSYNC until none of its resyncs is still in progress.
+ *
+ * Return values:
+ *	0	md_daemonize() returned a positive value (we are the parent)
+ *	0 / -1	md_daemonize() returned -1 and the work was carried out in
+ *		the calling process; -1 if any step recorded a failure
+ * When md_daemonize() returns 0 (we are the child) the function calls
+ * exit(0) instead of returning.
+ */
+int
+meta_mirror_resync_all(
+	mdsetname_t	*sp,
+	daddr_t		size,
+	md_error_t	*ep
+)
+{
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1];
+	mm_pass_num_t	pass, max_pass;
+	int		rval = 0, fval;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* get mirrors; a failure is recorded but whatever was found is used */
+	(void) memset(mirrors, 0, sizeof (mirrors));
+	if (setup_units(sp, mirrors, ep) != 0)
+		rval = -1;
+
+	/* fork a process */
+	if ((fval = md_daemonize(sp, ep)) != 0) {
+		/*
+		 * md_daemonize will fork off a process.  This is the
+		 * parent or an error.
+		 */
+		if (fval > 0) {
+			free_units(mirrors);
+			return (0);
+		}
+		/* daemonizing failed: carry on in this process instead */
+		mdclrerror(ep);
+	}
+	/*
+	 * Closing stdin/out/err here.
+	 * In case this was called thru rsh, the calling process on the other
+	 * side will know, it doesn't have to wait until all the resyncs have
+	 * finished.
+	 * Also initialise the rpc client pool so that this process will use
+	 * a unique pool of clients. If we don't do this, all of the forked
+	 * clients will end up using the same pool of clients which can result
+	 * in hung clients.
+	 */
+	if (meta_is_mn_set(sp, ep)) {
+		(void) close(0);
+		(void) close(1);
+		(void) close(2);
+		mdmn_clients = NULL;
+	}
+	assert((fval == 0) || (fval == -1));
+
+	/*
+	 * Determine which pass level is the highest that contains mirrors to
+	 * resync. We only need to wait for completion of earlier levels below
+	 * this high watermark. If all mirrors are at the same pass level
+	 * there is no requirement to wait for completion.
+	 */
+
+	max_pass = 1;
+	for (pass = MD_PASS_MAX; pass > 1; --pass) {
+		if (mirrors[pass] != NULL) {
+			max_pass = pass;
+			break;
+		}
+	}
+
+	/*
+	 * max_pass now contains the highest pass-level with resyncable mirrors
+	 */
+
+	/* do passes; note that the chain for pass 0 is never dispatched */
+	for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
+		int			dispatched = 0;
+		unsigned		howlong = 1;
+		mm_unit_list_t		*lp;
+
+		/* skip empty passes */
+		if (mirrors[pass] == NULL)
+			continue;
+
+		/* dispatch all resyncs in pass */
+		for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
+			if (meta_is_mn_set(sp, ep)) {
+				if (meta_mn_send_setsync(sp, lp->namep,
+				    size, ep) != 0) {
+					rval = -1;
+					lp->done = 1;
+				} else {
+					++dispatched;
+				}
+			} else {
+				if (meta_mirror_resync(sp, lp->namep, size, ep,
+				    MD_RESYNC_START) != 0) {
+					rval = -1;
+					lp->done = 1;
+				} else {
+					++dispatched;
+				}
+			}
+		}
+
+		/*
+		 * Wait for them to finish iff we are at a level lower than
+		 * max_pass. This orders the resyncs into distinct levels.
+		 * I.e. level 2 resyncs won't start until all level 1 ones
+		 * have completed.
+		 */
+		if (pass == max_pass)
+			continue;
+
+		howlong = 1;
+		while (dispatched > 0) {
+
+			/* wait a while */
+			(void) sleep(howlong);
+
+			/* see if any finished */
+			for (lp = mirrors[pass]; lp != NULL; lp = lp->next) {
+				md_resync_ioctl_t	ri;
+
+				if (lp->done)
+					continue;
+
+				(void) memset(&ri, '\0', sizeof (ri));
+				ri.ri_mnum = meta_getminor(lp->namep->dev);
+				MD_SETDRIVERNAME(&ri, MD_MIRROR, sp->setno);
+				if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde,
+				    lp->namep->cname) != 0) {
+					/* cannot query it: stop waiting on it */
+					(void) mdstealerror(ep, &ri.mde);
+					rval = -1;
+					lp->done = 1;
+					--dispatched;
+				} else if (! (ri.ri_flags & MD_RI_INPROGRESS)) {
+					lp->done = 1;
+					--dispatched;
+				}
+			}
+
+			/* wait a little longer next time, up to 10 seconds */
+			if (howlong < 10)
+				++howlong;
+		}
+	}
+
+	/* cleanup, return the accumulated status */
+	free_units(mirrors);
+	if (fval == 0)  /* we are the child process so exit */
+		exit(0);
+	return (rval);
+}
+
+/*
+ * meta_mn_mirror_resync_all:
+ * -------------------------
+ * Resync all mirrors associated with given set (arg). Called when master
+ * node is adding a node to a diskset.  Only want to initiate the resync on
+ * the current node.
+ *
+ * Input Parameters:
+ *	arg	pointer to the set_t number of the multi-node set
+ *
+ * Always returns NULL in the calling process (whether or not the child
+ * could be started; failures are reported with mde_perror()).  The child
+ * created by md_daemonize() never returns: it leaves through md_exit().
+ */
+void *
+meta_mn_mirror_resync_all(void *arg)
+{
+	set_t		setno = *((set_t *)arg);
+	mdsetname_t	*sp;
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1];
+	mm_pass_num_t	pass, max_pass;
+	md_error_t	mde = mdnullerror;
+	int		fval;
+
+
+	/* should have a set (setno is an integer, so compare it with 0) */
+	assert(setno != 0);
+
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	if (!(meta_is_mn_set(sp, &mde))) {
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	/* fork a process */
+	if ((fval = md_daemonize(sp, &mde)) != 0) {
+		/*
+		 * md_daemonize will fork off a process.  This is the
+		 * parent or an error.
+		 */
+		if (fval > 0) {
+			return (NULL);
+		}
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+	/*
+	 * Child process should never return back to rpc.metad, but
+	 * should exit.
+	 * Flush all internally cached data inherited from parent process
+	 * since cached data will be cleared when parent process RPC request
+	 * has completed (which is possibly before this child process
+	 * can complete).
+	 * Child process can retrieve and cache its own copy of data from
+	 * rpc.metad that won't be changed by the parent process.
+	 *
+	 * Reset md_in_daemon since this child will be a client of rpc.metad
+	 * not part of the rpc.metad daemon itself.
+	 * md_in_daemon is used by rpc.metad so that libmeta can tell if
+	 * this thread is rpc.metad or any other thread.  (If this thread
+	 * was rpc.metad it could use some short circuit code to get data
+	 * directly from rpc.metad instead of doing an RPC call to rpc.metad).
+	 */
+	md_in_daemon = 0;
+	metaflushsetname(sp);
+	sr_cache_flush_setno(setno);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		mde_perror(&mde, "");
+		md_exit(sp, 1);
+	}
+
+	if (meta_lock(sp, TRUE, &mde) != 0) {
+		mde_perror(&mde, "");
+		md_exit(sp, 1);
+	}
+
+	/*
+	 * Closing stdin/out/err here.
+	 */
+	(void) close(0);
+	(void) close(1);
+	(void) close(2);
+	assert(fval == 0);
+
+	/* get mirrors */
+	(void) memset(mirrors, 0, sizeof (mirrors));
+	if (setup_units(sp, mirrors, &mde) != 0) {
+		(void) meta_unlock(sp, &mde);
+		md_exit(sp, 1);
+	}
+
+	/*
+	 * Determine which pass level is the highest that contains mirrors to
+	 * resync. We only need to wait for completion of earlier levels below
+	 * this high watermark. If all mirrors are at the same pass level
+	 * there is no requirement to wait for completion.
+	 */
+	max_pass = 1;
+	for (pass = MD_PASS_MAX; pass > 1; --pass) {
+		if (mirrors[pass] != NULL) {
+			max_pass = pass;
+			break;
+		}
+	}
+
+	/*
+	 * max_pass now contains the highest pass-level with resyncable mirrors
+	 */
+	/* do passes */
+	for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
+		int			dispatched = 0;
+		unsigned		howlong = 1;
+		mm_unit_list_t		*lp;
+
+		/* skip empty passes */
+		if (mirrors[pass] == NULL)
+			continue;
+
+		/* dispatch all resyncs in pass */
+		for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
+			if (meta_mirror_resync(sp, lp->namep, 0, &mde,
+			    MD_RESYNC_FORCE_MNSTART) != 0) {
+				mdclrerror(&mde);
+				lp->done = 1;
+			} else {
+				++dispatched;
+			}
+		}
+
+		/*
+		 * Wait for them to finish iff we are at a level lower than
+		 * max_pass. This orders the resyncs into distinct levels.
+		 * I.e. level 2 resyncs won't start until all level 1 ones
+		 * have completed.
+		 */
+		if (pass == max_pass)
+			continue;
+
+		howlong = 1;
+		while (dispatched > 0) {
+
+			/* wait a while */
+			(void) sleep(howlong);
+
+			/* see if any finished */
+			for (lp = mirrors[pass]; lp != NULL; lp = lp->next) {
+				md_resync_ioctl_t	ri;
+
+				if (lp->done)
+					continue;
+
+				(void) memset(&ri, '\0', sizeof (ri));
+				ri.ri_mnum = meta_getminor(lp->namep->dev);
+				MD_SETDRIVERNAME(&ri, MD_MIRROR, sp->setno);
+				if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde,
+				    lp->namep->cname) != 0) {
+					/*
+					 * The failure is recorded in ri.mde,
+					 * not in mde; release it before ri
+					 * goes out of scope.
+					 */
+					mdclrerror(&ri.mde);
+					lp->done = 1;
+					--dispatched;
+				} else if (! (ri.ri_flags & MD_RI_INPROGRESS)) {
+					lp->done = 1;
+					--dispatched;
+				}
+			}
+
+			/* wait a little longer next time, up to 10 seconds */
+			if (howlong < 10)
+				++howlong;
+		}
+	}
+
+	/* cleanup, then leave the child process */
+	free_units(mirrors);
+	(void) meta_unlock(sp, &mde);
+	md_exit(sp, 0);
+	/*NOTREACHED*/
+}
+
+/*
+ * meta_mirror_resync_process:
+ * --------------------------
+ * Modify any resync that is in progress on this node for the given set.
+ *
+ * Input Parameters:
+ *	sp	setname to scan for mirrors
+ *	cmd	action to take:
+ *		MD_RESYNC_KILL	- kill all resync threads
+ *		MD_RESYNC_BLOCK	- block all resync threads
+ *		MD_RESYNC_UNBLOCK - resume all resync threads
+ * Output Parameters
+ *	ep	error return structure
+ *
+ * Nothing is sent to any mirror if setup_units() reports a failure.
+ * Failures of the individual meta_mirror_resync() calls are ignored apart
+ * from whatever they leave in ep.
+ *
+ * meta_lock for this set should be held on entry.
+ */
+static void
+meta_mirror_resync_process(mdsetname_t *sp, md_error_t *ep, md_resync_cmd_t cmd)
+{
+	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1];
+	mm_pass_num_t	pass;
+
+	/* Grab all the mirrors from the set (if any) */
+	(void) memset(mirrors, 0, sizeof (mirrors));
+	if (setup_units(sp, mirrors, ep) != 0) {
+		/*
+		 * setup_units() keeps queuing mirrors after it has recorded
+		 * a failure, so the lists may hold units that must be freed.
+		 */
+		free_units(mirrors);
+		return;
+	}
+
+	/* do passes */
+	for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
+		mm_unit_list_t		*lp;
+
+		/* skip empty passes */
+		if (mirrors[pass] == NULL)
+			continue;
+
+		/* Process all resyncs in pass */
+		for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
+			(void) meta_mirror_resync(sp, lp->namep, 0, ep,
+			    cmd);
+		}
+	}
+
+	/* Clear up mirror units */
+	free_units(mirrors);
+}
+
+/*
+ * meta_mirror_resync_process_all:
+ * ------------------------------
+ * Issue the given resync command to all mirrors contained in all multi-node
+ * sets.
+ *
+ * Input Parameters:
+ *	cmd	- MD_RESYNC_KILL, MD_RESYNC_BLOCK, MD_RESYNC_UNBLOCK
+ *
+ * Sets 1 .. get_max_sets() - 1 are visited.  A set is skipped when its
+ * name or descriptor cannot be obtained, when it is not a multi-node set,
+ * or when meta_lock() fails.  Errors are not reported to the caller.
+ */
+static void
+meta_mirror_resync_process_all(md_resync_cmd_t cmd)
+{
+	set_t		setno, max_sets;
+	md_error_t	mde = mdnullerror;
+	mdsetname_t	*this_sp;
+	md_set_desc	*sd;
+
+	/*
+	 * Traverse all sets looking for multi-node capable ones.
+	 */
+	max_sets = get_max_sets(&mde);
+	for (setno = 1; setno < max_sets; setno++) {
+		/* release, not just overwrite, the previous set's error */
+		mdclrerror(&mde);
+
+		if ((this_sp = metasetnosetname(setno, &mde)) == NULL)
+			continue;
+		if ((sd = metaget_setdesc(this_sp, &mde)) == NULL)
+			continue;
+		if (!MD_MNSET_DESC(sd))
+			continue;
+		if (meta_lock(this_sp, TRUE, &mde))
+			continue;
+
+		meta_mirror_resync_process(this_sp, &mde, cmd);
+		(void) meta_unlock(this_sp, &mde);
+	}
+
+	/* nothing is handed back, so drop whatever is still recorded */
+	mdclrerror(&mde);
+}
+
+/*
+ * meta_mirror_resync_kill_all:
+ * ---------------------------
+ * Send MD_RESYNC_KILL to every mirror of every multi-node set, aborting
+ * any resync that is in progress on this node.
+ * Note: this routine is provided for future use. For example to kill all
+ *	 resyncs on a node this could be used as long as the
+ *	 mddoors / rpc.mdcommd tuple is running on all members of the cluster.
+ */
+void
+meta_mirror_resync_kill_all(void)
+{
+	const md_resync_cmd_t	cmd = MD_RESYNC_KILL;
+
+	meta_mirror_resync_process_all(cmd);
+}
+
+/*
+ * meta_mirror_resync_block_all:
+ * ----------------------------
+ * Send MD_RESYNC_BLOCK to every mirror of every multi-node set.  This
+ * freezes the resync state on this machine until
+ * meta_mirror_resync_unblock_all is called.
+ */
+void
+meta_mirror_resync_block_all(void)
+{
+	const md_resync_cmd_t	cmd = MD_RESYNC_BLOCK;
+
+	meta_mirror_resync_process_all(cmd);
+}
+
+/*
+ * meta_mirror_resync_unblock_all:
+ * ------------------------------
+ * Send MD_RESYNC_UNBLOCK to every mirror of every multi-node set, letting
+ * previously blocked resync threads on this node continue.
+ */
+void
+meta_mirror_resync_unblock_all(void)
+{
+	const md_resync_cmd_t	cmd = MD_RESYNC_UNBLOCK;
+
+	meta_mirror_resync_process_all(cmd);
+}
+
+/*
+ * meta_mirror_resync_unblock:
+ * --------------------------
+ * Unblock any previously blocked resync threads for the given set.
+ * Errors are not reported to the caller.
+ * meta_lock for this set should be held on entry.
+ */
+void
+meta_mirror_resync_unblock(mdsetname_t *sp)
+{
+	md_error_t	mde = mdnullerror;
+
+	meta_mirror_resync_process(sp, &mde, MD_RESYNC_UNBLOCK);
+
+	/* the error is private to this call: release it before returning */
+	if (!mdisok(&mde))
+		mdclrerror(&mde);
+}
+
+/*
+ * meta_mirror_resync_kill:
+ * -----------------------
+ * Kill any resync threads running on mirrors in the given set.
+ * Errors are not reported to the caller.
+ * Called when releasing a set (meta_set_prv.c`halt_set)
+ */
+void
+meta_mirror_resync_kill(mdsetname_t *sp)
+{
+	md_error_t	mde = mdnullerror;
+
+	meta_mirror_resync_process(sp, &mde, MD_RESYNC_KILL);
+
+	/* the error is private to this call: release it before returning */
+	if (!mdisok(&mde))
+		mdclrerror(&mde);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c b/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c
new file mode 100644
index 0000000000..bd9b5cc508
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c
@@ -0,0 +1,636 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <wait.h>
+#include <sys/time.h>
+#include <meta.h>
+#include <metad.h>
+#include <mdmn_changelog.h>
+#include <syslog.h>
+#include <umem.h>
+
+/*
+ * Number of log entries per set.
+ *
+ * We want at least 4 spares available at all times
+ * in case new classes are added during a live upgrade.
+ *
+ * Allocate the entries in chunks of 16
+ */
+/* allocation granularity: the record count is a multiple of this */
+#define	MDMN_LOGRECS_QUANTA	16
+/* fewest spare records wanted on top of MD_MN_NCLASSES */
+#define	MDMN_LOGRECS_MINSPARES	4
+/* size of one incore changelog record */
+#define	MDMN_LOGHDR_SIZE	sizeof (mdmn_changelog_record_t)
+/* incore record plus a maximum-length message payload */
+#define	MDMN_LOGRECSIZE	(MDMN_LOGHDR_SIZE + MD_MN_MSG_MAXDATALEN)
+/* size of one ondisk changelog record */
+#define	MDMN_LOGRECSIZE_OD	sizeof (mdmn_changelog_record_od_t)
+/*
+ * True when the classes left over after the last full quantum would leave
+ * fewer than MDMN_LOGRECS_MINSPARES unused slots in the following quantum,
+ * i.e. when one more quantum of records has to be added.
+ */
+#define	MDMN_LOGRECS_TRIMUP	((MD_MN_NCLASSES % MDMN_LOGRECS_QUANTA) > \
+				(MDMN_LOGRECS_QUANTA - MDMN_LOGRECS_MINSPARES))
+
+static int	mdmn_commitlog(md_set_desc *, md_error_t *);
+static int	mdmn_log_it(set_t, md_error_t *, mdmn_changelog_record_t *lr);
+
+
+/* Global variables */
+
+/* per-set incore changelog arrays, indexed by set number, then by class */
+mdmn_changelog_record_t	*mdmn_changelog[MD_MAXSETS];
+/* per-set flags; MDMN_CLF_SNARFED is set once a set's log has been read in */
+int mdmn_changelog_snarfed[MD_MAXSETS];
+
+/*
+ * Total number of log records: the full quanta needed for MD_MN_NCLASSES
+ * plus one more quantum.  mdmn_allocate_changelog() adds a further quantum
+ * when MDMN_LOGRECS_TRIMUP is true.
+ */
+int mdmn_logrecs = (MDMN_LOGRECS_QUANTA +
+		((MD_MN_NCLASSES/MDMN_LOGRECS_QUANTA) * MDMN_LOGRECS_QUANTA));
+
+#ifdef DEBUG
+/*
+ * Debug-only helpers that write changelog records to syslog at LOG_DEBUG.
+ */
+
+/* log the header fields of one incore record, prefixed by fn_name */
+void
+dump_rec(char *fn_name, mdmn_changelog_record_t *lr)
+{
+	syslog(LOG_DEBUG,
+	    "%s incore: selfid 0x%x class %d flags %d "
+	    "msglen %d\n",
+	    fn_name, lr->lr_selfid, lr->lr_class, lr->lr_flags,
+	    lr->lr_msglen);
+}
+
+/* log the header fields of one ondisk record, prefixed by fn_name */
+void
+dump_rec_od(char *fn_name, mdmn_changelog_record_od_t *lr)
+{
+	syslog(LOG_DEBUG,
+	    "%s ondisk: selfid 0x%x class %d flags %d "
+	    "msglen %d\n",
+	    fn_name, lr->lr_selfid, lr->lr_class, lr->lr_flags,
+	    lr->lr_msglen);
+}
+
+/* log every incore record of the given set, one line per slot */
+void
+dump_array(char *fn_name, set_t setno)
+{
+	char	label[80];
+	int	slot = 0;
+
+	while (slot < mdmn_logrecs) {
+		mdmn_changelog_record_t	*rec = &mdmn_changelog[setno][slot];
+
+		(void) snprintf(label, sizeof (label), "%s class %d ",
+		    fn_name, slot);
+		dump_rec(label, rec);
+		slot++;
+	}
+}
+#endif
+
+/*
+ * copy_changelog: copies changelog records between their incore and
+ * ondisk forms.  The argument "direction" selects which way to copy:
+ * MD_MN_COPY_TO_ONDISK fills odp from incp, MD_MN_COPY_TO_INCORE fills
+ * incp from odp.  Both structures must already be allocated when this
+ * routine is called.
+ *
+ * The purpose of the changelog is to store a message that is in progress,
+ * so the changelog structure embeds the message structure.  The incore
+ * message holds a pointer to its payload, whereas the ondisk message has
+ * the payload embedded.  The message body is copied with copy_msg_1(),
+ * and only when the source record's lr_msglen is non-zero.
+ *
+ * Caveat Emptor: the payload buffers of both structures are assumed to be
+ * correctly allocated by the caller.
+ */
+
+static void
+copy_changelog(mdmn_changelog_record_t *incp,
+		mdmn_changelog_record_od_t *odp, int direction)
+{
+	assert(incp != NULL && odp != NULL);
+	assert((direction == MD_MN_COPY_TO_ONDISK) ||
+	    (direction == MD_MN_COPY_TO_INCORE));
+
+	/* anything other than "to ondisk" is treated as "to incore" */
+	if (direction != MD_MN_COPY_TO_ONDISK) {
+		incp->lr_revision = odp->lr_revision;
+		incp->lr_flags = odp->lr_flags;
+		incp->lr_selfid = odp->lr_selfid;
+		incp->lr_class = odp->lr_class;
+		incp->lr_msglen = odp->lr_msglen;
+		if (odp->lr_msglen)
+			copy_msg_1(&incp->lr_msg, &odp->lr_od_msg, direction);
+		return;
+	}
+
+	odp->lr_revision = incp->lr_revision;
+	odp->lr_flags = incp->lr_flags;
+	odp->lr_selfid = incp->lr_selfid;
+	odp->lr_class = incp->lr_class;
+	odp->lr_msglen = incp->lr_msglen;
+	if (incp->lr_msglen)
+		copy_msg_1(&incp->lr_msg, &odp->lr_od_msg, direction);
+}
+
+/*
+ * mdmn_allocate_changelog
+ *
+ * Changelog records are allocated on a per multi-node basis.
+ * This routine is called during MN set creation.
+ * It pre-allocates the changelog, as user records
+ * one per message class plus some spares.
+ * Once the records are allocated they are never freed until
+ * the mddb is deleted. The preallocation ensures that all nodes
+ * will have a consistent view of the mddb.
+ *
+ * The incore array built here is only scratch space for creating and
+ * committing the records; it is released again before returning and
+ * mdmn_changelog[setno] is left NULL, so that a later
+ * mdmn_snarf_changelog() allocates a fresh array.
+ *
+ * Each record is large enough to hold a maximum sized message
+ * Return Values:
+ *	0 - success (the result of the final commit is not checked)
+ *	-1 - fail
+ */
+int
+mdmn_allocate_changelog(mdsetname_t *sp, md_error_t *ep)
+{
+	mddb_userreq_t		req;
+	md_set_desc		*sd;
+	mdmn_changelog_record_t	*tlr;
+	int			i;
+	set_t			setno;
+
+	/* Get a pointer to the incore md_set_desc for this MN set */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+	setno = sd->sd_setno;
+	/*
+	 * Round up the number of changelog records
+	 * to the next value of MDMN_LOGRECS_QUANTA
+	 *
+	 * In all cases, make sure we have at least
+	 * four more entries than the number of classes
+	 * in order to provide space for live upgrades that
+	 * might add classes.
+	 *
+	 * The count is recomputed from the constants rather than added to
+	 * the global, so that calling this routine more than once cannot
+	 * keep inflating mdmn_logrecs.
+	 */
+
+	mdmn_logrecs = MDMN_LOGRECS_QUANTA +
+	    ((MD_MN_NCLASSES / MDMN_LOGRECS_QUANTA) * MDMN_LOGRECS_QUANTA) +
+	    ((MDMN_LOGRECS_TRIMUP) ? MDMN_LOGRECS_QUANTA : 0);
+
+	mdmn_changelog[setno] = Zalloc(MDMN_LOGHDR_SIZE * mdmn_logrecs);
+
+	for (i = 0; i < mdmn_logrecs; i++) {
+		(void) memset(&req, 0, sizeof (req));
+		METAD_SETUP_LR(MD_DB_CREATE, setno,  0);
+		/* grab a record big enough for max message size */
+		req.ur_size = MDMN_LOGRECSIZE_OD;
+
+		if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+			syslog(LOG_DEBUG, "allocate_log: %s\n",
+						mde_sperror(ep, ""));
+#endif
+			/* never leave a pointer to freed memory behind */
+			Free(mdmn_changelog[setno]);
+			mdmn_changelog[setno] = NULL;
+			return (-1);
+		}
+
+		tlr = &mdmn_changelog[setno][i];
+		tlr->lr_selfid = req.ur_recid;
+		tlr->lr_revision = MD_MN_CHANGELOG_RECORD_REVISION;
+		tlr->lr_class = i;
+	}
+
+	/* commit class, and selfid */
+	(void) mdmn_commitlog(sd, ep);
+
+	/* never leave a pointer to freed memory behind */
+	Free(mdmn_changelog[setno]);
+	mdmn_changelog[setno] = NULL;
+	return (0);
+}
+
+/*
+ * mdmn_reset_changelog
+ *
+ * Called during reconfig step 2.
+ * The only time the changelog is reset is when all nodes in a cluster
+ * are starting up. In this case changelog must be ignored, therefore
+ * it is reset.
+ *
+ * The function frees the incore data structures and zeros out the
+ * records. The ondisk records are never freed.
+ *
+ * Input Parameters:
+ *	sp	multi-node set whose changelog is to be reset
+ *	flag	MDMN_CLF_RESETLOG   - empty every record and commit the log
+ *		MDMN_CLF_RESETCACHE - discard the incore array for the set
+ * Output Parameters
+ *	ep	error return structure
+ *
+ * Return Values:
+ *	0 - success (also when the set has no changelog to snarf)
+ *	-1 - fail
+ */
+int
+mdmn_reset_changelog(mdsetname_t *sp, md_error_t *ep, int flag)
+{
+	md_set_desc		*sd;
+	mdmn_changelog_record_t	*lr;
+	set_t			setno;
+	int			lrc;
+
+	/* Get a pointer to the incore md_set_desc this MN set */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	setno = sd->sd_setno;
+
+	/* nothing snarfed means there is nothing to reset */
+	if (mdmn_snarf_changelog(setno, ep) == 0) {
+		return (0);
+	}
+
+	if (flag & MDMN_CLF_RESETLOG) {
+		for (lrc = 0; lrc < mdmn_logrecs; lrc++) {
+			lr = &mdmn_changelog[setno][lrc];
+			Free(lr->lr_msg.msg_event_data);
+			(void) memset(&lr->lr_msg, 0, sizeof (md_mn_msg_t));
+			lr->lr_msglen = 0;
+			lr->lr_flags = 0;
+		}
+		(void) mdmn_commitlog(sd, ep);
+#ifdef DEBUG
+		syslog(LOG_DEBUG, "reset_changelog: Log reset\n");
+#endif
+	}
+	/* now zap the array */
+	if (flag & MDMN_CLF_RESETCACHE) {
+#ifdef DEBUG
+		syslog(LOG_DEBUG, "reset_changelog: cache reset\n");
+#endif
+		/*
+		 * Free the record array itself; the slot in the global
+		 * mdmn_changelog[] table is static storage and must not be
+		 * handed to Free().
+		 */
+		Free(mdmn_changelog[setno]);
+		mdmn_changelog[setno] = NULL;
+		mdmn_changelog_snarfed[setno] = 0;
+	}
+	return (0);
+}
+
+/*
+ * Log a given message in the changelog.
+ * This function is only executed by the master node
+ * Return Values:
+ *	MDMNE_NULL:
+ *	    success, the log slot was free and now holds the message
+ *
+ *	MDMNE_ACK:
+ *	    success,
+ *	    the log slot is occupied with the same msg from a previous try.
+ *
+ *	MDMNE_CLASS_BUSY:
+ *	    The slot for this class is occupied by a different message.
+ *	    In that case the stored message needs to be replayed, while
+ *	    the current message is rejected with MDMNE_CLASS_BUSY to the
+ *	    initiator.
+ *
+ *	MDMNE_LOG_FAIL:
+ *	    Bad things happened (the record could not be committed to
+ *	    disk), cannot continue.
+ *
+ *	-1:
+ *	    No changelog records could be snarfed for the set.
+ */
+int
+mdmn_log_msg(md_mn_msg_t *msg)
+{
+	md_error_t		xep = mdnullerror;
+	set_t			setno = msg->msg_setno;
+	md_mn_msgclass_t	class = mdmn_get_message_class(msg->msg_type);
+	mdmn_changelog_record_t	*slot;
+
+	/* if not snarfed, snarf it */
+	if (mdmn_snarf_changelog(setno, &xep) <= 0) {
+		syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "log_msg: No records snarfed\n"));
+		return (-1);
+	}
+
+	/* each class owns exactly one slot */
+	slot = &mdmn_changelog[setno][class];
+
+	if (slot->lr_flags & MD_MN_LR_INUSE) {
+		/* occupied: same message again, or a different one? */
+		if (MSGID_CMP(&(msg->msg_msgid), &(slot->lr_msg.msg_msgid))) {
+			syslog(LOG_DAEMON | LOG_DEBUG, dgettext(TEXT_DOMAIN,
+			    "log_msg: msgid already logged:\n ID = "
+			    " (%d, 0x%llx-%d) setno %d class %d type %d\n"),
+			    MSGID_ELEMS(slot->lr_msg.msg_msgid),
+			    slot->lr_setno, slot->lr_class, slot->lr_msgtype);
+			return (MDMNE_ACK);
+		}
+
+		syslog(LOG_DAEMON | LOG_DEBUG, dgettext(TEXT_DOMAIN,
+		    "log_msg: id mismatch:\n"
+		    " stored    : ID = (%d, 0x%llx-%d)"
+		    " setno %d class %d type %d\n"
+		    " msg to log: ID = (%d, 0x%llx-%d)"
+		    " setno %d class %d type %d.\n"),
+		    MSGID_ELEMS(slot->lr_msg.msg_msgid), slot->lr_setno,
+		    slot->lr_class, slot->lr_msgtype,
+		    MSGID_ELEMS(msg->msg_msgid), msg->msg_setno, class,
+		    msg->msg_type);
+		return (MDMNE_CLASS_BUSY);
+	}
+
+	/* claim the slot and store a copy of the message in it */
+	slot->lr_flags |= MD_MN_LR_INUSE;
+	slot->lr_msglen = MD_MN_MSG_LEN(msg);
+	assert(slot->lr_msg.msg_event_data == NULL);
+	if (msg->msg_event_size)
+		slot->lr_msg.msg_event_data = Zalloc(msg->msg_event_size);
+	(void) copy_msg(msg, &(slot->lr_msg));
+
+	/* push the record out to disk */
+	if (mdmn_log_it(setno, &xep, slot) != 0) {
+		syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "mdmn_log_msg - failure committing logged msg to disk\n"));
+		return (MDMNE_LOG_FAIL);
+	}
+
+	return (MDMNE_NULL); /* this is good */
+}
+
+/*
+ * mdmn_unlog_msg(md_mn_msg_t *)
+ *
+ * Clear the log entry holding the indicated message and commit the
+ * emptied record to disk.  The entry is only cleared if the message id
+ * stored in the slot for the message's class matches msg.
+ * Only the set master can do this.
+ *
+ * Return Values:
+ *	0 - success
+ *	-1 - fail
+ */
+int
+mdmn_unlog_msg(md_mn_msg_t *msg)
+{
+	md_error_t		xep = mdnullerror;
+	set_t			setno = msg->msg_setno;
+	md_mn_msgclass_t	class = mdmn_get_message_class(msg->msg_type);
+	mdmn_changelog_record_t	*slot = NULL;
+	int			status = 0;
+
+	/* Find the log entry holding the indicated message */
+	if (mdmn_snarf_changelog(setno, &xep) == 0)
+		return (-1);
+
+	slot = &mdmn_changelog[setno][class];
+
+	/* assert the message is still logged */
+	assert(slot != NULL);
+	if (!MSGID_CMP(&(msg->msg_msgid), &(slot->lr_msg.msg_msgid))) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "unlog_msg: msgid mismatch\n"
+		    "\t\tstored: ID = (%d, 0x%llx-%d) setno %d "
+		    "class %d type %d\n"
+		    "\t\tattempting to unlog:\n"
+		    "\t\tID = (%d, 0x%llx-%d) setno %d class %d type %d.\n"),
+		    MSGID_ELEMS(slot->lr_msg.msg_msgid), slot->lr_setno,
+		    slot->lr_class, slot->lr_msgtype,
+		    MSGID_ELEMS(msg->msg_msgid), msg->msg_setno, class,
+		    msg->msg_type);
+		return (-1);
+	}
+
+	/* release the payload and mark the slot free */
+	if (slot->lr_msg.msg_event_data) {
+		Free(slot->lr_msg.msg_event_data);
+		slot->lr_msg.msg_event_data = NULL;
+	}
+	slot->lr_flags &= ~(MD_MN_LR_INUSE);
+	slot->lr_msglen = 0;
+
+	/* commit the updated log record to disk */
+	status = mdmn_log_it(setno, &xep, slot);
+#ifdef DEBUG
+	dump_rec("mdmn_unlog_msg: ", slot);
+#endif
+	return (status);
+}
+
+
+/*
+ * mdmn_get_changelogrec(set_t , md_mn_msgclass_t)
+ * Returns a pointer to the incore changelog record of the given class,
+ * snarfing the set's changelog first if that has not happened yet.
+ *
+ * Return Values:
+ *	non-NULL - success
+ *	NULL - fail (nothing could be snarfed for the set)
+ */
+mdmn_changelog_record_t *
+mdmn_get_changelogrec(set_t setno, md_mn_msgclass_t class)
+{
+	md_error_t		xep = mdnullerror;
+	mdmn_changelog_record_t	*records;
+
+	if (mdmn_snarf_changelog(setno, &xep) == 0)
+		return (NULL);
+
+	records = mdmn_changelog[setno];
+	assert(records != NULL);
+
+	return (&records[class]);
+}
+
+/*
+ * mdmn_commitlog(md_set_desc *, md_error_t *)
+ *
+ * Write every incore changelog record of the set to its ondisk record
+ * (MD_DB_SETDATA) and then commit them all in one go (MD_DB_COMMIT_MANY).
+ * Don't bother with other stuff hanging off the set record
+ * (e.g. drive records) since none of that is changing.
+ * Called only at changelog pre-allocation time or when flushing a log.
+ *
+ * Return Values:
+ *	0 - success
+ *	-1 - the set is not a multi-node set, or this node is not master
+ *	otherwise - the non-zero value returned by the failing metaioctl()
+ */
+
+static int
+mdmn_commitlog(md_set_desc *sd, md_error_t *ep)
+{
+	mdmn_changelog_record_od_t	odrec;	/* changelog ondisk record */
+	mdmn_changelog_record_t		*rec;
+	mddb_userreq_t			req;
+	set_t				setno;
+	uint_t				nbytes;
+	int				*ids;
+	int				idx;
+	int				status = 0;
+
+	/* Check for master and bounce non-master requests */
+	if (!(MD_MNSET_DESC(sd))) {
+		syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "mdmn_commitlog - Not MN Set\n"));
+		return (-1);
+	}
+	if (!sd->sd_mn_am_i_master) {
+		syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "mdmn_commit_log - Not Master\n"));
+		return (-1);
+	}
+
+	(void) memset(&req, 0, sizeof (req));
+
+	/* one record id per log record, plus a terminating 0 */
+	nbytes = (mdmn_logrecs + 1) * sizeof (int);
+	ids = Zalloc(nbytes);
+	setno = sd->sd_setno;
+
+	/* stage the ondisk image of every record */
+	for (idx = 0; idx < mdmn_logrecs; idx++) {
+		rec = &mdmn_changelog[setno][idx];
+		ids[idx] = rec->lr_selfid;
+		copy_changelog(rec, &odrec, MD_MN_COPY_TO_ONDISK);
+		METAD_SETUP_LR(MD_DB_SETDATA, setno, rec->lr_selfid);
+		req.ur_size  = MDMN_LOGRECSIZE_OD;
+		req.ur_data = (uint64_t)&odrec;
+		status = metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL);
+		if (status != 0) {
+			(void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+			syslog(LOG_DAEMON|LOG_DEBUG,
+			    "mdmn_commitlog - metaioctl SETDATA failure\n%s",
+			    mde_sperror(ep, ""));
+#endif
+			break;
+		}
+	}
+
+	/* staging failed: nothing to commit */
+	if (status != 0) {
+		Free(ids);
+		return (status);
+	}
+
+	/* set last rec to be 0 to indicate completion */
+	ids[idx] = 0;
+
+	/* Commit to mddb  on disk */
+	METAD_SETUP_LR(MD_DB_COMMIT_MANY, setno,
+	    mdmn_changelog[setno][0].lr_selfid);
+	req.ur_size = nbytes;
+	req.ur_data = (uint64_t)ids;
+	status = metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL);
+	if (status != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+		syslog(LOG_DAEMON|LOG_DEBUG,
+		    "mdmn_commitlog - metaioctl COMMIT_MANY"
+		    "Failure\n%s", mde_sperror(ep, ""));
+#endif
+	}
+
+	Free(ids);
+	return (status);
+}
+
+/*
+ * mdmn_log_it(set_t, md_error_t *, mdmn_changelog_record_t *)
+ *
+ * Commit one changed log record to disk: its ondisk image is written
+ * with MD_DB_SETDATA and then committed with MD_DB_COMMIT_ONE.
+ *
+ * Return Values:
+ *	0 - success
+ *	-1 - fail (the error is moved into ep)
+ */
+static int
+mdmn_log_it(set_t set, md_error_t *ep, mdmn_changelog_record_t *lr)
+{
+	mdmn_changelog_record_od_t	odrec;
+	mddb_userreq_t			req;
+	uint_t				nbytes;
+	int				*ids;
+	int				status = 0;
+
+	(void) memset(&req, 0, sizeof (req));
+
+	/* stage the ondisk image of the record */
+	copy_changelog(lr, &odrec, MD_MN_COPY_TO_ONDISK);
+	METAD_SETUP_LR(MD_DB_SETDATA, set, lr->lr_selfid);
+	req.ur_size = MDMN_LOGRECSIZE_OD;
+	req.ur_data = (uint64_t)&odrec;
+	if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+		syslog(LOG_DEBUG, "mdmn_log_it: DB_SETDATA  failed\n"
+		    "set %d selfid %d, size %d\n%s", set, lr->lr_selfid,
+		    req.ur_size, mde_sperror(ep, ""));
+#endif
+		return (-1);
+	}
+
+	/* id list: the changed record, plus null terminator */
+	nbytes = 2 * sizeof (int);
+	ids = Zalloc(nbytes);
+	ids[0] = lr->lr_selfid;
+	ids[1] = 0;
+
+	/* Commit to mddb  on disk */
+	METAD_SETUP_LR(MD_DB_COMMIT_ONE, set, lr->lr_selfid);
+	req.ur_size = nbytes;
+	req.ur_data = (uint64_t)ids;
+	if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &req.ur_mde);
+#ifdef DEBUG
+		syslog(LOG_DEBUG, "mdmn_log_it: DB_COMMIT_ONE  failed\n"
+		    "set %d selfid %d, size %d\n%s", set, lr->lr_selfid,
+		    req.ur_size, mde_sperror(ep, ""));
+#endif
+		status = -1;
+	}
+
+	Free(ids);
+	return (status);
+}
+
+/*
+ * mdmn_snarf_changelog(set_t, md_error_t *)
+ *
+ * snarf in the changelog entries and allocate incore structures
+ * if required.
+ * If the set's entry in the mdmn_changelog_snarfed array has
+ * MDMN_CLF_SNARFED set, then the records are already snarfed and
+ * nothing is read again.
+ *
+ * Each ondisk record is copied into the incore slot selected by its
+ * lr_class.  A record whose class does not fit into the incore array
+ * (mdmn_logrecs slots) is reported via syslog and skipped rather than
+ * being written outside the array.
+ *
+ * Called from set_snarf(), mdmn_log_msg(), and mdmn_unlog_msg()
+ * Return Values:
+ *	non-zero - success (the number of log records, mdmn_logrecs)
+ *	0 - fail (also for the local set, or when no records exist)
+ */
+int
+mdmn_snarf_changelog(set_t set, md_error_t *ep)
+{
+	mdmn_changelog_record_t	 *tlr;
+	mdmn_changelog_record_od_t	 *lr;
+	mddb_recid_t		id;
+	md_mn_msgclass_t	class;
+
+
+	/* the local set has no changelog */
+	if (set == MD_LOCAL_SET)
+		return (0);
+
+	id = 0;
+
+	if (mdmn_changelog_snarfed[set] & MDMN_CLF_SNARFED) {
+		assert(mdmn_changelog[set] != NULL);
+		return (mdmn_logrecs);
+	}
+
+	lr = (mdmn_changelog_record_od_t *)get_ur_rec(set, MD_UR_GET_NEXT,
+						    MDDB_UR_LR, &id, ep);
+	if (lr == NULL)
+		return (0);
+
+	/* only allocate if Log records exist */
+
+	if (mdmn_changelog[set] == NULL) {
+		/* Allocate incore state for the log */
+		mdmn_changelog[set] = Zalloc(MDMN_LOGHDR_SIZE *
+			mdmn_logrecs);
+	}
+
+	do {
+		class = lr->lr_class;
+		if (((int)class < 0) || ((int)class >= mdmn_logrecs)) {
+			/* the class comes from disk: never index past the array */
+			syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
+			    "snarf_changelog: ignoring record with "
+			    "out of range class %d\n"), (int)class);
+		} else {
+			tlr = &mdmn_changelog[set][class];
+			copy_changelog(tlr, lr, MD_MN_COPY_TO_INCORE);
+		}
+		Free(lr);
+		lr = (mdmn_changelog_record_od_t *)get_ur_rec(set,
+		    MD_UR_GET_NEXT, MDDB_UR_LR, &id, ep);
+	} while (lr != NULL);
+
+	/* Since log records counts are fixed return that value */
+	mdmn_changelog_snarfed[set] |= MDMN_CLF_SNARFED;
+	return (mdmn_logrecs);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c b/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c
new file mode 100644
index 0000000000..02ad7bf1e6
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c
@@ -0,0 +1,984 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <wait.h>
+#include <sys/time.h>
+#include <strings.h>
+#include <meta.h>
+#include <syslog.h>
+
+extern md_mn_msg_tbl_entry_t  msg_table[];
+
+/*
+ * When contacting the local rpc.mdcommd we always want to do that using
+ * the IPv4 version of localhost.
+ */
+#define	LOCALHOST_IPv4	"127.0.0.1"
+
+/*
+ * Look up the class recorded in msg_table for the given message type.
+ * msgtype is used as an index without any range check.
+ */
+md_mn_msgclass_t
+mdmn_get_message_class(md_mn_msgtype_t msgtype)
+{
+	md_mn_msg_tbl_entry_t	*entry = &msg_table[msgtype];
+
+	return (entry->mte_class);
+}
+
+/*
+ * Return the handler function recorded in msg_table for the given message
+ * type.  The returned function takes (message, flags, result) and returns
+ * void.  msgtype is used as an index without any range check.
+ */
+void (*
+mdmn_get_handler(md_mn_msgtype_t msgtype))
+	(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *res)
+{
+	return (msg_table[msgtype].mte_handler);
+}
+
+/*
+ * Return the submessage generator recorded in msg_table for the given
+ * message type.  The returned function takes a message and a pointer to a
+ * message list and returns int.  msgtype is used as an index without any
+ * range check.
+ */
+int (*
+mdmn_get_submessage_generator(md_mn_msgtype_t msgtype))
+	(md_mn_msg_t *msg, md_mn_msg_t **msglist)
+{
+	return (msg_table[msgtype].mte_smgen);
+}
+
+/*
+ * Look up the timeout recorded in msg_table for the given message type.
+ * msgtype is used as an index without any range check.
+ */
+time_t
+mdmn_get_timeout(md_mn_msgtype_t msgtype)
+{
+	md_mn_msg_tbl_entry_t	*entry = &msg_table[msgtype];
+
+	return (entry->mte_timeout);
+}
+
+
+/*
+ * ldump_msg()
+ * Print the header fields of a message to stderr, one field per line,
+ * each line preceded by the given prefix.  The event data itself is not
+ * printed, only its size.
+ */
+void
+ldump_msg(char *prefix, md_mn_msg_t *msg)
+{
+	/*
+	 * Print the address through an unsigned long: a cast to uint_t
+	 * would drop the upper half of the pointer in an LP64 build.
+	 */
+	(void) fprintf(stderr, "%s &msg   = 0x%lx\n", prefix,
+	    (unsigned long)msg);
+	(void) fprintf(stderr, "%s ID     = (%d, 0x%llx-%d)\n", prefix,
+	    MSGID_ELEMS(msg->msg_msgid));
+	(void) fprintf(stderr, "%s sender = %d\n", prefix, msg->msg_sender);
+	(void) fprintf(stderr, "%s flags  = 0x%x\n", prefix, msg->msg_flags);
+	(void) fprintf(stderr, "%s setno  = %d\n", prefix, msg->msg_setno);
+	(void) fprintf(stderr, "%s type   = %d\n", prefix, msg->msg_type);
+	(void) fprintf(stderr, "%s size   = %d\n", prefix, msg->msg_event_size);
+}
+
+
+/*
+ * Timeout value passed to every clnt_call() made by the RPC stubs below
+ * (25 seconds, 0 microseconds).
+ * Default timeout can be changed using clnt_control()
+ */
+static struct timeval TIMEOUT = { 25, 0 };
+
+/*
+ * mdmn_send_1()
+ * Client stub for the mdmn_send RPC procedure.
+ *
+ * Returns a newly allocated md_mn_result_t filled in by the call, or
+ * NULL if clnt_call() did not return RPC_SUCCESS.
+ */
+md_mn_result_t *
+mdmn_send_1(argp, clnt)
+	md_mn_msg_t *argp;
+	CLIENT *clnt;
+{
+	md_mn_result_t *clnt_res = Zalloc(sizeof (md_mn_result_t));
+
+	if (clnt_call(clnt, mdmn_send,
+		(xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp,
+		(xdrproc_t)xdr_md_mn_result_t, (caddr_t)clnt_res,
+		TIMEOUT) != RPC_SUCCESS) {
+		/* Don't leak the result structure when the call fails. */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * mdmn_work_1()
+ * Client stub for the mdmn_work RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_work_1(argp, clnt)
+	md_mn_msg_t *argp;
+	CLIENT *clnt;
+{
+	enum clnt_stat	status;
+	int		*reply;
+
+	reply = Zalloc(sizeof (int));
+	status = clnt_call(clnt, mdmn_work,
+	    (xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)reply, TIMEOUT);
+	if (status == RPC_SUCCESS)
+		return (reply);
+
+	/* The call failed: release the reply buffer and report NULL. */
+	Free(reply);
+	return (NULL);
+}
+
+/*
+ * mdmn_wakeup_initiator_1()
+ * Client stub for the mdmn_wakeup_initiator RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_wakeup_initiator_1(argp, clnt)
+	md_mn_result_t *argp;
+	CLIENT *clnt;
+{
+	enum clnt_stat	status;
+	int		*reply;
+
+	reply = Zalloc(sizeof (int));
+	status = clnt_call(clnt, mdmn_wakeup_initiator,
+	    (xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)reply, TIMEOUT);
+	if (status == RPC_SUCCESS)
+		return (reply);
+
+	/* The call failed: release the reply buffer and report NULL. */
+	Free(reply);
+	return (NULL);
+}
+
+/*
+ * mdmn_wakeup_master_1()
+ * Client stub for the mdmn_wakeup_master RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_wakeup_master_1(argp, clnt)
+	md_mn_result_t *argp;
+	CLIENT *clnt;
+{
+	enum clnt_stat	status;
+	int		*reply;
+
+	reply = Zalloc(sizeof (int));
+	status = clnt_call(clnt, mdmn_wakeup_master,
+	    (xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp,
+	    (xdrproc_t)xdr_int, (caddr_t)reply, TIMEOUT);
+	if (status == RPC_SUCCESS)
+		return (reply);
+
+	/* The call failed: release the reply buffer and report NULL. */
+	Free(reply);
+	return (NULL);
+}
+
+/*
+ * mdmn_comm_lock_1()
+ * Client stub for the mdmn_comm_lock RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_lock_1(argp, clnt)
+	md_mn_set_and_class_t *argp;
+	CLIENT *clnt;
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_lock,
+		(xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
+		(xdrproc_t)xdr_int, (caddr_t)clnt_res,
+		TIMEOUT) != RPC_SUCCESS) {
+		/* Don't leak the reply buffer when the call fails. */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * mdmn_comm_unlock_1()
+ * Client stub for the mdmn_comm_unlock RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_unlock_1(argp, clnt)
+	md_mn_set_and_class_t *argp;
+	CLIENT *clnt;
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_unlock,
+		(xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
+		(xdrproc_t)xdr_int, (caddr_t)clnt_res,
+		TIMEOUT) != RPC_SUCCESS) {
+		/* Don't leak the reply buffer when the call fails. */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * mdmn_comm_suspend_1()
+ * Client stub for the mdmn_comm_suspend RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_suspend_1(argp, clnt)
+	md_mn_set_and_class_t *argp;
+	CLIENT *clnt;
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_suspend,
+		(xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
+		(xdrproc_t)xdr_int, (caddr_t)clnt_res,
+		TIMEOUT) != RPC_SUCCESS) {
+		/* Don't leak the reply buffer when the call fails. */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * mdmn_comm_resume_1()
+ * Client stub for the mdmn_comm_resume RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_resume_1(argp, clnt)
+	md_mn_set_and_class_t *argp;
+	CLIENT *clnt;
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_resume,
+		(xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
+		(xdrproc_t)xdr_int, (caddr_t)clnt_res,
+		TIMEOUT) != RPC_SUCCESS) {
+		/* Don't leak the reply buffer when the call fails. */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * mdmn_comm_reinit_set_1()
+ * Client stub for the mdmn_comm_reinit_set RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_reinit_set_1(argp, clnt)
+	set_t *argp;
+	CLIENT *clnt;
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_reinit_set,
+		(xdrproc_t)xdr_set_t, (caddr_t)argp,
+		(xdrproc_t)xdr_int, (caddr_t)clnt_res,
+		TIMEOUT) != RPC_SUCCESS) {
+		/* Don't leak the reply buffer when the call fails. */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+/*
+ * mdmn_comm_msglock_1()
+ * Client stub for the mdmn_comm_msglock RPC procedure.
+ *
+ * Returns a newly allocated int holding the reply, or NULL if
+ * clnt_call() did not return RPC_SUCCESS.
+ */
+int *
+mdmn_comm_msglock_1(argp, clnt)
+	md_mn_type_and_lock_t *argp;
+	CLIENT *clnt;
+{
+	int *clnt_res = Zalloc(sizeof (int));
+
+	if (clnt_call(clnt, mdmn_comm_msglock,
+		(xdrproc_t)xdr_md_mn_type_and_lock_t, (caddr_t)argp,
+		(xdrproc_t)xdr_int, (caddr_t)clnt_res,
+		TIMEOUT) != RPC_SUCCESS) {
+		/* Don't leak the reply buffer when the call fails. */
+		Free(clnt_res);
+		return (NULL);
+	}
+	return (clnt_res);
+}
+
+
+/*
+ * Microseconds per tick.  The mte_ticks1 / mte_ticks2 retry delays taken
+ * from msg_table are multiplied by this value before being handed to
+ * usleep().
+ */
+#define	USECS_PER_TICK	10000
+
+
+/*
+ * mdmn_create_msgid()
+ * Let the kernel create a clusterwide unique message ID (via the
+ * MD_IOCGUNIQMSGID ioctl) and store it in *msgid.
+ *
+ * returns 0 on success
+ *	   1 on failure
+ */
+
+int
+mdmn_create_msgid(md_mn_msgid_t *msgid)
+{
+	md_error_t	ioctl_err = mdnullerror;
+	int		failed;
+
+	/* Without somewhere to store the ID there is nothing to do. */
+	if (msgid == NULL)
+		return (1);
+
+	failed = metaioctl(MD_IOCGUNIQMSGID, msgid, &ioctl_err, NULL);
+	if (failed != 0) {
+		/* Leave a recognisably unset ID behind. */
+		msgid->mid_nid = ~0u;
+		msgid->mid_time = 0LL;
+		return (1);
+	}
+
+	/*
+	 * mid_smid and mid_oclass only matter for submessages.  Submessages
+	 * inherit the message ID of their parent and never come through
+	 * here, so both fields can safely be cleared.
+	 */
+	msgid->mid_oclass = 0;
+	msgid->mid_smid = 0;
+
+	/* If the node ID is still unset, something seems to be wrong. */
+	return ((msgid->mid_nid == ~0u) ? 1 : 0);
+}
+
+/*
+ * copy_result()
+ * Return a newly allocated copy of *res.  The scalar fields are copied
+ * directly; the stdout/stderr buffers (mmr_out / mmr_err) are duplicated
+ * when their sizes are greater than zero, and the host, extra and name
+ * strings of mmr_ep are duplicated when present.
+ */
+md_mn_result_t *
+copy_result(md_mn_result_t *res)
+{
+	md_mn_result_t *nres;
+
+	nres = Zalloc(sizeof (md_mn_result_t));
+	/* It's MSGID_COPY(from, to); */
+	MSGID_COPY(&(res->mmr_msgid), &(nres->mmr_msgid));
+	nres->mmr_msgtype	= res->mmr_msgtype;
+	nres->mmr_setno		= res->mmr_setno;
+	nres->mmr_flags		= res->mmr_flags;
+	nres->mmr_sender	= res->mmr_sender;
+	nres->mmr_failing_node	= res->mmr_failing_node;
+	nres->mmr_comm_state	= res->mmr_comm_state;
+	nres->mmr_exitval	= res->mmr_exitval;
+	nres->mmr_out_size	= res->mmr_out_size;
+	nres->mmr_err_size	= res->mmr_err_size;
+	if (res->mmr_out_size > 0) {
+		nres->mmr_out = Zalloc(res->mmr_out_size);
+		bcopy(res->mmr_out, nres->mmr_out, res->mmr_out_size);
+	}
+	if (res->mmr_err_size > 0) {
+		nres->mmr_err = Zalloc(res->mmr_err_size);
+		bcopy(res->mmr_err, nres->mmr_err, res->mmr_err_size);
+	}
+	/*
+	 * These members are pointers, so they are tested against NULL
+	 * rather than against the character constant '\0'.
+	 */
+	if (res->mmr_ep.host != NULL) {
+		nres->mmr_ep.host = strdup(res->mmr_ep.host);
+	}
+	if (res->mmr_ep.extra != NULL) {
+		nres->mmr_ep.extra = strdup(res->mmr_ep.extra);
+	}
+	if (res->mmr_ep.name != NULL) {
+		nres->mmr_ep.name = strdup(res->mmr_ep.name);
+	}
+	return (nres);
+}
+
+/*
+ * free_result()
+ * Release a result structure: the stdout/stderr buffers (when their sizes
+ * are greater than zero), the host, extra and name strings of mmr_ep
+ * (when present), and finally the structure itself.
+ */
+void
+free_result(md_mn_result_t *res)
+{
+	if (res->mmr_out_size > 0) {
+		Free(res->mmr_out);
+	}
+	if (res->mmr_err_size > 0) {
+		Free(res->mmr_err);
+	}
+	/*
+	 * These members are pointers, so they are tested against NULL
+	 * rather than against the character constant '\0'.
+	 */
+	if (res->mmr_ep.host != NULL) {
+		Free(res->mmr_ep.host);
+	}
+	if (res->mmr_ep.extra != NULL) {
+		Free(res->mmr_ep.extra);
+	}
+	if (res->mmr_ep.name != NULL) {
+		Free(res->mmr_ep.name);
+	}
+	Free(res);
+}
+
+
+/*
+ * copy_msg()
+ * Copy a given message into dest and return dest.  If dest is NULL a new
+ * message is allocated and returned instead.  If the destination has no
+ * event data buffer yet, one of msg->msg_event_size bytes is allocated;
+ * an existing buffer is reused as it is.
+ */
+md_mn_msg_t *
+copy_msg(md_mn_msg_t *msg, md_mn_msg_t *dest)
+{
+	md_mn_msg_t *target = dest;
+
+	if (target == NULL)
+		target = Zalloc(sizeof (md_mn_msg_t));
+
+	if (target->msg_event_data == NULL)
+		target->msg_event_data = Zalloc(msg->msg_event_size);
+
+	/* The argument order is MSGID_COPY(from, to). */
+	MSGID_COPY(&(msg->msg_msgid), &(target->msg_msgid));
+	target->msg_sender = msg->msg_sender;
+	target->msg_flags = msg->msg_flags;
+	target->msg_setno = msg->msg_setno;
+	target->msg_type = msg->msg_type;
+	target->msg_event_size = msg->msg_event_size;
+
+	/* Only touch the event data when there is some to copy. */
+	if (msg->msg_event_size > 0) {
+		bcopy(msg->msg_event_data, target->msg_event_data,
+		    msg->msg_event_size);
+	}
+	return (target);
+}
+
+/*
+ * copy_msg_1()
+ * Copy a message between its incore form (msg) and its on-disk form
+ * (msgod).  direction selects which way the data flows:
+ *	MD_MN_COPY_TO_ONDISK: msg   -> msgod
+ *	MD_MN_COPY_TO_INCORE: msgod -> msg
+ * When copying to incore, an event data buffer is allocated if msg does
+ * not have one yet.
+ */
+void
+copy_msg_1(md_mn_msg_t *msg, md_mn_msg_od_t *msgod, int direction)
+{
+	assert((direction == MD_MN_COPY_TO_ONDISK) ||
+	    (direction == MD_MN_COPY_TO_INCORE));
+
+	if (direction != MD_MN_COPY_TO_ONDISK) {
+		/* on-disk -> incore */
+		MSGID_COPY(&(msgod->msg_msgid), &(msg->msg_msgid));
+		msg->msg_sender = msgod->msg_sender;
+		msg->msg_flags = msgod->msg_flags;
+		msg->msg_setno = msgod->msg_setno;
+		msg->msg_type = msgod->msg_type;
+		msg->msg_event_size = msgod->msg_od_event_size;
+		if (msg->msg_event_data == NULL)
+			msg->msg_event_data = Zalloc(msg->msg_event_size);
+
+		bcopy(&msgod->msg_od_event_data[0],
+		    msg->msg_event_data, msgod->msg_od_event_size);
+		return;
+	}
+
+	/* incore -> on-disk */
+	MSGID_COPY(&(msg->msg_msgid), &(msgod->msg_msgid));
+	msgod->msg_sender = msg->msg_sender;
+	msgod->msg_flags = msg->msg_flags;
+	msgod->msg_setno = msg->msg_setno;
+	msgod->msg_type = msg->msg_type;
+	msgod->msg_od_event_size = msg->msg_event_size;
+
+	/* paranoid checks: only copy event data that really exists */
+	if (msg->msg_event_size != 0 && msg->msg_event_data != NULL) {
+		bcopy(msg->msg_event_data, &msgod->msg_od_event_data[0],
+		    msg->msg_event_size);
+	}
+}
+
+/*
+ * free_msg()
+ * Free a message.  The event data buffer is released only when the
+ * message's event size is greater than zero; the message structure itself
+ * is always released.
+ */
+void
+free_msg(md_mn_msg_t *msg)
+{
+	if (msg->msg_event_size > 0)
+		Free(msg->msg_event_data);
+
+	Free(msg);
+}
+
+
+/* The following declarations are only for the next two routines */
+
+/* Head of the list of currently unused RPC clients to the local commd */
+md_mn_client_list_t *mdmn_clients;
+
+/* Protects mdmn_clients; initialized in mdmn_get_local_clnt() */
+mutex_t	mcl_mutex;
+/* Flag values accepted by mdmn_get_local_clnt() */
+#define	MNGLC_INIT_ONLY	0x0001
+#define	MNGLC_FOR_REAL	0x0002
+/*
+ * mdmn_get_local_clnt(flag)
+ * Hand out an rpc client for talking to the local commd.
+ * A client from the free pool is reused if one is available, otherwise
+ * a new one is created.
+ * Every multithreaded application that uses mdmn_send_message must call
+ * this single threaded first with the special flag, so the one-time
+ * initialization is done in a safe environment.
+ *
+ * Input: MNGLC_INIT_ONLY: just initializes the mutex
+ *        MNGLC_FOR_REAL : do real work
+ * Output:
+ *	An rpc client for sending rpc requests to the local commd
+ *	NULL in case of an error (and always for MNGLC_INIT_ONLY)
+ *
+ */
+static CLIENT *
+mdmn_get_local_clnt(uint_t flag)
+{
+	static int		inited = 0;
+	md_mn_client_list_t	*head;
+	CLIENT			*clnt;
+
+	/* One-time setup of the mutex guarding the pool. */
+	if (inited == 0) {
+		(void) mutex_init(&mcl_mutex, USYNC_THREAD, NULL);
+		inited = 1;
+	}
+
+	if (flag == MNGLC_INIT_ONLY)
+		return ((CLIENT *)NULL);
+
+	(void) mutex_lock(&mcl_mutex);
+	head = mdmn_clients;
+	if (head != (md_mn_client_list_t *)NULL) {
+		/*
+		 * A previous put operation left an entry behind: unlink it
+		 * from the head of the list, free the list node around it
+		 * and hand out the client it carried.
+		 */
+		clnt = head->mcl_clnt;
+		mdmn_clients = head->mcl_next;
+		Free(head);
+	} else {
+		/* The pool is empty: create a client and return it. */
+		clnt = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD,
+		    ONE, "tcp");
+	}
+	(void) mutex_unlock(&mcl_mutex);
+
+	if (clnt == (CLIENT *)NULL)
+		clnt_pcreateerror("local_daemon");
+
+	return (clnt);
+}
+
+/*
+ * mdmn_put_local_clnt()
+ * returns a no longer used client to the pool
+ *
+ * If no list node can be allocated for the pool, the client is destroyed
+ * instead of being pooled.
+ *
+ * Input: an RPC client
+ * Output: void
+ */
+static void
+mdmn_put_local_clnt(CLIENT *local_daemon)
+{
+	md_mn_client_list_t *entry;
+
+	/*
+	 * Allocate the list node before taking the lock, and check the
+	 * result: malloc() may fail and must not be dereferenced blindly.
+	 */
+	entry = (md_mn_client_list_t *)
+	    malloc(sizeof (md_mn_client_list_t));
+	if (entry == NULL) {
+		clnt_destroy(local_daemon);
+		return;
+	}
+	entry->mcl_clnt = local_daemon;
+
+	/* Push the node onto the head of the pool. */
+	(void) mutex_lock(&mcl_mutex);
+	entry->mcl_next = mdmn_clients;
+	mdmn_clients = entry;
+	(void) mutex_unlock(&mcl_mutex);
+}
+
+/*
+ * mdmn_send_message()
+ * The regular interface for sending a message.
+ * All arguments are passed straight through to
+ * mdmn_send_message_with_msgid(), with MD_NULL_MSGID supplied as the
+ * message ID.
+ *
+ * Normally there is no message ID yet for the message to be sent.  Only
+ * when a previously logged message is replayed does a msgid already
+ * exist; in that case mdmn_send_message_with_msgid() has to be called
+ * directly.
+ *
+ * Return values / CAVEAT EMPTOR: see mdmn_send_message_with_msgid()
+ */
+
+int
+mdmn_send_message(
+		set_t setno,
+		md_mn_msgtype_t type,
+		uint_t flags,
+		char *data,
+		int size,
+		md_mn_result_t **result,
+		md_error_t *ep)
+{
+	int	rval;
+
+	rval = mdmn_send_message_with_msgid(setno, type, flags, data, size,
+	    result, MD_NULL_MSGID, ep);
+	return (rval);
+}
+/*
+ * mdmn_send_message_with_msgid()
+ * Create a message from the given pieces of data and hand it over
+ * to the local commd.
+ * This may fail for various reasons (rpc error / class busy / class locked ...)
+ * Some error types are immediately deadly, others will cause retries
+ * until the request is fulfilled or until the retries are exceeded.
+ *
+ * In case an error is returned it is up to the user to decide what to do.
+ *
+ * Parameters:
+ *	setno, type, flags, data, size - the pieces the message is built from
+ *	result - where the pointer to the result is stored (must not be NULL)
+ *	msgid  - message ID to use; only consulted for MD_MSGF_REPLAY_MSG
+ *	ep     - filled in when failing on a suspended set
+ *		 (MD_MSGF_FAIL_ON_SUSPEND)
+ *
+ * Returns:
+ *	0 on success
+ *	1 if retries1 exceeded
+ *	2 if retries2 exceeded
+ *	-1 if connecting to the local daemon failed
+ *	-2 if the RPC call to the local daemon failed
+ *	-3 if this node hasn't yet joined the set
+ *	-4 if any other problem occurred (including an invalid message type)
+ *
+ * CAVEAT EMPTOR:
+ *	The caller is responsible for calling free_result() when finished with
+ *	the results!
+ */
+int
+mdmn_send_message_with_msgid(
+		set_t setno,
+		md_mn_msgtype_t type,
+		uint_t flags,
+		char *data,
+		int size,
+		md_mn_result_t **result,
+		md_mn_msgid_t *msgid,
+		md_error_t *ep)
+{
+	uint_t retry1, ticks1, retry2, ticks2;
+	int retval;
+
+	CLIENT *local_daemon;
+	struct timeval timeout;
+
+	md_mn_msg_t msg;
+	md_mn_result_t *resp;
+
+	/*
+	 * Special case for multithreaded applications:
+	 * When starting up, the application should call mdmn_send_message
+	 * single threaded with all parameters set to NULL.
+	 * When we detect this we know, we safely can do initialization
+	 * stuff here.
+	 * We only check for set and type being zero
+	 */
+	if ((setno == 0) && (type == 0)) {
+		/* do all needed initializations here */
+		(void) mdmn_get_local_clnt(MNGLC_INIT_ONLY);
+		return (0); /* success */
+	}
+
+
+	/* did the caller specify space to store the result pointer? */
+	if (result == (md_mn_result_t **)NULL) {
+		syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
+		    "FATAL, can not allocate result structure\n"));
+		return (-4);
+	}
+	*result = NULL;
+
+	/*
+	 * type is used as an index into msg_table below, so refuse
+	 * anything outside the table before touching it.
+	 */
+	if ((uint_t)type >= (uint_t)MD_MN_NMESSAGES) {
+		syslog(LOG_INFO, "send_message: invalid message type %d\n",
+		    (int)type);
+		return (-4);
+	}
+
+	/*
+	 * Start from an all-zero message so that fields which are not
+	 * explicitly set below (e.g. msg_sender) are not sent with
+	 * whatever happened to be on the stack.
+	 */
+	bzero(&msg, sizeof (msg));
+
+	/* Replay messages already have their msgID */
+	if ((flags & MD_MSGF_REPLAY_MSG) == 0) {
+		if (mdmn_create_msgid(&msg.msg_msgid) != 0) {
+			syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
+			    "FATAL, can not create message ID\n"));
+			return (-4);
+		}
+	} else {
+		/* in this case a message ID must be specified */
+		assert(msgid != MD_NULL_MSGID);
+		MSGID_COPY(msgid, &msg.msg_msgid);
+	}
+
+
+	/* Fill in the message from the pieces handed in by the caller. */
+	msg.msg_flags		= flags;
+	msg.msg_setno		= setno;
+	msg.msg_type		= type;
+	msg.msg_event_size	= size;
+	msg.msg_event_data	= data;
+
+	/*
+	 * For the timeout pick the specific timeout for the message times
+	 * the maximum number of nodes.
+	 * This is a better estimate than 1 hour or 3 days or never.
+	 */
+	timeout.tv_sec = mdmn_get_timeout(type) * NNODES;
+	timeout.tv_usec = 0;
+
+	if (flags & MD_MSGF_VERBOSE) {
+		syslog(LOG_INFO, "send_message: ID=(%d, 0x%llx-%d)\n",
+		    MSGID_ELEMS(msg.msg_msgid));
+	}
+
+	/* get an RPC client to the local commd */
+	local_daemon = mdmn_get_local_clnt(MNGLC_FOR_REAL);
+	if (local_daemon == (CLIENT *)NULL) {
+		return (-1);
+	}
+	clnt_control(local_daemon, CLSET_TIMEOUT, (char *)&timeout);
+
+	retry1 = msg_table[type].mte_retry1;
+	ticks1 = msg_table[type].mte_ticks1;
+	retry2 = msg_table[type].mte_retry2;
+	ticks2 = msg_table[type].mte_ticks2;
+
+	/*
+	 * run that loop until:
+	 * - commstate is Ok
+	 * - deadly commstate occurred
+	 * - retries1 or retries2 exceeded
+	 */
+	for (; ; ) {
+		*result = mdmn_send_1(&msg, local_daemon);
+		resp = *result;
+		if (resp != (md_mn_result_t *)NULL) {
+			/* Bingo! */
+			if (resp->mmr_comm_state == MDMNE_ACK) {
+				retval = 0;
+				goto out;
+			}
+			/* Hmm... what if there's no handler? */
+			if (resp->mmr_comm_state == MDMNE_NO_HANDLER) {
+				retval = 0;
+				goto out;
+
+			}
+			/*
+			 * This node didn't yet join the disk set. It is not
+			 * supposed to send any messages then.
+			 * This is deadly (no retries)
+			 */
+			if (resp->mmr_comm_state == MDMNE_NOT_JOINED) {
+				retval = -3;
+				goto out;
+
+			}
+			/* these two are deadly too (no retries) */
+			if ((resp->mmr_comm_state == MDMNE_NO_WAKEUP_ENTRY) ||
+			    (resp->mmr_comm_state == MDMNE_LOG_FAIL)) {
+				retval = -4;
+				goto out;
+
+			}
+			/* Class busy? Use retry1 */
+			if (resp->mmr_comm_state == MDMNE_CLASS_BUSY) {
+				if (retry1-- == 0) {
+					retval = 1; /* retry1 exceeded */
+					goto out;
+				}
+				(void) usleep(ticks1 * USECS_PER_TICK);
+				free_result(resp);
+
+				if (flags & MD_MSGF_VERBOSE)
+					(void) printf("#Resend1 ID=(%d, "
+					    "0x%llx-%d)\n",
+					    MSGID_ELEMS(msg.msg_msgid));
+				continue;
+			}
+			if ((resp->mmr_comm_state == MDMNE_CLASS_LOCKED) ||
+			    (resp->mmr_comm_state == MDMNE_ABORT)) {
+				/*
+				 * Be patient, wait for 1 secs and try again.
+				 * It's not likely that the ABORT condition ever
+				 * goes away, but it won't hurt to retry
+				 */
+				free_result(resp);
+				(void) sleep(1);
+				continue;
+			}
+			if (resp->mmr_comm_state == MDMNE_SUSPENDED) {
+				if (flags & MD_MSGF_FAIL_ON_SUSPEND) {
+					/* caller wants us to fail here */
+					(void) mddserror(ep,
+					    MDE_DS_NOTNOW_RECONFIG, setno,
+					    mynode(), mynode(), NULL);
+					retval = -4;
+					goto out;
+				} else {
+					/* wait for 1 secs and try again. */
+					free_result(resp);
+					(void) sleep(1);
+					continue;
+				}
+			}
+		} else {
+			/*
+			 * If we get a NULL back from the rpc call, try to
+			 * reinitialize the client.
+			 * Depending on retries2 we try again, or not.
+			 */
+			syslog(LOG_INFO,
+			    "send_message: ID=(%d, 0x%llx-%d) resp = NULL\n",
+			    MSGID_ELEMS(msg.msg_msgid));
+
+			clnt_destroy(local_daemon);
+			local_daemon = mdmn_get_local_clnt(MNGLC_FOR_REAL);
+
+			if (local_daemon == (CLIENT *)NULL) {
+				return (-1);
+			}
+			clnt_control(local_daemon, CLSET_TIMEOUT,
+			    (char *)&timeout);
+		}
+
+		/*
+		 * If we are here, either resp is zero or resp is non-zero
+		 * but some commstate not mentioned above occurred.
+		 * In either case we use retry2
+		 */
+		if (retry2-- == 0) {
+			syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
+			    "send_message: (%d, 0x%llx-%d) retry2 exceeded\n"),
+			    MSGID_ELEMS(msg.msg_msgid));
+
+			retval = 2; /* retry2 exceeded */
+			goto out;
+		}
+		if (flags & MD_MSGF_VERBOSE) {
+			syslog(LOG_DEBUG, dgettext(TEXT_DOMAIN,
+			    "send_message: (%d, 0x%llx-%d) resend on retry2\n"),
+			    MSGID_ELEMS(msg.msg_msgid));
+		}
+
+		(void) usleep(ticks2 * USECS_PER_TICK);
+
+		if (resp != (md_mn_result_t *)NULL) {
+			free_result(resp);
+		}
+	}
+out:
+	/* Hand the client back to the pool for the next caller. */
+	mdmn_put_local_clnt(local_daemon);
+	return (retval);
+}
+
+/*
+ * suspend the commd for a given set/class combination.
+ *
+ * Parameter:
+ *	set number or 0 (meaning all sets)
+ *	class number or 0 (meaning all classes)
+ *
+ * Returns:
+ *	0 on success (set is suspended and all messages drained)
+ *	MDE_DS_COMMDCTL_SUSPEND_NYD if set is not yet drained
+ *	MDE_DS_COMMDCTL_SUSPEND_FAIL if any failure occurred
+ */
+int
+mdmn_suspend(set_t setno, md_mn_msgclass_t class)
+{
+	int	*resp;
+	int	state;
+	CLIENT	*local_daemon;
+	md_mn_set_and_class_t msc;
+
+	if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES)) {
+		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
+	}
+	local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE,
+		"tcp");
+	if (local_daemon == (CLIENT *)NULL) {
+		clnt_pcreateerror("local_daemon");
+		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
+	}
+	msc.msc_set = setno;
+	msc.msc_class = class;
+	msc.msc_flags = 0;
+
+	resp = mdmn_comm_suspend_1(&msc, local_daemon);
+	clnt_destroy(local_daemon);
+
+	if (resp == NULL) {
+		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
+	}
+
+	/*
+	 * The reply buffer was allocated by the stub; take the value out
+	 * and release the buffer so it is not leaked on any return path.
+	 */
+	state = *resp;
+	Free(resp);
+
+	if (state == MDMNE_ACK) {
+		/* set successfully drained, no outstanding messages */
+		return (0);
+	}
+	if (state != MDMNE_SET_NOT_DRAINED) {
+		/* some error occurred */
+		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
+	}
+
+	/* still outstanding messages, return not yet drained failure */
+	return (MDE_DS_COMMDCTL_SUSPEND_NYD);
+}
+
+/*
+ * mdmn_resume()
+ * Resume the commd for a given set/class combination.
+ *
+ * Parameter:
+ *	set number or 0 (meaning all sets)
+ *	class number or 0 (meaning all classes)
+ *	flags, passed on to the commd unchanged
+ *
+ * Returns:
+ *	0 on success
+ *	MDE_DS_COMMDCTL_RESUME_FAIL on failure
+ */
+int
+mdmn_resume(set_t setno, md_mn_msgclass_t class, uint_t flags)
+{
+	md_mn_set_and_class_t	request;
+	CLIENT			*clnt;
+	int			*reply;
+	int			rval;
+
+	/* Reject set or class numbers that are out of range. */
+	if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES))
+		return (MDE_DS_COMMDCTL_RESUME_FAIL);
+
+	clnt = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, "tcp");
+	if (clnt == (CLIENT *)NULL) {
+		clnt_pcreateerror("local_daemon");
+		return (MDE_DS_COMMDCTL_RESUME_FAIL);
+	}
+
+	request.msc_set = setno;
+	request.msc_class = class;
+	request.msc_flags = flags;
+
+	reply = mdmn_comm_resume_1(&request, clnt);
+
+	/* Anything but an acknowledged reply counts as a failure. */
+	rval = MDE_DS_COMMDCTL_RESUME_FAIL;
+	if (reply != NULL) {
+		if (*reply == MDMNE_ACK)
+			rval = 0;
+		Free(reply);
+	}
+
+	clnt_destroy(clnt);
+	return (rval);
+}
+
+/*
+ * abort all communication
+ *
+ * Sends an MD_MN_MSG_ABORT message with MD_MSGF_LOCAL_ONLY set.
+ *
+ * returns void, because: if *this* gets an error what do you want to do?
+ */
+void
+mdmn_abort(void)
+{
+	/*
+	 * dummy is an array, not a pointer, so that sizeof (dummy) is the
+	 * size of the string including its terminating NUL rather than the
+	 * size of a pointer.
+	 */
+	char dummy[] = "abort";
+	md_mn_result_t	*resultp = NULL;
+	md_error_t	mdne = mdnullerror;
+
+	(void) mdmn_send_message(0, /* No set is needed for this message */
+			MD_MN_MSG_ABORT,
+			MD_MSGF_LOCAL_ONLY,
+			dummy, sizeof (dummy),
+			&resultp, &mdne);
+
+	/* Release the result together with everything hanging off it. */
+	if (resultp != NULL) {
+		free_result(resultp);
+	}
+}
+
+/*
+ * mdmn_reinit_set()
+ * Trigger the reinitialization for a given set.
+ *
+ * Parameter: set number (0 and anything >= MD_MAXSETS are rejected)
+ *
+ * Returns:
+ *	0 on success
+ *	1 on failure
+ */
+int
+mdmn_reinit_set(set_t setno)
+{
+	CLIENT	*clnt;
+	int	*reply;
+	int	rval;
+
+	if ((setno == 0) || (setno >= MD_MAXSETS))
+		return (1);
+
+	clnt = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, "tcp");
+	if (clnt == (CLIENT *)NULL) {
+		clnt_pcreateerror("local_daemon");
+		return (1);
+	}
+
+	reply = mdmn_comm_reinit_set_1(&setno, clnt);
+
+	/* Anything but an acknowledged reply counts as a failure. */
+	rval = 1;
+	if (reply != NULL) {
+		if (*reply == MDMNE_ACK)
+			rval = 0;
+		Free(reply);
+	}
+
+	clnt_destroy(clnt);
+	return (rval);
+}
+
+
+/*
+ * mdmn_msgtype_lock()
+ * Lock a single message type from being processed on this node
+ *
+ * Parameter: md_mn_msgtype_t msgtype, uint_t locktype
+ *	(msgtype 0 and anything >= MD_MN_NMESSAGES are rejected)
+ *
+ * Returns:
+ *	0 on success
+ *	1 on failure
+ */
+int
+mdmn_msgtype_lock(md_mn_msgtype_t msgtype, uint_t locktype)
+{
+	md_mn_type_and_lock_t	request;
+	CLIENT			*clnt;
+	int			*reply;
+	int			rval;
+
+	if ((msgtype == 0) || (msgtype >= MD_MN_NMESSAGES))
+		return (1);
+
+	clnt = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, "tcp");
+	if (clnt == (CLIENT *)NULL) {
+		clnt_pcreateerror("local_daemon");
+		return (1);
+	}
+
+	request.mmtl_type = msgtype;
+	request.mmtl_lock = locktype;
+
+	reply = mdmn_comm_msglock_1(&request, clnt);
+
+	/* Anything but an acknowledged reply counts as a failure. */
+	rval = 1;
+	if (reply != NULL) {
+		if (*reply == MDMNE_ACK)
+			rval = 0;
+		Free(reply);
+	}
+
+	clnt_destroy(clnt);
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c b/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c
new file mode 100644
index 0000000000..8603aca5ac
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c
@@ -0,0 +1,1957 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <wait.h>
+#include <sys/time.h>
+#include <syslog.h>
+
+#include <meta.h>
+#include <sys/lvm/mdio.h>
+#include <sys/lvm/md_mddb.h>
+#include <sys/lvm/md_mirror.h>
+
+#define	MAX_N_ARGS 64
+#define	MAX_ARG_LEN 1024
+
+/* we reserve 1024 bytes for stdout and the same for stderr */
+#define	MAX_OUT	1024
+#define	MAX_ERR	1024
+#define	JUNK 128 /* used to flush stdout and stderr */
+
+
+/*
+ * Handler that runs a command line on this node and hands the command's
+ * stdout, stderr and exit status back in the result structure.
+ *
+ * msg->msg_event_data holds the complete command line as one string.
+ * It is split at every blank or tab (each separator starts a new,
+ * possibly empty, argument).  The program named by the first word is
+ * executed via execvp(); its argv[0] is that word with ".rpc_call"
+ * appended.
+ *
+ * At most MAX_N_ARGS - 1 arguments of at most MAX_ARG_LEN - 1 characters
+ * are accepted, so that the NULL terminator of argv[] and the NUL
+ * terminator of every argument always fit.
+ *
+ * On return:
+ *	resp->mmr_comm_state	MDMNE_ACK, or MDMNE_HANDLER_FAILED when the
+ *				command could not be parsed, started,
+ *				read from or waited for
+ *	resp->mmr_out, mmr_err	NUL terminated buffers of MAX_OUT / MAX_ERR
+ *				bytes (NULL if the command was never
+ *				started); excess output is discarded
+ *	resp->mmr_exitval	WEXITSTATUS() of the command, set whenever
+ *				waitpid() succeeded
+ */
+/*ARGSUSED*/
+void
+mdmn_do_cmd(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	static const char suffix[] = ".rpc_call"; /* appended to argv[0] */
+
+	int	arg;		/* argument that is currently being built */
+	int	index;		/* write position within argv[arg] */
+	int	i;		/* helper for for loops */
+	char	*argv[MAX_N_ARGS]; /* argument array for execvp */
+	char	*cp;		/* runs through the given command line string */
+	char	*command = NULL; /* the command we call locally */
+	int	pout[2];	/* pipe for stdout */
+	int	perr[2];	/* pipe for stderr */
+	pid_t	pid;		/* process id */
+
+	cp	= msg->msg_event_data;
+	arg	= 0;
+	index	= 0;
+
+	/* init the args array: alloc the first one and null out the rest */
+	argv[0] = Malloc(MAX_ARG_LEN);
+	for (i = 1; i < MAX_N_ARGS; i++) {
+		argv[i] = NULL;
+	}
+
+	resp->mmr_comm_state	= MDMNE_ACK; /* Ok state */
+
+	/*
+	 * Like the other handlers, start out with empty output buffers so
+	 * that the failure paths below never hand back unset pointers.
+	 */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+
+	while (*cp != '\0') {
+		if ((*cp != ' ') && (*cp != '\t')) {
+			/*
+			 * No space or tab: copy char into current argv and
+			 * advance both pointers.  The last byte of the
+			 * buffer is reserved for the terminating NUL.
+			 */
+			if (index >= MAX_ARG_LEN - 1) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "PANIC: argument too long\n"));
+				resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+				goto out;
+			}
+			argv[arg][index] = *cp;
+			cp++;	/* next char in command line	*/
+			index++;	/* next char in argument	*/
+		} else {
+			/*
+			 * space or tab: terminate current argv, advance arg,
+			 * reset pointer into arg, advance pointer in command
+			 * line.  The last slot of argv[] is reserved for
+			 * the NULL pointer execvp() needs, so check before
+			 * a new slot is touched.
+			 */
+			if (arg + 1 >= MAX_N_ARGS - 1) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "PANIC: too many arguments specified\n"));
+				resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+				goto out;
+			}
+			argv[arg][index] = '\0';
+			arg++; /* next argument */
+			argv[arg] = Malloc(MAX_ARG_LEN);
+			cp++; /* next char in command line */
+			index = 0; /* starts at char 0 */
+		}
+	}
+	/* terminate the last real argument */
+	argv[arg][index] = '\0';
+	/* the last argument is a NULL pointer; arg + 1 < MAX_N_ARGS here */
+	argv[arg + 1] = NULL;
+
+	/* argv[0] must be able to take the suffix including its NUL */
+	if (strlen(argv[0]) + sizeof (suffix) > MAX_ARG_LEN) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "PANIC: argument too long\n"));
+		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		goto out;
+	}
+
+	if (pipe(pout) < 0)  {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "PANIC: pipe failed\n"));
+		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		goto out;
+	}
+	if (pipe(perr) < 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "PANIC: pipe failed\n"));
+		(void) close(pout[0]);
+		(void) close(pout[1]);
+		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		goto out;
+	}
+
+	/* execute the plain name, but present name.rpc_call as argv[0] */
+	command = Strdup(argv[0]);
+	(void) strcat(argv[0], suffix);
+
+	pid = fork1();
+	if (pid == (pid_t)-1) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "PANIC: fork failed\n"));
+		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+		(void) close(pout[0]);
+		(void) close(pout[1]);
+		(void) close(perr[0]);
+		(void) close(perr[1]);
+		goto out;
+	} else  if (pid == (pid_t)0) {
+		/*
+		 * child: it must never return into the caller's code, so
+		 * every failure ends in _exit().  The parent sees the
+		 * non-zero exit status.
+		 */
+		(void) close(0);
+		/* close the reading channels of pout and perr */
+		(void) close(pout[0]);
+		(void) close(perr[0]);
+		/* redirect stdout */
+		if (dup2(pout[1], 1) < 0) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "PANIC: dup2 failed\n"));
+			_exit(1);
+		}
+
+		/* redirect stderr */
+		if (dup2(perr[1], 2) < 0) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "PANIC: dup2 failed\n"));
+			_exit(1);
+		}
+
+		(void) execvp(command, (char *const *)argv);
+		perror("execvp");
+		_exit(1);
+	} else {
+		/* parent process */
+		int	stat_loc;
+		char	*out, *err; /* write positions in mmr_out/mmr_err */
+		ssize_t	nread;
+		char	junk[JUNK];
+		int	out_done = 0;
+		int	err_done = 0;
+		int	out_read = 0;
+		int	err_read = 0;
+		int	waited = 0;
+		int	maxfd;
+		fd_set	rset;
+
+		/* close the writing channels of pout and perr */
+		(void) close(pout[1]);
+		(void) close(perr[1]);
+		resp->mmr_out = Malloc(MAX_OUT);
+		resp->mmr_err = Malloc(MAX_ERR);
+		resp->mmr_out_size = MAX_OUT;
+		resp->mmr_err_size = MAX_ERR;
+		out = resp->mmr_out;
+		err = resp->mmr_err;
+
+		while ((out_done == 0) || (err_done == 0)) {
+			/* only wait for streams that are still open */
+			FD_ZERO(&rset);
+			if (out_done == 0)
+				FD_SET(pout[0], &rset);
+			if (err_done == 0)
+				FD_SET(perr[0], &rset);
+			maxfd = max(pout[0], perr[0]) + 1;
+			if (select(maxfd, &rset, NULL, NULL, NULL) < 0) {
+				if (errno == EINTR)
+					continue;
+				/* rset is undefined now, give up reading */
+				resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+				break;
+			}
+
+			/*
+			 * Did the child produce some output to stdout?
+			 * If so, read it until we either reach the end of the
+			 * output or until the buffer is full, always leaving
+			 * one byte for the terminating NUL.
+			 * Once the buffer is full we simply read away the
+			 * output into a junk buffer so the child does not
+			 * block on a full pipe.
+			 */
+			if (FD_ISSET(pout[0], &rset)) {
+				if (MAX_OUT - out_read - 1 > 0) {
+					nread = read(pout[0], out,
+					    MAX_OUT - out_read - 1);
+					if (nread > 0) {
+						out_read += (int)nread;
+						out += nread;
+					}
+				} else {
+					/* buffer full, empty stdout */
+					nread = read(pout[0], junk, JUNK);
+				}
+				if ((nread == 0) ||
+				    ((nread < 0) && (errno != EINTR))) {
+					/* closed by child or unreadable */
+					out_done++;
+				}
+			}
+			/* the same procedure for stderr */
+			if (FD_ISSET(perr[0], &rset)) {
+				if (MAX_ERR - err_read - 1 > 0) {
+					nread = read(perr[0], err,
+					    MAX_ERR - err_read - 1);
+					if (nread > 0) {
+						err_read += (int)nread;
+						err += nread;
+					}
+				} else {
+					/* buffer full, empty stderr */
+					nread = read(perr[0], junk, JUNK);
+				}
+				if ((nread == 0) ||
+				    ((nread < 0) && (errno != EINTR))) {
+					/* closed by child or unreadable */
+					err_done++;
+				}
+			}
+		}
+		/* both counters are at most MAX_OUT - 1 / MAX_ERR - 1 */
+		resp->mmr_out[out_read] = '\0';
+		resp->mmr_err[err_read] = '\0';
+
+		/*
+		 * Close our ends before waiting: should the loop above have
+		 * been left early, a child still writing must not block
+		 * forever on a pipe nobody reads.
+		 */
+		(void) close(pout[0]);
+		(void) close(perr[0]);
+
+		/* retry when interrupted; judge by waitpid()'s own result */
+		for (;;) {
+			if (waitpid(pid, &stat_loc, 0) >= 0) {
+				waited = 1;
+				break;
+			}
+			if (errno != EINTR) {
+				resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
+				break;
+			}
+		}
+		if (waited)
+			resp->mmr_exitval = WEXITSTATUS(stat_loc);
+	}
+out:
+	for (i = 0; i < MAX_N_ARGS; i++) {
+		if (argv[i] != NULL) {
+			Free(argv[i]);
+		}
+	}
+	if (command != NULL) {
+		Free(command);
+	}
+}
+
+/*
+ * Handler that reports whether a metadevice is open on this node.
+ * Only messages of type MD_MN_MSG_CLU_CHECK are acted upon; for every
+ * other message type the result structure is left untouched.
+ *
+ * The exit value is the open state delivered by the MD_IOCISOPEN ioctl,
+ * or the ioctl's return value if it failed.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_clu(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_isopen_t	*isop;
+	int		rval;
+
+	if (msg->msg_type != MD_MN_MSG_CLU_CHECK)
+		return;
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+
+	isop = (md_isopen_t *)(void *)msg->msg_event_data;
+	rval = metaioctl(MD_IOCISOPEN, isop, &(isop->mde), NULL);
+
+	if (rval == 0) {
+		/* ioctl worked: hand back the open state of the metadevice */
+		resp->mmr_exitval = isop->isopen;
+		return;
+	}
+
+	/*
+	 * The ioctl failed.  Normally its (non-zero) return value becomes
+	 * the exit value, so no attempt is made to remove/rename the
+	 * metadevice later.
+	 *
+	 * One failure is expected, though.  During a metaclear each node in
+	 * turn first checks on all nodes whether the md is open and then
+	 * removes the md locally.  When the 2nd node runs its check, the
+	 * first node has already removed the md and answers with
+	 * MDE_UNIT_NOT_SETUP.  That must not keep the 2nd node from
+	 * proceeding, so it is mapped to an Ok.
+	 */
+	if (mdismderror(&(isop->mde), MDE_UNIT_NOT_SETUP)) {
+		mdclrerror(&(isop->mde));
+		rval = 0;
+	}
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * Handler for MD_MN_MSG_REQUIRE_OWNER.
+ * Issues the MD_MN_SET_MM_OWNER ioctl with the mirror and owner given in
+ * the message.  While the ioctl fails with EAGAIN it is repeated after a
+ * one second pause, for a limited number of attempts.  The return value
+ * of the last ioctl becomes the exit value.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_req_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_req_owner_t	*req;
+	md_set_mmown_params_t	ioc;
+	int			rval;
+	int			retries = 0;
+
+	req = (md_mn_req_owner_t *)(void *)msg->msg_event_data;
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR, MD_MIN2SET(req->mnum))
+	ioc.d.mnum = req->mnum;
+	ioc.d.owner = req->owner;
+
+	for (;;) {
+		rval = metaioctl(MD_MN_SET_MM_OWNER, &ioc, &ioc.mde, NULL);
+		if (rval == 0)
+			break;
+
+		/* only EAGAIN is worth another attempt */
+		if (ioc.mde.info.md_error_info_t_u.sys_error.errnum != EAGAIN)
+			break;
+		if (retries++ >= 10)
+			break;
+		(void) sleep(1);
+	}
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * handler for MD_MN_MSG_CHOOSE_OWNER
+ * This is called when a mirror resync has no owner. The master node generates
+ * this message which is not broadcast to the other nodes. The message is
+ * required as the kernel does not have access to the nodelist for the set.
+ *
+ * The handler determines the node id of the new owner and sends a
+ * MD_MN_MSG_CHANGE_OWNER message carrying that id.  The exit value is the
+ * return value of that send, or 1 if the set could not be looked up or no
+ * owner could be determined.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_choose_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_chowner_t	chownermsg;
+	md_mn_msg_chooseid_t	*d;
+	int			ret = 0;
+	int			nodecnt;
+	int			nodeno;
+	uint_t			nodeid;
+	uint_t			myflags;
+	set_t			setno;
+	mdsetname_t		*sp;
+	md_set_desc		*sd;
+	md_mnnode_desc		*nd;
+	md_error_t		mde = mdnullerror;
+	md_mn_result_t		*resp1 = NULL;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_chooseid_t *)(void *)msg->msg_event_data;
+
+	setno = MD_MIN2SET(d->msg_chooseid_mnum);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_CHOOSE_OWNER: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = 1;
+		return;
+	}
+	if ((sd = metaget_setdesc(sp, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_CHOOSE_OWNER: Invalid set pointer\n"));
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	/*
+	 * If we've been called with msg_chooseid_set_node set TRUE then we
+	 * are simply re-setting the owner id to ensure consistency across
+	 * the cluster; msg_chooseid_rcnt is taken as the node id.
+	 * If the flag is reset (B_FALSE) we are requesting a new owner to be
+	 * determined.
+	 */
+	if (d->msg_chooseid_set_node) {
+		nodeid = d->msg_chooseid_rcnt;
+	} else {
+		/* Count the number of live nodes */
+		nodecnt = 0;
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (nd->nd_flags & MD_MN_NODE_ALIVE)
+				nodecnt++;
+		}
+
+		/* without a live node the modulo below would trap */
+		if (nodecnt == 0) {
+			syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+			    "MD_MN_MSG_CHOOSE_OWNER: No live nodes in set\n"));
+			resp->mmr_exitval = 1;
+			return;
+		}
+
+		/*
+		 * The node to be chosen will be the resync count for the
+		 * set modulo the number of live nodes in the set
+		 */
+		nodeno = (d->msg_chooseid_rcnt % nodecnt);
+
+		/* scan the nodelist looking for the required node */
+		nodecnt = 0;
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (nd->nd_flags & MD_MN_NODE_ALIVE) {
+				if (nodecnt == nodeno)
+					break;
+				nodecnt++;
+			}
+		}
+
+		/* the scan ran off the end of the list: nothing to choose */
+		if (nd == NULL) {
+			syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+			    "MD_MN_MSG_CHOOSE_OWNER: No owner found\n"));
+			resp->mmr_exitval = 1;
+			return;
+		}
+		nodeid = nd->nd_nodeid;
+	}
+
+	/* Send message to all nodes to make ownership change */
+	chownermsg.msg_chowner_mnum =  d->msg_chooseid_mnum;
+	chownermsg.msg_chowner_nodeid = nodeid;
+	myflags = MD_MSGF_NO_LOG;
+
+	/* inherit some flags from the parent message */
+	myflags |= msg->msg_flags & MD_MSGF_INHERIT_BITS;
+
+	ret = mdmn_send_message(MD_MIN2SET(d->msg_chooseid_mnum),
+	    MD_MN_MSG_CHANGE_OWNER, myflags, (char *)&chownermsg,
+	    sizeof (chownermsg), &resp1, &mde);
+	if (resp1 != NULL)
+		free_result(resp1);
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * Handler for MD_MN_MSG_CHANGE_OWNER
+ * This is called when we are performing a resync and wish to change from
+ * no mirror owner to an owner chosen by the master.
+ * This message is only relevant for the new owner, the message will be
+ * ignored by all other nodes.
+ *
+ * The exit value is 1 if the set could not be looked up, 0 if this node
+ * is not the chosen owner or the ownership ioctl failed with EAGAIN,
+ * and the ioctl's return value otherwise.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_change_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_set_mmown_params_t	setown;
+	md_mn_msg_chowner_t	*d;
+	int			ret = 0;
+	set_t			setno;
+	mdsetname_t		*sp;
+	md_set_desc		*sd;
+	md_error_t		mde = mdnullerror;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_chowner_t *)(void *)msg->msg_event_data;
+
+	setno = MD_MIN2SET(d->msg_chowner_mnum);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_CHANGE_OWNER: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = 1;
+		return;
+	}
+	if ((sd = metaget_setdesc(sp, &mde)) == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_CHANGE_OWNER: Invalid set pointer\n"));
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	if (d->msg_chowner_nodeid == sd->sd_mn_mynode->nd_nodeid) {
+		/*
+		 * If we are the chosen owner, issue ioctl to make the
+		 * ownership change
+		 */
+		(void) memset(&setown, 0, sizeof (md_set_mmown_params_t));
+		setown.d.mnum = d->msg_chowner_mnum;
+		setown.d.owner = d->msg_chowner_nodeid;
+		setown.d.flags = MD_MN_MM_SPAWN_THREAD;
+		MD_SETDRIVERNAME(&setown, MD_MIRROR,
+		    MD_MIN2SET(d->msg_chowner_mnum));
+
+		/*
+		 * Single shot at changing the owner, if it fails EAGAIN,
+		 * another node must have become the owner while we are in the
+		 * process of making this choice.
+		 */
+
+		ret = metaioctl(MD_MN_SET_MM_OWNER, &setown,
+		    &(setown.mde), NULL);
+		if (ret != 0) {
+			/*
+			 * Look for EAGAIN where the MD_MN_MSG_REQUIRE_OWNER
+			 * handler looks for it too: in the error structure
+			 * filled in by the ioctl.  The check of the return
+			 * value itself is kept for compatibility.
+			 * NOTE(review): like that handler this reads the
+			 * sys_error member without checking the error
+			 * class - confirm against md_error_t.
+			 */
+			md_sys_error_t	*ip =
+			    &setown.mde.info.md_error_info_t_u.sys_error;
+
+			if ((ret == EAGAIN) || (ip->errnum == EAGAIN))
+				ret = 0;
+		}
+	}
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * Handler for MD_MN_MSG_SUSPEND_WRITES.
+ * Issues the MD_MN_SUSPEND_WRITES ioctl for the mirror named in the
+ * message; the ioctl's return value becomes the exit value.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_susp_write(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_suspwr_t	*req;
+	md_suspend_wr_params_t	ioc;
+
+	req = (md_mn_msg_suspwr_t *)(void *)msg->msg_event_data;
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	ioc.mnum = req->msg_suspwr_mnum;
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR, MD_MIN2SET(req->msg_suspwr_mnum));
+
+	resp->mmr_exitval = metaioctl(MD_MN_SUSPEND_WRITES, &ioc,
+	    &(ioc.mde), NULL);
+}
+
+/*
+ * Handler for MD_MN_MSG_STATE_UPDATE_RESWR.
+ * Copies mirror, submirror, component, new state and hotspare id from
+ * the message into an MD_MN_SET_STATE ioctl request and issues it; the
+ * ioctl's return value becomes the exit value.
+ * (Per the message name, the ioctl updates the component state and
+ * resumes writes to the mirror.)
+ */
+/*ARGSUSED*/
+void
+mdmn_do_state_upd_reswr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_stch_t	*req;
+	md_set_state_params_t	ioc;
+
+	req = (md_mn_msg_stch_t *)(void *)msg->msg_event_data;
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	ioc.mnum = req->msg_stch_mnum;
+	ioc.sm = req->msg_stch_sm;
+	ioc.comp = req->msg_stch_comp;
+	ioc.state = req->msg_stch_new_state;
+	ioc.hs_id = req->msg_stch_hs_id;
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR, MD_MIN2SET(req->msg_stch_mnum));
+
+	resp->mmr_exitval = metaioctl(MD_MN_SET_STATE, &ioc,
+	    &(ioc.mde), NULL);
+}
+
+/*
+ * Submessage generator for MD_MN_MSG_STATE_UPDATE and
+ * MD_MN_MSG_STATE_UPDATE2.
+ *
+ * Two submessages are allocated and stored in msglist[0] and msglist[1]:
+ *	1. SUSPEND_WRITES for the mirror named in the original message
+ *	2. STATE_UPDATE_RESWR2 if the original message is a STATE_UPDATE2,
+ *	   STATE_UPDATE_RESWR otherwise, carrying a copy of the state
+ *	   change data
+ * Both inherit message id and set number from the original message and
+ * are flagged MD_MSGF_NO_LOG.
+ *
+ * Returns the number of submessages generated (always 2).
+ */
+int
+mdmn_smgen_state_upd(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
+{
+	md_mn_msg_stch_t	*src;
+	md_mn_msg_t		*susp_msg;
+	md_mn_msg_t		*upd_msg;
+	md_mn_msg_suspwr_t	*susp_data;
+	md_mn_msg_stch_t	*upd_data;
+
+	src = (md_mn_msg_stch_t *)(void *)msg->msg_event_data;
+
+	/* first submessage: suspend writes to the mirror */
+	susp_data = Zalloc(sizeof (md_mn_msg_suspwr_t));
+	susp_data->msg_suspwr_mnum = src->msg_stch_mnum;
+
+	susp_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(susp_msg->msg_msgid));
+	susp_msg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */
+	susp_msg->msg_setno = msg->msg_setno;
+	susp_msg->msg_type = MD_MN_MSG_SUSPEND_WRITES;
+	susp_msg->msg_event_size = sizeof (md_mn_msg_suspwr_t);
+	susp_msg->msg_event_data = (void *)susp_data;
+	msglist[0] = susp_msg;
+
+	/* second submessage: the state update itself */
+	upd_data = Zalloc(sizeof (md_mn_msg_stch_t));
+	upd_data->msg_stch_mnum = src->msg_stch_mnum;
+	upd_data->msg_stch_sm = src->msg_stch_sm;
+	upd_data->msg_stch_comp = src->msg_stch_comp;
+	upd_data->msg_stch_new_state = src->msg_stch_new_state;
+	upd_data->msg_stch_hs_id = src->msg_stch_hs_id;
+
+	upd_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(upd_msg->msg_msgid));
+	upd_msg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */
+	upd_msg->msg_setno = msg->msg_setno;
+	upd_msg->msg_type = (msg->msg_type == MD_MN_MSG_STATE_UPDATE2) ?
+	    MD_MN_MSG_STATE_UPDATE_RESWR2 : MD_MN_MSG_STATE_UPDATE_RESWR;
+	upd_msg->msg_event_size = sizeof (md_mn_msg_stch_t);
+	upd_msg->msg_event_data = (void *)upd_data;
+	msglist[1] = upd_msg;
+
+	return (2); /* Return the number of submessages generated */
+}
+
+/*
+ * Handler for MD_MN_MSG_ALLOCATE_HOTSPARE and MD_MN_MSG_ALLOCATE_HOTSPARE2.
+ * Issues the MD_MN_ALLOCATE_HOTSPARE ioctl for one mirror component.
+ * The component is identified by the mnum of the mirror, the submirror
+ * index and the component index; the hotspare id from the message is
+ * passed along as well.  The ioctl's return value becomes the exit value.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_allocate_hotspare(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_allochsp_t	*req;
+	md_alloc_hotsp_params_t	ioc;
+
+	req = (md_mn_msg_allochsp_t *)((void *)(msg->msg_event_data));
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	ioc.mnum = req->msg_allochsp_mnum;
+	ioc.sm = req->msg_allochsp_sm;
+	ioc.comp = req->msg_allochsp_comp;
+	ioc.hs_id = req->msg_allochsp_hs_id;
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR,
+	    MD_MIN2SET(req->msg_allochsp_mnum));
+
+	resp->mmr_exitval = metaioctl(MD_MN_ALLOCATE_HOTSPARE, &ioc,
+	    &(ioc.mde), NULL);
+}
+
+/*
+ * Handler for MD_MN_MSG_RESYNC_STARTING, MD_MN_MSG_RESYNC_FIRST,
+ * MD_MN_MSG_RESYNC_NEXT, MD_MN_MSG_RESYNC_FINISH and
+ * MD_MN_MSG_RESYNC_PHASE_DONE.
+ * The resync data of the message, together with the message type, is
+ * copied into an MD_MN_RESYNC ioctl request; the ioctl's return value
+ * becomes the exit value.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_resync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_resync_t	*req;
+	md_mn_rs_params_t	ioc;
+	int			sm;
+
+	req = (md_mn_msg_resync_t *)((void *)(msg->msg_event_data));
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR,
+	    MD_MIN2SET(req->msg_resync_mnum))
+
+	/* the kernel is told which of the resync messages this is */
+	ioc.msg_type = (int)msg->msg_type;
+
+	ioc.mnum = req->msg_resync_mnum;
+	ioc.rs_originator = req->msg_originator;
+	ioc.rs_type = req->msg_resync_type;
+	ioc.rs_flags = req->msg_resync_flags;
+	ioc.rs_start = req->msg_resync_start;
+	ioc.rs_size = req->msg_resync_rsize;
+	ioc.rs_done = req->msg_resync_done;
+	ioc.rs_2_do = req->msg_resync_2_do;
+
+	/* per-submirror state and flags */
+	sm = 0;
+	while (sm < NMIRROR) {
+		ioc.rs_sm_state[sm] = req->msg_sm_state[sm];
+		ioc.rs_sm_flags[sm] = req->msg_sm_flags[sm];
+		sm++;
+	}
+
+	resp->mmr_exitval = metaioctl(MD_MN_RESYNC, &ioc, &ioc.mde, NULL);
+}
+
+/*
+ * Handler for MD_MN_MSG_SETSYNC.
+ * Passes mirror, copysize and flags from the message to the
+ * MD_MN_SETSYNC ioctl; the ioctl's return value becomes the exit value.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_setsync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_setsync_t	*req;
+	md_resync_ioctl_t	ioc;
+
+	req = (md_mn_msg_setsync_t *)((void *)(msg->msg_event_data));
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, MD_MIRROR, MD_MIN2SET(req->setsync_mnum))
+	ioc.ri_mnum = req->setsync_mnum;
+	ioc.ri_flags = req->setsync_flags;
+	ioc.ri_copysize = req->setsync_copysize;
+
+	resp->mmr_exitval = metaioctl(MD_MN_SETSYNC, &ioc, &ioc.mde, NULL);
+}
+
+/*
+ * Handler for MD_MN_MSG_SET_CAP.
+ * As this handler can deal with both mirrors and soft partitions, the
+ * driver name needed for the ioctl call is not fixed but taken from the
+ * message.  The return value of the MD_MN_SET_CAP ioctl becomes the
+ * exit value.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_set_cap(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_setcap_t	*req;
+	md_mn_setcap_params_t	ioc;
+	minor_t			unit;
+
+	req = (md_mn_msg_setcap_t *)((void *)(msg->msg_event_data));
+	unit = req->msg_setcap_mnum;
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	ioc.mnum = unit;
+	ioc.sc_set = req->msg_setcap_set;
+	MD_SETDRIVERNAME(&ioc, req->msg_setcap_driver, MD_MIN2SET(unit));
+
+	resp->mmr_exitval = metaioctl(MD_MN_SET_CAP, &ioc, &ioc.mde, NULL);
+}
+
+/*
+ * Dummy handler for various CLASS0 messages like
+ * MD_MN_MSG_VERBOSITY / MD_MN_MSG_RESUME / MD_MN_MSG_SUSPEND ...
+ * It does nothing with the message and always reports success: MDMNE_ACK,
+ * exit value 0 and no output.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_dummy(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	/* success ... */
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_exitval = 0;
+
+	/* ... and nothing to show for it */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+}
+
+/*
+ * Overall description of mdcommd support that keeps all nodes in-sync
+ * with the ondisk diskset mddbs.
+ *
+ * All configuration changes to the mddb - addition/deletion of metadevices
+ * or replicas must use a CLASS1 message to block out these changes.
+ * Changes to the state of existing replicas do not need to block CLASS1
+ * since there is no conflict when just updating the state of a replica.
+ *
+ * Error encountered when master writes to mddbs:
+ *	As the master updates parts of the mddbs, flags are updated describing
+ *	what has been written.  When all locks are dropped (either in
+ *	mddb_setexit or mdioctl), a PARSE message will be generated to all
+ *	nodes with an index list of known good mddbs and the parse flags.
+ *	The master node ignores the parse message since it sent it.
+ *	The slave nodes re-read in the changed part of the mddb using the list
+ *	of known good replicas that was passed.
+ *	PARSE message does not block CLASS1.
+ *	The PARSE message must be the highest class message.  Since this
+ *	message could be sent on any ioctl, this PARSE message class must
+ *	be higher than any other class message that could issue an ioctl.
+ *
+ *	Master		Slave1		Slave2
+ * 	Handles_error
+ *	PARSE		PARSE		PARSE
+ *
+ *
+ * Add/Delete mddbs can occur from the following commands:
+ *	metadb -s set_name -a/-d
+ *	metaset -s set_name -a/-d disk
+ *	metaset -s set_name -b
+ *
+ *	The metadb/metaset command is run on the node executing the command
+ *	and sends an ATTACH/DETACH message to the master node blocking CLASS1
+ *	messages on all nodes until this message is finished.  The master
+ *	node generates 3 submessages of BLOCK, SM_ATTACH/SM_DETACH, UNBLOCK.
+ *	The BLOCK message is only run on the master node and will BLOCK
+ *	the PARSE messages from being sent to the nodes.
+ *	The SM_ATTACH/SM_DETACH message is run on all nodes and actually adds or
+ *	removes the replica(s) from the given disk slice.
+ *	The UNBLOCK message is only run on the master node and allows the
+ *	sending of PARSE messages.
+ *
+ *	Master		Slave1		Slave2
+ *			Add mddb cmd
+ *			ATTACH msg to master
+ *	BLOCK
+ *	ATTACH		ATTACH		ATTACH
+ *	UNBLOCK
+ *	PARSE		PARSE		PARSE
+ *	ATTACH msg finished
+ *
+ * Add/Delete host side information from the following commands:
+ *	metaset -s set_name -a/-d -h
+ *
+ *	The metaset command is run on the node executing the command and
+ *	sends a DB_NEWSIDE/DB_DELSIDE message and a MD_NEWSIDE/MD_DELSIDE
+ *	message whenever a host is added to or deleted from the diskset.
+ *
+ *	The side information contains the major name and minor number
+ *	associated with a disk slice from a certain node's perspective
+ *	in a (failed) effort to support clustered systems that don't have the
+ *	same device name for a physical device. (The original designers of
+ *	SVM eventually took the shortcut of assuming that all device names
+ *	are the same on all systems, but left the side information in the
+ *	mddb and namespace.)  The side information is used for disk slices
+ *	that contain mddbs and/or are components for metadevices.
+ *
+ *	The DB_NEWSIDE/DELSIDE command adds or deletes the side information
+ *	for each mddb for the host being added or deleted.
+ *	The MD_ADDSIDE/MD_DELSIDE command adds or deletes the side information
+ *	for all disk slice components that are in the namespace records for
+ *	the host being added or deleted.
+ *
+ *	The DB_NEWSIDE/DB_DELSIDE message does not change any mddb records
+ *	and only needs to be executed on the master node since the slave
+ *	nodes will be brought up to date by the PARSE message that is
+ *	generated as a result of a change to the mddb.
+ *	The MD_ADDSIDE/MD_DELSIDE message does modify the records in the mddb
+ *	and needs to be run on all nodes.  The message must block class1
+ *	messages so that record changing commands don't interfere.
+ *
+ *	Master		Slave1		Slave2
+ *			Add host
+ *			DB_NEWSIDE msg to master
+ *	DB_NEWSIDE
+ *	PARSE		PARSE		PARSE
+ *	DB_NEWSIDE msg finished
+ *			MD_NEWSIDE msg to master
+ *	MD_NEWSIDE	MD_NEWSIDE	MD_NEWSIDE
+ *	MD_NEWSIDE msg finished
+ *
+ *
+ * Optimized resync record failure:
+ *	When any node sees a failure to write an optimized resync record
+ *	that node notifies the master node of the replica that failed.
+ *	The master node handles the error and updates the rest of the
+ *	nodes using a PARSE message.  The PARSE message also calls
+ *	fixoptrecord on each slave node causing each node to fix up
+ * 	the optimized resync records that are owned by that node (the mirror
+ *	owner code also sets the optimized resync record owner).  The master
+ *	node will fix up all optimized resync records that have no owner or
+ *	are owned by the master node.
+ *
+ *	Master		Slave1		Slave2
+ *					Optimized Record Failure
+ *					OPTRECERR msg to master
+ *	Master handles opt rec failure
+ *	PARSE		PARSE		PARSE
+ *	OPTRECERR msg finished
+ *					Slave rewrites optimized record
+ *
+ */
+
+/*
+ * Handler for MD_MN_MSG_MDDB_PARSE which sends parse messages to the
+ * slave nodes in order to keep the incore view of the mddbs the
+ * same on all nodes.
+ *
+ * Since master node generated the mddb parse message, do nothing
+ * if this is the master node (flags has MD_MSGF_ON_MASTER set); the
+ * exit value is 0 in that case.
+ *
+ * If this is a slave node, send the parse message down to the kernel
+ * where this node will re-read in parts of the mddbs.  The exit value is
+ * the return value of the MD_MN_MDDB_PARSE ioctl; on failure the ioctl's
+ * error is moved into resp->mmr_ep.
+ */
+void
+mdmn_do_mddb_parse(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_mddb_parse_t	*d;
+	mddb_parse_parm_t	mpp;
+	int			ret = 0;
+	int			i;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_mddb_parse_t *)((void *)(msg->msg_event_data));
+
+	if (flags & MD_MSGF_ON_MASTER) {
+		/* nothing to do, but still report a defined exit value */
+		resp->mmr_exitval = ret;
+		return;
+	}
+
+	(void) memset(&mpp, 0, sizeof (mpp));
+	mpp.c_setno = msg->msg_setno;
+	mpp.c_parse_flags = d->msg_parse_flags;
+	for (i = 0; i < MDDB_NLB; i++) {
+		mpp.c_lb_flags[i] = d->msg_lb_flags[i];
+	}
+	ret = metaioctl(MD_MN_MDDB_PARSE, &mpp, &mpp.c_mde, NULL);
+	if (ret)
+		(void) mdstealerror(&(resp->mmr_ep), &mpp.c_mde);
+
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * Handler for MD_MN_MSG_MDDB_BLOCK which blocks (or unblocks) the
+ * generation of parse messages from this node.
+ *
+ * This is needed when attaching/detaching mddbs on the master: a slave
+ * node is unable to handle a parse message until it has done the
+ * attach/detach of the mddbs itself.  So the master node blocks the
+ * parse messages, has the attach/detach executed on all nodes and then
+ * unblocks the parse messages, which causes the parse message to be sent
+ * to all nodes.
+ *
+ * The block flags of the message are handed to the MD_MN_MDDB_BLOCK
+ * ioctl.  Its return value becomes the exit value; on failure the
+ * ioctl's error is moved into resp->mmr_ep.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_mddb_block(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_mddb_block_t	*req;
+	mddb_block_parm_t	ioc;
+	int			rval;
+
+	req = (md_mn_msg_mddb_block_t *)((void *)(msg->msg_event_data));
+
+	/* this handler produces no textual output */
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK;
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	ioc.c_blk_flags = req->msg_block_flags;
+	ioc.c_setno = msg->msg_setno;
+
+	rval = metaioctl(MD_MN_MDDB_BLOCK, &ioc, &ioc.c_mde, NULL);
+	if (rval != 0)
+		(void) mdstealerror(&(resp->mmr_ep), &ioc.c_mde);
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * Submessage generator for MD_MN_MSG_META_DB_ATTACH.
+ *
+ * Three submessages are allocated and stored in msglist[0..2]:
+ *	1. MDDB_BLOCK with MDDB_BLOCK_PARSE, not broadcast
+ *	2. SM_MDDB_ATTACH carrying a copy of the attach data
+ *	3. MDDB_BLOCK with MDDB_UNBLOCK_PARSE, not broadcast
+ * All of them inherit message id and set number from the original
+ * message and are not logged.
+ *
+ * Returns the number of submessages generated (always 3).
+ */
+int
+mdmn_smgen_mddb_attach(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
+{
+	md_mn_msg_meta_db_attach_t	*src;
+	md_mn_msg_meta_db_attach_t	*att_data;
+	md_mn_msg_mddb_block_t		*blk_data;
+	md_mn_msg_mddb_block_t		*unblk_data;
+	md_mn_msg_t			*blk_msg;
+	md_mn_msg_t			*att_msg;
+	md_mn_msg_t			*unblk_msg;
+
+	src = (md_mn_msg_meta_db_attach_t *)(void *)msg->msg_event_data;
+
+	/* 1st submessage: stop the generation of parse messages */
+	blk_data = Zalloc(sizeof (md_mn_msg_mddb_block_t));
+	blk_data->msg_block_flags = MDDB_BLOCK_PARSE;
+
+	blk_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(blk_msg->msg_msgid));
+	blk_msg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
+	blk_msg->msg_setno = msg->msg_setno;
+	blk_msg->msg_type = MD_MN_MSG_MDDB_BLOCK;
+	blk_msg->msg_event_size = sizeof (md_mn_msg_mddb_block_t);
+	blk_msg->msg_event_data = (void *)blk_data;
+	msglist[0] = blk_msg;
+
+	/* 2nd submessage: the attach itself */
+	att_data = Zalloc(sizeof (md_mn_msg_meta_db_attach_t));
+	att_data->msg_l_dev = src->msg_l_dev;
+	att_data->msg_cnt = src->msg_cnt;
+	att_data->msg_dbsize = src->msg_dbsize;
+	(void) strncpy(att_data->msg_dname, src->msg_dname, 16);
+	att_data->msg_splitname = src->msg_splitname;
+	att_data->msg_options = src->msg_options;
+
+	att_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(att_msg->msg_msgid));
+	/* Don't log submessages and panic on inconsistent results */
+	att_msg->msg_flags = MD_MSGF_NO_LOG | MD_MSGF_PANIC_WHEN_INCONSISTENT;
+	att_msg->msg_setno = msg->msg_setno;
+	att_msg->msg_type = MD_MN_MSG_SM_MDDB_ATTACH;
+	att_msg->msg_event_size = sizeof (md_mn_msg_meta_db_attach_t);
+	att_msg->msg_event_data = (void *)att_data;
+	msglist[1] = att_msg;
+
+	/* 3rd submessage: allow parse messages again */
+	unblk_data = Zalloc(sizeof (md_mn_msg_mddb_block_t));
+	unblk_data->msg_block_flags = MDDB_UNBLOCK_PARSE;
+
+	unblk_msg = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(unblk_msg->msg_msgid));
+	unblk_msg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
+	unblk_msg->msg_setno = msg->msg_setno;
+	unblk_msg->msg_type = MD_MN_MSG_MDDB_BLOCK;
+	unblk_msg->msg_event_size = sizeof (md_mn_msg_mddb_block_t);
+	unblk_msg->msg_event_data = (void *)unblk_data;
+	msglist[2] = unblk_msg;
+
+	return (3); /* Return the number of submessages generated */
+}
+
+/*
+ * Submessage generator for MD_MN_MSG_META_DB_DETACH.
+ *
+ * Fills msglist[] with three newly allocated submessages, each carrying
+ * the msgid and set number of the original message:
+ *	[0] MD_MN_MSG_MDDB_BLOCK with MDDB_BLOCK_PARSE (master node only)
+ *	[1] MD_MN_MSG_SM_MDDB_DETACH with the split name (all nodes)
+ *	[2] MD_MN_MSG_MDDB_BLOCK with MDDB_UNBLOCK_PARSE (master node only)
+ *
+ * Returns the number of submessages generated, which is always 3.
+ */
+int
+mdmn_smgen_mddb_detach(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
+{
+	md_mn_msg_meta_db_detach_t	*req;
+	md_mn_msg_meta_db_detach_t	*payload;
+	md_mn_msg_mddb_block_t		*parse_ctl;
+	md_mn_msg_t			*sub;
+	int				nsub = 0;
+
+	req = (md_mn_msg_meta_db_detach_t *)(void *)msg->msg_event_data;
+
+	/* BLOCK submessage */
+	sub = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(sub->msg_msgid));
+	sub->msg_type		= MD_MN_MSG_MDDB_BLOCK;
+	sub->msg_setno		= msg->msg_setno;
+	sub->msg_flags		= (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
+	sub->msg_event_data	= Zalloc(sizeof (md_mn_msg_mddb_block_t));
+	sub->msg_event_size	= sizeof (md_mn_msg_mddb_block_t);
+	parse_ctl = (md_mn_msg_mddb_block_t *)(void *)sub->msg_event_data;
+	parse_ctl->msg_block_flags = MDDB_BLOCK_PARSE;
+	msglist[nsub++] = sub;
+
+	/*
+	 * DETACH submessage: never logged, and flagged to panic when the
+	 * results are inconsistent.
+	 */
+	sub = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(sub->msg_msgid));
+	sub->msg_type		= MD_MN_MSG_SM_MDDB_DETACH;
+	sub->msg_setno		= msg->msg_setno;
+	sub->msg_flags		= MD_MSGF_NO_LOG |
+				    MD_MSGF_PANIC_WHEN_INCONSISTENT;
+	sub->msg_event_data	= Zalloc(sizeof (md_mn_msg_meta_db_detach_t));
+	sub->msg_event_size	= sizeof (md_mn_msg_meta_db_detach_t);
+	payload = (md_mn_msg_meta_db_detach_t *)(void *)sub->msg_event_data;
+	payload->msg_splitname = req->msg_splitname;
+	msglist[nsub++] = sub;
+
+	/* UNBLOCK submessage */
+	sub = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(sub->msg_msgid));
+	sub->msg_type		= MD_MN_MSG_MDDB_BLOCK;
+	sub->msg_setno		= msg->msg_setno;
+	sub->msg_flags		= (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
+	sub->msg_event_data	= Zalloc(sizeof (md_mn_msg_mddb_block_t));
+	sub->msg_event_size	= sizeof (md_mn_msg_mddb_block_t);
+	parse_ctl = (md_mn_msg_mddb_block_t *)(void *)sub->msg_event_data;
+	parse_ctl->msg_block_flags = MDDB_UNBLOCK_PARSE;
+	msglist[nsub++] = sub;
+
+	return (nsub);
+}
+
+/*
+ * Handler for MD_MN_MSG_SM_MDDB_ATTACH which is used to attach mddbs.
+ *
+ * Builds an mddb_config from the message payload, checks the device with
+ * meta_check_replica(), then issues MD_DB_NEWDEV once per requested
+ * replica and adds the side names for each one that was created.
+ * The outcome is reported in resp->mmr_exitval (0 on success, non-zero
+ * on failure) with the error detail moved into resp->mmr_ep.
+ *
+ * Used when running:
+ *	metadb -s set_name -a
+ *	metaset -s set_name -a/-d disk
+ *	metaset -s set_name -b
+ */
+/*ARGSUSED*/
+void
+mdmn_do_sm_mddb_attach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_db_attach_t	*d;
+	struct mddb_config		c;
+	int				i;
+	int				ret = 0;
+	md_error_t			ep = mdnullerror;
+	char				*name, *add_name;
+	mdname_t			*np;
+	mdsetname_t			*sp;
+
+	/* Start from an empty, acknowledged response */
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_meta_db_attach_t *)((void *)(msg->msg_event_data));
+
+	/* Describe the new replica's device to the driver */
+	(void) memset(&c, 0, sizeof (c));
+	c.c_setno = msg->msg_setno;
+	c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev);
+	(void) strncpy(c.c_locator.l_driver, d->msg_dname,
+		sizeof (c.c_locator.l_driver));
+	c.c_devname = d->msg_splitname;
+	c.c_locator.l_mnum = meta_getminor(d->msg_l_dev);
+	c.c_multi_node = 1;
+	if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+	(void) strcpy(c.c_setname, sp->setname);
+	c.c_sideno = getmyside(sp, &ep);
+	if (c.c_sideno == MD_SIDEWILD) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	/*
+	 * name is allocated by splicename() and is owned by this function;
+	 * it has to be released on every path from here on.
+	 */
+	name = splicename(&d->msg_splitname);
+	if ((np = metaname(&sp, name, &ep)) == NULL) {
+		Free(name);
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+	/*
+	 * All nodes in MN diskset must do meta_check_replica
+	 * since this causes the shared namespace to be
+	 * populated by the md driver names while checking
+	 * to see if this device is already in use as a
+	 * metadevice.
+	 */
+	if (meta_check_replica(sp, np, d->msg_options, 0,
+	    (d->msg_cnt * d->msg_dbsize), &ep)) {
+		Free(name);
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	/* Create msg_cnt replicas, each msg_dbsize blocks after the last */
+	for (i = 0; i < d->msg_cnt; i++) {
+		c.c_locator.l_blkno = i * d->msg_dbsize + 16;
+		if (setup_med_cfg(sp, &c,
+		    (d->msg_options & MDCHK_SET_FORCE), &ep)) {
+			ret = -1;
+			(void) mdstealerror(&(resp->mmr_ep), &ep);
+			break;
+		}
+		ret = metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL);
+		/* If newdev was successful, continue with attach */
+		if (ret == 0) {
+			if (meta_db_addsidenms(sp, np, c.c_locator.l_blkno,
+			    DB_ADDSIDENMS_NO_BCAST, &ep)) {
+				ret = -1;
+				(void) mdstealerror(&(resp->mmr_ep), &ep);
+				break;
+			}
+		} else {
+			(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
+			break;
+		}
+	}
+	/* The first lookup is finished with; np is looked up afresh below */
+	Free(name);
+
+	/* Invalidate the name entry for the device, whatever the outcome */
+	add_name = splicename(&d->msg_splitname);
+	if ((np = metaname(&sp, add_name, &ep)) != NULL) {
+		meta_invalidate_name(np);
+	} else {
+		ret = -1;
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+	}
+	Free(add_name);
+
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * Handler for MD_MN_MSG_SM_MDDB_DETACH which is used to detach mddbs.
+ *
+ * Walks the set's replicas with MD_DB_GETDEV and issues MD_DB_DELDEV for
+ * every one whose spliced device name equals the name in the message.
+ * Afterwards the name entry for that device is invalidated.  The outcome
+ * is left in resp->mmr_exitval (0 or -1) and any error in resp->mmr_ep.
+ *
+ * Used when running:
+ *	metadb -s set_name -d
+ *	metaset -s set_name -a/-d disk
+ *	metaset -s set_name -b
+ */
+/*ARGSUSED*/
+void
+mdmn_do_sm_mddb_detach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_db_detach_t	*req;
+	struct mddb_config		cfg;
+	md_error_t			err = mdnullerror;
+	mdsetname_t			*setp;
+	mdname_t			*namep;
+	char				*target, *cur;
+	int				idx;
+	int				matched;
+	int				rval = 0;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	req = (md_mn_msg_meta_db_detach_t *)((void *)(msg->msg_event_data));
+
+	setp = metasetnosetname(msg->msg_setno, &err);
+	if (setp == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &err);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	/* Initial query, made before the scan so that c_dbcnt is filled in */
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_setno = msg->msg_setno;
+	if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde, NULL) != 0) {
+		(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	target = splicename(&req->msg_splitname);
+	for (idx = 0; idx < cfg.c_dbcnt; ) {
+		cfg.c_id = idx;
+		if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde, NULL) != 0) {
+			(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+			rval = -1;
+			break;
+		}
+
+		cur = splicename(&cfg.c_devname);
+		matched = (strcmp(cur, target) == 0);
+		Free(cur);
+		if (!matched) {
+			/* Some other device - move on to the next replica */
+			idx++;
+			continue;
+		}
+
+		/*
+		 * Found a match - delete mddb.  The index is deliberately
+		 * left alone afterwards because dbcnt is changed.
+		 */
+		if (metaioctl(MD_DB_DELDEV, &cfg, &cfg.c_mde, NULL) != 0) {
+			(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+			rval = -1;
+			break;
+		}
+	}
+
+	namep = metaname(&setp, target, &err);
+	if (namep == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &err);
+		rval = -1;
+	} else {
+		meta_invalidate_name(namep);
+	}
+	Free(target);
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * Handler for MD_MN_MSG_META_DB_NEWSIDE.
+ *
+ * Updates the side information of a diskset mddb after a new host has
+ * been added to the diskset, by issuing MD_DB_NEWSIDE with a
+ * configuration built from the message.  The side information is the
+ * /dev/dsk/ctds name that the new node would use to access the mddb.
+ *
+ * No records in the diskset mddb are changed by this routine, so it only
+ * needs to be run on the master node.  The master node's kernel code will
+ * detect that portions of the mddb have changed and will send a parse
+ * message to all nodes to re-parse parts of the mddb.
+ *
+ * resp->mmr_exitval receives the ioctl result, or -1 when the set cannot
+ * be looked up; resp->mmr_ep receives the error detail.
+ *
+ * Used when running:
+ *	metaset -s set_name -a -h new_hostname
+ */
+/*ARGSUSED*/
+void
+mdmn_do_meta_db_newside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_db_newside_t	*req;
+	struct mddb_config		cfg;
+	md_error_t			err = mdnullerror;
+	mdsetname_t			*setp;
+	int				rval;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	req = (md_mn_msg_meta_db_newside_t *)((void *)(msg->msg_event_data));
+
+	/* Locator and device name come straight from the message */
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_multi_node = 1;
+	cfg.c_setno = msg->msg_setno;
+	cfg.c_devname = req->msg_splitname;
+	cfg.c_locator.l_dev = meta_cmpldev(req->msg_l_dev);
+	cfg.c_locator.l_mnum = req->msg_mnum;
+	cfg.c_locator.l_blkno = req->msg_blkno;
+	(void) strncpy(cfg.c_locator.l_driver, req->msg_dname,
+	    sizeof (cfg.c_locator.l_driver));
+
+	setp = metasetnosetname(cfg.c_setno, &err);
+	if (setp == NULL) {
+		resp->mmr_exitval = -1;
+		(void) mdstealerror(&(resp->mmr_ep), &err);
+		return;
+	}
+	cfg.c_sideno = req->msg_sideno;
+	(void) strcpy(cfg.c_setname, setp->setname);
+
+	rval = metaioctl(MD_DB_NEWSIDE, &cfg, &cfg.c_mde, NULL);
+	if (rval != 0)
+		(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * Handler for MD_MN_MSG_META_DB_DELSIDE.
+ *
+ * Removes the side information of a diskset mddb after a host has been
+ * deleted from the diskset, by issuing MD_DB_DELSIDE for the replica
+ * named in the message.  The side information is the /dev/dsk/ctds name
+ * that the node would use to access the mddb.
+ *
+ * No records in the diskset mddb are changed by this routine, so it only
+ * needs to be run on the master node.  The master node's kernel code will
+ * detect that portions of the mddb have changed and will send a parse
+ * message to all nodes to re-parse parts of the mddb.
+ *
+ * resp->mmr_exitval receives the ioctl result, or -1 when the set cannot
+ * be looked up; resp->mmr_ep receives the error detail.
+ *
+ * Used when running:
+ *	metaset -s set_name -d -h hostname
+ */
+/*ARGSUSED*/
+void
+mdmn_do_meta_db_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_db_delside_t	*req;
+	mddb_config_t			cfg;
+	md_error_t			err = mdnullerror;
+	mdsetname_t			*setp;
+	int				rval;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	req = (md_mn_msg_meta_db_delside_t *)((void *)(msg->msg_event_data));
+
+	/* Only the device and block number are needed to pick the replica */
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_multi_node = 1;
+	cfg.c_setno = msg->msg_setno;
+	cfg.c_locator.l_dev = meta_cmpldev(req->msg_l_dev);
+	cfg.c_locator.l_blkno = req->msg_blkno;
+
+	setp = metasetnosetname(cfg.c_setno, &err);
+	if (setp == NULL) {
+		resp->mmr_exitval = -1;
+		(void) mdstealerror(&(resp->mmr_ep), &err);
+		return;
+	}
+	cfg.c_sideno = req->msg_sideno;
+	(void) strcpy(cfg.c_setname, setp->setname);
+
+	rval = metaioctl(MD_DB_DELSIDE, &cfg, &cfg.c_mde, NULL);
+	if (rval != 0)
+		(void) mdstealerror(&(resp->mmr_ep), &cfg.c_mde);
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * Handler for MD_MN_MSG_META_MD_ADDSIDE which is used to add the
+ * side information for each diskset metadevice component (if that
+ * component is a disk) when a host has been added to the diskset.
+ * The side information is the /dev/dsk/ctds name that the node would
+ * use to access the metadevice component.
+ *
+ * For every key returned by MD_IOCNXTKEY_NM on side msg_otherside, the
+ * device name is looked up, translated for side msg_sideno with
+ * meta_getside_devinfo() and added with add_name(), once per reference.
+ * resp->mmr_exitval is 0 when all keys were processed and -1 on the
+ * first failure.
+ *
+ * This routine makes changes to the mddb records and must be run
+ * on all nodes.
+ *
+ * Used when running:
+ *	metaset -s set_name -a -h new_hostname
+ */
+/*ARGSUSED*/
+void
+mdmn_do_meta_md_addside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_md_addside_t	*d;
+	mdnm_params_t			nm;
+	mdsetname_t			*sp;
+	char				*cname, *dname;
+	minor_t				mnum;
+	int				done, i;
+	md_error_t			ep = mdnullerror;
+
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	resp->mmr_comm_state = MDMNE_ACK;
+	d = (md_mn_msg_meta_md_addside_t *)((void *)(msg->msg_event_data));
+
+	(void) memset(&nm, 0, sizeof (nm));
+	/*
+	 * Start the key walk explicitly from MD_KEYWILD, as the delside
+	 * handler does, rather than relying on the memset above.
+	 */
+	nm.key = MD_KEYWILD;
+	if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) {
+		(void) mdstealerror(&(resp->mmr_ep), &ep);
+		resp->mmr_exitval = -1;
+		return;
+	}
+	/* While loop continues until IOCNXTKEY_NM gives nm.key of KEYWILD */
+	/*CONSTCOND*/
+	while (1) {
+		/* nm.key carries over from the previous pass */
+		nm.mde = mdnullerror;
+		nm.setno = msg->msg_setno;
+		nm.side = d->msg_otherside;
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			(void) mdstealerror(&(resp->mmr_ep), &nm.mde);
+			resp->mmr_exitval = -1;
+			return;
+		}
+
+		/* Normal exit path is to eventually get a KEYWILD */
+		if (nm.key == MD_KEYWILD) {
+			resp->mmr_exitval = 0;
+			return;
+		}
+
+		/* Name of this key's device as seen from the other side */
+		nm.devname = (uint64_t)meta_getnmbykey(msg->msg_setno,
+			d->msg_otherside, nm.key, &ep);
+		if (nm.devname == NULL) {
+			(void) mdstealerror(&(resp->mmr_ep), &ep);
+			resp->mmr_exitval = -1;
+			return;
+		}
+		nm.side = d->msg_sideno;
+		if ((done = meta_getside_devinfo(sp, (char *)nm.devname,
+		    d->msg_sideno, &cname, &dname, &mnum, &ep)) == -1) {
+			(void) mdstealerror(&(resp->mmr_ep), &ep);
+			Free((void *)nm.devname);
+			resp->mmr_exitval = -1;
+			return;
+		}
+		Free((void *)nm.devname);
+		/*
+		 * NOTE(review): no error is stored in resp->mmr_ep on this
+		 * path, and cname/dname are freed - presumably
+		 * meta_getside_devinfo() sets them whenever it does not
+		 * return -1; confirm.
+		 */
+		if (done != 1) {
+			Free(cname);
+			Free(dname);
+			resp->mmr_exitval = -1;
+			return;
+		}
+
+		/*
+		 * The device reference count can be greater than 1 if
+		 * more than one softpart is configured on top of the
+		 * same device.  If this is the case then we want to
+		 * increment the count to sync up with the other sides.
+		 */
+		for (i = 0; i < nm.ref_count; i++) {
+			if (add_name(sp, d->msg_sideno, nm.key, dname, mnum,
+			    cname, &ep) == -1) {
+				(void) mdstealerror(&(resp->mmr_ep), &ep);
+				Free(cname);
+				Free(dname);
+				resp->mmr_exitval = -1;
+				return;
+			}
+		}
+		Free(cname);
+		Free(dname);
+	}
+
+	/*NOTREACHED*/
+}
+/*
+ * Handler for MD_MN_MSG_META_MD_DELSIDE.
+ *
+ * Deletes the side information for each diskset metadevice component
+ * (if that component is a disk) when a host has been removed from the
+ * diskset.  The side information is the /dev/dsk/ctds name that the
+ * node would use to access the metadevice component.
+ *
+ * Every key returned by MD_IOCNXTKEY_NM is removed for side msg_sideno
+ * with del_name(), once per reference.  resp->mmr_exitval is 0 when all
+ * keys were processed and -1 on the first failure.
+ *
+ * This routine makes changes to the mddb records and must be run
+ * on all nodes.
+ *
+ * Used when running:
+ *	metaset -s set_name -d -h hostname
+ */
+/*ARGSUSED*/
+void
+mdmn_do_meta_md_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_meta_md_delside_t	*req;
+	mdnm_params_t			nm;
+	md_error_t			err = mdnullerror;
+	mdsetname_t			*setp;
+	int				refs;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	req = (md_mn_msg_meta_md_delside_t *)((void *)(msg->msg_event_data));
+
+	setp = metasetnosetname(msg->msg_setno, &err);
+	if (setp == NULL) {
+		resp->mmr_exitval = -1;
+		(void) mdstealerror(&(resp->mmr_ep), &err);
+		return;
+	}
+
+	(void) memset(&nm, 0, sizeof (nm));
+	nm.key = MD_KEYWILD;
+	for (;;) {
+		nm.side = MD_SIDEWILD;
+		nm.setno = msg->msg_setno;
+		nm.mde = mdnullerror;
+		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
+			resp->mmr_exitval = -1;
+			(void) mdstealerror(&(resp->mmr_ep), &nm.mde);
+			return;
+		}
+
+		/* Getting MD_KEYWILD back is the normal way out */
+		if (nm.key == MD_KEYWILD)
+			break;
+
+		/*
+		 * More than one softpart may sit on top of the same
+		 * device, giving a reference count above 1.  Delete once
+		 * per reference so that the count reaches zero and the
+		 * entry can actually be removed.
+		 */
+		for (refs = 0; refs < nm.ref_count; refs++) {
+			if (del_name(setp, req->msg_sideno, nm.key,
+			    &err) == -1) {
+				resp->mmr_exitval = -1;
+				(void) mdstealerror(&(resp->mmr_ep), &err);
+				return;
+			}
+		}
+	}
+
+	resp->mmr_exitval = 0;
+}
+
+/*
+ * Handler for MD_MN_MSG_MDDB_OPTRECERR.
+ *
+ * A node sends this to notify the master node that it has seen an error
+ * when attempting to write to the optimized resync records that reside
+ * on 2 of the diskset mddbs.  The master node will mark the failed
+ * replica in error and this will send a parse message to all nodes to
+ * re-read parts of the mddb and to fix their optimized resync records
+ * based on this information.
+ *
+ * The two msg_recerr values are handed to MD_MN_MDDB_OPTRECFIX; the
+ * ioctl result becomes resp->mmr_exitval.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_mddb_optrecerr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_mddb_optrecerr_t	*req;
+	mddb_optrec_parm_t		parm;
+	int				rval;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	req = (md_mn_msg_mddb_optrecerr_t *)((void *)(msg->msg_event_data));
+
+	(void) memset(&parm, 0, sizeof (parm));
+	parm.c_setno = msg->msg_setno;
+	/* One error indication for each of the two records */
+	parm.c_recerr[0] = req->msg_recerr[0];
+	parm.c_recerr[1] = req->msg_recerr[1];
+
+	rval = metaioctl(MD_MN_MDDB_OPTRECFIX, &parm, &parm.c_mde, NULL);
+	if (rval != 0)
+		(void) mdstealerror(&(resp->mmr_ep), &parm.c_mde);
+
+	resp->mmr_exitval = rval;
+}
+
+/*
+ * Submessage generator that expands msg into four submessages, in order:
+ * two of type MD_MN_MSG_TEST2, one MD_MN_MSG_TEST3 and one
+ * MD_MN_MSG_TEST4.  Each carries the msgid and set number of msg, a
+ * duplicated string as event data, and is flagged MD_MSGF_NO_LOG.
+ *
+ * Returns the number of submessages stored in msglist, always 4.
+ */
+int
+mdmn_smgen_test6(md_mn_msg_t *msg, md_mn_msg_t **msglist)
+{
+	md_mn_msg_t	*sub;
+	int		nsub = 0;
+
+	/* TEST2, first copy */
+	sub = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(sub->msg_msgid));
+	sub->msg_type		= MD_MN_MSG_TEST2;
+	sub->msg_setno		= msg->msg_setno;
+	sub->msg_flags		= MD_MSGF_NO_LOG;
+	sub->msg_event_data	= Strdup("test2");
+	sub->msg_event_size	= sizeof ("test2");
+	msglist[nsub++] = sub;
+
+	/* TEST2, second copy */
+	sub = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(sub->msg_msgid));
+	sub->msg_type		= MD_MN_MSG_TEST2;
+	sub->msg_setno		= msg->msg_setno;
+	sub->msg_flags		= MD_MSGF_NO_LOG;
+	sub->msg_event_data	= Strdup("test2");
+	sub->msg_event_size	= sizeof ("test2");
+	msglist[nsub++] = sub;
+
+	/* TEST3 */
+	sub = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(sub->msg_msgid));
+	sub->msg_type		= MD_MN_MSG_TEST3;
+	sub->msg_setno		= msg->msg_setno;
+	sub->msg_flags		= MD_MSGF_NO_LOG;
+	sub->msg_event_data	= Strdup("test3");
+	sub->msg_event_size	= sizeof ("test3");
+	msglist[nsub++] = sub;
+
+	/* TEST4 */
+	sub = Zalloc(sizeof (md_mn_msg_t));
+	MSGID_COPY(&(msg->msg_msgid), &(sub->msg_msgid));
+	sub->msg_type		= MD_MN_MSG_TEST4;
+	sub->msg_setno		= msg->msg_setno;
+	sub->msg_flags		= MD_MSGF_NO_LOG;
+	sub->msg_event_data	= Strdup("test4");
+	sub->msg_event_size	= sizeof ("test4");
+	msglist[nsub++] = sub;
+
+	return (nsub);
+}
+
+/*
+ * Handler that sends an MD_IOCSET ioctl on every node in order to create
+ * a soft partition.
+ *
+ * The set and metadevice name are derived from the minor number in the
+ * message.  resp->mmr_exitval is 1 when either lookup or
+ * meta_init_make_device() fails, otherwise the result of the ioctl.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_iocset(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_iocset_t	*req;
+	md_error_t		mde = mdnullerror;
+	mdsetname_t		*sp;
+	mdname_t		*np;
+	set_t			setno;
+
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	req = (md_mn_msg_iocset_t *)(void *)msg->msg_event_data;
+
+	setno = MD_MIN2SET(req->iocset_params.mnum);
+	sp = metasetnosetname(setno, &mde);
+	if (sp == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_IOCSET: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	np = metamnumname(&sp, req->iocset_params.mnum, 1, &mde);
+	if (np == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_IOCSET: Invalid mnum %d\n"),
+		    req->iocset_params.mnum);
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	if (meta_init_make_device(&sp, np->cname, &mde) == -1) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_IOCSET: Invalid metadevice name %s\n"),
+		    np->cname);
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	/* The unit travels inside the message; point the params at it */
+	req->iocset_params.mdp = (uint64_t)&req->unit;
+	resp->mmr_exitval = metaioctl(MD_IOCSET, &(req->iocset_params), &mde,
+	    np->cname);
+}
+
+/*
+ * This is to update the status of a softpart
+ *
+ * The set is derived from the minor number in the message and the new
+ * status is applied with meta_sp_setstatus().  resp->mmr_exitval is 1
+ * when the set cannot be found, otherwise the meta_sp_setstatus() result.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_sp_setstat(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_sp_setstat_t	*d;
+	int			ret;
+	set_t			setno;
+	mdsetname_t		*sp;
+	minor_t			mnum;
+	md_error_t		mde = mdnullerror;
+
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	resp->mmr_out_size = 0;
+	resp->mmr_err_size = 0;
+	resp->mmr_out = NULL;
+	resp->mmr_err = NULL;
+	d = (md_mn_msg_sp_setstat_t *)(void *)msg->msg_event_data;
+
+	mnum = d->sp_setstat_mnum;
+	setno = MD_MIN2SET(mnum);
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		/*
+		 * Name this handler's own message type so the log entry
+		 * is not mistaken for an MD_MN_MSG_IOCSET failure.
+		 * NOTE(review): assumes the type is spelled
+		 * MD_MN_MSG_SP_SETSTAT - confirm against the message enum.
+		 */
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_MSG_SP_SETSTAT: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = 1;
+		return;
+	}
+
+	/* A single minor number is passed, hence the count of 1 */
+	ret = meta_sp_setstatus(sp, &mnum, 1, d->sp_setstat_status, &mde);
+	resp->mmr_exitval = ret;
+}
+
+/*
+ * Handler that adds a key to the namespace.
+ *
+ * Looks up the set and component named in the message and calls
+ * add_key_name() for the component.  resp->mmr_exitval is the
+ * component's key on success and -1 on any failure.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_addkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_addkeyname_t	*req;
+	md_error_t		mde = mdnullerror;
+	mdsetname_t		*sp;
+	mdname_t		*compnp;
+	set_t			setno;
+
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	req = (md_mn_msg_addkeyname_t *)(void *)msg->msg_event_data;
+
+	setno = req->addkeyname_setno;
+	sp = metasetnosetname(setno, &mde);
+	if (sp == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_ADDKEYNAME: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	compnp = metaname(&sp, req->addkeyname_name, &mde);
+	if (compnp == NULL) {
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	if (add_key_name(sp, compnp, NULL, &mde) >= 0)
+		resp->mmr_exitval = compnp->key;
+	else
+		resp->mmr_exitval = -1;
+}
+
+/*
+ * Handler that deletes a key from the namespace.
+ *
+ * Looks up the set and the component device named in the message, then
+ * calls del_key_name() for the component.  resp->mmr_exitval is the
+ * del_key_name() result, or -1 when either lookup fails.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_delkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_delkeyname_t	*req;
+	md_error_t		mde = mdnullerror;
+	mdsetname_t		*sp;
+	mdname_t		*compnp;
+	set_t			setno;
+
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	req = (md_mn_msg_delkeyname_t *)(void *)msg->msg_event_data;
+
+	setno = req->delkeyname_setno;
+	sp = metasetnosetname(setno, &mde);
+	if (sp == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_DELKEYNAME: Invalid setno %d\n"), setno);
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	compnp = metadevname(&sp, req->delkeyname_dev, &mde);
+	if (compnp == NULL) {
+		resp->mmr_exitval = -1;
+		return;
+	}
+
+	/*
+	 * The key has to be put back before deleting: an earlier
+	 * del_key_name call for the same component will have reset it to
+	 * MD_KEYBAD, even though references to the component may remain.
+	 */
+	compnp->key = req->delkeyname_key;
+	resp->mmr_exitval = del_key_name(sp, compnp, &mde);
+}
+
+/*
+ * Handler that fetches the value of tstate from the master node.  It is
+ * used to get the ABR state of a metadevice from the master.
+ *
+ * resp->mmr_exitval carries the tstate value; when meta_get_tstate()
+ * fails the failure is logged and 0 is returned instead.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_get_tstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_gettstate_t	*req;
+	md_error_t		mde = mdnullerror;
+	uint_t			tstate;
+
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */
+	req = (md_mn_msg_gettstate_t *)(void *)msg->msg_event_data;
+
+	if (meta_get_tstate(req->gettstate_dev, &tstate, &mde) != 0) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_TSTATE: Invalid dev %llx\n"),
+		    req->gettstate_dev);
+		tstate = 0;
+	}
+	resp->mmr_exitval = tstate;
+}
+
+/*
+ * This is to get the mirror ABR state and the state of its submirrors from
+ * the master node. We need this to ensure consistent output from metastat
+ * when a new node joins the cluster during a resync. Without this the
+ * submirror status will be incorrect until the whole resync is complete which
+ * may take days for very large metadevices.
+ *
+ * On success resp->mmr_exitval is 0 and resp->mmr_out holds a
+ * md_mn_msg_mir_state_res_t with the state and flags of every submirror
+ * slot plus the mirror's tstate.  On failure resp->mmr_exitval is 1-4
+ * (identifying the step that failed), resp->mmr_out is NULL and
+ * resp->mmr_out_size is 0.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_get_mirstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_mir_state_t		*d;
+	md_mn_msg_mir_state_res_t	*res;		/* Results */
+	set_t				setno;
+	mdsetname_t			*sp;		/* Set name */
+	mdname_t			*mirnp;		/* Mirror name */
+	md_error_t			mde = mdnullerror;
+	mm_unit_t			*mm;		/* Mirror */
+	int				smi;
+	uint_t				tstate;
+
+	/* The result buffer is allocated up front and released on failure */
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out_size = sizeof (md_mn_msg_mir_state_res_t);
+	resp->mmr_err_size = 0;
+	resp->mmr_out = Malloc(resp->mmr_out_size);
+	resp->mmr_err = NULL;
+	d = (md_mn_msg_mir_state_t *)(void *)msg->msg_event_data;
+	res = (md_mn_msg_mir_state_res_t *)(void *)resp->mmr_out;
+
+	/* Validate set information from minor number */
+	setno = MD_MIN2SET(d->mir_state_mnum);
+	sp = metasetnosetname(setno, &mde);
+	if (sp == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_MIRROR_STATE: Invalid set %d\n"), setno);
+		resp->mmr_exitval = 1;	/* Failure */
+		goto fail;
+	}
+
+	/* Construct mirror name from minor number */
+	mirnp = metamnumname(&sp, d->mir_state_mnum, 0, &mde);
+	if (mirnp == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"),
+		    d->mir_state_mnum);
+		resp->mmr_exitval = 2;	/* Failure */
+		goto fail;
+	}
+
+	/* Get common mirror structure */
+	mm = (mm_unit_t *)meta_get_mdunit(sp, mirnp, &mde);
+	if (mm == NULL) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_MIRROR_STATE: Invalid mirror minor %x\n"),
+		    d->mir_state_mnum);
+		resp->mmr_exitval = 3;	/* Failure */
+		goto fail;
+	}
+
+	if (meta_get_tstate(d->mir_state_mnum, &tstate, &mde) != 0) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"),
+		    d->mir_state_mnum);
+		resp->mmr_exitval = 4;	/* Failure */
+		goto fail;
+	}
+	/*
+	 * Fill in the sm_state/sm_flags value in the results structure which
+	 * gets passed back to the message originator
+	 */
+	resp->mmr_exitval = 0;
+	for (smi = 0; (smi < NMIRROR); smi++) {
+		mm_submirror_t *mmsp = &mm->un_sm[smi];
+		res->sm_state[smi] = mmsp->sm_state;
+		res->sm_flags[smi] = mmsp->sm_flags;
+	}
+	/* Return value of tstate for mirror */
+	res->mir_tstate = tstate;
+	return;
+
+fail:
+	/*
+	 * Release the result buffer and clear the pointer as well as the
+	 * size, so that the response never refers to freed memory.
+	 */
+	Free(resp->mmr_out);
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+}
+
+/*
+ * Handler that issues the MD_MN_POKE_HOTSPARES ioctl for the set named
+ * in the message.  The ioctl result is returned in resp->mmr_exitval.
+ */
+/*ARGSUSED*/
+void
+mdmn_do_poke_hotspares(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
+{
+	md_mn_msg_pokehsp_t	*req;
+	md_mn_poke_hotspares_t	poke;
+
+	resp->mmr_comm_state = MDMNE_ACK;
+	resp->mmr_out = NULL;
+	resp->mmr_out_size = 0;
+	resp->mmr_err = NULL;
+	resp->mmr_err_size = 0;
+	req = (md_mn_msg_pokehsp_t *)(void *)msg->msg_event_data;
+
+	/* Address the request to the mirror driver for this set */
+	(void) memset(&poke, 0, sizeof (poke));
+	MD_SETDRIVERNAME(&poke, MD_MIRROR, req->pokehsp_setno);
+
+	resp->mmr_exitval = metaioctl(MD_MN_POKE_HOTSPARES, &poke,
+	    &poke.mde, NULL);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c b/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c
new file mode 100644
index 0000000000..a6ba008376
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c
@@ -0,0 +1,690 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <meta.h>
+
+/*
+ * Message handlers referenced by the msg_table entries below.
+ * HANDLER_PARMS is a macro supplying the common handler parameter list;
+ * presumably it comes in through <meta.h> -- TODO confirm.
+ */
+extern void mdmn_do_cmd(HANDLER_PARMS);
+extern void mdmn_do_clu(HANDLER_PARMS);
+extern void mdmn_do_req_owner(HANDLER_PARMS);
+extern void mdmn_do_susp_write(HANDLER_PARMS);
+extern void mdmn_do_state_upd_reswr(HANDLER_PARMS);
+extern void mdmn_do_allocate_hotspare(HANDLER_PARMS);
+extern void mdmn_do_poke_hotspares(HANDLER_PARMS);
+extern void mdmn_do_resync(HANDLER_PARMS);
+extern void mdmn_do_setsync(HANDLER_PARMS);
+extern void mdmn_do_choose_owner(HANDLER_PARMS);
+extern void mdmn_do_change_owner(HANDLER_PARMS);
+extern void mdmn_do_set_cap(HANDLER_PARMS);
+extern void mdmn_do_dummy(HANDLER_PARMS);
+extern void mdmn_do_mddb_parse(HANDLER_PARMS);
+extern void mdmn_do_mddb_block(HANDLER_PARMS);
+extern void mdmn_do_sm_mddb_attach(HANDLER_PARMS);
+extern void mdmn_do_sm_mddb_detach(HANDLER_PARMS);
+extern void mdmn_do_meta_db_newside(HANDLER_PARMS);
+extern void mdmn_do_meta_db_delside(HANDLER_PARMS);
+extern void mdmn_do_meta_md_addside(HANDLER_PARMS);
+extern void mdmn_do_meta_md_delside(HANDLER_PARMS);
+extern void mdmn_do_mddb_optrecerr(HANDLER_PARMS);
+extern void mdmn_do_iocset(HANDLER_PARMS);
+extern void mdmn_do_sp_setstat(HANDLER_PARMS);
+extern void mdmn_do_addkeyname(HANDLER_PARMS);
+extern void mdmn_do_delkeyname(HANDLER_PARMS);
+extern void mdmn_do_get_tstate(HANDLER_PARMS);
+extern void mdmn_do_get_mirstate(HANDLER_PARMS);
+
+/*
+ * Submessage generators referenced by the msg_table entries below.
+ * Table entries that name one of these have a NULL message handler.
+ */
+extern int mdmn_smgen_test6(SMGEN_PARMS);
+extern int mdmn_smgen_state_upd(SMGEN_PARMS);
+extern int mdmn_smgen_mddb_attach(SMGEN_PARMS);
+extern int mdmn_smgen_mddb_detach(SMGEN_PARMS);
+
+md_mn_msg_tbl_entry_t  msg_table[MD_MN_NMESSAGES] = {
+
+/*
+ * In order to have fast direct access to the table, we use the message type as
+ * an index into it.
+ * Thus the order of the elements in this table MUST match the order of the
+ * message types specified in mdmn_commd.x!
+ * See the definition of md_mn_msg_t.
+ *
+ * Be careful and do not disturb the order of the messages!
+ */
+	{
+	/* MD_MN_MSG_NULL */
+		MD_MSG_CLASS0,	/* message class */
+		NULL, 		/* message handler */
+		NULL, 		/* submessage generator */
+		1,		/* timeout in seconds */
+		0, 0, 		/* class busy retry / time delta */
+		0, 0		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_TEST1 */
+		MD_MSG_CLASS1,	/* message class */
+		mdmn_do_dummy, 	/* message handler */
+		NULL, 		/* submessage generator */
+		1,		/* timeout in seconds */
+		200, 4,		/* class busy retry / time delta */
+		10, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_TEST2 */
+		MD_MSG_CLASS2,	/* message class */
+		mdmn_do_dummy, 	/* message handler */
+		NULL, 		/* submessage generator */
+		1,		/* timeout in seconds */
+		200, 4,		/* class busy retry / time delta */
+		10, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_TEST3 */
+		MD_MSG_CLASS3,	/* message class */
+		mdmn_do_dummy, 	/* message handler */
+		NULL, 		/* submessage generator */
+		1,		/* timeout in seconds */
+		200, 4,		/* class busy retry / time delta */
+		10, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_TEST4 */
+		MD_MSG_CLASS4,	/* message class */
+		mdmn_do_dummy, 	/* message handler */
+		NULL, 		/* submessage generator */
+		1,		/* timeout in seconds */
+		200, 4,		/* class busy retry / time delta */
+		10, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_TEST5 */
+		MD_MSG_CLASS5,	/* message class */
+		mdmn_do_dummy, 	/* message handler */
+		NULL, 		/* submessage generator */
+		4,		/* timeout in seconds */
+		200, 4, 	/* class busy retry / time delta */
+		10, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_TEST6 */
+		MD_MSG_CLASS1,	/* message class */
+		NULL,		/* message handler */
+		mdmn_smgen_test6, /* submessage generator */
+		1,		/* timeout in seconds */
+		200, 4, 	/* class busy retry / time delta */
+		10, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_CMD
+	 * Send a command string to all nodes
+	 */
+		MD_MSG_CLASS1,	/* message class */
+		mdmn_do_cmd, 	/* message handler */
+		NULL, 		/* submessage generator */
+		90,		/* times out in 90 secs */
+		40, 20,		/* class busy retry / time delta */
+		10, 1000	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_CMD_RETRY
+	 * Send a command string to all nodes and retry on busy
+	 */
+		MD_MSG_CLASS1,	/* message class */
+		mdmn_do_cmd, 	/* message handler */
+		NULL, 		/* submessage generator */
+		90,		/* times out in 90 secs */
+		100000, 20, 	/* class busy retry / time delta */
+		10, 1000	/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_CLU_CHECK */
+		MD_MSG_CLASS2,	/* message class */
+		mdmn_do_clu, 	/* message handler */
+		NULL, 		/* submessage generator */
+		5,		/* timeout in seconds */
+		10000, 2, 	/* class busy retry / time delta */
+		0, 0		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_CLU_LOCK */
+		MD_MSG_CLASS2,	/* message class */
+		mdmn_do_clu, 	/* message handler */
+		NULL, 		/* submessage generator */
+		1,		/* timeout in seconds */
+		10000, 2, 	/* class busy retry / time delta */
+		0, 0		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_CLU_UNLOCK */
+		MD_MSG_CLASS2,	/* message class */
+		mdmn_do_clu, 	/* message handler */
+		NULL, 		/* submessage generator */
+		1,		/* timeout in seconds */
+		10000, 2,	/* class busy retry / time delta */
+		0, 0		/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_REQUIRE_OWNER */
+		MD_MSG_CLASS5,	/* message class */
+		mdmn_do_req_owner, /* message handler */
+		NULL, 		/* submessage generator */
+		12,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_CHOOSE_OWNER
+	 * Using the current resync count for the set, choose a resync
+	 * owner and send a CHANGE_OWNER message to request that node
+	 * to make itself the owner
+	 */
+		MD_MSG_CLASS3,	/* message class */
+		mdmn_do_choose_owner, /* message handler */
+		NULL, 		/* submessage generator */
+		12,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_CHANGE_OWNER
+	 * Request a change of ownership to the specified node
+	 */
+		MD_MSG_CLASS4,	/* message class */
+		mdmn_do_change_owner, /* message handler */
+		NULL, 		/* submessage generator */
+		12,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_SUSPEND_WRITES
+	 * Suspend all writes to the specified mirror
+	 */
+		MD_MSG_CLASS6,	/* message class */
+		mdmn_do_susp_write, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		200, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_STATE_UPDATE_RESWR
+	 * Update the state of a mirror component
+	 */
+		MD_MSG_CLASS1,	/* message class */
+		mdmn_do_state_upd_reswr, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_STATE_UPDATE
+	 * Suspend writes to a mirror and then update the state of a
+	 * mirror component
+	 */
+		MD_MSG_CLASS1,	/* message class */
+		NULL,		 /* message handler */
+		mdmn_smgen_state_upd,	/* submessage generator */
+		16,		/* SUSPEND_WRITES + STATE_UPDATE_RESWR */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_ALLOCATE_HOTSPARE
+	 * Allocate a hotspare for a mirror component
+	 */
+		MD_MSG_CLASS1,	/* message class */
+		mdmn_do_allocate_hotspare, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_RESYNC_STARTING
+	 * Start a resync thread for the specified mirror
+	 */
+		MD_MSG_CLASS2,	/* message class */
+		mdmn_do_resync, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_RESYNC_NEXT
+	 * Send the next region to be resynced to all nodes. For ABR
+	 * mirrors, the nodes must suspend all writes to this region until
+	 * the next message of this type or a RESYNC_FINISH
+	 */
+		MD_MSG_CLASS2,	/* message class */
+		mdmn_do_resync, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_RESYNC_FINISH
+	 * All resyncs for a mirror are complete, terminate resync thread
+	 */
+		MD_MSG_CLASS1,	/* message class */
+		mdmn_do_resync, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_RESYNC_PHASE_DONE
+	 * A resync phase, optimized, submirror or component is complete
+	 */
+		MD_MSG_CLASS2,	/* message class */
+		mdmn_do_resync, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_SET_CAP
+	 * Set the specified metadevice capability on all nodes
+	 * This is used to propagate the ABR capability
+	 */
+		MD_MSG_CLASS1,	/* message class */
+		mdmn_do_set_cap, /* message handler */
+		NULL,		/* submessage generator */
+		8,		/* timeout in seconds */
+		100000, 10,	/* class busy retry/ time delta */
+		200, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/* MD_MN_MSG_VERBOSITY */
+		MD_MSG_CLASS0,	/* special message class */
+		mdmn_do_dummy,	/* dummy handler */
+		NULL, 		/* submessage generator */
+		1,		/* timeout in seconds */
+		0, 0,		/* No retries for class busy */
+		0, 0		/* No retries for comm fail */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_MDDB_PARSE
+	 * Message cannot fail unless node failure causes node panic
+	 */
+		MD_MSG_CLASS7,		/* message class */
+		mdmn_do_mddb_parse,	/* reparse mddb */
+		NULL, 			/* submessage generator */
+		10,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		UINT_MAX, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_MDDB_BLOCK
+	 * Message cannot fail unless node failure causes node panic
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		mdmn_do_mddb_block,	/* block/unblock reparse */
+		NULL, 			/* submessage generator */
+		5,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		UINT_MAX, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_META_DB_ATTACH
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		NULL,			/* message handler */
+		mdmn_smgen_mddb_attach,	/* submessage generator */
+		30,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_SM_MDDB_ATTACH
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		mdmn_do_sm_mddb_attach,	/* message handler */
+		NULL,			/* submessage generator */
+		20,			/* timeout in seconds */
+					/* creates mddbs */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_META_DB_DETACH
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		NULL,			/* detach mddb */
+		mdmn_smgen_mddb_detach,	/* submessage generator */
+		10,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+	{
+
+	/*
+	 * MD_MN_MSG_SM_MDDB_DETACH
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		mdmn_do_sm_mddb_detach,	/* detach mddb */
+		NULL,			/* submessage generator */
+		5,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_META_DB_NEWSIDE
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		mdmn_do_meta_db_newside, /* add new mddb side info */
+		NULL, 			/* submessage generator */
+		10,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_META_DB_DELSIDE
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		mdmn_do_meta_db_delside, /* delete mddb side info */
+		NULL, 			/* submessage generator */
+		10,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_META_MD_ADDSIDE
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		mdmn_do_meta_md_addside, /* add new md side info */
+		NULL, 			/* submessage generator */
+		10,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_META_MD_DELSIDE
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		mdmn_do_meta_md_delside, /* delete md side info */
+		NULL, 			/* submessage generator */
+		10,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_MDDB_OPTRECERR
+	 * Message cannot fail unless node failure causes node panic
+	 */
+		MD_MSG_CLASS3,		/* message class */
+		mdmn_do_mddb_optrecerr,	/* fix opt rec mddb */
+		NULL, 			/* submessage generator */
+		3,			/* timeout in seconds */
+		UINT_MAX, 2,		/* class busy retry / time delta */
+		10, 100			/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_ABORT
+	 */
+		MD_MSG_CLASS0,		/* special message class */
+		mdmn_do_dummy,		/* dummy handler */
+		NULL, 			/* submessage generator */
+		1,			/* timeout in seconds */
+		0, 0,			/* No retries for class busy */
+		0, 0			/* No retries for comm fail */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_STATE_UPDATE_RESWR2
+	 * Update the state of a mirror component, called if during the updates
+	 * of the watermarks for a softpartition, an IO error on a submirror
+	 * occurs.  Need to have a class different from CLASS1, otherwise we
+	 * deadlock with the command that is currently being processed
+	 * (metainit/metaclear/metattach/metarecover)
+	 *
+	 * And we may actually use a class different than CLASS1 because this
+	 * can only happen when a metainit or similar is called, and in that
+	 * case all potential metadb or metaset commands are blocked anyway.
+	 * Besides the different class it does exactly what
+	 * MD_MN_MSG_STATE_UPDATE_RESWR would do
+	 */
+		MD_MSG_CLASS3,	/* message class */
+		mdmn_do_state_upd_reswr, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_STATE_UPDATE2
+	 * Like MD_MN_MSG_STATE_UPDATE only using a different class.
+	 * See comment for MD_MN_MSG_STATE_UPDATE_RESWR2
+	 */
+		MD_MSG_CLASS3,	/* message class */
+		NULL,		 /* message handler */
+		mdmn_smgen_state_upd,	/* submessage generator */
+		16,		/* SUSPEND_WRITES + STATE_UPDATE_RESWR */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_ALLOCATE_HOTSPARE2
+	 * Like MD_MN_MSG_ALLOCATE_HOTSPARE only using a different class.
+	 * See comment for MD_MN_MSG_STATE_UPDATE_RESWR2
+	 */
+		MD_MSG_CLASS3,	/* message class */
+		mdmn_do_allocate_hotspare, /* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_IOCSET
+	 * Send IOCSET ioctl to create a soft part
+	 */
+		MD_MSG_CLASS1,		/* message class */
+		mdmn_do_iocset,		/* create softpart */
+		NULL, 			/* submessage generator */
+		90,			/* times out in 90 secs */
+		10000, 2, 		/* class busy retry / time delta */
+		10, 1000		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_SP_SETSTAT
+	 * Update the status of a softpart
+	 */
+		MD_MSG_CLASS1,		/* message class */
+		mdmn_do_sp_setstat,	/* update softpart state */
+		NULL, 			/* submessage generator */
+		90,			/* times out in 90 secs */
+		10000, 2, 		/* class busy retry / time delta */
+		10, 1000		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_ADDKEYNAME
+	 * Add a key to the namespace
+	 */
+		MD_MSG_CLASS1,		/* message class */
+		mdmn_do_addkeyname,	/* add key */
+		NULL, 			/* submessage generator */
+		90,			/* times out in 90 secs */
+		10000, 2, 		/* class busy retry / time delta */
+		10, 1000		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_DELKEYNAME
+	 * Remove a key from the namespace
+	 */
+		MD_MSG_CLASS1,		/* message class */
+		mdmn_do_delkeyname,	/* delete key */
+		NULL, 			/* submessage generator */
+		90,			/* times out in 90 secs */
+		10000, 2, 		/* class busy retry / time delta */
+		10, 1000		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_GET_TSTATE
+	 * Get ui_tstate for a metadevice from the master. Used to get ABR
+	 * state from the master node.
+	 */
+		MD_MSG_CLASS2,		/* message class */
+		mdmn_do_get_tstate,	/* get tstate */
+		NULL,			/* submessage generator */
+		5,			/* times out in 5 secs */
+		UINT_MAX, 10, 		/* class busy retry / time delta */
+		UINT_MAX, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_GET_MIRROR_STATE
+	 * Get submirror state for specified submirror from master node.
+	 * Used to synchronise initial resync state across a cluster.
+	 */
+		MD_MSG_CLASS1,		/* message class */
+		mdmn_do_get_mirstate,	/* get smstate */
+		NULL,			/* submessage generator */
+		5,			/* times out in 5 secs */
+		UINT_MAX, 10,		/* class busy retry / time delta */
+		UINT_MAX, 100		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_SP_SETSTAT2
+	 * Update the status of a softpart. Used for propagating an error from
+	 * the soft-part sp_error() routine
+	 */
+		MD_MSG_CLASS4,		/* message class */
+		mdmn_do_sp_setstat,	/* update softpart state */
+		NULL,			/* submessage generator */
+		90,			/* times out in 90 secs */
+		10000, 2, 		/* class busy retry / time delta */
+		10, 1000		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_SETSYNC
+	 * Start a resync thread for the specified mirror
+	 */
+		MD_MSG_CLASS1,		/* message class */
+		mdmn_do_setsync,	/* message handler */
+		NULL, 			/* submessage generator */
+		90,			/* timeout in seconds */
+		10000, 2,		/* class busy retry / time delta */
+		10, 1000		/* comm fail retry / time delta */
+	},
+
+	{
+	/*
+	 * MD_MN_MSG_POKE_HOTSPARES
+	 * Call poke_hotspares()
+	 */
+		MD_MSG_CLASS1,		/* message class */
+		mdmn_do_poke_hotspares,	/* message handler */
+		NULL, 		/* submessage generator */
+		8,		/* timeout in seconds */
+		UINT_MAX, 10,	/* class busy retry / time delta */
+		UINT_MAX, 100	/* comm fail retry / time delta */
+	},
+
+};
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c b/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c
new file mode 100644
index 0000000000..582b7d293e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c
@@ -0,0 +1,922 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+#include <sdssc.h>
+#include <arpa/inet.h>
+#include <sys/lvm/md_mddb.h>
+
+#define	MAX_LINE_SIZE 1024
+
+/*
+ * Maximum amount of time to spend waiting for an ownership change to complete.
+ */
+static const int OWNER_TIMEOUT = 3;
+
+/*
+ * FUNCTION:	meta_is_mn_set()
+ * INPUT:       sp      - the set name
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 1 if MultiNode set else 0
+ * PURPOSE:	checks if the set is a MultiNode set
+ * NOTES:	A NULL sp, a NULL set name and the local set all yield 0.
+ *		If the set descriptor cannot be obtained, 0 is returned and
+ *		ep holds whatever metaget_setdesc() stored there.
+ */
+int
+meta_is_mn_set(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+
+	/* Local set cannot be MultiNode */
+	if ((sp == NULL) || (sp->setname == NULL) ||
+	    (strcmp(sp->setname, MD_LOCAL_NAME) == 0))
+		return (0);
+
+	/*
+	 * The descriptor lookup can fail.  An ASSERT alone does not protect
+	 * the dereference below when assertions are disabled, so treat a
+	 * missing descriptor as "not a MultiNode set" instead of crashing.
+	 */
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (0);
+
+	return ((sd->sd_flags & MD_SR_MN) ? 1 : 0);
+}
+
+/*
+ * FUNCTION:	meta_is_mn_name()
+ * INPUT:	spp	- address of the set name pointer; when *spp is NULL
+ *			  the set is derived from the name itself
+ *			  (eg set/d10 )
+ *		name	- the metadevice or hot spare pool name
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 1 if the name lives in a MultiNode set else 0
+ * PURPOSE:	checks if the metadevice is in a MultiNode set
+ */
+int
+meta_is_mn_name(
+	mdsetname_t	**spp,
+	char		*name,
+	md_error_t	*ep
+)
+{
+	md_error_t	lookup_err = mdnullerror;
+	char		*canon;
+
+	if (*spp == NULL) {
+		if (is_hspname(name)) {
+			if (metahspname(spp, name, ep) == NULL)
+				return (0);
+		} else if (!is_metaname(name)) {
+			/* neither a hot spare pool nor a metadevice name */
+			return (0);
+		} else {
+			/* Will fill in *spp based on name */
+			canon = meta_name_getname(spp, name, &lookup_err);
+			if (canon != NULL)
+				Free(canon);
+			if (!mdisok(&lookup_err)) {
+				(void) mdstealerror(ep, &lookup_err);
+				return (0);
+			}
+		}
+	}
+
+	/* The local set is never a MultiNode set */
+	if (strcmp((*spp)->setname, MD_LOCAL_NAME) == 0)
+		return (0);
+
+	/* Without a set descriptor the flags cannot be inspected */
+	if (metaget_setdesc(*spp, ep) == NULL)
+		return (0);
+
+	if ((*spp)->setdesc->sd_flags & MD_SR_MN)
+		return (1);
+	return (0);
+}
+
+/*
+ * meta_ping_mnset(set_t setno)
+ * Send a test message for this set in order to make commd do some init stuff.
+ * The message is sent with MD_MSGF_NO_LOG, so the change log is not involved,
+ * and with MD_MSGF_FAIL_ON_SUSPEND, so it fails immediately if the set is
+ * suspended.
+ * The outcome of the send is deliberately ignored; any result structure that
+ * comes back is freed.
+ */
+void
+meta_ping_mnset(set_t setno)
+{
+	/*
+	 * Keep the payload in an array: sizeof (data) must be the size of the
+	 * string including its terminating NUL, not the size of a pointer.
+	 */
+	char		data[] = "test";
+	md_error_t	mde = mdnullerror;
+	md_mn_result_t	*resp = NULL;
+
+	(void) mdmn_send_message(setno, MD_MN_MSG_TEST2,
+	    MD_MSGF_NO_LOG | MD_MSGF_FAIL_ON_SUSPEND, data,
+	    sizeof (data), &resp, &mde);
+
+	if (resp != (md_mn_result_t *)NULL) {
+		free_result(resp);
+	}
+}
+
+/*
+ *
+ * FUNCTION:	print_stderr
+ * INPUT:	errstr	- the error message returned by the command
+ *		context	- the context string from metainit -a
+ * PURPOSE:	called from meta_mn_send_command to print the error message
+ *		to stderr. When context is NO_CONTEXT_STRING, the errstr string
+ *		is output unchanged. When context is a string, it is the context
+ *		string for the metainit -a command and in this case the errstr
+ *		string has to be parsed to extract the command and node name
+ *		and to send a message to stderr in the format
+ *		command: node: context: error message
+ *		If errstr does not have the expected "command: node: message"
+ *		layout, errstr is printed followed by a format complaint.
+ */
+static void
+print_stderr(
+	char	*errstr,
+	char	*context
+)
+{
+	char	*command;
+	char	*node;
+	char	*message;
+	size_t	length;
+
+	if (context == NO_CONTEXT_STRING) {
+		(void) fprintf(stderr, "%s", errstr);
+		return;
+	}
+
+	/*
+	 * No scanned field can be longer than errstr itself, so a buffer of
+	 * strlen(errstr) + 1 bytes (room for the terminating NUL) is enough
+	 * for each of the three unbounded %[ conversions below.
+	 */
+	length = strlen(errstr) + 1;
+	command = Malloc(length);
+	node = Malloc(length);
+	message = Malloc(length);
+
+	if (sscanf(errstr, "%[^:]: %[^:]: %[^\n]", command, node,
+	    message) == 3) {
+		(void) fprintf(stderr, "%s: %s: %s: %s\n", command,
+		    node, context, message);
+	} else {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: Invalid format error message"), errstr);
+	}
+
+	Free(command);
+	Free(node);
+	Free(message);
+}
+
+/*
+ * FUNCTION:	meta_mn_send_command()
+ * INPUT:	sp	- the set name
+ *		argc	- number of arguments
+ *		argv	- arg list
+ *		flags	- some controlling flags
+ *		initall_context	- context string for metainit -a
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	the exit value carried in the reply when mdmn_send_message
+ *		succeeded, otherwise 1
+ * PURPOSE:	sends the command to the master node for execution
+ * NOTES:	The arguments are joined with single blanks (empty arguments
+ *		are skipped) into a fixed size buffer.  A command line that
+ *		does not fit is rejected with a message on stderr and a
+ *		return value of 1 rather than being sent truncated.
+ */
+int
+meta_mn_send_command(
+	mdsetname_t	*sp,
+	int		argc,
+	char		*argv[],
+	int		flags,
+	char		*initall_context,
+	md_error_t	*ep
+)
+{
+	const size_t	cmdsize = 1024;
+	int		a;
+	int		err;
+	int		retval;
+	int		toolong;
+	int		send_message_flags = MD_MSGF_DEFAULT_FLAGS;
+	int		send_message_type;
+	char		*cmd;
+	md_mn_result_t	*resp = NULL;
+
+	/*
+	 * Build the command line with bounded copies; strlcpy/strlcat return
+	 * the length they tried to create, so a value >= cmdsize means the
+	 * result was truncated.
+	 */
+	cmd = Malloc(cmdsize);
+	toolong = (strlcpy(cmd, argv[0], cmdsize) >= cmdsize);
+	for (a = 1; (a < argc) && !toolong; a++) {
+		/* don't copy empty arguments */
+		if (*argv[a] == '\0') {
+			continue;
+		}
+		if ((strlcat(cmd, " ", cmdsize) >= cmdsize) ||
+		    (strlcat(cmd, argv[a], cmdsize) >= cmdsize)) {
+			toolong = 1;
+		}
+	}
+	if (toolong) {
+		/* never execute a silently shortened command on all nodes */
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "Command failed: command line exceeds %d bytes.\n"),
+		    (int)cmdsize - 1);
+		Free(cmd);
+		return (1);
+	}
+
+	/*
+	 * in dryrun mode stop on the first error
+	 * use the CMD_RETRY message type if RETRY_BUSY flag set
+	 */
+	if (flags & MD_DRYRUN)
+		send_message_flags |= MD_MSGF_STOP_ON_ERROR;
+	if (flags & MD_NOLOG)
+		send_message_flags |= MD_MSGF_NO_LOG;
+	if (flags & MD_PANIC_WHEN_INCONSISTENT)
+		send_message_flags |= MD_MSGF_PANIC_WHEN_INCONSISTENT;
+	if (flags & MD_RETRY_BUSY)  {
+		send_message_type = MD_MN_MSG_BC_CMD_RETRY;
+	} else {
+		send_message_type = MD_MN_MSG_BC_CMD;
+	}
+	err = mdmn_send_message(
+		sp->setno, send_message_type, send_message_flags,
+		cmd, (int)cmdsize, &resp, ep);
+
+	/* cmd came from Malloc(), release it with the matching Free() */
+	Free(cmd);
+
+	if (err == 0) {
+		/*
+		 * stderr may be turned off by IGNORE_STDERR
+		 * In dryrun we only print stderr if the exit_val is non-zero
+		 */
+		if ((resp->mmr_err_size != 0) &&
+		    ((flags & MD_IGNORE_STDERR) == 0)) {
+			if (((flags & MD_DRYRUN) == 0) ||
+			    (resp->mmr_exitval != 0)) {
+				print_stderr(resp->mmr_err, initall_context);
+			}
+		}
+
+		/*
+		 * If dryrun is set, we don't display stdout,
+		 * because the real run has yet to follow.
+		 */
+		if (((flags & MD_DRYRUN) == 0) && (resp->mmr_out_size != 0)) {
+			(void) printf("%s", resp->mmr_out);
+		}
+		retval = resp->mmr_exitval;
+		free_result(resp);
+		return (retval);
+	}
+
+	/* The send failed: explain why, based on the commd state if known */
+	if (resp != NULL) {
+		if (resp->mmr_comm_state == MDMNE_CLASS_BUSY) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "rpc.mdcommd currently busy. "
+			    "Retry operation later.\n"));
+		} else if (resp->mmr_comm_state == MDMNE_NOT_JOINED) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Node %s must join the %s multi-owner diskset to "
+			    "issue commands.\n"
+			    "To join, use: metaset -s %s -j\n"),
+			    mynode(), sp->setname, sp->setname);
+		} else if (resp->mmr_comm_state == MDMNE_LOG_FAIL) {
+			mddb_config_t	c;
+
+			(void) memset(&c, 0, sizeof (c));
+			c.c_setno = sp->setno;
+			(void) metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL);
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Command not attempted: Unable to log message "
+			    "in set %s\n"), sp->setname);
+			/* additionally report a stale database for the set */
+			if (c.c_flags & MDDB_C_STALE) {
+				(void) mdmddberror(ep, MDE_DB_STALE, NODEV64,
+				    sp->setno, 0, NULL);
+				mde_perror(ep, "");
+			}
+		} else {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Command failed: Commd State %d "
+			    "encountered.\n"), resp->mmr_comm_state);
+		}
+		free_result(resp);
+	} else {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "Command failed: mdmn_send_message returned %d.\n"),
+		    err);
+	}
+
+
+	return (1);
+}
+
+/*
+ * FUNCTION:	meta_mn_send_suspend_writes()
+ * INPUT:	mnum	- minor num of mirror
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	return value from mdmn_send_message()
+ * PURPOSE:	sends message to all nodes to suspend writes to the mirror.
+ */
+int
+meta_mn_send_suspend_writes(
+	minor_t		mnum,
+	md_error_t	*ep
+)
+{
+	md_mn_result_t		*reply = NULL;
+	md_mn_msg_suspwr_t	payload;
+	int			rval;
+
+	payload.msg_suspwr_mnum = mnum;
+
+	/*
+	 * Nobody issues this message directly, so it is sent with the
+	 * suspend override flag.  Should it arrive while the commd is
+	 * suspended, it is there because a command is being replayed (or
+	 * something similar) and must not be blocked.  When the commd is
+	 * not suspended the flag does no harm.
+	 */
+	rval = mdmn_send_message(MD_MIN2SET(mnum), MD_MN_MSG_SUSPEND_WRITES,
+	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, (char *)&payload,
+	    sizeof (payload), &reply, ep);
+
+	/* Only the return code matters; drop the reply if there is one */
+	if (reply == NULL)
+		return (rval);
+
+	free_result(reply);
+	return (rval);
+}
+
+/*
+ * Parse the multi-node list file
+ *
+ * Return Values:	Zero	 - Success
+ *			Non Zero - Failure
+ *
+ * File content:	The content of the nodelist file should consist of
+ *			triplets of nodeid, nodename and private interconnect
+ *			address separated by one or more white space.
+ * e.g.
+ *			1 node_a 192.168.111.3
+ *			2 node_b 192.168.111.5
+ *
+ *			Any missing fields will result in an error.
+ *
+ * If the nodelist file cannot be opened, a single entry describing this
+ * node (node id 1, the name from mynode() and its first IPv4 address) is
+ * returned instead.
+ *
+ * On success *nl is the head of the list and *nodecnt the number of
+ * entries.  On every failure the partially built list is freed, *nl is
+ * set to NULL and *nodecnt to 0.
+ */
+int
+meta_read_nodelist(
+	int				*nodecnt,
+	mndiskset_membershiplist_t	**nl,
+	md_error_t			*ep
+)
+{
+	FILE				*fp = NULL;
+	char				line[MAX_LINE_SIZE];
+	char				*buf;
+	uint_t				i;
+	int				sz;
+	mndiskset_membershiplist_t	**tailp = nl;
+
+	/* open file */
+	if ((fp = fopen(META_MNSET_NODELIST, "r")) == NULL) {
+		mndiskset_membershiplist_t	*nlp;
+		struct hostent			*hp;
+		int				syserr = 0;
+
+		/* return this node with id of 1 */
+		nlp = *tailp = Zalloc(sizeof (*nlp));
+
+		*nodecnt = 1;
+		nlp->msl_node_id = 1;
+		buf = mynode();
+		sz = min(strlen(buf), sizeof (nlp->msl_node_name) - 1);
+		(void) strncpy(nlp->msl_node_name, buf, sz);
+		nlp->msl_node_name[sz] = '\0';
+
+		/* retrieve info about our host */
+		if ((hp = gethostbyname(buf)) == NULL) {
+			syserr = EADDRNOTAVAIL;
+		} else if (hp->h_addrtype != AF_INET) {
+			/* We only do IPv4 addresses, for now */
+			syserr = EPFNOSUPPORT;
+		} else if (*hp->h_addr_list == NULL) {
+			syserr = EADDRNOTAVAIL;
+		} else {
+			struct in_addr in;
+
+			/* We take the first address only */
+			(void) memcpy(&in.s_addr, *hp->h_addr_list,
+			    sizeof (struct in_addr));
+			/* bound the copy by the destination and terminate */
+			(void) strlcpy(nlp->msl_node_addr, inet_ntoa(in),
+			    sizeof (nlp->msl_node_addr));
+		}
+
+		if (syserr != 0) {
+			/*
+			 * Do not hand a half filled entry back to the
+			 * caller; clean up like the parsing path does.
+			 */
+			meta_free_nodelist(*nl);
+			*nl = NULL;
+			*nodecnt = 0;
+			return (mdsyserror(ep, syserr, buf));
+		}
+
+		return (0);
+	}
+
+	*nl = NULL;
+	*nodecnt = 0;
+
+	while ((buf = fgets(line, sizeof (line) - 1, fp)) != NULL) {
+		mndiskset_membershiplist_t	*nlp;
+
+		/*
+		 * skip leading spaces; when the loop stops on a field,
+		 * i holds the length of that field
+		 */
+		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
+			buf++;
+
+		/* skip comments and blank lines */
+		if (*buf == '\0' || *buf == '#')
+			continue;
+
+		/* allocate memory and set tail pointer */
+		nlp = *tailp = Zalloc(sizeof (*nlp));
+		tailp = &nlp->next;
+
+		/* parse node id */
+		nlp->msl_node_id = strtoul(buf, NULL, 0);
+		buf += i;
+
+		/* skip leading spaces */
+		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
+			buf++;
+
+		/* fields missing, return error */
+		if (*buf == '\0' || *buf == '#')
+			goto missing_field;
+
+		/* parse node name */
+		sz = min(i, sizeof (nlp->msl_node_name) - 1);
+		(void) strncpy(nlp->msl_node_name, buf, sz);
+		nlp->msl_node_name[sz] = '\0';
+		buf += i;
+
+		/* skip leading spaces */
+		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
+			buf++;
+
+		/* fields missing, return error */
+		if (*buf == '\0' || *buf == '#')
+			goto missing_field;
+
+		/* parse node address */
+		sz = min(i, sizeof (nlp->msl_node_addr) - 1);
+		(void) strncpy(nlp->msl_node_addr, buf, sz);
+		nlp->msl_node_addr[sz] = '\0';
+
+		++*nodecnt;
+	}
+
+	/* close file */
+	if (fclose(fp) != 0)
+		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
+
+	return (0);
+
+missing_field:
+	/* discard everything parsed so far, the file is malformed */
+	meta_free_nodelist(*nl);
+	*nl = NULL;
+	*nodecnt = 0;
+
+	/* close file and return */
+	if (fclose(fp) != 0)
+		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
+
+	return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
+}
+
+/*
+ * Populate the multi-node list file from a given list of node id's
+ * The nids must have only one node id in each cell. Range of node
+ * id's in the form 1-n are not allowed and are rejected with EINVAL.
+ *
+ * Return Values:	Zero	 - Success
+ *			Non Zero - Failure
+ */
+int
+meta_write_nodelist(
+	int		nodecnt,
+	char		**nids,
+	md_error_t	*ep
+)
+{
+	FILE		*fp = NULL;
+	char		name[MAX_LINE_SIZE], addr[MAX_LINE_SIZE];
+	char		*endp;
+	int		i;
+	uint_t		nid;
+	struct in_addr	ipaddr;
+	int		err = 0;
+
+	/* check if we are running on clustering */
+	if ((err = sdssc_bind_library()) != SDSSC_OKAY) {
+		return (mdsyserror(ep, err, META_MNSET_NODELIST));
+	}
+
+	/* open file for writing */
+	if ((fp = fopen(META_MNSET_NODELIST, "w")) == NULL) {
+		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
+	}
+
+	for (i = 0; i < nodecnt; i++) {
+		/* extract the node id; the whole cell must be one number */
+		errno = 0;
+		nid = strtoul(nids[i], &endp, 0);
+		if ((errno != 0) || (endp == nids[i]) || (*endp != '\0')) {
+			if (fclose(fp) != 0)
+				return (mdsyserror(ep, errno,
+				    META_MNSET_NODELIST));
+			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
+		}
+
+		/* get node name */
+		(void) snprintf(name, sizeof (name), "%u", nid);
+		sdssc_cm_nid2nm(name);
+
+		/* finally get the private ip address */
+		(void) snprintf(addr, sizeof (addr), "%s", name);
+		if (sdssc_get_priv_ipaddr(addr, &ipaddr) != SDSSC_OKAY) {
+			if (fclose(fp) != 0)
+				return (mdsyserror(ep, errno,
+				    META_MNSET_NODELIST));
+			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
+		}
+
+		(void) fprintf(fp, "%u\t%s\t%s\n", nid, name,
+		    inet_ntoa(ipaddr));
+	}
+
+	/* close file */
+	if (fclose(fp) != 0)
+		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
+
+	return (0);
+}
+
+/*
+ * Free every element of a node list (as built by meta_read_nodelist())
+ */
+void
+meta_free_nodelist(
+	mndiskset_membershiplist_t	*nl
+)
+{
+	while (nl != NULL) {
+		mndiskset_membershiplist_t	*doomed = nl;
+
+		nl = nl->next;
+		Free(doomed);
+	}
+}
+
+/*
+ * FUNCTION:	meta_mn_send_setsync()
+ * INPUT:	sp	- setname
+ *		mirnp	- mirror name
+ *		size	- buffer size, 0 if none
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	a non-zero return value from mdmn_send_message(), otherwise
+ *		the return value from meta_mn_send_resync_starting()
+ * PURPOSE:  Send a setsync command to all nodes to set resync status
+ */
+
+int
+meta_mn_send_setsync(
+	mdsetname_t		*sp,
+	mdname_t		*mirnp,
+	daddr_t			size,
+	md_error_t		*ep
+)
+{
+	md_mn_result_t		*reply = NULL;
+	md_mn_msg_setsync_t	msg;
+	int			rval;
+
+	msg.setsync_flags = 0;
+	msg.setsync_copysize = size;
+	msg.setsync_mnum = meta_getminor(mirnp->dev);
+
+	/*
+	 * The metasync command is deliberately not logged: it has no effect
+	 * on the underlying metadb state, and after a master change the
+	 * reconfiguration process issues a fresh 'metasync' to all affected
+	 * mirrors anyway, so a logged copy would only be sent a second time.
+	 * Skipping the log also reduces the processing time required.
+	 */
+	rval = mdmn_send_message(sp->setno, MD_MN_MSG_SETSYNC,
+	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND,
+	    (char *)&msg, sizeof (msg), &reply, ep);
+	if (reply != NULL)
+		free_result(reply);
+
+	if (rval != 0)
+		return (rval);
+
+	/*
+	 * Unlike non-MN sets, the metasync command does not actually
+	 * start a resync, it simply updates the state on all of the
+	 * nodes. Therefore, to start a resync we send a resync starting
+	 * message for the metadevice
+	 */
+	return (meta_mn_send_resync_starting(mirnp, ep));
+}
+
+/*
+ * FUNCTION:	meta_mn_send_metaclear_command()
+ * INPUT:	sp	- setname
+ *		name	- metadevice name
+ *		options - command options
+ *		pflag	- clear all soft partitions for a given device
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	return value from meta_mn_send_command()
+ * PURPOSE:  Send a metaclear command to all nodes with force(-f) and
+ *	     recurse(-r) options set if required. For hotspare pool and
+ *	     metadevices, the metadevice name is of the form setname/dxx or
+ *	     setname/hspxxx so a '-s' argument isn't required. If pflag is set
+ *	     the name refers to a metadevice or component and in this case
+ *	     a '-s' argument is required to define the set.
+ *
+ *	     The command line built is:
+ *		metaclear [-s setname] [-f] [-r] [-p] name
+ */
+
+int
+meta_mn_send_metaclear_command(
+	mdsetname_t		*sp,
+	char			*name,
+	mdcmdopts_t		options,
+	int			pflag,
+	md_error_t		*ep
+)
+{
+	char	**argv;
+	int	argc = 0;
+	int	rval;
+
+	/*
+	 * Seven slots cover the longest possible command line:
+	 * "metaclear", "-s", setname, "-f", "-r", "-p" and the name.
+	 * The vector is not NULL terminated; argc carries its length.
+	 */
+	argv = Calloc(7, sizeof (char *));
+	argv[argc++] = "metaclear";
+
+	/* -s <setname> is only passed together with -p (see below) */
+	if (pflag) {
+		argv[argc++] = "-s";
+		argv[argc++] = sp->setname;
+	}
+
+	if (options & MDCMD_FORCE)
+		argv[argc++] = "-f";
+
+	if (options & MDCMD_RECURSE)
+		argv[argc++] = "-r";
+
+	if (pflag)
+		argv[argc++] = "-p";
+	argv[argc++] = name;
+
+	rval = meta_mn_send_command(sp, argc, argv,
+	    MD_DISP_STDERR, NO_CONTEXT_STRING, ep);
+
+	/* only the vector is ours; the strings it points at are not */
+	free(argv);
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_mn_send_resync_starting()
+ * INPUT:	mirnp	- mirror name
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	return value from mdmn_send_message()
+ * PURPOSE:  Send a resync starting message to all nodes.  The set is
+ *	     derived from the mirror's minor number.
+ */
+
+int
+meta_mn_send_resync_starting(
+	mdname_t		*mirnp,
+	md_error_t		*ep
+)
+{
+	md_mn_msg_resync_t	msg;
+	md_mn_result_t		*reply = NULL;
+	minor_t			mnum;
+	int			rval;
+
+	mnum = meta_getminor(mirnp->dev);
+	msg.msg_resync_mnum = mnum;
+
+	/*
+	 * This message is never issued directly, so it is sent with the
+	 * suspend override flag.  If it turns up while the commd is
+	 * suspended it must be due to replaying a command or similar,
+	 * and in that case it must not be blocked.  If the commd is not
+	 * suspended, the flag does no harm.
+	 */
+	rval = mdmn_send_message(MD_MIN2SET(mnum),
+	    MD_MN_MSG_RESYNC_STARTING,
+	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND,
+	    (char *)&msg, sizeof (msg), &reply, ep);
+
+	if (reply != NULL)
+		free_result(reply);
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_mn_change_owner()
+ * INPUT:	opp	- pointer to parameter block
+ *		setno	- set number of mirror metadevice
+ *		mnum	- minor number of mirror metadevice
+ *		owner	- node ID of mirror owner
+ *		flags	- flag field for ioctl
+ * OUTPUT:	opp	- parameter block used to send ioctl
+ * RETURNS:	int	- 0 success, else -1 or the failing metaioctl() result
+ * PURPOSE:	issue an ioctl to change the ownership of the specified mirror
+ *		to our node ID. We need to be the owner before any watermarks
+ *		are committed to the device otherwise we'll enter a deadly
+ *		embrace when attempting to write the watermark.
+ *		This function can also be used to set the owner on a node to
+ *		NULL. In this case the change is only made on the local node.
+ *		In addition by setting the MD_MN_MM_CHOOSE_OWNER flag, the
+ *		function can also be used to choose a mirror resync owner. This
+ *		function should only be called on the master and it will
+ *		select the owner and request it to become the owner.
+ */
+int
+meta_mn_change_owner(
+	md_set_mmown_params_t 	**opp,	/* Returned parameter block */
+	set_t			setno,	/* Mirror set number */
+	uint_t 			mnum,	/* Minor number */
+	uint_t			owner,	/* Node ID of mirror owner */
+	uint_t			flags	/* Flags */
+)
+{
+	md_set_mmown_params_t	*ownpar = *opp;
+	md_mn_own_status_t	*ownstat = NULL;
+	struct timeval tvs, tve;	/* start / current time of the wait */
+	int			n = 0;	/* EAGAIN retries so far */
+	int			rval;
+
+	if (ownpar != NULL) {	/* reuse the caller's block, zeroed */
+		(void) memset(ownpar, 0, sizeof (*ownpar));
+	} else {
+		ownpar = Zalloc(sizeof (*ownpar));
+	}
+	ownstat = Zalloc(sizeof (*ownstat));
+
+	ownpar->d.mnum = mnum;
+	ownpar->d.owner = owner;
+	ownpar->d.flags = flags;
+	MD_SETDRIVERNAME(ownpar, MD_MIRROR, setno);
+	MD_SETDRIVERNAME(ownstat, MD_MIRROR, setno);
+
+	/*
+	 * Attempt to change the ownership to the specified node. We retry this
+	 * up to 10 times if we receive EAGAIN from the metadevice. This only
+	 * happens if the underlying metadevice is busy with outstanding i/o
+	 * that requires ownership change. Any other error ends the loop.
+	 */
+	while ((rval = metaioctl(MD_MN_SET_MM_OWNER, ownpar, &ownpar->mde,
+	    NULL)) != 0) {
+		md_sys_error_t	*ip =
+		    &ownpar->mde.info.md_error_info_t_u.sys_error;
+		if (ip->errnum != EAGAIN)
+			break;
+		if (n++ >= 10)
+			break;
+		(void) sleep(1);
+	}
+
+	/*
+	 * Owner of NULL or master chooses: don't wait for completion.
+	 * NOTE(review): rval is dropped, a failed ioctl still returns 0.
+	 */
+	if ((owner == 0) || (flags & MD_MN_MM_CHOOSE_OWNER)) {
+		Free(ownstat);
+		*opp = ownpar;
+		return (0);
+	}
+
+	/*
+	 * Poll until MD_MN_MM_RESULT is set.  The timeout (-> rval = -1) is
+	 * only checked, and sleep(1) only taken, while the status ioctl fails.
+	 */
+	ownstat->mnum = ownpar->d.mnum;
+	(void) gettimeofday(&tvs, NULL);
+
+	while ((rval == 0) && !(ownstat->flags & MD_MN_MM_RESULT)) {
+		while ((rval = metaioctl(MD_MN_MM_OWNER_STATUS, ownstat,
+		    &ownstat->mde, NULL)) != 0) {
+			(void) gettimeofday(&tve, NULL);
+			if ((tve.tv_sec - tvs.tv_sec) > OWNER_TIMEOUT) {
+				rval = -1;
+				break;
+			}
+			(void) sleep(1);
+		}
+	}
+
+	/* no timeout or ioctl error: report -1 if the owner change failed */
+
+	if (rval == 0) {
+		rval = (ownstat->flags & MD_MN_MM_RES_FAIL) ? -1 : 0;
+	}
+
+	Free(ownstat);
+	*opp = ownpar;	/* hand the (possibly newly allocated) block back */
+	return (rval);
+}
+/*
+ * Special handling is required when running on a single node in a
+ * non-SC3.x environment.  This function tests for that case.
+ *
+ * Return values:
+ *	0 - nodelist unreadable, not exactly one node, the one node is
+ *	    not this node, or in a SC3.x env
+ *	1 - 1 node (this one) and not in SC3.x env
+ */
+
+int
+meta_mn_singlenode()
+{
+	mndiskset_membershiplist_t	*members;
+	md_error_t			status = mdnullerror;
+	int				count;
+	int				single;
+
+	/*
+	 * If running on SunCluster, then don't validate MN sets,
+	 * this is done during a reconfig cycle since all nodes must
+	 * take the same action.
+	 *
+	 * Only cleanup in case of a single node situation
+	 * when not running on SunCluster.  This single node
+	 * situation occurs when the nodelist only contains
+	 * this node and the MN setrecords only contain this
+	 * node.
+	 */
+	if (meta_read_nodelist(&count, &members, &status) == -1) {
+		/* no nodes are alive */
+		mdclrerror(&status);
+		return (0);
+	}
+
+	/*
+	 * Single node only if the list holds exactly one entry, that
+	 * entry is this node, and sdssc_bind_library() does not return
+	 * SDSSC_OKAY (i.e. we are not running on SunCluster).  The
+	 * tests short-circuit in this order.
+	 */
+	single = (count == 1) &&
+	    (strcmp(members->msl_node_name, mynode()) == 0) &&
+	    (sdssc_bind_library() != SDSSC_OKAY);
+	meta_free_nodelist(members);
+	return (single);
+}
+
+/*
+ * FUNCTION:	meta_mn_send_get_tstate()
+ * INPUT:	dev	- dev_t of device
+ * OUTPUT:	tstatep - tstate value, 0 if it could not be obtained
+ *		ep	- return error pointer
+ * RETURNS:	return value from mdmn_send_message()
+ * PURPOSE:  Send a message to the master to get ui_tstate for a given device.
+ */
+
+int
+meta_mn_send_get_tstate(
+	md_dev64_t		dev,
+	uint_t			*tstatep,
+	md_error_t		*ep
+)
+{
+	int			result;
+	md_mn_msg_gettstate_t	tstatemsg;
+	md_mn_result_t		*resp = NULL;
+	minor_t			mnum = meta_getminor(dev);
+
+	tstatemsg.gettstate_dev = dev;
+	result = mdmn_send_message(MD_MIN2SET(mnum),
+	    MD_MN_MSG_GET_TSTATE,
+	    MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST,
+	    (char *)&tstatemsg, sizeof (tstatemsg), &resp, ep);
+
+	/* tstate is the reply's exit value; an error or no reply yields 0 */
+	if ((result == 0) && (resp != NULL))
+		*tstatep = resp->mmr_exitval;
+	else
+		*tstatep = 0;
+
+	if (resp != NULL) {
+		free_result(resp);
+	}
+	return (result);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mount.c b/usr/src/lib/lvm/libmeta/common/meta_mount.c
new file mode 100644
index 0000000000..6d9cf39b4b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_mount.c
@@ -0,0 +1,97 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * return mount association with meta device
+ */
+
+#include <meta.h>
+
+#include <sys/mnttab.h>
+
+#include "meta_lib_prv.h"
+
+/*
+ * return associated mount point with this mdname_t as a Strdup()ed
+ * string, or NULL if there is none (ep is set if mnttab can't be opened)
+ */
+char *
+meta_get_mountp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	FILE		*mfp;
+	struct mnttab	 m;
+	char		*mountp	= NULL;
+	char		mnt_mountp[MNT_LINE_MAX];
+	char		mnt_special[MNT_LINE_MAX];
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* look in mnttab */
+	if ((mfp = open_mnttab()) == NULL) {
+		(void) mdsyserror(ep, errno, MNTTAB);
+		return (NULL);
+	}
+
+	while ((!mountp) && (getmntent(mfp, &m) == 0)) {
+		mdname_t	*mnp;
+
+		if ((m.mnt_special == NULL) || (m.mnt_mountp == NULL) ||
+		    (m.mnt_fstype == NULL))
+			continue;
+		if (m.mnt_mountp[0] != '/')
+			continue;
+		/* entries of these file system types are never matched */
+		if ((strcmp(m.mnt_fstype, "nfs") == 0) ||
+		    (strcmp(m.mnt_fstype, "autofs") == 0) ||
+		    (strcmp(m.mnt_fstype, "proc") == 0) ||
+		    (strcmp(m.mnt_fstype, "tmpfs") == 0) ||
+		    (strcmp(m.mnt_fstype, "cachefs") == 0) ||
+		    (strcmp(m.mnt_fstype, "lofs") == 0) ||
+		    (strcmp(m.mnt_fstype, "rfs") == 0) ||
+		    (strcmp(m.mnt_fstype, "fd") == 0))
+			continue;
+		/* keep private, bounded copies of the entry's strings */
+		(void) strlcpy(mnt_mountp, m.mnt_mountp, sizeof (mnt_mountp));
+		(void) strlcpy(mnt_special, m.mnt_special,
+		    sizeof (mnt_special));
+		if ((mnp = metaname(&sp, mnt_special, ep)) == NULL) {
+			mdclrerror(ep);
+			continue;
+		}
+		if (np->dev == mnp->dev)
+			mountp = mnt_mountp;
+	}
+
+	/* return success, if found */
+	return (mountp? Strdup(mountp): NULL);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_name.c b/usr/src/lib/lvm/libmeta/common/meta_name.c
new file mode 100644
index 0000000000..7becd6af2f
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_name.c
@@ -0,0 +1,3289 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <meta.h>
+#include <metad.h>
+
+#include <ctype.h>
+#include <string.h>
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ *	Macros to produce a quoted string containing the value of a
+ *	preprocessor macro. For example, if SIZE is defined to be 256,
+ *	VAL2STR(SIZE) is "256". This is used to construct format
+ *	strings for scanf-family functions below.
+ */
+#define	QUOTE(x)	#x
+#define	VAL2STR(x)	QUOTE(x)
+
+extern	char	*getfullblkname();
+extern	char	*getfullrawname();
+
+/*
+ * caches
+ */
+static	mdsetnamelist_t		*setlistp = NULL;
+static	mddrivenamelist_t	*drivelistp = NULL;
+static	mdnamelist_t		*fastnmlp = NULL;
+static	mdhspnamelist_t		*hsplistp = NULL;
+
+/*
+ * raw device name for uname, or NULL; exit(1)s if the devices do not check out
+ */
+static char *
+rawname(
+	char	*uname
+)
+{
+	char	*p;
+	struct stat	sbuf1, sbuf2;	/* sbuf1: uname, sbuf2: raw name p */
+
+	if ((p = getfullrawname(uname)) == NULL) {
+		return (NULL);
+	} else if (*p == '\0') {	/* empty name: free it, report NULL */
+		Free(p);
+		return (NULL);
+	} else {
+		if (stat(uname, &sbuf1) != 0) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "device to mount in /etc/vfstab is "
+			    "invalid for device %s\n"), uname);
+			exit(1);
+		}
+		if (stat(p, &sbuf2) != 0) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "device to fsck in /etc/vfstab is "
+			    "invalid for raw device %s\n"), p);
+			exit(1);
+		}
+		if (sbuf1.st_rdev != sbuf2.st_rdev) {	/* same device? */
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "/etc/vfstab entries inconsistent on "
+			    "line containing device %s\n"), uname);
+			exit(1);
+		}
+		if ((sbuf1.st_mode & S_IFBLK) == 0) {	/* mask, not S_ISBLK */
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "/etc/vfstab device to mount is not a "
+			    "block device for device %s\n"), uname);
+			exit(1);
+		}
+		if ((sbuf2.st_mode & S_IFCHR) == 0) {	/* mask, not S_ISCHR */
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "/etc/vfstab device to fsck is not a "
+			    "raw device for device %s\n"), p);
+			exit(1);
+		}
+		return (p);	/* from getfullrawname(); not freed here */
+	}
+}
+
+/* block device name for uname; NULL instead of an empty or missing name */
+char *
+blkname(
+	char	*uname
+)
+{
+	char	*full = getfullblkname(uname);
+
+	/* an empty result is freed here so that callers cannot leak it */
+	if ((full != NULL) && (*full == '\0')) {
+		Free(full);
+		full = NULL;
+	}
+
+	return (full);
+}
+
+/*
+ * parse up metadevice name: 0 and *snamep/*unitp on a match, otherwise -1
+ */
+static int
+parse_metadevice(
+	char		*uname,
+	char		**snamep,
+	unit_t		*unitp
+)
+{
+	char		*sname = Malloc(strlen(uname) + 1);
+	char		*tname = Malloc(strlen(uname) + 1);
+
+	unit_t		unit;
+	int		len;
+	char *up;
+	char *tp;
+	int lcws;	/* last character was slash */
+
+	/* handle dont cares: a NULL unitp is pointed at a scratch unit */
+	if (unitp == NULL)
+		unitp = &unit;
+
+	/* Now copy uname to tname by throwing away any duplicate '/' */
+	for (lcws = 0, tp = tname, up = uname; *up; up++) {
+		if (lcws) {
+			if (*up == '/') {
+				continue;
+			} else {
+				lcws = 0;
+			}
+		}
+		if (*up == '/') {
+			lcws = 1;
+		}
+		*tp++ = *up; /* up itself is advanced by the for loop */
+	}
+	*tp = '\0';
+
+	/* without set: "d<unit>" must span all of tname; *snamep is NULL */
+	if ((sscanf(tname, "d%lu%n", unitp, &len) == 1) &&
+	    (strlen(tname) == len) && ((long)*unitp >= 0)) {
+		if (snamep != NULL)
+			*snamep = NULL;
+		Free(sname);
+		Free(tname);
+		return (0);
+	}
+
+	/* fully-qualified without set: *snamep is a copy of MD_LOCAL_NAME */
+	if (((sscanf(tname, "/dev/md/dsk/d%lu%n", unitp, &len) == 1) &&
+	    (strlen(tname) == len) && ((long)*unitp >= 0)) ||
+	    ((sscanf(tname, "/dev/md/rdsk/d%lu%n", unitp, &len) == 1) &&
+	    (strlen(tname) == len) && ((long)*unitp >= 0))) {
+		if (snamep != NULL)
+			*snamep = Strdup(MD_LOCAL_NAME);
+		Free(sname);
+		Free(tname);
+		return (0);
+	}
+
+	/* with set: sname is handed to the caller, or freed if unwanted */
+	if (((sscanf(tname, "%[^/]/d%lu%n", sname, unitp, &len) == 2) &&
+	    (strlen(tname) == len) && ((long)*unitp >= 0)) ||
+	    ((sscanf(tname, "/dev/md/%[^/]/dsk/d%lu%n", sname,
+	    unitp, &len) == 2) &&
+	    (strlen(tname) == len) && ((long)*unitp >= 0)) ||
+	    ((sscanf(tname, "/dev/md/%[^/]/rdsk/d%lu%n", sname,
+	    unitp, &len) == 2) &&
+	    (strlen(tname) == len) && ((long)*unitp >= 0))) {
+		if (snamep != NULL) {
+			*snamep = sname;
+		} else {
+			Free(sname);
+		}
+		Free(tname);
+		return (0);
+	}
+
+	/* no match: *snamep is NULL and both scratch buffers are freed */
+	if (snamep != NULL)
+		*snamep = NULL;
+	Free(sname);
+	Free(tname);
+	return (-1);
+}
+
+/*
+ * FUNCTION:	parse_device()
+ * INPUT:	sp - pointer to setname struct
+ *		uname - Name of either a hotspare pool or metadevice
+ *			This can either be a fully qualified path or
+ *			in the form [set name/]device
+ * OUTPUT:	setnamep - name of the set that uname is in (NULL if local)
+ *		uname - name of the hotspare pools or metadevice
+ *			only contains the name of the device with all
+ *			other path information stripped off.
+ * PURPOSE:	Parse uname and sp into the set name and device name strings.
+ *		If the set name is specified as part of uname then use that
+ *		otherwise attempt to get the set name from sp.
+ */
+static void
+parse_device(
+	mdsetname_t	*sp,
+	char		*uname,
+	char		**setnamep /* dynamically alloced - caller must free */
+)
+{
+	char		setname[FILENAME_MAX+1];
+	char		*tname = Malloc(strlen(uname) + 1);
+
+	int		len;
+	char *up;
+	char *tp;
+	int lcws;	/* last character was slash */
+
+	/* Now copy uname to tname by throwing away any duplicate '/' */
+	for (lcws = 0, tp = tname, up = uname; *up; up++) {
+		if (lcws && (*up == '/'))
+			continue;
+		lcws = (*up == '/');
+		*tp++ = *up;
+	}
+	*tp = '\0';
+
+	/* fully-qualified  - local set */
+	if (((sscanf(tname, "/dev/md/dsk/%" VAL2STR(FILENAME_MAX) "s%n",
+			uname, &len) == 1) && (strlen(tname) == len)) ||
+	    ((sscanf(tname, "/dev/md/rdsk/%" VAL2STR(FILENAME_MAX) "s%n",
+			uname, &len) == 1) && (strlen(tname) == len))) {
+		if (setnamep != NULL)
+			*setnamep = NULL;
+		Free(tname);
+		return;
+	}
+
+	/*
+	 * with setname specified - either fully qualified and relative spec.
+	 * The set name is scanned with a bounded %[^/]: a plain %s would
+	 * swallow the '/' separator (so the relative form could never
+	 * match) and an unbounded %[ could overrun setname.
+	 */
+	if (((sscanf(tname, "%" VAL2STR(FILENAME_MAX) "[^/]/%"
+	    VAL2STR(FILENAME_MAX) "s%n", setname, uname, &len) == 2) &&
+	    (strlen(tname) == len)) ||
+	    ((sscanf(tname, "/dev/md/%" VAL2STR(FILENAME_MAX) "[^/]/dsk/%"
+	    VAL2STR(FILENAME_MAX) "s%n", setname, uname, &len) == 2) &&
+	    (strlen(tname) == len)) ||
+	    ((sscanf(tname, "/dev/md/%" VAL2STR(FILENAME_MAX) "[^/]/rdsk/%"
+	    VAL2STR(FILENAME_MAX) "s%n", setname, uname, &len) == 2) &&
+	    (strlen(tname) == len))) {
+
+		if (setnamep != NULL) {
+			*setnamep = Strdup(setname);
+		}
+		Free(tname);
+		return;
+	}
+
+	/* without setname specified */
+	(void) strcpy(uname, tname);
+	if (setnamep != NULL) {
+		if (sp != NULL && !metaislocalset(sp))
+			*setnamep = Strdup(sp->setname);
+		else
+			*setnamep = NULL;
+	}
+	Free(tname);
+}
+
+/*
+ * parse up hotspare pool name: 0 and *snamep/*hspp on a match, otherwise -1
+ */
+static int
+parse_hsp(
+	char		*uname,
+	char		**snamep,
+	hsp_t		*hspp
+)
+{
+	char		*sname = Malloc(strlen(uname) + 1);
+	hsp_t		hsp;
+	int		len;
+
+	/* handle dont cares */
+	if (hspp == NULL)
+		hspp = &hsp;
+
+	/* without set */
+	if ((sscanf(uname, "hsp%03u%n", hspp, &len) == 1) &&
+	    (strlen(uname) == len) && ((long)*hspp >= 0)) {
+		if (snamep != NULL)
+			*snamep = NULL;
+		Free(sname);
+		return (0);
+	}
+
+	/* with set: sname is handed to the caller, or freed if unwanted */
+	if ((sscanf(uname, "%[^/]/hsp%03u%n", sname, hspp, &len) == 2) &&
+	    (strlen(uname) == len) && ((long)*hspp >= 0)) {
+		if (snamep != NULL)
+			*snamep = sname;
+		else
+			Free(sname);
+		return (0);
+	}
+
+	/* no match: never leave *snamep unset for the caller to free */
+	if (snamep != NULL)
+		*snamep = NULL;
+	Free(sname);
+	return (-1);
+}
+
+/*
+ * canonicalize metadevice name: "d<unit>" for the local set (sname NULL
+ * or MD_LOCAL_NAME), "<sname>/d<unit>" otherwise.  Result is Malloc()ed.
+ */
+static char *
+canon_metadevice(
+	char	*sname,
+	unit_t	unit
+)
+{
+	int	local = (sname == NULL) || (strcmp(sname, MD_LOCAL_NAME) == 0);
+	size_t	size;
+	char	*canon;
+
+	/* room for the prefix, up to 20 digits and the terminating NUL */
+	size = (local ? 0 : strlen(sname)) + strlen(local ? "d" : "/d") + 21;
+	canon = Malloc(size);
+
+	if (local)
+		(void) snprintf(canon, size, "d%lu", unit);
+	else
+		(void) snprintf(canon, size, "%s/d%lu", sname, unit);
+	return (canon);
+}
+
+/*
+ * canonicalize hotspare pool name ("hspNNN" or "<sname>/hspNNN")
+ */
+static char *
+canon_hsp(
+	char	*sname,
+	hsp_t	hsp
+)
+{
+	char	*cname;
+	size_t	len;
+	/* %03u pads to 3 digits but may print more: allow up to 20 */
+	if ((sname == NULL) || (strcmp(sname, MD_LOCAL_NAME) == 0)) {
+		len = strlen("hsp") + 20 + 1;
+		cname = Malloc(len);
+		(void) snprintf(cname, len, "hsp%03u", hsp);
+	} else {
+		len = strlen(sname) + strlen("/hsp") + 20 + 1;
+		cname = Malloc(len);
+		(void) snprintf(cname, len, "%s/hsp%03u", sname, hsp);
+	}
+	return (cname);
+}
+
+/*
+ * canonicalize name: "<set>/<dev>" if a set applies, else just "<dev>"
+ *
+ * uname is rewritten in place by parse_device(); the result is a new
+ * string.  NOTE: this is really only for use by meta_tab*
+ */
+char *
+meta_canonicalize(
+	mdsetname_t	*sp,
+	char		*uname
+)
+{
+	char	*set = NULL;
+	char	*canon;
+	size_t	size;
+
+	/* return the set name and dev name */
+	parse_device(sp, uname, &set);
+
+	if (set == NULL)
+		return (Strdup(uname));
+
+	/* set name, '/', device name and the terminating NUL */
+	size = strlen(set) + 1 + strlen(uname) + 1;
+	canon = Malloc(size);
+	(void) snprintf(canon, size, "%s/%s", set, uname);
+	Free(set);
+
+	return (canon);
+}
+
+/*
+ * check that name is a metadevice: 1 if uname parses as one, else 0
+ */
+int
+is_metaname(
+	char	*uname
+)
+{
+	int	matched = (parse_metadevice(uname, NULL, NULL) == 0);
+
+	/* NULL outputs were passed: only the match itself is of interest */
+	return (matched ? 1 : 0);
+}
+
+/*
+ * check that name is a hotspare pool: 1 if uname parses as one, else 0
+ */
+int
+is_hspname(
+	char	*uname
+)
+{
+	int	matched = (parse_hsp(uname, NULL, NULL) == 0);
+
+	/* NULL outputs were passed: only the match itself is of interest */
+	return (matched ? 1 : 0);
+}
+
+/*
+ *	mdsetname_t stuff
+ */
+
+/*
+ * initialize setname: every byte of the structure is cleared to zero
+ */
+static void
+metainitsetname(
+	mdsetname_t	*sp
+)
+{
+	(void) memset(sp, 0, sizeof (mdsetname_t));
+}
+
+/* free a set descriptor with its node list (MN sets) and drive list */
+static void
+metafreesetdesc(md_set_desc *sd)
+{
+	if (MD_MNSET_DESC(sd)) {
+		md_mnnode_desc	*nd;
+
+		/* unlink each node before freeing it */
+		while ((nd = sd->sd_nodelist) != NULL) {
+			sd->sd_nodelist = nd->nd_next;
+			Free(nd);
+		}
+	}
+	metafreedrivedesc(&sd->sd_drvs);
+	Free(sd);
+}
+
+/*
+ * free allocated setname: release its members, then zero the structure
+ */
+static void
+metafreesetname(
+	mdsetname_t	*sp
+)
+{
+	if (sp->setdesc != NULL)
+		metafreesetdesc(sp->setdesc);
+	if (sp->setname != NULL)
+		Free(sp->setname);
+	metainitsetname(sp);
+}
+
+/*
+ * flush the setname cache
+ */
+static void
+metaflushsetnames()
+{
+	mdsetnamelist_t		*elem;
+
+	/* pop entries off the head until the cache is empty */
+	while ((elem = setlistp) != NULL) {
+		setlistp = elem->next;
+		metafreesetname(elem->sp);
+		Free(elem->sp);
+		Free(elem);
+	}
+}
+
+/*
+ * get set number: 0 for the local set, else looked up by name
+ */
+static int
+getsetno(
+	char		*sname,
+	set_t		*setnop,
+	md_error_t	*ep
+)
+{
+	md_set_record	*sr;
+
+	/* local set */
+	if ((sname == NULL) || (strcmp(sname, MD_LOCAL_NAME) == 0)) {
+		*setnop = 0;
+		return (0);
+	}
+
+	/* shared set */
+	sr = getsetbyname(sname, ep);
+	if (sr != NULL) {
+		*setnop = sr->sr_setno;
+		free_sr(sr);
+		return (0);
+	}
+
+	/* an unregistered RPC program is reported as "no such set" */
+	if (mdisrpcerror(ep, RPC_PROGNOTREGISTERED)) {
+		size_t	size = strlen(sname) + 30;
+		char	*what = Malloc(size);
+
+		(void) snprintf(what, size, "setname \"%s\"", sname);
+		(void) mderror(ep, MDE_NO_SET, what);
+		Free(what);
+	}
+	return (-1);
+}
+
+/*
+ * find setname from name; cache misses are looked up and appended
+ */
+mdsetname_t *
+metasetname(
+	char		*sname,
+	md_error_t	*ep
+)
+{
+	mdsetnamelist_t	**link = &setlistp;
+	mdsetname_t	*sp;
+	set_t		setno;
+
+	assert(sname != NULL);
+
+	/* look for cached value first; link ends up at the list's tail */
+	while (*link != NULL) {
+		if (strcmp((*link)->sp->setname, sname) == 0)
+			return ((*link)->sp);
+		link = &(*link)->next;
+	}
+
+	/* setup set */
+	if (getsetno(sname, &setno, ep) != 0)
+		return (NULL);
+
+	/* allocate new list element and setname */
+	sp = Zalloc(sizeof (*sp));
+	sp->setname = Strdup(sname);
+	sp->setno = setno;
+	sp->lockfd = MD_NO_LOCK;
+
+	*link = Zalloc(sizeof (**link));
+	(*link)->sp = sp;
+	return (sp);
+}
+
+/*
+ * find setname from setno
+ */
+mdsetname_t *
+metasetnosetname(
+	set_t		setno,
+	md_error_t	*ep
+)
+{
+	mdsetnamelist_t	*cached;
+	md_set_record	*sr;
+	mdsetname_t	*sp;
+
+	/* look for cached value first */
+	for (cached = setlistp; cached != NULL; cached = cached->next) {
+		if (cached->sp->setno == setno)
+			return (cached->sp);
+	}
+
+	/* local set */
+	if (setno == MD_LOCAL_SET)
+		return (metasetname(MD_LOCAL_NAME, ep));
+
+	/* shared set: resolve the number to a name, then go by name */
+	sr = getsetbynum(setno, ep);
+	if (sr == NULL)
+		return (NULL);
+	sp = metasetname(sr->sr_setname, ep);
+	free_sr(sr);
+	return (sp);
+}
+
+/* find or create the cache entry for setno without consulting set records */
+mdsetname_t *
+metafakesetname(
+	set_t		setno,
+	char		*sname
+)
+{
+	mdsetnamelist_t	**link = &setlistp;
+	mdsetname_t	*sp;
+
+	/* look for cached value first, filling in a missing name */
+	for (; *link != NULL; link = &(*link)->next) {
+		sp = (*link)->sp;
+		if (sp->setno != setno)
+			continue;
+		if ((sp->setname == NULL) && (sname != NULL))
+			sp->setname = Strdup(sname);
+		return (sp);
+	}
+
+	/* allocate new list element and setname */
+	sp = Zalloc(sizeof (*sp));
+	sp->setno = setno;
+	sp->lockfd = MD_NO_LOCK;
+	if (sname != NULL)
+		sp->setname = Strdup(sname);
+	*link = Zalloc(sizeof (**link));
+	(*link)->sp = sp;
+	return (sp);
+}
+
+
+/*
+ * build a newly allocated set descriptor (sd) from a set record (sr)
+ */
+md_set_desc *
+sr2setdesc(
+	md_set_record	*sr
+)
+{
+	md_set_desc	*sd;
+	int		i;
+	md_mnset_record	*mnsr;
+	md_mnnode_desc	*nd, *nd_prev = 0;
+	md_mnnode_record	*nr;
+	md_error_t	status = mdnullerror;
+	md_error_t	*ep = &status;
+	int		nodecnt, nrcnt;
+	mndiskset_membershiplist_t *nl, *nl2;
+
+	sd = Zalloc(sizeof (*sd));
+	sd->sd_ctime = sr->sr_ctime;
+	sd->sd_genid = sr->sr_genid;
+	sd->sd_setno = sr->sr_setno;
+	sd->sd_flags = sr->sr_flags;
+
+	if (MD_MNSET_DESC(sd)) {
+		mnsr = (md_mnset_record *)sr;
+		(void) strlcpy(sd->sd_mn_master_nodenm,
+		    mnsr->sr_master_nodenm, sizeof (sd->sd_mn_master_nodenm));
+		sd->sd_mn_master_nodeid = mnsr->sr_master_nodeid;
+		if (strcmp(mnsr->sr_master_nodenm, mynode()) == 0)
+			sd->sd_mn_am_i_master = 1;
+
+		/*
+		 * Get membershiplist from API routine.  If there's
+		 * an error, just use a NULL nodelist.
+		 */
+		if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+			nodecnt = 0;  /* no nodes are alive */
+			nl = NULL;
+			mdclrerror(ep);	/* the error is not reported */
+		}
+		nr = mnsr->sr_nodechain;
+		nrcnt = 0;
+		/*
+		 * Node descriptor node list must be built in
+		 * ascending order of nodeid.  The nodechain
+		 * in the mnset record is in ascending order,
+		 * so just make them the same.
+		 */
+		while (nr) {
+			nd = Zalloc(sizeof (*nd));
+			if (nd_prev) {
+				nd_prev->nd_next = nd;
+			} else {
+				sd->sd_nodelist = nd;
+			}
+			nd->nd_ctime = nr->nr_ctime;
+			nd->nd_genid = nr->nr_genid;
+			nd->nd_flags = nr->nr_flags;
+
+			(void) strlcpy(nd->nd_nodename, nr->nr_nodename,
+			    sizeof (nd->nd_nodename));
+			nd->nd_nodeid = nr->nr_nodeid;
+			if (strcmp(nd->nd_nodename, mynode()) == 0) {
+				sd->sd_mn_mynode = nd;
+			}
+			if (nd->nd_nodeid == sd->sd_mn_master_nodeid) {
+				sd->sd_mn_masternode = nd;
+			}
+
+			/*
+			 * If node is marked ALIVE, then set priv_ic
+			 * from membership list.  During the early part
+			 * of a reconfig cycle, the membership list may
+			 * have been changed, (a node entering or leaving
+			 * the cluster), but rpc.metad hasn't flushed
+			 * its data yet.  So, if node is marked alive, but
+			 * is no longer in the membership list (node has
+			 * left the cluster) then just leave priv_ic to NULL.
+			 */
+			if (nd->nd_flags & MD_MN_NODE_ALIVE) {
+				nl2 = nl;
+				while (nl2) {
+					if (nl2->msl_node_id == nd->nd_nodeid) {
+						(void) strlcpy(nd->nd_priv_ic,
+						    nl2->msl_node_addr,
+						    sizeof (nd->nd_priv_ic));
+						break;
+					}
+					nl2 = nl2->next;
+				}
+			}
+
+			nr = nr->nr_next;
+			nrcnt++;
+			nd_prev = nd;
+		}
+		sd->sd_mn_numnodes = nrcnt;
+		if (nodecnt)
+			meta_free_nodelist(nl);
+
+		/* Just copying to keep consistent view between sr & sd */
+		(void) strlcpy(sd->sd_nodes[0], mnsr->sr_nodes_bw_compat[0],
+		    sizeof (sd->sd_nodes[0]));
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++)
+			(void) strlcpy(sd->sd_nodes[i], sr->sr_nodes[i],
+			    sizeof (sd->sd_nodes[i]));
+	}
+
+	sd->sd_med = sr->sr_med;		/* structure assignment */
+
+	return (sd);
+}
+
+/*
+ * Return the set descriptor for sp, building and caching it in
+ * sp->setdesc on first use.  The set record is looked up by name
+ * first and, failing that, by number (when setno > 0).  Returns NULL
+ * if no set record could be obtained.
+ */
+md_set_desc *
+metaget_setdesc(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_record	*rec = NULL;
+
+	/* already cached */
+	if (sp->setdesc != NULL)
+		return (sp->setdesc);
+
+	/* by name, then by number */
+	if (sp->setname != NULL)
+		rec = getsetbyname(sp->setname, ep);
+	if ((rec == NULL) && (sp->setno > 0))
+		rec = getsetbynum(sp->setno, ep);
+	if (rec == NULL)
+		return (NULL);
+
+	/* convert, cache, and release the record */
+	sp->setdesc = sr2setdesc(rec);
+	free_sr(rec);
+	return (sp->setdesc);
+}
+
+/*
+ * Drop the cached set descriptor of sp, if there is one.
+ * A NULL sp is accepted and ignored.
+ */
+void
+metaflushsetname(mdsetname_t *sp)
+{
+	if ((sp != NULL) && (sp->setdesc != NULL)) {
+		metafreesetdesc(sp->setdesc);
+		sp->setdesc = NULL;
+	}
+}
+
+/*
+ * Return 1 if sp names the local set (setname equals MD_LOCAL_NAME),
+ * 0 otherwise.  The set number is asserted to agree with the name.
+ */
+int
+metaislocalset(
+	mdsetname_t	*sp
+)
+{
+	int	islocal;
+
+	assert(sp->setname != NULL);
+	islocal = (strcmp(sp->setname, MD_LOCAL_NAME) == 0);
+
+	if (islocal) {
+		assert(sp->setno == MD_LOCAL_SET);
+	} else {
+		assert(sp->setno != MD_LOCAL_SET);
+	}
+	return (islocal);
+}
+
+/*
+ * Return 1 if sp1 and sp2 carry the same set name, 0 otherwise.
+ * The set numbers are asserted to agree with the name comparison.
+ */
+int
+metaissameset(
+	mdsetname_t	*sp1,
+	mdsetname_t	*sp2
+)
+{
+	int	same = (strcmp(sp1->setname, sp2->setname) == 0);
+
+	if (same) {
+		assert(sp1->setno == sp2->setno);
+	} else {
+		assert(sp1->setno != sp2->setno);
+	}
+	return (same);
+}
+
+/*
+ * Reconcile *spp with the set name sname.
+ *
+ * If *spp is already set, its name must match sname, otherwise an
+ * MDE_SET_DIFF error is raised through mderror().  If *spp is NULL it
+ * is filled in from metasetname(); -1 is returned if that fails.
+ * Returns 0 on success.
+ */
+static int
+chkset(
+	mdsetname_t	**spp,
+	char		*sname,
+	md_error_t	*ep
+)
+{
+	mdsetname_t	*cur = *spp;
+
+	/* no set yet: look it up and remember it */
+	if (cur == NULL) {
+		*spp = metasetname(sname, ep);
+		return ((*spp == NULL) ? -1 : 0);
+	}
+
+	/* same pointer or same text means same set */
+	if ((cur->setname == sname) || (strcmp(cur->setname, sname) == 0))
+		return (0);
+
+	return (mderror(ep, MDE_SET_DIFF, sname));
+}
+
+/*
+ * Like chkset(), but a NULL sname is replaced by a default first:
+ * the name of *spp when one is present, else MD_LOCAL_NAME.
+ */
+static int
+chksetname(
+	mdsetname_t	**spp,
+	char		*sname,
+	md_error_t	*ep
+)
+{
+	if (sname == NULL)
+		sname = (*spp != NULL) ? (*spp)->setname : MD_LOCAL_NAME;
+
+	return (chkset(spp, sname, ep));
+}
+
+/*
+ * Reconcile *spp with the set identified by setno.  Set number 0 is
+ * checked against MD_LOCAL_NAME; any other number is resolved to a
+ * name through its set record.  Returns the chkset() result, or -1
+ * if the set record cannot be obtained.
+ */
+static int
+chksetno(
+	mdsetname_t	**spp,
+	set_t		setno,
+	md_error_t	*ep
+)
+{
+	md_set_record	*rec;
+	int		result;
+
+	if (setno == 0)
+		return (chkset(spp, MD_LOCAL_NAME, ep));
+
+	rec = getsetbynum(setno, ep);
+	if (rec == NULL)
+		return (-1);
+
+	result = chkset(spp, rec->sr_setname, ep);
+	free_sr(rec);
+	return (result);
+}
+
+/*
+ *	mddrivename_t stuff
+ */
+
+/*
+ * Reset a name structure: zero everything, then set the fields whose
+ * "unset" value is not zero.
+ */
+static void
+metainitname(
+	mdname_t	*np
+)
+{
+	(void) memset(np, 0, sizeof (mdname_t));
+	np->start_blk = -1;
+	np->end_blk = -1;
+	np->key = MD_KEYBAD;
+	np->dev = NODEV64;
+}
+
+/*
+ * Release the strings owned by a name structure and reinitialize it.
+ * The structure itself is not freed.
+ */
+static void
+metafreename(
+	mdname_t	*np
+)
+{
+	char	*owned[4];
+	int	i;
+
+	owned[0] = np->cname;
+	owned[1] = np->bname;
+	owned[2] = np->rname;
+	owned[3] = np->devicesname;
+
+	for (i = 0; i < 4; i++) {
+		if (owned[i] != NULL)
+			Free(owned[i]);
+	}
+
+	metainitname(np);
+}
+
+/*
+ * Reset a drive name structure: all zero, side names key marked bad.
+ */
+static void
+metainitdrivename(
+	mddrivename_t	*dnp
+)
+{
+	(void) memset(dnp, 0, sizeof (mddrivename_t));
+	dnp->side_names_key = MD_KEYBAD;
+}
+
+/*
+ * Free every entry on the drive's side names list, including the
+ * dname and cname strings of each entry, and leave the list empty.
+ */
+void
+metaflushsidenames(
+	mddrivename_t	*dnp
+)
+{
+	mdsidenames_t	*cur = dnp->side_names;
+	mdsidenames_t	*nxt;
+
+	while (cur != NULL) {
+		nxt = cur->next;
+		if (cur->dname != NULL)
+			Free(cur->dname);
+		if (cur->cname != NULL)
+			Free(cur->cname);
+		Free(cur);
+		cur = nxt;
+	}
+	dnp->side_names = NULL;
+}
+
+/*
+ * Release everything a drive name structure owns (names, vtoc, the
+ * per-slice names and their array, side names, miscname, unit) and
+ * reinitialize it.  The structure itself is not freed.
+ */
+void
+metafreedrivename(
+	mddrivename_t	*dnp
+)
+{
+	uint_t		i;
+
+	if (dnp->cname != NULL)
+		Free(dnp->cname);
+	if (dnp->rname != NULL)
+		Free(dnp->rname);
+
+	metafreevtoc(&dnp->vtoc);
+
+	/* per-slice names first, then the array that holds them */
+	i = 0;
+	while (i < dnp->parts.parts_len) {
+		metafreename(dnp->parts.parts_val + i);
+		i++;
+	}
+	if (dnp->parts.parts_val != NULL)
+		Free(dnp->parts.parts_val);
+
+	metaflushsidenames(dnp);
+
+	if (dnp->miscname != NULL)
+		Free(dnp->miscname);
+
+	meta_free_unit(dnp);
+	metainitdrivename(dnp);
+}
+
+/*
+ * Empty the drive name cache (drivelistp): each cached drive name is
+ * torn down and freed together with its list element.
+ */
+static void
+metaflushdrivenames()
+{
+	mddrivenamelist_t	*cur = drivelistp;
+	mddrivenamelist_t	*nxt;
+
+	while (cur != NULL) {
+		nxt = cur->next;
+		metafreedrivename(cur->drivenamep);
+		Free(cur->drivenamep);
+		Free(cur);
+		cur = nxt;
+	}
+	drivelistp = NULL;
+}
+
+/*
+ * Peel the slice suffix off a device name and return the drive name
+ * as a newly allocated string (the caller frees it).
+ *
+ *  - metadevice names are returned unchanged (copied);
+ *  - old style names /dev/rXXNN[a-h] and /dev/XXNN[a-h] lose their
+ *    trailing slice letter(s);
+ *  - names ending in <digit>s<digits> lose the "s<digits>" part;
+ *  - anything else is returned unchanged (copied).
+ *
+ * The input string is never modified.
+ */
+char *
+metadiskname(
+	char	*name
+)
+{
+	char	*p, *e;
+	char	onmb[BUFSIZ+1], cnmb[BUFSIZ];
+	uint_t	d = 0;
+	int	l = 0;
+	int	cl = strlen(name);
+
+	if (is_metaname(name))
+		return (Strdup(name));
+
+	/*
+	 * Handle old style names, which are of the form /dev/rXXNN[a-h].
+	 */
+	if (sscanf(name, "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u%*[a-h]%n",
+	    onmb, &d, &l) == 2 && l == cl) {
+		(void) snprintf(cnmb, sizeof (cnmb), "/dev/r%s%u", onmb, d);
+		return (Strdup(cnmb));
+	}
+
+	/*
+	 * Handle old style names, which are of the form /dev/XXNN[a-h].
+	 */
+	if (sscanf(name, "/dev/%" VAL2STR(BUFSIZ) "[^0-9/]%u%*[a-h]%n",
+	    onmb, &d, &l) == 2 && l == cl) {
+		(void) snprintf(cnmb, sizeof (cnmb), "/dev/%s%u", onmb, d);
+		return (Strdup(cnmb));
+	}
+
+	/* an empty name has no suffix; this also keeps p inside name[] */
+	if (cl == 0)
+		return (Strdup(name));
+
+	/* gobble the trailing number */
+	p = e = name + cl - 1;
+	for (; (p > name); --p) {
+		if (!isdigit((unsigned char)*p))
+			break;
+	}
+
+	/* no trailing digits at all, or the whole name is digits */
+	if ((p == e) || (p <= name))
+		return (Strdup(name));
+
+	/* the number must be introduced by 's' */
+	if (*p != 's')
+		return (Strdup(name));
+
+	/* and the 's' must follow a digit that is not the first char */
+	if ((p - 1 <= name) || (!isdigit((unsigned char)p[-1])))
+		return (Strdup(name));
+
+	/* copy, then cut the copy at the 's' */
+	e = Strdup(name);
+	e[p - name] = '\0';
+
+	return (e);
+}
+
+/*
+ * Free the elements of a drive name list.  Only the list elements
+ * are released here, not the drive names they point to.
+ */
+void
+metafreedrivenamelist(
+	mddrivenamelist_t	*dnlp
+)
+{
+	mddrivenamelist_t	*nxt;
+
+	while (dnlp != NULL) {
+		nxt = dnlp->next;
+		Free(dnlp);
+		dnlp = nxt;
+	}
+}
+
+/*
+ * Build a list of drive names from argc/argv.
+ *
+ * Each argument is resolved with metadrivename() and appended, in
+ * order, to the list returned through *dnlpp.  Returns the number of
+ * names on success.  On the first name that fails to resolve, every
+ * list element allocated so far is freed, *dnlpp is set to NULL and
+ * -1 is returned (the error is left in ep by metadrivename()).
+ */
+int
+metadrivenamelist(
+	mdsetname_t		**spp,
+	mddrivenamelist_t	**dnlpp,
+	int			argc,
+	char			*argv[],
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	**tailpp = dnlpp;
+	int			count = 0;
+
+	for (*dnlpp = NULL; (argc > 0); ++count, --argc, ++argv) {
+		mddrivenamelist_t	*dnlp = Zalloc(sizeof (*dnlp));
+
+		if ((dnlp->drivenamep = metadrivename(spp, argv[0],
+		    ep)) == NULL) {
+			/*
+			 * dnlp has not been linked into the list yet, so
+			 * metafreedrivenamelist() cannot see it; free it
+			 * here to avoid leaking it.
+			 */
+			Free(dnlp);
+			metafreedrivenamelist(*dnlpp);
+			*dnlpp = NULL;
+			return (-1);
+		}
+		*tailpp = dnlp;
+		tailpp = &dnlp->next;
+	}
+	return (count);
+}
+
+/*
+ * Append dnp to the end of the list reachable from *dnlpp, walking
+ * the list to find its tail.  Returns dnp.
+ */
+mddrivename_t *
+metadrivenamelist_append(
+	mddrivenamelist_t	**dnlpp,
+	mddrivename_t		*dnp
+)
+{
+	mddrivenamelist_t	*elem;
+
+	/* new element carrying the drive name */
+	elem = Zalloc(sizeof (*elem));
+	elem->drivenamep = dnp;
+
+	/* find the terminating link and hook the element in */
+	while (*dnlpp != NULL)
+		dnlpp = &(*dnlpp)->next;
+	*dnlpp = elem;
+
+	return (dnp);
+}
+
+/*
+ * FUNCTION:	meta_drivenamelist_append_wrapper()
+ * INPUT:	tailpp	- pointer to the list tail pointer
+ *		dnp	- drive name to be appended to list
+ * OUTPUT:	none
+ * RETURNS:	mddrivenamelist_t ** - tail pointer to pass to the
+ *		next call.
+ * PURPOSE:	wrapper around metadrivenamelist_append() for
+ *		performance.  metadrivenamelist_append() searches for
+ *		the tail on every call, which slows down long lists.
+ *		By handing back a pointer close to the tail and having
+ *		the caller pass it in again, that search stays short.
+ */
+mddrivenamelist_t **
+meta_drivenamelist_append_wrapper(
+	mddrivenamelist_t	**tailpp,
+	mddrivename_t	*dnp
+)
+{
+	mddrivenamelist_t	*elem;
+
+	(void) metadrivenamelist_append(tailpp, dnp);
+	elem = *tailpp;
+
+	/*
+	 * When the element at *tailpp has no successor it is the one
+	 * just added to an empty list: hand back tailpp itself.
+	 * Otherwise step forward to its next link.
+	 */
+	return ((elem->next != NULL) ? &elem->next : tailpp);
+}
+
+
+/*
+ *	mdname_t stuff
+ */
+
+/*
+ * Check the set implied by uname against *spp and return a newly
+ * allocated comparison name: the canonical metadevice name when
+ * uname parses as a metadevice, otherwise a copy of uname.
+ * Returns NULL if the set check fails.
+ */
+char *
+meta_name_getname(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	char		*sname = NULL;
+	unit_t		unit;
+	int		ismeta;
+	int		setok;
+
+	/* a successful parse yields the set name (if any) and the unit */
+	ismeta = (parse_metadevice(uname, &sname, &unit) == 0);
+
+	/* check set name; sname is not needed afterwards either way */
+	setok = (chksetname(spp, sname, ep) == 0);
+	if (sname != NULL)
+		Free(sname);
+	if (!setok)
+		return (NULL);
+
+	/* return comparison name */
+	return (ismeta ? canon_metadevice((*spp)->setname, unit) :
+	    Strdup(uname));
+}
+
+/*
+ * FUNCTION:	getrname()
+ * INPUT:	spp	- the setname struct
+ *		uname	- the possibly unqualified device name
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	char*	- newly allocated, fully qualified raw device
+ *			name, or NULL (ENOENT in ep) if none is found
+ * PURPOSE:	Create the fully qualified raw name for the possibly
+ *		unqualified device name.  An absolute uname is handed
+ *		straight to rawname().  Otherwise the candidates are
+ *		tried in this order: the metadevice path under
+ *		/dev/md, then /dev/rdsk/<uname>, then uname itself.
+ */
+static char *
+getrname(mdsetname_t **spp, char *uname, md_error_t *ep)
+{
+	char	*raw;
+	char	*path;
+	char	*leaf;
+	size_t	pathlen;
+	int	is_new_md;
+
+	assert(uname != NULL);
+
+	/* an absolute name either resolves directly or not at all */
+	if (uname[0] == '/') {
+		raw = rawname(uname);
+		if (raw == NULL)
+			(void) mdsyserror(ep, ENOENT, uname);
+		return (raw);
+	}
+
+	/*
+	 * Check for metadevice before physical device.
+	 * With the introduction of softpartitions it is more
+	 * likely to be a metadevice.
+	 */
+	if (metaislocalset(*spp)) {
+		/* metadevice short form in the local set */
+		leaf = uname;
+		pathlen = strlen("/dev/md/rdsk/") + strlen(leaf) + 1;
+		path = Malloc(pathlen);
+		(void) snprintf(path, pathlen, "/dev/md/rdsk/%s", leaf);
+	} else {
+		/* drop anything up to and including the first '/' */
+		leaf = strchr(uname, '/');
+		leaf = (leaf != NULL) ? (leaf + 1) : uname;
+		pathlen = strlen((*spp)->setname) + strlen(leaf) +
+		    strlen("/dev/md//rdsk/") + 1;
+		path = Malloc(pathlen);
+		(void) snprintf(path, pathlen, "/dev/md/%s/rdsk/%s",
+		    (*spp)->setname, leaf);
+	}
+	is_new_md = (*leaf == 'd');
+
+	raw = rawname(path);
+
+	/*
+	 * Handle the case where we have a new metadevice that does not yet
+	 * exist in the name-space. In this case we return the constructed
+	 * metadevice name as that will exist after the metainit call has
+	 * created it.
+	 */
+	if ((raw == NULL) && is_new_md)
+		raw = Strdup(path);
+	Free(path);
+	if (raw != NULL)
+		return (raw);
+
+	/* physical device short form */
+	pathlen = strlen("/dev/rdsk/") + strlen(uname) + 1;
+	path = Malloc(pathlen);
+	(void) snprintf(path, pathlen, "/dev/rdsk/%s", uname);
+	raw = rawname(path);
+	Free(path);
+	if (raw != NULL)
+		return (raw);
+
+	/*
+	 * If all else fails try the straight uname.
+	 * NOTE: This check was at the beginning of getrname instead
+	 * of here. It was moved to avoid a conflict with SC3.0. If
+	 * a diskset was mounted with the same name it would hang
+	 * the cluster in a loop. Example:
+	 *
+	 *	fubar/d10 -m fubar/d0 fubar/d1
+	 *	mount /dev/md/fubar/dsk/d10 /fubar
+	 *
+	 * When the system was booted DiskSuite would try to take ownership
+	 * of diskset fubar. This would cause rawname("fubar/d10") to be
+	 * called. rawname() stats the string which caused the cluster
+	 * reservation code to try and take ownership which it was already
+	 * doing and a deadlock would occur. By moving this final attempt
+	 * at resolving the rawname to the end we avoid this deadlock.
+	 */
+	raw = rawname(uname);
+	if (raw != NULL)
+		return (raw);
+
+	/* out of luck */
+	(void) mdsyserror(ep, ENOENT, uname);
+	return (NULL);
+}
+
+/*
+ * Get the raw slice name and the drive name for uname.
+ *
+ * Returns the raw slice name as an allocated string and stores the
+ * drive name (metadiskname() of the raw name) in *dnamep.  On failure
+ * NULL is returned and *dnamep is left NULL.
+ *
+ * getrname() is tried first.  If it fails with ENOENT the raw name is
+ * derived from the form of uname alone: old style /dev/rXXNN[a-h] and
+ * /dev/XXNN[a-h] names, ".../dsk/..." and ".../rdsk/..." paths, or,
+ * for a relative name that parse_ctd() accepts, /dev/rdsk/<uname>.
+ *
+ * NOTE(review): the "old style block names" branch and the relative
+ * ctd branch return success without calling mdclrerror(ep), unlike
+ * the other guessed-name branches - confirm whether callers rely on
+ * the ENOENT error still being present in ep.
+ */
+static char *
+getrawnames(
+	mdsetname_t	**spp,
+	char		*uname,
+	char		**dnamep,
+	md_error_t	*ep
+)
+{
+	char		*rname;
+	size_t		len;
+
+	/* initialize */
+	*dnamep = NULL;
+
+	/* get slice name */
+	if ((rname = getrname(spp, uname, ep)) != NULL) {
+		*dnamep = metadiskname(rname);
+		return (rname);
+	}
+
+	/*
+	 * If the name cannot be found, it may be because it is not
+	 * accessible.  If it is an absolute name, try all possible disk
+	 * name formats and if it is a device name, assume it is
+	 * /dev/rdsk/...
+	 */
+	if (mdissyserror(ep, ENOENT)) {
+		if (uname[0] == '/') {
+			/* Absolute name */
+			char			*p;
+			uint_t			d = 0;
+			int			l = 0;
+			char			onmb[BUFSIZ+1], snm[BUFSIZ+1];
+
+			/*
+			 * Handle old style raw names
+			 */
+			if (sscanf(uname,
+			    "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+			    "%" VAL2STR(BUFSIZ) "[a-h]%n",
+			    onmb, &d, snm, &l) == 3 && l == strlen(uname)) {
+				mdclrerror(ep);
+				rname = Strdup(uname);
+				*dnamep = metadiskname(rname);
+				return (rname);
+			}
+
+			/*
+			 * Handle old style block names
+			 */
+			if (sscanf(uname,
+			    "/dev/%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+			    "%" VAL2STR(BUFSIZ) "[a-h]%n",
+			    onmb, &d, snm, &l) == 3 && l == strlen(uname)) {
+				/* one extra byte for the inserted 'r' */
+				len = strlen(uname) + 1 + 1;
+				rname = Malloc(len);
+				(void) snprintf(rname, len, "/dev/r%s%u%s",
+				    onmb, d, snm);
+				*dnamep = metadiskname(rname);
+				return (rname);
+			}
+
+			/* /.../dsk/... */
+			if ((p = strstr(uname, "/dsk/")) != NULL) {
+				mdclrerror(ep);
+				/* p now points at the 'd' of "dsk/" */
+				++p;
+				rname = Malloc(strlen(uname) + 1 + 1);
+				/* prefix, then 'r', then "dsk/..." */
+				(void) strncpy(rname, uname, (p - uname));
+				rname[(p - uname)] = 'r';
+				(void) strcpy(&rname[(p - uname) + 1], p);
+				*dnamep = metadiskname(rname);
+				return (rname);
+			}
+
+			/* /.../rdsk/... */
+			else if (strstr(uname, "/rdsk/") != NULL) {
+				mdclrerror(ep);
+				rname = Strdup(uname);
+				*dnamep = metadiskname(rname);
+				return (rname);
+			}
+		} else {
+			/*
+			 * If it's not an absolute name but is a valid ctd name,
+			 * guess at /dev/rdsk/...
+			 */
+			uint_t	s;
+			if (parse_ctd(uname, &s) == 0) {
+				len = strlen(uname) + strlen("/dev/rdsk/") + 1;
+				rname = Malloc(len);
+				(void) snprintf(rname, len, "/dev/rdsk/%s",
+				    uname);
+				*dnamep = metadiskname(rname);
+				return (rname);
+			}
+		}
+	}
+
+	/* out of luck */
+	return (NULL);
+}
+
+/*
+ * Get the number of slices for a drive.
+ *
+ * rname is the raw slice name, dname the drive name; the slice number
+ * parsed from the "s%u" suffix of rname is stored in *slicep.  The
+ * slice count is found by probing <dname>s0, <dname>s1, ... with
+ * meta_stat() until one is missing or is not a character device, and
+ * is never reported as less than V_NUMPAR.
+ *
+ * Returns the slice count, or -1 if the suffix of rname is not
+ * accepted or the count is smaller than the slice number.
+ */
+static int
+getnslice(
+	char		*rname,
+	char		*dname,
+	uint_t		*slicep
+)
+{
+	char		*srname;
+	uint_t		nslice;
+	size_t		dl = strlen(dname);
+	size_t		rl = strlen(rname);
+	int		l = 0;	/* %n stores through an int pointer */
+	size_t		len;
+
+	/*
+	 * get our slice number - works only with names that end in s%u -
+	 * all others return -1.
+	 *
+	 * NOTE(review): l counts the characters consumed starting at
+	 * rname[dl], yet it is compared with the full length rl.  The
+	 * comparison is kept as it was because callers' fallback
+	 * behavior depends on this result - confirm whether rl - dl
+	 * was intended before changing it.
+	 */
+	if (dl >= rl ||
+	    sscanf(&rname[dl], "s%u%n", slicep, &l) != 1 ||
+	    (size_t)l != rl || (int)*slicep < 0) {
+		return (-1);
+	}
+
+	/*
+	 * go find how many slices there really are
+	 */
+	len = strlen(dname) + 20 + 1;
+	srname = Malloc(len);
+	for (nslice = 0; /* void */; ++nslice) {
+		struct stat	statbuf;
+
+		/* build slice name */
+		(void) snprintf(srname, len, "%ss%u", dname, nslice);
+
+		/* see if it's there */
+		if ((meta_stat(srname, &statbuf) != 0) ||
+		    (! S_ISCHR(statbuf.st_mode))) {
+			break;
+		}
+	}
+	Free(srname);
+
+	/* Need to make sure that we at least have V_NUMPAR */
+	nslice = max(nslice, V_NUMPAR);
+
+	/* make sure we have at least our slice */
+	if (nslice < *slicep)
+		return (-1);
+
+	/* return number of slices */
+	return (nslice);
+}
+
+/*
+ * Attempt to parse the input string as a c[t]ds specifier
+ * The target can either be a SCSI target id or if the device
+ * is in a fabric configuration in a fibre channel setup then
+ * the target is a standard WWN (world wide name); either way it
+ * must scan completely as a hexadecimal number.
+ *
+ * The slice (or partition) number is stored through slice.
+ *
+ * if successful	return 0
+ * if c[t]dp name	return 1
+ * otherwise		return -1
+ */
+int
+parse_ctd(
+	char	*uname,
+	uint_t	*slice)
+{
+	uint_t	ctlr;
+	uint_t	tgt;
+	uint_t	dev;
+	uint_t	hexlen;
+	uint_t	devlen;
+	char	*rest;
+	char	*devstart;
+	char	*hexbuf;
+	int	with_target = 1;
+	int	hex_ok;
+	int	n = -1;
+
+	/* "c<num>t" first; fall back to the targetless "c<num>d" form */
+	if (sscanf(uname, "c%ut%n", &ctlr, &n) != 1 || n == -1) {
+		if (sscanf(uname, "c%ud%n", &ctlr, &n) != 1 || n == -1)
+			return (-1);
+		n--;	/* we want to be on the 'd' */
+		with_target = 0;
+	}
+	rest = uname + n;
+
+	/* the device specifier starts at the last 'd' */
+	devstart = strrchr(rest, 'd');
+	if (devstart == NULL)
+		return (-1);
+
+	if (with_target) {
+		/*
+		 * Prefix the target text with "0X" and require that the
+		 * whole thing scans as one hexadecimal number.
+		 */
+		hexlen = devstart - rest + 2;
+		hexbuf = (char *)Malloc(hexlen + 1);
+		hexbuf[0] = '0';
+		hexbuf[1] = 'X';
+		(void) memcpy(hexbuf + 2, rest, hexlen - 2);
+		hexbuf[hexlen] = '\0';
+		hex_ok = (sscanf(hexbuf, "%x%n", &tgt, &n) == 1 &&
+		    n == hexlen);
+		Free(hexbuf);
+		if (!hex_ok)
+			return (-1);
+	}
+
+	/* device and slice, else device and partition */
+	devlen = strlen(devstart);
+	if (sscanf(devstart, "d%us%u%n", &dev, slice, &n) == 2 &&
+	    n == devlen)
+		return (0);
+	if (sscanf(devstart, "d%up%u%n", &dev, slice, &n) == 2 &&
+	    n == devlen)
+		return (1);
+
+	return (-1);
+}
+
+
+/*
+ * Get the slice number for a device name.
+ *
+ * The number is stored in *slicep and also returned.  Metadevice
+ * names yield 0.  The last path component is first matched against
+ * the c[t]ds, mc#t#d#s# and d#s# forms; only if none matches is the
+ * device opened and asked through DKIOCINFO.
+ *
+ * Returns -1 if the raw name cannot be determined, the device cannot
+ * be opened, or the ioctl fails (ep is set only in the ioctl case).
+ * If the reported partition is not below V_NUMPAR, an MDE_NOT_DISK
+ * error is set and the value returned by mddeverror() is passed back.
+ */
+static int
+uname2sliceno(
+	char		*uname,
+	uint_t		*slicep,
+	md_error_t	*ep
+)
+{
+	uint_t			c = 0, t = 0, d = 0;
+	int			l = 0, cl = 0;
+	int			fd;
+	int			rval;
+	struct dk_cinfo		cinfo;
+	char			*p;
+	char			*rname = NULL;
+
+	if (is_metaname(uname))
+		return (*slicep = 0);
+
+	if ((p = strrchr(uname, '/')) != NULL)
+		p++;
+	else
+		p = uname;
+
+	cl = strlen(p);
+
+	if (parse_ctd(p, slicep) == 0)
+		return (*slicep);
+	else if (sscanf(p, "mc%ut%ud%us%u%n", &c, &t, &d, slicep, &l) == 4 &&
+	    l == cl)
+		return (*slicep);
+	else if (sscanf(p, "d%us%u%n", &d, slicep, &l) == 2 && l == cl)
+		return (*slicep);
+
+	/*
+	 * If we can't get the slice from the name, then we have to do it the
+	 * hard and expensive way.
+	 */
+	if ((rname = rawname(uname)) == NULL)
+		return (-1);
+
+	/* get controller info */
+	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
+		Free(rname);
+		return (-1);
+	}
+
+	if (ioctl(fd, DKIOCINFO, &cinfo) != 0) {
+		int	save = errno;
+
+		if (save == ENOTTY)
+			(void) mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
+		else
+			(void) mdsyserror(ep, save, rname);
+
+		Free(rname);
+		(void) close(fd);
+		return (-1);
+	}
+	(void) close(fd);	/* sd/ssd bug */
+
+	if (cinfo.dki_partition < V_NUMPAR) {
+		Free(rname);
+		return (*slicep = cinfo.dki_partition);
+	}
+
+	/*
+	 * Record the error first, then release rname; it used to be
+	 * leaked on this path.  Freeing the name right after the error
+	 * call matches the ioctl failure path above.
+	 */
+	rval = mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
+	Free(rname);
+	return (rval);
+}
+
+/*
+ * Get partition info for a drive.
+ *
+ * Classifies the drive (dnp->type) and works out how many partitions
+ * it has and which partition rname refers to:
+ *
+ *  - metadevice name:	MDT_META, one partition, partition 0;
+ *  - vtoc readable:	MDT_COMP, counts taken from the vtoc;
+ *  - getnslice() works:	MDT_ACCES, count rounded up to a multiple
+ *			of V_NUMPAR;
+ *  - otherwise:	MDT_UNKNOWN (MDT_ACCES on EACCES), V_NUMPAR
+ *			partitions, partition from uname2sliceno() or 0.
+ *
+ * In the last two cases dnp->errnum records the system error found in
+ * ep (ENOENT if ep holds no system error) and ep is cleared.
+ *
+ * On success 0 is returned with *npartsp and *partnop filled in.
+ * Returns -1 if reading the vtoc failed with MDE_TOO_MANY_PARTS, or
+ * the value of mdsyserror(ENOENT) if the partition number is not
+ * below the partition count.
+ */
+static int
+getparts(
+	mddrivename_t	*dnp,
+	char		*rname,
+	char		*dname,
+	uint_t		*npartsp,
+	uint_t		*partnop,
+	md_error_t	*ep
+)
+{
+	int		nparts;
+	uint_t		partno;
+	mdname_t	name;
+	mdvtoc_t	*vtocp;
+
+	/* metadevice */
+	if (is_metaname(rname)) {
+		dnp->type = MDT_META;
+		nparts = 1;
+		partno = 0;
+		goto gotit;
+	}
+
+	/* see how many partitions in drive, this is really tricky */
+	/* a temporary name is set up just to be able to ask for the vtoc */
+	metainitname(&name);
+	name.rname = rname;
+	name.drivenamep = dnp;
+	if ((vtocp = metagetvtoc(&name, TRUE, &partno, ep)) != NULL) {
+		dnp->type = MDT_COMP;
+		nparts = vtocp->nparts;
+		/* partno already setup */
+		/* dname already setup */
+		goto gotit;
+	}
+
+	/* too many partitions is a hard failure; ep is left set */
+	if ((ep->info.errclass == MDEC_DEV) &&
+	    (ep->info.md_error_info_t_u.dev_error.errnum == MDE_TOO_MANY_PARTS))
+		return (-1);
+
+	/* fallback and try and guess (used to check for just EACCES here) */
+	if ((dname != NULL) &&
+	    ((nparts = getnslice(rname, dname, &partno)) > 0)) {
+		dnp->type = MDT_ACCES;
+		/* remember why the vtoc could not be read */
+		if (mdanysyserror(ep)) {
+			dnp->errnum =
+			    ep->info.md_error_info_t_u.sys_error.errnum;
+		} else {
+			dnp->errnum = ENOENT;
+		}
+		mdclrerror(ep);
+		/* nparts already setup */
+		/* partno already setup */
+		/* dname already setup */
+		nparts = roundup(nparts, V_NUMPAR);
+		goto gotit;
+	}
+
+	/* nothing worked */
+	dnp->type = MDT_UNKNOWN;
+	if (mdissyserror(ep, EACCES))
+		dnp->type = MDT_ACCES;
+
+	if (mdanysyserror(ep)) {
+		dnp->errnum = ep->info.md_error_info_t_u.sys_error.errnum;
+	} else {
+		dnp->errnum = ENOENT;
+	}
+
+	mdclrerror(ep);
+	nparts = V_NUMPAR;
+	/* best effort: a failure here just means partition 0 */
+	if (uname2sliceno(rname, &partno, ep) < 0) {
+		mdclrerror(ep);
+		partno = 0;
+	}
+
+	/* return success */
+gotit:
+	assert(nparts > 0);
+
+	/* the requested partition must exist on the drive */
+	if (partno >= nparts)
+		return (mdsyserror(ep, ENOENT, rname));
+
+	*npartsp = nparts;
+	*partnop = partno;
+	return (0);
+}
+
+/*
+ * Set np->bname to the block name that blkname() derives from
+ * np->rname, replacing any previous value.  Returns 0 on success;
+ * otherwise an ENOENT error is raised through mdsyserror() and its
+ * return value is passed back.
+ */
+static int
+getbname(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char		*blk;
+
+	/* the raw name must already be fully qualified */
+	assert(np->rname != NULL);
+
+	blk = blkname(np->rname);
+	if (blk == NULL)
+		return (mdsyserror(ep, ENOENT, np->rname));
+
+	if (np->bname != NULL)
+		Free(np->bname);
+	np->bname = blk;
+	return (0);
+}
+
+/*
+ * Set np->cname from np->bname, replacing any previous value:
+ *
+ *  - a block name directly under /dev/dsk/, /dev/ap/dsk/ or
+ *    /dev/did/dsk/ is shortened to its last component;
+ *  - any other non-metadevice keeps the whole block name;
+ *  - a metadevice gets its last component, prefixed with
+ *    "<setname>/" unless sp is the local set.
+ *
+ * Must not be used on MDT_FAST_COMP / MDT_FAST_META names.
+ */
+static void
+getcname(
+	mdsetname_t	*sp,
+	mdname_t	*np
+)
+{
+	static const char *const diskdirs[] = {
+		"/dev/dsk/",
+		"/dev/ap/dsk/",
+		"/dev/did/dsk/"
+	};
+	char		*setnm = sp->setname;
+	char		*blk = np->bname;
+	char		*leaf;
+	size_t		dirlen;
+	size_t		sz;
+	int		i;
+
+	assert(setnm != NULL);
+	assert(blk != NULL);
+	assert(np->drivenamep->type != MDT_FAST_COMP &&
+	    np->drivenamep->type != MDT_FAST_META);
+
+	/* regular devices: exactly one component below a known directory */
+	for (i = 0; i < 3; i++) {
+		dirlen = strlen(diskdirs[i]);
+		if (strncmp(blk, diskdirs[i], dirlen) != 0)
+			continue;
+		leaf = blk + dirlen;
+		if (strchr(leaf, '/') != NULL)
+			continue;
+		if (np->cname)
+			Free(np->cname);
+		np->cname = Strdup(leaf);
+		return;
+	}
+
+	/* anything else but metadevice */
+	if (np->drivenamep->type != MDT_META) {
+		if (np->cname)
+			Free(np->cname);
+		np->cname = Strdup(blk);
+		return;
+	}
+
+	/* metadevice */
+	leaf = strrchr(blk, '/');
+	assert(leaf != NULL);
+	++leaf;
+
+	if (metaislocalset(sp)) {
+		if (np->cname)
+			Free(np->cname);
+		np->cname = Strdup(leaf);
+		return;
+	}
+
+	/* shared set: qualify with the set name */
+	assert(setnm[0] != '\0');
+	if (np->cname)
+		Free(np->cname);
+	sz = strlen(setnm) + 1 + strlen(leaf) + 1;
+	np->cname = Malloc(sz);
+	(void) snprintf(np->cname, sz, "%s/%s", setnm, leaf);
+}
+
+/*
+ * Fill in np->dev from the raw device node named by np->rname.
+ *
+ * Returns 0 on success.  If the node cannot be stat'ed a system
+ * error is raised, and if it is not a character device MDE_NOT_DISK
+ * is raised; in both cases the value of the error routine is
+ * returned.
+ */
+int
+meta_getdev(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	struct stat	sb;
+
+	if (meta_stat(np->rname, &sb) != 0)
+		return (mdsyserror(ep, errno, np->rname));
+	if (! S_ISCHR(sb.st_mode))
+		return (mddeverror(ep, MDE_NOT_DISK, NODEV64, np->rname));
+
+	np->dev = meta_expldev(sb.st_rdev);
+
+	/* fast names never come through here */
+	assert(np->drivenamep->type != MDT_FAST_META &&
+	    np->drivenamep->type != MDT_FAST_COMP);
+
+	/* a metadevice's minor number must belong to the given set */
+	assert((np->drivenamep->type != MDT_META) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(np->dev))));
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Set up all names for a slice from its raw name: rname (a copy of
+ * the argument), then bname, cname and dev derived from it.
+ * Returns 0 on success, -1 if the block name or the device number
+ * cannot be obtained.
+ */
+static int
+getnames(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	char		*rname,
+	md_error_t	*ep
+)
+{
+	/* raw name */
+	if (np->rname != NULL)
+		Free(np->rname);
+	np->rname = Strdup(rname);
+
+	/* block name; without it nothing else can be derived */
+	if (getbname(np, ep) != 0)
+		return (-1);
+
+	/* common name, then device number */
+	getcname(sp, np);
+	return ((meta_getdev(sp, np, ep) != 0) ? -1 : 0);
+}
+
+/*
+ * Fake up names for a slice without consulting the device tree.
+ *
+ * np->rname becomes a copy of rname.  np->bname is derived from it
+ * textually: an old style /dev/rXXNN[a-h] name loses the 'r' after
+ * "/dev/", and a name containing "/rdsk/" has that turned into
+ * "/dsk/".  np->cname is then rebuilt by getcname().
+ */
+static void
+getfakenames(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	char		*rname
+)
+{
+	char		*p;
+	char		onmb[BUFSIZ+1], snm[BUFSIZ+1];
+	uint_t		d = 0;
+	int		l = 0;
+
+	/* fake names */
+	if (np->rname != NULL)
+		Free(np->rname);
+	np->rname = Strdup(rname);
+
+	if (np->bname != NULL)
+		Free(np->bname);
+	np->bname = Strdup(rname);
+
+	/*
+	 * Fixup old style names.  The block name is one character
+	 * shorter than the raw name, so it fits in the l bytes given
+	 * to snprintf (the copy holds l + 1).
+	 */
+	if (sscanf(rname, "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+	    "%" VAL2STR(BUFSIZ) "[a-h]%n",
+	    onmb, &d, snm, &l) == 3 && l == strlen(rname))
+		(void) snprintf(np->bname, l, "/dev/%s%u%s", onmb, d, snm);
+
+	/*
+	 * Fixup new style names: shift everything after the 'r' of
+	 * "/rdsk/" one position to the left.
+	 */
+	if ((p = strstr(np->bname, "/rdsk/")) != NULL) {
+		for (++p; (*(p + 1) != '\0'); ++p)
+			*p = *(p + 1);
+		*p = '\0';
+	}
+
+	/*
+	 * getcname() frees a non-NULL np->cname itself, so the pointer
+	 * must be cleared once it has been freed here; otherwise the
+	 * old string would be freed twice.
+	 */
+	if (np->cname != NULL) {
+		Free(np->cname);
+		np->cname = NULL;
+	}
+	getcname(sp, np);
+}
+
+/*
+ * Set up the name entry for partition partno of drive dnp and return
+ * a pointer to it (an element of dnp->parts.parts_val).
+ *
+ * The raw slice name used is rname when given, dname itself for a
+ * metadevice, and otherwise one built from dname and partno: a slice
+ * letter ('a' + partno) for old style /dev/rXXNN and /dev/XXNN drive
+ * names, "s<partno>" for everything else.
+ *
+ * If the real names cannot be obtained, fake names are used for
+ * MDT_UNKNOWN drives and for MDT_COMP drives failing with ENOENT
+ * (which are downgraded to MDT_UNKNOWN, with dnp->errnum recorded).
+ * For other drive types the drive name itself is tried as the slice;
+ * if that fails too, NULL is returned and any system error in ep is
+ * re-issued against the last component of uname (or of the slice
+ * name when uname is NULL or empty).
+ */
+static mdname_t *
+setup_slice(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	char		*uname,
+	char		*rname,
+	char		*dname,
+	uint_t		partno,
+	md_error_t	*ep
+)
+{
+	char		*srname = NULL;
+	mdname_t	*np;
+
+	/* must have a set */
+	assert(sp != NULL);
+	assert(partno < dnp->parts.parts_len);
+	assert(dname != NULL);
+
+	np = &dnp->parts.parts_val[partno];
+
+	if (rname)
+		srname = rname;
+	else if (is_metaname(dname))
+		srname = dname;
+	else {
+		char	onmb[BUFSIZ+1];
+		uint_t	d = 0;
+		int	l = 0, cl = strlen(dname);
+		size_t	len;
+
+		/* room for the drive name plus a slice suffix */
+		len = cl + 20 + 1;
+		srname = Malloc(len);
+
+		/*
+		 * Handle /dev/rXXNN.
+		 */
+		if (sscanf(dname, "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u%n",
+		    onmb, &d, &l) == 2 && l == cl) {
+			(void) snprintf(srname, len, "/dev/r%s%u%c", onmb, d,
+			    'a' + partno);
+		} else if (sscanf(dname, "/dev/%" VAL2STR(BUFSIZ) "[^0-9/]%u%n",
+		    onmb, &d, &l) == 2 && l == cl) {
+			    (void) snprintf(srname, len, "/dev/%s%u%c", onmb, d,
+				'a' + partno);
+		} else {
+			/* build the slice that is wanted */
+			(void) snprintf(srname, len, "%ss%u", dname, partno);
+		}
+	}
+
+	if (getnames(sp, np, srname, ep) != 0) {
+		if (dnp->type == MDT_UNKNOWN) {
+			/* nothing is known about the drive: fake it */
+			mdclrerror(ep);
+			getfakenames(sp, np, srname);
+		} else if (dnp->type == MDT_COMP && mdissyserror(ep, ENOENT)) {
+			/* the slice is missing: downgrade and fake it */
+			dnp->type = MDT_UNKNOWN;
+			if (mdanysyserror(ep)) {
+				dnp->errnum =
+				    ep->info.md_error_info_t_u.sys_error.errnum;
+			} else {
+				dnp->errnum = ENOENT;
+			}
+			mdclrerror(ep);
+			getfakenames(sp, np, srname);
+		} else {
+			/* last resort: use the drive name as the slice */
+			mdclrerror(ep);
+			if (getnames(sp, np, dname, ep) != 0) {
+				np = NULL;
+				goto fixup;
+			}
+		}
+	}
+
+out:
+	/* srname is only ours to free when it was allocated above */
+	if ((srname != rname) && (srname != dname))
+		Free(srname);
+
+	/* return name */
+	return (np);
+
+fixup:
+	/* re-issue a system error against a short, user-visible name */
+	if (mdanysyserror(ep)) {
+		char	*p;
+		int	errnum = ep->info.md_error_info_t_u.sys_error.errnum;
+
+		mdclrerror(ep);
+		if (uname && *uname) {
+			if ((p = strrchr(uname, '/')) != NULL)
+				(void) mdsyserror(ep, errnum, ++p);
+			else
+				(void) mdsyserror(ep, errnum, uname);
+		} else {
+			if ((p = strrchr(srname, '/')) != NULL)
+				(void) mdsyserror(ep, errnum, ++p);
+			else
+				(void) mdsyserror(ep, errnum, srname);
+		}
+	}
+	goto out;
+}
+
+/*
+ * Free one fast name: its drive name structure (with the strings and
+ * unit that hangs off it), its own strings, and the name itself.
+ * *np is set to NULL afterwards.
+ */
+static void
+metafreefastnm(mdname_t **np)
+{
+	mdname_t	*nm;
+	mddrivename_t	*drv;
+
+	assert(np != NULL && *np != NULL);
+	nm = *np;
+
+	/* the drive name belongs to this name alone */
+	drv = nm->drivenamep;
+	if (drv != NULL) {
+		if (drv->cname != NULL)
+			Free(drv->cname);
+		if (drv->rname != NULL)
+			Free(drv->rname);
+		if (drv->miscname != NULL)
+			Free(drv->miscname);
+		meta_free_unit(drv);
+		Free(drv);
+	}
+
+	/* the name's own strings */
+	if (nm->cname != NULL)
+		Free(nm->cname);
+	if (nm->bname != NULL)
+		Free(nm->bname);
+	if (nm->rname != NULL)
+		Free(nm->rname);
+	if (nm->devicesname != NULL)
+		Free(nm->devicesname);
+
+	Free(nm);
+	*np = NULL;
+}
+
+/*
+ * Empty the fast name cache (fastnmlp): every cached name is freed
+ * together with its list element.
+ */
+static void
+metaflushfastnames()
+{
+	mdnamelist_t	*cur = fastnmlp;
+	mdnamelist_t	*nxt;
+
+	while (cur != NULL) {
+		nxt = cur->next;
+		metafreefastnm(&cur->namep);
+		Free(cur);
+		cur = nxt;
+	}
+	fastnmlp = NULL;
+}
+
+/*
+ * Derive the raw device name for unm from the form of the name
+ * alone; rawname() is not called here.
+ *
+ * Recognized, in this order:
+ *  - metadevice names, with or without a set and with or without the
+ *    /dev/md/.../dsk or /dev/md/.../rdsk prefix;
+ *  - old style /dev/rXXNN[a-h] and /dev/XXNN[a-h] names;
+ *  - paths containing "/dsk/" (turned into "/rdsk/");
+ *  - paths containing "/rdsk/" (copied as they are).
+ *
+ * Returns a newly allocated string, or NULL with EINVAL set in ep
+ * for a name that is neither a recognized metadevice form nor a
+ * recognized absolute path.
+ */
+static char *
+getrname_fast(char *unm, md_error_t *ep)
+{
+	uint_t			d = 0;
+	int			l = 0;
+	int			cl = strlen(unm);
+	char			onmb[BUFSIZ+1], snm[BUFSIZ+1], cnmb[BUFSIZ];
+	char			*rnm;
+	char			*p;
+	size_t			len;
+
+	if (is_metaname(unm)) {
+		/* without set */
+		if (sscanf(unm, "d%u%n", &d, &l) == 1 && cl == l) {
+			/* 14 is the length of "/dev/md/rdsk/d" */
+			rnm = Zalloc(14 + cl + 1);
+			(void) sprintf(rnm, "/dev/md/rdsk/d%u", d);
+			return (rnm);
+		}
+
+		/* fully-qualified without set */
+		if ((sscanf(unm, "/dev/md/dsk/d%u%n", &d, &l) == 1 ||
+		    sscanf(unm, "/dev/md/rdsk/d%u%n", &d, &l) == 1) &&
+		    cl == l) {
+			rnm = Zalloc(14 + cl + 1);
+			(void) sprintf(rnm, "/dev/md/rdsk/d%u", d);
+			return (rnm);
+		}
+
+		/* with set */
+		if ((sscanf(unm,
+		    "%" VAL2STR(BUFSIZ) "[^/]/d%u%n", snm, &d, &l) == 2 ||
+		    sscanf(unm, "/dev/md/%" VAL2STR(BUFSIZ) "[^/]/dsk/d%u%n",
+		    snm, &d, &l) == 2 ||
+		    sscanf(unm, "/dev/md/%" VAL2STR(BUFSIZ) "[^/]/rdsk/d%u%n",
+		    snm, &d, &l) == 2) && cl == l) {
+			len = 14 + cl + strlen(snm) + 1;
+			rnm = Zalloc(len);
+			(void) snprintf(rnm, len, "/dev/md/%s/rdsk/d%u",
+			    snm, d);
+			return (rnm);
+		}
+	}
+
+	/* NOT Fully qualified path, done */
+	if (unm[0] != '/') {
+		(void) mdsyserror(ep, EINVAL, unm);
+		return (NULL);
+	}
+
+	/*
+	 * Get slice information from old style names of the form
+	 * /dev/rXXNN[a-h] or /dev/XXNN[a-h], must be done before regular
+	 * devices, but after metadevices.
+	 */
+	if ((sscanf(unm, "/dev/r%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+	    "%" VAL2STR(BUFSIZ) "[a-h]%n",
+	    onmb, &d, snm, &l) == 3 ||
+	    sscanf(unm, "/dev/%" VAL2STR(BUFSIZ) "[^0-9/]%u"
+	    "%" VAL2STR(BUFSIZ) "[a-h]%n",
+	    onmb, &d, snm, &l) == 3) && l == cl) {
+		/* p is only used as a found/not-found indication here */
+		if ((p = strchr("abcdefgh", snm[0])) != NULL) {
+			(void) snprintf(cnmb, sizeof (cnmb), "/dev/r%s%u%s",
+			    onmb, d, snm);
+			return (Strdup(cnmb));
+		}
+	}
+
+	if ((p = strstr(unm, "/dsk/")) != NULL) {	/* /.../dsk/... */
+		/* p now points at the 'd' of "dsk/" */
+		++p;
+		/* one extra byte for the inserted 'r' */
+		rnm = Zalloc(strlen(unm) + 1 + 1);
+		(void) strncpy(rnm, unm, (p - unm));
+		rnm[(p - unm)] = 'r';
+		(void) strcpy(&rnm[(p - unm) + 1], p);
+		return (rnm);
+	} else if (strstr(unm, "/rdsk/") != NULL) {	/* /.../rdsk/... */
+		return (Strdup(unm));
+	}
+
+	/*
+	 * Shouldn't get here but if we do then we have an unrecognized
+	 * fully qualified path - error
+	 */
+	(void) mdsyserror(ep, EINVAL, unm);
+	return (NULL);
+}
+
+/*
+ * Look up, or create, the fast name cache entry for uname.
+ *
+ * The cache (fastnmlp) is searched for an entry whose bname equals
+ * uname; a hit is returned as it is.  Otherwise a new name with its
+ * own drive name structure is appended to the cache and filled in
+ * from the text of uname alone:
+ *
+ *  - type:	MDT_FAST_META for metadevice names, else MDT_FAST_COMP;
+ *  - cname:	for metadevices the last path component, prefixed with
+ *		"<setname>/" unless sp is the local set; for recognized
+ *		disk paths the last component; else all of uname;
+ *  - bname:	a copy of uname;
+ *  - rname:	from getrname_fast();
+ *  - drive cname/rname: metadiskname() of the name's cname/rname.
+ *
+ * If getrname_fast() fails the new entry is removed from the cache
+ * again and NULL is returned (ep is set by getrname_fast()).
+ */
+static mdname_t *
+metainitfastname(
+	mdsetname_t	*sp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	uint_t			c = 0, t = 0, d = 0, s = 0;
+	int			l = 0;
+	mddrivename_t		*dnp;
+	mdname_t		*np;
+	mdnamelist_t		**fnlpp;
+
+	/* cache lookup; on a miss fnlpp is left at the tail link */
+	for (fnlpp = &fastnmlp; (*fnlpp != NULL); fnlpp = &(*fnlpp)->next) {
+		np = (*fnlpp)->namep;
+
+		if (strcmp(np->bname, uname) == 0)
+			return (np);
+	}
+
+	/* append a fresh, initialized entry */
+	*fnlpp = Zalloc(sizeof (**fnlpp));
+	np = (*fnlpp)->namep = Zalloc(sizeof (mdname_t));
+	metainitname(np);
+	dnp = np->drivenamep = Zalloc(sizeof (mddrivename_t));
+	metainitdrivename(dnp);
+
+
+	/* Metadevices */
+	if (is_metaname(uname)) {
+		char *p;
+		size_t len;
+
+		if ((p = strrchr(uname, '/')) != NULL)
+			++p;
+		else
+			p = uname;
+
+		if (metaislocalset(sp)) {
+			if (np->cname)
+				Free(np->cname);
+			np->cname = Strdup(p);
+		} else {
+			if (np->cname)
+				Free(np->cname);
+			len = strlen(sp->setname) + 1 + strlen(p) + 1;
+			np->cname = Zalloc(len);
+			(void) snprintf(np->cname, len, "%s/%s",
+			    sp->setname, p);
+		}
+		dnp->type = MDT_FAST_META;
+		goto done;
+	}
+
+	/* Others */
+	dnp->type = MDT_FAST_COMP;
+
+	/*
+	 * Recognized disk paths (with and without a slice) are known by
+	 * their last component; the pattern must cover all of uname.
+	 */
+	if (((sscanf(uname, "/dev/rdsk/c%ut%ud%us%u%n", &c, &t, &d,
+		&s, &l) == 4 ||
+	    sscanf(uname, "/dev/dsk/c%ut%ud%us%u%n", &c, &t, &d,
+		&s, &l) == 4 ||
+	    sscanf(uname, "/dev/ap/rdsk/mc%ut%ud%us%u%n", &c, &t, &d,
+		&s, &l) == 4 ||
+	    sscanf(uname, "/dev/ap/dsk/mc%ut%ud%us%u%n", &c, &t, &d,
+		&s, &l) == 4 ||
+	    sscanf(uname, "/dev/did/rdsk/d%us%u%n", &t, &s, &l) == 2 ||
+	    sscanf(uname, "/dev/did/dsk/d%us%u%n", &t, &s, &l) == 2||
+	    sscanf(uname, "/dev/rdsk/c%ud%us%u%n", &c, &d, &s, &l) == 3 ||
+	    sscanf(uname, "/dev/dsk/c%ud%us%u%n", &c, &d, &s, &l) == 3 ||
+	    sscanf(uname, "/dev/rdsk/c%ut%ud%u%n", &c, &t, &d, &l) == 3 ||
+	    sscanf(uname, "/dev/dsk/c%ut%ud%u%n", &c, &t, &d, &l) == 3 ||
+	    sscanf(uname, "/dev/ap/rdsk/mc%ut%ud%u%n", &c, &t, &d, &l) == 3 ||
+	    sscanf(uname, "/dev/ap/dsk/mc%ut%ud%u%n", &c, &t, &d, &l) == 3 ||
+	    sscanf(uname, "/dev/did/rdsk/d%u%n", &t, &l) == 1 ||
+	    sscanf(uname, "/dev/did/dsk/d%u%n", &t, &l) == 1 ||
+	    sscanf(uname, "/dev/rdsk/c%ud%u%n", &c, &d, &l) == 2 ||
+	    sscanf(uname, "/dev/dsk/c%ud%u%n", &c, &d, &l) == 2) &&
+		l == strlen(uname))) {
+		/*
+		 * np->cname briefly holds a pointer into uname here; it
+		 * is replaced by an allocated copy right away.
+		 */
+		if ((np->cname = strrchr(uname, '/')) == NULL)
+			np->cname = Strdup(uname);
+		else
+			np->cname = Strdup(++np->cname);
+	} else {
+		np->cname = Strdup(uname);
+	}
+
+done:
+	/* Driver always gives us block names */
+	np->bname = Strdup(uname);
+
+	/* canonical disk name */
+	if ((dnp->cname = metadiskname(np->cname)) == NULL)
+		dnp->cname = Strdup(np->cname);
+
+	if ((np->rname = getrname_fast(uname, ep)) != NULL) {
+		if ((dnp->rname = metadiskname(np->rname)) == NULL)
+			dnp->rname = Strdup(np->rname);
+	} else {
+		/* undo the append: free the entry and restore the tail */
+		metafreefastnm(&(*fnlpp)->namep);
+		Free(*fnlpp);
+		*fnlpp = NULL;
+		return (NULL);
+	}
+
+	/* cleanup, return success */
+	return (np);
+}
+
+/*
+ * set up names for a device
+ *
+ * Shared implementation behind metaname() and metaname_fast().
+ *
+ * spp	 - in/out set name pointer, validated via meta_name_getname()
+ * uname - user supplied device name (must not be NULL)
+ * fast	 - non-zero: on a drive cache miss return a "fast" name from
+ *	   metainitfastname() instead of building a full drive entry
+ * ep	 - error return
+ *
+ * Returns the name entry for the requested slice, or NULL on error.
+ */
+static mdname_t *
+metaname_common(
+	mdsetname_t	**spp,
+	char		*uname,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	mddrivenamelist_t	**tail;
+	mddrivename_t		*dnp;
+	uint_t			slice;
+	mdname_t		*np;
+	char			*rname = NULL;
+	char			*dname = NULL;
+	char			*cname = NULL;
+	uint_t			nparts, partno;
+
+	assert(uname != NULL);
+
+	/* check setname; the comparison name itself is not needed here */
+	if ((cname = meta_name_getname(spp, uname, ep)) == NULL)
+		return (NULL);
+
+	assert(*spp != NULL);
+	Free(cname);
+
+	/* get raw name (rname) of the slice and drive (dname) we have */
+	if ((rname = getrawnames(spp, uname, &dname, ep)) == NULL) {
+		return (NULL);
+	}
+
+	/* look in cache first; on a miss tail is left at the list end */
+	for (tail = &drivelistp; (*tail != NULL); tail = &(*tail)->next) {
+		dnp = (*tail)->drivenamep;
+
+		/*
+		 * check to see if the drive name is already in the cache.
+		 * dname is treated as possibly NULL everywhere else in this
+		 * function, so never hand a NULL dname to strcmp().
+		 */
+		if ((dname != NULL) && (dnp->rname != NULL) &&
+		    strcmp(dnp->rname, dname) == 0) {
+
+			Free(rname);
+			Free(dname);
+
+			if (uname2sliceno(uname, &partno, ep) < 0)
+				return (NULL);
+
+			return (metaslicename(dnp, partno, ep));
+		}
+	}
+
+	/*
+	 * If a fast name is OK, then get one, and be done.
+	 */
+	if (fast) {
+		Free(rname);
+		if (dname != NULL)
+			Free(dname);
+
+		return (metainitfastname(*spp, uname, ep));
+	}
+
+	/* allocate new list element and drive */
+	*tail = Zalloc(sizeof (**tail));
+	dnp = (*tail)->drivenamep = Zalloc(sizeof (*dnp));
+
+	metainitdrivename(dnp);
+
+	/* get parts info */
+	if (getparts(dnp, rname, dname, &nparts, &partno, ep) != 0)
+		goto out;
+
+	/*
+	 * libmeta needs at least V_NUMPAR partitions.
+	 * If we have an EFI partition with less than V_NUMPAR slices,
+	 * we nevertheless reserve space for V_NUMPAR
+	 */
+	if (nparts < V_NUMPAR) {
+		nparts = V_NUMPAR;
+	}
+
+	/* allocate and link in parts */
+	dnp->parts.parts_len = nparts;
+	dnp->parts.parts_val = Zalloc((sizeof (*dnp->parts.parts_val)) *
+	    dnp->parts.parts_len);
+	for (slice = 0; (slice < nparts); ++slice) {
+		np = &dnp->parts.parts_val[slice];
+		metainitname(np);
+		np->drivenamep = dnp;
+	}
+
+	/* setup name_t (or slice) wanted */
+	if ((np = setup_slice(*spp, dnp, uname, rname, dname, partno, ep))
+	    == NULL)
+		goto out;
+
+	/* canonical disk name */
+	if ((dnp->cname = metadiskname(np->cname)) == NULL)
+		dnp->cname = Strdup(np->cname);
+	if ((dnp->rname = metadiskname(np->rname)) == NULL)
+		dnp->rname = Strdup(np->rname);
+
+	/* cleanup, return success */
+	if (dname != NULL)
+		Free(dname);
+	Free(rname);
+	return (np);
+
+	/* cleanup, return error: undo the list element appended above */
+out:
+	if (dname != NULL)
+		Free(dname);
+	if (rname != NULL)
+		Free(rname);
+
+	metafreedrivename(dnp);
+	Free(dnp);
+	Free(*tail);
+	*tail = NULL;
+	return (NULL);
+}
+
+/*
+ * set up names for a device (full, non-fast lookup)
+ */
+mdname_t *
+metaname(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	const int	want_fast = 0;
+
+	/* delegate to the shared implementation with "fast" disabled */
+	return (metaname_common(spp, uname, want_fast, ep));
+}
+
+/*
+ * set up names for a device, allowing a "fast" name to be returned
+ */
+mdname_t *
+metaname_fast(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	const int	want_fast = 1;
+
+	/* delegate to the shared implementation with "fast" enabled */
+	return (metaname_common(spp, uname, want_fast, ep));
+}
+
+/*
+ * set up names for a drive
+ *
+ * spp	 - in/out set name pointer, validated via meta_name_getname()
+ * uname - user supplied drive name (must not be NULL)
+ * ep	 - error return
+ *
+ * The drive cache is consulted first.  On a miss, each candidate slice
+ * name "<uname>s<N>" (N < MD_MAX_PARTS) is tried through metaname()
+ * until one resolves; the drive of that slice is returned.
+ *
+ * Returns the drive name entry, or NULL on error.
+ */
+mddrivename_t *
+metadrivename(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	char		*slicename;
+	mdname_t	*np;
+
+	char		*cname;
+	mddrivenamelist_t **tail;
+	mddrivename_t	*dnp;
+	char		*dname;
+	int		i;
+	int		mplen;
+	size_t		len;
+
+	/* check setname, get comparison name */
+	assert(uname != NULL);
+	if ((cname = meta_name_getname(spp, uname, ep)) == NULL) {
+		(void) mdsyserror(ep, ENOENT, uname);
+		return (NULL);
+	}
+
+	assert(*spp != NULL);
+
+	if ((dname = metadiskname(cname)) == NULL) {
+		(void) mdsyserror(ep, ENOENT, cname);
+		Free(cname);
+		return (NULL);
+	}
+
+	/* look in cache first */
+	for (tail = &drivelistp; (*tail != NULL); tail = &(*tail)->next) {
+		dnp = (*tail)->drivenamep;
+		if ((dnp->cname != NULL &&
+		    (strcmp(dnp->cname, dname) == 0)) ||
+		    (dnp->rname != NULL &&
+		    (strcmp(dnp->rname, dname) == 0))) {
+			Free(cname);
+			Free(dname);
+			return (dnp);
+		}
+	}
+
+	/*
+	 * The comparison names were only needed for the cache lookup;
+	 * release them now so they are not leaked on a cache miss.
+	 */
+	Free(cname);
+	Free(dname);
+
+	/* Check each possible slice name based on MD_MAX_PARTS. */
+
+	/*
+	 * Figure out how much string space to reserve to fit
+	 * (MD_MAX_PARTS - 1) into the name string; the loop will
+	 * increment the mplen counter once for each decimal digit in
+	 * (MD_MAX_PARTS - 1).
+	 */
+	for (i = MD_MAX_PARTS - 1, mplen = 0; i; i /= 10, ++mplen)
+		;
+	/* uname + 's' + digits + terminating NUL */
+	len = strlen(uname) + mplen + 2;
+	slicename = Malloc(len);
+
+	/* Check for each slice in turn until we find one */
+	for (np = NULL, i = 0; ((np == NULL) && (i < MD_MAX_PARTS)); ++i) {
+		(void) snprintf(slicename, len, "%ss%d", uname, i);
+		np = metaname(spp, slicename, ep);
+	}
+	Free(slicename);
+
+	if (np == NULL) {
+		char	*diskname;
+
+		/*
+		 * No slice resolved.  If the failure was ENOENT but uname
+		 * still parses as a disk name, report it as "not a drive
+		 * name" instead.
+		 */
+		if ((mdissyserror(ep, ENOENT)) &&
+		    ((diskname = metadiskname(uname)) != NULL)) {
+			Free(diskname);
+			(void) mderror(ep, MDE_NOT_DRIVENAME, uname);
+		}
+		return (NULL);
+	}
+	return (np->drivenamep);
+}
+
+/*
+ * FUNCTION:	metaslicename()
+ * INPUT:	dnp	- the drivename structure
+ *		sliceno	- the slice on the drive to return
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	mdname_t- pointer to the slice name structure, or NULL
+ * PURPOSE:	interface to the parts struct in the drive name struct.
+ *		The slice name structures are not guaranteed to be
+ *		populated, so callers should use this function instead
+ *		of indexing the parts array themselves: an unpopulated
+ *		entry is filled in before it is returned.
+ */
+mdname_t *
+metaslicename(
+	mddrivename_t	*dnp,
+	uint_t		sliceno,
+	md_error_t	*ep
+)
+{
+	mdsetname_t	*setp = NULL;
+	char		*setcheck = NULL;
+	mdname_t	*slicep;
+
+	assert(dnp->type != MDT_FAST_COMP && dnp->type != MDT_FAST_META);
+
+	/* reject slice numbers beyond the parts array */
+	if (sliceno >= dnp->parts.parts_len) {
+		(void) mderror(ep, MDE_NOSLICE, dnp->cname);
+		return (NULL);
+	}
+
+	slicep = &dnp->parts.parts_val[sliceno];
+
+	/* nothing to do when the entry has already been filled in */
+	if (slicep->cname != NULL)
+		return (slicep);
+
+	/* resolve the set the drive belongs to */
+	setcheck = meta_name_getname(&setp, dnp->cname, ep);
+	if (setcheck == NULL)
+		return (NULL);
+
+	/* populate the slice entry */
+	slicep = setup_slice(setp, dnp, NULL, NULL, dnp->rname, sliceno, ep);
+
+	Free(setcheck);
+
+	return (slicep);
+}
+
+/*
+ * set up metadevice name from id
+ *
+ * spp	- in/out set name pointer; may be NULL, in which case a local
+ *	  scratch pointer is used
+ * mnum	- metadevice minor number
+ * fast	- non-zero: use metaname_fast() and record the device number
+ *	  in the returned name
+ * ep	- error return
+ *
+ * Returns the name entry, or NULL on error.
+ */
+mdname_t *
+metamnumname(
+	mdsetname_t	**spp,
+	minor_t		mnum,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	set_t		setno = MD_MIN2SET(mnum);
+	mdsetname_t	*sp = NULL;
+	char		*uname;
+	mdname_t	*np;
+	size_t		len;
+
+	/* check set first */
+	if (spp == NULL)
+		spp = &sp;
+	if (chksetno(spp, setno, ep) != 0)
+		return (NULL);
+	assert(*spp != NULL);
+	sp = *spp;
+
+	/*
+	 * build corresponding device name; the unit number is cast so
+	 * that it always matches the %lu conversion
+	 */
+	if (metaislocalset(sp)) {
+		len = 20;
+		uname = Malloc(len);
+		(void) snprintf(uname, len, "d%lu",
+		    (ulong_t)MD_MIN2UNIT(mnum));
+	} else {
+		len = strlen(sp->setname) + 1 + 20;
+		uname = Malloc(len);
+		(void) snprintf(uname, len, "%s/d%lu", sp->setname,
+		    (ulong_t)MD_MIN2UNIT(mnum));
+	}
+
+	/* setup name */
+	if (fast) {
+		np = metaname_fast(spp, uname, ep);
+		/* metaname_fast() can fail; only touch a valid name */
+		if (np != NULL)
+			np->dev = metamakedev(mnum);
+	} else
+		np = metaname(spp, uname, ep);
+
+	Free(uname);
+	return (np);
+}
+
+/*
+ * return metadevice name
+ *
+ * Looks the minor number up via metamnumname() and hands back the
+ * cname of the resulting entry; any lookup error is cleared and
+ * reported to the caller simply as NULL.
+ */
+char *
+get_mdname(
+	minor_t		mnum
+)
+{
+	md_error_t	err = mdnullerror;
+	mdname_t	*namep;
+
+	namep = metamnumname(NULL, mnum, 0, &err);
+	if (namep == NULL) {
+		/* lookup failed: discard the error details */
+		mdclrerror(&err);
+		return (NULL);
+	}
+	assert(meta_getminor(namep->dev) == mnum);
+
+	return (namep->cname);
+}
+
+/*
+ * check for device type
+ *
+ * Returns non-zero when the name's drive is typed as a metadevice
+ * (MDT_META or MDT_FAST_META), zero otherwise.
+ */
+int
+metaismeta(
+	mdname_t	*np
+)
+{
+	if (np->drivenamep->type == MDT_META)
+		return (1);
+	if (np->drivenamep->type == MDT_FAST_META)
+		return (1);
+	return (0);
+}
+
+/*
+ * Verify that np names a metadevice.
+ * Returns 0 if it does; otherwise records MDE_NOT_META in ep and
+ * returns the value of mddeverror().
+ */
+int
+metachkmeta(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	if (metaismeta(np))
+		return (0);
+
+	return (mddeverror(ep, MDE_NOT_META, np->dev, np->cname));
+}
+
+/*
+ * Verify that np names either a metadevice or a component (MDT_COMP).
+ * Returns 0 if so.  For MDT_ACCES and MDT_UNKNOWN drives the errno
+ * saved in the drive name is reported via mdsyserror(); any other
+ * type trips an assertion and is reported as MDE_NOT_DISK.
+ */
+int
+metachkdisk(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*drivep = np->drivenamep;
+
+	assert(drivep->type != MDT_FAST_COMP && drivep->type != MDT_FAST_META);
+
+	/* metadevices and plain components are fine */
+	if (metaismeta(np) || (drivep->type == MDT_COMP))
+		return (0);
+
+	if ((drivep->type == MDT_ACCES) || (drivep->type == MDT_UNKNOWN))
+		return (mdsyserror(ep, drivep->errnum, np->bname));
+
+	/* no other drive type is expected here */
+	assert(0);
+	return (mddeverror(ep, MDE_NOT_DISK, np->dev, np->cname));
+}
+
+/*
+ * Verify that np names a component rather than a metadevice.
+ * A metadevice is rejected with MDE_IS_META; everything else is
+ * passed on to metachkdisk().
+ */
+int
+metachkcomp(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	if (!metaismeta(np))
+		return (metachkdisk(np, ep));
+
+	return (mddeverror(ep, MDE_IS_META, np->dev, np->cname));
+}
+
+/*
+ * free list of names
+ *
+ * Only the list elements are released; the names they point to are
+ * left alone.
+ */
+void
+metafreenamelist(
+	mdnamelist_t	*nlp
+)
+{
+	while (nlp != NULL) {
+		mdnamelist_t	*doomed = nlp;
+
+		/* step forward before the element goes away */
+		nlp = nlp->next;
+		Free(doomed);
+	}
+}
+
+/*
+ * build list of names
+ *
+ * spp	- in/out set name pointer handed to metaname()
+ * nlpp	- receives the head of the new list (NULL on error or when
+ *	  argc is not positive)
+ * argc	- number of entries in argv
+ * argv	- device names to resolve, in order
+ * ep	- error return
+ *
+ * Returns the number of names placed on the list, or -1 if any name
+ * fails to resolve (the partial list is freed).
+ */
+int
+metanamelist(
+	mdsetname_t	**spp,
+	mdnamelist_t	**nlpp,
+	int		argc,
+	char		*argv[],
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	**tailpp = nlpp;
+	int		count = 0;
+
+	for (*nlpp = NULL; (argc > 0); ++count, --argc, ++argv) {
+		mdnamelist_t	*nlp = Zalloc(sizeof (*nlp));
+
+		if ((nlp->namep = metaname(spp, argv[0], ep)) == NULL) {
+			/*
+			 * nlp has not been linked in yet, so the list
+			 * free below would miss it; release it here.
+			 */
+			Free(nlp);
+			metafreenamelist(*nlpp);
+			*nlpp = NULL;
+			return (-1);
+		}
+		*tailpp = nlp;
+		tailpp = &nlp->next;
+	}
+	return (count);
+}
+
+/*
+ * append to end of name list
+ *
+ * Walks from *nlpp to the end of the list, hangs a new element holding
+ * np there, and returns np.
+ */
+mdname_t *
+metanamelist_append(
+	mdnamelist_t	**nlpp,
+	mdname_t	*np
+)
+{
+	mdnamelist_t	*newp;
+
+	/* find the terminating NULL link */
+	while (*nlpp != NULL)
+		nlpp = &(*nlpp)->next;
+
+	/* new, zeroed element carrying the name */
+	newp = Zalloc(sizeof (*newp));
+	*nlpp = newp;
+	newp->namep = np;
+
+	return (np);
+}
+
+/*
+ * FUNCTION:	meta_namelist_append_wrapper()
+ * INPUT:	tailpp	- pointer to the list tail pointer
+ *		np	- name node to be appended to list
+ * OUTPUT:	none
+ * RETURNS:	mdnamelist_t ** - new tail of the list.
+ * PURPOSE:	wrapper to metanamelist_append for performance.
+ *		metanamelist_append searches for the tail on every call,
+ *		which slows down long lists.  By tracking the tail
+ *		ourselves and starting the search there, each append
+ *		only has to step over at most one element.
+ */
+mdnamelist_t **
+meta_namelist_append_wrapper(
+	mdnamelist_t	**tailpp,
+	mdname_t	*np
+)
+{
+	mdnamelist_t	**newtailpp;
+
+	(void) metanamelist_append(tailpp, np);
+
+	/*
+	 * When the element just added is the one tailpp points at (first
+	 * item in the list) the tail pointer stays put; otherwise it
+	 * advances to the link that now holds the new element.
+	 */
+	if ((*tailpp)->next != NULL)
+		newtailpp = &(*tailpp)->next;
+	else
+		newtailpp = tailpp;
+
+	return (newtailpp);
+}
+
+
+/*
+ *	mdhspname_t stuff
+ */
+
+/*
+ * initialize hspname
+ *
+ * Clears the whole structure and marks it as naming no pool.
+ */
+static void
+metainithspname(
+	mdhspname_t	*hspnamep
+)
+{
+	(void) memset(hspnamep, '\0', sizeof (mdhspname_t));
+
+	/* MD_HSP_NONE need not be all-zero, so set it explicitly */
+	hspnamep->hsp = MD_HSP_NONE;
+}
+
+/*
+ * free allocated hspname
+ *
+ * Releases what the entry owns and returns it to its initial state;
+ * the entry itself is not freed.
+ */
+static void
+metafreehspname(
+	mdhspname_t	*hspnamep
+)
+{
+	/* the name string, if one was set */
+	if (hspnamep->hspname != NULL) {
+		Free(hspnamep->hspname);
+	}
+
+	/* any cached unit information */
+	if (hspnamep->unitp != NULL) {
+		meta_invalidate_hsp(hspnamep);
+	}
+
+	metainithspname(hspnamep);
+}
+
+/*
+ * clear the hspname cache
+ *
+ * Every element on hsplistp is torn down together with the name it
+ * holds, and the list head is reset.
+ */
+static void
+metaflushhspnames()
+{
+	mdhspnamelist_t		*cur = hsplistp;
+
+	while (cur != NULL) {
+		mdhspnamelist_t	*following = cur->next;
+
+		metafreehspname(cur->hspnamep);
+		Free(cur->hspnamep);
+		Free(cur);
+
+		cur = following;
+	}
+	hsplistp = NULL;
+}
+
+/*
+ * check set and get comparison name
+ *
+ * Parses uname into a set name and pool id (stored through hspp),
+ * validates the set via chksetname(), and returns the result of
+ * canon_hsp() for that set and pool.  Returns NULL on error.
+ */
+static char *
+gethspname(
+	mdsetname_t	**spp,
+	char		*uname,
+	hsp_t		*hspp,
+	md_error_t	*ep
+)
+{
+	char		*setstr = NULL;
+	int		setbad;
+
+	assert(uname != NULL);
+
+	/* split the user name into set and pool */
+	if (parse_hsp(uname, &setstr, hspp) != 0) {
+		(void) mdsyserror(ep, ENOENT, uname);
+		return (NULL);
+	}
+
+	/* the set string is only needed for this check */
+	setbad = (chksetname(spp, setstr, ep) != 0);
+	if (setstr != NULL)
+		Free(setstr);
+	if (setbad)
+		return (NULL);
+
+	/* return comparison name */
+	return (canon_hsp((*spp)->setname, *hspp));
+}
+
+/*
+ * set up names for a hotspare pool
+ *
+ * spp	 - in/out set name pointer, validated via gethspname()
+ * uname - user supplied hotspare pool name (must not be NULL)
+ * ep	 - error return
+ *
+ * Returns the cached entry whose hspname matches the comparison name,
+ * or a newly created entry appended to hsplistp.  Returns NULL if the
+ * name cannot be parsed or the set is invalid.
+ */
+mdhspname_t *
+metahspname(
+	mdsetname_t	**spp,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	char		*cname;
+	hsp_t		hsp;
+	mdhspnamelist_t	**tail;
+	mdhspname_t	*hspnp;
+
+	/* check setname */
+	assert(uname != NULL);
+	if ((cname = gethspname(spp, uname, &hsp, ep)) == NULL)
+		return (NULL);
+	assert(*spp != NULL);
+
+	/* look in cache first; on a miss tail is left at the list end */
+	for (tail = &hsplistp; (*tail != NULL); tail = &(*tail)->next) {
+		hspnp = (*tail)->hspnamep;
+		if (strcmp(hspnp->hspname, cname) == 0) {
+			Free(cname);
+			return (hspnp);
+		}
+	}
+
+	/* allocate new list element and hspname */
+	*tail = Zalloc(sizeof (**tail));
+	hspnp = (*tail)->hspnamep = Zalloc(sizeof (*hspnp));
+	metainithspname(hspnp);
+
+	/* save hspname (ownership of cname moves to the entry) and number */
+	hspnp->hspname = cname;
+	hspnp->hsp = MAKE_HSP_ID((*spp)->setno, hsp);
+
+	/* success */
+	return (hspnp);
+}
+
+/*
+ * set up hotspare pool name from id
+ *
+ * spp	- in/out set name pointer; may be NULL, in which case a local
+ *	  scratch pointer is used
+ * hsp	- hotspare pool id (set number and pool number combined)
+ * ep	- error return
+ *
+ * Builds "hspNNN" (local set) or "<setname>/hspNNN" and resolves it
+ * through metahspname().  Returns NULL on error.
+ */
+mdhspname_t *
+metahsphspname(
+	mdsetname_t	**spp,
+	hsp_t		hsp,
+	md_error_t	*ep
+)
+{
+	set_t		setno = HSP_SET(hsp);
+	mdsetname_t	*sp = NULL;
+	char		*uname;
+	mdhspname_t	*hspnp;
+	size_t		len;
+
+	/* check set first */
+	if (spp == NULL)
+		spp = &sp;
+	if (chksetno(spp, setno, ep) != 0)
+		return (NULL);
+	assert(*spp != NULL);
+	sp = *spp;
+
+	/*
+	 * build corresponding hotspare pool name; both branches use the
+	 * same conversion and cast the id so that argument and format
+	 * always agree
+	 */
+	if (metaislocalset(sp)) {
+		len = 20;
+		uname = Malloc(len);
+		(void) snprintf(uname, len, "hsp%03lu",
+		    (ulong_t)HSP_ID(hsp));
+	} else {
+		len = strlen(sp->setname) + 1 + 20;
+		uname = Malloc(len);
+		(void) snprintf(uname, len, "%s/hsp%03lu", sp->setname,
+		    (ulong_t)HSP_ID(hsp));
+	}
+
+	/* setup name */
+	hspnp = metahspname(spp, uname, ep);
+	Free(uname);
+	return (hspnp);
+}
+
+/*
+ * return hotspare pool name
+ *
+ * Resolves the pool id via metahsphspname() and hands back the
+ * hspname of the resulting entry; any lookup error is cleared and
+ * reported to the caller simply as NULL.
+ */
+char *
+get_hspname(hsp_t hsp)
+{
+	md_error_t	err = mdnullerror;
+	mdhspname_t	*poolp;
+
+	poolp = metahsphspname(NULL, hsp, &err);
+	if (poolp == NULL) {
+		/* lookup failed: discard the error details */
+		mdclrerror(&err);
+		return (NULL);
+	}
+
+	return (poolp->hspname);
+}
+
+/*
+ * free hotspare pool list
+ *
+ * Only the list elements are released; the pool names they point to
+ * are left alone.
+ */
+void
+metafreehspnamelist(mdhspnamelist_t *hspnlp)
+{
+	while (hspnlp != NULL) {
+		mdhspnamelist_t	*doomed = hspnlp;
+
+		/* step forward before the element goes away */
+		hspnlp = hspnlp->next;
+		Free(doomed);
+	}
+}
+
+/*
+ * build list of hotspare pool names
+ *
+ * spp	   - in/out set name pointer handed to metahspname()
+ * hspnlpp - receives the head of the new list (NULL on error or when
+ *	     argc is not positive)
+ * argc	   - number of entries in argv
+ * argv	   - hotspare pool names to resolve, in order
+ * ep	   - error return
+ *
+ * Returns the number of names placed on the list, or -1 if any name
+ * fails to resolve (the partial list is freed).
+ */
+int
+metahspnamelist(
+	mdsetname_t	**spp,
+	mdhspnamelist_t	**hspnlpp,
+	int		argc,
+	char		*argv[],
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t	**tailpp = hspnlpp;
+	int		count = 0;
+
+	for (*hspnlpp = NULL; (argc > 0); ++count, --argc, ++argv) {
+		mdhspnamelist_t	*hspnlp = Zalloc(sizeof (*hspnlp));
+
+		if ((hspnlp->hspnamep = metahspname(spp, argv[0],
+		    ep)) == NULL) {
+			/*
+			 * hspnlp has not been linked in yet, so the list
+			 * free below would miss it; release it here.
+			 */
+			Free(hspnlp);
+			metafreehspnamelist(*hspnlpp);
+			*hspnlpp = NULL;
+			return (-1);
+		}
+		*tailpp = hspnlp;
+		tailpp = &hspnlp->next;
+	}
+	return (count);
+}
+
+/*
+ * append to end of hotspare pool list
+ *
+ * Walks from *hspnlpp to the end of the list, hangs a new element
+ * holding hspnp there, and returns hspnp.
+ */
+mdhspname_t *
+metahspnamelist_append(mdhspnamelist_t **hspnlpp, mdhspname_t *hspnp)
+{
+	mdhspnamelist_t	*newp;
+
+	/* find the terminating NULL link */
+	while (*hspnlpp != NULL)
+		hspnlpp = &(*hspnlpp)->next;
+
+	/* new, zeroed element carrying the pool name */
+	newp = Zalloc(sizeof (*newp));
+	*hspnlpp = newp;
+	newp->hspnamep = hspnp;
+
+	return (hspnp);
+}
+
+/*
+ * get name from dev
+ *
+ * Metadevices are resolved directly from their minor number.  For any
+ * other device the name is fetched from the namespace of *spp (the
+ * local set is used when *spp is NULL) and turned into a fast name
+ * that also records the namespace key.  Returns NULL on error.
+ */
+mdname_t *
+metadevname(
+	mdsetname_t **spp,
+	md_dev64_t dev,
+	md_error_t *ep)
+{
+	mdkey_t		nskey;
+	char		*nsname;
+	mdname_t	*resultp;
+
+	assert(dev != NODEV64);
+
+	/* short circuit metadevices */
+	if (meta_dev_ismeta(dev)) {
+		minor_t	mnum = meta_getminor(dev);
+
+		return (metamnumname(spp, mnum, 0, ep));
+	}
+
+	/* create local set, if necessary */
+	if (*spp == NULL) {
+		*spp = metasetname(MD_LOCAL_NAME, ep);
+		if (*spp == NULL)
+			return (NULL);
+	}
+
+	/* get name from namespace */
+	nsname = meta_getnmentbydev((*spp)->setno, MD_SIDEWILD, dev,
+	    NULL, NULL, &nskey, ep);
+	if (nsname == NULL)
+		return (NULL);
+
+	/* remember the key on a successfully built name */
+	resultp = metaname_fast(spp, nsname, ep);
+	if (resultp != NULL)
+		resultp->key = nskey;
+
+	Free(nsname);
+	return (resultp);
+}
+
+/*
+ * return cached name from md_dev64_t
+ *
+ * Scans every slice of every drive on drivelistp and returns the cname
+ * of the first slice whose device number equals dev, or NULL when no
+ * cached slice matches.
+ */
+static char *
+metadevtocachename(md_dev64_t dev)
+{
+	mddrivenamelist_t	*elemp = drivelistp;
+
+	while (elemp != NULL) {
+		mddrivename_t	*drivep = elemp->drivenamep;
+		uint_t		sliceno = 0;
+
+		while (sliceno < drivep->parts.parts_len) {
+			mdname_t	*slicep;
+
+			slicep = &drivep->parts.parts_val[sliceno];
+			if (slicep->dev == dev)
+				return (slicep->cname);
+			++sliceno;
+		}
+		elemp = elemp->next;
+	}
+
+	/* not found */
+	return (NULL);
+}
+
+/*
+ * Ask the driver for the name, which has been stored in the
+ * metadevice state database (on behalf of the utilities).
+ * (by devno)
+ *
+ * If the set number is MD_SET_BAD, or the set or the device cannot be
+ * resolved, the error is cleared and the drive cache is searched
+ * instead.
+ */
+char *
+get_devname(
+	set_t setno,
+	md_dev64_t dev)
+{
+	md_error_t	err = mdnullerror;
+	mdsetname_t	*setp;
+	mdname_t	*namep;
+
+	if (setno != MD_SET_BAD) {
+		setp = metasetnosetname(setno, &err);
+		if (setp != NULL) {
+			namep = metadevname(&setp, dev, &err);
+			if (namep != NULL) {
+				/* return name */
+				return (namep->cname);
+			}
+		}
+	}
+
+	/* fall back to whatever the cache knows about this device */
+	mdclrerror(&err);
+	return (metadevtocachename(dev));
+}
+
+/*
+ * get name from key
+ *
+ * Fetches the device name and device number stored under key in the
+ * namespace of *spp (the local set is used when *spp is NULL), builds
+ * a fast or full name for it depending on "fast", and records the
+ * device number in that name.  Returns NULL on error.
+ */
+mdname_t *
+metakeyname(
+	mdsetname_t	**spp,
+	mdkey_t		key,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	md_dev64_t	nsdev = NODEV64;
+	char		*nsname;
+	mdname_t	*resultp;
+
+	/* create local set, if necessary */
+	if (*spp == NULL) {
+		*spp = metasetname(MD_LOCAL_NAME, ep);
+		if (*spp == NULL)
+			return (NULL);
+	}
+
+	/* get name from namespace */
+	nsname = meta_getnmentbykey((*spp)->setno, MD_SIDEWILD, key,
+	    NULL, NULL, &nsdev, ep);
+	if (nsname == NULL)
+		return (NULL);
+
+	resultp = fast ? metaname_fast(spp, nsname, ep) :
+	    metaname(spp, nsname, ep);
+
+	/* the namespace lookup must have supplied a device number */
+	assert(nsdev != NODEV64);
+	if (resultp != NULL)
+		resultp->dev = nsdev;
+
+	Free(nsname);
+	return (resultp);
+}
+
+/*
+ * completely flush the caches
+ *
+ * Flushes the hotspare pool, drive, set, controller, fast name and
+ * stat caches in that order.  When flush_sr_cache is non-zero,
+ * sr_cache_flush(0) is called as well.
+ *
+ * NOTE(review): the flush order may matter if the caches reference
+ * one another; confirm before reordering.
+ */
+void
+metaflushnames(int flush_sr_cache)
+{
+	metaflushhspnames();
+	metaflushdrivenames();
+	metaflushsetnames();
+	metaflushctlrcache();
+	metaflushfastnames();
+	metaflushstatcache();
+	/* optional: caller decides whether sr_cache_flush() is run */
+	if (flush_sr_cache)
+		sr_cache_flush(0);
+}
+
+/*
+ * meta_get_hotspare_names
+ *  appends the name of every hot spare in every hot spare pool of the
+ *  set to *nlpp.
+ *
+ *  Returns the number of names appended, or -1 on error.  If -1 is
+ *  returned and ep carries no error yet, a generic MDE_NO_HSPS error
+ *  is recorded.
+ */
+
+int
+meta_get_hotspare_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	mdhspnamelist_t		*poollist = NULL;
+	mdhspnamelist_t		*poolp;
+	int			total = 0;
+
+	assert(nlpp != NULL);
+
+	/* get hotspare pool names */
+	if (meta_get_hsp_names(sp, &poollist, options, ep) < 0) {
+		total = -1;
+	} else {
+		/* build name list, one pool at a time */
+		for (poolp = poollist; poolp != NULL; poolp = poolp->next) {
+			md_hsp_t	*hsp;
+			int		idx;
+
+			hsp = meta_get_hsp(sp, poolp->hspnamep, ep);
+			if (hsp == NULL) {
+				total = -1;
+				break;
+			}
+
+			idx = 0;
+			while (idx < hsp->hotspares.hotspares_len) {
+				md_hs_t	*hs;
+
+				hs = &hsp->hotspares.hotspares_val[idx];
+				(void) metanamelist_append(nlpp, hs->hsnamep);
+				++total;
+				idx++;
+			}
+		}
+	}
+
+	/* cleanup and return count or error */
+	metafreehspnamelist(poollist);
+	if ((total == -1) && mdisok(ep)) {
+		/*
+		 * At least try to give some sort of meaningful error
+		 */
+		(void) mderror(ep, MDE_NO_HSPS, "Generic Hotspare Error");
+	}
+
+	return (total);
+}
+/*
+ * meta_create_non_dup_list
+ *    INPUT: mdnp mdname_t pointer to add to the list if a new name
+ *           ldevidpp list of non-duplicate names.
+ *    OUTPUT: ldevidpp list of non-duplicate names.
+ * meta_create_non_dup_list takes the cname of mdnp, cuts it off at the
+ *    last 's' of its final path component (the slice suffix) and, if
+ *    the remaining name is not yet on the list, appends a new entry
+ *    carrying that name and the key of mdnp.
+ *    User needs to free allocated memory.
+ */
+void
+meta_create_non_dup_list(
+	mdname_t	*mdnp,
+	mddevid_t	**ldevidpp
+)
+{
+	mddevid_t	**linkpp;
+	mddevid_t	*entryp;
+	char		*diskname;
+	char		*lastcomp;
+	char		*slicepos;
+
+	if (mdnp == NULL)
+		return;
+
+	/*
+	 * Grab the name of the device and strip off slice information
+	 */
+	diskname = Strdup(mdnp->cname);
+	if (diskname == NULL)
+		return;
+
+	lastcomp = strrchr(diskname, '/');
+	slicepos = strrchr((lastcomp != NULL) ? lastcomp : diskname, 's');
+	if (slicepos != NULL)
+		*slicepos = '\0';
+
+	/* walk the list; bail out if the name is already there */
+	for (linkpp = ldevidpp; *linkpp != NULL; linkpp = &(*linkpp)->next) {
+		if (strcmp((*linkpp)->ctdname, diskname) == 0) {
+			Free(diskname);
+			return;
+		}
+	}
+
+	/* linkpp is now the terminating link (or the empty list head) */
+	entryp = Zalloc(sizeof (mddevid_t));
+	entryp->ctdname = diskname;
+	entryp->key = mdnp->key;
+	*linkpp = entryp;
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c b/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c
new file mode 100644
index 0000000000..337b48f98e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c
@@ -0,0 +1,1267 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <dlfcn.h>
+#include <meta.h>
+#include <metadyn.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <devid.h>
+#include <sys/param.h>
+#include <sys/scsi/impl/uscsi.h>
+#include <sys/scsi/generic/commands.h>
+#include <sys/scsi/generic/inquiry.h>
+#include <sys/efi_partition.h>
+
+/*
+ * Fixed geometry values reported for EFI labeled disks; they are used
+ * by meta_efi_to_mdgeom() to fill in an mdgeom_t.
+ */
+#define	MD_EFI_FG_HEADS		128
+#define	MD_EFI_FG_SECTORS	256
+#define	MD_EFI_FG_RPM		7200
+#define	MD_EFI_FG_WRI		1
+#define	MD_EFI_FG_RRI		1
+
+
+/*
+ * One entry of the controller cache: a singly linked list mapping a
+ * name to an integer type (see ctlr_cache_add() and ctlr_cache_look()).
+ */
+typedef struct ctlr_cache {
+	char			*ctlr_nm;	/* name (owned copy) */
+	int			ctlr_ty;	/* type stored for the name */
+	struct	ctlr_cache	*ctlr_nx;	/* next entry, or NULL */
+} ctlr_cache_t;
+
+/* head of the controller cache list */
+static	ctlr_cache_t	*ctlr_cache = NULL;
+
+
+/*
+ * return set for a device
+ *
+ * A metadevice belongs to the set encoded in its minor number.  Any
+ * other device belongs to whichever set meta_is_drive_in_anyset()
+ * reports for its drive, or to the local set when none is reported.
+ * Returns NULL on error.
+ */
+mdsetname_t *
+metagetset(
+	mdname_t	*np,
+	int		bypass_daemon,
+	md_error_t	*ep
+)
+{
+	mdsetname_t	*ownerp;
+
+	/* metadevice */
+	if (metaismeta(np)) {
+		set_t	setno = MD_MIN2SET(meta_getminor(np->dev));
+
+		return (metasetnosetname(setno, ep));
+	}
+
+	/* regular device */
+	if (meta_is_drive_in_anyset(np->drivenamep, &ownerp, bypass_daemon,
+	    ep) != 0) {
+		return (NULL);
+	}
+
+	/* not in any set: it is a local set device */
+	if (ownerp == NULL)
+		ownerp = metasetnosetname(MD_LOCAL_SET, ep);
+
+	return (ownerp);
+}
+
+/*
+ * convert system to md types
+ *
+ * Fills *mdgp from the dk_geom in *gp; the block size is always
+ * DEV_BSIZE.
+ */
+static void
+meta_geom_to_md(
+	struct dk_geom	*gp,
+	mdgeom_t	*mdgp
+)
+{
+	/* start from an all-zero structure */
+	(void) memset(mdgp, '\0', sizeof (mdgeom_t));
+
+	/* fixed value */
+	mdgp->blk_sz = DEV_BSIZE;
+
+	/* physical layout */
+	mdgp->ncyl = gp->dkg_ncyl;
+	mdgp->nhead = gp->dkg_nhead;
+	mdgp->nsect = gp->dkg_nsect;
+
+	/* rotational parameters */
+	mdgp->rpm = gp->dkg_rpm;
+	mdgp->write_reinstruct = gp->dkg_write_reinstruct;
+	mdgp->read_reinstruct = gp->dkg_read_reinstruct;
+}
+
+/*
+ * convert efi to md types
+ *
+ * The geometry uses the fixed MD_EFI_FG_* values; the cylinder count
+ * is the usable LBA span divided by heads * sectors.
+ */
+static void
+meta_efi_to_mdgeom(struct dk_gpt *gpt, mdgeom_t	*mdgp)
+{
+	/* start from an all-zero structure */
+	(void) memset(mdgp, '\0', sizeof (mdgeom_t));
+
+	/* fixed values */
+	mdgp->nhead = MD_EFI_FG_HEADS;
+	mdgp->nsect = MD_EFI_FG_SECTORS;
+	mdgp->rpm = MD_EFI_FG_RPM;
+	mdgp->write_reinstruct = MD_EFI_FG_WRI;
+	mdgp->read_reinstruct = MD_EFI_FG_RRI;
+	mdgp->blk_sz = DEV_BSIZE;
+
+	/* derived from the usable LBA range */
+	mdgp->ncyl = (gpt->efi_last_u_lba - gpt->efi_first_u_lba) /
+	    (MD_EFI_FG_HEADS * MD_EFI_FG_SECTORS);
+}
+
+/*
+ * convert an EFI partition table to the md vtoc type
+ *
+ * *mdvp is cleared first.  If the table has more than MD_MAX_PARTS
+ * partitions only nparts is filled in.  The typename is taken from the
+ * p_name of the reserved (V_RESERVED) partition, cut at the first
+ * space or tab; it is an allocated string.
+ */
+static void
+meta_efi_to_mdvtoc(struct dk_gpt *gpt, mdvtoc_t *mdvp)
+{
+	char		typename[EFI_PART_NAME_LEN];
+	uint_t		i;
+
+	(void) memset(mdvp, '\0', sizeof (*mdvp));
+	mdvp->nparts = gpt->efi_nparts;
+	/* too many partitions to fit into mdvp->parts */
+	if (mdvp->nparts > MD_MAX_PARTS)
+		return;
+
+	mdvp->first_lba = gpt->efi_first_u_lba;
+	mdvp->last_lba = gpt->efi_last_u_lba;
+	mdvp->lbasize = gpt->efi_lbasize;
+
+	for (i = 0; (i < gpt->efi_nparts); ++i) {
+		mdvp->parts[i].start = gpt->efi_parts[i].p_start;
+		mdvp->parts[i].size = gpt->efi_parts[i].p_size;
+		mdvp->parts[i].tag = gpt->efi_parts[i].p_tag;
+		mdvp->parts[i].flag = gpt->efi_parts[i].p_flag;
+		/*
+		 * Due to the lack of a label for the entire partition table,
+		 * we use p_name of the reserved partition
+		 */
+		if ((gpt->efi_parts[i].p_tag == V_RESERVED) &&
+		    (gpt->efi_parts[i].p_name != NULL)) {
+			(void) strlcpy(typename, gpt->efi_parts[i].p_name,
+					EFI_PART_NAME_LEN);
+			/*
+			 * Stop at first (if any) space or tab.  strcspn()
+			 * is used rather than strtok() so that no global
+			 * tokenizer state is disturbed and a leading blank
+			 * truncates the name as well.
+			 */
+			typename[strcspn(typename, " \t")] = '\0';
+			/* don't leak a name from an earlier reserved slice */
+			if (mdvp->typename != NULL)
+				Free(mdvp->typename);
+			mdvp->typename = Strdup(typename);
+		}
+	}
+}
+
+/*
+ * convert the md vtoc type to an EFI partition table
+ *
+ * Allocates *gpt with calloc() and fills it from *mdvp.  The table is
+ * sized to hold every partition up to and including the last one in
+ * use (slot 0 only if none is in use).  The typename of *mdvp, if any,
+ * is stored as p_name of each reserved (V_RESERVED) partition.
+ *
+ * On allocation failure *gpt is set to NULL.
+ */
+static void
+meta_mdvtoc_to_efi(mdvtoc_t *mdvp, struct dk_gpt **gpt)
+{
+	uint_t		i;
+	uint_t		lastpart = 0;
+	size_t		size;
+
+	/* first we count how many partitions we have to send */
+	for (i = 0; i < MD_MAX_PARTS; i++) {
+		if ((mdvp->parts[i].start == 0) &&
+		    (mdvp->parts[i].size == 0) &&
+		    (mdvp->parts[i].tag != V_RESERVED)) {
+			continue;
+		}
+		/* if we are here, we know the partition is really used */
+		lastpart = i;
+	}
+	/*
+	 * presumably struct dk_gpt already holds one dk_part, so only
+	 * lastpart additional entries are added - TODO confirm
+	 */
+	size = sizeof (struct dk_gpt) + (sizeof (struct dk_part) * lastpart);
+	*gpt = calloc(size, sizeof (char));
+	if (*gpt == NULL)
+		return;
+
+	(*gpt)->efi_nparts = lastpart + 1;
+	(*gpt)->efi_first_u_lba = mdvp->first_lba;
+	(*gpt)->efi_last_u_lba = mdvp->last_lba;
+	(*gpt)->efi_lbasize = mdvp->lbasize;
+	for (i = 0; (i < (*gpt)->efi_nparts); ++i) {
+		(*gpt)->efi_parts[i].p_start = mdvp->parts[i].start;
+		(*gpt)->efi_parts[i].p_size = mdvp->parts[i].size;
+		(*gpt)->efi_parts[i].p_tag = mdvp->parts[i].tag;
+		(*gpt)->efi_parts[i].p_flag = mdvp->parts[i].flag;
+		/*
+		 * Due to the lack of a label for the entire partition table,
+		 * we use p_name of the reserved partition
+		 */
+		if (((*gpt)->efi_parts[i].p_tag == V_RESERVED) &&
+			(mdvp->typename != NULL)) {
+			/* copy from the vtoc's typename, not a scratch buf */
+			(void) strlcpy((*gpt)->efi_parts[i].p_name,
+				mdvp->typename, EFI_PART_NAME_LEN);
+		}
+	}
+}
+
+
+/*
+ * Add the pair (nm, ty) at the end of the controller cache unless an
+ * entry with the same name is already present.  The name is copied.
+ */
+void
+ctlr_cache_add(char *nm, int ty)
+{
+	ctlr_cache_t	**linkpp = &ctlr_cache;
+	ctlr_cache_t	*entryp;
+
+	/* an existing entry wins; its type is left untouched */
+	while (*linkpp != NULL) {
+		if (strcmp((*linkpp)->ctlr_nm, nm) == 0)
+			return;
+		linkpp = &(*linkpp)->ctlr_nx;
+	}
+
+	entryp = Zalloc(sizeof (ctlr_cache_t));
+	*linkpp = entryp;
+	entryp->ctlr_nm = Strdup(nm);
+	entryp->ctlr_ty = ty;
+}
+
+/*
+ * Return the type cached for nm, or -1 when nm is not in the
+ * controller cache.
+ */
+int
+ctlr_cache_look(char *nm)
+{
+	ctlr_cache_t	*entryp = ctlr_cache;
+
+	while (entryp != NULL) {
+		if (strcmp(entryp->ctlr_nm, nm) == 0)
+			return (entryp->ctlr_ty);
+		entryp = entryp->ctlr_nx;
+	}
+
+	return (-1);
+}
+
+
+/*
+ * Release every entry of the controller cache, including the copied
+ * names, and reset the list head.
+ */
+void
+metaflushctlrcache(void)
+{
+	ctlr_cache_t	*cur = ctlr_cache;
+
+	while (cur != NULL) {
+		ctlr_cache_t	*following = cur->ctlr_nx;
+
+		Free(cur->ctlr_nm);
+		Free(cur);
+		cur = following;
+	}
+	ctlr_cache = NULL;
+}
+
+/*
+ * getdrvnode -- return the driver name based on mdname_t->bname
+ *	Need to free pointer when finished.
+ *
+ * The name is the last component of the devices path obtained from
+ * metagetdevicesname(), cut at its last '@'.  The devices path is
+ * freed and np->devicesname is reset to NULL before returning.
+ *
+ * Returns an allocated string, or NULL when the devices path cannot
+ * be obtained.
+ */
+char *
+getdrvnode(mdname_t *np, md_error_t *ep)
+{
+	char	*devicespath,
+		*drvnode,
+		*cp;
+
+	if ((devicespath = metagetdevicesname(np, ep)) == NULL)
+		return (NULL);
+
+	/*
+	 * At this point devicespath should be like the following
+	 * "/devices/<unknown_and_dont_care>/xxxx@vvvv"
+	 *
+	 * There's a couple of 'if' statements below which could
+	 * return an error condition, but I've decided to allow
+	 * a more open approach regarding the mapping so as to
+	 * not restrict possible future projects.
+	 */
+	if ((drvnode = strrchr(devicespath, '/')) != NULL) {
+		/*
+		 * drvnode now just "xxxx@vvvv"
+		 */
+		drvnode++;
+	} else {
+		/*
+		 * No '/' at all: use the whole string rather than
+		 * handing a NULL pointer to strrchr() below.
+		 */
+		drvnode = devicespath;
+	}
+
+	if ((cp = strrchr(drvnode, '@')) != NULL) {
+		/*
+		 * Now drvnode is just the driver name "xxxx"
+		 */
+		*cp = '\0';
+	}
+
+	cp = Strdup(drvnode);
+	Free(devicespath);
+	np->devicesname = NULL;
+
+	return (cp);
+}
+
+/*
+ * meta_load_dl -- open the dynamic library "lib<driver>.so.1" for the
+ *    driver of np.  Returns the dlopen() handle, or NULL when the
+ *    driver name cannot be determined or no library can be opened.
+ */
+static void *
+meta_load_dl(mdname_t *np, md_error_t *ep)
+{
+	char	libpath[MAXPATHLEN];
+	char	*driver;
+	char	*dir;
+	void	*handle;
+
+	driver = getdrvnode(np, ep);
+	if (driver == NULL)
+		return (NULL);
+
+	/*
+	 * Library search algorithm:
+	 * 1) Use LDLIBRARYPATH which is implied when a non-absolute
+	 *    path name is passed to dlopen()
+	 * 2) Use the value of METALDPATH as the directory. Mainly
+	 *    used for debugging
+	 * 3) Last search the default location, METALDPATH_DEFAULT
+	 */
+	(void) snprintf(libpath, sizeof (libpath), "lib%s.so.1", driver);
+	handle = dlopen(libpath, RTLD_LAZY);
+	if (handle != NULL) {
+		Free(driver);
+		return (handle);
+	}
+
+	/* steps 2 and 3: explicit directory */
+	dir = getenv("METALDPATH");
+	if (dir == NULL)
+		dir = METALDPATH_DEFAULT;
+	(void) snprintf(libpath, sizeof (libpath), "%s/lib%s.so.1", dir,
+	    driver);
+	Free(driver);
+
+	/*
+	 * Common failure here would be failing to find a libXX.so.1
+	 * such as libsd.so.1.  Some controllers will not have a library
+	 * because there's no enclosure or name translation required;
+	 * NULL is returned in that case.
+	 */
+	return (dlopen(libpath, RTLD_LAZY));
+}
+
+/*
+ * meta_match_names -- possibly convert the driver names returned by CINFO
+ *
+ * If a driver library can be loaded for np and it exports
+ * "convert_path", that function is called with the arguments given
+ * here; its result is ignored.
+ */
+static void
+meta_match_names(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
+    md_error_t *ep)
+{
+	void		*handle;
+	meta_convert_e	((*convert)(mdname_t *, struct dk_cinfo *,
+			    mdcinfo_t *, md_error_t *));
+
+	handle = meta_load_dl(np, ep);
+	if (handle == NULL)
+		return;
+
+	convert = (meta_convert_e (*)(mdname_t *, struct dk_cinfo *,
+	    mdcinfo_t *, md_error_t *))dlsym(handle, "convert_path");
+	if (convert != NULL) {
+		(void) (*convert)(np, cp, mdcp, ep);
+	}
+
+	(void) dlclose(handle);
+}
+
+/*
+ * meta_match_enclosure -- return any enclosure info if found
+ *
+ * If a driver library can be loaded for np and it exports
+ * "get_enclosure", that function is called to fill in *mdcp.
+ *
+ * Returns 1 if the library reported Enclosure_Error, 0 in every other
+ * case (no library, no such symbol, Enclosure_Okay, Enclosure_Noop).
+ * The library handle is closed on every path.
+ */
+int
+meta_match_enclosure(mdname_t *np, mdcinfo_t *mdcp, md_error_t *ep)
+{
+	meta_enclosure_e	e,
+				((*fptr)(mdname_t *, mdcinfo_t *,
+				    md_error_t *));
+	void			*cookie;
+	int			rval = 0;
+
+	if ((cookie = meta_load_dl(np, ep)) == NULL)
+		return (0);
+
+	fptr = (meta_enclosure_e (*)(mdname_t *, mdcinfo_t *,
+	    md_error_t *))dlsym(cookie, "get_enclosure");
+	if (fptr != NULL) {
+		e = (*fptr)(np, mdcp, ep);
+		switch (e) {
+		case Enclosure_Error:
+			/*
+			 * Looks like this library wanted to handle
+			 * our device and had an internal error.
+			 */
+			rval = 1;
+			break;
+
+		case Enclosure_Okay:
+			/*
+			 * Found a library to handle the request so
+			 * just return with data provided.
+			 */
+			break;
+
+		case Enclosure_Noop:
+			/*
+			 * The library did not handle the device;
+			 * nothing further to do here.
+			 */
+			break;
+		}
+	}
+
+	/* always drop the handle so it is not leaked on early results */
+	(void) dlclose(cookie);
+	return (rval);
+}
+
+/*
+ * meta_cinfo_to_md -- convert a DKIOCINFO result (cp) into libmeta's
+ * mdcinfo_t (mdcp), then let the per-driver helper library adjust the
+ * names and supply enclosure information.
+ *
+ * Returns 0 on success and -1 when meta_match_enclosure() reports a
+ * failure.
+ */
+static int
+meta_cinfo_to_md(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
+    md_error_t *ep)
+{
+	/*
+	 * Copy at most one byte less than the destination holds so the
+	 * terminator left behind by the memset below always survives.
+	 */
+	size_t	cname_len = min((sizeof (mdcp->cname) - 1),
+		    sizeof (cp->dki_cname));
+	size_t	dname_len = min((sizeof (mdcp->dname) - 1),
+		    sizeof (cp->dki_dname));
+
+	/* start from a clean slate with generic defaults */
+	(void) memset(mdcp, '\0', sizeof (*mdcp));
+	mdcp->ctype = MHD_CTLR_GENERIC;
+	mdcp->cnum = cp->dki_cnum;
+	mdcp->unit = cp->dki_unit;
+	mdcp->maxtransfer = cp->dki_maxtransfer;
+	(void) strncpy(mdcp->cname, cp->dki_cname, cname_len);
+	(void) strncpy(mdcp->dname, cp->dki_dname, dname_len);
+
+	/*
+	 * The driver name reported by DKIOCINFO is not always usable as
+	 * is (the ap_dmd driver is one such case), so give the helper
+	 * library a chance to rewrite it.
+	 */
+	meta_match_names(np, cp, mdcp, ep);
+
+	return (meta_match_enclosure(np, mdcp, ep) ? -1 : 0);
+}
+
+/*
+ * meta_vtoc_to_md -- convert a struct vtoc into libmeta's mdvtoc_t.
+ *
+ * The type name is the first blank-delimited word of the ASCII label
+ * and is stored as a freshly Strdup()ed string.  Any slice that starts
+ * at block 0 and is not empty is recorded as carrying a label of
+ * btodb(DK_LABEL_SIZE) blocks.
+ */
+static void
+meta_vtoc_to_md(
+	struct vtoc	*vp,
+	mdvtoc_t	*mdvp
+)
+{
+	char		label[sizeof (vp->v_asciilabel) + 1];
+	char		*cp;
+	uint_t		slice;
+
+	(void) memset(mdvp, '\0', sizeof (*mdvp));
+
+	/* take a terminated copy of the label and cut it at the first blank */
+	(void) strncpy(label, vp->v_asciilabel, sizeof (vp->v_asciilabel));
+	label[sizeof (label) - 1] = '\0';
+	for (cp = label; *cp != '\0'; cp++) {
+		if ((*cp == ' ') || (*cp == '\t')) {
+			*cp = '\0';
+			break;
+		}
+	}
+	mdvp->typename = Strdup(label);
+
+	/* copy the slice table */
+	mdvp->nparts = vp->v_nparts;
+	slice = 0;
+	while (slice < vp->v_nparts) {
+		mdvp->parts[slice].start = vp->v_part[slice].p_start;
+		mdvp->parts[slice].size = vp->v_part[slice].p_size;
+		mdvp->parts[slice].tag = vp->v_part[slice].p_tag;
+		mdvp->parts[slice].flag = vp->v_part[slice].p_flag;
+		if ((vp->v_part[slice].p_start == 0) &&
+		    (vp->v_part[slice].p_size > 0)) {
+			mdvp->parts[slice].label = btodb(DK_LABEL_SIZE);
+		}
+		++slice;
+	}
+}
+
+/*
+ * free allocations in vtoc
+ *
+ * Releases the typename string, if one is set, and then zeroes the
+ * whole structure.  The mdvtoc_t itself is not freed.  Because nparts
+ * ends up as 0, metagetvtoc() will no longer treat the entry as cached.
+ */
+void
+metafreevtoc(
+	mdvtoc_t	*vtocp
+)
+{
+	if (vtocp->typename != NULL)
+		Free(vtocp->typename);
+	(void) memset(vtocp, 0, sizeof (*vtocp));
+}
+
+/*
+ * metagetvtoc -- return the vtoc of the drive behind np in libmeta's
+ * own mdvtoc_t format, reading it from the device when necessary.
+ *
+ * np		name to look up; only rname and drivenamep need be set up
+ * nocache	zero: return the vtoc cached in the drivename if there is
+ *		one; non-zero: always re-read the label from the device
+ * partnop	when not NULL, receives the slice number of np
+ * ep		receives the error on failure
+ *
+ * Returns a pointer to the vtoc stored in np->drivenamep, or NULL with
+ * ep set.  A re-read also refreshes the devid and minor name, updates
+ * the cached geometry and, for nocache lookups, may promote a drive that
+ * was marked MDT_ACCES to MDT_COMP.
+ *
+ * Devices on which DKIOCGGEOM fails with ENOTSUP are assumed to carry an
+ * EFI label and are read through efi_alloc_and_read() instead.
+ */
+mdvtoc_t *
+metagetvtoc(
+	mdname_t	*np,	/* only rname, drivenamep, are setup */
+	int		nocache,
+	uint_t		*partnop,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*dnp = np->drivenamep;
+	struct dk_geom	geom;
+	char		*minor_name = NULL;
+	char		*rname = np->rname;
+	int		fd;
+	int		partno;
+	int		err = 0;	    /* saves errno from ioctl */
+	ddi_devid_t	devid;
+	char		*p;
+
+	/* short circuit */
+	if ((! nocache) && (dnp->vtoc.nparts != 0)) {
+		if (partnop != NULL) {
+			/*
+			 * the following assignment works because the
+			 * mdname_t structs are always created as part
+			 * of the drivenamep struct.  When a user
+			 * creates an mdname_t struct it either
+			 * uses an existing drivenamep struct or creates
+			 * a new one and then adds the mdname_t struct
+			 * as part of its parts_val array.  So what is
+			 * being computed below is the slice offset in
+			 * the parts_val array.
+			 */
+			*partnop = np - np->drivenamep->parts.parts_val;
+			assert(*partnop < dnp->parts.parts_len);
+		}
+		return (&dnp->vtoc);
+	}
+
+	/* can't get vtoc */
+	if (! nocache) {
+		switch (dnp->type) {
+		case MDT_ACCES:
+		case MDT_UNKNOWN:
+			(void) mdsyserror(ep, dnp->errnum, rname);
+			return (NULL);
+		}
+	}
+
+	/* get all the info */
+	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
+		(void) mdsyserror(ep, errno, rname);
+		return (NULL);
+	}
+
+	/*
+	 * The disk is open so this is a good point to get the devid
+	 * otherwise it will need to be done at another time which
+	 * means reopening it.
+	 */
+	if (devid_get(fd, &devid) != 0) {
+		/* there is no devid for the disk */
+		if (((p = getenv("MD_DEBUG")) != NULL) &&
+		    (strstr(p, "DEVID") != NULL)) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "%s has no device id\n"), np->rname);
+		}
+		np->minor_name = (char *)NULL;
+		dnp->devid = NULL;
+	} else {
+		(void) devid_get_minor_name(fd, &minor_name);
+		/*
+		 * The minor name could be NULL if the underlying
+		 * device driver does not support 'minor names'.
+		 * This means we do not use devid's for this device.
+		 * The SunCluster did driver does not support minor names.
+		 */
+		if (minor_name != NULL) {
+			np->minor_name = Strdup(minor_name);
+			devid_str_free(minor_name);
+			dnp->devid = devid_str_encode(devid, NULL);
+		} else {
+			np->minor_name = (char *)NULL;
+			dnp->devid = NULL;
+
+			if (((p = getenv("MD_DEBUG")) != NULL) &&
+			    (strstr(p, "DEVID") != NULL)) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s no minor name (no devid)\n"),
+				    np->rname);
+			}
+		}
+		devid_free(devid);
+	}
+
+	/*
+	 * if our drivenamep points to a device not supporting DKIOCGGEOM,
+	 * it's likely to have an EFI label.
+	 */
+	(void) memset(&geom, 0, sizeof (geom));
+	if (ioctl(fd, DKIOCGGEOM, &geom) != 0) {
+		err = errno;
+		if (err == ENOTTY) {
+			/*
+			 * NODEV64 rather than NODEV: the device argument
+			 * is a 64-bit device number, as in every other
+			 * mddeverror() call in this function.
+			 */
+			(void) mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
+			(void) close(fd);
+			return (NULL);
+		} else if (err != ENOTSUP) {
+			(void) mdsyserror(ep, err, rname);
+			(void) close(fd);
+			return (NULL);
+		}
+
+	}
+	/*
+	 * If we are here, there was either no failure on DKIOCGGEOM or
+	 * the failure was ENOTSUP
+	 */
+	if (err == ENOTSUP) {
+		/* DKIOCGGEOM yielded ENOTSUP => try efi_alloc_and_read */
+		/*
+		 * gpt starts out NULL so that the efi_free() on the
+		 * failure path never sees an indeterminate pointer should
+		 * efi_alloc_and_read() fail before storing anything.
+		 */
+		struct dk_gpt	*gpt = NULL;
+		int		save_errno;
+
+		/* this also sets errno */
+		partno = efi_alloc_and_read(fd, &gpt);
+		save_errno = errno;
+		(void) close(fd);
+		if (partno < 0) {
+			efi_free(gpt);
+			(void) mdsyserror(ep, save_errno, rname);
+			return (NULL);
+		}
+		if (partno >= gpt->efi_nparts) {
+			efi_free(gpt);
+			(void) mddeverror(ep, MDE_INVALID_PART, NODEV64,
+						rname);
+			return (NULL);
+		}
+
+		/* convert to our format */
+		metafreevtoc(&dnp->vtoc);
+		meta_efi_to_mdvtoc(gpt, &dnp->vtoc);
+		if (dnp->vtoc.nparts > MD_MAX_PARTS) {
+			/* gpt used to be leaked on this return */
+			efi_free(gpt);
+			(void) mddeverror(ep, MDE_TOO_MANY_PARTS, NODEV64,
+			    rname);
+			return (NULL);
+		}
+		/*
+		 * libmeta needs at least V_NUMPAR partitions.
+		 * If we have an EFI partition with less than V_NUMPAR slices,
+		 * we nevertheless reserve space for V_NUMPAR
+		 */
+
+		if (dnp->vtoc.nparts < V_NUMPAR) {
+			dnp->vtoc.nparts = V_NUMPAR;
+		}
+		meta_efi_to_mdgeom(gpt, &dnp->geom);
+		efi_free(gpt);
+	} else {
+		/* no error on DKIOCGGEOM, try meta_getvtoc */
+		struct vtoc	vtoc;
+
+		if (meta_getvtoc(fd, np->cname, &vtoc, &partno, ep) < 0) {
+			(void) close(fd);
+			return (NULL);
+		}
+		(void) close(fd);
+
+		/* convert to our format */
+		meta_geom_to_md(&geom, &dnp->geom);
+		metafreevtoc(&dnp->vtoc);
+		meta_vtoc_to_md(&vtoc, &dnp->vtoc);
+	}
+
+	/* fix up any drives which are now accessible */
+	if ((nocache) && (dnp->type == MDT_ACCES) &&
+	    (dnp->vtoc.nparts == dnp->parts.parts_len)) {
+		dnp->type = MDT_COMP;
+		dnp->errnum = 0;
+	}
+
+	/* save partno */
+	assert(partno < dnp->vtoc.nparts);
+	if (partnop != NULL)
+		*partnop = partno;
+
+	/* return info */
+	return (&dnp->vtoc);
+}
+
+/*
+ * meta_mdvtoc_to_vtoc -- copy the slice table of an mdvtoc_t into a
+ * struct vtoc.
+ *
+ * Only v_part and v_nparts of *vp are written; every other member keeps
+ * the value the caller put there.  Start and size are narrowed to
+ * daddr32_t.
+ */
+static void
+meta_mdvtoc_to_vtoc(
+	mdvtoc_t	*mdvp,
+	struct vtoc	*vp
+)
+{
+	uint_t		slice = 0;
+
+	/* wipe the whole table first so unused slices read as empty */
+	(void) memset(&vp->v_part, '\0', sizeof (vp->v_part));
+	vp->v_nparts = (ushort_t)mdvp->nparts;
+
+	while (slice < mdvp->nparts) {
+		vp->v_part[slice].p_tag   = mdvp->parts[slice].tag;
+		vp->v_part[slice].p_flag  = mdvp->parts[slice].flag;
+		vp->v_part[slice].p_start = (daddr32_t)mdvp->parts[slice].start;
+		vp->v_part[slice].p_size  = (daddr32_t)mdvp->parts[slice].size;
+		slice++;
+	}
+}
+
+/*
+ * Write the cached vtoc (np->drivenamep->vtoc) back to the device.
+ *
+ * DKIOCGGEOM decides which label format is written:
+ *  - it succeeds: the on-disk vtoc is read with meta_getvtoc(), its
+ *    slice table is replaced by the cached one (converted from mdvtoc_t
+ *    to struct vtoc) and the result is written with meta_setvtoc();
+ *  - it fails with ENOTSUP: the cached vtoc is converted to an EFI
+ *    label and written with efi_write();
+ *  - it fails with anything else: that errno is reported.
+ *
+ * Returns 0 on success; on failure ep is set and the value is either -1
+ * or whatever mdsyserror() returns.
+ */
+int
+metasetvtoc(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*dnp = np->drivenamep;
+	char		*rname = np->rname;
+	struct dk_geom	geom;
+	int		fd;
+	int		err;
+	int		save_errno;
+	int		rval = 0;	/* result of the vtoc path */
+	int		report = 0;	/* report save_errno after close */
+
+	fd = open(rname, (O_RDONLY | O_NDELAY), 0);
+	if (fd < 0)
+		return (mdsyserror(ep, errno, rname));
+
+	err = ioctl(fd, DKIOCGGEOM, &geom);
+	save_errno = errno;
+
+	if (err == 0) {
+		struct vtoc	vtoc;
+
+		if (meta_getvtoc(fd, np->cname, &vtoc, NULL, ep) < 0) {
+			rval = -1;
+		} else {
+			meta_mdvtoc_to_vtoc(&dnp->vtoc, &vtoc);
+			if (meta_setvtoc(fd, np->cname, &vtoc, ep) < 0)
+				rval = -1;
+		}
+	} else if (save_errno == ENOTSUP) {
+		struct dk_gpt	*gpt;
+
+		/* meta_mdvtoc_to_efi allocates gpt for us */
+		meta_mdvtoc_to_efi(&dnp->vtoc, &gpt);
+
+		if (efi_write(fd, gpt) != 0)
+			report = 1;
+		save_errno = errno;
+		free(gpt);
+	} else {
+		report = 1;
+	}
+
+	/* the descriptor is always closed before any error is recorded */
+	(void) close(fd);
+
+	if (report)
+		return (mdsyserror(ep, save_errno, rname));
+	return (rval);
+}
+
+/*
+ * Return the geometry of the drive behind np.
+ *
+ * The geometry is filled in as a side effect of metagetvtoc(), so a
+ * cached vtoc lookup is done first; NULL is returned (with ep set) when
+ * that lookup fails.
+ */
+mdgeom_t *
+metagetgeom(
+	mdname_t	*np,	/* only rname, drivenamep, are setup */
+	md_error_t	*ep
+)
+{
+	mdvtoc_t	*vtocp = metagetvtoc(np, FALSE, NULL, ep);
+
+	return ((vtocp != NULL) ? &np->drivenamep->geom : NULL);
+}
+
+/*
+ * Return the controller information of the drive behind np.
+ *
+ * A previously filled-in entry (non-empty cname) is returned as is.
+ * Otherwise the raw device is opened, queried with DKIOCINFO and the
+ * answer converted with meta_cinfo_to_md().  Returns NULL with ep set
+ * on failure; ENOTTY from the ioctl is reported as MDE_NOT_DISK.
+ */
+mdcinfo_t *
+metagetcinfo(
+	mdname_t	*np,	/* only rname, drivenamep, are setup */
+	md_error_t	*ep
+)
+{
+	mddrivename_t		*dnp = np->drivenamep;
+	char			*rname = np->rname;
+	struct dk_cinfo		cinfo;
+	int			fd;
+	int			rc;
+	int			save;
+
+	if (dnp->cinfo.cname[0] == '\0') {
+		/* nothing cached yet: ask the driver */
+		fd = open(rname, (O_RDONLY|O_NDELAY), 0);
+		if (fd < 0) {
+			(void) mdsyserror(ep, errno, rname);
+			return (NULL);
+		}
+
+		rc = ioctl(fd, DKIOCINFO, &cinfo);
+		save = errno;
+		(void) close(fd);	/* sd/ssd bug */
+
+		if (rc != 0) {
+			if (save != ENOTTY)
+				(void) mdsyserror(ep, save, rname);
+			else
+				(void) mddeverror(ep, MDE_NOT_DISK, NODEV64,
+				    rname);
+			return (NULL);
+		}
+
+		/* translate into libmeta's representation */
+		if (meta_cinfo_to_md(np, &cinfo, &dnp->cinfo, ep) != 0)
+			return (NULL);
+	}
+
+	return (&dnp->cinfo);
+}
+
+/*
+ * Return the slice number of np within its drive, or -1 with ep set
+ * when the vtoc cannot be obtained.
+ */
+int
+metagetpartno(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	uint_t		slice;
+	mdvtoc_t	*vtocp;
+
+	vtocp = metagetvtoc(np, FALSE, &slice, ep);
+	if (vtocp == NULL)
+		return (-1);
+
+	assert(slice < vtocp->nparts);
+	return (slice);
+}
+
+/*
+ * Return the size recorded in the vtoc for the slice np names, or
+ * MD_DISKADDR_ERROR with ep set when the vtoc cannot be obtained.
+ */
+diskaddr_t
+metagetsize(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	uint_t		slice;
+	mdvtoc_t	*vtocp;
+
+	vtocp = metagetvtoc(np, FALSE, &slice, ep);
+	if (vtocp == NULL)
+		return (MD_DISKADDR_ERROR);
+
+	assert(slice < vtocp->nparts);
+	return (vtocp->parts[slice].size);
+}
+
+/*
+ * Return the label size recorded in the vtoc for the slice np names,
+ * or MD_DISKADDR_ERROR with ep set when the vtoc cannot be obtained.
+ */
+diskaddr_t
+metagetlabel(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	uint_t		slice;
+	mdvtoc_t	*vtocp;
+
+	vtocp = metagetvtoc(np, FALSE, &slice, ep);
+	if (vtocp == NULL)
+		return (MD_DISKADDR_ERROR);
+
+	assert(slice < vtocp->nparts);
+	return (vtocp->parts[slice].label);
+}
+
+/*
+ * Find the last block used by any database replica that lives on the
+ * device np names.
+ *
+ * *endblkp is set to 0 when np is a metadevice, when the set has no
+ * replicas at all (MDE_DB_NODB is cleared and treated as "none"), or
+ * when no replica sits on this device.  Returns 0 on success and -1,
+ * with ep set, when the replica list cannot be read.
+ */
+static int
+mddb_getendblk(
+	mdsetname_t		*sp,
+	mdname_t		*np,
+	diskaddr_t		*endblkp,
+	md_error_t		*ep
+)
+{
+	md_replicalist_t	*replicas = NULL;
+	md_replicalist_t	*cur;
+
+	*endblkp = 0;
+
+	/* only real components can hold replicas */
+	if (metaismeta(np))
+		return (0);
+
+	if (metareplicalist(sp, MD_BASICNAME_OK | PRINT_FAST, &replicas,
+	    ep) < 0) {
+		/* "no database" is not an error here, anything else is */
+		if (mdismddberror(ep, MDE_DB_NODB)) {
+			mdclrerror(ep);
+			return (0);
+		}
+		return (-1);
+	}
+	if (replicas == NULL)
+		return (0);
+
+	/* keep the highest end block among the replicas on our device */
+	cur = replicas;
+	while (cur != NULL) {
+		md_replica_t	*rp = cur->rl_repp;
+
+		if (rp->r_namep->dev == np->dev) {
+			diskaddr_t	last = rp->r_blkno + rp->r_nblk - 1;
+
+			if (last > *endblkp)
+				*endblkp = last;
+		}
+		cur = cur->rl_next;
+	}
+
+	metafreereplicalist(replicas);
+	return (0);
+}
+
+/*
+ * return cached end block
+ *
+ * The end block is the last block used by a database replica on the
+ * device, as computed by mddb_getendblk(); 0 means there is none.  The
+ * value is remembered in np->end_blk, and a remembered value (anything
+ * other than MD_DISKADDR_ERROR) is returned without another lookup.
+ * Returns MD_DISKADDR_ERROR with ep set on failure.
+ */
+static diskaddr_t
+metagetend(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	end_blk = MD_DISKADDR_ERROR;
+
+	/* short circuit */
+	if (np->end_blk != MD_DISKADDR_ERROR)
+		return (np->end_blk);
+
+	/* look for database locations */
+	if (mddb_getendblk(sp, np, &end_blk, ep) != 0)
+		return (MD_DISKADDR_ERROR);
+
+	/* success */
+	np->end_blk = end_blk;
+	return (end_blk);
+}
+
+/*
+ * Tell whether the device np names carries a metadb replica.
+ *
+ * Returns 1 when it does, 0 when it does not and -1, with ep set, when
+ * the replica end block cannot be determined.
+ */
+int
+metahasmddb(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	if (metagetend(sp, np, ep) == MD_DISKADDR_ERROR)
+		return (-1);
+
+	/* a non-zero end block means at least one replica was found */
+	return ((np->end_blk > 0) ? 1 : 0);
+}
+
+/*
+ * return cached start block
+ *
+ * The start block is derived from the end of any database replica on
+ * the device or, when there is none, from the size of the disk label,
+ * and is then rounded up to a multiple of nhead * nsect.  The result is
+ * remembered in np->start_blk, and a remembered value (anything other
+ * than MD_DISKADDR_ERROR) is returned without recomputing it.
+ * Returns MD_DISKADDR_ERROR with ep set on failure.
+ */
+diskaddr_t
+metagetstart(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	start_blk = MD_DISKADDR_ERROR;
+
+	/* short circuit */
+	if (np->start_blk != MD_DISKADDR_ERROR)
+		return (np->start_blk);
+
+	/* look for database locations */
+	if ((start_blk = metagetend(sp, np, ep)) == MD_DISKADDR_ERROR)
+		return (MD_DISKADDR_ERROR);
+
+	/* check for label (an end block of 0 means no replica was found) */
+	if (start_blk == 0) {
+		start_blk = metagetlabel(np, ep);
+		if (start_blk == MD_DISKADDR_ERROR) {
+			return (MD_DISKADDR_ERROR);
+		}
+	}
+
+	/*
+	 * roundup to next cylinder
+	 * NOTE(review): assumes nhead * nsect is non-zero whenever
+	 * start_blk is non-zero -- confirm for geometry derived from EFI.
+	 */
+	if (start_blk != 0) {
+		mdgeom_t	*geomp;
+
+		if ((geomp = metagetgeom(np, ep)) == NULL)
+			return (MD_DISKADDR_ERROR);
+		start_blk = roundup(start_blk, (geomp->nhead * geomp->nsect));
+	}
+
+	/* success */
+	np->start_blk = start_blk;
+	return (start_blk);
+}
+
+/*
+ * return cached devices name
+ *
+ * Resolves the symbolic link np->bname, looks for "/devices/" in the
+ * link target and stores a Strdup()ed copy of the part after "/devices"
+ * in np->devicesname.  A name already stored there is returned as is.
+ * Returns NULL with ep set when the link cannot be read or its target
+ * does not contain "/devices/".
+ */
+char *
+metagetdevicesname(
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char		path[MAXPATHLEN + 1];
+	int		len;
+
+	/* short circuit */
+	if (np->devicesname != NULL)
+		return (np->devicesname);
+
+	/*
+	 * follow symlink
+	 * readlink() does not terminate its result, so it is given one
+	 * byte less than the buffer holds and the NUL is added below.
+	 * NOTE(review): with that size the ENAMETOOLONG test can never
+	 * be true; an over-long target would come back truncated to
+	 * exactly sizeof (path) - 1 bytes instead.
+	 */
+	if ((len = readlink(np->bname, path, (sizeof (path) - 1))) < 0) {
+		(void) mdsyserror(ep, errno, np->bname);
+		return (NULL);
+	} else if (len >= sizeof (path)) {
+		(void) mdsyserror(ep, ENAMETOOLONG, np->bname);
+		return (NULL);
+	}
+	path[len] = '\0';
+	if ((len = strfind(path, "/devices/")) < 0) {
+		(void) mddeverror(ep, MDE_DEVICES_NAME, np->dev, np->bname);
+		return (NULL);
+	}
+
+	/*
+	 * return name
+	 * presumably strfind() yields the offset of the match, so the
+	 * copy starts at the '/' that follows "/devices" -- TODO confirm
+	 */
+	np->devicesname = Strdup(path + len + strlen("/devices"));
+	return (np->devicesname);
+}
+
+/*
+ * Return the misc (driver module) name of the metadevice np names.
+ *
+ * The name is obtained from the driver with MD_IOCGET_DRVNM the first
+ * time and kept in the drivename afterwards.  Returns NULL with ep set
+ * when np is not a metadevice or the ioctl fails.
+ */
+char *
+metagetmiscname(
+	mdname_t		*np,
+	md_error_t		*ep
+)
+{
+	mddrivename_t		*dnp = np->drivenamep;
+
+	if (dnp->miscname == NULL) {
+		md_i_driverinfo_t	info;
+
+		if (metachkmeta(np, ep) != 0)
+			return (NULL);
+
+		/* ask the driver which module owns this minor */
+		(void) memset(&info, 0, sizeof (info));
+		info.mnum = meta_getminor(np->dev);
+		if (metaioctl(MD_IOCGET_DRVNM, &info, &info.mde,
+		    np->cname) != 0) {
+			(void) mdstealerror(ep, &info.mde);
+			return (NULL);
+		}
+
+		dnp->miscname = Strdup(MD_PNTDRIVERNAME(&info));
+	}
+
+	return (dnp->miscname);
+}
+
+/*
+ * get unit structure from driver
+ *
+ * MD_IOCGET is issued twice: first without a buffer so that the driver
+ * reports the size of the unit structure in mig.size, then again with a
+ * Zalloc()ed buffer of that size to fetch the structure itself.
+ * Returns that buffer, or NULL with ep set on failure.  The buffer is
+ * not stored anywhere here, so releasing it is presumably up to the
+ * caller -- TODO confirm.
+ */
+md_unit_t *
+meta_get_mdunit(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	md_i_get_t	mig;
+	char		*miscname = NULL;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* get size of unit structure */
+	if (metachkmeta(np, ep) != 0)
+		return (NULL);
+	if ((miscname = metagetmiscname(np, ep)) == NULL)
+		return (NULL);
+	(void) memset(&mig, '\0', sizeof (mig));
+	MD_SETDRIVERNAME(&mig, miscname, sp->setno);
+	mig.id = meta_getminor(np->dev);
+	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
+		(void) mdstealerror(ep, &mig.mde);
+		return (NULL);
+	}
+
+	/* get actual unit structure (mig.size was filled in above) */
+	assert(mig.size > 0);
+	mig.mdp = (uintptr_t)Zalloc(mig.size);
+	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
+		(void) mdstealerror(ep, &mig.mde);
+		Free((void *)mig.mdp);
+		return (NULL);
+	}
+
+	return ((md_unit_t *)mig.mdp);
+}
+
+/*
+ * Release the cached unit structure of a drivename, if there is one,
+ * using the free routine that matches the unit's type, and clear the
+ * pointer.  An unknown type trips an assertion.
+ */
+void
+meta_free_unit(
+	mddrivename_t	*dnp
+)
+{
+	/* nothing cached, nothing to do */
+	if (dnp->unitp == NULL)
+		return;
+
+	switch (dnp->unitp->type) {
+	case MD_DEVICE:
+		meta_free_stripe((md_stripe_t *)dnp->unitp);
+		break;
+	case MD_METAMIRROR:
+		meta_free_mirror((md_mirror_t *)dnp->unitp);
+		break;
+	case MD_METATRANS:
+		meta_free_trans((md_trans_t *)dnp->unitp);
+		break;
+	case MD_METARAID:
+		meta_free_raid((md_raid_t *)dnp->unitp);
+		break;
+	case MD_METASP:
+		meta_free_sp((md_sp_t *)dnp->unitp);
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	dnp->unitp = NULL;
+}
+
+/*
+ * free metadevice name info
+ *
+ * Drops everything that has been cached for namep and for the drive it
+ * belongs to: the devices name, key, start and end blocks, geometry,
+ * controller info, vtoc, side names, misc name and unit structure.  The
+ * next lookup of any of them goes back to the device or driver.
+ */
+void
+meta_invalidate_name(
+	mdname_t	*namep
+)
+{
+	mddrivename_t	*dnp = namep->drivenamep;
+
+	/* get rid of cached name info */
+	if (namep->devicesname != NULL) {
+		Free(namep->devicesname);
+		namep->devicesname = NULL;
+	}
+	namep->key = MD_KEYBAD;
+	/*
+	 * metagetstart() and metagetend() recognise "not cached" by
+	 * comparing against MD_DISKADDR_ERROR, so store exactly that
+	 * value rather than a literal -1.
+	 */
+	namep->start_blk = MD_DISKADDR_ERROR;
+	namep->end_blk = MD_DISKADDR_ERROR;
+
+	/* get rid of cached drivename info */
+	(void) memset(&dnp->geom, 0, sizeof (dnp->geom));
+	(void) memset(&dnp->cinfo, 0, sizeof (dnp->cinfo));
+	metafreevtoc(&dnp->vtoc);
+	metaflushsidenames(dnp);
+	dnp->side_names_key = MD_KEYBAD;
+	if (dnp->miscname != NULL) {
+		Free(dnp->miscname);
+		dnp->miscname = NULL;
+	}
+	meta_free_unit(dnp);
+}
+
+/*
+ * Return the unit structure of the metadevice np names.
+ *
+ * A structure already cached in the drivename is returned as is.
+ * Otherwise the request is handed to the meta_get_* routine that
+ * matches the misc name of the metadevice.  Returns NULL with ep set
+ * when np is not a metadevice, the misc name cannot be determined, or
+ * the misc name is not one of the known types (MDE_UNKNOWN_TYPE).
+ */
+md_common_t *
+meta_get_unit(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	char		*module;
+
+	/* cached? */
+	if (np->drivenamep->unitp != NULL)
+		return (np->drivenamep->unitp);
+
+	if (metachkmeta(np, ep) != 0)
+		return (NULL);
+
+	module = metagetmiscname(np, ep);
+	if (module == NULL)
+		return (NULL);
+
+	/* hand over to the routine for this kind of metadevice */
+	if (strcmp(module, MD_STRIPE) == 0)
+		return ((md_common_t *)meta_get_stripe(sp, np, ep));
+	if (strcmp(module, MD_MIRROR) == 0)
+		return ((md_common_t *)meta_get_mirror(sp, np, ep));
+	if (strcmp(module, MD_TRANS) == 0)
+		return ((md_common_t *)meta_get_trans(sp, np, ep));
+	if (strcmp(module, MD_RAID) == 0)
+		return ((md_common_t *)meta_get_raid(sp, np, ep));
+	if (strcmp(module, MD_SP) == 0)
+		return ((md_common_t *)meta_get_sp(sp, np, ep));
+
+	(void) mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname);
+	return (NULL);
+}
+
+
+/*
+ * Tell whether the metadevice np names is open.
+ *
+ * The local driver is asked first (MD_IOCISOPEN).  If the device is
+ * closed locally, options contains MDCMD_MN_OPEN_CHECK and the set is a
+ * multi-node set, the other nodes are asked as well through an
+ * MD_MN_MSG_CLU_CHECK message.
+ *
+ * Returns 0 when the device is closed, non-zero when it is open and a
+ * negative value, with ep set, when np is not a metadevice, the local
+ * ioctl fails or the set description cannot be read.  A failure of the
+ * cluster message is reported as "open" to stay on the safe side.
+ */
+int
+meta_isopen(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep,
+	mdcmdopts_t	options
+)
+{
+	md_isopen_t	d;
+	md_set_desc	*sd;
+	md_mn_result_t	*resp = NULL;
+
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	(void) memset(&d, '\0', sizeof (d));
+	d.dev = np->dev;
+	if (metaioctl(MD_IOCISOPEN, &d, &d.mde, np->cname) != 0)
+		return (mdstealerror(ep, &d.mde));
+
+	/*
+	 * shortcut: if the device is open, no need to check on other nodes,
+	 * even in case of a mn metadevice
+	 * Also return in case we're told not to check on other nodes.
+	 */
+	if ((d.isopen != 0) || ((options & MDCMD_MN_OPEN_CHECK) == 0)) {
+		return (d.isopen);
+	}
+
+	/* the local set never spans other nodes */
+	if (sp->setno == MD_LOCAL_SET)
+		return (d.isopen);
+
+	/*
+	 * This is not supposed to fail, but dereferencing a missing set
+	 * description would crash, so report the error instead.
+	 */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * If the device is closed locally, but it's a mn device,
+	 * check on all other nodes, too
+	 */
+	if (sd->sd_flags & MD_SR_MN) {
+		/*
+		 * This message is never directly issued.
+		 * So we launch it with a suspend override flag.
+		 * If the commd is suspended, and this message comes
+		 * along it must be sent due to replaying a metainit or
+		 * similar. In that case we don't want this message to
+		 * be blocked.
+		 * If the commd is not suspended, the flag does no harm.
+		 * Additionally we don't want the result of the message
+		 * cached in the MCT, because we want uptodate results,
+		 * and the message doesn't need being logged either.
+		 * Hence NO_LOG and NO_MCT
+		 */
+		if (mdmn_send_message(
+		    sp->setno,
+		    MD_MN_MSG_CLU_CHECK,
+		    MD_MSGF_NO_MCT | MD_MSGF_STOP_ON_ERROR |
+		    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND,
+		    (char *)&d, sizeof (md_isopen_t),
+		    &resp, ep) == 0 && resp != NULL) {
+			d.isopen = resp->mmr_exitval;
+		} else {
+			/*
+			 * in case some error occurred,
+			 * we better say the device is open
+			 */
+			d.isopen = 1;
+		}
+		/*
+		 * resp starts out NULL, so this test is meaningful even
+		 * when the send failed before producing a result.
+		 */
+		if (resp != (md_mn_result_t *)NULL) {
+			free_result(resp);
+		}
+	}
+
+	return (d.isopen);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_namespace.c b/usr/src/lib/lvm/libmeta/common/meta_namespace.c
new file mode 100644
index 0000000000..eb21cbbdd3
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_namespace.c
@@ -0,0 +1,601 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * namespace utilities
+ */
+
+#include <meta.h>
+
+typedef struct deviceinfo {
+	char	*bname;		/* block name of the device (Strdup'ed) */
+	char	*dname;		/* driver for the device (Strdup'ed) */
+	minor_t	mnum;		/* minor number for the device */
+} deviceinfo_t;
+
+static	deviceinfo_t	devlist[MD_MNMAXSIDES];	/* one slot per side number */
+
+/*
+ * Ask the driver for the device name, driver name, and minor number;
+ * which has been stored in the metadevice state database
+ * (on behalf of the utilities).
+ * (by key)
+ *
+ * drvnm, mnum and dev are optional outputs and may each be NULL.
+ * Returns a Strdup()ed copy of the device name, or NULL with ep set
+ * when the ioctl fails.  The string stored in *drvnm is Strdup()ed as
+ * well.
+ */
+char *
+meta_getnmentbykey(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	char		**drvnm,
+	minor_t		*mnum,
+	md_dev64_t	*dev,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	static char		device_name[MAXPATHLEN];
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = key;
+	/*
+	 * Go through uintptr_t, as add_name() does: casting a pointer
+	 * directly to a wider integer is implementation-defined and may
+	 * sign-extend the address in a 32-bit process.
+	 */
+	nm.devname = (uintptr_t)device_name;
+
+	if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	if (drvnm != NULL)
+		*drvnm = Strdup(nm.drvnm);
+
+	if (mnum != NULL)
+		*mnum = nm.mnum;
+
+	if (dev != NULL)
+		*dev = meta_expldev(makedevice(nm.major, nm.mnum));
+
+	return (Strdup(device_name));
+}
+
+/*
+ * Ask the driver for the minor name which has been stored in the
+ * metadevice state database.
+ * (by key)
+ *
+ * Returns a Strdup()ed copy of the minor name, or NULL with ep set when
+ * the ioctl fails.
+ */
+char *
+meta_getdidminorbykey(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	static char		minorname[MAXPATHLEN];
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = key;
+	/*
+	 * Go through uintptr_t, as add_name() does: casting a pointer
+	 * directly to a wider integer is implementation-defined and may
+	 * sign-extend the address in a 32-bit process.
+	 */
+	nm.minorname = (uintptr_t)minorname;
+
+	if (metaioctl(MD_IOCGET_DIDMIN, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	return (Strdup(minorname));
+}
+
+/*
+ * Ask the driver for the device id string which has been stored in the
+ * metadevice state database (on behalf of the utilities).
+ * (by key)
+ *
+ * Returns a malloc()ed device id which the caller must free(), or NULL
+ * on failure.  ep is set when an ioctl fails or memory runs out; when
+ * the driver reports a size of zero whatever it left in nm.mde is
+ * handed on.
+ */
+ddi_devid_t
+meta_getdidbykey(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	void			*buf;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = key;
+
+	/*
+	 * First ask the driver for the size of the device id string.  This is
+	 * signaled by passing the driver a devid_size of zero.
+	 */
+	nm.devid_size = 0;
+	if (metaioctl(MD_IOCGET_DID, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	/*
+	 * If the devid_size is still zero then something is wrong.
+	 */
+	if (nm.devid_size == 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	/*
+	 * Now go get the actual device id string.  Caller is responsible for
+	 * free'ing device id memory buffer.
+	 *
+	 * The allocation is checked as a pointer, before it is stored in
+	 * the integer field, and a failure is reported through ep rather
+	 * than returning NULL with no error recorded.
+	 */
+	if ((buf = malloc(nm.devid_size)) == NULL) {
+		(void) mdsyserror(ep, ENOMEM, "malloc");
+		return (NULL);
+	}
+	nm.devid = (uintptr_t)buf;
+	if (metaioctl(MD_IOCGET_DID, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		free(buf);
+		return (NULL);
+	}
+
+	return (buf);
+}
+
+/*
+ * set the devid.
+ *
+ * Issues MD_IOCSET_DID for (setno, sideno, key).  For the local set the
+ * same ioctl is then repeated for every other side from SKEW up to
+ * MD_MAXSIDES - 1; an ENODEV failure on one of those sides is ignored,
+ * any other failure aborts.  Returns 0 on success, -1 with ep set on
+ * failure.
+ */
+int
+meta_setdid(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	int			i;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	nm.key = key;
+
+	if (metaioctl(MD_IOCSET_DID, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (-1);
+	}
+
+	if (setno == MD_LOCAL_SET) {
+		/*
+		 * If this is the local set then we are adding in the devids
+		 * for the disks in the diskset and so this means adding
+		 * a reference count for each side. Need to do this after
+		 * the initial add so that the correct devid is picked up.
+		 * The key is the key of the drive record and as such this
+		 * means the minor number of the device which is used to
+		 * get the devid. If the wrong side is used then it would
+		 * be possible to get the wrong devid in the namespace, hence
+		 * the requirement to process the local side first of all.
+		 */
+		for (i = 0 + SKEW; i < MD_MAXSIDES; i++) {
+			/*
+			 * We can just call the ioctl again because it will
+			 * fail with ENOENT if the side does not exist, and
+			 * more importantly does not increment the usage count
+			 * on the devid.
+			 *
+			 * NOTE(review): the text above says ENOENT but the
+			 * code below tolerates ENODEV -- confirm which one
+			 * the driver really returns.
+			 */
+			nm.side = (side_t)i;
+			if (nm.side == sideno)
+				continue;
+			if (metaioctl(MD_IOCSET_DID, &nm, &nm.mde, NULL) != 0) {
+				if (mdissyserror(&nm.mde, ENODEV)) {
+					mdclrerror(&nm.mde);
+				} else {
+					(void) mdstealerror(ep, &nm.mde);
+					return (-1);
+				}
+			}
+		}
+	}
+	return (0);
+}
+/*
+ * Ask the driver for the name, which has been stored in the
+ * metadevice state database (on behalf of the utilities).
+ * (by key)
+ *
+ * Convenience wrapper around meta_getnmentbykey() for callers that want
+ * only the device name; the result is whatever that function returns
+ * when none of its optional outputs are requested.
+ */
+char *
+meta_getnmbykey(
+	set_t		setno,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	return (meta_getnmentbykey(setno, sideno, key, NULL, NULL, NULL, ep));
+}
+
+/*
+ * Ask the driver for the device name, driver name, minor number, and key;
+ * which has been stored in the metadevice state database
+ * (on behalf of the utilities).
+ * (by md_dev64_t)
+ *
+ * dev must not be NODEV64.  drvnm, mnum and key are optional outputs
+ * and may each be NULL.  Returns a Strdup()ed copy of the device name,
+ * or NULL with ep set when the ioctl fails.  The string stored in
+ * *drvnm is Strdup()ed as well.
+ */
+char *
+meta_getnmentbydev(
+	set_t		setno,
+	side_t		sideno,
+	md_dev64_t	dev,
+	char		**drvnm,
+	minor_t		*mnum,
+	mdkey_t		*key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+	static char		device_name[MAXPATHLEN];
+
+	/* must have a dev */
+	assert(dev != NODEV64);
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = setno;
+	nm.side = sideno;
+	/* a wildcard key makes the driver search by major/minor instead */
+	nm.key = MD_KEYWILD;
+	nm.major = meta_getmajor(dev);
+	nm.mnum = meta_getminor(dev);
+	/*
+	 * Go through uintptr_t, as add_name() does: casting a pointer
+	 * directly to a wider integer is implementation-defined and may
+	 * sign-extend the address in a 32-bit process.
+	 */
+	nm.devname = (uintptr_t)device_name;
+
+	if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &nm.mde);
+		return (NULL);
+	}
+
+	if (drvnm != NULL)
+		*drvnm = Strdup(nm.drvnm);
+	if (mnum != NULL)
+		*mnum = nm.mnum;
+
+	if (key != NULL)
+		*key = nm.retkey;
+
+	return (Strdup(device_name));
+}
+
+/*
+ * Store a device name in the namespace of set sp for side sideno.
+ *
+ * key		key to store the name under, or MD_KEYWILD as passed by
+ *		add_key_name() for the first side
+ * dname	driver name
+ * mnum		minor number
+ * bname	block device name
+ *
+ * Returns the key reported back by the driver, or the value of
+ * mdstealerror() with ep set when the ioctl fails.
+ */
+int
+add_name(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	mdkey_t		key,
+	char		*dname,
+	minor_t		mnum,
+	char		*bname,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = sp->setno;
+	nm.side = sideno;
+	nm.key = key;
+	nm.mnum = mnum;
+	/*
+	 * Leave the last byte alone so that the terminator written by
+	 * the memset above survives even when dname is too long; drvnm
+	 * is treated as a C string elsewhere in this file (Strdup).
+	 */
+	(void) strncpy(nm.drvnm, dname, sizeof (nm.drvnm) - 1);
+	nm.devname_len = strlen(bname) + 1;
+	nm.devname = (uintptr_t)bname;
+
+	if (metaioctl(MD_IOCSET_NM, &nm, &nm.mde, bname) < 0)
+		return (mdstealerror(ep, &nm.mde));
+
+	return (nm.key);
+}
+
+/*
+ * Remove the device name which is stored under the given key for the
+ * given side of set sp.
+ *
+ * Returns 0 on success, or the value of mdstealerror() with ep set when
+ * the MD_IOCREM_NM ioctl fails.
+ */
+int
+del_name(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	struct mdnm_params	nm;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.setno = sp->setno;
+	nm.side = sideno;
+	nm.key = key;
+
+	if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde, NULL) != 0)
+		return (mdstealerror(ep, &nm.mde));
+
+	return (0);
+}
+
+/*
+ * Release every entry of the static devlist table and mark it unused.
+ *
+ * The pointers are reset to NULL after being freed.  devlist lives for
+ * the whole process and add_key_name() decides which sides to add by
+ * testing dname for NULL, so leaving the freed pointers in place made a
+ * later call use and free memory that had already been released.
+ */
+static void
+empty_devicelist()
+{
+	side_t	sideno;
+
+	for (sideno = 0; sideno < MD_MNMAXSIDES; sideno++) {
+		if (devlist[sideno].bname != (char *)NULL) {
+			Free(devlist[sideno].bname);
+			Free(devlist[sideno].dname);
+			devlist[sideno].bname = (char *)NULL;
+			devlist[sideno].dname = (char *)NULL;
+			devlist[sideno].mnum = NODEV;
+		}
+	}
+}
+
+/*
+ * Record the names and minor number of one side in the devlist table.
+ * sideno is used directly as the table index; both strings are copied
+ * with Strdup().
+ */
+static void
+add_to_devicelist(
+	side_t		sideno,
+	char		*bname,
+	char		*dname,
+	minor_t		mnum
+)
+{
+	deviceinfo_t	*slot = &devlist[sideno];
+
+	slot->bname = Strdup(bname);
+	slot->dname = Strdup(dname);
+	slot->mnum = mnum;
+}
+
+/*
+ * Fill the devlist table with the names np has on each side, walking
+ * the sides with meta_getnextside_devinfo() until it reports that there
+ * are no more.
+ *
+ * Returns 0 on success and -1 on failure.  Entries added before a
+ * failure are left in place; the caller is expected to tidy up devlist.
+ */
+static int
+build_sidenamelist(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	side_t		sideno = MD_SIDEWILD;
+	minor_t		mnum = NODEV;
+	char		*bname = NULL;
+	char		*dname = NULL;
+	int		rc;
+
+	for (;;) {
+		rc = meta_getnextside_devinfo(sp, np->bname, &sideno,
+		    &bname, &dname, &mnum, ep);
+
+		/* -1: error, 0: no further side -- either way we are done */
+		if ((rc == -1) || (rc == 0))
+			break;
+
+		/* the side number doubles as the index into the table */
+		add_to_devicelist(sideno, bname, dname, mnum);
+	}
+
+	return (rc);
+}
+
+/*
+ * add name key
+ * the meta_create* functions should be the only ones using this. The
+ * adding of a name to the namespace must be done in a particular order
+ * to support devids for the disksets. The order is: add the 'local' side
+ * first of all, so the devid lookup in the kernel will use the correct
+ * device information and then add in the other sides.
+ *
+ * On success the key is stored in np->key, np is appended to *nlpp when
+ * nlpp is not NULL, and 0 is returned.  On failure -1 is returned with
+ * ep set.  The devlist table is emptied again on every path that comes
+ * after it has been built.
+ */
+int
+add_key_name(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdnamelist_t	**nlpp,
+	md_error_t	*ep
+)
+{
+	int		err;
+	side_t		sideno = MD_SIDEWILD;
+	side_t		thisside;
+	mdkey_t		key = MD_KEYWILD;
+	md_set_desc	*sd;
+	int		maxsides;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* sd is only set, and only used below, for non-local sets */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			return (-1);
+		}
+	}
+
+	if (build_sidenamelist(sp, np, ep) == -1) {
+		empty_devicelist();
+		return (-1);
+	}
+
+	/*
+	 * When a disk is added into the namespace the local information for
+	 * that disk is added in first of all. For the local set this is not
+	 * a concern and for the host that owns the diskset it is not a concern
+	 * but when a disk is added in the remote namespace we *must* use the
+	 * local information for that disk first of all. This is because when
+	 * in the kernel (md_setdevname) the passed in dev_t is used to find
+	 * the devid of the disk. This means we have to cater for the following:
+	 *
+	 * - a disk on the remote host having the dev_t that has been passed
+	 *   into the kernel and this disk is not actually the disk that is
+	 *   being added into the diskset.
+	 * - the dev_t does not exist on this node
+	 *
+	 * So putting in the local information first of all makes sure that the
+	 * dev_t passed into the kernel is correct with respect to that node
+	 * and then any further additions for that name match on the key
+	 * passed back.
+	 *
+	 * NOTE(review): the value returned by getmyside() is used as an
+	 * index into devlist without being checked -- confirm that it
+	 * cannot return an out-of-range value on error.
+	 */
+	thisside = getmyside(sp, ep);
+
+	if (devlist[thisside].dname == NULL ||
+	    strlen(devlist[thisside].dname) == 0) {
+		/*
+		 * Did not find the disk information for the disk. This can
+		 * be because of an inconsistency in the namespace: that is the
+		 * devid we have in the namespace does not exist on the
+		 * system and thus when looking up the disk information
+		 * using this devid we fail to find anything.
+		 */
+		(void) mdcomperror(ep, MDE_SP_COMP_OPEN_ERR, 0, np->dev,
+		    np->cname);
+		empty_devicelist();
+		return (-1);
+	}
+
+	if ((err = add_name(sp, thisside, key, devlist[thisside].dname,
+	    devlist[thisside].mnum, devlist[thisside].bname, ep)) == -1) {
+		empty_devicelist();
+		return (-1);
+	}
+
+	/* We now have a 'key' so add in the other sides */
+	key = (mdkey_t)err;
+
+	if (metaislocalset(sp))
+		goto done;
+
+	if (MD_MNSET_DESC(sd))
+		maxsides = MD_MNMAXSIDES;
+	else
+		maxsides = MD_MAXSIDES;
+
+	for (sideno = 0; sideno < maxsides; sideno++) {
+		/* ignore thisside, as it has been added above */
+		if (sideno == thisside)
+			continue;
+
+		if (devlist[sideno].dname != NULL) {
+			err = add_name(sp, sideno, key, devlist[sideno].dname,
+			    devlist[sideno].mnum, devlist[sideno].bname, ep);
+			if (err == -1) {
+				empty_devicelist();
+				return (-1);
+			}
+		}
+	}
+
+done:
+	empty_devicelist();
+	/* save key, return success */
+	np->key = key;
+	if (nlpp != NULL)
+		(void) metanamelist_append(nlpp, np);
+	return (0);
+}
+
+/*
+ * delete name key
+ * the meta_create* functions should be the only ones using this. The
+ * removal of the names must be done in a particular order: remove the
+ * non-local entries first of all and then finally the local entry.
+ *
+ * Returns 0 on success and -1 on failure.  When removing one of the
+ * remote entries fails, the remaining remote entries are still tried
+ * but the local entry is left in place.  np->key is set to MD_KEYBAD
+ * once the side walk has completed, whether or not the removals
+ * succeeded; it is left untouched when the walk itself fails.
+ */
+int
+del_key_name(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	side_t		sideno = MD_SIDEWILD;
+	int		err;
+	int		retval = 0;
+	side_t		thisside;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* should have a key */
+	assert((np->key != MD_KEYWILD) && (np->key != MD_KEYBAD));
+
+	thisside = getmyside(sp, ep);
+
+	/* remove the remote sides first of all */
+	for (;;) {
+		if ((err = meta_getnextside_devinfo(sp, np->bname, &sideno,
+		    NULL, NULL, NULL, ep)) == -1)
+			return (-1);
+
+		if (err == 0)
+			break;
+
+		/* ignore thisside */
+		if (thisside == sideno) {
+			continue;
+		}
+		if ((err = del_name(sp, sideno, np->key, ep)) == -1)
+			retval = -1;
+	}
+
+	/* now remove this side */
+	if (retval == 0)
+		if ((err = del_name(sp, thisside, np->key, ep)) == -1)
+			retval = -1;
+
+	np->key = MD_KEYBAD;
+	return (retval);
+}
+
+/*
+ * Delete the keys of every name on a name list.
+ * Only the meta_create* functions are expected to use this.
+ *
+ * ep may be NULL, in which case errors are collected in a scratch
+ * structure and discarded.  All names are attempted even after a
+ * failure.  Returns 0 when every deletion succeeded and -1 otherwise.
+ */
+int
+del_key_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	*nlp,
+	md_error_t	*ep
+)
+{
+	md_error_t	scratch = mdnullerror;
+	mdnamelist_t	*cur;
+	int		result = 0;
+
+	/* the caller is not interested in the error details */
+	if (ep == NULL)
+		ep = &scratch;
+
+	cur = nlp;
+	while (cur != NULL) {
+		if (del_key_name(sp, cur->namep, ep) != 0)
+			result = -1;
+		cur = cur->next;
+	}
+
+	/* throw away anything collected on the caller's behalf */
+	if (ep == &scratch)
+		mdclrerror(&scratch);
+
+	return (result);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_notify.c b/usr/src/lib/lvm/libmeta/common/meta_notify.c
new file mode 100644
index 0000000000..5f66758f76
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_notify.c
@@ -0,0 +1,692 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1995-2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * libmeta wrappers for event notification
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_notify.h>
+
+#if defined(DEBUG)
+#include <assert.h>
+#endif /* DEBUG */
+
+struct tag2obj_type {		/* one row of the tag translation table */
+	md_tags_t	tag;	/* as carried in md_event_ioctl_t mdn_tag */
+	ev_obj_t	obj;	/* as stored in md_ev_t obj_type */
+} tag2obj_typetab[] =
+{
+	{ TAG_EMPTY,		EVO_EMPTY	},
+	{ TAG_METADEVICE,	EVO_METADEV	},
+	{ TAG_REPLICA,		EVO_REPLICA	},
+	{ TAG_HSP,		EVO_HSP		},
+	{ TAG_HS,		EVO_HS		},
+	{ TAG_SET,		EVO_SET		},
+	{ TAG_DRIVE,		EVO_DRIVE	},
+	{ TAG_HOST,		EVO_HOST	},
+	{ TAG_MEDIATOR,		EVO_MEDIATOR	},
+	{ TAG_UNK,		EVO_UNSPECIFIED	},
+
+	{ TAG_LAST,		EVO_LAST	}	/* ends the table scans */
+};
+
+struct evdrv2evlib_type {	/* one row of the event translation table */
+	md_event_type_t	drv;	/* as carried in md_event_ioctl_t mdn_event */
+	evid_t		lib;	/* as stored in md_ev_t ev */
+} evdrv2evlib_typetab[] =
+{
+	{ EQ_EMPTY,		EV_EMPTY		},
+	{ EQ_CREATE,		EV_CREATE		},
+	{ EQ_DELETE,		EV_DELETE		},
+	{ EQ_ADD,		EV_ADD			},
+	{ EQ_REMOVE,		EV_REMOVE		},
+	{ EQ_REPLACE,		EV_REPLACE		},
+	{ EQ_MEDIATOR_ADD,	EV_MEDIATOR_ADD		},
+	{ EQ_MEDIATOR_DELETE,	EV_MEDIATOR_DELETE	},
+	{ EQ_HOST_ADD,		EV_HOST_ADD		},
+	{ EQ_HOST_DELETE,	EV_HOST_DELETE		},
+	{ EQ_DRIVE_ADD,		EV_DRIVE_ADD		},
+	{ EQ_DRIVE_DELETE,	EV_DRIVE_DELETE		},
+	{ EQ_RENAME_SRC,	EV_RENAME_SRC		},
+	{ EQ_RENAME_DST,	EV_RENAME_DST		},
+	{ EQ_INIT_START,	EV_INIT_START		},
+	{ EQ_INIT_FAILED,	EV_INIT_FAILED		},
+	{ EQ_INIT_FATAL,	EV_INIT_FATAL		},
+	{ EQ_INIT_SUCCESS,	EV_INIT_SUCCESS		},
+	{ EQ_IOERR,		EV_IOERR		},
+	{ EQ_ERRED,		EV_ERRED		},
+	{ EQ_LASTERRED,		EV_LASTERRED		},
+	{ EQ_OK,		EV_OK			},
+	{ EQ_ENABLE,		EV_ENABLE		},
+	{ EQ_RESYNC_START,	EV_RESYNC_START		},
+	{ EQ_RESYNC_FAILED,	EV_RESYNC_FAILED	},
+	{ EQ_RESYNC_SUCCESS,	EV_RESYNC_SUCCESS	},
+	{ EQ_RESYNC_DONE,	EV_RESYNC_DONE		},
+	{ EQ_HOTSPARED,		EV_HOTSPARED		},
+	{ EQ_HS_FREED,		EV_HS_FREED		},
+	{ EQ_TAKEOVER,		EV_TAKEOVER		},
+	{ EQ_RELEASE,		EV_RELEASE		},
+	{ EQ_OPEN_FAIL,		EV_OPEN_FAIL		},
+	{ EQ_OFFLINE,		EV_OFFLINE		},
+	{ EQ_ONLINE,		EV_ONLINE		},
+	{ EQ_GROW,		EV_GROW			},
+	{ EQ_DETACH,		EV_DETACH		},
+	{ EQ_DETACHING,		EV_DETACHING		},
+	{ EQ_ATTACH,		EV_ATTACH		},
+	{ EQ_ATTACHING,		EV_ATTACHING		},
+	{ EQ_CHANGE,		EV_CHANGE		},
+	{ EQ_EXCHANGE,		EV_EXCHANGE		},
+	{ EQ_REGEN_START,	EV_REGEN_START		},
+	{ EQ_REGEN_DONE,	EV_REGEN_DONE		},
+	{ EQ_REGEN_FAILED,	EV_REGEN_FAILED		},
+	{ EQ_USER,		EV_USER			},
+	{ EQ_NOTIFY_LOST,	EV_NOTIFY_LOST		},
+
+	{ EQ_LAST,		EV_LAST }	/* ends the table scans */
+};
+
+/*
+ * Refine the generic EVO_METADEV object type into the specific kind of
+ * metadevice (stripe, mirror, RAID5 or trans) that dev is in set setno,
+ * judging by the misc name reported by metagetmiscname().
+ *
+ * EVO_METADEV is returned when the set, the name or the misc name
+ * cannot be looked up, or when the misc name is none of the four known
+ * ones.  ep is handed to each lookup.
+ */
+static ev_obj_t
+dev2tag(md_dev64_t dev, set_t setno, md_error_t *ep)
+{
+	mdsetname_t	*sp;
+	mdname_t	*np;
+	char		*miscname;
+
+	sp = metasetnosetname(setno, ep);
+	if (sp == NULL)
+		return (EVO_METADEV);
+
+	np = metamnumname(&sp, meta_getminor(dev), 0, ep);
+	if (np == NULL)
+		return (EVO_METADEV);
+
+	/* need to invalidate name in case rename or delete/create done */
+	meta_invalidate_name(np);
+
+	miscname = metagetmiscname(np, ep);
+	if (miscname == NULL)
+		return (EVO_METADEV);
+
+	if (strcmp(miscname, MD_STRIPE) == 0)
+		return (EVO_STRIPE);
+	if (strcmp(miscname, MD_MIRROR) == 0)
+		return (EVO_MIRROR);
+	if (strcmp(miscname, MD_RAID) == 0)
+		return (EVO_RAID5);
+	if (strcmp(miscname, MD_TRANS) == 0)
+		return (EVO_TRANS);
+
+	/* some other kind of metadevice: stay generic */
+	return (EVO_METADEV);
+}
+
+/*
+ * Translate a driver tag into the library object type paired with it
+ * in tag2obj_typetab.  A tag that is not in the table (the TAG_LAST
+ * terminator included) yields EVO_UNSPECIFIED.
+ */
+static ev_obj_t
+tagdrv_2_objlib(md_tags_t tag)
+{
+	struct tag2obj_type	*row;
+
+	for (row = tag2obj_typetab; row->tag != TAG_LAST; row++) {
+		if (row->tag == tag)
+			return (row->obj);
+	}
+	return (EVO_UNSPECIFIED);
+}
+
+/*
+ * Translate a library object type into the driver tag paired with it
+ * in tag2obj_typetab.  An object type that is not in the table (the
+ * terminator row is never consulted) yields TAG_UNK.
+ */
+static md_tags_t
+objlib_2_tagdrv(ev_obj_t obj)
+{
+	struct tag2obj_type	*row;
+
+	for (row = tag2obj_typetab; row->tag != TAG_LAST; row++) {
+		if (row->obj == obj)
+			return (row->tag);
+	}
+	return (TAG_UNK);
+}
+
+
+/*
+ * Translate a driver event type into the library event id paired with
+ * it in evdrv2evlib_typetab.  An event type that is not in the table
+ * (the EQ_LAST terminator included) yields EV_UNK.
+ */
+static evid_t
+evdrv_2_evlib(md_event_type_t drv_ev)
+{
+	struct evdrv2evlib_type	*row;
+
+	for (row = evdrv2evlib_typetab; row->drv != EQ_LAST; row++) {
+		if (row->drv == drv_ev)
+			return (row->lib);
+	}
+	return (EV_UNK);
+}
+
+/*
+ * Translate a library event id into the driver event type paired with
+ * it in evdrv2evlib_typetab.  An event id that is not in the table
+ * (the terminator row is never consulted) yields EQ_EMPTY.
+ */
+static md_event_type_t
+evlib_2_evdrv(evid_t lib_ev)
+{
+	struct evdrv2evlib_type	*row;
+
+	for (row = evdrv2evlib_typetab; row->drv != EQ_LAST; row++) {
+		if (row->lib == lib_ev)
+			return (row->drv);
+	}
+	return (EQ_EMPTY);
+}
+
+
+/*
+ * meta_event
+ *  issues the MD_IOCNOTIFY ioctl described by evctl.
+ *  returns 0 on success or < 0 to indicate error.
+ *  abs(return code) = errno
+ *
+ *  -EINVAL is returned, without touching ep, when either argument is
+ *  NULL, when the queue name is empty for any command other than
+ *  EQ_PUT, or when the name is MD_NOTIFY_NAME_SIZE bytes or longer.
+ *  When the ioctl fails and has not already filled in ep, a system
+ *  error built from errno is recorded there.
+ */
+static int
+meta_event(md_event_ioctl_t *evctl, md_error_t *ep)
+{
+	int	l;
+	int	err;
+
+	if (!evctl || !ep)
+		return (-EINVAL);
+
+	l = strlen(evctl->mdn_name);
+	if ((l == 0 && evctl->mdn_cmd != EQ_PUT) || l >= MD_NOTIFY_NAME_SIZE) {
+		return (-EINVAL);
+	}
+
+	MD_SETDRIVERNAME(evctl, MD_NOTIFY, 0);
+	mdclrerror(ep);
+	errno = 0;
+
+	if (metaioctl(MD_IOCNOTIFY, evctl, ep, evctl->mdn_name) != 0) {
+		/*
+		 * Latch errno straight away.  The error bookkeeping below
+		 * may call into libc, and it must not be able to change
+		 * the value that is handed back.  A failure that left
+		 * errno at 0 is reported as EINVAL.
+		 */
+		err = (errno != 0) ? errno : EINVAL;
+		if (mdisok(ep)) {
+			(void) mdsyserror(ep, err, evctl->mdn_name);
+		}
+		errno = err;
+		return (-err);
+	}
+
+	return (0);
+}
+
+/*
+ * Build a complete md_event_ioctl_t in *evctlp from its parts.
+ *
+ * The structure is zeroed first, stamped with the notify magic number
+ * and revision, and then every argument is stored in the matching
+ * mdn_* field.  A NULL qname leaves the queue name empty.
+ */
+static void
+init_evctl(char *qname,
+	md_tags_t tag,
+	md_event_type_t ev,
+	uint_t flags,
+	set_t set,
+	md_dev64_t dev,
+	md_event_cmds_t cmd,
+	u_longlong_t udata,
+	md_event_ioctl_t *evctlp)
+{
+	assert(evctlp);
+
+	/* start from all zeroes so that nothing is left undefined */
+	(void) memset(evctlp, 0, sizeof (*evctlp));
+
+	evctlp->mdn_rev		= MD_NOTIFY_REVISION;
+	evctlp->mdn_magic	= MD_EVENT_ID;
+
+	/*
+	 * At most MD_NOTIFY_NAME_SIZE-1 bytes of the name are copied.
+	 * NOTE(review): this presumes mdn_name is MD_NOTIFY_NAME_SIZE
+	 * bytes long, so that the zeroed last byte terminates it - confirm
+	 * against md_notify.h.
+	 */
+	if (qname != NULL) {
+		(void) strncpy(evctlp->mdn_name, qname,
+		    MD_NOTIFY_NAME_SIZE-1);
+	} else {
+		(void) memset(evctlp->mdn_name, 0, MD_NOTIFY_NAME_SIZE);
+	}
+
+	evctlp->mdn_cmd		= cmd;
+	evctlp->mdn_tag		= tag;
+	evctlp->mdn_event	= ev;
+	evctlp->mdn_flags	= flags;
+	evctlp->mdn_set		= set;
+	evctlp->mdn_dev		= dev;
+	evctlp->mdn_user	= udata;
+}
+
+/*
+ * meta_notify_createq
+ * - creates an eventq
+ * - returns 0 on success or errno and sets ep
+ *
+ * flags:
+ *   EVFLG_PERMANENT	the queue is created with EQ_Q_PERM set
+ *   EVFLG_EXISTERR	an already existing queue is an error; without
+ *			this flag EEXIST is treated as success and ep is
+ *			cleared
+ *
+ * The value returned is never negative.  When ep ends up holding a
+ * system error, the errno recorded there takes precedence over the one
+ * returned by meta_event().
+ */
+int
+meta_notify_createq(char *qname, ulong_t flags, md_error_t *ep)
+{
+	md_event_ioctl_t	evctl;
+	int			err	= 0;
+	int			errnum;
+
+	mdclrerror(ep);
+	if (!qname || strlen(qname) == 0) {
+		(void) mdsyserror(ep, EINVAL,
+		    dgettext(TEXT_DOMAIN,
+			"null or zero-length queue name"));
+		return (EINVAL);
+	}
+
+	init_evctl(qname,
+			TAG_EMPTY,
+			EQ_EMPTY,
+			(flags & EVFLG_PERMANENT) != 0? EQ_Q_PERM: 0,
+			/* set */ 0,
+			/* dev */ 0,
+			EQ_ON,
+			/* user-defined event data */ 0,
+			&evctl);
+
+	/* meta_event() hands back 0 or -errno */
+	err = meta_event(&evctl, ep);
+
+	if (err == -EEXIST && !(flags & EVFLG_EXISTERR)) {
+		err = 0;
+		mdclrerror(ep);
+	}
+
+	/* from here on err is the positive errno that gets returned */
+	err = -err;
+
+	if (!mdisok(ep) && mdanysyserror(ep)) {
+		/*
+		 * Normalise the sign so the caller always sees a positive
+		 * errno.  Previously this value was negated on the way out,
+		 * which made this path disagree with every other one.
+		 */
+		errnum = (ep)->info.md_error_info_t_u.ds_error.errnum;
+		err = (errnum < 0) ? -errnum : errnum;
+	}
+	return (err);
+}
+
+/*
+ * meta_notify_deleteq
+ * - asks the notify driver, via EQ_OFF, to delete the named eventq
+ * - returns 0 on success or errno; ep is set when the ioctl fails
+ */
+int
+meta_notify_deleteq(char *qname, md_error_t *ep)
+{
+	md_event_ioctl_t	evctl;
+
+	init_evctl(qname, TAG_EMPTY, EQ_EMPTY,
+	    /* flags */ 0, /* set */ 0, /* dev */ 0,
+	    EQ_OFF, /* user-defined event data */ 0, &evctl);
+
+	/* meta_event() yields 0 or -errno; hand errno back positive */
+	return (-meta_event(&evctl, ep));
+}
+
+/*
+ * meta_notify_validq
+ * - verifies that the queue exists
+ * - returns true or false, ep may be changed as a side-effect
+ *
+ * The check is made by issuing EQ_ON for the name and looking for
+ * EEXIST in reply; any other outcome counts as "not valid".
+ * NOTE(review): EQ_ON is also the command meta_notify_createq() uses,
+ * so probing a name that does not exist may well create it - confirm
+ * against the notify driver.
+ */
+bool_t
+meta_notify_validq(char *qname, md_error_t *ep)
+{
+	md_event_ioctl_t	evctl;
+	int			rc;
+
+	init_evctl(qname, TAG_EMPTY, EQ_EMPTY,
+	    /* flags */ 0, /* set */ 0, /* dev */ 0,
+	    EQ_ON, /* user-defined event data */ 0, &evctl);
+
+	rc = meta_event(&evctl, ep);
+	return (rc == -EEXIST);
+}
+
+/*
+ * meta_notify_listq
+ * - meant to return the number of (currently) active queues or -errno,
+ *   allocating a vector of queue names and pointing *qnames at it
+ * - not implemented: qnames is never touched, ep is set to a system
+ *   error for EOPNOTSUPP and -EOPNOTSUPP is returned
+ */
+int
+meta_notify_listq(char ***qnames, md_error_t *ep)
+{
+	int	rval = -EOPNOTSUPP;
+
+#ifdef lint
+	qnames = qnames;
+#endif /* lint */
+
+	/* report the missing feature through ep as well */
+	mdclrerror(ep);
+	(void) mdsyserror(ep, -rval, "EOPNOTSUPP");
+	return (rval);
+}
+
+/*
+ * meta_notify_flushq
+ * - meant to have the notify driver flush all events from the named
+ *   queue, returning 0 on success or errno
+ * - not implemented: qname is never used, ep is set to a system error
+ *   for EOPNOTSUPP and EOPNOTSUPP (positive) is returned
+ */
+int
+meta_notify_flushq(char *qname, md_error_t *ep)
+{
+	int	rval = EOPNOTSUPP;
+
+#ifdef lint
+	qname = qname;
+#endif /* lint */
+
+	/* report the missing feature through ep as well */
+	mdclrerror(ep);
+	(void) mdsyserror(ep, rval, "EOPNOTSUPP");
+	return (rval);
+}
+
+/*
+ * Convert the driver's view of an event (evctlp) into the library's
+ * view (evp).
+ *
+ * The tag and event type go through the translation tables; a generic
+ * metadevice tag is refined further by dev2tag().  The set number, the
+ * device and the user data are copied across unchanged.
+ */
+static void
+cook_ev(md_event_ioctl_t *evctlp, md_ev_t *evp, md_error_t *ep)
+{
+	ev_obj_t	otype;
+
+	assert(evctlp);
+	assert(evp);
+
+	otype = tagdrv_2_objlib(evctlp->mdn_tag);
+	if (otype == EVO_METADEV)
+		otype = dev2tag(evctlp->mdn_dev, evctlp->mdn_set, ep);
+
+	evp->obj_type	= otype;
+	evp->setno	= evctlp->mdn_set;
+	evp->ev		= evdrv_2_evlib(evctlp->mdn_event);
+	evp->obj	= evctlp->mdn_dev;
+	evp->uev	= evctlp->mdn_user;
+}
+
+/*
+ * meta_notify_getev
+ * - collects up to 1 event and stores it into md_ev_t
+ * - returns number of events found (0 or 1) on success or -errno
+ * - flags governs whether an empty queue is waited upon (EVFLG_WAIT)
+ *
+ * On entry evp->setno and evp->obj select what to listen for;
+ * EV_ALLSETS and EV_ALLOBJS are passed down as MD_ALLSETS and
+ * MD_ALLDEVS.  A NULL evp gives -EINVAL.
+ */
+int
+meta_notify_getev(char *qname, ulong_t flags, md_ev_t *evp, md_error_t *ep)
+{
+	md_event_ioctl_t	evctl;
+	int			rc;
+
+	if (evp == NULL)
+		return (-EINVAL);
+
+	init_evctl(qname,
+	    TAG_EMPTY,
+	    EQ_EMPTY,
+	    /* flags (unused in get) */ 0,
+	    (evp->setno == EV_ALLSETS)? MD_ALLSETS: evp->setno,
+	    (evp->obj == EV_ALLOBJS)? MD_ALLDEVS: evp->obj,
+	    (flags & EVFLG_WAIT) != 0? EQ_GET_WAIT: EQ_GET_NOWAIT,
+	    /* user-defined event data */ 0,
+	    &evctl);
+
+	rc = meta_event(&evctl, ep);
+
+	/*
+	 * EAGAIN is trapped so that the caller still gets an event
+	 * structure filled in, while the count stays at 0 for users who
+	 * only watch the count.
+	 */
+	if (rc == -EAGAIN) {
+		cook_ev(&evctl, evp, ep);
+		return (0);
+	}
+
+	/* one event was delivered */
+	if (rc == 0) {
+		cook_ev(&evctl, evp, ep);
+		return (1);
+	}
+
+	/* anything else is passed straight back to the caller */
+	return (rc);
+}
+
+
+/*
+ * meta_notify_getevlist
+ * - collects all pending events in the named queue and allocates
+ *   an md_evlist_t * to return them
+ * - returns the number of events found (may be 0 if !WAIT) on success
+ *   or -errno and sets ep as necessary
+ * - on success *evpp_arg receives the head of the list (NULL when no
+ *   event was found); it is released with meta_notify_freeevlist()
+ * - on failure *evpp_arg is left alone and everything collected up to
+ *   that point is freed
+ */
+int
+meta_notify_getevlist(char *qname,
+			ulong_t  flags,
+			md_evlist_t **evpp_arg,
+			md_error_t *ep)
+{
+	md_ev_t		*evp		= NULL;
+	md_evlist_t	*evlp		= NULL;
+	md_evlist_t	*evlp_head	= NULL;
+	md_evlist_t	*new		= NULL;
+	int		 n_ev		= 0;
+	int		 err		= 0;
+
+	mdclrerror(ep);
+	if (!evpp_arg) {
+		(void) mdsyserror(ep, EINVAL, dgettext(TEXT_DOMAIN,
+		    "No event list pointer"));
+		return (-EINVAL);
+	}
+
+	if (!qname || strlen(qname) == 0) {
+		(void) mdsyserror(ep, EINVAL, dgettext(TEXT_DOMAIN,
+		    "Null or zero-length queue name"));
+		return (-EINVAL);
+	}
+
+	for (;;) {
+		evp = (md_ev_t *)Malloc(sizeof (md_ev_t));
+		if (evp == NULL) {
+			(void) mdsyserror(ep, ENOMEM, qname);
+			err = -ENOMEM;
+			break;
+		}
+		evp->obj_type	= EVO_EMPTY;
+		evp->setno	= EV_ALLSETS;
+		evp->ev		= EV_EMPTY;
+		evp->obj	= EV_ALLOBJS;
+		evp->uev	= 0ULL;
+
+		err = meta_notify_getev(qname, flags, evp, ep);
+
+		/*
+		 * A buffer that still says EV_EMPTY holds no event: the
+		 * queue is drained or the fetch failed.  It is not on the
+		 * list, so it has to be freed here whatever err says.
+		 */
+		if (evp->ev == EV_EMPTY) {
+			Free(evp);
+			evp = NULL;
+			break;
+		}
+
+		new = (md_evlist_t *)Zalloc(sizeof (md_evlist_t));
+		if (new == NULL) {
+			Free(evp);
+			evp = NULL;
+			(void) mdsyserror(ep, ENOMEM, qname);
+			err = -ENOMEM;
+			break;
+		}
+
+		/* append to the tail; from here on the list owns evp */
+		if (evlp_head == NULL) {
+			evlp_head = new;
+		} else {
+			evlp->next = new;
+		}
+		evlp = new;
+		evlp->evp = evp;
+		n_ev++;
+
+		if (err < 0)
+			break;
+	}
+
+	/* an empty queue is not an error */
+	if (err == -EAGAIN) {
+		err = 0;
+	}
+
+	if (err < 0) {
+		meta_notify_freeevlist(evlp_head);
+		return (err);
+	}
+
+	*evpp_arg = evlp_head;
+	return (n_ev);
+}
+
+
+/*
+ * The shared implementation behind meta_notify_putev() and
+ * meta_notify_sendev().
+ *
+ * meta_notify_putev() is intended for general use by user-level code,
+ * such as the GUI, to send user-defined events.
+ *
+ * meta_notify_sendev() is for "user-level driver" code, such as
+ * set manipulation and the multi-host daemon to generate events.
+ *
+ * Note- only convention enforces this usage.
+ *
+ * Returns 0 on success or errno; EINVAL if either argument is NULL.
+ */
+int
+meta_notify_doputev(md_ev_t *evp, md_error_t *ep)
+{
+	md_event_ioctl_t	evctl;
+	md_tags_t		tag;
+	md_event_type_t		event;
+
+	if (evp == NULL || ep == NULL) {
+		return (EINVAL);
+	}
+
+	/* translate the library's object type and event id for the driver */
+	tag = objlib_2_tagdrv(evp->obj_type);
+	event = evlib_2_evdrv(evp->ev);
+
+	/* a put names no queue and carries no flags */
+	init_evctl(/* qname (unused in put) */ NULL,
+	    tag,
+	    event,
+	    /* flags (unused in put) */ 0,
+	    (evp->setno == EV_ALLSETS)? MD_ALLSETS: evp->setno,
+	    (evp->obj == EV_ALLOBJS)? MD_ALLDEVS: evp->obj,
+	    EQ_PUT,
+	    evp->uev,
+	    &evctl);
+
+	/* meta_event() yields 0 or -errno; hand errno back positive */
+	return (-meta_event(&evctl, ep));
+}
+
+/*
+ * meta_notify_putev
+ * - sends an event down to the notify driver (hence, all queues)
+ * - the event id in *evp is overwritten with EV_USER before sending
+ * - returns 0 on success or errno; EINVAL if either argument is NULL
+ */
+int
+meta_notify_putev(md_ev_t *evp, md_error_t *ep)
+{
+	if (evp == NULL || ep == NULL)
+		return (EINVAL);
+
+	/* events put through this entry point are user events by definition */
+	evp->ev = EV_USER;
+
+	return (meta_notify_doputev(evp, ep));
+}
+
+/*
+ * alternate put event entry point which allows
+ * more control of event innards (for use by md "user-level drivers")
+ *
+ * Since this routine isn't for use by clients, the user event data
+ * is always forced to be 0. That is only meaningful for events
+ * of type EQ_USER (and those go through meta_notify_putev()), so
+ * this is consistent.
+ *
+ * Returns 0 on success or errno.  Error details are not available to
+ * the caller: they are collected in a private md_error_t, which is
+ * cleared before returning.  A put that reports success but leaves
+ * that error set is turned into EINVAL.
+ */
+int
+meta_notify_sendev(
+	ev_obj_t	tag,
+	set_t		set,
+	md_dev64_t	dev,
+	evid_t		ev)
+{
+	md_error_t		 status	= mdnullerror;
+	md_error_t		*ep	= &status;
+	md_ev_t			 ev_packet;
+	int			 rc;
+
+	ev_packet.obj_type	= tag;
+	ev_packet.setno		= set;
+	ev_packet.obj		= dev;
+	ev_packet.ev		= ev;
+	ev_packet.uev		= 0ULL;
+
+	rc = meta_notify_doputev(&ev_packet, ep);
+
+	if (0 == rc && !mdisok(ep)) {
+		rc = EINVAL;
+	}
+
+	/*
+	 * status never leaves this function, so whatever it holds has to
+	 * be released on every path and not only on the one above.
+	 */
+	mdclrerror(ep);
+	return (rc);
+}
+
+/*
+ * meta_notify_putevlist
+ * - sends all of the events in the event list, in list order, each one
+ *   through meta_notify_putev()
+ * - returns number of events sent (>= 0) on success or -errno
+ * - stops at the first event that cannot be sent; events before it
+ *   have already gone out
+ * - a NULL list is not an error and yields 0
+ */
+int
+meta_notify_putevlist(md_evlist_t *evlp, md_error_t *ep)
+{
+	md_evlist_t	*evlpi;
+	int		 n_ev	= 0;
+	int		 err;
+
+	for (evlpi = evlp; evlpi != NULL; evlpi = evlpi->next) {
+		/*
+		 * meta_notify_putev() reports failure as a positive errno,
+		 * so any non-zero value is a failure.  It is negated
+		 * here so that a caller can tell it from an event count.
+		 */
+		err = meta_notify_putev(evlpi->evp, ep);
+		if (err != 0) {
+			return ((err > 0)? -err: err);
+		}
+		n_ev++;
+	}
+
+	return (n_ev);
+}
+
+/*
+ * meta_notify_freeevlist
+ * - frees every node of the event list together with the event each
+ *   node points at
+ * - a NULL list is accepted; nothing is returned
+ */
+void
+meta_notify_freeevlist(md_evlist_t *evlp)
+{
+	md_evlist_t	*next;
+
+	while (evlp != NULL) {
+		/* pick up the link before the node that holds it goes away */
+		next = evlp->next;
+
+		if (evlp->evp != NULL)
+			Free(evlp->evp);
+		Free(evlp);
+
+		evlp = next;
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_patch.c b/usr/src/lib/lvm/libmeta/common/meta_patch.c
new file mode 100644
index 0000000000..7c0ff549f1
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_patch.c
@@ -0,0 +1,299 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * patch /etc/vfstab file
+ */
+#include <meta.h>
+#include <string.h>
+
+/*
+ * patch filesystem lines into vfstab file, return tempfilename
+ *
+ * vname is read line by line.  Every line that has all seven vfstab
+ * fields, is not a comment, and matches cmpname (against the mount
+ * point, or against the fs type when a swap entry is being changed)
+ * has its block device replaced by fsnp->bname.  The raw device is
+ * replaced by fsnp->rname, except for swap entries, which keep the
+ * raw device field they had.  When old_bdevname is given, the line's
+ * block device must also contain it.  All other lines are copied
+ * through unchanged.
+ *
+ * With doit set, the result is written to "<vname>.tmp", created with
+ * the owner and permission bits of vname.  The caller is expected to
+ * rename it into place.  With verbose set, the change is described on
+ * stdout.
+ *
+ * Returns 0 with *tname pointing at the temp file name, which the
+ * caller must free.  Returns -1 with ep set and *tname NULL on
+ * failure, which includes finding no matching line; any temp file
+ * written by this call has been removed by then.
+ */
+int
+meta_patch_vfstab(
+	char		*cmpname,	/* filesystem mount point or */
+					/* "swap" if updating swap partition */
+	mdname_t	*fsnp,		/* filesystem device name */
+	char		*vname,		/* vfstab file name */
+	char		*old_bdevname,	/* old name of block device, needed */
+					/* for deciding which of multiple   */
+					/* swap file entries to change	    */
+					/* if NULL then not changing swap   */
+	int		doit,		/* really patch file */
+	int		verbose,	/* show what we're doing */
+	char		**tname,	/* returned temp file name */
+	md_error_t	*ep		/* returned error */
+)
+{
+	char		*chrname = fsnp->rname;
+	char		*blkname = fsnp->bname;
+	FILE		*fp = NULL;
+	FILE		*tfp = NULL;
+	struct stat	sbuf;
+	char		buf[512];
+	char		cdev[512];
+	char		bdev[512];
+	char		mntpt[512];
+	char		fstype[512];
+	char		fsckpass[512];
+	char		mntboot[512];
+	char		mntopt[512];
+	int		gotfs = 0;
+	int		rc;
+	char		*cmpstr = &mntpt[0]; /* compare against mntpnt if fs, */
+						/* or fstype if swap */
+	char		*char_device = chrname;
+
+	/* check names */
+	assert(vname != NULL);
+	assert(tname != NULL);
+
+	/* get temp names */
+	*tname = Malloc(strlen(vname) + strlen(".tmp") + 1);
+	(void) strcpy(*tname, vname);
+	(void) strcat(*tname, ".tmp");
+
+	/* check if going to update swap entry in file */
+	/* if so then compare against file system type */
+	if ((old_bdevname != NULL) && (strcmp("swap", cmpname) == 0)) {
+		cmpstr = &fstype[0];
+		char_device = &cdev[0];
+	}
+
+	/* copy vfstab file, replace filesystem line */
+	if ((fp = fopen(vname, "r")) == NULL) {
+		(void) mdsyserror(ep, errno, vname);
+		goto out;
+	}
+	if (fstat(fileno(fp), &sbuf) != 0) {
+		(void) mdsyserror(ep, errno, vname);
+		goto out;
+	}
+	if (doit) {
+		if ((tfp = fopen(*tname, "w")) == NULL) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+		if (fchmod(fileno(tfp), (sbuf.st_mode & 0777)) != 0) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+		if (fchown(fileno(tfp), sbuf.st_uid, sbuf.st_gid) != 0) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+	}
+	while (fgets(buf, sizeof (buf), fp) != NULL) {
+
+		/*
+		 * A line is copied through untouched unless it has all
+		 * the required params from the vfstab file, is not a
+		 * comment, has the fstype/mntpoint that was passed in
+		 * and, when changing swap, names the old block device.
+		 * The last check is needed since there may be multiple
+		 * entries of swap in the file, and so the fstype is not
+		 * a sufficient check.
+		 *
+		 * The field width is one less than the size of the
+		 * buffers: %s stores a terminating null on top of the
+		 * characters it is allowed to read.
+		 */
+		if ((sscanf(buf, "%511s %511s %511s %511s %511s %511s %511s",
+		    bdev, cdev, mntpt, fstype, fsckpass,
+		    mntboot, mntopt) != 7) ||
+		    (bdev[0] == '#') || (strcmp(cmpstr, cmpname) != 0) ||
+		    ((old_bdevname != NULL) &&
+		    (strstr(bdev, old_bdevname) == NULL))) {
+			if (doit) {
+				if (fputs(buf, tfp) == EOF) {
+					(void) mdsyserror(ep, errno, *tname);
+					goto out;
+				}
+			}
+			continue;
+		}
+
+		if (verbose) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Delete the following line from %s:\n\n"),
+			    vname);
+			(void) printf("%s\n", buf);
+			(void) printf(
+			    dgettext(TEXT_DOMAIN,
+			    "Add the following line to %s:\n\n"),
+			    vname);
+			(void) printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\n\n",
+				blkname, char_device, mntpt, fstype, fsckpass,
+				mntboot, mntopt);
+		}
+		if (doit) {
+			/* fprintf() signals failure with any negative value */
+			if (fprintf(tfp, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
+			    blkname, char_device, mntpt, fstype, fsckpass,
+			    mntboot, mntopt) < 0) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+		}
+
+
+		gotfs = 1;
+	}
+	if (! feof(fp)) {
+		(void) mdsyserror(ep, errno, vname);
+		goto out;
+	}
+	if (! gotfs) {
+		(void) mderror(ep, MDE_VFSTAB_FILE, vname);
+		goto out;
+	}
+
+	/*
+	 * Once fclose() has been called the stream is gone, whether or
+	 * not the call worked, so the pointer is dropped before the
+	 * result is looked at.  The cleanup code must not close it again.
+	 */
+	rc = fclose(fp);
+	fp = NULL;
+	if (rc != 0) {
+		(void) mdsyserror(ep, errno, vname);
+		goto out;
+	}
+	if (doit) {
+		if ((fflush(tfp) != 0) ||
+		    (fsync(fileno(tfp)) != 0)) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+		rc = fclose(tfp);
+		tfp = NULL;
+		if (rc != 0) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+	}
+
+	/* return success */
+	return (0);
+
+	/* cleanup, return error */
+out:
+	if (fp != NULL)
+		(void) fclose(fp);
+	if (tfp != NULL)
+		(void) fclose(tfp);
+	if (*tname != NULL) {
+		/* only remove a temp file that this call may have created */
+		if (doit)
+			(void) unlink(*tname);
+		Free(*tname);
+		/* callers free a non-NULL name themselves: don't dangle */
+		*tname = NULL;
+	}
+	return (-1);
+}
+
+
+/*
+ * set filesystem device name in vfstab
+ *
+ * The entry mounted on fsname is rewritten to use fsnp, and the result
+ * replaces vname (/etc/vfstab when vname is NULL).
+ * Returns 0 on success, -1 with ep set on failure.
+ *
+ * NOTE(review): the cleanup relies on meta_patch_vfstab() leaving the
+ * returned name either NULL or still owned by the caller when it
+ * fails - confirm.
+ */
+int
+meta_patch_fsdev(
+	char		*fsname,	/* filesystem mount point */
+	mdname_t	*fsnp,		/* filesystem device */
+	char		*vname,		/* vfstab file name */
+	md_error_t	*ep		/* returned error */
+)
+{
+	char		*tvname = NULL;
+
+	/* check names */
+	assert(fsname != NULL);
+	if (vname == NULL)
+		vname = "/etc/vfstab";
+
+	/* write the patched copy: really do it, and do it quietly */
+	if (meta_patch_vfstab(fsname, fsnp, vname, NULL, /* doit */ 1,
+	    /* verbose */ 0, &tvname, ep) == 0) {
+		/* rename temp file on top of real one */
+		if (rename(tvname, vname) == 0) {
+			Free(tvname);
+			return (0);
+		}
+		(void) mdsyserror(ep, errno, vname);
+	}
+
+	/* failure: get rid of whatever temp file is left over */
+	if (tvname != NULL) {
+		(void) unlink(tvname);
+		Free(tvname);
+	}
+	return (-1);
+}
+
+
+/*
+ * set swap device name in vfstab
+ *
+ * The swap entry whose block device contains old_bdevname is rewritten
+ * to use fsnp, and the result replaces vname (/etc/vfstab when vname
+ * is NULL).
+ * Returns 0 on success, -1 with ep set on failure.
+ *
+ * NOTE(review): the cleanup relies on meta_patch_vfstab() leaving the
+ * returned name either NULL or still owned by the caller when it
+ * fails - confirm.
+ */
+int
+meta_patch_swapdev(
+	mdname_t	*fsnp,		 /* filesystem device */
+	char		*vname,		 /* vfstab file name */
+	char		*old_bdevname,	 /* block device name to change */
+	md_error_t	*ep		 /* returned error */
+)
+{
+	char		*tvname = NULL;
+
+	/* check names */
+	if (vname == NULL)
+		vname = "/etc/vfstab";
+
+	/* write the patched copy: really do it, and do it quietly */
+	if (meta_patch_vfstab("swap", fsnp, vname, old_bdevname,
+	    /* doit */ 1, /* verbose */ 0, &tvname, ep) == 0) {
+		/* rename temp file on top of real one */
+		if (rename(tvname, vname) == 0) {
+			Free(tvname);
+			return (0);
+		}
+		(void) mdsyserror(ep, errno, vname);
+	}
+
+	/* failure: get rid of whatever temp file is left over */
+	if (tvname != NULL) {
+		(void) unlink(tvname);
+		Free(tvname);
+	}
+	return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_patch_root.c b/usr/src/lib/lvm/libmeta/common/meta_patch_root.c
new file mode 100644
index 0000000000..ac3f4b04d9
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_patch_root.c
@@ -0,0 +1,171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1992-1994, 2000-2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * patch /etc/system file for the root device
+ */
+
+#include <dlfcn.h>
+#include <meta.h>
+
+
+/*
+ * make rootnp the root device: patch the system file, md.conf and
+ * vfstab
+ *
+ * Three files are rewritten through temporary copies:
+ *   sname  (default /etc/system)		forceload and rootdev lines
+ *   dbname (default /kernel/drv/md.conf)	bootlist lines, built from
+ *						cname (default META_DBCONF)
+ *   vname  (default /etc/vfstab)		the entry for "/"
+ *
+ * With doit set, the copies are flushed to disk and renamed over the
+ * originals one after the other.  A rename that fails leaves the
+ * earlier ones in place.  Without doit nothing is renamed.
+ *
+ * rootnp has to belong to the local set.  Returns 0 on success and a
+ * non-zero value with ep set on failure.
+ */
+int
+meta_patch_rootdev(
+	mdname_t	*rootnp,	/* root device */
+	char		*sname,		/* system file name */
+	char		*vname,		/* vfstab file name */
+	char		*cname,		/* mddb.cf file name */
+	char		*dbname,	/* md.conf file name */
+	int		doit,		/* really patch files */
+	int		verbose,	/* show what we're doing */
+	md_error_t	*ep		/* returned error */
+)
+{
+	mdsetname_t	*sp;
+	int		ismeta = metaismeta(rootnp);
+	char		*tsname = NULL;
+	FILE		*tsfp = NULL;
+	char		*dbtname = NULL;
+	FILE		*dbtfp = NULL;
+	char		*tvname = NULL;
+	int		closerr;
+	int		rval = -1;
+
+	/* check names */
+	if (sname == NULL)
+		sname = "/etc/system";
+	if (vname == NULL)
+		vname = "/etc/vfstab";
+	if (cname == NULL)
+		cname = META_DBCONF;
+	if (dbname == NULL)
+		dbname = "/kernel/drv/md.conf";
+
+	/* make sure we have a local name */
+	if ((sp = metagetset(rootnp, TRUE, ep)) == NULL)
+		return (-1);
+
+	if (! metaislocalset(sp)) {
+		return (mddeverror(ep, MDE_NOT_LOCAL, rootnp->dev,
+		    rootnp->cname));
+	}
+
+	/* replace forceload and rootdev lines in system */
+	if (meta_systemfile_copy(sname, 1, 0, doit, verbose, &tsname, &tsfp,
+	    ep) != 0) {
+		goto out;
+	}
+	if (meta_systemfile_append_mdroot(rootnp, sname,
+	    tsname, tsfp, ismeta, doit, verbose, ep) != 0) {
+		goto out;
+	}
+
+	/* replace bootlist lines in /kernel/drv/md.conf */
+	if (meta_systemfile_copy(dbname, 0, 1, doit, verbose, &dbtname,
+	    &dbtfp, ep) != 0) {
+		goto out;
+	}
+	if (meta_systemfile_append_mddb(cname, dbname, dbtname, dbtfp, doit,
+	    verbose, ep) != 0) {
+		goto out;
+	}
+
+	/*
+	 * force the file contents out to disk
+	 *
+	 * Once fclose() has been called the stream is gone, whether or
+	 * not the call worked, so each pointer is dropped before the
+	 * result is looked at.  The cleanup code must not close it again.
+	 */
+	if (doit) {
+		if ((fflush(tsfp) != 0) ||
+		    (fsync(fileno(tsfp)) != 0)) {
+			(void) mdsyserror(ep, errno, tsname);
+			goto out;
+		}
+		closerr = fclose(tsfp);
+		tsfp = NULL;
+		if (closerr != 0) {
+			(void) mdsyserror(ep, errno, tsname);
+			goto out;
+		}
+
+		if ((fflush(dbtfp) != 0) ||
+		    (fsync(fileno(dbtfp)) != 0)) {
+			(void) mdsyserror(ep, errno, dbtname);
+			goto out;
+		}
+		closerr = fclose(dbtfp);
+		dbtfp = NULL;
+		if (closerr != 0) {
+			(void) mdsyserror(ep, errno, dbtname);
+			goto out;
+		}
+	}
+
+	/* replace lines in vfstab */
+	if (meta_patch_vfstab("/", rootnp, vname, NULL, doit, verbose, &tvname,
+	    ep) != 0) {
+		goto out;
+	}
+
+	/* rename files, better hope they all work */
+	if (doit) {
+		if (rename(tsname, sname) != 0) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		Free(tsname);
+		tsname = NULL;
+		if (rename(dbtname, dbname) != 0) {
+			(void) mdsyserror(ep, errno, dbname);
+			goto out;
+		}
+		Free(dbtname);
+		dbtname = NULL;
+		if (rename(tvname, vname) != 0) {
+			(void) mdsyserror(ep, errno, vname);
+			goto out;
+		}
+		Free(tvname);
+		tvname = NULL;
+	}
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (tsfp != NULL)
+		(void) fclose(tsfp);
+	if (tsname != NULL) {
+		if (doit)
+			(void) unlink(tsname);
+		Free(tsname);
+	}
+	if (tvname != NULL) {
+		if (doit)
+			(void) unlink(tvname);
+		Free(tvname);
+	}
+
+	/* free the temporary files for md.conf */
+	if (dbtfp != NULL)
+		(void) fclose(dbtfp);
+	if (dbtname != NULL) {
+		if (doit)
+			(void) unlink(dbtname);
+		Free(dbtname);
+	}
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_print.c b/usr/src/lib/lvm/libmeta/common/meta_print.c
new file mode 100644
index 0000000000..a539628685
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_print.c
@@ -0,0 +1,439 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * report metadevice status
+ */
+
+#include <meta.h>
+
+/*
+ * Print one named metadevice.
+ *
+ * The driver (misc) name of namep selects the type-specific print
+ * routine, and that routine's return value is passed straight back.
+ * Returns -1 if the misc name cannot be obtained; an unrecognized
+ * type is reported through mdmderror() as MDE_UNKNOWN_TYPE.
+ * lognlpp is only handed to the trans printer.
+ */
+int
+meta_print_name(
+	mdsetname_t	*sp,
+	mdname_t	*namep,
+	mdnamelist_t   **nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	mdnamelist_t	**lognlpp,
+	md_error_t	*ep
+)
+{
+	char		*drvname;
+	int		rval;
+
+	/* the caller has to supply a set */
+	assert(sp != NULL);
+
+	/* find out which driver owns this metadevice */
+	drvname = metagetmiscname(namep, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* hand off to the matching per-type printer */
+	if (strcmp(drvname, MD_TRANS) == 0) {
+		rval = meta_trans_print(sp, namep, nlpp, fname, fp,
+		    options, NULL, lognlpp, ep);
+	} else if (strcmp(drvname, MD_MIRROR) == 0) {
+		rval = meta_mirror_print(sp, namep, nlpp, fname, fp,
+		    options, ep);
+	} else if (strcmp(drvname, MD_RAID) == 0) {
+		rval = meta_raid_print(sp, namep, nlpp, fname, fp,
+		    options, ep);
+	} else if (strcmp(drvname, MD_STRIPE) == 0) {
+		rval = meta_stripe_print(sp, namep, nlpp, fname, fp,
+		    options, ep);
+	} else if (strcmp(drvname, MD_SP) == 0) {
+		rval = meta_sp_print(sp, namep, nlpp, fname, fp,
+		    options, ep);
+	} else {
+		/* no printer knows this driver name */
+		rval = mdmderror(ep, MDE_UNKNOWN_TYPE,
+		    meta_getminor(namep->dev), namep->cname);
+	}
+
+	return (rval);
+}
+
+/*
+ * Print every metadevice in the set, one device type after another.
+ *
+ * Each type-specific printer is always run, even after an earlier one
+ * has failed.  Once a printer fails, ep is redirected to a local
+ * scratch error so the caller's ep is not handed to the remaining
+ * printers; the scratch error is cleared before returning.
+ *
+ * Returns 0 if every printer succeeded, -1 if any of them failed.
+ */
+int
+meta_print_all(
+	mdsetname_t	*sp,
+	char		*fname,
+	mdnamelist_t	**nlpp,
+	FILE		*fp,
+	mdprtopts_t	options,
+	int		*meta_print_trans_msgp,
+	md_error_t	*ep
+)
+{
+	md_error_t	scratch = mdnullerror;
+	mdnamelist_t	*logs = NULL;
+	int		failed = 0;
+
+	/* trans devices come first; they also fill in the log list */
+	if (meta_trans_print(sp, NULL, nlpp, fname, fp, options,
+	    meta_print_trans_msgp, &logs, ep) != 0) {
+		failed = 1;
+		ep = &scratch;
+	}
+
+	/* the logs gathered above, then release that list */
+	if (meta_logs_print(sp, logs, nlpp, fname, fp, options, ep) != 0) {
+		failed = 1;
+		ep = &scratch;
+	}
+	metafreenamelist(logs);
+
+	/* mirrors */
+	if (meta_mirror_print(sp, NULL, nlpp, fname, fp, options, ep) != 0) {
+		failed = 1;
+		ep = &scratch;
+	}
+
+	/* RAIDs */
+	if (meta_raid_print(sp, NULL, nlpp, fname, fp, options, ep) != 0) {
+		failed = 1;
+		ep = &scratch;
+	}
+
+	/* stripes */
+	if (meta_stripe_print(sp, NULL, nlpp, fname, fp, options, ep) != 0) {
+		failed = 1;
+		ep = &scratch;
+	}
+
+	/* soft partitions */
+	if (meta_sp_print(sp, NULL, nlpp, fname, fp, options, ep) != 0) {
+		failed = 1;
+		ep = &scratch;
+	}
+
+	/* hot spare pools */
+	if (meta_hsp_print(sp, NULL, nlpp, fname, fp, options, ep) != 0) {
+		failed = 1;
+		ep = &scratch;
+	}
+
+	/* whatever landed in the scratch error is thrown away */
+	mdclrerror(&scratch);
+
+	return (failed ? -1 : 0);
+}
+
+/*
+ * Format a timestamp for display.
+ *
+ * Returns "" when tvp is NULL, the localized text "(invalid time)" when
+ * the timestamp is all zero or cannot be converted, and otherwise a
+ * pointer to a static buffer holding the formatted local time.  The
+ * static buffer is overwritten by the next successful call.
+ */
+char *
+meta_print_time(
+	md_timeval32_t	*tvp
+)
+{
+	static char	buf[128];
+	struct tm	*tmp;
+	char		*dcmsg;
+	time_t		secs;
+
+	if (tvp == NULL)
+		return ("");
+
+	/*
+	 * TRANSLATION_NOTE_LC_TIME
+	 * This message is the format of file
+	 * timestamps written with the -C and
+	 * -c options.
+	 * %a -- locale's abbreviated weekday name
+	 * %b -- locale's abbreviated month name
+	 * %e -- day of month [1,31]
+	 * %T -- Time as %H:%M:%S
+	 * %Y -- Year, including the century
+	 */
+	dcmsg = dcgettext(TEXT_DOMAIN, "%a %b %e %T %Y", LC_TIME);
+
+	/*
+	 * Hand localtime() a genuine time_t rather than a cast pointer to
+	 * tv_sec.  NOTE(review): tv_sec in md_timeval32_t is presumably a
+	 * 32-bit field; if time_t is wider, reading it through a
+	 * (time_t *) would run past the field.  Copying the value is
+	 * correct whatever the two widths are.
+	 */
+	secs = (time_t)tvp->tv_sec;
+
+	if (((tvp->tv_sec == 0) && (tvp->tv_usec == 0)) ||
+	    ((tmp = localtime(&secs)) == NULL) ||
+	    (strftime(buf, sizeof (buf), dcmsg, tmp) == 0)) {
+		return (dgettext(TEXT_DOMAIN, "(invalid time)"));
+	}
+	return (buf);
+}
+
+/*
+ * Format a high resolution time as "seconds:milliseconds:microseconds".
+ *
+ * The argument is divided by 1000 to obtain microseconds, so despite
+ * its name it is treated as a count 1000 times finer than that.
+ * Returns a pointer to a static buffer that the next call overwrites.
+ */
+char *
+meta_print_hrtime(
+	hrtime_t	secs
+)
+{
+	static char	buf[128];
+	long long	total_usec = secs / 1000;
+	long long	total_msec = total_usec / 1000;
+	long long	sec_part = total_msec / 1000;
+	long long	msec_part = total_msec % 1000;
+	long long	usec_part = total_usec % 1000;
+
+	(void) snprintf(buf, sizeof (buf), "%4lld:%03lld:%03lld",
+	    sec_part, msec_part, usec_part);
+
+	return (buf);
+}
+
+/*
+ * Routine to print 32 bit bitmasks
+ *
+ * Takes:
+ *	fp	- the stdio stream to write to
+ *	fmt	- optional leading text.  It is handed to fprintf() as the
+ *		format with no arguments, so it must not contain any
+ *		conversion specifications.
+ *	ul	- the bit vector; it is fetched from the variable argument
+ *		list as an int
+ *	bitfmt	- special string to map bits to words.
+ *		bitfmt is laid out as follows:
+ *			byte 0 is the output base.
+ *			byte 1 a bit position less than 32
+ *			byte 2-n text for position in byte 1
+ *			byte n+1 another bit position
+ *			byte n+2-m text for position in byte n+1
+ *				.
+ *				.
+ *				.
+ *
+ *		Eg. - "\020\001DOG\002CAT\003PIG"
+ *		Print the bitmask in hex.
+ *		If bit 1 (0x0001) is set print "<DOG>"
+ *		If bit 2 (0x0002) is set print "<CAT>"
+ *		If bit 3 (0x0004) is set print "<PIG>"
+ *		If bit 4 (0x0008) is set nothing is printed.
+ *		If bit 1 and bit 2 (0x0003) are set print <DOG,CAT>
+ *
+ *		A base byte of 8 prints the value in octal, 16 in hex;
+ *		anything else prints it in decimal.
+ *
+ *	Returns 0 on OK
+ *		EOF on error
+ *
+ *	Outputs on fp
+ *
+ */
+
+int
+meta_prbits(FILE *fp, const char *fmt, ...)
+{
+	va_list		ap;
+	unsigned long	ul;
+	int		set;
+	int		n;
+	char		*p;
+	int		rval = EOF;
+
+	va_start(ap, fmt);
+
+	if (fmt && *fmt)
+		if (fprintf(fp, fmt) < 0)
+			goto out;
+
+	ul = va_arg(ap, int);
+	p = va_arg(ap, char *);
+
+	/* byte 0 of bitfmt picks the base used for the raw value */
+	switch (*p++) {
+	    case 8:
+		if (fprintf(fp, "0%lo", ul) < 0)
+			goto out;
+		break;
+
+	    case 16:
+		if (fprintf(fp, "0x%lx", ul) < 0)
+			goto out;
+		break;
+
+	    default:
+	    case 10:
+		if (fprintf(fp, "%ld", ul) < 0)
+			goto out;
+		break;
+	}
+
+	/* no bits set, so there are no names to list */
+	if (! ul) {
+		rval = 0;
+		goto out;
+	}
+
+	/*
+	 * Walk the (bit position, name) pairs.  A name is every byte
+	 * greater than ' ' that follows its position byte.  The mask is
+	 * built from an unsigned long so that testing bit 32 does not
+	 * overflow a signed int.
+	 */
+	for (set = 0; (n = *p++) != '\0'; /* void */) {
+		if (ul & (1UL << (n - 1))) {
+			if (fputc(set ? ',' : '<', fp) == EOF)
+				goto out;
+			for (/* void */; (n = *p) > ' '; ++p)
+				if (fputc(n, fp) == EOF)
+					goto out;
+			set = 1;
+		} else {
+			/* bit is clear, step over its name */
+			while (*p > ' ')
+				++p;
+		}
+	}
+	if (set)
+		if (fputc('>', fp) == EOF)
+			goto out;
+
+	rval = 0;
+
+	/* every exit path has to release the argument list */
+out:
+	va_end(ap);
+	return (rval);
+}
+
+
+/*
+ * Convert a number of blocks to a human readable size string.
+ * Input:  block count and the size of one block in bytes
+ * Output: a string such as "199 MB", "27 TB" or "3.5 GB"
+ * Returns a pointer to a static buffer that the next call overwrites.
+ */
+char *
+meta_number_to_string(diskaddr_t number, u_longlong_t blk_sz)
+{
+	static char	buf[64];
+	const char	*units = " KMGTPE"; /* none, kilo, mega, ... exa */
+	int		idx = 0;	/* unit in use, starts at ' ' */
+	diskaddr_t	prev = 0;	/* value before the last division */
+	u_longlong_t	bytes;
+
+	/* blocks to bytes */
+	bytes = number * blk_sz;
+
+	/*
+	 * Scale down by 1024 per step.  Exa is the last unit there is;
+	 * a value still needing scaling there is probably bogus anyway.
+	 */
+	for (; (bytes >= 1024) && (units[idx] != 'E'); idx++) {
+		prev = bytes;
+		bytes = bytes / 1024;
+	}
+
+	if (prev && ((prev / 1024) < 10)) {
+		/* single digit result: show one decimal, sprintf rounds */
+		float	scaled = (float)prev / 1024;
+
+		(void) sprintf(buf, "%1.1f %cB", scaled, units[idx]);
+	} else {
+		(void) sprintf(buf, "%llu %cB", bytes, units[idx]);
+	}
+
+	return (buf);
+}
+
+/*
+ * meta_get_tstate: fetch the transient state bits of a metadevice from
+ * the kernel, for use when metastat prints the state field.
+ * INPUT: dev64 -- devt of the metadevice
+ *	  tstatep -- where the tstate is stored on success
+ *	  ep	-- error
+ * RETURN: 0 for success; otherwise the value of mdstealerror() after
+ *	   the ioctl error has been moved into ep
+ */
+int
+meta_get_tstate(md_dev64_t dev64, uint_t *tstatep, md_error_t *ep)
+{
+	md_i_get_tstate_t	req;
+
+	/* the request is keyed by the minor number of the device */
+	(void) memset(&req, 0, sizeof (req));
+	req.id = meta_getminor(dev64);
+
+	if (metaioctl(MD_IOCGET_TSTATE, &req, &req.mde, NULL) == 0) {
+		*tstatep = req.tstate;
+		return (0);
+	}
+
+	return (mdstealerror(ep, &req.mde));
+}
+
+/*
+ * meta_print_devid: print out the devid information, given a mddevid_t list.
+ * INPUT: mdsetname_t	set we're looking at
+ *	  FILE	where to print to
+ *        mddevid_t list to print from.
+ *	  md_error_t	error
+ * RETURN: -1 for error
+ *          0 for success
+ *
+ * One row is printed per list entry: the ctd name, whether a device id
+ * was found for its key, and the encoded device id ("-" if there is
+ * none or it cannot be encoded).
+ */
+int
+meta_print_devid(
+	mdsetname_t	*sp,
+	FILE		*fp,
+	mddevid_t	*mddevidp,
+	md_error_t	*ep
+)
+{
+	int		len = 0;
+	mddevid_t	*tmp_mddevidp = NULL;
+	ddi_devid_t	did = NULL;
+	char		*devid = "";
+	int		freedevid = 0;
+	char		*reloc = "";
+	int		prval;
+
+
+	/* print header */
+	if (fprintf(fp, gettext("Device Relocation Information:\n")) < 0)
+		return (-1);
+
+	/*
+	 * Size the first column to the longest ctd name so that the
+	 * table lines up whether the names are short or long.
+	 */
+
+	tmp_mddevidp = mddevidp;
+	while (tmp_mddevidp != NULL) {
+		len = max(len, strlen(tmp_mddevidp->ctdname));
+		tmp_mddevidp = tmp_mddevidp->next;
+	}
+
+	if (fprintf(fp, "%-*s %-5s\t%s\n", len + 2,
+	    gettext("Device  "),
+	    gettext("Reloc"),
+	    gettext("Device ID")) < 0)
+		return (-1);
+
+	/* print ctd's and devids */
+	while (mddevidp != NULL) {
+		did = (ddi_devid_t)
+		    meta_getdidbykey(sp->setno, getmyside(sp, ep),
+		    mddevidp->key, ep);
+
+		if (did == (ddi_devid_t)NULL) {
+			devid = "-";
+			reloc = gettext("No ");
+			freedevid = 0;
+		} else {
+			devid = devid_str_encode(did, NULL);
+			reloc = gettext("Yes");
+			Free(did);
+			if (devid == NULL) {
+				/*
+				 * The id exists but could not be encoded;
+				 * never hand a NULL string to fprintf.
+				 */
+				devid = "-";
+				freedevid = 0;
+			} else {
+				freedevid = 1;
+			}
+		}
+
+		prval = fprintf(fp, "%-*s %-5s\t%s\n", len + 2,
+		    mddevidp->ctdname, reloc, devid);
+
+		/* release the encoded string before any error return */
+		if (freedevid == 1)
+			devid_str_free(devid);
+
+		if (prval < 0)
+			return (-1);
+
+		mddevidp = mddevidp->next;
+	}
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_raid.c b/usr/src/lib/lvm/libmeta/common/meta_raid.c
new file mode 100644
index 0000000000..cce31ad3fa
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_raid.c
@@ -0,0 +1,2784 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * RAID operations
+ */
+
+#include <stdlib.h>
+#include <meta.h>
+#include <sys/lvm/md_raid.h>
+#include <sys/lvm/mdvar.h>
+#include <sys/lvm/md_convert.h>
+#include <stddef.h>
+
+/*
+ * FUNCTION:    meta_get_raid_names()
+ * INPUT:       sp      - the set to list RAID devices from
+ *              options - options from the command line
+ * OUTPUT:      nlpp    - list of RAID names
+ *              ep      - return error pointer
+ * RETURNS:     int     - the result of meta_get_names() for MD_RAID
+ *                        (callers treat a negative value as an error)
+ * PURPOSE:     thin wrapper that asks meta_get_names() for every
+ *              MD_RAID device in the specified set
+ */
+int
+meta_get_raid_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	int		rval;
+
+	rval = meta_get_names(MD_RAID, sp, nlpp, options, ep);
+	return (rval);
+}
+
+/*
+ * Release a RAID unit structure together with its column array.
+ * The column array is only freed when one is attached.
+ */
+void
+meta_free_raid(
+	md_raid_t	*raidp
+)
+{
+	/* nothing hanging off the unit, just drop the unit itself */
+	if (raidp->cols.cols_val == NULL) {
+		Free(raidp);
+		return;
+	}
+
+	/* an attached array must describe at least one column */
+	assert(raidp->cols.cols_len > 0);
+	Free(raidp->cols.cols_val);
+	Free(raidp);
+}
+
+/*
+ * get raid (common)
+ *
+ * Build the md_raid_t description of the RAID metadevice raidnp in set sp.
+ *
+ * If the drive name already carries a unit (dnp->unitp) that one is
+ * returned as is.  Otherwise the device is checked to be an MD_RAID
+ * device, its unit is fetched with meta_get_mdunit(), and a new md_raid_t
+ * is filled in from it: common fields, RAID options, the hot spare pool
+ * name, every column (with its hot spare, if hotspared) and the resync
+ * information obtained with the MD_IOCGETSYNC ioctl.  On success the new
+ * structure is stored in dnp->unitp before it is returned.
+ *
+ * fast is passed through unchanged to metakeyname().
+ *
+ * Returns the md_raid_t, or NULL on failure.  Where the failure comes
+ * from a call that takes ep (or from the ioctl), the error is reported
+ * through ep.
+ */
+md_raid_t *
+meta_get_raid_common(
+	mdsetname_t		*sp,
+	mdname_t		*raidnp,
+	int			fast,
+	md_error_t		*ep
+)
+{
+	mddrivename_t		*dnp = raidnp->drivenamep;
+	char			*miscname;
+	mr_unit_t		*mr;
+	md_raid_t		*raidp;
+	uint_t			ncol;
+	uint_t			col;
+	md_resync_ioctl_t	ri;
+
+	/* must have set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* short circuit: a unit is already attached to the drive name */
+	if (dnp->unitp != NULL) {
+		assert(dnp->unitp->type == MD_METARAID);
+		return ((md_raid_t *)dnp->unitp);
+	}
+
+	/* get miscname and unit */
+	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
+		return (NULL);
+	if (strcmp(miscname, MD_RAID) != 0) {
+		(void) mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
+		    raidnp->cname);
+		return (NULL);
+	}
+	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+		return (NULL);
+	assert(mr->c.un_type == MD_METARAID);
+
+	/* allocate raid */
+	raidp = Zalloc(sizeof (*raidp));
+
+	/* allocate columns */
+	ncol = mr->un_totalcolumncnt;
+	assert(ncol >= MD_RAID_MIN);
+	raidp->cols.cols_len = ncol;
+	raidp->cols.cols_val = Zalloc(raidp->cols.cols_len *
+	    sizeof (*raidp->cols.cols_val));
+
+	/* get common info */
+	raidp->common.namep = raidnp;
+	raidp->common.type = mr->c.un_type;
+	raidp->common.state = mr->c.un_status;
+	raidp->common.capabilities = mr->c.un_capabilities;
+	raidp->common.parent = mr->c.un_parent;
+	raidp->common.size = mr->c.un_total_blocks;
+	raidp->common.user_flags = mr->c.un_user_flags;
+	raidp->common.revision = mr->c.un_revision;
+
+	/* get options */
+	raidp->state = mr->un_state;
+	raidp->timestamp = mr->un_timestamp;
+	raidp->interlace = mr->un_segsize;
+	raidp->orig_ncol = mr->un_origcolumncnt;
+	/* column size is the segment size times the segments per column */
+	raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
+	raidp->pw_count = mr->un_pwcnt;
+	assert(raidp->orig_ncol <= ncol);
+	/* the hot spare pool name is only looked up when a pool is set */
+	if ((mr->un_hsp_id != MD_HSP_NONE) &&
+	    ((raidp->hspnamep = metahsphspname(&sp, mr->un_hsp_id,
+	    ep)) == NULL)) {
+		goto out;
+	}
+
+	/* get columns, update unit state */
+	for (col = 0; (col < ncol); ++col) {
+		mr_column_t	*rcp = &mr->un_column[col];
+		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
+
+		/* get column name */
+		mdrcp->colnamep = metakeyname(&sp, rcp->un_orig_key, fast, ep);
+		if (mdrcp->colnamep == NULL)
+			goto out;
+
+		/* override any start_blk */
+#ifdef	DEBUG
+		/*
+		 * DEBUG builds check that the start block metagetstart()
+		 * leaves in the name does not lie beyond the one recorded
+		 * in the unit; a failed lookup is just cleared.
+		 */
+		if (metagetstart(sp, mdrcp->colnamep, ep) !=
+		    MD_DISKADDR_ERROR) {
+			assert(mdrcp->colnamep->start_blk <=
+			    rcp->un_orig_devstart);
+		} else {
+			mdclrerror(ep);
+		}
+#endif	/* DEBUG */
+		mdrcp->colnamep->start_blk = rcp->un_orig_devstart;
+
+		/* if hotspared */
+		if (HOTSPARED(mr, col)) {
+			/* get hotspare name */
+			mdrcp->hsnamep = metakeyname(&sp, rcp->un_hs_key,
+			    fast, ep);
+			if (mdrcp->hsnamep == NULL)
+				goto out;
+
+			/*
+			 * Optional diagnostics, enabled by setting
+			 * META_DEBUG_START_BLK in the environment: warn
+			 * when the hot spare's start block and the unit's
+			 * un_hs_pwstart disagree about being zero.
+			 */
+			if (getenv("META_DEBUG_START_BLK") != NULL) {
+				if (metagetstart(sp, mdrcp->hsnamep, ep) ==
+				    MD_DISKADDR_ERROR)
+					mdclrerror(ep);
+
+				if ((mdrcp->hsnamep->start_blk == 0) &&
+				    (rcp->un_hs_pwstart != 0))
+					md_eprintf(dgettext(TEXT_DOMAIN,
+					    "%s: suspected bad start block,"
+					    " seems labelled [raid]\n"),
+					    mdrcp->hsnamep->cname);
+
+				if ((mdrcp->hsnamep->start_blk > 0) &&
+				    (rcp->un_hs_pwstart == 0))
+					md_eprintf(dgettext(TEXT_DOMAIN,
+					    "%s: suspected bad start block, "
+					    " seems unlabelled [raid]\n"),
+					    mdrcp->hsnamep->cname);
+			}
+
+			/* override any start_blk */
+			mdrcp->hsnamep->start_blk = rcp->un_hs_devstart;
+		}
+
+		/* get state, flags, and timestamp */
+		mdrcp->state = rcp->un_devstate;
+		mdrcp->flags = rcp->un_devflags;
+		mdrcp->timestamp = rcp->un_devtimestamp;
+	}
+
+	/* get resync info */
+	(void) memset(&ri, 0, sizeof (ri));
+	ri.ri_mnum = meta_getminor(raidnp->dev);
+	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
+	if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, raidnp->cname) != 0) {
+		(void) mdstealerror(ep, &ri.mde);
+		goto out;
+	}
+	raidp->resync_flags = ri.ri_flags;
+	raidp->percent_dirty = ri.ri_percent_dirty;
+	raidp->percent_done = ri.ri_percent_done;
+
+	/* cleanup, return success; the new unit is kept on the drive name */
+	Free(mr);
+	dnp->unitp = (md_common_t *)raidp;
+	return (raidp);
+
+	/* cleanup, return error */
+out:
+	Free(mr);
+	meta_free_raid(raidp);
+	return (NULL);
+}
+
+/*
+ * Get the RAID unit for raidnp.  Same as meta_get_raid_common() with
+ * its "fast" argument set to 0.
+ */
+md_raid_t *
+meta_get_raid(
+	mdsetname_t		*sp,
+	mdname_t		*raidnp,
+	md_error_t		*ep
+)
+{
+	md_raid_t		*raidp;
+
+	raidp = meta_get_raid_common(sp, raidnp, 0, ep);
+	return (raidp);
+}
+
+/*
+ * Check whether the block range (slblk, nblks) of np collides with any
+ * column of the RAID raidnp.
+ *
+ * Returns 0 when no column reports a problem and -1 as soon as the unit
+ * cannot be read, a helper fails, or meta_check_overlap() returns
+ * nonzero for a column.
+ */
+static int
+in_raid(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp;
+	uint_t		idx;
+
+	/* the raid has to belong to the set we were given */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* fetch the unit */
+	raidp = meta_get_raid(sp, raidnp, ep);
+	if (raidp == NULL)
+		return (-1);
+
+	/* examine every column in turn */
+	for (idx = 0; idx < raidp->cols.cols_len; idx++) {
+		mdname_t	*colnp = raidp->cols.cols_val[idx].colnamep;
+		diskaddr_t	col_start;
+		int		same;
+
+		/*
+		 * Compare drives first, since metagetstart() can fail;
+		 * only a column on the same drive needs the overlap check.
+		 */
+		same = meta_check_samedrive(np, colnp, ep);
+		if (same < 0)
+			return (-1);
+		if (same == 0)
+			continue;
+
+		col_start = metagetstart(sp, colnp, ep);
+		if (col_start == MD_DISKADDR_ERROR)
+			return (-1);
+
+		if (meta_check_overlap(raidnp->cname, np, slblk, nblks,
+		    colnp, col_start, -1, ep) != 0)
+			return (-1);
+	}
+
+	/* nothing collided */
+	return (0);
+}
+
+/*
+ * Check the block range (slblk, nblks) of np against every RAID in the
+ * set.  Stops at the first RAID for which in_raid() reports a problem.
+ *
+ * Returns 0 if no RAID objected, -1 otherwise (including when the list
+ * of RAID names cannot be obtained).
+ */
+int
+meta_check_inraid(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*raids = NULL;
+	mdnamelist_t	*cur;
+	int		rval = 0;
+
+	/* a set is required */
+	assert(sp != NULL);
+
+	/* collect the names of all RAIDs in the set */
+	if (meta_get_raid_names(sp, &raids, 0, ep) < 0)
+		return (-1);
+
+	/* walk the list until it ends or one RAID objects */
+	cur = raids;
+	while ((cur != NULL) && (rval == 0)) {
+		if (in_raid(sp, cur->namep, np, slblk, nblks, ep) != 0)
+			rval = -1;
+		else
+			cur = cur->next;
+	}
+
+	/* release the name list, hand back the verdict */
+	metafreenamelist(raids);
+	return (rval);
+}
+
+/*
+ * Run the checks a device must pass to be used as a RAID column.
+ *
+ * Returns 0 when every check passes and -1 at the first one that fails.
+ */
+int
+meta_check_column(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	chkopts = (MDCHK_ALLOW_MDDB);
+
+	/*
+	 * metachkcomp() (must be a disk) is only consulted when
+	 * meta_sp_issp() returns nonzero for the device.
+	 */
+	if ((meta_sp_issp(sp, np, ep) != 0) && (metachkcomp(np, ep) != 0))
+		return (-1);
+
+	/* must not already be in use */
+	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0)
+		return (-1);
+
+	/* must belong to the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	/* must not already be part of a metadevice */
+	if (meta_check_inmeta(sp, np, chkopts, 0, -1, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Print a RAID as a single line of the form
+ *	<name> -r <columns...> -k -i <interlace>b [-w n] [-h hsp] [-o n]
+ *
+ * With PRINT_LARGEDEVICES set, a unit whose revision is not
+ * MD_64BIT_META_DEV is silently skipped.
+ *
+ * Returns 0 on success.  If writing to fp fails, the error is recorded
+ * in ep with mdsyserror() against fname and -1 is returned.
+ */
+static int
+raid_print(
+	md_raid_t	*raidp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t		idx;
+
+	/* large-device listing: leave out everything else */
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (raidp->common.revision != MD_64BIT_META_DEV))
+		return (0);
+
+	/* name and -r */
+	if (fprintf(fp, "%s -r", raidp->common.namep->cname) == EOF)
+		goto fail;
+
+	/* the columns */
+	for (idx = 0; idx < raidp->cols.cols_len; idx++) {
+		mdname_t	*colnp = raidp->cols.cols_val[idx].colnamep;
+		const char	*shown;
+
+		/*
+		 * A name under one of the standard paths is printed in
+		 * its short form (cxtxdxsx or dx), since metainit assumes
+		 * the default location.  Anything else needs its full
+		 * pathname to work as intended.
+		 */
+		if ((strstr(colnp->rname, "/dev/rdsk") != NULL) ||
+		    (strstr(colnp->rname, "/dev/md/rdsk") != NULL) ||
+		    (strstr(colnp->rname, "/dev/td/") != NULL))
+			shown = colnp->cname;
+		else
+			shown = colnp->rname;
+
+		if (fprintf(fp, " %s", shown) == EOF)
+			goto fail;
+	}
+
+	if (fprintf(fp, " -k") == EOF)
+		goto fail;
+
+	/* interlace is always printed */
+	if (fprintf(fp, " -i %lldb", raidp->interlace) == EOF)
+		goto fail;
+
+	/* prewrite count only when it differs from PWCNT_MIN */
+	if ((raidp->pw_count != PWCNT_MIN) &&
+	    (fprintf(fp, " -w %d", raidp->pw_count) == EOF))
+		goto fail;
+
+	/* hot spare pool, if one is set */
+	if ((raidp->hspnamep != NULL) &&
+	    (fprintf(fp, " -h %s", raidp->hspnamep->hspname) == EOF))
+		goto fail;
+
+	/* original column count when it differs from the current one */
+	if (raidp->orig_ncol != raidp->cols.cols_len) {
+		assert(raidp->orig_ncol < raidp->cols.cols_len);
+		if (fprintf(fp, " -o %u", raidp->orig_ncol) == EOF)
+			goto fail;
+	}
+
+	/* finish the line */
+	if (fprintf(fp, "\n") == EOF)
+		goto fail;
+
+	return (0);
+
+	/* a write failed: record the system error against fname */
+fail:
+	(void) mdsyserror(ep, errno, fname);
+	return (-1);
+}
+
+/*
+ * Return the index of the first column whose state has RCS_RESYNC set,
+ * or -1 if no column does.
+ */
+static int
+find_resyncing_column(
+	md_raid_t *raidp
+)
+{
+	int		idx = 0;
+
+	while (idx < raidp->cols.cols_len) {
+		if (raidp->cols.cols_val[idx].state & RCS_RESYNC)
+			return (idx);
+		++idx;
+	}
+
+	/* nothing is resyncing */
+	return (-1);
+}
+
+/*
+ * Map the state of a RAID to a printable, localized name.
+ *
+ * If tvp is not NULL it receives the RAID's timestamp.
+ */
+char *
+raid_state_to_name(
+	md_raid_t	*raidp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate /* Errored tstate flags */
+)
+{
+	char		*name;
+
+	/* hand back the timestamp if the caller wants it */
+	if (tvp != NULL)
+		*tvp = raidp->timestamp;
+
+	/*
+	 * If the device has a transient error state (due to it being DR'ed or
+	 * failed) and there has been no I/O to it (the actual device is still
+	 * marked as 'Okay') then we cannot know what the state is or what
+	 * action to take on it. Therefore report the device as 'Unavailable'.
+	 * A subsequent I/O to the device will cause the 'Okay' status to
+	 * disappear if the device is actually gone and then we will print out
+	 * the appropriate status.  The MD_INACCESSIBLE state is only set
+	 * on the raid when we open it or probe it.  Once the raid is open
+	 * we will just have regular error status on the device.
+	 */
+	if (tstate & MD_INACCESSIBLE)
+		return (dgettext(TEXT_DOMAIN, "Unavailable"));
+
+	/* a resyncing column takes precedence over the unit state */
+	if (find_resyncing_column(raidp) >= 0)
+		return (dgettext(TEXT_DOMAIN, "Resyncing"));
+
+	/* otherwise the unit state decides */
+	switch (raidp->state) {
+	case RUS_INIT:
+		name = dgettext(TEXT_DOMAIN, "Initializing");
+		break;
+	case RUS_OKAY:
+		name = dgettext(TEXT_DOMAIN, "Okay");
+		break;
+	case RUS_ERRED:
+	case RUS_LAST_ERRED:
+		/* both erred states read the same to the user */
+		name = dgettext(TEXT_DOMAIN, "Needs Maintenance");
+		break;
+	case RUS_DOI:
+		name = dgettext(TEXT_DOMAIN, "Initialization Failed");
+		break;
+	case RUS_REGEN:
+		name = dgettext(TEXT_DOMAIN, "Regen");
+		break;
+	default:
+		name = dgettext(TEXT_DOMAIN, "invalid");
+		break;
+	}
+
+	return (name);
+}
+
+/*
+ * Return the index of the first column whose state shares a bit with
+ * the given state mask, or -1 if there is no such column.
+ */
+static int
+find_erred_column(md_raid_t *raidp, rcs_state_t state)
+{
+	int		idx = 0;
+
+	while (idx < raidp->cols.cols_len) {
+		if (raidp->cols.cols_val[idx].state & state)
+			return (idx);
+		++idx;
+	}
+
+	/* no column matched */
+	return (-1);
+}
+
+/*
+ * Convert the RAID state into the command a user should run to repair it.
+ *
+ * Returns NULL when no action is needed, otherwise a pointer to a static
+ * buffer holding the command; the next call overwrites that buffer.
+ *
+ *	- RUS_DOI set in the unit state: "metaclear -f <raid>"
+ *	- a column in RCS_ERRED or RCS_INIT_ERRED, or failing that a
+ *	  column in RCS_LAST_ERRED:
+ *	  "metareplace [-f ]<raid> <column> <new device>", where -f is
+ *	  added when the unit state is RUS_LAST_ERRED
+ */
+char *
+raid_state_to_action(md_raid_t *raidp)
+{
+	static char	emsg[1024];
+	mdname_t	*raidnp = raidp->common.namep;
+	mdname_t	*colnp;
+	int		err_col;
+
+	/* first check for full init failure */
+	if (raidp->state & RUS_DOI) {
+		(void) snprintf(emsg, sizeof (emsg),
+		    "metaclear -f %s", raidnp->cname);
+		return (emsg);
+	}
+
+	/*
+	 * Prefer an errored or init errored column; only when there is
+	 * none fall back to a last errored one.
+	 */
+	err_col = find_erred_column(raidp, (RCS_ERRED | RCS_INIT_ERRED));
+	if (err_col < 0)
+		err_col = find_erred_column(raidp, RCS_LAST_ERRED);
+
+	/* OK */
+	if (err_col < 0)
+		return (NULL);
+
+	/*
+	 * The "-f " option carries its own trailing blank, so the format
+	 * must not add another one in front of the raid name.
+	 */
+	assert(err_col < raidp->cols.cols_len);
+	colnp = raidp->cols.cols_val[err_col].colnamep;
+	(void) snprintf(emsg, sizeof (emsg),
+	    "metareplace %s%s %s <%s>",
+	    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
+	    raidnp->cname, colnp->cname,
+	    dgettext(TEXT_DOMAIN, "new device"));
+	return (emsg);
+}
+
+/*
+ * Map the state of one RAID column to a printable, localized name.
+ *
+ * If tvp is not NULL it receives the column's timestamp.  Any nonzero
+ * tstate makes the column read as "Unavailable" whatever its state.
+ */
+char *
+raid_col_state_to_name(
+	md_raidcol_t	*colp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate
+)
+{
+	char		*name;
+
+	/* hand back the timestamp if the caller wants it */
+	if (tvp != NULL)
+		*tvp = colp->timestamp;
+
+	/* a transient state overrides the recorded one */
+	if (tstate != 0)
+		return (dgettext(TEXT_DOMAIN, "Unavailable"));
+
+	/* otherwise the column state decides */
+	switch (colp->state) {
+	case RCS_INIT:
+		name = dgettext(TEXT_DOMAIN, "Initializing");
+		break;
+	case RCS_OKAY:
+		name = dgettext(TEXT_DOMAIN, "Okay");
+		break;
+	case RCS_INIT_ERRED:
+	case RCS_ERRED:
+		/* both erred states read the same to the user */
+		name = dgettext(TEXT_DOMAIN, "Maintenance");
+		break;
+	case RCS_LAST_ERRED:
+		name = dgettext(TEXT_DOMAIN, "Last Erred");
+		break;
+	case RCS_RESYNC:
+		name = dgettext(TEXT_DOMAIN, "Resyncing");
+		break;
+	default:
+		name = dgettext(TEXT_DOMAIN, "Unknown");
+		break;
+	}
+
+	return (name);
+}
+
+/*
+ * Print one row of the RAID column table.
+ *
+ * The row shows the column name, start block, whether the device holds
+ * a state database replica, the column state, whether a device id
+ * exists (only looked up with PRINT_DEVID) and the hot spare name.  With
+ * PRINT_TIMES the column timestamp is appended.  Start block and
+ * replica information are taken from the hot spare when the column is
+ * hotspared, otherwise from the column itself.
+ *
+ * top_tstate carries the errored tstate flags of the metadevice on top
+ * of this column; when it is set and the column is not itself a
+ * metadevice, the state is printed as "-".
+ *
+ * Returns 0 on success and -1 on failure.  A failed write to fp is
+ * recorded in ep with mdsyserror() against fname.
+ */
+static int
+display_raid_device_info(
+	mdsetname_t	*sp,
+	md_raidcol_t	*colp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	int		print_len,
+	uint_t		top_tstate, /* Errored tstate flags */
+	md_error_t	*ep
+)
+{
+	mdname_t	*namep = ((colp->hsnamep != NULL) ?
+				    colp->hsnamep : colp->colnamep);
+	char 		*devid = "";
+	char		*cname = colp->colnamep->cname;
+	diskaddr_t	start_blk;
+	int		has_mddb;
+	char		*has_mddb_str;
+	char		*col_state;
+	/*
+	 * Start from the column's own timestamp.  The "-" state branch
+	 * below never calls raid_col_state_to_name(), so without this the
+	 * PRINT_TIMES output would format an uninitialized value.
+	 */
+	md_timeval32_t	tv = colp->timestamp;
+	char		*hsname = ((colp->hsnamep != NULL) ?
+			    colp->hsnamep->cname : "");
+	int		rval = -1;
+	mdname_t	*didnp = NULL;
+	ddi_devid_t	dtp;
+	uint_t		tstate = 0;
+
+	/* get info */
+	if ((start_blk = metagetstart(sp, namep, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	if ((has_mddb = metahasmddb(sp, namep, ep)) < 0)
+		return (-1);
+	if (has_mddb)
+		has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+	else
+		has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+	if (metaismeta(namep)) {
+		/* a metadevice column reports its own transient state */
+		if (meta_get_tstate(namep->dev, &tstate, ep) != 0)
+			return (-1);
+		col_state = raid_col_state_to_name(colp, &tv,
+		    tstate & MD_DEV_ERRORED);
+	} else {
+		/*
+		 * if top_tstate is set, that implies that you have
+		 * a ctd type device with an unavailable metadevice
+		 * on top of it. If so, print a - for its state
+		 */
+		if (top_tstate != 0)
+			col_state = "-";
+		else
+			col_state = raid_col_state_to_name(colp, &tv, tstate);
+	}
+
+	/* populate the key in the name_p structure */
+	if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL)
+		return (-1);
+
+	/* determine if devid does NOT exist */
+	if (options & PRINT_DEVID) {
+		if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep),
+			didnp->key, ep)) == NULL)
+			devid = dgettext(TEXT_DOMAIN, "No ");
+		else {
+			devid = dgettext(TEXT_DOMAIN, "Yes");
+			free(dtp);
+		}
+	}
+	/* print column */
+	/*
+	 * The width of the name column is supplied by the caller in
+	 * print_len, so that the table lines up whether the ctd names
+	 * are short or long.
+	 */
+	if (! (options & PRINT_TIMES)) {
+		if (fprintf(fp,
+		    "\t%-*.*s %8lld     %5.5s %12.12s %5.5s %s\n",
+		    print_len, print_len, cname, start_blk, has_mddb_str,
+		    col_state, devid, hsname) == EOF) {
+			goto out;
+		}
+	} else {
+		char	*timep = meta_print_time(&tv);
+
+		if (fprintf(fp,
+		    "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
+		    print_len, cname, start_blk, has_mddb_str,
+		    col_state, devid, hsname, timep) == EOF) {
+			goto out;
+		}
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+
+	return (rval);
+}
+
+/*
+ * Print the RAID options line, which names the hot spare pool (or the
+ * localized word "none" when hspnamep is NULL).
+ *
+ * Returns 0 on success.  If writing to fp fails, the error is recorded
+ * in ep with mdsyserror() against fname and -1 is returned.
+ */
+int
+meta_print_raid_options(
+	mdhspname_t	*hspnamep,
+	char		*fname,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	char		*poolname;
+
+	/* choose the text shown for the pool */
+	if (hspnamep != NULL)
+		poolname = hspnamep->hspname;
+	else
+		poolname = dgettext(TEXT_DOMAIN, "none");
+
+	/* write the line, recording a failed write against fname */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN,
+	    "    Hot spare pool: %s\n"), poolname) == EOF) {
+		(void) mdsyserror(ep, errno, fname);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * report raid
+ */
+static int
+raid_report(
+	mdsetname_t	*sp,
+	md_raid_t	*raidp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*p;
+	uint_t		ncol = raidp->cols.cols_len;
+	uint_t		orig_ncol = raidp->orig_ncol;
+	diskaddr_t	column_size = raidp->column_size;
+	char		*raid_state;
+	md_timeval32_t	tv;
+	char		*timep;
+	uint_t		col;
+	int		rval = -1;
+	int		len = 0;
+	uint_t		tstate = 0;
+
+	if (options & PRINT_LARGEDEVICES) {
+		if (raidp->common.revision != MD_64BIT_META_DEV) {
+			rval = 0;
+			goto out;
+		}
+	}
+
+	/* print header */
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: RAID\n"),
+		    raidp->common.namep->cname) == EOF) {
+			goto out;
+		}
+
+	}
+
+	/* print state */
+	if (metaismeta(raidp->common.namep)) {
+		if (meta_get_tstate(raidp->common.namep->dev, &tstate, ep) != 0)
+			return (-1);
+	}
+	tstate &= MD_DEV_ERRORED; /* extract the errored tstate bits */
+	raid_state = raid_state_to_name(raidp, &tv, tstate);
+	if (options & PRINT_TIMES) {
+		timep = meta_print_time(&tv);
+	} else {
+		timep = "";
+	}
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    State: %-12s %s\n"),
+	    raid_state, timep) == EOF) {
+		goto out;
+	}
+
+	/*
+	 * Display recovery action if we're marked in the Unavailable state.
+	 */
+	if ((tstate == 0) || (tstate & MD_INACCESSIBLE)) {
+		/* print what to do */
+		if (tstate & MD_INACCESSIBLE) {
+			char sname[MD_MAX_SETNAME + 3]; /* 3 = sizeof("-s ") */
+
+			if (metaislocalset(sp)) {
+				sname[0] = '\0';
+			} else {
+				(void) snprintf(sname, MD_MAX_SETNAME + 3,
+				    "-s %s", sp->setname);
+			}
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Invoke: metastat -i %s\n"), sname) == EOF) {
+				goto out;
+			}
+		} else if ((p = raid_state_to_action(raidp)) != NULL) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Invoke: %s\n"), p) == EOF) {
+				goto out;
+			}
+		}
+
+		/* resync status */
+		if (raidp->resync_flags & MD_RI_INPROGRESS) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Resync in progress: %2d.%1d%% done\n"),
+			    raidp->percent_done/10,
+			    raidp->percent_done % 10) == EOF) {
+				goto out;
+			}
+		} else if (raidp->resync_flags & MD_GROW_INPROGRESS) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Initialization in progress: %2d.%1d%% "
+			    "done\n"),
+			    raidp->percent_done/10,
+			    raidp->percent_done % 10) == EOF) {
+				goto out;
+			}
+		} else if (raidp->state & RUS_REGEN) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Parity regeneration in progress: %2d.%1d%% "
+			    "done\n"),
+			    raidp->percent_done/10,
+			    raidp->percent_done % 10) == EOF) {
+				goto out;
+			}
+		}
+	}
+
+	/* print hotspare pool */
+	if (raidp->hspnamep != NULL) {
+		if (meta_print_raid_options(raidp->hspnamep,
+		    fname, fp, ep) != 0) {
+			return (-1);
+		}
+	}
+
+	/* print interlace */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Interlace: %lld blocks\n"),
+	    raidp->interlace) == EOF) {
+		goto out;
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
+	    raidp->common.size,
+	    meta_number_to_string(raidp->common.size, DEV_BSIZE)) == EOF) {
+		goto out;
+	}
+
+	/* MD_DEBUG stuff */
+	if (options & PRINT_DEBUG) {
+		mdname_t	*raidnp = raidp->common.namep;
+		mr_unit_t	*mr;
+
+		/* get additional info */
+		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+			return (-1);
+		assert(mr->c.un_type == MD_METARAID);
+
+		/* print prewrite count and size */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    Prewrite Count: %u slots\n"),
+		    mr->un_pwcnt) == EOF) {
+			Free(mr);
+			goto out;
+		}
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    Prewrite Slot Size: %u blocks\n"),
+		    (mr->un_pwsize / mr->un_pwcnt)) == EOF) {
+			Free(mr);
+			goto out;
+		}
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    Prewrite Total Size: %u blocks\n"),
+		    mr->un_pwsize) == EOF) {
+			Free(mr);
+			goto out;
+		}
+		Free(mr);
+	}
+
+	/* print original devices */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "Original device:\n")) == EOF)
+		goto out;
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
+	    column_size * (orig_ncol - 1),
+	    meta_number_to_string(column_size * (orig_ncol - 1), DEV_BSIZE))
+	    == EOF) {
+		goto out;
+	}
+	/*
+	 * Building a format string on the fly that will
+	 * be used in (f)printf. This allows the length
+	 * of the ctd to vary from small to large without
+	 * looking horrible.
+	 */
+	for (col = 0; (col < orig_ncol); ++col) {
+		len = max(len,
+		    strlen(raidp->cols.cols_val[col].colnamep->cname));
+	}
+
+	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+	len += 2;
+
+	if (! (options & PRINT_TIMES)) {
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s  %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "State"),
+		    dgettext(TEXT_DOMAIN, "Reloc"),
+		    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
+			goto out;
+		}
+	} else {
+		if (fprintf(fp,
+		    "\t%-*s  %5s  %-5s  %-11s  %-5s   %-9s  %s\n",
+		    len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "State"),
+		    dgettext(TEXT_DOMAIN, "Reloc"),
+		    dgettext(TEXT_DOMAIN, "Hot Spare"),
+		    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
+			goto out;
+		}
+	}
+	for (col = 0; (col < orig_ncol); ++col) {
+		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
+
+		if (display_raid_device_info(sp, mdrcp, fname, fp, options,
+		    len, tstate, ep) != 0) {
+			return (-1);
+		}
+	}
+
+	/* print concatenated devices */
+	if (col < ncol) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "Concatenated Devices:\n")) == EOF) {
+			goto out;
+		}
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    Size: %lld blocks (%s)\n"),
+		    column_size * (ncol - orig_ncol),
+		    meta_number_to_string(column_size * (ncol - orig_ncol),
+		    DEV_BSIZE))
+		    == EOF) {
+			goto out;
+		}
+		/*
+		 * This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		if (! (options & PRINT_TIMES)) {
+			if (fprintf(fp,
+			    "\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n",
+			    len, len,
+			    dgettext(TEXT_DOMAIN, "Device"),
+			    dgettext(TEXT_DOMAIN, "Start Block"),
+			    dgettext(TEXT_DOMAIN, "Dbase"),
+			    dgettext(TEXT_DOMAIN, "State"),
+			    dgettext(TEXT_DOMAIN, "Reloc"),
+			    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp,
+			    "\t%-*s %5s %-5s %-11s %-9s %s\t%s\n",
+			    len,
+			    dgettext(TEXT_DOMAIN, "Device"),
+			    dgettext(TEXT_DOMAIN, "Start"),
+			    dgettext(TEXT_DOMAIN, "Dbase"),
+			    dgettext(TEXT_DOMAIN, "State"),
+			    dgettext(TEXT_DOMAIN, "Reloc"),
+			    dgettext(TEXT_DOMAIN, "Hot Spare"),
+			    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
+				goto out;
+			}
+		}
+		assert(col == orig_ncol);
+		for (/* void */; (col < ncol); col++) {
+			md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
+
+			if (display_raid_device_info(sp, mdrcp, fname, fp,
+			    options, len, tstate, ep) != 0) {
+				return (-1);
+			}
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * print/report raid
+ *
+ * With raidnp == NULL, every raid named by meta_get_raid_names() for set
+ * sp is printed by calling this function once per name; a failure on one
+ * name is remembered (-1 is eventually returned) but the remaining names
+ * are still processed.
+ *
+ * With a specific raidnp, the raid is fetched with meta_get_raid_common()
+ * and printed in short form (raid_print) when PRINT_SHORT is set, or as a
+ * full report (raid_report) otherwise.  Nothing is printed, and 0 is
+ * returned, when the raid has a parent and PRINT_SUBDEVS is not set.
+ * Afterwards each column that metaismeta() identifies as a metadevice is
+ * printed through meta_print_name().
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+meta_raid_print(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp;
+	int		col;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert((raidnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
+
+	/* print all raids */
+	if (raidnp == NULL) {
+		mdnamelist_t	*nlp = NULL;
+		mdnamelist_t	*p;
+		int		cnt;
+		int		rval = 0;
+
+		/* get list; a negative count is an error, zero means no raids */
+		if ((cnt = meta_get_raid_names(sp, &nlp, options, ep)) < 0)
+			return (-1);
+		else if (cnt == 0)
+			return (0);
+
+		/* recurse; keep going after a failure so all raids are tried */
+		for (p = nlp; (p != NULL); p = p->next) {
+			mdname_t	*np = p->namep;
+
+			if (meta_raid_print(sp, np, nlpp, fname, fp,
+			    options, ep) != 0)
+				rval = -1;
+		}
+
+		/* cleanup, return accumulated status */
+		metafreenamelist(nlp);
+		return (rval);
+	}
+
+	/* get unit structure; third argument is 1 only for PRINT_FAST */
+	if ((raidp = meta_get_raid_common(sp, raidnp,
+	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
+		return (-1);
+
+	/* check for parented; subdevices are shown only on request */
+	if ((! (options & PRINT_SUBDEVS)) &&
+	    (MD_HAS_PARENT(raidp->common.parent))) {
+		return (0);
+	}
+
+	/* print appropriate detail */
+	if (options & PRINT_SHORT) {
+		if (raid_print(raidp, fname, fp, options, ep) != 0)
+			return (-1);
+	} else {
+		if (raid_report(sp, raidp, fname, fp, options, ep) != 0)
+			return (-1);
+	}
+
+	/*
+	 * Recurse on components that are metadevices.  PRINT_HEADER and
+	 * PRINT_SUBDEVS are added to the options passed down.
+	 */
+	for (col = 0; col < raidp->cols.cols_len; ++col) {
+		md_raidcol_t	*colp = &raidp->cols.cols_val[col];
+		mdname_t	*namep = colp->colnamep;
+
+		if ((metaismeta(namep)) &&
+		    (meta_print_name(sp, namep, nlpp, fname, fp,
+		    (options | PRINT_HEADER | PRINT_SUBDEVS),
+		    NULL, ep) != 0)) {
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Fold the geometry of column colnp into raid unit mr.
+ *
+ * The column geometry is looked up with metagetgeom() and its write and
+ * read reinstruct values are handed to meta_adjust_geom() together with
+ * a round_cyl argument of 1.
+ *
+ * Returns 0 on success, -1 if either helper fails.
+ */
+static int
+adjust_geom(
+	mdname_t	*raidnp,
+	mdname_t	*colnp,
+	mr_unit_t	*mr,
+	md_error_t	*ep
+)
+{
+	const uint_t	cyl_rounding = 1;
+	mdgeom_t	*col_geom = metagetgeom(colnp, ep);
+
+	/* no geometry available for the column */
+	if (col_geom == NULL)
+		return (-1);
+
+	/* apply the column's reinstruct values to the unit */
+	return ((meta_adjust_geom((md_unit_t *)mr, raidnp,
+	    col_geom->write_reinstruct, col_geom->read_reinstruct,
+	    cyl_rounding, ep) != 0) ? -1 : 0);
+}
+
+/*
+ * add another column to the raid unit structure
+ *
+ * Fills in column slot mdc of unit mr for component colnp: records the
+ * requested state and the current time, reserves the prewrite area
+ * (mr->un_pwsize blocks) at the start of the component, and verifies that
+ * at least one full column (un_segsize * un_segsincolumn blocks) remains
+ * and that the component's maxtransfer is not below mr->un_maxio.  When
+ * MDCMD_DOIT is set the component name is stored in the namespace via
+ * add_key_name() (collected on *keynlpp).  On success the unit's
+ * un_actual_tb grows by one column size, the geometry is adjusted and
+ * un_totalcolumncnt is incremented.
+ *
+ * Returns 0 on success; otherwise returns -1 or the value returned by the
+ * md*error() routine used to fill in ep.
+ */
+static int
+attach_raid_col(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mr_unit_t	*mr,
+	mr_column_t	*mdc,
+	mdname_t	*colnp,
+	rcs_state_t	state,
+	mdnamelist_t	**keynlpp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	diskaddr_t	column_size = mr->un_segsize * mr->un_segsincolumn;
+	diskaddr_t	size;
+	uint_t		 maxio;
+	mdcinfo_t	*cinfop;
+	md_timeval32_t	tmp_time;
+
+	/* setup state and timestamp */
+	mdc->un_devstate = state;
+	if (meta_gettimeofday(&tmp_time) == -1)
+		return (mdsyserror(ep, errno, NULL));
+
+	mdc->un_devtimestamp = tmp_time;
+	/* get start, size, and maxio */
+	if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
+	    MD_DISKADDR_ERROR)
+		return (-1);
+	if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
+		return (-1);
+	maxio = cinfop->maxtransfer;
+
+	/*
+	 * adjust start and size by prewrite: the prewrite area begins at
+	 * the component's start block and the data follows it
+	 */
+	mdc->un_orig_pwstart = mdc->un_orig_devstart;
+	mdc->un_orig_devstart += mr->un_pwsize;
+
+	/* make sure we still have something left (one whole column) */
+	if ((mdc->un_orig_devstart >= size) ||
+	    ((size - mdc->un_orig_devstart) < column_size)) {
+		return (mdsyserror(ep, ENOSPC, colnp->cname));
+	}
+	size -= mdc->un_orig_devstart;
+	/* the component must support at least the unit's maxio */
+	if (maxio < mr->un_maxio) {
+		return (mdcomperror(ep, MDE_MAXIO,
+		    meta_getminor(raidnp->dev), colnp->dev, colnp->cname));
+	}
+
+	if (options & MDCMD_DOIT) {
+		/* store name in namespace */
+		if (add_key_name(sp, colnp, keynlpp, ep) != 0)
+			return (-1);
+	}
+
+	/* setup column; no alternate device or hot spare is assigned */
+	mdc->un_orig_dev = colnp->dev;
+	mdc->un_orig_key = colnp->key;
+	mdc->un_dev = colnp->dev;
+	mdc->un_pwstart = mdc->un_orig_pwstart;
+	mdc->un_devstart = mdc->un_orig_devstart;
+	mdc->un_alt_dev = NODEV64;
+	mdc->un_alt_pwstart = 0;
+	mdc->un_alt_devstart = 0;
+	mdc->un_hs_id = 0;
+
+	/* add the size (we use) of the device to the total */
+	mr->c.un_actual_tb += column_size;
+
+	/* adjust geometry */
+	if (adjust_geom(raidnp, colnp, mr, ep) != 0)
+		return (-1);
+
+	/* count column */
+	mr->un_totalcolumncnt++;
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Invalidate the cached name of every column of raid raidnp.
+ *
+ * The raid is fetched with meta_get_raid() and meta_invalidate_name()
+ * is applied to each column name in turn.
+ *
+ * Returns 0 on success, -1 if the raid cannot be fetched.
+ */
+static int
+invalidate_columns(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raid = meta_get_raid(sp, raidnp, ep);
+	uint_t		idx;
+
+	if (raid == NULL)
+		return (-1);
+
+	idx = 0;
+	while (idx < raid->cols.cols_len) {
+		meta_invalidate_name(raid->cols.cols_val[idx].colnamep);
+		idx++;
+	}
+	return (0);
+}
+
+/*
+ * attach columns to raid
+ *
+ * Grows raid raidnp by the components listed in colnlp.  Each new
+ * component is checked with meta_check_column() and against the other
+ * new components for overlap.  A copy of the current unit, enlarged by
+ * one mr_column_t per new component, is built with attach_raid_col() and
+ * handed to the driver with the MD_IOCGROW ioctl.  Afterwards the cached
+ * names are invalidated and any parents are grown through
+ * meta_concat_parent().
+ *
+ * MDCMD_DOIT is forced on internally; MDCMD_PRINT produces a message on
+ * stdout once the ioctl has succeeded.
+ *
+ * Returns 0 on success, -1 on failure.  On failure, the namespace keys
+ * added for the new components are deleted again.
+ */
+int
+meta_raid_attach(
+	mdsetname_t		*sp,
+	mdname_t		*raidnp,
+	mdnamelist_t		*colnlp,
+	mdcmdopts_t		options,
+	md_error_t		*ep
+)
+{
+	uint_t			concat_cnt = 0;
+	mdnamelist_t		*p;
+	mr_unit_t		*old_mr;
+	mr_unit_t		*new_mr;
+	size_t			old_rusize;
+	size_t			new_rusize;
+	mdnamelist_t		*keynlp = NULL;
+	md_grow_params_t	mgp;
+	int			rval = -1;
+	int			create_flag = MD_CRO_32BIT;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* check type */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* check and count new columns */
+	for (p = colnlp; (p != NULL); p = p->next) {
+		mdname_t	*np = p->namep;
+		mdnamelist_t	*p2;
+
+		/* check against existing devices */
+		if (meta_check_column(sp, np, ep) != 0)
+			return (-1);
+
+		/* check against ourselves (the rest of the new columns) */
+		for (p2 = p->next; (p2 != NULL); p2 = p2->next) {
+			if (meta_check_overlap(np->cname, np, 0, -1,
+			    p2->namep, 0, -1, ep) != 0) {
+				return (-1);
+			}
+		}
+
+		/* count */
+		++concat_cnt;
+	}
+
+	/* get old unit */
+	if ((old_mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * calculate the size needed for the new raid unit and allocate
+	 * the appropriate structure. allocate new unit.
+	 *
+	 * sizeof (mr_unit_t) already includes one un_column[] entry, so
+	 * that entry is subtracted before adding the real column count.
+	 */
+	old_rusize = sizeof (*old_mr) - sizeof (old_mr->un_column[0]);
+	old_rusize += old_mr->un_totalcolumncnt * sizeof (old_mr->un_column[0]);
+	new_rusize = sizeof (*new_mr) - sizeof (new_mr->un_column[0]);
+	new_rusize += (old_mr->un_totalcolumncnt + concat_cnt)
+	    * sizeof (new_mr->un_column[0]);
+	new_mr = Zalloc(new_rusize);
+	(void) memcpy(new_mr, old_mr, old_rusize);
+
+	/* We always want a do-it, this is for attach_raid_col below */
+	options |= MDCMD_DOIT;
+
+	/* build new unit structure */
+	for (p = colnlp; (p != NULL); p = p->next) {
+		mdname_t	*colnp = p->namep;
+		mr_column_t	*mdc;
+
+		/*
+		 * attach column; attach_raid_col() bumps un_totalcolumncnt
+		 * on success, so this always indexes the next free slot
+		 */
+		mdc = &new_mr->un_column[new_mr->un_totalcolumncnt];
+		if (attach_raid_col(sp, raidnp, new_mr, mdc, colnp,
+		    RCS_INIT, &keynlp, options, ep) != 0) {
+			goto out;
+		}
+	}
+	assert(new_mr->un_totalcolumncnt
+	    == (old_mr->un_totalcolumncnt + concat_cnt));
+
+
+	/* pick the 32 or 64 bit unit format from the total block count */
+	create_flag = meta_check_devicesize(new_mr->c.un_total_blocks);
+
+	/* grow raid */
+	(void) memset(&mgp, 0, sizeof (mgp));
+	mgp.mnum = MD_SID(new_mr);
+	MD_SETDRIVERNAME(&mgp, MD_RAID, sp->setno);
+	mgp.size = new_rusize;
+	mgp.mdp = (uintptr_t)new_mr;
+
+	if (create_flag == MD_CRO_32BIT) {
+		mgp.options = MD_CRO_32BIT;
+		new_mr->c.un_revision = MD_32BIT_META_DEV;
+	} else {
+		mgp.options = MD_CRO_64BIT;
+		new_mr->c.un_revision = MD_64BIT_META_DEV;
+	}
+	if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &mgp.mde);
+		goto out;
+	}
+
+	/* clear cache */
+	if (invalidate_columns(sp, raidnp, ep) != 0)
+		goto out;
+	meta_invalidate_name(raidnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		if (concat_cnt == 1) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: component is attached\n"),
+			    raidnp->cname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: components are attached\n"),
+			    raidnp->cname);
+		}
+		(void) fflush(stdout);
+	}
+
+
+	/* grow any parents */
+	if (meta_concat_parent(sp, raidnp, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+
+	/* cleanup, return error; keys are removed only when we failed */
+out:
+	Free(old_mr);
+	Free(new_mr);
+	if (rval != 0)
+		(void) del_key_names(sp, keynlp, NULL);
+	metafreenamelist(keynlp);
+	return (rval);
+}
+
+/*
+ * Fetch the tunable parameters of raid raidnp into *paramsp.
+ *
+ * *paramsp is zeroed and its hsp_id is set to the hot spare pool of the
+ * raid, or to MD_HSP_NONE when the raid has no hot spare pool name.
+ *
+ * Returns 0 on success, -1 if raidnp fails metachkmeta() or the raid
+ * cannot be fetched.
+ */
+int
+meta_raid_get_params(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mr_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raid;
+
+	/* the raid must live in the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* validate the name, then look the raid up */
+	if ((metachkmeta(raidnp, ep) != 0) ||
+	    ((raid = meta_get_raid(sp, raidnp, ep)) == NULL)) {
+		return (-1);
+	}
+
+	/* start from a clean slate and report the hot spare pool */
+	(void) memset(paramsp, 0, sizeof (*paramsp));
+	if (raid->hspnamep != NULL)
+		paramsp->hsp_id = raid->hspnamep->hsp;
+	else
+		paramsp->hsp_id = MD_HSP_NONE;
+
+	return (0);
+}
+
+/*
+ * Apply the parameters in *paramsp to raid raidnp.
+ *
+ * The parameters are passed to the raid driver with the MD_IOCCHANGE
+ * ioctl; on success the cached name of the raid is invalidated.
+ *
+ * Returns 0 on success, -1 if raidnp fails metachkmeta(), or the result
+ * of mdstealerror() when the ioctl fails.
+ */
+int
+meta_raid_set_params(
+	mdsetname_t		*sp,
+	mdname_t		*raidnp,
+	mr_params_t		*paramsp,
+	md_error_t		*ep
+)
+{
+	md_raid_params_t	ioc;
+
+	/* the raid must live in the set we were handed */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* validate the name */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* build the ioctl argument */
+	(void) memset(&ioc, 0, sizeof (ioc));
+	MD_SETDRIVERNAME(&ioc, MD_RAID, sp->setno);
+	ioc.mnum = meta_getminor(raidnp->dev);
+	ioc.params = *paramsp;
+
+	/* hand it to the driver; drop the cached raid once it is changed */
+	if (metaioctl(MD_IOCCHANGE, &ioc, &ioc.mde, raidnp->cname) == 0) {
+		meta_invalidate_name(raidnp);
+		return (0);
+	}
+
+	return (mdstealerror(ep, &ioc.mde));
+}
+
+/*
+ * Check that component colnp can serve as a column of raid raidnp.
+ *
+ * On the way, paramsp->number_blks, paramsp->has_label and
+ * paramsp->start_blk are filled in from the component.  The component
+ * must pass meta_check_column() (an MDE_ALREADY use error is tolerated
+ * and cleared when dup_ok is non-zero), must offer at least one column
+ * (un_segsize * un_segsincolumn blocks) past its start block, and must
+ * have a maxtransfer no smaller than the unit's un_maxio.
+ *
+ * Returns 0 when the component is acceptable, -1 otherwise.
+ */
+static int
+validate_new_raid(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mdname_t	*colnp,
+	replace_params_t *paramsp,
+	int		dup_ok,
+	md_error_t	*ep
+)
+{
+	mr_unit_t	*unit;
+	mdcinfo_t	*ctlr;
+	diskaddr_t	needed_blks;
+	diskaddr_t	label_blks;
+	int		status = -1;
+
+	/* the unit tells us how big a column has to be */
+	unit = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep);
+	if (unit == NULL)
+		return (-1);
+	needed_blks = unit->un_segsize * unit->un_segsincolumn;
+
+	/* is the component usable at all? */
+	if (meta_check_column(sp, colnp, ep) != 0) {
+		if (!(dup_ok && (mdisuseerror(ep, MDE_ALREADY))))
+			goto done;
+		mdclrerror(ep);
+	}
+
+	/* total size */
+	paramsp->number_blks = metagetsize(colnp, ep);
+	if (paramsp->number_blks == MD_DISKADDR_ERROR)
+		goto done;
+
+	/* label */
+	label_blks = metagetlabel(colnp, ep);
+	if (label_blks == MD_DISKADDR_ERROR)
+		goto done;
+	if (label_blks > 0)
+		paramsp->has_label = 1;
+	else
+		paramsp->has_label = 0;
+
+	/* start block */
+	paramsp->start_blk = metagetstart(sp, colnp, ep);
+	if (paramsp->start_blk == MD_DISKADDR_ERROR)
+		goto done;
+
+	/* room for a whole column? */
+	if ((paramsp->number_blks - paramsp->start_blk) < needed_blks) {
+		(void) mdsyserror(ep, ENOSPC, colnp->cname);
+		goto done;
+	}
+
+	/* controller must handle the unit's maximum transfer */
+	ctlr = metagetcinfo(colnp, ep);
+	if (ctlr == NULL)
+		goto done;
+	if (ctlr->maxtransfer < unit->un_maxio) {
+		(void) mdcomperror(ep, MDE_MAXIO, meta_getminor(raidnp->dev),
+		    colnp->dev, colnp->cname);
+		goto done;
+	}
+
+	/* every check passed */
+	status = 0;
+
+	/* release the unit copy on every path */
+done:
+	Free(unit);
+	return (status);
+}
+
+/*
+ * replace raid column
+ *
+ * Replaces component oldnp of raid raidnp with newnp by issuing the
+ * MD_IOCREPLACE ioctl (FORCE_REPLACE_COMP when MDCMD_FORCE is set,
+ * REPLACE_COMP otherwise).  oldnp and newnp may name the same component
+ * whose device binding has changed (a "rebind").
+ *
+ * The raid must not be in the MD_INACCESSIBLE state and oldnp must have a
+ * device binding.  Unless MDCMD_CLUSTER_REPLACE is set, the new component
+ * is checked with validate_new_raid() and its devid is preserved across
+ * that check.  The new name is stored in the namespace before the ioctl
+ * and removed again if the ioctl (or the local-set devid update done for
+ * a rebind in a diskset) fails.  MDCMD_PRINT produces a message on stdout.
+ *
+ * Returns 0 on success; otherwise -1 or the value returned by the
+ * md*error() routine used to fill in ep.
+ */
+int
+meta_raid_replace(
+	mdsetname_t		*sp,
+	mdname_t		*raidnp,
+	mdname_t		*oldnp,
+	mdname_t		*newnp,
+	mdcmdopts_t		options,
+	md_error_t		*ep
+)
+{
+	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
+	replace_params_t	params;
+	md_dev64_t		old_dev, new_dev;
+	diskaddr_t		new_start_blk, new_end_blk;
+	/* stays 0 on the cluster-replace path, which never rebinds */
+	int			rebind = 0;
+	mr_unit_t		*mr;
+	char			*new_devidp = NULL;
+	md_error_t		xep = mdnullerror;
+	int			ret;
+	md_set_desc		*sd;
+	uint_t			tstate;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* check name */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* save new binding incase this is a rebind where oldnp==newnp */
+	new_dev = newnp->dev;
+	new_start_blk = newnp->start_blk;
+	new_end_blk = newnp->end_blk;
+
+	/* invalidate, then get the raid (fill in oldnp from metadb) */
+	meta_invalidate_name(raidnp);
+	if (meta_get_raid(sp, raidnp, ep) == NULL)
+		return (-1);
+
+	/* can't replace a component if the raid inaccessible */
+	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
+		return (-1);
+	}
+	if (tstate & MD_INACCESSIBLE) {
+		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
+		    meta_getminor(raidnp->dev), raidnp->cname));
+	}
+
+	/* the old device binding is now established */
+	if ((old_dev = oldnp->dev) == NODEV64)
+		return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+
+	/* setup raid info */
+	(void) memset(&params, 0, sizeof (params));
+	params.mnum = meta_getminor(raidnp->dev);
+	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
+	params.old_dev = old_dev;
+	params.cmd = force ? FORCE_REPLACE_COMP : REPLACE_COMP;
+
+	if (options & MDCMD_CLUSTER_REPLACE) {
+		diskaddr_t	label;
+
+		/* only checks that the unit can be fetched; fail with -1 */
+		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
+			return (-1);
+		Free(mr);
+		params.options = MDIOCTL_NO_RESYNC_RAID;
+		/*
+		 * NOTE(review): errors from metagetsize() and metagetstart()
+		 * are not checked on this path - confirm that is intended.
+		 */
+		params.number_blks = metagetsize(newnp, ep);
+		label = metagetlabel(newnp, ep);
+		if ((label == MD_DISKADDR_ERROR) || (label == 0))
+			params.has_label = 0;
+		else
+			params.has_label = 1;
+		params.start_blk = metagetstart(sp, newnp, ep);
+	} else {
+		if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
+		    (old_dev != new_dev)) {
+			rebind = 1;
+		} else {
+			rebind = 0;
+		}
+		if (rebind) {
+			/* oldnp==newnp: restore the binding saved above */
+			newnp->dev = new_dev;
+			newnp->start_blk = new_start_blk;
+			newnp->end_blk = new_end_blk;
+		}
+
+		/*
+		 * Save a copy of the devid associated with the new disk, the
+		 * reason is that the checks for the column (meta_check_column)
+		 * via validate_new_raid(), could cause the disk's devid to be
+		 * changed to that of the devid that is currently stored in the
+		 * replica namespace for the disk in question. This devid could
+		 * be stale if we are replacing the disk. The actual function
+		 * that overwrites the devid is dr2drivedesc().
+		 */
+
+		/* don't setup new_devid if no devid's or MN diskset */
+		if (newnp->drivenamep->devid != NULL)
+			new_devidp = Strdup(newnp->drivenamep->devid);
+
+		if (!metaislocalset(sp)) {
+			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+				/* don't leak the saved copy */
+				if (new_devidp != NULL)
+					Free(new_devidp);
+				return (-1);
+			}
+			if (MD_MNSET_DESC(sd)) {
+				/* discard the saved copy without leaking it */
+				if (new_devidp != NULL)
+					Free(new_devidp);
+				new_devidp = NULL;
+			}
+		}
+
+		/* check out new (sets up start_blk, has_label, number_blks) */
+		if (validate_new_raid(sp, raidnp, newnp, &params, rebind,
+		    ep) != 0) {
+			Free(new_devidp);
+			return (-1);
+		}
+
+		/*
+		 * Copy back the saved devid.  Ownership of the saved copy
+		 * moves to the drive name; when nothing was saved the field
+		 * becomes NULL rather than pointing at freed memory.
+		 */
+		Free(newnp->drivenamep->devid);
+		newnp->drivenamep->devid = new_devidp;
+		new_devidp = NULL;
+	}
+
+	/* store name in namespace, allocate new key */
+	if (add_key_name(sp, newnp, NULL, ep) != 0)
+		return (-1);
+
+	if (rebind && !metaislocalset(sp)) {
+		/*
+		 * We are 'rebind'ing a disk that is in a diskset so as well
+		 * as updating the diskset's namespace the local set needs
+		 * to be updated because it also contains a reference to the
+		 * disk in question.
+		 */
+		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
+		    newnp->cname, ep);
+
+		if (ret != METADEVADM_SUCCESS) {
+			/* use a scratch error so ep keeps the real cause */
+			(void) del_key_name(sp, newnp, &xep);
+			return (-1);
+		}
+	}
+
+	/* replace column */
+	params.new_dev = new_dev;
+	params.new_key = newnp->key;
+	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
+		(void) del_key_name(sp, newnp, ep);
+		return (mdstealerror(ep, &params.mde));
+	}
+
+	/* clear cache */
+	meta_invalidate_name(oldnp);
+	meta_invalidate_name(newnp);
+	meta_invalidate_name(raidnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: device %s is replaced with %s\n"),
+		    raidnp->cname, oldnp->cname, newnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * enable raid column
+ *
+ * Re-enables component colnp of raid raidnp with the MD_IOCREPLACE ioctl
+ * (FORCE_ENABLE_COMP when MDCMD_FORCE is set, ENABLE_COMP otherwise),
+ * using the same device as both old and new device.
+ *
+ * If the device number obtained through meta_getdev() differs from the
+ * one found after re-reading the raid, the work is delegated to
+ * meta_raid_replace() with colnp as both old and new component, and the
+ * name entry for the previous device number is then deleted.
+ *
+ * The raid must not be in the MD_INACCESSIBLE state.  For a non-local
+ * set, a failure to update the local set with meta_fixdevid() is printed
+ * and cleared; it does not fail the enable.  MDCMD_PRINT produces a
+ * message on stdout.
+ *
+ * Returns 0 on success; otherwise -1 or the value returned by the
+ * md*error() routine used to fill in ep (or by meta_raid_replace()).
+ */
+int
+meta_raid_enable(
+	mdsetname_t		*sp,
+	mdname_t		*raidnp,
+	mdname_t		*colnp,
+	mdcmdopts_t		options,
+	md_error_t		*ep
+)
+{
+	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
+	replace_params_t	params;
+	md_dev64_t		fs_dev, del_dev;
+	int			err = 0;
+	char			*devnm;
+	int			ret;
+	uint_t			tstate;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* check name */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* get the file_system dev binding */
+	if (meta_getdev(sp, colnp, ep) != 0)
+		return (-1);
+	fs_dev = colnp->dev;
+
+	/* get the raid unit (fill in colnp->dev with metadb version) */
+	meta_invalidate_name(raidnp);
+	if (meta_get_raid(sp, raidnp, ep) == NULL)
+		return (-1);
+
+	/* enabling a component can't work if the raid inaccessible */
+	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
+		return (-1);
+	}
+	if (tstate & MD_INACCESSIBLE) {
+		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
+		    meta_getminor(raidnp->dev), raidnp->cname));
+	}
+
+	/* the metadb device binding is now established */
+	if (colnp->dev == NODEV64)
+		return (mdsyserror(ep, ENODEV, colnp->cname));
+
+	/*
+	 * check for the case where the dev_t has changed between the
+	 * filesystem and the metadb.  This is called a rebind, and
+	 * is handled by meta_raid_replace.
+	 */
+	if (fs_dev != colnp->dev) {
+		/*
+		 * Save the devt of mddb version
+		 */
+		del_dev = colnp->dev;
+
+		/* establish file system binding with invalid start/end */
+		colnp->dev = fs_dev;
+		colnp->start_blk = -1;
+		colnp->end_blk = -1;
+		err = meta_raid_replace(sp, raidnp, colnp, colnp, options, ep);
+
+		/*
+		 * Don't do it if meta_raid_replace returns an error.
+		 * Otherwise look up the name entry of the old devt (which
+		 * loads its key into colnp->key) and delete that key.
+		 */
+		if (!err && (devnm = meta_getnmentbydev(sp->setno, MD_SIDEWILD,
+			del_dev, NULL, NULL, &colnp->key, ep)) != NULL) {
+			(void) del_key_name(sp, colnp, ep);
+			Free(devnm);
+		}
+		return (err);
+	}
+
+	/* setup raid info; old and new device are the same for an enable */
+	(void) memset(&params, 0, sizeof (params));
+	params.mnum = meta_getminor(raidnp->dev);
+	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
+	params.old_dev = params.new_dev = colnp->dev;
+	if (force)
+		params.cmd = FORCE_ENABLE_COMP;
+	else
+		params.cmd = ENABLE_COMP;
+
+	/* check it out (dup_ok = 1: the column is already part of the raid) */
+	if (validate_new_raid(sp, raidnp, colnp, &params, 1, ep) != 0)
+		return (-1);
+
+	/* enable column */
+	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0)
+		return (mdstealerror(ep, &params.mde));
+
+	/*
+	 * are we dealing with a non-local set? If so need to update the
+	 * local namespace so that the disk record has the correct devid.
+	 */
+	if (!metaislocalset(sp)) {
+		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, colnp->cname,
+		    ep);
+
+		if (ret != METADEVADM_SUCCESS) {
+			/*
+			 * Failed to update the local set. Nothing to do here
+			 * apart from report the error. The namespace is
+			 * most likely broken and some form of remedial
+			 * recovery is going to be required.
+			 */
+			mde_perror(ep, "");
+			mdclrerror(ep);
+		}
+	}
+
+	/* clear cache */
+	meta_invalidate_name(colnp);
+	meta_invalidate_name(raidnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: device %s is enabled\n"),
+		    raidnp->cname, colnp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Make sure column number col of raidp does not overlap any column that
+ * precedes it in the same raid.
+ *
+ * Each earlier column is compared against column col with
+ * meta_check_overlap(); the raid's own name is passed as the first
+ * argument.
+ *
+ * Returns 0 when no overlap is reported, -1 on the first overlap.
+ */
+static int
+check_twice(
+	md_raid_t	*raidp,
+	uint_t		col,
+	md_error_t	*ep
+)
+{
+	mdname_t	*raid_name = raidp->common.namep;
+	mdname_t	*candidate = raidp->cols.cols_val[col].colnamep;
+	uint_t		prev = 0;
+
+	while (prev < col) {
+		mdname_t	*earlier = raidp->cols.cols_val[prev].colnamep;
+
+		if (meta_check_overlap(raid_name->cname, candidate, 0, -1,
+		    earlier, 0, -1, ep) != 0) {
+			return (-1);
+		}
+		++prev;
+	}
+	return (0);
+}
+
+/*
+ * default raid interlace
+ *
+ * Returns the default interlace in disk blocks: 16k converted with
+ * btodb(), raised if necessary so that it is not below MININTERLACE.
+ */
+diskaddr_t
+meta_default_raid_interlace(void)
+{
+	diskaddr_t	interlace;
+
+	/* default to 16k, round up if necessary */
+	interlace = btodb(16 * 1024);
+	/*
+	 * interlace is in blocks, so MININTERLACE has to go through
+	 * lbtodb() for the rounding as well as for the comparison;
+	 * otherwise the rounded result would not be in blocks.
+	 */
+	if (interlace < lbtodb(MININTERLACE))
+		interlace = roundup(lbtodb(MININTERLACE), interlace);
+	return (interlace);
+}
+
+/*
+ * Validate a raid interlace value.
+ *
+ * interlace is compared against btodb(RAID_MIN_INTERLACE) and
+ * btodb(MAXINTERLACE); a value outside that range is rejected with
+ * MDE_BAD_INTERLACE reported against uname.
+ *
+ * Returns 0 when the interlace is acceptable, otherwise the result of
+ * mderror().
+ */
+int
+meta_raid_check_interlace(
+	diskaddr_t	interlace,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	/* too small */
+	if (interlace < btodb(RAID_MIN_INTERLACE))
+		return (mderror(ep, MDE_BAD_INTERLACE, uname));
+
+	/* too large */
+	if (interlace > btodb(MAXINTERLACE))
+		return (mderror(ep, MDE_BAD_INTERLACE, uname));
+
+	return (0);
+}
+
+/*
+ * check raid
+ *
+ * Validates the raid description raidp before it is created or updated:
+ * the column count must be at least MD_RAID_MIN and no smaller than
+ * orig_ncol, the state must be RUS_INIT or RUS_OKAY, and the interlace
+ * must pass meta_raid_check_interlace().  With MDCMD_DOIT, a hot spare
+ * pool name (if any) is checked with metachkhsp().  Unless MDCMD_UPDATE
+ * is set, each column is checked with meta_check_column() and must have
+ * at least one interlace worth of blocks after its start block.  Every
+ * column is also checked for overlap with the columns before it.
+ *
+ * Side effects on raidp: an interlace of 0 is replaced by the default,
+ * and each column's state is set to RCS_INIT or RCS_OKAY to match the
+ * raid state.
+ *
+ * Returns 0 on success; otherwise -1 or the value returned by the
+ * md*error() routine used to fill in ep.
+ */
+int
+meta_check_raid(
+	mdsetname_t	*sp,
+	md_raid_t	*raidp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*raidnp = raidp->common.namep;
+	int		doit = ((options & MDCMD_DOIT) ? 1 : 0);
+	int		updateit = ((options & MDCMD_UPDATE) ? 1 : 0);
+	uint_t		ncol;
+	uint_t		col;
+	minor_t		mnum = meta_getminor(raidnp->dev);
+
+	/* check number */
+	if (((ncol = raidp->cols.cols_len) < MD_RAID_MIN) ||
+	    (raidp->orig_ncol > ncol)) {
+		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
+	}
+
+	/* compute default interlace */
+	if (raidp->interlace == 0) {
+		raidp->interlace = meta_default_raid_interlace();
+	}
+
+	/* check state; only RUS_INIT and RUS_OKAY are accepted here */
+	switch (raidp->state) {
+	case RUS_INIT:
+	case RUS_OKAY:
+		break;
+
+	default:
+		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
+	}
+
+	/* check interlace */
+	if (meta_raid_check_interlace(raidp->interlace, raidnp->cname, ep) != 0)
+		return (-1);
+
+	/* check hotspare pool name */
+	if (doit) {
+		if ((raidp->hspnamep != NULL) &&
+		    (metachkhsp(sp, raidp->hspnamep, ep) != 0)) {
+			return (-1);
+		}
+	}
+
+	/* check columns */
+	for (col = 0; (col < ncol); ++col) {
+		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = mdcp->colnamep;
+		diskaddr_t	start_blk, size;
+
+		/* setup column: its state follows the raid state */
+		if (raidp->state == RUS_INIT)
+			mdcp->state = RCS_INIT;
+		else
+			mdcp->state = RCS_OKAY;
+
+		/* check column */
+		if (!updateit) {
+			if (meta_check_column(sp, colnp, ep) != 0)
+				return (-1);
+			if (((start_blk = metagetstart(sp, colnp, ep)) ==
+			    MD_DISKADDR_ERROR) || ((size = metagetsize(colnp,
+			    ep)) == MD_DISKADDR_ERROR)) {
+				return (-1);
+			}
+			/* nothing usable past the start block */
+			if (start_blk >= size)
+				return (mdsyserror(ep, ENOSPC, colnp->cname));
+			size -= start_blk;
+			/* need at least one whole interlace of space */
+			size = rounddown(size, raidp->interlace);
+			if (size == 0)
+				return (mdsyserror(ep, ENOSPC, colnp->cname));
+		}
+
+		/* check this raid too (overlap with earlier columns) */
+		if (check_twice(raidp, col, ep) != 0)
+			return (-1);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Set up the geometry of raid unit mr from the columns of raidp.
+ *
+ * The largest write and read reinstruct values found over all columns
+ * are combined with the geometry of the first column and handed to
+ * meta_setup_geom() together with a round_cyl argument of 1.
+ *
+ * Returns 0 on success, -1 if a geometry lookup or meta_setup_geom()
+ * fails.
+ */
+static int
+raid_geom(
+	md_raid_t	*raidp,
+	mr_unit_t	*mr,
+	md_error_t	*ep
+)
+{
+	const uint_t	cyl_rounding = 1;
+	uint_t		worst_write = 0;
+	uint_t		worst_read = 0;
+	uint_t		idx = 0;
+	mdgeom_t	*geom;
+
+	/* find the worst reinstruct values over every column */
+	while (idx < raidp->cols.cols_len) {
+		geom = metagetgeom(raidp->cols.cols_val[idx].colnamep, ep);
+		if (geom == NULL)
+			return (-1);
+		if (worst_write < geom->write_reinstruct)
+			worst_write = geom->write_reinstruct;
+		if (worst_read < geom->read_reinstruct)
+			worst_read = geom->read_reinstruct;
+		idx++;
+	}
+
+	/* the first column supplies the base geometry */
+	assert(raidp->cols.cols_len > 0);
+	geom = metagetgeom(raidp->cols.cols_val[0].colnamep, ep);
+	if (geom == NULL)
+		return (-1);
+
+	if (meta_setup_geom((md_unit_t *)mr, raidp->common.namep, geom,
+	    worst_write, worst_read, cyl_rounding, ep) != 0) {
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Count the columns of raid unit mr whose un_devstate has at least one
+ * of the bits in state set.
+ */
+int
+meta_raid_state_cnt(mr_unit_t *mr, rcs_state_t state)
+{
+	int	matches = 0;
+	int	idx = 0;
+
+	while (idx < mr->un_totalcolumncnt) {
+		if (mr->un_column[idx].un_devstate & state)
+			matches++;
+		idx++;
+	}
+	return (matches);
+}
+/*
+ * validate that a raid device being created with the -k flag is a real
+ * raid device
+ *
+ * For every column of unit mr, one DEV_BSIZE block is read from the
+ * column's raw device (raidp column rname) at the column's un_pwstart
+ * and interpreted as a prewrite header.  The header must carry the
+ * expected column number, all columns must agree on the unit number
+ * and, when the header has the RAID_PWMAGIC extended magic, its layout
+ * fields must match the unit structure.
+ *
+ * Returns 0 if every column checks out, -1 otherwise (including when a
+ * column cannot be opened, positioned or fully read).
+ */
+int
+meta_raid_valid(md_raid_t *raidp, mr_unit_t *mr)
+{
+	long long	buf[DEV_BSIZE / sizeof (long long)];
+	raid_pwhdr_t	pwhdr;
+	raid_pwhdr_t	*rpw = &pwhdr;
+	minor_t		mnum = 0;	/* set from column 0 before any use */
+	int		col;
+	int		fd;
+
+	for (col = 0; col < mr->un_totalcolumncnt; col++) {
+		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = cp->colnamep;
+
+		/* nothing to close yet if the open itself fails */
+		if ((fd = open(colnp->rname, O_RDONLY)) < 0)
+			return (-1);
+
+		if (lseek64(fd,
+		    (mr->un_column[col].un_pwstart * DEV_BSIZE), SEEK_SET) < 0)
+			goto error_exit;
+
+		/* a short read would leave part of buf undefined */
+		if (read(fd, buf, DEV_BSIZE) != DEV_BSIZE)
+			goto error_exit;
+
+		/*
+		 * If our raid device is a 64 bit device, we can accept the
+		 * pw header we just read in.
+		 * Otherwise it's of type raid_pwhdr32_od_t and has to
+		 * be converted.
+		 */
+		if (mr->c.un_revision == MD_64BIT_META_DEV) {
+			rpw = (raid_pwhdr_t *)buf;
+		} else {
+			RAID_CONVERT_RPW((raid_pwhdr32_od_t *)buf, rpw);
+		}
+
+		if (rpw->rpw_column != col)
+			goto error_exit;
+
+		/* the first column defines the unit all others must match */
+		if (col == 0)
+			mnum = rpw->rpw_unit;
+
+		if (rpw->rpw_unit != mnum)
+			goto error_exit;
+
+		if (rpw->rpw_magic_ext == RAID_PWMAGIC) {
+			/* 4.1 prewrite header */
+			if ((rpw->rpw_origcolumncnt != mr->un_origcolumncnt) ||
+			    (rpw->rpw_totalcolumncnt
+				!= mr->un_totalcolumncnt) ||
+			    (rpw->rpw_segsize != mr->un_segsize) ||
+			    (rpw->rpw_segsincolumn != mr->un_segsincolumn) ||
+			    (rpw->rpw_pwcnt != mr->un_pwcnt) ||
+			    (rpw->rpw_pwstart !=
+				mr->un_column[col].un_pwstart) ||
+			    (rpw->rpw_devstart !=
+				mr->un_column[col].un_devstart) ||
+			    (rpw->rpw_pwsize != mr->un_pwsize))
+				goto error_exit;
+		}
+		/*
+		 * otherwise this is an old prewrite header (4.0) and the
+		 * unit structure will have to be trusted.
+		 */
+		(void) close(fd);
+	}
+
+	return (0);
+
+	/* only reached with a valid, still open descriptor */
+error_exit:
+	(void) close(fd);
+	return (-1);
+}
+
+/*
+ * create raid
+ *
+ * Builds the mr_unit_t for the RAID described by raidp and, when
+ * MDCMD_DOIT is set in options, passes it to the driver with MD_IOCSET.
+ *
+ *	sp	- set the metadevice belongs to
+ *	raidp	- description of the RAID (name, columns, interlace, ...)
+ *	options	- MDCMD_DOIT:	add the column names to the namespace and
+ *				issue the ioctl; without it only the checks
+ *				and the size calculations are performed
+ *		  MDCMD_UPDATE:	store the computed size and column size
+ *				back into raidp
+ *	ep	- error return
+ *
+ * Returns 0 on success and a non-zero value on failure.  Namespace keys
+ * added by this function are deleted again when it fails.
+ */
+int
+meta_create_raid(
+	mdsetname_t	*sp,
+	md_raid_t	*raidp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*raidnp = raidp->common.namep;
+	uint_t		ncol = raidp->cols.cols_len;
+	uint_t		orig_ncol = raidp->orig_ncol;
+	size_t		rdsize;
+	mr_unit_t	*mr;
+	uint_t		col;
+	diskaddr_t	disk_size = 0;
+	uint_t		disk_maxio = 0;
+	uint_t		pwes;
+	diskaddr_t	non_pw_blks, column_size;
+	mdnamelist_t	*keynlp = NULL;
+	md_set_params_t	set_params;
+	int		rval = -1;
+	md_timeval32_t	creation_time;
+	int		create_flag = MD_CRO_32BIT;
+
+	/* validate raid */
+	if (meta_check_raid(sp, raidp, options, ep) != 0)
+		return (-1);
+
+	/*
+	 * get the creation time before anything is allocated, so that
+	 * a failure here can return directly without leaking the unit
+	 */
+	if (meta_gettimeofday(&creation_time) == -1)
+		return (mdsyserror(ep, errno, NULL));
+
+	/*
+	 * allocate raid unit; mr_unit_t already contains one column,
+	 * so size it for ncol columns in total
+	 */
+	rdsize = sizeof (*mr) - sizeof (mr->un_column[0]);
+	rdsize += ncol * sizeof (mr->un_column[0]);
+	mr = Zalloc(rdsize);
+
+	/*
+	 * initialize the top level mr_unit_t structure
+	 * setup the unit state to indicate whether to retain
+	 * any data currently on the metadevice or to clear it
+	 */
+	mr->c.un_type = MD_METARAID;
+	MD_SID(mr) = meta_getminor(raidnp->dev);
+	mr->c.un_size = rdsize;
+	mr->un_magic = RAID_UNMAGIC;
+	mr->un_state = raidp->state;
+	mr->un_timestamp = creation_time;
+	mr->un_origcolumncnt = orig_ncol;
+	mr->un_segsize = (uint_t)raidp->interlace;
+	if (raidp->hspnamep != NULL) {
+		mr->un_hsp_id = raidp->hspnamep->hsp;
+	} else {
+		mr->un_hsp_id = MD_HSP_NONE;
+	}
+	/*
+	 * setup original columns, saving start_block and
+	 * finding smallest size and maxio
+	 */
+	for (col = 0; (col < orig_ncol); ++col) {
+		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = cp->colnamep;
+		mr_column_t	*mdc = &mr->un_column[col];
+		diskaddr_t	size;
+		uint_t		maxio;
+		mdcinfo_t	*cinfop;
+
+		/* setup state */
+		mdc->un_devstate = cp->state;
+
+		/* setup creation time */
+		mdc->un_devtimestamp = creation_time;
+
+		/* get start, size, and maxio */
+		if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
+		    MD_DISKADDR_ERROR)
+			goto out;
+		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		size -= mdc->un_orig_devstart;
+		if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
+			goto out;
+		maxio = cinfop->maxtransfer;
+
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (add_key_name(sp, colnp, &keynlp, ep) != 0)
+				goto out;
+		}
+
+		/* setup column */
+		mdc->un_orig_key = colnp->key;
+		mdc->un_orig_dev = colnp->dev;
+		mdc->un_dev = mdc->un_orig_dev;
+		mdc->un_pwstart = mdc->un_orig_pwstart;
+		mdc->un_devstart = mdc->un_orig_devstart;
+		mdc->un_alt_dev = NODEV64;
+		mdc->un_alt_pwstart = 0;
+		mdc->un_alt_devstart = 0;
+		mdc->un_hs_id = 0;
+		/* the column state follows the state of the unit */
+		if (mr->un_state == RUS_INIT)
+			mdc->un_devstate = RCS_INIT;
+		else
+			mdc->un_devstate = RCS_OKAY;
+
+		/* adjust for smallest disk */
+		if (disk_size == 0) {
+			disk_size = size;
+		} else if (size < disk_size) {
+			disk_size = size;
+		}
+		if (disk_maxio == 0) {
+			disk_maxio = maxio;
+		} else if (maxio < disk_maxio) {
+			disk_maxio = maxio;
+		}
+	}
+	assert(col == mr->un_origcolumncnt);
+
+	/*
+	 * before processing any of the attached column(s)
+	 * set up the composition of the metadevice for column
+	 * sizes and pre-write information
+	 */
+	mr->un_maxio = disk_maxio;	/* smallest maxio */
+	mr->un_iosize = min(mr->un_maxio, (mr->un_segsize + 1));
+	pwes = mr->un_iosize;
+	if (raidp->pw_count)
+		mr->un_pwcnt = raidp->pw_count;
+	else
+		mr->un_pwcnt = PWCNT_MIN;
+	if ((mr->un_pwcnt < PWCNT_MIN) || (mr->un_pwcnt > PWCNT_MAX)) {
+		(void) mderror(ep, MDE_RAID_BAD_PW_CNT, raidnp->cname);
+		goto out;
+	}
+	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
+
+	/* now calculate the number of segments per column */
+	non_pw_blks = disk_size - mr->un_pwsize;	/* smallest disk */
+	if ((mr->un_pwsize > disk_size) ||
+	    (non_pw_blks < (diskaddr_t)mr->un_segsize)) {
+		(void) mdsyserror(ep, ENOSPC, raidnp->cname);
+		goto out;
+	}
+	mr->un_segsincolumn = non_pw_blks / mr->un_segsize;
+	column_size = mr->un_segsize * mr->un_segsincolumn;
+
+	/*
+	 * adjust the pw_cnt, pw_size, to fit into any fragmentation
+	 * left over after column_size has been computed
+	 */
+	mr->un_pwsize = rounddown(((uint_t)(disk_size - column_size)), 2);
+	mr->un_pwcnt = mr->un_pwsize / pwes;
+	assert(mr->un_pwcnt >= PWCNT_MIN);
+	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
+	assert((mr->un_pwsize + column_size) <= disk_size);
+
+	/*
+	 * calculate the actual block count available based on the
+	 * segment size and the number of segments per column ...
+	 * ... and adjust for the number of parity segments
+	 */
+	mr->c.un_actual_tb = column_size * (mr->un_origcolumncnt - 1);
+
+	if (raid_geom(raidp, mr, ep) != 0)
+		goto out;
+
+	create_flag = meta_check_devicesize(mr->c.un_total_blocks);
+
+	/*
+	 * now calculate the pre-write offset and update the column
+	 * structures to include the address of the individual pre-write
+	 * areas
+	 */
+	for (col = 0; (col < orig_ncol); ++col) {
+		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = cp->colnamep;
+		mr_column_t	*mdc = &mr->un_column[col];
+		diskaddr_t	size;
+
+		/* get size */
+		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+
+		/* adjust start and size by prewrite */
+		mdc->un_orig_pwstart = mdc->un_orig_devstart;
+		mdc->un_orig_devstart += mr->un_pwsize;
+		mdc->un_pwstart = mdc->un_orig_pwstart;
+		mdc->un_devstart = mdc->un_orig_devstart;
+
+		assert(size >= mdc->un_orig_devstart);
+		size -= mdc->un_orig_devstart;
+
+		/* make sure we still have something left */
+		assert(size >= column_size);
+	}
+
+	/* do concat cols */
+	mr->un_totalcolumncnt = mr->un_origcolumncnt;
+	assert(col == mr->un_origcolumncnt);
+	for (col = orig_ncol; (col < ncol); ++col) {
+		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
+		mdname_t	*colnp = cp->colnamep;
+		mr_column_t	*mdc = &mr->un_column[col];
+
+		/* attach column */
+		if (attach_raid_col(sp, raidnp, mr, mdc, colnp,
+		    cp->state, &keynlp, options, ep) != 0) {
+			goto out;
+		}
+	}
+	assert(mr->un_totalcolumncnt == ncol);
+
+	/* fill in the size of the raid */
+	if (options & MDCMD_UPDATE) {
+		raidp->common.size = mr->c.un_total_blocks;
+		raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
+	}
+
+	/* if we're not doing anything, return success */
+	if (! (options & MDCMD_DOIT)) {
+		rval = 0;	/* success */
+		goto out;
+	}
+
+	if ((mr->un_state & RUS_OKAY) &&
+	    (meta_raid_valid(raidp, mr) != 0)) {
+		(void) mderror(ep, MDE_RAID_INVALID, raidnp->cname);
+		goto out;
+	}
+
+	/* create raid */
+	(void) memset(&set_params, 0, sizeof (set_params));
+	/* pick the unit revision chosen by meta_check_devicesize() above */
+	if (create_flag == MD_CRO_64BIT) {
+		mr->c.un_revision = MD_64BIT_META_DEV;
+		set_params.options = MD_CRO_64BIT;
+	} else {
+		mr->c.un_revision = MD_32BIT_META_DEV;
+		set_params.options = MD_CRO_32BIT;
+	}
+	set_params.mnum = MD_SID(mr);
+	set_params.size = mr->c.un_size;
+	set_params.mdp = (uintptr_t)mr;
+	MD_SETDRIVERNAME(&set_params, MD_RAID, MD_MIN2SET(set_params.mnum));
+	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+	    raidnp->cname) != 0) {
+		(void) mdstealerror(ep, &set_params.mde);
+		goto out;
+	}
+	rval = 0;	/* success */
+
+	/* cleanup, return status */
+out:
+	Free(mr);
+	if (rval != 0) {
+		/* back out the namespace entries added above */
+		(void) del_key_names(sp, keynlp, NULL);
+	}
+	metafreenamelist(keynlp);
+	if ((rval == 0) && (options & MDCMD_DOIT)) {
+		if (invalidate_columns(sp, raidnp, ep) != 0)
+			rval = -1;
+		meta_invalidate_name(raidnp);
+	}
+	return (rval);
+}
+
+/*
+ * initialize raid
+ * NOTE: this function is metainit(1m)'s command line parser!
+ *
+ * Argument layout, as consumed below:
+ *	argv[0]		name of the RAID metadevice
+ *	"-r"
+ *	column ...	every argument up to the first one starting with '-'
+ *	[-h hsp] [-i interlace] [-k] [-o orig_ncol] [-w pw_count]
+ *
+ * Unless MDCMD_NOLOCK is set in options the set lock is taken and set
+ * ownership is checked before the metadevice is looked up.
+ *
+ * Returns 0 on success; on failure ep is set and a non-zero value is
+ * returned.
+ */
+int
+meta_init_raid(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*uname = argv[0];
+	mdname_t	*raidnp = NULL;
+	int		old_optind;
+	int		c;
+	md_raid_t	*raidp = NULL;
+	uint_t		ncol, col;
+	int		rval = -1;
+	md_set_desc	*sd;
+
+	/* get raid name */
+	assert(argc > 0);
+	if (argc < 1)
+		goto syntax;
+	if ((raidnp = metaname(spp, uname, ep)) == NULL)
+		goto out;
+	assert(*spp != NULL);
+
+	/*
+	 * Raid metadevice not allowed on multi-node diskset.
+	 */
+	if (! metaislocalset(*spp)) {
+		if ((sd = metaget_setdesc(*spp, ep)) == NULL)
+			goto out;
+		if (MD_MNSET_DESC(sd)) {
+			rval = meta_cook_syntax(ep, MDE_MNSET_NORAID, uname,
+						argc, argv);
+			goto out;
+		}
+	}
+
+	uname = raidnp->cname;
+	if (metachkmeta(raidnp, ep) != 0)
+		goto out;
+
+	if (!(options & MDCMD_NOLOCK)) {
+		/* grab set lock */
+		if (meta_lock(*spp, TRUE, ep) != 0)
+			goto out;
+
+		if (meta_check_ownership(*spp, ep) != 0)
+			goto out;
+	}
+
+	/*
+	 * see if it exists already; the only acceptable lookup failure
+	 * is MDE_UNIT_NOT_SETUP, which is cleared before carrying on
+	 */
+	if (metagetmiscname(raidnp, ep) != NULL) {
+		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+		    meta_getminor(raidnp->dev), uname);
+		goto out;
+	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
+		goto out;
+	} else {
+		mdclrerror(ep);
+	}
+	--argc, ++argv;
+
+	/* grab -r */
+	if ((argc < 1) || (strcmp(argv[0], "-r") != 0))
+		goto syntax;
+	--argc, ++argv;
+
+	/* parse general options (none are accepted here) */
+	optind = 0;
+	opterr = 0;
+	if (getopt(argc, argv, "") != -1)
+		goto options;
+
+	/* allocate raid */
+	raidp = Zalloc(sizeof (*raidp));
+
+	/* setup common */
+	raidp->common.namep = raidnp;
+	raidp->common.type = MD_METARAID;
+	raidp->state = RUS_INIT;
+
+	/* allocate and parse cols */
+	for (ncol = 0; ((ncol < argc) && (argv[ncol][0] != '-')); ++ncol)
+		;
+	raidp->cols.cols_len = ncol;
+	if (ncol != 0) {
+		raidp->cols.cols_val =
+		    Zalloc(ncol * sizeof (*raidp->cols.cols_val));
+	}
+	for (col = 0; ((argc > 0) && (col < ncol)); ++col) {
+		md_raidcol_t	*mdc = &raidp->cols.cols_val[col];
+		mdname_t	*colnp;
+
+		/* parse column name */
+		if ((colnp = metaname(spp, argv[0], ep)) == NULL)
+			goto out;
+		/* check for soft partitions */
+		if (meta_sp_issp(*spp, colnp, ep) != 0) {
+			/* check disks */
+			if (metachkcomp(colnp, ep) != 0)
+				goto out;
+		}
+		mdc->colnamep = colnp;
+		--argc, ++argv;
+	}
+
+	/*
+	 * parse raid options; argv[0] is now the first argument after
+	 * the columns and old_optind tracks the index of the option
+	 * currently being examined
+	 */
+	old_optind = optind = 0;
+	opterr = 0;
+	while ((c = getopt(argc, argv, "h:i:ko:w:")) != -1) {
+		switch (c) {
+		case 'h':
+			if ((raidp->hspnamep = metahspname(spp, optarg,
+			    ep)) == NULL) {
+				goto out;
+			}
+			break;
+
+		case 'i':
+			if (parse_interlace(uname, optarg, &raidp->interlace,
+			    ep) != 0) {
+				goto out;
+			}
+			if (meta_raid_check_interlace(raidp->interlace,
+			    uname, ep))
+				goto out;
+			break;
+
+		case 'k':
+			raidp->state = RUS_OKAY;
+			break;
+
+		case 'o':
+			if ((sscanf(optarg, "%u", &raidp->orig_ncol) != 1) ||
+			    ((int)raidp->orig_ncol < 0)) {
+				goto syntax;
+			}
+			if ((raidp->orig_ncol < MD_RAID_MIN) ||
+			    (raidp->orig_ncol > ncol)) {
+				rval = mderror(ep, MDE_BAD_ORIG_NCOL, uname);
+				goto out;
+			}
+			break;
+		case 'w':
+			if ((sscanf(optarg, "%d", &raidp->pw_count) != 1) ||
+			    ((int)raidp->pw_count < 0))
+				goto syntax;
+			if (((int)raidp->pw_count < PWCNT_MIN) ||
+			    ((int)raidp->pw_count > PWCNT_MAX)) {
+				rval = mderror(ep, MDE_RAID_BAD_PW_CNT, uname);
+				goto out;
+			}
+			break;
+		default:
+			/*
+			 * step forward to the offending option so that
+			 * the error reports it and what follows it
+			 */
+			argc -= old_optind;
+			argv += old_optind;
+			goto options;
+		}
+		old_optind = optind;
+	}
+	argc -= optind;
+	argv += optind;
+
+	/* we should be at the end */
+	if (argc != 0)
+		goto syntax;
+
+	/* default to all original columns */
+	if (raidp->orig_ncol == 0)
+		raidp->orig_ncol = ncol;
+
+	/* create raid */
+	if (meta_create_raid(*spp, raidp, options, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is setup\n"),
+		    uname);
+		(void) fflush(stdout);
+	}
+	goto out;
+
+	/* syntax error */
+syntax:
+	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
+	goto out;
+
+	/* options error */
+options:
+	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
+	goto out;
+
+	/* cleanup, return status */
+out:
+	if (raidp != NULL)
+		meta_free_raid(raidp);
+	return (rval);
+}
+
+/*
+ * reset RAIDs
+ *
+ * With raidnp == NULL every RAID in the set is reset in turn, stopping
+ * at the first failure.  Otherwise the named RAID is cleared and, when
+ * MDCMD_RECURSE is set, so are those of its columns that are themselves
+ * metadevices.  Returns 0 on success, non-zero on failure.
+ */
+int
+meta_raid_reset(
+	mdsetname_t	*sp,
+	mdname_t	*raidnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_raid_t	*raidp;
+	int		rval = -1;
+	int		col;
+
+	/* the name, if given, must belong to the set */
+	assert(sp != NULL);
+	assert((raidnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
+
+	/* no name: walk the list of all RAIDs in the set */
+	if (raidnp == NULL) {
+		mdnamelist_t	*all_raids = NULL;
+		mdnamelist_t	*cur;
+
+		if (meta_get_raid_names(sp, &all_raids, 0, ep) < 0)
+			return (-1);
+
+		rval = 0;
+		for (cur = all_raids; ((cur != NULL) && (rval == 0));
+		    cur = cur->next) {
+			if (meta_raid_reset(sp, cur->namep, options, ep) != 0)
+				rval = -1;
+		}
+
+		metafreenamelist(all_raids);
+		return (rval);
+	}
+
+	/* the name has to be a metadevice */
+	if (metachkmeta(raidnp, ep) != 0)
+		return (-1);
+
+	/* fetch the RAID description */
+	raidp = meta_get_raid(sp, raidnp, ep);
+	if (raidp == NULL)
+		return (-1);
+
+	/* refuse while another metadevice is built on top of this one */
+	if (MD_HAS_PARENT(raidp->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(raidnp->dev),
+		    raidnp->cname));
+	}
+
+	/* drop cached information about the columns */
+	if (invalidate_columns(sp, raidnp, ep) != 0)
+		return (-1);
+
+	/* clear the metadevice itself */
+	if (meta_reset(sp, raidnp, options, ep) == 0) {
+		rval = 0;	/* success */
+
+		/* report */
+		if (options & MDCMD_PRINT) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: RAID is cleared\n"), raidnp->cname);
+			(void) fflush(stdout);
+		}
+
+		/* optionally clear the columns which are metadevices */
+		if (options & MDCMD_RECURSE) {
+			for (col = 0; (col < raidp->cols.cols_len); ++col) {
+				mdname_t	*colnp =
+				    raidp->cols.cols_val[col].colnamep;
+
+				if (metaismeta(colnp) &&
+				    (meta_reset_by_name(sp, colnp, options,
+				    ep) != 0))
+					rval = -1;
+			}
+		}
+	}
+
+	/* the cached name is stale whether or not the reset worked */
+	meta_invalidate_name(raidnp);
+	return (rval);
+}
+
+/*
+ * reports TRUE if any RAID in raid_names cannot be fetched or is in a
+ * state other than RUS_OKAY or RUS_INIT; errors raised while looking
+ * are discarded
+ */
+int
+meta_raid_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *raid_names)
+{
+	md_error_t	scratch = mdnullerror;
+	md_error_t	*ep = &scratch;
+	mdnamelist_t	*cur;
+	int		found = FALSE;
+
+	/* stop at the first RAID that is missing or unhealthy */
+	for (cur = raid_names; ((cur != NULL) && (found == FALSE));
+	    cur = cur->next) {
+		md_raid_t	*raidp = meta_get_raid(sp, cur->namep, ep);
+
+		if (raidp == NULL) {
+			found = TRUE;
+		} else if ((raidp->state != RUS_OKAY) &&
+		    (raidp->state != RUS_INIT)) {
+			found = TRUE;
+		}
+	}
+
+	/* the error is private to this function, so just drop it */
+	if (!mdisok(ep))
+		mdclrerror(ep);
+
+	return (found);
+}
+/*
+ * regen parity on a raid
+ *
+ * Verifies that raidnp names a RAID metadevice and then issues
+ * MD_IOCSETREGEN for it with size as the copy size.  Returns 0 on
+ * success, non-zero with ep set on failure.
+ */
+int
+meta_raid_regen_byname(mdsetname_t *sp, mdname_t *raidnp, diskaddr_t size,
+	md_error_t *ep)
+{
+	md_resync_ioctl_t	regen;
+	char			*drvname;
+
+	/* the name must belong to the given set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* only RAID metadevices qualify */
+	drvname = metagetmiscname(raidnp, ep);
+	if (drvname == NULL)
+		return (-1);
+	if (strcmp(drvname, MD_RAID) != 0) {
+		return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
+		    raidnp->cname));
+	}
+
+	/* build the request and hand it to the driver */
+	(void) memset(&regen, 0, sizeof (regen));
+	MD_SETDRIVERNAME(&regen, MD_RAID, sp->setno);
+	regen.ri_copysize = size;
+	regen.ri_mnum = meta_getminor(raidnp->dev);
+	if (metaioctl(MD_IOCSETREGEN, &regen, &regen.mde, raidnp->cname) == 0)
+		return (0);
+
+	/* pass the driver's error back to the caller */
+	return (mdstealerror(ep, &regen.mde));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_raid_resync.c b/usr/src/lib/lvm/libmeta/common/meta_raid_resync.c
new file mode 100644
index 0000000000..061299022f
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_raid_resync.c
@@ -0,0 +1,130 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1994-2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * raid operations
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_mirror.h>
+
+/*
+ * resync raid
+ *
+ * Verifies that raidnp names a RAID metadevice and then issues
+ * MD_IOCSETSYNC for it with size as the copy size.  Returns 0 on
+ * success, non-zero with ep set on failure.
+ */
+int
+meta_raid_resync(
+	mdsetname_t		*sp,
+	mdname_t		*raidnp,
+	daddr_t			size,
+	md_error_t		*ep
+)
+{
+	md_resync_ioctl_t	sync_req;
+	char			*drvname;
+
+	/* the name must belong to the given set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
+
+	/* only RAID metadevices qualify */
+	drvname = metagetmiscname(raidnp, ep);
+	if (drvname == NULL)
+		return (-1);
+	if (strcmp(drvname, MD_RAID) != 0) {
+		return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
+		    raidnp->cname));
+	}
+
+	/* build the request and hand it to the driver */
+	(void) memset(&sync_req, 0, sizeof (sync_req));
+	MD_SETDRIVERNAME(&sync_req, MD_RAID, sp->setno);
+	sync_req.ri_copysize = size;
+	sync_req.ri_mnum = meta_getminor(raidnp->dev);
+	if (metaioctl(MD_IOCSETSYNC, &sync_req, &sync_req.mde,
+	    raidnp->cname) == 0)
+		return (0);
+
+	/* pass the driver's error back to the caller */
+	return (mdstealerror(ep, &sync_req.mde));
+}
+
+/*
+ * NAME:	meta_raid_resync_all
+ * DESCRIPTION: start a resync on every RAID device in the set
+ * PARAMETERS:	mdsetname_t	*sp	- the set to synch
+ *		daddr_t		size	- resync size
+ *		md_error_t	*ep	- return error info
+ * RETURNS:	-1 if the RAID names cannot be obtained; 0 in the
+ *		process for which md_daemonize() returned a positive
+ *		value; when md_daemonize() fails, 0 or -1 depending on
+ *		whether every resync could be started.  The process for
+ *		which md_daemonize() returned 0 does not return: it
+ *		calls exit(0) once the loop is done.
+ */
+int
+meta_raid_resync_all(
+	mdsetname_t	*sp,
+	daddr_t		size,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*raids = NULL;
+	mdnamelist_t	*cur;
+	int		rval = 0;
+	int		fval;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* collect the names of all RAIDs */
+	if (meta_get_raid_names(sp, &raids, 0, ep) < 0)
+		return (-1);
+
+	/* md_daemonize forks off a process to do the work */
+	fval = md_daemonize(sp, ep);
+	if (fval > 0) {
+		/* parent: nothing more to do here */
+		if (raids != NULL)
+			metafreenamelist(raids);
+		return (0);
+	}
+	if (fval != 0) {
+		/* could not fork: forget the error, work in this process */
+		mdclrerror(ep);
+	}
+
+	assert((fval == 0) || (fval == -1));
+
+	/* kick off a resync on each raid, remembering any failure */
+	for (cur = raids; (cur != NULL); cur = cur->next) {
+		if (meta_raid_resync(sp, cur->namep, size, ep) != 0)
+			rval = -1;
+	}
+
+	/* cleanup; the forked worker exits instead of returning */
+	if (raids != NULL)
+		metafreenamelist(raids);
+	if (fval == 0)
+		exit(0);
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_rename.c b/usr/src/lib/lvm/libmeta/common/meta_rename.c
new file mode 100644
index 0000000000..617b3f3694
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_rename.c
@@ -0,0 +1,539 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * change the identity of a metadevice
+ * These are the "do it" functions for the metarename command.
+ */
+
+#include <string.h>
+#include <meta.h>
+#include <sys/lvm/md_rename.h>
+
+/*
+ * private
+ *
+ * Bits kept in the local "flags" word of meta_rename(), meta_exchange()
+ * and meta_swap():
+ *	FORCE	- set when the caller passed MDCMD_FORCE; the check for
+ *		  an open metadevice is skipped
+ *	NOISY	- set from the MD_DEBUG environment variable; meta_swap()
+ *		  prints the rename transaction to stderr
+ *	NOFLIP	- set from MD_DEBUG ("EXCHANGE=NOFLIP"); meta_exchange()
+ *		  never swaps the order of its two names
+ *	DRYRUN	- set when MDCMD_DOIT is absent; only the checks are run
+ */
+#define	FORCE	(0x00000001)
+#define	NOISY	(0x00000010)
+#define	NOFLIP	(0x00000020)
+#define	DRYRUN	(0x00000040)
+
+/*
+ * Printable name of a rename operation code; any value other than the
+ * three listed is reported as "garbage".
+ */
+#define	OP_STR(op)						\
+	((op) == MDRNOP_EXCHANGE?	"exchange":		\
+	    (op) == MDRNOP_RENAME?	"rename":		\
+	    (op) == MDRNOP_UNK?		"<unknown>": "garbage")
+
+
+/*
+ * Check if from_np is open
+ * Return 0 if not open, -1 if open or if the state could not be
+ * determined; ep describes the reason in either failure case
+ */
+static int
+check_open(
+	mdsetname_t	*sp,
+	mdname_t	*from_np,
+	md_error_t	*ep)
+{
+	int		isopen;
+
+	isopen = meta_isopen(sp, from_np, ep, (mdcmdopts_t)0);
+
+	/* not open: nothing stands in the way */
+	if (isopen == 0)
+		return (0);
+
+	if (isopen < 0) {
+		/* the lookup itself failed and must have set ep */
+		assert(!mdisok(ep));
+	} else if (mdisok(ep)) {
+		/* open, and nobody has said why yet */
+		(void) mdmderror(ep, MDE_RENAME_BUSY,
+			meta_getminor(from_np->dev),
+			from_np->cname);
+	}
+	return (-1);
+}
+
+/*
+ * meta_swap is the common code used by the
+ * meta_rename() and meta_exchange() entry points
+ *
+ * It makes sure both names have a namespace key, fills in an
+ * md_rename_t for the requested operation and issues MD_IOCRENAME.
+ * Returns 0 on success, non-zero with ep set on failure.
+ */
+
+static int
+meta_swap(
+	mdsetname_t	*sp,
+	mdname_t	*from_np,
+	mdname_t	*to_np,
+	md_renop_t	op,
+	int		flags,
+	md_error_t	*ep)
+{
+	md_rename_t	ren;
+
+	/*
+	 * An existing device may already own a key.  Look it up first;
+	 * adding the key a second time would leave the namespace
+	 * record with an inconsistent n_count.
+	 */
+	if (from_np->dev != NODEV) {
+		(void) meta_getnmentbydev(sp->setno, MD_SIDEWILD, from_np->dev,
+		    NULL, NULL, &from_np->key, ep);
+	}
+
+	/* still no usable key for the source: create one */
+	if (((from_np->key == MD_KEYBAD) || (from_np->key == MD_KEYWILD)) &&
+	    (add_key_name(sp, from_np, NULL, ep) != 0)) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	/* start the transaction with the source half */
+	(void) memset(&ren, 0, sizeof (ren));
+
+	ren.flags	= 0;
+	ren.revision	= MD_RENAME_VERSION;
+	ren.op		= op;
+	ren.from.mnum	= meta_getminor(from_np->dev);
+	ren.from.key	= from_np->key;
+
+	if ((ren.from.key == MD_KEYWILD) || (ren.from.key == MD_KEYBAD)) {
+		(void) mdmderror(ep, MDE_RENAME_SOURCE_BAD, ren.from.mnum,
+		    from_np->cname);
+		return (-1);
+	}
+
+	/* same again for the target */
+	if (((to_np->key == MD_KEYBAD) || (to_np->key == MD_KEYWILD)) &&
+	    (add_key_name(sp, to_np, NULL, ep) != 0)) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	ren.to.mnum	= meta_getminor(to_np->dev);
+	ren.to.key	= to_np->key;
+
+	if ((ren.to.key == MD_KEYWILD) || (ren.to.key == MD_KEYBAD)) {
+		(void) mdmderror(ep, MDE_RENAME_TARGET_BAD, ren.to.mnum,
+		    to_np->cname);
+		return (-1);
+	}
+
+	/* debugging aid: show what is about to be sent */
+	if (flags & NOISY) {
+		(void) fprintf(stderr, "\top: %s\n", OP_STR(ren.op));
+		(void) fprintf(stderr, "\trevision: %d, flags: %d\n",
+		    ren.revision, ren.flags);
+		(void) fprintf(stderr,
+		    "\tfrom(mnum,key): %ld, %d\tto: %ld, %d\n",
+		    ren.from.mnum, ren.from.key,
+		    ren.to.mnum, ren.to.key);
+	}
+
+	mdclrerror(ep);
+	if (metaioctl(MD_IOCRENAME, &ren, &ren.mde, from_np->cname) == 0) {
+		/* force the name cache to re-read device state */
+		meta_invalidate_name(from_np);
+		meta_invalidate_name(to_np);
+		return (0);
+	}
+
+	/* the driver refused: take the target key out again */
+	(void) del_key_name(sp, to_np, ep);
+	return (mdstealerror(ep, &ren.mde));
+}
+
+/*
+ * rename a metadevice
+ *
+ * Gives the metadevice from_np the name to_np.
+ *
+ *	sp	- set both names belong to
+ *	from_np	- existing metadevice
+ *	to_np	- new name; must not be set up as a metadevice yet
+ *	options	- MDCMD_FORCE:	do not check whether from_np is open
+ *		  MDCMD_DOIT:	really rename; without it only the checks
+ *				are performed (dry run)
+ *		  MDCMD_PRINT:	print a message after a successful rename
+ *	ep	- error return
+ *
+ * If the MD_DEBUG environment variable contains "RENAME" the
+ * transaction is printed by meta_swap().
+ *
+ * Returns 0 on success (including a successful dry run), -1 when a
+ * check or the namespace update fails, otherwise the result of
+ * meta_swap().
+ */
+int
+meta_rename(
+	mdsetname_t	*sp,
+	mdname_t	*from_np,
+	mdname_t	*to_np,
+	mdcmdopts_t	 options,
+	md_error_t	*ep
+)
+{
+	int		 flags		= (options & MDCMD_FORCE)? FORCE: 0;
+	int		 rc		= 0;
+	mdcinfo_t	*cinfop;
+	char		*p;
+	md_set_desc	*sd;
+	mdkey_t		 side_key = MD_KEYWILD;
+	md_error_t	 dummy_ep = mdnullerror;
+	int		 i, j;
+	md_mnnode_desc	*nd, *nd_del;
+
+	/* must have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(from_np->dev)));
+
+	mdclrerror(ep);
+
+	if (((p = getenv("MD_DEBUG")) != NULL) &&
+	    (strstr(p, "RENAME") != NULL)) {
+		flags |= NOISY;
+	}
+	/* if DOIT is not set, we are in dryrun mode */
+	if ((options & MDCMD_DOIT) == 0) {
+		flags |= DRYRUN;
+	}
+
+
+	if (metachkmeta(from_np, ep) != 0) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	mdclrerror(ep);
+
+	/* the source has to exist ... */
+	if (meta_get_mdunit(sp, from_np, ep) == NULL) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	/* ... and the target must not */
+	if (meta_get_mdunit(sp, to_np, ep) != NULL) {
+		if (mdisok(ep)) {
+			(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+					meta_getminor(to_np->dev),
+					to_np->cname);
+		}
+		return (-1);
+	}
+	/* the failed lookup of the target is expected; discard its error */
+	mdclrerror(ep);
+
+	/* If FORCE is not set, check if metadevice is open */
+	if (!(flags & FORCE)) {
+		if (check_open(sp, from_np, ep) != 0) {
+			return (-1);
+		}
+	}
+
+	/*
+	 * All checks are done, now we do the real work.
+	 * If we are in dryrun mode, we're done.
+	 */
+	if (flags & DRYRUN) {
+		return (0); /* success */
+	}
+
+	/*
+	 * add key for new name to the namespace
+	 */
+	if ((cinfop = metagetcinfo(from_np, ep)) == NULL) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	if (metaislocalset(sp)) {
+		to_np->key = add_name(sp, MD_SIDEWILD, MD_KEYWILD,
+		    cinfop->dname, meta_getminor(to_np->dev), to_np->bname, ep);
+	} else {
+		/*
+		 * As this is not the local set we have to create a namespace
+		 * record for each side (host) in the set. We cannot use
+		 * add_key_names() because the destination device (to_np)
+		 * should not exist and so the subsequent metagetcinfo()
+		 * call will fail when it tries to open the device, so we
+		 * have to use the information from the source device (from_np)
+		 */
+		if ((sd = metaget_setdesc(sp, ep)) == (md_set_desc *)NULL) {
+			return (-1);
+		}
+		/*
+		 * to_np->key starts out as MD_KEYWILD and is replaced by
+		 * whatever add_name() returns, so each later side is
+		 * added with the key returned for the previous one.
+		 */
+		to_np->key = MD_KEYWILD;
+
+		if (MD_MNSET_DESC(sd)) {
+			/* multi-node set: one record per node in the list */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				side_key = add_name(sp, (side_t)nd->nd_nodeid,
+				    to_np->key, cinfop->dname,
+				    meta_getminor(to_np->dev),
+				    to_np->bname, ep);
+				/*
+				 * Break out if failed to add the key,
+				 * but delete any name space records that
+				 * were added.
+				 */
+				if (side_key == MD_KEYBAD ||
+				    side_key == MD_KEYWILD) {
+					/*
+					 * If we have a valid to_np->key then
+					 * a record was added correctly but
+					 * we do not know for which side, so
+					 * we need to try to delete all of them.
+					 */
+
+					/*
+					 * NOTE(review): only the nodes ahead
+					 * of nd in the list are visited here;
+					 * errors go to dummy_ep and are
+					 * ignored.
+					 */
+					if (to_np->key != MD_KEYBAD &&
+					    to_np->key != MD_KEYWILD) {
+						nd_del = sd->sd_nodelist;
+						while ((nd_del != nd) &&
+						(nd_del != NULL)) {
+						    (void) del_name(sp,
+						    (side_t)nd_del->nd_nodeid,
+						    to_np->key, &dummy_ep);
+						    nd_del = nd_del->nd_next;
+						}
+						/* preserve error key state */
+						to_np->key = side_key;
+					}
+					break;
+				}
+				to_np->key = side_key;
+				nd = nd->nd_next;
+			}
+		} else {
+			/* traditional set: one record per non-empty side */
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				if (sd->sd_nodes[i][0] != '\0') {
+					side_key = add_name(sp, (side_t)i,
+					    to_np->key, cinfop->dname,
+					    meta_getminor(to_np->dev),
+					    to_np->bname, ep);
+					/*
+					 * Break out if failed to add the key,
+					 * but delete any name space records
+					 * that were added.
+					 */
+					if (side_key == MD_KEYBAD ||
+					    side_key == MD_KEYWILD) {
+						/*
+						 * If we have a valid
+						 * to_np->key then a record was
+						 * added correctly but we do
+						 * not know for which side, so
+						 * we need to try to delete
+						 * all of them.
+						 */
+						if (to_np->key != MD_KEYBAD &&
+						    to_np->key != MD_KEYWILD) {
+							for (j = 0; j < i;
+							    j++) {
+							    (void) del_name(sp,
+							    (side_t)j,
+							    to_np->key,
+							    &dummy_ep);
+							}
+							/*
+							 * preserve err
+							 * key state
+							 */
+							to_np->key = side_key;
+						}
+						break;
+					}
+					to_np->key = side_key;
+				}
+			}
+		}
+	}
+
+	/* whichever path was taken, a bad key here means add_name() failed */
+	if (to_np->key == MD_KEYBAD || to_np->key == MD_KEYWILD) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	rc = meta_swap(sp, from_np, to_np, MDRNOP_RENAME, flags, ep);
+
+	if (rc == 0) {
+		if (options & MDCMD_PRINT) {
+			(void) fprintf(stdout, dgettext(TEXT_DOMAIN,
+				"%s: has been renamed to %s\n"),
+				from_np->cname, to_np->cname);
+		}
+	}
+
+	return (rc);
+}
+
+/*
+ * return TRUE if current <from>, <to> ordering would
+ * prevent <from> from being in the role of <self>
+ *
+ * Three parent/child arrangements are tested in turn; when none of
+ * them matches the names can be used in the order given.
+ */
+static bool_t
+meta_exchange_need_to_flip(
+	md_common_t	*from_mdp,
+	md_common_t	*to_mdp
+)
+{
+	assert(from_mdp);
+	assert(to_mdp);
+
+	/*
+	 * ?
+	 *  \
+	 * <to>
+	 *    \
+	 *    <from>
+	 *
+	 * both have a parent and <from>'s parent is <to>
+	 */
+	if (MD_HAS_PARENT(from_mdp->parent) &&
+	    MD_HAS_PARENT(to_mdp->parent) &&
+	    (from_mdp->parent == meta_getminor(to_mdp->namep->dev)))
+		return (TRUE);
+
+	/*
+	 * <from>
+	 *    \
+	 *    <to>
+	 *      \
+	 *	 ?
+	 *
+	 * <to> has a parent and is capable of having a metadevice child
+	 */
+	if (MD_HAS_PARENT(to_mdp->parent) &&
+	    (to_mdp->capabilities & MD_CAN_META_CHILD))
+		return (TRUE);
+
+	/*
+	 * <to>
+	 *   \
+	 *  <from>
+	 *
+	 * <from>'s parent is <to> and <from> cannot have a metadevice child
+	 */
+	if (MD_HAS_PARENT(from_mdp->parent) &&
+	    (from_mdp->parent == meta_getminor(to_mdp->namep->dev)) &&
+	    !(from_mdp->capabilities & MD_CAN_META_CHILD))
+		return (TRUE);
+
+	/*
+	 * <from>	or	<to>
+	 *   \			  \
+	 *  <to>		<from>
+	 *			    \
+	 *			    ?
+	 *
+	 * anything else is left as it is
+	 */
+	return (FALSE);
+}
+
+/*
+ * exchange the names of two metadevices
+ *
+ * Both from_np and to_np must be existing metadevices in set sp.
+ * MDCMD_FORCE skips the open check on from_np, the absence of
+ * MDCMD_DOIT turns the call into a dry run and MDCMD_PRINT reports a
+ * successful exchange.  Returns 0 on success, non-zero with ep set on
+ * failure.
+ */
+int
+meta_exchange(
+	mdsetname_t	*sp,
+	mdname_t	*from_np,
+	mdname_t	*to_np,
+	mdcmdopts_t	 options,
+	md_error_t	*ep
+)
+{
+	md_common_t	*from_mdp, *to_mdp;
+	char		*dbg, *val;
+	int		 flags;
+	int		 rc;
+
+	/* must have a set, and both names must live in it */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(from_np->dev)));
+	assert(sp->setno == MD_MIN2SET(meta_getminor(to_np->dev)));
+
+	if (metachkmeta(from_np, ep) != 0) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	if (metachkmeta(to_np, ep) != 0) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	/* translate the command options into private flags */
+	flags = 0;
+	if (options & MDCMD_FORCE)
+		flags |= FORCE;
+	if (!(options & MDCMD_DOIT))
+		flags |= DRYRUN;
+
+	/*
+	 * MD_DEBUG containing "EXCHANGE" makes us noisy; if the first
+	 * "EXCHANGE=" in it is followed by exactly "NOFLIP" the
+	 * argument reordering below is switched off as well
+	 */
+	dbg = getenv("MD_DEBUG");
+	if (dbg != NULL) {
+		val = strstr(dbg, "EXCHANGE=");
+		if (val != NULL) {
+			flags |= NOISY;
+			val += strlen("EXCHANGE=");
+			if (strcmp(val, "NOFLIP") == 0)
+				flags |= NOFLIP;
+		} else if (strstr(dbg, "EXCHANGE") != NULL) {
+			flags |= NOISY;
+		}
+	}
+
+	/* both units have to exist */
+	from_mdp = meta_get_unit(sp, from_np, ep);
+	if (from_mdp == NULL) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+
+	to_mdp = meta_get_unit(sp, to_np, ep);
+	if (to_mdp == NULL) {
+		assert(!mdisok(ep));
+		return (-1);
+	}
+	assert(mdisok(ep));
+
+	/* If FORCE is not set, check if metadevice is open */
+	if (!(flags & FORCE) && (check_open(sp, from_np, ep) != 0))
+		return (-1);
+
+	/*
+	 * All checks are done, now we do the real work.
+	 * If we are in dryrun mode, we're done.
+	 */
+	if (flags & DRYRUN)
+		return (0); /* success */
+
+	/*
+	 * NOFLIP is used only for debugging; the driver
+	 * will catch this and return MDE_RENAME_ORDER, if necessary
+	 */
+	if ((flags & NOFLIP) ||
+	    !meta_exchange_need_to_flip(from_mdp, to_mdp)) {
+		rc = meta_swap(sp, from_np, to_np, MDRNOP_EXCHANGE, flags, ep);
+	} else {
+		rc = meta_swap(sp, to_np, from_np, MDRNOP_EXCHANGE, flags, ep);
+	}
+
+	if ((rc == 0) && (options & MDCMD_PRINT)) {
+		(void) fprintf(stdout, dgettext(TEXT_DOMAIN,
+			"%s and %s have exchanged identities\n"),
+			from_np->cname, to_np->cname);
+	}
+
+	return (rc);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_repartition.c b/usr/src/lib/lvm/libmeta/common/meta_repartition.c
new file mode 100644
index 0000000000..16bf7ea597
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_repartition.c
@@ -0,0 +1,415 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdio.h>
+#include <meta.h>
+#include "meta_repartition.h"
+
+
+
+/*
+ * FUNCTION:	meta_replicaslice()
+ * INPUT:	dnp	- the name of the drive to check
+ * OUTPUT:	slicep	- pointer to slice number
+ *		ep	- pointer to an md_error_t structure in which
+ *			  to return errors to the caller
+ * RETURNS:	int	-  0 - value pointed to by slicep is valid
+ *			  -1 - otherwise
+ *
+ * PURPOSE:	Determine which slice of the specified drive to
+ *		reserve, presumably for metadb replica usage.
+ *
+ * NOTE:	If slicep is NULL, the return code will indicate
+ *		whether or not the slice number could be determined
+ */
+int
+meta_replicaslice(
+	mddrivename_t	*dnp,
+	uint_t		*slicep,
+	md_error_t	*ep
+)
+{
+	char		*rawpath = dnp->rname;
+	struct dk_geom	dkg;
+	uint_t		slice;
+	int		geom_rc;
+	int		geom_errno;
+	int		fd;
+
+	/*
+	 * Open the drive's raw node without blocking.  When that node
+	 * does not exist, fall back to the same path with "s0" appended;
+	 * every other open failure is reported immediately.
+	 */
+	fd = open(rawpath, (O_RDONLY|O_NDELAY), 0);
+	if (fd < 0) {
+		size_t	buflen;
+		char	*s0path;
+		int	saved_errno;
+
+		if (errno != ENOENT)
+			return (mdsyserror(ep, errno, rawpath));
+
+		/* room for the name, the "s0" suffix and the NUL */
+		buflen = strlen(rawpath) + 3;
+		s0path = Zalloc(buflen);
+		(void) snprintf(s0path, buflen, "%ss0", rawpath);
+		fd = open(s0path, (O_RDONLY|O_NDELAY), 0);
+
+		/* capture errno before any later call can overwrite it */
+		saved_errno = errno;
+		Free(s0path);
+		if (fd < 0)
+			return (mdsyserror(ep, saved_errno, rawpath));
+	}
+
+	/*
+	 * Probe the label style with DKIOCGGEOM.  errno is cleared first
+	 * and sampled right after the ioctl, before close() runs, so the
+	 * value examined below belongs to the ioctl alone.
+	 */
+	errno = 0;
+	geom_rc = ioctl(fd, DKIOCGGEOM, &dkg);
+	geom_errno = errno;
+	(void) close(fd);
+
+	if (geom_rc == 0) {
+		/* geometry available: VTOC style label, use slice 7 */
+		slice = MD_SLICE7;
+	} else if (geom_errno == ENOTSUP) {
+		/*
+		 * EFI style label: the minor number of slice 7 is
+		 * reserved there, so slice 6 is used instead.
+		 */
+		slice = MD_SLICE6;
+	} else if (geom_errno == ENOTTY) {
+		/* the name does not refer to a disk at all */
+		return (mddeverror(ep, MDE_NOT_DISK, NODEV, rawpath));
+	} else {
+		/* any other ioctl failure is passed back unchanged */
+		return (mdsyserror(ep, geom_errno, rawpath));
+	}
+
+	/*
+	 * A NULL slicep means the caller only wants to know whether
+	 * the slice number could be determined.
+	 */
+	if (slicep != NULL)
+		*slicep = slice;
+
+	return (0);
+}
+
+
+
+/*
+ * FUNCTION:	meta_repartition_drive()
+ * INPUT:	sp	- the set name for the device to check
+ *		dnp	- the name of the drive to partition
+ *              options - options (see NOTES)
+ * OUTPUT:	vtocp	- pointer to an mdvtoc_t structure in which
+ *			  to return the new VTOC to the caller
+ *		ep	- pointer to an md_error_t structure in which
+ *			  to return errors to the caller
+ * RETURNS:	int	-  0 - drive was or can be repartitioned
+ *			  -1 - drive could not or should not be
+ *			       repartitioned
+ * PURPOSE:	Repartition a disk for use in a disk set or in order
+ *		to create soft partitions on it.  Alternatively,
+ *		return the VTOC that the disk would have if it were
+ *		repartitioned without actually repartitioning it.
+ *
+ * NOTES:
+ *
+ *     This routine will repartition a drive to make it suitable for
+ *     inclusion in a diskset.  Specifically, it will create a
+ *     proposed VTOC that specifies a replica slice that begins at the
+ *     first valid lba, is large enough to hold a label and a metadb
+ *     replica, does not overlap any other slices, and is unmountable.
+ *     If the current replica slice already satisfies those criteria,
+ *     the routine will neither create a proposed VTOC nor repartition
+ *     the drive unless the MD_REPART_FORCE flag is passed into the
+ *     routine in the options argument.  If the routine does create a
+ *     proposed VTOC, it will return the proposed VTOC in *vtocp if
+ *     vtocp isn't NULL.
+ *
+ *     The slice to be used as the replica slice is determined by the
+ *     function meta_replicaslice().
+ *
+ *     If the replica slice does not satisfy the above criteria or the
+ *     MD_REPART_FORCE flag is set, the proposed VTOC will specify a
+ *     replica slice that satisfies the above criteria, a slice zero
+ *     that contains the remaining space on the disk, and no other
+ *     slices.  If that repartitioning would cause the replica slice
+ *     to move or shrink, and the MD_REPART_LEAVE_REP option is set,
+ *     the routine will return -1 without creating or returning a
+ *     proposed vtoc, and without repartitioning the disk.  Otherwise
+ *     the routine will repartition the disk unless the
+ *     MD_REPART_DONT_LABEL flag is set in the options argument.
+ *
+ *     If the MD_REPART_DONT_LABEL flag is set in the options argument,
+ *     but the routine would otherwise repartition the drive, the
+ *     routine won't repartition the drive, but will create a proposed
+ *     VTOC that satisfies the criteria defined above and return
+ *     it in *vtocp if vtocp isn't NULL.  The MD_REPART_DONT_LABEL
+ *     option allows calling routines to determine what the contents of
+ *     the drive's VTOC would be if the drive were repartitioned without
+ *     actually repartitioning the drive.
+ */
+int
+meta_repartition_drive(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	int		options,
+	mdvtoc_t	*vtocp,
+	md_error_t	*ep
+)
+{
+	uint_t			 replicaslice;
+	diskaddr_t		 first_lba, last_lba;
+	int			 round_sizes = 1; /* 0: EFI, no rounding */
+	unsigned long long	 cylsize; /* sectors per cylinder */
+	unsigned long long	 drvsize; /* drive size, in sectors */
+	int			 i;
+	mdgeom_t		*mdgp;
+	mdvtoc_t		*mdvp;
+	mdvtoc_t		 proposed_vtoc;
+	uint_t			 reservedcyl; /* cylinders for replica */
+	ushort_t		 resflag;
+	mdname_t		*resnp;
+	unsigned long long	 ressize; /* replica size, in sectors */
+	md_set_desc		*sd;
+	daddr_t			 dbsize; /* metadb size */
+	diskaddr_t		 replica_start;
+	diskaddr_t		 replica_size;
+	diskaddr_t		 replica_end;
+	diskaddr_t		 data_start;
+	diskaddr_t		 data_size;
+
+	if (meta_replicaslice(dnp, &replicaslice, ep) != 0) {
+		return (-1);
+	}
+
+	/* MD_SLICE6 means an EFI label: don't round to cylinders */
+	if (replicaslice == MD_SLICE6)
+		round_sizes = 0;
+
+	/*
+	 * We took as argument a drive name pointer, but we need a
+	 * slice name pointer to retrieve vtoc information.  So get
+	 * the name pointer for slice zero first, then use it to get
+	 * the vtoc info for the disk.
+	 */
+	if ((resnp = metaslicename(dnp, MD_SLICE0, ep)) == NULL)
+		return (-1);
+
+	if ((mdvp = metagetvtoc(resnp, FALSE, NULL, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * Determine the metadb size.
+	 */
+	dbsize = MD_DBSIZE;
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+
+		if (MD_MNSET_DESC(sd))
+			dbsize = MD_MN_DBSIZE;
+	}
+
+	/* If we've got an efi disk, we better have lba info */
+	first_lba = mdvp->first_lba;
+	last_lba = mdvp->last_lba;
+	ASSERT((round_sizes != 0) || (last_lba > 0));
+
+	/*
+	 * At this point, ressize is used as a minimum value.  Later
+	 * it will be rounded up to a cylinder boundary if
+	 * appropriate.  ressize is in units of disk sectors.
+	 */
+	ressize = dbsize + VTOC_SIZE;
+	resflag = V_UNMNT;
+
+	/*
+	 * If we're forcing the repartition, we can skip the replica
+	 * slice and overlap tests.
+	 */
+	if (options & MD_REPART_FORCE) {
+		goto do_repartition;
+	}
+
+	/*
+	 * Replica slice tests: it must begin at first_lba, be long
+	 * enough, have the right flags, and not overlap any other
+	 * slices.  If any of these conditions is violated, we need to
+	 * repartition the disk.
+	 */
+	if (mdvp->parts[replicaslice].start != first_lba) {
+		goto do_repartition;
+	}
+
+	if (mdvp->parts[replicaslice].size < ressize) {
+		goto do_repartition;
+	}
+
+	if (mdvp->parts[replicaslice].flag != resflag) {
+		goto do_repartition;
+	}
+
+	/*
+	 * Check for overlap: this test should use the actual size of
+	 * the replica slice, as contained in the vtoc, and NOT the
+	 * minimum size calculated above.
+	 */
+	replica_end = first_lba + mdvp->parts[replicaslice].size;
+	for (i = 0; i < mdvp->nparts; i++) {
+		if (i != replicaslice) {
+			if ((mdvp->parts[i].size > 0) &&
+			    (mdvp->parts[i].start < replica_end)) {
+				goto do_repartition;
+			}
+		}
+	}
+
+	/*
+	 * If we passed the above tests, then the disk is already
+	 * partitioned appropriately, and we're not being told to
+	 * force a change.
+	 */
+	return (0);
+
+do_repartition:
+
+	/* Retrieve disk geometry info and round to cylinder sizes */
+	if (round_sizes != 0) {
+
+		if ((mdgp = metagetgeom(resnp, ep)) == NULL)
+			return (-1);
+
+		/*
+		 * Both cylsize and drvsize are in units of disk
+		 * sectors.
+		 *
+		 * The intended results are of type unsigned long
+		 * long.  Since each operand of the first
+		 * multiplication is of type unsigned int, we risk
+		 * overflow by multiplying and then converting the
+		 * result.  Therefore we explicitly cast (at least)
+		 * one of the operands, forcing conversion BEFORE
+		 * multiplication, and avoiding overflow.  The second
+		 * assignment is OK, since one of the operands is
+		 * already of the desired type.
+		 */
+		cylsize =
+		    ((unsigned long long)mdgp->nhead) * mdgp->nsect;
+		drvsize = cylsize * mdgp->ncyl;
+
+		/*
+		 * How many cylinders must we reserve for the replica
+		 * slice to ensure that it meets the previously
+		 * calculated minimum size?
+		 */
+		reservedcyl = (ressize + cylsize - 1) / cylsize;
+		ressize = reservedcyl * cylsize;
+	} else {
+		drvsize = last_lba - first_lba;
+	}
+
+	/* Refuse if the replica slice would have to move or grow */
+	if (options & MD_REPART_LEAVE_REP) {
+		if ((mdvp->parts[replicaslice].start != first_lba) ||
+		    (mdvp->parts[replicaslice].size < ressize)) {
+			return (mddeverror(ep, MDE_REPART_REPLICA,
+			    resnp->dev, NULL));
+		}
+	}
+
+	/*
+	 * Reject a disk too small for the replica slice.  mdgp and
+	 * reservedcyl are only set (and only read) when rounding.
+	 */
+	if (((round_sizes != 0) && (reservedcyl >= (int)mdgp->ncyl)) ||
+	    ((round_sizes == 0) && (ressize + first_lba >= last_lba))) {
+		return (mdmddberror(ep, MDE_DB_TOOSMALL,
+		    meta_getminor(resnp->dev), sp->setno, 0, NULL));
+	}
+
+	replica_start = first_lba;
+	replica_size = ressize;
+	data_start = first_lba + ressize;
+	data_size = drvsize - ressize;
+
+	/*
+	 * Create the proposed VTOC.  First copy the current VTOC
+	 * into the proposed VTOC to duplicate the values that don't
+	 * need to change.  Then change the partition table and set
+	 * the flag value for the replica slice to resflag to reserve it
+	 * for metadata.
+	 */
+	proposed_vtoc = *mdvp;
+	/* nparts must be large enough to include the replica slice */
+	if (replicaslice >= proposed_vtoc.nparts) {
+		proposed_vtoc.nparts = replicaslice + 1;
+	}
+	for (i = 0; i < proposed_vtoc.nparts; i++) {
+		/* don't change the reserved partition of an EFI device */
+		if (proposed_vtoc.parts[i].tag == V_RESERVED)
+			data_size = proposed_vtoc.parts[i].start - data_start;
+		else
+			(void) memset(&proposed_vtoc.parts[i], '\0',
+				sizeof (proposed_vtoc.parts[i]));
+	}
+
+	proposed_vtoc.parts[MD_SLICE0].start = data_start;
+	proposed_vtoc.parts[MD_SLICE0].size = data_size;
+	proposed_vtoc.parts[MD_SLICE0].tag = V_USR;
+	proposed_vtoc.parts[replicaslice].start = replica_start;
+	proposed_vtoc.parts[replicaslice].size = replica_size;
+	proposed_vtoc.parts[replicaslice].flag = resflag;
+	proposed_vtoc.parts[replicaslice].tag = V_USR;
+
+	if (!(options & MD_REPART_DONT_LABEL)) {
+		/*
+		 * Label the disk with the proposed VTOC.
+		 */
+		*mdvp = proposed_vtoc;
+		if (metasetvtoc(resnp, ep) != 0) {
+			return (-1);
+		}
+	}
+
+	if (vtocp != NULL) {
+		/*
+		 * Return the proposed VTOC.
+		 */
+		*vtocp = proposed_vtoc;
+	}
+
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_replace.c b/usr/src/lib/lvm/libmeta/common/meta_replace.c
new file mode 100644
index 0000000000..3165bd0d53
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_replace.c
@@ -0,0 +1,144 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * replace components in metadevices
+ */
+
+#include <meta.h>
+#include <sys/lvm/md_stripe.h>
+
+/*
+ * Replace component oldnp with newnp.  If uname names a hot spare
+ * pool the request goes to meta_hs_replace(); otherwise it is
+ * dispatched on the driver name of metanp (RAID, trans or stripe).
+ * Returns what the selected handler returns; -1 if a lookup fails.
+ */
+int
+meta_replace(mdsetname_t *sp, mdname_t *metanp, mdname_t *oldnp,
+    mdname_t *newnp, char *uname, mdcmdopts_t options, md_error_t *ep)
+{
+	char			*miscname;
+
+	assert(sp != NULL);
+	if (is_hspname(uname)) {
+		mdhspname_t	*hspnp;
+
+		if ((hspnp = metahspname(&sp, uname, ep)) == NULL)
+			return (-1);
+		assert(sp != NULL);
+		/* hand the result back instead of masking failures as 0 */
+		return (meta_hs_replace(sp, hspnp, oldnp, newnp, options, ep));
+	}
+	assert(sp->setno == MD_MIN2SET(meta_getminor(metanp->dev)));
+	if (metachkmeta(metanp, ep) != 0)
+		return (-1);
+	if ((miscname = metagetmiscname(metanp, ep)) == NULL)
+		return (-1);
+	if (strcmp(miscname, MD_RAID) == 0) {
+		return (meta_raid_replace(sp, metanp, oldnp, newnp,
+		    options, ep));
+	} else if (strcmp(miscname, MD_TRANS) == 0) {
+		return (meta_trans_replace(sp, metanp, oldnp, newnp,
+		    options, ep));
+	} else if (strcmp(miscname, MD_STRIPE) == 0) {
+		return (meta_stripe_replace(sp, metanp, oldnp, newnp,
+		    options, ep));
+	}
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(metanp->dev),
+	    metanp->cname));
+}
+/*
+ * Replace component oldnp of the named device np with newnp, choosing
+ * the RAID or mirror routine from the driver name of np.
+ */
+int
+meta_replace_byname(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdname_t	*oldnp,
+	mdname_t	*newnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*drvname;
+
+	/* np's minor number must map to the set sp */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	if (strcmp(drvname, MD_RAID) == 0)
+		return (meta_raid_replace(sp, np, oldnp, newnp, options, ep));
+	if (strcmp(drvname, MD_MIRROR) == 0)
+		return (meta_mirror_replace(sp, np, oldnp, newnp, options, ep));
+
+	/* every other driver name is reported as an unknown type */
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname));
+}
+
+/*
+ * Enable component compnp of the named device np, choosing the RAID
+ * or mirror routine from the driver name of np.
+ */
+int
+meta_enable_byname(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*drvname;
+
+	/* np's minor number must map to the set sp */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	if (strcmp(drvname, MD_RAID) == 0)
+		return (meta_raid_enable(sp, np, compnp, options, ep));
+	if (strcmp(drvname, MD_MIRROR) == 0)
+		return (meta_mirror_enable(sp, np, compnp, options, ep));
+
+	/* every other driver name is reported as an unknown type */
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_reset.c b/usr/src/lib/lvm/libmeta/common/meta_reset.c
new file mode 100644
index 0000000000..ae04edce5d
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_reset.c
@@ -0,0 +1,146 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * clear metadevices
+ */
+
+#include <meta.h>
+
+/*
+ * Clear metadevice np through the MD_IOCRESET ioctl of its driver.  A
+ * device that meta_isopen() reports as open is refused (MDE_IS_OPEN).
+ */
+int
+meta_reset(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_i_reset_t	req;
+	char		*drvname;
+
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+	if (meta_isopen(sp, np, ep, options) != 0)
+		return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev),
+			np->cname));
+
+	/* build the ioctl argument starting from a zeroed structure */
+	(void) memset(&req, '\0', sizeof (req));
+	MD_SETDRIVERNAME(&req, drvname, sp->setno);
+	req.mnum = meta_getminor(np->dev);
+	req.force = ((options & MDCMD_FORCE) != 0);
+	if (metaioctl(MD_IOCRESET, &req, &req.mde, np->cname) != 0)
+		return (mdstealerror(ep, &req.mde));
+	return (0);
+}
+
+/*
+ * Reset all the metadevices and hotspares of set sp by calling the
+ * reset routine of each type with NULL as the name argument and with
+ * MDCMD_RECURSE added to options.  The first routine to fail ends
+ * the sequence with -1; 0 is returned when all of them succeed.
+ */
+int
+meta_reset_all(
+	mdsetname_t	*sp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdcmdopts_t	recurse_opts = options | MDCMD_RECURSE;
+
+	/*
+	 * The || chain runs the routines strictly left to right and
+	 * stops at the first failure, so the call order is unchanged.
+	 * Soft partitions can appear at both the top and the bottom of
+	 * the stack, so meta_sp_reset() is called twice to handle all
+	 * cases.
+	 */
+	if ((meta_trans_reset(sp, NULL, recurse_opts, ep) != 0) ||
+	    (meta_sp_reset(sp, NULL, recurse_opts, ep) != 0) ||
+	    (meta_raid_reset(sp, NULL, recurse_opts, ep) != 0) ||
+	    (meta_mirror_reset(sp, NULL, recurse_opts, ep) != 0) ||
+	    (meta_stripe_reset(sp, NULL, recurse_opts, ep) != 0) ||
+	    (meta_hsp_reset(sp, NULL, recurse_opts, ep) != 0) ||
+	    (meta_sp_reset(sp, NULL, recurse_opts, ep) != 0)) {
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Reset the named device np by calling the reset routine that
+ * matches its driver name: stripe, mirror, trans, RAID or soft
+ * partition.  The routine's return value is passed back; any other
+ * driver name produces an MDE_UNKNOWN_TYPE error.
+ */
+int
+meta_reset_by_name(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*drvname;
+
+	/* np's minor number must map to the set sp */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* check np, then look up the name of its driver */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* hand back whatever the matching reset routine returns */
+	if (strcmp(drvname, MD_STRIPE) == 0)
+		return (meta_stripe_reset(sp, np, options, ep));
+	if (strcmp(drvname, MD_MIRROR) == 0)
+		return (meta_mirror_reset(sp, np, options, ep));
+	if (strcmp(drvname, MD_TRANS) == 0)
+		return (meta_trans_reset(sp, np, options, ep));
+	if (strcmp(drvname, MD_RAID) == 0)
+		return (meta_raid_reset(sp, np, options, ep));
+	if (strcmp(drvname, MD_SP) == 0)
+		return (meta_sp_reset(sp, np, options, ep));
+
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_resync.c b/usr/src/lib/lvm/libmeta/common/meta_resync.c
new file mode 100644
index 0000000000..b57dfb1197
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_resync.c
@@ -0,0 +1,108 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * mirror operations
+ */
+
+#include <meta.h>
+#include <sdssc.h>
+
+/*
+ * Resync the named device np, choosing the RAID or mirror resync
+ * routine from its driver name.  Only the mirror routine receives
+ * cmd; any other driver name produces an MDE_UNKNOWN_TYPE error.
+ */
+int
+meta_resync_byname(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	daddr_t		size,
+	md_error_t	*ep,
+	md_resync_cmd_t	cmd	/* action to perform */
+)
+{
+	char		*drvname;
+
+	/* np's minor number must map to the set sp */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	if (strcmp(drvname, MD_RAID) == 0)
+		return (meta_raid_resync(sp, np, size, ep));
+	if (strcmp(drvname, MD_MIRROR) == 0)
+		return (meta_mirror_resync(sp, np, size, ep, cmd));
+
+	return (mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
+	    np->cname));
+}
+
+/*
+ * Resync all mirror and RAID devices of set sp.  Returns 0 when there
+ * is no state database (MDE_DB_NODB, cleared) or every step succeeds.
+ */
+int
+meta_resync_all(
+	mdsetname_t	*sp,
+	daddr_t		size,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*setdescp;
+	int		status = 0;
+
+	if (meta_setup_db_locations(ep) != 0) {
+		/* no database at all: nothing to resync, not an error */
+		if (mdismddberror(ep, MDE_DB_NODB)) {
+			mdclrerror(ep);
+			return (0);
+		}
+		status = -1;
+	}
+
+	if (!(metaislocalset(sp))) {
+		setdescp = metaget_setdesc(sp, ep);
+		if (setdescp == NULL)
+			return (-1);
+		/* MN disksets don't use DCS clustering services. */
+		if (!(MD_MNSET_DESC(setdescp)))
+			sdssc_notify_service(NULL, Shutdown_Services);
+	}
+
+	if (meta_mirror_resync_all(sp, size, ep) != 0)
+		status = -1;
+	if (meta_raid_resync_all(sp, size, ep) != 0)
+		status = -1;
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_runtime.c b/usr/src/lib/lvm/libmeta/common/meta_runtime.c
new file mode 100644
index 0000000000..f9c5915088
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_runtime.c
@@ -0,0 +1,301 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Return the values of runtime parameters stored in
+ * /etc/lvm/runtime.cf, converting them to data
+ * types appropriate for use by functions whose behavior
+ * is affected by those values.
+ */
+
+/*
+ * system include files
+ */
+
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+/*
+ * SUNWmd include files
+ */
+
+#include <meta.h>		/* for MDD_DOMAIN */
+#include <meta_runtime.h>	/* external interface definition */
+#include <sdssc.h>
+
+/*
+ * The following lines define the runtime parameter configuration file.
+ */
+
+static const char *param_file_namep = "/etc/lvm/runtime.cf";
+
+/*
+ * The runtime parameter configuration file is an ascii text file.
+ * Each text line in the file has a maximum length of 80 four-byte
+ * wide characters.  The line buffer size defined below accommodates
+ * the maximum line length plus the newline character at the end of
+ * the line and the null character that fgets() adds at the end of
+ * the line when it writes the line to the buffer.
+ */
+
+static const int line_buffer_size = 325;
+
+/*
+ * The format for parameter entries in the file is "name=value".
+ * Each "name=value" string must begin a line of the file.
+ * The "name" and "value" tokens may be preceded or followed by
+ * spaces.  Lines beginning with "#" are comment lines.
+ */
+
+static const char *token_separator_listp = " =";
+
+/*
+ * If a runtime parameter that can be set in the file is not set,
+ * or is set to an invalid value, or if the file can't be opened,
+ * the parameter takes on the default value given in the comments
+ * below.
+ */
+
+/*
+ * The following string constant declarations name the runtime
+ * configuration parameters that can be set in the runtime parameter
+ * configuration file.  The allowed values of parameters that
+ * range over small sets of discrete values are also declared below
+ * as string constants.
+ *
+ * CAUTION: When adding new runtime parameters to the runtime
+ *          parameter configuration file, declare their names
+ *          as string constants below, and check for conflicts
+ *          with the names of existing parameters.
+ */
+
+static const char *ownerioctls_namep = "ownerioctls";
+
+/*
+ * allowed values:
+ */
+
+static const char *ownerioctls_onp = "on"; /* default value */
+static const char *ownerioctls_offp = "off";
+
+/*
+ * The "ownerioctls" parameter controls whether the metaset -t and
+ * metaset -r commands issue the MHIOCTKOWN, MHIOCRELEASE, and
+ * MHIOCENFAILFAST ioctls when taking or releasing ownership of disksets.
+ * The allowed parameter values are "on" and "off".
+ *
+ * If the line "ownerioctls=off" appears in the runtime configuration file,
+ * the metaset -t command doesn't issue the MHIOCTKOWN ioctl when taking
+ * ownership of disksets, and the metaset -r command doesn't issue the
+ * MHIOCRELEASE and MHIOCENFAILFAST ioctls when releasing ownership of
+ * disksets.
+ *
+ * If the line "ownerioctls=on" appears in the file, the metaset -t
+ * command issues the MHIOCTKOWN ioctl when taking ownership of disksets,
+ * and the metaset -r command issues the MHIOCRELEASE AND MHIOCENFAILFAST
+ * ioctls when releasing ownership of disksets.
+ *
+ * The default value of "ownerioctls" is "on".
+ */
+
+/*
+ * The following lines make forward declarations of private functions.
+ */
+
+static
+char *
+meta_get_rt_param(const char *param_namep, boolean_t warn_if_not_found);
+
+/*
+ * The following lines define public functions.
+ */
+
+/*
+ * Tell the caller whether diskset ownership changes should issue the
+ * ownership ioctls, per the "ownerioctls" runtime parameter.
+ */
+boolean_t
+do_owner_ioctls(void)
+{
+	const char	*function_namep = "do_owner_ioctls()";
+	sdssc_version_t	version;
+	boolean_t	use_ioctls = B_TRUE;
+	char		*setting;
+
+	/*
+	 * When bound to a cluster (SDSSC major version 3 or later) the
+	 * cluster code deals with disk reservation: never do ioctls.
+	 */
+	if ((sdssc_version(&version) == SDSSC_OKAY) && (version.major >= 3))
+		return (B_FALSE);
+
+	/* no value could be read: keep the default, which is "on" */
+	setting = meta_get_rt_param(ownerioctls_namep, B_TRUE);
+	if (setting == NULL)
+		return (use_ioctls);
+
+	if (strcmp(setting, ownerioctls_offp) == 0) {
+		use_ioctls = B_FALSE;
+	} else if (strcmp(setting, ownerioctls_onp) != 0) {
+		/* neither "on" nor "off": complain, keep the default */
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: illegal value for %s: %s.\n"),
+		    function_namep, ownerioctls_namep, setting);
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "%s: illegal value for %s: %s.\n"),
+		    function_namep, ownerioctls_namep, setting);
+	}
+	free(setting);
+
+	return (use_ioctls);
+}
+
+/*
+ * Retrieve the verbosity level for rpc.mdcommd from the config file.
+ * If none is specified, don't print a warning and return 0.  The value
+ * is hexadecimal; strtoul() keeps settings above LONG_MAX intact.
+ */
+uint_t
+commd_get_verbosity(void)
+{
+	uint_t	retval = 0;
+	char	*param_valuep = meta_get_rt_param("commd_verbosity", B_FALSE);
+
+	if (param_valuep != NULL) {
+		retval = (uint_t)strtoul(param_valuep, NULL, 16);
+		free(param_valuep);
+	}
+	return (retval);
+}
+
+/*
+ * Debug output file for rpc.mdcommd as named in the config file; no
+ * warning is printed if none is set.  Caller frees a non-NULL result.
+ */
+char *
+commd_get_outfile(void)
+{
+	char	*outfile = meta_get_rt_param("commd_out_file", B_FALSE);
+
+	return (outfile);
+}
+
+/*
+ * The following lines define private functions
+ */
+
+/*
+ * Scan the runtime parameter file for the first line whose leading
+ * token equals param_namep and that has a value token after it.
+ * Returns a strdup()'d copy of the value, which the caller must
+ * free(), or NULL if the line buffer cannot be allocated, the file
+ * cannot be opened, strdup() fails or no such line is found.  Each
+ * failure is reported on stderr and via syslog(); "not found" is
+ * reported only when warn_if_not_found is B_TRUE.
+ *
+ * Lines are split with strtok_r() rather than strtok() so that this
+ * library routine neither depends on nor disturbs tokenizer state
+ * shared with its callers or with other threads.
+ */
+static char *
+meta_get_rt_param(const char *param_namep, boolean_t warn_if_not_found)
+{
+	const char *function_namep = "meta_get_rt_param()";
+	char *line_bufferp = NULL;
+	char *newlinep = NULL;
+	FILE *param_filep = NULL;
+	char *param_name_tokenp = NULL;
+	char *param_valuep = NULL;
+	char *param_value_tokenp = NULL;
+	char *tok_statep = NULL;	/* strtok_r() position within a line */
+
+	line_bufferp = (char *)malloc(line_buffer_size);
+	if (line_bufferp == NULL) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: malloc failed\n"), function_namep);
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "%s: malloc failed\n"), function_namep);
+		return (param_valuep);
+	}
+	param_filep = fopen(param_file_namep, "r");
+	if (param_filep == NULL) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: can't open %s\n"), function_namep, param_file_namep);
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "%s: can't open %s\n"), function_namep, param_file_namep);
+		free(line_bufferp);
+		return (param_valuep);
+	}
+	while ((fgets(line_bufferp, line_buffer_size, param_filep) != NULL) &&
+		(param_valuep == NULL)) {
+
+		newlinep = strchr(line_bufferp, '\n');
+		if (newlinep != NULL)
+			*newlinep = '\0';
+		param_name_tokenp = strtok_r(line_bufferp,
+		    token_separator_listp, &tok_statep);
+		if ((param_name_tokenp != NULL) &&
+			(strcmp(param_namep, param_name_tokenp) == 0)) {
+			param_value_tokenp = strtok_r(NULL,
+			    token_separator_listp, &tok_statep);
+		}
+		if (param_value_tokenp != NULL) {
+			param_valuep = strdup(param_value_tokenp);
+			if (param_valuep == NULL) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: strdup failed\n"), function_namep);
+				syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+				    "%s: strdup failed\n"), function_namep);
+				free(line_bufferp);
+				(void) fclose(param_filep);
+				return (param_valuep);
+			}
+		}
+	}
+	if ((param_valuep == NULL) && (warn_if_not_found == B_TRUE)) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: value of %s not set or error in %s\n"),
+		    function_namep, param_namep, param_file_namep);
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "%s: value of %s not set or error in %s\n"),
+		    function_namep, param_namep, param_file_namep);
+	}
+	free(line_bufferp);
+	(void) fclose(param_filep);
+	return (param_valuep);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_se_notify.c b/usr/src/lib/lvm/libmeta/common/meta_se_notify.c
new file mode 100644
index 0000000000..7ee231aa42
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_se_notify.c
@@ -0,0 +1,399 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdlib.h>
+#include <meta.h>
+#include <libsysevent.h>
+#include <libnvpair.h>
+#include <sys/sysevent/svm.h>
+#include <sys/sysevent/eventdefs.h>
+#include <dlfcn.h>
+
+/*
+ * Build a printable name for the object that an SVM event refers to.
+ *
+ *	tag	- SVM_TAG_* value selecting how the name is formed
+ *	setno	- set number; no set name lookup is done for MD_SET_BAD
+ *	dev	- device whose minor number supplies the unit number (or,
+ *		  for SVM_TAG_HOST, the index into the set's node array)
+ *
+ * Returns a strdup()ed copy of the name, which the caller must free(),
+ * or NULL if no name could be formed, the name did not fit in the local
+ * buffer, or strdup() failed.
+ */
+char *
+obj2devname(uint32_t tag, set_t setno, md_dev64_t dev)
+{
+	char		*setname;
+	char		name[MD_MAX_CTDLEN];
+	mdsetname_t	*sp;
+	md_error_t	status = mdnullerror;
+	md_set_record	*md_sr;
+	minor_t		mnum = meta_getminor(dev);
+	int		rtn = 0;
+
+	/* Look up the set name; it stays NULL if the set is unknown. */
+	setname = NULL;
+	if ((setno != MD_SET_BAD) &&
+		((sp = metasetnosetname(setno, &status)) != NULL)) {
+		setname = sp->setname;
+	}
+
+	/* An empty name at the end of the switch means "nothing produced". */
+	name[0] = '\0';
+	switch (tag) {
+	case SVM_TAG_HS:
+	case SVM_TAG_METADEVICE:
+	case SVM_TAG_MIRROR:
+	case SVM_TAG_RAID5:
+	case SVM_TAG_STRIPE:
+	case SVM_TAG_TRANS:
+		/* Set 0 names carry no "<setname>/" prefix. */
+		if (setno == 0) {
+			rtn = snprintf(name, sizeof (name), "d%u",
+			    (unsigned)MD_MIN2UNIT(mnum));
+		} else if (setname != NULL) {
+			rtn = snprintf(name, sizeof (name), "%s/d%u", setname,
+			    (unsigned)MD_MIN2UNIT(mnum));
+		}
+		break;
+	case SVM_TAG_HSP:
+		if (setno == 0) {
+			rtn = snprintf(name, sizeof (name), "hsp%u",
+			    (unsigned)MD_MIN2UNIT(mnum));
+		} else if (setname != NULL) {
+			rtn = snprintf(name, sizeof (name), "%s/hsp%u",
+			    setname, (unsigned)MD_MIN2UNIT(mnum));
+		}
+		break;
+	case SVM_TAG_DRIVE:
+		(void) sprintf(name, "drive");
+		break;
+	case SVM_TAG_HOST:
+		/*
+		 * NOTE(review): mnum is used as an index into sr_nodes with
+		 * no range check, and the record returned by getsetbyname()
+		 * is not released in this function -- confirm both against
+		 * the definition of md_set_record and its ownership rules.
+		 */
+		md_sr = NULL;
+		if (setname != NULL) {
+			md_sr = getsetbyname(setname, &status);
+		}
+		if ((md_sr != NULL) && (md_sr->sr_nodes[mnum] != NULL)) {
+			/*
+			 * Get the host data from the node array.
+			 */
+			rtn = snprintf(name, sizeof (name), "%s",
+			    md_sr->sr_nodes[mnum]);
+		}
+		/* Fall back to a generic name if lookup failed or overflowed */
+		if ((name[0] == '\0') || (rtn >= sizeof (name))) {
+			(void) sprintf(name, "host");
+			rtn = 0;
+		}
+		break;
+	case SVM_TAG_SET:
+		if (setname == NULL) {
+			(void) sprintf(name, "diskset");
+		} else {
+			rtn = snprintf(name, sizeof (name), "%s", setname);
+		}
+		break;
+	default:
+		/* Any other tag: use whatever get_devname() reports. */
+		if ((setname = get_devname(setno, dev)) != NULL) {
+			rtn = snprintf(name, sizeof (name), "%s", setname);
+		}
+		break;
+	}
+	/* Errors collected in status are deliberately discarded. */
+	mdclrerror(&status);
+
+	/* Check if we got any rubbish for any of the snprintf's */
+	if ((name[0] == '\0') || (rtn >= sizeof (name))) {
+		return (NULL);
+	}
+
+	return (strdup(name));
+}
+
+/*
+ * Sysevent subclass and mdnotify event type pairs.
+ * One element of ev_table: se_ev is the sysevent subclass string used as
+ * the lookup key, md_ev is the mdnotify event it translates to.
+ */
+struct node {
+	char	*se_ev;
+	evid_t	md_ev;
+};
+
+/*
+ * Translation table searched by do_mdnotify() with bsearch() and
+ * ev_compare(), which is a strcmp() on se_ev.  The entries must therefore
+ * be kept sorted in ascending strcmp() order of the ESC_SVM_* string
+ * values; that is not necessarily the alphabetical order of the macro
+ * names themselves.
+ */
+static struct node ev_table[] = {
+	{ ESC_SVM_ADD,			EV_ADD },
+	{ ESC_SVM_ATTACH,		EV_ATTACH },
+	{ ESC_SVM_ATTACHING,		EV_ATTACHING },
+	{ ESC_SVM_CHANGE,		EV_CHANGE },
+	{ ESC_SVM_CREATE,		EV_CREATE },
+	{ ESC_SVM_DELETE,		EV_DELETE },
+	{ ESC_SVM_DETACH,		EV_DETACH },
+	{ ESC_SVM_DETACHING,		EV_DETACHING },
+	{ ESC_SVM_DRIVE_ADD,		EV_DRIVE_ADD },
+	{ ESC_SVM_DRIVE_DELETE,		EV_DRIVE_DELETE },
+	{ ESC_SVM_ENABLE,		EV_ENABLE },
+	{ ESC_SVM_ERRED,		EV_ERRED },
+	{ ESC_SVM_EXCHANGE,		EV_EXCHANGE },
+	{ ESC_SVM_GROW,			EV_GROW },
+	{ ESC_SVM_HS_CHANGED,		EV_HS_CHANGED },
+	{ ESC_SVM_HS_FREED,		EV_HS_FREED },
+	{ ESC_SVM_HOST_ADD,		EV_HOST_ADD },
+	{ ESC_SVM_HOST_DELETE,		EV_HOST_DELETE },
+	{ ESC_SVM_HOTSPARED,		EV_HOTSPARED },
+	{ ESC_SVM_INIT_FAILED,		EV_INIT_FAILED },
+	{ ESC_SVM_INIT_FATAL,		EV_INIT_FATAL },
+	{ ESC_SVM_INIT_START,		EV_INIT_START },
+	{ ESC_SVM_INIT_SUCCESS,		EV_INIT_SUCCESS },
+	{ ESC_SVM_IOERR,		EV_IOERR },
+	{ ESC_SVM_LASTERRED,		EV_LASTERRED },
+	{ ESC_SVM_MEDIATOR_ADD,		EV_MEDIATOR_ADD },
+	{ ESC_SVM_MEDIATOR_DELETE,	EV_MEDIATOR_DELETE },
+	{ ESC_SVM_OFFLINE,		EV_OFFLINE },
+	{ ESC_SVM_OK,			EV_OK },
+	{ ESC_SVM_ONLINE,		EV_ONLINE },
+	{ ESC_SVM_OPEN_FAIL,		EV_OPEN_FAIL },
+	{ ESC_SVM_REGEN_DONE,		EV_REGEN_DONE },
+	{ ESC_SVM_REGEN_FAILED,		EV_REGEN_FAILED },
+	{ ESC_SVM_REGEN_START,		EV_REGEN_START },
+	{ ESC_SVM_RELEASE,		EV_RELEASE },
+	{ ESC_SVM_REMOVE,		EV_REMOVE },
+	{ ESC_SVM_RENAME_DST,		EV_RENAME_DST },
+	{ ESC_SVM_RENAME_SRC,		EV_RENAME_SRC },
+	{ ESC_SVM_REPLACE,		EV_REPLACE },
+	{ ESC_SVM_RESYNC_DONE,		EV_RESYNC_DONE },
+	{ ESC_SVM_RESYNC_FAILED,	EV_RESYNC_FAILED },
+	{ ESC_SVM_RESYNC_START,		EV_RESYNC_START },
+	{ ESC_SVM_RESYNC_SUCCESS,	EV_RESYNC_SUCCESS },
+	{ ESC_SVM_TAKEOVER,		EV_TAKEOVER }
+};
+
+/*
+ * mdnotify object types, indexed by the sysevent tag value handed to
+ * do_mdnotify(); a tag past the end of the table is reported as
+ * EVO_UNSPECIFIED there.
+ * NOTE(review): the entry order presumably mirrors the numeric SVM_TAG_*
+ * values from <sys/sysevent/svm.h> -- confirm before reordering.
+ */
+static ev_obj_t md_tags[] = {
+	EVO_UNSPECIFIED,
+	EVO_METADEV,
+	EVO_MIRROR,
+	EVO_STRIPE,
+	EVO_RAID5,
+	EVO_TRANS,
+	EVO_REPLICA,
+	EVO_HSP,
+	EVO_HS,
+	EVO_SET,
+	EVO_DRIVE,
+	EVO_HOST,
+	EVO_MEDIATOR
+};
+
+/*
+ * bsearch() comparison callback for ev_table.  node1 is the key (a
+ * sysevent subclass string), node2 is the table element being probed.
+ * Returns the strcmp() ordering of the key against the element's se_ev.
+ */
+static int
+ev_compare(const void *node1, const void *node2)
+{
+	const char		*wanted = node1;
+	const struct node	*entry = node2;
+
+	return (strcmp(wanted, entry->se_ev));
+}
+
+/*
+ * Log mdnotify event
+ *
+ * Translates the sysevent subclass string and tag into their mdnotify
+ * equivalents and raises the event through NOTIFY_MD().  A subclass that
+ * is not in ev_table is sent as EV_EMPTY; a tag outside md_tags is sent
+ * as EVO_UNSPECIFIED.
+ */
+void
+do_mdnotify(char *se_subclass, uint32_t tag, set_t setno, md_dev64_t devid)
+{
+	const size_t	n_events = sizeof (ev_table) / sizeof (ev_table[0]);
+	const size_t	n_tags = sizeof (md_tags) / sizeof (md_tags[0]);
+	struct node	*match;
+	evid_t		event;
+	ev_obj_t	object;
+
+	/* Translate sysevent into mdnotify event */
+	match = bsearch(se_subclass, ev_table, n_events,
+	    sizeof (ev_table[0]), ev_compare);
+	event = (match != NULL) ? match->md_ev : EV_EMPTY;
+
+	/* Map the tag to an object type, guarding the table bounds */
+	object = (tag < n_tags) ? md_tags[tag] : EVO_UNSPECIFIED;
+
+	NOTIFY_MD(object, setno, devid, event);
+}
+
+/*
+ * External symbols from libsysevent and libnvpair which are not
+ * available in static forms
+ *
+ * se_handle and nv_handle hold the dlopen() handles for the two
+ * libraries; the function pointers below are resolved with dlsym() by
+ * load_sev_lib().  Everything starts out NULL and is reset to NULL again
+ * when loading fails, so a pointer must only be called after
+ * load_sev_lib() has returned 0.
+ */
+static void	*se_handle = NULL, *nv_handle = NULL;
+static int	(*_sysevent_post_event)(char *, char *, char *, char *,
+		    nvlist_t *, sysevent_id_t *) = NULL;
+static int	(*_nvlist_alloc)(nvlist_t **, uint_t, int) = NULL;
+static void	(*_nvlist_free)(nvlist_t *) = NULL;
+static int	(*_nvlist_add_uint32)(nvlist_t *, char *, uint32_t) = NULL;
+static int	(*_nvlist_add_uint64)(nvlist_t *, char *, uint64_t) = NULL;
+static int	(*_nvlist_add_string)(nvlist_t *, char *, char *) = NULL;
+
+/*
+ * Load nvpair and sysevent symbols
+ *
+ * Opens libsysevent and libnvpair on first use and resolves the entry
+ * points needed by meta_svm_sysevent().  Handles and symbols that were
+ * resolved by an earlier call are reused rather than looked up again.
+ *
+ * Returns 0 when every symbol is available, 1 otherwise.  On a partial
+ * failure all function pointers are reset to NULL and the libraries are
+ * closed; a handle is kept only if dlclose() itself reports an error.
+ */
+static int
+load_sev_lib(void)
+{
+	/* Try to load the sysevent symbol */
+	if (se_handle == NULL) {
+		se_handle = dlopen("/usr/lib/libsysevent.so.1", RTLD_LAZY);
+	}
+	if (se_handle != NULL) {
+		if ((_sysevent_post_event == NULL) &&
+			(_sysevent_post_event = (int (*)(char *, char *, char *,
+			    char *, nvlist_t *, sysevent_id_t *))
+			    dlsym(se_handle, "sysevent_post_event")) == NULL) {
+			goto out;
+		}
+	} else {
+		/* Nothing has been opened yet, so there is nothing to undo */
+		return (1);
+	}
+
+	/* Try to load the nvpair symbols */
+	if (nv_handle == NULL) {
+		nv_handle = dlopen("/usr/lib/libnvpair.so.1", RTLD_LAZY);
+	}
+	if (nv_handle != NULL) {
+		if ((_nvlist_alloc == NULL) &&
+			(_nvlist_alloc = (int (*)(nvlist_t **, uint_t, int))
+			    dlsym(nv_handle, "nvlist_alloc")) == NULL) {
+			goto out;
+		}
+		if ((_nvlist_free == NULL) &&
+			(_nvlist_free = (void (*)(nvlist_t *))dlsym(nv_handle,
+			    "nvlist_free")) == NULL) {
+			goto out;
+		}
+		if ((_nvlist_add_uint32 == NULL) &&
+			(_nvlist_add_uint32 = (int (*)(nvlist_t *, char *,
+			    uint32_t))dlsym(nv_handle,
+			    "nvlist_add_uint32")) == NULL) {
+			goto out;
+		}
+		if ((_nvlist_add_uint64 == NULL) &&
+			(_nvlist_add_uint64 = (int (*)(nvlist_t *, char *,
+			    uint64_t))dlsym(nv_handle,
+			    "nvlist_add_uint64")) == NULL) {
+			goto out;
+		}
+		if ((_nvlist_add_string == NULL) &&
+			(_nvlist_add_string = (int (*)(nvlist_t *, char *,
+			    char *))dlsym(nv_handle,
+			    "nvlist_add_string")) == NULL) {
+			goto out;
+		}
+
+		return (0);
+	}
+
+	/*
+	 * Reached when libnvpair could not be opened or any dlsym() failed:
+	 * undo everything so that a later call starts from scratch.
+	 */
+out:
+	if ((se_handle != NULL) && (dlclose(se_handle) == 0)) {
+		se_handle = NULL;
+	}
+
+	if ((nv_handle != NULL) && (dlclose(nv_handle) == 0)) {
+		nv_handle = NULL;
+	}
+
+	_sysevent_post_event = NULL;
+	_nvlist_alloc = NULL;
+	_nvlist_free = NULL;
+	_nvlist_add_uint32 = NULL;
+	_nvlist_add_uint64 = NULL;
+	_nvlist_add_string = NULL;
+
+	return (1);
+}
+
+/*
+ * Log SVM sys events
+ *
+ * Raises the mdnotify event for (se_subclass, tag, setno, devid) and then,
+ * if the sysevent/nvpair libraries can be loaded, posts a sysevent whose
+ * attribute list carries the version, tag, set number, device id and
+ * device name.  Failures while building or posting the sysevent are
+ * silent; the attribute list is always released.
+ */
+void
+meta_svm_sysevent(
+	char		*se_class,
+	char		*se_subclass,
+	uint32_t	tag,
+	set_t		setno,
+	md_dev64_t	devid
+)
+{
+	sysevent_id_t	event_id;
+	nvlist_t	*attrs;
+	char		*objname;
+	int		rc;
+
+	/* Raise the mdnotify event before anything else */
+	do_mdnotify(se_subclass, tag, setno, devid);
+
+	/* Just get out if the sysevent symbol can't be loaded */
+	if (load_sev_lib())
+		return;
+
+	/* Without an attribute list there is nothing to post or free */
+	if ((*_nvlist_alloc)(&attrs, NV_UNIQUE_NAME, 0) != 0)
+		return;
+
+	/* Version number, tag, set number and device id, in that order */
+	if ((*_nvlist_add_uint32)(attrs, SVM_VERSION_NO,
+	    (uint32_t)SVM_VERSION) != 0)
+		goto done;
+
+	if ((*_nvlist_add_uint32)(attrs, SVM_TAG, (uint32_t)tag) != 0)
+		goto done;
+
+	if ((*_nvlist_add_uint32)(attrs, SVM_SET_NO, (uint32_t)setno) != 0)
+		goto done;
+
+	if ((*_nvlist_add_uint64)(attrs, SVM_DEV_ID, (uint64_t)devid) != 0)
+		goto done;
+
+	/* Device name, or a placeholder when none can be derived */
+	objname = obj2devname(tag, setno, devid);
+	if (objname == NULL) {
+		rc = (*_nvlist_add_string)(attrs, SVM_DEV_NAME,
+		    "unspecified");
+	} else {
+		rc = (*_nvlist_add_string)(attrs, SVM_DEV_NAME, objname);
+		free(objname);
+	}
+	if (rc != 0)
+		goto done;
+
+	/* Attempt to post event */
+	(void) (*_sysevent_post_event)(se_class, se_subclass,
+	    SUNW_VENDOR, EP_SVM, attrs, &event_id);
+
+done:
+	(*_nvlist_free)(attrs);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set.c b/usr/src/lib/lvm/libmeta/common/meta_set.c
new file mode 100644
index 0000000000..7634779ce5
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set.c
@@ -0,0 +1,5918 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <meta.h>
+#include <metad.h>
+#include <mdmn_changelog.h>
+#include <sys/lvm/md_crc.h>
+#include <sys/utsname.h>
+#include <sdssc.h>
+
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/svm.h>
+extern	char	*blkname(char *);
+
+/*
+ * Build a drive descriptor list from the drive records of a set.
+ *
+ *	sp	- set to examine
+ *	sideno	- side whose drive names are wanted; recomputed from the
+ *		  local node name when MD_BYPASS_DAEMON is set
+ *	flags	- passed through to metadrivename_withdrkey();
+ *		  MD_BYPASS_DAEMON selects metad_getsetbynum() instead of
+ *		  getsetbyname() to obtain the set record
+ *	ep	- error return
+ *
+ * Returns the head of a newly built descriptor list (NULL for a set with
+ * no drive records), or NULL with ep set when a lookup fails.  The set
+ * record is released with free_sr() only when it came from getsetbyname().
+ */
+static md_drive_desc *
+dr2drivedesc(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	int		flags,
+	md_error_t	*ep
+)
+{
+	md_set_record	*sr;
+	md_drive_record	*dr;
+	mddrivename_t	*dnp;
+	md_drive_desc	*dd_head = NULL;
+	md_set_desc	*sd;
+
+	if (flags & MD_BYPASS_DAEMON) {
+		if ((sr = metad_getsetbynum(sp->setno, ep)) == NULL)
+			return (NULL);
+		/*
+		 * The descriptor is handed straight to getnodeside(), so
+		 * fail here rather than pass it a NULL pointer.
+		 */
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (NULL);
+		sideno = getnodeside(mynode(), sd);
+		sp = metafakesetname(sp->setno, sr->sr_setname);
+	} else {
+		if ((sr = getsetbyname(sp->setname, ep)) == NULL)
+			return (NULL);
+	}
+
+	assert(sideno != MD_SIDEWILD);
+
+	/*
+	 * WARNING:
+	 * The act of getting the dnp from the namespace means that we
+	 * will get the devid of the disk as recorded in the namespace.
+	 * This devid has the potential to be stale if the disk is being
+	 * replaced via a rebind, this means that any code that relies
+	 * on any of the dnp information should take the appropriate action
+	 * to preserve that information. For example in the rebind code the
+	 * devid of the new disk is saved off and then copied back in once
+	 * the code that has called this function has completed.
+	 */
+	for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
+		if ((dnp = metadrivename_withdrkey(sp, sideno, dr->dr_key,
+		    flags, ep)) == NULL) {
+			/* Undo the partial list before reporting failure */
+			if (!(flags & MD_BYPASS_DAEMON))
+				free_sr(sr);
+			metafreedrivedesc(&dd_head);
+			return (NULL);
+		}
+
+		(void) metadrivedesc_append(&dd_head, dnp, dr->dr_dbcnt,
+		    dr->dr_dbsize, dr->dr_ctime, dr->dr_genid, dr->dr_flags);
+	}
+
+	if (!(flags & MD_BYPASS_DAEMON)) {
+		free_sr(sr);
+	}
+	return (dd_head);
+}
+
+/*
+ * Rebuild the side-name list of a drive.
+ *
+ * Any existing list on dnp is flushed, then one mdsidenames_t entry is
+ * appended per side for which meta_getnmentbykey() finds a name in the
+ * local set's namespace.
+ *
+ * Returns 0 on success, -1 (with ep set by the failing call) on error.
+ * Entries appended before an error remain on dnp->side_names.
+ */
+static int
+get_sidenmlist(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	mdsidenames_t	*sn, **sn_next;
+	int		i;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	metaflushsidenames(dnp);
+	/* sn_next always points at the link where the next entry goes */
+	sn_next = &dnp->side_names;
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * Only get sidenames for this node since
+		 * that is the only side information stored in
+		 * the local mddb for a multi-node diskset.
+		 */
+		if (sd->sd_mn_mynode) {
+			sn = Zalloc(sizeof (*sn));
+			sn->sideno = sd->sd_mn_mynode->nd_nodeid;
+			if ((sn->cname = meta_getnmentbykey(MD_LOCAL_SET,
+			    sn->sideno, dnp->side_names_key, &sn->dname,
+			    &sn->mnum, NULL, ep)) == NULL) {
+				if (sn->dname != NULL)
+					Free(sn->dname);
+				Free(sn);
+				return (-1);
+			}
+
+			/* Add to the end of the linked list */
+			assert(*sn_next == NULL);
+			*sn_next = sn;
+			sn_next = &sn->next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			sn = Zalloc(sizeof (*sn));
+			sn->sideno = i;
+			/*
+			 * NOTE(review): the lookup uses side i+SKEW while
+			 * the entry records side i -- presumably SKEW is the
+			 * namespace offset for diskset sides; confirm.
+			 */
+			if ((sn->cname = meta_getnmentbykey(MD_LOCAL_SET,
+			    i+SKEW, dnp->side_names_key, &sn->dname,
+			    &sn->mnum, NULL, ep)) == NULL) {
+				/*
+				 * It is possible that during the add of a
+				 * host to have a 'missing' side as the side
+				 * for this disk will be added later. So ignore
+				 * the error. The 'missing' side will be added
+				 * once the addhosts process has completed.
+				 */
+				if (mdissyserror(ep, ENOENT)) {
+					mdclrerror(ep);
+					Free(sn);
+					continue;
+				}
+
+				if (sn->dname != NULL)
+					Free(sn->dname);
+				Free(sn);
+				return (-1);
+			}
+
+			/* Add to the end of the linked list */
+			assert(*sn_next == NULL);
+			*sn_next = sn;
+			sn_next = &sn->next;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Derive a drive descriptor list from a replica list.
+ *
+ * Every distinct drive (compared by cname) that holds a replica in rlp
+ * gets one descriptor; its dd_dbcnt counts the replicas found on that
+ * drive.  The replica size recorded in each descriptor is the smallest
+ * r_nblk in the list, or the default size for the set type when the list
+ * yields none.
+ *
+ * Returns the new list (NULL for an empty replica list), or NULL with ep
+ * set if the set descriptor cannot be obtained.
+ */
+static md_drive_desc *
+rl_to_dd(
+	mdsetname_t		*sp,
+	md_replicalist_t	*rlp,
+	md_error_t		*ep
+)
+{
+	md_replicalist_t	*rl;
+	md_replica_t		*r;
+	md_drive_desc		*dd = NULL;
+	md_drive_desc		*d;
+	int			found;
+	md_set_desc		*sd;
+	daddr_t			nblks = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (NULL);
+
+	/* find the smallest existing replica */
+	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+		r = rl->rl_repp;
+		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
+	}
+
+	/* No usable size found: fall back to the default for this set */
+	if (nblks <= 0)
+		nblks = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE;
+
+	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+		r = rl->rl_repp;
+
+		found = 0;
+		for (d = dd; d != NULL; d = d->dd_next) {
+			if (strcmp(r->r_namep->drivenamep->cname,
+			    d->dd_dnp->cname) == 0) {
+				found = 1;
+				/*
+				 * Count the replica against the drive that
+				 * matched, not against the head of the list.
+				 */
+				d->dd_dbcnt++;
+				break;
+			}
+		}
+
+		/* First replica seen on this drive: start its count at 1 */
+		if (! found)
+			(void) metadrivedesc_append(&dd, r->r_namep->drivenamep,
+			    1, nblks, sd->sd_ctime, sd->sd_genid, MD_DR_OK);
+	}
+
+	return (dd);
+}
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Return the set count reported by the MD_IOCGETNSET ioctl.  A non-zero
+ * answer is remembered, so the ioctl is only repeated while the cached
+ * value is still 0.  Returns 0 if the ioctl fails.
+ */
+set_t
+get_max_sets(md_error_t *ep)
+{
+	static set_t		cached_nsets = 0;
+
+	if (cached_nsets != 0)
+		return (cached_nsets);
+
+	if (metaioctl(MD_IOCGETNSET, &cached_nsets, ep, NULL) != 0)
+		return (0);
+
+	return (cached_nsets);
+}
+
+/*
+ * Return the mediator count reported by the MD_MED_GET_NMED ioctl.  A
+ * non-zero answer is remembered, so the ioctl is only repeated while the
+ * cached value is still 0.  Returns 0 if the ioctl fails.
+ */
+int
+get_max_meds(md_error_t *ep)
+{
+	static int		cached_nmeds = 0;
+
+	if (cached_nmeds != 0)
+		return (cached_nmeds);
+
+	if (metaioctl(MD_MED_GET_NMED, &cached_nmeds, ep, NULL) != 0)
+		return (0);
+
+	return (cached_nmeds);
+}
+
+/*
+ * Return the side number of the local node within set sp.
+ *
+ * Set number 0 always yields side 0.  Otherwise the local node name is
+ * looked up in the set descriptor; MD_SIDEWILD is returned if the
+ * descriptor cannot be obtained, and the result of an MDE_DS_HOSTNOSIDE
+ * error report is returned if this host has no side in the set.
+ */
+side_t
+getmyside(mdsetname_t *sp, md_error_t *ep)
+{
+	md_set_desc		*setdesc;
+	char			*hostname;
+	side_t			side;
+
+	if (sp->setno == 0)
+		return (0);
+
+	setdesc = metaget_setdesc(sp, ep);
+	if (setdesc == NULL)
+		return (MD_SIDEWILD);
+
+	hostname = mynode();
+	assert(hostname != NULL);
+
+	side = getnodeside(hostname, setdesc);
+	if (side == MD_SIDEWILD) {
+		return (mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, hostname,
+		    NULL, hostname));
+	}
+
+	return (side);
+}
+
+/*
+ * get set info from name
+ *
+ * Asks the daemon on the local node for the record of the named set.
+ * For a multi-node set the plain record is dropped and the multi-node
+ * record is fetched instead and returned cast to md_set_record *.
+ *
+ * Returns the record, or NULL with ep set: either by the failing client
+ * call, or to MDE_NO_SET when no such set exists.
+ */
+md_set_record *
+getsetbyname(char *setname, md_error_t *ep)
+{
+	md_set_record		*sr = NULL;
+	md_mnset_record		*mnsr = NULL;
+	char			*p;
+	size_t			len;
+
+	/* get set info from daemon */
+	if (clnt_getset(mynode(), setname, MD_SET_BAD, &sr, ep) == -1)
+		return (NULL);
+	if (sr != NULL) {
+		/*
+		 * Returned record could be for a multi-node set or a
+		 * non-multi-node set.
+		 */
+		if (MD_MNSET_REC(sr)) {
+			/*
+			 * Record is for a multi-node set.  Reissue call
+			 * to get mnset information.  Need to free
+			 * record as if a non-multi-node set record since
+			 * that is what clnt_getset gave us.  If in
+			 * the daemon, don't free since this is a pointer
+			 * into the setrecords array.
+			 */
+			if (! md_in_daemon) {
+				sr->sr_flags &= ~MD_SR_MN;
+				free_sr(sr);
+			}
+			if (clnt_mngetset(mynode(), setname, MD_SET_BAD, &mnsr,
+			    ep) == -1)
+				return (NULL);
+			if (mnsr != NULL)
+				return ((struct md_set_record *)mnsr);
+			/* no multi-node record: fall through to "no such set" */
+		} else {
+			return (sr);
+		}
+	}
+
+	/* no such set */
+	/* 30 bytes leave room for the fixed text around the set name */
+	len = strlen(setname) + 30;
+	p = Malloc(len);
+	(void) snprintf(p, len, "setname \"%s\"", setname);
+	(void) mderror(ep, MDE_NO_SET, p);
+	Free(p);
+	return (NULL);
+}
+
+/*
+ * get set info from number
+ *
+ * Asks the daemon on the local node for the record of set setno.  For a
+ * multi-node set the plain record is dropped and the multi-node record
+ * is fetched instead and returned cast to md_set_record *.
+ *
+ * Returns the record, or NULL with ep set: either by the failing client
+ * call, or to MDE_NO_SET when no such set exists.
+ */
+md_set_record *
+getsetbynum(set_t setno, md_error_t *ep)
+{
+	/* Start from NULL, as getsetbyname() does, so "no record" is safe */
+	md_set_record		*sr = NULL;
+	md_mnset_record		*mnsr = NULL;
+	char			buf[100];
+
+	if (clnt_getset(mynode(), NULL, setno, &sr, ep) == -1)
+		return (NULL);
+
+	if (sr != NULL) {
+		/*
+		 * Returned record could be for a multi-node set or a
+		 * non-multi-node set.
+		 */
+		if (MD_MNSET_REC(sr)) {
+			/*
+			 * Record is for a multi-node set.  Reissue call
+			 * to get mnset information.  Need to free
+			 * record as if a non-multi-node set record since
+			 * that is what clnt_getset gave us.  If in
+			 * the daemon, don't free since this is a pointer
+			 * into the setrecords array.
+			 */
+			if (! md_in_daemon) {
+				sr->sr_flags &= ~MD_SR_MN;
+				free_sr(sr);
+			}
+			if (clnt_mngetset(mynode(), NULL, setno, &mnsr,
+			    ep) == -1)
+				return (NULL);
+			if (mnsr != NULL)
+				return ((struct md_set_record *)mnsr);
+			/* no multi-node record: fall through to "no such set" */
+		} else {
+			return (sr);
+		}
+	}
+
+	/* no such set */
+	(void) snprintf(buf, sizeof (buf), "setno %u", setno);
+	(void) mderror(ep, MDE_NO_SET, buf);
+	return (NULL);
+}
+
+/*
+ * Check whether drive dnp is one of the devices already used by set sp.
+ *
+ * The device list comes from meta_getalldevs() (check_db is passed
+ * through to it) and is compared by drive cname.
+ *
+ * Returns 0 if the drive is not in use, the result of an
+ * MDE_DS_DRIVEINUSE error report if it is, and -1 if the device list
+ * cannot be obtained.
+ */
+int
+meta_check_drive_inuse(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	int		check_db,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*devlist = NULL;
+	mdnamelist_t	*cur;
+	int		result = 0;
+
+	/* get all underlying partitions */
+	if (meta_getalldevs(sp, &devlist, check_db, ep) != 0)
+		return (-1);
+
+	/* search for drive; the first match decides the result */
+	for (cur = devlist; cur != NULL; cur = cur->next) {
+		if (strcmp(dnp->cname, cur->namep->drivenamep->cname) != 0)
+			continue;
+
+		result = mddserror(ep, MDE_DS_DRIVEINUSE, sp->setno,
+		    NULL, dnp->cname, sp->setname);
+		break;
+	}
+
+	/* cleanup, return what the search found */
+	metafreenamelist(devlist);
+	return (result);
+}
+
+/*
+ * simple check for ownership
+ *
+ * Returns 0 for the local set, for a set this host owns, and for a set
+ * that has no drive descriptors (after best-effort cleanup of any stray
+ * ownership).  Returns -1 if ownership or the set/drive descriptors
+ * cannot be determined, and the result of an MDE_DS_NOOWNER error report
+ * if the set has drives but this host is not the owner.
+ */
+int
+meta_check_ownership(mdsetname_t *sp, md_error_t *ep)
+{
+	int			ownset;
+	md_set_desc		*sd;
+	md_drive_desc		*dd;
+	md_replicalist_t	*rlp = NULL;
+	md_error_t		xep = mdnullerror;
+
+	if (metaislocalset(sp))
+		return (0);
+
+	ownset = own_set(sp, NULL, TRUE, ep);
+	if (! mdisok(ep))
+		return (-1);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* dd may legitimately be NULL; only ep tells an error apart */
+	dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
+	if (! mdisok(ep))
+		return (-1);
+
+	/* If we have no drive descriptors, check for no ownership */
+	if (dd == NULL) {
+		if (ownset == MD_SETOWNER_NONE)
+			return (0);
+
+		/* If ownership somehow has come to exist, we must clean up */
+
+		/*
+		 * Everything below is best effort: failures are collected
+		 * in the scratch error xep and cleared, never reported.
+		 */
+		if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp,
+		    &xep) < 0)
+			mdclrerror(&xep);
+
+		/* Rebuild a drive list from whatever replicas were found */
+		if ((dd = rl_to_dd(sp, rlp, &xep)) == NULL)
+			if (! mdisok(&xep))
+				mdclrerror(&xep);
+
+		/* Ownership is released here only for these set types */
+		if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+			if (rel_own_bydd(sp, dd, TRUE, &xep))
+				mdclrerror(&xep);
+		}
+
+		if (halt_set(sp, &xep))
+			mdclrerror(&xep);
+
+		metafreereplicalist(rlp);
+
+		metafreedrivedesc(&dd);
+
+		return (0);
+	}
+
+	/*
+	 * NOTE(review): this frees sd->sd_drvs rather than dd; presumably
+	 * metaget_drivedesc() returns that same cached list -- confirm.
+	 */
+	metafreedrivedesc(&sd->sd_drvs);
+
+	if (ownset == MD_SETOWNER_YES)
+		return (0);
+
+	return (mddserror(ep, MDE_DS_NOOWNER, sp->setno, NULL, NULL,
+	    sp->setname));
+}
+
+/*
+ * simple check for ownership on a given host
+ *
+ * Returns 0 for the local set, for a set without drive descriptors, and
+ * when hostname reports that it owns the set.  Returns -1 if the set or
+ * drive descriptors cannot be obtained or the ownership query fails, and
+ * the result of an error report when hostname is not in the set
+ * (MDE_DS_NODENOTINSET) or is not the owner (MDE_DS_NODEISNOTOWNER).
+ */
+int
+meta_check_ownership_on_host(mdsetname_t *sp, char *hostname, md_error_t *ep)
+{
+	md_set_desc	*sd;
+	md_drive_desc	*dd;
+	int		bool;
+
+	if (metaislocalset(sp))
+		return (0);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* The host must be a member of the set to be asked at all */
+	if (getnodeside(hostname, sd) == MD_SIDEWILD)
+		return (mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
+		    hostname, NULL, sp->setname));
+
+	/* dd may legitimately be NULL; only ep tells an error apart */
+	dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
+	if (! mdisok(ep))
+		return (-1);
+
+	/* Ask the host itself; the answer comes back in bool */
+	if (clnt_ownset(hostname, sp, &bool, ep) == -1)
+		return (-1);
+
+	/* A set without drives passes regardless of the answer */
+	if (dd == NULL)
+		return (0);
+
+	/*
+	 * NOTE(review): this frees sd->sd_drvs rather than dd; presumably
+	 * metaget_drivedesc() returns that same cached list -- confirm.
+	 */
+	metafreedrivedesc(&sd->sd_drvs);
+
+	if (bool == TRUE)
+		return (0);
+
+	return (mddserror(ep, MDE_DS_NODEISNOTOWNER, sp->setno, hostname, NULL,
+	    sp->setname));
+}
+
+/*
+ * Report whether a node appears in a multinode diskset membership list.
+ *
+ * The caller supplies the node to look for and the membership list as
+ * returned from meta_read_nodelist.  The list is expected to be reused
+ * across many calls, so allocating and freeing it is the caller's job.
+ *
+ * A non-zero node_id selects matching by node id; otherwise a non-NULL
+ * node_name selects matching by name.  With neither, nothing matches.
+ *
+ * Returns:
+ *	1 - if a member
+ *	0 - not a member
+ */
+int
+meta_is_member(
+	char				*node_name,
+	md_mn_nodeid_t			node_id,
+	mndiskset_membershiplist_t	*nl
+)
+{
+	mndiskset_membershiplist_t	*member;
+	int				match_by_id = (node_id != 0);
+
+	/* Neither an id nor a name to look for */
+	if (!match_by_id && node_name == NULL)
+		return (0);
+
+	for (member = nl; member != NULL; member = member->next) {
+		if (match_by_id) {
+			/* Compare given nodeid against nodeid in member list */
+			if (member->msl_node_id == node_id)
+				return (1);
+		} else if (strcmp(member->msl_node_name, node_name) == 0) {
+			/* Given name equals the name in the member list */
+			return (1);
+		}
+	}
+
+	/* No match found in member list */
+	return (0);
+}
+
+/*
+ * meta_getside_devinfo should go to the host that
+ * has the device, to return the device name, driver name, minor num.
+ * We can take the big cheat for now, since it is a requirement
+ * that the device names and device numbers are the same, and
+ * just get the info locally.
+ *
+ * This routine is very similar to meta_getnextside_devinfo except
+ * that the specific side to be used is being passed in.
+ *
+ * Each of ret_bname, ret_dname and ret_mnum may be NULL if the caller
+ * does not want that value.  The strings handed back are Strdup()ed
+ * copies owned by the caller.  On an error return no string is handed
+ * back: *ret_bname and *ret_dname are left NULL.
+ *
+ * Exit status:
+ *	 1 - Side info has been returned
+ *	-1 - An error has been detected
+ */
+/*ARGSUSED*/
+int
+meta_getside_devinfo(
+	mdsetname_t	*sp,		/* for this set */
+	char		*bname,		/* local block name (myside) */
+	side_t		sideno,		/* sideno */
+	char		**ret_bname,	/* block device name of returned side */
+	char		**ret_dname,	/* driver name of returned side */
+	minor_t		*ret_mnum,	/* minor number of returned side */
+	md_error_t	*ep
+)
+{
+	mdname_t	*np;
+
+	/* Start from "nothing returned" so error exits are well defined */
+	if (ret_bname != NULL)
+		*ret_bname = NULL;
+	if (ret_dname != NULL)
+		*ret_dname = NULL;
+	if (ret_mnum != NULL)
+		*ret_mnum = NODEV32;
+
+
+	if ((np = metaname(&sp, bname, ep)) == NULL)
+		return (-1);
+
+/*
+ * NOTE (future) - There will be more work here once devids are integrated
+ * into disksets.  Then the side should be used to find the correct
+ * host and the b/d names should be gotten from that host.
+ */
+
+	/*
+	 * Return the side info.
+	 */
+	if (ret_bname != NULL)
+		*ret_bname = Strdup(np->bname);
+
+	if (ret_dname != NULL) {
+		mdcinfo_t	*cinfo;
+
+		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
+			/* Don't leak the block name copied just above */
+			if ((ret_bname != NULL) && (*ret_bname != NULL)) {
+				Free(*ret_bname);
+				*ret_bname = NULL;
+			}
+			return (-1);
+		}
+
+		*ret_dname = Strdup(cinfo->dname);
+	}
+
+	if (ret_mnum != NULL)
+		*ret_mnum = meta_getminor(np->dev);
+
+	return (1);
+}
+
+/*
+ * Get the information on the device from the remote node using the devid
+ * of the disk.
+ *
+ * *sideno is both input and output: pass MD_SIDEWILD to start, then pass
+ * back the value returned by the previous call to advance to the next
+ * side.  Each of ret_bname, ret_dname and ret_mnum may be NULL if the
+ * caller does not want that value.  The strings handed back are
+ * Strdup()ed copies owned by the caller.  When -1 is returned after the
+ * copies were made they are released again and the pointers set to NULL.
+ *
+ * Exit status:
+ *	 0 - No more side info to return
+ *	 1 - More side info's to return
+ *	-1 - An error has been detected
+ */
+int
+meta_getnextside_devinfo(
+	mdsetname_t	*sp,		/* for this set */
+	char		*bname,		/* local block name (myside) */
+	side_t		*sideno,	/* previous sideno & returned sideno */
+	char		**ret_bname,	/* block device name of returned side */
+	char		**ret_dname,	/* driver name of returned side */
+	minor_t		*ret_mnum,	/* minor number of returned side */
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd = NULL;
+	int		i;
+	mdname_t	*np;
+	mddrivename_t	*dnp;
+	char		*devidstr = NULL;
+	int		devidstrlen;
+	md_dev64_t	retdev = NODEV64;
+	char		*ret_devname = NULL;
+	char		*ret_blkdevname = NULL;
+	char		*ret_driver = NULL;
+	char		*nodename = NULL;
+	int		fd;
+	int		ret = -1;
+	char		*minor_name = NULL;
+	md_mnnode_desc	*nd;
+
+
+	/* Start from "nothing returned" so error exits are well defined */
+	if (ret_bname != NULL)
+		*ret_bname = NULL;
+	if (ret_dname != NULL)
+		*ret_dname = NULL;
+	if (ret_mnum != NULL)
+		*ret_mnum = NODEV32;
+
+	if (metaislocalset(sp)) {
+		/* no more sides - we are done */
+		if (*sideno != MD_SIDEWILD)
+			return (0);
+
+		/* First time through -  set up return sideno */
+		*sideno = 0;
+	} else {
+
+		/*
+		 * Find the next sideno, starting after the one given.
+		 */
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			if ((*sideno == MD_SIDEWILD) &&
+			    (nd != (struct md_mnnode_desc *)NULL)) {
+				*sideno = nd->nd_nodeid;
+			} else {
+				while (nd) {
+					/*
+					 * Found given sideno, now find
+					 * next sideno, if there is one.
+					 */
+					if ((*sideno == nd->nd_nodeid) &&
+					    (nd->nd_next !=
+					    (struct md_mnnode_desc *)NULL)) {
+						*sideno =
+						    nd->nd_next->nd_nodeid;
+						break;
+					}
+					nd = nd->nd_next;
+				}
+				if (nd == NULL) {
+					return (0);
+				}
+			}
+			if (*sideno == MD_SIDEWILD)
+				return (0);
+		} else {
+			for (i = (*sideno)+1; i < MD_MAXSIDES; i++)
+				/* Find next full slot */
+				if (sd->sd_nodes[i][0] != '\0')
+					break;
+
+			/* No more sides - we are done */
+			if (i == MD_MAXSIDES)
+				return (0);
+
+			/* Set up the return sideno */
+			*sideno = i;
+			nodename = (char *)sd->sd_nodes[i];
+		}
+	}
+
+	/*
+	 * Need to pass the node the devid of the disk and get it to
+	 * send back the details of the disk from that side.
+	 */
+	if ((np = metaname(&sp, bname, ep)) == NULL)
+		return (-1);
+
+	dnp = np->drivenamep;
+
+	/*
+	 * By default, set up the parameters so that they are copied out.
+	 */
+	if (ret_bname != NULL)
+		*ret_bname = Strdup(np->bname);
+
+	if (ret_dname != NULL) {
+		mdcinfo_t	*cinfo;
+
+		if ((cinfo = metagetcinfo(np, ep)) == NULL)
+			goto fail;
+
+		*ret_dname = Strdup(cinfo->dname);
+	}
+
+	if (ret_mnum != NULL)
+		*ret_mnum = meta_getminor(np->dev);
+
+	/*
+	 * Try some optimization. If this is the local set or the device
+	 * is a metadevice then just copy the information. If the device
+	 * does not have a devid (due to not having a minor name) then
+	 * fall back to the pre-devid behaviour of copying the information
+	 * on the device: this is okay because the sanity checks before this
+	 * call would have found any issues with the device. If it's a
+	 * multi-node diskset also just return ie. copy.
+	 *
+	 * sd is only fetched on the non-local path above, so a NULL sd is
+	 * treated like the local set rather than dereferenced.
+	 */
+	if (metaislocalset(sp) || metaismeta(np) || (dnp->devid == NULL) ||
+	    (sd == NULL) || (MD_MNSET_DESC(sd)))
+		return (1);
+
+	if (np->minor_name == (char *)NULL) {
+		/*
+		 * Have to get the minor name then. The slice should exist
+		 * on the disk because it will have already been repartitioned
+		 * up prior to getting to this point.
+		 */
+		if ((fd = open(np->bname, (O_RDONLY|O_NDELAY), 0)) < 0) {
+			(void) mdsyserror(ep, errno, np->bname);
+			goto fail;
+		}
+		/*
+		 * NOTE(review): the result of devid_get_minor_name() is
+		 * ignored, so minor_name may still be NULL when it is
+		 * passed to Strdup() -- confirm Strdup() tolerates that.
+		 */
+		(void) devid_get_minor_name(fd, &minor_name);
+		np->minor_name = Strdup(minor_name);
+		devid_str_free(minor_name);
+		(void) close(fd);
+	}
+
+	/* allocate extra space for "/" and NULL hence +2 */
+	devidstrlen = strlen(dnp->devid) + strlen(np->minor_name) + 2;
+	devidstr = (char *)Malloc(devidstrlen);
+
+	/*
+	 * As a minor name is supplied then the ret_devname will be
+	 * appropriate to that minor_name and in this case it will be
+	 * a block device ie /dev/dsk.
+	 */
+	(void) snprintf(devidstr, devidstrlen,
+		"%s/%s", dnp->devid, np->minor_name);
+
+	ret = clnt_devinfo_by_devid(nodename, sp, devidstr, &retdev,
+	    np->bname, &ret_devname, &ret_driver, ep);
+
+	Free(devidstr);
+
+	/*
+	 * If the other side is not running device id in disksets,
+	 * 'ret' is set to ENOTSUP in which case we fallback to
+	 * the existing behaviour
+	 */
+	if (ret == ENOTSUP)
+		return (1);
+	else if (ret == -1)
+		goto fail;
+
+	/*
+	 * ret_devname comes from the rpc call and is a
+	 * raw device name. We need to make this into a
+	 * block device via blkname for further processing.
+	 * Unfortunately, when our device id isn't found in
+	 * the system, the rpc call will return a " " in
+	 * ret_devname in which case we need to fill that in
+	 * as ret_blkname because blkname of " " returns NULL.
+	 */
+	if (ret_bname != NULL && ret_devname != NULL) {
+		ret_blkdevname = blkname(ret_devname);
+		/* Release the default copy before replacing it */
+		if (*ret_bname != NULL)
+			Free(*ret_bname);
+		if (ret_blkdevname == NULL)
+			*ret_bname = Strdup(ret_devname);
+		else
+			*ret_bname = Strdup(ret_blkdevname);
+	}
+
+	if (ret_dname != NULL && ret_driver != NULL) {
+		/* Release the default copy before replacing it */
+		if (*ret_dname != NULL)
+			Free(*ret_dname);
+		*ret_dname = Strdup(ret_driver);
+	}
+
+	if (ret_mnum != NULL)
+		*ret_mnum = meta_getminor(retdev);
+
+	return (1);
+
+	/*
+	 * Error exit once the default copies may exist: hand nothing back
+	 * so the caller neither leaks nor double-frees them.
+	 */
+fail:
+	if ((ret_bname != NULL) && (*ret_bname != NULL)) {
+		Free(*ret_bname);
+		*ret_bname = NULL;
+	}
+	if ((ret_dname != NULL) && (*ret_dname != NULL)) {
+		Free(*ret_dname);
+		*ret_dname = NULL;
+	}
+	return (-1);
+}
+
+/*
+ * Find out whether drive dnp belongs to any set numbered 1 up to (but
+ * not including) get_max_sets().
+ *
+ * On return *spp names the first set found to contain the drive, or is
+ * NULL if none does.  With bypass_daemon set, the sets are addressed
+ * through metafakesetname() instead of metasetnosetname().
+ *
+ * Returns 0 when the search completed (whether or not a set was found)
+ * and -1 on error.  Sets that do not exist (MDE_NO_SET) are skipped; an
+ * MDE_DB_NODB error ends the search with a 0 return.
+ */
+int
+meta_is_drive_in_anyset(
+	mddrivename_t	*dnp,
+	mdsetname_t	**spp,
+	int		bypass_daemon,
+	md_error_t 	*ep
+)
+{
+	set_t		setno;
+	mdsetname_t	*this_sp;
+	int		is_it;
+	set_t		max_sets;
+
+	if ((max_sets = get_max_sets(ep)) == 0)
+		return (-1);
+
+	assert(spp != NULL);
+	*spp = NULL;
+
+	/* The loop starts at set number 1; set 0 is not examined */
+	for (setno = 1; setno < max_sets; setno++) {
+		if (!bypass_daemon) {
+			if ((this_sp = metasetnosetname(setno, ep)) == NULL) {
+				if (mdismddberror(ep, MDE_DB_NODB)) {
+					mdclrerror(ep);
+					return (0);
+				}
+				if (mdiserror(ep, MDE_NO_SET)) {
+					mdclrerror(ep);
+					continue;
+				}
+				return (-1);
+			}
+		} else
+			this_sp = metafakesetname(setno, NULL);
+
+		if ((is_it = meta_is_drive_in_thisset(this_sp, dnp,
+		    bypass_daemon, ep)) == -1) {
+			/* A set that is gone is not an error for the search */
+			if (mdiserror(ep, MDE_NO_SET)) {
+				mdclrerror(ep);
+				continue;
+			}
+			return (-1);
+		}
+		if (is_it) {
+			*spp = this_sp;
+			return (0);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Check whether a drive belongs to the given diskset by comparing its
+ * cname with the cname of every drive in the set's drive descriptor list.
+ *
+ * With bypass_daemon set the list is built by dr2drivedesc() (flags
+ * MD_BASICNAME_OK | MD_BYPASS_DAEMON, side MD_SIDEWILD); otherwise it comes
+ * from metaget_drivedesc().
+ *
+ * Returns 1 if the drive is in the set, 0 if it is not (including the
+ * case of an empty list with no error), and -1 if the list could not be
+ * obtained and ep holds an error.
+ */
+int
+meta_is_drive_in_thisset(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	int		bypass_daemon,
+	md_error_t	*ep
+)
+{
+	md_drive_desc	*drives;
+	md_drive_desc	*cur;
+
+	if (bypass_daemon) {
+		drives = dr2drivedesc(sp, MD_SIDEWILD,
+		    (MD_BASICNAME_OK | MD_BYPASS_DAEMON), ep);
+	} else {
+		drives = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+	}
+
+	if (drives == NULL) {
+		if (mdisok(ep))
+			return (0);
+		return (-1);
+	}
+
+	cur = drives;
+	while (cur != NULL) {
+		if (strcmp(cur->dd_dnp->cname, dnp->cname) == 0)
+			return (1);
+		cur = cur->dd_next;
+	}
+	return (0);
+}
+
+/*
+ * Rebalance the state database replicas of a diskset.
+ *
+ * The set is locked on its member nodes (for a multi-node set only the
+ * ALIVE nodes, and class 1 rpc.mdcommd messages are suspended as well)
+ * and meta_db_balance() is then called with the drives currently in the
+ * set; no drives are added or deleted.  The suspend and the locks are
+ * released again on every path that reaches the "out" label.
+ *
+ * sp - diskset to balance
+ * ep - error return
+ *
+ * Returns the value of meta_db_balance() on the normal path, or -1 when
+ * the set descriptor or drive list cannot be obtained, the ownership
+ * check fails, or locking, suspending, resuming or unlocking fails.
+ */
+int
+meta_set_balance(
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*dd, *curdd;
+	daddr_t			dbsize;
+	daddr_t			nblks;
+	int			i;
+	int			rval = 0;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	int			suspend1_flag = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* default replica size depends on the kind of diskset */
+	dbsize = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE;
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/* END CHECK CODE */
+
+	/*
+	 * Get drive descriptors for the drives that are currently in the set.
+	 */
+	curdd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
+
+	if (! mdisok(ep))
+		return (-1);
+
+	/* Use the minimum replica size currently in use, else the default */
+	if ((nblks = meta_db_minreplica(sp, ep)) < 0)
+		mdclrerror(ep);
+	else
+		dbsize = nblks;	/* adjust replica size */
+
+	/* Make sure we are blocking all signals; a failure here is ignored */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	/*
+	 * Lock the set on current set members.
+	 * For MN diskset lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			/* at least one node is suspended: resume at out */
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+	}
+
+	/* We are not adding or deleting any drives, just balancing */
+	dd = NULL;
+
+	/*
+	 * Balance the DB's according to the list of existing drives and the
+	 * list of added drives.  Success and failure both continue into the
+	 * cleanup below with rval holding the result.
+	 */
+	rval = meta_db_balance(sp, dd, curdd, dbsize, ep);
+
+out:
+	/*
+	 * Unlock diskset by resuming class 1 messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if (suspend1_flag) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * We are here because we failed to resume
+				 * rpc.mdcommd.  However we potentially have
+				 * an error from the previous call
+				 * (meta_db_balance). If the previous call
+				 * did fail,  we capture that error and
+				 * generate a perror with the string,
+				 * "Unable to resume...".
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd."));
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/* Unlock the set */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+				/* keep the first error, discard later ones */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+				/*
+				 * Keep the first error and discard later
+				 * ones, exactly as the multi-node path above
+				 * does, so xep never carries a stale error
+				 * into the next call.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+			}
+		}
+	}
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	return (rval);
+}
+
+/*
+ * Destroy a traditional (non multi-node) diskset as seen from this host.
+ *
+ * sp       - diskset to destroy
+ * lock_set - when TRUE the set is locked on this host before the work and
+ *            unlocked again at the "out" label
+ * ep       - error return
+ *
+ * Outline of the work done below:
+ *   - block signals and fetch the set descriptor; a multi-node set is
+ *     rejected with -1
+ *   - count the nodes for which nodehasset() does not report an error
+ *   - unless META_DESTROY_SET_OK is in the environment, refuse to go on
+ *     when meta_check_drive_inuse() fails for any drive of the set
+ *   - delete the drives on the local node, release ownership drive by
+ *     drive, and detach the replicas if this is the only user of the set
+ *   - halt the set, update the mediator hosts and delete the set on the
+ *     local node
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int
+meta_set_destroy(
+	mdsetname_t	*sp,
+	int		lock_set,
+	md_error_t	*ep
+)
+{
+	int		i;
+	med_rec_t	medr;
+	md_set_desc	*sd;
+	md_drive_desc	*dd, *p, *p1;
+	mddrivename_t	*dnp;
+	mdname_t	*np;
+	mdnamelist_t	*nlp = NULL;
+	int		num_users = 0;
+	int		has_set;
+	side_t		mysideno;
+	sigset_t	oldsigs;
+	md_error_t	xep = mdnullerror;
+	md_setkey_t	*cl_sk;
+	int		rval = 0;
+	int		delete_end = 1;
+
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, ep) < 0)
+		return (-1);
+
+	/* a NULL descriptor without an error in ep is not treated as failure */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		if (! mdisok(ep))
+			rval = -1;
+		goto out;
+	}
+
+	/*
+	 * meta_set_destroy should not be called for a MN diskset.
+	 * This routine destroys a set without communicating this information
+	 * to the other nodes which would lead to an inconsistency in
+	 * the MN diskset.
+	 *
+	 * NOTE(review): this path returns -1 without storing an error in ep.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Continue if a traditional diskset */
+
+	/*
+	 * Check to see who has the set.  If we are not the last user of the
+	 * set, we will not touch the replicas.
+	 */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		has_set = nodehasset(sp, sd->sd_nodes[i], NHS_NST_EQ,
+		    ep);
+
+		/* only a negative (error) result excludes the node */
+		if (has_set < 0) {
+			mdclrerror(ep);
+		} else
+			num_users++;
+	}
+
+	/* dd may legitimately be NULL here (no error): set has no drives */
+	if ((dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep)) == NULL) {
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	if (setup_db_bydd(sp, dd, TRUE, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	if (lock_set == TRUE) {
+		/* Lock the set on our side */
+		if (clnt_lock_set(mynode(), sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * A traditional diskset has no diskset stale information to send
+	 * since there can only be one owner node at a time.
+	 */
+	if (snarf_set(sp, FALSE, ep))
+		mdclrerror(ep);
+
+	if (dd != NULL) {
+		/*
+		 * Make sure that no drives are in use as parts of metadrives
+		 * or hot spare pools, this is one of the few error conditions
+		 * that will stop this routine, unless the environment has
+		 * META_DESTROY_SET_OK set, in which case, the operation will
+		 * proceed.
+		 */
+		if (getenv("META_DESTROY_SET_OK") == NULL) {
+			for (p = dd; p != NULL; p = p->dd_next) {
+				dnp = p->dd_dnp;
+
+				i = meta_check_drive_inuse(sp, dnp, FALSE, ep);
+				if (i == -1) {
+					/* need xep - wire calls clear error */
+					i = metaget_setownership(sp, &xep);
+					if (i == -1) {
+						rval = -1;
+						goto out;
+					}
+
+					mysideno = getmyside(sp, &xep);
+
+					if (mysideno == MD_SIDEWILD) {
+						rval = -1;
+						goto out;
+					}
+
+					/*
+					 * If this host does not own the set,
+					 * halt it before giving up.  Every
+					 * branch from here fails with -1.
+					 */
+					if (sd->sd_isown[mysideno] == FALSE)
+						if (halt_set(sp, &xep)) {
+							rval = -1;
+							goto out;
+						}
+
+					rval = -1;
+					goto out;
+				}
+			}
+		}
+
+		/* delete the drives on the local node only; errors ignored */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip non local nodes */
+			if (strcmp(mynode(), sd->sd_nodes[i]) != 0)
+				continue;
+
+			if (clnt_deldrvs(sd->sd_nodes[i], sp, dd, ep))
+				mdclrerror(ep);
+		}
+
+		/*
+		 * Go thru each drive and individually delete the replicas.
+		 * This way we can ignore individual errors.
+		 */
+		for (p = dd; p != NULL; p = p->dd_next) {
+			uint_t	rep_slice;
+
+			dnp = p->dd_dnp;
+			/*
+			 * Fail if the replica slice cannot be determined, or
+			 * if neither that slice nor slice 0 yields a name.
+			 */
+			if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
+			    (((np = metaslicename(dnp, rep_slice, ep))
+				== NULL) &&
+				((np = metaslicename(dnp, MD_SLICE0, ep))
+				    == NULL))) {
+				rval = -1;
+				goto out;
+			}
+
+			/*
+			 * Repeat the lookup: prefer the replica slice, fall
+			 * back to slice 0 and clear the error left behind by
+			 * the failed first attempt.
+			 * NOTE(review): this duplicates the lookups made in
+			 * the condition above.
+			 */
+			if ((np = metaslicename(dnp,
+			    rep_slice, ep)) == NULL) {
+				if ((np = metaslicename(dnp,
+				    MD_SLICE0, ep)) == NULL) {
+					rval = -1;
+					goto out;
+				}
+				mdclrerror(ep);
+			}
+
+			/*
+			 * Yes this is UGLY!!!
+			 * Temporarily cut the list after p so that
+			 * rel_own_bydd() sees a single-drive list, then
+			 * restore the link.
+			 */
+			p1 = p->dd_next;
+			p->dd_next = NULL;
+			if (rel_own_bydd(sp, p, FALSE, ep))
+				mdclrerror(ep);
+			p->dd_next = p1;
+
+			/* no replicas on this drive */
+			if (p->dd_dbcnt == 0)
+				continue;
+
+			/*
+			 * Skip the replica removal if we are not the last user
+			 */
+			if (num_users != 1)
+				continue;
+
+			nlp = NULL;
+			(void) metanamelist_append(&nlp, np);
+			if (meta_db_detach(sp, nlp,
+			    (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL, ep))
+				mdclrerror(ep);
+			metafreenamelist(nlp);
+		}
+	}
+
+	if (halt_set(sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Setup the mediator record */
+	(void) memset(&medr, '\0', sizeof (med_rec_t));
+	medr.med_rec_mag = MED_REC_MAGIC;
+	medr.med_rec_rev = MED_REC_REV;
+	medr.med_rec_fl  = 0;
+	medr.med_rec_sn  = sp->setno;
+	(void) strcpy(medr.med_rec_snm, sp->setname);
+	medr.med_rec_meds = sd->sd_med;	/* structure assignment */
+	(void) memset(&medr.med_rec_data, '\0', sizeof (med_data_t));
+	medr.med_rec_foff = 0;
+
+	/*
+	 * If we are the last remaining user, then remove the mediator hosts
+	 */
+	if (num_users == 1) {
+		for (i = 0; i < MED_MAX_HOSTS; i++) {
+			if (medr.med_rec_meds.n_lst[i].a_cnt != 0)
+				SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE,
+				    SVM_TAG_MEDIATOR, sp->setno, i);
+			(void) memset(&medr.med_rec_meds.n_lst[i], '\0',
+			    sizeof (md_h_t));
+		}
+		medr.med_rec_meds.n_cnt = 0;
+	} else { 	/* Remove this host from the mediator node list. */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Copy non local node */
+			if (strcmp(mynode(), sd->sd_nodes[i]) != 0) {
+				(void) strcpy(medr.med_rec_nodes[i],
+				    sd->sd_nodes[i]);
+				continue;
+			}
+
+			/* Clear local node */
+			(void) memset(&medr.med_rec_nodes[i], '\0',
+			    sizeof (md_node_nm_t));
+		}
+	}
+
+	/* checksum the finished record */
+	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/*
+	 * If the client is part of a cluster put the DCS service
+	 * into a deleting state.
+	 *
+	 * For an auto-take set a failure only disables the matching
+	 * sdssc_delete_end() call below.
+	 * NOTE(review): for any other set a failure jumps to "out" with
+	 * rval still 0 and the error cleared - confirm that reporting
+	 * success here is intended.
+	 */
+	if (sdssc_delete_begin(sp->setname) == SDSSC_ERROR) {
+		if (metad_isautotakebyname(sp->setname)) {
+			delete_end = 0;
+		} else {
+			mdclrerror(ep);
+			goto out;
+		}
+	}
+
+	/* Inform the mediator hosts of the new information */
+	for (i = 0; i < MED_MAX_HOSTS; i++) {
+		if (sd->sd_med.n_lst[i].a_cnt == 0)
+			continue;
+
+		if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
+			mdclrerror(ep);
+	}
+
+	/* Delete the set locally */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		/* Skip non local nodes */
+		if (strcmp(mynode(), sd->sd_nodes[i]) != 0)
+			continue;
+
+		if (clnt_delset(sd->sd_nodes[i], sp, ep) == -1)
+			mdclrerror(ep);
+	}
+	if (delete_end &&
+	    sdssc_delete_end(sp->setname, SDSSC_COMMIT) == SDSSC_ERROR)
+		rval = -1;
+
+out:
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0) {
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+
+	/*
+	 * NOTE(review): the unlock is attempted whenever lock_set is TRUE,
+	 * including on paths that reached "out" before clnt_lock_set() ran.
+	 */
+	if (lock_set == TRUE) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+			if (rval == 0)
+				(void) mdstealerror(ep, &xep);
+			rval = -1;
+		}
+		cl_set_setkey(NULL);
+	}
+
+	metaflushsetname(sp);
+	return (rval);
+}
+
+/*
+ * Purge a diskset from this host.
+ *
+ * sp             - diskset to purge
+ * bypass_cluster - when non-zero the sdssc_delete_begin()/
+ *                  sdssc_delete_end() cluster calls are skipped
+ * forceflg       - when non-zero, RPC errors from locking and unlocking
+ *                  nodes are ignored
+ * ep             - error return
+ *
+ * The set is locked on every node that reports having it, the other
+ * nodes are asked to delete this host from the set, the set is deleted
+ * on this host, and the locks are released again.
+ *
+ * Return values:
+ *	0 - no failure was recorded
+ *	1 - the set descriptor could not be obtained
+ *	2 - locking the set on a node failed
+ *	3 - sdssc_delete_begin() failed for a set that is not auto-take
+ *	4 - sdssc_delete_end(SDSSC_COMMIT) failed
+ *	5 - unlocking the set on a node failed (replaces any earlier value)
+ */
+int
+meta_set_purge(
+	mdsetname_t	*sp,
+	int		bypass_cluster,
+	int		forceflg,
+	md_error_t	*ep
+)
+{
+	char		*thishost = mynode();
+	md_set_desc	*sd;
+	md_setkey_t	*cl_sk;
+	md_error_t	xep = mdnullerror;
+	int		rval = 0;
+	int		i, num_hosts = 0;
+	int		has_set = 0;
+	int		max_node = 0;
+	int		delete_end = 1;
+	md_mnnode_desc	*nd;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		/* unable to find set description */
+		rval = 1;
+		return (rval);
+	}
+
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * Get a count of the hosts in the set and also lock the set
+		 * on those hosts that know about it.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			has_set = nodehasset(sp, nd->nd_nodename,
+				NHS_NST_EQ, ep);
+
+			/*
+			 * The host is not aware of this set (has_set < 0) or
+			 * the set does not match (has_set == 0). This check
+			 * prevents the code getting confused by an apparent
+			 * inconsistency in the set's state, this is in the
+			 * purge code so something is broken in any case and
+			 * this is just trying to fix the brokenness.
+			 */
+			if (has_set <= 0) {
+				mdclrerror(ep);
+				nd->nd_flags |= MD_MN_NODE_NOSET;
+			} else {
+				num_hosts++;
+				if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+					/*
+					 * If the force flag is set then
+					 * ignore any RPC failures because we
+					 * are only really interested with
+					 * the set on local node.
+					 */
+					if (forceflg && mdanyrpcerror(ep)) {
+						mdclrerror(ep);
+					} else {
+						/*
+						 * set max_node so that in the
+						 * unlock code nodes in the
+						 * set that have not been
+						 * locked are not unlocked.
+						 */
+						max_node = nd->nd_nodeid;
+						rval = 2;
+						goto out1;
+					}
+				}
+
+			}
+			nd = nd->nd_next;
+		}
+		/*
+		 * All nodes were visited; 0 makes the unlock loop below run
+		 * over the whole list (presumably no node has id 0 - TODO
+		 * confirm).
+		 */
+		max_node = 0;
+	} else {
+		/*
+		 * Get a count of the hosts in the set and also lock the set
+		 * on those hosts that know about it.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			has_set = nodehasset(sp, sd->sd_nodes[i],
+				NHS_NST_EQ, ep);
+
+			/*
+			 * The host is not aware of this set (has_set < 0) or
+			 * the set does not match (has_set == 0). This check
+			 * prevents the code getting confused by an apparent
+			 * inconsistency in the set's state, this is in the
+			 * purge code so something is broken in any case and
+			 * this is just trying to fix the brokenness.
+			 */
+			if (has_set <= 0) {
+				mdclrerror(ep);
+				/*
+				 * Empty the node name to prevent further
+				 * requests to this unresponsive node.
+				 */
+				sd->sd_nodes[i][0] = '\0';
+			} else {
+				num_hosts++;
+				if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+					/*
+					 * If the force flag is set then
+					 * ignore any RPC failures because we
+					 * are only really interested with
+					 * the set on local node.
+					 */
+					if (forceflg && mdanyrpcerror(ep)) {
+						mdclrerror(ep);
+					} else {
+						rval = 2;
+						/*
+						 * set max_node so that in the
+						 * unlock code nodes in the
+						 * set that have not been
+						 * locked are not unlocked.
+						 */
+						max_node = i;
+						goto out1;
+					}
+				}
+			}
+		}
+		max_node = i;	/* now MD_MAXSIDES */
+	}
+	if (!bypass_cluster) {
+		/*
+		 * If there is only one host associated with the
+		 * set then remove the set from the cluster.
+		 */
+		if (num_hosts == 1) {
+			if (sdssc_delete_begin(sp->setname) == SDSSC_ERROR) {
+				/*
+				 * For an auto-take set a failure only
+				 * disables the matching sdssc_delete_end()
+				 * commit further down.
+				 */
+				if (metad_isautotakebyname(sp->setname)) {
+					delete_end = 0;
+				} else {
+					mdclrerror(ep);
+					rval = 3;
+					goto out1;
+				}
+			}
+		}
+	}
+
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * Ask every other live node to delete this host from the
+		 * set, and delete the set itself on this host.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (nd->nd_nodeid != sd->sd_mn_mynode->nd_nodeid) {
+				/*
+				 * Tell the remote node to remove this node
+				 */
+				if (clnt_delhosts(nd->nd_nodename, sp, 1,
+					&thishost, ep) == -1) {
+					/*
+					 * If we fail to delete ourselves
+					 * from the remote host it does not
+					 * really matter because the set is
+					 * being "purged" from this node. The
+					 * set can be purged from the other
+					 * node at a later time.
+					 */
+					mdclrerror(ep);
+				}
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * remove the set from this host
+			 * NOTE(review): on failure the error is printed and
+			 * cleared and rval is left unchanged.
+			 */
+			if (clnt_delset(nd->nd_nodename, sp, ep) == -1) {
+				md_perror(dgettext(TEXT_DOMAIN, "delset"));
+				if (!bypass_cluster && num_hosts == 1)
+					(void) sdssc_delete_end(sp->setname,
+					    SDSSC_CLEANUP);
+				mdclrerror(ep);
+				goto out1;
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		/* same as above for the node table of a traditional set */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+			if (strcmp(thishost, sd->sd_nodes[i]) != 0) {
+				/*
+				 * Tell the remote node to remove this node
+				 */
+				if (clnt_delhosts(sd->sd_nodes[i], sp, 1,
+				    &thishost, ep) == -1) {
+					/*
+					 * If we fail to delete ourselves
+					 * from the remote host it does not
+					 * really matter because the set is
+					 * being "purged" from this node. The
+					 * set can be purged from the other
+					 * node at a later time.
+					 */
+					mdclrerror(ep);
+				}
+				continue;
+			}
+
+			/*
+			 * remove the set from this host
+			 * NOTE(review): on failure the error is printed and
+			 * cleared and rval is left unchanged.
+			 */
+			if (clnt_delset(sd->sd_nodes[i], sp, ep) == -1) {
+				md_perror(dgettext(TEXT_DOMAIN, "delset"));
+				if (!bypass_cluster && num_hosts == 1)
+					(void) sdssc_delete_end(sp->setname,
+					    SDSSC_CLEANUP);
+				mdclrerror(ep);
+				goto out1;
+			}
+		}
+	}
+
+	/* commit the cluster delete started by sdssc_delete_begin() above */
+	if (!bypass_cluster && num_hosts == 1) {
+		if (delete_end && sdssc_delete_end(sp->setname, SDSSC_COMMIT) ==
+		    SDSSC_ERROR) {
+			rval = 4;
+		}
+	}
+
+out1:
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	/*
+	 * Remove the set lock on those nodes that had the set locked
+	 * max_node will either be MD_MAXSIDES or array index of the last
+	 * node contacted (or rather failed to contact) for traditional
+	 * diskset.  For a MN diskset, max_node is the node_id of the node
+	 * that failed the lock.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/* stop at the node whose lock attempt failed */
+			if (nd->nd_nodeid == max_node)
+				break;
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+				if (forceflg && mdanyrpcerror(&xep)) {
+					mdclrerror(&xep);
+					nd = nd->nd_next;
+					continue;
+				}
+				/* only the first failure is handed to ep */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = 5;
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < max_node; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+				if (forceflg && mdanyrpcerror(&xep)) {
+					mdclrerror(&xep);
+					continue;
+				}
+				/* only the first failure is handed to ep */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = 5;
+			}
+		}
+	}
+
+	cl_set_setkey(NULL);
+
+	return (rval);
+}
+
+/*
+ * Collect the data tags of a diskset.
+ *
+ * MD_MED_GET_TAG is issued repeatedly, with dt_id incremented after each
+ * successful call, and every tag returned is appended to the end of the
+ * list at *dtlpp.
+ *
+ * Returns 0 once the ioctl fails with MDE_DB_NOTAG and the list is not
+ * empty.  Any other ioctl failure, or MDE_DB_NOTAG with an empty list,
+ * moves the error into ep and returns the value of mdstealerror().
+ */
+int
+meta_set_query(
+	mdsetname_t		*sp,
+	mddb_dtag_lst_t		**dtlpp,
+	md_error_t		*ep
+)
+{
+	mddb_dtag_get_parm_t	parm;
+
+	(void) memset(&parm, '\0', sizeof (mddb_dtag_get_parm_t));
+	parm.dtgp_setno = sp->setno;
+
+	for (;;) {
+		if (metaioctl(MD_MED_GET_TAG, &parm, &parm.dtgp_mde,
+		    NULL) != 0) {
+			/* out of tags and at least one was collected */
+			if (mdismddberror(&parm.dtgp_mde, MDE_DB_NOTAG) &&
+			    *dtlpp != NULL) {
+				break;
+			}
+			return (mdstealerror(ep, &parm.dtgp_mde));
+		}
+
+		/*
+		 * Run to the end of the list
+		 */
+		while (*dtlpp != NULL)
+			dtlpp = &(*dtlpp)->dtl_nx;
+
+		*dtlpp = Zalloc(sizeof (mddb_dtag_lst_t));
+
+		(void) memmove(&(*dtlpp)->dtl_dt, &parm.dtgp_dt,
+		    sizeof (mddb_dtag_t));
+
+		parm.dtgp_dt.dt_id++;
+	}
+	return (0);
+}
+
+/*
+ * Return the drivename recorded under a namespace key.
+ *
+ * The device name stored for (sideno, key) in the local set's namespace
+ * is fetched and resolved to a name with metaname() - or metaname_fast()
+ * when PRINT_FAST is set in flags - and the key is stored in the
+ * resulting drivename.  Unless the drive is a did device or the set is a
+ * multi-node set, the drive's device id is looked up as well and stored
+ * in encoded form.  Finally, unless MD_BYPASS_DAEMON is set, the side name
+ * list is filled in with get_sidenmlist().
+ *
+ * Returns the drivename, or NULL on error.
+ */
+mddrivename_t *
+metadrivename_withdrkey(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	mdkey_t		key,
+	int		flags,
+	md_error_t	*ep
+)
+{
+	char		*devnm;
+	mdname_t	*namep;
+	mddrivename_t	*drvp;
+	ddi_devid_t	did;
+	md_set_desc	*sd;
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (NULL);
+
+	/*
+	 * get namespace info; a traditional set offsets the side number
+	 * by SKEW, a multi-node set uses it unchanged
+	 */
+	devnm = meta_getnmbykey(MD_LOCAL_SET,
+	    MD_MNSET_DESC(sd) ? sideno : sideno+SKEW, key, ep);
+	if (devnm == NULL)
+		return (NULL);
+
+	/* get device name; the string is not needed afterwards */
+	if (flags & PRINT_FAST)
+		namep = metaname_fast(&sp, devnm, ep);
+	else
+		namep = metaname(&sp, devnm, ep);
+	Free(devnm);
+	if (namep == NULL)
+		return (NULL);
+
+	/* make sure it's OK */
+	if (!(flags & MD_BASICNAME_OK) && (metachkcomp(namep, ep) != 0))
+		return (NULL);
+
+	/* get drivename */
+	drvp = namep->drivenamep;
+	drvp->side_names_key = key;
+
+	/*
+	 * The devid handling is skipped for did devices: the device id is
+	 * disabled for them due to the lack of minor name support in the
+	 * did driver, and the devid code path can set and propagate an
+	 * error that eventually prevents did disks from being added to the
+	 * diskset under SunCluster systems.  It is also skipped for a
+	 * multi-node diskset, which has no devids.
+	 */
+	if (strncmp(drvp->rname, "/dev/did/", strlen("/dev/did/")) != 0 &&
+	    !MD_MNSET_DESC(sd)) {
+		/*
+		 * Get the devid associated with the key.
+		 *
+		 * If a devid was returned, it MUST be valid even in
+		 * the case where a device id has been "updated". The
+		 * "update" of the device id may have occurred due to
+		 * a firmware upgrade.
+		 */
+		did = meta_getdidbykey(MD_LOCAL_SET, sideno+SKEW, key, ep);
+		if (did != NULL) {
+			drvp->devid = devid_str_encode(did, NULL);
+			free(did);
+		} else if (mdissyserror(ep, MDDB_F_NODEVID)) {
+			/*
+			 * It is okay if replica is not in devid mode
+			 */
+			mdclrerror(ep);
+		} else {
+			/*
+			 * devid is missing so this means that we have
+			 * just upgraded from a configuration where
+			 * devid's were not used so try to add in
+			 * the devid and requery.
+			 */
+			if (meta_setdid(MD_LOCAL_SET, sideno + SKEW, key,
+			    ep) < 0)
+				return (NULL);
+			did = (ddi_devid_t)meta_getdidbykey(MD_LOCAL_SET,
+			    sideno+SKEW, key, ep);
+			if (did == NULL)
+				return (NULL);
+			drvp->devid = devid_str_encode(did, NULL);
+			devid_free(did);
+		}
+	}
+
+	if (flags & MD_BYPASS_DAEMON)
+		return (drvp);
+
+	if (get_sidenmlist(sp, drvp, ep))
+		return (NULL);
+
+	/* return success */
+	return (drvp);
+}
+
+/*
+ * Free every element of a drive descriptor list and set the caller's
+ * list head to NULL.  Only the list elements themselves are passed to
+ * Free().
+ */
+void
+metafreedrivedesc(md_drive_desc **dd)
+{
+	md_drive_desc	*cur = *dd;
+
+	while (cur != NULL) {
+		/* pick up the link before the element goes away */
+		md_drive_desc	*after = cur->dd_next;
+
+		Free(cur);
+		cur = after;
+	}
+	*dd = NULL;
+}
+
+/*
+ * Return the drive descriptor list of a diskset for this host's side.
+ *
+ * flags must not contain MD_BYPASS_DAEMON (asserted).  Returns NULL when
+ * getmyside() yields MD_SIDEWILD, otherwise whatever
+ * metaget_drivedesc_sideno() returns for that side.
+ */
+md_drive_desc *
+metaget_drivedesc(
+	mdsetname_t	*sp,
+	int		flags,
+	md_error_t	*ep
+)
+{
+	side_t		myside;
+
+	assert(! (flags & MD_BYPASS_DAEMON));
+
+	myside = getmyside(sp, ep);
+	if (myside == MD_SIDEWILD)
+		return (NULL);
+
+	return (metaget_drivedesc_sideno(sp, myside, flags, ep));
+}
+
+/*
+ * Build a drive descriptor list from a name list.
+ *
+ * One descriptor is appended per name, using the name's drivename, the
+ * set's creation time and generation id, and the MD_DR_ADD flag.
+ *
+ * Returns the new list (NULL for an empty name list), or NULL if the set
+ * descriptor cannot be obtained.
+ */
+md_drive_desc *
+metaget_drivedesc_fromnamelist(
+	mdsetname_t	*sp,
+	mdnamelist_t	*nlp,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	mdnamelist_t		*cur;
+	md_drive_desc		*head = NULL;
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (NULL);
+
+	cur = nlp;
+	while (cur != NULL) {
+		(void) metadrivedesc_append(&head, cur->namep->drivenamep,
+		    0, 0, sd->sd_ctime, sd->sd_genid, MD_DR_ADD);
+		cur = cur->next;
+	}
+
+	return (head);
+}
+
+/*
+ * Return the drive descriptor list of a diskset for the given side.
+ *
+ * The list is kept in the set descriptor (sd_drvs): if one is already
+ * there it is returned as is, otherwise it is built with dr2drivedesc()
+ * and stored.  flags must not contain MD_BYPASS_DAEMON (asserted).
+ *
+ * Returns the list, or NULL when the set descriptor cannot be obtained
+ * or dr2drivedesc() returns NULL.
+ */
+md_drive_desc *
+metaget_drivedesc_sideno(
+	mdsetname_t *sp,
+	side_t sideno,
+	int flags,
+	md_error_t *ep
+)
+{
+	md_set_desc	*sd;
+
+	assert(! (flags & MD_BYPASS_DAEMON));
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (NULL);
+
+	/* build the list only the first time it is asked for */
+	if (sd->sd_drvs == NULL)
+		sd->sd_drvs = dr2drivedesc(sp, sideno, flags, ep);
+
+	return (sd->sd_drvs);
+}
+
+/*
+ * Refresh the ownership information held in a set descriptor.
+ *
+ * Multi-node set: every node in the node list has MD_MN_NODE_OWN set or
+ * cleared in nd_flags.  A node that is not ALIVE, cannot be reached by
+ * clnt_ownset(), or reports that it is not an owner is marked as a
+ * non-owner.  This path always returns 0.
+ *
+ * Traditional set: all sd_isown[] entries are cleared and the entry for
+ * this host's side is set if clnt_ownset() reports ownership.
+ *
+ * Returns 0 on success.  Returns -1 if the set descriptor cannot be
+ * obtained or, for a traditional set, if clnt_ownset() fails or this
+ * host's side number cannot be determined.
+ */
+int
+metaget_setownership(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	int		bool;
+	int		i;
+	md_mnnode_desc	*nd;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* If node isn't alive, can't own diskset */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * If can't communicate with rpc.metad, then mark
+			 * this node as not an owner.  That node may
+			 * in fact, be an owner, but without rpc.metad running
+			 * that node can't do much.
+			 */
+			if (clnt_ownset(nd->nd_nodename, sp, &bool, ep) == -1) {
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+			} else if (bool == TRUE) {
+				nd->nd_flags |= MD_MN_NODE_OWN;
+			} else {
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+			}
+			nd = nd->nd_next;
+		}
+		return (0);
+	}
+
+	/* Rest of code handles traditional disksets */
+
+	for (i = 0; i < MD_MAXSIDES; i++)
+		sd->sd_isown[i] = 0;
+
+	if (clnt_ownset(mynode(), sp, &bool, ep) == -1)
+		return (-1);
+
+	if (bool == TRUE) {
+		side_t	myside = getmyside(sp, ep);
+
+		/*
+		 * getmyside() reports failure with MD_SIDEWILD, which is
+		 * not a usable index into sd_isown[]; fail instead of
+		 * writing outside the array.
+		 */
+		if (myside == MD_SIDEWILD)
+			return (-1);
+		sd->sd_isown[myside] = 1;
+	}
+
+	return (0);
+}
+
+/*
+ * Return this host's node name as reported by uname(2).
+ *
+ * The utsname structure is fetched on the first call and kept in static
+ * storage; later calls return the same buffer without calling uname()
+ * again.  A uname() failure is reported with md_perror() and then trips
+ * an assert.
+ */
+char *
+mynode(void)
+{
+	static struct utsname	cached;
+	static int		have_name = 0;
+
+	if (have_name)
+		return (cached.nodename);
+
+	if (uname(&cached) == -1) {
+		md_perror(dgettext(TEXT_DOMAIN, "uname"));
+		assert(0);
+	}
+	have_name = 1;
+
+	return (cached.nodename);
+}
+
+/*
+ * Report whether a string occurs in a list of strings.
+ *
+ * str - string to look for
+ * cnt - number of entries in lst to examine
+ * lst - array of at least cnt strings
+ *
+ * Returns TRUE if one of the first cnt entries compares equal to str
+ * with strcmp(), otherwise FALSE.
+ */
+int
+strinlst(char *str, int cnt, char **lst)
+{
+	int	idx = 0;
+
+	while (idx < cnt) {
+		if (strcmp(lst[idx], str) == 0)
+			return (TRUE);
+		idx++;
+	}
+
+	return (FALSE);
+}
+
+/*
+ * meta_get_reserved_names
+ *  appends the reserved slices of a diskset to the name list at *nlpp
+ *  reserved slices are those that are used but don't necessarily
+ *  show up as metadevices (ex. reserved slice for db in sets, logs)
+ *
+ *  Nothing is done for the local set and 0 is returned.
+ *
+ *  Returns the number of replica slices appended (log devices are
+ *  appended but not counted), or -1 on error.
+ */
+
+/*ARGSUSED*/
+int
+meta_get_reserved_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep)
+{
+	int		 nslices	= 0;
+	mdname_t	*slicenp	= NULL;
+	mdnamelist_t	*translist	= NULL;
+	mdnamelist_t	**tailpp 	= nlpp;
+	mdnamelist_t	*cur;
+	md_drive_desc	*drives, *drv;
+
+	if (metaislocalset(sp))
+		goto out;
+
+	drives = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+	if (drives == NULL && !mdisok(ep)) {
+		nslices = -1;
+		goto out;
+	}
+
+	/* db in for sets on reserved slice */
+	for (drv = drives; drv != NULL && nslices >= 0; drv = drv->dd_next) {
+		uint_t	rep_slice;
+		int	appended = 0;
+
+		/*
+		 * Add the name struct to the end of the
+		 * namelist but keep a pointer to the last
+		 * element so that we don't incur the overhead
+		 * of traversing the list each time
+		 */
+		if (drv->dd_dnp != NULL &&
+		    meta_replicaslice(drv->dd_dnp, &rep_slice, ep) == 0) {
+			slicenp = metaslicename(drv->dd_dnp, rep_slice, ep);
+			if (slicenp != NULL) {
+				tailpp = meta_namelist_append_wrapper(tailpp,
+				    slicenp);
+				if (tailpp != NULL)
+					appended = 1;
+			}
+		}
+
+		/* any failed step ends the loop with an error count */
+		if (appended)
+			nslices++;
+		else
+			nslices = -1;
+	}
+
+	/* now find logs */
+	if (meta_get_trans_names(sp, &translist, 0, ep) < 0) {
+		nslices = -1;
+		goto out;
+	}
+
+	cur = translist;
+	while (cur != NULL) {
+		md_trans_t	*transp;
+
+		transp = meta_get_trans(sp, cur->namep, ep);
+		if (transp == NULL) {
+			nslices = -1;
+			goto out;
+		}
+		if (transp->lognamep != NULL) {
+			/*
+			 * Add the name struct to the end of the
+			 * namelist but keep a pointer to the last
+			 * element so that we don't incur the overhead
+			 * of traversing the list each time
+			 */
+			tailpp = meta_namelist_append_wrapper(
+			    tailpp, transp->lognamep);
+		}
+		cur = cur->next;
+	}
+out:
+	metafreenamelist(translist);
+	return (nslices);
+}
+
+/*
+ * Entry point to join a node to MultiNode diskset.
+ *
+ * Validate host in diskset.
+ *	- Should be in membership list from API
+ *	- Should not already be joined into diskset.
+ *	- Set must have drives
+ * Assume valid configuration is stored in the set/drive/node records
+ * in the local mddb since no node or drive can be added to the MNset
+ * unless all drives and nodes are available.  Reconfig steps will
+ * resync all ALIVE nodes in case of panic in critical areas.
+ *
+ * Lock down the set.
+ * Verify host is a member of this diskset.
+ * If drives exist in the configuration, load the mddbs.
+ * Set this node to active by notifying master if one exists.
+ * If this is the first node active in the diskset, this node
+ * 	becomes the master.
+ * Unlock the set.
+ *
+ * Mirror Resync:
+ * If this node is the last node to join the set and clustering
+ * isn't running, then start the 'metasync -r' type resync
+ * on all mirrors in this diskset.
+ * If clustering is running, this resync operation will
+ * be handled by the reconfig steps and should NOT
+ * be handled during a join operation.
+ *
+ * There are multiple return values in order to assist
+ * the join operation of all sets in the metaset command.
+ *
+ * Return values:
+ *	0  - Node successfully joined to set.
+ *	-1 - Join attempted but failed
+ *		- any failure from libmeta calls
+ *		- node not in the member list
+ *	-2 - Join not attempted since
+ *		- this set had no drives in set
+ *		- this node already joined to set
+ *		- set is not a multinode set
+ *	-3 - Node joined to STALE set.
+ */
+extern int
+meta_set_join(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*dd;
+	md_mnnode_desc		*nd, *nd2, my_nd;
+	int			rval = 0;
+	md_setkey_t		*cl_sk;
+	md_error_t		xep = mdnullerror;
+	md_error_t		ep_snarf = mdnullerror;
+	int			master_flag = 0;
+	md_mnset_record		*mas_mnsr = NULL;
+	int			clear_nr_flags = 0;
+	md_mnnode_record	*nr;
+	int			stale_set = 0;
+	int			rb_flags = 0;
+	int			stale_bool = FALSE;
+	int			suspendall_flag = 0;
+	int			suspend1_flag = 0;
+	sigset_t		oldsigs;
+	int			send_reinit = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		return (-1);
+	}
+
+	/* Must be a multinode diskset */
+	if (!MD_MNSET_DESC(sd)) {
+		(void) mderror(ep, MDE_NOT_MN, sp->setname);
+		return (-2);
+	}
+
+	/* Verify that the node is ALIVE (i.e. is in the API membership list) */
+	if (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_ALIVE)) {
+		(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, sp->setno,
+			sd->sd_mn_mynode->nd_nodename, NULL,
+			sp->setname);
+		return (-1);
+	}
+
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	/*
+	 * Lock the set on current set members.
+	 * For MN diskset lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * Lock out other meta* commands by suspending
+	 * class 1 messages across the diskset.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+			    sp, MD_MSG_CLASS1, MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspend1_flag = 1;
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * Verify that this host is a member (in the host list) of the set.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (strcmp(mynode(), nd->nd_nodename) == 0) {
+			break;
+		}
+		nd = nd->nd_next;
+	}
+	if (!nd) {
+		(void) mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
+			sd->sd_mn_mynode->nd_nodename, NULL,
+			sp->setname);
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Need to return failure if host is already 'joined'
+	 * into the set.  This is done so that if later the user
+	 * issues a command to join all sets and a failure is
+	 * encountered - that the resulting cleanup effort
+	 * (withdrawing from all sets that were joined
+	 * during that command) won't withdraw from this set.
+	 */
+	if (nd->nd_flags & MD_MN_NODE_OWN) {
+		rval = -2;
+		goto out2;
+	}
+
+	/*
+	 * Call metaget_setownership that calls each node in diskset and
+	 * marks in set descriptor if node is an owner of the set or not.
+	 * metaget_setownership checks to see if a node is an owner by
+	 * checking to see if that node's kernel has the mddb loaded.
+	 * If a node had panic'd during a reconfig or an
+	 * add/delete/join/withdraw operation, the other nodes' node
+	 * records may not reflect the current state of the diskset,
+	 * so calling metaget_setownership is the safest thing to do.
+	 */
+	if (metaget_setownership(sp, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	/* If first active member of diskset, become the master. */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (nd->nd_flags & MD_MN_NODE_OWN)
+			break;
+		nd = nd->nd_next;
+	}
+	if (nd == NULL)
+		master_flag = 1;
+
+	/*
+	 * If not first active member of diskset, then get the
+	 * master information from a node that is already joined
+	 * and set the master information for this node.  Be sure
+	 * that this node (the already joined node) has its own
+	 * join flag set.  If not, then this diskset isn't currently
+	 * consistent and shouldn't allow a node to join.  This diskset
+	 * inconsistency should only occur when a node has panic'd in
+	 * the set while doing a metaset operation and the sysadmin is
+	 * attempting to join a node into the set.  This inconsistency
+	 * will be fixed during a reconfig cycle which should be occurring
+	 * soon since a node panic'd.
+	 *
+	 * If unable to get this information from an owning node, then
+	 * this diskset isn't currently consistent and shouldn't
+	 * allow a node to join.
+	 */
+	if (!master_flag) {
+		/* get master information from an owner (joined) node */
+		if (clnt_mngetset(nd->nd_nodename, sp->setname,
+		    sp->setno, &mas_mnsr, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		/* Verify that owner (joined) node has its own JOIN flag set */
+		nr = mas_mnsr->sr_nodechain;
+		while (nr) {
+			if ((nd->nd_nodeid == nr->nr_nodeid) &&
+			    ((nr->nr_flags & MD_MN_NODE_OWN) == NULL)) {
+				(void) mddserror(ep, MDE_DS_NODENOSET,
+				    sp->setno, nd->nd_nodename, NULL,
+				    nd->nd_nodename);
+				free_sr((md_set_record *)mas_mnsr);
+				rval = -1;
+				goto out;
+			}
+			nr = nr->nr_next;
+		}
+
+		/*
+		 * Does master have set marked as STALE?
+		 * If so, need to pass this down to kernel when
+		 * this node snarfs the set.
+		 */
+		if (clnt_mn_is_stale(nd->nd_nodename, sp,
+		    &stale_bool, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		/* set master information in my rpc.metad's set record */
+		if (clnt_mnsetmaster(mynode(), sp, mas_mnsr->sr_master_nodenm,
+		    mas_mnsr->sr_master_nodeid, ep)) {
+			free_sr((md_set_record *)mas_mnsr);
+			rval = -1;
+			goto out;
+		}
+
+		/* set master information in my cached set desc */
+		(void) strcpy(sd->sd_mn_master_nodenm,
+		    mas_mnsr->sr_master_nodenm);
+		sd->sd_mn_master_nodeid = mas_mnsr->sr_master_nodeid;
+		nd2 = sd->sd_nodelist;
+		while (nd2) {
+		    if (nd2->nd_nodeid == mas_mnsr->sr_master_nodeid) {
+			sd->sd_mn_masternode = nd2;
+			break;
+		    }
+		    nd2 = nd2->nd_next;
+		}
+		free_sr((md_set_record *)mas_mnsr);
+
+		/*
+		 * Set the node flags in mynode's rpc.metad node records for
+		 * the nodes that are in the diskset.  Can use my sd
+		 * since earlier call to metaget_setownership set the
+		 * owner flags based on whether that node had snarfed
+		 * the MN diskset mddb.  Reconfig steps guarantee that
+		 * return of metaget_setownership will match the owning
+		 * node's owner list except in the case where a node
+		 * has just panic'd and in this case, a reconfig will
+		 * be starting immediately and the owner lists will
+		 * be sync'd up by the reconfig.
+		 *
+		 * Flag of SET means to take no action except to
+		 * set the node flags as given in the nodelist linked list.
+		 */
+		if (clnt_upd_nr_flags(mynode(), sp, sd->sd_nodelist,
+		    MD_NR_SET, NULL, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Read in the mddb if there are drives in the set.
+	 */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		/* No drives in list */
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out;
+		}
+		rval = -2;
+		goto out;
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Start by suspending rpc.mdcommd (which drains it of all messages),
+	 * then change the nodelist followed by a reinit and resume.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+
+		if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND, sp,
+		    MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspendall_flag = 1;
+		nd = nd->nd_next;
+	}
+
+	/* Set master in my set record in rpc.metad */
+	if (master_flag) {
+		if (clnt_mnsetmaster(mynode(), sp,
+		    sd->sd_mn_mynode->nd_nodename,
+		    sd->sd_mn_mynode->nd_nodeid, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+	/* Causes mddbs to be loaded in kernel */
+	if (setup_db_bydd(sp, dd, 0, ep) == -1) {
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Host not able to start diskset."));
+		rval = -1;
+		goto out;
+	}
+
+	if (! mdisok(ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Set rollback flags to 1 so that halt_set is called if a failure
+	 * is seen after this point.  If snarf_set fails, still need to
+	 * call halt_set to cleanup the diskset.
+	 */
+	rb_flags = 1;
+
+	/* Starts the set */
+	if (snarf_set(sp, stale_bool, ep) != 0) {
+		if (mdismddberror(ep, MDE_DB_STALE)) {
+			/*
+			 * Don't fail join, STALE means that set has
+			 * < 50% mddbs.
+			 */
+			(void) mdstealerror(&ep_snarf, ep);
+			stale_set = 1;
+		} else if (mdisok(ep)) {
+			/* If snarf failed, but no error was set - set it */
+			(void) mdmddberror(ep, MDE_DB_NOTNOW, NODEV64,
+			    sp->setno, 0, NULL);
+				rval = -1;
+				goto out;
+		} else if (!(mdismddberror(ep, MDE_DB_ACCOK))) {
+			/*
+			 * Don't fail join if ACCOK; ACCOK means that mediator
+			 * provided extra vote.
+			 */
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* Did set really get snarfed? */
+	if (own_set(sp, NULL, TRUE, ep) == MD_SETOWNER_NO) {
+		if (mdisok(ep)) {
+			/* If snarf failed, but no error was set - set it */
+			(void) mdmddberror(ep, MDE_DB_NOTNOW, NODEV64,
+				sp->setno, 0, NULL);
+		}
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Host not able to start diskset."));
+		rval = -1;
+		goto out;
+	}
+
+	/* Change to nodelist so need to send reinit to rpc.mdcommd */
+	send_reinit = 1;
+
+	/* If first node to enter set, setup master and clear change log */
+	if (master_flag) {
+		/* Set master in my locally cached set descriptor */
+		(void) strcpy(sd->sd_mn_master_nodenm,
+		    sd->sd_mn_mynode->nd_nodename);
+		sd->sd_mn_master_nodeid = sd->sd_mn_mynode->nd_nodeid;
+		sd->sd_mn_am_i_master = 1;
+
+		/*
+		 * If first node to join set, then clear out change log
+		 * entries.  Change log entries are only needed when a
+		 * change of master is occurring in a diskset that has
+		 * multiple owners.   Since this node is the first owner
+		 * of the diskset, clear the entries.
+		 *
+		 * Only do this if we are in a single node non-SC3.x
+		 * situation.
+		 */
+		if (meta_mn_singlenode() &&
+			mdmn_reset_changelog(sp, ep,  MDMN_CLF_RESETLOG) != 0) {
+			mde_perror(ep, dgettext(TEXT_DOMAIN,
+			    "Unable to reset changelog."));
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* Set my locally cached flag */
+	sd->sd_mn_mynode->nd_flags |= MD_MN_NODE_OWN;
+
+	/*
+	 * Set this node's own flag on all joined nodes in the set
+	 * (including my node).
+	 */
+	clear_nr_flags = 1;
+
+	my_nd = *(sd->sd_mn_mynode);
+	my_nd.nd_next = NULL;
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp, &my_nd,
+		    MD_NR_JOIN, NULL, ep)) {
+			rval = -1;
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+
+out:
+	if (rval != NULL) {
+		/*
+		 * If rollback flag is 1, then node was joined to set.
+		 * Since an error occurred, withdraw node from set in
+		 * order to rollback to before command was run.
+		 * Need to preserve ep so that calling function can
+		 * get error information.
+		 */
+		if (rb_flags == 1) {
+			if (halt_set(sp, &xep)) {
+				mdclrerror(&xep);
+			}
+		}
+
+		/*
+		 * If error, reset master to INVALID.
+		 * Ignore error since (next) first node to successfully join
+		 * will set master on all nodes.
+		 */
+		(void) clnt_mnsetmaster(mynode(), sp, "",
+			MD_MN_INVALID_NID, &xep);
+		mdclrerror(&xep);
+		/* Reset master in my locally cached set descriptor */
+		sd->sd_mn_master_nodeid = MD_MN_INVALID_NID;
+		sd->sd_mn_am_i_master = 0;
+
+		/*
+		 * If nr flags set on other nodes, reset them.
+		 */
+		if (clear_nr_flags) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				(void) clnt_upd_nr_flags(nd->nd_nodename, sp,
+					&my_nd, MD_NR_WITHDRAW, NULL, &xep);
+				mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+			/* Reset my locally cached flag */
+			sd->sd_mn_mynode->nd_flags &= ~MD_MN_NODE_OWN;
+		}
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (send_reinit) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+				sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * We are here because we failed to resume
+				 * rpc.mdcommd.  However we potentially have
+				 * an error from the previous call
+				 * If the previous call did fail,  we capture
+				 * that error and generate a perror with
+				 * the string, "Unable to resume...".
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd."));
+			}
+			nd = nd->nd_next;
+		}
+
+	}
+
+out2:
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+				sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * We are here because we failed to resume
+				 * rpc.mdcommd.  However we potentially have
+				 * an error from the previous call
+				 * If the previous call did fail,  we capture
+				 * that error and generate a perror with
+				 * the string, "Unable to resume...".
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd."));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/*
+	 * Unlock set.  This flushes the caches on the servers.
+	 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+			if (rval == 0)
+				(void) mdstealerror(ep, &xep);
+			else
+				mdclrerror(&xep);
+			rval = -1;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * If this node is the last to join the diskset and clustering isn't
+	 * running, then resync the mirrors in the diskset. We have to wait
+	 * until all nodes are joined so that the status gets propagated to
+	 * all of the members of the set.
+	 * Ignore any error from the resync as the join function shouldn't fail
+	 * because the mirror resync had a problem.
+	 *
+	 * Don't start resync if set is stale.
+	 */
+	if ((rval == 0) && (sdssc_bind_library() != SDSSC_OKAY) &&
+	    (stale_set != 1)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_OWN))
+				break;
+			nd = nd->nd_next;
+		}
+		/*
+		 * nd set to NULL means that we have no nodes in the set that
+		 * haven't joined. In this case we start the resync.
+		 */
+		if (nd == NULL) {
+			(void) meta_mirror_resync_all(sp, 0, &xep);
+			mdclrerror(&xep);
+		}
+	}
+
+	/* Update ABR state for all soft partitions */
+	(void) meta_sp_update_abr(sp, &xep);
+	mdclrerror(&xep);
+
+	/*
+	 * call metaflushsetnames to reset local cache for master and
+	 * node information.
+	 */
+	metaflushsetname(sp);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	/*
+	 * If no error and stale_set is set, then set ep back
+	 * to ep from snarf_set call and return -3.  If another error
+	 * occurred and rval is not 0, then that error would have
+	 * caused the node to be withdrawn from the set and would
+	 * have set ep to that error information.
+	 */
+	if ((rval == 0) && (stale_set)) {
+		(void) mdstealerror(ep, &ep_snarf);
+		return (-3);
+	}
+
+	return (rval);
+}
+
+/*
+ * Entry point to withdraw a node from MultiNode diskset.
+ *
+ * Validate host in diskset.
+ *	- Should be joined into diskset.
+ * Assume valid configuration is stored in the set/drive/node records
+ * in the local mddb since no node or drive can be added to the MNset
+ * unless all drives and nodes are available.  Reconfig steps will
+ * resync all ALIVE nodes in case of panic in critical areas.
+ *
+ * Lock down the set.
+ * Verify that drives exist in configuration.
+ * Verify host is a member of this diskset.
+ * Verify host is an owner of the diskset (host is joined to diskset).
+ * Only allow withdrawal of master node if master node is the only joined
+ * node in the diskset.
+ * Halt the diskset on this node.
+ * Reset Master on this node.
+ * Update node flags to show that this node has withdrawn.
+ * Unlock the set.
+ *
+ * Return values:
+ *	0  - Node successfully withdrew from set.
+ *	-1 - Withdrawal attempted but failed
+ *		- any failure from libmeta calls
+ *		- node not in the member list
+ *	-2 - Withdrawal not attempted since
+ *		- this set had no drives in set
+ *		- this node not joined to set
+ *		- set is not a multinode set
+ */
+extern int
+meta_set_withdraw(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*dd = 0;
+	md_mnnode_desc		*nd, my_nd;
+	int			rval = 0;
+	md_setkey_t		*cl_sk;
+	md_error_t		xep = mdnullerror;
+	int			set_halted = 0;
+	int			suspendall_flag = 0;
+	int			suspend1_flag = 0;
+	bool_t			stale_bool = FALSE;
+	mddb_config_t		c;
+	int			node_id_list[1];
+	sigset_t		oldsigs;
+	int			send_reinit = 0;
+
+	/*
+	 * Error handling in this function uses two labels:
+	 *	out  - roll back (only when rval is -1), send REINIT if
+	 *	       send_reinit was set, then fall through to out2.
+	 *	out2 - resume rpc.mdcommd, unlock the set, flush the local
+	 *	       set name cache and restore signals; no rollback.
+	 * Errors meant for the caller are recorded in ep; xep is a scratch
+	 * error used for calls whose failure must not overwrite ep.
+	 */
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		return (-1);
+	}
+
+	/*
+	 * Must be a multinode diskset.
+	 * NOTE(review): this returns -1, while the function header lists
+	 * "set is not a multinode set" under -2 - confirm which is intended.
+	 */
+	if (!MD_MNSET_DESC(sd)) {
+		(void) mderror(ep, MDE_NOT_MN, sp->setname);
+		return (-1);
+	}
+
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	/*
+	 * Lock the set on current set members.
+	 * For MN diskset lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+			continue;
+		if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Lock out other meta* commands by suspending
+	 * class 1 messages across the diskset.
+	 */
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+			continue;
+		if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+		    sp, MD_MSG_CLASS1, MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		/* At least one node is suspended, so a resume is needed */
+		suspend1_flag = 1;
+	}
+
+	/* Get list of drives - needed in case of failure */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		/* Error getting drives in list */
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out2;
+		}
+		/* no drives in list */
+		rval = -2;
+		goto out2;
+	}
+
+	/*
+	 * Verify that this host is a member (in the host list) of the set.
+	 */
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		if (strcmp(mynode(), nd->nd_nodename) == 0)
+			break;
+	}
+	if (nd == NULL) {
+		(void) mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
+		    sd->sd_mn_mynode->nd_nodename, NULL,
+		    sp->setname);
+		rval = -1;
+		goto out2;
+	}
+
+	/*
+	 * Call metaget_setownership that calls each node in diskset and
+	 * marks in set descriptor if node is an owner of the set or not.
+	 * metaget_setownership checks to see if a node is an owner by
+	 * checking to see if that node's kernel has the mddb loaded.
+	 * If a node had panic'd during a reconfig or an
+	 * add/delete/join/withdraw operation, the other nodes' node
+	 * records may not reflect the current state of the diskset,
+	 * so calling metaget_setownership is the safest thing to do.
+	 */
+	if (metaget_setownership(sp, ep) == -1) {
+		rval = -1;
+		goto out2;
+	}
+
+	/*
+	 * Verify that this node is joined
+	 * to diskset (i.e. is an owner of the diskset).
+	 */
+	if (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+		rval = -2;
+		goto out2;
+	}
+
+	/*
+	 * For a MN diskset, only withdraw master if it is
+	 * the only joined node.
+	 */
+	if (sd->sd_mn_master_nodeid == sd->sd_mn_mynode->nd_nodeid) {
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			/* Skip my node since checking for other owners */
+			if (nd->nd_nodeid == sd->sd_mn_master_nodeid)
+				continue;
+			/* If another owner node is found, error */
+			if (nd->nd_flags & MD_MN_NODE_OWN) {
+				(void) mddserror(ep, MDE_DS_WITHDRAWMASTER,
+				    sp->setno,
+				    sd->sd_mn_mynode->nd_nodename, NULL,
+				    sp->setname);
+				rval = -1;
+				goto out2;
+			}
+		}
+	}
+
+	/*
+	 * Is current set STALE?
+	 * The answer is remembered in stale_bool so that a rollback can
+	 * pass it to snarf_set when re-joining.
+	 */
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &c.c_mde);
+		rval = -1;
+		goto out;
+	}
+	if (c.c_flags & MDDB_C_STALE) {
+		stale_bool = TRUE;
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Start by suspending rpc.mdcommd (which drains it of all messages),
+	 * then change the nodelist followed by a reinit and resume.
+	 */
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+			continue;
+
+		if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+		    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspendall_flag = 1;
+	}
+
+	/*
+	 * Withdraw the set - halt set.
+	 * This will fail if any I/O is occurring to any metadevice which
+	 * includes a resync to a mirror metadevice.
+	 *
+	 * set_halted is set before the call and only cleared again if
+	 * halt_set fails and own_set reports that this node still owns
+	 * the set; the rollback code re-joins the set when it is non-zero.
+	 */
+	set_halted = 1;
+	if (halt_set(sp, ep)) {
+		/* Was set actually halted? */
+		if (own_set(sp, NULL, TRUE, ep) == MD_SETOWNER_YES) {
+			set_halted = 0;
+		}
+		rval = -1;
+		goto out;
+	}
+
+	/* Change to nodelist so need to send reinit to rpc.mdcommd */
+	send_reinit = 1;
+
+	/* Reset master on withdrawn node */
+	if (clnt_mnsetmaster(sd->sd_mn_mynode->nd_nodename, sp, "",
+	    MD_MN_INVALID_NID, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Mark my node as withdrawn and send to other nodes */
+	my_nd = *(sd->sd_mn_mynode);	/* structure copy */
+	my_nd.nd_next = NULL;		/* send a one-element list */
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		if (!(nd->nd_flags & MD_MN_NODE_OWN))
+			continue;
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp, &my_nd,
+		    MD_NR_WITHDRAW, NULL, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * If withdrawn node is a mirror owner, reset mirror owner
+	 * to NULL.  If an error occurs, print a warning and continue.
+	 * Don't fail metaset because of mirror owner reset problem since
+	 * next node to grab mirror will resolve this issue.
+	 * Before next node grabs mirrors, metaset will show the withdrawn
+	 * node as owner which is why an attempt to reset the mirror owner
+	 * is made.
+	 *
+	 * The result used to be compared against the octal literal 01
+	 * (i.e. 1), which looks like a typo for -1.  Any non-zero result
+	 * is now treated as a failure, matching how the other clnt_*
+	 * calls in this function are checked.
+	 */
+	node_id_list[0] = sd->sd_mn_mynode->nd_nodeid;	/* Setup my nodeid */
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		if (!(nd->nd_flags & MD_MN_NODE_OWN))
+			continue;
+		if (clnt_reset_mirror_owner(nd->nd_nodename, sp,
+		    1, &node_id_list[0], &xep) != 0) {
+			mde_perror(&xep, dgettext(TEXT_DOMAIN,
+			    "Unable to reset mirror owner on node %s"),
+			    nd->nd_nodename);
+			mdclrerror(&xep);
+		}
+	}
+
+out:
+	if (rval == -1) {
+		/*
+		 * Rollback.  Every step here is best effort: failures are
+		 * discarded via xep so that ep keeps the original error.
+		 */
+
+		/* Rejoin node - Mark node as joined and send to other nodes */
+		my_nd = *(sd->sd_mn_mynode);	/* structure copy */
+		my_nd.nd_next = NULL;
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (!(nd->nd_flags & MD_MN_NODE_OWN))
+				continue;
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp, &my_nd,
+			    MD_NR_JOIN, NULL, &xep)) {
+				mdclrerror(&xep);
+			}
+		}
+
+		/* Set master on withdrawn node */
+		if (clnt_mnsetmaster(sd->sd_mn_mynode->nd_nodename, sp,
+		    sd->sd_mn_master_nodenm,
+		    sd->sd_mn_master_nodeid, &xep)) {
+			mdclrerror(&xep);
+		}
+
+		/* Join set if halt_set had succeeded */
+		if (set_halted) {
+			if (setup_db_bydd(sp, dd, 0, &xep) == -1) {
+				mdclrerror(&xep);
+			}
+			/* If set previously stale - make it so at re-join */
+			if (snarf_set(sp, stale_bool, &xep) != 0) {
+				mdclrerror(&xep);
+				(void) halt_set(sp, &xep);
+				mdclrerror(&xep);
+			}
+		}
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (send_reinit) {
+		/* Send reinit */
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * We are here because we failed to reinit
+				 * rpc.mdcommd.  However we potentially have
+				 * an error from a previous call.
+				 * If no previous call failed (rval is 0),
+				 * the reinit error is moved into ep;
+				 * otherwise it is dropped so that the
+				 * earlier error in ep is kept.
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd."));
+			}
+		}
+	}
+
+out2:
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * We are here because we failed to resume
+				 * rpc.mdcommd.  However we potentially have
+				 * an error from a previous call.
+				 * If no previous call failed (rval is 0),
+				 * the resume error is moved into ep;
+				 * otherwise it is dropped so that the
+				 * earlier error in ep is kept.
+				 * Setting rval to -1 ensures that in the
+				 * next iteration of the loop, ep is not
+				 * clobbered.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd."));
+			}
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/*
+	 * Unlock set.  This flushes the caches on the servers.
+	 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+			continue;
+		if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+			/* Keep the first error only; see comments above */
+			if (rval == 0)
+				(void) mdstealerror(ep, &xep);
+			else
+				mdclrerror(&xep);
+			rval = -1;
+		}
+	}
+
+	/*
+	 * call metaflushsetname to reset local cache for master and
+	 * node information.
+	 */
+	metaflushsetname(sp);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	return (rval);
+
+}
+
+/*
+ * Update nodelist with cluster member information.
+ * A node not in the member list will be marked
+ * as not ALIVE and not OWN.
+ * A node in the member list will be marked ALIVE, but
+ * the OWN bit will not be changed.
+ *
+ * If mynode isn't in the membership list, fail causing
+ * another reconfig cycle to be started since a non-member
+ * node shouldn't be taking part in the reconfig cycle.
+ *
+ * Return values:
+ *	0 - No problem.
+ *	1 - Any failure including RPC failure to my node.
+ */
+int
+meta_reconfig_update_nodelist(
+	mdsetname_t			*sp,
+	mndiskset_membershiplist_t	*nl,
+	md_set_desc			*sd,
+	md_error_t			*ep
+)
+{
+	mndiskset_membershiplist_t	*nl2;
+	md_mnnode_desc			*nd;
+	md_error_t			xep = mdnullerror;
+	int				rval = 0;
+
+	/*
+	 * Walk through nodelist, checking to see if each
+	 * node is in the member list.
+	 * If node is not a member, reset ALIVE and OWN node flag.
+	 * If node is a member, set ALIVE.
+	 * If mynode's OWN flag gets reset, then halt the diskset on this node.
+	 *
+	 * The membership lookup is done first and the "not a member"
+	 * handling afterwards, so that an empty membership list (nl == NULL)
+	 * also marks every node as not ALIVE and not OWN.  Previously that
+	 * handling lived inside the lookup loop and was skipped entirely
+	 * when the list was empty.
+	 */
+	for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+		/* Look for this node's id in the member list */
+		for (nl2 = nl; nl2 != NULL; nl2 = nl2->next) {
+			if (nl2->msl_node_id == nd->nd_nodeid)
+				break;
+		}
+
+		/* If node is in member list, set ALIVE; OWN is untouched */
+		if (nl2 != NULL) {
+			nd->nd_flags |= MD_MN_NODE_ALIVE;
+			continue;
+		}
+
+		/* node is not in member list, mark !ALIVE and !OWN */
+
+		/* If node is mynode, then halt set if needed */
+		if (strcmp(mynode(), nd->nd_nodename) == 0) {
+			/*
+			 * This shouldn't happen, but just
+			 * in case...  Any node not in the
+			 * membership list should be dead and
+			 * not running reconfig step1.
+			 */
+			if (nd->nd_flags & MD_MN_NODE_OWN) {
+				if (halt_set(sp, &xep)) {
+					mde_perror(&xep, "");
+					mdclrerror(&xep);
+				}
+			}
+			/*
+			 * Return failure since this node
+			 * (mynode) is not in the membership
+			 * list, but process the rest of the
+			 * nodelist first so that rpc.metad
+			 * can be updated with the latest
+			 * membership information.
+			 */
+			(void) mddserror(ep,
+			    MDE_DS_NOTINMEMBERLIST,
+			    sp->setno, nd->nd_nodename, NULL,
+			    sp->setname);
+			rval = 1;
+		}
+		nd->nd_flags &= ~MD_MN_NODE_ALIVE;
+		nd->nd_flags &= ~MD_MN_NODE_OWN;
+	}
+
+	/* Send this information to rpc.metad */
+	if (clnt_upd_nr_flags(mynode(), sp, sd->sd_nodelist,
+	    MD_NR_SET,  MNSET_IN_RECONFIG, &xep)) {
+		/*
+		 * Return failure if can't send node flags to rpc.metad.
+		 * If an earlier error is already recorded in ep, keep it
+		 * and clear xep instead of leaving it set.
+		 */
+		if (rval == 0) {
+			(void) mdstealerror(ep, &xep);
+			rval = 1;
+		} else {
+			mdclrerror(&xep);
+		}
+	}
+	return (rval);
+}
+
+/*
+ * Choose master determines the master for a diskset.
+ * Each node determines the master on its own and
+ * adds this information to its local rpc.metad nodelist
+ * and also sends it to the kernel.
+ *
+ * Nodelist in set descriptor (sd) is sorted in
+ * monotonically increasing sequence of nodeid.
+ *
+ * Return values:
+ *	0 - No problem.
+ *	205 - There was an RPC problem to another node.
+ *	-1 - There was an error.  This could be an RPC error to my node.
+ *		This is a catastrophic failure causing node to panic.
+ */
+int
+meta_reconfig_choose_master_for_set(
+	mdsetname_t	*sp,
+	md_set_desc	*sd,
+	md_error_t	*ep
+)
+{
+	int			is_owner;
+	md_mnset_record		*mnsr = NULL;
+	int			lowest_alive_nodeid = 0;
+	uint_t			master_nodeid;
+	md_mnnode_desc		*nd, *nd2;
+	md_mnnode_record	*nr;
+	md_drive_desc		*dd;
+	md_setkey_t		*cl_sk;
+	int			rval = 0;
+	md_error_t		xep = mdnullerror;
+	mddb_setflags_config_t	sf;
+
+	/*
+	 * Is current node joined to diskset?
+	 * Don't trust flags, really check to see if mddb is snarfed.
+	 */
+	if (s_ownset(sp->setno, ep) == MD_SETOWNER_YES) {
+		/*
+		 * If a node is joined to the diskset, this node checks
+		 * to see if the current master of the diskset is valid and
+		 * is still in the membership list (ALIVE) and is
+		 * still joined (OWN).  Need to verify if master is
+		 * really joined - don't trust the flags.  (Can trust
+		 * ALIVE since set during earlier part of reconfig cycle.)
+		 * If the current master is valid, still in the membership
+		 * list and joined, then master is not changed on this node.
+		 * Just return.
+		 *
+		 * Verify that nodeid is valid before accessing masternode.
+		 */
+		if ((sd->sd_mn_master_nodeid != MD_MN_INVALID_NID) &&
+		    (sd->sd_mn_masternode->nd_flags & MD_MN_NODE_ALIVE)) {
+			if (clnt_ownset(sd->sd_mn_master_nodenm, sp,
+			    &is_owner, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    sd->sd_mn_master_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			} else {
+				if (is_owner == TRUE) {
+
+					meta_mc_log(MC_LOG5, dgettext(
+					    TEXT_DOMAIN, "Set %s previous "
+					    "master chosen %s (%d): %s"),
+					    sp->setname,
+					    sd->sd_mn_master_nodenm,
+					    sd->sd_mn_master_nodeid,
+					    meta_print_hrtime(gethrtime() -
+					    start_time));
+
+					/* Previous master is ok - done */
+					return (0);
+				}
+			}
+		}
+
+		/*
+		 * If current master is no longer in the membership list or
+		 * is no longer joined, then this node uses the following
+		 * algorithm:
+		 * - node calls RPC routine clnt_ownset to get latest
+		 *	information on which nodes are owners of diskset.
+		 * 	clnt_ownset checks on each node to see if its kernel
+		 *	has that diskset snarfed.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_ownset(nd->nd_nodename, sp,
+			    &is_owner, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+
+			/*
+			 * Set owner flag for each node based on whether
+			 * that node really has a diskset mddb snarfed in
+			 * or not.
+			 */
+			if (is_owner == TRUE)
+				nd->nd_flags |= MD_MN_NODE_OWN;
+			else
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - node walks through nodelist looking for nodes that are
+		 *	owners of the diskset that are in the membership list.
+		 * - for each owner, node calls RPC routine clnt_getset to
+		 *	 see if that node has its node record set to OK.
+		 * - If so, master is chosen to be this owner node.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Don't consider a node that isn't an owner */
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Does node have its own node record set to OK? */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (nd->nd_nodeid == nr->nr_nodeid) {
+					if (nr->nr_flags & MD_MN_NODE_OK) {
+						/* Found a master */
+						free_sr(
+						    (md_set_record *)mnsr);
+						goto found_master;
+					}
+				}
+				nr = nr->nr_next;
+			}
+			free_sr((md_set_record *)mnsr);
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - If no owner node has its own node record on its own node
+		 *	set to OK, then this node checks all of the non-owner
+		 * 	nodes that are in the membership list.
+		 * - for each non-owner, node calls RPC routine clnt_getset to
+		 *	 see if that node has its node record set to OK.
+		 * - If set doesn't exist, don't choose node for master.
+		 * - If so, master is chosen to be this non-owner node.
+		 *
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Only checking non-owner nodes this time around */
+			if (nd->nd_flags & MD_MN_NODE_OWN) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Does node have its own node record set to OK? */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/*
+				 * If set doesn't exist on non-owner node,
+				 * don't consider this node for master.
+				 */
+				if (mdiserror(ep, MDE_NO_SET)) {
+					nd = nd->nd_next;
+					continue;
+				} else if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					/* RPC failure to another node */
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (nd->nd_nodeid == nr->nr_nodeid) {
+					if (nr->nr_flags & MD_MN_NODE_OK) {
+						/* Found a master */
+						free_sr(
+						    (md_set_record *)mnsr);
+						goto found_master;
+					}
+				}
+				nr = nr->nr_next;
+			}
+			free_sr((md_set_record *)mnsr);
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - If no node can be found that has its own node record on
+		 *	its node to be set to OK, then all alive nodes
+		 * 	were in the process of being added to or deleted
+		 *	from set.  Each alive node will remove all
+		 *	information pertaining to this set from its node.
+		 *
+		 * If all nodes in set are ALIVE, then call sdssc end routines
+		 * since set was truly being initially created or destroyed.
+		 */
+		goto delete_set;
+	} else {
+
+		/*
+		 * If node is not joined to diskset, then this
+		 * node uses the following algorithm:
+		 * - If unjoined node doesn't have a node record for itself,
+		 *	just delete the diskset since diskset was in the
+		 *	process of being created.
+		 * - node needs to find master of diskset before
+		 *	reconfig cycle, if a master existed.
+		 * - node calls RPC routine clnt_ownset to get latest
+		 * 	information on which nodes are owners of diskset.
+		 *	clnt_ownset checks on each node to see if its
+		 *	kernel has that diskset snarfed.
+		 */
+
+		/*
+		 * Is my node in the set description?
+		 * If not, delete the set from this node.
+		 * sr2setdesc sets sd_mn_mynode pointer to the node
+		 * descriptor for this node if there was a node
+		 * record for this node.
+		 *
+		 */
+		if (sd->sd_mn_mynode == NULL) {
+			goto delete_set;
+		}
+
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_ownset(nd->nd_nodename, sp,
+			    &is_owner, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+
+			/*
+			 * Set owner flag for each node based on whether
+			 * that node really has a diskset mddb snarfed in
+			 * or not.
+			 */
+			if (is_owner == TRUE)
+				nd->nd_flags |= MD_MN_NODE_OWN;
+			else
+				nd->nd_flags &= ~MD_MN_NODE_OWN;
+
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - node walks through nodelist looking for nodes that
+		 *	are owners of the diskset that are in
+		 *	the membership list.
+		 * - for each owner, node calls RPC routine clnt_getset to
+		 *	see if that node has a master set and to get the
+		 *	diskset description.
+		 * - If the owner node has a set description that doesn't
+		 *	include the non-joined node in the nodelist, this node
+		 *	removes its set description of that diskset
+		 *	(i.e. removes the set from its local mddbs).  This is
+		 *	handling the case of when a node was removed from a
+		 *	diskset while it was not in the cluster membership
+		 *	list.
+		 * - If that node has a master set and the master is in the
+		 *	membership list and is an owner, then either this was
+		 *	the master from before the reconfig cycle or this
+		 *	node has already chosen a new master - either way,
+		 *	the master value is valid as long as it is in the
+		 *	membership list and is an owner
+		 * - master is chosen to be owner node's master
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Don't consider a node that isn't an owner */
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Get owner node's set record */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+
+			/* Is this node in the owner node's set record */
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (sd->sd_mn_mynode->nd_nodeid ==
+				    nr->nr_nodeid) {
+					break;
+				}
+				nr = nr->nr_next;
+			}
+			if (nr == NULL) {
+				/* my node not found - delete set */
+				free_sr((md_set_record *)mnsr);
+				goto delete_set;
+			}
+
+			/* Is owner's node's master valid? */
+			master_nodeid = mnsr->sr_master_nodeid;
+			free_sr((md_set_record *)mnsr);
+			if (master_nodeid == MD_MN_INVALID_NID) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			nd2 = sd->sd_nodelist;
+			while (nd2) {
+				if ((nd2->nd_nodeid == master_nodeid) &&
+				    (nd2->nd_flags & MD_MN_NODE_ALIVE) &&
+				    (nd2->nd_flags & MD_MN_NODE_OWN)) {
+						nd = nd2;
+						goto found_master;
+				}
+				nd2 = nd2->nd_next;
+			}
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - If no owner node has a valid master, then follow
+		 * 	algorithm of when a node is joined to the diskset.
+		 * - node walks through nodelist looking for nodes that are
+		 *	owners of the diskset that are in the membership list.
+		 * - for each owner, node calls RPC routine clnt_getset to
+		 *	 see if that node has its node record set to OK.
+		 * - If so, master is chosen to be this owner node.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Don't consider a node that isn't an owner */
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Does node have its own node record set to OK? */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (nd->nd_nodeid == nr->nr_nodeid) {
+					if (nr->nr_flags & MD_MN_NODE_OK) {
+						/* Found a master */
+						free_sr(
+						    (md_set_record *)mnsr);
+						goto found_master;
+					}
+				}
+				nr = nr->nr_next;
+			}
+			free_sr((md_set_record *)mnsr);
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * - If no owner node has its own node record on its own node
+		 *	set to OK, then this node checks all of the non-owner
+		 *	nodes that are in the membership list.
+		 * - for each non-owner, node calls RPC routine clnt_getset to
+		 *	see if that node has its node record set to OK.
+		 * - If set doesn't exist, don't choose node for master.
+		 * - If this node doesn't exist in the nodelist on any of the
+		 *	non-owner nodes, this node removes its set description
+		 *	of that diskset (i.e. removes the set from its local
+		 *	mddbs). This is handling the case of when a node was
+		 *	removed from a diskset while it was not in the
+		 *	cluster membership list.
+		 * - If non-owner node has its node record set to OK and if
+		 *	this node hasn't removed this diskset (step directly
+		 *	before this one), then the master is chosen to be this
+		 *	non-owner node.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Don't consider node that isn't in member list */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd->nd_flags |= MD_MN_NODE_DEL;
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Don't consider owner nodes since none are OK */
+			if (nd->nd_flags & MD_MN_NODE_OWN) {
+				nd->nd_flags |= MD_MN_NODE_DEL;
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/*
+			 * Don't need to get nodelist from my node since
+			 * this is where sd_nodelist was obtained.
+			 */
+			if (sd->sd_mn_mynode->nd_nodeid == nd->nd_nodeid) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/*
+			 * If node has already been decided against for
+			 * master, then skip it.
+			 */
+			if (nd->nd_flags & MD_MN_NODE_DEL) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/*
+			 * Does node in my nodelist have its own node
+			 * record marked OK on its node?  And does node
+			 * in my nodelist exist on all other nodes?
+			 * Don't want to choose a node for master unless
+			 * that node is marked OK on its own node and that
+			 * node exists on all other alive nodes.
+			 *
+			 * This is guarding against the case when several
+			 * nodes are down and one of the downed nodes is
+			 * deleted from the diskset.  When the down nodes
+			 * are rebooted into the cluster, you don't want
+			 * any node to pick the deleted node as the master.
+			 */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, ep) == -1) {
+				/*
+				 * If set doesn't exist on non-owner node,
+				 * don't consider this node for master.
+				 */
+				if (mdiserror(ep, MDE_NO_SET)) {
+					nd->nd_flags |= MD_MN_NODE_DEL;
+					nd = nd->nd_next;
+					continue;
+				} else if (mdanyrpcerror(ep)) {
+					/* RPC failure to another node */
+					return (205);
+				} else {
+					/* Any other failure */
+					return (-1);
+				}
+			}
+			/*
+			 * Is my node in the nodelist gotten from the other
+			 * node?  If not, then remove the set from my node
+			 * since set was deleted from my node while my node
+			 * was out of the cluster.
+			 */
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (sd->sd_mn_mynode->nd_nodeid ==
+				    nr->nr_nodeid) {
+					break;
+				}
+				nr = nr->nr_next;
+			}
+			if (nr == NULL) {
+				/* my node not found - delete set */
+				free_sr((md_set_record *)mnsr);
+				goto delete_set;
+			}
+
+			/* Is node being checked marked OK on its own node? */
+			nr = mnsr->sr_nodechain;
+			while (nr) {
+				if (nd->nd_nodeid == nr->nr_nodeid) {
+					if (!(nr->nr_flags & MD_MN_NODE_OK)) {
+						nd->nd_flags |= MD_MN_NODE_DEL;
+					}
+					break;
+				}
+				nr = nr->nr_next;
+			}
+			/*
+			 * If node being checked doesn't exist on its
+			 * own node - don't choose it as master.
+			 */
+			if (nr == NULL) {
+				nd->nd_flags |= MD_MN_NODE_DEL;
+			}
+
+			/*
+			 * Check every node in my node's nodelist against
+			 * the nodelist gotten from the other node.
+			 * If a node in my node's nodelist is not found in the
+			 * other node's nodelist, then set the DEL flag.
+			 */
+			nd2 = sd->sd_nodelist;
+			while (nd2) {
+				nr = mnsr->sr_nodechain;
+				while (nr) {
+					if (nd2->nd_nodeid == nr->nr_nodeid) {
+						break;
+					}
+					nr = nr->nr_next;
+				}
+				/* nd2 not found in other node's nodelist */
+				if (nr == NULL) {
+					nd2->nd_flags |= MD_MN_NODE_DEL;
+				}
+				nd2 = nd2->nd_next;
+			}
+
+			free_sr((md_set_record *)mnsr);
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * Rescan list looking for a node that has not been marked DEL.
+		 * First node found is the master.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_DEL)) {
+				break;
+			}
+			nd = nd->nd_next;
+			continue;
+		}
+		if (nd) {
+			/* Found a master */
+			goto found_master;
+		}
+
+		/*
+		 * - If no node can be found that has its own node record on
+		 *	its node to be set to OK, then all alive nodes
+		 * 	were in the process of being added to or deleted
+		 *	from set.  Each alive node will remove all
+		 *	information pertaining to this set from its node.
+		 *
+		 * If all nodes in set are ALIVE, then call sdssc end routines
+		 * since set was truly being initially created or destroyed.
+		 */
+		goto delete_set;
+	}
+
+found_master:
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Set %s master chosen %s (%d): %s"),
+	    sp->setname, nd->nd_nodename, nd->nd_nodeid,
+	    meta_print_hrtime(gethrtime() - start_time));
+
+	if (clnt_lock_set(mynode(), sp, ep) == -1) {
+		return (-1);
+	}
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	if (clnt_mnsetmaster(mynode(), sp,
+	    nd->nd_nodename, nd->nd_nodeid, ep)) {
+		rval = -1;
+	} else if (sd->sd_mn_mynode->nd_nodeid == nd->nd_nodeid) {
+		/* If this node is new master, set flag in this node's kernel */
+		(void) memset(&sf, 0, sizeof (sf));
+		sf.sf_setno = sp->setno;
+		sf.sf_setflags = MD_SET_MN_NEWMAS_RC;
+		/* Use magic to help protect ioctl against attack. */
+		sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+		sf.sf_flags = MDDB_NM_SET;
+
+		meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+		    "Setting new master flag for set %s: %s"),
+		    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+		/*
+		 * Fail reconfig cycle if ioctl fails since it is critical
+		 * to set new master flag.
+		 */
+		if (metaioctl(MD_MN_SET_SETFLAGS, &sf, &sf.sf_mde,
+		    NULL) != NULL) {
+			(void) mdstealerror(ep, &sf.sf_mde);
+			rval = -1;
+		}
+	}
+
+	if (clnt_unlock_set(mynode(), cl_sk, &xep) == -1) {
+		if (rval == 0) {
+			(void) mdstealerror(ep, &xep);
+			rval = -1;
+		}
+	}
+
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	return (rval);
+
+delete_set:
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Master not chosen, deleting set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	/*
+	 * Remove all set information from this node:
+	 *	- node records for this set
+	 *	- drive records for this set
+	 *	- set record for this set
+	 * (Only do this on this node since each node
+	 * will do it for its own local mddb.)
+	 *
+	 * If all nodes in set are ALIVE, then
+	 * the lowest numbered ALIVE nodeid in set
+	 * (regardless of whether an owner node or not) will
+	 * call the DCS service to cleanup for create/delete of set.
+	 *   sdssc_create_end(cleanup) if set was being created or
+	 *   sdssc_delete_end(cleanup) if set was being deleted.
+	 * A node record with flag ADD denotes a set being
+	 * created.  A node record with flag DEL denotes a
+	 * set being deleted.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		/* Found a node that isn't alive */
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+			break;
+
+		/* Is my node the lowest numbered ALIVE node? */
+		if (nd->nd_nodeid < sd->sd_mn_mynode->nd_nodeid) {
+			break;
+		}
+		nd = nd->nd_next;
+	}
+	if (nd == NULL) {
+		/* All nodes ALIVE and this is the lowest nodeid */
+		lowest_alive_nodeid = 1;
+	}
+
+	if (clnt_lock_set(mynode(), sp, ep) == -1) {
+		return (-1);
+	}
+
+
+	/*
+	 * If this node had been joined, withdraw and reset master.
+	 *
+	 * This could happen if a node was being added to or removed
+	 * from a diskset and the node doing the add/delete operation and
+	 * all other nodes in the diskset have left the cluster.
+	 */
+	if (sd->sd_mn_mynode) {
+		nd = sd->sd_mn_mynode;
+		if (nd->nd_flags & MD_MN_NODE_OWN) {
+			if (clnt_withdrawset(mynode(), sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			if (clnt_mnsetmaster(mynode(), sp, "",
+			    MD_MN_INVALID_NID, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * Remove side records for this node (side) from local mddb
+	 * (clnt_deldrvs does this) if there are drives in the set.
+	 *
+	 * Don't need to mark this node as DEL since already marked as
+	 * ADD or DEL (or this node would have been chosen as master).
+	 * Don't need to mark other node records, drive records or
+	 * set records as DEL.  If a panic occurs during clnt_delset,
+	 * these records will be deleted the next time this node
+	 * becomes a member and goes through the reconfig cycle.
+	 */
+	/* Get the drive descriptors for this set */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		if (! mdisok(ep)) {
+			/*
+			 * Ignore and clear out any failures from
+			 * metaget_drivedesc since a panic could have
+			 * occurred when a node was partially added to a set.
+			 */
+			mdclrerror(ep);
+		}
+	} else {
+		if (clnt_deldrvs(mynode(), sp, dd, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Now, delete the set - this removes the node, drive
+	 * and set records from the local mddb.
+	 */
+	if (clnt_delset(mynode(), sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+out:
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+
+	/*
+	 * Ignore errors from unlock of set since set is no longer
+	 * known (if clnt_delset worked).
+	 */
+	if (clnt_unlock_set(mynode(), cl_sk, &xep) == -1) {
+		mdclrerror(&xep);
+	}
+
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	/*
+	 * If this node is the lowest numbered nodeid then
+	 * call sdssc_create/delete_end depending on whether
+	 * this node is marked as ADD or DEL in the node record.
+	 */
+	if (lowest_alive_nodeid) {
+		if (nd->nd_flags & MD_MN_NODE_ADD)
+			sdssc_create_end(sp->setname, SDSSC_CLEANUP);
+		else if (nd->nd_flags & MD_MN_NODE_DEL)
+			sdssc_delete_end(sp->setname, SDSSC_CLEANUP);
+	}
+
+	/* Finished with this set -- return */
+	return (rval);
+}
+
+/*
+ * Reconfig step to choose a new master for all MN disksets.
+ * The membership list is released on every exit path once it is read.
+ * Return values:
+ *	0 - Everything is great.
+ *	1 - This node failed to reconfig.
+ *	205 - Cause another reconfig due to a nodelist problem
+ *		or RPC failure to another node
+ */
+int
+meta_reconfig_choose_master(
+	md_error_t	*ep
+)
+{
+	set_t				max_sets, setno;
+	int				nodecnt;
+	mndiskset_membershiplist_t	*nl;
+	md_set_desc			*sd;
+	mdsetname_t			*sp;
+	int				rval = 0;	/* returned to caller */
+	int				set_rval;	/* result for one set */
+	mddb_setflags_config_t		sf;
+	int				start_node_delayed = 0;
+
+	if ((max_sets = get_max_sets(ep)) == 0) {
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Unable to get number of sets"));
+		return (1);
+	}
+
+	/*
+	 * Get membershiplist from API routine.  If there's
+	 * an error, return a 205 to cause another reconfig.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+		mde_perror(ep, "");
+		return (205);
+	}
+
+	/* From here on, leave through "out" so the nodelist is freed. */
+	for (setno = 1; setno < max_sets; setno++) {
+		if ((sp = metasetnosetname(setno, ep)) == NULL) {
+			if (mdiserror(ep, MDE_NO_SET)) {
+				/* No set for this setno - continue */
+				mdclrerror(ep);
+				continue;
+			}
+			/* RPC error from my node - fail immediately. */
+			if (mdanyrpcerror(ep)) {
+				mde_perror(ep, "");
+				rval = 1;
+				goto out;
+			}
+			/* Can't get set information */
+			mde_perror(ep, dgettext(TEXT_DOMAIN,
+			    "Unable to get information for "
+			    "set number %d"), setno);
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* If setname is there, set desc should exist. */
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			/* RPC error from my node - fail immediately. */
+			if (mdanyrpcerror(ep)) {
+				mde_perror(ep, "");
+				rval = 1;
+				goto out;
+			}
+			mde_perror(ep, dgettext(TEXT_DOMAIN,
+			    "Unable to get set %s desc information"),
+			    sp->setname);
+			mdclrerror(ep);
+			continue;
+		}
+
+		/* Only reconfig MN disksets */
+		if (!MD_MNSET_DESC(sd)) {
+			continue;
+		}
+
+		meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+		    "Begin choose master for set %s: %s"),
+		    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+		/* Update nodelist with member information. */
+		if (meta_reconfig_update_nodelist(sp, nl, sd, ep)) {
+			/* RPC error from my node - fail immediately. */
+			if (mdanyrpcerror(ep)) {
+				mde_perror(ep, "");
+				rval = 1;
+				goto out;
+			}
+			mde_perror(ep, "");
+			mdclrerror(ep);
+			continue;
+		}
+
+		/*
+		 * If all nodes in a cluster are starting, then
+		 * all nodes will attempt to contact all other nodes
+		 * to determine a master node.  This can lead to a
+		 * problem where node 1 is trying to contact the rpc.metad
+		 * on node 2 and node 2 is trying to contact the rpc.metad
+		 * on node 1 -- and this causes the rpc call to fail
+		 * on both nodes and causes a new reconfig cycle.
+		 *
+		 * In order to break this problem, a newly starting node
+		 * will delay a small amount of time (nodeid mod 4 seconds)
+		 * and will then run the code to choose a master for the
+		 * first set.  Delay will only be done once regardless of the
+		 * number of sets.
+		 *
+		 * sd_mn_mynode may be NULL when this node has no node record
+		 * in the set; there is no nodeid to delay by in that case.
+		 */
+		if (start_node_delayed == 0) {
+			(void) memset(&sf, 0, sizeof (sf));
+			sf.sf_setno = sp->setno;
+			sf.sf_flags = MDDB_NM_GET;
+			/* Use magic to help protect ioctl against attack. */
+			sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+			if ((metaioctl(MD_MN_GET_SETFLAGS, &sf,
+			    &sf.sf_mde, NULL) == 0) &&
+			    ((sf.sf_setflags & MD_SET_MN_START_RC) ==
+			    MD_SET_MN_START_RC) &&
+			    (sd->sd_mn_mynode != NULL)) {
+				(void) sleep(sd->sd_mn_mynode->nd_nodeid % 4);
+			}
+			start_node_delayed = 1;
+		}
+
+		/* Choose master for this set */
+		set_rval = meta_reconfig_choose_master_for_set(sp, sd, ep);
+		if ((set_rval == -1) || (set_rval == 205)) {
+			mde_perror(ep, "");
+			/* -1 fails this node; 205 asks for another reconfig */
+			rval = (set_rval == -1) ? 1 : 205;
+			goto out;
+		}
+
+		/* Send new nodelist to rpc.mdcommd */
+		(void) mdmn_reinit_set(sp->setno);
+
+		meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+		    "Choose master for set %s completed: %s"),
+		    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+	}
+
+	/*
+	 * Each node turns on I/Os for all MN disksets.
+	 * This is to recover from the situation where the master died
+	 * during a previous reconfig cycle when I/Os were suspended
+	 * for a MN diskset.
+	 * If a failure occurs return a 1 which will force this node to
+	 * panic.  Cannot leave node in the situation where I/Os are
+	 * not resumed.
+	 */
+	setno = 0; /* 0 means all MN sets */
+	if (metaioctl(MD_MN_RESUME_SET, &setno, ep, NULL)) {
+		mde_perror(ep, "");
+		rval = 1;
+	}
+
+out:
+	/* Free the nodelist on success and failure alike */
+	if (nodecnt)
+		meta_free_nodelist(nl);
+
+	return (rval);
+}
+
+/*
+ * meta_mnsync_user_records will synchronize the diskset user records across
+ * all nodes in the diskset.  The diskset user records are stored in
+ * each node's local set mddb.
+ *
+ * This needs to be done even if there is no master change during the
+ * reconfig cycle since this routine should clean up any mess left by
+ * the untimely termination of a metaset or metadb command (due to a
+ * node panic or to user intervention).
+ *
+ * Caller is the Master node.
+ *
+ * Returns	 0 - Success
+ *		205 - Failure during RPC to another node
+ *		-1 - Any other failure and ep is filled in.
+ */
+int
+meta_mnsync_user_records(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	md_mnnode_desc		*master_nodelist, *nd, *nd2, *ndtail;
+	md_mnset_record		*mnsr;
+	md_mnsr_node_t		*master_mnsr_node = NULL, *mnsr_node = NULL;
+	md_mnnode_record	*nr;
+	md_drive_record		*dr;
+	int			dr_cnt, dd_cnt;
+	int			found_my_nr;
+	md_drive_desc		*dd, *dd_prev, *master_dd, *other_dd;
+	int			all_drives_ok;
+	int			rval = 0;
+	int			max_genid = 0;
+	int			num_alive_nodes, num_alive_nodes_del = 0;
+	int			set_locked = 0;
+	md_setkey_t		*cl_sk;
+	md_error_t		xep = mdnullerror;
+	char			*anode[1];
+	mddb_setflags_config_t	sf;
+
+	/*
+	 * Sync up node records first.
+	 * Construct a master nodelist using the nodelist from this
+	 * node's rpc.metad node records and then setting the state of each
+	 * node following these rules:
+	 *	- If a node record is marked OK on its node, mark it OK
+	 *		in the master nodelist (and later OK on all nodes)
+	 *		If a node record is also marked OWN on its node,
+	 *		mark it OWN in the master nodelist.
+	 *	- If a node record is not marked OK on its node, then mark
+	 *		it as DEL in the master list (later deleting it)
+	 *	- If node record doesn't exist on that node, then mark it DEL
+	 *		(later deleting it)
+	 *	- If set record doesn't exist on that node, mark node as DEL
+	 *	- If a node record doesn't exist on all nodes, then mark it DEL
+	 *	- If a node is not ALIVE, then
+	 *		- If that node marked DEL on any node - mark it DEL
+	 *			in master list but leave in nodelist
+	 *		- If that node is marked as ADD on any node, mark it
+	 *			ADD in the master list but leave in nodelist
+	 *		- When that node returns to the living, the DEL
+	 *			node record will be removed and the ADD node
+	 *			record may be removed if marked ADD on that
+	 *			node.
+	 * The key rule is to not remove a node from the nodelist until
+	 * that node record is removed from its own node.  Do not want to
+	 * remove a node's record from all other nodes and then have
+	 * that node have its own record marked OK so that a node will pick
+	 * a different master than the other nodes.
+	 *
+	 * Next,
+	 * If node is ALIVE and node record is marked DEL in master nodelist,
+	 * remove node from set.
+	 * If node is ALIVE and node record is marked OK in master nodelist,
+	 * mark it OK on all other nodes.
+	 * If node is not ALIVE and node record is marked DEL in master
+	 * nodelist, mark it DEL on all other nodes.
+	 * If node is not ALIVE and node record is marked ADD in master
+	 * nodelist, mark it ADD on all other nodes.
+	 */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		return (-1);
+	}
+	master_nodelist = sd->sd_nodelist;
+
+	/*
+	 * Walk through nodelist creating a master nodelist.
+	 */
+	num_alive_nodes = 0;
+	nd = master_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		num_alive_nodes++;
+		if (clnt_mngetset(nd->nd_nodename, sp->setname,
+		    MD_SET_BAD, &mnsr, ep) == -1) {
+			if (mdiserror(ep, MDE_NO_SET)) {
+				/* set doesn't exist, mark node as DEL */
+				nd->nd_flags &= ~MD_MN_NODE_OK;
+				nd->nd_flags &= ~MD_MN_NODE_ADD;
+				nd->nd_flags |= MD_MN_NODE_DEL;
+				nd->nd_flags |= MD_MN_NODE_NOSET;
+				nd = nd->nd_next;
+				continue;
+			} else {
+				/* If RPC failure to another node return 205 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					rval = 205;
+				} else {
+					/* Any other failure */
+					rval = -1;
+				}
+				goto out;
+			}
+		}
+		/* Find biggest genid in records for this diskset */
+		if (mnsr->sr_genid > max_genid)
+			max_genid = mnsr->sr_genid;
+
+		dr = mnsr->sr_drivechain;
+		while (dr) {
+			/* Find biggest genid in records for this diskset */
+			if (dr->dr_genid > max_genid) {
+				max_genid = dr->dr_genid;
+			}
+			dr = dr->dr_next;
+		}
+
+		found_my_nr = 0;
+		nr = mnsr->sr_nodechain;
+		/* nr is the list of node recs from nd_nodename node */
+		while (nr) {
+			/* Find biggest genid in records for this diskset */
+			if (nr->nr_genid > max_genid)
+				max_genid = nr->nr_genid;
+			nd2 = master_nodelist;
+			ndtail = NULL;
+			/* For each node record, is it in master list? */
+			while (nd2) {
+				if (nd2->nd_nodeid == nr->nr_nodeid)
+					break;
+				if (nd2->nd_next == NULL)
+					ndtail = nd2;
+				nd2 = nd2->nd_next;
+			}
+			/*
+			 * Found node record not in master list -- add it
+			 * to list marking it as DEL since node record
+			 * should exist on all nodes unless a panic occurred
+			 * during addition or deletion of host to diskset.
+			 */
+			if (nd2 == NULL) {
+				nd2 = Zalloc(sizeof (*nd2));
+				(void) strcpy(nd2->nd_nodename,
+				    nr->nr_nodename);
+				nd2->nd_flags = nr->nr_flags;
+				nd2->nd_flags |= MD_MN_NODE_DEL;
+				nd2->nd_nodeid = nr->nr_nodeid;
+				nd2->nd_next = NULL;
+				ndtail->nd_next = nd2;
+				nd2 = NULL;
+				nr = nr->nr_next;
+				continue;
+			}
+			/*
+			 * Is this the node record for the node that
+			 * we requested the set desc from?
+			 * If so, check if node has its own node record
+			 * marked OK. If marked OK, check for the OWN bit.
+			 */
+			if (nr->nr_nodeid == nd->nd_nodeid) {
+				found_my_nr = 1;
+				if (nr->nr_flags & MD_MN_NODE_OK) {
+					/*
+					 * If node record is marked OK
+					 * on its own node, then mark it OK
+					 * in the master list.  Node record
+					 * would have to exist on all nodes
+					 * in the ADD state before it could
+					 * be put into the OK state.
+					 */
+					nd->nd_flags |= MD_MN_NODE_OK;
+					nd->nd_flags &=
+					    ~(MD_MN_NODE_ADD | MD_MN_NODE_DEL);
+					/*
+					 * Mark own in master list as marked
+					 * on own node.
+					 */
+					if (nr->nr_flags & MD_MN_NODE_OWN)
+						nd->nd_flags |= MD_MN_NODE_OWN;
+					else
+						nd->nd_flags &= ~MD_MN_NODE_OWN;
+				} else {
+					/* Otherwise, mark node as DEL */
+					nd->nd_flags &= ~MD_MN_NODE_OK;
+					nd->nd_flags &= ~MD_MN_NODE_ADD;
+					nd->nd_flags |= MD_MN_NODE_DEL;
+				}
+			}
+			/*
+			 * If node is not ALIVE and marked DEL
+			 * on any node, make it DEL in master list.
+			 * If node is not ALIVE and marked ADD
+			 * on any node, make it ADD in master list
+			 * unless node record has already been marked DEL.
+			 */
+			if (!(nr->nr_flags & MD_MN_NODE_ALIVE)) {
+				if (nr->nr_flags & MD_MN_NODE_ADD) {
+					if (!(nd->nd_flags & MD_MN_NODE_DEL)) {
+						/* If not DEL - mark it ADD */
+						nd->nd_flags |= MD_MN_NODE_ADD;
+						nd->nd_flags &= ~MD_MN_NODE_OK;
+					}
+				}
+				if (nr->nr_flags & MD_MN_NODE_DEL) {
+					nd->nd_flags |= MD_MN_NODE_DEL;
+					nd->nd_flags &= ~MD_MN_NODE_OK;
+					/* Could already be ADD - make it DEL */
+					nd->nd_flags &= ~MD_MN_NODE_ADD;
+				}
+			}
+			nr = nr->nr_next;
+		}
+		/*
+		 * If a node record doesn't exist on its own node,
+		 * then mark node as DEL.
+		 */
+		if (found_my_nr == 0) {
+			nd->nd_flags &= ~MD_MN_NODE_OK;
+			nd->nd_flags |= MD_MN_NODE_DEL;
+		}
+
+		/*
+		 * If node is OK - put mnsr onto master_mnsr_node list for
+		 * later use when syncing up the drive records in the set.
+		 */
+		if (nd->nd_flags & MD_MN_NODE_OK) {
+			mnsr_node = Zalloc(sizeof (*mnsr_node));
+			mnsr_node->mmn_mnsr = mnsr;
+			(void) strncpy(mnsr_node->mmn_nodename,
+				nd->nd_nodename, MD_MAX_MNNODENAME_PLUS_1);
+			mnsr_node->mmn_next = master_mnsr_node;
+			master_mnsr_node = mnsr_node;
+		} else {
+			free_sr((struct md_set_record *)mnsr);
+		}
+
+		nd = nd->nd_next;
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Master nodelist created for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	/*
+	 * Send master nodelist to the rpc.metad on all nodes (including
+	 * myself) and each node will update itself.  This will set the
+	 * ADD and DEL flags on each node as setup in the master nodelist.
+	 * Don't send nodelist to node where set doesn't exist.
+	 */
+	nd = master_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE) ||
+		    (nd->nd_flags & MD_MN_NODE_NOSET)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+		    master_nodelist, MD_NR_SET, MNSET_IN_RECONFIG, ep)) {
+			/* If RPC failure to another node return 205 */
+			if ((mdanyrpcerror(ep)) &&
+			    (sd->sd_mn_mynode->nd_nodeid !=
+			    nd->nd_nodeid)) {
+				rval = 205;
+			} else {
+				/* Any other failure */
+				rval = -1;
+			}
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * Now, delete nodes that need to be deleted.
+	 */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep))  == NULL) {
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * May be doing lots of RPC commands to the nodes, so lock the
+	 * ALIVE members of the set since most of the rpc.metad routines
+	 * require this for security reasons.
+	 */
+	nd = master_nodelist;
+	while (nd) {
+		/* Skip non-alive nodes and node without set */
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE) ||
+		    (nd->nd_flags & MD_MN_NODE_NOSET)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+			/* If RPC failure to another node return 205 */
+			if ((mdanyrpcerror(ep)) &&
+			    (sd->sd_mn_mynode->nd_nodeid !=
+			    nd->nd_nodeid)) {
+				rval = 205;
+			} else {
+				/* Any other failure */
+				rval = -1;
+			}
+			goto out;
+		}
+		set_locked = 1;
+		nd = nd->nd_next;
+	}
+
+	nd = master_nodelist;
+	while (nd) {
+		/* Skip non-alive nodes */
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (nd->nd_flags & MD_MN_NODE_DEL) {
+			num_alive_nodes_del++;
+			/*
+			 * Delete this node rec from all ALIVE nodes in diskset.
+			 */
+			nd2 = master_nodelist;
+			while (nd2) {
+				/* Skip non-alive nodes and node without set */
+				if (!(nd2->nd_flags & MD_MN_NODE_ALIVE) ||
+				    (nd2->nd_flags & MD_MN_NODE_NOSET)) {
+					nd2 = nd2->nd_next;
+					continue;
+				}
+
+				/* This is a node being deleted from set */
+				if (nd2->nd_nodeid == nd->nd_nodeid) {
+					/* Mark set record as DEL */
+					if (clnt_upd_sr_flags(nd->nd_nodename,
+					    sp, MD_SR_DEL, ep)) {
+						/* RPC failure to !my node */
+						if ((mdanyrpcerror(ep)) &&
+						    (sd->sd_mn_mynode->
+						    nd_nodeid
+						    != nd->nd_nodeid)) {
+							rval = 205;
+						} else {
+							/* Any other failure */
+							rval = -1;
+						}
+						goto out;
+					}
+					if (clnt_deldrvs(nd->nd_nodename, sp,
+					    dd, ep)) {
+						/* RPC failure to !my node */
+						if ((mdanyrpcerror(ep)) &&
+						    (sd->sd_mn_mynode->
+						    nd_nodeid
+						    != nd->nd_nodeid)) {
+							rval = 205;
+						} else {
+							/* Any other failure */
+							rval = -1;
+						}
+						goto out;
+					}
+					if (clnt_delset(nd->nd_nodename, sp,
+					    ep) == -1) {
+						/* RPC failure to !my node */
+						if ((mdanyrpcerror(ep)) &&
+						    (sd->sd_mn_mynode->
+						    nd_nodeid
+						    != nd->nd_nodeid)) {
+							rval = 205;
+						} else {
+							/* Any other failure */
+							rval = -1;
+						}
+						goto out;
+					}
+				} else {
+					/*
+					 * Delete host from sets on hosts
+					 * not being deleted.
+					 */
+					anode[0] = Strdup(nd->nd_nodename);
+					if (clnt_delhosts(nd2->nd_nodename, sp,
+					    1, anode, ep) == -1) {
+						Free(anode[0]);
+						/* RPC failure to !my node */
+						if ((mdanyrpcerror(ep)) &&
+						    (sd->sd_mn_mynode->
+						    nd_nodeid
+						    != nd2->nd_nodeid)) {
+							rval = 205;
+						} else {
+							/* Any other failure */
+							rval = -1;
+						}
+						goto out;
+					}
+
+					meta_mc_log(MC_LOG5,
+					    dgettext(TEXT_DOMAIN,
+					    "Deleted node %s (%d) on node %s "
+					    "from set %s: %s"),
+					    nd->nd_nodename, nd->nd_nodeid,
+					    nd2->nd_nodename,
+					    sp->setname,
+					    meta_print_hrtime(
+					    gethrtime() - start_time));
+
+					Free(anode[0]);
+				}
+				nd2 = nd2->nd_next;
+			}
+		}
+		nd = nd->nd_next;
+	}
+
+	nd = master_nodelist;
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	while (nd) {
+		/* Skip non-alive nodes and node without set */
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE) ||
+		    (nd->nd_flags & MD_MN_NODE_NOSET)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_unlock_set(nd->nd_nodename, cl_sk, ep)) {
+			/* If RPC failure to another node return 205 */
+			if ((mdanyrpcerror(ep)) &&
+			    (sd->sd_mn_mynode->nd_nodeid !=
+			    nd->nd_nodeid)) {
+				rval = 205;
+			} else {
+				/* Any other failure */
+				rval = -1;
+			}
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+	cl_set_setkey(NULL);
+	set_locked = 0;
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Nodelist syncronization complete for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	metaflushsetname(sp);
+
+	/*
+	 * If all alive nodes have been deleted from set, just
+	 * return since nothing else can be done until non-alive
+	 * nodes (if there are any) rejoin the cluster.
+	 */
+	if (num_alive_nodes == num_alive_nodes_del) {
+		rval = 0;
+		goto out;
+	}
+
+	/*
+	 * Sync up drive records.
+	 *
+	 * If a node panic'd (or metaset command was killed) during the
+	 * addition or deletion of a drive to the diskset, the nodes
+	 * may have a different view of the drive list.  During cleanup
+	 * of the drive list during reconfig, a drive will be deleted
+	 * from the list if the master node sees that the drive has been
+	 * marked in the ADD state on any node or is marked in the DEL state
+	 * on all nodes.
+	 * This cleanup must occur even if all nodes in the cluster are
+	 * not part of the cluster so that all nodes have the same view
+	 * of the drivelist.
+	 * Then if the entire cluster goes down and comes back up, the
+	 * new master node could be a node that wasn't in the cluster when
+	 * the node was deleted.  This could lead to a situation where the
+	 * master node thinks that a drive is OK, but this drive isn't
+	 * known to the other nodes.
+	 * This situation can also occur during the addition of a drive
+	 * where a node has the drive marked OK, but the node executing the
+	 * metaset command encountered a failure before marking that drive OK
+	 * on the rest of the nodes.  If the node with the OK drive then
+	 * panics, then rest of the nodes will remove that drive marked ADD
+	 * and when the node with the OK drive rejoins the cluster, it will
+	 * have a drive marked OK that is unknown by the other nodes.
+	 *
+	 * There are 2 situations to consider:
+	 * A) Master knows about a drive that other nodes don't know about.
+	 * B) At least one slave node knows about a drive that the master
+	 *    node doesn't know about.
+	 *
+	 * To handle these situations the following steps are followed:
+	 * 1) Count number of drives known by this master node and the
+	 *    other slave nodes.
+	 *    If all nodes have the same number of drives and the master has
+	 *    all drives marked OK, then skip to step4.
+	 *
+	 * 2) If a node has less drives listed than the master, the master
+	 *    must get the drive descriptor list from that node so that
+	 *    master can determine which drive it needs to delete from that
+	 *    node.  Master must get the drive descriptor list since the
+	 *    drive record list does not contain the name of the drive, but
+	 *    only a key and the key can only be interpreted on that other
+	 *    node.
+	 *
+	 * 3) The master will then create the master drive list by doing:
+	 *	- Master starts with drive list known by master.
+	 *	- Any drive marked ADD will be removed from the list.
+	 *	- Any drive not known by another node (from step2) will be
+	 *	removed from the drive list.
+	 *	- If a drive is marked DEL on the master, the master must
+	 *	verify that the drive record is marked DEL on all nodes.
+	 *	If any node has the drive record marked OK, mark it OK
+	 *	on the master.  (The reason why is described below).
+	 *
+	 * 4) The master sends out the master drive list and the slave
+	 *    nodes will force their drive lists to match the master
+	 *    drive list by deleting drives, if necessary and by changing
+	 *    the drive record states from ADD->OK if master has drive
+	 *    marked OK and slave has drive marked ADD.
+	 *
+	 * Interesting scenarios:
+	 *
+	 * 1) System has 4 nodes with node 1 as the master.  Node 3 starts
+	 *    to delete a drive record (drive record on node 1 is marked DEL),
+	 *    but is stopped when node 3 panics.  Node 1 also panics.
+	 *    During reconfig cycle, node 2 is picked as master and the drive
+	 *    record is left alone since all nodes in the cluster have it
+	 *    marked OK.  User now sees drive as part of diskset.
+	 *    Now, entire cluster is rebooted and node 1 rejoins the cluster.
+	 *    Node 1 is picked as the master and node 1 has drive record
+	 *    marked DEL.  Node 1 contacts all other nodes in the cluster
+	 *    and since at least one node has the drive record marked OK,
+	 *    the master marks the drive record OK.
+	 *    User continues to see the drive as part of the diskset.
+	 */
+
+	/* Reget set descriptor since flushed above */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Has side effect of setting sd->sd_drvs to same as master_dd */
+	if ((master_dd = metaget_drivedesc_sideno(sp,
+	    sd->sd_mn_mynode->nd_nodeid,
+	    (MD_BASICNAME_OK | PRINT_FAST), ep)) == NULL) {
+		/* No drives in list */
+		if (!mdisok(ep)) {
+			/*
+			 * Can't get drive list for this node, so
+			 * return -1 causing this node to be removed from
+			 * cluster config and fixed.
+			 */
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* Count the number of drives for all nodes */
+	mnsr_node = master_mnsr_node;
+	while (mnsr_node) {
+		dr_cnt = 0;
+		dr = mnsr_node->mmn_mnsr->sr_drivechain;
+		while (dr) {
+			dr_cnt++;
+			dr = dr->dr_next;
+		}
+		mnsr_node->mmn_numdrives = dr_cnt;
+		mnsr_node = mnsr_node->mmn_next;
+	}
+
+	/* Count the number of drives for the master; also check flags */
+	all_drives_ok = 1;
+	dd_cnt = 0;
+	dd = master_dd;
+	while (dd) {
+		dd_cnt++;
+		if (!(dd->dd_flags & MD_DR_OK))
+			all_drives_ok = 0;
+		dd = dd->dd_next;
+	}
+
+	/* If all drives are ok, do quick check against number of drives */
+	if (all_drives_ok) {
+		/* If all nodes have same number of drives, almost done */
+		mnsr_node = master_mnsr_node;
+		while (mnsr_node) {
+			if (mnsr_node->mmn_numdrives != dd_cnt)
+				break;
+			mnsr_node = mnsr_node->mmn_next;
+		}
+		/* All nodes have same number of drives, just send flags */
+		if (mnsr_node == NULL) {
+			goto send_drive_list;
+		}
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Begin detailed drive synchronization for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	/* Detailed check required  */
+	mnsr_node = master_mnsr_node;
+	while (mnsr_node) {
+		/* Does slave node have less drives than master? */
+		if (mnsr_node->mmn_numdrives < dd_cnt) {
+			/* Yes - must determine which drive is missing */
+			if (clnt_getdrivedesc(mnsr_node->mmn_nodename, sp,
+			    &other_dd, ep)) {
+				/* RPC failure to !my node */
+				if ((mdanyrpcerror(ep)) &&
+				    (strcmp(mynode(), mnsr_node->mmn_nodename)
+				    != 0)) {
+					rval = 205;
+				} else {
+					/* Any other failure */
+					rval = -1;
+				}
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Master node %s unable to "
+				    "retrieve drive list from node %s"),
+				    mynode(), mnsr_node->mmn_nodename);
+				goto out;
+			}
+			mnsr_node->mmn_dd = other_dd;
+			dd = master_dd;
+			while (dd) {
+				if (!(dd->dd_flags & MD_DR_OK)) {
+					dd = dd->dd_next;
+					continue;
+				}
+				other_dd = mnsr_node->mmn_dd;
+				while (other_dd) {
+					/* Convert to devids, when available */
+					if (strcmp(other_dd->dd_dnp->cname,
+					    dd->dd_dnp->cname) == 0) {
+						break;
+					}
+					other_dd = other_dd->dd_next;
+				}
+				/*
+				 * dd not found on slave so mark it
+				 * ADD for later deletion (drives in ADD
+				 * state are deleted later in this routine).
+				 */
+				if (other_dd == NULL) {
+					dd->dd_flags = MD_DR_ADD;
+				}
+				dd = dd->dd_next;
+			}
+
+		}
+		mnsr_node = mnsr_node->mmn_next;
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Drive check completed for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	dd = master_dd;
+	dd_prev = 0;
+	while (dd) {
+		/* Remove any ADD drives from list */
+		if (dd->dd_flags & MD_DR_ADD) {
+			if (dd_prev) {
+				dd_prev->dd_next = dd->dd_next;
+				dd->dd_next = NULL;
+				metafreedrivedesc(&dd);
+				dd = dd_prev->dd_next;
+			} else {
+				/*
+				 * If removing drive descriptor from head
+				 * of linked list, also change sd->sd_drvs.
+				 */
+				master_dd = sd->sd_drvs = dd->dd_next;
+				dd->dd_next = NULL;
+				metafreedrivedesc(&dd);
+				dd = master_dd;
+			}
+			/* dd setup in if/else above */
+			continue;
+		}
+		/*
+		 * If drive is marked DEL, check all other nodes.
+		 * If drive on another node is marked OK, mark drive OK
+		 * in master list.  If drive is marked DEL or doesn't exist
+		 * on all nodes, remove drive from list.
+		 */
+		if (dd->dd_flags & MD_DR_DEL) {
+			mnsr_node = master_mnsr_node;
+			while (mnsr_node) {
+				if (mnsr_node->mmn_dd == NULL) {
+				    if (clnt_getdrivedesc(
+					mnsr_node->mmn_nodename, sp,
+					&other_dd, ep)) {
+					    /* RPC failure to !my node */
+					    if ((mdanyrpcerror(ep)) &&
+						(strcmp(mynode(),
+						mnsr_node->mmn_nodename)
+						!= 0)) {
+						    rval = 205;
+					    } else {
+						    /* Any other failure */
+						    rval = -1;
+					    }
+					    mde_perror(ep, dgettext(TEXT_DOMAIN,
+						"Master node %s unable "
+						"to retrieve drive list from "
+						"node %s"), mynode(),
+						mnsr_node->mmn_nodename);
+					    goto out;
+				    }
+				    mnsr_node->mmn_dd = other_dd;
+				}
+				other_dd = mnsr_node->mmn_dd;
+				while (other_dd) {
+					/* Found drive (OK) from other node */
+					if (strcmp(dd->dd_dnp->cname,
+					    other_dd->dd_dnp->cname)
+					    == 0) {
+						/* Drive marked OK */
+						if (other_dd->dd_flags &
+						    MD_DR_OK) {
+						    dd->dd_flags = MD_DR_OK;
+						}
+						break;
+					}
+					other_dd = other_dd->dd_next;
+				}
+				if (dd->dd_flags == MD_DR_OK)
+					break;
+
+				mnsr_node = mnsr_node->mmn_next;
+			}
+			/*
+			 * If no node had this drive marked OK, delete it.
+			 */
+			if (dd->dd_flags & MD_DR_DEL) {
+				if (dd_prev) {
+					dd_prev->dd_next = dd->dd_next;
+					dd->dd_next = NULL;
+					metafreedrivedesc(&dd);
+					dd = dd_prev->dd_next;
+				} else {
+					/*
+					 * If removing drive descriptor from
+					 * head of linked list, also change
+					 * sd->sd_drvs.
+					 */
+					master_dd = sd->sd_drvs = dd->dd_next;
+					dd->dd_next = NULL;
+					metafreedrivedesc(&dd);
+					dd = master_dd;
+				}
+				/* dd setup in if/else above */
+				continue;
+			}
+		}
+		dd_prev = dd;
+		dd = dd->dd_next;
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Setting drive states completed for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+send_drive_list:
+	/*
+	 * Set genid on all drives to be the highest value seen.
+	 */
+	dd = master_dd;
+	while (dd) {
+		dd->dd_genid = max_genid;
+		dd = dd->dd_next;
+	}
+	/*
+	 * Send updated drive list to all alive nodes.
+	 * Will also set genid on set and node records to have same
+	 * as the drive records.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		/* Skip non-alive nodes */
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_upd_dr_reconfig(nd->nd_nodename, sp, master_dd, ep)) {
+			/* RPC failure to another node */
+			if ((mdanyrpcerror(ep)) &&
+			    (sd->sd_mn_mynode->nd_nodeid != nd->nd_nodeid)) {
+				rval = 205;
+			} else {
+				/* Any other failure */
+				rval = -1;
+			}
+			goto out;
+		}
+		nd = nd->nd_next;
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Sent drive list to all nodes for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	/*
+	 * If no drive records left in set and nodes had been joined,
+	 * withdraw the nodes.  Always reset the master and mark
+	 * all nodes as withdrawn on all nodes.
+	 */
+	if (master_dd == NULL) {
+		/* Reset new master flag since no longer master */
+		(void) memset(&sf, 0, sizeof (sf));
+		sf.sf_setno = sp->setno;
+		sf.sf_setflags = MD_SET_MN_NEWMAS_RC;
+		sf.sf_flags = MDDB_NM_RESET;
+		/* Use magic to help protect ioctl against attack. */
+		sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+		/* Ignore failure, failure to reset flag isn't catastrophic */
+		(void) metaioctl(MD_MN_SET_SETFLAGS, &sf,
+		    &sf.sf_mde, NULL);
+
+		meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+		    "Reset new master flag for " "set %s: %s"),
+		    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip non-alive nodes  */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				/* RPC failure to another node */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					rval = 205;
+				} else {
+					/* Any other failure */
+					rval = -1;
+				}
+				goto out;
+			}
+			set_locked = 1;
+
+			/* Withdraw node from set if owner */
+			if ((nd->nd_flags & MD_MN_NODE_OWN) &&
+			    (clnt_withdrawset(nd->nd_nodename, sp, ep))) {
+				/* RPC failure to another node */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					rval = 205;
+				} else {
+					/* Any other failure */
+					rval = -1;
+				}
+				goto out;
+			}
+
+			/* Mark all nodes as withdrawn on this node */
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+			    sd->sd_nodelist, MD_NR_WITHDRAW, NULL, ep)) {
+				/* RPC failure to another node */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					rval = 205;
+				} else {
+					/* Any other failure */
+					rval = -1;
+				}
+				goto out;
+			}
+
+			/* Resets master to no-master on this node */
+			if (clnt_mnsetmaster(nd->nd_nodename, sp,
+			    "", MD_MN_INVALID_NID, ep)) {
+				/* RPC failure to another node */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					rval = 205;
+				} else {
+					/* Any other failure */
+					rval = -1;
+				}
+				goto out;
+			}
+
+			cl_sk = cl_get_setkey(sp->setno, sp->setname);
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, ep)) {
+				/* RPC failure to another node */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					rval = 205;
+				} else {
+					/* Any other failure */
+					rval = -1;
+				}
+				goto out;
+			}
+			set_locked = 0;
+			nd = nd->nd_next;
+		}
+	}
+
+out:
+	/*
+	 * If got here and set is still locked, then an error has
+	 * occurred and master_nodelist is still valid.
+	 * If error is not an RPC error, then unlock.
+	 * If error is an RPC error, skip unlocks since this could cause
+	 * yet another RPC timeout if a node has failed.
+	 * Ignore failures in unlock since unlock is just trying to
+	 * clean things up.
+	 */
+	if ((set_locked) && !(mdanyrpcerror(ep))) {
+		nd = master_nodelist;
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		while (nd) {
+			/* Skip non-alive nodes */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * If clnt_unlock fails, just break out since next
+			 * reconfig cycle will reset the locks anyway.
+			 */
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+				break;
+			}
+			nd = nd->nd_next;
+		}
+		cl_set_setkey(NULL);
+	}
+	/* Free master_mnsr and drive descs */
+	mnsr_node = master_mnsr_node;
+	while (mnsr_node) {
+		master_mnsr_node = mnsr_node->mmn_next;
+		free_sr((md_set_record *)mnsr_node->mmn_mnsr);
+		free_rem_dd(mnsr_node->mmn_dd);
+		Free(mnsr_node);
+		mnsr_node = master_mnsr_node;
+	}
+
+	/* Frees sd->sd_drvs (which is also master_dd) */
+	metaflushsetname(sp);
+	return (rval);
+}
+
+/*
+ * meta_mnsync_diskset_mddbs
+ * Calling node is guaranteed to be an owner node.
+ * Calling node is the master node.
+ *
+ * Master node verifies that ondisk mddb format matches its incore format.
+ * If no nodes are joined to set, remove the change log entries.
+ * If a node is joined to set, play the change log.
+ *
+ * Order of processing:
+ *	1) No drives in the set - nothing to sync, return 0.
+ *	2) Master is not yet marked as an owner - join master to the set,
+ *	   reset the change log (unless the set is STALE), reset the new
+ *	   master flag and return.
+ *	3) Set is STALE - return 0 without touching the mddbs.
+ *	4) This node is not a newly chosen master - return 0.
+ *	5) Otherwise suspend I/O on all alive owner nodes, issue the
+ *	   MD_MN_CHK_WRT_MDDB ioctl, resume I/O, replay the change log
+ *	   and reset the new master flag.
+ *
+ * Arguments:
+ *	sp - set to be synchronized
+ *	ep - error return; filled in when -1 is returned
+ *
+ * Returns	 0 - Success
+ *		 1 - Master unable to join to set.
+ *		205 - Failure during RPC to another node
+ *		-1 - Any other failure and ep is filled in.
+ *			-1 return will eventually cause node to panic
+ *			in a SunCluster environment.
+ */
+int
+meta_mnsync_diskset_mddbs(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	mddb_config_t		c;
+	md_mn_msgclass_t	class;
+	mddb_setflags_config_t	sf;
+	md_mnnode_desc		*nd, *nd2;
+	md_error_t		xep = mdnullerror;
+	int			stale_set = 0;
+
+	/* If setname is there, set desc should exist. */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Unable to get set %s desc information"), sp->setname);
+		return (-1);
+	}
+
+	/* Are there drives in the set? */
+	if (metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep) == NULL) {
+		if (! mdisok(ep)) {
+			return (-1);
+		}
+		/* No drives in set -- nothing to sync up */
+		return (0);
+	}
+
+	/*
+	 * Is master node (which is this node) joined to set?
+	 * If master node isn't joined (which means that no nodes
+	 * are joined to diskset), remove the change log entries
+	 * since no need to replay them - all nodes will have same
+	 * view of mddbs since all nodes are reading in the mddbs
+	 * from disk.
+	 * There is also no need to sync up the master and ondisk mddbs
+	 * since master has no incore knowledge.
+	 * Need to join master to set in order to flush the change
+	 * log entries. Don't need to block I/O during join of master
+	 * to set since no other nodes are joined to set and so no I/O
+	 * can be occurring.
+	 */
+	if (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+		/* Join master to set */
+		if (clnt_joinset(mynode(), sp,
+		    MNSET_IN_RECONFIG, ep)) {
+			if (mdismddberror(ep, MDE_DB_STALE)) {
+				/*
+				 * If STALE, print message and continue on.
+				 * Don't do any writes or reads to mddbs
+				 * so don't clear change log.
+				 */
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Join of master node to STALE set %s"),
+				    sp->setname);
+				stale_set = 1;
+				mdclrerror(ep);
+			} else if (mdismddberror(ep, MDE_DB_ACCOK)) {
+				/* ACCOK means mediator provided extra vote */
+				mdclrerror(ep);
+			} else {
+				/*
+				 * If master is unable to join set, print an
+				 * error message.  Don't return failure or node
+				 * will panic during cluster reconfig cycle.
+				 * Also, withdraw node from set in order to
+				 * cleanup from failed join attempt.
+				 */
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Join of master node in set %s failed"),
+				    sp->setname);
+				if (clnt_withdrawset(mynode(), sp, &xep))
+					mdclrerror(&xep);
+				return (1);
+			}
+		}
+		/*
+		 * Master node successfully joined.
+		 * Set local copy of flags to OWN and
+		 * send owner flag to rpc.metad. If not stale,
+		 * flush the change log.
+		 */
+		sd->sd_mn_mynode->nd_flags |= MD_MN_NODE_OWN;
+		if (clnt_upd_nr_flags(mynode(), sp, sd->sd_nodelist, MD_NR_SET,
+		    MNSET_IN_RECONFIG, ep)) {
+			mde_perror(ep, dgettext(TEXT_DOMAIN,
+			    "Flag update of master node join in set %s failed"),
+			    sp->setname);
+			return (-1);
+		}
+
+		if (!stale_set) {
+			if (mdmn_reset_changelog(sp, ep,
+			    MDMN_CLF_RESETLOG) != 0) {
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reset changelog."));
+				return (-1);
+			}
+			meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+			    "Removed changelog entries for set %s: %s"),
+			    sp->setname,
+			    meta_print_hrtime(gethrtime() - start_time));
+		}
+		/* Reset new master flag before return */
+		(void) memset(&sf, 0, sizeof (sf));
+		sf.sf_setno = sp->setno;
+		sf.sf_setflags = MD_SET_MN_NEWMAS_RC;
+		sf.sf_flags = MDDB_NM_RESET;
+		/* Use magic to help protect ioctl against attack. */
+		sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+		/* Ignore failure, failure to reset flag isn't catastrophic */
+		(void) metaioctl(MD_MN_SET_SETFLAGS, &sf,
+		    &sf.sf_mde, NULL);
+
+		meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+		    "Reset new master flag for set %s: %s"),
+		    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+		return (0);
+	}
+
+	/*
+	 * Is master already joined to STALE set (< 50% mddbs avail)?
+	 * If so, can make no config changes to mddbs so don't check or play
+	 * changelog and don't sync master node to ondisk mddbs.
+	 * To get out of the stale state all nodes must be withdrawn
+	 * from set.  Then as nodes are re-joined, all nodes will
+	 * have same view of mddbs since all nodes are reading the
+	 * mddbs from disk.
+	 */
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+		(void) mdstealerror(ep, &c.c_mde);
+		return (-1);
+	}
+	if (c.c_flags & MDDB_C_STALE) {
+		return (0);
+	}
+
+	/*
+	 * If this node is NOT a newly chosen master, then there's
+	 * nothing else to do since the change log should be empty and
+	 * the ondisk and incore mddbs are already consistent.
+	 *
+	 * A newly chosen master is a node that was not the master
+	 * at the beginning of the reconfig cycle.  If a node is a new
+	 * master, then the new master state is reset after the ondisk
+	 * and incore mddbs are consistent and the change log has
+	 * been replayed.
+	 *
+	 * Note that if the GET_SETFLAGS ioctl itself fails, processing
+	 * continues as if this node were a new master.
+	 */
+	(void) memset(&sf, 0, sizeof (sf));
+	sf.sf_setno = sp->setno;
+	sf.sf_flags = MDDB_NM_GET;
+	/* Use magic to help protect ioctl against attack. */
+	sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+	if ((metaioctl(MD_MN_GET_SETFLAGS, &sf, &sf.sf_mde, NULL) == 0) &&
+	    ((sf.sf_setflags & MD_SET_MN_NEWMAS_RC) == 0)) {
+		return (0);
+	}
+
+	/*
+	 * Now, sync up incore master view to ondisk mddbs.
+	 * This is needed in the case where a master node
+	 * had made a change to the mddb, but this change
+	 * may not have been relayed to the slaves yet.
+	 * So, the new master needs to verify that the ondisk
+	 * mddbs match what the new master has incore -
+	 * if different, new master rewrites all of the mddbs.
+	 * Then the new master will replay the changelog and the
+	 * new master will then execute what the old master had
+	 * done.
+	 *
+	 * Block all I/Os to disks in this diskset on all nodes in
+	 * the diskset.  This will allow the rewriting of the mddbs
+	 * (if needed), to proceed in a timely manner.
+	 *
+	 * If block of I/Os fail, return a -1.
+	 */
+
+	nd = sd->sd_nodelist;
+	while (nd) {
+		/* Skip non-alive and non-owner nodes  */
+		if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) ||
+		    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_mn_susp_res_io(nd->nd_nodename, sp->setno,
+		    MN_SUSP_IO, ep)) {
+			mde_perror(ep, dgettext(TEXT_DOMAIN,
+			    "Unable to suspend I/O on node %s in set %s"),
+			    nd->nd_nodename, sp->setname);
+
+			/*
+			 * Resume all other nodes that had been suspended.
+			 * (Reconfig return step also resumes I/Os
+			 * for all sets.)
+			 */
+			nd2 = sd->sd_nodelist;
+			while (nd2) {
+				/* Stop when reaching failed node */
+				if (nd2->nd_nodeid == nd->nd_nodeid)
+					break;
+				/* Skip non-alive and non-owner nodes  */
+				if ((!(nd2->nd_flags & MD_MN_NODE_ALIVE)) ||
+				    (!(nd2->nd_flags & MD_MN_NODE_OWN))) {
+					nd2 = nd2->nd_next;
+					continue;
+				}
+				(void) (clnt_mn_susp_res_io(nd2->nd_nodename,
+					sp->setno, MN_RES_IO, &xep));
+				nd2 = nd2->nd_next;
+			}
+
+			/*
+			 * If an RPC failure on another node, return a 205.
+			 * Otherwise, exit with failure.
+			 */
+			if ((mdanyrpcerror(ep)) &&
+			    (sd->sd_mn_mynode->nd_nodeid !=
+			    nd->nd_nodeid)) {
+				return (205);
+			} else {
+				return (-1);
+			}
+
+		}
+		nd = nd->nd_next;
+	}
+
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+	/* Master can't sync up to ondisk mddbs?  Kick it out of cluster */
+	if (metaioctl(MD_MN_CHK_WRT_MDDB, &c, &c.c_mde, NULL) != 0) {
+		/*
+		 * Hand the ioctl error back to the caller so that ep is
+		 * filled in on a -1 return, as is done for MD_DB_GETDEV.
+		 * I/Os are left suspended here; the reconfig return step
+		 * resumes I/Os for all sets.
+		 */
+		(void) mdstealerror(ep, &c.c_mde);
+		return (-1);
+	}
+
+	/*
+	 * Resume I/Os that were suspended above.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		/* Skip non-alive and non-owner nodes  */
+		if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) ||
+		    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+			nd = nd->nd_next;
+			continue;
+		}
+		if (clnt_mn_susp_res_io(nd->nd_nodename, sp->setno,
+		    MN_RES_IO, ep)) {
+			mde_perror(ep, dgettext(TEXT_DOMAIN,
+			    "Unable to resume I/O on node %s in set %s"),
+			    nd->nd_nodename, sp->setname);
+
+			/*
+			 * If an RPC failure then don't do any
+			 * more RPC calls, since one timeout is enough
+			 * to endure.  If RPC failure to another node, return
+			 * 205.  If RPC failure to my node, return -1.
+			 * If not an RPC failure, continue resuming the
+			 * rest of the nodes and then return -1.
+			 */
+			if (mdanyrpcerror(ep)) {
+				if (sd->sd_mn_mynode->nd_nodeid ==
+				    nd->nd_nodeid) {
+					return (-1);
+				} else {
+					return (205);
+				}
+			}
+
+			/*
+			 * If not an RPC error, continue resuming rest of
+			 * nodes, ignoring any failures except for an
+			 * RPC failure which constitutes an immediate exit.
+			 * Start in middle of list with failing node.
+			 */
+			nd2 = nd->nd_next;
+			while (nd2) {
+				/* Skip non-alive and non-owner nodes  */
+				if ((!(nd2->nd_flags & MD_MN_NODE_ALIVE)) ||
+				    (!(nd2->nd_flags & MD_MN_NODE_OWN))) {
+					nd2 = nd2->nd_next;
+					continue;
+				}
+				(void) (clnt_mn_susp_res_io(nd2->nd_nodename,
+					sp->setno, MN_RES_IO, &xep));
+				if (mdanyrpcerror(&xep)) {
+					return (-1);
+				}
+				nd2 = nd2->nd_next;
+			}
+
+			/*
+			 * Remaining nodes have been resumed (best effort).
+			 * Report the original resume failure, which is
+			 * still held in ep, instead of falling back into
+			 * the outer loop and resuming the same nodes again.
+			 */
+			return (-1);
+		}
+		nd = nd->nd_next;
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN, "Master node has completed "
+	    "checking/writing the mddb for set %s: %s"), sp->setname,
+	    meta_print_hrtime(gethrtime() - start_time));
+
+	/*
+	 * Send (aka replay) all messages we find in the changelog.
+	 * Flag the messages with
+	 *   MD_MSGF_REPLAY_MSG, so no new message ID is generated for them
+	 *   MD_MSGF_OVERRIDE_SUSPEND so they can pass the suspended commd.
+	 *
+	 * Classes are walked from the highest class down to class 1.
+	 * A failed replay is only logged; it does not fail this routine.
+	 */
+	for (class = MD_MN_NCLASSES - 1; class > 0; class--) {
+		mdmn_changelog_record_t	*lr;
+		md_mn_result_t	*resultp = NULL;
+		int		ret;
+
+		lr = mdmn_get_changelogrec(sp->setno, class);
+		if ((lr->lr_flags & MD_MN_LR_INUSE) == 0) {
+			/* no entry for this class */
+			continue;
+		}
+
+		meta_mc_log(MC_LOG1, dgettext(TEXT_DOMAIN,
+		    "replaying message ID=(%d, 0x%llx-%d)\n"),
+		    MSGID_ELEMS(lr->lr_msg.msg_msgid));
+
+		ret = mdmn_send_message_with_msgid(
+			lr->lr_msg.msg_setno,
+			lr->lr_msg.msg_type,
+			lr->lr_msg.msg_flags |  MD_MSGF_REPLAY_MSG |
+						MD_MSGF_OVERRIDE_SUSPEND,
+			lr->lr_msg.msg_event_data,
+			lr->lr_msg.msg_event_size,
+			&resultp,
+			&lr->lr_msg.msg_msgid,
+			&xep);
+
+		meta_mc_log(MC_LOG1, dgettext(TEXT_DOMAIN,
+		    "mdmn_send_message returned %d\n"), ret);
+
+		if (resultp)
+			free_result(resultp);
+
+		/*
+		 * The send error is deliberately ignored, but release
+		 * anything it may hold so it isn't leaked per class.
+		 */
+		mdclrerror(&xep);
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Playing changelog completed for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	/*
+	 * Now that new master has ondisk and incore mddbs in sync, reset
+	 * this node's new master kernel flag (for this set).  If this node
+	 * re-enters another reconfig cycle before the completion of this
+	 * reconfig cycle, this master node won't need to check if the ondisk
+	 * and incore mddbs are in sync since this node won't be considered
+	 * a new master (since this flag is being reset here in the middle of
+	 * step2).  This will save time during any subsequent reconfig
+	 * cycles as long as this node continues to be master.
+	 */
+	(void) memset(&sf, 0, sizeof (sf));
+	sf.sf_setno = sp->setno;
+	sf.sf_setflags = MD_SET_MN_NEWMAS_RC;
+	sf.sf_flags = MDDB_NM_RESET;
+	/* Use magic to help protect ioctl against attack. */
+	sf.sf_magic = MDDB_SETFLAGS_MAGIC;
+	/* Ignore failure, since failure to reset flag isn't catastrophic */
+	(void) metaioctl(MD_MN_SET_SETFLAGS, &sf, &sf.sf_mde, NULL);
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Reset new master flag for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	return (0);
+}
+
+/*
+ * meta_mnjoin_all will join all starting nodes in the diskset.
+ * A starting node is considered to be any node that is not
+ * an owner of the set but is a member of the cluster.
+ * Master node is already joined to set (done in meta_mnsync_diskset_mddbs).
+ *
+ * Caller is the Master node.
+ *
+ * Outline:
+ *	1. Nothing to do (return 0) if the set has no drives.
+ *	2. Ask the kernel (MD_DB_GETDEV) whether the set is stale.
+ *	3. If the set is not stale and at least one ALIVE node is not yet
+ *	   an owner, suspend I/O on every ALIVE owner node.
+ *	4. Join every ALIVE non-owner node.  STALE and ACCOK results count
+ *	   as joined; any other non-RPC failure prints a warning, withdraws
+ *	   that node and moves on to the next node.
+ *	5. Resume I/O if it was suspended in step 3.
+ *	6. Push the node record flags (MD_NR_SET) to every owner node.
+ *
+ * Returns	 0 - Success
+ *		205 - Failure during RPC to another node
+ *		-1 - Any other failure and ep is filled in.
+ */
+int
+meta_mnjoin_all(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_set_desc		*sd;
+	md_mnnode_desc		*nd, *nd2;
+	int			rval = 0;
+	int			stale_flag = 0;
+	mddb_config_t		c;
+	int			susp_res_flag = 0;
+	md_error_t		xep = mdnullerror;
+
+	/* If setname is there, set desc should exist. */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		mde_perror(ep, dgettext(TEXT_DOMAIN,
+		    "Unable to get set %s desc information"), sp->setname);
+		return (-1);
+	}
+
+	/* Are there drives in the set? */
+	if (metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep) == NULL) {
+		if (! mdisok(ep)) {
+			return (-1);
+		}
+		/* No drives in set -- nothing to join */
+		return (0);
+	}
+
+	/*
+	 * Is set currently stale?
+	 */
+	(void) memset(&c, 0, sizeof (c));
+	c.c_id = 0;
+	c.c_setno = sp->setno;
+	/* Ignore failure since master node may not be joined yet */
+	(void) metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL);
+	if (c.c_flags & MDDB_C_STALE) {
+		stale_flag = MNSET_IS_STALE;
+	}
+
+	/*
+	 * If any nodes are going to be joined to diskset, then
+	 * suspend I/O to all disks in diskset so that nodes can join
+	 * (read in mddbs) in a reasonable amount of time even under
+	 * high I/O load.  Don't need to do this if set is STALE since
+	 * no I/O can be occurring to a STALE set.
+	 */
+	if (stale_flag != MNSET_IS_STALE) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Found a node that will be joined to diskset */
+			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
+			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+				/* Set flag that diskset should be suspended */
+				susp_res_flag = 1;
+				break;
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	if (susp_res_flag) {
+		/*
+		 * Block all I/Os to disks in this diskset on all joined
+		 * nodes in the diskset.
+		 * If block of I/Os fails due to an RPC failure on another
+		 * node, return 205; otherwise, return -1.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip non-alive and non-owner nodes  */
+			if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) ||
+			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mn_susp_res_io(nd->nd_nodename, sp->setno,
+			    MN_SUSP_IO, ep)) {
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to suspend I/O on node %s"
+				    " in set %s"), nd->nd_nodename,
+				    sp->setname);
+				/*
+				 * Resume other nodes that had been suspended.
+				 * (Reconfig return step also resumes I/Os
+				 * for all sets.)
+				 */
+				nd2 = sd->sd_nodelist;
+				while (nd2) {
+					/* Stop when reaching failed node */
+					if (nd2->nd_nodeid == nd->nd_nodeid)
+						break;
+					/* Skip non-alive/non-owner nodes  */
+					if ((!(nd2->nd_flags &
+					    MD_MN_NODE_ALIVE)) ||
+					    (!(nd2->nd_flags &
+					    MD_MN_NODE_OWN))) {
+						nd2 = nd2->nd_next;
+						continue;
+					}
+					(void) (clnt_mn_susp_res_io(
+					    nd2->nd_nodename, sp->setno,
+					    MN_RES_IO, &xep));
+					nd2 = nd2->nd_next;
+				}
+
+				/*
+				 * If the suspend failed due to an
+				 * RPC failure on another node, return
+				 * a 205.
+				 * Otherwise, exit with failure.
+				 * The return reconfig step will resume
+				 * I/Os for all disksets.
+				 */
+				if ((mdanyrpcerror(ep)) &&
+				    (sd->sd_mn_mynode->nd_nodeid !=
+				    nd->nd_nodeid)) {
+					return (205);
+				} else {
+					return (-1);
+				}
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	nd = sd->sd_nodelist;
+	while (nd) {
+		/*
+		 * If a node is in the membership list but isn't joined
+		 * to the set, try to join the node.
+		 */
+		if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
+		    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+			if (clnt_joinset(nd->nd_nodename, sp,
+			    (MNSET_IN_RECONFIG | stale_flag), ep)) {
+				/*
+				 * If RPC failure to another node
+				 * then exit without attempting anything else.
+				 * (Reconfig return step will resume I/Os
+				 * for all sets.)
+				 */
+				if (mdanyrpcerror(ep)) {
+					mde_perror(ep, "");
+					return (205);
+				}
+				/*
+				 * STALE and ACCOK failures aren't true
+				 * failures.  STALE means that <50% mddbs
+				 * are available. ACCOK means that the
+				 * mediator provided the extra vote.
+				 * If a true failure, then print message
+				 * and withdraw node from set in order to
+				 * cleanup from failed join attempt.
+				 */
+				if ((!mdismddberror(ep, MDE_DB_STALE)) &&
+				    (!mdismddberror(ep, MDE_DB_ACCOK))) {
+					mde_perror(ep,
+					    "WARNING: Unable to join node %s "
+					    "to set %s", nd->nd_nodename,
+					    sp->setname);
+					mdclrerror(ep);
+					if (clnt_withdrawset(nd->nd_nodename,
+					    sp, &xep))
+						mdclrerror(&xep);
+					nd = nd->nd_next;
+					continue;
+				}
+			}
+			/* Set owner flag even if STALE or ACCOK */
+			nd->nd_flags |= MD_MN_NODE_OWN;
+		}
+		nd = nd->nd_next;
+	}
+	/*
+	 * Resume I/Os if suspended above.
+	 */
+	if (susp_res_flag) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/*
+			 * Skip non-alive and non-owner nodes.  Nodes that
+			 * were joined above had MD_MN_NODE_OWN set in the
+			 * local node list and are therefore resumed here
+			 * as well.
+			 */
+			if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) ||
+			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mn_susp_res_io(nd->nd_nodename, sp->setno,
+			    MN_RES_IO, ep)) {
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume I/O on node %s"
+				    " in set %s"), nd->nd_nodename,
+				    sp->setname);
+
+				/*
+				 * If an RPC failure then don't do any
+				 * more RPC calls, since one timeout is enough
+				 * to endure.  If RPC failure to another node,
+				 * return 205.  If RPC failure to my node,
+				 * return -1.
+				 * (Reconfig return step will resume I/Os
+				 * for all sets.)
+				 * If not an RPC failure, continue resuming the
+				 * rest of the nodes and report -1 to the
+				 * caller once the remaining steps are done.
+				 */
+				if (mdanyrpcerror(ep)) {
+					if (sd->sd_mn_mynode->nd_nodeid ==
+					    nd->nd_nodeid) {
+						return (-1);
+					} else {
+						return (205);
+					}
+				}
+
+				/*
+				 * If not an RPC error, continue resuming rest
+				 * of nodes, ignoring any failures except for
+				 * an RPC failure which constitutes an
+				 * immediate exit.
+				 * Start in middle of list with failing node.
+				 */
+				nd2 = nd->nd_next;
+				while (nd2) {
+					/* Skip non-alive/non-owner nodes  */
+					if ((!(nd2->nd_flags &
+					    MD_MN_NODE_ALIVE)) ||
+					    (!(nd2->nd_flags &
+					    MD_MN_NODE_OWN))) {
+						nd2 = nd2->nd_next;
+						continue;
+					}
+					(void) (clnt_mn_susp_res_io(
+					    nd2->nd_nodename, sp->setno,
+					    MN_RES_IO, &xep));
+					if (mdanyrpcerror(&xep)) {
+						return (-1);
+					}
+					nd2 = nd2->nd_next;
+				}
+
+				/*
+				 * Every node after the failing one has now
+				 * been sent a resume.  Remember the failure
+				 * so that -1 is returned, and leave the loop
+				 * instead of resuming the same nodes again.
+				 */
+				rval = -1;
+				break;
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+			nd = nd->nd_next;
+			continue;
+		}
+		/*
+		 * If 1 node fails - go ahead and update the rest except
+		 * in the case of an RPC failure, fail immediately.
+		 */
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+		    sd->sd_nodelist, MD_NR_SET, MNSET_IN_RECONFIG, ep)) {
+			/* RPC failure to another node */
+			if (mdanyrpcerror(ep)) {
+				return (205);
+			}
+			nd = nd->nd_next;
+			rval = -1;
+			continue;
+		}
+		nd = nd->nd_next;
+	}
+
+	meta_mc_log(MC_LOG5, dgettext(TEXT_DOMAIN,
+	    "Join of all nodes completed for set %s: %s"),
+	    sp->setname, meta_print_hrtime(gethrtime() - start_time));
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_drv.c b/usr/src/lib/lvm/libmeta/common/meta_set_drv.c
new file mode 100644
index 0000000000..5fad53ad7b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_drv.c
@@ -0,0 +1,1948 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include <meta.h>
+#include <mdmn_changelog.h>
+#include "meta_set_prv.h"
+#include "meta_repartition.h"
+
+/*
+ * Run checkdrive_onnode() for every drive in dnlp against every node of
+ * the set.  For a multi-node set only nodes flagged MD_MN_NODE_ALIVE are
+ * consulted; for a traditional set every non-empty slot of sd_nodes is.
+ *
+ * Returns 0 when every check passes, -1 as soon as the set descriptor
+ * cannot be obtained or a check reports failure (ep is handed to the
+ * failing call).
+ */
+static int
+check_setnodes_againstdrivelist(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	*dnlp,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd = metaget_setdesc(sp, ep);
+	md_mnnode_desc		*node;
+	mddrivenamelist_t	*drv;
+	int			side;
+
+	if (sd == NULL)
+		return (-1);
+
+	/* Traditional diskset: walk the fixed table of host names. */
+	if (!MD_MNSET_DESC(sd)) {
+		for (side = 0; side < MD_MAXSIDES; side++) {
+			/* An empty name marks an unused slot */
+			if (sd->sd_nodes[side][0] != '\0') {
+				for (drv = dnlp; drv != NULL;
+				    drv = drv->next) {
+					if (checkdrive_onnode(sp,
+					    drv->drivenamep,
+					    sd->sd_nodes[side], ep) != 0)
+						return (-1);
+				}
+			}
+		}
+		return (0);
+	}
+
+	/* Multi-node diskset: walk the node list, ALIVE nodes only. */
+	for (node = sd->sd_nodelist; node != NULL; node = node->nd_next) {
+		if ((node->nd_flags & MD_MN_NODE_ALIVE) == 0)
+			continue;
+
+		for (drv = dnlp; drv != NULL; drv = drv->next) {
+			if (checkdrive_onnode(sp, drv->drivenamep,
+			    node->nd_nodename, ep) != 0)
+				return (-1);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Verify that no drive appears twice in dnlp, comparing drives by cname.
+ *
+ * Returns 0 if all names are distinct; otherwise returns the result of
+ * mddserror() after recording MDE_DS_DUPDRIVE for the first duplicate.
+ */
+static int
+drvsuniq(mdsetname_t *sp, mddrivenamelist_t *dnlp, md_error_t *ep)
+{
+	mddrivenamelist_t	*outer, *inner;
+
+	for (outer = dnlp; outer != NULL; outer = outer->next) {
+		mddrivename_t	*first = outer->drivenamep;
+
+		/* Only later entries need checking; earlier pairs are done */
+		inner = outer->next;
+		while (inner != NULL) {
+			mddrivename_t	*second = inner->drivenamep;
+
+			if (strcmp(first->cname, second->cname) == 0) {
+				return (mddserror(ep, MDE_DS_DUPDRIVE,
+				    sp->setno, NULL, first->cname,
+				    sp->setname));
+			}
+			inner = inner->next;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Build a drive descriptor list with one entry per drive in dnlp.  Each
+ * entry is appended with the set's sd_ctime and sd_genid and the caller's
+ * flags.
+ *
+ * Returns the head of the new list, or NULL if the set descriptor cannot
+ * be obtained (ep is filled in by metaget_setdesc) or dnlp is empty.
+ */
+static md_drive_desc *
+metaget_drivedesc_fromdrivelist(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	*dnlp,
+	uint_t			flags,
+	md_error_t		*ep
+)
+{
+	md_drive_desc		*head = NULL;
+	md_set_desc		*sd = metaget_setdesc(sp, ep);
+	mddrivenamelist_t	*cur;
+
+	if (sd == NULL)
+		return (NULL);
+
+	cur = dnlp;
+	while (cur != NULL) {
+		(void) metadrivedesc_append(&head, cur->drivenamep, 0, 0,
+		    sd->sd_ctime, sd->sd_genid, flags);
+		cur = cur->next;
+	}
+
+	return (head);
+}
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Rebuild the side-name list of drive dnp for set sp.
+ *
+ * The existing list is flushed and then refilled, one entry per call to
+ * meta_getnextside_devinfo() on the drive's replica slice, until that
+ * routine reports 0 (no more sides) or -1 (error).
+ *
+ * Returns 0 on success, -1 on failure.  Entries appended before a
+ * failure stay attached to dnp.
+ */
+int
+meta_make_sidenmlist(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	md_error_t	*ep
+)
+{
+	mdsidenames_t	*entry;
+	mdsidenames_t	**tailp;
+	mdname_t	*slicenp;
+	side_t		sideno = MD_SIDEWILD;
+	uint_t		rep_slice;
+	int		status;
+
+	if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
+		return (-1);
+
+	dnp->side_names_key = MD_KEYWILD;
+
+	slicenp = metaslicename(dnp, rep_slice, ep);
+	if (slicenp == NULL)
+		return (-1);
+
+	/* Start from an empty list and always append at its tail */
+	metaflushsidenames(dnp);
+	tailp = &dnp->side_names;
+
+	for (;;) {
+		entry = Zalloc(sizeof (*entry));
+
+		status = meta_getnextside_devinfo(sp, slicenp->bname,
+		    &sideno, &entry->cname, &entry->dname, &entry->mnum, ep);
+
+		/* -1: error, 0: no further sides; the spare entry goes */
+		if (status == -1 || status == 0) {
+			Free(entry);
+			return (status);
+		}
+
+		entry->sideno = sideno;
+
+		/* Link the new entry in at the end of the list */
+		assert(*tailp == NULL);
+		*tailp = entry;
+		tailp = &entry->next;
+	}
+	/*NOTREACHED*/
+}
+
+int
+meta_set_adddrives(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	*dnlp,
+	daddr_t			dbsize,
+	int			force_label,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*dd = NULL, *curdd = NULL, *ddp;
+	int			i;
+	mddrivenamelist_t	*p;
+	mhd_mhiargs_t		mhiargs;
+	int			rval = 0;
+	md_timeval32_t		now;
+	sigset_t		oldsigs;
+	ulong_t			genid;
+	ulong_t			max_genid = 0;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	int			suspendall_flag = 0;
+	int			suspend1_flag = 0;
+	int			lock_flag = 0;
+	int			flush_set_onerr = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/*
+	 * The drive and node records are stored in the local mddbs of each
+	 * node in the diskset.  Each node's rpc.metad daemon reads in the set,
+	 * drive and node records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set, drive and node records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to add
+	 * the new drive record to their local mddb.  If a panic of this node
+	 * leaves the local mddbs set, node and drive records out-of-sync, the
+	 * reconfig cycle will fix the local mddbs and force them back into
+	 * synchronization.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+					sp->setno,
+					nd->nd_nodename, NULL, sp->setname);
+				return (-1);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	if (drvsuniq(sp, dnlp, ep) == -1)
+		return (-1);
+
+	/*
+	 * Lock the set on current set members.
+	 * Set locking done much earlier for MN diskset than for traditional
+	 * diskset since lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	if (check_setnodes_againstdrivelist(sp, dnlp, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	for (p = dnlp; p != NULL; p = p->next) {
+		mdsetname_t	*tmp;
+
+		if (meta_is_drive_in_anyset(p->drivenamep, &tmp, FALSE,
+		    ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		if (tmp != NULL) {
+			(void) mddserror(ep, MDE_DS_DRIVEINSET, sp->setno,
+			    tmp->setname, p->drivenamep->cname, sp->setname);
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* END CHECK CODE */
+
+	/*
+	 * This is a separate loop (from above) so that we validate all the
+	 * drives handed to us before we repartition any one drive.
+	 */
+	for (p = dnlp; p != NULL; p = p->next) {
+		if (meta_repartition_drive(sp,
+		    p->drivenamep,
+		    force_label == TRUE ? MD_REPART_FORCE : 0,
+		    NULL, /* Don't return the VTOC. */
+		    ep) != 0) {
+			rval = -1;
+			goto out;
+		}
+
+		/*
+		 * Create the names for the drives we are adding per side.
+		 */
+		if (meta_make_sidenmlist(sp, p->drivenamep, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Get the list of drives descriptors that we are adding.
+	 */
+	dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_ADD, ep);
+
+	if (! mdisok(ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Slam a dummy master block on all the disks that we are adding
+	 * Used by diskset import if the disksets are remotely replicated
+	 */
+	for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+		uint_t		rep_slice;
+		int		fd = -1;
+		mdname_t	*np = NULL;
+
+		if (meta_replicaslice(ddp->dd_dnp, &rep_slice, &xep) != 0) {
+			mdclrerror(&xep);
+			continue;
+		}
+
+		if ((np = metaslicename(ddp->dd_dnp, rep_slice, &xep))
+		    == NULL) {
+			mdclrerror(&xep);
+			continue;
+		}
+
+		if ((fd = open(np->rname, O_RDWR)) >= 0) {
+			meta_mkdummymaster(sp, fd, 16);
+			(void) close(fd);
+		}
+	}
+
+	/*
+	 * Get the set timeout information.
+	 */
+	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
+	if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Get timestamp and generation id for new records
+	 */
+	now = sd->sd_ctime;
+	genid = sd->sd_genid;
+
+
+	/* At this point, in case of error, set should be flushed. */
+	flush_set_onerr = 1;
+
+	/* Lock the set on current set members */
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_on();
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	/*
+	 * Get drive descriptors for the drives that are currently in the set.
+	 */
+	curdd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
+	if (! mdisok(ep))
+		goto rollback;
+
+	/*
+	 * If first drive being added to set, set the mastership
+	 * of the multinode diskset to be this node.
+	 * Only set it on this node.  If all goes well
+	 * and there are no errors, the mastership of this node will be set
+	 * on all nodes in user space and in the kernel.
+	 */
+	if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
+		if (clnt_mnsetmaster(mynode(), sp,
+		    sd->sd_mn_mynode->nd_nodename,
+		    sd->sd_mn_mynode->nd_nodeid, ep)) {
+			goto rollback;
+		}
+		/*
+		 * Set this up in my local cache of the set desc so that
+		 * the set descriptor won't have to be gotten again from
+		 * rpc.metad.  If it is flushed and gotten again, these
+		 * values will be set in sr2setdesc.
+		 */
+		sd->sd_mn_master_nodeid = sd->sd_mn_mynode->nd_nodeid;
+		(void) strcpy(sd->sd_mn_master_nodenm,
+		    sd->sd_mn_mynode->nd_nodename);
+		sd->sd_mn_am_i_master = 1;
+	}
+
+	RB_TEST(1, "adddrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "adddrives", ep)
+
+	/*
+	 * Add the drive records for the drives that we are adding to
+	 * each host in the set.  Marks the drive as MD_DR_ADD.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_adddrvs(nd->nd_nodename, sp, dd, now, genid,
+			    ep) == -1)
+				goto rollback;
+
+			RB_TEST(3, "adddrives", ep)
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_adddrvs(sd->sd_nodes[i], sp, dd, now, genid,
+			    ep) == -1)
+				goto rollback;
+
+			RB_TEST(3, "adddrives", ep)
+		}
+	}
+
+	RB_TEST(4, "adddrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(5, "adddrives", ep)
+
+	/*
+	 * Take ownership of the added drives.
+	 */
+	if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+		if (tk_own_bydd(sp, dd, &mhiargs, TRUE, ep))
+			goto rollback;
+	}
+
+	RB_TEST(6, "adddrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(7, "adddrives", ep)
+
+	/*
+	 * Balance the DB's according to the list of existing drives and the
+	 * list of added drives.
+	 */
+	if ((rval = meta_db_balance(sp, dd, curdd, dbsize, ep)) == -1)
+		goto rollback;
+
+	if ((curdd == NULL) && (MD_MNSET_DESC(sd))) {
+		/*
+		 * Notify rpc.mdcommd on all nodes of a nodelist change.
+		 * Start by suspending rpc.mdcommd (which drains it of all
+		 * messages), then change the nodelist followed by a reinit
+		 * and resume.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspendall_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * If a MN diskset and this is the first disk(s) being added
+	 * to set, then pre-allocate change log records here.
+	 * When the other nodes are joined into the MN diskset, the
+	 * USER records will just be snarfed in.
+	 */
+	if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
+		if (mdmn_allocate_changelog(sp, ep) != 0)
+			goto rollback;
+	}
+
+	/*
+	 * Mark the drives MD_DR_OK.
+	 * If first drive being added to MN diskset, then set
+	 * master on all nodes to be this node and then join
+	 * all alive nodes (nodes in membership list) to set.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* don't set master on this node - done earlier */
+			if ((curdd == NULL) && (nd->nd_nodeid !=
+			    sd->sd_mn_mynode->nd_nodeid)) {
+				/*
+				 * Set master on all alive nodes since
+				 * all alive nodes will become joined nodes.
+				 */
+				if (clnt_mnsetmaster(nd->nd_nodename, sp,
+				    sd->sd_mn_mynode->nd_nodename,
+				    sd->sd_mn_mynode->nd_nodeid, ep)) {
+					goto rollback;
+				}
+			}
+
+			if (curdd == NULL) {
+				/*
+				 * No special flags for join set.  Since
+				 * all nodes are joining if 1st drive is being
+				 * added to set then all nodes will be either
+				 * STALE or non-STALE and each node can
+				 * determine this on its own.
+				 */
+				if (clnt_joinset(nd->nd_nodename, sp,
+				    NULL, ep)) {
+					goto rollback;
+				}
+				/* Sets join node flag on all nodes in list */
+				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+				    sd->sd_nodelist, MD_NR_JOIN, NULL, ep)) {
+					goto rollback;
+				}
+			}
+
+			/*
+			 * Set MD_DR_OK as last thing before unlock.
+			 * In case of panic on this node, recovery
+			 * code can check for MD_DR_OK to determine
+			 * status of diskset.
+			 */
+			if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
+			    MD_DR_OK, ep) == -1)
+				goto rollback;
+
+
+			RB_TEST(8, "adddrives", ep)
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd, MD_DR_OK,
+			    ep) == -1)
+				goto rollback;
+
+			RB_TEST(8, "adddrives", ep)
+		}
+	}
+
+	RB_TEST(9, "adddrives", ep)
+
+out:
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (suspendall_flag) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+	}
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	if (lock_flag) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	metafreedrivedesc(&dd);
+
+	if (flush_set_onerr) {
+		metaflushsetname(sp);
+		if (!(MD_MNSET_DESC(sd))) {
+			md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+		}
+	}
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN disket */
+	if (!(MD_MNSET_DESC(sd))) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	max_genid = sd->sd_genid;
+
+	/* level 3 */
+	if (rb_level > 2) {
+		/*
+		 * Since the add drive operation is failing, need
+		 * to reset config back to the way it was
+		 * before the add drive opration.
+		 * If a MN diskset and this is the first drive being added,
+		 * then reset master on all ALIVE nodes (which is all nodes)
+		 * since the master would have not been set previously.
+		 * Don't reset master on this node, since this
+		 * is done later.
+		 * This is ok to fail since next node to add first
+		 * disk to diskset will also set the master on all nodes.
+		 *
+		 * Also, if this is the first drive being added,
+		 * need to have each node withdraw itself from the set.
+		 */
+		if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/*
+				 * Be careful with ordering in case of
+				 * panic between the steps and the
+				 * effect on recovery during reconfig.
+				 */
+				if (clnt_withdrawset(nd->nd_nodename, sp, &xep))
+					mdclrerror(&xep);
+
+				/* Sets withdraw flag on all nodes in list */
+				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+				    sd->sd_nodelist, MD_NR_WITHDRAW,
+				    NULL, &xep)) {
+					mdclrerror(&xep);
+				}
+
+				/* Skip this node */
+				if (nd->nd_nodeid ==
+				    sd->sd_mn_mynode->nd_nodeid) {
+					nd = nd->nd_next;
+					continue;
+				}
+				/* Reset master on all of the other nodes. */
+				if (clnt_mnsetmaster(nd->nd_nodename, sp,
+				    "", MD_MN_INVALID_NID, &xep))
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		}
+	}
+
+	/*
+	 * Send resume command to mdcommd.  Don't send reinit command
+	 * since nodelist should not have changed.
+	 * If suspendall_flag is set, then user would have been adding
+	 * first drives to set.  Since this failed, there is certainly
+	 * no reinit message to send to rpc.commd since no nodes will
+	 * be joined to set at the end of this metaset command.
+	 */
+	if (suspendall_flag) {
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/*
+			 * Resume all classes but class 1 so that lock is held
+			 * against meta* commands.
+			 * To later resume class1, must issue a class0 resume.
+			 */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0,
+			    MD_MSCF_DONT_RESUME_CLASS1, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/* level 3 */
+	if (rb_level > 2) {
+		mdnamelist_t	*nlp;
+		mdname_t	*np;
+
+		for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+			uint_t	rep_slice;
+
+			if ((meta_replicaslice(ddp->dd_dnp,
+			    &rep_slice, &xep) != 0) ||
+			    ((np = metaslicename(ddp->dd_dnp, rep_slice,
+				&xep)) == NULL)) {
+				mdclrerror(&xep);
+				continue;
+			}
+			nlp = NULL;
+			(void) metanamelist_append(&nlp, np);
+
+			if (meta_db_detach(sp, nlp,
+			    (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL, &xep))
+				mdclrerror(&xep);
+
+			metafreenamelist(nlp);
+		}
+
+		/* Re-balance */
+		if (meta_db_balance(sp, NULL, curdd, 0, &xep) == -1)
+			mdclrerror(&xep);
+
+		/* Only if we are adding the first drive */
+		/* Handled MN diskset above. */
+		if ((curdd == NULL) && !(MD_MNSET_DESC(sd))) {
+			if (clnt_stimeout(mynode(), sp, &defmhiargs,
+			    &xep) == -1)
+				mdclrerror(&xep);
+
+			/* This is needed because of a corner case */
+			if (halt_set(sp, &xep))
+				mdclrerror(&xep);
+		}
+		max_genid++;
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+			if (rel_own_bydd(sp, dd, TRUE, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (clnt_deldrvs(nd->nd_nodename, sp, dd,
+				    &xep) == -1)
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_deldrvs(sd->sd_nodes[i], sp, dd,
+				    &xep) == -1)
+					mdclrerror(&xep);
+			}
+		}
+		max_genid += 2;
+		resync_genid(sp, sd, max_genid, 0, NULL);
+	}
+
+	if ((suspend1_flag) || (suspendall_flag)) {
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/*
+			 * Just resume all classes so that resume is the
+			 * same whether just one class was locked or all
+			 * classes were locked.
+			 */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * Since the add drive operation is failing, need
+		 * to reset config back to the way it was
+		 * before the add drive operation.
+		 * If a MN diskset and this is the first drive being
+		 * added, then reset master on this node since
+		 * the master would have not been set previously.
+		 * This is ok to fail since next node to add first
+		 * disk to diskset will also set the master on all nodes.
+		 */
+		if (curdd == NULL) {
+			/* Reset master on mynode */
+			if (clnt_mnsetmaster(mynode(), sp, "",
+			    MD_MN_INVALID_NID, &xep))
+				mdclrerror(&xep);
+		}
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	metafreedrivedesc(&dd);
+
+	if (flush_set_onerr) {
+		metaflushsetname(sp);
+		if (!(MD_MNSET_DESC(sd))) {
+			md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+		}
+	}
+
+	return (rval);
+}
+
+/*
+ * meta_set_deletedrives()
+ *
+ * Delete the drives listed in dnlp from the diskset sp, on every host
+ * (traditional diskset) or node (multi-node diskset) in the set.
+ *
+ * Parameters:
+ *	sp	 - diskset to delete the drives from; ownership is checked
+ *		   with meta_check_ownership().
+ *	dnlp	 - list of drives to delete; each must be in this set and
+ *		   must pass meta_check_drive_inuse().
+ *	forceflg - must not be FALSE when the request would delete every
+ *		   drive in the set, otherwise MDE_FORCE_DEL_ALL_DRV is
+ *		   reported.
+ *	ep	 - receives the error describing a failure.
+ *
+ * Returns 0 on success and a non-zero value on failure (-1, or the
+ * mddserror() result for the traditional-diskset MDE_DS_NODENOSET case).
+ *
+ * The work is done in numbered stages; rb_level records the last stage
+ * entered so that the rollback: path undoes only the stages that ran:
+ *	1 - mark the drives MD_DR_DEL on all hosts/nodes
+ *	2 - rebalance the replicas and detach any left on the deleted drives
+ *	3 - remove the drive records from all hosts/nodes
+ *	4 - release ownership of the drives (only for sets where neither
+ *	    MD_MNSET_DESC nor MD_ATSET_DESC is true)
+ *	5 - set the set timeouts to defmhiargs (all drives deleted only)
+ *	6 - withdraw all joined nodes (MN) or halt the set (traditional)
+ *
+ * Failures detected by the checks up front leave through out:, which
+ * only undoes suspends and locks.  Errors hit during rollback are cleared
+ * and ignored; rollback is best effort and always returns -1.
+ */
+int
+meta_set_deletedrives(
+	mdsetname_t		*sp,
+	mddrivenamelist_t	*dnlp,
+	int			forceflg,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*ddp, *dd = NULL, *curdd = NULL;
+	md_replicalist_t	*rlp = NULL, *rl;
+	mddrivenamelist_t	*p;
+	int			deldrvcnt = 0;
+	int			rval = 0;
+	mhd_mhiargs_t		mhiargs;
+	int			i;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	ulong_t			max_genid = 0;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	int			has_set;
+	int			current_drv_cnt = 0;
+	int			suspendall_flag = 0, suspendall_flag_rb = 0;
+	int			suspend1_flag = 0;
+	int			lock_flag = 0;
+	bool_t			stale_bool = FALSE;
+	int			flush_set_onerr = 0;
+	mdnamelist_t		*nlp;
+	mdname_t		*np;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	if (drvsuniq(sp, dnlp, ep) == -1)
+		return (-1);
+
+	/*
+	 * Check and see if all the nodes have the set.
+	 *
+	 * The drive and node records are stored in the local mddbs of each
+	 * node in the diskset.  Each node's rpc.metad daemon reads in the set,
+	 * drive and node records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set, drive and node records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to delete
+	 * a drive record from their local mddb.  If a panic of this node
+	 * leaves the local mddbs set, node and drive records out-of-sync, the
+	 * reconfig cycle will fix the local mddbs and force them back into
+	 * synchronization.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+					sp->setno,
+					nd->nd_nodename, NULL, sp->setname);
+				return (-1);
+			}
+			nd = nd->nd_next;
+		}
+
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+
+		/*
+		 * Lock the set on current set members.
+		 * Set locking done much earlier for MN diskset than for
+		 * traditional diskset since lock_set and SUSPEND are used
+		 * to protect against other meta* commands running on the
+		 * other nodes.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			/* At least one node is locked; out: must unlock */
+			lock_flag = 1;
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			/* At least one node is suspended; out: must resume */
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+
+		/* Every node other than this one must have the set */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (strcmp(nd->nd_nodename, mynode()) == 0) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			has_set = nodehasset(sp, nd->nd_nodename,
+				    NHS_NSTG_EQ, ep);
+			if (has_set < 0) {
+				rval = -1;
+				goto out;
+			}
+
+			if (! has_set) {
+				(void) mddserror(ep, MDE_DS_NODENOSET,
+					sp->setno, nd->nd_nodename,
+					NULL, sp->setname);
+				rval = -1;
+				goto out;
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		/* Every host other than this one must have the set */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (strcmp(sd->sd_nodes[i], mynode()) == 0)
+				continue;
+
+			has_set = nodehasset(sp, sd->sd_nodes[i], NHS_NSTG_EQ,
+				ep);
+			if (has_set < 0) {
+				/*
+				 * Can directly return since !MN diskset;
+				 * nothing to unlock.
+				 */
+				return (-1);
+			}
+
+			if (! has_set) {
+				/*
+				 * Can directly return since !MN diskset;
+				 * nothing to unlock.
+				 */
+				return (mddserror(ep, MDE_DS_NODENOSET,
+				    sp->setno, sd->sd_nodes[i], NULL,
+				    sp->setname));
+			}
+		}
+	}
+
+	/*
+	 * Validate each drive named for deletion and count them.
+	 */
+	for (p = dnlp; p != NULL; p = p->next) {
+		int		is_it;
+		mddrivename_t	*dnp;
+
+		dnp = p->drivenamep;
+
+		if ((is_it = meta_is_drive_in_thisset(sp, dnp, FALSE, ep))
+		    == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		if (! is_it) {
+			(void) mddserror(ep, MDE_DS_DRIVENOTINSET, sp->setno,
+			    NULL, dnp->cname, sp->setname);
+			rval = -1;
+			goto out;
+		}
+
+		if ((meta_check_drive_inuse(sp, dnp, FALSE, ep)) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		deldrvcnt++;
+	}
+	/*
+	 * Remember how many drives were named; the level 3 rollback compares
+	 * this against the number of drive records found on each MN node.
+	 */
+	current_drv_cnt = deldrvcnt;
+
+	/*
+	 * Get drive descriptors for the drives that are currently in the set.
+	 */
+	curdd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+	if (! mdisok(ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Decrement the delete drive count for each drive currently in the
+	 * set.  From here on deldrvcnt == 0 means that every drive in the set
+	 * is being deleted.
+	 */
+	for (ddp = curdd; ddp != NULL; ddp = ddp->dd_next)
+		deldrvcnt--;
+
+	/*
+	 * If the count of drives we are deleting is equal to the drives in the
+	 * set, and we haven't specified forceflg, return an error
+	 */
+	if (deldrvcnt == 0 && forceflg == FALSE) {
+		(void) mderror(ep, MDE_FORCE_DEL_ALL_DRV, NULL);
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Get the list of drive descriptors that we are deleting.
+	 */
+	dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_DEL, ep);
+	if (! mdisok(ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Get the set timeout information in case we have to roll back.
+	 */
+	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
+	if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	/* At this point, in case of error, set should be flushed. */
+	flush_set_onerr = 1;
+
+	/* END CHECK CODE */
+
+	/* Lock the set on current set members */
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_on();
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	/*
+	 * When all drives of a MN diskset are being deleted, record whether
+	 * the set is currently STALE so that a rollback can rejoin the nodes
+	 * in the same state.
+	 */
+	if ((deldrvcnt == 0) && (MD_MNSET_DESC(sd))) {
+		mddb_config_t		c;
+		/*
+		 * Is current set STALE?
+		 */
+		(void) memset(&c, 0, sizeof (c));
+		c.c_id = 0;
+		c.c_setno = sp->setno;
+		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &c.c_mde);
+			rval = -1;
+			goto out;
+		}
+		if (c.c_flags & MDDB_C_STALE) {
+			stale_bool = TRUE;
+		}
+	}
+
+	RB_TEST(1, "deletedrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "deletedrives", ep)
+
+	/*
+	 * Mark the drives MD_DR_DEL
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
+			    MD_DR_DEL, ep) == -1)
+				goto rollback;
+
+			RB_TEST(3, "deletedrives", ep)
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
+			    MD_DR_DEL, ep) == -1)
+				goto rollback;
+
+			RB_TEST(3, "deletedrives", ep)
+		}
+	}
+
+	RB_TEST(4, "deletedrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(5, "deletedrives", ep)
+
+	/*
+	 * Balance the DB's according to the list of existing drives and the
+	 * list of deleted drives.
+	 */
+	if (meta_db_balance(sp, dd, curdd, 0, ep) == -1)
+		goto rollback;
+
+	/*
+	 * If the drive(s) to be deleted cannot be accessed,
+	 * they haven't really been deleted yet. Check and delete now
+	 * if need be.
+	 *
+	 * NOTE(review): rlp is not released anywhere in this function;
+	 * presumably it should be handed to metafreereplicalist() once the
+	 * names have been used - confirm and fix separately.
+	 */
+	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) >= 0) {
+		nlp = NULL;
+		for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+			char	*delete_name;
+
+			delete_name = ddp->dd_dnp->cname;
+
+			for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+				char	*cur_name;
+
+				cur_name =
+				    rl->rl_repp->r_namep->drivenamep->cname;
+
+				if (strcmp(delete_name, cur_name) == 0) {
+					/* put it on the delete list */
+					np = rl->rl_repp->r_namep;
+					(void) metanamelist_append(&nlp, np);
+
+				}
+			}
+		}
+
+		if (nlp != NULL) {
+			if (meta_db_detach(sp, nlp,
+			    (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL,
+			    ep) == -1) {
+				metafreenamelist(nlp);
+				goto rollback;
+			}
+			metafreenamelist(nlp);
+		}
+	}
+
+	RB_TEST(6, "deletedrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(7, "deletedrives", ep)
+
+	/*
+	 * Cannot suspend set until after meta_db_balance since
+	 * meta_db_balance uses META_DB_ATTACH/DETACH messages.
+	 */
+	if ((deldrvcnt == 0) && (MD_MNSET_DESC(sd))) {
+		/*
+		 * Notify rpc.mdcommd on all nodes of a nodelist change.
+		 * Start by suspending rpc.mdcommd (which drains it of all
+		 * messages), then change the nodelist followed by a reinit
+		 * and resume.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/*
+			 * NOTE(review): this failure leaves through out:
+			 * rather than rollback: although stages 1 and 2 have
+			 * already run - confirm that this is intended.
+			 */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspendall_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Remove the drive records for the drives that were deleted from
+	 * each host in the set.  This removes the record and dr_flags.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_deldrvs(nd->nd_nodename, sp, dd, ep) == -1)
+				goto rollback;
+
+			RB_TEST(8, "deletedrives", ep)
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_deldrvs(sd->sd_nodes[i], sp, dd, ep) == -1)
+				goto rollback;
+
+			RB_TEST(8, "deletedrives", ep)
+		}
+	}
+
+	RB_TEST(9, "deletedrives", ep)
+
+	RB_PREEMPT;
+	rb_level = 4;	/* level 4 */
+
+	RB_TEST(10, "deletedrives", ep)
+
+	if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+		if (rel_own_bydd(sp, dd, TRUE, ep))
+			goto rollback;
+	}
+
+	/* If we deleted all the drives, then we need to halt the set. */
+	if (deldrvcnt == 0) {
+		RB_TEST(11, "deletedrives", ep)
+
+		RB_PREEMPT;
+		rb_level = 5;	/* level 5 */
+
+		RB_TEST(12, "deletedrives", ep)
+
+		if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1)
+			goto rollback;
+
+		RB_TEST(13, "deletedrives", ep)
+
+		RB_PREEMPT;
+		rb_level = 6;	/* level 6 */
+
+		RB_TEST(14, "deletedrives", ep)
+
+		/* Halt MN diskset on all nodes by having node withdraw */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/* Only withdraw nodes that are joined */
+				if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				/*
+				 * Going to set locally cached node flags to
+				 * rollback join so in case of error, the
+				 * rollback code knows which nodes to re-join.
+				 */
+				nd->nd_flags |= MD_MN_NODE_RB_JOIN;
+
+				/*
+				 * Be careful in ordering of following steps
+				 * so that recovery from a panic between
+				 * the steps is viable.
+				 * Only reset master info in rpc.metad -
+				 * don't reset local cached information
+				 * which will be used to set master information
+				 * back in case of failure (rollback).
+				 */
+				if (clnt_withdrawset(nd->nd_nodename, sp, ep))
+					goto rollback;
+				/* Sets withdraw flag on all nodes in list */
+				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+				    sd->sd_nodelist, MD_NR_WITHDRAW,
+				    NULL, ep)) {
+					goto rollback;
+				}
+				if (clnt_mnsetmaster(nd->nd_nodename, sp,
+				    "", MD_MN_INVALID_NID, ep)) {
+					goto rollback;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			if (halt_set(sp, ep))
+				goto rollback;
+		}
+
+		RB_TEST(15, "deletedrives", ep)
+	}
+
+	RB_TEST(16, "deletedrives", ep)
+
+out:
+	/*
+	 * Common exit for success and for failures that need no rollback.
+	 *
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (suspendall_flag) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				/* Keep the first error seen in ep */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/* Keep the first error seen in ep */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	if (lock_flag) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	metafreedrivedesc(&dd);
+
+	if (flush_set_onerr) {
+		metaflushsetname(sp);
+		if (!(MD_MNSET_DESC(sd))) {
+			md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+		}
+	}
+
+	/* Only the MN path blocked signals before reaching out: */
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	max_genid = sd->sd_genid;
+
+	/* Set the master on all nodes first thing */
+	if (rb_level > 5) {
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/*
+				 * Skip nodes that were not withdrawn.  The
+				 * list pointer must be advanced on this path
+				 * too, otherwise the loop never terminates.
+				 */
+				if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				/*
+				 * Set master on all re-joining nodes to be
+				 * my cached view of master.
+				 */
+				if (clnt_mnsetmaster(nd->nd_nodename, sp,
+				    sd->sd_mn_master_nodenm,
+				    sd->sd_mn_master_nodeid, &xep)) {
+					mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+		}
+	}
+
+	/* level 3 */
+	if (rb_level > 2) {
+		md_set_record		*sr;
+		md_mnset_record		*mnsr;
+		md_drive_record		*dr;
+		int			sr_drive_cnt;
+
+		/*
+		 * See if we have to re-add the drives specified.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/*
+				 * Must get current set record from each
+				 * node to see what else must be done
+				 * to recover.
+				 * Record should be for a multi-node diskset.
+				 */
+				if (clnt_mngetset(nd->nd_nodename, sp->setname,
+				    MD_SET_BAD, &mnsr, &xep) == -1) {
+					mdclrerror(&xep);
+					nd = nd->nd_next;
+					continue;
+				}
+
+				/*
+				 * If all drives are already there, skip
+				 * to next node.
+				 */
+				sr_drive_cnt = 0;
+				dr = mnsr->sr_drivechain;
+				while (dr) {
+					sr_drive_cnt++;
+					dr = dr->dr_next;
+				}
+				if (sr_drive_cnt == current_drv_cnt) {
+					free_sr((md_set_record *)mnsr);
+					nd = nd->nd_next;
+					continue;
+				}
+
+				/* Readd all drives */
+				if (clnt_adddrvs(nd->nd_nodename, sp, dd,
+				    mnsr->sr_ctime, mnsr->sr_genid, &xep) == -1)
+					mdclrerror(&xep);
+
+				free_sr((struct md_set_record *)mnsr);
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				/* Record should be for a non-multi-node set */
+				if (clnt_getset(sd->sd_nodes[i], sp->setname,
+				    MD_SET_BAD, &sr, &xep) == -1) {
+					mdclrerror(&xep);
+					continue;
+				}
+
+				/*
+				 * Set record structure was allocated from RPC
+				 * routine getset so this structure is only of
+				 * size md_set_record even if the MN flag is
+				 * set.  So, clear the flag so that the free
+				 * code doesn't attempt to free a structure
+				 * the size of md_mnset_record.
+				 */
+				if (MD_MNSET_REC(sr)) {
+					sr->sr_flags &= ~MD_SR_MN;
+					free_sr(sr);
+					continue;
+				}
+
+				/* Drive already added, skip to next node */
+				if (sr->sr_drivechain != NULL) {
+					free_sr(sr);
+					continue;
+				}
+
+				if (clnt_adddrvs(sd->sd_nodes[i], sp, dd,
+				    sr->sr_ctime, sr->sr_genid, &xep) == -1)
+					mdclrerror(&xep);
+
+				free_sr(sr);
+			}
+		}
+		max_genid += 2;
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * At this point in time, don't know which nodes are joined
+	 * to the set.  So, send a reinit command to mdcommd
+	 * which forces it to get fresh set description.  Then send resume.
+	 *
+	 * Later, this code will use rpc.mdcommd messages to reattach disks
+	 * and then rpc.mdcommd may be suspended again, rest of the nodes
+	 * joined, rpc.mdcommd reinited and then resumed.
+	 */
+	if (suspendall_flag) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/*
+			 * Resume all classes but class 1 so that lock is held
+			 * against meta* commands.
+			 * To later resume class1, must issue a class0 resume.
+			 */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0,
+			    MD_MSCF_DONT_RESUME_CLASS1, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		mdnamelist_t	*nlp;
+		mdname_t	*np;
+
+		/* Re-attach the replicas on each drive that was deleted */
+		for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+			uint_t	rep_slice;
+
+			if ((meta_replicaslice(ddp->dd_dnp,
+			    &rep_slice, &xep) != 0) ||
+			    ((np = metaslicename(ddp->dd_dnp, rep_slice,
+				&xep)) == NULL)) {
+				mdclrerror(&xep);
+				continue;
+			}
+			nlp = NULL;
+			(void) metanamelist_append(&nlp, np);
+
+			if (meta_db_attach(sp, nlp,
+			    (MDCHK_DRVINSET | MDCHK_SET_LOCKED),
+			    &sd->sd_ctime, ddp->dd_dbcnt, ddp->dd_dbsize,
+			    NULL, &xep) == -1)
+				mdclrerror(&xep);
+
+			metafreenamelist(nlp);
+		}
+		/* Re-balance */
+		if (meta_db_balance(sp, NULL, curdd, 0, &xep) == -1)
+			mdclrerror(&xep);
+	}
+
+	/* level 4 */
+	if (rb_level > 3) {
+		if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+			if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 5 */
+	if (rb_level > 4) {
+		/* Restore the timeouts that were saved before the delete */
+		if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1)
+			mdclrerror(&xep);
+	}
+
+	/*
+	 * If at least one node needs to be rejoined to MN diskset,
+	 * then suspend commd again.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			break;
+		}
+		if (nd) {
+			/*
+			 * Found node that will be rejoined so
+			 * notify rpc.mdcommd on all nodes of a nodelist change.
+			 * Start by suspending rpc.mdcommd (which drains it of
+			 * all messages), then change the nodelist followed by
+			 * a reinit and resume.
+			 */
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (clnt_mdcommdctl(nd->nd_nodename,
+				    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS0,
+				    MD_MSCF_NO_FLAGS, &xep)) {
+					mdclrerror(&xep);
+				}
+				suspendall_flag_rb = 1;
+				nd = nd->nd_next;
+			}
+		}
+	}
+
+
+
+	/* level 6 */
+	if (rb_level > 5) {
+		if (MD_MNSET_DESC(sd)) {
+			int	join_flags = 0;
+
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/* Only rejoin nodes that were joined before */
+				if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				/*
+				 * Rejoin nodes to same state as before -
+				 * either STALE or non-STALE.
+				 */
+				if (stale_bool == TRUE)
+					join_flags = MNSET_IS_STALE;
+				if (clnt_joinset(nd->nd_nodename, sp,
+				    join_flags, &xep))
+					mdclrerror(&xep);
+				/* Sets OWN flag on all nodes in list */
+				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+				    sd->sd_nodelist, MD_NR_JOIN, NULL, &xep)) {
+					mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			if (setup_db_bydd(sp, dd, TRUE, &xep) == -1)
+				mdclrerror(&xep);
+
+			/* No special flag for traditional diskset */
+			if (snarf_set(sp, NULL, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		/*
+		 * Mark the drives as OK.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				/*
+				 * Must be last action before unlock.
+				 * In case of panic, recovery code checks
+				 * for MD_DR_OK to know that drive
+				 * and possible master are fully added back.
+				 */
+				if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
+				    MD_DR_OK, &xep) == -1)
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
+				    MD_DR_OK, &xep) == -1)
+					mdclrerror(&xep);
+
+			}
+		}
+		max_genid += 2;
+		resync_genid(sp, sd, max_genid, 0, NULL);
+	}
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send a reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (suspendall_flag_rb) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag_rb) || (suspendall_flag)) {
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	metafreedrivedesc(&dd);
+
+	if (flush_set_onerr) {
+		metaflushsetname(sp);
+		if (!(MD_MNSET_DESC(sd))) {
+			md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+		}
+	}
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_hst.c b/usr/src/lib/lvm/libmeta/common/meta_set_hst.c
new file mode 100644
index 0000000000..d5e5f43ed1
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_hst.c
@@ -0,0 +1,5688 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <meta.h>
+#include <sys/lvm/md_crc.h>
+#include <sys/time.h>
+#include <sdssc.h>
+
+/*
+ * Walk every replica of the set and add its side names through
+ * meta_db_addsidenms().  The walk stops at the first failure.
+ *
+ * Returns 0 when every replica was processed, -1 otherwise.  The replica
+ * list obtained from metareplicalist() is always released before returning.
+ */
+static int
+add_db_sidenms(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*replicas = NULL;
+	md_replicalist_t	*cur;
+	int			status = 0;
+
+	if (metareplicalist(sp, MD_FULLNAME_ONLY, &replicas, ep) < 0)
+		return (-1);
+
+	cur = replicas;
+	while (cur != NULL && status == 0) {
+		md_replica_t	*rep = cur->rl_repp;
+
+		/*
+		 * These are never the first replica of the diskset, so
+		 * ADDSIDENMS_BCAST is requested.  Traditional disksets
+		 * ignore the bcast flag because they do not use
+		 * rpc.mdcommd.
+		 */
+		if (meta_db_addsidenms(sp, rep->r_namep, rep->r_blkno,
+		    DB_ADDSIDENMS_BCAST, ep) != 0)
+			status = -1;
+
+		cur = cur->rl_next;
+	}
+
+	metafreereplicalist(replicas);
+	return (status);
+}
+
+/*
+ * Push the set's drive records to each of the node_c hosts in node_v
+ * using clnt_adddrvs().
+ *
+ * The set's creation time and (genid - 1) are passed along with the
+ * drive list.  A set without drives is not an error: 0 is returned
+ * as long as metaget_drivedesc() left ep clean.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+add_drvs_to_hosts(
+	mdsetname_t	*sp,
+	int		node_c,
+	char		**node_v,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*setdesc;
+	md_drive_desc	*drives;
+	md_timeval32_t	created;
+	ulong_t		gen;
+	int		n;
+
+	setdesc = metaget_setdesc(sp, ep);
+	if (setdesc == NULL)
+		return (-1);
+
+	drives = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
+	if (drives == NULL) {
+		/* No drives: only a failure if an error was recorded */
+		if (mdisok(ep))
+			return (0);
+		return (-1);
+	}
+
+	created = setdesc->sd_ctime;
+	gen = setdesc->sd_genid - 1;
+
+	n = 0;
+	while (n < node_c) {
+		if (clnt_adddrvs(node_v[n], sp, drives, created, gen,
+		    ep) == -1)
+			return (-1);
+		n++;
+	}
+
+	return (0);
+}
+
+/*
+ * Add the metadevice name-space entries for side "sideno" of set "sp",
+ * using the entries already present for "otherside" as the template.
+ *
+ * For a multi-node set on a node flagged MD_MN_NODE_OWN the work is
+ * handed to rpc.mdcommd with one MD_MN_MSG_META_MD_ADDSIDE message.
+ * Otherwise the keys recorded for "otherside" are walked with
+ * MD_IOCNXTKEY_NM and add_name() is called once per reference on each key.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+add_md_sidenms(mdsetname_t *sp, side_t sideno, side_t otherside, md_error_t *ep)
+{
+	mdnm_params_t	nm;
+	char		*cname, *dname;
+	side_t		tmp_sideno;
+	minor_t		mnum;
+	int		done, i;
+	int		rval = 0;
+	/*
+	 * sd stays NULL for the local set, which has no set descriptor
+	 * looked up here; every use below must therefore test it first.
+	 */
+	md_set_desc	*sd = NULL;
+
+	(void) memset(&nm, '\0', sizeof (nm));
+	nm.key   = MD_KEYWILD;
+
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+	}
+	/* Use rpc.mdcommd to add md side info from all nodes */
+	if ((sd != NULL) && MD_MNSET_DESC(sd) &&
+	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+		md_mn_result_t			*resultp = NULL;
+		md_mn_msg_meta_md_addside_t	md_as;
+		int				send_rval;
+
+		md_as.msg_sideno = sideno;
+		md_as.msg_otherside = otherside;
+		/*
+		 * If reconfig cycle has been started, this node is stuck
+		 * in the return step until this command has completed.  If
+		 * mdcommd is suspended, ask send_message to fail (instead of
+		 * retrying) so that metaset can finish allowing the
+		 * reconfig cycle to proceed.
+		 */
+		send_rval = mdmn_send_message(sp->setno,
+		    MD_MN_MSG_META_MD_ADDSIDE,
+		    MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT,
+		    (char *)&md_as, sizeof (md_mn_msg_meta_md_addside_t),
+		    &resultp, ep);
+		if (send_rval != 0) {
+			/*
+			 * resultp can still be NULL when the send failed
+			 * without producing a result, so only take the
+			 * error from it when it exists.  Otherwise ep is
+			 * left exactly as mdmn_send_message() left it.
+			 */
+			if (resultp != NULL) {
+				(void) mdstealerror(ep, &(resultp->mmr_ep));
+				free_result(resultp);
+			}
+			return (-1);
+		}
+		if (resultp)
+			free_result(resultp);
+		return (0);
+	} else {
+		/*CONSTCOND*/
+		while (1) {
+			/* Fetch the next key recorded for the other side */
+			nm.mde   = mdnullerror;
+			nm.setno = sp->setno;
+			nm.side  = otherside;
+			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0)
+				return (mdstealerror(ep, &nm.mde));
+
+			/* MD_KEYWILD coming back means there are no more keys */
+			if (nm.key == MD_KEYWILD)
+				return (0);
+
+			nm.devname = (uintptr_t)meta_getnmbykey(sp->setno,
+			    otherside, nm.key, ep);
+			if (nm.devname == NULL)
+				return (-1);
+
+			nm.side = sideno;
+			/*
+			 * Only a multi-node set passes sideno unchanged;
+			 * traditional sets and the local set (sd == NULL)
+			 * start the search one side earlier.
+			 */
+			if ((sd != NULL) && MD_MNSET_DESC(sd)) {
+				tmp_sideno = sideno;
+			} else {
+				tmp_sideno = sideno - 1;
+			}
+
+			if ((done = meta_getnextside_devinfo(sp,
+			    (char *)nm.devname, &tmp_sideno,
+			    &cname, &dname, &mnum, ep)) == -1) {
+				Free((void *)nm.devname);
+				return (-1);
+			}
+
+			assert(done == 1);
+			Free((void *)nm.devname);
+
+			/*
+			 * The device reference count can be greater than 1 if
+			 * more than one softpart is configured on top of the
+			 * same device.  If this is the case then we want to
+			 * increment the count to sync up with the other sides.
+			 */
+			for (i = 0; i < nm.ref_count; i++) {
+				if (add_name(sp, sideno, nm.key, dname, mnum,
+				    cname, ep) == -1)
+					rval = -1;
+			}
+
+			Free(cname);
+			Free(dname);
+
+			if (rval != 0)
+				return (rval);
+		}
+	}
+
+	/*NOTREACHED*/
+}
+
+/*
+ * Run checkdrive_onnode() for every drive of the set against "node".
+ *
+ * Returns -1 if the drive list could not be obtained (with an error
+ * recorded in ep) or if any drive fails the check, and 0 otherwise.
+ * A set with no drives passes trivially.
+ */
+static int
+check_setdrvs_againstnode(mdsetname_t *sp, char *node, md_error_t *ep)
+{
+	md_drive_desc	*drives, *cur;
+
+	drives = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
+	if (drives == NULL && !mdisok(ep))
+		return (-1);
+
+	cur = drives;
+	while (cur != NULL) {
+		if (checkdrive_onnode(sp, cur->dd_dnp, node, ep))
+			return (-1);
+		cur = cur->dd_next;
+	}
+
+	return (0);
+}
+
+/*
+ * Create the multi-node set "sp" on each of the node_c hosts in node_v
+ * by calling clnt_mncreateset() on every one of them.
+ *
+ * new_set == 0: the set already exists.  Its cached descriptor supplies
+ *	the node list, creation time and generation id, and after the set
+ *	has been created on the new hosts the drive records are pushed to
+ *	them with add_drvs_to_hosts().
+ * new_set != 0: a temporary descriptor is built here.  Its node list
+ *	holds one entry per host in node_v, kept in ascending nodeid
+ *	order, and the master is left invalid.  The temporary descriptor
+ *	and its node list are freed before returning.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+create_multinode_set_on_hosts(
+	mdsetname_t	*sp,
+	int		node_c,		/* Number of new nodes */
+	char		**node_v,	/* Nodes which are being added */
+	int		new_set,
+	md_error_t	*ep
+)
+{
+	int				i;
+	md_set_desc			*sd;
+	md_timeval32_t			now;
+	ulong_t				genid;
+	int				rval = 0;
+	md_mnnode_desc			*nd, *ndm = NULL;
+	md_mnnode_desc			*nd_prev, *nd_curr;
+	int				nodecnt;
+	mndiskset_membershiplist_t	*nl, *nl2;
+
+	if (!new_set) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+		now = sd->sd_ctime;
+		/* One generation back, and one more if the set has drives */
+		genid = sd->sd_genid - 1;
+		if (sd->sd_drvs)
+			genid--;
+	} else {
+		/* Zalloc'd, so sd_nodelist starts out NULL for the cleanup */
+		sd = Zalloc(sizeof (*sd));
+
+		if (meta_gettimeofday(&now) == -1) {
+			(void) mdsyserror(ep, errno,
+			    dgettext(TEXT_DOMAIN, "meta_gettimeofday()"));
+			rval = -1;
+			goto out;
+		}
+
+		/* Put the new entries into the set */
+		/*
+		 * Get membershiplist from API routine.  If there's
+		 * an error, fail to create set and pass back error.
+		 */
+		if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		/*
+		 * meta_set_addhosts has already verified that
+		 * this node list is in the membership list
+		 * so set ALIVE flag.
+		 * Since this is a new set, all hosts being
+		 * added are new to the set, so also set ADD flag.
+		 */
+		for (i = 0; i < node_c; i++) {
+			nd = Zalloc(sizeof (*nd));
+			(void) strcpy(nd->nd_nodename, node_v[i]);
+			nd->nd_ctime = now;
+			nd->nd_flags = (MD_MN_NODE_ALIVE |
+				MD_MN_NODE_ADD);
+			/*
+			 * Copy nodeid and private interconnect address from
+			 * the matching membership entry.  If no entry
+			 * matches, both keep their Zalloc'd zero values.
+			 */
+			nl2 = nl;
+			while (nl2) {
+			    if (strcmp(nl2->msl_node_name,
+				node_v[i]) == 0) {
+				    nd->nd_nodeid = nl2->msl_node_id;
+				    (void) strcpy(nd->nd_priv_ic,
+					nl2->msl_node_addr);
+				    break;
+			    }
+			    nl2 = nl2->next;
+			}
+
+			/*
+			 * Nodelist must be kept in ascending
+			 * nodeid order.
+			 */
+			if (sd->sd_nodelist == NULL) {
+				/* Nothing in list, just add it */
+				sd->sd_nodelist = nd;
+			} else if (nd->nd_nodeid < sd->sd_nodelist->nd_nodeid) {
+				/* Add to head of list */
+				nd->nd_next = sd->sd_nodelist;
+				sd->sd_nodelist = nd;
+			} else {
+				nd_curr = sd->sd_nodelist->nd_next;
+				nd_prev = sd->sd_nodelist;
+				/* Search for place to add it */
+				while (nd_curr) {
+					if (nd->nd_nodeid <
+					    nd_curr->nd_nodeid) {
+						/* Add before nd_curr */
+						nd->nd_next = nd_curr;
+						nd_prev->nd_next = nd;
+						break;
+					}
+					nd_prev = nd_curr;
+					nd_curr = nd_curr->nd_next;
+				}
+				/* Add to end of list */
+				if (nd_curr == NULL) {
+					nd_prev->nd_next = nd;
+				}
+
+			}
+			/* Set master to be first node added */
+			if (ndm == NULL)
+				ndm = nd;
+		}
+
+		meta_free_nodelist(nl);
+		/*
+		 * Creating mnset for first time.
+		 * Set master to be invalid until first drive is
+		 * in set.
+		 */
+		(void) strcpy(sd->sd_mn_master_nodenm, "");
+		sd->sd_mn_master_nodeid = MD_MN_INVALID_NID;
+		sd->sd_mn_masternode = ndm;
+		sd->sd_ctime = now;
+		genid = sd->sd_genid = 0;
+	}
+
+	/* Create the set where needed */
+	for (i = 0; i < node_c; i++) {
+		/*
+		 * Create the set on each new node.  If the set already
+		 * exists, then the node list being created on each new node
+		 * is the current node list from before the new nodes
+		 * were added.  If the set doesn't exist, then the node
+		 * list being created on each new node is the entire
+		 * new node list.
+		 */
+		if (clnt_mncreateset(node_v[i], sp, sd->sd_nodelist,
+		    now, genid, sd->sd_mn_master_nodenm,
+		    sd->sd_mn_master_nodeid, ep) == -1) {
+			rval = -1;
+			break;
+		}
+	}
+
+out:
+	/* Only the descriptor built here (new_set) is owned by this routine */
+	if (new_set) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			sd->sd_nodelist = nd->nd_next;
+			Free(nd);
+			nd = sd->sd_nodelist;
+		}
+		Free(sd);
+	}
+
+	/* A brand new set has no drive records to push yet */
+	if (rval != 0 || new_set)
+		return (rval);
+
+	/*
+	 * Add the drive records to the new sets
+	 * and names for the new sides.
+	 */
+	return (add_drvs_to_hosts(sp, node_c, node_v, ep));
+}
+
+
+/*
+ * Create the traditional (non multi-node) set "sp" on each of the node_c
+ * hosts in node_v with clnt_createset().
+ *
+ * For a brand new set a scratch descriptor is filled with the host names
+ * and the current time, and is freed again before returning.  At most
+ * MD_MAXSIDES hosts are accepted in that case.
+ * For an existing set the cached descriptor is used, and once the set
+ * exists on the new hosts their drive records are added with
+ * add_drvs_to_hosts().
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+create_traditional_set_on_hosts(
+	mdsetname_t	*sp,
+	int		node_c,		/* Number of new nodes */
+	char		**node_v,	/* Nodes which are being added */
+	int		new_set,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*setdesc;
+	md_timeval32_t	stamp;
+	ulong_t		gen;
+	int		status = 0;
+	int		n;
+
+	if (new_set) {
+		if (node_c > MD_MAXSIDES)
+			return (mddserror(ep, MDE_DS_SIDENUMNOTAVAIL,
+			    sp->setno, NULL, NULL, sp->setname));
+
+		setdesc = Zalloc(sizeof (*setdesc));
+
+		/* Put the new entries into the set */
+		for (n = 0; n < node_c; n++)
+			(void) strcpy(setdesc->sd_nodes[n], node_v[n]);
+
+		if (meta_gettimeofday(&stamp) == -1) {
+			(void) mdsyserror(ep, errno, "meta_gettimeofday()");
+			status = -1;
+			goto done;
+		}
+
+		setdesc->sd_ctime = stamp;
+		setdesc->sd_genid = 0;
+		gen = 0;
+	} else {
+		setdesc = metaget_setdesc(sp, ep);
+		if (setdesc == NULL)
+			return (-1);
+
+		stamp = setdesc->sd_ctime;
+		gen = setdesc->sd_genid;
+
+		/* One generation back when the set already has drives */
+		if (setdesc->sd_drvs)
+			gen--;
+	}
+
+	/* Create the set on each new host, stopping at the first failure */
+	for (n = 0; n < node_c && status == 0; n++) {
+		if (clnt_createset(node_v[n], sp, setdesc->sd_nodes, stamp,
+		    gen, ep) == -1)
+			status = -1;
+	}
+
+done:
+	if (new_set) {
+		/* The scratch descriptor is ours; a new set has no drives */
+		Free(setdesc);
+		return (status);
+	}
+
+	if (status != 0)
+		return (status);
+
+	/*
+	 * Add the drive records to the new sets
+	 * and names for the new sides.
+	 */
+	return (add_drvs_to_hosts(sp, node_c, node_v, ep));
+}
+
+/*
+ * Dispatch set creation to the multi-node or the traditional variant,
+ * depending on multi_node, and hand back whatever that variant returns.
+ */
+static int
+create_set_on_hosts(
+	mdsetname_t	*sp,
+	int		multi_node,	/* Multi_node diskset or not? */
+	int		node_c,		/* Number of new nodes */
+	char		**node_v,	/* Nodes which are being added */
+	int		new_set,
+	md_error_t	*ep
+)
+{
+	if (!multi_node) {
+		return (create_traditional_set_on_hosts(sp, node_c, node_v,
+		    new_set, ep));
+	}
+
+	return (create_multinode_set_on_hosts(sp, node_c, node_v,
+	    new_set, ep));
+}
+
+/*
+ * Create a new diskset "sp" on the node_c hosts named in node_v.
+ *
+ * Outline:
+ *   1. Check that this node is in node_v and that auto_take is only
+ *	requested for a single-host set.
+ *   2. Pick the set number: sdssc_get_index() supplies it when bound,
+ *	otherwise the first number that no host reports busy is used.
+ *   3. Lock the set on every host.  Multi-node sets lock before the
+ *	name checks, traditional sets after them.
+ *   4. Create the set on the hosts and mark the set records MD_SR_OK
+ *	(plus MD_SR_AUTO_TAKE when auto_take is set).
+ *   5. For multi-node sets mark the node records MD_NR_OK, then send
+ *	REINIT and RESUME to rpc.mdcommd on every host.
+ *
+ * From step 4 on a failure jumps to "rollback", which undoes the work
+ * according to rb_level.  All rollback errors are collected in a private
+ * error structure and discarded, so ep keeps the original failure.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+create_set(
+	mdsetname_t	*sp,
+	int		multi_node,	/* Multi-node diskset or not? */
+	int		node_c,
+	char		**node_v,
+	int		auto_take,
+	md_error_t	*ep
+)
+{
+	int		i;
+	int		rval = 0;
+	set_t		max_sets;
+	set_t		setno;
+	int		bool;
+	uint_t		sr_flags;
+	sigset_t	oldsigs;
+	md_setkey_t	*cl_sk;
+	int		rb_level = 0;
+	md_error_t	xep = mdnullerror;
+	rval_e		sdssc_rval;
+	int		lock_flag = 0;
+	int		sig_flag = 0;
+
+	if ((max_sets = get_max_sets(ep)) == 0)
+		return (-1);
+
+	/* We must be a member of the set we are creating */
+	if (! strinlst(mynode(), node_c, node_v))
+		return (mddserror(ep, MDE_DS_SELFNOTIN,
+		    sp->setno, mynode(), NULL, sp->setname));
+
+	/*
+	 * If auto_take then we must be the only member of the set
+	 * that we are creating.
+	 */
+	if (auto_take && node_c > 1)
+		return (mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
+		    sp->setname));
+
+	/*
+	 * If we're part of SC3.0 we'll already have allocated the
+	 * set number so we can skip the allocation algorithm used.
+	 * Set number is unique across traditional and MN disksets.
+	 */
+	if ((sdssc_rval = sdssc_get_index(sp->setname, &setno))
+	    == SDSSC_NOT_BOUND) {
+
+		for (i = 0; i < node_c; i++) {
+			int	has_set;
+
+			/* Skip my node */
+			if (strcmp(mynode(), node_v[i]) == 0)
+				continue;
+
+			/*
+			 * Make sure this set name is not used on the
+			 * other hosts
+			 */
+			has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
+			if (has_set < 0) {
+				if (! mdiserror(ep, MDE_NO_SET)) {
+					rval = -1;
+					goto out;
+				}
+				mdclrerror(ep);
+				continue;
+			}
+
+			if (has_set) {
+				(void) mddserror(ep, MDE_DS_NODEHASSET,
+				    sp->setno, node_v[i], NULL, sp->setname);
+				rval = -1;
+				goto out;
+			}
+		}
+
+		/*
+		 * Find the first set number that is not busy on any host.
+		 * The inner loop breaks early on a busy host, so reaching
+		 * i == node_c means setno is free everywhere.
+		 */
+		for (setno = 1; setno < max_sets; setno++) {
+			for (i = 0; i < node_c; i++) {
+				if (clnt_setnumbusy(node_v[i], setno,
+				    &bool, ep) == -1) {
+					rval = -1;
+					goto out;
+				}
+
+				if (bool == TRUE)
+					break;
+			}
+			if (i == node_c)
+				break;
+		}
+	} else if (sdssc_rval != SDSSC_OKAY) {
+		(void) mddserror(ep, MDE_DS_SETNUMNOTAVAIL, MD_SET_BAD, NULL,
+		    NULL, sp->setname);
+		rval = -1;
+		goto out;
+	}
+
+	if (setno == max_sets) {
+		(void) mddserror(ep, MDE_DS_SETNUMNOTAVAIL, MD_SET_BAD, NULL,
+		    NULL, sp->setname);
+		rval = -1;
+		goto out;
+	}
+
+	sp->setno = setno;
+
+	/*
+	 * Lock the set on current set members.
+	 * Set locking done much earlier for MN diskset than for traditional
+	 * diskset since lock_set is used to protect against
+	 * other meta* commands running on the other nodes.
+	 * Don't issue mdcommd SUSPEND command since there is nothing
+	 * to suspend since there currently is no set.
+	 */
+	if (multi_node) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+		sig_flag = 1;
+
+		/* Lock the set on new set members */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_lock_set(node_v[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+		/* Now have the diskset locked, verify set number is still ok */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_setnumbusy(node_v[i], setno,
+			    &bool, ep) == -1) {
+				rval = -1;
+				goto out;
+			}
+		}
+	}
+
+
+	if (meta_set_checkname(sp->setname, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	for (i = 0; i < node_c; i++) {
+		if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+		if (bool == FALSE) {
+			(void) mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
+			    node_v[i], NULL, sp->setname);
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* END CHECK CODE */
+
+	/* Lock the set on new set members */
+	if (!multi_node) {
+		md_rb_sig_handling_on();
+		sig_flag = 1;
+		for (i = 0; i < node_c; i++) {
+			if (clnt_lock_set(node_v[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	RB_TEST(1, "create_set", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "create_set", ep)
+
+	if ((rval = create_set_on_hosts(sp, multi_node, node_c, node_v,
+	    1, ep)) == -1)
+		goto rollback;
+
+	RB_TEST(3, "create_set", ep)
+
+	if (auto_take)
+		sr_flags = MD_SR_OK | MD_SR_AUTO_TAKE;
+	else
+		sr_flags = MD_SR_OK;
+
+	/*
+	 * Mark the set record MD_SR_OK
+	 */
+	for (i = 0; i < node_c; i++)
+		if (clnt_upd_sr_flags(node_v[i], sp, sr_flags, ep))
+			goto rollback;
+
+	rb_level = 2;	/* level 2 */
+
+	/*
+	 * For MN diskset:
+	 * On each added node, set the node record for that node
+	 * to OK.  Then set all node records for the newly added
+	 * nodes on all nodes to ok.
+	 *
+	 * By setting a node's own node record to ok first, even if
+	 * the node adding the hosts panics, the rest of the nodes can
+	 * determine the same node list during the choosing of the master
+	 * during reconfig.  So, only nodes considered for mastership
+	 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set
+	 * on that node's rpc.metad.  If all nodes have MD_SR_OK set,
+	 * but no node has its own MD_MN_NODE_OK set, then the set will
+	 * be removed during reconfig since a panic occurred during the
+	 * creation of the initial diskset.
+	 */
+
+	if (multi_node) {
+		md_mnnode_desc	*nd, *saved_nd_next;
+		md_set_desc	*sd;
+
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			goto rollback;
+		}
+
+		for (i = 0; i < node_c; i++) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			/* Something wrong, will pick this up in next loop */
+			if (nd == NULL)
+				continue;
+
+			/*
+			 * Only changing my local cache of node list:
+			 * temporarily cut the list after nd so that just
+			 * this one node record is sent.
+			 */
+			saved_nd_next = nd->nd_next;
+			nd->nd_next = NULL;
+
+			/* Set node record for added host to ok on that host */
+			if (clnt_upd_nr_flags(node_v[i], sp,
+			    nd, MD_NR_OK, NULL, ep)) {
+				nd->nd_next = saved_nd_next;
+				goto rollback;
+			}
+			nd->nd_next = saved_nd_next;
+		}
+
+		/* Now set all node records on all nodes to be ok */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+			    sd->sd_nodelist, MD_NR_OK, NULL, ep)) {
+				goto rollback;
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	RB_TEST(4, "create_set", ep)
+
+out:
+	if ((rval == 0) && multi_node) {
+		/*
+		 * Set successfully created.
+		 * Notify rpc.mdcommd on all nodes of a nodelist change.
+		 * Send reinit command to mdcommd which forces it to get
+		 * fresh set description.  Then send resume.
+		 * Resume on class 0 will resume all classes.
+		 */
+		for (i = 0; i < node_c; i++) {
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+			}
+		}
+		for (i = 0; i < node_c; i++) {
+			if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	/* Only unlock when at least one host was actually locked */
+	if (lock_flag) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		for (i = 0; i < node_c; i++) {
+			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	if (sig_flag) {
+		if (multi_node) {
+			/* release signals back to what they were on entry */
+			if (procsigs(FALSE, &oldsigs, &xep) < 0)
+				mdclrerror(&xep);
+		} else {
+			md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+		}
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!multi_node) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	/*
+	 * For MN diskset:
+	 * On each added node (which is now each node to be deleted),
+	 * set the node record for that node to DEL.  Then set all
+	 * node records for the newly added (soon to be deleted) nodes
+	 * on all nodes to DEL.
+	 *
+	 * By setting a node's own node record to DEL first, even if
+	 * the node doing the rollback panics, the rest of the nodes can
+	 * determine the same node list during the choosing of the master
+	 * during reconfig.
+	 */
+
+	/* level 3 */
+	if ((rb_level > 1) && (multi_node)) {
+		md_mnnode_desc	*nd, *saved_nd_next;
+		md_set_desc	*sd;
+
+		/*
+		 * The set descriptor may be unavailable (that is one of the
+		 * ways to get here in the first place).  In that case there
+		 * is no node list to walk, so the node record updates are
+		 * skipped and only the set records are marked below.
+		 */
+		if ((sd = metaget_setdesc(sp, &xep)) == NULL) {
+			mdclrerror(&xep);
+		}
+
+		for (i = 0; (sd != NULL) && (i < node_c); i++) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			/* Something wrong, will pick this up in next loop */
+			if (nd == NULL)
+				continue;
+
+			/* Only changing my local cache of node list */
+			saved_nd_next = nd->nd_next;
+			nd->nd_next = NULL;
+
+			/* Set node record for added host to DEL on that host */
+			if (clnt_upd_nr_flags(node_v[i], sp,
+			    nd, MD_NR_DEL, NULL, &xep)) {
+				mdclrerror(&xep);
+			}
+			nd->nd_next = saved_nd_next;
+		}
+
+		/* Now set all node records on all nodes to be DEL */
+		nd = (sd != NULL) ? sd->sd_nodelist : NULL;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+			    sd->sd_nodelist, MD_NR_DEL, NULL, &xep)) {
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		/* Mark set record on all hosts to be DELETED */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, &xep)) {
+				mdclrerror(&xep);
+			}
+		}
+	}
+	/* level 1 */
+	if (rb_level > 0) {
+		for (i = 0; i < node_c; i++) {
+			if (clnt_delset(node_v[i], sp, &xep) == -1)
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 0 */
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	for (i = 0; i < node_c; i++) {
+		if (clnt_unlock_set(node_v[i], cl_sk, &xep))
+			mdclrerror(&xep);
+	}
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	if ((sig_flag) && (!multi_node))
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+}
+
+/*
+ * Remove the side name for side "sideno" from every replica of the set
+ * through meta_db_delsidenm().  The walk stops at the first failure.
+ *
+ * Returns 0 when every replica was processed, -1 otherwise.  The replica
+ * list obtained from metareplicalist() is always released before returning.
+ */
+static int
+del_db_sidenms(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	md_error_t	*ep
+)
+{
+	md_replicalist_t	*replicas = NULL;
+	md_replicalist_t	*cur;
+	int			status = 0;
+
+	if (metareplicalist(sp, MD_BASICNAME_OK, &replicas, ep) < 0)
+		return (-1);
+
+	cur = replicas;
+	while (cur != NULL && status == 0) {
+		md_replica_t	*rep = cur->rl_repp;
+
+		if (meta_db_delsidenm(sp, sideno, rep->r_namep,
+		    rep->r_blkno, ep) != 0)
+			status = -1;
+
+		cur = cur->rl_next;
+	}
+
+	metafreereplicalist(replicas);
+	return (status);
+}
+
+/*
+ * Delete the drive records described by "dd" from set "sp" on each of
+ * the node_c hosts named in node_v, using clnt_deldrvs().
+ *
+ * How failures are treated depends on the kind of set and on "oha":
+ *   - MN set, oha == TRUE:  hosts not flagged MD_MN_NODE_ALIVE are
+ *     skipped without an RPC; a host that is missing from the set's
+ *     node list fails the call with MDE_DS_NOTINMEMBERLIST.
+ *   - MN set, oha == FALSE: every host is called; any failure is fatal.
+ *   - traditional set:      every host is called; with oha == TRUE an
+ *     RPC error is cleared and that host is skipped.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+del_drvs_from_hosts(
+	mdsetname_t	*sp,
+	md_set_desc	*sd,
+	md_drive_desc	*dd,
+	int		node_c,
+	char		**node_v,
+	int		oha,
+	md_error_t	*ep
+)
+{
+	int		i;
+	md_mnnode_desc	*nd;
+
+	for (i = 0; i < node_c; i++) {
+		if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			if (nd == NULL) {
+				/*
+				 * No node record was found, so the host
+				 * must be named from node_v[i]; nd cannot
+				 * be dereferenced here.
+				 */
+				return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+				    sp->setno, node_v[i],
+				    NULL, sp->setname));
+			}
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				continue;
+			}
+			if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
+				return (-1);
+			}
+		} else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
+			/*
+			 * All nodes should be alive in non-oha mode.
+			 */
+			if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
+				return (-1);
+			}
+		} else {
+			/*
+			 * For traditional diskset, issue the RPC and
+			 * ignore RPC failure if in OHA mode.
+			 */
+			if (clnt_deldrvs(node_v[i], sp, dd, ep)) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				return (-1);
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Delete the single host passed in "anode" from set "sp", talking only
+ * to the local node (mynode()).
+ *
+ * The set must be owned by this node.  The set is locked locally, the
+ * host is removed with clnt_delhosts(), and for a traditional set that
+ * has drives the drive side names are removed with
+ * clnt_del_drv_sidenms().  The set is unlocked and the cached set name
+ * flushed on the way out.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+del_host_noset(
+	mdsetname_t	*sp,
+	char		**anode,
+	md_error_t	*ep
+)
+{
+	int		rval = 0;
+	md_setkey_t	*cl_sk;
+	md_drive_desc	*dd;
+	md_error_t	xep = mdnullerror;
+	md_set_desc	*sd;
+
+	/* These two early failures return before anything is locked */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/* Lock the set on our side */
+	/*
+	 * NOTE(review): a failed lock still goes through "out", so the
+	 * unlock below is attempted even though the lock was not taken.
+	 */
+	if (clnt_lock_set(mynode(), sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Exactly one host (anode[0]) is deleted */
+	if (clnt_delhosts(mynode(), sp, 1, anode, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Drive side names are only handled here for traditional sets */
+	if (!MD_MNSET_DESC(sd)) {
+		/* A NULL list with a clean ep just means "no drives" */
+		if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+		    ep)) == NULL) {
+			if (! mdisok(ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+
+		/* If we have drives */
+		if (dd != NULL) {
+			if (clnt_del_drv_sidenms(mynode(), sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+	}
+
+out:
+	/* An unlock failure is only reported if nothing failed earlier */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	return (rval);
+}
+
+/*
+ * Delete the metadevice name-space entries for side "sideno" of set "sp".
+ *
+ * For a multi-node set on a node flagged MD_MN_NODE_OWN the work is
+ * handed to rpc.mdcommd with one MD_MN_MSG_META_MD_DELSIDE message.
+ * Otherwise every key is walked with MD_IOCNXTKEY_NM and del_name() is
+ * called once per reference on each key.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+del_md_sidenms(mdsetname_t *sp, side_t sideno, md_error_t *ep)
+{
+	mdnm_params_t		nm;
+	md_set_desc		*sd = NULL;
+	int			i;
+
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+	}
+	/* Use rpc.mdcommd to delete md side info from all nodes */
+	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
+	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
+		md_mn_result_t			*resultp = NULL;
+		md_mn_msg_meta_md_delside_t	md_ds;
+		int				send_rval;
+
+		md_ds.msg_sideno = sideno;
+		/*
+		 * If reconfig cycle has been started, this node is stuck
+		 * in the return step until this command has completed.  If
+		 * mdcommd is suspended, ask send_message to fail (instead of
+		 * retrying) so that metaset can finish allowing the
+		 * reconfig cycle to proceed.
+		 */
+		send_rval = mdmn_send_message(sp->setno,
+		    MD_MN_MSG_META_MD_DELSIDE,
+		    MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT,
+		    (char *)&md_ds, sizeof (md_mn_msg_meta_md_delside_t),
+		    &resultp, ep);
+		if (send_rval != 0) {
+			/*
+			 * resultp can still be NULL when the send failed
+			 * without producing a result, so only take the
+			 * error from it when it exists.  Otherwise ep is
+			 * left exactly as mdmn_send_message() left it.
+			 */
+			if (resultp != NULL) {
+				(void) mdstealerror(ep, &(resultp->mmr_ep));
+				free_result(resultp);
+			}
+			return (-1);
+		}
+		if (resultp)
+			free_result(resultp);
+	} else {
+		(void) memset(&nm, '\0', sizeof (nm));
+		nm.key   = MD_KEYWILD;
+
+		/*CONSTCOND*/
+		while (1) {
+			/* Fetch the next key, whichever side it belongs to */
+			nm.mde   = mdnullerror;
+			nm.setno = sp->setno;
+			nm.side  = MD_SIDEWILD;
+			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0)
+				return (mdstealerror(ep, &nm.mde));
+
+			/* MD_KEYWILD coming back means there are no more keys */
+			if (nm.key == MD_KEYWILD)
+				return (0);
+
+			/*
+			 * The device reference count can be greater than 1 if
+			 * more than one softpart is configured on top of the
+			 * same device.  If this is the case then we want to
+			 * decrement the count to zero so the entry can be
+			 * actually removed.
+			 */
+			for (i = 0; i < nm.ref_count; i++) {
+				if (del_name(sp, sideno, nm.key, ep) == -1)
+					return (-1);
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Best-effort re-creation of set "sp" on its member hosts, described
+ * by "sd".
+ *
+ * Each member is queried with nodehasset(); when that call returns a
+ * negative value the set is created again on that host from the data
+ * in sd.  For a multi-node set only hosts flagged MD_MN_NODE_ALIVE are
+ * considered.  Every error is cleared and nothing is reported back.
+ */
+static void
+recreate_set(
+	mdsetname_t		*sp,
+	md_set_desc		*sd
+)
+{
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*node;
+	int			side;
+
+	if (!(MD_MNSET_DESC(sd))) {
+		/* Traditional set: walk the fixed table of host names */
+		for (side = 0; side < MD_MAXSIDES; side++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[side][0] == '\0')
+				continue;
+
+			if (nodehasset(sp, sd->sd_nodes[side],
+			    NHS_NST_EQ, &xep) >= 0)
+				continue;
+
+			mdclrerror(&xep);
+
+			if (clnt_createset(sd->sd_nodes[side], sp,
+			    sd->sd_nodes, sd->sd_ctime, sd->sd_genid,
+			    &xep) == -1)
+				mdclrerror(&xep);
+		}
+		return;
+	}
+
+	/* Multi-node set: walk the node list, alive nodes only */
+	for (node = sd->sd_nodelist; node != NULL; node = node->nd_next) {
+		if (!(node->nd_flags & MD_MN_NODE_ALIVE))
+			continue;
+
+		if (nodehasset(sp, node->nd_nodename, NHS_NST_EQ, &xep) >= 0)
+			continue;
+
+		mdclrerror(&xep);
+
+		if (clnt_mncreateset(node->nd_nodename, sp,
+		    sd->sd_nodelist, sd->sd_ctime, sd->sd_genid,
+		    sd->sd_mn_master_nodenm,
+		    sd->sd_mn_master_nodeid, &xep) == -1)
+			mdclrerror(&xep);
+	}
+}
+
+/*
+ * If a MN diskset, set is already locked on all nodes via clnt_lock_set.
+ */
+static int
+del_set_nodrives(
+	mdsetname_t		*sp,
+	int			node_c,
+	char			**node_v,
+	int			oha,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd;
+	int			i;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	ulong_t			max_genid = 0;
+	int			rval = 0;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	int			delete_end = 1;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (MD_MNSET_DESC(sd)) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	} else {
+		md_rb_sig_handling_on();
+	}
+
+	/*
+	 * Lock the set on current set members for traditional disksets.
+	 */
+	if (!(MD_MNSET_DESC(sd))) {
+		for (i = 0; i < node_c; i++) {
+			/*
+			 * For traditional diskset, issue the RPC and
+			 * ignore RPC failure if in OHA mode.
+			 */
+			if (clnt_lock_set(node_v[i], sp, ep)) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				rval = -1;
+				goto out;
+			}
+		}
+	}
+
+
+	RB_TEST(1, "deletehosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "deletehosts", ep)
+
+	/*
+	 * Mark the set record MD_SR_DEL
+	 */
+	for (i = 0; i < node_c; i++) {
+
+		RB_TEST(3, "deletehosts", ep)
+
+		if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			if (nd == NULL) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+				    sp->setno, nd->nd_nodename,
+				    NULL, sp->setname);
+				goto rollback;
+			}
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				continue;
+			}
+
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+				goto rollback;
+			}
+		} else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
+			/*
+			 * All nodes should be alive in non-oha mode.
+			 */
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+				goto rollback;
+			}
+		} else {
+			/*
+			 * For traditional diskset, issue the RPC and
+			 * ignore RPC failure if in OHA mode.
+			 */
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				goto rollback;
+			}
+		}
+
+		RB_TEST(4, "deletehosts", ep)
+	}
+
+	RB_TEST(5, "deletehosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(6, "deletehosts", ep)
+
+	if (sdssc_delete_begin(sp->setname) == SDSSC_ERROR)
+		if (metad_isautotakebyname(sp->setname))
+			delete_end = 0;
+		else
+			goto rollback;
+
+	/* The set is OK to delete, make it so. */
+	for (i = 0; i < node_c; i++) {
+
+		RB_TEST(7, "deletehosts", ep)
+
+		if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			if (nd == NULL) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+				    sp->setno, nd->nd_nodename,
+				    NULL, sp->setname);
+				goto rollback;
+			}
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				continue;
+			}
+
+			if (clnt_delset(node_v[i], sp, ep) == -1) {
+				goto rollback;
+			}
+		} else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
+			/*
+			 * All nodes should be alive in non-oha mode.
+			 */
+			if (clnt_delset(node_v[i], sp, ep) == -1) {
+				goto rollback;
+			}
+		} else {
+			/*
+			 * For traditional diskset, issue the RPC and
+			 * ignore RPC failure if in OHA mode.
+			 */
+			if (clnt_delset(node_v[i], sp, ep) == -1) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				goto rollback;
+			}
+		}
+
+		RB_TEST(8, "deletehosts", ep)
+	}
+
+	RB_TEST(9, "deletehosts", ep)
+
+out:
+	/*
+	 * Unlock the set on current set members
+	 * for traditional disksets.
+	 */
+	if (!(MD_MNSET_DESC(sd))) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		for (i = 0; i < node_c; i++) {
+			/*
+			 * For traditional diskset, issue the RPC and
+			 * ignore RPC failure if in OHA mode.
+			 */
+			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
+				if (oha == TRUE && mdanyrpcerror(&xep)) {
+					mdclrerror(&xep);
+					continue;
+				}
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	/*
+	 * A MN diskset has the clnt_locks held by meta_set_deletehosts so
+	 * don't flush that data until meta_set_deletehosts has finished
+	 * with it.  meta_set_deletehosts will handle the flush of the
+	 * setname.
+	 */
+	if (!(MD_MNSET_DESC(sd))) {
+		metaflushsetname(sp);
+	}
+
+	if (delete_end &&
+	    sdssc_delete_end(sp->setname, SDSSC_COMMIT) == SDSSC_ERROR)
+		rval = -1;
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	} else {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	max_genid = sd->sd_genid;
+
+	/* level 2 */
+	if (rb_level > 1) {
+		recreate_set(sp, sd);
+		max_genid++;
+
+		if (delete_end)
+			(void) sdssc_delete_end(sp->setname, SDSSC_CLEANUP);
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		max_genid++;
+		resync_genid(sp, sd, max_genid, node_c, node_v);
+	}
+
+	/* level 0 */
+	/*
+	 * Unlock the set on current set members
+	 * for traditional disksets.
+	 */
+	if (!(MD_MNSET_DESC(sd))) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		for (i = 0; i < node_c; i++) {
+			/*
+			 * For traditional diskset, issue the RPC and
+			 * ignore RPC failure if in OHA mode.
+			 */
+			if (clnt_unlock_set(node_v[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+		cl_set_setkey(NULL);
+	}
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	/*
+	 * A MN diskset has the clnt_locks held by meta_set_deletehosts so
+	 * don't flush that data until meta_set_deletehosts has finished
+	 * with it.  meta_set_deletehosts will handle the flush of the
+	 * setname.
+	 */
+	if (!(MD_MNSET_DESC(sd))) {
+		metaflushsetname(sp);
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+}
+
+/*
+ * Delete the set from each of the node_c hosts named in node_v.
+ *
+ * When the set has drives (dd != NULL) the drives, the db replica side
+ * names and the metadevice side names belonging to the hosts being
+ * deleted are removed first; the set itself is then deleted on every
+ * listed host with clnt_delset().  rb_level records how far the
+ * operation got so that the rollback code undoes only completed steps.
+ *
+ * In OHA mode (oha == TRUE) non-alive nodes of a multi-node set are
+ * skipped, and RPC failures are ignored for a traditional set.
+ *
+ * On entry:
+ *   procsigs already called for MN diskset.
+ *   md_rb_sig_handling already called for traditional diskset.
+ *
+ * Returns 0 on success.  On failure the completed steps are rolled back
+ * (errors hit while rolling back are discarded), the error that caused
+ * the failure is left in ep, and -1 is returned.
+ *
+ * NOTE(review): oldsigs is never written in this function on the MN
+ * diskset path, yet it is handed to procsigs(FALSE, ...) - confirm that
+ * procsigs() does not read it when releasing signals.
+ */
+static int
+del_set_on_hosts(
+	mdsetname_t		*sp,
+	md_set_desc		*sd,
+	md_drive_desc		*dd,
+	int			node_c,		/* Number of nodes */
+	char			**node_v,	/* Nodes being deleted */
+	int			oha,
+	md_error_t		*ep
+)
+{
+	int			i;
+	int			j;
+	side_t			sideno;
+	md_replicalist_t	*rlp = NULL;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	ulong_t			max_genid = 0;
+	int			rb_level = 1;	/* This is a special case */
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+
+	RB_PREEMPT;
+
+	RB_TEST(7, "deletehosts", ep)
+
+	if (dd != NULL) {
+		/*
+		 * May need this to re-add sidenames on roll back.
+		 */
+		if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp,
+		    ep) < 0)
+			goto rollback;
+
+		RB_TEST(8, "deletehosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 2;	/* level 2 */
+
+		RB_TEST(9, "deletehosts", ep)
+
+		if (del_drvs_from_hosts(sp, sd, dd, node_c, node_v, oha, ep))
+			goto rollback;
+
+		RB_TEST(10, "deletehosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 3;	/* level 3 */
+
+		RB_TEST(11, "deletehosts", ep)
+
+		/*
+		 * Delete the db replica sides
+		 * This is done before the next loop, so that
+		 * the db does not get unloaded before we are finished
+		 * deleting the sides.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* Skip hosts not being deleted */
+				if (! strinlst(nd->nd_nodename, node_c,
+				    node_v)) {
+					nd = nd->nd_next;
+					continue;
+				}
+
+				if (del_db_sidenms(sp, nd->nd_nodeid, ep))
+					goto rollback;
+
+				RB_TEST(12, "deletehosts", ep)
+				nd = nd->nd_next;
+			}
+		} else {
+			for (sideno = 0; sideno < MD_MAXSIDES; sideno++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[sideno][0] == '\0')
+					continue;
+
+				/* Skip hosts not being deleted */
+				if (! strinlst(sd->sd_nodes[sideno], node_c,
+				    node_v))
+					continue;
+
+				if (del_db_sidenms(sp, sideno, ep))
+					goto rollback;
+
+				RB_TEST(12, "deletehosts", ep)
+			}
+		}
+
+		RB_TEST(13, "deletehosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 4;	/* level 4 */
+
+		RB_TEST(14, "deletehosts", ep)
+
+		/* Delete the names from the namespace */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* Skip hosts not being deleted */
+				if (! strinlst(nd->nd_nodename, node_c,
+				    node_v)) {
+					nd = nd->nd_next;
+					continue;
+				}
+
+				if (del_md_sidenms(sp, nd->nd_nodeid, ep))
+					goto rollback;
+
+				RB_TEST(15, "deletehosts", ep)
+				nd = nd->nd_next;
+			}
+		} else {
+			for (sideno = 0; sideno < MD_MAXSIDES; sideno++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[sideno][0] == '\0')
+					continue;
+
+				/* Skip hosts not being deleted */
+				if (! strinlst(sd->sd_nodes[sideno], node_c,
+				    node_v))
+					continue;
+
+				if (del_md_sidenms(sp, sideno, ep))
+					goto rollback;
+
+				RB_TEST(15, "deletehosts", ep)
+			}
+		}
+	}
+
+	RB_TEST(16, "deletehosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 5;	/* level 5 */
+
+	RB_TEST(17, "deletehosts", ep)
+
+	for (i = 0; i < node_c; i++) {
+		if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			if (nd == NULL) {
+				/*
+				 * nd is NULL here, so report the name that
+				 * was being looked up rather than reading
+				 * it through nd.
+				 */
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+				    sp->setno, node_v[i],
+				    NULL, sp->setname);
+				goto rollback;
+			}
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				continue;
+			}
+
+			if (clnt_delset(node_v[i], sp, ep) == -1) {
+				goto rollback;
+			}
+		} else if (MD_MNSET_DESC(sd) && (oha == FALSE)) {
+			/*
+			 * All nodes should be alive in non-oha mode.
+			 */
+			if (clnt_delset(node_v[i], sp, ep) == -1) {
+				goto rollback;
+			}
+		} else {
+			/*
+			 * For traditional diskset, issue the RPC and
+			 * ignore RPC failure if in OHA mode.
+			 */
+			if (clnt_delset(node_v[i], sp, ep) == -1) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				goto rollback;
+			}
+		}
+
+		RB_TEST(18, "deletehosts", ep)
+	}
+
+	metafreereplicalist(rlp);
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	} else {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (0);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	max_genid = sd->sd_genid;
+
+	/* level 5 */
+	if (rb_level > 4) {
+		recreate_set(sp, sd);
+		max_genid++;
+	}
+
+	/* level 2 */
+	if (rb_level > 1 && dd != NULL) {
+		/*
+		 * See if we have to re-add the drives specified.
+		 */
+		for (i = 0; i < node_c; i++) {
+			md_set_record	*sr;
+
+			if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+				/*
+				 * During OHA mode, don't issue RPCs to
+				 * non-alive nodes since there is no reason to
+				 * wait for RPC timeouts.
+				 */
+				nd = sd->sd_nodelist;
+				while (nd) {
+					if (strcmp(nd->nd_nodename, node_v[i])
+					    == 0)
+						break;
+					nd = nd->nd_next;
+				}
+				if (nd == NULL)
+					continue;
+
+				if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+					continue;
+			}
+
+			/* Don't care if set record is MN or not */
+			if (clnt_getset(node_v[i], sp->setname,
+			    MD_SET_BAD, &sr, &xep) == -1) {
+				mdclrerror(&xep);
+				continue;
+			}
+
+			/* Drive already added, skip to next node */
+			if (sr->sr_drivechain != NULL) {
+				/*
+				 * Set record structure was allocated from RPC
+				 * routine getset so this structure is only of
+				 * size md_set_record even if the MN flag is
+				 * set.  So, clear the flag so that the free
+				 * code doesn't attempt to free a structure
+				 * the size of md_mnset_record.
+				 */
+				sr->sr_flags &= ~MD_SR_MN;
+				free_sr(sr);
+				continue;
+			}
+
+			if (clnt_adddrvs(node_v[i], sp, dd,
+			    sr->sr_ctime, sr->sr_genid, &xep) == -1)
+				mdclrerror(&xep);
+
+			if (clnt_upd_dr_flags(node_v[i], sp, dd,
+			    MD_DR_OK, &xep) == -1)
+				mdclrerror(&xep);
+
+			/*
+			 * Set record structure was allocated from RPC routine
+			 * getset so this structure is only of size
+			 * md_set_record even if the MN flag is set.  So,
+			 * clear the flag so that the free code doesn't
+			 * attempt to free a structure the size of
+			 * md_mnset_record.
+			 */
+			sr->sr_flags &= ~MD_SR_MN;
+			free_sr(sr);
+		}
+		max_genid += 3;
+	}
+
+	/* level 3 */
+	if (rb_level > 2 && dd != NULL) {
+		md_replicalist_t	*rl;
+
+		for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+			md_replica_t	*r = rl->rl_repp;
+
+			/*
+			 * This is not the first replica being added to the
+			 * diskset so call with ADDSIDENMS_BCAST.  If this
+			 * is a traditional diskset, the bcast flag is ignored
+			 * since traditional disksets don't use the rpc.mdcommd.
+			 */
+			if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno,
+			    DB_ADDSIDENMS_BCAST, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 4 */
+	if (rb_level > 3 && dd != NULL) {
+		int	nodeid_addsides = 0;
+		/*
+		 * Add the device names for the new sides into the namespace,
+		 * on all hosts not being deleted.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* Find a node that is not being deleted */
+				if (! strinlst(nd->nd_nodename, node_c,
+				    node_v)) {
+					nodeid_addsides = nd->nd_nodeid;
+					break;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (j = 0; j < MD_MAXSIDES; j++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[j][0] == '\0')
+					continue;
+
+				/* Find a node that is not being deleted */
+				if (! strinlst(sd->sd_nodes[j], node_c,
+				    node_v))
+					break;
+			}
+			nodeid_addsides = j;
+		}
+
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* Skip nodes not being deleted */
+				if (!strinlst(nd->nd_nodename, node_c,
+				    node_v)) {
+					nd = nd->nd_next;
+					continue;
+				}
+
+				/* this side was just created, add the names */
+				if (add_md_sidenms(sp, nd->nd_nodeid,
+				    nodeid_addsides, &xep))
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				/* Skip nodes not being deleted */
+				if (!strinlst(sd->sd_nodes[i], node_c, node_v))
+					continue;
+
+				/* this side was just created, add the names */
+				if (add_md_sidenms(sp, i, nodeid_addsides,
+				    &xep))
+					mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		max_genid++;
+		resync_genid(sp, sd, max_genid, node_c, node_v);
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * Use a for loop so that skipping a non-alive node still
+		 * advances to the next list entry.
+		 */
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+			/* To balance lock/unlock */
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	metafreereplicalist(rlp);
+
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (-1);
+}
+
+/*
+ * Build a side-name entry for side `sideno' of drive `dnp' and append it
+ * to the end of dnp->side_names.
+ *
+ * The device information for the entry is looked up from the block name
+ * of the drive's replica slice: with meta_getside_devinfo() for a
+ * multi-node set, and with meta_getnextside_devinfo() for a traditional
+ * set.
+ *
+ * Returns 0 on success and -1 on failure; on failure nothing is added
+ * to the list.
+ */
+static int
+make_sideno_sidenm(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	side_t		sideno,
+	md_error_t	*ep
+)
+{
+	mdsidenames_t	*sn, **sn_next;
+	md_set_desc	*sd;
+	mdname_t	*np;
+	uint_t		rep_slice;
+	int		err = 0;
+
+	assert(dnp->side_names_key != MD_KEYWILD);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * Find the link that terminates the list.  Walking the links
+	 * rather than the entries also copes with an empty list, where
+	 * the terminating link is dnp->side_names itself.
+	 */
+	for (sn_next = &dnp->side_names; *sn_next != NULL;
+	    sn_next = &(*sn_next)->next)
+		;
+
+	if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
+		return (-1);
+
+	if ((np = metaslicename(dnp, rep_slice, ep)) == NULL)
+		return (-1);
+
+	sn = Zalloc(sizeof (*sn));
+	sn->sideno = sideno;
+
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * For MN diskset the sideno is not an index into
+		 * the array of nodes.  Hence getside_devinfo is
+		 * used instead of meta_getnextside_devinfo.
+		 */
+		if (meta_getside_devinfo(sp, np->bname, sideno, &sn->cname,
+			&sn->dname, &sn->mnum, ep) == -1)
+			err = -1;
+	} else {
+		/* decrement sideno, to look like the previous sideno */
+		sideno--;
+		if (meta_getnextside_devinfo(sp, np->bname, &sideno, &sn->cname,
+			&sn->dname, &sn->mnum, ep) == -1)
+			err = -1;
+	}
+
+	if (err) {
+		Free(sn);
+		return (err);
+	}
+	/* sideno is passed by address above; it must match the request */
+	assert(sn->sideno == sideno);
+
+	/* Add to the end of the linked list */
+	*sn_next = sn;
+	return (0);
+}
+
+/*
+ * Check every name in node_v: it must not be longer than
+ * MD_MAX_NODENAME, and the host it names must report that very same
+ * name through clnt_hostname().
+ *
+ * Returns 0 when all names pass; otherwise returns the result of
+ * mddserror() for the offending name, or -1 when clnt_hostname() fails.
+ */
+static int
+validate_nodes(
+	mdsetname_t	*sp,
+	int		node_c,
+	char		**node_v,
+	md_error_t	*ep
+)
+{
+	int		idx = 0;
+
+	while (idx < node_c) {
+		char	*given = node_v[idx];
+		char	*reported;
+		int	same;
+
+		if (strlen(given) > (size_t)MD_MAX_NODENAME)
+			return (mddserror(ep, MDE_DS_NODENAMETOOLONG,
+			    sp->setno, given, NULL, sp->setname));
+
+		if (clnt_hostname(given, &reported, ep) != 0)
+			return (-1);
+
+		/* Compare first so the buffer is released exactly once */
+		same = (strcmp(given, reported) == 0);
+		Free(reported);
+
+		if (!same)
+			return (mddserror(ep, MDE_DS_NOTNODENAME, sp->setno,
+			    given, NULL, sp->setname));
+
+		idx++;
+	}
+	return (0);
+}
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Check the given disk set name for syntactic correctness.
+ *
+ * The name must not be longer than MD_MAX_SETNAME, and every character
+ * in it must be printable and must not appear in INVALID_IN_NAMES.
+ *
+ * Returns 0 when the name is acceptable; otherwise returns the result
+ * of mddserror() with the reason recorded in ep.
+ */
+int
+meta_set_checkname(char *setname, md_error_t *ep)
+{
+	char	*cp;
+
+	if (strlen(setname) > (size_t)MD_MAX_SETNAME)
+		return (mddserror(ep, MDE_DS_SETNAMETOOLONG,
+		    MD_SET_BAD, NULL, NULL, setname));
+
+	for (cp = setname; *cp; cp++) {
+		/*
+		 * isprint() is only defined for EOF and values that fit
+		 * in an unsigned char, so a plain char must be converted
+		 * before it is passed in.
+		 */
+		if (!isprint((unsigned char)*cp) ||
+		    strchr(INVALID_IN_NAMES, *cp) != NULL)
+			return (mddserror(ep, MDE_DS_INVALIDSETNAME,
+			    MD_SET_BAD, NULL, NULL, setname));
+	}
+	return (0);
+}
+
+/*
+ * Add host(s) to the multi-node diskset provided in sp.
+ * 	- create set if non-existent.
+ */
+static int
+meta_multinode_set_addhosts(
+	mdsetname_t	*sp,
+	int		multi_node,
+	int		node_c,
+	char		**node_v,
+	int		auto_take,
+	md_error_t	*ep
+)
+{
+	md_set_desc			*sd;
+	md_drive_desc			*dd, *p;
+	int				rval = 0;
+	int				bool;
+	int				nodeindex;
+	int 				i;
+	int				has_set;
+	sigset_t			oldsigs;
+	md_setkey_t			*cl_sk;
+	int				rb_level = 0;
+	md_error_t			xep = mdnullerror;
+	md_mnnode_desc			*nd, *nd_curr, *nd_prev;
+	md_timeval32_t			now;
+	int				nodecnt;
+	mndiskset_membershiplist_t	*nl, *nl2;
+	int				suspendall_flag = 0;
+	int				suspend1_flag = 0;
+	int				lock_flag = 0;
+	int				stale_flag = 0;
+	md_mnnode_desc			*saved_nd_next;
+	int				remote_sets_created = 0;
+
+	/*
+	 * Check membershiplist first.  If there's
+	 * an error, fail to create set and pass back error.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+		return (-1);
+	}
+	/* Verify that all nodes are in member list */
+	for (i = 0; i < node_c; i++) {
+		/*
+		 * If node in list isn't a member of the membership,
+		 * just return error.
+		 */
+		if (meta_is_member(node_v[i], NULL, nl) == 0) {
+			meta_free_nodelist(nl);
+			return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+			    sp->setno, node_v[i], NULL, sp->setname));
+		}
+	}
+	/*
+	 * Node list is needed later, but there is a lot of error
+	 * checking and possible failures between here and there, so
+	 * just re-get the list later if there are no errors.
+	 */
+	meta_free_nodelist(nl);
+	nl = NULL;
+
+	/*
+	 * Verify that list of nodes being added contains no
+	 * duplicates.
+	 */
+	if (nodesuniq(sp, node_c, node_v, ep))
+		return (-1);
+
+	/*
+	 * Verify that each node being added thinks that its nodename
+	 * is the same as the nodename given.
+	 */
+	if (validate_nodes(sp, node_c, node_v, ep))
+		return (-1);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		if (! mdiserror(ep, MDE_NO_SET))
+			return (-1);
+		mdclrerror(ep);
+		return (create_set(sp, multi_node, node_c, node_v, auto_take,
+		    ep));
+	} else {
+		/*
+		 * If this node and another node were both attempting to
+		 * create the same setname at the same time, and the other
+		 * node has just created the set on this node then sd would
+		 * be non-NULL, but sp->setno would be null (setno is filled
+		 * in by the create_set). If this is true, then fail since
+		 * the other node has already won this race.
+		 */
+		if (sp->setno == NULL) {
+			return (mddserror(ep, MDE_DS_NODEINSET,
+			    NULL, mynode(), NULL, sp->setname));
+		}
+	}
+
+	/* The auto_take behavior is inconsistent with multiple hosts. */
+	if (auto_take || sd->sd_flags & MD_SR_AUTO_TAKE) {
+		(void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
+		    sp->setname);
+		return (-1);
+	}
+
+	/*
+	 * We already have the set.
+	 */
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/*
+	 * The drive and node records are stored in the local mddbs of each
+	 * node in the diskset.  Each node's rpc.metad daemon reads in the set,
+	 * drive and node records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set, drive and node records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to add
+	 * the new node record to their local mddb.  If a panic of this node
+	 * leaves the local mddbs set, node and drive records out-of-sync, the
+	 * reconfig cycle will fix the local mddbs and force them back into
+	 * synchronization.
+	 */
+	nd = sd->sd_nodelist;
+	while (nd) {
+		if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+			return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+			    sp->setno, nd->nd_nodename, NULL,
+			    sp->setname));
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * Check if node is already in set.
+	 */
+	for (i = 0; i < node_c; i++) {
+		/* Is node already in set? */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+				break;
+			nd = nd->nd_next;
+		}
+		if (nd) {
+			return (mddserror(ep, MDE_DS_NODEINSET,
+			    sp->setno, node_v[i], NULL,
+			    sp->setname));
+		}
+	}
+
+	/*
+	 * Lock the set on current set members.
+	 * Set locking done much earlier for MN diskset than for traditional
+	 * diskset since lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+		lock_flag = 1;
+		nd = nd->nd_next;
+	}
+	/*
+	 * Lock out other meta* commands by suspending
+	 * class 1 messages across the diskset.
+	 */
+	nd = sd->sd_nodelist;
+	/* Send suspend to nodes in nodelist before addhosts call */
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_mdcommdctl(nd->nd_nodename,
+		    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+		    MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto out;
+		}
+		suspend1_flag = 1;
+		nd = nd->nd_next;
+	}
+
+	/* Lock the set on new set members */
+	for (i = 0; i < node_c; i++) {
+		/* Already verified to be alive */
+		if (clnt_lock_set(node_v[i], sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+		lock_flag = 1;
+	}
+
+	/*
+	 * Perform the required checks for new hosts
+	 */
+	for (i = 0; i < node_c; i++) {
+		/* Make sure this set name is not used on the other hosts */
+		has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
+		if (has_set < 0) {
+			if (! mdiserror(ep, MDE_NO_SET)) {
+				rval = -1;
+				goto out;
+			}
+			/* Keep on truck'n */
+			mdclrerror(ep);
+		} else if (has_set) {
+			(void) mddserror(ep, MDE_DS_NODEHASSET, sp->setno,
+			    node_v[i], NULL, sp->setname);
+			rval = -1;
+			goto out;
+		}
+
+		if (clnt_setnumbusy(node_v[i], sp->setno, &bool, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		if (bool == TRUE) {
+			(void) mddserror(ep, MDE_DS_SETNUMBUSY, sp->setno,
+			    node_v[i], NULL, sp->setname);
+			rval = -1;
+			goto out;
+		}
+
+		if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		if (bool == FALSE) {
+			(void) mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
+			    node_v[i], NULL, sp->setname);
+			rval = -1;
+			goto out;
+		}
+
+		if (check_setdrvs_againstnode(sp, node_v[i], ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* Get drive descriptors for the set */
+	if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL) {
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* END CHECK CODE */
+
+	RB_TEST(1, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "addhosts", ep)
+
+	/*
+	 * Create the set where needed
+	 */
+	if (create_set_on_hosts(sp, multi_node, node_c, node_v, 0, ep)) {
+		goto rollback;
+	}
+
+	/*
+	 * Send suspend to rpc.mdcommd on nodes where a set has been
+	 * created since rpc.mdcommd must now be running on the remote nodes.
+	 */
+	remote_sets_created = 1;
+	for (i = 0; i < node_c; i++) {
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		if (clnt_mdcommdctl(node_v[i],
+		    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+		    MD_MSCF_NO_FLAGS, ep)) {
+			rval = -1;
+			goto rollback;
+		}
+	}
+
+	/*
+	 * Merge the new entries into the set with the existing sides.
+	 * Get membershiplist from API routine.  If there's
+	 * an error, fail to create set and pass back error.
+	 */
+	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
+		goto rollback;
+	}
+	if (meta_gettimeofday(&now) == -1) {
+		meta_free_nodelist(nl);
+		(void) mdsyserror(ep, errno,
+		    dgettext(TEXT_DOMAIN, "meta_gettimeofday()"));
+		goto rollback;
+	}
+	for (nodeindex = 0; nodeindex < node_c; nodeindex++) {
+		nd = Zalloc(sizeof (*nd));
+		(void) strcpy(nd->nd_nodename, node_v[nodeindex]);
+		nd->nd_ctime = now;
+		nl2 = nl;
+		while (nl2) {
+		    if (strcmp(nl2->msl_node_name,
+			node_v[nodeindex]) == 0) {
+			    nd->nd_nodeid = nl2->msl_node_id;
+			    (void) strcpy(nd->nd_priv_ic,
+				nl2->msl_node_addr);
+			    break;
+		    }
+		    nl2 = nl2->next;
+		}
+
+		/*
+		 * Nodelist must be kept in ascending nodeid order.
+		 */
+		if (sd->sd_nodelist == NULL) {
+			/* Nothing in list, just add it */
+			sd->sd_nodelist = nd;
+		} else if (nd->nd_nodeid <
+		    sd->sd_nodelist->nd_nodeid) {
+			/* Add to head of list */
+			nd->nd_next = sd->sd_nodelist;
+			sd->sd_nodelist = nd;
+		} else {
+			nd_curr = sd->sd_nodelist->nd_next;
+			nd_prev = sd->sd_nodelist;
+			/* Search for place to add it */
+			while (nd_curr) {
+				if (nd->nd_nodeid < nd_curr->nd_nodeid) {
+					/* Add before nd_curr */
+					nd->nd_next = nd_curr;
+					nd_prev->nd_next = nd;
+					break;
+				}
+				nd_prev = nd_curr;
+				nd_curr = nd_curr->nd_next;
+			}
+			/* Add to end of list */
+			if (nd_curr == NULL) {
+				nd_prev->nd_next = nd;
+			}
+
+		}
+		/* Node already verified to be in membership */
+		nd->nd_flags |= MD_MN_NODE_ALIVE;
+	}
+	meta_free_nodelist(nl);
+
+	/* If we have drives */
+	if (dd != NULL) {
+		/*
+		 * For all the hosts being added, create a sidename structure
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			for (p = dd; p != NULL; p = p->dd_next) {
+				if (make_sideno_sidenm(sp, p->dd_dnp,
+				    nd->nd_nodeid, ep) != 0)
+					goto rollback;
+			}
+			nd = nd->nd_next;
+		}
+
+		RB_PREEMPT;
+		rb_level = 2;   /* level 2 */
+
+		RB_TEST(4, "addhosts", ep)
+
+		/*
+		 * Add the new sidename for each drive to all the hosts
+		 *
+		 * If a multi-node diskset, each host only stores
+		 * the side information for itself.  So, only send
+		 * side information to the new hosts where each host
+		 * will add the appropriate side information to its
+		 * local mddb.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c,
+			    node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Add side info to new hosts */
+			if (clnt_add_drv_sidenms(nd->nd_nodename,
+			    mynode(), sp, sd, node_c, node_v, ep))
+				goto rollback;
+
+			nd = nd->nd_next;
+		}
+
+		RB_TEST(5, "addhosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 3;	/* level 3 */
+
+		RB_TEST(6, "addhosts", ep)
+
+		/*
+		 * Add the device names for the new sides into the namespace
+		 * for all hosts being added.  This is adding the side
+		 * names to the diskset's mddb so add sidenames for all
+		 * of the new hosts.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* this side was just created, add the names */
+			if (add_md_sidenms(sp, nd->nd_nodeid,
+			    MD_SIDEWILD, ep))
+				goto rollback;
+
+			nd = nd->nd_next;
+		}
+
+		RB_TEST(7, "addhosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 4;   /* level 4 */
+
+		RB_TEST(8, "addhosts", ep)
+
+		if (add_db_sidenms(sp, ep))
+			goto rollback;
+
+	} else {
+		RB_PREEMPT;
+		rb_level = 4;
+	}
+
+	RB_TEST(9, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 5;	/* level 5 */
+
+	RB_TEST(10, "addhosts", ep)
+
+	if (dd != NULL) {
+		/*
+		 * Notify rpc.mdcommd on all nodes of a nodelist change.
+		 * Start by suspending rpc.mdcommd (which drains it of all
+		 * messages), then change the nodelist followed by a reinit
+		 * and resume.
+		 */
+		nd = sd->sd_nodelist;
+		/* Send suspend_all to nodes in nodelist (existing + new) */
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto rollback;
+			}
+			suspendall_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	/* Add the node(s) to each host that is currently in the set */
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_addhosts(nd->nd_nodename, sp, node_c, node_v, ep)) {
+			goto rollback;
+		}
+		nd = nd->nd_next;
+	}
+
+	RB_TEST(11, "addhosts", ep)
+
+	if (dd != NULL) {
+		/*
+		 * Mark the drives MD_DR_OK.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
+			    MD_DR_OK, ep) == -1)
+				goto rollback;
+			nd = nd->nd_next;
+		}
+	}
+
+	RB_TEST(12, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 6;   /* level 6 */
+
+	RB_TEST(13, "addhosts", ep)
+
+
+	/* Add the mediator information to all hosts in the set. */
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep))
+			goto rollback;
+		nd = nd->nd_next;
+	}
+
+	RB_TEST(14, "addhosts", ep)
+
+	/*
+	 * If a MN diskset and there are drives in the set,
+	 * set the master on the new nodes and
+	 * automatically join the new nodes into the set.
+	 */
+	if (dd != NULL) {
+		mddb_config_t   c;
+		/*
+		 * Is current set STALE?
+		 */
+		(void) memset(&c, 0, sizeof (c));
+		c.c_id = 0;
+		c.c_setno = sp->setno;
+		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &c.c_mde);
+			rval = -1;
+			goto out;
+		}
+		if (c.c_flags & MDDB_C_STALE) {
+			stale_flag = MNSET_IS_STALE;
+		}
+
+		/* Set master on newly added nodes */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_mnsetmaster(node_v[i], sp,
+			    sd->sd_mn_master_nodenm,
+			    sd->sd_mn_master_nodeid, ep)) {
+				goto rollback;
+			}
+		}
+		/* Join newly added nodes to diskset and set OWN flag */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_joinset(node_v[i], sp, stale_flag, ep))
+				goto rollback;
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
+					nd->nd_flags |= MD_MN_NODE_OWN;
+					/*
+					 * Also set ADD flag since this flag
+					 * is already set in rpc.metad - it's
+					 * just not in the local copy.
+					 * Could flush local cache and call
+					 * metaget_setdesc, but this just
+					 * adds time.  Since this node knows
+					 * the state of the node flags in
+					 * rpc.metad, just set the ADD
+					 * flag and save time.
+					 */
+					nd->nd_flags |= MD_MN_NODE_ADD;
+					break;
+				}
+				nd = nd->nd_next;
+			}
+		}
+
+		/* Send new node flag list to all Owner nodes */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * Will effectively set OWN flag in records kept
+			 * cached in rpc.metad.  The ADD flag would have
+			 * already been set by the call to clnt_addhosts.
+			 */
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+			    sd->sd_nodelist, MD_NR_SET, NULL, ep)) {
+				goto rollback;
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Mark the set record MD_SR_OK
+	 */
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_upd_sr_flags(nd->nd_nodename, sp, MD_SR_OK,
+		    ep)) {
+			goto rollback;
+		}
+		nd = nd->nd_next;
+	}
+
+	/*
+	 * For MN diskset:
+	 * On each newly added node, set the node record for that node
+	 * to OK.  Then set all node records for the newly added
+	 * nodes on all nodes to ok.
+	 *
+	 * By setting a node's own node record to ok first, even if
+	 * the node adding the hosts panics, the rest of the nodes can
+	 * determine the same node list during the choosing of the master
+	 * during reconfig.  So, only nodes considered for mastership
+	 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set
+	 * on that node's rpc.metad.  If all nodes have MD_SR_OK set,
+	 * but no node has its own MD_MN_NODE_OK set, then the set will
+	 * be removed during reconfig since a panic occurred during the
+	 * creation of the initial diskset.
+	 */
+
+	for (i = 0; i < node_c; i++) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+				break;
+			nd = nd->nd_next;
+		}
+		/* Something wrong, will pick this up in next loop */
+		if (nd == NULL)
+			continue;
+
+		/* Only changing my local cache of node list */
+		saved_nd_next = nd->nd_next;
+		nd->nd_next = NULL;
+
+		/* Set node record for added host to ok on that host */
+		if (clnt_upd_nr_flags(node_v[i], sp,
+		    nd, MD_NR_OK, NULL, ep)) {
+			nd->nd_next = saved_nd_next;
+			goto rollback;
+		}
+		nd->nd_next = saved_nd_next;
+	}
+
+	/* Now set all node records on all nodes to be ok */
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+		    sd->sd_nodelist, MD_NR_OK, NULL, ep)) {
+			goto rollback;
+		}
+		nd = nd->nd_next;
+	}
+
+	RB_TEST(15, "addhosts", ep)
+out:
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.  Then send resume.
+	 * Resume on class 0 will resume all classes, so can skip
+	 * doing an explicit resume of class1 (ignore suspend1_flag).
+	 */
+	if (suspendall_flag) {
+		/*
+		 * Don't know if nodelist contains the nodes being added
+		 * or not, so do reinit to nodes not being added (by skipping
+		 * any nodes in the nodelist being added) and then do
+		 * reinit to nodes being added if remote_sets_created is 1.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Skip nodes being added - handled later */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		/*
+		 * Send reinit to added nodes that had a set created since
+		 * rpc.mdcommd is running on the nodes with a set.
+		 */
+		if (remote_sets_created == 1) {
+		    for (i = 0; i < node_c; i++) {
+			if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+			}
+		    }
+		}
+	}
+	if ((suspend1_flag) || (suspendall_flag)) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 *
+		 * Don't know if nodelist contains the nodes being added
+		 * or not, so do resume_all to nodes not being added (by
+		 * skipping any nodes in the nodelist being added) and then do
+		 * resume_all to nodes being added if remote_sets_created is 1.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Skip nodes being added - handled later */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		/*
+		 * Send resume to added nodes that had a set created since
+		 * rpc.mdcommd is running on the nodes with a set.
+		 */
+		if (remote_sets_created == 1) {
+		    for (i = 0; i < node_c; i++) {
+			/* Already verified to be alive */
+			if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+		    }
+		}
+		meta_ping_mnset(sp->setno);
+		/*
+		 * Start a resync thread on the newly added nodes
+		 * if set is not stale. Also start a thread to update the
+		 * abr state of all soft partitions
+		 */
+		if (stale_flag != MNSET_IS_STALE) {
+			for (i = 0; i < node_c; i++) {
+				if (clnt_mn_mirror_resync_all(node_v[i],
+				    sp->setno, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+					mde_perror(ep, dgettext(TEXT_DOMAIN,
+					    "Unable to start resync "
+					    "thread.\n"));
+				}
+				if (clnt_mn_sp_update_abr(node_v[i],
+				    sp->setno, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+					mde_perror(ep, dgettext(TEXT_DOMAIN,
+					    "Unable to start sp update "
+					    "thread.\n"));
+				}
+			}
+		}
+	}
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/*
+	 * Don't know if nodelist contains the nodes being added
+	 * or not, so do clnt_unlock_set to nodes not being added (by
+	 * skipping any nodes in the nodelist being added) and then do
+	 * clnt_unlock_set to nodes being added.
+	 */
+	if (lock_flag) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/* Skip hosts we get in the next loop */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+			nd = nd->nd_next;
+		}
+		for (i = 0; i < node_c; i++) {
+			/* Already verified to be alive */
+			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	}
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	return (rval);
+
+rollback:
+	rval = -1;
+
+	/* level 6 */
+	if (rb_level > 5) {
+		/*
+		 * For each node being deleted, set DEL flag and
+		 * reset OK flag on that node first.
+		 * Until a node has turned off its own
+		 * rpc.metad's NODE_OK flag, that node could be
+		 * considered for master during a reconfig.
+		 */
+		for (i = 0; i < node_c; i++) {
+			nd = sd->sd_nodelist;
+			/* All nodes are guaranteed to be ALIVE */
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			/* Something wrong, handle this in next loop */
+			if (nd == NULL)
+				continue;
+
+			/* Only changing my local cache of node list */
+			saved_nd_next = nd->nd_next;
+			nd->nd_next = NULL;
+
+			/* Set flags for del host to DEL on that host */
+			if (clnt_upd_nr_flags(node_v[i], sp,
+			    nd, MD_NR_DEL, NULL, &xep)) {
+				mdclrerror(&xep);
+			}
+			nd->nd_next = saved_nd_next;
+		}
+
+		for (i = 0; i < node_c; i++) {
+			if (dd != NULL) {
+				/* Reset master on newly added node */
+				if (clnt_mnsetmaster(node_v[i], sp, "",
+				    MD_MN_INVALID_NID, &xep))
+					mdclrerror(&xep);
+				/* Withdraw set on newly added node */
+				if (clnt_withdrawset(node_v[i], sp, &xep))
+					mdclrerror(&xep);
+			}
+			/*
+			 * Turn off owner flag in nodes to be deleted
+			 * if there are drives in the set.
+			 * Also, turn off NODE_OK and turn on NODE_DEL
+			 * for nodes to be deleted.
+			 * These flags are used to set the node
+			 * record flags in all nodes in the set.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
+					if (dd != NULL) {
+						nd->nd_flags &= ~MD_MN_NODE_OWN;
+					}
+					nd->nd_flags |= MD_MN_NODE_DEL;
+					nd->nd_flags &= ~MD_MN_NODE_OK;
+					break;
+				}
+				nd = nd->nd_next;
+			}
+		}
+
+		/*
+		 * Now, reset owner and set delete flags for the deleted
+		 * nodes on all nodes.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+			    sd->sd_nodelist, MD_NR_SET, NULL, &xep)) {
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		/*
+		 * On each node being deleted, set the set record
+		 * to be in DEL state.
+		 */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, &xep)) {
+				mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 5 */
+	if (rb_level > 4) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_delhosts(nd->nd_nodename, sp, node_c,
+			    node_v, &xep) == -1)
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.  Then send resume.
+	 * Nodelist contains all nodes (existing + added).
+	 */
+	if (suspendall_flag) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		/* Send reinit to nodes in nodelist before addhosts call */
+		while (nd) {
+			/*
+			 * Skip nodes being added if remote sets were not
+			 * created since rpc.mdcommd may not be running
+			 * on the remote nodes.
+			 */
+			if ((remote_sets_created == 0) &&
+			    (strinlst(nd->nd_nodename, node_c, node_v))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			/*
+			 * Skip nodes being added if remote sets were not
+			 * created since rpc.mdcommd may not be running
+			 * on the remote nodes.
+			 */
+			if ((remote_sets_created == 0) &&
+			    (strinlst(nd->nd_nodename, node_c, node_v))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * Resume all classes but class 1 so that lock is held
+			 * against meta* commands.
+			 * Send resume_all_but_1 to nodes in nodelist
+			 * before addhosts call.
+			 */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_DONT_RESUME_CLASS1,
+			    &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/* level 4 */
+	/* Nodelist may or may not contain nodes being added. */
+	if (rb_level > 3 && dd != NULL) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (del_db_sidenms(sp, nd->nd_nodeid, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	}
+
+	/* level 3 */
+	/* Nodelist may or may not contain nodes being added. */
+	if (rb_level > 2 && dd != NULL) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being added */
+			if (!strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (del_md_sidenms(sp, nd->nd_nodeid, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		if (dd != NULL) {
+			/* delete the drive records */
+			for (i = 0; i < node_c; i++) {
+				if (clnt_deldrvs(node_v[i], sp, dd, &xep) == -1)
+					mdclrerror(&xep);
+			}
+		}
+
+		/* delete the set record */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_delset(node_v[i], sp, &xep) == -1)
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	/* Nodelist may or may not contain nodes being added. */
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		/* All nodes are guaranteed to be ALIVE */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/*
+			 * Skip nodes being added since remote sets
+			 * were either created and then deleted or
+			 * were never created.  Either way - rpc.mdcommd
+			 * may not be running on the remote node.
+			 */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
+			    MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	nd = sd->sd_nodelist;
+	/* All nodes are guaranteed to be ALIVE */
+	while (nd) {
+		/* Skip hosts we get in the next loop */
+		if (strinlst(nd->nd_nodename, node_c, node_v)) {
+			nd = nd->nd_next;
+			continue;
+		}
+
+		if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+			mdclrerror(&xep);
+		nd = nd->nd_next;
+	}
+
+	for (i = 0; i < node_c; i++)
+		if (clnt_unlock_set(node_v[i], cl_sk, &xep))
+			mdclrerror(&xep);
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	metaflushsetname(sp);
+
+	return (rval);
+}
+
+/*
+ * Add host(s) to the traditional diskset provided in sp.
+ *	- create set if non-existent.
+ *
+ * Parameters:
+ *	sp		set the hosts are added to
+ *	multi_node	passed through unchanged to create_set() and
+ *			create_set_on_hosts()
+ *	node_c		number of entries in node_v
+ *	node_v		names of the hosts to add
+ *	auto_take	if non-zero and the set already exists, the call
+ *			fails with MDE_DS_SINGLEHOST; otherwise it is only
+ *			passed on to create_set()
+ *	ep		error return
+ *
+ * Returns 0 on success.  Failure paths return -1, or the value of
+ * mddserror() for the pre-checks that return it directly.  If the set
+ * does not exist yet, the result of create_set() is returned.
+ *
+ * Progress is tracked in rb_level so that the rollback path undoes only
+ * the stages that were completed:
+ *	rb_level 1	set locked on the current and the new hosts
+ *	rb_level 2	new hosts added to the set record on existing hosts
+ *	rb_level 3	drive sidenames created and pushed to existing hosts
+ *			(only when the set has drives)
+ *	rb_level 4	add_db_sidenms() done (only when the set has drives)
+ *	rb_level 5	set created on the new hosts
+ *	rb_level 6	add_md_sidenms() done for the new sides (only when
+ *			the set has drives)
+ */
+static int
+meta_traditional_set_addhosts(
+	mdsetname_t	*sp,
+	int		multi_node,
+	int		node_c,
+	char		**node_v,
+	int		auto_take,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	md_drive_desc	*dd, *p;
+	med_rec_t	medr;
+	med_rec_t	rb_medr;
+	int		rval = 0;
+	int		bool;
+	int		nodeindex;
+	int 		i;
+	int		has_set;
+	int		numsides;
+	sigset_t	oldsigs;
+	md_setkey_t	*cl_sk;
+	int		rb_level = 0;
+	md_error_t	xep = mdnullerror;
+	int		max_meds;
+
+	/* BEGIN CHECK CODE: nothing below is modified until END CHECK CODE */
+
+	if (nodesuniq(sp, node_c, node_v, ep))
+		return (-1);
+
+	if (validate_nodes(sp, node_c, node_v, ep))
+		return (-1);
+
+	/*
+	 * A missing set (MDE_NO_SET) is not an error here: the set is
+	 * created with the given hosts instead.  Any other failure to get
+	 * the set description fails the call.
+	 */
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		if (! mdiserror(ep, MDE_NO_SET))
+			return (-1);
+		mdclrerror(ep);
+		return (create_set(sp, multi_node, node_c, node_v, auto_take,
+		    ep));
+	}
+
+	/* The auto_take behavior is inconsistent with multiple hosts. */
+	if (auto_take || sd->sd_flags & MD_SR_AUTO_TAKE) {
+		(void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
+		    sp->setname);
+		return (-1);
+	}
+
+	/*
+	 * We already have the set.
+	 */
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/*
+	 * Perform the required checks for new hosts
+	 */
+	for (i = 0; i < node_c; i++) {
+		/* The host must not already be a member of this set */
+		if (getnodeside(node_v[i], sd) != MD_SIDEWILD)
+			return (mddserror(ep, MDE_DS_NODEINSET, sp->setno,
+			    node_v[i], NULL, sp->setname));
+
+		/* Make sure this set name is not used on the other hosts */
+		has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
+		if (has_set < 0) {
+			if (! mdiserror(ep, MDE_NO_SET))
+				return (-1);
+			/* Keep on truck'n */
+			mdclrerror(ep);
+		} else if (has_set)
+			return (mddserror(ep, MDE_DS_NODEHASSET, sp->setno,
+			    node_v[i], NULL, sp->setname));
+
+		/* The set number must be free on the new host */
+		if (clnt_setnumbusy(node_v[i], sp->setno, &bool, ep) == -1)
+			return (-1);
+
+		if (bool == TRUE)
+			return (mddserror(ep, MDE_DS_SETNUMBUSY, sp->setno,
+			    node_v[i], NULL, sp->setname));
+
+		/* The set name must be acceptable on the new host */
+		if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1)
+			return (-1);
+
+		if (bool == FALSE)
+			return (mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
+			    node_v[i], NULL, sp->setname));
+
+		if (check_setdrvs_againstnode(sp, node_v[i], ep))
+			return (-1);
+	}
+
+	/* Count the number of occupied slots */
+	numsides = 0;
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Count occupied slots */
+		if (sd->sd_nodes[i][0] != '\0')
+			numsides++;
+	}
+
+	/* Make sure that we have space to add the new sides */
+	if ((numsides + node_c) > MD_MAXSIDES) {
+		(void) mddserror(ep, MDE_DS_SIDENUMNOTAVAIL, sp->setno, NULL,
+		    NULL, sp->setname);
+		return (-1);
+	}
+
+	/*
+	 * Get drive descriptors for the set.  A NULL result with no error
+	 * set in ep is accepted; dd == NULL is treated below as "the set
+	 * has no drives".
+	 */
+	if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL)
+		if (! mdisok(ep))
+			return (-1);
+
+	/*
+	 * Setup the mediator record roll-back structure.  It is built from
+	 * sd->sd_nodes as it is now, i.e. before the new hosts are merged
+	 * in, and is what the rollback path sends back to the mediators.
+	 */
+	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
+	rb_medr.med_rec_mag = MED_REC_MAGIC;
+	rb_medr.med_rec_rev = MED_REC_REV;
+	rb_medr.med_rec_fl  = 0;
+	rb_medr.med_rec_sn  = sp->setno;
+	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
+	for (i = 0; i < MD_MAXSIDES; i++)
+		(void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]);
+	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
+	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
+	rb_medr.med_rec_foff = 0;
+	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	if ((max_meds = get_max_meds(ep)) == 0)
+		return (-1);
+
+	/* END CHECK CODE */
+
+	/*
+	 * From here on every exit goes through "out" or "rollback", both of
+	 * which call md_rb_sig_handling_off().
+	 */
+	md_rb_sig_handling_on();
+
+	/*
+	 * Lock the set on current set members.  A lock failure goes to
+	 * "out" rather than "rollback": rb_level is still 0, so there is
+	 * nothing to undo except the locks.
+	 */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/* Lock the set on new set members */
+	for (i = 0; i < node_c; i++) {
+		if (clnt_lock_set(node_v[i], sp, ep)) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	RB_TEST(1, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "addhosts", ep)
+
+	/*
+	 * Add the new hosts to the existing set record on the existing hosts
+	 */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		if (clnt_addhosts(sd->sd_nodes[i], sp, node_c, node_v, ep))
+			goto rollback;
+	}
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(3, "addhosts", ep);
+
+	/*
+	 * Merge the new entries into the set with the existing sides.
+	 * This changes sd->sd_nodes, so from here on loops over
+	 * sd->sd_nodes see both the existing and the new hosts; strinlst()
+	 * against node_v is used to tell them apart.
+	 * NOTE(review): the first empty slot is filled before node_c is
+	 * compared, so this assumes node_c >= 1 -- confirm with callers.
+	 */
+	nodeindex = 0;
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Skip full slots */
+		if (sd->sd_nodes[i][0] != '\0')
+			continue;
+
+		(void) strcpy(sd->sd_nodes[i], node_v[nodeindex++]);
+		if (nodeindex == node_c)
+			break;
+	}
+
+	/* If we have drives */
+	if (dd != NULL) {
+		/*
+		 * For all the hosts being added, create a sidename structure
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip nodes not being added */
+			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+				continue;
+
+			/* The slot index i is used as the side number */
+			for (p = dd; p != NULL; p = p->dd_next) {
+				if (make_sideno_sidenm(sp, p->dd_dnp, i,
+				    ep) != 0)
+					goto rollback;
+			}
+		}
+
+		/*
+		 * Add the new sidename for each drive to the existing hosts
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip nodes being added */
+			if (strinlst(sd->sd_nodes[i], node_c, node_v))
+				continue;
+
+			if (clnt_add_drv_sidenms(sd->sd_nodes[i], mynode(), sp,
+			    sd, node_c, node_v, ep)) {
+				goto rollback;
+			}
+		}
+
+		RB_TEST(4, "addhosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 3;	/* level 3 */
+
+		RB_TEST(5, "addhosts", ep)
+
+		if (add_db_sidenms(sp, ep)) {
+			goto rollback;
+		}
+
+	} else {
+		/* No drives: nothing to do for this stage, just advance */
+		RB_PREEMPT;
+		rb_level = 3;
+	}
+
+	RB_TEST(6, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 4;	/* level 4 */
+
+	RB_TEST(7, "addhosts", ep)
+
+
+	/* create the set on the new nodes, this adds the drives as well */
+	if (create_set_on_hosts(sp, multi_node, node_c, node_v, 0, ep)) {
+		goto rollback;
+	}
+
+	RB_TEST(8, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 5;	/* level 5 */
+
+	RB_TEST(9, "addhosts", ep)
+
+	if (dd != NULL) {
+
+		/*
+		 * Add the device entries for the new sides into the namespace.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip nodes not being added */
+			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+				continue;
+
+			if (add_md_sidenms(sp, i, MD_SIDEWILD, ep))
+				goto rollback;
+		}
+	}
+
+	RB_TEST(10, "addhosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 6;	/* level 6 */
+
+	RB_TEST(11, "addhosts", ep);
+
+	if (dd != NULL) {
+		/*
+		 * Mark the drives MD_DR_OK.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
+			    MD_DR_OK, ep) == -1) {
+				goto rollback;
+			}
+		}
+	}
+
+	RB_TEST(12, "addhosts", ep)
+
+	/*
+	 * Bring the mediator record up to date with the set record: start
+	 * from the roll-back copy, overwrite the node list with the merged
+	 * one and regenerate the checksum.
+	 */
+	medr = rb_medr;				/* structure assignment */
+	for (i = 0; i < MD_MAXSIDES; i++)
+		(void) strcpy(medr.med_rec_nodes[i], sd->sd_nodes[i]);
+	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* Inform the mediator hosts of the new node list */
+	for (i = 0; i < max_meds; i++) {
+		/* Skip mediator slots that have no entries */
+		if (sd->sd_med.n_lst[i].a_cnt == 0)
+			continue;
+
+		if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
+			goto rollback;
+	}
+
+	/* Add the mediator information to all hosts in the set */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep))
+			goto rollback;
+	}
+
+	RB_TEST(13, "addhosts", ep)
+
+	/*
+	 * Mark the set record MD_SR_OK
+	 */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		if (clnt_upd_sr_flags(sd->sd_nodes[i], sp, MD_SR_OK, ep))
+			goto rollback;
+	}
+
+	RB_TEST(14, "addhosts", ep)
+
+	/*
+	 * Common exit for success and for a lock failure (rval == -1,
+	 * rb_level == 0).  Unlock errors are collected in xep; only the
+	 * first one is moved into ep, and only if no error was recorded
+	 * before.
+	 */
+out:
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		/* Skip empty slots */
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+
+		/* Skip hosts we get in the next loop */
+		if (strinlst(sd->sd_nodes[i], node_c, node_v))
+			continue;
+
+		if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+			if (rval == 0)
+				(void) mdstealerror(ep, &xep);
+			rval = -1;
+		}
+	}
+
+	/*
+	 * NOTE(review): the new hosts are unlocked only while rval is still
+	 * 0.  When this point is reached after a failed clnt_lock_set(), or
+	 * after an unlock failure above, new hosts that were locked are not
+	 * unlocked here -- confirm that this is intended.
+	 */
+	if (rval == 0) {
+		for (i = 0; i < node_c; i++)
+			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+	}
+	cl_set_setkey(NULL);
+
+	metaflushsetname(sp);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+
+	/*
+	 * Undo the completed stages in reverse order.  The error that caused
+	 * the rollback stays in ep; errors from the undo calls themselves
+	 * go to xep and are cleared, so the rollback is best effort.
+	 */
+rollback:
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	rval = -1;
+
+	/* level 6 */
+	if (rb_level > 5) {
+		/* Send the pre-merge mediator record back to the mediators */
+		for (i = 0; i < max_meds; i++) {
+			if (sd->sd_med.n_lst[i].a_cnt == 0)
+				continue;
+
+			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
+			    &rb_medr, &xep))
+				mdclrerror(&xep);
+		}
+		if (dd != NULL) {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				/* Skip nodes not being added */
+				if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+					continue;
+
+				if (del_md_sidenms(sp, i, &xep))
+					mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 5 */
+	if (rb_level > 4) {
+		if (dd != NULL) {
+			/* delete the drive records */
+			for (i = 0; i < node_c; i++) {
+				if (clnt_deldrvs(node_v[i], sp, dd, &xep) == -1)
+					mdclrerror(&xep);
+			}
+		}
+		/* delete the set record on the 'new' hosts */
+		for (i = 0; i < node_c; i++) {
+			if (clnt_delset(node_v[i], sp, &xep) == -1)
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 4 */
+	if (rb_level > 3 && dd != NULL) {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip nodes not being added */
+			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+				continue;
+
+			if (del_db_sidenms(sp, i, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/*
+	 * level 3
+	 * NOTE(review): the forward step called clnt_add_drv_sidenms() on
+	 * the existing hosts, whereas this loop calls
+	 * clnt_del_drv_sidenms() only on the hosts being added -- confirm
+	 * that this is intended.
+	 */
+	if (rb_level > 2 && dd != NULL) {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip nodes not being added */
+			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
+				continue;
+
+			if (clnt_del_drv_sidenms(sd->sd_nodes[i], sp,
+			    &xep) == -1)
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		/*
+		 * sd->sd_nodes already contains the merged list at this
+		 * level, so the delete is sent to every occupied slot, new
+		 * hosts included.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_delhosts(sd->sd_nodes[i], sp, node_c, node_v,
+			    &xep) == -1)
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		/* Release the set lock on the existing and the new hosts */
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip hosts we get in the next loop */
+			if (strinlst(sd->sd_nodes[i], node_c, node_v))
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+
+		for (i = 0; i < node_c; i++)
+			if (clnt_unlock_set(node_v[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		cl_set_setkey(NULL);
+	}
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	metaflushsetname(sp);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+}
+
+/*
+ * Public entry point for adding host(s) to the diskset given in sp;
+ * the set is created if it does not exist yet.
+ *
+ * This routine only dispatches on multi_node: a zero value selects the
+ * traditional diskset implementation, any other value selects the
+ * multinode one.  All arguments are handed on unchanged and the result
+ * of the selected implementation is returned as is.
+ */
+int
+meta_set_addhosts(
+	mdsetname_t	*sp,
+	int		multi_node,
+	int		node_c,
+	char		**node_v,
+	int		auto_take,
+	md_error_t	*ep
+)
+{
+	int	status;
+
+	if (multi_node == 0) {
+		/* traditional diskset */
+		status = meta_traditional_set_addhosts(sp, multi_node,
+		    node_c, node_v, auto_take, ep);
+	} else {
+		/* multinode diskset */
+		status = meta_multinode_set_addhosts(sp, multi_node,
+		    node_c, node_v, auto_take, ep);
+	}
+
+	return (status);
+}
+
+/*
+ * Delete host(s) from the diskset provided in sp.
+ * 	- destroy set if last host in set is removed.
+ */
+int
+meta_set_deletehosts(
+	mdsetname_t		*sp,
+	int			node_c,
+	char			**node_v,
+	int			forceflg,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*dd;
+	med_rec_t		medr;
+	med_rec_t		rb_medr;
+	int			i, j;
+	int			has_set;
+	int			numsides = 0;
+	int			oha = FALSE;
+	sigset_t		oldsigs;
+	mhd_mhiargs_t		mhiargs;
+	md_replicalist_t	*rlp = NULL;
+	md_setkey_t		*cl_sk;
+	ulong_t			max_genid = 0;
+	int			rval = 0;
+	int			rb_level = 0;
+	int			max_meds = 0;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	md_mnnode_record	*nr;
+	int			delete_master = 0;
+	int			suspendall_flag = 0, suspendall_flag_rb = 0;
+	int			suspend1_flag = 0;
+	int			lock_flag = 0;
+	int			stale_flag = 0;
+	int			*node_id_list = NULL;
+	int			remote_sets_deleted = 0;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * Verify that list of nodes being deleted contains no
+	 * duplicates.
+	 */
+	if (nodesuniq(sp, node_c, node_v, ep))
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/*
+	 * The drive and node records are stored in the local mddbs of each
+	 * node in the diskset.  Each node's rpc.metad daemon reads in the set,
+	 * drive and node records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set, drive and node records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list) if the
+	 * forceflag is FALSE.  (The case of forceflag being TRUE is handled
+	 * in the OHA check below.)
+	 *
+	 * If forceflag is FALSE and a node in the diskset is not in
+	 * the membership list, then fail this operation since all nodes must
+	 * be ALIVE in order to delete the node record from their local mddb.
+	 * If a panic of this node leaves the local mddbs set, node and drive
+	 * records out-of-sync, the reconfig cycle will fix the local mddbs
+	 * and force them back into synchronization.
+	 */
+	if ((forceflg == FALSE) && (MD_MNSET_DESC(sd))) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				return (mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+				    sp->setno, nd->nd_nodename,
+				    NULL, sp->setname));
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+
+	/*
+	 * Lock the set on current set members.
+	 * Set locking done much earlier for MN diskset than for traditional
+	 * diskset since lock_set and SUSPEND are used to protect against
+	 * other meta* commands running on the other nodes.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out2;
+			}
+			lock_flag = 1;
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out2;
+			}
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	for (i = 0; i < node_c; i++)
+		if (getnodeside(node_v[i], sd) == MD_SIDEWILD) {
+			(void) mddserror(ep, MDE_DS_NODENOTINSET, sp->setno,
+			    node_v[i], NULL, sp->setname);
+			rval = -1;
+			goto out2;
+		}
+
+	/*
+	 * Count the number of nodes currently in the set.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			numsides++;
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++)
+			/* Count full slots */
+			if (sd->sd_nodes[i][0] != '\0')
+				numsides++;
+	}
+
+	/*
+	 * OHA mode == -f -h <hostname>
+	 * OHA is One Host Administration that occurs when the forceflag (-f)
+	 * is set and at least one host in the diskset isn't responding
+	 * to RPC requests.
+	 *
+	 * When in OHA mode, a node cannot delete itself from a diskset.
+	 * When in OHA mode, a node can delete a list of nodes from a diskset
+	 * even if some of the nodes in the diskset are unresponsive.
+	 *
+	 * For multinode diskset, only allow OHA mode when the nodes that
+	 * aren't responding in the diskset are not in the membership list
+	 * (i.e. nodes that aren't responding are not marked ALIVE).
+	 * Nodes that aren't in the membership list will be rejoining
+	 * the diskset through a reconfig cycle and the local mddb set
+	 * and node records can be reconciled during the reconfig cycle.
+	 *
+	 * If a node isn't responding, but is still in the membership list,
+	 * fail the request since the node may not be responding because
+	 * rpc.metad died and is restarting.  In this case, no reconfig
+	 * cycle will be started, so there's no way to recover if
+	 * the host delete operation was allowed.
+	 *
+	 * NOTE: if nodes that weren't in the membership when the OHA host
+	 * delete occurred are now the only nodes in membership list,
+	 * those nodes will see the old view of the diskset.  As soon as
+	 * a node re-enters the cluster that was present in the cluster
+	 * during the host deletion, the diskset will reflect the host
+	 * deletion on all nodes presently in the cluster.
+	 */
+	if (forceflg == TRUE) {
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/*
+				 * If a node isn't ALIVE (in member list),
+				 * then allow a force-able delete in OHA mode.
+				 */
+				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+					oha = TRUE;
+					break;
+				}
+				/*
+				 * Don't test for clnt_nullproc since already
+				 * tested the RPC connections by clnt_lock_set.
+				 */
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_nullproc(sd->sd_nodes[i], ep) == -1) {
+					/*
+					 * If we timeout to at least one
+					 * client, then we can allow OHA mode,
+					 * otherwise, we are in normal mode.
+					 */
+					if (mdanyrpcerror(ep)) {
+						mdclrerror(ep);
+						if (strinlst(sd->sd_nodes[i],
+						    node_c, node_v)) {
+							oha = TRUE;
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/*
+	 * Don't allow this for MN diskset since meta_set_destroy of 1 node
+	 * does NOT remove this node's node record from the other node's set
+	 * records in their local mddb.  This leaves a MN diskset in a very
+	 * messed up state.
+	 */
+	if (!(MD_MNSET_DESC(sd))) {
+		/* Destroy set */
+		if (forceflg == TRUE && node_c == 1 &&
+		    strcmp(mynode(), node_v[0]) == 0) {
+			/* Can return since !MN diskset so nothing to unlock */
+			return (meta_set_destroy(sp, TRUE, ep));
+		}
+	}
+
+
+	/*
+	 * In multinode diskset, can only delete self if this
+	 * is the last node in the set or if all nodes in
+	 * the set are being deleted.  The traditional diskset code
+	 * allows a node to delete itself (when there are other nodes
+	 * in the diskset) when using the force flag, but that code
+	 * path doesn't have the node remove itself from
+	 * the set node list on the other nodes.  Since this isn't
+	 * satisfactory for the multinode diskset, just don't
+	 * allow this operation.
+	 */
+	if (MD_MNSET_DESC(sd) && (numsides > 1) && (node_c != numsides) &&
+	    strinlst(mynode(), node_c, node_v)) {
+		(void) mddserror(ep, MDE_DS_MNCANTDELSELF, sp->setno,
+		    mynode(), NULL, sp->setname);
+		rval = -1;
+		goto out2;
+	}
+
+	/*
+	 * In multinode diskset, don't allow deletion of master node unless
+	 * this is the only node left or unless all nodes are being
+	 * deleted since there is no way to switch
+	 * master ownership (unless via a cluster reconfig cycle).
+	 */
+	delete_master = strinlst(sd->sd_mn_master_nodenm, node_c, node_v);
+	if (MD_MNSET_DESC(sd) && (numsides > 1) && (node_c != numsides) &&
+	    delete_master) {
+		(void) mddserror(ep, MDE_DS_CANTDELMASTER, sp->setno,
+		    sd->sd_mn_master_nodenm, NULL, sp->setname);
+		rval = -1;
+		goto out2;
+	}
+
+
+	/* Deleting self w/o forceflg */
+	if (forceflg == FALSE && numsides > 1 &&
+	    strinlst(mynode(), node_c, node_v)) {
+		(void) mddserror(ep, MDE_DS_CANTDELSELF, sp->setno,
+		    mynode(), NULL, sp->setname);
+		rval = -1;
+		goto out2;
+	}
+
+	/*
+	 * Setup the mediator record roll-back structure for a trad diskset.
+	 *
+	 * For a MN diskset, the deletion of a host in the diskset
+	 * does not cause an update of the mediator record.  If the
+	 * host deletion will cause the diskset to be removed (this is
+	 * the last host being removed or all hosts are being removed)
+	 * then the mediator record must have already been removed by the
+	 * user or this delete host operation will fail (a check for
+	 * this is done later in this routine).
+	 */
+	if (!(MD_MNSET_DESC(sd))) {
+		(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
+		rb_medr.med_rec_mag = MED_REC_MAGIC;
+		rb_medr.med_rec_rev = MED_REC_REV;
+		rb_medr.med_rec_fl = 0;
+		rb_medr.med_rec_sn  = sp->setno;
+		(void) strcpy(rb_medr.med_rec_snm, sp->setname);
+		for (i = 0; i < MD_MAXSIDES; i++)
+		    (void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]);
+		rb_medr.med_rec_meds = sd->sd_med;  /* structure assigment */
+		(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
+		rb_medr.med_rec_foff = 0;
+		crcgen(&rb_medr, &rb_medr.med_rec_cks,
+		    sizeof (med_rec_t), NULL);
+
+		/* Bring the mediator record up to date with the set record */
+		medr = rb_medr;			/* structure assignment */
+
+		if ((max_meds = get_max_meds(ep)) == 0) {
+			rval = -1;
+			goto out2;
+		}
+	}
+
+	/*
+	 * For traditional diskset:
+	 * Check to see if all the hosts we are trying to delete the set from
+	 * have a set "setname" that is the same as ours, i.e. - same name,
+	 * same time stamp, same genid.  We only do this if forceflg is not
+	 * specified or we are in OHA mode.
+	 */
+	if (!(MD_MNSET_DESC(sd)) && (forceflg == FALSE || oha == TRUE)) {
+		int	fix_node_v = FALSE;
+		int	j;
+
+		for (i = 0; i < node_c; i++) {
+			/* We skip this side */
+			if (strcmp(mynode(), node_v[i]) == 0)
+				continue;
+
+			has_set = nodehasset(sp, node_v[i], NHS_NSTG_EQ, ep);
+
+			if (has_set < 0) {
+				char	 *anode[1];
+
+				/*
+				 * Being unable to talk to the host is only
+				 * tolerated in OHA mode.
+				 */
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+
+				/*
+				 * We got an error we do not, or are not,
+				 * prepared to handle.
+				 */
+				if (! mdiserror(ep, MDE_NO_SET) &&
+				    ! mdismddberror(ep, MDE_DB_NODB)) {
+					rval = -1;
+					goto out2;
+				}
+				mdclrerror(ep);
+
+				/*
+				 * If we got here: both hosts are up; a host in
+				 * our set record does not have the set. So we
+				 * delete the host from our set and invalidate
+				 * the node.
+				 */
+				anode[0] = Strdup(node_v[i]);
+
+				rval = del_host_noset(sp, anode, ep);
+
+				/*
+				 * If we delete a host, make sure the mediator
+				 * hosts are made aware of this.
+				 */
+				for (j = 0; j < MD_MAXSIDES; j++) {
+					if (strcmp(medr.med_rec_nodes[j],
+					    node_v[i]) != 0)
+						continue;
+					(void) memset(&medr.med_rec_nodes[j],
+					    '\0', sizeof (md_node_nm_t));
+				}
+				crcgen(&medr, &medr.med_rec_cks,
+				    sizeof (med_rec_t), NULL);
+
+				rb_medr = medr;		/* struct assignment */
+
+				Free(anode[0]);
+
+				if (rval == -1)
+					goto out2;
+
+				node_v[i][0] = '\0';
+				fix_node_v = TRUE;
+				continue;
+			}
+
+			/*
+			 * If we can talk to the host, and they do not have the
+			 * exact set, then we disallow the operation.
+			 */
+			if (has_set == FALSE) {
+				(void) mddserror(ep, MDE_DS_NODENOSET,
+				    sp->setno, node_v[i], NULL, sp->setname);
+				rval = -1;
+				goto out2;
+			}
+		}
+
+		/*
+		 * Here we prune the node_v's that were invalidated above.
+		 */
+		if (fix_node_v == TRUE) {
+			i = 0;
+			while (i < node_c) {
+				if (node_v[i][0] == '\0') {
+					for (j = i; (j + 1) < node_c; j++)
+						node_v[j] = node_v[j + 1];
+					node_c--;
+				}
+				i++;
+			}
+			/*
+			 * If we are left with no nodes, then we have
+			 * completed the operation.
+			 */
+			if (node_c == 0) {
+				/*
+				 * Inform the mediator hosts of the new node
+				 * list
+				 */
+				for (i = 0; i < max_meds; i++) {
+					if (sd->sd_med.n_lst[i].a_cnt == 0)
+						continue;
+
+					if (clnt_med_upd_rec(
+					    &sd->sd_med.n_lst[i], sp, &medr,
+					    ep))
+						mdclrerror(ep);
+				}
+				rval = 0;
+				goto out2;
+			}
+		}
+	}
+
+	/*
+	 * For multinode diskset:
+	 * If forceflag is FALSE then check to see if all the hosts we
+	 * are trying to delete the set from have a set "setname" that
+	 * is the same as ours, i.e. - same name, same time stamp, same genid.
+	 * If forceflag is TRUE, then we don't care if the hosts being
+	 * deleted have the same set information or not since user is forcing
+	 * those hosts to be deleted.
+	 */
+	if ((MD_MNSET_DESC(sd)) && (forceflg == FALSE)) {
+		for (i = 0; i < node_c; i++) {
+			/* We skip this node since comparing against it */
+			if (strcmp(mynode(), node_v[i]) == 0)
+				continue;
+
+			has_set = nodehasset(sp, node_v[i], NHS_NSTG_EQ, ep);
+
+			if (has_set < 0) {
+				rval = -1;
+				goto out2;
+			}
+
+			/*
+			 * If we can talk to the host, and they do not have the
+			 * exact set, then we disallow the operation.
+			 */
+			if (has_set == FALSE) {
+				(void) mddserror(ep, MDE_DS_NODENOSET,
+				    sp->setno, node_v[i], NULL, sp->setname);
+				rval = -1;
+				goto out2;
+			}
+		}
+	}
+
+	/*
+	 * For traditional diskset:
+	 * Can't allow user to delete their node (without deleting all nodes)
+	 * out of a set in OHA mode, would leave a real mess.
+	 * This action was already failed above for a MN diskset.
+	 */
+	if (!(MD_MNSET_DESC(sd)) && (oha == TRUE) &&
+	    strinlst(mynode(), node_c, node_v)) {
+		/* Can directly return since !MN diskset; nothing to unlock */
+		return (mddserror(ep, MDE_DS_OHACANTDELSELF, sp->setno,
+		    mynode(), NULL, sp->setname));
+	}
+
+
+	/* Get the drive descriptors for this set */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out2;
+		}
+	}
+
+	/*
+	 * We have been asked to delete all the hosts in the set, i.e. - delete
+	 * the whole set.
+	 */
+	if (node_c == numsides) {
+		/*
+		 * This is only a valid operation if all drives have been
+		 * removed first.
+		 */
+
+		if (dd != NULL) {
+			(void) mddserror(ep, MDE_DS_HASDRIVES, sp->setno,
+			    NULL, NULL, sp->setname);
+			rval = -1;
+			goto out2;
+		}
+
+		/*
+		 * If a mediator is currently associated with this set,
+		 * fail the deletion of the last host(s).
+		 */
+		if (sd->sd_med.n_cnt != 0) {
+			(void) mddserror(ep, MDE_DS_HASMED, sp->setno,
+			    NULL, NULL, sp->setname);
+			rval = -1;
+			goto out2;
+		}
+
+		if (! mdisok(ep)) {
+			rval = -1;
+			goto out2;
+		}
+
+		rval = del_set_nodrives(sp, node_c, node_v, oha, ep);
+		remote_sets_deleted = 1;
+		goto out2;
+	}
+
+	/*
+	 * Get timeout values in case we need to roll back
+	 */
+	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
+	if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0) {
+		rval = -1;
+		goto out2;
+	}
+
+	if (dd != NULL) {
+		/*
+		 * We need this around for re-adding DB side names later.
+		 */
+		if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
+			rval = -1;
+			goto out2;
+		}
+
+		/*
+		 * Alloc nodeid list if drives are present in diskset.
+		 * nodeid list is used to reset mirror owners if the
+		 * owner is a deleted node.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			node_id_list = Zalloc(sizeof (int) * node_c);
+		}
+	}
+
+	/* Lock the set on current set members */
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_on();
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				rval = -1;
+				goto out2;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	RB_TEST(1, "deletehosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "deletehosts", ep)
+
+	if (MD_MNSET_DESC(sd)) {
+		md_mnnode_desc		*saved_nd_next;
+		mddb_config_t		c;
+
+		if (dd != NULL) {
+			/*
+			 * Notify rpc.mdcommd on all nodes of a nodelist change.
+			 * Start by suspending rpc.mdcommd (which drains it of
+			 * all messages), then change the nodelist followed
+			 * by a reinit and resume.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+					nd = nd->nd_next;
+					continue;
+				}
+				if (clnt_mdcommdctl(nd->nd_nodename,
+				    COMMDCTL_SUSPEND, sp,
+				    MD_MSG_CLASS0,
+				    MD_MSCF_NO_FLAGS, ep)) {
+					rval = -1;
+					goto out2;
+				}
+				suspendall_flag = 1;
+				nd = nd->nd_next;
+			}
+			/*
+			 * Is current set STALE?
+			 * Need to know this if delete host fails and node
+			 * is re-joined to diskset.
+			 */
+			(void) memset(&c, 0, sizeof (c));
+			c.c_id = 0;
+			c.c_setno = sp->setno;
+			if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
+				(void) mdstealerror(ep, &c.c_mde);
+				rval = -1;
+				goto out2;
+			}
+			if (c.c_flags & MDDB_C_STALE) {
+				stale_flag = MNSET_IS_STALE;
+			}
+		}
+
+		/*
+		 * For each node being deleted, set DEL flag and
+		 * reset OK flag on that node first.
+		 * Until a node has turned off its own
+		 * rpc.metad's NODE_OK flag, that node could be
+		 * considered for master during a reconfig.
+		 */
+		for (i = 0; i < node_c; i++) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			/* Something wrong, handle this in next loop */
+			if (nd == NULL)
+				continue;
+
+			/* If node_id_list is alloc'd, fill in for later use */
+			if (node_id_list)
+				node_id_list[i] = nd->nd_nodeid;
+
+			/* All nodes are guaranteed to be ALIVE unless OHA */
+			if ((oha == TRUE) &&
+			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+				continue;
+			}
+
+			/* Only changing my local cache of node list */
+			saved_nd_next = nd->nd_next;
+			nd->nd_next = NULL;
+
+			/* Set flags for del host to DEL on that host */
+			if (clnt_upd_nr_flags(node_v[i], sp,
+			    nd, MD_NR_DEL, NULL, ep)) {
+				nd->nd_next = saved_nd_next;
+				goto rollback;
+			}
+			nd->nd_next = saved_nd_next;
+		}
+		for (i = 0; i < node_c; i++) {
+			/*
+			 * Turn off owner flag in nodes to be deleted
+			 * if this node has been joined.
+			 * Also, turn off NODE_OK and turn on NODE_DEL
+			 * for nodes to be deleted.
+			 * These flags are used to set the node
+			 * record flags in all nodes in the set.
+			 * Only withdraw nodes that are joined.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/*
+				 * Don't communicate with non-ALIVE node if
+				 * in OHA - but set flags in master list so
+				 * alive nodes are updated correctly.
+				 */
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
+				    if ((oha == TRUE) &&
+					(!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+						nd->nd_flags |= MD_MN_NODE_DEL;
+						nd->nd_flags &= ~MD_MN_NODE_OK;
+						nd = nd->nd_next;
+						continue;
+				    }
+				    if (nd->nd_flags & MD_MN_NODE_OWN) {
+					/*
+					 * Going to set locally cached node
+					 * flags to rollback join so in case
+					 * of error, the rollback code knows
+					 * which nodes to re-join.
+					 * rpc.metad ignores the RB_JOIN flag.
+					 */
+					nd->nd_flags |= MD_MN_NODE_RB_JOIN;
+					nd->nd_flags &= ~MD_MN_NODE_OWN;
+
+					/*
+					 * Be careful in ordering of following
+					 * steps so that recovery from a panic
+					 * between the steps is viable.
+					 * Only reset master info in rpc.metad
+					 * - don't reset local cached info
+					 * which will be used to set master
+					 * info back if failure (rollback).
+					 */
+					if (clnt_withdrawset(nd->nd_nodename,
+					    sp, ep))
+						goto rollback;
+
+					/* Reset master on deleted node */
+					if (clnt_mnsetmaster(node_v[i], sp, "",
+					    MD_MN_INVALID_NID, ep))
+						goto rollback;
+				    }
+
+				    nd->nd_flags |= MD_MN_NODE_DEL;
+				    nd->nd_flags &= ~MD_MN_NODE_OK;
+				}
+				nd = nd->nd_next;
+			}
+		}
+
+		/*
+		 * Now, reset owner and set delete flags for the
+		 * deleted nodes on all nodes.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip non-ALIVE node if in OHA */
+			if ((oha == TRUE) &&
+			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+			    sd->sd_nodelist, MD_NR_SET, NULL, ep)) {
+				goto rollback;
+			}
+			nd = nd->nd_next;
+		}
+		/*
+		 * Notify rpc.mdcommd on all nodes of a nodelist change.
+		 * Send reinit command to mdcommd which forces it to get
+		 * fresh set description.
+		 */
+		if (suspendall_flag) {
+			/* Send reinit */
+			nd = sd->sd_nodelist;
+			while (nd) {
+			    if ((oha == TRUE) &&
+				(!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+				    nd = nd->nd_next;
+				    continue;
+			    }
+			    /* Class is ignored for REINIT */
+			    if (clnt_mdcommdctl(nd->nd_nodename,
+				COMMDCTL_REINIT,
+				sp, NULL, MD_MSCF_NO_FLAGS, ep)) {
+				    mde_perror(ep, dgettext(TEXT_DOMAIN,
+					"Unable to reinit rpc.mdcommd.\n"));
+				    goto rollback;
+			    }
+			    nd = nd->nd_next;
+			}
+			/* Send resume */
+			nd = sd->sd_nodelist;
+			while (nd) {
+			    if ((oha == TRUE) &&
+				(!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+				    nd = nd->nd_next;
+				    continue;
+			    }
+			    if (clnt_mdcommdctl(nd->nd_nodename,
+				COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
+				MD_MSCF_DONT_RESUME_CLASS1, ep)) {
+				    mde_perror(ep, dgettext(TEXT_DOMAIN,
+					"Unable to resume rpc.mdcommd.\n"));
+				    goto rollback;
+			    }
+			    nd = nd->nd_next;
+			}
+			meta_ping_mnset(sp->setno);
+		}
+	}
+
+
+	/*
+	 * Mark the set record MD_SR_DEL on the hosts we are deleting
+	 * If a MN diskset and OHA mode, don't issue RPC to nodes that
+	 * are not ALIVE.
+	 * If a MN diskset and not in OHA mode, then all nodes must respond
+	 * to RPC (be alive) or this routine will return failure.
+	 * If a traditional diskset, ignore RPC failures if in OHA mode.
+	 */
+	for (i = 0; i < node_c; i++) {
+
+		RB_TEST(3, "deletehosts", ep)
+
+		if ((MD_MNSET_DESC(sd)) && (oha == TRUE)) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i]) == 0) {
+					break;
+				}
+				nd = nd->nd_next;
+			}
+			if (nd == NULL) {
+				(void) mddserror(ep, MDE_DS_NODENOTINSET,
+				    sp->setno, node_v[i], NULL, sp->setname);
+				goto rollback;
+			} else if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				/* Skip non-ALIVE node if in OHA mode */
+				continue;
+			} else {
+				if (clnt_upd_sr_flags(node_v[i], sp,
+				    MD_SR_DEL, ep)) {
+					goto rollback;
+				}
+			}
+		} else if ((MD_MNSET_DESC(sd)) && (oha == FALSE)) {
+			/*
+			 * All nodes should be alive in non-oha mode.
+			 */
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+				goto rollback;
+			}
+		} else {
+			/*
+			 * For traditional diskset, issue the RPC and
+			 * ignore RPC failure if in OHA mode.
+			 */
+			if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				goto rollback;
+			}
+		}
+
+		RB_TEST(4, "deletehosts", ep)
+	}
+
+	RB_TEST(5, "deletehosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(6, "deletehosts", ep)
+
+	/* Delete the set on the hosts we are deleting */
+	if (del_set_on_hosts(sp, sd, dd, node_c, node_v, oha, ep)) {
+		if (node_id_list)
+			Free(node_id_list);
+		/*
+		 * Failure during del_set_on_hosts would have recreated
+		 * the diskset on the remote hosts, but for multi-owner
+		 * disksets need to set node flags properly and REINIT and
+		 * RESUME rpc.mdcommd, so just let the rollback code
+		 * do this.
+		 */
+		if (MD_MNSET_DESC(sd))
+			goto rollback;
+		return (-1);
+	}
+	remote_sets_deleted = 1;
+
+	RB_TEST(19, "deletehosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(20, "deletehosts", ep)
+
+	/* Delete the host from sets on hosts not being deleted */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE unless in oha mode */
+		while (nd) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			if ((oha == TRUE) &&
+			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Skip nodes being deleted */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_delhosts(nd->nd_nodename, sp, node_c, node_v,
+			    ep) == -1) {
+				goto rollback;
+			}
+
+			RB_TEST(21, "deletehosts", ep)
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip nodes being deleted */
+			if (strinlst(sd->sd_nodes[i], node_c, node_v))
+				continue;
+
+			if (clnt_delhosts(sd->sd_nodes[i], sp, node_c, node_v,
+			    ep) == -1) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				goto rollback;
+			}
+
+			RB_TEST(21, "deletehosts", ep)
+		}
+	}
+
+	/* We have drives */
+	if (dd != NULL) {
+		RB_TEST(22, "deletehosts", ep)
+
+		RB_PREEMPT;
+		rb_level = 4;	/* level 4 */
+
+		RB_TEST(23, "deletehosts", ep)
+
+		/*
+		 * Delete the old sidename for each drive on all the hosts.
+		 * If a multi-node diskset, each host only stores
+		 * the side information for itself.  So, a multi-node
+		 * diskset doesn't delete the old sidename for
+		 * an old host.
+		 *
+		 * If a MN diskset, reset owners of mirrors that are
+		 * owned by the deleted nodes.
+		 */
+		if (!(MD_MNSET_DESC(sd))) {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				/* Skip nodes being deleted */
+				if (strinlst(sd->sd_nodes[i], node_c, node_v))
+					continue;
+
+				if (clnt_del_drv_sidenms(sd->sd_nodes[i], sp,
+				    ep)) {
+					if (oha == TRUE && mdanyrpcerror(ep)) {
+						mdclrerror(ep);
+						continue;
+					}
+					metaflushsetname(sp);
+					goto rollback;
+				}
+
+				RB_TEST(24, "deletehosts", ep)
+			}
+		} else {
+		    nd = sd->sd_nodelist;
+		    /* All nodes guaranteed to be ALIVE unless in oha mode */
+		    while (nd) {
+			/*
+			 * If mirror owner was set to a deleted node, then
+			 * each existing node resets mirror owner to NULL.
+			 *
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			if ((oha == TRUE) &&
+			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Skip nodes being deleted */
+			if (strinlst(nd->nd_nodename, node_c, node_v)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/*
+			 * If mirror owner is a deleted node, reset mirror
+			 * owners to NULL.  If an error occurs, print a
+			 * warning and continue.  Don't fail metaset
+			 * because of mirror owner reset problem since next
+			 * node to grab mirror will resolve this issue.
+			 * Before next node grabs mirrors, metaset will show
+			 * the deleted node as owner which is why an attempt
+			 * to reset the mirror owner is made.
+			 */
+			if (clnt_reset_mirror_owner(nd->nd_nodename, sp,
+			    node_c, &node_id_list[0], &xep) == -1) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reset mirror owner on"
+				    " node %s\n"), nd->nd_nodename);
+				mdclrerror(&xep);
+			}
+
+			RB_TEST(21, "deletehosts", ep)
+			nd = nd->nd_next;
+		    }
+		}
+	}
+
+	RB_TEST(25, "deletehosts", ep)
+
+	RB_PREEMPT;
+	rb_level = 4;	/* level 4 */
+
+	RB_TEST(26, "deletehosts", ep)
+
+	/*
+	 * Bring the mediator record up to date with the set record for
+	 * traditional diskset.
+	 */
+	if (!(MD_MNSET_DESC(sd))) {
+		medr = rb_medr;			/* structure assignment */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			if (strinlst(sd->sd_nodes[i], node_c, node_v))
+				(void) memset(&medr.med_rec_nodes[i],
+					'\0', sizeof (md_node_nm_t));
+			else
+				(void) strcpy(medr.med_rec_nodes[i],
+					sd->sd_nodes[i]);
+		}
+		crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+		/* Inform the mediator hosts of the new node list */
+		for (i = 0; i < max_meds; i++) {
+			if (sd->sd_med.n_lst[i].a_cnt == 0)
+				continue;
+
+			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
+			    &medr, ep)) {
+				if (oha == TRUE && mdanyrpcerror(ep)) {
+					mdclrerror(ep);
+					continue;
+				}
+				goto rollback;
+			}
+		}
+	}
+
+	RB_TEST(27, "deletehosts", ep)
+
+	/*
+	 * For traditional diskset:
+	 * We are deleting ourselves out of the set and we have drives to
+	 * consider; so we need to halt the set, release the drives and
+	 * reset the timeout.  **** THIS IS A ONE WAY TICKET, NO ROLL BACK
+	 * IS POSSIBLE AS SOON AS THE HALT SET COMPLETES, SO THIS IS DONE
+	 * WITH ALL SIGNALS BLOCKED AND LAST ****
+	 *
+	 * This situation cannot occur in a MN diskset since a node can't
+	 * delete itself unless all nodes are being deleted and a diskset
+	 * cannot contain any drives if all nodes are being deleted.
+	 * So, don't even test for this if a MN diskset.
+	 */
+	if (!(MD_MNSET_DESC(sd)) && (dd != NULL) &&
+	    strinlst(mynode(), node_c, node_v)) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, ep) < 0) {
+			rval = -1;
+			goto out1;
+		}
+
+		if (halt_set(sp, ep)) {
+			rval = -1;
+			goto out1;
+		}
+
+		if (rel_own_bydd(sp, dd, FALSE, ep))
+			rval = -1;
+
+out1:
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0) {
+			if (rval == 0)
+				(void) mdstealerror(ep, &xep);
+			rval = -1;
+		}
+	}
+
+out2:
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag)) {
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * Skip nodes being deleted if remote set
+			 * was deleted since rpc.mdcommd may no longer
+			 * be running on remote node.
+			 */
+			if ((remote_sets_deleted == 1) &&
+			    (strinlst(nd->nd_nodename, node_c, node_v))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (lock_flag) {
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/*
+				 * During OHA mode, don't issue RPCs to
+				 * non-alive nodes since there is no reason to
+				 * wait for RPC timeouts.
+				 */
+				if ((oha == TRUE) &&
+				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+					nd = nd->nd_next;
+					continue;
+				}
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					if (oha == TRUE &&
+					    mdanyrpcerror(&xep)) {
+						mdclrerror(&xep);
+						continue;
+					}
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					rval = -1;
+				}
+			}
+		}
+	}
+	cl_set_setkey(NULL);
+
+out3:
+	metafreereplicalist(rlp);
+	if (node_id_list)
+		Free(node_id_list);
+
+	metaflushsetname(sp);
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	} else {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	max_genid = sd->sd_genid;
+
+
+	/*
+	 * Send reinit command to rpc.mdcommd which forces it to get
+	 * fresh set description and resume all classes but class 0.
+	 * Don't send any commands to rpc.mdcommd if set on that node
+	 * has been removed.
+	 */
+	if (suspendall_flag) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * If the remote set was deleted, rpc.mdcommd
+			 * may no longer be running so send nothing to it.
+			 */
+			if ((remote_sets_deleted == 1) &&
+			    (strinlst(nd->nd_nodename, node_c, node_v))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/*
+			 * If the remote set was deleted, rpc.mdcommd
+			 * may no longer be running so send nothing to it.
+			 */
+			if ((remote_sets_deleted == 1) &&
+			    (strinlst(nd->nd_nodename, node_c, node_v))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_DONT_RESUME_CLASS1,
+			    &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		md_set_record		*sr;
+		md_replicalist_t	*rl;
+
+		recreate_set(sp, sd);
+
+		/*
+		 * Lock out other meta* commands on nodes with the newly
+		 * re-created sets by suspending class 1 messages
+		 * across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* Skip nodes not being deleted */
+			if (!(strinlst(nd->nd_nodename, node_c, node_v))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			/* Suspend commd on nodes with re-created sets */
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to suspend rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+
+		max_genid++;
+
+		/*
+		 * See if we have to re-add the drives specified.
+		 */
+		for (i = 0; i < node_c; i++) {
+			if (MD_MNSET_DESC(sd) && (oha == TRUE)) {
+				/*
+				 * During OHA mode, don't issue RPCs to
+				 * non-alive nodes since there is no reason to
+				 * wait for RPC timeouts.
+				 */
+				nd = sd->sd_nodelist;
+				while (nd) {
+					if (strcmp(nd->nd_nodename, node_v[i])
+					    == 0) {
+						break;
+					}
+					nd = nd->nd_next;
+				}
+				if (nd == 0)
+					continue;
+				if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+					continue;
+			}
+
+			/* Don't care if set record is MN or not */
+			if (clnt_getset(node_v[i], sp->setname, MD_SET_BAD, &sr,
+			    &xep) == -1) {
+				mdclrerror(&xep);
+				continue;
+			}
+
+			/* Drive already added, skip to next node */
+			if (sr->sr_drivechain != NULL) {
+				/*
+				 * Set record structure was allocated from RPC
+				 * routine getset so this structure is only of
+				 * size md_set_record even if the MN flag is
+				 * set.  So, clear the flag so that the free
+				 * code doesn't attempt to free a structure
+				 * the size of md_mnset_record.
+				 */
+				sr->sr_flags &= ~MD_SR_MN;
+				free_sr(sr);
+				continue;
+			}
+
+			if (clnt_adddrvs(node_v[i], sp, dd, sr->sr_ctime,
+			    sr->sr_genid, &xep) == -1)
+				mdclrerror(&xep);
+
+			if (clnt_upd_dr_flags(node_v[i], sp, dd, MD_DR_OK,
+			    &xep) == -1)
+				mdclrerror(&xep);
+
+			/*
+			 * Set record structure was allocated from RPC routine
+			 * getset so this structure is only of size
+			 * md_set_record even if the MN flag is set.  So,
+			 * clear the flag so that the free code doesn't
+			 * attempt to free a structure the size of
+			 * md_mnset_record.
+			 */
+			sr->sr_flags &= ~MD_SR_MN;
+			free_sr(sr);
+		}
+		max_genid += 3;
+
+		for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+			md_replica_t	*r = rl->rl_repp;
+			/*
+			 * This is not the first replica being added to the
+			 * diskset so call with ADDSIDENMS_BCAST.  If this
+			 * is a traditional diskset, the bcast flag is ignored
+			 * since traditional disksets don't use the rpc.mdcommd.
+			 */
+			if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno,
+			    DB_ADDSIDENMS_BCAST, &xep))
+				mdclrerror(&xep);
+		}
+
+		/*
+		 * Add the device names for the new sides into the namespace,
+		 * on all hosts not being deleted.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* Find a node that is not being deleted */
+				if (!strinlst(nd->nd_nodename, node_c,
+				    node_v)) {
+					j = nd->nd_nodeid;
+					break;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (j = 0; j < MD_MAXSIDES; j++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[j][0] == '\0')
+					continue;
+
+				/* Find a node that is not being deleted */
+				if (!strinlst(sd->sd_nodes[j], node_c, node_v))
+					break;
+			}
+		}
+
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* Skip nodes not being deleted */
+				if (!strinlst(nd->nd_nodename, node_c,
+				    node_v)) {
+					nd = nd->nd_next;
+					continue;
+				}
+
+				/* this side was just created, add the names */
+				if (add_md_sidenms(sp, nd->nd_nodeid, j, &xep))
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				/* Skip nodes not being deleted */
+				if (!strinlst(sd->sd_nodes[i], node_c, node_v))
+					continue;
+
+				/* this side was just created, add the names */
+				if (add_md_sidenms(sp, i, j, &xep))
+					mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 4 */
+	if (rb_level > 3 && dd != NULL) {
+		/*
+		 * Add the new sidename for each drive to all the hosts
+		 * Multi-node disksets only store the sidename for
+		 * that host, so there is nothing to re-add.
+		 */
+		if (!(MD_MNSET_DESC(sd))) {
+			for (j = 0; j < MD_MAXSIDES; j++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[j][0] == '\0')
+					continue;
+
+				/* Skip nodes not being deleted */
+				if (!strinlst(sd->sd_nodes[j], node_c, node_v))
+					break;
+			}
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_add_drv_sidenms(sd->sd_nodes[i],
+				    sd->sd_nodes[j], sp, sd, node_c, node_v,
+				    &xep))
+					mdclrerror(&xep);
+			}
+		}
+
+	}
+
+	/* level 5 */
+	if ((rb_level > 4) && (!(MD_MNSET_DESC(sd)))) {
+		/* rollback the mediator record */
+		for (i = 0; i < max_meds; i++) {
+			if (sd->sd_med.n_lst[i].a_cnt == 0)
+				continue;
+
+			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
+			    &rb_medr, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 3 */
+	if (rb_level > 2) {
+		md_set_record		*sr;
+		md_mnset_record		*mnsr;
+
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			while (nd) {
+				if ((oha == TRUE) &&
+				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+					nd = nd->nd_next;
+					continue;
+				}
+				/* Record should be for a multi-node diskset */
+				if (clnt_mngetset(nd->nd_nodename, sp->setname,
+				    MD_SET_BAD, &mnsr, &xep) == -1) {
+					mdclrerror(&xep);
+					nd = nd->nd_next;
+					continue;
+				}
+
+				has_set = 1;
+
+				nr = mnsr->sr_nodechain;
+				while (nr) {
+					if (nd->nd_nodeid == nr->nr_nodeid) {
+						break;
+					}
+					nr = nr->nr_next;
+				}
+				if (nr == NULL)
+					has_set = 0;
+
+				free_sr((struct md_set_record *)mnsr);
+				if (has_set) {
+					nd = nd->nd_next;
+					continue;
+				}
+
+				if (clnt_addhosts(nd->nd_nodename, sp, node_c,
+				    node_v, &xep) == -1)
+					mdclrerror(&xep);
+
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				/* Record should be for a non-multi-node set */
+				if (clnt_getset(sd->sd_nodes[i], sp->setname,
+				    MD_SET_BAD, &sr, &xep) == -1) {
+					mdclrerror(&xep);
+					continue;
+				}
+
+				/*
+				 * Set record structure was allocated from RPC
+				 * routine getset so this structure is only of
+				 * size md_set_record even if the MN flag is
+				 * set.  So, clear the flag so that the free
+				 * code doesn't attempt to free a structure
+				 * the size of md_mnset_record.
+				 */
+				if (MD_MNSET_REC(sr)) {
+					sr->sr_flags &= ~MD_SR_MN;
+					free_sr(sr);
+					continue;
+				}
+
+				has_set = 1;
+				for (j = 0; j < MD_MAXSIDES; j++) {
+					/* Skip empty slots */
+					if (sd->sd_nodes[j][0] == '\0')
+						continue;
+
+					if (sr->sr_nodes[j][0] == '\0') {
+						has_set = 0;
+						break;
+					}
+				}
+
+				free_sr(sr);
+				if (has_set)
+					continue;
+
+				if (clnt_addhosts(sd->sd_nodes[i], sp, node_c,
+				    node_v, &xep) == -1)
+					mdclrerror(&xep);
+			}
+		}
+		max_genid++;
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		max_genid++;
+		/* Sets MD_SR_OK on given nodes. */
+		resync_genid(sp, sd, max_genid, node_c, node_v);
+
+		/*
+		 * For MN diskset:
+		 * On each newly re-added node, set the node record for that
+		 * node to OK.  Then set all node records for the newly added
+		 * nodes on all nodes to ok.
+		 *
+		 * By setting a node's own node record to ok first, even if
+		 * the node re-adding the hosts panics, the rest of the nodes
+		 * can determine the same node list during the choosing of the
+		 * master during reconfig.  So, only nodes considered for
+		 * mastership are nodes that have both MD_MN_NODE_OK and
+		 * MD_SR_OK set on that node's rpc.metad.  If all nodes have
+		 * MD_SR_OK set, but no node has its own MD_MN_NODE_OK set,
+		 * then the set will be removed during reconfig since a panic
+		 * occurred during the re-creation of the deletion of
+		 * the initial diskset.
+		 */
+		if (MD_MNSET_DESC(sd)) {
+			md_mnnode_desc	*saved_nd_next;
+			if (dd != NULL) {
+				/*
+				 * Notify rpc.mdcommd on all nodes of a
+				 * nodelist change.  Start by suspending
+				 * rpc.mdcommd (which drains it of all
+				 * messages), then change the nodelist
+				 * followed by a reinit and resume.
+				 */
+				nd = sd->sd_nodelist;
+				while (nd) {
+					if (!(nd->nd_flags &
+					    MD_MN_NODE_ALIVE)) {
+						nd = nd->nd_next;
+						continue;
+					}
+					if (clnt_mdcommdctl(nd->nd_nodename,
+					    COMMDCTL_SUSPEND, sp,
+					    MD_MSG_CLASS0,
+					    MD_MSCF_NO_FLAGS, &xep)) {
+						mde_perror(&xep,
+						    dgettext(TEXT_DOMAIN,
+						    "Unable to suspend "
+						    "rpc.mdcommd.\n"));
+						mdclrerror(&xep);
+					}
+					suspendall_flag_rb = 1;
+					nd = nd->nd_next;
+				}
+			}
+			for (i = 0; i < node_c; i++) {
+				/*
+				 * During OHA mode, don't issue RPCs to
+				 * non-alive nodes since there is no reason to
+				 * wait for RPC timeouts.
+				 */
+				nd = sd->sd_nodelist;
+				while (nd) {
+					if (strcmp(nd->nd_nodename, node_v[i])
+					    == 0)
+						break;
+					nd = nd->nd_next;
+				}
+				/* Something wrong, finish this in next loop */
+				if (nd == NULL)
+					continue;
+
+				if ((oha == TRUE) &&
+				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+					continue;
+				}
+
+				if (dd != NULL) {
+					/* Set master on re-joining node. */
+					if (clnt_mnsetmaster(node_v[i], sp,
+					    sd->sd_mn_master_nodenm,
+					    sd->sd_mn_master_nodeid, &xep)) {
+						mdclrerror(&xep);
+					}
+
+					/*
+					 * Re-join set to same state as
+					 * before - stale or non-stale.
+					 */
+					if (clnt_joinset(node_v[i], sp,
+					    stale_flag, &xep)) {
+						mdclrerror(&xep);
+					}
+				}
+
+				/* Only changing my local cache of node list */
+				saved_nd_next = nd->nd_next;
+				nd->nd_next = NULL;
+
+				/* Set record for host to ok on that host */
+				if (clnt_upd_nr_flags(node_v[i], sp,
+				    nd, MD_NR_OK, NULL, &xep)) {
+					mdclrerror(&xep);
+				}
+				nd->nd_next = saved_nd_next;
+			}
+
+			/* Now set all node records on all nodes to be ok */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/*
+				 * During OHA mode, don't issue RPCs to
+				 * non-alive nodes since there is no reason to
+				 * wait for RPC timeouts.
+				 */
+				if ((oha == TRUE) &&
+				    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+					nd = nd->nd_next;
+					continue;
+				}
+				if (clnt_upd_nr_flags(nd->nd_nodename, sp,
+				    sd->sd_nodelist, MD_NR_OK, NULL, &xep)) {
+					mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+		}
+	}
+
+	/*
+	 * Notify rpc.mdcommd on all nodes of a nodelist change.
+	 * Send reinit command to mdcommd which forces it to get
+	 * fresh set description.
+	 */
+	if (suspendall_flag_rb) {
+		/* Send reinit */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* Class is ignored for REINIT */
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
+			    sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to reinit rpc.mdcommd.\n"));
+				mdclrerror(&xep);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * Unlock diskset by resuming messages across the diskset.
+	 * Just resume all classes so that resume is the same whether
+	 * just one class was locked or all classes were locked.
+	 */
+	if ((suspend1_flag) || (suspendall_flag) || (suspendall_flag_rb)) {
+		/* Send resume */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				mde_perror(&xep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	/*
+	 * Start a resync thread on the re-added nodes
+	 * if set is not stale. Also start a thread to update the
+	 * abr state of all soft partitions
+	 */
+	if (stale_flag != MNSET_IS_STALE) {
+		for (i = 0; i < node_c; i++) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (strcmp(nd->nd_nodename, node_v[i])
+				    == 0)
+					break;
+				nd = nd->nd_next;
+			}
+			if (nd == NULL)
+				continue;
+
+			if ((oha == TRUE) &&
+			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+				continue;
+			}
+
+			if (dd != 0) {
+				if (clnt_mn_mirror_resync_all(node_v[i],
+				    sp->setno, &xep)) {
+					mde_perror(ep, dgettext(TEXT_DOMAIN,
+					    "Unable to start resync "
+					    "thread.\n"));
+				}
+				if (clnt_mn_sp_update_abr(node_v[i],
+				    sp->setno, &xep)) {
+					mde_perror(ep, dgettext(TEXT_DOMAIN,
+					    "Unable to start sp update "
+					    "thread.\n"));
+				}
+			}
+		}
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/*
+			 * During OHA mode, don't issue RPCs to
+			 * non-alive nodes since there is no reason to
+			 * wait for RPC timeouts.
+			 */
+			if ((oha == TRUE) &&
+			    (!(nd->nd_flags & MD_MN_NODE_ALIVE))) {
+				nd = nd->nd_next;
+				continue;
+			}
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	metafreereplicalist(rlp);
+	if (node_id_list)
+		Free(node_id_list);
+
+	metaflushsetname(sp);
+
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+}
+
+/*
+ * Enable or disable the auto-take flag (MD_SR_AUTO_TAKE) of a diskset.
+ *
+ * sp		set to operate on
+ * take_val	non-zero to enable auto-take, zero to disable it
+ * ep		error return
+ *
+ * Enabling does nothing if the flag is already set, and fails with
+ * MDE_DS_SINGLEHOST if a host other than this one is found in the set.
+ * Disabling fails with MDE_DS_AUTONOTSET if the flag is not currently set.
+ * After the flag has been changed, ownership of the set's drives is
+ * released (enable) or taken (disable); errors from that step are
+ * discarded.
+ *
+ * The set is locked on the local host for the duration of the call.
+ *
+ * Returns 0 on success, -1 on failure with the error recorded in ep.
+ */
+int
+meta_set_auto_take(
+	mdsetname_t	*sp,
+	int		take_val,
+	md_error_t	*ep
+)
+{
+	int		i;
+	md_set_desc	*sd;
+	int		rval = 0;
+	md_setkey_t	*cl_sk;
+	md_error_t	xep = mdnullerror;
+	char		*hostname;
+	md_drive_desc	*dd;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	hostname = mynode();
+
+	/* Lock the set on our side */
+	if (clnt_lock_set(hostname, sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	if (take_val) {
+		/* enable auto_take but only if it is not already set */
+		if (!(sd->sd_flags & MD_SR_AUTO_TAKE)) {
+			/* verify that we're the only host in the set */
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i] == NULL ||
+				    sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (strcmp(sd->sd_nodes[i], hostname) != 0) {
+					(void) mddserror(ep,
+					    MDE_DS_SINGLEHOST, sp->setno,
+					    NULL, NULL, sp->setname);
+					rval = -1;
+					goto out;
+				}
+			}
+
+			if (clnt_enable_sr_flags(hostname, sp,
+			    MD_SR_AUTO_TAKE, ep))
+				rval = -1;
+
+			/* Disable SCSI reservations */
+			if (sd->sd_flags & MD_SR_MB_DEVID)
+				dd = metaget_drivedesc(sp,
+				    MD_BASICNAME_OK | PRINT_FAST, &xep);
+			else
+				dd = metaget_drivedesc(sp, MD_BASICNAME_OK,
+				    &xep);
+			if (!mdisok(&xep))
+				mdclrerror(&xep);
+
+			/* Best effort: a failure to release is not reported */
+			if (dd != NULL) {
+				if (rel_own_bydd(sp, dd, TRUE, &xep))
+					mdclrerror(&xep);
+			}
+		}
+	} else {
+		/* disable auto_take, if set, or error */
+		if (sd->sd_flags & MD_SR_AUTO_TAKE) {
+			if (clnt_disable_sr_flags(hostname, sp,
+			    MD_SR_AUTO_TAKE, ep))
+				rval = -1;
+
+			/* Enable SCSI reservations */
+			if (sd->sd_flags & MD_SR_MB_DEVID)
+				dd = metaget_drivedesc(sp,
+				    MD_BASICNAME_OK | PRINT_FAST, &xep);
+			else
+				dd = metaget_drivedesc(sp, MD_BASICNAME_OK,
+				    &xep);
+			if (!mdisok(&xep))
+				mdclrerror(&xep);
+
+			/* Best effort: a failure to take is not reported */
+			if (dd != NULL) {
+				mhd_mhiargs_t	mhiargs = defmhiargs;
+
+				if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep))
+					mdclrerror(&xep);
+			}
+		} else {
+			(void) mddserror(ep, MDE_DS_AUTONOTSET, sp->setno,
+			    NULL, NULL, sp->setname);
+			rval = -1;
+		}
+	}
+
+out:
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(hostname, cl_sk, &xep)) {
+		/*
+		 * Report the unlock failure only if nothing failed earlier;
+		 * otherwise keep the first error and discard this one so
+		 * that xep is not left holding error state.
+		 */
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		else
+			mdclrerror(&xep);
+		rval = -1;
+	}
+	cl_set_setkey(NULL);
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_med.c b/usr/src/lib/lvm/libmeta/common/meta_set_med.c
new file mode 100644
index 0000000000..02b39d39ee
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_med.c
@@ -0,0 +1,1253 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <sys/lvm/md_crc.h>
+#include <sys/lvm/mdmed.h>
+
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/svm.h>
+
+#define	MALSIZ	32
+
+/*
+ * Append a copy of item to the NULL-terminated string list *listp,
+ * creating the list on first use.  The list is grown MALSIZ slots at a
+ * time and the unused slots are kept NULL so that the list is always
+ * terminated.
+ *
+ * Returns the number of strings in the list after the append.
+ */
+static int
+add_lst(char ***listp, char *item)
+{
+	char	**lst = *listp;
+	int	cnt = 0;
+	int	k;
+
+	if (lst == NULL) {
+		/* First use: start with one zeroed chunk */
+		lst = (char **)Zalloc(MALSIZ * sizeof (char *));
+	} else {
+		/* Find the first free slot */
+		while (lst[cnt] != NULL)
+			cnt++;
+	}
+
+	lst[cnt] = Strdup(item);
+	cnt++;
+
+	/* The chunk just filled up; add another one and NULL it out */
+	if ((cnt % MALSIZ) == 0) {
+		lst = (char **)Realloc((void *)lst,
+			(cnt + MALSIZ) * sizeof (char *));
+		for (k = 0; k < MALSIZ; k++)
+			lst[cnt + k] = (char *)NULL;
+	}
+
+	*listp = lst;
+	return (cnt);
+}
+
+/*
+ * Free every string in the NULL-terminated list *listp, free the list
+ * itself and reset *listp to NULL.
+ *
+ * Returns 1 if a list was freed, 0 if *listp was already NULL.
+ */
+static int
+del_lst(char ***listp)
+{
+	char	**entry;
+
+	/* Nothing to release */
+	if (*listp == NULL)
+		return (0);
+
+	for (entry = *listp; *entry != NULL; entry++)
+		free(*entry);
+
+	free(*listp);
+	*listp = NULL;
+	return (1);
+}
+
+
+/*
+ * Validate the mediator host names held in mhp.
+ *
+ * For every populated entry of mhp->n_lst:
+ *   - each name may contain only printable characters that do not
+ *     appear in INVALID_IN_NAMES (else MDE_DS_INVALIDMEDNAME);
+ *   - the first name must be identical to the host name that
+ *     clnt_med_hostname() reports for it (else MDE_DS_NOTNODENAME);
+ *   - every following name (alias) must be reported by
+ *     clnt_med_hostname() as that same first name
+ *     (else MDE_DS_ALIASNOMATCH).
+ *
+ * sp	set the mediators belong to; used for error reporting
+ * mhp	mediator host array to check
+ * ep	error return
+ *
+ * Returns 0 if every name is acceptable; otherwise a non-zero value
+ * with the error recorded in ep.
+ */
+static int
+validate_med_nodes(
+	mdsetname_t	*sp,
+	md_h_arr_t	*mhp,
+	md_error_t	*ep
+)
+{
+	char		*hostname;
+	char		*nodename = NULL;
+	char		*nm;
+	char		*cp;
+	int		i, j;
+
+
+	for (i = 0; i < MED_MAX_HOSTS; i++) {
+		if (mhp->n_lst[i].a_cnt == 0)
+			continue;
+
+		for (j = 0; j < mhp->n_lst[i].a_cnt; j++) {
+			nm = mhp->n_lst[i].a_nm[j];
+
+			/*
+			 * The <ctype.h> classifiers are only defined for
+			 * values representable as unsigned char (or EOF),
+			 * so a plain char must be converted before the call.
+			 */
+			for (cp = nm; *cp; cp++)
+				if (!isprint((unsigned char)*cp) ||
+				    strchr(INVALID_IN_NAMES, *cp) != NULL)
+					return (mddserror(ep,
+					    MDE_DS_INVALIDMEDNAME,
+					    sp->setno, nm, NULL, sp->setname));
+
+			/* hostname is allocated for us and must be freed */
+			if (clnt_med_hostname(nm, &hostname, ep))
+				return (-1);
+
+			if (j == 0) {
+				/* First name has to be the node name itself */
+				if (strcmp(nm, hostname) != 0) {
+					Free(hostname);
+					return (mddserror(ep,
+					    MDE_DS_NOTNODENAME, sp->setno, nm,
+					    NULL, sp->setname));
+				}
+				nodename = nm;
+			} else {
+				/* Aliases have to lead back to the node name */
+				if (strcmp(nodename, hostname) != 0) {
+					Free(hostname);
+					return (mddserror(ep,
+					    MDE_DS_ALIASNOMATCH, sp->setno, nm,
+					    nodename, sp->setname));
+				}
+			}
+			Free(hostname);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Add mediator hosts to a diskset.
+ *
+ * sp		set to add the mediators to
+ * node_c	number of entries in node_v; must not exceed MED_MAX_HOSTS
+ * node_v	mediator specifications.  Each entry is a comma separated
+ *		list of names for one mediator host: the node name first,
+ *		followed by any aliases.  The strings are split in place
+ *		with strtok().
+ * ep		error return
+ *
+ * The new mediators are validated, merged into the set description and
+ * then pushed out in numbered rollback levels: (1) update the mediator
+ * list on every host in the set, (2) fetch the drive descriptors,
+ * (3) update the mediator record on every mediator host, (4) update the
+ * kernel's mediator list.  If a level fails, the levels already completed
+ * are undone on a best-effort basis.
+ *
+ * Returns 0 on success; otherwise a non-zero value with the error
+ * recorded in ep.
+ */
+int
+meta_set_addmeds(
+	mdsetname_t		*sp,
+	int			node_c,
+	char			**node_v,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd = NULL;
+	md_drive_desc		*dd = NULL;
+	mddb_med_parm_t		mp;
+	mddb_med_upd_parm_t	mup;
+	md_h_arr_t		t;
+	md_h_arr_t		rb_t;
+	med_rec_t		medr;
+	med_rec_t		rb_medr;
+	char			*cp;
+	char			**n_l = NULL;
+	int			n_c = 0;
+	int			i, j;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	int			rval = 0;
+	int			max_meds;
+	md_mnnode_desc		*nd;
+	int			suspend1_flag = 0;
+	int			lock_flag = 0;
+
+	/* Initialize */
+	(void) memset(&t, '\0', sizeof (t));
+	t.n_cnt = node_c;
+	mdclrerror(ep);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	if ((max_meds = get_max_meds(ep)) == 0)
+		return (-1);
+
+	/*
+	 * The mediator information (which is part of the set record) is
+	 * stored in the local mddbs of each node in the diskset.
+	 * Each node's rpc.metad daemon reads in the set
+	 * records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to add
+	 * the mediator information to the set record in their local mddb.
+	 * If a panic of this node leaves the local mddbs set records
+	 * out-of-sync, the reconfig cycle will fix the local mddbs and
+	 * force them back into synchronization.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+					sp->setno,
+					nd->nd_nodename, NULL, sp->setname);
+				return (-1);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	/*
+	 * A mediator host array is handled as MED_MAX_HOSTS entries, so
+	 * refuse a longer argument list here, before the parse loop below
+	 * indexes t.n_lst[] with it.  Without this check an oversized
+	 * node_c would write past the end of t.
+	 */
+	if (node_c > MED_MAX_HOSTS)
+		return (mderror(ep, MDE_TOOMANYMED, NULL));
+
+	/* Parse the command line into the md_h_arr_t structure */
+	for (i = 0; i < t.n_cnt; i++) {
+		cp = strtok(node_v[i], ",");
+		j = 0;
+		while (cp) {
+			if (strlen(cp) > (size_t)MD_MAX_NODENAME)
+				return (mddserror(ep, MDE_DS_NODENAMETOOLONG,
+				    sp->setno, cp, NULL, sp->setname));
+			if (j >= MAX_HOST_ADDRS)
+				return (mddserror(ep, MDE_DS_TOOMANYALIAS,
+				    sp->setno, cp, NULL, sp->setname));
+
+			(void) strcpy(t.n_lst[i].a_nm[j], cp);
+
+			j++;
+
+			cp = strtok(NULL, ",");
+		}
+		t.n_lst[i].a_cnt = j;
+	}
+
+	/* Make a list of nodes to check */
+	for (i = 0; i < t.n_cnt; i++)
+		for (j = 0; j < t.n_lst[i].a_cnt; j++)
+			n_c = add_lst(&n_l, t.n_lst[i].a_nm[j]);
+
+	/* Make sure that there are no redundant nodes */
+	rval = nodesuniq(sp, n_c, n_l, ep);
+
+	(void) del_lst(&n_l);
+
+	if (rval != 0)
+		return (rval);
+
+	/*
+	 * Lock the set on current set members.
+	 * Set locking done much earlier for MN diskset than for traditional
+	 * diskset since lock_set and SUSPEND are used to protect against
+	 * other metaset commands running on the other nodes.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+	}
+
+	if (validate_med_nodes(sp, &t, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Check existing mediators against new, if any */
+	if (sd->sd_med.n_cnt > 0) {
+		for (i = 0; i < max_meds; i++)
+			if (sd->sd_med.n_lst[i].a_cnt > 0)
+				n_c = add_lst(&n_l,
+				    sd->sd_med.n_lst[i].a_nm[0]);
+
+		for (i = 0; i < t.n_cnt; i++) {
+			if (strinlst(t.n_lst[i].a_nm[0], n_c, n_l)) {
+				(void) del_lst(&n_l);
+				(void) mddserror(ep, MDE_DS_ISMED, sp->setno,
+				    t.n_lst[i].a_nm[0], NULL,
+				    sp->setname);
+				rval = -1;
+				goto out;
+			}
+		}
+		(void) del_lst(&n_l);
+	}
+
+	if ((t.n_cnt + sd->sd_med.n_cnt) > max_meds) {
+		(void) mderror(ep, MDE_TOOMANYMED, NULL);
+		rval = -1;
+		goto out;
+	}
+
+	/* Copy the current mediator list for rollback */
+	rb_t = sd->sd_med;			/* structure assignment */
+
+	/* Setup the mediator record roll-back structure */
+	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
+	rb_medr.med_rec_mag = MED_REC_MAGIC;
+	rb_medr.med_rec_rev = MED_REC_REV;
+	rb_medr.med_rec_fl  = 0;
+	rb_medr.med_rec_sn  = sp->setno;
+	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * For a MN diskset the mediator is not given a list of
+		 * hosts in the set.  Instead a generic name (multiowner) is
+		 * given to the mediator which will allow any node to access
+		 * the mediator data as long as it provides the correct
+		 * setname and set number.  In a MN diskset, the mediator
+		 * data is only used when a first node joins the diskset
+		 * and becomes the master of the MN diskset.
+		 *
+		 * The traditional diskset code keeps the host list in
+		 * the mediator record up to date with respect to the host
+		 * list in the traditional diskset.  This keeps an unauthorized
+		 * node in the traditional diskset from accessing the data
+		 * in the mediator record and being able to 'take' the
+		 * diskset.
+		 *
+		 * This additional check is needed in the traditional diskset
+		 * since a panic during the metaset command can leave
+		 * the diskset with some nodes thinking that an
+		 * action has occurred and other nodes thinking the opposite.
+		 * A node may have really been removed from a diskset, but
+		 * that node doesn't realize this so this node must be
+		 * blocked from using the mediator data when attempting
+		 * to 'take' the diskset.
+		 * (Traditional diskset code has each node's rpc.metad
+		 * cleaning up from an inconsistent state without any
+		 * knowledge from the other nodes in the diskset).
+		 *
+		 * In the MN diskset, the reconfig steps force a consistent
+		 * state across all nodes in the diskset, so no node
+		 * needs to be blocked from accessing the mediator data.
+		 * This allows the MN diskset to use a common 'nodename'
+		 * in the mediator record.  This allows the mediator
+		 * daemon to remain unchanged even though a large number of
+		 * nodes are supported by the MN diskset.
+		 */
+		(void) strlcpy(rb_medr.med_rec_nodes[0], MED_MN_CALLER,
+		    MD_MAX_NODENAME_PLUS_1);
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++)
+			(void) strcpy(rb_medr.med_rec_nodes[i],
+				sd->sd_nodes[i]);
+	}
+	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
+	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
+	rb_medr.med_rec_foff = 0;
+	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* Merge new mediators into the set record */
+	for (i = 0; i < t.n_cnt; i++) {
+		for (j = 0; j < max_meds; j++) {
+			/* Slot already holds a mediator, try the next one */
+			if (sd->sd_med.n_lst[j].a_cnt > 0)
+				continue;
+			sd->sd_med.n_lst[j] = t.n_lst[i];
+			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_MEDIATOR,
+			    sp->setno, j);
+			sd->sd_med.n_cnt++;
+			break;
+		}
+	}
+
+	/*
+	 * Setup the kernel mediator list, which also validates that the
+	 * hosts have valid IP addresses
+	 */
+	(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
+	mp.med_setno = sp->setno;
+
+	/* Copy the hostnames */
+	if (meta_h2hi(&sd->sd_med, &mp.med, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Resolve the IP addresses for the host list */
+	if (meta_med_hnm2ip(&mp.med, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/* Bring the mediator record up to date with the set record */
+	medr = rb_medr;				/* structure assignment */
+	medr.med_rec_meds = sd->sd_med;		/* structure assignment */
+	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* END CHECK CODE */
+
+	/* Lock the set on current set members */
+	if (!(MD_MNSET_DESC(sd))) {
+		/* all signals already blocked for MN diskset */
+		md_rb_sig_handling_on();
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	RB_TEST(1, "meta_set_addmeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "meta_set_addmeds", ep)
+
+	/*
+	 * Add the new mediator information to all hosts in the set.
+	 * For MN diskset, each node sends mediator list to its kernel.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* All nodes are guaranteed to be ALIVE */
+			if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep))
+				goto rollback;
+			nd = nd->nd_next;
+		}
+	} else  {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep))
+				goto rollback;
+		}
+	}
+
+	RB_TEST(3, "meta_set_addmeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(4, "meta_set_addmeds", ep)
+
+	/* A NULL result with no error in ep is not treated as a failure */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		if (! mdisok(ep))
+			goto rollback;
+	}
+
+	RB_TEST(5, "meta_set_addmeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(6, "meta_set_addmeds", ep)
+
+	/* Inform the mediator hosts of the new information */
+	for (i = 0; i < max_meds; i++) {
+		if (sd->sd_med.n_lst[i].a_cnt == 0)
+			continue;
+
+		/* medr contains new mediator node list */
+		if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
+			goto rollback;
+	}
+
+	RB_TEST(7, "meta_set_addmeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 4;	/* level 4 */
+
+	RB_TEST(8, "meta_set_addmeds", ep)
+
+	/* In MN diskset, mediator list updated in clnt_updmeds call */
+	if (dd != NULL) {
+		if (!(MD_MNSET_DESC(sd))) {
+			if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde,
+			    NULL) != 0) {
+				(void) mdstealerror(ep, &mp.med_mde);
+				goto rollback;
+			}
+		}
+
+		/*
+		 * If only 50% mddbs available, mediator will be
+		 * golden by this ioctl on a traditional diskset.
+		 *
+		 * On a MN diskset, this only happens if the mediator
+		 * add operation is executed on the master node.
+		 * If a slave node is adding the mediator, the mediator
+		 * won't be marked golden until the next mddb change.
+		 */
+		(void) memset(&mup, '\0', sizeof (mddb_med_upd_parm_t));
+		mup.med_setno = sp->setno;
+		if (metaioctl(MD_MED_UPD_MED, &mup, &mup.med_mde, NULL) != 0)
+			mdclrerror(&mup.med_mde);
+	}
+
+out:
+	if (suspend1_flag) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/*
+				 * Keep the first error; discard this one
+				 * if it cannot be handed to the caller.
+				 */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	if (lock_flag) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					else
+						mdclrerror(&xep);
+					rval = -1;
+				}
+				nd = nd->nd_next;
+			}
+		} else  {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					else
+						mdclrerror(&xep);
+					rval = -1;
+				}
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	metafreedrivedesc(&dd);
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	} else {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	/*
+	 * level 4
+	 * In MN diskset, mediator list updated in clnt_updmeds call
+	 */
+	if (rb_level > 3 && (dd != NULL) && (!(MD_MNSET_DESC(sd)))) {
+		/* Rebuild the kernel list from the saved (pre-add) list */
+		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
+		mp.med_setno = sp->setno;
+		(void) meta_h2hi(&rb_t, &mp.med, &xep);
+		mdclrerror(&xep);
+		(void) meta_med_hnm2ip(&mp.med, &xep);
+		mdclrerror(&xep);
+		(void) metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL);
+	}
+
+	/* level 3 */
+	if (rb_level > 2) {
+		for (i = 0; i < max_meds; i++) {
+			if (sd->sd_med.n_lst[i].a_cnt == 0)
+				continue;
+
+			/*
+			 * rb_medr contains the rollback mediator node list.
+			 * Send the rollback mediator information to the
+			 * new mediator node list.  If a node had this RPC
+			 * called, but its node is not in the mediator node
+			 * list, rpc.metamedd will delete the mediator
+			 * record on that node.
+			 */
+			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
+			    &rb_medr, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		metafreedrivedesc(&dd);
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		/* Delete mediator information from all hosts in the set */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_updmeds(nd->nd_nodename, sp, &rb_t,
+				    &xep))
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else  {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_updmeds(sd->sd_nodes[i], sp, &rb_t,
+				    &xep))
+					mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 0 */
+	if (suspend1_flag) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				mdclrerror(&xep);
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+	if (lock_flag) {
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+		} else  {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					mdclrerror(&xep);
+				}
+			}
+		}
+		cl_set_setkey(NULL);
+	}
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+}
+
+/*
+ * Remove the mediator hosts named in node_v[0 .. node_c - 1] from the
+ * diskset sp.
+ *
+ * The request is validated first (set ownership, plain node names, no
+ * duplicates, every name is a current mediator, and - for a multi-node
+ * set - every node is ALIVE).  Then, with the set locked on all set hosts
+ * (and class 1 rpc.mdcommd messages suspended for a multi-node set), the
+ * reduced mediator list is pushed to every host in the set, to the kernel
+ * (only when the set has drives) and finally to the mediator hosts.
+ *
+ * rb_level records how far the update got; on a failure after the set has
+ * been locked the "rollback" path undoes the completed levels in reverse
+ * order using the saved copies rb_t / rb_medr.
+ *
+ * forceflg - when non-zero, lock/update/unlock failures on hosts other
+ *	      than this node are ignored, as is any RPC error returned by a
+ *	      mediator host.
+ *
+ * Returns 0 on success.  On failure returns non-zero (-1, or the value
+ * returned by mderror()/mddserror()) with the error left in ep.
+ */
+int
+meta_set_deletemeds(
+	mdsetname_t		*sp,
+	int			node_c,
+	char			**node_v,
+	int			forceflg,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd = NULL;
+	md_drive_desc		*dd = NULL;
+	mddb_med_parm_t		mp;
+	md_h_arr_t		rb_t;
+	med_rec_t		medr;
+	med_rec_t		rb_medr;
+	int			i, j;
+	char			**n_l = NULL;
+	int			n_c = 0;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	int			rval = 0;
+	int			max_meds;
+	md_mnnode_desc		*nd;
+	int			suspend1_flag = 0;
+	int			lock_flag = 0;
+
+	mdclrerror(ep);
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/* Only plain node names are accepted, not comma separated lists */
+	for (i = 0; i < node_c; i++)
+		if (strchr(node_v[i], ',') != NULL)
+			return (mderror(ep, MDE_ONLYNODENAME, node_v[i]));
+
+	if (nodesuniq(sp, node_c, node_v, ep))
+		return (-1);
+
+	if ((max_meds = get_max_meds(ep)) == 0)
+		return (-1);
+
+	/*
+	 * The mediator information (which is part of the set record) is
+	 * stored in the local mddbs of each node in the diskset.
+	 * Each node's rpc.metad daemon reads in the set
+	 * records from that node's local mddb and caches them
+	 * internally. Any process needing diskset information contacts its
+	 * local rpc.metad to get this information.  Since each node in the
+	 * diskset is independently reading the set information from its local
+	 * mddb, the set records in the local mddbs must stay
+	 * in-sync, so that all nodes have a consistent view of the diskset.
+	 *
+	 * For a multinode diskset, explicitly verify that all nodes in the
+	 * diskset are ALIVE (i.e. are in the API membership list).  Otherwise,
+	 * fail this operation since all nodes must be ALIVE in order to delete
+	 * the mediator information from the set record in their local mddb.
+	 * If a panic of this node leaves the local mddbs set records
+	 * out-of-sync, the reconfig cycle will fix the local mddbs and
+	 * force them back into synchronization.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				(void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
+					sp->setno,
+					nd->nd_nodename, NULL, sp->setname);
+				return (-1);
+			}
+			nd = nd->nd_next;
+		}
+	}
+
+	if (sd->sd_med.n_cnt == 0)
+		return (mderror(ep, MDE_NOMED, NULL));
+
+	/* Make a list of nodes to check */
+	for (i = 0; i < max_meds; i++)
+		if (sd->sd_med.n_lst[i].a_cnt > 0)
+			n_c = add_lst(&n_l, sd->sd_med.n_lst[i].a_nm[0]);
+
+	/* Every host to be deleted must currently be a mediator */
+	for (i = 0; i < node_c; i++) {
+		if (! strinlst(node_v[i], n_c, n_l)) {
+			(void) del_lst(&n_l);
+			return (mddserror(ep, MDE_DS_ISNOTMED, sp->setno,
+			    node_v[i], NULL, sp->setname));
+		}
+	}
+
+	(void) del_lst(&n_l);
+
+	/* Save a copy of the current mediator information */
+	rb_t = sd->sd_med;			/* structure assignment */
+
+	/* Setup the mediator record for rollback */
+	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
+	rb_medr.med_rec_mag = MED_REC_MAGIC;
+	rb_medr.med_rec_rev = MED_REC_REV;
+	rb_medr.med_rec_fl  = 0;
+	rb_medr.med_rec_sn  = sp->setno;
+	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * In MN diskset, use a generic nodename, multiowner, in the
+		 * mediator record which allows any node to access mediator
+		 * information.  MN diskset reconfig cycle forces consistent
+		 * view of set/node/drive/mediator information across all nodes
+		 * in the MN diskset.  This allows the relaxation of
+		 * node name checking in rpc.metamedd for MN disksets.
+		 *
+		 * In the traditional diskset, only a node that is in the
+		 * mediator record's diskset nodelist can access mediator
+		 * data.
+		 */
+		(void) strlcpy(rb_medr.med_rec_nodes[0], MED_MN_CALLER,
+		    MD_MAX_NODENAME_PLUS_1);
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++)
+			(void) strcpy(rb_medr.med_rec_nodes[i],
+				sd->sd_nodes[i]);
+	}
+	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
+	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
+	rb_medr.med_rec_foff = 0;
+	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* Delete the mediators requested from the set */
+	for (i = 0; i < node_c; i++) {
+		for (j = 0; j < max_meds; j++) {
+			if (sd->sd_med.n_lst[j].a_cnt == 0)
+				continue;
+			if (strcmp(node_v[i],
+			    sd->sd_med.n_lst[j].a_nm[0]) != 0)
+				continue;
+			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE,
+			    SVM_TAG_MEDIATOR, sp->setno, j);
+			(void) memset(&sd->sd_med.n_lst[j], '\0',
+			    sizeof (md_h_t));
+			sd->sd_med.n_cnt--;
+			break;
+		}
+	}
+
+	/* The new record is the rollback record with the reduced list */
+	medr = rb_medr;				/* structure assignment */
+	medr.med_rec_meds = sd->sd_med;		/* structure assignment */
+	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
+
+	/* END CHECK CODE */
+
+	/* Lock the set on current set members */
+	if (MD_MNSET_DESC(sd)) {
+		/* Make sure we are blocking all signals */
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+		/*
+		 * Lock the set on current set members.
+		 * lock_set and SUSPEND are used to protect against
+		 * other metaset commands running on the other nodes.
+		 */
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* All nodes are guaranteed to be ALIVE */
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				if (forceflg && strcmp(mynode(),
+				    nd->nd_nodename) != 0) {
+					mdclrerror(ep);
+					nd = nd->nd_next;
+					continue;
+				}
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+			nd = nd->nd_next;
+		}
+		/*
+		 * Lock out other meta* commands by suspending
+		 * class 1 messages across the diskset.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename,
+			    COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
+			    MD_MSCF_NO_FLAGS, ep)) {
+				rval = -1;
+				goto out;
+			}
+			suspend1_flag = 1;
+			nd = nd->nd_next;
+		}
+	} else  {
+		md_rb_sig_handling_on();
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				if (forceflg &&
+				    strcmp(mynode(), sd->sd_nodes[i]) != 0) {
+					mdclrerror(ep);
+					continue;
+				}
+				rval = -1;
+				goto out;
+			}
+			lock_flag = 1;
+		}
+	}
+
+	RB_TEST(1, "meta_set_deletemeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "meta_set_deletemeds", ep)
+
+	/* Update the mediator information on all hosts in the set */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* All nodes are guaranteed to be ALIVE */
+			if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med,
+			    ep)) {
+				if (forceflg && strcmp(mynode(),
+				    nd->nd_nodename) != 0) {
+					mdclrerror(ep);
+					/*
+					 * Step to the next node before
+					 * continuing; otherwise the same
+					 * failing node is retried forever.
+					 */
+					nd = nd->nd_next;
+					continue;
+				}
+				goto rollback;
+			}
+			nd = nd->nd_next;
+		}
+	} else  {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med,
+			    ep)) {
+				if (forceflg && strcmp(mynode(),
+				    sd->sd_nodes[i]) != 0) {
+					mdclrerror(ep);
+					continue;
+				}
+				goto rollback;
+			}
+		}
+	}
+
+	RB_TEST(3, "meta_set_deletemeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(5, "meta_set_deletemeds", ep)
+
+	/* A NULL drive list with a clean ep just means there are no drives */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL) {
+		if (! mdisok(ep))
+			goto rollback;
+	}
+
+	RB_TEST(5, "meta_set_deletemeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(6, "meta_set_deletemeds", ep)
+
+	if (dd != NULL) {
+		/*
+		 * Set up the parameters to the call to update the
+		 * kernel mediator list
+		 */
+		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
+		mp.med_setno = sp->setno;
+		if (meta_h2hi(&sd->sd_med, &mp.med, ep))
+			goto rollback;
+
+		/* Resolve the IP addresses for the host list */
+		if (meta_med_hnm2ip(&mp.med, ep))
+			goto rollback;
+
+		if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &mp.med_mde);
+			goto rollback;
+		}
+	}
+
+	RB_TEST(7, "meta_set_deletemeds", ep)
+
+	RB_PREEMPT;
+	rb_level = 4;	/* level 4 */
+
+	RB_TEST(8, "meta_set_deletemeds", ep)
+
+	/* Inform the mediator hosts of the new status */
+	for (i = 0; i < max_meds; i++) {
+		if (rb_t.n_lst[i].a_cnt == 0)
+			continue;
+
+		/*
+		 * medr contains the new mediator node list.
+		 * Send the new mediator information to the
+		 * new mediator node list.  If a node had this RPC
+		 * called, but its node is no longer in the new mediator
+		 * node list, rpc.metamedd will delete the mediator
+		 * record on that node.
+		 */
+		if (clnt_med_upd_rec(&rb_t.n_lst[i], sp, &medr, ep)) {
+			if ((forceflg && mdanyrpcerror(ep)) ||
+			    mdisrpcerror(ep, RPC_PROGNOTREGISTERED)) {
+				mdclrerror(ep);
+				continue;
+			}
+			goto rollback;
+		}
+	}
+
+out:
+	if (dd)
+		metafreedrivedesc(&dd);
+
+	if (suspend1_flag) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 */
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
+			    sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
+				/* Keep the first error, discard later ones */
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				else
+					mdclrerror(&xep);
+				rval = -1;
+				mde_perror(ep, dgettext(TEXT_DOMAIN,
+				    "Unable to resume rpc.mdcommd.\n"));
+			}
+			nd = nd->nd_next;
+		}
+		meta_ping_mnset(sp->setno);
+	}
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (lock_flag) {
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_unlock_set(nd->nd_nodename,
+				    cl_sk, &xep)) {
+					if (forceflg &&
+					    strcmp(mynode(),
+					    nd->nd_nodename) != 0) {
+						/*
+						 * The failure is in xep, not
+						 * ep; ep may hold the error
+						 * being returned.  Advance nd
+						 * so the loop terminates.
+						 */
+						mdclrerror(&xep);
+						nd = nd->nd_next;
+						continue;
+					}
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					else
+						mdclrerror(&xep);
+					rval = -1;
+				}
+				nd = nd->nd_next;
+			}
+		} else {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_unlock_set(sd->sd_nodes[i],
+				    cl_sk, &xep)) {
+					if (forceflg &&
+					    strcmp(mynode(),
+					    sd->sd_nodes[i]) != 0) {
+						/* error is in xep, not ep */
+						mdclrerror(&xep);
+						continue;
+					}
+					if (rval == 0)
+						(void) mdstealerror(ep, &xep);
+					else
+						mdclrerror(&xep);
+					rval = -1;
+				}
+			}
+		}
+	}
+	cl_set_setkey(NULL);
+
+	if (MD_MNSET_DESC(sd)) {
+		/* release signals back to what they were on entry */
+		if (procsigs(FALSE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	} else {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+
+rollback:
+	/* all signals already blocked for MN diskset */
+	if (!(MD_MNSET_DESC(sd))) {
+		if (procsigs(TRUE, &oldsigs, &xep) < 0)
+			mdclrerror(&xep);
+	}
+
+	rval = -1;
+
+	(void) del_lst(&n_l);
+
+	/*
+	 * level 4
+	 * rb_level is 4 while the mediator hosts are being updated, so the
+	 * test must be "> 3" for this undo step to ever run.
+	 */
+	if (rb_level > 3) {
+		for (i = 0; i < max_meds; i++) {
+			if (rb_t.n_lst[i].a_cnt == 0)
+				continue;
+
+			/*
+			 * rb_medr contains the rollback mediator node list.
+			 * Send the rollback mediator information to the
+			 * new mediator node list.  This will recreate the
+			 * mediator record on all nodes where the mediator
+			 * record had been removed.
+			 */
+			if (clnt_med_upd_rec(&rb_t.n_lst[i], sp, &rb_medr,
+			    &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 3 - restore the saved mediator list in the kernel */
+	if (rb_level > 2 && dd != NULL) {
+		(void) memset(&mp, '\0', sizeof (mddb_med_parm_t));
+		mp.med_setno = sp->setno;
+		(void) meta_h2hi(&rb_t, &mp.med, &xep);
+		mdclrerror(&xep);
+		(void) meta_med_hnm2ip(&mp.med, &xep);
+		mdclrerror(&xep);
+		(void) metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL);
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		metafreedrivedesc(&dd);
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		/* Restore the saved mediator list on all hosts in the set */
+		if (MD_MNSET_DESC(sd)) {
+			nd = sd->sd_nodelist;
+			while (nd) {
+				/* All nodes are guaranteed to be ALIVE */
+				if (clnt_updmeds(nd->nd_nodename, sp, &rb_t,
+				    &xep))
+					mdclrerror(&xep);
+				nd = nd->nd_next;
+			}
+		} else  {
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_updmeds(sd->sd_nodes[i], sp, &rb_t,
+				    &xep))
+					mdclrerror(&xep);
+			}
+		}
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	/* Unlock the set */
+	/* Don't test lock flag since guaranteed to be set if in rollback */
+	if (MD_MNSET_DESC(sd)) {
+		/*
+		 * Unlock diskset by resuming messages across the diskset.
+		 * Just resume all classes so that resume is the same whether
+		 * just one class was locked or all classes were locked.
+		 */
+		if (suspend1_flag) {
+			/* All nodes are guaranteed to be ALIVE */
+			nd = sd->sd_nodelist;
+			while (nd) {
+				if (clnt_mdcommdctl(nd->nd_nodename,
+				    COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
+				    MD_MSCF_NO_FLAGS, &xep)) {
+				    mde_perror(&xep, dgettext(TEXT_DOMAIN,
+					"Unable to resume rpc.mdcommd.\n"));
+				    mdclrerror(&xep);
+				}
+				nd = nd->nd_next;
+			}
+			meta_ping_mnset(sp->setno);
+		}
+		nd = sd->sd_nodelist;
+		/* All nodes are guaranteed to be ALIVE */
+		while (nd) {
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
+				mdclrerror(&xep);
+			nd = nd->nd_next;
+		}
+	} else  {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	if (!(MD_MNSET_DESC(sd))) {
+		md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+	}
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_prv.c b/usr/src/lib/lvm/libmeta/common/meta_set_prv.c
new file mode 100644
index 0000000000..8b615d9af0
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_prv.c
@@ -0,0 +1,818 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+#include <sys/cladm.h>
+#include <devid.h>
+#include <sys/lvm/md_convert.h>
+
+/*
+ * Exported Entry Points
+ */
+
+/*
+ * Check that the drive dnp is also attached to the host "node" and, if so,
+ * ask that host (clnt_drvused) whether the drive is already in use.
+ *
+ * When the drive carries a device id and the set is not a multi-node set,
+ * the remote host is queried by device id first.  If the remote side
+ * reports ENOTSUP, or the drive has no device id, the older comparison is
+ * used instead: device id comparison for EFI disks, dev_t plus label
+ * timestamp for VTOC disks.
+ *
+ * If the drive is already in this set, ownership is released for the
+ * duration of the check and retaken at "out".
+ *
+ * Returns 0 on success, non-zero with ep set on failure.
+ */
+int
+checkdrive_onnode(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,
+	char		*node,
+	md_error_t	*ep)
+{
+	time_t			mystamp, otherstamp;
+	md_dev64_t		otherdev;
+	mdname_t		*np, *remote_np;
+	mddrivename_t		*remote_dnp = NULL;
+	int			release = 0;
+	md_drive_desc		dd;
+	int			rval = 0;
+	int			ret = -1;
+	mhd_mhiargs_t		mhiargs;
+	md_set_desc		*sd;
+	int			is_efi = 0;
+	int			do_fallback = 0;
+
+	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	if (meta_is_drive_in_thisset(sp, dnp, FALSE, ep)) {
+		release = 1;
+		dd.dd_next = NULL;
+		dd.dd_dbcnt = 0;
+		dd.dd_dbsize = 0;
+		dd.dd_dnp = dnp;
+		if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0)
+			return (-1);
+		if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+			if (rel_own_bydd(sp, &dd, TRUE, ep))
+				return (-1);
+		}
+	}
+	if ((np = metaslicename(dnp, MD_SLICE0, ep)) == NULL) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * First try and operate assuming the other side
+	 * is running a SVM version that supports device id
+	 * in disksets i.e. is running SVM RPC version 2.
+	 *
+	 * If this call fails due to the other side running
+	 * a SVM version that does not support device id
+	 * in disksets i.e. is running SVM RPC version 1, we
+	 * fallback to the old behaviour.
+	 */
+	if ((dnp->devid != NULL) && (!(MD_MNSET_DESC(sd)))) {
+		char		*rname = NULL;
+		md_dev64_t	dev = NODEV64;
+
+		/*
+		 * If the disk is connected to the remote node then the
+		 * only thing we can be certain of is that the disk will
+		 * have the same devid on that node, it may not have the
+		 * same minor number nor the same ctd name. But if it
+		 * does have the same ctd name then use it.  In most cases
+		 * there will only be a single entry returned but if the
+		 * system has multi-path disks with MPXIO turned off there
+		 * will be multiple entries. Attempting to choose the same
+		 * name will give  the user as consistent a view across the
+		 * nodes as possible.
+		 */
+		ret = clnt_devinfo_by_devid(node, sp, dnp->devid, &dev,
+			np->rname, &rname, NULL, ep);
+
+		/*
+		 * If the return value was ENOTSUP, we know the
+		 * other side is not running a SVM version that
+		 * supports device id in disksets. We fallback
+		 * to the previous behaviour in that case.
+		 */
+		if (ret == ENOTSUP) {
+			do_fallback++;
+			goto fallback;
+		} else if (ret == -1) {
+			rval = -1;
+			goto out;
+		}
+
+		/*
+		 * If the device does not exist on the remote node then
+		 * the returned dev should indicate this (NODEV64) but
+		 * we also check to make sure the returned name is not
+		 * empty to make sure that the namespace does not get
+		 * created with a NULL/empty entry (should not be possible
+		 * but being paranoid).
+		 */
+		if (dev == NODEV64 || rname == (char *)NULL ||
+		    strcmp(rname, "") == 0) {
+			rval = mddserror(ep, MDE_DS_DRIVENOTCOMMON, sp->setno,
+					node, dnp->cname, sp->setname);
+			goto out;
+		}
+
+		/*
+		 * The rname returned from the remote node maybe different
+		 * to the rname on this node, therefore we need to build up
+		 * a dnp for this new rname.
+		 */
+		if (strcmp(np->rname, rname) != 0) {
+			/* different rname */
+			remote_np = metaname_fast(&sp, rname, ep);
+			if (remote_np == NULL) {
+				/*
+				 * Without a name there is no drive to hand
+				 * to clnt_drvused(); fail rather than pass
+				 * an uninitialized pointer.
+				 */
+				rval = -1;
+				goto out;
+			}
+			remote_dnp = remote_np->drivenamep;
+		} else {
+			remote_dnp = dnp;
+		}
+	} else {
+		do_fallback++;
+	}
+
+fallback:
+	if (do_fallback) {
+		ret = setdevstamp(dnp, &mystamp, ep);
+		/*
+		 * Check if the disk in question is an EFI disk.
+		 */
+		if (ret == ENOTSUP) {
+			is_efi++;
+		} else if (ret == -1) {
+			/* leave via "out" so released ownership is retaken */
+			rval = -1;
+			goto out;
+		}
+
+		if ((np = metaslicename(dnp, MD_SLICE0, ep)) == NULL) {
+			rval = -1;
+			goto out;
+		}
+
+		if (is_efi) {
+			/*
+			 * For EFI disks, we compare the device
+			 * id for the disks in question.
+			 */
+			ddi_devid_t	thisdevid, otherdevid;
+			char		*encoded_otherdevid = NULL;
+			char		*encoded_thisdevid = NULL;
+
+			if (clnt_devinfo(node, sp, dnp, &otherdev, NULL, ep)
+			    == -1) {
+				rval = -1;
+				goto out;
+			}
+			if (np->dev != otherdev) {
+				rval = mddserror(ep, MDE_DS_DRIVENOTCOMMON,
+				    sp->setno, node, dnp->cname, sp->setname);
+				goto out;
+			}
+
+			if (clnt_devid(node, sp, dnp, &encoded_otherdevid,
+			    ep) == -1) {
+				rval = -1;
+				goto out;
+			}
+			if (encoded_otherdevid == NULL) {
+				rval = -1;
+				goto out;
+			}
+			/*
+			 * ret is still ENOTSUP (non-zero) here, so if the
+			 * remote devid cannot be decoded the drive is
+			 * reported as not common below.
+			 */
+			if (devid_str_decode(encoded_otherdevid, &otherdevid,
+			    NULL) == 0) {
+				/*
+				 * If we are here, it means that dnp->devid
+				 * is NULL. This will typically happen if
+				 * we are dealing with SunCluster DID devices.
+				 *
+				 * We want to explicitly get the device id
+				 * for such a disk
+				 */
+				encoded_thisdevid = meta_get_devid(dnp->rname);
+				ret = devid_str_decode(encoded_thisdevid,
+				    &thisdevid, NULL);
+				if (ret == 0) {
+					ret = devid_compare(thisdevid,
+					    otherdevid);
+					devid_free(thisdevid);
+				}
+				devid_free(otherdevid);
+				if (encoded_thisdevid)
+					Free(encoded_thisdevid);
+			}
+
+			Free(encoded_otherdevid);
+			if (ret != 0) {
+				rval = mddserror(ep, MDE_DS_DRIVENOTCOMMON,
+				    sp->setno, node, dnp->cname, sp->setname);
+				goto out;
+			}
+		} else {
+			/*
+			 * For VTOC disks, we compare the dev_t and
+			 * timestamp for the disks in question.
+			 */
+			if (clnt_devinfo(node, sp, dnp, &otherdev,
+			    &otherstamp, ep) == -1) {
+				rval = -1;
+				goto out;
+			}
+			if ((mystamp != otherstamp) || (np->dev != otherdev)) {
+				rval = mddserror(ep, MDE_DS_DRIVENOTCOMMON,
+				    sp->setno, node, dnp->cname, sp->setname);
+				goto out;
+			}
+		}
+		remote_dnp = dnp;
+	}
+
+	if (clnt_drvused(node, sp, remote_dnp, ep) == -1)
+		rval = -1;
+
+out:
+	/* Retake the ownership that was released at the top, if any */
+	if (release)
+		if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
+			if (tk_own_bydd(sp, &dd, &mhiargs, TRUE, ep))
+				rval = -1;
+		}
+
+	return (rval);
+}
+
+/*
+ * Map a host name to its side number within the set described by sd.
+ * Returns MD_SIDEWILD when the host cannot be found.
+ */
+side_t
+getnodeside(char *node, md_set_desc *sd)
+{
+	side_t			side;
+	int			cl_nodeid;
+	md_mnnode_desc		*mnp;
+
+	/* Multi-node set: the side is the node id kept in the node list */
+	if (MD_MNSET_DESC(sd)) {
+		for (mnp = sd->sd_nodelist; mnp != NULL; mnp = mnp->nd_next) {
+			if (strcmp(mnp->nd_nodename, node) == 0)
+				return (mnp->nd_nodeid);
+		}
+		return (MD_SIDEWILD);
+	}
+
+	/* Regular diskset: the side is the index of the matching slot */
+	for (side = 0; side < MD_MAXSIDES; side++) {
+		if (sd->sd_nodes[side] != NULL &&
+		    sd->sd_nodes[side][0] != '\0' &&
+		    strcmp(sd->sd_nodes[side], node) == 0)
+			return (side);
+	}
+
+	/*
+	 * No name matched.  This host may be configured as part of a
+	 * cluster without currently running in cluster mode, in which case
+	 * sd->sd_nodes[] holds nodeid's rather than hostnames.  Retry the
+	 * lookup by comparing against our own cluster node id.
+	 */
+	if (_cladm(CL_CONFIG, CL_NODEID, &cl_nodeid) != 0)
+		return (MD_SIDEWILD);
+
+	for (side = 0; side < MD_MAXSIDES; side++) {
+		if (sd->sd_nodes[side] != NULL &&
+		    sd->sd_nodes[side][0] != '\0' &&
+		    atoi(sd->sd_nodes[side]) == cl_nodeid)
+			return (side);
+	}
+
+	return (MD_SIDEWILD);
+}
+
+/*
+ * Release the set sp from this host if this host currently owns it.
+ * Returns 0 on success (including when the set is not owned here),
+ * -1 if our side cannot be determined, or the result of mdstealerror()
+ * when the MD_RELEASE_SET ioctl fails.
+ */
+int
+halt_set(mdsetname_t *sp, md_error_t *ep)
+{
+	mddb_config_t	cfg;
+
+	(void) memset(&cfg, 0, sizeof (cfg));
+	cfg.c_setno = sp->setno;
+	cfg.c_sideno = getmyside(sp, ep);
+	if (cfg.c_sideno == MD_SIDEWILD)
+		return (-1);
+
+	/* Nothing to release unless we are the owner */
+	if (s_ownset(sp->setno, ep) != MD_SETOWNER_YES)
+		return (0);
+
+	/* Don't need device id information from this ioctl */
+	cfg.c_locator.l_devid = (uint64_t)0;
+	cfg.c_locator.l_devid_flags = 0;
+
+	/* Kill any resyncs that are running on mirrors in this set */
+	meta_mirror_resync_kill(sp);
+
+	if (metaioctl(MD_RELEASE_SET, &cfg, &cfg.c_mde, NULL) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &cfg.c_mde));
+}
+
+/*
+ * Allocate a zeroed drive descriptor, fill it in from the arguments and
+ * link it onto the end of the list headed by *dd.
+ * Returns the new element.
+ */
+md_drive_desc *
+metadrivedesc_append(
+	md_drive_desc	**dd,
+	mddrivename_t	*dnp,
+	int		dbcnt,
+	int		dbsize,
+	md_timeval32_t	timestamp,
+	ulong_t		genid,
+	uint_t		flags
+)
+{
+	md_drive_desc	**tailp = dd;
+	md_drive_desc	*new_dd;
+
+	/* find the link that terminates the list */
+	while (*tailp != NULL)
+		tailp = &(*tailp)->dd_next;
+
+	/* build the new element, then hook it in */
+	new_dd = Zalloc(sizeof (*new_dd));
+	*tailp = new_dd;
+
+	new_dd->dd_dnp = dnp;
+	new_dd->dd_dbcnt = dbcnt;
+	new_dd->dd_dbsize = dbsize;
+	new_dd->dd_ctime = timestamp;
+	new_dd->dd_genid = genid;
+	new_dd->dd_flags = flags;
+
+	return (new_dd);
+}
+
+/*
+ * Compare the local set descriptor sd with the set record sr fetched from
+ * another host, to the depth requested by match_flag.  The checks are
+ * cumulative: name only, then set number, then creation time, then genid.
+ * Returns 1 when the requested level of match is satisfied, 0 otherwise.
+ */
+static int
+nhs_record_matches(md_set_desc *sd, md_set_record *sr, uint_t match_flag)
+{
+	/* Name only: the record was found by name, so it matches */
+	if ((match_flag & NHS_N_EQ) == NHS_N_EQ)
+		return (1);
+
+	if (sd->sd_setno != sr->sr_setno)
+		return (0);
+
+	/* Name and setno */
+	if ((match_flag & NHS_NS_EQ) == NHS_NS_EQ)
+		return (1);
+
+	if (sd->sd_ctime.tv_sec != sr->sr_ctime.tv_sec ||
+	    sd->sd_ctime.tv_usec != sr->sr_ctime.tv_usec)
+		return (0);
+
+	/* Name, setno and timestamp */
+	if ((match_flag & NHS_NST_EQ) == NHS_NST_EQ)
+		return (1);
+
+	/*
+	 * Name, setno and timestamp match and the genid in the other
+	 * host's record is greater than the local one.
+	 */
+	if (sd->sd_genid < sr->sr_genid)
+		return ((match_flag & NHS_NST_EQ_G_GT) == NHS_NST_EQ_G_GT);
+
+	if (sd->sd_genid != sr->sr_genid)
+		return (0);
+
+	/* Name, setno, timestamp and genid */
+	return ((match_flag & NHS_NSTG_EQ) == NHS_NSTG_EQ);
+}
+
+/*
+ * Ask "node" for its record of the set named sp->setname and report
+ * whether it matches the local view to the level selected by match_flag.
+ * Returns 1 on a match, 0 on no match (or when the node has no such set
+ * and ep is clean), and -1 on error.
+ */
+int
+nodehasset(
+	mdsetname_t	*sp,
+	char		*node,
+	uint_t		match_flag,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*sd;
+	md_set_record	*sr;
+	int		matched;
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	/* Don't care if set record is MN or not */
+	if (clnt_getset(node, sp->setname, MD_SET_BAD, &sr, ep))
+		return (-1);
+
+	if (sr == NULL)
+		return (mdisok(ep) ? 0 : -1);
+
+	matched = nhs_record_matches(sd, sr, match_flag);
+
+	/*
+	 * The record came from the getset RPC routine, so it is only the
+	 * size of md_set_record even when the MN flag is set.  Clear the
+	 * flag so the free code does not treat it as an md_mnset_record.
+	 */
+	sr->sr_flags &= ~MD_SR_MN;
+	free_sr(sr);
+
+	return (matched);
+}
+
+/*
+ * Verify that no host name appears twice in strings[0 .. cnt - 1].
+ * Returns 0 when all names are distinct; otherwise returns the result of
+ * mddserror() after recording MDE_DS_DUPHOST for the first duplicate.
+ */
+int
+nodesuniq(mdsetname_t *sp, int cnt, char **strings, md_error_t *ep)
+{
+	int	first, second;
+
+	for (first = 0; first < cnt; first++) {
+		for (second = first + 1; second < cnt; second++) {
+			if (strcmp(strings[first], strings[second]) != 0)
+				continue;
+
+			return (mddserror(ep, MDE_DS_DUPHOST, sp->setno,
+			    strings[first], NULL, sp->setname));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Determine who owns the set sp.
+ *
+ * Returns MD_SETOWNER_YES when this host owns it, MD_SETOWNER_NO when
+ * another host does, MD_SETOWNER_NONE when nobody does, or -1 on error.
+ * For a traditional set, when owner_of_set is not NULL it receives a
+ * Strdup()'d copy of the owner's name, or NULL when there is no owner.
+ * With forceflg == TRUE the other hosts are not asked.
+ */
+int
+own_set(mdsetname_t *sp, char **owner_of_set, int forceflg, md_error_t *ep)
+{
+	md_set_desc		*sd;
+	int			is_owner;
+	int			side;
+	char			*host;
+
+	/* The local set always belongs to this host */
+	if (metaislocalset(sp)) {
+		if (owner_of_set != NULL)
+			*owner_of_set = Strdup(mynode());
+		return (MD_SETOWNER_YES);
+	}
+
+	sd = metaget_setdesc(sp, ep);
+	if (sd == NULL)
+		return (-1);
+
+	if (clnt_ownset(mynode(), sp, &is_owner, ep) == -1)
+		return (-1);
+
+	/* Multi-node set: only report whether this host is an owner */
+	if (MD_MNSET_DESC(sd))
+		return ((is_owner == TRUE) ?
+		    MD_SETOWNER_YES : MD_SETOWNER_NO);
+
+	if (is_owner == TRUE) {
+		if (owner_of_set != NULL)
+			*owner_of_set = Strdup(mynode());
+		return (MD_SETOWNER_YES);
+	}
+
+	/* Unless forced, ask every other host in the set */
+	if (forceflg != TRUE) {
+		for (side = 0; side < MD_MAXSIDES; side++) {
+			host = sd->sd_nodes[side];
+
+			/* Skip empty slots, and my own slot. */
+			if (host[0] == '\0' || strcmp(host, mynode()) == 0)
+				continue;
+
+			if (clnt_ownset(host, sp, &is_owner, ep) == -1)
+				return (-1);
+
+			if (is_owner != TRUE)
+				continue;
+
+			if (owner_of_set != NULL)
+				*owner_of_set = Strdup(host);
+			return (MD_SETOWNER_NO);
+		}
+	}
+
+	/* We get here, we currently have no owner. */
+	if (owner_of_set != NULL)
+		*owner_of_set = NULL;
+	return (MD_SETOWNER_NONE);
+}
+
+/*
+ * Bring the set record genid on every host of the set up to max_genid by
+ * repeatedly issuing clnt_upd_sr_flags(..., MD_SR_OK, ...) - one call per
+ * genid step that a host is behind.  When node_v names hosts, each of
+ * those is first marked MD_SR_OK once and max_genid is raised by one.
+ * RPC errors are cleared and otherwise ignored.
+ */
+void
+resync_genid(
+	mdsetname_t		*sp,
+	md_set_desc		*sd,
+	ulong_t			max_genid,
+	int			node_c,
+	char			**node_v
+)
+{
+	int			idx, gen;
+	ulong_t			node_genid[MD_MAXSIDES];
+	md_set_record		*sr;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	md_mnset_record		*mnsr;
+
+	/* Mark the set record MD_SR_OK on the explicitly named hosts */
+	if (node_c > 0 && node_v && *node_v) {
+		for (idx = 0; idx < node_c; idx++) {
+			if (clnt_upd_sr_flags(node_v[idx], sp, MD_SR_OK,
+			    &xep))
+				mdclrerror(&xep);
+		}
+		max_genid++;
+	}
+
+	if (MD_MNSET_DESC(sd)) {
+		for (nd = sd->sd_nodelist; nd != NULL; nd = nd->nd_next) {
+			/* Only nodes flagged ALIVE are contacted */
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE))
+				continue;
+
+			/* Will only return a multi-node diskset record */
+			if (clnt_mngetset(nd->nd_nodename, sp->setname,
+			    MD_SET_BAD, &mnsr, &xep) == -1) {
+				mdclrerror(&xep);
+				continue;
+			}
+
+			for (gen = mnsr->sr_genid; gen < max_genid; gen++) {
+				if (clnt_upd_sr_flags(nd->nd_nodename, sp,
+				    MD_SR_OK, &xep))
+					mdclrerror(&xep);
+			}
+			free_sr((struct md_set_record *)mnsr);
+		}
+		return;
+	}
+
+	/* Traditional set: first collect the current genid of each host */
+	for (idx = 0; idx < MD_MAXSIDES; idx++) {
+		node_genid[idx] = 0;
+
+		/* Skip empty slots */
+		if (sd->sd_nodes[idx][0] == '\0')
+			continue;
+
+		/* Should be a non-multinode diskset */
+		if (clnt_getset(sd->sd_nodes[idx], sp->setname,
+		    MD_SET_BAD, &sr, &xep) == -1) {
+			mdclrerror(&xep);
+			continue;
+		}
+
+		if (MD_MNSET_REC(sr)) {
+			/*
+			 * The record came from the getset RPC routine, so
+			 * it is only the size of md_set_record even though
+			 * the MN flag is set.  Clear the flag so the free
+			 * code does not treat it as an md_mnset_record.
+			 * The genid for this host stays 0.
+			 */
+			sr->sr_flags &= ~MD_SR_MN;
+		} else {
+			node_genid[idx] = sr->sr_genid;
+		}
+
+		free_sr(sr);
+	}
+
+	/* Then mark the set record MD_SR_OK once per missing genid step */
+	for (idx = 0; idx < MD_MAXSIDES; idx++) {
+		/* Skip empty slots */
+		if (sd->sd_nodes[idx][0] == '\0')
+			continue;
+
+		for (gen = node_genid[idx]; gen < max_genid; gen++) {
+			if (clnt_upd_sr_flags(sd->sd_nodes[idx], sp,
+			    MD_SR_OK, &xep))
+				mdclrerror(&xep);
+		}
+	}
+}
+
+/*
+ * Tell the kernel (MD_DB_USEDEV) about every database replica on the
+ * drives in the list dd.  For each drive with dd_dbcnt > 0 a locator is
+ * built - from this side's entry in the drive's side names when one
+ * exists, otherwise from the drive's replica slice - and one ioctl is
+ * issued per replica, with replicas laid out every dd_dbsize blocks
+ * starting at block 16.
+ *
+ * Drives whose replica slice or controller information cannot be
+ * obtained are skipped.
+ *
+ * Returns 0 on success.  Returns -1 when the set descriptor, our side or
+ * the mediator configuration cannot be set up, or when the device id for
+ * a drive cannot be built (ep is not set in that last case).  Returns the
+ * result of mdstealerror() when an ioctl fails.
+ */
+int
+setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd, int force, md_error_t *ep)
+{
+	md_drive_desc		*p;
+	struct mddb_config	c;
+	int			i;
+	md_set_desc		*sd;
+	int			use_devid = 1;
+	ddi_devid_t		devidp;
+	char			*minor_name;
+	size_t			sz;
+	char			*devid_str;
+	void			*devid_buf;
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL)
+		return (-1);
+
+	(void) memset(&c, 0, sizeof (c));
+
+	c.c_setno = sp->setno;
+	(void) strcpy(c.c_setname, sp->setname);
+	if ((c.c_sideno = getmyside(sp, ep)) == MD_SIDEWILD)
+		return (-1);
+
+	c.c_timestamp = sd->sd_ctime;
+
+	if (setup_med_cfg(sp, &c, force, ep))
+		return (-1);
+
+	for (p = dd; p != NULL; p = p->dd_next) {
+		mddrivename_t	*dnp;
+		mdname_t	*np;
+		mdcinfo_t	*cinfo;
+		mdsidenames_t	*sn = NULL;
+
+		/*
+		 * Start every drive with no per-drive allocations so that
+		 * pointers freed for the previous drive are never reused
+		 * or freed a second time.
+		 */
+		minor_name = NULL;
+		devid_str = NULL;
+		devid_buf = NULL;
+
+		if (p->dd_dbcnt == 0)
+			continue;
+
+		dnp = p->dd_dnp;
+
+		assert(dnp != NULL);
+
+		/* Look for this side's entry in the drive's side names */
+		for (sn = dnp->side_names; sn != NULL; sn = sn->next) {
+			if (sn->sideno == c.c_sideno)
+				break;
+		}
+
+		/*
+		 * The disk has no side name information
+		 */
+		if (sn == NULL) {
+			uint_t	rep_slice;
+
+			if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
+			    ((np = metaslicename(dnp, rep_slice, ep))
+				== NULL)) {
+				mdclrerror(ep);
+				continue;
+			}
+
+			if (np->dev == NODEV64)
+				continue;
+
+			c.c_locator.l_dev = meta_cmpldev(np->dev);
+			c.c_locator.l_mnum = meta_getminor(np->dev);
+
+			if (!MD_MNSET_DESC(sd)) {
+				/*
+				 * minor_name will be NULL if dnp->devid == NULL
+				 * - see metagetvtoc()
+				 */
+				if (np->minor_name != NULL) {
+					minor_name = Strdup(np->minor_name);
+				}
+			}
+
+			if ((cinfo = metagetcinfo(np, ep)) == NULL) {
+				mdclrerror(ep);
+				/* don't leak the copy made just above */
+				if (minor_name != NULL)
+					Free(minor_name);
+				continue;
+			}
+
+			(void) strncpy(c.c_locator.l_driver, cinfo->dname,
+			    sizeof (c.c_locator.l_driver));
+		} else {
+			c.c_locator.l_dev = NODEV32;
+			c.c_locator.l_mnum = sn->mnum;
+			(void) strncpy(c.c_locator.l_driver, sn->dname,
+			    sizeof (c.c_locator.l_driver));
+
+			if (!MD_MNSET_DESC(sd)) {
+				if (dnp->devid != NULL) {
+					minor_name = meta_getdidminorbykey(
+					    MD_LOCAL_SET, sn->sideno + SKEW,
+					    dnp->side_names_key, ep);
+				}
+			}
+		}
+
+		/*
+		 * NOTE(review): use_devid is never set back to 1, so once
+		 * one drive lacks a device id the remaining drives are
+		 * added without one as well.  Left as is - confirm that
+		 * this is intended.
+		 */
+		if ((dnp->devid == NULL) || MD_MNSET_DESC(sd)) {
+			use_devid = 0;
+		}
+
+		if (use_devid) {
+			size_t	len;
+
+			/*
+			 * The minor name is needed to build the device id
+			 * string, so it must be checked before it is
+			 * handed to strlen().
+			 */
+			if (minor_name == NULL) {
+				/* ERROR fix up */
+				return (-1);
+			}
+
+			/*
+			 * The devid associated with the dnp does not have
+			 * a minor name and so we must add it in.
+			 */
+			len = strlen(dnp->devid) + strlen(minor_name) + 2;
+			devid_str = (char *)Malloc(len);
+			(void) snprintf(devid_str, len, "%s/%s", dnp->devid,
+			    minor_name);
+			if (devid_str_decode(devid_str, &devidp, NULL) != 0) {
+				/* devidp is not valid; nothing to copy */
+				Free(devid_str);
+				Free(minor_name);
+				return (-1);
+			}
+
+			/* Hand the kernel a private copy of the devid */
+			sz = devid_sizeof(devidp);
+			devid_buf = Malloc(sz);
+			(void) memcpy(devid_buf, devidp, sz);
+			devid_free(devidp);
+
+			c.c_locator.l_devid = (uintptr_t)devid_buf;
+			c.c_locator.l_devid_sz = sz;
+			(void) strcpy(c.c_locator.l_minor_name, minor_name);
+			c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
+			    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+		} else {
+			/*
+			 * Don't need device id information from
+			 * this ioctl
+			 */
+			c.c_locator.l_devid = (uint64_t)0;
+			c.c_locator.l_devid_flags = 0;
+		}
+
+
+		for (i = 0; i < p->dd_dbcnt; i++) {
+			c.c_locator.l_flags = 0;
+			c.c_locator.l_blkno = 16 + i * p->dd_dbsize;
+
+			if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
+				if (devid_str != NULL)
+					Free(devid_str);
+				if (devid_buf != NULL)
+					Free(devid_buf);
+				if (minor_name != NULL)
+					Free(minor_name);
+				return (mdstealerror(ep, &c.c_mde));
+			}
+		}
+
+		/* Release everything allocated for this drive */
+		if (devid_str != NULL)
+			Free(devid_str);
+		if (devid_buf != NULL)
+			Free(devid_buf);
+		if (minor_name != NULL)
+			Free(minor_name);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Issue the MD_GRAB_SET ioctl for the set sp on this host's side.
+ * When stale_bool is TRUE the request carries MDDB_C_STALE.
+ * Returns 0 on success, -1 if our side cannot be determined, the result
+ * of mdstealerror() if the ioctl fails, or the result of mdmddberror()
+ * with MDE_DB_STALE if MDDB_C_STALE is set in the flags afterwards.
+ */
+int
+snarf_set(mdsetname_t *sp, bool_t stale_bool, md_error_t *ep)
+{
+	mddb_config_t	cfg;
+
+	(void) memset(&cfg, '\0', sizeof (cfg));
+
+	cfg.c_setno = sp->setno;
+	cfg.c_sideno = getmyside(sp, ep);
+	if (cfg.c_sideno == MD_SIDEWILD)
+		return (-1);
+
+	/* Don't need device id information from this ioctl */
+	cfg.c_locator.l_devid = (uint64_t)0;
+	cfg.c_locator.l_devid_flags = 0;
+
+	if (stale_bool == TRUE)
+		cfg.c_flags = MDDB_C_STALE;
+
+	if (metaioctl(MD_GRAB_SET, &cfg, &cfg.c_mde, NULL) != 0)
+		return (mdstealerror(ep, &cfg.c_mde));
+
+	if ((cfg.c_flags & MDDB_C_STALE) == 0)
+		return (0);
+
+	return (mdmddberror(ep, MDE_DB_STALE, NODEV64, sp->setno,
+	    0, NULL));
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c b/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c
new file mode 100644
index 0000000000..b13c483af0
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c
@@ -0,0 +1,1079 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Metadevice diskset interfaces
+ */
+
+#include "meta_set_prv.h"
+#include <sys/lvm/md_crc.h>
+
+
+/*
+ * Recompute the replica count (dd_dbcnt) and replica size (dd_dbsize) of
+ * every drive in the drive descriptor list from the replica list, then
+ * push the updated drive records to the nodes of the set.
+ *
+ * sp		- set being operated on
+ * sd		- set descriptor (used for the node list / MN check)
+ * dd		- drive descriptor list, updated in place
+ * rlp		- list of replicas currently in the set
+ * forceflg	- when non-zero only this node is updated and no other
+ *		  node is locked or unlocked
+ * ep		- error return
+ *
+ * The caller is expected to already hold the set lock on this node; this
+ * routine locks and unlocks only the other nodes.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+upd_dr_dbinfo(
+	mdsetname_t		*sp,
+	md_set_desc		*sd,
+	md_drive_desc		*dd,
+	md_replicalist_t	*rlp,
+	int			forceflg,
+	md_error_t		*ep
+)
+{
+	md_drive_desc		*p;
+	md_replica_t		*r;
+	md_replicalist_t	*rl;
+	int			i;
+	int			dbcnt;
+	int			rval = 0;
+	daddr_t			nblks = 0;
+	md_setkey_t		*cl_sk;
+	md_error_t		xep = mdnullerror;
+	md_mnnode_desc		*nd;
+	ddi_devid_t		devid = NULL;
+
+	/* find the smallest existing replica */
+	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+		r = rl->rl_repp;
+		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
+	}
+
+	/* No usable replica size found, fall back to the default size */
+	if (nblks <= 0)
+		nblks = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE;
+
+	for (p = dd; p != NULL; p = p->dd_next) {
+		dbcnt = 0;
+		for (rl = rlp; rl != NULL; rl = rl->rl_next) {
+			r = rl->rl_repp;
+
+			/*
+			 * Before we bump up the dbcnt, if we're
+			 * running with device ids in disksets, let's
+			 * compare the device ids otherwise we compare
+			 * the ctd names.
+			 *
+			 * There is a possibility the device ids might
+			 * have changed. To account for that case, we
+			 * fallback to comparing the ctd names if the
+			 * device id comparison fails. If we aren't running
+			 * in device id mode and a disk has moved, the ctd's
+			 * won't match.
+			 *
+			 * If the device id string cannot be decoded we
+			 * must not touch devid (it is not valid), so that
+			 * case also falls back to the ctd name comparison.
+			 */
+			if ((p->dd_dnp->devid != NULL) &&
+			    (r->r_devid != NULL) && (!MD_MNSET_DESC(sd)) &&
+			    (devid_str_decode(p->dd_dnp->devid,
+			    &devid, NULL) == 0)) {
+				if ((devid_compare(devid, r->r_devid) == 0) ||
+				    (strcmp(r->r_namep->drivenamep->cname,
+				    p->dd_dnp->cname) == 0))
+					dbcnt++;
+				devid_free(devid);
+				devid = NULL;
+			} else {
+				if (strcmp(r->r_namep->drivenamep->cname,
+				    p->dd_dnp->cname) == 0)
+					dbcnt++;
+			}
+		}
+		p->dd_dbcnt = dbcnt;
+		p->dd_dbsize = dbcnt > 0 ? nblks : 0;
+	}
+
+	/* Lock the set on current set members */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* If this is forced, don't lock other sides */
+			if (forceflg && strcmp(mynode(), nd->nd_nodename)
+			    != 0) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* We already locked this side in the caller */
+			if (strcmp(mynode(), nd->nd_nodename) == 0) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* If this is forced, don't lock other sides */
+			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
+				continue;
+
+			/* We already locked this side in the caller */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * Push the updated drive records to the nodes.  A node that
+	 * reports MDE_NO_SET or MDE_DB_NODB is tolerated; any other
+	 * error stops the update.
+	 */
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* If this is forced, then only care about this node */
+			if (forceflg && strcmp(mynode(), nd->nd_nodename)
+			    != 0) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd,
+			    ep) == -1) {
+				if (! mdiserror(ep, MDE_NO_SET) &&
+				    ! mdismddberror(ep, MDE_DB_NODB)) {
+					rval = -1;
+					break;
+				}
+				mdclrerror(ep);
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* If this is forced, then only care about this node */
+			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
+				continue;
+
+			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd,
+			    ep) == -1) {
+				if (! mdiserror(ep, MDE_NO_SET) &&
+				    ! mdismddberror(ep, MDE_DB_NODB)) {
+					rval = -1;
+					break;
+				}
+				mdclrerror(ep);
+			}
+		}
+	}
+
+out:
+	/*
+	 * Unlock the other nodes.  The first unlock failure is reported
+	 * through ep unless an earlier error is already being returned.
+	 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (MD_MNSET_DESC(sd)) {
+		nd = sd->sd_nodelist;
+		while (nd) {
+			/* If this is forced, don't unlock other sides */
+			if (forceflg && strcmp(mynode(), nd->nd_nodename)
+			    != 0) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			/* We will unlock this side in the caller */
+			if (strcmp(mynode(), nd->nd_nodename) == 0) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
+				nd = nd->nd_next;
+				continue;
+			}
+
+			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+			nd = nd->nd_next;
+		}
+	} else {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* If this is forced, don't unlock other sides */
+			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
+				continue;
+
+			/* We will unlock this side in the caller */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	}
+	/* Do not clear the key, via cl_set_setkey(NULL) this is nested */
+
+	return (rval);
+}
+
+/*
+ * Tell the kernel, via the MD_MED_USE_TAG ioctl, which data tag to use
+ * for the given set.
+ *
+ * Returns 0 on success, otherwise the result of mdstealerror() after the
+ * ioctl error has been moved into ep.
+ */
+static int
+usetag_take(set_t setno, int usetag, md_error_t *ep)
+{
+	mddb_dtag_use_parm_t	parm;
+
+	(void) memset(&parm, '\0', sizeof (parm));
+	parm.dtup_setno = setno;
+	parm.dtup_id = usetag;
+
+	if (metaioctl(MD_MED_USE_TAG, &parm, &parm.dtup_mde, NULL) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &parm.dtup_mde));
+}
+
+/*
+ * Issue the MD_MED_ACCEPT ioctl for the given set.
+ *
+ * Returns 0 on success, otherwise the result of mdstealerror() after the
+ * ioctl error has been moved into ep.
+ */
+static int
+useit_take(set_t setno, md_error_t *ep)
+{
+	mddb_accept_parm_t	parm;
+
+	(void) memset(&parm, '\0', sizeof (parm));
+	parm.accp_setno = setno;
+
+	if (metaioctl(MD_MED_ACCEPT, &parm, &parm.accp_mde, NULL) == 0)
+		return (0);
+
+	return (mdstealerror(ep, &parm.accp_mde));
+}
+
+/*
+ * Update the master block with the device id information for the disks
+ * in the diskset. The device id information will be consumed by the
+ * diskset import code in case of remotely replicated disksets.
+ *
+ * For the drives that have a valid diskset mddb on them, we add the
+ * device id for the drive to the unused portion of the mddb.
+ *
+ * For the drives that don't have a diskset mddb on them, we add a dummy
+ * master block that contains the device id for the drive. A dummy master
+ * block is signified by changing the master block magic number, mb_magic,
+ * to MDDB_MAGIC_DU.
+ *
+ * This code is responsible primarily for adding the appropriate device id
+ * information to diskset disks that didn't have the information. This would
+ * typically occur when the OS has been upgraded from an OS release prior to
+ * Solaris 10
+ *
+ * The error path in this routine is defined as - if an error occurs while
+ * updating the mddb for one disk in the diskset, don't bother updating *any*
+ * of the mddbs because it's game over anyways as far as disaster recovery for
+ * that diskset is concerned.
+ *
+ * This code will need to be revisited if and when support for importing
+ * partial disksets is added.
+ *
+ * NOTE: This code relies heavily on the meta_repartition() working correctly
+ * and reformatting a drive, so that there's enough room for a dummy master
+ * block, every time a drive is added to a diskset. Should
+ * the meta_repartition() code change in future, this code will have to be
+ * revisited.
+ *
+ * Resource handling: the master block buffer, the open file descriptor and
+ * the decoded device id are each owned by this routine. The descriptor and
+ * device id are released once per drive (not once per replica, since the
+ * same device id is needed for every replica on the drive), and everything
+ * still held is released at the common exit.
+ *
+ * Returns 0 on success and -1 on failure
+ */
+int
+meta_update_mb(mdsetname_t *sp, md_drive_desc *drivedesc, md_error_t *ep)
+{
+	uint_t			sliceno, offset;
+	mddb_mb_t		*mbp;
+	int			fd = -1;
+	ddi_devid_t		devid = NULL;
+	md_drive_desc		*dd;
+	mddrivename_t		*dnp;
+	mdname_t		*rsp;
+	int			dbcnt;
+	int			dbsize;
+	size_t			len;
+	md_set_desc		*sd;
+	int			rval = -1;
+
+	/*
+	 * Don't do anything for MN diskset for now.
+	 */
+	if (! metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL)
+			return (-1);
+
+		if (MD_MNSET_DESC(sd))
+			return (0);
+	}
+
+	/* One block sized scratch buffer, reused for every master block */
+	mbp = (mddb_mb_t *)Malloc(DEV_BSIZE);
+
+	/*
+	 * For every drive in the drive descriptor, iterate through all
+	 * the mddbs present on it and check to see if mb_devid_magic is
+	 * set. If it isn't, then update the master block with the correct
+	 * device id information
+	 */
+	for (dd = drivedesc; dd != NULL; dd = dd->dd_next) {
+		int i = 0;
+
+		dnp = dd->dd_dnp;
+		dbcnt = dd->dd_dbcnt;
+		dbsize = dd->dd_dbsize;
+
+		/*
+		 * When the import support for remotely replicated
+		 * disksets gets implemented, we probably want to
+		 * inform the user that the disks won't be self
+		 * identifying if any of these calls fails
+		 */
+		if (meta_replicaslice(dnp, &sliceno, ep) != 0)
+			goto cleanup;
+
+		if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
+			goto cleanup;
+
+		if ((fd = open(rsp->rname, O_RDWR)) < 0)
+			goto cleanup;
+
+		/*
+		 * If there is no device id string, or devid_str_decode
+		 * fails, make sure devid is null
+		 */
+		if ((dnp->devid == NULL) ||
+		    (devid_str_decode(dnp->devid, &devid, NULL) != 0)) {
+			devid = NULL;
+		}
+
+		do {
+			int push = 0;
+
+			/* replicas start at block 16, dbsize blocks apart */
+			offset = (i * dbsize + 16);
+			++i;
+
+			if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0)
+				goto cleanup;
+
+			if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
+				goto cleanup;
+
+			if (crcchk((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum,
+			    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
+				goto cleanup;
+
+			/*
+			 * If the disk is one of the ones that doesn't
+			 * have a shared mddb on it, we put a dummy
+			 * master block on it.
+			 */
+			if (mbp->mb_devid_magic != MDDB_MAGIC_DE) {
+				if (dbcnt == 0) {
+					meta_mkdummymaster(sp, fd, 16);
+					break;
+				}
+			}
+
+			/*
+			 * if mb_setcreatetime is 0, this field was never
+			 * filled in so do it now.
+			 */
+			if ((mbp->mb_setcreatetime.tv_sec == 0) &&
+			    (mbp->mb_setcreatetime.tv_usec == 0)) {
+				mbp->mb_setcreatetime =
+				    meta_get_lb_inittime(sp, ep);
+				push = 1;
+			}
+
+			/*
+			 * If MDDB_MAGIC_DE is set in the
+			 * mb_devid_magic field then we know we
+			 * have a valid device id and we don't
+			 * need to add it to the master block.
+			 *
+			 * This would have to be revisited if device
+			 * ids change as a result of device id
+			 * algorithms changing or somesuch.
+			 */
+			if (mbp->mb_devid_magic != MDDB_MAGIC_DE) {
+				if (devid != NULL) {
+					len = devid_sizeof(devid);
+					if (len <= (DEV_BSIZE -
+					    sizeof (mddb_mb_t))) {
+						/*
+						 * there's enough space to
+						 * store the devid
+						 */
+						mbp->mb_devid_magic =
+						    MDDB_MAGIC_DE;
+						mbp->mb_devid_len = len;
+						(void) memcpy(mbp->mb_devid,
+						    (char *)devid, len);
+						push = 1;
+					}
+				}
+			}
+
+			/*
+			 * write out (push) any changes we have to the mb
+			 */
+			if (push) {
+				crcgen((uchar_t *)mbp,
+				    (uint_t *)&mbp->mb_checksum,
+				    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL);
+
+				if (lseek(fd, (off_t)dbtob(offset), SEEK_SET)
+				    < 0)
+					goto cleanup;
+
+				if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
+					goto cleanup;
+			}
+		} while (i < dbcnt);
+
+		/*
+		 * Done with this drive. Release the per-drive resources and
+		 * reset the handles so that neither the next drive nor the
+		 * common exit can release them a second time.
+		 */
+		(void) close(fd);
+		fd = -1;
+		if (devid != NULL) {
+			devid_free(devid);
+			devid = NULL;
+		}
+	}
+	/* success */
+	rval = 0;
+
+cleanup:
+	if (fd != -1)
+		(void) close(fd);
+	if (devid != NULL)
+		devid_free(devid);
+	Free(mbp);
+	return (rval);
+}
+
+/*
+ * Exported Entry Points
+ */
+/*
+ * Take ownership of a (traditional) diskset on this node.
+ *
+ * sp		- set to take
+ * mhiargsp	- multi-host ioctl timeout arguments to install
+ * flags	- TAKE_* flags (TAKE_FORCE, TAKE_USETAG, TAKE_USEIT)
+ * usetag	- data tag id, used when TAKE_USETAG is set
+ * ep		- error return
+ *
+ * The work is done in numbered rollback levels; rb_level records how far
+ * we got so that the rollback code can undo the completed steps in
+ * reverse order.  Every exit taken after md_rb_sig_handling_on() must go
+ * through "out" or "rollback" so that the set lock on this node is
+ * dropped and signal handling is restored.
+ *
+ * Returns 0 on success (including when the set is already owned or has
+ * no drives) and -1 on failure.
+ */
+int
+meta_set_take(
+	mdsetname_t		*sp,
+	mhd_mhiargs_t		*mhiargsp,
+	int			flags,
+	int			usetag,
+	md_error_t		*ep
+)
+{
+	md_set_desc		*sd;
+	md_drive_desc		*dd;
+	md_drive_desc		*d = NULL;
+	char			*owner = NULL;
+	int			rval = 0;
+	int			i;
+	int			has_set;
+	int			matches = 0;
+	int			numsides = 0;
+	md_replicalist_t	*rlp = NULL;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+	mdsetname_t		*local_sp = NULL;
+	side_t			side = MD_KEYWILD;
+	int			ret = 0;
+	char			*newname = NULL;
+	mdkey_t			side_names_key;
+
+	if ((flags & TAKE_USETAG) || (flags & TAKE_USEIT)) {
+		if (flags & TAKE_USETAG) {
+			if (usetag_take(sp->setno, usetag, ep))
+				return (-1);
+		} else {
+			if (useit_take(sp->setno, ep))
+				return (-1);
+		}
+
+		/* A failed resync here is not fatal to the take */
+		if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, ep) != 0)
+			mdclrerror(ep);
+	}
+
+	/* Do we own the set? */
+	i = own_set(sp, &owner, (flags & TAKE_FORCE), ep);
+	if (! mdisok(ep)) {
+		if (owner != NULL)
+			Free(owner);
+		return (-1);
+	}
+
+	if (i == MD_SETOWNER_NO) {
+		(void) mddserror(ep, MDE_DS_NOTOWNER, sp->setno, owner, NULL,
+		    sp->setname);
+		if (owner != NULL)
+			Free(owner);
+		return (-1);
+	}
+
+	if (owner != NULL) {
+		Free(owner);
+		owner = NULL;
+	}
+
+	/* We already own it, we are done. */
+	if (i == MD_SETOWNER_YES)
+		return (0);
+
+	if ((sd = metaget_setdesc(sp, &xep)) == NULL) {
+		/* Hand the reason for the failure back to the caller */
+		(void) mdstealerror(ep, &xep);
+		return (-1);
+	}
+
+	/* You can not take ownership of a set that has no drives */
+	if (sd->sd_flags & MD_SR_MB_DEVID)
+		dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, ep);
+	else
+		dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+
+	if (dd == NULL) {
+		if (! mdisok(ep))
+			return (-1);
+		return (0);
+	}
+
+	/* END CHECK CODE */
+
+	md_rb_sig_handling_on();
+
+	/* Lock the set on our side */
+	if (clnt_lock_set(mynode(), sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+	/*
+	 * Get the current side number - do not use getmyside()
+	 * as this code is essentially getnodeside() and this saves
+	 * some instructions.
+	 */
+	for (i = 0; i < MD_MAXSIDES; i++) {
+		if (sd->sd_nodes[i][0] == '\0')
+			continue;
+		if (strcmp(sd->sd_nodes[i], mynode()) == 0) {
+			/*
+			 * SKEW is required for the local set
+			 * as side 0 in this set is the node
+			 * associated with it (this node).
+			 */
+			side = i + SKEW;
+			break;
+		}
+	}
+	if (side == MD_KEYWILD) {
+		/*
+		 * The set is locked and rollback signal handling is on,
+		 * so leave through the common exit rather than returning
+		 * directly.
+		 */
+		(void) mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, mynode(),
+		    NULL, mynode());
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Check the local devid namespace to see if the disks
+	 * have been moved. Use the local set first of all as this contains
+	 * entries for the disks in the set.
+	 *
+	 * This is being done before the tk_own_bydd because the disks
+	 * in the dd list could be wrong! But it should be done with the lock
+	 * held for the set.
+	 */
+	local_sp = metasetname(MD_LOCAL_NAME, ep);
+	for (d = dd; d != NULL; d = d->dd_next) {
+		/*
+		 * Actually do the check of the disks.
+		 */
+		ret = meta_upd_ctdnames(&local_sp, 0, side, d->dd_dnp, &newname,
+		    ep);
+
+		if ((ret == METADEVADM_ERR) ||
+		    (ret == METADEVADM_DSKNAME_ERR)) {
+			/* check failed in some unknown manner */
+			rval = -1;
+			goto out;
+		} else if (ret == METADEVADM_DISKMOVE) {
+
+			/*
+			 * Update the dd namelist so that the rpc.metamhd
+			 * gets the correct disks to reserve - it is the rname
+			 * we are interested in.
+			 */
+			if (newname != NULL) {
+				/*
+				 * Need to save the side names key as this
+				 * points to the namespace entry that will
+				 * need to be updated. In addition the call
+				 * to meta_make_sidenmlist does not actually
+				 * set the namespace key.
+				 */
+				side_names_key = d->dd_dnp->side_names_key;
+				metafreedrivename(d->dd_dnp);
+				d->dd_dnp = metadrivename(&sp,
+				    metadiskname(newname), ep);
+				Free(newname);
+				/*
+				 * null newname so we are reset for next time
+				 * through
+				 */
+				newname = NULL;
+				/*
+				 * Without a drive name for the new location
+				 * there is nothing to build a side name list
+				 * for (or to store the key in).
+				 */
+				if (d->dd_dnp == NULL) {
+					rval = -1;
+					goto out;
+				}
+				ret = meta_make_sidenmlist(sp, d->dd_dnp, ep);
+				d->dd_dnp->side_names_key = side_names_key;
+				if (ret == -1) {
+					rval = -1;
+					goto out;
+				}
+			}
+		}
+	}
+
+
+	RB_TEST(1, "take", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "take", ep)
+
+	if (!MD_ATSET_DESC(sd)) {
+		if (tk_own_bydd(sp, dd, mhiargsp, FALSE, ep))
+			goto rollback;
+	}
+
+	RB_TEST(3, "take", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(4, "take", ep)
+
+	if (clnt_stimeout(mynode(), sp, mhiargsp, ep) == -1)
+		goto rollback;
+
+	if (setup_db_bydd(sp, dd, (flags & TAKE_FORCE), ep) == -1) {
+		if (! mdismddberror(ep, MDE_DB_ACCOK) &&
+		    ! mdismddberror(ep, MDE_DB_TAGDATA))
+			goto rollback;
+		mdclrerror(ep);
+	}
+
+	RB_TEST(5, "take", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(6, "take", ep)
+
+	/* Snarf set of traditional diskset doesn't use stale information */
+	if (snarf_set(sp, FALSE, ep)) {
+		if (mdismddberror(ep, MDE_DB_STALE) ||
+		    mdismddberror(ep, MDE_DB_ACCOK) ||
+		    mdismddberror(ep, MDE_DB_TAGDATA)) {
+			rval = -1;
+			goto out;
+		}
+
+		if (! mdismddberror(ep, MDE_DB_NODB) &&
+		    ! mdismddberror(ep, MDE_DB_NOTOWNER))
+			goto rollback;
+
+		/*
+		 * Look at the set on all other hosts, if every other host
+		 * has the same set with a larger genid, then we destroy this
+		 * copy.
+		 */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Skip this node */
+			if (strcmp(sd->sd_nodes[i], mynode()) == 0)
+				continue;
+
+			numsides++;
+
+			has_set = nodehasset(sp, sd->sd_nodes[i],
+			    NHS_NST_EQ_G_GT, &xep);
+
+			if (has_set < 0) {
+				if (! mdiserror(&xep, MDE_NO_SET) &&
+				    ! mdismddberror(&xep, MDE_DB_NODB))
+					goto rollback;
+				matches++;
+				mdclrerror(&xep);
+				continue;
+			}
+
+			if (has_set)
+				matches++;
+		}
+
+		/* Destroy the set */
+		if (numsides > 0 && (numsides - matches) == 0) {
+			if (meta_set_destroy(sp, FALSE, &xep))
+				mdclrerror(&xep);
+			(void) mddserror(ep, MDE_DS_SETCLEANUP, sp->setno,
+			    sp->setname, NULL, mynode());
+			rval = -1;
+			goto out;
+		}
+		goto rollback;
+	}
+
+	rval = pathname_reload(&sp, sp->setno, ep);
+	if ((rval == METADEVADM_ERR) || (rval == METADEVADM_DSKNAME_ERR)) {
+		goto rollback;
+	}
+
+
+	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
+		goto rollback;
+
+	if (upd_dr_dbinfo(sp, sd, dd, rlp, (flags & TAKE_FORCE), ep) < 0) {
+		metafreereplicalist(rlp);
+		goto rollback;
+	}
+
+	metafreereplicalist(rlp);
+
+	/*
+	 * If the set doesn't have the MD_SR_MB_DEVID bit set, i.e
+	 * the drives in the set don't have the device id information,
+	 * then stick it in if possible.
+	 *
+	 * If updating the master block fails for whatever reason, it's
+	 * okay. It just means the disk(s) in the diskset won't be self
+	 * identifying.
+	 */
+	if (!(sd->sd_flags & MD_SR_MB_DEVID)) {
+		/* Lock the set on current set members */
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* We already locked this side */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
+				rval = -1;
+				goto out;
+			}
+		}
+		rb_level = 4;	/* level 4 */
+
+		if (meta_update_mb(sp, dd, ep) == 0) {
+			/* update the sr_flags on all hosts */
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_upd_sr_flags(sd->sd_nodes[i],
+				    sp, (sd->sd_flags | MD_SR_MB_DEVID), ep))
+					goto rollback;
+			}
+		} else {
+			/*
+			 * Not being able to update the master blocks is
+			 * tolerated (see above), so don't leave that
+			 * failure behind in ep while we report success.
+			 */
+			mdclrerror(ep);
+		}
+
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* Unlock of this side is done later */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	}
+
+	/*
+	 * If we get here, we need to unlock the set before the resync
+	 * gets called, otherwise the "daemon" will hold the set lock
+	 * until the resync is done!
+	 */
+
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+	cl_set_setkey(NULL);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	/* We try to get things resync'ed, but this can fail */
+	mdclrerror(&xep);
+	if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, &xep) != 0) {
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+
+	RB_TEST(7, "take", ep)
+
+	return (rval);
+
+out:
+	/*
+	 * Common exit that does not undo any of the completed levels: drop
+	 * the set lock(s) and restore signal handling.
+	 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+	if (!(sd->sd_flags & MD_SR_MB_DEVID) && (rb_level > 2)) {
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* We already unlocked this side */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
+				if (rval == 0)
+					(void) mdstealerror(ep, &xep);
+				rval = -1;
+			}
+		}
+	}
+	cl_set_setkey(NULL);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+
+rollback:
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	rval = -1;
+
+	/* level 4 */
+	if (rb_level > 3) {
+		/*
+		 * NOTE(review): level 4 is only entered when MD_SR_MB_DEVID
+		 * is clear in sd->sd_flags, and nothing visible here sets it
+		 * in sd afterwards, so this flag reset looks unreachable -
+		 * confirm whether the test should be inverted.
+		 */
+		if (sd->sd_flags & MD_SR_MB_DEVID) {
+			/* update the sr_flags on all hosts */
+			for (i = 0; i < MD_MAXSIDES; i++) {
+				/* Skip empty slots */
+				if (sd->sd_nodes[i][0] == '\0')
+					continue;
+
+				if (clnt_upd_sr_flags(sd->sd_nodes[i], sp,
+				    (sd->sd_flags & ~MD_SR_MB_DEVID), &xep))
+					mdclrerror(&xep);
+			}
+		}
+
+		cl_sk = cl_get_setkey(sp->setno, sp->setname);
+		for (i = 0; i < MD_MAXSIDES; i++) {
+			/* Skip empty slots */
+			if (sd->sd_nodes[i][0] == '\0')
+				continue;
+
+			/* We will unlock this side below */
+			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
+				continue;
+
+			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 3 */
+	if (rb_level > 2) {
+		if (halt_set(sp, &xep))
+			mdclrerror(&xep);
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		if (clnt_stimeout(mynode(), sp, &defmhiargs, &xep) == -1)
+			mdclrerror(&xep);
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		if (!MD_ATSET_DESC(sd)) {
+			if (rel_own_bydd(sp, dd, FALSE, &xep))
+				mdclrerror(&xep);
+		}
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep))
+		mdclrerror(&xep);
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+}
+
+/*
+ * Release ownership of a diskset held by this node.
+ *
+ * sp	- set to release
+ * ep	- error return
+ *
+ * After the ownership check the set is locked on this node and then, in
+ * order: the set is halted, ownership of the drives is released and the
+ * timeouts are set to defmhiargs.  rb_level records how many of those
+ * steps have completed; on failure the rollback code undoes them in
+ * reverse order using the timeout values saved on entry.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int
+meta_set_release(
+	mdsetname_t		*sp,
+	md_error_t		*ep
+)
+{
+	int			rval = 0;
+	md_drive_desc		*dd;
+	mhd_mhiargs_t		mhiargs;
+	sigset_t		oldsigs;
+	md_setkey_t		*cl_sk;
+	int			rb_level = 0;
+	md_error_t		xep = mdnullerror;
+
+	/* Make sure we own the set */
+	if (meta_check_ownership(sp, ep) != 0)
+		return (-1);
+
+	/*
+	 * Get the drive descriptors.  A NULL list without an error in ep
+	 * is not treated as a failure; dd is then passed on as NULL.
+	 */
+	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
+	    ep)) == NULL)
+		if (! mdisok(ep))
+			return (-1);
+
+	/* Get timeout values in case we need to roll back this release */
+	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
+	if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0)
+		return (-1);
+
+	/* END CHECK CODE */
+
+	md_rb_sig_handling_on();
+
+	/* Lock the set on our side */
+	if (clnt_lock_set(mynode(), sp, ep)) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): RB_TEST and RB_PREEMPT are macros defined outside
+	 * this file; presumably they can transfer control to the rollback
+	 * label - confirm before reordering anything below.
+	 */
+	RB_TEST(1, "release", ep)
+
+	RB_PREEMPT;
+	rb_level = 1;	/* level 1 */
+
+	RB_TEST(2, "release", ep)
+
+	if (halt_set(sp, ep))
+		goto rollback;
+
+	RB_TEST(3, "release", ep)
+
+	RB_PREEMPT;
+	rb_level = 2;	/* level 2 */
+
+	RB_TEST(4, "release", ep)
+
+	if (rel_own_bydd(sp, dd, FALSE, ep))
+		goto rollback;
+
+	RB_TEST(5, "release", ep)
+
+	RB_PREEMPT;
+	rb_level = 3;	/* level 3 */
+
+	RB_TEST(6, "release", ep)
+
+	if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1)
+		goto rollback;
+
+	RB_TEST(7, "release", ep)
+
+	/* The success path falls through into the common exit below */
+out:
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
+		/* Only report the unlock failure if nothing else failed */
+		if (rval == 0)
+			(void) mdstealerror(ep, &xep);
+		rval = -1;
+	}
+	cl_set_setkey(NULL);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+
+rollback:
+	/* Make sure we are blocking all signals */
+	if (procsigs(TRUE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	rval = -1;
+
+	/*
+	 * Errors raised while rolling back go to xep and are discarded so
+	 * that ep keeps the error that caused the rollback.
+	 */
+
+	/* level 3 */
+	if (rb_level > 2) {
+		/* put back the timeout values saved on entry */
+		if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1)
+			mdclrerror(&xep);
+	}
+
+	/* level 2 */
+	if (rb_level > 1) {
+		if (tk_own_bydd(sp, dd, &mhiargs, FALSE, &xep))
+			mdclrerror(&xep);
+	}
+
+	/* level 1 */
+	if (rb_level > 0) {
+		if (setup_db_bydd(sp, dd, TRUE, &xep) == -1)
+			mdclrerror(&xep);
+
+		/* Snarf set of trad diskset doesn't use stale information */
+		if (snarf_set(sp, FALSE, &xep))
+			mdclrerror(&xep);
+	}
+
+	/* level 0 */
+	cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	if (clnt_unlock_set(mynode(), cl_sk, &xep))
+		mdclrerror(&xep);
+	cl_set_setkey(NULL);
+
+	/* release signals back to what they were on entry */
+	if (procsigs(FALSE, &oldsigs, &xep) < 0)
+		mdclrerror(&xep);
+
+	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_setup.c b/usr/src/lib/lvm/libmeta/common/meta_setup.c
new file mode 100644
index 0000000000..64bdc73c3c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_setup.c
@@ -0,0 +1,897 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * setup utility
+ */
+
+#include "meta_set_prv.h"
+#include <sys/resource.h>
+#include <syslog.h>
+
+
+/* globals */
+char		*myname = "";
+FILE		*metalogfp = NULL;
+int		metasyslog = 0;
+uint_t		verbosity = 0;
+hrtime_t	start_time = 0;
+sigset_t	allsigs;
+
+/* locals */
+static	int	rb_signal_handling = FALSE;
+static	int	rb_signal_caught = FALSE;
+static	int	rb_signal_which = 0;
+static	size_t	metansig = 0;
+static	struct	sigaction	*metahandlers = NULL;
+#ifdef	_DEBUG_MALLOC_INC
+static	ulong_t	malloc_histid_begin;
+static	ulong_t	malloc_histid_end;
+static	ulong_t	malloc_inuse_begin;
+static	ulong_t	malloc_inuse_end;
+#endif	/* _DEBUG_MALLOC_INC */
+
+/* forwards */
+static	void	md_catcher(int sig);
+
+/*
+ * push/pop signal handlers
+ */
+/*
+ * Install handler for signal sig and remember the disposition that was in
+ * effect before in metahandlers[sig], so md_popsig() can put it back.
+ *
+ * metansig is the highest index that is valid in metahandlers; once the
+ * vector exists it always holds (metansig + 1) entries.
+ *
+ * Returns 0 on success.  On failure the value of mdsyserror() is returned
+ * and the reason is recorded in ep.
+ */
+static int
+md_pushsig(
+	unsigned	sig,
+	void		(*handler)(int sig),
+	md_error_t	*ep
+)
+{
+	struct	sigaction	newhandler;
+
+	/* expand vector as necessary */
+	if (metahandlers == NULL) {
+		metahandlers = Zalloc(
+		    (sig + 1) * sizeof (metahandlers[0]));
+		metansig = sig;
+	} else if (sig > metansig) {
+		metahandlers = Realloc(metahandlers,
+		    ((sig + 1) * sizeof (metahandlers[0])));
+		/*
+		 * Zero only the slots that were just added, that is
+		 * (metansig + 1) .. sig.  Slot metansig already holds a
+		 * saved handler and must not be wiped.
+		 */
+		(void) memset(&metahandlers[metansig + 1], 0,
+		    ((sig - metansig) * sizeof (metahandlers[0])));
+		metansig = sig;
+	}
+
+	/* run the handler with every signal blocked */
+	newhandler.sa_flags = 0;
+	if (sigfillset(&newhandler.sa_mask) < 0)
+		return (mdsyserror(ep, errno,
+		    "sigfillset(&newhandler.sa_mask)"));
+	newhandler.sa_handler = handler;
+
+	/* push handler; the old disposition lands in metahandlers[sig] */
+	if (sigaction(sig, &newhandler, &metahandlers[sig]) < 0)
+		return (mdsyserror(ep, errno, "sigaction(&newhandler)"));
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * Reinstate the disposition that md_pushsig() saved for signal sig.
+ * Returns 0 on success, otherwise the value of mdsyserror() with the
+ * reason recorded in ep.
+ */
+static int
+md_popsig(
+	unsigned	sig,
+	md_error_t	*ep
+)
+{
+	struct sigaction	*saved;
+
+	/* can't pop what isn't pushed */
+	assert(sig <= metansig);
+	assert(metahandlers[sig].sa_handler != md_catcher);
+
+	/* pop handler */
+	saved = &metahandlers[sig];
+	if (sigaction(sig, saved, NULL) >= 0)
+		return (0);
+
+	return (mdsyserror(ep, errno, "sigaction(&metahandlers)"));
+}
+
+/*
+ * Build the name of the lock file for set setno: METALOCK itself for the
+ * local set, otherwise METALOCK with ".<setno>" appended.
+ *
+ * The result comes from strdup(), so the caller must free() it; it is NULL
+ * if strdup() fails.
+ */
+char *
+meta_lock_name(
+	set_t	setno
+)
+{
+	char	lockname[30];
+
+	if (setno == MD_LOCAL_SET)
+		return (strdup(METALOCK));
+
+	/*
+	 * set_t is declared elsewhere; the cast makes sure the variadic
+	 * argument really has the type the %ld conversion expects.
+	 */
+	(void) snprintf(lockname, sizeof (lockname), "%s.%ld", METALOCK,
+	    (long)setno);
+	return (strdup(lockname));
+}
+
+#define	META_LOCK_FD(sp)	((sp)->lockfd)
+#define	META_LOCK_NAME(sp)	(meta_lock_name((sp)->setno))
+
+/*
+ * open lock
+ *
+ * Make sure the lock file of set sp is open and store its descriptor in
+ * the set.  An already open descriptor is left alone.  The file is created
+ * (mode 0644) when it does not exist.  A read-only filesystem (EROFS) is
+ * not an error: the set simply stays at MD_NO_LOCK.
+ *
+ * Returns 0 on success and -1 on failure with the reason recorded in ep.
+ */
+static int
+meta_lock_open(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	int	lockfd = META_LOCK_FD(sp);
+	char	*path = META_LOCK_NAME(sp);
+	int	failed = 0;
+
+	if (lockfd < 0) {
+		assert(lockfd == MD_NO_LOCK);
+
+		/* try the existing file first, create it only on ENOENT */
+		lockfd = open(path, O_WRONLY, 0);
+		if (lockfd < 0) {
+			if (errno == EROFS) {
+				lockfd = MD_NO_LOCK;
+			} else if (errno != ENOENT) {
+				(void) mdsyserror(ep, errno, path);
+				failed = 1;
+			} else if ((lockfd = open(path, (O_WRONLY|O_CREAT),
+			    0644)) < 0) {
+				(void) mdsyserror(ep, errno, path);
+				failed = 1;
+			} else if (fchmod(lockfd, 0644) != 0) {
+				(void) mdsyserror(ep, errno, path);
+				failed = 1;
+			}
+		}
+	}
+
+	if (path != NULL)
+		free(path);
+
+	if (failed) {
+		/* don't leak a descriptor we opened but could not set up */
+		if (lockfd >= 0)
+			(void) close(lockfd);
+		return (-1);
+	}
+
+	META_LOCK_FD(sp) = lockfd;
+	return (0);
+}
+
+/*
+ * Close the lock file descriptor of set sp and mark the set as having no
+ * lock file open (MD_NO_LOCK), whether or not close() succeeded.
+ *
+ * ep may be NULL when the caller is not interested in the reason for a
+ * failure.  Returns 0 on success, -1 if close() failed.
+ */
+static int
+meta_lock_close(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	int	retval = 0;
+
+	if (close(META_LOCK_FD(sp)) != 0) {
+		/*
+		 * Capture errno right away: building the lock name below
+		 * goes through snprintf()/strdup(), which may change it.
+		 */
+		int	err = errno;
+
+		if (ep != NULL) {
+			char	*lockname = META_LOCK_NAME(sp);
+			(void) mdsyserror(ep, err, lockname);
+			if (lockname != NULL)
+				free(lockname);
+		}
+
+		retval = -1;
+	}
+	META_LOCK_FD(sp) = MD_NO_LOCK;
+	return (retval);
+}
+
+/*
+ * unlock
+ *
+ * Release the lock held on set sp's lock file and close the file.  When no
+ * lock file is open (MD_NO_LOCK) there is nothing to do.
+ *
+ * Returns 0 on success, -1 on failure with the reason recorded in ep.
+ */
+int
+meta_unlock(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	int	lockfd = META_LOCK_FD(sp);
+
+	/* ignore read-only filesystem */
+	if (lockfd != MD_NO_LOCK) {
+		assert(lockfd >= 0);
+
+		/* normal case: unlock, then let close report any error */
+		if (lockf(lockfd, F_ULOCK, 0) == 0)
+			return (meta_lock_close(sp, ep));
+
+		/* report the unlock failure; still discard the descriptor */
+		(void) mdsyserror(ep, errno, METALOCK);
+		(void) meta_lock_close(sp, NULL);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * lock
+ *
+ * Take the lock on set sp's lock file, opening the file first if needed.
+ * If the lock is held elsewhere this waits for it; with print_status set a
+ * "waiting on" message is written to stderr before blocking.  When the
+ * lock file cannot be used because the filesystem is read-only, success is
+ * returned without any lock being taken.
+ *
+ * Returns 0 on success, -1 on failure with the reason recorded in ep.
+ */
+int
+meta_lock(
+	mdsetname_t	*sp,
+	int		print_status,
+	md_error_t	*ep
+)
+{
+	/*
+	 * Start out with "nothing open" so the failure path below never
+	 * looks at an indeterminate value when meta_lock_open() fails.
+	 */
+	int	lockfd = MD_NO_LOCK;
+	char	*lockname = NULL;
+
+	/* open lock file */
+	if (meta_lock_open(sp, ep) != 0) {
+		assert(META_LOCK_FD(sp) == MD_NO_LOCK);
+		goto failure;
+	}
+
+	/* ignore read-only filesystem */
+	if ((lockfd = META_LOCK_FD(sp)) == MD_NO_LOCK)
+		goto success;
+	assert(lockfd >= 0);
+
+	lockname = META_LOCK_NAME(sp);
+
+	/* grab lock: try without blocking first, then wait for it */
+	if (lockf(lockfd, F_TLOCK, 0) != 0) {
+		if ((errno != EACCES) && (errno != EAGAIN)) {
+			(void) mdsyserror(ep, errno, lockname);
+			goto failure;
+		}
+		if (print_status)
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "%s: waiting on %s\n"),
+			    myname, lockname);
+		if (lockf(lockfd, F_LOCK, 0) != 0) {
+			(void) mdsyserror(ep, errno, lockname);
+			goto failure;
+		}
+	}
+
+	/* return success */
+success:
+	if (lockname != NULL)
+		free(lockname);
+	return (0);
+
+	/* flag failure */
+failure:
+	if (lockname != NULL)
+		free(lockname);
+	/*
+	 * ep already describes what went wrong; close quietly (as
+	 * meta_unlock() does) so that error is left untouched.
+	 */
+	if (lockfd >= 0)
+		(void) meta_lock_close(sp, NULL);
+	return (-1);
+}
+
+/*
+ * Like meta_lock(), but never waits: if the lock is held elsewhere the
+ * call fails and EAGAIN is recorded in ep.  When the lock file cannot be
+ * used because the filesystem is read-only, success is returned without
+ * any lock being taken.
+ *
+ * Returns 0 on success, -1 on failure with the reason recorded in ep.
+ */
+int
+meta_lock_nowait(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	/*
+	 * Start out with "nothing open" so the failure path below never
+	 * looks at an indeterminate value when meta_lock_open() fails.
+	 */
+	int	lockfd = MD_NO_LOCK;
+	char	*lockname = NULL;
+
+	/* open lock file */
+	if (meta_lock_open(sp, ep) != 0) {
+		assert(META_LOCK_FD(sp) == MD_NO_LOCK);
+		goto failure;
+	}
+
+	/* ignore read-only filesystem */
+	if ((lockfd = META_LOCK_FD(sp)) == MD_NO_LOCK)
+		goto success;
+	assert(lockfd >= 0);
+
+	lockname = META_LOCK_NAME(sp);
+
+	/* grab lock */
+	if (lockf(lockfd, F_TLOCK, 0) != 0) {
+		/* "held by someone else" is always reported as EAGAIN */
+		int	err = errno;
+
+		if ((err == EACCES) || (err == EAGAIN))
+			err = EAGAIN;
+		(void) mdsyserror(ep, err, lockname);
+		goto failure;
+	}
+
+	/* return success */
+success:
+	if (lockname != NULL)
+		free(lockname);
+	return (0);
+
+	/* flag failure */
+failure:
+	if (lockname != NULL)
+		free(lockname);
+	/*
+	 * ep already describes what went wrong; close quietly (as
+	 * meta_unlock() does) so that error is left untouched.
+	 */
+	if (lockfd >= 0)
+		(void) meta_lock_close(sp, NULL);
+	return (-1);
+}
+
+/*
+ * lock status
+ *
+ * Test (lockf F_TEST) whether set sp's lock file could be locked by this
+ * process, opening the file first if needed.  The descriptor stays open in
+ * the set afterwards.
+ *
+ * Returns 0 if the test succeeds or no lock file is in use (read-only
+ * filesystem), -1 otherwise with the reason recorded in ep.
+ */
+int
+meta_lock_status(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	int lockfd;
+
+	/* open lock file */
+	if (meta_lock_open(sp, ep) != 0) {
+		assert(META_LOCK_FD(sp) == MD_NO_LOCK);
+		return (-1);
+	}
+
+	lockfd = META_LOCK_FD(sp);
+	/* ignore read-only filesystem */
+	if (lockfd == MD_NO_LOCK)
+		return (0);
+	assert(lockfd >= 0);
+
+	/* test lock */
+	if (lockf(lockfd, F_TEST, 0) != 0) {
+		/*
+		 * Capture errno before building the name: that goes
+		 * through snprintf()/strdup(), which may change it.
+		 */
+		int	err = errno;
+		char	*lockname = META_LOCK_NAME(sp);
+
+		(void) mdsyserror(ep, err, lockname);
+		if (lockname != NULL)
+			free(lockname);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * setup for syslog daemon output
+ *
+ * Opens the system log under the given program name (falling back to
+ * "md" when none is supplied) and flags syslog output as active.
+ */
+static void
+md_syslog(
+	char	*name	/* name of program */
+)
+{
+	char	*ident = name;
+
+	if (ident == NULL || ident[0] == '\0')
+		ident = "md";
+
+	openlog(ident, LOG_CONS, LOG_DAEMON);
+	metasyslog = 1;
+}
+
+/*
+ * daemonize: put in background
+ *
+ * Forks.  The parent returns the child's pid straight away.  The child
+ * closes the admin device and the RPC connections, drops the lock on set
+ * sp, closes its file descriptors (keeping stdout, stderr and the meta log
+ * file), starts a new session and switches to syslog output.
+ *
+ * Return values:
+ *	0	in the child on success, or in the caller when MD_DEBUG
+ *		contains "NODAEMON" (nothing is done in that case)
+ *	> 0	in the parent: pid of the child
+ *	other	failure, reason recorded in ep; note that failures after
+ *		the fork are returned in the child process
+ */
+int
+md_daemonize(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	char		*p;
+	struct rlimit	rlim;
+	pid_t		pid;
+	int		i;
+
+	/* debug: MD_DEBUG=NODAEMON keeps the process in the foreground */
+	if (((p = getenv("MD_DEBUG")) != NULL) &&
+	    (strstr(p, "NODAEMON") != NULL)) {
+		return (0);	/* do nothing */
+	}
+
+	/* get number of file descriptors */
+	if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
+		return (mdsyserror(ep, errno, "getrlimit(RLIMIT_NOFILE)"));
+	}
+
+	/* fork; the parent only hands the child's pid back to its caller */
+	if ((pid = fork()) == -1)
+		return (mdsyserror(ep, errno, "fork"));
+	else if (pid != 0)
+		return (pid);
+
+	/*
+	 * Child from here on.
+	 *
+	 * We need to close the admin device and reset the specialfd to force
+	 * the child process to reopen it, since the loop below closes every
+	 * descriptor up to RLIMIT_NOFILE except stdout, stderr and metalogfp.
+	 */
+	if (close_admin(ep) != 0)
+		return (-1);
+
+	/* close RPC connections */
+	metarpccloseall();
+
+	/* drop lock */
+	if (meta_unlock(sp, ep) != 0)
+		return (-1);
+
+	/* with an unlimited descriptor count there is no bound to loop to */
+	if (rlim.rlim_cur != RLIM_INFINITY) {
+		/*
+		 * close all but stdout, stderr, and metalogfp
+		 */
+
+		for (i = 0; (i < rlim.rlim_cur); ++i) {
+			if ((i == fileno(stdout)) ||
+			    (i == fileno(stderr)) ||
+			    ((metalogfp != NULL) &&
+			    (i == fileno(metalogfp)))) {
+				continue;
+			}
+			(void) close(i);
+		}
+	}
+
+	/* put in own process group */
+	if (setsid() == -1)
+		return (mdsyserror(ep, errno, "setsid"));
+
+	/* setup syslog */
+	md_syslog(myname);
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * flush and sync fp
+ *
+ * Pushes the stdio buffer of fp out and then asks for the underlying file
+ * to be synced.  Errors from either step are ignored.
+ */
+static void
+flushfp(
+	FILE	*fp
+)
+{
+	int	fd;
+
+	(void) fflush(fp);
+
+	fd = fileno(fp);
+	(void) fsync(fd);
+}
+
+/*
+ * reset and exit utility
+ *
+ * Common exit path: closes the RPC connections, drops the lock on set sp
+ * (when sp is not NULL), writes the exit status to the meta log file and,
+ * for a non-zero status, to syslog, then calls exit(eval).  Does not
+ * return.
+ *
+ * If releasing the lock fails the error is printed and an exit status of
+ * 0 is turned into 1.
+ */
+void
+md_exit(
+	mdsetname_t	*sp,
+	int		eval
+)
+{
+	md_error_t	status = mdnullerror;
+	md_error_t	*ep = &status;
+
+
+	/* close RPC connections */
+	metarpccloseall();
+
+	if (sp != NULL) {
+		if (meta_unlock(sp, ep) != 0) {
+			mde_perror(ep, "");
+			mdclrerror(ep);
+			/* a failed unlock must not look like a clean exit */
+			if (eval == 0)
+				eval = 1;
+		}
+	}
+
+	/* flush name caches (DEBUG builds only) */
+#ifdef	DEBUG
+	metaflushnames(1);
+#endif	/* DEBUG */
+
+	/* log exit */
+	if (metalogfp != NULL) {
+		md_logpfx(metalogfp);
+		(void) fprintf(metalogfp, dgettext(TEXT_DOMAIN,
+		    "exiting with %d\n"), eval);
+		flushfp(metalogfp);
+		(void) fclose(metalogfp);
+		metalogfp = NULL;
+	}
+	/* only failures are reported through syslog */
+	if ((metasyslog) && (eval != 0)) {
+		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
+		    "exiting with %d\n"), eval);
+		closelog();
+		metasyslog = 0;
+	}
+
+	/* check arena, print malloc usage */
+#ifdef	_DEBUG_MALLOC_INC
+	(void) malloc_chain_check(1);
+	{
+		char	*p;
+
+		/* usage report is opt-in via MD_DEBUG=MALLOC */
+		if (((p = getenv("MD_DEBUG")) != NULL) &&
+		    (strstr(p, "MALLOC") != NULL)) {
+			malloc_inuse_end = malloc_inuse(&malloc_histid_end);
+			(void) fprintf(stderr, "%s: end malloc_inuse %lu\n",
+			    myname, malloc_inuse_end);
+			/* list allocations only if usage changed since start */
+			if (malloc_inuse_end != malloc_inuse_begin) {
+				malloc_list(fileno(stderr),
+				    malloc_histid_begin, malloc_histid_end);
+			}
+		}
+	}
+#endif	/* _DEBUG_MALLOC_INC */
+
+	/* exit with value */
+	exit(eval);
+}
+
+/*
+ * signal catcher
+ *
+ * Handler installed by md_init() for the common signals.  It prints the
+ * name of the signal and then either
+ *   - records the signal and returns (first user-generated signal while
+ *     rollback signal handling is switched on), or
+ *   - restores the handler that was in place before and sends the signal
+ *     to this process again so that handler takes effect.
+ */
+static void
+md_catcher(
+	int			sig
+)
+{
+	char			buf[128];
+	char			*msg;
+	md_error_t		status = mdnullerror;
+	md_error_t		*ep = &status;
+	struct sigaction	defhandler;
+
+	/* log signal */
+	if ((msg = strsignal(sig)) == NULL) {
+		(void) snprintf(buf, sizeof (buf),
+		    dgettext(TEXT_DOMAIN, "unknown signal %d"), sig);
+		msg = buf;
+	}
+	md_eprintf("%s\n", msg);
+
+	/*
+	 * In roll_back critical section handling, the first instance of a
+	 * user generated signal is caught, a flag is set to allow preemption
+	 * at a "convenient" point and md_catcher returns.  If the user
+	 * continues to generate the signal, the second instance will invoke
+	 * the default handler and exit.
+	 *
+	 * SIGABRT, SIGBUS and SIGSEGV are never deferred this way.
+	 */
+	if (rb_signal_handling == TRUE) {
+		if (sig != SIGABRT && sig != SIGBUS && sig != SIGSEGV) {
+			if (rb_signal_caught == FALSE) {
+				rb_signal_caught = TRUE;
+				rb_signal_which  = sig;
+				return;
+			}
+		}
+	}
+
+	/*
+	 * Let the previous handler do its thing.  If it cannot be restored,
+	 * fall back to installing SIG_DFL explicitly.
+	 */
+	if (md_popsig(sig, ep) != 0) {
+		mde_perror(ep, "");
+		mdclrerror(ep);
+		defhandler.sa_flags = 0;
+		if (sigfillset(&defhandler.sa_mask) < 0) {
+			(void) mdsyserror(ep, errno,
+			    "sigfillset(&defhandler.sa_mask)");
+			mde_perror(ep, "");
+			md_exit(NULL, 1);
+		}
+		defhandler.sa_handler = SIG_DFL;
+		if (sigaction(sig, &defhandler, NULL) < 0) {
+			(void) mdsyserror(ep, errno, "sigaction(&defhandler)");
+			mde_perror(ep, "");
+			md_exit(NULL, 1);
+		}
+	}
+
+	/* re-send the signal so the handler installed above sees it */
+	md_post_sig(sig);
+}
+
+/*
+ * Send signal sig to the current process.  If the signal cannot be sent
+ * the failure is printed and the process exits with status -sig.
+ */
+void
+md_post_sig(int sig)
+{
+	if (kill(getpid(), sig) == 0)
+		return;
+
+	md_perror("kill(getpid())");
+	md_exit(NULL, -sig);
+}
+
+/*
+ * Report whether md_catcher() has recorded a deferred signal
+ * (TRUE/FALSE as stored in rb_signal_caught).
+ */
+int
+md_got_sig(void)
+{
+	int	caught = rb_signal_caught;
+
+	return (caught);
+}
+
+/*
+ * Return the number of the signal recorded by md_catcher(), or 0 when
+ * none has been recorded since the state was last reset.
+ */
+int
+md_which_sig(void)
+{
+	int	which = rb_signal_which;
+
+	return (which);
+}
+
+/*
+ * Switch rollback signal handling on: from now on md_catcher() records
+ * the first user-generated signal instead of acting on it immediately.
+ */
+void
+md_rb_sig_handling_on(void)
+{
+	rb_signal_handling = TRUE;
+}
+
+/*
+ * Switch rollback signal handling off and forget any recorded signal.
+ * When sig_seen is non-zero, signal sig is then sent to this process so
+ * that it is finally acted upon.
+ *
+ * The state is reset before the signal is posted, so the handler runs
+ * with deferral already disabled.
+ */
+void
+md_rb_sig_handling_off(int sig_seen, int sig)
+{
+	rb_signal_handling = FALSE;
+	rb_signal_caught = FALSE;
+	rb_signal_which  = 0;
+
+	if (!sig_seen)
+		return;
+
+	md_post_sig(sig);
+}
+
+/*
+ * setup metaclust variables
+ *
+ * Records the requested verbosity level and the current high-resolution
+ * time as the start time.
+ */
+void
+setup_mc_log(
+	uint_t	level
+)
+{
+	/* initialise externals */
+	start_time = gethrtime();
+	verbosity = level;
+}
+
+/*
+ * initialize utility
+ *
+ * Performs everything md_init_nosig() does and additionally fills the
+ * global allsigs set and installs md_catcher() for the common signals.
+ *
+ * Returns 0 on success; a non-zero value on failure with the reason
+ * recorded in ep.
+ */
+int
+md_init(
+	int		argc,
+	char		*argv[],
+	int		dosyslog,
+	int		doadmin,
+	md_error_t	*ep
+)
+{
+	/* signals routed through md_catcher(), in installation order */
+	static const int	caught[] = {
+		SIGHUP, SIGINT, SIGQUIT, SIGABRT,
+		SIGBUS, SIGSEGV, SIGPIPE, SIGTERM
+	};
+	size_t	idx;
+	int	rval;
+
+	/* initialize everything but the signals */
+	rval = md_init_nosig(argc, argv, dosyslog, doadmin, ep);
+	if (rval != 0)
+		return (rval);
+
+	if (sigfillset(&allsigs) < 0)
+		return (mdsyserror(ep, errno, "sigfillset(&allsigs)"));
+
+	/* catch common signals; give up at the first one that fails */
+	for (idx = 0; idx < sizeof (caught) / sizeof (caught[0]); idx++) {
+		if (md_pushsig(caught[idx], md_catcher, ep) != 0)
+			return (-1);
+	}
+
+	/* return success */
+	return (0);
+}
+
+
+/*
+ * initialize utility without setting up signal handlers
+ *
+ * Setting up signal handlers in libmeta can affect other programs that
+ * link with libmeta but have their own handlers, so this variant leaves
+ * signals alone.  It sets myname from argv[0], optionally opens syslog,
+ * logs the command line to the meta log when METALOGENV is set in the
+ * environment, optionally opens the admin device and flushes the name
+ * caches.
+ *
+ * Returns 0 on success, -1 if the admin device cannot be opened (reason
+ * recorded in ep).
+ */
+int
+md_init_nosig(
+	int		argc,
+	char		*argv[],
+	int		dosyslog,
+	int		doadmin,
+	md_error_t	*ep
+)
+{
+	char	*slash = strrchr(argv[0], '/');
+
+	/* setup myname: the part of argv[0] after the last '/' */
+	myname = (slash != NULL) ? slash + 1 : argv[0];
+
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+	/* print malloc usage */
+#ifdef	_DEBUG_MALLOC_INC
+	{
+		char	*dbg = getenv("MD_DEBUG");
+
+		if ((dbg != NULL) && (strstr(dbg, "MALLOC") != NULL)) {
+			malloc_inuse_begin =
+			    malloc_inuse(&malloc_histid_begin);
+			(void) fprintf(stderr, "%s: begin malloc_inuse %lu\n",
+			    myname, malloc_inuse_begin);
+		}
+	}
+#endif	/* _DEBUG_MALLOC_INC */
+
+	/* open syslog */
+	if (dosyslog)
+		md_syslog(myname);
+
+	/* log command: the arguments are appended to the meta log */
+	if ((getenv(METALOGENV) != NULL) &&
+	    ((metalogfp = fopen(METALOG, "a")) != NULL)) {
+		int	argn;
+
+		(void) fchmod(fileno(metalogfp), 0664);
+		md_logpfx(metalogfp);
+		for (argn = 1; argn < argc; argn++)
+			(void) fprintf(metalogfp, " %s", argv[argn]);
+		(void) fprintf(metalogfp, "\n");
+		flushfp(metalogfp);
+	}
+
+	/* make sure we can open the admin device before we do anything else */
+	if (doadmin && (open_admin(ep) < 0))
+		return (-1);
+
+	/* flush name caches */
+	metaflushnames(1);
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * (re)initialize daemon
+ *
+ * Runs md_init() with syslog and the admin device both enabled, using
+ * name as the program name.  Only the first successful call does any
+ * work; later calls return 0 immediately.
+ *
+ * Returns 0 on success, -1 if md_init() fails (reason recorded in ep).
+ */
+int
+md_init_daemon(
+	char		*name,
+	md_error_t	*ep
+)
+{
+	static int	already = 0;
+
+	if (already)
+		return (0);
+
+	/* setup: argc 1, dosyslog 1, doadmin 1 */
+	if (md_init(1, &name, 1, 1, ep) != 0)
+		return (-1);
+
+	already = 1;
+	return (0);
+}
+
+/*
+ * Roll back functions for handling sync and async cleanup.
+ */
+
+/*
+ * Block or restore signals around a critical section.
+ *
+ * block == TRUE:  block every signal in allsigs and store the previous
+ *                 mask in *oldsigs.
+ * otherwise:      make *oldsigs the process signal mask again.
+ *
+ * Returns 0 on success, -1 on failure with the reason recorded in ep.
+ */
+int
+procsigs(int block, sigset_t *oldsigs, md_error_t *ep)
+{
+	if (block != TRUE) {
+		if (sigprocmask(SIG_SETMASK, oldsigs, NULL) >= 0)
+			return (0);
+		(void) mdsyserror(ep, errno,
+		    "sigprocmask(SIG_SETMASK)");
+		return (-1);
+	}
+
+	if (sigprocmask(SIG_BLOCK, &allsigs, oldsigs) >= 0)
+		return (0);
+	(void) mdsyserror(ep, errno, "sigprocmask(SIG_BLOCK)");
+	return (-1);
+}
+
+#ifdef DEBUG
+/*
+ * Rollback test point (DEBUG builds).
+ *
+ * The environment selects which test point fires:
+ *   META_RBT_TPT  number of the test point; a negative number selects
+ *                 the test point of the same magnitude and additionally
+ *                 makes it wait for a signal first; 0 disables testing
+ *   META_RBT_TAG  if set, the test point's tag must match as well
+ *
+ * rbt_sel_tpt / rbt_sel_tag identify the calling test point.
+ *
+ * Returns 0 when this test point is not selected.  Returns -1 when it is
+ * selected; unless a signal was recorded while waiting, MDE_TESTERROR is
+ * also stored in ep.
+ */
+int
+rb_test(
+	int		rbt_sel_tpt,
+	char		*rbt_sel_tag,
+	md_error_t	*ep
+)
+{
+	char		*rbt_env_tpt = getenv("META_RBT_TPT");
+	char		*rbt_env_tag = getenv("META_RBT_TAG");
+	int		sig = 0;
+	int		rbt_int_tpt;
+	int		rbt_tag_match = 1;
+	sigset_t	curmask;
+	md_error_t	xep = mdnullerror;
+
+	if (rbt_env_tpt) {
+		rbt_int_tpt = atoi(rbt_env_tpt);
+		if (rbt_int_tpt < 0) {
+			sig = 1;
+			rbt_int_tpt = -1 * rbt_int_tpt;
+		}
+
+		assert(rbt_sel_tpt != 0);
+
+		if (rbt_int_tpt == 0)
+			return (0);
+
+		if (rbt_env_tag && rbt_sel_tag)
+			if (strcmp(rbt_env_tag, rbt_sel_tag) != 0)
+				rbt_tag_match = 0;
+
+		if (rbt_int_tpt == rbt_sel_tpt && rbt_tag_match) {
+			md_eprintf(
+			    "******************** RB_TEST(%s, %d, sig=%s)\n",
+			    rbt_sel_tag, rbt_sel_tpt,
+			    (sig != 0) ? "True" : "False");
+			if (sig) {
+				md_eprintf("********** sigsuspend()\n");
+				/*
+				 * Query the current mask only; with a NULL
+				 * set the "how" argument is not acted upon.
+				 */
+				if (sigprocmask(SIG_BLOCK, NULL,
+				    &curmask) < 0) {
+					(void) mdsyserror(&xep, errno, NULL);
+					mde_perror(&xep, "sigprocmask(GET)");
+					md_exit(NULL, 1);
+				}
+
+				/*
+				 * sigsuspend() only ever returns -1; EINTR
+				 * is the normal "a handler ran" outcome and
+				 * must not be treated as a failure.
+				 */
+				if (sigsuspend(&curmask) < 0 &&
+				    errno != EINTR) {
+					(void) mdsyserror(&xep, errno, NULL);
+					mde_perror(&xep,
+					    "sigsuspend(&curmask)");
+					md_exit(NULL, 1);
+				}
+
+				if (md_got_sig())
+					return (-1);
+			}
+			(void) mderror(ep, MDE_TESTERROR,
+			    "********** rb_test()");
+			md_eprintf("******************** rollback\n");
+			return (-1);
+		}
+	}
+	return (0);
+}
+#else
+/*
+ * Rollback test point (non-DEBUG builds): test points are not available,
+ * so MDE_TESTERROR is stored in ep and -1 is returned unconditionally.
+ */
+/* ARGSUSED */
+int
+rb_test(
+	int		rbt_sel_tpt,
+	char		*rbt_sel_tag,
+	md_error_t	*ep
+)
+{
+	(void) mderror(ep, MDE_TESTERROR, "******** rb_test:Not supported\n");
+	return (-1);
+
+}
+#endif	/* DEBUG */
diff --git a/usr/src/lib/lvm/libmeta/common/meta_smf.c b/usr/src/lib/lvm/libmeta/common/meta_smf.c
new file mode 100644
index 0000000000..204691a1a3
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_smf.c
@@ -0,0 +1,351 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Service Management Facility (SMF) interfaces.
+ */
+
+#include <stdio.h>
+#include <libscf.h>
+#include <meta.h>
+
+static void enable(char *svc_names[], md_error_t *ep);
+static void disable(char *svc_names[], md_error_t *ep);
+static int enabled(char *svc_name);
+static int online(char *svc_names[], char **names);
+static void wait_online(char *svc_names[]);
+static int is_online(char *svc_name);
+
+static char
+*svm_core_svcs[] = {
+	"system/metainit:default",
+	"system/mdmonitor:default",
+	"network/rpc/meta:default",
+	NULL
+};
+
+static char
+*svm_diskset_svcs[] = {
+	"network/rpc/metamed:default",
+	"network/rpc/metamh:default",
+	NULL
+};
+
+static char
+*svm_mn_diskset_svcs[] = {
+	"network/rpc/mdcomm:default",
+	NULL
+};
+
+/*
+ * Enable the specified SVM services through the SMF.
+ *
+ * flags is a combination of META_SMF_CORE, META_SMF_DISKSET and
+ * META_SMF_MN_DISKSET.  After enabling each selected class this waits
+ * (bounded, see wait_online()) for its services to come online.
+ *
+ * Returns 0 when ep is NULL or holds no error afterwards, -1 otherwise.
+ */
+int
+meta_smf_enable(uint_t flags, md_error_t *ep)
+{
+	if ((flags & META_SMF_CORE) != 0) {
+		enable(svm_core_svcs, ep);
+		wait_online(svm_core_svcs);
+	}
+
+	if ((flags & META_SMF_DISKSET) != 0) {
+		enable(svm_diskset_svcs, ep);
+		wait_online(svm_diskset_svcs);
+	}
+
+	if ((flags & META_SMF_MN_DISKSET) != 0) {
+		enable(svm_mn_diskset_svcs, ep);
+		wait_online(svm_mn_diskset_svcs);
+	}
+
+	/* without an error structure there is nothing to report */
+	if (ep == NULL)
+		return (0);
+
+	return ((mdisok(ep)) ? 0 : -1);
+}
+
+/*
+ * Disable the specified SVM services through the SMF.
+ *
+ * flags is a combination of META_SMF_CORE, META_SMF_DISKSET and
+ * META_SMF_MN_DISKSET.
+ *
+ * Returns 0 when ep is NULL or holds no error afterwards, -1 otherwise.
+ */
+int
+meta_smf_disable(uint_t flags, md_error_t *ep)
+{
+	if ((flags & META_SMF_CORE) != 0)
+		disable(svm_core_svcs, ep);
+
+	if ((flags & META_SMF_DISKSET) != 0)
+		disable(svm_diskset_svcs, ep);
+
+	if ((flags & META_SMF_MN_DISKSET) != 0)
+		disable(svm_mn_diskset_svcs, ep);
+
+	/* without an error structure there is nothing to report */
+	if (ep == NULL)
+		return (0);
+
+	return ((mdisok(ep)) ? 0 : -1);
+}
+
+/*
+ * Determine if desired services are online.  If all services in the
+ * classes specified by flags are online, 1 is returned.  Otherwise
+ * 0 is returned and MDE_SMF_NO_SERVICE is stored in ep together with the
+ * list of services that are not online.
+ */
+
+int
+meta_smf_isonline(uint_t flags, md_error_t *ep)
+{
+	char	*missing = NULL;
+	int	all_up = 1;
+
+	/* every selected class is checked so the list is complete */
+	if ((flags & META_SMF_CORE) && !online(svm_core_svcs, &missing))
+		all_up = 0;
+
+	if ((flags & META_SMF_DISKSET) && !online(svm_diskset_svcs, &missing))
+		all_up = 0;
+
+	if ((flags & META_SMF_MN_DISKSET) &&
+	    !online(svm_mn_diskset_svcs, &missing))
+		all_up = 0;
+
+	if (all_up)
+		return (1);
+
+	(void) mderror(ep, MDE_SMF_NO_SERVICE, missing);
+	Free(missing);
+	return (0);
+}
+
+/*
+ * Return a bitmask of the META_SMF_* flags indicating which services should be
+ * online given the current SVM configuration:
+ *
+ *   META_SMF_CORE        local metadbs are configured
+ *   META_SMF_DISKSET     at least one diskset exists
+ *   META_SMF_MN_DISKSET  at least one of them is a multi-node set
+ *
+ * Errors met while looking at the configuration are not reported; they
+ * only limit which bits get set.
+ */
+int
+meta_smf_getmask()
+{
+	int		mask = 0;
+	mdsetname_t	*sp = NULL;
+	mddb_config_t	c;
+	md_error_t	status = mdnullerror;
+	md_error_t	*ep = &status;
+	int		max_sets;
+
+	/*
+	 * If there are any local metadbs configured then the core services
+	 * are needed.
+	 */
+	(void) memset(&c, 0, sizeof (c));
+	c.c_setno = MD_LOCAL_SET;
+	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0 || c.c_dbcnt == 0)
+		return (mask);
+
+	mask |= META_SMF_CORE;
+
+	/*
+	 * If any disksets configured then the diskset services are needed.
+	 * Also check for multi-node sets.
+	 */
+	if ((max_sets = get_max_sets(ep)) > 0) {
+		int i;
+
+		mdclrerror(ep);
+		/* set 0 is the local set, so start with set 1 */
+		for (i = 1; i < max_sets; i++) {
+			md_set_desc	*sd;
+
+			if ((sp = metasetnosetname(i, ep)) == NULL) {
+				if (!mdisok(ep) && !mdiserror(ep, MDE_NO_SET) &&
+				    !mdismddberror(ep, MDE_NOTENOUGH_DB) &&
+				    !mdiserror(ep, MDE_SMF_NO_SERVICE) &&
+				    ep->info.errclass != MDEC_RPC) {
+					/*
+					 * metad rpc program not registered
+					 * can't get diskset info
+					 */
+					break;
+				}
+
+			} else {
+				mask |= META_SMF_DISKSET;
+
+				if ((sd = metaget_setdesc(sp, ep)) != NULL) {
+					if (MD_MNSET_DESC(sd)) {
+						mask |= META_SMF_MN_DISKSET;
+
+						/*
+						 * we don't have to check the
+						 * rest of the disksets at this
+						 * point
+						 */
+						break;
+					}
+				}
+			}
+
+			mdclrerror(ep);
+		}
+	}
+
+	/*
+	 * The error structure is local and never handed back; clear it so
+	 * an error left behind by get_max_sets() or by leaving the loop
+	 * early is not simply dropped while still populated.
+	 */
+	mdclrerror(ep);
+
+	return (mask);
+}
+
+/*
+ * Enable every service in the NULL-terminated list svc_names that is not
+ * enabled yet.  A failure to enable a service is recorded in ep as
+ * MDE_SMF_FAIL (when ep is not NULL); the remaining services are still
+ * processed.
+ */
+static void
+enable(char *svc_names[], md_error_t *ep)
+{
+	char	**svc;
+
+	for (svc = svc_names; *svc; svc++) {
+		if (enabled(*svc))
+			continue;
+
+		if (smf_enable_instance(*svc, 0) == 0)
+			continue;
+
+		if (ep != NULL)
+			(void) mderror(ep, MDE_SMF_FAIL, *svc);
+	}
+}
+
+/*
+ * Disable every service in the NULL-terminated list svc_names that is
+ * currently enabled.  A failure to disable a service is recorded in ep as
+ * MDE_SMF_FAIL (when ep is not NULL); the remaining services are still
+ * processed.
+ */
+static void
+disable(char *svc_names[], md_error_t *ep)
+{
+	char	**svc;
+
+	for (svc = svc_names; *svc; svc++) {
+		if (!enabled(*svc))
+			continue;
+
+		if (smf_disable_instance(*svc, 0) == 0)
+			continue;
+
+		if (ep != NULL)
+			(void) mderror(ep, MDE_SMF_FAIL, *svc);
+	}
+}
+
+/*
+ * Return 1 if the "enabled" property in the general property group of
+ * service svc_name holds exactly one value and that value is true.
+ * Return 0 otherwise, including when the property cannot be read.
+ */
+static int
+enabled(char *svc_name)
+{
+	scf_simple_prop_t	*prop;
+	uint8_t			*val;
+	int			rval = 0;
+
+	prop = scf_simple_prop_get(NULL, svc_name, SCF_PG_GENERAL,
+		SCF_PROPERTY_ENABLED);
+
+	/* property (or service) not available: treat as not enabled */
+	if (prop == NULL)
+		return (0);
+
+	if (scf_simple_prop_numvalues(prop) == 1) {
+		/* no value is returned if the property is not a boolean */
+		val = scf_simple_prop_next_boolean(prop);
+		if (val != NULL && *val != 0)
+			rval = 1;
+	}
+
+	scf_simple_prop_free(prop);
+
+	return (rval);
+}
+
+/*
+ * There can be a delay while the RPC services get going.  Try to
+ * make sure the RPC daemons are ready to run before we return.
+ * Check 15 times, sleeping one second after each unsuccessful check
+ * (15 seconds total wait time), and then just return.
+ */
+static void
+wait_online(char *svc_names[])
+{
+	char	*names = NULL;
+	int	tries = 15;
+
+	while (tries-- > 0 && !online(svc_names, &names))
+		(void) sleep(1);
+
+	/* the collected list of offline services is not needed here */
+	if (names != NULL)
+		Free(names);
+}
+
+/*
+ * Check to see if all services in the svc_names are online.  If they are
+ * all online 1 is returned, otherwise 0 is returned.
+ *
+ * For every service that is not online, "\n\t<service>" is appended to
+ * the string *names (which is allocated here when it is still NULL).  The
+ * caller owns *names and has to release it with Free().
+ */
+
+static int
+online(char *svc_names[], char **names)
+{
+	int i;
+	int rv = 1;
+
+	for (i = 0; svc_names[i]; i++) {
+		size_t	sz;
+		char	*p;
+
+		if (is_online(svc_names[i]) != 0)
+			continue;
+
+		/*
+		 * Need space for the name, the new line, the
+		 * tab and the null terminator.
+		 */
+		sz = strlen(svc_names[i]) + 3;
+
+		if (*names == NULL) {
+			p = Malloc(sz);
+			(void) snprintf(p, sz, "\n\t%s", svc_names[i]);
+
+		} else {
+			/* Add space for existing names */
+			sz += strlen(*names);
+			p = Malloc(sz);
+			(void) snprintf(p, sz, "%s\n\t%s", *names,
+			    svc_names[i]);
+			/*
+			 * Release the old list itself; names is only the
+			 * caller's slot holding it and must not be freed.
+			 */
+			Free(*names);
+		}
+
+		*names = p;
+		rv = 0;
+	}
+	return (rv);
+}
+
+/*
+ * Return 1 if the specified service is online.  Otherwise, return 0
+ * (this includes the case where the state cannot be obtained).
+ */
+static int
+is_online(char *svc_name)
+{
+	char	*state = smf_get_state(svc_name);
+	int	up;
+
+	if (state == NULL)
+		return (0);
+
+	up = (strcmp(state, "online") == 0) ? 1 : 0;
+	free(state);
+
+	return (up);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_sp.c b/usr/src/lib/lvm/libmeta/common/meta_sp.c
new file mode 100644
index 0000000000..ce3965489f
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_sp.c
@@ -0,0 +1,6652 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * soft partition operations
+ *
+ * Soft Partitions provide a virtual disk mechanism which is used to
+ * divide a large volume into many small pieces, each appearing as a
+ * separate device.  A soft partition consists of a series of extents,
+ * each having an offset and a length.  The extents are logically
+ * contiguous, so where the first extent leaves off the second extent
+ * picks up.  Which extent a given "virtual offset" belongs to is
+ * dependent on the size of all the previous extents in the soft
+ * partition.
+ *
+ * Soft partitions are represented in memory by an extent node
+ * (sp_ext_node_t) which contains all of the information necessary to
+ * create a unit structure and update the on-disk format, called
+ * "watermarks".  These extent nodes are typically kept in a doubly
+ * linked list and are manipulated by list manipulation routines.  A
+ * list of extents may represent all of the soft partitions on a volume,
+ * a single soft partition, or perhaps just a set of extents that need
+ * to be updated.  Extent lists may be sorted by offset or by name/seq#,
+ * depending on which compare function is used.  Most of the routines
+ * require the list be sorted by offset to work, and that's the typical
+ * configuration.
+ *
+ * In order to do an allocation, knowledge of all soft partitions on the
+ * volume is required.  Then free space is determined from the space
+ * that is not allocated, and new allocations can be made from the free
+ * space.  Once the new allocations are made, a unit structure is created
+ * and the watermarks are updated.  The status is then changed to "okay"
+ * on the unit structure to commit the transaction.  If updating the
+ * watermarks fails, the unit structure is in an intermediate state and
+ * the driver will not allow access to the device.
+ *
+ * A typical sequence of events is:
+ *     1. Fetch the list of names for all soft partitions on a volume
+ *         meta_sp_get_by_component()
+ *     2. Construct an extent list from the name list
+ *         meta_sp_extlist_from_namelist()
+ *     3. Fill the gaps in the extent list with free extents
+ *         meta_sp_list_freefill()
+ *     4. Allocate from the free extents
+ *         meta_sp_alloc_by_len()
+ *         meta_sp_alloc_by_list()
+ *     5. Create the unit structure from the extent list
+ *         meta_sp_createunit()
+ *         meta_sp_updateunit()
+ *     6. Write out the watermarks
+ *         meta_sp_update_wm()
+ *     7. Set the status to "Okay"
+ *         meta_sp_setstatus()
+ *
+ */
+
+#include <stdio.h>
+#include <meta.h>
+#include "meta_repartition.h"
+#include <sys/lvm/md_sp.h>
+#include <sys/lvm/md_crc.h>
+#include <strings.h>
+#include <sys/lvm/md_mirror.h>
+#include <sys/bitmap.h>
+
+extern int	md_in_daemon;
+
+/*
+ * In-memory description of a single extent.  Nodes are chained through
+ * ext_next/ext_prev into a doubly linked list; meta_sp_list_insert()
+ * places each new node according to the compare function it is given.
+ */
+typedef struct sp_ext_node {
+	struct sp_ext_node	*ext_next;	/* next element */
+	struct sp_ext_node	*ext_prev;	/* previous element */
+	sp_ext_type_t		ext_type;	/* type of extent */
+	sp_ext_offset_t		ext_offset;	/* starting offset */
+	sp_ext_length_t		ext_length;	/* length of this node */
+	uint_t			ext_flags;	/* extent flags */
+	uint32_t		ext_seq;	/* watermark seq no */
+	mdname_t		*ext_namep;	/* name pointer */
+	mdsetname_t		*ext_setp;	/* set pointer */
+} sp_ext_node_t;
+
+/* extent flags */
+#define	EXTFLG_UPDATE	(1)
+
+/* Extent node compare function for list sorting */
+typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *);
+
+
+/* Function Prototypes */
+
+/* Debugging Functions */
+static void meta_sp_debug(char *format, ...);
+static void meta_sp_printunit(mp_unit_t *mp);
+
+/* Misc Support Functions */
+int meta_sp_parsesize(char *s, sp_ext_length_t *szp);
+static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp);
+static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp,
+	md_error_t *ep);
+static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp,
+    mdnamelist_t **nlpp, int force, md_error_t *ep);
+static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp,
+    mdname_t *compnp, md_error_t *ep);
+
+/* Extent List Manipulation Functions */
+static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2);
+static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2);
+static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np,
+    sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length,
+    sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare);
+static void meta_sp_list_free(sp_ext_node_t **head);
+static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext);
+static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head,
+    sp_ext_type_t exttype, int exclude_wm);
+static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head,
+    sp_ext_offset_t offset);
+static void meta_sp_list_freefill(sp_ext_node_t **extlist,
+    sp_ext_length_t size);
+static void meta_sp_list_dump(sp_ext_node_t *head);
+static int meta_sp_list_overlaps(sp_ext_node_t *head);
+
+/* Extent List Query Functions */
+static boolean_t meta_sp_enough_space(int desired_number_of_sps,
+	blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp,
+	sp_ext_length_t alignment);
+static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep,
+	mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp,
+	md_error_t *ep);
+static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep,
+	mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp);
+
+
+/* Extent Allocation Functions */
+static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np,
+    sp_ext_node_t **extlist, sp_ext_node_t *free_ext,
+    sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq);
+static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np,
+    sp_ext_node_t **extlist, sp_ext_length_t *lp,
+    sp_ext_offset_t last_off, sp_ext_length_t alignment);
+static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np,
+    sp_ext_node_t **extlist, sp_ext_node_t *oblist);
+
+/* Extent List Population Functions */
+static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp,
+    sp_ext_node_t **extlist, md_error_t *ep);
+static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp,
+    sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep);
+
+/* Print (metastat) Functions */
+static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp,
+    mdprtopts_t options, md_error_t *ep);
+static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate);
+static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp,
+    char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep);
+
+/* Watermark Manipulation Functions */
+static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp,
+    sp_ext_node_t *extlist, md_error_t *ep);
+static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep);
+static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp,
+    mp_watermark_t *wm, sp_ext_offset_t offset,  md_error_t *ep);
+static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp,
+    md_error_t *ep);
+
+/* Unit Structure Manipulation Functions */
+static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist);
+static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp,
+    sp_ext_node_t *extlist, int numexts, sp_ext_length_t len,
+    sp_status_t status, md_error_t *ep);
+static mp_unit_t *meta_sp_updateunit(mdname_t *np,  mp_unit_t *old_un,
+    sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts,
+    md_error_t *ep);
+static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist,
+    mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep);
+static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options,
+    int *repart_options, md_error_t *ep);
+
+/* Reset (metaclear) Functions */
+static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp,
+    md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep);
+
+/* Recovery (metarecover) Functions */
+static void meta_sp_display_exthdr(void);
+static void meta_sp_display_ext(sp_ext_node_t *ext);
+static int meta_sp_checkseq(sp_ext_node_t *extlist);
+static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *,
+    mdname_t **, md_error_t *);
+static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np,
+    mdcmdopts_t options, md_error_t *ep);
+static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp,
+    mdcmdopts_t options, md_error_t *ep);
+static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np,
+    mdcmdopts_t options, md_error_t *ep);
+static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext,
+    sp_ext_node_t *unitext, md_error_t *ep);
+static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp,
+    mdcmdopts_t options, md_error_t *ep);
+static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np,
+    mdcmdopts_t options, md_error_t *ep);
+
+/*
+ * Private Constants
+ */
+
+/*
+ * NOTE(review): presumably the value passed as the "force" argument of
+ * meta_sp_get_by_component() to make it rebuild its cached namelist.
+ */
+static const int FORCE_RELOAD_CACHE = 1;
+/*
+ * NOTE(review): the next three look like "nothing to pass" placeholders
+ * for flags, offset and sequence number arguments -- confirm at the call
+ * sites.
+ */
+static const uint_t NO_FLAGS = 0;
+static const sp_ext_offset_t NO_OFFSET = 0ULL;
+static const uint_t NO_SEQUENCE_NUMBER = 0;
+/* NOTE(review): presumably a soft partition count; use not visible here */
+static const int ONE_SOFT_PARTITION = 1;
+
+/*
+ * Bitmap with room for MD_MAXUNITS bits.  NOTE(review): going by the
+ * name it records which parent devices have already been printed --
+ * confirm against the print routines.
+ */
+static unsigned long sp_parent_printed[BT_BITOUL(MD_MAXUNITS)];
+
+/* NOTE(review): NULL stand-ins for name/set arguments; confirm usage */
+#define	TEST_SOFT_PARTITION_NAMEP NULL
+#define	TEST_SETNAMEP NULL
+
+/*
+ * Values for the exclude_wm argument of meta_sp_list_size(): non-zero
+ * subtracts MD_SP_WMSIZE from every extent that is counted.
+ */
+#define	EXCLUDE_WM	(1)
+#define	INCLUDE_WM	(0)
+
+/* alignment value meaning "no default alignment" */
+#define	SP_UNALIGNED	(0LL)
+
+/*
+ * **************************************************************************
+ *                          Debugging Functions                             *
+ * **************************************************************************
+ */
+
+/*
+ * Debug trace helper: formats its arguments printf-style and writes them
+ * to stderr, but only when the environment variable named by
+ * META_SP_DEBUG is set.  The environment is consulted on the first call
+ * only; the answer is remembered in a static for every later call.
+ */
+/*PRINTFLIKE1*/
+static void
+meta_sp_debug(char *format, ...)
+{
+	static int	checked_env = 0;
+	static int	enabled;
+	va_list		args;
+
+	if (checked_env == 0) {
+		enabled = (getenv(META_SP_DEBUG) != NULL) ? 1 : 0;
+		checked_env = 1;
+	}
+
+	/* tracing is off: say nothing */
+	if (!enabled)
+		return;
+
+	va_start(args, format);
+	(void) vfprintf(stderr, format, args);
+	va_end(args);
+}
+
+/*
+ * Dump a soft partition unit structure to stderr: the common and
+ * sp-specific header fields first, then one line per extent giving its
+ * index, virtual offset, physical offset and length.  A NULL unit
+ * pointer prints nothing.
+ */
+static void
+meta_sp_printunit(mp_unit_t *mp)
+{
+	int	extno;
+
+	if (mp == NULL)
+		return;
+
+	/* fields shared by all unit types */
+	(void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type);
+	(void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size);
+	(void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp));
+
+	/* fields that only soft partitions have */
+	(void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status);
+	(void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts);
+	(void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length);
+	(void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev);
+	(void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev);
+	(void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key);
+
+	/* extent table, one row per extent */
+	(void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n");
+	extno = 0;
+	while (extno < mp->un_numexts) {
+		(void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", extno,
+		    mp->un_ext[extno].un_voff, mp->un_ext[extno].un_poff,
+		    mp->un_ext[extno].un_len);
+		extno++;
+	}
+}
+
+/*
+ * FUNCTION:    meta_sp_parsesize()
+ * INPUT:       s       - the string to parse
+ * OUTPUT:      *szp    - disk block count (0 for "all")
+ * RETURNS:     -1 for error, 0 for success
+ * PURPOSE:     parses the command line parameter that specifies the
+ *              requested size of a soft partition.  The literal "all"
+ *              (compared without regard to case) yields a size of 0.
+ *              Any other string is handed to
+ *              meta_sp_parsesizestring(), which expects a number
+ *              followed by a one-letter unit, for example 100m for
+ *              100 megabytes or 50g for 50 gigabytes.  A NULL s or
+ *              szp is an error.
+ */
+int
+meta_sp_parsesize(char *s, sp_ext_length_t *szp)
+{
+	if ((s != NULL) && (szp != NULL)) {
+		/* anything but "all" is a number with a unit suffix */
+		if (strcasecmp(s, "all") != 0)
+			return (meta_sp_parsesizestring(s, szp));
+
+		/* "all" is reported to the caller as a zero size */
+		*szp = 0;
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * FUNCTION:	meta_sp_parsesizestring()
+ * INPUT:	s	- the string to parse
+ * OUTPUT:	*szp	- disk block count
+ * RETURNS:	-1 for error, 0 for success
+ * PURPOSE:	parses a string that specifies size. The input string is a
+ *		numeric value followed by a single character, b for disk blocks,
+ *		k for kilobytes, m for megabytes, g for gigabytes, or t for
+ *		terabytes, in either case.  For example, 100m is 100 megabytes,
+ *		while 50g is 50 gigabytes.  All values are rounded up to the
+ *		nearest block size.
+ *
+ *		A NULL argument, a zero count, a missing or unrecognised unit,
+ *		or a size whose byte count does not fit in 64 bits is an
+ *		error; *szp is not written in any of those cases.
+ *
+ *		The switch below also knows p (petabytes) and e (exabytes),
+ *		but the sscanf() scan set does not admit those letters, so
+ *		such input is rejected as things stand.
+ */
+static int
+meta_sp_parsesizestring(char *s, sp_ext_length_t *szp)
+{
+	sp_ext_length_t	len = 0;
+	sp_ext_length_t	unit;		/* bytes per unit of len */
+	char		len_type[2];
+
+	if (s == NULL || szp == NULL) {
+		return (-1);
+	}
+
+	if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) ||
+	    (len == 0LL))
+		return (-1);
+
+	switch (len_type[0]) {
+	case 'B':
+	case 'b':
+		unit = DEV_BSIZE;
+		break;
+	case 'K':
+	case 'k':
+		unit = 1024ULL;
+		break;
+	case 'M':
+	case 'm':
+		unit = 1024ULL * 1024ULL;
+		break;
+	case 'g':
+	case 'G':
+		unit = 1024ULL * 1024ULL * 1024ULL;
+		break;
+	case 't':
+	case 'T':
+		unit = 1024ULL * 1024ULL * 1024ULL * 1024ULL;
+		break;
+	case 'p':
+	case 'P':
+		unit = 1024ULL * 1024ULL * 1024ULL * 1024ULL * 1024ULL;
+		break;
+	case 'e':
+	case 'E':
+		unit = 1024ULL * 1024ULL * 1024ULL * 1024ULL * 1024ULL *
+		    1024ULL;
+		break;
+	default:
+		/* error */
+		return (-1);
+	}
+
+	/*
+	 * make sure the byte count does not overflow 2^64 bytes, leaving
+	 * room for the (DEV_BSIZE - 1) that roundup() adds before it
+	 * divides.  The limit has to depend on the unit: comparing the
+	 * bare count against a fixed bound lets len * unit wrap for the
+	 * larger units, and a product that wraps to exactly zero would be
+	 * returned as a successful size of 0 blocks.
+	 */
+	if (len > (~0ULL - (DEV_BSIZE - 1)) / unit)
+		return (-1);
+
+	*szp = lbtodb(roundup(len * unit, DEV_BSIZE));
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_setgeom()
+ * INPUT:	np	- the name handed to meta_setup_geom() along with
+ *			  the unit structure
+ *		compnp	- the underlying device whose geometry is fetched
+ *		mp	- the unit structure to set the geometry for
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 otherwise
+ * PURPOSE:	establishes geometry information for a device: the
+ *		geometry of compnp is looked up with metagetgeom() and
+ *		applied to mp with meta_setup_geom(), with cylinder
+ *		rounding passed as 0
+ */
+static int
+meta_sp_setgeom(
+	mdname_t	*np,
+	mdname_t	*compnp,
+	mp_unit_t	*mp,
+	md_error_t	*ep
+)
+{
+	uint_t		no_round_cyl = 0;
+	mdgeom_t	*geom;
+
+	geom = metagetgeom(compnp, ep);
+	if (geom == NULL)
+		return (-1);
+
+	/* reuse the component's own reinstruct values for the unit */
+	if (meta_setup_geom((md_unit_t *)mp, np, geom,
+	    geom->write_reinstruct, geom->read_reinstruct,
+	    no_round_cyl, ep) == 0)
+		return (0);
+
+	return (-1);
+}
+
+/*
+ * FUNCTION:	meta_sp_setstatus()
+ * INPUT:	sp	- the set name for the devices to set the status on
+ *		minors	- an array of minor numbers of devices to set status on
+ *		num_units - number of entries in the array
+ *		status	- status value to set all units to
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 success
+ * PURPOSE:	sets the status of one or more soft partitions to the
+ *		requested value by issuing a single MD_IOC_SPSTATUS ioctl
+ *		that carries the whole array of minors.  minors must not
+ *		be NULL (asserted).
+ */
+int
+meta_sp_setstatus(
+	mdsetname_t	*sp,
+	minor_t		*minors,
+	int		num_units,
+	sp_status_t	status,
+	md_error_t	*ep
+)
+{
+	md_sp_statusset_t	req;
+
+	assert(minors != NULL);
+
+	/* build the request on top of a zeroed structure */
+	(void) memset(&req, 0, sizeof (req));
+	req.minors = (uintptr_t)minors;
+	req.size = num_units * sizeof (minor_t);
+	req.num_units = num_units;
+	req.new_status = status;
+	MD_SETDRIVERNAME(&req, MD_SP, sp->setno);
+
+	if (metaioctl(MD_IOC_SPSTATUS, &req, &req.mde, NULL) == 0)
+		return (0);
+
+	/* move the error from the request into the caller's ep */
+	(void) mdstealerror(ep, &req.mde);
+	return (-1);
+}
+
+/*
+ * FUNCTION:	meta_get_sp_names()
+ * INPUT:	sp	- the set name to get soft partitions from
+ *		options	- options from the command line
+ * OUTPUT:	nlpp	- list of all soft partition names
+ *		ep	- return error pointer
+ * RETURNS:	int	- the value returned by meta_get_names(); callers in
+ *			  this file treat a negative value as an error
+ * PURPOSE:	returns a list of all soft partitions in the metadb
+ *		for all devices in the specified set, by calling
+ *		meta_get_names() with MD_SP as the driver name
+ */
+int
+meta_get_sp_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	int	rval;
+
+	/* the generic lookup does all the work; only the driver differs */
+	rval = meta_get_names(MD_SP, sp, nlpp, options, ep);
+
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_by_component()
+ * INPUT:	sp	- the set name to get soft partitions from
+ *		compnp	- the name of the device containing the soft
+ *			  partitions that will be returned
+ *		force	- 0 - reads cached namelist if available,
+ *			  1 - reloads cached namelist, frees old namelist
+ * OUTPUT:	nlpp	- list of all soft partition names
+ *		ep	- return error pointer
+ * RETURNS:	int	- -1 error, otherwise the number of soft partitions
+ *			  found on the component (0 = none found).
+ * PURPOSE:	returns a list of all soft partitions on a given device
+ *		from the metadb information
+ *
+ *		The result of the last full scan is kept in a static list.
+ *		When that list is non-empty and force is 0, its entries are
+ *		appended at *nlpp and no scan is done.  Otherwise the cache
+ *		is discarded, *nlpp is set to NULL and both lists are
+ *		rebuilt.  The caller releases the returned list with
+ *		metafreenamelist().
+ *
+ *		NOTE(review): the cache is a single list that is not keyed
+ *		by sp or compnp, so a force == 0 call replays whatever was
+ *		scanned last -- confirm that callers force a reload when
+ *		they move to a different component.
+ */
+static int
+meta_sp_get_by_component(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdnamelist_t	**nlpp,
+	int		force,
+	md_error_t	*ep
+)
+{
+	static mdnamelist_t	*cached_list = NULL;	/* cached namelist */
+	static int		cached_count = 0;	/* cached count */
+	mdnamelist_t		*spnlp = NULL;		/* all sp names */
+	mdnamelist_t		*namep;			/* list iterator */
+	mdnamelist_t		**tailpp = nlpp;	/* namelist tail */
+	mdnamelist_t		**cachetailpp;		/* cache tail */
+	md_sp_t			*msp;			/* unit structure */
+	int			count = 0;		/* count of sp's */
+	int			err;
+	mdname_t		*curnp;
+
+	if ((cached_list != NULL) && (!force)) {
+		/* return a copy of the cached list */
+		for (namep = cached_list; namep != NULL; namep = namep->next)
+			tailpp = meta_namelist_append_wrapper(tailpp,
+			    namep->namep);
+		return (cached_count);
+	}
+
+	/* free the cache and reset values to zeros to prepare for a new list */
+	metafreenamelist(cached_list);
+	cached_count = 0;
+	cached_list = NULL;
+	cachetailpp = &cached_list;
+	*nlpp = NULL;
+
+	/* get all the softpartitions first of all */
+	if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0)
+		return (-1);
+
+	/*
+	 * Now for each sp, see if it resides on the component we
+	 * are interested in, if so then add it to our list
+	 */
+	for (namep = spnlp; namep != NULL; namep = namep->next) {
+		curnp = namep->namep;
+
+		/* get the unit structure; skip names that have none */
+		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
+			continue;
+
+		/*
+		 * If the current soft partition is not on the same
+		 * component, continue the search.  If it is on the same
+		 * component, add it to our namelist.
+		 */
+		err = meta_check_samedrive(compnp, msp->compnamep, ep);
+		if (err <= 0) {
+			/* not on the same device, check the next one */
+			continue;
+		}
+
+		/* it's on the same drive */
+
+		/*
+		 * Check for overlapping partitions if the component is not
+		 * a metadevice.
+		 */
+		if (!metaismeta(msp->compnamep)) {
+			/*
+			 * if they're on the same drive, neither
+			 * should be a metadevice if one isn't
+			 */
+			assert(!metaismeta(compnp));
+
+			if (meta_check_overlap(msp->compnamep->cname,
+			    compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0)
+				continue;
+
+			/* in this case it's not an error for them to overlap */
+			mdclrerror(ep);
+		}
+
+		/* Component is on the same device, add to the used list */
+		tailpp = meta_namelist_append_wrapper(tailpp, curnp);
+		cachetailpp = meta_namelist_append_wrapper(cachetailpp,
+		    curnp);
+
+		++count;
+		++cached_count;
+	}
+
+	assert(count == cached_count);
+
+	/*
+	 * Only the mdname_t pointers were appended to the caller's list
+	 * and to the cache, so the list of every soft partition in the set
+	 * was needed for the walk above and nothing more.  Release it
+	 * rather than leaking it on every uncached call.
+	 */
+	metafreenamelist(spnlp);
+
+	return (count);
+}
+
+/*
+ * FUNCTION:    meta_sp_get_default_alignment()
+ * INPUT:       sp      - the pertinent set name
+ *              compnp  - the name of the underlying component
+ * OUTPUT:      ep      - return error pointer
+ * RETURNS:     sp_ext_length_t =0: no default alignment
+ *                              >0: default alignment
+ * PURPOSE:     returns the default alignment for soft partitions to
+ *              be built on top of the specified component or
+ *              metadevice
+ *
+ *              A component that is not a metadevice gets SP_UNALIGNED.
+ *              A mirror is replaced by its first submirror whose state
+ *              is not SMS_UNUSED.  A stripe then yields the interlace
+ *              of its first row and a raid yields its interlace.
+ */
+static sp_ext_length_t
+meta_sp_get_default_alignment(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	md_error_t	*ep
+)
+{
+	sp_ext_length_t	a = SP_UNALIGNED;
+	char		*mname;
+
+	assert(compnp != NULL);
+
+	/*
+	 * We treat raw devices as opaque, and assume nothing about
+	 * their alignment requirements.
+	 */
+	if (!metaismeta(compnp))
+		return (SP_UNALIGNED);
+
+	/*
+	 * We already know it's a metadevice from the previous test;
+	 * metagetmiscname() will tell us which metadevice type we
+	 * have
+	 */
+	mname = metagetmiscname(compnp, ep);
+	if (mname == NULL)
+		goto out;
+
+	/*
+	 * For a mirror, we want to deal with the stripe that is the
+	 * primary side.  If it happens to be asymmetrically
+	 * configured, there is no simple way to fake a universal
+	 * alignment.  There's a chance that the least common
+	 * denominator of the set of interlaces from all stripes of
+	 * all submirrors would do it, but nobody that really cared
+	 * that much about this issue would create an asymmetric
+	 * config to start with.
+	 *
+	 * If the component underlying the soft partition is a mirror,
+	 * then at the exit of this loop, compnp will have been
+	 * updated to describe the first active submirror.
+	 */
+	if (strcmp(mname, MD_MIRROR) == 0) {
+		md_mirror_t	*mp;
+		int		smi;
+		md_submirror_t	*smp;
+
+		mp = meta_get_mirror(sp, compnp, ep);
+		if (mp == NULL)
+			goto out;
+
+		for (smi = 0; smi < NMIRROR; smi++) {
+
+			smp = &mp->submirrors[smi];
+			if (smp->state == SMS_UNUSED)
+				continue;
+
+			/* from here on work with the submirror instead */
+			compnp = smp->submirnamep;
+			assert(compnp != NULL);
+
+			mname = metagetmiscname(compnp, ep);
+			if (mname == NULL)
+				goto out;
+
+			break;
+		}
+
+		/* every submirror slot was SMS_UNUSED */
+		if (smi == NMIRROR)
+			goto out;
+	}
+
+	/*
+	 * Handle stripes and submirrors identically; just return the
+	 * interlace of the first row.
+	 */
+	if (strcmp(mname, MD_STRIPE) == 0) {
+		md_stripe_t	*stp;
+
+		stp = meta_get_stripe(sp, compnp, ep);
+		if (stp == NULL)
+			goto out;
+
+		a = stp->rows.rows_val[0].interlace;
+		goto out;
+	}
+
+	/*
+	 * Raid is even more straightforward; the interlace applies to
+	 * the entire device.
+	 */
+	if (strcmp(mname, MD_RAID) == 0) {
+		md_raid_t	*rp;
+
+		rp = meta_get_raid(sp, compnp, ep);
+		if (rp == NULL)
+			goto out;
+
+		a = rp->interlace;
+		goto out;
+	}
+
+	/*
+	 * If we have arrived here with the alignment still not set,
+	 * then we expect the error to have been set by one of the
+	 * routines we called.  If neither is the case, something has
+	 * really gone wrong above.  (Probably the submirror walk
+	 * failed to produce a valid submirror, but that would be
+	 * really bad...)
+	 */
+out:
+	meta_sp_debug("meta_sp_get_default_alignment: miscname %s, "
+	    "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a);
+
+	if (getenv(META_SP_DEBUG) && !mdisok(ep)) {
+		mde_perror(ep, NULL);
+	}
+
+	/*
+	 * NOTE(review): a metadevice whose misc name is neither MD_STRIPE
+	 * nor MD_RAID, or a mirror with no used submirror, arrives here
+	 * with a still 0 and, unless a callee set ep, trips this assert.
+	 * Confirm such components cannot be passed in.
+	 */
+	assert((a > 0) || (!mdisok(ep)));
+
+	return (a);
+}
+
+
+
+/*
+ * FUNCTION:	meta_check_insp()
+ * INPUT:	sp	- the set name for the device to check
+ *		np	- the name of the device to check
+ *		slblk	- the starting offset of the device to check (unused)
+ *		nblks	- the number of blocks in the device to check (unused)
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	-  0 - no soft partitions were found on the device
+ *			  -1 - the lookup itself failed
+ *			  otherwise the value of mduseerror(), which is
+ *			  called with MDE_ALREADY when the device holds at
+ *			  least one soft partition
+ * PURPOSE:	determines whether a device contains any soft partitions
+ */
+/* ARGSUSED */
+int
+meta_check_insp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*found = NULL;	/* soft partitions on np */
+	int		nfound;
+	int		rval = 0;
+
+	/* check set pointer */
+	assert(sp != NULL);
+
+	/* look up the soft partitions on the component, cache permitted */
+	nfound = meta_sp_get_by_component(sp, np, &found, 0, ep);
+
+	if (nfound > 0) {
+		/* report the first soft partition found as the user of np */
+		rval = mduseerror(ep, MDE_ALREADY, np->dev,
+		    found->namep->cname, np->cname);
+	} else if (nfound == -1) {
+		rval = -1;
+	}
+
+	metafreenamelist(found);
+	return (rval);
+}
+
+/*
+ * **************************************************************************
+ *                    Extent List Manipulation Functions                    *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_cmp_by_nameseq()
+ * INPUT:	e1	- first node to compare
+ *		e2	- second node to compare
+ * OUTPUT:	none
+ * RETURNS:	int	- =0 - nodes are equal
+ *			  <0 - e1 should go before e2
+ *			  >0 - e1 should go after e2
+ * PURPOSE:	used for sorted list inserts to build a list sorted by
+ *		name first and sequence number second.  A node without a
+ *		name sorts after a node that has one; when e1 has no name
+ *		the answer is "after" no matter what e2 holds.
+ */
+static int
+meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2)
+{
+	int	namecmp;
+
+	/* nameless nodes go to the back; e1 is examined first */
+	if (e1->ext_namep == NULL)
+		return (1);
+	if (e2->ext_namep == NULL)
+		return (-1);
+
+	namecmp = strcmp(e1->ext_namep->cname, e2->ext_namep->cname);
+	if (namecmp != 0)
+		return (namecmp);
+
+	/* same name, so the sequence number decides */
+	if (e1->ext_seq == e2->ext_seq)
+		return (0);
+
+	return ((e1->ext_seq > e2->ext_seq) ? 1 : -1);
+}
+
+/*
+ * FUNCTION:	meta_sp_cmp_by_offset()
+ * INPUT:	e1	- first node to compare
+ *		e2	- second node to compare
+ * OUTPUT:	none
+ * RETURNS:	int	- =0 - nodes are equal
+ *			  <0 - e1 should go before e2
+ *			  >0 - e1 should go after e2
+ * PURPOSE:	used for sorted list inserts to build a list sorted by
+ *		offset; only ext_offset of each node is examined
+ */
+static int
+meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2)
+{
+	/* same starting offset */
+	if (e1->ext_offset == e2->ext_offset)
+		return (0);
+
+	return ((e1->ext_offset < e2->ext_offset) ? -1 : 1);
+}
+
+/*
+ * FUNCTION:	meta_sp_list_insert()
+ * INPUT:	sp	- the set name for the device the node belongs to
+ *		np	- the name of the device the node belongs to
+ *		head	- the head of the list, must be NULL for empty list
+ *		offset	- the physical offset of this extent in sectors
+ *		length	- the length of this extent in sectors
+ *		type	- the type of the extent being inserted
+ *		seq	- the sequence number of the extent being inserted
+ *		flags	- extent flags (eg. whether it needs to be updated)
+ *		compare	- the compare function to use
+ * OUTPUT:	head	- points to the new head if a node was inserted
+ *			  at the beginning
+ * RETURNS:	void
+ * PURPOSE:	inserts an extent node into a sorted doubly linked list.
+ *		The sort order is determined by the compare function; a
+ *		new node goes in ahead of existing nodes that compare
+ *		equal to it.  A zero length extent is not inserted at
+ *		all.  The node is allocated here and it is up to the
+ *		caller to free it, possibly using meta_sp_list_free().
+ */
+static void
+meta_sp_list_insert(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	sp_ext_node_t	**head,
+	sp_ext_offset_t	offset,
+	sp_ext_length_t	length,
+	sp_ext_type_t	type,
+	uint_t		seq,
+	uint_t		flags,
+	ext_cmpfunc_t	compare
+)
+{
+	sp_ext_node_t	*node;
+	sp_ext_node_t	*prev;
+
+	assert(head != NULL);
+
+	/* an extent with no length is not worth a node */
+	if (length == 0ULL)
+		return;
+
+	/* build the node to be linked in */
+	node = Zalloc(sizeof (sp_ext_node_t));
+
+	node->ext_setp = sp;
+	node->ext_namep = np;
+	node->ext_type = type;
+	node->ext_offset = offset;
+	node->ext_length = length;
+	node->ext_seq = seq;
+	node->ext_flags = flags;
+
+	/* empty list: the new node is the whole list */
+	if (*head == NULL) {
+		node->ext_prev = NULL;
+		node->ext_next = NULL;
+		*head = node;
+		return;
+	}
+
+	/* the current head does not sort before the new node: new head */
+	if ((*compare)(*head, node) >= 0) {
+		assert((*head)->ext_prev == NULL);
+
+		node->ext_prev = NULL;
+		node->ext_next = *head;
+		(*head)->ext_prev = node;
+		*head = node;
+		return;
+	}
+
+	/*
+	 * walk forward while the following node still sorts before the
+	 * new one; stop at the end of the list at the latest.
+	 */
+	prev = *head;
+	while ((prev->ext_next != NULL) &&
+	    ((*compare)(prev->ext_next, node) < 0))
+		prev = prev->ext_next;
+
+	/* splice the new node in right behind prev */
+	node->ext_prev = prev;
+	node->ext_next = prev->ext_next;
+
+	if (prev->ext_next != NULL)
+		prev->ext_next->ext_prev = node;
+
+	prev->ext_next = node;
+}
+
+/*
+ * FUNCTION:	meta_sp_list_free()
+ * INPUT:	head	- the head of the list, must be NULL for empty list
+ * OUTPUT:	head	- points to NULL on return
+ * RETURNS:	void
+ * PURPOSE:	walks a double linked extent list from *head forward and
+ *		frees each node, then clears the caller's head pointer
+ */
+static void
+meta_sp_list_free(sp_ext_node_t **head)
+{
+	sp_ext_node_t	*node;
+	sp_ext_node_t	*following;
+
+	assert(head != NULL);
+
+	for (node = *head; node != NULL; node = following) {
+		/* pick up the successor before the node goes away */
+		following = node->ext_next;
+		Free(node);
+	}
+
+	*head = NULL;
+}
+
+/*
+ * FUNCTION:	meta_sp_list_remove()
+ * INPUT:	head	- the head of the list; the list must not be empty
+ *		ext	- the extent to remove, must be a member of the list
+ * OUTPUT:	head	- points to the new head of the list
+ * RETURNS:	void
+ * PURPOSE:	unlinks the node specified by ext from the list and
+ *		frees it.  When ext is the head node, the head pointer
+ *		moves on to the node that followed it.
+ */
+static void
+meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext)
+{
+	sp_ext_node_t	*before;
+	sp_ext_node_t	*after;
+
+	assert(head != NULL);
+	assert(*head != NULL);
+
+	before = ext->ext_prev;
+	after = ext->ext_next;
+
+	if (ext == *head)
+		*head = after;
+
+	/* join the two neighbours, whichever of them exist */
+	if (before != NULL)
+		before->ext_next = after;
+	if (after != NULL)
+		after->ext_prev = before;
+
+	Free(ext);
+}
+
+/*
+ * FUNCTION:	meta_sp_list_size()
+ * INPUT:	head	- the head of the list, must be NULL for empty list
+ *		exttype	- the type of the extents to sum
+ *		exclude_wm - subtract space for extent headers from total
+ * OUTPUT:	none
+ * RETURNS:	sp_ext_length_t	- the sum of all of the lengths
+ * PURPOSE:	sums the lengths of all extents in the list matching the
+ *		specified type.  When exclude_wm is non-zero, MD_SP_WMSIZE
+ *		is taken off for every extent that is counted.  This could
+ *		be used for computing the amount of free or used space,
+ *		for example.
+ */
+static sp_ext_length_t
+meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm)
+{
+	sp_ext_length_t	total = 0LL;
+	sp_ext_node_t	*node;
+
+	for (node = head; node != NULL; node = node->ext_next) {
+		/* extents of any other type do not count */
+		if (node->ext_type != exttype)
+			continue;
+
+		total += node->ext_length;
+		if (exclude_wm)
+			total -= MD_SP_WMSIZE;
+	}
+
+	return (total);
+}
+
+/*
+ * FUNCTION:	meta_sp_list_find()
+ * INPUT:	head	- the head of the list, must be NULL for empty list
+ *		offset	- the offset contained by the node to find
+ * OUTPUT:	none
+ * RETURNS:	sp_ext_node_t *	- the node containing the requested offset
+ *				  or NULL if no such nodes were found.
+ * PURPOSE:	finds a node in a list containing the requested offset.
+ *		A node contains an offset that is at or past its start
+ *		and before its start plus its length.  If multiple nodes
+ *		contain this offset then only the first one in list order
+ *		will be returned, though typically these lists are
+ *		managed with non-overlapping nodes.
+ *
+ *		*The list MUST be sorted by offset for this function to work.*
+ */
+static sp_ext_node_t *
+meta_sp_list_find(
+	sp_ext_node_t	*head,
+	sp_ext_offset_t	offset
+)
+{
+	sp_ext_node_t	*node = head;
+
+	while (node != NULL) {
+		/* does the offset fall inside [start, start + length)? */
+		if ((offset >= node->ext_offset) &&
+		    (offset < node->ext_offset + node->ext_length))
+			return (node);
+
+		node = node->ext_next;
+	}
+
+	/* ran off the end of the list without a match */
+	return (NULL);
+}
+
+/*
+ * FUNCTION:	meta_sp_list_freefill()
+ * INPUT:	head	- the head of the list, must be NULL for empty list
+ *		size	- the size of the volume the extent list describes
+ * OUTPUT:	head	- the new head of the list
+ * RETURNS:	void
+ * PURPOSE:	plugs every hole in the extent list with an EXTTYP_FREE
+ *		node.  A hole in front of the first extent produces a free
+ *		node that becomes the new head; space between the end of
+ *		the last extent and size is covered by a trailing free node.
+ *
+ *		*The list MUST be sorted by offset for this function to work.*
+ */
+static void
+meta_sp_list_freefill(
+	sp_ext_node_t	**head,
+	sp_ext_length_t	size
+)
+{
+	sp_ext_offset_t	covered = 0LL;	/* end of the last extent visited */
+	sp_ext_node_t	*node = *head;
+
+	while (node != NULL) {
+		/* a gap precedes this extent: describe it with a free node */
+		if (node->ext_offset > covered) {
+			meta_sp_list_insert(NULL, NULL, head,
+			    covered, node->ext_offset - covered,
+			    EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
+		}
+		covered = node->ext_offset + node->ext_length;
+		node = node->ext_next;
+	}
+
+	/* whatever is left up to the end of the volume is free as well */
+	if (size > covered) {
+		meta_sp_list_insert(NULL, NULL, head, covered, size - covered,
+		    EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_list_freefill: Extent list with "
+		    "holes freefilled:\n");
+		meta_sp_list_dump(*head);
+	}
+}
+
+/*
+ * FUNCTION:	meta_sp_list_dump()
+ * INPUT:	head	- the head of the list, must be NULL for empty list
+ * OUTPUT:	none
+ * RETURNS:	void
+ * PURPOSE:	debugging aid: emits a table header followed by one row per
+ *		extent (name, address, sequence number, type, offset,
+ *		length, flags and neighbour pointers) through
+ *		meta_sp_debug().
+ */
+static void
+meta_sp_list_dump(sp_ext_node_t *head)
+{
+	sp_ext_node_t	*node;
+	char		*type_str;
+
+	meta_sp_debug("meta_sp_list_dump: dumping extent list:\n");
+	meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name",
+	    "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev",
+	    "Next");
+
+	for (node = head; node != NULL; node = node->ext_next) {
+		/* owner name, or a placeholder when the extent has none */
+		meta_sp_debug("%5s", (node->ext_namep == NULL) ?
+		    "NONE" : node->ext_namep->cname);
+
+		meta_sp_debug("%10p %5u ", (void *) node, node->ext_seq);
+
+		/* translate the extent type into its short label */
+		if (node->ext_type == EXTTYP_ALLOC)
+			type_str = "ALLOC";
+		else if (node->ext_type == EXTTYP_FREE)
+			type_str = "FREE";
+		else if (node->ext_type == EXTTYP_END)
+			type_str = "END";
+		else if (node->ext_type == EXTTYP_RESERVED)
+			type_str = "RESV";
+		else
+			type_str = "INVLD";
+		meta_sp_debug("%7s ", type_str);
+
+		meta_sp_debug("%10llu %10llu %5u %10p %10p\n",
+		    node->ext_offset, node->ext_length,
+		    node->ext_flags, (void *) node->ext_prev,
+		    (void *) node->ext_next);
+	}
+	meta_sp_debug("\n");
+}
+
+/*
+ * FUNCTION:	meta_sp_list_overlaps()
+ * INPUT:	head	- the head of the list, must be NULL for empty list
+ * OUTPUT:	none
+ * RETURNS:	int	- 1 if extents overlap, 0 if ok.  An empty list or
+ *			  a list with a single node never overlaps.
+ * PURPOSE:	checks a list for overlaps by comparing the end of every
+ *		extent with the start of its successor.  The list MUST be
+ *		sorted by offset for this function to work properly.
+ */
+static int
+meta_sp_list_overlaps(sp_ext_node_t *head)
+{
+	sp_ext_node_t	*ext;
+
+	/*
+	 * An empty list is represented by a NULL head; there is nothing
+	 * to compare, and the loop below must not dereference it.
+	 */
+	if (head == NULL)
+		return (0);
+
+	for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) {
+		/* this extent runs past the start of the next one */
+		if (ext->ext_offset + ext->ext_length >
+		    ext->ext_next->ext_offset)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * **************************************************************************
+ *                        Extent Allocation Functions                       *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_alloc_by_ext()
+ * INPUT:	sp	- the set name for the device the node belongs to
+ *		np	- the name of the device the node belongs to
+ *		head	- the head of the list, must be NULL for empty list
+ *		free_ext	- the free extent being carved up
+ *		alloc_offset	- where the allocation starts
+ *		alloc_length	- how long the allocation is
+ *		seq		- the sequence number of the allocation
+ * OUTPUT:	head	- the new head pointer
+ * RETURNS:	void
+ * PURPOSE:	replaces free_ext with up to three extents: a free extent
+ *		for any space between the start of free_ext and
+ *		alloc_offset, an EXTTYP_ALLOC extent of alloc_length
+ *		starting at alloc_offset, and a free extent for any space
+ *		between the end of the allocation and the end of free_ext.
+ *		The requested range must lie entirely within free_ext.
+ *		Free pieces that would have zero length are not created.
+ *
+ *		free_ext is removed from the list and freed, so the caller
+ *		must not reference it afterwards.  All newly created
+ *		extents have the EXTFLG_UPDATE flag set.
+ */
+static void
+meta_sp_alloc_by_ext(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	sp_ext_node_t	**head,
+	sp_ext_node_t	*free_ext,
+	sp_ext_offset_t	alloc_offset,
+	sp_ext_length_t	alloc_length,
+	uint_t		seq
+)
+{
+	/* take copies now; free_ext is released below */
+	sp_ext_offset_t	src_start = free_ext->ext_offset;
+	sp_ext_offset_t	src_end = src_start + free_ext->ext_length;
+	sp_ext_offset_t	alloc_end = alloc_offset + alloc_length;
+
+	/* the allocation may not stick out of the free extent */
+	assert(src_start <= alloc_offset);
+	assert(src_end >= alloc_end);
+
+	meta_sp_list_remove(head, free_ext);
+
+	/* free space left in front of the allocation */
+	if (alloc_offset > src_start) {
+		meta_sp_list_insert(NULL, NULL, head, src_start,
+		    (alloc_offset - src_start), EXTTYP_FREE, 0,
+		    EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+	}
+
+	/* free space left behind the allocation */
+	if (alloc_end < src_end) {
+		meta_sp_list_insert(NULL, NULL, head, alloc_end,
+		    (src_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE,
+		    meta_sp_cmp_by_offset);
+	}
+
+	/* and finally the allocation itself */
+	meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length,
+	    EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n");
+		meta_sp_list_dump(*head);
+	}
+}
+
+/*
+ * FUNCTION:	meta_sp_alloc_by_len()
+ * INPUT:	sp	- the set name for the device the node belongs to
+ *		np	- the name of the device the node belongs to
+ *		head	- the head of the list, must be NULL for empty list
+ *		*lp	- the requested length to allocate
+ *		last_off	- the last offset already allocated.
+ *		alignment	- the desired extent alignment
+ * OUTPUT:	head	- the new head pointer
+ *		*lp	- the length allocated
+ * RETURNS:	int	- -1 if error, the number of new extents on success
+ * PURPOSE:	allocates extents from free space to satisfy the requested
+ *		length.  If requested length is zero, allocates all
+ *		remaining free space.  This function provides the meat
+ *		of the extent allocation algorithm.  Allocation is a
+ *		three tier process:
+ *
+ *		1. If last_off is nonzero and there is free space following
+ *		   that node, then it is extended to allocate as much of that
+ *		   free space as possible.  This is useful for metattach.
+ *		2. If a free extent can be found to satisfy the remaining
+ *		   requested space, then satisfy the rest of the request
+ *		   from that extent.
+ *		3. Start allocating space from any remaining free extents until
+ *		   the remainder of the request is satisfied.
+ *
+ *              If alignment is non-zero, then every extent modified
+ *              or newly allocated will be aligned modulo alignment,
+ *              with a length that is an integer multiple of
+ *              alignment.
+ *
+ *		The EXTFLG_UPDATE flag is set for all nodes (free and
+ *		allocated) that require updated watermarks.
+ *
+ *		This algorithm may have a negative impact on fragmentation
+ *		in pathological cases and may be improved if it turns out
+ *		to be a problem.  This may be exacerbated by particularly
+ *		large alignments.
+ *
+ * NOTE:	It's confusing, so it demands an explanation:
+ *		- len is used to represent requested data space; it
+ *		  does not include room for a watermark.  On each full
+ *		  or partial allocation, len will be decremented by
+ *		  alloc_len (see next paragraph) until it reaches
+ *		  zero.
+ *		- alloc_len is used to represent data space allocated
+ *		  from a particular extent; it does not include space
+ *		  for a watermark.  In the rare event that a_length
+ *		  (see next paragraph) is equal to MD_SP_WMSIZE,
+ *		  alloc_len will be zero and the resulting MD_SP_WMSIZE
+ *		  fragment of space will be utterly unusable.
+ *		- a_length is used to represent all space to be
+ *		  allocated from a particular extent; it DOES include
+ *		  space for a watermark.
+ */
+static int
+meta_sp_alloc_by_len(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	sp_ext_node_t	**head,
+	sp_ext_length_t	*lp,
+	sp_ext_offset_t	last_off,
+	sp_ext_offset_t	alignment
+)
+{
+	sp_ext_node_t	*free_ext;
+	sp_ext_node_t	*next_ext;	/* successor saved before free_ext dies */
+	sp_ext_node_t	*alloc_ext;
+	uint_t		last_seq = 0;
+	uint_t		numexts = 0;
+	sp_ext_length_t	freespace;
+	sp_ext_length_t	alloc_len;
+	sp_ext_length_t	len;
+
+	/* We're DOA if we can't read *lp */
+	assert(lp != NULL);
+	len = *lp;
+
+	/*
+	 * Process the nominal case first: we've been given an actual
+	 * size argument, rather than the literal "all"
+	 */
+
+	if (len != 0) {
+
+		/*
+		 * Short circuit the check for free space.  This may
+		 * tell us we have enough space when we really don't
+		 * because each extent loses space to a watermark, but
+		 * it will always tell us there isn't enough space
+		 * correctly.  Worst case we do some extra work.
+		 */
+		freespace = meta_sp_list_size(*head, EXTTYP_FREE,
+		    INCLUDE_WM);
+
+		if (freespace < len)
+			return (-1);
+
+		/*
+		 * First see if we can extend the last extent for an
+		 * attach.
+		 */
+		if (last_off != 0LL) {
+			int align = 0;
+
+			alloc_ext =
+			    meta_sp_list_find(*head, last_off);
+			assert(alloc_ext != NULL);
+
+			/*
+			 * The offset test reflects the
+			 * inclusion of the watermark in the extent
+			 */
+			align = (alignment > 0) &&
+			    (((alloc_ext->ext_offset + MD_SP_WMSIZE) %
+				alignment) == 0);
+
+			/*
+			 * If we decided not to align here, we should
+			 * also reset "alignment" so we don't bother
+			 * later, either.
+			 */
+			if (!align) {
+				alignment = 0;
+			}
+
+			last_seq = alloc_ext->ext_seq;
+
+			free_ext = meta_sp_list_find(*head,
+			    alloc_ext->ext_offset +
+			    alloc_ext->ext_length);
+
+			/*
+			 * If a free extent follows our last allocated
+			 * extent, then remove the last allocated
+			 * extent and increase the size of the free
+			 * extent to overlap it, then allocate the
+			 * total space from the new free extent.
+			 */
+			if (free_ext != NULL &&
+			    free_ext->ext_type == EXTTYP_FREE) {
+				assert(free_ext->ext_offset ==
+				    alloc_ext->ext_offset +
+				    alloc_ext->ext_length);
+
+				alloc_len =
+				    MIN(len, free_ext->ext_length);
+
+				if (align && (alloc_len < len)) {
+					/* No watermark space needed */
+					alloc_len -= alloc_len % alignment;
+				}
+
+				if (alloc_len > 0) {
+					free_ext->ext_offset -=
+					    alloc_ext->ext_length;
+					free_ext->ext_length +=
+					    alloc_ext->ext_length;
+
+					/*
+					 * free_ext is freed by this call
+					 * and is not referenced again.
+					 */
+					meta_sp_alloc_by_ext(sp, np, head,
+					    free_ext, free_ext->ext_offset,
+					    alloc_ext->ext_length + alloc_len,
+					    last_seq);
+
+					/*
+					 * now remove the original allocated
+					 * node.  We may have overlapping
+					 * extents for a short time before
+					 * this node is removed.
+					 */
+					meta_sp_list_remove(head, alloc_ext);
+					len -= alloc_len;
+				}
+			}
+			last_seq++;
+		}
+
+		if (len == 0LL)
+			goto out;
+
+		/*
+		 * Next, see if we can find a single allocation for
+		 * the remainder.  This may make fragmentation worse
+		 * in some cases, but there's no good way to allocate
+		 * that doesn't have a highly fragmented corner case.
+		 */
+		for (free_ext = *head; free_ext != NULL;
+			free_ext = free_ext->ext_next) {
+			sp_ext_offset_t	a_offset;
+			sp_ext_offset_t	a_length;
+
+			if (free_ext->ext_type != EXTTYP_FREE)
+				continue;
+
+			/*
+			 * The length test should include space for
+			 * the watermark
+			 */
+
+			a_offset = free_ext->ext_offset;
+			a_length = free_ext->ext_length;
+
+			if (alignment > 0) {
+
+				/*
+				 * Shortcut for extents that have been
+				 * previously added to pad out the
+				 * data space
+				 */
+				if (a_length < alignment) {
+					continue;
+				}
+
+				/*
+				 * Round up so the data space begins
+				 * on a properly aligned boundary.
+				 */
+				a_offset += alignment -
+				    (a_offset % alignment) - MD_SP_WMSIZE;
+
+				/*
+				 * This is only necessary in case the
+				 * watermark size is ever greater than
+				 * one.  It'll never happen, of
+				 * course; we'll get rid of watermarks
+				 * before we make 'em bigger.
+				 */
+				if (a_offset < free_ext->ext_offset) {
+					a_offset += alignment;
+				}
+
+				/*
+				 * Adjust the length to account for
+				 * the space lost above (if any)
+				 */
+				a_length -=
+					(a_offset - free_ext->ext_offset);
+			}
+
+			if (a_length >= len + MD_SP_WMSIZE) {
+				/*
+				 * free_ext is freed by this call; the
+				 * break below leaves the loop before the
+				 * loop increment could touch it.
+				 */
+				meta_sp_alloc_by_ext(sp, np, head,
+					free_ext, a_offset,
+					len + MD_SP_WMSIZE, last_seq);
+
+				len = 0LL;
+				numexts++;
+				break;
+			}
+		}
+
+		if (len == 0LL)
+			goto out;
+
+
+		/*
+		 * If the request could not be satisfied by extending
+		 * the last extent or by a single extent, then put
+		 * multiple smaller extents together until the request
+		 * is satisfied.
+		 */
+		for (free_ext = *head; (free_ext != NULL) && (len > 0);
+			free_ext = next_ext) {
+			sp_ext_offset_t a_offset;
+			sp_ext_length_t a_length;
+
+			/*
+			 * Remember the successor now.
+			 * meta_sp_alloc_by_ext() removes and frees
+			 * free_ext, so it must not be dereferenced to
+			 * advance the loop afterwards.
+			 */
+			next_ext = free_ext->ext_next;
+
+			if (free_ext->ext_type != EXTTYP_FREE)
+				continue;
+
+			a_offset = free_ext->ext_offset;
+			a_length = free_ext->ext_length;
+
+			if (alignment > 0) {
+
+				/*
+				 * Shortcut for extents that have been
+				 * previously added to pad out the
+				 * data space
+				 */
+				if (a_length < alignment) {
+					continue;
+				}
+
+				/*
+				 * Round up so the data space begins
+				 * on a properly aligned boundary.
+				 */
+				a_offset += alignment -
+					(a_offset % alignment) - MD_SP_WMSIZE;
+
+				/*
+				 * This is only necessary in case the
+				 * watermark size is ever greater than
+				 * one.  It'll never happen, of
+				 * course; we'll get rid of watermarks
+				 * before we make 'em bigger.
+				 */
+				if (a_offset < free_ext->ext_offset) {
+					a_offset += alignment;
+				}
+
+				/*
+				 * Adjust the length to account for
+				 * the space lost above (if any)
+				 */
+				a_length -=
+					(a_offset - free_ext->ext_offset);
+
+				/*
+				 * Adjust the length to be properly
+				 * aligned if it is NOT to be the
+				 * last extent in the soft partition.
+				 */
+				if ((a_length - MD_SP_WMSIZE) < len)
+					a_length -=
+						(a_length - MD_SP_WMSIZE)
+						% alignment;
+			}
+
+			alloc_len = MIN(len, a_length - MD_SP_WMSIZE);
+			if (alloc_len == 0)
+				continue;
+
+			/*
+			 * meta_sp_alloc_by_ext() expects the
+			 * allocation length to include the watermark
+			 * size, which is why we don't simply pass in
+			 * alloc_len here.
+			 */
+			meta_sp_alloc_by_ext(sp, np, head, free_ext,
+				a_offset, MIN(len + MD_SP_WMSIZE, a_length),
+				last_seq);
+
+			len -= alloc_len;
+			numexts++;
+			last_seq++;
+		}
+
+
+		/*
+		 * If there was not enough space we can throw it all
+		 * away since no real work has been done yet.
+		 */
+		if (len != 0) {
+			meta_sp_list_free(head);
+			return (-1);
+		}
+	}
+
+	/*
+	 * Otherwise, the literal "all" was specified: allocate all
+	 * available free space.  Don't bother with alignment.
+	 */
+	else {
+		/* First, extend the last extent if this is a grow */
+		if (last_off != 0LL) {
+			alloc_ext =
+				meta_sp_list_find(*head, last_off);
+			assert(alloc_ext != NULL);
+
+			last_seq = alloc_ext->ext_seq;
+
+			free_ext = meta_sp_list_find(*head,
+				alloc_ext->ext_offset +
+				alloc_ext->ext_length);
+
+			/*
+			 * If a free extent follows our last allocated
+			 * extent, then remove the last allocated
+			 * extent and increase the size of the free
+			 * extent to overlap it, then allocate the
+			 * total space from the new free extent.
+			 */
+			if (free_ext != NULL &&
+			    free_ext->ext_type == EXTTYP_FREE) {
+				assert(free_ext->ext_offset ==
+				    alloc_ext->ext_offset +
+				    alloc_ext->ext_length);
+
+				len = alloc_len =
+				    free_ext->ext_length;
+
+				free_ext->ext_offset -=
+				    alloc_ext->ext_length;
+				free_ext->ext_length +=
+				    alloc_ext->ext_length;
+
+				meta_sp_alloc_by_ext(sp, np, head,
+				    free_ext, free_ext->ext_offset,
+				    alloc_ext->ext_length + alloc_len,
+				    last_seq);
+
+				/*
+				 * now remove the original allocated
+				 * node.  We may have overlapping
+				 * extents for a short time before
+				 * this node is removed.
+				 */
+				meta_sp_list_remove(head, alloc_ext);
+			}
+
+			last_seq++;
+		}
+
+		/* Next, grab all remaining free space */
+		for (free_ext = *head; free_ext != NULL;
+			free_ext = next_ext) {
+
+			/*
+			 * Save the successor first; free_ext is freed
+			 * by meta_sp_alloc_by_ext() below.
+			 */
+			next_ext = free_ext->ext_next;
+
+			if (free_ext->ext_type == EXTTYP_FREE) {
+				alloc_len =
+				    free_ext->ext_length - MD_SP_WMSIZE;
+				if (alloc_len == 0)
+					continue;
+
+				/*
+				 * meta_sp_alloc_by_ext() expects the
+				 * allocation length to include the
+				 * watermark size, which is why we
+				 * don't simply pass in alloc_len
+				 * here.
+				 */
+				meta_sp_alloc_by_ext(sp, np, head,
+				    free_ext, free_ext->ext_offset,
+				    free_ext->ext_length,
+				    last_seq);
+
+				len += alloc_len;
+				numexts++;
+				last_seq++;
+			}
+		}
+	}
+
+out:
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_alloc_by_len: Extent list after "
+		    "allocation:\n");
+		meta_sp_list_dump(*head);
+	}
+
+	/*
+	 * For an "all" request (*lp was zero on entry) report how much
+	 * data space was actually gathered.
+	 */
+	if (*lp == 0) {
+		*lp = len;
+
+		/*
+		 * Make sure the callers hit a no space error if we
+		 * didn't actually find anything.
+		 */
+		if (len == 0) {
+			return (-1);
+		}
+	}
+
+	return (numexts);
+}
+
+/*
+ * FUNCTION:	meta_sp_alloc_by_list()
+ * INPUT:	sp	- the set name for the device the node belongs to
+ *		np	- the name of the device the node belongs to
+ *		head	- the head of the list, must be NULL for empty list
+ *		oblist	- an extent list containing requested nodes to allocate
+ * OUTPUT:	head	- the new head pointer
+ * RETURNS:	int	- -1 if error, the number of new extents on success
+ * PURPOSE:	carves every extent named in oblist out of the free space
+ *		in the list.  This is primarily used for the -o/-b options
+ *		where the user asks for specific extents.  Each request,
+ *		together with the watermark that precedes it, has to fit
+ *		inside a single free extent, and requests may not overlap
+ *		each other.  The allocation is abandoned with -1 at the
+ *		first request that does not fit; requests processed before
+ *		it remain allocated in the list.  Newly created nodes have
+ *		the EXTFLG_UPDATE flag set by meta_sp_alloc_by_ext().
+ */
+static int
+meta_sp_alloc_by_list(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	sp_ext_node_t	**head,
+	sp_ext_node_t	*oblist
+)
+{
+	sp_ext_node_t	*req;
+	sp_ext_node_t	*container;
+	uint_t		count = 0;
+
+	for (req = oblist; req != NULL; req = req->ext_next) {
+		/* the watermark sits directly in front of the data */
+		sp_ext_offset_t	wm_off = req->ext_offset - MD_SP_WMSIZE;
+
+		container = meta_sp_list_find(*head, wm_off);
+
+		/* nothing in the list covers the watermark offset */
+		if (container == NULL)
+			return (-1);
+
+		/* the request runs past the end of the covering extent */
+		if (req->ext_offset + req->ext_length >
+		    container->ext_offset + container->ext_length)
+			return (-1);
+
+		/* the covering extent is not free space */
+		if (container->ext_type != EXTTYP_FREE)
+			return (-1);
+
+		meta_sp_alloc_by_ext(sp, np, head, container,
+		    wm_off, req->ext_length + MD_SP_WMSIZE, req->ext_seq);
+
+		count++;
+	}
+
+	assert(meta_sp_list_overlaps(*head) == 0);
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_alloc_by_list: Extent list after "
+		    "allocation:\n");
+		meta_sp_list_dump(*head);
+	}
+
+	return (count);
+}
+
+/*
+ * **************************************************************************
+ *                     Extent List Population Functions                     *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_extlist_from_namelist()
+ * INPUT:	sp	- the set name for the device the node belongs to
+ *		spnlp	- the namelist of soft partitions to build a list from
+ * OUTPUT:	extlist	- the extent list built from the SPs in the namelist
+ *		ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 on success
+ * PURPOSE:	turns the soft partitions named in spnlp into an extent
+ *		list.  Every extent of every soft partition is inserted,
+ *		ordered by offset, as an EXTTYP_ALLOC node whose sequence
+ *		number is its index in the unit structure.  No node gets
+ *		the EXTFLG_UPDATE flag.  Each node is widened to include
+ *		the watermark, which the unit structure extents leave out.
+ */
+static int
+meta_sp_extlist_from_namelist(
+	mdsetname_t	*sp,
+	mdnamelist_t	*spnlp,
+	sp_ext_node_t	**extlist,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*cursor;
+
+	assert(sp != NULL);
+
+	/* one pass per soft partition in the namelist */
+	for (cursor = spnlp; cursor != NULL; cursor = cursor->next) {
+		mdname_t	*spname = cursor->namep;
+		md_sp_t		*unit;
+		int		idx;
+
+		/* fetch the unit structure; give up on the first failure */
+		unit = meta_get_sp_common(sp, spname, 0, ep);
+		if (unit == NULL)
+			return (-1);
+
+		for (idx = 0; idx < unit->ext.ext_len; idx++) {
+			md_sp_ext_t	*piece = &unit->ext.ext_val[idx];
+
+			/*
+			 * start one watermark earlier and run one
+			 * watermark longer than the unit structure says,
+			 * since it does not account for the watermark.
+			 */
+			meta_sp_list_insert(sp, spname, extlist,
+			    piece->poff - MD_SP_WMSIZE,
+			    piece->len + MD_SP_WMSIZE,
+			    EXTTYP_ALLOC, idx, 0, meta_sp_cmp_by_offset);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_extlist_from_wm()
+ * INPUT:	sp	- the set name for the device the node belongs to
+ *		compnp	- the name of the device to scan watermarks on
+ *		compare	- the comparison function used to order the nodes
+ *			  as they are inserted into extlist
+ * OUTPUT:	extlist	- the extent list built from the watermarks found
+ *			  on the device
+ *		ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 on success
+ * PURPOSE:	builds an extent list by walking the chain of watermarks
+ *		on the component, starting at the offset returned by
+ *		meta_sp_get_start().  Every watermark read, including the
+ *		final EXTTYP_END one, is inserted as a node carrying the
+ *		type and sequence number recorded in the watermark, with
+ *		the EXTFLG_UPDATE flag set.  Each extent in the list
+ *		includes the space occupied by the watermark itself.  The
+ *		scan stops at the EXTTYP_END watermark or at the first
+ *		failure.
+ */
+static int
+meta_sp_extlist_from_wm(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	sp_ext_node_t	**extlist,
+	ext_cmpfunc_t	compare,
+	md_error_t	*ep
+)
+{
+	mp_watermark_t	wm;
+	mdname_t	*np = NULL;
+	mdsetname_t	*spsetp = NULL;
+	sp_ext_offset_t	cur_off;
+
+	if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+
+	for (;;) {
+		if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) {
+			return (-1);
+		}
+
+		/*
+		 * get the set and name pointers; spsetp stays NULL when
+		 * the watermark names the local set
+		 */
+		if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) {
+			if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) {
+				return (-1);
+			}
+		}
+
+		/* np stays NULL for watermarks that describe free space */
+		if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) {
+			if (meta_init_make_device(&sp, wm.wm_mdname, ep) != 0)
+				return (-1);
+			np = metaname(&spsetp, wm.wm_mdname, ep);
+			if (np == NULL) {
+				return (-1);
+			}
+		}
+
+		/* insert watermark into extent list */
+		meta_sp_list_insert(spsetp, np, extlist, cur_off,
+		    wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq,
+		    EXTFLG_UPDATE, compare);
+
+		/* if we see the end watermark, we're done */
+		if (wm.wm_type == EXTTYP_END)
+			break;
+
+		/*
+		 * The next watermark follows this extent's data.  Step
+		 * by the same length that was just recorded in the list
+		 * so the two can never disagree.
+		 */
+		cur_off += wm.wm_length + MD_SP_WMSIZE;
+
+		/* clear out set and name pointers for next iteration */
+		np = NULL;
+		spsetp = NULL;
+	}
+
+	return (0);
+}
+
+/*
+ * **************************************************************************
+ *                        Print (metastat) Functions                        *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_short_print()
+ * INPUT:	msp	- the unit structure to display
+ *		fname	- passed to mdsyserror() when writing to fp fails
+ *		fp	- the file pointer to send output to
+ *		options	- print options from the command line processor
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 on success, otherwise the value returned by
+ *			  mdsyserror()
+ * PURPOSE:	writes a one line description of the soft partition in
+ *		md.tab form, primarily used for metastat -p.  With
+ *		PRINT_LARGEDEVICES set, units whose revision is not
+ *		MD_64BIT_META_DEV produce no output.
+ */
+static int
+meta_sp_short_print(
+	md_sp_t		*msp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	int	idx;
+	int	standard;
+
+	/* in large-device mode only 64 bit units are of interest */
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (msp->common.revision != MD_64BIT_META_DEV))
+		return (0);
+
+	/* print name and -p */
+	if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/*
+	 * A component living under one of the well known device
+	 * directories is printed in its short cxtxdxsx or dx form, since
+	 * metainit will assume the default location.  Anything else needs
+	 * the full pathname to make sure this works as we intend.
+	 */
+	standard =
+	    (strstr(msp->compnamep->rname, "/dev/rdsk") != NULL) ||
+	    (strstr(msp->compnamep->rname, "/dev/md/rdsk") != NULL) ||
+	    (strstr(msp->compnamep->rname, "/dev/td/") != NULL);
+
+	if (fprintf(fp, " %s", standard ?
+	    msp->compnamep->cname : msp->compnamep->rname) == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/* one "-o offset -b length" pair per extent */
+	idx = 0;
+	while (idx < msp->ext.ext_len) {
+		md_sp_ext_t	*piece = &msp->ext.ext_val[idx];
+
+		if (fprintf(fp, " -o %llu -b %llu ", piece->poff,
+		    piece->len) == EOF)
+			return (mdsyserror(ep, errno, fname));
+		idx++;
+	}
+
+	if (fprintf(fp, "\n") == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/* success */
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_status_to_name()
+ * INPUT:	xsp_status	- the status value to convert to a string
+ *		tstate		- transient errored device state.  When the
+ *				  MD_INACCESSIBLE bit is set the device is
+ *				  reported as Unavailable
+ * OUTPUT:	none
+ * RETURNS:	char *	- a pointer to the string representing the status value
+ * PURPOSE:	maps a soft partition status onto an internationalized
+ *		description.  Status values without a description of
+ *		their own yield "Invalid".  The string is produced by
+ *		Strdup() and must be freed by the caller.
+ */
+static char *
+meta_sp_status_to_name(
+	xsp_status_t	xsp_status,
+	uint_t		tstate
+)
+{
+	char *name = NULL;
+
+	/*
+	 * MD_INACCESSIBLE is the only bit that makes the device
+	 * 'Unavailable'; tstate can carry other bits for other
+	 * multi-node reasons (e.g. ABR being set), so test the bit
+	 * rather than the whole word.
+	 */
+	if ((tstate & MD_INACCESSIBLE) != 0)
+		return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable")));
+
+	switch (xsp_status) {
+	case MD_SP_OK:
+		name = Strdup(dgettext(TEXT_DOMAIN, "Okay"));
+		break;
+	case MD_SP_ERR:
+		name = Strdup(dgettext(TEXT_DOMAIN, "Errored"));
+		break;
+	case MD_SP_RECOVER:
+		name = Strdup(dgettext(TEXT_DOMAIN, "Recovering"));
+		break;
+	case MD_SP_CREATEPEND:
+		name = Strdup(dgettext(TEXT_DOMAIN, "Creating"));
+		break;
+	case MD_SP_GROWPEND:
+		name = Strdup(dgettext(TEXT_DOMAIN, "Growing"));
+		break;
+	case MD_SP_DELPEND:
+		name = Strdup(dgettext(TEXT_DOMAIN, "Deleting"));
+		break;
+	default:
+		/* not a known status; handled below */
+		break;
+	}
+
+	/* nothing matched above */
+	if (name == NULL)
+		name = Strdup(dgettext(TEXT_DOMAIN, "Invalid"));
+
+	return (name);
+}
+
+/*
+ * FUNCTION:	meta_sp_report()
+ * INPUT:	sp	- the set name for the unit being displayed
+ *		msp	- the unit structure to display
+ *		nlpp	- pass back the large devs
+ *		fname	- passed to mdsyserror() when writing to fp fails
+ *		fp	- the file pointer to send output to
+ *		options	- print options from the command line processor
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 on success, -1 or the value returned by
+ *			  mdsyserror() on error
+ * PURPOSE:	print a full report of the device specified: an optional
+ *		header line, the component name, the state, the size, the
+ *		component details when the component is not itself a
+ *		metadevice, and a table of the extents.  With
+ *		PRINT_LARGEDEVICES set, units whose revision is not
+ *		MD_64BIT_META_DEV produce no output.
+ */
+static int
+meta_sp_report(
+	mdsetname_t	*sp,
+	md_sp_t		*msp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t		extn;
+	char		*status;
+	char		*devid = "";
+	mdname_t	*didnp = NULL;
+	ddi_devid_t	dtp;
+	int		len;
+	uint_t		tstate = 0;
+
+	if (options & PRINT_LARGEDEVICES) {
+		if (msp->common.revision != MD_64BIT_META_DEV) {
+			return (0);
+		} else {
+			if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
+				return (-1);
+		}
+	}
+
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"),
+		    msp->common.namep->cname) == EOF)
+			return (mdsyserror(ep, errno, fname));
+	}
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Device: %s\n"),
+	    msp->compnamep->cname) == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/* Determine if device is available before displaying status */
+	if (metaismeta(msp->common.namep)) {
+		if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0)
+			return (-1);
+	}
+	/*
+	 * NOTE(review): meta_sp_status_to_name() tests MD_INACCESSIBLE,
+	 * but only the MD_DEV_ERRORED bits of tstate are passed in here.
+	 * Confirm the two masks overlap, otherwise "Unavailable" can
+	 * never be reported.
+	 */
+	status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED);
+
+	/* print out "State" to be consistent with other metadevices */
+	if (tstate & MD_ABR_CAP) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    State: %s - Application Based Recovery (ABR)\n"),
+		    status) == EOF) {
+			Free(status);
+			return (mdsyserror(ep, errno, fname));
+		}
+	} else {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    State: %s\n"), status) == EOF) {
+			Free(status);
+			return (mdsyserror(ep, errno, fname));
+		}
+	}
+	/* release with Free(), matching the error paths above */
+	Free(status);
+
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %llu blocks (%s)\n"),
+	    msp->common.size,
+	    meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/* print component details */
+	if (! metaismeta(msp->compnamep)) {
+		diskaddr_t	start_blk;
+		int		has_mddb;
+		char		*has_mddb_str;
+
+		/* print header */
+		/*
+		 * Building a format string on the fly that will
+		 * be used in (f)printf. This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		len = strlen(msp->compnamep->cname);
+		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+		len += 2;
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %-5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+			return (mdsyserror(ep, errno, fname));
+		}
+
+
+		/* get info */
+		if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) ==
+		    MD_DISKADDR_ERROR)
+			return (-1);
+
+		if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0)
+			return (-1);
+
+		if (has_mddb)
+			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+		else
+			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+		/* populate the key in the name_p structure */
+		didnp = metadevname(&sp, msp->compnamep->dev, ep);
+		if (didnp == NULL) {
+			return (-1);
+		}
+
+		/*
+		 * determine if devid does NOT exist; without PRINT_DEVID
+		 * the "Reloc" column is left empty
+		 */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL)
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info */
+		/*
+		 * This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		if (fprintf(fp, "\t%-*s %8lld     %-5.5s %s\n",
+		    len, msp->compnamep->cname,
+		    start_blk, has_mddb_str, devid) == EOF) {
+			return (mdsyserror(ep, errno, fname));
+		}
+		(void) fprintf(fp, "\n");
+	}
+
+
+	/* print the headers */
+	if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n",
+	    dgettext(TEXT_DOMAIN, "Extent"),
+	    dgettext(TEXT_DOMAIN, "Start Block"),
+	    dgettext(TEXT_DOMAIN, "Block count")) == EOF)
+		return (mdsyserror(ep, errno, fname));
+
+	/* print out each extent */
+	for (extn = 0; (extn < msp->ext.ext_len); extn++) {
+		md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
+
+		/* If PRINT_TIMES option is ever supported, add output here */
+		if (fprintf(fp, "\t%6u %24llu %24llu\n",
+		    extn, extp->poff, extp->len) == EOF)
+			return (mdsyserror(ep, errno, fname));
+	}
+
+	/* separate records with a newline */
+	(void) fprintf(fp, "\n");
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_print()
+ * INPUT:	sp	- the set name for the unit being displayed
+ *		np	- the name of the device to print; NULL means
+ *			  print every soft partition in the set
+ *		nlpp	- name list passed through unchanged to the
+ *			  report and sub-device print routines
+ *		fname	- passed through unchanged to the print routines
+ *		fp	- the file pointer to send output to
+ *		options	- print options from the command line processor
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 on success.  When np is NULL,
+ *			  -1 is returned if printing any one of the
+ *			  soft partitions failed.
+ * PURPOSE:	print a full report of the device specified by metastat.
+ *		This is the main entry point for printing.
+ */
+int
+meta_sp_print(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_sp_t		*msp;
+	md_unit_t	*mdp;
+	int		rval = 0;
+
+	/* should always have the same set */
+	assert(sp != NULL);
+
+	/* print all the soft partitions */
+	if (np == NULL) {
+		mdnamelist_t	*nlp = NULL;
+		mdnamelist_t	*p;
+		int		cnt;
+
+		if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0)
+			return (-1);
+		else if (cnt == 0)
+			return (0);
+
+		/* recursively print them out */
+		for (p = nlp; (p != NULL); p = p->next) {
+			/*
+			 * Keep going after a failure so that every soft
+			 * partition is still printed, but latch the
+			 * failure so that a later successful print cannot
+			 * turn the overall result back into success.
+			 */
+			if (meta_sp_print(sp, p->namep, nlpp, fname, fp,
+			    options, ep) != 0)
+				rval = -1;
+		}
+
+		/* clean up, report whether any print failed */
+		metafreenamelist(nlp);
+		return (rval);
+	}
+
+	/* get the unit structure */
+	if ((msp = meta_get_sp_common(sp, np,
+	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
+		return (-1);
+
+	/* a parented unit is only printed when sub-devices are requested */
+	if ((! (options & PRINT_SUBDEVS)) &&
+	    (MD_HAS_PARENT(msp->common.parent))) {
+		return (0);
+	}
+
+	/* print appropriate detail */
+	if (options & PRINT_SHORT) {
+		if (meta_sp_short_print(msp, fname, fp, options, ep) != 0)
+			return (-1);
+	} else {
+		if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0)
+			return (-1);
+	}
+
+	/*
+	 * Print underlying metadevices if they are parented to us and
+	 * if the info for the underlying metadevice has not been printed.
+	 */
+	if (metaismeta(msp->compnamep)) {
+		/* get the unit structure for the subdevice */
+		if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL)
+			return (-1);
+
+		/* If info not already printed, recurse */
+		if (!BT_TEST(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)))) {
+			if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp,
+			    (options | PRINT_HEADER | PRINT_SUBDEVS),
+			    NULL, ep) != 0) {
+				return (-1);
+			}
+			/* remember it so siblings do not print it again */
+			BT_SET(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)));
+		}
+	}
+	return (0);
+}
+
+/*
+ * **************************************************************************
+ *                     Watermark Manipulation Functions                     *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_get_start()
+ * INPUT:	sp	- the operating set
+ *		np 	- device upon which the sp is being built
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	diskaddr_t - MD_DISKADDR_ERROR if error, otherwise the
+ *			     start block
+ * PURPOSE:	Encapsulate the determination of the start block of the
+ *		device upon which the sp is built or being built.
+ *		This is done to hide the ugliness of the algorithm.  In
+ *		the case where a sp is being built upon a stripe of > 1
+ *		TB that is made up of a set of disks in which the first
+ *		has a VTOC label the result returned from the call to
+ *		metagetstart is incorrect.  The reason being that a > 1
+ *		TB metadevice will manufacture an EFI label in which the
+ *		start address is zero.  This is irrespective of the underlying
+ *		devices.  The long term fix for this is to fix
+ *		meta_efi_to_mdvtoc and meta_efi_to_mdgeom so that they return
+ *		values that are indicative of the first underlying device in
+ *		metadevice.
+ */
+static diskaddr_t
+meta_sp_get_start(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	/*
+	 * Hold the value in the type this function returns and that
+	 * MD_DISKADDR_ERROR is compared in, so that the start block is
+	 * never narrowed and the error test does not depend on an
+	 * implicit signed-to-unsigned conversion.
+	 */
+	diskaddr_t	start_block;
+
+	start_block = metagetstart(sp, np, ep);
+	if (start_block == MD_DISKADDR_ERROR)
+		return (MD_DISKADDR_ERROR);
+
+	start_block += MD_SP_START;
+
+	/*
+	 * In the case that the device upon which the sp is being
+	 * created is a metadevice then ensure that in the case that
+	 * the first underlying device has a vtoc label that it is
+	 * not overwritten with a watermark by setting the start block
+	 * to point just past the vtoc label
+	 */
+	if (start_block < VTOC_SIZE && metaismeta(np))
+		start_block = VTOC_SIZE;
+
+	return (start_block);
+}
+
+/*
+ * FUNCTION:	meta_sp_update_wm()
+ * INPUT:	sp	- the operating set
+ *		msp	- a pointer to the XDR unit structure
+ *		extlist	- the extent list specifying watermarks to update;
+ *			  may be NULL (empty), in which case no watermark
+ *			  is built
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 on success
+ * PURPOSE:	steps backwards through the extent list updating
+ *		watermarks for all extents with the EXTFLG_UPDATE flag
+ *		set.  Writing the watermarks guarantees consistency when
+ *		extents must be broken into pieces since the original
+ *		watermark will be the last to be updated, and will be
+ *		changed to point to a new watermark that is already
+ *		known to be consistent.  If one of the writes fails, the
+ *		original watermark stays intact and none of the changes
+ *		are realized.
+ */
+static int
+meta_sp_update_wm(
+	mdsetname_t	*sp,
+	md_sp_t		*msp,
+	sp_ext_node_t	*extlist,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t	*ext;
+	/*
+	 * Must start out NULL: the search loop below never assigns it
+	 * when extlist is empty, and it is read right after the loop.
+	 */
+	sp_ext_node_t	*tail = NULL;
+	mp_watermark_t	*wmp, *watermarks;
+	xsp_offset_t	*osp, *offsets;
+	int		update_count = 0;
+	int		rval = 0;
+	md_unit_t	*mdp;
+	md_sp_update_wm_t	update_params;
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n");
+		meta_sp_list_dump(extlist);
+	}
+
+	/*
+	 * find the last node so we can write the watermarks backwards
+	 * and count watermarks to update so we can allocate space
+	 */
+	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
+		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
+			update_count++;
+		}
+
+		if (ext->ext_next == NULL) {
+			tail = ext;
+		}
+	}
+	ext = tail;
+
+	/*
+	 * One watermark and one offset per flagged extent.
+	 * NOTE(review): the offsets array is declared as xsp_offset_t
+	 * but sized with sp_ext_offset_t; presumably the two types have
+	 * the same size - confirm against their definitions.
+	 */
+	wmp = watermarks =
+	    Zalloc(update_count * sizeof (mp_watermark_t));
+	osp = offsets =
+	    Zalloc(update_count * sizeof (sp_ext_offset_t));
+
+	/* walk from the tail towards the head, filling both arrays */
+	while (ext != NULL) {
+		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
+			/* update watermark */
+			wmp->wm_magic = MD_SP_MAGIC;
+			wmp->wm_version = MD_SP_VERSION;
+			wmp->wm_type = ext->ext_type;
+			wmp->wm_seq = ext->ext_seq;
+			/* the recorded length excludes the watermark itself */
+			wmp->wm_length = ext->ext_length - MD_SP_WMSIZE;
+
+			/* fill in the volume name and set name */
+			if (ext->ext_namep != NULL)
+				(void) strcpy(wmp->wm_mdname,
+				    ext->ext_namep->cname);
+			else
+				(void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME);
+			if (ext->ext_setp != NULL &&
+			    ext->ext_setp->setno != MD_LOCAL_SET)
+				(void) strcpy(wmp->wm_setname,
+				    ext->ext_setp->setname);
+			else
+				(void) strcpy(wmp->wm_setname,
+				    MD_SP_LOCALSETNAME);
+
+			/*
+			 * Generate the checksum; the field is zeroed first
+			 * because it lies inside the region being summed.
+			 */
+			wmp->wm_checksum = 0;
+			crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum,
+			    sizeof (*wmp), NULL);
+
+			/* record the extent offset */
+			*osp = ext->ext_offset;
+
+			/* Advance the placeholders */
+			osp++; wmp++;
+		}
+		ext = ext->ext_prev;
+	}
+
+	mdp = meta_get_mdunit(sp, msp->common.namep, ep);
+	if (mdp == NULL) {
+		rval = -1;
+		goto out;
+	}
+
+	/* hand both arrays to the driver in a single ioctl */
+	(void) memset(&update_params, 0, sizeof (update_params));
+	update_params.mnum = MD_SID(mdp);
+	update_params.count = update_count;
+	update_params.wmp = (uintptr_t)watermarks;
+	update_params.osp = (uintptr_t)offsets;
+	MD_SETDRIVERNAME(&update_params, MD_SP,
+	    MD_MIN2SET(update_params.mnum));
+
+	if (metaioctl(MD_IOC_SPUPDATEWM, &update_params,
+	    &update_params.mde, msp->common.namep->cname) != 0) {
+		(void) mdstealerror(ep, &update_params.mde);
+		rval = -1;
+		goto out;
+	}
+
+out:
+	Free(watermarks);
+	Free(offsets);
+
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_sp_clear_wm()
+ * INPUT:	sp	- the operating set
+ *		msp	- the unit structure for the soft partition to clear
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 if error, 0 on success (the result of
+ *			  meta_sp_update_wm())
+ * PURPOSE:	builds a temporary extent list with one EXTTYP_FREE
+ *		entry, flagged EXTFLG_UPDATE, for every extent of the
+ *		soft partition, and passes that list to
+ *		meta_sp_update_wm() so the watermarks are rewritten as
+ *		free.  The temporary list is released before returning.
+ */
+static int
+meta_sp_clear_wm(
+	mdsetname_t	*sp,
+	md_sp_t		*msp,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t	*free_list = NULL;
+	int		ext_count = msp->ext.ext_len;
+	int		status;
+	uint_t		idx = 0;
+
+	while (idx < ext_count) {
+		md_sp_ext_t	*cur = &msp->ext.ext_val[idx];
+
+		/*
+		 * Each entry spans the extent's data plus the
+		 * MD_SP_WMSIZE blocks immediately in front of it.
+		 */
+		meta_sp_list_insert(NULL, NULL, &free_list,
+		    cur->poff - MD_SP_WMSIZE, cur->len + MD_SP_WMSIZE,
+		    EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+		idx++;
+	}
+
+	/* write the watermarks, then drop the scratch list */
+	status = meta_sp_update_wm(sp, msp, free_list, ep);
+	meta_sp_list_free(&free_list);
+
+	return (status);
+}
+
+/*
+ * FUNCTION:	meta_sp_read_wm()
+ * INPUT:	sp	- setname for component
+ *		compnp	- mdname_t for component
+ *		offset	- the offset of the watermark to read (sectors)
+ * OUTPUT:	wm	- the watermark structure to read into
+ *		ep	- return error pointer
+ * RETURNS:	int	- 0 on success; otherwise the value returned by
+ *			  mdstealerror() or mdmderror()
+ * PURPOSE:	issues the MD_IOC_SPREADWM ioctl to fetch the watermark
+ *		at the requested offset of the component, then checks
+ *		the magic number and the checksum of what came back.
+ *		A message is written to stderr and an error is set in
+ *		ep when the ioctl or either check fails.
+ */
+static int
+meta_sp_read_wm(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mp_watermark_t	*wm,
+	sp_ext_offset_t	offset,
+	md_error_t	*ep
+)
+{
+	md_sp_read_wm_t	req;
+
+	/*
+	 * The block offset must not overflow 2^64 bytes, and a
+	 * watermark must be a whole number of blocks.
+	 */
+	assert(offset <= (1LL << (64 - DEV_BSHIFT)));
+	/* LINTED */
+	assert((sizeof (*wm) % DEV_BSIZE) == 0);
+
+	/* describe what to read and where to put it */
+	(void) memset(&req, 0, sizeof (req));
+	req.offset = offset;
+	req.wmp = (uintptr_t)wm;
+	req.rdev = compnp->dev;
+	MD_SETDRIVERNAME(&req, MD_SP, sp->setno);
+
+	/* start from a zeroed buffer */
+	(void) memset(wm, 0, sizeof (*wm));
+
+	if (metaioctl(MD_IOC_SPREADWM, &req, &req.mde, compnp->cname) != 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "Extent header read failed, block %llu.\n"), offset);
+		return (mdstealerror(ep, &req.mde));
+	}
+
+	/* first check: the magic number */
+	if (wm->wm_magic != MD_SP_MAGIC) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "found incorrect magic number %x, expected %x.\n"),
+		    wm->wm_magic, MD_SP_MAGIC);
+		/*
+		 * The watermark contents cannot be trusted here, so no
+		 * device name is taken from them.
+		 */
+		return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL));
+	}
+
+	/* second check: the checksum over the whole watermark */
+	if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum,
+	    sizeof (*wm), NULL) != 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "found incorrect checksum %x.\n"),
+		    wm->wm_checksum);
+		return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname));
+	}
+
+	return (0);
+}
+
+/*
+ * **************************************************************************
+ *                  Query Functions
+ * **************************************************************************
+ */
+
+/*
+ * IMPORTANT NOTE: This is a static function that assumes that
+ *		   its input parameters have been checked and
+ *		   have valid values that lie within acceptable
+ *		   ranges.
+ *
+ * FUNCTION:	meta_sp_enough_space()
+ * INPUT:	desired_number_of_sps - how many soft partitions are
+ *					wanted; must be > 0
+ *		desired_sp_size - size of each soft partition in blocks;
+ *				  must be > 0
+ *		extent_listpp - address of a pointer to a valid extent
+ *				list describing the device; the list is
+ *				handed to meta_sp_alloc_by_len() on each
+ *				trial allocation
+ *		alignment - the desired data space alignment for the sp's
+ * OUTPUT:	boolean_t return value
+ * RETURNS:	boolean_t - B_TRUE if every trial allocation succeeded,
+ *			    B_FALSE as soon as one of them fails
+ * PURPOSE:	determines whether there's enough free space in an extent
+ *		list to allow creation of a set of soft partitions
+ */
+static boolean_t
+meta_sp_enough_space(
+	int		desired_number_of_sps,
+	blkcnt_t	desired_sp_size,
+	sp_ext_node_t	**extent_listpp,
+	sp_ext_length_t	alignment
+)
+{
+	sp_ext_length_t		wanted_length = desired_sp_size;
+	int			allocated;
+
+	/*
+	 * Run the extent allocation algorithm of meta_sp_alloc_by_len()
+	 * once per requested soft partition, using the test set and
+	 * soft partition names so that nothing is actually created.
+	 * The first allocation that reports -1 means the list cannot
+	 * accommodate the whole request.
+	 */
+	for (allocated = 0; allocated < desired_number_of_sps; allocated++) {
+		if (meta_sp_alloc_by_len(TEST_SETNAMEP,
+		    TEST_SOFT_PARTITION_NAMEP, extent_listpp,
+		    &wanted_length, NO_OFFSET, alignment) == -1) {
+			return (B_FALSE);
+		}
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * IMPORTANT NOTE: This is a static function that calls other functions
+ *		   that check its mdsetnamep and device_mdnamep
+ *		   input parameters, but expects extent_listpp to
+ *		   be initialized to a valid address to which
+ *		   it can write a reference to the extent list that
+ *		   it creates.
+ *
+ * FUNCTION:	meta_sp_get_extent_list()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the device for
+ *			     which the extents are to be listed
+ *		device_mdnamep - a reference to the mdname_t structure
+ *				 for the device for which the extents
+ *				 are to be listed
+ * OUTPUT:	*extent_listpp - a reference to the extent list for
+ *				 the device; NULL if the function fails
+ *		*ep - the libmeta error encountered, if any
+ * RETURNS:	boolean_t - B_TRUE if the function call was successful,
+ *			    B_FALSE if not
+ * PURPOSE:	gets the extent list for a device
+ */
+static boolean_t
+meta_sp_get_extent_list(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*device_mdnamep,
+	sp_ext_node_t	**extent_listpp,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t		*existing_sps = NULL;
+	diskaddr_t		first_blk;
+	diskaddr_t		total_blks;
+
+	*extent_listpp = NULL;
+
+	/* where the usable region of the device begins */
+	first_blk = meta_sp_get_start(mdsetnamep, device_mdnamep, ep);
+	if (first_blk == MD_DISKADDR_ERROR) {
+		if (getenv(META_SP_DEBUG)) {
+			mde_perror(ep,
+			    "meta_sp_get_extent_list:meta_sp_get_start");
+		}
+		return (B_FALSE);
+	}
+
+	/* total size of the device */
+	total_blks = metagetsize(device_mdnamep, ep);
+	if (total_blks == MD_DISKADDR_ERROR) {
+		if (getenv(META_SP_DEBUG)) {
+			mde_perror(ep,
+			    "meta_sp_get_extent_list:metagetsize");
+		}
+		return (B_FALSE);
+	}
+
+	/*
+	 * Sanity check: the start block will have skipped an integer
+	 * number of cylinders, C.  C will usually be zero.  If (C > 0),
+	 * and the disk slice happens to only be C cylinders in total
+	 * size, we'll fail this check.
+	 */
+	if (total_blks <= (first_blk + MD_SP_WMSIZE)) {
+		(void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname);
+		return (B_FALSE);
+	}
+
+	/*
+	 * Resources are allocated from here on, so every failure below
+	 * falls through to the common cleanup at the bottom.
+	 *
+	 * Seed the list with a reserved extent covering everything in
+	 * front of the start block, and an end extent covering the
+	 * final MD_SP_WMSIZE blocks of the device.
+	 */
+	meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
+	    extent_listpp, NO_OFFSET, (sp_ext_length_t)first_blk,
+	    EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS,
+	    meta_sp_cmp_by_offset);
+	meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
+	    extent_listpp, (sp_ext_offset_t)(total_blks - MD_SP_WMSIZE),
+	    MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, NO_FLAGS,
+	    meta_sp_cmp_by_offset);
+
+	/*
+	 * Collect the soft partitions already on the device.
+	 * NOTE(review): a return of 0 is treated as a failure here as
+	 * well; confirm that meta_sp_get_by_component() cannot return 0
+	 * for a device that simply has no soft partitions yet.
+	 */
+	if (meta_sp_get_by_component(mdsetnamep, device_mdnamep,
+	    &existing_sps, FORCE_RELOAD_CACHE, ep) < 1) {
+		if (getenv(META_SP_DEBUG)) {
+			mde_perror(ep,
+			    "meta_sp_get_extent_list:meta_sp_get_by_component");
+		}
+	} else if ((existing_sps != NULL) &&
+	    (meta_sp_extlist_from_namelist(mdsetnamep, existing_sps,
+	    extent_listpp, ep) == -1)) {
+		/* adding the extents of the existing sps failed */
+		if (getenv(META_SP_DEBUG)) {
+			mde_perror(ep, "meta_sp_get_extent_list:"
+			    "meta_sp_extlist_from_namelist");
+		}
+	} else {
+		/* success: the name list is no longer needed */
+		if (existing_sps != NULL) {
+			metafreenamelist(existing_sps);
+		}
+
+		/*
+		 * Whatever is not yet described by the list is free
+		 * space; add extents for it.
+		 */
+		meta_sp_list_freefill(extent_listpp, total_blks);
+		return (B_TRUE);
+	}
+
+	/* common failure cleanup */
+	if (existing_sps != NULL) {
+		metafreenamelist(existing_sps);
+	}
+
+	if (*extent_listpp != NULL) {
+		/*
+		 * meta_sp_list_free sets *extent_listpp to NULL.
+		 */
+		meta_sp_list_free(extent_listpp);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * IMPORTANT NOTE: This is a static function that calls other functions
+ *		   that check its mdsetnamep and mddrivenamep
+ *		   input parameters, but expects extent_listpp to
+ *		   be initialized to a valid address to which
+ *		   it can write a reference to the extent list that
+ *		   it creates.
+ *
+ * FUNCTION:	meta_sp_get_extent_list_for_drive()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the drive for
+ *			     which the extents are to be listed
+ *		mddrivenamep   - a reference to the mddrivename_t structure
+ *				 for the drive for which the extents
+ *				 are to be listed
+ * OUTPUT:	*extent_listpp - a reference to the extent list for
+ *				 the drive; NULL if the function fails
+ * RETURNS:	boolean_t - B_TRUE if the function call was successful,
+ *			    B_FALSE if not
+ * PURPOSE:	gets the extent list for a drive when the entire drive
+ *		is to be soft partitioned.  Errors raised by the helper
+ *		calls are kept in a local md_error_t and discarded before
+ *		returning; the caller only sees the boolean result.
+ */
+static boolean_t
+meta_sp_get_extent_list_for_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep,
+	sp_ext_node_t	**extent_listpp
+)
+{
+	boolean_t		can_use;
+	diskaddr_t		free_space;
+	md_error_t		mderror;
+	mdvtoc_t		proposed_vtoc;
+	int			repartition_options;
+	int			return_value;
+	md_sp_t			test_sp_struct;
+
+	can_use = B_TRUE;
+	*extent_listpp = NULL;
+	mderror = mdnullerror;
+
+	/*
+	 * Only compnamep is filled in below; zero the rest so that
+	 * meta_check_sp() is never handed indeterminate stack contents.
+	 */
+	(void) memset(&test_sp_struct, 0, sizeof (test_sp_struct));
+	test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0,
+					&mderror);
+	if (test_sp_struct.compnamep == NULL) {
+		can_use = B_FALSE;
+	}
+
+	/* would slice zero be acceptable as a soft partition component? */
+	if (can_use == B_TRUE) {
+		mderror = mdnullerror;
+		repartition_options = 0;
+		return_value = meta_check_sp(mdsetnamep, &test_sp_struct,
+				MDCMD_USE_WHOLE_DISK, &repartition_options,
+				&mderror);
+		if (return_value != 0) {
+			can_use = B_FALSE;
+		}
+	}
+
+	/* compute the proposed VTOC; MD_REPART_DONT_LABEL is requested */
+	if (can_use == B_TRUE) {
+		mderror = mdnullerror;
+		repartition_options = repartition_options |
+			(MD_REPART_FORCE | MD_REPART_DONT_LABEL);
+		return_value = meta_repartition_drive(mdsetnamep, mddrivenamep,
+				repartition_options, &proposed_vtoc, &mderror);
+		if (return_value != 0) {
+			can_use = B_FALSE;
+		}
+	}
+
+	/* slice zero must hold more than the reserved area and a watermark */
+	if (can_use == B_TRUE) {
+		free_space = proposed_vtoc.parts[MD_SLICE0].size;
+		if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) {
+			can_use = B_FALSE;
+		}
+	}
+
+	if (can_use == B_TRUE) {
+		/*
+		 * Create an extent list that starts with
+		 * a reserved extent that ends at the start
+		 * of the usable space on slice zero of the
+		 * proposed VTOC, ends with an extent that
+		 * reserves space for a watermark at the end
+		 * of slice zero, and contains a single free
+		 * extent that occupies the rest of the space
+		 * on the slice.
+		 *
+		 * NOTE:
+		 *
+		 * Don't use metagetstart() or metagetsize() to
+		 * find the usable space.  They query the mdname_t
+		 * structure that represents an actual device to
+		 * determine the amount of space on the device that
+		 * contains metadata and the total amount of space
+		 * on the device.  Since this function creates a
+		 * proposed extent list that doesn't reflect the
+		 * state of an actual device, there's no mdname_t
+		 * structure to be queried.
+		 *
+		 * When a drive is reformatted to prepare for
+		 * soft partitioning, all of slice seven is
+		 * reserved for metadata, all of slice zero is
+		 * available for soft partitioning, and all other
+		 * slices on the drive are empty.  The proposed
+		 * extent list for the drive therefore contains
+		 * only three extents: a reserved extent that ends
+		 * at the start of the usable space on slice zero,
+		 * a single free extent that occupies all the usable
+		 * space on slice zero, and an ending extent that
+		 * reserves space for a watermark at the end of
+		 * slice zero.
+		 */
+		meta_sp_list_insert(TEST_SETNAMEP,
+			TEST_SOFT_PARTITION_NAMEP,
+			extent_listpp,
+			NO_OFFSET,
+			(sp_ext_length_t)(MD_SP_START),
+			EXTTYP_RESERVED,
+			NO_SEQUENCE_NUMBER,
+			NO_FLAGS,
+			meta_sp_cmp_by_offset);
+		meta_sp_list_insert(TEST_SETNAMEP,
+			TEST_SOFT_PARTITION_NAMEP,
+			extent_listpp,
+			(sp_ext_offset_t)(free_space - MD_SP_WMSIZE),
+			MD_SP_WMSIZE,
+			EXTTYP_END,
+			NO_SEQUENCE_NUMBER,
+			NO_FLAGS,
+			meta_sp_cmp_by_offset);
+		meta_sp_list_freefill(extent_listpp, free_space);
+	}
+
+	/*
+	 * The error is private to this function and is not reported to
+	 * the caller, so release whatever a failed call stored in it.
+	 */
+	mdclrerror(&mderror);
+
+	return (can_use);
+}
+
+/*
+ * FUNCTION:	meta_sp_can_create_sps()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the device for
+ *			     which the extents are to be listed
+ *		mdnamep - a reference to the mdname_t of the device
+ *			  on which the soft partitions are to be created
+ *		number_of_sps - the desired number of soft partitions
+ *		sp_size - the desired soft partition size
+ * OUTPUT:	boolean_t return value
+ * RETURNS:	boolean_t - B_TRUE if the soft partitions can be created,
+ *			    B_FALSE if not (including when number_of_sps
+ *			    or sp_size is not positive)
+ * PURPOSE:	determines whether a set of soft partitions can be created
+ *		on a device
+ */
+boolean_t
+meta_sp_can_create_sps(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*mdnamep,
+	int		number_of_sps,
+	blkcnt_t	sp_size
+)
+{
+	sp_ext_node_t	*extent_listp;
+	boolean_t	succeeded;
+	md_error_t	mde;
+
+	/*
+	 * The error structure must start out as the null error before
+	 * it is handed to any libmeta routine.
+	 */
+	mde = mdnullerror;
+	extent_listp = NULL;
+
+	if ((number_of_sps > 0) && (sp_size > 0)) {
+		succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
+						    &extent_listp, &mde);
+	} else {
+		succeeded = B_FALSE;
+	}
+
+	/*
+	 * We don't really care about an error return from the
+	 * alignment call; that will just result in passing zero,
+	 * which will be interpreted as no alignment.
+	 */
+
+	if (succeeded == B_TRUE) {
+		succeeded = meta_sp_enough_space(number_of_sps,
+		    sp_size, &extent_listp,
+		    meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde));
+		meta_sp_list_free(&extent_listp);
+	}
+
+	/*
+	 * Errors are not reported to the caller; release anything a
+	 * failed call stored in the local error structure.
+	 */
+	mdclrerror(&mde);
+
+	return (succeeded);
+}
+
+/*
+ * FUNCTION:	meta_sp_can_create_sps_on_drive()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the drive for
+ *			     which the extents are to be listed
+ *		mddrivenamep - a reference to the mddrivename_t of the drive
+ *			       on which the soft partitions are to be created
+ *		number_of_sps - the desired number of soft partitions
+ *		sp_size - the desired soft partition size
+ * OUTPUT:	boolean_t return value
+ * RETURNS:	boolean_t - B_TRUE if the soft partitions can be created,
+ *			    B_FALSE if not
+ * PURPOSE:	determines whether a set of soft partitions can be created
+ *		on a drive if the entire drive is soft partitioned
+ */
+boolean_t
+meta_sp_can_create_sps_on_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep,
+	int		number_of_sps,
+	blkcnt_t	sp_size
+)
+{
+	sp_ext_node_t	*drive_extents;
+	boolean_t	have_list;
+	boolean_t	fits;
+
+	/* a non-positive count or size can never be satisfied */
+	if ((number_of_sps <= 0) || (sp_size <= 0))
+		return (B_FALSE);
+
+	have_list = meta_sp_get_extent_list_for_drive(mdsetnamep,
+	    mddrivenamep, &drive_extents);
+	if (have_list != B_TRUE)
+		return (have_list);
+
+	/*
+	 * A whole drive has no inherent alignment, so the space check
+	 * is done unaligned.
+	 */
+	fits = meta_sp_enough_space(number_of_sps, sp_size,
+	    &drive_extents, SP_UNALIGNED);
+	meta_sp_list_free(&drive_extents);
+
+	return (fits);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_free_space()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the device for
+ *			     which the free space is to be returned
+ *		mdnamep - a reference to the mdname_t of the device
+ *			  for which the free space is to be returned
+ * OUTPUT:	blkcnt_t return value
+ * RETURNS:	blkcnt_t - the number of blocks of free space on the device,
+ *			   less a safety margin of 10 * MD_SP_WMSIZE blocks;
+ *			   0 if the extent list cannot be built or the
+ *			   free space does not exceed the margin
+ * PURPOSE:	returns the number of blocks of free space on a device
+ */
+blkcnt_t
+meta_sp_get_free_space(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*mdnamep
+)
+{
+	sp_ext_node_t		*extent_listp;
+	sp_ext_length_t		free_blocks;
+	boolean_t		succeeded;
+	md_error_t		mde;
+
+	/*
+	 * The error structure must start out as the null error before
+	 * it is handed to any libmeta routine.
+	 */
+	mde = mdnullerror;
+	extent_listp = NULL;
+	free_blocks = 0;
+	succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
+					    &extent_listp, &mde);
+	if (succeeded == B_TRUE) {
+		free_blocks = meta_sp_list_size(extent_listp,
+		    EXTTYP_FREE, INCLUDE_WM);
+		meta_sp_list_free(&extent_listp);
+		if (free_blocks > (10 * MD_SP_WMSIZE)) {
+			/*
+			 * Subtract a safety margin for watermarks when
+			 * computing the number of blocks available for
+			 * use.  The actual number of watermarks can't
+			 * be calculated without knowing the exact numbers
+			 * and sizes of both the free extents and the soft
+			 * partitions to be created.  The calculation is
+			 * highly complex and error-prone even if those
+			 * quantities are known.  The approximate value
+			 * 10 * MD_SP_WMSIZE is within a few blocks of the
+			 * correct value in all practical cases.
+			 */
+			free_blocks = free_blocks - (10 * MD_SP_WMSIZE);
+		} else {
+			free_blocks = 0;
+		}
+	}
+
+	/* the error is not reported to the caller; release it */
+	mdclrerror(&mde);
+
+	return (free_blocks);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_free_space_on_drive()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the drive for
+ *			     which the free space is to be returned
+ *		mddrivenamep - a reference to the mddrivename_t of the drive
+ *			       for which the free space is to be returned
+ * OUTPUT:	blkcnt_t return value
+ * RETURNS:	blkcnt_t - the number of blocks of free space on the drive,
+ *			   less a safety margin of 10 * MD_SP_WMSIZE blocks;
+ *			   0 if the extent list cannot be built or the
+ *			   free space does not exceed the margin
+ * PURPOSE:	returns the number of blocks of space usable for soft
+ *		partitions on an entire drive, if the entire drive is
+ *		soft partitioned
+ */
+blkcnt_t
+meta_sp_get_free_space_on_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep
+)
+{
+	sp_ext_node_t		*drive_extents = NULL;
+	sp_ext_length_t		usable = 0;
+
+	if (meta_sp_get_extent_list_for_drive(mdsetnamep, mddrivenamep,
+	    &drive_extents) != B_TRUE)
+		return (usable);
+
+	/* total the free extents, then drop the list */
+	usable = meta_sp_list_size(drive_extents, EXTTYP_FREE, INCLUDE_WM);
+	meta_sp_list_free(&drive_extents);
+
+	/*
+	 * A safety margin for watermarks is held back from the total.
+	 * The real number of watermarks depends on the exact numbers
+	 * and sizes of the free extents and of the soft partitions
+	 * still to be created, so it cannot be computed here; the
+	 * approximation 10 * MD_SP_WMSIZE is within a few blocks of
+	 * the correct value in all practical cases.  Free space that
+	 * does not exceed the margin counts as none at all.
+	 */
+	if (usable <= (10 * MD_SP_WMSIZE)) {
+		usable = 0;
+		return (usable);
+	}
+
+	usable -= (10 * MD_SP_WMSIZE);
+	return (usable);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_number_of_possible_sps()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the device for
+ *			     which the number of possible soft partitions
+ *			     is to be returned
+ *		mdnamep - a reference to the mdname_t of the device
+ *			  for which the number of possible soft partitions
+ *			  is to be returned
+ *		sp_size - the size in blocks of the proposed soft partitions
+ * OUTPUT:	int return value
+ * RETURNS:	int - the number of soft partitions of the desired size
+ *		      that can be created on the device; 0 if sp_size is
+ *		      not positive or the extent list cannot be built
+ * PURPOSE:	returns the number of soft partitions of a given size
+ *		that can be created on a device
+ */
+int
+meta_sp_get_number_of_possible_sps(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*mdnamep,
+	blkcnt_t	sp_size
+)
+{
+	sp_ext_node_t	*extent_listp;
+	int		number_of_possible_sps;
+	boolean_t	succeeded;
+	md_error_t	mde;
+	sp_ext_length_t	alignment;
+
+	/*
+	 * The error structure must start out as the null error before
+	 * it is handed to any libmeta routine.
+	 */
+	mde = mdnullerror;
+	extent_listp = NULL;
+	number_of_possible_sps = 0;
+	if (sp_size > 0) {
+		succeeded = meta_sp_get_extent_list(mdsetnamep,
+		    mdnamep, &extent_listp, &mde);
+	} else {
+		succeeded = B_FALSE;
+	}
+
+	/* alignment is only read by the loop below, which needs success */
+	if (succeeded == B_TRUE) {
+		alignment = meta_sp_get_default_alignment(mdsetnamep,
+		    mdnamep, &mde);
+	}
+
+	/*
+	 * Errors are not reported to the caller; release anything the
+	 * extent list or alignment calls stored in the local error.
+	 */
+	mdclrerror(&mde);
+
+	while (succeeded == B_TRUE) {
+		/*
+		 * Keep allocating space from the extent list
+		 * for soft partitions of the desired size until
+		 * there's not enough free space left in the list
+		 * for another soft partition of that size.
+		 * Add one to the number of possible soft partitions
+		 * for each soft partition for which there is
+		 * enough free space left.
+		 */
+		succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION,
+		    sp_size, &extent_listp, alignment);
+		if (succeeded == B_TRUE) {
+			number_of_possible_sps++;
+		}
+	}
+	if (extent_listp != NULL) {
+		meta_sp_list_free(&extent_listp);
+	}
+	return (number_of_possible_sps);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_number_of_possible_sps_on_drive()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the drive for
+ *			     which the number of possible soft partitions
+ *			     is to be returned
+ *		mddrivenamep - a reference to the mddrivename_t of the drive
+ *			       for which the number of possible soft partitions
+ *			       is to be returned
+ *		sp_size - the size in blocks of the proposed soft partitions
+ * OUTPUT:	int return value
+ * RETURNS:	int - the number of soft partitions of the desired size
+ *		      that can be created on the drive; 0 if sp_size is
+ *		      not positive or the extent list cannot be built
+ * PURPOSE:	returns the number of soft partitions of a given size
+ *		that can be created on a drive, if the entire drive is
+ *		soft partitioned
+ */
+int
+meta_sp_get_number_of_possible_sps_on_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep,
+	blkcnt_t	sp_size
+)
+{
+	sp_ext_node_t	*drive_extents = NULL;
+	int		fit_count = 0;
+
+	if ((sp_size > 0) &&
+	    (meta_sp_get_extent_list_for_drive(mdsetnamep, mddrivenamep,
+	    &drive_extents) == B_TRUE)) {
+		/*
+		 * Take one soft partition's worth of space out of the
+		 * extent list per pass and count the passes that
+		 * succeed; the first pass that fails ends the count.
+		 *
+		 * Since it's a drive, not a metadevice, make no
+		 * assumptions about alignment.
+		 */
+		while (meta_sp_enough_space(ONE_SOFT_PARTITION, sp_size,
+		    &drive_extents, SP_UNALIGNED) == B_TRUE) {
+			fit_count++;
+		}
+	}
+
+	if (drive_extents != NULL) {
+		meta_sp_list_free(&drive_extents);
+	}
+	return (fit_count);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_possible_sp_size()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the device for
+ *			     which the possible soft partition size
+ *			     is to be returned
+ *		mdnamep - a reference to the mdname_t of the device
+ *			  for which the possible soft partition size
+ *			  is to be returned
+ *		number_of_sps - the desired number of soft partitions
+ * OUTPUT:	blkcnt_t return value
+ * RETURNS:	blkcnt_t - the possible soft partition size in blocks;
+ *			   0 if number_of_sps is not positive or no
+ *			   positive size can be accommodated
+ * PURPOSE:	returns the maximum possible size of each of a given number of
+ *		soft partitions of equal size that can be created on a device
+ */
+blkcnt_t
+meta_sp_get_possible_sp_size(
+	mdsetname_t	*mdsetnamep,
+	mdname_t	*mdnamep,
+	int		number_of_sps
+)
+{
+	blkcnt_t	free_blocks;
+	blkcnt_t	sp_size;
+
+	sp_size = 0;
+	if (number_of_sps > 0) {
+		/* first guess: split the free space evenly */
+		free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep);
+		sp_size = free_blocks / number_of_sps;
+
+		/*
+		 * To compensate for space that may have been
+		 * occupied by watermarks, reduce sp_size by a
+		 * number of blocks equal to the number of soft
+		 * partitions desired, and test again to see
+		 * whether the desired number of soft partitions
+		 * can be created.
+		 *
+		 * The size is checked before each probe so that
+		 * meta_sp_can_create_sps() is never asked about a
+		 * zero or negative size; such a candidate yields a
+		 * result of 0 whatever the probe would have said.
+		 */
+		while ((sp_size > 0) &&
+		    (meta_sp_can_create_sps(mdsetnamep, mdnamep,
+		    number_of_sps, sp_size) == B_FALSE)) {
+			sp_size = sp_size - ((blkcnt_t)number_of_sps);
+		}
+		if (sp_size < 0) {
+			sp_size = 0;
+		}
+	}
+	return (sp_size);
+}
+
+/*
+ * FUNCTION:	meta_sp_get_possible_sp_size_on_drive()
+ * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
+ *			     for the set containing the drive for
+ *			     which the possible soft partition size
+ *			     is to be returned
+ *		mddrivenamep - a reference to the mddrivename_t of the drive
+ *			       for which the possible soft partition size
+ *			       is to be returned
+ *		number_of_sps - the desired number of soft partitions
+ * OUTPUT:	blkcnt_t return value
+ * RETURNS:	blkcnt_t - the possible soft partition size in blocks;
+ *			   0 if number_of_sps is not positive or no
+ *			   positive size can be accommodated
+ * PURPOSE:	returns the maximum possible size of each of a given number of
+ *		soft partitions of equal size that can be created on a drive
+ *		if the entire drive is soft partitioned
+ */
+blkcnt_t
+meta_sp_get_possible_sp_size_on_drive(
+	mdsetname_t	*mdsetnamep,
+	mddrivename_t	*mddrivenamep,
+	int		number_of_sps
+)
+{
+	blkcnt_t	free_blocks;
+	blkcnt_t	sp_size;
+
+	sp_size = 0;
+	if (number_of_sps > 0) {
+		/* first guess: split the free space evenly */
+		free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep,
+		    mddrivenamep);
+		sp_size = free_blocks / number_of_sps;
+
+		/*
+		 * To compensate for space that may have been
+		 * occupied by watermarks, reduce sp_size by a
+		 * number of blocks equal to the number of soft
+		 * partitions desired, and test again to see
+		 * whether the desired number of soft partitions
+		 * can be created.
+		 *
+		 * The size is checked before each probe so that
+		 * meta_sp_can_create_sps_on_drive() is never asked
+		 * about a zero or negative size; such a candidate
+		 * yields a result of 0 whatever the probe would
+		 * have said.
+		 */
+		while ((sp_size > 0) &&
+		    (meta_sp_can_create_sps_on_drive(mdsetnamep,
+		    mddrivenamep, number_of_sps, sp_size) == B_FALSE)) {
+			sp_size = sp_size - ((blkcnt_t)number_of_sps);
+		}
+		if (sp_size < 0) {
+			sp_size = 0;
+		}
+	}
+	return (sp_size);
+}
+
+/*
+ * **************************************************************************
+ *                  Unit Structure Manipulation Functions                   *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_fillextarray()
+ * INPUT:	mp	- unit structure whose extent array is to be filled
+ *		extlist	- extent list supplying the new extent values
+ * OUTPUT:	none
+ * RETURNS:	void
+ * PURPOSE:	Copies allocated extents from extlist into the unit
+ *		structure.  Only EXTTYP_ALLOC nodes carrying the
+ *		EXTFLG_UPDATE flag are copied; each node's sequence number
+ *		selects the slot in the unit's extent array.  The stored
+ *		physical offset and length exclude the MD_SP_WMSIZE blocks
+ *		at the front of the extent.  Afterwards the virtual offset
+ *		of every extent in the unit is recomputed as the running
+ *		sum of the preceding extent lengths.
+ */
+static void
+meta_sp_fillextarray(
+	mp_unit_t	*mp,
+	sp_ext_node_t	*extlist
+)
+{
+	sp_ext_node_t	*node;
+	sp_ext_offset_t	next_voff;
+	int		i;
+
+	assert(mp != NULL);
+
+	/* pass 1: copy the flagged allocations into their slots */
+	node = extlist;
+	while (node != NULL) {
+		if ((node->ext_type == EXTTYP_ALLOC) &&
+		    ((node->ext_flags & EXTFLG_UPDATE) != 0)) {
+			struct mp_ext	*slot = &mp->un_ext[node->ext_seq];
+
+			slot->un_poff = node->ext_offset + MD_SP_WMSIZE;
+			slot->un_len = node->ext_length - MD_SP_WMSIZE;
+		}
+		node = node->ext_next;
+	}
+
+	/* pass 2: lay the extents end to end in virtual space */
+	next_voff = 0LL;
+	i = 0;
+	while (i < mp->un_numexts) {
+		assert(mp->un_ext[i].un_poff != 0);
+		assert(mp->un_ext[i].un_len  != 0);
+		mp->un_ext[i].un_voff = next_voff;
+		next_voff += mp->un_ext[i].un_len;
+		i++;
+	}
+}
+
+/*
+ * FUNCTION:	meta_sp_createunit()
+ * INPUT:	np	- name of the soft partition being created
+ *		compnp	- name of the device the soft partition lives on
+ *		extlist	- extent list used to populate the new unit
+ *		numexts	- number of extents the unit will hold
+ *		len	- total size of the soft partition (sectors)
+ *		status	- initial status stored in the unit structure
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	mp_unit_t * - the newly allocated unit structure
+ * PURPOSE:	Builds the soft partition unit structure that is handed
+ *		to the soft partitioning driver at creation time.  The
+ *		structure is sized to hold numexts extents.
+ */
+static mp_unit_t *
+meta_sp_createunit(
+	mdname_t	*np,
+	mdname_t	*compnp,
+	sp_ext_node_t	*extlist,
+	int		numexts,
+	sp_ext_length_t	len,
+	sp_status_t	status,
+	md_error_t	*ep
+)
+{
+	mp_unit_t	*mp;
+	size_t		ext_size;
+	uint_t		ms_size;
+
+	/* the unit declares one extent; make room for numexts of them */
+	ext_size = sizeof (mp->un_ext[0]);
+	ms_size = (sizeof (*mp) - ext_size) + (numexts * ext_size);
+	mp = Zalloc(ms_size);
+
+	/* common unit header */
+	mp->c.un_type = MD_METASP;
+	mp->c.un_size = ms_size;
+	MD_SID(mp) = meta_getminor(np->dev);
+	mp->c.un_total_blocks = len;
+	mp->c.un_actual_tb = len;
+
+	/* geometry; the result of the call is deliberately ignored */
+	(void) meta_sp_setgeom(np, compnp, mp, ep);
+
+	/* a soft partition built on a metadevice cannot be a parent */
+	MD_CAPAB(mp) = metaismeta(compnp) ? MD_CANT_PARENT : MD_CAN_PARENT;
+
+	/* soft partition specific part */
+	mp->un_dev = compnp->dev;
+	mp->un_key = compnp->key;
+	/* mdname_t start_blk field is not 64-bit! */
+	mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk;
+	mp->un_status = status;
+	mp->un_numexts = numexts;
+	mp->un_length = len;
+
+	/* finally, the extent array itself */
+	meta_sp_fillextarray(mp, extlist);
+
+	return (mp);
+}
+
+/*
+ * FUNCTION:	meta_sp_updateunit()
+ * INPUT:	np       - name structure of the metadevice being updated
+ *		old_un	 - the existing unit structure
+ *		extlist	 - extent list used to populate the new unit
+ *		grow_len - number of blocks the partition grows by
+ *		numexts	 - number of extents being added
+ *		ep       - return error pointer
+ * OUTPUT:	none
+ * RETURNS:	mp_unit_t * - the updated unit structure, or NULL if the
+ *			      geometry could not be adjusted
+ * PURPOSE:	Produces a larger copy of an existing soft partition unit
+ *		structure.  The old unit is copied into freshly allocated
+ *		storage with room for numexts more extents, the length
+ *		fields and geometry are brought up to date, and the
+ *		flagged extents from extlist are filled in.  The old unit
+ *		is left untouched.
+ */
+static mp_unit_t *
+meta_sp_updateunit(
+	mdname_t	*np,
+	mp_unit_t	*old_un,
+	sp_ext_node_t	*extlist,
+	sp_ext_length_t	grow_len,
+	int		numexts,
+	md_error_t	*ep
+)
+{
+	mp_unit_t	*grown;
+	sp_ext_length_t	total_len;
+	uint_t		alloc_size;
+
+	assert(old_un != NULL);
+	assert(extlist != NULL);
+
+	total_len = old_un->un_length + grow_len;
+
+	/* size of a unit holding the old extents plus the new ones */
+	alloc_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) +
+	    ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0]));
+
+	/* start from a copy of the old unit */
+	grown = Zalloc(alloc_size);
+	bcopy(old_un, grown, old_un->c.un_size);
+
+	/* record the new sizes, then let the geometry follow */
+	grown->c.un_size = alloc_size;
+	grown->un_length = total_len;
+	grown->c.un_total_blocks = total_len;
+	grown->c.un_actual_tb = total_len;
+	if (meta_adjust_geom((md_unit_t *)grown, np,
+	    old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct,
+	    0, ep) != 0) {
+		Free(grown);
+		return (NULL);
+	}
+
+	/* account for the added extents before filling them in */
+	grown->un_numexts += numexts;
+	meta_sp_fillextarray(grown, extlist);
+
+	return (grown);
+}
+
+/*
+ * FUNCTION:	meta_get_sp()
+ * INPUT:	sp	- the set name for the device to get
+ *		np	- the name of the device to get
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	md_sp_t * - whatever meta_get_sp_common() returns for
+ *			    the named device
+ * PURPOSE:	entry point used by the rest of libmeta to fetch the unit
+ *		structure of a soft partition; forwards to
+ *		meta_get_sp_common() with the "fast" argument set to 0.
+ */
+md_sp_t *
+meta_get_sp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	md_sp_t	*msp;
+
+	msp = meta_get_sp_common(sp, np, 0, ep);
+	return (msp);
+}
+
+/*
+ * FUNCTION:	meta_get_sp_common()
+ * INPUT:	sp	- the set name for the device to get
+ *		np	- the name of the device to get
+ *		fast	- passed through to metakeyname()
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	md_sp_t * - the XDR unit structure for the soft partition,
+ *			    NULL if np is not a soft partition or on error
+ * PURPOSE:	Fetches the soft partition unit structure for np.  A unit
+ *		already attached to the drive name is returned directly;
+ *		otherwise the unit is read, converted into a new md_sp_t
+ *		and attached to the drive name before being returned.
+ */
+md_sp_t *
+meta_get_sp_common(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*drvp = np->drivenamep;
+	char		*modname;
+	mp_unit_t	*mp;
+	md_sp_t		*msp;
+	int		i;
+
+	/* must have set */
+	assert(sp != NULL);
+
+	/* a unit is already attached: hand it back if it is the right kind */
+	if (drvp->unitp != NULL) {
+		return ((drvp->unitp->type == MD_METASP) ?
+		    (md_sp_t *)drvp->unitp : NULL);
+	}
+
+	/* the device must be driven by the soft partition module */
+	modname = metagetmiscname(np, ep);
+	if (modname == NULL)
+		return (NULL);
+	if (strcmp(modname, MD_SP) != 0) {
+		(void) mdmderror(ep, MDE_NOT_SP, 0, np->cname);
+		return (NULL);
+	}
+
+	/* fetch the raw unit */
+	mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep);
+	if (mp == NULL)
+		return (NULL);
+
+	assert(mp->c.un_type == MD_METASP);
+
+	/* build the md_sp_t, starting with the common part */
+	msp = Zalloc(sizeof (*msp));
+	msp->common.namep = np;
+	msp->common.type = mp->c.un_type;
+	msp->common.state = mp->c.un_status;
+	msp->common.capabilities = mp->c.un_capabilities;
+	msp->common.parent = mp->c.un_parent;
+	msp->common.size = mp->c.un_total_blocks;
+	msp->common.user_flags = mp->c.un_user_flags;
+	msp->common.revision = mp->c.un_revision;
+
+	/* resolve the component name; give everything back on failure */
+	msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep);
+	if (msp->compnamep == NULL) {
+		Free(mp);
+		Free(msp);
+		return (NULL);
+	}
+
+	/*
+	 * Record the key and the start block on the component name.
+	 * The unit structure keeps a 64 bit start block whereas the
+	 * name structure only has room for 32 bits.
+	 */
+	msp->compnamep->key = mp->un_key;
+	msp->compnamep->start_blk = mp->un_start_blk;
+
+	msp->status = mp->un_status;
+
+	/* copy the extents across, one for one */
+	msp->ext.ext_len = mp->un_numexts;
+	msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val));
+	for (i = 0; i < mp->un_numexts; i++) {
+		msp->ext.ext_val[i].voff = mp->un_ext[i].un_voff;
+		msp->ext.ext_val[i].poff = mp->un_ext[i].un_poff;
+		msp->ext.ext_val[i].len = mp->un_ext[i].un_len;
+	}
+
+	/* the raw unit is no longer needed; attach the result and return */
+	Free(mp);
+	drvp->unitp = (md_common_t *)msp;
+	return (msp);
+}
+
+
+
+/*
+ * FUNCTION:	meta_init_sp()
+ * INPUT:	spp	- the set name for the new device
+ *		argc	- the remaining argument count for the metainit cmdline
+ *		argv	- the remainder of the unparsed command line
+ *		options	- global options parsed by metainit
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- -1 failure, 0 success
+ * PURPOSE:	provides the command line parsing and name management overhead
+ *		for creating a new soft partition.  Ultimately this calls
+ *		meta_create_sp() which does the real work of allocating space
+ *		for the new soft partition.
+ *
+ *		The expected argument layout is:
+ *		    name -p [-e] component { size | [-A align] size |
+ *			-o offset -b blocks [-o offset -b blocks ...] }
+ */
+int
+meta_init_sp(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*compname = NULL;
+	mdname_t	*spcompnp = NULL;	/* name of component volume */
+	char		*devname = argv[0];	/* unit name */
+	mdname_t	*np = NULL;		/* name of soft partition */
+	md_sp_t		*msp = NULL;
+	int		c;
+	int		old_optind;
+	sp_ext_length_t	len = 0LL;
+	int		rval = -1;
+	uint_t		seq;
+	int		oflag;
+	int		failed;
+	mddrivename_t	*dnp = NULL;
+	sp_ext_length_t	alignment = 0LL;
+	sp_ext_node_t	*extlist = NULL;
+	/*
+	 * offset is written while handling -o and read while handling
+	 * the following -b, i.e. in a later pass through the option
+	 * loop, so it has to live outside the loop body.
+	 */
+	sp_ext_offset_t	offset = 0LL;
+
+	assert(argc > 0);
+
+	/* expect sp name, -p, optional -e, compname, and size parameters */
+	/* grab soft partition name */
+	if ((np = metaname(spp, devname, ep)) == NULL)
+		goto out;
+
+	/* see if it exists already */
+	if (metagetmiscname(np, ep) != NULL) {
+		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+		    meta_getminor(np->dev), devname);
+		goto out;
+	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
+		goto out;
+	} else {
+		mdclrerror(ep);
+	}
+	--argc, ++argv;
+
+	if (argc == 0)
+		goto syntax;
+
+	/* grab -p */
+	if (strcmp(argv[0], "-p") != 0)
+		goto syntax;
+	--argc, ++argv;
+
+	if (argc == 0)
+		goto syntax;
+
+	/* see if -e is there */
+	if (strcmp(argv[0], "-e") == 0) {
+		/* use the whole disk */
+		options |= MDCMD_USE_WHOLE_DISK;
+		--argc, ++argv;
+	}
+
+	if (argc == 0)
+		goto syntax;
+
+	/*
+	 * get component name.  The argument string is used directly,
+	 * exactly as devname is above; a private copy would never be
+	 * released, because compname is repointed at spcompnp->cname
+	 * once the name has been resolved.
+	 */
+	compname = argv[0];
+
+	if (options & MDCMD_USE_WHOLE_DISK) {
+		if ((dnp = metadrivename(spp, compname, ep)) == NULL) {
+			goto out;
+		}
+		if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) {
+			goto out;
+		}
+	} else if ((spcompnp = metaname(spp, compname, ep)) == NULL) {
+		goto out;
+	}
+	assert(*spp != NULL);
+
+	if (!(options & MDCMD_NOLOCK)) {
+		/* grab set lock */
+		if (meta_lock(*spp, TRUE, ep))
+			goto out;
+
+		if (meta_check_ownership(*spp, ep) != 0)
+			goto out;
+	}
+
+	/* allocate the soft partition */
+	msp = Zalloc(sizeof (*msp));
+
+	/* setup common */
+	msp->common.namep = np;
+	msp->common.type = MD_METASP;
+
+	compname = spcompnp->cname;
+
+	assert(spcompnp->rname != NULL);
+	--argc, ++argv;
+
+	if (argc == 0) {
+		goto syntax;
+	}
+
+	if (*argv[0] == '-') {
+		/*
+		 * parse any other command line options, this includes
+		 * the recovery options -o and -b. The special thing
+		 * with these options is that the len needs to be
+		 * kept track of otherwise when the geometry of the
+		 * "device" is built it will create an invalid geometry
+		 */
+		old_optind = optind = 0;
+		opterr = 0;
+		oflag = 0;
+		seq = 0;
+		failed = 0;
+		while ((c = getopt(argc, argv, "A:o:b:")) != -1) {
+			sp_ext_length_t	length;
+			longlong_t	tmp_size;
+
+			switch (c) {
+			case 'A':	/* data alignment */
+				if (meta_sp_parsesizestring(optarg,
+					&alignment) == -1) {
+					failed = 1;
+				}
+				break;
+			case 'o':	/* offset in the partition */
+				if (oflag == 1) {
+					/* two -o in a row, no -b between */
+					failed = 1;
+				} else {
+					tmp_size = atoll(optarg);
+					if (tmp_size <= 0) {
+						failed = 1;
+					} else {
+						oflag = 1;
+						options |= MDCMD_DIRECT;
+
+						offset = tmp_size;
+					}
+				}
+
+				break;
+			case 'b':	/* number of blocks */
+				if (oflag == 0) {
+					/* -b without a preceding -o */
+					failed = 1;
+				} else {
+					tmp_size = atoll(optarg);
+					if (tmp_size <= 0) {
+						failed = 1;
+					} else {
+						oflag = 0;
+
+						length = tmp_size;
+
+						/* we have a pair of values */
+						meta_sp_list_insert(*spp, np,
+							&extlist, offset,
+							length, EXTTYP_ALLOC,
+							seq++, EXTFLG_UPDATE,
+							meta_sp_cmp_by_offset);
+						len += length;
+					}
+				}
+
+				break;
+			default:
+				argc -= old_optind;
+				argv += old_optind;
+				goto options;
+			}
+
+			if (failed) {
+				argc -= old_optind;
+				argv += old_optind;
+				goto syntax;
+			}
+
+			old_optind = optind;
+		}
+		argc -= optind;
+		argv += optind;
+
+		/*
+		 * Must have matching pairs of -o and -b flags
+		 */
+		if (oflag != 0)
+			goto syntax;
+
+		/*
+		 * Can't specify both layout (indicated indirectly by
+		 * len being set by the -o/-b cases above) AND
+		 * alignment
+		 */
+		if ((len > 0LL) && (alignment > 0LL))
+			goto syntax;
+
+		/*
+		 * sanity check the allocation list
+		 */
+		if ((extlist != NULL) && meta_sp_list_overlaps(extlist))
+			goto syntax;
+	}
+
+	/* no -o/-b layout was given, so a size argument is required */
+	if (len == 0LL) {
+		if (argc == 0)
+			goto syntax;
+		if (meta_sp_parsesize(argv[0], &len) == -1)
+			goto syntax;
+		--argc, ++argv;
+	}
+
+	msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val));
+	msp->ext.ext_val->len = len;
+	msp->compnamep = spcompnp;
+
+	/* we should be at the end */
+	if (argc != 0)
+		goto syntax;
+
+	/* create soft partition */
+	if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0)
+		goto out;
+	rval = 0;
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partition is setup\n"),
+		    devname);
+		(void) fflush(stdout);
+	}
+	goto out;
+
+syntax:
+	/* syntax error */
+	rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv);
+	goto out;
+
+options:
+	/* options error */
+	rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv);
+	goto out;
+
+out:
+	/* the unit built here is only scaffolding for meta_create_sp() */
+	if (msp != NULL) {
+		if (msp->ext.ext_val != NULL) {
+			Free(msp->ext.ext_val);
+		}
+		Free(msp);
+	}
+
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_free_sp()
+ * INPUT:	msp	- the soft partition unit to free (may be NULL)
+ * OUTPUT:	none
+ * RETURNS:	void
+ * PURPOSE:	provides an interface from the rest of libmeta for freeing a
+ *		soft partition unit.  The extent array hanging off the
+ *		unit (allocated separately by meta_get_sp_common()) is
+ *		released along with the unit itself.
+ */
+void
+meta_free_sp(md_sp_t *msp)
+{
+	if (msp == NULL)
+		return;
+
+	/* the extent array is its own allocation; release it first */
+	if (msp->ext.ext_val != NULL)
+		Free(msp->ext.ext_val);
+	Free(msp);
+}
+
+/*
+ * FUNCTION:	meta_sp_issp()
+ * INPUT:	sp	- the set name to check
+ *		np	- the name to check
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 if a soft partition unit could be fetched
+ *			    for sp,np
+ *			  1 if it could not
+ * PURPOSE:	tells other metadevice check routines whether the given
+ *		device is a soft partition, by attempting to fetch its
+ *		soft partition unit structure.
+ */
+int
+meta_sp_issp(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	md_sp_t	*msp = meta_get_sp_common(sp, np, 0, ep);
+
+	return ((msp == NULL) ? 1 : 0);
+}
+
+/*
+ * FUNCTION:	meta_check_sp()
+ * INPUT:	sp	- the set name to check
+ *		msp	- the unit structure to check
+ *		options	- creation options
+ * OUTPUT:	repart_options - options to be passed to
+ *				meta_repartition_drive(); has
+ *				MD_REPART_LEAVE_REP or'ed in when the
+ *				replica slice of the drive holds a metadb
+ *				(MDCMD_USE_WHOLE_DISK case only)
+ *		ep	- return error pointer; cleared on success
+ * RETURNS:	int	-  0 ok to create on this component
+ *			  -1 error or not ok to create on this component
+ * PURPOSE:	Checks to determine whether the rules for creation of
+ *		soft partitions allow creation of a soft partition on
+ *		the device described by the mdname_t structure referred
+ *		to by msp->compnamep.
+ *
+ *		When MDCMD_USE_WHOLE_DISK is set, every non-empty slice
+ *		of the drive is checked, because the drive is going to
+ *		be repartitioned.  Otherwise only the named component
+ *		is checked.
+ *
+ *		NOTE: Does NOT check to determine whether the extents
+ *		      described in the md_sp_t structure referred to by
+ *		      msp will fit on the device described by the mdname_t
+ *		      structure located at msp->compnamep.
+ */
+static int
+meta_check_sp(
+	mdsetname_t	*sp,
+	md_sp_t		*msp,
+	mdcmdopts_t	options,
+	int		*repart_options,
+	md_error_t	*ep
+)
+{
+	md_common_t	*mdp;
+	mdname_t	*compnp = msp->compnamep;
+	uint_t		slice;
+	mddrivename_t	*dnp;
+	mdname_t	*slicenp;
+	mdvtoc_t	*vtocp;
+
+	/* make sure it is in the set */
+	if (meta_check_inset(sp, compnp, ep) != 0)
+		return (-1);
+
+	if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
+		uint_t	rep_slice;
+
+		/*
+		 * check to make sure we can partition this drive.
+		 * we cannot continue if any of the following are
+		 * true:
+		 * The drive is a metadevice.
+		 * The drive contains a mounted slice.
+		 * The drive contains a slice being swapped to.
+		 * The drive contains slices which are part of other
+		 * metadevices.
+		 * The drive contains a metadb.
+		 */
+		if (metaismeta(compnp))
+			return (mddeverror(ep, MDE_IS_META, compnp->dev,
+			    compnp->cname));
+
+		assert(compnp->drivenamep != NULL);
+
+		/*
+		 * ensure that we have slice 0 since the disk will be
+		 * repartitioned in the USE_WHOLE_DISK case.  this check
+		 * is redundant unless the user incorrectly specifies
+		 * a fully qualified drive AND slice name (i.e.,
+		 * /dev/dsk/cXtXdXsX), which will be incorrectly
+		 * recognized as a drive name by the metaname code.
+		 */
+
+		if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL)
+			return (-1);
+		if (slice != MD_SLICE0)
+			return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname));
+
+		dnp = compnp->drivenamep;
+		if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
+			return (-1);
+
+		for (slice = 0; slice < vtocp->nparts; slice++) {
+
+			/* only check if the slice really exists */
+			if (vtocp->parts[slice].size == 0)
+				continue;
+
+			slicenp = metaslicename(dnp, slice, ep);
+			if (slicenp == NULL)
+				return (-1);
+
+			/* check to ensure that it is not already in use */
+			if (meta_check_inuse(sp,
+			    slicenp, MDCHK_INUSE, ep) != 0) {
+				return (-1);
+			}
+
+			/*
+			 * Up to this point, tests are applied to all
+			 * slices uniformly.
+			 */
+
+			if (slice == rep_slice) {
+				/*
+				 * Tests inside the body of this
+				 * conditional are applied only to
+				 * the replica slice (rep_slice, as
+				 * reported by meta_replicaslice()).
+				 */
+				if (meta_check_inmeta(sp, slicenp,
+				    options | MDCHK_ALLOW_MDDB |
+				    MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0)
+					return (-1);
+
+				/*
+				 * For the replica slice, a metadb is
+				 * NOT an automatic failure. It merely
+				 * means that we're not allowed to muck
+				 * about with the partitioning of that
+				 * slice.  We indicate this by masking
+				 * in the MD_REPART_LEAVE_REP flag.
+				 */
+				if (metahasmddb(sp, slicenp, ep)) {
+					assert(repart_options !=
+					    NULL);
+					*repart_options |=
+					    MD_REPART_LEAVE_REP;
+				}
+
+				/*
+				 * Skip the remaining tests for the
+				 * replica slice
+				 */
+				continue;
+			}
+
+			/*
+			 * Tests below this point will be applied to
+			 * all slices EXCEPT for the replica slice.
+			 */
+
+
+			/* check if component is in a metadevice */
+			if (meta_check_inmeta(sp, slicenp, options, 0,
+			    -1, ep) != 0)
+				return (-1);
+
+			/* check to see if component has a metadb */
+			if (metahasmddb(sp, slicenp, ep))
+				return (mddeverror(ep, MDE_HAS_MDDB,
+				    slicenp->dev, slicenp->cname));
+		}
+		/*
+		 * This should be all of the testing necessary when
+		 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of
+		 * meta_check_sp() is oriented towards component
+		 * arguments instead of disks.
+		 */
+		goto meta_check_sp_ok;
+
+	}
+
+	/* check to ensure that it is not already in use */
+	if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) {
+		return (-1);
+	}
+
+	if (!metaismeta(compnp)) {	/* handle non-metadevices */
+
+		/*
+		 * The component can have one or more soft partitions on it
+		 * already, but can't be part of any other type of metadevice,
+		 * so if it is used for a metadevice, but the metadevice
+		 * isn't a soft partition, return failure.
+		 */
+
+		if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 &&
+		    meta_check_insp(sp, compnp, 0, -1, ep) == 0) {
+			return (-1);
+		}
+	} else {			/* handle metadevices */
+		/* get underlying unit & check capabilities */
+		if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL)
+			return (-1);
+
+		if ((! (mdp->capabilities & MD_CAN_PARENT)) ||
+		    (! (mdp->capabilities & MD_CAN_SP)))
+			return (mdmderror(ep, MDE_INVAL_UNIT,
+			    meta_getminor(compnp->dev), compnp->cname));
+	}
+
+meta_check_sp_ok:
+	mdclrerror(ep);
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_create_sp()
+ * INPUT:	sp	- the set name to create in
+ *		msp	- the unit structure to create
+ *		oblist	- an optional list of requested extents (-o/-b options)
+ *		options	- creation options
+ *		alignment - data alignment
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	-  0 success, -1 error
+ * PURPOSE:	does most of the work for creating a soft partition.  If
+ *		metainit -p -e was used, first partition the drive.  Then
+ *		create an extent list based on the existing soft partitions
+ *		and assume all space not used by them is free.  Storage for
+ *		the new soft partition is allocated from the free extents
+ *		based on the length specified on the command line or the
+ *		oblist passed in.  The unit structure is then committed and
+ *		the watermarks are updated.  Finally, the status is changed to
+ *		Okay and the process is complete.
+ */
+static int
+meta_create_sp(
+	mdsetname_t	*sp,
+	md_sp_t		*msp,
+	sp_ext_node_t	*oblist,
+	mdcmdopts_t	options,
+	sp_ext_length_t	alignment,
+	md_error_t	*ep
+)
+{
+	mdname_t	*np = msp->common.namep;
+	mdname_t	*compnp = msp->compnamep;
+	mp_unit_t	*mp = NULL;
+	mdnamelist_t	*keynlp = NULL, *spnlp = NULL;
+	md_set_params_t	set_params;
+	int		rval = -1;
+	diskaddr_t	comp_size;
+	diskaddr_t	sp_start;
+	sp_ext_node_t	*extlist = NULL;
+	int		numexts = 0;	/* number of extents */
+	int		count = 0;
+	int		committed = 0;
+	int		repart_options = MD_REPART_FORCE;
+	int		create_flag = MD_CRO_32BIT;
+
+	md_set_desc	*sd;
+	mm_unit_t	*mm;
+	md_set_mmown_params_t	*ownpar = NULL;
+	int		comp_is_mirror = 0;
+
+	/* validate soft partition */
+	if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0)
+		return (-1);
+
+	if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
+		if ((options & MDCMD_DOIT) != 0) {
+			if (meta_repartition_drive(sp,
+			    compnp->drivenamep,
+			    repart_options,
+			    NULL, /* Don't return the VTOC */
+			    ep) != 0)
+
+				return (-1);
+		} else {
+			/*
+			 * If -n and -e are both specified, it doesn't make
+			 * sense to continue without actually partitioning
+			 * the drive.
+			 */
+			return (0);
+		}
+	}
+
+	/* populate the start_blk field of the component name */
+	if ((sp_start = meta_sp_get_start(sp, compnp, ep)) ==
+	    MD_DISKADDR_ERROR) {
+		rval = -1;
+		goto out;
+	}
+
+	if (options & MDCMD_DOIT) {
+		/* store name in namespace */
+		if (add_key_name(sp, compnp, &keynlp, ep) != 0) {
+			rval = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Get a list of the soft partitions that currently reside on
+	 * the component.  We should ALWAYS force reload the cache,
+	 * because if this is a single creation, there will not BE a
+	 * cached list, and if we're using the md.tab, we must rebuild
+	 * the list because it won't contain the previous (if any)
+	 * soft partition.
+	 */
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
+	if (count < 0) {
+		/* error occurred */
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * get the size of the underlying device.  if the size is smaller
+	 * than or equal to the watermark size, we know there isn't
+	 * enough space.
+	 */
+	if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) {
+		rval = -1;
+		goto out;
+	} else if (comp_size <= MD_SP_WMSIZE) {
+		(void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname);
+		rval = -1;
+		goto out;
+	}
+	/*
+	 * seed extlist with reserved space at the beginning of the volume and
+	 * enough space for the end watermark.  The end watermark always gets
+	 * updated, but if the underlying device changes size it may not be
+	 * pointed to until the extent before it is updated.  Since the
+	 * end of the reserved space is where the first watermark starts,
+	 * the reserved extent should never be marked for updating.
+	 */
+
+	meta_sp_list_insert(NULL, NULL, &extlist,
+	    0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
+	meta_sp_list_insert(NULL, NULL, &extlist,
+	    (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE,
+	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	metafreenamelist(spnlp);
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_create_sp: list of used extents:\n");
+		meta_sp_list_dump(extlist);
+	}
+
+	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
+
+	/* get extent list from -o/-b options or from free space */
+	if (options & MDCMD_DIRECT) {
+		if (getenv(META_SP_DEBUG)) {
+			meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n");
+			meta_sp_list_dump(oblist);
+		}
+
+		numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist);
+		if (numexts == -1) {
+			(void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname);
+			rval = -1;
+			goto out;
+		}
+	} else {
+		numexts = meta_sp_alloc_by_len(sp, np, &extlist,
+		    &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment :
+		    meta_sp_get_default_alignment(sp, compnp, ep));
+		if (numexts == -1) {
+			(void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname);
+			rval = -1;
+			goto out;
+		}
+	}
+
+	assert(extlist != NULL);
+
+	/* create soft partition */
+	mp = meta_sp_createunit(msp->common.namep, msp->compnamep,
+	    extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep);
+
+	create_flag = meta_check_devicesize(mp->c.un_total_blocks);
+
+	/* if we're not doing anything (metainit -n), return success */
+	if (! (options & MDCMD_DOIT)) {
+		rval = 0;	/* success */
+		goto out;
+	}
+
+	(void) memset(&set_params, 0, sizeof (set_params));
+
+	if (create_flag == MD_CRO_64BIT) {
+		mp->c.un_revision = MD_64BIT_META_DEV;
+		set_params.options = MD_CRO_64BIT;
+	} else {
+		mp->c.un_revision = MD_32BIT_META_DEV;
+		set_params.options = MD_CRO_32BIT;
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_create_sp: printing unit structure\n");
+		meta_sp_printunit(mp);
+	}
+
+	/*
+	 * Check to see if we're trying to create a partition on a mirror. If so
+	 * we may have to enforce an ownership change before writing the
+	 * watermark out.
+	 */
+	if (metaismeta(compnp)) {
+		char *miscname;
+
+		miscname = metagetmiscname(compnp, ep);
+		if (miscname != NULL)
+			comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0);
+		else
+			comp_is_mirror = 0;
+	} else {
+		comp_is_mirror = 0;
+	}
+
+	/*
+	 * For a multi-node environment we have to ensure that the master
+	 * node owns an underlying mirror before we issue the MD_IOCSET ioctl.
+	 * If the master does not own the device we will deadlock as the
+	 * implicit write of the watermarks (in sp_ioctl.c) will cause an
+	 * ownership change that will block as the MD_IOCSET is still in
+	 * progress. To close this window we force an owner change to occur
+	 * before issuing the MD_IOCSET. We cannot simply open the device and
+	 * write to it as this will only work for the first soft-partition
+	 * creation.
+	 */
+
+	if (comp_is_mirror && !metaislocalset(sp)) {
+
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			rval = -1;
+			goto out;
+		}
+		if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) {
+			mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep);
+			if (mm == NULL) {
+				rval = -1;
+				goto out;
+			} else {
+				rval = meta_mn_change_owner(&ownpar, sp->setno,
+					meta_getminor(compnp->dev),
+					sd->sd_mn_mynode->nd_nodeid,
+					MD_MN_MM_PREVENT_CHANGE |
+					    MD_MN_MM_SPAWN_THREAD);
+				if (rval == -1)
+					goto out;
+			}
+		}
+	}
+
+	set_params.mnum = MD_SID(mp);
+	set_params.size = mp->c.un_size;
+	set_params.mdp = (uintptr_t)mp;
+	MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum));
+
+	/* first phase of commit. */
+	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+	    np->cname) != 0) {
+		(void) mdstealerror(ep, &set_params.mde);
+		rval = -1;
+		goto out;
+	}
+
+	/* we've successfully committed the record */
+	committed = 1;
+
+	/* write watermarks */
+	if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Allow mirror ownership to change. If we don't succeed in this
+	 * ioctl it isn't fatal, but the cluster will probably hang fairly
+	 * soon as the mirror owner won't change. However, we have
+	 * successfully written the watermarks out to the device so the
+	 * softpart creation has succeeded
+	 */
+	if (ownpar) {
+		(void) meta_mn_change_owner(&ownpar, sp->setno, ownpar->d.mnum,
+		    ownpar->d.owner,
+		    MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD);
+	}
+
+	/* second phase of commit, set status to MD_SP_OK */
+	if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) {
+		rval = -1;
+		goto out;
+	}
+	rval = 0;
+out:
+	Free(mp);
+	if (ownpar)
+		Free(ownpar);
+
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+
+	if (rval != 0 && keynlp != NULL && committed != 1)
+		(void) del_key_names(sp, keynlp, NULL);
+
+	metafreenamelist(keynlp);
+
+	return (rval);
+}
+
+/*
+ * **************************************************************************
+ *                      Reset (metaclear) Functions                         *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_reset_common()
+ * INPUT:	sp	- the set name of the device to reset
+ *		np	- the name of the device to reset
+ *		msp	- the unit structure to reset
+ *		reset_params - reset request, passed by value; the driver
+ *			  name, minor number and force flag are filled in
+ *			  here before it is handed to the driver
+ *		options	- metaclear options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	-  0 success, -1 error
+ * PURPOSE:	deletes ("resets") one soft partition.  The unit is first
+ *		marked "delete pending", then its watermarks are cleared and
+ *		finally MD_IOCRESET is issued for it.  A soft partition that
+ *		has a parent or is open is refused.  When MDCMD_RECURSE is
+ *		set and the component is a metadevice, that metadevice is
+ *		cleared afterwards as well.
+ */
+static int
+meta_sp_reset_common(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_sp_t		*msp,
+	md_sp_reset_t	reset_params,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char	*drvname;
+	int	open_state;
+	int	forced = ((options & MDCMD_FORCE) != 0);
+	int	status = -1;
+
+	/* a soft partition that still has a parent cannot be cleared */
+	if (MD_HAS_PARENT(msp->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev),
+		    np->cname));
+	}
+
+	/* neither can one that is currently open */
+	open_state = meta_isopen(sp, np, ep, options);
+	if (open_state < 0)
+		return (-1);
+	if (open_state != 0) {
+		return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev),
+		    np->cname));
+	}
+
+	/* look up the driver (misc) name for this device */
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* complete the reset request */
+	MD_SETDRIVERNAME(&reset_params, drvname, sp->setno);
+	reset_params.mnum = meta_getminor(np->dev);
+	reset_params.force = forced ? 1 : 0;
+
+	/*
+	 * Phase one: move the soft partition to the "delete pending"
+	 * state.
+	 */
+	if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0)
+		return (-1);
+
+	/*
+	 * Clear the watermarks.  A failure here is only tolerated when the
+	 * force flag was given; in that case the unit is deleted anyway,
+	 * possibly leaving the on-disk format corrupt.  Without force the
+	 * soft partition is left in the "delete pending" state.
+	 */
+	if (meta_sp_clear_wm(sp, msp, ep) >= 0 || forced) {
+		/*
+		 * Phase two: the driver removes the soft partition from
+		 * the metadb and zeros out the incore version.
+		 */
+		if (metaioctl(MD_IOCRESET, &reset_params,
+		    &reset_params.mde, np->cname) != 0) {
+			(void) mdstealerror(ep, &reset_params.mde);
+		} else {
+			status = 0;	/* success */
+
+			if (options & MDCMD_PRINT) {
+				(void) printf(dgettext(TEXT_DOMAIN,
+				    "%s: Soft Partition is cleared\n"),
+				    np->cname);
+				(void) fflush(stdout);
+			}
+
+			/*
+			 * When recursing over a metadevice component, try
+			 * to clear it too and report a failure to do so.
+			 */
+			if ((options & MDCMD_RECURSE) &&
+			    metaismeta(msp->compnamep) &&
+			    meta_reset_by_name(sp, msp->compnamep,
+			    options, ep) != 0)
+				status = -1;
+		}
+	}
+
+	meta_invalidate_name(np);
+	return (status);
+}
+
+/*
+ * FUNCTION:	meta_sp_reset()
+ * INPUT:	sp	- the set name of the device to reset
+ *		np	- the name of the device to reset, or NULL to reset
+ *			  every parentless soft partition in the set
+ *		options	- metaclear options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	-  0 success, -1 error
+ * PURPOSE:	provides the entry point to the rest of libmeta for deleting a
+ *		soft partition.  If np is NULL, then soft partitions are
+ *		all deleted at the current level and then recursively deleted.
+ *		Otherwise, if a name is specified either directly or as a
+ *		result of a recursive operation, it deletes only that name.
+ *		Since something sitting under a soft partition may be parented
+ *		to it, we have to reparent that other device to another soft
+ *		partition on the same component if we're deleting the one it's
+ *		parented to.
+ */
+int
+meta_sp_reset(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_sp_t		*msp;
+	int		rval = -1;
+	mdnamelist_t	*spnlp = NULL, *nlp = NULL;
+	md_sp_reset_t	reset_params;
+	int		num_sp;
+
+	assert(sp != NULL);
+
+	/* reset/delete all soft partitions */
+	if (np == NULL) {
+		/*
+		 * meta_reset_all sets MDCMD_RECURSE, but this behavior
+		 * is incorrect for soft partitions.  We want to clear
+		 * all soft partitions at a particular level in the
+		 * metadevice stack before moving to the next level.
+		 * Thus, we clear MDCMD_RECURSE from the options.
+		 */
+		options &= ~MDCMD_RECURSE;
+
+		/* for each soft partition */
+		rval = 0;
+		if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0)
+			rval = -1;
+
+		for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) {
+			np = nlp->namep;
+			if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
+				rval = -1;
+				break;
+			}
+			/*
+			 * meta_reset_all calls us twice to get soft
+			 * partitions at the top and bottom of the stack.
+			 * thus, if we have a parent, we'll get deleted
+			 * on the next call.
+			 */
+			if (MD_HAS_PARENT(msp->common.parent))
+				continue;
+			/*
+			 * If this is a multi-node set, we send a series
+			 * of individual metaclear commands.
+			 */
+			if (meta_is_mn_set(sp, ep)) {
+				if (meta_mn_send_metaclear_command(sp,
+				    np->cname, options, 0, ep) != 0) {
+					rval = -1;
+					break;
+				}
+			} else {
+				if (meta_sp_reset(sp, np, options, ep) != 0) {
+					rval = -1;
+					break;
+				}
+			}
+		}
+		/* cleanup return status */
+		metafreenamelist(spnlp);
+		return (rval);
+	}
+
+	/* check the name */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	/* get the unit structure */
+	if ((msp = meta_get_sp(sp, np, ep)) == NULL)
+		return (-1);
+
+	/* clear out reset parameters */
+	(void) memset(&reset_params, 0, sizeof (reset_params));
+
+	/* if our child is a metadevice, we need to deparent/reparent it */
+	if (metaismeta(msp->compnamep)) {
+		/* get sp's on this component */
+		num_sp = meta_sp_get_by_component(sp, msp->compnamep,
+		    &spnlp, 1, ep);
+		if (num_sp < 0) {
+			/* lookup failed; ep was filled in by the callee */
+			return (-1);
+		} else if (num_sp == 0) {
+			/*
+			 * no sp's on this device.  error!  Record a reason
+			 * in ep so the failure is not reported silently;
+			 * this is the same error meta_sp_reset_component()
+			 * uses for a component without soft partitions.
+			 */
+			return (mdmderror(ep, MDE_SP_NOSP, 0,
+			    msp->compnamep->cname));
+		} else if (num_sp == 1) {
+			/* last sp on this device, so we deparent */
+			reset_params.new_parent = MD_NO_PARENT;
+		} else {
+			/* have to reparent this metadevice */
+			for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
+				if (meta_getminor(nlp->namep->dev) ==
+				    meta_getminor(np->dev))
+					continue;
+				/*
+				 * this isn't the softpart we are deleting,
+				 * so use this device as the new parent.
+				 */
+				reset_params.new_parent =
+				    meta_getminor(nlp->namep->dev);
+				break;
+			}
+		}
+		metafreenamelist(spnlp);
+	}
+
+	if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_reset_component()
+ * INPUT:	sp	- the set name of the device to reset
+ *		name	- the string name of the device to reset
+ *		options	- metaclear options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	-  0 success, -1 error
+ * PURPOSE:	provides the ability to delete all soft partitions on a
+ *		specified device (metaclear -p).  It first gets all of the
+ *		soft partitions on the component and then deletes each one
+ *		individually, in list order.  It is an error for the
+ *		component to carry no soft partitions.
+ */
+int
+meta_sp_reset_component(
+	mdsetname_t	*sp,
+	char		*name,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*compnp, *np;
+	mdnamelist_t	*spnlp = NULL;
+	mdnamelist_t	*nlp = NULL;
+	md_sp_t		*msp;
+	int		count;
+	int		rval = 0;
+	md_sp_reset_t	reset_params;
+
+	if ((compnp = metaname(&sp, name, ep)) == NULL)
+		return (-1);
+
+	/* If we're starting out with no soft partitions, it's an error */
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
+	if (count == 0)
+		return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname));
+	else if (count < 0)
+		return (-1);
+
+	/*
+	 * clear all soft partitions on this component.
+	 * NOTE: we reparent underlying metadevices as we go so that
+	 * things stay sane.  Also, if we encounter an error, we stop
+	 * and go no further in case recovery might be needed.
+	 */
+	for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
+		/* clear out reset parameters */
+		(void) memset(&reset_params, 0, sizeof (reset_params));
+
+		/* check the name */
+		np = nlp->namep;
+
+		if (metachkmeta(np, ep) != 0) {
+			rval = -1;
+			break;
+		}
+
+		/* get the unit structure */
+		if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
+			rval = -1;
+			break;
+		}
+
+		/*
+		 * have to deparent/reparent metadevices.  The new parent
+		 * must be a soft partition that still exists once this one
+		 * is gone, i.e. the next entry in the list relative to the
+		 * one being cleared (not relative to the list head, which
+		 * would name the entry being deleted on the second pass).
+		 */
+		if (metaismeta(compnp)) {
+			if (nlp->next == NULL)
+				reset_params.new_parent = MD_NO_PARENT;
+			else
+				reset_params.new_parent =
+				    meta_getminor(nlp->next->namep->dev);
+		}
+
+		/* clear soft partition */
+		if (meta_sp_reset_common(sp, np, msp, reset_params,
+		    options, ep) < 0) {
+			rval = -1;
+			break;
+		}
+	}
+
+	metafreenamelist(spnlp);
+	return (rval);
+}
+
+/*
+ * **************************************************************************
+ *                      Grow (metattach) Functions                          *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_sp_attach()
+ * INPUT:	sp	- the set name of the device to attach to
+ *		np	- the name of the device to attach to
+ *		addsize	- the unparsed string holding the amount of space to add
+ *		options	- metattach options
+ *		alignment - data alignment; when not greater than zero the
+ *			  default alignment for the component is used
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	-  0 success, -1 error
+ * PURPOSE:	grows a soft partition by reading in the existing unit
+ *		structure and setting its state to Growing, allocating more
+ *		space (similar to meta_create_sp()), updating the watermarks,
+ *		and then writing out the new unit structure in the Okay state.
+ *		Without MDCMD_DOIT only the allocation is attempted and
+ *		nothing is committed.  The unit structures and the extent
+ *		list built here are released on every exit path.
+ */
+int
+meta_sp_attach(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	char		*addsize,
+	mdcmdopts_t	options,
+	sp_ext_length_t	alignment,
+	md_error_t	*ep
+)
+{
+	md_grow_params_t	grow_params;
+	sp_ext_length_t		grow_len;	/* amount to grow */
+	mp_unit_t		*mp = NULL;
+	mp_unit_t		*new_un = NULL;
+	mdname_t		*compnp = NULL;
+
+	sp_ext_node_t		*extlist = NULL;
+	int			numexts;
+	mdnamelist_t		*spnlp = NULL;
+	int			count;
+	md_sp_t			*msp;
+	daddr_t			start_block;
+	int			rval = -1;
+
+	/* should have the same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* check name */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	if (meta_sp_parsesize(addsize, &grow_len) == -1) {
+		return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname));
+	}
+
+	if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL)
+		return (-1);
+
+	/* make sure we don't have a parent */
+	if (MD_HAS_PARENT(mp->c.un_parent)) {
+		(void) mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname);
+		goto out;
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_attach: Unit structure before new "
+		    "space:\n");
+		meta_sp_printunit(mp);
+	}
+
+	/*
+	 * NOTE: the fast option to metakeyname is 0 as opposed to 1
+	 * If this was not the case we would suffer the following
+	 * assertion failure:
+	 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP
+	 * file meta_check.x, line 315
+	 * I guess this is because we have not "seen" this drive before
+	 * and hence hit the failure - this is of course the attach routine
+	 */
+	if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL)
+		goto out;
+
+	/* metakeyname does not fill in the key. */
+	compnp->key = mp->un_key;
+
+	/* work out the space on the component that we are dealing with */
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
+
+	/*
+	 * see if the component has been soft partitioned yet, or if an
+	 * error occurred.  The name list is not known to be usable on
+	 * these two paths, so it is left alone exactly as before.
+	 */
+	if (count == 0) {
+		Free(mp);
+		return (mdmderror(ep, MDE_NOT_SP, 0, np->cname));
+	} else if (count < 0) {
+		Free(mp);
+		return (-1);
+	}
+
+	/*
+	 * seed extlist with reserved space at the beginning of the volume and
+	 * enough space for the end watermark.  The end watermark always gets
+	 * updated, but if the underlying device changes size it may not be
+	 * pointed to until the extent before it is updated.  Since the
+	 * end of the reserved space is where the first watermark starts,
+	 * the reserved extent should never be marked for updating.
+	 */
+	if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
+	    MD_DISKADDR_ERROR)
+		goto out;
+
+	meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
+	    EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
+	meta_sp_list_insert(NULL, NULL, &extlist,
+	    metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
+	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1)
+		goto out;
+
+	/* the name list is no longer needed once the extents are built */
+	metafreenamelist(spnlp);
+	spnlp = NULL;
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_attach: list of used extents:\n");
+		meta_sp_list_dump(extlist);
+	}
+
+	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
+
+	assert(mp->un_numexts >= 1);
+	numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len,
+	    mp->un_ext[mp->un_numexts - 1].un_poff,
+	    (alignment > 0) ? alignment :
+	    meta_sp_get_default_alignment(sp, compnp, ep));
+
+	if (numexts == -1) {
+		(void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname);
+		goto out;
+	}
+
+	/* allocate new unit structure and copy in old unit */
+	if ((new_un = meta_sp_updateunit(np, mp, extlist,
+	    grow_len, numexts, ep)) == NULL)
+		goto out;
+
+	/* the old unit has been copied; only new_un is used from here on */
+	Free(mp);
+	mp = NULL;
+
+	/* If running in dryrun mode (-n option), we're done here */
+	if ((options & MDCMD_DOIT) == 0) {
+		if (options & MDCMD_PRINT) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Soft Partition would grow\n"),
+			    np->cname);
+			(void) fflush(stdout);
+		}
+		rval = 0;
+		goto out;
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_attach: updated unit structure:\n");
+		meta_sp_printunit(new_un);
+	}
+
+	/* pick the 32 or 64 bit unit format based on the grown size */
+	(void) memset(&grow_params, 0, sizeof (grow_params));
+	if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
+		grow_params.options = MD_CRO_64BIT;
+		new_un->c.un_revision = MD_64BIT_META_DEV;
+	} else {
+		grow_params.options = MD_CRO_32BIT;
+		new_un->c.un_revision = MD_32BIT_META_DEV;
+	}
+	grow_params.mnum = MD_SID(new_un);
+	grow_params.size = new_un->c.un_size;
+	grow_params.mdp = (uintptr_t)new_un;
+	MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum));
+
+	/* first phase of commit: hand the grown unit to the driver */
+	if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde,
+	    np->cname) != 0) {
+		(void) mdstealerror(ep, &grow_params.mde);
+		goto out;
+	}
+
+	/* update all watermarks */
+	if ((msp = meta_get_sp(sp, np, ep)) == NULL)
+		goto out;
+	if (meta_sp_update_wm(sp, msp, extlist, ep) < 0)
+		goto out;
+
+	/* second phase of commit, set status to MD_SP_OK */
+	if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0)
+		goto out;
+
+	meta_invalidate_name(np);
+
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partition has been grown\n"),
+		    np->cname);
+		(void) fflush(stdout);
+	}
+
+	rval = 0;
+
+out:
+	/* release everything this routine allocated, whatever the outcome */
+	if (mp != NULL)
+		Free(mp);
+	if (new_un != NULL)
+		Free(new_un);
+	if (spnlp != NULL)
+		metafreenamelist(spnlp);
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+
+	return (rval);
+}
+
+/*
+ * **************************************************************************
+ *                    Recovery (metarecover) Functions                      *
+ * **************************************************************************
+ */
+
+/*
+ * FUNCTION:	meta_recover_sp()
+ * INPUT:	sp	- the name of the set we are recovering on
+ *		compnp	- name pointer for device we are recovering on
+ *		argc	- argument count
+ *		argv	- left over arguments not parsed by metarecover command
+ *		options	- metarecover options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	interpret the soft partition specific part of a metarecover
+ *		command line and run the matching recovery steps: no
+ *		argument validates watermarks, metadb records and their
+ *		agreement; "-d" recovers from the on-disk watermarks; "-m"
+ *		recovers from the metadb records.  Anything else is a
+ *		syntax error.
+ */
+int
+meta_recover_sp(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_set_desc	*setdesc;
+	char		*mode;
+
+	/* at most one mode argument may follow */
+	if (argc > 1) {
+		(void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname,
+		    argc, argv);
+		return (-1);
+	}
+
+	/*
+	 * For a MN set, this operation must be performed on the master
+	 * as it is responsible for maintaining the watermarks
+	 */
+	if (!metaislocalset(sp)) {
+		setdesc = metaget_setdesc(sp, ep);
+		if (setdesc == NULL)
+			return (-1);
+
+		if (MD_MNSET_DESC(setdesc) && !setdesc->sd_mn_am_i_master) {
+			(void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno,
+			    setdesc->sd_mn_master_nodenm, NULL, NULL);
+			return (-1);
+		}
+	}
+
+	if (argc == 0) {
+		/*
+		 * No mode given: check the on-disk structures, then the
+		 * metadb structures, then that the two agree.  The first
+		 * failing step ends the sequence.
+		 */
+		if (meta_sp_validate_wm(sp, compnp, options, ep) < 0 ||
+		    meta_sp_validate_unit(sp, compnp, options, ep) < 0 ||
+		    meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0)
+			return (-1);
+		return (0);
+	}
+
+	mode = argv[0];
+
+	if (strcmp(mode, "-d") == 0) {
+		/*
+		 * Ensure that there is no existing valid record for this
+		 * soft-partition. If there is we have nothing to do.
+		 */
+		if (meta_sp_validate_unit(sp, compnp, options, ep) == 0)
+			return (-1);
+
+		/* validate, then rebuild from, the on-disk structures */
+		if (meta_sp_validate_wm(sp, compnp, options, ep) < 0 ||
+		    meta_sp_recover_from_wm(sp, compnp, options, ep) < 0)
+			return (-1);
+		return (0);
+	}
+
+	if (strcmp(mode, "-m") == 0) {
+		/* validate, then rebuild from, the metadb structures */
+		if (meta_sp_validate_unit(sp, compnp, options, ep) < 0 ||
+		    meta_sp_recover_from_unit(sp, compnp, options, ep) < 0)
+			return (-1);
+		return (0);
+	}
+
+	/* unrecognized mode: syntax error */
+	(void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname, argc, argv);
+	return (-1);
+}
+
+/*
+ * FUNCTION:	meta_sp_display_exthdr()
+ * INPUT:	none
+ * OUTPUT:	none
+ * RETURNS:	void
+ * PURPOSE:	write the column heading line that goes with the extent
+ *		lines printed by meta_sp_display_ext().  The column titles
+ *		are looked up in the message catalog.
+ */
+static void
+meta_sp_display_exthdr(void)
+{
+	char	*name_hdr = dgettext(TEXT_DOMAIN, "Name");
+	char	*seq_hdr = dgettext(TEXT_DOMAIN, "Seq#");
+	char	*type_hdr = dgettext(TEXT_DOMAIN, "Type");
+	char	*offset_hdr = dgettext(TEXT_DOMAIN, "Offset");
+	char	*length_hdr = dgettext(TEXT_DOMAIN, "Length");
+
+	/* the field widths match those used by meta_sp_display_ext() */
+	(void) printf("%20s %5s %7s %20s %20s\n",
+	    name_hdr, seq_hdr, type_hdr, offset_hdr, length_hdr);
+}
+
+
+/*
+ * FUNCTION:	meta_sp_display_ext()
+ * INPUT:	ext	- extent to display
+ * OUTPUT:	none
+ * RETURNS:	void
+ * PURPOSE:	write one line describing an sp_ext_node_t: owning name (or
+ *		"NONE"), sequence number, extent type, offset and length.
+ */
+static void
+meta_sp_display_ext(sp_ext_node_t *ext)
+{
+	char	*owner;
+	char	*kind;
+
+	/* an extent without a name is shown as NONE */
+	owner = (ext->ext_namep != NULL) ? ext->ext_namep->cname : "NONE";
+
+	/* map the extent type to its short label */
+	switch (ext->ext_type) {
+	case EXTTYP_ALLOC:
+		kind = "ALLOC";
+		break;
+	case EXTTYP_FREE:
+		kind = "FREE";
+		break;
+	case EXTTYP_RESERVED:
+		kind = "RESV";
+		break;
+	case EXTTYP_END:
+		kind = "END";
+		break;
+	default:
+		kind = "INVLD";
+		break;
+	}
+
+	(void) printf("%20s ", owner);
+	(void) printf("%5u ", ext->ext_seq);
+	(void) printf("%7s ", kind);
+	(void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length);
+}
+
+
+/*
+ * FUNCTION:	meta_sp_checkseq()
+ * INPUT:	extlist	- list of extents to be checked
+ * OUTPUT:	none
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	verify the sequence numbers of the extents in a list that
+ *		holds one or more soft partitions and is sorted by name and
+ *		sequence number.  Each pair of neighbouring extents that
+ *		belong to the same soft partition must carry consecutive
+ *		sequence numbers; the first violation is reported on stderr.
+ *		The walk stops at the first following extent that is not an
+ *		allocated one.
+ */
+static int
+meta_sp_checkseq(sp_ext_node_t *extlist)
+{
+	sp_ext_node_t	*cur;
+	sp_ext_node_t	*nxt;
+	int		same_sp;
+
+	assert(extlist != NULL);
+
+	cur = extlist;
+	while ((nxt = cur->ext_next) != NULL &&
+	    nxt->ext_type == EXTTYP_ALLOC) {
+		/*
+		 * Neighbours with different names belong to different soft
+		 * partitions, so their sequence numbers are unrelated.
+		 */
+		same_sp = (nxt->ext_namep == NULL ||
+		    strcmp(nxt->ext_namep->cname,
+		    cur->ext_namep->cname) == 0);
+
+		if (same_sp && nxt->ext_seq != cur->ext_seq + 1) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "%s: sequence numbers are "
+			    "incorrect: %d should be %d\n"),
+			    nxt->ext_namep->cname,
+			    nxt->ext_seq, cur->ext_seq + 1);
+			return (-1);
+		}
+
+		cur = nxt;
+	}
+
+	return (0);
+}
+
+
+/*
+ * FUNCTION:	meta_sp_resolve_name_conflict()
+ * INPUT:	sp	- name of set we are recovering in.
+ *		old_np	- name pointer of soft partition we found on disk.
+ * OUTPUT:	new_np	- name pointer for new soft partition name.
+ *		ep	- error pointer returned.
+ * RETURNS:	int	- 0 - name not replaced, 1 - name replaced, -1 - error
+ *			  (including the user declining, or standard input
+ *			  ending before a new name was supplied)
+ * PURPOSE:	Check to see if the name of one of the soft partitions we found
+ *		on disk already exists in the metadb.  If so, prompt for a new
+ *		name.  In addition, we keep a static array of names that
+ *		will be recovered from this device since these names don't
+ *		exist in the configuration at this point but cannot be
+ *		recovered more than once.
+ */
+static int
+meta_sp_resolve_name_conflict(
+	mdsetname_t	*sp,
+	mdname_t	*old_np,
+	mdname_t	**new_np,
+	md_error_t	*ep
+)
+{
+	char		yesno[255];
+	char		*yes;
+	char		newname[MD_SP_MAX_DEVNAME_PLUS_1];
+	size_t		len;
+	/*
+	 * nunits is static because it is only computed on the first call
+	 * (together with used_names) but is consulted on every call.
+	 */
+	static int	nunits = 0;
+	static int	*used_names = NULL;
+
+	assert(old_np != NULL);
+
+	if (used_names == NULL) {
+		if ((nunits = meta_get_nunits(ep)) < 0)
+			return (-1);
+		used_names = Zalloc(nunits * sizeof (int));
+	}
+
+	/* see if it exists already */
+	if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 &&
+	    metagetmiscname(old_np, ep) == NULL) {
+		if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
+			return (-1);
+		else {
+			used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1;
+			mdclrerror(ep);
+			return (0);
+		}
+	}
+
+	/* name exists, ask the user for a new one */
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "WARNING: A soft partition named %s was found in the extent\n"
+	    "headers, but this name already exists in the metadb "
+	    "configuration.\n"
+	    "In order to continue recovery you must supply\n"
+	    "a new name for this soft partition.\n"), old_np->cname);
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Would you like to continue and supply a new name? (yes/no) "));
+
+	/* no input, or just a newline, is treated as "no" */
+	(void) fflush(stdout);
+	if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
+	    (strlen(yesno) == 1))
+		(void) snprintf(yesno, sizeof (yesno), "%s\n",
+		    dgettext(TEXT_DOMAIN, "no"));
+	yes = dgettext(TEXT_DOMAIN, "yes");
+	if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
+		return (-1);
+	}
+
+	(void) fflush(stdin);
+
+	/* get the new name */
+	for (;;) {
+		(void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name "
+		    "for this soft partition (dXXXX) "));
+		(void) fflush(stdout);
+
+		/*
+		 * Once standard input is exhausted no name can ever be
+		 * read, so give up instead of prompting forever.
+		 */
+		if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL)
+			return (-1);
+
+		/* remove newline character (the line may lack one) */
+		len = strlen(newname);
+		if (len > 0 && newname[len - 1] == '\n')
+			newname[len - 1] = '\0';
+
+		if (!(is_metaname(newname)) ||
+		    (meta_init_make_device(&sp, newname, ep) != 0)) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Invalid metadevice name\n"));
+			(void) fflush(stderr);
+			continue;
+		}
+
+		if ((*new_np = metaname(&sp, newname, ep)) == NULL) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Invalid metadevice name\n"));
+			(void) fflush(stderr);
+			continue;
+		}
+
+		assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits);
+		/* make sure the name isn't already being used */
+		if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] ||
+		    metagetmiscname(*new_np, ep) != NULL) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "That name already exists\n"));
+			continue;
+		} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
+			return (-1);
+
+		break;
+	}
+
+	/* got a new name, place in used array and return */
+	used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1;
+	mdclrerror(ep);
+	return (1);
+}
+
+/*
+ * FUNCTION:	meta_sp_validate_wm()
+ * INPUT:	sp	- set name we are recovering in
+ *		compnp	- name pointer for device we are recovering from
+ *		options	- metarecover options
+ * OUTPUT:	ep	- error pointer returned
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	validate and display watermark configuration.  walk the
+ *		on-disk watermark structures and validate the information
+ *		found within.  since a watermark configuration is
+ *		"self-defining", the act of traversing the watermarks
+ *		is part of the validation process.  The extent list built
+ *		for the walk is private to this routine and is released
+ *		before returning.
+ */
+static int
+meta_sp_validate_wm(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t	*extlist = NULL;
+	sp_ext_node_t	*ext;
+	int		num_sps = 0;
+	int		rval;
+
+	if ((options & MDCMD_VERBOSE) != 0)
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Verifying on-disk structures on %s.\n"),
+		    compnp->cname);
+
+	/*
+	 * for each watermark, build an ext_node, place on list.
+	 */
+	rval = meta_sp_extlist_from_wm(sp, compnp, &extlist,
+	    meta_sp_cmp_by_nameseq, ep);
+
+	if ((options & MDCMD_VERBOSE) != 0) {
+		/* print out what we found */
+		if (extlist == NULL)
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "No extent headers found on %s.\n"),
+			    compnp->cname);
+		else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "The following extent headers were found on %s.\n"),
+			    compnp->cname);
+			meta_sp_display_exthdr();
+		}
+		for (ext = extlist; ext != NULL; ext = ext->ext_next)
+			meta_sp_display_ext(ext);
+	}
+
+	if (rval < 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: On-disk structures invalid or "
+		    "no soft partitions found.\n"),
+		    compnp->cname);
+		rval = -1;
+		goto out;
+	}
+
+	assert(extlist != NULL);
+
+	/*
+	 * count number of soft partitions: an allocated extent whose
+	 * successor carries the same name is not the last extent of its
+	 * soft partition, so only the last one of each is counted.
+	 */
+	for (ext = extlist;
+	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
+	    ext = ext->ext_next) {
+		if (ext->ext_next != NULL &&
+		    ext->ext_next->ext_namep != NULL &&
+		    strcmp(ext->ext_next->ext_namep->cname,
+		    ext->ext_namep->cname) == 0)
+			continue;
+		num_sps++;
+	}
+
+	if ((options & MDCMD_VERBOSE) != 0)
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Found %d soft partition(s) on %s.\n"), num_sps,
+		    compnp->cname);
+
+	if (num_sps == 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: No soft partitions.\n"), compnp->cname);
+		rval = mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname);
+		goto out;
+	}
+
+	/* check sequence numbers */
+	if ((options & MDCMD_VERBOSE) != 0)
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Checking sequence numbers.\n"));
+
+	if (meta_sp_checkseq(extlist) != 0) {
+		rval = mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname);
+		goto out;
+	}
+
+	rval = 0;
+
+out:
+	/* the extent list was only needed for validation */
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_sp_validate_unit()
+ * INPUT:	sp	- name of set we are recovering in
+ *		compnp	- name of component we are recovering from
+ *		options	- metarecover options
+ * OUTPUT:	ep	- error pointer returned
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	validate and display metadb configuration.  begin by getting
+ *		all soft partitions built on the specified component.  get
+ *		the unit structure for each one and validate the fields within:
+ *		the status must be in range, the virtual offsets of the
+ *		extents must be contiguous starting at zero, and no extent
+ *		may reach or pass the end of the component.
+ */
+static int
+meta_sp_validate_unit(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_sp_t		*msp;
+	mdnamelist_t	*spnlp = NULL;
+	mdnamelist_t	*namep = NULL;
+	int		count;
+	int		rval = -1;
+	uint_t		extn;
+	sp_ext_length_t	size;
+
+	if ((options & MDCMD_VERBOSE) != 0)
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Validating soft partition metadb entries.\n"),
+		    compnp->cname);
+
+	if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+
+	/* get all soft partitions on component */
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
+
+	if (count == 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: No soft partitions.\n"), compnp->cname);
+		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
+	} else if (count < 0) {
+		return (-1);
+	}
+
+	/*
+	 * Now go through the soft partitions and check each one.  From
+	 * here on every exit goes through "out" so the name list is freed.
+	 */
+	for (namep = spnlp; namep != NULL; namep = namep->next) {
+		mdname_t	*curnp = namep->namep;
+		sp_ext_offset_t	curvoff;
+
+		/* get the unit structure */
+		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
+			goto out;
+
+		/* verify generic unit structure parameters */
+		if ((options & MDCMD_VERBOSE) != 0)
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "\nVerifying device %s.\n"),
+			    curnp->cname);
+
+		/*
+		 * MD_SP_LAST is an invalid state and is always the
+		 * highest numbered.
+		 */
+		if (msp->status >= MD_SP_LAST) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: status value %u is out of range.\n"),
+			    curnp->cname, msp->status);
+			rval = mdmderror(ep, MDE_RECOVER_FAILED,
+			    0, curnp->cname);
+			goto out;
+		} else if ((options & MDCMD_VERBOSE) != 0) {
+			uint_t	tstate = 0;
+
+			/* the transient state is only fetched for metadevices */
+			if (metaismeta(msp->compnamep)) {
+				if (meta_get_tstate(msp->common.namep->dev,
+				    &tstate, ep) != 0)
+					goto out;
+			}
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Status \"%s\" is valid.\n"),
+			    curnp->cname, meta_sp_status_to_name(msp->status,
+			    tstate & MD_DEV_ERRORED));
+		}
+
+		/* Now verify each extent */
+		if ((options & MDCMD_VERBOSE) != 0)
+			(void) printf("%14s %21s %21s %21s\n",
+			    dgettext(TEXT_DOMAIN, "Extent Number"),
+			    dgettext(TEXT_DOMAIN, "Virtual Offset"),
+			    dgettext(TEXT_DOMAIN, "Physical Offset"),
+			    dgettext(TEXT_DOMAIN, "Length"));
+
+		curvoff = 0ULL;
+		for (extn = 0; extn < msp->ext.ext_len; extn++) {
+			md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
+
+			if ((options & MDCMD_VERBOSE) != 0)
+				(void) printf("%14u %21llu %21llu %21llu\n",
+				    extn, extp->voff, extp->poff, extp->len);
+
+			/* each extent must start where the previous ended */
+			if (extp->voff != curvoff) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: virtual offset for extent %u "
+				    "is inconsistent, expected %llu, "
+				    "got %llu.\n"), curnp->cname, extn,
+				    curvoff, extp->voff);
+				rval = mdmderror(ep, MDE_RECOVER_FAILED,
+				    0, compnp->cname);
+				goto out;
+			}
+
+			/*
+			 * make sure extent does not drop off the end.  An
+			 * extent that stops exactly at the end of the
+			 * device is rejected as before; one that runs
+			 * past it is now rejected as well.
+			 */
+			if ((extp->poff + extp->len) >= size) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: extent %u at offset %llu, "
+				    "length %llu exceeds the size of the "
+				    "device, %llu.\n"), curnp->cname,
+				    extn, extp->poff, extp->len, size);
+				rval = mdmderror(ep, MDE_RECOVER_FAILED,
+				    0, compnp->cname);
+				goto out;
+			}
+
+			curvoff += extp->len;
+		}
+	}
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partition metadb configuration is valid\n"),
+		    compnp->cname);
+	}
+	rval = 0;
+
+out:
+	metafreenamelist(spnlp);
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_sp_validate_wm_and_unit()
+ * INPUT:	sp	- name of set we are recovering in
+ *		compnp	- name of device we are recovering from
+ *		options	- metarecover options
+ * OUTPUT:	ep	- error pointer returned
+ * RETURNS:	int	- 0 - success, -1 error
+ * PURPOSE:	cross-validate and display watermarks and metadb records.
+ *		get both the unit structures for the soft partitions built
+ *		on the specified component and the watermarks found on that
+ *		component and check to make sure they are consistent with
+ *		each other.
+ */
+static int
+meta_sp_validate_wm_and_unit(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t	*wmlist = NULL;
+	sp_ext_node_t	*unitlist = NULL;
+	sp_ext_node_t	*unitext;
+	sp_ext_node_t	*wmext;
+	sp_ext_offset_t	tmpunitoff;
+	sp_ext_length_t	size;
+	mdnamelist_t	*spnlp = NULL;
+	int		count;
+	int		rval = 0;
+	int		verbose = (options & MDCMD_VERBOSE);
+
+	/* get unit structure list */
+	count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep);
+	if (count <= 0)
+		return (-1);
+
+	/*
+	 * Size the component once and refuse to continue if the size
+	 * cannot be determined; the end extent below is placed relative
+	 * to it.
+	 */
+	if ((size = metagetsize(np, ep)) == MD_DISKADDR_ERROR) {
+		metafreenamelist(spnlp);
+		return (-1);
+	}
+
+	/* seed the unit list with the end-of-device extent */
+	meta_sp_list_insert(NULL, NULL, &unitlist,
+	    size - MD_SP_WMSIZE, MD_SP_WMSIZE,
+	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) {
+		metafreenamelist(spnlp);
+		/* don't leak the end extent inserted above */
+		if (unitlist != NULL)
+			meta_sp_list_free(&unitlist);
+		return (-1);
+	}
+
+	metafreenamelist(spnlp);
+
+	meta_sp_list_freefill(&unitlist, size);
+
+	/* build the second list from the watermarks found on the device */
+	if (meta_sp_extlist_from_wm(sp, np, &wmlist,
+	    meta_sp_cmp_by_offset, ep) < 0) {
+		meta_sp_list_free(&unitlist);
+		return (-1);
+	}
+
+	if (getenv(META_SP_DEBUG)) {
+		meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n");
+		meta_sp_list_dump(unitlist);
+		meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n");
+		meta_sp_list_dump(wmlist);
+	}
+
+	/*
+	 * step through both lists and compare allocated nodes.  Free
+	 * nodes and end watermarks may differ between the two but
+	 * that's generally ok, and if they're wrong will typically
+	 * cause misplaced allocated extents.
+	 */
+	if (verbose)
+		(void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb "
+		    "allocations match extent headers.\n"), np->cname);
+
+	unitext = unitlist;
+	wmext = wmlist;
+	while ((wmext != NULL) && (unitext != NULL)) {
+		/* find next allocated extents in each list */
+		while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC)
+			wmext = wmext->ext_next;
+
+		while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC)
+			unitext = unitext->ext_next;
+
+		if (wmext == NULL || unitext == NULL)
+			break;
+
+		if (verbose) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Metadb extent:\n"));
+			meta_sp_display_exthdr();
+			meta_sp_display_ext(unitext);
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Extent header extent:\n"));
+			meta_sp_display_exthdr();
+			meta_sp_display_ext(wmext);
+			(void) printf("\n");
+		}
+
+		/* a mismatch is recorded, but the walk continues */
+		if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0)
+			rval = -1;
+
+		/*
+		 * if the offsets aren't equal, only increment the
+		 * lowest one in hopes of getting the lists back in sync.
+		 * The unit offset is saved first because unitext may be
+		 * advanced before the second comparison is made.
+		 */
+		tmpunitoff = unitext->ext_offset;
+		if (unitext->ext_offset <= wmext->ext_offset)
+			unitext = unitext->ext_next;
+		if (wmext->ext_offset <= tmpunitoff)
+			wmext = wmext->ext_next;
+	}
+
+	/*
+	 * if both lists aren't at the end then there are extra
+	 * allocated nodes in one of them.
+	 */
+	if (wmext != NULL) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: extent headers contain allocations not in "
+		    "the metadb\n\n"), np->cname);
+		rval = -1;
+	}
+
+	if (unitext != NULL) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: metadb contains allocations not in the extent "
+		    "headers\n\n"), np->cname);
+		rval = -1;
+	}
+
+	if (options & MDCMD_PRINT) {
+		if (rval == 0) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Soft Partition metadb matches extent "
+			    "header configuration\n"), np->cname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Soft Partition metadb does not match extent "
+			    "header configuration\n"), np->cname);
+		}
+	}
+
+	/* both extent lists were built here and are no longer needed */
+	if (unitlist != NULL)
+		meta_sp_list_free(&unitlist);
+	if (wmlist != NULL)
+		meta_sp_list_free(&wmlist);
+
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_sp_validate_exts()
+ * INPUT:	compnp	- name pointer for device we are recovering from
+ *		wmext	- extent node representing watermark
+ *		unitext	- extent node from unit structure
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 - success, mdmderror return code - error
+ * PURPOSE:	Takes two extent nodes and checks them against each other.
+ *		offset, length, sequence number, set, and name are compared.
+ */
+static int
+meta_sp_validate_exts(
+	mdname_t	*compnp,
+	sp_ext_node_t	*wmext,
+	sp_ext_node_t	*unitext,
+	md_error_t	*ep
+)
+{
+	/*
+	 * The fields are compared one after another.  The first one that
+	 * differs is reported on stderr and ends the comparison through
+	 * the shared failure exit at the bottom of the function.
+	 */
+	if (unitext->ext_offset != wmext->ext_offset) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header offsets differ.\n"),
+		    compnp->cname);
+		goto mismatch;
+	}
+
+	if (unitext->ext_length != wmext->ext_length) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header lengths differ.\n"),
+		    compnp->cname);
+		goto mismatch;
+	}
+
+	if (unitext->ext_seq != wmext->ext_seq) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header sequence numbers "
+		    "differ.\n"), compnp->cname);
+		goto mismatch;
+	}
+
+	if (unitext->ext_type != wmext->ext_type) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header types differ.\n"),
+		    compnp->cname);
+		goto mismatch;
+	}
+
+	/*
+	 * Set: a set pointer present on only one side is a mismatch.
+	 * With a pointer on both sides the set names have to be equal;
+	 * with none on either side there is nothing to compare.
+	 */
+	if ((unitext->ext_setp != NULL) != (wmext->ext_setp != NULL)) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header set values "
+		    "differ.\n"), compnp->cname);
+		goto mismatch;
+	}
+
+	if (unitext->ext_setp != NULL &&
+	    strcmp(unitext->ext_setp->setname,
+	    wmext->ext_setp->setname) != 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header set names "
+		    "differ.\n"), compnp->cname);
+		goto mismatch;
+	}
+
+	/*
+	 * Name: same rules as for the set pointer above.
+	 */
+	if ((unitext->ext_namep != NULL) != (wmext->ext_namep != NULL)) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header name values "
+		    "differ.\n"), compnp->cname);
+		goto mismatch;
+	}
+
+	if (unitext->ext_namep != NULL &&
+	    strcmp(wmext->ext_namep->cname,
+	    unitext->ext_namep->cname) != 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%s: unit structure and extent header names "
+		    "differ.\n"), compnp->cname);
+		goto mismatch;
+	}
+
+	/* every compared field agrees */
+	return (0);
+
+mismatch:
+	return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
+}
+
+/*
+ * FUNCTION:	update_sp_status()
+ * INPUT:	sp	- name of set we are recovering in
+ *		minors	- pointer to an array of soft partition minor numbers
+ *		num_sps	- number of minor numbers in array
+ *		status	- new status to be applied to all soft parts in array
+ *		mn_set	- set if current set is a multi-node set
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	update  status of soft partitions to new status. minors is an
+ *		array of minor numbers to apply the new status to.
+ *		If mn_set is set, a message is sent to all nodes in the
+ *		cluster to update the status locally.
+ */
+static int
+update_sp_status(
+	mdsetname_t	*sp,
+	minor_t		*minors,
+	int		num_sps,
+	sp_status_t	status,
+	bool_t		mn_set,
+	md_error_t	*ep
+)
+{
+	int	i;
+	int	err = 0;
+
+	if (mn_set) {
+		/*
+		 * Multi-node set: send one SP_SETSTAT message per soft
+		 * partition.  A failure is remembered in err but does not
+		 * stop the remaining partitions from being processed.
+		 */
+		md_mn_msg_sp_setstat_t	sp_setstat_params;
+		int			result;
+		md_mn_result_t		*resp;
+
+		for (i = 0; i < num_sps; i++) {
+			sp_setstat_params.sp_setstat_mnum = minors[i];
+			sp_setstat_params.sp_setstat_status = status;
+
+			/*
+			 * Start every send with a clean result pointer.
+			 * The result of the previous iteration has already
+			 * been released, so a stale value left here would
+			 * be examined and freed a second time if this send
+			 * returns without supplying a new result.
+			 */
+			resp = NULL;
+
+			result = mdmn_send_message(sp->setno,
+			    MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS,
+			    (char *)&sp_setstat_params,
+			    sizeof (sp_setstat_params),
+			    &resp, ep);
+			if (resp != NULL) {
+				if (resp->mmr_exitval != 0)
+					err = -1;
+				free_result(resp);
+			}
+			if (result != 0) {
+				err = -1;
+			}
+		}
+	} else {
+		/* not a multi-node set: update all partitions in one call */
+		if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0)
+			err = -1;
+	}
+	if (err < 0) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "Error updating status on recovered soft "
+		    "partitions.\n"));
+	}
+	return (err);
+}
+
+/*
+ * FUNCTION:	meta_sp_recover_from_wm()
+ * INPUT:	sp	- name of set we are recovering in
+ *		compnp	- name pointer for component we are recovering from
+ *		options	- metarecover options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	update metadb records to match watermarks.  begin by getting
+ *		an extlist representing all soft partitions on the component.
+ *		then build a unit structure for each soft partition.
+ *		notify user of changes, then commit each soft partition to
+ *		the metadb one at a time in the "recovering" state.  update
+ *		any watermarks that may need it	(to reflect possible name
+ *		changes), and, finally, set the status of all recovered
+ *		partitions to the "OK" state at once.
+ */
+static int
+meta_sp_recover_from_wm(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	sp_ext_node_t		*extlist = NULL;
+	sp_ext_node_t		*sp_list = NULL;
+	sp_ext_node_t		*update_list = NULL;
+	sp_ext_node_t		*ext;
+	sp_ext_node_t		*sp_ext;
+	mp_unit_t		*mp;
+	mp_unit_t		**un_array;
+	int			numexts = 0, num_sps = 0, i = 0;
+	int			err = 0;
+	int			not_recovered = 0;
+	int			committed = 0;
+	sp_ext_length_t		sp_length = 0LL;
+	mdnamelist_t		*keynlp = NULL;
+	mdname_t		*np;
+	mdname_t		*new_np;
+	int			new_name;
+	md_set_params_t		set_params;
+	minor_t			*minors = NULL;
+	char			yesno[255];
+	char			*yes;
+	bool_t			mn_set = 0;
+	md_set_desc		*sd;
+	mm_unit_t		*mm;
+	md_set_mmown_params_t	*ownpar = NULL;
+	int			comp_is_mirror = 0;
+
+	/*
+	 * if this component appears in another metadevice already, do
+	 * NOT recover from it.
+	 */
+	if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0)
+		return (-1);
+
+	/* set flag if dealing with a MN set */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			return (-1);
+		}
+		if (MD_MNSET_DESC(sd))
+			mn_set = 1;
+	}
+	/*
+	 * for each watermark, build an ext_node, place on list.
+	 */
+	if (meta_sp_extlist_from_wm(sp, compnp, &extlist,
+	    meta_sp_cmp_by_nameseq, ep) < 0)
+		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
+
+	assert(extlist != NULL);
+
+	/*
+	 * count number of soft partitions: the list is ordered by
+	 * name and sequence, so a partition ends wherever the next
+	 * extent carries a different name (or there is no next extent).
+	 */
+	for (ext = extlist;
+	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
+	    ext = ext->ext_next) {
+		if (ext->ext_next != NULL &&
+		    ext->ext_next->ext_namep != NULL &&
+		    strcmp(ext->ext_next->ext_namep->cname,
+			ext->ext_namep->cname) == 0)
+				continue;
+		num_sps++;
+	}
+
+	/* allocate array of unit structure pointers */
+	un_array = Zalloc(num_sps * sizeof (mp_unit_t *));
+
+	/*
+	 * build unit structures from list of ext_nodes.
+	 */
+	for (ext = extlist;
+	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
+	    ext = ext->ext_next) {
+		meta_sp_list_insert(ext->ext_setp, ext->ext_namep,
+		    &sp_list, ext->ext_offset, ext->ext_length,
+		    ext->ext_type, ext->ext_seq, ext->ext_flags,
+		    meta_sp_cmp_by_nameseq);
+
+		numexts++;
+		sp_length += ext->ext_length - MD_SP_WMSIZE;
+
+		/* keep accumulating while the next extent has the same name */
+		if (ext->ext_next != NULL &&
+		    ext->ext_next->ext_namep != NULL &&
+		    strcmp(ext->ext_next->ext_namep->cname,
+			ext->ext_namep->cname) == 0)
+				continue;
+
+		/*
+		 * if we made it here, we are at a soft partition
+		 * boundary in the list.
+		 */
+		if (getenv(META_SP_DEBUG)) {
+			meta_sp_debug("meta_recover_from_wm: dumping wm "
+			    "list:\n");
+			meta_sp_list_dump(sp_list);
+		}
+
+		assert(sp_list != NULL);
+		assert(sp_list->ext_namep != NULL);
+
+		if ((new_name = meta_sp_resolve_name_conflict(sp,
+		    sp_list->ext_namep, &new_np, ep)) < 0) {
+			err = 1;
+			goto out;
+		} else if (new_name) {
+			for (sp_ext = sp_list;
+			    sp_ext != NULL;
+			    sp_ext = sp_ext->ext_next) {
+				/*
+				 * insert into the update list for
+				 * watermark update.
+				 */
+				meta_sp_list_insert(sp_ext->ext_setp,
+				    new_np, &update_list, sp_ext->ext_offset,
+				    sp_ext->ext_length, sp_ext->ext_type,
+				    sp_ext->ext_seq, EXTFLG_UPDATE,
+				    meta_sp_cmp_by_offset);
+			}
+
+		}
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (mn_set) {
+				/* send message to all nodes to return key */
+				md_mn_msg_addkeyname_t	*send_params;
+				int			result;
+				md_mn_result_t		*resp = NULL;
+				int			message_size;
+
+				message_size =  sizeof (*send_params) +
+				    strlen(compnp->cname) + 1;
+				send_params = Zalloc(message_size);
+				send_params->addkeyname_setno = sp->setno;
+				(void) strcpy(&send_params->addkeyname_name[0],
+				    compnp->cname);
+				result = mdmn_send_message(sp->setno,
+				    MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS,
+				    (char *)send_params, message_size, &resp,
+				    ep);
+				Free(send_params);
+				if (resp != NULL) {
+					if (resp->mmr_exitval >= 0) {
+						compnp->key =
+						    (mdkey_t)resp->mmr_exitval;
+					} else {
+						err = 1;
+						free_result(resp);
+						goto out;
+					}
+					free_result(resp);
+				}
+				if (result != 0) {
+					err = 1;
+					goto out;
+				}
+				(void) metanamelist_append(&keynlp, compnp);
+			} else {
+				if (add_key_name(sp, compnp, &keynlp,
+				    ep) != 0) {
+					err = 1;
+					goto out;
+				}
+			}
+		}
+
+		/* create the unit structure */
+		if ((mp = meta_sp_createunit(
+		    (new_name) ? new_np : sp_list->ext_namep, compnp,
+		    sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) {
+			err = 1;
+			goto out;
+		}
+
+		if (getenv(META_SP_DEBUG)) {
+			meta_sp_debug("meta_sp_recover_from_wm: "
+			    "printing newly created unit structure");
+			meta_sp_printunit(mp);
+		}
+
+		/* place in unit structure array */
+		un_array[i++] = mp;
+
+		/* free sp_list and reset the per-partition accumulators */
+		meta_sp_list_free(&sp_list);
+		sp_list = NULL;
+		numexts = 0;
+		sp_length = 0LL;
+	}
+
+	/* display configuration updates */
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "The following soft partitions were found and will be added to\n"
+	    "your metadevice configuration.\n"));
+	(void) printf("%5s %15s %18s\n",
+	    dgettext(TEXT_DOMAIN, "Name"),
+	    dgettext(TEXT_DOMAIN, "Size"),
+	    dgettext(TEXT_DOMAIN, "No. of Extents"));
+	for (i = 0; i < num_sps; i++) {
+		(void) printf("%5s%lu %15llu %9d\n", "d",
+		    MD_MIN2UNIT(MD_SID(un_array[i])),
+		    un_array[i]->un_length, un_array[i]->un_numexts);
+	}
+
+	/* without MDCMD_DOIT this is a report only; nothing is committed */
+	if (!(options & MDCMD_DOIT)) {
+		not_recovered = 1;
+		goto out;
+	}
+
+	/* ask user for confirmation */
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "WARNING: You are about to add one or more soft partition\n"
+	    "metadevices to your metadevice configuration.  If there\n"
+	    "appears to be an error in the soft partition(s) displayed\n"
+	    "above, do NOT proceed with this recovery operation.\n"));
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Are you sure you want to do this (yes/no)? "));
+
+	(void) fflush(stdout);
+	/* end of input or an empty line is treated as "no" */
+	if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
+	    (strlen(yesno) == 1))
+		(void) snprintf(yesno, sizeof (yesno), "%s\n",
+		    dgettext(TEXT_DOMAIN, "no"));
+	yes = dgettext(TEXT_DOMAIN, "yes");
+	if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
+		not_recovered = 1;
+		goto out;
+	}
+
+	/* commit records one at a time */
+	for (i = 0; i < num_sps; i++) {
+		(void) memset(&set_params, 0, sizeof (set_params));
+		set_params.mnum = MD_SID(un_array[i]);
+		set_params.size = (un_array[i])->c.un_size;
+		set_params.mdp = (uintptr_t)(un_array[i]);
+		set_params.options =
+				meta_check_devicesize(un_array[i]->un_length);
+		if (set_params.options == MD_CRO_64BIT) {
+			un_array[i]->c.un_revision = MD_64BIT_META_DEV;
+		} else {
+			un_array[i]->c.un_revision = MD_32BIT_META_DEV;
+		}
+		MD_SETDRIVERNAME(&set_params, MD_SP,
+		    MD_MIN2SET(set_params.mnum));
+
+		np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep);
+
+		/*
+		 * If this is an MN set, send the MD_IOCSET ioctl to all nodes
+		 */
+		if (mn_set) {
+			md_mn_msg_iocset_t	*send_params;
+			int			result;
+			md_mn_result_t		*resp = NULL;
+			int			unit_size;
+			int			mess_size;
+
+			/*
+			 * Calculate message size. md_mn_msg_iocset_t only
+			 * contains one extent, so increment the size to
+			 * include all extents
+			 */
+			unit_size = sizeof (*un_array[i]) -
+			    sizeof (mp_ext_t) +
+			    (un_array[i]->un_numexts * sizeof (mp_ext_t));
+			mess_size = sizeof (*send_params) -
+			    sizeof (mp_ext_t) +
+			    (un_array[i]->un_numexts * sizeof (mp_ext_t));
+
+			/*
+			 * The message is built in a buffer of mess_size
+			 * bytes.  A md_mn_msg_iocset_t variable on the
+			 * stack has room for a single extent only, so
+			 * copying a unit with more extents into it, and
+			 * then sending mess_size bytes from it, would run
+			 * past the end of the variable.
+			 */
+			send_params = Zalloc(mess_size);
+			send_params->iocset_params = set_params;
+			(void) memcpy(&send_params->unit, un_array[i],
+			    unit_size);
+			result = mdmn_send_message(sp->setno,
+			    MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS,
+			    (char *)send_params, mess_size, &resp,
+			    ep);
+			Free(send_params);
+			if (resp != NULL) {
+				if (resp->mmr_exitval != 0)
+					err = 1;
+				free_result(resp);
+			}
+			if (result != 0) {
+				err = 1;
+			}
+		} else {
+			if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+			    np->cname) != 0) {
+				err = 1;
+			}
+		}
+
+		if (err == 1) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "%s: Error committing record to metadb.\n"),
+			    np->cname);
+			goto out;
+		}
+
+		/* note that we've committed a record */
+		if (!committed)
+			committed = 1;
+
+		/* update any watermarks that need it */
+		if (update_list != NULL) {
+			md_sp_t *msp;
+
+			/*
+			 * Check to see if we're trying to create a partition
+			 * on a mirror. If so we may have to enforce an
+			 * ownership change before writing the watermark out.
+			 */
+			if (metaismeta(compnp)) {
+				char *miscname;
+
+				miscname = metagetmiscname(compnp, ep);
+				if (miscname != NULL)
+					comp_is_mirror = (strcmp(miscname,
+					    MD_MIRROR) == 0);
+				else
+					comp_is_mirror = 0;
+			}
+			/*
+			 * If this is a MN set and the component is a mirror,
+			 * change ownership to this node in order to write the
+			 * watermarks
+			 */
+			if (mn_set && comp_is_mirror) {
+				mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep);
+				if (mm == NULL) {
+					err = 1;
+					goto out;
+				} else {
+					err = meta_mn_change_owner(&ownpar,
+						sp->setno,
+						meta_getminor(compnp->dev),
+						sd->sd_mn_mynode->nd_nodeid,
+						MD_MN_MM_PREVENT_CHANGE |
+						    MD_MN_MM_SPAWN_THREAD);
+					if (err != 0)
+						goto out;
+				}
+			}
+
+			if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
+				err = 1;
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: Error updating extent headers.\n"),
+				    np->cname);
+				goto out;
+			}
+			if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) {
+				err = 1;
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "%s: Error updating extent headers "
+				    "on disk.\n"), np->cname);
+				goto out;
+			}
+		}
+		/*
+		 * If we have changed ownership earlier and prevented any
+		 * ownership changes, we can now allow ownership changes
+		 * again.
+		 */
+		if (ownpar) {
+			(void) meta_mn_change_owner(&ownpar, sp->setno,
+			    ownpar->d.mnum,
+			    ownpar->d.owner,
+			    MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD);
+		}
+	}
+
+	/* update status of all soft partitions to OK */
+	minors = Zalloc(num_sps * sizeof (minor_t));
+	for (i = 0; i < num_sps; i++)
+		minors[i] = MD_SID(un_array[i]);
+
+	err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep);
+	if (err != 0)
+		goto out;
+
+	if (options & MDCMD_PRINT)
+		(void) printf(dgettext(TEXT_DOMAIN, "%s: "
+		    "Soft Partitions recovered from device.\n"),
+		    compnp->cname);
+out:
+	/* free memory */
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+	if (sp_list != NULL)
+		meta_sp_list_free(&sp_list);
+	if (update_list != NULL)
+		meta_sp_list_free(&update_list);
+	if (un_array != NULL)	{
+		for (i = 0; i < num_sps; i++)
+			Free(un_array[i]);
+		Free(un_array);
+	}
+	if (minors != NULL)
+		Free(minors);
+	if (ownpar != NULL)
+		Free(ownpar);
+	(void) fflush(stdout);
+
+	if ((keynlp != NULL) && (committed != 1)) {
+		/*
+		 * if we haven't committed any softparts, either because of an
+		 * error or because the user decided not to proceed, delete
+		 * namelist key for the component
+		 */
+		if (mn_set) {
+			mdnamelist_t	*p;
+
+			for (p = keynlp; (p != NULL); p = p->next) {
+				mdname_t		*np = p->namep;
+				md_mn_msg_delkeyname_t	send_params;
+				md_mn_result_t		*resp = NULL;
+
+				send_params.delkeyname_dev = np->dev;
+				send_params.delkeyname_setno = sp->setno;
+				send_params.delkeyname_key = np->key;
+				(void) mdmn_send_message(sp->setno,
+				    MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS,
+				    (char *)&send_params, sizeof (send_params),
+				    &resp, ep);
+				if (resp != NULL) {
+					free_result(resp);
+				}
+			}
+		} else {
+			(void) del_key_names(sp, keynlp, NULL);
+		}
+	}
+
+	metafreenamelist(keynlp);
+
+	if (err)
+		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
+
+	if (not_recovered)
+		if (options & MDCMD_PRINT)
+			(void) printf(dgettext(TEXT_DOMAIN, "%s: "
+			    "Soft Partitions NOT recovered from device.\n"),
+			    compnp->cname);
+	return (0);
+}
+
+/*
+ * FUNCTION:	meta_sp_recover_from_unit()
+ * INPUT:	sp	- name of set we are recovering in
+ *		compnp	- name of component we are recovering from
+ *		options	- metarecover options
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	update watermarks to match metadb records.  begin by getting
+ *		a namelist representing all soft partitions on the specified
+ *		component.  then, build an extlist representing the soft
+ *		partitions, filling in the freespace extents.  notify user
+ *		of changes, place all soft partitions into the "recovering"
+ *		state and update the watermarks.  finally, return all soft
+ *		partitions to the "OK" state.
+ */
+static int
+meta_sp_recover_from_unit(
+	mdsetname_t	*sp,
+	mdname_t	*compnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*spnlp = NULL;
+	mdnamelist_t	*nlp = NULL;
+	sp_ext_node_t	*ext = NULL;
+	sp_ext_node_t	*extlist = NULL;
+	int		count;
+	char		yesno[255];
+	char		*yes;
+	int		rval = 0;
+	minor_t		*minors = NULL;
+	int		i;
+	md_sp_t		*msp;
+	md_set_desc	*sd;
+	bool_t		mn_set = 0;
+	daddr_t		start_block;
+	sp_ext_length_t	size;
+
+	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
+	if (count <= 0)
+		return (-1);
+
+	/*
+	 * From here on every failure leaves through "out" so that the
+	 * name list, and the extent list once it exists, are released.
+	 */
+
+	/* set flag if dealing with a MN set */
+	if (!metaislocalset(sp)) {
+		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+			rval = -1;
+			goto out;
+		}
+		if (MD_MNSET_DESC(sd))
+			mn_set = 1;
+	}
+	/*
+	 * Save the XDR unit structure for one of the soft partitions;
+	 * we'll use this later to provide metadevice context to
+	 * update the watermarks so the device can be resolved by
+	 * devid instead of dev_t.
+	 */
+	if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) {
+		rval = -1;
+		goto out;
+	}
+
+	if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
+	    MD_DISKADDR_ERROR) {
+		rval = -1;
+		goto out;
+	}
+
+	/* the end extent is placed relative to the component size */
+	if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) {
+		rval = -1;
+		goto out;
+	}
+
+	/* reserved space at the front, end extent at the back */
+	meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
+	    EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
+	meta_sp_list_insert(NULL, NULL, &extlist,
+	    size - MD_SP_WMSIZE, MD_SP_WMSIZE,
+	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
+
+	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
+		rval = -1;
+		goto out;
+	}
+
+	assert(extlist != NULL);
+	if ((options & MDCMD_VERBOSE) != 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Updating extent headers on device %s from metadb.\n\n"),
+		    compnp->cname);
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "The following extent headers will be written:\n"));
+		meta_sp_display_exthdr();
+	}
+
+	meta_sp_list_freefill(&extlist, size);
+
+	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
+
+		/* mark every node for updating except the reserved space */
+		if (ext->ext_type != EXTTYP_RESERVED) {
+			ext->ext_flags |= EXTFLG_UPDATE;
+
+			/* print extent information */
+			if ((options & MDCMD_VERBOSE) != 0)
+				meta_sp_display_ext(ext);
+		}
+	}
+
+	/* request verification and then update all watermarks */
+	if ((options & MDCMD_DOIT) != 0) {
+
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "\nWARNING: You are about to overwrite portions of %s\n"
+		    "with soft partition metadata. The extent headers will be\n"
+		    "written to match the existing metadb configuration.  If\n"
+		    "the device was not previously setup with this\n"
+		    "configuration, data loss may result.\n\n"),
+		    compnp->cname);
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Are you sure you want to do this (yes/no)? "));
+
+		(void) fflush(stdout);
+		/* end of input or an empty line is treated as "no" */
+		if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
+		    (strlen(yesno) == 1))
+			(void) snprintf(yesno, sizeof (yesno),
+			    "%s\n", dgettext(TEXT_DOMAIN, "no"));
+		yes = dgettext(TEXT_DOMAIN, "yes");
+		if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) {
+			/* place soft partitions into recovering state */
+			minors = Zalloc(count * sizeof (minor_t));
+			for (nlp = spnlp, i = 0;
+			    nlp != NULL && i < count;
+			    nlp = nlp->next, i++) {
+				assert(nlp->namep != NULL);
+				minors[i] = meta_getminor(nlp->namep->dev);
+			}
+			if (update_sp_status(sp, minors, count,
+			    MD_SP_RECOVER, mn_set, ep) != 0) {
+				rval = -1;
+				goto out;
+			}
+
+			/* update the watermarks */
+			if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
+				rval = -1;
+				goto out;
+			}
+
+			if (options & MDCMD_PRINT) {
+				(void) printf(dgettext(TEXT_DOMAIN, "%s: "
+				    "Soft Partitions recovered from metadb\n"),
+				    compnp->cname);
+			}
+
+			/* return soft partitions to the OK state */
+			if (update_sp_status(sp, minors, count,
+			    MD_SP_OK, mn_set, ep) != 0) {
+				rval = -1;
+				goto out;
+			}
+
+			rval = 0;
+			goto out;
+		}
+	}
+
+	/* reached when MDCMD_DOIT is not set or the user declined */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Soft Partitions NOT recovered from metadb\n"),
+		    compnp->cname);
+	}
+
+out:
+	if (minors != NULL)
+		Free(minors);
+	metafreenamelist(spnlp);
+	/* extlist is still NULL when a failure happened before it was built */
+	if (extlist != NULL)
+		meta_sp_list_free(&extlist);
+	(void) fflush(stdout);
+	return (rval);
+}
+
+
+/*
+ * FUNCTION:	meta_sp_update_abr()
+ * INPUT:	sp	- name of set we are recovering in
+ * OUTPUT:	ep	- return error pointer
+ * RETURNS:	int	- 0 - success, -1 - error
+ * PURPOSE:	update the ABR state for all soft partitions in the set. This
+ *		is called when joining a set. It sends a message to the master
+ *		node for each soft partition to get the value of tstate and
+ *		then sets ABR, if required, by opening the sp, setting ABR
+ *		and then closing the sp. This approach is taken rather than
+ *		just issuing the MD_MN_SET_CAP ioctl, in order to deal with
+ *		the case when we have another node simultaneously unsetting ABR.
+ */
+int
+meta_sp_update_abr(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*sp_names = NULL;
+	mdnamelist_t	*cur;
+	mdname_t	*devnp = NULL;
+	md_unit_t	*un;
+	char		path[MAXPATHLEN];
+	int		mnum, fd;
+	int		set_failed;
+	int		rval = -1;
+	volcap_t	vc;
+	uint_t		tstate;
+
+	if (meta_get_sp_names(sp, &sp_names, 0, ep) < 0)
+		return (-1);
+
+	/* no soft partitions in this set: nothing to update */
+	if (sp_names == NULL)
+		return (0);
+
+	/* visit every soft partition in the set */
+	for (cur = sp_names; cur != NULL; cur = cur->next) {
+		devnp = cur->namep;
+
+		/* only top level metadevices are of interest */
+		un = meta_get_mdunit(sp, devnp, ep);
+		if (un == NULL)
+			goto out;
+		if (MD_HAS_PARENT(MD_PARENT(un))) {
+			Free(un);
+			continue;
+		}
+		Free(un);
+
+		/* ask the master for tstate; report and skip on failure */
+		if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) {
+			mdname_t	*np;
+
+			np = metamnumname(&sp, meta_getminor(devnp->dev), 0,
+			    ep);
+			if (np) {
+				md_perror(dgettext(TEXT_DOMAIN,
+				    "Unable to get tstate for %s"), np->cname);
+			}
+			continue;
+		}
+
+		/* ABR is not set on the master, so leave this one alone */
+		if ((tstate & MD_ABR_CAP) == 0)
+			continue;
+
+		/* open the raw device of the soft partition */
+		mnum = meta_getminor(devnp->dev);
+		(void) snprintf(path, MAXPATHLEN, "/dev/md/%s/rdsk/d%u",
+		    sp->setname, (unsigned)MD_MIN2UNIT(mnum));
+		fd = open(path, O_RDWR, 0);
+		if (fd < 0) {
+			md_perror(dgettext(TEXT_DOMAIN,
+			    "Could not open device %s"), path);
+			continue;
+		}
+
+		/*
+		 * Set ABR state.  A device that fails the "get" ioctl is
+		 * skipped; a failure of the "set" ioctl ends the whole
+		 * update with an error.
+		 */
+		vc.vc_info = 0;
+		vc.vc_set = 0;
+		if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) {
+			(void) close(fd);
+			continue;
+		}
+
+		vc.vc_set = DKV_ABR_CAP;
+		set_failed = (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0);
+		(void) close(fd);
+		if (set_failed)
+			goto out;
+	}
+
+	/* the whole list was processed */
+	rval = 0;
+out:
+	metafreenamelist(sp_names);
+	return (rval);
+}
+
+/*
+ * FUNCTION:	meta_mn_sp_update_abr()
+ * INPUT:	arg	- Given set.
+ * PURPOSE:	update the ABR state for all soft partitions in the set by
+ *		forking a process to call meta_sp_update_abr()
+ *		This function is only called via rpc.metad when adding a node
+ *		to a set, ie this node is being joined to the set by another
+ *		node.
+ */
+void *
+meta_mn_sp_update_abr(void *arg)
+{
+	set_t		setno = *((set_t *)arg);
+	mdsetname_t	*sp;
+	md_error_t	mde = mdnullerror;
+	int		fval;
+
+	/*
+	 * should have a set.  setno is an integer set number, so it is
+	 * compared with 0 rather than with the pointer constant NULL.
+	 */
+	assert(setno != 0);
+
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	/* nothing to do unless this is a multi-node set */
+	if (!(meta_is_mn_set(sp, &mde))) {
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+
+	/* fork a process */
+	if ((fval = md_daemonize(sp, &mde)) != 0) {
+		/*
+		 * md_daemonize will fork off a process.  This is the
+		 * parent or error.
+		 */
+		if (fval > 0) {
+			return (NULL);
+		}
+		mde_perror(&mde, "");
+		return (NULL);
+	}
+	/*
+	 * Child process should never return back to rpc.metad, but
+	 * should exit.
+	 * Flush all internally cached data inherited from parent process
+	 * since cached data will be cleared when parent process RPC request
+	 * has completed (which is possibly before this child process
+	 * can complete).
+	 * Child process can retrieve and cache its own copy of data from
+	 * rpc.metad that won't be changed by the parent process.
+	 *
+	 * Reset md_in_daemon since this child will be a client of rpc.metad
+	 * not part of the rpc.metad daemon itself.
+	 * md_in_daemon is used by rpc.metad so that libmeta can tell if
+	 * this thread is rpc.metad or any other thread.  (If this thread
+	 * was rpc.metad it could use some short circuit code to get data
+	 * directly from rpc.metad instead of doing an RPC call to rpc.metad).
+	 */
+	md_in_daemon = 0;
+	metaflushsetname(sp);
+	sr_cache_flush_setno(setno);
+	/* look the set up again now that the cached copy has been flushed */
+	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
+		mde_perror(&mde, "");
+		md_exit(sp, 1);
+	}
+
+
+	/*
+	 * Closing stdin/out/err here.
+	 */
+	(void) close(0);
+	(void) close(1);
+	(void) close(2);
+	assert(fval == 0);
+
+	/* the result is ignored; the child exits with 0 either way */
+	(void) meta_sp_update_abr(sp, &mde);
+
+	md_exit(sp, 0);
+	/*NOTREACHED*/
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_stat.c b/usr/src/lib/lvm/libmeta/common/meta_stat.c
new file mode 100644
index 0000000000..90844f9148
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_stat.c
@@ -0,0 +1,103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 1993, 1994, 2000 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Caching stat function
+ */
+
+#include <meta.h>
+
+#define	MD_NUM_STAT_HEAD	16
+
+/*
+ * One cached stat() result, chained into a hash bucket of
+ * statcache_head[].
+ */
+struct statcache {
+	struct statcache	*sc_next;	/* next entry in bucket */
+	struct stat		sc_stat;	/* saved stat() result */
+	char			*sc_filename;	/* strdup()ed lookup key */
+};
+
+/* Bucket heads of the stat cache; every chain starts out empty. */
+static struct statcache	*statcache_head[MD_NUM_STAT_HEAD] = { NULL };
+
+/*
+ * Caching wrapper around stat() for device nodes.
+ *
+ * The cache is consulted first; on a miss stat() is called and, if the
+ * file is a block or character device, the result is remembered for
+ * later calls.
+ *
+ * Returns 0 with *sbp filled in on success.  Returns -1 if stat() fails
+ * or if the file is not a block/character device (in the latter case
+ * stat() has already written *sbp and errno is not modified here).
+ * Failing to allocate a cache entry is not an error; the result is
+ * simply not cached.
+ */
+int
+meta_stat(const char *filename, struct stat *sbp)
+{
+	struct statcache	*scp;
+	const char		*cp;
+	unsigned int		hash = 0;
+
+	/*
+	 * Additive hash of the name, reduced by the table size itself
+	 * (not a literal mask) so the bucket index can never run past
+	 * statcache_head[] if MD_NUM_STAT_HEAD is ever changed.
+	 */
+	for (cp = filename; *cp != '\0'; cp++)
+		hash += (unsigned char)*cp;
+	hash %= MD_NUM_STAT_HEAD;
+
+	/* cache hit: hand back the saved copy */
+	for (scp = statcache_head[hash]; scp != NULL; scp = scp->sc_next) {
+		if (strcmp(filename, scp->sc_filename) == 0) {
+			(void) memcpy((caddr_t)sbp, (caddr_t)&scp->sc_stat,
+			    sizeof (*sbp));
+			return (0);
+		}
+	}
+
+	/* cache miss: ask the system */
+	if (stat(filename, sbp) != 0)
+		return (-1);
+
+	/* only block and character devices are accepted */
+	if (!S_ISBLK(sbp->st_mode) && !S_ISCHR(sbp->st_mode))
+		return (-1);
+
+	/* remember the result; allocation failure just skips caching */
+	scp = (struct statcache *)malloc(sizeof (*scp));
+	if (scp == NULL)
+		return (0);
+
+	(void) memcpy((caddr_t)&scp->sc_stat, (caddr_t)sbp, sizeof (*sbp));
+	scp->sc_filename = strdup(filename);
+	if (scp->sc_filename == NULL) {
+		free((char *)scp);
+		return (0);
+	}
+
+	/* push onto the front of the bucket chain */
+	scp->sc_next = statcache_head[hash];
+	statcache_head[hash] = scp;
+	return (0);
+}
+
+/*
+ * Throw away every cached stat result and leave all buckets empty.
+ */
+void
+metaflushstatcache(void)
+{
+	int	bucket;
+
+	for (bucket = 0; bucket < MD_NUM_STAT_HEAD; bucket++) {
+		struct statcache	*entry = statcache_head[bucket];
+
+		/* fetch the successor before the entry is released */
+		while (entry != NULL) {
+			struct statcache	*following = entry->sc_next;
+
+			Free(entry->sc_filename);
+			Free(entry);
+			entry = following;
+		}
+		statcache_head[bucket] = NULL;
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_stripe.c b/usr/src/lib/lvm/libmeta/common/meta_stripe.c
new file mode 100644
index 0000000000..237afcd60b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_stripe.c
@@ -0,0 +1,2496 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * stripe operations
+ */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <meta.h>
+#include <sys/lvm/md_stripe.h>
+#include <sys/lvm/md_convert.h>
+
+#define	QUOTE(x)	#x
+#define	VAL2STR(x)	QUOTE(x)
+
+/*
+ * replace stripe/concat
+ *
+ * Replace component oldnp of the stripe/concat stripenp with newnp by
+ * issuing MD_IOCREPLACE (cmd REPLACE_COMP) for the stripe's minor.
+ *
+ * If MDCMD_DOIT is not set in options this is a dry run: no name key is
+ * added for newnp and the ioctl is flagged MDIOCTL_DRYRUN.  If
+ * MDCMD_PRINT is set a confirmation line is printed on success.
+ *
+ * Returns 0 on success.  Failure paths return -1 or the result of
+ * mdsyserror()/mdstealerror(), which are handed ep to record the error.
+ */
+int
+meta_stripe_replace(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdname_t	*oldnp,
+	mdname_t	*newnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	replace_params_t	params;
+	md_dev64_t		old_dev,
+				new_dev;
+	diskaddr_t		new_start_blk,
+				new_end_blk,
+				label,
+				size,
+				start_blk;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+	/* snapshot newnp's binding before the stripe name is invalidated */
+	new_dev = newnp->dev;
+	new_start_blk = newnp->start_blk;
+	new_end_blk = newnp->end_blk;
+
+	meta_invalidate_name(stripenp);
+
+	/* the old device binding is now established */
+	if ((old_dev = oldnp->dev) == NODEV64)
+		return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+	/*
+	 * Same raw name for old and new but a different dev: put back the
+	 * values saved above.
+	 * NOTE(review): presumably the invalidate above can disturb newnp
+	 * when both names share state -- confirm.
+	 */
+	if (((strcmp(oldnp->rname, newnp->rname) == 0) &&
+	    (old_dev != new_dev))) {
+		newnp->dev = new_dev;
+		newnp->start_blk = new_start_blk;
+		newnp->end_blk = new_end_blk;
+	}
+
+	/* gather size, label and start block of the new component */
+	if ((size = metagetsize(newnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	if ((label = metagetlabel(newnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	if ((start_blk = metagetstart(sp, newnp, ep)) == MD_DISKADDR_ERROR)
+		return (-1);
+	/* a start block at or past the end leaves no usable space */
+	if (start_blk >= size) {
+		(void) mdsyserror(ep, ENOSPC, newnp->cname);
+		return (-1);
+	}
+
+	/* In dryrun mode (DOIT not set) we must not alter the mddb */
+	if (options & MDCMD_DOIT) {
+		if (add_key_name(sp, newnp, NULL, ep) != 0)
+			return (-1);
+	}
+
+	/*
+	 * There is no need to call meta_fixdevid() here as this function is
+	 * only called by the metareplace -c command which actually does
+	 * nothing (in terms of a resync) and thus does nothing with the devid.
+	 */
+
+	/* build the replace request for the stripe driver */
+	(void) memset(&params, 0, sizeof (params));
+	params.mnum = meta_getminor(stripenp->dev);
+	MD_SETDRIVERNAME(&params, MD_STRIPE, sp->setno);
+
+	params.cmd = REPLACE_COMP;
+	params.old_dev = old_dev;
+	params.new_dev = new_dev;
+	params.new_key = newnp->key;
+	params.start_blk = newnp->start_blk;
+	params.number_blks = size;
+	/* Is this just a dryrun ? */
+	if ((options & MDCMD_DOIT) == 0) {
+		params.options |= MDIOCTL_DRYRUN;
+	}
+	/* metagetlabel() returning 0 is recorded as "no label" */
+	if (label == 0)
+		params.has_label = 0;
+	else
+		params.has_label = 1;
+	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
+		/* undo the key added above; only done when not a dry run */
+		if (options & MDCMD_DOIT)
+			(void) del_key_name(sp, newnp, ep);
+		return (mdstealerror(ep, &params.mde));
+	}
+	/* drop cached name state for everything the replace touched */
+	meta_invalidate_name(oldnp);
+	meta_invalidate_name(newnp);
+	meta_invalidate_name(stripenp);
+
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: device %s is replaced with %s\n"),
+		    stripenp->cname, oldnp->cname, newnp->cname);
+
+	}
+	return (0);
+}
+
+
+/*
+ * FUNCTION:	meta_get_stripe_names()
+ * INPUT:	sp	- the set name to get stripes from
+ *		options	- options from the command line
+ * OUTPUT:	nlpp	- list of all stripe names
+ *		ep	- return error pointer
+ * RETURNS:	int	- whatever meta_get_names() returns for MD_STRIPE;
+ *			  callers in this file treat a negative value as
+ *			  an error and 0 as "no stripes found"
+ * PURPOSE:	returns a list of all stripes in the metadb
+ *		for all devices in the specified set
+ */
+int
+meta_get_stripe_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	int		rval;
+
+	rval = meta_get_names(MD_STRIPE, sp, nlpp, options, ep);
+	return (rval);
+}
+
+/*
+ * free stripe
+ *
+ * Releases each row's component array, then the row array, then the
+ * md_stripe_t itself.  Rows whose component array was never allocated
+ * (NULL) are skipped.
+ */
+void
+meta_free_stripe(
+	md_stripe_t	*stripep
+)
+{
+	md_row_t	*rows = stripep->rows.rows_val;
+	uint_t		nrows = stripep->rows.rows_len;
+	uint_t		i;
+
+	for (i = 0; i < nrows; i++) {
+		if (rows[i].comps.comps_val == NULL)
+			continue;
+		assert(rows[i].comps.comps_len > 0);
+		Free(rows[i].comps.comps_val);
+	}
+
+	if (rows != NULL) {
+		assert(nrows > 0);
+		Free(rows);
+	}
+
+	Free(stripep);
+}
+
+
+/*
+ * get stripe (common)
+ *
+ * Return the md_stripe_t describing stripenp.  If the drive name already
+ * carries a cached unit (dnp->unitp) that is returned directly;
+ * otherwise the unit is fetched with meta_get_mdunit(), converted row by
+ * row and component by component, cached in dnp->unitp and returned.
+ *
+ * "fast" is passed straight through to metakeyname() when component and
+ * hot spare names are resolved.
+ *
+ * If the environment variable META_DEBUG_START_BLK is set, the start
+ * block recorded in the unit is compared with the one obtained from
+ * metagetstart() and suspicious combinations are reported with
+ * md_eprintf().
+ *
+ * Returns NULL on failure; everything allocated here is released
+ * first.
+ */
+md_stripe_t *
+meta_get_stripe_common(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*dnp = stripenp->drivenamep;
+	char		*miscname;
+	ms_unit_t	*ms;
+	md_stripe_t	*stripep;
+	uint_t		row;
+
+	/* must have set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+	/* short circuit: already built and cached on the drive name */
+	if (dnp->unitp != NULL) {
+		assert(dnp->unitp->type == MD_DEVICE);
+		return ((md_stripe_t *)dnp->unitp);
+	}
+
+	/* get miscname and unit */
+	if ((miscname = metagetmiscname(stripenp, ep)) == NULL)
+		return (NULL);
+	if (strcmp(miscname, MD_STRIPE) != 0) {
+		(void) mdmderror(ep, MDE_NOT_STRIPE,
+			    meta_getminor(stripenp->dev), stripenp->cname);
+		return (NULL);
+	}
+	if ((ms = (ms_unit_t *)meta_get_mdunit(sp, stripenp, ep)) == NULL)
+		return (NULL);
+	assert(ms->c.un_type == MD_DEVICE);
+
+	/* allocate stripe */
+	stripep = Zalloc(sizeof (*stripep));
+
+	/* allocate rows */
+	assert(ms->un_nrows > 0);
+	stripep->rows.rows_len = ms->un_nrows;
+	stripep->rows.rows_val = Zalloc(stripep->rows.rows_len *
+	    sizeof (*stripep->rows.rows_val));
+
+	/* get common info */
+	stripep->common.namep = stripenp;
+	stripep->common.type = ms->c.un_type;
+	stripep->common.state = ms->c.un_status;
+	stripep->common.capabilities = ms->c.un_capabilities;
+	stripep->common.parent = ms->c.un_parent;
+	stripep->common.size = ms->c.un_total_blocks;
+	stripep->common.user_flags = ms->c.un_user_flags;
+	stripep->common.revision = ms->c.un_revision;
+
+	/* get options: resolve the hot spare pool name, if one is set */
+	if ((ms->un_hsp_id != MD_HSP_NONE) &&
+	    ((stripep->hspnamep = metahsphspname(&sp, ms->un_hsp_id,
+	    ep)) == NULL)) {
+		goto out;
+	}
+
+	/* get rows */
+	for (row = 0; (row < ms->un_nrows); ++row) {
+		struct ms_row	*mdr = &ms->un_row[row];
+		/* the component array sits un_ocomp bytes into the unit */
+		struct ms_comp	*mdcomp = (void *)&((char *)ms)[ms->un_ocomp];
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+		uint_t		comp, c;
+
+		/* get interlace */
+		rp->interlace = mdr->un_interlace;
+
+		/* allocate comps */
+		assert(mdr->un_ncomp > 0);
+		rp->comps.comps_len = mdr->un_ncomp;
+		rp->comps.comps_val = Zalloc(rp->comps.comps_len *
+		    sizeof (*rp->comps.comps_val));
+
+		/*
+		 * get components: "comp" indexes this row's output array,
+		 * "c" indexes the unit-wide array starting at un_icomp
+		 */
+		for (comp = 0, c = mdr->un_icomp; (comp < mdr->un_ncomp);
+		    ++comp, ++c) {
+			struct ms_comp	*mdc = &mdcomp[c];
+			diskaddr_t	comp_start_blk = mdc->un_start_block;
+			md_comp_t	*cp = &rp->comps.comps_val[comp];
+
+			/* get the component name */
+			cp->compnamep = metakeyname(&sp, mdc->un_key, fast, ep);
+			if (cp->compnamep == NULL)
+				goto out;
+
+			/* if hotspared */
+			if (mdc->un_mirror.ms_hs_id != 0) {
+				diskaddr_t hs_start_blk = mdc->un_start_block;
+
+				/* get the hotspare name */
+				cp->hsnamep = metakeyname(&sp,
+				    mdc->un_mirror.ms_hs_key, fast, ep);
+				if (cp->hsnamep == NULL)
+					goto out;
+
+				if (getenv("META_DEBUG_START_BLK") != NULL) {
+					/* debug aid only: errors are dropped */
+					if (metagetstart(sp, cp->hsnamep,
+					    ep) == MD_DISKADDR_ERROR)
+						mdclrerror(ep);
+
+					if ((cp->hsnamep->start_blk == 0) &&
+					    (hs_start_blk != 0))
+						md_eprintf(dgettext(TEXT_DOMAIN,
+					    "%s: suspected bad start block,"
+					    " seems labelled [stripe/hs]\n"),
+					    cp->hsnamep->cname);
+
+					if ((cp->hsnamep->start_blk > 0) &&
+					    (hs_start_blk == 0) &&
+					    ! ((row == 0) && (comp == 0)))
+						md_eprintf(dgettext(TEXT_DOMAIN,
+					    "%s: suspected bad start block, "
+					    "seems unlabelled [stripe/hs]\n"),
+					    cp->hsnamep->cname);
+				}
+				/* override any start_blk */
+				cp->hsnamep->start_blk = hs_start_blk;
+
+				/* get the right component start_blk */
+				comp_start_blk = mdc->un_mirror.ms_orig_blk;
+			} else {
+				if (getenv("META_DEBUG_START_BLK") != NULL) {
+					/* debug aid only: errors are dropped */
+					if (metagetstart(sp, cp->compnamep,
+					    ep) == MD_DISKADDR_ERROR)
+						mdclrerror(ep);
+
+					/*
+					 * These two messages end in a newline
+					 * like their [stripe/hs] counterparts
+					 * above, so consecutive diagnostics do
+					 * not run together on one line.
+					 */
+					if ((cp->compnamep->start_blk == 0) &&
+					    (comp_start_blk != 0))
+						md_eprintf(dgettext(TEXT_DOMAIN,
+					    "%s: suspected bad start block,"
+					    " seems labelled [stripe]\n"),
+					    cp->compnamep->cname);
+
+					if ((cp->compnamep->start_blk > 0) &&
+					    (comp_start_blk == 0) &&
+					    ! ((row == 0) && (comp == 0)))
+						md_eprintf(dgettext(TEXT_DOMAIN,
+					    "%s: suspected bad start block, "
+					    "seems unlabelled [stripe]\n"),
+					    cp->compnamep->cname);
+				}
+			}
+
+			/* override any start_blk */
+			cp->compnamep->start_blk = comp_start_blk;
+
+			/* get state */
+			cp->state = mdc->un_mirror.ms_state;
+
+			/* get time of last state change */
+			cp->timestamp = mdc->un_mirror.ms_timestamp;
+
+			/* get lasterr count */
+			cp->lasterrcnt = mdc->un_mirror.ms_lasterrcnt;
+		}
+	}
+
+	/* cleanup, return success; cache the result on the drive name */
+	Free(ms);
+	dnp->unitp = (md_common_t *)stripep;
+	return (stripep);
+
+	/* cleanup, return error */
+out:
+	Free(ms);
+	meta_free_stripe(stripep);
+	return (NULL);
+}
+
+/*
+ * get stripe
+ *
+ * Convenience form of meta_get_stripe_common() with the "fast" flag
+ * turned off.
+ */
+md_stripe_t *
+meta_get_stripe(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	md_error_t	*ep
+)
+{
+	const int	fast = 0;
+
+	return (meta_get_stripe_common(sp, stripenp, fast, ep));
+}
+
+/*
+ * check stripe for dev
+ *
+ * Walk every component of stripenp.  Components that
+ * meta_check_samedrive() reports as not sharing a drive with np are
+ * skipped; for the rest meta_check_overlap() is asked whether the range
+ * (slblk, nblks) of np collides with the component.
+ *
+ * Returns 0 if nothing collides, -1 on overlap or on any lookup error.
+ */
+static int
+in_stripe(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	md_row_t	*rp;
+	uint_t		row, comp;
+
+	/* should be in the same set */
+	assert(sp != NULL);
+
+	stripep = meta_get_stripe(sp, stripenp, ep);
+	if (stripep == NULL)
+		return (-1);
+
+	for (row = 0; row < stripep->rows.rows_len; row++) {
+		rp = &stripep->rows.rows_val[row];
+
+		for (comp = 0; comp < rp->comps.comps_len; comp++) {
+			mdname_t	*compnp;
+			diskaddr_t	comp_sblk;
+			int		same;
+
+			compnp = rp->comps.comps_val[comp].compnamep;
+
+			/* check same drive since metagetstart() can fail */
+			same = meta_check_samedrive(np, compnp, ep);
+			if (same < 0)
+				return (-1);
+			if (same == 0)
+				continue;
+
+			/* same drive: now look at the block ranges */
+			comp_sblk = metagetstart(sp, compnp, ep);
+			if (comp_sblk == MD_DISKADDR_ERROR)
+				return (-1);
+
+			if (meta_check_overlap(stripenp->cname, np,
+			    slblk, nblks, compnp, comp_sblk, -1,
+			    ep) != 0)
+				return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * check to see if we're in a stripe
+ *
+ * Runs in_stripe() against every stripe of the set and stops at the
+ * first one that reports a problem.  Returns 0 if none does, -1
+ * otherwise (including failure to obtain the list of stripes).
+ */
+int
+meta_check_instripe(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*stripenlp = NULL;
+	mdnamelist_t	*cur;
+	int		rval = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	if (meta_get_stripe_names(sp, &stripenlp, 0, ep) < 0)
+		return (-1);
+
+	/* visit stripes until one fails or the list runs out */
+	cur = stripenlp;
+	while ((cur != NULL) && (rval == 0)) {
+		if (in_stripe(sp, cur->namep, np, slblk, nblks, ep) != 0)
+			rval = -1;
+		cur = cur->next;
+	}
+
+	/* the list is released on both the success and failure path */
+	metafreenamelist(stripenlp);
+	return (rval);
+}
+
+/*
+ * check component
+ *
+ * Decide whether np may be used as a stripe component.  A soft
+ * partition must be able to act as a child and must not already have a
+ * parent; anything else must pass metachkcomp().  After that the device
+ * must not be in use (skipped when force is set), must belong to the
+ * set and must not already be part of a metadevice.
+ *
+ * Returns 0 if the component is acceptable, non-zero otherwise.
+ */
+int
+meta_check_component(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	int		force,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	options = (MDCHK_ALLOW_MDDB);
+	md_common_t	*mdp;
+
+	/*
+	 * meta_sp_issp() returns 0 when np is a soft partition, so the
+	 * first branch below is the soft partition case.
+	 */
+	if (meta_sp_issp(sp, np, ep) == 0) {
+		/* must be usable as a child ... */
+		mdp = meta_get_unit(sp, np, ep);
+		if ((mdp == NULL) || (mdp->capabilities == MD_CANT_PARENT))
+			return (mdmderror(ep, MDE_INVAL_UNIT, NULL,
+			    np->cname));
+
+		/* ... and must not already have a parent */
+		if (MD_HAS_PARENT(mdp->parent)) {
+			mdname_t *pnp = metamnumname(&sp, mdp->parent, 0, ep);
+
+			if (pnp == NULL)
+				return (-1);
+			return (mduseerror(ep, MDE_ALREADY, np->dev,
+			    pnp->cname, np->cname));
+		}
+	} else if (metachkcomp(np, ep) != 0) {
+		/* not a soft partition and not a usable disk component */
+		return (-1);
+	}
+
+	/* unless forced, refuse a device that is already in use */
+	if (!force && (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0))
+		return (-1);
+
+	/* it has to be in the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	/* and it must not be in a metadevice already */
+	if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * print stripe
+ *
+ * Write the stripe to fp as a single logical line: name, number of
+ * rows, then for every row its component count, component names and
+ * (for rows with more than one component) the interlace, and finally
+ * the hot spare pool if there is one.  Rows after the first are placed
+ * on backslash-continued lines.
+ *
+ * With PRINT_LARGEDEVICES set, stripes whose revision is not
+ * MD_64BIT_META_DEV are silently skipped.
+ *
+ * Returns 0 on success.  On an output error returns -1 after recording
+ * errno against fname in ep.
+ */
+static int
+stripe_print(
+	md_stripe_t	*stripep,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t		row;
+	int		rval = -1;
+
+	/* only "big" devices are wanted and this is not one: nothing to do */
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (stripep->common.revision != MD_64BIT_META_DEV))
+		return (0);
+
+	/*
+	 * fprintf() reports failure with a negative value, which is not
+	 * guaranteed to be EOF, so every check below tests for "< 0".
+	 */
+
+	/* print name and num rows */
+	if (fprintf(fp, "%s %u",
+	    stripep->common.namep->cname, stripep->rows.rows_len) < 0)
+		goto out;
+
+	/* print rows */
+	for (row = 0; (row < stripep->rows.rows_len); ++row) {
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+		uint_t		comp;
+
+		/* print num components */
+		if (fprintf(fp, " %u", rp->comps.comps_len) < 0)
+			goto out;
+
+		/* print components */
+		for (comp = 0; (comp < rp->comps.comps_len); ++comp) {
+			md_comp_t	*cp = &rp->comps.comps_val[comp];
+			char		*rname = cp->compnamep->rname;
+			char		*name;
+
+			/*
+			 * If the path is our standard /dev/rdsk or /dev/md/rdsk
+			 * then just print out the cxtxdxsx or the dx, metainit
+			 * will assume the default, otherwise we need the full
+			 * pathname to make sure this works as we intend.
+			 */
+			if ((strstr(rname, "/dev/rdsk") == NULL) &&
+			    (strstr(rname, "/dev/md/rdsk") == NULL) &&
+			    (strstr(rname, "/dev/td/") == NULL)) {
+				/* not standard path, print full pathname */
+				name = rname;
+			} else {
+				/* standard path */
+				name = cp->compnamep->cname;
+			}
+			if (fprintf(fp, " %s", name) < 0)
+				goto out;
+		}
+
+		/* print interlace */
+		if (rp->comps.comps_len > 1)
+			if (fprintf(fp, " -i %lldb", rp->interlace) < 0)
+				goto out;
+
+		/* print continuation */
+		if (row != (stripep->rows.rows_len - 1))
+			if (fprintf(fp, " \\\n\t") < 0)
+				goto out;
+	}
+
+	/* print hotspare name */
+	if (stripep->hspnamep != NULL)
+		if (fprintf(fp, " -h %s", stripep->hspnamep->hspname) < 0)
+			goto out;
+
+	/* terminate last line */
+	if (fprintf(fp, "\n") < 0)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * convert component state to name
+ *
+ * Returns the localized display string for the state of mdcp.  If tvp
+ * is not NULL it receives the component's timestamp.  A non-zero tstate
+ * takes precedence over the component state and yields "Unavailable".
+ */
+char *
+comp_state_to_name(
+	md_comp_t	*mdcp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate	/* Errored tstate flags */
+)
+{
+	char		*name;
+
+	/* hand the timestamp back if the caller asked for it */
+	if (tvp != NULL)
+		*tvp = mdcp->timestamp;
+
+	if (tstate != 0)
+		return (dgettext(TEXT_DOMAIN, "Unavailable"));
+
+	switch (mdcp->state) {
+	case CS_OKAY:
+		name = dgettext(TEXT_DOMAIN, "Okay");
+		break;
+	case CS_ERRED:
+		name = dgettext(TEXT_DOMAIN, "Maintenance");
+		break;
+	case CS_LAST_ERRED:
+		name = dgettext(TEXT_DOMAIN, "Last Erred");
+		break;
+	case CS_RESYNC:
+		name = dgettext(TEXT_DOMAIN, "Resyncing");
+		break;
+	default:
+		name = dgettext(TEXT_DOMAIN, "invalid");
+		break;
+	}
+	return (name);
+}
+
+/*
+ * print subdevice stripe row
+ *
+ * Print a header line followed by one line per component of row rp:
+ * device name, start block, whether it holds a state database replica,
+ * component state, device-id ("Reloc") information, hot spare name and,
+ * with PRINT_TIMES, the time of the last state change.
+ *
+ * top_tstate carries the errored tstate flags of the stripe itself; if
+ * it is non-zero, components that are not metadevices show "-" as
+ * their state.
+ *
+ * Returns 0 on success and -1 on failure.  Output errors additionally
+ * record errno against fname in ep.
+ */
+static int
+subdev_row_report(
+	mdsetname_t	*sp,
+	md_row_t	*rp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	uint_t		top_tstate,	/* Errored tstate flags */
+	md_error_t	*ep
+)
+{
+	uint_t		comp;
+	int		rval = -1;
+	ddi_devid_t	dtp;
+	int		len = 0;
+
+	/*
+	 * Work out the width of the device column so that really really
+	 * long ctd names still line up: widest component name or the
+	 * column title, plus two.
+	 */
+	for (comp = 0; (comp < rp->comps.comps_len); ++comp) {
+		md_comp_t	*cp = &rp->comps.comps_val[comp];
+		char		*cname = cp->compnamep->cname;
+
+		len = max(len, strlen(cname));
+	}
+
+	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+	len += 2;
+
+	/*
+	 * fprintf() reports failure with a negative value, which is not
+	 * guaranteed to be EOF, so every check below tests for "< 0".
+	 */
+
+	/* print header */
+	if (! (options & PRINT_TIMES)) {
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %5.5s %12.12s %5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "State"),
+		    dgettext(TEXT_DOMAIN, "Reloc"),
+		    dgettext(TEXT_DOMAIN, "Hot Spare")) < 0) {
+			goto out;
+		}
+	} else {
+		if (fprintf(fp,
+		    "\t%-*s %5s %5s %-11s %-5s %-9s %s\n",
+		    len,
+		    dgettext(TEXT_DOMAIN, "Device"),
+		    dgettext(TEXT_DOMAIN, "Start"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "State"),
+		    dgettext(TEXT_DOMAIN, "Reloc"),
+		    dgettext(TEXT_DOMAIN, "Hot Spare"),
+		    dgettext(TEXT_DOMAIN, "Time")) < 0) {
+			goto out;
+		}
+	}
+
+	/* print components */
+	for (comp = 0; (comp < rp->comps.comps_len); ++comp) {
+		md_comp_t	*cp = &rp->comps.comps_val[comp];
+		mdname_t	*namep = cp->compnamep;
+		char		*cname = namep->cname;
+		diskaddr_t	start_blk;
+		int		has_mddb;
+		char		*has_mddb_str;
+		char		*comp_state;
+		/*
+		 * Start from the component's own timestamp: the "-" state
+		 * path below never calls comp_state_to_name(), and tv must
+		 * still be valid when PRINT_TIMES formats it.
+		 */
+		md_timeval32_t	tv = cp->timestamp;
+		char		*hsname = ((cp->hsnamep != NULL) ?
+					    cp->hsnamep->cname : "");
+		char		*devid = " ";
+		mdname_t	*didnp = NULL;
+		uint_t		tstate = 0;
+
+		/* get info */
+		if ((start_blk = metagetstart(sp, namep, ep)) ==
+		    MD_DISKADDR_ERROR) {
+			return (-1);
+		}
+		if ((has_mddb = metahasmddb(sp, namep, ep)) < 0) {
+			return (-1);
+		}
+		if (has_mddb)
+			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+		else
+			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+		/*
+		 * If the component is a metadevice, print out either
+		 * unavailable or the state of the metadevice, if not
+		 * a metadevice, print nothing if the state of the
+		 * stripe is unavailable
+		 */
+		if (metaismeta(namep)) {
+			if (meta_get_tstate(namep->dev, &tstate, ep) != 0)
+				return (-1);
+			comp_state = comp_state_to_name(cp, &tv, tstate &
+			    MD_DEV_ERRORED);
+		} else {
+			/*
+			 * if top_tstate is set, that implies that you have
+			 * a ctd type device with an unavailable metadevice
+			 * on top of it. If so, print a - for its state
+			 */
+			if (top_tstate != 0)
+				comp_state = "-";
+			else
+				comp_state = comp_state_to_name(cp, &tv,
+				    tstate & MD_DEV_ERRORED);
+		}
+
+		/* look the device up by dev to obtain the name with its key */
+		if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL) {
+			return (-1);
+		}
+
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL) {
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			} else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info, using the column width computed above */
+		if (! (options & PRINT_TIMES)) {
+			if (fprintf(fp,
+			    "\t%-*s %8lld     %-5.5s %12.12s %5.5s %s\n",
+			    len, cname, start_blk,
+			    has_mddb_str, comp_state, devid, hsname) < 0) {
+				goto out;
+			}
+		} else {
+			char	*timep = meta_print_time(&tv);
+
+			if (fprintf(fp,
+			    "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
+			    len, cname, start_blk,
+			    has_mddb_str, comp_state, devid, hsname,
+			    timep) < 0) {
+				goto out;
+			}
+		}
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * print toplevel stripe row
+ *
+ * Print a header line followed by one line per component of row rp:
+ * device name, start block, whether it holds a state database replica
+ * and device-id ("Reloc") information.
+ *
+ * Returns 0 on success and -1 on failure.  Output errors additionally
+ * record errno against fname in ep.
+ */
+/*ARGSUSED4*/
+static int
+toplev_row_report(
+	mdsetname_t	*sp,
+	md_row_t	*rp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t		comp;
+	int		rval = -1;
+	char		*devid = " ";
+	mdname_t	*didnp = NULL;
+	int		len = 0;
+
+	/*
+	 * Work out the width of the device column so that really really
+	 * long ctd names still line up: widest component name or the
+	 * column title, plus two.
+	 */
+	for (comp = 0; (comp < rp->comps.comps_len); ++comp) {
+		len = max(len,
+		    strlen(rp->comps.comps_val[comp].compnamep->cname));
+	}
+
+	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
+	len += 2;
+
+	/*
+	 * fprintf() reports failure with a negative value, which is not
+	 * guaranteed to be EOF, so every check below tests for "< 0".
+	 */
+
+	/* print header */
+	if (fprintf(fp,
+	    "\t%-*.*s %-12.12s %-5.5s\t%s\n",
+	    len, len,
+	    dgettext(TEXT_DOMAIN, "Device"),
+	    dgettext(TEXT_DOMAIN, "Start Block"),
+	    dgettext(TEXT_DOMAIN, "Dbase"),
+	    dgettext(TEXT_DOMAIN, "Reloc")) < 0) {
+		goto out;
+	}
+
+	/* print components */
+	for (comp = 0; (comp < rp->comps.comps_len); ++comp) {
+		md_comp_t	*cp = &rp->comps.comps_val[comp];
+		mdname_t	*namep = cp->compnamep;
+		char		*cname = namep->cname;
+		diskaddr_t	start_blk;
+		int		has_mddb;
+		char		*has_mddb_str;
+		ddi_devid_t	dtp;
+
+		/* get info */
+		if ((start_blk = metagetstart(sp, namep, ep)) ==
+		    MD_DISKADDR_ERROR) {
+			return (-1);
+		}
+		if ((has_mddb = metahasmddb(sp, namep, ep)) < 0) {
+			return (-1);
+		}
+		if (has_mddb)
+			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+		else
+			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+		/* look the device up by dev to obtain the name with its key */
+		if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL) {
+			return (-1);
+		}
+
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL) {
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			} else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info, using the column width computed above */
+		if (fprintf(fp,
+		    "\t%-*s %8lld     %-5.5s\t%s\n", len,
+		    cname, start_blk, has_mddb_str, devid) < 0) {
+			goto out;
+		}
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * print stripe options
+ *
+ * Writes the "Hot spare pool" line to fp, using the pool's name or the
+ * localized word "none" when hspnamep is NULL.
+ *
+ * Returns 0 on success.  On an output error returns -1 after recording
+ * errno against fname in ep.
+ */
+int
+meta_print_stripe_options(
+	mdhspname_t	*hspnamep,
+	char		*fname,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	char		*hspname = ((hspnamep != NULL) ? hspnamep->hspname :
+					dgettext(TEXT_DOMAIN, "none"));
+
+	/*
+	 * fprintf() reports failure with a negative value, which is not
+	 * guaranteed to be EOF, hence the "< 0" test.
+	 */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN,
+	    "    Hot spare pool: %s\n"), hspname) < 0) {
+		(void) mdsyserror(ep, errno, fname);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * report stripe
+ *
+ * Print the long form description of a stripe: optional header, hot
+ * spare pool, an "Unavailable" notice if the device's tstate has
+ * MD_DEV_ERRORED set, the size, and then every row through
+ * subdev_row_report() (stripe has a parent) or toplev_row_report()
+ * (it does not).
+ *
+ * With PRINT_LARGEDEVICES set, stripes whose revision is not
+ * MD_64BIT_META_DEV are silently skipped; for the others
+ * meta_getdevs() is called with nlpp first.
+ *
+ * Returns 0 on success and -1 on failure.  Output errors additionally
+ * record errno against fname in ep.
+ */
+static int
+stripe_report(
+	mdsetname_t	*sp,
+	md_stripe_t	*stripep,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	uint_t		row;
+	int		rval = -1;
+	uint_t		tstate = 0;
+
+	/*
+	 * if the -B option has been specified check to see if the
+	 * metadevice is a "big" one and print if so, also if a
+	 * big device we need to store the ctd involved for use in
+	 * printing out the relocation information.
+	 */
+	if (options & PRINT_LARGEDEVICES) {
+		if (stripep->common.revision != MD_64BIT_META_DEV)
+			return (0);
+		if (meta_getdevs(sp, stripep->common.namep, nlpp, ep) != 0)
+			goto out;
+	}
+
+	/*
+	 * fprintf() reports failure with a negative value, which is not
+	 * guaranteed to be EOF, so every check below tests for "< 0".
+	 */
+
+	/* print header */
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, "%s: Concat/Stripe\n",
+		    stripep->common.namep->cname) < 0) {
+			goto out;
+		}
+	}
+
+	/* print hotspare pool */
+	if (stripep->hspnamep != NULL) {
+		if (meta_print_stripe_options(stripep->hspnamep,
+		    fname, fp, ep) != 0) {
+			return (-1);
+		}
+	}
+
+	/* tstate stays 0 unless the stripe's own name is a metadevice */
+	if (metaismeta(stripep->common.namep)) {
+		if (meta_get_tstate(stripep->common.namep->dev, &tstate, ep)
+		    != 0)
+			return (-1);
+	}
+	if ((tstate & MD_DEV_ERRORED) != 0) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    State: Unavailable\n"
+		    "    Reconnect disk and invoke: metastat -i\n")) < 0) {
+			goto out;
+		}
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
+	    stripep->common.size,
+	    meta_number_to_string(stripep->common.size, DEV_BSIZE)) < 0) {
+		goto out;
+	}
+
+	/* print rows */
+	for (row = 0; (row < stripep->rows.rows_len); ++row) {
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+
+		/* print stripe and interlace */
+		if (rp->comps.comps_len > 1) {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Stripe %u: (interlace: %lld blocks)\n"),
+			    row, rp->interlace) < 0) {
+				goto out;
+			}
+		} else {
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Stripe %u:\n"),
+			    row) < 0) {
+				goto out;
+			}
+		}
+
+		/* print components appropriately */
+		if (MD_HAS_PARENT(stripep->common.parent)) {
+			if (subdev_row_report(sp, rp, fname, fp, options,
+			    tstate & MD_DEV_ERRORED, ep) != 0) {
+				return (-1);
+			}
+		} else {
+			if (toplev_row_report(sp, rp, fname, fp, options,
+			    ep) != 0) {
+				return (-1);
+			}
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") < 0)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * print/report stripe
+ *
+ * With stripenp == NULL every stripe in the set is printed by calling
+ * this function once per name; a failure on one stripe does not stop
+ * the others but makes the overall result -1.
+ *
+ * With a specific stripenp the stripe is printed in short
+ * (PRINT_SHORT) or long form, and then every component that is itself
+ * a metadevice is printed through meta_print_name() with PRINT_HEADER
+ * and PRINT_SUBDEVS added.  A stripe that has a parent is skipped
+ * unless PRINT_SUBDEVS is set.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+meta_stripe_print(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdnamelist_t	**nlpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	/* unsigned to match rows_len/comps_len, which are compared below */
+	uint_t		row, comp;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert((stripenp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev))));
+
+	/* print all stripes */
+	if (stripenp == NULL) {
+		mdnamelist_t	*nlp = NULL;
+		mdnamelist_t	*p;
+		int		cnt;
+		int		rval = 0;
+
+		/* get list */
+		if ((cnt = meta_get_stripe_names(sp, &nlp, options, ep)) < 0)
+			return (-1);
+		else if (cnt == 0)
+			return (0);
+
+		/* recurse */
+		for (p = nlp; (p != NULL); p = p->next) {
+			mdname_t	*np = p->namep;
+
+			if (meta_stripe_print(sp, np, nlpp, fname, fp,
+			    options, ep) != 0)
+				rval = -1;
+		}
+
+		/* cleanup, return success */
+		metafreenamelist(nlp);
+		return (rval);
+	}
+
+	/* get unit structure */
+	if ((stripep = meta_get_stripe_common(sp, stripenp,
+	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
+		return (-1);
+
+	/* check for parented */
+	if ((! (options & PRINT_SUBDEVS)) &&
+	    (MD_HAS_PARENT(stripep->common.parent))) {
+		return (0);
+	}
+
+	/* print appropriate detail */
+	if (options & PRINT_SHORT) {
+		if (stripe_print(stripep, fname, fp, options, ep) != 0)
+			return (-1);
+	} else {
+		if (stripe_report(sp, stripep, nlpp, fname, fp, options,
+		    ep) != 0)
+			return (-1);
+	}
+
+	/* Recurse on components that are metadevices */
+	for (row = 0; (row < stripep->rows.rows_len); ++row) {
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+
+		/* look for components that are metadevices */
+		for (comp = 0; (comp < rp->comps.comps_len); ++comp) {
+			md_comp_t	*cp = &rp->comps.comps_val[comp];
+			mdname_t	*namep = cp->compnamep;
+
+			if ((metaismeta(namep)) &&
+			    (meta_print_name(sp, namep, nlpp, fname, fp,
+			    (options | PRINT_HEADER | PRINT_SUBDEVS),
+			    NULL, ep) != 0)) {
+				return (-1);
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * find stripe component to replace
+ *
+ * Scans the stripe twice, first for CS_ERRED and then for
+ * CS_LAST_ERRED components.  In either pass a component becomes the
+ * candidate if there is no candidate yet or if its lasterrcnt is lower
+ * than the current candidate's.
+ * NOTE(review): as coded, a last-erred component with a lower
+ * lasterrcnt replaces an erred candidate found in the first pass --
+ * confirm that this is intended.
+ *
+ * *compnpp is set to the chosen component's name, or NULL if none was
+ * found; *compstate is written only when a component is found.
+ *
+ * Returns 0 on success (found or not), -1 if the stripe cannot be
+ * obtained.
+ */
+int
+meta_find_erred_comp(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdname_t	**compnpp,
+	comp_state_t	*compstate,
+	md_error_t	*ep
+)
+{
+	static const comp_state_t	wanted[] = { CS_ERRED, CS_LAST_ERRED };
+	md_stripe_t	*stripep;
+	md_comp_t	*best = NULL;
+	uint_t		best_cnt = 0;
+	uint_t		pass, row, comp;
+
+	/* get stripe */
+	*compnpp = NULL;
+	stripep = meta_get_stripe_common(sp, stripenp, 1, ep);
+	if (stripep == NULL)
+		return (-1);
+
+	/* one full sweep of the stripe per state of interest */
+	for (pass = 0; pass < 2; pass++) {
+		for (row = 0; row < stripep->rows.rows_len; row++) {
+			md_row_t	*rp = &stripep->rows.rows_val[row];
+
+			for (comp = 0; comp < rp->comps.comps_len; comp++) {
+				md_comp_t	*cp;
+
+				cp = &rp->comps.comps_val[comp];
+				if (cp->state != wanted[pass])
+					continue;
+				/* keep the candidate unless this is lower */
+				if ((best != NULL) &&
+				    (cp->lasterrcnt >= best_cnt))
+					continue;
+				best = cp;
+				best_cnt = cp->lasterrcnt;
+			}
+		}
+	}
+
+	/* return component */
+	if (best != NULL) {
+		*compnpp = best->compnamep;
+		*compstate = best->state;
+	}
+
+	return (0);
+}
+
+/*
+ * Drop the cached name information of every component of a stripe.
+ *
+ * Calls meta_invalidate_name() on each component name, row by row.
+ * Returns 0 on success, -1 if the stripe cannot be fetched.
+ */
+static int
+invalidate_components(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep = meta_get_stripe(sp, stripenp, ep);
+	uint_t		r = 0;
+
+	if (stripep == NULL)
+		return (-1);
+
+	while (r < stripep->rows.rows_len) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r++];
+		uint_t		c = 0;
+
+		while (c < rowp->comps.comps_len) {
+			mdname_t *np = rowp->comps.comps_val[c++].compnamep;
+
+			meta_invalidate_name(np);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * attach components to stripe
+ *
+ * The components named in nlp are appended to stripenp as one new row.
+ * An interlace of 0 inherits the interlace of the last existing row.
+ * Without MDCMD_DOIT this is a dry run: no name is stored in the
+ * namespace and no grow ioctl is issued.  With MDCMD_PRINT a status line
+ * is printed.
+ *
+ * Returns 0 on success, -1 on failure with the error recorded in ep.
+ * Every exit taken after the old unit has been fetched goes through the
+ * common cleanup so that both unit structures and the key list are
+ * released.
+ */
+int
+meta_stripe_attach(
+	mdsetname_t		*sp,
+	mdname_t		*stripenp,
+	mdnamelist_t		*nlp,
+	diskaddr_t		interlace,
+	mdcmdopts_t		options,
+	md_error_t		*ep
+)
+{
+	mdnamelist_t		*lp;
+	ms_unit_t		*old_un, *new_un;
+	struct ms_row		*mdr, *new_mdr;
+	uint_t			newcomps, ncomps, icomp;
+	uint_t			row;
+	size_t			mdsize, first_comp;
+	diskaddr_t		new_blks;
+	diskaddr_t		limit;
+	diskaddr_t		disk_size = 0;
+	ms_comp_t		*mdcomp, *new_comp;
+	uint_t			write_reinstruct = 0;
+	uint_t			read_reinstruct = 0;
+	mdnamelist_t		*keynlp = NULL;
+	uint_t			round_cyl = 1;
+	minor_t			parent;
+	md_grow_params_t	mgp;
+	int			rval = -1;
+	int			keep_keys = 0;	/* set once the grow is done */
+	md_timeval32_t		creation_time;
+	int			create_flag = MD_CRO_32BIT;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+	/* check type */
+	if (metachkmeta(stripenp, ep) != 0)
+		return (-1);
+
+	/* check and count components */
+	assert(nlp != NULL);
+	newcomps = 0;
+	for (lp = nlp; (lp != NULL); lp = lp->next) {
+		mdname_t	*np = lp->namep;
+		mdnamelist_t	*p;
+
+		/* check against existing devices */
+		if (meta_check_component(sp, np, 0, ep) != 0)
+			return (-1);
+
+		/* check against ourselves */
+		for (p = lp->next; (p != NULL); p = p->next) {
+			if (meta_check_overlap(np->cname, np, 0, -1,
+			    p->namep, 0, -1, ep) != 0) {
+				return (-1);
+			}
+		}
+
+		/* count */
+		++newcomps;
+	}
+
+	/* get old unit (released at "out") */
+	if ((old_un = (ms_unit_t *)meta_get_mdunit(sp, stripenp, ep)) == NULL)
+		return (-1);
+
+	/* if zero, inherit the last rows interlace value */
+	if (interlace == 0) {
+		mdr = &old_un->un_row[old_un->un_nrows - 1];
+		interlace = mdr->un_interlace;
+	}
+
+	/*
+	 * calculate size of new unit structure
+	 */
+
+	/* unit + rows */
+	mdsize = sizeof (ms_unit_t) - sizeof (struct ms_row);
+	mdsize += sizeof (struct ms_row) * (old_un->un_nrows + 1);
+
+	/* number of new components being added */
+	ncomps = newcomps;
+
+	/* count the # of components in the old unit */
+	mdr = &old_un->un_row[0];
+	for (row = 0; (row < old_un->un_nrows); row++)
+		ncomps += mdr[row].un_ncomp;
+	first_comp = roundup(mdsize, sizeof (long long));
+	mdsize += sizeof (ms_comp_t) * ncomps + (first_comp - mdsize);
+
+	/* allocate new unit (released at "out") */
+	new_un = Zalloc(mdsize);
+	new_un->un_ocomp = first_comp;
+
+	/* compute new data */
+	new_mdr = &new_un->un_row[old_un->un_nrows];
+	new_mdr->un_icomp = ncomps - newcomps;
+	new_mdr->un_ncomp = newcomps;
+	new_mdr->un_blocks = 0;
+	new_mdr->un_cum_blocks =
+	    old_un->un_row[old_un->un_nrows - 1].un_cum_blocks;
+	new_mdr->un_interlace = interlace;
+
+	/* for each new device */
+	mdcomp = (struct ms_comp *)(void *)&((char *)new_un)[new_un->un_ocomp];
+	icomp = new_mdr->un_icomp;
+	if (meta_gettimeofday(&creation_time) == -1) {
+		/* both units are allocated by now: leave through cleanup */
+		(void) mdsyserror(ep, errno, NULL);
+		goto out;
+	}
+	for (lp = nlp; (lp != NULL); lp = lp->next) {
+		mdname_t	*np = lp->namep;
+		diskaddr_t	size, start_blk;
+		mdgeom_t		*geomp;
+
+		/* figure out how big */
+		if ((size = metagetsize(np, ep)) == MD_DISKADDR_ERROR)
+			goto out;
+		if ((start_blk = metagetstart(sp, np, ep)) ==
+		    MD_DISKADDR_ERROR)
+			goto out;
+		if (start_blk >= size) {
+			(void) mdsyserror(ep, ENOSPC, np->cname);
+			goto out;
+		}
+		size -= start_blk;
+		if (newcomps > 1)
+			size = rounddown(size, interlace);
+
+		/* adjust for smallest disk */
+		if (disk_size == 0) {
+			disk_size = size;
+		} else if (size < disk_size) {
+			disk_size = size;
+		}
+
+		/* get worst reinstructs */
+		if ((geomp = metagetgeom(np, ep)) == NULL)
+			goto out;
+		if (geomp->write_reinstruct > write_reinstruct)
+			write_reinstruct = geomp->write_reinstruct;
+		if (geomp->read_reinstruct > read_reinstruct)
+			read_reinstruct = geomp->read_reinstruct;
+
+		/* In dryrun mode (DOIT not set) we must not alter the mddb */
+		if (options & MDCMD_DOIT) {
+			/* store name in namespace */
+			if (add_key_name(sp, np, &keynlp, ep) != 0)
+				goto out;
+		}
+
+		/* build new component */
+		new_comp = &mdcomp[icomp++];
+		new_comp->un_key = np->key;
+		new_comp->un_dev = np->dev;
+		new_comp->un_start_block = start_blk;
+		new_comp->un_mirror.ms_state = CS_OKAY;
+		new_comp->un_mirror.ms_timestamp = creation_time;
+	}
+
+	limit = LLONG_MAX;
+
+	/* compute new size, clamping at the limit */
+	new_mdr->un_blocks = new_mdr->un_ncomp * disk_size;
+	new_blks = new_mdr->un_cum_blocks + new_mdr->un_blocks;
+	if (new_blks > limit) {
+		new_mdr->un_cum_blocks = limit;
+		new_blks = limit;
+		md_eprintf(dgettext(TEXT_DOMAIN,
+		    "unit size overflow, limit is %lld blocks\n"),
+		    limit);
+	} else {
+		new_mdr->un_cum_blocks += new_mdr->un_blocks;
+	}
+	new_un->c.un_actual_tb = new_mdr->un_cum_blocks;
+	new_un->un_nrows = old_un->un_nrows + 1;
+
+	/* adjust geometry */
+	new_un->c.un_nhead = old_un->c.un_nhead;
+	new_un->c.un_nsect = old_un->c.un_nsect;
+	new_un->c.un_rpm = old_un->c.un_rpm;
+	new_un->c.un_wr_reinstruct = old_un->c.un_wr_reinstruct;
+	new_un->c.un_rd_reinstruct = old_un->c.un_rd_reinstruct;
+	if (meta_adjust_geom((md_unit_t *)new_un, stripenp,
+	    write_reinstruct, read_reinstruct, round_cyl, ep) != 0)
+		goto out;
+
+	/* if in dryrun mode, we are done here. */
+	if ((options & MDCMD_DOIT) == 0)  {
+		if (options & MDCMD_PRINT) {
+			if (newcomps == 1) {
+				(void) printf(dgettext(TEXT_DOMAIN,
+				"%s: attaching component would suceed\n"),
+				stripenp->cname);
+			} else {
+				(void) printf(dgettext(TEXT_DOMAIN,
+				"%s: attaching components would suceed\n"),
+				stripenp->cname);
+			}
+		}
+		rval = 0; /* success */
+		goto out;
+	}
+
+	create_flag = meta_check_devicesize(new_un->c.un_total_blocks);
+
+	/* grow stripe */
+	(void) memset(&mgp, 0, sizeof (mgp));
+	mgp.mnum = MD_SID(old_un);
+	MD_SETDRIVERNAME(&mgp, MD_STRIPE, sp->setno);
+	mgp.size = mdsize;
+	mgp.mdp = (uintptr_t)new_un;
+	mgp.nrows = old_un->un_nrows;
+	if (create_flag == MD_CRO_32BIT) {
+		mgp.options = MD_CRO_32BIT;
+		new_un->c.un_revision = MD_32BIT_META_DEV;
+	} else {
+		mgp.options = MD_CRO_64BIT;
+		new_un->c.un_revision = MD_64BIT_META_DEV;
+	}
+
+	/* pass along a single parent, if there is one */
+	if ((MD_HAS_PARENT(old_un->c.un_parent)) &&
+	    (old_un->c.un_parent != MD_MULTI_PARENT)) {
+		mgp.npar = 1;
+		parent = old_un->c.un_parent;
+		mgp.par = (uintptr_t)(&parent);
+	}
+
+	if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) {
+		(void) mdstealerror(ep, &mgp.mde);
+		goto out;
+	}
+
+	/* clear cache */
+	if (invalidate_components(sp, stripenp, ep) != 0)
+		goto out;
+	meta_invalidate_name(stripenp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		if (newcomps == 1) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: component is attached\n"), stripenp->cname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: components are attached\n"), stripenp->cname);
+		}
+		(void) fflush(stdout);
+	}
+
+	/*
+	 * grow any parents; the stripe itself has already been grown, so
+	 * if this fails the new namespace keys are kept and only the
+	 * memory is released
+	 */
+	keep_keys = 1;
+	if (meta_concat_parent(sp, stripenp, ep) != 0)
+		goto out;
+
+	rval = 0;	/* success */
+
+	/* cleanup, return status */
+out:
+	Free(old_un);
+	Free(new_un);
+	if (options & MDCMD_DOIT) {
+		if ((rval != 0) && !keep_keys)
+			(void) del_key_names(sp, keynlp, NULL);
+		metafreenamelist(keynlp);
+	}
+	return (rval);
+}
+
+/*
+ * Fetch the tunable parameters of a stripe.
+ *
+ * *paramsp is zeroed and then hsp_id is filled in: the stripe's hot spare
+ * pool if it has one, MD_HSP_NONE otherwise.
+ * Returns 0 on success, -1 if the name is not a metadevice or the stripe
+ * cannot be fetched.
+ */
+int
+meta_stripe_get_params(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	ms_params_t	*paramsp,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+
+	/* caller must hand us the set the stripe lives in */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+	if (metachkmeta(stripenp, ep) != 0)
+		return (-1);
+
+	stripep = meta_get_stripe(sp, stripenp, ep);
+	if (stripep == NULL)
+		return (-1);
+
+	/* start from a clean slate, then report the hot spare pool */
+	(void) memset(paramsp, 0, sizeof (*paramsp));
+	if (stripep->hspnamep != NULL)
+		paramsp->hsp_id = stripep->hspnamep->hsp;
+	else
+		paramsp->hsp_id = MD_HSP_NONE;
+
+	return (0);
+}
+
+/*
+ * Apply new tunable parameters to a stripe.
+ *
+ * *paramsp is handed to the stripe driver through MD_IOCCHANGE; on
+ * success the cached information for the stripe name is invalidated.
+ * Returns 0 on success; on ioctl failure the driver error is moved into
+ * ep and the result of mdstealerror() is returned.
+ */
+int
+meta_stripe_set_params(
+	mdsetname_t		*sp,
+	mdname_t		*stripenp,
+	ms_params_t		*paramsp,
+	md_error_t		*ep
+)
+{
+	md_stripe_params_t	req;
+
+	/* caller must hand us the set the stripe lives in */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev)));
+
+	if (metachkmeta(stripenp, ep) != 0)
+		return (-1);
+
+	/* build the request */
+	(void) memset(&req, 0, sizeof (req));
+	req.params = *paramsp;
+	req.mnum = meta_getminor(stripenp->dev);
+	MD_SETDRIVERNAME(&req, MD_STRIPE, sp->setno);
+
+	if (metaioctl(MD_IOCCHANGE, &req, &req.mde, stripenp->cname) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* what we had cached about the stripe is stale now */
+	meta_invalidate_name(stripenp);
+
+	return (0);
+}
+
+/*
+ * Make sure a component does not overlap any component listed before it.
+ *
+ * The component at (row, comp) is compared, via meta_check_overlap(),
+ * against every component of the earlier rows and against the components
+ * preceding it in its own row.
+ * Returns 0 if no overlap is reported, -1 otherwise.
+ */
+static int
+check_twice(
+	md_stripe_t	*stripep,
+	uint_t		row,
+	uint_t		comp,
+	md_error_t	*ep
+)
+{
+	mdname_t	*stripenp = stripep->common.namep;
+	mdname_t	*thisnp;
+	uint_t		r = 0;
+
+	thisnp = stripep->rows.rows_val[row].comps.comps_val[comp].compnamep;
+
+	while (r <= row) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+		uint_t		stop;
+		uint_t		c;
+
+		/* in our own row only look at what comes before us */
+		if (r == row)
+			stop = comp;
+		else
+			stop = rowp->comps.comps_len;
+
+		for (c = 0; c < stop; c++) {
+			mdname_t *othernp = rowp->comps.comps_val[c].compnamep;
+
+			if (meta_check_overlap(stripenp->cname, thisnp, 0, -1,
+			    othernp, 0, -1, ep) != 0)
+				return (-1);
+		}
+		++r;
+	}
+
+	return (0);
+}
+
+/*
+ * default stripe interlace
+ *
+ * Returns the default interlace in disk blocks: 16k, or, should that be
+ * below the minimum interlace, the minimum rounded up to a multiple of
+ * 16k.  All arithmetic is done in blocks; MININTERLACE is converted with
+ * btodb() before it is compared or rounded.
+ */
+diskaddr_t
+meta_default_stripe_interlace(void)
+{
+	diskaddr_t		interlace;
+	diskaddr_t		min_interlace = btodb(MININTERLACE);
+
+	/* default to 16k, round up if necessary */
+	interlace = btodb(16 * 1024);
+	if (interlace < min_interlace)
+		interlace = roundup(min_interlace, interlace);
+	return (interlace);
+}
+
+/*
+ * Validate an interlace value.
+ *
+ * interlace is accepted when it lies within
+ * [btodb(MININTERLACE), btodb(MAXINTERLACE)].
+ * Returns 0 when acceptable; otherwise records MDE_BAD_INTERLACE for
+ * uname in ep and returns the result of mderror().
+ */
+int
+meta_stripe_check_interlace(
+	diskaddr_t	interlace,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	/* inside the permitted range: nothing to complain about */
+	if ((interlace >= btodb(MININTERLACE)) &&
+	    (interlace <= btodb(MAXINTERLACE)))
+		return (0);
+
+	return (mderror(ep, MDE_BAD_INTERLACE, uname));
+}
+
+
+/*
+ * Validate a stripe description before it is created.
+ *
+ * Checks that there is at least one row and one component per row, fills
+ * in the default interlace for rows that have none (rp->interlace == 0),
+ * validates each interlace and makes sure no component overlaps one
+ * listed before it.  Unless MDCMD_UPDATE is set, each component is also
+ * checked with meta_check_component() and must have usable space left
+ * after its start block and after rounding down to the interlace.  With
+ * MDCMD_DOIT the hot spare pool name, if any, is checked too.
+ *
+ * Returns 0 if the stripe is acceptable, otherwise a non-zero error
+ * return with the error recorded in ep.
+ */
+int
+meta_check_stripe(
+	mdsetname_t	*sp,
+	md_stripe_t	*stripep,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*stripenp = stripep->common.namep;
+	uint_t		r;
+
+	/* a stripe needs at least one row */
+	if (stripep->rows.rows_len < 1) {
+		return (mdmderror(ep, MDE_BAD_STRIPE,
+		    meta_getminor(stripenp->dev), stripenp->cname));
+	}
+
+	for (r = 0; r < stripep->rows.rows_len; r++) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+		uint_t		c;
+
+		/* and every row at least one component */
+		if (rowp->comps.comps_len < 1) {
+			return (mdmderror(ep, MDE_BAD_STRIPE,
+			    meta_getminor(stripenp->dev), stripenp->cname));
+		}
+
+		/* no interlace given: use the default */
+		if (rowp->interlace == 0)
+			rowp->interlace = meta_default_stripe_interlace();
+
+		if (meta_stripe_check_interlace(rowp->interlace,
+		    stripenp->cname, ep) != 0)
+			return (-1);
+
+		for (c = 0; c < rowp->comps.comps_len; c++) {
+			mdname_t *compnp = rowp->comps.comps_val[c].compnamep;
+
+			/* device checks are skipped when only updating */
+			if (!(options & MDCMD_UPDATE)) {
+				diskaddr_t	start_blk, size;
+
+				if (meta_check_component(sp, compnp,
+				    ((options & MDCMD_FORCE) ? 1 : 0),
+				    ep) != 0)
+					return (-1);
+
+				start_blk = metagetstart(sp, compnp, ep);
+				if (start_blk == MD_DISKADDR_ERROR)
+					return (-1);
+				size = metagetsize(compnp, ep);
+				if (size == MD_DISKADDR_ERROR)
+					return (-1);
+
+				/* something must remain past the start */
+				if (start_blk >= size) {
+					return (mdsyserror(ep, ENOSPC,
+					    compnp->cname));
+				}
+				size -= start_blk;
+
+				/* ... and after rounding to the interlace */
+				size = rounddown(size, rowp->interlace);
+				if (size == 0) {
+					return (mdsyserror(ep, ENOSPC,
+					    compnp->cname));
+				}
+			}
+
+			/* no overlap with anything listed earlier */
+			if (check_twice(stripep, r, c, ep) != 0)
+				return (-1);
+		}
+	}
+
+	/* the hot spare pool is only looked at when really doing it */
+	if ((options & MDCMD_DOIT) && (stripep->hspnamep != NULL) &&
+	    (metachkhsp(sp, stripep->hspnamep, ep) != 0))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Set up the geometry of a new stripe unit.
+ *
+ * The worst (largest) read and write reinstruct values over all
+ * components are collected, and the geometry of the first component of
+ * the first row is handed to meta_setup_geom() together with them.
+ *
+ * When that first component is a soft partition, the resulting total
+ * size is checked against the size of the first row; if the total came
+ * out smaller, MDE_STRIPE_TRUNC_SINGLE (one row) or
+ * MDE_STRIPE_TRUNC_MULTIPLE (several rows) is recorded in ep with an
+ * extra numeric string attached, and -1 is returned.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+stripe_geom(
+	md_stripe_t	*stripep,
+	ms_unit_t	*ms,
+	md_error_t	*ep
+)
+{
+	uint_t		nrow = stripep->rows.rows_len;
+	uint_t		worst_wr = 0;
+	uint_t		worst_rd = 0;
+	uint_t		round_cyl = 1;
+	uint_t		r;
+	mdname_t	*firstnp;
+	mdgeom_t	*geomp;
+	md_unit_t	*md;
+	diskaddr_t	first_row_size = 0;
+	char		*miscname;
+	char		buf[] = VAL2STR(ULLONG_MAX);
+	int		is_sp = 0;
+
+	/* collect the worst reinstruct values of all components */
+	for (r = 0; r < nrow; r++) {
+		md_row_t	*rowp = &stripep->rows.rows_val[r];
+		uint_t		ncomp = rowp->comps.comps_len;
+		uint_t		c;
+
+		for (c = 0; c < ncomp; c++) {
+			geomp = metagetgeom(
+			    rowp->comps.comps_val[c].compnamep, ep);
+			if (geomp == NULL)
+				return (-1);
+			if (geomp->write_reinstruct > worst_wr)
+				worst_wr = geomp->write_reinstruct;
+			if (geomp->read_reinstruct > worst_rd)
+				worst_rd = geomp->read_reinstruct;
+		}
+	}
+
+	/* the geometry proper is taken from the very first component */
+	firstnp = stripep->rows.rows_val[0].comps.comps_val[0].compnamep;
+	geomp = metagetgeom(firstnp, ep);
+	if (geomp == NULL)
+		return (-1);
+
+	/*
+	 * The truncation check below is only made for soft partitions, so
+	 * find out whether the first component is one.  Failing to get a
+	 * misc name is tolerated only for the MDE_NOT_META error.
+	 */
+	miscname = metagetmiscname(firstnp, ep);
+	if (miscname != NULL) {
+		if (strcmp(miscname, MD_SP) == 0)
+			is_sp = 1;
+	} else if (!mdisdeverror(ep, MDE_NOT_META)) {
+		return (-1);
+	}
+
+	if (meta_setup_geom((md_unit_t *)ms, stripep->common.namep, geomp,
+	    worst_wr, worst_rd, round_cyl, ep) != 0)
+		return (-1);
+
+	/*
+	 * Make sure that truncation, if any, did not leave data
+	 * inaccessible.  Comparing un_total_blocks against un_actual_tb
+	 * would only tell that rounding happened, not that it cost data.
+	 * Comparing against the size of the first row catches the plain
+	 * (1, 1) concat as well as the contrived layouts in which the
+	 * rows after the first add up to less than a cylinder of the
+	 * single component in the first row.
+	 */
+	first_row_size = ms->un_row[0].un_blocks;
+	md = (md_unit_t *)ms;
+	if (!is_sp || (md->c.un_total_blocks >= first_row_size))
+		return (0);
+
+	/*
+	 * Same failure either way; only the message differs, because the
+	 * remedy is not quite the same for one row as for several.
+	 */
+	if (nrow != 1) {
+		(void) mderror(ep, MDE_STRIPE_TRUNC_MULTIPLE,
+		    stripep->common.namep->cname);
+	} else {
+		(void) mderror(ep, MDE_STRIPE_TRUNC_SINGLE,
+		    stripep->common.namep->cname);
+	}
+
+	/*
+	 * buf[] is sized from ULLONG_MAX and the comparison above has
+	 * already held, so the value is non-negative and fits the buffer.
+	 */
+	mderrorextra(ep, ulltostr((md->c.un_total_blocks +
+	    (geomp->nhead * geomp->nsect))
+	    - first_row_size, &buf[sizeof (buf) - 1]));
+
+	return (-1);
+}
+
+/*
+ * create stripe
+ *
+ * Validates stripep, builds the driver unit structure for it (rows,
+ * components, sizes, geometry) and, when MDCMD_DOIT is set, stores the
+ * component names in the namespace and creates the unit with MD_IOCSET.
+ * Without MDCMD_DOIT everything up to the ioctl is still computed, so
+ * the call acts as a dry run.  With MDCMD_UPDATE the sizes are copied
+ * back into stripep.  A truncation error from stripe_geom() is reduced
+ * to a warning when MDCMD_FORCE or MDCMD_ALLOPTION is given.
+ *
+ * Returns 0 on success, -1 on failure with the error recorded in ep.
+ * Once the unit has been allocated every exit goes through the common
+ * cleanup so that it is always freed.
+ */
+int
+meta_create_stripe(
+	mdsetname_t	*sp,
+	md_stripe_t	*stripep,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdname_t	*stripenp = stripep->common.namep;
+	int		force = ((options & MDCMD_FORCE) ? 1 : 0);
+	int		doall = ((options & MDCMD_ALLOPTION) ? 1 : 0);
+	uint_t		nrow = stripep->rows.rows_len;
+	uint_t		ncomp = 0;
+	uint_t		icomp = 0;
+	diskaddr_t	cum_blocks = 0;
+	diskaddr_t	limit;
+	size_t		mdsize, first_comp;
+	uint_t		row;
+	ms_unit_t	*ms;
+	ms_comp_t	*mdcomp;
+	mdnamelist_t	*keynlp = NULL;
+	md_set_params_t	set_params;
+	int		rval = -1;
+	md_timeval32_t	creation_time;
+	int		create_flag = MD_CRO_32BIT;
+
+	/* validate stripe */
+	if (meta_check_stripe(sp, stripep, options, ep) != 0)
+		return (-1);
+
+	/* allocate stripe unit: header + rows, then the component array */
+	mdsize = sizeof (*ms) - sizeof (ms->un_row[0]);
+	mdsize += sizeof (ms->un_row) * nrow;
+	for (row = 0; (row < nrow); ++row) {
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+
+		ncomp += rp->comps.comps_len;
+	}
+	first_comp = roundup(mdsize, sizeof (long long));
+	mdsize += (first_comp - mdsize) + (ncomp * sizeof (ms_comp_t));
+	ms = Zalloc(mdsize);
+	ms->un_ocomp = first_comp;
+	if (meta_gettimeofday(&creation_time) == -1) {
+		/* the unit is allocated by now: leave through cleanup */
+		(void) mdsyserror(ep, errno, NULL);
+		goto out;
+	}
+
+	/* do rows */
+	mdcomp = (ms_comp_t *)(void *)&((char *)ms)[ms->un_ocomp];
+	for (row = 0; (row < nrow); ++row) {
+		md_row_t	*rp = &stripep->rows.rows_val[row];
+		uint_t		row_ncomp = rp->comps.comps_len;
+		struct ms_row	*mdr = &ms->un_row[row];
+		diskaddr_t	disk_size = 0;
+		uint_t		comp;
+
+		/* setup component count and offset */
+		mdr->un_icomp = icomp;
+		mdr->un_ncomp = row_ncomp;
+
+		/* do components */
+		for (comp = 0; (comp < row_ncomp); ++comp) {
+			md_comp_t	*cp = &rp->comps.comps_val[comp];
+			mdname_t	*compnp = cp->compnamep;
+			ms_comp_t	*mdc = &mdcomp[icomp++];
+			diskaddr_t	size, start_blk;
+
+			/*
+			 * get start and size
+			 * if first component is labelled, include label
+			 */
+			if ((size = metagetsize(compnp, ep)) ==
+			    MD_DISKADDR_ERROR)
+				goto out;
+			if ((start_blk = metagetstart(sp, compnp, ep)) ==
+			    MD_DISKADDR_ERROR)
+				goto out;
+			if ((row == 0) && (comp == 0)) {
+				diskaddr_t	label;
+				int		has_db;
+
+				if ((has_db = metahasmddb(sp, compnp, ep)) < 0)
+					goto out;
+				if ((label = metagetlabel(compnp, ep)) ==
+				    MD_DISKADDR_ERROR)
+					goto out;
+				if ((has_db == 0) && (label != 0)) {
+					ms->c.un_flag |= MD_LABELED;
+					start_blk = compnp->start_blk = 0;
+				}
+			}
+			/* make sure we still have something left */
+			if (start_blk >= size) {
+				(void) mdsyserror(ep, ENOSPC, compnp->cname);
+				goto out;
+			}
+			size -= start_blk;
+
+			/*
+			 * round down by interlace: this only applies
+			 * if this row is a stripe, as indicated by
+			 * (row_ncomp > 1)
+			 */
+			if (row_ncomp > 1)
+				size = rounddown(size, rp->interlace);
+
+			if (size == 0) {
+				(void) mdsyserror(ep, ENOSPC, compnp->cname);
+				goto out;
+			}
+
+			/*
+			 * adjust for smallest disk: for a concat (any
+			 * row with only one component), this will
+			 * never hit the second conditional.
+			 */
+			if (disk_size == 0) {
+				disk_size = size;
+			} else if (size < disk_size) {
+				disk_size = size;
+			}
+
+			if (options & MDCMD_DOIT) {
+				/* store name in namespace */
+				if (add_key_name(sp, compnp, &keynlp, ep) != 0)
+					goto out;
+			}
+
+			/* setup component */
+			mdc->un_key = compnp->key;
+			mdc->un_dev = compnp->dev;
+			mdc->un_start_block = start_blk;
+			mdc->un_mirror.ms_state = CS_OKAY;
+			mdc->un_mirror.ms_timestamp = creation_time;
+		}
+		limit = LLONG_MAX;
+
+		/* setup row, clamping the running total at the limit */
+		mdr->un_blocks = mdr->un_ncomp * disk_size;
+		cum_blocks += mdr->un_blocks;
+		if (cum_blocks > limit) {
+			cum_blocks = limit;
+			md_eprintf(dgettext(TEXT_DOMAIN,
+			    "unit size overflow, limit is %lld blocks\n"),
+			    limit);
+		}
+		mdr->un_cum_blocks = cum_blocks;
+		mdr->un_interlace = rp->interlace;
+	}
+
+	/* setup unit */
+	ms->c.un_type = MD_DEVICE;
+	MD_SID(ms) = meta_getminor(stripenp->dev);
+	ms->c.un_actual_tb = cum_blocks;
+	ms->c.un_size = mdsize;
+	if (stripep->hspnamep != NULL)
+		ms->un_hsp_id = stripep->hspnamep->hsp;
+	else
+		ms->un_hsp_id = MD_HSP_NONE;
+	ms->un_nrows = nrow;
+
+	/* fill in the size of the stripe */
+	if (options & MDCMD_UPDATE) {
+		stripep->common.size = ms->c.un_total_blocks;
+		for (row = 0; (row < nrow); ++row) {
+			stripep->rows.rows_val[row].row_size =
+			    ms->un_row[row].un_blocks;
+		}
+	}
+
+	if (stripe_geom(stripep, ms, ep) != 0) {
+		/*
+		 * If the device is being truncated then only allow this
+		 * if the user is aware (using the -f option) or they
+		 * are in a recovery/complete build situation (using the -a
+		 * option).
+		 */
+		if ((mdiserror(ep, MDE_STRIPE_TRUNC_SINGLE) ||
+		    mdiserror(ep, MDE_STRIPE_TRUNC_MULTIPLE)) &&
+		    (force || doall)) {
+			md_eprintf(dgettext(TEXT_DOMAIN,
+"%s: WARNING: This form of metainit is not recommended.\n"
+"The stripe is truncating the size of the underlying device.\n"
+"Please see ERRORS in metainit(1M) for additional information.\n"),
+			    stripenp->cname);
+			mdclrerror(ep);
+		} else {
+			goto out;
+		}
+	}
+
+	create_flag = meta_check_devicesize(ms->c.un_total_blocks);
+
+	/* if we're not doing anything, return success */
+	if (! (options & MDCMD_DOIT)) {
+		rval = 0;	/* success */
+		goto out;
+	}
+
+	/* create stripe */
+	(void) memset(&set_params, 0, sizeof (set_params));
+
+	/* pick the unit revision that matches the size check above */
+	if (create_flag == MD_CRO_64BIT) {
+		ms->c.un_revision = MD_64BIT_META_DEV;
+		set_params.options = MD_CRO_64BIT;
+	} else {
+		ms->c.un_revision = MD_32BIT_META_DEV;
+		set_params.options = MD_CRO_32BIT;
+	}
+
+	set_params.mnum = MD_SID(ms);
+	set_params.size = ms->c.un_size;
+	set_params.mdp = (uintptr_t)ms;
+	MD_SETDRIVERNAME(&set_params, MD_STRIPE, MD_MIN2SET(set_params.mnum));
+	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
+	    stripenp->cname) != 0) {
+		(void) mdstealerror(ep, &set_params.mde);
+		goto out;
+	}
+	rval = 0;	/* success */
+
+	/* cleanup, return status */
+out:
+	Free(ms);
+	if (rval != 0) {
+		/* back out whatever names were stored in the namespace */
+		(void) del_key_names(sp, keynlp, NULL);
+	}
+
+	metafreenamelist(keynlp);
+	if ((rval == 0) && (options & MDCMD_DOIT)) {
+		if (invalidate_components(sp, stripenp, ep) != 0)
+			rval = -1;
+		meta_invalidate_name(stripenp);
+	}
+	return (rval);
+}
+
+/*
+ * initialize stripe
+ * NOTE: this function is metainit(1m)'s command line parser!
+ *
+ * argv[0] is the stripe name.  It is followed by the number of rows and,
+ * for each row, the number of components, the component names and an
+ * optional "-i interlace".  A trailing "-h hspname" selects a hot spare
+ * pool.  The parsed description is handed to meta_create_stripe().
+ *
+ * Unless MDCMD_NOLOCK is set the set lock is taken and ownership is
+ * checked first.  Returns 0 on success; on failure the error is in ep
+ * and the return value is -1 or whatever meta_cook_syntax() returned.
+ */
+int
+meta_init_stripe(
+	mdsetname_t	**spp,
+	int		argc,
+	char		*argv[],
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*uname = argv[0];
+	mdname_t	*stripenp = NULL;
+	int		old_optind;
+	int		c;
+	md_stripe_t	*stripep = NULL;
+	uint_t		nrow, row;
+	int		rval = -1;
+
+	/* get stripe name */
+	assert(argc > 0);
+	if (argc < 1)
+		goto syntax;
+
+	if ((stripenp = metaname(spp, uname, ep)) == NULL)
+		goto out;
+	assert(*spp != NULL);
+	uname = stripenp->cname;
+	if (metachkmeta(stripenp, ep) != 0)
+		goto out;
+
+	if (!(options & MDCMD_NOLOCK)) {
+		/* grab set lock */
+		if (meta_lock(*spp, TRUE, ep))
+			goto out;
+
+		if (meta_check_ownership(*spp, ep) != 0)
+			goto out;
+	}
+
+	/* see if it exists already */
+	if (metagetmiscname(stripenp, ep) != NULL) {
+		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
+		    meta_getminor(stripenp->dev), uname);
+		goto out;
+	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
+		goto out;
+	} else {
+		/* "not set up" is exactly what we want here */
+		mdclrerror(ep);
+	}
+	--argc, ++argv;
+
+	/* parse general options (none are accepted) */
+	optind = 0;
+	opterr = 0;
+	if (getopt(argc, argv, "") != -1)
+		goto options;
+
+	/* allocate stripe */
+	stripep = Zalloc(sizeof (*stripep));
+
+	/* setup common */
+	stripep->common.namep = stripenp;
+	stripep->common.type = MD_DEVICE;
+
+	/* allocate and parse rows */
+	if (argc < 1) {
+		(void) mdmderror(ep, MDE_NROWS, meta_getminor(stripenp->dev),
+				uname);
+		goto out;
+	} else if ((sscanf(argv[0], "%u", &nrow) != 1) || ((int)nrow < 0)) {
+		goto syntax;
+	} else if (nrow < 1) {
+		(void) mdmderror(ep, MDE_NROWS, meta_getminor(stripenp->dev),
+				uname);
+		goto out;
+	}
+	--argc, ++argv;
+	stripep->rows.rows_len = nrow;
+	stripep->rows.rows_val =
+	    Zalloc(nrow * sizeof (*stripep->rows.rows_val));
+	for (row = 0; (row < nrow); ++row) {
+		md_row_t	*mdr = &stripep->rows.rows_val[row];
+		uint_t		ncomp, comp;
+
+		/* allocate and parse components */
+		if (argc < 1) {
+			(void) mdmderror(ep, MDE_NROWS,
+					meta_getminor(stripenp->dev), uname);
+			goto out;
+		} else if ((sscanf(argv[0], "%u", &ncomp) != 1) ||
+		    ((int)ncomp < 0)) {
+			goto syntax;
+		} else if (ncomp < 1) {
+			(void) mdmderror(ep, MDE_NCOMPS,
+					meta_getminor(stripenp->dev), uname);
+			goto out;
+		}
+		--argc, ++argv;
+		mdr->comps.comps_len = ncomp;
+		mdr->comps.comps_val =
+		    Zalloc(ncomp * sizeof (*mdr->comps.comps_val));
+		for (comp = 0; (comp < ncomp); ++comp) {
+			md_comp_t	*mdc = &mdr->comps.comps_val[comp];
+			mdname_t	*compnp;
+
+			/* parse component name */
+			if (argc < 1) {
+				(void) mdmderror(ep, MDE_NCOMPS,
+				    meta_getminor(stripenp->dev), uname);
+				goto out;
+			}
+			if ((compnp = metaname(spp, argv[0], ep)) == NULL) {
+				goto out;
+			}
+			/* check for soft partition */
+			if (meta_sp_issp(*spp, compnp, ep) != 0) {
+				/* check disk */
+				if (metachkcomp(compnp, ep) != 0) {
+					goto out;
+				}
+			}
+			mdc->compnamep = compnp;
+			--argc, ++argv;
+		}
+
+		/* parse row options */
+		old_optind = optind = 0;
+		opterr = 0;
+		while ((c = getopt(argc, argv, "i:")) != -1) {
+			switch (c) {
+			case 'i':
+				if (parse_interlace(uname, optarg,
+				    &mdr->interlace, ep) != 0) {
+					goto out;
+				}
+				if (meta_stripe_check_interlace(mdr->interlace,
+					uname, ep))
+					goto out;
+				break;
+
+			default:
+				optind = old_optind;	/* bomb out later */
+				goto done_row_opts;
+			}
+			old_optind = optind;
+		}
+done_row_opts:
+		argc -= optind;
+		argv += optind;
+	}
+
+	/* parse stripe options */
+	old_optind = optind = 0;
+	opterr = 0;
+	while ((c = getopt(argc, argv, "h:")) != -1) {
+		switch (c) {
+		case 'h':
+			if ((stripep->hspnamep = metahspname(spp, optarg,
+			    ep)) == NULL) {
+				goto out;
+			}
+			break;
+
+		default:
+			/*
+			 * skip what was parsed successfully so that the
+			 * offending token is reported; the count shrinks
+			 * as the vector advances
+			 */
+			argc -= old_optind;
+			argv += old_optind;
+			goto options;
+		}
+		old_optind = optind;
+	}
+	argc -= optind;
+	argv += optind;
+
+	/* we should be at the end */
+	if (argc != 0)
+		goto syntax;
+
+	/* create stripe */
+	if (meta_create_stripe(*spp, stripep, options, ep) != 0)
+		goto out;
+	rval = 0;	/* success */
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Concat/Stripe is setup\n"),
+		    uname);
+		(void) fflush(stdout);
+	}
+	goto out;
+
+	/* syntax error */
+syntax:
+	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
+	goto out;
+
+	/* options error */
+options:
+	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
+	goto out;
+
+	/* cleanup, return status */
+out:
+	if (stripep != NULL)
+		meta_free_stripe(stripep);
+	return (rval);
+}
+
+/*
+ * Clear one stripe, or every stripe of a set.
+ *
+ * With stripenp == NULL all stripes of sp are cleared one after the
+ * other, stopping at the first failure: in a multi-node set by sending
+ * a metaclear command per stripe, otherwise by calling this function
+ * again for the stripe.
+ *
+ * With a stripe name, the stripe must not have a parent.  The cached
+ * names of its components are invalidated, the metadevice is reset and,
+ * with MDCMD_PRINT, a message is printed.  With MDCMD_RECURSE every
+ * component that is itself a metadevice is reset as well; a failure
+ * there makes the result -1 but does not stop the remaining resets.
+ *
+ * Returns 0 on success, a non-zero error return otherwise.
+ */
+int
+meta_stripe_reset(
+	mdsetname_t	*sp,
+	mdname_t	*stripenp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	int		rval = -1;
+	int		r, c;
+
+	/* the stripe, when given, has to belong to the set */
+	assert(sp != NULL);
+	assert((stripenp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(stripenp->dev))));
+
+	/* no name: walk over all stripes of the set */
+	if (stripenp == NULL) {
+		mdnamelist_t	*allnlp = NULL;
+		mdnamelist_t	*cur;
+
+		rval = 0;
+		if (meta_get_stripe_names(sp, &allnlp, 0, ep) < 0)
+			return (-1);
+
+		for (cur = allnlp; cur != NULL; cur = cur->next) {
+			int	failed;
+
+			stripenp = cur->namep;
+
+			/*
+			 * A multi-node set gets one metaclear command
+			 * per stripe; anything else is reset directly.
+			 */
+			if (!meta_is_mn_set(sp, ep)) {
+				failed = (meta_stripe_reset(sp, stripenp,
+				    options, ep) != 0);
+			} else {
+				failed = (meta_mn_send_metaclear_command(sp,
+				    stripenp->cname, options, 0, ep) != 0);
+			}
+			if (failed) {
+				rval = -1;
+				break;
+			}
+		}
+
+		metafreenamelist(allnlp);
+		return (rval);
+	}
+
+	/* a single stripe from here on */
+	if (metachkmeta(stripenp, ep) != 0)
+		return (-1);
+
+	stripep = meta_get_stripe(sp, stripenp, ep);
+	if (stripep == NULL)
+		return (-1);
+
+	/* refuse while something sits on top of us */
+	if (MD_HAS_PARENT(stripep->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(stripenp->dev),
+		    stripenp->cname));
+	}
+
+	/* forget what is cached about the components */
+	if (invalidate_components(sp, stripenp, ep) != 0)
+		return (-1);
+
+	/* clear the metadevice itself */
+	if (meta_reset(sp, stripenp, options, ep) != 0)
+		goto out;
+	rval = 0;
+
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: Concat/Stripe is cleared\n"),
+		    stripenp->cname);
+		(void) fflush(stdout);
+	}
+
+	/* optionally clear the metadevices underneath as well */
+	if (options & MDCMD_RECURSE) {
+		for (r = 0; r < stripep->rows.rows_len; r++) {
+			md_row_t	*rowp = &stripep->rows.rows_val[r];
+
+			for (c = 0; c < rowp->comps.comps_len; c++) {
+				mdname_t	*compnp =
+				    rowp->comps.comps_val[c].compnamep;
+
+				/* plain devices are left alone */
+				if (metaismeta(compnp) &&
+				    (meta_reset_by_name(sp, compnp, options,
+				    ep) != 0))
+					rval = -1;
+			}
+		}
+	}
+
+out:
+	meta_invalidate_name(stripenp);
+	return (rval);
+}
+
+/*
+ * Tell whether any component of the listed stripes is not okay.
+ *
+ * Returns TRUE as soon as a stripe cannot be fetched or a component
+ * whose state is not CS_OKAY is seen, FALSE when every component of
+ * every listed stripe is CS_OKAY.  Errors are collected in a local
+ * md_error_t and cleared before returning; none is passed back.
+ */
+int
+meta_stripe_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *stripe_names)
+{
+	mdnamelist_t	*nlp;
+	md_error_t	  status	= mdnullerror;
+	md_error_t	 *ep		= &status;
+	int		  any_errs	= FALSE;
+
+	for (nlp = stripe_names; nlp != NULL; nlp = nlp->next) {
+		md_stripe_t	*stripep;
+		int		 r;
+
+		/* a stripe we cannot look at counts as being in error */
+		stripep = meta_get_stripe(sp, nlp->namep, ep);
+		if (stripep == NULL) {
+			any_errs = TRUE;
+			break;
+		}
+
+		for (r = 0; !any_errs && (r < stripep->rows.rows_len); ++r) {
+			md_row_t	*rowp = &stripep->rows.rows_val[r];
+			uint_t		 c;
+
+			for (c = 0; c < rowp->comps.comps_len; ++c) {
+				if (rowp->comps.comps_val[c].state !=
+				    CS_OKAY) {
+					any_errs = TRUE;
+					break;
+				}
+			}
+		}
+
+		/* one bad component is all we need to know about */
+		if (any_errs)
+			break;
+	}
+
+	/* the caller only gets the verdict, never the error itself */
+	if (!mdisok(ep))
+		mdclrerror(ep);
+
+	return (any_errs);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_systemfile.c b/usr/src/lib/lvm/libmeta/common/meta_systemfile.c
new file mode 100644
index 0000000000..9e5e20f057
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_systemfile.c
@@ -0,0 +1,475 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * patch /kernel/drv/md.conf file
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <meta.h>
+#include <sys/lvm/md_mddb.h>
+
+/*
+ * magic strings in system
+ */
+#define	BEGROOTSTR	"* Begin MDD root info (do not edit)\n"
+#define	ENDROOTSTR	"* End MDD root info (do not edit)\n"
+#define	BEGMDDBSTR	"# Begin MDD database info (do not edit)\n"
+#define	ENDMDDBSTR	"# End MDD database info (do not edit)\n"
+
+/*
+ * copy system file, yank root and database lines
+ *
+ * Reads sname line by line.  Lines between BEGROOTSTR/ENDROOTSTR
+ * (when doroot is set) and between BEGMDDBSTR/ENDMDDBSTR (when domddb
+ * is set), markers included, are dropped; every other line is written
+ * to the temp file when doit is set.  With verbose set the dropped
+ * lines are echoed to stdout.  A missing sname is treated as an empty
+ * file.
+ *
+ * The temp file is "<sname>.tmp"; if that cannot be opened because the
+ * file system is read-only (EROFS) a name from tempnam("/var/tmp",
+ * "svm_") is used instead.  When sname exists, its permission bits and
+ * ownership are copied to the temp file.
+ *
+ * Returns 0 on success with *tname set to the allocated temp file name
+ * and, when doit is set, *tfp set to the open, flushed temp stream.
+ * Returns -1 with ep set on failure; in that case any temp file opened
+ * by this call is closed and removed, the name is freed, and both
+ * *tname and *tfp are set to NULL.
+ */
+int
+meta_systemfile_copy(
+	char		*sname,		/* system file name */
+	int		doroot,		/* remove mdd root stuff */
+	int		domddb,		/* remove mdd database stuff */
+	int		doit,		/* really copy file */
+	int		verbose,	/* show what we're doing */
+	char		**tname,	/* returned temp file name */
+	FILE		**tfp,		/* returned open FILE */
+	md_error_t	*ep		/* returned error */
+)
+{
+	FILE		*fp;
+	struct stat	sbuf;
+	char		buf[MDDB_BOOTLIST_MAX_LEN];
+	int		delroot = 0;
+	int		delmddb = 0;
+
+	/* check names */
+	assert(sname != NULL);
+	assert(tname != NULL);
+	assert(tfp != NULL);
+
+	/* get temp name */
+	*tfp = NULL;
+	*tname = Malloc(strlen(sname) + strlen(".tmp") + 1);
+	(void) strcpy(*tname, sname);
+	(void) strcat(*tname, ".tmp");
+
+	/* open system file; a missing file is simply treated as empty */
+	if ((fp = fopen(sname, "r")) == NULL) {
+		if (errno != ENOENT) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+	} else if (fstat(fileno(fp), &sbuf) != 0) {
+		/* sbuf is needed below for mode/owner; don't use garbage */
+		(void) mdsyserror(ep, errno, sname);
+		goto out;
+	}
+	if (doit) {
+		if ((*tfp = fopen(*tname, "w")) == NULL) {
+			/*
+			 * If we are on the miniroot we need to create
+			 * files in /var/tmp. Opening a writable file
+			 * in the miniroot result is EROFS error.
+			 */
+			if (errno != EROFS) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+			Free(*tname);
+			*tname = tempnam("/var/tmp", "svm_");
+			if (*tname == NULL) {
+				(void) mdsyserror(ep, errno, NULL);
+				goto out;
+			}
+			if ((*tfp = fopen(*tname, "w")) == NULL) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+		}
+		if (fp != NULL) {
+			/* carry mode and ownership over from the original */
+			if ((fchmod(fileno(*tfp), (sbuf.st_mode & 0777))
+			    != 0) ||
+			    (fchown(fileno(*tfp), sbuf.st_uid, sbuf.st_gid)
+			    != 0)) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+		}
+	}
+	if (verbose) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Delete the following lines from %s:\n\n"), sname);
+	}
+	while ((fp != NULL) && (fgets(buf, sizeof (buf), fp) != NULL)) {
+		if ((doroot) && (strcmp(buf, BEGROOTSTR) == 0)) {
+			delroot = 1;
+			if (verbose)
+				(void) printf("%s", buf);
+			continue;
+		}
+		if (delroot) {
+			if (strcmp(buf, ENDROOTSTR) == 0)
+				delroot = 0;
+			if (verbose)
+				(void) printf("%s", buf);
+			continue;
+		}
+		if ((domddb) && (strcmp(buf, BEGMDDBSTR) == 0)) {
+			delmddb = 1;
+			if (verbose)
+				(void) printf("%s", buf);
+			continue;
+		}
+		if (delmddb) {
+			if (strcmp(buf, ENDMDDBSTR) == 0)
+				delmddb = 0;
+			if (verbose)
+				(void) printf("%s", buf);
+			continue;
+		}
+		if (doit) {
+			if (fputs(buf, *tfp) == EOF) {
+				(void) mdsyserror(ep, errno, *tname);
+				goto out;
+			}
+		}
+	}
+	if (fp != NULL) {
+		/* fgets() stopped: anything other than EOF is a read error */
+		if (! feof(fp)) {
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		if (fclose(fp) != 0) {
+			/*
+			 * The stream is gone even when fclose() fails;
+			 * forget it so the error path does not close it
+			 * a second time.
+			 */
+			fp = NULL;
+			(void) mdsyserror(ep, errno, sname);
+			goto out;
+		}
+		fp = NULL;
+	}
+	if (verbose)
+		(void) printf("\n");
+
+	/* make sure we didn't stop mid-delete */
+	if ((delroot) || (delmddb)) {
+		(void) mderror(ep, MDE_SYSTEM_FILE, sname);
+		goto out;
+	}
+
+	/* flush stuff */
+	if (doit) {
+		if ((fflush(*tfp) != 0) ||
+		    (fsync(fileno(*tfp)) != 0)) {
+			(void) mdsyserror(ep, errno, *tname);
+			goto out;
+		}
+	}
+
+	/* return success */
+	return (0);
+
+	/* cleanup, return error */
+out:
+	if (fp != NULL)
+		(void) fclose(fp);
+	if (*tfp != NULL) {
+		(void) fclose(*tfp);
+		*tfp = NULL;
+		/* only remove a temp file that this call actually opened */
+		if (*tname != NULL)
+			(void) unlink(*tname);
+	}
+	if (*tname != NULL) {
+		Free(*tname);
+		/* don't hand a dangling pointer back to the caller */
+		*tname = NULL;
+	}
+	return (-1);
+}
+
+/*
+ * append root on MD lines to system
+ *
+ * Emits BEGROOTSTR, then (only when ismeta is set) a "rootdev:" line
+ * naming the /devices path of rootnp, then ENDROOTSTR.  With verbose
+ * set the lines are echoed to stdout; with doit set they are written
+ * to tfp, which is flushed and synced before returning.
+ *
+ * Returns 0 on success.  On failure returns -1 if the /devices name
+ * cannot be obtained, otherwise the value of mdsyserror() for the
+ * failed write; ep is set in both cases.
+ */
+int
+meta_systemfile_append_mdroot(
+	mdname_t	*rootnp,	/* root device name */
+	char		*sname,		/* system file name */
+	char		*tname,		/* temp file name */
+	FILE		*tfp,		/* temp FILE */
+	int		ismeta,		/* is a metadevice */
+	int		doit,		/* really patch file */
+	int		verbose,	/* show what we're doing */
+	md_error_t	*ep
+)
+{
+	char		*longblkname;
+
+	/* check names */
+	assert(sname != NULL);
+	assert(tname != NULL);
+	assert(!doit || tfp != NULL);
+
+	/* get root /devices name */
+	if ((longblkname = metagetdevicesname(rootnp, ep)) == NULL)
+		return (-1);
+
+	/* add header */
+	if (verbose) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Add the following lines to %s:\n\n"), sname);
+		(void) printf("%s", BEGROOTSTR);
+	}
+	if (doit) {
+		/* fprintf() reports failure with any negative value */
+		if (fprintf(tfp, "%s", BEGROOTSTR) < 0) {
+			return (mdsyserror(ep, errno, tname));
+		}
+	}
+
+	/* add rootdev */
+	if (ismeta) {
+		if (verbose)
+			(void) printf("rootdev:%s\n", longblkname);
+		if (doit) {
+			if (fprintf(tfp, "rootdev:%s\n", longblkname) < 0) {
+				return (mdsyserror(ep, errno, tname));
+			}
+		}
+	}
+
+	/* add trailer */
+	if (verbose) {
+		(void) printf("%s\n", ENDROOTSTR);
+	}
+	if (doit) {
+		if (fprintf(tfp, "%s", ENDROOTSTR) < 0) {
+			return (mdsyserror(ep, errno, tname));
+		}
+	}
+
+	/* flush stuff */
+	if (doit) {
+		if ((fflush(tfp) != 0) ||
+		    (fsync(fileno(tfp)) != 0)) {
+			return (mdsyserror(ep, errno, tname));
+		}
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * parse mddb.cf line
+ *
+ * A line has the form
+ *	<driver> <minor> <block> <devid> <checksum>
+ * with fields separated by blanks or tabs.  The line is modified in
+ * place: the driver name is NUL-terminated and *driver points into
+ * the caller's buffer.
+ *
+ * The line is accepted only if the sum of the driver name characters,
+ * the minor number, the block offset, the device id characters and
+ * the trailing checksum field equals 42.
+ *
+ * Returns 0 on success, -1 for a comment line (leading '#'), a line
+ * with no driver name or no field separator, or a checksum mismatch.
+ *
+ * Caller of this routine needs to free the device id string that
+ * is passed back during a successful return.  On a checksum mismatch
+ * the string is freed here and *devid_char_pp is set to NULL; on the
+ * other failures *devid_char_pp is left untouched.
+ */
+static int
+confline(
+	char		*line,		/* line in file */
+	char		**driver,	/* returned driver name */
+	minor_t		*mnump,		/* returned minor number */
+	daddr_t		*block,		/* returned block offset */
+	char		**devid_char_pp	/* returned device id string */
+)
+{
+	char		*p = line;
+	int		chksum = 0;
+	uint_t		i;
+	uint_t		devid_size;
+
+	if (*p == '#') {
+		return (-1);
+	}
+	*driver = p;
+	/* stop at the terminator too, so a blank/short line can't overrun */
+	while ((*p != '\0') && (*p != ' ') && (*p != '\t'))
+		chksum += *p++;
+	if ((*driver == p) || (*p == '\0')) {
+		/* empty driver name, or nothing follows it */
+		return (-1);
+	}
+	*p++ = '\0';
+	*mnump = strtoul(p, &p, 10);
+	chksum += *mnump;
+	*block = strtol(p, &p, 10);
+	chksum += *block;
+
+	/* parse out devid */
+	while ((*p == ' ') || (*p == '\t')) {
+		p++;
+	}
+	devid_size = strcspn(p, " \t");
+	*devid_char_pp = Malloc(devid_size + 1);
+	(void) strncpy(*devid_char_pp, p, devid_size);
+	(*devid_char_pp)[devid_size] = '\0';
+	p += devid_size;
+	for (i = 0; i < devid_size; i++) {
+		chksum += (*devid_char_pp)[i];
+	}
+
+	chksum += strtol(p, &p, 10);
+	if (chksum != 42) {
+		Free(*devid_char_pp);
+		/* clear the caller's pointer, not our local parameter */
+		*devid_char_pp = NULL;
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * append MDDB lines to system
+ *
+ * Reads replica entries from the mddb.cf style file cname (which must
+ * be a regular file) and emits them, bracketed by BEGMDDBSTR and
+ * ENDMDDBSTR, as up to MDDB_MAX_PATCH lines of the form
+ *	mddb_bootlistN="driver:minor:block:devid driver:minor:block:devid";
+ * Each line is filled with as many entries as fit in
+ * MDDB_BOOTLIST_MAX_LEN.  Lines of cname rejected by confline() are
+ * skipped.
+ *
+ * With verbose set the lines are echoed to stdout; with doit set they
+ * are written to tfp, which is flushed and synced before returning.
+ *
+ * Returns 0 on success, -1 with ep set on failure.
+ */
+int
+meta_systemfile_append_mddb(
+	char		*cname,		/* mddb.cf file name */
+	char		*sname,		/* system file name */
+	char		*tname,		/* temp file name */
+	FILE		*tfp,		/* temp FILE */
+	int		doit,		/* really patch file */
+	int		verbose,	/* show what we're doing */
+	md_error_t	*ep		/* returned error */
+)
+{
+	FILE		*cfp = NULL;
+	char		buf[1024];
+	char		*p;
+	int		i;
+	char		*driver;
+	minor_t		mnum;
+	daddr_t		block;
+	char		line[MDDB_BOOTLIST_MAX_LEN];
+	char		entry[MDDB_BOOTLIST_MAX_LEN];
+	char		*devid_char_p = NULL;
+	struct stat	statbuf;
+
+	/* check names */
+	assert(cname != NULL);
+	assert(sname != NULL);
+	assert(tname != NULL);
+	assert(!doit || tfp != NULL);
+
+	/* open database conf file */
+	if ((cfp = fopen(cname, "r")) == NULL) {
+		(void) mdsyserror(ep, errno, cname);
+		goto out;
+	}
+	/*
+	 * Check that it is an ordinary file.  Query the open stream
+	 * rather than the path so the check applies to the file we
+	 * are actually going to read.
+	 */
+	if (fstat(fileno(cfp), &statbuf) != 0) {
+		(void) mdsyserror(ep, errno, cname);
+		goto out;
+	}
+	if ((statbuf.st_mode & S_IFMT) != S_IFREG) {
+		(void) mderror(ep, MDE_MDDB_FILE, cname);
+		goto out;
+	}
+
+	/* add header */
+	if (verbose) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Add the following lines to %s:\n\n"), sname);
+		(void) printf("%s", BEGMDDBSTR);
+	}
+	if (doit) {
+		if (fprintf(tfp, "%s", BEGMDDBSTR) < 0) {
+			(void) mdsyserror(ep, errno, tname);
+			goto out;
+		}
+	}
+
+	/* append database lines: find the first valid entry */
+	while ((p = fgets(buf, sizeof (buf), cfp)) != NULL) {
+		if (confline(buf, &driver, &mnum, &block,
+		    &devid_char_p) == 0)
+			break;
+		/*
+		 * A rejected line never leaves us owning a devid
+		 * string (confline() releases it itself); forget the
+		 * pointer so it cannot be freed again below.
+		 */
+		devid_char_p = NULL;
+	}
+	for (i = 1; ((p != NULL) && (i <= MDDB_MAX_PATCH)); ++i) {
+		/* start a new bootlist line with the pending entry */
+		(void) snprintf(line, sizeof (line),
+		    "mddb_bootlist%d=\"%s:%lu:%ld:%s",
+		    i, driver, mnum, block, devid_char_p);
+		if (devid_char_p != NULL) {
+			free(devid_char_p);
+			devid_char_p = NULL;
+		}
+
+		while ((p = fgets(buf, sizeof (buf), cfp)) != NULL) {
+			if (confline(buf, &driver, &mnum, &block,
+			    &devid_char_p) != 0) {
+				/* see above: nothing of ours to free */
+				devid_char_p = NULL;
+				continue;
+			}
+			(void) snprintf(entry, sizeof (entry), " %s:%lu:%ld:%s",
+			    driver, mnum, block, devid_char_p);
+
+			/*
+			 * Line is full: leave this entry (and its devid
+			 * string) pending for the next bootlist line.
+			 */
+			if ((strlen(line) + strlen(entry) + 4) > sizeof (line))
+				break;
+			(void) strcat(line, entry);
+			if (devid_char_p != NULL) {
+				free(devid_char_p);
+				devid_char_p = NULL;
+			}
+		}
+		if (verbose)
+			/* CSTYLED */
+			(void) printf("%s\";\n", line);
+		if (doit) {
+			/* CSTYLED */
+			if (fprintf(tfp, "%s\";\n", line) <= 0) {
+				(void) mdsyserror(ep, errno, tname);
+				goto out;
+			}
+		}
+	}
+
+	/* an entry may still be pending if MDDB_MAX_PATCH was reached */
+	if (devid_char_p != NULL) {
+		free(devid_char_p);
+		devid_char_p = NULL;
+	}
+
+	/* add trailer */
+	if (verbose)
+		(void) printf("%s\n", ENDMDDBSTR);
+	if (doit) {
+		if (fprintf(tfp, "%s", ENDMDDBSTR) < 0) {
+			(void) mdsyserror(ep, errno, tname);
+			goto out;
+		}
+	}
+
+	/* close database conf file */
+	if (fclose(cfp) != 0) {
+		cfp = NULL;
+		(void) mdsyserror(ep, errno, cname);
+		goto out;
+	}
+	cfp = NULL;
+
+	/* flush stuff */
+	if (doit) {
+		if ((fflush(tfp) != 0) ||
+		    (fsync(fileno(tfp)) != 0)) {
+			(void) mdsyserror(ep, errno, tname);
+			goto out;
+		}
+	}
+
+	/* return success */
+	return (0);
+
+	/* cleanup, return error */
+out:
+	/* a pending devid string would otherwise leak on the error path */
+	if (devid_char_p != NULL)
+		free(devid_char_p);
+	if (cfp != NULL)
+		(void) fclose(cfp);
+	return (-1);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_tab.c b/usr/src/lib/lvm/libmeta/common/meta_tab.c
new file mode 100644
index 0000000000..7e1ed32a6b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_tab.c
@@ -0,0 +1,342 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+
+#include <ctype.h>
+
+/*
+ * Release an md.tab structure and everything hanging off it: the file
+ * name, the raw data buffer, each parsed line's context string,
+ * canonical name and argv array, the line array itself, and finally
+ * the structure.
+ */
+void
+meta_tab_free(
+	md_tab_t	*tabp
+)
+{
+	Free(tabp->filename);
+	Free(tabp->data);
+
+	if (tabp->lines != NULL) {
+		md_tab_line_t	*cur = tabp->lines;
+		md_tab_line_t	*end = tabp->lines + tabp->nlines;
+
+		assert(tabp->alloc > 0);
+
+		/* per-line allocations first, then the array itself */
+		while (cur < end) {
+			if (cur->context != NULL)
+				Free(cur->context);
+			if (cur->cname != NULL)
+				Free(cur->cname);
+			if (cur->argv != NULL) {
+				assert(cur->alloc > 0);
+				Free(cur->argv);
+			}
+			++cur;
+		}
+		Free(tabp->lines);
+	}
+
+	Free(tabp);
+}
+
+/*
+ * (re)allocate argv array
+ *
+ * Make sure linep->argv has room for at least argc entries.  Capacity
+ * grows in multiples of TAB_ARG_ALLOC; newly added slots are zeroed
+ * and linep->alloc is updated to the new capacity.
+ */
+static void
+realloc_argv(
+	md_tab_line_t	*linep,
+	size_t		argc
+)
+{
+	/* allocate in chunks */
+	argc = roundup(argc, TAB_ARG_ALLOC);
+
+	/*
+	 * Already big enough.  This must be "<=": with "<" every call
+	 * that rounds to the current capacity would reallocate to the
+	 * same size, defeating the chunking.
+	 */
+	if (argc <= linep->alloc)
+		return;
+
+	/* (re)allocate */
+	if (linep->alloc == 0) {
+		linep->argv = Malloc(argc * sizeof (*linep->argv));
+	} else {
+		assert(linep->argv != NULL);
+		linep->argv =
+		    Realloc(linep->argv, (argc * sizeof (*linep->argv)));
+	}
+
+	/* zero out new stuff */
+	(void) memset(&linep->argv[linep->alloc], 0,
+	    ((argc - linep->alloc) * sizeof (*linep->argv)));
+
+	/* adjust for new size */
+	linep->alloc = argc;
+}
+
+/*
+ * (re)allocate line array
+ *
+ * Make sure tabp->lines has room for at least nlines entries.
+ * Capacity grows in multiples of TAB_LINE_ALLOC; newly added entries
+ * are zeroed and tabp->alloc is updated to the new capacity.
+ */
+static void
+realloc_lines(
+	md_tab_t	*tabp,
+	size_t		nlines
+)
+{
+	/* allocate in chunks */
+	nlines = roundup(nlines, TAB_LINE_ALLOC);
+
+	/*
+	 * Already big enough.  This must be "<=": with "<" every call
+	 * that rounds to the current capacity would reallocate to the
+	 * same size, defeating the chunking.
+	 */
+	if (nlines <= tabp->alloc)
+		return;
+
+	/* (re)allocate */
+	if (tabp->alloc == 0) {
+		assert(tabp->lines == NULL);
+		tabp->lines = Malloc(nlines * sizeof (*tabp->lines));
+	} else {
+		assert(tabp->lines != NULL);
+		tabp->lines =
+		    Realloc(tabp->lines, (nlines * sizeof (*tabp->lines)));
+	}
+
+	/* zero out new stuff */
+	(void) memset(&tabp->lines[tabp->alloc], 0,
+	    ((nlines - tabp->alloc) * sizeof (*tabp->lines)));
+
+	/* adjust for new size */
+	tabp->alloc = nlines;
+}
+
+/*
+ * parse up md.tab struct
+ *
+ * Splits tabp->data (which must end in "\n\0") into lines and
+ * whitespace-separated tokens, in place: token separators and line
+ * terminators are overwritten with NULs and each line's argv[] points
+ * into tabp->data.  Lines starting with '#' and blank lines are
+ * skipped; a backslash immediately before a newline joins the next
+ * physical line.  For every remaining line an entry is appended to
+ * tabp->lines with its "<file> line <n>" context, init type and
+ * canonical name of argv[0].
+ */
+static void
+parse_tab(
+	md_tab_t	*tabp
+)
+{
+	uint_t		lineno = 1;
+	char		*p = tabp->data;
+	char		*e = tabp->data + tabp->total - 1;
+	char		*context;
+	size_t		len;
+
+	/* we can count on '\n\0' as the last characters */
+	assert(tabp->total >= 2);
+	assert(tabp->data[tabp->total - 2] == '\n');
+	assert(tabp->data[tabp->total - 1] == '\0');
+
+	/* allocate context buffer "file line XXX" */
+	assert(tabp->filename != NULL);
+	len = strlen(tabp->filename) +
+	    strlen(dgettext(TEXT_DOMAIN, "%s line %u")) + 20 + 1;
+	context = Malloc(len);
+
+	/* parse lines */
+	while (p < e) {
+		md_tab_line_t	*linep;
+		char		*t;
+
+		/* allocate new line */
+		realloc_lines(tabp, (tabp->nlines + 1));
+		linep = &tabp->lines[tabp->nlines];
+		(void) snprintf(context, len,
+		    dgettext(TEXT_DOMAIN, "%s line %u"), tabp->filename,
+		    lineno);
+
+		/* comments */
+		if (*p == '#') {
+			while (*p != '\n')
+				++p;
+		}
+
+		/* coalesce \ continuations */
+		t = p;
+		while (*t != '\n') {
+			if ((*t == '\\') && (*(t + 1) == '\n')) {
+				*t++ = ' ';
+				*t = ' ';
+				++lineno;
+			}
+			++t;
+		}
+
+		/*
+		 * leading whitespace
+		 *
+		 * isspace() is only defined for unsigned char values
+		 * and EOF, so cast before classifying.
+		 */
+		while ((*p != '\n') && (isspace((unsigned char)*p)))
+			++p;
+
+		/* count lines */
+		if (*p == '\n') {
+			++p;
+			++lineno;
+			continue;
+		}
+
+		/* tokenize line */
+		while ((p < e) && (*p != '\n')) {
+			char	**argvp;
+
+			/* allocate new token */
+			realloc_argv(linep, (linep->argc + 1));
+			argvp = &linep->argv[linep->argc++];
+
+			/* find end of token */
+			*argvp = p;
+			while ((*p != '\n') && (! isspace((unsigned char)*p)))
+				++p;
+
+			/* terminate */
+			if (*p == '\n') {
+				*p++ = '\0';
+				++lineno;
+				break;
+			}
+
+			/* eat white space */
+			*p++ = '\0';
+			while ((p < e) && (*p != '\n') &&
+			    (isspace((unsigned char)*p)))
+				++p;
+		}
+		tabp->nlines++;
+
+		/* fill in the rest */
+		assert((linep->argc > 0) && (linep->argv != NULL) &&
+		    (linep->argv[0][0] != '\0') &&
+		    (! isspace((unsigned char)linep->argv[0][0])));
+		linep->context = Strdup(context);
+		linep->type = meta_get_init_type(linep->argc, linep->argv);
+		/*
+		 * meta_canonicalize() hands back a string the caller
+		 * owns; keep it directly instead of duplicating it and
+		 * leaking the original.
+		 */
+		linep->cname = meta_canonicalize(NULL, linep->argv[0]);
+		assert(linep->cname != NULL);
+	}
+
+	/* cleanup */
+	Free(context);
+}
+
+/*
+ * read in md.tab file and return struct
+ *
+ * Reads the whole of filename (METATAB when filename is NULL) into
+ * memory, appends "\n\0", and parses it with parse_tab().
+ *
+ * Returns the parsed table, to be released with meta_tab_free(), or
+ * NULL with ep set if the file cannot be opened, stat'ed, read in full
+ * (a short file is reported as MDE_SYNTAX) or closed.
+ */
+md_tab_t *
+meta_tab_parse(
+	char		*filename,
+	md_error_t	*ep
+)
+{
+	md_tab_t	*tabp = NULL;
+	int		fd = -1;
+	struct stat	statbuf;
+	size_t		sofar;
+	char		*p;
+
+	/* open tab file */
+	if (filename == NULL)
+		filename = METATAB;
+	if ((fd = open(filename, O_RDONLY, 0)) < 0) {
+		(void) mdsyserror(ep, errno, filename);
+		goto out;
+	}
+	if (fstat(fd, &statbuf) != 0) {
+		(void) mdsyserror(ep, errno, filename);
+		goto out;
+	}
+
+	/* allocate table */
+	tabp = Zalloc(sizeof (*tabp));
+	tabp->filename = Strdup(filename);
+	tabp->total = statbuf.st_size + 2;	/* terminating "\n\0" */
+	tabp->data = Malloc(tabp->total);
+
+	/* read in data */
+	sofar = 0;
+	p = tabp->data;
+	while (sofar < statbuf.st_size) {
+		size_t	want = statbuf.st_size - sofar;
+		int	cnt;
+
+		/*
+		 * Never ask for more than is left in the buffer: the
+		 * file may have grown since fstat(), and an unbounded
+		 * 8K read would then run past the end of data.
+		 */
+		if (want > 8192)
+			want = 8192;
+
+		if ((cnt = read(fd, p, want)) < 0) {
+			(void) mdsyserror(ep, errno, filename);
+			goto out;
+		} else if (cnt == 0) {
+			/* file is shorter than fstat() said it was */
+			(void) mderror(ep, MDE_SYNTAX, filename);
+			goto out;
+		}
+		sofar += cnt;
+		p += cnt;
+	}
+	tabp->data[tabp->total - 2] = '\n';
+	tabp->data[tabp->total - 1] = '\0';
+
+	/* close file */
+	if (close(fd) != 0) {
+		(void) mdsyserror(ep, errno, filename);
+		fd = -1;
+		goto out;
+	}
+	fd = -1;
+
+	/* parse it up */
+	parse_tab(tabp);
+
+	/* return success */
+	return (tabp);
+
+	/* cleanup, return error */
+out:
+	if (fd >= 0)
+		(void) close(fd);
+	if (tabp != NULL)
+		meta_tab_free(tabp);
+	return (NULL);
+}
+
+/*
+ * Look up an md.tab line by name.
+ *
+ * name is canonicalized relative to set sp and compared against the
+ * canonical name stored with each parsed line; only lines whose type
+ * has at least one bit in common with type are considered.  Returns
+ * the first matching line, or NULL when none matches.
+ */
+md_tab_line_t *
+meta_tab_find(
+	mdsetname_t	*sp,
+	md_tab_t	*tabp,
+	char		*name,
+	mdinittypes_t	type
+)
+{
+	char		*wanted = meta_canonicalize(sp, name);
+	md_tab_line_t	*found = NULL;
+	size_t		idx = 0;
+
+	while ((found == NULL) && (idx < tabp->nlines)) {
+		md_tab_line_t	*cand = &tabp->lines[idx++];
+
+		assert((cand->argc > 0) && (cand->argv[0] != NULL));
+
+		/* wrong kind of entry */
+		if ((cand->type & type) == 0)
+			continue;
+
+		if (strcmp(cand->cname, wanted) == 0)
+			found = cand;
+	}
+
+	/* the canonical name was only needed for the comparison */
+	Free(wanted);
+	return (found);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_time.c b/usr/src/lib/lvm/libmeta/common/meta_time.c
new file mode 100644
index 0000000000..ace6483a08
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_time.c
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * 32-bit only version of gettimeofday
+ */
+
+#include <sys/time.h>
+#include <sys/types32.h>
+#include <meta.h>
+
+/*
+ * Fetch the current time of day into a 32-bit timeval.
+ *
+ * Returns 0 without doing anything when tv32 is NULL.  Otherwise
+ * returns the result of gettimeofday(); on success (0) the seconds
+ * and microseconds are narrowed to time32_t and int32_t and stored in
+ * *tv32, on failure *tv32 is left untouched.
+ */
+int
+meta_gettimeofday(md_timeval32_t *tv32)
+{
+	struct timeval	now;
+	int		rc;
+
+	if (tv32 == NULL)
+		return (0);
+
+	rc = gettimeofday(&now, NULL);
+	if (rc != 0)
+		return (rc);
+
+	tv32->tv_sec = (time32_t)now.tv_sec;
+	tv32->tv_usec = (int32_t)now.tv_usec;
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_trans.c b/usr/src/lib/lvm/libmeta/common/meta_trans.c
new file mode 100644
index 0000000000..e350e2d2d5
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_trans.c
@@ -0,0 +1,1761 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * trans operations
+ */
+
+#include <meta.h>
+#include <meta_basic.h>
+#include <sys/lvm/md_trans.h>
+#include <sys/wait.h>
+#include <sys/mnttab.h>
+#include <stddef.h>
+
+extern char *getfullblkname();
+
+/*
+ * replace trans
+ *
+ * Asks the driver (MD_IOCREPLACE / REPLACE_COMP) to replace component
+ * oldnp of trans metadevice transnp with newnp.  A key is added for
+ * newnp first and removed again if the ioctl fails.  On success the
+ * cached name information for oldnp, newnp and transnp is invalidated
+ * and, when MDCMD_PRINT is set in options, a confirmation is printed.
+ *
+ * Returns 0 on success; otherwise ep is set and the return value is
+ * -1 or whatever mdsyserror()/mdstealerror() return.
+ */
+
+int
+meta_trans_replace(mdsetname_t *sp, mdname_t *transnp, mdname_t *oldnp,
+    mdname_t *newnp, mdcmdopts_t options, md_error_t *ep)
+{
+	replace_params_t	params;
+	md_dev64_t		old_dev,
+				new_dev;
+	daddr_t			new_start_blk,
+				new_end_blk;
+
+	/* should have same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(transnp->dev)));
+
+	/* remember the new device's identity before invalidating names */
+	new_dev = newnp->dev;
+	new_start_blk = newnp->start_blk;
+	new_end_blk = newnp->end_blk;
+
+	meta_invalidate_name(transnp);
+	/* the old device binding is now established */
+	if ((old_dev = oldnp->dev) == NODEV64)
+		return (mdsyserror(ep, ENODEV, oldnp->cname));
+
+	/*
+	 * Old and new share a raw name but differ in device number:
+	 * put back the values saved above.
+	 * NOTE(review): presumably the invalidate above can also reset
+	 * newnp when both names refer to the same entry -- confirm.
+	 */
+	if (((strcmp(oldnp->rname, newnp->rname) == 0) &&
+	    (old_dev != new_dev))) {
+		newnp->dev = new_dev;
+		newnp->start_blk = new_start_blk;
+		newnp->end_blk = new_end_blk;
+	}
+
+	/* the driver refers to the new component by key */
+	if (add_key_name(sp, newnp, NULL, ep) != 0)
+		return (-1);
+
+	(void) memset(&params, 0, sizeof (params));
+	params.mnum = meta_getminor(transnp->dev);
+	MD_SETDRIVERNAME(&params, MD_TRANS, sp->setno);
+
+	params.cmd = REPLACE_COMP;
+	params.old_dev = old_dev;
+	params.new_dev = new_dev;
+	params.new_key = newnp->key;
+	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
+		/* undo the key added above, then report the ioctl error */
+		(void) del_key_name(sp, newnp, ep);
+		return (mdstealerror(ep, &params.mde));
+	}
+	/* cached information for all three names is now stale */
+	meta_invalidate_name(oldnp);
+	meta_invalidate_name(newnp);
+	meta_invalidate_name(transnp);
+
+	if (options & MDCMD_PRINT) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "%s: device %s is replaced with %s\n"),
+		    transnp->cname, oldnp->cname, newnp->cname);
+	}
+	return (0);
+}
+
+
+
+/*
+ * FUNCTION:	meta_get_trans_names()
+ * INPUT:	sp	- set whose trans metadevices are wanted
+ *		options	- caller's options, passed through unchanged
+ * OUTPUT:	nlpp	- name list that receives the trans names
+ *		ep	- error information on failure
+ * RETURNS:	int	- the result of meta_get_names() for MD_TRANS
+ *			  (documented by callers as -1 on error)
+ * PURPOSE:	Thin front end to meta_get_names() that selects the
+ *		trans driver.
+ */
+int
+meta_get_trans_names(
+	mdsetname_t	*sp,
+	mdnamelist_t	**nlpp,
+	int		options,
+	md_error_t	*ep
+)
+{
+	int		rval;
+
+	rval = meta_get_names(MD_TRANS, sp, nlpp, options, ep);
+	return (rval);
+}
+
+/*
+ * free trans unit
+ *
+ * Releases the md_trans_t itself.  Nothing it points to (such as the
+ * master and log names) is released here.
+ */
+void
+meta_free_trans(
+	md_trans_t	*transp
+)
+{
+	Free(transp);
+}
+
+/*
+ * get trans (common)
+ *
+ * Returns the md_trans_t describing trans metadevice transnp in set
+ * sp.  If a unit is already cached on the drive name it is returned
+ * directly; otherwise the unit is fetched with meta_get_mdunit(),
+ * converted, cached on the drive name and returned.  The fast flag is
+ * passed through to metakeyname() when resolving the master and log
+ * names.
+ *
+ * Returns NULL with ep set if transnp is not a trans metadevice or
+ * any lookup fails.
+ */
+md_trans_t *
+meta_get_trans_common(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	int		fast,
+	md_error_t	*ep
+)
+{
+	mddrivename_t	*dnp = transnp->drivenamep;
+	char		*miscname;
+	mt_unit_t	*mt;
+	md_trans_t	*transp;
+	int		gotlog;
+
+	/* must have set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(transnp->dev)));
+
+	/* short circuit */
+	if (dnp->unitp != NULL) {
+		assert(dnp->unitp->type == MD_METATRANS);
+		return ((md_trans_t *)dnp->unitp);
+	}
+
+	/* get miscname and unit */
+	if ((miscname = metagetmiscname(transnp, ep)) == NULL)
+		return (NULL);
+	if (strcmp(miscname, MD_TRANS) != 0) {
+		/* some other kind of metadevice */
+		(void) mdmderror(ep, MDE_NOT_MT,
+		    meta_getminor(transnp->dev), transnp->cname);
+		return (NULL);
+	}
+	if ((mt = (mt_unit_t *)meta_get_mdunit(sp, transnp, ep)) == NULL)
+		return (NULL);
+	assert(mt->c.un_type == MD_METATRANS);
+
+	/* allocate trans */
+	transp = Zalloc(sizeof (*transp));
+
+	/* get common info */
+	transp->common.namep = transnp;
+	transp->common.type = mt->c.un_type;
+	transp->common.state = mt->c.un_status;
+	transp->common.capabilities = mt->c.un_capabilities;
+	transp->common.parent = mt->c.un_parent;
+	transp->common.size = mt->c.un_total_blocks;
+	transp->common.user_flags = mt->c.un_user_flags;
+	transp->common.revision = mt->c.un_revision;
+
+	/* get master */
+	transp->masternamep = metakeyname(&sp, mt->un_m_key, fast, ep);
+	if (transp->masternamep == NULL)
+		goto out;
+
+	/* get log */
+	gotlog = ((mt->un_flags & TRANS_DETACHED) == 0);
+	if (gotlog) {
+		daddr_t	sblk;
+
+		transp->lognamep = metakeyname(&sp, mt->un_l_key, fast, ep);
+		if (transp->lognamep == NULL)
+			goto out;
+
+		/* calculate the kernels start block */
+		sblk = mt->un_l_pwsblk + mt->un_l_maxtransfer;
+
+		/*
+		 * Debug aid: with META_DEBUG_START_BLK in the
+		 * environment, warn when the start block looked up
+		 * for the log lies beyond the kernel's.  A failed
+		 * lookup is ignored.
+		 */
+		if (getenv("META_DEBUG_START_BLK") != NULL) {
+			if (metagetstart(sp, transp->lognamep, ep) ==
+			    MD_DISKADDR_ERROR)
+				mdclrerror(ep);
+
+			if (transp->lognamep->start_blk > sblk)
+				md_eprintf(dgettext(TEXT_DOMAIN,
+				    "%s: suspected bad start block [trans]\n"),
+				    transp->lognamep->cname);
+		}
+
+		/* override any start_blk */
+		transp->lognamep->start_blk = sblk;
+	}
+
+	/* get flags, etc. */
+	transp->flags = mt->un_flags;
+	transp->timestamp = mt->un_timestamp;
+	transp->log_error = mt->un_l_error;
+	transp->log_timestamp = mt->un_l_timestamp;
+	transp->log_size = mt->un_l_nblks;
+	transp->debug = mt->un_debug;
+
+	/* cleanup, return success */
+	Free(mt);
+	/* cache the result so later calls take the short circuit above */
+	dnp->unitp = (md_common_t *)transp;
+	return (transp);
+
+	/* cleanup, return error */
+out:
+	Free(mt);
+	meta_free_trans(transp);
+	return (NULL);
+}
+
+/*
+ * get trans
+ *
+ * Same as meta_get_trans_common() with the fast flag set to 0.
+ * Returns the trans unit, or NULL with ep set on failure.
+ */
+md_trans_t *
+meta_get_trans(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	md_error_t	*ep
+)
+{
+	return (meta_get_trans_common(sp, transnp, 0, ep));
+}
+
+/*
+ * Check whether device np (blocks slblk .. slblk + nblks) collides
+ * with the master or log device of trans metadevice transnp.
+ *
+ * The master is checked with meta_check_overlap() unless it is itself
+ * a metadevice.  The log is checked only when one is attached, the
+ * caller did not pass MDCHK_ALLOW_LOG, the log is not a metadevice,
+ * and meta_check_samedrive() reports a non-zero (positive) result for
+ * np and the log.
+ *
+ * Returns 0 when no conflict is found, -1 with ep set otherwise.
+ */
+static int
+in_trans(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	mdname_t	*np,
+	mdchkopts_t	options,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	md_trans_t	*unitp;
+	mdname_t	*master;
+	mdname_t	*logdev;
+	daddr_t		log_start;
+	int		same;
+
+	/* should be in the same set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(transnp->dev)));
+
+	/* fetch the trans unit */
+	unitp = meta_get_trans(sp, transnp, ep);
+	if (unitp == NULL)
+		return (-1);
+
+	/* master: only physical devices can overlap */
+	master = unitp->masternamep;
+	if (! metaismeta(master)) {
+		if (meta_check_overlap(transnp->cname, np, slblk, nblks,
+		    master, 0, -1, ep) != 0)
+			return (-1);
+	}
+
+	/* log: nothing to do when absent, allowed, or a metadevice */
+	logdev = unitp->lognamep;
+	if ((logdev == NULL) || (options & MDCHK_ALLOW_LOG) ||
+	    (metaismeta(logdev)))
+		return (0);
+
+	/* check same drive since metagetstart() can fail */
+	same = meta_check_samedrive(np, logdev, ep);
+	if (same < 0)
+		return (-1);
+	if (same == 0)
+		return (0);
+
+	/* same drive: compare against the log's block range */
+	log_start = metagetstart(sp, logdev, ep);
+	if (log_start == MD_DISKADDR_ERROR)
+		return (-1);
+	if (meta_check_overlap(transnp->cname, np, slblk, nblks,
+	    logdev, log_start, -1, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Check device np (blocks slblk .. slblk + nblks) against every trans
+ * metadevice in set sp.
+ *
+ * Returns 0 when in_trans() finds no conflict with any trans, -1 with
+ * ep set when the trans list cannot be obtained or a check fails;
+ * checking stops at the first failure.
+ */
+int
+meta_check_intrans(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	mdchkopts_t	options,
+	diskaddr_t	slblk,
+	diskaddr_t	nblks,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*translist = NULL;
+	mdnamelist_t	*cur;
+	int		rval = 0;
+
+	/* should have a set */
+	assert(sp != NULL);
+
+	/* collect the names of all trans devices in the set */
+	if (meta_get_trans_names(sp, &translist, 0, ep) < 0)
+		return (-1);
+
+	/* walk the list until a conflict turns up */
+	for (cur = translist; (cur != NULL) && (rval == 0);
+	    cur = cur->next) {
+		if (in_trans(sp, cur->namep, np, options, slblk, nblks,
+		    ep) != 0)
+			rval = -1;
+	}
+
+	/* the list is ours to release either way */
+	metafreenamelist(translist);
+	return (rval);
+}
+
+/*
+ * Check that np may be used as the master device of a trans.
+ *
+ * np must pass metachkdisk(), must not be in use (skipped when force
+ * is set) and must belong to set sp.  A non-metadevice must in
+ * addition not be part of any metadevice.  A metadevice is accepted
+ * when it has no parent and either meta_sp_issp() returns 0 for it or
+ * it has the MD_CAN_PARENT capability.
+ *
+ * Returns 0 if np is acceptable; otherwise ep is set and the return
+ * value is -1 or the result of mdmderror(MDE_INVAL_UNIT).
+ */
+int
+meta_check_master(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	int		force,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	options = 0;
+	md_common_t	*unitp;
+	int		is_sp;
+	int		orphan;
+
+	/* make sure we have a disk */
+	if (metachkdisk(np, ep) != 0)
+		return (-1);
+
+	/* unless forced, refuse a device that is already in use */
+	if ((!force) && (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0))
+		return (-1);
+
+	/* make sure it is in the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	/* plain device: just make sure no metadevice already uses it */
+	if (! metaismeta(np)) {
+		if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0)
+			return (-1);
+		return (0);
+	}
+
+	/* metadevice: look at its unit */
+	if ((unitp = meta_get_unit(sp, np, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * Soft partitions can sit at the top or the bottom of a
+	 * metadevice stack and one built on a metadevice clears
+	 * MD_CAN_PARENT, so for them only the missing parent counts.
+	 */
+	is_sp = (meta_sp_issp(sp, np, ep) == 0);
+	orphan = (unitp->parent == MD_NO_PARENT);
+	if (is_sp && orphan)
+		return (0);
+
+	/* everything else must be parent-capable and unparented */
+	if (orphan && (unitp->capabilities & MD_CAN_PARENT))
+		return (0);
+
+	return (mdmderror(ep, MDE_INVAL_UNIT,
+	    meta_getminor(np->dev), np->cname));
+}
+
+/*
+ * Check that np may be used as the log device of a trans.
+ *
+ * np must pass metachkdisk(), must not be in use and must belong to
+ * set sp.  A non-metadevice must in addition pass meta_check_inmeta()
+ * with MDCHK_ALLOW_MDDB and MDCHK_ALLOW_LOG set.  A metadevice is
+ * accepted when it has no parent and meta_sp_issp() returns 0 for it,
+ * or when it has the MD_CAN_PARENT capability and its parent is
+ * MD_NO_PARENT or MD_MULTI_PARENT.
+ *
+ * Returns 0 if np is acceptable; otherwise ep is set and the return
+ * value is -1 or the result of mdmderror(MDE_INVAL_UNIT).
+ */
+int
+meta_check_log(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	md_error_t	*ep
+)
+{
+	mdchkopts_t	options = (MDCHK_ALLOW_MDDB | MDCHK_ALLOW_LOG);
+	md_common_t	*unitp;
+	int		is_sp;
+
+	/* make sure we have a disk */
+	if (metachkdisk(np, ep) != 0)
+		return (-1);
+
+	/* refuse a device that is already in use */
+	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0)
+		return (-1);
+
+	/* make sure it is in the set */
+	if (meta_check_inset(sp, np, ep) != 0)
+		return (-1);
+
+	/* plain device: check it against existing metadevices */
+	if (! metaismeta(np)) {
+		if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0)
+			return (-1);
+		return (0);
+	}
+
+	/* metadevice: look at its unit */
+	if ((unitp = meta_get_unit(sp, np, ep)) == NULL)
+		return (-1);
+
+	/*
+	 * Soft partitions can sit at the top or the bottom of a
+	 * metadevice stack and one built on a metadevice clears
+	 * MD_CAN_PARENT, so for them only the missing parent counts.
+	 */
+	is_sp = (meta_sp_issp(sp, np, ep) == 0);
+	if (is_sp && (unitp->parent == MD_NO_PARENT))
+		return (0);
+
+	/* otherwise: parent-capable, and unparented or multi-parented */
+	if ((unitp->capabilities & MD_CAN_PARENT) &&
+	    ((unitp->parent == MD_NO_PARENT) ||
+	    (unitp->parent == MD_MULTI_PARENT)))
+		return (0);
+
+	return (mdmderror(ep, MDE_INVAL_UNIT,
+	    meta_getminor(np->dev), np->cname));
+}
+
+/*
+ * print trans
+ *
+ * Write the one-line, metainit-style description of a trans to fp:
+ * "<trans> -t <master> [<log>]".  A device whose raw name contains
+ * /dev/rdsk, /dev/md/rdsk or /dev/td/ is written by its short name
+ * (metainit assumes the default directory); any other device is
+ * written with its full raw pathname.
+ *
+ * Returns 0 on success; on an output error returns -1 after recording
+ * errno against fname in ep.
+ */
+static int
+trans_print(
+	md_trans_t	*transp,
+	char		*fname,
+	FILE		*fp,
+	md_error_t	*ep
+)
+{
+	mdname_t	*masternp = transp->masternamep;
+	mdname_t	*lognp = transp->lognamep;
+	char		*shown;
+	int		rval = -1;
+
+	/* name of the trans followed by -t */
+	if (fprintf(fp, "%s -t", transp->common.namep->cname) == EOF)
+		goto out;
+
+	/* master: short name for standard paths, full pathname otherwise */
+	if ((strstr(masternp->rname, "/dev/rdsk") != NULL) ||
+	    (strstr(masternp->rname, "/dev/md/rdsk") != NULL) ||
+	    (strstr(masternp->rname, "/dev/td/") != NULL))
+		shown = masternp->cname;
+	else
+		shown = masternp->rname;
+	if (fprintf(fp, " %s", shown) == EOF)
+		goto out;
+
+	/* log, when one is attached, follows the same naming rule */
+	if (lognp != NULL) {
+		if ((strstr(lognp->rname, "/dev/rdsk") != NULL) ||
+		    (strstr(lognp->rname, "/dev/md/rdsk") != NULL) ||
+		    (strstr(lognp->rname, "/dev/td/") != NULL))
+			shown = lognp->cname;
+		else
+			shown = lognp->rname;
+		if (fprintf(fp, " %s", shown) == EOF)
+			goto out;
+	}
+
+	/* end of line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	rval = 0;
+
+out:
+	/* every failure above is an output error, so report errno */
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	return (rval);
+}
+
+/*
+ * convert flags to repair action
+ *
+ * Returns a freshly allocated copy of the localized "To Fix" hint when
+ * the trans reports a log error (LDL_ANYERROR), or NULL when transp is
+ * NULL, the trans is detached/detaching/attaching, there is no log
+ * error, or the message is empty.  The caller owns the returned buffer
+ * and releases it with Free().
+ */
+
+char *
+mt_flags_to_action(
+	md_trans_t *transp
+)
+{
+	char	*fix_msg;
+	char	*actionp;
+	size_t	 len;
+
+	if (transp == NULL)
+		return (NULL);
+
+	/*
+	 * if in any of these states, the log_error word is not (yet) meaningful
+	 */
+	if (transp->flags & (TRANS_DETACHED|TRANS_DETACHING|TRANS_ATTACHING))
+		return (NULL);
+
+	/* nothing to repair */
+	if (!(transp->log_error & LDL_ANYERROR))
+		return (NULL);
+
+	fix_msg = dgettext(TEXT_DOMAIN,
+	    "    To Fix: Please refer to the log device's status.\n");
+
+	/* an empty (translated) message is treated as "no action" */
+	if ((len = strlen(fix_msg)) == 0)
+		return (NULL);
+
+	if ((actionp = Zalloc(len + 1)) == NULL)
+		return (NULL);
+
+	/* copy the message including its terminating NUL */
+	(void) memcpy(actionp, fix_msg, len + 1);
+
+	return (actionp);
+}
+
+/*
+ * convert log state to repair action
+ *
+ * Build the command text to run for every trans in transnlp that
+ * reports a log error (LDL_ANYERROR) and whose log is lognamep: a
+ * umount line per mounted file system, one fsck command naming all the
+ * affected trans devices, and a mount line per mounted file system.
+ * When nothing is mounted the result is a single "fsck ..." line.
+ *
+ * Returns a buffer from Zalloc() that the caller must Free(), or NULL
+ * when no matching trans is in error or one of the lookups fails.
+ */
+char *
+mt_l_error_to_action(
+	mdsetname_t	*sp,
+	mdnamelist_t	*transnlp,
+	mdname_t	*lognamep,
+	md_error_t	*ep
+)
+{
+	char		 umnt_msg[1024];
+	char		 fsck_msg[1024];
+	char		 mnt_msg[1024];
+	mdnamelist_t	*p;
+	md_trans_t	*tp;
+	int		 rc;
+	size_t		 used;
+	int		 len		= 0;
+	char		*rmsg		= NULL;
+	char		*mp		= NULL;
+	bool_t		 is_mounted	= FALSE;
+	bool_t		 any_in_error	= FALSE;
+	int		 only_fsck	= TRUE;
+
+	(void) memset(umnt_msg, 0, sizeof (umnt_msg));
+	(void) memset(fsck_msg, 0, sizeof (fsck_msg));
+	(void) memset(mnt_msg, 0, sizeof (mnt_msg));
+
+	/*
+	 * If the trans devices listed in transnlp contain
+	 * devices which are in error and are sub-mount points
+	 * of each other, then it would need to be reverse sorted.
+	 * When this actually occurs, and customers find the usage
+	 * message insufficiently clear, then we should take the
+	 * hit to sort it.
+	 */
+
+	/*
+	 * this preliminary loop is necessary to keep the
+	 * fsck message greppable, if possible; it stops at the
+	 * first affected trans that is mounted
+	 */
+	for (p = transnlp; ((p != NULL) && (only_fsck == TRUE)); p = p->next) {
+
+		if ((tp = meta_get_trans(sp, p->namep, ep)) == NULL) {
+			goto out;
+		}
+
+		/* skip trans devices that are healthy or use another log */
+		if (!(tp->log_error & LDL_ANYERROR)) {
+			continue;
+		}
+
+		if ((tp->lognamep == NULL) ||
+		    (strcmp(lognamep->bname, tp->lognamep->bname) != 0)) {
+			continue;
+		}
+
+		mdclrerror(ep);
+		is_mounted = (meta_check_inuse(sp,
+		    p->namep, MDCHK_MOUNTED, ep) != 0);
+
+		if (!mdisok(ep) && mdiserror(ep, MDE_IS_MOUNTED)) {
+			goto out;
+		}
+
+		mdclrerror(ep);
+		mp = meta_get_mountp(sp, p->namep, ep);
+
+		if (!mdisok(ep)) {
+			goto out;
+		}
+
+		if (is_mounted) {
+			if (!mp) {
+				goto out;
+			}
+			only_fsck = FALSE;
+
+			/*
+			 * not greppable; there must be multiple commands, so
+			 * add preliminary newline so the formatting is uniform
+			 */
+			if (snprintf(umnt_msg, sizeof (umnt_msg), "\n") < 0) {
+				goto out;
+			}
+		}
+
+		if (mp) {
+			Free(mp);
+			mp = NULL;
+		}
+	}
+
+	/*
+	 * although the log may either be in error or hard-error
+	 * states, the action is the same; unmount, fsck and remount
+	 * all fs associated with this log
+	 */
+	for (p = transnlp; (p != NULL); p = p->next) {
+
+		if ((tp = meta_get_trans(sp, p->namep, ep)) == NULL) {
+			goto out;
+		}
+
+		if (!(tp->log_error & LDL_ANYERROR)) {
+			continue;
+		}
+
+		if ((tp->lognamep == NULL) ||
+		    (strcmp(lognamep->bname, tp->lognamep->bname) != 0)) {
+			continue;
+		}
+
+		mdclrerror(ep);
+		is_mounted = (meta_check_inuse(sp,
+		    p->namep, MDCHK_MOUNTED, ep) != 0);
+
+		if (!mdisok(ep) && mdiserror(ep, MDE_IS_MOUNTED)) {
+			goto out;
+		}
+
+		mdclrerror(ep);
+		mp = meta_get_mountp(sp, p->namep, ep);
+
+		if (!mdisok(ep)) {
+			goto out;
+		}
+
+		/* a mounted file system must have a mount point to print */
+		if (is_mounted && !mp) {
+			goto out;
+		}
+
+		/*
+		 * Each message is extended by formatting at its current
+		 * end.  Passing the destination buffer to snprintf() as
+		 * one of its own "%s" arguments is undefined behaviour,
+		 * so that form is deliberately not used here.
+		 */
+		if (is_mounted) {
+			used = strlen(umnt_msg);
+			rc = snprintf(umnt_msg + used,
+			    sizeof (umnt_msg) - used,
+			    "            umount %s\n", mp);
+
+			if (rc < 0) {
+				goto out;
+			}
+		}
+
+		if (!any_in_error) {
+			/* first device: start the fsck command line */
+			rc = snprintf(fsck_msg, sizeof (fsck_msg), "%s %s",
+			    (only_fsck) ? "fsck" : "            fsck",
+			    p->namep->rname);
+		} else {
+			/* later devices: add another argument */
+			used = strlen(fsck_msg);
+			rc = snprintf(fsck_msg + used,
+			    sizeof (fsck_msg) - used, " %s",
+			    p->namep->rname);
+		}
+		if (rc < 0) {
+			goto out;
+		}
+
+		if (is_mounted) {
+			used = strlen(mnt_msg);
+			rc = snprintf(mnt_msg + used,
+			    sizeof (mnt_msg) - used,
+			    "            mount %s %s\n",
+			    p->namep->bname, mp);
+
+			if (rc < 0) {
+				goto out;
+			}
+		}
+
+		if (mp) {
+			Free(mp);
+			mp = NULL;
+		}
+
+		any_in_error = TRUE;
+	}
+
+	if (!any_in_error) {
+		goto out;
+	}
+
+	/*
+	 * The sizing below is kept exactly as it always was so that the
+	 * generated text does not change: with several commands
+	 * (!only_fsck) the buffer is one byte smaller than the full
+	 * text, so snprintf() drops the last character.
+	 * NOTE(review): that character is presumably the newline ending
+	 * mnt_msg and the caller's format adds its own -- confirm that
+	 * the truncation is intended before "fixing" the arithmetic.
+	 */
+	len = strlen(umnt_msg) + strlen(fsck_msg) + strlen(mnt_msg) +
+	    (only_fsck ? 1 : 0) + 1;
+	if (!(rmsg = Zalloc(len))) {
+		len = 0;
+		goto out;
+	}
+	rc = snprintf(rmsg, len, "%s%s%s%s", umnt_msg, fsck_msg,
+	    !only_fsck ? "\n" : "", mnt_msg);
+	if (rc < 0) {
+		/* let the cleanup below discard the unusable buffer */
+		len = 0;
+	}
+
+out:
+	if (mp) {
+		Free(mp);
+		mp = NULL;
+	}
+	if (len == 0 && rmsg) {
+		Free(rmsg);
+		rmsg = NULL;
+	}
+
+	return (rmsg);
+}
+
+/*
+ * printable log state
+ *
+ * Returns the localized name of the log state of transp and, when tvp
+ * is not NULL, stores the log timestamp there.  A non-zero tstate
+ * takes precedence over the log_error word and yields "Unavailable".
+ * The returned string comes from dgettext() and must not be freed.
+ */
+char *
+mt_l_error_to_name(
+	md_trans_t	*transp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate	/* Errored tstate flags */
+)
+{
+	char	*name;
+
+	/* hand back the time of the last log state change */
+	if (tvp != NULL)
+		*tvp = transp->log_timestamp;
+
+	if (tstate != 0)
+		name = dgettext(TEXT_DOMAIN, "Unavailable");
+	else if (transp->log_error & LDL_ERROR)
+		name = dgettext(TEXT_DOMAIN, "Error");
+	else if (transp->log_error & LDL_HERROR)
+		name = dgettext(TEXT_DOMAIN, "Hard Error");
+	else
+		name = dgettext(TEXT_DOMAIN, "Okay");
+
+	return (name);
+}
+
+/*
+ * printable trans state
+ *
+ * Returns the localized name of the state of transp.  A non-zero
+ * tstate yields "Unavailable"; otherwise the detach/attach flags are
+ * examined and, if none is set, the answer (and the value stored
+ * through tvp) is that of mt_l_error_to_name().  When tvp is not NULL
+ * it first receives the trans timestamp.
+ */
+char *
+mt_flags_to_name(
+	md_trans_t	*transp,
+	md_timeval32_t	*tvp,
+	uint_t		tstate	/* Errored tstate flags */
+)
+{
+	char	*name;
+
+	/* hand back the time of the last trans state change */
+	if (tvp != NULL)
+		*tvp = transp->timestamp;
+
+	if (tstate != 0) {
+		name = dgettext(TEXT_DOMAIN, "Unavailable");
+	} else if (transp->flags & TRANS_DETACHED) {
+		name = dgettext(TEXT_DOMAIN, "Detached");
+	} else if (transp->flags & TRANS_DETACHING) {
+		name = dgettext(TEXT_DOMAIN, "Detaching");
+	} else if (transp->flags & TRANS_ATTACHING) {
+		name = dgettext(TEXT_DOMAIN, "Attaching");
+	} else {
+		/* no transition in progress: report the log state */
+		name = mt_l_error_to_name(transp, tvp, tstate);
+	}
+
+	return (name);
+}
+
+/*
+ * report trans
+ *
+ * Write the long-form report for one trans to fp: optional header,
+ * state (with time when PRINT_TIMES is set), repair hint, debug modes,
+ * size, master and log names and, for a non-metadevice master, a table
+ * row describing the master.
+ *
+ * Returns 0 on success and -1 on failure.  Output errors are recorded
+ * in ep as a system error against fname; failures of the metadevice
+ * lookups return with the error those routines stored in ep.
+ */
+static int
+trans_report(
+	mdsetname_t	*sp,
+	md_trans_t	*transp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	char		*mt_state;
+	md_timeval32_t	tv;
+	char		*timep;
+	int		rval = -1;
+	char		*actionp = NULL;
+	char 		*devid = "";
+	mdname_t	*didnp = NULL;
+	ddi_devid_t	dtp;
+	uint_t		tstate = 0;
+
+	/* print header */
+	if (options & PRINT_HEADER) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Trans"
+		    " (Feature replaced see message below)\n"),
+		    transp->common.namep->cname) == EOF) {
+			goto out;
+		}
+	}
+
+	/* print state */
+	if (metaismeta(transp->common.namep)) {
+		/*
+		 * Return directly: going through "out" would replace
+		 * the error stored by meta_get_tstate() with an
+		 * unrelated errno value.
+		 */
+		if (meta_get_tstate(transp->common.namep->dev, &tstate, ep)
+		    != 0)
+			return (-1);
+	}
+	mt_state = mt_flags_to_name(transp, &tv, tstate & MD_DEV_ERRORED);
+	if (options & PRINT_TIMES) {
+		timep = meta_print_time(&tv);
+	} else {
+		timep = "";
+	}
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    State: %-12s %s\n"),
+	    mt_state, timep) == EOF) {
+		goto out;
+	}
+
+	/* the repair hint is only shown while the device is available */
+	if ((tstate & MD_DEV_ERRORED) == 0) {
+		actionp = mt_flags_to_action(transp);
+		if (actionp) {
+			/* on failure actionp is released at "out" */
+			if (fprintf(fp, "%s", actionp) == EOF) {
+				goto out;
+			}
+			Free(actionp);
+			actionp = NULL;
+		}
+	}
+
+	/* debug stuff */
+	if (transp->debug) {
+		if (fprintf(fp,
+		    "    Debug Modes:%s%s%s%s%s%s%s%s%s%s%s\n",
+		    (transp->debug & MT_TRANSACT) ? " TRANSACT" : "",
+		    (transp->debug & MT_MATAMAP) ? " METADATA" : "",
+		    (transp->debug & MT_WRITE_CHECK) ?  " WRITES" : "",
+		    (transp->debug & MT_LOG_WRITE_CHECK) ? " LOGWRITES" : "",
+		    (transp->debug & MT_CHECK_MAP) ? " MAP" : "",
+		    (transp->debug & MT_TRACE) ? " TRACE" : "",
+		    (transp->debug & MT_SIZE) ? " SIZE" : "",
+		    (transp->debug & MT_NOASYNC) ? " NOASYNC" : "",
+		    (transp->debug & MT_FORCEROLL) ? " FORCEROLL" : "",
+		    (transp->debug & MT_SCAN) ? " SCAN" : "",
+		    (transp->debug & MT_PREWRITE) ? " PREWRITE" : "")
+		    == EOF) {
+			goto out;
+		}
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
+	    transp->common.size,
+	    meta_number_to_string(transp->common.size, DEV_BSIZE)) == EOF) {
+		goto out;
+	}
+
+
+	/* print master */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Master Device: %s\n"),
+	    transp->masternamep->cname) == EOF) {
+		goto out;
+	}
+
+	/* print log */
+	if (transp->lognamep != NULL) {
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    Logging Device: %s\n"),
+		    transp->lognamep->cname) == EOF) {
+			goto out;
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* print master details if regular device */
+	if (! metaismeta(transp->masternamep)) {
+		daddr_t	start_blk = 0;
+		char	*has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+		int	len;
+
+		/*
+		 * The column width is computed at run time and passed
+		 * to (f)printf through "*".  This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		len = strlen(transp->masternamep->cname) + 2;
+		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Master Device")));
+
+		/* print header */
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %-5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Master Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+			goto out;
+		}
+
+		/* populate the key in the name_p structure */
+		if ((didnp = metadevname(&sp,
+		    transp->masternamep->dev, ep)) == NULL) {
+			/* keep the error stored by metadevname() */
+			return (-1);
+		}
+
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL) {
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			} else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info, using the same run-time column width */
+		if (fprintf(fp, "\t%-*s %8ld     %-5.5s %s\n", len,
+		    transp->masternamep->cname,
+		    start_blk, has_mddb_str, devid) == EOF) {
+			goto out;
+		}
+		/* add extra line */
+		if (fprintf(fp, "\n") == EOF)
+			goto out;
+	}
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	/* record errno first so the cleanup cannot disturb it */
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	if (actionp != NULL)
+		Free(actionp);
+	return (rval);
+}
+
+/*
+ * print/report trans
+ *
+ * With transnp == NULL every trans in the set is printed by calling
+ * this routine once per name, and *meta_print_trans_msgp (when the
+ * pointer is not NULL) is set to 1 afterwards.  With a specific
+ * transnp the trans is printed in short (PRINT_SHORT) or report form,
+ * unless PRINT_LARGEDEVICES is set, followed by its master when that
+ * is a metadevice.  A parented trans is skipped unless PRINT_SUBDEVS
+ * is set.  When lognlpp is not NULL the log of the trans is appended
+ * to *lognlpp if no entry with the same block name is present yet.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+meta_trans_print(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	mdnamelist_t	**nlistpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	int		*meta_print_trans_msgp, /* NULL if transnp != NULL */
+	mdnamelist_t	**lognlpp,
+	md_error_t	*ep
+)
+{
+	md_trans_t	*transp;
+	mdname_t	*lognamep;
+
+	/* should have same set */
+	assert(sp != NULL);
+
+	/* no name given: walk every trans in the set */
+	if (transnp == NULL) {
+		mdnamelist_t	*names = NULL;
+		mdnamelist_t	*cur;
+		int		found;
+		int		status = 0;
+
+		found = meta_get_trans_names(sp, &names, options, ep);
+		if (found < 0)
+			return (-1);
+		if (found == 0)
+			return (0);
+
+		/* keep going after a failure, but remember it */
+		cur = names;
+		while (cur != NULL) {
+			if (meta_trans_print(sp, cur->namep, nlistpp, fname,
+			    fp, options, meta_print_trans_msgp, lognlpp,
+			    ep) != 0)
+				status = -1;
+			cur = cur->next;
+		}
+
+		if (meta_print_trans_msgp)
+			*meta_print_trans_msgp = 1;
+
+		metafreenamelist(names);
+		return (status);
+	}
+
+
+	/* get unit structure */
+	transp = meta_get_trans_common(sp, transnp,
+	    ((options & PRINT_FAST) ? 1 : 0), ep);
+	if (transp == NULL)
+		return (-1);
+
+	/* remember the log, once per distinct block name */
+	if ((lognlpp != NULL) &&
+	    ((lognamep = transp->lognamep) != NULL)) {
+		mdnamelist_t	*seen = *lognlpp;
+
+		while ((seen != NULL) &&
+		    (strcmp(lognamep->bname, seen->namep->bname) != 0))
+			seen = seen->next;
+		if (seen == NULL)
+			(void) metanamelist_append(lognlpp, lognamep);
+	}
+
+	/* a parented trans is only shown when subdevices were asked for */
+	if ((! (options & PRINT_SUBDEVS)) &&
+	    (MD_HAS_PARENT(transp->common.parent))) {
+		return (0);
+	}
+
+	/* can't have a large trans */
+	if (!(options & PRINT_LARGEDEVICES)) {
+		int	rc;
+
+		/* short form or full report */
+		if (options & PRINT_SHORT)
+			rc = trans_print(transp, fname, fp, ep);
+		else
+			rc = trans_report(sp, transp, fname, fp, options, ep);
+		if (rc != 0)
+			return (-1);
+	}
+
+	/* a plain master has nothing more to show; the log is printed later */
+	if (! metaismeta(transp->masternamep))
+		return (0);
+
+	/* print the underlying master metadevice */
+	if (meta_print_name(sp, transp->masternamep, nlistpp, fname,
+	    fp, (options | PRINT_HEADER | PRINT_SUBDEVS), NULL, ep) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * print log
+ *
+ * Short-form output for a log device.  A log that is a metadevice is
+ * printed through meta_print_name() and that result is returned; a
+ * regular device produces no output and 0 is returned.
+ */
+static int
+log_print(
+	mdsetname_t	*sp,
+	mdname_t	*lognamep,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*nlp = NULL;
+
+	/* regular device: nothing to print */
+	if (! metaismeta(lognamep))
+		return (0);
+
+	/* metadevice info */
+	return (meta_print_name(sp, lognamep, &nlp, fname, fp,
+	    options, NULL, ep));
+}
+
+/*
+ * report log
+ *
+ * Write the long-form report for the log device lognamep to fp.  The
+ * trans devices in transnlp are searched for one whose log has the
+ * same block name; its log state, size and (with PRINT_DEBUG) fill
+ * statistics are printed, followed by details of the log device.
+ * With PRINT_LARGEDEVICES nothing is printed for a log no larger than
+ * MD_MAX_BLKS_FOR_SMALL_DEVS.
+ *
+ * Returns 0 on success and -1 on failure.  Output errors are recorded
+ * in ep as a system error against fname; failures of the metadevice
+ * lookups return with the error those routines stored in ep.
+ */
+static int
+log_report(
+	mdsetname_t	*sp,
+	mdname_t	*lognamep,
+	mdnamelist_t	**nlistpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	mdnamelist_t	*transnlp,
+	md_error_t	*ep
+)
+{
+	md_trans_t	*transp = NULL;
+	mdnamelist_t	*p;
+	char		*ml_state;
+	md_timeval32_t	tv;
+	char		*timep;
+	char		*actionp = NULL;
+	int		rval = -1;
+	char		*devid = " ";
+	mdname_t	*didnp = NULL;
+	ddi_devid_t	dtp;
+	uint_t		tstate = 0;
+
+	/* find a trans that uses this log; the last match wins */
+	for (p = transnlp; (p != NULL); p = p->next) {
+		md_trans_t	*tp;
+
+		if ((tp = meta_get_trans(sp, p->namep, ep)) == NULL)
+			return (-1);
+		if ((tp->lognamep != NULL) &&
+		    (strcmp(lognamep->bname, tp->lognamep->bname) == 0)) {
+			transp = tp;	/* save any parent trans */
+		}
+	}
+
+	/* we must have at least one trans */
+	assert(transp != NULL);
+	if (transp == NULL) {
+		rval = 0;
+		goto out;
+	}
+
+	if ((options & PRINT_LARGEDEVICES) &&
+	    (transp->log_size <= MD_MAX_BLKS_FOR_SMALL_DEVS)) {
+		rval = 0;
+		goto out;
+	}
+
+	/* print header and trans devices, collect log_error and size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Logging device for"),
+	    lognamep->cname) == EOF) {
+		goto out;
+	}
+
+	if ((transp->lognamep != NULL) &&
+	    (strcmp(lognamep->bname, transp->lognamep->bname) == 0)) {
+		if (fprintf(fp, " %s", transp->common.namep->cname)
+		    == EOF) {
+			goto out;
+		}
+	}
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* print state */
+	if (metaismeta(transp->lognamep)) {
+		if (meta_get_tstate(transp->lognamep->dev, &tstate, ep) != 0)
+			return (-1);
+	}
+	ml_state = mt_l_error_to_name(transp, &tv, tstate & MD_DEV_ERRORED);
+	if (options & PRINT_TIMES) {
+		timep = meta_print_time(&tv);
+	} else {
+		timep = "";
+	}
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    State: %-12s %s\n"),
+	    ml_state, timep) == EOF) {
+		goto out;
+	}
+
+	/* the repair commands are only shown while the log is available */
+	if ((tstate & MD_DEV_ERRORED) == 0) {
+		actionp = mt_l_error_to_action(sp, transnlp, lognamep, ep);
+		if (actionp) {
+			/* on failure actionp is released at "out" */
+			if (fprintf(fp, dgettext(TEXT_DOMAIN,
+			    "    Invoke: %s\n"), actionp) == EOF) {
+				goto out;
+			}
+			Free(actionp);
+			actionp = NULL;
+		}
+	}
+
+	/* print size */
+	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %ld blocks (%s)\n"),
+	    transp->log_size,
+	    meta_number_to_string(transp->log_size, DEV_BSIZE)) == EOF) {
+		goto out;
+	}
+
+	/* MD_DEBUG stuff */
+	if (options & PRINT_DEBUG) {
+		mdname_t	*transnp = transp->common.namep;
+		mt_unit_t	*mt;
+		daddr_t		blksinuse, head, tail, nblks, eblk, sblk;
+		int		percent;
+
+		if ((mt = (mt_unit_t *)meta_get_mdunit(sp, transnp, ep))
+		    == NULL) {
+			return (-1);
+		}
+		assert(mt->c.un_type == MD_METATRANS);
+
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    Transfer Size: %d blocks\n"),
+		    mt->un_l_maxtransfer) == EOF) {
+			Free(mt);
+			goto out;
+		}
+
+		/* the used region runs from head to tail and may wrap */
+		head = mt->un_l_head;
+		tail = mt->un_l_tail;
+		sblk = mt->un_l_sblk;
+		nblks = mt->un_l_nblks;
+		eblk = sblk + nblks;
+		if (head <= tail)
+			blksinuse = tail - head;
+		else
+			blksinuse = (eblk - head) + (tail - sblk);
+
+		/* an empty log would otherwise divide by zero */
+		percent = (nblks != 0) ?
+		    ((u_longlong_t)blksinuse * 100) / nblks : 0;
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    Full: %d%% (%ld of %ld blocks)\n"),
+		    percent, blksinuse, nblks) == EOF) {
+			Free(mt);
+			goto out;
+		}
+
+		/* likewise when no reservation limit is set */
+		percent = (mt->un_l_maxresv != 0) ?
+		    ((u_longlong_t)mt->un_l_resv * 100) /
+		    mt->un_l_maxresv : 0;
+		/* "%u", not "%ud": the latter prints a stray 'd' */
+		if (fprintf(fp, dgettext(TEXT_DOMAIN,
+		    "    Reserved: %d%% (%u of %u bytes)\n"),
+		    percent, mt->un_l_resv, mt->un_l_maxresv) == EOF) {
+			Free(mt);
+			goto out;
+		}
+		Free(mt);
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* print log details */
+	if (metaismeta(lognamep)) {
+		if (meta_print_name(sp, lognamep, nlistpp, fname, fp,
+		    options, NULL, ep) != 0) {
+			return (-1);
+		}
+	} else {
+		daddr_t		start_blk;
+		int		has_mddb;
+		char		*has_mddb_str;
+		int		len;
+
+		/*
+		 * The column width is computed at run time and passed
+		 * to (f)printf through "*".  This allows the length
+		 * of the ctd to vary from small to large without
+		 * looking horrible.
+		 */
+		len = strlen(lognamep->cname) + 2;
+		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Logging Device")));
+		/* print header */
+		if (fprintf(fp,
+		    "\t%-*.*s %-12.12s %-5.5s %s\n",
+		    len, len,
+		    dgettext(TEXT_DOMAIN, "Logging Device"),
+		    dgettext(TEXT_DOMAIN, "Start Block"),
+		    dgettext(TEXT_DOMAIN, "Dbase"),
+		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
+			goto out;
+		}
+		/* get info */
+		if ((start_blk = metagetstart(sp, lognamep, ep)) ==
+		    MD_DISKADDR_ERROR) {
+			return (-1);
+		}
+		if ((has_mddb = metahasmddb(sp, lognamep, ep)) < 0) {
+			return (-1);
+		}
+		if (has_mddb)
+			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
+		else
+			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
+
+		/* populate the key in the name_p structure */
+		if ((didnp = metadevname(&sp, lognamep->dev, ep)) == NULL) {
+			return (-1);
+		}
+
+		/* determine if devid does NOT exist */
+		if (options & PRINT_DEVID) {
+			if ((dtp = meta_getdidbykey(sp->setno,
+			    getmyside(sp, ep), didnp->key, ep)) == NULL) {
+				devid = dgettext(TEXT_DOMAIN, "No ");
+			} else {
+				devid = dgettext(TEXT_DOMAIN, "Yes");
+				free(dtp);
+			}
+		}
+
+		/* print info, using the same run-time column width */
+		if (fprintf(fp, "\t%-*s %8ld     %-5.5s %s\n",
+		    len, lognamep->cname, start_blk,
+		    has_mddb_str, devid) == EOF) {
+			goto out;
+		}
+	}
+
+	/* add extra line */
+	if (fprintf(fp, "\n") == EOF)
+		goto out;
+
+	/* success */
+	rval = 0;
+
+	/* cleanup, return error */
+out:
+	/* record errno first so the cleanup cannot disturb it */
+	if (rval != 0)
+		(void) mdsyserror(ep, errno, fname);
+	if (actionp != NULL)
+		Free(actionp);
+	return (rval);
+}
+
+/*
+ * print/report logs
+ *
+ * Print every log device in lognlp, in short form when PRINT_SHORT is
+ * set and as a full report otherwise.  The full report needs the names
+ * of the trans devices in the set, so they are fetched first.
+ * PRINT_SUBDEVS is always added to the options passed down.
+ *
+ * Returns 0 when everything was printed (or lognlp is empty) and -1 if
+ * the trans names could not be obtained or any log failed to print;
+ * the remaining logs are still attempted after a failure.
+ */
+int
+meta_logs_print(
+	mdsetname_t	*sp,
+	mdnamelist_t	*lognlp,
+	mdnamelist_t	**nlistpp,
+	char		*fname,
+	FILE		*fp,
+	mdprtopts_t	options,
+	md_error_t	*ep
+)
+{
+	mdnamelist_t	*transnlp = NULL;
+	mdnamelist_t	*cur;
+	int		status = 0;
+	int		rc;
+
+	/* must have a set */
+	assert(sp != NULL);
+
+	/* no logs, nothing to do */
+	if (lognlp == NULL)
+		return (0);
+
+	/* only the full report looks at the trans devices */
+	if (! (options & PRINT_SHORT)) {
+		if (meta_get_trans_names(sp, &transnlp, options, ep) < 0)
+			return (-1);
+	}
+
+	/* print all logs */
+	options |= PRINT_SUBDEVS;
+	cur = lognlp;
+	while (cur != NULL) {
+		if (options & PRINT_SHORT) {
+			rc = log_print(sp, cur->namep, fname, fp, options,
+			    ep);
+		} else {
+			rc = log_report(sp, cur->namep, nlistpp, fname, fp,
+			    options, transnlp, ep);
+		}
+		if (rc != 0)
+			status = -1;
+		cur = cur->next;
+	}
+
+	/* release the trans name list and report the overall result */
+	metafreenamelist(transnlp);
+	return (status);
+}
+
+/*
+ * meta_lockfs_common -- common lock and unlock code
+ *
+ * Looks up fs in MNTTAB and, if it is mounted, runs
+ * "/usr/sbin/lockfs -w" (lockit != 0) or "/usr/sbin/lockfs -u"
+ * (lockit == 0) on its mount point and waits for the command.
+ * *cookie is set to (void *)1 when lockfs was run and to 0 otherwise.
+ *
+ * Normally this routine will return a 0 for success. Even if
+ * lockfs wasn't able to lock down the filesystem. The reason
+ * for this is that the master device can be in an errored state
+ * and the lock can't be obtained. We don't want to prevent
+ * possible recovery in this case and it's not likely any activity
+ * will be occurring. If the filesystem is healthy with activity
+ * lockfs will successfully lock the filesystem and return an
+ * error code of 0.
+ *
+ * A non-zero value is returned if fork() fails, or if we can't
+ * determine the outcome of the lockfs. The latter should never
+ * occur because we don't catch signals that could cause
+ * waitpid() to prematurely return.
+ */
+static int
+meta_lockfs_common(mdname_t *fs, void **cookie, int lockit)
+{
+	char		*blkname;
+	FILE		*m;
+	struct mnttab	tab_wildcard, tab_match;
+	pid_t		pid;
+	int		lock_exit;
+	int		found;
+
+	(void) memset(&tab_wildcard, 0, sizeof (tab_wildcard));
+	(void) memset(&tab_match, 0, sizeof (tab_match));
+
+	/*
+	 * NOTE(review): if getfullblkname() returns allocated memory
+	 * it is never released here -- confirm against its definition.
+	 */
+	if ((blkname = fs->bname) == NULL)
+		blkname = getfullblkname(fs->cname);
+
+	/* match on the block device only; all other fields stay NULL */
+	tab_wildcard.mnt_special = blkname;
+
+	if ((m = fopen(MNTTAB, "r")) == NULL) {
+		/*
+		 * No mnttab means nothing is mounted
+		 */
+		*cookie = 0;
+		return (0);
+	}
+
+	/* close the stream on every path, whether or not a match is found */
+	found = (getmntany(m, &tab_match, &tab_wildcard) == 0);
+	(void) fclose(m);
+
+	if (!found) {
+		/*
+		 * No match in mnttab so we're not mounted ... at least
+		 * nothing better be mounted.
+		 */
+		*cookie = 0;
+		return (0);
+	}
+
+	switch (pid = fork()) {
+	    case -1:
+		/*
+		 * We've got some major trouble here and shouldn't
+		 * continue. The user needs to clear up the problems
+		 * that the system currently has before proceeding
+		 * to detach the log.
+		 */
+		(void) printf(dgettext(TEXT_DOMAIN, "failed to fork lockfs\n"));
+		*cookie = 0;
+		return (1);
+
+	    case 0:
+		/* the argument list must end with a null char pointer */
+		(void) execl("/usr/sbin/lockfs", "lockfs", lockit ? "-w" : "-u",
+		    "-c", "Solaris Volume Manager detach lock",
+		    tab_match.mnt_mountp, (char *)NULL);
+		/*
+		 * Shouldn't reach here, but if this code is run on
+		 * a release that doesn't have lockfs return an error
+		 * code so that the -f (force) option could be used
+		 * by metadetach.  _exit() is used so the child does not
+		 * flush stdio buffers inherited from the parent.
+		 */
+		_exit(1);
+		/* NOTREACHED */
+
+	    default:
+		/*
+		 * lockfs was started, so an unlock may be needed later
+		 * whatever happens next.
+		 */
+		*cookie = (void *)1;
+
+		if (waitpid(pid, &lock_exit, 0) != pid) {
+			/*
+			 * We couldn't get status regarding the
+			 * outcome of the lockfs command. We should
+			 * attempt to unlock the filesystem though.
+			 * Return an error code so that if the user
+			 * is trying to force the detach make them
+			 * clear up this problem first.
+			 */
+			return (1);
+		}
+
+		/* the exit status of lockfs is deliberately not examined */
+		return (0);
+	}
+}
+
+/*
+ * meta_lockfs - if mounted, lock a given device against writes
+ *
+ * Thin wrapper that asks meta_lockfs_common() for a lock; the return
+ * value and *cookie are exactly what that routine produces.
+ */
+static int
+meta_lockfs(mdname_t *fs, void **cookie)
+{
+	int	status;
+
+	status = meta_lockfs_common(fs, cookie, 1);
+	return (status);
+}
+
+/*
+ * meta_unlockfs - if mounted, unlock the filesystem if previously locked
+ *
+ * Does nothing unless *cookie is (void *)1; otherwise asks
+ * meta_lockfs_common() for an unlock and ignores its result.
+ */
+static void
+meta_unlockfs(mdname_t *fs, void **cookie)
+{
+	/*
+	 * Simple time saver. We could always try to unlock
+	 * the filesystem, but that takes time and resources.
+	 */
+	if (*cookie != (void *)1)
+		return;
+
+	(void) meta_lockfs_common(fs, cookie, 0);
+}
+
+/*
+ * meta_trans_detach -- detach log from trans device
+ *
+ * Issues MD_IOC_TRANS_DETACH for transnp.  With MDCMD_FORCE the
+ * filesystem on the trans is first write-locked (if mounted) and an
+ * EBUSY answer from the driver is treated as a delayed detach, which
+ * is reported through *delayed.  With MDCMD_PRINT the outcome is
+ * written to stdout.
+ *
+ * Returns 0 on success; otherwise a non-zero value with the error in
+ * ep.  *delayed is only written once the ioctl is attempted.
+ */
+int
+meta_trans_detach(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	mdcmdopts_t	options,
+	int		*delayed,
+	md_error_t	*ep
+)
+{
+	int		force = ((options & MDCMD_FORCE) ? 1 : 0);
+	md_i_get_t	detach;
+	md_trans_t	*transp;
+	mdname_t	*lognp;
+	/*
+	 * Must start out as "not locked": without MDCMD_FORCE
+	 * meta_lockfs() is never called, yet meta_unlockfs() below
+	 * still inspects the cookie.
+	 */
+	void		*lock_cookie = NULL;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(transnp->dev)));
+
+	/* check name */
+	if (metachkmeta(transnp, ep) != 0)
+		return (-1);
+
+	/* save log name */
+	if ((transp = meta_get_trans(sp, transnp, ep)) == NULL)
+		return (-1);
+	if ((lognp = transp->lognamep) == NULL)
+		return (mdmderror(ep, MDE_NO_LOG, meta_getminor(transnp->dev),
+		    transnp->cname));
+
+	/*
+	 * If trans device is mounted lock the filesystem
+	 * against writes and mod time updates.
+	 */
+	if (force && meta_lockfs(transnp, &lock_cookie)) {
+		/*
+		 * This device is mounted and we were unable to
+		 * lock the device. Data corruption can occur
+		 * if we don't lock the device before removing
+		 * the log so bail out here.
+		 * NOTE: There's one case where the exit status
+		 * of lockfs could have been lost yet the command
+		 * could have run. We should try to unlock the filesystem
+		 * before returning.
+		 */
+		meta_unlockfs(transnp, &lock_cookie);
+		return (mdmderror(ep, MDE_UNKNOWN_TYPE,
+		    meta_getminor(transnp->dev), transnp->cname));
+	}
+
+	/* detach log */
+	*delayed = 0;
+	(void) memset(&detach, 0, sizeof (detach));
+	detach.id = meta_getminor(transnp->dev);
+	MD_SETDRIVERNAME(&detach, MD_TRANS, sp->setno);
+	/* the size field carries the force flag for this ioctl */
+	detach.size = force;
+	if (metaioctl(MD_IOC_TRANS_DETACH, &detach, &detach.mde, NULL) != 0) {
+		/* delayed detach */
+		if ((force) && (mdissyserror(&detach.mde, EBUSY))) {
+			*delayed = 1;
+			mdclrerror(&detach.mde);
+		} else {
+			meta_unlockfs(transnp, &lock_cookie);
+			return (mdstealerror(ep, &detach.mde));
+		}
+	}
+
+	/*
+	 * Unlock the filesystem
+	 */
+	meta_unlockfs(transnp, &lock_cookie);
+
+	/* clear cache */
+	meta_invalidate_name(lognp);
+	meta_invalidate_name(transnp);
+
+	/* let em know */
+	if (options & MDCMD_PRINT) {
+		if (*delayed) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+"%s: logging device %s will be detached at unmount or reboot\n"),
+			    transnp->cname, lognp->cname);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: logging device %s is detached\n"),
+			    transnp->cname, lognp->cname);
+		}
+		(void) fflush(stdout);
+	}
+
+	/* return success */
+	return (0);
+}
+
+/*
+ * reset trans
+ *
+ * Clear the trans transnp, or every trans in the set when transnp is
+ * NULL (stopping at the first failure).  A trans that has a parent is
+ * refused with MDE_IN_USE.  With MDCMD_PRINT a confirmation is written
+ * to stdout, and with MDCMD_RECURSE a master that is a metadevice is
+ * cleared as well.
+ *
+ * Returns 0 on success; otherwise a non-zero value with the error in
+ * ep.
+ */
+int
+meta_trans_reset(
+	mdsetname_t	*sp,
+	mdname_t	*transnp,
+	mdcmdopts_t	options,
+	md_error_t	*ep
+)
+{
+	md_trans_t	*transp;
+	int		status = -1;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert((transnp == NULL) ||
+	    (sp->setno == MD_MIN2SET(meta_getminor(transnp->dev))));
+
+	/* no name given: reset every trans, one call each */
+	if (transnp == NULL) {
+		mdnamelist_t	*names = NULL;
+		mdnamelist_t	*cur;
+
+		status = 0;
+		if (meta_get_trans_names(sp, &names, 0, ep) < 0)
+			return (-1);
+
+		/* the loop ends as soon as one reset fails */
+		for (cur = names; (cur != NULL) && (status == 0);
+		    cur = cur->next) {
+			if (meta_trans_reset(sp, cur->namep, options,
+			    ep) != 0)
+				status = -1;
+		}
+
+		metafreenamelist(names);
+		return (status);
+	}
+
+	/* check name */
+	if (metachkmeta(transnp, ep) != 0)
+		return (-1);
+
+	/* get unit structure */
+	transp = meta_get_trans(sp, transnp, ep);
+	if (transp == NULL)
+		return (-1);
+
+	/* make sure nobody owns us */
+	if (MD_HAS_PARENT(transp->common.parent)) {
+		return (mdmderror(ep, MDE_IN_USE, meta_getminor(transnp->dev),
+		    transnp->cname));
+	}
+
+	/* drop cached information about the subdevices */
+	meta_invalidate_name(transp->masternamep);
+	if (transp->lognamep)
+		meta_invalidate_name(transp->lognamep);
+
+	/* clear metadevice */
+	if (meta_reset(sp, transnp, options, ep) == 0) {
+		status = 0;
+
+		/* let em know */
+		if (options & MDCMD_PRINT) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s: Trans is cleared\n"),
+			    transnp->cname);
+			(void) fflush(stdout);
+		}
+
+		/*
+		 * clear the master too when asked to recurse; the
+		 * (multi-parented) log will be cleared later
+		 */
+		if ((options & MDCMD_RECURSE) &&
+		    metaismeta(transp->masternamep)) {
+			mdname_t	*masternp = transp->masternamep;
+
+			if (meta_reset_by_name(sp, masternp, options,
+			    ep) != 0)
+				status = -1;
+		}
+	}
+
+	/* the cached entry for the trans itself is always dropped */
+	meta_invalidate_name(transnp);
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_userflags.c b/usr/src/lib/lvm/libmeta/common/meta_userflags.c
new file mode 100644
index 0000000000..6ac028625d
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/meta_userflags.c
@@ -0,0 +1,98 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 1993-2002 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * get/set user flags for the metadevices (FOR GUI USE ONLY)
+ */
+
+#include <meta.h>
+
+/*
+ * Fetch the user flags kept in the common unit structure of np and
+ * store them in *userflags.
+ *
+ * Returns 0 on success, or -1 (with *userflags untouched) when the
+ * unit cannot be obtained.
+ */
+int
+meta_getuserflags(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	uint_t		*userflags,
+	md_error_t	*ep
+)
+{
+	md_common_t	*unitp;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	unitp = meta_get_unit(sp, np, ep);
+	if (unitp == NULL)
+		return (-1);
+
+	*userflags = unitp->user_flags;
+	return (0);
+}
+
+
+/*
+ * Store userflags in the common unit structure of np by issuing
+ * MD_IOCSET_FLAGS, then drop the cached information for np.
+ *
+ * Returns 0 on success; -1 if np fails metachkmeta() or its misc name
+ * cannot be found; otherwise the value of mdstealerror() after a
+ * failed ioctl.
+ */
+int
+meta_setuserflags(
+	mdsetname_t	*sp,
+	mdname_t	*np,
+	uint_t		userflags,
+	md_error_t	*ep
+)
+{
+	md_set_userflags_t	req;
+	char			*drvname;
+
+	/* should have a set */
+	assert(sp != NULL);
+	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
+
+	/* check name */
+	if (metachkmeta(np, ep) != 0)
+		return (-1);
+
+	/* get misc name */
+	drvname = metagetmiscname(np, ep);
+	if (drvname == NULL)
+		return (-1);
+
+	/* build the request from a zeroed structure */
+	(void) memset(&req, 0, sizeof (req));
+	MD_SETDRIVERNAME(&req, drvname, sp->setno);
+	req.userflags = userflags;
+	req.mnum = meta_getminor(np->dev);
+
+	/* hand it to the driver; its error, if any, is moved into ep */
+	if (metaioctl(MD_IOCSET_FLAGS, &req, &req.mde, np->cname) != 0)
+		return (mdstealerror(ep, &req.mde));
+
+	/* clear cache */
+	meta_invalidate_name(np);
+
+	return (0);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c b/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c
new file mode 100644
index 0000000000..32be258ab3
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c
@@ -0,0 +1,825 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <meta.h>
+#include <metad.h>
+
+/*
+ * Each public mdrpc_*_svc name is declared as a weak alias of the
+ * matching _mdrpc_*_svc stub defined later in this file.  Under the usual
+ * weak-symbol rules a strong definition of the same name supplied at link
+ * time takes precedence; the stubs themselves only assert.
+ */
+#pragma weak mdrpc_nullproc_1_svc = _mdrpc_nullproc_1_svc
+#pragma weak mdrpc_hostname_1_svc = _mdrpc_hostname_1_svc
+#pragma weak mdrpc_addhosts_1_svc = _mdrpc_addhosts_1_svc
+#pragma weak mdrpc_delhosts_1_svc = _mdrpc_delhosts_1_svc
+#pragma weak mdrpc_createset_1_svc = _mdrpc_createset_1_svc
+#pragma weak mdrpc_delset_1_svc = _mdrpc_delset_1_svc
+#pragma weak mdrpc_getset_1_svc = _mdrpc_getset_1_svc
+#pragma weak mdrpc_setnumbusy_1_svc = _mdrpc_setnumbusy_1_svc
+#pragma weak mdrpc_setnameok_1_svc = _mdrpc_setnameok_1_svc
+#pragma weak mdrpc_ownset_1_svc = _mdrpc_ownset_1_svc
+#pragma weak mdrpc_adddrvs_1_svc = _mdrpc_adddrvs_1_svc
+#pragma weak mdrpc_deldrvs_1_svc = _mdrpc_deldrvs_1_svc
+#pragma weak mdrpc_upd_dr_dbinfo_1_svc = _mdrpc_upd_dr_dbinfo_1_svc
+#pragma weak mdrpc_devinfo_1_svc = _mdrpc_devinfo_1_svc
+#pragma weak mdrpc_drvused_1_svc = _mdrpc_drvused_1_svc
+#pragma weak mdrpc_add_drv_sidenms_1_svc = _mdrpc_add_drv_sidenms_1_svc
+#pragma weak mdrpc_del_drv_sidenms_1_svc = _mdrpc_del_drv_sidenms_1_svc
+#pragma weak mdrpc_gtimeout_1_svc = _mdrpc_gtimeout_1_svc
+#pragma weak mdrpc_stimeout_1_svc = _mdrpc_stimeout_1_svc
+#pragma weak mdrpc_upd_dr_flags_1_svc = _mdrpc_upd_dr_flags_1_svc
+#pragma weak mdrpc_upd_sr_flags_1_svc = _mdrpc_upd_sr_flags_1_svc
+#pragma weak mdrpc_unlock_set_1_svc = _mdrpc_unlock_set_1_svc
+#pragma weak mdrpc_lock_set_1_svc = _mdrpc_lock_set_1_svc
+#pragma weak mdrpc_updmeds_1_svc = _mdrpc_updmeds_1_svc
+
+#pragma weak mdrpc_nullproc_2_svc =		_mdrpc_nullproc_2_svc
+#pragma weak mdrpc_hostname_2_svc =		_mdrpc_hostname_2_svc
+#pragma weak mdrpc_addhosts_2_svc =		_mdrpc_addhosts_2_svc
+#pragma weak mdrpc_delhosts_2_svc =		_mdrpc_delhosts_2_svc
+#pragma weak mdrpc_createset_2_svc =		_mdrpc_createset_2_svc
+#pragma weak mdrpc_delset_2_svc =		_mdrpc_delset_2_svc
+#pragma weak mdrpc_getset_2_svc =		_mdrpc_getset_2_svc
+#pragma weak mdrpc_setnumbusy_2_svc =		_mdrpc_setnumbusy_2_svc
+#pragma weak mdrpc_setnameok_2_svc =		_mdrpc_setnameok_2_svc
+#pragma weak mdrpc_ownset_2_svc =		_mdrpc_ownset_2_svc
+#pragma weak mdrpc_adddrvs_2_svc =		_mdrpc_adddrvs_2_svc
+#pragma weak mdrpc_deldrvs_2_svc =		_mdrpc_deldrvs_2_svc
+#pragma weak mdrpc_upd_dr_dbinfo_2_svc =	_mdrpc_upd_dr_dbinfo_2_svc
+#pragma weak mdrpc_devinfo_2_svc =		_mdrpc_devinfo_2_svc
+#pragma weak mdrpc_devid_2_svc =		_mdrpc_devid_2_svc
+#pragma weak mdrpc_devinfo_by_devid_2_svc =	_mdrpc_devinfo_by_devid_2_svc
+#pragma weak mdrpc_devinfo_by_devid_name_2_svc =\
+					_mdrpc_devinfo_by_devid_name_2_svc
+#pragma weak mdrpc_drvused_2_svc =		_mdrpc_drvused_2_svc
+#pragma weak mdrpc_add_drv_sidenms_2_svc =	_mdrpc_add_drv_sidenms_2_svc
+#pragma weak mdrpc_del_drv_sidenms_2_svc =	_mdrpc_del_drv_sidenms_2_svc
+#pragma weak mdrpc_gtimeout_2_svc =		_mdrpc_gtimeout_2_svc
+#pragma weak mdrpc_stimeout_2_svc =		_mdrpc_stimeout_2_svc
+#pragma weak mdrpc_upd_dr_flags_2_svc =		_mdrpc_upd_dr_flags_2_svc
+#pragma weak mdrpc_upd_sr_flags_2_svc =		_mdrpc_upd_sr_flags_2_svc
+#pragma weak mdrpc_unlock_set_2_svc =		_mdrpc_unlock_set_2_svc
+#pragma weak mdrpc_lock_set_2_svc =		_mdrpc_lock_set_2_svc
+#pragma weak mdrpc_updmeds_2_svc =		_mdrpc_updmeds_2_svc
+#pragma weak mdrpc_mncreateset_2_svc =		_mdrpc_mncreateset_2_svc
+#pragma weak mdrpc_mngetset_2_svc =		_mdrpc_mngetset_2_svc
+#pragma weak mdrpc_mnsetmaster_2_svc =		_mdrpc_mnsetmaster_2_svc
+#pragma weak mdrpc_joinset_2_svc =		_mdrpc_joinset_2_svc
+#pragma weak mdrpc_withdrawset_2_svc =		_mdrpc_withdrawset_2_svc
+#pragma weak mdrpc_upd_nr_flags_2_svc =		_mdrpc_upd_nr_flags_2_svc
+#pragma weak mdrpc_mn_is_stale_2_svc =		_mdrpc_mn_is_stale_2_svc
+#pragma weak mdrpc_mdcommdctl_2_svc =		_mdrpc_mdcommdctl_2_svc
+#pragma weak mdrpc_upd_dr_reconfig_2_svc =	_mdrpc_upd_dr_reconfig_2_svc
+#pragma weak mdrpc_getdrivedesc_2_svc =		_mdrpc_getdrivedesc_2_svc
+#pragma weak mdrpc_reset_mirror_owner_2_svc =	_mdrpc_reset_mirror_owner_2_svc
+#pragma weak mdrpc_mn_susp_res_io_2_svc =	_mdrpc_mn_susp_res_io_2_svc
+#pragma weak mdrpc_resnarf_set_2_svc =		_mdrpc_resnarf_set_2_svc
+#pragma weak mdrpc_mn_mirror_resync_all_2_svc = \
+					_mdrpc_mn_mirror_resync_all_2_svc
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_nullproc_1_svc(mdrpc_null_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_hostname_1_svc(mdrpc_null_args *a, mdrpc_hostname_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_addhosts_1_svc(mdrpc_host_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_delhosts_1_svc(mdrpc_host_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_createset_1_svc(mdrpc_createset_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_delset_1_svc(mdrpc_sp_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_getset_1_svc(mdrpc_getset_args *a, mdrpc_getset_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_setnumbusy_1_svc(mdrpc_setno_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_setnameok_1_svc(mdrpc_sp_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_ownset_1_svc(mdrpc_sp_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_adddrvs_1_svc(mdrpc_drives_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_deldrvs_1_svc(mdrpc_drives_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_dbinfo_1_svc(mdrpc_drives_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_devinfo_1_svc(mdrpc_devinfo_args *a, mdrpc_devinfo_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_drvused_1_svc(mdrpc_drvused_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_add_drv_sidenms_1_svc(mdrpc_drv_sidenm_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_del_drv_sidenms_1_svc(mdrpc_sp_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_gtimeout_1_svc(mdrpc_sp_args *a, mdrpc_gtimeout_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_stimeout_1_svc(mdrpc_stimeout_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_flags_1_svc(mdrpc_upd_dr_flags_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_sr_flags_1_svc(mdrpc_upd_sr_flags_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_unlock_set_1_svc(mdrpc_null_args *a, mdrpc_setlock_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_lock_set_1_svc(mdrpc_null_args *a, mdrpc_setlock_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_updmeds_1_svc(mdrpc_updmeds_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_nullproc_2_svc(mdrpc_null_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_hostname_2_svc(mdrpc_null_args *a, mdrpc_hostname_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_addhosts_2_svc(mdrpc_host_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_delhosts_2_svc(mdrpc_host_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_createset_2_svc(mdrpc_createset_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_delset_2_svc(mdrpc_sp_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_getset_2_svc(mdrpc_getset_args *a, mdrpc_getset_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_setnumbusy_2_svc(mdrpc_setno_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_setnameok_2_svc(mdrpc_sp_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_ownset_2_svc(mdrpc_sp_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_adddrvs_2_svc(mdrpc_drives_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_deldrvs_2_svc(mdrpc_drives_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_dbinfo_2_svc(mdrpc_drives_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_devinfo_2_svc(mdrpc_devinfo_2_args *a, mdrpc_devinfo_2_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_devid_2_svc(mdrpc_devid_args *a, mdrpc_devid_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_devinfo_by_devid_2_svc(mdrpc_devidstr_args *a, mdrpc_devinfo_2_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_devinfo_by_devid_name_2_svc(mdrpc_devid_name_2_args *a,
+    mdrpc_devinfo_2_res *b, struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_drvused_2_svc(mdrpc_drvused_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_add_drv_sidenms_2_svc(mdrpc_drv_sidenm_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_del_drv_sidenms_2_svc(mdrpc_sp_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_gtimeout_2_svc(mdrpc_sp_args *a, mdrpc_gtimeout_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_stimeout_2_svc(mdrpc_stimeout_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_flags_2_svc(mdrpc_upd_dr_flags_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_sr_flags_2_svc(mdrpc_upd_sr_flags_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_unlock_set_2_svc(mdrpc_null_args *a, mdrpc_setlock_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_lock_set_2_svc(mdrpc_null_args *a, mdrpc_setlock_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_updmeds_2_svc(mdrpc_updmeds_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_mncreateset_2_svc(mdrpc_mncreateset_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_mngetset_2_svc(mdrpc_getset_2_args *a, mdrpc_mngetset_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_mnsetmaster_2_svc(mdrpc_mnsetmaster_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_joinset_2_svc(mdrpc_sp_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_withdrawset_2_svc(mdrpc_sp_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_nr_flags_2_svc(mdrpc_upd_nr_flags_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_mn_is_stale_2_svc(mdrpc_setno_2_args *a, mdrpc_bool_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_mdcommdctl_2_svc(mdrpc_mdcommdctl_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_resnarf_set_2_svc(mdrpc_setno_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_upd_dr_reconfig_2_svc(mdrpc_upd_dr_flags_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_getdrivedesc_2_svc(mdrpc_sp_2_args *a, mdrpc_getdrivedesc_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_reset_mirror_owner_2_svc(mdrpc_nodeid_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_mn_susp_res_io_2_svc(mdrpc_mn_susp_res_io_2_args *a,
+    mdrpc_generic_res *b, struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_mn_mirror_resync_all_2_svc(mdrpc_setno_2_args *a, mdrpc_generic_res *b,
+    struct svc_req *c)
+{
+	/* Weak-symbol stub: asserts if ever reached. */
+	assert(0);
+	return (TRUE);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/metagetroot.c b/usr/src/lib/lvm/libmeta/common/metagetroot.c
new file mode 100644
index 0000000000..3891c6bd74
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/metagetroot.c
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * get root device
+ */
+
+#include <meta.h>
+#include "meta_lib_prv.h"
+
+#include <sys/mnttab.h>
+
+/*
+ * Find the mnttab entry mounted on "/" and return its mnt_special field,
+ * i.e. the name of the device the root filesystem is mounted from.
+ *
+ * Returns NULL with ep set if mnttab cannot be opened (system error) or
+ * if no "/" entry is found (MDE_NOROOT).
+ *
+ * NOTE(review): the stream from open_mnttab() is not closed here, and the
+ * returned string is whatever storage getmntent() filled in -- presumably
+ * both are owned elsewhere; confirm before holding on to the pointer.
+ */
+void *
+meta_get_current_root(
+	md_error_t	*ep
+)
+{
+	struct mnttab	entry;
+	FILE		*mntfp;
+
+	mntfp = open_mnttab();
+	if (mntfp == NULL) {
+		(void) mdsyserror(ep, errno, MNTTAB);
+		return (NULL);
+	}
+
+	/* walk mnttab until the root mount point turns up */
+	for (;;) {
+		if (getmntent(mntfp, &entry) != 0)
+			break;
+		if (strcmp(entry.mnt_mountp, "/") == 0)
+			return (entry.mnt_special);
+	}
+
+	(void) mderror(ep, MDE_NOROOT, NULL);
+	return (NULL);
+}
+
+/*
+ * Return the name of the physical device underneath the current root
+ * filesystem.  This is only valid when root is either a slice, a stripe
+ * or a mirror.
+ *
+ *   - root on a plain (non-meta) device: that device's name is returned.
+ *   - root on a stripe: the first component of the first row is returned,
+ *     provided it passes metachkcomp().
+ *   - root on a mirror: the first in-use submirror is examined as above.
+ *
+ * Returns NULL with ep set on any lookup failure, or with MDE_INV_ROOT
+ * when root is a metadevice that does not resolve to a stripe.
+ */
+mdname_t *
+meta_get_current_root_dev(
+	mdsetname_t	*sp,
+	md_error_t	*ep
+)
+{
+	md_stripe_t	*stripep;
+	md_mirror_t	*mirrorp;
+	md_row_t	*rp;
+	md_comp_t	*cp;
+	mdname_t	*rootnp;
+	mdname_t	*subnp;
+	void		*curroot;
+	char		*miscname;
+	int		smi;
+
+	if ((curroot = meta_get_current_root(ep)) == NULL)
+		return (NULL);
+	if ((rootnp = metaname(&sp, curroot, ep)) == NULL)
+		return (NULL);
+
+	/* root directly on a slice: nothing to unwrap */
+	if (!metaismeta(rootnp))
+		return (rootnp);
+
+	if ((miscname = metagetmiscname(rootnp, ep)) == NULL)
+		return (NULL);
+
+	if ((strcmp(miscname, MD_MIRROR) == 0) &&
+	    ((mirrorp = meta_get_mirror(sp, rootnp, ep)) != NULL)) {
+		for (smi = 0; smi < NMIRROR; smi++) {
+			md_submirror_t *mdsp = &mirrorp->submirrors[smi];
+
+			subnp = mdsp->submirnamep;
+			/* skip unused submirrors */
+			if (subnp == NULL) {
+				assert(mdsp->state == SMS_UNUSED);
+				continue;
+			}
+			/*
+			 * Only now step down to the submirror.  rootnp is
+			 * never overwritten with NULL, so if every slot is
+			 * unused it still names the mirror and the
+			 * MDE_INV_ROOT report below has a valid name to
+			 * dereference.
+			 */
+			rootnp = subnp;
+			if ((miscname = metagetmiscname(rootnp, ep))
+			    == NULL) {
+				(void) mdmderror(ep, MDE_UNKNOWN_TYPE,
+				    meta_getminor(rootnp->dev),
+				    rootnp->cname);
+				return (NULL);
+			}
+			break;
+		}
+	}
+
+	if ((strcmp(miscname, MD_STRIPE) == 0) &&
+	    ((stripep = meta_get_stripe(sp, rootnp, ep)) != NULL)) {
+		/* only the first component of the first row is considered */
+		rp = &stripep->rows.rows_val[0];
+		cp = &rp->comps.comps_val[0];
+		if (metachkcomp(cp->compnamep, ep) == 0)
+			return (cp->compnamep);
+	}
+
+	/* Root is not a single stripe metadevice */
+	(void) mddeverror(ep, MDE_INV_ROOT, rootnp->dev, rootnp->cname);
+	return (NULL);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/metarpcopen.c b/usr/src/lib/lvm/libmeta/common/metarpcopen.c
new file mode 100644
index 0000000000..bd0f4232f5
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/metarpcopen.c
@@ -0,0 +1,422 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Just in case we're not in a build environment, make sure that
+ * TEXT_DOMAIN gets set to something.
+ */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+#include <meta.h>
+#include <metad.h>
+
+/* Oldest a cached client handle may be, in seconds (compared on tv_sec) */
+#define	CC_TTL_MAX	20
+
+/* One cached RPC client handle, keyed by host name. */
+typedef struct {
+	char		*cc_node;	/* Strdup'd host name */
+	struct timeval	cc_ttl;		/* time cc_clp was last stored */
+	CLIENT		*cc_clp;	/* handle; NULL once released */
+} client_cache_t;
+
+/*
+ * Head of the client cache: a NULL-terminated array of entry pointers
+ * (NULL itself until the first entry is added) plus the mutex guarding it.
+ */
+typedef struct client_header {
+	client_cache_t	**ch_cache;	/* array of clients. */
+	mutex_t		ch_mutex;	/* lock access to ch_cache */
+} client_header_t;
+
+/*
+ * This structure is used to pass data from meta_client_create to
+ * client_create_helper via meta_client_create_retry.  The three fields
+ * are handed unchanged to clnt_create_timed().
+ */
+typedef struct clnt_data {
+	rpcprog_t	cd_prognum;	/* RPC program number */
+	rpcvers_t	cd_version;	/* Desired interface version */
+	char		*cd_nettype;	/* Type of network to use */
+} clnt_data_t;
+
+/* The cache array is allocated, and later grown, this many slots at a time */
+#define	MALLOC_BLK_SIZE	10
+/* The single cache instance used by metarpcopen() and metarpccloseall() */
+static client_header_t	client_header = {(client_cache_t **)NULL, DEFAULTMUTEX};
+
+/*
+ * Record clntp (which may be NULL) as the client handle for node.
+ *
+ * An existing entry for node whose handle is NULL is reused; otherwise a
+ * new entry is appended.  The array is kept NULL-terminated and grows by
+ * MALLOC_BLK_SIZE slots whenever the last free slot is consumed.  If the
+ * current time cannot be read, ep is set and nothing is cached.
+ */
+static void
+cc_add(
+	client_header_t *header,
+	char *node,
+	CLIENT *clntp,
+	md_error_t *ep
+)
+{
+	client_cache_t	**tbl;
+	client_cache_t	*ent;
+	struct timeval	stamp;
+	int		n = 0;
+	int		k;
+
+	if (gettimeofday(&stamp, NULL) == -1) {
+		(void) mdsyserror(ep, errno, "gettimeofday()");
+		return;
+	}
+
+	(void) mutex_lock(&header->ch_mutex);
+	tbl = header->ch_cache;
+	if (tbl == NULL) {
+		/* first use: start with one zeroed block of slots */
+		tbl = Calloc(MALLOC_BLK_SIZE, sizeof (*tbl));
+		header->ch_cache = tbl;
+	} else {
+		/* look for an emptied entry for this node to refill */
+		while ((ent = tbl[n]) != NULL) {
+			if (strcmp(ent->cc_node, node) == 0 &&
+			    ent->cc_clp == NULL) {
+				ent->cc_clp = clntp;
+				ent->cc_ttl = stamp;
+				(void) mutex_unlock(&header->ch_mutex);
+				return;
+			}
+			n++;
+		}
+	}
+
+	/* n now indexes the terminating NULL slot: append there */
+	ent = Zalloc(sizeof (*ent));
+	ent->cc_node = Strdup(node);
+	ent->cc_clp = clntp;
+	ent->cc_ttl = stamp;
+	tbl[n] = ent;
+	n++;
+
+	/* block full: add another block so a NULL terminator remains */
+	if ((n % MALLOC_BLK_SIZE) == 0) {
+		tbl = Realloc(tbl, (n + MALLOC_BLK_SIZE) * sizeof (*tbl));
+		header->ch_cache = tbl;
+		for (k = n; k < (n + MALLOC_BLK_SIZE); k++)
+			tbl[k] = NULL;
+	}
+	(void) mutex_unlock(&header->ch_mutex);
+}
+
+/*
+ * Destroy the client handle held by a cache entry (auth first, then the
+ * handle itself), if there is one, and leave the entry's handle NULL.
+ */
+static void
+rel_clntp(client_cache_t *cachep)
+{
+	if (cachep->cc_clp != NULL) {
+		auth_destroy(cachep->cc_clp->cl_auth);
+		clnt_destroy(cachep->cc_clp);
+	}
+	cachep->cc_clp = NULL;
+}
+
+/*
+ * Tear down the whole client cache under the header mutex: free every
+ * entry's node name, release its client handle, free the entry, then free
+ * the array and reset the header to the empty (NULL) state.
+ */
+static void
+cc_destroy(client_header_t *header)
+{
+	client_cache_t	**tbl;
+	client_cache_t	*ent;
+	int		n;
+
+	(void) mutex_lock(&header->ch_mutex);
+	tbl = header->ch_cache;
+	if (tbl != NULL) {
+		for (n = 0; (ent = tbl[n]) != NULL; n++) {
+			Free(ent->cc_node);
+			rel_clntp(ent);
+			Free(ent);
+		}
+		Free(tbl);
+		header->ch_cache = NULL;
+	}
+	(void) mutex_unlock(&header->ch_mutex);
+}
+
+/*
+ * Apply a call timeout of time_out seconds to a client handle via
+ * clnt_control(CLSET_TIMEOUT).
+ *
+ * Returns 0 on success; otherwise returns the result of mdrpcerror()
+ * after recording the failure in ep.
+ */
+static int
+cl_sto(
+	CLIENT		*clntp,
+	char		*hostname,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	struct timeval	limit;
+
+	/* whole seconds only; tv_usec stays zero */
+	(void) memset(&limit, '\0', sizeof (limit));
+	limit.tv_sec = time_out;
+
+	if (clnt_control(clntp, CLSET_TIMEOUT, (char *)&limit) == TRUE)
+		return (0);
+
+	return (mdrpcerror(ep, clntp, hostname,
+	    dgettext(TEXT_DOMAIN, "metad client set timeout")));
+}
+
+/*
+ * Helper handed to meta_client_create_retry when the METAD interface
+ * version has to be negotiated: asks clnt_create_vers_timed() for a "tcp"
+ * client in the range METAD_VERSION .. METAD_VERSION_DEVID.  The private
+ * data argument is not used.
+ */
+
+/* ARGSUSED */
+static CLIENT *
+client_create_vers_retry(char *hostname, void *ignore, struct timeval *tout)
+{
+	rpcvers_t	chosen;		/* filled in by the call; not used */
+	CLIENT		*clnt;
+
+	clnt = clnt_create_vers_timed(hostname, METAD, &chosen,
+	    METAD_VERSION, METAD_VERSION_DEVID, "tcp", tout);
+	return (clnt);
+}
+
+/*
+ * Helper handed to meta_client_create_retry for an ordinary client
+ * create: private points at a clnt_data_t whose program number, version
+ * and network type are passed straight to clnt_create_timed().
+ */
+static CLIENT *
+client_create_helper(char *hostname, void *private, struct timeval *time_out)
+{
+	clnt_data_t	*req = (clnt_data_t *)private;
+	CLIENT		*clnt;
+
+	clnt = clnt_create_timed(hostname, req->cd_prognum, req->cd_version,
+	    req->cd_nettype, time_out);
+	return (clnt);
+}
+
+/*
+ * meta_client_create_retry is a general function to assist in creating RPC
+ * clients.  This function handles retrying if the attempt to create a
+ * client fails.  meta_client_create_retry itself does not actually create
+ * the client.  Instead it calls the helper function, func, to do that job.
+ *
+ * With the help of func, meta_client_create_retry will create an RPC
+ * connection allowing up to tout seconds to complete the task.  If the
+ * connection creation fails for RPC_RPCBFAILURE, RPC_CANTRECV or
+ * RPC_PROGNOTREGISTERED and tout seconds have not passed,
+ * meta_client_create_retry will try again.  The reason retries are
+ * important is that when the inet daemon is being refreshed, it can take
+ * 15-20 seconds for it to start responding again.
+ *
+ * Arguments:
+ *
+ *	hostname	- Name of remote host
+ *
+ *	func		- Pointer to the helper function, that will
+ *			  actually try to create the client.
+ *
+ *	data		- Private data to be passed on to func.
+ *			  meta_client_create_retry treats this as an opaque
+ *			  pointer.
+ *
+ *	tout		- Number of seconds to allow for the connection
+ *			  attempt.
+ *
+ *	ep		- Standard SVM error pointer.  May be NULL.
+ */
+CLIENT *
+meta_client_create_retry(
+	char			*hostname,
+	clnt_create_func_t	func,
+	void			*data,
+	time_t			tout,
+	md_error_t		*ep
+)
+{
+	static int		debug;		/* print debugging info */
+	static int		debug_set = 0;
+
+	CLIENT			*clnt = (CLIENT *) NULL;
+	struct timeval		began;
+	struct timeval		now;
+	struct timeval		remain;
+	char			*env;
+	int			transient;
+
+	/* decide once whether MD_DEBUG asks for RPC diagnostics */
+	if (debug_set == 0) {
+		env = getenv("MD_DEBUG");
+		debug = (env != NULL && strstr(env, "RPC") != NULL) ? 1 : 0;
+		debug_set = 1;
+	}
+
+	remain.tv_usec = 0;
+	if (gettimeofday(&began, NULL) == -1) {
+		if (ep != (md_error_t *)NULL)
+			(void) mdsyserror(ep, errno, "gettimeofday()");
+		return (clnt);
+	}
+
+	for (now = began; (now.tv_sec - began.tv_sec) < tout; ) {
+		/* each attempt gets whatever is left of the budget */
+		remain.tv_sec = tout - (now.tv_sec - began.tv_sec);
+		clnt = (*func)(hostname, data, &remain);
+		if (clnt != (CLIENT *) NULL)
+			break;
+
+		/* only these failures may clear up if we wait */
+		transient = (rpc_createerr.cf_stat == RPC_RPCBFAILURE) ||
+		    (rpc_createerr.cf_stat == RPC_PROGNOTREGISTERED) ||
+		    (rpc_createerr.cf_stat == RPC_CANTRECV);
+		if (!transient)
+			break;
+
+		if (debug)
+			clnt_pcreateerror("meta_client_create_retry");
+
+		/* pause, then refresh the clock for the next pass */
+		(void) sleep(2);
+		if (gettimeofday(&now, NULL) == -1) {
+			if (ep != (md_error_t *)NULL)
+				(void) mdsyserror(ep, errno, "gettimeofday()");
+			return (clnt);
+		}
+	}
+
+	if ((clnt == (CLIENT *) NULL) && (ep != (md_error_t *)NULL)) {
+		(void) mdrpccreateerror(ep, hostname,
+		    "meta_client_create_retry");
+	}
+	return (clnt);
+}
+
+/*
+ * Replacement for clnt_create within SVM: packages the program number,
+ * version and network type into a clnt_data_t and lets
+ * meta_client_create_retry drive client_create_helper for up to
+ * MD_CLNT_CREATE_TOUT seconds.  No md_error_t is passed, so failure is
+ * reported only by the NULL return.
+ */
+CLIENT *
+meta_client_create(char *host, rpcprog_t prognum, rpcvers_t version,
+	char *nettype)
+{
+	clnt_data_t	req;
+	CLIENT		*clnt;
+
+	req.cd_prognum = prognum;
+	req.cd_version = version;
+	req.cd_nettype = nettype;
+
+	clnt = meta_client_create_retry(host, client_create_helper,
+	    (void *)&req, MD_CLNT_CREATE_TOUT, (md_error_t *)NULL);
+	return (clnt);
+}
+
+/*
+ * create and return RPC connection
+ *
+ * Returns a client handle for the metad service on hostname with its call
+ * timeout set to time_out seconds.  A cached handle is reused when one
+ * exists for hostname and is no more than CC_TTL_MAX seconds old;
+ * otherwise a new handle is created, given default AUTH_SYS credentials
+ * and added to the cache.  Returns NULL with ep set on failure.
+ */
+CLIENT *
+metarpcopen(
+	char		*hostname,
+	long		time_out,
+	md_error_t	*ep
+)
+{
+	CLIENT		*clntp = NULL;
+	client_cache_t	***cachep = &client_header.ch_cache;
+	int		i;
+	long		delta;
+	struct timeval	now;
+
+	if (gettimeofday(&now, NULL) == -1) {
+		(void) mdsyserror(ep, errno, "gettimeofday()");
+		return (NULL);
+	}
+
+	/*
+	 * Before trying to create the client, make sure that the core SVM
+	 * services are enabled by the Service Management Facility.  We
+	 * don't want to suffer the 60 second timeout if the services are
+	 * not even enabled.  This call actually only verifies that they
+	 * are enabled on this host no matter which host the caller wants
+	 * to connect to.  Nonetheless, if the services are not enabled on
+	 * the local host, our RPC stuff is not going to work as expected.
+	 */
+	if (meta_smf_isonline(META_SMF_CORE, ep) == 0) {
+		return (NULL);
+	}
+
+	/* look for a usable cached handle; every exit path drops the lock */
+	(void) mutex_lock(&client_header.ch_mutex);
+	if (client_header.ch_cache) {
+		for (i = 0; (*cachep)[i] != NULL; i++) {
+			if (strcmp((*cachep)[i]->cc_node, hostname) == 0) {
+				clntp = (*cachep)[i]->cc_clp;
+				/* entry exists but holds no handle */
+				if (clntp == NULL)
+					continue;
+				delta = now.tv_sec -
+				    (*cachep)[i]->cc_ttl.tv_sec;
+				/* too old: destroy it and keep looking */
+				if (delta > CC_TTL_MAX) {
+					rel_clntp((*cachep)[i]);
+					continue;
+				}
+				/* hit: note that cc_ttl is not refreshed */
+				if (cl_sto(clntp, hostname, time_out,
+					ep) != 0) {
+					(void) mutex_unlock(
+						&client_header.ch_mutex);
+					return (NULL);
+				}
+				(void) mutex_unlock(&client_header.ch_mutex);
+				return (clntp);
+			}
+		}
+	}
+	(void) mutex_unlock(&client_header.ch_mutex);
+
+	/*
+	 * Try to create a version 2 client handle by default.
+	 * If this fails (i.e. client is version 1), try to
+	 * create a version 1 client handle.
+	 */
+	clntp = meta_client_create_retry(hostname, client_create_vers_retry,
+		(void *)NULL, MD_CLNT_CREATE_TOUT, ep);
+
+	/*
+	 * On failure, record the error and still add an entry for the host
+	 * with a NULL handle.  On success, replace the handle's
+	 * authentication with default AUTH_SYS credentials.
+	 */
+	if (clntp == NULL) {
+		(void) mdrpccreateerror(ep, hostname,
+		    dgettext(TEXT_DOMAIN, "metad client create"));
+		cc_add(&client_header, hostname, NULL, ep);
+		return (NULL);
+	} else {
+		auth_destroy(clntp->cl_auth);
+		clntp->cl_auth = authsys_create_default();
+		assert(clntp->cl_auth != NULL);
+	}
+
+	/* cache the new handle before applying the caller's timeout */
+	cc_add(&client_header, hostname, clntp, ep);
+
+	if (cl_sto(clntp, hostname, time_out, ep) != 0)
+		return (NULL);
+
+	return (clntp);
+}
+
+/*
+ * metarpcclose - counterpart to metarpcopen so that opens do not appear
+ *		  to be left dangling.  It currently does nothing with
+ *		  the handle; open-count bookkeeping could be added here
+ *		  later if it is ever needed.
+ */
+/*ARGSUSED*/
+void
+metarpcclose(CLIENT *clntp)
+{
+	/* intentionally empty */
+}
+
+/*
+ * Destroy every cached client handle and free the client cache itself.
+ */
+void
+metarpccloseall(void)
+{
+	cc_destroy(&client_header);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/metasplitname.c b/usr/src/lib/lvm/libmeta/common/metasplitname.c
new file mode 100644
index 0000000000..84634a109e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/metasplitname.c
@@ -0,0 +1,77 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 1993, 2000 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * split and splice name
+ */
+
+#include <meta.h>
+
+/*
+ * Split name at its last '/' into a prefix (everything before the slash)
+ * and a suffix (everything after it), storing both, with their lengths,
+ * in spn.  The slash itself is not stored.  A name containing no '/' has
+ * an empty prefix and is stored whole as the suffix.
+ *
+ * Returns 0 on success, or 1 (leaving spn untouched) if the prefix is
+ * longer than MD_MAXPREFIX or the suffix longer than MD_MAXSUFFIX.
+ * The stored pieces are not NUL-terminated by this routine.
+ */
+int
+splitname(char *name, md_splitname *spn)
+{
+	size_t	prefixlen;
+	size_t	suffixlen;
+	char	*lastslash;
+	char	*suffix;
+
+	lastslash = strrchr(name, '/');
+	if (lastslash != NULL) {
+		prefixlen = lastslash - name;
+		suffix = lastslash + 1;		/* slash dropped */
+	} else {
+		/*
+		 * No slash: the suffix starts at name itself.  Computing
+		 * it from lastslash here would hand memcpy a bogus
+		 * NULL-based source pointer.
+		 */
+		prefixlen = 0;
+		suffix = name;
+	}
+	suffixlen = strlen(suffix);
+
+	if (prefixlen > MD_MAXPREFIX ||
+	    suffixlen > MD_MAXSUFFIX)
+		return (1);
+
+	(void) memcpy(SPN_PREFIX(spn).pre_data, name, prefixlen);
+	SPN_PREFIX(spn).pre_len = prefixlen;
+	(void) memcpy(SPN_SUFFIX(spn).suf_data, suffix, suffixlen);
+	SPN_SUFFIX(spn).suf_len = suffixlen;
+	return (0);
+}
+
+/*
+ * Rebuild a path from a split name: prefix, a '/', then the suffix, in a
+ * freshly Malloc'd NUL-terminated string.  The '/' is always inserted,
+ * even when the stored prefix is empty.
+ */
+char *
+splicename(md_splitname *spn)
+{
+	size_t	prelen = SPN_PREFIX(spn).pre_len;
+	size_t	suflen = SPN_SUFFIX(spn).suf_len;
+	char	*joined;
+	char	*cursor;
+
+	/* two extra bytes: the separator and the terminating NUL */
+	joined = Malloc(prelen + suflen + 2);
+
+	cursor = joined;
+	(void) memcpy(cursor, SPN_PREFIX(spn).pre_data, prelen);
+	cursor += prelen;
+	*cursor++ = '/';
+	(void) memcpy(cursor, SPN_SUFFIX(spn).suf_data, suflen);
+	cursor[suflen] = 0;
+
+	return (joined);
+}
diff --git a/usr/src/lib/lvm/libmeta/common/sdssc_bind.c b/usr/src/lib/lvm/libmeta/common/sdssc_bind.c
new file mode 100644
index 0000000000..c8e1f8c3ee
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/common/sdssc_bind.c
@@ -0,0 +1,205 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
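+ * Run-time binding of the cluster interface library (SDSSC_PATH).
+ * sdssc_bind_library() opens the library with dlopen() and stores the
+ * address of each entry point named in dl_table in the matching global
+ * sdssc_* function pointer.  When the library cannot be bound, the
+ * pointers are aimed at local stub routines instead.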
+ */
+
+#include <dlfcn.h>
+#include <meta.h>
+#include <metadyn.h>
+#include <sdssc.h>
+
+#define	SDSSC_PATH SDSSC_CL_LIBDIR "/sc/libsds_sc.so"
+
+/*
+ * dl_table -- the cluster interface entry points resolved at run time.
+ *
+ * Each entry pairs the symbol name handed to dlsym() (note the leading
+ * underscore) with the address of the global function pointer that
+ * receives the result.  The code in this file compares against these
+ * names with strcmp(), so the strings must match exactly.  The table ends
+ * with an all-zero entry; loops over it stop when fptr is 0.
+ */
+static func_table_t dl_table[] = {
+	{ "_sdssc_version",		(void **)&sdssc_version },
+	{ "_sdssc_create_begin",	(void **)&sdssc_create_begin },
+	{ "_sdssc_mo_create_begin",	(void **)&sdssc_mo_create_begin },
+	{ "_sdssc_create_end",		(void **)&sdssc_create_end },
+	{ "_sdssc_delete_begin",	(void **)&sdssc_delete_begin },
+	{ "_sdssc_delete_end",		(void **)&sdssc_delete_end },
+	{ "_sdssc_get_index",		(void **)&sdssc_get_index },
+	{ "_sdssc_add_hosts",		(void **)&sdssc_add_hosts },
+	{ "_sdssc_delete_hosts",	(void **)&sdssc_delete_hosts },
+	{ "_sdssc_get_primary_host",	(void **)&sdssc_get_primary_host },
+	{ "_sdssc_cmd_proxy",		(void **)&sdssc_cmd_proxy },
+	{ "_sdssc_getnodelist",		(void **)&sdssc_getnodelist },
+	{ "_sdssc_freenodelist",	(void **)&sdssc_freenodelist },
+	{ "_sdssc_binddevs",		(void **)&sdssc_binddevs },
+	{ "_sdssc_bindclusterdevs",	(void **)&sdssc_bindclusterdevs },
+	{ "_sdssc_gettransportbynode",	(void **)&sdssc_gettransportbynode },
+	{ "_sdssc_free_mdcerr_list",	(void **)&sdssc_free_mdcerr_list },
+	{ "_sdssc_property_get",	(void **)&sdssc_property_get },
+	{ "_sdssc_property_set",	(void **)&sdssc_property_set },
+	{ "_sdssc_get_services",	(void **)&sdssc_get_services },
+	{ "_sdssc_get_services_free",	(void **)&sdssc_get_services_free },
+	{ "_sdssc_suspend",		(void **)&sdssc_suspend },
+	{ "_sdssc_convert_cluster_path",
+	    (void **)&sdssc_convert_cluster_path },
+	{ "_sdssc_convert_ctd_path",
+	    (void **)&sdssc_convert_ctd_path },
+	{ "_sdssc_convert_path_free",
+	    (void **)&sdssc_convert_path_free },
+	{ "_sdssc_notify_service",	(void **)&sdssc_notify_service },
+	{ "_sdssc_cm_nm2nid",	(void **)&sdssc_cm_nm2nid },
+	{ "_sdssc_cm_sr_nm2nid",	(void **)&sdssc_cm_sr_nm2nid },
+	{ "_sdssc_cm_nid2nm",	(void **)&sdssc_cm_nid2nm },
+	{ "_sdssc_cm_sr_nid2nm",	(void **)&sdssc_cm_sr_nid2nm },
+	{ "_sdssc_get_priv_ipaddr",	(void **)&sdssc_get_priv_ipaddr },
+	{ (char *)0,			(void **)0 }
+};
+
+/*
+ * just_dup_string -- local stand-in that set_common_routine() installs for
+ * the "_sdssc_convert_cluster_path" entry point.
+ *
+ * No conversion is done: *dest receives a strdup() copy of source.
+ *
+ * Returns SDSSC_OKAY on success, or SDSSC_ERROR (with *dest set to NULL)
+ * when the copy cannot be allocated, so a caller never sees a success
+ * status paired with a NULL string.
+ */
+static rval_e
+just_dup_string(const char *source, char **dest)
+{
+	*dest = strdup(source);
+	if (*dest == NULL)
+		return (SDSSC_ERROR);
+	return (SDSSC_OKAY);
+}
+
+/*
+ * free_dup_string -- release a string with free().
+ * Referenced by set_command_routine's sibling set_common_routine() as the
+ * local stand-in for the library's path-free routine; presumably the
+ * counterpart of just_dup_string().
+ */
+static void
+free_dup_string(char *source)
+{
+	/* free(NULL) is a no-op, so skipping it changes nothing */
+	if (source != NULL)
+		free(source);
+}
+
+/*
+ * not_bound -- stub that reports SDSSC_NOT_BOUND unconditionally.
+ * set_common_routine() installs it for the entry points that have no
+ * other local stand-in, and sdssc_bind_library() compares sdssc_version
+ * against its address to detect an earlier failed bind.
+ */
+static rval_e
+not_bound(void)
+{
+	rval_e	status = SDSSC_NOT_BOUND;
+
+	return (status);
+}
+
+/*
+ * not_bound_error -- stub that reports SDSSC_NOT_BOUND_ERROR
+ * unconditionally.
+ *
+ * sdssc_bind_library() installs it for an entry point that the loaded
+ * library does not provide, as happens with an older libsdssc that does
+ * not support MN disksets.  Calling an MN specific routine (such as
+ * sdssc_mo_create_begin) then yields SDSSC_NOT_BOUND_ERROR.
+ */
+static rval_e
+not_bound_error(void)
+{
+	rval_e	status = SDSSC_NOT_BOUND_ERROR;
+
+	return (status);
+}
+
+
+/*
+ * set_common_routine -- aim every cluster interface pointer at a local stub.
+ *
+ * Walks dl_table and stores, through each entry's fptr:
+ *	just_dup_string	for "_sdssc_convert_cluster_path"
+ *	free_dup_string	for "_sdssc_convert_path_free"
+ *	not_bound	for every other entry
+ *
+ * sdssc_bind_library() calls this whenever the cluster library cannot be
+ * bound.
+ */
+static void
+set_common_routine()
+{
+	func_table_p	f;
+
+	for (f = dl_table; f->fptr != (void *)0; f++) {
+		if (strcmp(f->fname, "_sdssc_convert_cluster_path") == 0) {
+			*f->fptr = (void *)&just_dup_string;
+		} else if (strcmp(f->fname, "_sdssc_convert_path_free") == 0) {
+			/*
+			 * This must be the name used in dl_table.  A name
+			 * that is not in the table never matches, which
+			 * would leave the free routine bound to not_bound
+			 * and leak every string just_dup_string() returns.
+			 */
+			*f->fptr = (void *)&free_dup_string;
+		} else {
+			*f->fptr = (void *)&not_bound;
+		}
+	}
+}
+
+/*
+ * sdssc_bind_library -- entry point which resolves all cluster interface pts.
+ *
+ * Opens SDSSC_PATH with dlopen(), gives the library a chance to initialize
+ * through its optional "_bind_library" routine, then stores the address of
+ * every symbol named in dl_table in the matching global function pointer.
+ *
+ * Returns:
+ *	SDSSC_OKAY	the library is bound (by this call or an earlier one)
+ *	SDSSC_NOT_BOUND	the library could not be opened, "_bind_library"
+ *			returned non-zero, or an earlier call already left
+ *			the local stubs in place
+ *	SDSSC_ERROR	the library lacks a required entry point; the local
+ *			stubs are in place
+ */
+rval_e
+sdssc_bind_library(void)
+{
+	void		*dp;
+	int		(*lb)();
+	func_table_p	ftp;
+
+	/*
+	 * If already bound then just return okay so this routine
+	 * becomes idempotent. If this check isn't made then we'll
+	 * fail when calling the "_bind_library" function because
+	 * dcs_initialize() can only be called once.
+	 */
+	if (sdssc_version != 0) {
+		if ((void *)sdssc_version == (void *)not_bound)
+			return (SDSSC_NOT_BOUND);
+		else
+			return (SDSSC_OKAY);
+	}
+
+	if ((dp = dlopen(SDSSC_PATH, RTLD_LAZY)) == NULL) {
+		set_common_routine();
+		return (SDSSC_NOT_BOUND);
+	} else {
+
+		/*
+		 * Allow the binding library to initialize state if
+		 * necessary. Currently this calls the DCS initialize()
+		 * routine which checks to see if we're part of a cluster.
+		 */
+		if ((lb = (int (*)())dlsym(dp, "_bind_library")) != NULL) {
+			if (lb() != 0) {
+				set_common_routine();
+				return (SDSSC_NOT_BOUND);
+			}
+		}
+
+		/*
+		 * Load 'em up. Pick up the function address and store
+		 * the values in the global pointers for other routines
+		 * to use.
+		 */
+		for (ftp = dl_table; ftp->fptr != (void *)0; ftp++) {
+			if ((*ftp->fptr = dlsym(dp, ftp->fname)) == NULL) {
+
+				/*
+				 * If old libsdssc library is there, then
+				 * sdssc_mo_create_begin is not yet supported.
+				 * The name must match the dl_table entry
+				 * exactly (leading underscore included) and
+				 * strcmp() returns 0 on a match; anything
+				 * else would treat every missing symbol as
+				 * optional.
+				 */
+				if (strcmp(ftp->fname,
+				    "_sdssc_mo_create_begin") == 0) {
+					*ftp->fptr = (void *)&not_bound_error;
+					continue;
+				}
+				/*
+				 * If this routine fails to find a single
+				 * entry point that it's expecting
+				 * (except sdssc_mo_create_begin) then
+				 * setup non-sdssc stubs routines
+				 * as function pointers.
+				 */
+				set_common_routine();
+				return (SDSSC_ERROR);
+			}
+		}
+
+		return (SDSSC_OKAY);
+	}
+}
diff --git a/usr/src/lib/lvm/libmeta/i386/Makefile b/usr/src/lib/lvm/libmeta/i386/Makefile
new file mode 100644
index 0000000000..17c519db5c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/i386/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include ../Makefile.com
+
+# install and debug share one prerequisite list: the all target plus the
+# files named by ROOTLIBS, ROOTLINT and ROOTLINKS.
+install: all $(ROOTLIBS) $(ROOTLINT) $(ROOTLINKS)
+debug: all $(ROOTLIBS) $(ROOTLINT) $(ROOTLINKS)
diff --git a/usr/src/lib/lvm/libmeta/sparc/Makefile b/usr/src/lib/lvm/libmeta/sparc/Makefile
new file mode 100644
index 0000000000..75eec28afb
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/sparc/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 1998-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include ../Makefile.com
+
+# install and debug share one prerequisite list: the all target plus the
+# files named by ROOTLIBS, ROOTLINT and ROOTLINKS.
+install: all $(ROOTLIBS) $(ROOTLINT) $(ROOTLINKS)
+debug: all $(ROOTLIBS) $(ROOTLINT) $(ROOTLINKS)
diff --git a/usr/src/lib/lvm/libmeta/spec/Makefile b/usr/src/lib/lvm/libmeta/spec/Makefile
new file mode 100644
index 0000000000..5e88f3ac7e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/Makefile
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/Makefile
+
+include	$(SRC)/lib/Makefile.spec.arch
diff --git a/usr/src/lib/lvm/libmeta/spec/Makefile.targ b/usr/src/lib/lvm/libmeta/spec/Makefile.targ
new file mode 100644
index 0000000000..40ffb28073
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/Makefile.targ
@@ -0,0 +1,36 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/Makefile.targ
+
+# Settings shared by the per-architecture spec Makefiles, which include
+# this file before the common library makefiles.
+
+# Library name and version suffix.
+LIBRARY	=	libmeta.a
+VERS	=	.1
+
+# Base object list; the per-architecture Makefiles append to it with
+# "OBJECTS +=".
+OBJECTS	=	meta.o
+
+# Deliberately empty.
+# NOTE(review): presumably preprocessor flags for the spec translator;
+# confirm against the included spec makefiles.
+TRANSCPP =
+
+# Include search path: the parent directory and ../../inc.
+SPECCPP =	-I.. -I../../inc
diff --git a/usr/src/lib/lvm/libmeta/spec/amd64/Makefile b/usr/src/lib/lvm/libmeta/spec/amd64/Makefile
new file mode 100644
index 0000000000..c7d89e007c
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/amd64/Makefile
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+# Shared spec settings (LIBRARY, VERS, OBJECTS, ...); included before the
+# arch specific additions below so that "OBJECTS +=" extends its list.
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#amd64_C_PICFLAGS = $(amd64_C_BIGPICFLAGS)
+
+include	$(SRC)/lib/Makefile.spec
+
+# The rule line below starts with the expansion of DISABLE_APPTRACE.
+# NOTE(review): presumably POUND_SIGN expands to '#', which would turn the
+# whole rule into a comment while apptrace is disabled -- confirm against
+# the definition of POUND_SIGN in the included makefiles.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB64)
diff --git a/usr/src/lib/lvm/libmeta/spec/i386/Makefile b/usr/src/lib/lvm/libmeta/spec/i386/Makefile
new file mode 100644
index 0000000000..3fe06d99af
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/i386/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/i386/Makefile
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+# Shared spec settings (LIBRARY, VERS, OBJECTS, ...); included before the
+# arch specific additions below so that "OBJECTS +=" extends its list.
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#i386_C_PICFLAGS  = -K PIC
+
+include	$(SRC)/lib/Makefile.spec
+
+# The rule line below starts with the expansion of DISABLE_APPTRACE.
+# NOTE(review): presumably POUND_SIGN expands to '#', which would turn the
+# whole rule into a comment while apptrace is disabled -- confirm against
+# the definition of POUND_SIGN in the included makefiles.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libmeta/spec/meta.spec b/usr/src/lib/lvm/libmeta/spec/meta.spec
new file mode 100644
index 0000000000..48d7d2b30e
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/meta.spec
@@ -0,0 +1,3699 @@
+#
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+# lib/lvm/libmeta/spec/meta.spec
+
+function	meta_smf_enable
+version		SUNWprivate_1.1
+end
+
+function	meta_smf_disable
+version		SUNWprivate_1.1
+end
+
+function	meta_smf_getmask
+version		SUNWprivate_1.1
+end
+
+function	meta_smf_isonline
+version		SUNWprivate_1.1
+end
+
+function	meta_svm_sysevent
+version		SUNWprivate_1.1
+end
+
+function	close_admin
+version		SUNWprivate_1.1
+end
+
+function	meta_dev_ismeta
+version		SUNWprivate_1.1
+end
+
+function	meta_get_nunits
+version		SUNWprivate_1.1
+end
+
+function	metamakedev
+version		SUNWprivate_1.1
+end
+
+function	meta_get_tstate
+version		SUNWprivate_1.1
+end
+
+function	meta_expldev
+version		SUNWprivate_1.1
+end
+
+function	meta_cmpldev
+version		SUNWprivate_1.1
+end
+
+function	meta_getmajor
+version		SUNWprivate_1.1
+end
+
+function	meta_getminor
+version		SUNWprivate_1.1
+end
+
+function	open_admin
+version		SUNWprivate_1.1
+end
+
+function	meta_concat_generic
+version		SUNWprivate_1.1
+end
+
+function	meta_concat_parent
+version		SUNWprivate_1.1
+end
+
+function	meta_check_driveinset
+version		SUNWprivate_1.1
+end
+
+function	meta_check_drivemounted
+version		SUNWprivate_1.1
+end
+
+function	meta_check_driveswapped
+version		SUNWprivate_1.1
+end
+
+function	meta_check_inmeta
+version		SUNWprivate_1.1
+end
+
+function	meta_check_inset
+version		SUNWprivate_1.1
+end
+
+function	meta_check_root
+version		SUNWprivate_1.1
+end
+
+function	meta_check_inuse
+version		SUNWprivate_1.1
+end
+
+function	meta_imp_drvused
+version		SUNWprivate_1.1
+end
+
+function	meta_check_overlap
+version		SUNWprivate_1.1
+end
+
+function	meta_check_samedrive
+version		SUNWprivate_1.1
+end
+
+function	meta_check_inreplica
+version		SUNWprivate_1.1
+end
+
+function	meta_check_replica
+version		SUNWprivate_1.1
+end
+
+function	meta_db_addsidenms
+version		SUNWprivate_1.1
+end
+
+function	meta_db_attach
+version		SUNWprivate_1.1
+end
+
+function	meta_db_delsidenm
+version		SUNWprivate_1.1
+end
+
+function	meta_db_detach
+version		SUNWprivate_1.1
+end
+
+function	meta_db_minreplica
+version		SUNWprivate_1.1
+end
+
+function	meta_db_patch
+version		SUNWprivate_1.1
+end
+
+function	meta_get_replica_names
+version		SUNWprivate_1.1
+end
+
+function	meta_setup_db_locations
+version		SUNWprivate_1.1
+end
+
+function	meta_sync_db_locations
+version		SUNWprivate_1.1
+end
+
+function	meta_getdidminorbykey
+version		SUNWprivate_1.1
+end
+
+function	meta_getdidbykey
+version		SUNWprivate_1.1
+end
+
+function	meta_setdid
+version		SUNWprivate_1.1
+end
+
+function	metafreereplicalist
+version		SUNWprivate_1.1
+end
+
+function	metareplicalist
+version		SUNWprivate_1.1
+end
+
+function	meta_db_balance
+version		SUNWprivate_1.1
+end
+
+function	meta_create_non_dup_list
+version		SUNWprivate_1.1
+end
+
+function	sdssc_add_hosts
+version		SUNWprivate_1.1
+end
+
+function	sdssc_bind_library
+version		SUNWprivate_1.1
+end
+
+function	sdssc_bindclusterdevs
+version		SUNWprivate_1.1
+end
+
+function	sdssc_binddevs
+version		SUNWprivate_1.1
+end
+
+function	sdssc_clnt_bind_devs
+version		SUNWprivate_1.1
+end
+
+function	sdssc_clnt_proxy_cmd
+version		SUNWprivate_1.1
+end
+
+function	sdssc_cm_nid2nm
+version		SUNWprivate_1.1
+end
+
+function	sdssc_cm_nm2nid
+version		SUNWprivate_1.1
+end
+
+function	sdssc_cm_sr_nid2nm
+version		SUNWprivate_1.1
+end
+
+function	sdssc_cm_sr_nm2nid
+version		SUNWprivate_1.1
+end
+
+function	sdssc_cmd_proxy
+version		SUNWprivate_1.1
+end
+
+function	sdssc_convert_cluster_path
+version		SUNWprivate_1.1
+end
+
+function	sdssc_convert_ctd_path
+version		SUNWprivate_1.1
+end
+
+function	sdssc_convert_path_free
+version		SUNWprivate_1.1
+end
+
+function	sdssc_create_begin
+version		SUNWprivate_1.1
+end
+
+function	sdssc_mo_create_begin
+version		SUNWprivate_1.1
+end
+
+function	sdssc_create_end
+version		SUNWprivate_1.1
+end
+
+function	sdssc_delete_begin
+version		SUNWprivate_1.1
+end
+
+function	sdssc_delete_end
+version		SUNWprivate_1.1
+end
+
+function	sdssc_delete_hosts
+version		SUNWprivate_1.1
+end
+
+function	sdssc_free_mdcerr_list
+version		SUNWprivate_1.1
+end
+
+function	sdssc_freenodelist
+version		SUNWprivate_1.1
+end
+
+function	sdssc_get_index
+version		SUNWprivate_1.1
+end
+
+function	sdssc_get_primary_host
+version		SUNWprivate_1.1
+end
+
+function	sdssc_get_priv_ipaddr
+version		SUNWprivate_1.1
+end
+
+function	sdssc_get_services
+version		SUNWprivate_1.1
+end
+
+function	sdssc_get_services_free
+version		SUNWprivate_1.1
+end
+
+function	sdssc_getnodelist
+version		SUNWprivate_1.1
+end
+
+function	sdssc_gettransportbynode
+version		SUNWprivate_1.1
+end
+
+function	sdssc_notify_service
+version		SUNWprivate_1.1
+end
+
+function	sdssc_property_get
+version		SUNWprivate_1.1
+end
+
+function	sdssc_property_set
+version		SUNWprivate_1.1
+end
+
+function	sdssc_suspend
+version		SUNWprivate_1.1
+end
+
+function	sdssc_version
+version		SUNWprivate_1.1
+end
+
+function	getdevstamp
+version		SUNWprivate_1.1
+end
+
+function	setdevstamp
+version		SUNWprivate_1.1
+end
+
+function	md_eprintf
+version		SUNWprivate_1.1
+end
+
+function	meta_mc_log
+version		SUNWprivate_1.1
+end
+
+function	md_logpfx
+version		SUNWprivate_1.1
+end
+
+function	md_perror
+version		SUNWprivate_1.1
+end
+
+function	mdclrerror
+version		SUNWprivate_1.1
+end
+
+function	mdcomperror
+version		SUNWprivate_1.1
+end
+
+function	mddeverror
+version		SUNWprivate_1.1
+end
+
+function	mddserror
+version		SUNWprivate_1.1
+end
+
+function	mde_perror
+version		SUNWprivate_1.1
+end
+
+function	mde_sperror
+version		SUNWprivate_1.1
+end
+
+function	mderror
+version		SUNWprivate_1.1
+end
+
+function	mderrorextra
+version		SUNWprivate_1.1
+end
+
+function	mdhserror
+version		SUNWprivate_1.1
+end
+
+function	mdhsperror
+version		SUNWprivate_1.1
+end
+
+function	mdmddberror
+version		SUNWprivate_1.1
+end
+
+function	mdmderror
+version		SUNWprivate_1.1
+end
+
+function	mdrpccreateerror
+version		SUNWprivate_1.1
+end
+
+function	mdrpcerror
+version		SUNWprivate_1.1
+end
+
+function	mdstealerror
+version		SUNWprivate_1.1
+end
+
+function	mdsyserror
+version		SUNWprivate_1.1
+end
+
+function	mduseerror
+version		SUNWprivate_1.1
+end
+
+function	metaioctl
+version		SUNWprivate_1.1
+end
+
+function	meta_getalldevs
+version		SUNWprivate_1.1
+end
+
+function	meta_getdevs
+version		SUNWprivate_1.1
+end
+
+function	meta_getvtoc
+version		SUNWprivate_1.1
+end
+
+function	meta_setvtoc
+version		SUNWprivate_1.1
+end
+
+function	hs_state_to_name
+version		SUNWprivate_1.1
+end
+
+function	meta_check_hotspare
+version		SUNWprivate_1.1
+end
+
+function	meta_check_hsp
+version		SUNWprivate_1.1
+end
+
+function	meta_check_inhsp
+version		SUNWprivate_1.1
+end
+
+function	meta_create_hsp
+version		SUNWprivate_1.1
+end
+
+function	meta_free_hsp
+version		SUNWprivate_1.1
+end
+
+function	meta_get_hsp
+version		SUNWprivate_1.1
+end
+
+function	meta_get_hsp_common
+version		SUNWprivate_1.1
+end
+
+function	meta_get_hsp_names
+version		SUNWprivate_1.1
+end
+
+function	meta_hs_add
+version		SUNWprivate_1.1
+end
+
+function	meta_hs_delete
+version		SUNWprivate_1.1
+end
+
+function	meta_hs_enable
+version		SUNWprivate_1.1
+end
+
+function	meta_hs_replace
+version		SUNWprivate_1.1
+end
+
+function	meta_hsp_print
+version		SUNWprivate_1.1
+end
+
+function	meta_hsp_reset
+version		SUNWprivate_1.1
+end
+
+function	meta_init_hsp
+version		SUNWprivate_1.1
+end
+
+function	meta_invalidate_hsp
+version		SUNWprivate_1.1
+end
+
+function	metachkhsp
+version		SUNWprivate_1.1
+end
+
+function	meta_adjust_geom
+version		SUNWprivate_1.1
+end
+
+function	meta_cook_syntax
+version		SUNWprivate_1.1
+end
+
+function	meta_init_name
+version		SUNWprivate_1.1
+end
+
+function	meta_init_make_device
+version		SUNWprivate_1.1
+end
+
+function	meta_setup_geom
+version		SUNWprivate_1.1
+end
+
+function	parse_interlace
+version		SUNWprivate_1.1
+end
+
+function	close_mnttab
+version		SUNWprivate_1.1
+end
+
+function	open_mnttab
+version		SUNWprivate_1.1
+end
+
+function	meta_update_md_cf
+version		SUNWprivate_1.1
+end
+
+function	med_errnum_to_str
+version		SUNWprivate_1.1
+end
+
+function	Calloc
+version		SUNWprivate_1.1
+end
+
+function	Free
+version		SUNWprivate_1.1
+end
+
+function	Malloc
+version		SUNWprivate_1.1
+end
+
+function	Realloc
+version		SUNWprivate_1.1
+end
+
+function	Strdup
+version		SUNWprivate_1.1
+end
+
+function	Zalloc
+version		SUNWprivate_1.1
+end
+
+function	cl_get_setkey
+version		SUNWprivate_1.1
+end
+
+function	cl_set_setkey
+version		SUNWprivate_1.1
+end
+
+function	clnt_add_drv_sidenms
+version		SUNWprivate_1.1
+end
+
+function	clnt_adddrvs
+version		SUNWprivate_1.1
+end
+
+function	clnt_addhosts
+version		SUNWprivate_1.1
+end
+
+function	clnt_createset
+version		SUNWprivate_1.1
+end
+
+function	clnt_del_drv_sidenms
+version		SUNWprivate_1.1
+end
+
+function	clnt_deldrvs
+version		SUNWprivate_1.1
+end
+
+function	clnt_delhosts
+version		SUNWprivate_1.1
+end
+
+function	clnt_delset
+version		SUNWprivate_1.1
+end
+
+function	clnt_devinfo
+version		SUNWprivate_1.1
+end
+
+function	clnt_drvused
+version		SUNWprivate_1.1
+end
+
+function	clnt_devinfo_by_devid
+version		SUNWprivate_1.1
+end
+
+function	clnt_getset
+version		SUNWprivate_1.1
+end
+
+function	clnt_mngetset
+version		SUNWprivate_1.1
+end
+
+function	clnt_gtimeout
+version		SUNWprivate_1.1
+end
+
+function	clnt_hostname
+version		SUNWprivate_1.1
+end
+
+function	clnt_lock_set
+version		SUNWprivate_1.1
+end
+
+function	clnt_nullproc
+version		SUNWprivate_1.1
+end
+
+function	clnt_ownset
+version		SUNWprivate_1.1
+end
+
+function	clnt_setnameok
+version		SUNWprivate_1.1
+end
+
+function	clnt_setnumbusy
+version		SUNWprivate_1.1
+end
+
+function	clnt_stimeout
+version		SUNWprivate_1.1
+end
+
+function	clnt_unlock_set
+version		SUNWprivate_1.1
+end
+
+function	clnt_upd_dr_dbinfo
+version		SUNWprivate_1.1
+end
+
+function	clnt_upd_dr_flags
+version		SUNWprivate_1.1
+end
+
+function	clnt_upd_sr_flags
+version		SUNWprivate_1.1
+end
+
+function	clnt_upd_nr_flags
+version		SUNWprivate_1.1
+end
+
+function	clnt_updmeds
+version		SUNWprivate_1.1
+end
+
+function	meta_conv_drvdesc_new2old
+version		SUNWprivate_1.1
+end
+
+function	meta_conv_drvdesc_old2new
+version		SUNWprivate_1.1
+end
+
+function	meta_conv_drvname_new2old
+version		SUNWprivate_1.1
+end
+
+function	meta_conv_drvname_old2new
+version		SUNWprivate_1.1
+end
+
+function	alloc_olddrvdesc
+version		SUNWprivate_1.1
+end
+
+function	alloc_newdrvdesc
+version		SUNWprivate_1.1
+end
+
+function	free_olddrvdesc
+version		SUNWprivate_1.1
+end
+
+function	free_newdrvdesc
+version		SUNWprivate_1.1
+end
+
+function	meta_get_devid
+version		SUNWprivate_1.1
+end
+
+function	meta_print_devid
+version		SUNWprivate_1.1
+end
+
+function	clnt_mncreateset
+version		SUNWprivate_1.1
+end
+
+function	clnt_joinset
+version		SUNWprivate_1.1
+end
+
+function	clnt_mnsetmaster
+version		SUNWprivate_1.1
+end
+
+function	clnt_mn_mirror_resync_all
+version		SUNWprivate_1.1
+end
+
+function	clnt_mn_sp_update_abr
+version		SUNWprivate_1.1
+end
+
+function	free_sr
+version		SUNWprivate_1.1
+end
+
+function	short_circuit_getset
+version		SUNWprivate_1.1
+end
+
+function	commitset
+version		SUNWprivate_1.1
+end
+
+function	dr_cache_add
+version		SUNWprivate_1.1
+end
+
+function	dr_cache_del
+version		SUNWprivate_1.1
+end
+
+function	mnnr_cache_add
+version		SUNWprivate_1.1
+end
+
+function	mnnr_cache_del
+version		SUNWprivate_1.1
+end
+
+function	drdup
+version		SUNWprivate_1.1
+end
+
+function	get_db_rec
+version		SUNWprivate_1.1
+end
+
+function	get_ur_rec
+version		SUNWprivate_1.1
+end
+
+function	metad_getsetbyname
+version		SUNWprivate_1.1
+end
+
+function	metad_getsetbynum
+version		SUNWprivate_1.1
+end
+
+function	resnarf_set
+version		SUNWprivate_1.1
+end
+
+function	metad_isautotakebyname
+version		SUNWprivate_1.1
+end
+
+function	metad_isautotakebynum
+version		SUNWprivate_1.1
+end
+
+function	s_delrec
+version		SUNWprivate_1.1
+end
+
+function	s_delset
+version		SUNWprivate_1.1
+end
+
+function	s_ownset
+version		SUNWprivate_1.1
+end
+
+function	set_snarf
+version		SUNWprivate_1.1
+end
+
+function	setdup
+version		SUNWprivate_1.1
+end
+
+function	mnsetdup
+version		SUNWprivate_1.1
+end
+
+function	sr_cache_add
+version		SUNWprivate_1.1
+end
+
+function	sr_cache_del
+version		SUNWprivate_1.1
+end
+
+function	sr_cache_flush
+version		SUNWprivate_1.1
+end
+
+function	sr_cache_flush_setno
+version		SUNWprivate_1.1
+end
+
+function	sr_validate
+version		SUNWprivate_1.1
+end
+
+function	sr_del_drv
+version		SUNWprivate_1.1
+end
+
+function	clnt_med_get_data
+version		SUNWprivate_1.1
+end
+
+function	clnt_med_get_rec
+version		SUNWprivate_1.1
+end
+
+function	clnt_med_hostname
+version		SUNWprivate_1.1
+end
+
+function	clnt_med_null
+version		SUNWprivate_1.1
+end
+
+function	clnt_med_upd_data
+version		SUNWprivate_1.1
+end
+
+function	clnt_med_upd_rec
+version		SUNWprivate_1.1
+end
+
+function	meddstealerror
+version		SUNWprivate_1.1
+end
+
+function	meta_h2hi
+version		SUNWprivate_1.1
+end
+
+function	meta_hi2h
+version		SUNWprivate_1.1
+end
+
+function	meta_med_hnm2ip
+version		SUNWprivate_1.1
+end
+
+function	setup_med_cfg
+version		SUNWprivate_1.1
+end
+
+function	defmhiargs
+version		SUNWprivate_1.1
+end
+
+function	meta_drive_to_disk_status_list
+version		SUNWprivate_1.1
+end
+
+function	meta_free_disk_status_list
+version		SUNWprivate_1.1
+end
+
+function	meta_free_drive_info_list
+version		SUNWprivate_1.1
+end
+
+function	meta_get_drive_names
+version		SUNWprivate_1.1
+end
+
+function	meta_list_disks
+version		SUNWprivate_1.1
+end
+
+function	meta_imp_set
+version		SUNWprivate_1.1
+end
+
+function	meta_list_drives
+version		SUNWprivate_1.1
+end
+
+function	meta_get_set_info
+version		SUNWprivate_1.1
+end
+
+function	meta_prune_cnames
+version		SUNWprivate_1.1
+end
+
+function	meta_rel_own
+version		SUNWprivate_1.1
+end
+
+function	meta_status_own
+version		SUNWprivate_1.1
+end
+
+function	meta_take_own
+version		SUNWprivate_1.1
+end
+
+function	mhstealerror
+version		SUNWprivate_1.1
+end
+
+function	rel_own_bydd
+version		SUNWprivate_1.1
+end
+
+function	tk_own_bydd
+version		SUNWprivate_1.1
+end
+
+function	meta_check_inmirror
+version		SUNWprivate_1.1
+end
+
+function	meta_check_mirror
+version		SUNWprivate_1.1
+end
+
+function	meta_check_submirror
+version		SUNWprivate_1.1
+end
+
+function	meta_create_mirror
+version		SUNWprivate_1.1
+end
+
+function	meta_free_mirror
+version		SUNWprivate_1.1
+end
+
+function	meta_get_mirror
+version		SUNWprivate_1.1
+end
+
+function	meta_get_mirror_names
+version		SUNWprivate_1.1
+end
+
+function	meta_init_mirror
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_anycomp_is_err
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_attach
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_detach
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_enable
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_get_params
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_offline
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_online
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_print
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_replace
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_reset
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_set_params
+version		SUNWprivate_1.1
+end
+
+function	meta_print_mirror_options
+version		SUNWprivate_1.1
+end
+
+function	name_to_pass_num
+version		SUNWprivate_1.1
+end
+
+function	name_to_rd_opt
+version		SUNWprivate_1.1
+end
+
+function	name_to_wr_opt
+version		SUNWprivate_1.1
+end
+
+function	rd_opt_to_name
+version		SUNWprivate_1.1
+end
+
+function	sm_state_to_action
+version		SUNWprivate_1.1
+end
+
+function	sm_state_to_name
+version		SUNWprivate_1.1
+end
+
+function	wr_opt_to_name
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_resync
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_resync_all
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_mirror_resync_all
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_resync_kill_all
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_resync_block_all
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_resync_unblock_all
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_resync_unblock
+version		SUNWprivate_1.1
+end
+
+function	meta_mirror_resync_kill
+version		SUNWprivate_1.1
+end
+
+function	meta_get_mountp
+version		SUNWprivate_1.1
+end
+
+function	blkname
+version		SUNWprivate_1.1
+end
+
+function	get_devname
+version		SUNWprivate_1.1
+end
+
+function	get_hspname
+version		SUNWprivate_1.1
+end
+
+function	get_mdname
+version		SUNWprivate_1.1
+end
+
+function	is_hspname
+version		SUNWprivate_1.1
+end
+
+function	sr2setdesc
+version		SUNWprivate_1.1
+end
+
+function	is_metaname
+version		SUNWprivate_1.1
+end
+
+function	meta_canonicalize
+version		SUNWprivate_1.1
+end
+
+function	meta_get_hotspare_names
+version		SUNWprivate_1.1
+end
+
+function	meta_getdev
+version		SUNWprivate_1.1
+end
+
+function	metachkcomp
+version		SUNWprivate_1.1
+end
+
+function	metachkdisk
+version		SUNWprivate_1.1
+end
+
+function	metachkmeta
+version		SUNWprivate_1.1
+end
+
+function	metadevname
+version		SUNWprivate_1.1
+end
+
+function	metadiskname
+version		SUNWprivate_1.1
+end
+
+function	metadrivename
+version		SUNWprivate_1.1
+end
+
+function	metadrivenamelist
+version		SUNWprivate_1.1
+end
+
+function	metadrivenamelist_append
+version		SUNWprivate_1.1
+end
+
+function	meta_drivenamelist_append_wrapper
+version		SUNWprivate_1.1
+end
+
+function	metafakesetname
+version		SUNWprivate_1.1
+end
+
+function	metaflushnames
+version		SUNWprivate_1.1
+end
+
+function	metaflushsetname
+version		SUNWprivate_1.1
+end
+
+function	metaflushsidenames
+version		SUNWprivate_1.1
+end
+
+function	metafreedrivename
+version		SUNWprivate_1.1
+end
+
+function	metafreedrivenamelist
+version		SUNWprivate_1.1
+end
+
+function	metafreehspnamelist
+version		SUNWprivate_1.1
+end
+
+function	metafreenamelist
+version		SUNWprivate_1.1
+end
+
+function	metaget_setdesc
+version		SUNWprivate_1.1
+end
+
+function	metahsphspname
+version		SUNWprivate_1.1
+end
+
+function	metahspname
+version		SUNWprivate_1.1
+end
+
+function	metahspnamelist
+version		SUNWprivate_1.1
+end
+
+function	metahspnamelist_append
+version		SUNWprivate_1.1
+end
+
+function	metaislocalset
+version		SUNWprivate_1.1
+end
+
+function	metaismeta
+version		SUNWprivate_1.1
+end
+
+function	metaissameset
+version		SUNWprivate_1.1
+end
+
+function	metakeyname
+version		SUNWprivate_1.1
+end
+
+function	metamnumname
+version		SUNWprivate_1.1
+end
+
+function	metaname
+version		SUNWprivate_1.1
+end
+
+function	metaname_fast
+version		SUNWprivate_1.1
+end
+
+function	metanamelist
+version		SUNWprivate_1.1
+end
+
+function	metanamelist_append
+version		SUNWprivate_1.1
+end
+
+function	metasetname
+version		SUNWprivate_1.1
+end
+
+function	metasetnosetname
+version		SUNWprivate_1.1
+end
+
+function	metaslicename
+version		SUNWprivate_1.1
+end
+
+function	ctlr_cache_add
+version		SUNWprivate_1.1
+end
+
+function	ctlr_cache_look
+version		SUNWprivate_1.1
+end
+
+function	getdrvnode
+version		SUNWprivate_1.1
+end
+
+function	meta_free_unit
+version		SUNWprivate_1.1
+end
+
+function	meta_get_mdunit
+version		SUNWprivate_1.1
+end
+
+function	meta_get_unit
+version		SUNWprivate_1.1
+end
+
+function	meta_invalidate_name
+version		SUNWprivate_1.1
+end
+
+function	meta_isopen
+version		SUNWprivate_1.1
+end
+
+function	meta_match_enclosure
+version		SUNWprivate_1.1
+end
+
+function	metaflushctlrcache
+version		SUNWprivate_1.1
+end
+
+function	metafreevtoc
+version		SUNWprivate_1.1
+end
+
+function	metagetcinfo
+version		SUNWprivate_1.1
+end
+
+function	metagetdevicesname
+version		SUNWprivate_1.1
+end
+
+function	metagetgeom
+version		SUNWprivate_1.1
+end
+
+function	metagetlabel
+version		SUNWprivate_1.1
+end
+
+function	metagetmiscname
+version		SUNWprivate_1.1
+end
+
+function	metagetpartno
+version		SUNWprivate_1.1
+end
+
+function	metagetset
+version		SUNWprivate_1.1
+end
+
+function	metagetsize
+version		SUNWprivate_1.1
+end
+
+function	metagetstart
+version		SUNWprivate_1.1
+end
+
+function	metagetvtoc
+version		SUNWprivate_1.1
+end
+
+function	metahasmddb
+version		SUNWprivate_1.1
+end
+
+function	metasetvtoc
+version		SUNWprivate_1.1
+end
+
+function	add_key_name
+version		SUNWprivate_1.1
+end
+
+function	add_name
+version		SUNWprivate_1.1
+end
+
+function	del_key_name
+version		SUNWprivate_1.1
+end
+
+function	del_key_names
+version		SUNWprivate_1.1
+end
+
+function	del_name
+version		SUNWprivate_1.1
+end
+
+function	meta_getnmbykey
+version		SUNWprivate_1.1
+end
+
+function	meta_getnmentbydev
+version		SUNWprivate_1.1
+end
+
+function	meta_getnmentbykey
+version		SUNWprivate_1.1
+end
+
+function	evdrv2evlib_typetab
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_createq
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_deleteq
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_doputev
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_flushq
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_freeevlist
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_getev
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_getevlist
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_listq
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_putev
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_putevlist
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_sendev
+version		SUNWprivate_1.1
+end
+
+function	meta_notify_validq
+version		SUNWprivate_1.1
+end
+
+function	tag2obj_typetab
+version		SUNWprivate_1.1
+end
+
+function	meta_patch_fsdev
+version		SUNWprivate_1.1
+end
+
+function	meta_patch_swapdev
+version		SUNWprivate_1.1
+end
+
+function	meta_patch_vfstab
+version		SUNWprivate_1.1
+end
+
+function	meta_patch_rootdev
+version		SUNWprivate_1.1
+end
+
+function	meta_prbits
+version		SUNWprivate_1.1
+end
+
+function	meta_print_all
+version		SUNWprivate_1.1
+end
+
+function	meta_print_name
+version		SUNWprivate_1.1
+end
+
+function	meta_print_time
+version		SUNWprivate_1.1
+end
+
+function	meta_print_hrtime
+version		SUNWprivate_1.1
+end
+
+function	meta_check_column
+version		SUNWprivate_1.1
+end
+
+function	meta_check_inraid
+version		SUNWprivate_1.1
+end
+
+function	meta_check_raid
+version		SUNWprivate_1.1
+end
+
+function	meta_create_raid
+version		SUNWprivate_1.1
+end
+
+function	meta_default_raid_interlace
+version		SUNWprivate_1.1
+end
+
+function	meta_free_raid
+version		SUNWprivate_1.1
+end
+
+function	meta_get_raid_common
+version		SUNWprivate_1.1
+end
+
+function	meta_get_raid
+version		SUNWprivate_1.1
+end
+
+function	meta_get_raid_names
+version		SUNWprivate_1.1
+end
+
+function	meta_init_raid
+version		SUNWprivate_1.1
+end
+
+function	meta_print_raid_options
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_anycomp_is_err
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_attach
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_check_interlace
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_enable
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_get_params
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_print
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_regen_byname
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_replace
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_reset
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_set_params
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_state_cnt
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_valid
+version		SUNWprivate_1.1
+end
+
+function	raid_col_state_to_name
+version		SUNWprivate_1.1
+end
+
+function	raid_state_to_action
+version		SUNWprivate_1.1
+end
+
+function	raid_state_to_name
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_resync
+version		SUNWprivate_1.1
+end
+
+function	meta_raid_resync_all
+version		SUNWprivate_1.1
+end
+
+function	meta_exchange
+version		SUNWprivate_1.1
+end
+
+function	meta_rename
+version		SUNWprivate_1.1
+end
+
+function	meta_enable_byname
+version		SUNWprivate_1.1
+end
+
+function	meta_replace
+version		SUNWprivate_1.1
+end
+
+function	meta_replace_byname
+version		SUNWprivate_1.1
+end
+
+function	meta_reset
+version		SUNWprivate_1.1
+end
+
+function	meta_reset_all
+version		SUNWprivate_1.1
+end
+
+function	meta_reset_by_name
+version		SUNWprivate_1.1
+end
+
+function	meta_resync_all
+version		SUNWprivate_1.1
+end
+
+function	meta_resync_byname
+version		SUNWprivate_1.1
+end
+
+function	do_owner_ioctls
+version		SUNWprivate_1.1
+end
+
+function	commd_get_verbosity
+version		SUNWprivate_1.1
+end
+
+function	commd_get_outfile
+version		SUNWprivate_1.1
+end
+
+function	get_max_meds
+version		SUNWprivate_1.1
+end
+
+function	get_max_sets
+version		SUNWprivate_1.1
+end
+
+function	getmyside
+version		SUNWprivate_1.1
+end
+
+function	getsetbyname
+version		SUNWprivate_1.1
+end
+
+function	getsetbynum
+version		SUNWprivate_1.1
+end
+
+function	meta_check_drive_inuse
+version		SUNWprivate_1.1
+end
+
+function	meta_check_ownership
+version		SUNWprivate_1.1
+end
+
+function	meta_check_ownership_on_host
+version		SUNWprivate_1.1
+end
+
+function	meta_get_reserved_names
+version		SUNWprivate_1.1
+end
+
+function	meta_getnextside_devinfo
+version		SUNWprivate_1.1
+end
+
+function	meta_is_drive_in_anyset
+version		SUNWprivate_1.1
+end
+
+function	meta_is_drive_in_thisset
+version		SUNWprivate_1.1
+end
+
+function	meta_set_balance
+version		SUNWprivate_1.1
+end
+
+function	meta_set_destroy
+version		SUNWprivate_1.1
+end
+
+function	meta_set_purge
+version		SUNWprivate_1.1
+end
+
+function	meta_set_query
+version		SUNWprivate_1.1
+end
+
+function	metadrivename_withdrkey
+version		SUNWprivate_1.1
+end
+
+function	metafreedrivedesc
+version		SUNWprivate_1.1
+end
+
+function	metaget_drivedesc
+version		SUNWprivate_1.1
+end
+
+function	metaget_drivedesc_fromnamelist
+version		SUNWprivate_1.1
+end
+
+function	metaget_drivedesc_sideno
+version		SUNWprivate_1.1
+end
+
+function	metaget_setownership
+version		SUNWprivate_1.1
+end
+
+function	mynode
+version		SUNWprivate_1.1
+end
+
+function	strinlst
+version		SUNWprivate_1.1
+end
+
+function	meta_make_sidenmlist
+version		SUNWprivate_1.1
+end
+
+function	meta_set_adddrives
+version		SUNWprivate_1.1
+end
+
+function	meta_set_deletedrives
+version		SUNWprivate_1.1
+end
+
+function	meta_set_checkname
+version		SUNWprivate_1.1
+end
+
+function	meta_set_addhosts
+version		SUNWprivate_1.1
+end
+
+function	meta_set_deletehosts
+version		SUNWprivate_1.1
+end
+
+function	meta_set_addmeds
+version		SUNWprivate_1.1
+end
+
+function	meta_set_deletemeds
+version		SUNWprivate_1.1
+end
+
+function	meta_set_auto_take
+version		SUNWprivate_1.1
+end
+
+function	checkdrive_onnode
+version		SUNWprivate_1.1
+end
+
+function	getnodeside
+version		SUNWprivate_1.1
+end
+
+function	halt_set
+version		SUNWprivate_1.1
+end
+
+function	metadrivedesc_append
+version		SUNWprivate_1.1
+end
+
+function	nodehasset
+version		SUNWprivate_1.1
+end
+
+function	nodesuniq
+version		SUNWprivate_1.1
+end
+
+function	own_set
+version		SUNWprivate_1.1
+end
+
+function	resync_genid
+version		SUNWprivate_1.1
+end
+
+function	setup_db_bydd
+version		SUNWprivate_1.1
+end
+
+function	snarf_set
+version		SUNWprivate_1.1
+end
+
+function	meta_set_release
+version		SUNWprivate_1.1
+end
+
+function	meta_set_take
+version		SUNWprivate_1.1
+end
+
+function	meta_set_join
+version		SUNWprivate_1.1
+end
+
+function	meta_set_withdraw
+version		SUNWprivate_1.1
+end
+
+function	meta_update_mb
+version		SUNWprivate_1.1
+end
+
+function	allsigs
+version		SUNWprivate_1.1
+end
+
+function	md_daemonize
+version		SUNWprivate_1.1
+end
+
+function	md_exit
+version		SUNWprivate_1.1
+end
+
+function	md_got_sig
+version		SUNWprivate_1.1
+end
+
+function	setup_mc_log
+version		SUNWprivate_1.1
+end
+
+function	md_init
+version		SUNWprivate_1.1
+end
+
+function	md_init_nosig
+version		SUNWprivate_1.1
+end
+
+function	md_init_daemon
+version		SUNWprivate_1.1
+end
+
+function	md_post_sig
+version		SUNWprivate_1.1
+end
+
+function	md_rb_sig_handling_off
+version		SUNWprivate_1.1
+end
+
+function	md_rb_sig_handling_on
+version		SUNWprivate_1.1
+end
+
+function	md_which_sig
+version		SUNWprivate_1.1
+end
+
+function	meta_lock
+version		SUNWprivate_1.1
+end
+
+function	meta_lock_name
+version		SUNWprivate_1.1
+end
+
+function	meta_lock_nowait
+version		SUNWprivate_1.1
+end
+
+function	meta_lock_status
+version		SUNWprivate_1.1
+end
+
+function	meta_unlock
+version		SUNWprivate_1.1
+end
+
+function	metalogfp
+version		SUNWprivate_1.1
+end
+
+function	metasyslog
+version		SUNWprivate_1.1
+end
+
+function	verbosity
+version		SUNWprivate_1.1
+end
+
+function	start_time
+version		SUNWprivate_1.1
+end
+
+function	myname
+version		SUNWprivate_1.1
+end
+
+function	procsigs
+version		SUNWprivate_1.1
+end
+
+function	rb_test
+version		SUNWprivate_1.1
+end
+
+function	meta_stat
+version		SUNWprivate_1.1
+end
+
+function	metaflushstatcache
+version		SUNWprivate_1.1
+end
+
+function	comp_state_to_name
+version		SUNWprivate_1.1
+end
+
+function	meta_check_component
+version		SUNWprivate_1.1
+end
+
+function	meta_check_instripe
+version		SUNWprivate_1.1
+end
+
+function	meta_check_stripe
+version		SUNWprivate_1.1
+end
+
+function	meta_create_stripe
+version		SUNWprivate_1.1
+end
+
+function	meta_default_stripe_interlace
+version		SUNWprivate_1.1
+end
+
+function	meta_find_erred_comp
+version		SUNWprivate_1.1
+end
+
+function	meta_free_stripe
+version		SUNWprivate_1.1
+end
+
+function	meta_get_stripe_common
+version		SUNWprivate_1.1
+end
+
+function	meta_get_stripe
+version		SUNWprivate_1.1
+end
+
+function	meta_get_stripe_names
+version		SUNWprivate_1.1
+end
+
+function	meta_init_stripe
+version		SUNWprivate_1.1
+end
+
+function	meta_print_stripe_options
+version		SUNWprivate_1.1
+end
+
+function	meta_recover_sp
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_issp
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_reset_component
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_attach
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_update_abr
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_sp_update_abr
+version		SUNWprivate_1.1
+end
+
+function	meta_get_sp_common
+version		SUNWprivate_1.1
+end
+
+function	meta_get_sp
+version		SUNWprivate_1.1
+end
+
+function	meta_free_sp
+version		SUNWprivate_1.1
+end
+
+function	meta_get_sp_names
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_can_create_sps
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_can_create_sps_on_drive
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_get_free_space
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_get_free_space_on_drive
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_get_number_of_possible_sps
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_get_number_of_possible_sps_on_drive
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_get_possible_sp_size
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_get_possible_sp_size_on_drive
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_parsesize
+version		SUNWprivate_1.1
+end
+
+function	meta_stripe_anycomp_is_err
+version		SUNWprivate_1.1
+end
+
+function	meta_stripe_attach
+version		SUNWprivate_1.1
+end
+
+function	meta_stripe_check_interlace
+version		SUNWprivate_1.1
+end
+
+function	meta_stripe_get_params
+version		SUNWprivate_1.1
+end
+
+function	meta_stripe_print
+version		SUNWprivate_1.1
+end
+
+function	meta_stripe_replace
+version		SUNWprivate_1.1
+end
+
+function	meta_stripe_reset
+version		SUNWprivate_1.1
+end
+
+function	meta_stripe_set_params
+version		SUNWprivate_1.1
+end
+
+function	meta_systemfile_append_mddb
+version		SUNWprivate_1.1
+end
+
+function	meta_systemfile_append_mdroot
+version		SUNWprivate_1.1
+end
+
+function	meta_systemfile_copy
+version		SUNWprivate_1.1
+end
+
+function	meta_tab_find
+version		SUNWprivate_1.1
+end
+
+function	meta_tab_free
+version		SUNWprivate_1.1
+end
+
+function	meta_tab_parse
+version		SUNWprivate_1.1
+end
+
+function	meta_check_intrans
+version		SUNWprivate_1.1
+end
+
+function	meta_check_log
+version		SUNWprivate_1.1
+end
+
+function	meta_check_master
+version		SUNWprivate_1.1
+end
+
+function	meta_free_trans
+version		SUNWprivate_1.1
+end
+
+function	meta_get_trans
+version		SUNWprivate_1.1
+end
+
+function	meta_get_trans_common
+version		SUNWprivate_1.1
+end
+
+function	meta_get_trans_names
+version		SUNWprivate_1.1
+end
+
+function	meta_logs_print
+version		SUNWprivate_1.1
+end
+
+function	meta_trans_detach
+version		SUNWprivate_1.1
+end
+
+function	meta_trans_print
+version		SUNWprivate_1.1
+end
+
+function	meta_trans_replace
+version		SUNWprivate_1.1
+end
+
+function	meta_trans_reset
+version		SUNWprivate_1.1
+end
+
+function	mt_flags_to_action
+version		SUNWprivate_1.1
+end
+
+function	mt_flags_to_name
+version		SUNWprivate_1.1
+end
+
+function	mt_l_error_to_action
+version		SUNWprivate_1.1
+end
+
+function	mt_l_error_to_name
+version		SUNWprivate_1.1
+end
+
+function	transstats
+version		SUNWprivate_1.1
+end
+
+function	meta_getuserflags
+version		SUNWprivate_1.1
+end
+
+function	meta_setuserflags
+version		SUNWprivate_1.1
+end
+
+function	metarpcclose
+version		SUNWprivate_1.1
+end
+
+function	metarpccloseall
+version		SUNWprivate_1.1
+end
+
+function	metarpcopen
+version		SUNWprivate_1.1
+end
+
+function	splicename
+version		SUNWprivate_1.1
+end
+
+function	splitname
+version		SUNWprivate_1.1
+end
+
+function	crcfreetab
+version		SUNWprivate_1.1
+end
+
+function	crcfunc
+version		SUNWprivate_1.1
+end
+
+function	mdnullerror
+version		SUNWprivate_1.1
+end
+
+function	xdr_comp_state_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_comp_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_diskaddr_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_hotspare_states_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_hs_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_hsp_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_common_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_comp_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_comp_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_comp_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_dev_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_dev_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_drive_desc
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_drive_record
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_ds_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_ds_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_errclass_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_error_info_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_hs_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_hs_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_hs_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_hsp_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_hsp_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_hsp_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_md_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_md_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_mddb_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_mddb_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_mirror_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_name_prefix
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_name_suffix
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_parent_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_raid_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_raidcol_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_replica_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_replica_recerr_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_replicalist_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_riflags_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_row_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_rpc_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_set_desc
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_set_record
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_setkey_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_shared_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_splitname
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_stackcap_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_status_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_stripe_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_submirror_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_sys_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_trans_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_types_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_ur_get_cmd_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_use_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_use_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_void_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_void_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdcinfo_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mddb_cfgcmd_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mddb_recstatus_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mddb_type_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mddb_usercmd_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mddb_userrec_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mddrivename_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mddrivenamelist_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdgeom_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdhspname_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdhspnamelist_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdname_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdnamelist_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdnmtype_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdpart_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdsetname_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdsetnamelist_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdsidenames_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdvtoc_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_minor_or_hsp_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mm_params_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mm_pass_num_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mm_rd_opt_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mm_wr_opt_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mr_params_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_ms_params_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mt_debug_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mt_flags_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mt_l_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_rcs_flags_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_rcs_state_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_replica_flags_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_rus_state_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_sm_flags_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_sm_state_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_unit_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_clnt_stat
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_timeval32_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_daddr_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_dev64_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_dev_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_alias_ip_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_alias_nm_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_h_arr_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_h_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_hi_arr_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_hi_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_node_nm_arr_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_node_nm_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_set_nm_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mddb_recid_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdkey_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_minor_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_off_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_set_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_side_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_size_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_timeval
+version		SUNWprivate_1.1
+end
+
+function	md_in_daemon
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_add_drv_sidenms_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_adddrvs_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_addhosts_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_createset_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_del_drv_sidenms_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_deldrvs_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_delhosts_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_delset_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_drvused_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_flush_internal_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_getset_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_gtimeout_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_hostname_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_lock_set_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_nullproc_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_ownset_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_setnameok_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_setnumbusy_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_stimeout_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_unlock_set_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_upd_dr_dbinfo_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_upd_dr_flags_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_upd_sr_flags_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_updmeds_1
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_add_drv_sidenms_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_adddrvs_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_addhosts_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_createset_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_del_drv_sidenms_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_deldrvs_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_delhosts_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_delset_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_devinfo_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_drvused_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_flush_internal_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_getset_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_mngetset_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_gtimeout_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_hostname_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_lock_set_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_nullproc_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_ownset_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_setnameok_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_setnumbusy_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_stimeout_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_unlock_set_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_upd_dr_dbinfo_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_upd_dr_flags_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_upd_sr_flags_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_upd_nr_flags_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_updmeds_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_mncreateset_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_mnsetmaster_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_mn_mirror_resync_all_2
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_mn_sp_update_abr_2
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_bool_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_createset_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_createset_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_mncreateset_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devinfo_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devidstr_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devid_name_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devinfo_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devinfo_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devinfo_2_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devid_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devid_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_devid_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_drives_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_drives_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_drv_sidenm_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_drv_sidenm_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_drvused_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_drvused_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_generic_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_getset_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_getset_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_getset_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_mngetset_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_gtimeout_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_host_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_host_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_hostname_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_null_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_setlock_res
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_setno_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_setno_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_sp_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_sp_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_stimeout_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_stimeout_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_upd_dr_flags_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_upd_dr_flags_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_upd_sr_flags_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_upd_sr_flags_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_upd_nr_flags_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_updmeds_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_updmeds_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_mnsetmaster_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_stringarray
+version		SUNWprivate_1.1
+end
+
+function	med_get_data_1
+version		SUNWprivate_1.1
+end
+
+function	med_get_rec_1
+version		SUNWprivate_1.1
+end
+
+function	med_hostname_1
+version		SUNWprivate_1.1
+end
+
+function	med_null_1
+version		SUNWprivate_1.1
+end
+
+function	med_upd_data_1
+version		SUNWprivate_1.1
+end
+
+function	med_upd_rec_1
+version		SUNWprivate_1.1
+end
+
+function	md_med_def_timeout
+version		SUNWprivate_1.1
+end
+
+function	md_med_pmap_timeout
+version		SUNWprivate_1.1
+end
+
+function	med_null_err
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_med_errno_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_args_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_data_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_err_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_get_data_res_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_get_rec_res_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_hnm_res_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_med_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_rec_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_res_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_upd_data_args_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_med_upd_rec_args_t
+version		SUNWprivate_1.1
+end
+
+function	mhd_list_1
+version		SUNWprivate_1.1
+end
+
+function	mhd_relown_1
+version		SUNWprivate_1.1
+end
+
+function	mhd_status_1
+version		SUNWprivate_1.1
+end
+
+function	mhd_tkown_1
+version		SUNWprivate_1.1
+end
+
+function	mhd_null_error
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_drive_status_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_drivename_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_error_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_ff_mode_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_list_args_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_list_res_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_opts_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_relown_args_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_set_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_status_args_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_status_res_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_tkown_args_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_cinfo_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_ctlrtype_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_did_flags_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_drive_id_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_drive_info_list_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_drive_info_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_mhiargs_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_serial_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_mhd_mhioctkown_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_mn_msg_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_mn_nodeid_t
+version		SUNWprivate_1.1
+end
+
+function	meta_get_current_root
+version		SUNWprivate_1.1
+end
+
+function	meta_get_current_root_dev
+version		SUNWprivate_1.1
+end
+
+function	meta_gettimeofday
+version		SUNWprivate_1.1
+end
+
+function	meta_replicaslice
+version		SUNWprivate_1.1
+end
+
+function	meta_get_tstate
+version		SUNWprivate_1.1
+end
+
+function	meta_setmdvtoc
+version		SUNWprivate_1.1
+end
+
+function	meta_check_devicesize
+version		SUNWprivate_1.1
+end
+
+function	clnt_devid
+version		SUNWprivate_1.1
+end
+
+function	meta_number_to_string
+version		SUNWprivate_1.1
+end
+
+function	meta_repartition_drive
+version		SUNWprivate_1.1
+end
+
+function	mdmn_send_message
+version		SUNWprivate_1.1
+end
+
+function	copy_result
+version		SUNWprivate_1.1
+end
+
+function	free_result
+version		SUNWprivate_1.1
+end
+
+function	copy_msg
+version		SUNWprivate_1.1
+end
+
+function	copy_msg_1
+version		SUNWprivate_1.1
+end
+
+function	free_msg
+version		SUNWprivate_1.1
+end
+
+function	mdmn_get_handler
+version		SUNWprivate_1.1
+end
+
+function	mdmn_get_submessage_generator
+version		SUNWprivate_1.1
+end
+
+function	mdmn_get_message_class
+version		SUNWprivate_1.1
+end
+
+function	mdmn_get_timeout
+version		SUNWprivate_1.1
+end
+
+function	meta_read_nodelist
+version		SUNWprivate_1.1
+end
+
+function	meta_write_nodelist
+version		SUNWprivate_1.1
+end
+
+function	meta_free_nodelist
+version		SUNWprivate_1.1
+end
+
+function	meta_is_mn_set
+version		SUNWprivate_1.1
+end
+
+function	meta_ping_mnset
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_send_command
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_send_suspend_writes
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_send_setsync
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_send_metaclear_command
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_send_resync_starting
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_change_owner
+version		SUNWprivate_1.1
+end
+
+function	meta_is_mn_name
+version		SUNWprivate_1.1
+end
+
+function	meta_reconfig_choose_master
+version		SUNWprivate_1.1
+end
+
+function	meta_mnsync_user_records
+version		SUNWprivate_1.1
+end
+
+function	meta_mnsync_diskset_mddbs
+version		SUNWprivate_1.1
+end
+
+function	meta_mnjoin_all
+version		SUNWprivate_1.1
+end
+
+function	mdmn_create_msgid
+version		SUNWprivate_1.1
+end
+
+function	mdmn_suspend
+version		SUNWprivate_1.1
+end
+
+function	mdmn_resume
+version		SUNWprivate_1.1
+end
+
+function	mdmn_reinit_set
+version		SUNWprivate_1.1
+end
+
+function	mdmn_msgtype_lock
+version		SUNWprivate_1.1
+end
+
+function	mdmn_abort
+version		SUNWprivate_1.1
+end
+
+function	mdmn_send_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_work_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_wakeup_initiator_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_wakeup_master_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_comm_lock_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_comm_unlock_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_comm_suspend_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_comm_resume_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_comm_reinit_set_1
+version		SUNWprivate_1.1
+end
+
+function	mdmn_comm_msglock_1
+version		SUNWprivate_1.1
+end
+
+function	clnt_mdcommdctl
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_mdcommdctl_2
+version		SUNWprivate_1.1
+end
+
+function	clnt_mn_is_stale
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_mn_is_stale_2
+version		SUNWprivate_1.1
+end
+
+function	clnt_clr_mnsetlock
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_clr_mnsetlock_2
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_sp_flags_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_sp_flags_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_mdcommdctl_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_mdcommdctl_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_getdrivedesc_res
+version		SUNWprivate_1.1
+end
+
+function	dd_list_dup
+version		SUNWprivate_1.1
+end
+
+function	mdmn_allocate_changelog
+version		SUNWprivate_1.1
+end
+
+function	mdmn_reset_changelog
+version		SUNWprivate_1.1
+end
+
+function	mdmn_log_msg
+version		SUNWprivate_1.1
+end
+
+function	mdmn_unlog_msg
+version		SUNWprivate_1.1
+end
+
+function	mdmn_snarf_changelog
+version		SUNWprivate_1.1
+end
+
+function	mdmn_get_changelogrec
+version		SUNWprivate_1.1
+end
+
+function	clnt_reset_mirror_owner
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_reset_mirror_owner_2
+version		SUNWprivate_1.1
+end
+
+function	clnt_mn_susp_res_io
+version		SUNWprivate_1.1
+end
+
+function	mdrpc_mn_susp_res_io_2
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_mn_susp_res_io_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_mn_susp_res_io_2_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_nodeid_args
+version		SUNWprivate_1.1
+end
+
+function	xdr_mdrpc_nodeid_2_args
+version		SUNWprivate_1.1
+end
+
+function	meta_is_member
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_singlenode
+version		SUNWprivate_1.1
+end
+
+function	meta_sp_setstatus
+version		SUNWprivate_1.1
+end
+
+function	xdr_mp_unit_t
+version		SUNWprivate_1.1
+end
+
+function	xdr_md_set_params_t
+version		SUNWprivate_1.1
+end
+
+function	meta_fixdevid
+version		SUNWprivate_1.1
+end
+
+function	meta_upd_ctdnames
+version		SUNWprivate_1.1
+end
+
+function	pathname_reload
+version		SUNWprivate_1.1
+end
+
+function	meta_deviceid_to_nmlist
+version		SUNWprivate_1.1
+end
+
+function	meta_mn_send_get_tstate
+version		SUNWprivate_1.1
+end
+
+function	meta_client_create_retry
+version		SUNWprivate_1.1
+end
+
+function	meta_client_create
+version		SUNWprivate_1.1
+end
+
+function	read_master_block
+version		SUNWprivate_1.1
+end
diff --git a/usr/src/lib/lvm/libmeta/spec/sparc/Makefile b/usr/src/lib/lvm/libmeta/spec/sparc/Makefile
new file mode 100644
index 0000000000..8d93c87287
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/sparc/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/sparc/Makefile
+
+# Architecture-specific makefile for the libmeta spec library (sparc).
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line.
+# DISABLE_APPTRACE is expanded in front of the install rule at the end of
+# this file; presumably $(POUND_SIGN) expands to '#', which would turn
+# that rule into a comment -- TODO confirm in the master makefiles.
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+# Definitions shared by every architecture directory of this spec library.
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Override of the PIC compiler flags.  NOTE: this override is active
+# (already uncommented) in this makefile; comment it out to disable it.
+sparc_C_PICFLAGS  = -K PIC
+
+include	$(SRC)/lib/Makefile.spec
+
+# Only takes effect when DISABLE_APPTRACE above expands to nothing.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libmeta/spec/sparcv9/Makefile b/usr/src/lib/lvm/libmeta/spec/sparcv9/Makefile
new file mode 100644
index 0000000000..5a7be1d65b
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/sparcv9/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libmeta/spec/sparcv9/Makefile
+
+# Architecture-specific makefile for the libmeta spec library (sparcv9).
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line.
+# DISABLE_APPTRACE is expanded in front of the install rule at the end of
+# this file; presumably $(POUND_SIGN) expands to '#', which would turn
+# that rule into a comment -- TODO confirm in the master makefiles.
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+# Definitions shared by every architecture directory of this spec library.
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Override of the PIC compiler flags.  NOTE: this override is active
+# (already uncommented) in this makefile.
+# NOTE(review): the macro is named sparc_C_PICFLAGS although this is the
+# sparcv9 makefile -- confirm whether sparcv9_C_PICFLAGS was intended.
+sparc_C_PICFLAGS  = -K PIC
+
+include	$(SRC)/lib/Makefile.spec
+
+# Only takes effect when DISABLE_APPTRACE above expands to nothing.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB64)
diff --git a/usr/src/lib/lvm/libmeta/spec/versions b/usr/src/lib/lvm/libmeta/spec/versions
new file mode 100644
index 0000000000..523cb927d5
--- /dev/null
+++ b/usr/src/lib/lvm/libmeta/spec/versions
@@ -0,0 +1,31 @@
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Version names declared per architecture.  The .spec entries of this
+# library all name SUNWprivate_1.1 in their "version" line.
+i386 {
+	SUNWprivate_1.1;
+}
+sparc {
+	SUNWprivate_1.1;
+}
diff --git a/usr/src/lib/lvm/libpreen/Makefile b/usr/src/lib/lvm/libpreen/Makefile
new file mode 100644
index 0000000000..ca1bb50937
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/Makefile
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 1998-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+# Top-level libpreen makefile: hands each target down to the spec
+# directory and to the native architecture directory.
+include $(SRC)/lib/Makefile.lib
+
+# Only the directory named after $(MACH) is descended into.
+SUBDIRS =	$(MACH)
+
+# Conditional macros: each top-level target selects the value of TARGET
+# that the recursive rule below passes to the sub-make.
+all :=		TARGET= all
+install :=	TARGET= install
+clean :=	TARGET= clean
+clobber :=	TARGET= clobber
+lint :=		TARGET= lint
+debug :=	TARGET= debug
+
+.KEEP_STATE:
+
+# spec is listed ahead of .WAIT so that it is processed before $(SUBDIRS).
+all clean clobber debug install: spec .WAIT $(SUBDIRS)
+
+# lint does not descend into spec.
+lint: $(SUBDIRS)
+
+# Recurse into the directory named by the target, running $(TARGET) there.
+spec $(SUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+# FRC has neither prerequisites nor commands; depending on it forces the
+# rule above to run every time.
+FRC:
diff --git a/usr/src/lib/lvm/libpreen/Makefile.com b/usr/src/lib/lvm/libpreen/Makefile.com
new file mode 100644
index 0000000000..fdfee4627d
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/Makefile.com
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+# Definitions and rules common to every architecture build of the SVM
+# preen library.  (Trailing whitespace has been removed from the macro
+# values so that it cannot become part of a file name.)
+LIBRARY=	preen_md.a
+VERS=		.1
+OBJECTS=	mdpreen.o
+
+include $(SRC)/lib/lvm/Makefile.lvm
+
+ROOTLIBDIR=	$(ROOT)/usr/lib/drv
+# don't build a static lib
+LIBS=		$(DYNLIB)
+CPPFLAGS +=	-D_FILE_OFFSET_BITS=64
+LDLIBS +=	-lmeta -lc
+# ZDEFS is emptied on purpose.  Presumably this lets the link succeed
+# with preen_addunit/preen_subdev unresolved (mdpreen.c only declares
+# them extern) -- TODO confirm.
+ZDEFS=
+
+# The mapfile is taken from the matching spec build directory.
+MAPDIR=		$(SRC)/lib/lvm/libpreen/spec/$(TRANSMACH)
+SPECMAPFILE=	$(MAPDIR)/mapfile
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+include $(SRC)/lib/lvm/Makefile.targ
+
+# Conditional macro: FILEMODE is 555 only while building the installed
+# shared object.
+$(ROOTLIBDIR)/$(DYNLIB) :=	FILEMODE= 555
diff --git a/usr/src/lib/lvm/libpreen/common/mdpreen.c b/usr/src/lib/lvm/libpreen/common/mdpreen.c
new file mode 100644
index 0000000000..a28a6c2560
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/common/mdpreen.c
@@ -0,0 +1,335 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * preenlib interface for SVM.
+ *
+ * On startup fsck attempts to check filesystems in parallel. However
+ * running multiple fscks on the same disk at the same time
+ * significantly degrades the performance. fsck code avoids such
+ * behavior. To analyse such patterns it needs the physical disk
+ * instance. preen_build_devs provides that information for
+ * filesystems that are on top of metadevices.
+ */
+
+#include <meta.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <sdssc.h>
+
+/* Size of the buffer that holds one line read from NAME_TO_MAJOR. */
+#define	MAX_N2M_ALIAS_LINE	(2*FILENAME_MAX + 1)
+/* File searched by get_major_from_n2m() for a driver's major number. */
+#define	NAME_TO_MAJOR		"/etc/name_to_major"
+/* Driver name whose major number identifies a metadevice. */
+#define	MD_MODULE		"md"
+
+/*
+ *	Macros to produce a quoted string containing the value of a
+ *	preprocessor macro. For example, if SIZE is defined to be 256,
+ *	VAL2STR(SIZE) is "256". This is used to construct format
+ *	strings for scanf-family functions below.
+ *	The two levels are needed so that the argument is macro-expanded
+ *	before # turns it into a string.
+ */
+#define	QUOTE(x)	#x
+#define	VAL2STR(x)	QUOTE(x)
+
+/*
+ * Not defined in this file; presumably supplied by the program that
+ * loads this library (fsck's preen code) -- TODO confirm.
+ */
+extern	void	preen_addunit(void *cookie, char *dname, int (*cf)(),
+		    void *datap, uint_t unit);
+extern	int	preen_subdev(char *name, struct dk_cinfo *dkiop, void *dp);
+
+/*
+ * Look up the major number recorded for driver `modname' in
+ * NAME_TO_MAJOR (/etc/name_to_major).
+ *
+ * Every line is parsed as a driver name followed by a decimal number.
+ * On a match the number is stored through `major' and 0 is returned.
+ * -1 is returned, and *major is left untouched, when the file cannot be
+ * opened, when a line that does not parse is reached before a match, or
+ * when no line names `modname'.
+ */
+static int
+get_major_from_n2m(char *modname, int *major)
+{
+	FILE *fp;
+	char drv[FILENAME_MAX + 1];
+	int entry;
+	char line[MAX_N2M_ALIAS_LINE];
+	int status = -1;	/* stays -1 until a match is found */
+
+	if ((fp = fopen(NAME_TO_MAJOR, "r")) == NULL) {
+		return (-1);
+	}
+
+	while (fgets(line, sizeof (line), fp) != NULL) {
+		/*
+		 * Give up at the first malformed line.  After a failed
+		 * conversion drv and/or entry do not describe this line,
+		 * so they must be neither compared nor copied out;
+		 * otherwise the caller could be handed a stale or
+		 * uninitialized major number.
+		 */
+		if (sscanf(line, "%" VAL2STR(FILENAME_MAX) "s %d",
+		    drv, &entry) != 2) {
+			break;
+		}
+		if (strcmp(drv, modname) == 0) {
+			*major = entry;
+			status = 0;
+			break;
+		}
+	}
+
+	(void) fclose(fp);
+	return (status);
+}
+
+/*
+ * Return a newly allocated copy of the diskset name embedded in a raw
+ * metadevice path.
+ *
+ * dev_path is either a local-set path such as /dev/md/rdsk/d0 or a
+ * diskset path such as /dev/md/red/rdsk/d0.  For the former the local
+ * set name (MD_LOCAL_NAME) is returned, for the latter the component in
+ * front of "/rdsk/" ("red" in the example).
+ *
+ * NULL is returned when dev_path does not contain "/rdsk/d" preceded by
+ * a slash-delimited component, i.e. when no set name can be derived.
+ * A non-NULL result belongs to the caller, who releases it with Free().
+ */
+
+static char *
+parse_path(char *dev_path)
+{
+	char *rdsk;	/* start of "/rdsk/d..." inside dev_path */
+	char *slash;	/* slash opening the component before rdsk */
+	char *setname;
+	size_t size;
+
+	/*
+	 * paths are /dev/md/rdsk/dx or /dev/md/<setname>/rdsk/dx
+	 *
+	 * The /rdsk/d pattern in strstr is used so that users with
+	 * a twisted mind can create a diskset called "rdsk" and
+	 * would still want everything to work!!
+	 */
+	rdsk = strstr(dev_path, "/rdsk/d");
+	if (rdsk == NULL || rdsk == dev_path)
+		return (NULL);
+
+	/*
+	 * Scan back to the previous slash, never stepping in front of
+	 * the start of the string.
+	 */
+	for (slash = rdsk - 1; slash > dev_path && *slash != '/'; slash--)
+		;
+	if (*slash != '/')
+		return (NULL);
+
+	/*
+	 * If the component in front of that slash is "dev", the path
+	 * names a device of the local set.
+	 */
+	if ((slash - dev_path) >= 3 &&
+	    strncmp(slash - 3, "dev", strlen("dev")) == 0)
+		return (Strdup(MD_LOCAL_NAME));
+
+	/*
+	 * extract the setname from the path; size counts the name plus
+	 * the terminating NUL that strlcpy adds.
+	 */
+	size = (size_t)(rdsk - slash);
+	setname = (char *)Malloc(size);
+	(void) strlcpy(setname, (const char *)(slash + 1), size);
+
+	return (setname);
+}
+
+/*
+ * This routine is called from preenlib the first time. It is then
+ * recursively called through preen_subdev.
+ *
+ * The argument passed in (uname) starts with the special device from
+ * /etc/vfstab. Recursive calls pass in the underlying physical device
+ * names.
+ *
+ * A device that is not a metadevice is handed to preen_addunit() as is.
+ * For a metadevice every underlying device is opened, its controller
+ * information is read with DKIOCINFO and it is passed to preen_subdev(),
+ * falling back to preen_addunit() when preen_subdev() returns non-zero.
+ * Failures are reported with mde_perror() and end the processing of
+ * this device; nothing is returned to the caller.
+ */
+void
+preen_build_devs(
+	char		*uname,		/* name of metadevice */
+	struct dk_cinfo	*dkiop,		/* associated controller info */
+	void		*dp		/* magic info */
+)
+{
+	char		*setname = NULL;
+	char		*dup_devname = NULL;	/* devname we must Free() */
+	mdsetname_t	*sp;
+	mdname_t	*namep;		/* metadevice name */
+	mdnamelist_t	*nlp = NULL;	/* list of real devices */
+	mdnamelist_t	*p;
+	devid_nmlist_t	*nm_list = NULL;
+	md_error_t	status = mdnullerror;
+	md_error_t	*ep = &status;
+	int		ep_valid = 0;	/* does ep contain a real error */
+	struct stat	statb;
+	static int	md_major = -1;	/* cached major number of "md" */
+	side_t		sideno;
+
+	if (stat(uname, &statb) != 0)
+		return;
+
+	if (md_major == -1 &&
+		get_major_from_n2m(MD_MODULE, &md_major) != 0)
+		return;
+
+	/*
+	 * If the path passed in is not a metadevice, then add that
+	 * device to the list (preen_addunit) since it has to be a
+	 * physical device.
+	 */
+
+	if (major(statb.st_rdev) != md_major) {
+		preen_addunit(dp, dkiop->dki_dname, NULL, NULL,
+		    dkiop->dki_unit);
+		return;
+	}
+	/*
+	 * Bind to the cluster library
+	 */
+
+	if (sdssc_bind_library() == SDSSC_ERROR)
+		return;
+
+	if (md_init_daemon("fsck", ep) != 0) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	/*
+	 * parse the path name to get the diskset name.
+	 */
+
+	setname = parse_path(uname);
+	if (setname == NULL) {
+		/* not a /dev/md/[<set>/]rdsk/dN style path */
+		(void) mdsyserror(ep, EINVAL, uname);
+		ep_valid = 1;
+		goto out;
+	}
+	if ((sp = metasetname(setname, ep)) == NULL) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	/* check for ownership */
+	if (meta_check_ownership(sp, ep) != 0) {
+		/*
+		 * Don't own the set but we are here implies
+		 * that this is a clustered proxy device. Simply add
+		 * the unit.
+		 */
+		preen_addunit(dp, dkiop->dki_dname, NULL, NULL,
+		    dkiop->dki_unit);
+		ep_valid = 1;
+		goto out;
+	}
+
+	/*
+	 * get list of underlying physical devices.
+	 */
+	if ((namep = metaname(&sp, uname, ep)) == NULL) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	if (namep->dev == NODEV64) {
+		goto out;
+	}
+
+	if (meta_getdevs(sp, namep, &nlp, ep) != 0) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD) {
+		ep_valid = 1;
+		goto out;
+	}
+
+	/* gather and add the underlying devs */
+	for (p = nlp; (p != NULL); p = p->next) {
+		mdname_t	*devnp = p->namep;
+		int		fd;
+		struct dk_cinfo	cinfo;
+		ddi_devid_t	md_did;
+		char		*devname;
+		char		*minor_name = NULL;
+		char		mname[MAXPATHLEN];
+
+		/*
+		 * we don't want to use the rname anymore because
+		 * that may have changed. Use the device id information
+		 * to find the correct ctd name and open based on that.
+		 * If there isn't a devid or we have a did device, then
+		 * use the rname. In clustering, it's corrected for us.
+		 * If no devid it's at least worth a try.
+		 */
+		if (((md_did = meta_getdidbykey(sp->setno, sideno,
+		    devnp->key, ep)) == NULL) || ((minor_name =
+		    meta_getdidminorbykey(sp->setno, sideno,
+		    devnp->key, ep)) == NULL)) {
+			/* borrowed from the name list, not ours to free */
+			devname = devnp->rname;
+			if (md_did)
+				Free(md_did);
+		} else {
+			if (strstr(minor_name, ",raw") == NULL) {
+				(void) snprintf(mname, MAXPATHLEN, "%s,raw",
+				    minor_name);
+			} else {
+				(void) snprintf(mname, MAXPATHLEN, "%s",
+				    minor_name);
+			}
+
+			/*
+			 * We need to make sure we call this with a specific
+			 * mname (raw mname) so that we get the exact slice
+			 * with the given device id. Otherwise we could try
+			 * to open a slice that doesn't really exist.
+			 */
+			if (meta_deviceid_to_nmlist("/dev", md_did,
+			    mname, &nm_list) != 0) {
+				(void) mdsyserror(ep, errno, devnp->rname);
+				ep_valid = 1;
+				Free(md_did);
+				Free(minor_name);
+				goto out;
+			}
+			/*
+			 * Private copy; remembered in dup_devname so that
+			 * it is released at the end of this iteration or,
+			 * on an error exit, at out:.
+			 */
+			dup_devname = Strdup(nm_list->devname);
+			devname = dup_devname;
+			Free(md_did);
+			Free(minor_name);
+			devid_free_nmlist(nm_list);
+		}
+		/* get device name and (real) cinfo */
+		if ((fd = open(devname, O_RDONLY, 0)) < 0) {
+			(void) mdsyserror(ep, errno, devname);
+			ep_valid = 1;
+			goto out;
+		}
+
+		if (ioctl(fd, DKIOCINFO, &cinfo) != 0) {
+			(void) mdsyserror(ep, errno, devname);
+			(void) close(fd);
+			ep_valid = 1;
+			goto out;
+		}
+		(void) close(fd);	/* sd/ssd bug */
+
+		/*
+		 * preen_subdev fails when the device name has been
+		 * resolved to the physical layer. Hence it is added
+		 * to preen_addunit.
+		 */
+		if (preen_subdev(devname, &cinfo, dp) != 0) {
+			preen_addunit(dp, cinfo.dki_dname, NULL, NULL,
+			    cinfo.dki_unit);
+		}
+
+		/*
+		 * Done with this device's name.
+		 * NOTE(review): assumes preen_subdev() does not keep the
+		 * name pointer after it returns -- confirm in preenlib.
+		 */
+		if (dup_devname != NULL) {
+			Free(dup_devname);
+			dup_devname = NULL;
+		}
+	}
+
+	/*
+	 * Common exit: report a pending error, then release everything
+	 * that was allocated above.  The device name copy is freed only
+	 * after the error has been printed, in case the error still
+	 * refers to it.
+	 */
+out:
+	if (setname != NULL)
+		Free(setname);
+	if (ep_valid != 0) {
+		mde_perror(&status, "");
+		mdclrerror(&status);
+	}
+	if (dup_devname != NULL)
+		Free(dup_devname);
+	metafreenamelist(nlp);
+}
diff --git a/usr/src/lib/lvm/libpreen/i386/Makefile b/usr/src/lib/lvm/libpreen/i386/Makefile
new file mode 100644
index 0000000000..bb9355b10a
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/i386/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+# All definitions and build rules come from the shared libpreen makefile.
+include ../Makefile.com
+
+# install and debug depend on the built library (all) as well as on
+# $(ROOTLIBDIR) and $(ROOTLIBS); presumably the latter two are the install
+# directory and the installed library files -- defined outside this file.
+install debug: all $(ROOTLIBDIR) $(ROOTLIBS)
diff --git a/usr/src/lib/lvm/libpreen/sparc/Makefile b/usr/src/lib/lvm/libpreen/sparc/Makefile
new file mode 100644
index 0000000000..eff30f413e
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/sparc/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+# All definitions and build rules come from the shared libpreen makefile.
+include ../Makefile.com
+
+# install and debug depend on the built library (all) as well as on
+# $(ROOTLIBDIR) and $(ROOTLIBS); presumably the latter two are the install
+# directory and the installed library files -- defined outside this file.
+install debug: all $(ROOTLIBDIR) $(ROOTLIBS)
diff --git a/usr/src/lib/lvm/libpreen/spec/Makefile b/usr/src/lib/lvm/libpreen/spec/Makefile
new file mode 100644
index 0000000000..4f28d95836
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/Makefile
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/Makefile
+
+# This makefile defines nothing itself; every target comes from the
+# shared makefile included here.
+include	$(SRC)/lib/Makefile.spec.arch
diff --git a/usr/src/lib/lvm/libpreen/spec/Makefile.targ b/usr/src/lib/lvm/libpreen/spec/Makefile.targ
new file mode 100644
index 0000000000..582a2c4653
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/Makefile.targ
@@ -0,0 +1,36 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/Makefile.targ
+
+# Settings included by each architecture makefile below this directory
+# (via "include ../Makefile.targ").
+LIBRARY	=	libpreen.a
+VERS	=	.1
+
+# Presumably generated from preen.spec in this directory -- TODO confirm.
+OBJECTS	=	preen.o
+
+# Intentionally empty.
+TRANSCPP =
+
+# Include search path, relative to the architecture subdirectory.
+SPECCPP =	-I.. -I../../inc
diff --git a/usr/src/lib/lvm/libpreen/spec/amd64/Makefile b/usr/src/lib/lvm/libpreen/spec/amd64/Makefile
new file mode 100644
index 0000000000..c7d89e007c
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/amd64/Makefile
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+# Architecture-specific makefile for the libpreen spec library (amd64).
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line.
+# DISABLE_APPTRACE is expanded in front of the install rule at the end of
+# this file; presumably $(POUND_SIGN) expands to '#', which would turn
+# that rule into a comment -- TODO confirm in the master makefiles.
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+# LIBRARY, VERS, OBJECTS, TRANSCPP and SPECCPP shared by all architectures.
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#amd64_C_PICFLAGS = $(amd64_C_BIGPICFLAGS)
+
+include	$(SRC)/lib/Makefile.spec
+
+# Only takes effect when DISABLE_APPTRACE above expands to nothing.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB64)
diff --git a/usr/src/lib/lvm/libpreen/spec/i386/Makefile b/usr/src/lib/lvm/libpreen/spec/i386/Makefile
new file mode 100644
index 0000000000..a50dd5cd36
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/i386/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/i386/Makefile
+
+# Architecture-specific makefile for the libpreen spec library (i386).
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line.
+# DISABLE_APPTRACE is expanded in front of the install rule at the end of
+# this file; presumably $(POUND_SIGN) expands to '#', which would turn
+# that rule into a comment -- TODO confirm in the master makefiles.
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+# LIBRARY, VERS, OBJECTS, TRANSCPP and SPECCPP shared by all architectures.
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#i386_C_PICFLAGS  = -K PIC
+
+include	$(SRC)/lib/Makefile.spec
+
+# Only takes effect when DISABLE_APPTRACE above expands to nothing.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libpreen/spec/preen.spec b/usr/src/lib/lvm/libpreen/spec/preen.spec
new file mode 100644
index 0000000000..839c0fcafe
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/preen.spec
@@ -0,0 +1,31 @@
+#
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+# lib/lvm/libpreen/spec/preen.spec
+#
+# Declares preen_build_devs as an interface in version SUNWprivate_1.1.
+
+function	preen_build_devs
+version		SUNWprivate_1.1
+end
diff --git a/usr/src/lib/lvm/libpreen/spec/sparc/Makefile b/usr/src/lib/lvm/libpreen/spec/sparc/Makefile
new file mode 100644
index 0000000000..f3d039672f
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/sparc/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/sparc/Makefile
+
+.KEEP_STATE:
+
+# DISABLE_APPTRACE is prepended to the install rule at the bottom of this
+# file; with the value below (presumably "#" -- confirm POUND_SIGN) that
+# rule is turned into a comment.
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# The -K PIC override below is active (it is not commented out).
+# NOTE(review): the i386 sibling keeps this line commented out -- confirm
+# that forcing it on for sparc is intended.
+sparc_C_PICFLAGS  = -K PIC
+
+include	$(SRC)/lib/Makefile.spec
+
+# Only takes effect when DISABLE_APPTRACE above is commented out.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libpreen/spec/sparcv9/Makefile b/usr/src/lib/lvm/libpreen/spec/sparcv9/Makefile
new file mode 100644
index 0000000000..520e996ffb
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/sparcv9/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2000-2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# lib/lvm/libpreen/spec/sparcv9/Makefile
+
+.KEEP_STATE:
+
+# DISABLE_APPTRACE is prepended to the install rule at the bottom of this
+# file; with the value below (presumably "#" -- confirm POUND_SIGN) that
+# rule is turned into a comment.
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# The -K PIC override below is active (it is not commented out).
+# NOTE(review): this is the sparcv9 Makefile but the variable set is
+# sparc_C_PICFLAGS; a sparcv9_C_PICFLAGS override may have been intended
+# -- confirm against Makefile.master.
+sparc_C_PICFLAGS  = -K PIC
+
+include	$(SRC)/lib/Makefile.spec
+
+# Only takes effect when DISABLE_APPTRACE above is commented out.
+# Unlike the 32-bit Makefiles this one depends on $(ROOTABILIB64).
+$(DISABLE_APPTRACE)install: $(ROOTABILIB64)
diff --git a/usr/src/lib/lvm/libpreen/spec/versions b/usr/src/lib/lvm/libpreen/spec/versions
new file mode 100644
index 0000000000..523cb927d5
--- /dev/null
+++ b/usr/src/lib/lvm/libpreen/spec/versions
@@ -0,0 +1,31 @@
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Version names listed per architecture; both list only SUNWprivate_1.1.
+i386 {
+	SUNWprivate_1.1;
+}
+sparc {
+	SUNWprivate_1.1;
+}
diff --git a/usr/src/lib/lvm/libsvm/Makefile b/usr/src/lib/lvm/libsvm/Makefile
new file mode 100644
index 0000000000..1917939812
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/Makefile
@@ -0,0 +1,58 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include $(SRC)/lib/Makefile.lib
+
+# Header installed by install_h / verified by check, and where it lives.
+HDRS =		libsvm.h
+HDRDIR =	common/hdrs
+# Only the $(MACH) subdirectory is built for this library.
+SUBDIRS =	$(MACH) 
+
+# Conditional assignments: TARGET takes the name of the goal being built
+# and is handed to the recursive $(MAKE) in the "spec $(SUBDIRS)" rule.
+all := 		TARGET= all
+install :=	TARGET= install
+check :=	TARGET= check
+clean :=	TARGET= clean
+clobber :=	TARGET= clobber
+lint :=		TARGET= lint
+debug :=	TARGET= debug
+
+.KEEP_STATE:
+
+# spec is listed before .WAIT so that it is finished before $(SUBDIRS)
+# is started.
+all clean clobber debug install: spec .WAIT $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+# check depends only on the header checks; it does not descend.
+check: $(CHECKHDRS)
+
+# lint descends into $(SUBDIRS) only, not into spec.
+lint: $(SUBDIRS)
+
+# Descend into the directory named by the target and build $(TARGET)
+# there; the FRC prerequisite forces the recipe to run every time.
+spec $(SUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/lvm/libsvm/Makefile.com b/usr/src/lib/lvm/libsvm/Makefile.com
new file mode 100644
index 0000000000..3a87715a12
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/Makefile.com
@@ -0,0 +1,64 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+# Library name, version suffix and the objects that make it up.
+LIBRARY=      	libsvm.a 
+VERS=          	.1 
+OBJECTS=	check_svm.o \
+		getdrvname.o \
+		metaconf.o \
+		metainterfaces.o \
+		modops.o \
+		start_svm.o \
+		debug.o \
+		update_mdconf.o
+
+include $(SRC)/lib/lvm/Makefile.lvm
+
+# Install location under the proto area.
+ROOTLIBDIR=	$(ROOT)/usr/snadm/lib
+
+LIBS =		$(DYNLIB) # don't build a static lib
+LDLIBS +=	-lmeta -ldevid -lc
+#
+# XXX There isn't a lint library for libspmicommon.  For now, we work
+# around this by only using the library when we build (as opposed to lint).
+#
+# NOTE(review): the -L path below is /usr/snadm/lib on the build machine,
+# not a path under $(ROOT) -- confirm that is intended.
+all debug install := LDLIBS += -L/usr/snadm/lib -lspmicommon
+
+# Run-time search path matching the -L directory used above.
+DYNFLAGS +=	-R/usr/snadm/lib
+CPPFLAGS +=	-D_FILE_OFFSET_BITS=64
+CPPFLAGS +=	-I$(SRC)/lib/lvm/libsvm/common/hdrs
+# ZDEFS is cleared here (presumably to relax the linker's undefined-symbol
+# checking -- confirm against Makefile.lib).
+ZDEFS =
+
+# The mapfile is taken from the per-architecture spec directory.
+MAPDIR=         $(SRC)/lib/lvm/libsvm/spec/$(TRANSMACH)
+SPECMAPFILE=	$(MAPDIR)/mapfile
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+include $(SRC)/lib/lvm/Makefile.targ
diff --git a/usr/src/lib/lvm/libsvm/common/check_svm.c b/usr/src/lib/lvm/libsvm/common/check_svm.c
new file mode 100644
index 0000000000..5c92ac2788
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/check_svm.c
@@ -0,0 +1,169 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <meta.h>
+#include <sys/types.h>
+#include <sys/mkdev.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include <svm.h>
+
+/*
+ * FUNCTION: valid_bootlist
+ *	Scan a stream for a line that contains "mddb_bootlist" and is not
+ *	commented out (does not start with '*' or '#').
+ *
+ * INPUT:
+ *	fp       - stream open for reading; it is left open for the caller
+ *	line_len - size in bytes of the scratch line buffer to allocate
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS, an active mddb_bootlist line was found
+ *	-1 - FAIL, no such line, invalid argument, allocation failure
+ *	     or errno set while reading
+ *
+ */
+
+int
+valid_bootlist(FILE *fp, int line_len)
+{
+	char *line;
+	int found = 0;
+
+	if (fp == NULL || line_len <= 0)
+		return (RET_ERROR);
+
+	if ((line = malloc(line_len)) == NULL)
+		return (RET_ERROR);
+
+	/*
+	 * errno may not be cleared by callee routines and we
+	 * want to catch fgets failures hence errno is reset.
+	 */
+	errno = 0;
+
+	while (fgets(line, line_len, fp) != NULL) {
+		if (strstr(line, "mddb_bootlist") == NULL)
+			continue;
+		/*
+		 * Remember the match in a flag.  Looking at the last
+		 * strstr() result after the loop would wrongly accept
+		 * a file whose final line is a commented-out bootlist.
+		 */
+		if (*line != '*' && *line != '#') {
+			found = 1;
+			break;
+		}
+	}
+
+	free(line);
+	if (!found || errno != 0)
+		return (RET_ERROR);
+
+	return (RET_SUCCESS);
+}
+
+/*
+ * FUNCTION: svm_check
+ *	Check the existence of DiskSuite or SVM by looking for an active
+ *	mddb_bootlist entry, first in <rootpath>/kernel/drv/md.conf and,
+ *	if it is not there, in <rootpath>/etc/system.
+ *
+ * INPUT: rootpath
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS
+ *	-1 - FAIL (no bootlist found, a file could not be opened, or the
+ *	     resulting path name does not fit in PATH_MAX)
+ */
+
+int
+svm_check(char *path)
+{
+	static const char *const files[] = { MD_CONF, SYSTEM_FILE };
+	FILE *fp;
+	char tmppath[PATH_MAX];
+	int rval = RET_ERROR;
+	int len;
+	int i;
+
+	if (path == NULL)
+		return (RET_ERROR);
+
+	/* md.conf is tried first; etc/system only if it has no bootlist */
+	for (i = 0; i < 2; i++) {
+		len = snprintf(tmppath, sizeof (tmppath), "%s%s",
+		    path, files[i]);
+		if (len < 0 || (size_t)len >= sizeof (tmppath))
+			return (RET_ERROR);
+
+		/*
+		 * A file that cannot be opened ends the search with an
+		 * error, and no stream is closed in that case.
+		 */
+		if ((fp = fopen(tmppath, "r")) == NULL)
+			return (RET_ERROR);
+
+		rval = valid_bootlist(fp, MDDB_BOOTLIST_MAX_LEN);
+
+		debug_printf("svm_check(): valid bootlist in %s. status %d\n",
+			tmppath, rval);
+
+		(void) fclose(fp);
+
+		if (rval == RET_SUCCESS)
+			break;
+	}
+
+	return (rval == RET_SUCCESS ? RET_SUCCESS : RET_ERROR);
+}
+
+/*
+ * FUNCTION: svm_is_md
+ *	Check if the given device name has an md driver.
+ * INPUT: special device name (/dev/dsk/c0t0d0s0 or /dev/md/dsk/d10)
+ *
+ * RETURN:
+ *	1 - if it is a metadevice.
+ *	0 - if it is not a metadevice, or the driver name lookup failed.
+ *	-1 - if the device could not be stat'ed.
+ */
+
+int
+svm_is_md(char *device_name)
+{
+	/*
+	 * get_drv_name() copies a name of up to FILENAME_MAX characters
+	 * plus the terminator into the buffer, so it must be this large.
+	 */
+	char buf[FILENAME_MAX + 1];
+	struct stat sbuf;
+	int rval = 0;
+
+	(void) memset(buf, 0, sizeof (buf));
+
+	debug_printf("svm_is_md(): device %s\n", device_name);
+	if (stat(device_name, &sbuf) != 0)
+		return (RET_ERROR);
+
+	if (get_drv_name(major(sbuf.st_rdev), "/", buf) == RET_ERROR) {
+		debug_printf("svm_is_md(): device get_drv_name failed: %s\n",
+				device_name);
+		return (0);
+	}
+	if (strcmp(buf, MD_MODULE) == 0) {
+		debug_printf("svm_is_md(): device %s succeed\n", device_name);
+		rval = 1;
+	}
+	return (rval);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/debug.c b/usr/src/lib/lvm/libsvm/common/debug.c
new file mode 100644
index 0000000000..38f7ae56cc
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/debug.c
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+/* The following defines are for tracing output (from libspmicommon) */
+
+#define	LOG		0x1	/* write message to log file */
+#define	SCR		0x2	/* write message to the screen */
+/* parenthesized so the value is safe inside larger expressions */
+#define	LOGSCR		(LOG|SCR) /* write message to the log and screen */
+#define	LEVEL0		0x0001  /* message level 0 */
+#define	LEVEL1		0x0002  /* message level 1 */
+#define	LEVEL2		0x0004  /* message level 2 */
+#define	LEVEL3		0x0010  /* message level 3 */
+
+extern int get_trace_level(void);
+extern int write_status(unsigned char, unsigned int, char *, ...);
+
+/* prefix put in front of every message; the length includes the NUL */
+const char libsvm_str[] = "LIB_SVM: ";
+const int libsvm_len = sizeof (libsvm_str);
+
+/*
+ * FUNCTION: debug_printf
+ *	Format a message, prefix it with "LIB_SVM: " and hand it to
+ *	write_status().  Nothing is done unless get_trace_level() is
+ *	greater than 5.  Messages longer than PATH_MAX (including the
+ *	prefix) are truncated.
+ *
+ * INPUT: printf-style format string and its arguments
+ */
+/*PRINTFLIKE1*/
+void
+debug_printf(char *fmt, ...)
+{
+	va_list ap;
+	char *msg;
+	char *buf;
+
+	if (get_trace_level() <= 5)
+		return;
+
+	if ((buf = calloc(PATH_MAX, sizeof (char))) == NULL)
+		return;
+
+	(void) strcpy(buf, libsvm_str);
+	/*
+	 * libsvm_len - 1 is because the length includes NULL
+	 */
+	msg = buf + (libsvm_len - 1);
+
+	va_start(ap, fmt);
+	if (vsnprintf(msg, (PATH_MAX - (libsvm_len - 1)), fmt, ap) >= 0) {
+		/*
+		 * The text is already formatted; pass it as an argument
+		 * and not as the format so that a '%' in the message
+		 * (for instance from a path name) is not interpreted again.
+		 */
+		(void) write_status(LOGSCR, LEVEL0, "%s", buf);
+	}
+	va_end(ap);
+	free(buf);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/getdrvname.c b/usr/src/lib/lvm/libsvm/common/getdrvname.c
new file mode 100644
index 0000000000..9bef7fa115
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/getdrvname.c
@@ -0,0 +1,90 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <svm.h>
+
+/*
+ *	Macros to produce a quoted string containing the value of a
+ *	preprocessor macro. For example, if SIZE is defined to be 256,
+ *	VAL2STR(SIZE) is "256". This is used to construct format
+ *	strings for scanf-family functions below.
+ */
+#define	QUOTE(x)	#x
+#define	VAL2STR(x)	QUOTE(x)
+
+/*
+ * FUNCTION:
+ *	Return the driver name for a major number
+ *
+ * INPUT: major number, mount point for name_to_major file, pointer
+ * to a valid buffer of at least FILENAME_MAX + 1 bytes.
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS - buf contain the driver name.
+ *	-1 - FAIL - the file could not be opened, a line could not be
+ *	     parsed, or no entry has the given major number; buf is
+ *	     left untouched.
+ *
+ */
+
+int
+get_drv_name(major_t major, char *mnt, char *buf)
+{
+	FILE *fp;
+	char drv[FILENAME_MAX + 1];
+	char entry[FILENAME_MAX + 1];
+	char line[MAX_N2M_ALIAS_LINE];
+	char fname[PATH_MAX];
+	int status = RET_ERROR;
+
+	(void) snprintf(fname, sizeof (fname), "%s%s", mnt, NAME_TO_MAJOR);
+
+	if ((fp = fopen(fname, "r")) == NULL) {
+		return (RET_ERROR);
+	}
+
+	while (fgets(line, sizeof (line), fp) != NULL) {
+		/* a line that is not "<name> <major>" ends the search */
+		if (sscanf(line,
+		    "%" VAL2STR(FILENAME_MAX) "s %" VAL2STR(FILENAME_MAX) "s",
+		    drv, entry) != 2) {
+			break;
+		}
+		/*
+		 * Only report success for a real match; reaching end of
+		 * file without one must not hand back the last name read.
+		 */
+		if ((major_t)atoi(entry) == major) {
+			(void) strcpy(buf, drv);
+			status = RET_NOERROR;
+			break;
+		}
+	}
+
+	(void) fclose(fp);
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/hdrs/libsvm.h b/usr/src/lib/lvm/libsvm/common/hdrs/libsvm.h
new file mode 100644
index 0000000000..98c13a2684
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/hdrs/libsvm.h
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBSVM_H
+#define	_LIBSVM_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * The following declarations are for libsvm which provides
+ * Solaris Install with a set of interfaces required to upgrade
+ * mirrored roots. These are controlled by a Contract PSARC 2000/049
+ * and should not be changed without informing Install.
+ *
+ * svm_check() returns 0 when an active mddb_bootlist entry is found
+ * under rootpath and -1 otherwise.  svm_is_md() returns 1 for a
+ * metadevice, 0 when it is not one, and -1 when the device cannot be
+ * stat'ed.
+ */
+
+typedef struct {
+	char *root_md;		/* metaroot device name */
+	int count;		/* number of components in the metadevice */
+	char *md_comps[1];	/* array of "ctds" component names */
+} svm_info_t;
+
+/* MDDB conversion flags */
+#define	SVM_DONT_CONV	0x01	/* Don't convert MDDB to devid mode */
+#define	SVM_CONV	0x02	/* Convert MDDB to devid mode */
+
+
+extern int svm_check(char *rootpath);
+extern int svm_start(char *rootpath, svm_info_t **svm_infopp,
+			int repl_state_flag);
+extern int svm_stop();
+extern void svm_free(svm_info_t *svm_infop);
+extern int svm_is_md(char *device_name);
+extern int svm_get_components(char *root_md_device, svm_info_t **svmpp);
+extern svm_info_t *svm_alloc();
+extern int get_mdcomponents(char *devname, svm_info_t **pp);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _LIBSVM_H */
diff --git a/usr/src/lib/lvm/libsvm/common/hdrs/svm.h b/usr/src/lib/lvm/libsvm/common/hdrs/svm.h
new file mode 100644
index 0000000000..cb5d60f30f
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/hdrs/svm.h
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#ifndef	_SVM_H
+#define	_SVM_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+
+/* Return codes shared by the libsvm internal routines */
+#define	RET_SUCCESS	0
+#define	RET_ERROR	(-1)
+#define	RET_NOERROR	RET_SUCCESS
+
+
+#define	PROP_KEEP_REPL_STATE	"md_keep_repl_state"
+#define	PROP_DEVID_DESTROY	"md_devid_destroy"
+
+/* File names; the callers prepend a root path where one applies */
+#define	MD_CONF		"/kernel/drv/md.conf"
+#define	MD_CONF_ORIG	"/tmp/md.conf.orig"
+#define	SYSTEM_FILE	"/etc/system"
+#define	NAME_TO_MAJOR	"/etc/name_to_major"
+#define	VFSTAB		"/etc/vfstab"
+
+#define	MD_MODULE "md"
+#define	ROOT_MNTPT "/"
+#define	ROOT_METADEVICE "/dev/md/dsk/"
+
+
+typedef enum {
+	MD_STR_NOTFOUND,	/* bootlist not found */
+	MD_STR_START,		/* bootlist found, conversion started */
+	MD_STR_DONE		/* bootlist conversion done */
+} convflag_t;
+
+/*
+ * The following defines have been taken from addrem.h.
+ * Every expression is parenthesized so that the macro expands to a
+ * single value wherever it is used.
+ */
+#define	MAX_CMD_LINE	256
+#define	MAX_N2M_ALIAS_LINE	(FILENAME_MAX + FILENAME_MAX + 1)
+#define	MAXLEN_NAM_TO_MAJ_ENT	(FILENAME_MAX + MAX_STR_MAJOR + 1)
+#define	OPT_LEN	128
+#define	CADDR_HEX_STR	16
+#define	UINT_STR	10
+#define	MODLINE_ENT_MAX	((4 * UINT_STR) + CADDR_HEX_STR + MODMAXNAMELEN)
+#define	MAX_STR_MAJOR	UINT_STR
+#define	STR_LONG	10
+#define	PERM_STR	4
+#define	MAX_PERM_ENTRY	((2 * STR_LONG) + PERM_STR + (2 * FILENAME_MAX) + 1)
+#define	MAX_DBFILE_ENTRY	MAX_PERM_ENTRY
+
+extern void create_diskset_links();
+extern int copyfile(char *from, char *to);
+extern int get_drv_name(major_t major, char *file_name, char *buf);
+extern int mod_unload(char *modname);
+extern int valid_bootlist(FILE *fp, int line_size);
+extern int convert_bootlist(char *systemfile, char *mdconf, char **tmpfilename);
+extern int write_xlate_to_mdconf(char *rootpath);
+extern int write_targ_nm_table(char *rootpath);
+extern int get_rootmetadevice(char *rootpath, char **devname);
+extern void set_upgrade_prop(char *prop_name, int val);
+extern int is_upgrade_prop(char *prop_name);
+extern int create_in_file_prop(char *prop_name, char *fname);
+extern void debug_printf(char *fmt, ...);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SVM_H */
diff --git a/usr/src/lib/lvm/libsvm/common/metaconf.c b/usr/src/lib/lvm/libsvm/common/metaconf.c
new file mode 100644
index 0000000000..504f38ba73
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/metaconf.c
@@ -0,0 +1,195 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+#include <stdio.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/mkdev.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <limits.h>
+#include <string.h>
+#include <libsvm.h>
+#include <svm.h>
+#include <errno.h>
+
+
+#define	VERSION "1.0"
+#define	DISK_DIR "/dev/rdsk"
+
+extern int _map_to_effective_dev();
+
+/*
+ * FUNCTION: is_blankline
+ *	Tell whether a string consists only of white space.
+ * INPUT: NUL terminated string
+ *
+ * RETURN VALUES:
+ *	1 - the string is empty or holds only white space characters
+ *	0 - the string holds at least one other character
+ */
+int
+is_blankline(char *buf)
+{
+	for (; *buf != 0; buf++) {
+		/*
+		 * isspace() is only defined for unsigned char values and
+		 * EOF, so a plain char must be converted first.
+		 */
+		if (!isspace((unsigned char)*buf))
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * FUNCTION: write_targ_nm_table
+ *	creates a tuple table of <driver name, major number > in md.conf
+ *	by appending every non-blank line of <rootpath>/etc/name_to_major
+ *	to the md_targ_nm_table property.
+ * INPUT: rootpath
+ *
+ * RETURN VALUES:
+ *	RET_SUCCESS
+ *	RET_ERROR - a file could not be opened or md.conf could not be
+ *		written
+ */
+
+int
+write_targ_nm_table(char *path)
+{
+	FILE	*targfp = NULL;
+	FILE	*mdfp = NULL;
+	char	buf[PATH_MAX], *cp;
+	int	retval = RET_SUCCESS;
+	int	first_entry = 1;
+
+	if ((mdfp = fopen(MD_CONF, "a")) == NULL)
+		return (RET_ERROR);
+
+	(void) snprintf(buf, sizeof (buf), "%s%s", path, NAME_TO_MAJOR);
+
+	if ((targfp = fopen(buf, "r")) == NULL) {
+		(void) fclose(mdfp);
+		return (RET_ERROR);
+	}
+
+	while (fgets(buf, sizeof (buf), targfp) != NULL &&
+				(retval == RET_SUCCESS)) {
+		/*
+		 * The last line of the file, or an over-long line, has no
+		 * newline; only strip one when it is really there.
+		 */
+		if ((cp = strrchr(buf, '\n')) != NULL)
+			*cp = 0;
+		if (is_blankline(buf))
+			continue;
+		/*
+		 * NOTE(review): the first entry is written twice, once by
+		 * the property header below and once by the generic
+		 * fprintf after it.  That output is kept as it was;
+		 * confirm whether the consumer of md.conf relies on it.
+		 */
+		if (first_entry) {
+			if (fprintf(mdfp, "md_targ_nm_table=\"%s\"", buf) < 0)
+				retval = RET_ERROR;
+			first_entry = 0;
+		}
+		if (fprintf(mdfp, ",\"%s\"", buf) < 0)
+			retval = RET_ERROR;
+	}
+	if (!first_entry)
+		if (fprintf(mdfp, ";\n") < 0)
+			retval = RET_ERROR;
+	/* buffered data is written at close time, so check the result */
+	if (fclose(mdfp) != 0)
+		retval = RET_ERROR;
+	(void) fclose(targfp);
+	return (retval);
+}
+
+/*
+ * FUNCTION: write_xlate_to_mdconf
+ *	creates a tuple table of <miniroot devt, target devt> in md.conf
+ *	from the entries found in <rootpath>/dev/rdsk.  Entries that
+ *	cannot be stat'ed or mapped to an effective device are skipped.
+ * INPUT: rootpath
+ *
+ * RETURN VALUES:
+ *	RET_SUCCESS
+ *	RET_ERROR - md.conf or the disk directory could not be opened,
+ *		or md.conf could not be written
+ */
+
+int
+write_xlate_to_mdconf(char *path)
+{
+	FILE		*fptr = NULL;
+	struct dirent	*dp;
+	DIR		*dirp;
+	struct stat	statb_dev;
+	struct stat	statb_edev;
+	char		*devname;
+	char		edevname[PATH_MAX];
+	char		targname[PATH_MAX];
+	char		diskdir[PATH_MAX];
+	int		first_devid = 1;
+	int		ret = RET_SUCCESS;
+
+	if ((fptr = fopen(MD_CONF, "a")) == NULL) {
+		return (RET_ERROR);
+	}
+
+	(void) snprintf(diskdir, sizeof (diskdir), "%s%s", path, DISK_DIR);
+	if ((dirp = opendir(diskdir)) == NULL) {
+		(void) fclose(fptr);
+		return (RET_ERROR);
+	}
+
+	while (((dp = readdir(dirp)) != (struct dirent *)0) &&
+						(ret != RET_ERROR)) {
+		if ((strcmp(dp->d_name, ".") == 0) ||
+		    (strcmp(dp->d_name, "..") == 0))
+			continue;
+
+		/* skip names that would not fit in targname */
+		if ((strlen(diskdir) + strlen(dp->d_name) + 2) > PATH_MAX) {
+			continue;
+		}
+
+		(void) snprintf(targname, sizeof (targname), "%s/%s",
+		    diskdir, dp->d_name);
+
+		if (stat(targname, &statb_dev) != 0) {
+			continue;
+		}
+
+		/* the name from DISK_DIR onwards is what gets mapped */
+		if ((devname = strstr(targname, DISK_DIR)) == NULL) {
+			continue;
+		}
+
+		if (_map_to_effective_dev(devname, edevname) != 0) {
+			continue;
+		}
+
+		if (stat(edevname, &statb_edev) != 0) {
+			continue;
+		}
+
+		/*
+		 * special case to write the first tuple in the table
+		 *
+		 * NOTE(review): the first tuple is written again by the
+		 * generic fprintf below.  That output is kept as it was;
+		 * confirm whether the consumer of md.conf relies on it.
+		 */
+		if (first_devid) {
+			if (fprintf(fptr, "md_xlate_ver=\"%s\";\n"
+				"md_xlate=%lu,%lu", VERSION,
+				(unsigned long)statb_edev.st_rdev,
+				(unsigned long)statb_dev.st_rdev) < 0)
+				ret = RET_ERROR;
+			first_devid = 0;
+		}
+		if (fprintf(fptr, ",%lu,%lu",
+			(unsigned long)statb_edev.st_rdev,
+			(unsigned long)statb_dev.st_rdev) < 0)
+			ret = RET_ERROR;
+	} /* end while */
+
+	if (!first_devid)
+		if (fprintf(fptr, ";\n") < 0)
+			ret = RET_ERROR;
+	/* buffered data is written at close time, so check the result */
+	if (fclose(fptr) != 0)
+		ret = RET_ERROR;
+	(void) closedir(dirp);
+	return (ret);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/metainterfaces.c b/usr/src/lib/lvm/libsvm/common/metainterfaces.c
new file mode 100644
index 0000000000..20746d4b58
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/metainterfaces.c
@@ -0,0 +1,490 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/vfstab.h>
+#include <meta.h>
+#include <libsvm.h>
+#include <svm.h>
+#include <sdssc.h>
+
+
+extern int mod_unload(char *modname);
+static int inited = 0;
+
+/*
+ * FUNCTION: init_metalib
+ *	One-time libmeta set-up.  The work is skipped when the file-scope
+ *	"inited" flag is already set.
+ *
+ * RETURN VALUES:
+ *	RET_SUCCESS - already initialized, or initialization succeeded
+ *	-1 - md_init_nosig() or meta_check_root() failed
+ */
+
+static int
+init_metalib()
+{
+	md_error_t	err = mdnullerror;
+	char		*prog = "libsvm";
+
+	if (inited)
+		return (RET_SUCCESS);
+
+	/* hand libmeta a one-element argument vector naming this library */
+	if (md_init_nosig(1, &prog, 0, 1, &err) != 0)
+		return (-1);
+
+	if (meta_check_root(&err) != 0)
+		return (-1);
+
+	inited = 1;
+	return (RET_SUCCESS);
+}
+
+/*
+ * FUNCTION: reset_metalib
+ *	Clear the "inited" flag so that the next init_metalib() call performs
+ *	the libmeta set-up again, then call close_admin().
+ *
+ * INPUT: ptr to md_error_t, passed through to close_admin()
+ */
+
+static void
+reset_metalib(md_error_t *ep)
+{
+	inited = 0;
+	/* the result of close_admin() is deliberately discarded */
+	(void) close_admin(ep);
+}
+
+/*
+ * FUNCTION: metahalt
+ *	Issue the MD_HALT ioctl while holding the lock on the local set,
+ *	then reset the library state.  Returns quietly, without resetting,
+ *	when the local set name cannot be obtained or the lock is not taken.
+ */
+
+static void
+metahalt()
+{
+	md_error_t	err = mdnullerror;
+	mdsetname_t	*localsp;
+
+	(void) init_metalib();
+
+	localsp = metasetname(MD_LOCAL_NAME, &err);
+	if (localsp == NULL || meta_lock(localsp, TRUE, &err) != 0)
+		return;
+
+	/* a failed halt is only logged; unlock and reset happen regardless */
+	if (metaioctl(MD_HALT, NULL, &err, NULL) != 0) {
+		debug_printf("metahalt(): errno %d\n",
+			err.info.md_error_info_t_u.sys_error.errnum);
+	}
+
+	(void) meta_unlock(localsp, &err);
+	reset_metalib(&err);
+}
+
+/*
+ * FUNCTION: svm_stop
+ *	Halt the SDS/SVM configuration and unload the md module.  When the
+ *	first unload attempt fails, the halt/unload sequence is retried every
+ *	5 seconds until MAX_TIMEOUT seconds have been accounted for.
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS
+ *	RET_ERROR - the module could not be unloaded within the timeout
+ */
+
+#define	MAX_TIMEOUT 1800
+int
+svm_stop()
+{
+	int	interval = 5;
+	int	elapsed;
+	int	rval;
+
+	metahalt();
+
+	rval = mod_unload(MD_MODULE);
+	if (rval == 0)
+		return (rval);
+
+	/* first attempt failed: wait one interval before retrying */
+	elapsed = interval;
+	(void) sleep(interval);
+
+	for (; elapsed < MAX_TIMEOUT; elapsed += interval) {
+		rval = mod_unload(MD_MODULE);
+		if (rval == 0) {
+			debug_printf("svm_stop(): mod_unload succeeded."
+					" Time %d\n", elapsed);
+			return (rval);
+		}
+
+		debug_printf("svm_stop(): mod_unload failed. Trying "
+			"in  %d s (%d)\n", interval, elapsed);
+
+		/* wait, then halt again before the next unload attempt */
+		(void) sleep(interval);
+		metahalt();
+	}
+
+	debug_printf("svm_stop(): mod_unload FAILED!\n");
+	return (RET_ERROR);
+}
+
+/*
+ * FUNCTION: get_rootmetadevice
+ *	Looks up the ROOT_MNTPT entry in the vfstab found under mntpath and,
+ *	when its special device contains ROOT_METADEVICE, returns the part
+ *	of the device name that follows ROOT_METADEVICE.
+ *
+ * INPUT:
+ *	mntpath	- directory prefix prepended to VFSTAB
+ *	mdname	- pointer to string pointer that will contain the
+ *		  metadevice name. Caller must free the allocated space.
+ *		  Set to NULL on every failure.
+ * RETURN VALUES:
+ *	mdname - md root device name
+ *	0 - SUCCESS
+ *	!0 - FAIL
+ *		> 0 errno (EINVAL, ENAMETOOLONG, ENOMEM, fopen() errno) or
+ *		    a getvfsfile() error code
+ *		RET_ERROR - the root entry is not a metadevice
+ */
+
+int
+get_rootmetadevice(char *mntpath, char **mdname)
+{
+	struct	vfstab v;
+	FILE	*fp;
+	int	rval = RET_SUCCESS;
+	int	len;
+	char	*cp;
+	char	vfstab_name[PATH_MAX + 1];
+
+	if (mdname == NULL)
+		return (EINVAL);
+
+	*mdname = NULL;
+
+	if (mntpath == NULL)
+		return (EINVAL);
+
+	len = snprintf(vfstab_name, sizeof (vfstab_name), "%s%s", mntpath,
+	    VFSTAB);
+	if (len < 0)
+		return (ENOMEM);
+	/* never open a silently truncated path */
+	if ((size_t)len >= sizeof (vfstab_name))
+		return (ENAMETOOLONG);
+
+	debug_printf("get_rootmetadevice(): mntpath %s %s\n", mntpath,
+		vfstab_name);
+
+	if ((fp = fopen(vfstab_name, "r")) == NULL) {
+		rval = errno;
+		return (rval);
+	}
+
+	if ((rval = getvfsfile(fp, &v, ROOT_MNTPT)) != 0) {
+		goto out;
+	}
+
+	/* an entry without a special device cannot name a metadevice */
+	if (v.vfs_special == NULL) {
+		rval = RET_ERROR;
+		goto out;
+	}
+
+	debug_printf("get_rootmetadevice(): vfs_special %s\n", v.vfs_special);
+	if ((cp = strstr(v.vfs_special, ROOT_METADEVICE)) == NULL) {
+		/* md device not found */
+		rval = RET_ERROR;
+		goto out;
+	}
+
+	/*
+	 * Found a match.  The name starts right after the matched text, so
+	 * step from where strstr() found it rather than from the beginning
+	 * of vfs_special.
+	 */
+	cp += strlen(ROOT_METADEVICE);
+
+	if ((*mdname = strdup(cp)) == NULL) {
+		rval = ENOMEM;
+		goto out;
+	}
+	debug_printf("get_rootmetadevice(): *mdname %s rval %d\n",
+							*mdname, rval);
+out:
+	(void) fclose(fp);
+	return (rval);
+}
+
+/*
+ * FUNCTION: create_diskset_links
+ *	Create the diskset name symlinks in /dev/md from the diskset
+ *	names found in the set records.  These are normally created
+ *	in rpc.metad when you create the set but those symlinks are
+ *	sitting out on the real system disk and we're running off the
+ *	devfs that got created when we booted off the install image.
+ *
+ *	Each link is /dev/md/<setname> pointing at the relative target
+ *	shared/<set number>.
+ */
+
+void
+create_diskset_links()
+{
+	md_error_t	err = mdnullerror;
+	md_set_record	*sr;
+	char		linkpath[MAXPATHLEN];
+	char		target[MAXPATHLEN];
+	int		nsets;
+	int		setno;
+
+	/*
+	 * Resolve the function pointers for libsds_sc so that we can
+	 * snarf the set records.
+	 */
+	(void) sdssc_bind_library();
+	(void) init_metalib();
+
+	nsets = get_max_sets(&err);
+	if (nsets == 0) {
+		debug_printf("create_diskset_links(): get_max_sets failed\n");
+		mdclrerror(&err);
+		return;
+	}
+
+	/* set numbers 1 .. nsets - 1 are examined; set 0 is skipped */
+	for (setno = 1; setno < nsets; setno++) {
+		sr = metad_getsetbynum(setno, &err);
+		if (sr == NULL) {
+			/* no record for this set number */
+			mdclrerror(&err);
+			continue;
+		}
+
+		(void) snprintf(linkpath, sizeof (linkpath), "/dev/md/%s",
+		    sr->sr_setname);
+		(void) snprintf(target, sizeof (target), "shared/%d", setno);
+
+		/*
+		 * Ignore failures to create the symlink.  This could
+		 * happen because suninstall is restartable so the
+		 * symlink might have already been created.
+		 */
+		(void) symlink(target, linkpath);
+	}
+}
+
+/*
+ * FUNCTION: svm_alloc
+ *	Allocate one zero-filled svm_info_t.
+ *
+ * RETURN VALUES:
+ *	Non null - SUCCESS
+ *	NULL - FAIL (calloc() failed)
+ */
+
+svm_info_t *
+svm_alloc()
+{
+	svm_info_t	*svmp;
+
+	svmp = (svm_info_t *)calloc(1, sizeof (*svmp));
+	return (svmp);
+}
+
+/*
+ * FUNCTION: svm_free
+ *	Release a svm_info_t: the first "count" component strings, the
+ *	root_md string, and finally the structure itself.
+ *
+ * INPUT: pointer to struct svm_info; NULL is accepted and ignored
+ */
+
+void
+svm_free(svm_info_t *svmp)
+{
+	int	idx;
+
+	if (svmp != NULL) {
+		idx = 0;
+		while (idx < svmp->count) {
+			free(svmp->md_comps[idx]);
+			idx++;
+		}
+		free(svmp->root_md);
+		free(svmp);
+	}
+}
+
+/*
+ * FUNCTION: get_mdcomponents
+ *	Given "uname" metadevice, return the physical components
+ *	of that metadevice.
+ *
+ * INPUT:
+ *	uname - metadevice name
+ *	svmpp - address of the caller's svm_info_t pointer.  The structure
+ *		is resized with realloc(), so the pointer stored here may
+ *		change.  If filling the structure fails after the resize,
+ *		the structure is freed and *svmpp is set to NULL.
+ *
+ * RETURN VALUES:
+ *	svmp - structure containing md name and components
+ *	RET_SUCCESS
+ *	RET_ERROR
+ *
+ */
+
+int
+get_mdcomponents(char *uname, svm_info_t **svmpp)
+{
+
+	svm_info_t	*svmp;
+	md_error_t	status, *ep;
+	mdname_t	*namep;
+	mdnamelist_t	*nlp = NULL;
+	mdnamelist_t	*p;
+	mdsetname_t	*sp = NULL;
+	char		*strp = NULL;
+	int		rval, cnt;
+	size_t		extra;
+
+	if (svmpp == NULL)
+		return (RET_ERROR);
+
+	rval = RET_SUCCESS;
+	cnt = 0;
+	status = mdnullerror;
+	ep = &status;
+	svmp = *svmpp;
+
+	(void) init_metalib();
+
+	debug_printf("get_mdcomponents(): Enter unit name %s\n", uname);
+
+	if (((namep = metaname(&sp, uname, ep)) == NULL) ||
+					(metachkmeta(namep, ep) != 0)) {
+		debug_printf("get_mdcomponents(): "
+				"metaname or metachkmeta failed\n");
+		mdclrerror(ep);
+		return (RET_ERROR);
+	}
+
+	debug_printf("get_mdcomponents(): meta_getdevs %s\n", namep->cname);
+
+	if ((meta_getdevs(sp, namep, &nlp, ep)) < 0) {
+		debug_printf("get_mdcomponents(): "
+				"comp %s - meta_getdevs failed\n", uname);
+		metafreenamelist(nlp);
+		mdclrerror(ep);
+		return (RET_ERROR);
+	}
+
+	/* compute the number of devices */
+
+	for (p = nlp, cnt = 0; p != NULL;  p = p->next, cnt++)
+		;
+
+	/*
+	 * Need to add n - 1 components since svmp already has space
+	 * for one device.  With an empty list nothing extra is needed;
+	 * never ask for less than a full svm_info_t.
+	 */
+
+	extra = (cnt > 1) ? (size_t)(cnt - 1) : 0;
+	svmp = (svm_info_t *)realloc(svmp, sizeof (svm_info_t) +
+		(sizeof (char *) * extra));
+
+	if (svmp == NULL) {
+		/* realloc() failure leaves the caller's structure untouched */
+		debug_printf("get_mdcomponents(): realloc of svmp failed\n");
+		metafreenamelist(nlp);
+		return (RET_ERROR);
+	}
+
+
+	for (p = nlp, cnt = 0; p != NULL; p = p->next, cnt++) {
+		mdname_t	*devnp = p->namep;
+
+		if ((strp = strdup(devnp->cname)) == NULL) {
+			rval = RET_ERROR;
+			break;
+		}
+		svmp->md_comps[cnt] = strp;
+	}
+
+	/*
+	 * count is set to the number of component names actually stored,
+	 * so svm_free() releases exactly those if we bail out below.
+	 */
+
+	svmp->count = cnt;
+	svmp->root_md = strdup(uname);
+	if (rval == RET_SUCCESS && svmp->root_md != NULL) {
+		debug_printf("get_mdcomponents(): root_md %s count %d \n",
+			svmp->root_md, svmp->count);
+		for (cnt = 0; cnt < svmp->count; cnt++)
+			debug_printf("get_mdcomponents(): %s\n",
+							svmp->md_comps[cnt]);
+	} else {
+		rval = RET_ERROR;
+		svm_free(svmp);
+		svmp = NULL;
+		debug_printf("get_mdcomponents(): malloc failed\n");
+
+	}
+
+
+	metafreenamelist(nlp);
+	*svmpp = svmp;
+	return (rval);
+}
+
+
+/*
+ * FUNCTION: svm_get_components
+ *	return svm_infop with the components of a metadevice.
+ *
+ * INPUT:
+ *	md_device - eg. /dev/md/dsk/d10, /dev/md/foo/dsk/d10, or
+ *			/dev/md/shared/1/dsk/d10
+ *	svmpp - passed through to get_mdcomponents()
+ *
+ * RETURN:
+ *	0 - SUCCESS
+ *     !0 - FAIL
+ */
+
+int
+svm_get_components(char *md_device, svm_info_t **svmpp)
+{
+	const char	*shared_pfx = "/dev/md/shared/";
+	size_t		pfxlen = strlen(shared_pfx);
+	char		mdname[MAXPATHLEN];
+	char		*numstart;
+	char		*sep;
+	int		numlen;
+	int		setnum;
+	mdsetname_t	*sp;
+	md_error_t	error = mdnullerror;
+
+	/*
+	 * Regular names (e.g. /dev/md/dsk/d10 or /dev/md/foo/dsk/d10 or
+	 * d10 or foo/d10) are handled by get_mdcomponents directly.
+	 */
+	if (strncmp(md_device, shared_pfx, pfxlen) != 0) {
+		debug_printf("svm_get_components(): md_device %s\n", md_device);
+		return (get_mdcomponents(md_device, svmpp));
+	}
+
+	/*
+	 * Shared name (e.g. /dev/md/shared/1/dsk/d10): translate it to
+	 * the diskset and metadevice name (e.g. foo/d10) first.
+	 */
+	numstart = md_device + pfxlen;
+
+	sep = strchr(numstart, '/');
+	if (sep == NULL)
+		return (RET_ERROR);
+
+	numlen = sep - numstart;
+	if (numlen >= MAXPATHLEN - 1)
+		return (RET_ERROR);
+
+	/* copy out just the digits between the prefix and the next slash */
+	(void) strlcpy(mdname, numstart, numlen + 1);
+	setnum = atoi(mdname);
+
+	sp = metasetnosetname(setnum, &error);
+	if (sp == NULL || !mdisok(&error))
+		return (RET_ERROR);
+
+	/* skip the "dsk" (or similar) path element that follows the number */
+	sep = strchr(sep + 1, '/');
+	if (sep == NULL)
+		return (RET_ERROR);
+
+	/* mdname becomes <setname>/<metadevice>, e.g. foo/d10 */
+	(void) snprintf(mdname, MAXPATHLEN, "%s/%s", sp->setname, sep + 1);
+
+	debug_printf("svm_get_components(): mdname %s\n", mdname);
+	return (get_mdcomponents(mdname, svmpp));
+}
diff --git a/usr/src/lib/lvm/libsvm/common/modops.c b/usr/src/lib/lvm/libsvm/common/modops.c
new file mode 100644
index 0000000000..78914a9069
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/modops.c
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/errno.h>
+#include <sys/modctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <svm.h>
+
+/*
+ * FUNCTION: get modid
+ *	Given a module name returns module id.
+ *
+ * INPUT: module name
+ *
+ * RETURN VALUES:
+ *	the mi_id reported for the first module whose name matches
+ *	RET_ERROR when modctl(MODINFO) fails before a match is found
+ */
+
+static int
+get_modid(char *modname)
+{
+	struct modinfo	mi;
+	int		start = -1;	/* look for all modules */
+
+	mi.mi_id = mi.mi_nextid = start;
+	mi.mi_info = MI_INFO_ALL | MI_INFO_NOBASE;
+
+	/*
+	 * NOTE(review): the id argument stays -1 on every call; the walk
+	 * presumably advances through the mi_id value written back into
+	 * "mi" by each call -- confirm against the modctl(MODINFO) contract.
+	 */
+	for (;;) {
+		if (modctl(MODINFO, start, &mi) < 0)
+			return (RET_ERROR);
+
+		/* make sure the name is terminated before comparing it */
+		mi.mi_name[MODMAXNAMELEN - 1] = '\0';
+
+		if (strcmp(mi.mi_name, modname) == 0)
+			return (mi.mi_id);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * FUNCTION: mod_unload
+ *	Unload a module, then unload and reload its driver .conf data.
+ *
+ * INPUT: module name
+ *
+ * RETURN VALUES:
+ *	0 - SUCCESS
+ *	!0 - FAIL
+ *		> 0 errno
+ *		-1
+ * NOTE: If we fail to get the module id because the module is not
+ * currently loaded we still want to try to force a reload of the
+ * .conf file when it does load.  For the same reason a failed
+ * MODUNLOAD does not stop the .conf handling; its errno is only
+ * returned if the remaining steps succeed.
+ */
+int
+mod_unload(char *modname)
+{
+	major_t	maj;
+	int	modid;
+	int	status = RET_SUCCESS;
+
+	modid = get_modid(modname);
+
+	/* remember an unload failure but carry on with the .conf reload */
+	if (modid != -1 && modctl(MODUNLOAD, modid) < 0)
+		status = errno;
+
+	if (modctl(MODGETMAJBIND, modname, strlen(modname) + 1, &maj) != 0)
+		return (errno);
+
+	if (modctl(MODUNLOADDRVCONF, maj) != 0)
+		return (errno);
+
+	if (modctl(MODLOADDRVCONF, maj) != 0)
+		return (errno);
+
+	return (status);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/start_svm.c b/usr/src/lib/lvm/libsvm/common/start_svm.c
new file mode 100644
index 0000000000..f423d4f418
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/start_svm.c
@@ -0,0 +1,284 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <ctype.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <limits.h>
+#include <meta.h>
+#include <svm.h>
+#include <libsvm.h>
+
+#define	MODEBITS	(S_ISUID|S_ISGID|S_ISVTX|S_IRWXU|S_IRWXG|S_IRWXO)
+#define	ISREG(A)	(((A).st_mode & S_IFMT) == S_IFREG)
+#define	DEFAULT_ROOTDIR "/a"
+
+
+/*
+ * FUNCTION: svm_start
+ *	starts SDS/SVM configuration. If root mirroring exists then the
+ *	components of the root mirror are returned in svmpp.
+ *
+ * INPUT: mntpnt - root mount point; DEFAULT_ROOTDIR is used when NULL
+ *	  svmpp - prealloced structure to return components
+ *	  repl_state_flag - SVM_CONV/SVM_DONT_CONV
+ *
+ * RETURN:
+ *	  0 - SUCCESS
+ *	  !0 - ERROR
+ *	  positive values are whatever the failing step returned (an errno
+ *	  or a system() status)
+ *
+ * Every failure routed through errout restores MD_CONF from the
+ * MD_CONF_ORIG copy taken at the start, if that copy exists.
+ */
+
+int
+svm_start(char *mntpnt, svm_info_t **svmpp, int repl_state_flag)
+{
+	char *rootdir, *tf;
+	char *mdevnamep = NULL;
+	char system_file[PATH_MAX];
+	char mdconf[PATH_MAX];
+	int rval = 0;
+
+	if (mntpnt == NULL)
+		rootdir = DEFAULT_ROOTDIR;
+	else
+		rootdir = mntpnt;
+
+	/* a result >= PATH_MAX means the name was truncated: refuse it */
+	rval = snprintf(system_file, PATH_MAX, "%s%s", rootdir, SYSTEM_FILE);
+	if (rval < 0 || rval >= PATH_MAX) {
+		return (RET_ERROR);
+	}
+
+	rval = snprintf(mdconf, PATH_MAX, "%s%s", rootdir, MD_CONF);
+	if (rval < 0 || rval >= PATH_MAX) {
+		return (RET_ERROR);
+	}
+
+	debug_printf("svm_start(): repl_state_flag %s\n",
+		(repl_state_flag == SVM_DONT_CONV) ? "SVM_DONT_CONV":
+						"SVM_CONV");
+
+	/* keep a copy of MD_CONF so that it can be restored on failure */
+	if (copyfile(MD_CONF, MD_CONF_ORIG))
+		return (RET_ERROR);
+
+	switch (rval = convert_bootlist(system_file, mdconf, &tf)) {
+		case 0:
+		case -1:			/* found in etc/system flag */
+			break;
+		default: /* convert bootlist failed */
+			debug_printf("svm_start(): convert_bootlist failed."
+					"rval %d\n", rval);
+			goto errout;
+	}
+
+	if (repl_state_flag == SVM_DONT_CONV) {
+		rval = create_in_file_prop(PROP_KEEP_REPL_STATE, tf);
+		if (rval != 0)
+			goto errout;
+	}
+
+	if (is_upgrade_prop(PROP_DEVID_DESTROY)) {
+		rval = create_in_file_prop(PROP_DEVID_DESTROY, tf);
+		/*
+		 * For the idempotent behavior reset internal
+		 * flag incase we have to return due to errors
+		 */
+		set_upgrade_prop(PROP_DEVID_DESTROY, 0);
+		if (rval != 0)
+			goto errout;
+	}
+
+
+	/*
+	 * Since svm_start is called only after svm_check,
+	 * we can assume that there is a valid metadb. If the mddb_bootlist
+	 * is not found in etc/system, then it must be in md.conf which
+	 * we copied to temporary file pointed to by tf
+	 */
+	if (copyfile(tf, MD_CONF)) {
+		debug_printf("svm_start(): copy of %s to %s failed\n", tf,
+			MD_CONF);
+		/*
+		 * rval may still be 0 from the steps above; without an
+		 * explicit error value errout would report success.
+		 */
+		rval = RET_ERROR;
+		goto errout;
+	}
+
+	if ((rval = write_xlate_to_mdconf(rootdir)) != 0) {
+		debug_printf("svm_start(): write_xlate_to_mdconf(%s) failed\n",
+				rootdir);
+		goto errout;
+	}
+
+	if ((rval = write_targ_nm_table(rootdir)) != 0) {
+		goto errout;
+	}
+
+	/* run devfsadm to create the devices specified in md.conf */
+	if ((rval = system("/usr/sbin/devfsadm -r /tmp -p "
+		"/tmp/root/etc/path_to_inst -i md")) != 0) {
+		debug_printf("svm_start(): devfsadm -i md failed: %d\n", rval);
+		goto errout;
+	}
+
+	/*
+	 * We have to unload md after the devfsadm run so that when metainit
+	 * loads things it gets the right information from md.conf.
+	 *
+	 * NOTE(review): unlike the other failures this one returns directly
+	 * and so does not restore MD_CONF -- confirm that this is intended.
+	 */
+	if ((rval = svm_stop()) != 0) {
+		debug_printf("svm_start(): svm_stop failed.\n");
+		return (RET_ERROR);
+	}
+
+	if ((rval = system("/usr/sbin/metainit -r")) != 0) {
+		debug_printf("svm_start(): metainit -r failed: %d\n", rval);
+		goto errout;
+	}
+
+	create_diskset_links();
+
+	if ((rval = system("/usr/sbin/metasync -r")) != 0) {
+		debug_printf("svm_start(): metasync -r failed: %d\n", rval);
+		goto errout;
+	}
+
+	/*
+	 * We ignore failures from metadevadm, since it can fail if
+	 * miniroot dev_t's don't match target dev_ts. But it still
+	 * will update md.conf with device Id information which is
+	 * why we are calling it here.
+	 */
+
+	(void) system("/usr/sbin/metadevadm -r");
+
+	/*
+	 * check to see if we have a root metadevice and if so
+	 *  get its components.
+	 */
+
+	if ((rval = get_rootmetadevice(rootdir, &mdevnamep)) == 0) {
+		if ((rval = get_mdcomponents(mdevnamep, svmpp)) != 0) {
+			debug_printf("svm_start(): get_mdcomponents(%s,..)"
+				"failed %d\n", mdevnamep, rval);
+			goto errout;
+		}
+
+	} else {
+		rval = 0; /* not a mirrored root */
+		debug_printf("svm_start(): get_rootmetadevice(%s,..) "
+			"No root mirrors! ", rootdir);
+	}
+errout:
+	free(mdevnamep);
+	if (rval != 0) {
+		struct stat sbuf;
+		if (stat(MD_CONF_ORIG, &sbuf) == 0)
+			(void) copyfile(MD_CONF_ORIG, MD_CONF);
+		debug_printf("svm_start(): svm_start failed: %d\n", rval);
+	} else {
+		int i;
+
+		/* the dump below is diagnostic only; skip it without data */
+		if (svmpp == NULL || *svmpp == NULL) {
+			debug_printf("svm_start(): no mirrored root\n");
+		} else if ((*svmpp)->count > 0) {
+			debug_printf("svmpp: ");
+			debug_printf("    root_md: %s", (*svmpp)->root_md);
+			debug_printf("    count: %d", (*svmpp)->count);
+			for (i = 0; i < (*svmpp)->count; i++) {
+				debug_printf("    md_comps[%d]: %s", i,
+				(*svmpp)->md_comps[i]);
+			}
+			debug_printf(" \n");
+		} else {
+			if ((*svmpp)->count == 0)
+				debug_printf("svm_start(): no mirrored root\n");
+		}
+		debug_printf("svm_start(): svm_start succeeded.\n");
+	}
+	return (rval);
+}
+
+/*
+ * FUNCTION: copyfile
+ *	Copy the regular file "from" to "to".  The destination is created
+ *	or truncated and given the mode bits (MODEBITS) of the source.
+ *	A partially written destination is removed on failure.
+ *
+ * INPUT: from - name of the source; must be a regular file
+ *	  to - name of the destination
+ *
+ * RETURN:
+ *	RET_SUCCESS
+ *	RET_ERROR
+ */
+int
+copyfile(char *from, char *to)
+{
+	int fromfd, tofd;
+	char buf[1024];
+	ssize_t	rbytes;
+	ssize_t	wbytes;
+	ssize_t	done;
+	struct stat fromstat;
+
+	if ((fromfd = open(from, O_RDONLY | O_NDELAY)) < 0)
+		return (RET_ERROR);
+
+	if ((fstat(fromfd, &fromstat) < 0) || ! ISREG(fromstat)) {
+		(void) close(fromfd);
+		return (RET_ERROR);
+	}
+
+	if ((tofd = open(to, O_CREAT | O_WRONLY | O_TRUNC,
+		(fromstat.st_mode & MODEBITS))) < 0) {
+		(void) close(fromfd);
+		return (RET_ERROR);
+	}
+
+	/*
+	 * in case the file exists then perm is forced by this chmod
+	 */
+	(void) fchmod(tofd, fromstat.st_mode & MODEBITS);
+
+	for (;;) {
+		rbytes = read(fromfd, buf, sizeof (buf));
+		/*
+		 * 0 is end of file; a negative value is a read error and
+		 * must end the loop too, otherwise it would be handed to
+		 * write() as a length.
+		 */
+		if (rbytes <= 0)
+			break;
+
+		/* write() may accept less than requested; send the rest */
+		done = 0;
+		while (done < rbytes) {
+			wbytes = write(tofd, buf + done,
+			    (size_t)(rbytes - done));
+			if (wbytes <= 0)
+				break;
+			done += wbytes;
+		}
+		if (done < rbytes) {
+			rbytes = -1;
+			break;
+		}
+	}
+
+	(void) close(fromfd);
+	/* a failing close can be the only report of a lost write */
+	if (close(tofd) != 0)
+		rbytes = -1;
+	if (rbytes < 0) {
+		(void) unlink(to);
+		return (RET_ERROR);
+	}
+	return (RET_SUCCESS);
+}
diff --git a/usr/src/lib/lvm/libsvm/common/update_mdconf.c b/usr/src/lib/lvm/libsvm/common/update_mdconf.c
new file mode 100644
index 0000000000..f757648911
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/common/update_mdconf.c
@@ -0,0 +1,379 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <devid.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <meta.h>
+#include <libsvm.h>
+#include <svm.h>
+
+/*
+ * magic strings in system
+ */
+#define	BEGMDDBSTR	"* Begin MDD database info (do not edit)\n"
+#define	ENDMDDBSTR	"* End MDD database info (do not edit)\n"
+#define	NEW_BEGMDDBSTR	"# Begin MDD database info (do not edit)\n"
+#define	NEW_ENDMDDBSTR	"# End MDD database info (do not edit)\n"
+
+#define	MDDBBOOTLIST	"mddb_bootlist"
+
+#define	SYS_COMMENTCHAR	'*'
+#define	CONF_COMMENTCHAR '#'
+
+typedef struct {
+	char *prop_name;
+	int  prop_val;
+} md_prop_t;
+
+typedef enum {
+	MDDB_SYS_FILE,
+	MDDB_MDCONF_FILE
+} ftype_t;
+
+static md_prop_t upgrade_props[] = {
+		{ PROP_KEEP_REPL_STATE, 0 },
+		{ PROP_DEVID_DESTROY, 0},
+		{ NULL, 0}
+};
+
+/*
+ * The following functions manage upgrade properties
+ */
+
+/*
+ * Store val as the value of the upgrade_props entry named prop_name.
+ * Names that are not in the table are ignored.
+ */
+void
+set_upgrade_prop(char *prop_name, int val)
+{
+	int	i;
+
+	/* the table ends with an entry whose prop_name is NULL */
+	for (i = 0; upgrade_props[i].prop_name != NULL; i++) {
+		if (strcmp(upgrade_props[i].prop_name, prop_name) != 0)
+			continue;
+		upgrade_props[i].prop_val = val;
+		break;
+	}
+}
+
+/*
+ * Return 1 when the upgrade_props entry named prop_name holds the value 1;
+ * return 0 for any other value and for names that are not in the table.
+ */
+int
+is_upgrade_prop(char *prop_name)
+{
+	md_prop_t	*entry = upgrade_props;
+
+	/* advance to the matching entry or to the NULL-named terminator */
+	while (entry->prop_name != NULL &&
+	    strcmp(entry->prop_name, prop_name) != 0)
+		entry++;
+
+	if (entry->prop_name == NULL)
+		return (0);
+
+	return (entry->prop_val == 1);
+}
+
+/*
+ * Append "<prop_name> = 1;" to the file fname, provided prop_name is one
+ * of the names in upgrade_props.
+ *
+ * RETURN:
+ *	RET_SUCCESS - the property line was written
+ *	RET_ERROR   - prop_name is not a known upgrade property
+ *	> 0	    - errno from opening, writing or closing the file
+ */
+int
+create_in_file_prop(char *prop_name, char *fname)
+{
+	FILE *fp;
+	md_prop_t *upp;
+	int rval = RET_ERROR;
+
+	if ((fp = fopen(fname, "a")) == NULL) {
+		return (errno);
+	}
+
+	upp = &upgrade_props[0];
+
+	for (; upp->prop_name != NULL; upp++) {
+		if (strcmp(upp->prop_name, prop_name) == 0) {
+			if (fprintf(fp, "%s = 1;\n", upp->prop_name) < 0)
+				rval = (errno != 0) ? errno : RET_ERROR;
+			else
+				rval = RET_SUCCESS;
+			break;
+		}
+	}
+
+	/*
+	 * The line is normally still buffered at this point, so a write
+	 * failure only becomes visible when the stream is closed.
+	 */
+	if (fclose(fp) != 0 && rval == RET_SUCCESS)
+		rval = (errno != 0) ? errno : RET_ERROR;
+	return (rval);
+}
+
+/*
+ * Return 1 when str contains exactly three colons, which is how a bootlist
+ * entry that already carries a device id looks; return 0 otherwise.
+ */
+static int
+is_devid_added(char *str)
+{
+	int	ncolons = 0;
+	char	*p = str;
+
+	/* counting stops at four: anything above three is already a "no" */
+	while (ncolons < 4 && (p = strchr(p, ':')) != NULL) {
+		ncolons++;
+		p++;
+	}
+	return (ncolons == 3);
+}
+
+/*
+ * FUNCTION: parse_bootlist
+ *	Parse the bootlist and add the extra field to mddb_bootlist entry to
+ *	conform to devid changes.
+ *
+ * Old format: <drivername>:<minor_number>:<offset>
+ * New format: <drivername>:<minor_number>:<offset>:<devid>
+ * Devid of id0 implies no device id.
+ *
+ * INPUT: *line - contains the mddb_bootlist; it is modified by strtok()
+ *	  *tfp - File pointer to the md.conf.tmp file.
+ *
+ * RETURN:
+ *	  0	- Success
+ *	  > 0	- Failure. Errno returned:
+ *		  EINVAL    - no property name could be found in the line
+ *		  EOVERFLOW - the converted line does not fit the work buffer
+ *		  ENOMEM    - the "no device id" string could not be obtained
+ *		  otherwise the errno left by the failed write
+ */
+
+static int
+parse_bootlist(char *line, FILE *tfp)
+{
+	char output[1024];
+	char *cp;
+	char *nodevid;
+	size_t len = 0;		/* characters used in output so far */
+	size_t room;		/* space left in output, terminator included */
+	int n;
+	int nentries = 0;
+	int added;
+	int retval = RET_SUCCESS;
+
+	(void) memset(output, 0, sizeof (output));
+
+	/* a commented-out line stays commented out, in md.conf syntax */
+	if (line[0] == SYS_COMMENTCHAR) {
+		output[len++] = CONF_COMMENTCHAR;
+	}
+	/* move the line start of mddbbootlist */
+	cp = strstr(line, MDDBBOOTLIST);
+	if (cp != NULL)
+		line = cp;
+
+	/* grab the "mddb_bootlist" word */
+	if ((cp = strtok(line, "= ")) == NULL)
+		return (EINVAL);
+
+	/* add back the EQUAL and QUOTE chars */
+	room = sizeof (output) - len;
+	n = snprintf(output + len, room, "%s=\042", cp);
+	if (n < 0 || (size_t)n >= room)
+		return (EOVERFLOW);
+	len += n;
+
+	/*
+	 * The line passed in is for example,
+	 * mddb_bootlist1="sd:7:16:id1,sd@SIBM_DDRS34560SUN4.2G2N9688_____/h";
+	 * At this point mddb_bootlist and "=" have been parsed out.
+	 * The remaining string consists of driver name, colon separator and
+	 * the device id(if it exists) within quotes.
+	 * The deviceid string can contain upper and lower letters, digits
+	 * and +-.=_~. Quotes, spaces and \n and \t are not
+	 * allowed. They are converted to either _ or their ascii value.
+	 * So using space,\n,;and quotes as a separator is safe.
+	 */
+
+	while ((cp = strtok(NULL, " \n\042;")) != NULL) {
+		room = sizeof (output) - len;
+		added = 0;
+
+		if (is_devid_added(cp)) {
+			/* leave space between entries */
+			n = snprintf(output + len, room, "%s ", cp);
+		} else {
+			/* append :id0 for devid */
+			nodevid = devid_str_encode(NULL, NULL);
+			if (nodevid == NULL)
+				return (ENOMEM);
+			n = snprintf(output + len, room, "%s:%s ", cp,
+			    nodevid);
+			/* the encoded string is allocated; release it */
+			devid_str_free(nodevid);
+			added = 1;
+		}
+		if (n < 0 || (size_t)n >= room)
+			return (EOVERFLOW);
+		len += n;
+		nentries++;
+
+		/* no devid => SDS->SLVM migration. Set the flag */
+		if (added)
+			set_upgrade_prop(PROP_DEVID_DESTROY, 1);
+	}
+
+	/*
+	 * remove the extra space at the end; without any entry there is no
+	 * such space and the last character is the opening quote
+	 */
+	if (nentries > 0)
+		output[--len] = '\0';
+
+	room = sizeof (output) - len;
+	n = snprintf(output + len, room, "\042;\n");
+	if (n < 0 || (size_t)n >= room)
+		return (EOVERFLOW);
+
+	if (fprintf(tfp, "%s", output) < 0) {
+		retval = errno;
+	}
+	return (retval);
+}
+
+/*
+ * FUNCTION: snarf_n_modify_bootlist
+ *  This function stuffs the mddb_bootlist from either etc/system
+ * or kernel/drv/md.conf of the target system into a temporary file tname.
+ * The bootlist in the temporary file is in device ID format.
+ *
+ * For etc/system only the section between the begin and end markers is
+ * converted and appended to tname.  For md.conf every line is carried
+ * over to tname, with the marked section converted on the way.
+ *
+ * INPUT: *fp - file pointer that contains the mddb_bootlist.
+ *	  *tname - file into which the modified bootlist will be written to.
+ *	  * buf - buffer handed by upper level routine for reading in contents.
+ *	  bufsz - size of the buffer.
+ *	  mddb_file - flag
+ *
+ * RETURN:
+ *	0	- Success
+ *	> 0	- Failure. Errno returned.
+ *	RET_ERROR - the section was not found or its end marker is missing
+ */
+
+static int
+snarf_n_modify_bootlist(
+	FILE *fp,	/* File pointer to snarf from */
+	char *tname,	/* name of the temporary file */
+	char *buf,	/* Buffer to read into */
+	int bufsz,	/* buffer size */
+	ftype_t mddb_file /* flag to indicate if its /etc/system or md.conf */
+)
+{
+	FILE *tfp;
+	int rval = RET_SUCCESS;
+	char *fname = SYSTEM_FILE;
+	char *mddb_start = BEGMDDBSTR;
+	char *mddb_end = ENDMDDBSTR;
+	convflag_t cstatus = MD_STR_NOTFOUND;
+
+	if (mddb_file == MDDB_MDCONF_FILE) {
+		fname = MD_CONF;
+		mddb_start = NEW_BEGMDDBSTR;
+		mddb_end = NEW_ENDMDDBSTR;
+	}
+
+	if ((tfp = fopen(tname, "a")) == NULL)
+		return (errno);
+	debug_printf("Convert from %s\n", fname);
+
+	rewind(fp);
+	while (fgets(buf, bufsz, fp) != NULL) {
+		if (strcmp(buf, mddb_start) == 0) {
+			/* the section is always written with md.conf markers */
+			cstatus = MD_STR_START;
+			if (fprintf(tfp, "%s", NEW_BEGMDDBSTR) < 0) {
+				rval = errno;
+				break;
+			}
+			continue;
+		}
+		if (cstatus == MD_STR_START) {
+			if (strcmp(buf, mddb_end) == 0) {
+				cstatus = MD_STR_DONE;
+				if (fprintf(tfp, "%s", NEW_ENDMDDBSTR) < 0) {
+					rval = errno;
+					break;
+				}
+
+				/* md.conf has more lines to carry over */
+				if (mddb_file == MDDB_MDCONF_FILE)
+					continue;
+				else
+					break;
+			}
+
+			rval = parse_bootlist(buf, tfp);
+			if (rval == RET_SUCCESS)
+				continue;
+			else
+				break;
+		}
+		if (mddb_file == MDDB_MDCONF_FILE) {
+			size_t blen = strlen(buf);
+			int add_nl;
+
+			/*
+			 * fgets() keeps the newline, so the line is written
+			 * as read.  Only a final line that lacks one gets a
+			 * newline added, so that text appended to the file
+			 * later starts on a line of its own.
+			 */
+			add_nl = (blen > 0 && buf[blen - 1] != '\n' &&
+			    feof(fp));
+			if (fputs(buf, tfp) == EOF ||
+			    (add_nl && fputc('\n', tfp) == EOF)) {
+				rval = errno;
+				break;
+			}
+		}
+
+	} /* while (fgets */
+
+	/* a section that never started or never ended is an error */
+	if (cstatus == MD_STR_NOTFOUND || cstatus == MD_STR_START)
+		rval = RET_ERROR;
+	(void) fclose(tfp);
+	return (rval);
+}
+
+
+/*
+ * FUNCTION: convert_bootlist
+ * Get the bootlist from $ROOT/etc/system and add modified bootlist to
+ * md.conf.
+ * The function converts the mddb_bootlist format from that in /etc/system
+ * to md.conf. Also new fields are added to handle the devid id format.
+ * A copy of md.conf is created and the new entries are added to it.
+ * The name of the new file is returned to the calling program.
+ *
+ * Input: system file name
+ *	  md.conf file name
+ *	  pointer to  temp file name.
+ * RETURN:
+ *	 *tname - name of the file that has md.conf + new mddb_bootlist entries.
+ *		  The name is the string handed out by tmpnam(NULL): it is
+ *		  not heap memory, must not be freed, and is overwritten by
+ *		  the next tmpnam(NULL) call.  On failure the file itself is
+ *		  removed.
+ *	 0	- success
+ *	 -1	- mddb_bootlist not found
+ *	 > 0	- errno
+ *
+ */
+
+int
+convert_bootlist(
+	char 	*sname, /* system file name */
+	char	*mdconf, /* md.conf file name */
+	char 	**tname /* temp file name */
+)
+{
+	FILE	*fp = NULL;
+	char	cmd_buf[MDDB_BOOTLIST_MAX_LEN];
+	int	retval = RET_SUCCESS;
+
+	/* check names */
+	assert(sname != NULL);
+	assert(mdconf != NULL);
+	assert(tname != NULL);
+
+	/* get temp name */
+	*tname = tmpnam(NULL);
+
+	if ((fp = fopen(sname, "r")) == NULL) {
+		retval = errno;
+		goto out;
+	}
+	if (valid_bootlist(fp, MDDB_BOOTLIST_MAX_LEN) == RET_SUCCESS) {
+		/* bootlist is in the system file: start from md.conf's copy */
+		if ((retval = copyfile(mdconf, *tname)) == RET_ERROR) {
+			debug_printf("convert_bootlist: copy %s %s failed\n",
+				mdconf, *tname);
+			goto out;
+		}
+		retval = snarf_n_modify_bootlist(fp, *tname, cmd_buf,
+				MDDB_BOOTLIST_MAX_LEN, MDDB_SYS_FILE);
+	} else {
+		/* otherwise the bootlist has to come from md.conf itself */
+		(void) fclose(fp); /* close system file */
+		if ((fp = fopen(mdconf, "r")) == NULL) {
+			retval = errno;
+			goto out;
+		}
+		if (valid_bootlist(fp, MDDB_BOOTLIST_MAX_LEN) == RET_ERROR) {
+			retval = RET_ERROR;
+			goto out;
+		}
+		retval = snarf_n_modify_bootlist(fp, *tname, cmd_buf,
+			MDDB_BOOTLIST_MAX_LEN, MDDB_MDCONF_FILE);
+	}
+out:
+	debug_printf("convert_bootlist: retval %d\n", retval);
+	if (fp != NULL)
+		(void) fclose(fp);
+
+	/*
+	 * Remove a partially built temp file.  The name itself lives in
+	 * storage owned by tmpnam(), so there is nothing to free here.
+	 */
+	if ((retval != RET_SUCCESS) && (*tname != NULL)) {
+		(void) unlink(*tname);
+	}
+	return (retval);
+}
diff --git a/usr/src/lib/lvm/libsvm/i386/Makefile b/usr/src/lib/lvm/libsvm/i386/Makefile
new file mode 100644
index 0000000000..4fc9526b2a
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/i386/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include ../Makefile.com
+# install and debug share one prerequisite list; no recipe is given here.
+install debug: $(ROOTLIBDIR) $(ROOTLIBS) $(ROOTLINKS)
diff --git a/usr/src/lib/lvm/libsvm/sparc/Makefile b/usr/src/lib/lvm/libsvm/sparc/Makefile
new file mode 100644
index 0000000000..4fc9526b2a
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/sparc/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include ../Makefile.com
+# install and debug share one prerequisite list; no recipe is given here.
+install debug: $(ROOTLIBDIR) $(ROOTLIBS) $(ROOTLINKS)
diff --git a/usr/src/lib/lvm/libsvm/spec/Makefile b/usr/src/lib/lvm/libsvm/spec/Makefile
new file mode 100644
index 0000000000..7256a09a20
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/Makefile
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+# This makefile defines nothing itself; everything comes from the include.
+include	$(SRC)/lib/Makefile.spec.arch
diff --git a/usr/src/lib/lvm/libsvm/spec/Makefile.targ b/usr/src/lib/lvm/libsvm/spec/Makefile.targ
new file mode 100644
index 0000000000..3a5ed0eb26
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/Makefile.targ
@@ -0,0 +1,41 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+LIBRARY	=	libsvm.a
+VERS	=	.1
+
+OBJECTS	=	svm.o
+
+TRANSCPP =
+
+SPECCPP =	-I.. -I../../inc
+
+#
+# usr/snadm/lib/abi targets: SVMLIB_ABILIB is $(ABILIB) under that directory
+#
+SVMLIB_ABILIB= $(SNADMINLIB_ABI)/$(ABILIB)
+SNADMINLIB_ABI=$(ROOT)/usr/snadm/lib/abi
diff --git a/usr/src/lib/lvm/libsvm/spec/amd64/Makefile b/usr/src/lib/lvm/libsvm/spec/amd64/Makefile
new file mode 100644
index 0000000000..a3a067f7a0
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/amd64/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#amd64_C_PICFLAGS = $(amd64_C_BIGPICFLAGS)
+
+include	$(SRC)/lib/Makefile.spec
+# DISABLE_APPTRACE prefixes the rule below; presumably '#' comments it out.
+$(DISABLE_APPTRACE)install: $(SPECMAP) $(ABILIB)
+$(DISABLE_APPTRACE)	$(INS) -s -d -m $(DIRMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE)		-g $(GROUP) $(SNADMINLIB_ABI)/amd64
+$(DISABLE_APPTRACE)	$(INS) -s -m $(FILEMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE)		-g $(GROUP) -f $(SNADMINLIB_ABI)/amd64 $(ABILIB)
diff --git a/usr/src/lib/lvm/libsvm/spec/i386/Makefile b/usr/src/lib/lvm/libsvm/spec/i386/Makefile
new file mode 100644
index 0000000000..23807a39c5
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/i386/Makefile
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright 2001-2003 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#i386_C_PICFLAGS  = -K PIC
+
+include	$(SRC)/lib/Makefile.spec
+
+ROOTABILIB=	$(SNADMINLIB_ABI)/$(ABILIB)
+
+$(ROOTABILIB):	$(SNADMINLIB_ABI) $(SPECMAP)
+# The recipe lives on this second rule; the one above only adds prerequisites.
+$(ROOTABILIB):	$(ABILIB)
+	$(INS.file)
+# DISABLE_APPTRACE prefixes the rule below; presumably '#' comments it out.
+$(DISABLE_APPTRACE)install: $(ROOTABILIB)
diff --git a/usr/src/lib/lvm/libsvm/spec/sparc/Makefile b/usr/src/lib/lvm/libsvm/spec/sparc/Makefile
new file mode 100644
index 0000000000..19aecc452c
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/sparc/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#sparc_C_PICFLAGS  = -K PIC
+
+include	$(SRC)/lib/Makefile.spec
+# DISABLE_APPTRACE prefixes the rule below; presumably '#' comments it out.
+$(DISABLE_APPTRACE)install: $(SPECMAP) $(ABILIB)
+$(DISABLE_APPTRACE)	$(INS) -s -d -m $(DIRMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE)		-g $(GROUP) $(SNADMINLIB_ABI)
+$(DISABLE_APPTRACE)	$(INS) -s -m $(FILEMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE)		-g $(GROUP) -f $(SNADMINLIB_ABI) $(ABILIB)
diff --git a/usr/src/lib/lvm/libsvm/spec/sparcv9/Makefile b/usr/src/lib/lvm/libsvm/spec/sparcv9/Makefile
new file mode 100644
index 0000000000..704f35f988
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/sparcv9/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+.KEEP_STATE:
+
+# To enable apptrace, comment out the following line
+DISABLE_APPTRACE=   $(POUND_SIGN)
+
+include	../Makefile.targ
+
+# Add arch specific objects here
+OBJECTS	+=
+
+include	$(SRC)/lib/Makefile.lib
+
+# Uncomment the following if the linker complains
+#sparc_C_PICFLAGS  = -K PIC
+# NOTE(review): for this target the macro is presumably sparcv9_C_PICFLAGS.
+include	$(SRC)/lib/Makefile.spec
+# DISABLE_APPTRACE prefixes the rule below; presumably '#' comments it out.
+$(DISABLE_APPTRACE)install: $(SPECMAP) $(ABILIB)
+$(DISABLE_APPTRACE)	$(INS) -s -d -m $(DIRMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE)		-g $(GROUP) $(SNADMINLIB_ABI)/sparcv9
+$(DISABLE_APPTRACE)	$(INS) -s -m $(FILEMODE) -u $(OWNER) \
+$(DISABLE_APPTRACE)		-g $(GROUP) -f $(SNADMINLIB_ABI)/sparcv9 $(ABILIB)
diff --git a/usr/src/lib/lvm/libsvm/spec/svm.spec b/usr/src/lib/lvm/libsvm/spec/svm.spec
new file mode 100644
index 0000000000..ae381eaae5
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/svm.spec
@@ -0,0 +1,55 @@
+#
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+# Every function declared below carries version SUNWprivate_1.1.
+function	svm_check
+version		SUNWprivate_1.1
+end
+
+function	svm_start
+version		SUNWprivate_1.1
+end
+
+function	svm_stop
+version		SUNWprivate_1.1
+end
+
+function	svm_is_md
+version		SUNWprivate_1.1
+end
+
+function	svm_get_components
+version		SUNWprivate_1.1
+end
+
+function	svm_alloc
+version		SUNWprivate_1.1
+end
+
+function	svm_free
+version		SUNWprivate_1.1
+end
+
diff --git a/usr/src/lib/lvm/libsvm/spec/versions b/usr/src/lib/lvm/libsvm/spec/versions
new file mode 100644
index 0000000000..523cb927d5
--- /dev/null
+++ b/usr/src/lib/lvm/libsvm/spec/versions
@@ -0,0 +1,31 @@
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# i386 and sparc each list the single version set SUNWprivate_1.1.
+i386 {
+	SUNWprivate_1.1;
+}
+sparc {
+	SUNWprivate_1.1;
+}
author	stevel@tonic-gate <none@none>	2005-06-14 00:00:00 -0700
committer	stevel@tonic-gate <none@none>	2005-06-14 00:00:00 -0700
commit	7c478bd95313f5f23a4c958a745db2134aa03244 (patch)
tree	c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/lib/lvm
download	illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz