summaryrefslogtreecommitdiff
path: root/usr
diff options
context:
space:
mode:
authorjeanm <none@none>2006-05-06 10:47:51 -0700
committerjeanm <none@none>2006-05-06 10:47:51 -0700
commitda83352438a4a62b87fcb6fd1583e3a70aa31bb8 (patch)
tree2ed9be63db473a2144198071f61b16c67f50df86 /usr
parent1e3549a6454dbbb2d27b0f1fdb707b1d24b7141b (diff)
downloadillumos-gate-da83352438a4a62b87fcb6fd1583e3a70aa31bb8.tar.gz
4964366 metaimport should handle partial disksets
Diffstat (limited to 'usr')
-rw-r--r--usr/src/cmd/lvm/rpc.metad/metad_svc_subr.c196
-rw-r--r--usr/src/cmd/lvm/util/metaimport.c525
-rw-r--r--usr/src/cmd/lvm/util/metaset.c26
-rw-r--r--usr/src/head/meta.h142
-rw-r--r--usr/src/head/metad.x5
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_devadm.c9
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_error.c7
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_hotspares.c6
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_import.c1857
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_metad.c95
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c2
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_name.c284
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_namespace.c28
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set.c221
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_drv.c261
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_hst.c11
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_prv.c67
-rw-r--r--usr/src/lib/lvm/libmeta/common/meta_set_tkr.c258
-rw-r--r--usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c31
-rw-r--r--usr/src/lib/lvm/libmeta/spec/meta.spec36
-rw-r--r--usr/src/uts/common/io/lvm/md/md_ioctl.c72
-rw-r--r--usr/src/uts/common/io/lvm/md/md_mddb.c501
-rw-r--r--usr/src/uts/common/io/lvm/md/md_names.c87
-rw-r--r--usr/src/uts/common/sys/lvm/md_mddb.h1
-rw-r--r--usr/src/uts/common/sys/lvm/mdio.h8
-rw-r--r--usr/src/uts/common/sys/lvm/mdiox.x29
-rw-r--r--usr/src/uts/common/sys/lvm/mdvar.h3
27 files changed, 3801 insertions, 967 deletions
diff --git a/usr/src/cmd/lvm/rpc.metad/metad_svc_subr.c b/usr/src/cmd/lvm/rpc.metad/metad_svc_subr.c
index 92f6d1c490..0d92faf3e2 100644
--- a/usr/src/cmd/lvm/rpc.metad/metad_svc_subr.c
+++ b/usr/src/cmd/lvm/rpc.metad/metad_svc_subr.c
@@ -132,11 +132,13 @@ add_sideno_sidenm(
*/
if (MD_MNSET_DESC(sd)) {
if (add_name(local_sp, sideno, local_key,
- sn->dname, sn->mnum, sn->cname, ep) == -1)
+ sn->dname, sn->mnum, sn->cname, NULL, NULL,
+ ep) == -1)
return (-1);
} else {
if (add_name(local_sp, sideno+SKEW, local_key,
- sn->dname, sn->mnum, sn->cname, ep) == -1)
+ sn->dname, sn->mnum, sn->cname, NULL, NULL,
+ ep) == -1)
return (-1);
}
} else
@@ -597,7 +599,8 @@ add_sidenamelist(
*/
if (nodeid == sn->sideno) {
if ((err = add_name(local_sp, sn->sideno, key,
- sn->dname, sn->mnum, sn->cname, ep)) == -1)
+ sn->dname, sn->mnum, sn->cname,
+ NULL, NULL, ep)) == -1)
return (-1);
key = (mdkey_t)err;
break;
@@ -620,7 +623,8 @@ add_sidenamelist(
if (sn->sideno != thisside)
continue;
if ((err = add_name(local_sp, sn->sideno+SKEW, key,
- sn->dname, sn->mnum, sn->cname, ep)) == -1)
+ sn->dname, sn->mnum, sn->cname, NULL,
+ NULL, ep)) == -1)
return (-1);
key = (mdkey_t)err;
break;
@@ -635,7 +639,8 @@ add_sidenamelist(
if (sn->sideno == thisside)
continue;
if ((err = add_name(local_sp, sn->sideno+SKEW, key,
- sn->dname, sn->mnum, sn->cname, ep)) == -1)
+ sn->dname, sn->mnum, sn->cname, NULL, NULL,
+ ep)) == -1)
return (-1);
key = (mdkey_t)err;
}
@@ -647,7 +652,8 @@ add_sidenamelist(
sn = dn->side_names;
if (sn) {
if ((err = add_name(local_sp, sn->sideno, key,
- sn->dname, sn->mnum, sn->cname, ep)) == -1)
+ sn->dname, sn->mnum, sn->cname,
+ NULL, NULL, ep)) == -1)
return (-1);
key = (mdkey_t)err;
}
@@ -658,6 +664,139 @@ add_sidenamelist(
return (0);
}
+/*
+ * imp_adddrvs
+ *	This is a version of adddrvs that is specific to the
+ *	metaimport command.  Due to the unavailability of some disks,
+ *	information needs to be obtained about the disk from the devid so
+ *	it can eventually be passed down to add_sidenamelist.
+ *	Go ahead and set drive state to MD_DR_OK here so that no
+ *	later RPC is needed to set OK where UNRSLV_REPLICATED could
+ *	be cleared.  Set record is still set to MD_SR_ADD which will force
+ *	a cleanup of the set in case of panic.
+ *
+ *	setname   - name of the set the drives are being added to
+ *	dd        - list of drive descriptors to create records for
+ *	timestamp - creation time stored in every new drive record
+ *	genid     - generation id stored in every new drive record
+ *	ep        - error return; filled in by the failing callee
+ *
+ *	On success all drive records are committed together.  On failure
+ *	every drive record currently chained on the set record is removed
+ *	again, together with its name key in the local set.
+ */
+void
+imp_adddrvs(
+	char		*setname,
+	md_drive_desc	*dd,
+	md_timeval32_t	timestamp,
+	ulong_t		genid,
+	md_error_t	*ep
+)
+{
+	mddb_userreq_t	req;
+	md_drive_record	*dr, *tdr;
+	md_set_record	*sr;
+	md_drive_desc	*p;
+	mddrivename_t	*dn;
+	mdname_t	*np;
+	md_dev64_t	dev;
+	md_error_t	xep = mdnullerror;
+	char		*minorname = NULL;
+	ddi_devid_t	devidp = NULL;
+	mdsidenames_t	*sn;
+	mdsetname_t	*local_sp;
+
+
+	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
+		return;
+	}
+
+	if ((sr = getsetbyname(setname, ep)) == NULL)
+		return;
+
+	for (p = dd; p != NULL; p = p->dd_next) {
+		uint_t	rep_slice;
+		int	ret = 0;
+
+		dn = p->dd_dnp;
+
+		/*
+		 * We need the minorname and devid string decoded from the
+		 * devid to add the sidename for this drive to the
+		 * local set.
+		 */
+		ret = devid_str_decode(dn->devid, &devidp, &minorname);
+		if (ret != 0) {
+			/*
+			 * failed to decode the devid
+			 * NOTE(review): ep is not filled in on this path, so
+			 * the caller sees no error - confirm this is intended.
+			 */
+			goto out;
+		}
+
+		sn = dn->side_names;
+		if (sn == NULL) {
+			/*
+			 * No side name to register for this drive.  The
+			 * decoded devid and minor name are not needed, so
+			 * release them here; otherwise they would be leaked
+			 * for every drive that takes this path.
+			 */
+			devid_free(devidp);
+			devid_str_free(minorname);
+			dn->side_names_key = MD_KEYWILD;
+			continue;
+		}
+
+		if ((dn->side_names_key = add_name(local_sp, SKEW, MD_KEYWILD,
+		    sn->dname, sn->mnum, sn->cname, minorname, devidp,
+		    ep)) == -1) {
+			devid_free(devidp);
+			devid_str_free(minorname);
+			goto out;
+		}
+
+		/* The name has been added; the decoded devid is done with */
+		devid_free(devidp);
+		devid_str_free(minorname);
+
+		/* Create the drive record */
+		(void) memset(&req, 0, sizeof (req));
+		METAD_SETUP_DR(MD_DB_CREATE, 0);
+		req.ur_size = sizeof (*dr);
+		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
+			(void) mdstealerror(ep, &req.ur_mde);
+			goto out;
+		}
+
+		/* Fill in the drive record values */
+		dr = Zalloc(sizeof (*dr));
+		dr->dr_selfid = req.ur_recid;
+		dr->dr_dbcnt = p->dd_dbcnt;
+		dr->dr_dbsize = p->dd_dbsize;
+		dr->dr_key = dn->side_names_key;
+
+		dr->dr_ctime = timestamp;
+		dr->dr_genid = genid;
+		dr->dr_revision = MD_DRIVE_RECORD_REVISION;
+		dr->dr_flags = MD_DR_OK;
+		/* Carry the unresolved-replicated state over to the records */
+		if (p->dd_flags & MD_DR_UNRSLV_REPLICATED) {
+			dr->dr_flags |= MD_DR_UNRSLV_REPLICATED;
+			sr->sr_flags |= MD_SR_UNRSLV_REPLICATED;
+		}
+
+		/* Link the drive records and fill in in-core data */
+		dr_cache_add(sr, dr);
+
+		/*
+		 * Look up the device of the replica slice for the event
+		 * notification; fall back to NODEV64 if it cannot be found.
+		 */
+		dev = NODEV64;
+		if ((meta_replicaslice(dn, &rep_slice, &xep) == 0) &&
+		    ((np = metaslicename(dn, rep_slice, &xep)) != NULL))
+			dev = np->dev;
+		else
+			mdclrerror(&xep);
+
+		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE,
+		    MD_LOCAL_SET, dev);
+		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE,
+		    sr->sr_setno, dev);
+	}
+
+	/* Commit all the records atomically */
+	commitset(sr, TRUE, ep);
+	free_sr(sr);
+	return;
+
+out:
+	/*
+	 * If failures, remove drive records.
+	 * NOTE(review): unlike the success path, sr is not passed to
+	 * free_sr() here - confirm against free_sr()/getsetbyname().
+	 */
+	dr = tdr = sr->sr_drivechain;
+	while (dr != NULL) {
+		tdr = dr->dr_next;
+		if (del_name(local_sp, 0, dr->dr_key, &xep))
+			mdclrerror(&xep);
+		sr_del_drv(sr, dr->dr_selfid);
+		dr = tdr;
+	}
+}
+
+
static void
adddrvs(
char *setname,
@@ -836,6 +975,51 @@ mdrpc_adddrvs_2_svc(
}
}
+/*
+ * RPC service routine: add 1 or more drive records to a set when
+ * importing.  Only revision 1 of the argument union is understood.
+ * Returns FALSE when the arguments cannot be used or svc_init()
+ * reports a negative value; otherwise returns TRUE with any error
+ * left in res->status.
+ */
+bool_t
+mdrpc_imp_adddrvs_2_svc(
+	mdrpc_drives_2_args	*args,
+	mdrpc_generic_res	*res,
+	struct svc_req		*rqstp		/* RPC stuff */
+)
+{
+	mdrpc_drives_2_args_r1	*drv_args;
+	md_error_t		*ep = &res->status;
+	int			op_mode = W_OK;
+	int			rc;
+
+	/* reject any argument revision other than rev 1 */
+	if (args->rev != MD_METAD_ARGS_REV_1)
+		return (FALSE);
+
+	drv_args = &args->mdrpc_drives_2_args_u.rev1;
+	if (drv_args == NULL)
+		return (FALSE);
+
+	/* setup, check permissions */
+	(void) memset(res, 0, sizeof (*res));
+	rc = svc_init(rqstp, op_mode, ep);
+	if (rc < 0)
+		return (FALSE);
+	if (rc != 0)
+		return (TRUE);
+
+	if (check_set_lock(op_mode, drv_args->cl_sk, ep))
+		return (TRUE);
+
+	/* doit */
+	imp_adddrvs(drv_args->sp->setname, drv_args->drivedescs,
+	    drv_args->timestamp, drv_args->genid, ep);
+
+	/* the result of svc_fini() is not acted upon */
+	(void) svc_fini(ep);
+
+	return (TRUE);
+}
+
+
static void
addhosts(
char *setname,
diff --git a/usr/src/cmd/lvm/util/metaimport.c b/usr/src/cmd/lvm/util/metaimport.c
index 0ec9adfeb1..231be20eed 100644
--- a/usr/src/cmd/lvm/util/metaimport.c
+++ b/usr/src/cmd/lvm/util/metaimport.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -39,7 +38,7 @@
#include <sys/lvm/md_names.h>
#include <sdssc.h>
-static md_im_drive_info_t *overlap_disks = NULL;
+static md_im_drive_info_t *overlap_disks;
static void
usage(mdsetname_t *sp, char *string)
@@ -79,51 +78,123 @@ print_version(mdsetname_t *sp)
static int
set_disk_overlap(md_im_set_desc_t *misp)
{
-
- md_im_set_desc_t *next, *isp = misp;
- md_im_drive_info_t *set_dr, *next_set_dr, **chain;
- int is_overlap = 0;
-
+ md_im_set_desc_t *next, *isp = misp;
+ md_im_drive_info_t *set_dr, *next_set_dr, **chain;
+ int is_overlap = 0;
+ md_im_drive_info_t *good_disk = NULL;
+ md_im_drive_info_t *d;
+ md_timeval32_t gooddisktime;
+ int disk_not_available = 0;
+ /*
+ * There are 2 ways we could get an "overlap" disk.
+ * One is if the ctd's are the same. The other is if
+ * the setcreatetimestamp on the disk doesn't agree with the
+ * "good" disk in the set. However, if we have a disk that is
+ * unavailable and the other instance of the ctd is available we
+ * really don't have a conflict. It's just that the unavailable ctd
+ * is its "old" location and the available instance is a current
+ * location.
+ */
for (; isp != NULL; isp = isp->mis_next) {
for (next = isp->mis_next; next != NULL; next = next->mis_next) {
-
for (set_dr = isp->mis_drives; set_dr != NULL;
- set_dr = set_dr->mid_next) {
-
- for (next_set_dr = next->mis_drives;
- next_set_dr != NULL;
- next_set_dr = next_set_dr->mid_next) {
- if (strcmp(set_dr->mid_dnp->cname,
- next_set_dr->mid_dnp->cname) == 0) {
+ set_dr = set_dr->mid_next) {
+ if (set_dr->mid_available == MD_IM_DISK_NOT_AVAILABLE)
+ disk_not_available = 1;
+ else
+ disk_not_available = 0;
+ for (next_set_dr = next->mis_drives; next_set_dr != NULL;
+ next_set_dr = next_set_dr->mid_next) {
+ if (disk_not_available &&
+ (next_set_dr->mid_available
+ == MD_IM_DISK_AVAILABLE))
+ continue;
+ else if (!disk_not_available &&
+ (next_set_dr->mid_available ==
+ MD_IM_DISK_NOT_AVAILABLE))
+ continue;
+ if (strcmp(set_dr->mid_dnp->cname,
+ next_set_dr->mid_dnp->cname) == 0) {
/*
- * Chain it, skip if already there
+ * Chain it, skip if
+ * already there
*/
if (overlap_disks == NULL) {
set_dr->overlap = NULL;
+ set_dr->overlapped_disk = 1;
+ next_set_dr->overlapped_disk = 1;
overlap_disks = set_dr;
} else {
for (chain = &overlap_disks;
*chain != NULL;
chain = &(*chain)->overlap) {
if (strcmp(set_dr->mid_dnp->cname,
- (*chain)->mid_dnp->cname)
- == 0)
+ (*chain)->mid_dnp->cname) == 0)
break;
}
if (*chain == NULL) {
*chain = set_dr;
set_dr->overlap = NULL;
+ set_dr->overlapped_disk = 1;
+ next_set_dr->overlapped_disk = 1;
}
}
if (!is_overlap)
is_overlap = 1;
- }
}
+ }
}
}
}
+ for (isp = misp; isp != NULL; isp = isp->mis_next) {
+ good_disk = pick_good_disk(isp);
+ if (good_disk == NULL) {
+ /* didn't find a good disk */
+ continue;
+ }
+ gooddisktime = good_disk->mid_setcreatetimestamp;
+ for (d = isp->mis_drives; d != NULL; d = d->mid_next) {
+ if (d->mid_available == MD_IM_DISK_NOT_AVAILABLE)
+ continue;
+ /*
+ * If the disk doesn't have the same set creation
+ * time as the designated "good disk" we have a
+ * time conflict/overlap situation. Mark the disk
+ * as such.
+ */
+ if ((gooddisktime.tv_usec !=
+ d->mid_setcreatetimestamp.tv_usec) ||
+ (gooddisktime.tv_sec !=
+ d->mid_setcreatetimestamp.tv_sec)) {
+ d->overlapped_disk = 1;
+ if (overlap_disks == NULL) {
+ d->overlap = NULL;
+ d->overlapped_disk = 1;
+ overlap_disks = d;
+ } else {
+ for (chain = &overlap_disks;
+ *chain != NULL;
+ chain = &(*chain)->overlap) {
+ if (strcmp(d->mid_dnp->cname,
+ (*chain)->mid_dnp->cname)
+ == 0) {
+ break;
+ }
+ }
+
+ if (*chain == NULL) {
+ *chain = d;
+ d->overlap = NULL;
+ d->overlapped_disk = 1;
+ }
+ }
+ if (!is_overlap)
+ is_overlap = 1;
+ }
+ }
+ }
return (is_overlap);
}
@@ -155,6 +226,19 @@ report_overlap_recommendation()
uint_t sliceno;
int fd = -1;
+ /*
+ * If the disk isn't available (i.e. powered off or dead)
+ * we can't read the master block timestamp and thus
+ * cannot make a recommendation as to which set it belongs to.
+ */
+ if (d->mid_available != MD_IM_DISK_AVAILABLE) {
+ (void) fprintf(stdout, " %s ", d->mid_dnp->cname);
+ (void) fprintf(stdout,
+ gettext(" - no recommendation can "
+ "be made because disk is unavailable\n"));
+ continue;
+ }
+
if (meta_replicaslice(d->mid_dnp, &sliceno, ep) != 0)
continue;
@@ -173,13 +257,174 @@ report_overlap_recommendation()
(void) close(fd);
fprintf(stdout, " %s ", d->mid_dnp->cname);
(void) fprintf(stdout, "%s: %s\n",
- gettext(" - recommend importing with set "
+ gettext(" - must import with set "
"created at "), meta_print_time((md_timeval32_t *)
(&(mbp->mb_setcreatetime))));
}
Free(mbp);
}
+/*
+ * is_first_disk decides whether the set creation time of disk d may
+ * serve as the "first disk time" of its set.  The disk qualifies only
+ * if it is available, is not already on the skip list (*skiph), and
+ * meta_imp_drvused() does not report it as in use.
+ * RETURN:
+ *	1	The time can be used as the first disk time
+ *	0	The time should not be used.
+ */
+static int
+is_first_disk(
+md_im_drive_info_t	*d,
+mddrivenamelist_t	**skiph)
+{
+	md_error_t		status = mdnullerror;
+	md_error_t		*ep = &status;
+	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
+	mddrivenamelist_t	*skipped;
+
+	/*
+	 * If a disk is not available there is no
+	 * set creation timestamp available.
+	 */
+	if (d->mid_available != MD_IM_DISK_AVAILABLE)
+		return (0);
+
+	/*
+	 * A disk that is already on the skip list is not eligible.
+	 */
+	for (skipped = *skiph; skipped != NULL; skipped = skipped->next) {
+		if (skipped->drivenamep == d->mid_dnp)
+			return (0);
+	}
+
+	/*
+	 * Nor is a drive that is currently being used for
+	 * something else like a mounted file system or a
+	 * current metadevice or in a set.
+	 */
+	if (meta_imp_drvused(sp, d->mid_dnp, ep))
+		return (0);
+
+	return (1);
+}
+
+
+/*
+ * Input a list of disks (dnlp), find the sets that are importable, create
+ * a list of these sets (mispp), and a list of the disks within each of these
+ * sets (midp). These lists (mispp and midp) will be used by metaimport.
+ *
+ *	dnlp      - candidate drives to scan
+ *	skipt     - in/out skip list; drives already accounted for are
+ *	            appended so they are not scanned a second time
+ *	mispp     - in/out list of set descriptors that is built up
+ *	flags     - import flags handed to meta_get_and_report_set_info()
+ *	set_count - in/out set counter handed to the same routine
+ *	overlap   - overlap indicator handed to the same routine
+ *	ep        - error return; errors are printed and cleared per drive
+ *
+ * RETURN: the sum of the non-negative values returned by
+ * meta_get_and_report_set_info() for the scanned drives.
+ */
+static int process_disks(
+	mddrivenamelist_t	*dnlp,
+	mddrivenamelist_t	**skipt,
+	md_im_set_desc_t	**mispp,
+	int			flags,
+	int			*set_count,
+	int			overlap,
+	md_error_t		*ep
+)
+{
+	mddrivenamelist_t	*dp;
+	int			rscount = 0;
+	int			hasreplica;
+	md_im_set_desc_t	*p;
+	md_im_drive_info_t	*d;
+	mddrivenamelist_t	**skiph = skipt;
+
+	/* Scan qualified disks */
+	for (dp = dnlp; dp != NULL; dp = dp->next) {
+		mddrivenamelist_t	*slp;
+		md_timeval32_t		firstdisktime;
+		int			have_firstdisktime = 0;
+
+		/* is the current drive on the skip list? */
+		for (slp = *skiph; slp != NULL; slp = slp->next) {
+			if (dp->drivenamep == slp->drivenamep)
+				break;
+		}
+		/* drive on the skip list ? */
+		if (slp != NULL)
+			continue;
+
+		/*
+		 * In addition to updating the misp list, either verbose or
+		 * standard output will be generated.
+		 */
+		hasreplica = meta_get_and_report_set_info(dp, mispp, 0,
+		    flags, set_count, overlap, overlap_disks, ep);
+
+		if (hasreplica < 0) {
+			mde_perror(ep, "");
+			mdclrerror(ep);
+			continue;
+		}
+
+		rscount += hasreplica;
+
+		/* Eliminate duplicate reporting */
+		if (hasreplica == 0)
+			continue;
+
+		/*
+		 * Go to the tail for the current set
+		 */
+		for (p = *mispp; p->mis_next != NULL; p = p->mis_next)
+			;
+
+		/*
+		 * Now look for the set creation timestamp.
+		 * If a disk is not available there is no
+		 * set creation timestamp available so look
+		 * for the first available disk to grab this
+		 * information from.  We also need to make
+		 * sure this disk isn't already on the skip
+		 * list.  If so go to the next available drive.
+		 * And we need to make sure the drive isn't
+		 * currently being used for something else
+		 * like a mounted file system or a current
+		 * metadevice or in a set.
+		 *
+		 * It is possible that no drive qualifies; remember
+		 * whether a reference time was found so that an
+		 * uninitialized timestamp is never compared below.
+		 */
+		for (d = p->mis_drives; d != NULL; d = d->mid_next) {
+			if (is_first_disk(d, skiph)) {
+				firstdisktime = d->mid_setcreatetimestamp;
+				have_firstdisktime = 1;
+				break;
+			}
+		}
+
+		for (d = p->mis_drives; d != NULL; d = d->mid_next) {
+			/*
+			 * if the mb_setcreatetime for a disk
+			 * is not the same as the first disk
+			 * in the set, don't put it on the
+			 * skip list.  This disk probably
+			 * doesn't really belong in this set
+			 * and we'll want to look at it again
+			 * to figure out where it does belong.
+			 * Without a reference time an available
+			 * disk cannot be matched, so it is left
+			 * off the skip list as well.
+			 * If the disk isn't available, there's
+			 * really no point in looking at it
+			 * again so put it on the skip list.
+			 */
+			if (d->mid_available == MD_IM_DISK_AVAILABLE) {
+				if (!have_firstdisktime ||
+				    (d->mid_setcreatetimestamp.tv_sec !=
+				    firstdisktime.tv_sec) ||
+				    (d->mid_setcreatetimestamp.tv_usec !=
+				    firstdisktime.tv_usec))
+					continue;
+			}
+			skipt = meta_drivenamelist_append_wrapper(skipt,
+			    d->mid_dnp);
+		}
+	}
+	return (rscount);
+}
int
main(int argc, char *argv[])
@@ -197,18 +442,18 @@ main(int argc, char *argv[])
mddrivenamelist_t *dnlp = NULL;
mddrivenamelist_t *dp;
mddrivenamelist_t *skiph = NULL;
- mddrivenamelist_t **skipt = &skiph;
int rscount = 0;
- int hasreplica;
+ md_im_set_desc_t *pass1_misp = NULL;
md_im_set_desc_t *misp = NULL;
+ md_im_set_desc_t **pass1_mispp = &pass1_misp;
md_im_set_desc_t **mispp = &misp;
mhd_mhiargs_t mhiargs = defmhiargs;
int have_multiple_sets = 0;
int force = 0;
int overlap = 0;
- int partial = 0;
uint_t imp_flags = 0;
int set_count = 0;
+ int no_quorum = 0;
/*
* Get the locale set up before calling any other routines
@@ -374,7 +619,8 @@ main(int argc, char *argv[])
char *dlist;
int sizecnt = 0;
- sizecnt += strlen(ip->drive);
+ /* add 1 for null terminator */
+ sizecnt += strlen(ip->drive) + 1;
for (dp = dnlp->next; dp != NULL; dp = dp->next) {
sizecnt += 2; /* for the ", " */
sizecnt += strlen(dp->drivenamep->cname);
@@ -383,15 +629,14 @@ main(int argc, char *argv[])
dlist = Malloc(sizecnt);
strlcpy(dlist, ip->drive, sizecnt);
- Free(ip->drive);
- dlist += strlen(ip->drive);
+ Free(ip->drive);
for (dp = dnlp->next; dp != NULL; dp = dp->next) {
strlcat(dlist, ", ", sizecnt);
strlcat(dlist, dp->drivenamep->cname, sizecnt);
}
- ip->drive = Strdup(dlist);
+ ip->drive = dlist;
}
/* Don't continue if we're already hosed */
@@ -406,96 +651,26 @@ main(int argc, char *argv[])
md_exit(sp, 0);
}
- /* Scan qualified disks */
- for (dp = dnlp; dp != NULL; dp = dp->next) {
- mddrivenamelist_t *slp;
-
- /* is the current drive on the skip list? */
- for (slp = skiph; slp != NULL; slp = slp->next) {
- if (dp->drivenamep == slp->drivenamep)
- goto skipdisk;
- }
-
- /*
- * In addition to updating the misp list, either verbose or
- * standard output will be generated.
- *
- */
- hasreplica = meta_get_and_report_set_info(dp, mispp, 0,
- imp_flags, &set_count, ep);
-
- /*
- * If current disk is part of a partial diskset,
- * meta_get_set_info returns an ENOTSUP for this disk.
- * Import of partial disksets isn't supported yet,
- * so do NOT put this disk onto any list being set up
- * by metaimport. The partial diskset error message will
- * only be printed once when the first partial diskset is
- * detected. If the user is actually trying to import the
- * partial diskset, print the error and exit; otherwise,
- * print the error and continue.
- */
- if (hasreplica == ENOTSUP) {
- if (report_only) {
- if (!partial) {
- mde_perror(ep, "");
- partial = 1;
- }
- mdclrerror(ep);
- goto skipdisk;
- } else {
- mde_perror(ep, "");
- md_exit(sp, 1);
- }
- }
-
- if (hasreplica < 0) {
- mde_perror(ep, "");
- mdclrerror(ep);
- } else {
- md_im_set_desc_t *p;
- md_im_drive_info_t *d;
-
- rscount += hasreplica;
+ /*
+ * META_IMP_PASS1 means gather the info, but don't report.
+ */
+ (void) process_disks(dnlp, &skiph, pass1_mispp,
+ imp_flags | META_IMP_PASS1, &set_count, overlap, ep);
- /* Eliminate duplicate reporting */
- if (hasreplica > 0) {
- md_timeval32_t firstdisktime;
+ overlap_disks = NULL;
+ overlap = set_disk_overlap(pass1_misp);
+ skiph = NULL;
- /*
- * Go to the tail for the current set
- */
- for (p = misp; p->mis_next != NULL;
- p = p->mis_next);
- firstdisktime =
- p->mis_drives->mid_setcreatetimestamp;
- for (d = p->mis_drives;
- d != NULL;
- d = d->mid_next) {
- /*
- * if the mb_setcreatetime for a disk
- * is not the same as the first disk
- * in the set, don't put it on the
- * skip list. This disk probably
- * doesn't really belong in this set
- * and we'll want to look at it again
- * to figure out where it does belong.
- */
- if ((d->mid_setcreatetimestamp.tv_sec !=
- firstdisktime.tv_sec) ||
- (d->mid_setcreatetimestamp.tv_usec
- != firstdisktime.tv_usec))
- continue;
- skipt =
- meta_drivenamelist_append_wrapper(
- skipt, d->mid_dnp);
- }
- }
- }
-
-skipdisk:
- ;
- }
+ /*
+ * This time call without META_IMP_PASS1 set and we gather
+ * and report the information.
+ * We need to do this twice because of the overlap detection.
+ * The first pass generates a list of disks to detect overlap on.
+ * We then do a second pass using that overlap list to generate
+ * the report.
+ */
+ rscount = process_disks(dnlp, &skiph, mispp, imp_flags, &set_count,
+ overlap, ep);
/*
* Now have entire list of disks associated with diskset including
@@ -508,15 +683,52 @@ skipdisk:
md_im_drive_info_t *d;
mddrivename_t *dnp;
+ if (sp == NULL) {
+ /* Get sp for local set */
+ if ((sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
+ mde_perror(ep, "");
+ meta_free_im_set_desc(misp);
+ md_exit(sp, 1);
+ }
+ }
+
for (p = misp; p != NULL; p = p->mis_next) {
for (d = p->mis_drives; d != NULL; d = d->mid_next) {
dnp = d->mid_dnp;
- if (meta_imp_drvused(sp, dnp, ep)) {
- (void) mddserror(ep,
- MDE_DS_DRIVEINUSE, 0, NULL,
- dnp->cname, NULL);
- mde_perror(ep, "");
- md_exit(sp, 0);
+ if (d->mid_available == MD_IM_DISK_AVAILABLE) {
+ if (meta_imp_drvused(sp, dnp, ep)) {
+ (void) mddserror(ep,
+ MDE_DS_DRIVEINUSE, 0, NULL,
+ dnp->cname, NULL);
+ mde_perror(ep, "");
+ meta_free_im_set_desc(misp);
+ md_exit(sp, 1);
+ }
+ } else {
+ /*
+ * If drive is unavailable, then check
+ * that this drive hasn't already been
+ * imported as part of another partial
+ * diskset. Check by devid instead of
+ * cname since the unavailable drive
+ * would have the cname from its
+ * previous system and this may collide
+ * with a valid cname on this system.
+ * Fail if devid is found in another
+ * set or if the routine fails.
+ */
+ mdsetname_t *tmp_sp = NULL;
+
+ if ((meta_is_devid_in_anyset(
+ d->mid_devid, &tmp_sp, ep) == -1) ||
+ (tmp_sp != NULL)) {
+ (void) mddserror(ep,
+ MDE_DS_DRIVEINUSE, 0, NULL,
+ dnp->cname, NULL);
+ mde_perror(ep, "");
+ meta_free_im_set_desc(misp);
+ md_exit(sp, 1);
+ }
}
}
}
@@ -531,9 +743,11 @@ skipdisk:
* If we've found partial disksets but no complete disksets,
* we don't want this to print.
*/
- if (!partial) {
+ if (!misp) {
md_eprintf("%s\n", gettext("no unconfigured sets "
"detected"));
+ meta_free_im_set_desc(misp);
+ md_exit(sp, 1);
}
md_exit(sp, 0);
}
@@ -566,17 +780,15 @@ skipdisk:
gettext("Number of disksets eligible for import"),
set_count);
}
+ }
+ if (overlap) {
+ report_overlap_recommendation();
+ }
- overlap = set_disk_overlap(misp);
- if (overlap) {
- report_overlap_recommendation();
- }
-
- if (!report_only) {
- md_eprintf("%s\n\n", gettext("multiple unconfigured "
- "sets detected.\nRerun the command with the "
- "suggested options for the desired set."));
- }
+ if (have_multiple_sets && !report_only) {
+ md_eprintf("%s\n\n", gettext("multiple unconfigured "
+ "sets detected.\nRerun the command with the "
+ "suggested options for the desired set."));
}
@@ -586,8 +798,29 @@ skipdisk:
*/
if (report_only) {
+ meta_free_im_set_desc(misp);
md_exit(sp, 0);
} else if (have_multiple_sets) {
+ meta_free_im_set_desc(misp);
+ md_exit(sp, 1);
+ } else if (overlap) {
+ md_im_drive_info_t *d;
+ /*
+ * The only way we can get here is if we're doing an import
+ * request on a set that contains at least one disk with
+ * a time conflict. We are prohibiting the importation of
+ * this type of set until the offending disk(s) are turned
+ * off to prevent data corruption.
+ */
+ printf(gettext("To import this set, "));
+ for (d = pass1_misp->mis_drives;
+ d != NULL;
+ d = d->mid_next) {
+ if (d->overlapped_disk)
+ printf("%s ", d->mid_dnp->cname);
+ }
+ printf(gettext("must be removed from the system\n"));
+ meta_free_im_set_desc(misp);
md_exit(sp, 1);
}
@@ -595,32 +828,50 @@ skipdisk:
usage(sp, gettext("You must specify a new set name."));
}
+ /*
+ * The user must specify the -f (force) flag if the following
+ * conditions exist:
+ * - partial diskset
+ * - stale diskset
+ */
+ if (meta_replica_quorum(misp) != 0)
+ no_quorum = 1;
+ if (misp->mis_partial || no_quorum) {
+ if (!force)
+ usage(sp, gettext("You must specify the force flag"));
+ }
(void) meta_imp_set(misp, setname_new, force, dry_run, ep);
-
if (dry_run) {
+ meta_free_im_set_desc(misp);
md_exit(sp, 0);
}
if (!mdisok(ep)) {
+ meta_free_im_set_desc(misp);
mde_perror(ep, "");
md_exit(sp, 1);
}
if ((sp = metasetname(setname_new, ep)) == NULL) {
+ meta_free_im_set_desc(misp);
mde_perror(ep, "");
md_exit(sp, 1);
}
if (meta_lock_nowait(sp, ep) != 0) {
+ meta_free_im_set_desc(misp);
mde_perror(ep, "");
md_exit(sp, 10); /* special errcode */
}
- if (meta_set_take(sp, &mhiargs, 0, 0, &status)) {
+ if (meta_set_take(sp, &mhiargs, (misp->mis_partial | TAKE_IMP),
+ 0, &status)) {
+ meta_free_im_set_desc(misp);
mde_perror(&status, "");
md_exit(sp, 1);
}
+ meta_free_im_set_desc(misp);
md_exit(sp, 0);
/*NOTREACHED*/
return (0);
diff --git a/usr/src/cmd/lvm/util/metaset.c b/usr/src/cmd/lvm/util/metaset.c
index 59c803d2f3..953554e3c5 100644
--- a/usr/src/cmd/lvm/util/metaset.c
+++ b/usr/src/cmd/lvm/util/metaset.c
@@ -1510,6 +1510,7 @@ parse_takeset(int argc, char **argv)
sdssc_boolean_e cluster_take = SDSSC_False;
sdssc_version_t vers;
rval_e rval;
+ int set_take_rval;
/* reset and parse args */
optind = 1;
@@ -1646,7 +1647,30 @@ parse_takeset(int argc, char **argv)
md_exit(sp, 10); /* special errcode */
}
- if (meta_set_take(sp, &mhiargs, flags, usetag, &status)) {
+ /*
+ * If a 2 is returned from meta_set_take, this take was able to resolve
+ * an unresolved replicated disk (i.e. a disk is now available that
+ * had been missing during the import of the replicated diskset).
+ * Need to release the diskset and re-take in order to have
+ * the subdrivers re-snarf using the newly resolved (or newly mapped)
+ * devids. This also allows the namespace to be updated with the
+ * correct major names in the case where the disk being replicated
+ * was handled by a different driver than the replicated disk.
+ */
+ set_take_rval = meta_set_take(sp, &mhiargs, flags, usetag, &status);
+ if (set_take_rval == 2) {
+ if (meta_set_release(sp, &status)) {
+ mde_perror(&status,
+ "Need to release and take set to resolve names.");
+ md_exit(sp, 1);
+ }
+ metaflushdrivenames();
+ metaflushsetname(sp);
+ set_take_rval = meta_set_take(sp, &mhiargs,
+ (flags | TAKE_RETAKE), usetag, &status);
+ }
+
+ if (set_take_rval == -1) {
mde_perror(&status, "");
if (mdismddberror(&status, MDE_DB_TAGDATA))
md_exit(sp, 2);
diff --git a/usr/src/head/meta.h b/usr/src/head/meta.h
index ada550ed07..dc4cd38691 100644
--- a/usr/src/head/meta.h
+++ b/usr/src/head/meta.h
@@ -404,6 +404,8 @@ typedef struct md_mn_msg_tbl_entry {
#define TAKE_FORCE 0x0001
#define TAKE_USETAG 0x0002
#define TAKE_USEIT 0x0004
+#define TAKE_IMP 0x0008
+#define TAKE_RETAKE 0x0010
/*
* ignore gettext for lint so we check printf args
@@ -595,6 +597,62 @@ typedef struct md_evlist {
/* end of meta event definitions ("meta_notify.h") */
+typedef struct md_im_names {
+ int min_count;
+ char **min_names;
+} md_im_names_t;
+
+/* Values for replica info status */
+#define MD_IM_REPLICA_SCANNED (0x01)
+#define MD_IM_REPLICA_VALID (0x02)
+
+typedef struct md_im_replica_info {
+ struct md_im_replica_info *mir_next;
+ int mir_status;
+ int mir_flags;
+ daddr32_t mir_offset;
+ daddr32_t mir_length;
+ md_timeval32_t mir_timestamp;
+} md_im_replica_info_t;
+
+/*
+ * Per-drive information gathered by metaimport for a drive that belongs
+ * to a set being considered for import.
+ */
+typedef struct md_im_drive_info {
+ struct md_im_drive_info *mid_next; /* next drive in this set */
+ mddrivename_t *mid_dnp;
+ /* devid of the drive; used to check membership in other sets */
+ void *mid_devid;
+ void *mid_o_devid;
+ int mid_devid_sz;
+ int mid_o_devid_sz;
+ char mid_minor_name[MDDB_MINOR_NAME_MAX];
+ minor_t mid_mnum;
+ /* MD_IM_DISK_AVAILABLE or MD_IM_DISK_NOT_AVAILABLE */
+ int mid_available;
+ /* set creation time; compared between drives to detect conflicts */
+ md_timeval32_t mid_setcreatetimestamp;
+ char *mid_driver_name;
+ char *mid_devname;
+ md_im_replica_info_t *mid_replicas;
+ /* set to 1 when the drive is found to overlap/conflict */
+ int overlapped_disk;
+ struct md_im_drive_info *overlap; /* chain of overlap disks */
+} md_im_drive_info_t;
+
+/* Values for mid_available */
+#define MD_IM_DISK_AVAILABLE 0x00
+#define MD_IM_DISK_NOT_AVAILABLE 0x01
+
+/* Values for set descriptor flags */
+#define MD_IM_SET_INVALID 0x10
+#define MD_IM_SET_REPLICATED 0x20
+
+/* Values for mis_partial */
+#define MD_IM_COMPLETE_DISKSET 0x04
+#define MD_IM_PARTIAL_DISKSET 0x08
+
+typedef struct md_im_set_desc {
+ struct md_im_set_desc *mis_next;
+ int mis_flags;
+ int mis_oldsetno;
+ md_im_drive_info_t *mis_drives;
+ int mis_active_replicas;
+ int mis_partial;
+} md_im_set_desc_t;
/* meta_admin.c */
extern int open_admin(md_error_t *ep);
@@ -1120,12 +1178,15 @@ extern mdsetname_t *metasetnosetname(set_t setno, md_error_t *ep);
extern mdsetname_t *metafakesetname(set_t setno, char *sname);
extern md_set_desc *metaget_setdesc(mdsetname_t *sp, md_error_t *ep);
extern void metaflushsetname(mdsetname_t *sp);
+extern void metaflushdrivenames(void);
extern int metaislocalset(mdsetname_t *sp);
extern int metaissameset(mdsetname_t *sp1, mdsetname_t *sp2);
extern void metaflushsidenames(mddrivename_t *dnp);
extern char *metadiskname(char *name);
extern mddrivename_t *metadrivename(mdsetname_t **spp, char *uname,
md_error_t *ep);
+extern mddrivename_t *metadrivenamebydevid(mdsetname_t **spp, char *devid,
+ char *uname, md_error_t *ep);
extern mdname_t *metaslicename(mddrivename_t *dnp, uint_t sliceno,
md_error_t *ep);
extern void metafreedrivename(mddrivename_t *dnp);
@@ -1181,6 +1242,9 @@ extern int meta_get_hotspare_names(mdsetname_t *sp,
mdnamelist_t **nlpp, int options, md_error_t *ep);
extern void meta_create_non_dup_list(mdname_t *mdnp,
mddevid_t **ldevidpp);
+extern mddrivename_t *meta_getdnp_bydevid(mdsetname_t *sp, side_t sideno,
+ ddi_devid_t devidp, mdkey_t key, md_error_t *ep);
+
/* meta_nameinfo.c */
extern mdsetname_t *metagetset(mdname_t *np, int bypass_daemon,
@@ -1233,7 +1297,7 @@ extern int meta_setdid(set_t setno, side_t sideno, mdkey_t key,
md_error_t *ep);
extern int add_name(mdsetname_t *sp, side_t sideno, mdkey_t key,
char *dname, minor_t mnum, char *bname,
- md_error_t *ep);
+ char *minorname, ddi_devid_t devid, md_error_t *ep);
extern int del_name(mdsetname_t *sp, side_t sideno, mdkey_t key,
md_error_t *ep);
extern int add_key_name(mdsetname_t *sp, mdname_t *np,
@@ -1391,6 +1455,10 @@ extern int meta_is_drive_in_anyset(mddrivename_t *dnp,
extern int meta_is_drive_in_thisset(mdsetname_t *sp,
mddrivename_t *dnp, int bypass_daemon,
md_error_t *ep);
+extern int meta_is_devid_in_anyset(void *devid,
+ mdsetname_t **spp, md_error_t *ep);
+extern int meta_is_devid_in_thisset(mdsetname_t *sp,
+ void *devid, md_error_t *ep);
extern int meta_set_balance(mdsetname_t *sp, md_error_t *ep);
extern int meta_set_destroy(mdsetname_t *sp, int lock_set,
md_error_t *ep);
@@ -1428,7 +1496,8 @@ extern int meta_devid_use(md_error_t *ep);
/* meta_set_drv.c */
extern int meta_make_sidenmlist(mdsetname_t *,
- mddrivename_t *, md_error_t *);
+ mddrivename_t *, int imp_flag,
+ md_im_drive_info_t *midp, md_error_t *);
extern int meta_set_adddrives(mdsetname_t *sp,
mddrivenamelist_t *dnlp, daddr_t dbsize,
int force_label, md_error_t *ep);
@@ -1763,49 +1832,18 @@ extern int read_database_block(md_error_t *, int, mddb_mb_t *, int,
void *, int);
extern daddr_t getphysblk(mddb_block_t, mddb_mb_t *);
-typedef struct md_im_names {
- int min_count;
- char **min_names;
-} md_im_names_t;
-
-/* Values for replica info status */
-#define MD_IM_REPLICA_SCANNED (0x01)
-#define MD_IM_REPLICA_VALID (0x02)
+extern md_im_drive_info_t *pick_good_disk(md_im_set_desc_t *misp);
-typedef struct md_im_replica_info {
- struct md_im_replica_info *mir_next;
- int mir_status;
- int mir_flags;
- daddr32_t mir_offset;
- daddr32_t mir_length;
- md_timeval32_t mir_timestamp;
-} md_im_replica_info_t;
-
-typedef struct md_im_drive_info {
- struct md_im_drive_info *mid_next; /* next drive in this set */
- mddrivename_t *mid_dnp;
- void *mid_devid;
- void *mid_o_devid;
- int mid_devid_sz;
- int mid_o_devid_sz;
- char mid_minor_name[MDDB_MINOR_NAME_MAX];
- md_timeval32_t mid_setcreatetimestamp;
- char *mid_devname;
- md_im_replica_info_t *mid_replicas;
- struct md_im_drive_info *overlap; /* chain of overlap disks */
-} md_im_drive_info_t;
-
-/* Values for set descriptor flags */
-#define MD_IM_SET_INVALID 0x01
-#define MD_IM_SET_REPLICATED 0x02
-
-typedef struct md_im_set_desc {
- struct md_im_set_desc *mis_next;
- int mis_flags;
- int mis_oldsetno;
- md_im_drive_info_t *mis_drives;
- int mis_active_replicas;
-} md_im_set_desc_t;
+extern void meta_unrslv_replicated_mb(mdsetname_t *sp,
+ md_drive_desc *dd, mddrivenamelist_t *dnlp,
+ md_error_t *ep);
+extern void meta_unrslv_replicated_nm(mdsetname_t *sp,
+ md_drive_desc *dd, mddrivenamelist_t *dnlp,
+ md_error_t *ep);
+extern void * replicated_list_lookup(uint_t devid_len,
+ void *old_devid);
+extern int build_replicated_disks_list(md_error_t *ep,
+ mddrivenamelist_t *dnlp);
/*
* pnm_rec is used to store the mapping from keys in the NM namespace
@@ -1831,18 +1869,29 @@ typedef struct pnm_rec {
/* Flags for metaimport reporting */
#define META_IMP_REPORT 0x0001
#define META_IMP_VERBOSE 0x0002
+#define META_IMP_PASS1 0x1000
extern int meta_list_disks(md_error_t *, md_im_names_t *);
extern mddrivenamelist_t *meta_prune_cnames(md_error_t *,
md_im_names_t *, int);
extern int meta_get_and_report_set_info(
mddrivenamelist_t *, md_im_set_desc_t **,
- int, uint_t, int *, md_error_t *);
+ int, uint_t, int *, int,
+ md_im_drive_info_t *, md_error_t *);
extern void free_pnm_rec_list(pnm_rec_t **);
extern int meta_imp_set(md_im_set_desc_t *,
char *, int, bool_t, md_error_t *);
extern int meta_imp_drvused(mdsetname_t *sp,
mddrivename_t *dnp, md_error_t *ep);
+extern int meta_replica_quorum(md_im_set_desc_t *misp);
+extern int meta_imp_set_adddrives(mdsetname_t *sp,
+ mddrivenamelist_t *dnlp,
+ md_im_set_desc_t *misp, md_error_t *ep);
+extern void meta_free_im_set_desc(md_im_set_desc_t *misp);
+extern int clnt_imp_adddrvs(char *hostname,
+ mdsetname_t *sp, md_drive_desc *dd,
+ md_timeval32_t timestamp,
+ ulong_t genid, md_error_t *ep);
/* Flags for direction in copy_msg_1 */
#define MD_MN_COPY_TO_ONDISK 0x0001
@@ -1866,9 +1915,6 @@ extern int meta_write_nodelist(int nodecnt, char **nids,
md_error_t *ep);
extern void meta_free_nodelist(mndiskset_membershiplist_t *nl);
-/* Values for set descriptor flags */
-#define MD_IM_SET_INVALID 0x01
-
/* meta_mn_subr.c */
/* defines for flags argument for meta_mn_send_command() */
#define MD_DISP_STDERR 0x0000
diff --git a/usr/src/head/metad.x b/usr/src/head/metad.x
index 6754f4b121..6aa42b637b 100644
--- a/usr/src/head/metad.x
+++ b/usr/src/head/metad.x
@@ -1,5 +1,5 @@
%/*
-% * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+% * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
% * Use is subject to license terms.
% *
% * CDDL HEADER START
@@ -965,6 +965,9 @@ program METAD {
mdrpc_generic_res
mdrpc_mn_sp_update_abr(mdrpc_setno_2_args) = 43;
+ mdrpc_generic_res
+ mdrpc_imp_adddrvs(mdrpc_drives_2_args) = 44;
+
} = 2;
} = 100229;
diff --git a/usr/src/lib/lvm/libmeta/common/meta_devadm.c b/usr/src/lib/lvm/libmeta/common/meta_devadm.c
index 975c87e4f3..8668c6eb6e 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_devadm.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_devadm.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -626,6 +625,8 @@ pathname_reload(
/* metadevices do not have devid's in them */
mda_debug("pathname_reload: no devid for %s\n",
(char *)(uintptr_t)nm.devname);
+ /* Clear error if no devid and go to next nm entry */
+ mdclrerror(ep);
continue;
}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_error.c b/usr/src/lib/lvm/libmeta/common/meta_error.c
index f98aed73a9..a5f0f45cbe 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_error.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_error.c
@@ -1964,13 +1964,6 @@ ds_to_str(
(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
"multiple namespace records detected"));
break;
- case MDE_DS_PARTIALSET:
- (void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
- "partial diskset detected\n"
- "Please refer to the Solaris Volume Manager documentation,"
- "\nTroubleshooting section, at http://docs.sun.com or from"
- "\nyour local copy"));
- break;
case MDE_DS_COMMDCTL_SUSPEND_NYD:
(void) snprintf(p, psize, dgettext(TEXT_DOMAIN,
"rpc.mdcommd on host %s is not yet drained during "
diff --git a/usr/src/lib/lvm/libmeta/common/meta_hotspares.c b/usr/src/lib/lvm/libmeta/common/meta_hotspares.c
index 15f310d0b0..5ba8c84866 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_hotspares.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_hotspares.c
@@ -926,7 +926,7 @@ add_hsp_name_mn_sides(
if (nd->nd_nodeid == curside)
continue;
if (add_name(sp, nd->nd_nodeid, key, MD_HOTSPARES,
- minor(NODEV), hsp_name, ep) == -1) {
+ minor(NODEV), hsp_name, NULL, NULL, ep) == -1) {
return (-1);
}
}
@@ -964,7 +964,7 @@ add_hsp_name_trad_sides(
continue;
if (sd->sd_nodes[i][0] != '\0') {
if (add_name(sp, i, key, MD_HOTSPARES, minor(NODEV),
- hsp_name, ep) == -1) {
+ hsp_name, NULL, NULL, ep) == -1) {
return (-1);
}
}
@@ -1014,7 +1014,7 @@ add_hsp_name(
/* First add the record for the side of the current node. */
key = add_name(sp, thisside, MD_KEYWILD, MD_HOTSPARES, minor(NODEV),
- hsp_name, ep);
+ hsp_name, NULL, NULL, ep);
if (key == -1) {
goto cleanup;
}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_import.c b/usr/src/lib/lvm/libmeta/common/meta_import.c
index 650df6fcff..cb3a6aaaf9 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_import.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_import.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -48,6 +47,8 @@ typedef struct did_list {
dev_t dev;
uint_t did_index;
char *minor_name;
+ char *driver_name;
+ int available;
struct did_list *next;
} did_list_t;
@@ -76,7 +77,11 @@ static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL};
* The list of replicated disks is built just once and this flag is set
* once it's done
*/
-static int replicated_disk_list_built = 0;
+int replicated_disk_list_built_pass1 = 0;
+int replicated_disk_list_built_pass2 = 0;
+int *replicated_disk_list_built;
+
+static void free_did_list(did_list_t *did_listp);
/*
* Map logical blk to physical
@@ -120,17 +125,15 @@ static md_im_drive_info_t *
drive_append(
md_im_drive_info_t **midpp,
mddrivename_t *dnp,
- void *devid,
- void *rdevid,
- void *devname,
- int devid_sz,
- char *minor_name,
+ did_list_t *nonrep_did_listp,
+ minor_t mnum,
md_timeval32_t timestamp,
md_im_replica_info_t *mirp
)
{
md_im_drive_info_t *midp;
int o_devid_sz;
+ int devid_sz;
for (; (*midpp != NULL); midpp = &((*midpp)->mid_next))
;
@@ -140,37 +143,47 @@ drive_append(
midp->mid_dnp = dnp;
/*
- * If rdevid is not NULL then we know we are dealing with
+ * If rdid is not NULL then we know we are dealing with
* replicated diskset case. 'devid_sz' will always be the
- * size of a valid devid which can be 'devid' or 'rdevid'
+ * size of a valid devid which can be 'did' or 'rdid'
*/
- midp->mid_devid = (void *)Malloc(devid_sz);
- if (rdevid) {
- (void) memcpy(midp->mid_devid, rdevid, devid_sz);
+ if (nonrep_did_listp->rdid) {
+ devid_sz = devid_sizeof(nonrep_did_listp->rdid);
+ midp->mid_devid = (void *)Malloc(devid_sz);
+ (void) memcpy(midp->mid_devid, nonrep_did_listp->rdid,
+ devid_sz);
/*
* Also need to store the 'other' devid
*/
- o_devid_sz = devid_sizeof((ddi_devid_t)devid);
+ o_devid_sz = devid_sizeof((ddi_devid_t)(nonrep_did_listp->did));
midp->mid_o_devid = (void *)Malloc(o_devid_sz);
- (void) memcpy(midp->mid_o_devid, devid, o_devid_sz);
+ (void) memcpy(midp->mid_o_devid, nonrep_did_listp->did,
+ o_devid_sz);
midp->mid_o_devid_sz = o_devid_sz;
} else {
+ devid_sz = devid_sizeof(nonrep_did_listp->did);
+ midp->mid_devid = (void *)Malloc(devid_sz);
/*
* In the case of regular diskset, midp->mid_o_devid
* will be a NULL pointer
*/
- (void) memcpy(midp->mid_devid, devid, devid_sz);
+ (void) memcpy(midp->mid_devid, nonrep_did_listp->did, devid_sz);
}
- if (devname)
- midp->mid_devname = Strdup(devname);
-
midp->mid_devid_sz = devid_sz;
midp->mid_setcreatetimestamp = timestamp;
- (void) strlcpy(midp->mid_minor_name, minor_name, MDDB_MINOR_NAME_MAX);
+ midp->mid_available = nonrep_did_listp->available;
+ if (nonrep_did_listp->minor_name) {
+ (void) strlcpy(midp->mid_minor_name,
+ nonrep_did_listp->minor_name, MDDB_MINOR_NAME_MAX);
+ }
+ midp->mid_mnum = mnum;
+ if (nonrep_did_listp->driver_name)
+ midp->mid_driver_name = Strdup(nonrep_did_listp->driver_name);
midp->mid_replicas = mirp;
-
+ if (nonrep_did_listp->devname)
+ midp->mid_devname = Strdup(nonrep_did_listp->devname);
return (midp);
}
@@ -187,17 +200,14 @@ static md_im_drive_info_t **
drive_append_wrapper(
md_im_drive_info_t **tailpp,
mddrivename_t *dnp,
- void *devid,
- void *rdevid,
- void *devname,
- int devid_sz,
- char *minor_name,
+ did_list_t *nonrep_did_listp,
+ minor_t mnum,
md_timeval32_t timestamp,
md_im_replica_info_t *mirp
)
{
- (void) drive_append(tailpp, dnp, devid, rdevid, devname, devid_sz,
- minor_name, timestamp, mirp);
+ (void) drive_append(tailpp, dnp, nonrep_did_listp, mnum, timestamp,
+ mirp);
if ((*tailpp)->mid_next == NULL)
return (tailpp);
@@ -302,7 +312,7 @@ map_replica_disk(
* for the disk.
* If you store the returned devid you must create a local copy.
*/
-static void *
+void *
replicated_list_lookup(
uint_t devid_len,
void *old_devid
@@ -374,16 +384,13 @@ get_replica_disks(
did_list_t *did_listp,
mddb_mb_t *mb,
mddb_lb_t *lbp,
- md_error_t *ep,
- int replicated
+ md_error_t *ep
)
{
mddrivename_t *dnp;
int indx, on_list;
mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
int flags;
- int devid_sz;
- char *minor_name;
did_list_t *replica_disk;
daddr32_t offset;
daddr32_t length;
@@ -391,63 +398,68 @@ get_replica_disks(
md_im_replica_info_t **mirpp = NULL;
md_im_drive_info_t **midpp = &misp->mis_drives;
md_im_drive_info_t *midp;
- void *did;
for (indx = 0; indx < lbp->lb_loccnt; indx++) {
on_list = 0;
- if (lbp->lb_locators[indx].l_flags & MDDB_F_ACTIVE) {
+ if ((lbp->lb_locators[indx].l_flags == 0) ||
+ (lbp->lb_locators[indx].l_flags & MDDB_F_DELETED))
+ continue;
- /*
- * search the device id list for a
- * specific ctds based on the locator
- * block device id array index.
- */
- replica_disk = map_replica_disk(did_listp, indx);
+ /*
+ * search the device id list for a
+ * specific ctds based on the locator
+ * block device id array index.
+ */
+ replica_disk = map_replica_disk(did_listp, indx);
- assert(replica_disk != NULL);
+ assert(replica_disk != NULL);
- /*
- * metadrivename() can fail for a slice name
- * if there is not an existing mddrivename_t.
- * So we use metadiskname() to strip the slice
- * number.
- */
- dnp = metadrivename(&sp,
- metadiskname(replica_disk->devname), ep);
+ /*
+ * metadrivename() can fail for a slice name
+ * if there is not an existing mddrivename_t.
+ * So we use metadiskname() to strip the slice
+ * number.
+ */
+ dnp = metadrivename(&sp, metadiskname(replica_disk->devname),
+ ep);
- for (midp = misp->mis_drives; midp != NULL;
- midp = midp->mid_next) {
- if (dnp == midp->mid_dnp) {
+ for (midp = misp->mis_drives; midp != NULL;
+ midp = midp->mid_next) {
+ if (dnp == midp->mid_dnp) {
+ /*
+ * You could get a dnp match, but if 1 disk
+ * is unavailable and the other isn't, they
+ * will have the same dnp due
+ * to the name being the same, but in fact
+ * are different disks.
+ */
+ if (midp->mid_available ==
+ replica_disk->available) {
on_list = 1;
mirpp = &midp->mid_replicas;
break;
}
}
+ }
- /*
- * Get the correct devid_sz
- */
- if (replicated)
- did = replica_disk->rdid;
- else
- did = replica_disk->did;
+ /*
+ * New on the list so add it
+ */
+ if (!on_list) {
+ mddb_mb_t *mbp;
+ uint_t sliceno;
+ mdname_t *rsp;
+ int fd = -1;
- devid_sz = devid_sizeof((ddi_devid_t)did);
- minor_name = replica_disk->minor_name;
+ mbp = Malloc(DEV_BSIZE);
/*
- * New on the list so add it
+ * If the disk isn't available, we don't
+ * want to try to read from it.
*/
- if (!on_list) {
- mddb_mb_t *mbp;
- uint_t sliceno;
- mdname_t *rsp;
- int fd = -1;
-
- mbp = Malloc(DEV_BSIZE);
-
+ if (replica_disk->available == MD_IM_DISK_AVAILABLE) {
/* determine the replica slice */
if (meta_replicaslice(dnp, &sliceno,
ep) != 0) {
@@ -488,54 +500,57 @@ get_replica_disks(
}
(void) close(fd);
- midpp = drive_append_wrapper(midpp, dnp,
- replica_disk->did, replica_disk->rdid,
- replica_disk->devname,
- devid_sz, minor_name, mbp->mb_setcreatetime,
- NULL);
- mirpp = &((*midpp)->mid_replicas);
- Free(mbp);
}
+ midpp = drive_append_wrapper(midpp, dnp,
+ replica_disk,
+ meta_getminor(replica_disk->dev),
+ mbp->mb_setcreatetime, NULL);
+ mirpp = &((*midpp)->mid_replicas);
+ Free(mbp);
+ }
- /*
- * For either of these assertions to fail, it implies
- * a NULL return from metadrivename() above. Since
- * the args came from a presumed valid locator block,
- * that's Bad.
- */
- assert(midpp != NULL);
- assert(mirpp != NULL);
+ /*
+ * For either of these assertions to fail, it implies
+ * a NULL return from metadrivename() above. Since
+ * the args came from a presumed valid locator block,
+ * that's Bad.
+ */
+ assert(midpp != NULL);
+ assert(mirpp != NULL);
- /*
- * Extract the parameters describing this replica.
- *
- * The magic "1" in the length calculation accounts
- * for the length of the master block, in addition to
- * the block count it describes. (The master block
- * will always take up one block on the disk, and
- * there will always only be one master block per
- * replica, even though much of the code is structured
- * to handle noncontiguous replicas.)
- */
- flags = lbp->lb_locators[indx].l_flags;
- offset = lbp->lb_locators[indx].l_blkno;
- length = mb->mb_blkcnt + 1;
- timestamp = mb->mb_setcreatetime;
+ /*
+ * Extract the parameters describing this replica.
+ *
+ * The magic "1" in the length calculation accounts
+ * for the length of the master block, in addition to
+ * the block count it describes. (The master block
+ * will always take up one block on the disk, and
+ * there will always only be one master block per
+ * replica, even though much of the code is structured
+ * to handle noncontiguous replicas.)
+ */
+ flags = lbp->lb_locators[indx].l_flags;
+ offset = lbp->lb_locators[indx].l_blkno;
+ length = mb->mb_blkcnt + 1;
+ timestamp = mb->mb_setcreatetime;
- mirpp = replica_append_wrapper(mirpp, flags,
- offset, length, timestamp);
+ mirpp = replica_append_wrapper(mirpp, flags,
+ offset, length, timestamp);
- /*
- * If we're here it means -
- *
- * a) we had an active copy of the replica, and
- * b) we've added the disk to the list of
- * disks as well.
- *
- * We need to bump up the number of active
- * replica count for each such replica so that it
- * can be used later for replica quorum check.
- */
+ /*
+ * If we're here it means -
+ *
+ * we've added the disk to the list of
+ * disks.
+ */
+
+ /*
+ * We need to bump up the active replica
+ * count for each such replica that is
+ * active so that it can be used later for the
+ * replica quorum check.
+ */
+ if (flags & MDDB_F_ACTIVE) {
misp->mis_active_replicas++;
}
}
@@ -621,6 +636,8 @@ static void
get_disks_from_didnamespace(
md_im_set_desc_t *misp,
pnm_rec_t **pnm,
+ mddb_rb_t *nm,
+ mddb_rb_t *shrnm,
mddb_rb_t *did_nm,
mddb_rb_t *did_shrnm,
uint_t imp_flags,
@@ -635,14 +652,24 @@ get_disks_from_didnamespace(
mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
mddb_rb_t *rbp_did = did_nm;
mddb_rb_t *rbp_did_shr = did_shrnm;
+ mddb_rb_t *rbp_nm = nm;
+ mddb_rb_t *rbp_shr_nm = shrnm;
int on_list = 0;
- int devid_sz;
struct devid_min_rec *did_rec;
struct devid_shr_rec *did_shr_rec;
+ struct nm_rec *namesp_rec;
+ struct nm_shr_rec *namesp_shr_rec;
struct did_shr_name *did;
struct did_min_name *min;
void *r_did; /* NULL if not a replicated diskset */
void *valid_did;
+ int avail = 0;
+ struct nm_name *nmp;
+ struct nm_shared_name *snmp;
+ mdkey_t drv_key, key, dev_key;
+ minor_t mnum = 0;
+ did_list_t *nonrep_did_listp;
+ size_t used_size, offset;
/*
* We got a pointer to an mddb record, which we expect to contain a
@@ -653,6 +680,10 @@ get_disks_from_didnamespace(
/* LINTED */
did_shr_rec = (struct devid_shr_rec *)
((caddr_t)(&rbp_did_shr->rb_data));
+ /* LINTED */
+ namesp_rec = (struct nm_rec *)((caddr_t)(&rbp_nm->rb_data));
+ /* LINTED */
+ namesp_shr_rec = (struct nm_shr_rec *)((caddr_t)(&rbp_shr_nm->rb_data));
/*
* Skip the nm_rec_hdr and iterate on the array of struct minor_name
@@ -664,9 +695,10 @@ get_disks_from_didnamespace(
on_list = 0;
r_did = NULL;
+ nonrep_did_listp = Zalloc(sizeof (struct did_list));
/*
- * For a give DID_NM key, locate the corresponding device
+ * For a given DID_NM key, locate the corresponding device
* id from DID_NM_SHR
*/
for (did = &did_shr_rec->device_id[0]; did->did_key != 0;
@@ -691,7 +723,7 @@ get_disks_from_didnamespace(
* If replicated diskset
*/
if (replicated) {
- size_t new_devid_len;
+ size_t new_devid_len, old_devid_len;
char *temp;
/*
* In this case, did->did_devid will
@@ -699,56 +731,206 @@ get_disks_from_didnamespace(
*/
temp = replicated_list_lookup(did->did_size,
did->did_devid);
- new_devid_len = devid_sizeof((ddi_devid_t)temp);
- r_did = Zalloc(new_devid_len);
- (void) memcpy(r_did, temp, new_devid_len);
+ if (temp == NULL) {
+ /* we have a partial replicated set, fake it */
+ new_devid_len = did->did_size;
+ r_did = Zalloc(new_devid_len);
+ (void) memcpy(r_did, did->did_devid,
+ new_devid_len);
+ } else {
+ new_devid_len = devid_sizeof((ddi_devid_t)temp);
+ r_did = Zalloc(new_devid_len);
+ (void) memcpy(r_did, temp, new_devid_len);
+ }
valid_did = r_did;
+ nonrep_did_listp->rdid = Zalloc(new_devid_len);
+ (void) memcpy(nonrep_did_listp->rdid, r_did,
+ new_devid_len);
+ old_devid_len =
+ devid_sizeof((ddi_devid_t)did->did_devid);
+ nonrep_did_listp->did = Zalloc(old_devid_len);
+ (void) memcpy((void *)nonrep_did_listp->did,
+ (void *)did->did_devid, old_devid_len);
} else {
+ size_t new_devid_len;
+
valid_did = did->did_devid;
+ new_devid_len =
+ devid_sizeof((ddi_devid_t)did->did_devid);
+ nonrep_did_listp->did = Zalloc(new_devid_len);
+ (void) memcpy((void *)nonrep_did_listp->did,
+ (void *)did->did_devid, new_devid_len);
}
- /* Get the ctds mapping for that device id */
+ /*
+ * Get a ctds mapping for that device id.
+ * Since disk is being imported into this system,
+ * just use the first ctds in list.
+ */
if (meta_deviceid_to_nmlist(search_path,
(ddi_devid_t)valid_did,
&min->min_name[0], &nmlist) == 0) {
+ /*
+ * We know the disk is available. Use the
+ * device information in nmlist.
+ */
+ assert(nmlist[0].devname != NULL);
+ nonrep_did_listp->devname = Strdup(nmlist[0].devname);
+ nonrep_did_listp->available = MD_IM_DISK_AVAILABLE;
+ avail = 0;
+ mnum = meta_getminor(nmlist[0].dev);
+ devid_free_nmlist(nmlist);
+ } else {
+ /*
+ * The disk is not available. That means we need to
+ * use the (old) device information stored in the
+ * namespace.
+ */
+ /* search in nm space for a match */
+ offset = sizeof (struct nm_rec) -
+ sizeof (struct nm_name);
+ used_size = namesp_rec->r_rec_hdr.r_used_size - offset;
+ for (nmp = &namesp_rec->r_name[0]; nmp->n_key != 0;
+ /* LINTED */
+ nmp = (struct nm_name *)((char *)nmp +
+ NAMSIZ(nmp))) {
+ if (nmp->n_key == min->min_key)
+ break;
+ used_size -= NAMSIZ(nmp);
+ if ((int)used_size <= 0) {
+ md_exit(NULL, 1);
+ }
+ }
- assert(nmlist->devname != NULL);
- dnp = metadrivename(&sp,
- metadiskname(nmlist->devname), ep);
+ if (nmp->n_key == 0) {
+ assert(nmp->n_key != 0);
+ md_exit(NULL, 1);
+ }
+ dev_key = nmp->n_dir_key;
+ snmp = &namesp_shr_rec->sr_name[0];
+ key = snmp->sn_key;
/*
- * Add drive to pnm_rec_t list of physical devices for
- * metastat output.
+ * Use the namespace n_dir_key to look in the
+ * shared namespace. When we find the matching
+ * key, that is the devname and minor number we
+ * want.
*/
- if (imp_flags & META_IMP_VERBOSE) {
- append_pnm_rec(pnm, min->min_key,
- nmlist->devname);
+ offset = sizeof (struct nm_shr_rec) -
+ sizeof (struct nm_shared_name);
+ used_size = namesp_shr_rec->sr_rec_hdr.r_used_size -
+ offset;
+ while (key != 0) {
+ if (dev_key == key) {
+ /*
+ * This complicated looking series
+ * of code creates a devname of the
+ * form <sn_name>/<n_name> which
+ * will look like /dev/dsk/c1t4d0s0.
+ */
+ nonrep_did_listp->devname =
+ Zalloc(strlen(nmp->n_name) +
+ strlen(snmp->sn_name) + 2);
+ (void) strlcpy(
+ nonrep_did_listp->devname,
+ snmp->sn_name,
+ strlen(snmp->sn_name));
+ (void) strlcat(
+ nonrep_did_listp->devname, "/",
+ strlen(nmp->n_name) +
+ strlen(snmp->sn_name) + 2);
+ (void) strlcat(
+ nonrep_did_listp->devname,
+ nmp->n_name,
+ strlen(nmp->n_name) +
+ strlen(snmp->sn_name) + 2);
+ mnum = nmp->n_minor;
+ break;
+ }
+ /* LINTED */
+ snmp = (struct nm_shared_name *)((char *)snmp +
+ SHR_NAMSIZ(snmp));
+ key = snmp->sn_key;
+ used_size -= SHR_NAMSIZ(snmp);
+ if ((int)used_size <= 0) {
+ md_exit(NULL, 1);
+ }
+ }
+ if (key == 0) {
+ nonrep_did_listp->devname = NULL;
+ mnum = 0;
}
- assert(dnp != NULL);
- /* Is it already on the list? */
- for (midp = misp->mis_drives; midp != NULL;
- midp = midp->mid_next) {
- if (midp->mid_dnp == dnp) {
- on_list = 1;
+ nonrep_did_listp->available = MD_IM_DISK_NOT_AVAILABLE;
+ nonrep_did_listp->minor_name = Strdup(min->min_name);
+ avail = 1;
+ drv_key = nmp->n_drv_key;
+ snmp = &namesp_shr_rec->sr_name[0];
+ key = snmp->sn_key;
+ /*
+ * Use the namespace n_drv_key to look in the
+ * shared namespace. When we find the matching
+ * key, that is the driver name for the disk.
+ */
+ offset = sizeof (struct nm_shr_rec) -
+ sizeof (struct nm_shared_name);
+ used_size = namesp_shr_rec->sr_rec_hdr.r_used_size -
+ offset;
+ while (key != 0) {
+ if (drv_key == key) {
+ nonrep_did_listp->driver_name =
+ Strdup(snmp->sn_name);
break;
}
+ /* LINTED */
+ snmp = (struct nm_shared_name *)((char *)snmp +
+ SHR_NAMSIZ(snmp));
+ key = snmp->sn_key;
+ used_size -= SHR_NAMSIZ(snmp);
+ if ((int)used_size <= 0) {
+ md_exit(NULL, 1);
+ }
}
+ if (key == 0)
+ nonrep_did_listp->driver_name = NULL;
+ }
+ dnp = metadrivename(&sp,
+ metadiskname(nonrep_did_listp->devname), ep);
+ /*
+ * Add drive to pnm_rec_t list of physical devices for
+ * metastat output.
+ */
+ if (imp_flags & META_IMP_VERBOSE) {
+ append_pnm_rec(pnm, min->min_key,
+ nonrep_did_listp->devname);
+ }
- devid_sz = devid_sizeof(
- (ddi_devid_t)valid_did);
+ assert(dnp != NULL);
+ /* Is it already on the list? */
+ for (midp = misp->mis_drives; midp != NULL;
+ midp = midp->mid_next) {
+ if (midp->mid_dnp == dnp) {
+ if (midp->mid_available ==
+ nonrep_did_listp->available) {
+ on_list = 1;
+ break;
+ }
+ }
+ }
- if (!on_list) {
- mddb_mb_t *mbp;
- uint_t sliceno;
- mdname_t *rsp;
- int fd = -1;
+ if (!on_list) {
+ mddb_mb_t *mbp;
+ uint_t sliceno;
+ mdname_t *rsp;
+ int fd = -1;
- mbp = Malloc(DEV_BSIZE);
+ mbp = Malloc(DEV_BSIZE);
+ if (!avail) {
/* determine the replica slice */
if (meta_replicaslice(dnp, &sliceno,
ep) != 0) {
Free(mbp);
+ free_did_list(nonrep_did_listp);
continue;
}
@@ -759,18 +941,21 @@ get_disks_from_didnamespace(
if (dnp->vtoc.parts[sliceno].size
== 0) {
Free(mbp);
+ free_did_list(nonrep_did_listp);
continue;
}
if ((rsp = metaslicename(dnp, sliceno,
ep)) == NULL) {
Free(mbp);
+ free_did_list(nonrep_did_listp);
continue;
}
if ((fd = open(rsp->rname,
O_RDONLY| O_NDELAY)) < 0) {
Free(mbp);
+ free_did_list(nonrep_did_listp);
continue;
}
@@ -781,26 +966,26 @@ get_disks_from_didnamespace(
DEV_BSIZE) <= 0) {
mdclrerror(ep);
Free(mbp);
- (void) close(fd);
- continue;
+ free_did_list(nonrep_did_listp);
+ (void) close(fd);
+ continue;
}
(void) close(fd);
- /*
- * If it is replicated diskset,
- * r_did will be non-NULL and
- * devid_sz will be its size.
- * Passing the devname as NULL because field
- * is not currently used for a non-replica disk.
- */
- midpp = drive_append_wrapper(midpp,
- dnp, &did->did_devid, r_did, NULL,
- devid_sz, &min->min_name[0],
- mbp->mb_setcreatetime, NULL);
- Free(mbp);
}
- devid_free_nmlist(nmlist);
+ /*
+ * If it is a replicated diskset,
+ * nonrep_did_listp->rdid will be non-NULL.
+ * The devids, devname, driver name and minor name
+ * are all copied from nonrep_did_listp by drive_append().
+ */
+ midpp = drive_append_wrapper(midpp,
+ dnp, nonrep_did_listp,
+ mnum, mbp->mb_setcreatetime, NULL);
+ Free(mbp);
+ free_did_list(nonrep_did_listp);
}
+ free_did_list(nonrep_did_listp);
}
}
@@ -821,17 +1006,19 @@ set_append(
mddb_mb_t *mb,
mddb_lb_t *lbp,
mddb_rb_t *nm,
+ mddb_rb_t *shrnm,
pnm_rec_t **pnm,
mddb_rb_t *did_nm,
mddb_rb_t *did_shrnm,
uint_t imp_flags,
- int replicated,
md_error_t *ep
)
{
md_im_set_desc_t *misp;
set_t setno = mb->mb_setno;
+ int partial = imp_flags & MD_IM_PARTIAL_DISKSET;
+ int replicated = imp_flags & MD_IM_SET_REPLICATED;
/* run to end of list */
for (; (*mispp != NULL); mispp = &((*mispp)->mis_next))
@@ -844,12 +1031,13 @@ set_append(
misp->mis_flags = MD_IM_SET_REPLICATED;
misp->mis_oldsetno = setno;
+ misp->mis_partial = partial;
/* Get the disks with and without replicas */
- get_replica_disks(misp, did_listp, mb, lbp, ep, replicated);
+ get_replica_disks(misp, did_listp, mb, lbp, ep);
if (nm != NULL && did_nm != NULL && did_shrnm != NULL) {
- get_disks_from_didnamespace(misp, pnm, did_nm,
+ get_disks_from_didnamespace(misp, pnm, nm, shrnm, did_nm,
did_shrnm, imp_flags, replicated, ep);
}
@@ -1404,8 +1592,8 @@ read_nm_rec(
* ids; the caller of this routine is responsible for free'ing up the memory.
*
* Returns:
- * 1 if it's a replicated disk
- * 0 if it's not a replicated disk
+ * MD_IM_SET_REPLICATED if it's a replicated disk
+ * 0 if it's not a replicated disk
*/
static int
is_replicated(
@@ -1426,7 +1614,7 @@ is_replicated(
return (retval);
if (devid_compare((ddi_devid_t)mbp->mb_devid, current_devid) != 0)
- retval = 1;
+ retval = MD_IM_SET_REPLICATED;
if (retval && need_devid) {
new_devid_len = devid_sizeof(current_devid);
@@ -1474,7 +1662,7 @@ free_replicated_disks_list()
* 1 on success
* 0 on failure
*/
-static int
+int
build_replicated_disks_list(
md_error_t *ep,
mddrivenamelist_t *dnlp
@@ -1522,7 +1710,7 @@ build_replicated_disks_list(
}
(void) close(fd);
}
- replicated_disk_list_built = 1;
+ *replicated_disk_list_built = 1;
Free(mbp);
return (1);
@@ -1553,6 +1741,102 @@ free_did_list(
Free(temp->devname);
if (temp->minor_name)
Free(temp->minor_name);
+ if (temp->driver_name)
+ Free(temp->driver_name);
+ Free(temp);
+ }
+}
+
+/*
+ * meta_free_im_replica_info
+ *
+ * Frees the md_im_replica_info list
+ *
+ * Walks the list starting at mirp by following mir_next and releases
+ * each node with Free(). The next pointer is saved before the node is
+ * freed, so the walk never reads a node that has already been released.
+ * A NULL mirp is accepted; the loop simply does not run.
+ */
+static void
+meta_free_im_replica_info(
+ md_im_replica_info_t *mirp
+)
+{
+ md_im_replica_info_t *r, *temp;
+
+ r = mirp;
+
+ while (r != NULL) {
+ temp = r;
+ r = r->mir_next;
+
+ Free(temp);
+ }
+}
+
+/*
+ * meta_free_im_drive_info
+ *
+ * Frees the md_im_drive_info list
+ *
+ * Walks the list starting at midp by following mid_next. For each
+ * entry it releases, when set, the devids (mid_devid, mid_o_devid),
+ * the driver name and device name strings, the replica list (via
+ * meta_free_im_replica_info) and, recursively, the list hanging off
+ * 'overlap'; the entry itself is freed last. The next pointer is
+ * saved before the entry is freed. A NULL midp is accepted; the loop
+ * simply does not run.
+ */
+static void
+meta_free_im_drive_info(
+ md_im_drive_info_t *midp
+)
+{
+ md_im_drive_info_t *d, *temp;
+
+ d = midp;
+
+ while (d != NULL) {
+ temp = d;
+ d = d->mid_next;
+
+ if (temp->mid_available & MD_IM_DISK_NOT_AVAILABLE)
+ /*
+ * dnp is not on the drivenamelist and is a temp
+ * dnp for metaimport if the disk is unavailable.
+ * We need to specifically free it because of this.
+ * If the disk is available, standard drivelist freeing
+ * will kick in so we don't need to do it.
+ */
+ metafreedrivename(temp->mid_dnp);
+ if (temp->mid_devid)
+ Free(temp->mid_devid);
+ if (temp->mid_o_devid)
+ Free(temp->mid_o_devid);
+ if (temp->mid_driver_name)
+ Free(temp->mid_driver_name);
+ if (temp->mid_devname)
+ Free(temp->mid_devname);
+ if (temp->mid_replicas) {
+ meta_free_im_replica_info(temp->mid_replicas);
+ temp->mid_replicas = NULL;
+ }
+ if (temp->overlap) {
+ meta_free_im_drive_info(temp->overlap);
+ temp->overlap = NULL;
+ }
+ Free(temp);
+ }
+}
+
+/*
+ * meta_free_im_set_desc
+ *
+ * Frees the md_im_set_desc_t list
+ *
+ * Walks the list starting at misp by following mis_next. Each set's
+ * drive list (mis_drives), when present, is released with
+ * meta_free_im_drive_info() before the set descriptor itself is freed.
+ * A NULL misp is accepted; the loop simply does not run.
+ */
+void
+meta_free_im_set_desc(
+ md_im_set_desc_t *misp
+)
+{
+ md_im_set_desc_t *s, *temp;
+
+ s = misp;
+
+ while (s != NULL) {
+ temp = s;
+ s = s->mis_next;
+ if (temp->mis_drives) {
+ meta_free_im_drive_info(temp->mis_drives);
+ temp->mis_drives = NULL;
+ }
Free(temp);
}
}
@@ -1577,7 +1861,9 @@ build_did_list(
md_error_t *ep,
int fd,
mddb_mb_t *mb,
+ mddb_lb_t *lbp,
mddb_did_blk_t *lbdidp,
+ mddb_ln_t *lnp,
did_list_t **did_listp,
int replicated
)
@@ -1593,8 +1879,11 @@ build_did_list(
mddb_did_info_t *did_info = NULL;
void *did = NULL;
size_t new_devid_len;
+ int partial = 0;
+ int partial_replicated = 0;
for (cnt = 0; cnt < MDDB_NLB; cnt++) {
+ partial_replicated = 0;
did_info = &lbdidp->blk_info[cnt];
if (!(did_info->info_flags & MDDB_DID_EXISTS))
@@ -1604,7 +1893,7 @@ build_did_list(
new->did = Zalloc(did_info->info_length);
/*
- * If we can re-use the buffer already has been
+ * If we can re-use the buffer that has already been
* read in then just use it. Otherwise free
* the previous one and alloc a new one
*/
@@ -1646,10 +1935,19 @@ build_did_list(
if (replicated) {
temp = replicated_list_lookup(did_info->info_length,
new->did);
- new_devid_len = devid_sizeof((ddi_devid_t)temp);
- new->rdid = Zalloc(new_devid_len);
- (void) memcpy(new->rdid, temp, new_devid_len);
- did = new->rdid;
+ if (temp == NULL) {
+ /* we have a partial replicated set, fake it */
+ new_devid_len = devid_sizeof((ddi_devid_t)new->did);
+ new->rdid = Zalloc(new_devid_len);
+ (void) memcpy(new->rdid, new->did, new_devid_len);
+ did = new->rdid;
+ partial_replicated = 1;
+ } else {
+ new_devid_len = devid_sizeof((ddi_devid_t)temp);
+ new->rdid = Zalloc(new_devid_len);
+ (void) memcpy(new->rdid, temp, new_devid_len);
+ did = new->rdid;
+ }
} else {
did = new->did;
}
@@ -1658,20 +1956,42 @@ build_did_list(
return (-1);
}
- if ((rval = meta_deviceid_to_nmlist(search_path,
- (ddi_devid_t)did, minor_name, &nm)) != 0) {
- *did_listp = head;
- free_did_list(*did_listp);
- *did_listp = NULL;
- (void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD,
- mynode(), NULL, NULL);
- return (ENOTSUP);
+ if (partial_replicated || meta_deviceid_to_nmlist(search_path,
+ (ddi_devid_t)did, minor_name, &nm) != 0) {
+ int len = 0;
+
+ /*
+ * Partial diskset case. We'll need to get the
+ * device information from the metadb instead
+ * of the output (nm) of meta_deviceid_to_nmlist.
+ */
+ len = strlen(lnp->ln_prefixes[0].pre_data) +
+ strlen(lnp->ln_suffixes[0][cnt].suf_data) + 2;
+ new->devname = Zalloc(len);
+ (void) strlcpy(new->devname,
+ lnp->ln_prefixes[0].pre_data,
+ strlen(lnp->ln_prefixes[0].pre_data) + 1);
+ (void) strlcat(new->devname, "/", len);
+ (void) strlcat(new->devname,
+ lnp->ln_suffixes[0][cnt].suf_data, len);
+ new->minor_name = Strdup(minor_name);
+ new->next = head;
+ new->available = MD_IM_DISK_NOT_AVAILABLE;
+ new->driver_name = Strdup(lbp->lb_drvnm[0].dn_data);
+ new->dev = lbp->lb_locators[cnt].l_dev;
+ head = new;
+ partial = ENOTSUP;
+ continue;
}
+ /*
+ * Disk is there. Grab device information from nm structure.
+ */
assert(nm->devname != NULL);
new->devname = Strdup(nm->devname);
new->dev = nm->dev;
new->minor_name = Strdup(minor_name);
+ new->available = MD_IM_DISK_AVAILABLE;
devid_free_nmlist(nm);
@@ -1683,6 +2003,8 @@ build_did_list(
if (bp)
Free(bp);
*did_listp = head;
+ if (partial)
+ return (partial);
return (1);
}
/*
@@ -1698,7 +2020,6 @@ build_did_list(
*/
static int
check_nm_disks(
- md_error_t *ep,
struct devid_min_rec *did_nmp,
struct devid_shr_rec *did_shrnmp
)
@@ -1751,8 +2072,7 @@ check_nm_disks(
*/
if ((meta_deviceid_to_nmlist(search_path,
did, minor_name, &nm)) != 0) {
- (void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD,
- mynode(), NULL, NULL);
+ /* Partial diskset detected */
return (ENOTSUP);
}
devid_free_nmlist(nm);
@@ -1828,6 +2148,86 @@ report_metadb_info(
(void) printf("\n");
}
+/*
+ * meta_replica_quorum
+ *
+ * Decide whether the disks of the set to be imported carry enough
+ * replicas to have quorum. Every replica that sits on a drive not
+ * marked MD_IM_DISK_NOT_AVAILABLE is counted, and the total is compared
+ * with mis_active_replicas:
+ *	odd number of active replicas	need at least (active + 1) / 2
+ *	even number of active replicas	need more than (active + 1) / 2
+ *
+ * RETURN:
+ *	-1	Set doesn't have quorum
+ *	 0	Set does have quorum
+ */
+int
+meta_replica_quorum(
+	md_im_set_desc_t	*misp
+)
+{
+	md_im_drive_info_t	*drive;
+	md_im_replica_info_t	*rep;
+	int			usable = 0;
+	int			have_quorum;
+
+	drive = misp->mis_drives;
+	while (drive != NULL) {
+		/* replicas on an unavailable drive do not count */
+		if (drive->mid_available != MD_IM_DISK_NOT_AVAILABLE) {
+			rep = drive->mid_replicas;
+			while (rep != NULL) {
+				usable++;
+				rep = rep->mir_next;
+			}
+		}
+		drive = drive->mid_next;
+	}
+
+	if ((misp->mis_active_replicas & 1) != 0) {
+		/* odd number of replicas */
+		have_quorum =
+		    (usable >= (misp->mis_active_replicas + 1) / 2);
+	} else {
+		/* even number of replicas */
+		have_quorum =
+		    (usable > (misp->mis_active_replicas + 1) / 2);
+	}
+
+	return (have_quorum ? 0 : -1);
+}
+
+
+/*
+ * pick_good_disk
+ *
+ * Choose the best drive to use for the metaimport command.
+ *
+ * The reference set creation time is the timestamp of the first replica
+ * on the first drive in the list that has any replica. The drive chosen
+ * is the first one that is available, carries at least one active
+ * replica (MDDB_F_ACTIVE) and whose mid_setcreatetimestamp equals the
+ * reference time.
+ *
+ * Returns the chosen drive, or NULL when no drive qualifies. That
+ * includes a set with no drives or with no replicas at all; the lead
+ * drive is no longer dereferenced blindly, so a replica-less first
+ * drive cannot cause a NULL pointer dereference.
+ */
+md_im_drive_info_t *
+pick_good_disk(md_im_set_desc_t *misp)
+{
+	md_timeval32_t		*setcrtime = NULL; /* set creation time */
+	md_im_drive_info_t	*midp;
+	md_im_replica_info_t	*mirp;
+
+	/* Find a replica to take the reference creation time from. */
+	for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
+		if (midp->mid_replicas != NULL) {
+			setcrtime = &(midp->mid_replicas->mir_timestamp);
+			break;
+		}
+	}
+	if (setcrtime == NULL)
+		return (NULL);
+
+	for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
+		/* drive must be available */
+		if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE)
+			continue;
+
+		/* drive must have been created along with this set */
+		if ((setcrtime->tv_sec !=
+		    midp->mid_setcreatetimestamp.tv_sec) ||
+		    (setcrtime->tv_usec !=
+		    midp->mid_setcreatetimestamp.tv_usec))
+			continue;
+
+		/* replica must be active to be a good one */
+		for (mirp = midp->mid_replicas; mirp != NULL;
+		    mirp = mirp->mir_next) {
+			if (mirp->mir_flags & MDDB_F_ACTIVE)
+				return (midp);
+		}
+	}
+	return (NULL);
+}
/*
* report_set_info()
@@ -1848,22 +2248,21 @@ report_set_info(
int fd,
uint_t imp_flags,
int set_count,
+ int overlap,
+ md_im_drive_info_t *overlap_disks,
md_error_t *ep
)
{
int rval = 0;
md_im_drive_info_t *d;
- md_im_replica_info_t *r;
md_im_drive_info_t *good_disk = NULL;
int i;
int in = META_INDENT;
char indent[MAXPATHLEN];
- int dlen = 0;
- md_timeval32_t firstdisktime;
md_timeval32_t lastaccess; /* stores last modified timestamp */
- int set_contains_time_conflict = 0;
- int disk_time_conflict = 0;
-
+ int has_overlap = 0;
+ int no_quorum = 0;
+ int partial = 0;
/* Calculates the correct indentation. */
indent[0] = 0;
@@ -1881,99 +2280,113 @@ report_set_info(
}
}
+ partial = misp->mis_partial;
+ good_disk = pick_good_disk(misp);
+ if (good_disk == NULL) {
+ return (rval);
+ }
+
/*
* Make the distinction between a regular diskset and
- * a replicated diskset.
+ * a replicated diskset. Also make the distinction
+ * between a partial vs. full diskset.
*/
- if (misp->mis_flags & MD_IM_SET_REPLICATED) {
- if (imp_flags & META_IMP_REPORT) {
- (void) printf("%i) %s:\n", set_count, gettext(
- "Found replicated diskset containing disks"));
+ if (partial == MD_IM_PARTIAL_DISKSET) {
+ if (misp->mis_flags & MD_IM_SET_REPLICATED) {
+ if (imp_flags & META_IMP_REPORT) {
+ (void) printf("%i) %s:\n", set_count, gettext(
+ "Found partial replicated diskset "
+ "containing disks"));
+ } else {
+ (void) printf("\n%s:\n", gettext(
+ "Importing partial replicated diskset "
+ "containing disks"));
+ }
} else {
- (void) printf("\n%s:\n", gettext(
- "Importing replicated diskset containing disks"));
+ if (imp_flags & META_IMP_REPORT) {
+ (void) printf("%i) %s:\n", set_count, gettext(
+ "Found partial regular diskset containing "
+ "disks"));
+ } else {
+ (void) printf("\n%s:\n", gettext(
+ "Importing partial regular diskset "
+ "containing disks"));
+ }
}
} else {
- if (imp_flags & META_IMP_REPORT) {
- (void) printf("%i) %s:\n", set_count, gettext(
- "Found regular diskset containing disks"));
+ if (misp->mis_flags & MD_IM_SET_REPLICATED) {
+ if (imp_flags & META_IMP_REPORT) {
+ (void) printf("%i) %s:\n", set_count, gettext(
+ "Found replicated diskset containing "
+ "disks"));
+ } else {
+ (void) printf("\n%s:\n", gettext(
+ "Importing replicated diskset containing "
+ "disks"));
+ }
} else {
- (void) printf("\n%s:\n", gettext(
- "Importing regular diskset containing disks"));
+ if (imp_flags & META_IMP_REPORT) {
+ (void) printf("%i) %s:\n", set_count, gettext(
+ "Found regular diskset containing disks"));
+ } else {
+ (void) printf("\n%s:\n", gettext(
+ "Importing regular diskset containing "
+ "disks"));
+ }
}
}
-
/*
- * Save the set creation time for the first disk in the
- * diskset.
+ * Check each drive in the set. If it's unavailable or
+ * an overlap tell the user.
*/
for (d = misp->mis_drives; d != NULL; d = d->mid_next) {
- dlen = max(dlen, strlen(d->mid_dnp->cname));
- if (good_disk == NULL) {
- for (r = d->mid_replicas; r != NULL; r = r->mir_next) {
- if (r->mir_flags & MDDB_F_ACTIVE) {
- good_disk = d;
- firstdisktime =
- d->mid_setcreatetimestamp;
+ (void) fprintf(stdout, " %s", d->mid_dnp->cname);
+ if (d->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
+ (void) fprintf(stdout, " (UNAVAIL)");
+ }
+ if (overlap) {
+ md_im_drive_info_t **chain;
+ /*
+ * There is the potential for an overlap, see if
+ * this disk is one of the overlapped disks.
+ */
+ for (chain = &overlap_disks; *chain != NULL;
+ chain = &(*chain)->overlap) {
+ if (strcmp(d->mid_dnp->cname,
+ (*chain)->mid_dnp->cname) == 0) {
+ (void) fprintf(stdout, " (CONFLICT)");
+ has_overlap = 1;
break;
}
}
- } else {
- break;
}
+ (void) fprintf(stdout, "\n");
}
-
/*
- * Compares the set creation time from the first disk in the
- * diskset to the diskset creation time on all other
- * disks in the diskset.
- * If they are different then the disk probably belongs to a
- * different diskset so we will print out a warning.
- *
- * Looping through all drives in the diskset to print
- * out information about the drive.
+ * This note explains the (UNAVAIL) that appears next to the
+ * disks in the diskset that are not available.
*/
- for (d = misp->mis_drives; d != NULL; disk_time_conflict = 0,
- d = d->mid_next) {
- /*
- * Verify that the disk's seconds and micro-seconds fields
- * match the fields for the good_disk.
- */
- if ((firstdisktime.tv_sec !=
- d->mid_setcreatetimestamp.tv_sec) ||
- (firstdisktime.tv_usec !=
- d->mid_setcreatetimestamp.tv_usec)) {
- disk_time_conflict = 1;
- set_contains_time_conflict = 1;
- }
-
- /* Printing disk names. */
- if (disk_time_conflict == 1) {
- /* print '*' next to conflicting disk */
- (void) printf("%s%-*.*s *\n", indent,
- dlen, dlen, d->mid_dnp->cname);
- } else {
- (void) printf("%s%-*.*s\n", indent,
- dlen, dlen, d->mid_dnp->cname);
- }
+ if (partial) {
+ (void) printf("%s%s\n%s%s\n\n", indent,
+ gettext("(UNAVAIL) WARNING: This disk is unavailable on"
+ " this system."), indent, gettext("Import may corrupt "
+ "data in the diskset."));
}
- (void) printf("\n");
/*
- * This note explains the "*" that appears next to the
- * disks with metadbs' whose lb_inittime timestamp does not
+ * This note explains the (CONFLICT) that appears next to the
+ * disks whose lb_inittime timestamp does not
* match the rest of the diskset.
*/
- if (set_contains_time_conflict) {
+ if (has_overlap) {
(void) printf("%s%s\n%s%s\n\n", indent,
- gettext("* WARNING: This disk has been reused in "
- "another diskset."), indent, gettext("Import may corrupt "
- "data in the diskset."));
+ gettext("(CONFLICT) WARNING: This disk has been reused in "
+ "another diskset or system configuration."), indent,
+ gettext("Import may corrupt data in the diskset."));
}
-
/*
* If the verbose flag was given on the command line,
* we will print out the metastat -c information , the
@@ -2039,6 +2452,10 @@ report_set_info(
gettext("For more information about this diskset"),
indent, myname, good_disk->mid_dnp->cname);
}
+
+ if (meta_replica_quorum(misp) != 0)
+ no_quorum = 1;
+
/*
* TRANSLATION_NOTE
*
@@ -2047,9 +2464,15 @@ report_set_info(
* (untranslatable) that the user may use to import
* the specified diskset.
*/
- (void) printf("%s%s:\n%s %s -s <newsetname> %s\n", indent,
- gettext("To import this diskset"), indent, myname,
- good_disk->mid_dnp->cname);
+ if (partial || has_overlap || no_quorum) {
+ (void) printf("%s%s:\n%s %s -f -s <newsetname> %s\n",
+ indent, gettext("To import this diskset"), indent,
+ myname, good_disk->mid_dnp->cname);
+ } else {
+ (void) printf("%s%s:\n%s %s -s <newsetname> %s\n",
+ indent, gettext("To import this diskset"), indent,
+ myname, good_disk->mid_dnp->cname);
+ }
}
(void) printf("\n\n");
@@ -2063,12 +2486,12 @@ report_set_info(
* Scans a given drive for set specific information. If the given drive
* has a shared metadb, scans the shared metadb for information pertaining
* to the set.
+ * If imp_flags has META_IMP_PASS1 set don't report.
*
* Returns:
* <0 for failure
* 0 success but no replicas were found
* 1 success and a replica was found
- * ENOTSUP for partial disksets detected
*/
int
meta_get_and_report_set_info(
@@ -2077,6 +2500,8 @@ meta_get_and_report_set_info(
int local_mb_ok,
uint_t imp_flags,
int *set_count,
+ int overlap,
+ md_im_drive_info_t *overlap_disks,
md_error_t *ep
)
{
@@ -2100,13 +2525,15 @@ meta_get_and_report_set_info(
mddrivenamelist_t *dnlp;
mddrivename_t *dnp;
md_im_names_t cnames = { 0, NULL};
- char *nm = NULL;
+ char *nm = NULL, *shrnm = NULL;
char *did_nm = NULL, *did_shrnm = NULL;
struct nm_rec *nmp;
+ struct nm_shr_rec *snmp;
struct devid_shr_rec *did_shrnmp;
struct devid_min_rec *did_nmp;
int extended_namespace = 0;
int replicated = 0;
+ int partial = 0;
pnm_rec_t *pnm = NULL; /* list of physical devs in set */
md_im_set_desc_t *misp;
@@ -2198,7 +2625,18 @@ meta_get_and_report_set_info(
* the locator block are invalid and we need to build a list of
* replicated disks.
*/
- if (replicated && !replicated_disk_list_built) {
+ if (imp_flags & META_IMP_PASS1) {
+ /*
+ * We need to do this for both passes but
+ * replicated_disk_list_built is global so we need some way
+ * to determine which pass we're on. Set it to the appropriate
+ * pass's flag.
+ */
+ replicated_disk_list_built = &replicated_disk_list_built_pass1;
+ } else {
+ replicated_disk_list_built = &replicated_disk_list_built_pass2;
+ }
+ if (replicated && !(*replicated_disk_list_built)) {
/*
* if there's a replicated diskset involved, we need to
* scan the system one more time and build a list of all
@@ -2214,11 +2652,6 @@ meta_get_and_report_set_info(
goto out;
}
- rval = build_did_list(ep, fd, mbp, lbdidp, &did_listp, replicated);
-
- if ((rval <= 0) || (rval == ENOTSUP))
- goto out;
-
/*
* Until here, we've gotten away with fixed sizes for the
* master block and locator block. The locator names,
@@ -2231,6 +2664,20 @@ meta_get_and_report_set_info(
if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0)
goto out;
+ rval = build_did_list(ep, fd, mbp, lbp, lbdidp, lnp, &did_listp,
+ replicated);
+
+ /*
+ * An rval of ENOTSUP means we have a partial diskset. We'll want
+ * to set the partial variable so we can pass this information
+ * to set_append_wrapper later for placing on the misp list.
+ */
+ if (rval == ENOTSUP)
+ partial = MD_IM_PARTIAL_DISKSET;
+
+ if (rval < 0)
+ goto out;
+
/*
* Read in the NM record
* If no NM record was found, it still is a valid configuration
@@ -2260,6 +2707,20 @@ meta_get_and_report_set_info(
goto out;
}
+ if ((rval = read_nm_rec(ep, fd, mbp, lbp, &shrnm, MDDB_SHR_NM,
+ rsp->cname)) < 0)
+ goto out;
+ else if (rval == 0)
+ goto append;
+
+ /*LINTED*/
+ snmp = (struct nm_shr_rec *)(shrnm + sizeof (mddb_rb_t));
+ if (snmp->sr_rec_hdr.r_next_recid != (mddb_recid_t)0) {
+ extended_namespace = 1;
+ rval = 0;
+ goto out;
+ }
+
if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm,
MDDB_DID_NM, rsp->cname)) < 0)
goto out;
@@ -2295,23 +2756,36 @@ meta_get_and_report_set_info(
* are actually available. If they aren't we'll return with
* an ENOTSUP error which indicates a partial diskset.
*/
- rval = check_nm_disks(ep, did_nmp, did_shrnmp);
- if ((rval < 0) || (rval == ENOTSUP))
+ rval = check_nm_disks(did_nmp, did_shrnmp);
+
+ /*
+ * An rval of ENOTSUP means we have a partial diskset. We'll want
+ * to set the partial variable so we can pass this information
+ * to set_append_wrapper later for placing on the misp list.
+ */
+ if (rval == ENOTSUP)
+ partial = MD_IM_PARTIAL_DISKSET;
+
+ if (rval < 0)
goto out;
append:
/* Finally, we've got what we need to process this replica. */
misp = set_append(mispp, did_listp, mbp, lbp,
/*LINTED*/
- (mddb_rb_t *)nm, &pnm, (mddb_rb_t *)did_nm, (mddb_rb_t *)did_shrnm,
- imp_flags, replicated, ep);
-
- *set_count += 1;
- rval = report_set_info(misp, mbp, lbp,
- /*LINTED*/
- (mddb_rb_t *)nm, &pnm, rsp, fd, imp_flags, *set_count, ep);
- if (rval < 0)
- goto out;
+ (mddb_rb_t *)nm, (mddb_rb_t *)shrnm, &pnm, (mddb_rb_t *)did_nm,
+ /*LINTED*/
+ (mddb_rb_t *)did_shrnm, (imp_flags | partial | replicated), ep);
+
+ if (!(imp_flags & META_IMP_PASS1)) {
+ *set_count += 1;
+ rval = report_set_info(misp, mbp, lbp,
+ /*LINTED*/
+ (mddb_rb_t *)nm, &pnm, rsp, fd, imp_flags, *set_count,
+ overlap, overlap_disks, ep);
+ if (rval < 0)
+ goto out;
+ }
/* Return the fact that we found at least one set */
rval = 1;
@@ -2376,48 +2850,563 @@ meta_getminor_name(
return (ret_minor_name);
}
-static int
-meta_replica_quorum(
- md_im_set_desc_t *misp,
- md_error_t *ep
+/*
+ * meta_update_mb_did
+ *
+ * Update or create the master block with the new set number.
+ * If a non-null old_devid pointer is given, the devid in the
+ * master block is also replaced by new_devid (only when the block
+ * already carries a devid, i.e. mb_devid_magic is MDDB_MAGIC_DE).
+ *
+ * This routine is called during the import of a diskset
+ * (meta_imp_update_mb) and during the take of a diskset that has
+ * some unresolved replicated drives (meta_unrslv_replicated_mb).
+ *
+ * The block is read from and written back to the replica slice of dnp
+ * at block offset "offset". When replica_present is zero and the block
+ * found there is not a dummy master block, a new dummy is created with
+ * meta_mkdummymaster() and nothing else is changed.
+ *
+ * Failures are not reported through the return value; ep is only
+ * handed to meta_replicaslice() and metaslicename(). The descriptor
+ * opened on the slice and the block buffer are released on every path.
+ *
+ * Returns : nothing (void)
+ */
+static void
+meta_update_mb_did(
+	mdsetname_t	*sp,
+	mddrivename_t	*dnp,		/* raw name of drive with mb */
+	void		*new_devid,	/* devid to be stored in mb */
+	int		new_devid_len,
+	void		*old_devid,	/* old devid stored in mb */
+	int		replica_present, /* does replica follow mb? */
+	int		offset,
+	md_error_t	*ep
)
{
+	int		fd;
+	struct mddb_mb	*mbp = NULL;
+	uint_t		sliceno;
+	mdname_t	*rsp;
+
+	/* determine the replica slice */
+	if (meta_replicaslice(dnp, &sliceno, ep) != 0)
+		return;
+
+	/*
+	 * if the replica slice size is zero,
+	 * don't bother opening
+	 */
+	if (dnp->vtoc.parts[sliceno].size == 0)
+		return;
+
+	if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
+		return;
+
+	if ((fd = open(rsp->rname, O_RDWR | O_NDELAY)) < 0)
+		return;
+
+	/*
+	 * From here on every exit goes through "out" so that the
+	 * descriptor is closed and the buffer is freed.
+	 */
+	if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0)
+		goto out;
+
+	mbp = Zalloc(DEV_BSIZE);
+	if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
+		goto out;
+
+	/* If no replica on disk, check for dummy mb */
+	if (!replica_present) {
+		/*
+		 * Check to see if there is a dummy there. If not
+		 * create one. This would happen if the set was
+		 * created before the master block dummy code was
+		 * implemented.
+		 *
+		 * NOTE(review): assumes meta_mkdummymaster() leaves fd
+		 * open for the caller to close - confirm.
+		 */
+		if ((mbp->mb_magic != MDDB_MAGIC_DU) ||
+		    (mbp->mb_revision != MDDB_REV_MB)) {
+			meta_mkdummymaster(sp, fd, offset);
+			goto out;
+		}
+	}
+
+	mbp->mb_setno = sp->setno;
+	if (meta_gettimeofday(&mbp->mb_timestamp) == -1)
+		goto out;
+
+	/*
+	 * If old_devid is non-NULL then we are dealing with a
+	 * replicated diskset and the devid needs to be updated.
+	 */
+	if (old_devid && (mbp->mb_devid_magic == MDDB_MAGIC_DE)) {
+		if (mbp->mb_devid_len)
+			(void) memset(mbp->mb_devid, 0,
+			    mbp->mb_devid_len);
+		(void) memcpy(mbp->mb_devid,
+		    (char *)new_devid, new_devid_len);
+		mbp->mb_devid_len = new_devid_len;
+	}
+
+	crcgen((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum,
+	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL);
+
+	/*
+	 * Now write out the changes to disk.
+	 * If an error occurs, just continue on.
+	 * Next take of set will register this drive as
+	 * an unresolved replicated drive and will attempt
+	 * to fix the master block again.
+	 */
+	if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0)
+		goto out;
+	(void) write(fd, mbp, DEV_BSIZE);
+
+out:
+	if (mbp != NULL)
+		Free(mbp);
+	(void) close(fd);
+}
+
+
+/*
+ * meta_imp_update_mb
+ *
+ * Update the master block information during an import.
+ * Takes an import set descriptor.
+ *
+ * Every available drive of the set is visited. On a drive that has
+ * replicas, the master block of each replica is updated at that
+ * replica's own offset (mir_offset). On a drive without replicas the
+ * single dummy master block is updated at the default offset. The
+ * default offset is kept separate from the per-replica offsets so that
+ * a replica-less drive is never updated at an offset left over from a
+ * previously visited drive.
+ *
+ * Processing stops at the first drive after which ep holds an error.
+ *
+ * Returns : nothing (void)
+ */
+void
+meta_imp_update_mb(mdsetname_t *sp, md_im_set_desc_t *misp, md_error_t *ep)
+{
md_im_drive_info_t *midp;
mddrivename_t *dnp;
- md_im_replica_info_t *midr;
- mdname_t *np;
- struct stat st_buf;
- uint_t rep_slice;
- int replica_count = 0;
+ int dummy_offset = 16; /* default mb offset is 16 */
- for (midp = misp->mis_drives; midp != NULL;
- midp = midp->mid_next) {
+ for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
+ /*
+ * If disk isn't available we can't update, so go to next
+ */
+ if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
+ continue;
+ }
dnp = midp->mid_dnp;
- if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
- ((np = metaslicename(dnp, rep_slice, ep))
+ if (midp->mid_replicas) {
+ md_im_replica_info_t *mirp;
+
+ /*
+ * If we have replicas on this disk we need to make
+ * sure that we update the master block on every
+ * replica on the disk.
+ */
+ for (mirp = midp->mid_replicas; mirp != NULL;
+ mirp = mirp->mir_next) {
+ meta_update_mb_did(sp, dnp, midp->mid_devid,
+ midp->mid_devid_sz, midp->mid_o_devid,
+ 1, mirp->mir_offset, ep);
+ }
+ } else {
+ /* No replicas, just update the one dummy mb */
+ meta_update_mb_did(sp, dnp, midp->mid_devid,
+ midp->mid_devid_sz, midp->mid_o_devid,
+ 0, dummy_offset, ep);
+ }
+ if (!mdisok(ep))
+ return;
+ }
+}
+
+/*
+ * meta_unrslv_replicated_common
+ *
+ * Given a drive_desc and a drivenamelist pointer,
+ * return the devidp associated with the drive_desc,
+ * the replicated (new) devidp associated with the drive_desc
+ * and the specific mddrivename in the drivenamelist that
+ * matches the replicated (new) devidp.
+ *
+ * Typically the drivenamelist pointer would be setup by
+ * the meta_prune_cnames function.
+ *
+ * Calling function must free devidp using devid_free. devidp is only
+ * set up when 0 is returned. Even then *new_devidp is the unchecked
+ * result of replicated_list_lookup(), so callers should test it
+ * before use.
+ *
+ * The minor name obtained along the way is temporary and is released
+ * here once the combined "devid/minor" string has been built.
+ *
+ * Returns 0 - success, found new_devidp and dnp_new.
+ * Returns 1 - failure, didn't find new devid info
+ */
+static int
+meta_unrslv_replicated_common(
+ int myside,
+ md_drive_desc *dd, /* drive list for diskset */
+ mddrivenamelist_t *dnlp, /* list of drives on current system */
+ ddi_devid_t *devidp, /* old devid */
+ ddi_devid_t *new_devidp, /* replicated (new) devid */
+ mddrivename_t **dnp_new, /* replicated drive name */
+ md_error_t *ep
+)
+{
+ mddrivename_t *dnp; /* drive name of old drive */
+ mdsidenames_t *sn = NULL;
+ uint_t rep_slice;
+ mdname_t *np;
+ char *minor_name = NULL;
+ char *devid_str = NULL;
+ size_t len;
+ int devid_sz;
+ mddrivenamelist_t *dp;
+ ddi_devid_t old_devid; /* devid of old drive */
+ ddi_devid_t new_devid; /* devid of new replicated drive */
+ ddi_devid_t dnp_new_devid; /* devid derived from drive */
+ /* name of replicated drive */
+
+ dnp = dd->dd_dnp;
+
+ /* Get old devid from drive record; nothing to look up without it */
+ if (devid_str_decode(dd->dd_dnp->devid,
+ &old_devid, NULL) != 0)
+ return (1);
+
+ /* Look up replicated (new) devid */
+ new_devid = replicated_list_lookup(
+ devid_sizeof(old_devid), old_devid);
+
+ devid_free(old_devid);
+
+ if (new_devid == NULL)
+ return (1);
+
+ /*
+ * Using new_devid, find a drivename entry with a matching devid.
+ * Use the passed in dnlp since it has the new (replicated) disknames
+ * in it.
+ */
+ for (dp = dnlp; dp != NULL; dp = dp->next) {
+ /* skip drives whose devid string cannot be decoded */
+ if (devid_str_decode(dp->drivenamep->devid,
+ &dnp_new_devid, NULL) != 0)
+ continue;
+
+ if (dnp_new_devid == NULL)
+ continue;
+
+ if (devid_compare(new_devid, dnp_new_devid) == 0) {
+ devid_free(dnp_new_devid);
+ break;
+ }
+ devid_free(dnp_new_devid);
+ }
+
+ /* If can't find new name for drive - nothing to update */
+ if (dp == NULL)
+ return (1);
+
+ /*
+ * Setup returned value to be the drivename structure associated
+ * with new (replicated) drive.
+ */
+ *dnp_new = dp->drivenamep;
+
+ /*
+ * Need to return the new devid including the minor name.
+ * Find the minor_name here using the sidename or by
+ * looking in the namespace.
+ */
+ for (sn = dnp->side_names; sn != NULL; sn = sn->next) {
+ if (sn->sideno == myside)
+ break;
+ }
+
+ /*
+ * The disk has no side name information
+ */
+ if (sn == NULL) {
+ if ((meta_replicaslice(*dnp_new, &rep_slice, ep) != 0) ||
+ ((np = metaslicename(*dnp_new, rep_slice, ep))
== NULL)) {
mdclrerror(ep);
- continue;
+ return (1);
}
- if (stat(np->bname, &st_buf) != 0)
+ if (np->dev == NODEV64)
+ return (1);
+
+ /*
+ * minor_name will be NULL if dnp->devid == NULL
+ * - see metagetvtoc()
+ */
+ if (np->minor_name == NULL)
+ return (1);
+ else
+ minor_name = Strdup(np->minor_name);
+
+ } else {
+ /*
+ * NOTE(review): assumes meta_getdidminorbykey() hands back
+ * caller-owned heap storage (like the Strdup() copy above)
+ * or NULL on failure - confirm.
+ */
+ minor_name = meta_getdidminorbykey(
+ MD_LOCAL_SET, sn->sideno + SKEW,
+ dnp->side_names_key, ep);
+ if (!mdisok(ep) || minor_name == NULL) {
+ if (minor_name != NULL)
+ Free(minor_name);
+ return (1);
+ }
+ }
+ /*
+ * Now, use the old devid with minor name to lookup
+ * the replicated (new) devid that will also contain
+ * a minor name.
+ */
+ len = strlen(dnp->devid) + strlen(minor_name) + 2;
+ devid_str = (char *)Malloc(len);
+ (void) snprintf(devid_str, len, "%s/%s", dnp->devid,
+ minor_name);
+ /* the minor name has been copied into devid_str; release it */
+ Free(minor_name);
+ if (devid_str_decode(devid_str, devidp, NULL) != 0) {
+ Free(devid_str);
+ return (1);
+ }
+ Free(devid_str);
+ devid_sz = devid_sizeof((ddi_devid_t)*devidp);
+ *new_devidp = replicated_list_lookup(devid_sz, *devidp);
+ return (0);
+}
+
+/*
+ * meta_unrslv_replicated_mb
+ *
+ * Update the master block information during a take.
+ * Takes an md_drive_desc descriptor.
+ *
+ * Only drives flagged MD_DR_FIX_MB_DID are processed. For each of them
+ * the old and replicated (new) devids are resolved with
+ * meta_unrslv_replicated_common(); drives that cannot be resolved are
+ * skipped. When a new devid exists, the master block(s) on the
+ * replicated drive are rewritten and the drive record is flagged
+ * MD_DR_OK.
+ *
+ * Processing stops at the first drive after which ep holds an error.
+ *
+ * Returns : nothing (void)
+ */
+void
+meta_unrslv_replicated_mb(
+ mdsetname_t *sp,
+ md_drive_desc *dd, /* drive list for diskset */
+ mddrivenamelist_t *dnlp, /* list of drives on current system */
+ md_error_t *ep
+)
+{
+ md_drive_desc *d = NULL, *d_save;
+ mddrivename_t *dnp; /* dnp of old drive */
+ mddrivename_t *dnp_new; /* dnp of new (replicated) drive */
+ mddrivename_t *dnp_save; /* saved copy needed to restore */
+ ddi_devid_t devidp, new_devidp;
+ int myside;
+
+ if ((myside = getmyside(sp, ep)) == MD_SIDEWILD)
+ return;
+
+ for (d = dd; d != NULL; d = d->dd_next) {
+ dnp = d->dd_dnp;
+ if (dnp == NULL)
+ continue;
+
+ /* If don't need to update master block - skip it. */
+ if (!(d->dd_flags & MD_DR_FIX_MB_DID))
continue;
/*
- * The drive is okay now count its replicas
+ * Get old and replicated (new) devids associated with this
+ * drive. Also, get the new (replicated) drivename structure.
*/
- for (midr = midp->mid_replicas; midr != NULL;
- midr = midr->mir_next) {
- replica_count++;
+ if (meta_unrslv_replicated_common(myside, d, dnlp, &devidp,
+ &new_devidp, &dnp_new, ep) != 0) {
+ mdclrerror(ep);
+ continue;
+ }
+
+ if (new_devidp) {
+ int offset = 16; /* default mb offset is 16 */
+ int dbcnt;
+
+ if (d->dd_dbcnt) {
+ /*
+ * Update each master block on the disk
+ */
+ /* one mb per replica, spaced dd_dbsize apart from the default offset */
+ for (dbcnt = d->dd_dbcnt; dbcnt != 0; dbcnt--) {
+ meta_update_mb_did(sp, dnp_new,
+ new_devidp,
+ devid_sizeof(new_devidp), devidp,
+ 1, offset, ep);
+ offset += d->dd_dbsize;
+ }
+ } else {
+ /* update the one dummy mb */
+ meta_update_mb_did(sp, dnp_new, new_devidp,
+ devid_sizeof(new_devidp), devidp,
+ 0, offset, ep);
+ }
+ if (!mdisok(ep)) {
+ devid_free(devidp);
+ return;
+ }
+
+ /* Set drive record flags to ok */
+ /* Just update this one drive record. */
+ /*
+ * d is temporarily turned into a one-element list that names
+ * the replicated drive; link and name are restored afterwards.
+ */
+ d_save = d->dd_next;
+ dnp_save = d->dd_dnp;
+ d->dd_next = NULL;
+ d->dd_dnp = dnp_new;
+ /* Ignore failure since no bad effect. */
+ (void) clnt_upd_dr_flags(mynode(), sp, d,
+ MD_DR_OK, ep);
+ d->dd_next = d_save;
+ d->dd_dnp = dnp_save;
}
+ devid_free(devidp);
}
+}
- if (replica_count < (misp->mis_active_replicas + 1)/2)
- return (-1);
+/*
+ * meta_update_nm_rr_did
+ *
+ * Change a devid stored in the diskset namespace and in the local set
+ * namespace with the new devid.
+ *
+ * This routine is called during the import of a diskset
+ * (meta_imp_update_nm) and during the take of a diskset that has
+ * some unresolved replicated drives (meta_unrslv_replicated_nm).
+ *
+ * Private copies of both devids are handed to the driver through the
+ * MD_IOCUPDATE_NM_RR_DID ioctl and freed again before returning. If
+ * the ioctl fails, its error is moved into ep.
+ *
+ * Returns : nothing (void)
+ */
+static void
+meta_update_nm_rr_did(
+ mdsetname_t *sp,
+ void *old_devid, /* old devid being replaced */
+ int old_devid_sz,
+ void *new_devid, /* devid to be stored in nm */
+ int new_devid_sz,
+ int import_flag, /* called during import? */
+ md_error_t *ep
+)
+{
+ struct mddb_config c;
- return (0);
+ (void) memset(&c, 0, sizeof (c));
+ c.c_setno = sp->setno;
+
+ /* During import, do NOT update the local namespace. */
+ if (import_flag)
+ c.c_flags = MDDB_C_IMPORT;
+
+ /* new devid travels in l_devid, the one being replaced in l_old_devid */
+ c.c_locator.l_devid = (uintptr_t)Malloc(new_devid_sz);
+ (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
+ new_devid, new_devid_sz);
+ c.c_locator.l_devid_sz = new_devid_sz;
+ c.c_locator.l_devid_flags =
+ MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
+ c.c_locator.l_old_devid = (uint64_t)(uintptr_t)Malloc(old_devid_sz);
+ (void) memcpy((void *)(uintptr_t)c.c_locator.l_old_devid,
+ old_devid, old_devid_sz);
+ c.c_locator.l_old_devid_sz = old_devid_sz;
+ if (metaioctl(MD_IOCUPDATE_NM_RR_DID, &c, &c.c_mde, NULL) != 0) {
+ (void) mdstealerror(ep, &c.c_mde);
+ }
+ Free((void *)(uintptr_t)c.c_locator.l_devid);
+ Free((void *)(uintptr_t)c.c_locator.l_old_devid);
+}
+
+/*
+ * meta_imp_update_nm
+ *
+ * Replace the devids stored in the diskset namespace with the new
+ * devids. This routine is called during the import of a remotely
+ * replicated diskset.
+ *
+ * Every available drive of the import set descriptor is handed to
+ * meta_update_nm_rr_did() with the import flag set. The walk ends
+ * early as soon as ep holds an error.
+ *
+ * Returns : nothing (void)
+ */
+void
+meta_imp_update_nm(mdsetname_t *sp, md_im_set_desc_t *misp, md_error_t *ep)
+{
+	md_im_drive_info_t	*drive = misp->mis_drives;
+
+	while (drive != NULL) {
+		/* a disk that isn't available can't be updated */
+		if (drive->mid_available != MD_IM_DISK_NOT_AVAILABLE) {
+			meta_update_nm_rr_did(sp, drive->mid_o_devid,
+			    drive->mid_o_devid_sz, drive->mid_devid,
+			    drive->mid_devid_sz, 1, ep);
+			if (!mdisok(ep))
+				return;
+		}
+		drive = drive->mid_next;
+	}
+}
+
+/*
+ * meta_unrslv_replicated_nm
+ *
+ * Change a devid stored in the diskset namespace and in the local set
+ * namespace with the new devid.
+ *
+ * This routine is called during the take of a diskset that has
+ * some unresolved replicated drives.
+ *
+ * Only drives flagged MD_DR_FIX_LB_NM_DID are processed. For each of
+ * them the old and replicated (new) devids are resolved with
+ * meta_unrslv_replicated_common(); drives that cannot be resolved are
+ * skipped. The devid returned through devidp is released with
+ * devid_free() on every path. No other devid is decoded here, so
+ * nothing else needs freeing.
+ *
+ * Processing stops at the first drive for which the namespace update
+ * leaves an error in ep.
+ *
+ * Returns : nothing (void)
+ */
+void
+meta_unrslv_replicated_nm(
+	mdsetname_t		*sp,
+	md_drive_desc		*dd,	/* drive list for diskset */
+	mddrivenamelist_t	*dnlp,	/* list of drives on current system */
+	md_error_t		*ep
+)
+{
+	md_drive_desc	*d = NULL;
+	mddrivename_t	*dnp;		/* drive name of old drive */
+	mddrivename_t	*dnp_new;	/* drive name of new (repl) drive */
+	ddi_devid_t	devidp, new_devidp;
+	char		*devid_old_save;
+	mdsetname_t	*local_sp = NULL;
+	int		myside;
+
+	if ((myside = getmyside(sp, ep)) == MD_SIDEWILD)
+		return;
+
+	for (d = dd; d != NULL; d = d->dd_next) {
+		dnp = d->dd_dnp;
+		if (dnp == NULL)
+			continue;
+
+		/* If don't need to update namespace - skip it. */
+		if (!(d->dd_flags & MD_DR_FIX_LB_NM_DID))
+			continue;
+
+		/*
+		 * Get old and replicated (new) devids associated with this
+		 * drive. Also, get the new (replicated) drivename structure.
+		 */
+		if (meta_unrslv_replicated_common(myside, d, dnlp, &devidp,
+		    &new_devidp, &dnp_new, ep) != 0) {
+			mdclrerror(ep);
+			continue;
+		}
+
+		if (new_devidp) {
+			meta_update_nm_rr_did(sp, devidp,
+			    devid_sizeof(devidp), new_devidp,
+			    devid_sizeof(new_devidp), 0, ep);
+			if (!mdisok(ep)) {
+				devid_free(devidp);
+				return;
+			}
+		}
+		devid_free(devidp);
+
+		/*
+		 * Using the new devid, fix up the name.
+		 * If meta_upd_ctdnames fails, the next take will re-resolve
+		 * the name from the new devid.
+		 *
+		 * The old drive name temporarily borrows the devid string
+		 * of the replicated drive; it is restored right after.
+		 */
+		local_sp = metasetname(MD_LOCAL_NAME, ep);
+		devid_old_save = dnp->devid;
+		dnp->devid = dnp_new->devid;
+		(void) meta_upd_ctdnames(&local_sp, 0, (myside + SKEW),
+		    dnp, NULL, ep);
+		mdclrerror(ep);
+		dnp->devid = devid_old_save;
+	}
}
static set_t
@@ -2472,9 +3461,17 @@ meta_imp_set(
struct mddb_config c;
mdname_t *np;
md_im_replica_info_t *mirp;
- char setnum_link[MAXPATHLEN];
- char setname_link[MAXPATHLEN];
+ set_t setno;
+ mdcinfo_t *cinfo;
+ mdsetname_t *sp;
+ mddrivenamelist_t *dnlp = NULL;
+ mddrivenamelist_t **dnlpp = &dnlp;
char *minor_name = NULL;
+ int stale_flag = 0;
+ md_set_desc *sd;
+ int partial_replicated_flag = 0;
+ md_error_t xep = mdnullerror;
+ md_setkey_t *cl_sk;
(void) memset(&c, 0, sizeof (c));
(void) strlcpy(c.c_setname, setname, sizeof (c.c_setname));
@@ -2493,45 +3490,99 @@ meta_imp_set(
/*
* Find the next available set number
*/
- if ((c.c_setno = meta_imp_setno(ep)) == MD_SET_BAD) {
+ if ((setno = meta_imp_setno(ep)) == MD_SET_BAD) {
return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
mynode(), NULL, c.c_setname));
}
+ c.c_setno = setno;
if (meta_gettimeofday(&tp) == -1) {
return (mdsyserror(ep, errno, NULL));
}
c.c_timestamp = tp;
/* Check to see if replica quorum requirement is fulfilled */
- if (!force && meta_replica_quorum(misp, ep) == -1)
- return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD,
- mynode(), NULL, c.c_setname));
+ if (meta_replica_quorum(misp) == -1) {
+ if (!force) {
+ return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD,
+ mynode(), NULL, c.c_setname));
+ } else {
+ stale_flag = MD_IMP_STALE_SET;
+ /*
+ * If we have a stale diskset, the kernel will
+ * delete the replicas on the unavailable disks.
+ * To be consistent, we'll zero out the mirp on those
+ * disks here.
+ */
+ for (midp = misp->mis_drives; midp != NULL;
+ midp = midp->mid_next) {
+ if (midp->mid_available ==
+ MD_IM_DISK_NOT_AVAILABLE) {
+ midp->mid_replicas = NULL;
+ }
+ }
+ }
+ }
for (midp = misp->mis_drives; midp != NULL;
midp = midp->mid_next) {
- mdcinfo_t *cinfo;
+
+ if ((misp->mis_flags & MD_IM_SET_REPLICATED) &&
+ (partial_replicated_flag == 0) &&
+ (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE))
+ partial_replicated_flag = MD_SR_UNRSLV_REPLICATED;
/*
- * We pass down the list of the drives in the
- * set down to the kernel irrespective of
- * whether the drives have a replica or not.
- *
- * The kernel detects which of the drives don't
- * have a replica and accordingly does the
- * right thing.
+ * We pass the list of the drives in the
+ * set with replicas on them down to the kernel.
*/
dnp = midp->mid_dnp;
- if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
- ((np = metaslicename(dnp, rep_slice, ep))
- == NULL)) {
- mdclrerror(ep);
+ mirp = midp->mid_replicas;
+ if (!mirp) {
+ /*
+ * No replicas on this disk, go to next disk.
+ */
continue;
}
- (void) strcpy(c.c_locator.l_devname, np->bname);
- c.c_locator.l_dev = meta_cmpldev(np->dev);
- c.c_locator.l_mnum = meta_getminor(np->dev);
+ if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
+ /*
+ * The disk isn't there. We'll need to get the
+ * disk information from the midp list instead
+ * of going and looking for it. This means it
+ * will be information relative to the old
+ * system.
+ */
+ minor_name = Strdup(midp->mid_minor_name);
+ (void) strncpy(c.c_locator.l_driver,
+ midp->mid_driver_name,
+ sizeof (c.c_locator.l_driver));
+ (void) strcpy(c.c_locator.l_devname, midp->mid_devname);
+ c.c_locator.l_mnum = midp->mid_mnum;
+
+ } else {
+ if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
+ ((np = metaslicename(dnp, rep_slice, ep))
+ == NULL)) {
+ mdclrerror(ep);
+ continue;
+ }
+ (void) strcpy(c.c_locator.l_devname, np->bname);
+ c.c_locator.l_dev = meta_cmpldev(np->dev);
+ c.c_locator.l_mnum = meta_getminor(np->dev);
+ minor_name = meta_getminor_name(np->bname, ep);
+ if ((cinfo = metagetcinfo(np, ep)) == NULL) {
+ mdclrerror(ep);
+ continue;
+ }
+
+ if (cinfo->dname) {
+ (void) strncpy(c.c_locator.l_driver,
+ cinfo->dname,
+ sizeof (c.c_locator.l_driver));
+ }
+ }
+
c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz);
(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
midp->mid_devid, midp->mid_devid_sz);
@@ -2546,31 +3597,14 @@ meta_imp_set(
midp->mid_o_devid, midp->mid_o_devid_sz);
c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz;
}
- minor_name = meta_getminor_name(np->bname, ep);
- (void) strncpy(c.c_locator.l_minor_name, minor_name,
- sizeof (c.c_locator.l_minor_name));
-
- if ((cinfo = metagetcinfo(np, ep)) == NULL) {
- mdclrerror(ep);
- continue;
+ if (minor_name) {
+ (void) strncpy(c.c_locator.l_minor_name, minor_name,
+ sizeof (c.c_locator.l_minor_name));
}
- (void) strncpy(c.c_locator.l_driver, cinfo->dname,
- sizeof (c.c_locator.l_driver));
-
- mirp = midp->mid_replicas;
do {
- if (mirp) {
- c.c_locator.l_flags = 0;
- c.c_locator.l_blkno = mirp->mir_offset;
- mirp = mirp->mir_next;
- } else {
- /*
- * Default offset for dummy is 16
- */
- c.c_locator.l_blkno = 16;
- }
-
+ c.c_locator.l_flags = 0;
+ c.c_locator.l_blkno = mirp->mir_offset;
if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
Free((void *)(uintptr_t)c.c_locator.l_devid);
if (c.c_locator.l_old_devid)
@@ -2578,6 +3612,7 @@ meta_imp_set(
c.c_locator.l_old_devid);
return (mdstealerror(ep, &c.c_mde));
}
+ mirp = mirp->mir_next;
} while (mirp != NULL);
}
@@ -2595,39 +3630,143 @@ meta_imp_set(
}
/*
- * Now kernel should have all the information
+ * Now the kernel should have all the information
* regarding the import diskset replica.
- * Tell kernel to load them up and import the set
+ * Tell the kernel to load them up and import the set
*/
- if (metaioctl(MD_IOCIMP_LOAD, &c.c_setno, &c.c_mde, NULL) != 0) {
+ (void) memset(&c, 0, sizeof (c));
+ c.c_flags = stale_flag;
+ c.c_setno = setno;
+ if (metaioctl(MD_IOCIMP_LOAD, &c, &c.c_mde, NULL) != 0) {
Free((void *)(uintptr_t)c.c_locator.l_devid);
if (c.c_locator.l_old_devid)
Free((void *)(uintptr_t)c.c_locator.l_old_devid);
return (mdstealerror(ep, &c.c_mde));
}
- (void) meta_smf_enable(META_SMF_DISKSET, NULL);
+ /*
+ * Create a set name for the set.
+ */
+ sp = Zalloc(sizeof (*sp));
+ sp->setname = Strdup(setname);
+ sp->lockfd = MD_NO_LOCK;
+ sp->setno = setno;
+ sd = Zalloc(sizeof (*sd));
+ (void) strcpy(sd->sd_nodes[0], mynode());
+ sd->sd_ctime = tp;
+ sd->sd_genid = 0;
+
+
+ if (misp->mis_flags & MD_IM_SET_REPLICATED) {
+ /* Update the diskset namespace */
+ meta_imp_update_nm(sp, misp, ep);
+
+ /* Release the diskset - even if update_nm failed */
+ (void) memset(&c, 0, sizeof (c));
+ c.c_setno = setno;
+ /* Don't need device id information from this ioctl */
+ c.c_locator.l_devid = (uint64_t)0;
+ c.c_locator.l_devid_flags = 0;
+ if (metaioctl(MD_RELEASE_SET, &c, &c.c_mde, NULL) != 0) {
+ if (mdisok(ep))
+ (void) mdstealerror(ep, &c.c_mde);
+ Free(sd);
+ Free(sp);
+ return (-1);
+ }
+
+ /* If update_nm failed, then fail the import. */
+ if (!mdisok(ep)) {
+ Free(sd);
+ Free(sp);
+ return (-1);
+ }
+ }
+
+ /*
+ * We'll need to update information in the master block due
+ * to the set number changing and if the case of a replicated
+ * diskset, the device id changing. May also need to create a
+ * dummy master block if it's not there.
+ */
+ meta_imp_update_mb(sp, misp, ep);
+ if (!mdisok(ep)) {
+ Free(sd);
+ Free(sp);
+ return (-1);
+ }
+
+ /*
+ * Create set record for diskset, but record is left in
+ * MD_SR_ADD state until after drives are added to set.
+ */
+ if (clnt_lock_set(mynode(), sp, ep)) {
+ Free(sd);
+ Free(sp);
+ return (-1);
+ }
+
+ if (clnt_createset(mynode(), sp, sd->sd_nodes,
+ sd->sd_ctime, sd->sd_genid, ep)) {
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ (void) clnt_unlock_set(mynode(), cl_sk, &xep);
+ Free(sd);
+ Free(sp);
+ return (-1);
+ }
- /* The set has now been imported, create the appropriate symlink */
- (void) snprintf(setname_link, MAXPATHLEN, "/dev/md/%s", setname);
- (void) snprintf(setnum_link, MAXPATHLEN, "shared/%d", c.c_setno);
+ Free(sd);
/*
- * Since we already verified that the setname was OK, make sure to
- * cleanup before proceeding.
+ * Create drive records for the disks in the set.
*/
- if (unlink(setname_link) == -1) {
- if (errno != ENOENT)
- (void) mdsyserror(ep, errno, setname_link);
+ for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
+ dnp = midp->mid_dnp;
+ if (midp->mid_available & MD_IM_DISK_NOT_AVAILABLE) {
+ /*
+ * If the disk isn't available, the dnp->devid is
+ * no good. It is either blank for the case where
+ * there is no disk with that devname, or it
+ * contains the devid for the real disk in the system
+ * with that name. The problem is, if the disk is
+ * unavailable, then the devid should be the devid
+ * of the missing disk. So we're faking a dnp for
+ * the import. This is needed for creating drive
+ * records.
+ */
+ dnp = Zalloc(sizeof (mddrivename_t));
+ dnp->side_names_key = midp->mid_dnp->side_names_key;
+ dnp->type = midp->mid_dnp->type;
+ dnp->cname = Strdup(midp->mid_dnp->cname);
+ dnp->rname = Strdup(midp->mid_dnp->rname);
+ dnp->devid = devid_str_encode(midp->mid_devid,
+ NULL);
+ midp->mid_dnp = dnp;
+ }
+ dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
+ }
+
+ if (meta_imp_set_adddrives(sp, dnlp, misp, ep)) {
+ Free(sp);
+ return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
+ mynode(), NULL, c.c_setname));
}
- if (symlink(setnum_link, setname_link) == -1)
- (void) mdsyserror(ep, errno, setname_link);
+ /* If drives were added without error, set set_record to OK */
+ if (clnt_upd_sr_flags(mynode(), sp,
+ (partial_replicated_flag | MD_SR_OK | MD_SR_MB_DEVID), ep)) {
+ Free(sp);
+ return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
+ mynode(), NULL, c.c_setname));
+ }
+
+ Free(sp);
- /* resnarf the set that has just been imported */
- if (clnt_resnarf_set(mynode(), c.c_setno, ep) != 0)
- md_eprintf("%s\n", dgettext(TEXT_DOMAIN, "Please stop and "
- "restart rpc.metad"));
+ cl_sk = cl_get_setkey(sp->setno, sp->setname);
+ if (clnt_unlock_set(mynode(), cl_sk, ep)) {
+ return (-1);
+ }
+ cl_set_setkey(NULL);
Free((void *)(uintptr_t)c.c_locator.l_devid);
if (c.c_locator.l_old_devid)
diff --git a/usr/src/lib/lvm/libmeta/common/meta_metad.c b/usr/src/lib/lvm/libmeta/common/meta_metad.c
index adf281e542..8c1d246afa 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_metad.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_metad.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -510,6 +509,94 @@ clnt_add_drv_sidenms(
}
/*
+ * Adding drives via metaimport to disksets. Some of the drives may
+ * not be available so we need more information than the basic clnt_adddrvs
+ * offers us.
+ */
+int
+clnt_imp_adddrvs(
+	char		*hostname,
+	mdsetname_t	*sp,
+	md_drive_desc	*dd,
+	md_timeval32_t	timestamp,
+	ulong_t		genid,
+	md_error_t	*ep
+)
+{
+	mdrpc_drives_2_args	args;
+	mdrpc_drives_2_args_r1	*rev1;
+	mdrpc_generic_res	reply;
+	CLIENT			*handle;
+	int			vers;
+
+	/* start from a clean error state and zeroed RPC structures */
+	mdclrerror(ep);
+	(void) memset(&args, 0, sizeof (args));
+	(void) memset(&reply, 0, sizeof (reply));
+
+	/* fill in the revision 1 member of the version 2 argument union */
+	args.rev = MD_METAD_ARGS_REV_1;
+	rev1 = &args.mdrpc_drives_2_args_u.rev1;
+	rev1->sp = sp;
+	rev1->cl_sk = cl_get_setkey(sp->setno, sp->setname);
+	rev1->drivedescs = dd;
+	rev1->timestamp = timestamp;
+	rev1->genid = genid;
+
+	if (md_in_daemon && strcmp(mynode(), hostname) == 0) {
+		int	bool;
+
+		/*
+		 * We are the daemon and the target host is this node:
+		 * invoke mdrpc_imp_adddrvs_2() without a client handle
+		 * and pick up whatever status it reported.
+		 */
+		bool = mdrpc_imp_adddrvs_2(&args, &reply, NULL);
+		assert(bool == TRUE);
+		(void) mdstealerror(ep, &reply.status);
+	} else {
+		handle = metarpcopen(hostname, CL_LONG_TMO, ep);
+		if (handle == NULL)
+			return (-1);
+
+		/* ask the client handle which protocol version it speaks */
+		CLNT_CONTROL(handle, CLGET_VERS, (char *)&vers);
+
+		/*
+		 * This call has no version 1 form, so a handle that
+		 * reports METAD_VERSION is rejected as a mismatch.
+		 */
+		if (vers == METAD_VERSION) {
+			(void) mddserror(ep, MDE_DS_RPCVERSMISMATCH,
+			    sp->setno, hostname, NULL, NULL);
+			metarpcclose(handle);
+			return (-1);
+		}
+
+		if (mdrpc_imp_adddrvs_2(&args, &reply, handle) !=
+		    RPC_SUCCESS) {
+			/* the RPC itself failed */
+			(void) mdrpcerror(ep, handle, hostname,
+			    dgettext(TEXT_DOMAIN,
+			    "metad imp add drives"));
+		} else {
+			/* the RPC went through; take the status it returned */
+			(void) mdstealerror(ep, &reply.status);
+		}
+
+		metarpcclose(handle);
+	}
+
+	/* release anything XDR allocated while decoding the reply */
+	xdr_free(xdr_mdrpc_generic_res, (char *)&reply);
+
+	return (mdisok(ep) ? 0 : -1);
+}
+
+
+/*
* Add drives to disksets.
*/
int
diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c b/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c
index 6da29e6f3c..cb29de889d 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c
@@ -1471,7 +1471,7 @@ mdmn_do_meta_md_addside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
*/
for (i = 0; i < nm.ref_count; i++) {
if (add_name(sp, d->msg_sideno, nm.key, dname, mnum,
- cname, &ep) == -1) {
+ cname, NULL, NULL, &ep) == -1) {
(void) mdstealerror(&(resp->mmr_ep), &ep);
Free(cname);
Free(dname);
diff --git a/usr/src/lib/lvm/libmeta/common/meta_name.c b/usr/src/lib/lvm/libmeta/common/meta_name.c
index b892f13493..dc628b5514 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_name.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_name.c
@@ -1070,7 +1070,7 @@ metafreedrivename(
/*
* flush the drive name cache
*/
-static void
+void
metaflushdrivenames()
{
mddrivenamelist_t *p, *n;
@@ -2621,7 +2621,289 @@ metaname_fast(
{
return (metaname_common(spp, uname, 1, uname_type, ep));
}
+/*
+ * Get the dnp using the device id.
+ *
+ * We have the potential to have more than 1 dnp with the same disk name but
+ * have different device ids. This would happen in the case of a partial
+ * diskset. The unavailable disk name is relative to the prior host and could
+ * possibly be the same as a disk on this system. The only way to tell which
+ * dnp belongs with this disk is by searching by device id. We have the
+ * potential to have the case where 1) the disk whose device id we pass in is
+ * in the system. In this case the name and the device id are both valid for
+ * the disk. 2) The disk whose device id we've been passed is not in the
+ * system and no disk with the same name has a dnp on the list. And 3) The
+ * disk whose device id we've been passed is not on the system but there is
+ * a disk with the same name (different devid) that is on the system. Here's
+ * what we return for each of those cases:
+ * 1) If disk is in system:
+ * disk is found on drivelistp or we create a new drivename and it's
+ * fully populated as expected.
+ * 2) If disk not in system, no collision
+ * Disk with the same devid is not found on drivelistp, we create a new
+ * drivename structure and the dnp->devid is filled in not from getparts
+ * but from the devidp passed in. No other disk in the system has the
+ * same "name" or devid.
+ * This situation would be caused by the import of a partial diskset.
+ * 3) If disk not in system, collision
+ * Disk with the same devid is not found on the drivelistp, we create a
+ * new drivename struct but getparts will use the information from the
+ * name which is actually in reference to another disk of the same name
+ * in the system. getparts will fill in the dnp->devid with the value
+ * from the other disk and we overwrite this with the value of this disk.
+ * To get into this situation one of the disks is actually unavailable
+ * as in the case of a partial import.
+ */
+mddrivename_t *
+meta_getdnp_bydevid(
+	mdsetname_t	*sp,
+	side_t		sideno,
+	ddi_devid_t	devidp,
+	mdkey_t		key,
+	md_error_t	*ep
+)
+{
+	ddi_devid_t		dnp_devidp;
+	char			*nm;
+	mddrivenamelist_t	**tail;
+	mddrivename_t		*dnp;
+	uint_t			slice;
+	mdname_t		*np;
+	char			*rname = NULL;
+	char			*dname = NULL;
+	uint_t			nparts, partno;
+	int			ret;
+	md_set_desc		*sd = NULL;
+	meta_device_type_t	uname_type = LOGICAL_DEVICE;
+
+	/*
+	 * Look in the cache first.  Only MDT_COMP entries are compared.
+	 * When the loop ends without a match, tail points at the
+	 * terminating link of drivelistp, which is where a new entry
+	 * gets hooked in below.
+	 */
+	for (tail = &drivelistp; (*tail != NULL); tail = &(*tail)->next) {
+		dnp = (*tail)->drivenamep;
+		if (dnp->type != MDT_COMP)
+			continue;
+		ret = devid_str_decode(dnp->devid, &dnp_devidp, NULL);
+		if (ret != 0) {
+			/* unable to decode the devid */
+			return (NULL);
+		}
+		/* compare with the devid passed in. */
+		if (devid_compare(devidp, dnp_devidp) == 0) {
+			/* match! We have the same disk */
+			devid_free(dnp_devidp);
+			return (dnp);
+		}
+		devid_free(dnp_devidp);
+	}
+
+	/* drive not in the cache */
+
+	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+		return (NULL);
+	}
+	/* get namespace info; non-MN sets look the key up at sideno+SKEW */
+	if (MD_MNSET_DESC(sd)) {
+		if ((nm = meta_getnmbykey(MD_LOCAL_SET, sideno,
+		    key, ep)) == NULL)
+			return (NULL);
+	} else {
+		if ((nm = meta_getnmbykey(MD_LOCAL_SET,
+		    sideno+SKEW, key, ep)) == NULL)
+			return (NULL);
+	}
+
+	/*
+	 * From here on we own nm (other callers of meta_getnmbykey()
+	 * Free() it once they are done), so every exit path below must
+	 * release it.
+	 */
+
+	/* get raw name (rname) of the slice and drive name (dname) */
+	if ((rname = getrawnames(&sp, nm, &dname, &uname_type, ep)) == NULL) {
+		Free(nm);
+		return (NULL);
+	}
+
+	/* allocate new list element and drive */
+	*tail = Zalloc(sizeof (**tail));
+	dnp = (*tail)->drivenamep = Zalloc(sizeof (*dnp));
+	metainitdrivename(dnp);
+
+	/* get parts info */
+	/*
+	 * Note that if the disk is unavailable this name will point to
+	 * either a nonexistent disk and thus the part info and devid will
+	 * be empty or the name will point to the wrong disk and this
+	 * information will be invalid. Because of this, we overwrite the
+	 * dnp->devid with the correct one after getparts returns.
+	 */
+	if (getparts(dnp, rname, dname, uname_type, &nparts, &partno, ep) != 0)
+		goto out;
+
+	/*
+	 * NOTE(review): if getparts() stored a devid string in dnp->devid,
+	 * that string is dropped by this assignment -- confirm whether it
+	 * needs to be released first.
+	 */
+	dnp->devid = devid_str_encode(devidp, NULL);
+
+	/*
+	 * libmeta needs at least V_NUMPAR partitions.
+	 * If we have an EFI partition with less than V_NUMPAR slices,
+	 * we nevertheless reserve space for V_NUMPAR
+	 */
+	if (nparts < V_NUMPAR) {
+		nparts = V_NUMPAR;
+	}
+
+	/* allocate and link in parts */
+	dnp->parts.parts_len = nparts;
+	dnp->parts.parts_val = Zalloc((sizeof (*dnp->parts.parts_val)) *
+	    dnp->parts.parts_len);
+
+	for (slice = 0; (slice < nparts); ++slice) {
+		np = &dnp->parts.parts_val[slice];
+		metainitname(np);
+		np->drivenamep = dnp;
+	}
+
+	/* setup name_t (or slice) wanted */
+	if ((np = setup_slice(sp, uname_type, dnp, nm, rname,
+	    dname, partno, ep)) == NULL)
+		goto out;
+
+	/* canonical disk name */
+	if ((dnp->cname = metadiskname(np->cname)) == NULL)
+		dnp->cname = Strdup(np->cname);
+	if ((dnp->rname = metadiskname(np->rname)) == NULL)
+		dnp->rname = Strdup(np->rname);
+
+	/* cleanup, return success */
+	if (dname != NULL)
+		Free(dname);
+	Free(rname);
+	Free(nm);
+	return (dnp);
+
+	/* cleanup, unhook the half-built cache entry, return error */
+out:
+	if (dname != NULL)
+		Free(dname);
+	if (rname != NULL)
+		Free(rname);
+	Free(nm);
+
+	metafreedrivename(dnp);
+	Free(dnp);
+	Free(*tail);
+	*tail = NULL;
+	return (NULL);
+}
+
+/*
+ * Search the drivename list by devid instead of name. If you don't find
+ * an entry with the same device id, create one for the uname passed in.
+ *
+ * devid is the string-encoded device id to look for; uname is the name
+ * used to build a new drivename when no cached MDT_COMP entry matches.
+ * Returns the matching or newly created drivename, or NULL if a devid
+ * cannot be decoded or the new drivename cannot be set up.
+ */
+mddrivename_t *
+metadrivenamebydevid(
+	mdsetname_t	**spp,
+	char		*devid,
+	char		*uname,
+	md_error_t	*ep
+)
+{
+	ddi_devid_t		dnp_devidp;
+	ddi_devid_t		in_devidp = NULL;
+	mdname_t		*np;
+	mddrivenamelist_t	**tail;
+	char			*rname = NULL;
+	mddrivename_t		*dnp;
+	char			*dname = NULL;
+	int			match;
+	uint_t			nparts, partno;
+	uint_t			slice;
+	meta_device_type_t	uname_type = LOGICAL_DEVICE;
+
+	/*
+	 * Look in the cache first.  When the loop ends without a match,
+	 * tail points at the terminating link of drivelistp, which is
+	 * where a new entry gets hooked in below.
+	 */
+	for (tail = &drivelistp; (*tail != NULL); tail = &(*tail)->next) {
+		dnp = (*tail)->drivenamep;
+		if (dnp->type != MDT_COMP)
+			continue;
+
+		/* decode the dnp devid */
+		if (devid_str_decode(dnp->devid, &dnp_devidp, NULL) != 0) {
+			/* unable to decode the devid */
+			if (in_devidp != NULL)
+				devid_free(in_devidp);
+			return (NULL);
+		}
+
+		/*
+		 * Decode the passed in devid the first time it is needed
+		 * and reuse it for every later comparison.
+		 */
+		if (in_devidp == NULL &&
+		    devid_str_decode(devid, &in_devidp, NULL) != 0) {
+			/* unable to decode the devid */
+			devid_free(dnp_devidp);
+			return (NULL);
+		}
+
+		/*
+		 * Compare the devids.  The cached entry's decoded devid is
+		 * released on every pass so it cannot leak.
+		 */
+		match = (devid_compare(in_devidp, dnp_devidp) == 0);
+		devid_free(dnp_devidp);
+		if (match) {
+			/* match! We have the same disk */
+			devid_free(in_devidp);
+			return (dnp);
+		}
+	}
+
+	/*
+	 * in_devidp is still NULL if no MDT_COMP entry was visited; only
+	 * free what was actually decoded.
+	 */
+	if (in_devidp != NULL)
+		devid_free(in_devidp);
+
+	/* not in the cache */
+
+	/* get raw name (rname) of the slice and drive (dname) we have */
+	if ((rname = getrawnames(spp, uname, &dname, &uname_type,
+	    ep)) == NULL) {
+		return (NULL);
+	}
+
+	/* allocate new list element and drive */
+	*tail = Zalloc(sizeof (**tail));
+	dnp = (*tail)->drivenamep = Zalloc(sizeof (*dnp));
+
+	metainitdrivename(dnp);
+
+	/* get parts info */
+	if (getparts(dnp, rname, dname, uname_type, &nparts, &partno, ep) != 0)
+		goto out;
+
+	/*
+	 * libmeta needs at least V_NUMPAR partitions.
+	 * If we have an EFI partition with less than V_NUMPAR slices,
+	 * we nevertheless reserve space for V_NUMPAR
+	 */
+	if (nparts < V_NUMPAR) {
+		nparts = V_NUMPAR;
+	}
+
+	/* allocate and link in parts */
+	dnp->parts.parts_len = nparts;
+	dnp->parts.parts_val = Zalloc((sizeof (*dnp->parts.parts_val)) *
+	    dnp->parts.parts_len);
+	for (slice = 0; (slice < nparts); ++slice) {
+		np = &dnp->parts.parts_val[slice];
+		metainitname(np);
+		np->drivenamep = dnp;
+	}
+
+	/* setup name_t (or slice) wanted */
+	if ((np = setup_slice(*spp, uname_type, dnp, uname, rname,
+	    dname, partno, ep)) == NULL)
+		goto out;
+
+	/* canonical disk name */
+	if ((dnp->cname = metadiskname(np->cname)) == NULL)
+		dnp->cname = Strdup(np->cname);
+	if ((dnp->rname = metadiskname(np->rname)) == NULL)
+		dnp->rname = Strdup(np->rname);
+
+	/* cleanup, return success */
+	if (dname != NULL)
+		Free(dname);
+	Free(rname);
+	return (dnp);
+
+	/* cleanup, unhook the half-built cache entry, return error */
+out:
+	if (dname != NULL)
+		Free(dname);
+	if (rname != NULL)
+		Free(rname);
+
+	metafreedrivename(dnp);
+	Free(dnp);
+	Free(*tail);
+	*tail = NULL;
+	return (NULL);
+}
/*
* set up names for a drive
*/
diff --git a/usr/src/lib/lvm/libmeta/common/meta_namespace.c b/usr/src/lib/lvm/libmeta/common/meta_namespace.c
index 53c1b3e35c..9d2d16bd3f 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_namespace.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_namespace.c
@@ -392,6 +392,10 @@ meta_getnmentbydev(
return (Strdup(device_name));
}
+/*
+ * The arguments, minorname and devid, are only used with the partial
+ * import code and should be NULL otherwise.
+ */
int
add_name(
mdsetname_t *sp,
@@ -400,6 +404,8 @@ add_name(
char *dname,
minor_t mnum,
char *bname,
+ char *minorname, /* only used with a partial import */
+ ddi_devid_t devid, /* only used with a partial import */
md_error_t *ep
)
{
@@ -413,7 +419,13 @@ add_name(
(void) strncpy(nm.drvnm, dname, sizeof (nm.drvnm));
nm.devname_len = strlen(bname) + 1;
nm.devname = (uintptr_t)bname;
-
+ if (devid && minorname) {
+ nm.minorname_len = strlen(minorname) + 1;
+ nm.minorname = (uintptr_t)minorname;
+ nm.devid_size = devid_sizeof(devid);
+ nm.devid = (uintptr_t)devid;
+ nm.imp_flag = MDDB_C_IMPORT;
+ }
if (metaioctl(MD_IOCSET_NM, &nm, &nm.mde, bname) < 0)
return (mdstealerror(ep, &nm.mde));
@@ -579,7 +591,8 @@ add_key_name(
}
if ((err = add_name(sp, thisside, key, devlist[thisside].dname,
- devlist[thisside].mnum, devlist[thisside].bname, ep)) == -1) {
+ devlist[thisside].mnum, devlist[thisside].bname, NULL,
+ NULL, ep)) == -1) {
empty_devicelist();
return (-1);
}
@@ -602,7 +615,8 @@ add_key_name(
if (devlist[sideno].dname != NULL) {
err = add_name(sp, sideno, key, devlist[sideno].dname,
- devlist[sideno].mnum, devlist[sideno].bname, ep);
+ devlist[sideno].mnum, devlist[sideno].bname,
+ NULL, NULL, ep);
if (err == -1) {
empty_devicelist();
return (-1);
@@ -758,7 +772,7 @@ add_self_name(
if (metaislocalset(sp)) {
if ((key = add_name(sp, myside, MD_KEYWILD, drvname,
- minor, devname, ep)) == MD_KEYBAD) {
+ minor, devname, NULL, NULL, ep)) == MD_KEYBAD) {
Free(devname);
return (-1);
}
@@ -767,7 +781,7 @@ add_self_name(
* Add myside first and use the returned key to add other sides
*/
if ((key = add_name(sp, myside, MD_KEYWILD, drvname,
- minor, devname, ep)) == MD_KEYBAD) {
+ minor, devname, NULL, NULL, ep)) == MD_KEYBAD) {
Free(devname);
return (-1);
}
@@ -786,7 +800,7 @@ add_self_name(
if (mnside->nd_nodeid == myside)
continue;
if (add_name(sp, mnside->nd_nodeid, key, drvname,
- minor, devname, ep) == -1) {
+ minor, devname, NULL, NULL, ep) == -1) {
Free(devname);
return (-1);
}
@@ -798,7 +812,7 @@ add_self_name(
if (side == myside)
continue;
if (add_name(sp, side, key, drvname, minor, devname,
- ep) == -1) {
+ NULL, NULL, ep) == -1) {
Free(devname);
return (-1);
}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set.c b/usr/src/lib/lvm/libmeta/common/meta_set.c
index 94c380a10d..397f016f7b 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_set.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_set.c
@@ -926,6 +926,97 @@ meta_is_drive_in_thisset(
return (0);
}
+/*
+ * Check to see if devid is in use in any diskset.
+ * This is used in the case when a partial diskset is being imported
+ * to make sure that the unavailable drive isn't already in use in an
+ * already imported partial diskset. Can't check on the cname since the
+ * unavailable disk's cname is from the previous system and may collide
+ * with a cname on this system.
+ *
+ * The outcome of the search is reported through *spp, not through the
+ * return value:
+ * *spp != NULL: devid has been found; *spp names the diskset holding it
+ * *spp == NULL: devid not found in any diskset
+ * Return values:
+ * 0: search finished (inspect *spp for the outcome)
+ * -1: the search failed
+ */
+int
+meta_is_devid_in_anyset(
+ void *devid,
+ mdsetname_t **spp,
+ md_error_t *ep
+)
+{
+ set_t setno;
+ mdsetname_t *this_sp;
+ int is_it;
+ set_t max_sets;
+
+ if ((max_sets = get_max_sets(ep)) == 0)
+ return (-1);
+
+ assert(spp != NULL);
+ *spp = NULL;
+
+ /* set number 0 is not searched; the scan starts at set 1 */
+ for (setno = 1; setno < max_sets; setno++) {
+ if ((this_sp = metasetnosetname(setno, ep)) == NULL) {
+ /* MDE_DB_NODB: stop searching and report "not found" */
+ if (mdismddberror(ep, MDE_DB_NODB)) {
+ mdclrerror(ep);
+ return (0);
+ }
+ /* no set with this number: move on to the next one */
+ if (mdiserror(ep, MDE_NO_SET)) {
+ mdclrerror(ep);
+ continue;
+ }
+ return (-1);
+ }
+
+ if ((is_it = meta_is_devid_in_thisset(this_sp,
+ devid, ep)) == -1) {
+ /* MDE_NO_SET from the per-set check is not fatal here */
+ if (mdiserror(ep, MDE_NO_SET)) {
+ mdclrerror(ep);
+ continue;
+ }
+ return (-1);
+ }
+ if (is_it) {
+ /* found: hand back the owning set; return value stays 0 */
+ *spp = this_sp;
+ return (0);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Check to see if devid belongs to one of the drives of diskset sp.
+ * Drives without a devid string, or whose devid string cannot be
+ * decoded, are skipped.
+ * Return values:
+ * 1: devid matches a drive in this diskset
+ * 0: no drive in this diskset has this devid
+ * -1: the drive descriptors could not be obtained (ep is not ok)
+ */
+int
+meta_is_devid_in_thisset(
+	mdsetname_t	*sp,
+	void		*devid,
+	md_error_t	*ep
+)
+{
+	md_drive_desc	*dd, *p;
+	ddi_devid_t	dd_devid;
+	int		same;
+
+	dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
+	if (dd == NULL) {
+		/* NULL with a clean ep means there are no drives to check */
+		if (! mdisok(ep))
+			return (-1);
+		return (0);
+	}
+
+	for (p = dd; p != NULL; p = p->dd_next) {
+		if (p->dd_dnp->devid == NULL)
+			continue;
+
+		/*
+		 * Check the decode result explicitly instead of relying
+		 * on the out parameter being cleared on failure; it is
+		 * reset first so a value freed on an earlier pass can
+		 * never be looked at again.
+		 */
+		dd_devid = NULL;
+		if (devid_str_decode(p->dd_dnp->devid,
+		    &dd_devid, NULL) != 0 || dd_devid == NULL)
+			continue;
+
+		same = (devid_compare(devid, dd_devid) == 0);
+		devid_free(dd_devid);
+		if (same)
+			return (1);
+	}
+	return (0);
+}
+
int
meta_set_balance(
mdsetname_t *sp,
@@ -1769,55 +1860,6 @@ metadrivename_withdrkey(
return (NULL);
}
- /* get namespace info */
- if (MD_MNSET_DESC(sd)) {
- if ((nm = meta_getnmbykey(MD_LOCAL_SET, sideno,
- key, ep)) == NULL)
- return (NULL);
- } else {
- if ((nm = meta_getnmbykey(MD_LOCAL_SET, sideno+SKEW,
- key, ep)) == NULL)
- return (NULL);
- }
-
- /* get device name */
- if (flags & PRINT_FAST) {
- if ((np = metaname_fast(&sp, nm, LOGICAL_DEVICE, ep)) == NULL) {
- Free(nm);
- return (NULL);
- }
- } else {
- if ((np = metaname(&sp, nm, LOGICAL_DEVICE, ep)) == NULL) {
- Free(nm);
- return (NULL);
- }
- }
- Free(nm);
-
- /* make sure it's OK */
- if ((! (flags & MD_BASICNAME_OK)) && (metachkcomp(np, ep) != 0))
- return (NULL);
-
- /* get drivename */
- dnp = np->drivenamep;
- dnp->side_names_key = key;
-
- /*
- * Skip the following devid check if dnp is did device
- * The device id is disabled for did device due to the
- * lack of minor name support in the did driver. The following
- * devid code path can set and propagate the error and
- * eventually prevent did disks from being added to the
- * diskset under SunCluster systems
- */
- if (strncmp(dnp->rname, "/dev/did/", strlen("/dev/did/")) == 0) {
- goto out;
- }
-
- /* Also, Skip the check if MN diskset, no devid's */
- if (MD_MNSET_DESC(sd)) {
- goto out;
- }
/*
* Get the devid associated with the key.
@@ -1829,10 +1871,69 @@ metadrivename_withdrkey(
*/
if ((devidp = meta_getdidbykey(MD_LOCAL_SET, sideno+SKEW, key, ep))
!= NULL) {
- dnp->devid = devid_str_encode(devidp, NULL);
+ /*
+ * Look for the correct dnp using the devid for comparison.
+ */
+ dnp = meta_getdnp_bydevid(sp, sideno, devidp, key, ep);
free(devidp);
+ dnp->side_names_key = key;
} else {
/*
+ * We didn't get a devid. We'll try for a dnp using the
+ * name. If we have a MN diskset or if the dnp is a did
+ * device, we're done because then we don't have devids.
+ * Otherwise we'll try to set the devid
+ * and get the dnp via devid again.
+ * We also need to clear the ep structure. When the
+ * above call to meta_getdidbykey returned a null, it
+ * also put an error code into ep. In this case, the null
+ * return is actually OK and any errors can be ignored. The
+ * reason it is OK is because this could be a MN set or
+ * we could be running without devids (ex cluster).
+ */
+ mdclrerror(ep);
+
+ if ((nm = meta_getnmbykey(MD_LOCAL_SET, sideno, key,
+ ep)) == NULL)
+ return (NULL);
+ /* get device name */
+ if (flags & PRINT_FAST) {
+ if ((np = metaname_fast(&sp, nm,
+ LOGICAL_DEVICE, ep)) == NULL) {
+ Free(nm);
+ return (NULL);
+ }
+ } else {
+ if ((np = metaname(&sp, nm, LOGICAL_DEVICE,
+ ep)) == NULL) {
+ Free(nm);
+ return (NULL);
+ }
+ }
+ Free(nm);
+ /* make sure it's OK */
+ if ((! (flags & MD_BASICNAME_OK)) && (metachkcomp(np,
+ ep) != 0))
+ return (NULL);
+
+ /* get drivename */
+ dnp = np->drivenamep;
+ dnp->side_names_key = key;
+ /*
+ * Skip the devid set/check for the following cases:
+ * 1) If MN diskset, there are no devid's
+ * 2) if dnp is did device
+ * The device id is disabled for did device due to the
+ * lack of minor name support in the did driver. The following
+ * devid code path can set and propagate the error and
+ * eventually prevent did disks from being added to the
+ * diskset under SunCluster systems
+ */
+ if ((strncmp(dnp->rname, "/dev/did/", strlen("/dev/did/"))
+ == 0) || (MD_MNSET_DESC(sd)))
+ goto out;
+
+ /*
* It is okay if replica is not in devid mode
*/
if (mdissyserror(ep, MDDB_F_NODEVID)) {
@@ -1841,21 +1942,31 @@ metadrivename_withdrkey(
}
/*
+ * We're not MN or did devices but
* devid is missing so this means that we have
* just upgraded from a configuration where
* devid's were not used so try to add in
- * the devid and requery.
+ * the devid and requery. If the devid still isn't there,
+ * that's OK. dnp->devid will be null as it is in any
+ * configuration with no devids.
*/
if (meta_setdid(MD_LOCAL_SET, sideno + SKEW, key,
ep) < 0)
return (NULL);
if ((devidp = (ddi_devid_t)meta_getdidbykey(MD_LOCAL_SET,
- sideno+SKEW, key, ep)) == NULL)
- return (NULL);
- dnp->devid = devid_str_encode(devidp, NULL);
- devid_free(devidp);
+ sideno+SKEW, key, ep)) != NULL) {
+ /*
+ * Found a devid so look for the dnp using the
+ * devid as the search mechanism.
+ */
+ dnp = meta_getdnp_bydevid(sp, sideno, devidp, key, ep);
+ free(devidp);
+ dnp->side_names_key = key;
+ }
}
+
+
out:
if (flags & MD_BYPASS_DAEMON)
return (dnp);
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_drv.c b/usr/src/lib/lvm/libmeta/common/meta_set_drv.c
index 5fad53ad7b..7dc51aec97 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_set_drv.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_drv.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -128,39 +127,63 @@ metaget_drivedesc_fromdrivelist(
int
meta_make_sidenmlist(
- mdsetname_t *sp,
- mddrivename_t *dnp,
- md_error_t *ep
+ mdsetname_t *sp,
+ mddrivename_t *dnp,
+ int import_flag, /* flags partial import */
+ md_im_drive_info_t *midp, /* import drive information */
+ md_error_t *ep
)
{
- mdsidenames_t *sn, **sn_next;
- mdname_t *np;
- int done;
- side_t sideno = MD_SIDEWILD;
- uint_t rep_slice;
-
- if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
- return (-1);
+ mdsidenames_t *sn, **sn_next;
+ mdname_t *np;
+ int done;
+ side_t sideno = MD_SIDEWILD;
+ uint_t rep_slice;
+ char *bname;
- dnp->side_names_key = MD_KEYWILD;
+ if (!import_flag) {
+ /*
+ * Normal (aka NOT partial import) code path.
+ */
+ if (meta_replicaslice(dnp, &rep_slice, ep) != 0) {
+ return (-1);
+ }
- if ((np = metaslicename(dnp, rep_slice, ep)) == NULL)
- return (-1);
+ dnp->side_names_key = MD_KEYWILD;
+ if ((np = metaslicename(dnp, rep_slice, ep)) == NULL)
+ return (-1);
+ bname = Strdup(np->bname);
+ } else {
+ /*
+ * When doing a partial import, we'll get the needed
+ * information from somewhere other than the system.
+ */
+ dnp->side_names_key = MD_KEYWILD;
+ bname = Strdup(midp->mid_devname);
+ }
metaflushsidenames(dnp);
sn_next = &dnp->side_names;
/*CONSTCOND*/
while (1) {
sn = Zalloc(sizeof (*sn));
- if ((done = meta_getnextside_devinfo(sp, np->bname,
- &sideno, &sn->cname, &sn->dname, &sn->mnum, ep)) == -1) {
- Free(sn);
- return (-1);
+ if ((done = meta_getnextside_devinfo(sp, bname, &sideno,
+ &sn->cname, &sn->dname, &sn->mnum, ep)) == -1) {
+ if (import_flag) {
+ mdclrerror(ep);
+ sn->dname = Strdup(midp->mid_driver_name);
+ sn->mnum = midp->mid_mnum;
+ } else {
+ Free(sn);
+ Free(bname);
+ return (-1);
+ }
}
if (done == 0) {
Free(sn);
+ Free(bname);
return (0);
}
@@ -312,18 +335,17 @@ meta_set_adddrives(
*/
for (p = dnlp; p != NULL; p = p->next) {
if (meta_repartition_drive(sp,
- p->drivenamep,
- force_label == TRUE ? MD_REPART_FORCE : 0,
+ p->drivenamep, force_label == TRUE ? MD_REPART_FORCE : 0,
NULL, /* Don't return the VTOC. */
ep) != 0) {
rval = -1;
goto out;
}
-
/*
* Create the names for the drives we are adding per side.
*/
- if (meta_make_sidenmlist(sp, p->drivenamep, ep) == -1) {
+ if (meta_make_sidenmlist(sp, p->drivenamep, 0, NULL,
+ ep) == -1) {
rval = -1;
goto out;
}
@@ -364,7 +386,6 @@ meta_set_adddrives(
(void) close(fd);
}
}
-
/*
* Get the set timeout information.
*/
@@ -938,6 +959,192 @@ rollback:
return (rval);
}
+/*
+ * Add drives routine used during import of a diskset.
+ *
+ * For every drive in dnlp the per-side name list is built, the drive
+ * descriptors are filled in from the import information held in misp
+ * (devid, replica count and size, unresolved-replicated flag), the drive
+ * records are added with clnt_imp_adddrvs() and ownership of the drives
+ * is taken with tk_own_bydd().
+ *
+ * sp - set the drives are being added to
+ * dnlp - list of the drives to add
+ * misp - import description of the set; misp->mis_drives supplies the
+ * per-drive import information
+ * ep - error return
+ *
+ * Returns 0 on success and -1 on failure. If adding the drive records or
+ * taking ownership fails, the steps already completed are undone in the
+ * rollback section before returning.
+ */
+int
+meta_imp_set_adddrives(
+ mdsetname_t *sp,
+ mddrivenamelist_t *dnlp,
+ md_im_set_desc_t *misp,
+ md_error_t *ep
+)
+{
+ md_set_desc *sd;
+ mddrivenamelist_t *p;
+ md_drive_desc *dd = NULL, *ddp;
+ int flush_set_onerr = 0;
+ md_timeval32_t now;
+ ulong_t genid;
+ mhd_mhiargs_t mhiargs;
+ md_im_replica_info_t *mirp;
+ /*
+ * Must start out as NULL: midp is handed to meta_make_sidenmlist()
+ * even when the set is not partial, in which case the lookup loop
+ * below never assigns it.
+ */
+ md_im_drive_info_t *midp = NULL;
+ int rval = 0;
+ sigset_t oldsigs;
+ ulong_t max_genid = 0;
+ int rb_level = 0;
+ md_error_t xep = mdnullerror;
+
+ if ((sd = metaget_setdesc(sp, ep)) == NULL)
+ return (-1);
+
+ for (p = dnlp; p != NULL; p = p->next) {
+ int imp_flag = 0;
+
+ /*
+ * If we have a partial diskset, meta_make_sidenmlist will
+ * need information from midp to complete making the
+ * side name structure.
+ */
+ if (misp->mis_partial) {
+ imp_flag = MDDB_C_IMPORT;
+ /* Find the import drive entry that describes this drive. */
+ for (midp = misp->mis_drives; midp != NULL;
+ midp = midp->mid_next) {
+ if (midp->mid_dnp == p->drivenamep)
+ break;
+ }
+ /* No import information for this drive: fail the add. */
+ if (midp == NULL) {
+ (void) mddserror(ep, MDE_DS_SETNOTIMP,
+ MD_SET_BAD, mynode(), NULL, sp->setname);
+ rval = -1;
+ goto out;
+ }
+ }
+ /*
+ * Create the names for the drives we are adding per side.
+ */
+ if (meta_make_sidenmlist(sp, p->drivenamep, imp_flag,
+ midp, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+ }
+
+ /*
+ * Get the list of drive descriptors that we are adding.
+ */
+ dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_ADD, ep);
+
+ if (! mdisok(ep)) {
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * Get the set timeout information.
+ */
+ (void) memset(&mhiargs, '\0', sizeof (mhiargs));
+ if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) {
+ rval = -1;
+ goto out;
+ }
+
+ /*
+ * Get timestamp and generation id for new records
+ */
+ now = sd->sd_ctime;
+ genid = sd->sd_genid;
+
+ /* At this point, in case of error, set should be flushed. */
+ flush_set_onerr = 1;
+
+ rb_level = 1; /* level 1 */
+
+ /*
+ * Copy the import information (devid, replica count and size) into
+ * the drive descriptor that refers to the same drive name.
+ */
+ for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
+ for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
+ if (ddp->dd_dnp == midp->mid_dnp) {
+ /* same disk */
+ ddp->dd_dnp->devid =
+ devid_str_encode(midp->mid_devid,
+ midp->mid_minor_name);
+
+ /*
+ * dd_dbsize comes from the first replica,
+ * dd_dbcnt is the number of replicas listed.
+ */
+ ddp->dd_dbcnt = 0;
+ mirp = midp->mid_replicas;
+ if (mirp) {
+ ddp->dd_dbsize = mirp->mir_length;
+ for (; mirp != NULL;
+ mirp = mirp->mir_next) {
+ ddp->dd_dbcnt++;
+ }
+ }
+ /*
+ * An unavailable disk in a replicated set is
+ * flagged as unresolved replicated.
+ */
+ if ((midp->mid_available &
+ MD_IM_DISK_NOT_AVAILABLE) &&
+ (misp->mis_flags & MD_IM_SET_REPLICATED)) {
+ ddp->dd_flags = MD_DR_UNRSLV_REPLICATED;
+ }
+ }
+ }
+ }
+
+ /*
+ * Add the drive records for the drives that we are adding to
+ * each host in the set. Marks the drive records as MD_DR_ADD.
+ * May also mark a drive record as MD_DR_UNRSLV_REPLICATED if
+ * this flag was set in the dd_flags for that drive.
+ * (The call below is made for the local node, mynode(), only.)
+ */
+ if (clnt_imp_adddrvs(mynode(), sp, dd, now, genid, ep) == -1)
+ goto rollback;
+
+ rb_level = 2; /* level 2 */
+
+ /*
+ * Take ownership of the added drives.
+ */
+ if (tk_own_bydd(sp, dd, &mhiargs, TRUE, ep))
+ goto rollback;
+
+out:
+ metafreedrivedesc(&dd);
+
+ if (flush_set_onerr) {
+ metaflushsetname(sp);
+ }
+
+ return (rval);
+
+rollback:
+ /* Make sure we are blocking all signals */
+ if (procsigs(TRUE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ rval = -1;
+
+ max_genid = sd->sd_genid;
+
+ /* level 2: release the ownership taken by tk_own_bydd() */
+ if (rb_level > 1) {
+ if (!MD_ATSET_DESC(sd)) {
+ if (rel_own_bydd(sp, dd, TRUE, &xep)) {
+ mdclrerror(&xep);
+ }
+ }
+ }
+
+ /* level 1: delete the drive records and resync the generation id */
+ if (rb_level > 0) {
+ if (clnt_deldrvs(mynode(), sp, dd, &xep) == -1) {
+ mdclrerror(&xep);
+ }
+ max_genid += 2;
+ resync_genid(sp, sd, max_genid, 0, NULL);
+ }
+
+ /* level 0 */
+
+ /* release signals back to what they were on entry */
+ if (procsigs(FALSE, &oldsigs, &xep) < 0)
+ mdclrerror(&xep);
+
+ metafreedrivedesc(&dd);
+
+ if (flush_set_onerr) {
+ metaflushsetname(sp);
+ md_rb_sig_handling_off(md_got_sig(), md_which_sig());
+ }
+
+ return (rval);
+}
+
int
meta_set_deletedrives(
mdsetname_t *sp,
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_hst.c b/usr/src/lib/lvm/libmeta/common/meta_set_hst.c
index e665406cff..9bf87f8cd2 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_set_hst.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_hst.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -201,8 +200,8 @@ add_md_sidenms(mdsetname_t *sp, side_t sideno, side_t otherside, md_error_t *ep)
* increment the count to sync up with the other sides.
*/
for (i = 0; i < nm.ref_count; i++) {
- if (add_name(sp, sideno, nm.key, dname, mnum, cname,
- ep) == -1)
+ if (add_name(sp, sideno, nm.key, dname, mnum,
+ cname, NULL, NULL, ep) == -1)
rval = -1;
}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_prv.c b/usr/src/lib/lvm/libmeta/common/meta_set_prv.c
index f3a8f39e17..76454d4db7 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_set_prv.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_prv.c
@@ -35,7 +35,6 @@
#include <sys/cladm.h>
#include <devid.h>
#include <sys/lvm/md_convert.h>
-#include <sdssc.h>
/*
* Exported Entry Points
@@ -642,15 +641,14 @@ setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd, int force, md_error_t *ep)
int i;
md_set_desc *sd;
int use_devid = 1;
- ddi_devid_t devidp;
+ ddi_devid_t devidp, new_devidp;
char *minor_name = NULL;
size_t sz;
char *devid_str = NULL;
- sdssc_version_t version;
+ int need_to_free_devidp = 0;
if ((sd = metaget_setdesc(sp, ep)) == NULL)
return (-1);
-
(void) memset(&c, 0, sizeof (c));
c.c_setno = sp->setno;
@@ -732,14 +730,7 @@ setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd, int force, md_error_t *ep)
}
}
- /*
- * If the device does not have a devid or is a multinode
- * diskset or we are in a SunCluster 3.x enviroment then
- * do not use devids.
- */
- if ((dnp->devid == NULL) || MD_MNSET_DESC(sd) ||
- ((sdssc_version(&version) == SDSSC_OKAY) &&
- (version.major >= 3))) {
+ if ((dnp->devid == NULL) || MD_MNSET_DESC(sd)) {
use_devid = 0;
}
@@ -754,18 +745,50 @@ setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd, int force, md_error_t *ep)
(void) snprintf(devid_str, len, "%s/%s", dnp->devid,
minor_name);
(void) devid_str_decode(devid_str, &devidp, NULL);
+ need_to_free_devidp = 1;
+
+ /* If need to fix LB then setup old_devid info */
+ if (p->dd_flags & MD_DR_FIX_LB_NM_DID) {
+ sz = devid_sizeof(devidp);
+ c.c_locator.l_old_devid_sz = sz;
+ c.c_locator.l_old_devid = (uintptr_t)malloc(sz);
+ (void) memcpy((void *)(uintptr_t)
+ c.c_locator.l_old_devid,
+ devidp, sz);
+
+ new_devidp = replicated_list_lookup(
+ devid_sizeof((ddi_devid_t)devidp),
+ (void *)(uintptr_t)devidp);
+ devid_free(devidp);
+ need_to_free_devidp = 0;
+ devidp = new_devidp;
+ }
sz = devid_sizeof(devidp);
c.c_locator.l_devid = (uintptr_t)malloc(sz);
c.c_locator.l_devid_sz = sz;
- (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
+ (void) memcpy((void *)(uintptr_t)
+ c.c_locator.l_devid,
devidp, sz);
+ if (need_to_free_devidp) {
+ devid_free(devidp);
+ need_to_free_devidp = 0;
+ }
if (minor_name == NULL) {
/* ERROR fix up */
Free(devid_str);
+ Free((void *)(uintptr_t)c.c_locator.l_devid);
+ if (c.c_locator.l_old_devid_sz) {
+ Free((void *)
+ (uintptr_t)c.c_locator.l_old_devid);
+ c.c_locator.l_old_devid_sz = 0;
+ c.c_locator.l_old_devid =
+ (uintptr_t)NULL;
+ }
return (-1);
}
- (void) strcpy(c.c_locator.l_minor_name, minor_name);
+ (void) strcpy(c.c_locator.l_minor_name,
+ minor_name);
c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
} else {
@@ -785,6 +808,15 @@ setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd, int force, md_error_t *ep)
if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
if (use_devid) {
Free(devid_str);
+ Free((void *)
+ (uintptr_t)c.c_locator.l_devid);
+ if (c.c_locator.l_old_devid_sz) {
+ Free((void *)(uintptr_t)
+ c.c_locator.l_old_devid);
+ c.c_locator.l_old_devid_sz = 0;
+ c.c_locator.l_old_devid =
+ (uintptr_t)NULL;
+ }
}
Free(minor_name);
return (mdstealerror(ep, &c.c_mde));
@@ -792,6 +824,13 @@ setup_db_bydd(mdsetname_t *sp, md_drive_desc *dd, int force, md_error_t *ep)
}
if (use_devid) {
Free(devid_str);
+ Free((void *)(uintptr_t)c.c_locator.l_devid);
+ if (c.c_locator.l_old_devid_sz) {
+ Free((void *)
+ (uintptr_t)c.c_locator.l_old_devid);
+ c.c_locator.l_old_devid_sz = 0;
+ c.c_locator.l_old_devid = (uintptr_t)NULL;
+ }
}
Free(minor_name);
}
diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c b/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c
index c46ba0220d..98e0329ab7 100644
--- a/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c
+++ b/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -33,6 +32,7 @@
#include "meta_set_prv.h"
#include <sys/lvm/md_crc.h>
+extern char *blkname(char *);
static int
upd_dr_dbinfo(
@@ -480,6 +480,8 @@ cleanup:
return (-1);
}
+extern int *replicated_disk_list_built;
+extern int replicated_disk_list_built_pass1;
/*
* Exported Entry Points
*/
@@ -497,6 +499,7 @@ meta_set_take(
md_drive_desc *d = NULL;
char *owner = NULL;
int rval = 0;
+ int pathname_return = 0;
int i;
int has_set;
int matches = 0;
@@ -511,6 +514,9 @@ meta_set_take(
int ret = 0;
char *newname = NULL;
mdkey_t side_names_key;
+ int unrslv_replicated = 0;
+ mddrivenamelist_t *dnlp = NULL;
+ int retake_flag = 0;
if ((flags & TAKE_USETAG) || (flags & TAKE_USEIT)) {
if (flags & TAKE_USETAG) {
@@ -598,6 +604,180 @@ meta_set_take(
side += SKEW;
/*
+ * If this set had been previously imported as a partial replicated
+ * diskset, then must attempt to update any unresolved drive
+ * records in diskset with new devid information. Must set
+ * flags in drivedesc list before loading up set so that the
+ * md driver will fix up names and devids correctly in the
+ * locator block.
+ */
+ if (sd->sd_flags & MD_SR_UNRSLV_REPLICATED) {
+ md_im_names_t cnames = { 0, NULL};
+ ddi_devid_t old_devid, new_devid;
+ char *search_path = "/dev";
+ devid_nmlist_t *nmlist;
+ int indx;
+ mddrivenamelist_t **dnlpp = &dnlp;
+
+ if (meta_list_disks(ep, &cnames) != 0) {
+ rval = -1;
+ goto out;
+ }
+
+ for (indx = 0; indx < cnames.min_count; ++indx) {
+ mddrivename_t *dnp;
+ mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
+ int fd = -1;
+ ddi_devid_t devid1;
+ char *cdevidp;
+ int len;
+ char *fp;
+
+ /*
+ * We may have name collision here so we need to get
+ * the dnp using the devid and not the name.
+ */
+ len = strlen(cnames.min_names[indx]) + strlen("s0");
+ if ((fp = (char *)Malloc(len+1)) == NULL) {
+ (void) mdsyserror(ep, ENOMEM, NULL);
+ rval = -1;
+ goto out;
+ }
+ (void) snprintf(fp, len + 1, "%ss0",
+ cnames.min_names[indx]);
+ if ((fd = open(fp, O_RDONLY|O_NDELAY)) < 0) {
+ (void) mdsyserror(ep, EIO, fp);
+ rval = -1;
+ goto out;
+ }
+ Free(fp);
+ /* if no device id, what error? */
+ if (devid_get(fd, &devid1) != 0) {
+ (void) mdsyserror(ep, EIO, fp);
+ rval = -1;
+ goto out;
+ }
+ if (close(fd) < 0) {
+ (void) mdsyserror(ep, EIO, fp);
+ rval = -1;
+ goto out;
+ }
+ cdevidp = devid_str_encode(devid1, NULL);
+ if (cdevidp == NULL) {
+ (void) mdsyserror(ep, EIO, fp);
+ rval = -1;
+ goto out;
+ }
+ devid_free(devid1);
+ dnp = metadrivenamebydevid(&sp, cdevidp,
+ cnames.min_names[indx], ep);
+ devid_str_free(cdevidp);
+ if (dnp == NULL) {
+ /*
+ * Assuming we're interested in knowing about
+ * whatever error occurred, but not in stopping.
+ */
+ mde_perror(ep, cnames.min_names[indx]);
+ mdclrerror(ep);
+ continue;
+ }
+
+ dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
+ }
+ /* Reget sd and dd since freed by meta_prune_cnames. */
+ if ((sd = metaget_setdesc(sp, ep)) == NULL) {
+ rval = -1;
+ goto out;
+ }
+
+ if (sd->sd_flags & MD_SR_MB_DEVID)
+ dd = metaget_drivedesc(sp,
+ MD_BASICNAME_OK | PRINT_FAST, ep);
+ else
+ dd = metaget_drivedesc(sp,
+ MD_BASICNAME_OK, ep);
+ /* If ep has error, then there was a failure, set rval */
+ if (!mdisok(ep)) {
+ rval = -1;
+ goto out;
+ }
+
+ /* Builds global replicated disk list */
+ replicated_disk_list_built = &replicated_disk_list_built_pass1;
+
+ /* If success, then clear error structure */
+ if (build_replicated_disks_list(ep, dnlp) == 1)
+ mdclrerror(ep);
+ /* If ep has error, then there was a failure, set rval */
+ if (! mdisok(ep)) {
+ rval = -1;
+ goto out;
+ }
+
+ for (d = dd; d != NULL; d = d->dd_next) {
+ if (d->dd_flags & MD_DR_UNRSLV_REPLICATED) {
+ /* Get old devid from drive record */
+ (void) devid_str_decode(d->dd_dnp->devid,
+ &old_devid, NULL);
+
+ /*
+ * If the devid stored in the drive record
+ * (old_devid) matches a devid known by
+ * the system, then this disk has already
+ * been partially resolved. This situation
+ * could occur if a panic happened during a
+ * previous take of this diskset.
+ * Set flag to later handle fixing the master
+ * block on disk and turning off the unresolved
+ * replicated flag.
+ */
+ if (meta_deviceid_to_nmlist(search_path,
+ (ddi_devid_t)old_devid,
+ DEVID_MINOR_NAME_ALL,
+ &nmlist) == 0) {
+ d->dd_flags |= MD_DR_FIX_MB_DID;
+ retake_flag = 1;
+ continue;
+ }
+
+ /*
+ * If the devid stored in the drive record
+ * is on the list of replicated disks found
+ * during a system scan then set both flags
+ * so that the locator block, namespaces
+ * (diskset and local set), master block
+ * and unresolved replicated flag are updated.
+ */
+ new_devid = replicated_list_lookup(
+ devid_sizeof((ddi_devid_t)old_devid),
+ old_devid);
+ devid_free(old_devid);
+
+ /*
+ * If devid stored in the drive record is
+ * not found then set flag to mark
+ * that set is still unresolved and
+ * continue to next drive record.
+ */
+ if (new_devid == NULL) {
+ unrslv_replicated = 1;
+ continue;
+ }
+
+ /*
+ * Set flags to fix up the master block,
+ * locator block of the diskset, diskset
+ * namespace and the local set namespace.
+ */
+ d->dd_flags |= (MD_DR_FIX_MB_DID |
+ MD_DR_FIX_LB_NM_DID);
+ retake_flag = 1;
+ }
+ }
+
+ }
+
+ /*
* Check the local devid namespace to see if the disks
* have been moved. Use the local set first of all as this contains
* entries for the disks in the set.
@@ -627,6 +807,7 @@ meta_set_take(
* we are interested in.
*/
if (newname != NULL) {
+ char *save_devid;
/*
* Need to save the side names key as this
* points to the namespace entry that will
@@ -635,16 +816,28 @@ meta_set_take(
* set the namespace key.
*/
side_names_key = d->dd_dnp->side_names_key;
+
+ /*
+ * There is the possibility that there
+ * will be multiple disks with the same
+ * name but different devids in the
+ * drivelist. Because of this, we need
+ * to look for a new dnp based on devid
+ * and not name.
+ */
+ save_devid = Strdup(d->dd_dnp->devid);
metafreedrivename(d->dd_dnp);
- d->dd_dnp = metadrivename(&sp,
- metadiskname(newname), ep);
+ d->dd_dnp = metadrivenamebydevid(&sp,
+ save_devid, newname, ep);
+ Free(save_devid);
Free(newname);
/*
* null newname so we are reset for next time
* through
*/
newname = NULL;
- ret = meta_make_sidenmlist(sp, d->dd_dnp, ep);
+ ret = meta_make_sidenmlist(sp,
+ d->dd_dnp, 0, NULL, ep);
d->dd_dnp->side_names_key = side_names_key;
if (ret == -1) {
rval = -1;
@@ -663,7 +856,8 @@ meta_set_take(
RB_TEST(2, "take", ep)
if (!MD_ATSET_DESC(sd)) {
- if (tk_own_bydd(sp, dd, mhiargsp, FALSE, ep))
+ if (tk_own_bydd(sp, dd, mhiargsp,
+ flags & MD_IM_PARTIAL_DISKSET, ep))
goto rollback;
}
@@ -743,13 +937,38 @@ meta_set_take(
(void) mddserror(ep, MDE_DS_SETCLEANUP, sp->setno,
sp->setname, NULL, mynode());
rval = -1;
- goto out;
}
goto rollback;
}
- rval = pathname_reload(&sp, sp->setno, ep);
- if ((rval == METADEVADM_ERR) || (rval == METADEVADM_DSKNAME_ERR)) {
+ /*
+ * If an unresolved replicated diskset, fix up diskset
+ * and local namespaces, master block and drive record
+ * with the new devid. If all drives in diskset are
+ * now resolved, then clear set unresolved replicated flag.
+ * If an error is encountered, don't fail the take, but
+ * don't proceed any further in resolving the replicated disks.
+ */
+ if (sd->sd_flags & MD_SR_UNRSLV_REPLICATED) {
+ /* Fix up diskset and local namespaces with new devids */
+ meta_unrslv_replicated_nm(sp, dd, dnlp, ep);
+ if (mdisok(ep)) {
+ /* Fix up master block with new devids */
+ meta_unrslv_replicated_mb(sp, dd, dnlp, ep);
+ }
+
+ /* If all drives are resolved, set OK flag in set record. */
+ if (mdisok(ep) && (unrslv_replicated == 0)) {
+ /* Ignore failure since no bad effect. */
+ (void) clnt_upd_sr_flags(mynode(), sp, MD_SR_OK, ep);
+ }
+ mdclrerror(ep);
+
+ }
+
+ pathname_return = pathname_reload(&sp, sp->setno, ep);
+ if ((pathname_return == METADEVADM_ERR) ||
+ (pathname_return == METADEVADM_DSKNAME_ERR)) {
goto rollback;
}
@@ -847,6 +1066,23 @@ meta_set_take(
RB_TEST(7, "take", ep)
+ /*
+ * In order to resolve the namespace major driver names and
+ * to have the subdrivers attempt to re-associate devts from
+ * the newly resolved replicated device ids, return a '2'.
+ * This instructs metaset to release the diskset and re-take.
+ *
+ * Return a 2 if
+ * - no error was detected on the take
+ * - a replicated unresolved devid was resolved during take
+ * - take isn't being called during an import
+ * - this isn't already a re-take situation
+ */
+ if ((rval == 0) && (retake_flag == 1) &&
+ ((flags & (TAKE_RETAKE | TAKE_IMP)) == 0)) {
+ rval = 2;
+ }
+
return (rval);
out:
diff --git a/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c b/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c
index 32be258ab3..8631a82f3d 100644
--- a/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c
+++ b/usr/src/lib/lvm/libmeta/common/metad_svc_stubs.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -65,6 +64,7 @@
#pragma weak mdrpc_setnameok_2_svc = _mdrpc_setnameok_2_svc
#pragma weak mdrpc_ownset_2_svc = _mdrpc_ownset_2_svc
#pragma weak mdrpc_adddrvs_2_svc = _mdrpc_adddrvs_2_svc
+#pragma weak mdrpc_imp_set_drvs_2_svc = _mdrpc_imp_set_drvs_2_svc
#pragma weak mdrpc_deldrvs_2_svc = _mdrpc_deldrvs_2_svc
#pragma weak mdrpc_upd_dr_dbinfo_2_svc = _mdrpc_upd_dr_dbinfo_2_svc
#pragma weak mdrpc_devinfo_2_svc = _mdrpc_devinfo_2_svc
@@ -97,6 +97,7 @@
#pragma weak mdrpc_resnarf_set_2_svc = _mdrpc_resnarf_set_2_svc
#pragma weak mdrpc_mn_mirror_resync_all_2_svc = \
_mdrpc_mn_mirror_resync_all_2_svc
+#pragma weak mdrpc_imp_adddrvs_2_svc = _mdrpc_imp_adddrvs_2_svc
/*ARGSUSED*/
bool_t
@@ -486,6 +487,17 @@ _mdrpc_adddrvs_2_svc(
/*ARGSUSED*/
bool_t
+_mdrpc_imp_set_drvs_2_svc(
+ mdrpc_drives_2_args *a,
+ mdrpc_generic_res *b,
+ struct svc_req *c)
+{
+ assert(0); /* stub body: always trips; presumably never meant to run */
+ return (TRUE); /* only reached when assert() is compiled out */
+}
+
+/*ARGSUSED*/
+bool_t
_mdrpc_deldrvs_2_svc(
mdrpc_drives_2_args *a,
mdrpc_generic_res *b,
@@ -823,3 +835,14 @@ _mdrpc_mn_mirror_resync_all_2_svc(
assert(0);
return (TRUE);
}
+
+/*ARGSUSED*/
+bool_t
+_mdrpc_imp_adddrvs_2_svc(
+ mdrpc_drives_2_args *a,
+ mdrpc_generic_res *b,
+ struct svc_req *c)
+{
+ assert(0); /* stub body: always trips; presumably never meant to run */
+ return (TRUE); /* only reached when assert() is compiled out */
+}
diff --git a/usr/src/lib/lvm/libmeta/spec/meta.spec b/usr/src/lib/lvm/libmeta/spec/meta.spec
index b33f13b015..9a077d12df 100644
--- a/usr/src/lib/lvm/libmeta/spec/meta.spec
+++ b/usr/src/lib/lvm/libmeta/spec/meta.spec
@@ -937,6 +937,10 @@ function meta_free_drive_info_list
version SUNWprivate_1.1
end
+function meta_free_im_set_desc
+version SUNWprivate_1.1
+end
+
function meta_get_drive_names
version SUNWprivate_1.1
end
@@ -981,6 +985,10 @@ function meta_rel_own
version SUNWprivate_1.1
end
+function meta_replica_quorum
+version SUNWprivate_1.1
+end
+
function meta_status_own
version SUNWprivate_1.1
end
@@ -1257,6 +1265,10 @@ function metaflushsidenames
version SUNWprivate_1.1
end
+function metaflushdrivenames
+version SUNWprivate_1.1
+end
+
function metafreedrivename
version SUNWprivate_1.1
end
@@ -1769,6 +1781,14 @@ function meta_is_drive_in_thisset
version SUNWprivate_1.1
end
+function meta_is_devid_in_anyset
+version SUNWprivate_1.1
+end
+
+function meta_is_devid_in_thisset
+version SUNWprivate_1.1
+end
+
function meta_set_balance
version SUNWprivate_1.1
end
@@ -1817,10 +1837,6 @@ function strinlst
version SUNWprivate_1.1
end
-function meta_make_sidenmlist
-version SUNWprivate_1.1
-end
-
function meta_set_adddrives
version SUNWprivate_1.1
end
@@ -3685,6 +3701,14 @@ function xdr_mdrpc_nodeid_2_args
version SUNWprivate_1.1
end
+function clnt_imp_adddrvs
+version SUNWprivate_1.1
+end
+
+function mdrpc_imp_adddrvs_2
+version SUNWprivate_1.1
+end
+
function meta_is_member
version SUNWprivate_1.1
end
@@ -3737,6 +3761,10 @@ function read_master_block
version SUNWprivate_1.1
end
+function pick_good_disk
+version SUNWprivate_1.1
+end
+
function add_self_name
version SUNWprivate_1.1
end
diff --git a/usr/src/uts/common/io/lvm/md/md_ioctl.c b/usr/src/uts/common/io/lvm/md/md_ioctl.c
index c3102d7e6c..cfa6246d9a 100644
--- a/usr/src/uts/common/io/lvm/md/md_ioctl.c
+++ b/usr/src/uts/common/io/lvm/md/md_ioctl.c
@@ -143,9 +143,11 @@ get_lb_inittime_ioctl(
static int
setnm_ioctl(mdnm_params_t *nm, int mode)
{
- char *name;
+ char *name, *minorname = NULL;
side_t side;
int err = 0;
+ void *devid = NULL;
+ int devid_sz;
/*
* Don't allow addition of new names to namespace during upgrade.
@@ -178,6 +180,36 @@ setnm_ioctl(mdnm_params_t *nm, int mode)
goto out;
}
+ if (nm->imp_flag) {
+ if ((nm->devid == NULL) || (nm->minorname == NULL)) {
+ err = EINVAL;
+ goto out;
+ }
+ if (nm->devid) {
+ devid_sz = nm->devid_size;
+ devid = kmem_zalloc(devid_sz, KM_SLEEP);
+ err = ddi_copyin((caddr_t)(uintptr_t)nm->devid,
+ devid, devid_sz, mode);
+ if (err) {
+ err = EFAULT;
+ goto out;
+ }
+ }
+ if (nm->minorname) {
+ if (nm->minorname_len > MAXPATHLEN) {
+ err = EINVAL;
+ goto out;
+ }
+ minorname = kmem_zalloc(nm->minorname_len, KM_SLEEP);
+ err = ddi_copyin((caddr_t)(uintptr_t)nm->minorname,
+ minorname, (size_t)nm->minorname_len, mode);
+ if (err) {
+ err = EFAULT;
+ goto out;
+ }
+ }
+ }
+
if (nm->side == -1)
side = mddb_getsidenum(nm->setno);
else
@@ -190,7 +222,8 @@ setnm_ioctl(mdnm_params_t *nm, int mode)
}
nm->key = md_setdevname(nm->setno, side, nm->key, nm->drvnm,
- nm->mnum, name, 0, &nm->mde);
+ nm->mnum, name, nm->imp_flag, (ddi_devid_t)devid, minorname,
+ 0, &nm->mde);
/*
* If we got an error from md_setdevname & md_setdevname did not
* set the error code, we'll default to MDE_DB_NOSPACE.
@@ -202,6 +235,11 @@ setnm_ioctl(mdnm_params_t *nm, int mode)
out:
kmem_free(name, MAXPATHLEN);
+ if (devid) {
+ kmem_free(devid, devid_sz);
+ }
+ if (minorname)
+ kmem_free(minorname, nm->minorname_len);
return (err);
}
@@ -227,6 +265,7 @@ getnm_ioctl(
if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
return (ENODEV);
+
name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
if (nm->side == -1)
@@ -3295,15 +3334,16 @@ md_base_ioctl(md_dev64_t dev, int cmd, caddr_t data, int mode, IOLOCK *lockp)
if (! (mode & FWRITE))
return (EACCES);
- sz = sizeof (set_t);
- d = kmem_alloc(sz, KM_SLEEP);
+ mddb_config_case = 1;
- if (ddi_copyin(data, d, sz, mode) != 0) {
- err = EFAULT;
- break;
+ err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
+ &c_old_devid_addr);
+
+ if (err) {
+ return (err);
}
- err = md_imp_snarf_set((set_t *)d, mode);
+ err = md_imp_snarf_set((mddb_config_t *)d);
break;
}
@@ -3324,6 +3364,22 @@ md_base_ioctl(md_dev64_t dev, int cmd, caddr_t data, int mode, IOLOCK *lockp)
err = get_lb_inittime_ioctl((mddb_config_t *)d);
break;
}
+ case MD_IOCUPDATE_NM_RR_DID:
+ {
+ if (! (mode & FWRITE))
+ return (EACCES);
+
+ mddb_config_case = 1;
+
+ err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
+ &c_old_devid_addr);
+
+ if (err)
+ return (err);
+
+ err = md_update_nm_rr_did_ioctl((mddb_config_t *)d);
+ break;
+ }
default:
return (ENOTTY); /* used by next level up */
}
diff --git a/usr/src/uts/common/io/lvm/md/md_mddb.c b/usr/src/uts/common/io/lvm/md/md_mddb.c
index e98960da9d..ad2b567b33 100644
--- a/usr/src/uts/common/io/lvm/md/md_mddb.c
+++ b/usr/src/uts/common/io/lvm/md/md_mddb.c
@@ -113,7 +113,7 @@ extern md_ops_t *md_opslist;
extern md_krwlock_t nm_lock;
static int update_locatorblock(mddb_set_t *s, md_dev64_t dev,
- ddi_devid_t didptr);
+ ddi_devid_t didptr, ddi_devid_t old_didptr);
/*
* Defines for crc calculation for records
@@ -1027,8 +1027,16 @@ mddb_devid_add(
((char *)devid_ptr)[i] = ((char *)devid)[i];
/* Update mddb_did_info area for new device id */
- did_info->info_flags = MDDB_DID_EXISTS | MDDB_DID_VALID |
- MDDB_DID_UPDATED;
+ did_info->info_flags = MDDB_DID_EXISTS | MDDB_DID_VALID;
+
+ /*
+ * Only set UPDATED flag for non-replicated import cases.
+ * This allows the side locator driver name index to get
+ * updated in load_old_replicas.
+ */
+ if (!(md_get_setstatus(s->s_setno) & MD_SET_REPLICATED_IMPORT))
+ did_info->info_flags |= MDDB_DID_UPDATED;
+
did_info->info_firstblk = blk;
did_info->info_blkcnt = blkcnt;
did_info->info_offset = offset;
@@ -1806,8 +1814,10 @@ getmasters(
if (crcchk(mb, &mb->mb_checksum, MDDB_BSIZE, NULL)) {
error = MDDB_F_EFMT | MDDB_F_EMASTER;
}
- if (!(md_get_setstatus(s->s_setno) & MD_SET_IMPORT) &&
- (mb->mb_setno != s->s_setno)) {
+
+ if (!(md_get_setstatus(s->s_setno) &
+ (MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT)) &&
+ (mb->mb_setno != s->s_setno)) {
error = MDDB_F_EFMT | MDDB_F_EMASTER;
}
if (mb->mb_blkno != blkno) {
@@ -1826,8 +1836,9 @@ getmasters(
* Don't care about devid in local set since it is not used
* and this should not be part of set importing
*/
- if ((s->s_setno != MD_LOCAL_SET) && !(md_get_setstatus(s->s_setno) &
- MD_SET_IMPORT)) {
+ if ((s->s_setno != MD_LOCAL_SET) &&
+ !(md_get_setstatus(s->s_setno) &
+ (MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT))) {
/*
* Now check the destroy flag. We also need to handle
* the case where the destroy flag is reset after the
@@ -2331,7 +2342,8 @@ getuserdata(
* record, we must convert it because it was incore as a 64 bit
* structure but its on disk layout has only 32 bit for block sizes
*/
- if (!(md_get_setstatus(setno) & MD_SET_IMPORT) &&
+ if (!(md_get_setstatus(setno) &
+ (MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT)) &&
(type >= MDDB_FIRST_MODID) &&
((rbp->rb_revision == MDDB_REV_RB) ||
(rbp->rb_revision == MDDB_REV_RBFN))) {
@@ -2878,9 +2890,21 @@ match_mddb(mddb_ri_t *rip, ddi_devid_t devid, char *minor, md_dev64_t dev,
}
if (rip->ri_devid && devid && minor) {
- if (ddi_devid_compare(rip->ri_devid, devid) != 0 ||
- strcmp(rip->ri_minor_name, minor) != 0)
- return (0);
+ /*
+ * If old devid exists, then this is a replicated diskset
+ * and both old and new devids must be checked.
+ */
+ if (rip->ri_old_devid) {
+ if (((ddi_devid_compare(rip->ri_devid, devid) != 0) &&
+ (ddi_devid_compare(rip->ri_old_devid,
+ devid) != 0)) ||
+ (strcmp(rip->ri_minor_name, minor) != 0))
+ return (0);
+ } else {
+ if (ddi_devid_compare(rip->ri_devid, devid) != 0 ||
+ strcmp(rip->ri_minor_name, minor) != 0)
+ return (0);
+ }
} else {
if (rip->ri_dev != dev)
return (0);
@@ -4225,7 +4249,7 @@ selectlocator(
if (r->ri_lbp == (mddb_lb_t *)NULL)
continue;
- if (cmpidentifier(s, &r->ri_lbp->lb_ident))
+ if (!cmpidentifier(s, &r->ri_lbp->lb_ident))
continue;
if (r->ri_dtp != (mddb_dt_t *)NULL) {
@@ -4852,7 +4876,8 @@ get_mbs_n_lbs(
* We don't do this check if we're in the middle of
* importing a set.
*/
- if (!(md_get_setstatus(s->s_setno) & MD_SET_IMPORT) &&
+ if (!(md_get_setstatus(s->s_setno) &
+ (MD_SET_IMPORT | MD_SET_REPLICATED_IMPORT)) &&
(lbp->lb_setno != s->s_setno))
continue;
@@ -5111,27 +5136,27 @@ get_mbs_n_lbs(
if (!(did_info->info_flags & MDDB_DID_EXISTS))
continue;
- if (rip->ri_old_devid == NULL)
- continue;
-
if (did_icp->did_ic_devid[li] == NULL)
continue;
for (trip = s->s_rip; trip != NULL;
trip = trip->ri_next) {
+ if (trip->ri_old_devid == NULL)
+ continue;
if (ddi_devid_compare(
trip->ri_old_devid,
did_icp->did_ic_devid[li]) != 0) {
continue;
}
- /* update l_dev */
+ /* update l_dev and side mnum */
lp->l_dev = md_cmpldev(trip->ri_dev);
+ lbp->lb_sidelocators[0][li].l_mnum =
+ md_getminor(trip->ri_dev);
}
}
}
-
/*
* If there is a valid devid, verify that this locator
* block has information about itself by checking the
@@ -5162,8 +5187,9 @@ get_mbs_n_lbs(
if (!(did_info->info_flags & MDDB_DID_EXISTS))
continue;
- if ((md_get_setstatus(setno) &
- MD_SET_REPLICATED_IMPORT)) {
+ if (((md_get_setstatus(setno) &
+ MD_SET_REPLICATED_IMPORT)) &&
+ (rip->ri_old_devid != (ddi_devid_t)NULL)) {
if (ddi_devid_compare(rip->ri_old_devid,
did_icp->did_ic_devid[li]) != 0)
continue;
@@ -5471,6 +5497,7 @@ load_old_replicas(
char *minor_name;
int write_lb = 0;
int rval;
+ int stale_rtn = 0;
/* The only error path out of get_mbs_n_lbs() is MDDB_E_TAGDATA */
if (retval = get_mbs_n_lbs(s, &write_lb))
@@ -5819,12 +5846,17 @@ load_old_replicas(
/* This will return non-zero if STALE or TOOFEW */
/* This will write out chosen replica image to all replicas */
- if (selectreplicas(s, MDDB_SCANALL))
- goto errout;
+ stale_rtn = selectreplicas(s, MDDB_SCANALL);
if ((md_get_setstatus(setno) & MD_SET_REPLICATED_IMPORT)) {
ddi_devid_t devidptr;
+ /*
+ * ignore the return value from selectreplicas because we
+ * may have a STALE or TOOFEW set in the case of a partial
+ * replicated diskset. We will fix that up later.
+ */
+
lbp = s->s_lbp;
for (li = 0; li < lbp->lb_loccnt; li++) {
did_info = &(did_icp->did_ic_blkp->blk_info[li]);
@@ -5842,13 +5874,17 @@ load_old_replicas(
}
if (update_locatorblock(s,
md_expldev(lp->l_dev),
- rip->ri_devid)) {
+ rip->ri_devid, rip->ri_old_devid)) {
goto errout;
}
}
}
}
+ } else {
+ if (stale_rtn)
+ goto errout;
}
+
/*
* If the replica is in device id style - validate the device id's,
* if present, in the locator block devid area.
@@ -7146,7 +7182,8 @@ mddb_unload_set(
MD_SET_OWNERSHIP | MD_SET_BADTAG |
MD_SET_CLRTAG | MD_SET_MNSET |
MD_SET_DIDCLUP | MD_SET_MNPARSE_BLK |
- MD_SET_MN_MIR_STATE_RC);
+ MD_SET_MN_MIR_STATE_RC | MD_SET_IMPORT |
+ MD_SET_REPLICATED_IMPORT);
mutex_exit(SETMUTEX(setno));
}
@@ -7674,6 +7711,22 @@ out:
* the devt to see if it matches the given devt. If so, and
* there is an associated device id which is not the same
* as the passed in devid, delete old devid and add a new one.
+ *
+ * During import of replicated disksets, old_didptr contains
+ * the original disk's device id. Use this device id in
+ * addition to the devt to determine if an entry is a match
+ * and should be updated with the new device id of the
+ * replicated disk. Specifically, this is the case being handled:
+ *
+ * Original_disk Replicated_disk Disk_Available_During_Import
+ * c1t1d0 c1t3d0 no - so old name c1t1d0 shown
+ * c1t2d0 c1t1d0 yes - name is c1t1d0
+ * c1t3d0 c1t2d0 yes - name is c1t2d0
+ *
+ * Can't just match on devt since devt for the first and third
+ * disks will be the same, but the original disk's device id
+ * is known and can be used to distinguish which disk's
+ * replicated device id should be updated.
* RETURN
* MDDB_E_NODEVID
* MDDB_E_NOLOCBLK
@@ -7681,7 +7734,12 @@ out:
* 0 Success
*/
static int
-update_locatorblock(mddb_set_t *s, md_dev64_t dev, ddi_devid_t didptr)
+update_locatorblock(
+ mddb_set_t *s,
+ md_dev64_t dev,
+ ddi_devid_t didptr,
+ ddi_devid_t old_didptr
+)
{
mddb_lb_t *lbp = NULL;
mddb_locator_t *lp;
@@ -7690,6 +7748,11 @@ update_locatorblock(mddb_set_t *s, md_dev64_t dev, ddi_devid_t didptr)
ddi_devid_t devid_ptr;
int retval = 0;
char *minor_name;
+ int repl_import_flag;
+
+ /* Set replicated flag if this is a replicated import */
+ repl_import_flag = md_get_setstatus(s->s_setno) &
+ MD_SET_REPLICATED_IMPORT;
lbp = s->s_lbp;
/* find replicas that haven't been deleted */
@@ -7713,20 +7776,32 @@ update_locatorblock(mddb_set_t *s, md_dev64_t dev, ddi_devid_t didptr)
if (devid_ptr == NULL) {
return (MDDB_E_NODEVID);
}
+
+ /*
+ * During a replicated import the old_didptr
+ * must match the current devid before the
+ * devid can be updated.
+ */
+ if (repl_import_flag) {
+ if (ddi_devid_compare(devid_ptr,
+ old_didptr) != 0)
+ continue;
+ }
+
if (ddi_devid_compare(devid_ptr, didptr) != 0) {
/*
* devid's not equal so
* delete and add
*/
if (ddi_lyr_get_minor_name(
- md_dev64_to_dev(dev),
- S_IFBLK, &minor_name) == DDI_SUCCESS) {
+ md_dev64_to_dev(dev),
+ S_IFBLK, &minor_name) == DDI_SUCCESS) {
(void) mddb_devid_delete(s, li);
(void) mddb_devid_add(s, li, didptr,
- minor_name);
+ minor_name);
kmem_free(minor_name,
- strlen(minor_name)+1);
- break;
+ strlen(minor_name)+1);
+ break;
} else {
retval = 1;
goto err_out;
@@ -7867,7 +7942,7 @@ setdid(
}
}
- if (update_locatorblock(s, cp->c_devt, devidp)) {
+ if (update_locatorblock(s, cp->c_devt, devidp, NULL)) {
err = -1;
goto out;
}
@@ -8547,8 +8622,7 @@ mddb_configure(
if (cp->c_locator.l_old_devid) {
md_set_setstatus(setno, MD_SET_REPLICATED_IMPORT);
}
- if ((err = ridev(&s->s_rip, &cp->c_locator, NULL, flag)) != 0)
- err = mddbstatus2error(ep, err, NODEV32, setno);
+ err = ridev(&s->s_rip, &cp->c_locator, NULL, flag);
mddb_setexit(s);
break;
@@ -10065,6 +10139,16 @@ take_set(mddb_config_t *cp, int mode)
snarf_ok = 1;
}
+ /*
+ * Clear replicated import flag since this is
+ * used during the take of a diskset with
+ * previously unresolved replicated disks.
+ */
+ if (md_get_setstatus(setno) &
+ MD_SET_REPLICATED_IMPORT) {
+ md_clr_setstatus(setno, MD_SET_REPLICATED_IMPORT);
+ }
+
if (! err && mdisok(ep)) {
if (! cp->c_flags) {
medup.med_setno = setno;
@@ -12232,6 +12316,9 @@ update_mb(
int err = 0;
for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) {
+ if (rip->ri_flags & MDDB_F_EMASTER)
+ /* disk is powered off or not there */
+ continue;
if (md_get_setstatus(s->s_setno) &
MD_SET_REPLICATED_IMPORT) {
@@ -12282,7 +12369,10 @@ update_setname(
rw_enter(&nm_lock.lock, RW_WRITER);
if ((nh = get_first_record(setno, 0, NM_SHARED)) == NULL) {
- err = MD_KEYBAD;
+ /*
+ * No namespace is okay
+ */
+ err = 0;
goto out;
}
@@ -12304,13 +12394,13 @@ update_setname(
if (remove_shared_entry(nh, o_key, NULL, 0L | NM_IMP_SHARED |
NM_NOCOMMIT)) {
- err = MD_KEYBAD;
+ err = MDDB_E_NORECORD;
goto out;
}
if ((new_shn = (struct nm_shared_name *)alloc_entry(
nh, md_set[setno].s_nmid, len, NM_SHARED |
NM_NOCOMMIT, &recid)) == NULL) {
- err = MD_KEYBAD;
+ err = MDDB_E_NORECORD;
goto out;
}
@@ -12332,17 +12422,26 @@ out:
return (err);
}
+/*
+ * Returns 0 on success.
+ * Returns -1 on failure with ep filled in.
+ */
static int
md_imp_db(
- set_t setno
+ set_t setno,
+ int stale_flag,
+ md_error_t *ep
)
{
mddb_set_t *s;
int err = 0;
mddb_dt_t *dtp;
+ mddb_lb_t *lbp;
+ int i;
+ int loccnt;
if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) {
- return (err);
+ return (mddbstatus2error(ep, err, NODEV32, setno));
}
/* Update dt */
@@ -12351,6 +12450,7 @@ md_imp_db(
}
if ((err = dt_write(s)) != 0) {
+ err = mdsyserror(ep, err);
mddb_setexit(s);
return (err);
}
@@ -12362,14 +12462,36 @@ md_imp_db(
*/
/* Update lb */
- if ((err = writelocall(s)) != 0) {
- mddb_setexit(s);
- return (err);
- }
+ if (stale_flag & MD_IMP_STALE_SET) {
+ lbp = s->s_lbp;
+ loccnt = lbp->lb_loccnt;
+ for (i = 0; i < loccnt; i++) {
+ mddb_locator_t *lp = &lbp->lb_locators[i];
+ md_dev64_t ndev = md_expldev(lp->l_dev);
+ ddi_devid_t devid_ptr;
+ devid_ptr = s->s_did_icp->did_ic_devid[i];
+ if (devid_ptr == NULL) {
+ /*
+ * Already deleted, go to next one.
+ */
+ continue;
+ }
+ if (mddb_devid_validate((ddi_devid_t)devid_ptr, &ndev,
+ NULL)) {
+ /* disk unavailable, mark deleted */
+ lp->l_flags = MDDB_F_DELETED;
+ /* then remove the device id from the list */
+ free_mbipp(&s->s_mbiarray[i]);
+ s->s_mbiarray[i] = 0;
+ (void) mddb_devid_delete(s, i);
+ }
+ }
+ md_clr_setstatus(setno, MD_SET_STALE);
+ }
- /* Update mb */
- if ((err = update_mb(s)) != 0) {
+ if ((err = writelocall(s)) != 0) {
+ err = mdmddberror(ep, MDDB_E_NOTNOW, NODEV32, setno);
mddb_setexit(s);
return (err);
}
@@ -12377,11 +12499,13 @@ md_imp_db(
mddb_setexit(s);
/* Update db records */
- if ((err = update_db_rec(s)) != 0)
- return (err);
+ if ((err = update_db_rec(s)) != 0) {
+ return (mddbstatus2error(ep, err, NODEV32, setno));
+ }
/* Update setname embedded in the namespace */
- err = update_setname(setno);
+ if ((err = update_setname(setno)) != 0)
+ return (mddbstatus2error(ep, err, NODEV32, setno));
return (err);
}
@@ -12436,136 +12560,20 @@ md_setup_recids(
*ids = &recids[0];
}
-static int
-md_imp_create_set(
- set_t setno
-)
-{
- mddb_set_t *s;
- int drc = 0, err = 0;
- size_t sr_size = sizeof (md_set_record);
- md_set_record *sr;
- mddb_recid_t sr_recid, dr_recid, *ids = NULL;
- mddb_ri_t *rip, *trip;
- md_drive_record *dr;
- size_t dr_size = sizeof (md_drive_record);
- mdkey_t dr_key;
- md_error_t error = MDNULLERROR;
-
-
- if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL)
- return (err);
-
- /* Create and fill in set record */
- if ((sr_recid = mddb_createrec(sr_size, MDDB_USER, MDDB_UR_SR,
- MD_CRO_32BIT, MD_LOCAL_SET)) < 0) {
- mddb_setexit(s);
- return (MDDB_E_INVALID);
- }
-
- sr = (md_set_record *)mddb_getrecaddr(sr_recid);
- sr->sr_selfid = sr_recid;
- sr->sr_setno = s->s_setno;
- (void) strcpy(sr->sr_setname, s->s_setname);
- uniqtime32(&sr->sr_ctime);
- sr->sr_genid = 0;
- sr->sr_revision = MD_SET_RECORD_REVISION;
- sr->sr_flags |= MD_SR_OK;
- sr->sr_mhiargs = defmhiargs;
- (void) strcpy(sr->sr_nodes[0], utsname.nodename);
-
- /* Create and fillin drive records */
- for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) {
- /*
- * Add entry and create the record
- */
- if ((dr_key = md_setdevname(MD_LOCAL_SET, 1, MD_KEYWILD,
- rip->ri_driver, md_getminor(rip->ri_dev),
- rip->ri_devname, setno, &error)) == 0)
- continue;
-
- if (dr_key < 0) {
- mddb_setexit(s);
- return (MD_KEYBAD);
- }
-
- if ((dr_recid = mddb_createrec(dr_size, MDDB_USER,
- MDDB_UR_DR, MD_CRO_32BIT, MD_LOCAL_SET)) < 0) {
- mddb_setexit(s);
- return (MDDB_E_INVALID);
- }
-
- dr = (md_drive_record *)mddb_getrecaddr(dr_recid);
- dr->dr_selfid = dr_recid;
-
- /*
- * We need to check to see if the drive on
- * the rip has a replica. If it doesn't have
- * a replica, then we need to set the dr_dbcnt
- * and dr_dbsize to 0 to reflect that.
- */
- if (rip->ri_mbip == NULL) {
- dr->dr_dbcnt = 0;
- dr->dr_dbsize = 0;
- } else {
- dr->dr_dbcnt = 1;
-
- for (trip = s->s_rip; trip != NULL;
- trip = trip->ri_next) {
-
- if (trip == rip)
- continue;
-
- if ((trip->ri_dev == rip->ri_dev) &&
- (strcmp(trip->ri_devname, rip->ri_devname)
- == 0))
- dr->dr_dbcnt++;
- }
-
- dr->dr_dbsize = rip->ri_mbip->mbi_mddb_mb.mb_blkcnt + 1;
- }
- dr->dr_key = dr_key;
- uniqtime32(&dr->dr_ctime);
- dr->dr_genid = 1;
- dr->dr_revision = MD_DRIVE_RECORD_REVISION;
- dr->dr_flags = MD_SR_OK;
- drc++;
-
- /* Add on the linked list */
- (void) md_dr_add(sr, dr);
- }
-
- /*
- * Alloc and setup recids which include set record
- */
- (void) md_setup_recids(sr, &ids, drc + 2);
-
- /*
- * Commit all the records
- */
- err = mddb_commitrecs(ids);
-
- if (ids)
- kmem_free(ids, sizeof (mddb_recid_t) * (drc + 2));
- mddb_setexit(s);
- return (err);
-}
-
/*
- * namespace is loaded before this is called.
- * The purpose of this function is to update the device ids in the entire
- * namespace using the data in the ri structure. Compare the devid found in
- * the namespace with ri_old_devid and if they are the same, update with the
- * devid in ri_devid.
+ * The purpose of this function is to replace the old_devid with the
+ * new_devid in the given namespace. This is used for importing
+ * remotely replicated drives.
*/
-static int
-md_imp_update_namespace_did(mddb_set_t *s)
+int
+md_update_namespace_rr_did(
+ mddb_config_t *cp
+)
{
- set_t setno = s->s_lbp->lb_setno;
+ set_t setno = cp->c_setno;
struct nm_next_hdr *nh;
mdkey_t key = MD_KEYWILD;
side_t side = MD_SIDEWILD;
- mddb_ri_t *rip = NULL;
mddb_recid_t recids[3];
struct did_min_name *n;
struct nm_next_hdr *did_shr_nh;
@@ -12578,6 +12586,13 @@ md_imp_update_namespace_did(mddb_set_t *s)
struct did_shr_name *shn;
size_t offset;
struct nm_next_hdr *this_did_shr_nh;
+ void *old_devid, *new_devid;
+
+ if (!(md_get_setstatus(setno) & MD_SET_NM_LOADED))
+ return (EIO);
+
+ old_devid = (void *)(uintptr_t)cp->c_locator.l_old_devid;
+ new_devid = (void *)(uintptr_t)cp->c_locator.l_devid;
/*
* It is okay if we dont have any configuration
@@ -12591,7 +12606,7 @@ md_imp_update_namespace_did(mddb_set_t *s)
/* check out every entry in the namespace */
if ((n = (struct did_min_name *)lookup_entry(nh, setno,
side, key, NODEV64, NM_DEVID)) == NULL) {
- break;
+ continue;
} else {
did_shr_nh = get_first_record(setno, 0, NM_DEVID |
NM_SHARED);
@@ -12608,39 +12623,37 @@ md_imp_update_namespace_did(mddb_set_t *s)
rw_enter(&nm_lock.lock, RW_WRITER);
devid = (ddi_devid_t)shr_n->did_devid;
/* find this devid in the incore replica */
- for (rip = s->s_rip; rip != NULL; rip = rip->ri_next) {
- if (ddi_devid_compare(devid, rip->ri_old_devid)
- == 0) {
- /*
- * found the corresponding entry
- * update with new devid
- */
- /* first remove old devid info */
- ent_did_key = shr_n ->did_key;
- ent_did_count = shr_n->did_count;
- ent_did_data = shr_n->did_data;
- ent_size = DID_SHR_NAMSIZ(shr_n);
- size = ((struct nm_rec_hdr *)
- this_did_shr_nh->nmn_record)->
- r_used_size - offset - ent_size;
- if (size == 0) {
- (void) bzero(shr_n, ent_size);
- } else {
- (void) ovbcopy((caddr_t)shr_n +
- ent_size, shr_n, size);
- (void) bzero((caddr_t)shr_n +
- size, ent_size);
- }
- ((struct nm_rec_hdr *)this_did_shr_nh->
- nmn_record)->r_used_size -=
- ent_size;
- /* add in new devid info */
- if ((shn = (struct did_shr_name *)
- alloc_entry(did_shr_nh,
- md_set[setno].s_did_nmid,
- ddi_devid_sizeof(rip->ri_devid),
- NM_DEVID | NM_SHARED | NM_NOCOMMIT,
- &recids[0])) == NULL) {
+ if (ddi_devid_compare(devid, old_devid) == 0) {
+ /*
+ * found the corresponding entry
+ * update with new devid
+ */
+ /* first remove old devid info */
+ ent_did_key = shr_n ->did_key;
+ ent_did_count = shr_n->did_count;
+ ent_did_data = shr_n->did_data;
+ ent_size = DID_SHR_NAMSIZ(shr_n);
+ size = ((struct nm_rec_hdr *)
+ this_did_shr_nh->nmn_record)->
+ r_used_size - offset - ent_size;
+ if (size == 0) {
+ (void) bzero(shr_n, ent_size);
+ } else {
+ (void) ovbcopy((caddr_t)shr_n +
+ ent_size, shr_n, size);
+ (void) bzero((caddr_t)shr_n +
+ size, ent_size);
+ }
+ ((struct nm_rec_hdr *)this_did_shr_nh->
+ nmn_record)->r_used_size -=
+ ent_size;
+ /* add in new devid info */
+ if ((shn = (struct did_shr_name *)
+ alloc_entry(did_shr_nh,
+ md_set[setno].s_did_nmid,
+ cp->c_locator.l_devid_sz,
+ NM_DEVID | NM_SHARED | NM_NOCOMMIT,
+ &recids[0])) == NULL) {
rw_exit(&nm_lock.lock);
return (ENOMEM);
}
@@ -12649,34 +12662,74 @@ md_imp_update_namespace_did(mddb_set_t *s)
ent_did_data |= NM_DEVID_VALID;
shn->did_data = ent_did_data;
shn->did_size = ddi_devid_sizeof(
- rip->ri_devid);
- bcopy((void *)rip->ri_devid, (void *)
+ new_devid);
+ bcopy((void *)new_devid, (void *)
shn->did_devid, shn->did_size);
recids[1] = md_set[setno].s_nmid;
recids[2] = 0;
mddb_commitrecs_wrapper(recids);
- }
}
rw_exit(&nm_lock.lock);
}
}
+
return (0);
}
+/*
+ * namespace is loaded before this is called.
+ * This function is a wrapper for md_update_namespace_rr_did.
+ *
+ * md_update_namespace_rr_did may be called twice if attempting to
+ * resolve a replicated device id during the take of a diskset - once
+ * for the diskset namespace and a second time for the local namespace.
+ * The local namespace would need to be updated when a drive has been
+ * found during a take of the diskset that hadn't been resolved during
+ * the import (aka partial replicated import).
+ *
+ * If being called during the import of the diskset (IMPORT flag set)
+ * md_update_namespace_rr_did will only be called once with the diskset
+ * namespace.
+ */
+int
+md_update_nm_rr_did_ioctl(
+ mddb_config_t *cp
+)
+{
+ int rval = 0;
+
+ /* If update of diskset namespace fails, stop and return failure */
+ if ((rval = md_update_namespace_rr_did(cp)) != 0)
+ return (rval);
+
+ if (cp->c_flags & MDDB_C_IMPORT)
+ return (0);
+
+ /* If update of local namespace fails, return failure */
+ cp->c_setno = MD_LOCAL_SET;
+ rval = md_update_namespace_rr_did(cp);
+ return (rval);
+}
+
/*ARGSUSED*/
int
md_imp_snarf_set(
- set_t *setnum,
- int mode
+ mddb_config_t *cp
)
{
- set_t setno = *setnum; /* import setno */
+ set_t setno;
+ int stale_flag;
mddb_set_t *s;
int i, err = 0;
md_ops_t *ops;
+ md_error_t *ep = &cp->c_mde;
+
+ setno = cp->c_setno;
+ stale_flag = cp->c_flags;
+ mdclrerror(ep);
if (setno >= md_nsets) {
- return (EINVAL);
+ return (mdsyserror(ep, EINVAL));
}
md_haltsnarf_enter(setno);
@@ -12688,6 +12741,7 @@ md_imp_snarf_set(
md_set_setstatus(setno, MD_SET_IMPORT);
if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL) {
+ err = mddbstatus2error(ep, err, NODEV32, setno);
goto out;
}
@@ -12708,12 +12762,12 @@ md_imp_snarf_set(
* and ask each module to fixup unit records
*/
if (!md_load_namespace(setno, NULL, NM_DEVID)) {
- err = ENOENT;
+ err = mdsyserror(ep, ENOENT);
goto cleanup;
}
if (!md_load_namespace(setno, NULL, 0L)) {
(void) md_unload_namespace(setno, NM_DEVID);
- err = ENOENT;
+ err = mdsyserror(ep, ENOENT);
goto cleanup;
}
@@ -12732,22 +12786,17 @@ md_imp_snarf_set(
* (4) directory block
* calls appropriate writes to push changes out
*/
- if ((err = md_imp_db(setno)) != 0)
- goto cleanup;
-
- /*
- * Create set in MD_LOCAL_SET
- */
- if ((err = md_imp_create_set(setno)) != 0)
+ if ((err = md_imp_db(setno, stale_flag, ep)) != 0) {
goto cleanup;
+ }
/*
- * update the namespace device ids if necessary (ie. block copy disk)
+ * Don't unload namespace if importing a replicated diskset.
+ * Namespace will be unloaded with an explicit RELEASE_SET ioctl.
*/
- if ((md_get_setstatus(s->s_setno) & MD_SET_REPLICATED_IMPORT)) {
- if ((err = md_imp_update_namespace_did(s)) != 0) {
- goto cleanup;
- }
+ if (md_get_setstatus(s->s_setno) & MD_SET_REPLICATED_IMPORT) {
+ md_haltsnarf_exit(setno);
+ return (err);
}
cleanup:
diff --git a/usr/src/uts/common/io/lvm/md/md_names.c b/usr/src/uts/common/io/lvm/md/md_names.c
index efa401a6a0..43f01703ee 100644
--- a/usr/src/uts/common/io/lvm/md/md_names.c
+++ b/usr/src/uts/common/io/lvm/md/md_names.c
@@ -1726,14 +1726,17 @@ zero_data_ptrs(struct nm_next_hdr *nh, set_t setno)
*/
mdkey_t
md_setdevname(
- set_t setno, /* specify which namespace to put in */
- side_t side, /* (key 1) side # */
- mdkey_t key, /* (key 2) KEYWILD - alloc key, else use key */
- char *drvnm, /* store this driver name with devicename */
- minor_t mnum, /* store this minor number as well */
- char *devname, /* device name to be stored */
- set_t imp_setno, /* used exclusively by import */
- md_error_t *ep /* place to return error info */
+ set_t setno, /* specify which namespace to put in */
+ side_t side, /* (key 1) side # */
+ mdkey_t key, /* (key 2) KEYWILD - alloc key, else use key */
+ char *drvnm, /* store this driver name with devicename */
+ minor_t mnum, /* store this minor number as well */
+ char *devname, /* device name to be stored */
+ int imp_flag, /* used exclusively by import */
+ ddi_devid_t imp_devid, /* used exclusively by import */
+ char *imp_mname, /* used exclusively by import */
+ set_t imp_setno, /* used exclusively by import */
+ md_error_t *ep /* place to return error info */
)
{
struct nm_next_hdr *nh, *did_nh = NULL;
@@ -1819,18 +1822,26 @@ md_setdevname(
* of the side information is taken here because it is dealt
* with later on.
*/
- devt = makedevice(ddi_name_to_major(drvnm), mnum);
- if ((ddi_lyr_get_devid(devt, &devid) == DDI_SUCCESS) &&
- (ddi_lyr_get_minor_name(devt, S_IFBLK, &mname) ==
- DDI_SUCCESS) &&
- (((mddb_set_t *)md_set[setno].s_db)->s_lbp->lb_flags &
- MDDB_DEVID_STYLE))
- /*
- * Reference the device id namespace
- */
+ if (!imp_flag) {
+ devt = makedevice(ddi_name_to_major(drvnm), mnum);
+ if ((ddi_lyr_get_devid(devt, &devid) == DDI_SUCCESS) &&
+ (ddi_lyr_get_minor_name(devt, S_IFBLK, &mname) ==
+ DDI_SUCCESS) &&
+ (((mddb_set_t *)md_set[setno].s_db)->s_lbp->lb_flags &
+ MDDB_DEVID_STYLE))
+ /*
+ * Reference the device id namespace
+ */
+ shared = NM_DEVID | NM_NOTSHARED;
+ else
+ shared = NM_NOTSHARED;
+ } else {
+ /* Importing diskset has devids so store in namespace */
+ devid = kmem_alloc(ddi_devid_sizeof(imp_devid), KM_SLEEP);
+ bcopy(imp_devid, devid, ddi_devid_sizeof(imp_devid));
+ mname = md_strdup(imp_mname);
shared = NM_DEVID | NM_NOTSHARED;
- else
- shared = NM_NOTSHARED;
+ }
/*
* Always lookup the primary name space
@@ -1873,6 +1884,41 @@ md_setdevname(
*/
lookup_res = lookup_deventry(nh, setno, side, key, drvnm, mnum, dname,
fname, &n);
+
+ /* If we are importing the set */
+ if (imp_flag && (lookup_res == LOOKUP_DEV_FOUND)) {
+ ushort_t did_sz;
+ ddi_devid_t did;
+
+ /*
+ * We need to check for the case where there is a disk
+ * already in the namespace with a different ID from
+ * the one we want to add, but the same name. This is
+ * possible in the case of an unavailable disk.
+ */
+ rw_exit(&nm_lock.lock);
+ if (md_getdevid(setno, side, n->n_key, NULL, &did_sz) != 0)
+ did_sz = 0;
+ rw_enter(&nm_lock.lock, RW_WRITER);
+ if (did_sz > 0) {
+ did = kmem_zalloc(did_sz, KM_SLEEP);
+ rw_exit(&nm_lock.lock);
+ (void) md_getdevid(setno, side, n->n_key, did, &did_sz);
+ rw_enter(&nm_lock.lock, RW_WRITER);
+ if (ddi_devid_compare(did, devid) == 0) {
+ kmem_free(did, did_sz);
+ retval = 0;
+ goto out;
+ }
+ kmem_free(did, did_sz);
+ }
+ /*
+ * This is not the same disk so we haven't really found it.
+ * Thus, we need to say it's "NOMATCH" and create a new
+ * entry.
+ */
+ lookup_res = LOOKUP_DEV_NOMATCH;
+ }
switch (lookup_res) {
case LOOKUP_DEV_FOUND:
/* If we are importing the set */
@@ -2079,8 +2125,9 @@ add_devid:
}
}
out:
- if (devid)
+ if (devid) {
ddi_devid_free(devid);
+ }
if (dname)
freestr(dname);
if (mname)
diff --git a/usr/src/uts/common/sys/lvm/md_mddb.h b/usr/src/uts/common/sys/lvm/md_mddb.h
index b031594f19..8f6226e675 100644
--- a/usr/src/uts/common/sys/lvm/md_mddb.h
+++ b/usr/src/uts/common/sys/lvm/md_mddb.h
@@ -881,6 +881,7 @@ extern int mddb_validate_lb(set_t setno, int *rmaxsz);
extern int mddb_getinvlb_devid(set_t setno, int count,
int size, char **ctdptr);
extern int md_update_minor(set_t, side_t, mdkey_t);
+extern int md_update_nm_rr_did_ioctl(mddb_config_t *cp);
extern int md_update_top_device_minor(set_t, side_t,
md_dev64_t);
#ifdef DEBUG
diff --git a/usr/src/uts/common/sys/lvm/mdio.h b/usr/src/uts/common/sys/lvm/mdio.h
index 9ff907f078..1cedfe2bc6 100644
--- a/usr/src/uts/common/sys/lvm/mdio.h
+++ b/usr/src/uts/common/sys/lvm/mdio.h
@@ -140,6 +140,11 @@ extern "C" {
"logging; they\n#pass data directly to the underlying device.\n"
/*
+ * for importing of disksets (IMP_LOAD)
+ */
+#define MD_IMP_STALE_SET 1
+
+/*
* miscname stuff
*/
@@ -304,6 +309,7 @@ typedef struct mdnm_params {
ushort_t minorname_len; /* length of minor name */
uint64_t minorname; /* address of minor name */
uint_t ref_count; /* returned n_count */
+ int imp_flag; /* used by metaimport */
} mdnm_params_t;
typedef struct mdhspnm_params {
@@ -749,6 +755,8 @@ typedef struct md_regen_param {
#define MD_DB_LBINITTIME (MDIOC|104) /* get the lb_inittime */
#define MD_IOCGET_HSP_NM (MDIOC|105) /* get hsp entry from namespace */
#define MD_IOCREM_DEV (MDIOC|106) /* remove device node for unit */
+#define MD_IOCUPDATE_NM_RR_DID (MDIOC|107) /* update remotely repl did in NM */
+
#define MDIOC_MISC (MDIOC|128) /* misc module base */
/* Used in DEBUG_TEST code */
diff --git a/usr/src/uts/common/sys/lvm/mdiox.x b/usr/src/uts/common/sys/lvm/mdiox.x
index fdb2d14bb9..9280d849e9 100644
--- a/usr/src/uts/common/sys/lvm/mdiox.x
+++ b/usr/src/uts/common/sys/lvm/mdiox.x
@@ -188,9 +188,12 @@ const MD_DRIVE_RECORD_REVISION = 0x00010000;
#ifdef RPC_HDR
%
-%#define MD_DR_ADD 0x00000001U
-%#define MD_DR_DEL 0x00000002U
-%#define MD_DR_OK 0x80000000U
+%#define MD_DR_ADD 0x00000001U
+%#define MD_DR_DEL 0x00000002U
+%#define MD_DR_FIX_MB_DID 0x10000000U /* Fix MB */
+%#define MD_DR_FIX_LB_NM_DID 0x20000000U /* Fix LB and namespaces */
+%#define MD_DR_UNRSLV_REPLICATED 0x40000000U
+%#define MD_DR_OK 0x80000000U
#endif /* RPC_HDR */
#if !defined(_KERNEL)
@@ -253,19 +256,21 @@ const MD_SET_RECORD_REVISION = 0x00010000;
#ifdef RPC_HDR
%
-%#define MD_SR_ADD 0x00000001U
-%#define MD_SR_DEL 0x00000002U
-%#define MD_SR_CHECK 0x00000004U
-%#define MD_SR_CVT 0x00000008U
-%#define MD_SR_LOCAL 0x00000010U
-%#define MD_SR_MB_DEVID 0x10000000U
-%#define MD_SR_AUTO_TAKE 0x20000000U
-%#define MD_SR_MN 0x40000000U
-%#define MD_SR_OK 0x80000000U
+%#define MD_SR_ADD 0x00000001U
+%#define MD_SR_DEL 0x00000002U
+%#define MD_SR_CHECK 0x00000004U
+%#define MD_SR_CVT 0x00000008U
+%#define MD_SR_LOCAL 0x00000010U
+%#define MD_SR_UNRSLV_REPLICATED 0x08000000U
+%#define MD_SR_MB_DEVID 0x10000000U
+%#define MD_SR_AUTO_TAKE 0x20000000U
+%#define MD_SR_MN 0x40000000U
+%#define MD_SR_OK 0x80000000U
%#define MD_SR_STATE_FLAGS (MD_SR_ADD | \
% MD_SR_DEL | \
% MD_SR_CHECK | \
% MD_SR_CVT | \
+% MD_SR_UNRSLV_REPLICATED | \
% MD_SR_OK)
#endif /* RPC_HDR */
diff --git a/usr/src/uts/common/sys/lvm/mdvar.h b/usr/src/uts/common/sys/lvm/mdvar.h
index 7388c54d41..c03847efb5 100644
--- a/usr/src/uts/common/sys/lvm/mdvar.h
+++ b/usr/src/uts/common/sys/lvm/mdvar.h
@@ -770,6 +770,7 @@ extern void md_remove_minor_node(minor_t);
/* Externals from md_names.c */
extern mdkey_t md_setdevname(set_t, side_t, mdkey_t, char *, minor_t, char *,
+ int imp_flag, ddi_devid_t devid, char *minorname,
set_t, md_error_t *);
extern int md_getdevname(set_t, side_t, mdkey_t, md_dev64_t, char *,
size_t);
@@ -815,7 +816,7 @@ extern md_dev64_t md_makedevice(major_t, minor_t);
extern major_t md_getmajor(md_dev64_t);
extern minor_t md_getminor(md_dev64_t);
extern void md_timeval(md_timeval32_t *);
-extern int md_imp_snarf_set(set_t *, int);
+extern int md_imp_snarf_set(mddb_config_t *);
/* externals from md_mddb.c */
extern int mddb_reread_rr(set_t, mddb_recid_t);