diff options
author | John Wren Kennedy <John.Wren.Kennedy@Sun.COM> | 2008-12-24 08:23:40 -0700 |
---|---|---|
committer | John Wren Kennedy <John.Wren.Kennedy@Sun.COM> | 2008-12-24 08:23:40 -0700 |
commit | bf85a12b7c81d0745d5a8aff65baeff50006cde9 (patch) | |
tree | 2df17ed75d500b9d39accaf22e204ad908d92afb /usr/src/lib/lvm | |
parent | 2a22541854e0588bbe16b56aee850bdab9f2684b (diff) | |
download | illumos-joyent-bf85a12b7c81d0745d5a8aff65baeff50006cde9.tar.gz |
6580729 node 16th joined, metaclust timed out in step4, local_daemon has rpc tli error
6692015 SVM global reader-writer mutex priority inversion causes deadlock under load
6725904 callers of meta_getdnp_bydevid() should do so correctly
6726615 Bruichladdich - SVM support for sQFS on mirrors in SunCluster
6756133 MD_MN_MSG_MDDB_PARSE message passes incorrect message size when used
6758399 mdmn_ksend_message() retries door_ki_upcall() without resetting data_ptr/data_size
6766848 mdcommd assumes SVCXPRT will survive thr_create()
6769738 md_mps_t not completely cleared before re-use
Diffstat (limited to 'usr/src/lib/lvm')
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/mapfile-vers | 24 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_db.c | 11 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c | 50 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_mn_comm.c | 280 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c | 69 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c | 34 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_mn_subr.c | 27 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_nameinfo.c | 10 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_runtime.c | 85 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_set.c | 14 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_set_hst.c | 443 | ||||
-rw-r--r-- | usr/src/lib/lvm/libmeta/common/meta_sp.c | 90 |
12 files changed, 681 insertions, 456 deletions
diff --git a/usr/src/lib/lvm/libmeta/common/mapfile-vers b/usr/src/lib/lvm/libmeta/common/mapfile-vers index 960a5fe5a4..4cbee994c8 100644 --- a/usr/src/lib/lvm/libmeta/common/mapfile-vers +++ b/usr/src/lib/lvm/libmeta/common/mapfile-vers @@ -22,8 +22,6 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# SUNWprivate_1.1 { global: @@ -92,7 +90,7 @@ SUNWprivate_1.1 { commitset; comp_state_to_name; copy_msg; - copy_msg_1; + copy_msg_2; copy_result; crcfreetab; crcfunc; @@ -160,12 +158,12 @@ SUNWprivate_1.1 { md_med_pmap_timeout; mdmn_abort; mdmn_allocate_changelog; - mdmn_comm_lock_1; - mdmn_comm_msglock_1; - mdmn_comm_reinit_set_1; - mdmn_comm_resume_1; - mdmn_comm_suspend_1; - mdmn_comm_unlock_1; + mdmn_comm_lock_2; + mdmn_comm_msglock_2; + mdmn_comm_reinit_set_2; + mdmn_comm_resume_2; + mdmn_comm_suspend_2; + mdmn_comm_unlock_2; mdmn_create_msgid; mdmn_get_changelogrec; mdmn_get_handler; @@ -177,14 +175,14 @@ SUNWprivate_1.1 { mdmn_reinit_set; mdmn_reset_changelog; mdmn_resume; - mdmn_send_1; + mdmn_send_2; mdmn_send_message; mdmn_snarf_changelog; mdmn_suspend; mdmn_unlog_msg; - mdmn_wakeup_initiator_1; - mdmn_wakeup_master_1; - mdmn_work_1; + mdmn_wakeup_initiator_2; + mdmn_wakeup_master_2; + mdmn_work_2; mdnullerror; md_perror; md_post_sig; diff --git a/usr/src/lib/lvm/libmeta/common/meta_db.c b/usr/src/lib/lvm/libmeta/common/meta_db.c index c79cfca3be..e3410773f5 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_db.c +++ b/usr/src/lib/lvm/libmeta/common/meta_db.c @@ -18,13 +18,12 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Just in case we're not in a build environment, make sure that * TEXT_DOMAIN gets set to something. @@ -928,7 +927,7 @@ meta_db_addsidenms( */ send_rval = mdmn_send_message(sp->setno, MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND | - MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns, + MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ns, sizeof (md_mn_msg_meta_db_newside_t), &resultp, ep); if (send_rval != 0) { @@ -1048,7 +1047,7 @@ meta_db_delsidenm( */ send_rval = mdmn_send_message(sp->setno, MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND | - MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds, + MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ds, sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep); if (send_rval != 0) { if (resultp == NULL) @@ -1542,7 +1541,7 @@ meta_db_attach( flags |= MD_MSGF_NO_LOG; send_rval = mdmn_send_message(sp->setno, MD_MN_MSG_META_DB_ATTACH, - flags, (char *)&attach, + flags, 0, (char *)&attach, sizeof (md_mn_msg_meta_db_attach_t), &resultp, ep); if (send_rval != 0) { @@ -2007,7 +2006,7 @@ meta_db_detach( flags |= MD_MSGF_NO_LOG; send_rval = mdmn_send_message(sp->setno, MD_MN_MSG_META_DB_DETACH, - flags, (char *)&detach, + flags, 0, (char *)&detach, sizeof (md_mn_msg_meta_db_detach_t), &resultp, ep); if (send_rval != 0) { diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c b/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c index 05a5ea9df5..388a5d9de7 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c +++ b/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,16 +18,14 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <stdlib.h> #include <unistd.h> - #include <wait.h> #include <sys/time.h> #include <meta.h> @@ -131,7 +128,7 @@ copy_changelog(mdmn_changelog_record_t *incp, odp->lr_class = incp->lr_class; odp->lr_msglen = incp->lr_msglen; if (incp->lr_msglen) - copy_msg_1(&incp->lr_msg, &odp->lr_od_msg, direction); + copy_msg_2(&incp->lr_msg, &odp->lr_od_msg, direction); } else { incp->lr_revision = odp->lr_revision; incp->lr_flags = odp->lr_flags; @@ -139,7 +136,7 @@ copy_changelog(mdmn_changelog_record_t *incp, incp->lr_class = odp->lr_class; incp->lr_msglen = odp->lr_msglen; if (odp->lr_msglen) - copy_msg_1(&incp->lr_msg, &odp->lr_od_msg, direction); + copy_msg_2(&incp->lr_msg, &odp->lr_od_msg, direction); } } @@ -196,7 +193,7 @@ mdmn_allocate_changelog(mdsetname_t *sp, md_error_t *ep) (void) mdstealerror(ep, &req.ur_mde); #ifdef DEBUG syslog(LOG_DEBUG, "allocate_log: %s\n", - mde_sperror(ep, "")); + mde_sperror(ep, "")); #endif Free(mdmn_changelog[setno]); return (-1); @@ -389,13 +386,14 @@ mdmn_unlog_msg(md_mn_msg_t *msg) assert(lr != NULL); if (!MSGID_CMP(&(msg->msg_msgid), &(lr->lr_msg.msg_msgid))) { syslog(LOG_ERR, dgettext(TEXT_DOMAIN, - "unlog_msg: msgid mismatch\n" - "\t\tstored: ID = (%d, 0x%llx-%d) setno %d class %d type %d\n" - "\t\tattempting to unlog:\n" - "\t\tID = (%d, 0x%llx-%d) setno %d class %d type %d.\n"), - MSGID_ELEMS(lr->lr_msg.msg_msgid), lr->lr_setno, - lr->lr_class, lr->lr_msgtype, MSGID_ELEMS(msg->msg_msgid), - msg->msg_setno, class, msg->msg_type); + "unlog_msg: msgid mismatch\n" + "\t\tstored: ID = (%d, 0x%llx-%d) setno %d " + "class %d type %d\n" + "\t\tattempting to unlog:\n" + "\t\tID = (%d, 0x%llx-%d) setno %d class %d type %d.\n"), + MSGID_ELEMS(lr->lr_msg.msg_msgid), lr->lr_setno, + lr->lr_class, lr->lr_msgtype, MSGID_ELEMS(msg->msg_msgid), + msg->msg_setno, class, msg->msg_type); return (-1); } lr->lr_msglen = 0; @@ -462,10 +460,10 @@ mdmn_commitlog(md_set_desc *sd, md_error_t *ep) if (!(MD_MNSET_DESC(sd)) || !sd->sd_mn_am_i_master) { if (!(MD_MNSET_DESC(sd))) { syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN, - "mdmn_commitlog - Not MN Set\n")); + "mdmn_commitlog - Not MN Set\n")); } else { syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN, - "mdmn_commit_log - Not Master\n")); + "mdmn_commit_log - Not Master\n")); } return (-1); } @@ -485,7 +483,7 @@ mdmn_commitlog(md_set_desc *sd, md_error_t *ep) req.ur_size = MDMN_LOGRECSIZE_OD; req.ur_data = (uintptr_t)&clodrec; if ((retval = metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, - NULL)) != 0) { + NULL)) != 0) { (void) mdstealerror(ep, &req.ur_mde); #ifdef DEBUG syslog(LOG_DAEMON|LOG_DEBUG, @@ -501,16 +499,16 @@ mdmn_commitlog(md_set_desc *sd, md_error_t *ep) recs[lrc] = 0; /* Commit to mddb on disk */ METAD_SETUP_LR(MD_DB_COMMIT_MANY, setno, - mdmn_changelog[setno][0].lr_selfid); + mdmn_changelog[setno][0].lr_selfid); req.ur_size = size; req.ur_data = (uintptr_t)recs; if ((retval = metaioctl(MD_MN_DB_USERREQ, &req, - &req.ur_mde, NULL)) != 0) { + &req.ur_mde, NULL)) != 0) { (void) mdstealerror(ep, &req.ur_mde); #ifdef DEBUG syslog(LOG_DAEMON|LOG_DEBUG, - "mdmn_commitlog - metaioctl COMMIT_MANY" - "Failure\n%s", mde_sperror(ep, "")); + "mdmn_commitlog - metaioctl COMMIT_MANY" + "Failure\n%s", mde_sperror(ep, "")); #endif } } @@ -609,7 +607,7 @@ mdmn_snarf_changelog(set_t set, md_error_t *ep) } lr = (mdmn_changelog_record_od_t *)get_ur_rec(set, MD_UR_GET_NEXT, - MDDB_UR_LR, &id, ep); + MDDB_UR_LR, &id, ep); if (lr == NULL) return (0); @@ -618,7 +616,7 @@ mdmn_snarf_changelog(set_t set, md_error_t *ep) if (mdmn_changelog[set] == NULL) { /* Allocate incore state for the log */ mdmn_changelog[set] = Zalloc(MDMN_LOGHDR_SIZE * - mdmn_logrecs); + mdmn_logrecs); } do { diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c b/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c index e005e83348..2feae7301a 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c +++ b/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <stdlib.h> #include <unistd.h> #include <wait.h> @@ -72,181 +70,264 @@ mdmn_get_timeout(md_mn_msgtype_t msgtype) void ldump_msg(char *prefix, md_mn_msg_t *msg) { - (void) fprintf(stderr, "%s &msg = 0x%x\n", prefix, (uint_t)msg); - (void) fprintf(stderr, "%s ID = (%d, 0x%llx-%d)\n", prefix, + (void) fprintf(stderr, "%s &msg = 0x%x\n", prefix, (uint_t)msg); + (void) fprintf(stderr, "%s ID = (%d, 0x%llx-%d)\n", prefix, MSGID_ELEMS(msg->msg_msgid)); - (void) fprintf(stderr, "%s sender = %d\n", prefix, msg->msg_sender); - (void) fprintf(stderr, "%s flags = 0x%x\n", prefix, msg->msg_flags); - (void) fprintf(stderr, "%s setno = %d\n", prefix, msg->msg_setno); - (void) fprintf(stderr, "%s type = %d\n", prefix, msg->msg_type); - (void) fprintf(stderr, "%s size = %d\n", prefix, msg->msg_event_size); + (void) fprintf(stderr, "%s sender = %d\n", prefix, msg->msg_sender); + (void) fprintf(stderr, "%s flags = 0x%x\n", + prefix, msg->msg_flags); + (void) fprintf(stderr, "%s setno = %d\n", prefix, msg->msg_setno); + (void) fprintf(stderr, "%s recipient = %d\n", + prefix, msg->msg_recipient); + (void) fprintf(stderr, "%s type = %d\n", prefix, msg->msg_type); + (void) fprintf(stderr, "%s size = %d\n", + prefix, msg->msg_event_size); } +#define COMMD_PROGNAME "rpc.mdcommd" + +extern uint_t meta_rpc_err_mask(void); + +/* + * If a clnt_call gets an RPC error, force the message out here with details. + * This would be nice to send to commd_debug(), but we can't call rpc.mdcommd + * code from libmeta. + */ +static void +mdmn_handle_RPC_error(CLIENT *clnt, char *ident, md_mn_nodeid_t nid) +{ + /* + * This is sized for a max message which would look like this: + * "mdmn_wakeup_initiator: rpc.mdcommd node 4294967295" + */ + char errstr[51]; + struct rpc_err e; + + CLNT_GETERR((CLIENT *) clnt, &e); + if (meta_rpc_err_mask() & (1 << e.re_status)) { + if (nid == 0) { + (void) snprintf(errstr, sizeof (errstr), + "%s: %s node (local)", ident, COMMD_PROGNAME); + } else { + (void) snprintf(errstr, sizeof (errstr), + "%s: %s node %d", ident, COMMD_PROGNAME, nid); + } + syslog(LOG_WARNING, "mdmn_handle_RPC_error: %s", + clnt_sperror(clnt, errstr)); + } +} /* Default timeout can be changed using clnt_control() */ static struct timeval TIMEOUT = { 25, 0 }; md_mn_result_t * -mdmn_send_1(argp, clnt) +mdmn_send_2(argp, clnt, nid) md_mn_msg_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; md_mn_result_t *clnt_res = Zalloc(sizeof (md_mn_result_t)); - if (clnt_call(clnt, mdmn_send, + res = clnt_call(clnt, mdmn_send, (xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp, - (xdrproc_t)xdr_md_mn_result_t, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - return (NULL); + (xdrproc_t)xdr_md_mn_result_t, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_send", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_work_1(argp, clnt) +mdmn_work_2(argp, clnt, nid) md_mn_msg_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_work, + res = clnt_call(clnt, mdmn_work, (xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - Free(clnt_res); - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_work", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_wakeup_initiator_1(argp, clnt) +mdmn_wakeup_initiator_2(argp, clnt, nid) md_mn_result_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_wakeup_initiator, + res = clnt_call(clnt, mdmn_wakeup_initiator, (xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - Free(clnt_res); - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_wakeup_initiator", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_wakeup_master_1(argp, clnt) +mdmn_wakeup_master_2(argp, clnt, nid) md_mn_result_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_wakeup_master, + res = clnt_call(clnt, mdmn_wakeup_master, (xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - Free(clnt_res); - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_wakeup_master", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_comm_lock_1(argp, clnt) +mdmn_comm_lock_2(argp, clnt, nid) md_mn_set_and_class_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_comm_lock, + res = clnt_call(clnt, mdmn_comm_lock, (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_comm_lock", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_comm_unlock_1(argp, clnt) +mdmn_comm_unlock_2(argp, clnt, nid) md_mn_set_and_class_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_comm_unlock, + res = clnt_call(clnt, mdmn_comm_unlock, (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_comm_unlock", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_comm_suspend_1(argp, clnt) +mdmn_comm_suspend_2(argp, clnt, nid) md_mn_set_and_class_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_comm_suspend, + res = clnt_call(clnt, mdmn_comm_suspend, (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_comm_suspend", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_comm_resume_1(argp, clnt) +mdmn_comm_resume_2(argp, clnt, nid) md_mn_set_and_class_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_comm_resume, + res = clnt_call(clnt, mdmn_comm_resume, (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_comm_resume", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_comm_reinit_set_1(argp, clnt) +mdmn_comm_reinit_set_2(argp, clnt, nid) set_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_comm_reinit_set, + res = clnt_call(clnt, mdmn_comm_reinit_set, (xdrproc_t)xdr_set_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_comm_reinit_set", nid); + Free(clnt_res); + return (NULL); } int * -mdmn_comm_msglock_1(argp, clnt) +mdmn_comm_msglock_2(argp, clnt, nid) md_mn_type_and_lock_t *argp; CLIENT *clnt; + md_mn_nodeid_t nid; { + enum clnt_stat res; int *clnt_res = Zalloc(sizeof (int)); - if (clnt_call(clnt, mdmn_comm_msglock, + res = clnt_call(clnt, mdmn_comm_msglock, (xdrproc_t)xdr_md_mn_type_and_lock_t, (caddr_t)argp, - (xdrproc_t)xdr_int, (caddr_t)clnt_res, - TIMEOUT) != RPC_SUCCESS) { - return (NULL); + (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT); + + if (res == RPC_SUCCESS) { + return (clnt_res); } - return (clnt_res); + mdmn_handle_RPC_error(clnt, "mdmn_comm_msglock", nid); + Free(clnt_res); + return (NULL); } @@ -370,6 +451,7 @@ copy_msg(md_mn_msg_t *msg, md_mn_msg_t *dest) nmsg->msg_flags = msg->msg_flags; nmsg->msg_setno = msg->msg_setno; nmsg->msg_type = msg->msg_type; + nmsg->msg_recipient = msg->msg_recipient; nmsg->msg_event_size = msg->msg_event_size; if (msg->msg_event_size > 0) { bcopy(msg->msg_event_data, nmsg->msg_event_data, @@ -379,7 +461,7 @@ copy_msg(md_mn_msg_t *msg, md_mn_msg_t *dest) } void -copy_msg_1(md_mn_msg_t *msg, md_mn_msg_od_t *msgod, int direction) +copy_msg_2(md_mn_msg_t *msg, md_mn_msg_od_t *msgod, int direction) { assert((direction == MD_MN_COPY_TO_ONDISK) || (direction == MD_MN_COPY_TO_INCORE)); @@ -390,6 +472,7 @@ copy_msg_1(md_mn_msg_t *msg, md_mn_msg_od_t *msgod, int direction) msgod->msg_flags = msg->msg_flags; msgod->msg_setno = msg->msg_setno; msgod->msg_type = msg->msg_type; + msgod->msg_recipient = msg->msg_recipient; msgod->msg_od_event_size = msg->msg_event_size; /* paranoid checks */ if (msg->msg_event_size != 0 && msg->msg_event_data != NULL) @@ -401,6 +484,7 @@ copy_msg_1(md_mn_msg_t *msg, md_mn_msg_od_t *msgod, int direction) msg->msg_flags = msgod->msg_flags; msg->msg_setno = msgod->msg_setno; msg->msg_type = msgod->msg_type; + msg->msg_recipient = msgod->msg_recipient; msg->msg_event_size = msgod->msg_od_event_size; if (msg->msg_event_data == NULL) msg->msg_event_data = Zalloc(msg->msg_event_size); @@ -462,7 +546,7 @@ mdmn_get_local_clnt(uint_t flag) if (mdmn_clients == (md_mn_client_list_t *)NULL) { /* if there is no entry, create a client and return a it */ local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, - ONE, "tcp"); + TWO, "tcp"); } else { /* * If there is an entry from a previous put operation, @@ -517,6 +601,13 @@ mdmn_put_local_clnt(CLIENT *local_daemon) * a msgid is already attached to it. * In that case mdmn_send_message_with_msgid() has to be called directly. * + * The recipient argument is almost always unused, and is therefore typically + * set to zero, as zero is an invalid cluster nodeid. The exceptions are the + * marking and clearing of the DRL from a node that is not currently the + * owner. In these cases, the recipient argument will be the nodeid of the + * mirror owner, and MD_MSGF_DIRECTED will be set in the flags. Non-owner + * nodes will not receive these messages. + * * Return values / CAVEAT EMPTOR: see mdmn_send_message_with_msgid() */ @@ -525,13 +616,14 @@ mdmn_send_message( set_t setno, md_mn_msgtype_t type, uint_t flags, + md_mn_nodeid_t recipient, char *data, int size, md_mn_result_t **result, md_error_t *ep) { - return (mdmn_send_message_with_msgid( - setno, type, flags, data, size, result, MD_NULL_MSGID, ep)); + return (mdmn_send_message_with_msgid(setno, type, flags, + recipient, data, size, result, MD_NULL_MSGID, ep)); } /* * mdmn_send_message_with_msgid() @@ -561,6 +653,7 @@ mdmn_send_message_with_msgid( set_t setno, md_mn_msgtype_t type, uint_t flags, + md_mn_nodeid_t recipient, char *data, int size, md_mn_result_t **result, @@ -619,6 +712,7 @@ mdmn_send_message_with_msgid( */ msg.msg_flags = flags; msg.msg_setno = setno; + msg.msg_recipient = recipient; msg.msg_type = type; msg.msg_event_size = size; msg.msg_event_data = data; @@ -655,7 +749,7 @@ mdmn_send_message_with_msgid( * - retries1 or retries2 exceeded */ for (; ; ) { - *result = mdmn_send_1(&msg, local_daemon); + *result = mdmn_send_2(&msg, local_daemon, 0); resp = *result; if (resp != (md_mn_result_t *)NULL) { /* Bingo! */ @@ -800,8 +894,8 @@ mdmn_suspend(set_t setno, md_mn_msgclass_t class, long timeout) if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES)) { return (MDE_DS_COMMDCTL_SUSPEND_FAIL); } - local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, - "tcp"); + local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO, + "tcp"); if (local_daemon == (CLIENT *)NULL) { clnt_pcreateerror("local_daemon"); return (MDE_DS_COMMDCTL_SUSPEND_FAIL); @@ -818,7 +912,7 @@ mdmn_suspend(set_t setno, md_mn_msgclass_t class, long timeout) msc.msc_class = class; msc.msc_flags = 0; - resp = mdmn_comm_suspend_1(&msc, local_daemon); + resp = mdmn_comm_suspend_2(&msc, local_daemon, 0); clnt_destroy(local_daemon); if (resp == NULL) { @@ -861,8 +955,8 @@ mdmn_resume(set_t setno, md_mn_msgclass_t class, uint_t flags, long timeout) if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES)) { return (MDE_DS_COMMDCTL_RESUME_FAIL); } - local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, - "tcp"); + local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO, + "tcp"); if (local_daemon == (CLIENT *)NULL) { clnt_pcreateerror("local_daemon"); return (MDE_DS_COMMDCTL_RESUME_FAIL); @@ -879,7 +973,7 @@ mdmn_resume(set_t setno, md_mn_msgclass_t class, uint_t flags, long timeout) msc.msc_class = class; msc.msc_flags = flags; - resp = mdmn_comm_resume_1(&msc, local_daemon); + resp = mdmn_comm_resume_2(&msc, local_daemon, 0); if (resp != NULL) { if (*resp == MDMNE_ACK) { @@ -905,10 +999,8 @@ mdmn_abort(void) md_error_t mdne = mdnullerror; (void) mdmn_send_message(0, /* No set is needed for this message */ - MD_MN_MSG_ABORT, - MD_MSGF_LOCAL_ONLY, - dummy, sizeof (dummy), - &resultp, &mdne); + MD_MN_MSG_ABORT, MD_MSGF_LOCAL_ONLY, 0, + dummy, sizeof (dummy), &resultp, &mdne); if (resultp != NULL) { Free(resultp); @@ -935,8 +1027,8 @@ mdmn_reinit_set(set_t setno, long timeout) if ((setno == 0) || (setno >= MD_MAXSETS)) { return (1); } - local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, - "tcp"); + local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO, + "tcp"); if (local_daemon == (CLIENT *)NULL) { clnt_pcreateerror("local_daemon"); return (1); @@ -949,7 +1041,7 @@ mdmn_reinit_set(set_t setno, long timeout) } } - resp = mdmn_comm_reinit_set_1(&setno, local_daemon); + resp = mdmn_comm_reinit_set_2(&setno, local_daemon, 0); if (resp != NULL) { if (*resp == MDMNE_ACK) { @@ -984,8 +1076,8 @@ mdmn_msgtype_lock(md_mn_msgtype_t msgtype, uint_t locktype) if ((msgtype == 0) || (msgtype >= MD_MN_NMESSAGES)) { return (1); } - local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, ONE, - "tcp"); + local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO, + "tcp"); if (local_daemon == (CLIENT *)NULL) { clnt_pcreateerror("local_daemon"); return (1); @@ -993,7 +1085,7 @@ mdmn_msgtype_lock(md_mn_msgtype_t msgtype, uint_t locktype) mmtl.mmtl_type = msgtype; mmtl.mmtl_lock = locktype; - resp = mdmn_comm_msglock_1(&mmtl, local_daemon); + resp = mdmn_comm_msglock_2(&mmtl, local_daemon, 0); if (resp != NULL) { if (*resp == MDMNE_ACK) { diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c b/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c index ce70615f4e..041bb9b76d 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c +++ b/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c @@ -18,13 +18,12 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <stdlib.h> #include <unistd.h> #include <wait.h> @@ -448,7 +447,7 @@ mdmn_do_choose_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) myflags |= msg->msg_flags & MD_MSGF_INHERIT_BITS; ret = mdmn_send_message(MD_MIN2SET(d->msg_chooseid_mnum), - MD_MN_MSG_CHANGE_OWNER, myflags, (char *)&chownermsg, + MD_MN_MSG_CHANGE_OWNER, myflags, 0, (char *)&chownermsg, sizeof (chownermsg), &resp1, &mde); if (resp1 != NULL) free_result(resp1); @@ -2120,3 +2119,67 @@ mdmn_do_addmdname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) resp->mmr_exitval = 0; } + +/* + * This is used to issue a MD_MN_RR_DIRTY ioctl to the mirror. + */ +/*ARGSUSED*/ +void +mdmn_do_mark_dirty(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) +{ + md_mn_msg_rr_dirty_t *d; + md_mn_rr_dirty_params_t rp; + int ret; + + resp->mmr_out_size = 0; + resp->mmr_err_size = 0; + resp->mmr_out = NULL; + resp->mmr_err = NULL; + resp->mmr_comm_state = MDMNE_ACK; + d = (md_mn_msg_rr_dirty_t *)((void *)(msg->msg_event_data)); + + (void) memset(&rp, 0, sizeof (rp)); + MD_SETDRIVERNAME(&rp, MD_MIRROR, MD_MIN2SET(d->rr_mnum)) + rp.rr_mnum = d->rr_mnum; + rp.rr_nodeid = d->rr_nodeid; + rp.rr_start = (ushort_t)((d->rr_range >> 16) & 0xffff); + rp.rr_end = (ushort_t)(d->rr_range & 0xffff); + + ret = metaioctl(MD_MN_RR_DIRTY, &rp, &rp.mde, NULL); + + resp->mmr_exitval = ret; +} + +/* + * This is used to issue a MD_MN_RR_CLEAN ioctl to the mirror. + */ +/*ARGSUSED*/ +void +mdmn_do_mark_clean(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) +{ + md_mn_msg_rr_clean_t *d; + md_mn_rr_clean_params_t *rcp; + int ret; + + resp->mmr_out_size = 0; + resp->mmr_err_size = 0; + resp->mmr_out = NULL; + resp->mmr_err = NULL; + resp->mmr_comm_state = MDMNE_ACK; + d = (md_mn_msg_rr_clean_t *)((void *)(msg->msg_event_data)); + + rcp = Zalloc(sizeof (struct md_mn_rr_clean_params) + + MDMN_MSG_RR_CLEAN_DATA_BYTES(d)); + MD_SETDRIVERNAME(rcp, MD_MIRROR, MD_MIN2SET(d->rr_mnum)) + rcp->rr_mnum = d->rr_mnum; + rcp->rr_nodeid = d->rr_nodeid; + rcp->rr_start_size = d->rr_start_size; + (void) memcpy(MDMN_RR_CLEAN_PARAMS_DATA(rcp), MDMN_MSG_RR_CLEAN_DATA(d), + MDMN_MSG_RR_CLEAN_DATA_BYTES(d)); + + ret = metaioctl(MD_MN_RR_CLEAN, rcp, &rcp->mde, NULL); + + Free(rcp); + + resp->mmr_exitval = ret; +} diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c b/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c index b24f278617..2fdd7a8713 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c +++ b/usr/src/lib/lvm/libmeta/common/meta_mn_msg_table.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <meta.h> extern void mdmn_do_cmd(HANDLER_PARMS); @@ -56,6 +54,8 @@ extern void mdmn_do_delkeyname(HANDLER_PARMS); extern void mdmn_do_get_tstate(HANDLER_PARMS); extern void mdmn_do_get_mirstate(HANDLER_PARMS); extern void mdmn_do_addmdname(HANDLER_PARMS); +extern void mdmn_do_mark_dirty(HANDLER_PARMS); +extern void mdmn_do_mark_clean(HANDLER_PARMS); extern int mdmn_smgen_test6(SMGEN_PARMS); extern int mdmn_smgen_state_upd(SMGEN_PARMS); @@ -693,10 +693,36 @@ md_mn_msg_tbl_entry_t msg_table[MD_MN_NMESSAGES] = { * Add metadevice name into replica */ MD_MSG_CLASS1, /* message class */ - mdmn_do_addmdname, /* add ,etadevice name */ + mdmn_do_addmdname, /* add metadevice name */ NULL, /* submessage generator */ 90, /* times out in 90 secs */ 10000, 2, /* class busy retry / time delta */ 10, 1000 /* comm fail retry / time delta */ }, + + { + /* + * MD_MN_MSG_RR_DIRTY + * Mark given range of un_dirty_bm as dirty + */ + MD_MSG_CLASS2, /* message class */ + mdmn_do_mark_dirty, /* message handler */ + NULL, /* submessage generator */ + 8, /* timeout in seconds */ + UINT_MAX, 10, /* class busy retry / time delta */ + UINT_MAX, 100 /* comm fail retry / time delta */ + }, + + { + /* + * MD_MN_MSG_RR_CLEAN + * Mark given range of un_dirty_bm as clean + */ + MD_MSG_CLASS2, /* message class */ + mdmn_do_mark_clean, /* message handler */ + NULL, /* submessage generator */ + 8, /* timeout in seconds */ + UINT_MAX, 10, /* class busy retry / time delta */ + UINT_MAX, 100 /* comm fail retry / time delta */ + }, }; diff --git a/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c b/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c index aa7127453f..c1cbd68bf4 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c +++ b/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Just in case we're not in a build environment, make sure that * TEXT_DOMAIN gets set to something. @@ -62,7 +60,7 @@ meta_is_mn_set( /* Local set cannot be MultiNode */ if ((sp == NULL) || (sp->setname == NULL) || - (strcmp(sp->setname, MD_LOCAL_NAME) == 0)) + (strcmp(sp->setname, MD_LOCAL_NAME) == 0)) return (0); sd = metaget_setdesc(sp, ep); ASSERT(sd != NULL); @@ -128,7 +126,7 @@ meta_ping_mnset(set_t setno) md_mn_result_t *resp = NULL; (void) mdmn_send_message(setno, MD_MN_MSG_TEST2, - MD_MSGF_NO_LOG | MD_MSGF_FAIL_ON_SUSPEND, data, + MD_MSGF_NO_LOG | MD_MSGF_FAIL_ON_SUSPEND, 0, data, sizeof (data), &resp, &mde); if (resp != (md_mn_result_t *)NULL) { @@ -234,9 +232,8 @@ meta_mn_send_command( } else { send_message_type = MD_MN_MSG_BC_CMD; } - err = mdmn_send_message( - sp->setno, send_message_type, send_message_flags, - cmd, 1024, &resp, ep); + err = mdmn_send_message(sp->setno, send_message_type, + send_message_flags, 0, cmd, 1024, &resp, ep); free(cmd); @@ -285,9 +282,9 @@ meta_mn_send_command( "Command not attempted: Unable to log message " "in set %s\n"), sp->setname); if (c.c_flags & MDDB_C_STALE) { - (void) mdmddberror(ep, MDE_DB_STALE, - (minor_t)NODEV64, sp->setno, 0, NULL); - mde_perror(ep, ""); + (void) mdmddberror(ep, MDE_DB_STALE, + (minor_t)NODEV64, sp->setno, 0, NULL); + mde_perror(ep, ""); } } else { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, @@ -333,7 +330,7 @@ meta_mn_send_suspend_writes( */ result = mdmn_send_message(MD_MIN2SET(mnum), MD_MN_MSG_SUSPEND_WRITES, - MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, + MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0, (char *)&suspwrmsg, sizeof (suspwrmsg), &resp, ep); if (resp != NULL) { free_result(resp); @@ -608,7 +605,7 @@ meta_mn_send_setsync( * time required. */ ret = mdmn_send_message(sp->setno, MD_MN_MSG_SETSYNC, - MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, + MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0, (char *)&setsyncmsg, sizeof (setsyncmsg), &resp, ep); if (resp != NULL) { free_result(resp); @@ -720,7 +717,7 @@ meta_mn_send_resync_starting( resyncmsg.msg_resync_mnum = mnum; result = mdmn_send_message(MD_MIN2SET(mnum), MD_MN_MSG_RESYNC_STARTING, - MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, + MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0, (char *)&resyncmsg, sizeof (resyncmsg), &resp, ep); if (resp != NULL) { @@ -905,7 +902,7 @@ meta_mn_send_get_tstate( tstatemsg.gettstate_dev = dev; result = mdmn_send_message(MD_MIN2SET(mnum), MD_MN_MSG_GET_TSTATE, - MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST, + MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST, 0, (char *)&tstatemsg, sizeof (tstatemsg), &resp, ep); if (result == 0) diff --git a/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c b/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c index 986f4e2705..fb128747b3 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c +++ b/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c @@ -1244,11 +1244,11 @@ meta_isopen( * and the message doesn't need being logged either. * Hence NO_LOG and NO_MCT */ - err = mdmn_send_message( - sp->setno, MD_MN_MSG_CLU_CHECK, - MD_MSGF_NO_MCT | MD_MSGF_STOP_ON_ERROR | - MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, - (char *)&d, sizeof (md_isopen_t), &resp, ep); + err = mdmn_send_message(sp->setno, + MD_MN_MSG_CLU_CHECK, MD_MSGF_NO_MCT | + MD_MSGF_STOP_ON_ERROR | MD_MSGF_NO_LOG | + MD_MSGF_OVERRIDE_SUSPEND, 0, (char *)&d, + sizeof (md_isopen_t), &resp, ep); if (err == 0) { d.isopen = resp->mmr_exitval; } else { diff --git a/usr/src/lib/lvm/libmeta/common/meta_runtime.c b/usr/src/lib/lvm/libmeta/common/meta_runtime.c index f9c5915088..f401219f44 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_runtime.c +++ b/usr/src/lib/lvm/libmeta/common/meta_runtime.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,13 +18,12 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Just in case we're not in a build environment, make sure that * TEXT_DOMAIN gets set to something. @@ -171,9 +169,8 @@ do_owner_ioctls(void) ownerioctls_onp) != 0) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "%s: illegal value for %s: %s.\n"), - function_namep, - ownerioctls_namep, - param_valuep); + function_namep, ownerioctls_namep, + param_valuep); syslog(LOG_ERR, dgettext(TEXT_DOMAIN, "%s: illegal value for %s: %s.\n"), function_namep, @@ -216,6 +213,32 @@ commd_get_outfile(void) } /* + * This controls what type of RPC errors are sent to syslog(). + * It is used as a bitmask against the clnt_stat list, which defines + * 0 as RPC_SUCCESS, so likely shouldn't be set. + * + * The #define below provides a default of all errors in the list. + * The default can then be modified to reduce the amount of traffic + * going to syslog in the event of RPC errors. + */ + +#define DEFAULT_ERRMASK (UINT_MAX & ~(1 << RPC_SUCCESS)) + +uint_t +meta_rpc_err_mask(void) +{ + char *param_valuep; + uint_t retval = DEFAULT_ERRMASK; + + param_valuep = meta_get_rt_param("commd_RPC_errors", B_FALSE); + if (param_valuep != NULL) { + retval = (uint_t)strtol(param_valuep, NULL, 16); + free(param_valuep); + } + return (retval); +} + +/* * The following lines define private functions */ @@ -232,27 +255,23 @@ meta_get_rt_param(const char *param_namep, boolean_t warn_if_not_found) line_bufferp = (char *)malloc(line_buffer_size); if (line_bufferp == NULL) { - (void) fprintf(stderr, - dgettext(TEXT_DOMAIN, "%s: malloc failed\n"), - function_namep); - syslog(LOG_ERR, - dgettext(TEXT_DOMAIN, "%s: malloc failed\n"), - function_namep); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "%s: malloc failed\n"), function_namep); + syslog(LOG_ERR, dgettext(TEXT_DOMAIN, "%s: malloc failed\n"), + function_namep); return (param_valuep); } param_filep = fopen(param_file_namep, "r"); if (param_filep == NULL) { - (void) fprintf(stderr, - dgettext(TEXT_DOMAIN, "%s: can't open %s\n"), - function_namep, param_file_namep); - syslog(LOG_ERR, - dgettext(TEXT_DOMAIN, "%s: can't open %s\n"), - function_namep, param_file_namep); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "%s: can't open %s\n"), function_namep, param_file_namep); + syslog(LOG_ERR, dgettext(TEXT_DOMAIN, "%s: can't open %s\n"), + function_namep, param_file_namep); free(line_bufferp); return (param_valuep); } while ((fgets(line_bufferp, line_buffer_size, param_filep) != NULL) && - (param_valuep == NULL)) { + (param_valuep == NULL)) { newlinep = strchr(line_bufferp, '\n'); if (newlinep != NULL) { @@ -261,10 +280,10 @@ meta_get_rt_param(const char *param_namep, boolean_t warn_if_not_found) } param_name_tokenp = strtok(line_bufferp, token_separator_listp); if ((param_name_tokenp != NULL) && - (strcmp(param_namep, param_name_tokenp) == 0)) { + (strcmp(param_namep, param_name_tokenp) == 0)) { param_value_tokenp = strtok(NULL, - token_separator_listp); + token_separator_listp); } if (param_value_tokenp != NULL) { param_valuep = strdup(param_value_tokenp); @@ -282,18 +301,12 @@ meta_get_rt_param(const char *param_namep, boolean_t warn_if_not_found) } } if ((param_valuep == NULL) && (warn_if_not_found == B_TRUE)) { - (void) fprintf(stderr, - dgettext(TEXT_DOMAIN, - "%s: value of %s not set or error in %s\n"), - function_namep, - param_namep, - param_file_namep); - syslog(LOG_ERR, - dgettext(TEXT_DOMAIN, - "%s: value of %s not set or error in %s\n"), - function_namep, - param_namep, - param_file_namep); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "%s: value of %s not set or error in %s\n"), + function_namep, param_namep, param_file_namep); + syslog(LOG_ERR, dgettext(TEXT_DOMAIN, + "%s: value of %s not set or error in %s\n"), + function_namep, param_namep, param_file_namep); } free(line_bufferp); (void) fclose(param_filep); diff --git a/usr/src/lib/lvm/libmeta/common/meta_set.c b/usr/src/lib/lvm/libmeta/common/meta_set.c index 58c7f85735..59d592ce40 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_set.c +++ b/usr/src/lib/lvm/libmeta/common/meta_set.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Just in case we're not in a build environment, make sure that * TEXT_DOMAIN gets set to something. @@ -1877,7 +1875,6 @@ metadrivename_withdrkey( return (NULL); } - /* * Get the devid associated with the key. * @@ -1893,6 +1890,11 @@ metadrivename_withdrkey( */ dnp = meta_getdnp_bydevid(sp, sideno, devidp, key, ep); free(devidp); + + /* dnp could be NULL if the devid could not be decoded. */ + if (dnp == NULL) { + return (NULL); + } dnp->side_names_key = key; } else { /* @@ -1981,6 +1983,9 @@ metadrivename_withdrkey( */ dnp = meta_getdnp_bydevid(sp, sideno, devidp, key, ep); free(devidp); + if (dnp == NULL) { + return (NULL); + } dnp->side_names_key = key; } } @@ -5733,6 +5738,7 @@ meta_mnsync_diskset_mddbs( lr->lr_msg.msg_type, lr->lr_msg.msg_flags | MD_MSGF_REPLAY_MSG | MD_MSGF_OVERRIDE_SUSPEND, + lr->lr_msg.msg_recipient, lr->lr_msg.msg_event_data, lr->lr_msg.msg_event_size, &resultp, diff --git a/usr/src/lib/lvm/libmeta/common/meta_set_hst.c b/usr/src/lib/lvm/libmeta/common/meta_set_hst.c index fcfd8faaa3..364b463c84 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_set_hst.c +++ b/usr/src/lib/lvm/libmeta/common/meta_set_hst.c @@ -18,13 +18,12 @@ * * CDDL HEADER END */ + /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Just in case we're not in a build environment, make sure that * TEXT_DOMAIN gets set to something. @@ -148,7 +147,7 @@ add_md_sidenms(mdsetname_t *sp, side_t sideno, side_t otherside, md_error_t *ep) send_rval = mdmn_send_message(sp->setno, MD_MN_MSG_META_MD_ADDSIDE, MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT, - (char *)&md_as, sizeof (md_mn_msg_meta_md_addside_t), + 0, (char *)&md_as, sizeof (md_mn_msg_meta_md_addside_t), &resultp, ep); if (send_rval != 0) { (void) mdstealerror(ep, &(resultp->mmr_ep)); @@ -178,7 +177,7 @@ add_md_sidenms(mdsetname_t *sp, side_t sideno, side_t otherside, md_error_t *ep) * Let's see if it is hsp or not */ nm.devname = (uintptr_t)meta_getnmentbykey(sp->setno, - otherside, nm.key, &drvnm, NULL, NULL, ep); + otherside, nm.key, &drvnm, NULL, NULL, ep); if (nm.devname == NULL || drvnm == NULL) { if (nm.devname) Free((void *)(uintptr_t)nm.devname); @@ -229,9 +228,9 @@ add_md_sidenms(mdsetname_t *sp, side_t sideno, side_t otherside, md_error_t *ep) * increment the count to sync up with the other sides. */ for (i = 0; i < nm.ref_count; i++) { - if (add_name(sp, sideno, nm.key, dname, mnum, - cname, NULL, NULL, ep) == -1) - rval = -1; + if (add_name(sp, sideno, nm.key, dname, mnum, + cname, NULL, NULL, ep) == -1) + rval = -1; } Free(cname); @@ -323,17 +322,17 @@ create_multinode_set_on_hosts( (void) strcpy(nd->nd_nodename, node_v[i]); nd->nd_ctime = now; nd->nd_flags = (MD_MN_NODE_ALIVE | - MD_MN_NODE_ADD); + MD_MN_NODE_ADD); nl2 = nl; while (nl2) { - if (strcmp(nl2->msl_node_name, - node_v[i]) == 0) { - nd->nd_nodeid = nl2->msl_node_id; - (void) strcpy(nd->nd_priv_ic, - nl2->msl_node_addr); - break; - } - nl2 = nl2->next; + if (strcmp(nl2->msl_node_name, + node_v[i]) == 0) { + nd->nd_nodeid = nl2->msl_node_id; + (void) strcpy(nd->nd_priv_ic, + nl2->msl_node_addr); + break; + } + nl2 = nl2->next; } /* @@ -1123,7 +1122,7 @@ del_md_sidenms(mdsetname_t *sp, side_t sideno, md_error_t *ep) send_rval = mdmn_send_message(sp->setno, MD_MN_MSG_META_MD_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT, - (char *)&md_ds, sizeof (md_mn_msg_meta_md_delside_t), + 0, (char *)&md_ds, sizeof (md_mn_msg_meta_md_delside_t), &resultp, ep); if (send_rval != 0) { (void) mdstealerror(ep, &(resultp->mmr_ep)); @@ -1156,8 +1155,8 @@ del_md_sidenms(mdsetname_t *sp, side_t sideno, md_error_t *ep) * actually removed. */ for (i = 0; i < nm.ref_count; i++) { - if (del_name(sp, sideno, nm.key, ep) == -1) - return (-1); + if (del_name(sp, sideno, nm.key, ep) == -1) + return (-1); } } } @@ -1183,7 +1182,7 @@ recreate_set( continue; } has_set = nodehasset(sp, nd->nd_nodename, - NHS_NST_EQ, &xep); + NHS_NST_EQ, &xep); if (has_set >= 0) { nd = nd->nd_next; @@ -1207,7 +1206,7 @@ recreate_set( continue; has_set = nodehasset(sp, sd->sd_nodes[i], - NHS_NST_EQ, &xep); + NHS_NST_EQ, &xep); if (has_set >= 0) continue; @@ -1967,7 +1966,8 @@ make_sideno_sidenm( return (-1); /* find the end of the link list */ - for (sn = dnp->side_names; sn->next != NULL; sn = sn->next); + for (sn = dnp->side_names; sn->next != NULL; sn = sn->next) + ; sn_next = &sn->next; if (meta_replicaslice(dnp, &rep_slice, ep) != 0) @@ -1986,13 +1986,13 @@ make_sideno_sidenm( * used instead of meta_getnextside_devinfo. */ if (meta_getside_devinfo(sp, np->bname, sideno, &sn->cname, - &sn->dname, &sn->mnum, ep) == -1) + &sn->dname, &sn->mnum, ep) == -1) err = -1; } else { /* decrement sideno, to look like the previous sideno */ sideno--; - if (meta_getnextside_devinfo(sp, np->bname, &sideno, &sn->cname, - &sn->dname, &sn->mnum, ep) == -1) + if (meta_getnextside_devinfo(sp, np->bname, &sideno, + &sn->cname, &sn->dname, &sn->mnum, ep) == -1) err = -1; } @@ -2377,14 +2377,14 @@ meta_multinode_set_addhosts( nd->nd_ctime = now; nl2 = nl; while (nl2) { - if (strcmp(nl2->msl_node_name, - node_v[nodeindex]) == 0) { - nd->nd_nodeid = nl2->msl_node_id; - (void) strcpy(nd->nd_priv_ic, - nl2->msl_node_addr); - break; - } - nl2 = nl2->next; + if (strcmp(nl2->msl_node_name, + node_v[nodeindex]) == 0) { + nd->nd_nodeid = nl2->msl_node_id; + (void) strcpy(nd->nd_priv_ic, + nl2->msl_node_addr); + break; + } + nl2 = nl2->next; } /* @@ -2773,16 +2773,16 @@ out: * rpc.mdcommd is running on the nodes with a set. */ if (remote_sets_created == 1) { - for (i = 0; i < node_c; i++) { - if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT, - sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { - if (rval == 0) - (void) mdstealerror(ep, &xep); - rval = -1; - mde_perror(ep, dgettext(TEXT_DOMAIN, - "Unable to reinit rpc.mdcommd.\n")); + for (i = 0; i < node_c; i++) { + if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT, + sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { + if (rval == 0) + (void) mdstealerror(ep, &xep); + rval = -1; + mde_perror(ep, dgettext(TEXT_DOMAIN, + "Unable to reinit rpc.mdcommd.\n")); + } } - } } } if ((suspend1_flag) || (suspendall_flag)) { @@ -2819,17 +2819,18 @@ out: * rpc.mdcommd is be running on the nodes with a set. */ if (remote_sets_created == 1) { - for (i = 0; i < node_c; i++) { - /* Already verified to be alive */ - if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME, - sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { - if (rval == 0) - (void) mdstealerror(ep, &xep); - rval = -1; - mde_perror(ep, dgettext(TEXT_DOMAIN, - "Unable to resume rpc.mdcommd.\n")); + for (i = 0; i < node_c; i++) { + /* Already verified to be alive */ + if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME, + sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, + &xep)) { + if (rval == 0) + (void) mdstealerror(ep, &xep); + rval = -1; + mde_perror(ep, dgettext(TEXT_DOMAIN, + "Unable to resume rpc.mdcommd.\n")); + } } - } } meta_ping_mnset(sp->setno); /* @@ -4031,7 +4032,8 @@ meta_set_deletehosts( rb_medr.med_rec_sn = sp->setno; (void) strcpy(rb_medr.med_rec_snm, sp->setname); for (i = 0; i < MD_MAXSIDES; i++) - (void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]); + (void) strcpy(rb_medr.med_rec_nodes[i], + sd->sd_nodes[i]); rb_medr.med_rec_meds = sd->sd_med; /* structure assigment */ (void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t)); rb_medr.med_rec_foff = 0; @@ -4432,45 +4434,52 @@ meta_set_deletehosts( * alive nodes are updated correctly. */ if (strcmp(nd->nd_nodename, node_v[i]) == 0) { - if ((oha == TRUE) && - (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { + if ((oha == TRUE) && (!(nd->nd_flags & + MD_MN_NODE_ALIVE))) { nd->nd_flags |= MD_MN_NODE_DEL; nd->nd_flags &= ~MD_MN_NODE_OK; nd = nd->nd_next; continue; - } - if (nd->nd_flags & MD_MN_NODE_OWN) { - /* - * Going to set locally cached node - * flags to rollback join so in case - * of error, the rollback code knows - * which nodes to re-join. - * rpc.metad ignores the RB_JOIN flag. - */ - nd->nd_flags |= MD_MN_NODE_RB_JOIN; - nd->nd_flags &= ~MD_MN_NODE_OWN; + } + if (nd->nd_flags & MD_MN_NODE_OWN) { + /* + * Going to set locally cached + * node flags to rollback join + * so in case of error, the + * rollback code knows which + * nodes to re-join. rpc.metad + * ignores the RB_JOIN flag. + */ + nd->nd_flags |= + MD_MN_NODE_RB_JOIN; + nd->nd_flags &= ~MD_MN_NODE_OWN; - /* - * Be careful in ordering of following - * steps so that recovery from a panic - * between the steps is viable. - * Only reset master info in rpc.metad - * - don't reset local cached info - * which will be used to set master - * info back if failure (rollback). - */ - if (clnt_withdrawset(nd->nd_nodename, - sp, ep)) - goto rollback; - - /* Reset master on deleted node */ - if (clnt_mnsetmaster(node_v[i], sp, "", - MD_MN_INVALID_NID, ep)) - goto rollback; - } - - nd->nd_flags |= MD_MN_NODE_DEL; - nd->nd_flags &= ~MD_MN_NODE_OK; + /* + * Be careful in ordering of + * following steps so that + * recovery from a panic + * between the steps is viable. + * Only reset master info in + * rpc.metad - don't reset + * local cached info which will + * be used to set master info + * back if failure (rollback). + */ + if (clnt_withdrawset( + nd->nd_nodename, sp, ep)) + goto rollback; + + /* + * Reset master on deleted node + */ + if (clnt_mnsetmaster(node_v[i], + sp, "", MD_MN_INVALID_NID, + ep)) + goto rollback; + } + + nd->nd_flags |= MD_MN_NODE_DEL; + nd->nd_flags &= ~MD_MN_NODE_OK; } nd = nd->nd_next; } @@ -4503,37 +4512,37 @@ meta_set_deletehosts( /* Send reinit */ nd = sd->sd_nodelist; while (nd) { - if ((oha == TRUE) && - (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { - nd = nd->nd_next; - continue; - } - /* Class is ignored for REINIT */ - if (clnt_mdcommdctl(nd->nd_nodename, - COMMDCTL_REINIT, - sp, NULL, MD_MSCF_NO_FLAGS, ep)) { - mde_perror(ep, dgettext(TEXT_DOMAIN, - "Unable to reinit rpc.mdcommd.\n")); - goto rollback; - } - nd = nd->nd_next; + if ((oha == TRUE) && + (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { + nd = nd->nd_next; + continue; + } + /* Class is ignored for REINIT */ + if (clnt_mdcommdctl(nd->nd_nodename, + COMMDCTL_REINIT, sp, NULL, + MD_MSCF_NO_FLAGS, ep)) { + mde_perror(ep, dgettext(TEXT_DOMAIN, + "Unable to reinit rpc.mdcommd.\n")); + goto rollback; + } + nd = nd->nd_next; } /* Send resume */ nd = sd->sd_nodelist; while (nd) { - if ((oha == TRUE) && - (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { - nd = nd->nd_next; - continue; - } - if (clnt_mdcommdctl(nd->nd_nodename, - COMMDCTL_RESUME, sp, MD_MSG_CLASS0, - MD_MSCF_DONT_RESUME_CLASS1, ep)) { - mde_perror(ep, dgettext(TEXT_DOMAIN, - "Unable to resume rpc.mdcommd.\n")); - goto rollback; - } - nd = nd->nd_next; + if ((oha == TRUE) && + (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { + nd = nd->nd_next; + continue; + } + if (clnt_mdcommdctl(nd->nd_nodename, + COMMDCTL_RESUME, sp, MD_MSG_CLASS0, + MD_MSCF_DONT_RESUME_CLASS1, ep)) { + mde_perror(ep, dgettext(TEXT_DOMAIN, + "Unable to resume rpc.mdcommd.\n")); + goto rollback; + } + nd = nd->nd_next; } meta_ping_mnset(sp->setno); } @@ -4727,50 +4736,52 @@ meta_set_deletehosts( RB_TEST(24, "deletehosts", ep) } } else { - nd = sd->sd_nodelist; - /* All nodes guaranteed to be ALIVE unless in oha mode */ - while (nd) { - /* - * If mirror owner was set to a deleted node, then - * each existing node resets mirror owner to NULL. - * - * During OHA mode, don't issue RPCs to - * non-alive nodes since there is no reason to - * wait for RPC timeouts. - */ - if ((oha == TRUE) && - (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { - nd = nd->nd_next; - continue; - } + nd = sd->sd_nodelist; + /* All nodes guaranteed ALIVE unless in oha mode */ + while (nd) { + /* + * If mirror owner was set to a deleted node, + * then each existing node resets mirror owner + * to NULL. + * + * During OHA mode, don't issue RPCs to + * non-alive nodes since there is no reason to + * wait for RPC timeouts. + */ + if ((oha == TRUE) && + (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { + nd = nd->nd_next; + continue; + } - /* Skip nodes being deleted */ - if (strinlst(nd->nd_nodename, node_c, node_v)) { - nd = nd->nd_next; - continue; - } + /* Skip nodes being deleted */ + if (strinlst(nd->nd_nodename, node_c, node_v)) { + nd = nd->nd_next; + continue; + } - /* - * If mirror owner is a deleted node, reset mirror - * owners to NULL. If an error occurs, print a - * warning and continue. Don't fail metaset - * because of mirror owner reset problem since next - * node to grab mirror will resolve this issue. - * Before next node grabs mirrors, metaset will show - * the deleted node as owner which is why an attempt - * to reset the mirror owner is made. - */ - if (clnt_reset_mirror_owner(nd->nd_nodename, sp, - node_c, &node_id_list[0], &xep) == -1) { - mde_perror(&xep, dgettext(TEXT_DOMAIN, - "Unable to reset mirror owner on" - " node %s\n"), nd->nd_nodename); - mdclrerror(&xep); - } + /* + * If mirror owner is a deleted node, reset + * mirror owners to NULL. If an error occurs, + * print a warning and continue. Don't fail + * metaset because of mirror owner reset + * problem since next node to grab mirror + * will resolve this issue. Before next node + * grabs mirrors, metaset will show the deleted + * node as owner which is why an attempt to + * reset the mirror owner is made. + */ + if (clnt_reset_mirror_owner(nd->nd_nodename, sp, + node_c, &node_id_list[0], &xep) == -1) { + mde_perror(&xep, dgettext(TEXT_DOMAIN, + "Unable to reset mirror owner on" + " node %s\n"), nd->nd_nodename); + mdclrerror(&xep); + } - RB_TEST(21, "deletehosts", ep) - nd = nd->nd_next; - } + RB_TEST(21, "deletehosts", ep) + nd = nd->nd_next; + } } } @@ -4790,10 +4801,10 @@ meta_set_deletehosts( for (i = 0; i < MD_MAXSIDES; i++) { if (strinlst(sd->sd_nodes[i], node_c, node_v)) (void) memset(&medr.med_rec_nodes[i], - '\0', sizeof (md_node_nm_t)); + '\0', sizeof (md_node_nm_t)); else (void) strcpy(medr.med_rec_nodes[i], - sd->sd_nodes[i]); + sd->sd_nodes[i]); } crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL); @@ -5636,79 +5647,85 @@ meta_set_auto_take( /* Lock the set on our side */ if (clnt_lock_set(hostname, sp, ep)) { - rval = -1; - goto out; + rval = -1; + goto out; } if (take_val) { - /* enable auto_take but only if it is not already set */ - if (! (sd->sd_flags & MD_SR_AUTO_TAKE)) { - /* verify that we're the only host in the set */ - for (i = 0; i < MD_MAXSIDES; i++) { - if (sd->sd_nodes[i] == NULL || sd->sd_nodes[i][0] == '\0') - continue; + /* enable auto_take but only if it is not already set */ + if (! (sd->sd_flags & MD_SR_AUTO_TAKE)) { + /* verify that we're the only host in the set */ + for (i = 0; i < MD_MAXSIDES; i++) { + if (sd->sd_nodes[i] == NULL || + sd->sd_nodes[i][0] == '\0') + continue; - if (strcmp(sd->sd_nodes[i], hostname) != 0) { - (void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, - NULL, sp->setname); - rval = -1; - goto out; - } - } + if (strcmp(sd->sd_nodes[i], hostname) != 0) { + (void) mddserror(ep, MDE_DS_SINGLEHOST, + sp->setno, NULL, NULL, sp->setname); + rval = -1; + goto out; + } + } - if (clnt_enable_sr_flags(hostname, sp, MD_SR_AUTO_TAKE, ep)) - rval = -1; + if (clnt_enable_sr_flags(hostname, sp, + MD_SR_AUTO_TAKE, ep)) + rval = -1; - /* Disable SCSI reservations */ - if (sd->sd_flags & MD_SR_MB_DEVID) - dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, - &xep); - else - dd = metaget_drivedesc(sp, MD_BASICNAME_OK, &xep); - if (! mdisok(&xep)) - mdclrerror(&xep); + /* Disable SCSI reservations */ + if (sd->sd_flags & MD_SR_MB_DEVID) + dd = metaget_drivedesc(sp, MD_BASICNAME_OK | + PRINT_FAST, &xep); + else + dd = metaget_drivedesc(sp, MD_BASICNAME_OK, + &xep); - if (dd != NULL) { - if (rel_own_bydd(sp, dd, TRUE, &xep)) - mdclrerror(&xep); + if (! mdisok(&xep)) + mdclrerror(&xep); + + if (dd != NULL) { + if (rel_own_bydd(sp, dd, TRUE, &xep)) + mdclrerror(&xep); + } } - } } else { - /* disable auto_take, if set, or error */ - if (sd->sd_flags & MD_SR_AUTO_TAKE) { - if (clnt_disable_sr_flags(hostname, sp, MD_SR_AUTO_TAKE, ep)) - rval = -1; - - /* Enable SCSI reservations */ - if (sd->sd_flags & MD_SR_MB_DEVID) - dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, - &xep); - else - dd = metaget_drivedesc(sp, MD_BASICNAME_OK, &xep); - if (! mdisok(&xep)) - mdclrerror(&xep); + /* disable auto_take, if set, or error */ + if (sd->sd_flags & MD_SR_AUTO_TAKE) { + if (clnt_disable_sr_flags(hostname, sp, + MD_SR_AUTO_TAKE, ep)) + rval = -1; - if (dd != NULL) { - mhd_mhiargs_t mhiargs = defmhiargs; + /* Enable SCSI reservations */ + if (sd->sd_flags & MD_SR_MB_DEVID) + dd = metaget_drivedesc(sp, MD_BASICNAME_OK | + PRINT_FAST, &xep); + else + dd = metaget_drivedesc(sp, MD_BASICNAME_OK, + &xep); - if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep)) - mdclrerror(&xep); - } + if (! mdisok(&xep)) + mdclrerror(&xep); - } else { - (void) mddserror(ep, MDE_DS_AUTONOTSET, sp->setno, NULL, NULL, - sp->setname); - rval = -1; - } + if (dd != NULL) { + mhd_mhiargs_t mhiargs = defmhiargs; + + if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep)) + mdclrerror(&xep); + } + } else { + (void) mddserror(ep, MDE_DS_AUTONOTSET, sp->setno, + NULL, NULL, sp->setname); + rval = -1; + } } out: cl_sk = cl_get_setkey(sp->setno, sp->setname); if (clnt_unlock_set(hostname, cl_sk, &xep)) { - if (rval == 0) - (void) mdstealerror(ep, &xep); - rval = -1; + if (rval == 0) + (void) mdstealerror(ep, &xep); + rval = -1; } cl_set_setkey(NULL); diff --git a/usr/src/lib/lvm/libmeta/common/meta_sp.c b/usr/src/lib/lvm/libmeta/common/meta_sp.c index 7fc6396a53..c69c211f00 100644 --- a/usr/src/lib/lvm/libmeta/common/meta_sp.c +++ b/usr/src/lib/lvm/libmeta/common/meta_sp.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -1895,7 +1896,7 @@ meta_sp_extlist_from_wm( wm.wm_mdname); result = mdmn_send_message(sp->setno, MD_MN_MSG_ADDMDNAME, - MD_MSGF_PANIC_WHEN_INCONSISTENT, + MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)send_params, message_size, &resp, ep); Free(send_params); @@ -2384,10 +2385,11 @@ meta_sp_get_start( } /* - * FUNCTION: meta_sp_update_wm() + * FUNCTION: meta_sp_update_wm_common() * INPUT: sp - the operating set * msp - a pointer to the XDR unit structure * extlist - the extent list specifying watermarks to update + * iocval - either MD_IOC_SPUPDATEWM or MD_MN_IOC_SPUPDATEWM * OUTPUT: ep - return error pointer * RETURNS: int - -1 if error, 0 on success * PURPOSE: steps backwards through the extent list updating @@ -2401,10 +2403,11 @@ meta_sp_get_start( * are realized. */ static int -meta_sp_update_wm( +meta_sp_update_wm_common( mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *extlist, + int iocval, md_error_t *ep ) { @@ -2493,8 +2496,8 @@ meta_sp_update_wm( MD_SETDRIVERNAME(&update_params, MD_SP, MD_MIN2SET(update_params.mnum)); - if (metaioctl(MD_IOC_SPUPDATEWM, &update_params, - &update_params.mde, msp->common.namep->cname) != 0) { + if (metaioctl(iocval, &update_params, &update_params.mde, + msp->common.namep->cname) != 0) { (void) mdstealerror(ep, &update_params.mde); rval = -1; goto out; @@ -2507,6 +2510,30 @@ out: return (rval); } +static int +meta_sp_update_wm( + mdsetname_t *sp, + md_sp_t *msp, + sp_ext_node_t *extlist, + md_error_t *ep +) +{ + return (meta_sp_update_wm_common(sp, msp, extlist, MD_IOC_SPUPDATEWM, + ep)); +} + +static int +meta_mn_sp_update_wm( + mdsetname_t *sp, + md_sp_t *msp, + sp_ext_node_t *extlist, + md_error_t *ep +) +{ + return (meta_sp_update_wm_common(sp, msp, extlist, MD_MN_IOC_SPUPDATEWM, + ep)); +} + /* * FUNCTION: meta_sp_clear_wm() * INPUT: sp - the operating set @@ -4227,9 +4254,9 @@ meta_create_sp( int committed = 0; int repart_options = MD_REPART_FORCE; int create_flag = MD_CRO_32BIT; + int mn_set_master = 0; md_set_desc *sd; - mm_unit_t *mm; md_set_mmown_params_t *ownpar = NULL; int comp_is_mirror = 0; @@ -4417,19 +4444,7 @@ meta_create_sp( goto out; } if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) { - mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep); - if (mm == NULL) { - rval = -1; - goto out; - } else { - rval = meta_mn_change_owner(&ownpar, sp->setno, - meta_getminor(compnp->dev), - sd->sd_mn_mynode->nd_nodeid, - MD_MN_MM_PREVENT_CHANGE | - MD_MN_MM_SPAWN_THREAD); - if (rval == -1) - goto out; - } + mn_set_master = 1; } } @@ -4450,22 +4465,22 @@ meta_create_sp( committed = 1; /* write watermarks */ - if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { - rval = -1; - goto out; - } - /* - * Allow mirror ownership to change. If we don't succeed in this - * ioctl it isn't fatal, but the cluster will probably hang fairly - * soon as the mirror owner won't change. However, we have - * successfully written the watermarks out to the device so the - * softpart creation has succeeded + * Special-case for Multi-node sets. As we now have a distributed DRL + * update mechanism, we _will_ hit the ioctl-within-ioctl deadlock case + * unless we use a 'special' MN-capable ioctl to stage the watermark + * update. This only affects the master-node in an MN set. */ - if (ownpar) { - (void) meta_mn_change_owner(&ownpar, sp->setno, ownpar->d.mnum, - ownpar->d.owner, - MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD); + if (mn_set_master) { + if (meta_mn_sp_update_wm(sp, msp, extlist, ep) < 0) { + rval = -1; + goto out; + } + } else { + if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) { + rval = -1; + goto out; + } } /* second phase of commit, set status to MD_SP_OK */ @@ -5838,7 +5853,7 @@ update_sp_status( sp_setstat_params.sp_setstat_status = status; result = mdmn_send_message(sp->setno, - MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, + MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 0, (char *)&sp_setstat_params, sizeof (sp_setstat_params), &resp, ep); @@ -6022,7 +6037,7 @@ meta_sp_recover_from_wm( compnp->cname); result = mdmn_send_message(sp->setno, MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS, - (char *)send_params, message_size, &resp, + 0, (char *)send_params, message_size, &resp, ep); Free(send_params); if (resp != NULL) { @@ -6154,7 +6169,7 @@ meta_sp_recover_from_wm( sizeof (*un_array[i]) - sizeof (mp_ext_t) + (un_array[i]->un_numexts * sizeof (mp_ext_t))); result = mdmn_send_message(sp->setno, - MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, + MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 0, (char *)&send_params, mess_size, &resp, ep); if (resp != NULL) { @@ -6303,7 +6318,8 @@ out: send_params.delkeyname_key = np->key; (void) mdmn_send_message(sp->setno, MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS, - (char *)&send_params, sizeof (send_params), + 0, (char *)&send_params, + sizeof (send_params), &resp, ep); if (resp != NULL) { free_result(resp); |