summaryrefslogtreecommitdiff
path: root/usr/src/cmd
diff options
context:
space:
mode:
author jrutt <none@none> 2006-05-19 14:43:08 -0700
committer jrutt <none@none> 2006-05-19 14:43:08 -0700
commit 154b1f02449b21af9273efd1a7776a3fe65a0744 (patch)
tree 9ee0542f6378c7269460b63e235427450e183e02 /usr/src/cmd
parent 99fd1a494893b1f74ebd5f3561cebb86213f28b1 (diff)
download illumos-gate-154b1f02449b21af9273efd1a7776a3fe65a0744.tar.gz
6417265 event-transport : call to fmd_xprt_close inside etm_send causes module abort
6417268 event-transport : change debug and error messages for better problem determination
6418474 event-transport : need to call nvlist_free in etm_post_msg when fmd_xprt_post is not called
6419724 event-transport : client should make multiple startup attempts
6421336 event-transport : deadlock between etm_reinit() and etm_send()
6421451 event-transport : set client socket to O_NONBLOCK prior to calling connect()
Diffstat (limited to 'usr/src/cmd')
-rw-r--r-- usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c 42
-rw-r--r-- usr/src/cmd/fm/modules/common/event-transport/etm.c 113
2 files changed, 97 insertions, 58 deletions
diff --git a/usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c b/usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c
index 55c404a416..e58a20b53c 100644
--- a/usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c
+++ b/usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c
@@ -152,13 +152,13 @@ exs_prep_client(fmd_hdl_t *hdl, exs_hdl_t *hp)
if ((rv = dscpAddr(hp->h_domain_id, DSCP_ADDR_REMOTE,
(struct sockaddr *)&hp->h_client.c_saddr,
&hp->h_client.c_len)) != DSCP_OK) {
- fmd_hdl_debug(hdl, "xport - dscpAddr for %s failed: %d",
+ fmd_hdl_error(hdl, "xport - dscpAddr for %s failed: %d",
hp->h_endpt_id, rv);
return (1);
}
if ((hp->h_client.c_sd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
- fmd_hdl_debug(hdl, "xport - client socket failed for %s",
+ fmd_hdl_error(hdl, "xport - client socket failed for %s",
hp->h_endpt_id);
return (1);
}
@@ -166,7 +166,7 @@ exs_prep_client(fmd_hdl_t *hdl, exs_hdl_t *hp)
/* Bind the socket to the local IP address of the DSCP link */
if ((rv = dscpBind(hp->h_domain_id, hp->h_client.c_sd,
EXS_CLIENT_PORT)) != DSCP_OK) {
- fmd_hdl_debug(hdl, "xport - client bind for %s failed: %d",
+ fmd_hdl_error(hdl, "xport - client bind for %s failed: %d",
hp->h_endpt_id, rv);
(void) close(hp->h_client.c_sd);
hp->h_client.c_sd = EXS_SD_FREE;
@@ -177,7 +177,7 @@ exs_prep_client(fmd_hdl_t *hdl, exs_hdl_t *hp)
/* Set IPsec security policy for this socket */
if ((rv = dscpSecure(hp->h_domain_id, hp->h_client.c_sd)) != DSCP_OK) {
- fmd_hdl_debug(hdl, "xport - dscpSecure for %s failed: %d",
+ fmd_hdl_error(hdl, "xport - dscpSecure for %s failed: %d",
hp->h_endpt_id, rv);
(void) close(hp->h_client.c_sd);
hp->h_client.c_sd = EXS_SD_FREE;
@@ -200,13 +200,13 @@ exs_prep_accept(fmd_hdl_t *hdl)
int rv;
if ((Acceptor_conn.c_sd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
- fmd_hdl_debug(hdl, "xport - acceptor socket failed");
+ fmd_hdl_error(hdl, "xport - acceptor socket failed");
return (1);
}
if (setsockopt(Acceptor_conn.c_sd, SOL_SOCKET, SO_REUSEADDR,
&optval, sizeof (optval))) {
- fmd_hdl_debug(hdl, "xport - set REUSEADDR failed");
+ fmd_hdl_error(hdl, "xport - set REUSEADDR failed");
(void) close(Acceptor_conn.c_sd);
Acceptor_conn.c_sd = EXS_SD_FREE;
return (1);
@@ -215,7 +215,7 @@ exs_prep_accept(fmd_hdl_t *hdl)
/* Bind the socket to the local IP address of the DSCP link */
if ((rv = dscpBind(domain, Acceptor_conn.c_sd,
EXS_SERVER_PORT)) != DSCP_OK) {
- fmd_hdl_debug(hdl, "xport - acceptor bind failed: %d", rv);
+ fmd_hdl_error(hdl, "xport - acceptor bind failed: %d", rv);
(void) close(Acceptor_conn.c_sd);
Acceptor_conn.c_sd = EXS_SD_FREE;
return (1);
@@ -223,7 +223,7 @@ exs_prep_accept(fmd_hdl_t *hdl)
/* Activate IPsec security policy for this socket */
if ((rv = dscpSecure(domain, Acceptor_conn.c_sd)) != DSCP_OK) {
- fmd_hdl_debug(hdl, "xport - dscpSecure for acceptor failed: %d",
+ fmd_hdl_error(hdl, "xport - dscpSecure for acceptor failed: %d",
rv);
(void) close(Acceptor_conn.c_sd);
Acceptor_conn.c_sd = EXS_SD_FREE;
@@ -231,7 +231,7 @@ exs_prep_accept(fmd_hdl_t *hdl)
}
if ((listen(Acceptor_conn.c_sd, EXS_NUM_SOCKS)) == -1) {
- fmd_hdl_debug(hdl, "xport - acceptor listen failed");
+ fmd_hdl_error(hdl, "xport - acceptor listen failed");
(void) close(Acceptor_conn.c_sd);
Acceptor_conn.c_sd = EXS_SD_FREE;
return (1);
@@ -305,7 +305,7 @@ exs_build_set(fmd_hdl_t *hdl)
else if ((errno == EBADF) || (errno == ENOTSOCK))
curr->h_server.c_sd = EXS_SD_FREE;
else
- fmd_hdl_error(hdl, "xport - getsockname fail");
+ fmd_hdl_debug(hdl, "xport - getsockname fail");
if (curr->h_server.c_sd > max_sd)
max_sd = curr->h_server.c_sd;
@@ -535,7 +535,7 @@ etm_xport_init(fmd_hdl_t *hdl, char *endpoint_id,
exs_hdl_t *hp, *curr;
int domain_id;
- if ((exs_get_id(hdl, endpoint_id, &domain_id)) == -1)
+ if (exs_get_id(hdl, endpoint_id, &domain_id))
return (NULL);
(void) pthread_mutex_lock(&List_lock);
@@ -682,20 +682,22 @@ etm_xport_open(fmd_hdl_t *hdl, etm_xport_hdl_t tlhdl)
return (NULL);
}
+ /* Set the socket to be non-blocking */
+ flags = fcntl(hp->h_client.c_sd, F_GETFL, 0);
+ (void) fcntl(hp->h_client.c_sd, F_SETFL, flags | O_NONBLOCK);
+
if ((connect(hp->h_client.c_sd,
(struct sockaddr *)&hp->h_client.c_saddr,
hp->h_client.c_len)) == -1) {
- fmd_hdl_error(hdl, "xport - failed connect to server for %s",
- hp->h_endpt_id);
- (void) close(hp->h_client.c_sd);
- hp->h_client.c_sd = EXS_SD_FREE;
- return (NULL);
+ if (errno != EINPROGRESS) {
+ fmd_hdl_error(hdl, "xport - failed server connect : %s",
+ hp->h_endpt_id);
+ (void) close(hp->h_client.c_sd);
+ hp->h_client.c_sd = EXS_SD_FREE;
+ return (NULL);
+ }
}
- /* Set the socket to be non-blocking */
- flags = fcntl(hp->h_client.c_sd, F_GETFL, 0);
- (void) fcntl(hp->h_client.c_sd, F_SETFL, flags | O_NONBLOCK);
-
fmd_hdl_debug(hdl, "xport - connected client socket for %s",
hp->h_endpt_id);
diff --git a/usr/src/cmd/fm/modules/common/event-transport/etm.c b/usr/src/cmd/fm/modules/common/event-transport/etm.c
index 285c33caa8..00e4dd9634 100644
--- a/usr/src/cmd/fm/modules/common/event-transport/etm.c
+++ b/usr/src/cmd/fm/modules/common/event-transport/etm.c
@@ -235,7 +235,7 @@ etm_check_hdr(fmd_hdl_t *hdl, etm_epmap_t *mp, void *buf)
etm_proto_hdr_t *hp = (etm_proto_hdr_t *)buf;
if (bcmp(hp->hdr_delim, ETM_DELIM, ETM_DELIMLEN) != 0) {
- fmd_hdl_error(hdl, "Bad delimiter in ETM header from %s "
+ fmd_hdl_debug(hdl, "Bad delimiter in ETM header from %s "
": 0x%x\n", mp->epm_ep_str, hp->hdr_delim);
return (ETM_HDR_INVALID);
}
@@ -247,14 +247,14 @@ etm_check_hdr(fmd_hdl_t *hdl, etm_epmap_t *mp, void *buf)
}
if (hp->hdr_ver != mp->epm_ver) {
- fmd_hdl_error(hdl, "Bad version in ETM header from %s : 0x%x\n",
+ fmd_hdl_debug(hdl, "Bad version in ETM header from %s : 0x%x\n",
mp->epm_ep_str, hp->hdr_ver);
return (ETM_HDR_BADVERSION);
}
if ((hp->hdr_type == ETM_HDR_TYPE_TOO_LOW) ||
(hp->hdr_type >= ETM_HDR_TYPE_TOO_HIGH)) {
- fmd_hdl_error(hdl, "Bad type in ETM header from %s : 0x%x\n",
+ fmd_hdl_debug(hdl, "Bad type in ETM header from %s : 0x%x\n",
mp->epm_ep_str, hp->hdr_type);
return (ETM_HDR_BADTYPE);
}
@@ -292,7 +292,7 @@ etm_post_msg(fmd_hdl_t *hdl, etm_epmap_t *mp, void *buf, size_t buflen)
int rv;
if (nvlist_unpack((char *)buf, buflen, &nvl, 0)) {
- fmd_hdl_debug(hdl, "failed to unpack message");
+ fmd_hdl_error(hdl, "failed to unpack message");
return (1);
}
@@ -317,11 +317,15 @@ etm_post_msg(fmd_hdl_t *hdl, etm_epmap_t *mp, void *buf, size_t buflen)
} else {
fmd_hdl_debug(hdl, "unable to post message, qstat = %d",
mp->epm_qstat);
+ nvlist_free(nvl);
+ /* Remote peer will attempt to resend event */
rv = 2;
}
} else {
(void) pthread_mutex_unlock(&Etm_mod_lock);
fmd_hdl_debug(hdl, "unable to post message, module exiting");
+ nvlist_free(nvl);
+ /* Remote peer will attempt to resend event */
rv = 3;
}
@@ -435,7 +439,7 @@ etm_get_ep_nvl(fmd_hdl_t *hdl, etm_epmap_t *mp)
(void) nvlist_alloc(&mp->epm_ep_nvl, NV_UNIQUE_NAME, 0);
if (nvlist_add_string(mp->epm_ep_nvl, "domain-id", mp->epm_ep_str)) {
- fmd_hdl_debug(hdl, "failed to add domain-id string to nvlist "
+ fmd_hdl_error(hdl, "failed to add domain-id string to nvlist "
"for %s", mp->epm_ep_str);
nvlist_free(mp->epm_ep_nvl);
return (1);
@@ -508,6 +512,7 @@ etm_reconnect(fmd_hdl_t *hdl, etm_epmap_t *mp)
/*
* Suspend a given connection and setup for reconnection retries.
+ * Assume caller holds lock on epm_lock.
*/
static void
etm_suspend_reconnect(fmd_hdl_t *hdl, etm_epmap_t *mp)
@@ -519,8 +524,6 @@ etm_suspend_reconnect(fmd_hdl_t *hdl, etm_epmap_t *mp)
}
(void) pthread_mutex_unlock(&Etm_mod_lock);
- (void) pthread_mutex_lock(&mp->epm_lock);
-
if (mp->epm_oconn != NULL) {
(void) etm_xport_close(hdl, mp->epm_oconn);
mp->epm_oconn = NULL;
@@ -540,8 +543,6 @@ etm_suspend_reconnect(fmd_hdl_t *hdl, etm_epmap_t *mp)
mp->epm_timer_in_use = 1;
}
}
-
- (void) pthread_mutex_unlock(&mp->epm_lock);
}
/*
@@ -561,7 +562,7 @@ etm_reinit(fmd_hdl_t *hdl, etm_epmap_t *mp)
if (mp->epm_xprthdl != NULL) {
fmd_xprt_close(hdl, mp->epm_xprthdl);
- fmd_hdl_debug(hdl, "queue closed for %s", mp->epm_ep_str);
+ fmd_hdl_debug(hdl, "queue closed for %s", mp->epm_ep_str);
mp->epm_xprthdl = NULL;
/* mp->epm_ep_nvl is free'd in fmd_xprt_close */
mp->epm_ep_nvl = NULL;
@@ -896,11 +897,20 @@ etm_init_epmap(fmd_hdl_t *hdl, char *epname, int flags)
if (IS_CLIENT(newmap)) {
if (etm_handle_startup(hdl, newmap)) {
- etm_free_ep_nvl(hdl, newmap);
- (void) etm_xport_fini(hdl, newmap->epm_tlhdl);
- fmd_hdl_strfree(hdl, newmap->epm_ep_str);
- fmd_hdl_free(hdl, newmap, sizeof (etm_epmap_t));
- return;
+ /*
+ * For whatever reason, we could not complete the
+ * startup handshake with the server. Set the timer
+ * and try again.
+ */
+ if (newmap->epm_oconn != NULL) {
+ (void) etm_xport_close(hdl, newmap->epm_oconn);
+ newmap->epm_oconn = NULL;
+ }
+ newmap->epm_cstat = C_UNINITIALIZED;
+ newmap->epm_qstat = Q_UNINITIALIZED;
+ newmap->epm_timer_id = fmd_timer_install(hdl, newmap,
+ NULL, Reconn_interval);
+ newmap->epm_timer_in_use = 1;
}
}
@@ -1084,15 +1094,24 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl)
mp = fmd_xprt_getspecific(hdl, xprthdl);
- (void) pthread_mutex_lock(&mp->epm_lock);
+ if (pthread_mutex_trylock(&mp->epm_lock))
+ /* Another thread may be trying to close this fmd_xprt_t */
+ return (FMD_SEND_RETRY);
mp->epm_txbusy++;
- if (mp->epm_cstat == C_CLOSED) {
+ if (mp->epm_qstat == Q_UNINITIALIZED) {
mp->epm_txbusy--;
(void) pthread_mutex_unlock(&mp->epm_lock);
(void) pthread_cond_broadcast(&mp->epm_tx_cv);
+ return (FMD_SEND_FAILED);
+ }
+
+ if (mp->epm_cstat == C_CLOSED) {
etm_suspend_reconnect(hdl, mp);
+ mp->epm_txbusy--;
+ (void) pthread_mutex_unlock(&mp->epm_lock);
+ (void) pthread_cond_broadcast(&mp->epm_tx_cv);
return (FMD_SEND_RETRY);
}
@@ -1114,10 +1133,10 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl)
if (mp->epm_oconn == NULL) {
if ((mp->epm_oconn = etm_xport_open(hdl, mp->epm_tlhdl))
== NULL) {
+ etm_suspend_reconnect(hdl, mp);
mp->epm_txbusy--;
(void) pthread_mutex_unlock(&mp->epm_lock);
(void) pthread_cond_broadcast(&mp->epm_tx_cv);
- etm_suspend_reconnect(hdl, mp);
return (FMD_SEND_RETRY);
} else {
mp->epm_cstat = C_OPEN;
@@ -1129,8 +1148,9 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl)
msgnvl = fmd_xprt_translate(hdl, xprthdl, ep);
if (msgnvl == NULL) {
- mp->epm_qstat = Q_UNINITIALIZED;
+ mp->epm_txbusy--;
(void) pthread_mutex_unlock(&mp->epm_lock);
+ (void) pthread_cond_broadcast(&mp->epm_tx_cv);
fmd_hdl_error(hdl, "Failed to translate event %p\n",
(void *) ep);
return (FMD_SEND_FAILED);
@@ -1150,6 +1170,10 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl)
(void) etm_create_hdr(buf, mp->epm_ver, ETM_HDR_MSG, nvsize);
if (rv = nvlist_pack(msgnvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0)) {
+ (void) pthread_mutex_lock(&mp->epm_lock);
+ mp->epm_txbusy--;
+ (void) pthread_mutex_unlock(&mp->epm_lock);
+ (void) pthread_cond_broadcast(&mp->epm_tx_cv);
fmd_hdl_error(hdl, "Failed to pack event : %s\n", strerror(rv));
FREE_BUF(hdl, buf, buflen);
return (FMD_SEND_FAILED);
@@ -1159,15 +1183,15 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl)
if (etm_xport_write(hdl, mp->epm_oconn, Rw_timeout, buf,
buflen) != buflen) {
+ fmd_hdl_debug(hdl, "failed to send message to %s",
+ mp->epm_ep_str);
(void) pthread_mutex_lock(&mp->epm_lock);
+ etm_suspend_reconnect(hdl, mp);
mp->epm_txbusy--;
(void) pthread_mutex_unlock(&mp->epm_lock);
(void) pthread_cond_broadcast(&mp->epm_tx_cv);
- fmd_hdl_debug(hdl, "failed to send message to %s",
- mp->epm_ep_str);
FREE_BUF(hdl, buf, buflen);
INCRSTAT(Etm_stats.error_write.fmds_value.ui64);
- etm_suspend_reconnect(hdl, mp);
return (FMD_SEND_RETRY);
}
@@ -1178,15 +1202,15 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl)
if (etm_xport_read(hdl, mp->epm_oconn, Rw_timeout, buf,
hdrlen) != hdrlen) {
+ fmd_hdl_debug(hdl, "failed to read ACK from %s",
+ mp->epm_ep_str);
(void) pthread_mutex_lock(&mp->epm_lock);
+ etm_suspend_reconnect(hdl, mp);
mp->epm_txbusy--;
(void) pthread_mutex_unlock(&mp->epm_lock);
(void) pthread_cond_broadcast(&mp->epm_tx_cv);
- fmd_hdl_debug(hdl, "failed to read ACK from %s",
- mp->epm_ep_str);
FREE_BUF(hdl, buf, buflen);
INCRSTAT(Etm_stats.error_read.fmds_value.ui64);
- etm_suspend_reconnect(hdl, mp);
return (FMD_SEND_RETRY);
}
@@ -1215,19 +1239,18 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl)
} else if (hdrstat == ETM_HDR_S_RESTART) {
/* Server has restarted */
- if (mp->epm_xprthdl != NULL) {
- mp->epm_cstat = C_CLOSED;
- fmd_xprt_close(hdl, xprthdl);
- /* mp->epm_ep_nvl is free'd in fmd_xprt_close */
- mp->epm_ep_nvl = NULL;
- mp->epm_qstat = Q_UNINITIALIZED;
- fmd_hdl_debug(hdl, "server restarted, queue "
- "closed for %s", mp->epm_ep_str);
- if (mp->epm_timer_in_use == 0) {
- mp->epm_timer_id = fmd_timer_install(
- hdl, mp, NULL, Reconn_interval);
- mp->epm_timer_in_use = 1;
- }
+ mp->epm_cstat = C_CLOSED;
+ mp->epm_qstat = Q_UNINITIALIZED;
+ fmd_hdl_debug(hdl, "server %s restarted",
+ mp->epm_ep_str);
+ /*
+ * Cannot call fmd_xprt_close here, so we'll do it
+ * on the timeout thread.
+ */
+ if (mp->epm_timer_in_use == 0) {
+ mp->epm_timer_id = fmd_timer_install(
+ hdl, mp, NULL, 0);
+ mp->epm_timer_in_use = 1;
}
/*
@@ -1278,10 +1301,24 @@ etm_timeout(fmd_hdl_t *hdl, id_t id, void *data)
if (mp->epm_qstat == Q_UNINITIALIZED) {
/* Server has shutdown and we (client) need to reconnect */
+ if (mp->epm_xprthdl != NULL) {
+ fmd_xprt_close(hdl, mp->epm_xprthdl);
+ fmd_hdl_debug(hdl, "queue closed for %s",
+ mp->epm_ep_str);
+ mp->epm_xprthdl = NULL;
+ /* mp->epm_ep_nvl is free'd in fmd_xprt_close */
+ mp->epm_ep_nvl = NULL;
+ }
+
if (mp->epm_ep_nvl == NULL)
(void) etm_get_ep_nvl(hdl, mp);
if (etm_handle_startup(hdl, mp)) {
+ if (mp->epm_oconn != NULL) {
+ (void) etm_xport_close(hdl, mp->epm_oconn);
+ mp->epm_oconn = NULL;
+ }
+ mp->epm_cstat = C_UNINITIALIZED;
mp->epm_qstat = Q_UNINITIALIZED;
mp->epm_timer_id = fmd_timer_install(hdl, mp, NULL,
Reconn_interval);