diff options
author | jrutt <none@none> | 2006-05-19 14:43:08 -0700 |
---|---|---|
committer | jrutt <none@none> | 2006-05-19 14:43:08 -0700 |
commit | 154b1f02449b21af9273efd1a7776a3fe65a0744 (patch) | |
tree | 9ee0542f6378c7269460b63e235427450e183e02 /usr/src/cmd | |
parent | 99fd1a494893b1f74ebd5f3561cebb86213f28b1 (diff) | |
download | illumos-gate-154b1f02449b21af9273efd1a7776a3fe65a0744.tar.gz |
6417265 event-transport : call to fmd_xprt_close inside etm_send causes module abort
6417268 event-transport : change debug and error messages for better problem determination
6418474 event-transport : need to call nvlist_free in etm_post_msg when fmd_xprt_post is not called
6419724 event-transport : client should make multiple startup attempts
6421336 event-transport : deadlock between etm_reinit() and etm_send()
6421451 event-transport : set client socket to O_NONBLOCK prior to calling connect()
Diffstat (limited to 'usr/src/cmd')
-rw-r--r-- | usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c | 42 | ||||
-rw-r--r-- | usr/src/cmd/fm/modules/common/event-transport/etm.c | 113 |
2 files changed, 97 insertions, 58 deletions
diff --git a/usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c b/usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c index 55c404a416..e58a20b53c 100644 --- a/usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c +++ b/usr/src/cmd/fm/modules/SUNW,SPARC-Enterprise/event-transport/ex_dscp.c @@ -152,13 +152,13 @@ exs_prep_client(fmd_hdl_t *hdl, exs_hdl_t *hp) if ((rv = dscpAddr(hp->h_domain_id, DSCP_ADDR_REMOTE, (struct sockaddr *)&hp->h_client.c_saddr, &hp->h_client.c_len)) != DSCP_OK) { - fmd_hdl_debug(hdl, "xport - dscpAddr for %s failed: %d", + fmd_hdl_error(hdl, "xport - dscpAddr for %s failed: %d", hp->h_endpt_id, rv); return (1); } if ((hp->h_client.c_sd = socket(AF_INET, SOCK_STREAM, 0)) == -1) { - fmd_hdl_debug(hdl, "xport - client socket failed for %s", + fmd_hdl_error(hdl, "xport - client socket failed for %s", hp->h_endpt_id); return (1); } @@ -166,7 +166,7 @@ exs_prep_client(fmd_hdl_t *hdl, exs_hdl_t *hp) /* Bind the socket to the local IP address of the DSCP link */ if ((rv = dscpBind(hp->h_domain_id, hp->h_client.c_sd, EXS_CLIENT_PORT)) != DSCP_OK) { - fmd_hdl_debug(hdl, "xport - client bind for %s failed: %d", + fmd_hdl_error(hdl, "xport - client bind for %s failed: %d", hp->h_endpt_id, rv); (void) close(hp->h_client.c_sd); hp->h_client.c_sd = EXS_SD_FREE; @@ -177,7 +177,7 @@ exs_prep_client(fmd_hdl_t *hdl, exs_hdl_t *hp) /* Set IPsec security policy for this socket */ if ((rv = dscpSecure(hp->h_domain_id, hp->h_client.c_sd)) != DSCP_OK) { - fmd_hdl_debug(hdl, "xport - dscpSecure for %s failed: %d", + fmd_hdl_error(hdl, "xport - dscpSecure for %s failed: %d", hp->h_endpt_id, rv); (void) close(hp->h_client.c_sd); hp->h_client.c_sd = EXS_SD_FREE; @@ -200,13 +200,13 @@ exs_prep_accept(fmd_hdl_t *hdl) int rv; if ((Acceptor_conn.c_sd = socket(AF_INET, SOCK_STREAM, 0)) == -1) { - fmd_hdl_debug(hdl, "xport - acceptor socket failed"); + fmd_hdl_error(hdl, "xport - acceptor socket failed"); return (1); } if (setsockopt(Acceptor_conn.c_sd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof (optval))) { - fmd_hdl_debug(hdl, "xport - set REUSEADDR failed"); + fmd_hdl_error(hdl, "xport - set REUSEADDR failed"); (void) close(Acceptor_conn.c_sd); Acceptor_conn.c_sd = EXS_SD_FREE; return (1); @@ -215,7 +215,7 @@ exs_prep_accept(fmd_hdl_t *hdl) /* Bind the socket to the local IP address of the DSCP link */ if ((rv = dscpBind(domain, Acceptor_conn.c_sd, EXS_SERVER_PORT)) != DSCP_OK) { - fmd_hdl_debug(hdl, "xport - acceptor bind failed: %d", rv); + fmd_hdl_error(hdl, "xport - acceptor bind failed: %d", rv); (void) close(Acceptor_conn.c_sd); Acceptor_conn.c_sd = EXS_SD_FREE; return (1); @@ -223,7 +223,7 @@ exs_prep_accept(fmd_hdl_t *hdl) /* Activate IPsec security policy for this socket */ if ((rv = dscpSecure(domain, Acceptor_conn.c_sd)) != DSCP_OK) { - fmd_hdl_debug(hdl, "xport - dscpSecure for acceptor failed: %d", + fmd_hdl_error(hdl, "xport - dscpSecure for acceptor failed: %d", rv); (void) close(Acceptor_conn.c_sd); Acceptor_conn.c_sd = EXS_SD_FREE; @@ -231,7 +231,7 @@ exs_prep_accept(fmd_hdl_t *hdl) } if ((listen(Acceptor_conn.c_sd, EXS_NUM_SOCKS)) == -1) { - fmd_hdl_debug(hdl, "xport - acceptor listen failed"); + fmd_hdl_error(hdl, "xport - acceptor listen failed"); (void) close(Acceptor_conn.c_sd); Acceptor_conn.c_sd = EXS_SD_FREE; return (1); @@ -305,7 +305,7 @@ exs_build_set(fmd_hdl_t *hdl) else if ((errno == EBADF) || (errno == ENOTSOCK)) curr->h_server.c_sd = EXS_SD_FREE; else - fmd_hdl_error(hdl, "xport - getsockname fail"); + fmd_hdl_debug(hdl, "xport - getsockname fail"); if (curr->h_server.c_sd > max_sd) max_sd = curr->h_server.c_sd; @@ -535,7 +535,7 @@ etm_xport_init(fmd_hdl_t *hdl, char *endpoint_id, exs_hdl_t *hp, *curr; int domain_id; - if ((exs_get_id(hdl, endpoint_id, &domain_id)) == -1) + if (exs_get_id(hdl, endpoint_id, &domain_id)) return (NULL); (void) pthread_mutex_lock(&List_lock); @@ -682,20 +682,22 @@ etm_xport_open(fmd_hdl_t *hdl, etm_xport_hdl_t tlhdl) return (NULL); } + /* Set the socket to be non-blocking */ + flags = fcntl(hp->h_client.c_sd, F_GETFL, 0); + (void) fcntl(hp->h_client.c_sd, F_SETFL, flags | O_NONBLOCK); + if ((connect(hp->h_client.c_sd, (struct sockaddr *)&hp->h_client.c_saddr, hp->h_client.c_len)) == -1) { - fmd_hdl_error(hdl, "xport - failed connect to server for %s", - hp->h_endpt_id); - (void) close(hp->h_client.c_sd); - hp->h_client.c_sd = EXS_SD_FREE; - return (NULL); + if (errno != EINPROGRESS) { + fmd_hdl_error(hdl, "xport - failed server connect : %s", + hp->h_endpt_id); + (void) close(hp->h_client.c_sd); + hp->h_client.c_sd = EXS_SD_FREE; + return (NULL); + } } - /* Set the socket to be non-blocking */ - flags = fcntl(hp->h_client.c_sd, F_GETFL, 0); - (void) fcntl(hp->h_client.c_sd, F_SETFL, flags | O_NONBLOCK); - fmd_hdl_debug(hdl, "xport - connected client socket for %s", hp->h_endpt_id); diff --git a/usr/src/cmd/fm/modules/common/event-transport/etm.c b/usr/src/cmd/fm/modules/common/event-transport/etm.c index 285c33caa8..00e4dd9634 100644 --- a/usr/src/cmd/fm/modules/common/event-transport/etm.c +++ b/usr/src/cmd/fm/modules/common/event-transport/etm.c @@ -235,7 +235,7 @@ etm_check_hdr(fmd_hdl_t *hdl, etm_epmap_t *mp, void *buf) etm_proto_hdr_t *hp = (etm_proto_hdr_t *)buf; if (bcmp(hp->hdr_delim, ETM_DELIM, ETM_DELIMLEN) != 0) { - fmd_hdl_error(hdl, "Bad delimiter in ETM header from %s " + fmd_hdl_debug(hdl, "Bad delimiter in ETM header from %s " ": 0x%x\n", mp->epm_ep_str, hp->hdr_delim); return (ETM_HDR_INVALID); } @@ -247,14 +247,14 @@ etm_check_hdr(fmd_hdl_t *hdl, etm_epmap_t *mp, void *buf) } if (hp->hdr_ver != mp->epm_ver) { - fmd_hdl_error(hdl, "Bad version in ETM header from %s : 0x%x\n", + fmd_hdl_debug(hdl, "Bad version in ETM header from %s : 0x%x\n", mp->epm_ep_str, hp->hdr_ver); return (ETM_HDR_BADVERSION); } if ((hp->hdr_type == ETM_HDR_TYPE_TOO_LOW) || (hp->hdr_type >= ETM_HDR_TYPE_TOO_HIGH)) { - fmd_hdl_error(hdl, "Bad type in ETM header from %s : 0x%x\n", + fmd_hdl_debug(hdl, "Bad type in ETM header from %s : 0x%x\n", mp->epm_ep_str, hp->hdr_type); return (ETM_HDR_BADTYPE); } @@ -292,7 +292,7 @@ etm_post_msg(fmd_hdl_t *hdl, etm_epmap_t *mp, void *buf, size_t buflen) int rv; if (nvlist_unpack((char *)buf, buflen, &nvl, 0)) { - fmd_hdl_debug(hdl, "failed to unpack message"); + fmd_hdl_error(hdl, "failed to unpack message"); return (1); } @@ -317,11 +317,15 @@ etm_post_msg(fmd_hdl_t *hdl, etm_epmap_t *mp, void *buf, size_t buflen) } else { fmd_hdl_debug(hdl, "unable to post message, qstat = %d", mp->epm_qstat); + nvlist_free(nvl); + /* Remote peer will attempt to resend event */ rv = 2; } } else { (void) pthread_mutex_unlock(&Etm_mod_lock); fmd_hdl_debug(hdl, "unable to post message, module exiting"); + nvlist_free(nvl); + /* Remote peer will attempt to resend event */ rv = 3; } @@ -435,7 +439,7 @@ etm_get_ep_nvl(fmd_hdl_t *hdl, etm_epmap_t *mp) (void) nvlist_alloc(&mp->epm_ep_nvl, NV_UNIQUE_NAME, 0); if (nvlist_add_string(mp->epm_ep_nvl, "domain-id", mp->epm_ep_str)) { - fmd_hdl_debug(hdl, "failed to add domain-id string to nvlist " + fmd_hdl_error(hdl, "failed to add domain-id string to nvlist " "for %s", mp->epm_ep_str); nvlist_free(mp->epm_ep_nvl); return (1); @@ -508,6 +512,7 @@ etm_reconnect(fmd_hdl_t *hdl, etm_epmap_t *mp) /* * Suspend a given connection and setup for reconnection retries. + * Assume caller holds lock on epm_lock. */ static void etm_suspend_reconnect(fmd_hdl_t *hdl, etm_epmap_t *mp) @@ -519,8 +524,6 @@ etm_suspend_reconnect(fmd_hdl_t *hdl, etm_epmap_t *mp) } (void) pthread_mutex_unlock(&Etm_mod_lock); - (void) pthread_mutex_lock(&mp->epm_lock); - if (mp->epm_oconn != NULL) { (void) etm_xport_close(hdl, mp->epm_oconn); mp->epm_oconn = NULL; @@ -540,8 +543,6 @@ etm_suspend_reconnect(fmd_hdl_t *hdl, etm_epmap_t *mp) mp->epm_timer_in_use = 1; } } - - (void) pthread_mutex_unlock(&mp->epm_lock); } /* @@ -561,7 +562,7 @@ etm_reinit(fmd_hdl_t *hdl, etm_epmap_t *mp) if (mp->epm_xprthdl != NULL) { fmd_xprt_close(hdl, mp->epm_xprthdl); - fmd_hdl_debug(hdl, "queue closed for %s", mp->epm_ep_str); + fmd_hdl_debug(hdl, "queue closed for %s", mp->epm_ep_str); mp->epm_xprthdl = NULL; /* mp->epm_ep_nvl is free'd in fmd_xprt_close */ mp->epm_ep_nvl = NULL; @@ -896,11 +897,20 @@ etm_init_epmap(fmd_hdl_t *hdl, char *epname, int flags) if (IS_CLIENT(newmap)) { if (etm_handle_startup(hdl, newmap)) { - etm_free_ep_nvl(hdl, newmap); - (void) etm_xport_fini(hdl, newmap->epm_tlhdl); - fmd_hdl_strfree(hdl, newmap->epm_ep_str); - fmd_hdl_free(hdl, newmap, sizeof (etm_epmap_t)); - return; + /* + * For whatever reason, we could not complete the + * startup handshake with the server. Set the timer + * and try again. + */ + if (newmap->epm_oconn != NULL) { + (void) etm_xport_close(hdl, newmap->epm_oconn); + newmap->epm_oconn = NULL; + } + newmap->epm_cstat = C_UNINITIALIZED; + newmap->epm_qstat = Q_UNINITIALIZED; + newmap->epm_timer_id = fmd_timer_install(hdl, newmap, + NULL, Reconn_interval); + newmap->epm_timer_in_use = 1; } } @@ -1084,15 +1094,24 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl) mp = fmd_xprt_getspecific(hdl, xprthdl); - (void) pthread_mutex_lock(&mp->epm_lock); + if (pthread_mutex_trylock(&mp->epm_lock)) + /* Another thread may be trying to close this fmd_xprt_t */ + return (FMD_SEND_RETRY); mp->epm_txbusy++; - if (mp->epm_cstat == C_CLOSED) { + if (mp->epm_qstat == Q_UNINITIALIZED) { mp->epm_txbusy--; (void) pthread_mutex_unlock(&mp->epm_lock); (void) pthread_cond_broadcast(&mp->epm_tx_cv); + return (FMD_SEND_FAILED); + } + + if (mp->epm_cstat == C_CLOSED) { etm_suspend_reconnect(hdl, mp); + mp->epm_txbusy--; + (void) pthread_mutex_unlock(&mp->epm_lock); + (void) pthread_cond_broadcast(&mp->epm_tx_cv); return (FMD_SEND_RETRY); } @@ -1114,10 +1133,10 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl) if (mp->epm_oconn == NULL) { if ((mp->epm_oconn = etm_xport_open(hdl, mp->epm_tlhdl)) == NULL) { + etm_suspend_reconnect(hdl, mp); mp->epm_txbusy--; (void) pthread_mutex_unlock(&mp->epm_lock); (void) pthread_cond_broadcast(&mp->epm_tx_cv); - etm_suspend_reconnect(hdl, mp); return (FMD_SEND_RETRY); } else { mp->epm_cstat = C_OPEN; @@ -1129,8 +1148,9 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl) msgnvl = fmd_xprt_translate(hdl, xprthdl, ep); if (msgnvl == NULL) { - mp->epm_qstat = Q_UNINITIALIZED; + mp->epm_txbusy--; (void) pthread_mutex_unlock(&mp->epm_lock); + (void) pthread_cond_broadcast(&mp->epm_tx_cv); fmd_hdl_error(hdl, "Failed to translate event %p\n", (void *) ep); return (FMD_SEND_FAILED); @@ -1150,6 +1170,10 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl) (void) etm_create_hdr(buf, mp->epm_ver, ETM_HDR_MSG, nvsize); if (rv = nvlist_pack(msgnvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0)) { + (void) pthread_mutex_lock(&mp->epm_lock); + mp->epm_txbusy--; + (void) pthread_mutex_unlock(&mp->epm_lock); + (void) pthread_cond_broadcast(&mp->epm_tx_cv); fmd_hdl_error(hdl, "Failed to pack event : %s\n", strerror(rv)); FREE_BUF(hdl, buf, buflen); return (FMD_SEND_FAILED); @@ -1159,15 +1183,15 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl) if (etm_xport_write(hdl, mp->epm_oconn, Rw_timeout, buf, buflen) != buflen) { + fmd_hdl_debug(hdl, "failed to send message to %s", + mp->epm_ep_str); (void) pthread_mutex_lock(&mp->epm_lock); + etm_suspend_reconnect(hdl, mp); mp->epm_txbusy--; (void) pthread_mutex_unlock(&mp->epm_lock); (void) pthread_cond_broadcast(&mp->epm_tx_cv); - fmd_hdl_debug(hdl, "failed to send message to %s", - mp->epm_ep_str); FREE_BUF(hdl, buf, buflen); INCRSTAT(Etm_stats.error_write.fmds_value.ui64); - etm_suspend_reconnect(hdl, mp); return (FMD_SEND_RETRY); } @@ -1178,15 +1202,15 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl) if (etm_xport_read(hdl, mp->epm_oconn, Rw_timeout, buf, hdrlen) != hdrlen) { + fmd_hdl_debug(hdl, "failed to read ACK from %s", + mp->epm_ep_str); (void) pthread_mutex_lock(&mp->epm_lock); + etm_suspend_reconnect(hdl, mp); mp->epm_txbusy--; (void) pthread_mutex_unlock(&mp->epm_lock); (void) pthread_cond_broadcast(&mp->epm_tx_cv); - fmd_hdl_debug(hdl, "failed to read ACK from %s", - mp->epm_ep_str); FREE_BUF(hdl, buf, buflen); INCRSTAT(Etm_stats.error_read.fmds_value.ui64); - etm_suspend_reconnect(hdl, mp); return (FMD_SEND_RETRY); } @@ -1215,19 +1239,18 @@ etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xprthdl, fmd_event_t *ep, nvlist_t *nvl) } else if (hdrstat == ETM_HDR_S_RESTART) { /* Server has restarted */ - if (mp->epm_xprthdl != NULL) { - mp->epm_cstat = C_CLOSED; - fmd_xprt_close(hdl, xprthdl); - /* mp->epm_ep_nvl is free'd in fmd_xprt_close */ - mp->epm_ep_nvl = NULL; - mp->epm_qstat = Q_UNINITIALIZED; - fmd_hdl_debug(hdl, "server restarted, queue " - "closed for %s", mp->epm_ep_str); - if (mp->epm_timer_in_use == 0) { - mp->epm_timer_id = fmd_timer_install( - hdl, mp, NULL, Reconn_interval); - mp->epm_timer_in_use = 1; - } + mp->epm_cstat = C_CLOSED; + mp->epm_qstat = Q_UNINITIALIZED; + fmd_hdl_debug(hdl, "server %s restarted", + mp->epm_ep_str); + /* + * Cannot call fmd_xprt_close here, so we'll do it + * on the timeout thread. + */ + if (mp->epm_timer_in_use == 0) { + mp->epm_timer_id = fmd_timer_install( + hdl, mp, NULL, 0); + mp->epm_timer_in_use = 1; } /* @@ -1278,10 +1301,24 @@ etm_timeout(fmd_hdl_t *hdl, id_t id, void *data) if (mp->epm_qstat == Q_UNINITIALIZED) { /* Server has shutdown and we (client) need to reconnect */ + if (mp->epm_xprthdl != NULL) { + fmd_xprt_close(hdl, mp->epm_xprthdl); + fmd_hdl_debug(hdl, "queue closed for %s", + mp->epm_ep_str); + mp->epm_xprthdl = NULL; + /* mp->epm_ep_nvl is free'd in fmd_xprt_close */ + mp->epm_ep_nvl = NULL; + } + if (mp->epm_ep_nvl == NULL) (void) etm_get_ep_nvl(hdl, mp); if (etm_handle_startup(hdl, mp)) { + if (mp->epm_oconn != NULL) { + (void) etm_xport_close(hdl, mp->epm_oconn); + mp->epm_oconn = NULL; + } + mp->epm_cstat = C_UNINITIALIZED; mp->epm_qstat = Q_UNINITIALIZED; mp->epm_timer_id = fmd_timer_install(hdl, mp, NULL, Reconn_interval); |