diff options
author | Lu Huafeng <Huafeng.Lv@Sun.COM> | 2008-12-17 12:37:29 +0800 |
---|---|---|
committer | Lu Huafeng <Huafeng.Lv@Sun.COM> | 2008-12-17 12:37:29 +0800 |
commit | 8e4b770f2d2fd1b39d446d80e94ec2b1f9703543 (patch) | |
tree | 05f098b0bbf8ec7a62eeba98e6056de7638eb500 /usr/src | |
parent | 7d586c73618cfc2856b4ccf6d9aa584131fd17e1 (diff) | |
download | illumos-gate-8e4b770f2d2fd1b39d446d80e94ec2b1f9703543.tar.gz |
PSARC 2008/688 Sun Cluster TCP/IP Hooks Update
6717519 Support sun cluster with client-side shared address
6777262 modify Cluster hook signatures to add netstackid and allow future changes
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/inet/ip/ip.c | 32 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/ipclassifier.c | 24 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/ipsecah.c | 7 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/ipsecesp.c | 7 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/sadb.c | 49 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp.h | 9 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 135 | ||||
-rw-r--r-- | usr/src/uts/common/inet/udp/udp.c | 185 | ||||
-rw-r--r-- | usr/src/uts/common/inet/udp_impl.h | 1 | ||||
-rw-r--r-- | usr/src/uts/intel/ip/ip.global-objs.debug64 | 2 | ||||
-rw-r--r-- | usr/src/uts/intel/ip/ip.global-objs.obj64 | 2 | ||||
-rw-r--r-- | usr/src/uts/sparc/ip/ip.global-objs.debug64 | 2 | ||||
-rw-r--r-- | usr/src/uts/sparc/ip/ip.global-objs.obj64 | 2 |
13 files changed, 326 insertions, 131 deletions
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index 3141cd914e..03dc414233 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -187,38 +187,40 @@ typedef struct iproutedata_s { * in the cluster * */ -int (*cl_inet_isclusterwide)(uint8_t protocol, - sa_family_t addr_family, uint8_t *laddrp) = NULL; +int (*cl_inet_isclusterwide)(netstackid_t stack_id, uint8_t protocol, + sa_family_t addr_family, uint8_t *laddrp, void *args) = NULL; /* * Hook function to generate cluster wide ip fragment identifier */ -uint32_t (*cl_inet_ipident)(uint8_t protocol, sa_family_t addr_family, - uint8_t *laddrp, uint8_t *faddrp) = NULL; +uint32_t (*cl_inet_ipident)(netstackid_t stack_id, uint8_t protocol, + sa_family_t addr_family, uint8_t *laddrp, uint8_t *faddrp, + void *args) = NULL; /* * Hook function to generate cluster wide SPI. */ -void (*cl_inet_getspi)(uint8_t, uint8_t *, size_t) = NULL; +void (*cl_inet_getspi)(netstackid_t, uint8_t, uint8_t *, size_t, + void *) = NULL; /* * Hook function to verify if the SPI is already utlized. */ -int (*cl_inet_checkspi)(uint8_t, uint32_t) = NULL; +int (*cl_inet_checkspi)(netstackid_t, uint8_t, uint32_t, void *) = NULL; /* * Hook function to delete the SPI from the cluster wide repository. */ -void (*cl_inet_deletespi)(uint8_t, uint32_t) = NULL; +void (*cl_inet_deletespi)(netstackid_t, uint8_t, uint32_t, void *) = NULL; /* * Hook function to inform the cluster when packet received on an IDLE SA */ -void (*cl_inet_idlesa)(uint8_t, uint32_t, sa_family_t, in6_addr_t, - in6_addr_t) = NULL; +void (*cl_inet_idlesa)(netstackid_t, uint8_t, uint32_t, sa_family_t, + in6_addr_t, in6_addr_t, void *) = NULL; /* * Synchronization notes: @@ -22707,11 +22709,13 @@ another:; clusterwide = 0; if (cl_inet_ipident) { ASSERT(cl_inet_isclusterwide); - if ((*cl_inet_isclusterwide)(IPPROTO_IP, - AF_INET, (uint8_t *)(uintptr_t)src)) { - ipha->ipha_ident = (*cl_inet_ipident)(IPPROTO_IP, - AF_INET, (uint8_t *)(uintptr_t)src, - (uint8_t *)(uintptr_t)dst); + netstackid_t stack_id = ipst->ips_netstack->netstack_stackid; + + if ((*cl_inet_isclusterwide)(stack_id, IPPROTO_IP, + AF_INET, (uint8_t *)(uintptr_t)src, NULL)) { + ipha->ipha_ident = (*cl_inet_ipident)(stack_id, + IPPROTO_IP, AF_INET, (uint8_t *)(uintptr_t)src, + (uint8_t *)(uintptr_t)dst, NULL); clusterwide = 1; } } diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c index 50bd38c981..fccb85f8f0 100644 --- a/usr/src/uts/common/inet/ip/ipclassifier.c +++ b/usr/src/uts/common/inet/ip/ipclassifier.c @@ -358,6 +358,15 @@ static void ip_helper_stream_destructor(void *, void *); boolean_t ip_use_helper_cache = B_TRUE; +/* + * Hook functions to enable cluster networking + * On non-clustered systems these vectors must always be NULL. + */ +extern void (*cl_inet_listen)(netstackid_t, uint8_t, sa_family_t, + uint8_t *, in_port_t, void *); +extern void (*cl_inet_unlisten)(netstackid_t, uint8_t, sa_family_t, + uint8_t *, in_port_t, void *); + #ifdef IPCL_DEBUG #define INET_NTOA_BUFSIZE 18 @@ -810,8 +819,8 @@ ipcl_conn_unlisten(conn_t *connp) addr_family = AF_INET; laddrp = (uint8_t *)&connp->conn_bound_source; } - (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, - connp->conn_lport); + (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, + IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); } connp->conn_flags &= ~IPCL_CL_LISTENER; } @@ -1190,8 +1199,10 @@ ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) if (cl_inet_listen != NULL) { ASSERT(!connp->conn_pkt_isv6); connp->conn_flags |= IPCL_CL_LISTENER; - (*cl_inet_listen)(IPPROTO_TCP, AF_INET, - (uint8_t *)&connp->conn_bound_source, lport); + (*cl_inet_listen)( + connp->conn_netstack->netstack_stackid, + IPPROTO_TCP, AF_INET, + (uint8_t *)&connp->conn_bound_source, lport, NULL); } break; @@ -1271,8 +1282,9 @@ ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, laddrp = (uint8_t *)&connp->conn_bound_source; } connp->conn_flags |= IPCL_CL_LISTENER; - (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, - lport); + (*cl_inet_listen)( + connp->conn_netstack->netstack_stackid, + IPPROTO_TCP, addr_family, laddrp, lport, NULL); } break; diff --git a/usr/src/uts/common/inet/ip/ipsecah.c b/usr/src/uts/common/inet/ip/ipsecah.c index cf016d9ce7..93a90848d0 100644 --- a/usr/src/uts/common/inet/ip/ipsecah.c +++ b/usr/src/uts/common/inet/ip/ipsecah.c @@ -151,7 +151,8 @@ static boolean_t ah_register_out(uint32_t, uint32_t, uint_t, ipsecah_stack_t *); static void *ipsecah_stack_init(netstackid_t stackid, netstack_t *ns); static void ipsecah_stack_fini(netstackid_t stackid, void *arg); -extern void (*cl_inet_getspi)(uint8_t, uint8_t *, size_t); +extern void (*cl_inet_getspi)(netstackid_t, uint8_t, uint8_t *, size_t, + void *); /* Setable in /etc/system */ uint32_t ah_hash_size = IPSEC_DEFAULT_HASH_SIZE; @@ -1959,8 +1960,8 @@ ah_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack) * Randomly generate a proposed SPI value. */ if (cl_inet_getspi != NULL) { - cl_inet_getspi(IPPROTO_AH, (uint8_t *)&newspi, - sizeof (uint32_t)); + cl_inet_getspi(ahstack->ipsecah_netstack->netstack_stackid, + IPPROTO_AH, (uint8_t *)&newspi, sizeof (uint32_t), NULL); } else { (void) random_get_pseudo_bytes((uint8_t *)&newspi, sizeof (uint32_t)); diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c index d3690f129e..e30dfca9fe 100644 --- a/usr/src/uts/common/inet/ip/ipsecesp.c +++ b/usr/src/uts/common/inet/ip/ipsecesp.c @@ -144,7 +144,8 @@ static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t, static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t); static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *, uint_t); -extern void (*cl_inet_getspi)(uint8_t, uint8_t *, size_t); +extern void (*cl_inet_getspi)(netstackid_t, uint8_t, uint8_t *, size_t, + void *); /* Setable in /etc/system */ uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; @@ -1487,8 +1488,8 @@ esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) * Randomly generate a proposed SPI value */ if (cl_inet_getspi != NULL) { - cl_inet_getspi(IPPROTO_ESP, (uint8_t *)&newspi, - sizeof (uint32_t)); + cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid, + IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL); } else { (void) random_get_pseudo_bytes((uint8_t *)&newspi, sizeof (uint32_t)); diff --git a/usr/src/uts/common/inet/ip/sadb.c b/usr/src/uts/common/inet/ip/sadb.c index 0bfb60dac4..ba6d5c5a3d 100644 --- a/usr/src/uts/common/inet/ip/sadb.c +++ b/usr/src/uts/common/inet/ip/sadb.c @@ -85,9 +85,12 @@ static void lifetime_fuzz(ipsa_t *); static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t); static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset); -extern void (*cl_inet_getspi)(uint8_t protocol, uint8_t *ptr, size_t len); -extern int (*cl_inet_checkspi)(uint8_t protocol, uint32_t spi); -extern void (*cl_inet_deletespi)(uint8_t protocol, uint32_t spi); +extern void (*cl_inet_getspi)(netstackid_t stack_id, uint8_t protocol, + uint8_t *ptr, size_t len, void *args); +extern int (*cl_inet_checkspi)(netstackid_t stack_id, uint8_t protocol, + uint32_t spi, void *args); +extern void (*cl_inet_deletespi)(netstackid_t stack_id, uint8_t protocol, + uint32_t spi, void *args); /* * ipsacq_maxpackets is defined here to make it tunable @@ -335,7 +338,8 @@ sadb_delete_cluster(ipsa_t *assoc) (assoc->ipsa_state == IPSA_STATE_MATURE))) { protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP; - cl_inet_deletespi(protocol, assoc->ipsa_spi); + cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid, + protocol, assoc->ipsa_spi, NULL); } } @@ -1026,24 +1030,25 @@ sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever, int i; isaf_t *table = *tablep; uint8_t protocol; + ipsa_t *sa; + netstackid_t sid; if (table == NULL) return; for (i = 0; i < numentries; i++) { mutex_enter(&table[i].isaf_lock); - while (table[i].isaf_ipsa != NULL) { + while ((sa = table[i].isaf_ipsa) != NULL) { if (inbound && cl_inet_deletespi && - (table[i].isaf_ipsa->ipsa_state != - IPSA_STATE_ACTIVE_ELSEWHERE) && - (table[i].isaf_ipsa->ipsa_state != - IPSA_STATE_IDLE)) { - protocol = (table[i].isaf_ipsa->ipsa_type == - SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP; - cl_inet_deletespi(protocol, - table[i].isaf_ipsa->ipsa_spi); + (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) && + (sa->ipsa_state != IPSA_STATE_IDLE)) { + protocol = (sa->ipsa_type == SADB_SATYPE_AH) ? + IPPROTO_AH : IPPROTO_ESP; + sid = sa->ipsa_netstack->netstack_stackid; + cl_inet_deletespi(sid, protocol, sa->ipsa_spi, + NULL); } - sadb_unlinkassoc(table[i].isaf_ipsa); + sadb_unlinkassoc(sa); } table[i].isaf_gen++; mutex_exit(&table[i].isaf_lock); @@ -3255,7 +3260,8 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) && cl_inet_checkspi && (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) { - rcode = cl_inet_checkspi(protocol, assoc->sadb_sa_spi); + rcode = cl_inet_checkspi(ns->netstack_stackid, protocol, + assoc->sadb_sa_spi, NULL); if (rcode == -1) { return (EEXIST); } @@ -5869,7 +5875,8 @@ sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic, if (master_spi < min || master_spi > max) { /* Return a random value in the range. */ if (cl_inet_getspi) { - cl_inet_getspi(protocol, (uint8_t *)&add, sizeof (add)); + cl_inet_getspi(ns->netstack_stackid, protocol, + (uint8_t *)&add, sizeof (add), NULL); } else { (void) random_get_pseudo_bytes((uint8_t *)&add, sizeof (add)); @@ -6896,8 +6903,8 @@ void sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, netstack_t *ns) { ipsec_stack_t *ipss = ns->netstack_ipsec; - extern void (*cl_inet_idlesa)(uint8_t, uint32_t, sa_family_t, - in6_addr_t, in6_addr_t); + extern void (*cl_inet_idlesa)(netstackid_t, uint8_t, uint32_t, + sa_family_t, in6_addr_t, in6_addr_t, void *); in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr); in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr); @@ -6910,9 +6917,9 @@ sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, netstack_t *ns) return; } - cl_inet_idlesa((ipsa->ipsa_type == SADB_SATYPE_AH) ? - IPPROTO_AH : IPPROTO_ESP, ipsa->ipsa_spi, ipsa->ipsa_addrfam, - *srcaddr, *dstaddr); + cl_inet_idlesa(ns->netstack_stackid, + (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP, + ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL); mutex_enter(&ipsa->ipsa_lock); ipsa->ipsa_mblkcnt++; diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index 76d1864d62..1911216dd5 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -719,15 +719,6 @@ typedef struct cl_tcp_info_s { } cl_tcp_info_t; /* - * Hook functions to enable cluster networking - * On non-clustered systems these vectors must always be NULL. - */ - -extern void (*cl_inet_listen)(uint8_t, sa_family_t, uint8_t *, in_port_t); -extern void (*cl_inet_unlisten)(uint8_t, sa_family_t, uint8_t *, - in_port_t); - -/* * Contracted Consolidation Private ioctl for aborting TCP connections. * In order to keep the offsets and size of the structure the same between * a 32-bit application and a 64-bit amd64 kernel, we use a #pragma diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index ce7d9fb395..4ac44ec799 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -1358,53 +1358,72 @@ uint32_t tcp_drop_ack_unsent_cnt = 10; * On non-clustered systems these vectors must always be NULL. */ -void (*cl_inet_listen)(uint8_t protocol, sa_family_t addr_family, - uint8_t *laddrp, in_port_t lport) = NULL; -void (*cl_inet_unlisten)(uint8_t protocol, sa_family_t addr_family, - uint8_t *laddrp, in_port_t lport) = NULL; -void (*cl_inet_connect)(uint8_t protocol, sa_family_t addr_family, +void (*cl_inet_listen)(netstackid_t stack_id, uint8_t protocol, + sa_family_t addr_family, uint8_t *laddrp, + in_port_t lport, void *args) = NULL; +void (*cl_inet_unlisten)(netstackid_t stack_id, uint8_t protocol, + sa_family_t addr_family, uint8_t *laddrp, + in_port_t lport, void *args) = NULL; + +int (*cl_inet_connect2)(netstackid_t stack_id, uint8_t protocol, + boolean_t is_outgoing, + sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, - uint8_t *faddrp, in_port_t fport) = NULL; -void (*cl_inet_disconnect)(uint8_t protocol, sa_family_t addr_family, - uint8_t *laddrp, in_port_t lport, - uint8_t *faddrp, in_port_t fport) = NULL; + uint8_t *faddrp, in_port_t fport, + void *args) = NULL; + +void (*cl_inet_disconnect)(netstackid_t stack_id, uint8_t protocol, + sa_family_t addr_family, uint8_t *laddrp, + in_port_t lport, uint8_t *faddrp, + in_port_t fport, void *args) = NULL; /* * The following are defined in ip.c */ -extern int (*cl_inet_isclusterwide)(uint8_t protocol, sa_family_t addr_family, - uint8_t *laddrp); -extern uint32_t (*cl_inet_ipident)(uint8_t protocol, sa_family_t addr_family, - uint8_t *laddrp, uint8_t *faddrp); +extern int (*cl_inet_isclusterwide)(netstackid_t stack_id, uint8_t protocol, + sa_family_t addr_family, uint8_t *laddrp, + void *args); +extern uint32_t (*cl_inet_ipident)(netstackid_t stack_id, uint8_t protocol, + sa_family_t addr_family, uint8_t *laddrp, + uint8_t *faddrp, void *args); + -#define CL_INET_CONNECT(tcp) { \ - if (cl_inet_connect != NULL) { \ +/* + * int CL_INET_CONNECT(conn_t *cp, tcp_t *tcp, boolean_t is_outgoing, int err) + */ +#define CL_INET_CONNECT(connp, tcp, is_outgoing, err) { \ + (err) = 0; \ + if (cl_inet_connect2 != NULL) { \ /* \ * Running in cluster mode - register active connection \ * information \ */ \ if ((tcp)->tcp_ipversion == IPV4_VERSION) { \ if ((tcp)->tcp_ipha->ipha_src != 0) { \ - (*cl_inet_connect)(IPPROTO_TCP, AF_INET,\ + (err) = (*cl_inet_connect2)( \ + (connp)->conn_netstack->netstack_stackid,\ + IPPROTO_TCP, is_outgoing, AF_INET, \ (uint8_t *)(&((tcp)->tcp_ipha->ipha_src)),\ (in_port_t)(tcp)->tcp_lport, \ (uint8_t *)(&((tcp)->tcp_ipha->ipha_dst)),\ - (in_port_t)(tcp)->tcp_fport); \ + (in_port_t)(tcp)->tcp_fport, NULL); \ } \ } else { \ if (!IN6_IS_ADDR_UNSPECIFIED( \ - &(tcp)->tcp_ip6h->ip6_src)) {\ - (*cl_inet_connect)(IPPROTO_TCP, AF_INET6,\ + &(tcp)->tcp_ip6h->ip6_src)) { \ + (err) = (*cl_inet_connect2)( \ + (connp)->conn_netstack->netstack_stackid,\ + IPPROTO_TCP, is_outgoing, AF_INET6, \ (uint8_t *)(&((tcp)->tcp_ip6h->ip6_src)),\ (in_port_t)(tcp)->tcp_lport, \ (uint8_t *)(&((tcp)->tcp_ip6h->ip6_dst)),\ - (in_port_t)(tcp)->tcp_fport); \ + (in_port_t)(tcp)->tcp_fport, NULL); \ } \ } \ } \ } -#define CL_INET_DISCONNECT(tcp) { \ +#define CL_INET_DISCONNECT(connp, tcp) { \ if (cl_inet_disconnect != NULL) { \ /* \ * Running in cluster mode - deregister active \ @@ -1412,23 +1431,24 @@ extern uint32_t (*cl_inet_ipident)(uint8_t protocol, sa_family_t addr_family, */ \ if ((tcp)->tcp_ipversion == IPV4_VERSION) { \ if ((tcp)->tcp_ip_src != 0) { \ - (*cl_inet_disconnect)(IPPROTO_TCP, \ - AF_INET, \ - (uint8_t *)(&((tcp)->tcp_ip_src)),\ + (*cl_inet_disconnect)( \ + (connp)->conn_netstack->netstack_stackid,\ + IPPROTO_TCP, AF_INET, \ + (uint8_t *)(&((tcp)->tcp_ip_src)), \ (in_port_t)(tcp)->tcp_lport, \ - (uint8_t *) \ - (&((tcp)->tcp_ipha->ipha_dst)),\ - (in_port_t)(tcp)->tcp_fport); \ + (uint8_t *)(&((tcp)->tcp_ipha->ipha_dst)),\ + (in_port_t)(tcp)->tcp_fport, NULL); \ } \ } else { \ if (!IN6_IS_ADDR_UNSPECIFIED( \ &(tcp)->tcp_ip_src_v6)) { \ - (*cl_inet_disconnect)(IPPROTO_TCP, AF_INET6,\ + (*cl_inet_disconnect)( \ + (connp)->conn_netstack->netstack_stackid,\ + IPPROTO_TCP, AF_INET6, \ (uint8_t *)(&((tcp)->tcp_ip_src_v6)),\ (in_port_t)(tcp)->tcp_lport, \ - (uint8_t *) \ - (&((tcp)->tcp_ip6h->ip6_dst)),\ - (in_port_t)(tcp)->tcp_fport); \ + (uint8_t *)(&((tcp)->tcp_ip6h->ip6_dst)),\ + (in_port_t)(tcp)->tcp_fport, NULL); \ } \ } \ } \ @@ -1439,7 +1459,8 @@ extern uint32_t (*cl_inet_ipident)(uint8_t protocol, sa_family_t addr_family, * This routine is used to extract the current list of live connections * which must continue to to be dispatched to this node. */ -int cl_tcp_walk_list(int (*callback)(cl_tcp_info_t *, void *), void *arg); +int cl_tcp_walk_list(netstackid_t stack_id, + int (*callback)(cl_tcp_info_t *, void *), void *arg); static int cl_tcp_walk_list_stack(int (*callback)(cl_tcp_info_t *, void *), void *arg, tcp_stack_t *tcps); @@ -4235,7 +4256,7 @@ tcp_closei_local(tcp_t *tcp) */ if (tcp->tcp_state == TCPS_TIME_WAIT) (void) tcp_time_wait_remove(tcp, NULL); - CL_INET_DISCONNECT(tcp); + CL_INET_DISCONNECT(connp, tcp); ipcl_hash_remove(connp); /* @@ -5537,7 +5558,11 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) tcp_bind_hash_insert(&tcps->tcps_bind_fanout[ TCP_BIND_HASH(eager->tcp_lport)], eager, 0); - CL_INET_CONNECT(eager); + CL_INET_CONNECT(connp, eager, B_FALSE, err); + if (err != 0) { + tcp_bind_hash_remove(eager); + goto error3; + } /* * No need to check for multicast destination since ip will only pass @@ -7463,7 +7488,7 @@ tcp_reinit(tcp_t *tcp) */ tcp_close_mpp(&tcp->tcp_conn.tcp_eager_conn_ind); - CL_INET_DISCONNECT(tcp); + CL_INET_DISCONNECT(tcp->tcp_connp, tcp); /* * The connection can't be on the tcp_time_wait_head list @@ -20071,6 +20096,7 @@ tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, int usable_mmd, tail_unsent_mmd; uint_t snxt_mmd, obsegs_mmd, obbytes_mmd; mblk_t *xmit_tail_mmd; + netstackid_t stack_id; #ifdef _BIG_ENDIAN #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) @@ -20113,6 +20139,8 @@ tcp_multisend(queue_t *q, tcp_t *tcp, const int mss, const int tcp_hdr_len, ASSERT(CONN_IS_LSO_MD_FASTPATH(connp)); ASSERT(!CONN_IPSEC_OUT_ENCAPSULATED(connp)); + stack_id = connp->conn_netstack->netstack_stackid; + usable_mmd = tail_unsent_mmd = 0; snxt_mmd = obsegs_mmd = obbytes_mmd = 0; xmit_tail_mmd = NULL; @@ -20877,14 +20905,15 @@ legacy_send_no_md: clusterwide = B_FALSE; if (cl_inet_ipident != NULL) { ASSERT(cl_inet_isclusterwide != NULL); - if ((*cl_inet_isclusterwide)(IPPROTO_IP, - AF_INET, - (uint8_t *)(uintptr_t)src)) { + if ((*cl_inet_isclusterwide)(stack_id, + IPPROTO_IP, AF_INET, + (uint8_t *)(uintptr_t)src, NULL)) { ipha->ipha_ident = - (*cl_inet_ipident) - (IPPROTO_IP, AF_INET, + (*cl_inet_ipident)(stack_id, + IPPROTO_IP, AF_INET, (uint8_t *)(uintptr_t)src, - (uint8_t *)(uintptr_t)dst); + (uint8_t *)(uintptr_t)dst, + NULL); clusterwide = B_TRUE; } } @@ -25057,17 +25086,29 @@ tcp_iss_init(tcp_t *tcp) * gather a list of connections that need to be forwarded to * specific nodes in the cluster when configuration changes occur. * - * The callback is invoked for each tcp_t structure. Returning + * The callback is invoked for each tcp_t structure from all netstacks, + * if 'stack_id' is less than 0. Otherwise, only for tcp_t structures + * from the netstack with the specified stack_id. Returning * non-zero from the callback routine terminates the search. */ int -cl_tcp_walk_list(int (*cl_callback)(cl_tcp_info_t *, void *), - void *arg) +cl_tcp_walk_list(netstackid_t stack_id, + int (*cl_callback)(cl_tcp_info_t *, void *), void *arg) { netstack_handle_t nh; netstack_t *ns; int ret = 0; + if (stack_id >= 0) { + if ((ns = netstack_find_by_stackid(stack_id)) == NULL) + return (EINVAL); + + ret = cl_tcp_walk_list_stack(cl_callback, arg, + ns->netstack_tcp); + netstack_rele(ns); + return (ret); + } + netstack_next_init(&nh); while ((ns = netstack_next(&nh)) != NULL) { ret = cl_tcp_walk_list_stack(cl_callback, arg, @@ -26572,7 +26613,11 @@ tcp_post_ip_bind(tcp_t *tcp, mblk_t *mp, int error) if (tcp->tcp_hard_binding) { tcp->tcp_hard_binding = B_FALSE; tcp->tcp_hard_bound = B_TRUE; - CL_INET_CONNECT(tcp); + CL_INET_CONNECT(tcp->tcp_connp, tcp, B_TRUE, retval); + if (retval != 0) { + error = EADDRINUSE; + goto bind_failed; + } } else { if (ire_mp != NULL) freeb(ire_mp); diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index 5f819f1285..07ec613ef6 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -248,6 +248,52 @@ static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); #define UDP_XMIT_HIWATER (56 * 1024) #define UDP_XMIT_LOWATER 1024 +/* + * The following is defined in tcp.c + */ +extern int (*cl_inet_connect2)(netstackid_t stack_id, + uint8_t protocol, boolean_t is_outgoing, + sa_family_t addr_family, + uint8_t *laddrp, in_port_t lport, + uint8_t *faddrp, in_port_t fport, void *args); + +/* + * Checks if the given destination addr/port is allowed out. + * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. + * Called for each connect() and for sendto()/sendmsg() to a different + * destination. + * For connect(), called in udp_connect(). + * For sendto()/sendmsg(), called in udp_output_v{4,6}(). + * + * This macro assumes that the cl_inet_connect2 hook is not NULL. + * Please check this before calling this macro. + * + * void + * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, + * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); + */ +#define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ + (err) = 0; \ + /* \ + * Running in cluster mode - check and register active \ + * "connection" information \ + */ \ + if ((udp)->udp_ipversion == IPV4_VERSION) \ + (err) = (*cl_inet_connect2)( \ + (cp)->conn_netstack->netstack_stackid, \ + IPPROTO_UDP, is_outgoing, AF_INET, \ + (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ + (udp)->udp_port, \ + (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ + (in_port_t)(fport), NULL); \ + else \ + (err) = (*cl_inet_connect2)( \ + (cp)->conn_netstack->netstack_stackid, \ + IPPROTO_UDP, is_outgoing, AF_INET6, \ + (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ + (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ +} + static struct module_info udp_mod_info = { UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER }; @@ -371,10 +417,12 @@ uint32_t udp_random_anon_port = 1; * On non-clustered systems these vectors must always be NULL */ -void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, - uint8_t *laddrp, in_port_t lport) = NULL; -void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, - uint8_t *laddrp, in_port_t lport) = NULL; +void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, + sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, + void *args) = NULL; +void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, + sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, + void *args) = NULL; typedef union T_primitives *t_primp_t; @@ -841,13 +889,17 @@ udp_quiesce_conn(conn_t *connp) * Running in cluster mode - register unbind information */ if (udp->udp_ipversion == IPV4_VERSION) { - (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, + (*cl_inet_unbind)( + connp->conn_netstack->netstack_stackid, + IPPROTO_UDP, AF_INET, (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), - (in_port_t)udp->udp_port); + (in_port_t)udp->udp_port, NULL); } else { - (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, + (*cl_inet_unbind)( + connp->conn_netstack->netstack_stackid, + IPPROTO_UDP, AF_INET6, (uint8_t *)(&(udp->udp_v6src)), - (in_port_t)udp->udp_port); + (in_port_t)udp->udp_port, NULL); } } @@ -4916,7 +4968,8 @@ retry: } static int -udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) +udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, + boolean_t *update_lastdst) { int err; uchar_t opt_storage[IP_MAX_OPT_LENGTH]; @@ -4937,7 +4990,7 @@ udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) char *, "queue(1) failed to update options(2) on mp(3)", queue_t *, wq, char *, opt_storage, mblk_t *, mp); } else { - IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); + *update_lastdst = B_TRUE; } return (err); } @@ -4967,7 +5020,8 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; queue_t *q = connp->conn_wq; ire_t *ire; - + in6_addr_t v6dst; + boolean_t update_lastdst = B_FALSE; *error = 0; pktinfop->ip4_ill_index = 0; @@ -5033,6 +5087,34 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, rw_enter(&udp->udp_rwlock, RW_READER); lock_held = B_TRUE; + + /* + * Cluster and TSOL note: + * udp.udp_v6lastdst is shared by Cluster and TSOL + * udp.udp_lastdstport is used by Cluster + * + * Both Cluster and TSOL need to update the dest addr and/or port. + * Updating is done after both Cluster and TSOL checks, protected + * by conn_lock. + */ + mutex_enter(&connp->conn_lock); + + if (cl_inet_connect2 != NULL && + (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || + V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || + udp->udp_lastdstport != port)) { + mutex_exit(&connp->conn_lock); + *error = 0; + IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); + CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); + if (*error != 0) { + *error = EHOSTUNREACH; + goto done; + } + update_lastdst = B_TRUE; + mutex_enter(&connp->conn_lock); + } + /* * Check if our saved options are valid; update if not. * TSOL Note: Since we are not in WRITER mode, UDP packets @@ -5043,7 +5125,6 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, * and ip_snd_options_len are consistent for the current * destination and are updated atomically. */ - mutex_enter(&connp->conn_lock); if (is_system_labeled()) { /* Using UDP MLP requires SCM_UCRED from user */ if (connp->conn_mlp_type != mlptSingle && @@ -5064,11 +5145,16 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || connp->conn_mlp_type != mlptSingle) && - (*error = udp_update_label(q, mp, v4dst)) != 0) { + (*error = udp_update_label(q, mp, v4dst, &update_lastdst)) + != 0) { mutex_exit(&connp->conn_lock); goto done; } } + if (update_lastdst) { + IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); + udp->udp_lastdstport = port; + } if (udp->udp_ip_snd_options_len > 0) { ip_snd_opt_len = udp->udp_ip_snd_options_len; bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); @@ -5600,7 +5686,8 @@ udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) } static boolean_t -udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) +udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst, + boolean_t *update_lastdst) { udp_t *udp = Q_TO_UDP(wq); int err; @@ -5620,7 +5707,7 @@ udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) char *, "queue(1) failed to update options(2) on mp(3)", queue_t *, wq, char *, opt_storage, mblk_t *, mp); } else { - udp->udp_v6lastdst = *dst; + *update_lastdst = B_TRUE; } return (err); } @@ -5940,6 +6027,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, uint8_t *cp; uint8_t *nxthdr_ptr; in6_addr_t ip6_dst; + in_port_t port; udpattrs_t attrs; boolean_t opt_present; ip6_hbh_t *hopoptsptr = NULL; @@ -5947,6 +6035,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, boolean_t is_ancillary = B_FALSE; size_t sth_wroff = 0; ire_t *ire; + boolean_t update_lastdst = B_FALSE; *error = 0; @@ -6038,6 +6127,29 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) ip6_dst = ipv6_loopback; + port = sin6->sin6_port; + + /* + * Cluster and TSOL notes, Cluster check: + * see comments in udp_output_v4(). + */ + mutex_enter(&connp->conn_lock); + + if (cl_inet_connect2 != NULL && + (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || + port != udp->udp_lastdstport)) { + mutex_exit(&connp->conn_lock); + *error = 0; + CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); + if (*error != 0) { + *error = EHOSTUNREACH; + rw_exit(&udp->udp_rwlock); + goto done; + } + update_lastdst = B_TRUE; + mutex_enter(&connp->conn_lock); + } + /* * If we're not going to the same destination as last time, then * recompute the label required. This is done in a separate routine to @@ -6051,7 +6163,6 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, * and sticky ipp_hopoptslen are consistent for the current * destination and are updated atomically. */ - mutex_enter(&connp->conn_lock); if (is_system_labeled()) { /* Using UDP MLP requires SCM_UCRED from user */ if (connp->conn_mlp_type != mlptSingle && @@ -6073,13 +6184,19 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, if ((opt_present || !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || connp->conn_mlp_type != mlptSingle) && - (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { + (*error = udp_update_label_v6(q, mp, &ip6_dst, + &update_lastdst)) != 0) { rw_exit(&udp->udp_rwlock); mutex_exit(&connp->conn_lock); goto done; } } + if (update_lastdst) { + udp->udp_v6lastdst = ip6_dst; + udp->udp_lastdstport = port; + } + /* * If there's a security label here, then we ignore any options the * user may try to set. We keep the peer's label as a hidden sticky @@ -8263,15 +8380,16 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * Running in cluster mode - register bind information */ if (udp->udp_ipversion == IPV4_VERSION) { - (*cl_inet_bind)(IPPROTO_UDP, AF_INET, + (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, + IPPROTO_UDP, AF_INET, (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), - (in_port_t)udp->udp_port); + (in_port_t)udp->udp_port, NULL); } else { - (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, + (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, + IPPROTO_UDP, AF_INET6, (uint8_t *)&(udp->udp_v6src), - (in_port_t)udp->udp_port); + (in_port_t)udp->udp_port, NULL); } - } connp->conn_anon_port = (is_system_labeled() && requested_port == 0); @@ -8446,13 +8564,17 @@ udp_do_unbind(conn_t *connp) * Running in cluster mode - register unbind information */ if (udp->udp_ipversion == IPV4_VERSION) { - (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, + (*cl_inet_unbind)( + connp->conn_netstack->netstack_stackid, + IPPROTO_UDP, AF_INET, (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), - (in_port_t)udp->udp_port); + (in_port_t)udp->udp_port, NULL); } else { - (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, + (*cl_inet_unbind)( + connp->conn_netstack->netstack_stackid, + IPPROTO_UDP, AF_INET6, (uint8_t *)&(udp->udp_v6src), - (in_port_t)udp->udp_port); + (in_port_t)udp->udp_port, NULL); } } @@ -8744,6 +8866,17 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len) rw_exit(&udp->udp_rwlock); return (-TBADADDR); } + + if (cl_inet_connect2 != NULL) { + CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); + if (error != 0) { + mutex_exit(&udpf->uf_lock); + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); + return (-TBADADDR); + } + } + udp->udp_state = TS_DATA_XFER; mutex_exit(&udpf->uf_lock); diff --git a/usr/src/uts/common/inet/udp_impl.h b/usr/src/uts/common/inet/udp_impl.h index 38d255ac9d..34b38e67bd 100644 --- a/usr/src/uts/common/inet/udp_impl.h +++ b/usr/src/uts/common/inet/udp_impl.h @@ -326,6 +326,7 @@ typedef struct udp_s { uint_t udp_label_len; /* length of security label */ uint_t udp_label_len_v6; /* len of v6 security label */ in6_addr_t udp_v6lastdst; /* most recent destination */ + in_port_t udp_lastdstport; /* most recent dest port */ uint64_t udp_open_time; /* time when this was opened */ pid_t udp_open_pid; /* process id when this was opened */ diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64 index 2e501f8abc..3972f1b4ec 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 @@ -26,7 +26,7 @@ IP_MAJ cb_inet_devops cl_inet_bind -cl_inet_connect +cl_inet_connect2 cl_inet_disconnect cl_inet_ipident cl_inet_isclusterwide diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64 index b773f8a5e0..f6a97be29b 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.obj64 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 @@ -26,7 +26,7 @@ IP_MAJ cb_inet_devops cl_inet_bind -cl_inet_connect +cl_inet_connect2 cl_inet_disconnect cl_inet_ipident cl_inet_isclusterwide diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64 index fabffbc5f5..279bd92d0b 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.debug64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 @@ -26,7 +26,7 @@ IP_MAJ cb_inet_devops cl_inet_bind -cl_inet_connect +cl_inet_connect2 cl_inet_disconnect cl_inet_ipident cl_inet_isclusterwide diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64 index c7fb907f8c..4f4bc3e376 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 @@ -26,7 +26,7 @@ IP_MAJ cb_inet_devops cl_inet_bind -cl_inet_connect +cl_inet_connect2 cl_inet_disconnect cl_inet_ipident cl_inet_isclusterwide |