diff options
Diffstat (limited to 'usr/src/uts/common/inet/udp/udp.c')
-rw-r--r-- | usr/src/uts/common/inet/udp/udp.c | 7703 |
1 files changed, 2698 insertions, 5005 deletions
diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index d0bab511b0..e18fc57f40 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -26,12 +26,9 @@ #include <sys/types.h> #include <sys/stream.h> -#include <sys/dlpi.h> -#include <sys/pattr.h> #include <sys/stropts.h> #include <sys/strlog.h> #include <sys/strsun.h> -#include <sys/time.h> #define _SUN_TPI_VERSION 2 #include <sys/tihdr.h> #include <sys/timod.h> @@ -41,7 +38,9 @@ #include <sys/suntpi.h> #include <sys/xti_inet.h> #include <sys/kmem.h> +#include <sys/cred_impl.h> #include <sys/policy.h> +#include <sys/priv.h> #include <sys/ucred.h> #include <sys/zone.h> @@ -57,12 +56,11 @@ #include <netinet/ip6.h> #include <netinet/icmp6.h> #include <netinet/udp.h> -#include <net/if.h> -#include <net/route.h> #include <inet/common.h> #include <inet/ip.h> #include <inet/ip_impl.h> +#include <inet/ipsec_impl.h> #include <inet/ip6.h> #include <inet/ip_ire.h> #include <inet/ip_if.h> @@ -74,34 +72,25 @@ #include <inet/optcom.h> #include <inet/snmpcom.h> #include <inet/kstatcom.h> -#include <inet/udp_impl.h> #include <inet/ipclassifier.h> -#include <inet/ipsec_impl.h> -#include <inet/ipp_common.h> #include <sys/squeue_impl.h> #include <inet/ipnet.h> #include <sys/ethernet.h> -/* - * The ipsec_info.h header file is here since it has the definition for the - * M_CTL message types used by IP to convey information to the ULP. The - * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. - */ -#include <net/pfkeyv2.h> -#include <inet/ipsec_info.h> - #include <sys/tsol/label.h> #include <sys/tsol/tnet.h> #include <rpc/pmap_prot.h> +#include <inet/udp_impl.h> + /* * Synchronization notes: * * UDP is MT and uses the usual kernel synchronization primitives. There are 2 - * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. - * We also use conn_lock when updating things that affect the IP classifier - * lookup. 
- * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. + * locks, the fanout lock (uf_lock) and conn_lock. conn_lock + * protects the contents of the udp_t. uf_lock protects the address and the + * fanout information. + * The lock order is conn_lock -> uf_lock. * * The fanout lock uf_lock: * When a UDP endpoint is bound to a local port, it is inserted into @@ -114,11 +103,6 @@ * from the bind hash list only when it is being unbound or being closed. * The per bucket lock also protects a UDP endpoint's state changes. * - * The udp_rwlock: - * This protects most of the other fields in the udp_t. The exact list of - * fields which are protected by each of the above locks is documented in - * the udp_t structure definition. - * * Plumbing notes: * UDP is always a device driver. For compatibility with mibopen() code * it is possible to I_PUSH "udp", but that results in pushing a passthrough @@ -133,41 +117,32 @@ /* For /etc/system control */ uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; -/* Option processing attrs */ -typedef struct udpattrs_s { - union { - ip6_pkt_t *udpattr_ipp6; /* For V6 */ - ip4_pkt_t *udpattr_ipp4; /* For V4 */ - } udpattr_ippu; -#define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 -#define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 - mblk_t *udpattr_mb; - boolean_t udpattr_credset; -} udpattrs_t; - static void udp_addr_req(queue_t *q, mblk_t *mp); static void udp_tpi_bind(queue_t *q, mblk_t *mp); static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); -static int udp_build_hdrs(udp_t *udp); +static int udp_build_hdr_template(conn_t *, const in6_addr_t *, + const in6_addr_t *, in_port_t, uint32_t); static void udp_capability_req(queue_t *q, mblk_t *mp); static int udp_tpi_close(queue_t *q, int flags); +static void udp_close_free(conn_t *); static void udp_tpi_connect(queue_t *q, mblk_t *mp); static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 
static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, - int sys_error); -static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, - t_scalar_t tlierr, int unixerr); + int sys_error); +static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, + t_scalar_t tlierr, int sys_error); static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); -static void udp_icmp_error(conn_t *, mblk_t *); -static void udp_icmp_error_ipv6(conn_t *, mblk_t *); +static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); +static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, + ip_recv_attr_t *ira); static void udp_info_req(queue_t *q, mblk_t *mp); -static void udp_input(void *, mblk_t *, void *); +static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); static void udp_lrput(queue_t *, mblk_t *); static void udp_lwput(queue_t *, mblk_t *); static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, @@ -176,24 +151,34 @@ static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp); static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp); -static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, - int *errorp, udpattrs_t *udpattrs); static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); +int udp_opt_set(conn_t *connp, uint_t optset_context, + int level, int name, uint_t inlen, + uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, + void *thisdg_attrs, cred_t *cr); +int udp_opt_get(conn_t *connp, int level, int name, + uchar_t *ptr); +static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, + pid_t pid); +static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, + pid_t pid, ip_xmit_attr_t *ixa); +static 
int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, + sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, + ip_xmit_attr_t *ixa); static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); -static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, - ipha_t *ipha); -static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, - t_scalar_t destlen, t_scalar_t err); +static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, + const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, + int *); +static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, + mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); +static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); +static void udp_ud_err_connected(conn_t *, t_scalar_t); static void udp_tpi_unbind(queue_t *q, mblk_t *mp); static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random); -static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, - int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); -static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, - int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); static void udp_wput_other(queue_t *q, mblk_t *mp); static void udp_wput_iocdata(queue_t *q, mblk_t *mp); static void udp_wput_fallback(queue_t *q, mblk_t *mp); @@ -208,11 +193,9 @@ static void *udp_kstat2_init(netstackid_t, udp_stat_t *); static void udp_kstat2_fini(netstackid_t, kstat_t *); static int udp_kstat_update(kstat_t *kp, int rw); -static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); -static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, - cred_t *, pid_t); -static void udp_ulp_recv(conn_t *, mblk_t *); +/* Common routines for TPI and socket module */ +static 
void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); /* Common routine for TPI and socket module */ static conn_t *udp_do_open(cred_t *, boolean_t, int); @@ -220,30 +203,20 @@ static void udp_do_close(conn_t *); static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, boolean_t); static int udp_do_unbind(conn_t *); -static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); -static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); int udp_getsockname(sock_lower_handle_t, struct sockaddr *, socklen_t *, cred_t *); int udp_getpeername(sock_lower_handle_t, struct sockaddr *, socklen_t *, cred_t *); static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, - cred_t *cr); -static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); + cred_t *, pid_t); #define UDP_RECV_HIWATER (56 * 1024) #define UDP_RECV_LOWATER 128 #define UDP_XMIT_HIWATER (56 * 1024) #define UDP_XMIT_LOWATER 1024 -/* - * The following is defined in tcp.c - */ -extern int (*cl_inet_connect2)(netstackid_t stack_id, - uint8_t protocol, boolean_t is_outgoing, - sa_family_t addr_family, - uint8_t *laddrp, in_port_t lport, - uint8_t *faddrp, in_port_t fport, void *args); +#pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) /* * Checks if the given destination addr/port is allowed out. @@ -251,7 +224,7 @@ extern int (*cl_inet_connect2)(netstackid_t stack_id, * Called for each connect() and for sendto()/sendmsg() to a different * destination. * For connect(), called in udp_connect(). - * For sendto()/sendmsg(), called in udp_output_v{4,6}(). + * For sendto()/sendmsg(), called in udp_output_newdst(). * * This macro assumes that the cl_inet_connect2 hook is not NULL. * Please check this before calling this macro. 
@@ -260,25 +233,26 @@ extern int (*cl_inet_connect2)(netstackid_t stack_id, * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); */ -#define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ +#define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ (err) = 0; \ /* \ * Running in cluster mode - check and register active \ * "connection" information \ */ \ - if ((udp)->udp_ipversion == IPV4_VERSION) \ + if ((cp)->conn_ipversion == IPV4_VERSION) \ (err) = (*cl_inet_connect2)( \ (cp)->conn_netstack->netstack_stackid, \ IPPROTO_UDP, is_outgoing, AF_INET, \ - (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ - (udp)->udp_port, \ - (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ + (uint8_t *)&((cp)->conn_laddr_v4), \ + (cp)->conn_lport, \ + (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ (in_port_t)(fport), NULL); \ else \ (err) = (*cl_inet_connect2)( \ (cp)->conn_netstack->netstack_stackid, \ IPPROTO_UDP, is_outgoing, AF_INET6, \ - (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ + (uint8_t *)&((cp)->conn_laddr_v6), \ + (cp)->conn_lport, \ (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ } @@ -387,6 +361,8 @@ udpparam_t udp_param_arr[] = { { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, + { 0, 1, 0, "udp_pmtu_discovery" }, + { 0, 1, 0, "udp_sendto_ignerr" }, }; /* END CSTYLED */ @@ -451,9 +427,10 @@ retry: static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) { - udp_t *udpnext; - kmutex_t *lockp; - udp_stack_t *us = udp->udp_us; + udp_t *udpnext; + kmutex_t *lockp; + udp_stack_t *us = udp->udp_us; + conn_t *connp = udp->udp_connp; if (udp->udp_ptpbhn == NULL) return; @@ -462,9 +439,9 @@ udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) * Extract the lock pointer in case there are concurrent * hash_remove's for this instance. 
*/ - ASSERT(udp->udp_port != 0); + ASSERT(connp->conn_lport != 0); if (!caller_holds_lock) { - lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, us->us_bind_fanout_size)].uf_lock; ASSERT(lockp != NULL); mutex_enter(lockp); @@ -486,8 +463,10 @@ udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) { + conn_t *connp = udp->udp_connp; udp_t **udpp; udp_t *udpnext; + conn_t *connext; ASSERT(MUTEX_HELD(&uf->uf_lock)); ASSERT(udp->udp_ptpbhn == NULL); @@ -503,11 +482,11 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) * specific address get preference over those binding to * INADDR_ANY. */ - if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && - !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { + connext = udpnext->udp_connp; + if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && + !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { while ((udpnext = udpp[0]) != NULL && - !V6_OR_V4_INADDR_ANY( - udpnext->udp_bound_v6src)) { + !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { udpp = &(udpnext->udp_bind_hash); } if (udpnext != NULL) @@ -525,10 +504,9 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message * passed to udp_wput. * It associates a port number and local address with the stream. - * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP - * protocol type (IPPROTO_UDP) placed in the message following the address. - * A T_BIND_ACK message is passed upstream when ip acknowledges the request. - * (Called as writer.) + * It calls IP to verify the local IP address, and calls IP to insert + * the conn_t in the fanout table. + * If everything is ok it then sends the T_BIND_ACK back up. * * Note that UDP over IPv4 and IPv6 sockets can use the same port number * without setting SO_REUSEADDR. 
This is needed so that they @@ -580,10 +558,10 @@ udp_tpi_bind(queue_t *q, mblk_t *mp) } /* * Reallocate the message to make sure we have enough room for an - * address and the protocol type. + * address. */ - mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); - if (!mp1) { + mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); + if (mp1 == NULL) { udp_err_ack(q, mp, TSYSERR, ENOMEM); return; } @@ -597,7 +575,7 @@ udp_tpi_bind(queue_t *q, mblk_t *mp) switch (tbr->ADDR_length) { case 0: /* Request for a generic port */ tbr->ADDR_offset = sizeof (struct T_bind_req); - if (udp->udp_family == AF_INET) { + if (connp->conn_family == AF_INET) { tbr->ADDR_length = sizeof (sin_t); sin = (sin_t *)&tbr[1]; *sin = sin_null; @@ -605,7 +583,7 @@ udp_tpi_bind(queue_t *q, mblk_t *mp) mp->b_wptr = (uchar_t *)&sin[1]; sa = (struct sockaddr *)sin; } else { - ASSERT(udp->udp_family == AF_INET6); + ASSERT(connp->conn_family == AF_INET6); tbr->ADDR_length = sizeof (sin6_t); sin6 = (sin6_t *)&tbr[1]; *sin6 = sin6_null; @@ -622,7 +600,7 @@ udp_tpi_bind(queue_t *q, mblk_t *mp) udp_err_ack(q, mp, TSYSERR, EINVAL); return; } - if (udp->udp_family != AF_INET || + if (connp->conn_family != AF_INET || sa->sa_family != AF_INET) { udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); return; @@ -636,7 +614,7 @@ udp_tpi_bind(queue_t *q, mblk_t *mp) udp_err_ack(q, mp, TSYSERR, EINVAL); return; } - if (udp->udp_family != AF_INET6 || + if (connp->conn_family != AF_INET6 || sa->sa_family != AF_INET6) { udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); return; @@ -669,29 +647,21 @@ udp_tpi_bind(queue_t *q, mblk_t *mp) * This routine handles each T_CONN_REQ message passed to udp. It * associates a default destination address with the stream. 
* - * This routine sends down a T_BIND_REQ to IP with the following mblks: - * T_BIND_REQ - specifying local and remote address/port - * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src - * T_OK_ACK - for the T_CONN_REQ - * T_CONN_CON - to keep the TPI user happy - * - * The connect completes in udp_do_connect. - * When a T_BIND_ACK is received information is extracted from the IRE - * and the two appended messages are sent to the TPI user. - * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will - * convert it to an error ack for the appropriate primitive. + * After various error checks are completed, udp_connect() lays + * the target address and port into the composite header template. + * Then we ask IP for information, including a source address if we didn't + * already have one. Finally we send up the T_OK_ACK reply message. */ static void udp_tpi_connect(queue_t *q, mblk_t *mp) { - udp_t *udp; conn_t *connp = Q_TO_CONN(q); int error; socklen_t len; struct sockaddr *sa; struct T_conn_req *tcr; cred_t *cr; - + pid_t pid; /* * All Solaris components should pass a db_credp * for this TPI message, hence we ASSERT. @@ -699,14 +669,13 @@ udp_tpi_connect(queue_t *q, mblk_t *mp) * like a TPI message sent by some other kernel * component, we check and return an error. */ - cr = msg_getcred(mp, NULL); + cr = msg_getcred(mp, &pid); ASSERT(cr != NULL); if (cr == NULL) { udp_err_ack(q, mp, TSYSERR, EINVAL); return; } - udp = connp->conn_udp; tcr = (struct T_conn_req *)mp->b_rptr; /* A bit of sanity checking */ @@ -724,7 +693,7 @@ udp_tpi_connect(queue_t *q, mblk_t *mp) * Determine packet type based on type of address passed in * the request should contain an IPv4 or IPv6 address. * Make sure that address family matches the type of - * family of the the address passed down + * family of the address passed down. 
*/ len = tcr->DEST_length; switch (tcr->DEST_length) { @@ -743,13 +712,13 @@ udp_tpi_connect(queue_t *q, mblk_t *mp) break; } - error = proto_verify_ip_addr(udp->udp_family, sa, len); + error = proto_verify_ip_addr(connp->conn_family, sa, len); if (error != 0) { udp_err_ack(q, mp, TSYSERR, error); return; } - error = udp_do_connect(connp, sa, len, cr); + error = udp_do_connect(connp, sa, len, cr, pid); if (error != 0) { if (error < 0) udp_err_ack(q, mp, -error, 0); @@ -761,7 +730,7 @@ udp_tpi_connect(queue_t *q, mblk_t *mp) * We have to send a connection confirmation to * keep TLI happy. */ - if (udp->udp_family == AF_INET) { + if (connp->conn_family == AF_INET) { mp1 = mi_tpi_conn_con(NULL, (char *)sa, sizeof (sin_t), NULL, 0); } else { @@ -810,72 +779,14 @@ done: return (0); } -/* - * Called in the close path to quiesce the conn - */ -void -udp_quiesce_conn(conn_t *connp) -{ - udp_t *udp = connp->conn_udp; - - if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { - /* - * Running in cluster mode - register unbind information - */ - if (udp->udp_ipversion == IPV4_VERSION) { - (*cl_inet_unbind)( - connp->conn_netstack->netstack_stackid, - IPPROTO_UDP, AF_INET, - (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), - (in_port_t)udp->udp_port, NULL); - } else { - (*cl_inet_unbind)( - connp->conn_netstack->netstack_stackid, - IPPROTO_UDP, AF_INET6, - (uint8_t *)(&(udp->udp_v6src)), - (in_port_t)udp->udp_port, NULL); - } - } - - udp_bind_hash_remove(udp, B_FALSE); - -} - -void +static void udp_close_free(conn_t *connp) { udp_t *udp = connp->conn_udp; /* If there are any options associated with the stream, free them. 
*/ - if (udp->udp_ip_snd_options != NULL) { - mi_free((char *)udp->udp_ip_snd_options); - udp->udp_ip_snd_options = NULL; - udp->udp_ip_snd_options_len = 0; - } - - if (udp->udp_ip_rcv_options != NULL) { - mi_free((char *)udp->udp_ip_rcv_options); - udp->udp_ip_rcv_options = NULL; - udp->udp_ip_rcv_options_len = 0; - } - - /* Free memory associated with sticky options */ - if (udp->udp_sticky_hdrs_len != 0) { - kmem_free(udp->udp_sticky_hdrs, - udp->udp_sticky_hdrs_len); - udp->udp_sticky_hdrs = NULL; - udp->udp_sticky_hdrs_len = 0; - } - if (udp->udp_last_cred != NULL) { - crfree(udp->udp_last_cred); - udp->udp_last_cred = NULL; - } - if (udp->udp_effective_cred != NULL) { - crfree(udp->udp_effective_cred); - udp->udp_effective_cred = NULL; - } - - ip6_pkt_free(&udp->udp_sticky_ipp); + if (udp->udp_recv_ipp.ipp_fields != 0) + ip_pkt_free(&udp->udp_recv_ipp); /* * Clear any fields which the kmem_cache constructor clears. @@ -892,59 +803,48 @@ static int udp_do_disconnect(conn_t *connp) { udp_t *udp; - mblk_t *ire_mp; udp_fanout_t *udpf; udp_stack_t *us; int error; udp = connp->conn_udp; us = udp->udp_us; - rw_enter(&udp->udp_rwlock, RW_WRITER); - if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { - rw_exit(&udp->udp_rwlock); + mutex_enter(&connp->conn_lock); + if (udp->udp_state != TS_DATA_XFER) { + mutex_exit(&connp->conn_lock); return (-TOUTSTATE); } - udp->udp_pending_op = T_DISCON_REQ; - udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, us->us_bind_fanout_size)]; mutex_enter(&udpf->uf_lock); - udp->udp_v6src = udp->udp_bound_v6src; + if (connp->conn_mcbc_bind) + connp->conn_saddr_v6 = ipv6_all_zeros; + else + connp->conn_saddr_v6 = connp->conn_bound_addr_v6; + connp->conn_laddr_v6 = connp->conn_bound_addr_v6; + connp->conn_faddr_v6 = ipv6_all_zeros; + connp->conn_fport = 0; udp->udp_state = TS_IDLE; mutex_exit(&udpf->uf_lock); - if (udp->udp_family == AF_INET6) { - /* Rebuild 
the header template */ - error = udp_build_hdrs(udp); - if (error != 0) { - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - return (error); - } - } + /* Remove any remnants of mapped address binding */ + if (connp->conn_family == AF_INET6) + connp->conn_ipversion = IPV6_VERSION; - ire_mp = allocb(sizeof (ire_t), BPRI_HI); - if (ire_mp == NULL) { - mutex_enter(&udpf->uf_lock); - udp->udp_pending_op = -1; - mutex_exit(&udpf->uf_lock); - rw_exit(&udp->udp_rwlock); - return (ENOMEM); - } - - rw_exit(&udp->udp_rwlock); - - if (udp->udp_family == AF_INET6) { - error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, - &udp->udp_bound_v6src, udp->udp_port, B_TRUE); - } else { - error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, - V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); - } + connp->conn_v6lastdst = ipv6_all_zeros; + error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, + &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); + mutex_exit(&connp->conn_lock); + if (error != 0) + return (error); - return (udp_post_ip_bind_connect(udp, ire_mp, error)); + /* + * Tell IP to remove the full binding and revert + * to the local address binding. 
+ */ + return (ip_laddr_fanout_insert(connp)); } - static void udp_tpi_disconnect(queue_t *q, mblk_t *mp) { @@ -981,12 +881,9 @@ int udp_disconnect(conn_t *connp) { int error; - udp_t *udp = connp->conn_udp; - - udp->udp_dgram_errind = B_FALSE; + connp->conn_dgram_errind = B_FALSE; error = udp_do_disconnect(connp); - if (error < 0) error = proto_tlitosyserr(-error); @@ -1003,8 +900,8 @@ udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) /* Shorthand to generate and send TPI error acks to our client */ static void -udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, - int sys_error) +udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, + t_scalar_t t_error, int sys_error) { struct T_error_ack *teackp; @@ -1018,7 +915,7 @@ udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, } } -/*ARGSUSED*/ +/*ARGSUSED2*/ static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) { @@ -1033,7 +930,7 @@ udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) return (0); } -/* ARGSUSED */ +/* ARGSUSED1 */ static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) @@ -1072,7 +969,7 @@ udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, return (0); } -/* ARGSUSED */ +/* ARGSUSED1 */ static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) @@ -1109,39 +1006,41 @@ udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, #define ICMP_MIN_UDP_HDR 4 /* - * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. + * udp_icmp_input is called as conn_recvicmp to process ICMP messages. * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. * Assumes that IP has pulled up everything up to and including the ICMP header. 
*/ +/* ARGSUSED2 */ static void -udp_icmp_error(conn_t *connp, mblk_t *mp) +udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) { - icmph_t *icmph; - ipha_t *ipha; - int iph_hdr_length; - udpha_t *udpha; - sin_t sin; - sin6_t sin6; - mblk_t *mp1; - int error = 0; - udp_t *udp = connp->conn_udp; + conn_t *connp = (conn_t *)arg1; + icmph_t *icmph; + ipha_t *ipha; + int iph_hdr_length; + udpha_t *udpha; + sin_t sin; + sin6_t sin6; + mblk_t *mp1; + int error = 0; + udp_t *udp = connp->conn_udp; - mp1 = NULL; ipha = (ipha_t *)mp->b_rptr; ASSERT(OK_32PTR(mp->b_rptr)); if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); - udp_icmp_error_ipv6(connp, mp); + udp_icmp_error_ipv6(connp, mp, ira); return; } ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); /* Skip past the outer IP and ICMP headers */ - iph_hdr_length = IPH_HDR_LENGTH(ipha); + ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); + iph_hdr_length = ira->ira_ip_hdr_length; icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; - ipha = (ipha_t *)&icmph[1]; + ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ /* Skip past the inner IP and find the ULP header */ iph_hdr_length = IPH_HDR_LENGTH(ipha); @@ -1150,11 +1049,41 @@ udp_icmp_error(conn_t *connp, mblk_t *mp) switch (icmph->icmph_type) { case ICMP_DEST_UNREACHABLE: switch (icmph->icmph_code) { - case ICMP_FRAGMENTATION_NEEDED: + case ICMP_FRAGMENTATION_NEEDED: { + ipha_t *ipha; + ip_xmit_attr_t *ixa; /* * IP has already adjusted the path MTU. + * But we need to adjust DF for IPv4. */ + if (connp->conn_ipversion != IPV4_VERSION) + break; + + ixa = conn_get_ixa(connp, B_FALSE); + if (ixa == NULL || ixa->ixa_ire == NULL) { + /* + * Some other thread holds conn_ixa. We will + * redo this on the next ICMP too big. 
+ */ + if (ixa != NULL) + ixa_refrele(ixa); + break; + } + (void) ip_get_pmtu(ixa); + + mutex_enter(&connp->conn_lock); + ipha = (ipha_t *)connp->conn_ht_iphc; + if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { + ipha->ipha_fragment_offset_and_flags |= + IPH_DF_HTONS; + } else { + ipha->ipha_fragment_offset_and_flags &= + ~IPH_DF_HTONS; + } + mutex_exit(&connp->conn_lock); + ixa_refrele(ixa); break; + } case ICMP_PORT_UNREACHABLE: case ICMP_PROTOCOL_UNREACHABLE: error = ECONNREFUSED; @@ -1177,25 +1106,24 @@ udp_icmp_error(conn_t *connp, mblk_t *mp) * Deliver T_UDERROR_IND when the application has asked for it. * The socket layer enables this automatically when connected. */ - if (!udp->udp_dgram_errind) { + if (!connp->conn_dgram_errind) { freemsg(mp); return; } - - switch (udp->udp_family) { + switch (connp->conn_family) { case AF_INET: sin = sin_null; sin.sin_family = AF_INET; sin.sin_addr.s_addr = ipha->ipha_dst; sin.sin_port = udpha->uha_dst_port; if (IPCL_IS_NONSTR(connp)) { - rw_enter(&udp->udp_rwlock, RW_WRITER); + mutex_enter(&connp->conn_lock); if (udp->udp_state == TS_DATA_XFER) { - if (sin.sin_port == udp->udp_dstport && + if (sin.sin_port == connp->conn_fport && sin.sin_addr.s_addr == - V4_PART_OF_V6(udp->udp_v6dst)) { - rw_exit(&udp->udp_rwlock); + connp->conn_faddr_v4) { + mutex_exit(&connp->conn_lock); (*connp->conn_upcalls->su_set_error) (connp->conn_upper_handle, error); goto done; @@ -1204,10 +1132,12 @@ udp_icmp_error(conn_t *connp, mblk_t *mp) udp->udp_delayed_error = error; *((sin_t *)&udp->udp_delayed_addr) = sin; } - rw_exit(&udp->udp_rwlock); + mutex_exit(&connp->conn_lock); } else { mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, error); + if (mp1 != NULL) + putnext(connp->conn_rq, mp1); } break; case AF_INET6: @@ -1216,12 +1146,12 @@ udp_icmp_error(conn_t *connp, mblk_t *mp) IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); sin6.sin6_port = udpha->uha_dst_port; if (IPCL_IS_NONSTR(connp)) { - rw_enter(&udp->udp_rwlock, 
RW_WRITER); + mutex_enter(&connp->conn_lock); if (udp->udp_state == TS_DATA_XFER) { - if (sin6.sin6_port == udp->udp_dstport && + if (sin6.sin6_port == connp->conn_fport && IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, - &udp->udp_v6dst)) { - rw_exit(&udp->udp_rwlock); + &connp->conn_faddr_v6)) { + mutex_exit(&connp->conn_lock); (*connp->conn_upcalls->su_set_error) (connp->conn_upper_handle, error); goto done; @@ -1230,17 +1160,16 @@ udp_icmp_error(conn_t *connp, mblk_t *mp) udp->udp_delayed_error = error; *((sin6_t *)&udp->udp_delayed_addr) = sin6; } - rw_exit(&udp->udp_rwlock); + mutex_exit(&connp->conn_lock); } else { mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, error); + if (mp1 != NULL) + putnext(connp->conn_rq, mp1); } break; } - if (mp1 != NULL) - putnext(connp->conn_rq, mp1); done: - ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); freemsg(mp); } @@ -1251,7 +1180,7 @@ done: * ICMPv6 header. */ static void -udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) +udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) { icmp6_t *icmp6; ip6_t *ip6h, *outer_ip6h; @@ -1265,12 +1194,19 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) udp_stack_t *us = udp->udp_us; outer_ip6h = (ip6_t *)mp->b_rptr; +#ifdef DEBUG if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); else iph_hdr_length = IPV6_HDR_LEN; + ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); +#endif + /* Skip past the outer IP and ICMP headers */ + iph_hdr_length = ira->ira_ip_hdr_length; icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; - ip6h = (ip6_t *)&icmp6[1]; + + /* Skip past the inner IP and find the ULP header */ + ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { freemsg(mp); return; @@ -1308,7 +1244,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) * information, send up an empty message containing an * IPV6_PATHMTU ancillary data item. 
*/ - if (!udp->udp_ipv6_recvpathmtu) + if (!connp->conn_ipv6_recvpathmtu) break; udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + @@ -1334,7 +1270,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) sin6 = (sin6_t *)&tudi[1]; bzero(sin6, sizeof (sin6_t)); sin6->sin6_family = AF_INET6; - sin6->sin6_addr = udp->udp_v6dst; + sin6->sin6_addr = connp->conn_faddr_v6; toh = (struct T_opthdr *)&sin6[1]; toh->level = IPPROTO_IPV6; @@ -1352,8 +1288,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) * message. Free it, then send our empty message. */ freemsg(mp); - udp_ulp_recv(connp, newmp); - + udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); return; } case ICMP6_TIME_EXCEEDED: @@ -1378,7 +1313,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) * Deliver T_UDERROR_IND when the application has asked for it. * The socket layer enables this automatically when connected. */ - if (!udp->udp_dgram_errind) { + if (!connp->conn_dgram_errind) { freemsg(mp); return; } @@ -1390,12 +1325,12 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; if (IPCL_IS_NONSTR(connp)) { - rw_enter(&udp->udp_rwlock, RW_WRITER); + mutex_enter(&connp->conn_lock); if (udp->udp_state == TS_DATA_XFER) { - if (sin6.sin6_port == udp->udp_dstport && + if (sin6.sin6_port == connp->conn_fport && IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, - &udp->udp_v6dst)) { - rw_exit(&udp->udp_rwlock); + &connp->conn_faddr_v6)) { + mutex_exit(&connp->conn_lock); (*connp->conn_upcalls->su_set_error) (connp->conn_upper_handle, error); goto done; @@ -1404,7 +1339,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) udp->udp_delayed_error = error; *((sin6_t *)&udp->udp_delayed_addr) = sin6; } - rw_exit(&udp->udp_rwlock); + mutex_exit(&connp->conn_lock); } else { mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, error); @@ -1412,7 +1347,6 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) putnext(connp->conn_rq, mp1); } done: - 
ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); freemsg(mp); } @@ -1426,11 +1360,12 @@ done: static void udp_addr_req(queue_t *q, mblk_t *mp) { - sin_t *sin; - sin6_t *sin6; + struct sockaddr *sa; mblk_t *ackmp; struct T_addr_ack *taa; udp_t *udp = Q_TO_UDP(q); + conn_t *connp = udp->udp_connp; + uint_t addrlen; /* Make it large enough for worst case */ ackmp = reallocb(mp, sizeof (struct T_addr_ack) + @@ -1446,7 +1381,13 @@ udp_addr_req(queue_t *q, mblk_t *mp) taa->PRIM_type = T_ADDR_ACK; ackmp->b_datap->db_type = M_PCPROTO; - rw_enter(&udp->udp_rwlock, RW_READER); + + if (connp->conn_family == AF_INET) + addrlen = sizeof (sin_t); + else + addrlen = sizeof (sin6_t); + + mutex_enter(&connp->conn_lock); /* * Note: Following code assumes 32 bit alignment of basic * data structures like sin_t and struct T_addr_ack. @@ -1456,91 +1397,23 @@ udp_addr_req(queue_t *q, mblk_t *mp) * Fill in local address first */ taa->LOCADDR_offset = sizeof (*taa); - if (udp->udp_family == AF_INET) { - taa->LOCADDR_length = sizeof (sin_t); - sin = (sin_t *)&taa[1]; - /* Fill zeroes and then initialize non-zero fields */ - *sin = sin_null; - sin->sin_family = AF_INET; - if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && - !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { - IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, - sin->sin_addr.s_addr); - } else { - /* - * INADDR_ANY - * udp_v6src is not set, we might be bound to - * broadcast/multicast. 
Use udp_bound_v6src as - * local address instead (that could - * also still be INADDR_ANY) - */ - IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, - sin->sin_addr.s_addr); - } - sin->sin_port = udp->udp_port; - ackmp->b_wptr = (uchar_t *)&sin[1]; - if (udp->udp_state == TS_DATA_XFER) { - /* - * connected, fill remote address too - */ - taa->REMADDR_length = sizeof (sin_t); - /* assumed 32-bit alignment */ - taa->REMADDR_offset = taa->LOCADDR_offset + - taa->LOCADDR_length; - - sin = (sin_t *)(ackmp->b_rptr + - taa->REMADDR_offset); - /* initialize */ - *sin = sin_null; - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = - V4_PART_OF_V6(udp->udp_v6dst); - sin->sin_port = udp->udp_dstport; - ackmp->b_wptr = (uchar_t *)&sin[1]; - } - } else { - taa->LOCADDR_length = sizeof (sin6_t); - sin6 = (sin6_t *)&taa[1]; - /* Fill zeroes and then initialize non-zero fields */ - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { - sin6->sin6_addr = udp->udp_v6src; - } else { - /* - * UNSPECIFIED - * udp_v6src is not set, we might be bound to - * broadcast/multicast. 
Use udp_bound_v6src as - * local address instead (that could - * also still be UNSPECIFIED) - */ - sin6->sin6_addr = - udp->udp_bound_v6src; - } - sin6->sin6_port = udp->udp_port; - ackmp->b_wptr = (uchar_t *)&sin6[1]; - if (udp->udp_state == TS_DATA_XFER) { - /* - * connected, fill remote address too - */ - taa->REMADDR_length = sizeof (sin6_t); - /* assumed 32-bit alignment */ - taa->REMADDR_offset = taa->LOCADDR_offset + - taa->LOCADDR_length; - - sin6 = (sin6_t *)(ackmp->b_rptr + - taa->REMADDR_offset); - /* initialize */ - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - sin6->sin6_addr = udp->udp_v6dst; - sin6->sin6_port = udp->udp_dstport; - ackmp->b_wptr = (uchar_t *)&sin6[1]; - } - ackmp->b_wptr = (uchar_t *)&sin6[1]; - } + taa->LOCADDR_length = addrlen; + sa = (struct sockaddr *)&taa[1]; + (void) conn_getsockname(connp, sa, &addrlen); + ackmp->b_wptr += addrlen; } - rw_exit(&udp->udp_rwlock); + if (udp->udp_state == TS_DATA_XFER) { + /* + * connected, fill remote address too + */ + taa->REMADDR_length = addrlen; + /* assumed 32-bit alignment */ + taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; + sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); + (void) conn_getpeername(connp, sa, &addrlen); + ackmp->b_wptr += addrlen; + } + mutex_exit(&connp->conn_lock); ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); qreply(q, ackmp); } @@ -1548,7 +1421,9 @@ udp_addr_req(queue_t *q, mblk_t *mp) static void udp_copy_info(struct T_info_ack *tap, udp_t *udp) { - if (udp->udp_family == AF_INET) { + conn_t *connp = udp->udp_connp; + + if (connp->conn_family == AF_INET) { *tap = udp_g_t_info_ack_ipv4; } else { *tap = udp_g_t_info_ack_ipv6; @@ -1632,20 +1507,15 @@ udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * This is the open routine for udp. It allocates a udp_t structure for * the stream and, on the first open of the module, creates an ND table. 
*/ -/*ARGSUSED2*/ static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, boolean_t isv6) { - int error; udp_t *udp; conn_t *connp; dev_t conn_dev; - udp_stack_t *us; vmem_t *minor_arena; - TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); - /* If the stream is already open, return immediately. */ if (q->q_ptr != NULL) return (0); @@ -1685,7 +1555,6 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, return (ENOMEM); } udp = connp->conn_udp; - us = udp->udp_us; *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); connp->conn_dev = conn_dev; @@ -1699,39 +1568,27 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, connp->conn_rq = q; connp->conn_wq = WR(q); - rw_enter(&udp->udp_rwlock, RW_WRITER); - ASSERT(connp->conn_ulp == IPPROTO_UDP); + /* + * Since this conn_t/udp_t is not yet visible to anybody else we don't + * need to lock anything. + */ + ASSERT(connp->conn_proto == IPPROTO_UDP); ASSERT(connp->conn_udp == udp); ASSERT(udp->udp_connp == connp); if (flag & SO_SOCKSTR) { - connp->conn_flags |= IPCL_SOCKET; udp->udp_issocket = B_TRUE; } - q->q_hiwat = us->us_recv_hiwat; - WR(q)->q_hiwat = us->us_xmit_hiwat; - WR(q)->q_lowat = us->us_xmit_lowat; + WR(q)->q_hiwat = connp->conn_sndbuf; + WR(q)->q_lowat = connp->conn_sndlowat; qprocson(q); - if (udp->udp_family == AF_INET6) { - /* Build initial header template for transmit */ - if ((error = udp_build_hdrs(udp)) != 0) { - rw_exit(&udp->udp_rwlock); - qprocsoff(q); - inet_minor_free(minor_arena, conn_dev); - ipcl_conn_destroy(connp); - return (error); - } - } - rw_exit(&udp->udp_rwlock); - /* Set the Stream head write offset and high watermark. */ - (void) proto_set_tx_wroff(q, connp, - udp->udp_max_hdr_len + us->us_wroff_extra); - /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? 
*/ - (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); + (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); + (void) proto_set_rx_hiwat(q, connp, + udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); mutex_enter(&connp->conn_lock); connp->conn_state_flags &= ~CONN_INCIPIENT; @@ -1753,7 +1610,6 @@ udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) * This routine gets default values of certain options whose default * values are maintained by protcol specific code */ -/* ARGSUSED */ int udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) { @@ -1791,456 +1647,127 @@ udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) /* * This routine retrieves the current status of socket options. - * It returns the size of the option retrieved. + * It returns the size of the option retrieved, or -1. */ -static int -udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) +int +udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, + uchar_t *ptr) { - udp_t *udp = connp->conn_udp; - udp_stack_t *us = udp->udp_us; int *i1 = (int *)ptr; - ip6_pkt_t *ipp = &udp->udp_sticky_ipp; + udp_t *udp = connp->conn_udp; int len; + conn_opt_arg_t coas; + int retval; - ASSERT(RW_READ_HELD(&udp->udp_rwlock)); - switch (level) { - case SOL_SOCKET: - switch (name) { - case SO_DEBUG: - *i1 = udp->udp_debug; - break; /* goto sizeof (int) option return */ - case SO_REUSEADDR: - *i1 = udp->udp_reuseaddr; - break; /* goto sizeof (int) option return */ - case SO_TYPE: - *i1 = SOCK_DGRAM; - break; /* goto sizeof (int) option return */ + coas.coa_connp = connp; + coas.coa_ixa = connp->conn_ixa; + coas.coa_ipp = &connp->conn_xmit_ipp; + coas.coa_ancillary = B_FALSE; + coas.coa_changed = 0; + /* + * We assume that the optcom framework has checked for the set + * of levels and names that are supported, hence we don't worry + * about rejecting based on that. 
+ * First check for UDP specific handling, then pass to common routine. + */ + switch (level) { + case IPPROTO_IP: /* - * The following three items are available here, - * but are only meaningful to IP. + * Only allow IPv4 option processing on IPv4 sockets. */ - case SO_DONTROUTE: - *i1 = udp->udp_dontroute; - break; /* goto sizeof (int) option return */ - case SO_USELOOPBACK: - *i1 = udp->udp_useloopback; - break; /* goto sizeof (int) option return */ - case SO_BROADCAST: - *i1 = udp->udp_broadcast; - break; /* goto sizeof (int) option return */ - - case SO_SNDBUF: - *i1 = udp->udp_xmit_hiwat; - break; /* goto sizeof (int) option return */ - case SO_RCVBUF: - *i1 = udp->udp_rcv_disply_hiwat; - break; /* goto sizeof (int) option return */ - case SO_DGRAM_ERRIND: - *i1 = udp->udp_dgram_errind; - break; /* goto sizeof (int) option return */ - case SO_RECVUCRED: - *i1 = udp->udp_recvucred; - break; /* goto sizeof (int) option return */ - case SO_TIMESTAMP: - *i1 = udp->udp_timestamp; - break; /* goto sizeof (int) option return */ - case SO_ANON_MLP: - *i1 = connp->conn_anon_mlp; - break; /* goto sizeof (int) option return */ - case SO_MAC_EXEMPT: - *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE); - break; - case SO_MAC_IMPLICIT: - *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT); - break; - case SO_ALLZONES: - *i1 = connp->conn_allzones; - break; /* goto sizeof (int) option return */ - case SO_EXCLBIND: - *i1 = udp->udp_exclbind ? 
SO_EXCLBIND : 0; - break; - case SO_PROTOTYPE: - *i1 = IPPROTO_UDP; - break; - case SO_DOMAIN: - *i1 = udp->udp_family; - break; - default: - return (-1); - } - break; - case IPPROTO_IP: - if (udp->udp_family != AF_INET) + if (connp->conn_family != AF_INET) return (-1); + switch (name) { case IP_OPTIONS: case T_IP_OPTIONS: - len = udp->udp_ip_rcv_options_len - udp->udp_label_len; - if (len > 0) { - bcopy(udp->udp_ip_rcv_options + - udp->udp_label_len, ptr, len); - } - return (len); - case IP_TOS: - case T_IP_TOS: - *i1 = (int)udp->udp_type_of_service; - break; /* goto sizeof (int) option return */ - case IP_TTL: - *i1 = (int)udp->udp_ttl; - break; /* goto sizeof (int) option return */ - case IP_DHCPINIT_IF: - return (-EINVAL); - case IP_NEXTHOP: - case IP_RECVPKTINFO: - /* - * This also handles IP_PKTINFO. - * IP_PKTINFO and IP_RECVPKTINFO have the same value. - * Differentiation is based on the size of the argument - * passed in. - * This option is handled in IP which will return an - * error for IP_PKTINFO as it's not supported as a - * sticky option. 
- */ - return (-EINVAL); - case IP_MULTICAST_IF: - /* 0 address if not set */ - *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; - return (sizeof (ipaddr_t)); - case IP_MULTICAST_TTL: - *(uchar_t *)ptr = udp->udp_multicast_ttl; - return (sizeof (uchar_t)); - case IP_MULTICAST_LOOP: - *ptr = connp->conn_multicast_loop; - return (sizeof (uint8_t)); - case IP_RECVOPTS: - *i1 = udp->udp_recvopts; - break; /* goto sizeof (int) option return */ - case IP_RECVDSTADDR: - *i1 = udp->udp_recvdstaddr; - break; /* goto sizeof (int) option return */ - case IP_RECVIF: - *i1 = udp->udp_recvif; - break; /* goto sizeof (int) option return */ - case IP_RECVSLLA: - *i1 = udp->udp_recvslla; - break; /* goto sizeof (int) option return */ - case IP_RECVTTL: - *i1 = udp->udp_recvttl; - break; /* goto sizeof (int) option return */ - case IP_ADD_MEMBERSHIP: - case IP_DROP_MEMBERSHIP: - case IP_BLOCK_SOURCE: - case IP_UNBLOCK_SOURCE: - case IP_ADD_SOURCE_MEMBERSHIP: - case IP_DROP_SOURCE_MEMBERSHIP: - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - /* cannot "get" the value for these */ - return (-1); - case IP_BOUND_IF: - /* Zero if not set */ - *i1 = udp->udp_bound_if; - break; /* goto sizeof (int) option return */ - case IP_UNSPEC_SRC: - *i1 = udp->udp_unspec_source; - break; /* goto sizeof (int) option return */ - case IP_BROADCAST_TTL: - *(uchar_t *)ptr = connp->conn_broadcast_ttl; - return (sizeof (uchar_t)); - default: - return (-1); - } - break; - case IPPROTO_IPV6: - if (udp->udp_family != AF_INET6) - return (-1); - switch (name) { - case IPV6_UNICAST_HOPS: - *i1 = (unsigned int)udp->udp_ttl; - break; /* goto sizeof (int) option return */ - case IPV6_MULTICAST_IF: - /* 0 index if not set */ - *i1 = udp->udp_multicast_if_index; - break; /* goto sizeof (int) option return */ - case IPV6_MULTICAST_HOPS: - *i1 = udp->udp_multicast_ttl; - break; /* goto sizeof (int) 
option return */ - case IPV6_MULTICAST_LOOP: - *i1 = connp->conn_multicast_loop; - break; /* goto sizeof (int) option return */ - case IPV6_JOIN_GROUP: - case IPV6_LEAVE_GROUP: - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - /* cannot "get" the value for these */ - return (-1); - case IPV6_BOUND_IF: - /* Zero if not set */ - *i1 = udp->udp_bound_if; - break; /* goto sizeof (int) option return */ - case IPV6_UNSPEC_SRC: - *i1 = udp->udp_unspec_source; - break; /* goto sizeof (int) option return */ - case IPV6_RECVPKTINFO: - *i1 = udp->udp_ip_recvpktinfo; - break; /* goto sizeof (int) option return */ - case IPV6_RECVTCLASS: - *i1 = udp->udp_ipv6_recvtclass; - break; /* goto sizeof (int) option return */ - case IPV6_RECVPATHMTU: - *i1 = udp->udp_ipv6_recvpathmtu; - break; /* goto sizeof (int) option return */ - case IPV6_RECVHOPLIMIT: - *i1 = udp->udp_ipv6_recvhoplimit; - break; /* goto sizeof (int) option return */ - case IPV6_RECVHOPOPTS: - *i1 = udp->udp_ipv6_recvhopopts; - break; /* goto sizeof (int) option return */ - case IPV6_RECVDSTOPTS: - *i1 = udp->udp_ipv6_recvdstopts; - break; /* goto sizeof (int) option return */ - case _OLD_IPV6_RECVDSTOPTS: - *i1 = udp->udp_old_ipv6_recvdstopts; - break; /* goto sizeof (int) option return */ - case IPV6_RECVRTHDRDSTOPTS: - *i1 = udp->udp_ipv6_recvrthdrdstopts; - break; /* goto sizeof (int) option return */ - case IPV6_RECVRTHDR: - *i1 = udp->udp_ipv6_recvrthdr; - break; /* goto sizeof (int) option return */ - case IPV6_PKTINFO: { - /* XXX assumes that caller has room for max size! 
*/ - struct in6_pktinfo *pkti; - - pkti = (struct in6_pktinfo *)ptr; - if (ipp->ipp_fields & IPPF_IFINDEX) - pkti->ipi6_ifindex = ipp->ipp_ifindex; - else - pkti->ipi6_ifindex = 0; - if (ipp->ipp_fields & IPPF_ADDR) - pkti->ipi6_addr = ipp->ipp_addr; - else - pkti->ipi6_addr = ipv6_all_zeros; - return (sizeof (struct in6_pktinfo)); - } - case IPV6_TCLASS: - if (ipp->ipp_fields & IPPF_TCLASS) - *i1 = ipp->ipp_tclass; - else - *i1 = IPV6_FLOW_TCLASS( - IPV6_DEFAULT_VERS_AND_FLOW); - break; /* goto sizeof (int) option return */ - case IPV6_NEXTHOP: { - sin6_t *sin6 = (sin6_t *)ptr; - - if (!(ipp->ipp_fields & IPPF_NEXTHOP)) - return (0); - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - sin6->sin6_addr = ipp->ipp_nexthop; - return (sizeof (sin6_t)); - } - case IPV6_HOPOPTS: - if (!(ipp->ipp_fields & IPPF_HOPOPTS)) - return (0); - if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) + mutex_enter(&connp->conn_lock); + if (!(udp->udp_recv_ipp.ipp_fields & + IPPF_IPV4_OPTIONS)) { + mutex_exit(&connp->conn_lock); return (0); - /* - * The cipso/label option is added by kernel. - * User is not usually aware of this option. - * We copy out the hbh opt after the label option. 
- */ - bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, - ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); - if (udp->udp_label_len_v6 > 0) { - ptr[0] = ((char *)ipp->ipp_hopopts)[0]; - ptr[1] = (ipp->ipp_hopoptslen - - udp->udp_label_len_v6 + 7) / 8 - 1; } - return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); - case IPV6_RTHDRDSTOPTS: - if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) - return (0); - bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); - return (ipp->ipp_rtdstoptslen); - case IPV6_RTHDR: - if (!(ipp->ipp_fields & IPPF_RTHDR)) - return (0); - bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); - return (ipp->ipp_rthdrlen); - case IPV6_DSTOPTS: - if (!(ipp->ipp_fields & IPPF_DSTOPTS)) - return (0); - bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); - return (ipp->ipp_dstoptslen); - case IPV6_PATHMTU: - return (ip_fill_mtuinfo(&udp->udp_v6dst, - udp->udp_dstport, (struct ip6_mtuinfo *)ptr, - us->us_netstack)); - default: - return (-1); + + len = udp->udp_recv_ipp.ipp_ipv4_options_len; + ASSERT(len != 0); + bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); + mutex_exit(&connp->conn_lock); + return (len); } break; case IPPROTO_UDP: switch (name) { - case UDP_ANONPRIVBIND: - *i1 = udp->udp_anon_priv_bind; - break; - case UDP_EXCLBIND: - *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; - break; - case UDP_RCVHDR: - *i1 = udp->udp_rcvhdr ? 1 : 0; - break; case UDP_NAT_T_ENDPOINT: + mutex_enter(&connp->conn_lock); *i1 = udp->udp_nat_t_endpoint; - break; - default: - return (-1); + mutex_exit(&connp->conn_lock); + return (sizeof (int)); + case UDP_RCVHDR: + mutex_enter(&connp->conn_lock); + *i1 = udp->udp_rcvhdr ? 1 : 0; + mutex_exit(&connp->conn_lock); + return (sizeof (int)); } - break; - default: - return (-1); } - return (sizeof (int)); + mutex_enter(&connp->conn_lock); + retval = conn_opt_get(&coas, level, name, ptr); + mutex_exit(&connp->conn_lock); + return (retval); } +/* + * This routine retrieves the current status of socket options. 
+ * It returns the size of the option retrieved, or -1. + */ int udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) { - udp_t *udp; - int err; - - udp = Q_TO_UDP(q); + conn_t *connp = Q_TO_CONN(q); + int err; - rw_enter(&udp->udp_rwlock, RW_READER); - err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); - rw_exit(&udp->udp_rwlock); + err = udp_opt_get(connp, level, name, ptr); return (err); } /* * This routine sets socket options. */ -/* ARGSUSED */ -static int -udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, - uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, - void *thisdg_attrs, boolean_t checkonly) +int +udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, + uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) { - udpattrs_t *attrs = thisdg_attrs; - int *i1 = (int *)invalp; - boolean_t onoff = (*i1 == 0) ? 0 : 1; - udp_t *udp = connp->conn_udp; + conn_t *connp = coa->coa_connp; + ip_xmit_attr_t *ixa = coa->coa_ixa; + udp_t *udp = connp->conn_udp; udp_stack_t *us = udp->udp_us; - int error; - uint_t newlen; - size_t sth_wroff; + int *i1 = (int *)invalp; + boolean_t onoff = (*i1 == 0) ? 0 : 1; + int error; - ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); + ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); /* - * For fixed length options, no sanity check - * of passed in length is done. It is assumed *_optcom_req() - * routines do the right thing. + * First do UDP specific sanity checks and handle UDP specific + * options. Note that some IPPROTO_UDP options are handled + * by conn_opt_set. */ switch (level) { case SOL_SOCKET: switch (name) { - case SO_REUSEADDR: - if (!checkonly) { - udp->udp_reuseaddr = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case SO_DEBUG: - if (!checkonly) - udp->udp_debug = onoff; - break; - /* - * The following three items are available here, - * but are only meaningful to IP. 
- */ - case SO_DONTROUTE: - if (!checkonly) { - udp->udp_dontroute = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case SO_USELOOPBACK: - if (!checkonly) { - udp->udp_useloopback = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case SO_BROADCAST: - if (!checkonly) { - udp->udp_broadcast = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case SO_SNDBUF: if (*i1 > us->us_max_buf) { - *outlenp = 0; return (ENOBUFS); } - if (!checkonly) { - udp->udp_xmit_hiwat = *i1; - connp->conn_wq->q_hiwat = *i1; - } break; case SO_RCVBUF: if (*i1 > us->us_max_buf) { - *outlenp = 0; return (ENOBUFS); } - if (!checkonly) { - int size; - - udp->udp_rcv_disply_hiwat = *i1; - size = udp_set_rcv_hiwat(udp, *i1); - rw_exit(&udp->udp_rwlock); - (void) proto_set_rx_hiwat(connp->conn_rq, connp, - size); - rw_enter(&udp->udp_rwlock, RW_WRITER); - } - break; - case SO_DGRAM_ERRIND: - if (!checkonly) - udp->udp_dgram_errind = onoff; - break; - case SO_RECVUCRED: - if (!checkonly) - udp->udp_recvucred = onoff; - break; - case SO_ALLZONES: - /* - * "soft" error (negative) - * option not handled at this level - * Do not modify *outlenp. - */ - return (-EINVAL); - case SO_TIMESTAMP: - if (!checkonly) - udp->udp_timestamp = onoff; - break; - case SO_ANON_MLP: - case SO_MAC_EXEMPT: - case SO_MAC_IMPLICIT: - PASS_OPT_TO_IP(connp); break; + case SCM_UCRED: { struct ucred_s *ucr; - cred_t *cr, *newcr; + cred_t *newcr; ts_label_t *tsl; /* @@ -2250,20 +1777,18 @@ udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, */ if (connp->conn_mlp_type == mlptSingle) break; + ucr = (struct ucred_s *)invalp; if (inlen != ucredsize || ucr->uc_labeloff < sizeof (*ucr) || ucr->uc_labeloff + sizeof (bslabel_t) > inlen) return (EINVAL); if (!checkonly) { - mblk_t *mb; - pid_t cpid; - - if (attrs == NULL || - (mb = attrs->udpattr_mb) == NULL) - return (EINVAL); - if ((cr = msg_getcred(mb, &cpid)) == NULL) - cr = udp->udp_connp->conn_cred; + /* + * Set ixa_tsl to the new label. 
+ * We assume that crgetzoneid doesn't change + * as part of the SCM_UCRED. + */ ASSERT(cr != NULL); if ((tsl = crgetlabel(cr)) == NULL) return (EINVAL); @@ -2271,778 +1796,75 @@ udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, tsl->tsl_doi, KM_NOSLEEP); if (newcr == NULL) return (ENOSR); - mblk_setcred(mb, newcr, cpid); - attrs->udpattr_credset = B_TRUE; - crfree(newcr); - } - break; - } - case SO_EXCLBIND: - if (!checkonly) - udp->udp_exclbind = onoff; - break; - case SO_RCVTIMEO: - case SO_SNDTIMEO: - /* - * Pass these two options in order for third part - * protocol usage. Here just return directly. - */ - return (0); - default: - *outlenp = 0; - return (EINVAL); - } - break; - case IPPROTO_IP: - if (udp->udp_family != AF_INET) { - *outlenp = 0; - return (ENOPROTOOPT); - } - switch (name) { - case IP_OPTIONS: - case T_IP_OPTIONS: - /* Save options for use by IP. */ - newlen = inlen + udp->udp_label_len; - if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { - *outlenp = 0; - return (EINVAL); - } - if (checkonly) - break; - - /* - * Update the stored options taking into account - * any CIPSO option which we should not overwrite. - */ - if (!tsol_option_set(&udp->udp_ip_snd_options, - &udp->udp_ip_snd_options_len, - udp->udp_label_len, invalp, inlen)) { - *outlenp = 0; - return (ENOMEM); - } - - udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + - UDPH_SIZE + udp->udp_ip_snd_options_len; - sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; - rw_exit(&udp->udp_rwlock); - (void) proto_set_tx_wroff(connp->conn_rq, connp, - sth_wroff); - rw_enter(&udp->udp_rwlock, RW_WRITER); - break; - - case IP_TTL: - if (!checkonly) { - udp->udp_ttl = (uchar_t)*i1; - } - break; - case IP_TOS: - case T_IP_TOS: - if (!checkonly) { - udp->udp_type_of_service = (uchar_t)*i1; - } - break; - case IP_MULTICAST_IF: { - /* - * TODO should check OPTMGMT reply and undo this if - * there is an error. 
- */ - struct in_addr *inap = (struct in_addr *)invalp; - if (!checkonly) { - udp->udp_multicast_if_addr = - inap->s_addr; - PASS_OPT_TO_IP(connp); - } - break; - } - case IP_MULTICAST_TTL: - if (!checkonly) - udp->udp_multicast_ttl = *invalp; - break; - case IP_MULTICAST_LOOP: - if (!checkonly) { - connp->conn_multicast_loop = *invalp; - PASS_OPT_TO_IP(connp); - } - break; - case IP_RECVOPTS: - if (!checkonly) - udp->udp_recvopts = onoff; - break; - case IP_RECVDSTADDR: - if (!checkonly) - udp->udp_recvdstaddr = onoff; - break; - case IP_RECVIF: - if (!checkonly) { - udp->udp_recvif = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case IP_RECVSLLA: - if (!checkonly) { - udp->udp_recvslla = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case IP_RECVTTL: - if (!checkonly) - udp->udp_recvttl = onoff; - break; - case IP_PKTINFO: { - /* - * This also handles IP_RECVPKTINFO. - * IP_PKTINFO and IP_RECVPKTINFO have same value. - * Differentiation is based on the size of the - * argument passed in. - */ - struct in_pktinfo *pktinfop; - ip4_pkt_t *attr_pktinfop; - - if (checkonly) - break; - - if (inlen == sizeof (int)) { - /* - * This is IP_RECVPKTINFO option. - * Keep a local copy of whether this option is - * set or not and pass it down to IP for - * processing. - */ - - udp->udp_ip_recvpktinfo = onoff; - return (-EINVAL); - } - - if (attrs == NULL || - (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { + ASSERT(newcr->cr_label != NULL); /* - * sticky option or no buffer to return - * the results. + * Move the hold on the cr_label to ixa_tsl by + * setting cr_label to NULL. Then release newcr. 
*/ - return (EINVAL); - } - - if (inlen != sizeof (struct in_pktinfo)) - return (EINVAL); - - pktinfop = (struct in_pktinfo *)invalp; - - /* - * At least one of the values should be specified - */ - if (pktinfop->ipi_ifindex == 0 && - pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { - return (EINVAL); - } - - attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; - attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; - - break; - } - case IP_ADD_MEMBERSHIP: - case IP_DROP_MEMBERSHIP: - case IP_BLOCK_SOURCE: - case IP_UNBLOCK_SOURCE: - case IP_ADD_SOURCE_MEMBERSHIP: - case IP_DROP_SOURCE_MEMBERSHIP: - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case IP_SEC_OPT: - case IP_NEXTHOP: - case IP_DHCPINIT_IF: - /* - * "soft" error (negative) - * option not handled at this level - * Do not modify *outlenp. - */ - return (-EINVAL); - case IP_BOUND_IF: - if (!checkonly) { - udp->udp_bound_if = *i1; - PASS_OPT_TO_IP(connp); - } - break; - case IP_UNSPEC_SRC: - if (!checkonly) { - udp->udp_unspec_source = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case IP_BROADCAST_TTL: - if (!checkonly) - connp->conn_broadcast_ttl = *invalp; - break; - default: - *outlenp = 0; - return (EINVAL); - } - break; - case IPPROTO_IPV6: { - ip6_pkt_t *ipp; - boolean_t sticky; - - if (udp->udp_family != AF_INET6) { - *outlenp = 0; - return (ENOPROTOOPT); - } - /* - * Deal with both sticky options and ancillary data - */ - sticky = B_FALSE; - if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == - NULL) { - /* sticky options, or none */ - ipp = &udp->udp_sticky_ipp; - sticky = B_TRUE; - } - - switch (name) { - case IPV6_MULTICAST_IF: - if (!checkonly) { - udp->udp_multicast_if_index = *i1; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_UNICAST_HOPS: - /* -1 means use default */ - if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { - *outlenp = 0; - return (EINVAL); - } - if 
(!checkonly) { - if (*i1 == -1) { - udp->udp_ttl = ipp->ipp_unicast_hops = - us->us_ipv6_hoplimit; - ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; - /* Pass modified value to IP. */ - *i1 = udp->udp_ttl; - } else { - udp->udp_ttl = ipp->ipp_unicast_hops = - (uint8_t)*i1; - ipp->ipp_fields |= IPPF_UNICAST_HOPS; - } - /* Rebuild the header template */ - error = udp_build_hdrs(udp); - if (error != 0) { - *outlenp = 0; - return (error); - } - } - break; - case IPV6_MULTICAST_HOPS: - /* -1 means use default */ - if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { - *outlenp = 0; - return (EINVAL); - } - if (!checkonly) { - if (*i1 == -1) { - udp->udp_multicast_ttl = - ipp->ipp_multicast_hops = - IP_DEFAULT_MULTICAST_TTL; - ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; - /* Pass modified value to IP. */ - *i1 = udp->udp_multicast_ttl; - } else { - udp->udp_multicast_ttl = - ipp->ipp_multicast_hops = - (uint8_t)*i1; - ipp->ipp_fields |= IPPF_MULTICAST_HOPS; - } - } - break; - case IPV6_MULTICAST_LOOP: - if (*i1 != 0 && *i1 != 1) { - *outlenp = 0; - return (EINVAL); - } - if (!checkonly) { - connp->conn_multicast_loop = *i1; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_JOIN_GROUP: - case IPV6_LEAVE_GROUP: - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - /* - * "soft" error (negative) - * option not handled at this level - * Note: Do not modify *outlenp - */ - return (-EINVAL); - case IPV6_BOUND_IF: - if (!checkonly) { - udp->udp_bound_if = *i1; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_UNSPEC_SRC: - if (!checkonly) { - udp->udp_unspec_source = onoff; - PASS_OPT_TO_IP(connp); - } - break; - /* - * Set boolean switches for ancillary data delivery - */ - case IPV6_RECVPKTINFO: - if (!checkonly) { - udp->udp_ip_recvpktinfo = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVTCLASS: - if (!checkonly) { - udp->udp_ipv6_recvtclass = onoff; - 
PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVPATHMTU: - if (!checkonly) { - udp->udp_ipv6_recvpathmtu = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVHOPLIMIT: - if (!checkonly) { - udp->udp_ipv6_recvhoplimit = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVHOPOPTS: - if (!checkonly) { - udp->udp_ipv6_recvhopopts = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVDSTOPTS: - if (!checkonly) { - udp->udp_ipv6_recvdstopts = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case _OLD_IPV6_RECVDSTOPTS: - if (!checkonly) - udp->udp_old_ipv6_recvdstopts = onoff; - break; - case IPV6_RECVRTHDRDSTOPTS: - if (!checkonly) { - udp->udp_ipv6_recvrthdrdstopts = onoff; - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_RECVRTHDR: - if (!checkonly) { - udp->udp_ipv6_recvrthdr = onoff; - PASS_OPT_TO_IP(connp); - } - break; - /* - * Set sticky options or ancillary data. - * If sticky options, (re)build any extension headers - * that might be needed as a result. - */ - case IPV6_PKTINFO: - /* - * The source address and ifindex are verified - * in ip_opt_set(). For ancillary data the - * source address is checked in ip_wput_v6. 
- */ - if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) - return (EINVAL); - if (checkonly) - break; - - if (inlen == 0) { - ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); - ipp->ipp_sticky_ignored |= - (IPPF_IFINDEX|IPPF_ADDR); - } else { - struct in6_pktinfo *pkti; - - pkti = (struct in6_pktinfo *)invalp; - ipp->ipp_ifindex = pkti->ipi6_ifindex; - ipp->ipp_addr = pkti->ipi6_addr; - if (ipp->ipp_ifindex != 0) - ipp->ipp_fields |= IPPF_IFINDEX; - else - ipp->ipp_fields &= ~IPPF_IFINDEX; - if (!IN6_IS_ADDR_UNSPECIFIED( - &ipp->ipp_addr)) - ipp->ipp_fields |= IPPF_ADDR; - else - ipp->ipp_fields &= ~IPPF_ADDR; - } - if (sticky) { - error = udp_build_hdrs(udp); - if (error != 0) - return (error); - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_HOPLIMIT: - if (sticky) - return (EINVAL); - if (inlen != 0 && inlen != sizeof (int)) - return (EINVAL); - if (checkonly) - break; - - if (inlen == 0) { - ipp->ipp_fields &= ~IPPF_HOPLIMIT; - ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; - } else { - if (*i1 > 255 || *i1 < -1) - return (EINVAL); - if (*i1 == -1) - ipp->ipp_hoplimit = - us->us_ipv6_hoplimit; - else - ipp->ipp_hoplimit = *i1; - ipp->ipp_fields |= IPPF_HOPLIMIT; - } - break; - case IPV6_TCLASS: - if (inlen != 0 && inlen != sizeof (int)) - return (EINVAL); - if (checkonly) - break; - - if (inlen == 0) { - ipp->ipp_fields &= ~IPPF_TCLASS; - ipp->ipp_sticky_ignored |= IPPF_TCLASS; - } else { - if (*i1 > 255 || *i1 < -1) - return (EINVAL); - if (*i1 == -1) - ipp->ipp_tclass = 0; - else - ipp->ipp_tclass = *i1; - ipp->ipp_fields |= IPPF_TCLASS; - } - if (sticky) { - error = udp_build_hdrs(udp); - if (error != 0) - return (error); - } - break; - case IPV6_NEXTHOP: - /* - * IP will verify that the nexthop is reachable - * and fail for sticky options. 
- */ - if (inlen != 0 && inlen != sizeof (sin6_t)) - return (EINVAL); - if (checkonly) - break; - - if (inlen == 0) { - ipp->ipp_fields &= ~IPPF_NEXTHOP; - ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; - } else { - sin6_t *sin6 = (sin6_t *)invalp; - - if (sin6->sin6_family != AF_INET6) { - return (EAFNOSUPPORT); - } - if (IN6_IS_ADDR_V4MAPPED( - &sin6->sin6_addr)) - return (EADDRNOTAVAIL); - ipp->ipp_nexthop = sin6->sin6_addr; - if (!IN6_IS_ADDR_UNSPECIFIED( - &ipp->ipp_nexthop)) - ipp->ipp_fields |= IPPF_NEXTHOP; - else - ipp->ipp_fields &= ~IPPF_NEXTHOP; - } - if (sticky) { - error = udp_build_hdrs(udp); - if (error != 0) - return (error); - PASS_OPT_TO_IP(connp); - } - break; - case IPV6_HOPOPTS: { - ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; - /* - * Sanity checks - minimum size, size a multiple of - * eight bytes, and matching size passed in. - */ - if (inlen != 0 && - inlen != (8 * (hopts->ip6h_len + 1))) - return (EINVAL); - - if (checkonly) - break; - - error = optcom_pkt_set(invalp, inlen, sticky, - (uchar_t **)&ipp->ipp_hopopts, - &ipp->ipp_hopoptslen, - sticky ? udp->udp_label_len_v6 : 0); - if (error != 0) - return (error); - if (ipp->ipp_hopoptslen == 0) { - ipp->ipp_fields &= ~IPPF_HOPOPTS; - ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; - } else { - ipp->ipp_fields |= IPPF_HOPOPTS; - } - if (sticky) { - error = udp_build_hdrs(udp); - if (error != 0) - return (error); - } - break; - } - case IPV6_RTHDRDSTOPTS: { - ip6_dest_t *dopts = (ip6_dest_t *)invalp; - - /* - * Sanity checks - minimum size, size a multiple of - * eight bytes, and matching size passed in. 
- */ - if (inlen != 0 && - inlen != (8 * (dopts->ip6d_len + 1))) - return (EINVAL); - - if (checkonly) - break; - - if (inlen == 0) { - if (sticky && - (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { - kmem_free(ipp->ipp_rtdstopts, - ipp->ipp_rtdstoptslen); - ipp->ipp_rtdstopts = NULL; - ipp->ipp_rtdstoptslen = 0; - } - - ipp->ipp_fields &= ~IPPF_RTDSTOPTS; - ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; - } else { - error = optcom_pkt_set(invalp, inlen, sticky, - (uchar_t **)&ipp->ipp_rtdstopts, - &ipp->ipp_rtdstoptslen, 0); - if (error != 0) - return (error); - ipp->ipp_fields |= IPPF_RTDSTOPTS; - } - if (sticky) { - error = udp_build_hdrs(udp); - if (error != 0) - return (error); - } - break; - } - case IPV6_DSTOPTS: { - ip6_dest_t *dopts = (ip6_dest_t *)invalp; - - /* - * Sanity checks - minimum size, size a multiple of - * eight bytes, and matching size passed in. - */ - if (inlen != 0 && - inlen != (8 * (dopts->ip6d_len + 1))) - return (EINVAL); - - if (checkonly) - break; - - if (inlen == 0) { - if (sticky && - (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { - kmem_free(ipp->ipp_dstopts, - ipp->ipp_dstoptslen); - ipp->ipp_dstopts = NULL; - ipp->ipp_dstoptslen = 0; - } - ipp->ipp_fields &= ~IPPF_DSTOPTS; - ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; - } else { - error = optcom_pkt_set(invalp, inlen, sticky, - (uchar_t **)&ipp->ipp_dstopts, - &ipp->ipp_dstoptslen, 0); - if (error != 0) - return (error); - ipp->ipp_fields |= IPPF_DSTOPTS; - } - if (sticky) { - error = udp_build_hdrs(udp); - if (error != 0) - return (error); - } - break; - } - case IPV6_RTHDR: { - ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; - - /* - * Sanity checks - minimum size, size a multiple of - * eight bytes, and matching size passed in. 
- */ - if (inlen != 0 && - inlen != (8 * (rt->ip6r_len + 1))) - return (EINVAL); - - if (checkonly) - break; - - if (inlen == 0) { - if (sticky && - (ipp->ipp_fields & IPPF_RTHDR) != 0) { - kmem_free(ipp->ipp_rthdr, - ipp->ipp_rthdrlen); - ipp->ipp_rthdr = NULL; - ipp->ipp_rthdrlen = 0; - } - ipp->ipp_fields &= ~IPPF_RTHDR; - ipp->ipp_sticky_ignored |= IPPF_RTHDR; - } else { - error = optcom_pkt_set(invalp, inlen, sticky, - (uchar_t **)&ipp->ipp_rthdr, - &ipp->ipp_rthdrlen, 0); - if (error != 0) - return (error); - ipp->ipp_fields |= IPPF_RTHDR; - } - if (sticky) { - error = udp_build_hdrs(udp); - if (error != 0) - return (error); + ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); + ixa->ixa_flags |= IXAF_UCRED_TSL; + newcr->cr_label = NULL; + crfree(newcr); + coa->coa_changed |= COA_HEADER_CHANGED; + coa->coa_changed |= COA_WROFF_CHANGED; } - break; + /* Fully handled this option. */ + return (0); } - - case IPV6_DONTFRAG: - if (checkonly) - break; - - if (onoff) { - ipp->ipp_fields |= IPPF_DONTFRAG; - } else { - ipp->ipp_fields &= ~IPPF_DONTFRAG; - } - break; - - case IPV6_USE_MIN_MTU: - if (inlen != sizeof (int)) - return (EINVAL); - - if (*i1 < -1 || *i1 > 1) - return (EINVAL); - - if (checkonly) - break; - - ipp->ipp_fields |= IPPF_USE_MIN_MTU; - ipp->ipp_use_min_mtu = *i1; - break; - - case IPV6_SEC_OPT: - case IPV6_SRC_PREFERENCES: - case IPV6_V6ONLY: - /* Handled at the IP level */ - return (-EINVAL); - default: - *outlenp = 0; - return (EINVAL); } break; - } /* end IPPROTO_IPV6 */ case IPPROTO_UDP: switch (name) { - case UDP_ANONPRIVBIND: - if ((error = secpolicy_net_privaddr(cr, 0, - IPPROTO_UDP)) != 0) { - *outlenp = 0; - return (error); - } - if (!checkonly) { - udp->udp_anon_priv_bind = onoff; - } - break; - case UDP_EXCLBIND: - if (!checkonly) - udp->udp_exclbind = onoff; - break; - case UDP_RCVHDR: - if (!checkonly) - udp->udp_rcvhdr = onoff; - break; case UDP_NAT_T_ENDPOINT: if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { - *outlenp = 0; 
return (error); } /* - * Use udp_family instead so we can avoid ambiguitites + * Use conn_family instead so we can avoid ambiguitites * with AF_INET6 sockets that may switch from IPv4 * to IPv6. */ - if (udp->udp_family != AF_INET) { - *outlenp = 0; + if (connp->conn_family != AF_INET) { return (EAFNOSUPPORT); } if (!checkonly) { - int size; - + mutex_enter(&connp->conn_lock); udp->udp_nat_t_endpoint = onoff; - - udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + - UDPH_SIZE + udp->udp_ip_snd_options_len; - - /* Also, adjust wroff */ - if (onoff) { - udp->udp_max_hdr_len += - sizeof (uint32_t); - } - size = udp->udp_max_hdr_len + - us->us_wroff_extra; - (void) proto_set_tx_wroff(connp->conn_rq, connp, - size); + mutex_exit(&connp->conn_lock); + coa->coa_changed |= COA_HEADER_CHANGED; + coa->coa_changed |= COA_WROFF_CHANGED; } - break; - default: - *outlenp = 0; - return (EINVAL); + /* Fully handled this option. */ + return (0); + case UDP_RCVHDR: + mutex_enter(&connp->conn_lock); + udp->udp_rcvhdr = onoff; + mutex_exit(&connp->conn_lock); + return (0); } break; - default: - *outlenp = 0; - return (EINVAL); - } - /* - * Common case of OK return with outval same as inval. - */ - if (invalp != outvalp) { - /* don't trust bcopy for identical src/dst */ - (void) bcopy(invalp, outvalp, inlen); } - *outlenp = inlen; - return (0); + error = conn_opt_set(coa, level, name, inlen, invalp, + checkonly, cr); + return (error); } +/* + * This routine sets socket options. 
+ */ int -udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, - uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, - void *thisdg_attrs, cred_t *cr) +udp_opt_set(conn_t *connp, uint_t optset_context, int level, + int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, + uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) { - int error; + udp_t *udp = connp->conn_udp; + int err; + conn_opt_arg_t coas, *coa; boolean_t checkonly; + udp_stack_t *us = udp->udp_us; - error = 0; switch (optset_context) { case SETFN_OPTCOM_CHECKONLY: checkonly = B_TRUE; @@ -3056,7 +1878,7 @@ udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, */ if (inlen == 0) { *outlenp = 0; - goto done; + return (0); } break; case SETFN_OPTCOM_NEGOTIATE: @@ -3074,8 +1896,7 @@ udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, */ if (!udp_opt_allow_udr_set(level, name)) { *outlenp = 0; - error = EINVAL; - goto done; + return (EINVAL); } break; default: @@ -3083,99 +1904,326 @@ udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, * We should never get here */ *outlenp = 0; - error = EINVAL; - goto done; + return (EINVAL); } ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); - error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, - outvalp, cr, thisdg_attrs, checkonly); -done: - return (error); + if (thisdg_attrs != NULL) { + /* Options from T_UNITDATA_REQ */ + coa = (conn_opt_arg_t *)thisdg_attrs; + ASSERT(coa->coa_connp == connp); + ASSERT(coa->coa_ixa != NULL); + ASSERT(coa->coa_ipp != NULL); + ASSERT(coa->coa_ancillary); + } else { + coa = &coas; + coas.coa_connp = connp; + /* Get a reference on conn_ixa to prevent concurrent mods */ + coas.coa_ixa = conn_get_ixa(connp, B_TRUE); + if (coas.coa_ixa == NULL) { + *outlenp = 0; + return (ENOMEM); + } + coas.coa_ipp = &connp->conn_xmit_ipp; + coas.coa_ancillary = B_FALSE; + coas.coa_changed = 0; 
+ } + + err = udp_do_opt_set(coa, level, name, inlen, invalp, + cr, checkonly); + if (err != 0) { +errout: + if (!coa->coa_ancillary) + ixa_refrele(coa->coa_ixa); + *outlenp = 0; + return (err); + } + /* Handle DHCPINIT here outside of lock */ + if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { + uint_t ifindex; + ill_t *ill; + + ifindex = *(uint_t *)invalp; + if (ifindex == 0) { + ill = NULL; + } else { + ill = ill_lookup_on_ifindex(ifindex, B_FALSE, + coa->coa_ixa->ixa_ipst); + if (ill == NULL) { + err = ENXIO; + goto errout; + } + + mutex_enter(&ill->ill_lock); + if (ill->ill_state_flags & ILL_CONDEMNED) { + mutex_exit(&ill->ill_lock); + ill_refrele(ill); + err = ENXIO; + goto errout; + } + if (IS_VNI(ill)) { + mutex_exit(&ill->ill_lock); + ill_refrele(ill); + err = EINVAL; + goto errout; + } + } + mutex_enter(&connp->conn_lock); + + if (connp->conn_dhcpinit_ill != NULL) { + /* + * We've locked the conn so conn_cleanup_ill() + * cannot clear conn_dhcpinit_ill -- so it's + * safe to access the ill. + */ + ill_t *oill = connp->conn_dhcpinit_ill; + + ASSERT(oill->ill_dhcpinit != 0); + atomic_dec_32(&oill->ill_dhcpinit); + ill_set_inputfn(connp->conn_dhcpinit_ill); + connp->conn_dhcpinit_ill = NULL; + } + + if (ill != NULL) { + connp->conn_dhcpinit_ill = ill; + atomic_inc_32(&ill->ill_dhcpinit); + ill_set_inputfn(ill); + mutex_exit(&connp->conn_lock); + mutex_exit(&ill->ill_lock); + ill_refrele(ill); + } else { + mutex_exit(&connp->conn_lock); + } + } + + /* + * Common case of OK return with outval same as inval. + */ + if (invalp != outvalp) { + /* don't trust bcopy for identical src/dst */ + (void) bcopy(invalp, outvalp, inlen); + } + *outlenp = inlen; + + /* + * If this was not ancillary data, then we rebuild the headers, + * update the IRE/NCE, and IPsec as needed. + * Since the label depends on the destination we go through + * ip_set_destination first. 
+ */ + if (coa->coa_ancillary) { + return (0); + } + + if (coa->coa_changed & COA_ROUTE_CHANGED) { + in6_addr_t saddr, faddr, nexthop; + in_port_t fport; + + /* + * We clear lastdst to make sure we pick up the change + * next time sending. + * If we are connected we re-cache the information. + * We ignore errors to preserve BSD behavior. + * Note that we don't redo IPsec policy lookup here + * since the final destination (or source) didn't change. + */ + mutex_enter(&connp->conn_lock); + connp->conn_v6lastdst = ipv6_all_zeros; + + ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, + &connp->conn_faddr_v6, &nexthop); + saddr = connp->conn_saddr_v6; + faddr = connp->conn_faddr_v6; + fport = connp->conn_fport; + mutex_exit(&connp->conn_lock); + + if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && + !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { + (void) ip_attr_connect(connp, coa->coa_ixa, + &saddr, &faddr, &nexthop, fport, NULL, NULL, + IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); + } + } + + ixa_refrele(coa->coa_ixa); + + if (coa->coa_changed & COA_HEADER_CHANGED) { + /* + * Rebuild the header template if we are connected. + * Otherwise clear conn_v6lastdst so we rebuild the header + * in the data path. 
+ */ + mutex_enter(&connp->conn_lock); + if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && + !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { + err = udp_build_hdr_template(connp, + &connp->conn_saddr_v6, &connp->conn_faddr_v6, + connp->conn_fport, connp->conn_flowinfo); + if (err != 0) { + mutex_exit(&connp->conn_lock); + return (err); + } + } else { + connp->conn_v6lastdst = ipv6_all_zeros; + } + mutex_exit(&connp->conn_lock); + } + if (coa->coa_changed & COA_RCVBUF_CHANGED) { + (void) proto_set_rx_hiwat(connp->conn_rq, connp, + connp->conn_rcvbuf); + } + if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { + connp->conn_wq->q_hiwat = connp->conn_sndbuf; + } + if (coa->coa_changed & COA_WROFF_CHANGED) { + /* Increase wroff if needed */ + uint_t wroff; + + mutex_enter(&connp->conn_lock); + wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; + if (udp->udp_nat_t_endpoint) + wroff += sizeof (uint32_t); + if (wroff > connp->conn_wroff) { + connp->conn_wroff = wroff; + mutex_exit(&connp->conn_lock); + (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff); + } else { + mutex_exit(&connp->conn_lock); + } + } + return (err); } -/* ARGSUSED */ +/* This routine sets socket options. */ int udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, - void *thisdg_attrs, cred_t *cr, mblk_t *mblk) + void *thisdg_attrs, cred_t *cr) { - conn_t *connp = Q_TO_CONN(q); + conn_t *connp = Q_TO_CONN(q); int error; - udp_t *udp = connp->conn_udp; - rw_enter(&udp->udp_rwlock, RW_WRITER); error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, outlenp, outvalp, thisdg_attrs, cr); - rw_exit(&udp->udp_rwlock); return (error); } /* - * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. - * The headers include ip6i_t (if needed), ip6_t, any sticky extension - * headers, and the udp header. - * Returns failure if can't allocate memory. 
+ * Setup IP and UDP headers. + * Returns NULL on allocation failure, in which case data_mp is freed. */ -static int -udp_build_hdrs(udp_t *udp) +mblk_t * +udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, + const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, + uint32_t flowinfo, mblk_t *data_mp, int *errorp) { - udp_stack_t *us = udp->udp_us; - uchar_t *hdrs; - uint_t hdrs_len; - ip6_t *ip6h; - ip6i_t *ip6i; - udpha_t *udpha; - ip6_pkt_t *ipp = &udp->udp_sticky_ipp; - size_t sth_wroff; - conn_t *connp = udp->udp_connp; - - ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); - ASSERT(connp != NULL); + mblk_t *mp; + udpha_t *udpha; + udp_stack_t *us = connp->conn_netstack->netstack_udp; + uint_t data_len; + uint32_t cksum; + udp_t *udp = connp->conn_udp; + boolean_t insert_spi = udp->udp_nat_t_endpoint; + uint_t ulp_hdr_len; - hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; - ASSERT(hdrs_len != 0); - if (hdrs_len != udp->udp_sticky_hdrs_len) { - /* Need to reallocate */ - hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); - if (hdrs == NULL) - return (ENOMEM); + data_len = msgdsize(data_mp); + ulp_hdr_len = UDPH_SIZE; + if (insert_spi) + ulp_hdr_len += sizeof (uint32_t); - if (udp->udp_sticky_hdrs_len != 0) { - kmem_free(udp->udp_sticky_hdrs, - udp->udp_sticky_hdrs_len); - } - udp->udp_sticky_hdrs = hdrs; - udp->udp_sticky_hdrs_len = hdrs_len; + mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, + ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); + if (mp == NULL) { + ASSERT(*errorp != 0); + return (NULL); } - ip_build_hdrs_v6(udp->udp_sticky_hdrs, - udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); - /* Set header fields not in ipp */ - if (ipp->ipp_fields & IPPF_HAS_IP6I) { - ip6i = (ip6i_t *)udp->udp_sticky_hdrs; - ip6h = (ip6_t *)&ip6i[1]; + data_len += ulp_hdr_len; + ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; + + udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); + 
udpha->uha_src_port = connp->conn_lport; + udpha->uha_dst_port = dstport; + udpha->uha_checksum = 0; + udpha->uha_length = htons(data_len); + + /* + * If there was a routing option/header then conn_prepend_hdr + * has massaged it and placed the pseudo-header checksum difference + * in the cksum argument. + * + * Setup header length and prepare for ULP checksum done in IP. + * + * We make it easy for IP to include our pseudo header + * by putting our length in uha_checksum. + * The IP source, destination, and length have already been set by + * conn_prepend_hdr. + */ + cksum += data_len; + cksum = (cksum >> 16) + (cksum & 0xFFFF); + ASSERT(cksum < 0x10000); + + if (ixa->ixa_flags & IXAF_IS_IPV4) { + ipha_t *ipha = (ipha_t *)mp->b_rptr; + + ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); + + /* IP does the checksum if uha_checksum is non-zero */ + if (us->us_do_checksum) { + if (cksum == 0) + udpha->uha_checksum = 0xffff; + else + udpha->uha_checksum = htons(cksum); + } else { + udpha->uha_checksum = 0; + } } else { - ip6h = (ip6_t *)udp->udp_sticky_hdrs; + ip6_t *ip6h = (ip6_t *)mp->b_rptr; + + ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); + if (cksum == 0) + udpha->uha_checksum = 0xffff; + else + udpha->uha_checksum = htons(cksum); } - if (!(ipp->ipp_fields & IPPF_ADDR)) - ip6h->ip6_src = udp->udp_v6src; + /* Insert all-0s SPI now. 
*/ + if (insert_spi) + *((uint32_t *)(udpha + 1)) = 0; - udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); - udpha->uha_src_port = udp->udp_port; + return (mp); +} - /* Try to get everything in a single mblk */ - if (hdrs_len > udp->udp_max_hdr_len) { - udp->udp_max_hdr_len = hdrs_len; - sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; - rw_exit(&udp->udp_rwlock); - (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, - udp->udp_connp, sth_wroff); - rw_enter(&udp->udp_rwlock, RW_WRITER); - } +static int +udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, + const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) +{ + udpha_t *udpha; + int error; + + ASSERT(MUTEX_HELD(&connp->conn_lock)); + /* + * We clear lastdst to make sure we don't use the lastdst path + * next time sending since we might not have set v6dst yet. + */ + connp->conn_v6lastdst = ipv6_all_zeros; + + error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, + flowinfo); + if (error != 0) + return (error); + + /* + * Any routing header/option has been massaged. The checksum difference + * is stored in conn_sum. + */ + udpha = (udpha_t *)connp->conn_ht_ulp; + udpha->uha_src_port = connp->conn_lport; + udpha->uha_dst_port = dstport; + udpha->uha_checksum = 0; + udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ return (0); } @@ -3252,189 +2300,6 @@ udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) return (0); } -/* - * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with - * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to - * just count the length needed for allocation. If 'dbuf' is non-NULL, - * then it's assumed to be allocated to be large enough. - * - * Returns zero if trimming of the security option causes all options to go - * away. 
- */ -static size_t -copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) -{ - struct T_opthdr *toh; - size_t hol = ipp->ipp_hopoptslen; - ip6_hbh_t *dstopt = NULL; - const ip6_hbh_t *srcopt = ipp->ipp_hopopts; - size_t tlen, olen, plen; - boolean_t deleting; - const struct ip6_opt *sopt, *lastpad; - struct ip6_opt *dopt; - - if ((toh = (struct T_opthdr *)dbuf) != NULL) { - toh->level = IPPROTO_IPV6; - toh->name = IPV6_HOPOPTS; - toh->status = 0; - dstopt = (ip6_hbh_t *)(toh + 1); - } - - /* - * If labeling is enabled, then skip the label option - * but get other options if there are any. - */ - if (is_system_labeled()) { - dopt = NULL; - if (dstopt != NULL) { - /* will fill in ip6h_len later */ - dstopt->ip6h_nxt = srcopt->ip6h_nxt; - dopt = (struct ip6_opt *)(dstopt + 1); - } - sopt = (const struct ip6_opt *)(srcopt + 1); - hol -= sizeof (*srcopt); - tlen = sizeof (*dstopt); - lastpad = NULL; - deleting = B_FALSE; - /* - * This loop finds the first (lastpad pointer) of any number of - * pads that preceeds the security option, then treats the - * security option as though it were a pad, and then finds the - * next non-pad option (or end of list). - * - * It then treats the entire block as one big pad. To preserve - * alignment of any options that follow, or just the end of the - * list, it computes a minimal new padding size that keeps the - * same alignment for the next option. - * - * If it encounters just a sequence of pads with no security - * option, those are copied as-is rather than collapsed. - * - * Note that to handle the end of list case, the code makes one - * loop with 'hol' set to zero. 
- */ - for (;;) { - if (hol > 0) { - if (sopt->ip6o_type == IP6OPT_PAD1) { - if (lastpad == NULL) - lastpad = sopt; - sopt = (const struct ip6_opt *) - &sopt->ip6o_len; - hol--; - continue; - } - olen = sopt->ip6o_len + sizeof (*sopt); - if (olen > hol) - olen = hol; - if (sopt->ip6o_type == IP6OPT_PADN || - sopt->ip6o_type == ip6opt_ls) { - if (sopt->ip6o_type == ip6opt_ls) - deleting = B_TRUE; - if (lastpad == NULL) - lastpad = sopt; - sopt = (const struct ip6_opt *) - ((const char *)sopt + olen); - hol -= olen; - continue; - } - } else { - /* if nothing was copied at all, then delete */ - if (tlen == sizeof (*dstopt)) - return (0); - /* last pass; pick up any trailing padding */ - olen = 0; - } - if (deleting) { - /* - * compute aligning effect of deleted material - * to reproduce with pad. - */ - plen = ((const char *)sopt - - (const char *)lastpad) & 7; - tlen += plen; - if (dopt != NULL) { - if (plen == 1) { - dopt->ip6o_type = IP6OPT_PAD1; - } else if (plen > 1) { - plen -= sizeof (*dopt); - dopt->ip6o_type = IP6OPT_PADN; - dopt->ip6o_len = plen; - if (plen > 0) - bzero(dopt + 1, plen); - } - dopt = (struct ip6_opt *) - ((char *)dopt + plen); - } - deleting = B_FALSE; - lastpad = NULL; - } - /* if there's uncopied padding, then copy that now */ - if (lastpad != NULL) { - olen += (const char *)sopt - - (const char *)lastpad; - sopt = lastpad; - lastpad = NULL; - } - if (dopt != NULL && olen > 0) { - bcopy(sopt, dopt, olen); - dopt = (struct ip6_opt *)((char *)dopt + olen); - } - if (hol == 0) - break; - tlen += olen; - sopt = (const struct ip6_opt *) - ((const char *)sopt + olen); - hol -= olen; - } - /* go back and patch up the length value, rounded upward */ - if (dstopt != NULL) - dstopt->ip6h_len = (tlen - 1) >> 3; - } else { - tlen = hol; - if (dstopt != NULL) - bcopy(srcopt, dstopt, hol); - } - - tlen += sizeof (*toh); - if (toh != NULL) - toh->len = tlen; - - return (tlen); -} - -/* - * Update udp_rcv_opt_len from the packet. 
- * Called when options received, and when no options received but - * udp_ip_recv_opt_len has previously recorded options. - */ -static void -udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) -{ - /* Save the options if any */ - if (opt_len > 0) { - if (opt_len > udp->udp_ip_rcv_options_len) { - /* Need to allocate larger buffer */ - if (udp->udp_ip_rcv_options_len != 0) - mi_free((char *)udp->udp_ip_rcv_options); - udp->udp_ip_rcv_options_len = 0; - udp->udp_ip_rcv_options = - (uchar_t *)mi_alloc(opt_len, BPRI_HI); - if (udp->udp_ip_rcv_options != NULL) - udp->udp_ip_rcv_options_len = opt_len; - } - if (udp->udp_ip_rcv_options_len != 0) { - bcopy(opt, udp->udp_ip_rcv_options, opt_len); - /* Adjust length if we are resusing the space */ - udp->udp_ip_rcv_options_len = opt_len; - } - } else if (udp->udp_ip_rcv_options_len != 0) { - /* Clear out previously recorded options */ - mi_free((char *)udp->udp_ip_rcv_options); - udp->udp_ip_rcv_options = NULL; - udp->udp_ip_rcv_options_len = 0; - } -} - static mblk_t * udp_queue_fallback(udp_t *udp, mblk_t *mp) { @@ -3466,15 +2331,15 @@ udp_queue_fallback(udp_t *udp, mblk_t *mp) * TPI, then we'll queue the mp for later processing. */ static void -udp_ulp_recv(conn_t *connp, mblk_t *mp) +udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) { if (IPCL_IS_NONSTR(connp)) { udp_t *udp = connp->conn_udp; int error; + ASSERT(len == msgdsize(mp)); if ((*connp->conn_upcalls->su_recv) - (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, - NULL) < 0) { + (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { mutex_enter(&udp->udp_recv_lock); if (error == ENOSPC) { /* @@ -3500,282 +2365,170 @@ udp_ulp_recv(conn_t *connp, mblk_t *mp) } ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); } else { + if (is_system_labeled()) { + ASSERT(ira->ira_cred != NULL); + /* + * Provide for protocols above UDP such as RPC + * NOPID leaves db_cpid unchanged. 
+ */ + mblk_setcred(mp, ira->ira_cred, NOPID); + } + putnext(connp->conn_rq, mp); } } +/* + * This is the inbound data path. + * IP has already pulled up the IP plus UDP headers and verified alignment + * etc. + */ /* ARGSUSED2 */ static void -udp_input(void *arg1, mblk_t *mp, void *arg2) +udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) { - conn_t *connp = (conn_t *)arg1; + conn_t *connp = (conn_t *)arg1; struct T_unitdata_ind *tudi; uchar_t *rptr; /* Pointer to IP header */ int hdr_length; /* Length of IP+UDP headers */ - int opt_len; int udi_size; /* Size of T_unitdata_ind */ - int mp_len; + int pkt_len; udp_t *udp; udpha_t *udpha; - int ipversion; - ip6_pkt_t ipp; + ip_pkt_t ipps; ip6_t *ip6h; - ip6i_t *ip6i; mblk_t *mp1; - mblk_t *options_mp = NULL; - ip_pktinfo_t *pinfo = NULL; - cred_t *cr = NULL; - pid_t cpid; - uint32_t udp_ip_rcv_options_len; - udp_bits_t udp_bits; - cred_t *rcr = connp->conn_cred; - udp_stack_t *us; + uint32_t udp_ipv4_options_len; + crb_t recv_ancillary; + udp_stack_t *us; ASSERT(connp->conn_flags & IPCL_UDPCONN); udp = connp->conn_udp; us = udp->udp_us; rptr = mp->b_rptr; - ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); + + ASSERT(DB_TYPE(mp) == M_DATA); ASSERT(OK_32PTR(rptr)); + ASSERT(ira->ira_pktlen == msgdsize(mp)); + pkt_len = ira->ira_pktlen; /* - * IP should have prepended the options data in an M_CTL - * Check M_CTL "type" to make sure are not here bcos of - * a valid ICMP message + * Get a snapshot of these and allow other threads to change + * them after that. We need the same recv_ancillary when determining + * the size as when adding the ancillary data items. */ - if (DB_TYPE(mp) == M_CTL) { - if (MBLKL(mp) == sizeof (ip_pktinfo_t) && - ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == - IN_PKTINFO) { - /* - * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information - * has been prepended to the packet by IP. 
We need to - * extract the mblk and adjust the rptr - */ - pinfo = (ip_pktinfo_t *)mp->b_rptr; - options_mp = mp; - mp = mp->b_cont; - rptr = mp->b_rptr; - UDP_STAT(us, udp_in_pktinfo); - } else { - /* - * ICMP messages. - */ - udp_icmp_error(connp, mp); - return; - } - } + mutex_enter(&connp->conn_lock); + udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len; + recv_ancillary = connp->conn_recv_ancillary; + mutex_exit(&connp->conn_lock); + + hdr_length = ira->ira_ip_hdr_length; - mp_len = msgdsize(mp); /* - * This is the inbound data path. - * First, we check to make sure the IP version number is correct, - * and then pull the IP and UDP headers into the first mblk. + * IP inspected the UDP header thus all of it must be in the mblk. + * UDP length check is performed for IPv6 packets and IPv4 packets + * to check if the size of the packet as specified + * by the UDP header is the same as the length derived from the IP + * header. */ + udpha = (udpha_t *)(rptr + hdr_length); + if (pkt_len != ntohs(udpha->uha_length) + hdr_length) + goto tossit; - /* Initialize regardless if ipversion is IPv4 or IPv6 */ - ipp.ipp_fields = 0; + hdr_length += UDPH_SIZE; + ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */ - ipversion = IPH_HDR_VERSION(rptr); + /* Initialize regardless of IP version */ + ipps.ipp_fields = 0; - rw_enter(&udp->udp_rwlock, RW_READER); - udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; - udp_bits = udp->udp_bits; - rw_exit(&udp->udp_rwlock); + if (((ira->ira_flags & IRAF_IPV4_OPTIONS) || + udp_ipv4_options_len > 0) && + connp->conn_family == AF_INET) { + int err; - switch (ipversion) { - case IPV4_VERSION: - ASSERT(MBLKL(mp) >= sizeof (ipha_t)); - ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); - hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; - opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); - if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && - udp->udp_family == AF_INET) { - /* - * Record/update udp_ip_rcv_options with 
the lock - * held. Not needed for AF_INET6 sockets - * since they don't support a getsockopt of IP_OPTIONS. - */ - rw_enter(&udp->udp_rwlock, RW_WRITER); - udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, - opt_len); - rw_exit(&udp->udp_rwlock); - } - /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ - if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && - udp->udp_ip_recvpktinfo) { - if (pinfo->ip_pkt_flags & IPF_RECVIF) { - ipp.ipp_fields |= IPPF_IFINDEX; - ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; - } - } - break; - case IPV6_VERSION: /* - * IPv6 packets can only be received by applications - * that are prepared to receive IPv6 addresses. - * The IP fanout must ensure this. + * Record/update udp_recv_ipp with the lock + * held. Not needed for AF_INET6 sockets + * since they don't support a getsockopt of IP_OPTIONS. */ - ASSERT(udp->udp_family == AF_INET6); + mutex_enter(&connp->conn_lock); + err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp, + B_TRUE); + if (err != 0) { + /* Allocation failed. 
Drop packet */ + mutex_exit(&connp->conn_lock); + freemsg(mp); + BUMP_MIB(&us->us_udp_mib, udpInErrors); + return; + } + mutex_exit(&connp->conn_lock); + } - ip6h = (ip6_t *)rptr; - ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); + if (recv_ancillary.crb_all != 0) { + /* + * Record packet information in the ip_pkt_t + */ + if (ira->ira_flags & IRAF_IS_IPV4) { + ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION); + ASSERT(MBLKL(mp) >= sizeof (ipha_t)); + ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); + ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr)); - if (ip6h->ip6_nxt != IPPROTO_UDP) { + (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE); + } else { uint8_t nexthdrp; - /* Look for ifindex information */ - if (ip6h->ip6_nxt == IPPROTO_RAW) { - ip6i = (ip6i_t *)ip6h; - if ((uchar_t *)&ip6i[1] > mp->b_wptr) - goto tossit; - - if (ip6i->ip6i_flags & IP6I_IFINDEX) { - ASSERT(ip6i->ip6i_ifindex != 0); - ipp.ipp_fields |= IPPF_IFINDEX; - ipp.ipp_ifindex = ip6i->ip6i_ifindex; - } - rptr = (uchar_t *)&ip6i[1]; - mp->b_rptr = rptr; - if (rptr == mp->b_wptr) { - mp1 = mp->b_cont; - freeb(mp); - mp = mp1; - rptr = mp->b_rptr; - } - if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) - goto tossit; - ip6h = (ip6_t *)rptr; - mp_len = msgdsize(mp); - } + + ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION); /* - * Find any potentially interesting extension headers - * as well as the length of the IPv6 + extension - * headers. + * IPv6 packets can only be received by applications + * that are prepared to receive IPv6 addresses. + * The IP fanout must ensure this. */ - hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + - UDPH_SIZE; - ASSERT(nexthdrp == IPPROTO_UDP); - } else { - hdr_length = IPV6_HDR_LEN + UDPH_SIZE; - ip6i = NULL; - } - break; - default: - ASSERT(0); - } + ASSERT(connp->conn_family == AF_INET6); - /* - * IP inspected the UDP header thus all of it must be in the mblk. 
- * UDP length check is performed for IPv6 packets and IPv4 packets - * to check if the size of the packet as specified - * by the header is the same as the physical size of the packet. - * FIXME? Didn't IP already check this? - */ - udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); - if ((MBLKL(mp) < hdr_length) || - (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { - goto tossit; - } + ip6h = (ip6_t *)rptr; - - /* Walk past the headers unless UDP_RCVHDR was set. */ - if (!udp_bits.udpb_rcvhdr) { - mp->b_rptr = rptr + hdr_length; - mp_len -= hdr_length; + /* We don't care about the length, but need the ipp */ + hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, + &nexthdrp); + ASSERT(hdr_length == ira->ira_ip_hdr_length); + /* Restore */ + hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE; + ASSERT(nexthdrp == IPPROTO_UDP); + } } /* * This is the inbound data path. Packets are passed upstream as - * T_UNITDATA_IND messages with full IP headers still attached. + * T_UNITDATA_IND messages. */ - if (udp->udp_family == AF_INET) { + if (connp->conn_family == AF_INET) { sin_t *sin; ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); /* * Normally only send up the source address. - * If IP_RECVDSTADDR is set we include the destination IP - * address as an option. With IP_RECVOPTS we include all - * the IP options. + * If any ancillary data items are wanted we add those. 
*/ udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); - if (udp_bits.udpb_recvdstaddr) { - udi_size += sizeof (struct T_opthdr) + - sizeof (struct in_addr); - UDP_STAT(us, udp_in_recvdstaddr); - } - - if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && - (pinfo->ip_pkt_flags & IPF_RECVADDR)) { - udi_size += sizeof (struct T_opthdr) + - sizeof (struct in_pktinfo); - UDP_STAT(us, udp_ip_rcvpktinfo); - } - - if ((udp_bits.udpb_recvopts) && opt_len > 0) { - udi_size += sizeof (struct T_opthdr) + opt_len; - UDP_STAT(us, udp_in_recvopts); - } - - /* - * If the IP_RECVSLLA or the IP_RECVIF is set then allocate - * space accordingly - */ - if ((udp_bits.udpb_recvif) && (pinfo != NULL) && - (pinfo->ip_pkt_flags & IPF_RECVIF)) { - udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); - UDP_STAT(us, udp_in_recvif); - } - - if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && - (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { - udi_size += sizeof (struct T_opthdr) + - sizeof (struct sockaddr_dl); - UDP_STAT(us, udp_in_recvslla); - } - - if ((udp_bits.udpb_recvucred) && - (cr = msg_getcred(mp, &cpid)) != NULL) { - udi_size += sizeof (struct T_opthdr) + ucredsize; - UDP_STAT(us, udp_in_recvucred); - } - - /* - * If SO_TIMESTAMP is set allocate the appropriate sized - * buffer. Since gethrestime() expects a pointer aligned - * argument, we allocate space necessary for extra - * alignment (even though it might not be used). - */ - if (udp_bits.udpb_timestamp) { - udi_size += sizeof (struct T_opthdr) + - sizeof (timestruc_t) + _POINTER_ALIGNMENT; - UDP_STAT(us, udp_in_timestamp); - } - - /* - * If IP_RECVTTL is set allocate the appropriate sized buffer - */ - if (udp_bits.udpb_recvttl) { - udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); - UDP_STAT(us, udp_in_recvttl); + if (recv_ancillary.crb_all != 0) { + udi_size += conn_recvancillary_size(connp, + recv_ancillary, ira, mp, &ipps); } /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ mp1 = allocb(udi_size, BPRI_MED); if (mp1 == NULL) { freemsg(mp); - if (options_mp != NULL) - freeb(options_mp); BUMP_MIB(&us->us_udp_mib, udpInErrors); return; } mp1->b_cont = mp; - mp = mp1; - mp->b_datap->db_type = M_PROTO; - tudi = (struct T_unitdata_ind *)mp->b_rptr; - mp->b_wptr = (uchar_t *)tudi + udi_size; + mp1->b_datap->db_type = M_PROTO; + tudi = (struct T_unitdata_ind *)mp1->b_rptr; + mp1->b_wptr = (uchar_t *)tudi + udi_size; tudi->PRIM_type = T_UNITDATA_IND; tudi->SRC_length = sizeof (sin_t); tudi->SRC_offset = sizeof (struct T_unitdata_ind); @@ -3786,7 +2539,7 @@ udp_input(void *arg1, mblk_t *mp, void *arg2) sin = (sin_t *)&tudi[1]; sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; sin->sin_port = udpha->uha_src_port; - sin->sin_family = udp->udp_family; + sin->sin_family = connp->conn_family; *(uint32_t *)&sin->sin_zero[0] = 0; *(uint32_t *)&sin->sin_zero[4] = 0; @@ -3795,166 +2548,8 @@ udp_input(void *arg1, mblk_t *mp, void *arg2) * IP_RECVTTL has been set. */ if (udi_size != 0) { - /* - * Copy in destination address before options to avoid - * any padding issues. 
- */ - char *dstopt; - - dstopt = (char *)&sin[1]; - if (udp_bits.udpb_recvdstaddr) { - struct T_opthdr *toh; - ipaddr_t *dstptr; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVDSTADDR; - toh->len = sizeof (struct T_opthdr) + - sizeof (ipaddr_t); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - dstptr = (ipaddr_t *)dstopt; - *dstptr = ((ipha_t *)rptr)->ipha_dst; - dstopt += sizeof (ipaddr_t); - udi_size -= toh->len; - } - - if (udp_bits.udpb_recvopts && opt_len > 0) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVOPTS; - toh->len = sizeof (struct T_opthdr) + opt_len; - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, - opt_len); - dstopt += opt_len; - udi_size -= toh->len; - } - - if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && - (pinfo->ip_pkt_flags & IPF_RECVADDR)) { - struct T_opthdr *toh; - struct in_pktinfo *pktinfop; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_PKTINFO; - toh->len = sizeof (struct T_opthdr) + - sizeof (*pktinfop); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - pktinfop = (struct in_pktinfo *)dstopt; - pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; - pktinfop->ipi_spec_dst = - pinfo->ip_pkt_match_addr; - pktinfop->ipi_addr.s_addr = - ((ipha_t *)rptr)->ipha_dst; - - dstopt += sizeof (struct in_pktinfo); - udi_size -= toh->len; - } - - if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && - (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { - - struct T_opthdr *toh; - struct sockaddr_dl *dstptr; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVSLLA; - toh->len = sizeof (struct T_opthdr) + - sizeof (struct sockaddr_dl); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - dstptr = (struct sockaddr_dl *)dstopt; - bcopy(&pinfo->ip_pkt_slla, dstptr, - sizeof (struct sockaddr_dl)); - dstopt += sizeof (struct 
sockaddr_dl); - udi_size -= toh->len; - } - - if ((udp_bits.udpb_recvif) && (pinfo != NULL) && - (pinfo->ip_pkt_flags & IPF_RECVIF)) { - - struct T_opthdr *toh; - uint_t *dstptr; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVIF; - toh->len = sizeof (struct T_opthdr) + - sizeof (uint_t); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - dstptr = (uint_t *)dstopt; - *dstptr = pinfo->ip_pkt_ifindex; - dstopt += sizeof (uint_t); - udi_size -= toh->len; - } - - if (cr != NULL) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = SOL_SOCKET; - toh->name = SCM_UCRED; - toh->len = sizeof (struct T_opthdr) + ucredsize; - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - (void) cred2ucred(cr, cpid, dstopt, rcr); - dstopt += ucredsize; - udi_size -= toh->len; - } - - if (udp_bits.udpb_timestamp) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = SOL_SOCKET; - toh->name = SCM_TIMESTAMP; - toh->len = sizeof (struct T_opthdr) + - sizeof (timestruc_t) + _POINTER_ALIGNMENT; - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - /* Align for gethrestime() */ - dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, - sizeof (intptr_t)); - gethrestime((timestruc_t *)dstopt); - dstopt = (char *)toh + toh->len; - udi_size -= toh->len; - } - - /* - * CAUTION: - * Due to aligment issues - * Processing of IP_RECVTTL option - * should always be the last. Adding - * any option processing after this will - * cause alignment panic. 
- */ - if (udp_bits.udpb_recvttl) { - struct T_opthdr *toh; - uint8_t *dstptr; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVTTL; - toh->len = sizeof (struct T_opthdr) + - sizeof (uint8_t); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - dstptr = (uint8_t *)dstopt; - *dstptr = ((ipha_t *)rptr)->ipha_ttl; - dstopt += sizeof (uint8_t); - udi_size -= toh->len; - } - - /* Consumed all of allocated space */ - ASSERT(udi_size == 0); + conn_recvancillary_add(connp, recv_ancillary, ira, + &ipps, (uchar_t *)&sin[1], udi_size); } } else { sin6_t *sin6; @@ -3968,89 +2563,21 @@ udp_input(void *arg1, mblk_t *mp, void *arg2) */ udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); - if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| - IPPF_RTHDR|IPPF_IFINDEX)) { - if ((udp_bits.udpb_ipv6_recvhopopts) && - (ipp.ipp_fields & IPPF_HOPOPTS)) { - size_t hlen; - - UDP_STAT(us, udp_in_recvhopopts); - hlen = copy_hop_opts(&ipp, NULL); - if (hlen == 0) - ipp.ipp_fields &= ~IPPF_HOPOPTS; - udi_size += hlen; - } - if (((udp_bits.udpb_ipv6_recvdstopts) || - udp_bits.udpb_old_ipv6_recvdstopts) && - (ipp.ipp_fields & IPPF_DSTOPTS)) { - udi_size += sizeof (struct T_opthdr) + - ipp.ipp_dstoptslen; - UDP_STAT(us, udp_in_recvdstopts); - } - if ((((udp_bits.udpb_ipv6_recvdstopts) && - udp_bits.udpb_ipv6_recvrthdr && - (ipp.ipp_fields & IPPF_RTHDR)) || - (udp_bits.udpb_ipv6_recvrthdrdstopts)) && - (ipp.ipp_fields & IPPF_RTDSTOPTS)) { - udi_size += sizeof (struct T_opthdr) + - ipp.ipp_rtdstoptslen; - UDP_STAT(us, udp_in_recvrtdstopts); - } - if ((udp_bits.udpb_ipv6_recvrthdr) && - (ipp.ipp_fields & IPPF_RTHDR)) { - udi_size += sizeof (struct T_opthdr) + - ipp.ipp_rthdrlen; - UDP_STAT(us, udp_in_recvrthdr); - } - if ((udp_bits.udpb_ip_recvpktinfo) && - (ipp.ipp_fields & IPPF_IFINDEX)) { - udi_size += sizeof (struct T_opthdr) + - sizeof (struct in6_pktinfo); - UDP_STAT(us, udp_in_recvpktinfo); - } - - } - if 
((udp_bits.udpb_recvucred) && - (cr = msg_getcred(mp, &cpid)) != NULL) { - udi_size += sizeof (struct T_opthdr) + ucredsize; - UDP_STAT(us, udp_in_recvucred); - } - - /* - * If SO_TIMESTAMP is set allocate the appropriate sized - * buffer. Since gethrestime() expects a pointer aligned - * argument, we allocate space necessary for extra - * alignment (even though it might not be used). - */ - if (udp_bits.udpb_timestamp) { - udi_size += sizeof (struct T_opthdr) + - sizeof (timestruc_t) + _POINTER_ALIGNMENT; - UDP_STAT(us, udp_in_timestamp); - } - - if (udp_bits.udpb_ipv6_recvhoplimit) { - udi_size += sizeof (struct T_opthdr) + sizeof (int); - UDP_STAT(us, udp_in_recvhoplimit); - } - - if (udp_bits.udpb_ipv6_recvtclass) { - udi_size += sizeof (struct T_opthdr) + sizeof (int); - UDP_STAT(us, udp_in_recvtclass); + if (recv_ancillary.crb_all != 0) { + udi_size += conn_recvancillary_size(connp, + recv_ancillary, ira, mp, &ipps); } mp1 = allocb(udi_size, BPRI_MED); if (mp1 == NULL) { freemsg(mp); - if (options_mp != NULL) - freeb(options_mp); BUMP_MIB(&us->us_udp_mib, udpInErrors); return; } mp1->b_cont = mp; - mp = mp1; - mp->b_datap->db_type = M_PROTO; - tudi = (struct T_unitdata_ind *)mp->b_rptr; - mp->b_wptr = (uchar_t *)tudi + udi_size; + mp1->b_datap->db_type = M_PROTO; + tudi = (struct T_unitdata_ind *)mp1->b_rptr; + mp1->b_wptr = (uchar_t *)tudi + udi_size; tudi->PRIM_type = T_UNITDATA_IND; tudi->SRC_length = sizeof (sin6_t); tudi->SRC_offset = sizeof (struct T_unitdata_ind); @@ -4059,7 +2586,7 @@ udp_input(void *arg1, mblk_t *mp, void *arg2) udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); tudi->OPT_length = udi_size; sin6 = (sin6_t *)&tudi[1]; - if (ipversion == IPV4_VERSION) { + if (ira->ira_flags & IRAF_IS_IPV4) { in6_addr_t v6dst; IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, @@ -4069,196 +2596,43 @@ udp_input(void *arg1, mblk_t *mp, void *arg2) sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; sin6->__sin6_src_id = 
ip_srcid_find_addr(&v6dst, - connp->conn_zoneid, us->us_netstack); + IPCL_ZONEID(connp), us->us_netstack); } else { + ip6h = (ip6_t *)rptr; + sin6->sin6_addr = ip6h->ip6_src; /* No sin6_flowinfo per API */ sin6->sin6_flowinfo = 0; - /* For link-scope source pass up scope id */ - if ((ipp.ipp_fields & IPPF_IFINDEX) && - IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) - sin6->sin6_scope_id = ipp.ipp_ifindex; + /* For link-scope pass up scope id */ + if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) + sin6->sin6_scope_id = ira->ira_ruifindex; else sin6->sin6_scope_id = 0; sin6->__sin6_src_id = ip_srcid_find_addr( - &ip6h->ip6_dst, connp->conn_zoneid, + &ip6h->ip6_dst, IPCL_ZONEID(connp), us->us_netstack); } sin6->sin6_port = udpha->uha_src_port; - sin6->sin6_family = udp->udp_family; + sin6->sin6_family = connp->conn_family; if (udi_size != 0) { - uchar_t *dstopt; - - dstopt = (uchar_t *)&sin6[1]; - if ((udp_bits.udpb_ip_recvpktinfo) && - (ipp.ipp_fields & IPPF_IFINDEX)) { - struct T_opthdr *toh; - struct in6_pktinfo *pkti; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IPV6; - toh->name = IPV6_PKTINFO; - toh->len = sizeof (struct T_opthdr) + - sizeof (*pkti); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - pkti = (struct in6_pktinfo *)dstopt; - if (ipversion == IPV6_VERSION) - pkti->ipi6_addr = ip6h->ip6_dst; - else - IN6_IPADDR_TO_V4MAPPED( - ((ipha_t *)rptr)->ipha_dst, - &pkti->ipi6_addr); - pkti->ipi6_ifindex = ipp.ipp_ifindex; - dstopt += sizeof (*pkti); - udi_size -= toh->len; - } - if (udp_bits.udpb_ipv6_recvhoplimit) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IPV6; - toh->name = IPV6_HOPLIMIT; - toh->len = sizeof (struct T_opthdr) + - sizeof (uint_t); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - if (ipversion == IPV6_VERSION) - *(uint_t *)dstopt = ip6h->ip6_hops; - else - *(uint_t *)dstopt = - ((ipha_t *)rptr)->ipha_ttl; - dstopt += sizeof (uint_t); - udi_size -= toh->len; - } - if 
(udp_bits.udpb_ipv6_recvtclass) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IPV6; - toh->name = IPV6_TCLASS; - toh->len = sizeof (struct T_opthdr) + - sizeof (uint_t); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - if (ipversion == IPV6_VERSION) { - *(uint_t *)dstopt = - IPV6_FLOW_TCLASS(ip6h->ip6_flow); - } else { - ipha_t *ipha = (ipha_t *)rptr; - *(uint_t *)dstopt = - ipha->ipha_type_of_service; - } - dstopt += sizeof (uint_t); - udi_size -= toh->len; - } - if ((udp_bits.udpb_ipv6_recvhopopts) && - (ipp.ipp_fields & IPPF_HOPOPTS)) { - size_t hlen; - - hlen = copy_hop_opts(&ipp, dstopt); - dstopt += hlen; - udi_size -= hlen; - } - if ((udp_bits.udpb_ipv6_recvdstopts) && - (udp_bits.udpb_ipv6_recvrthdr) && - (ipp.ipp_fields & IPPF_RTHDR) && - (ipp.ipp_fields & IPPF_RTDSTOPTS)) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IPV6; - toh->name = IPV6_DSTOPTS; - toh->len = sizeof (struct T_opthdr) + - ipp.ipp_rtdstoptslen; - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - bcopy(ipp.ipp_rtdstopts, dstopt, - ipp.ipp_rtdstoptslen); - dstopt += ipp.ipp_rtdstoptslen; - udi_size -= toh->len; - } - if ((udp_bits.udpb_ipv6_recvrthdr) && - (ipp.ipp_fields & IPPF_RTHDR)) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IPV6; - toh->name = IPV6_RTHDR; - toh->len = sizeof (struct T_opthdr) + - ipp.ipp_rthdrlen; - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); - dstopt += ipp.ipp_rthdrlen; - udi_size -= toh->len; - } - if ((udp_bits.udpb_ipv6_recvdstopts) && - (ipp.ipp_fields & IPPF_DSTOPTS)) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IPV6; - toh->name = IPV6_DSTOPTS; - toh->len = sizeof (struct T_opthdr) + - ipp.ipp_dstoptslen; - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - bcopy(ipp.ipp_dstopts, dstopt, - ipp.ipp_dstoptslen); - 
dstopt += ipp.ipp_dstoptslen; - udi_size -= toh->len; - } - if (cr != NULL) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = SOL_SOCKET; - toh->name = SCM_UCRED; - toh->len = sizeof (struct T_opthdr) + ucredsize; - toh->status = 0; - (void) cred2ucred(cr, cpid, &toh[1], rcr); - dstopt += toh->len; - udi_size -= toh->len; - } - if (udp_bits.udpb_timestamp) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = SOL_SOCKET; - toh->name = SCM_TIMESTAMP; - toh->len = sizeof (struct T_opthdr) + - sizeof (timestruc_t) + _POINTER_ALIGNMENT; - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - /* Align for gethrestime() */ - dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, - sizeof (intptr_t)); - gethrestime((timestruc_t *)dstopt); - dstopt = (uchar_t *)toh + toh->len; - udi_size -= toh->len; - } - - /* Consumed all of allocated space */ - ASSERT(udi_size == 0); + conn_recvancillary_add(connp, recv_ancillary, ira, + &ipps, (uchar_t *)&sin6[1], udi_size); } -#undef sin6 - /* No IP_RECVDSTADDR for IPv6. */ } - BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); - if (options_mp != NULL) - freeb(options_mp); - - udp_ulp_recv(connp, mp); + /* Walk past the headers unless IP_RECVHDR was set. 
*/ + if (!udp->udp_rcvhdr) { + mp->b_rptr = rptr + hdr_length; + pkt_len -= hdr_length; + } + BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); + udp_ulp_recv(connp, mp1, pkt_len, ira); return; tossit: freemsg(mp); - if (options_mp != NULL) - freeb(options_mp); BUMP_MIB(&us->us_udp_mib, udpInErrors); } @@ -4386,23 +2760,34 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) needattr = B_TRUE; break; } + mutex_enter(&connp->conn_lock); + if (udp->udp_state == TS_DATA_XFER && + connp->conn_ixa->ixa_tsl != NULL) { + ts_label_t *tsl; + + tsl = connp->conn_ixa->ixa_tsl; + mlp.tme_flags |= MIB2_TMEF_IS_LABELED; + mlp.tme_doi = label2doi(tsl); + mlp.tme_label = *label2bslabel(tsl); + needattr = B_TRUE; + } + mutex_exit(&connp->conn_lock); /* * Create an IPv4 table entry for IPv4 entries and also * any IPv6 entries which are bound to in6addr_any * (i.e. anything a IPv4 peer could connect/send to). */ - if (udp->udp_ipversion == IPV4_VERSION || + if (connp->conn_ipversion == IPV4_VERSION || (udp->udp_state <= TS_IDLE && - IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { + IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) { ude.udpEntryInfo.ue_state = state; /* * If in6addr_any this will set it to * INADDR_ANY */ - ude.udpLocalAddress = - V4_PART_OF_V6(udp->udp_v6src); - ude.udpLocalPort = ntohs(udp->udp_port); + ude.udpLocalAddress = connp->conn_laddr_v4; + ude.udpLocalPort = ntohs(connp->conn_lport); if (udp->udp_state == TS_DATA_XFER) { /* * Can potentially get here for @@ -4414,9 +2799,9 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) * this part of the code. */ ude.udpEntryInfo.ue_RemoteAddress = - V4_PART_OF_V6(udp->udp_v6dst); + connp->conn_faddr_v4; ude.udpEntryInfo.ue_RemotePort = - ntohs(udp->udp_dstport); + ntohs(connp->conn_fport); } else { ude.udpEntryInfo.ue_RemoteAddress = 0; ude.udpEntryInfo.ue_RemotePort = 0; @@ -4429,10 +2814,10 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) */ ude.udpInstance = (uint32_t)(uintptr_t)udp; ude.udpCreationProcess = - (udp->udp_open_pid < 0) ? 
+ (connp->conn_cpid < 0) ? MIB2_UNKNOWN_PROCESS : - udp->udp_open_pid; - ude.udpCreationTime = udp->udp_open_time; + connp->conn_cpid; + ude.udpCreationTime = connp->conn_open_time; (void) snmp_append_data2(mp_conn_ctl->b_cont, &mp_conn_tail, (char *)&ude, sizeof (ude)); @@ -4442,16 +2827,24 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) mp_attr_ctl->b_cont, &mp_attr_tail, (char *)&mlp, sizeof (mlp)); } - if (udp->udp_ipversion == IPV6_VERSION) { + if (connp->conn_ipversion == IPV6_VERSION) { ude6.udp6EntryInfo.ue_state = state; - ude6.udp6LocalAddress = udp->udp_v6src; - ude6.udp6LocalPort = ntohs(udp->udp_port); - ude6.udp6IfIndex = udp->udp_bound_if; + ude6.udp6LocalAddress = connp->conn_laddr_v6; + ude6.udp6LocalPort = ntohs(connp->conn_lport); + mutex_enter(&connp->conn_lock); + if (connp->conn_ixa->ixa_flags & + IXAF_SCOPEID_SET) { + ude6.udp6IfIndex = + connp->conn_ixa->ixa_scopeid; + } else { + ude6.udp6IfIndex = connp->conn_bound_if; + } + mutex_exit(&connp->conn_lock); if (udp->udp_state == TS_DATA_XFER) { ude6.udp6EntryInfo.ue_RemoteAddress = - udp->udp_v6dst; + connp->conn_faddr_v6; ude6.udp6EntryInfo.ue_RemotePort = - ntohs(udp->udp_dstport); + ntohs(connp->conn_fport); } else { ude6.udp6EntryInfo.ue_RemoteAddress = sin6_null.sin6_addr; @@ -4464,10 +2857,10 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) */ ude6.udp6Instance = (uint32_t)(uintptr_t)udp; ude6.udp6CreationProcess = - (udp->udp_open_pid < 0) ? + (connp->conn_cpid < 0) ? MIB2_UNKNOWN_PROCESS : - udp->udp_open_pid; - ude6.udp6CreationTime = udp->udp_open_time; + connp->conn_cpid; + ude6.udp6CreationTime = connp->conn_open_time; (void) snmp_append_data2(mp6_conn_ctl->b_cont, &mp6_conn_tail, (char *)&ude6, @@ -4548,39 +2941,34 @@ udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, * passed in mp. This message is freed. 
*/ static void -udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, - t_scalar_t err) +udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) { struct T_unitdata_req *tudr; mblk_t *mp1; + uchar_t *destaddr; + t_scalar_t destlen; uchar_t *optaddr; t_scalar_t optlen; - if (DB_TYPE(mp) == M_DATA) { - ASSERT(destaddr != NULL && destlen != 0); - optaddr = NULL; - optlen = 0; - } else { - if ((mp->b_wptr < mp->b_rptr) || - (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { - goto done; - } - tudr = (struct T_unitdata_req *)mp->b_rptr; - destaddr = mp->b_rptr + tudr->DEST_offset; - if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || - destaddr + tudr->DEST_length < mp->b_rptr || - destaddr + tudr->DEST_length > mp->b_wptr) { - goto done; - } - optaddr = mp->b_rptr + tudr->OPT_offset; - if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || - optaddr + tudr->OPT_length < mp->b_rptr || - optaddr + tudr->OPT_length > mp->b_wptr) { - goto done; - } - destlen = tudr->DEST_length; - optlen = tudr->OPT_length; + if ((mp->b_wptr < mp->b_rptr) || + (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { + goto done; } + tudr = (struct T_unitdata_req *)mp->b_rptr; + destaddr = mp->b_rptr + tudr->DEST_offset; + if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || + destaddr + tudr->DEST_length < mp->b_rptr || + destaddr + tudr->DEST_length > mp->b_wptr) { + goto done; + } + optaddr = mp->b_rptr + tudr->OPT_offset; + if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || + optaddr + tudr->OPT_length < mp->b_rptr || + optaddr + tudr->OPT_length > mp->b_wptr) { + goto done; + } + destlen = tudr->DEST_length; + optlen = tudr->OPT_length; mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, (char *)optaddr, optlen, err); @@ -4685,1093 +3073,721 @@ retry: return (port); } +/* + * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 + * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from + * the TPI options, otherwise we take them from msg_control. 
+ * If neither sin nor sin6 is set it is a connected socket and we use conn_faddr. + * Always consumes mp; never consumes tudr_mp. + */ static int -udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) +udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, + mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) { - int err; - cred_t *cred; - cred_t *orig_cred = NULL; - cred_t *effective_cred = NULL; - uchar_t opt_storage[IP_MAX_OPT_LENGTH]; - udp_t *udp = Q_TO_UDP(wq); + udp_t *udp = connp->conn_udp; udp_stack_t *us = udp->udp_us; + int error; + ip_xmit_attr_t *ixa; + ip_pkt_t *ipp; + in6_addr_t v6src; + in6_addr_t v6dst; + in6_addr_t v6nexthop; + in_port_t dstport; + uint32_t flowinfo; + uint_t srcid; + int is_absreq_failure = 0; + conn_opt_arg_t coas, *coa; - /* - * All Solaris components should pass a db_credp - * for this message, hence we ASSERT. - * On production kernels we return an error to be robust against - * random streams modules sitting on top of us. - */ - cred = orig_cred = msg_getcred(mp, NULL); - ASSERT(cred != NULL); - if (cred == NULL) - return (EINVAL); + ASSERT(tudr_mp != NULL || msg != NULL); /* - * Verify the destination is allowed to receive packets at - * the security label of the message data. tsol_check_dest() - * may create a new effective cred for this message with a - * modified label or label flags. Note that we use the cred/label - * from the message to handle MLP + * Get ixa before checking state to handle a disconnect race. + * + * We need an exclusive copy of conn_ixa since the ancillary data + * options might modify it. That copy has no pointers hence we + * need to set them up once we've parsed the ancillary data. 
*/ - if ((err = tsol_check_dest(cred, &dst, IPV4_VERSION, - udp->udp_connp->conn_mac_mode, &effective_cred)) != 0) - goto done; - if (effective_cred != NULL) - cred = effective_cred; + ixa = conn_get_ixa_exclusive(connp); + if (ixa == NULL) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(mp); + return (ENOMEM); + } + ASSERT(cr != NULL); + ixa->ixa_cred = cr; + ixa->ixa_cpid = pid; + if (is_system_labeled()) { + /* We need to restart with a label based on the cred */ + ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); + } - /* - * Calculate the security label to be placed in the text - * of the message (if any). - */ - if ((err = tsol_compute_label(cred, dst, opt_storage, - us->us_netstack->netstack_ip)) != 0) - goto done; + /* In case previous destination was multicast or multirt */ + ip_attr_newdst(ixa); - /* - * Insert the security label in the cached ip options, - * removing any old label that may exist. - */ - if ((err = tsol_update_options(&udp->udp_ip_snd_options, - &udp->udp_ip_snd_options_len, &udp->udp_label_len, - opt_storage)) != 0) + /* Get a copy of conn_xmit_ipp since the options might change it */ + ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); + if (ipp == NULL) { + ixa_refrele(ixa); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(mp); + return (ENOMEM); + } + mutex_enter(&connp->conn_lock); + error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); + mutex_exit(&connp->conn_lock); + if (error != 0) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(mp); goto done; + } /* - * Save the destination address and creds we used to - * generate the security label text. + * Parse the options and update ixa and ipp as a result. + * Note that ixa_tsl can be updated if SCM_UCRED. + * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 
*/ - if (cred != udp->udp_effective_cred) { - if (udp->udp_effective_cred != NULL) - crfree(udp->udp_effective_cred); - crhold(cred); - udp->udp_effective_cred = cred; - } - if (orig_cred != udp->udp_last_cred) { - if (udp->udp_last_cred != NULL) - crfree(udp->udp_last_cred); - crhold(orig_cred); - udp->udp_last_cred = orig_cred; - } -done: - if (effective_cred != NULL) - crfree(effective_cred); - if (err != 0) { - DTRACE_PROBE4( - tx__ip__log__info__updatelabel__udp, - char *, "queue(1) failed to update options(2) on mp(3)", - queue_t *, wq, char *, opt_storage, mblk_t *, mp); - } - return (err); -} + coa = &coas; + coa->coa_connp = connp; + coa->coa_ixa = ixa; + coa->coa_ipp = ipp; + coa->coa_ancillary = B_TRUE; + coa->coa_changed = 0; -static mblk_t * -udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, - uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, - cred_t *cr, pid_t pid) -{ - udp_t *udp = connp->conn_udp; - mblk_t *mp1 = mp; - mblk_t *mp2; - ipha_t *ipha; - int ip_hdr_length; - uint32_t ip_len; - udpha_t *udpha; - boolean_t lock_held = B_FALSE; - in_port_t uha_src_port; - udpattrs_t attrs; - uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; - uint32_t ip_snd_opt_len = 0; - ip4_pkt_t pktinfo; - ip4_pkt_t *pktinfop = &pktinfo; - ip_opt_info_t optinfo; - ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - udp_stack_t *us = udp->udp_us; - ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; - queue_t *q = connp->conn_wq; - ire_t *ire; - in6_addr_t v6dst; - boolean_t update_lastdst = B_FALSE; - - *error = 0; - pktinfop->ip4_ill_index = 0; - pktinfop->ip4_addr = INADDR_ANY; - optinfo.ip_opt_flags = 0; - optinfo.ip_opt_ill_index = 0; + if (msg != NULL) { + error = process_auxiliary_options(connp, msg->msg_control, + msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); + } else { + struct T_unitdata_req *tudr; - if (v4dst == INADDR_ANY) - v4dst = htonl(INADDR_LOOPBACK); + tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 
+ ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); + error = tpi_optcom_buf(connp->conn_wq, tudr_mp, + &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, + coa, &is_absreq_failure); + } + if (error != 0) { + /* + * Note: No special action needed in this + * module for "is_absreq_failure" + */ + freemsg(mp); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + goto done; + } + ASSERT(is_absreq_failure == 0); + mutex_enter(&connp->conn_lock); /* - * If options passed in, feed it for verification and handling + * If laddr is unspecified then we look at sin6_src_id. + * We will give precedence to a source address set with IPV6_PKTINFO + * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't + * want ip_attr_connect to select a source (since it can fail) when + * IPV6_PKTINFO is specified. + * If this doesn't result in a source address then we get a source + * from ip_attr_connect() below. */ - attrs.udpattr_credset = B_FALSE; - if (IPCL_IS_NONSTR(connp)) { - if (msg->msg_controllen != 0) { - attrs.udpattr_ipp4 = pktinfop; - attrs.udpattr_mb = mp; - - rw_enter(&udp->udp_rwlock, RW_WRITER); - *error = process_auxiliary_options(connp, - msg->msg_control, msg->msg_controllen, - &attrs, &udp_opt_obj, udp_opt_set, cr); - rw_exit(&udp->udp_rwlock); - if (*error) - goto done; + v6src = connp->conn_saddr_v6; + if (sin != NULL) { + IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); + dstport = sin->sin_port; + flowinfo = 0; + ixa->ixa_flags &= ~IXAF_SCOPEID_SET; + ixa->ixa_flags |= IXAF_IS_IPV4; + } else if (sin6 != NULL) { + v6dst = sin6->sin6_addr; + dstport = sin6->sin6_port; + flowinfo = sin6->sin6_flowinfo; + srcid = sin6->__sin6_src_id; + if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { + ixa->ixa_scopeid = sin6->sin6_scope_id; + ixa->ixa_flags |= IXAF_SCOPEID_SET; + } else { + ixa->ixa_flags &= ~IXAF_SCOPEID_SET; } - } else { - if (DB_TYPE(mp) != M_DATA) { - mp1 = mp->b_cont; - if (((struct T_unitdata_req *) - mp->b_rptr)->OPT_length != 0) { - 
attrs.udpattr_ipp4 = pktinfop; - attrs.udpattr_mb = mp; - if (udp_unitdata_opt_process(q, mp, error, - &attrs) < 0) - goto done; - /* - * Note: success in processing options. - * mp option buffer represented by - * OPT_length/offset now potentially modified - * and contain option setting results - */ - ASSERT(*error == 0); - } + if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { + ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), + connp->conn_netstack); } + if (IN6_IS_ADDR_V4MAPPED(&v6dst)) + ixa->ixa_flags |= IXAF_IS_IPV4; + else + ixa->ixa_flags &= ~IXAF_IS_IPV4; + } else { + /* Connected case */ + v6dst = connp->conn_faddr_v6; + dstport = connp->conn_fport; + flowinfo = connp->conn_flowinfo; } + mutex_exit(&connp->conn_lock); - /* mp1 points to the M_DATA mblk carrying the packet */ - ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); - - /* - * Determine whether we need to mark the mblk with the user's - * credentials. - * If labeled then sockfs would have already done this. - */ - ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); - - ire = connp->conn_ire_cache; - if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) || - (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { - if (cr != NULL && msg_getcred(mp, NULL) == NULL) - mblk_setcred(mp, cr, pid); + /* Handle IPV6_PKTINFO setting source address. 
*/ + if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && + (ipp->ipp_fields & IPPF_ADDR)) { + if (ixa->ixa_flags & IXAF_IS_IPV4) { + if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) + v6src = ipp->ipp_addr; + } else { + if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) + v6src = ipp->ipp_addr; + } } - rw_enter(&udp->udp_rwlock, RW_READER); - lock_held = B_TRUE; + ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); + error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, + &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); - /* - * Cluster and TSOL note: - * udp.udp_v6lastdst is shared by Cluster and TSOL - * udp.udp_lastdstport is used by Cluster - * - * Both Cluster and TSOL need to update the dest addr and/or port. - * Updating is done after both Cluster and TSOL checks, protected - * by conn_lock. - */ - mutex_enter(&connp->conn_lock); - - if (cl_inet_connect2 != NULL && - (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || - V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || - udp->udp_lastdstport != port)) { - mutex_exit(&connp->conn_lock); - *error = 0; - IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); - CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); - if (*error != 0) { - *error = EHOSTUNREACH; - goto done; + switch (error) { + case 0: + break; + case EADDRNOTAVAIL: + /* + * IXAF_VERIFY_SOURCE tells us to pick a better source. + * Don't have the application see that errno + */ + error = ENETUNREACH; + goto failed; + case ENETDOWN: + /* + * Have !ipif_addr_ready address; drop packet silently + * until we can get applications to not send until we + * are ready. + */ + error = 0; + goto failed; + case EHOSTUNREACH: + case ENETUNREACH: + if (ixa->ixa_ire != NULL) { + /* + * Let conn_ip_output/ire_send_noroute return + * the error and send any local ICMP error. 
+ */ + error = 0; + break; } - update_lastdst = B_TRUE; - mutex_enter(&connp->conn_lock); + /* FALLTHRU */ + default: + failed: + freemsg(mp); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + goto done; } /* - * Check if our saved options are valid; update if not. - * TSOL Note: Since we are not in WRITER mode, UDP packets - * to different destination may require different labels, - * or worse, UDP packets to same IP address may require - * different labels due to use of shared all-zones address. - * We use conn_lock to ensure that lastdst, ip_snd_options, - * and ip_snd_options_len are consistent for the current - * destination and are updated atomically. + * We might be going to a different destination than last time, + * thus check that TX allows the communication and compute any + * needed label. + * + * TSOL Note: We have an exclusive ipp and ixa for this thread so we + * don't have to worry about concurrent threads. */ if (is_system_labeled()) { - cred_t *credp; - pid_t cpid; - /* Using UDP MLP requires SCM_UCRED from user */ if (connp->conn_mlp_type != mlptSingle && - !attrs.udpattr_credset) { - mutex_exit(&connp->conn_lock); - DTRACE_PROBE4( - tx__ip__log__info__output__udp, - char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", - mblk_t *, mp, udpattrs_t *, &attrs, queue_t *, q); - *error = EINVAL; + !((ixa->ixa_flags & IXAF_UCRED_TSL))) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + error = ECONNREFUSED; + freemsg(mp); goto done; } /* - * Update label option for this UDP socket if - * - the destination has changed, - * - the UDP socket is MLP, or - * - the cred attached to the mblk changed. + * Check whether Trusted Solaris policy allows communication + * with this host, and pretend that the destination is + * unreachable if not. + * Compute any needed label and place it in ipp_label_v4/v6. + * + * Later conn_build_hdr_template/conn_prepend_hdr takes + * ipp_label_v4/v6 to form the packet. 
+ * + * Tsol note: We have ipp structure local to this thread so + * no locking is needed. */ - credp = msg_getcred(mp, &cpid); - if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || - V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || - connp->conn_mlp_type != mlptSingle || - credp != udp->udp_last_cred) { - if ((*error = udp_update_label(q, mp, v4dst)) != 0) { - mutex_exit(&connp->conn_lock); - goto done; - } - update_lastdst = B_TRUE; + error = conn_update_label(connp, ixa, &v6dst, ipp); + if (error != 0) { + freemsg(mp); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + goto done; } - - /* - * Attach the effective cred to the mblk to ensure future - * routing decisions will be based on it's label. - */ - mblk_setcred(mp, udp->udp_effective_cred, cpid); } - if (update_lastdst) { - IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); - udp->udp_lastdstport = port; + mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, + flowinfo, mp, &error); + if (mp == NULL) { + ASSERT(error != 0); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + goto done; } - if (udp->udp_ip_snd_options_len > 0) { - ip_snd_opt_len = udp->udp_ip_snd_options_len; - bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); + if (ixa->ixa_pktlen > IP_MAXPACKET) { + error = EMSGSIZE; + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(mp); + goto done; } - mutex_exit(&connp->conn_lock); + /* We're done. Pass the packet to ip. */ + BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); - /* Add an IP header */ - ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + - (insert_spi ? 
sizeof (uint32_t) : 0); - ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; - if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || - !OK_32PTR(ipha)) { - mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); - if (mp2 == NULL) { - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: q %p (%S)", q, "allocbfail2"); - *error = ENOMEM; - goto done; - } - mp2->b_wptr = DB_LIM(mp2); - mp2->b_cont = mp1; - mp1 = mp2; - if (DB_TYPE(mp) != M_DATA) - mp->b_cont = mp1; - else - mp = mp1; - ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); - } - ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); -#ifdef _BIG_ENDIAN - /* Set version, header length, and tos */ - *(uint16_t *)&ipha->ipha_version_and_hdr_length = - ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | - udp->udp_type_of_service); - /* Set ttl and protocol */ - *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; -#else - /* Set version, header length, and tos */ - *(uint16_t *)&ipha->ipha_version_and_hdr_length = - ((udp->udp_type_of_service << 8) | - ((IP_VERSION << 4) | (ip_hdr_length>>2))); - /* Set ttl and protocol */ - *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; -#endif - if (pktinfop->ip4_addr != INADDR_ANY) { - ipha->ipha_src = pktinfop->ip4_addr; - optinfo.ip_opt_flags = IP_VERIFY_SRC; - } else { + error = conn_ip_output(mp, ixa); + /* No udpOutErrors if an error since IP increases its error counter */ + switch (error) { + case 0: + break; + case EWOULDBLOCK: + (void) ixa_check_drain_insert(connp, ixa); + error = 0; + break; + case EADDRNOTAVAIL: /* - * Copy our address into the packet. If this is zero, - * first look at __sin6_src_id for a hint. If we leave the - * source as INADDR_ANY then ip will fill in the real source - * address. + * IXAF_VERIFY_SOURCE tells us to pick a better source. 
+ * Don't have the application see that errno */ - IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); - if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { - in6_addr_t v6src; - - ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, - us->us_netstack); - IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); - } - } - uha_src_port = udp->udp_port; - if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { - rw_exit(&udp->udp_rwlock); - lock_held = B_FALSE; - } - - if (pktinfop->ip4_ill_index != 0) { - optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; + error = ENETUNREACH; + /* FALLTHRU */ + default: + mutex_enter(&connp->conn_lock); + /* + * Clear the source and v6lastdst so we call ip_attr_connect + * for the next packet and try to pick a better source. + */ + if (connp->conn_mcbc_bind) + connp->conn_saddr_v6 = ipv6_all_zeros; + else + connp->conn_saddr_v6 = connp->conn_bound_addr_v6; + connp->conn_v6lastdst = ipv6_all_zeros; + mutex_exit(&connp->conn_lock); + break; } +done: + ixa_refrele(ixa); + ip_pkt_free(ipp); + kmem_free(ipp, sizeof (*ipp)); + return (error); +} - ipha->ipha_fragment_offset_and_flags = 0; - ipha->ipha_ident = 0; - - mp1->b_rptr = (uchar_t *)ipha; - - ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= - (uintptr_t)UINT_MAX); +/* + * Handle sending an M_DATA for a connected socket. + * Handles both IPv4 and IPv6. + */ +static int +udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) +{ + udp_t *udp = connp->conn_udp; + udp_stack_t *us = udp->udp_us; + int error; + ip_xmit_attr_t *ixa; - /* Determine length of packet */ - ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); - if ((mp2 = mp1->b_cont) != NULL) { - do { - ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); - ip_len += (uint32_t)MBLKL(mp2); - } while ((mp2 = mp2->b_cont) != NULL); - } /* - * If the size of the packet is greater than the maximum allowed by - * ip, return an error. 
Passing this down could cause panics because - * the size will have wrapped and be inconsistent with the msg size. + * If no other thread is using conn_ixa this just gets a reference to + * conn_ixa. Otherwise we get a safe copy of conn_ixa. */ - if (ip_len > IP_MAXPACKET) { - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: q %p (%S)", q, "IP length exceeded"); - *error = EMSGSIZE; - goto done; + ixa = conn_get_ixa(connp, B_FALSE); + if (ixa == NULL) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(mp); + return (ENOMEM); } - ipha->ipha_length = htons((uint16_t)ip_len); - ip_len -= ip_hdr_length; - ip_len = htons((uint16_t)ip_len); - udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); - - /* Insert all-0s SPI now. */ - if (insert_spi) - *((uint32_t *)(udpha + 1)) = 0; - /* - * Copy in the destination address - */ - ipha->ipha_dst = v4dst; - - /* - * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. - */ - if (CLASSD(v4dst)) - ipha->ipha_ttl = udp->udp_multicast_ttl; - - udpha->uha_dst_port = port; - udpha->uha_src_port = uha_src_port; + ASSERT(cr != NULL); + ixa->ixa_cred = cr; + ixa->ixa_cpid = pid; - if (ip_snd_opt_len > 0) { - uint32_t cksum; + mutex_enter(&connp->conn_lock); + mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, + connp->conn_fport, connp->conn_flowinfo, &error); - bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); - lock_held = B_FALSE; - rw_exit(&udp->udp_rwlock); - /* - * Massage source route putting first source route in ipha_dst. - * Ignore the destination in T_unitdata_req. - * Create a checksum adjustment for a source route, if any. 
- */ - cksum = ip_massage_options(ipha, us->us_netstack); - cksum = (cksum & 0xFFFF) + (cksum >> 16); - cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + - (ipha->ipha_dst & 0xFFFF); - if ((int)cksum < 0) - cksum--; - cksum = (cksum & 0xFFFF) + (cksum >> 16); - /* - * IP does the checksum if uha_checksum is non-zero, - * We make it easy for IP to include our pseudo header - * by putting our length in uha_checksum. - */ - cksum += ip_len; - cksum = (cksum & 0xFFFF) + (cksum >> 16); - /* There might be a carry. */ - cksum = (cksum & 0xFFFF) + (cksum >> 16); -#ifdef _LITTLE_ENDIAN - if (us->us_do_checksum) - ip_len = (cksum << 16) | ip_len; -#else - if (us->us_do_checksum) - ip_len = (ip_len << 16) | cksum; - else - ip_len <<= 16; -#endif - } else { - /* - * IP does the checksum if uha_checksum is non-zero, - * We make it easy for IP to include our pseudo header - * by putting our length in uha_checksum. - */ - if (us->us_do_checksum) - ip_len |= (ip_len << 16); -#ifndef _LITTLE_ENDIAN - else - ip_len <<= 16; -#endif + if (mp == NULL) { + ASSERT(error != 0); + mutex_exit(&connp->conn_lock); + ixa_refrele(ixa); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(mp); + return (error); } - ASSERT(!lock_held); - /* Set UDP length and checksum */ - *((uint32_t *)&udpha->uha_length) = ip_len; - if (DB_TYPE(mp) != M_DATA) { - cred_t *cr; - pid_t cpid; + /* + * In case we got a safe copy of conn_ixa, or if opt_set made us a new + * safe copy, then we need to fill in any pointers in it. 
+ */ + if (ixa->ixa_ire == NULL) { + in6_addr_t faddr, saddr; + in6_addr_t nexthop; + in_port_t fport; + + saddr = connp->conn_saddr_v6; + faddr = connp->conn_faddr_v6; + fport = connp->conn_fport; + ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); + mutex_exit(&connp->conn_lock); - /* Move any cred from the T_UNITDATA_REQ to the packet */ - cr = msg_extractcred(mp, &cpid); - if (cr != NULL) { - if (mp1->b_datap->db_credp != NULL) - crfree(mp1->b_datap->db_credp); - mp1->b_datap->db_credp = cr; - mp1->b_datap->db_cpid = cpid; + error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, + fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | + IPDF_IPSEC); + switch (error) { + case 0: + break; + case EADDRNOTAVAIL: + /* + * IXAF_VERIFY_SOURCE tells us to pick a better source. + * Don't have the application see that errno + */ + error = ENETUNREACH; + goto failed; + case ENETDOWN: + /* + * Have !ipif_addr_ready address; drop packet silently + * until we can get applications to not send until we + * are ready. + */ + error = 0; + goto failed; + case EHOSTUNREACH: + case ENETUNREACH: + if (ixa->ixa_ire != NULL) { + /* + * Let conn_ip_output/ire_send_noroute return + * the error and send any local ICMP error. + */ + error = 0; + break; + } + /* FALLTHRU */ + default: + failed: + ixa_refrele(ixa); + freemsg(mp); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (error); } - ASSERT(mp != mp1); - freeb(mp); + } else { + /* Done with conn_t */ + mutex_exit(&connp->conn_lock); } - - /* mp has been consumed and we'll return success */ - ASSERT(*error == 0); - mp = NULL; + ASSERT(ixa->ixa_ire != NULL); /* We're done. Pass the packet to ip. 
*/ BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: q %p (%S)", q, "end"); - - if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || - CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || - connp->conn_dontroute || - connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || - optinfo.ip_opt_ill_index != 0 || - ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || - IPP_ENABLED(IPP_LOCAL_OUT, ipst) || - ipst->ips_ip_g_mrouter != NULL) { - UDP_STAT(us, udp_ip_send); - ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, - &optinfo); - } else { - udp_send_data(udp, connp->conn_wq, mp1, ipha); - } -done: - if (lock_held) - rw_exit(&udp->udp_rwlock); - if (*error != 0) { - ASSERT(mp != NULL); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + error = conn_ip_output(mp, ixa); + /* No udpOutErrors if an error since IP increases its error counter */ + switch (error) { + case 0: + break; + case EWOULDBLOCK: + (void) ixa_check_drain_insert(connp, ixa); + error = 0; + break; + case EADDRNOTAVAIL: + /* + * IXAF_VERIFY_SOURCE tells us to pick a better source. + * Don't have the application see that errno + */ + error = ENETUNREACH; + break; } - return (mp); + ixa_refrele(ixa); + return (error); } -static void -udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) +/* + * Handle sending an M_DATA to the last destination. + * Handles both IPv4 and IPv6. + * + * NOTE: The caller must hold conn_lock and we drop it here. 
+ */ +static int +udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, + ip_xmit_attr_t *ixa) { - conn_t *connp = udp->udp_connp; - ipaddr_t src, dst; - ire_t *ire; - ipif_t *ipif = NULL; - mblk_t *ire_fp_mp; - boolean_t retry_caching; - udp_stack_t *us = udp->udp_us; - ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - - dst = ipha->ipha_dst; - src = ipha->ipha_src; - ASSERT(ipha->ipha_ident == 0); - - if (CLASSD(dst)) { - int err; - - ipif = conn_get_held_ipif(connp, - &connp->conn_multicast_ipif, &err); - - if (ipif == NULL || ipif->ipif_isv6 || - (ipif->ipif_ill->ill_phyint->phyint_flags & - PHYI_LOOPBACK)) { - if (ipif != NULL) - ipif_refrele(ipif); - UDP_STAT(us, udp_ip_send); - ip_output(connp, mp, q, IP_WPUT); - return; - } - } + udp_t *udp = connp->conn_udp; + udp_stack_t *us = udp->udp_us; + int error; - retry_caching = B_FALSE; - mutex_enter(&connp->conn_lock); - ire = connp->conn_ire_cache; - ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); + ASSERT(MUTEX_HELD(&connp->conn_lock)); + ASSERT(ixa != NULL); - if (ire == NULL || ire->ire_addr != dst || - (ire->ire_marks & IRE_MARK_CONDEMNED)) { - retry_caching = B_TRUE; - } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { - ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; + ASSERT(cr != NULL); + ixa->ixa_cred = cr; + ixa->ixa_cpid = pid; - ASSERT(ipif != NULL); - if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) - retry_caching = B_TRUE; - } + mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, + connp->conn_lastdstport, connp->conn_lastflowinfo, &error); - if (!retry_caching) { - ASSERT(ire != NULL); - IRE_REFHOLD(ire); + if (mp == NULL) { + ASSERT(error != 0); mutex_exit(&connp->conn_lock); - } else { - boolean_t cached = B_FALSE; + ixa_refrele(ixa); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(mp); + return (error); + } - connp->conn_ire_cache = NULL; + /* + * In case we got a safe copy of conn_ixa, or if opt_set made us a new + * safe copy, then we 
need to fill in any pointers in it. + */ + if (ixa->ixa_ire == NULL) { + in6_addr_t lastdst, lastsrc; + in6_addr_t nexthop; + in_port_t lastport; + + lastsrc = connp->conn_v6lastsrc; + lastdst = connp->conn_v6lastdst; + lastport = connp->conn_lastdstport; + ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); mutex_exit(&connp->conn_lock); - /* Release the old ire */ - if (ire != NULL) { - IRE_REFRELE_NOTR(ire); - ire = NULL; - } - - if (CLASSD(dst)) { - ASSERT(ipif != NULL); - ire = ire_ctable_lookup(dst, 0, 0, ipif, - connp->conn_zoneid, msg_getlabel(mp), - MATCH_IRE_ILL, ipst); - } else { - ASSERT(ipif == NULL); - ire = ire_cache_lookup(dst, connp->conn_zoneid, - msg_getlabel(mp), ipst); - } - - if (ire == NULL) { - if (ipif != NULL) - ipif_refrele(ipif); - UDP_STAT(us, udp_ire_null); - ip_output(connp, mp, q, IP_WPUT); - return; - } - IRE_REFHOLD_NOTR(ire); - - mutex_enter(&connp->conn_lock); - if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && - !(ire->ire_marks & IRE_MARK_CONDEMNED)) { - irb_t *irb = ire->ire_bucket; - + error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, + &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | + IPDF_VERIFY_DST | IPDF_IPSEC); + switch (error) { + case 0: + break; + case EADDRNOTAVAIL: /* - * IRE's created for non-connection oriented transports - * are normally initialized with IRE_MARK_TEMPORARY set - * in the ire_marks. These IRE's are preferentially - * reaped when the hash chain length in the cache - * bucket exceeds the maximum value specified in - * ip[6]_ire_max_bucket_cnt. This can severely affect - * UDP performance if IRE cache entries that we need - * to reuse are continually removed. To remedy this, - * when we cache the IRE in the conn_t, we remove the - * IRE_MARK_TEMPORARY bit from the ire_marks if it was - * set. + * IXAF_VERIFY_SOURCE tells us to pick a better source. 
+ * Don't have the application see that errno */ - if (ire->ire_marks & IRE_MARK_TEMPORARY) { - rw_enter(&irb->irb_lock, RW_WRITER); - if (ire->ire_marks & IRE_MARK_TEMPORARY) { - ire->ire_marks &= ~IRE_MARK_TEMPORARY; - irb->irb_tmp_ire_cnt--; - } - rw_exit(&irb->irb_lock); + error = ENETUNREACH; + goto failed; + case ENETDOWN: + /* + * Have !ipif_addr_ready address; drop packet silently + * until we can get applications to not send until we + * are ready. + */ + error = 0; + goto failed; + case EHOSTUNREACH: + case ENETUNREACH: + if (ixa->ixa_ire != NULL) { + /* + * Let conn_ip_output/ire_send_noroute return + * the error and send any local ICMP error. + */ + error = 0; + break; } - connp->conn_ire_cache = ire; - cached = B_TRUE; + /* FALLTHRU */ + default: + failed: + ixa_refrele(ixa); + freemsg(mp); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (error); } + } else { + /* Done with conn_t */ mutex_exit(&connp->conn_lock); - - /* - * We can continue to use the ire but since it was not - * cached, we should drop the extra reference. - */ - if (!cached) - IRE_REFRELE_NOTR(ire); } - ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); - ASSERT(!CLASSD(dst) || ipif != NULL); - /* - * Check if we can take the fast-path. - * Note that "incomplete" ire's (where the link-layer for next hop - * is not resolved, or where the fast-path header in nce_fp_mp is not - * available yet) are sent down the legacy (slow) path - */ - if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || - (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || - (ire->ire_max_frag < ntohs(ipha->ipha_length)) || - ((ire->ire_nce == NULL) || - ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || - connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { - if (ipif != NULL) - ipif_refrele(ipif); - UDP_STAT(us, udp_ip_ire_send); - IRE_REFRELE(ire); - ip_output(connp, mp, q, IP_WPUT); - return; - } + /* We're done. Pass the packet to ip. 
*/ + BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); - if (src == INADDR_ANY && !connp->conn_unspec_src) { - if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) - ipha->ipha_src = ipif->ipif_src_addr; + error = conn_ip_output(mp, ixa); + /* No udpOutErrors if an error since IP increases its error counter */ + switch (error) { + case 0: + break; + case EWOULDBLOCK: + (void) ixa_check_drain_insert(connp, ixa); + error = 0; + break; + case EADDRNOTAVAIL: + /* + * IXAF_VERIFY_SOURCE tells us to pick a better source. + * Don't have the application see that errno + */ + error = ENETUNREACH; + /* FALLTHRU */ + default: + mutex_enter(&connp->conn_lock); + /* + * Clear the source and v6lastdst so we call ip_attr_connect + * for the next packet and try to pick a better source. + */ + if (connp->conn_mcbc_bind) + connp->conn_saddr_v6 = ipv6_all_zeros; else - ipha->ipha_src = ire->ire_src_addr; + connp->conn_saddr_v6 = connp->conn_bound_addr_v6; + connp->conn_v6lastdst = ipv6_all_zeros; + mutex_exit(&connp->conn_lock); + break; } - - if (ipif != NULL) - ipif_refrele(ipif); - - udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); + ixa_refrele(ixa); + return (error); } -static void -udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) + +/* + * Prepend the header template and then fill in the source and + * flowinfo. The caller needs to handle the destination address since + * it's setting is different if rthdr or source route. + * + * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. + * When it returns NULL it sets errorp. 
+ */ +static mblk_t * +udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, + const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) { - ipaddr_t src, dst; - ill_t *ill; - mblk_t *ire_fp_mp; - uint_t ire_fp_mp_len; - uint16_t *up; - uint32_t cksum, hcksum_txflags; - queue_t *dev_q; - udp_t *udp = connp->conn_udp; - ipha_t *ipha = (ipha_t *)mp->b_rptr; + udp_t *udp = connp->conn_udp; udp_stack_t *us = udp->udp_us; - ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - boolean_t ll_multicast = B_FALSE; - boolean_t direct_send; - - dev_q = ire->ire_stq->q_next; - ASSERT(dev_q != NULL); + boolean_t insert_spi = udp->udp_nat_t_endpoint; + uint_t pktlen; + uint_t alloclen; + uint_t copylen; + uint8_t *iph; + uint_t ip_hdr_length; + udpha_t *udpha; + uint32_t cksum; + ip_pkt_t *ipp; - ill = ire_to_ill(ire); - ASSERT(ill != NULL); + ASSERT(MUTEX_HELD(&connp->conn_lock)); /* - * For the direct send case, if resetting of conn_direct_blocked - * was missed, it is still ok because the putq() would enable - * the queue and write service will drain it out. + * Copy the header template and leave space for an SPI */ - direct_send = ILL_DIRECT_CAPABLE(ill); - - /* is queue flow controlled? 
*/ - if ((!direct_send) && (q->q_first != NULL || connp->conn_draining || - DEV_Q_FLOW_BLOCKED(dev_q))) { - BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); - BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); - if (ipst->ips_ip_output_queue) { - DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp); - (void) putq(connp->conn_wq, mp); - } else { - freemsg(mp); - } - ire_refrele(ire); - return; - } - - ire_fp_mp = ire->ire_nce->nce_fp_mp; - ire_fp_mp_len = MBLKL(ire_fp_mp); - ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); - - dst = ipha->ipha_dst; - src = ipha->ipha_src; - - - BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); - - ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); -#ifndef _BIG_ENDIAN - ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); -#endif - - if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { - ASSERT(ill->ill_hcksum_capab != NULL); - hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; - } else { - hcksum_txflags = 0; - } - - /* pseudo-header checksum (do it in parts for IP header checksum) */ - cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); - - ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); - up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); - if (*up != 0) { - IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, - mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, - ntohs(ipha->ipha_length), cksum); - - /* Software checksum? */ - if (DB_CKSUMFLAGS(mp) == 0) { - UDP_STAT(us, udp_out_sw_cksum); - UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, - ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); - } - } - - if (!CLASSD(dst)) { - ipha->ipha_fragment_offset_and_flags |= - (uint32_t)htons(ire->ire_frag_flag); - } - - /* Calculate IP header checksum if hardware isn't capable */ - if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { - IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], - ((uint16_t *)ipha)[4]); + copylen = connp->conn_ht_iphc_len; + alloclen = copylen + (insert_spi ? 
sizeof (uint32_t) : 0); + pktlen = alloclen + msgdsize(mp); + if (pktlen > IP_MAXPACKET) { + freemsg(mp); + *errorp = EMSGSIZE; + return (NULL); } + ixa->ixa_pktlen = pktlen; - if (CLASSD(dst)) { - if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { - ip_multicast_loopback(q, ill, mp, - connp->conn_multicast_loop ? 0 : - IP_FF_NO_MCAST_LOOP, zoneid); - } + /* check/fix buffer config, setup pointers into it */ + iph = mp->b_rptr - alloclen; + if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { + mblk_t *mp1; - /* If multicast TTL is 0 then we are done */ - if (ipha->ipha_ttl == 0) { + mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); + if (mp1 == NULL) { freemsg(mp); - ire_refrele(ire); - return; + *errorp = ENOMEM; + return (NULL); } - ll_multicast = B_TRUE; + mp1->b_wptr = DB_LIM(mp1); + mp1->b_cont = mp; + mp = mp1; + iph = (mp->b_wptr - alloclen); } + mp->b_rptr = iph; + bcopy(connp->conn_ht_iphc, iph, copylen); + ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); - ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); - mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; - bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); - - UPDATE_OB_PKT_COUNT(ire); - ire->ire_last_used_time = lbolt; - - BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); - UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, - ntohs(ipha->ipha_length)); + ixa->ixa_ip_hdr_length = ip_hdr_length; + udpha = (udpha_t *)(iph + ip_hdr_length); - DTRACE_PROBE4(ip4__physical__out__start, - ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); - FW_HOOKS(ipst->ips_ip4_physical_out_event, - ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, - ll_multicast, ipst); - DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); - if (ipst->ips_ip4_observe.he_interested && mp != NULL) { - zoneid_t szone; - - /* - * Both of these functions expect b_rptr to be - * where the IP header starts, so advance past the - * link layer header if present. 
- */ - mp->b_rptr += ire_fp_mp_len; - szone = ip_get_zoneid_v4(ipha->ipha_src, mp, - ipst, ALL_ZONES); - ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, - ALL_ZONES, ill, ipst); - mp->b_rptr -= ire_fp_mp_len; - } + /* + * Setup header length and prepare for ULP checksum done in IP. + * udp_build_hdr_template has already massaged any routing header + * and placed the result in conn_sum. + * + * We make it easy for IP to include our pseudo header + * by putting our length in uha_checksum. + */ + cksum = pktlen - ip_hdr_length; + udpha->uha_length = htons(cksum); - if (mp == NULL) - goto bail; + cksum += connp->conn_sum; + cksum = (cksum >> 16) + (cksum & 0xFFFF); + ASSERT(cksum < 0x10000); - DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, - void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, - ipha_t *, ipha, ip6_t *, NULL, int, 0); + ipp = &connp->conn_xmit_ipp; + if (ixa->ixa_flags & IXAF_IS_IPV4) { + ipha_t *ipha = (ipha_t *)iph; - if (direct_send) { - uintptr_t cookie; - ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; + ipha->ipha_length = htons((uint16_t)pktlen); - cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, - (uintptr_t)connp, 0); - if (cookie != NULL) { - idl_tx_list_t *idl_txl; + /* IP does the checksum if uha_checksum is non-zero */ + if (us->us_do_checksum) + udpha->uha_checksum = htons(cksum); - /* - * Flow controlled. - */ - DTRACE_PROBE2(non__null__cookie, uintptr_t, - cookie, conn_t *, connp); - idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; - mutex_enter(&idl_txl->txl_lock); - /* - * Check again after holding txl_lock to see if Tx - * ring is still blocked and only then insert the - * connp into the drain list. 
- */ - if (connp->conn_direct_blocked || - (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, - cookie) == 0)) { - mutex_exit(&idl_txl->txl_lock); - goto bail; - } - if (idl_txl->txl_cookie != NULL && - idl_txl->txl_cookie != cookie) { - DTRACE_PROBE2(udp__xmit__collision, - uintptr_t, cookie, - uintptr_t, idl_txl->txl_cookie); - UDP_STAT(us, udp_cookie_coll); - } else { - connp->conn_direct_blocked = B_TRUE; - idl_txl->txl_cookie = cookie; - conn_drain_insert(connp, idl_txl); - DTRACE_PROBE1(udp__xmit__insert, - conn_t *, connp); - } - mutex_exit(&idl_txl->txl_lock); + /* if IP_PKTINFO specified an addres it wins over bind() */ + if ((ipp->ipp_fields & IPPF_ADDR) && + IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { + ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); + ipha->ipha_src = ipp->ipp_addr_v4; + } else { + IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); } } else { - DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp); - putnext(ire->ire_stq, mp); - } -bail: - IRE_REFRELE(ire); -} + ip6_t *ip6h = (ip6_t *)iph; -static boolean_t -udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) -{ - udp_t *udp = Q_TO_UDP(wq); - int err; - cred_t *cred; - cred_t *orig_cred; - cred_t *effective_cred = NULL; - uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; - udp_stack_t *us = udp->udp_us; - - /* - * All Solaris components should pass a db_credp - * for this message, hence we ASSERT. - * On production kernels we return an error to be robust against - * random streams modules sitting on top of us. - */ - cred = orig_cred = msg_getcred(mp, NULL); - ASSERT(cred != NULL); - if (cred == NULL) - return (EINVAL); - - /* - * Verify the destination is allowed to receive packets at - * the security label of the message data. tsol_check_dest() - * may create a new effective cred for this message with a - * modified label or label flags. Note that we use the - * cred/label from the message to handle MLP. 
- */ - if ((err = tsol_check_dest(cred, dst, IPV6_VERSION, - udp->udp_connp->conn_mac_mode, &effective_cred)) != 0) - goto done; - if (effective_cred != NULL) - cred = effective_cred; - - /* - * Calculate the security label to be placed in the text - * of the message (if any). - */ - if ((err = tsol_compute_label_v6(cred, dst, opt_storage, - us->us_netstack->netstack_ip)) != 0) - goto done; - - /* - * Insert the security label in the cached ip options, - * removing any old label that may exist. - */ - if ((err = tsol_update_sticky(&udp->udp_sticky_ipp, - &udp->udp_label_len_v6, opt_storage)) != 0) - goto done; + ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); + udpha->uha_checksum = htons(cksum); - /* - * Save the destination address and cred we used to - * generate the security label text. - */ - if (cred != udp->udp_effective_cred) { - if (udp->udp_effective_cred != NULL) - crfree(udp->udp_effective_cred); - crhold(cred); - udp->udp_effective_cred = cred; - } - if (orig_cred != udp->udp_last_cred) { - if (udp->udp_last_cred != NULL) - crfree(udp->udp_last_cred); - crhold(orig_cred); - udp->udp_last_cred = orig_cred; + /* if IP_PKTINFO specified an addres it wins over bind() */ + if ((ipp->ipp_fields & IPPF_ADDR) && + !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { + ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); + ip6h->ip6_src = ipp->ipp_addr; + } else { + ip6h->ip6_src = *v6src; + } + ip6h->ip6_vcf = + (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | + (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); + if (ipp->ipp_fields & IPPF_TCLASS) { + /* Overrides the class part of flowinfo */ + ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, + ipp->ipp_tclass); + } } -done: - if (effective_cred != NULL) - crfree(effective_cred); + /* Insert all-0s SPI now. 
*/ + if (insert_spi) + *((uint32_t *)(udpha + 1)) = 0; - if (err != 0) { - DTRACE_PROBE4( - tx__ip__log__drop__updatelabel__udp6, - char *, "queue(1) failed to update options(2) on mp(3)", - queue_t *, wq, char *, opt_storage, mblk_t *, mp); - } - return (err); + udpha->uha_dst_port = dstport; + return (mp); } -static int -udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, - pid_t pid) +/* + * Send a T_UDERR_IND in response to an M_DATA + */ +static void +udp_ud_err_connected(conn_t *connp, t_scalar_t error) { - udp_t *udp = connp->conn_udp; - udp_stack_t *us = udp->udp_us; - ipaddr_t v4dst; - in_port_t dstport; - boolean_t mapped_addr; struct sockaddr_storage ss; sin_t *sin; sin6_t *sin6; struct sockaddr *addr; socklen_t addrlen; - int error; - boolean_t insert_spi = udp->udp_nat_t_endpoint; - - /* M_DATA for connected socket */ - - ASSERT(udp->udp_issocket); - UDP_DBGSTAT(us, udp_data_conn); + mblk_t *mp1; mutex_enter(&connp->conn_lock); - if (udp->udp_state != TS_DATA_XFER) { - mutex_exit(&connp->conn_lock); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); - UDP_STAT(us, udp_out_err_notconn); - freemsg(mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: connp %p (%S)", connp, - "not-connected; address required"); - return (EDESTADDRREQ); - } - - mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); - if (mapped_addr) - IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); - /* Initialize addr and addrlen as if they're passed in */ - if (udp->udp_family == AF_INET) { + if (connp->conn_family == AF_INET) { sin = (sin_t *)&ss; + *sin = sin_null; sin->sin_family = AF_INET; - dstport = sin->sin_port = udp->udp_dstport; - ASSERT(mapped_addr); - sin->sin_addr.s_addr = v4dst; + sin->sin_port = connp->conn_fport; + sin->sin_addr.s_addr = connp->conn_faddr_v4; addr = (struct sockaddr *)sin; addrlen = sizeof (*sin); } else { sin6 = (sin6_t *)&ss; + *sin6 = sin6_null; sin6->sin6_family = AF_INET6; - dstport = sin6->sin6_port = udp->udp_dstport; - 
sin6->sin6_flowinfo = udp->udp_flowinfo; - sin6->sin6_addr = udp->udp_v6dst; - sin6->sin6_scope_id = 0; + sin6->sin6_port = connp->conn_fport; + sin6->sin6_flowinfo = connp->conn_flowinfo; + sin6->sin6_addr = connp->conn_faddr_v6; + if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && + (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { + sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; + } else { + sin6->sin6_scope_id = 0; + } sin6->__sin6_src_id = 0; addr = (struct sockaddr *)sin6; addrlen = sizeof (*sin6); } mutex_exit(&connp->conn_lock); - if (mapped_addr) { - /* - * Handle both AF_INET and AF_INET6; the latter - * for IPV4 mapped destination addresses. Note - * here that both addr and addrlen point to the - * corresponding struct depending on the address - * family of the socket. - */ - mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, - insert_spi, msg, cr, pid); - } else { - mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); - } - if (error == 0) { - ASSERT(mp == NULL); - return (0); - } - - UDP_STAT(us, udp_out_err_output); - ASSERT(mp != NULL); - if (IPCL_IS_NONSTR(connp)) { - freemsg(mp); - return (error); - } else { - /* mp is freed by the following routine */ - udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, - (t_scalar_t)addrlen, (t_scalar_t)error); - return (0); - } -} - -/* ARGSUSED */ -static int -udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, - socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) -{ - - udp_t *udp = connp->conn_udp; - boolean_t insert_spi = udp->udp_nat_t_endpoint; - int error = 0; - sin6_t *sin6; - sin_t *sin; - uint_t srcid; - uint16_t port; - ipaddr_t v4dst; - - - ASSERT(addr != NULL); - - switch (udp->udp_family) { - case AF_INET6: - sin6 = (sin6_t *)addr; - if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { - /* - * Destination is a non-IPv4-compatible IPv6 address. - * Send out an IPv6 format packet. 
- */ - mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, - pid); - if (error != 0) - goto ud_error; - - return (0); - } - /* - * If the local address is not zero or a mapped address - * return an error. It would be possible to send an IPv4 - * packet but the response would never make it back to the - * application since it is bound to a non-mapped address. - */ - if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && - !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { - error = EADDRNOTAVAIL; - goto ud_error; - } - /* Send IPv4 packet without modifying udp_ipversion */ - /* Extract port and ipaddr */ - port = sin6->sin6_port; - IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); - srcid = sin6->__sin6_src_id; - break; - - case AF_INET: - sin = (sin_t *)addr; - /* Extract port and ipaddr */ - port = sin->sin_port; - v4dst = sin->sin_addr.s_addr; - srcid = 0; - break; - } - - mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, - msg, cr, pid); - - if (error == 0) { - ASSERT(mp == NULL); - return (0); - } - -ud_error: - ASSERT(mp != NULL); - - return (error); + mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); + if (mp1 != NULL) + putnext(connp->conn_rq, mp1); } /* @@ -5788,15 +3804,20 @@ ud_error: void udp_wput(queue_t *q, mblk_t *mp) { + sin6_t *sin6; + sin_t *sin = NULL; + uint_t srcid; conn_t *connp = Q_TO_CONN(q); udp_t *udp = connp->conn_udp; int error = 0; - struct sockaddr *addr; + struct sockaddr *addr = NULL; socklen_t addrlen; udp_stack_t *us = udp->udp_us; - - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, - "udp_wput_start: queue %p mp %p", q, mp); + struct T_unitdata_req *tudr; + mblk_t *data_mp; + ushort_t ipversion; + cred_t *cr; + pid_t pid; /* * We directly handle several cases here: T_UNITDATA_REQ message @@ -5805,910 +3826,612 @@ udp_wput(queue_t *q, mblk_t *mp) */ switch (DB_TYPE(mp)) { case M_DATA: - /* - * Quick check for error cases. 
Checks will be done again - * under the lock later on - */ if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { /* Not connected; address is required */ BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDP_DBGSTAT(us, udp_data_notconn); UDP_STAT(us, udp_out_err_notconn); freemsg(mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: connp %p (%S)", connp, - "not-connected; address required"); return; } - (void) udp_send_connected(connp, mp, NULL, NULL, -1); + /* + * All Solaris components should pass a db_credp + * for this message, hence we ASSERT. + * On production kernels we return an error to be robust against + * random streams modules sitting on top of us. + */ + cr = msg_getcred(mp, &pid); + ASSERT(cr != NULL); + if (cr == NULL) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(mp); + return; + } + ASSERT(udp->udp_issocket); + UDP_DBGSTAT(us, udp_data_conn); + error = udp_output_connected(connp, mp, cr, pid); + if (error != 0) { + UDP_STAT(us, udp_out_err_output); + if (connp->conn_rq != NULL) + udp_ud_err_connected(connp, (t_scalar_t)error); +#ifdef DEBUG + printf("udp_output_connected returned %d\n", error); +#endif + } return; case M_PROTO: - case M_PCPROTO: { - struct T_unitdata_req *tudr; - - ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); + case M_PCPROTO: tudr = (struct T_unitdata_req *)mp->b_rptr; - - /* Handle valid T_UNITDATA_REQ here */ - if (MBLKL(mp) >= sizeof (*tudr) && - ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { - if (mp->b_cont == NULL) { - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: q %p (%S)", q, "badaddr"); - error = EPROTO; - goto ud_error; - } - - if (!MBLKIN(mp, 0, tudr->DEST_offset + - tudr->DEST_length)) { - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: q %p (%S)", q, "badaddr"); - error = EADDRNOTAVAIL; - goto ud_error; - } - /* - * If a port has not been bound to the stream, fail. 
- * This is not a problem when sockfs is directly - * above us, because it will ensure that the socket - * is first bound before allowing data to be sent. - */ - if (udp->udp_state == TS_UNBND) { - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: q %p (%S)", q, "outstate"); - error = EPROTO; - goto ud_error; - } - addr = (struct sockaddr *) - &mp->b_rptr[tudr->DEST_offset]; - addrlen = tudr->DEST_length; - if (tudr->OPT_length != 0) - UDP_STAT(us, udp_out_opt); - break; + if (MBLKL(mp) < sizeof (*tudr) || + ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { + udp_wput_other(q, mp); + return; } - /* FALLTHRU */ - } + break; + default: udp_wput_other(q, mp); return; } - ASSERT(addr != NULL); - error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, - -1); - if (error != 0) { -ud_error: - UDP_STAT(us, udp_out_err_output); - ASSERT(mp != NULL); - /* mp is freed by the following routine */ - udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, - (t_scalar_t)error); + /* Handle valid T_UNITDATA_REQ here */ + data_mp = mp->b_cont; + if (data_mp == NULL) { + error = EPROTO; + goto ud_error2; } -} + mp->b_cont = NULL; -/* ARGSUSED */ -static void -udp_wput_fallback(queue_t *wq, mblk_t *mp) -{ -#ifdef DEBUG - cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); -#endif - freemsg(mp); -} - - -/* - * udp_output_v6(): - * Assumes that udp_wput did some sanity checking on the destination - * address. 
- */ -static mblk_t * -udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, - struct nmsghdr *msg, cred_t *cr, pid_t pid) -{ - ip6_t *ip6h; - ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ - mblk_t *mp1 = mp; - mblk_t *mp2; - int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; - size_t ip_len; - udpha_t *udph; - udp_t *udp = connp->conn_udp; - udp_stack_t *us = udp->udp_us; - queue_t *q = connp->conn_wq; - ip6_pkt_t ipp_s; /* For ancillary data options */ - ip6_pkt_t *ipp = &ipp_s; - ip6_pkt_t *tipp; /* temporary ipp */ - uint32_t csum = 0; - uint_t ignore = 0; - uint_t option_exists = 0, is_sticky = 0; - uint8_t *cp; - uint8_t *nxthdr_ptr; - in6_addr_t ip6_dst; - in_port_t port; - udpattrs_t attrs; - boolean_t opt_present; - ip6_hbh_t *hopoptsptr = NULL; - uint_t hopoptslen = 0; - boolean_t is_ancillary = B_FALSE; - size_t sth_wroff = 0; - ire_t *ire; - boolean_t update_lastdst = B_FALSE; - - *error = 0; - - /* - * If the local address is a mapped address return - * an error. - * It would be possible to send an IPv6 packet but the - * response would never make it back to the application - * since it is bound to a mapped address. - */ - if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { - *error = EADDRNOTAVAIL; - goto done; + if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { + error = EADDRNOTAVAIL; + goto ud_error2; } - ipp->ipp_fields = 0; - ipp->ipp_sticky_ignored = 0; - /* - * If TPI options passed in, feed it for verification and handling + * All Solaris components should pass a db_credp + * for this TPI message, hence we should ASSERT. + * However, RPC (svc_clts_ksend) does this odd thing where it + * passes the options from a T_UNITDATA_IND unchanged in a + * T_UNITDATA_REQ. While that is the right thing to do for + * some options, SCM_UCRED being the key one, this also makes it + * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 
*/ - attrs.udpattr_credset = B_FALSE; - opt_present = B_FALSE; - if (IPCL_IS_NONSTR(connp)) { - if (msg->msg_controllen != 0) { - attrs.udpattr_ipp6 = ipp; - attrs.udpattr_mb = mp; - - rw_enter(&udp->udp_rwlock, RW_WRITER); - *error = process_auxiliary_options(connp, - msg->msg_control, msg->msg_controllen, - &attrs, &udp_opt_obj, udp_opt_set, cr); - rw_exit(&udp->udp_rwlock); - if (*error) - goto done; - ASSERT(*error == 0); - opt_present = B_TRUE; - } - } else { - if (DB_TYPE(mp) != M_DATA) { - mp1 = mp->b_cont; - if (((struct T_unitdata_req *) - mp->b_rptr)->OPT_length != 0) { - attrs.udpattr_ipp6 = ipp; - attrs.udpattr_mb = mp; - if (udp_unitdata_opt_process(q, mp, error, - &attrs) < 0) { - goto done; - } - ASSERT(*error == 0); - opt_present = B_TRUE; - } - } + cr = msg_getcred(mp, &pid); + if (cr == NULL) { + cr = connp->conn_cred; + pid = connp->conn_cpid; } /* - * Determine whether we need to mark the mblk with the user's - * credentials. - * If labeled then sockfs would have already done this. + * If a port has not been bound to the stream, fail. + * This is not a problem when sockfs is directly + * above us, because it will ensure that the socket + * is first bound before allowing data to be sent. */ - ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); - ire = connp->conn_ire_cache; - if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || - (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || - (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { - if (cr != NULL && msg_getcred(mp, NULL) == NULL) - mblk_setcred(mp, cr, pid); - } - - rw_enter(&udp->udp_rwlock, RW_READER); - ignore = ipp->ipp_sticky_ignored; - - /* mp1 points to the M_DATA mblk carrying the packet */ - ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); - - if (sin6->sin6_scope_id != 0 && - IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { - /* - * IPPF_SCOPE_ID is special. It's neither a sticky - * option nor ancillary data. It needs to be - * explicitly set in options_exists. 
- */ - option_exists |= IPPF_SCOPE_ID; + if (udp->udp_state == TS_UNBND) { + error = EPROTO; + goto ud_error2; } + addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; + addrlen = tudr->DEST_length; - /* - * Compute the destination address - */ - ip6_dst = sin6->sin6_addr; - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) - ip6_dst = ipv6_loopback; - - port = sin6->sin6_port; - - /* - * Cluster and TSOL notes, Cluster check: - * see comments in udp_output_v4(). - */ - mutex_enter(&connp->conn_lock); - - if (cl_inet_connect2 != NULL && - (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || - port != udp->udp_lastdstport)) { - mutex_exit(&connp->conn_lock); - *error = 0; - CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); - if (*error != 0) { - *error = EHOSTUNREACH; - rw_exit(&udp->udp_rwlock); - goto done; + switch (connp->conn_family) { + case AF_INET6: + sin6 = (sin6_t *)addr; + if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || + (sin6->sin6_family != AF_INET6)) { + error = EADDRNOTAVAIL; + goto ud_error2; } - update_lastdst = B_TRUE; - mutex_enter(&connp->conn_lock); - } - /* - * If we're not going to the same destination as last time, then - * recompute the label required. This is done in a separate routine to - * avoid blowing up our stack here. - * - * TSOL Note: Since we are not in WRITER mode, UDP packets - * to different destination may require different labels, - * or worse, UDP packets to same IP address may require - * different labels due to use of shared all-zones address. - * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, - * and sticky ipp_hopoptslen are consistent for the current - * destination and are updated atomically. - */ - if (is_system_labeled()) { - cred_t *credp; - pid_t cpid; + srcid = sin6->__sin6_src_id; + if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + /* + * Destination is a non-IPv4-compatible IPv6 address. + * Send out an IPv6 format packet. 
+ */ - /* Using UDP MLP requires SCM_UCRED from user */ - if (connp->conn_mlp_type != mlptSingle && - !attrs.udpattr_credset) { - DTRACE_PROBE4( - tx__ip__log__info__output__udp6, - char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", - mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); - *error = EINVAL; - rw_exit(&udp->udp_rwlock); - mutex_exit(&connp->conn_lock); - goto done; - } - /* - * update label option for this UDP socket if - * - the destination has changed, - * - the UDP socket is MLP, or - * - the cred attached to the mblk changed. - */ - credp = msg_getcred(mp, &cpid); - if (opt_present || - !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || - connp->conn_mlp_type != mlptSingle || - credp != udp->udp_last_cred) { - if ((*error = udp_update_label_v6(q, mp, &ip6_dst)) - != 0) { - rw_exit(&udp->udp_rwlock); - mutex_exit(&connp->conn_lock); - goto done; + /* + * If the local address is a mapped address return + * an error. + * It would be possible to send an IPv6 packet but the + * response would never make it back to the application + * since it is bound to a mapped address. + */ + if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { + error = EADDRNOTAVAIL; + goto ud_error2; } - update_lastdst = B_TRUE; - } - /* - * Attach the effective cred to the mblk to ensure future - * routing decisions will be based on it's label. - */ - mblk_setcred(mp, udp->udp_effective_cred, cpid); - } - if (update_lastdst) { - udp->udp_v6lastdst = ip6_dst; - udp->udp_lastdstport = port; - } + UDP_DBGSTAT(us, udp_out_ipv6); - /* - * If there's a security label here, then we ignore any options the - * user may try to set. We keep the peer's label as a hidden sticky - * option. We make a private copy of this label before releasing the - * lock so that label is kept consistent with the destination addr. 
- */ - if (udp->udp_label_len_v6 > 0) { - ignore &= ~IPPF_HOPOPTS; - ipp->ipp_fields &= ~IPPF_HOPOPTS; - } + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) + sin6->sin6_addr = ipv6_loopback; + ipversion = IPV6_VERSION; + } else { + if (connp->conn_ipv6_v6only) { + error = EADDRNOTAVAIL; + goto ud_error2; + } - if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { - /* No sticky options nor ancillary data. */ - mutex_exit(&connp->conn_lock); - goto no_options; - } + /* + * If the local address is not zero or a mapped address + * return an error. It would be possible to send an + * IPv4 packet but the response would never make it + * back to the application since it is bound to a + * non-mapped address. + */ + if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && + !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { + error = EADDRNOTAVAIL; + goto ud_error2; + } + UDP_DBGSTAT(us, udp_out_mapped); - /* - * Go through the options figuring out where each is going to - * come from and build two masks. The first mask indicates if - * the option exists at all. The second mask indicates if the - * option is sticky or ancillary. 
- */ - if (!(ignore & IPPF_HOPOPTS)) { - if (ipp->ipp_fields & IPPF_HOPOPTS) { - option_exists |= IPPF_HOPOPTS; - udp_ip_hdr_len += ipp->ipp_hopoptslen; - } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { - option_exists |= IPPF_HOPOPTS; - is_sticky |= IPPF_HOPOPTS; - ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); - hopoptsptr = kmem_alloc( - udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); - if (hopoptsptr == NULL) { - *error = ENOMEM; - mutex_exit(&connp->conn_lock); - goto done; + if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { + V4_PART_OF_V6(sin6->sin6_addr) = + htonl(INADDR_LOOPBACK); } - hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; - bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, - hopoptslen); - udp_ip_hdr_len += hopoptslen; + ipversion = IPV4_VERSION; } - } - mutex_exit(&connp->conn_lock); - if (!(ignore & IPPF_RTHDR)) { - if (ipp->ipp_fields & IPPF_RTHDR) { - option_exists |= IPPF_RTHDR; - udp_ip_hdr_len += ipp->ipp_rthdrlen; - } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { - option_exists |= IPPF_RTHDR; - is_sticky |= IPPF_RTHDR; - udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; - } - } + if (tudr->OPT_length != 0) { + /* + * If we are connected then the destination needs to be + * the same as the connected one. 
+ */ + if (udp->udp_state == TS_DATA_XFER && + !conn_same_as_last_v6(connp, sin6)) { + error = EISCONN; + goto ud_error2; + } + UDP_STAT(us, udp_out_opt); + error = udp_output_ancillary(connp, NULL, sin6, + data_mp, mp, NULL, cr, pid); + } else { + ip_xmit_attr_t *ixa; - if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { - if (ipp->ipp_fields & IPPF_RTDSTOPTS) { - option_exists |= IPPF_RTDSTOPTS; - udp_ip_hdr_len += ipp->ipp_rtdstoptslen; - } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { - option_exists |= IPPF_RTDSTOPTS; - is_sticky |= IPPF_RTDSTOPTS; - udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; + /* + * We have to allocate an ip_xmit_attr_t before we grab + * conn_lock and we need to hold conn_lock once we've + * checked conn_same_as_last_v6 to handle concurrent + * send* calls on a socket. + */ + ixa = conn_get_ixa(connp, B_FALSE); + if (ixa == NULL) { + error = ENOMEM; + goto ud_error2; + } + mutex_enter(&connp->conn_lock); + + if (conn_same_as_last_v6(connp, sin6) && + connp->conn_lastsrcid == srcid && + ipsec_outbound_policy_current(ixa)) { + UDP_DBGSTAT(us, udp_out_lastdst); + /* udp_output_lastdst drops conn_lock */ + error = udp_output_lastdst(connp, data_mp, cr, + pid, ixa); + } else { + UDP_DBGSTAT(us, udp_out_diffdst); + /* udp_output_newdst drops conn_lock */ + error = udp_output_newdst(connp, data_mp, NULL, + sin6, ipversion, cr, pid, ixa); + } + ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); } - } - - if (!(ignore & IPPF_DSTOPTS)) { - if (ipp->ipp_fields & IPPF_DSTOPTS) { - option_exists |= IPPF_DSTOPTS; - udp_ip_hdr_len += ipp->ipp_dstoptslen; - } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { - option_exists |= IPPF_DSTOPTS; - is_sticky |= IPPF_DSTOPTS; - udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; + if (error == 0) { + freeb(mp); + return; } - } + break; - if (!(ignore & IPPF_IFINDEX)) { - if (ipp->ipp_fields & IPPF_IFINDEX) { - option_exists |= IPPF_IFINDEX; - } else if 
(udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { - option_exists |= IPPF_IFINDEX; - is_sticky |= IPPF_IFINDEX; + case AF_INET: + sin = (sin_t *)addr; + if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || + (sin->sin_family != AF_INET)) { + error = EADDRNOTAVAIL; + goto ud_error2; } - } + UDP_DBGSTAT(us, udp_out_ipv4); + if (sin->sin_addr.s_addr == INADDR_ANY) + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + ipversion = IPV4_VERSION; - if (!(ignore & IPPF_ADDR)) { - if (ipp->ipp_fields & IPPF_ADDR) { - option_exists |= IPPF_ADDR; - } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { - option_exists |= IPPF_ADDR; - is_sticky |= IPPF_ADDR; - } - } + srcid = 0; + if (tudr->OPT_length != 0) { + /* + * If we are connected then the destination needs to be + * the same as the connected one. + */ + if (udp->udp_state == TS_DATA_XFER && + !conn_same_as_last_v4(connp, sin)) { + error = EISCONN; + goto ud_error2; + } + UDP_STAT(us, udp_out_opt); + error = udp_output_ancillary(connp, sin, NULL, + data_mp, mp, NULL, cr, pid); + } else { + ip_xmit_attr_t *ixa; - if (!(ignore & IPPF_DONTFRAG)) { - if (ipp->ipp_fields & IPPF_DONTFRAG) { - option_exists |= IPPF_DONTFRAG; - } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { - option_exists |= IPPF_DONTFRAG; - is_sticky |= IPPF_DONTFRAG; + /* + * We have to allocate an ip_xmit_attr_t before we grab + * conn_lock and we need to hold conn_lock once we've + * checked conn_same_as_last_v4 to handle concurrent + * send* calls on a socket. 
+ */ + ixa = conn_get_ixa(connp, B_FALSE); + if (ixa == NULL) { + error = ENOMEM; + goto ud_error2; + } + mutex_enter(&connp->conn_lock); + + if (conn_same_as_last_v4(connp, sin) && + ipsec_outbound_policy_current(ixa)) { + UDP_DBGSTAT(us, udp_out_lastdst); + /* udp_output_lastdst drops conn_lock */ + error = udp_output_lastdst(connp, data_mp, cr, + pid, ixa); + } else { + UDP_DBGSTAT(us, udp_out_diffdst); + /* udp_output_newdst drops conn_lock */ + error = udp_output_newdst(connp, data_mp, sin, + NULL, ipversion, cr, pid, ixa); + } + ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); } - } - - if (!(ignore & IPPF_USE_MIN_MTU)) { - if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { - option_exists |= IPPF_USE_MIN_MTU; - } else if (udp->udp_sticky_ipp.ipp_fields & - IPPF_USE_MIN_MTU) { - option_exists |= IPPF_USE_MIN_MTU; - is_sticky |= IPPF_USE_MIN_MTU; + if (error == 0) { + freeb(mp); + return; } + break; } + UDP_STAT(us, udp_out_err_output); + ASSERT(mp != NULL); + /* mp is freed by the following routine */ + udp_ud_err(q, mp, (t_scalar_t)error); + return; - if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) - option_exists |= IPPF_HOPLIMIT; - /* IPV6_HOPLIMIT can never be sticky */ - ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); +ud_error2: + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + freemsg(data_mp); + UDP_STAT(us, udp_out_err_output); + ASSERT(mp != NULL); + /* mp is freed by the following routine */ + udp_ud_err(q, mp, (t_scalar_t)error); +} - if (!(ignore & IPPF_UNICAST_HOPS) && - (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { - option_exists |= IPPF_UNICAST_HOPS; - is_sticky |= IPPF_UNICAST_HOPS; - } +/* + * Handle the case of the IP address, port, flow label being different + * for both IPv4 and IPv6. + * + * NOTE: The caller must hold conn_lock and we drop it here. 
+ */ +static int +udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, + ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) +{ + uint_t srcid; + uint32_t flowinfo; + udp_t *udp = connp->conn_udp; + int error = 0; + ip_xmit_attr_t *oldixa; + udp_stack_t *us = udp->udp_us; + in6_addr_t v6src; + in6_addr_t v6dst; + in6_addr_t v6nexthop; + in_port_t dstport; - if (!(ignore & IPPF_MULTICAST_HOPS) && - (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { - option_exists |= IPPF_MULTICAST_HOPS; - is_sticky |= IPPF_MULTICAST_HOPS; - } + ASSERT(MUTEX_HELD(&connp->conn_lock)); + ASSERT(ixa != NULL); + /* + * We hold conn_lock across all the use and modifications of + * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they + * stay consistent. + */ - if (!(ignore & IPPF_TCLASS)) { - if (ipp->ipp_fields & IPPF_TCLASS) { - option_exists |= IPPF_TCLASS; - } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { - option_exists |= IPPF_TCLASS; - is_sticky |= IPPF_TCLASS; - } + ASSERT(cr != NULL); + ixa->ixa_cred = cr; + ixa->ixa_cpid = pid; + if (is_system_labeled()) { + /* We need to restart with a label based on the cred */ + ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); } - if (!(ignore & IPPF_NEXTHOP) && - (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { - option_exists |= IPPF_NEXTHOP; - is_sticky |= IPPF_NEXTHOP; + /* + * If we are connected then the destination needs to be the + * same as the connected one, which is not the case here since we + * checked for that above. + */ + if (udp->udp_state == TS_DATA_XFER) { + mutex_exit(&connp->conn_lock); + error = EISCONN; + goto ud_error; } -no_options: + /* In case previous destination was multicast or multirt */ + ip_attr_newdst(ixa); /* - * If any options carried in the ip6i_t were specified, we - * need to account for the ip6i_t in the data we'll be sending - * down. + * If laddr is unspecified then we look at sin6_src_id. 
+ * We will give precedence to a source address set with IPV6_PKTINFO + * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't + * want ip_attr_connect to select a source (since it can fail) when + * IPV6_PKTINFO is specified. + * If this doesn't result in a source address then we get a source + * from ip_attr_connect() below. */ - if (option_exists & IPPF_HAS_IP6I) - udp_ip_hdr_len += sizeof (ip6i_t); - - /* check/fix buffer config, setup pointers into it */ - ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; - if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || - !OK_32PTR(ip6h)) { - - /* Try to get everything in a single mblk next time */ - if (udp_ip_hdr_len > udp->udp_max_hdr_len) { - udp->udp_max_hdr_len = udp_ip_hdr_len; - sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; + v6src = connp->conn_saddr_v6; + if (sin != NULL) { + IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); + dstport = sin->sin_port; + flowinfo = 0; + srcid = 0; + ixa->ixa_flags &= ~IXAF_SCOPEID_SET; + if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { + ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), + connp->conn_netstack); } - - mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); - if (mp2 == NULL) { - *error = ENOMEM; - rw_exit(&udp->udp_rwlock); - goto done; + ixa->ixa_flags |= IXAF_IS_IPV4; + } else { + v6dst = sin6->sin6_addr; + dstport = sin6->sin6_port; + flowinfo = sin6->sin6_flowinfo; + srcid = sin6->__sin6_src_id; + if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { + ixa->ixa_scopeid = sin6->sin6_scope_id; + ixa->ixa_flags |= IXAF_SCOPEID_SET; + } else { + ixa->ixa_flags &= ~IXAF_SCOPEID_SET; } - mp2->b_wptr = DB_LIM(mp2); - mp2->b_cont = mp1; - mp1 = mp2; - if (DB_TYPE(mp) != M_DATA) - mp->b_cont = mp1; + if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { + ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), + connp->conn_netstack); + } + if (IN6_IS_ADDR_V4MAPPED(&v6dst)) + ixa->ixa_flags |= IXAF_IS_IPV4; 
else - mp = mp1; - - ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); + ixa->ixa_flags &= ~IXAF_IS_IPV4; } - mp1->b_rptr = (unsigned char *)ip6h; - ip6i = (ip6i_t *)ip6h; - -#define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) - if (option_exists & IPPF_HAS_IP6I) { - ip6h = (ip6_t *)&ip6i[1]; - ip6i->ip6i_flags = 0; - ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; - - /* sin6_scope_id takes precendence over IPPF_IFINDEX */ - if (option_exists & IPPF_SCOPE_ID) { - ip6i->ip6i_flags |= IP6I_IFINDEX; - ip6i->ip6i_ifindex = sin6->sin6_scope_id; - } else if (option_exists & IPPF_IFINDEX) { - tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); - ASSERT(tipp->ipp_ifindex != 0); - ip6i->ip6i_flags |= IP6I_IFINDEX; - ip6i->ip6i_ifindex = tipp->ipp_ifindex; - } - - if (option_exists & IPPF_ADDR) { - /* - * Enable per-packet source address verification if - * IPV6_PKTINFO specified the source address. - * ip6_src is set in the transport's _wput function. - */ - ip6i->ip6i_flags |= IP6I_VERIFY_SRC; - } - - if (option_exists & IPPF_DONTFRAG) { - ip6i->ip6i_flags |= IP6I_DONTFRAG; - } + /* Handle IPV6_PKTINFO setting source address. 
*/ + if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && + (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) { + ip_pkt_t *ipp = &connp->conn_xmit_ipp; - if (option_exists & IPPF_USE_MIN_MTU) { - ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( - ip6i->ip6i_flags, ipp->ipp_use_min_mtu); + if (ixa->ixa_flags & IXAF_IS_IPV4) { + if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) + v6src = ipp->ipp_addr; + } else { + if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) + v6src = ipp->ipp_addr; } + } - if (option_exists & IPPF_NEXTHOP) { - tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); - ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); - ip6i->ip6i_flags |= IP6I_NEXTHOP; - ip6i->ip6i_nexthop = tipp->ipp_nexthop; - } + ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); + mutex_exit(&connp->conn_lock); + error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, + &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); + switch (error) { + case 0: + break; + case EADDRNOTAVAIL: /* - * tell IP this is an ip6i_t private header + * IXAF_VERIFY_SOURCE tells us to pick a better source. + * Don't have the application see that errno */ - ip6i->ip6i_nxt = IPPROTO_RAW; - } - - /* Initialize IPv6 header */ - ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; - bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); - - /* Set the hoplimit of the outgoing packet. */ - if (option_exists & IPPF_HOPLIMIT) { - /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ - ip6h->ip6_hops = ipp->ipp_hoplimit; - ip6i->ip6i_flags |= IP6I_HOPLIMIT; - } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { - ip6h->ip6_hops = udp->udp_multicast_ttl; - if (option_exists & IPPF_MULTICAST_HOPS) - ip6i->ip6i_flags |= IP6I_HOPLIMIT; - } else { - ip6h->ip6_hops = udp->udp_ttl; - if (option_exists & IPPF_UNICAST_HOPS) - ip6i->ip6i_flags |= IP6I_HOPLIMIT; - } - - if (option_exists & IPPF_ADDR) { - tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); - ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); - ip6h->ip6_src = tipp->ipp_addr; - } else { + error = ENETUNREACH; + goto failed; + case ENETDOWN: /* - * The source address was not set using IPV6_PKTINFO. - * First look at the bound source. - * If unspecified fallback to __sin6_src_id. + * Have !ipif_addr_ready address; drop packet silently + * until we can get applications to not send until we + * are ready. */ - ip6h->ip6_src = udp->udp_v6src; - if (sin6->__sin6_src_id != 0 && - IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { - ip_srcid_find_id(sin6->__sin6_src_id, - &ip6h->ip6_src, connp->conn_zoneid, - us->us_netstack); + error = 0; + goto failed; + case EHOSTUNREACH: + case ENETUNREACH: + if (ixa->ixa_ire != NULL) { + /* + * Let conn_ip_output/ire_send_noroute return + * the error and send any local ICMP error. + */ + error = 0; + break; } + /* FALLTHRU */ + failed: + default: + goto ud_error; } - nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; - cp = (uint8_t *)&ip6h[1]; /* - * Here's where we have to start stringing together - * any extension headers in the right order: - * Hop-by-hop, destination, routing, and final destination opts. + * Cluster note: we let the cluster hook know that we are sending to a + * new address and/or port. 
*/ - if (option_exists & IPPF_HOPOPTS) { - /* Hop-by-hop options */ - ip6_hbh_t *hbh = (ip6_hbh_t *)cp; - tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); - if (hopoptslen == 0) { - hopoptsptr = tipp->ipp_hopopts; - hopoptslen = tipp->ipp_hopoptslen; - is_ancillary = B_TRUE; - } - - *nxthdr_ptr = IPPROTO_HOPOPTS; - nxthdr_ptr = &hbh->ip6h_nxt; - - bcopy(hopoptsptr, cp, hopoptslen); - cp += hopoptslen; - - if (hopoptsptr != NULL && !is_ancillary) { - kmem_free(hopoptsptr, hopoptslen); - hopoptsptr = NULL; - hopoptslen = 0; + if (cl_inet_connect2 != NULL) { + CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); + if (error != 0) { + error = EHOSTUNREACH; + goto ud_error; } } - /* - * En-route destination options - * Only do them if there's a routing header as well - */ - if (option_exists & IPPF_RTDSTOPTS) { - ip6_dest_t *dst = (ip6_dest_t *)cp; - tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); - - *nxthdr_ptr = IPPROTO_DSTOPTS; - nxthdr_ptr = &dst->ip6d_nxt; - bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); - cp += tipp->ipp_rtdstoptslen; - } - /* - * Routing header next - */ - if (option_exists & IPPF_RTHDR) { - ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; - tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); - - *nxthdr_ptr = IPPROTO_ROUTING; - nxthdr_ptr = &rt->ip6r_nxt; - - bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); - cp += tipp->ipp_rthdrlen; - } + mutex_enter(&connp->conn_lock); /* - * Do ultimate destination options + * While we dropped the lock some other thread might have connected + * this socket. If so we bail out with EISCONN to ensure that the + * connecting thread is the one that updates conn_ixa, conn_ht_* + * and conn_*last*. 
*/ - if (option_exists & IPPF_DSTOPTS) { - ip6_dest_t *dest = (ip6_dest_t *)cp; - tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); - - *nxthdr_ptr = IPPROTO_DSTOPTS; - nxthdr_ptr = &dest->ip6d_nxt; - - bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); - cp += tipp->ipp_dstoptslen; + if (udp->udp_state == TS_DATA_XFER) { + mutex_exit(&connp->conn_lock); + error = EISCONN; + goto ud_error; } - /* - * Now set the last header pointer to the proto passed in - */ - ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); - *nxthdr_ptr = IPPROTO_UDP; - - /* Update UDP header */ - udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); - udph->uha_dst_port = sin6->sin6_port; - udph->uha_src_port = udp->udp_port; /* - * Copy in the destination address + * We need to rebuild the headers if + * - we are labeling packets (could be different for different + * destinations) + * - we have a source route (or routing header) since we need to + * massage that to get the pseudo-header checksum + * - the IP version is different than the last time + * - a socket option with COA_HEADER_CHANGED has been set which + * set conn_v6lastdst to zero. + * + * Otherwise the prepend function will just update the src, dst, + * dstport, and flow label. */ - ip6h->ip6_dst = ip6_dst; - - ip6h->ip6_vcf = - (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | - (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); - - if (option_exists & IPPF_TCLASS) { - tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); - ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, - tipp->ipp_tclass); - } - rw_exit(&udp->udp_rwlock); - - if (option_exists & IPPF_RTHDR) { - ip6_rthdr_t *rth; - + if (is_system_labeled()) { + /* TX MLP requires SCM_UCRED and don't have that here */ + if (connp->conn_mlp_type != mlptSingle) { + mutex_exit(&connp->conn_lock); + error = ECONNREFUSED; + goto ud_error; + } /* - * Perform any processing needed for source routing. 
- * We know that all extension headers will be in the same mblk - * as the IPv6 header. + * Check whether Trusted Solaris policy allows communication + * with this host, and pretend that the destination is + * unreachable if not. + * Compute any needed label and place it in ipp_label_v4/v6. + * + * Later conn_build_hdr_template/conn_prepend_hdr takes + * ipp_label_v4/v6 to form the packet. + * + * Tsol note: Since we hold conn_lock we know no other + * thread manipulates conn_xmit_ipp. */ - rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); - if (rth != NULL && rth->ip6r_segleft != 0) { - if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { - /* - * Drop packet - only support Type 0 routing. - * Notify the application as well. - */ - *error = EPROTO; - goto done; - } - - /* - * rth->ip6r_len is twice the number of - * addresses in the header. Thus it must be even. - */ - if (rth->ip6r_len & 0x1) { - *error = EPROTO; - goto done; - } - /* - * Shuffle the routing header and ip6_dst - * addresses, and get the checksum difference - * between the first hop (in ip6_dst) and - * the destination (in the last routing hdr entry). - */ - csum = ip_massage_options_v6(ip6h, rth, - us->us_netstack); - /* - * Verify that the first hop isn't a mapped address. - * Routers along the path need to do this verification - * for subsequent hops. 
- */ - if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { - *error = EADDRNOTAVAIL; - goto done; + error = conn_update_label(connp, ixa, &v6dst, + &connp->conn_xmit_ipp); + if (error != 0) { + mutex_exit(&connp->conn_lock); + goto ud_error; + } + /* Rebuild the header template */ + error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, + flowinfo); + if (error != 0) { + mutex_exit(&connp->conn_lock); + goto ud_error; + } + } else if ((connp->conn_xmit_ipp.ipp_fields & + (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || + ipversion != connp->conn_lastipversion || + IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { + /* Rebuild the header template */ + error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, + flowinfo); + if (error != 0) { + mutex_exit(&connp->conn_lock); + goto ud_error; + } + } else { + /* Simply update the destination address if no source route */ + if (ixa->ixa_flags & IXAF_IS_IPV4) { + ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; + + IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); + if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { + ipha->ipha_fragment_offset_and_flags |= + IPH_DF_HTONS; + } else { + ipha->ipha_fragment_offset_and_flags &= + ~IPH_DF_HTONS; } - - cp += (rth->ip6r_len + 1)*8; + } else { + ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; + ip6h->ip6_dst = v6dst; } } - /* count up length of UDP packet */ - ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; - if ((mp2 = mp1->b_cont) != NULL) { - do { - ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); - ip_len += (uint32_t)MBLKL(mp2); - } while ((mp2 = mp2->b_cont) != NULL); - } - /* - * If the size of the packet is greater than the maximum allowed by - * ip, return an error. Passing this down could cause panics because - * the size will have wrapped and be inconsistent with the msg size. - */ - if (ip_len > IP_MAXPACKET) { - *error = EMSGSIZE; - goto done; - } - - /* Store the UDP length. 
Subtract length of extension hdrs */ - udph->uha_length = htons(ip_len + IPV6_HDR_LEN - - (int)((uchar_t *)udph - (uchar_t *)ip6h)); - - /* - * We make it easy for IP to include our pseudo header - * by putting our length in uh_checksum, modified (if - * we have a routing header) by the checksum difference - * between the ultimate destination and first hop addresses. - * Note: UDP over IPv6 must always checksum the packet. + * Remember the dst/dstport etc which corresponds to the built header + * template and conn_ixa. */ - csum += udph->uha_length; - csum = (csum & 0xFFFF) + (csum >> 16); - udph->uha_checksum = (uint16_t)csum; - -#ifdef _LITTLE_ENDIAN - ip_len = htons(ip_len); -#endif - ip6h->ip6_plen = ip_len; - - if (DB_TYPE(mp) != M_DATA) { - cred_t *cr; - pid_t cpid; - - /* Move any cred from the T_UNITDATA_REQ to the packet */ - cr = msg_extractcred(mp, &cpid); - if (cr != NULL) { - if (mp1->b_datap->db_credp != NULL) - crfree(mp1->b_datap->db_credp); - mp1->b_datap->db_credp = cr; - mp1->b_datap->db_cpid = cpid; - } + oldixa = conn_replace_ixa(connp, ixa); + connp->conn_v6lastdst = v6dst; + connp->conn_lastipversion = ipversion; + connp->conn_lastdstport = dstport; + connp->conn_lastflowinfo = flowinfo; + connp->conn_lastscopeid = ixa->ixa_scopeid; + connp->conn_lastsrcid = srcid; + /* Also remember a source to use together with lastdst */ + connp->conn_v6lastsrc = v6src; + + data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, + dstport, flowinfo, &error); + + /* Done with conn_t */ + mutex_exit(&connp->conn_lock); + ixa_refrele(oldixa); - ASSERT(mp != mp1); - freeb(mp); + if (data_mp == NULL) { + ASSERT(error != 0); + goto ud_error; } - /* mp has been consumed and we'll return success */ - ASSERT(*error == 0); - mp = NULL; - - /* We're done. Pass the packet to IP */ + /* We're done. Pass the packet to ip. 
*/ BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); - ip_output_v6(connp, mp1, q, IP_WPUT); -done: - if (sth_wroff != 0) { - (void) proto_set_tx_wroff(RD(q), connp, - udp->udp_max_hdr_len + us->us_wroff_extra); - } - if (hopoptsptr != NULL && !is_ancillary) { - kmem_free(hopoptsptr, hopoptslen); - hopoptsptr = NULL; - } - if (*error != 0) { - ASSERT(mp != NULL); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); - } - return (mp); -} - - -static int -i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) -{ - sin_t *sin = (sin_t *)sa; - sin6_t *sin6 = (sin6_t *)sa; - - ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); - - if (udp->udp_state != TS_DATA_XFER) - return (ENOTCONN); - - switch (udp->udp_family) { - case AF_INET: - ASSERT(udp->udp_ipversion == IPV4_VERSION); - - if (*salenp < sizeof (sin_t)) - return (EINVAL); - - *salenp = sizeof (sin_t); - *sin = sin_null; - sin->sin_family = AF_INET; - sin->sin_port = udp->udp_dstport; - sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); + error = conn_ip_output(data_mp, ixa); + /* No udpOutErrors if an error since IP increases its error counter */ + switch (error) { + case 0: break; - - case AF_INET6: - if (*salenp < sizeof (sin6_t)) - return (EINVAL); - - *salenp = sizeof (sin6_t); - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = udp->udp_dstport; - sin6->sin6_addr = udp->udp_v6dst; - sin6->sin6_flowinfo = udp->udp_flowinfo; + case EWOULDBLOCK: + (void) ixa_check_drain_insert(connp, ixa); + error = 0; break; - } - - return (0); -} - -static int -udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) -{ - sin_t *sin = (sin_t *)sa; - sin6_t *sin6 = (sin6_t *)sa; - - ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); - - switch (udp->udp_family) { - case AF_INET: - ASSERT(udp->udp_ipversion == IPV4_VERSION); - - if (*salenp < sizeof (sin_t)) - return (EINVAL); - - *salenp = sizeof (sin_t); - *sin = sin_null; - sin->sin_family = AF_INET; - sin->sin_port = udp->udp_port; - + case EADDRNOTAVAIL: /* - * If 
udp_v6src is unspecified, we might be bound to broadcast - * / multicast. Use udp_bound_v6src as local address instead - * (that could also still be unspecified). + * IXAF_VERIFY_SOURCE tells us to pick a better source. + * Don't have the application see that errno */ - if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && - !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { - sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); - } else { - sin->sin_addr.s_addr = - V4_PART_OF_V6(udp->udp_bound_v6src); - } - break; - - case AF_INET6: - if (*salenp < sizeof (sin6_t)) - return (EINVAL); - - *salenp = sizeof (sin6_t); - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = udp->udp_port; - sin6->sin6_flowinfo = udp->udp_flowinfo; - + error = ENETUNREACH; + /* FALLTHRU */ + default: + mutex_enter(&connp->conn_lock); /* - * If udp_v6src is unspecified, we might be bound to broadcast - * / multicast. Use udp_bound_v6src as local address instead - * (that could also still be unspecified). + * Clear the source and v6lastdst so we call ip_attr_connect + * for the next packet and try to pick a better source. */ - if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) - sin6->sin6_addr = udp->udp_v6src; + if (connp->conn_mcbc_bind) + connp->conn_saddr_v6 = ipv6_all_zeros; else - sin6->sin6_addr = udp->udp_bound_v6src; + connp->conn_saddr_v6 = connp->conn_bound_addr_v6; + connp->conn_v6lastdst = ipv6_all_zeros; + mutex_exit(&connp->conn_lock); break; } + ixa_refrele(ixa); + return (error); - return (0); +ud_error: + if (ixa != NULL) + ixa_refrele(ixa); + + freemsg(data_mp); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDP_STAT(us, udp_out_err_output); + return (error); +} + +/* ARGSUSED */ +static void +udp_wput_fallback(queue_t *wq, mblk_t *mp) +{ +#ifdef DEBUG + cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); +#endif + freemsg(mp); } + /* * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
*/ @@ -6717,7 +4440,8 @@ udp_wput_cmdblk(queue_t *q, mblk_t *mp) { void *data; mblk_t *datamp = mp->b_cont; - udp_t *udp = Q_TO_UDP(q); + conn_t *connp = Q_TO_CONN(q); + udp_t *udp = connp->conn_udp; cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { @@ -6727,19 +4451,23 @@ udp_wput_cmdblk(queue_t *q, mblk_t *mp) } data = datamp->b_rptr; - rw_enter(&udp->udp_rwlock, RW_READER); + mutex_enter(&connp->conn_lock); switch (cmdp->cb_cmd) { case TI_GETPEERNAME: - cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); + if (udp->udp_state != TS_DATA_XFER) + cmdp->cb_error = ENOTCONN; + else + cmdp->cb_error = conn_getpeername(connp, data, + &cmdp->cb_len); break; case TI_GETMYNAME: - cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); + cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); break; default: cmdp->cb_error = EINVAL; break; } - rw_exit(&udp->udp_rwlock); + mutex_exit(&connp->conn_lock); qreply(q, mp); } @@ -6747,10 +4475,11 @@ udp_wput_cmdblk(queue_t *q, mblk_t *mp) static void udp_use_pure_tpi(udp_t *udp) { - rw_enter(&udp->udp_rwlock, RW_WRITER); - udp->udp_issocket = B_FALSE; - rw_exit(&udp->udp_rwlock); + conn_t *connp = udp->udp_connp; + mutex_enter(&connp->conn_lock); + udp->udp_issocket = B_FALSE; + mutex_exit(&connp->conn_lock); UDP_STAT(udp->udp_us, udp_sock_fallback); } @@ -6758,20 +4487,13 @@ static void udp_wput_other(queue_t *q, mblk_t *mp) { uchar_t *rptr = mp->b_rptr; - struct datab *db; struct iocblk *iocp; - cred_t *cr; conn_t *connp = Q_TO_CONN(q); udp_t *udp = connp->conn_udp; - udp_stack_t *us; - - TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, - "udp_wput_other_start: q %p", q); - - us = udp->udp_us; - db = mp->b_datap; + udp_stack_t *us = udp->udp_us; + cred_t *cr; - switch (db->db_type) { + switch (mp->b_datap->db_type) { case M_CMD: udp_wput_cmdblk(q, mp); return; @@ -6779,37 +4501,29 @@ udp_wput_other(queue_t *q, mblk_t *mp) case M_PROTO: case M_PCPROTO: if (mp->b_wptr - 
rptr < sizeof (t_scalar_t)) { + /* + * If the message does not contain a PRIM_type, + * throw it away. + */ freemsg(mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "protoshort"); return; } switch (((t_primp_t)rptr)->type) { case T_ADDR_REQ: udp_addr_req(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "addrreq"); return; case O_T_BIND_REQ: case T_BIND_REQ: udp_tpi_bind(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "bindreq"); return; case T_CONN_REQ: udp_tpi_connect(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "connreq"); return; case T_CAPABILITY_REQ: udp_capability_req(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "capabreq"); return; case T_INFO_REQ: udp_info_req(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "inforeq"); return; case T_UNITDATA_REQ: /* @@ -6817,14 +4531,10 @@ udp_wput_other(queue_t *q, mblk_t *mp) * be bad. Valid T_UNITDATA_REQs are handled * in udp_wput. 
*/ - udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); + udp_ud_err(q, mp, EADDRNOTAVAIL); return; case T_UNBIND_REQ: udp_tpi_unbind(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "unbindreq"); return; case T_SVR4_OPTMGMT_REQ: /* @@ -6842,11 +4552,8 @@ udp_wput_other(queue_t *q, mblk_t *mp) } if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, cr)) { - (void) svr4_optcom_req(q, - mp, cr, &udp_opt_obj, B_TRUE); + svr4_optcom_req(q, mp, cr, &udp_opt_obj); } - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); return; case T_OPTMGMT_REQ: @@ -6863,34 +4570,24 @@ udp_wput_other(queue_t *q, mblk_t *mp) udp_err_ack(q, mp, TSYSERR, EINVAL); return; } - (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); + tpi_optcom_req(q, mp, cr, &udp_opt_obj); return; case T_DISCON_REQ: udp_tpi_disconnect(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "disconreq"); return; /* The following TPI message is not supported by udp. */ case O_T_CONN_RES: case T_CONN_RES: udp_err_ack(q, mp, TNOTSUPPORT, 0); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, - "connres/disconreq"); return; - /* The following 3 TPI messages are illegal for udp. */ + /* The following 3 TPI requests are illegal for udp. 
*/ case T_DATA_REQ: case T_EXDATA_REQ: case T_ORDREL_REQ: udp_err_ack(q, mp, TNOTSUPPORT, 0); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, - "data/exdata/ordrel"); return; default: break; @@ -6914,13 +4611,10 @@ udp_wput_other(queue_t *q, mblk_t *mp) iocp->ioc_count = 0; mp->b_datap->db_type = M_IOCACK; qreply(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, - "getpeername"); return; } /* FALLTHRU */ - case TI_GETMYNAME: { + case TI_GETMYNAME: /* * For TI_GETPEERNAME and TI_GETMYNAME, we first * need to copyin the user's strbuf structure. @@ -6929,17 +4623,12 @@ udp_wput_other(queue_t *q, mblk_t *mp) */ mi_copyin(q, mp, NULL, SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "getmyname"); return; - } case ND_SET: /* nd_getset performs the necessary checking */ case ND_GET: if (nd_getset(q, us->us_nd, mp)) { qreply(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "get"); return; } break; @@ -6969,16 +4658,12 @@ udp_wput_other(queue_t *q, mblk_t *mp) break; case M_IOCDATA: udp_wput_iocdata(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "iocdata"); return; default: /* Unrecognized messages are passed through without change. */ break; } - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "end"); - ip_output(connp, mp, q, IP_WPUT); + ip_wput_nondata(q, mp); } /* @@ -6991,9 +4676,9 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp) mblk_t *mp1; struct iocblk *iocp = (struct iocblk *)mp->b_rptr; STRUCT_HANDLE(strbuf, sb); - udp_t *udp = Q_TO_UDP(q); - int error; uint_t addrlen; + conn_t *connp = Q_TO_CONN(q); + udp_t *udp = connp->conn_udp; /* Make sure it is one of ours. 
*/ switch (iocp->ioc_cmd) { @@ -7001,7 +4686,7 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp) case TI_GETPEERNAME: break; default: - ip_output(udp->udp_connp, mp, q, IP_WPUT); + ip_wput_nondata(q, mp); return; } @@ -7040,77 +4725,45 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp) * address and then we'll copyout the strbuf. */ STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); - addrlen = udp->udp_family == AF_INET ? sizeof (sin_t) : sizeof (sin6_t); + + if (connp->conn_family == AF_INET) + addrlen = sizeof (sin_t); + else + addrlen = sizeof (sin6_t); + if (STRUCT_FGET(sb, maxlen) < addrlen) { mi_copy_done(q, mp, EINVAL); return; } - mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); - - if (mp1 == NULL) - return; - - rw_enter(&udp->udp_rwlock, RW_READER); switch (iocp->ioc_cmd) { case TI_GETMYNAME: - error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); break; case TI_GETPEERNAME: - error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); + if (udp->udp_state != TS_DATA_XFER) { + mi_copy_done(q, mp, ENOTCONN); + return; + } break; } - rw_exit(&udp->udp_rwlock); - - if (error != 0) { - mi_copy_done(q, mp, error); - } else { - mp1->b_wptr += addrlen; - STRUCT_FSET(sb, len, addrlen); - - /* Copy out the address */ - mi_copyout(q, mp); - } -} - -static int -udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, - udpattrs_t *udpattrs) -{ - struct T_unitdata_req *udreqp; - int is_absreq_failure; - cred_t *cr; - - ASSERT(((t_primp_t)mp->b_rptr)->type); - - /* - * All Solaris components should pass a db_credp - * for this TPI message, hence we should ASSERT. - * However, RPC (svc_clts_ksend) does this odd thing where it - * passes the options from a T_UNITDATA_IND unchanged in a - * T_UNITDATA_REQ. While that is the right thing to do for - * some options, SCM_UCRED being the key one, this also makes it - * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 
- */ - cr = msg_getcred(mp, NULL); - if (cr == NULL) { - cr = Q_TO_CONN(q)->conn_cred; - } - udreqp = (struct T_unitdata_req *)mp->b_rptr; - - *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, - udreqp->OPT_offset, cr, &udp_opt_obj, - udpattrs, &is_absreq_failure); + mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); + if (!mp1) + return; - if (*errorp != 0) { - /* - * Note: No special action needed in this - * module for "is_absreq_failure" - */ - return (-1); /* failure */ + STRUCT_FSET(sb, len, addrlen); + switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { + case TI_GETMYNAME: + (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, + &addrlen); + break; + case TI_GETPEERNAME: + (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, + &addrlen); + break; } - ASSERT(is_absreq_failure == 0); - return (0); /* success */ + mp1->b_wptr += addrlen; + /* Copy out the address */ + mi_copyout(q, mp); } void @@ -7234,34 +4887,19 @@ udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) kstat_t *ksp; udp_stat_t template = { - { "udp_ip_send", KSTAT_DATA_UINT64 }, - { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, - { "udp_ire_null", KSTAT_DATA_UINT64 }, { "udp_sock_fallback", KSTAT_DATA_UINT64 }, - { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, - { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, { "udp_out_opt", KSTAT_DATA_UINT64 }, { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, { "udp_out_err_output", KSTAT_DATA_UINT64 }, { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, - { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, - { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, - { "udp_in_recvopts", KSTAT_DATA_UINT64 }, - { "udp_in_recvif", KSTAT_DATA_UINT64 }, - { "udp_in_recvslla", KSTAT_DATA_UINT64 }, - { "udp_in_recvucred", KSTAT_DATA_UINT64 }, - { "udp_in_recvttl", KSTAT_DATA_UINT64 }, - { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, - { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, - { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, - { "udp_in_recvrtdstopts", 
KSTAT_DATA_UINT64 }, - { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, - { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, - { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, - { "udp_in_timestamp", KSTAT_DATA_UINT64 }, #ifdef DEBUG { "udp_data_conn", KSTAT_DATA_UINT64 }, { "udp_data_notconn", KSTAT_DATA_UINT64 }, + { "udp_out_lastdst", KSTAT_DATA_UINT64 }, + { "udp_out_diffdst", KSTAT_DATA_UINT64 }, + { "udp_out_ipv6", KSTAT_DATA_UINT64 }, + { "udp_out_mapped", KSTAT_DATA_UINT64 }, + { "udp_out_ipv4", KSTAT_DATA_UINT64 }, #endif }; @@ -7384,8 +5022,6 @@ udp_set_rcv_hiwat(udp_t *udp, size_t size) static void udp_lrput(queue_t *q, mblk_t *mp) { - mblk_t *mp1; - switch (mp->b_datap->db_type) { case M_FLUSH: /* Turn around */ @@ -7396,9 +5032,6 @@ udp_lrput(queue_t *q, mblk_t *mp) } break; } - /* Could receive messages that passed through ar_rput */ - for (mp1 = mp; mp1; mp1 = mp1->b_cont) - mp1->b_prev = mp1->b_next = NULL; freemsg(mp); } @@ -7425,6 +5058,7 @@ udp_do_open(cred_t *credp, boolean_t isv6, int flags) zoneid_t zoneid; netstack_t *ns; udp_stack_t *us; + int len; ns = netstack_find_by_cred(credp); ASSERT(ns != NULL); @@ -7455,34 +5089,40 @@ udp_do_open(cred_t *credp, boolean_t isv6, int flags) */ netstack_rele(ns); - rw_enter(&udp->udp_rwlock, RW_WRITER); - ASSERT(connp->conn_ulp == IPPROTO_UDP); + /* + * Since this conn_t/udp_t is not yet visible to anybody else we don't + * need to lock anything. + */ + ASSERT(connp->conn_proto == IPPROTO_UDP); ASSERT(connp->conn_udp == udp); ASSERT(udp->udp_connp == connp); /* Set the initial state of the stream and the privilege status. 
*/ udp->udp_state = TS_UNBND; + connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; if (isv6) { - udp->udp_family = AF_INET6; - udp->udp_ipversion = IPV6_VERSION; - udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; - udp->udp_ttl = us->us_ipv6_hoplimit; - connp->conn_af_isv6 = B_TRUE; + connp->conn_family = AF_INET6; + connp->conn_ipversion = IPV6_VERSION; + connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; + connp->conn_default_ttl = us->us_ipv6_hoplimit; + len = sizeof (ip6_t) + UDPH_SIZE; } else { - udp->udp_family = AF_INET; - udp->udp_ipversion = IPV4_VERSION; - udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; - udp->udp_ttl = us->us_ipv4_ttl; - connp->conn_af_isv6 = B_FALSE; + connp->conn_family = AF_INET; + connp->conn_ipversion = IPV4_VERSION; + connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; + connp->conn_default_ttl = us->us_ipv4_ttl; + len = sizeof (ipha_t) + UDPH_SIZE; } - udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; - udp->udp_pending_op = -1; - connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; - connp->conn_zoneid = zoneid; + ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); + connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; + + connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; + connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; + /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ + connp->conn_ixa->ixa_zoneid = zoneid; - udp->udp_open_time = lbolt64; - udp->udp_open_pid = curproc->p_pid; + connp->conn_zoneid = zoneid; /* * If the caller has the process-wide flag set, then default to MAC @@ -7491,22 +5131,38 @@ udp_do_open(cred_t *credp, boolean_t isv6, int flags) if (getpflags(NET_MAC_AWARE, credp) != 0) connp->conn_mac_mode = CONN_MAC_AWARE; - connp->conn_ulp_labeled = is_system_labeled(); + connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); udp->udp_us = us; + connp->conn_rcvbuf = us->us_recv_hiwat; + connp->conn_sndbuf = us->us_xmit_hiwat; + 
connp->conn_sndlowat = us->us_xmit_lowat; + connp->conn_rcvlowat = udp_mod_info.mi_lowat; + + connp->conn_wroff = len + us->us_wroff_extra; + connp->conn_so_type = SOCK_DGRAM; + connp->conn_recv = udp_input; + connp->conn_recvicmp = udp_icmp_input; crhold(credp); connp->conn_cred = credp; + connp->conn_cpid = curproc->p_pid; + connp->conn_open_time = lbolt64; + /* Cache things in ixa without an extra refhold */ + connp->conn_ixa->ixa_cred = connp->conn_cred; + connp->conn_ixa->ixa_cpid = connp->conn_cpid; + if (is_system_labeled()) + connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; - rw_exit(&udp->udp_rwlock); + if (us->us_pmtu_discovery) + connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; return (connp); } -/* ARGSUSED */ sock_lower_handle_t udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp, int flags, cred_t *credp) @@ -7539,39 +5195,17 @@ udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, ASSERT(us != NULL); udp->udp_issocket = B_TRUE; - connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; - - /* Set flow control */ - rw_enter(&udp->udp_rwlock, RW_WRITER); - (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); - udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; - udp->udp_rcv_lowat = udp_mod_info.mi_lowat; - udp->udp_xmit_hiwat = us->us_xmit_hiwat; - udp->udp_xmit_lowat = us->us_xmit_lowat; - - if (udp->udp_family == AF_INET6) { - /* Build initial header template for transmit */ - if ((*errorp = udp_build_hdrs(udp)) != 0) { - rw_exit(&udp->udp_rwlock); - ipcl_conn_destroy(connp); - return (NULL); - } - } - rw_exit(&udp->udp_rwlock); + connp->conn_flags |= IPCL_NONSTR; - connp->conn_flow_cntrld = B_FALSE; - - ASSERT(us->us_ldi_ident != NULL); - - if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { - ip1dbg(("udp_create: create of IP helper stream failed\n")); - udp_do_close(connp); - return (NULL); - } + /* + * 
Set flow control + * Since this conn_t/udp_t is not yet visible to anybody else we don't + * need to lock anything. + */ + (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); + udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; - /* Set the send flow control */ - connp->conn_wq->q_hiwat = us->us_xmit_hiwat; - connp->conn_wq->q_lowat = us->us_xmit_lowat; + connp->conn_flow_cntrld = B_FALSE; mutex_enter(&connp->conn_lock); connp->conn_state_flags &= ~CONN_INCIPIENT; @@ -7583,14 +5217,12 @@ udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, return ((sock_lower_handle_t)connp); } -/* ARGSUSED */ +/* ARGSUSED3 */ void udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) { conn_t *connp = (conn_t *)proto_handle; - udp_t *udp = connp->conn_udp; - udp_stack_t *us = udp->udp_us; struct sock_proto_props sopp; /* All Solaris components should pass a cred for this operation. */ @@ -7599,14 +5231,15 @@ udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, connp->conn_upcalls = sock_upcalls; connp->conn_upper_handle = sock_handle; - sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | + sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; - sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; + sopp.sopp_wroff = connp->conn_wroff; sopp.sopp_maxblk = INFPSZ; - sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; + sopp.sopp_rxhiwat = connp->conn_rcvbuf; + sopp.sopp_rxlowat = connp->conn_rcvlowat; sopp.sopp_maxaddrlen = sizeof (sin6_t); sopp.sopp_maxpsz = - (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : + (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : UDP_MAXPACKET_IPV6; sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : udp_mod_info.mi_minpsz; @@ -7618,9 +5251,32 @@ udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, static void udp_do_close(conn_t *connp) { + udp_t *udp; + ASSERT(connp != NULL && IPCL_IS_UDP(connp)); + udp = connp->conn_udp; + + if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { + /* + * Running in cluster mode - register unbind information + */ + if (connp->conn_ipversion == IPV4_VERSION) { + (*cl_inet_unbind)( + connp->conn_netstack->netstack_stackid, + IPPROTO_UDP, AF_INET, + (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), + (in_port_t)connp->conn_lport, NULL); + } else { + (*cl_inet_unbind)( + connp->conn_netstack->netstack_stackid, + IPPROTO_UDP, AF_INET6, + (uint8_t *)&(connp->conn_laddr_v6), + (in_port_t)connp->conn_lport, NULL); + } + } + + udp_bind_hash_remove(udp, B_FALSE); - udp_quiesce_conn(connp); ip_quiesce_conn(connp); if (!IPCL_IS_NONSTR(connp)) { @@ -7642,6 +5298,7 @@ udp_do_close(conn_t *connp) * future. */ ASSERT(connp->conn_ref == 1); + if (!IPCL_IS_NONSTR(connp)) { inet_minor_free(connp->conn_minor_arena, connp->conn_dev); } else { @@ -7652,7 +5309,7 @@ udp_do_close(conn_t *connp) ipcl_conn_destroy(connp); } -/* ARGSUSED */ +/* ARGSUSED1 */ int udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) { @@ -7671,59 +5328,41 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, { sin_t *sin; sin6_t *sin6; - sin6_t sin6addr; + udp_t *udp = connp->conn_udp; + int error = 0; + ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ in_port_t port; /* Host byte order */ in_port_t requested_port; /* Host byte order */ int count; + ipaddr_t v4src; /* Set if AF_INET */ in6_addr_t v6src; int loopmax; udp_fanout_t *udpf; in_port_t lport; /* Network byte order */ - udp_t *udp; + uint_t scopeid = 0; + zoneid_t zoneid = IPCL_ZONEID(connp); + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; boolean_t is_inaddr_any; mlp_type_t addrtype, mlptype; - udp_stack_t *us; - int error 
= 0; - mblk_t *mp = NULL; - - udp = connp->conn_udp; - us = udp->udp_us; - - if (udp->udp_state != TS_UNBND) { - (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, - "udp_bind: bad state, %u", udp->udp_state); - return (-TOUTSTATE); - } + udp_stack_t *us = udp->udp_us; switch (len) { - case 0: - if (udp->udp_family == AF_INET) { - sin = (sin_t *)&sin6addr; - *sin = sin_null; - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = INADDR_ANY; - udp->udp_ipversion = IPV4_VERSION; - } else { - ASSERT(udp->udp_family == AF_INET6); - sin6 = (sin6_t *)&sin6addr; - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - V6_SET_ZERO(sin6->sin6_addr); - udp->udp_ipversion = IPV6_VERSION; - } - port = 0; - break; - case sizeof (sin_t): /* Complete IPv4 address */ sin = (sin_t *)sa; if (sin == NULL || !OK_32PTR((char *)sin)) return (EINVAL); - if (udp->udp_family != AF_INET || + if (connp->conn_family != AF_INET || sin->sin_family != AF_INET) { return (EAFNOSUPPORT); } + v4src = sin->sin_addr.s_addr; + IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); + if (v4src != INADDR_ANY) { + laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, + B_TRUE); + } port = ntohs(sin->sin_port); break; @@ -7733,10 +5372,28 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, if (sin6 == NULL || !OK_32PTR((char *)sin6)) return (EINVAL); - if (udp->udp_family != AF_INET6 || + if (connp->conn_family != AF_INET6 || sin6->sin6_family != AF_INET6) { return (EAFNOSUPPORT); } + v6src = sin6->sin6_addr; + if (IN6_IS_ADDR_V4MAPPED(&v6src)) { + if (connp->conn_ipv6_v6only) + return (EADDRNOTAVAIL); + + IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); + if (v4src != INADDR_ANY) { + laddr_type = ip_laddr_verify_v4(v4src, + zoneid, ipst, B_FALSE); + } + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { + if (IN6_IS_ADDR_LINKSCOPE(&v6src)) + scopeid = sin6->sin6_scope_id; + laddr_type = ip_laddr_verify_v6(&v6src, + zoneid, ipst, B_TRUE, scopeid); + } + } port = ntohs(sin6->sin6_port); break; @@ -7746,6 
+5403,10 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, return (-TBADADDR); } + /* Is the local address a valid unicast, multicast, or broadcast? */ + if (laddr_type == IPVL_BAD) + return (EADDRNOTAVAIL); + requested_port = port; if (requested_port == 0 || !bind_to_req_port_only) @@ -7759,7 +5420,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * doesn't care which port number we bind to. Get one in the * valid range. */ - if (udp->udp_anon_priv_bind) { + if (connp->conn_anon_priv_bind) { port = udp_get_next_priv_port(udp); } else { port = udp_update_next_port(udp, @@ -7798,53 +5459,45 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * TPI primitives only 1 at a time and wait for the response before * sending the next primitive. */ - rw_enter(&udp->udp_rwlock, RW_WRITER); - if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { - rw_exit(&udp->udp_rwlock); + mutex_enter(&connp->conn_lock); + if (udp->udp_state != TS_UNBND) { + mutex_exit(&connp->conn_lock); (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "udp_bind: bad state, %u", udp->udp_state); return (-TOUTSTATE); } - /* XXX how to remove the T_BIND_REQ? Should set it before calling */ - udp->udp_pending_op = T_BIND_REQ; /* * Copy the source address into our udp structure. This address * may still be zero; if so, IP will fill in the correct address * each time an outbound packet is passed to it. 
Since the udp is * not yet in the bind hash list, we don't grab the uf_lock to - * change udp_ipversion + * change conn_ipversion */ - if (udp->udp_family == AF_INET) { + if (connp->conn_family == AF_INET) { ASSERT(sin != NULL); - ASSERT(udp->udp_ipversion == IPV4_VERSION); - udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + - udp->udp_ip_snd_options_len; - IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); + ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); } else { - ASSERT(sin6 != NULL); - v6src = sin6->sin6_addr; if (IN6_IS_ADDR_V4MAPPED(&v6src)) { /* - * no need to hold the uf_lock to set the udp_ipversion + * no need to hold the uf_lock to set the conn_ipversion * since we are not yet in the fanout list */ - udp->udp_ipversion = IPV4_VERSION; - udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + - UDPH_SIZE + udp->udp_ip_snd_options_len; + connp->conn_ipversion = IPV4_VERSION; + connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; } else { - udp->udp_ipversion = IPV6_VERSION; - udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; + connp->conn_ipversion = IPV6_VERSION; + connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; } } /* - * If udp_reuseaddr is not set, then we have to make sure that + * If conn_reuseaddr is not set, then we have to make sure that * the IP address and port number the application requested * (or we selected for the application) is not being used by * another stream. If another stream is already using the * requested IP address and port, the behavior depends on * "bind_to_req_port_only". If set the bind fails; otherwise we - * search for any an unused port to bind to the the stream. + * search for any an unused port to bind to the stream. 
* * As per the BSD semantics, as modified by the Deering multicast * changes, if udp_reuseaddr is set, then we allow multiple binds @@ -7860,7 +5513,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, */ count = 0; - if (udp->udp_anon_priv_bind) { + if (connp->conn_anon_priv_bind) { /* * loopmax = (IPPORT_RESERVED-1) - * us->us_min_anonpriv_port + 1 @@ -7876,6 +5529,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, for (;;) { udp_t *udp1; boolean_t found_exclbind = B_FALSE; + conn_t *connp1; /* * Walk through the list of udp streams bound to @@ -7887,7 +5541,9 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, mutex_enter(&udpf->uf_lock); for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { - if (lport != udp1->udp_port) + connp1 = udp1->udp_connp; + + if (lport != connp1->conn_lport) continue; /* @@ -7896,7 +5552,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * privilege as being in all zones, as there's * otherwise no way to identify the right receiver. */ - if (!IPCL_BIND_ZONE_MATCH(udp1->udp_connp, connp)) + if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) continue; /* @@ -7918,12 +5574,13 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * For labeled systems, SO_MAC_EXEMPT behaves the same * as UDP_EXCLBIND, except that zoneid is ignored. */ - if (udp1->udp_exclbind || udp->udp_exclbind || + if (connp1->conn_exclbind || connp->conn_exclbind || IPCL_CONNS_MAC(udp1->udp_connp, connp)) { if (V6_OR_V4_INADDR_ANY( - udp1->udp_bound_v6src) || + connp1->conn_bound_addr_v6) || is_inaddr_any || - IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, + IN6_ARE_ADDR_EQUAL( + &connp1->conn_bound_addr_v6, &v6src)) { found_exclbind = B_TRUE; break; @@ -7935,7 +5592,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * Check ipversion to allow IPv4 and IPv6 sockets to * have disjoint port number spaces. 
*/ - if (udp->udp_ipversion != udp1->udp_ipversion) { + if (connp->conn_ipversion != connp1->conn_ipversion) { /* * On the first time through the loop, if the @@ -7963,8 +5620,8 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * (non-wildcard, also), keep going. */ if (!is_inaddr_any && - !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && - !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, + !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && + !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, &v6src)) { continue; } @@ -7972,7 +5629,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, } if (!found_exclbind && - (udp->udp_reuseaddr && requested_port != 0)) { + (connp->conn_reuseaddr && requested_port != 0)) { break; } @@ -7995,12 +5652,11 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * the routine (and exit the loop). * */ - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); + mutex_exit(&connp->conn_lock); return (-TADDRBUSY); } - if (udp->udp_anon_priv_bind) { + if (connp->conn_anon_priv_bind) { port = udp_get_next_priv_port(udp); } else { if ((count == 0) && (requested_port != 0)) { @@ -8025,66 +5681,82 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, * there are none available, so send an error * to the user. */ - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); + mutex_exit(&connp->conn_lock); return (-TNOADDR); } } /* * Copy the source address into our udp structure. This address - * may still be zero; if so, ip will fill in the correct address - * each time an outbound packet is passed to it. + * may still be zero; if so, ip_attr_connect will fill in the correct + * address when a packet is about to be sent. * If we are binding to a broadcast or multicast address then - * udp_post_ip_bind_connect will clear the source address - * when udp_do_bind success. 
+ * we just set the conn_bound_addr since we don't want to use + * that as the source address when sending. */ - udp->udp_v6src = udp->udp_bound_v6src = v6src; - udp->udp_port = lport; + connp->conn_bound_addr_v6 = v6src; + connp->conn_laddr_v6 = v6src; + if (scopeid != 0) { + connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; + connp->conn_ixa->ixa_scopeid = scopeid; + connp->conn_incoming_ifindex = scopeid; + } else { + connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; + connp->conn_incoming_ifindex = connp->conn_bound_if; + } + + switch (laddr_type) { + case IPVL_UNICAST_UP: + case IPVL_UNICAST_DOWN: + connp->conn_saddr_v6 = v6src; + connp->conn_mcbc_bind = B_FALSE; + break; + case IPVL_MCAST: + case IPVL_BCAST: + /* ip_set_destination will pick a source address later */ + connp->conn_saddr_v6 = ipv6_all_zeros; + connp->conn_mcbc_bind = B_TRUE; + break; + } + + /* Any errors after this point should use late_error */ + connp->conn_lport = lport; + /* - * Now reset the the next anonymous port if the application requested + * Now reset the next anonymous port if the application requested * an anonymous port, or we handed out the next anonymous port. */ - if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { + if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { us->us_next_port_to_try = port + 1; } - /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ - if (udp->udp_family == AF_INET) { - sin->sin_port = udp->udp_port; + /* Initialize the T_BIND_ACK. 
*/ + if (connp->conn_family == AF_INET) { + sin->sin_port = connp->conn_lport; } else { - sin6->sin6_port = udp->udp_port; - /* Rebuild the header template */ - error = udp_build_hdrs(udp); - if (error != 0) { - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - mutex_exit(&udpf->uf_lock); - return (error); - } + sin6->sin6_port = connp->conn_lport; } udp->udp_state = TS_IDLE; udp_bind_hash_insert(udpf, udp); mutex_exit(&udpf->uf_lock); - rw_exit(&udp->udp_rwlock); + mutex_exit(&connp->conn_lock); if (cl_inet_bind) { /* * Running in cluster mode - register bind information */ - if (udp->udp_ipversion == IPV4_VERSION) { + if (connp->conn_ipversion == IPV4_VERSION) { (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, - IPPROTO_UDP, AF_INET, - (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), - (in_port_t)udp->udp_port, NULL); + IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, + (in_port_t)connp->conn_lport, NULL); } else { (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, - IPPROTO_UDP, AF_INET6, - (uint8_t *)&(udp->udp_v6src), - (in_port_t)udp->udp_port, NULL); + IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, + (in_port_t)connp->conn_lport, NULL); } } + mutex_enter(&connp->conn_lock); connp->conn_anon_port = (is_system_labeled() && requested_port == 0); if (is_system_labeled() && (!connp->conn_anon_port || connp->conn_anon_mlp)) { @@ -8092,18 +5764,16 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, zone_t *zone; zone = crgetzone(cr); - connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : + connp->conn_mlp_type = + connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : mlptSingle; addrtype = tsol_mlp_addr_type( connp->conn_allzones ? 
ALL_ZONES : zone->zone_id, IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); if (addrtype == mlptSingle) { - rw_enter(&udp->udp_rwlock, RW_WRITER); - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - connp->conn_anon_port = B_FALSE; - connp->conn_mlp_type = mlptSingle; - return (-TNOADDR); + error = -TNOADDR; + mutex_exit(&connp->conn_lock); + goto late_error; } mlpport = connp->conn_anon_port ? PMAPPORT : port; mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, @@ -8115,12 +5785,9 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, */ if (mlptype != mlptSingle && connp->conn_mlp_type == mlptSingle) { - rw_enter(&udp->udp_rwlock, RW_WRITER); - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - connp->conn_anon_port = B_FALSE; - connp->conn_mlp_type = mlptSingle; - return (EINVAL); + error = EINVAL; + mutex_exit(&connp->conn_lock); + goto late_error; } /* @@ -8129,18 +5796,15 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, */ if (mlptype != mlptSingle && secpolicy_net_bindmlp(cr) != 0) { - if (udp->udp_debug) { + if (connp->conn_debug) { (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "udp_bind: no priv for multilevel port %d", mlpport); } - rw_enter(&udp->udp_rwlock, RW_WRITER); - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - connp->conn_anon_port = B_FALSE; - connp->conn_mlp_type = mlptSingle; - return (-TACCES); + error = -TACCES; + mutex_exit(&connp->conn_lock); + goto late_error; } /* @@ -8158,7 +5822,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, mlpzone = tsol_mlp_findzone(IPPROTO_UDP, htons(mlpport)); if (connp->conn_zoneid != mlpzone) { - if (udp->udp_debug) { + if (connp->conn_debug) { (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "udp_bind: attempt to bind port " @@ -8167,62 +5831,82 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, mlpport, connp->conn_zoneid, mlpzone); } - 
rw_enter(&udp->udp_rwlock, RW_WRITER); - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - connp->conn_anon_port = B_FALSE; - connp->conn_mlp_type = mlptSingle; - return (-TACCES); + error = -TACCES; + mutex_exit(&connp->conn_lock); + goto late_error; } } if (connp->conn_anon_port) { - error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, + error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, port, B_TRUE); if (error != 0) { - if (udp->udp_debug) { + if (connp->conn_debug) { (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "udp_bind: cannot establish anon " "MLP for port %d", port); } - rw_enter(&udp->udp_rwlock, RW_WRITER); - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - connp->conn_anon_port = B_FALSE; - connp->conn_mlp_type = mlptSingle; - return (-TACCES); + error = -TACCES; + mutex_exit(&connp->conn_lock); + goto late_error; } } connp->conn_mlp_type = mlptype; } - if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { - /* - * Append a request for an IRE if udp_v6src not - * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). - */ - mp = allocb(sizeof (ire_t), BPRI_HI); - if (!mp) { - rw_enter(&udp->udp_rwlock, RW_WRITER); - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - return (ENOMEM); - } - mp->b_wptr += sizeof (ire_t); - mp->b_datap->db_type = IRE_DB_REQ_TYPE; + /* + * We create an initial header template here to make a subsequent + * sendto have a starting point. Since conn_last_dst is zero the + * first sendto will always follow the 'dst changed' code path. + * Note that we defer massaging options and the related checksum + * adjustment until we have a destination address. 
+ */ + error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, + &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); + if (error != 0) { + mutex_exit(&connp->conn_lock); + goto late_error; } - if (udp->udp_family == AF_INET6) { - ASSERT(udp->udp_connp->conn_af_isv6); - error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, - &udp->udp_bound_v6src, udp->udp_port, B_TRUE); - } else { - ASSERT(!udp->udp_connp->conn_af_isv6); - error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, - V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, - B_TRUE); + /* Just in case */ + connp->conn_faddr_v6 = ipv6_all_zeros; + connp->conn_fport = 0; + connp->conn_v6lastdst = ipv6_all_zeros; + mutex_exit(&connp->conn_lock); + + error = ip_laddr_fanout_insert(connp); + if (error != 0) + goto late_error; + + /* Bind succeeded */ + return (0); + +late_error: + /* We had already picked the port number, and then the bind failed */ + mutex_enter(&connp->conn_lock); + udpf = &us->us_bind_fanout[ + UDP_BIND_HASH(connp->conn_lport, + us->us_bind_fanout_size)]; + mutex_enter(&udpf->uf_lock); + connp->conn_saddr_v6 = ipv6_all_zeros; + connp->conn_bound_addr_v6 = ipv6_all_zeros; + connp->conn_laddr_v6 = ipv6_all_zeros; + if (scopeid != 0) { + connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; + connp->conn_incoming_ifindex = connp->conn_bound_if; } + udp->udp_state = TS_UNBND; + udp_bind_hash_remove(udp, B_TRUE); + connp->conn_lport = 0; + mutex_exit(&udpf->uf_lock); + connp->conn_anon_port = B_FALSE; + connp->conn_mlp_type = mlptSingle; - (void) udp_post_ip_bind_connect(udp, mp, error); + connp->conn_v6lastdst = ipv6_all_zeros; + + /* Restore the header that was built above - different source address */ + (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, + &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); + mutex_exit(&connp->conn_lock); return (error); } @@ -8256,12 +5940,32 @@ udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, static int 
udp_implicit_bind(conn_t *connp, cred_t *cr) { + sin6_t sin6addr; + sin_t *sin; + sin6_t *sin6; + socklen_t len; int error; /* All Solaris components should pass a cred for this operation. */ ASSERT(cr != NULL); - error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); + if (connp->conn_family == AF_INET) { + len = sizeof (struct sockaddr_in); + sin = (sin_t *)&sin6addr; + *sin = sin_null; + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + } else { + ASSERT(connp->conn_family == AF_INET6); + len = sizeof (sin6_t); + sin6 = (sin6_t *)&sin6addr; + *sin6 = sin6_null; + sin6->sin6_family = AF_INET6; + V6_SET_ZERO(sin6->sin6_addr); + } + + error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, + cr, B_FALSE); return ((error < 0) ? proto_tlitosyserr(-error) : error); } @@ -8280,137 +5984,51 @@ udp_do_unbind(conn_t *connp) /* * Running in cluster mode - register unbind information */ - if (udp->udp_ipversion == IPV4_VERSION) { + if (connp->conn_ipversion == IPV4_VERSION) { (*cl_inet_unbind)( connp->conn_netstack->netstack_stackid, IPPROTO_UDP, AF_INET, - (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), - (in_port_t)udp->udp_port, NULL); + (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), + (in_port_t)connp->conn_lport, NULL); } else { (*cl_inet_unbind)( connp->conn_netstack->netstack_stackid, IPPROTO_UDP, AF_INET6, - (uint8_t *)&(udp->udp_v6src), - (in_port_t)udp->udp_port, NULL); + (uint8_t *)&(connp->conn_laddr_v6), + (in_port_t)connp->conn_lport, NULL); } } - rw_enter(&udp->udp_rwlock, RW_WRITER); - if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { - rw_exit(&udp->udp_rwlock); + mutex_enter(&connp->conn_lock); + /* If a bind has not been done, we can't unbind. */ + if (udp->udp_state == TS_UNBND) { + mutex_exit(&connp->conn_lock); return (-TOUTSTATE); } - udp->udp_pending_op = T_UNBIND_REQ; - rw_exit(&udp->udp_rwlock); - - /* - * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK - * and therefore ip_unbind must never return NULL. 
- */ - ip_unbind(connp); - - /* - * Once we're unbound from IP, the pending operation may be cleared - * here. - */ - rw_enter(&udp->udp_rwlock, RW_WRITER); - udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, us->us_bind_fanout_size)]; - mutex_enter(&udpf->uf_lock); udp_bind_hash_remove(udp, B_TRUE); - V6_SET_ZERO(udp->udp_v6src); - V6_SET_ZERO(udp->udp_bound_v6src); - udp->udp_port = 0; + connp->conn_saddr_v6 = ipv6_all_zeros; + connp->conn_bound_addr_v6 = ipv6_all_zeros; + connp->conn_laddr_v6 = ipv6_all_zeros; + connp->conn_mcbc_bind = B_FALSE; + connp->conn_lport = 0; + /* In case we were also connected */ + connp->conn_faddr_v6 = ipv6_all_zeros; + connp->conn_fport = 0; mutex_exit(&udpf->uf_lock); - udp->udp_pending_op = -1; + connp->conn_v6lastdst = ipv6_all_zeros; udp->udp_state = TS_UNBND; - if (udp->udp_family == AF_INET6) - (void) udp_build_hdrs(udp); - rw_exit(&udp->udp_rwlock); - return (0); -} - -static int -udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) -{ - ire_t *ire; - udp_fanout_t *udpf; - udp_stack_t *us = udp->udp_us; - - ASSERT(udp->udp_pending_op != -1); - rw_enter(&udp->udp_rwlock, RW_WRITER); - if (error == 0) { - /* For udp_do_connect() success */ - /* udp_do_bind() success will do nothing in here */ - /* - * If a broadcast/multicast address was bound, set - * the source address to 0. - * This ensures no datagrams with broadcast address - * as source address are emitted (which would violate - * RFC1122 - Hosts requirements) - * - * Note that when connecting the returned IRE is - * for the destination address and we only perform - * the broadcast check for the source address (it - * is OK to connect to a broadcast/multicast address.) 
- */ - if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { - ire = (ire_t *)ire_mp->b_rptr; + (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, + &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); + mutex_exit(&connp->conn_lock); - /* - * Note: we get IRE_BROADCAST for IPv6 to "mark" a - * multicast local address. - */ - udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, - us->us_bind_fanout_size)]; - if (ire->ire_type == IRE_BROADCAST && - udp->udp_state != TS_DATA_XFER) { - ASSERT(udp->udp_pending_op == T_BIND_REQ || - udp->udp_pending_op == O_T_BIND_REQ); - /* - * This was just a local bind to a broadcast - * addr. - */ - mutex_enter(&udpf->uf_lock); - V6_SET_ZERO(udp->udp_v6src); - mutex_exit(&udpf->uf_lock); - if (udp->udp_family == AF_INET6) - (void) udp_build_hdrs(udp); - } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { - if (udp->udp_family == AF_INET6) - (void) udp_build_hdrs(udp); - } - } - } else { - udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, - us->us_bind_fanout_size)]; - mutex_enter(&udpf->uf_lock); + ip_unbind(connp); - if (udp->udp_state == TS_DATA_XFER) { - /* Connect failed */ - /* Revert back to the bound source */ - udp->udp_v6src = udp->udp_bound_v6src; - udp->udp_state = TS_IDLE; - } else { - /* For udp_do_bind() failed */ - V6_SET_ZERO(udp->udp_v6src); - V6_SET_ZERO(udp->udp_bound_v6src); - udp->udp_state = TS_UNBND; - udp_bind_hash_remove(udp, B_TRUE); - udp->udp_port = 0; - } - mutex_exit(&udpf->uf_lock); - if (udp->udp_family == AF_INET6) - (void) udp_build_hdrs(udp); - } - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - if (ire_mp != NULL) - freeb(ire_mp); - return (error); + return (0); } /* @@ -8418,7 +6036,7 @@ udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) */ static int udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, - cred_t *cr) + cred_t *cr, pid_t pid) { sin6_t *sin6; sin_t *sin; @@ -8426,12 +6044,16 @@ udp_do_connect(conn_t 
*connp, const struct sockaddr *sa, socklen_t len, ipaddr_t v4dst; uint16_t dstport; uint32_t flowinfo; - mblk_t *ire_mp; udp_fanout_t *udpf; udp_t *udp, *udp1; ushort_t ipversion; udp_stack_t *us; int error; + conn_t *connp1; + ip_xmit_attr_t *ixa; + uint_t scopeid = 0; + uint_t srcid = 0; + in6_addr_t v6src = connp->conn_saddr_v6; udp = connp->conn_udp; us = udp->udp_us; @@ -8451,7 +6073,7 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, v4dst = sin->sin_addr.s_addr; dstport = sin->sin_port; IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); - ASSERT(udp->udp_ipversion == IPV4_VERSION); + ASSERT(connp->conn_ipversion == IPV4_VERSION); ipversion = IPV4_VERSION; break; @@ -8459,13 +6081,33 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, sin6 = (sin6_t *)sa; v6dst = sin6->sin6_addr; dstport = sin6->sin6_port; + srcid = sin6->__sin6_src_id; + if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { + ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), + connp->conn_netstack); + } if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { + if (connp->conn_ipv6_v6only) + return (EADDRNOTAVAIL); + + /* + * Destination adress is mapped IPv6 address. + * Source bound address should be unspecified or + * IPv6 mapped address as well. + */ + if (!IN6_IS_ADDR_UNSPECIFIED( + &connp->conn_bound_addr_v6) && + !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { + return (EADDRNOTAVAIL); + } IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); ipversion = IPV4_VERSION; flowinfo = 0; } else { ipversion = IPV6_VERSION; flowinfo = sin6->sin6_flowinfo; + if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) + scopeid = sin6->sin6_scope_id; } break; } @@ -8473,44 +6115,53 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, if (dstport == 0) return (-TBADADDR); - rw_enter(&udp->udp_rwlock, RW_WRITER); + /* + * If there is a different thread using conn_ixa then we get a new + * copy and cut the old one loose from conn_ixa. 
Otherwise we use + * conn_ixa and prevent any other thread from using/changing it. + * Once connect() is done other threads can use conn_ixa since the + * refcnt will be back at one. + */ + ixa = conn_get_ixa(connp, B_TRUE); + if (ixa == NULL) + return (ENOMEM); + ASSERT(ixa->ixa_refcnt >= 2); + ASSERT(ixa == connp->conn_ixa); + + mutex_enter(&connp->conn_lock); /* - * This UDP must have bound to a port already before doing a connect. - * TPI mandates that users must send TPI primitives only 1 at a time - * and wait for the response before sending the next primitive. + * This udp_t must have bound to a port already before doing a connect. + * Reject if a connect is in progress (we drop conn_lock during + * udp_do_connect). */ - if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { - rw_exit(&udp->udp_rwlock); + if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) { + mutex_exit(&connp->conn_lock); (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, "udp_connect: bad state, %u", udp->udp_state); + ixa_refrele(ixa); return (-TOUTSTATE); } - udp->udp_pending_op = T_CONN_REQ; - ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); - - if (ipversion == IPV4_VERSION) { - udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + - udp->udp_ip_snd_options_len; - } else { - udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; - } + ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL); - udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, us->us_bind_fanout_size)]; mutex_enter(&udpf->uf_lock); if (udp->udp_state == TS_DATA_XFER) { /* Already connected - clear out state */ - udp->udp_v6src = udp->udp_bound_v6src; + if (connp->conn_mcbc_bind) + connp->conn_saddr_v6 = ipv6_all_zeros; + else + connp->conn_saddr_v6 = connp->conn_bound_addr_v6; + connp->conn_laddr_v6 = connp->conn_bound_addr_v6; + connp->conn_faddr_v6 = ipv6_all_zeros; + connp->conn_fport = 0; udp->udp_state = TS_IDLE; } - /* 
- * Create a default IP header with no IP options. - */ - udp->udp_dstport = dstport; - udp->udp_ipversion = ipversion; + connp->conn_fport = dstport; + connp->conn_ipversion = ipversion; if (ipversion == IPV4_VERSION) { /* * Interpret a zero destination to mean loopback. @@ -8520,29 +6171,16 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, if (v4dst == INADDR_ANY) { v4dst = htonl(INADDR_LOOPBACK); IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); - if (udp->udp_family == AF_INET) { + if (connp->conn_family == AF_INET) { sin->sin_addr.s_addr = v4dst; } else { sin6->sin6_addr = v6dst; } } - udp->udp_v6dst = v6dst; - udp->udp_flowinfo = 0; - - /* - * If the destination address is multicast and - * an outgoing multicast interface has been set, - * use the address of that interface as our - * source address if no source address has been set. - */ - if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && - CLASSD(v4dst) && - udp->udp_multicast_if_addr != INADDR_ANY) { - IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, - &udp->udp_v6src); - } + connp->conn_faddr_v6 = v6dst; + connp->conn_flowinfo = 0; } else { - ASSERT(udp->udp_ipversion == IPV6_VERSION); + ASSERT(connp->conn_ipversion == IPV6_VERSION); /* * Interpret a zero destination to mean loopback. * Update the T_CONN_REQ (sin/sin6) since it is used to @@ -8552,82 +6190,133 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, v6dst = ipv6_loopback; sin6->sin6_addr = v6dst; } - udp->udp_v6dst = v6dst; - udp->udp_flowinfo = flowinfo; - /* - * If the destination address is multicast and - * an outgoing multicast interface has been set, - * then the ip bind logic will pick the correct source - * address (i.e. matching the outgoing multicast interface). 
- */ + connp->conn_faddr_v6 = v6dst; + connp->conn_flowinfo = flowinfo; + } + mutex_exit(&udpf->uf_lock); + + ixa->ixa_cred = cr; + ixa->ixa_cpid = pid; + if (is_system_labeled()) { + /* We need to restart with a label based on the cred */ + ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); + } + + if (scopeid != 0) { + ixa->ixa_flags |= IXAF_SCOPEID_SET; + ixa->ixa_scopeid = scopeid; + connp->conn_incoming_ifindex = scopeid; + } else { + ixa->ixa_flags &= ~IXAF_SCOPEID_SET; + connp->conn_incoming_ifindex = connp->conn_bound_if; + } + /* + * conn_connect will drop conn_lock and reacquire it. + * To prevent a send* from messing with this udp_t while the lock + * is dropped we set udp_state and clear conn_v6lastdst. + * That will make all send* fail with EISCONN. + */ + connp->conn_v6lastdst = ipv6_all_zeros; + udp->udp_state = TS_WCON_CREQ; + + error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); + mutex_exit(&connp->conn_lock); + if (error != 0) + goto connect_failed; + + /* + * The addresses have been verified. Time to insert in + * the correct fanout list. 
+ */ + error = ipcl_conn_insert(connp); + if (error != 0) + goto connect_failed; + + mutex_enter(&connp->conn_lock); + error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, + &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); + if (error != 0) { + mutex_exit(&connp->conn_lock); + goto connect_failed; } + udp->udp_state = TS_DATA_XFER; + /* Record this as the "last" send even though we haven't sent any */ + connp->conn_v6lastdst = connp->conn_faddr_v6; + connp->conn_lastipversion = connp->conn_ipversion; + connp->conn_lastdstport = connp->conn_fport; + connp->conn_lastflowinfo = connp->conn_flowinfo; + connp->conn_lastscopeid = scopeid; + connp->conn_lastsrcid = srcid; + /* Also remember a source to use together with lastdst */ + connp->conn_v6lastsrc = v6src; + mutex_exit(&connp->conn_lock); + /* - * Verify that the src/port/dst/port is unique for all - * connections in TS_DATA_XFER + * We've picked a source address above. Now we can + * verify that the src/port/dst/port is unique for all + * connections in TS_DATA_XFER, skipping ourselves. 
*/ + mutex_enter(&udpf->uf_lock); for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { if (udp1->udp_state != TS_DATA_XFER) continue; - if (udp->udp_port != udp1->udp_port || - udp->udp_ipversion != udp1->udp_ipversion || - dstport != udp1->udp_dstport || - !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || - !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || - !(IPCL_ZONE_MATCH(udp->udp_connp, - udp1->udp_connp->conn_zoneid) || - IPCL_ZONE_MATCH(udp1->udp_connp, - udp->udp_connp->conn_zoneid))) + + if (udp1 == udp) + continue; + + connp1 = udp1->udp_connp; + if (connp->conn_lport != connp1->conn_lport || + connp->conn_ipversion != connp1->conn_ipversion || + dstport != connp1->conn_fport || + !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, + &connp1->conn_laddr_v6) || + !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || + !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || + IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) continue; mutex_exit(&udpf->uf_lock); - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - return (-TBADADDR); + error = -TBADADDR; + goto connect_failed; } - if (cl_inet_connect2 != NULL) { - CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); + CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); if (error != 0) { mutex_exit(&udpf->uf_lock); - udp->udp_pending_op = -1; - rw_exit(&udp->udp_rwlock); - return (-TBADADDR); + error = -TBADADDR; + goto connect_failed; } } - - udp->udp_state = TS_DATA_XFER; mutex_exit(&udpf->uf_lock); - ire_mp = allocb(sizeof (ire_t), BPRI_HI); - if (ire_mp == NULL) { - mutex_enter(&udpf->uf_lock); - udp->udp_state = TS_IDLE; - udp->udp_pending_op = -1; - mutex_exit(&udpf->uf_lock); - rw_exit(&udp->udp_rwlock); - return (ENOMEM); - } - - rw_exit(&udp->udp_rwlock); + ixa_refrele(ixa); + return (0); - ire_mp->b_wptr += sizeof (ire_t); - ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; +connect_failed: + if (ixa != NULL) + ixa_refrele(ixa); + mutex_enter(&connp->conn_lock); + 
mutex_enter(&udpf->uf_lock); + udp->udp_state = TS_IDLE; + connp->conn_faddr_v6 = ipv6_all_zeros; + connp->conn_fport = 0; + /* In case the source address was set above */ + if (connp->conn_mcbc_bind) + connp->conn_saddr_v6 = ipv6_all_zeros; + else + connp->conn_saddr_v6 = connp->conn_bound_addr_v6; + connp->conn_laddr_v6 = connp->conn_bound_addr_v6; + mutex_exit(&udpf->uf_lock); - if (udp->udp_family == AF_INET) { - error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, - &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, - V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, - B_TRUE, B_TRUE, cr); - } else { - error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, - &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, - &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); - } + connp->conn_v6lastdst = ipv6_all_zeros; + connp->conn_flowinfo = 0; - return (udp_post_ip_bind_connect(udp, ire_mp, error)); + (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, + &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); + mutex_exit(&connp->conn_lock); + return (error); } -/* ARGSUSED */ static int udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, socklen_t len, sock_connid_t *id, cred_t *cr) @@ -8636,6 +6325,7 @@ udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, udp_t *udp = connp->conn_udp; int error; boolean_t did_bind = B_FALSE; + pid_t pid = curproc->p_pid; /* All Solaris components should pass a cred for this operation. 
*/ ASSERT(cr != NULL); @@ -8652,7 +6342,7 @@ udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, return (error); } - error = proto_verify_ip_addr(udp->udp_family, sa, len); + error = proto_verify_ip_addr(connp->conn_family, sa, len); if (error != 0) goto done; @@ -8671,9 +6361,9 @@ udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, /* * set SO_DGRAM_ERRIND */ - udp->udp_dgram_errind = B_TRUE; + connp->conn_dgram_errind = B_TRUE; - error = udp_do_connect(connp, sa, len, cr); + error = udp_do_connect(connp, sa, len, cr, pid); if (error != 0 && did_bind) { int unbind_err; @@ -8702,44 +6392,33 @@ done: return (error); } -/* ARGSUSED */ int udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, cred_t *cr) { + sin6_t *sin6; + sin_t *sin = NULL; + uint_t srcid; conn_t *connp = (conn_t *)proto_handle; udp_t *udp = connp->conn_udp; - udp_stack_t *us = udp->udp_us; int error = 0; + udp_stack_t *us = udp->udp_us; + ushort_t ipversion; + pid_t pid = curproc->p_pid; + ip_xmit_attr_t *ixa; ASSERT(DB_TYPE(mp) == M_DATA); /* All Solaris components should pass a cred for this operation. */ ASSERT(cr != NULL); - /* If labeled then sockfs should have already set db_credp */ - ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); - - /* - * If the socket is connected and no change in destination - */ - if (msg->msg_namelen == 0) { - error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); - if (error == EDESTADDRREQ) - return (error); - else - return (udp->udp_dgram_errind ? error : 0); - } - - /* - * Do an implicit bind if necessary. - */ + /* do an implicit bind if necessary */ if (udp->udp_state == TS_UNBND) { error = udp_implicit_bind(connp, cr); /* * We could be racing with an actual bind, in which case * we would see EPROTO. We cross our fingers and try - * to send. + * to connect. 
*/ if (!(error == 0 || error == EPROTO)) { freemsg(mp); @@ -8747,75 +6426,203 @@ udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, } } - rw_enter(&udp->udp_rwlock, RW_WRITER); - - if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { - rw_exit(&udp->udp_rwlock); - freemsg(mp); + /* Connected? */ + if (msg->msg_name == NULL) { + if (udp->udp_state != TS_DATA_XFER) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (EDESTADDRREQ); + } + if (msg->msg_controllen != 0) { + error = udp_output_ancillary(connp, NULL, NULL, mp, + NULL, msg, cr, pid); + } else { + error = udp_output_connected(connp, mp, cr, pid); + } + if (us->us_sendto_ignerr) + return (0); + else + return (error); + } + if (udp->udp_state == TS_DATA_XFER) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); return (EISCONN); } + error = proto_verify_ip_addr(connp->conn_family, + (struct sockaddr *)msg->msg_name, msg->msg_namelen); + if (error != 0) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (error); + } + switch (connp->conn_family) { + case AF_INET6: + sin6 = (sin6_t *)msg->msg_name; + srcid = sin6->__sin6_src_id; - if (udp->udp_delayed_error != 0) { - boolean_t match; + if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + /* + * Destination is a non-IPv4-compatible IPv6 address. + * Send out an IPv6 format packet. + */ - error = udp->udp_delayed_error; - match = B_FALSE; - udp->udp_delayed_error = 0; - switch (udp->udp_family) { - case AF_INET: { - /* Compare just IP address and port */ - sin_t *sin1 = (sin_t *)msg->msg_name; - sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; + /* + * If the local address is a mapped address return + * an error. + * It would be possible to send an IPv6 packet but the + * response would never make it back to the application + * since it is bound to a mapped address. 
+ */ + if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (EADDRNOTAVAIL); + } + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) + sin6->sin6_addr = ipv6_loopback; + ipversion = IPV6_VERSION; + } else { + if (connp->conn_ipv6_v6only) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (EADDRNOTAVAIL); + } - if (msg->msg_namelen == sizeof (sin_t) && - sin1->sin_port == sin2->sin_port && - sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) - match = B_TRUE; + /* + * If the local address is not zero or a mapped address + * return an error. It would be possible to send an + * IPv4 packet but the response would never make it + * back to the application since it is bound to a + * non-mapped address. + */ + if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && + !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (EADDRNOTAVAIL); + } - break; + if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { + V4_PART_OF_V6(sin6->sin6_addr) = + htonl(INADDR_LOOPBACK); + } + ipversion = IPV4_VERSION; } - case AF_INET6: { - sin6_t *sin1 = (sin6_t *)msg->msg_name; - sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; - if (msg->msg_namelen == sizeof (sin6_t) && - sin1->sin6_port == sin2->sin6_port && - IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, - &sin2->sin6_addr)) - match = B_TRUE; - break; - } - default: - ASSERT(0); + /* + * We have to allocate an ip_xmit_attr_t before we grab + * conn_lock and we need to hold conn_lock once we've check + * conn_same_as_last_v6 to handle concurrent send* calls on a + * socket. 
+ */ + if (msg->msg_controllen == 0) { + ixa = conn_get_ixa(connp, B_FALSE); + if (ixa == NULL) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (ENOMEM); + } + } else { + ixa = NULL; } + mutex_enter(&connp->conn_lock); + if (udp->udp_delayed_error != 0) { + sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; - *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; + error = udp->udp_delayed_error; + udp->udp_delayed_error = 0; - if (match) { - rw_exit(&udp->udp_rwlock); - freemsg(mp); + /* Compare IP address, port, and family */ + + if (sin6->sin6_port == sin2->sin6_port && + IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, + &sin2->sin6_addr) && + sin6->sin6_family == sin2->sin6_family) { + mutex_exit(&connp->conn_lock); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + if (ixa != NULL) + ixa_refrele(ixa); + return (error); + } + } + + if (msg->msg_controllen != 0) { + mutex_exit(&connp->conn_lock); + ASSERT(ixa == NULL); + error = udp_output_ancillary(connp, NULL, sin6, mp, + NULL, msg, cr, pid); + } else if (conn_same_as_last_v6(connp, sin6) && + connp->conn_lastsrcid == srcid && + ipsec_outbound_policy_current(ixa)) { + /* udp_output_lastdst drops conn_lock */ + error = udp_output_lastdst(connp, mp, cr, pid, ixa); + } else { + /* udp_output_newdst drops conn_lock */ + error = udp_output_newdst(connp, mp, NULL, sin6, + ipversion, cr, pid, ixa); + } + ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); + if (us->us_sendto_ignerr) + return (0); + else return (error); + case AF_INET: + sin = (sin_t *)msg->msg_name; + + ipversion = IPV4_VERSION; + + if (sin->sin_addr.s_addr == INADDR_ANY) + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + /* + * We have to allocate an ip_xmit_attr_t before we grab + * conn_lock and we need to hold conn_lock once we've check + * conn_same_as_last_v6 to handle concurrent send* on a socket. 
+ */ + if (msg->msg_controllen == 0) { + ixa = conn_get_ixa(connp, B_FALSE); + if (ixa == NULL) { + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + return (ENOMEM); + } + } else { + ixa = NULL; } - } + mutex_enter(&connp->conn_lock); + if (udp->udp_delayed_error != 0) { + sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; - error = proto_verify_ip_addr(udp->udp_family, - (struct sockaddr *)msg->msg_name, msg->msg_namelen); - rw_exit(&udp->udp_rwlock); + error = udp->udp_delayed_error; + udp->udp_delayed_error = 0; - if (error != 0) { - freemsg(mp); - return (error); - } + /* Compare IP address and port */ - error = udp_send_not_connected(connp, mp, - (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, - curproc->p_pid); - if (error != 0) { - UDP_STAT(us, udp_out_err_output); - freemsg(mp); + if (sin->sin_port == sin2->sin_port && + sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { + mutex_exit(&connp->conn_lock); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + if (ixa != NULL) + ixa_refrele(ixa); + return (error); + } + } + if (msg->msg_controllen != 0) { + mutex_exit(&connp->conn_lock); + ASSERT(ixa == NULL); + error = udp_output_ancillary(connp, sin, NULL, mp, + NULL, msg, cr, pid); + } else if (conn_same_as_last_v4(connp, sin) && + ipsec_outbound_policy_current(ixa)) { + /* udp_output_lastdst drops conn_lock */ + error = udp_output_lastdst(connp, mp, cr, pid, ixa); + } else { + /* udp_output_newdst drops conn_lock */ + error = udp_output_newdst(connp, mp, sin, NULL, + ipversion, cr, pid, ixa); + } + ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); + if (us->us_sendto_ignerr) + return (0); + else + return (error); + default: + return (EINVAL); } - return (udp->udp_dgram_errind ? 
error : 0); } int @@ -8854,8 +6661,7 @@ udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, stropt_mp->b_wptr += sizeof (*stropt); stropt = (struct stroptions *)stropt_mp->b_rptr; stropt->so_flags = SO_WROFF | SO_HIWAT; - stropt->so_wroff = - (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); + stropt->so_wroff = connp->conn_wroff; stropt->so_hiwat = udp->udp_rcv_disply_hiwat; putnext(RD(q), stropt_mp); @@ -8881,9 +6687,9 @@ udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, faddrlen = 0; opts = 0; - if (udp->udp_dgram_errind) + if (connp->conn_dgram_errind) opts |= SO_DGRAM_ERRIND; - if (udp->udp_dontroute) + if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) opts |= SO_DONTROUTE; (*quiesced_cb)(connp->conn_upper_handle, q, &tca, @@ -8908,9 +6714,9 @@ udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, /* * No longer a streams less socket */ - rw_enter(&udp->udp_rwlock, RW_WRITER); + mutex_enter(&connp->conn_lock); connp->conn_flags &= ~IPCL_NONSTR; - rw_exit(&udp->udp_rwlock); + mutex_exit(&connp->conn_lock); mutex_exit(&udp->udp_recv_lock); @@ -8919,48 +6725,7 @@ udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, return (0); } -static int -udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) -{ - sin_t *sin = (sin_t *)sa; - sin6_t *sin6 = (sin6_t *)sa; - - ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); - ASSERT(udp != NULL); - - if (udp->udp_state != TS_DATA_XFER) - return (ENOTCONN); - - switch (udp->udp_family) { - case AF_INET: - ASSERT(udp->udp_ipversion == IPV4_VERSION); - - if (*salenp < sizeof (sin_t)) - return (EINVAL); - - *salenp = sizeof (sin_t); - *sin = sin_null; - sin->sin_family = AF_INET; - sin->sin_port = udp->udp_dstport; - sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); - break; - case AF_INET6: - if (*salenp < sizeof (sin6_t)) - return (EINVAL); - - *salenp = sizeof (sin6_t); - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = udp->udp_dstport; - sin6->sin6_addr = 
udp->udp_v6dst; - sin6->sin6_flowinfo = udp->udp_flowinfo; - break; - } - - return (0); -} - -/* ARGSUSED */ +/* ARGSUSED3 */ int udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, socklen_t *salenp, cred_t *cr) @@ -8972,104 +6737,29 @@ udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, /* All Solaris components should pass a cred for this operation. */ ASSERT(cr != NULL); - ASSERT(udp != NULL); - - rw_enter(&udp->udp_rwlock, RW_READER); - - error = udp_do_getpeername(udp, sa, salenp); - - rw_exit(&udp->udp_rwlock); - + mutex_enter(&connp->conn_lock); + if (udp->udp_state != TS_DATA_XFER) + error = ENOTCONN; + else + error = conn_getpeername(connp, sa, salenp); + mutex_exit(&connp->conn_lock); return (error); } -static int -udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) -{ - sin_t *sin = (sin_t *)sa; - sin6_t *sin6 = (sin6_t *)sa; - - ASSERT(udp != NULL); - ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); - - switch (udp->udp_family) { - case AF_INET: - ASSERT(udp->udp_ipversion == IPV4_VERSION); - - if (*salenp < sizeof (sin_t)) - return (EINVAL); - - *salenp = sizeof (sin_t); - *sin = sin_null; - sin->sin_family = AF_INET; - if (udp->udp_state == TS_UNBND) { - break; - } - sin->sin_port = udp->udp_port; - - if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && - !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { - sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); - } else { - /* - * INADDR_ANY - * udp_v6src is not set, we might be bound to - * broadcast/multicast. 
Use udp_bound_v6src as - * local address instead (that could - * also still be INADDR_ANY) - */ - sin->sin_addr.s_addr = - V4_PART_OF_V6(udp->udp_bound_v6src); - } - break; - - case AF_INET6: - if (*salenp < sizeof (sin6_t)) - return (EINVAL); - - *salenp = sizeof (sin6_t); - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - if (udp->udp_state == TS_UNBND) { - break; - } - sin6->sin6_port = udp->udp_port; - - if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { - sin6->sin6_addr = udp->udp_v6src; - } else { - /* - * UNSPECIFIED - * udp_v6src is not set, we might be bound to - * broadcast/multicast. Use udp_bound_v6src as - * local address instead (that could - * also still be UNSPECIFIED) - */ - sin6->sin6_addr = udp->udp_bound_v6src; - } - } - return (0); -} - -/* ARGSUSED */ +/* ARGSUSED3 */ int udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, socklen_t *salenp, cred_t *cr) { conn_t *connp = (conn_t *)proto_handle; - udp_t *udp = connp->conn_udp; int error; /* All Solaris components should pass a cred for this operation. 
*/ ASSERT(cr != NULL); - ASSERT(udp != NULL); - rw_enter(&udp->udp_rwlock, RW_READER); - - error = udp_do_getsockname(udp, sa, salenp); - - rw_exit(&udp->udp_rwlock); - + mutex_enter(&connp->conn_lock); + error = conn_getsockname(connp, sa, salenp); + mutex_exit(&connp->conn_lock); return (error); } @@ -9078,7 +6768,6 @@ udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, void *optvalp, socklen_t *optlen, cred_t *cr) { conn_t *connp = (conn_t *)proto_handle; - udp_t *udp = connp->conn_udp; int error; t_uscalar_t max_optbuf_len; void *optvalp_buf; @@ -9090,7 +6779,6 @@ udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, udp_opt_obj.odb_opt_des_arr, udp_opt_obj.odb_opt_arr_cnt, - udp_opt_obj.odb_topmost_tpiprovider, B_FALSE, B_TRUE, cr); if (error != 0) { if (error < 0) @@ -9099,28 +6787,22 @@ udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, } optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); - rw_enter(&udp->udp_rwlock, RW_READER); len = udp_opt_get(connp, level, option_name, optvalp_buf); - rw_exit(&udp->udp_rwlock); - - if (len < 0) { - /* - * Pass on to IP - */ + if (len == -1) { kmem_free(optvalp_buf, max_optbuf_len); - return (ip_get_options(connp, level, option_name, - optvalp, optlen, cr)); - } else { - /* - * update optlen and copy option value - */ - t_uscalar_t size = MIN(len, *optlen); - bcopy(optvalp_buf, optvalp, size); - bcopy(&size, optlen, sizeof (size)); - - kmem_free(optvalp_buf, max_optbuf_len); - return (0); + return (EINVAL); } + + /* + * update optlen and copy option value + */ + t_uscalar_t size = MIN(len, *optlen); + + bcopy(optvalp_buf, optvalp, size); + bcopy(&size, optlen, sizeof (size)); + + kmem_free(optvalp_buf, max_optbuf_len); + return (0); } int @@ -9128,7 +6810,6 @@ udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, const void *optvalp, socklen_t optlen, 
cred_t *cr) { conn_t *connp = (conn_t *)proto_handle; - udp_t *udp = connp->conn_udp; int error; /* All Solaris components should pass a cred for this operation. */ @@ -9137,7 +6818,6 @@ udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, error = proto_opt_check(level, option_name, optlen, NULL, udp_opt_obj.odb_opt_des_arr, udp_opt_obj.odb_opt_arr_cnt, - udp_opt_obj.odb_topmost_tpiprovider, B_TRUE, B_FALSE, cr); if (error != 0) { @@ -9146,19 +6826,11 @@ udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, return (error); } - rw_enter(&udp->udp_rwlock, RW_WRITER); error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, NULL, cr); - rw_exit(&udp->udp_rwlock); - if (error < 0) { - /* - * Pass on to ip - */ - error = ip_set_options(connp, level, option_name, optvalp, - optlen, cr); - } + ASSERT(error >= 0); return (error); } @@ -9174,7 +6846,7 @@ udp_clr_flowctrl(sock_lower_handle_t proto_handle) mutex_exit(&udp->udp_recv_lock); } -/* ARGSUSED */ +/* ARGSUSED2 */ int udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) { @@ -9204,6 +6876,27 @@ udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, /* All Solaris components should pass a cred for this operation. */ ASSERT(cr != NULL); + /* + * If we don't have a helper stream then create one. + * ip_create_helper_stream takes care of locking the conn_t, + * so this check for NULL is just a performance optimization. + */ + if (connp->conn_helper_info == NULL) { + udp_stack_t *us = connp->conn_udp->udp_us; + + ASSERT(us->us_ldi_ident != NULL); + + /* + * Create a helper stream for non-STREAMS socket. + */ + error = ip_create_helper_stream(connp, us->us_ldi_ident); + if (error != 0) { + ip0dbg(("tcp_ioctl: create of IP helper stream " + "failed %d\n", error)); + return (error); + } + } + switch (cmd) { case ND_SET: case ND_GET: |