summary refs log tree commit diff
path: root/usr/src/uts/common/inet/udp/udp.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/inet/udp/udp.c')
-rw-r--r-- usr/src/uts/common/inet/udp/udp.c 7703
1 files changed, 2698 insertions, 5005 deletions
diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c
index d0bab511b0..e18fc57f40 100644
--- a/usr/src/uts/common/inet/udp/udp.c
+++ b/usr/src/uts/common/inet/udp/udp.c
@@ -26,12 +26,9 @@
#include <sys/types.h>
#include <sys/stream.h>
-#include <sys/dlpi.h>
-#include <sys/pattr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
-#include <sys/time.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
@@ -41,7 +38,9 @@
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/kmem.h>
+#include <sys/cred_impl.h>
#include <sys/policy.h>
+#include <sys/priv.h>
#include <sys/ucred.h>
#include <sys/zone.h>
@@ -57,12 +56,11 @@
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>
-#include <net/if.h>
-#include <net/route.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
+#include <inet/ipsec_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
@@ -74,34 +72,25 @@
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
-#include <inet/udp_impl.h>
#include <inet/ipclassifier.h>
-#include <inet/ipsec_impl.h>
-#include <inet/ipp_common.h>
#include <sys/squeue_impl.h>
#include <inet/ipnet.h>
#include <sys/ethernet.h>
-/*
- * The ipsec_info.h header file is here since it has the definition for the
- * M_CTL message types used by IP to convey information to the ULP. The
- * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
- */
-#include <net/pfkeyv2.h>
-#include <inet/ipsec_info.h>
-
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>
+#include <inet/udp_impl.h>
+
/*
* Synchronization notes:
*
* UDP is MT and uses the usual kernel synchronization primitives. There are 2
- * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock.
- * We also use conn_lock when updating things that affect the IP classifier
- * lookup.
- * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock.
+ * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
+ * protects the contents of the udp_t. uf_lock protects the address and the
+ * fanout information.
+ * The lock order is conn_lock -> uf_lock.
*
* The fanout lock uf_lock:
* When a UDP endpoint is bound to a local port, it is inserted into
@@ -114,11 +103,6 @@
* from the bind hash list only when it is being unbound or being closed.
* The per bucket lock also protects a UDP endpoint's state changes.
*
- * The udp_rwlock:
- * This protects most of the other fields in the udp_t. The exact list of
- * fields which are protected by each of the above locks is documented in
- * the udp_t structure definition.
- *
* Plumbing notes:
* UDP is always a device driver. For compatibility with mibopen() code
* it is possible to I_PUSH "udp", but that results in pushing a passthrough
@@ -133,41 +117,32 @@
/* For /etc/system control */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
-/* Option processing attrs */
-typedef struct udpattrs_s {
- union {
- ip6_pkt_t *udpattr_ipp6; /* For V6 */
- ip4_pkt_t *udpattr_ipp4; /* For V4 */
- } udpattr_ippu;
-#define udpattr_ipp6 udpattr_ippu.udpattr_ipp6
-#define udpattr_ipp4 udpattr_ippu.udpattr_ipp4
- mblk_t *udpattr_mb;
- boolean_t udpattr_credset;
-} udpattrs_t;
-
static void udp_addr_req(queue_t *q, mblk_t *mp);
static void udp_tpi_bind(queue_t *q, mblk_t *mp);
static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
-static int udp_build_hdrs(udp_t *udp);
+static int udp_build_hdr_template(conn_t *, const in6_addr_t *,
+ const in6_addr_t *, in_port_t, uint32_t);
static void udp_capability_req(queue_t *q, mblk_t *mp);
static int udp_tpi_close(queue_t *q, int flags);
+static void udp_close_free(conn_t *);
static void udp_tpi_connect(queue_t *q, mblk_t *mp);
static void udp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
- int sys_error);
-static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
- t_scalar_t tlierr, int unixerr);
+ int sys_error);
+static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
+ t_scalar_t tlierr, int sys_error);
static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
cred_t *cr);
static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
char *value, caddr_t cp, cred_t *cr);
static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
char *value, caddr_t cp, cred_t *cr);
-static void udp_icmp_error(conn_t *, mblk_t *);
-static void udp_icmp_error_ipv6(conn_t *, mblk_t *);
+static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
+static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
+ ip_recv_attr_t *ira);
static void udp_info_req(queue_t *q, mblk_t *mp);
-static void udp_input(void *, mblk_t *, void *);
+static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
static void udp_lrput(queue_t *, mblk_t *);
static void udp_lwput(queue_t *, mblk_t *);
static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
@@ -176,24 +151,34 @@ static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
cred_t *credp);
static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
cred_t *credp);
-static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
- int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
+int udp_opt_set(conn_t *connp, uint_t optset_context,
+ int level, int name, uint_t inlen,
+ uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
+ void *thisdg_attrs, cred_t *cr);
+int udp_opt_get(conn_t *connp, int level, int name,
+ uchar_t *ptr);
+static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
+ pid_t pid);
+static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
+ pid_t pid, ip_xmit_attr_t *ixa);
+static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
+ sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
+ ip_xmit_attr_t *ixa);
static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
cred_t *cr);
-static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp,
- ipha_t *ipha);
-static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
- t_scalar_t destlen, t_scalar_t err);
+static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
+ const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
+ int *);
+static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
+ mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
+static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
+static void udp_ud_err_connected(conn_t *, t_scalar_t);
static void udp_tpi_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
boolean_t random);
-static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
- int *, boolean_t, struct nmsghdr *, cred_t *, pid_t);
-static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
- int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid);
static void udp_wput_other(queue_t *q, mblk_t *mp);
static void udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void udp_wput_fallback(queue_t *q, mblk_t *mp);
@@ -208,11 +193,9 @@ static void *udp_kstat2_init(netstackid_t, udp_stat_t *);
static void udp_kstat2_fini(netstackid_t, kstat_t *);
static int udp_kstat_update(kstat_t *kp, int rw);
-static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);
-static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *,
- cred_t *, pid_t);
-static void udp_ulp_recv(conn_t *, mblk_t *);
+/* Common routines for TPI and socket module */
+static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
/* Common routine for TPI and socket module */
static conn_t *udp_do_open(cred_t *, boolean_t, int);
@@ -220,30 +203,20 @@ static void udp_do_close(conn_t *);
static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
boolean_t);
static int udp_do_unbind(conn_t *);
-static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *);
-static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *);
int udp_getsockname(sock_lower_handle_t,
struct sockaddr *, socklen_t *, cred_t *);
int udp_getpeername(sock_lower_handle_t,
struct sockaddr *, socklen_t *, cred_t *);
static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
- cred_t *cr);
-static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int);
+ cred_t *, pid_t);
#define UDP_RECV_HIWATER (56 * 1024)
#define UDP_RECV_LOWATER 128
#define UDP_XMIT_HIWATER (56 * 1024)
#define UDP_XMIT_LOWATER 1024
-/*
- * The following is defined in tcp.c
- */
-extern int (*cl_inet_connect2)(netstackid_t stack_id,
- uint8_t protocol, boolean_t is_outgoing,
- sa_family_t addr_family,
- uint8_t *laddrp, in_port_t lport,
- uint8_t *faddrp, in_port_t fport, void *args);
+#pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
/*
* Checks if the given destination addr/port is allowed out.
@@ -251,7 +224,7 @@ extern int (*cl_inet_connect2)(netstackid_t stack_id,
* Called for each connect() and for sendto()/sendmsg() to a different
* destination.
* For connect(), called in udp_connect().
- * For sendto()/sendmsg(), called in udp_output_v{4,6}().
+ * For sendto()/sendmsg(), called in udp_output_newdst().
*
* This macro assumes that the cl_inet_connect2 hook is not NULL.
* Please check this before calling this macro.
@@ -260,25 +233,26 @@ extern int (*cl_inet_connect2)(netstackid_t stack_id,
* CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
* in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
*/
-#define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \
+#define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \
(err) = 0; \
/* \
* Running in cluster mode - check and register active \
* "connection" information \
*/ \
- if ((udp)->udp_ipversion == IPV4_VERSION) \
+ if ((cp)->conn_ipversion == IPV4_VERSION) \
(err) = (*cl_inet_connect2)( \
(cp)->conn_netstack->netstack_stackid, \
IPPROTO_UDP, is_outgoing, AF_INET, \
- (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \
- (udp)->udp_port, \
- (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \
+ (uint8_t *)&((cp)->conn_laddr_v4), \
+ (cp)->conn_lport, \
+ (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \
(in_port_t)(fport), NULL); \
else \
(err) = (*cl_inet_connect2)( \
(cp)->conn_netstack->netstack_stackid, \
IPPROTO_UDP, is_outgoing, AF_INET6, \
- (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \
+ (uint8_t *)&((cp)->conn_laddr_v6), \
+ (cp)->conn_lport, \
(uint8_t *)(faddrp), (in_port_t)(fport), NULL); \
}
@@ -387,6 +361,8 @@ udpparam_t udp_param_arr[] = {
{ 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
{ UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"},
{ 65536, (1<<30), 2*1024*1024, "udp_max_buf"},
+ { 0, 1, 0, "udp_pmtu_discovery" },
+ { 0, 1, 0, "udp_sendto_ignerr" },
};
/* END CSTYLED */
@@ -451,9 +427,10 @@ retry:
static void
udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
{
- udp_t *udpnext;
- kmutex_t *lockp;
- udp_stack_t *us = udp->udp_us;
+ udp_t *udpnext;
+ kmutex_t *lockp;
+ udp_stack_t *us = udp->udp_us;
+ conn_t *connp = udp->udp_connp;
if (udp->udp_ptpbhn == NULL)
return;
@@ -462,9 +439,9 @@ udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
* Extract the lock pointer in case there are concurrent
* hash_remove's for this instance.
*/
- ASSERT(udp->udp_port != 0);
+ ASSERT(connp->conn_lport != 0);
if (!caller_holds_lock) {
- lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
us->us_bind_fanout_size)].uf_lock;
ASSERT(lockp != NULL);
mutex_enter(lockp);
@@ -486,8 +463,10 @@ udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
+ conn_t *connp = udp->udp_connp;
udp_t **udpp;
udp_t *udpnext;
+ conn_t *connext;
ASSERT(MUTEX_HELD(&uf->uf_lock));
ASSERT(udp->udp_ptpbhn == NULL);
@@ -503,11 +482,11 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
* specific address get preference over those binding to
* INADDR_ANY.
*/
- if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
- !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
+ connext = udpnext->udp_connp;
+ if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
+ !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
while ((udpnext = udpp[0]) != NULL &&
- !V6_OR_V4_INADDR_ANY(
- udpnext->udp_bound_v6src)) {
+ !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
udpp = &(udpnext->udp_bind_hash);
}
if (udpnext != NULL)
@@ -525,10 +504,9 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
* This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
* passed to udp_wput.
* It associates a port number and local address with the stream.
- * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
- * protocol type (IPPROTO_UDP) placed in the message following the address.
- * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
- * (Called as writer.)
+ * It calls IP to verify the local IP address, and calls IP to insert
+ * the conn_t in the fanout table.
+ * If everything is ok it then sends the T_BIND_ACK back up.
*
* Note that UDP over IPv4 and IPv6 sockets can use the same port number
* without setting SO_REUSEADDR. This is needed so that they
@@ -580,10 +558,10 @@ udp_tpi_bind(queue_t *q, mblk_t *mp)
}
/*
* Reallocate the message to make sure we have enough room for an
- * address and the protocol type.
+ * address.
*/
- mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
- if (!mp1) {
+ mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
+ if (mp1 == NULL) {
udp_err_ack(q, mp, TSYSERR, ENOMEM);
return;
}
@@ -597,7 +575,7 @@ udp_tpi_bind(queue_t *q, mblk_t *mp)
switch (tbr->ADDR_length) {
case 0: /* Request for a generic port */
tbr->ADDR_offset = sizeof (struct T_bind_req);
- if (udp->udp_family == AF_INET) {
+ if (connp->conn_family == AF_INET) {
tbr->ADDR_length = sizeof (sin_t);
sin = (sin_t *)&tbr[1];
*sin = sin_null;
@@ -605,7 +583,7 @@ udp_tpi_bind(queue_t *q, mblk_t *mp)
mp->b_wptr = (uchar_t *)&sin[1];
sa = (struct sockaddr *)sin;
} else {
- ASSERT(udp->udp_family == AF_INET6);
+ ASSERT(connp->conn_family == AF_INET6);
tbr->ADDR_length = sizeof (sin6_t);
sin6 = (sin6_t *)&tbr[1];
*sin6 = sin6_null;
@@ -622,7 +600,7 @@ udp_tpi_bind(queue_t *q, mblk_t *mp)
udp_err_ack(q, mp, TSYSERR, EINVAL);
return;
}
- if (udp->udp_family != AF_INET ||
+ if (connp->conn_family != AF_INET ||
sa->sa_family != AF_INET) {
udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
return;
@@ -636,7 +614,7 @@ udp_tpi_bind(queue_t *q, mblk_t *mp)
udp_err_ack(q, mp, TSYSERR, EINVAL);
return;
}
- if (udp->udp_family != AF_INET6 ||
+ if (connp->conn_family != AF_INET6 ||
sa->sa_family != AF_INET6) {
udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
return;
@@ -669,29 +647,21 @@ udp_tpi_bind(queue_t *q, mblk_t *mp)
* This routine handles each T_CONN_REQ message passed to udp. It
* associates a default destination address with the stream.
*
- * This routine sends down a T_BIND_REQ to IP with the following mblks:
- * T_BIND_REQ - specifying local and remote address/port
- * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src
- * T_OK_ACK - for the T_CONN_REQ
- * T_CONN_CON - to keep the TPI user happy
- *
- * The connect completes in udp_do_connect.
- * When a T_BIND_ACK is received information is extracted from the IRE
- * and the two appended messages are sent to the TPI user.
- * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
- * convert it to an error ack for the appropriate primitive.
+ * After various error checks are completed, udp_connect() lays
+ * the target address and port into the composite header template.
+ * Then we ask IP for information, including a source address if we didn't
+ * already have one. Finally we send up the T_OK_ACK reply message.
*/
static void
udp_tpi_connect(queue_t *q, mblk_t *mp)
{
- udp_t *udp;
conn_t *connp = Q_TO_CONN(q);
int error;
socklen_t len;
struct sockaddr *sa;
struct T_conn_req *tcr;
cred_t *cr;
-
+ pid_t pid;
/*
* All Solaris components should pass a db_credp
* for this TPI message, hence we ASSERT.
@@ -699,14 +669,13 @@ udp_tpi_connect(queue_t *q, mblk_t *mp)
* like a TPI message sent by some other kernel
* component, we check and return an error.
*/
- cr = msg_getcred(mp, NULL);
+ cr = msg_getcred(mp, &pid);
ASSERT(cr != NULL);
if (cr == NULL) {
udp_err_ack(q, mp, TSYSERR, EINVAL);
return;
}
- udp = connp->conn_udp;
tcr = (struct T_conn_req *)mp->b_rptr;
/* A bit of sanity checking */
@@ -724,7 +693,7 @@ udp_tpi_connect(queue_t *q, mblk_t *mp)
* Determine packet type based on the type of address passed in;
* the request should contain an IPv4 or IPv6 address.
* Make sure that address family matches the type of
- * family of the the address passed down
+ * family of the address passed down.
*/
len = tcr->DEST_length;
switch (tcr->DEST_length) {
@@ -743,13 +712,13 @@ udp_tpi_connect(queue_t *q, mblk_t *mp)
break;
}
- error = proto_verify_ip_addr(udp->udp_family, sa, len);
+ error = proto_verify_ip_addr(connp->conn_family, sa, len);
if (error != 0) {
udp_err_ack(q, mp, TSYSERR, error);
return;
}
- error = udp_do_connect(connp, sa, len, cr);
+ error = udp_do_connect(connp, sa, len, cr, pid);
if (error != 0) {
if (error < 0)
udp_err_ack(q, mp, -error, 0);
@@ -761,7 +730,7 @@ udp_tpi_connect(queue_t *q, mblk_t *mp)
* We have to send a connection confirmation to
* keep TLI happy.
*/
- if (udp->udp_family == AF_INET) {
+ if (connp->conn_family == AF_INET) {
mp1 = mi_tpi_conn_con(NULL, (char *)sa,
sizeof (sin_t), NULL, 0);
} else {
@@ -810,72 +779,14 @@ done:
return (0);
}
-/*
- * Called in the close path to quiesce the conn
- */
-void
-udp_quiesce_conn(conn_t *connp)
-{
- udp_t *udp = connp->conn_udp;
-
- if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
- /*
- * Running in cluster mode - register unbind information
- */
- if (udp->udp_ipversion == IPV4_VERSION) {
- (*cl_inet_unbind)(
- connp->conn_netstack->netstack_stackid,
- IPPROTO_UDP, AF_INET,
- (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
- (in_port_t)udp->udp_port, NULL);
- } else {
- (*cl_inet_unbind)(
- connp->conn_netstack->netstack_stackid,
- IPPROTO_UDP, AF_INET6,
- (uint8_t *)(&(udp->udp_v6src)),
- (in_port_t)udp->udp_port, NULL);
- }
- }
-
- udp_bind_hash_remove(udp, B_FALSE);
-
-}
-
-void
+static void
udp_close_free(conn_t *connp)
{
udp_t *udp = connp->conn_udp;
/* If there are any options associated with the stream, free them. */
- if (udp->udp_ip_snd_options != NULL) {
- mi_free((char *)udp->udp_ip_snd_options);
- udp->udp_ip_snd_options = NULL;
- udp->udp_ip_snd_options_len = 0;
- }
-
- if (udp->udp_ip_rcv_options != NULL) {
- mi_free((char *)udp->udp_ip_rcv_options);
- udp->udp_ip_rcv_options = NULL;
- udp->udp_ip_rcv_options_len = 0;
- }
-
- /* Free memory associated with sticky options */
- if (udp->udp_sticky_hdrs_len != 0) {
- kmem_free(udp->udp_sticky_hdrs,
- udp->udp_sticky_hdrs_len);
- udp->udp_sticky_hdrs = NULL;
- udp->udp_sticky_hdrs_len = 0;
- }
- if (udp->udp_last_cred != NULL) {
- crfree(udp->udp_last_cred);
- udp->udp_last_cred = NULL;
- }
- if (udp->udp_effective_cred != NULL) {
- crfree(udp->udp_effective_cred);
- udp->udp_effective_cred = NULL;
- }
-
- ip6_pkt_free(&udp->udp_sticky_ipp);
+ if (udp->udp_recv_ipp.ipp_fields != 0)
+ ip_pkt_free(&udp->udp_recv_ipp);
/*
* Clear any fields which the kmem_cache constructor clears.
@@ -892,59 +803,48 @@ static int
udp_do_disconnect(conn_t *connp)
{
udp_t *udp;
- mblk_t *ire_mp;
udp_fanout_t *udpf;
udp_stack_t *us;
int error;
udp = connp->conn_udp;
us = udp->udp_us;
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
- rw_exit(&udp->udp_rwlock);
+ mutex_enter(&connp->conn_lock);
+ if (udp->udp_state != TS_DATA_XFER) {
+ mutex_exit(&connp->conn_lock);
return (-TOUTSTATE);
}
- udp->udp_pending_op = T_DISCON_REQ;
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
us->us_bind_fanout_size)];
mutex_enter(&udpf->uf_lock);
- udp->udp_v6src = udp->udp_bound_v6src;
+ if (connp->conn_mcbc_bind)
+ connp->conn_saddr_v6 = ipv6_all_zeros;
+ else
+ connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
+ connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
+ connp->conn_faddr_v6 = ipv6_all_zeros;
+ connp->conn_fport = 0;
udp->udp_state = TS_IDLE;
mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET6) {
- /* Rebuild the header template */
- error = udp_build_hdrs(udp);
- if (error != 0) {
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- return (error);
- }
- }
+ /* Remove any remnants of mapped address binding */
+ if (connp->conn_family == AF_INET6)
+ connp->conn_ipversion = IPV6_VERSION;
- ire_mp = allocb(sizeof (ire_t), BPRI_HI);
- if (ire_mp == NULL) {
- mutex_enter(&udpf->uf_lock);
- udp->udp_pending_op = -1;
- mutex_exit(&udpf->uf_lock);
- rw_exit(&udp->udp_rwlock);
- return (ENOMEM);
- }
-
- rw_exit(&udp->udp_rwlock);
-
- if (udp->udp_family == AF_INET6) {
- error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP,
- &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
- } else {
- error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP,
- V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE);
- }
+ connp->conn_v6lastdst = ipv6_all_zeros;
+ error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
+ &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
+ mutex_exit(&connp->conn_lock);
+ if (error != 0)
+ return (error);
- return (udp_post_ip_bind_connect(udp, ire_mp, error));
+ /*
+ * Tell IP to remove the full binding and revert
+ * to the local address binding.
+ */
+ return (ip_laddr_fanout_insert(connp));
}
-
static void
udp_tpi_disconnect(queue_t *q, mblk_t *mp)
{
@@ -981,12 +881,9 @@ int
udp_disconnect(conn_t *connp)
{
int error;
- udp_t *udp = connp->conn_udp;
-
- udp->udp_dgram_errind = B_FALSE;
+ connp->conn_dgram_errind = B_FALSE;
error = udp_do_disconnect(connp);
-
if (error < 0)
error = proto_tlitosyserr(-error);
@@ -1003,8 +900,8 @@ udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
/* Shorthand to generate and send TPI error acks to our client */
static void
-udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
- int sys_error)
+udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
+ t_scalar_t t_error, int sys_error)
{
struct T_error_ack *teackp;
@@ -1018,7 +915,7 @@ udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
}
}
-/*ARGSUSED*/
+/*ARGSUSED2*/
static int
udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
@@ -1033,7 +930,7 @@ udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
return (0);
}
-/* ARGSUSED */
+/* ARGSUSED1 */
static int
udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
cred_t *cr)
@@ -1072,7 +969,7 @@ udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
return (0);
}
-/* ARGSUSED */
+/* ARGSUSED1 */
static int
udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
cred_t *cr)
@@ -1109,39 +1006,41 @@ udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
#define ICMP_MIN_UDP_HDR 4
/*
- * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP.
+ * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
* Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
* Assumes that IP has pulled up everything up to and including the ICMP header.
*/
+/* ARGSUSED2 */
static void
-udp_icmp_error(conn_t *connp, mblk_t *mp)
+udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
- icmph_t *icmph;
- ipha_t *ipha;
- int iph_hdr_length;
- udpha_t *udpha;
- sin_t sin;
- sin6_t sin6;
- mblk_t *mp1;
- int error = 0;
- udp_t *udp = connp->conn_udp;
+ conn_t *connp = (conn_t *)arg1;
+ icmph_t *icmph;
+ ipha_t *ipha;
+ int iph_hdr_length;
+ udpha_t *udpha;
+ sin_t sin;
+ sin6_t sin6;
+ mblk_t *mp1;
+ int error = 0;
+ udp_t *udp = connp->conn_udp;
- mp1 = NULL;
ipha = (ipha_t *)mp->b_rptr;
ASSERT(OK_32PTR(mp->b_rptr));
if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
- udp_icmp_error_ipv6(connp, mp);
+ udp_icmp_error_ipv6(connp, mp, ira);
return;
}
ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
/* Skip past the outer IP and ICMP headers */
- iph_hdr_length = IPH_HDR_LENGTH(ipha);
+ ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
+ iph_hdr_length = ira->ira_ip_hdr_length;
icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
- ipha = (ipha_t *)&icmph[1];
+ ipha = (ipha_t *)&icmph[1]; /* Inner IP header */
/* Skip past the inner IP and find the ULP header */
iph_hdr_length = IPH_HDR_LENGTH(ipha);
@@ -1150,11 +1049,41 @@ udp_icmp_error(conn_t *connp, mblk_t *mp)
switch (icmph->icmph_type) {
case ICMP_DEST_UNREACHABLE:
switch (icmph->icmph_code) {
- case ICMP_FRAGMENTATION_NEEDED:
+ case ICMP_FRAGMENTATION_NEEDED: {
+ ipha_t *ipha;
+ ip_xmit_attr_t *ixa;
/*
* IP has already adjusted the path MTU.
+ * But we need to adjust DF for IPv4.
*/
+ if (connp->conn_ipversion != IPV4_VERSION)
+ break;
+
+ ixa = conn_get_ixa(connp, B_FALSE);
+ if (ixa == NULL || ixa->ixa_ire == NULL) {
+ /*
+ * Some other thread holds conn_ixa. We will
+ * redo this on the next ICMP too big.
+ */
+ if (ixa != NULL)
+ ixa_refrele(ixa);
+ break;
+ }
+ (void) ip_get_pmtu(ixa);
+
+ mutex_enter(&connp->conn_lock);
+ ipha = (ipha_t *)connp->conn_ht_iphc;
+ if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
+ ipha->ipha_fragment_offset_and_flags |=
+ IPH_DF_HTONS;
+ } else {
+ ipha->ipha_fragment_offset_and_flags &=
+ ~IPH_DF_HTONS;
+ }
+ mutex_exit(&connp->conn_lock);
+ ixa_refrele(ixa);
break;
+ }
case ICMP_PORT_UNREACHABLE:
case ICMP_PROTOCOL_UNREACHABLE:
error = ECONNREFUSED;
@@ -1177,25 +1106,24 @@ udp_icmp_error(conn_t *connp, mblk_t *mp)
* Deliver T_UDERROR_IND when the application has asked for it.
* The socket layer enables this automatically when connected.
*/
- if (!udp->udp_dgram_errind) {
+ if (!connp->conn_dgram_errind) {
freemsg(mp);
return;
}
-
- switch (udp->udp_family) {
+ switch (connp->conn_family) {
case AF_INET:
sin = sin_null;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ipha->ipha_dst;
sin.sin_port = udpha->uha_dst_port;
if (IPCL_IS_NONSTR(connp)) {
- rw_enter(&udp->udp_rwlock, RW_WRITER);
+ mutex_enter(&connp->conn_lock);
if (udp->udp_state == TS_DATA_XFER) {
- if (sin.sin_port == udp->udp_dstport &&
+ if (sin.sin_port == connp->conn_fport &&
sin.sin_addr.s_addr ==
- V4_PART_OF_V6(udp->udp_v6dst)) {
- rw_exit(&udp->udp_rwlock);
+ connp->conn_faddr_v4) {
+ mutex_exit(&connp->conn_lock);
(*connp->conn_upcalls->su_set_error)
(connp->conn_upper_handle, error);
goto done;
@@ -1204,10 +1132,12 @@ udp_icmp_error(conn_t *connp, mblk_t *mp)
udp->udp_delayed_error = error;
*((sin_t *)&udp->udp_delayed_addr) = sin;
}
- rw_exit(&udp->udp_rwlock);
+ mutex_exit(&connp->conn_lock);
} else {
mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
NULL, 0, error);
+ if (mp1 != NULL)
+ putnext(connp->conn_rq, mp1);
}
break;
case AF_INET6:
@@ -1216,12 +1146,12 @@ udp_icmp_error(conn_t *connp, mblk_t *mp)
IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
sin6.sin6_port = udpha->uha_dst_port;
if (IPCL_IS_NONSTR(connp)) {
- rw_enter(&udp->udp_rwlock, RW_WRITER);
+ mutex_enter(&connp->conn_lock);
if (udp->udp_state == TS_DATA_XFER) {
- if (sin6.sin6_port == udp->udp_dstport &&
+ if (sin6.sin6_port == connp->conn_fport &&
IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
- &udp->udp_v6dst)) {
- rw_exit(&udp->udp_rwlock);
+ &connp->conn_faddr_v6)) {
+ mutex_exit(&connp->conn_lock);
(*connp->conn_upcalls->su_set_error)
(connp->conn_upper_handle, error);
goto done;
@@ -1230,17 +1160,16 @@ udp_icmp_error(conn_t *connp, mblk_t *mp)
udp->udp_delayed_error = error;
*((sin6_t *)&udp->udp_delayed_addr) = sin6;
}
- rw_exit(&udp->udp_rwlock);
+ mutex_exit(&connp->conn_lock);
} else {
mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
NULL, 0, error);
+ if (mp1 != NULL)
+ putnext(connp->conn_rq, mp1);
}
break;
}
- if (mp1 != NULL)
- putnext(connp->conn_rq, mp1);
done:
- ASSERT(!RW_ISWRITER(&udp->udp_rwlock));
freemsg(mp);
}
@@ -1251,7 +1180,7 @@ done:
* ICMPv6 header.
*/
static void
-udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
+udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
{
icmp6_t *icmp6;
ip6_t *ip6h, *outer_ip6h;
@@ -1265,12 +1194,19 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
udp_stack_t *us = udp->udp_us;
outer_ip6h = (ip6_t *)mp->b_rptr;
+#ifdef DEBUG
if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
else
iph_hdr_length = IPV6_HDR_LEN;
+ ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
+#endif
+ /* Skip past the outer IP and ICMP headers */
+ iph_hdr_length = ira->ira_ip_hdr_length;
icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
- ip6h = (ip6_t *)&icmp6[1];
+
+ /* Skip past the inner IP and find the ULP header */
+ ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */
if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
freemsg(mp);
return;
@@ -1308,7 +1244,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
* information, send up an empty message containing an
* IPV6_PATHMTU ancillary data item.
*/
- if (!udp->udp_ipv6_recvpathmtu)
+ if (!connp->conn_ipv6_recvpathmtu)
break;
udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
@@ -1334,7 +1270,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
sin6 = (sin6_t *)&tudi[1];
bzero(sin6, sizeof (sin6_t));
sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = udp->udp_v6dst;
+ sin6->sin6_addr = connp->conn_faddr_v6;
toh = (struct T_opthdr *)&sin6[1];
toh->level = IPPROTO_IPV6;
@@ -1352,8 +1288,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
* message. Free it, then send our empty message.
*/
freemsg(mp);
- udp_ulp_recv(connp, newmp);
-
+ udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
return;
}
case ICMP6_TIME_EXCEEDED:
@@ -1378,7 +1313,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
* Deliver T_UDERROR_IND when the application has asked for it.
* The socket layer enables this automatically when connected.
*/
- if (!udp->udp_dgram_errind) {
+ if (!connp->conn_dgram_errind) {
freemsg(mp);
return;
}
@@ -1390,12 +1325,12 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
if (IPCL_IS_NONSTR(connp)) {
- rw_enter(&udp->udp_rwlock, RW_WRITER);
+ mutex_enter(&connp->conn_lock);
if (udp->udp_state == TS_DATA_XFER) {
- if (sin6.sin6_port == udp->udp_dstport &&
+ if (sin6.sin6_port == connp->conn_fport &&
IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
- &udp->udp_v6dst)) {
- rw_exit(&udp->udp_rwlock);
+ &connp->conn_faddr_v6)) {
+ mutex_exit(&connp->conn_lock);
(*connp->conn_upcalls->su_set_error)
(connp->conn_upper_handle, error);
goto done;
@@ -1404,7 +1339,7 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
udp->udp_delayed_error = error;
*((sin6_t *)&udp->udp_delayed_addr) = sin6;
}
- rw_exit(&udp->udp_rwlock);
+ mutex_exit(&connp->conn_lock);
} else {
mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
NULL, 0, error);
@@ -1412,7 +1347,6 @@ udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
putnext(connp->conn_rq, mp1);
}
done:
- ASSERT(!RW_ISWRITER(&udp->udp_rwlock));
freemsg(mp);
}
@@ -1426,11 +1360,12 @@ done:
static void
udp_addr_req(queue_t *q, mblk_t *mp)
{
- sin_t *sin;
- sin6_t *sin6;
+ struct sockaddr *sa;
mblk_t *ackmp;
struct T_addr_ack *taa;
udp_t *udp = Q_TO_UDP(q);
+ conn_t *connp = udp->udp_connp;
+ uint_t addrlen;
/* Make it large enough for worst case */
ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
@@ -1446,7 +1381,13 @@ udp_addr_req(queue_t *q, mblk_t *mp)
taa->PRIM_type = T_ADDR_ACK;
ackmp->b_datap->db_type = M_PCPROTO;
- rw_enter(&udp->udp_rwlock, RW_READER);
+
+ if (connp->conn_family == AF_INET)
+ addrlen = sizeof (sin_t);
+ else
+ addrlen = sizeof (sin6_t);
+
+ mutex_enter(&connp->conn_lock);
/*
* Note: Following code assumes 32 bit alignment of basic
* data structures like sin_t and struct T_addr_ack.
@@ -1456,91 +1397,23 @@ udp_addr_req(queue_t *q, mblk_t *mp)
* Fill in local address first
*/
taa->LOCADDR_offset = sizeof (*taa);
- if (udp->udp_family == AF_INET) {
- taa->LOCADDR_length = sizeof (sin_t);
- sin = (sin_t *)&taa[1];
- /* Fill zeroes and then initialize non-zero fields */
- *sin = sin_null;
- sin->sin_family = AF_INET;
- if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
- !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
- IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
- sin->sin_addr.s_addr);
- } else {
- /*
- * INADDR_ANY
- * udp_v6src is not set, we might be bound to
- * broadcast/multicast. Use udp_bound_v6src as
- * local address instead (that could
- * also still be INADDR_ANY)
- */
- IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
- sin->sin_addr.s_addr);
- }
- sin->sin_port = udp->udp_port;
- ackmp->b_wptr = (uchar_t *)&sin[1];
- if (udp->udp_state == TS_DATA_XFER) {
- /*
- * connected, fill remote address too
- */
- taa->REMADDR_length = sizeof (sin_t);
- /* assumed 32-bit alignment */
- taa->REMADDR_offset = taa->LOCADDR_offset +
- taa->LOCADDR_length;
-
- sin = (sin_t *)(ackmp->b_rptr +
- taa->REMADDR_offset);
- /* initialize */
- *sin = sin_null;
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr =
- V4_PART_OF_V6(udp->udp_v6dst);
- sin->sin_port = udp->udp_dstport;
- ackmp->b_wptr = (uchar_t *)&sin[1];
- }
- } else {
- taa->LOCADDR_length = sizeof (sin6_t);
- sin6 = (sin6_t *)&taa[1];
- /* Fill zeroes and then initialize non-zero fields */
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
- sin6->sin6_addr = udp->udp_v6src;
- } else {
- /*
- * UNSPECIFIED
- * udp_v6src is not set, we might be bound to
- * broadcast/multicast. Use udp_bound_v6src as
- * local address instead (that could
- * also still be UNSPECIFIED)
- */
- sin6->sin6_addr =
- udp->udp_bound_v6src;
- }
- sin6->sin6_port = udp->udp_port;
- ackmp->b_wptr = (uchar_t *)&sin6[1];
- if (udp->udp_state == TS_DATA_XFER) {
- /*
- * connected, fill remote address too
- */
- taa->REMADDR_length = sizeof (sin6_t);
- /* assumed 32-bit alignment */
- taa->REMADDR_offset = taa->LOCADDR_offset +
- taa->LOCADDR_length;
-
- sin6 = (sin6_t *)(ackmp->b_rptr +
- taa->REMADDR_offset);
- /* initialize */
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = udp->udp_v6dst;
- sin6->sin6_port = udp->udp_dstport;
- ackmp->b_wptr = (uchar_t *)&sin6[1];
- }
- ackmp->b_wptr = (uchar_t *)&sin6[1];
- }
+ taa->LOCADDR_length = addrlen;
+ sa = (struct sockaddr *)&taa[1];
+ (void) conn_getsockname(connp, sa, &addrlen);
+ ackmp->b_wptr += addrlen;
}
- rw_exit(&udp->udp_rwlock);
+ if (udp->udp_state == TS_DATA_XFER) {
+ /*
+ * connected, fill remote address too
+ */
+ taa->REMADDR_length = addrlen;
+ /* assumed 32-bit alignment */
+ taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
+ sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
+ (void) conn_getpeername(connp, sa, &addrlen);
+ ackmp->b_wptr += addrlen;
+ }
+ mutex_exit(&connp->conn_lock);
ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
qreply(q, ackmp);
}
@@ -1548,7 +1421,9 @@ udp_addr_req(queue_t *q, mblk_t *mp)
static void
udp_copy_info(struct T_info_ack *tap, udp_t *udp)
{
- if (udp->udp_family == AF_INET) {
+ conn_t *connp = udp->udp_connp;
+
+ if (connp->conn_family == AF_INET) {
*tap = udp_g_t_info_ack_ipv4;
} else {
*tap = udp_g_t_info_ack_ipv6;
@@ -1632,20 +1507,15 @@ udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
* This is the open routine for udp. It allocates a udp_t structure for
* the stream and, on the first open of the module, creates an ND table.
*/
-/*ARGSUSED2*/
static int
udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
boolean_t isv6)
{
- int error;
udp_t *udp;
conn_t *connp;
dev_t conn_dev;
- udp_stack_t *us;
vmem_t *minor_arena;
- TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q);
-
/* If the stream is already open, return immediately. */
if (q->q_ptr != NULL)
return (0);
@@ -1685,7 +1555,6 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
return (ENOMEM);
}
udp = connp->conn_udp;
- us = udp->udp_us;
*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
connp->conn_dev = conn_dev;
@@ -1699,39 +1568,27 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
connp->conn_rq = q;
connp->conn_wq = WR(q);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- ASSERT(connp->conn_ulp == IPPROTO_UDP);
+ /*
+ * Since this conn_t/udp_t is not yet visible to anybody else we don't
+ * need to lock anything.
+ */
+ ASSERT(connp->conn_proto == IPPROTO_UDP);
ASSERT(connp->conn_udp == udp);
ASSERT(udp->udp_connp == connp);
if (flag & SO_SOCKSTR) {
- connp->conn_flags |= IPCL_SOCKET;
udp->udp_issocket = B_TRUE;
}
- q->q_hiwat = us->us_recv_hiwat;
- WR(q)->q_hiwat = us->us_xmit_hiwat;
- WR(q)->q_lowat = us->us_xmit_lowat;
+ WR(q)->q_hiwat = connp->conn_sndbuf;
+ WR(q)->q_lowat = connp->conn_sndlowat;
qprocson(q);
- if (udp->udp_family == AF_INET6) {
- /* Build initial header template for transmit */
- if ((error = udp_build_hdrs(udp)) != 0) {
- rw_exit(&udp->udp_rwlock);
- qprocsoff(q);
- inet_minor_free(minor_arena, conn_dev);
- ipcl_conn_destroy(connp);
- return (error);
- }
- }
- rw_exit(&udp->udp_rwlock);
-
/* Set the Stream head write offset and high watermark. */
- (void) proto_set_tx_wroff(q, connp,
- udp->udp_max_hdr_len + us->us_wroff_extra);
- /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */
- (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat));
+ (void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
+ (void) proto_set_rx_hiwat(q, connp,
+ udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
mutex_enter(&connp->conn_lock);
connp->conn_state_flags &= ~CONN_INCIPIENT;
@@ -1753,7 +1610,6 @@ udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
* This routine gets default values of certain options whose default
* values are maintained by protcol specific code
*/
-/* ARGSUSED */
int
udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
@@ -1791,456 +1647,127 @@ udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
/*
* This routine retrieves the current status of socket options.
- * It returns the size of the option retrieved.
+ * It returns the size of the option retrieved, or -1.
*/
-static int
-udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
+int
+udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
+ uchar_t *ptr)
{
- udp_t *udp = connp->conn_udp;
- udp_stack_t *us = udp->udp_us;
int *i1 = (int *)ptr;
- ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
+ udp_t *udp = connp->conn_udp;
int len;
+ conn_opt_arg_t coas;
+ int retval;
- ASSERT(RW_READ_HELD(&udp->udp_rwlock));
- switch (level) {
- case SOL_SOCKET:
- switch (name) {
- case SO_DEBUG:
- *i1 = udp->udp_debug;
- break; /* goto sizeof (int) option return */
- case SO_REUSEADDR:
- *i1 = udp->udp_reuseaddr;
- break; /* goto sizeof (int) option return */
- case SO_TYPE:
- *i1 = SOCK_DGRAM;
- break; /* goto sizeof (int) option return */
+ coas.coa_connp = connp;
+ coas.coa_ixa = connp->conn_ixa;
+ coas.coa_ipp = &connp->conn_xmit_ipp;
+ coas.coa_ancillary = B_FALSE;
+ coas.coa_changed = 0;
+ /*
+ * We assume that the optcom framework has checked for the set
+ * of levels and names that are supported, hence we don't worry
+ * about rejecting based on that.
+ * First check for UDP specific handling, then pass to common routine.
+ */
+ switch (level) {
+ case IPPROTO_IP:
/*
- * The following three items are available here,
- * but are only meaningful to IP.
+ * Only allow IPv4 option processing on IPv4 sockets.
*/
- case SO_DONTROUTE:
- *i1 = udp->udp_dontroute;
- break; /* goto sizeof (int) option return */
- case SO_USELOOPBACK:
- *i1 = udp->udp_useloopback;
- break; /* goto sizeof (int) option return */
- case SO_BROADCAST:
- *i1 = udp->udp_broadcast;
- break; /* goto sizeof (int) option return */
-
- case SO_SNDBUF:
- *i1 = udp->udp_xmit_hiwat;
- break; /* goto sizeof (int) option return */
- case SO_RCVBUF:
- *i1 = udp->udp_rcv_disply_hiwat;
- break; /* goto sizeof (int) option return */
- case SO_DGRAM_ERRIND:
- *i1 = udp->udp_dgram_errind;
- break; /* goto sizeof (int) option return */
- case SO_RECVUCRED:
- *i1 = udp->udp_recvucred;
- break; /* goto sizeof (int) option return */
- case SO_TIMESTAMP:
- *i1 = udp->udp_timestamp;
- break; /* goto sizeof (int) option return */
- case SO_ANON_MLP:
- *i1 = connp->conn_anon_mlp;
- break; /* goto sizeof (int) option return */
- case SO_MAC_EXEMPT:
- *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
- break;
- case SO_MAC_IMPLICIT:
- *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
- break;
- case SO_ALLZONES:
- *i1 = connp->conn_allzones;
- break; /* goto sizeof (int) option return */
- case SO_EXCLBIND:
- *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
- break;
- case SO_PROTOTYPE:
- *i1 = IPPROTO_UDP;
- break;
- case SO_DOMAIN:
- *i1 = udp->udp_family;
- break;
- default:
- return (-1);
- }
- break;
- case IPPROTO_IP:
- if (udp->udp_family != AF_INET)
+ if (connp->conn_family != AF_INET)
return (-1);
+
switch (name) {
case IP_OPTIONS:
case T_IP_OPTIONS:
- len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
- if (len > 0) {
- bcopy(udp->udp_ip_rcv_options +
- udp->udp_label_len, ptr, len);
- }
- return (len);
- case IP_TOS:
- case T_IP_TOS:
- *i1 = (int)udp->udp_type_of_service;
- break; /* goto sizeof (int) option return */
- case IP_TTL:
- *i1 = (int)udp->udp_ttl;
- break; /* goto sizeof (int) option return */
- case IP_DHCPINIT_IF:
- return (-EINVAL);
- case IP_NEXTHOP:
- case IP_RECVPKTINFO:
- /*
- * This also handles IP_PKTINFO.
- * IP_PKTINFO and IP_RECVPKTINFO have the same value.
- * Differentiation is based on the size of the argument
- * passed in.
- * This option is handled in IP which will return an
- * error for IP_PKTINFO as it's not supported as a
- * sticky option.
- */
- return (-EINVAL);
- case IP_MULTICAST_IF:
- /* 0 address if not set */
- *(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
- return (sizeof (ipaddr_t));
- case IP_MULTICAST_TTL:
- *(uchar_t *)ptr = udp->udp_multicast_ttl;
- return (sizeof (uchar_t));
- case IP_MULTICAST_LOOP:
- *ptr = connp->conn_multicast_loop;
- return (sizeof (uint8_t));
- case IP_RECVOPTS:
- *i1 = udp->udp_recvopts;
- break; /* goto sizeof (int) option return */
- case IP_RECVDSTADDR:
- *i1 = udp->udp_recvdstaddr;
- break; /* goto sizeof (int) option return */
- case IP_RECVIF:
- *i1 = udp->udp_recvif;
- break; /* goto sizeof (int) option return */
- case IP_RECVSLLA:
- *i1 = udp->udp_recvslla;
- break; /* goto sizeof (int) option return */
- case IP_RECVTTL:
- *i1 = udp->udp_recvttl;
- break; /* goto sizeof (int) option return */
- case IP_ADD_MEMBERSHIP:
- case IP_DROP_MEMBERSHIP:
- case IP_BLOCK_SOURCE:
- case IP_UNBLOCK_SOURCE:
- case IP_ADD_SOURCE_MEMBERSHIP:
- case IP_DROP_SOURCE_MEMBERSHIP:
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- /* cannot "get" the value for these */
- return (-1);
- case IP_BOUND_IF:
- /* Zero if not set */
- *i1 = udp->udp_bound_if;
- break; /* goto sizeof (int) option return */
- case IP_UNSPEC_SRC:
- *i1 = udp->udp_unspec_source;
- break; /* goto sizeof (int) option return */
- case IP_BROADCAST_TTL:
- *(uchar_t *)ptr = connp->conn_broadcast_ttl;
- return (sizeof (uchar_t));
- default:
- return (-1);
- }
- break;
- case IPPROTO_IPV6:
- if (udp->udp_family != AF_INET6)
- return (-1);
- switch (name) {
- case IPV6_UNICAST_HOPS:
- *i1 = (unsigned int)udp->udp_ttl;
- break; /* goto sizeof (int) option return */
- case IPV6_MULTICAST_IF:
- /* 0 index if not set */
- *i1 = udp->udp_multicast_if_index;
- break; /* goto sizeof (int) option return */
- case IPV6_MULTICAST_HOPS:
- *i1 = udp->udp_multicast_ttl;
- break; /* goto sizeof (int) option return */
- case IPV6_MULTICAST_LOOP:
- *i1 = connp->conn_multicast_loop;
- break; /* goto sizeof (int) option return */
- case IPV6_JOIN_GROUP:
- case IPV6_LEAVE_GROUP:
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- /* cannot "get" the value for these */
- return (-1);
- case IPV6_BOUND_IF:
- /* Zero if not set */
- *i1 = udp->udp_bound_if;
- break; /* goto sizeof (int) option return */
- case IPV6_UNSPEC_SRC:
- *i1 = udp->udp_unspec_source;
- break; /* goto sizeof (int) option return */
- case IPV6_RECVPKTINFO:
- *i1 = udp->udp_ip_recvpktinfo;
- break; /* goto sizeof (int) option return */
- case IPV6_RECVTCLASS:
- *i1 = udp->udp_ipv6_recvtclass;
- break; /* goto sizeof (int) option return */
- case IPV6_RECVPATHMTU:
- *i1 = udp->udp_ipv6_recvpathmtu;
- break; /* goto sizeof (int) option return */
- case IPV6_RECVHOPLIMIT:
- *i1 = udp->udp_ipv6_recvhoplimit;
- break; /* goto sizeof (int) option return */
- case IPV6_RECVHOPOPTS:
- *i1 = udp->udp_ipv6_recvhopopts;
- break; /* goto sizeof (int) option return */
- case IPV6_RECVDSTOPTS:
- *i1 = udp->udp_ipv6_recvdstopts;
- break; /* goto sizeof (int) option return */
- case _OLD_IPV6_RECVDSTOPTS:
- *i1 = udp->udp_old_ipv6_recvdstopts;
- break; /* goto sizeof (int) option return */
- case IPV6_RECVRTHDRDSTOPTS:
- *i1 = udp->udp_ipv6_recvrthdrdstopts;
- break; /* goto sizeof (int) option return */
- case IPV6_RECVRTHDR:
- *i1 = udp->udp_ipv6_recvrthdr;
- break; /* goto sizeof (int) option return */
- case IPV6_PKTINFO: {
- /* XXX assumes that caller has room for max size! */
- struct in6_pktinfo *pkti;
-
- pkti = (struct in6_pktinfo *)ptr;
- if (ipp->ipp_fields & IPPF_IFINDEX)
- pkti->ipi6_ifindex = ipp->ipp_ifindex;
- else
- pkti->ipi6_ifindex = 0;
- if (ipp->ipp_fields & IPPF_ADDR)
- pkti->ipi6_addr = ipp->ipp_addr;
- else
- pkti->ipi6_addr = ipv6_all_zeros;
- return (sizeof (struct in6_pktinfo));
- }
- case IPV6_TCLASS:
- if (ipp->ipp_fields & IPPF_TCLASS)
- *i1 = ipp->ipp_tclass;
- else
- *i1 = IPV6_FLOW_TCLASS(
- IPV6_DEFAULT_VERS_AND_FLOW);
- break; /* goto sizeof (int) option return */
- case IPV6_NEXTHOP: {
- sin6_t *sin6 = (sin6_t *)ptr;
-
- if (!(ipp->ipp_fields & IPPF_NEXTHOP))
- return (0);
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = ipp->ipp_nexthop;
- return (sizeof (sin6_t));
- }
- case IPV6_HOPOPTS:
- if (!(ipp->ipp_fields & IPPF_HOPOPTS))
- return (0);
- if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
+ mutex_enter(&connp->conn_lock);
+ if (!(udp->udp_recv_ipp.ipp_fields &
+ IPPF_IPV4_OPTIONS)) {
+ mutex_exit(&connp->conn_lock);
return (0);
- /*
- * The cipso/label option is added by kernel.
- * User is not usually aware of this option.
- * We copy out the hbh opt after the label option.
- */
- bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
- ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
- if (udp->udp_label_len_v6 > 0) {
- ptr[0] = ((char *)ipp->ipp_hopopts)[0];
- ptr[1] = (ipp->ipp_hopoptslen -
- udp->udp_label_len_v6 + 7) / 8 - 1;
}
- return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
- case IPV6_RTHDRDSTOPTS:
- if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
- return (0);
- bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
- return (ipp->ipp_rtdstoptslen);
- case IPV6_RTHDR:
- if (!(ipp->ipp_fields & IPPF_RTHDR))
- return (0);
- bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
- return (ipp->ipp_rthdrlen);
- case IPV6_DSTOPTS:
- if (!(ipp->ipp_fields & IPPF_DSTOPTS))
- return (0);
- bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
- return (ipp->ipp_dstoptslen);
- case IPV6_PATHMTU:
- return (ip_fill_mtuinfo(&udp->udp_v6dst,
- udp->udp_dstport, (struct ip6_mtuinfo *)ptr,
- us->us_netstack));
- default:
- return (-1);
+
+ len = udp->udp_recv_ipp.ipp_ipv4_options_len;
+ ASSERT(len != 0);
+ bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
+ mutex_exit(&connp->conn_lock);
+ return (len);
}
break;
case IPPROTO_UDP:
switch (name) {
- case UDP_ANONPRIVBIND:
- *i1 = udp->udp_anon_priv_bind;
- break;
- case UDP_EXCLBIND:
- *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
- break;
- case UDP_RCVHDR:
- *i1 = udp->udp_rcvhdr ? 1 : 0;
- break;
case UDP_NAT_T_ENDPOINT:
+ mutex_enter(&connp->conn_lock);
*i1 = udp->udp_nat_t_endpoint;
- break;
- default:
- return (-1);
+ mutex_exit(&connp->conn_lock);
+ return (sizeof (int));
+ case UDP_RCVHDR:
+ mutex_enter(&connp->conn_lock);
+ *i1 = udp->udp_rcvhdr ? 1 : 0;
+ mutex_exit(&connp->conn_lock);
+ return (sizeof (int));
}
- break;
- default:
- return (-1);
}
- return (sizeof (int));
+ mutex_enter(&connp->conn_lock);
+ retval = conn_opt_get(&coas, level, name, ptr);
+ mutex_exit(&connp->conn_lock);
+ return (retval);
}
+/*
+ * Queue-based variant of udp_opt_get(): maps q to its conn_t and returns
+ * udp_opt_get()'s result (the size of the option retrieved, or -1).
+ */
int
udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
- udp_t *udp;
- int err;
-
- udp = Q_TO_UDP(q);
+ conn_t *connp = Q_TO_CONN(q);
+ int err;
- rw_enter(&udp->udp_rwlock, RW_READER);
- err = udp_opt_get(Q_TO_CONN(q), level, name, ptr);
- rw_exit(&udp->udp_rwlock);
+ err = udp_opt_get(connp, level, name, ptr);
return (err);
}
/*
* This routine sets socket options.
*/
-/* ARGSUSED */
-static int
-udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
- uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
- void *thisdg_attrs, boolean_t checkonly)
+int
+udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
+ uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
{
- udpattrs_t *attrs = thisdg_attrs;
- int *i1 = (int *)invalp;
- boolean_t onoff = (*i1 == 0) ? 0 : 1;
- udp_t *udp = connp->conn_udp;
+ conn_t *connp = coa->coa_connp;
+ ip_xmit_attr_t *ixa = coa->coa_ixa;
+ udp_t *udp = connp->conn_udp;
udp_stack_t *us = udp->udp_us;
- int error;
- uint_t newlen;
- size_t sth_wroff;
+ int *i1 = (int *)invalp;
+ boolean_t onoff = (*i1 == 0) ? 0 : 1;
+ int error;
- ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
+ ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
/*
- * For fixed length options, no sanity check
- * of passed in length is done. It is assumed *_optcom_req()
- * routines do the right thing.
+ * First do UDP specific sanity checks and handle UDP specific
+ * options. Note that some IPPROTO_UDP options are handled
+ * by conn_opt_set.
*/
switch (level) {
case SOL_SOCKET:
switch (name) {
- case SO_REUSEADDR:
- if (!checkonly) {
- udp->udp_reuseaddr = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case SO_DEBUG:
- if (!checkonly)
- udp->udp_debug = onoff;
- break;
- /*
- * The following three items are available here,
- * but are only meaningful to IP.
- */
- case SO_DONTROUTE:
- if (!checkonly) {
- udp->udp_dontroute = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case SO_USELOOPBACK:
- if (!checkonly) {
- udp->udp_useloopback = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case SO_BROADCAST:
- if (!checkonly) {
- udp->udp_broadcast = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
-
case SO_SNDBUF:
if (*i1 > us->us_max_buf) {
- *outlenp = 0;
return (ENOBUFS);
}
- if (!checkonly) {
- udp->udp_xmit_hiwat = *i1;
- connp->conn_wq->q_hiwat = *i1;
- }
break;
case SO_RCVBUF:
if (*i1 > us->us_max_buf) {
- *outlenp = 0;
return (ENOBUFS);
}
- if (!checkonly) {
- int size;
-
- udp->udp_rcv_disply_hiwat = *i1;
- size = udp_set_rcv_hiwat(udp, *i1);
- rw_exit(&udp->udp_rwlock);
- (void) proto_set_rx_hiwat(connp->conn_rq, connp,
- size);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- }
- break;
- case SO_DGRAM_ERRIND:
- if (!checkonly)
- udp->udp_dgram_errind = onoff;
- break;
- case SO_RECVUCRED:
- if (!checkonly)
- udp->udp_recvucred = onoff;
- break;
- case SO_ALLZONES:
- /*
- * "soft" error (negative)
- * option not handled at this level
- * Do not modify *outlenp.
- */
- return (-EINVAL);
- case SO_TIMESTAMP:
- if (!checkonly)
- udp->udp_timestamp = onoff;
- break;
- case SO_ANON_MLP:
- case SO_MAC_EXEMPT:
- case SO_MAC_IMPLICIT:
- PASS_OPT_TO_IP(connp);
break;
+
case SCM_UCRED: {
struct ucred_s *ucr;
- cred_t *cr, *newcr;
+ cred_t *newcr;
ts_label_t *tsl;
/*
@@ -2250,20 +1777,18 @@ udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
*/
if (connp->conn_mlp_type == mlptSingle)
break;
+
ucr = (struct ucred_s *)invalp;
if (inlen != ucredsize ||
ucr->uc_labeloff < sizeof (*ucr) ||
ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
return (EINVAL);
if (!checkonly) {
- mblk_t *mb;
- pid_t cpid;
-
- if (attrs == NULL ||
- (mb = attrs->udpattr_mb) == NULL)
- return (EINVAL);
- if ((cr = msg_getcred(mb, &cpid)) == NULL)
- cr = udp->udp_connp->conn_cred;
+ /*
+ * Set ixa_tsl to the new label.
+ * We assume that crgetzoneid doesn't change
+ * as part of the SCM_UCRED.
+ */
ASSERT(cr != NULL);
if ((tsl = crgetlabel(cr)) == NULL)
return (EINVAL);
@@ -2271,778 +1796,75 @@ udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
tsl->tsl_doi, KM_NOSLEEP);
if (newcr == NULL)
return (ENOSR);
- mblk_setcred(mb, newcr, cpid);
- attrs->udpattr_credset = B_TRUE;
- crfree(newcr);
- }
- break;
- }
- case SO_EXCLBIND:
- if (!checkonly)
- udp->udp_exclbind = onoff;
- break;
- case SO_RCVTIMEO:
- case SO_SNDTIMEO:
- /*
- * Pass these two options in order for third part
- * protocol usage. Here just return directly.
- */
- return (0);
- default:
- *outlenp = 0;
- return (EINVAL);
- }
- break;
- case IPPROTO_IP:
- if (udp->udp_family != AF_INET) {
- *outlenp = 0;
- return (ENOPROTOOPT);
- }
- switch (name) {
- case IP_OPTIONS:
- case T_IP_OPTIONS:
- /* Save options for use by IP. */
- newlen = inlen + udp->udp_label_len;
- if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
- *outlenp = 0;
- return (EINVAL);
- }
- if (checkonly)
- break;
-
- /*
- * Update the stored options taking into account
- * any CIPSO option which we should not overwrite.
- */
- if (!tsol_option_set(&udp->udp_ip_snd_options,
- &udp->udp_ip_snd_options_len,
- udp->udp_label_len, invalp, inlen)) {
- *outlenp = 0;
- return (ENOMEM);
- }
-
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
- UDPH_SIZE + udp->udp_ip_snd_options_len;
- sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
- rw_exit(&udp->udp_rwlock);
- (void) proto_set_tx_wroff(connp->conn_rq, connp,
- sth_wroff);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- break;
-
- case IP_TTL:
- if (!checkonly) {
- udp->udp_ttl = (uchar_t)*i1;
- }
- break;
- case IP_TOS:
- case T_IP_TOS:
- if (!checkonly) {
- udp->udp_type_of_service = (uchar_t)*i1;
- }
- break;
- case IP_MULTICAST_IF: {
- /*
- * TODO should check OPTMGMT reply and undo this if
- * there is an error.
- */
- struct in_addr *inap = (struct in_addr *)invalp;
- if (!checkonly) {
- udp->udp_multicast_if_addr =
- inap->s_addr;
- PASS_OPT_TO_IP(connp);
- }
- break;
- }
- case IP_MULTICAST_TTL:
- if (!checkonly)
- udp->udp_multicast_ttl = *invalp;
- break;
- case IP_MULTICAST_LOOP:
- if (!checkonly) {
- connp->conn_multicast_loop = *invalp;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IP_RECVOPTS:
- if (!checkonly)
- udp->udp_recvopts = onoff;
- break;
- case IP_RECVDSTADDR:
- if (!checkonly)
- udp->udp_recvdstaddr = onoff;
- break;
- case IP_RECVIF:
- if (!checkonly) {
- udp->udp_recvif = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IP_RECVSLLA:
- if (!checkonly) {
- udp->udp_recvslla = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IP_RECVTTL:
- if (!checkonly)
- udp->udp_recvttl = onoff;
- break;
- case IP_PKTINFO: {
- /*
- * This also handles IP_RECVPKTINFO.
- * IP_PKTINFO and IP_RECVPKTINFO have same value.
- * Differentiation is based on the size of the
- * argument passed in.
- */
- struct in_pktinfo *pktinfop;
- ip4_pkt_t *attr_pktinfop;
-
- if (checkonly)
- break;
-
- if (inlen == sizeof (int)) {
- /*
- * This is IP_RECVPKTINFO option.
- * Keep a local copy of whether this option is
- * set or not and pass it down to IP for
- * processing.
- */
-
- udp->udp_ip_recvpktinfo = onoff;
- return (-EINVAL);
- }
-
- if (attrs == NULL ||
- (attr_pktinfop = attrs->udpattr_ipp4) == NULL) {
+ ASSERT(newcr->cr_label != NULL);
/*
- * sticky option or no buffer to return
- * the results.
+ * Move the hold on the cr_label to ixa_tsl by
+ * setting cr_label to NULL. Then release newcr.
*/
- return (EINVAL);
- }
-
- if (inlen != sizeof (struct in_pktinfo))
- return (EINVAL);
-
- pktinfop = (struct in_pktinfo *)invalp;
-
- /*
- * At least one of the values should be specified
- */
- if (pktinfop->ipi_ifindex == 0 &&
- pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
- return (EINVAL);
- }
-
- attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
- attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
-
- break;
- }
- case IP_ADD_MEMBERSHIP:
- case IP_DROP_MEMBERSHIP:
- case IP_BLOCK_SOURCE:
- case IP_UNBLOCK_SOURCE:
- case IP_ADD_SOURCE_MEMBERSHIP:
- case IP_DROP_SOURCE_MEMBERSHIP:
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- case IP_SEC_OPT:
- case IP_NEXTHOP:
- case IP_DHCPINIT_IF:
- /*
- * "soft" error (negative)
- * option not handled at this level
- * Do not modify *outlenp.
- */
- return (-EINVAL);
- case IP_BOUND_IF:
- if (!checkonly) {
- udp->udp_bound_if = *i1;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IP_UNSPEC_SRC:
- if (!checkonly) {
- udp->udp_unspec_source = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IP_BROADCAST_TTL:
- if (!checkonly)
- connp->conn_broadcast_ttl = *invalp;
- break;
- default:
- *outlenp = 0;
- return (EINVAL);
- }
- break;
- case IPPROTO_IPV6: {
- ip6_pkt_t *ipp;
- boolean_t sticky;
-
- if (udp->udp_family != AF_INET6) {
- *outlenp = 0;
- return (ENOPROTOOPT);
- }
- /*
- * Deal with both sticky options and ancillary data
- */
- sticky = B_FALSE;
- if (attrs == NULL || (ipp = attrs->udpattr_ipp6) ==
- NULL) {
- /* sticky options, or none */
- ipp = &udp->udp_sticky_ipp;
- sticky = B_TRUE;
- }
-
- switch (name) {
- case IPV6_MULTICAST_IF:
- if (!checkonly) {
- udp->udp_multicast_if_index = *i1;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_UNICAST_HOPS:
- /* -1 means use default */
- if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
- *outlenp = 0;
- return (EINVAL);
- }
- if (!checkonly) {
- if (*i1 == -1) {
- udp->udp_ttl = ipp->ipp_unicast_hops =
- us->us_ipv6_hoplimit;
- ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
- /* Pass modified value to IP. */
- *i1 = udp->udp_ttl;
- } else {
- udp->udp_ttl = ipp->ipp_unicast_hops =
- (uint8_t)*i1;
- ipp->ipp_fields |= IPPF_UNICAST_HOPS;
- }
- /* Rebuild the header template */
- error = udp_build_hdrs(udp);
- if (error != 0) {
- *outlenp = 0;
- return (error);
- }
- }
- break;
- case IPV6_MULTICAST_HOPS:
- /* -1 means use default */
- if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
- *outlenp = 0;
- return (EINVAL);
- }
- if (!checkonly) {
- if (*i1 == -1) {
- udp->udp_multicast_ttl =
- ipp->ipp_multicast_hops =
- IP_DEFAULT_MULTICAST_TTL;
- ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
- /* Pass modified value to IP. */
- *i1 = udp->udp_multicast_ttl;
- } else {
- udp->udp_multicast_ttl =
- ipp->ipp_multicast_hops =
- (uint8_t)*i1;
- ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
- }
- }
- break;
- case IPV6_MULTICAST_LOOP:
- if (*i1 != 0 && *i1 != 1) {
- *outlenp = 0;
- return (EINVAL);
- }
- if (!checkonly) {
- connp->conn_multicast_loop = *i1;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_JOIN_GROUP:
- case IPV6_LEAVE_GROUP:
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- /*
- * "soft" error (negative)
- * option not handled at this level
- * Note: Do not modify *outlenp
- */
- return (-EINVAL);
- case IPV6_BOUND_IF:
- if (!checkonly) {
- udp->udp_bound_if = *i1;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_UNSPEC_SRC:
- if (!checkonly) {
- udp->udp_unspec_source = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- /*
- * Set boolean switches for ancillary data delivery
- */
- case IPV6_RECVPKTINFO:
- if (!checkonly) {
- udp->udp_ip_recvpktinfo = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_RECVTCLASS:
- if (!checkonly) {
- udp->udp_ipv6_recvtclass = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_RECVPATHMTU:
- if (!checkonly) {
- udp->udp_ipv6_recvpathmtu = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_RECVHOPLIMIT:
- if (!checkonly) {
- udp->udp_ipv6_recvhoplimit = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_RECVHOPOPTS:
- if (!checkonly) {
- udp->udp_ipv6_recvhopopts = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_RECVDSTOPTS:
- if (!checkonly) {
- udp->udp_ipv6_recvdstopts = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case _OLD_IPV6_RECVDSTOPTS:
- if (!checkonly)
- udp->udp_old_ipv6_recvdstopts = onoff;
- break;
- case IPV6_RECVRTHDRDSTOPTS:
- if (!checkonly) {
- udp->udp_ipv6_recvrthdrdstopts = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_RECVRTHDR:
- if (!checkonly) {
- udp->udp_ipv6_recvrthdr = onoff;
- PASS_OPT_TO_IP(connp);
- }
- break;
- /*
- * Set sticky options or ancillary data.
- * If sticky options, (re)build any extension headers
- * that might be needed as a result.
- */
- case IPV6_PKTINFO:
- /*
- * The source address and ifindex are verified
- * in ip_opt_set(). For ancillary data the
- * source address is checked in ip_wput_v6.
- */
- if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
- return (EINVAL);
- if (checkonly)
- break;
-
- if (inlen == 0) {
- ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
- ipp->ipp_sticky_ignored |=
- (IPPF_IFINDEX|IPPF_ADDR);
- } else {
- struct in6_pktinfo *pkti;
-
- pkti = (struct in6_pktinfo *)invalp;
- ipp->ipp_ifindex = pkti->ipi6_ifindex;
- ipp->ipp_addr = pkti->ipi6_addr;
- if (ipp->ipp_ifindex != 0)
- ipp->ipp_fields |= IPPF_IFINDEX;
- else
- ipp->ipp_fields &= ~IPPF_IFINDEX;
- if (!IN6_IS_ADDR_UNSPECIFIED(
- &ipp->ipp_addr))
- ipp->ipp_fields |= IPPF_ADDR;
- else
- ipp->ipp_fields &= ~IPPF_ADDR;
- }
- if (sticky) {
- error = udp_build_hdrs(udp);
- if (error != 0)
- return (error);
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_HOPLIMIT:
- if (sticky)
- return (EINVAL);
- if (inlen != 0 && inlen != sizeof (int))
- return (EINVAL);
- if (checkonly)
- break;
-
- if (inlen == 0) {
- ipp->ipp_fields &= ~IPPF_HOPLIMIT;
- ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
- } else {
- if (*i1 > 255 || *i1 < -1)
- return (EINVAL);
- if (*i1 == -1)
- ipp->ipp_hoplimit =
- us->us_ipv6_hoplimit;
- else
- ipp->ipp_hoplimit = *i1;
- ipp->ipp_fields |= IPPF_HOPLIMIT;
- }
- break;
- case IPV6_TCLASS:
- if (inlen != 0 && inlen != sizeof (int))
- return (EINVAL);
- if (checkonly)
- break;
-
- if (inlen == 0) {
- ipp->ipp_fields &= ~IPPF_TCLASS;
- ipp->ipp_sticky_ignored |= IPPF_TCLASS;
- } else {
- if (*i1 > 255 || *i1 < -1)
- return (EINVAL);
- if (*i1 == -1)
- ipp->ipp_tclass = 0;
- else
- ipp->ipp_tclass = *i1;
- ipp->ipp_fields |= IPPF_TCLASS;
- }
- if (sticky) {
- error = udp_build_hdrs(udp);
- if (error != 0)
- return (error);
- }
- break;
- case IPV6_NEXTHOP:
- /*
- * IP will verify that the nexthop is reachable
- * and fail for sticky options.
- */
- if (inlen != 0 && inlen != sizeof (sin6_t))
- return (EINVAL);
- if (checkonly)
- break;
-
- if (inlen == 0) {
- ipp->ipp_fields &= ~IPPF_NEXTHOP;
- ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
- } else {
- sin6_t *sin6 = (sin6_t *)invalp;
-
- if (sin6->sin6_family != AF_INET6) {
- return (EAFNOSUPPORT);
- }
- if (IN6_IS_ADDR_V4MAPPED(
- &sin6->sin6_addr))
- return (EADDRNOTAVAIL);
- ipp->ipp_nexthop = sin6->sin6_addr;
- if (!IN6_IS_ADDR_UNSPECIFIED(
- &ipp->ipp_nexthop))
- ipp->ipp_fields |= IPPF_NEXTHOP;
- else
- ipp->ipp_fields &= ~IPPF_NEXTHOP;
- }
- if (sticky) {
- error = udp_build_hdrs(udp);
- if (error != 0)
- return (error);
- PASS_OPT_TO_IP(connp);
- }
- break;
- case IPV6_HOPOPTS: {
- ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
- /*
- * Sanity checks - minimum size, size a multiple of
- * eight bytes, and matching size passed in.
- */
- if (inlen != 0 &&
- inlen != (8 * (hopts->ip6h_len + 1)))
- return (EINVAL);
-
- if (checkonly)
- break;
-
- error = optcom_pkt_set(invalp, inlen, sticky,
- (uchar_t **)&ipp->ipp_hopopts,
- &ipp->ipp_hopoptslen,
- sticky ? udp->udp_label_len_v6 : 0);
- if (error != 0)
- return (error);
- if (ipp->ipp_hopoptslen == 0) {
- ipp->ipp_fields &= ~IPPF_HOPOPTS;
- ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
- } else {
- ipp->ipp_fields |= IPPF_HOPOPTS;
- }
- if (sticky) {
- error = udp_build_hdrs(udp);
- if (error != 0)
- return (error);
- }
- break;
- }
- case IPV6_RTHDRDSTOPTS: {
- ip6_dest_t *dopts = (ip6_dest_t *)invalp;
-
- /*
- * Sanity checks - minimum size, size a multiple of
- * eight bytes, and matching size passed in.
- */
- if (inlen != 0 &&
- inlen != (8 * (dopts->ip6d_len + 1)))
- return (EINVAL);
-
- if (checkonly)
- break;
-
- if (inlen == 0) {
- if (sticky &&
- (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
- kmem_free(ipp->ipp_rtdstopts,
- ipp->ipp_rtdstoptslen);
- ipp->ipp_rtdstopts = NULL;
- ipp->ipp_rtdstoptslen = 0;
- }
-
- ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
- ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
- } else {
- error = optcom_pkt_set(invalp, inlen, sticky,
- (uchar_t **)&ipp->ipp_rtdstopts,
- &ipp->ipp_rtdstoptslen, 0);
- if (error != 0)
- return (error);
- ipp->ipp_fields |= IPPF_RTDSTOPTS;
- }
- if (sticky) {
- error = udp_build_hdrs(udp);
- if (error != 0)
- return (error);
- }
- break;
- }
- case IPV6_DSTOPTS: {
- ip6_dest_t *dopts = (ip6_dest_t *)invalp;
-
- /*
- * Sanity checks - minimum size, size a multiple of
- * eight bytes, and matching size passed in.
- */
- if (inlen != 0 &&
- inlen != (8 * (dopts->ip6d_len + 1)))
- return (EINVAL);
-
- if (checkonly)
- break;
-
- if (inlen == 0) {
- if (sticky &&
- (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
- kmem_free(ipp->ipp_dstopts,
- ipp->ipp_dstoptslen);
- ipp->ipp_dstopts = NULL;
- ipp->ipp_dstoptslen = 0;
- }
- ipp->ipp_fields &= ~IPPF_DSTOPTS;
- ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
- } else {
- error = optcom_pkt_set(invalp, inlen, sticky,
- (uchar_t **)&ipp->ipp_dstopts,
- &ipp->ipp_dstoptslen, 0);
- if (error != 0)
- return (error);
- ipp->ipp_fields |= IPPF_DSTOPTS;
- }
- if (sticky) {
- error = udp_build_hdrs(udp);
- if (error != 0)
- return (error);
- }
- break;
- }
- case IPV6_RTHDR: {
- ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
-
- /*
- * Sanity checks - minimum size, size a multiple of
- * eight bytes, and matching size passed in.
- */
- if (inlen != 0 &&
- inlen != (8 * (rt->ip6r_len + 1)))
- return (EINVAL);
-
- if (checkonly)
- break;
-
- if (inlen == 0) {
- if (sticky &&
- (ipp->ipp_fields & IPPF_RTHDR) != 0) {
- kmem_free(ipp->ipp_rthdr,
- ipp->ipp_rthdrlen);
- ipp->ipp_rthdr = NULL;
- ipp->ipp_rthdrlen = 0;
- }
- ipp->ipp_fields &= ~IPPF_RTHDR;
- ipp->ipp_sticky_ignored |= IPPF_RTHDR;
- } else {
- error = optcom_pkt_set(invalp, inlen, sticky,
- (uchar_t **)&ipp->ipp_rthdr,
- &ipp->ipp_rthdrlen, 0);
- if (error != 0)
- return (error);
- ipp->ipp_fields |= IPPF_RTHDR;
- }
- if (sticky) {
- error = udp_build_hdrs(udp);
- if (error != 0)
- return (error);
+ ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
+ ixa->ixa_flags |= IXAF_UCRED_TSL;
+ newcr->cr_label = NULL;
+ crfree(newcr);
+ coa->coa_changed |= COA_HEADER_CHANGED;
+ coa->coa_changed |= COA_WROFF_CHANGED;
}
- break;
+ /* Fully handled this option. */
+ return (0);
}
-
- case IPV6_DONTFRAG:
- if (checkonly)
- break;
-
- if (onoff) {
- ipp->ipp_fields |= IPPF_DONTFRAG;
- } else {
- ipp->ipp_fields &= ~IPPF_DONTFRAG;
- }
- break;
-
- case IPV6_USE_MIN_MTU:
- if (inlen != sizeof (int))
- return (EINVAL);
-
- if (*i1 < -1 || *i1 > 1)
- return (EINVAL);
-
- if (checkonly)
- break;
-
- ipp->ipp_fields |= IPPF_USE_MIN_MTU;
- ipp->ipp_use_min_mtu = *i1;
- break;
-
- case IPV6_SEC_OPT:
- case IPV6_SRC_PREFERENCES:
- case IPV6_V6ONLY:
- /* Handled at the IP level */
- return (-EINVAL);
- default:
- *outlenp = 0;
- return (EINVAL);
}
break;
- } /* end IPPROTO_IPV6 */
case IPPROTO_UDP:
switch (name) {
- case UDP_ANONPRIVBIND:
- if ((error = secpolicy_net_privaddr(cr, 0,
- IPPROTO_UDP)) != 0) {
- *outlenp = 0;
- return (error);
- }
- if (!checkonly) {
- udp->udp_anon_priv_bind = onoff;
- }
- break;
- case UDP_EXCLBIND:
- if (!checkonly)
- udp->udp_exclbind = onoff;
- break;
- case UDP_RCVHDR:
- if (!checkonly)
- udp->udp_rcvhdr = onoff;
- break;
case UDP_NAT_T_ENDPOINT:
if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
- *outlenp = 0;
return (error);
}
/*
- * Use udp_family instead so we can avoid ambiguitites
+			 * Use conn_family instead so we can avoid ambiguities
* with AF_INET6 sockets that may switch from IPv4
* to IPv6.
*/
- if (udp->udp_family != AF_INET) {
- *outlenp = 0;
+ if (connp->conn_family != AF_INET) {
return (EAFNOSUPPORT);
}
if (!checkonly) {
- int size;
-
+ mutex_enter(&connp->conn_lock);
udp->udp_nat_t_endpoint = onoff;
-
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
- UDPH_SIZE + udp->udp_ip_snd_options_len;
-
- /* Also, adjust wroff */
- if (onoff) {
- udp->udp_max_hdr_len +=
- sizeof (uint32_t);
- }
- size = udp->udp_max_hdr_len +
- us->us_wroff_extra;
- (void) proto_set_tx_wroff(connp->conn_rq, connp,
- size);
+ mutex_exit(&connp->conn_lock);
+ coa->coa_changed |= COA_HEADER_CHANGED;
+ coa->coa_changed |= COA_WROFF_CHANGED;
}
- break;
- default:
- *outlenp = 0;
- return (EINVAL);
+ /* Fully handled this option. */
+ return (0);
+ case UDP_RCVHDR:
+ mutex_enter(&connp->conn_lock);
+ udp->udp_rcvhdr = onoff;
+ mutex_exit(&connp->conn_lock);
+ return (0);
}
break;
- default:
- *outlenp = 0;
- return (EINVAL);
- }
- /*
- * Common case of OK return with outval same as inval.
- */
- if (invalp != outvalp) {
- /* don't trust bcopy for identical src/dst */
- (void) bcopy(invalp, outvalp, inlen);
}
- *outlenp = inlen;
- return (0);
+ error = conn_opt_set(coa, level, name, inlen, invalp,
+ checkonly, cr);
+ return (error);
}
+/*
+ * This routine sets socket options.
+ */
int
-udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
- uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
- void *thisdg_attrs, cred_t *cr)
+udp_opt_set(conn_t *connp, uint_t optset_context, int level,
+ int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
+ uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
{
- int error;
+ udp_t *udp = connp->conn_udp;
+ int err;
+ conn_opt_arg_t coas, *coa;
boolean_t checkonly;
+ udp_stack_t *us = udp->udp_us;
- error = 0;
switch (optset_context) {
case SETFN_OPTCOM_CHECKONLY:
checkonly = B_TRUE;
@@ -3056,7 +1878,7 @@ udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
*/
if (inlen == 0) {
*outlenp = 0;
- goto done;
+ return (0);
}
break;
case SETFN_OPTCOM_NEGOTIATE:
@@ -3074,8 +1896,7 @@ udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
*/
if (!udp_opt_allow_udr_set(level, name)) {
*outlenp = 0;
- error = EINVAL;
- goto done;
+ return (EINVAL);
}
break;
default:
@@ -3083,99 +1904,326 @@ udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
* We should never get here
*/
*outlenp = 0;
- error = EINVAL;
- goto done;
+ return (EINVAL);
}
ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
(optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
- error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
- outvalp, cr, thisdg_attrs, checkonly);
-done:
- return (error);
+ if (thisdg_attrs != NULL) {
+ /* Options from T_UNITDATA_REQ */
+ coa = (conn_opt_arg_t *)thisdg_attrs;
+ ASSERT(coa->coa_connp == connp);
+ ASSERT(coa->coa_ixa != NULL);
+ ASSERT(coa->coa_ipp != NULL);
+ ASSERT(coa->coa_ancillary);
+ } else {
+ coa = &coas;
+ coas.coa_connp = connp;
+ /* Get a reference on conn_ixa to prevent concurrent mods */
+ coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
+ if (coas.coa_ixa == NULL) {
+ *outlenp = 0;
+ return (ENOMEM);
+ }
+ coas.coa_ipp = &connp->conn_xmit_ipp;
+ coas.coa_ancillary = B_FALSE;
+ coas.coa_changed = 0;
+ }
+
+ err = udp_do_opt_set(coa, level, name, inlen, invalp,
+ cr, checkonly);
+ if (err != 0) {
+errout:
+ if (!coa->coa_ancillary)
+ ixa_refrele(coa->coa_ixa);
+ *outlenp = 0;
+ return (err);
+ }
+ /* Handle DHCPINIT here outside of lock */
+ if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
+ uint_t ifindex;
+ ill_t *ill;
+
+ ifindex = *(uint_t *)invalp;
+ if (ifindex == 0) {
+ ill = NULL;
+ } else {
+ ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
+ coa->coa_ixa->ixa_ipst);
+ if (ill == NULL) {
+ err = ENXIO;
+ goto errout;
+ }
+
+ mutex_enter(&ill->ill_lock);
+ if (ill->ill_state_flags & ILL_CONDEMNED) {
+ mutex_exit(&ill->ill_lock);
+ ill_refrele(ill);
+ err = ENXIO;
+ goto errout;
+ }
+ if (IS_VNI(ill)) {
+ mutex_exit(&ill->ill_lock);
+ ill_refrele(ill);
+ err = EINVAL;
+ goto errout;
+ }
+ }
+ mutex_enter(&connp->conn_lock);
+
+ if (connp->conn_dhcpinit_ill != NULL) {
+ /*
+ * We've locked the conn so conn_cleanup_ill()
+ * cannot clear conn_dhcpinit_ill -- so it's
+ * safe to access the ill.
+ */
+ ill_t *oill = connp->conn_dhcpinit_ill;
+
+ ASSERT(oill->ill_dhcpinit != 0);
+ atomic_dec_32(&oill->ill_dhcpinit);
+ ill_set_inputfn(connp->conn_dhcpinit_ill);
+ connp->conn_dhcpinit_ill = NULL;
+ }
+
+ if (ill != NULL) {
+ connp->conn_dhcpinit_ill = ill;
+ atomic_inc_32(&ill->ill_dhcpinit);
+ ill_set_inputfn(ill);
+ mutex_exit(&connp->conn_lock);
+ mutex_exit(&ill->ill_lock);
+ ill_refrele(ill);
+ } else {
+ mutex_exit(&connp->conn_lock);
+ }
+ }
+
+ /*
+ * Common case of OK return with outval same as inval.
+ */
+ if (invalp != outvalp) {
+ /* don't trust bcopy for identical src/dst */
+ (void) bcopy(invalp, outvalp, inlen);
+ }
+ *outlenp = inlen;
+
+ /*
+ * If this was not ancillary data, then we rebuild the headers,
+ * update the IRE/NCE, and IPsec as needed.
+ * Since the label depends on the destination we go through
+ * ip_set_destination first.
+ */
+ if (coa->coa_ancillary) {
+ return (0);
+ }
+
+ if (coa->coa_changed & COA_ROUTE_CHANGED) {
+ in6_addr_t saddr, faddr, nexthop;
+ in_port_t fport;
+
+ /*
+ * We clear lastdst to make sure we pick up the change
+ * next time sending.
+ * If we are connected we re-cache the information.
+ * We ignore errors to preserve BSD behavior.
+ * Note that we don't redo IPsec policy lookup here
+ * since the final destination (or source) didn't change.
+ */
+ mutex_enter(&connp->conn_lock);
+ connp->conn_v6lastdst = ipv6_all_zeros;
+
+ ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
+ &connp->conn_faddr_v6, &nexthop);
+ saddr = connp->conn_saddr_v6;
+ faddr = connp->conn_faddr_v6;
+ fport = connp->conn_fport;
+ mutex_exit(&connp->conn_lock);
+
+ if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
+ !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
+ (void) ip_attr_connect(connp, coa->coa_ixa,
+ &saddr, &faddr, &nexthop, fport, NULL, NULL,
+ IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
+ }
+ }
+
+ ixa_refrele(coa->coa_ixa);
+
+ if (coa->coa_changed & COA_HEADER_CHANGED) {
+ /*
+ * Rebuild the header template if we are connected.
+ * Otherwise clear conn_v6lastdst so we rebuild the header
+ * in the data path.
+ */
+ mutex_enter(&connp->conn_lock);
+ if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
+ !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
+ err = udp_build_hdr_template(connp,
+ &connp->conn_saddr_v6, &connp->conn_faddr_v6,
+ connp->conn_fport, connp->conn_flowinfo);
+ if (err != 0) {
+ mutex_exit(&connp->conn_lock);
+ return (err);
+ }
+ } else {
+ connp->conn_v6lastdst = ipv6_all_zeros;
+ }
+ mutex_exit(&connp->conn_lock);
+ }
+ if (coa->coa_changed & COA_RCVBUF_CHANGED) {
+ (void) proto_set_rx_hiwat(connp->conn_rq, connp,
+ connp->conn_rcvbuf);
+ }
+ if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
+ connp->conn_wq->q_hiwat = connp->conn_sndbuf;
+ }
+ if (coa->coa_changed & COA_WROFF_CHANGED) {
+ /* Increase wroff if needed */
+ uint_t wroff;
+
+ mutex_enter(&connp->conn_lock);
+ wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
+ if (udp->udp_nat_t_endpoint)
+ wroff += sizeof (uint32_t);
+ if (wroff > connp->conn_wroff) {
+ connp->conn_wroff = wroff;
+ mutex_exit(&connp->conn_lock);
+ (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
+ } else {
+ mutex_exit(&connp->conn_lock);
+ }
+ }
+ return (err);
}
-/* ARGSUSED */
+/*
+ * This routine sets socket options.
+ * It is the entry point that takes a queue_t: the queue is mapped to its
+ * conn_t with Q_TO_CONN() and every other argument is handed unchanged to
+ * udp_opt_set(), whose return value is returned as is.
+ */
int
udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
-    void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+    void *thisdg_attrs, cred_t *cr)
{
-	conn_t *connp = Q_TO_CONN(q);
+	conn_t	*connp = Q_TO_CONN(q);
	int error;
-	udp_t *udp = connp->conn_udp;
-	rw_enter(&udp->udp_rwlock, RW_WRITER);
	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
	    outlenp, outvalp, thisdg_attrs, cr);
-	rw_exit(&udp->udp_rwlock);
	return (error);
}
/*
- * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
- * The headers include ip6i_t (if needed), ip6_t, any sticky extension
- * headers, and the udp header.
- * Returns failure if can't allocate memory.
+ * Setup IP and UDP headers.
+ * Returns NULL on allocation failure, in which case data_mp is freed.
+ *
+ * The headers are prepended to data_mp by conn_prepend_hdr(); when that
+ * returns NULL this routine returns NULL as well and *errorp is non-zero.
+ * The ULP header is UDPH_SIZE bytes, plus sizeof (uint32_t) for an
+ * all-zero SPI placed right after the UDP header when udp_nat_t_endpoint
+ * is set.
+ *
+ * On success ixa_pktlen is set to the length of the IP header, the ULP
+ * header and the payload, and the UDP header is filled in: the source port
+ * is conn_lport, the destination port is dstport, and uha_length covers the
+ * ULP header plus the payload.
+ *
+ * uha_checksum is not a finished checksum. It carries the folded sum of the
+ * value conn_prepend_hdr() left in cksum and the UDP length, with zero
+ * stored as 0xffff. For IPv4 it is left as zero when us_do_checksum is
+ * clear.
*/
-static int
-udp_build_hdrs(udp_t *udp)
+mblk_t *
+udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
+    const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
+    uint32_t flowinfo, mblk_t *data_mp, int *errorp)
{
-	udp_stack_t *us = udp->udp_us;
-	uchar_t	*hdrs;
-	uint_t	hdrs_len;
-	ip6_t	*ip6h;
-	ip6i_t	*ip6i;
-	udpha_t	*udpha;
-	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
-	size_t	sth_wroff;
-	conn_t	*connp = udp->udp_connp;
-
-	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
-	ASSERT(connp != NULL);
+	mblk_t		*mp;
+	udpha_t		*udpha;
+	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
+	uint_t		data_len;
+	uint32_t	cksum;
+	udp_t		*udp = connp->conn_udp;
+	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
+	uint_t		ulp_hdr_len;
-	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
-	ASSERT(hdrs_len != 0);
-	if (hdrs_len != udp->udp_sticky_hdrs_len) {
-		/* Need to reallocate */
-		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
-		if (hdrs == NULL)
-			return (ENOMEM);
+	data_len = msgdsize(data_mp);
+	ulp_hdr_len = UDPH_SIZE;
+	if (insert_spi)
+		ulp_hdr_len += sizeof (uint32_t);
-		if (udp->udp_sticky_hdrs_len != 0) {
-			kmem_free(udp->udp_sticky_hdrs,
-			    udp->udp_sticky_hdrs_len);
-		}
-		udp->udp_sticky_hdrs = hdrs;
-		udp->udp_sticky_hdrs_len = hdrs_len;
+	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
+	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
+	if (mp == NULL) {
+		ASSERT(*errorp != 0);
+		return (NULL);
	}
-	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
-	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);
-	/* Set header fields not in ipp */
-	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
-		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
-		ip6h = (ip6_t *)&ip6i[1];
+	data_len += ulp_hdr_len;
+	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
+
+	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
+	udpha->uha_src_port = connp->conn_lport;
+	udpha->uha_dst_port = dstport;
+	udpha->uha_checksum = 0;
+	udpha->uha_length = htons(data_len);
+
+	/*
+	 * If there was a routing option/header then conn_prepend_hdr
+	 * has massaged it and placed the pseudo-header checksum difference
+	 * in the cksum argument.
+	 *
+	 * Setup header length and prepare for ULP checksum done in IP.
+	 *
+	 * We make it easy for IP to include our pseudo header
+	 * by putting our length in uha_checksum.
+	 * The IP source, destination, and length have already been set by
+	 * conn_prepend_hdr.
+	 */
+	cksum += data_len;
+	cksum = (cksum >> 16) + (cksum & 0xFFFF);
+	ASSERT(cksum < 0x10000);
+
+	if (ixa->ixa_flags & IXAF_IS_IPV4) {
+		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
+
+		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
+
+		/* IP does the checksum if uha_checksum is non-zero */
+		if (us->us_do_checksum) {
+			if (cksum == 0)
+				udpha->uha_checksum = 0xffff;
+			else
+				udpha->uha_checksum = htons(cksum);
+		} else {
+			udpha->uha_checksum = 0;
+		}
	} else {
-		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
+		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
+
+		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
+		if (cksum == 0)
+			udpha->uha_checksum = 0xffff;
+		else
+			udpha->uha_checksum = htons(cksum);
	}
-	if (!(ipp->ipp_fields & IPPF_ADDR))
-		ip6h->ip6_src = udp->udp_v6src;
+	/* Insert all-0s SPI now. */
+	if (insert_spi)
+		*((uint32_t *)(udpha + 1)) = 0;
-	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
-	udpha->uha_src_port = udp->udp_port;
+	return (mp);
+}
- /* Try to get everything in a single mblk */
- if (hdrs_len > udp->udp_max_hdr_len) {
- udp->udp_max_hdr_len = hdrs_len;
- sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
- rw_exit(&udp->udp_rwlock);
- (void) proto_set_tx_wroff(udp->udp_connp->conn_rq,
- udp->udp_connp, sth_wroff);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- }
+/*
+ * Build the header template for this conn and fill in its UDP header.
+ * The caller must hold conn_lock.
+ *
+ * conn_v6lastdst is cleared first. The template itself is built by
+ * conn_build_hdr_template(), which is passed UDPH_SIZE together with v6src,
+ * v6dst and flowinfo; a non-zero return from it is returned unchanged.
+ * The UDP header at conn_ht_ulp then gets conn_lport as its source port,
+ * dstport as its destination port, a zero checksum, and a length of
+ * UDPH_SIZE that is only a placeholder.
+ *
+ * Returns 0 on success.
+ */
+static int
+udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
+    const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
+{
+	udpha_t		*udpha;
+	int		error;
+
+	ASSERT(MUTEX_HELD(&connp->conn_lock));
+	/*
+	 * We clear lastdst to make sure we don't use the lastdst path
+	 * next time sending since we might not have set v6dst yet.
+	 */
+	connp->conn_v6lastdst = ipv6_all_zeros;
+
+	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
+	    flowinfo);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Any routing header/option has been massaged. The checksum difference
+	 * is stored in conn_sum.
+	 */
+	udpha = (udpha_t *)connp->conn_ht_ulp;
+	udpha->uha_src_port = connp->conn_lport;
+	udpha->uha_dst_port = dstport;
+	udpha->uha_checksum = 0;
+	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
	return (0);
}
@@ -3252,189 +2300,6 @@ udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
return (0);
}
-/*
- * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
- * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to
- * just count the length needed for allocation. If 'dbuf' is non-NULL,
- * then it's assumed to be allocated to be large enough.
- *
- * Returns zero if trimming of the security option causes all options to go
- * away.
- */
-static size_t
-copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
-{
- struct T_opthdr *toh;
- size_t hol = ipp->ipp_hopoptslen;
- ip6_hbh_t *dstopt = NULL;
- const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
- size_t tlen, olen, plen;
- boolean_t deleting;
- const struct ip6_opt *sopt, *lastpad;
- struct ip6_opt *dopt;
-
- if ((toh = (struct T_opthdr *)dbuf) != NULL) {
- toh->level = IPPROTO_IPV6;
- toh->name = IPV6_HOPOPTS;
- toh->status = 0;
- dstopt = (ip6_hbh_t *)(toh + 1);
- }
-
- /*
- * If labeling is enabled, then skip the label option
- * but get other options if there are any.
- */
- if (is_system_labeled()) {
- dopt = NULL;
- if (dstopt != NULL) {
- /* will fill in ip6h_len later */
- dstopt->ip6h_nxt = srcopt->ip6h_nxt;
- dopt = (struct ip6_opt *)(dstopt + 1);
- }
- sopt = (const struct ip6_opt *)(srcopt + 1);
- hol -= sizeof (*srcopt);
- tlen = sizeof (*dstopt);
- lastpad = NULL;
- deleting = B_FALSE;
- /*
- * This loop finds the first (lastpad pointer) of any number of
- * pads that preceeds the security option, then treats the
- * security option as though it were a pad, and then finds the
- * next non-pad option (or end of list).
- *
- * It then treats the entire block as one big pad. To preserve
- * alignment of any options that follow, or just the end of the
- * list, it computes a minimal new padding size that keeps the
- * same alignment for the next option.
- *
- * If it encounters just a sequence of pads with no security
- * option, those are copied as-is rather than collapsed.
- *
- * Note that to handle the end of list case, the code makes one
- * loop with 'hol' set to zero.
- */
- for (;;) {
- if (hol > 0) {
- if (sopt->ip6o_type == IP6OPT_PAD1) {
- if (lastpad == NULL)
- lastpad = sopt;
- sopt = (const struct ip6_opt *)
- &sopt->ip6o_len;
- hol--;
- continue;
- }
- olen = sopt->ip6o_len + sizeof (*sopt);
- if (olen > hol)
- olen = hol;
- if (sopt->ip6o_type == IP6OPT_PADN ||
- sopt->ip6o_type == ip6opt_ls) {
- if (sopt->ip6o_type == ip6opt_ls)
- deleting = B_TRUE;
- if (lastpad == NULL)
- lastpad = sopt;
- sopt = (const struct ip6_opt *)
- ((const char *)sopt + olen);
- hol -= olen;
- continue;
- }
- } else {
- /* if nothing was copied at all, then delete */
- if (tlen == sizeof (*dstopt))
- return (0);
- /* last pass; pick up any trailing padding */
- olen = 0;
- }
- if (deleting) {
- /*
- * compute aligning effect of deleted material
- * to reproduce with pad.
- */
- plen = ((const char *)sopt -
- (const char *)lastpad) & 7;
- tlen += plen;
- if (dopt != NULL) {
- if (plen == 1) {
- dopt->ip6o_type = IP6OPT_PAD1;
- } else if (plen > 1) {
- plen -= sizeof (*dopt);
- dopt->ip6o_type = IP6OPT_PADN;
- dopt->ip6o_len = plen;
- if (plen > 0)
- bzero(dopt + 1, plen);
- }
- dopt = (struct ip6_opt *)
- ((char *)dopt + plen);
- }
- deleting = B_FALSE;
- lastpad = NULL;
- }
- /* if there's uncopied padding, then copy that now */
- if (lastpad != NULL) {
- olen += (const char *)sopt -
- (const char *)lastpad;
- sopt = lastpad;
- lastpad = NULL;
- }
- if (dopt != NULL && olen > 0) {
- bcopy(sopt, dopt, olen);
- dopt = (struct ip6_opt *)((char *)dopt + olen);
- }
- if (hol == 0)
- break;
- tlen += olen;
- sopt = (const struct ip6_opt *)
- ((const char *)sopt + olen);
- hol -= olen;
- }
- /* go back and patch up the length value, rounded upward */
- if (dstopt != NULL)
- dstopt->ip6h_len = (tlen - 1) >> 3;
- } else {
- tlen = hol;
- if (dstopt != NULL)
- bcopy(srcopt, dstopt, hol);
- }
-
- tlen += sizeof (*toh);
- if (toh != NULL)
- toh->len = tlen;
-
- return (tlen);
-}
-
-/*
- * Update udp_rcv_opt_len from the packet.
- * Called when options received, and when no options received but
- * udp_ip_recv_opt_len has previously recorded options.
- */
-static void
-udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len)
-{
- /* Save the options if any */
- if (opt_len > 0) {
- if (opt_len > udp->udp_ip_rcv_options_len) {
- /* Need to allocate larger buffer */
- if (udp->udp_ip_rcv_options_len != 0)
- mi_free((char *)udp->udp_ip_rcv_options);
- udp->udp_ip_rcv_options_len = 0;
- udp->udp_ip_rcv_options =
- (uchar_t *)mi_alloc(opt_len, BPRI_HI);
- if (udp->udp_ip_rcv_options != NULL)
- udp->udp_ip_rcv_options_len = opt_len;
- }
- if (udp->udp_ip_rcv_options_len != 0) {
- bcopy(opt, udp->udp_ip_rcv_options, opt_len);
- /* Adjust length if we are resusing the space */
- udp->udp_ip_rcv_options_len = opt_len;
- }
- } else if (udp->udp_ip_rcv_options_len != 0) {
- /* Clear out previously recorded options */
- mi_free((char *)udp->udp_ip_rcv_options);
- udp->udp_ip_rcv_options = NULL;
- udp->udp_ip_rcv_options_len = 0;
- }
-}
-
static mblk_t *
udp_queue_fallback(udp_t *udp, mblk_t *mp)
{
@@ -3466,15 +2331,15 @@ udp_queue_fallback(udp_t *udp, mblk_t *mp)
* TPI, then we'll queue the mp for later processing.
*/
static void
-udp_ulp_recv(conn_t *connp, mblk_t *mp)
+udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
{
if (IPCL_IS_NONSTR(connp)) {
udp_t *udp = connp->conn_udp;
int error;
+ ASSERT(len == msgdsize(mp));
if ((*connp->conn_upcalls->su_recv)
- (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
- NULL) < 0) {
+ (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
mutex_enter(&udp->udp_recv_lock);
if (error == ENOSPC) {
/*
@@ -3500,282 +2365,170 @@ udp_ulp_recv(conn_t *connp, mblk_t *mp)
}
ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
} else {
+ if (is_system_labeled()) {
+ ASSERT(ira->ira_cred != NULL);
+ /*
+			 * Provide the credential for protocols above UDP such
+			 * as RPC. NOPID leaves db_cpid unchanged.
+ */
+ mblk_setcred(mp, ira->ira_cred, NOPID);
+ }
+
putnext(connp->conn_rq, mp);
}
}
+/*
+ * This is the inbound data path.
+ * IP has already pulled up the IP plus UDP headers and verified alignment
+ * etc.
+ */
/* ARGSUSED2 */
static void
-udp_input(void *arg1, mblk_t *mp, void *arg2)
+udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
- conn_t *connp = (conn_t *)arg1;
+ conn_t *connp = (conn_t *)arg1;
struct T_unitdata_ind *tudi;
uchar_t *rptr; /* Pointer to IP header */
int hdr_length; /* Length of IP+UDP headers */
- int opt_len;
int udi_size; /* Size of T_unitdata_ind */
- int mp_len;
+ int pkt_len;
udp_t *udp;
udpha_t *udpha;
- int ipversion;
- ip6_pkt_t ipp;
+ ip_pkt_t ipps;
ip6_t *ip6h;
- ip6i_t *ip6i;
mblk_t *mp1;
- mblk_t *options_mp = NULL;
- ip_pktinfo_t *pinfo = NULL;
- cred_t *cr = NULL;
- pid_t cpid;
- uint32_t udp_ip_rcv_options_len;
- udp_bits_t udp_bits;
- cred_t *rcr = connp->conn_cred;
- udp_stack_t *us;
+ uint32_t udp_ipv4_options_len;
+ crb_t recv_ancillary;
+ udp_stack_t *us;
ASSERT(connp->conn_flags & IPCL_UDPCONN);
udp = connp->conn_udp;
us = udp->udp_us;
rptr = mp->b_rptr;
- ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
+
+ ASSERT(DB_TYPE(mp) == M_DATA);
ASSERT(OK_32PTR(rptr));
+ ASSERT(ira->ira_pktlen == msgdsize(mp));
+ pkt_len = ira->ira_pktlen;
/*
- * IP should have prepended the options data in an M_CTL
- * Check M_CTL "type" to make sure are not here bcos of
- * a valid ICMP message
+ * Get a snapshot of these and allow other threads to change
+ * them after that. We need the same recv_ancillary when determining
+ * the size as when adding the ancillary data items.
*/
- if (DB_TYPE(mp) == M_CTL) {
- if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
- ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
- IN_PKTINFO) {
- /*
- * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
- * has been prepended to the packet by IP. We need to
- * extract the mblk and adjust the rptr
- */
- pinfo = (ip_pktinfo_t *)mp->b_rptr;
- options_mp = mp;
- mp = mp->b_cont;
- rptr = mp->b_rptr;
- UDP_STAT(us, udp_in_pktinfo);
- } else {
- /*
- * ICMP messages.
- */
- udp_icmp_error(connp, mp);
- return;
- }
- }
+ mutex_enter(&connp->conn_lock);
+ udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
+ recv_ancillary = connp->conn_recv_ancillary;
+ mutex_exit(&connp->conn_lock);
+
+ hdr_length = ira->ira_ip_hdr_length;
- mp_len = msgdsize(mp);
/*
- * This is the inbound data path.
- * First, we check to make sure the IP version number is correct,
- * and then pull the IP and UDP headers into the first mblk.
+ * IP inspected the UDP header thus all of it must be in the mblk.
+ * UDP length check is performed for IPv6 packets and IPv4 packets
+ * to check if the size of the packet as specified
+ * by the UDP header is the same as the length derived from the IP
+ * header.
*/
+ udpha = (udpha_t *)(rptr + hdr_length);
+ if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
+ goto tossit;
- /* Initialize regardless if ipversion is IPv4 or IPv6 */
- ipp.ipp_fields = 0;
+ hdr_length += UDPH_SIZE;
+ ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */
- ipversion = IPH_HDR_VERSION(rptr);
+ /* Initialize regardless of IP version */
+ ipps.ipp_fields = 0;
- rw_enter(&udp->udp_rwlock, RW_READER);
- udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len;
- udp_bits = udp->udp_bits;
- rw_exit(&udp->udp_rwlock);
+ if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
+ udp_ipv4_options_len > 0) &&
+ connp->conn_family == AF_INET) {
+ int err;
- switch (ipversion) {
- case IPV4_VERSION:
- ASSERT(MBLKL(mp) >= sizeof (ipha_t));
- ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
- hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
- opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE);
- if ((opt_len > 0 || udp_ip_rcv_options_len > 0) &&
- udp->udp_family == AF_INET) {
- /*
- * Record/update udp_ip_rcv_options with the lock
- * held. Not needed for AF_INET6 sockets
- * since they don't support a getsockopt of IP_OPTIONS.
- */
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH,
- opt_len);
- rw_exit(&udp->udp_rwlock);
- }
- /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */
- if ((udp->udp_family == AF_INET6) && (pinfo != NULL) &&
- udp->udp_ip_recvpktinfo) {
- if (pinfo->ip_pkt_flags & IPF_RECVIF) {
- ipp.ipp_fields |= IPPF_IFINDEX;
- ipp.ipp_ifindex = pinfo->ip_pkt_ifindex;
- }
- }
- break;
- case IPV6_VERSION:
/*
- * IPv6 packets can only be received by applications
- * that are prepared to receive IPv6 addresses.
- * The IP fanout must ensure this.
+ * Record/update udp_recv_ipp with the lock
+ * held. Not needed for AF_INET6 sockets
+ * since they don't support a getsockopt of IP_OPTIONS.
*/
- ASSERT(udp->udp_family == AF_INET6);
+ mutex_enter(&connp->conn_lock);
+ err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
+ B_TRUE);
+ if (err != 0) {
+ /* Allocation failed. Drop packet */
+ mutex_exit(&connp->conn_lock);
+ freemsg(mp);
+ BUMP_MIB(&us->us_udp_mib, udpInErrors);
+ return;
+ }
+ mutex_exit(&connp->conn_lock);
+ }
- ip6h = (ip6_t *)rptr;
- ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
+ if (recv_ancillary.crb_all != 0) {
+ /*
+ * Record packet information in the ip_pkt_t
+ */
+ if (ira->ira_flags & IRAF_IS_IPV4) {
+ ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
+ ASSERT(MBLKL(mp) >= sizeof (ipha_t));
+ ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
+ ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
- if (ip6h->ip6_nxt != IPPROTO_UDP) {
+ (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
+ } else {
uint8_t nexthdrp;
- /* Look for ifindex information */
- if (ip6h->ip6_nxt == IPPROTO_RAW) {
- ip6i = (ip6i_t *)ip6h;
- if ((uchar_t *)&ip6i[1] > mp->b_wptr)
- goto tossit;
-
- if (ip6i->ip6i_flags & IP6I_IFINDEX) {
- ASSERT(ip6i->ip6i_ifindex != 0);
- ipp.ipp_fields |= IPPF_IFINDEX;
- ipp.ipp_ifindex = ip6i->ip6i_ifindex;
- }
- rptr = (uchar_t *)&ip6i[1];
- mp->b_rptr = rptr;
- if (rptr == mp->b_wptr) {
- mp1 = mp->b_cont;
- freeb(mp);
- mp = mp1;
- rptr = mp->b_rptr;
- }
- if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE))
- goto tossit;
- ip6h = (ip6_t *)rptr;
- mp_len = msgdsize(mp);
- }
+
+ ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
/*
- * Find any potentially interesting extension headers
- * as well as the length of the IPv6 + extension
- * headers.
+ * IPv6 packets can only be received by applications
+ * that are prepared to receive IPv6 addresses.
+ * The IP fanout must ensure this.
*/
- hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) +
- UDPH_SIZE;
- ASSERT(nexthdrp == IPPROTO_UDP);
- } else {
- hdr_length = IPV6_HDR_LEN + UDPH_SIZE;
- ip6i = NULL;
- }
- break;
- default:
- ASSERT(0);
- }
+ ASSERT(connp->conn_family == AF_INET6);
- /*
- * IP inspected the UDP header thus all of it must be in the mblk.
- * UDP length check is performed for IPv6 packets and IPv4 packets
- * to check if the size of the packet as specified
- * by the header is the same as the physical size of the packet.
- * FIXME? Didn't IP already check this?
- */
- udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
- if ((MBLKL(mp) < hdr_length) ||
- (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) {
- goto tossit;
- }
+ ip6h = (ip6_t *)rptr;
-
- /* Walk past the headers unless UDP_RCVHDR was set. */
- if (!udp_bits.udpb_rcvhdr) {
- mp->b_rptr = rptr + hdr_length;
- mp_len -= hdr_length;
+ /* We don't care about the length, but need the ipp */
+ hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
+ &nexthdrp);
+ ASSERT(hdr_length == ira->ira_ip_hdr_length);
+ /* Restore */
+ hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
+ ASSERT(nexthdrp == IPPROTO_UDP);
+ }
}
/*
* This is the inbound data path. Packets are passed upstream as
- * T_UNITDATA_IND messages with full IP headers still attached.
+ * T_UNITDATA_IND messages.
*/
- if (udp->udp_family == AF_INET) {
+ if (connp->conn_family == AF_INET) {
sin_t *sin;
ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
/*
* Normally only send up the source address.
- * If IP_RECVDSTADDR is set we include the destination IP
- * address as an option. With IP_RECVOPTS we include all
- * the IP options.
+ * If any ancillary data items are wanted we add those.
*/
udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
- if (udp_bits.udpb_recvdstaddr) {
- udi_size += sizeof (struct T_opthdr) +
- sizeof (struct in_addr);
- UDP_STAT(us, udp_in_recvdstaddr);
- }
-
- if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) &&
- (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
- udi_size += sizeof (struct T_opthdr) +
- sizeof (struct in_pktinfo);
- UDP_STAT(us, udp_ip_rcvpktinfo);
- }
-
- if ((udp_bits.udpb_recvopts) && opt_len > 0) {
- udi_size += sizeof (struct T_opthdr) + opt_len;
- UDP_STAT(us, udp_in_recvopts);
- }
-
- /*
- * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
- * space accordingly
- */
- if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
- (pinfo->ip_pkt_flags & IPF_RECVIF)) {
- udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
- UDP_STAT(us, udp_in_recvif);
- }
-
- if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
- (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
- udi_size += sizeof (struct T_opthdr) +
- sizeof (struct sockaddr_dl);
- UDP_STAT(us, udp_in_recvslla);
- }
-
- if ((udp_bits.udpb_recvucred) &&
- (cr = msg_getcred(mp, &cpid)) != NULL) {
- udi_size += sizeof (struct T_opthdr) + ucredsize;
- UDP_STAT(us, udp_in_recvucred);
- }
-
- /*
- * If SO_TIMESTAMP is set allocate the appropriate sized
- * buffer. Since gethrestime() expects a pointer aligned
- * argument, we allocate space necessary for extra
- * alignment (even though it might not be used).
- */
- if (udp_bits.udpb_timestamp) {
- udi_size += sizeof (struct T_opthdr) +
- sizeof (timestruc_t) + _POINTER_ALIGNMENT;
- UDP_STAT(us, udp_in_timestamp);
- }
-
- /*
- * If IP_RECVTTL is set allocate the appropriate sized buffer
- */
- if (udp_bits.udpb_recvttl) {
- udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
- UDP_STAT(us, udp_in_recvttl);
+ if (recv_ancillary.crb_all != 0) {
+ udi_size += conn_recvancillary_size(connp,
+ recv_ancillary, ira, mp, &ipps);
}
/* Allocate a message block for the T_UNITDATA_IND structure. */
mp1 = allocb(udi_size, BPRI_MED);
if (mp1 == NULL) {
freemsg(mp);
- if (options_mp != NULL)
- freeb(options_mp);
BUMP_MIB(&us->us_udp_mib, udpInErrors);
return;
}
mp1->b_cont = mp;
- mp = mp1;
- mp->b_datap->db_type = M_PROTO;
- tudi = (struct T_unitdata_ind *)mp->b_rptr;
- mp->b_wptr = (uchar_t *)tudi + udi_size;
+ mp1->b_datap->db_type = M_PROTO;
+ tudi = (struct T_unitdata_ind *)mp1->b_rptr;
+ mp1->b_wptr = (uchar_t *)tudi + udi_size;
tudi->PRIM_type = T_UNITDATA_IND;
tudi->SRC_length = sizeof (sin_t);
tudi->SRC_offset = sizeof (struct T_unitdata_ind);
@@ -3786,7 +2539,7 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
sin = (sin_t *)&tudi[1];
sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
sin->sin_port = udpha->uha_src_port;
- sin->sin_family = udp->udp_family;
+ sin->sin_family = connp->conn_family;
*(uint32_t *)&sin->sin_zero[0] = 0;
*(uint32_t *)&sin->sin_zero[4] = 0;
@@ -3795,166 +2548,8 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
* IP_RECVTTL has been set.
*/
if (udi_size != 0) {
- /*
- * Copy in destination address before options to avoid
- * any padding issues.
- */
- char *dstopt;
-
- dstopt = (char *)&sin[1];
- if (udp_bits.udpb_recvdstaddr) {
- struct T_opthdr *toh;
- ipaddr_t *dstptr;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVDSTADDR;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (ipaddr_t);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- dstptr = (ipaddr_t *)dstopt;
- *dstptr = ((ipha_t *)rptr)->ipha_dst;
- dstopt += sizeof (ipaddr_t);
- udi_size -= toh->len;
- }
-
- if (udp_bits.udpb_recvopts && opt_len > 0) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVOPTS;
- toh->len = sizeof (struct T_opthdr) + opt_len;
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt,
- opt_len);
- dstopt += opt_len;
- udi_size -= toh->len;
- }
-
- if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) &&
- (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
- struct T_opthdr *toh;
- struct in_pktinfo *pktinfop;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_PKTINFO;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (*pktinfop);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- pktinfop = (struct in_pktinfo *)dstopt;
- pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
- pktinfop->ipi_spec_dst =
- pinfo->ip_pkt_match_addr;
- pktinfop->ipi_addr.s_addr =
- ((ipha_t *)rptr)->ipha_dst;
-
- dstopt += sizeof (struct in_pktinfo);
- udi_size -= toh->len;
- }
-
- if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
- (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
-
- struct T_opthdr *toh;
- struct sockaddr_dl *dstptr;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVSLLA;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (struct sockaddr_dl);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- dstptr = (struct sockaddr_dl *)dstopt;
- bcopy(&pinfo->ip_pkt_slla, dstptr,
- sizeof (struct sockaddr_dl));
- dstopt += sizeof (struct sockaddr_dl);
- udi_size -= toh->len;
- }
-
- if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
- (pinfo->ip_pkt_flags & IPF_RECVIF)) {
-
- struct T_opthdr *toh;
- uint_t *dstptr;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVIF;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (uint_t);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- dstptr = (uint_t *)dstopt;
- *dstptr = pinfo->ip_pkt_ifindex;
- dstopt += sizeof (uint_t);
- udi_size -= toh->len;
- }
-
- if (cr != NULL) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = SOL_SOCKET;
- toh->name = SCM_UCRED;
- toh->len = sizeof (struct T_opthdr) + ucredsize;
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- (void) cred2ucred(cr, cpid, dstopt, rcr);
- dstopt += ucredsize;
- udi_size -= toh->len;
- }
-
- if (udp_bits.udpb_timestamp) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = SOL_SOCKET;
- toh->name = SCM_TIMESTAMP;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (timestruc_t) + _POINTER_ALIGNMENT;
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- /* Align for gethrestime() */
- dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
- sizeof (intptr_t));
- gethrestime((timestruc_t *)dstopt);
- dstopt = (char *)toh + toh->len;
- udi_size -= toh->len;
- }
-
- /*
- * CAUTION:
- * Due to aligment issues
- * Processing of IP_RECVTTL option
- * should always be the last. Adding
- * any option processing after this will
- * cause alignment panic.
- */
- if (udp_bits.udpb_recvttl) {
- struct T_opthdr *toh;
- uint8_t *dstptr;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVTTL;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (uint8_t);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- dstptr = (uint8_t *)dstopt;
- *dstptr = ((ipha_t *)rptr)->ipha_ttl;
- dstopt += sizeof (uint8_t);
- udi_size -= toh->len;
- }
-
- /* Consumed all of allocated space */
- ASSERT(udi_size == 0);
+ conn_recvancillary_add(connp, recv_ancillary, ira,
+ &ipps, (uchar_t *)&sin[1], udi_size);
}
} else {
sin6_t *sin6;
@@ -3968,89 +2563,21 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
*/
udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
- if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
- IPPF_RTHDR|IPPF_IFINDEX)) {
- if ((udp_bits.udpb_ipv6_recvhopopts) &&
- (ipp.ipp_fields & IPPF_HOPOPTS)) {
- size_t hlen;
-
- UDP_STAT(us, udp_in_recvhopopts);
- hlen = copy_hop_opts(&ipp, NULL);
- if (hlen == 0)
- ipp.ipp_fields &= ~IPPF_HOPOPTS;
- udi_size += hlen;
- }
- if (((udp_bits.udpb_ipv6_recvdstopts) ||
- udp_bits.udpb_old_ipv6_recvdstopts) &&
- (ipp.ipp_fields & IPPF_DSTOPTS)) {
- udi_size += sizeof (struct T_opthdr) +
- ipp.ipp_dstoptslen;
- UDP_STAT(us, udp_in_recvdstopts);
- }
- if ((((udp_bits.udpb_ipv6_recvdstopts) &&
- udp_bits.udpb_ipv6_recvrthdr &&
- (ipp.ipp_fields & IPPF_RTHDR)) ||
- (udp_bits.udpb_ipv6_recvrthdrdstopts)) &&
- (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
- udi_size += sizeof (struct T_opthdr) +
- ipp.ipp_rtdstoptslen;
- UDP_STAT(us, udp_in_recvrtdstopts);
- }
- if ((udp_bits.udpb_ipv6_recvrthdr) &&
- (ipp.ipp_fields & IPPF_RTHDR)) {
- udi_size += sizeof (struct T_opthdr) +
- ipp.ipp_rthdrlen;
- UDP_STAT(us, udp_in_recvrthdr);
- }
- if ((udp_bits.udpb_ip_recvpktinfo) &&
- (ipp.ipp_fields & IPPF_IFINDEX)) {
- udi_size += sizeof (struct T_opthdr) +
- sizeof (struct in6_pktinfo);
- UDP_STAT(us, udp_in_recvpktinfo);
- }
-
- }
- if ((udp_bits.udpb_recvucred) &&
- (cr = msg_getcred(mp, &cpid)) != NULL) {
- udi_size += sizeof (struct T_opthdr) + ucredsize;
- UDP_STAT(us, udp_in_recvucred);
- }
-
- /*
- * If SO_TIMESTAMP is set allocate the appropriate sized
- * buffer. Since gethrestime() expects a pointer aligned
- * argument, we allocate space necessary for extra
- * alignment (even though it might not be used).
- */
- if (udp_bits.udpb_timestamp) {
- udi_size += sizeof (struct T_opthdr) +
- sizeof (timestruc_t) + _POINTER_ALIGNMENT;
- UDP_STAT(us, udp_in_timestamp);
- }
-
- if (udp_bits.udpb_ipv6_recvhoplimit) {
- udi_size += sizeof (struct T_opthdr) + sizeof (int);
- UDP_STAT(us, udp_in_recvhoplimit);
- }
-
- if (udp_bits.udpb_ipv6_recvtclass) {
- udi_size += sizeof (struct T_opthdr) + sizeof (int);
- UDP_STAT(us, udp_in_recvtclass);
+ if (recv_ancillary.crb_all != 0) {
+ udi_size += conn_recvancillary_size(connp,
+ recv_ancillary, ira, mp, &ipps);
}
mp1 = allocb(udi_size, BPRI_MED);
if (mp1 == NULL) {
freemsg(mp);
- if (options_mp != NULL)
- freeb(options_mp);
BUMP_MIB(&us->us_udp_mib, udpInErrors);
return;
}
mp1->b_cont = mp;
- mp = mp1;
- mp->b_datap->db_type = M_PROTO;
- tudi = (struct T_unitdata_ind *)mp->b_rptr;
- mp->b_wptr = (uchar_t *)tudi + udi_size;
+ mp1->b_datap->db_type = M_PROTO;
+ tudi = (struct T_unitdata_ind *)mp1->b_rptr;
+ mp1->b_wptr = (uchar_t *)tudi + udi_size;
tudi->PRIM_type = T_UNITDATA_IND;
tudi->SRC_length = sizeof (sin6_t);
tudi->SRC_offset = sizeof (struct T_unitdata_ind);
@@ -4059,7 +2586,7 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
tudi->OPT_length = udi_size;
sin6 = (sin6_t *)&tudi[1];
- if (ipversion == IPV4_VERSION) {
+ if (ira->ira_flags & IRAF_IS_IPV4) {
in6_addr_t v6dst;
IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
@@ -4069,196 +2596,43 @@ udp_input(void *arg1, mblk_t *mp, void *arg2)
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
- connp->conn_zoneid, us->us_netstack);
+ IPCL_ZONEID(connp), us->us_netstack);
} else {
+ ip6h = (ip6_t *)rptr;
+
sin6->sin6_addr = ip6h->ip6_src;
/* No sin6_flowinfo per API */
sin6->sin6_flowinfo = 0;
- /* For link-scope source pass up scope id */
- if ((ipp.ipp_fields & IPPF_IFINDEX) &&
- IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
- sin6->sin6_scope_id = ipp.ipp_ifindex;
+ /* For link-scope pass up scope id */
+ if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
+ sin6->sin6_scope_id = ira->ira_ruifindex;
else
sin6->sin6_scope_id = 0;
sin6->__sin6_src_id = ip_srcid_find_addr(
- &ip6h->ip6_dst, connp->conn_zoneid,
+ &ip6h->ip6_dst, IPCL_ZONEID(connp),
us->us_netstack);
}
sin6->sin6_port = udpha->uha_src_port;
- sin6->sin6_family = udp->udp_family;
+ sin6->sin6_family = connp->conn_family;
if (udi_size != 0) {
- uchar_t *dstopt;
-
- dstopt = (uchar_t *)&sin6[1];
- if ((udp_bits.udpb_ip_recvpktinfo) &&
- (ipp.ipp_fields & IPPF_IFINDEX)) {
- struct T_opthdr *toh;
- struct in6_pktinfo *pkti;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IPV6;
- toh->name = IPV6_PKTINFO;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (*pkti);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- pkti = (struct in6_pktinfo *)dstopt;
- if (ipversion == IPV6_VERSION)
- pkti->ipi6_addr = ip6h->ip6_dst;
- else
- IN6_IPADDR_TO_V4MAPPED(
- ((ipha_t *)rptr)->ipha_dst,
- &pkti->ipi6_addr);
- pkti->ipi6_ifindex = ipp.ipp_ifindex;
- dstopt += sizeof (*pkti);
- udi_size -= toh->len;
- }
- if (udp_bits.udpb_ipv6_recvhoplimit) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IPV6;
- toh->name = IPV6_HOPLIMIT;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (uint_t);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- if (ipversion == IPV6_VERSION)
- *(uint_t *)dstopt = ip6h->ip6_hops;
- else
- *(uint_t *)dstopt =
- ((ipha_t *)rptr)->ipha_ttl;
- dstopt += sizeof (uint_t);
- udi_size -= toh->len;
- }
- if (udp_bits.udpb_ipv6_recvtclass) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IPV6;
- toh->name = IPV6_TCLASS;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (uint_t);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- if (ipversion == IPV6_VERSION) {
- *(uint_t *)dstopt =
- IPV6_FLOW_TCLASS(ip6h->ip6_flow);
- } else {
- ipha_t *ipha = (ipha_t *)rptr;
- *(uint_t *)dstopt =
- ipha->ipha_type_of_service;
- }
- dstopt += sizeof (uint_t);
- udi_size -= toh->len;
- }
- if ((udp_bits.udpb_ipv6_recvhopopts) &&
- (ipp.ipp_fields & IPPF_HOPOPTS)) {
- size_t hlen;
-
- hlen = copy_hop_opts(&ipp, dstopt);
- dstopt += hlen;
- udi_size -= hlen;
- }
- if ((udp_bits.udpb_ipv6_recvdstopts) &&
- (udp_bits.udpb_ipv6_recvrthdr) &&
- (ipp.ipp_fields & IPPF_RTHDR) &&
- (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IPV6;
- toh->name = IPV6_DSTOPTS;
- toh->len = sizeof (struct T_opthdr) +
- ipp.ipp_rtdstoptslen;
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- bcopy(ipp.ipp_rtdstopts, dstopt,
- ipp.ipp_rtdstoptslen);
- dstopt += ipp.ipp_rtdstoptslen;
- udi_size -= toh->len;
- }
- if ((udp_bits.udpb_ipv6_recvrthdr) &&
- (ipp.ipp_fields & IPPF_RTHDR)) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IPV6;
- toh->name = IPV6_RTHDR;
- toh->len = sizeof (struct T_opthdr) +
- ipp.ipp_rthdrlen;
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
- dstopt += ipp.ipp_rthdrlen;
- udi_size -= toh->len;
- }
- if ((udp_bits.udpb_ipv6_recvdstopts) &&
- (ipp.ipp_fields & IPPF_DSTOPTS)) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IPV6;
- toh->name = IPV6_DSTOPTS;
- toh->len = sizeof (struct T_opthdr) +
- ipp.ipp_dstoptslen;
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- bcopy(ipp.ipp_dstopts, dstopt,
- ipp.ipp_dstoptslen);
- dstopt += ipp.ipp_dstoptslen;
- udi_size -= toh->len;
- }
- if (cr != NULL) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = SOL_SOCKET;
- toh->name = SCM_UCRED;
- toh->len = sizeof (struct T_opthdr) + ucredsize;
- toh->status = 0;
- (void) cred2ucred(cr, cpid, &toh[1], rcr);
- dstopt += toh->len;
- udi_size -= toh->len;
- }
- if (udp_bits.udpb_timestamp) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = SOL_SOCKET;
- toh->name = SCM_TIMESTAMP;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (timestruc_t) + _POINTER_ALIGNMENT;
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- /* Align for gethrestime() */
- dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt,
- sizeof (intptr_t));
- gethrestime((timestruc_t *)dstopt);
- dstopt = (uchar_t *)toh + toh->len;
- udi_size -= toh->len;
- }
-
- /* Consumed all of allocated space */
- ASSERT(udi_size == 0);
+ conn_recvancillary_add(connp, recv_ancillary, ira,
+ &ipps, (uchar_t *)&sin6[1], udi_size);
}
-#undef sin6
- /* No IP_RECVDSTADDR for IPv6. */
}
- BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
- if (options_mp != NULL)
- freeb(options_mp);
-
- udp_ulp_recv(connp, mp);
+ /* Walk past the headers unless IP_RECVHDR was set. */
+ if (!udp->udp_rcvhdr) {
+ mp->b_rptr = rptr + hdr_length;
+ pkt_len -= hdr_length;
+ }
+ BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
+ udp_ulp_recv(connp, mp1, pkt_len, ira);
return;
tossit:
freemsg(mp);
- if (options_mp != NULL)
- freeb(options_mp);
BUMP_MIB(&us->us_udp_mib, udpInErrors);
}
@@ -4386,23 +2760,34 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
needattr = B_TRUE;
break;
}
+ mutex_enter(&connp->conn_lock);
+ if (udp->udp_state == TS_DATA_XFER &&
+ connp->conn_ixa->ixa_tsl != NULL) {
+ ts_label_t *tsl;
+
+ tsl = connp->conn_ixa->ixa_tsl;
+ mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
+ mlp.tme_doi = label2doi(tsl);
+ mlp.tme_label = *label2bslabel(tsl);
+ needattr = B_TRUE;
+ }
+ mutex_exit(&connp->conn_lock);
/*
* Create an IPv4 table entry for IPv4 entries and also
* any IPv6 entries which are bound to in6addr_any
* (i.e. anything a IPv4 peer could connect/send to).
*/
- if (udp->udp_ipversion == IPV4_VERSION ||
+ if (connp->conn_ipversion == IPV4_VERSION ||
(udp->udp_state <= TS_IDLE &&
- IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
+ IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
ude.udpEntryInfo.ue_state = state;
/*
* If in6addr_any this will set it to
* INADDR_ANY
*/
- ude.udpLocalAddress =
- V4_PART_OF_V6(udp->udp_v6src);
- ude.udpLocalPort = ntohs(udp->udp_port);
+ ude.udpLocalAddress = connp->conn_laddr_v4;
+ ude.udpLocalPort = ntohs(connp->conn_lport);
if (udp->udp_state == TS_DATA_XFER) {
/*
* Can potentially get here for
@@ -4414,9 +2799,9 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
* this part of the code.
*/
ude.udpEntryInfo.ue_RemoteAddress =
- V4_PART_OF_V6(udp->udp_v6dst);
+ connp->conn_faddr_v4;
ude.udpEntryInfo.ue_RemotePort =
- ntohs(udp->udp_dstport);
+ ntohs(connp->conn_fport);
} else {
ude.udpEntryInfo.ue_RemoteAddress = 0;
ude.udpEntryInfo.ue_RemotePort = 0;
@@ -4429,10 +2814,10 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
*/
ude.udpInstance = (uint32_t)(uintptr_t)udp;
ude.udpCreationProcess =
- (udp->udp_open_pid < 0) ?
+ (connp->conn_cpid < 0) ?
MIB2_UNKNOWN_PROCESS :
- udp->udp_open_pid;
- ude.udpCreationTime = udp->udp_open_time;
+ connp->conn_cpid;
+ ude.udpCreationTime = connp->conn_open_time;
(void) snmp_append_data2(mp_conn_ctl->b_cont,
&mp_conn_tail, (char *)&ude, sizeof (ude));
@@ -4442,16 +2827,24 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
mp_attr_ctl->b_cont, &mp_attr_tail,
(char *)&mlp, sizeof (mlp));
}
- if (udp->udp_ipversion == IPV6_VERSION) {
+ if (connp->conn_ipversion == IPV6_VERSION) {
ude6.udp6EntryInfo.ue_state = state;
- ude6.udp6LocalAddress = udp->udp_v6src;
- ude6.udp6LocalPort = ntohs(udp->udp_port);
- ude6.udp6IfIndex = udp->udp_bound_if;
+ ude6.udp6LocalAddress = connp->conn_laddr_v6;
+ ude6.udp6LocalPort = ntohs(connp->conn_lport);
+ mutex_enter(&connp->conn_lock);
+ if (connp->conn_ixa->ixa_flags &
+ IXAF_SCOPEID_SET) {
+ ude6.udp6IfIndex =
+ connp->conn_ixa->ixa_scopeid;
+ } else {
+ ude6.udp6IfIndex = connp->conn_bound_if;
+ }
+ mutex_exit(&connp->conn_lock);
if (udp->udp_state == TS_DATA_XFER) {
ude6.udp6EntryInfo.ue_RemoteAddress =
- udp->udp_v6dst;
+ connp->conn_faddr_v6;
ude6.udp6EntryInfo.ue_RemotePort =
- ntohs(udp->udp_dstport);
+ ntohs(connp->conn_fport);
} else {
ude6.udp6EntryInfo.ue_RemoteAddress =
sin6_null.sin6_addr;
@@ -4464,10 +2857,10 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
*/
ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
ude6.udp6CreationProcess =
- (udp->udp_open_pid < 0) ?
+ (connp->conn_cpid < 0) ?
MIB2_UNKNOWN_PROCESS :
- udp->udp_open_pid;
- ude6.udp6CreationTime = udp->udp_open_time;
+ connp->conn_cpid;
+ ude6.udp6CreationTime = connp->conn_open_time;
(void) snmp_append_data2(mp6_conn_ctl->b_cont,
&mp6_conn_tail, (char *)&ude6,
@@ -4548,39 +2941,34 @@ udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
* passed in mp. This message is freed.
*/
static void
-udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
- t_scalar_t err)
+udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
{
struct T_unitdata_req *tudr;
mblk_t *mp1;
+ uchar_t *destaddr;
+ t_scalar_t destlen;
uchar_t *optaddr;
t_scalar_t optlen;
- if (DB_TYPE(mp) == M_DATA) {
- ASSERT(destaddr != NULL && destlen != 0);
- optaddr = NULL;
- optlen = 0;
- } else {
- if ((mp->b_wptr < mp->b_rptr) ||
- (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
- goto done;
- }
- tudr = (struct T_unitdata_req *)mp->b_rptr;
- destaddr = mp->b_rptr + tudr->DEST_offset;
- if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
- destaddr + tudr->DEST_length < mp->b_rptr ||
- destaddr + tudr->DEST_length > mp->b_wptr) {
- goto done;
- }
- optaddr = mp->b_rptr + tudr->OPT_offset;
- if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
- optaddr + tudr->OPT_length < mp->b_rptr ||
- optaddr + tudr->OPT_length > mp->b_wptr) {
- goto done;
- }
- destlen = tudr->DEST_length;
- optlen = tudr->OPT_length;
+ if ((mp->b_wptr < mp->b_rptr) ||
+ (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
+ goto done;
}
+ tudr = (struct T_unitdata_req *)mp->b_rptr;
+ destaddr = mp->b_rptr + tudr->DEST_offset;
+ if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
+ destaddr + tudr->DEST_length < mp->b_rptr ||
+ destaddr + tudr->DEST_length > mp->b_wptr) {
+ goto done;
+ }
+ optaddr = mp->b_rptr + tudr->OPT_offset;
+ if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
+ optaddr + tudr->OPT_length < mp->b_rptr ||
+ optaddr + tudr->OPT_length > mp->b_wptr) {
+ goto done;
+ }
+ destlen = tudr->DEST_length;
+ optlen = tudr->OPT_length;
mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
(char *)optaddr, optlen, err);
@@ -4685,1093 +3073,721 @@ retry:
return (port);
}
+/*
+ * Handle T_UNITDATA_REQ with options, for both IPv4 and IPv6.
+ * Either tudr_mp or msg is set. If tudr_mp is set we take ancillary data from
+ * the TPI options, otherwise we take them from msg_control.
+ * If both sin and sin6 are NULL it is a connected socket and we use conn_faddr.
+ * Always consumes mp; never consumes tudr_mp.
+ */
static int
-udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst)
+udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
+ mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
{
- int err;
- cred_t *cred;
- cred_t *orig_cred = NULL;
- cred_t *effective_cred = NULL;
- uchar_t opt_storage[IP_MAX_OPT_LENGTH];
- udp_t *udp = Q_TO_UDP(wq);
+ udp_t *udp = connp->conn_udp;
udp_stack_t *us = udp->udp_us;
+ int error;
+ ip_xmit_attr_t *ixa;
+ ip_pkt_t *ipp;
+ in6_addr_t v6src;
+ in6_addr_t v6dst;
+ in6_addr_t v6nexthop;
+ in_port_t dstport;
+ uint32_t flowinfo;
+ uint_t srcid;
+ int is_absreq_failure = 0;
+ conn_opt_arg_t coas, *coa;
- /*
- * All Solaris components should pass a db_credp
- * for this message, hence we ASSERT.
- * On production kernels we return an error to be robust against
- * random streams modules sitting on top of us.
- */
- cred = orig_cred = msg_getcred(mp, NULL);
- ASSERT(cred != NULL);
- if (cred == NULL)
- return (EINVAL);
+ ASSERT(tudr_mp != NULL || msg != NULL);
/*
- * Verify the destination is allowed to receive packets at
- * the security label of the message data. tsol_check_dest()
- * may create a new effective cred for this message with a
- * modified label or label flags. Note that we use the cred/label
- * from the message to handle MLP
+ * Get ixa before checking state to handle a disconnect race.
+ *
+ * We need an exclusive copy of conn_ixa since the ancillary data
+ * options might modify it. That copy has no pointers hence we
+ * need to set them up once we've parsed the ancillary data.
*/
- if ((err = tsol_check_dest(cred, &dst, IPV4_VERSION,
- udp->udp_connp->conn_mac_mode, &effective_cred)) != 0)
- goto done;
- if (effective_cred != NULL)
- cred = effective_cred;
+ ixa = conn_get_ixa_exclusive(connp);
+ if (ixa == NULL) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(mp);
+ return (ENOMEM);
+ }
+ ASSERT(cr != NULL);
+ ixa->ixa_cred = cr;
+ ixa->ixa_cpid = pid;
+ if (is_system_labeled()) {
+ /* We need to restart with a label based on the cred */
+ ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
+ }
- /*
- * Calculate the security label to be placed in the text
- * of the message (if any).
- */
- if ((err = tsol_compute_label(cred, dst, opt_storage,
- us->us_netstack->netstack_ip)) != 0)
- goto done;
+ /* In case previous destination was multicast or multirt */
+ ip_attr_newdst(ixa);
- /*
- * Insert the security label in the cached ip options,
- * removing any old label that may exist.
- */
- if ((err = tsol_update_options(&udp->udp_ip_snd_options,
- &udp->udp_ip_snd_options_len, &udp->udp_label_len,
- opt_storage)) != 0)
+ /* Get a copy of conn_xmit_ipp since the options might change it */
+ ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
+ if (ipp == NULL) {
+ ixa_refrele(ixa);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(mp);
+ return (ENOMEM);
+ }
+ mutex_enter(&connp->conn_lock);
+ error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
+ mutex_exit(&connp->conn_lock);
+ if (error != 0) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(mp);
goto done;
+ }
/*
- * Save the destination address and creds we used to
- * generate the security label text.
+ * Parse the options and update ixa and ipp as a result.
+ * Note that ixa_tsl can be updated if SCM_UCRED.
+ * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
*/
- if (cred != udp->udp_effective_cred) {
- if (udp->udp_effective_cred != NULL)
- crfree(udp->udp_effective_cred);
- crhold(cred);
- udp->udp_effective_cred = cred;
- }
- if (orig_cred != udp->udp_last_cred) {
- if (udp->udp_last_cred != NULL)
- crfree(udp->udp_last_cred);
- crhold(orig_cred);
- udp->udp_last_cred = orig_cred;
- }
-done:
- if (effective_cred != NULL)
- crfree(effective_cred);
- if (err != 0) {
- DTRACE_PROBE4(
- tx__ip__log__info__updatelabel__udp,
- char *, "queue(1) failed to update options(2) on mp(3)",
- queue_t *, wq, char *, opt_storage, mblk_t *, mp);
- }
- return (err);
-}
+ coa = &coas;
+ coa->coa_connp = connp;
+ coa->coa_ixa = ixa;
+ coa->coa_ipp = ipp;
+ coa->coa_ancillary = B_TRUE;
+ coa->coa_changed = 0;
-static mblk_t *
-udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
- uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg,
- cred_t *cr, pid_t pid)
-{
- udp_t *udp = connp->conn_udp;
- mblk_t *mp1 = mp;
- mblk_t *mp2;
- ipha_t *ipha;
- int ip_hdr_length;
- uint32_t ip_len;
- udpha_t *udpha;
- boolean_t lock_held = B_FALSE;
- in_port_t uha_src_port;
- udpattrs_t attrs;
- uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH];
- uint32_t ip_snd_opt_len = 0;
- ip4_pkt_t pktinfo;
- ip4_pkt_t *pktinfop = &pktinfo;
- ip_opt_info_t optinfo;
- ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
- udp_stack_t *us = udp->udp_us;
- ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
- queue_t *q = connp->conn_wq;
- ire_t *ire;
- in6_addr_t v6dst;
- boolean_t update_lastdst = B_FALSE;
-
- *error = 0;
- pktinfop->ip4_ill_index = 0;
- pktinfop->ip4_addr = INADDR_ANY;
- optinfo.ip_opt_flags = 0;
- optinfo.ip_opt_ill_index = 0;
+ if (msg != NULL) {
+ error = process_auxiliary_options(connp, msg->msg_control,
+ msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
+ } else {
+ struct T_unitdata_req *tudr;
- if (v4dst == INADDR_ANY)
- v4dst = htonl(INADDR_LOOPBACK);
+ tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
+ ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
+ error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
+ &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
+ coa, &is_absreq_failure);
+ }
+ if (error != 0) {
+ /*
+ * Note: No special action needed in this
+ * module for "is_absreq_failure"
+ */
+ freemsg(mp);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ goto done;
+ }
+ ASSERT(is_absreq_failure == 0);
+ mutex_enter(&connp->conn_lock);
/*
- * If options passed in, feed it for verification and handling
+ * If laddr is unspecified then we look at sin6_src_id.
+ * We will give precedence to a source address set with IPV6_PKTINFO
+ * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
+ * want ip_attr_connect to select a source (since it can fail) when
+ * IPV6_PKTINFO is specified.
+ * If this doesn't result in a source address then we get a source
+ * from ip_attr_connect() below.
*/
- attrs.udpattr_credset = B_FALSE;
- if (IPCL_IS_NONSTR(connp)) {
- if (msg->msg_controllen != 0) {
- attrs.udpattr_ipp4 = pktinfop;
- attrs.udpattr_mb = mp;
-
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- *error = process_auxiliary_options(connp,
- msg->msg_control, msg->msg_controllen,
- &attrs, &udp_opt_obj, udp_opt_set, cr);
- rw_exit(&udp->udp_rwlock);
- if (*error)
- goto done;
+ v6src = connp->conn_saddr_v6;
+ if (sin != NULL) {
+ IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
+ dstport = sin->sin_port;
+ flowinfo = 0;
+ ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
+ ixa->ixa_flags |= IXAF_IS_IPV4;
+ } else if (sin6 != NULL) {
+ v6dst = sin6->sin6_addr;
+ dstport = sin6->sin6_port;
+ flowinfo = sin6->sin6_flowinfo;
+ srcid = sin6->__sin6_src_id;
+ if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
+ ixa->ixa_scopeid = sin6->sin6_scope_id;
+ ixa->ixa_flags |= IXAF_SCOPEID_SET;
+ } else {
+ ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
}
- } else {
- if (DB_TYPE(mp) != M_DATA) {
- mp1 = mp->b_cont;
- if (((struct T_unitdata_req *)
- mp->b_rptr)->OPT_length != 0) {
- attrs.udpattr_ipp4 = pktinfop;
- attrs.udpattr_mb = mp;
- if (udp_unitdata_opt_process(q, mp, error,
- &attrs) < 0)
- goto done;
- /*
- * Note: success in processing options.
- * mp option buffer represented by
- * OPT_length/offset now potentially modified
- * and contain option setting results
- */
- ASSERT(*error == 0);
- }
+ if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
+ ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
+ connp->conn_netstack);
}
+ if (IN6_IS_ADDR_V4MAPPED(&v6dst))
+ ixa->ixa_flags |= IXAF_IS_IPV4;
+ else
+ ixa->ixa_flags &= ~IXAF_IS_IPV4;
+ } else {
+ /* Connected case */
+ v6dst = connp->conn_faddr_v6;
+ dstport = connp->conn_fport;
+ flowinfo = connp->conn_flowinfo;
}
+ mutex_exit(&connp->conn_lock);
- /* mp1 points to the M_DATA mblk carrying the packet */
- ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
-
- /*
- * Determine whether we need to mark the mblk with the user's
- * credentials.
- * If labeled then sockfs would have already done this.
- */
- ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);
-
- ire = connp->conn_ire_cache;
- if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) ||
- (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) {
- if (cr != NULL && msg_getcred(mp, NULL) == NULL)
- mblk_setcred(mp, cr, pid);
+ /* Handle IPV6_PKTINFO setting source address. */
+ if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
+ (ipp->ipp_fields & IPPF_ADDR)) {
+ if (ixa->ixa_flags & IXAF_IS_IPV4) {
+ if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
+ v6src = ipp->ipp_addr;
+ } else {
+ if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
+ v6src = ipp->ipp_addr;
+ }
}
- rw_enter(&udp->udp_rwlock, RW_READER);
- lock_held = B_TRUE;
+ ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
+ error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
+ &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
- /*
- * Cluster and TSOL note:
- * udp.udp_v6lastdst is shared by Cluster and TSOL
- * udp.udp_lastdstport is used by Cluster
- *
- * Both Cluster and TSOL need to update the dest addr and/or port.
- * Updating is done after both Cluster and TSOL checks, protected
- * by conn_lock.
- */
- mutex_enter(&connp->conn_lock);
-
- if (cl_inet_connect2 != NULL &&
- (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
- V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
- udp->udp_lastdstport != port)) {
- mutex_exit(&connp->conn_lock);
- *error = 0;
- IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
- CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error);
- if (*error != 0) {
- *error = EHOSTUNREACH;
- goto done;
+ switch (error) {
+ case 0:
+ break;
+ case EADDRNOTAVAIL:
+ /*
+ * IXAF_VERIFY_SOURCE tells us to pick a better source.
+ * Don't have the application see that errno
+ */
+ error = ENETUNREACH;
+ goto failed;
+ case ENETDOWN:
+ /*
+ * Have !ipif_addr_ready address; drop packet silently
+ * until we can get applications to not send until we
+ * are ready.
+ */
+ error = 0;
+ goto failed;
+ case EHOSTUNREACH:
+ case ENETUNREACH:
+ if (ixa->ixa_ire != NULL) {
+ /*
+ * Let conn_ip_output/ire_send_noroute return
+ * the error and send any local ICMP error.
+ */
+ error = 0;
+ break;
}
- update_lastdst = B_TRUE;
- mutex_enter(&connp->conn_lock);
+ /* FALLTHRU */
+ default:
+ failed:
+ freemsg(mp);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ goto done;
}
/*
- * Check if our saved options are valid; update if not.
- * TSOL Note: Since we are not in WRITER mode, UDP packets
- * to different destination may require different labels,
- * or worse, UDP packets to same IP address may require
- * different labels due to use of shared all-zones address.
- * We use conn_lock to ensure that lastdst, ip_snd_options,
- * and ip_snd_options_len are consistent for the current
- * destination and are updated atomically.
+ * We might be going to a different destination than last time,
+ * thus check that TX allows the communication and compute any
+ * needed label.
+ *
+ * TSOL Note: We have an exclusive ipp and ixa for this thread so we
+ * don't have to worry about concurrent threads.
*/
if (is_system_labeled()) {
- cred_t *credp;
- pid_t cpid;
-
/* Using UDP MLP requires SCM_UCRED from user */
if (connp->conn_mlp_type != mlptSingle &&
- !attrs.udpattr_credset) {
- mutex_exit(&connp->conn_lock);
- DTRACE_PROBE4(
- tx__ip__log__info__output__udp,
- char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
- mblk_t *, mp, udpattrs_t *, &attrs, queue_t *, q);
- *error = EINVAL;
+ !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ error = ECONNREFUSED;
+ freemsg(mp);
goto done;
}
/*
- * Update label option for this UDP socket if
- * - the destination has changed,
- * - the UDP socket is MLP, or
- * - the cred attached to the mblk changed.
+ * Check whether Trusted Solaris policy allows communication
+ * with this host, and pretend that the destination is
+ * unreachable if not.
+ * Compute any needed label and place it in ipp_label_v4/v6.
+ *
+ * Later conn_build_hdr_template/conn_prepend_hdr takes
+ * ipp_label_v4/v6 to form the packet.
+ *
+ * Tsol note: We have ipp structure local to this thread so
+ * no locking is needed.
*/
- credp = msg_getcred(mp, &cpid);
- if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
- V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
- connp->conn_mlp_type != mlptSingle ||
- credp != udp->udp_last_cred) {
- if ((*error = udp_update_label(q, mp, v4dst)) != 0) {
- mutex_exit(&connp->conn_lock);
- goto done;
- }
- update_lastdst = B_TRUE;
+ error = conn_update_label(connp, ixa, &v6dst, ipp);
+ if (error != 0) {
+ freemsg(mp);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ goto done;
}
-
- /*
- * Attach the effective cred to the mblk to ensure future
- * routing decisions will be based on it's label.
- */
- mblk_setcred(mp, udp->udp_effective_cred, cpid);
}
- if (update_lastdst) {
- IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst);
- udp->udp_lastdstport = port;
+ mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
+ flowinfo, mp, &error);
+ if (mp == NULL) {
+ ASSERT(error != 0);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ goto done;
}
- if (udp->udp_ip_snd_options_len > 0) {
- ip_snd_opt_len = udp->udp_ip_snd_options_len;
- bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
+ if (ixa->ixa_pktlen > IP_MAXPACKET) {
+ error = EMSGSIZE;
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(mp);
+ goto done;
}
- mutex_exit(&connp->conn_lock);
+ /* We're done. Pass the packet to ip. */
+ BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
- /* Add an IP header */
- ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len +
- (insert_spi ? sizeof (uint32_t) : 0);
- ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
- if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
- !OK_32PTR(ipha)) {
- mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO);
- if (mp2 == NULL) {
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "allocbfail2");
- *error = ENOMEM;
- goto done;
- }
- mp2->b_wptr = DB_LIM(mp2);
- mp2->b_cont = mp1;
- mp1 = mp2;
- if (DB_TYPE(mp) != M_DATA)
- mp->b_cont = mp1;
- else
- mp = mp1;
- ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
- }
- ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0));
-#ifdef _BIG_ENDIAN
- /* Set version, header length, and tos */
- *(uint16_t *)&ipha->ipha_version_and_hdr_length =
- ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
- udp->udp_type_of_service);
- /* Set ttl and protocol */
- *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
-#else
- /* Set version, header length, and tos */
- *(uint16_t *)&ipha->ipha_version_and_hdr_length =
- ((udp->udp_type_of_service << 8) |
- ((IP_VERSION << 4) | (ip_hdr_length>>2)));
- /* Set ttl and protocol */
- *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
-#endif
- if (pktinfop->ip4_addr != INADDR_ANY) {
- ipha->ipha_src = pktinfop->ip4_addr;
- optinfo.ip_opt_flags = IP_VERIFY_SRC;
- } else {
+ error = conn_ip_output(mp, ixa);
+ /* No udpOutErrors if an error since IP increases its error counter */
+ switch (error) {
+ case 0:
+ break;
+ case EWOULDBLOCK:
+ (void) ixa_check_drain_insert(connp, ixa);
+ error = 0;
+ break;
+ case EADDRNOTAVAIL:
/*
- * Copy our address into the packet. If this is zero,
- * first look at __sin6_src_id for a hint. If we leave the
- * source as INADDR_ANY then ip will fill in the real source
- * address.
+ * IXAF_VERIFY_SOURCE tells us to pick a better source.
+ * Don't have the application see that errno
*/
- IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
- if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
- in6_addr_t v6src;
-
- ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid,
- us->us_netstack);
- IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
- }
- }
- uha_src_port = udp->udp_port;
- if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) {
- rw_exit(&udp->udp_rwlock);
- lock_held = B_FALSE;
- }
-
- if (pktinfop->ip4_ill_index != 0) {
- optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
+ error = ENETUNREACH;
+ /* FALLTHRU */
+ default:
+ mutex_enter(&connp->conn_lock);
+ /*
+ * Clear the source and v6lastdst so we call ip_attr_connect
+ * for the next packet and try to pick a better source.
+ */
+ if (connp->conn_mcbc_bind)
+ connp->conn_saddr_v6 = ipv6_all_zeros;
+ else
+ connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
+ connp->conn_v6lastdst = ipv6_all_zeros;
+ mutex_exit(&connp->conn_lock);
+ break;
}
+done:
+ ixa_refrele(ixa);
+ ip_pkt_free(ipp);
+ kmem_free(ipp, sizeof (*ipp));
+ return (error);
+}
- ipha->ipha_fragment_offset_and_flags = 0;
- ipha->ipha_ident = 0;
-
- mp1->b_rptr = (uchar_t *)ipha;
-
- ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
- (uintptr_t)UINT_MAX);
+/*
+ * Handle sending an M_DATA for a connected socket.
+ * Handles both IPv4 and IPv6.
+ */
+static int
+udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
+{
+ udp_t *udp = connp->conn_udp;
+ udp_stack_t *us = udp->udp_us;
+ int error;
+ ip_xmit_attr_t *ixa;
- /* Determine length of packet */
- ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
- if ((mp2 = mp1->b_cont) != NULL) {
- do {
- ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
- ip_len += (uint32_t)MBLKL(mp2);
- } while ((mp2 = mp2->b_cont) != NULL);
- }
/*
- * If the size of the packet is greater than the maximum allowed by
- * ip, return an error. Passing this down could cause panics because
- * the size will have wrapped and be inconsistent with the msg size.
+ * If no other thread is using conn_ixa this just gets a reference to
+ * conn_ixa. Otherwise we get a safe copy of conn_ixa.
*/
- if (ip_len > IP_MAXPACKET) {
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "IP length exceeded");
- *error = EMSGSIZE;
- goto done;
+ ixa = conn_get_ixa(connp, B_FALSE);
+ if (ixa == NULL) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(mp);
+ return (ENOMEM);
}
- ipha->ipha_length = htons((uint16_t)ip_len);
- ip_len -= ip_hdr_length;
- ip_len = htons((uint16_t)ip_len);
- udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);
-
- /* Insert all-0s SPI now. */
- if (insert_spi)
- *((uint32_t *)(udpha + 1)) = 0;
- /*
- * Copy in the destination address
- */
- ipha->ipha_dst = v4dst;
-
- /*
- * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
- */
- if (CLASSD(v4dst))
- ipha->ipha_ttl = udp->udp_multicast_ttl;
-
- udpha->uha_dst_port = port;
- udpha->uha_src_port = uha_src_port;
+ ASSERT(cr != NULL);
+ ixa->ixa_cred = cr;
+ ixa->ixa_cpid = pid;
- if (ip_snd_opt_len > 0) {
- uint32_t cksum;
+ mutex_enter(&connp->conn_lock);
+ mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
+ connp->conn_fport, connp->conn_flowinfo, &error);
- bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
- lock_held = B_FALSE;
- rw_exit(&udp->udp_rwlock);
- /*
- * Massage source route putting first source route in ipha_dst.
- * Ignore the destination in T_unitdata_req.
- * Create a checksum adjustment for a source route, if any.
- */
- cksum = ip_massage_options(ipha, us->us_netstack);
- cksum = (cksum & 0xFFFF) + (cksum >> 16);
- cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
- (ipha->ipha_dst & 0xFFFF);
- if ((int)cksum < 0)
- cksum--;
- cksum = (cksum & 0xFFFF) + (cksum >> 16);
- /*
- * IP does the checksum if uha_checksum is non-zero,
- * We make it easy for IP to include our pseudo header
- * by putting our length in uha_checksum.
- */
- cksum += ip_len;
- cksum = (cksum & 0xFFFF) + (cksum >> 16);
- /* There might be a carry. */
- cksum = (cksum & 0xFFFF) + (cksum >> 16);
-#ifdef _LITTLE_ENDIAN
- if (us->us_do_checksum)
- ip_len = (cksum << 16) | ip_len;
-#else
- if (us->us_do_checksum)
- ip_len = (ip_len << 16) | cksum;
- else
- ip_len <<= 16;
-#endif
- } else {
- /*
- * IP does the checksum if uha_checksum is non-zero,
- * We make it easy for IP to include our pseudo header
- * by putting our length in uha_checksum.
- */
- if (us->us_do_checksum)
- ip_len |= (ip_len << 16);
-#ifndef _LITTLE_ENDIAN
- else
- ip_len <<= 16;
-#endif
+ if (mp == NULL) {
+ ASSERT(error != 0);
+ mutex_exit(&connp->conn_lock);
+ ixa_refrele(ixa);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(mp);
+ return (error);
}
- ASSERT(!lock_held);
- /* Set UDP length and checksum */
- *((uint32_t *)&udpha->uha_length) = ip_len;
- if (DB_TYPE(mp) != M_DATA) {
- cred_t *cr;
- pid_t cpid;
+ /*
+ * In case we got a safe copy of conn_ixa, or if opt_set made us a new
+ * safe copy, then we need to fill in any pointers in it.
+ */
+ if (ixa->ixa_ire == NULL) {
+ in6_addr_t faddr, saddr;
+ in6_addr_t nexthop;
+ in_port_t fport;
+
+ saddr = connp->conn_saddr_v6;
+ faddr = connp->conn_faddr_v6;
+ fport = connp->conn_fport;
+ ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
+ mutex_exit(&connp->conn_lock);
- /* Move any cred from the T_UNITDATA_REQ to the packet */
- cr = msg_extractcred(mp, &cpid);
- if (cr != NULL) {
- if (mp1->b_datap->db_credp != NULL)
- crfree(mp1->b_datap->db_credp);
- mp1->b_datap->db_credp = cr;
- mp1->b_datap->db_cpid = cpid;
+ error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
+ fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
+ IPDF_IPSEC);
+ switch (error) {
+ case 0:
+ break;
+ case EADDRNOTAVAIL:
+ /*
+ * IXAF_VERIFY_SOURCE tells us to pick a better source.
+ * Don't have the application see that errno
+ */
+ error = ENETUNREACH;
+ goto failed;
+ case ENETDOWN:
+ /*
+ * Have !ipif_addr_ready address; drop packet silently
+ * until we can get applications to not send until we
+ * are ready.
+ */
+ error = 0;
+ goto failed;
+ case EHOSTUNREACH:
+ case ENETUNREACH:
+ if (ixa->ixa_ire != NULL) {
+ /*
+ * Let conn_ip_output/ire_send_noroute return
+ * the error and send any local ICMP error.
+ */
+ error = 0;
+ break;
+ }
+ /* FALLTHRU */
+ default:
+ failed:
+ ixa_refrele(ixa);
+ freemsg(mp);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (error);
}
- ASSERT(mp != mp1);
- freeb(mp);
+ } else {
+ /* Done with conn_t */
+ mutex_exit(&connp->conn_lock);
}
-
- /* mp has been consumed and we'll return success */
- ASSERT(*error == 0);
- mp = NULL;
+ ASSERT(ixa->ixa_ire != NULL);
/* We're done. Pass the packet to ip. */
BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "end");
-
- if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
- CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) ||
- connp->conn_dontroute ||
- connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 ||
- optinfo.ip_opt_ill_index != 0 ||
- ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
- IPP_ENABLED(IPP_LOCAL_OUT, ipst) ||
- ipst->ips_ip_g_mrouter != NULL) {
- UDP_STAT(us, udp_ip_send);
- ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT,
- &optinfo);
- } else {
- udp_send_data(udp, connp->conn_wq, mp1, ipha);
- }
-done:
- if (lock_held)
- rw_exit(&udp->udp_rwlock);
- if (*error != 0) {
- ASSERT(mp != NULL);
- BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ error = conn_ip_output(mp, ixa);
+ /* No udpOutErrors if an error since IP increases its error counter */
+ switch (error) {
+ case 0:
+ break;
+ case EWOULDBLOCK:
+ (void) ixa_check_drain_insert(connp, ixa);
+ error = 0;
+ break;
+ case EADDRNOTAVAIL:
+ /*
+ * IXAF_VERIFY_SOURCE tells us to pick a better source.
+ * Don't have the application see that errno
+ */
+ error = ENETUNREACH;
+ break;
}
- return (mp);
+ ixa_refrele(ixa);
+ return (error);
}
-static void
-udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
+/*
+ * Handle sending an M_DATA to the last destination.
+ * Handles both IPv4 and IPv6.
+ *
+ * NOTE: The caller must hold conn_lock and we drop it here.
+ */
+static int
+udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
+ ip_xmit_attr_t *ixa)
{
- conn_t *connp = udp->udp_connp;
- ipaddr_t src, dst;
- ire_t *ire;
- ipif_t *ipif = NULL;
- mblk_t *ire_fp_mp;
- boolean_t retry_caching;
- udp_stack_t *us = udp->udp_us;
- ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
-
- dst = ipha->ipha_dst;
- src = ipha->ipha_src;
- ASSERT(ipha->ipha_ident == 0);
-
- if (CLASSD(dst)) {
- int err;
-
- ipif = conn_get_held_ipif(connp,
- &connp->conn_multicast_ipif, &err);
-
- if (ipif == NULL || ipif->ipif_isv6 ||
- (ipif->ipif_ill->ill_phyint->phyint_flags &
- PHYI_LOOPBACK)) {
- if (ipif != NULL)
- ipif_refrele(ipif);
- UDP_STAT(us, udp_ip_send);
- ip_output(connp, mp, q, IP_WPUT);
- return;
- }
- }
+ udp_t *udp = connp->conn_udp;
+ udp_stack_t *us = udp->udp_us;
+ int error;
- retry_caching = B_FALSE;
- mutex_enter(&connp->conn_lock);
- ire = connp->conn_ire_cache;
- ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));
+ ASSERT(MUTEX_HELD(&connp->conn_lock));
+ ASSERT(ixa != NULL);
- if (ire == NULL || ire->ire_addr != dst ||
- (ire->ire_marks & IRE_MARK_CONDEMNED)) {
- retry_caching = B_TRUE;
- } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
- ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;
+ ASSERT(cr != NULL);
+ ixa->ixa_cred = cr;
+ ixa->ixa_cpid = pid;
- ASSERT(ipif != NULL);
- if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill))
- retry_caching = B_TRUE;
- }
+ mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
+ connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
- if (!retry_caching) {
- ASSERT(ire != NULL);
- IRE_REFHOLD(ire);
+ if (mp == NULL) {
+ ASSERT(error != 0);
mutex_exit(&connp->conn_lock);
- } else {
- boolean_t cached = B_FALSE;
+ ixa_refrele(ixa);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(mp);
+ return (error);
+ }
- connp->conn_ire_cache = NULL;
+ /*
+ * In case we got a safe copy of conn_ixa, or if opt_set made us a new
+ * safe copy, then we need to fill in any pointers in it.
+ */
+ if (ixa->ixa_ire == NULL) {
+ in6_addr_t lastdst, lastsrc;
+ in6_addr_t nexthop;
+ in_port_t lastport;
+
+ lastsrc = connp->conn_v6lastsrc;
+ lastdst = connp->conn_v6lastdst;
+ lastport = connp->conn_lastdstport;
+ ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
mutex_exit(&connp->conn_lock);
- /* Release the old ire */
- if (ire != NULL) {
- IRE_REFRELE_NOTR(ire);
- ire = NULL;
- }
-
- if (CLASSD(dst)) {
- ASSERT(ipif != NULL);
- ire = ire_ctable_lookup(dst, 0, 0, ipif,
- connp->conn_zoneid, msg_getlabel(mp),
- MATCH_IRE_ILL, ipst);
- } else {
- ASSERT(ipif == NULL);
- ire = ire_cache_lookup(dst, connp->conn_zoneid,
- msg_getlabel(mp), ipst);
- }
-
- if (ire == NULL) {
- if (ipif != NULL)
- ipif_refrele(ipif);
- UDP_STAT(us, udp_ire_null);
- ip_output(connp, mp, q, IP_WPUT);
- return;
- }
- IRE_REFHOLD_NOTR(ire);
-
- mutex_enter(&connp->conn_lock);
- if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL &&
- !(ire->ire_marks & IRE_MARK_CONDEMNED)) {
- irb_t *irb = ire->ire_bucket;
-
+ error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
+ &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
+ IPDF_VERIFY_DST | IPDF_IPSEC);
+ switch (error) {
+ case 0:
+ break;
+ case EADDRNOTAVAIL:
/*
- * IRE's created for non-connection oriented transports
- * are normally initialized with IRE_MARK_TEMPORARY set
- * in the ire_marks. These IRE's are preferentially
- * reaped when the hash chain length in the cache
- * bucket exceeds the maximum value specified in
- * ip[6]_ire_max_bucket_cnt. This can severely affect
- * UDP performance if IRE cache entries that we need
- * to reuse are continually removed. To remedy this,
- * when we cache the IRE in the conn_t, we remove the
- * IRE_MARK_TEMPORARY bit from the ire_marks if it was
- * set.
+ * IXAF_VERIFY_SOURCE tells us to pick a better source.
+ * Don't have the application see that errno
*/
- if (ire->ire_marks & IRE_MARK_TEMPORARY) {
- rw_enter(&irb->irb_lock, RW_WRITER);
- if (ire->ire_marks & IRE_MARK_TEMPORARY) {
- ire->ire_marks &= ~IRE_MARK_TEMPORARY;
- irb->irb_tmp_ire_cnt--;
- }
- rw_exit(&irb->irb_lock);
+ error = ENETUNREACH;
+ goto failed;
+ case ENETDOWN:
+ /*
+ * Have !ipif_addr_ready address; drop packet silently
+ * until we can get applications to not send until we
+ * are ready.
+ */
+ error = 0;
+ goto failed;
+ case EHOSTUNREACH:
+ case ENETUNREACH:
+ if (ixa->ixa_ire != NULL) {
+ /*
+ * Let conn_ip_output/ire_send_noroute return
+ * the error and send any local ICMP error.
+ */
+ error = 0;
+ break;
}
- connp->conn_ire_cache = ire;
- cached = B_TRUE;
+ /* FALLTHRU */
+ default:
+ failed:
+ ixa_refrele(ixa);
+ freemsg(mp);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (error);
}
+ } else {
+ /* Done with conn_t */
mutex_exit(&connp->conn_lock);
-
- /*
- * We can continue to use the ire but since it was not
- * cached, we should drop the extra reference.
- */
- if (!cached)
- IRE_REFRELE_NOTR(ire);
}
- ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
- ASSERT(!CLASSD(dst) || ipif != NULL);
- /*
- * Check if we can take the fast-path.
- * Note that "incomplete" ire's (where the link-layer for next hop
- * is not resolved, or where the fast-path header in nce_fp_mp is not
- * available yet) are sent down the legacy (slow) path
- */
- if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
- (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
- (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
- ((ire->ire_nce == NULL) ||
- ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) ||
- connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) {
- if (ipif != NULL)
- ipif_refrele(ipif);
- UDP_STAT(us, udp_ip_ire_send);
- IRE_REFRELE(ire);
- ip_output(connp, mp, q, IP_WPUT);
- return;
- }
+ /* We're done. Pass the packet to ip. */
+ BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
- if (src == INADDR_ANY && !connp->conn_unspec_src) {
- if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
- ipha->ipha_src = ipif->ipif_src_addr;
+ error = conn_ip_output(mp, ixa);
+ /* No udpOutErrors if an error since IP increases its error counter */
+ switch (error) {
+ case 0:
+ break;
+ case EWOULDBLOCK:
+ (void) ixa_check_drain_insert(connp, ixa);
+ error = 0;
+ break;
+ case EADDRNOTAVAIL:
+ /*
+ * IXAF_VERIFY_SOURCE tells us to pick a better source.
+ * Don't have the application see that errno
+ */
+ error = ENETUNREACH;
+ /* FALLTHRU */
+ default:
+ mutex_enter(&connp->conn_lock);
+ /*
+ * Clear the source and v6lastdst so we call ip_attr_connect
+ * for the next packet and try to pick a better source.
+ */
+ if (connp->conn_mcbc_bind)
+ connp->conn_saddr_v6 = ipv6_all_zeros;
else
- ipha->ipha_src = ire->ire_src_addr;
+ connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
+ connp->conn_v6lastdst = ipv6_all_zeros;
+ mutex_exit(&connp->conn_lock);
+ break;
}
-
- if (ipif != NULL)
- ipif_refrele(ipif);
-
- udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid);
+ ixa_refrele(ixa);
+ return (error);
}
-static void
-udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
+
+/*
+ * Prepend the header template and then fill in the source and
+ * flowinfo. The caller needs to handle the destination address since
+ * its setting is different if rthdr or source route.
+ *
+ * Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
+ * When it returns NULL it sets errorp.
+ */
+static mblk_t *
+udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
+ const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
{
- ipaddr_t src, dst;
- ill_t *ill;
- mblk_t *ire_fp_mp;
- uint_t ire_fp_mp_len;
- uint16_t *up;
- uint32_t cksum, hcksum_txflags;
- queue_t *dev_q;
- udp_t *udp = connp->conn_udp;
- ipha_t *ipha = (ipha_t *)mp->b_rptr;
+ udp_t *udp = connp->conn_udp;
udp_stack_t *us = udp->udp_us;
- ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
- boolean_t ll_multicast = B_FALSE;
- boolean_t direct_send;
-
- dev_q = ire->ire_stq->q_next;
- ASSERT(dev_q != NULL);
+ boolean_t insert_spi = udp->udp_nat_t_endpoint;
+ uint_t pktlen;
+ uint_t alloclen;
+ uint_t copylen;
+ uint8_t *iph;
+ uint_t ip_hdr_length;
+ udpha_t *udpha;
+ uint32_t cksum;
+ ip_pkt_t *ipp;
- ill = ire_to_ill(ire);
- ASSERT(ill != NULL);
+ ASSERT(MUTEX_HELD(&connp->conn_lock));
/*
- * For the direct send case, if resetting of conn_direct_blocked
- * was missed, it is still ok because the putq() would enable
- * the queue and write service will drain it out.
+ * Copy the header template and leave space for an SPI
*/
- direct_send = ILL_DIRECT_CAPABLE(ill);
-
- /* is queue flow controlled? */
- if ((!direct_send) && (q->q_first != NULL || connp->conn_draining ||
- DEV_Q_FLOW_BLOCKED(dev_q))) {
- BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
- BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
- if (ipst->ips_ip_output_queue) {
- DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp);
- (void) putq(connp->conn_wq, mp);
- } else {
- freemsg(mp);
- }
- ire_refrele(ire);
- return;
- }
-
- ire_fp_mp = ire->ire_nce->nce_fp_mp;
- ire_fp_mp_len = MBLKL(ire_fp_mp);
- ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len);
-
- dst = ipha->ipha_dst;
- src = ipha->ipha_src;
-
-
- BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
-
- ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
-#ifndef _BIG_ENDIAN
- ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
-#endif
-
- if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
- ASSERT(ill->ill_hcksum_capab != NULL);
- hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
- } else {
- hcksum_txflags = 0;
- }
-
- /* pseudo-header checksum (do it in parts for IP header checksum) */
- cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);
-
- ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
- up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
- if (*up != 0) {
- IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
- mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
- ntohs(ipha->ipha_length), cksum);
-
- /* Software checksum? */
- if (DB_CKSUMFLAGS(mp) == 0) {
- UDP_STAT(us, udp_out_sw_cksum);
- UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes,
- ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
- }
- }
-
- if (!CLASSD(dst)) {
- ipha->ipha_fragment_offset_and_flags |=
- (uint32_t)htons(ire->ire_frag_flag);
- }
-
- /* Calculate IP header checksum if hardware isn't capable */
- if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
- IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
- ((uint16_t *)ipha)[4]);
+ copylen = connp->conn_ht_iphc_len;
+ alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
+ pktlen = alloclen + msgdsize(mp);
+ if (pktlen > IP_MAXPACKET) {
+ freemsg(mp);
+ *errorp = EMSGSIZE;
+ return (NULL);
}
+ ixa->ixa_pktlen = pktlen;
- if (CLASSD(dst)) {
- if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) {
- ip_multicast_loopback(q, ill, mp,
- connp->conn_multicast_loop ? 0 :
- IP_FF_NO_MCAST_LOOP, zoneid);
- }
+ /* check/fix buffer config, setup pointers into it */
+ iph = mp->b_rptr - alloclen;
+ if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
+ mblk_t *mp1;
- /* If multicast TTL is 0 then we are done */
- if (ipha->ipha_ttl == 0) {
+ mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
+ if (mp1 == NULL) {
freemsg(mp);
- ire_refrele(ire);
- return;
+ *errorp = ENOMEM;
+ return (NULL);
}
- ll_multicast = B_TRUE;
+ mp1->b_wptr = DB_LIM(mp1);
+ mp1->b_cont = mp;
+ mp = mp1;
+ iph = (mp->b_wptr - alloclen);
}
+ mp->b_rptr = iph;
+ bcopy(connp->conn_ht_iphc, iph, copylen);
+ ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
- ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
- mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
- bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);
-
- UPDATE_OB_PKT_COUNT(ire);
- ire->ire_last_used_time = lbolt;
-
- BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
- UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
- ntohs(ipha->ipha_length));
+ ixa->ixa_ip_hdr_length = ip_hdr_length;
+ udpha = (udpha_t *)(iph + ip_hdr_length);
- DTRACE_PROBE4(ip4__physical__out__start,
- ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
- FW_HOOKS(ipst->ips_ip4_physical_out_event,
- ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
- ll_multicast, ipst);
- DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
- if (ipst->ips_ip4_observe.he_interested && mp != NULL) {
- zoneid_t szone;
-
- /*
- * Both of these functions expect b_rptr to be
- * where the IP header starts, so advance past the
- * link layer header if present.
- */
- mp->b_rptr += ire_fp_mp_len;
- szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
- ipst, ALL_ZONES);
- ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
- ALL_ZONES, ill, ipst);
- mp->b_rptr -= ire_fp_mp_len;
- }
+ /*
+ * Setup header length and prepare for ULP checksum done in IP.
+ * udp_build_hdr_template has already massaged any routing header
+ * and placed the result in conn_sum.
+ *
+ * We make it easy for IP to include our pseudo header
+ * by putting our length in uha_checksum.
+ */
+ cksum = pktlen - ip_hdr_length;
+ udpha->uha_length = htons(cksum);
- if (mp == NULL)
- goto bail;
+ cksum += connp->conn_sum;
+ cksum = (cksum >> 16) + (cksum & 0xFFFF);
+ ASSERT(cksum < 0x10000);
- DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
- void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,
- ipha_t *, ipha, ip6_t *, NULL, int, 0);
+ ipp = &connp->conn_xmit_ipp;
+ if (ixa->ixa_flags & IXAF_IS_IPV4) {
+ ipha_t *ipha = (ipha_t *)iph;
- if (direct_send) {
- uintptr_t cookie;
- ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct;
+ ipha->ipha_length = htons((uint16_t)pktlen);
- cookie = idd->idd_tx_df(idd->idd_tx_dh, mp,
- (uintptr_t)connp, 0);
- if (cookie != NULL) {
- idl_tx_list_t *idl_txl;
+ /* IP does the checksum if uha_checksum is non-zero */
+ if (us->us_do_checksum)
+ udpha->uha_checksum = htons(cksum);
- /*
- * Flow controlled.
- */
- DTRACE_PROBE2(non__null__cookie, uintptr_t,
- cookie, conn_t *, connp);
- idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
- mutex_enter(&idl_txl->txl_lock);
- /*
- * Check again after holding txl_lock to see if Tx
- * ring is still blocked and only then insert the
- * connp into the drain list.
- */
- if (connp->conn_direct_blocked ||
- (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh,
- cookie) == 0)) {
- mutex_exit(&idl_txl->txl_lock);
- goto bail;
- }
- if (idl_txl->txl_cookie != NULL &&
- idl_txl->txl_cookie != cookie) {
- DTRACE_PROBE2(udp__xmit__collision,
- uintptr_t, cookie,
- uintptr_t, idl_txl->txl_cookie);
- UDP_STAT(us, udp_cookie_coll);
- } else {
- connp->conn_direct_blocked = B_TRUE;
- idl_txl->txl_cookie = cookie;
- conn_drain_insert(connp, idl_txl);
- DTRACE_PROBE1(udp__xmit__insert,
- conn_t *, connp);
- }
- mutex_exit(&idl_txl->txl_lock);
+ /* if IP_PKTINFO specified an address it wins over bind() */
+ if ((ipp->ipp_fields & IPPF_ADDR) &&
+ IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
+ ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
+ ipha->ipha_src = ipp->ipp_addr_v4;
+ } else {
+ IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
}
} else {
- DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp);
- putnext(ire->ire_stq, mp);
- }
-bail:
- IRE_REFRELE(ire);
-}
+ ip6_t *ip6h = (ip6_t *)iph;
-static boolean_t
-udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
-{
- udp_t *udp = Q_TO_UDP(wq);
- int err;
- cred_t *cred;
- cred_t *orig_cred;
- cred_t *effective_cred = NULL;
- uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
- udp_stack_t *us = udp->udp_us;
-
- /*
- * All Solaris components should pass a db_credp
- * for this message, hence we ASSERT.
- * On production kernels we return an error to be robust against
- * random streams modules sitting on top of us.
- */
- cred = orig_cred = msg_getcred(mp, NULL);
- ASSERT(cred != NULL);
- if (cred == NULL)
- return (EINVAL);
-
- /*
- * Verify the destination is allowed to receive packets at
- * the security label of the message data. tsol_check_dest()
- * may create a new effective cred for this message with a
- * modified label or label flags. Note that we use the
- * cred/label from the message to handle MLP.
- */
- if ((err = tsol_check_dest(cred, dst, IPV6_VERSION,
- udp->udp_connp->conn_mac_mode, &effective_cred)) != 0)
- goto done;
- if (effective_cred != NULL)
- cred = effective_cred;
-
- /*
- * Calculate the security label to be placed in the text
- * of the message (if any).
- */
- if ((err = tsol_compute_label_v6(cred, dst, opt_storage,
- us->us_netstack->netstack_ip)) != 0)
- goto done;
-
- /*
- * Insert the security label in the cached ip options,
- * removing any old label that may exist.
- */
- if ((err = tsol_update_sticky(&udp->udp_sticky_ipp,
- &udp->udp_label_len_v6, opt_storage)) != 0)
- goto done;
+ ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN));
+ udpha->uha_checksum = htons(cksum);
- /*
- * Save the destination address and cred we used to
- * generate the security label text.
- */
- if (cred != udp->udp_effective_cred) {
- if (udp->udp_effective_cred != NULL)
- crfree(udp->udp_effective_cred);
- crhold(cred);
- udp->udp_effective_cred = cred;
- }
- if (orig_cred != udp->udp_last_cred) {
- if (udp->udp_last_cred != NULL)
- crfree(udp->udp_last_cred);
- crhold(orig_cred);
- udp->udp_last_cred = orig_cred;
+ /* if IP_PKTINFO specified an address it wins over bind() */
+ if ((ipp->ipp_fields & IPPF_ADDR) &&
+ !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
+ ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
+ ip6h->ip6_src = ipp->ipp_addr;
+ } else {
+ ip6h->ip6_src = *v6src;
+ }
+ ip6h->ip6_vcf =
+ (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
+ (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
+ if (ipp->ipp_fields & IPPF_TCLASS) {
+ /* Overrides the class part of flowinfo */
+ ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
+ ipp->ipp_tclass);
+ }
}
-done:
- if (effective_cred != NULL)
- crfree(effective_cred);
+ /* Insert all-0s SPI now. */
+ if (insert_spi)
+ *((uint32_t *)(udpha + 1)) = 0;
- if (err != 0) {
- DTRACE_PROBE4(
- tx__ip__log__drop__updatelabel__udp6,
- char *, "queue(1) failed to update options(2) on mp(3)",
- queue_t *, wq, char *, opt_storage, mblk_t *, mp);
- }
- return (err);
+ udpha->uha_dst_port = dstport;
+ return (mp);
}
-static int
-udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr,
- pid_t pid)
+/*
+ * Send a T_UDERR_IND in response to an M_DATA
+ */
+static void
+udp_ud_err_connected(conn_t *connp, t_scalar_t error)
{
- udp_t *udp = connp->conn_udp;
- udp_stack_t *us = udp->udp_us;
- ipaddr_t v4dst;
- in_port_t dstport;
- boolean_t mapped_addr;
struct sockaddr_storage ss;
sin_t *sin;
sin6_t *sin6;
struct sockaddr *addr;
socklen_t addrlen;
- int error;
- boolean_t insert_spi = udp->udp_nat_t_endpoint;
-
- /* M_DATA for connected socket */
-
- ASSERT(udp->udp_issocket);
- UDP_DBGSTAT(us, udp_data_conn);
+ mblk_t *mp1;
mutex_enter(&connp->conn_lock);
- if (udp->udp_state != TS_DATA_XFER) {
- mutex_exit(&connp->conn_lock);
- BUMP_MIB(&us->us_udp_mib, udpOutErrors);
- UDP_STAT(us, udp_out_err_notconn);
- freemsg(mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: connp %p (%S)", connp,
- "not-connected; address required");
- return (EDESTADDRREQ);
- }
-
- mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst);
- if (mapped_addr)
- IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);
-
/* Initialize addr and addrlen as if they're passed in */
- if (udp->udp_family == AF_INET) {
+ if (connp->conn_family == AF_INET) {
sin = (sin_t *)&ss;
+ *sin = sin_null;
sin->sin_family = AF_INET;
- dstport = sin->sin_port = udp->udp_dstport;
- ASSERT(mapped_addr);
- sin->sin_addr.s_addr = v4dst;
+ sin->sin_port = connp->conn_fport;
+ sin->sin_addr.s_addr = connp->conn_faddr_v4;
addr = (struct sockaddr *)sin;
addrlen = sizeof (*sin);
} else {
sin6 = (sin6_t *)&ss;
+ *sin6 = sin6_null;
sin6->sin6_family = AF_INET6;
- dstport = sin6->sin6_port = udp->udp_dstport;
- sin6->sin6_flowinfo = udp->udp_flowinfo;
- sin6->sin6_addr = udp->udp_v6dst;
- sin6->sin6_scope_id = 0;
+ sin6->sin6_port = connp->conn_fport;
+ sin6->sin6_flowinfo = connp->conn_flowinfo;
+ sin6->sin6_addr = connp->conn_faddr_v6;
+ if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
+ (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
+ sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
+ } else {
+ sin6->sin6_scope_id = 0;
+ }
sin6->__sin6_src_id = 0;
addr = (struct sockaddr *)sin6;
addrlen = sizeof (*sin6);
}
mutex_exit(&connp->conn_lock);
- if (mapped_addr) {
- /*
- * Handle both AF_INET and AF_INET6; the latter
- * for IPV4 mapped destination addresses. Note
- * here that both addr and addrlen point to the
- * corresponding struct depending on the address
- * family of the socket.
- */
- mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error,
- insert_spi, msg, cr, pid);
- } else {
- mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid);
- }
- if (error == 0) {
- ASSERT(mp == NULL);
- return (0);
- }
-
- UDP_STAT(us, udp_out_err_output);
- ASSERT(mp != NULL);
- if (IPCL_IS_NONSTR(connp)) {
- freemsg(mp);
- return (error);
- } else {
- /* mp is freed by the following routine */
- udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr,
- (t_scalar_t)addrlen, (t_scalar_t)error);
- return (0);
- }
-}
-
-/* ARGSUSED */
-static int
-udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
- socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid)
-{
-
- udp_t *udp = connp->conn_udp;
- boolean_t insert_spi = udp->udp_nat_t_endpoint;
- int error = 0;
- sin6_t *sin6;
- sin_t *sin;
- uint_t srcid;
- uint16_t port;
- ipaddr_t v4dst;
-
-
- ASSERT(addr != NULL);
-
- switch (udp->udp_family) {
- case AF_INET6:
- sin6 = (sin6_t *)addr;
- if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- /*
- * Destination is a non-IPv4-compatible IPv6 address.
- * Send out an IPv6 format packet.
- */
- mp = udp_output_v6(connp, mp, sin6, &error, msg, cr,
- pid);
- if (error != 0)
- goto ud_error;
-
- return (0);
- }
- /*
- * If the local address is not zero or a mapped address
- * return an error. It would be possible to send an IPv4
- * packet but the response would never make it back to the
- * application since it is bound to a non-mapped address.
- */
- if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
- !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
- error = EADDRNOTAVAIL;
- goto ud_error;
- }
- /* Send IPv4 packet without modifying udp_ipversion */
- /* Extract port and ipaddr */
- port = sin6->sin6_port;
- IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
- srcid = sin6->__sin6_src_id;
- break;
-
- case AF_INET:
- sin = (sin_t *)addr;
- /* Extract port and ipaddr */
- port = sin->sin_port;
- v4dst = sin->sin_addr.s_addr;
- srcid = 0;
- break;
- }
-
- mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi,
- msg, cr, pid);
-
- if (error == 0) {
- ASSERT(mp == NULL);
- return (0);
- }
-
-ud_error:
- ASSERT(mp != NULL);
-
- return (error);
+ mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
+ if (mp1 != NULL)
+ putnext(connp->conn_rq, mp1);
}
/*
@@ -5788,15 +3804,20 @@ ud_error:
void
udp_wput(queue_t *q, mblk_t *mp)
{
+ sin6_t *sin6;
+ sin_t *sin = NULL;
+ uint_t srcid;
conn_t *connp = Q_TO_CONN(q);
udp_t *udp = connp->conn_udp;
int error = 0;
- struct sockaddr *addr;
+ struct sockaddr *addr = NULL;
socklen_t addrlen;
udp_stack_t *us = udp->udp_us;
-
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
- "udp_wput_start: queue %p mp %p", q, mp);
+ struct T_unitdata_req *tudr;
+ mblk_t *data_mp;
+ ushort_t ipversion;
+ cred_t *cr;
+ pid_t pid;
/*
* We directly handle several cases here: T_UNITDATA_REQ message
@@ -5805,910 +3826,612 @@ udp_wput(queue_t *q, mblk_t *mp)
*/
switch (DB_TYPE(mp)) {
case M_DATA:
- /*
- * Quick check for error cases. Checks will be done again
- * under the lock later on
- */
if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
/* Not connected; address is required */
BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ UDP_DBGSTAT(us, udp_data_notconn);
UDP_STAT(us, udp_out_err_notconn);
freemsg(mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: connp %p (%S)", connp,
- "not-connected; address required");
return;
}
- (void) udp_send_connected(connp, mp, NULL, NULL, -1);
+ /*
+ * All Solaris components should pass a db_credp
+ * for this message, hence we ASSERT.
+ * On production kernels we return an error to be robust against
+ * random streams modules sitting on top of us.
+ */
+ cr = msg_getcred(mp, &pid);
+ ASSERT(cr != NULL);
+ if (cr == NULL) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(mp);
+ return;
+ }
+ ASSERT(udp->udp_issocket);
+ UDP_DBGSTAT(us, udp_data_conn);
+ error = udp_output_connected(connp, mp, cr, pid);
+ if (error != 0) {
+ UDP_STAT(us, udp_out_err_output);
+ if (connp->conn_rq != NULL)
+ udp_ud_err_connected(connp, (t_scalar_t)error);
+#ifdef DEBUG
+ printf("udp_output_connected returned %d\n", error);
+#endif
+ }
return;
case M_PROTO:
- case M_PCPROTO: {
- struct T_unitdata_req *tudr;
-
- ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
+ case M_PCPROTO:
tudr = (struct T_unitdata_req *)mp->b_rptr;
-
- /* Handle valid T_UNITDATA_REQ here */
- if (MBLKL(mp) >= sizeof (*tudr) &&
- ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
- if (mp->b_cont == NULL) {
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "badaddr");
- error = EPROTO;
- goto ud_error;
- }
-
- if (!MBLKIN(mp, 0, tudr->DEST_offset +
- tudr->DEST_length)) {
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "badaddr");
- error = EADDRNOTAVAIL;
- goto ud_error;
- }
- /*
- * If a port has not been bound to the stream, fail.
- * This is not a problem when sockfs is directly
- * above us, because it will ensure that the socket
- * is first bound before allowing data to be sent.
- */
- if (udp->udp_state == TS_UNBND) {
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: q %p (%S)", q, "outstate");
- error = EPROTO;
- goto ud_error;
- }
- addr = (struct sockaddr *)
- &mp->b_rptr[tudr->DEST_offset];
- addrlen = tudr->DEST_length;
- if (tudr->OPT_length != 0)
- UDP_STAT(us, udp_out_opt);
- break;
+ if (MBLKL(mp) < sizeof (*tudr) ||
+ ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
+ udp_wput_other(q, mp);
+ return;
}
- /* FALLTHRU */
- }
+ break;
+
default:
udp_wput_other(q, mp);
return;
}
- ASSERT(addr != NULL);
- error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL,
- -1);
- if (error != 0) {
-ud_error:
- UDP_STAT(us, udp_out_err_output);
- ASSERT(mp != NULL);
- /* mp is freed by the following routine */
- udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
- (t_scalar_t)error);
+ /* Handle valid T_UNITDATA_REQ here */
+ data_mp = mp->b_cont;
+ if (data_mp == NULL) {
+ error = EPROTO;
+ goto ud_error2;
}
-}
+ mp->b_cont = NULL;
-/* ARGSUSED */
-static void
-udp_wput_fallback(queue_t *wq, mblk_t *mp)
-{
-#ifdef DEBUG
- cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
-#endif
- freemsg(mp);
-}
-
-
-/*
- * udp_output_v6():
- * Assumes that udp_wput did some sanity checking on the destination
- * address.
- */
-static mblk_t *
-udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error,
- struct nmsghdr *msg, cred_t *cr, pid_t pid)
-{
- ip6_t *ip6h;
- ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */
- mblk_t *mp1 = mp;
- mblk_t *mp2;
- int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
- size_t ip_len;
- udpha_t *udph;
- udp_t *udp = connp->conn_udp;
- udp_stack_t *us = udp->udp_us;
- queue_t *q = connp->conn_wq;
- ip6_pkt_t ipp_s; /* For ancillary data options */
- ip6_pkt_t *ipp = &ipp_s;
- ip6_pkt_t *tipp; /* temporary ipp */
- uint32_t csum = 0;
- uint_t ignore = 0;
- uint_t option_exists = 0, is_sticky = 0;
- uint8_t *cp;
- uint8_t *nxthdr_ptr;
- in6_addr_t ip6_dst;
- in_port_t port;
- udpattrs_t attrs;
- boolean_t opt_present;
- ip6_hbh_t *hopoptsptr = NULL;
- uint_t hopoptslen = 0;
- boolean_t is_ancillary = B_FALSE;
- size_t sth_wroff = 0;
- ire_t *ire;
- boolean_t update_lastdst = B_FALSE;
-
- *error = 0;
-
- /*
- * If the local address is a mapped address return
- * an error.
- * It would be possible to send an IPv6 packet but the
- * response would never make it back to the application
- * since it is bound to a mapped address.
- */
- if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) {
- *error = EADDRNOTAVAIL;
- goto done;
+ if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
+ error = EADDRNOTAVAIL;
+ goto ud_error2;
}
- ipp->ipp_fields = 0;
- ipp->ipp_sticky_ignored = 0;
-
/*
- * If TPI options passed in, feed it for verification and handling
+ * All Solaris components should pass a db_credp
+ * for this TPI message, hence we should ASSERT.
+ * However, RPC (svc_clts_ksend) does this odd thing where it
+ * passes the options from a T_UNITDATA_IND unchanged in a
+ * T_UNITDATA_REQ. While that is the right thing to do for
+ * some options, SCM_UCRED being the key one, this also makes it
+ * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
*/
- attrs.udpattr_credset = B_FALSE;
- opt_present = B_FALSE;
- if (IPCL_IS_NONSTR(connp)) {
- if (msg->msg_controllen != 0) {
- attrs.udpattr_ipp6 = ipp;
- attrs.udpattr_mb = mp;
-
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- *error = process_auxiliary_options(connp,
- msg->msg_control, msg->msg_controllen,
- &attrs, &udp_opt_obj, udp_opt_set, cr);
- rw_exit(&udp->udp_rwlock);
- if (*error)
- goto done;
- ASSERT(*error == 0);
- opt_present = B_TRUE;
- }
- } else {
- if (DB_TYPE(mp) != M_DATA) {
- mp1 = mp->b_cont;
- if (((struct T_unitdata_req *)
- mp->b_rptr)->OPT_length != 0) {
- attrs.udpattr_ipp6 = ipp;
- attrs.udpattr_mb = mp;
- if (udp_unitdata_opt_process(q, mp, error,
- &attrs) < 0) {
- goto done;
- }
- ASSERT(*error == 0);
- opt_present = B_TRUE;
- }
- }
+ cr = msg_getcred(mp, &pid);
+ if (cr == NULL) {
+ cr = connp->conn_cred;
+ pid = connp->conn_cpid;
}
/*
- * Determine whether we need to mark the mblk with the user's
- * credentials.
- * If labeled then sockfs would have already done this.
+ * If a port has not been bound to the stream, fail.
+ * This is not a problem when sockfs is directly
+ * above us, because it will ensure that the socket
+ * is first bound before allowing data to be sent.
*/
- ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);
- ire = connp->conn_ire_cache;
- if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) ||
- (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) ||
- (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) {
- if (cr != NULL && msg_getcred(mp, NULL) == NULL)
- mblk_setcred(mp, cr, pid);
- }
-
- rw_enter(&udp->udp_rwlock, RW_READER);
- ignore = ipp->ipp_sticky_ignored;
-
- /* mp1 points to the M_DATA mblk carrying the packet */
- ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
-
- if (sin6->sin6_scope_id != 0 &&
- IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
- /*
- * IPPF_SCOPE_ID is special. It's neither a sticky
- * option nor ancillary data. It needs to be
- * explicitly set in options_exists.
- */
- option_exists |= IPPF_SCOPE_ID;
+ if (udp->udp_state == TS_UNBND) {
+ error = EPROTO;
+ goto ud_error2;
}
+ addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
+ addrlen = tudr->DEST_length;
- /*
- * Compute the destination address
- */
- ip6_dst = sin6->sin6_addr;
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
- ip6_dst = ipv6_loopback;
-
- port = sin6->sin6_port;
-
- /*
- * Cluster and TSOL notes, Cluster check:
- * see comments in udp_output_v4().
- */
- mutex_enter(&connp->conn_lock);
-
- if (cl_inet_connect2 != NULL &&
- (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) ||
- port != udp->udp_lastdstport)) {
- mutex_exit(&connp->conn_lock);
- *error = 0;
- CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error);
- if (*error != 0) {
- *error = EHOSTUNREACH;
- rw_exit(&udp->udp_rwlock);
- goto done;
+ switch (connp->conn_family) {
+ case AF_INET6:
+ sin6 = (sin6_t *)addr;
+ if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
+ (sin6->sin6_family != AF_INET6)) {
+ error = EADDRNOTAVAIL;
+ goto ud_error2;
}
- update_lastdst = B_TRUE;
- mutex_enter(&connp->conn_lock);
- }
- /*
- * If we're not going to the same destination as last time, then
- * recompute the label required. This is done in a separate routine to
- * avoid blowing up our stack here.
- *
- * TSOL Note: Since we are not in WRITER mode, UDP packets
- * to different destination may require different labels,
- * or worse, UDP packets to same IP address may require
- * different labels due to use of shared all-zones address.
- * We use conn_lock to ensure that lastdst, sticky ipp_hopopts,
- * and sticky ipp_hopoptslen are consistent for the current
- * destination and are updated atomically.
- */
- if (is_system_labeled()) {
- cred_t *credp;
- pid_t cpid;
+ srcid = sin6->__sin6_src_id;
+ if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ /*
+ * Destination is a non-IPv4-compatible IPv6 address.
+ * Send out an IPv6 format packet.
+ */
- /* Using UDP MLP requires SCM_UCRED from user */
- if (connp->conn_mlp_type != mlptSingle &&
- !attrs.udpattr_credset) {
- DTRACE_PROBE4(
- tx__ip__log__info__output__udp6,
- char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
- mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
- *error = EINVAL;
- rw_exit(&udp->udp_rwlock);
- mutex_exit(&connp->conn_lock);
- goto done;
- }
- /*
- * update label option for this UDP socket if
- * - the destination has changed,
- * - the UDP socket is MLP, or
- * - the cred attached to the mblk changed.
- */
- credp = msg_getcred(mp, &cpid);
- if (opt_present ||
- !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) ||
- connp->conn_mlp_type != mlptSingle ||
- credp != udp->udp_last_cred) {
- if ((*error = udp_update_label_v6(q, mp, &ip6_dst))
- != 0) {
- rw_exit(&udp->udp_rwlock);
- mutex_exit(&connp->conn_lock);
- goto done;
+ /*
+ * If the local address is a mapped address return
+ * an error.
+ * It would be possible to send an IPv6 packet but the
+ * response would never make it back to the application
+ * since it is bound to a mapped address.
+ */
+ if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
+ error = EADDRNOTAVAIL;
+ goto ud_error2;
}
- update_lastdst = B_TRUE;
- }
- /*
- * Attach the effective cred to the mblk to ensure future
- * routing decisions will be based on it's label.
- */
- mblk_setcred(mp, udp->udp_effective_cred, cpid);
- }
- if (update_lastdst) {
- udp->udp_v6lastdst = ip6_dst;
- udp->udp_lastdstport = port;
- }
+ UDP_DBGSTAT(us, udp_out_ipv6);
- /*
- * If there's a security label here, then we ignore any options the
- * user may try to set. We keep the peer's label as a hidden sticky
- * option. We make a private copy of this label before releasing the
- * lock so that label is kept consistent with the destination addr.
- */
- if (udp->udp_label_len_v6 > 0) {
- ignore &= ~IPPF_HOPOPTS;
- ipp->ipp_fields &= ~IPPF_HOPOPTS;
- }
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ sin6->sin6_addr = ipv6_loopback;
+ ipversion = IPV6_VERSION;
+ } else {
+ if (connp->conn_ipv6_v6only) {
+ error = EADDRNOTAVAIL;
+ goto ud_error2;
+ }
- if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) {
- /* No sticky options nor ancillary data. */
- mutex_exit(&connp->conn_lock);
- goto no_options;
- }
+ /*
+ * If the local address is not zero or a mapped address
+ * return an error. It would be possible to send an
+ * IPv4 packet but the response would never make it
+ * back to the application since it is bound to a
+ * non-mapped address.
+ */
+ if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
+ !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
+ error = EADDRNOTAVAIL;
+ goto ud_error2;
+ }
+ UDP_DBGSTAT(us, udp_out_mapped);
- /*
- * Go through the options figuring out where each is going to
- * come from and build two masks. The first mask indicates if
- * the option exists at all. The second mask indicates if the
- * option is sticky or ancillary.
- */
- if (!(ignore & IPPF_HOPOPTS)) {
- if (ipp->ipp_fields & IPPF_HOPOPTS) {
- option_exists |= IPPF_HOPOPTS;
- udp_ip_hdr_len += ipp->ipp_hopoptslen;
- } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
- option_exists |= IPPF_HOPOPTS;
- is_sticky |= IPPF_HOPOPTS;
- ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0);
- hopoptsptr = kmem_alloc(
- udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP);
- if (hopoptsptr == NULL) {
- *error = ENOMEM;
- mutex_exit(&connp->conn_lock);
- goto done;
+ if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
+ V4_PART_OF_V6(sin6->sin6_addr) =
+ htonl(INADDR_LOOPBACK);
}
- hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen;
- bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr,
- hopoptslen);
- udp_ip_hdr_len += hopoptslen;
+ ipversion = IPV4_VERSION;
}
- }
- mutex_exit(&connp->conn_lock);
- if (!(ignore & IPPF_RTHDR)) {
- if (ipp->ipp_fields & IPPF_RTHDR) {
- option_exists |= IPPF_RTHDR;
- udp_ip_hdr_len += ipp->ipp_rthdrlen;
- } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
- option_exists |= IPPF_RTHDR;
- is_sticky |= IPPF_RTHDR;
- udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen;
- }
- }
+ if (tudr->OPT_length != 0) {
+ /*
+ * If we are connected then the destination needs to be
+ * the same as the connected one.
+ */
+ if (udp->udp_state == TS_DATA_XFER &&
+ !conn_same_as_last_v6(connp, sin6)) {
+ error = EISCONN;
+ goto ud_error2;
+ }
+ UDP_STAT(us, udp_out_opt);
+ error = udp_output_ancillary(connp, NULL, sin6,
+ data_mp, mp, NULL, cr, pid);
+ } else {
+ ip_xmit_attr_t *ixa;
- if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
- if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
- option_exists |= IPPF_RTDSTOPTS;
- udp_ip_hdr_len += ipp->ipp_rtdstoptslen;
- } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
- option_exists |= IPPF_RTDSTOPTS;
- is_sticky |= IPPF_RTDSTOPTS;
- udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen;
+ /*
+ * We have to allocate an ip_xmit_attr_t before we grab
+ * conn_lock and we need to hold conn_lock once we've
+ * checked conn_same_as_last_v6 to handle concurrent
+ * send* calls on a socket.
+ */
+ ixa = conn_get_ixa(connp, B_FALSE);
+ if (ixa == NULL) {
+ error = ENOMEM;
+ goto ud_error2;
+ }
+ mutex_enter(&connp->conn_lock);
+
+ if (conn_same_as_last_v6(connp, sin6) &&
+ connp->conn_lastsrcid == srcid &&
+ ipsec_outbound_policy_current(ixa)) {
+ UDP_DBGSTAT(us, udp_out_lastdst);
+ /* udp_output_lastdst drops conn_lock */
+ error = udp_output_lastdst(connp, data_mp, cr,
+ pid, ixa);
+ } else {
+ UDP_DBGSTAT(us, udp_out_diffdst);
+ /* udp_output_newdst drops conn_lock */
+ error = udp_output_newdst(connp, data_mp, NULL,
+ sin6, ipversion, cr, pid, ixa);
+ }
+ ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
}
- }
-
- if (!(ignore & IPPF_DSTOPTS)) {
- if (ipp->ipp_fields & IPPF_DSTOPTS) {
- option_exists |= IPPF_DSTOPTS;
- udp_ip_hdr_len += ipp->ipp_dstoptslen;
- } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
- option_exists |= IPPF_DSTOPTS;
- is_sticky |= IPPF_DSTOPTS;
- udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen;
+ if (error == 0) {
+ freeb(mp);
+ return;
}
- }
+ break;
- if (!(ignore & IPPF_IFINDEX)) {
- if (ipp->ipp_fields & IPPF_IFINDEX) {
- option_exists |= IPPF_IFINDEX;
- } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
- option_exists |= IPPF_IFINDEX;
- is_sticky |= IPPF_IFINDEX;
+ case AF_INET:
+ sin = (sin_t *)addr;
+ if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
+ (sin->sin_family != AF_INET)) {
+ error = EADDRNOTAVAIL;
+ goto ud_error2;
}
- }
+ UDP_DBGSTAT(us, udp_out_ipv4);
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ ipversion = IPV4_VERSION;
- if (!(ignore & IPPF_ADDR)) {
- if (ipp->ipp_fields & IPPF_ADDR) {
- option_exists |= IPPF_ADDR;
- } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) {
- option_exists |= IPPF_ADDR;
- is_sticky |= IPPF_ADDR;
- }
- }
+ srcid = 0;
+ if (tudr->OPT_length != 0) {
+ /*
+ * If we are connected then the destination needs to be
+ * the same as the connected one.
+ */
+ if (udp->udp_state == TS_DATA_XFER &&
+ !conn_same_as_last_v4(connp, sin)) {
+ error = EISCONN;
+ goto ud_error2;
+ }
+ UDP_STAT(us, udp_out_opt);
+ error = udp_output_ancillary(connp, sin, NULL,
+ data_mp, mp, NULL, cr, pid);
+ } else {
+ ip_xmit_attr_t *ixa;
- if (!(ignore & IPPF_DONTFRAG)) {
- if (ipp->ipp_fields & IPPF_DONTFRAG) {
- option_exists |= IPPF_DONTFRAG;
- } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
- option_exists |= IPPF_DONTFRAG;
- is_sticky |= IPPF_DONTFRAG;
+ /*
+ * We have to allocate an ip_xmit_attr_t before we grab
+ * conn_lock and we need to hold conn_lock once we've
+ * checked conn_same_as_last_v4 to handle concurrent
+ * send* calls on a socket.
+ */
+ ixa = conn_get_ixa(connp, B_FALSE);
+ if (ixa == NULL) {
+ error = ENOMEM;
+ goto ud_error2;
+ }
+ mutex_enter(&connp->conn_lock);
+
+ if (conn_same_as_last_v4(connp, sin) &&
+ ipsec_outbound_policy_current(ixa)) {
+ UDP_DBGSTAT(us, udp_out_lastdst);
+ /* udp_output_lastdst drops conn_lock */
+ error = udp_output_lastdst(connp, data_mp, cr,
+ pid, ixa);
+ } else {
+ UDP_DBGSTAT(us, udp_out_diffdst);
+ /* udp_output_newdst drops conn_lock */
+ error = udp_output_newdst(connp, data_mp, sin,
+ NULL, ipversion, cr, pid, ixa);
+ }
+ ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
}
- }
-
- if (!(ignore & IPPF_USE_MIN_MTU)) {
- if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
- option_exists |= IPPF_USE_MIN_MTU;
- } else if (udp->udp_sticky_ipp.ipp_fields &
- IPPF_USE_MIN_MTU) {
- option_exists |= IPPF_USE_MIN_MTU;
- is_sticky |= IPPF_USE_MIN_MTU;
+ if (error == 0) {
+ freeb(mp);
+ return;
}
+ break;
}
+ UDP_STAT(us, udp_out_err_output);
+ ASSERT(mp != NULL);
+ /* mp is freed by the following routine */
+ udp_ud_err(q, mp, (t_scalar_t)error);
+ return;
- if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
- option_exists |= IPPF_HOPLIMIT;
- /* IPV6_HOPLIMIT can never be sticky */
- ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
+ud_error2:
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ freemsg(data_mp);
+ UDP_STAT(us, udp_out_err_output);
+ ASSERT(mp != NULL);
+ /* mp is freed by the following routine */
+ udp_ud_err(q, mp, (t_scalar_t)error);
+}
- if (!(ignore & IPPF_UNICAST_HOPS) &&
- (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
- option_exists |= IPPF_UNICAST_HOPS;
- is_sticky |= IPPF_UNICAST_HOPS;
- }
+/*
+ * Handle the case of the IP address, port, flow label being different
+ * for both IPv4 and IPv6.
+ *
+ * NOTE: The caller must hold conn_lock and we drop it here.
+ */
+static int
+udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
+ ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
+{
+ uint_t srcid;
+ uint32_t flowinfo;
+ udp_t *udp = connp->conn_udp;
+ int error = 0;
+ ip_xmit_attr_t *oldixa;
+ udp_stack_t *us = udp->udp_us;
+ in6_addr_t v6src;
+ in6_addr_t v6dst;
+ in6_addr_t v6nexthop;
+ in_port_t dstport;
- if (!(ignore & IPPF_MULTICAST_HOPS) &&
- (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
- option_exists |= IPPF_MULTICAST_HOPS;
- is_sticky |= IPPF_MULTICAST_HOPS;
- }
+ ASSERT(MUTEX_HELD(&connp->conn_lock));
+ ASSERT(ixa != NULL);
+ /*
+ * We hold conn_lock across all the use and modifications of
+ * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
+ * stay consistent.
+ */
- if (!(ignore & IPPF_TCLASS)) {
- if (ipp->ipp_fields & IPPF_TCLASS) {
- option_exists |= IPPF_TCLASS;
- } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
- option_exists |= IPPF_TCLASS;
- is_sticky |= IPPF_TCLASS;
- }
+ ASSERT(cr != NULL);
+ ixa->ixa_cred = cr;
+ ixa->ixa_cpid = pid;
+ if (is_system_labeled()) {
+ /* We need to restart with a label based on the cred */
+ ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
}
- if (!(ignore & IPPF_NEXTHOP) &&
- (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) {
- option_exists |= IPPF_NEXTHOP;
- is_sticky |= IPPF_NEXTHOP;
+ /*
+ * If we are connected then the destination needs to be the
+ * same as the connected one, which is not the case here since we
+ * checked for that above.
+ */
+ if (udp->udp_state == TS_DATA_XFER) {
+ mutex_exit(&connp->conn_lock);
+ error = EISCONN;
+ goto ud_error;
}
-no_options:
+ /* In case previous destination was multicast or multirt */
+ ip_attr_newdst(ixa);
/*
- * If any options carried in the ip6i_t were specified, we
- * need to account for the ip6i_t in the data we'll be sending
- * down.
+ * If laddr is unspecified then we look at sin6_src_id.
+ * We will give precedence to a source address set with IPV6_PKTINFO
+ * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
+ * want ip_attr_connect to select a source (since it can fail) when
+ * IPV6_PKTINFO is specified.
+ * If this doesn't result in a source address then we get a source
+ * from ip_attr_connect() below.
*/
- if (option_exists & IPPF_HAS_IP6I)
- udp_ip_hdr_len += sizeof (ip6i_t);
-
- /* check/fix buffer config, setup pointers into it */
- ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len];
- if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) ||
- !OK_32PTR(ip6h)) {
-
- /* Try to get everything in a single mblk next time */
- if (udp_ip_hdr_len > udp->udp_max_hdr_len) {
- udp->udp_max_hdr_len = udp_ip_hdr_len;
- sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
+ v6src = connp->conn_saddr_v6;
+ if (sin != NULL) {
+ IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
+ dstport = sin->sin_port;
+ flowinfo = 0;
+ srcid = 0;
+ ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
+ if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) {
+ ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
+ connp->conn_netstack);
}
-
- mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO);
- if (mp2 == NULL) {
- *error = ENOMEM;
- rw_exit(&udp->udp_rwlock);
- goto done;
+ ixa->ixa_flags |= IXAF_IS_IPV4;
+ } else {
+ v6dst = sin6->sin6_addr;
+ dstport = sin6->sin6_port;
+ flowinfo = sin6->sin6_flowinfo;
+ srcid = sin6->__sin6_src_id;
+ if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
+ ixa->ixa_scopeid = sin6->sin6_scope_id;
+ ixa->ixa_flags |= IXAF_SCOPEID_SET;
+ } else {
+ ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
}
- mp2->b_wptr = DB_LIM(mp2);
- mp2->b_cont = mp1;
- mp1 = mp2;
- if (DB_TYPE(mp) != M_DATA)
- mp->b_cont = mp1;
+ if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
+ ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
+ connp->conn_netstack);
+ }
+ if (IN6_IS_ADDR_V4MAPPED(&v6dst))
+ ixa->ixa_flags |= IXAF_IS_IPV4;
else
- mp = mp1;
-
- ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len);
+ ixa->ixa_flags &= ~IXAF_IS_IPV4;
}
- mp1->b_rptr = (unsigned char *)ip6h;
- ip6i = (ip6i_t *)ip6h;
-
-#define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp)
- if (option_exists & IPPF_HAS_IP6I) {
- ip6h = (ip6_t *)&ip6i[1];
- ip6i->ip6i_flags = 0;
- ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
-
- /* sin6_scope_id takes precendence over IPPF_IFINDEX */
- if (option_exists & IPPF_SCOPE_ID) {
- ip6i->ip6i_flags |= IP6I_IFINDEX;
- ip6i->ip6i_ifindex = sin6->sin6_scope_id;
- } else if (option_exists & IPPF_IFINDEX) {
- tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
- ASSERT(tipp->ipp_ifindex != 0);
- ip6i->ip6i_flags |= IP6I_IFINDEX;
- ip6i->ip6i_ifindex = tipp->ipp_ifindex;
- }
-
- if (option_exists & IPPF_ADDR) {
- /*
- * Enable per-packet source address verification if
- * IPV6_PKTINFO specified the source address.
- * ip6_src is set in the transport's _wput function.
- */
- ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
- }
-
- if (option_exists & IPPF_DONTFRAG) {
- ip6i->ip6i_flags |= IP6I_DONTFRAG;
- }
+ /* Handle IPV6_PKTINFO setting source address. */
+ if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
+ (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) {
+ ip_pkt_t *ipp = &connp->conn_xmit_ipp;
- if (option_exists & IPPF_USE_MIN_MTU) {
- ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
- ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
+ if (ixa->ixa_flags & IXAF_IS_IPV4) {
+ if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
+ v6src = ipp->ipp_addr;
+ } else {
+ if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
+ v6src = ipp->ipp_addr;
}
+ }
- if (option_exists & IPPF_NEXTHOP) {
- tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
- ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
- ip6i->ip6i_flags |= IP6I_NEXTHOP;
- ip6i->ip6i_nexthop = tipp->ipp_nexthop;
- }
+ ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
+ mutex_exit(&connp->conn_lock);
+ error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
+ &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
+ switch (error) {
+ case 0:
+ break;
+ case EADDRNOTAVAIL:
/*
- * tell IP this is an ip6i_t private header
+ * IXAF_VERIFY_SOURCE tells us to pick a better source.
+ * Don't have the application see that errno
*/
- ip6i->ip6i_nxt = IPPROTO_RAW;
- }
-
- /* Initialize IPv6 header */
- ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
- bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
-
- /* Set the hoplimit of the outgoing packet. */
- if (option_exists & IPPF_HOPLIMIT) {
- /* IPV6_HOPLIMIT ancillary data overrides all other settings. */
- ip6h->ip6_hops = ipp->ipp_hoplimit;
- ip6i->ip6i_flags |= IP6I_HOPLIMIT;
- } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
- ip6h->ip6_hops = udp->udp_multicast_ttl;
- if (option_exists & IPPF_MULTICAST_HOPS)
- ip6i->ip6i_flags |= IP6I_HOPLIMIT;
- } else {
- ip6h->ip6_hops = udp->udp_ttl;
- if (option_exists & IPPF_UNICAST_HOPS)
- ip6i->ip6i_flags |= IP6I_HOPLIMIT;
- }
-
- if (option_exists & IPPF_ADDR) {
- tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
- ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
- ip6h->ip6_src = tipp->ipp_addr;
- } else {
+ error = ENETUNREACH;
+ goto failed;
+ case ENETDOWN:
/*
- * The source address was not set using IPV6_PKTINFO.
- * First look at the bound source.
- * If unspecified fallback to __sin6_src_id.
+ * Have !ipif_addr_ready address; drop packet silently
+ * until we can get applications to not send until we
+ * are ready.
*/
- ip6h->ip6_src = udp->udp_v6src;
- if (sin6->__sin6_src_id != 0 &&
- IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
- ip_srcid_find_id(sin6->__sin6_src_id,
- &ip6h->ip6_src, connp->conn_zoneid,
- us->us_netstack);
+ error = 0;
+ goto failed;
+ case EHOSTUNREACH:
+ case ENETUNREACH:
+ if (ixa->ixa_ire != NULL) {
+ /*
+ * Let conn_ip_output/ire_send_noroute return
+ * the error and send any local ICMP error.
+ */
+ error = 0;
+ break;
}
+ /* FALLTHRU */
+ failed:
+ default:
+ goto ud_error;
}
- nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
- cp = (uint8_t *)&ip6h[1];
/*
- * Here's where we have to start stringing together
- * any extension headers in the right order:
- * Hop-by-hop, destination, routing, and final destination opts.
+ * Cluster note: we let the cluster hook know that we are sending to a
+ * new address and/or port.
*/
- if (option_exists & IPPF_HOPOPTS) {
- /* Hop-by-hop options */
- ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
- tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
- if (hopoptslen == 0) {
- hopoptsptr = tipp->ipp_hopopts;
- hopoptslen = tipp->ipp_hopoptslen;
- is_ancillary = B_TRUE;
- }
-
- *nxthdr_ptr = IPPROTO_HOPOPTS;
- nxthdr_ptr = &hbh->ip6h_nxt;
-
- bcopy(hopoptsptr, cp, hopoptslen);
- cp += hopoptslen;
-
- if (hopoptsptr != NULL && !is_ancillary) {
- kmem_free(hopoptsptr, hopoptslen);
- hopoptsptr = NULL;
- hopoptslen = 0;
+ if (cl_inet_connect2 != NULL) {
+ CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
+ if (error != 0) {
+ error = EHOSTUNREACH;
+ goto ud_error;
}
}
- /*
- * En-route destination options
- * Only do them if there's a routing header as well
- */
- if (option_exists & IPPF_RTDSTOPTS) {
- ip6_dest_t *dst = (ip6_dest_t *)cp;
- tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
-
- *nxthdr_ptr = IPPROTO_DSTOPTS;
- nxthdr_ptr = &dst->ip6d_nxt;
- bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
- cp += tipp->ipp_rtdstoptslen;
- }
- /*
- * Routing header next
- */
- if (option_exists & IPPF_RTHDR) {
- ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
- tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
-
- *nxthdr_ptr = IPPROTO_ROUTING;
- nxthdr_ptr = &rt->ip6r_nxt;
-
- bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
- cp += tipp->ipp_rthdrlen;
- }
+ mutex_enter(&connp->conn_lock);
/*
- * Do ultimate destination options
+ * While we dropped the lock some other thread might have connected
+ * this socket. If so we bail out with EISCONN to ensure that the
+ * connecting thread is the one that updates conn_ixa, conn_ht_*
+ * and conn_*last*.
*/
- if (option_exists & IPPF_DSTOPTS) {
- ip6_dest_t *dest = (ip6_dest_t *)cp;
- tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
-
- *nxthdr_ptr = IPPROTO_DSTOPTS;
- nxthdr_ptr = &dest->ip6d_nxt;
-
- bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
- cp += tipp->ipp_dstoptslen;
+ if (udp->udp_state == TS_DATA_XFER) {
+ mutex_exit(&connp->conn_lock);
+ error = EISCONN;
+ goto ud_error;
}
- /*
- * Now set the last header pointer to the proto passed in
- */
- ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE));
- *nxthdr_ptr = IPPROTO_UDP;
-
- /* Update UDP header */
- udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE);
- udph->uha_dst_port = sin6->sin6_port;
- udph->uha_src_port = udp->udp_port;
/*
- * Copy in the destination address
+ * We need to rebuild the headers if
+ * - we are labeling packets (could be different for different
+ * destinations)
+ * - we have a source route (or routing header) since we need to
+ * massage that to get the pseudo-header checksum
+ * - the IP version is different than the last time
+ * - a socket option with COA_HEADER_CHANGED has been set which
+ * set conn_v6lastdst to zero.
+ *
+ * Otherwise the prepend function will just update the src, dst,
+ * dstport, and flow label.
*/
- ip6h->ip6_dst = ip6_dst;
-
- ip6h->ip6_vcf =
- (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
- (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
-
- if (option_exists & IPPF_TCLASS) {
- tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
- ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
- tipp->ipp_tclass);
- }
- rw_exit(&udp->udp_rwlock);
-
- if (option_exists & IPPF_RTHDR) {
- ip6_rthdr_t *rth;
-
+ if (is_system_labeled()) {
+ /* TX MLP requires SCM_UCRED and we don't have that here */
+ if (connp->conn_mlp_type != mlptSingle) {
+ mutex_exit(&connp->conn_lock);
+ error = ECONNREFUSED;
+ goto ud_error;
+ }
/*
- * Perform any processing needed for source routing.
- * We know that all extension headers will be in the same mblk
- * as the IPv6 header.
+ * Check whether Trusted Solaris policy allows communication
+ * with this host, and pretend that the destination is
+ * unreachable if not.
+ * Compute any needed label and place it in ipp_label_v4/v6.
+ *
+ * Later conn_build_hdr_template/conn_prepend_hdr takes
+ * ipp_label_v4/v6 to form the packet.
+ *
+ * Tsol note: Since we hold conn_lock we know no other
+ * thread manipulates conn_xmit_ipp.
*/
- rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
- if (rth != NULL && rth->ip6r_segleft != 0) {
- if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
- /*
- * Drop packet - only support Type 0 routing.
- * Notify the application as well.
- */
- *error = EPROTO;
- goto done;
- }
-
- /*
- * rth->ip6r_len is twice the number of
- * addresses in the header. Thus it must be even.
- */
- if (rth->ip6r_len & 0x1) {
- *error = EPROTO;
- goto done;
- }
- /*
- * Shuffle the routing header and ip6_dst
- * addresses, and get the checksum difference
- * between the first hop (in ip6_dst) and
- * the destination (in the last routing hdr entry).
- */
- csum = ip_massage_options_v6(ip6h, rth,
- us->us_netstack);
- /*
- * Verify that the first hop isn't a mapped address.
- * Routers along the path need to do this verification
- * for subsequent hops.
- */
- if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
- *error = EADDRNOTAVAIL;
- goto done;
+ error = conn_update_label(connp, ixa, &v6dst,
+ &connp->conn_xmit_ipp);
+ if (error != 0) {
+ mutex_exit(&connp->conn_lock);
+ goto ud_error;
+ }
+ /* Rebuild the header template */
+ error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
+ flowinfo);
+ if (error != 0) {
+ mutex_exit(&connp->conn_lock);
+ goto ud_error;
+ }
+ } else if ((connp->conn_xmit_ipp.ipp_fields &
+ (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
+ ipversion != connp->conn_lastipversion ||
+ IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
+ /* Rebuild the header template */
+ error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
+ flowinfo);
+ if (error != 0) {
+ mutex_exit(&connp->conn_lock);
+ goto ud_error;
+ }
+ } else {
+ /* Simply update the destination address if no source route */
+ if (ixa->ixa_flags & IXAF_IS_IPV4) {
+ ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc;
+
+ IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
+ if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
+ ipha->ipha_fragment_offset_and_flags |=
+ IPH_DF_HTONS;
+ } else {
+ ipha->ipha_fragment_offset_and_flags &=
+ ~IPH_DF_HTONS;
}
-
- cp += (rth->ip6r_len + 1)*8;
+ } else {
+ ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
+ ip6h->ip6_dst = v6dst;
}
}
- /* count up length of UDP packet */
- ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN;
- if ((mp2 = mp1->b_cont) != NULL) {
- do {
- ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
- ip_len += (uint32_t)MBLKL(mp2);
- } while ((mp2 = mp2->b_cont) != NULL);
- }
-
/*
- * If the size of the packet is greater than the maximum allowed by
- * ip, return an error. Passing this down could cause panics because
- * the size will have wrapped and be inconsistent with the msg size.
- */
- if (ip_len > IP_MAXPACKET) {
- *error = EMSGSIZE;
- goto done;
- }
-
- /* Store the UDP length. Subtract length of extension hdrs */
- udph->uha_length = htons(ip_len + IPV6_HDR_LEN -
- (int)((uchar_t *)udph - (uchar_t *)ip6h));
-
- /*
- * We make it easy for IP to include our pseudo header
- * by putting our length in uh_checksum, modified (if
- * we have a routing header) by the checksum difference
- * between the ultimate destination and first hop addresses.
- * Note: UDP over IPv6 must always checksum the packet.
+ * Remember the dst/dstport etc which corresponds to the built header
+ * template and conn_ixa.
*/
- csum += udph->uha_length;
- csum = (csum & 0xFFFF) + (csum >> 16);
- udph->uha_checksum = (uint16_t)csum;
-
-#ifdef _LITTLE_ENDIAN
- ip_len = htons(ip_len);
-#endif
- ip6h->ip6_plen = ip_len;
-
- if (DB_TYPE(mp) != M_DATA) {
- cred_t *cr;
- pid_t cpid;
-
- /* Move any cred from the T_UNITDATA_REQ to the packet */
- cr = msg_extractcred(mp, &cpid);
- if (cr != NULL) {
- if (mp1->b_datap->db_credp != NULL)
- crfree(mp1->b_datap->db_credp);
- mp1->b_datap->db_credp = cr;
- mp1->b_datap->db_cpid = cpid;
- }
+ oldixa = conn_replace_ixa(connp, ixa);
+ connp->conn_v6lastdst = v6dst;
+ connp->conn_lastipversion = ipversion;
+ connp->conn_lastdstport = dstport;
+ connp->conn_lastflowinfo = flowinfo;
+ connp->conn_lastscopeid = ixa->ixa_scopeid;
+ connp->conn_lastsrcid = srcid;
+ /* Also remember a source to use together with lastdst */
+ connp->conn_v6lastsrc = v6src;
+
+ data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
+ dstport, flowinfo, &error);
+
+ /* Done with conn_t */
+ mutex_exit(&connp->conn_lock);
+ ixa_refrele(oldixa);
- ASSERT(mp != mp1);
- freeb(mp);
+ if (data_mp == NULL) {
+ ASSERT(error != 0);
+ goto ud_error;
}
- /* mp has been consumed and we'll return success */
- ASSERT(*error == 0);
- mp = NULL;
-
- /* We're done. Pass the packet to IP */
+ /* We're done. Pass the packet to ip. */
BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
- ip_output_v6(connp, mp1, q, IP_WPUT);
-done:
- if (sth_wroff != 0) {
- (void) proto_set_tx_wroff(RD(q), connp,
- udp->udp_max_hdr_len + us->us_wroff_extra);
- }
- if (hopoptsptr != NULL && !is_ancillary) {
- kmem_free(hopoptsptr, hopoptslen);
- hopoptsptr = NULL;
- }
- if (*error != 0) {
- ASSERT(mp != NULL);
- BUMP_MIB(&us->us_udp_mib, udpOutErrors);
- }
- return (mp);
-}
-
-
-static int
-i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
-{
- sin_t *sin = (sin_t *)sa;
- sin6_t *sin6 = (sin6_t *)sa;
-
- ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
-
- if (udp->udp_state != TS_DATA_XFER)
- return (ENOTCONN);
-
- switch (udp->udp_family) {
- case AF_INET:
- ASSERT(udp->udp_ipversion == IPV4_VERSION);
-
- if (*salenp < sizeof (sin_t))
- return (EINVAL);
-
- *salenp = sizeof (sin_t);
- *sin = sin_null;
- sin->sin_family = AF_INET;
- sin->sin_port = udp->udp_dstport;
- sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
+ error = conn_ip_output(data_mp, ixa);
+ /* No udpOutErrors if an error since IP increases its error counter */
+ switch (error) {
+ case 0:
break;
-
- case AF_INET6:
- if (*salenp < sizeof (sin6_t))
- return (EINVAL);
-
- *salenp = sizeof (sin6_t);
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = udp->udp_dstport;
- sin6->sin6_addr = udp->udp_v6dst;
- sin6->sin6_flowinfo = udp->udp_flowinfo;
+ case EWOULDBLOCK:
+ (void) ixa_check_drain_insert(connp, ixa);
+ error = 0;
break;
- }
-
- return (0);
-}
-
-static int
-udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
-{
- sin_t *sin = (sin_t *)sa;
- sin6_t *sin6 = (sin6_t *)sa;
-
- ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
-
- switch (udp->udp_family) {
- case AF_INET:
- ASSERT(udp->udp_ipversion == IPV4_VERSION);
-
- if (*salenp < sizeof (sin_t))
- return (EINVAL);
-
- *salenp = sizeof (sin_t);
- *sin = sin_null;
- sin->sin_family = AF_INET;
- sin->sin_port = udp->udp_port;
-
+ case EADDRNOTAVAIL:
/*
- * If udp_v6src is unspecified, we might be bound to broadcast
- * / multicast. Use udp_bound_v6src as local address instead
- * (that could also still be unspecified).
+ * IXAF_VERIFY_SOURCE tells us to pick a better source.
+ * Don't let the application see that errno.
*/
- if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
- !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
- sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
- } else {
- sin->sin_addr.s_addr =
- V4_PART_OF_V6(udp->udp_bound_v6src);
- }
- break;
-
- case AF_INET6:
- if (*salenp < sizeof (sin6_t))
- return (EINVAL);
-
- *salenp = sizeof (sin6_t);
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = udp->udp_port;
- sin6->sin6_flowinfo = udp->udp_flowinfo;
-
+ error = ENETUNREACH;
+ /* FALLTHRU */
+ default:
+ mutex_enter(&connp->conn_lock);
/*
- * If udp_v6src is unspecified, we might be bound to broadcast
- * / multicast. Use udp_bound_v6src as local address instead
- * (that could also still be unspecified).
+ * Clear the source and v6lastdst so we call ip_attr_connect
+ * for the next packet and try to pick a better source.
*/
- if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))
- sin6->sin6_addr = udp->udp_v6src;
+ if (connp->conn_mcbc_bind)
+ connp->conn_saddr_v6 = ipv6_all_zeros;
else
- sin6->sin6_addr = udp->udp_bound_v6src;
+ connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
+ connp->conn_v6lastdst = ipv6_all_zeros;
+ mutex_exit(&connp->conn_lock);
break;
}
+ ixa_refrele(ixa);
+ return (error);
- return (0);
+ud_error:
+ if (ixa != NULL)
+ ixa_refrele(ixa);
+
+ freemsg(data_mp);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ UDP_STAT(us, udp_out_err_output);
+ return (error);
+}
+
+/*
+ * Discard a message: mp is freed without being inspected or forwarded, and
+ * wq is not used. DEBUG kernels additionally log a notice through cmn_err().
+ * NOTE(review): presumably installed as the write-side put procedure once a
+ * socket has fallen back to TPI - confirm against the qinit tables.
+ */
+/* ARGSUSED */
+static void
+udp_wput_fallback(queue_t *wq, mblk_t *mp)
+{
+#ifdef DEBUG
+ cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
+#endif
+ /* Nothing is sent onward; the whole message chain is released here. */
+ freemsg(mp);
+}
+
/*
* Handle special out-of-band ioctl requests (see PSARC/2008/265).
*/
@@ -6717,7 +4440,8 @@ udp_wput_cmdblk(queue_t *q, mblk_t *mp)
{
void *data;
mblk_t *datamp = mp->b_cont;
- udp_t *udp = Q_TO_UDP(q);
+ conn_t *connp = Q_TO_CONN(q);
+ udp_t *udp = connp->conn_udp;
cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
@@ -6727,19 +4451,23 @@ udp_wput_cmdblk(queue_t *q, mblk_t *mp)
}
data = datamp->b_rptr;
- rw_enter(&udp->udp_rwlock, RW_READER);
+ mutex_enter(&connp->conn_lock);
switch (cmdp->cb_cmd) {
case TI_GETPEERNAME:
- cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len);
+ if (udp->udp_state != TS_DATA_XFER)
+ cmdp->cb_error = ENOTCONN;
+ else
+ cmdp->cb_error = conn_getpeername(connp, data,
+ &cmdp->cb_len);
break;
case TI_GETMYNAME:
- cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len);
+ cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
break;
default:
cmdp->cb_error = EINVAL;
break;
}
- rw_exit(&udp->udp_rwlock);
+ mutex_exit(&connp->conn_lock);
qreply(q, mp);
}
@@ -6747,10 +4475,11 @@ udp_wput_cmdblk(queue_t *q, mblk_t *mp)
+/*
+ * Mark the endpoint as no longer being a socket by clearing udp_issocket,
+ * and count the event in the udp_sock_fallback statistic.
+ */
static void
udp_use_pure_tpi(udp_t *udp)
{
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_issocket = B_FALSE;
- rw_exit(&udp->udp_rwlock);
+ conn_t *connp = udp->udp_connp;
+ /* udp_issocket is written while holding the conn_t's conn_lock. */
+ mutex_enter(&connp->conn_lock);
+ udp->udp_issocket = B_FALSE;
+ mutex_exit(&connp->conn_lock);
UDP_STAT(udp->udp_us, udp_sock_fallback);
}
@@ -6758,20 +4487,13 @@ static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
uchar_t *rptr = mp->b_rptr;
- struct datab *db;
struct iocblk *iocp;
- cred_t *cr;
conn_t *connp = Q_TO_CONN(q);
udp_t *udp = connp->conn_udp;
- udp_stack_t *us;
-
- TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
- "udp_wput_other_start: q %p", q);
-
- us = udp->udp_us;
- db = mp->b_datap;
+ udp_stack_t *us = udp->udp_us;
+ cred_t *cr;
- switch (db->db_type) {
+ switch (mp->b_datap->db_type) {
case M_CMD:
udp_wput_cmdblk(q, mp);
return;
@@ -6779,37 +4501,29 @@ udp_wput_other(queue_t *q, mblk_t *mp)
case M_PROTO:
case M_PCPROTO:
if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
+ /*
+ * If the message does not contain a PRIM_type,
+ * throw it away.
+ */
freemsg(mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "protoshort");
return;
}
switch (((t_primp_t)rptr)->type) {
case T_ADDR_REQ:
udp_addr_req(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "addrreq");
return;
case O_T_BIND_REQ:
case T_BIND_REQ:
udp_tpi_bind(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "bindreq");
return;
case T_CONN_REQ:
udp_tpi_connect(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "connreq");
return;
case T_CAPABILITY_REQ:
udp_capability_req(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "capabreq");
return;
case T_INFO_REQ:
udp_info_req(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "inforeq");
return;
case T_UNITDATA_REQ:
/*
@@ -6817,14 +4531,10 @@ udp_wput_other(queue_t *q, mblk_t *mp)
* be bad. Valid T_UNITDATA_REQs are handled
* in udp_wput.
*/
- udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "unitdatareq");
+ udp_ud_err(q, mp, EADDRNOTAVAIL);
return;
case T_UNBIND_REQ:
udp_tpi_unbind(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "unbindreq");
return;
case T_SVR4_OPTMGMT_REQ:
/*
@@ -6842,11 +4552,8 @@ udp_wput_other(queue_t *q, mblk_t *mp)
}
if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
cr)) {
- (void) svr4_optcom_req(q,
- mp, cr, &udp_opt_obj, B_TRUE);
+ svr4_optcom_req(q, mp, cr, &udp_opt_obj);
}
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
return;
case T_OPTMGMT_REQ:
@@ -6863,34 +4570,24 @@ udp_wput_other(queue_t *q, mblk_t *mp)
udp_err_ack(q, mp, TSYSERR, EINVAL);
return;
}
- (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
+ tpi_optcom_req(q, mp, cr, &udp_opt_obj);
return;
case T_DISCON_REQ:
udp_tpi_disconnect(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "disconreq");
return;
/* The following TPI message is not supported by udp. */
case O_T_CONN_RES:
case T_CONN_RES:
udp_err_ack(q, mp, TNOTSUPPORT, 0);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q,
- "connres/disconreq");
return;
- /* The following 3 TPI messages are illegal for udp. */
+ /* The following 3 TPI requests are illegal for udp. */
case T_DATA_REQ:
case T_EXDATA_REQ:
case T_ORDREL_REQ:
udp_err_ack(q, mp, TNOTSUPPORT, 0);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q,
- "data/exdata/ordrel");
return;
default:
break;
@@ -6914,13 +4611,10 @@ udp_wput_other(queue_t *q, mblk_t *mp)
iocp->ioc_count = 0;
mp->b_datap->db_type = M_IOCACK;
qreply(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q,
- "getpeername");
return;
}
/* FALLTHRU */
- case TI_GETMYNAME: {
+ case TI_GETMYNAME:
/*
* For TI_GETPEERNAME and TI_GETMYNAME, we first
* need to copyin the user's strbuf structure.
@@ -6929,17 +4623,12 @@ udp_wput_other(queue_t *q, mblk_t *mp)
*/
mi_copyin(q, mp, NULL,
SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "getmyname");
return;
- }
case ND_SET:
/* nd_getset performs the necessary checking */
case ND_GET:
if (nd_getset(q, us->us_nd, mp)) {
qreply(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "get");
return;
}
break;
@@ -6969,16 +4658,12 @@ udp_wput_other(queue_t *q, mblk_t *mp)
break;
case M_IOCDATA:
udp_wput_iocdata(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "iocdata");
return;
default:
/* Unrecognized messages are passed through without change. */
break;
}
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
- "udp_wput_other_end: q %p (%S)", q, "end");
- ip_output(connp, mp, q, IP_WPUT);
+ ip_wput_nondata(q, mp);
}
/*
@@ -6991,9 +4676,9 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp)
mblk_t *mp1;
struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
STRUCT_HANDLE(strbuf, sb);
- udp_t *udp = Q_TO_UDP(q);
- int error;
uint_t addrlen;
+ conn_t *connp = Q_TO_CONN(q);
+ udp_t *udp = connp->conn_udp;
/* Make sure it is one of ours. */
switch (iocp->ioc_cmd) {
@@ -7001,7 +4686,7 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp)
case TI_GETPEERNAME:
break;
default:
- ip_output(udp->udp_connp, mp, q, IP_WPUT);
+ ip_wput_nondata(q, mp);
return;
}
@@ -7040,77 +4725,45 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp)
* address and then we'll copyout the strbuf.
*/
STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
- addrlen = udp->udp_family == AF_INET ? sizeof (sin_t) : sizeof (sin6_t);
+
+ if (connp->conn_family == AF_INET)
+ addrlen = sizeof (sin_t);
+ else
+ addrlen = sizeof (sin6_t);
+
if (STRUCT_FGET(sb, maxlen) < addrlen) {
mi_copy_done(q, mp, EINVAL);
return;
}
- mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
-
- if (mp1 == NULL)
- return;
-
- rw_enter(&udp->udp_rwlock, RW_READER);
switch (iocp->ioc_cmd) {
case TI_GETMYNAME:
- error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen);
break;
case TI_GETPEERNAME:
- error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
+ if (udp->udp_state != TS_DATA_XFER) {
+ mi_copy_done(q, mp, ENOTCONN);
+ return;
+ }
break;
}
- rw_exit(&udp->udp_rwlock);
-
- if (error != 0) {
- mi_copy_done(q, mp, error);
- } else {
- mp1->b_wptr += addrlen;
- STRUCT_FSET(sb, len, addrlen);
-
- /* Copy out the address */
- mi_copyout(q, mp);
- }
-}
-
-static int
-udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
- udpattrs_t *udpattrs)
-{
- struct T_unitdata_req *udreqp;
- int is_absreq_failure;
- cred_t *cr;
-
- ASSERT(((t_primp_t)mp->b_rptr)->type);
-
- /*
- * All Solaris components should pass a db_credp
- * for this TPI message, hence we should ASSERT.
- * However, RPC (svc_clts_ksend) does this odd thing where it
- * passes the options from a T_UNITDATA_IND unchanged in a
- * T_UNITDATA_REQ. While that is the right thing to do for
- * some options, SCM_UCRED being the key one, this also makes it
- * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
- */
- cr = msg_getcred(mp, NULL);
- if (cr == NULL) {
- cr = Q_TO_CONN(q)->conn_cred;
- }
- udreqp = (struct T_unitdata_req *)mp->b_rptr;
-
- *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
- udreqp->OPT_offset, cr, &udp_opt_obj,
- udpattrs, &is_absreq_failure);
+ mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
+ if (!mp1)
+ return;
- if (*errorp != 0) {
- /*
- * Note: No special action needed in this
- * module for "is_absreq_failure"
- */
- return (-1); /* failure */
+ STRUCT_FSET(sb, len, addrlen);
+ switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
+ case TI_GETMYNAME:
+ (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
+ &addrlen);
+ break;
+ case TI_GETPEERNAME:
+ (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
+ &addrlen);
+ break;
}
- ASSERT(is_absreq_failure == 0);
- return (0); /* success */
+ mp1->b_wptr += addrlen;
+ /* Copy out the address */
+ mi_copyout(q, mp);
}
void
@@ -7234,34 +4887,19 @@ udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
kstat_t *ksp;
udp_stat_t template = {
- { "udp_ip_send", KSTAT_DATA_UINT64 },
- { "udp_ip_ire_send", KSTAT_DATA_UINT64 },
- { "udp_ire_null", KSTAT_DATA_UINT64 },
{ "udp_sock_fallback", KSTAT_DATA_UINT64 },
- { "udp_out_sw_cksum", KSTAT_DATA_UINT64 },
- { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
{ "udp_out_opt", KSTAT_DATA_UINT64 },
{ "udp_out_err_notconn", KSTAT_DATA_UINT64 },
{ "udp_out_err_output", KSTAT_DATA_UINT64 },
{ "udp_out_err_tudr", KSTAT_DATA_UINT64 },
- { "udp_in_pktinfo", KSTAT_DATA_UINT64 },
- { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 },
- { "udp_in_recvopts", KSTAT_DATA_UINT64 },
- { "udp_in_recvif", KSTAT_DATA_UINT64 },
- { "udp_in_recvslla", KSTAT_DATA_UINT64 },
- { "udp_in_recvucred", KSTAT_DATA_UINT64 },
- { "udp_in_recvttl", KSTAT_DATA_UINT64 },
- { "udp_in_recvhopopts", KSTAT_DATA_UINT64 },
- { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 },
- { "udp_in_recvdstopts", KSTAT_DATA_UINT64 },
- { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 },
- { "udp_in_recvrthdr", KSTAT_DATA_UINT64 },
- { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 },
- { "udp_in_recvtclass", KSTAT_DATA_UINT64 },
- { "udp_in_timestamp", KSTAT_DATA_UINT64 },
#ifdef DEBUG
{ "udp_data_conn", KSTAT_DATA_UINT64 },
{ "udp_data_notconn", KSTAT_DATA_UINT64 },
+ { "udp_out_lastdst", KSTAT_DATA_UINT64 },
+ { "udp_out_diffdst", KSTAT_DATA_UINT64 },
+ { "udp_out_ipv6", KSTAT_DATA_UINT64 },
+ { "udp_out_mapped", KSTAT_DATA_UINT64 },
+ { "udp_out_ipv4", KSTAT_DATA_UINT64 },
#endif
};
@@ -7384,8 +5022,6 @@ udp_set_rcv_hiwat(udp_t *udp, size_t size)
static void
udp_lrput(queue_t *q, mblk_t *mp)
{
- mblk_t *mp1;
-
switch (mp->b_datap->db_type) {
case M_FLUSH:
/* Turn around */
@@ -7396,9 +5032,6 @@ udp_lrput(queue_t *q, mblk_t *mp)
}
break;
}
- /* Could receive messages that passed through ar_rput */
- for (mp1 = mp; mp1; mp1 = mp1->b_cont)
- mp1->b_prev = mp1->b_next = NULL;
freemsg(mp);
}
@@ -7425,6 +5058,7 @@ udp_do_open(cred_t *credp, boolean_t isv6, int flags)
zoneid_t zoneid;
netstack_t *ns;
udp_stack_t *us;
+ int len;
ns = netstack_find_by_cred(credp);
ASSERT(ns != NULL);
@@ -7455,34 +5089,40 @@ udp_do_open(cred_t *credp, boolean_t isv6, int flags)
*/
netstack_rele(ns);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- ASSERT(connp->conn_ulp == IPPROTO_UDP);
+ /*
+ * Since this conn_t/udp_t is not yet visible to anybody else we don't
+ * need to lock anything.
+ */
+ ASSERT(connp->conn_proto == IPPROTO_UDP);
ASSERT(connp->conn_udp == udp);
ASSERT(udp->udp_connp == connp);
/* Set the initial state of the stream and the privilege status. */
udp->udp_state = TS_UNBND;
+ connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
if (isv6) {
- udp->udp_family = AF_INET6;
- udp->udp_ipversion = IPV6_VERSION;
- udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
- udp->udp_ttl = us->us_ipv6_hoplimit;
- connp->conn_af_isv6 = B_TRUE;
+ connp->conn_family = AF_INET6;
+ connp->conn_ipversion = IPV6_VERSION;
+ connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
+ connp->conn_default_ttl = us->us_ipv6_hoplimit;
+ len = sizeof (ip6_t) + UDPH_SIZE;
} else {
- udp->udp_family = AF_INET;
- udp->udp_ipversion = IPV4_VERSION;
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
- udp->udp_ttl = us->us_ipv4_ttl;
- connp->conn_af_isv6 = B_FALSE;
+ connp->conn_family = AF_INET;
+ connp->conn_ipversion = IPV4_VERSION;
+ connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
+ connp->conn_default_ttl = us->us_ipv4_ttl;
+ len = sizeof (ipha_t) + UDPH_SIZE;
}
- udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
- udp->udp_pending_op = -1;
- connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
- connp->conn_zoneid = zoneid;
+ ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
+ connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
+
+ connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
+ /* conn_allzones cannot be set this early, hence no IPCL_ZONEID */
+ connp->conn_ixa->ixa_zoneid = zoneid;
- udp->udp_open_time = lbolt64;
- udp->udp_open_pid = curproc->p_pid;
+ connp->conn_zoneid = zoneid;
/*
* If the caller has the process-wide flag set, then default to MAC
@@ -7491,22 +5131,38 @@ udp_do_open(cred_t *credp, boolean_t isv6, int flags)
if (getpflags(NET_MAC_AWARE, credp) != 0)
connp->conn_mac_mode = CONN_MAC_AWARE;
- connp->conn_ulp_labeled = is_system_labeled();
+ connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
udp->udp_us = us;
+ connp->conn_rcvbuf = us->us_recv_hiwat;
+ connp->conn_sndbuf = us->us_xmit_hiwat;
+ connp->conn_sndlowat = us->us_xmit_lowat;
+ connp->conn_rcvlowat = udp_mod_info.mi_lowat;
+
+ connp->conn_wroff = len + us->us_wroff_extra;
+ connp->conn_so_type = SOCK_DGRAM;
+
connp->conn_recv = udp_input;
+ connp->conn_recvicmp = udp_icmp_input;
crhold(credp);
connp->conn_cred = credp;
+ connp->conn_cpid = curproc->p_pid;
+ connp->conn_open_time = lbolt64;
+ /* Cache things in ixa without an extra refhold */
+ connp->conn_ixa->ixa_cred = connp->conn_cred;
+ connp->conn_ixa->ixa_cpid = connp->conn_cpid;
+ if (is_system_labeled())
+ connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
- rw_exit(&udp->udp_rwlock);
+ if (us->us_pmtu_discovery)
+ connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
return (connp);
}
-/* ARGSUSED */
sock_lower_handle_t
udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
uint_t *smodep, int *errorp, int flags, cred_t *credp)
@@ -7539,39 +5195,17 @@ udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
ASSERT(us != NULL);
udp->udp_issocket = B_TRUE;
- connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET;
-
- /* Set flow control */
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat);
- udp->udp_rcv_disply_hiwat = us->us_recv_hiwat;
- udp->udp_rcv_lowat = udp_mod_info.mi_lowat;
- udp->udp_xmit_hiwat = us->us_xmit_hiwat;
- udp->udp_xmit_lowat = us->us_xmit_lowat;
-
- if (udp->udp_family == AF_INET6) {
- /* Build initial header template for transmit */
- if ((*errorp = udp_build_hdrs(udp)) != 0) {
- rw_exit(&udp->udp_rwlock);
- ipcl_conn_destroy(connp);
- return (NULL);
- }
- }
- rw_exit(&udp->udp_rwlock);
+ connp->conn_flags |= IPCL_NONSTR;
- connp->conn_flow_cntrld = B_FALSE;
-
- ASSERT(us->us_ldi_ident != NULL);
-
- if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) {
- ip1dbg(("udp_create: create of IP helper stream failed\n"));
- udp_do_close(connp);
- return (NULL);
- }
+ /*
+ * Set flow control
+ * Since this conn_t/udp_t is not yet visible to anybody else we don't
+ * need to lock anything.
+ */
+ (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
+ udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
- /* Set the send flow control */
- connp->conn_wq->q_hiwat = us->us_xmit_hiwat;
- connp->conn_wq->q_lowat = us->us_xmit_lowat;
+ connp->conn_flow_cntrld = B_FALSE;
mutex_enter(&connp->conn_lock);
connp->conn_state_flags &= ~CONN_INCIPIENT;
@@ -7583,14 +5217,12 @@ udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
return ((sock_lower_handle_t)connp);
}
-/* ARGSUSED */
+/* ARGSUSED3 */
void
udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
{
conn_t *connp = (conn_t *)proto_handle;
- udp_t *udp = connp->conn_udp;
- udp_stack_t *us = udp->udp_us;
struct sock_proto_props sopp;
/* All Solaris components should pass a cred for this operation. */
@@ -7599,14 +5231,15 @@ udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
connp->conn_upcalls = sock_upcalls;
connp->conn_upper_handle = sock_handle;
- sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
+ sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
- sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
+ sopp.sopp_wroff = connp->conn_wroff;
sopp.sopp_maxblk = INFPSZ;
- sopp.sopp_rxhiwat = udp->udp_rcv_hiwat;
+ sopp.sopp_rxhiwat = connp->conn_rcvbuf;
+ sopp.sopp_rxlowat = connp->conn_rcvlowat;
sopp.sopp_maxaddrlen = sizeof (sin6_t);
sopp.sopp_maxpsz =
- (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
+ (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
UDP_MAXPACKET_IPV6;
sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
udp_mod_info.mi_minpsz;
@@ -7618,9 +5251,32 @@ udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
static void
udp_do_close(conn_t *connp)
{
+ udp_t *udp;
+
ASSERT(connp != NULL && IPCL_IS_UDP(connp));
+ udp = connp->conn_udp;
+
+ if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
+ /*
+ * Running in cluster mode - register unbind information
+ */
+ if (connp->conn_ipversion == IPV4_VERSION) {
+ (*cl_inet_unbind)(
+ connp->conn_netstack->netstack_stackid,
+ IPPROTO_UDP, AF_INET,
+ (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
+ (in_port_t)connp->conn_lport, NULL);
+ } else {
+ (*cl_inet_unbind)(
+ connp->conn_netstack->netstack_stackid,
+ IPPROTO_UDP, AF_INET6,
+ (uint8_t *)&(connp->conn_laddr_v6),
+ (in_port_t)connp->conn_lport, NULL);
+ }
+ }
+
+ udp_bind_hash_remove(udp, B_FALSE);
- udp_quiesce_conn(connp);
ip_quiesce_conn(connp);
if (!IPCL_IS_NONSTR(connp)) {
@@ -7642,6 +5298,7 @@ udp_do_close(conn_t *connp)
* future.
*/
ASSERT(connp->conn_ref == 1);
+
if (!IPCL_IS_NONSTR(connp)) {
inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
} else {
@@ -7652,7 +5309,7 @@ udp_do_close(conn_t *connp)
ipcl_conn_destroy(connp);
}
-/* ARGSUSED */
+/* ARGSUSED1 */
int
udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
{
@@ -7671,59 +5328,41 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
{
sin_t *sin;
sin6_t *sin6;
- sin6_t sin6addr;
+ udp_t *udp = connp->conn_udp;
+ int error = 0;
+ ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
in_port_t port; /* Host byte order */
in_port_t requested_port; /* Host byte order */
int count;
+ ipaddr_t v4src; /* Set if AF_INET */
in6_addr_t v6src;
int loopmax;
udp_fanout_t *udpf;
in_port_t lport; /* Network byte order */
- udp_t *udp;
+ uint_t scopeid = 0;
+ zoneid_t zoneid = IPCL_ZONEID(connp);
+ ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
boolean_t is_inaddr_any;
mlp_type_t addrtype, mlptype;
- udp_stack_t *us;
- int error = 0;
- mblk_t *mp = NULL;
-
- udp = connp->conn_udp;
- us = udp->udp_us;
-
- if (udp->udp_state != TS_UNBND) {
- (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
- "udp_bind: bad state, %u", udp->udp_state);
- return (-TOUTSTATE);
- }
+ udp_stack_t *us = udp->udp_us;
switch (len) {
- case 0:
- if (udp->udp_family == AF_INET) {
- sin = (sin_t *)&sin6addr;
- *sin = sin_null;
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = INADDR_ANY;
- udp->udp_ipversion = IPV4_VERSION;
- } else {
- ASSERT(udp->udp_family == AF_INET6);
- sin6 = (sin6_t *)&sin6addr;
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- V6_SET_ZERO(sin6->sin6_addr);
- udp->udp_ipversion = IPV6_VERSION;
- }
- port = 0;
- break;
-
case sizeof (sin_t): /* Complete IPv4 address */
sin = (sin_t *)sa;
if (sin == NULL || !OK_32PTR((char *)sin))
return (EINVAL);
- if (udp->udp_family != AF_INET ||
+ if (connp->conn_family != AF_INET ||
sin->sin_family != AF_INET) {
return (EAFNOSUPPORT);
}
+ v4src = sin->sin_addr.s_addr;
+ IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
+ if (v4src != INADDR_ANY) {
+ laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
+ B_TRUE);
+ }
port = ntohs(sin->sin_port);
break;
@@ -7733,10 +5372,28 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
if (sin6 == NULL || !OK_32PTR((char *)sin6))
return (EINVAL);
- if (udp->udp_family != AF_INET6 ||
+ if (connp->conn_family != AF_INET6 ||
sin6->sin6_family != AF_INET6) {
return (EAFNOSUPPORT);
}
+ v6src = sin6->sin6_addr;
+ if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
+ if (connp->conn_ipv6_v6only)
+ return (EADDRNOTAVAIL);
+
+ IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
+ if (v4src != INADDR_ANY) {
+ laddr_type = ip_laddr_verify_v4(v4src,
+ zoneid, ipst, B_FALSE);
+ }
+ } else {
+ if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
+ if (IN6_IS_ADDR_LINKSCOPE(&v6src))
+ scopeid = sin6->sin6_scope_id;
+ laddr_type = ip_laddr_verify_v6(&v6src,
+ zoneid, ipst, B_TRUE, scopeid);
+ }
+ }
port = ntohs(sin6->sin6_port);
break;
@@ -7746,6 +5403,10 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
return (-TBADADDR);
}
+ /* Is the local address a valid unicast, multicast, or broadcast? */
+ if (laddr_type == IPVL_BAD)
+ return (EADDRNOTAVAIL);
+
requested_port = port;
if (requested_port == 0 || !bind_to_req_port_only)
@@ -7759,7 +5420,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
* doesn't care which port number we bind to. Get one in the
* valid range.
*/
- if (udp->udp_anon_priv_bind) {
+ if (connp->conn_anon_priv_bind) {
port = udp_get_next_priv_port(udp);
} else {
port = udp_update_next_port(udp,
@@ -7798,53 +5459,45 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
* TPI primitives only 1 at a time and wait for the response before
* sending the next primitive.
*/
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
- rw_exit(&udp->udp_rwlock);
+ mutex_enter(&connp->conn_lock);
+ if (udp->udp_state != TS_UNBND) {
+ mutex_exit(&connp->conn_lock);
(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
"udp_bind: bad state, %u", udp->udp_state);
return (-TOUTSTATE);
}
- /* XXX how to remove the T_BIND_REQ? Should set it before calling */
- udp->udp_pending_op = T_BIND_REQ;
/*
* Copy the source address into our udp structure. This address
* may still be zero; if so, IP will fill in the correct address
* each time an outbound packet is passed to it. Since the udp is
* not yet in the bind hash list, we don't grab the uf_lock to
- * change udp_ipversion
+ * change conn_ipversion
*/
- if (udp->udp_family == AF_INET) {
+ if (connp->conn_family == AF_INET) {
ASSERT(sin != NULL);
- ASSERT(udp->udp_ipversion == IPV4_VERSION);
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
- udp->udp_ip_snd_options_len;
- IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
+ ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
} else {
- ASSERT(sin6 != NULL);
- v6src = sin6->sin6_addr;
if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
/*
- * no need to hold the uf_lock to set the udp_ipversion
+ * no need to hold the uf_lock to set the conn_ipversion
* since we are not yet in the fanout list
*/
- udp->udp_ipversion = IPV4_VERSION;
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
- UDPH_SIZE + udp->udp_ip_snd_options_len;
+ connp->conn_ipversion = IPV4_VERSION;
+ connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
} else {
- udp->udp_ipversion = IPV6_VERSION;
- udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
+ connp->conn_ipversion = IPV6_VERSION;
+ connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
}
}
/*
- * If udp_reuseaddr is not set, then we have to make sure that
+ * If conn_reuseaddr is not set, then we have to make sure that
* the IP address and port number the application requested
* (or we selected for the application) is not being used by
* another stream. If another stream is already using the
* requested IP address and port, the behavior depends on
* "bind_to_req_port_only". If set the bind fails; otherwise we
- * search for any an unused port to bind to the the stream.
+ * search for an unused port to bind to the stream.
*
* As per the BSD semantics, as modified by the Deering multicast
* changes, if udp_reuseaddr is set, then we allow multiple binds
@@ -7860,7 +5513,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
*/
count = 0;
- if (udp->udp_anon_priv_bind) {
+ if (connp->conn_anon_priv_bind) {
/*
* loopmax = (IPPORT_RESERVED-1) -
* us->us_min_anonpriv_port + 1
@@ -7876,6 +5529,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
for (;;) {
udp_t *udp1;
boolean_t found_exclbind = B_FALSE;
+ conn_t *connp1;
/*
* Walk through the list of udp streams bound to
@@ -7887,7 +5541,9 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
mutex_enter(&udpf->uf_lock);
for (udp1 = udpf->uf_udp; udp1 != NULL;
udp1 = udp1->udp_bind_hash) {
- if (lport != udp1->udp_port)
+ connp1 = udp1->udp_connp;
+
+ if (lport != connp1->conn_lport)
continue;
/*
@@ -7896,7 +5552,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
* privilege as being in all zones, as there's
* otherwise no way to identify the right receiver.
*/
- if (!IPCL_BIND_ZONE_MATCH(udp1->udp_connp, connp))
+ if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
continue;
/*
@@ -7918,12 +5574,13 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
* For labeled systems, SO_MAC_EXEMPT behaves the same
* as UDP_EXCLBIND, except that zoneid is ignored.
*/
- if (udp1->udp_exclbind || udp->udp_exclbind ||
+ if (connp1->conn_exclbind || connp->conn_exclbind ||
IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
if (V6_OR_V4_INADDR_ANY(
- udp1->udp_bound_v6src) ||
+ connp1->conn_bound_addr_v6) ||
is_inaddr_any ||
- IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
+ IN6_ARE_ADDR_EQUAL(
+ &connp1->conn_bound_addr_v6,
&v6src)) {
found_exclbind = B_TRUE;
break;
@@ -7935,7 +5592,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
* Check ipversion to allow IPv4 and IPv6 sockets to
* have disjoint port number spaces.
*/
- if (udp->udp_ipversion != udp1->udp_ipversion) {
+ if (connp->conn_ipversion != connp1->conn_ipversion) {
/*
* On the first time through the loop, if the
@@ -7963,8 +5620,8 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
* (non-wildcard, also), keep going.
*/
if (!is_inaddr_any &&
- !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
- !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
+ !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
+ !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
&v6src)) {
continue;
}
@@ -7972,7 +5629,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
}
if (!found_exclbind &&
- (udp->udp_reuseaddr && requested_port != 0)) {
+ (connp->conn_reuseaddr && requested_port != 0)) {
break;
}
@@ -7995,12 +5652,11 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
* the routine (and exit the loop).
*
*/
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
+ mutex_exit(&connp->conn_lock);
return (-TADDRBUSY);
}
- if (udp->udp_anon_priv_bind) {
+ if (connp->conn_anon_priv_bind) {
port = udp_get_next_priv_port(udp);
} else {
if ((count == 0) && (requested_port != 0)) {
@@ -8025,66 +5681,82 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
* there are none available, so send an error
* to the user.
*/
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
+ mutex_exit(&connp->conn_lock);
return (-TNOADDR);
}
}
/*
* Copy the source address into our udp structure. This address
- * may still be zero; if so, ip will fill in the correct address
- * each time an outbound packet is passed to it.
+ * may still be zero; if so, ip_attr_connect will fill in the correct
+ * address when a packet is about to be sent.
* If we are binding to a broadcast or multicast address then
- * udp_post_ip_bind_connect will clear the source address
- * when udp_do_bind success.
+ * we just set the conn_bound_addr since we don't want to use
+ * that as the source address when sending.
*/
- udp->udp_v6src = udp->udp_bound_v6src = v6src;
- udp->udp_port = lport;
+ connp->conn_bound_addr_v6 = v6src;
+ connp->conn_laddr_v6 = v6src;
+ if (scopeid != 0) {
+ connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
+ connp->conn_ixa->ixa_scopeid = scopeid;
+ connp->conn_incoming_ifindex = scopeid;
+ } else {
+ connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
+ connp->conn_incoming_ifindex = connp->conn_bound_if;
+ }
+
+ switch (laddr_type) {
+ case IPVL_UNICAST_UP:
+ case IPVL_UNICAST_DOWN:
+ connp->conn_saddr_v6 = v6src;
+ connp->conn_mcbc_bind = B_FALSE;
+ break;
+ case IPVL_MCAST:
+ case IPVL_BCAST:
+ /* ip_set_destination will pick a source address later */
+ connp->conn_saddr_v6 = ipv6_all_zeros;
+ connp->conn_mcbc_bind = B_TRUE;
+ break;
+ }
+
+ /* Any errors after this point should use late_error */
+ connp->conn_lport = lport;
+
/*
- * Now reset the the next anonymous port if the application requested
+ * Now reset the next anonymous port if the application requested
* an anonymous port, or we handed out the next anonymous port.
*/
- if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
+ if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
us->us_next_port_to_try = port + 1;
}
- /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
- if (udp->udp_family == AF_INET) {
- sin->sin_port = udp->udp_port;
+ /* Initialize the T_BIND_ACK. */
+ if (connp->conn_family == AF_INET) {
+ sin->sin_port = connp->conn_lport;
} else {
- sin6->sin6_port = udp->udp_port;
- /* Rebuild the header template */
- error = udp_build_hdrs(udp);
- if (error != 0) {
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- mutex_exit(&udpf->uf_lock);
- return (error);
- }
+ sin6->sin6_port = connp->conn_lport;
}
udp->udp_state = TS_IDLE;
udp_bind_hash_insert(udpf, udp);
mutex_exit(&udpf->uf_lock);
- rw_exit(&udp->udp_rwlock);
+ mutex_exit(&connp->conn_lock);
if (cl_inet_bind) {
/*
* Running in cluster mode - register bind information
*/
- if (udp->udp_ipversion == IPV4_VERSION) {
+ if (connp->conn_ipversion == IPV4_VERSION) {
(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
- IPPROTO_UDP, AF_INET,
- (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
- (in_port_t)udp->udp_port, NULL);
+ IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
+ (in_port_t)connp->conn_lport, NULL);
} else {
(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
- IPPROTO_UDP, AF_INET6,
- (uint8_t *)&(udp->udp_v6src),
- (in_port_t)udp->udp_port, NULL);
+ IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
+ (in_port_t)connp->conn_lport, NULL);
}
}
+ mutex_enter(&connp->conn_lock);
connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
if (is_system_labeled() && (!connp->conn_anon_port ||
connp->conn_anon_mlp)) {
@@ -8092,18 +5764,16 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
zone_t *zone;
zone = crgetzone(cr);
- connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
+ connp->conn_mlp_type =
+ connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
mlptSingle;
addrtype = tsol_mlp_addr_type(
connp->conn_allzones ? ALL_ZONES : zone->zone_id,
IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
if (addrtype == mlptSingle) {
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return (-TNOADDR);
+ error = -TNOADDR;
+ mutex_exit(&connp->conn_lock);
+ goto late_error;
}
mlpport = connp->conn_anon_port ? PMAPPORT : port;
mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
@@ -8115,12 +5785,9 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
*/
if (mlptype != mlptSingle &&
connp->conn_mlp_type == mlptSingle) {
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return (EINVAL);
+ error = EINVAL;
+ mutex_exit(&connp->conn_lock);
+ goto late_error;
}
/*
@@ -8129,18 +5796,15 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
*/
if (mlptype != mlptSingle &&
secpolicy_net_bindmlp(cr) != 0) {
- if (udp->udp_debug) {
+ if (connp->conn_debug) {
(void) strlog(UDP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"udp_bind: no priv for multilevel port %d",
mlpport);
}
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return (-TACCES);
+ error = -TACCES;
+ mutex_exit(&connp->conn_lock);
+ goto late_error;
}
/*
@@ -8158,7 +5822,7 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
htons(mlpport));
if (connp->conn_zoneid != mlpzone) {
- if (udp->udp_debug) {
+ if (connp->conn_debug) {
(void) strlog(UDP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"udp_bind: attempt to bind port "
@@ -8167,62 +5831,82 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
mlpport, connp->conn_zoneid,
mlpzone);
}
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return (-TACCES);
+ error = -TACCES;
+ mutex_exit(&connp->conn_lock);
+ goto late_error;
}
}
if (connp->conn_anon_port) {
- error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
+ error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
port, B_TRUE);
if (error != 0) {
- if (udp->udp_debug) {
+ if (connp->conn_debug) {
(void) strlog(UDP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"udp_bind: cannot establish anon "
"MLP for port %d", port);
}
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- connp->conn_anon_port = B_FALSE;
- connp->conn_mlp_type = mlptSingle;
- return (-TACCES);
+ error = -TACCES;
+ mutex_exit(&connp->conn_lock);
+ goto late_error;
}
}
connp->conn_mlp_type = mlptype;
}
- if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
- /*
- * Append a request for an IRE if udp_v6src not
- * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
- */
- mp = allocb(sizeof (ire_t), BPRI_HI);
- if (!mp) {
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- return (ENOMEM);
- }
- mp->b_wptr += sizeof (ire_t);
- mp->b_datap->db_type = IRE_DB_REQ_TYPE;
+ /*
+ * We create an initial header template here to make a subsequent
+ * sendto have a starting point. Since conn_last_dst is zero the
+ * first sendto will always follow the 'dst changed' code path.
+ * Note that we defer massaging options and the related checksum
+ * adjustment until we have a destination address.
+ */
+ error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
+ &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
+ if (error != 0) {
+ mutex_exit(&connp->conn_lock);
+ goto late_error;
}
- if (udp->udp_family == AF_INET6) {
- ASSERT(udp->udp_connp->conn_af_isv6);
- error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP,
- &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
- } else {
- ASSERT(!udp->udp_connp->conn_af_isv6);
- error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP,
- V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port,
- B_TRUE);
+ /* Just in case */
+ connp->conn_faddr_v6 = ipv6_all_zeros;
+ connp->conn_fport = 0;
+ connp->conn_v6lastdst = ipv6_all_zeros;
+ mutex_exit(&connp->conn_lock);
+
+ error = ip_laddr_fanout_insert(connp);
+ if (error != 0)
+ goto late_error;
+
+ /* Bind succeeded */
+ return (0);
+
+late_error:
+ /* We had already picked the port number, and then the bind failed */
+ mutex_enter(&connp->conn_lock);
+ udpf = &us->us_bind_fanout[
+ UDP_BIND_HASH(connp->conn_lport,
+ us->us_bind_fanout_size)];
+ mutex_enter(&udpf->uf_lock);
+ connp->conn_saddr_v6 = ipv6_all_zeros;
+ connp->conn_bound_addr_v6 = ipv6_all_zeros;
+ connp->conn_laddr_v6 = ipv6_all_zeros;
+ if (scopeid != 0) {
+ connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
+ connp->conn_incoming_ifindex = connp->conn_bound_if;
}
+ udp->udp_state = TS_UNBND;
+ udp_bind_hash_remove(udp, B_TRUE);
+ connp->conn_lport = 0;
+ mutex_exit(&udpf->uf_lock);
+ connp->conn_anon_port = B_FALSE;
+ connp->conn_mlp_type = mlptSingle;
- (void) udp_post_ip_bind_connect(udp, mp, error);
+ connp->conn_v6lastdst = ipv6_all_zeros;
+
+ /* Restore the header that was built above - different source address */
+ (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
+ &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
+ mutex_exit(&connp->conn_lock);
return (error);
}
@@ -8256,12 +5940,32 @@ udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
static int
udp_implicit_bind(conn_t *connp, cred_t *cr)
{
+ sin6_t sin6addr;
+ sin_t *sin;
+ sin6_t *sin6;
+ socklen_t len;
int error;
/* All Solaris components should pass a cred for this operation. */
ASSERT(cr != NULL);
- error = udp_do_bind(connp, NULL, 0, cr, B_FALSE);
+ if (connp->conn_family == AF_INET) {
+ len = sizeof (struct sockaddr_in);
+ sin = (sin_t *)&sin6addr;
+ *sin = sin_null;
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ } else {
+ ASSERT(connp->conn_family == AF_INET6);
+ len = sizeof (sin6_t);
+ sin6 = (sin6_t *)&sin6addr;
+ *sin6 = sin6_null;
+ sin6->sin6_family = AF_INET6;
+ V6_SET_ZERO(sin6->sin6_addr);
+ }
+
+ error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
+ cr, B_FALSE);
return ((error < 0) ? proto_tlitosyserr(-error) : error);
}
@@ -8280,137 +5984,51 @@ udp_do_unbind(conn_t *connp)
/*
* Running in cluster mode - register unbind information
*/
- if (udp->udp_ipversion == IPV4_VERSION) {
+ if (connp->conn_ipversion == IPV4_VERSION) {
(*cl_inet_unbind)(
connp->conn_netstack->netstack_stackid,
IPPROTO_UDP, AF_INET,
- (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
- (in_port_t)udp->udp_port, NULL);
+ (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
+ (in_port_t)connp->conn_lport, NULL);
} else {
(*cl_inet_unbind)(
connp->conn_netstack->netstack_stackid,
IPPROTO_UDP, AF_INET6,
- (uint8_t *)&(udp->udp_v6src),
- (in_port_t)udp->udp_port, NULL);
+ (uint8_t *)&(connp->conn_laddr_v6),
+ (in_port_t)connp->conn_lport, NULL);
}
}
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
- rw_exit(&udp->udp_rwlock);
+ mutex_enter(&connp->conn_lock);
+ /* If a bind has not been done, we can't unbind. */
+ if (udp->udp_state == TS_UNBND) {
+ mutex_exit(&connp->conn_lock);
return (-TOUTSTATE);
}
- udp->udp_pending_op = T_UNBIND_REQ;
- rw_exit(&udp->udp_rwlock);
-
- /*
- * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
- * and therefore ip_unbind must never return NULL.
- */
- ip_unbind(connp);
-
- /*
- * Once we're unbound from IP, the pending operation may be cleared
- * here.
- */
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
us->us_bind_fanout_size)];
-
mutex_enter(&udpf->uf_lock);
udp_bind_hash_remove(udp, B_TRUE);
- V6_SET_ZERO(udp->udp_v6src);
- V6_SET_ZERO(udp->udp_bound_v6src);
- udp->udp_port = 0;
+ connp->conn_saddr_v6 = ipv6_all_zeros;
+ connp->conn_bound_addr_v6 = ipv6_all_zeros;
+ connp->conn_laddr_v6 = ipv6_all_zeros;
+ connp->conn_mcbc_bind = B_FALSE;
+ connp->conn_lport = 0;
+ /* In case we were also connected */
+ connp->conn_faddr_v6 = ipv6_all_zeros;
+ connp->conn_fport = 0;
mutex_exit(&udpf->uf_lock);
- udp->udp_pending_op = -1;
+ connp->conn_v6lastdst = ipv6_all_zeros;
udp->udp_state = TS_UNBND;
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(udp);
- rw_exit(&udp->udp_rwlock);
- return (0);
-}
-
-static int
-udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error)
-{
- ire_t *ire;
- udp_fanout_t *udpf;
- udp_stack_t *us = udp->udp_us;
-
- ASSERT(udp->udp_pending_op != -1);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
- if (error == 0) {
- /* For udp_do_connect() success */
- /* udp_do_bind() success will do nothing in here */
- /*
- * If a broadcast/multicast address was bound, set
- * the source address to 0.
- * This ensures no datagrams with broadcast address
- * as source address are emitted (which would violate
- * RFC1122 - Hosts requirements)
- *
- * Note that when connecting the returned IRE is
- * for the destination address and we only perform
- * the broadcast check for the source address (it
- * is OK to connect to a broadcast/multicast address.)
- */
- if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
- ire = (ire_t *)ire_mp->b_rptr;
+ (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
+ &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
+ mutex_exit(&connp->conn_lock);
- /*
- * Note: we get IRE_BROADCAST for IPv6 to "mark" a
- * multicast local address.
- */
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
- us->us_bind_fanout_size)];
- if (ire->ire_type == IRE_BROADCAST &&
- udp->udp_state != TS_DATA_XFER) {
- ASSERT(udp->udp_pending_op == T_BIND_REQ ||
- udp->udp_pending_op == O_T_BIND_REQ);
- /*
- * This was just a local bind to a broadcast
- * addr.
- */
- mutex_enter(&udpf->uf_lock);
- V6_SET_ZERO(udp->udp_v6src);
- mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(udp);
- } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(udp);
- }
- }
- } else {
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
- us->us_bind_fanout_size)];
- mutex_enter(&udpf->uf_lock);
+ ip_unbind(connp);
- if (udp->udp_state == TS_DATA_XFER) {
- /* Connect failed */
- /* Revert back to the bound source */
- udp->udp_v6src = udp->udp_bound_v6src;
- udp->udp_state = TS_IDLE;
- } else {
- /* For udp_do_bind() failed */
- V6_SET_ZERO(udp->udp_v6src);
- V6_SET_ZERO(udp->udp_bound_v6src);
- udp->udp_state = TS_UNBND;
- udp_bind_hash_remove(udp, B_TRUE);
- udp->udp_port = 0;
- }
- mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(udp);
- }
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- if (ire_mp != NULL)
- freeb(ire_mp);
- return (error);
+ return (0);
}
/*
@@ -8418,7 +6036,7 @@ udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error)
*/
static int
udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
- cred_t *cr)
+ cred_t *cr, pid_t pid)
{
sin6_t *sin6;
sin_t *sin;
@@ -8426,12 +6044,16 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
ipaddr_t v4dst;
uint16_t dstport;
uint32_t flowinfo;
- mblk_t *ire_mp;
udp_fanout_t *udpf;
udp_t *udp, *udp1;
ushort_t ipversion;
udp_stack_t *us;
int error;
+ conn_t *connp1;
+ ip_xmit_attr_t *ixa;
+ uint_t scopeid = 0;
+ uint_t srcid = 0;
+ in6_addr_t v6src = connp->conn_saddr_v6;
udp = connp->conn_udp;
us = udp->udp_us;
@@ -8451,7 +6073,7 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
v4dst = sin->sin_addr.s_addr;
dstport = sin->sin_port;
IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
- ASSERT(udp->udp_ipversion == IPV4_VERSION);
+ ASSERT(connp->conn_ipversion == IPV4_VERSION);
ipversion = IPV4_VERSION;
break;
@@ -8459,13 +6081,33 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
sin6 = (sin6_t *)sa;
v6dst = sin6->sin6_addr;
dstport = sin6->sin6_port;
+ srcid = sin6->__sin6_src_id;
+ if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
+ ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
+ connp->conn_netstack);
+ }
if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
+ if (connp->conn_ipv6_v6only)
+ return (EADDRNOTAVAIL);
+
+ /*
+ * Destination address is mapped IPv6 address.
+ * Source bound address should be unspecified or
+ * IPv6 mapped address as well.
+ */
+ if (!IN6_IS_ADDR_UNSPECIFIED(
+ &connp->conn_bound_addr_v6) &&
+ !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
+ return (EADDRNOTAVAIL);
+ }
IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
ipversion = IPV4_VERSION;
flowinfo = 0;
} else {
ipversion = IPV6_VERSION;
flowinfo = sin6->sin6_flowinfo;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+ scopeid = sin6->sin6_scope_id;
}
break;
}
@@ -8473,44 +6115,53 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
if (dstport == 0)
return (-TBADADDR);
- rw_enter(&udp->udp_rwlock, RW_WRITER);
+ /*
+ * If there is a different thread using conn_ixa then we get a new
+ * copy and cut the old one loose from conn_ixa. Otherwise we use
+ * conn_ixa and prevent any other thread from using/changing it.
+ * Once connect() is done other threads can use conn_ixa since the
+ * refcnt will be back at one.
+ */
+ ixa = conn_get_ixa(connp, B_TRUE);
+ if (ixa == NULL)
+ return (ENOMEM);
+ ASSERT(ixa->ixa_refcnt >= 2);
+ ASSERT(ixa == connp->conn_ixa);
+
+ mutex_enter(&connp->conn_lock);
/*
- * This UDP must have bound to a port already before doing a connect.
- * TPI mandates that users must send TPI primitives only 1 at a time
- * and wait for the response before sending the next primitive.
+ * This udp_t must have bound to a port already before doing a connect.
+ * Reject if a connect is in progress (we drop conn_lock during
+ * udp_do_connect).
*/
- if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
- rw_exit(&udp->udp_rwlock);
+ if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
+ mutex_exit(&connp->conn_lock);
(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
"udp_connect: bad state, %u", udp->udp_state);
+ ixa_refrele(ixa);
return (-TOUTSTATE);
}
- udp->udp_pending_op = T_CONN_REQ;
- ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
-
- if (ipversion == IPV4_VERSION) {
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
- udp->udp_ip_snd_options_len;
- } else {
- udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
- }
+ ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
us->us_bind_fanout_size)];
mutex_enter(&udpf->uf_lock);
if (udp->udp_state == TS_DATA_XFER) {
/* Already connected - clear out state */
- udp->udp_v6src = udp->udp_bound_v6src;
+ if (connp->conn_mcbc_bind)
+ connp->conn_saddr_v6 = ipv6_all_zeros;
+ else
+ connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
+ connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
+ connp->conn_faddr_v6 = ipv6_all_zeros;
+ connp->conn_fport = 0;
udp->udp_state = TS_IDLE;
}
- /*
- * Create a default IP header with no IP options.
- */
- udp->udp_dstport = dstport;
- udp->udp_ipversion = ipversion;
+ connp->conn_fport = dstport;
+ connp->conn_ipversion = ipversion;
if (ipversion == IPV4_VERSION) {
/*
* Interpret a zero destination to mean loopback.
@@ -8520,29 +6171,16 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
if (v4dst == INADDR_ANY) {
v4dst = htonl(INADDR_LOOPBACK);
IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
- if (udp->udp_family == AF_INET) {
+ if (connp->conn_family == AF_INET) {
sin->sin_addr.s_addr = v4dst;
} else {
sin6->sin6_addr = v6dst;
}
}
- udp->udp_v6dst = v6dst;
- udp->udp_flowinfo = 0;
-
- /*
- * If the destination address is multicast and
- * an outgoing multicast interface has been set,
- * use the address of that interface as our
- * source address if no source address has been set.
- */
- if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
- CLASSD(v4dst) &&
- udp->udp_multicast_if_addr != INADDR_ANY) {
- IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
- &udp->udp_v6src);
- }
+ connp->conn_faddr_v6 = v6dst;
+ connp->conn_flowinfo = 0;
} else {
- ASSERT(udp->udp_ipversion == IPV6_VERSION);
+ ASSERT(connp->conn_ipversion == IPV6_VERSION);
/*
* Interpret a zero destination to mean loopback.
* Update the T_CONN_REQ (sin/sin6) since it is used to
@@ -8552,82 +6190,133 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
v6dst = ipv6_loopback;
sin6->sin6_addr = v6dst;
}
- udp->udp_v6dst = v6dst;
- udp->udp_flowinfo = flowinfo;
- /*
- * If the destination address is multicast and
- * an outgoing multicast interface has been set,
- * then the ip bind logic will pick the correct source
- * address (i.e. matching the outgoing multicast interface).
- */
+ connp->conn_faddr_v6 = v6dst;
+ connp->conn_flowinfo = flowinfo;
+ }
+ mutex_exit(&udpf->uf_lock);
+
+ ixa->ixa_cred = cr;
+ ixa->ixa_cpid = pid;
+ if (is_system_labeled()) {
+ /* We need to restart with a label based on the cred */
+ ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
+ }
+
+ if (scopeid != 0) {
+ ixa->ixa_flags |= IXAF_SCOPEID_SET;
+ ixa->ixa_scopeid = scopeid;
+ connp->conn_incoming_ifindex = scopeid;
+ } else {
+ ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
+ connp->conn_incoming_ifindex = connp->conn_bound_if;
+ }
+ /*
+ * conn_connect will drop conn_lock and reacquire it.
+ * To prevent a send* from messing with this udp_t while the lock
+ * is dropped we set udp_state and clear conn_v6lastdst.
+ * That will make all send* fail with EISCONN.
+ */
+ connp->conn_v6lastdst = ipv6_all_zeros;
+ udp->udp_state = TS_WCON_CREQ;
+
+ error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
+ mutex_exit(&connp->conn_lock);
+ if (error != 0)
+ goto connect_failed;
+
+ /*
+ * The addresses have been verified. Time to insert in
+ * the correct fanout list.
+ */
+ error = ipcl_conn_insert(connp);
+ if (error != 0)
+ goto connect_failed;
+
+ mutex_enter(&connp->conn_lock);
+ error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
+ &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
+ if (error != 0) {
+ mutex_exit(&connp->conn_lock);
+ goto connect_failed;
}
+ udp->udp_state = TS_DATA_XFER;
+ /* Record this as the "last" send even though we haven't sent any */
+ connp->conn_v6lastdst = connp->conn_faddr_v6;
+ connp->conn_lastipversion = connp->conn_ipversion;
+ connp->conn_lastdstport = connp->conn_fport;
+ connp->conn_lastflowinfo = connp->conn_flowinfo;
+ connp->conn_lastscopeid = scopeid;
+ connp->conn_lastsrcid = srcid;
+ /* Also remember a source to use together with lastdst */
+ connp->conn_v6lastsrc = v6src;
+ mutex_exit(&connp->conn_lock);
+
/*
- * Verify that the src/port/dst/port is unique for all
- * connections in TS_DATA_XFER
+ * We've picked a source address above. Now we can
+ * verify that the src/port/dst/port is unique for all
+ * connections in TS_DATA_XFER, skipping ourselves.
*/
+ mutex_enter(&udpf->uf_lock);
for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
if (udp1->udp_state != TS_DATA_XFER)
continue;
- if (udp->udp_port != udp1->udp_port ||
- udp->udp_ipversion != udp1->udp_ipversion ||
- dstport != udp1->udp_dstport ||
- !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
- !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
- !(IPCL_ZONE_MATCH(udp->udp_connp,
- udp1->udp_connp->conn_zoneid) ||
- IPCL_ZONE_MATCH(udp1->udp_connp,
- udp->udp_connp->conn_zoneid)))
+
+ if (udp1 == udp)
+ continue;
+
+ connp1 = udp1->udp_connp;
+ if (connp->conn_lport != connp1->conn_lport ||
+ connp->conn_ipversion != connp1->conn_ipversion ||
+ dstport != connp1->conn_fport ||
+ !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
+ &connp1->conn_laddr_v6) ||
+ !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
+ !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
+ IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
continue;
mutex_exit(&udpf->uf_lock);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- return (-TBADADDR);
+ error = -TBADADDR;
+ goto connect_failed;
}
-
if (cl_inet_connect2 != NULL) {
- CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error);
+ CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
if (error != 0) {
mutex_exit(&udpf->uf_lock);
- udp->udp_pending_op = -1;
- rw_exit(&udp->udp_rwlock);
- return (-TBADADDR);
+ error = -TBADADDR;
+ goto connect_failed;
}
}
-
- udp->udp_state = TS_DATA_XFER;
mutex_exit(&udpf->uf_lock);
- ire_mp = allocb(sizeof (ire_t), BPRI_HI);
- if (ire_mp == NULL) {
- mutex_enter(&udpf->uf_lock);
- udp->udp_state = TS_IDLE;
- udp->udp_pending_op = -1;
- mutex_exit(&udpf->uf_lock);
- rw_exit(&udp->udp_rwlock);
- return (ENOMEM);
- }
-
- rw_exit(&udp->udp_rwlock);
+ ixa_refrele(ixa);
+ return (0);
- ire_mp->b_wptr += sizeof (ire_t);
- ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE;
+connect_failed:
+ if (ixa != NULL)
+ ixa_refrele(ixa);
+ mutex_enter(&connp->conn_lock);
+ mutex_enter(&udpf->uf_lock);
+ udp->udp_state = TS_IDLE;
+ connp->conn_faddr_v6 = ipv6_all_zeros;
+ connp->conn_fport = 0;
+ /* In case the source address was set above */
+ if (connp->conn_mcbc_bind)
+ connp->conn_saddr_v6 = ipv6_all_zeros;
+ else
+ connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
+ connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
+ mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET) {
- error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP,
- &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port,
- V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport,
- B_TRUE, B_TRUE, cr);
- } else {
- error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP,
- &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst,
- &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr);
- }
+ connp->conn_v6lastdst = ipv6_all_zeros;
+ connp->conn_flowinfo = 0;
- return (udp_post_ip_bind_connect(udp, ire_mp, error));
+ (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
+ &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
+ mutex_exit(&connp->conn_lock);
+ return (error);
}
-/* ARGSUSED */
static int
udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
socklen_t len, sock_connid_t *id, cred_t *cr)
@@ -8636,6 +6325,7 @@ udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
udp_t *udp = connp->conn_udp;
int error;
boolean_t did_bind = B_FALSE;
+ pid_t pid = curproc->p_pid;
/* All Solaris components should pass a cred for this operation. */
ASSERT(cr != NULL);
@@ -8652,7 +6342,7 @@ udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
return (error);
}
- error = proto_verify_ip_addr(udp->udp_family, sa, len);
+ error = proto_verify_ip_addr(connp->conn_family, sa, len);
if (error != 0)
goto done;
@@ -8671,9 +6361,9 @@ udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
/*
* set SO_DGRAM_ERRIND
*/
- udp->udp_dgram_errind = B_TRUE;
+ connp->conn_dgram_errind = B_TRUE;
- error = udp_do_connect(connp, sa, len, cr);
+ error = udp_do_connect(connp, sa, len, cr, pid);
if (error != 0 && did_bind) {
int unbind_err;
@@ -8702,44 +6392,33 @@ done:
return (error);
}
-/* ARGSUSED */
int
udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
cred_t *cr)
{
+ sin6_t *sin6;
+ sin_t *sin = NULL;
+ uint_t srcid;
conn_t *connp = (conn_t *)proto_handle;
udp_t *udp = connp->conn_udp;
- udp_stack_t *us = udp->udp_us;
int error = 0;
+ udp_stack_t *us = udp->udp_us;
+ ushort_t ipversion;
+ pid_t pid = curproc->p_pid;
+ ip_xmit_attr_t *ixa;
ASSERT(DB_TYPE(mp) == M_DATA);
/* All Solaris components should pass a cred for this operation. */
ASSERT(cr != NULL);
- /* If labeled then sockfs should have already set db_credp */
- ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);
-
- /*
- * If the socket is connected and no change in destination
- */
- if (msg->msg_namelen == 0) {
- error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid);
- if (error == EDESTADDRREQ)
- return (error);
- else
- return (udp->udp_dgram_errind ? error : 0);
- }
-
- /*
- * Do an implicit bind if necessary.
- */
+ /* do an implicit bind if necessary */
if (udp->udp_state == TS_UNBND) {
error = udp_implicit_bind(connp, cr);
/*
* We could be racing with an actual bind, in which case
* we would see EPROTO. We cross our fingers and try
- * to send.
+ * to connect.
*/
if (!(error == 0 || error == EPROTO)) {
freemsg(mp);
@@ -8747,75 +6426,203 @@ udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
}
}
- rw_enter(&udp->udp_rwlock, RW_WRITER);
-
- if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) {
- rw_exit(&udp->udp_rwlock);
- freemsg(mp);
+ /* Connected? */
+ if (msg->msg_name == NULL) {
+ if (udp->udp_state != TS_DATA_XFER) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (EDESTADDRREQ);
+ }
+ if (msg->msg_controllen != 0) {
+ error = udp_output_ancillary(connp, NULL, NULL, mp,
+ NULL, msg, cr, pid);
+ } else {
+ error = udp_output_connected(connp, mp, cr, pid);
+ }
+ if (us->us_sendto_ignerr)
+ return (0);
+ else
+ return (error);
+ }
+ if (udp->udp_state == TS_DATA_XFER) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
return (EISCONN);
}
+ error = proto_verify_ip_addr(connp->conn_family,
+ (struct sockaddr *)msg->msg_name, msg->msg_namelen);
+ if (error != 0) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (error);
+ }
+ switch (connp->conn_family) {
+ case AF_INET6:
+ sin6 = (sin6_t *)msg->msg_name;
+ srcid = sin6->__sin6_src_id;
- if (udp->udp_delayed_error != 0) {
- boolean_t match;
+ if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ /*
+ * Destination is a non-IPv4-compatible IPv6 address.
+ * Send out an IPv6 format packet.
+ */
- error = udp->udp_delayed_error;
- match = B_FALSE;
- udp->udp_delayed_error = 0;
- switch (udp->udp_family) {
- case AF_INET: {
- /* Compare just IP address and port */
- sin_t *sin1 = (sin_t *)msg->msg_name;
- sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
+ /*
+ * If the local address is a mapped address return
+ * an error.
+ * It would be possible to send an IPv6 packet but the
+ * response would never make it back to the application
+ * since it is bound to a mapped address.
+ */
+ if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (EADDRNOTAVAIL);
+ }
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ sin6->sin6_addr = ipv6_loopback;
+ ipversion = IPV6_VERSION;
+ } else {
+ if (connp->conn_ipv6_v6only) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (EADDRNOTAVAIL);
+ }
- if (msg->msg_namelen == sizeof (sin_t) &&
- sin1->sin_port == sin2->sin_port &&
- sin1->sin_addr.s_addr == sin2->sin_addr.s_addr)
- match = B_TRUE;
+ /*
+ * If the local address is not zero or a mapped address
+ * return an error. It would be possible to send an
+ * IPv4 packet but the response would never make it
+ * back to the application since it is bound to a
+ * non-mapped address.
+ */
+ if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
+ !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (EADDRNOTAVAIL);
+ }
- break;
+ if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
+ V4_PART_OF_V6(sin6->sin6_addr) =
+ htonl(INADDR_LOOPBACK);
+ }
+ ipversion = IPV4_VERSION;
}
- case AF_INET6: {
- sin6_t *sin1 = (sin6_t *)msg->msg_name;
- sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr;
- if (msg->msg_namelen == sizeof (sin6_t) &&
- sin1->sin6_port == sin2->sin6_port &&
- IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
- &sin2->sin6_addr))
- match = B_TRUE;
- break;
- }
- default:
- ASSERT(0);
+ /*
+ * We have to allocate an ip_xmit_attr_t before we grab
+ * conn_lock and we need to hold conn_lock once we've checked
+ * conn_same_as_last_v6 to handle concurrent send* calls on a
+ * socket.
+ */
+ if (msg->msg_controllen == 0) {
+ ixa = conn_get_ixa(connp, B_FALSE);
+ if (ixa == NULL) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (ENOMEM);
+ }
+ } else {
+ ixa = NULL;
}
+ mutex_enter(&connp->conn_lock);
+ if (udp->udp_delayed_error != 0) {
+ sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr;
- *((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
+ error = udp->udp_delayed_error;
+ udp->udp_delayed_error = 0;
- if (match) {
- rw_exit(&udp->udp_rwlock);
- freemsg(mp);
+ /* Compare IP address, port, and family */
+
+ if (sin6->sin6_port == sin2->sin6_port &&
+ IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &sin2->sin6_addr) &&
+ sin6->sin6_family == sin2->sin6_family) {
+ mutex_exit(&connp->conn_lock);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ if (ixa != NULL)
+ ixa_refrele(ixa);
+ return (error);
+ }
+ }
+
+ if (msg->msg_controllen != 0) {
+ mutex_exit(&connp->conn_lock);
+ ASSERT(ixa == NULL);
+ error = udp_output_ancillary(connp, NULL, sin6, mp,
+ NULL, msg, cr, pid);
+ } else if (conn_same_as_last_v6(connp, sin6) &&
+ connp->conn_lastsrcid == srcid &&
+ ipsec_outbound_policy_current(ixa)) {
+ /* udp_output_lastdst drops conn_lock */
+ error = udp_output_lastdst(connp, mp, cr, pid, ixa);
+ } else {
+ /* udp_output_newdst drops conn_lock */
+ error = udp_output_newdst(connp, mp, NULL, sin6,
+ ipversion, cr, pid, ixa);
+ }
+ ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
+ if (us->us_sendto_ignerr)
+ return (0);
+ else
return (error);
+ case AF_INET:
+ sin = (sin_t *)msg->msg_name;
+
+ ipversion = IPV4_VERSION;
+
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ /*
+ * We have to allocate an ip_xmit_attr_t before we grab
+ * conn_lock and we need to hold conn_lock once we've checked
+ * conn_same_as_last_v4 to handle concurrent send* on a socket.
+ */
+ if (msg->msg_controllen == 0) {
+ ixa = conn_get_ixa(connp, B_FALSE);
+ if (ixa == NULL) {
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ return (ENOMEM);
+ }
+ } else {
+ ixa = NULL;
}
- }
+ mutex_enter(&connp->conn_lock);
+ if (udp->udp_delayed_error != 0) {
+ sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
- error = proto_verify_ip_addr(udp->udp_family,
- (struct sockaddr *)msg->msg_name, msg->msg_namelen);
- rw_exit(&udp->udp_rwlock);
+ error = udp->udp_delayed_error;
+ udp->udp_delayed_error = 0;
- if (error != 0) {
- freemsg(mp);
- return (error);
- }
+ /* Compare IP address and port */
- error = udp_send_not_connected(connp, mp,
- (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr,
- curproc->p_pid);
- if (error != 0) {
- UDP_STAT(us, udp_out_err_output);
- freemsg(mp);
+ if (sin->sin_port == sin2->sin_port &&
+ sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
+ mutex_exit(&connp->conn_lock);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ if (ixa != NULL)
+ ixa_refrele(ixa);
+ return (error);
+ }
+ }
+ if (msg->msg_controllen != 0) {
+ mutex_exit(&connp->conn_lock);
+ ASSERT(ixa == NULL);
+ error = udp_output_ancillary(connp, sin, NULL, mp,
+ NULL, msg, cr, pid);
+ } else if (conn_same_as_last_v4(connp, sin) &&
+ ipsec_outbound_policy_current(ixa)) {
+ /* udp_output_lastdst drops conn_lock */
+ error = udp_output_lastdst(connp, mp, cr, pid, ixa);
+ } else {
+ /* udp_output_newdst drops conn_lock */
+ error = udp_output_newdst(connp, mp, sin, NULL,
+ ipversion, cr, pid, ixa);
+ }
+ ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
+ if (us->us_sendto_ignerr)
+ return (0);
+ else
+ return (error);
+ default:
+ return (EINVAL);
}
- return (udp->udp_dgram_errind ? error : 0);
}
int
@@ -8854,8 +6661,7 @@ udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
stropt_mp->b_wptr += sizeof (*stropt);
stropt = (struct stroptions *)stropt_mp->b_rptr;
stropt->so_flags = SO_WROFF | SO_HIWAT;
- stropt->so_wroff =
- (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra);
+ stropt->so_wroff = connp->conn_wroff;
stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
putnext(RD(q), stropt_mp);
@@ -8881,9 +6687,9 @@ udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
faddrlen = 0;
opts = 0;
- if (udp->udp_dgram_errind)
+ if (connp->conn_dgram_errind)
opts |= SO_DGRAM_ERRIND;
- if (udp->udp_dontroute)
+ if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
opts |= SO_DONTROUTE;
(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
@@ -8908,9 +6714,9 @@ udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
/*
* No longer a streams less socket
*/
- rw_enter(&udp->udp_rwlock, RW_WRITER);
+ mutex_enter(&connp->conn_lock);
connp->conn_flags &= ~IPCL_NONSTR;
- rw_exit(&udp->udp_rwlock);
+ mutex_exit(&connp->conn_lock);
mutex_exit(&udp->udp_recv_lock);
@@ -8919,48 +6725,7 @@ udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
return (0);
}
-static int
-udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
-{
- sin_t *sin = (sin_t *)sa;
- sin6_t *sin6 = (sin6_t *)sa;
-
- ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
- ASSERT(udp != NULL);
-
- if (udp->udp_state != TS_DATA_XFER)
- return (ENOTCONN);
-
- switch (udp->udp_family) {
- case AF_INET:
- ASSERT(udp->udp_ipversion == IPV4_VERSION);
-
- if (*salenp < sizeof (sin_t))
- return (EINVAL);
-
- *salenp = sizeof (sin_t);
- *sin = sin_null;
- sin->sin_family = AF_INET;
- sin->sin_port = udp->udp_dstport;
- sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
- break;
- case AF_INET6:
- if (*salenp < sizeof (sin6_t))
- return (EINVAL);
-
- *salenp = sizeof (sin6_t);
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = udp->udp_dstport;
- sin6->sin6_addr = udp->udp_v6dst;
- sin6->sin6_flowinfo = udp->udp_flowinfo;
- break;
- }
-
- return (0);
-}
-
-/* ARGSUSED */
+/* ARGSUSED3 */
int
udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
socklen_t *salenp, cred_t *cr)
@@ -8972,104 +6737,29 @@ udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
/* All Solaris components should pass a cred for this operation. */
ASSERT(cr != NULL);
- ASSERT(udp != NULL);
-
- rw_enter(&udp->udp_rwlock, RW_READER);
-
- error = udp_do_getpeername(udp, sa, salenp);
-
- rw_exit(&udp->udp_rwlock);
-
+ mutex_enter(&connp->conn_lock);
+ if (udp->udp_state != TS_DATA_XFER)
+ error = ENOTCONN;
+ else
+ error = conn_getpeername(connp, sa, salenp);
+ mutex_exit(&connp->conn_lock);
return (error);
}
-static int
-udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
-{
- sin_t *sin = (sin_t *)sa;
- sin6_t *sin6 = (sin6_t *)sa;
-
- ASSERT(udp != NULL);
- ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
-
- switch (udp->udp_family) {
- case AF_INET:
- ASSERT(udp->udp_ipversion == IPV4_VERSION);
-
- if (*salenp < sizeof (sin_t))
- return (EINVAL);
-
- *salenp = sizeof (sin_t);
- *sin = sin_null;
- sin->sin_family = AF_INET;
- if (udp->udp_state == TS_UNBND) {
- break;
- }
- sin->sin_port = udp->udp_port;
-
- if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
- !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
- sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
- } else {
- /*
- * INADDR_ANY
- * udp_v6src is not set, we might be bound to
- * broadcast/multicast. Use udp_bound_v6src as
- * local address instead (that could
- * also still be INADDR_ANY)
- */
- sin->sin_addr.s_addr =
- V4_PART_OF_V6(udp->udp_bound_v6src);
- }
- break;
-
- case AF_INET6:
- if (*salenp < sizeof (sin6_t))
- return (EINVAL);
-
- *salenp = sizeof (sin6_t);
- *sin6 = sin6_null;
- sin6->sin6_family = AF_INET6;
- if (udp->udp_state == TS_UNBND) {
- break;
- }
- sin6->sin6_port = udp->udp_port;
-
- if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
- sin6->sin6_addr = udp->udp_v6src;
- } else {
- /*
- * UNSPECIFIED
- * udp_v6src is not set, we might be bound to
- * broadcast/multicast. Use udp_bound_v6src as
- * local address instead (that could
- * also still be UNSPECIFIED)
- */
- sin6->sin6_addr = udp->udp_bound_v6src;
- }
- }
- return (0);
-}
-
-/* ARGSUSED */
+/* ARGSUSED3 */
int
udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
socklen_t *salenp, cred_t *cr)
{
conn_t *connp = (conn_t *)proto_handle;
- udp_t *udp = connp->conn_udp;
int error;
/* All Solaris components should pass a cred for this operation. */
ASSERT(cr != NULL);
- ASSERT(udp != NULL);
- rw_enter(&udp->udp_rwlock, RW_READER);
-
- error = udp_do_getsockname(udp, sa, salenp);
-
- rw_exit(&udp->udp_rwlock);
-
+ mutex_enter(&connp->conn_lock);
+ error = conn_getsockname(connp, sa, salenp);
+ mutex_exit(&connp->conn_lock);
return (error);
}
@@ -9078,7 +6768,6 @@ udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
void *optvalp, socklen_t *optlen, cred_t *cr)
{
conn_t *connp = (conn_t *)proto_handle;
- udp_t *udp = connp->conn_udp;
int error;
t_uscalar_t max_optbuf_len;
void *optvalp_buf;
@@ -9090,7 +6779,6 @@ udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
udp_opt_obj.odb_opt_des_arr,
udp_opt_obj.odb_opt_arr_cnt,
- udp_opt_obj.odb_topmost_tpiprovider,
B_FALSE, B_TRUE, cr);
if (error != 0) {
if (error < 0)
@@ -9099,28 +6787,22 @@ udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
}
optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
- rw_enter(&udp->udp_rwlock, RW_READER);
len = udp_opt_get(connp, level, option_name, optvalp_buf);
- rw_exit(&udp->udp_rwlock);
-
- if (len < 0) {
- /*
- * Pass on to IP
- */
+ if (len == -1) {
kmem_free(optvalp_buf, max_optbuf_len);
- return (ip_get_options(connp, level, option_name,
- optvalp, optlen, cr));
- } else {
- /*
- * update optlen and copy option value
- */
- t_uscalar_t size = MIN(len, *optlen);
- bcopy(optvalp_buf, optvalp, size);
- bcopy(&size, optlen, sizeof (size));
-
- kmem_free(optvalp_buf, max_optbuf_len);
- return (0);
+ return (EINVAL);
}
+
+ /*
+ * update optlen and copy option value
+ */
+ t_uscalar_t size = MIN(len, *optlen);
+
+ bcopy(optvalp_buf, optvalp, size);
+ bcopy(&size, optlen, sizeof (size));
+
+ kmem_free(optvalp_buf, max_optbuf_len);
+ return (0);
}
int
@@ -9128,7 +6810,6 @@ udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
const void *optvalp, socklen_t optlen, cred_t *cr)
{
conn_t *connp = (conn_t *)proto_handle;
- udp_t *udp = connp->conn_udp;
int error;
/* All Solaris components should pass a cred for this operation. */
@@ -9137,7 +6818,6 @@ udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
error = proto_opt_check(level, option_name, optlen, NULL,
udp_opt_obj.odb_opt_des_arr,
udp_opt_obj.odb_opt_arr_cnt,
- udp_opt_obj.odb_topmost_tpiprovider,
B_TRUE, B_FALSE, cr);
if (error != 0) {
@@ -9146,19 +6826,11 @@ udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
return (error);
}
- rw_enter(&udp->udp_rwlock, RW_WRITER);
error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
NULL, cr);
- rw_exit(&udp->udp_rwlock);
- if (error < 0) {
- /*
- * Pass on to ip
- */
- error = ip_set_options(connp, level, option_name, optvalp,
- optlen, cr);
- }
+ ASSERT(error >= 0);
return (error);
}
@@ -9174,7 +6846,7 @@ udp_clr_flowctrl(sock_lower_handle_t proto_handle)
mutex_exit(&udp->udp_recv_lock);
}
-/* ARGSUSED */
+/* ARGSUSED2 */
int
udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
{
@@ -9204,6 +6876,27 @@ udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
/* All Solaris components should pass a cred for this operation. */
ASSERT(cr != NULL);
+ /*
+ * If we don't have a helper stream then create one.
+ * ip_create_helper_stream takes care of locking the conn_t,
+ * so this check for NULL is just a performance optimization.
+ */
+ if (connp->conn_helper_info == NULL) {
+ udp_stack_t *us = connp->conn_udp->udp_us;
+
+ ASSERT(us->us_ldi_ident != NULL);
+
+ /*
+ * Create a helper stream for non-STREAMS socket.
+ */
+ error = ip_create_helper_stream(connp, us->us_ldi_ident);
+ if (error != 0) {
+ ip0dbg(("tcp_ioctl: create of IP helper stream "
+ "failed %d\n", error));
+ return (error);
+ }
+ }
+
switch (cmd) {
case ND_SET:
case ND_GET: