Diffstat (limited to 'usr/src/uts/common/inet/udp/udp.c')
 usr/src/uts/common/inet/udp/udp.c (-rw-r--r--) | 2643
 1 file changed, 955 insertions(+), 1688 deletions(-)
diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c
index 301c397cf6..91c3cd6772 100644
--- a/usr/src/uts/common/inet/udp/udp.c
+++ b/usr/src/uts/common/inet/udp/udp.c
@@ -98,101 +98,39 @@ const char udp_version[] = "%Z%%M% %I% %E% SMI";
/*
* Synchronization notes:
*
- * UDP uses a combination of its internal perimeter, a global lock and
- * a set of bind hash locks to protect its data structures. Please see
- * the note above udp_mode_assertions for details about the internal
- * perimeter.
+ * UDP is MT and uses the usual kernel synchronization primitives. There are
+ * two locks: the fanout lock (uf_lock) and the udp endpoint lock (udp_rwlock).
+ * We also use conn_lock when updating things that affect the IP classifier
+ * lookup.
+ * The lock order is udp_rwlock -> uf_lock, and udp_rwlock -> conn_lock.
*
+ * The fanout lock uf_lock:
* When a UDP endpoint is bound to a local port, it is inserted into
* a bind hash list. The list consists of an array of udp_fanout_t buckets.
* The size of the array is controlled by the udp_bind_fanout_size variable.
* This variable can be changed in /etc/system if the default value is
* not large enough. Each bind hash bucket is protected by a per bucket
* lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
- * structure. An UDP endpoint is removed from the bind hash list only
- * when it is being unbound or being closed. The per bucket lock also
- * protects a UDP endpoint's state changes.
+ * structure and a few other fields in the udp_t. A UDP endpoint is removed
+ * from the bind hash list only when it is being unbound or being closed.
+ * The per bucket lock also protects a UDP endpoint's state changes.
*
- * Plumbing notes:
- *
- * Both udp and ip are merged, but the streams plumbing is kept unchanged
- * in that udp is always pushed atop /dev/ip. This is done to preserve
- * backwards compatibility for certain applications which rely on such
- * plumbing geometry to do things such as issuing I_POP on the stream
- * in order to obtain direct access to /dev/ip, etc.
- *
- * All UDP processings happen in the /dev/ip instance; the udp module
- * instance does not possess any state about the endpoint, and merely
- * acts as a dummy module whose presence is to keep the streams plumbing
- * appearance unchanged. At open time /dev/ip allocates a conn_t that
- * happens to embed a udp_t. This stays dormant until the time udp is
- * pushed, which indicates to /dev/ip that it must convert itself from
- * an IP to a UDP endpoint.
- *
- * We only allow for the following plumbing cases:
+ * The udp_rwlock:
+ * This protects most of the other fields in the udp_t. The exact list of
+ * fields which are protected by each of the above locks is documented in
+ * the udp_t structure definition.
*
- * Normal:
- * /dev/ip is first opened and later udp is pushed directly on top.
- * This is the default action that happens when a udp socket or
- * /dev/udp is opened. The conn_t created by /dev/ip instance is
- * now shared and is marked with IPCL_UDP.
- *
- * SNMP-only:
- * udp is pushed on top of a module other than /dev/ip. When this
- * happens it will support only SNMP semantics. A new conn_t is
- * allocated and marked with IPCL_UDPMOD.
+ * Plumbing notes:
+ * UDP is always a device driver. For compatibility with mibopen() code
+ * it is possible to I_PUSH "udp", but that results in pushing a passthrough
+ * dummy module.
*
- * The above cases imply that we don't support any intermediate module to
+ * The above implies that we don't support any intermediate module to
* reside in between /dev/ip and udp -- in fact, we never supported such
* scenario in the past as the inter-layer communication semantics have
- * always been private. Also note that the normal case allows for SNMP
- * requests to be processed in addition to the rest of UDP operations.
- *
- * The normal case plumbing is depicted by the following diagram:
- *
- * +---------------+---------------+
- * | | | udp
- * | udp_wq | udp_rq |
- * | | UDP_RD |
- * | | |
- * +---------------+---------------+
- * | ^
- * v |
- * +---------------+---------------+
- * | | | /dev/ip
- * | ip_wq | ip_rq | conn_t
- * | UDP_WR | |
- * | | |
- * +---------------+---------------+
- *
- * Messages arriving at udp_wq from above will end up in ip_wq before
- * it gets processed, i.e. udp write entry points will advance udp_wq
- * and use its q_next value as ip_wq in order to use the conn_t that
- * is stored in its q_ptr. Likewise, messages generated by ip to the
- * module above udp will appear as if they are originated from udp_rq,
- * i.e. putnext() calls to the module above udp is done using the
- * udp_rq instead of ip_rq in order to avoid udp_rput() which does
- * nothing more than calling putnext().
- *
- * The above implies the following rule of thumb:
- *
- * 1. udp_t is obtained from conn_t, which is created by the /dev/ip
- * instance and is stored in q_ptr of both ip_wq and ip_rq. There
- * is no direct reference to conn_t from either udp_wq or udp_rq.
- *
- * 2. Write-side entry points of udp can obtain the conn_t via the
- * Q_TO_CONN() macro, using the queue value obtain from UDP_WR().
- *
- * 3. While in /dev/ip context, putnext() to the module above udp can
- * be done by supplying the queue value obtained from UDP_RD().
- *
+ * always been private.
*/
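
A minimal sketch of the lock ordering the new comment documents (illustrative only, not code from this change; error handling omitted). The endpoint lock is taken before the fanout-bucket lock and before conn_lock:

	rw_enter(&udp->udp_rwlock, RW_WRITER);	/* endpoint lock first */
	mutex_enter(&udpf->uf_lock);		/* then the fanout bucket lock */
	/* ... change udp_state or the bind hash linkage here ... */
	mutex_exit(&udpf->uf_lock);

	mutex_enter(&connp->conn_lock);		/* conn_lock also after udp_rwlock */
	/* ... update fields that affect the IP classifier lookup ... */
	mutex_exit(&connp->conn_lock);
	rw_exit(&udp->udp_rwlock);
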
-static queue_t *UDP_WR(queue_t *);
-static queue_t *UDP_RD(queue_t *);
-
-struct kmem_cache *udp_cache;
-
/* For /etc/system control */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
@@ -217,7 +155,10 @@ static void udp_addr_req(queue_t *q, mblk_t *mp);
static void udp_bind(queue_t *q, mblk_t *mp);
static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
-static int udp_build_hdrs(queue_t *q, udp_t *udp);
+static void udp_bind_result(conn_t *, mblk_t *);
+static void udp_bind_ack(conn_t *, mblk_t *mp);
+static void udp_bind_error(conn_t *, mblk_t *mp);
+static int udp_build_hdrs(udp_t *udp);
static void udp_capability_req(queue_t *q, mblk_t *mp);
static int udp_close(queue_t *q);
static void udp_connect(queue_t *q, mblk_t *mp);
@@ -235,9 +176,16 @@ static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
static void udp_icmp_error(queue_t *q, mblk_t *mp);
static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void udp_info_req(queue_t *q, mblk_t *mp);
+static void udp_input(void *, mblk_t *, void *);
static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
t_scalar_t addr_length);
+static void udp_lrput(queue_t *, mblk_t *);
+static void udp_lwput(queue_t *, mblk_t *);
static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
+ cred_t *credp, boolean_t isv6);
+static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
+ cred_t *credp);
+static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
cred_t *credp);
static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
int *errorp, udpattrs_t *udpattrs);
@@ -247,11 +195,8 @@ static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
cred_t *cr);
static void udp_report_item(mblk_t *mp, udp_t *udp);
-static void udp_rput(queue_t *q, mblk_t *mp);
-static void udp_rput_other(queue_t *, mblk_t *);
static int udp_rinfop(queue_t *q, infod_t *dp);
static int udp_rrw(queue_t *q, struiod_t *dp);
-static void udp_rput_bind_ack(queue_t *q, mblk_t *mp);
static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
cred_t *cr);
static void udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *);
@@ -260,15 +205,12 @@ static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
static void udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
boolean_t random);
-static void udp_wput(queue_t *q, mblk_t *mp);
static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
int *, boolean_t);
static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
int *error);
static void udp_wput_other(queue_t *q, mblk_t *mp);
static void udp_wput_iocdata(queue_t *q, mblk_t *mp);
-static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
- socklen_t addrlen);
static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size);
static void *udp_stack_init(netstackid_t stackid, netstack_t *ns);
@@ -279,56 +221,62 @@ static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static void *udp_kstat2_init(netstackid_t, udp_stat_t *);
static void udp_kstat2_fini(netstackid_t, kstat_t *);
static int udp_kstat_update(kstat_t *kp, int rw);
-static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2);
-static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
-static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
-static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2);
static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
uint_t pkt_len);
static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
-static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t);
-static void udp_exit(conn_t *);
-static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t);
-#ifdef DEBUG
-static void udp_mode_assertions(udp_t *, int);
-#endif /* DEBUG */
-
-major_t UDP6_MAJ;
-#define UDP6 "udp6"
+static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);
#define UDP_RECV_HIWATER (56 * 1024)
#define UDP_RECV_LOWATER 128
#define UDP_XMIT_HIWATER (56 * 1024)
#define UDP_XMIT_LOWATER 1024
-static struct module_info udp_info = {
+static struct module_info udp_mod_info = {
UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};
-static struct qinit udp_rinit = {
- (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL,
- &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
+/*
+ * Entry points for UDP as a device.
+ * We have separate open functions for the /dev/udp and /dev/udp6 devices.
+ */
+static struct qinit udp_rinitv4 = {
+ NULL, NULL, udp_openv4, udp_close, NULL,
+ &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
+};
+
+static struct qinit udp_rinitv6 = {
+ NULL, NULL, udp_openv6, udp_close, NULL,
+ &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};
static struct qinit udp_winit = {
(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
- &udp_info, NULL, NULL, NULL, STRUIOT_NONE
+ &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
};
-/* Support for just SNMP if UDP is not pushed directly over device IP */
-struct qinit udp_snmp_rinit = {
- (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL,
- &udp_info, NULL, NULL, NULL, STRUIOT_NONE
+/*
+ * UDP needs to handle I_LINK and I_PLINK since ifconfig
+ * likes to use it as a place to hang the various streams.
+ */
+static struct qinit udp_lrinit = {
+ (pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL,
+ &udp_mod_info
};
-struct qinit udp_snmp_winit = {
- (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL,
- &udp_info, NULL, NULL, NULL, STRUIOT_NONE
+static struct qinit udp_lwinit = {
+ (pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL,
+ &udp_mod_info
};
-struct streamtab udpinfo = {
- &udp_rinit, &udp_winit
+/* For AF_INET aka /dev/udp */
+struct streamtab udpinfov4 = {
+ &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
+};
+
+/* For AF_INET6 aka /dev/udp6 */
+struct streamtab udpinfov6 = {
+ &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};
static sin_t sin_null; /* Zero address for quick clears */
@@ -409,429 +357,6 @@ void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
typedef union T_primitives *t_primp_t;
-#define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \
- ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \
- ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \
- (mp)->b_queue = (queue_t *)((uintptr_t)tag); \
- (mp)->b_prev = (mblk_t *)proc; \
- if ((udp)->udp_mphead == NULL) \
- (udp)->udp_mphead = (mp); \
- else \
- (udp)->udp_mptail->b_next = (mp); \
- (udp)->udp_mptail = (mp); \
- (udp)->udp_mpcount++; \
-}
-
-#define UDP_READERS_INCREF(udp) { \
- ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \
- (udp)->udp_reader_count++; \
-}
-
-#define UDP_READERS_DECREF(udp) { \
- ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \
- (udp)->udp_reader_count--; \
- if ((udp)->udp_reader_count == 0) \
- cv_broadcast(&(udp)->udp_connp->conn_cv); \
-}
-
-#define UDP_SQUEUE_DECREF(udp) { \
- ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \
- (udp)->udp_squeue_count--; \
- if ((udp)->udp_squeue_count == 0) \
- cv_broadcast(&(udp)->udp_connp->conn_cv); \
-}
-
-/*
- * Notes on UDP endpoint synchronization:
- *
- * UDP needs exclusive operation on a per endpoint basis, when executing
- * functions that modify the endpoint state. udp_rput_other() deals with
- * packets with IP options, and processing these packets end up having
- * to update the endpoint's option related state. udp_wput_other() deals
- * with control operations from the top, e.g. connect() that needs to
- * update the endpoint state. These could be synchronized using locks,
- * but the current version uses squeues for this purpose. squeues may
- * give performance improvement for certain cases such as connected UDP
- * sockets; thus the framework allows for using squeues.
- *
- * The perimeter routines are described as follows:
- *
- * udp_enter():
- * Enter the UDP endpoint perimeter.
- *
- * udp_become_writer():
- * Become exclusive on the UDP endpoint. Specifies a function
- * that will be called exclusively either immediately or later
- * when the perimeter is available exclusively.
- *
- * udp_exit():
- * Exit the UDP perimeter.
- *
- * Entering UDP from the top or from the bottom must be done using
- * udp_enter(). No lock must be held while attempting to enter the UDP
- * perimeter. When finished, udp_exit() must be called to get out of
- * the perimeter.
- *
- * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode,
- * multiple threads may enter a UDP endpoint concurrently. This is used
- * for sending and/or receiving normal data. Control operations and other
- * special cases call udp_become_writer() to become exclusive on a per
- * endpoint basis and this results in transitioning to SQUEUE mode. squeue
- * by definition serializes access to the conn_t. When there are no more
- * pending messages on the squeue for the UDP connection, the endpoint
- * reverts to MT_HOT mode. During the interregnum when not all MT threads
- * of an endpoint have finished, messages are queued in the UDP endpoint
- * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
- *
- * These modes have the following analogs:
- *
- * UDP_MT_HOT/udp_reader_count==0 none
- * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK
- * UDP_MT_QUEUED RW_WRITE_WANTED
- * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED
- *
- * Stable modes: UDP_MT_HOT, UDP_SQUEUE
- * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
- *
- * While in stable modes, UDP keeps track of the number of threads
- * operating on the endpoint. The udp_reader_count variable represents
- * the number of threads entering the endpoint as readers while it is
- * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there
- * is only a single reader, i.e. when this counter drops to 1. Likewise,
- * udp_squeue_count represents the number of threads operating on the
- * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition
- * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
- * when this counter drops to 0.
- *
- * The default mode is set to UDP_MT_HOT and UDP alternates between
- * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
- *
- * Mode transition:
- * ----------------------------------------------------------------
- * old mode Event New mode
- * ----------------------------------------------------------------
- * UDP_MT_HOT Call to udp_become_writer() UDP_SQUEUE
- * and udp_reader_count == 1
- *
- * UDP_MT_HOT Call to udp_become_writer() UDP_MT_QUEUED
- * and udp_reader_count > 1
- *
- * UDP_MT_QUEUED udp_reader_count drops to zero UDP_QUEUED_SQUEUE
- *
- * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_SQUEUE
- * internal UDP queue successfully
- * moved to squeue AND udp_squeue_count != 0
- *
- * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_MT_HOT
- * internal UDP queue successfully
- * moved to squeue AND udp_squeue_count
- * drops to zero
- *
- * UDP_SQUEUE udp_squeue_count drops to zero UDP_MT_HOT
- * ----------------------------------------------------------------
- */
-
-static queue_t *
-UDP_WR(queue_t *q)
-{
- ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
- ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
- ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));
-
- return (_WR(q)->q_next);
-}
-
-static queue_t *
-UDP_RD(queue_t *q)
-{
- ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
- ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
- ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);
-
- return (_RD(q)->q_next);
-}
-
-#ifdef DEBUG
-#define UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
-#else
-#define UDP_MODE_ASSERTIONS(udp, caller)
-#endif
-
-/* Invariants */
-#ifdef DEBUG
-
-uint32_t udp_count[4];
-
-/* Context of udp_mode_assertions */
-#define UDP_ENTER 1
-#define UDP_BECOME_WRITER 2
-#define UDP_EXIT 3
-
-static void
-udp_mode_assertions(udp_t *udp, int caller)
-{
- ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));
-
- switch (udp->udp_mode) {
- case UDP_MT_HOT:
- /*
- * Messages have not yet been enqueued on the internal queue,
- * otherwise we would have switched to UDP_MT_QUEUED. Likewise
- * by definition, there can't be any messages enqueued on the
- * squeue. The UDP could be quiescent, so udp_reader_count
- * could be zero at entry.
- */
- ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
- udp->udp_squeue_count == 0);
- ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
- udp_count[0]++;
- break;
-
- case UDP_MT_QUEUED:
- /*
- * The last MT thread to exit the udp perimeter empties the
- * internal queue and then switches the UDP to
- * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED
- * mode, it means there must be at least 1 MT thread still in
- * the perimeter and at least 1 message on the internal queue.
- */
- ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
- udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
- udp_count[1]++;
- break;
-
- case UDP_QUEUED_SQUEUE:
- /*
- * The switch has happened from MT to SQUEUE. So there can't
- * any MT threads. Messages could still pile up on the internal
- * queue until the transition is complete and we move to
- * UDP_SQUEUE mode. We can't assert on nonzero udp_squeue_count
- * since the squeue could drain any time.
- */
- ASSERT(udp->udp_reader_count == 0);
- udp_count[2]++;
- break;
-
- case UDP_SQUEUE:
- /*
- * The transition is complete. Thre can't be any messages on
- * the internal queue. The udp could be quiescent or the squeue
- * could drain any time, so we can't assert on nonzero
- * udp_squeue_count during entry. Nor can we assert that
- * udp_reader_count is zero, since, a reader thread could have
- * directly become writer in line by calling udp_become_writer
- * without going through the queued states.
- */
- ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
- ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
- udp_count[3]++;
- break;
- }
-}
-#endif
-
-#define _UDP_ENTER(connp, mp, proc, tag) { \
- udp_t *_udp = (connp)->conn_udp; \
- \
- mutex_enter(&(connp)->conn_lock); \
- if ((connp)->conn_state_flags & CONN_CLOSING) { \
- mutex_exit(&(connp)->conn_lock); \
- freemsg(mp); \
- } else { \
- UDP_MODE_ASSERTIONS(_udp, UDP_ENTER); \
- \
- switch (_udp->udp_mode) { \
- case UDP_MT_HOT: \
- /* We can execute as reader right away. */ \
- UDP_READERS_INCREF(_udp); \
- mutex_exit(&(connp)->conn_lock); \
- (*(proc))(connp, mp, (connp)->conn_sqp); \
- break; \
- \
- case UDP_SQUEUE: \
- /* \
- * We are in squeue mode, send the \
- * packet to the squeue \
- */ \
- _udp->udp_squeue_count++; \
- CONN_INC_REF_LOCKED(connp); \
- mutex_exit(&(connp)->conn_lock); \
- squeue_enter((connp)->conn_sqp, mp, proc, \
- connp, tag); \
- break; \
- \
- case UDP_MT_QUEUED: \
- case UDP_QUEUED_SQUEUE: \
- /* \
- * Some messages may have been enqueued \
- * ahead of us. Enqueue the new message \
- * at the tail of the internal queue to \
- * preserve message ordering. \
- */ \
- UDP_ENQUEUE_MP(_udp, mp, proc, tag); \
- mutex_exit(&(connp)->conn_lock); \
- break; \
- } \
- } \
-}
-
-static void
-udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
-{
- _UDP_ENTER(connp, mp, proc, tag);
-}
-
-static void
-udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
-{
- udp_t *udp;
-
- udp = connp->conn_udp;
-
- mutex_enter(&connp->conn_lock);
-
- UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);
-
- switch (udp->udp_mode) {
- case UDP_MT_HOT:
- if (udp->udp_reader_count == 1) {
- /*
- * We are the only MT thread. Switch to squeue mode
- * immediately.
- */
- udp->udp_mode = UDP_SQUEUE;
- udp->udp_squeue_count = 1;
- CONN_INC_REF_LOCKED(connp);
- mutex_exit(&connp->conn_lock);
- squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
- return;
- }
- /* FALLTHRU */
-
- case UDP_MT_QUEUED:
- /* Enqueue the packet internally in UDP */
- udp->udp_mode = UDP_MT_QUEUED;
- UDP_ENQUEUE_MP(udp, mp, proc, tag);
- mutex_exit(&connp->conn_lock);
- return;
-
- case UDP_SQUEUE:
- case UDP_QUEUED_SQUEUE:
- /*
- * We are already exclusive. i.e. we are already
- * writer. Simply call the desired function.
- */
- udp->udp_squeue_count++;
- mutex_exit(&connp->conn_lock);
- (*proc)(connp, mp, connp->conn_sqp);
- return;
- }
-}
-
-/*
- * Transition from MT mode to SQUEUE mode, when the last MT thread
- * is exiting the UDP perimeter. Move all messages from the internal
- * udp queue to the squeue. A better way would be to move all the
- * messages in one shot, this needs more support from the squeue framework
- */
-static void
-udp_switch_to_squeue(udp_t *udp)
-{
- mblk_t *mp;
- mblk_t *mp_next;
- sqproc_t proc;
- uint8_t tag;
- conn_t *connp = udp->udp_connp;
-
- ASSERT(MUTEX_HELD(&connp->conn_lock));
- ASSERT(udp->udp_mode == UDP_MT_QUEUED);
- while (udp->udp_mphead != NULL) {
- mp = udp->udp_mphead;
- udp->udp_mphead = NULL;
- udp->udp_mptail = NULL;
- udp->udp_mpcount = 0;
- udp->udp_mode = UDP_QUEUED_SQUEUE;
- mutex_exit(&connp->conn_lock);
- /*
- * It is best not to hold any locks across the calls
- * to squeue functions. Since we drop the lock we
- * need to go back and check the udp_mphead once again
- * after the squeue_fill and hence the while loop at
- * the top of this function
- */
- for (; mp != NULL; mp = mp_next) {
- mp_next = mp->b_next;
- proc = (sqproc_t)mp->b_prev;
- tag = (uint8_t)((uintptr_t)mp->b_queue);
- mp->b_next = NULL;
- mp->b_prev = NULL;
- mp->b_queue = NULL;
- CONN_INC_REF(connp);
- udp->udp_squeue_count++;
- squeue_fill(connp->conn_sqp, mp, proc, connp,
- tag);
- }
- mutex_enter(&connp->conn_lock);
- }
- /*
- * udp_squeue_count of zero implies that the squeue has drained
- * even before we arrived here (i.e. after the squeue_fill above)
- */
- udp->udp_mode = (udp->udp_squeue_count != 0) ?
- UDP_SQUEUE : UDP_MT_HOT;
-}
-
-#define _UDP_EXIT(connp) { \
- udp_t *_udp = (connp)->conn_udp; \
- \
- mutex_enter(&(connp)->conn_lock); \
- UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \
- \
- switch (_udp->udp_mode) { \
- case UDP_MT_HOT: \
- UDP_READERS_DECREF(_udp); \
- mutex_exit(&(connp)->conn_lock); \
- break; \
- \
- case UDP_SQUEUE: \
- UDP_SQUEUE_DECREF(_udp); \
- if (_udp->udp_squeue_count == 0) \
- _udp->udp_mode = UDP_MT_HOT; \
- mutex_exit(&(connp)->conn_lock); \
- break; \
- \
- case UDP_MT_QUEUED: \
- /* \
- * If this is the last MT thread, we need to \
- * switch to squeue mode \
- */ \
- UDP_READERS_DECREF(_udp); \
- if (_udp->udp_reader_count == 0) \
- udp_switch_to_squeue(_udp); \
- mutex_exit(&(connp)->conn_lock); \
- break; \
- \
- case UDP_QUEUED_SQUEUE: \
- UDP_SQUEUE_DECREF(_udp); \
- /* \
- * Even if the udp_squeue_count drops to zero, we \
- * don't want to change udp_mode to UDP_MT_HOT here. \
- * The thread in udp_switch_to_squeue will take care \
- * of the transition to UDP_MT_HOT, after emptying \
- * any more new messages that have been enqueued in \
- * udp_mphead. \
- */ \
- mutex_exit(&(connp)->conn_lock); \
- break; \
- } \
-}
-
-static void
-udp_exit(conn_t *connp)
-{
- _UDP_EXIT(connp);
-}
-
/*
* Return the next anonymous port in the privileged port range for
* bind checking.
@@ -988,9 +513,7 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
udp_t *udpnext;
ASSERT(MUTEX_HELD(&uf->uf_lock));
- if (udp->udp_ptpbhn != NULL) {
- udp_bind_hash_remove(udp, B_TRUE);
- }
+ ASSERT(udp->udp_ptpbhn == NULL);
udpp = &uf->uf_udp;
udpnext = udpp[0];
if (udpnext != NULL) {
@@ -1068,7 +591,6 @@ udp_bind(queue_t *q, mblk_t *mp)
udp_err_ack(q, mp, TPROTO, 0);
return;
}
-
if (udp->udp_state != TS_UNBND) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"udp_bind: bad state, %u", udp->udp_state);
@@ -1198,9 +720,25 @@ udp_bind(queue_t *q, mblk_t *mp)
}
/*
+ * The state must be TS_UNBND. TPI mandates that users must send
+ * TPI primitives only 1 at a time and wait for the response before
+ * sending the next primitive.
+ */
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
+ rw_exit(&udp->udp_rwlock);
+ (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
+ "udp_bind: bad state, %u", udp->udp_state);
+ udp_err_ack(q, mp, TOUTSTATE, 0);
+ return;
+ }
+ udp->udp_pending_op = tbr->PRIM_type;
+ /*
* Copy the source address into our udp structure. This address
* may still be zero; if so, IP will fill in the correct address
- * each time an outbound packet is passed to it.
+ * each time an outbound packet is passed to it. Since the udp is
+ * not yet in the bind hash list, we don't grab the uf_lock to
+ * change udp_ipversion
*/
if (udp->udp_family == AF_INET) {
ASSERT(sin != NULL);
@@ -1212,6 +750,10 @@ udp_bind(queue_t *q, mblk_t *mp)
ASSERT(sin6 != NULL);
v6src = sin6->sin6_addr;
if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
+ /*
+ * no need to hold the uf_lock to set the udp_ipversion
+ * since we are not yet in the fanout list
+ */
udp->udp_ipversion = IPV4_VERSION;
udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
UDPH_SIZE + udp->udp_ip_snd_options_len;
@@ -1383,6 +925,8 @@ udp_bind(queue_t *q, mblk_t *mp)
* the routine (and exit the loop).
*
*/
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TADDRBUSY, 0);
return;
}
@@ -1412,6 +956,8 @@ udp_bind(queue_t *q, mblk_t *mp)
* there are none available, so send an error
* to the user.
*/
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TNOADDR, 0);
return;
}
@@ -1421,8 +967,9 @@ udp_bind(queue_t *q, mblk_t *mp)
* Copy the source address into our udp structure. This address
* may still be zero; if so, ip will fill in the correct address
* each time an outbound packet is passed to it.
- * If we are binding to a broadcast or multicast address udp_rput
- * will clear the source address when it receives the T_BIND_ACK.
+ * If we are binding to a broadcast or multicast address then
+ * udp_bind_ack will clear the source address when it receives
+ * the T_BIND_ACK.
*/
udp->udp_v6src = udp->udp_bound_v6src = v6src;
udp->udp_port = lport;
@@ -1442,8 +989,10 @@ udp_bind(queue_t *q, mblk_t *mp)
sin6->sin6_port = udp->udp_port;
/* Rebuild the header template */
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0) {
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
mutex_exit(&udpf->uf_lock);
udp_err_ack(q, mp, TSYSERR, error);
return;
@@ -1452,6 +1001,7 @@ udp_bind(queue_t *q, mblk_t *mp)
udp->udp_state = TS_IDLE;
udp_bind_hash_insert(udpf, udp);
mutex_exit(&udpf->uf_lock);
+ rw_exit(&udp->udp_rwlock);
if (cl_inet_bind) {
/*
@@ -1480,8 +1030,11 @@ udp_bind(queue_t *q, mblk_t *mp)
connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
mlptSingle;
addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
- &v6src, udp->udp_us->us_netstack->netstack_ip);
+ &v6src, us->us_netstack->netstack_ip);
if (addrtype == mlptSingle) {
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TNOADDR, 0);
connp->conn_anon_port = B_FALSE;
connp->conn_mlp_type = mlptSingle;
@@ -1499,6 +1052,9 @@ udp_bind(queue_t *q, mblk_t *mp)
"udp_bind: no priv for multilevel port %d",
mlpport);
}
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TACCES, 0);
connp->conn_anon_port = B_FALSE;
connp->conn_mlp_type = mlptSingle;
@@ -1529,6 +1085,9 @@ udp_bind(queue_t *q, mblk_t *mp)
mlpport, connp->conn_zoneid,
mlpzone);
}
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TACCES, 0);
connp->conn_anon_port = B_FALSE;
connp->conn_mlp_type = mlptSingle;
@@ -1547,6 +1106,9 @@ udp_bind(queue_t *q, mblk_t *mp)
"udp_bind: cannot establish anon "
"MLP for port %d", port);
}
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TACCES, 0);
connp->conn_anon_port = B_FALSE;
connp->conn_mlp_type = mlptSingle;
@@ -1565,6 +1127,9 @@ udp_bind(queue_t *q, mblk_t *mp)
*/
mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
if (!mp->b_cont) {
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TSYSERR, ENOMEM);
return;
}
@@ -1576,34 +1141,25 @@ udp_bind(queue_t *q, mblk_t *mp)
else
mp = ip_bind_v4(q, mp, connp);
+ /* The above returns NULL if the bind needs to be deferred */
if (mp != NULL)
- udp_rput_other(_RD(q), mp);
+ udp_bind_result(connp, mp);
else
CONN_INC_REF(connp);
}
-
-void
-udp_resume_bind(conn_t *connp, mblk_t *mp)
-{
- udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
-}
-
/*
- * This is called from ip_wput_nondata to resume a deferred UDP bind.
+ * This is called from ip_wput_nondata to handle the results of a
+ * deferred UDP bind. It is called once the bind has been completed.
*/
-/* ARGSUSED */
-static void
-udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
+void
+udp_resume_bind(conn_t *connp, mblk_t *mp)
{
- conn_t *connp = arg;
-
ASSERT(connp != NULL && IPCL_IS_UDP(connp));
- udp_rput_other(connp->conn_rq, mp);
+ udp_bind_result(connp, mp);
CONN_OPER_PENDING_DONE(connp);
- udp_exit(connp);
}
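
The deferred-bind contract handled above, as a caller-side sketch of the pattern that udp_bind(), udp_connect() and udp_disconnect() all follow in this change (my own summary, not additional code):

	mp1 = ip_bind_v4(q, mp1, connp);	/* or ip_bind_v6() for AF_INET6 */
	if (mp1 != NULL) {
		/* IP answered synchronously; process T_BIND_ACK/T_ERROR_ACK now */
		udp_bind_result(connp, mp1);
	} else {
		/*
		 * The bind was deferred inside IP.  Hold a conn reference;
		 * IP later calls udp_resume_bind(), which runs
		 * udp_bind_result() and then CONN_OPER_PENDING_DONE().
		 */
		CONN_INC_REF(connp);
	}
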
/*
@@ -1616,11 +1172,11 @@ udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
* T_OK_ACK - for the T_CONN_REQ
* T_CONN_CON - to keep the TPI user happy
*
- * The connect completes in udp_rput.
+ * The connect completes in udp_bind_result.
* When a T_BIND_ACK is received information is extracted from the IRE
* and the two appended messages are sent to the TPI user.
- * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
- * it to an error ack for the appropriate primitive.
+ * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ, it will
+ * convert it to an error ack for the appropriate primitive.
*/
static void
udp_connect(queue_t *q, mblk_t *mp)
@@ -1635,10 +1191,11 @@ udp_connect(queue_t *q, mblk_t *mp)
mblk_t *mp1, *mp2;
udp_fanout_t *udpf;
udp_t *udp, *udp1;
+ ushort_t ipversion;
udp_stack_t *us;
+ conn_t *connp = Q_TO_CONN(q);
- udp = Q_TO_UDP(q);
-
+ udp = connp->conn_udp;
tcr = (struct T_conn_req *)mp->b_rptr;
us = udp->udp_us;
@@ -1647,28 +1204,6 @@ udp_connect(queue_t *q, mblk_t *mp)
udp_err_ack(q, mp, TPROTO, 0);
return;
}
- /*
- * This UDP must have bound to a port already before doing
- * a connect.
- */
- if (udp->udp_state == TS_UNBND) {
- (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "udp_connect: bad state, %u", udp->udp_state);
- udp_err_ack(q, mp, TOUTSTATE, 0);
- return;
- }
- ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
-
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
- us->us_bind_fanout_size)];
-
- if (udp->udp_state == TS_DATA_XFER) {
- /* Already connected - clear out state */
- mutex_enter(&udpf->uf_lock);
- udp->udp_v6src = udp->udp_bound_v6src;
- udp->udp_state = TS_IDLE;
- mutex_exit(&udpf->uf_lock);
- }
if (tcr->OPT_length != 0) {
udp_err_ack(q, mp, TBADOPT, 0);
@@ -1702,8 +1237,7 @@ udp_connect(queue_t *q, mblk_t *mp)
dstport = sin->sin_port;
IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
ASSERT(udp->udp_ipversion == IPV4_VERSION);
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
- udp->udp_ip_snd_options_len;
+ ipversion = IPV4_VERSION;
break;
case sizeof (sin6_t):
@@ -1719,18 +1253,15 @@ udp_connect(queue_t *q, mblk_t *mp)
return;
}
v6dst = sin6->sin6_addr;
+ dstport = sin6->sin6_port;
if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
- udp->udp_ipversion = IPV4_VERSION;
- udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
- UDPH_SIZE + udp->udp_ip_snd_options_len;
+ ipversion = IPV4_VERSION;
flowinfo = 0;
} else {
- udp->udp_ipversion = IPV6_VERSION;
- udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
+ ipversion = IPV6_VERSION;
flowinfo = sin6->sin6_flowinfo;
}
- dstport = sin6->sin6_port;
break;
}
if (dstport == 0) {
@@ -1738,11 +1269,46 @@ udp_connect(queue_t *q, mblk_t *mp)
return;
}
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+
+ /*
+ * This UDP must have bound to a port already before doing a connect.
+ * TPI mandates that users must send TPI primitives only 1 at a time
+ * and wait for the response before sending the next primitive.
+ */
+ if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
+ rw_exit(&udp->udp_rwlock);
+ (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
+ "udp_connect: bad state, %u", udp->udp_state);
+ udp_err_ack(q, mp, TOUTSTATE, 0);
+ return;
+ }
+ udp->udp_pending_op = T_CONN_REQ;
+ ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
+
+ if (ipversion == IPV4_VERSION) {
+ udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
+ udp->udp_ip_snd_options_len;
+ } else {
+ udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
+ }
+
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ us->us_bind_fanout_size)];
+
+ mutex_enter(&udpf->uf_lock);
+ if (udp->udp_state == TS_DATA_XFER) {
+ /* Already connected - clear out state */
+ udp->udp_v6src = udp->udp_bound_v6src;
+ udp->udp_state = TS_IDLE;
+ }
+
/*
* Create a default IP header with no IP options.
*/
udp->udp_dstport = dstport;
- if (udp->udp_ipversion == IPV4_VERSION) {
+ udp->udp_ipversion = ipversion;
+ if (ipversion == IPV4_VERSION) {
/*
* Interpret a zero destination to mean loopback.
* Update the T_CONN_REQ (sin/sin6) since it is used to
@@ -1794,10 +1360,9 @@ udp_connect(queue_t *q, mblk_t *mp)
}
/*
- * Verify that the src/port/dst/port and zoneid is unique for all
+ * Verify that the src/port/dst/port is unique for all
* connections in TS_DATA_XFER
*/
- mutex_enter(&udpf->uf_lock);
for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
if (udp1->udp_state != TS_DATA_XFER)
continue;
@@ -1812,6 +1377,8 @@ udp_connect(queue_t *q, mblk_t *mp)
udp->udp_connp->conn_zoneid)))
continue;
mutex_exit(&udpf->uf_lock);
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TBADADDR, 0);
return;
}
@@ -1828,17 +1395,20 @@ udp_connect(queue_t *q, mblk_t *mp)
else
mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
if (mp1 == NULL) {
- udp_err_ack(q, mp, TSYSERR, ENOMEM);
bind_failed:
mutex_enter(&udpf->uf_lock);
udp->udp_state = TS_IDLE;
+ udp->udp_pending_op = -1;
mutex_exit(&udpf->uf_lock);
+ rw_exit(&udp->udp_rwlock);
+ udp_err_ack(q, mp, TSYSERR, ENOMEM);
return;
}
+ rw_exit(&udp->udp_rwlock);
/*
* We also have to send a connection confirmation to
- * keep TLI happy. Prepare it for udp_rput.
+ * keep TLI happy. Prepare it for udp_bind_result.
*/
if (udp->udp_family == AF_INET)
mp2 = mi_tpi_conn_con(NULL, (char *)sin,
@@ -1848,7 +1418,7 @@ bind_failed:
sizeof (*sin6), NULL, 0);
if (mp2 == NULL) {
freemsg(mp1);
- udp_err_ack(q, mp, TSYSERR, ENOMEM);
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
goto bind_failed;
}
@@ -1856,36 +1426,43 @@ bind_failed:
if (mp == NULL) {
/* Unable to reuse the T_CONN_REQ for the ack. */
freemsg(mp2);
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ mutex_enter(&udpf->uf_lock);
+ udp->udp_state = TS_IDLE;
+ udp->udp_pending_op = -1;
+ mutex_exit(&udpf->uf_lock);
+ rw_exit(&udp->udp_rwlock);
udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
- goto bind_failed;
+ return;
}
/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
linkb(mp1, mp);
linkb(mp1, mp2);
- mblk_setcred(mp1, udp->udp_connp->conn_cred);
+ mblk_setcred(mp1, connp->conn_cred);
if (udp->udp_family == AF_INET)
- mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
+ mp1 = ip_bind_v4(q, mp1, connp);
else
- mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);
+ mp1 = ip_bind_v6(q, mp1, connp, NULL);
+ /* The above returns NULL if the bind needs to be deferred */
if (mp1 != NULL)
- udp_rput_other(_RD(q), mp1);
+ udp_bind_result(connp, mp1);
else
- CONN_INC_REF(udp->udp_connp);
+ CONN_INC_REF(connp);
}
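
For reference, the mblk chain that udp_connect() hands to IP is laid out as follows (my own sketch of the code above, not part of the patch):

	/*
	 *   mp1: T_BIND_REQ / O_T_BIND_REQ (built by udp_ip_bind_mp())
	 *        b_cont -> ipa_conn_t / ipa6_conn_t carrying src and dst
	 *   appended via linkb():
	 *   mp:  T_OK_ACK   - acknowledges the caller's T_CONN_REQ
	 *   mp2: T_CONN_CON - connection confirmation for the TPI user
	 *
	 * udp_bind_result() sends mp and mp2 upstream when the T_BIND_ACK
	 * comes back from IP, or converts a T_ERROR_ACK into the
	 * appropriate error ack.
	 */
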
static int
udp_close(queue_t *q)
{
- conn_t *connp = Q_TO_CONN(UDP_WR(q));
+ conn_t *connp = (conn_t *)q->q_ptr;
udp_t *udp;
- queue_t *ip_rq = RD(UDP_WR(q));
ASSERT(connp != NULL && IPCL_IS_UDP(connp));
udp = connp->conn_udp;
+ udp_quiesce_conn(connp);
ip_quiesce_conn(connp);
/*
* Disable read-side synchronous stream
@@ -1896,11 +1473,6 @@ udp_close(queue_t *q)
qprocsoff(q);
- /* restore IP module's high and low water marks to default values */
- ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
- WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
- WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;
-
ASSERT(udp->udp_rcv_cnt == 0);
ASSERT(udp->udp_rcv_msgcnt == 0);
ASSERT(udp->udp_rcv_list_head == NULL);
@@ -1909,23 +1481,28 @@ udp_close(queue_t *q)
udp_close_free(connp);
/*
- * Restore connp as an IP endpoint.
- * Locking required to prevent a race with udp_snmp_get()/
- * ipcl_get_next_conn(), which selects conn_t which are
- * IPCL_UDP and not CONN_CONDEMNED.
+ * Now we are truly single threaded on this stream, and can
+ * delete the things hanging off the connp, and finally the connp.
+ * We removed this connp from the fanout list, so it cannot be
+ * accessed through the fanouts, and we already waited for the
+ * conn_ref to drop to 0. We are already in close, so
+ * there cannot be any other thread from the top. qprocsoff
+ * has completed, and service has completed or won't run in
+ * the future.
*/
- mutex_enter(&connp->conn_lock);
- connp->conn_flags &= ~IPCL_UDP;
- connp->conn_state_flags &=
- ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
- connp->conn_ulp_labeled = B_FALSE;
- mutex_exit(&connp->conn_lock);
+ ASSERT(connp->conn_ref == 1);
+
+ inet_minor_free(ip_minor_arena, connp->conn_dev);
+ connp->conn_ref--;
+ ipcl_conn_destroy(connp);
+
+ q->q_ptr = WR(q)->q_ptr = NULL;
return (0);
}
/*
- * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn
+ * Called in the close path to quiesce the conn
*/
void
udp_quiesce_conn(conn_t *connp)
@@ -1949,12 +1526,6 @@ udp_quiesce_conn(conn_t *connp)
udp_bind_hash_remove(udp, B_FALSE);
- mutex_enter(&connp->conn_lock);
- while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
- udp->udp_mode != UDP_MT_HOT) {
- cv_wait(&connp->conn_cv, &connp->conn_lock);
- }
- mutex_exit(&connp->conn_lock);
}
void
@@ -1982,12 +1553,6 @@ udp_close_free(conn_t *connp)
}
ip6_pkt_free(&udp->udp_sticky_ipp);
-
- udp->udp_connp = NULL;
- netstack_rele(udp->udp_us->us_netstack);
-
- connp->conn_udp = NULL;
- kmem_cache_free(udp_cache, udp);
}
/*
@@ -2000,26 +1565,31 @@ udp_close_free(conn_t *connp)
* T_BIND_REQ - specifying just the local address/port
* T_OK_ACK - for the T_DISCON_REQ
*
- * The disconnect completes in udp_rput.
+ * The disconnect completes in udp_bind_result.
* When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
- * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
- * it to an error ack for the appropriate primitive.
+ * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ, it will
+ * convert it to an error ack for the appropriate primitive.
*/
static void
udp_disconnect(queue_t *q, mblk_t *mp)
{
- udp_t *udp = Q_TO_UDP(q);
+ udp_t *udp;
mblk_t *mp1;
udp_fanout_t *udpf;
udp_stack_t *us;
+ conn_t *connp = Q_TO_CONN(q);
+ udp = connp->conn_udp;
us = udp->udp_us;
- if (udp->udp_state != TS_DATA_XFER) {
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
+ rw_exit(&udp->udp_rwlock);
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"udp_disconnect: bad state, %u", udp->udp_state);
udp_err_ack(q, mp, TOUTSTATE, 0);
return;
}
+ udp->udp_pending_op = T_DISCON_REQ;
udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
us->us_bind_fanout_size)];
mutex_enter(&udpf->uf_lock);
@@ -2036,12 +1606,16 @@ udp_disconnect(queue_t *q, mblk_t *mp)
else
mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t));
if (mp1 == NULL) {
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack(q, mp, TSYSERR, ENOMEM);
return;
}
mp = mi_tpi_ok_ack_alloc(mp);
if (mp == NULL) {
/* Unable to reuse the T_DISCON_REQ for the ack. */
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
return;
}
@@ -2050,29 +1624,30 @@ udp_disconnect(queue_t *q, mblk_t *mp)
int error;
/* Rebuild the header template */
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0) {
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
freemsg(mp1);
return;
}
}
- mutex_enter(&udpf->uf_lock);
- udp->udp_discon_pending = 1;
- mutex_exit(&udpf->uf_lock);
- /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */
+ rw_exit(&udp->udp_rwlock);
+ /* Append the T_OK_ACK to the T_BIND_REQ for udp_bind_ack */
linkb(mp1, mp);
if (udp->udp_family == AF_INET6)
- mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);
+ mp1 = ip_bind_v6(q, mp1, connp, NULL);
else
- mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
+ mp1 = ip_bind_v4(q, mp1, connp);
+ /* The above returns NULL if the bind needs to be deferred */
if (mp1 != NULL)
- udp_rput_other(_RD(q), mp1);
+ udp_bind_result(connp, mp1);
else
- CONN_INC_REF(udp->udp_connp);
+ CONN_INC_REF(connp);
}
/* This routine creates a T_ERROR_ACK message and passes it upstream. */
@@ -2080,7 +1655,7 @@ static void
udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
{
if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
- putnext(UDP_RD(q), mp);
+ qreply(q, mp);
}
/* Shorthand to generate and send TPI error acks to our client */
@@ -2096,7 +1671,7 @@ udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
teackp->ERROR_prim = primitive;
teackp->TLI_error = t_error;
teackp->UNIX_error = sys_error;
- putnext(UDP_RD(q), mp);
+ qreply(q, mp);
}
}
@@ -2191,13 +1766,9 @@ udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
#define ICMP_MIN_UDP_HDR 4
/*
- * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP.
+ * udp_icmp_error is called by udp_input to process ICMP messages passed up by IP.
* Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
* Assumes that IP has pulled up everything up to and including the ICMP header.
- * An M_CTL could potentially come here from some other module (i.e. if UDP
- * is pushed on some module other than IP). Thus, if we find that the M_CTL
- * does not have enough ICMP information , following STREAMS conventions,
- * we send it upstream assuming it is an M_CTL we don't understand.
*/
static void
udp_icmp_error(queue_t *q, mblk_t *mp)
@@ -2210,70 +1781,27 @@ udp_icmp_error(queue_t *q, mblk_t *mp)
sin6_t sin6;
mblk_t *mp1;
int error = 0;
- size_t mp_size = MBLKL(mp);
udp_t *udp = Q_TO_UDP(q);
- /*
- * Assume IP provides aligned packets - otherwise toss
- */
- if (!OK_32PTR(mp->b_rptr)) {
- freemsg(mp);
- return;
- }
+ ipha = (ipha_t *)mp->b_rptr;
- /*
- * Verify that we have a complete IP header and the application has
- * asked for errors. If not, send it upstream.
- */
- if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) {
-noticmpv4:
- putnext(UDP_RD(q), mp);
- return;
- }
+ ASSERT(OK_32PTR(mp->b_rptr));
- ipha = (ipha_t *)mp->b_rptr;
- /*
- * Verify IP version. Anything other than IPv4 or IPv6 packet is sent
- * upstream. ICMPv6 is handled in udp_icmp_error_ipv6.
- */
- switch (IPH_HDR_VERSION(ipha)) {
- case IPV6_VERSION:
+ if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
+ ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
udp_icmp_error_ipv6(q, mp);
return;
- case IPV4_VERSION:
- break;
- default:
- goto noticmpv4;
}
+ ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
/* Skip past the outer IP and ICMP headers */
iph_hdr_length = IPH_HDR_LENGTH(ipha);
icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
- /*
- * If we don't have the correct outer IP header length or if the ULP
- * is not IPPROTO_ICMP or if we don't have a complete inner IP header
- * send the packet upstream.
- */
- if (iph_hdr_length < sizeof (ipha_t) ||
- ipha->ipha_protocol != IPPROTO_ICMP ||
- (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) {
- goto noticmpv4;
- }
ipha = (ipha_t *)&icmph[1];
/* Skip past the inner IP and find the ULP header */
iph_hdr_length = IPH_HDR_LENGTH(ipha);
udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
- /*
- * If we don't have the correct inner IP header length or if the ULP
- * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR
- * bytes of UDP header, send it upstream.
- */
- if (iph_hdr_length < sizeof (ipha_t) ||
- ipha->ipha_protocol != IPPROTO_UDP ||
- (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) {
- goto noticmpv4;
- }
switch (icmph->icmph_type) {
case ICMP_DEST_UNREACHABLE:
@@ -2281,7 +1809,6 @@ noticmpv4:
case ICMP_FRAGMENTATION_NEEDED:
/*
* IP has already adjusted the path MTU.
- * XXX Somehow pass MTU indication to application?
*/
break;
case ICMP_PORT_UNREACHABLE:
@@ -2302,6 +1829,15 @@ noticmpv4:
return;
}
+ /*
+ * Deliver T_UDERROR_IND when the application has asked for it.
+ * The socket layer enables this automatically when connected.
+ */
+ if (!udp->udp_dgram_errind) {
+ freemsg(mp);
+ return;
+ }
+
switch (udp->udp_family) {
case AF_INET:
sin = sin_null;
@@ -2322,7 +1858,7 @@ noticmpv4:
break;
}
if (mp1)
- putnext(UDP_RD(q), mp1);
+ putnext(q, mp1);
freemsg(mp);
}
@@ -2331,67 +1867,33 @@ noticmpv4:
* Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
* Assumes that IP has pulled up all the extension headers as well as the
* ICMPv6 header.
- * An M_CTL could potentially come here from some other module (i.e. if UDP
- * is pushed on some module other than IP). Thus, if we find that the M_CTL
- * does not have enough ICMP information , following STREAMS conventions,
- * we send it upstream assuming it is an M_CTL we don't understand. The reason
- * it might get here is if the non-ICMP M_CTL accidently has 6 in the version
- * field (when cast to ipha_t in udp_icmp_error).
*/
static void
udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
{
icmp6_t *icmp6;
ip6_t *ip6h, *outer_ip6h;
- uint16_t hdr_length;
+ uint16_t iph_hdr_length;
uint8_t *nexthdrp;
udpha_t *udpha;
sin6_t sin6;
mblk_t *mp1;
int error = 0;
- size_t mp_size = MBLKL(mp);
udp_t *udp = Q_TO_UDP(q);
-
- /*
- * Verify that we have a complete IP header. If not, send it upstream.
- */
- if (mp_size < sizeof (ip6_t)) {
-noticmpv6:
- putnext(UDP_RD(q), mp);
- return;
- }
+ udp_stack_t *us = udp->udp_us;
outer_ip6h = (ip6_t *)mp->b_rptr;
- /*
- * Verify this is an ICMPV6 packet, else send it upstream
- */
- if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) {
- hdr_length = IPV6_HDR_LEN;
- } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length,
- &nexthdrp) ||
- *nexthdrp != IPPROTO_ICMPV6) {
- goto noticmpv6;
- }
- icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length];
+ if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
+ iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
+ else
+ iph_hdr_length = IPV6_HDR_LEN;
+ icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
ip6h = (ip6_t *)&icmp6[1];
- /*
- * Verify we have a complete ICMP and inner IP header.
- */
- if ((uchar_t *)&ip6h[1] > mp->b_wptr)
- goto noticmpv6;
-
- if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
- goto noticmpv6;
- udpha = (udpha_t *)((char *)ip6h + hdr_length);
- /*
- * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't
- * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the
- * packet upstream.
- */
- if ((*nexthdrp != IPPROTO_UDP) ||
- ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) {
- goto noticmpv6;
+ if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
+ freemsg(mp);
+ return;
}
+ udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
@@ -2430,7 +1932,7 @@ noticmpv6:
udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
opt_length;
if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
- BUMP_MIB(&udp->udp_mib, udpInErrors);
+ BUMP_MIB(&us->us_udp_mib, udpInErrors);
break;
}
@@ -2468,7 +1970,7 @@ noticmpv6:
* message. Free it, then send our empty message.
*/
freemsg(mp);
- putnext(UDP_RD(q), newmp);
+ putnext(q, newmp);
return;
}
case ICMP6_TIME_EXCEEDED:
@@ -2489,6 +1991,15 @@ noticmpv6:
return;
}
+ /*
+ * Deliver T_UDERROR_IND when the application has asked for it.
+ * The socket layer enables this automatically when connected.
+ */
+ if (!udp->udp_dgram_errind) {
+ freemsg(mp);
+ return;
+ }
+
sin6 = sin6_null;
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ip6h->ip6_dst;
@@ -2498,7 +2009,7 @@ noticmpv6:
mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
error);
if (mp1)
- putnext(UDP_RD(q), mp1);
+ putnext(q, mp1);
freemsg(mp);
}
@@ -2532,6 +2043,7 @@ udp_addr_req(queue_t *q, mblk_t *mp)
taa->PRIM_type = T_ADDR_ACK;
ackmp->b_datap->db_type = M_PCPROTO;
+ rw_enter(&udp->udp_rwlock, RW_READER);
/*
* Note: Following code assumes 32 bit alignment of basic
* data structures like sin_t and struct T_addr_ack.
@@ -2625,8 +2137,9 @@ udp_addr_req(queue_t *q, mblk_t *mp)
ackmp->b_wptr = (uchar_t *)&sin6[1];
}
}
+ rw_exit(&udp->udp_rwlock);
ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
- putnext(UDP_RD(q), ackmp);
+ qreply(q, ackmp);
}
static void
@@ -2669,7 +2182,7 @@ udp_capability_req(queue_t *q, mblk_t *mp)
tcap->CAP_bits1 |= TC1_INFO;
}
- putnext(UDP_RD(q), mp);
+ qreply(q, mp);
}
/*
@@ -2688,7 +2201,7 @@ udp_info_req(queue_t *q, mblk_t *mp)
if (!mp)
return;
udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
- putnext(UDP_RD(q), mp);
+ qreply(q, mp);
}
/*
@@ -2738,7 +2251,7 @@ udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
sin6_t *sin6;
ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
-
+ ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
if (!mp)
return (mp);
@@ -2830,18 +2343,33 @@ udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
return (mp);
}
+/* For /dev/udp aka AF_INET open */
+static int
+udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
+{
+ return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
+}
+
+/* For /dev/udp6 aka AF_INET6 open */
+static int
+udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
+{
+ return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
+}
+
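
At the user level the address family now follows the device node that was opened instead of the stream's major number; a minimal illustration (my own example, not from the patch):

	#include <fcntl.h>

	int fd4 = open("/dev/udp", O_RDWR);	/* udp_openv4() -> AF_INET endpoint */
	int fd6 = open("/dev/udp6", O_RDWR);	/* udp_openv6() -> AF_INET6 endpoint */
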
/*
* This is the open routine for udp. It allocates a udp_t structure for
* the stream and, on the first open of the module, creates an ND table.
*/
-/* ARGSUSED */
+/*ARGSUSED2*/
static int
-udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
+udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
+ boolean_t isv6)
{
int err;
udp_t *udp;
conn_t *connp;
- queue_t *ip_wq;
+ dev_t conn_dev;
zoneid_t zoneid;
netstack_t *ns;
udp_stack_t *us;
@@ -2852,8 +2380,7 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
if (q->q_ptr != NULL)
return (0);
- /* If this is not a push of udp as a module, fail. */
- if (sflag != MODOPEN)
+ if (sflag == MODOPEN)
return (EINVAL);
ns = netstack_find_by_cred(credp);
@@ -2865,63 +2392,43 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
* For exclusive stacks we set the zoneid to zero
* to make UDP operate as if in the global zone.
*/
- if (us->us_netstack->netstack_stackid != GLOBAL_NETSTACKID)
+ if (ns->netstack_stackid != GLOBAL_NETSTACKID)
zoneid = GLOBAL_ZONEID;
else
zoneid = crgetzoneid(credp);
- q->q_hiwat = us->us_recv_hiwat;
- WR(q)->q_hiwat = us->us_xmit_hiwat;
- WR(q)->q_lowat = us->us_xmit_lowat;
-
- /* Insert ourselves in the stream since we're about to walk q_next */
- qprocson(q);
+ if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) {
+ netstack_rele(ns);
+ return (EBUSY);
+ }
+ *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
- udp = kmem_cache_alloc(udp_cache, KM_SLEEP);
- bzero(udp, sizeof (*udp));
+ connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns);
+ connp->conn_dev = conn_dev;
+ udp = connp->conn_udp;
/*
- * UDP is supported only as a module and it has to be pushed directly
- * above the device instance of IP. If UDP is pushed anywhere else
- * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the
- * sake of MIB browsers and fail everything else.
+ * ipcl_conn_create did a netstack_hold. Undo the hold that was
+ * done by netstack_find_by_cred()
*/
- ip_wq = WR(q)->q_next;
- if (NOT_OVER_IP(ip_wq)) {
- /* Support just SNMP for MIB browsers */
- connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP,
- us->us_netstack);
- connp->conn_rq = q;
- connp->conn_wq = WR(q);
- connp->conn_flags |= IPCL_UDPMOD;
- connp->conn_cred = credp;
- connp->conn_zoneid = zoneid;
- connp->conn_udp = udp;
- udp->udp_us = us;
- udp->udp_connp = connp;
- q->q_ptr = WR(q)->q_ptr = connp;
- crhold(credp);
- q->q_qinfo = &udp_snmp_rinit;
- WR(q)->q_qinfo = &udp_snmp_winit;
- return (0);
- }
+ netstack_rele(ns);
/*
* Initialize the udp_t structure for this stream.
*/
- q = RD(ip_wq);
- connp = Q_TO_CONN(q);
- mutex_enter(&connp->conn_lock);
- connp->conn_proto = IPPROTO_UDP;
- connp->conn_flags |= IPCL_UDP;
- connp->conn_sqp = IP_SQUEUE_GET(lbolt);
- connp->conn_udp = udp;
+ q->q_ptr = connp;
+ WR(q)->q_ptr = connp;
+ connp->conn_rq = q;
+ connp->conn_wq = WR(q);
+
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ ASSERT(connp->conn_ulp == IPPROTO_UDP);
+ ASSERT(connp->conn_udp == udp);
+ ASSERT(udp->udp_connp == connp);
/* Set the initial state of the stream and the privilege status. */
- udp->udp_connp = connp;
udp->udp_state = TS_UNBND;
- udp->udp_mode = UDP_MT_HOT;
- if (getmajor(*devp) == (major_t)UDP6_MAJ) {
+ if (isv6) {
udp->udp_family = AF_INET6;
udp->udp_ipversion = IPV6_VERSION;
udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
@@ -2938,6 +2445,7 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
}
udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ udp->udp_pending_op = -1;
connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
connp->conn_zoneid = zoneid;
@@ -2951,41 +2459,45 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
if (getpflags(NET_MAC_AWARE, credp) != 0)
udp->udp_mac_exempt = B_TRUE;
- if (connp->conn_flags & IPCL_SOCKET) {
+ if (flag & SO_SOCKSTR) {
+ connp->conn_flags |= IPCL_SOCKET;
udp->udp_issocket = B_TRUE;
udp->udp_direct_sockfs = B_TRUE;
}
connp->conn_ulp_labeled = is_system_labeled();
- mutex_exit(&connp->conn_lock);
udp->udp_us = us;
- /*
- * The transmit hiwat/lowat is only looked at on IP's queue.
- * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF
- * getsockopts.
- */
q->q_hiwat = us->us_recv_hiwat;
WR(q)->q_hiwat = us->us_xmit_hiwat;
WR(q)->q_lowat = us->us_xmit_lowat;
+ connp->conn_recv = udp_input;
+ crhold(credp);
+ connp->conn_cred = credp;
+
+ mutex_enter(&connp->conn_lock);
+ connp->conn_state_flags &= ~CONN_INCIPIENT;
+ mutex_exit(&connp->conn_lock);
+
+ qprocson(q);
+
if (udp->udp_family == AF_INET6) {
/* Build initial header template for transmit */
- if ((err = udp_build_hdrs(q, udp)) != 0) {
- /* XXX missing free of connp? crfree? netstack_rele? */
- qprocsoff(UDP_RD(q));
- udp->udp_connp = NULL;
- connp->conn_udp = NULL;
- kmem_cache_free(udp_cache, udp);
+ if ((err = udp_build_hdrs(udp)) != 0) {
+ rw_exit(&udp->udp_rwlock);
+ qprocsoff(q);
+ ipcl_conn_destroy(connp);
return (err);
}
}
+ rw_exit(&udp->udp_rwlock);
/* Set the Stream head write offset and high watermark. */
- (void) mi_set_sth_wroff(UDP_RD(q),
+ (void) mi_set_sth_wroff(q,
udp->udp_max_hdr_len + us->us_wroff_extra);
- (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat));
+ (void) mi_set_sth_hiwat(q, udp_set_rcv_hiwat(udp, q->q_hiwat));
return (0);
}
@@ -3006,7 +2518,7 @@ udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
*/
/* ARGSUSED */
int
-udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
+udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
udp_t *udp = Q_TO_UDP(q);
udp_stack_t *us = udp->udp_us;
@@ -3041,12 +2553,11 @@ udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
}
/*
- * This routine retrieves the current status of socket options
- * and expects the caller to pass in the queue pointer of the
- * upper instance. It returns the size of the option retrieved.
+ * This routine retrieves the current status of socket options.
+ * It returns the size of the option retrieved.
*/
int
-udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
+udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
int *i1 = (int *)ptr;
conn_t *connp;
@@ -3055,7 +2566,6 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
int len;
udp_stack_t *us;
- q = UDP_WR(q);
connp = Q_TO_CONN(q);
udp = connp->conn_udp;
ipp = &udp->udp_sticky_ipp;
@@ -3368,13 +2878,26 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
return (sizeof (int));
}
+int
+udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
+{
+ udp_t *udp;
+ int err;
+
+ udp = Q_TO_UDP(q);
+
+ rw_enter(&udp->udp_rwlock, RW_READER);
+ err = udp_opt_get_locked(q, level, name, ptr);
+ rw_exit(&udp->udp_rwlock);
+ return (err);
+}
+
/*
- * This routine sets socket options; it expects the caller
- * to pass in the queue pointer of the upper instance.
+ * This routine sets socket options.
*/
/* ARGSUSED */
int
-udp_opt_set(queue_t *q, uint_t optset_context, int level,
+udp_opt_set_locked(queue_t *q, uint_t optset_context, int level,
int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
@@ -3387,8 +2910,8 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
udp_t *udp;
uint_t newlen;
udp_stack_t *us;
+ size_t sth_wroff;
- q = UDP_WR(q);
connp = Q_TO_CONN(q);
udp = connp->conn_udp;
us = udp->udp_us;
@@ -3479,7 +3002,6 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
}
if (!checkonly) {
q->q_hiwat = *i1;
- WR(UDP_RD(q))->q_hiwat = *i1;
}
break;
case SO_RCVBUF:
@@ -3489,9 +3011,10 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
}
if (!checkonly) {
RD(q)->q_hiwat = *i1;
- UDP_RD(q)->q_hiwat = *i1;
- (void) mi_set_sth_hiwat(UDP_RD(q),
+ rw_exit(&udp->udp_rwlock);
+ (void) mi_set_sth_hiwat(RD(q),
udp_set_rcv_hiwat(udp, *i1));
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
}
break;
case SO_DGRAM_ERRIND:
@@ -3588,6 +3111,10 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
if (checkonly)
break;
+ /*
+ * Update the stored options taking into account
+ * any CIPSO option which we should not overwrite.
+ */
if (!tsol_option_set(&udp->udp_ip_snd_options,
&udp->udp_ip_snd_options_len,
udp->udp_label_len, invalp, inlen)) {
@@ -3597,8 +3124,10 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
UDPH_SIZE + udp->udp_ip_snd_options_len;
- (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
- us->us_wroff_extra);
+ sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
+ rw_exit(&udp->udp_rwlock);
+ (void) mi_set_sth_wroff(RD(q), sth_wroff);
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
break;
case IP_TTL:
@@ -3784,7 +3313,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
ipp->ipp_fields |= IPPF_UNICAST_HOPS;
}
/* Rebuild the header template */
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0) {
*outlenp = 0;
return (error);
@@ -3921,7 +3450,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
ipp->ipp_fields &= ~IPPF_ADDR;
}
if (sticky) {
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0)
return (error);
}
@@ -3967,7 +3496,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
ipp->ipp_fields |= IPPF_TCLASS;
}
if (sticky) {
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0)
return (error);
}
@@ -4001,7 +3530,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
ipp->ipp_fields &= ~IPPF_NEXTHOP;
}
if (sticky) {
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0)
return (error);
}
@@ -4032,7 +3561,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
ipp->ipp_fields |= IPPF_HOPOPTS;
}
if (sticky) {
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0)
return (error);
}
@@ -4072,7 +3601,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
ipp->ipp_fields |= IPPF_RTDSTOPTS;
}
if (sticky) {
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0)
return (error);
}
@@ -4111,7 +3640,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
ipp->ipp_fields |= IPPF_DSTOPTS;
}
if (sticky) {
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0)
return (error);
}
@@ -4150,7 +3679,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
ipp->ipp_fields |= IPPF_RTHDR;
}
if (sticky) {
- error = udp_build_hdrs(q, udp);
+ error = udp_build_hdrs(udp);
if (error != 0)
return (error);
}
@@ -4265,6 +3794,23 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
return (0);
}
+int
+udp_opt_set(queue_t *q, uint_t optset_context, int level,
+ int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
+ uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
+{
+ udp_t *udp;
+ int err;
+
+ udp = Q_TO_UDP(q);
+
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ err = udp_opt_set_locked(q, optset_context, level, name, inlen, invalp,
+ outlenp, outvalp, thisdg_attrs, cr, mblk);
+ rw_exit(&udp->udp_rwlock);
+ return (err);
+}
+
/*
* Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
* The headers include ip6i_t (if needed), ip6_t, any sticky extension
@@ -4272,7 +3818,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level,
* Returns failure if can't allocate memory.
*/
static int
-udp_build_hdrs(queue_t *q, udp_t *udp)
+udp_build_hdrs(udp_t *udp)
{
udp_stack_t *us = udp->udp_us;
uchar_t *hdrs;
@@ -4281,7 +3827,9 @@ udp_build_hdrs(queue_t *q, udp_t *udp)
ip6i_t *ip6i;
udpha_t *udpha;
ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
+ size_t sth_wroff;
+ ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
ASSERT(hdrs_len != 0);
if (hdrs_len != udp->udp_sticky_hdrs_len) {
@@ -4317,8 +3865,10 @@ udp_build_hdrs(queue_t *q, udp_t *udp)
/* Try to get everything in a single mblk */
if (hdrs_len > udp->udp_max_hdr_len) {
udp->udp_max_hdr_len = hdrs_len;
- (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
- us->us_wroff_extra);
+ sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
+ rw_exit(&udp->udp_rwlock);
+ (void) mi_set_sth_wroff(udp->udp_connp->conn_rq, sth_wroff);
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
}
return (0);
}
@@ -4556,12 +4106,48 @@ copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
return (tlen);
}
+/*
+ * Update udp_ip_rcv_options from the packet.
+ * Called when options are received, and also when no options are received
+ * but udp_ip_rcv_options_len has previously recorded options.
+ */
+static void
+udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len)
+{
+ /* Save the options if any */
+ if (opt_len > 0) {
+ if (opt_len > udp->udp_ip_rcv_options_len) {
+ /* Need to allocate larger buffer */
+ if (udp->udp_ip_rcv_options_len != 0)
+ mi_free((char *)udp->udp_ip_rcv_options);
+ udp->udp_ip_rcv_options_len = 0;
+ udp->udp_ip_rcv_options =
+ (uchar_t *)mi_alloc(opt_len, BPRI_HI);
+ if (udp->udp_ip_rcv_options != NULL)
+ udp->udp_ip_rcv_options_len = opt_len;
+ }
+ if (udp->udp_ip_rcv_options_len != 0) {
+ bcopy(opt, udp->udp_ip_rcv_options, opt_len);
+			/* Adjust length if we are reusing the space */
+ udp->udp_ip_rcv_options_len = opt_len;
+ }
+ } else if (udp->udp_ip_rcv_options_len != 0) {
+ /* Clear out previously recorded options */
+ mi_free((char *)udp->udp_ip_rcv_options);
+ udp->udp_ip_rcv_options = NULL;
+ udp->udp_ip_rcv_options_len = 0;
+ }
+}
+
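For illustration only, a minimal userland sketch of the grow-or-reuse buffer logic that udp_save_ip_rcv_opt() above implements with mi_alloc()/mi_free(); the struct and function names here are made up for the sketch and are not kernel interfaces.

#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for the most recently received IP options. */
struct opt_cache {
	unsigned char	*buf;
	int		len;	/* bytes allocated and currently stored */
};

static void
save_options(struct opt_cache *oc, const void *opt, int opt_len)
{
	if (opt_len > 0) {
		if (opt_len > oc->len) {
			/* Need a larger buffer; discard the old one. */
			free(oc->buf);
			oc->buf = malloc(opt_len);
			oc->len = (oc->buf != NULL) ? opt_len : 0;
		}
		if (oc->len != 0) {
			memcpy(oc->buf, opt, opt_len);
			oc->len = opt_len;	/* shrink when reusing space */
		}
	} else if (oc->len != 0) {
		/* No options in this packet; clear what was recorded. */
		free(oc->buf);
		oc->buf = NULL;
		oc->len = 0;
	}
}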
+/* ARGSUSED2 */
static void
-udp_input(conn_t *connp, mblk_t *mp)
+udp_input(void *arg1, mblk_t *mp, void *arg2)
{
+ conn_t *connp = (conn_t *)arg1;
struct T_unitdata_ind *tudi;
uchar_t *rptr; /* Pointer to IP header */
int hdr_length; /* Length of IP+UDP headers */
+ int opt_len;
int udi_size; /* Size of T_unitdata_ind */
int mp_len;
udp_t *udp;
@@ -4574,13 +4160,13 @@ udp_input(conn_t *connp, mblk_t *mp)
mblk_t *options_mp = NULL;
ip_pktinfo_t *pinfo = NULL;
cred_t *cr = NULL;
- queue_t *q = connp->conn_rq;
pid_t cpid;
+ uint32_t udp_ip_rcv_options_len;
+ udp_bits_t udp_bits;
cred_t *rcr = connp->conn_cred;
udp_stack_t *us;
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START,
- "udp_rput_start: q %p mp %p", q, mp);
+ ASSERT(connp->conn_flags & IPCL_UDPCONN);
udp = connp->conn_udp;
us = udp->udp_us;
@@ -4599,7 +4185,7 @@ udp_input(conn_t *connp, mblk_t *mp)
IN_PKTINFO) {
/*
* IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
- * has been appended to the packet by IP. We need to
+ * has been prepended to the packet by IP. We need to
* extract the mblk and adjust the rptr
*/
pinfo = (ip_pktinfo_t *)mp->b_rptr;
@@ -4611,9 +4197,7 @@ udp_input(conn_t *connp, mblk_t *mp)
/*
* ICMP messages.
*/
- udp_icmp_error(q, mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_end: q %p (%S)", q, "m_ctl");
+ udp_icmp_error(connp->conn_rq, mp);
return;
}
}
@@ -4623,53 +4207,37 @@ udp_input(conn_t *connp, mblk_t *mp)
* This is the inbound data path.
* First, we check to make sure the IP version number is correct,
* and then pull the IP and UDP headers into the first mblk.
- * Assume IP provides aligned packets - otherwise toss.
- * Also, check if we have a complete IP header.
*/
/* Initialize regardless if ipversion is IPv4 or IPv6 */
ipp.ipp_fields = 0;
ipversion = IPH_HDR_VERSION(rptr);
+
+ rw_enter(&udp->udp_rwlock, RW_READER);
+ udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len;
+ udp_bits = udp->udp_bits;
+ rw_exit(&udp->udp_rwlock);
+
switch (ipversion) {
case IPV4_VERSION:
ASSERT(MBLKL(mp) >= sizeof (ipha_t));
ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
- if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) ||
- (udp->udp_ip_rcv_options_len)) {
+ opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE);
+ if ((opt_len > 0 || udp_ip_rcv_options_len > 0) &&
+ udp->udp_family == AF_INET) {
/*
- * Handle IPv4 packets with options outside of the
- * main data path. Not needed for AF_INET6 sockets
+ * Record/update udp_ip_rcv_options with the lock
+ * held. Not needed for AF_INET6 sockets
* since they don't support a getsockopt of IP_OPTIONS.
*/
- if (udp->udp_family == AF_INET6)
- break;
- /*
- * UDP length check performed for IPv4 packets with
- * options to check whether UDP length specified in
- * the header is the same as the physical length of
- * the packet.
- */
- udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
- if (mp_len != (ntohs(udpha->uha_length) +
- hdr_length - UDPH_SIZE)) {
- goto tossit;
- }
- /*
- * Handle the case where the packet has IP options
- * and the IP_RECVSLLA & IP_RECVIF are set
- */
- if (pinfo != NULL)
- mp = options_mp;
- udp_become_writer(connp, mp, udp_rput_other_wrapper,
- SQTAG_UDP_INPUT);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_end: q %p (%S)", q, "end");
- return;
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH,
+ opt_len);
+ rw_exit(&udp->udp_rwlock);
}
-
- /* Handle IPV6_RECVHOPLIMIT. */
+	/* Handle IPV6_RECVPKTINFO even for IPv4 packets. */
if ((udp->udp_family == AF_INET6) && (pinfo != NULL) &&
udp->udp_ip_recvpktinfo) {
if (pinfo->ip_pkt_flags & IPF_RECVIF) {
@@ -4735,8 +4303,9 @@ udp_input(conn_t *connp, mblk_t *mp)
/*
* IP inspected the UDP header thus all of it must be in the mblk.
* UDP length check is performed for IPv6 packets and IPv4 packets
- * without options to check if the size of the packet as specified
+ * to check if the size of the packet as specified
* by the header is the same as the physical size of the packet.
+ * FIXME? Didn't IP already check this?
*/
udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
if ((MBLKL(mp) < hdr_length) ||
@@ -4744,8 +4313,9 @@ udp_input(conn_t *connp, mblk_t *mp)
goto tossit;
}
- /* Walk past the headers. */
- if (!udp->udp_rcvhdr) {
+
+ /* Walk past the headers unless IP_RECVHDR was set. */
+ if (!udp_bits.udpb_rcvhdr) {
mp->b_rptr = rptr + hdr_length;
mp_len -= hdr_length;
}
@@ -4760,56 +4330,62 @@ udp_input(conn_t *connp, mblk_t *mp)
ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
/*
- * Normally only send up the address.
+ * Normally only send up the source address.
* If IP_RECVDSTADDR is set we include the destination IP
* address as an option. With IP_RECVOPTS we include all
- * the IP options. Only ip_rput_other() handles packets
- * that contain IP options.
+ * the IP options.
*/
udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
- if (udp->udp_recvdstaddr) {
+ if (udp_bits.udpb_recvdstaddr) {
udi_size += sizeof (struct T_opthdr) +
sizeof (struct in_addr);
UDP_STAT(us, udp_in_recvdstaddr);
}
- if (udp->udp_ip_recvpktinfo && (pinfo != NULL) &&
+ if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) &&
(pinfo->ip_pkt_flags & IPF_RECVADDR)) {
udi_size += sizeof (struct T_opthdr) +
sizeof (struct in_pktinfo);
- UDP_STAT(us, udp_ip_recvpktinfo);
+ UDP_STAT(us, udp_ip_rcvpktinfo);
+ }
+
+ if ((udp_bits.udpb_recvopts) && opt_len > 0) {
+ udi_size += sizeof (struct T_opthdr) + opt_len;
+ UDP_STAT(us, udp_in_recvopts);
}
/*
* If the IP_RECVSLLA or the IP_RECVIF is set then allocate
* space accordingly
*/
- if (udp->udp_recvif && (pinfo != NULL) &&
+ if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
(pinfo->ip_pkt_flags & IPF_RECVIF)) {
udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
UDP_STAT(us, udp_in_recvif);
}
- if (udp->udp_recvslla && (pinfo != NULL) &&
+ if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
(pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
udi_size += sizeof (struct T_opthdr) +
sizeof (struct sockaddr_dl);
UDP_STAT(us, udp_in_recvslla);
}
- if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
+ if ((udp_bits.udpb_recvucred) &&
+ (cr = DB_CRED(mp)) != NULL) {
udi_size += sizeof (struct T_opthdr) + ucredsize;
cpid = DB_CPID(mp);
UDP_STAT(us, udp_in_recvucred);
}
+ /* XXX FIXME: apply to AF_INET6 as well */
/*
* If SO_TIMESTAMP is set allocate the appropriate sized
* buffer. Since gethrestime() expects a pointer aligned
* argument, we allocate space necessary for extra
* alignment (even though it might not be used).
*/
- if (udp->udp_timestamp) {
+ if (udp_bits.udpb_timestamp) {
udi_size += sizeof (struct T_opthdr) +
sizeof (timestruc_t) + _POINTER_ALIGNMENT;
UDP_STAT(us, udp_in_timestamp);
@@ -4818,11 +4394,10 @@ udp_input(conn_t *connp, mblk_t *mp)
/*
* If IP_RECVTTL is set allocate the appropriate sized buffer
*/
- if (udp->udp_recvttl) {
+ if (udp_bits.udpb_recvttl) {
udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
UDP_STAT(us, udp_in_recvttl);
}
- ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH);
/* Allocate a message block for the T_UNITDATA_IND structure. */
mp1 = allocb(udi_size, BPRI_MED);
@@ -4830,9 +4405,7 @@ udp_input(conn_t *connp, mblk_t *mp)
freemsg(mp);
if (options_mp != NULL)
freeb(options_mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_end: q %p (%S)", q, "allocbfail");
- BUMP_MIB(&udp->udp_mib, udpInErrors);
+ BUMP_MIB(&us->us_udp_mib, udpInErrors);
return;
}
mp1->b_cont = mp;
@@ -4866,7 +4439,7 @@ udp_input(conn_t *connp, mblk_t *mp)
char *dstopt;
dstopt = (char *)&sin[1];
- if (udp->udp_recvdstaddr) {
+ if (udp_bits.udpb_recvdstaddr) {
struct T_opthdr *toh;
ipaddr_t *dstptr;
@@ -4879,11 +4452,26 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += sizeof (struct T_opthdr);
dstptr = (ipaddr_t *)dstopt;
*dstptr = ((ipha_t *)rptr)->ipha_dst;
- dstopt = (char *)toh + toh->len;
+ dstopt += sizeof (ipaddr_t);
+ udi_size -= toh->len;
+ }
+
+ if (udp_bits.udpb_recvopts && opt_len > 0) {
+ struct T_opthdr *toh;
+
+ toh = (struct T_opthdr *)dstopt;
+ toh->level = IPPROTO_IP;
+ toh->name = IP_RECVOPTS;
+ toh->len = sizeof (struct T_opthdr) + opt_len;
+ toh->status = 0;
+ dstopt += sizeof (struct T_opthdr);
+ bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt,
+ opt_len);
+ dstopt += opt_len;
udi_size -= toh->len;
}
- if (udp->udp_ip_recvpktinfo && (pinfo != NULL) &&
+ if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) &&
(pinfo->ip_pkt_flags & IPF_RECVADDR)) {
struct T_opthdr *toh;
struct in_pktinfo *pktinfop;
@@ -4906,7 +4494,7 @@ udp_input(conn_t *connp, mblk_t *mp)
udi_size -= toh->len;
}
- if (udp->udp_recvslla && (pinfo != NULL) &&
+ if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
(pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
struct T_opthdr *toh;
@@ -4922,11 +4510,11 @@ udp_input(conn_t *connp, mblk_t *mp)
dstptr = (struct sockaddr_dl *)dstopt;
bcopy(&pinfo->ip_pkt_slla, dstptr,
sizeof (struct sockaddr_dl));
- dstopt = (char *)toh + toh->len;
+ dstopt += sizeof (struct sockaddr_dl);
udi_size -= toh->len;
}
- if (udp->udp_recvif && (pinfo != NULL) &&
+ if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
(pinfo->ip_pkt_flags & IPF_RECVIF)) {
struct T_opthdr *toh;
@@ -4941,7 +4529,7 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += sizeof (struct T_opthdr);
dstptr = (uint_t *)dstopt;
*dstptr = pinfo->ip_pkt_ifindex;
- dstopt = (char *)toh + toh->len;
+ dstopt += sizeof (uint_t);
udi_size -= toh->len;
}
@@ -4953,12 +4541,13 @@ udp_input(conn_t *connp, mblk_t *mp)
toh->name = SCM_UCRED;
toh->len = sizeof (struct T_opthdr) + ucredsize;
toh->status = 0;
- (void) cred2ucred(cr, cpid, &toh[1], rcr);
- dstopt = (char *)toh + toh->len;
+ dstopt += sizeof (struct T_opthdr);
+ (void) cred2ucred(cr, cpid, dstopt, rcr);
+ dstopt += ucredsize;
udi_size -= toh->len;
}
- if (udp->udp_timestamp) {
+ if (udp_bits.udpb_timestamp) {
struct T_opthdr *toh;
toh = (struct T_opthdr *)dstopt;
@@ -4984,7 +4573,7 @@ udp_input(conn_t *connp, mblk_t *mp)
* any option processing after this will
* cause alignment panic.
*/
- if (udp->udp_recvttl) {
+ if (udp_bits.udpb_recvttl) {
struct T_opthdr *toh;
uint8_t *dstptr;
@@ -4997,7 +4586,7 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += sizeof (struct T_opthdr);
dstptr = (uint8_t *)dstopt;
*dstptr = ((ipha_t *)rptr)->ipha_ttl;
- dstopt = (char *)toh + toh->len;
+ dstopt += sizeof (uint8_t);
udi_size -= toh->len;
}
@@ -5013,15 +4602,12 @@ udp_input(conn_t *connp, mblk_t *mp)
* Normally we only send up the address. If receiving of any
* optional receive side information is enabled, we also send
* that up as options.
- * [ Only udp_rput_other() handles packets that contain IP
- * options so code to account for does not appear immediately
- * below but elsewhere ]
*/
udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
IPPF_RTHDR|IPPF_IFINDEX)) {
- if (udp->udp_ipv6_recvhopopts &&
+ if ((udp_bits.udpb_ipv6_recvhopopts) &&
(ipp.ipp_fields & IPPF_HOPOPTS)) {
size_t hlen;
@@ -5031,29 +4617,29 @@ udp_input(conn_t *connp, mblk_t *mp)
ipp.ipp_fields &= ~IPPF_HOPOPTS;
udi_size += hlen;
}
- if ((udp->udp_ipv6_recvdstopts ||
- udp->udp_old_ipv6_recvdstopts) &&
+ if (((udp_bits.udpb_ipv6_recvdstopts) ||
+ udp_bits.udpb_old_ipv6_recvdstopts) &&
(ipp.ipp_fields & IPPF_DSTOPTS)) {
udi_size += sizeof (struct T_opthdr) +
ipp.ipp_dstoptslen;
UDP_STAT(us, udp_in_recvdstopts);
}
- if (((udp->udp_ipv6_recvdstopts &&
- udp->udp_ipv6_recvrthdr &&
+ if ((((udp_bits.udpb_ipv6_recvdstopts) &&
+ udp_bits.udpb_ipv6_recvrthdr &&
(ipp.ipp_fields & IPPF_RTHDR)) ||
- udp->udp_ipv6_recvrthdrdstopts) &&
+ (udp_bits.udpb_ipv6_recvrthdrdstopts)) &&
(ipp.ipp_fields & IPPF_RTDSTOPTS)) {
udi_size += sizeof (struct T_opthdr) +
ipp.ipp_rtdstoptslen;
UDP_STAT(us, udp_in_recvrtdstopts);
}
- if (udp->udp_ipv6_recvrthdr &&
+ if ((udp_bits.udpb_ipv6_recvrthdr) &&
(ipp.ipp_fields & IPPF_RTHDR)) {
udi_size += sizeof (struct T_opthdr) +
ipp.ipp_rthdrlen;
UDP_STAT(us, udp_in_recvrthdr);
}
- if (udp->udp_ip_recvpktinfo &&
+ if ((udp_bits.udpb_ip_recvpktinfo) &&
(ipp.ipp_fields & IPPF_IFINDEX)) {
udi_size += sizeof (struct T_opthdr) +
sizeof (struct in6_pktinfo);
@@ -5061,18 +4647,19 @@ udp_input(conn_t *connp, mblk_t *mp)
}
}
- if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
+ if ((udp_bits.udpb_recvucred) &&
+ (cr = DB_CRED(mp)) != NULL) {
udi_size += sizeof (struct T_opthdr) + ucredsize;
cpid = DB_CPID(mp);
UDP_STAT(us, udp_in_recvucred);
}
- if (udp->udp_ipv6_recvhoplimit) {
+ if (udp_bits.udpb_ipv6_recvhoplimit) {
udi_size += sizeof (struct T_opthdr) + sizeof (int);
UDP_STAT(us, udp_in_recvhoplimit);
}
- if (udp->udp_ipv6_recvtclass) {
+ if (udp_bits.udpb_ipv6_recvtclass) {
udi_size += sizeof (struct T_opthdr) + sizeof (int);
UDP_STAT(us, udp_in_recvtclass);
}
@@ -5082,9 +4669,7 @@ udp_input(conn_t *connp, mblk_t *mp)
freemsg(mp);
if (options_mp != NULL)
freeb(options_mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_end: q %p (%S)", q, "allocbfail");
- BUMP_MIB(&udp->udp_mib, udpInErrors);
+ BUMP_MIB(&us->us_udp_mib, udpInErrors);
return;
}
mp1->b_cont = mp;
@@ -5132,7 +4717,7 @@ udp_input(conn_t *connp, mblk_t *mp)
uchar_t *dstopt;
dstopt = (uchar_t *)&sin6[1];
- if (udp->udp_ip_recvpktinfo &&
+ if ((udp_bits.udpb_ip_recvpktinfo) &&
(ipp.ipp_fields & IPPF_IFINDEX)) {
struct T_opthdr *toh;
struct in6_pktinfo *pkti;
@@ -5155,7 +4740,7 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += sizeof (*pkti);
udi_size -= toh->len;
}
- if (udp->udp_ipv6_recvhoplimit) {
+ if (udp_bits.udpb_ipv6_recvhoplimit) {
struct T_opthdr *toh;
toh = (struct T_opthdr *)dstopt;
@@ -5173,7 +4758,7 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += sizeof (uint_t);
udi_size -= toh->len;
}
- if (udp->udp_ipv6_recvtclass) {
+ if (udp_bits.udpb_ipv6_recvtclass) {
struct T_opthdr *toh;
toh = (struct T_opthdr *)dstopt;
@@ -5194,7 +4779,7 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += sizeof (uint_t);
udi_size -= toh->len;
}
- if (udp->udp_ipv6_recvhopopts &&
+ if ((udp_bits.udpb_ipv6_recvhopopts) &&
(ipp.ipp_fields & IPPF_HOPOPTS)) {
size_t hlen;
@@ -5202,8 +4787,8 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += hlen;
udi_size -= hlen;
}
- if (udp->udp_ipv6_recvdstopts &&
- udp->udp_ipv6_recvrthdr &&
+ if ((udp_bits.udpb_ipv6_recvdstopts) &&
+ (udp_bits.udpb_ipv6_recvrthdr) &&
(ipp.ipp_fields & IPPF_RTHDR) &&
(ipp.ipp_fields & IPPF_RTDSTOPTS)) {
struct T_opthdr *toh;
@@ -5220,7 +4805,7 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += ipp.ipp_rtdstoptslen;
udi_size -= toh->len;
}
- if (udp->udp_ipv6_recvrthdr &&
+ if ((udp_bits.udpb_ipv6_recvrthdr) &&
(ipp.ipp_fields & IPPF_RTHDR)) {
struct T_opthdr *toh;
@@ -5235,7 +4820,7 @@ udp_input(conn_t *connp, mblk_t *mp)
dstopt += ipp.ipp_rthdrlen;
udi_size -= toh->len;
}
- if (udp->udp_ipv6_recvdstopts &&
+ if ((udp_bits.udpb_ipv6_recvdstopts) &&
(ipp.ipp_fields & IPPF_DSTOPTS)) {
struct T_opthdr *toh;
@@ -5271,20 +4856,18 @@ udp_input(conn_t *connp, mblk_t *mp)
/* No IP_RECVDSTADDR for IPv6. */
}
- BUMP_MIB(&udp->udp_mib, udpHCInDatagrams);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_end: q %p (%S)", q, "end");
+ BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
if (options_mp != NULL)
freeb(options_mp);
- if (udp->udp_direct_sockfs) {
+ if (udp_bits.udpb_direct_sockfs) {
/*
* There is nothing above us except for the stream head;
* use the read-side synchronous stream interface in
* order to reduce the time spent in interrupt thread.
*/
ASSERT(udp->udp_issocket);
- udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len);
+ udp_rcv_enqueue(connp->conn_rq, udp, mp, mp_len);
} else {
/*
* Use regular STREAMS interface to pass data upstream
@@ -5292,7 +4875,7 @@ udp_input(conn_t *connp, mblk_t *mp)
* switched over to the slow mode due to sockmod being
* popped or a module being pushed on top of us.
*/
- putnext(UDP_RD(q), mp);
+ putnext(connp->conn_rq, mp);
}
return;
@@ -5300,472 +4883,79 @@ tossit:
freemsg(mp);
if (options_mp != NULL)
freeb(options_mp);
- BUMP_MIB(&udp->udp_mib, udpInErrors);
-}
-
-void
-udp_conn_recv(conn_t *connp, mblk_t *mp)
-{
- _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT);
-}
-
-/* ARGSUSED */
-static void
-udp_input_wrapper(void *arg, mblk_t *mp, void *arg2)
-{
- udp_input((conn_t *)arg, mp);
- _UDP_EXIT((conn_t *)arg);
+ BUMP_MIB(&us->us_udp_mib, udpInErrors);
}
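A minimal sketch of the locking pattern udp_input() follows above: copy udp_ip_rcv_options_len and the udp_bits option flags while holding udp_rwlock as a reader, drop the lock, then build the upstream message from the local snapshot only. The types and names below are stand-ins for the kernel ones, written as self-contained userland C.

#include <pthread.h>

/* Illustrative stand-ins for the flags kept in udp_bits. */
typedef struct {
	unsigned	recvdstaddr : 1;
	unsigned	recvttl : 1;
	unsigned	recvopts : 1;
} opt_flags_t;

typedef struct {
	pthread_rwlock_t	lock;		/* plays the role of udp_rwlock */
	opt_flags_t		flags;
	unsigned int		rcv_opt_len;
} endpoint_t;

/*
 * Copy the mutable per-endpoint state under the reader lock, then let the
 * caller build the upstream message without holding any lock.
 */
static opt_flags_t
snapshot_flags(endpoint_t *ep, unsigned int *opt_lenp)
{
	opt_flags_t copy;

	pthread_rwlock_rdlock(&ep->lock);
	copy = ep->flags;
	*opt_lenp = ep->rcv_opt_len;
	pthread_rwlock_unlock(&ep->lock);
	return (copy);
}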
/*
- * Process non-M_DATA messages as well as M_DATA messages that requires
- * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options.
+ * Handle the results of a T_BIND_REQ whether deferred by IP or handled
+ * immediately.
*/
static void
-udp_rput_other(queue_t *q, mblk_t *mp)
+udp_bind_result(conn_t *connp, mblk_t *mp)
{
- struct T_unitdata_ind *tudi;
- mblk_t *mp1;
- uchar_t *rptr;
- uchar_t *new_rptr;
- int hdr_length;
- int udi_size; /* Size of T_unitdata_ind */
- int opt_len; /* Length of IP options */
- sin_t *sin;
struct T_error_ack *tea;
- mblk_t *options_mp = NULL;
- ip_pktinfo_t *pinfo;
- boolean_t recv_on = B_FALSE;
- cred_t *cr = NULL;
- udp_t *udp = Q_TO_UDP(q);
- pid_t cpid;
- cred_t *rcr = udp->udp_connp->conn_cred;
- udp_stack_t *us = udp->udp_us;
-
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START,
- "udp_rput_other: q %p mp %p", q, mp);
-
- ASSERT(OK_32PTR(mp->b_rptr));
- rptr = mp->b_rptr;
switch (mp->b_datap->db_type) {
- case M_CTL:
- /*
- * We are here only if IP_RECVSLLA and/or IP_RECVIF are set
- */
- recv_on = B_TRUE;
- options_mp = mp;
- pinfo = (ip_pktinfo_t *)options_mp->b_rptr;
-
- /*
- * The actual data is in mp->b_cont
- */
- mp = mp->b_cont;
- ASSERT(OK_32PTR(mp->b_rptr));
- rptr = mp->b_rptr;
- break;
- case M_DATA:
- /*
- * M_DATA messages contain IPv4 datagrams. They are handled
- * after this switch.
- */
- break;
case M_PROTO:
case M_PCPROTO:
/* M_PROTO messages contain some type of TPI message. */
- ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX);
- if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
+ ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <=
+ (uintptr_t)INT_MAX);
+ if (mp->b_wptr - mp->b_rptr < sizeof (t_scalar_t)) {
freemsg(mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_other_end: q %p (%S)", q, "protoshort");
return;
}
- tea = (struct T_error_ack *)rptr;
+ tea = (struct T_error_ack *)mp->b_rptr;
switch (tea->PRIM_type) {
case T_ERROR_ACK:
switch (tea->ERROR_prim) {
case O_T_BIND_REQ:
- case T_BIND_REQ: {
- /*
- * If our O_T_BIND_REQ/T_BIND_REQ fails,
- * clear out the associated port and source
- * address before passing the message
- * upstream. If this was caused by a T_CONN_REQ
- * revert back to bound state.
- */
- udp_fanout_t *udpf;
-
- udpf = &us->us_bind_fanout[UDP_BIND_HASH(
- udp->udp_port, us->us_bind_fanout_size)];
- mutex_enter(&udpf->uf_lock);
- if (udp->udp_state == TS_DATA_XFER) {
- /* Connect failed */
- tea->ERROR_prim = T_CONN_REQ;
- /* Revert back to the bound source */
- udp->udp_v6src = udp->udp_bound_v6src;
- udp->udp_state = TS_IDLE;
- mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(q, udp);
- break;
- }
-
- if (udp->udp_discon_pending) {
- tea->ERROR_prim = T_DISCON_REQ;
- udp->udp_discon_pending = 0;
- }
- V6_SET_ZERO(udp->udp_v6src);
- V6_SET_ZERO(udp->udp_bound_v6src);
- udp->udp_state = TS_UNBND;
- udp_bind_hash_remove(udp, B_TRUE);
- udp->udp_port = 0;
- mutex_exit(&udpf->uf_lock);
- if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(q, udp);
- break;
- }
+ case T_BIND_REQ:
+ udp_bind_error(connp, mp);
+ return;
default:
break;
}
- break;
- case T_BIND_ACK:
- udp_rput_bind_ack(q, mp);
- return;
-
- case T_OPTMGMT_ACK:
- case T_OK_ACK:
- break;
- default:
+ ASSERT(0);
freemsg(mp);
return;
- }
- putnext(UDP_RD(q), mp);
- return;
- }
- /*
- * This is the inbound data path.
- * First, we make sure the data contains both IP and UDP headers.
- *
- * This handle IPv4 packets for only AF_INET sockets.
- * AF_INET6 sockets can never access udp_ip_rcv_options thus there
- * is no need saving the options.
- */
- ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
- hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
- if (mp->b_wptr - rptr < hdr_length) {
- if (!pullupmsg(mp, hdr_length)) {
- freemsg(mp);
- if (options_mp != NULL)
- freeb(options_mp);
- BUMP_MIB(&udp->udp_mib, udpInErrors);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_other_end: q %p (%S)", q, "hdrshort");
+ case T_BIND_ACK:
+ udp_bind_ack(connp, mp);
return;
- }
- rptr = mp->b_rptr;
- }
- /* Walk past the headers. */
- new_rptr = rptr + hdr_length;
- if (!udp->udp_rcvhdr)
- mp->b_rptr = new_rptr;
- /* Save the options if any */
- opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE);
- if (opt_len > 0) {
- if (opt_len > udp->udp_ip_rcv_options_len) {
- if (udp->udp_ip_rcv_options_len)
- mi_free((char *)udp->udp_ip_rcv_options);
- udp->udp_ip_rcv_options_len = 0;
- udp->udp_ip_rcv_options =
- (uchar_t *)mi_alloc(opt_len, BPRI_HI);
- if (udp->udp_ip_rcv_options)
- udp->udp_ip_rcv_options_len = opt_len;
- }
- if (udp->udp_ip_rcv_options_len) {
- bcopy(rptr + IP_SIMPLE_HDR_LENGTH,
- udp->udp_ip_rcv_options, opt_len);
- /* Adjust length if we are resusing the space */
- udp->udp_ip_rcv_options_len = opt_len;
+ default:
+ break;
}
- } else if (udp->udp_ip_rcv_options_len) {
- mi_free((char *)udp->udp_ip_rcv_options);
- udp->udp_ip_rcv_options = NULL;
- udp->udp_ip_rcv_options_len = 0;
- }
-
- /*
- * Normally only send up the address.
- * If IP_RECVDSTADDR is set we include the destination IP
- * address as an option. With IP_RECVOPTS we include all
- * the IP options.
- */
- udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
- if (udp->udp_recvdstaddr) {
- udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr);
- UDP_STAT(us, udp_in_recvdstaddr);
- }
-
- if (udp->udp_ip_recvpktinfo && recv_on &&
- (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
- udi_size += sizeof (struct T_opthdr) +
- sizeof (struct in_pktinfo);
- UDP_STAT(us, udp_ip_recvpktinfo);
- }
-
- if (udp->udp_recvopts && opt_len > 0) {
- udi_size += sizeof (struct T_opthdr) + opt_len;
- UDP_STAT(us, udp_in_recvopts);
- }
-
- /*
- * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
- * space accordingly
- */
- if (udp->udp_recvif && recv_on &&
- (pinfo->ip_pkt_flags & IPF_RECVIF)) {
- udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
- UDP_STAT(us, udp_in_recvif);
- }
-
- if (udp->udp_recvslla && recv_on &&
- (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
- udi_size += sizeof (struct T_opthdr) +
- sizeof (struct sockaddr_dl);
- UDP_STAT(us, udp_in_recvslla);
- }
-
- if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
- udi_size += sizeof (struct T_opthdr) + ucredsize;
- cpid = DB_CPID(mp);
- UDP_STAT(us, udp_in_recvucred);
- }
- /*
- * If IP_RECVTTL is set allocate the appropriate sized buffer
- */
- if (udp->udp_recvttl) {
- udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
- UDP_STAT(us, udp_in_recvttl);
- }
-
- /* Allocate a message block for the T_UNITDATA_IND structure. */
- mp1 = allocb(udi_size, BPRI_MED);
- if (mp1 == NULL) {
freemsg(mp);
- if (options_mp != NULL)
- freeb(options_mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_other_end: q %p (%S)", q, "allocbfail");
- BUMP_MIB(&udp->udp_mib, udpInErrors);
+ return;
+ default:
+ /* FIXME: other cases? */
+ ASSERT(0);
+ freemsg(mp);
return;
}
- mp1->b_cont = mp;
- mp = mp1;
- mp->b_datap->db_type = M_PROTO;
- tudi = (struct T_unitdata_ind *)mp->b_rptr;
- mp->b_wptr = (uchar_t *)tudi + udi_size;
- tudi->PRIM_type = T_UNITDATA_IND;
- tudi->SRC_length = sizeof (sin_t);
- tudi->SRC_offset = sizeof (struct T_unitdata_ind);
- tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
- udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
- tudi->OPT_length = udi_size;
-
- sin = (sin_t *)&tudi[1];
- sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
- sin->sin_port = ((in_port_t *)
- new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))];
- sin->sin_family = AF_INET;
- *(uint32_t *)&sin->sin_zero[0] = 0;
- *(uint32_t *)&sin->sin_zero[4] = 0;
-
- /*
- * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
- * IP_RECVTTL has been set.
- */
- if (udi_size != 0) {
- /*
- * Copy in destination address before options to avoid any
- * padding issues.
- */
- char *dstopt;
-
- dstopt = (char *)&sin[1];
- if (udp->udp_recvdstaddr) {
- struct T_opthdr *toh;
- ipaddr_t *dstptr;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVDSTADDR;
- toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- dstptr = (ipaddr_t *)dstopt;
- *dstptr = (((ipaddr_t *)rptr)[4]);
- dstopt += sizeof (ipaddr_t);
- udi_size -= toh->len;
- }
- if (udp->udp_recvopts && udi_size != 0) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVOPTS;
- toh->len = sizeof (struct T_opthdr) + opt_len;
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len);
- dstopt += opt_len;
- udi_size -= toh->len;
- }
- if (udp->udp_ip_recvpktinfo && recv_on &&
- (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
-
- struct T_opthdr *toh;
- struct in_pktinfo *pktinfop;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_PKTINFO;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (*pktinfop);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- pktinfop = (struct in_pktinfo *)dstopt;
- pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
- pktinfop->ipi_spec_dst = pinfo->ip_pkt_match_addr;
-
- pktinfop->ipi_addr.s_addr = ((ipha_t *)rptr)->ipha_dst;
-
- dstopt += sizeof (struct in_pktinfo);
- udi_size -= toh->len;
- }
-
- if (udp->udp_recvslla && recv_on &&
- (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
-
- struct T_opthdr *toh;
- struct sockaddr_dl *dstptr;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVSLLA;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (struct sockaddr_dl);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- dstptr = (struct sockaddr_dl *)dstopt;
- bcopy(&pinfo->ip_pkt_slla, dstptr,
- sizeof (struct sockaddr_dl));
- dstopt += sizeof (struct sockaddr_dl);
- udi_size -= toh->len;
- }
-
- if (udp->udp_recvif && recv_on &&
- (pinfo->ip_pkt_flags & IPF_RECVIF)) {
-
- struct T_opthdr *toh;
- uint_t *dstptr;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVIF;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (uint_t);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- dstptr = (uint_t *)dstopt;
- *dstptr = pinfo->ip_pkt_ifindex;
- dstopt += sizeof (uint_t);
- udi_size -= toh->len;
- }
-
- if (cr != NULL) {
- struct T_opthdr *toh;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = SOL_SOCKET;
- toh->name = SCM_UCRED;
- toh->len = sizeof (struct T_opthdr) + ucredsize;
- toh->status = 0;
- (void) cred2ucred(cr, cpid, &toh[1], rcr);
- dstopt += toh->len;
- udi_size -= toh->len;
- }
-
- if (udp->udp_recvttl) {
- struct T_opthdr *toh;
- uint8_t *dstptr;
-
- toh = (struct T_opthdr *)dstopt;
- toh->level = IPPROTO_IP;
- toh->name = IP_RECVTTL;
- toh->len = sizeof (struct T_opthdr) +
- sizeof (uint8_t);
- toh->status = 0;
- dstopt += sizeof (struct T_opthdr);
- dstptr = (uint8_t *)dstopt;
- *dstptr = ((ipha_t *)rptr)->ipha_ttl;
- dstopt += sizeof (uint8_t);
- udi_size -= toh->len;
- }
-
- ASSERT(udi_size == 0); /* "Consumed" all of allocated space */
- }
- BUMP_MIB(&udp->udp_mib, udpHCInDatagrams);
- TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
- "udp_rput_other_end: q %p (%S)", q, "end");
- if (options_mp != NULL)
- freeb(options_mp);
-
- if (udp->udp_direct_sockfs) {
- /*
- * There is nothing above us except for the stream head;
- * use the read-side synchronous stream interface in
- * order to reduce the time spent in interrupt thread.
- */
- ASSERT(udp->udp_issocket);
- udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp));
- } else {
- /*
- * Use regular STREAMS interface to pass data upstream
- * if this is not a socket endpoint, or if we have
- * switched over to the slow mode due to sockmod being
- * popped or a module being pushed on top of us.
- */
- putnext(UDP_RD(q), mp);
- }
-}
-
-/* ARGSUSED */
-static void
-udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
-{
- conn_t *connp = arg;
-
- udp_rput_other(connp->conn_rq, mp);
- udp_exit(connp);
}
/*
* Process a T_BIND_ACK
*/
static void
-udp_rput_bind_ack(queue_t *q, mblk_t *mp)
+udp_bind_ack(conn_t *connp, mblk_t *mp)
{
- udp_t *udp = Q_TO_UDP(q);
+ udp_t *udp = connp->conn_udp;
mblk_t *mp1;
ire_t *ire;
struct T_bind_ack *tba;
uchar_t *addrp;
ipa_conn_t *ac;
ipa6_conn_t *ac6;
+ udp_fanout_t *udpf;
+ udp_stack_t *us = udp->udp_us;
- if (udp->udp_discon_pending)
- udp->udp_discon_pending = 0;
-
+ ASSERT(udp->udp_pending_op != -1);
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
/*
* If a broadcast/multicast address was bound set
* the source address to 0.
@@ -5786,12 +4976,18 @@ udp_rput_bind_ack(queue_t *q, mblk_t *mp)
* Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
* local address.
*/
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ us->us_bind_fanout_size)];
if (ire->ire_type == IRE_BROADCAST &&
udp->udp_state != TS_DATA_XFER) {
+ ASSERT(udp->udp_pending_op == T_BIND_REQ ||
+ udp->udp_pending_op == O_T_BIND_REQ);
/* This was just a local bind to a broadcast addr */
+ mutex_enter(&udpf->uf_lock);
V6_SET_ZERO(udp->udp_v6src);
+ mutex_exit(&udpf->uf_lock);
if (udp->udp_family == AF_INET6)
- (void) udp_build_hdrs(q, udp);
+ (void) udp_build_hdrs(udp);
} else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
/*
* Local address not yet set - pick it from the
@@ -5808,8 +5004,10 @@ udp_rput_bind_ack(queue_t *q, mblk_t *mp)
sizeof (ipa_conn_x_t));
ac = &((ipa_conn_x_t *)addrp)->acx_conn;
}
+ mutex_enter(&udpf->uf_lock);
IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
&udp->udp_v6src);
+ mutex_exit(&udpf->uf_lock);
break;
case AF_INET6:
if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
@@ -5820,13 +5018,17 @@ udp_rput_bind_ack(queue_t *q, mblk_t *mp)
ac6 = &((ipa6_conn_x_t *)
addrp)->ac6x_conn;
}
+ mutex_enter(&udpf->uf_lock);
udp->udp_v6src = ac6->ac6_laddr;
- (void) udp_build_hdrs(q, udp);
+ mutex_exit(&udpf->uf_lock);
+ (void) udp_build_hdrs(udp);
break;
}
}
mp1 = mp1->b_cont;
}
+ udp->udp_pending_op = -1;
+ rw_exit(&udp->udp_rwlock);
/*
* Look for one or more appended ACK message added by
* udp_connect or udp_disconnect.
@@ -5846,20 +5048,86 @@ udp_rput_bind_ack(queue_t *q, mblk_t *mp)
while (mp != NULL) {
mp1 = mp->b_cont;
mp->b_cont = NULL;
- putnext(UDP_RD(q), mp);
+ putnext(connp->conn_rq, mp);
mp = mp1;
}
return;
}
freemsg(mp->b_cont);
mp->b_cont = NULL;
- putnext(UDP_RD(q), mp);
+ putnext(connp->conn_rq, mp);
+}
+
+static void
+udp_bind_error(conn_t *connp, mblk_t *mp)
+{
+ udp_t *udp = connp->conn_udp;
+ struct T_error_ack *tea;
+ udp_fanout_t *udpf;
+ udp_stack_t *us = udp->udp_us;
+
+ tea = (struct T_error_ack *)mp->b_rptr;
+
+ /*
+	 * If our O_T_BIND_REQ/T_BIND_REQ fails, clear out the associated
+	 * port and source address before passing the message upstream.
+	 * If this was caused by a T_CONN_REQ, revert back to the bound state.
+ */
+
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ ASSERT(udp->udp_pending_op != -1);
+ tea->ERROR_prim = udp->udp_pending_op;
+ udp->udp_pending_op = -1;
+ udpf = &us->us_bind_fanout[
+ UDP_BIND_HASH(udp->udp_port,
+ us->us_bind_fanout_size)];
+ mutex_enter(&udpf->uf_lock);
+
+ switch (tea->ERROR_prim) {
+ case T_CONN_REQ:
+ ASSERT(udp->udp_state == TS_DATA_XFER);
+ /* Connect failed */
+ /* Revert back to the bound source */
+ udp->udp_v6src = udp->udp_bound_v6src;
+ udp->udp_state = TS_IDLE;
+ mutex_exit(&udpf->uf_lock);
+ if (udp->udp_family == AF_INET6)
+ (void) udp_build_hdrs(udp);
+ rw_exit(&udp->udp_rwlock);
+ break;
+
+ case T_DISCON_REQ:
+ case T_BIND_REQ:
+ case O_T_BIND_REQ:
+ V6_SET_ZERO(udp->udp_v6src);
+ V6_SET_ZERO(udp->udp_bound_v6src);
+ udp->udp_state = TS_UNBND;
+ udp_bind_hash_remove(udp, B_TRUE);
+ udp->udp_port = 0;
+ mutex_exit(&udpf->uf_lock);
+ if (udp->udp_family == AF_INET6)
+ (void) udp_build_hdrs(udp);
+ rw_exit(&udp->udp_rwlock);
+ break;
+
+ default:
+ mutex_exit(&udpf->uf_lock);
+ rw_exit(&udp->udp_rwlock);
+ (void) mi_strlog(connp->conn_rq, 1,
+ SL_ERROR|SL_TRACE,
+		    "udp_bind_error: bad ERROR_prim, "
+		    "prim %d", tea->ERROR_prim);
+ }
+ putnext(connp->conn_rq, mp);
}
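A rough sketch of the udp_pending_op bookkeeping used by udp_unbind(), udp_bind_ack() and udp_bind_error() above: record the outstanding TPI primitive under the writer lock before handing the request to IP, reject a second operation while one is pending (the caller answers with TOUTSTATE), and reset the field when the ack or error arrives. The names below are illustrative, not kernel interfaces.

#include <pthread.h>

#define	OP_NONE		(-1)
#define	OP_BIND		1	/* stand-in for T_BIND_REQ */
#define	OP_UNBIND	2	/* stand-in for T_UNBIND_REQ */

typedef struct {
	pthread_rwlock_t	lock;		/* plays the role of udp_rwlock */
	int			pending_op;	/* OP_NONE when idle */
} endpoint_t;

/* Start an operation; fail if another one is still outstanding. */
static int
start_op(endpoint_t *ep, int op)
{
	int ok;

	pthread_rwlock_wrlock(&ep->lock);
	ok = (ep->pending_op == OP_NONE);
	if (ok)
		ep->pending_op = op;
	pthread_rwlock_unlock(&ep->lock);
	return (ok);		/* caller maps failure to TOUTSTATE */
}

/* Called when the ack or error for the outstanding request arrives. */
static void
finish_op(endpoint_t *ep)
{
	pthread_rwlock_wrlock(&ep->lock);
	ep->pending_op = OP_NONE;
	pthread_rwlock_unlock(&ep->lock);
}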
/*
- * return SNMP stuff in buffer in mpdata
+ * return SNMP stuff in buffer in mpdata. We hold no locks, so the
+ * information reported may be changing beneath us.
*/
-int
+mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
mblk_t *mpdata;
@@ -5880,11 +5148,18 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
int i;
connf_t *connfp;
conn_t *connp = Q_TO_CONN(q);
- udp_t *udp = connp->conn_udp;
int v4_conn_idx;
int v6_conn_idx;
boolean_t needattr;
+ udp_t *udp;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
+ udp_stack_t *us = connp->conn_netstack->netstack_udp;
+ mblk_t *mp2ctl;
+
+ /*
+ * make a copy of the original message
+ */
+ mp2ctl = copymsg(mpctl);
mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
if (mpctl == NULL ||
@@ -5896,23 +5171,25 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
freemsg(mp_conn_ctl);
freemsg(mp_attr_ctl);
freemsg(mp6_conn_ctl);
+ freemsg(mpctl);
+ freemsg(mp2ctl);
return (0);
}
zoneid = connp->conn_zoneid;
/* fixed length structure for IPv4 and IPv6 counters */
- SET_MIB(udp->udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
- SET_MIB(udp->udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
+ SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
+ SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
/* synchronize 64- and 32-bit counters */
- SYNC32_MIB(&udp->udp_mib, udpInDatagrams, udpHCInDatagrams);
- SYNC32_MIB(&udp->udp_mib, udpOutDatagrams, udpHCOutDatagrams);
+ SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
+ SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);
optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
optp->level = MIB2_UDP;
optp->name = 0;
- (void) snmp_append_data(mpdata, (char *)&udp->udp_mib,
- sizeof (udp->udp_mib));
+ (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
+ sizeof (us->us_udp_mib));
optp->len = msgdsize(mpdata);
qreply(q, mpctl);
@@ -5924,7 +5201,7 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
connp = NULL;
while ((connp = ipcl_get_next_conn(connfp, connp,
- IPCL_UDP))) {
+ IPCL_UDPCONN))) {
udp = connp->conn_udp;
if (zoneid != connp->conn_zoneid)
continue;
@@ -6088,7 +5365,7 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl)
else
qreply(q, mp6_attr_ctl);
- return (1);
+ return (mp2ctl);
}
/*
@@ -6190,7 +5467,7 @@ udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
connp = NULL;
while ((connp = ipcl_get_next_conn(connfp, connp,
- IPCL_UDP))) {
+ IPCL_UDPCONN))) {
udp = connp->conn_udp;
if (zoneid != GLOBAL_ZONEID &&
zoneid != connp->conn_zoneid)
@@ -6246,7 +5523,7 @@ udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
(char *)optaddr, optlen, err);
if (mp1 != NULL)
- putnext(UDP_RD(q), mp1);
+ qreply(q, mp1);
done:
freemsg(mp);
@@ -6260,12 +5537,9 @@ static void
udp_unbind(queue_t *q, mblk_t *mp)
{
udp_t *udp = Q_TO_UDP(q);
+ udp_fanout_t *udpf;
+ udp_stack_t *us = udp->udp_us;
- /* If a bind has not been done, we can't unbind. */
- if (udp->udp_state == TS_UNBND) {
- udp_err_ack(q, mp, TOUTSTATE, 0);
- return;
- }
if (cl_inet_unbind != NULL) {
/*
* Running in cluster mode - register unbind information
@@ -6281,29 +5555,44 @@ udp_unbind(queue_t *q, mblk_t *mp)
}
}
- udp_bind_hash_remove(udp, B_FALSE);
- V6_SET_ZERO(udp->udp_v6src);
- V6_SET_ZERO(udp->udp_bound_v6src);
- udp->udp_port = 0;
- udp->udp_state = TS_UNBND;
-
- if (udp->udp_family == AF_INET6) {
- int error;
-
- /* Rebuild the header template */
- error = udp_build_hdrs(q, udp);
- if (error != 0) {
- udp_err_ack(q, mp, TSYSERR, error);
- return;
- }
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
+ rw_exit(&udp->udp_rwlock);
+ udp_err_ack(q, mp, TOUTSTATE, 0);
+ return;
}
+ udp->udp_pending_op = T_UNBIND_REQ;
+ rw_exit(&udp->udp_rwlock);
+
/*
* Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
* and therefore ip_unbind must never return NULL.
*/
mp = ip_unbind(q, mp);
ASSERT(mp != NULL);
- putnext(UDP_RD(q), mp);
+ ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
+
+ /*
+ * Once we're unbound from IP, the pending operation may be cleared
+ * here.
+ */
+ rw_enter(&udp->udp_rwlock, RW_WRITER);
+ udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
+ us->us_bind_fanout_size)];
+ mutex_enter(&udpf->uf_lock);
+ udp_bind_hash_remove(udp, B_TRUE);
+ V6_SET_ZERO(udp->udp_v6src);
+ V6_SET_ZERO(udp->udp_bound_v6src);
+ udp->udp_port = 0;
+ mutex_exit(&udpf->uf_lock);
+
+ udp->udp_pending_op = -1;
+ udp->udp_state = TS_UNBND;
+ if (udp->udp_family == AF_INET6)
+ (void) udp_build_hdrs(udp);
+ rw_exit(&udp->udp_rwlock);
+
+ qreply(q, mp);
}
/*
@@ -6381,10 +5670,11 @@ udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst)
int err;
uchar_t opt_storage[IP_MAX_OPT_LENGTH];
udp_t *udp = Q_TO_UDP(wq);
+ udp_stack_t *us = udp->udp_us;
err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst,
opt_storage, udp->udp_mac_exempt,
- udp->udp_us->us_netstack->netstack_ip);
+ us->us_netstack->netstack_ip);
if (err == 0) {
err = tsol_update_options(&udp->udp_ip_snd_options,
&udp->udp_ip_snd_options_len, &udp->udp_label_len,
@@ -6413,6 +5703,8 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
int ip_hdr_length;
uint32_t ip_len;
udpha_t *udpha;
+ boolean_t lock_held = B_FALSE;
+ in_port_t uha_src_port;
udpattrs_t attrs;
uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH];
uint32_t ip_snd_opt_len = 0;
@@ -6457,6 +5749,8 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
/* mp1 points to the M_DATA mblk carrying the packet */
ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
+ rw_enter(&udp->udp_rwlock, RW_READER);
+ lock_held = B_TRUE;
/*
* Check if our saved options are valid; update if not.
* TSOL Note: Since we are not in WRITER mode, UDP packets
@@ -6557,6 +5851,11 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
}
}
+ uha_src_port = udp->udp_port;
+ if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) {
+ rw_exit(&udp->udp_rwlock);
+ lock_held = B_FALSE;
+ }
if (pktinfop->ip4_ill_index != 0) {
optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
@@ -6610,12 +5909,14 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
ipha->ipha_ttl = udp->udp_multicast_ttl;
udpha->uha_dst_port = port;
- udpha->uha_src_port = udp->udp_port;
+ udpha->uha_src_port = uha_src_port;
if (ip_snd_opt_len > 0) {
uint32_t cksum;
bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
+ lock_held = B_FALSE;
+ rw_exit(&udp->udp_rwlock);
/*
* Massage source route putting first source route in ipha_dst.
* Ignore the destination in T_unitdata_req.
@@ -6659,7 +5960,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
ip_len <<= 16;
#endif
}
-
+ ASSERT(!lock_held);
/* Set UDP length and checksum */
*((uint32_t *)&udpha->uha_length) = ip_len;
if (DB_CRED(mp) != NULL)
@@ -6675,7 +5976,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
mp = NULL;
/* We're done. Pass the packet to ip. */
- BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams);
+ BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
"udp_wput_end: q %p (%S)", q, "end");
@@ -6696,9 +5997,11 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
}
done:
+ if (lock_held)
+ rw_exit(&udp->udp_rwlock);
if (*error != 0) {
ASSERT(mp != NULL);
- BUMP_MIB(&udp->udp_mib, udpOutErrors);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
}
return (mp);
}
@@ -6708,14 +6011,9 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
conn_t *connp = udp->udp_connp;
ipaddr_t src, dst;
- ill_t *ill;
ire_t *ire;
ipif_t *ipif = NULL;
mblk_t *ire_fp_mp;
- uint_t ire_fp_mp_len;
- uint16_t *up;
- uint32_t cksum, hcksum_txflags;
- queue_t *dev_q;
boolean_t retry_caching;
udp_stack_t *us = udp->udp_us;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
@@ -6824,10 +6122,9 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
(ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
(ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
- (connp->conn_nexthop_set) ||
- (ire->ire_nce == NULL) ||
- ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) ||
- ((ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp))) {
+ ((ire->ire_nce == NULL) ||
+ ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) ||
+ connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) {
if (ipif != NULL)
ipif_refrele(ipif);
UDP_STAT(us, udp_ip_ire_send);
@@ -6836,43 +6133,62 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
return;
}
- ill = ire_to_ill(ire);
- ASSERT(ill != NULL);
+ if (src == INADDR_ANY && !connp->conn_unspec_src) {
+ if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
+ ipha->ipha_src = ipif->ipif_src_addr;
+ else
+ ipha->ipha_src = ire->ire_src_addr;
+ }
- BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
+ if (ipif != NULL)
+ ipif_refrele(ipif);
+
+ udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid);
+}
+
+static void
+udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
+{
+ ipaddr_t src, dst;
+ ill_t *ill;
+ mblk_t *ire_fp_mp;
+ uint_t ire_fp_mp_len;
+ uint16_t *up;
+ uint32_t cksum, hcksum_txflags;
+ queue_t *dev_q;
+ udp_t *udp = connp->conn_udp;
+ ipha_t *ipha = (ipha_t *)mp->b_rptr;
+ udp_stack_t *us = udp->udp_us;
+ ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
dev_q = ire->ire_stq->q_next;
ASSERT(dev_q != NULL);
- /*
- * If the service thread is already running, or if the driver
- * queue is currently flow-controlled, queue this packet.
- */
- if ((q->q_first != NULL || connp->conn_draining) ||
- ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) {
- if (ipst->ips_ip_output_queue) {
- (void) putq(q, mp);
- } else {
- BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
- freemsg(mp);
- }
- if (ipif != NULL)
- ipif_refrele(ipif);
- IRE_REFRELE(ire);
+
+
+ if (DEV_Q_IS_FLOW_CTLED(dev_q)) {
+ BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
+ BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
+ ire_refrele(ire);
return;
}
+ ire_fp_mp = ire->ire_nce->nce_fp_mp;
+ ire_fp_mp_len = MBLKL(ire_fp_mp);
+ ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len);
+
+ dst = ipha->ipha_dst;
+ src = ipha->ipha_src;
+
+ ill = ire_to_ill(ire);
+ ASSERT(ill != NULL);
+
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
+
ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif
- if (src == INADDR_ANY && !connp->conn_unspec_src) {
- if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
- src = ipha->ipha_src = ipif->ipif_src_addr;
- else
- src = ipha->ipha_src = ire->ire_src_addr;
- }
-
if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
ASSERT(ill->ill_hcksum_capab != NULL);
hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
@@ -6918,15 +6234,13 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
if (ilm != NULL) {
ip_multicast_loopback(q, ill, mp,
connp->conn_multicast_loop ? 0 :
- IP_FF_NO_MCAST_LOOP, connp->conn_zoneid);
+ IP_FF_NO_MCAST_LOOP, zoneid);
}
/* If multicast TTL is 0 then we are done */
if (ipha->ipha_ttl == 0) {
- if (ipif != NULL)
- ipif_refrele(ipif);
freemsg(mp);
- IRE_REFRELE(ire);
+ ire_refrele(ire);
return;
}
}
@@ -6961,8 +6275,6 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
putnext(ire->ire_stq, mp);
}
- if (ipif != NULL)
- ipif_refrele(ipif);
IRE_REFRELE(ire);
}
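udp_xmit() above drops the packet when DEV_Q_IS_FLOW_CTLED() reports the driver queue as flow-controlled; assuming that macro keeps the semantics of the explicit test deleted earlier in udp_send_data(), the check amounts to the classic STREAMS pattern sketched below (sketch only, not buildable outside the kernel).

#include <sys/stream.h>

/*
 * Sketch: if the downstream driver queue is flow-controlled, do not send;
 * the new code drops (and counts) the packet instead of queueing it.
 */
static void
xmit_or_drop(queue_t *wq, mblk_t *mp)
{
	queue_t *dev_q = wq->q_next;	/* the driver's write queue */

	if ((dev_q->q_next != NULL || dev_q->q_first != NULL) &&
	    !canput(dev_q)) {
		/* Flow-controlled: drop rather than queue. */
		freemsg(mp);
		return;
	}
	putnext(wq, mp);
}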
@@ -6972,10 +6284,11 @@ udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
udp_t *udp = Q_TO_UDP(wq);
int err;
uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
+ udp_stack_t *us = udp->udp_us;
err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred),
dst, opt_storage, udp->udp_mac_exempt,
- udp->udp_us->us_netstack->netstack_ip);
+ us->us_netstack->netstack_ip);
if (err == 0) {
err = tsol_update_sticky(&udp->udp_sticky_ipp,
&udp->udp_label_len_v6, opt_storage);
@@ -6991,97 +6304,145 @@ udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
return (err);
}
+void
+udp_output_connected(void *arg, mblk_t *mp)
+{
+ conn_t *connp = (conn_t *)arg;
+ udp_t *udp = connp->conn_udp;
+ udp_stack_t *us = udp->udp_us;
+ ipaddr_t v4dst;
+ in_port_t dstport;
+ boolean_t mapped_addr;
+ struct sockaddr_storage ss;
+ sin_t *sin;
+ sin6_t *sin6;
+ struct sockaddr *addr;
+ socklen_t addrlen;
+ int error;
+ boolean_t insert_spi = udp->udp_nat_t_endpoint;
+
+ /* M_DATA for connected socket */
+
+ ASSERT(udp->udp_issocket);
+ UDP_DBGSTAT(us, udp_data_conn);
+
+ mutex_enter(&connp->conn_lock);
+ if (udp->udp_state != TS_DATA_XFER) {
+ mutex_exit(&connp->conn_lock);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ UDP_STAT(us, udp_out_err_notconn);
+ freemsg(mp);
+ TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
+ "udp_wput_end: connp %p (%S)", connp,
+ "not-connected; address required");
+ return;
+ }
+
+ mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst);
+ if (mapped_addr)
+ IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);
+
+ /* Initialize addr and addrlen as if they're passed in */
+ if (udp->udp_family == AF_INET) {
+ sin = (sin_t *)&ss;
+ sin->sin_family = AF_INET;
+ dstport = sin->sin_port = udp->udp_dstport;
+ ASSERT(mapped_addr);
+ sin->sin_addr.s_addr = v4dst;
+ addr = (struct sockaddr *)sin;
+ addrlen = sizeof (*sin);
+ } else {
+ sin6 = (sin6_t *)&ss;
+ sin6->sin6_family = AF_INET6;
+ dstport = sin6->sin6_port = udp->udp_dstport;
+ sin6->sin6_flowinfo = udp->udp_flowinfo;
+ sin6->sin6_addr = udp->udp_v6dst;
+ sin6->sin6_scope_id = 0;
+ sin6->__sin6_src_id = 0;
+ addr = (struct sockaddr *)sin6;
+ addrlen = sizeof (*sin6);
+ }
+ mutex_exit(&connp->conn_lock);
+
+ if (mapped_addr) {
+ /*
+ * Handle both AF_INET and AF_INET6; the latter
+ * for IPV4 mapped destination addresses. Note
+ * here that both addr and addrlen point to the
+ * corresponding struct depending on the address
+ * family of the socket.
+ */
+ mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error,
+ insert_spi);
+ } else {
+ mp = udp_output_v6(connp, mp, sin6, &error);
+ }
+ if (error == 0) {
+ ASSERT(mp == NULL);
+ return;
+ }
+
+ UDP_STAT(us, udp_out_err_output);
+ ASSERT(mp != NULL);
+ /* mp is freed by the following routine */
+ udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
+ (t_scalar_t)error);
+}
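udp_output_connected() above picks the IPv4 or IPv6 send path by testing whether the cached destination is a v4-mapped IPv6 address; a small self-contained sketch of that test, with a hypothetical pick_output_path() helper, is:

#include <netinet/in.h>
#include <string.h>

/*
 * Sketch: decide between the v4 and v6 send paths the way
 * udp_output_connected() does, extracting the IPv4 address from a
 * v4-mapped in6_addr when needed.
 */
static int
pick_output_path(const struct in6_addr *dst, struct in_addr *v4dst)
{
	if (IN6_IS_ADDR_V4MAPPED(dst)) {
		/* Bytes 12..15 of ::ffff:a.b.c.d hold the IPv4 address. */
		memcpy(&v4dst->s_addr, &dst->s6_addr[12],
		    sizeof (v4dst->s_addr));
		return (4);	/* use the IPv4 output routine */
	}
	return (6);		/* use the IPv6 output routine */
}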
+
/*
* This routine handles all messages passed downstream. It either
 * consumes the message or passes it downstream; it never queues
 * a message.
+ *
+ * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode
+ * is valid when we are directly beneath the stream head, and thus sockfs
+ * is able to bypass STREAMS and directly call us, passing along the sockaddr
+ * structure without the cumbersome T_UNITDATA_REQ interface for the case of
+ * connected endpoints.
*/
-static void
-udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
+void
+udp_wput(queue_t *q, mblk_t *mp)
{
sin6_t *sin6;
sin_t *sin;
ipaddr_t v4dst;
uint16_t port;
uint_t srcid;
- queue_t *q = connp->conn_wq;
+ conn_t *connp = Q_TO_CONN(q);
udp_t *udp = connp->conn_udp;
int error = 0;
- struct sockaddr_storage ss;
+ struct sockaddr *addr;
+ socklen_t addrlen;
udp_stack_t *us = udp->udp_us;
boolean_t insert_spi = udp->udp_nat_t_endpoint;
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
- "udp_wput_start: connp %p mp %p", connp, mp);
+ "udp_wput_start: queue %p mp %p", q, mp);
/*
* We directly handle several cases here: T_UNITDATA_REQ message
- * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both
- * connected and non-connected socket. The latter carries the
- * address structure along when this routine gets called.
+ * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
+ * socket.
*/
switch (DB_TYPE(mp)) {
case M_DATA:
+ /*
+ * Quick check for error cases. Checks will be done again
+ * under the lock later on
+ */
if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
- if (!udp->udp_direct_sockfs ||
- addr == NULL || addrlen == 0) {
- /* Not connected; address is required */
- BUMP_MIB(&udp->udp_mib, udpOutErrors);
- UDP_STAT(us, udp_out_err_notconn);
- freemsg(mp);
- TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
- "udp_wput_end: connp %p (%S)", connp,
- "not-connected; address required");
- return;
- }
- ASSERT(udp->udp_issocket);
- UDP_DBGSTAT(us, udp_data_notconn);
- /* Not connected; do some more checks below */
- break;
- }
- /* M_DATA for connected socket */
- UDP_DBGSTAT(us, udp_data_conn);
- IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);
-
- /* Initialize addr and addrlen as if they're passed in */
- if (udp->udp_family == AF_INET) {
- sin = (sin_t *)&ss;
- sin->sin_family = AF_INET;
- sin->sin_port = udp->udp_dstport;
- sin->sin_addr.s_addr = v4dst;
- addr = (struct sockaddr *)sin;
- addrlen = sizeof (*sin);
- } else {
- sin6 = (sin6_t *)&ss;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = udp->udp_dstport;
- sin6->sin6_flowinfo = udp->udp_flowinfo;
- sin6->sin6_addr = udp->udp_v6dst;
- sin6->sin6_scope_id = 0;
- sin6->__sin6_src_id = 0;
- addr = (struct sockaddr *)sin6;
- addrlen = sizeof (*sin6);
- }
-
- if (udp->udp_family == AF_INET ||
- IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) {
- /*
- * Handle both AF_INET and AF_INET6; the latter
- * for IPV4 mapped destination addresses. Note
- * here that both addr and addrlen point to the
- * corresponding struct depending on the address
- * family of the socket.
- */
- mp = udp_output_v4(connp, mp, v4dst,
- udp->udp_dstport, 0, &error, insert_spi);
- } else {
- mp = udp_output_v6(connp, mp, sin6, &error);
- }
- if (error != 0) {
- ASSERT(addr != NULL && addrlen != 0);
- goto ud_error;
+ /* Not connected; address is required */
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+ UDP_STAT(us, udp_out_err_notconn);
+ freemsg(mp);
+ TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
+ "udp_wput_end: connp %p (%S)", connp,
+ "not-connected; address required");
+ return;
}
+ udp_output_connected(connp, mp);
return;
+
case M_PROTO:
case M_PCPROTO: {
struct T_unitdata_req *tudr;
@@ -7128,8 +6489,7 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
/* FALLTHRU */
}
default:
- udp_become_writer(connp, mp, udp_wput_other_wrapper,
- SQTAG_UDP_OUTPUT);
+ udp_wput_other(q, mp);
return;
}
ASSERT(addr != NULL);
@@ -7137,8 +6497,8 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
switch (udp->udp_family) {
case AF_INET6:
sin6 = (sin6_t *)addr;
- if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) ||
- sin6->sin6_family != AF_INET6) {
+ if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
+ (sin6->sin6_family != AF_INET6)) {
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
"udp_wput_end: q %p (%S)", q, "badaddr");
error = EADDRNOTAVAIL;
@@ -7180,8 +6540,8 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
case AF_INET:
sin = (sin_t *)addr;
- if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) ||
- sin->sin_family != AF_INET) {
+		if (!OK_32PTR((char *)sin) || (addrlen != sizeof (sin_t)) ||
+		    (sin->sin_family != AF_INET)) {
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
"udp_wput_end: q %p (%S)", q, "badaddr");
error = EADDRNOTAVAIL;
@@ -7205,107 +6565,6 @@ ud_error:
}
}
-/* ARGSUSED */
-static void
-udp_output_wrapper(void *arg, mblk_t *mp, void *arg2)
-{
- udp_output((conn_t *)arg, mp, NULL, 0);
- _UDP_EXIT((conn_t *)arg);
-}
-
-static void
-udp_wput(queue_t *q, mblk_t *mp)
-{
- _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper,
- SQTAG_UDP_WPUT);
-}
-
-/*
- * Allocate and prepare a T_UNITDATA_REQ message.
- */
-static mblk_t *
-udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen)
-{
- struct T_unitdata_req *tudr;
- mblk_t *mp;
-
- mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED);
- if (mp != NULL) {
- mp->b_wptr += sizeof (*tudr) + addrlen;
- DB_TYPE(mp) = M_PROTO;
-
- tudr = (struct T_unitdata_req *)mp->b_rptr;
- tudr->PRIM_type = T_UNITDATA_REQ;
- tudr->DEST_length = addrlen;
- tudr->DEST_offset = (t_scalar_t)sizeof (*tudr);
- tudr->OPT_length = 0;
- tudr->OPT_offset = 0;
- bcopy(addr, tudr+1, addrlen);
- }
- return (mp);
-}
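
udp_tudr_alloc() (removed above, since sockfs no longer needs the placeholder) built the standard TPI layout: a fixed T_unitdata_req header followed immediately by the destination address, with DEST_offset/DEST_length describing where the copy sits. The user-space sketch below illustrates that layout only; struct tudr and the PRIM_type value are simplified, hypothetical stand-ins for the real definitions in <sys/tihdr.h>, and none of this is part of the patch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

/* Simplified stand-in for struct T_unitdata_req from <sys/tihdr.h>. */
struct tudr {
	long	PRIM_type;
	long	DEST_length;
	long	DEST_offset;
	long	OPT_length;
	long	OPT_offset;
};

int
main(void)
{
	struct sockaddr_in sin;
	size_t len = sizeof (struct tudr) + sizeof (sin);
	unsigned char *buf = malloc(len);
	struct tudr *req;
	struct sockaddr_in *dst;

	if (buf == NULL)
		return (1);

	(void) memset(&sin, 0, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(53);

	/* Header first, destination address copied right after it. */
	req = (struct tudr *)buf;
	req->PRIM_type = 0;		/* placeholder for T_UNITDATA_REQ */
	req->DEST_length = sizeof (sin);
	req->DEST_offset = sizeof (struct tudr);
	req->OPT_length = 0;
	req->OPT_offset = 0;
	(void) memcpy(buf + req->DEST_offset, &sin, sizeof (sin));

	/* A consumer locates the address through the offset/length pair. */
	dst = (struct sockaddr_in *)(buf + req->DEST_offset);
	(void) printf("dest port %u at offset %ld\n",
	    (unsigned)ntohs(dst->sin_port), req->DEST_offset);
	free(buf);
	return (0);
}

The M_PROTO/M_PCPROTO case kept in udp_wput() consumes exactly this shape when a T_UNITDATA_REQ arrives from above.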
-
-/*
- * Entry point for sockfs when udp is in "direct sockfs" mode. This mode
- * is valid when we are directly beneath the stream head, and thus sockfs
- * is able to bypass STREAMS and directly call us, passing along the sockaddr
- * structure without the cumbersome T_UNITDATA_REQ interface. Note that
- * this is done for both connected and non-connected endpoint.
- */
-void
-udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
-{
- conn_t *connp;
- udp_t *udp;
- udp_stack_t *us;
-
- q = UDP_WR(q);
- connp = Q_TO_CONN(q);
- udp = connp->conn_udp;
- us = udp->udp_us;
-
- /* udpsockfs should only send down M_DATA for this entry point */
- ASSERT(DB_TYPE(mp) == M_DATA);
-
- mutex_enter(&connp->conn_lock);
- UDP_MODE_ASSERTIONS(udp, UDP_ENTER);
-
- if (udp->udp_mode != UDP_MT_HOT) {
- /*
- * We can't enter this conn right away because another
- * thread is currently executing as writer; therefore we
- * need to deposit the message into the squeue to be
- * drained later. If a socket address is present, we
- * need to create a T_UNITDATA_REQ message as placeholder.
- */
- if (addr != NULL && addrlen != 0) {
- mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen);
-
- if (tudr_mp == NULL) {
- mutex_exit(&connp->conn_lock);
- BUMP_MIB(&udp->udp_mib, udpOutErrors);
- UDP_STAT(us, udp_out_err_tudr);
- freemsg(mp);
- return;
- }
- /* Tag the packet with T_UNITDATA_REQ */
- tudr_mp->b_cont = mp;
- mp = tudr_mp;
- }
- mutex_exit(&connp->conn_lock);
- udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT);
- return;
- }
-
- /* We can execute as reader right away. */
- UDP_READERS_INCREF(udp);
- mutex_exit(&connp->conn_lock);
-
- udp_output(connp, mp, addr, addrlen);
-
- udp_exit(connp);
-}
-
/*
* udp_output_v6():
* Assumes that udp_wput did some sanity checking on the destination
@@ -7338,6 +6597,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
uint_t hopoptslen = 0;
boolean_t is_ancillary = B_FALSE;
udp_stack_t *us = udp->udp_us;
+ size_t sth_wroff = 0;
*error = 0;
@@ -7366,12 +6626,15 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
attrs.udpattr_ipp6 = ipp;
attrs.udpattr_mb = mp;
- if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0)
+ if (udp_unitdata_opt_process(q, mp, error,
+ &attrs) < 0) {
goto done;
+ }
ASSERT(*error == 0);
opt_present = B_TRUE;
}
}
+ rw_enter(&udp->udp_rwlock, RW_READER);
ignore = ipp->ipp_sticky_ignored;
/* mp1 points to the M_DATA mblk carrying the packet */
@@ -7417,6 +6680,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
*error = ECONNREFUSED;
+ rw_exit(&udp->udp_rwlock);
mutex_exit(&connp->conn_lock);
goto done;
}
@@ -7429,6 +6693,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
!IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) ||
connp->conn_mlp_type != mlptSingle) &&
(*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) {
+ rw_exit(&udp->udp_rwlock);
mutex_exit(&connp->conn_lock);
goto done;
}
@@ -7596,15 +6861,17 @@ no_options:
ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len];
if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) ||
!OK_32PTR(ip6h)) {
+
/* Try to get everything in a single mblk next time */
if (udp_ip_hdr_len > udp->udp_max_hdr_len) {
udp->udp_max_hdr_len = udp_ip_hdr_len;
- (void) mi_set_sth_wroff(UDP_RD(q),
- udp->udp_max_hdr_len + us->us_wroff_extra);
+ sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
}
+
mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO);
if (mp2 == NULL) {
*error = ENOMEM;
+ rw_exit(&udp->udp_rwlock);
goto done;
}
mp2->b_wptr = DB_LIM(mp2);
@@ -7801,6 +7068,7 @@ no_options:
ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
tipp->ipp_tclass);
}
+ rw_exit(&udp->udp_rwlock);
if (option_exists & IPPF_RTHDR) {
ip6_rthdr_t *rth;
@@ -7902,17 +7170,21 @@ no_options:
mp = NULL;
/* We're done. Pass the packet to IP */
- BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams);
+ BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
ip_output_v6(connp, mp1, q, IP_WPUT);
done:
+ if (sth_wroff != 0) {
+ (void) mi_set_sth_wroff(RD(q),
+ udp->udp_max_hdr_len + us->us_wroff_extra);
+ }
if (hopoptsptr != NULL && !is_ancillary) {
kmem_free(hopoptsptr, hopoptslen);
hopoptsptr = NULL;
}
if (*error != 0) {
ASSERT(mp != NULL);
- BUMP_MIB(&udp->udp_mib, udpOutErrors);
+ BUMP_MIB(&us->us_udp_mib, udpOutErrors);
}
return (mp);
}
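
One pattern to note in udp_output_v6() above: the stream-head write-offset update is no longer issued from the middle of the send path. While udp_rwlock is held, the new value is only recorded in sth_wroff, and mi_set_sth_wroff() runs at the end, after the lock has been dropped. Below is a user-space analogue of that record-then-apply idiom, offered as an illustration only; a pthread mutex stands in for udp_rwlock and the hypothetical apply_wroff() for mi_set_sth_wroff().

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static size_t max_hdr_len = 20;		/* analogue of udp_max_hdr_len */

static void
apply_wroff(size_t wroff)
{
	/* Stand-in for mi_set_sth_wroff(); must not run under the lock. */
	(void) printf("set stream-head write offset to %zu\n", wroff);
}

int
main(void)
{
	size_t hdr_len = 48;		/* header length needed this time */
	size_t wroff = 0;		/* 0 means nothing to apply later */

	(void) pthread_mutex_lock(&lock);
	if (hdr_len > max_hdr_len) {
		max_hdr_len = hdr_len;
		wroff = max_hdr_len + 8;	/* cf. us_wroff_extra slop */
	}
	(void) pthread_mutex_unlock(&lock);

	/* The side effect happens only after the lock is dropped. */
	if (wroff != 0)
		apply_wroff(wroff);
	return (0);
}

The apparent motivation is to avoid holding udp_rwlock across a call that reaches up to the stream head.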
@@ -7988,26 +7260,17 @@ udp_wput_other(queue_t *q, mblk_t *mp)
"udp_wput_other_end: q %p (%S)", q, "unbindreq");
return;
case T_SVR4_OPTMGMT_REQ:
- if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr))
- /*
- * Use upper queue for option processing in
- * case the request is not handled at this
- * level and needs to be passed down to IP.
- */
- (void) svr4_optcom_req(_WR(UDP_RD(q)),
- mp, cr, &udp_opt_obj);
+ if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
+ cr)) {
+ (void) svr4_optcom_req(q,
+ mp, cr, &udp_opt_obj, B_TRUE);
+ }
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
"udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
return;
case T_OPTMGMT_REQ:
- /*
- * Use upper queue for option processing in
- * case the request is not handled at this
- * level and needs to be passed down to IP.
- */
- (void) tpi_optcom_req(_WR(UDP_RD(q)),
- mp, cr, &udp_opt_obj);
+ (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE);
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
"udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
return;
@@ -8057,7 +7320,7 @@ udp_wput_other(queue_t *q, mblk_t *mp)
iocp->ioc_error = ENOTCONN;
iocp->ioc_count = 0;
mp->b_datap->db_type = M_IOCACK;
- putnext(UDP_RD(q), mp);
+ qreply(q, mp);
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
"udp_wput_other_end: q %p (%S)", q,
"getpeername");
@@ -8081,7 +7344,7 @@ udp_wput_other(queue_t *q, mblk_t *mp)
/* nd_getset performs the necessary checking */
case ND_GET:
if (nd_getset(q, us->us_nd, mp)) {
- putnext(UDP_RD(q), mp);
+ qreply(q, mp);
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
"udp_wput_other_end: q %p (%S)", q, "get");
return;
@@ -8107,7 +7370,7 @@ udp_wput_other(queue_t *q, mblk_t *mp)
* stream interface and drain any
* queued data.
*/
- udp_rcv_drain(UDP_RD(q), udp,
+ udp_rcv_drain(RD(q), udp,
B_FALSE);
ASSERT(!udp->udp_direct_sockfs);
UDP_STAT(us, udp_sock_fallback);
@@ -8117,7 +7380,7 @@ udp_wput_other(queue_t *q, mblk_t *mp)
}
iocp->ioc_count = 0;
iocp->ioc_rval = 0;
- putnext(UDP_RD(q), mp);
+ qreply(q, mp);
return;
default:
break;
@@ -8137,14 +7400,6 @@ udp_wput_other(queue_t *q, mblk_t *mp)
ip_output(connp, mp, q, IP_WPUT);
}
-/* ARGSUSED */
-static void
-udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
-{
- udp_wput_other(((conn_t *)arg)->conn_wq, mp);
- udp_exit((conn_t *)arg);
-}
-
/*
* udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
* messages.
@@ -8171,7 +7426,6 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp)
return;
}
- q = WR(UDP_RD(q));
switch (mi_copy_state(q, mp, &mp1)) {
case -1:
return;
@@ -8317,11 +7571,7 @@ udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
udreqp = (struct T_unitdata_req *)mp->b_rptr;
- /*
- * Use upper queue for option processing since the callback
- * routines expect to be called in UDP instance instead of IP.
- */
- *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length,
+ *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
udreqp->OPT_offset, cr, &udp_opt_obj,
udpattrs, &is_absreq_failure);
@@ -8339,13 +7589,9 @@ udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
void
udp_ddi_init(void)
{
- UDP6_MAJ = ddi_name_to_major(UDP6);
udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
udp_opt_obj.odb_opt_arr_cnt);
- udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t),
- CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0);
-
/*
* We want to be informed each time a stack is created or
* destroyed in the kernel, so we can maintain the
@@ -8358,8 +7604,6 @@ void
udp_ddi_destroy(void)
{
netstack_unregister(NS_UDP);
-
- kmem_cache_destroy(udp_cache);
}
/*
@@ -8584,17 +7828,6 @@ udp_kstat_update(kstat_t *kp, int rw)
return (0);
}
-/* ARGSUSED */
-static void
-udp_rput(queue_t *q, mblk_t *mp)
-{
- /*
- * We get here whenever we do qreply() from IP,
- * i.e as part of handlings ioctls, etc.
- */
- putnext(q, mp);
-}
-
/*
* Read-side synchronous stream info entry point, called as a
* result of handling certain STREAMS ioctl operations.
@@ -8606,7 +7839,7 @@ udp_rinfop(queue_t *q, infod_t *dp)
uint_t cmd = dp->d_cmd;
int res = 0;
int error = 0;
- udp_t *udp = Q_TO_UDP(RD(UDP_WR(q)));
+ udp_t *udp = Q_TO_UDP(q);
struct stdata *stp = STREAM(q);
mutex_enter(&udp->udp_drain_lock);
@@ -8681,12 +7914,9 @@ static int
udp_rrw(queue_t *q, struiod_t *dp)
{
mblk_t *mp;
- udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q)));
+ udp_t *udp = Q_TO_UDP(q);
udp_stack_t *us = udp->udp_us;
- /* We should never get here when we're in SNMP mode */
- ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD));
-
/*
* Dequeue datagram from the head of the list and return
* it to caller; also ensure that RSLEEP sd_wakeq flag is
@@ -8850,3 +8080,40 @@ udp_set_rcv_hiwat(udp_t *udp, size_t size)
udp->udp_rcv_hiwat = size;
return (size);
}
+
+/*
+ * For the lower queue so that UDP can be a dummy mux.
+ * Nobody should be sending packets up this stream.
+ */
+static void
+udp_lrput(queue_t *q, mblk_t *mp)
+{
+ mblk_t *mp1;
+
+ switch (mp->b_datap->db_type) {
+ case M_FLUSH:
+ /* Turn around */
+ if (*mp->b_rptr & FLUSHW) {
+ *mp->b_rptr &= ~FLUSHR;
+ qreply(q, mp);
+ return;
+ }
+ break;
+ }
+ /* Could receive messages that passed through ar_rput */
+ for (mp1 = mp; mp1; mp1 = mp1->b_cont)
+ mp1->b_prev = mp1->b_next = NULL;
+ freemsg(mp);
+}
+
+/*
+ * For the lower queue so that UDP can be a dummy mux.
+ * Nobody should be sending packets down this stream.
+ */
+/* ARGSUSED */
+void
+udp_lwput(queue_t *q, mblk_t *mp)
+{
+ freemsg(mp);
+}