summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/inet/ip/rts.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/inet/ip/rts.c')
-rw-r--r--usr/src/uts/common/inet/ip/rts.c297
1 files changed, 150 insertions, 147 deletions
diff --git a/usr/src/uts/common/inet/ip/rts.c b/usr/src/uts/common/inet/ip/rts.c
index f9c37a5421..4b7edc1577 100644
--- a/usr/src/uts/common/inet/ip/rts.c
+++ b/usr/src/uts/common/inet/ip/rts.c
@@ -48,6 +48,7 @@
#include <inet/common.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
+#include <inet/ipclassifier.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/optcom.h>
@@ -55,6 +56,9 @@
#include <sys/isa_defs.h>
#include <net/route.h>
+#include <inet/rts_impl.h>
+#include <inet/ip_rts.h>
+
/*
* This is a transport provider for routing sockets. Downstream messages are
* wrapped with a IP_IOCTL header, and ip_wput_ioctl calls the appropriate entry
@@ -75,49 +79,6 @@
* the dirty work is done down in ip.
*/
-/*
- * RTS stack instances
- */
-struct rts_stack {
- netstack_t *rtss_netstack; /* Common netstack */
-
- caddr_t rtss_g_nd;
- struct rtsparam_s *rtss_params;
-};
-typedef struct rts_stack rts_stack_t;
-
-/*
- * Object to represent database of options to search passed to
- * {sock,tpi}optcom_req() interface routine to take care of option
- * management and associated methods.
- * XXX. These and other externs should really move to a rts header.
- */
-extern optdb_obj_t rts_opt_obj;
-extern uint_t rts_max_optsize;
-
-/* Internal routing socket stream control structure, one per open stream */
-typedef struct rts_s {
- cred_t *rts_credp; /* Opener's credentials */
- uint_t rts_state; /* Provider interface state */
- uint_t rts_error; /* Routing socket error code */
- uint_t rts_flag; /* Pending I/O state */
- uint_t rts_proto; /* SO_PROTOTYPE "socket" option. */
- uint_t rts_debug : 1, /* SO_DEBUG "socket" option. */
- rts_dontroute : 1, /* SO_DONTROUTE "socket" option. */
- rts_broadcast : 1, /* SO_BROADCAST "socket" option. */
- rts_reuseaddr : 1, /* SO_REUSEADDR "socket" option. */
- rts_useloopback : 1, /* SO_USELOOPBACK "socket" option. */
- rts_multicast_loop : 1, /* IP_MULTICAST_LOOP option */
- rts_hdrincl : 1, /* IP_HDRINCL option + RAW and IGMP */
-
- : 0;
- rts_stack_t *rts_rtss;
-} rts_t;
-
-#define RTS_WPUT_PENDING 0x1 /* Waiting for write-side to complete */
-#define RTS_WRW_PENDING 0x2 /* Routing socket write in progress */
-#define RTS_OPEN_PENDING 0x4 /* Routing socket open in progress */
-
/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack rts_g_t_info_ack = {
T_INFO_ACK,
@@ -133,14 +94,6 @@ static struct T_info_ack rts_g_t_info_ack = {
(XPG4_1) /* PROVIDER_flag */
};
-/* Named Dispatch Parameter Management Structure */
-typedef struct rtsparam_s {
- uint_t rts_param_min;
- uint_t rts_param_max;
- uint_t rts_param_value;
- char *rts_param_name;
-} rtsparam_t;
-
/*
* Table of ND variables supported by rts. These are loaded into rts_g_nd
* in rts_open.
@@ -156,11 +109,12 @@ static rtsparam_t lcl_param_arr[] = {
#define rtss_xmit_hiwat rtss_params[0].rts_param_value
#define rtss_xmit_lowat rtss_params[1].rts_param_value
#define rtss_recv_hiwat rtss_params[2].rts_param_value
-#define rtss_max_buf rtss_params[3].rts_param_value
+#define rtss_max_buf rtss_params[3].rts_param_value
static int rts_close(queue_t *q);
static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
int sys_error);
+static void rts_input(void *, mblk_t *, void *);
static mblk_t *rts_ioctl_alloc(mblk_t *data, cred_t *cr);
static int rts_open(queue_t *q, dev_t *devp, int flag, int sflag,
cred_t *credp);
@@ -171,12 +125,11 @@ int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name,
int rts_opt_set(queue_t *q, uint_t optset_context, int level,
int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
-static void rts_param_cleanup(IDP *ndp);
static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt);
static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
cred_t *cr);
-static void rts_rput(queue_t *q, mblk_t *mp);
+static void rts_rsrv(queue_t *q);
static void *rts_stack_init(netstackid_t stackid, netstack_t *ns);
static void rts_stack_fini(netstackid_t stackid, void *arg);
static void rts_wput(queue_t *q, mblk_t *mp);
@@ -184,21 +137,21 @@ static void rts_wput_iocdata(queue_t *q, mblk_t *mp);
static void rts_wput_other(queue_t *q, mblk_t *mp);
static int rts_wrw(queue_t *q, struiod_t *dp);
-static struct module_info info = {
+static struct module_info rts_mod_info = {
129, "rts", 1, INFPSZ, 512, 128
};
-static struct qinit rinit = {
- (pfi_t)rts_rput, NULL, rts_open, rts_close, NULL, &info
+static struct qinit rtsrinit = {
+ NULL, (pfi_t)rts_rsrv, rts_open, rts_close, NULL, &rts_mod_info
};
-static struct qinit winit = {
- (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &info,
+static struct qinit rtswinit = {
+ (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &rts_mod_info,
NULL, (pfi_t)rts_wrw, NULL, STRUIOT_STANDARD
};
struct streamtab rtsinfo = {
- &rinit, &winit
+ &rtsrinit, &rtswinit
};
/*
@@ -250,29 +203,49 @@ rts_ioctl_alloc(mblk_t *data, cred_t *cr)
static int
rts_close(queue_t *q)
{
- rts_t *rts = (rts_t *)q->q_ptr;
+ conn_t *connp = Q_TO_CONN(q);
+
+ ASSERT(connp != NULL && IPCL_IS_RTS(connp));
+
+ ip_rts_unregister(connp);
+
+ ip_quiesce_conn(connp);
qprocsoff(q);
- crfree(rts->rts_credp);
- netstack_rele(rts->rts_rtss->rtss_netstack);
+ /*
+ * Now we are truly single threaded on this stream, and can
+ * delete the things hanging off the connp, and finally the connp.
+ * We removed this connp from the fanout list, it cannot be
+ * accessed thru the fanouts, and we already waited for the
+ * conn_ref to drop to 0. We are already in close, so
+ * there cannot be any other thread from the top. qprocsoff
+ * has completed, and service has completed or won't run in
+ * future.
+ */
+ ASSERT(connp->conn_ref == 1);
+
+ inet_minor_free(ip_minor_arena, connp->conn_dev);
+
+ connp->conn_ref--;
+ ipcl_conn_destroy(connp);
- mi_free(q->q_ptr);
+ q->q_ptr = WR(q)->q_ptr = NULL;
return (0);
}
/*
* This is the open routine for routing socket. It allocates
- * rts_t structure for the stream and sends an IOCTL to
- * the down module to indicate that it is a routing socket
- * stream.
+ * rts_t structure for the stream and tells IP that it is a routing socket.
*/
/* ARGSUSED */
static int
rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
- mblk_t *mp = NULL;
rts_t *rts;
+ conn_t *connp;
+ dev_t conn_dev;
+ zoneid_t zoneid;
netstack_t *ns;
rts_stack_t *rtss;
@@ -280,8 +253,7 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
if (q->q_ptr != NULL)
return (0);
- /* If this is not a push of rts as a module, fail. */
- if (sflag != MODOPEN)
+ if (sflag == MODOPEN)
return (EINVAL);
ns = netstack_find_by_cred(credp);
@@ -289,56 +261,76 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
rtss = ns->netstack_rts;
ASSERT(rtss != NULL);
- q->q_ptr = mi_zalloc_sleep(sizeof (rts_t));
- WR(q)->q_ptr = q->q_ptr;
- rts = (rts_t *)q->q_ptr;
+ /*
+ * For exclusive stacks we set the zoneid to zero
+ * to make RTS operate as if in the global zone.
+ */
+ if (ns->netstack_stackid != GLOBAL_NETSTACKID)
+ zoneid = GLOBAL_ZONEID;
+ else
+ zoneid = crgetzoneid(credp);
+
+ if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) {
+ netstack_rele(ns);
+ return (EBUSY);
+ }
+ *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
- rts->rts_rtss = rtss;
+ connp = ipcl_conn_create(IPCL_RTSCONN, KM_SLEEP, ns);
+ connp->conn_dev = conn_dev;
+ rts = connp->conn_rts;
- rts->rts_credp = credp;
- crhold(credp);
/*
- * The receive hiwat is only looked at on the stream head queue.
- * Store in q_hiwat in order to return on SO_RCVBUF getsockopts.
+ * ipcl_conn_create did a netstack_hold. Undo the hold that was
+ * done by netstack_find_by_cred()
*/
- q->q_hiwat = rtss->rtss_recv_hiwat;
+ netstack_rele(ns);
+
/*
- * The transmit hiwat/lowat is only looked at on IP's queue.
- * Store in q_hiwat/q_lowat in order to return on SO_SNDBUF/SO_SNDLOWAT
- * getsockopts.
+ * Initialize the rts_t structure for this stream.
*/
+ q->q_ptr = connp;
+ WR(q)->q_ptr = connp;
+ connp->conn_rq = q;
+ connp->conn_wq = WR(q);
+
+ rw_enter(&rts->rts_rwlock, RW_WRITER);
+ ASSERT(connp->conn_rts == rts);
+ ASSERT(rts->rts_connp == connp);
+
+ /* Set the initial state of the stream and the privilege status. */
+ rts->rts_state = TS_UNBND;
+ connp->conn_zoneid = zoneid;
+
+ connp->conn_ulp_labeled = is_system_labeled();
+
+ rts->rts_rtss = rtss;
+
+ q->q_hiwat = rtss->rtss_recv_hiwat;
WR(q)->q_hiwat = rtss->rtss_xmit_hiwat;
WR(q)->q_lowat = rtss->rtss_xmit_lowat;
+
+ connp->conn_recv = rts_input;
+ crhold(credp);
+ connp->conn_cred = credp;
+
+ mutex_enter(&connp->conn_lock);
+ connp->conn_state_flags &= ~CONN_INCIPIENT;
+ mutex_exit(&connp->conn_lock);
+
qprocson(q);
+ rw_exit(&rts->rts_rwlock);
+
/*
* Indicate the down IP module that this is a routing socket
* client by sending an RTS IOCTL without any user data. Although
* this is just a notification message (without any real routing
* request), we pass in any credential for correctness sake.
*/
- mp = rts_ioctl_alloc(NULL, credp);
- if (mp == NULL) {
- qprocsoff(q);
- ASSERT(q->q_ptr != NULL);
- netstack_rele(rtss->rtss_netstack);
- mi_free(q->q_ptr);
- crfree(credp);
- return (ENOMEM);
- }
- rts->rts_flag |= RTS_OPEN_PENDING;
- putnext(WR(q), mp);
- while (rts->rts_flag & RTS_OPEN_PENDING) {
- if (!qwait_sig(q)) {
- (void) rts_close(q);
- return (EINTR);
- }
- }
- if (rts->rts_error != 0) {
- (void) rts_close(q);
- return (ENOTSUP);
- }
- rts->rts_state = TS_UNBND;
+ ip_rts_register(connp);
+
return (0);
+
}
/*
@@ -363,14 +355,13 @@ rts_ok_ack(queue_t *q, mblk_t *mp)
/*
* This routine is called by rts_wput to handle T_UNBIND_REQ messages.
- * After some error checking, the message is passed downstream to ip.
*/
static void
rts_unbind(queue_t *q, mblk_t *mp)
{
- rts_t *rts;
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
- rts = (rts_t *)q->q_ptr;
/* If a bind has not been done, we can't unbind. */
if (rts->rts_state != TS_IDLE) {
rts_err_ack(q, mp, TOUTSTATE, 0);
@@ -389,11 +380,11 @@ rts_unbind(queue_t *q, mblk_t *mp)
static void
rts_bind(queue_t *q, mblk_t *mp)
{
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
mblk_t *mp1;
struct T_bind_req *tbr;
- rts_t *rts;
- rts = (rts_t *)q->q_ptr;
if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"rts_bind: bad data, %d", rts->rts_state);
@@ -448,14 +439,15 @@ rts_copy_info(struct T_info_ack *tap, rts_t *rts)
static void
rts_capability_req(queue_t *q, mblk_t *mp)
{
- rts_t *rts = (rts_t *)q->q_ptr;
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
t_uscalar_t cap_bits1;
struct T_capability_ack *tcap;
cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
- mp->b_datap->db_type, T_CAPABILITY_ACK);
+ mp->b_datap->db_type, T_CAPABILITY_ACK);
if (mp == NULL)
return;
@@ -478,7 +470,8 @@ rts_capability_req(queue_t *q, mblk_t *mp)
static void
rts_info_req(queue_t *q, mblk_t *mp)
{
- rts_t *rts = (rts_t *)q->q_ptr;
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO,
T_INFO_ACK);
@@ -508,7 +501,8 @@ int
rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
int *i1 = (int *)ptr;
- rts_t *rts = (rts_t *)q->q_ptr;
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
switch (level) {
case SOL_SOCKET:
@@ -575,7 +569,8 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level,
uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
int *i1 = (int *)invalp;
- rts_t *rts = (rts_t *)q->q_ptr;
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
boolean_t checkonly;
rts_stack_t *rtss = rts->rts_rtss;
@@ -681,7 +676,6 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level,
}
if (!checkonly) {
q->q_hiwat = *i1;
- q->q_next->q_hiwat = *i1;
}
break; /* goto sizeof (int) option return */
case SO_RCVBUF:
@@ -712,16 +706,6 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level,
}
/*
- * This routine frees the ND table if all streams have been closed.
- * It is called by rts_close and rts_open.
- */
-static void
-rts_param_cleanup(IDP *ndp)
-{
- nd_free(ndp);
-}
-
-/*
* This routine retrieves the value of an ND variable in a rtsparam_t
* structure. It is called through nd_getset when a user reads the
* variable.
@@ -779,6 +763,16 @@ rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
}
/*
+ * Empty rsrv routine which is used by rts_input to cause a wakeup
+ * of a thread in qwait.
+ */
+/*ARGSUSED*/
+static void
+rts_rsrv(queue_t *q)
+{
+}
+
+/*
* This routine handles synchronous messages passed downstream. It either
* consumes the message or passes it downstream; it never queues a
* a message. The data messages that go down are wrapped in an IOCTL
@@ -796,9 +790,9 @@ rts_wrw(queue_t *q, struiod_t *dp)
mblk_t *mp1;
int error;
rt_msghdr_t *rtm;
- rts_t *rts;
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
- rts = (rts_t *)q->q_ptr;
while (rts->rts_flag & RTS_WRW_PENDING) {
if (qwait_rw(q)) {
rts->rts_error = EINTR;
@@ -872,12 +866,18 @@ err_ret:
* consumes the message or passes it downstream; it never queues a
* a message. The data messages that go down are wrapped in an IOCTL
* message.
+ *
+ * FIXME? Should we call IP rts_request directly? Could punt on returning
+ * errno in the case when it defers processing due to
+ * IPIF_CHANGING/ILL_CHANGING???
*/
static void
rts_wput(queue_t *q, mblk_t *mp)
{
uchar_t *rptr = mp->b_rptr;
mblk_t *mp1;
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
switch (mp->b_datap->db_type) {
case M_DATA:
@@ -904,8 +904,6 @@ rts_wput(queue_t *q, mblk_t *mp)
mp1 = rts_ioctl_alloc(mp, DB_CRED(mp));
if (mp1 == NULL) {
- rts_t *rts = (rts_t *)q->q_ptr;
-
ASSERT(rts != NULL);
freemsg(mp);
if (rts->rts_flag & RTS_WPUT_PENDING) {
@@ -914,7 +912,7 @@ rts_wput(queue_t *q, mblk_t *mp)
}
return;
}
- putnext(q, mp1);
+ ip_output(connp, mp1, q, IP_WPUT);
}
@@ -926,16 +924,16 @@ rts_wput(queue_t *q, mblk_t *mp)
static void
rts_wput_other(queue_t *q, mblk_t *mp)
{
+ conn_t *connp = Q_TO_CONN(q);
+ rts_t *rts = connp->conn_rts;
uchar_t *rptr = mp->b_rptr;
- rts_t *rts;
struct iocblk *iocp;
cred_t *cr;
rts_stack_t *rtss;
- rts = (rts_t *)q->q_ptr;
rtss = rts->rts_rtss;
- cr = DB_CREDDEF(mp, rts->rts_credp);
+ cr = DB_CREDDEF(mp, connp->conn_cred);
switch (mp->b_datap->db_type) {
case M_PROTO:
@@ -963,10 +961,11 @@ rts_wput_other(queue_t *q, mblk_t *mp)
rts_info_req(q, mp);
return;
case T_SVR4_OPTMGMT_REQ:
- (void) svr4_optcom_req(q, mp, cr, &rts_opt_obj);
+ (void) svr4_optcom_req(q, mp, cr, &rts_opt_obj,
+ B_FALSE);
return;
case T_OPTMGMT_REQ:
- (void) tpi_optcom_req(q, mp, cr, &rts_opt_obj);
+ (void) tpi_optcom_req(q, mp, cr, &rts_opt_obj, B_FALSE);
return;
case O_T_CONN_RES:
case T_CONN_RES:
@@ -1008,7 +1007,7 @@ rts_wput_other(queue_t *q, mblk_t *mp)
default:
break;
}
- putnext(q, mp);
+ ip_output(connp, mp, q, IP_WPUT);
}
/*
@@ -1017,6 +1016,7 @@ rts_wput_other(queue_t *q, mblk_t *mp)
static void
rts_wput_iocdata(queue_t *q, mblk_t *mp)
{
+ conn_t *connp = Q_TO_CONN(q);
struct sockaddr *rtsaddr;
mblk_t *mp1;
STRUCT_HANDLE(strbuf, sb);
@@ -1027,7 +1027,7 @@ rts_wput_iocdata(queue_t *q, mblk_t *mp)
case TI_GETPEERNAME:
break;
default:
- putnext(q, mp);
+ ip_output(connp, mp, q, IP_WPUT);
return;
}
switch (mi_copy_state(q, mp, &mp1)) {
@@ -1072,25 +1072,28 @@ rts_wput_iocdata(queue_t *q, mblk_t *mp)
mi_copyout(q, mp);
}
+/*ARGSUSED2*/
static void
-rts_rput(queue_t *q, mblk_t *mp)
+rts_input(void *arg1, mblk_t *mp, void *arg2)
{
- rts_t *rts;
+ conn_t *connp = (conn_t *)arg1;
+ rts_t *rts = connp->conn_rts;
struct iocblk *iocp;
mblk_t *mp1;
struct T_data_ind *tdi;
- rts = (rts_t *)q->q_ptr;
switch (mp->b_datap->db_type) {
case M_IOCACK:
case M_IOCNAK:
iocp = (struct iocblk *)mp->b_rptr;
- if (rts->rts_flag & (RTS_WPUT_PENDING|RTS_OPEN_PENDING)) {
- if (rts->rts_flag & RTS_WPUT_PENDING)
- rts->rts_flag &= ~RTS_WPUT_PENDING;
- else
- rts->rts_flag &= ~RTS_OPEN_PENDING;
+ if (rts->rts_flag & (RTS_WPUT_PENDING)) {
+ rts->rts_flag &= ~RTS_WPUT_PENDING;
rts->rts_error = iocp->ioc_error;
+ /*
+ * Tell rts_wvw/qwait that we are done.
+ * Note: there is no qwait_wakeup() we can use.
+ */
+ qenable(connp->conn_rq);
freemsg(mp);
return;
}
@@ -1116,7 +1119,7 @@ rts_rput(queue_t *q, mblk_t *mp)
default:
break;
}
- putnext(q, mp);
+ putnext(connp->conn_rq, mp);
}
@@ -1171,7 +1174,7 @@ rts_stack_fini(netstackid_t stackid, void *arg)
{
rts_stack_t *rtss = (rts_stack_t *)arg;
- rts_param_cleanup(&rtss->rtss_g_nd);
+ nd_free(&rtss->rtss_g_nd);
kmem_free(rtss->rtss_params, sizeof (lcl_param_arr));
rtss->rtss_params = NULL;
kmem_free(rtss, sizeof (*rtss));