/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _SUN_TPI_VERSION 2 #include #include #include #include #include int nca_sendfilev(file_t *, struct sendfilevec *, int, ssize_t *); /* NCAfs vnode operations */ int socknca_read(struct vnode *, struct uio *, int, struct cred *, struct caller_context *); int socknca_write(struct vnode *, struct uio *, int, struct cred *, struct caller_context *); int nca_poll(struct vnode *, short, int, short *, struct pollhead **); int socknca_close(struct vnode *, int, int, offset_t, struct cred *); void socknca_inactive(struct vnode *, struct cred *); /* NCAfs sonode operations */ static int sonca_bind(struct sonode *, struct sockaddr *, socklen_t, int); static int sonca_listen(struct sonode *, int); static int sonca_connect(struct sonode *, const struct sockaddr *, socklen_t, int, int); static int sonca_accept(struct sonode *, int, struct sonode **); static int sonca_sendmsg(struct sonode *, struct nmsghdr *, struct uio *); static int sonca_shutdown(struct sonode *, int); static int sonca_getsockname(struct sonode *so); static struct kmem_cache *ncafs_cache; #ifdef DEBUG int nca_sendfilev_debug = 0; #endif typedef struct ncafs_priv { mblk_t *iop_mp; mblk_t *req_mp; uchar_t *req_ptr; int req_size; size_t iop_dataleft; int iop_more; } ncafs_priv_t; typedef struct so_ncafs { struct sonode so; ncafs_priv_t so_priv; } so_ncafs_t; static sonodeops_t ncafs_sonodeops = { sonca_accept, /* sop_accept */ sonca_bind, /* sop_bind */ sonca_listen, /* sop_listen */ sonca_connect, /* sop_connect */ sotpi_recvmsg, /* sop_recvmsg */ sonca_sendmsg, /* sop_sendmsg */ sotpi_getpeername, /* sop_getpeername */ sonca_getsockname, /* sop_getsockname */ sonca_shutdown, /* sop_shutdown */ sotpi_getsockopt, /* sop_getsockopt */ sotpi_setsockopt /* sop_setsockopt */ }; /*ARGSUSED*/ static int nca_constructor(void *buf, void *cdrarg, int kmflags) { struct sonode *so = buf; struct vnode *vp; so->so_oobmsg = NULL; so->so_ack_mp = NULL; so->so_conn_ind_head = NULL; so->so_conn_ind_tail = NULL; so->so_discon_ind_mp = NULL; so->so_ux_bound_vp = NULL; so->so_unbind_mp = NULL; so->so_accessvp = NULL; so->so_laddr_sa = NULL; so->so_faddr_sa = NULL; so->so_ops = &ncafs_sonodeops; vp = vn_alloc(KM_SLEEP); so->so_vnode = vp; (void) vn_setops(vp, socknca_vnodeops); vp->v_data = (caddr_t)so; mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&so->so_plumb_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); cv_init(&so->so_ack_cv, NULL, CV_DEFAULT, NULL); cv_init(&so->so_connind_cv, NULL, CV_DEFAULT, NULL); cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL); return (0); } /*ARGSUSED1*/ static void nca_destructor(void *buf, void *cdrarg) { struct sonode *so = buf; struct vnode *vp = SOTOV(so); ASSERT(so->so_oobmsg == NULL); ASSERT(so->so_ack_mp == NULL); ASSERT(so->so_conn_ind_head == NULL); ASSERT(so->so_conn_ind_tail == NULL); ASSERT(so->so_discon_ind_mp == NULL); ASSERT(so->so_ux_bound_vp == NULL); ASSERT(so->so_unbind_mp == NULL); ASSERT(so->so_ops == &ncafs_sonodeops); ASSERT(vn_matchops(vp, socknca_vnodeops)); ASSERT(vp->v_data == (caddr_t)so); vn_free(vp); mutex_destroy(&so->so_lock); mutex_destroy(&so->so_plumb_lock); cv_destroy(&so->so_state_cv); cv_destroy(&so->so_ack_cv); cv_destroy(&so->so_connind_cv); cv_destroy(&so->so_want_cv); } void sonca_init(void) { ncafs_cache = kmem_cache_create("ncafs_cache", sizeof (so_ncafs_t), 0, nca_constructor, nca_destructor, NULL, NULL, NULL, 0); } /* ARGSUSED */ void socknca_inactive(struct vnode *vp, struct cred *cr) { struct sonode *so = VTOSO(vp); mblk_t *mp; ASSERT(so->so_family == AF_NCA); mutex_enter(&vp->v_lock); /* * If no one has reclaimed the vnode, remove from the * cache now. */ if (vp->v_count < 1) cmn_err(CE_PANIC, "socknca_inactive: Bad v_count"); /* * Drop the temporary hold by vn_rele now */ if (--vp->v_count != 0) { mutex_exit(&vp->v_lock); return; } mutex_exit(&vp->v_lock); /* We are the sole owner of so now */ ASSERT(!vn_has_cached_data(vp)); ASSERT(so->so_count == 0); ASSERT(so->so_accessvp); ASSERT(so->so_discon_ind_mp == NULL); vn_invalid(vp); vp = so->so_accessvp; VN_RELE(vp); mutex_enter(&so->so_lock); so->so_accessvp = NULL; mutex_exit(&so->so_lock); if ((mp = so->so_conn_ind_head) != NULL) { mblk_t *mp1; while (mp != NULL) { mp1 = mp->b_next; mp->b_next = NULL; freemsg(mp); mp = mp1; } so->so_conn_ind_head = so->so_conn_ind_tail = NULL; so->so_state &= ~SS_HASCONNIND; } if (so->so_laddr_sa != NULL) { ASSERT((caddr_t)so->so_faddr_sa == (caddr_t)so->so_laddr_sa + so->so_addr_size); ASSERT(so->so_faddr_maxlen == so->so_laddr_maxlen); so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID); kmem_free(so->so_laddr_sa, so->so_laddr_maxlen * 2); so->so_laddr_sa = NULL; so->so_laddr_len = so->so_laddr_maxlen = 0; so->so_faddr_sa = NULL; so->so_faddr_len = so->so_faddr_maxlen = 0; } if (so->so_peercred != NULL) crfree(so->so_peercred); kmem_cache_free(ncafs_cache, (so_ncafs_t *)so); } static int nca_check(vnode_t *vp) { struct strioctl strioc; int error; /* Check if NCA is enabled */ strioc.ic_cmd = NCA_READY; strioc.ic_timout = 0; strioc.ic_len = 0; strioc.ic_dp = (char *)NULL; error = strdoioctl(vp->v_stream, &strioc, FNATIVE, K_TO_K, CRED(), &error); return (error); } /* * Open stream and send down ioctl to make sure nca is supported. * Ensure that vnode is unchanged if check succeeds. */ static int nca_open_and_check(vnode_t *vp) { struct sonode *so; major_t maj; dev_t new_dev; int error; new_dev = vp->v_rdev; maj = getmajor(new_dev); ASSERT(maj < devcnt); ASSERT(STREAMSTAB(maj)); if ((error = stropen(vp, &new_dev, FREAD|FWRITE, CRED())) != 0) { VN_RELE(vp); return (error); } so = VTOSO(vp); so->so_count++; /* release hold in clone_open() */ if (so->so_flag & SOCLONE) { ASSERT(new_dev != vp->v_rdev); /* dev_t must be cloned */ ddi_rele_driver(getmajor(new_dev)); } if ((error = nca_check(vp)) != 0) { (void) strclose(vp, FREAD|FWRITE, CRED()); vp->v_stream = NULL; return (EPROTONOSUPPORT); } if (vp->v_stream != NULL) { strclean(vp); (void) strclose(vp, FREAD|FWRITE, CRED()); vp->v_stream = NULL; so->so_count--; } return (0); } /* ARGSUSED */ struct sonode * sonca_create(vnode_t *accessvp, int domain, int type, int protocol, int version, struct sonode *tso, int *errorp) { struct sonode *so; vnode_t *vp; time_t now; dev_t dev; int error; so_ncafs_t *so_ncafs; ncafs_priv_t *so_priv; ASSERT(accessvp); ASSERT(domain == AF_NCA); so_ncafs = kmem_cache_alloc(ncafs_cache, KM_SLEEP); so = &so_ncafs->so; /* * We rely on the "struct sonode" being the first element in the * "so_ncfas_t" in sonca_inactive(). */ so_priv = &so_ncafs->so_priv; so->so_priv = (void *)so_priv; so->so_cache = ncafs_cache; so->so_obj = so_ncafs; vp = SOTOV(so); now = gethrestime_sec(); so->so_flag = 0; ASSERT(so->so_accessvp == NULL); so->so_accessvp = accessvp; so->so_dev = dev = accessvp->v_rdev; /* * record in so_flag that it is a clone. */ if (getmajor(dev) == clone_major) so->so_flag |= SOCLONE; so->so_state = 0; so->so_mode = 0; so->so_fsid = sockdev; so->so_atime = now; so->so_mtime = now; so->so_ctime = now; /* Never modified */ so->so_count = 0; so->so_family = (short)domain; so->so_type = (short)type; so->so_protocol = (short)protocol; so->so_pushcnt = 0; so->so_options = 0; so->so_linger.l_onoff = 0; so->so_linger.l_linger = 0; so->so_sndbuf = 0; so->so_rcvbuf = 0; so->so_error = 0; so->so_delayed_error = 0; so->so_peercred = NULL; ASSERT(so->so_oobmsg == NULL); so->so_oobcnt = 0; so->so_oobsigcnt = 0; so->so_pgrp = 0; so->so_provinfo = NULL; so_priv->req_mp = NULL; so_priv->req_size = 0; so_priv->iop_mp = NULL; ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL); so->so_eaddr_mp = NULL; so->so_addr_size = (socklen_t)sizeof (struct sockaddr_in); so->so_laddr_maxlen = so->so_faddr_maxlen = P2ROUNDUP(so->so_addr_size, KMEM_ALIGN); so->so_laddr_sa = kmem_zalloc(so->so_laddr_maxlen * 2, KM_SLEEP); so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa + so->so_laddr_maxlen); so->so_laddr_len = so->so_faddr_len = 0; ASSERT(!(so->so_state & (SS_FADDR_VALID | SS_LADDR_VALID))); ASSERT(so->so_ack_mp == NULL); ASSERT(so->so_conn_ind_head == NULL); ASSERT(so->so_conn_ind_tail == NULL); ASSERT(so->so_ux_bound_vp == NULL); ASSERT(so->so_unbind_mp == NULL); vn_reinit(vp); (void) vn_setops(vp, socknca_vnodeops); vp->v_vfsp = rootvfs; vp->v_type = VSOCK; vp->v_rdev = so->so_dev; vn_exists(vp); so->so_version = SOV_SOCKSTREAM; /* * We need to check if NCA is plumbed before letting this * call succeed. If we are being called from nca_accept, * don't bother. */ if ((tso == NULL) && ((error = nca_open_and_check(vp)) != 0)) { *errorp = error; kmem_free(so->so_laddr_sa, so->so_laddr_maxlen * 2); kmem_cache_free(ncafs_cache, so_ncafs); return (NULL); } /* Set up the Stream now */ if (error = socktpi_open(&vp, FREAD|FWRITE, CRED())) { VN_RELE(vp); *errorp = error; kmem_free(so->so_laddr_sa, so->so_laddr_maxlen * 2); kmem_cache_free(ncafs_cache, so_ncafs); return (NULL); } so_installhooks(so); return (so); } int sonca_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, int flags) { vnode_t *vp; struct strioctl strioc; int error; if (name == NULL) return (EOPNOTSUPP); if (namelen != sizeof (struct sockaddr_in)) return (EINVAL); if ((name->sa_family != AF_INET) && (name->sa_family != AF_NCA)) return (EAFNOSUPPORT); ASSERT(so->so_family == AF_NCA); if (!(flags & _SOBIND_LOCK_HELD)) { mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ } else { ASSERT(MUTEX_HELD(&so->so_lock)); ASSERT(so->so_flag & SOLOCKED); } vp = SOTOV(so); strioc.ic_cmd = NCA_BIND; strioc.ic_timout = 0; strioc.ic_len = namelen; strioc.ic_dp = (char *)name; error = strdoioctl(vp->v_stream, &strioc, FNATIVE, K_TO_K, CRED(), &error); if (error == 0) { so->so_laddr_len = (socklen_t)namelen; bcopy(name, so->so_laddr_sa, namelen); so->so_state |= SS_ISBOUND|SS_LADDR_VALID; } if (!(flags & _SOBIND_LOCK_HELD)) { so_unlock_single(so, SOLOCKED); mutex_exit(&so->so_lock); } else { ASSERT(MUTEX_HELD(&so->so_lock)); ASSERT(so->so_flag & SOLOCKED); } return (error); } int sonca_listen(struct sonode *so, int backlog) { if (backlog <= 0) return (EOPNOTSUPP); if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != SS_ISBOUND) return (EINVAL); if (!(so->so_state & SS_ACCEPTCONN)) { struct strioctl strioc; int error; /* Let NCA know that web server is ready */ strioc.ic_cmd = NCA_LISTEN; strioc.ic_timout = 0; strioc.ic_len = so->so_laddr_len; strioc.ic_dp = (char *)so->so_laddr_sa; error = strdoioctl(SOTOV(so)->v_stream, &strioc, FNATIVE, K_TO_K, CRED(), &error); if (error != 0) return (error); } mutex_enter(&so->so_lock); so->so_state |= SS_ACCEPTCONN; so->so_backlog = backlog; mutex_exit(&so->so_lock); return (0); } /* * Issue a downcall to NCA by sending a "downcallinfo_t" structure in an M_CTL * mblk downwards. We cannot use M_DATA mblks because they could carry * arbitrary data e.g. if a process opens "/dev/nca" directly and tries to * write to it. The M_CTL mblk is marked with a magic number to allow some * basic sanity tests inside NCA. */ static int nca_downcall(struct sonode *so, nca_io2_t *iop, uio_t *uiop) { mblk_t *mp; downcallinfo_t *dcip; int error; if ((mp = allocb(sizeof (downcallinfo_t), BPRI_MED)) == NULL) return (ENOMEM); DB_TYPE(mp) = M_CTL; mp->b_wptr += sizeof (downcallinfo_t); dcip = (downcallinfo_t *)mp->b_rptr; dcip->dci_magic = DOWNCALLINFO_MAGIC; dcip->dci_iop = iop; dcip->dci_uiop = uiop; /* * We must use kstrwritemp() here because we want to send down * a M_CTL mblk which strwrite() doesn't support. kstrwritemp() * has more lax checks which is allright in this case because * NCA sockets always grant write permissions, we don't want * the mblk_t to be split anyway and there will never be a * M_HANGUP mblk on a NCA socket's stream. */ error = kstrwritemp(SOTOV(so), mp, ((so->so_state & SS_NDELAY) ? FNDELAY : 0) | ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0)); if (error != 0) freeb(mp); return (error); } /* ARGSUSED */ int sonca_connect(struct sonode *so, const struct sockaddr *name, socklen_t namelen, int fflag, int flags) { return (EINVAL); } int nca_emptywrite(struct sonode *so, int more) { ncafs_priv_t *so_priv = (ncafs_priv_t *)so->so_priv; nca_io2_t *iop; iop = (nca_io2_t *)so_priv->iop_mp->b_rptr; iop->data_len = 0; iop->more = (uint8_t)more; return (nca_downcall(so, iop, NULL)); } int sonca_accept(struct sonode *so, int fflag, struct sonode **nsop) { struct T_conn_ind *conn_ind; int error = 0; mblk_t *mp; struct sonode *nso; vnode_t *nvp; void *src; t_uscalar_t srclen; void *opt; t_uscalar_t optlen; mblk_t *iop_mp, *req_mp; nca_io2_t *iop; ncafs_priv_t *nso_priv; #ifdef NCAFS_DEBUG2 char *nca_request_ptr; char buf[1024]; int len; #endif #ifdef NCAFS_DEBUG3 hrtime_t start, end, delta; static hrtime_t total = 0; #endif /* * Check that the socket is a listener. */ if (!(so->so_state & SS_ACCEPTCONN)) { error = EINVAL; eprintsoline(so, error); return (error); } error = sowaitconnind(so, fflag, &mp); if (error != 0) { eprintsoline(so, error); return (error); } ASSERT(mp); conn_ind = (struct T_conn_ind *)mp->b_rptr; srclen = conn_ind->SRC_length; src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); if (src == NULL) { error = EPROTO; eprintsoline(so, error); goto disconnect_unlocked; } optlen = conn_ind->OPT_length; if (optlen != 0) { opt = sogetoff(mp, conn_ind->OPT_offset, optlen, __TPI_ALIGN_SIZE); if (opt == NULL) { error = EPROTO; eprintsoline(so, error); goto disconnect_unlocked; } } iop_mp = mp->b_cont; if (iop_mp == NULL) { cmn_err(CE_CONT, "sonca_accept: Got NULL iop_mp"); freemsg(mp); return (EINVAL); } mp->b_cont = NULL; req_mp = iop_mp->b_cont; if (req_mp == NULL) { cmn_err(CE_CONT, "sonca_accept: Got NULL req_mp"); freemsg(iop_mp); freemsg(mp); return (EINVAL); } iop_mp->b_cont = NULL; /* * Create the new socket. */ VN_HOLD(so->so_accessvp); #ifdef NCAFS_DEBUG3 start = gethrtime(); #endif nso = sonca_create(so->so_accessvp, so->so_family, so->so_type, so->so_protocol, so->so_version, so, &error); #ifdef NCAFS_DEBUG3 end = gethrtime(); delta = end - start; total += delta; cmn_err(CE_CONT, "accept time = %lld. Total = %lld\n", delta, total); #endif if (nso == NULL) { /* * Accept can not fail with ENOBUFS. sonca_create sleeps * waiting for memory until a signal is caught so return * EINTR. */ if (error == ENOBUFS) error = EINTR; VN_RELE(so->so_accessvp); eprintsoline(so, error); cmn_err(CE_CONT, "sonca_accept: nso == NULL\n"); goto disconnect_unlocked; } nvp = SOTOV(nso); nso_priv = nso->so_priv; nso->so_faddr_len = (socklen_t)srclen; ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); bcopy(src, nso->so_faddr_sa, srclen); nso->so_state |= SS_FADDR_VALID; /* Do the automatic bind for the new socket */ mutex_enter(&nso->so_lock); nso->so_state |= SS_ISBOUND|SS_HASDATA|SS_ISCONNECTED; nso->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); iop = (nca_io2_t *)iop_mp->b_rptr; /* * Copy local address from nca_io2_t. */ src = (void *)((size_t)iop + (size_t)iop->local); #ifdef NCAFS_DEBUG2 cmn_err(CE_CONT, "sonca_accept: iop = %p, src = %p, local = %d, " " local_len = %d, laddr_maxlen = %d", iop, src, iop->local, iop->local_len, nso->so_laddr_maxlen); #endif nso->so_laddr_len = iop->local_len; ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); bcopy(src, nso->so_laddr_sa, nso->so_laddr_len); nso->so_state |= SS_LADDR_VALID; ASSERT(iop->first == 1); #ifdef NCAFS_DEBUG2 nca_request_ptr = (char *)req_mp->b_rptr; len = req_mp->b_wptr - req_mp->b_rptr; len = (len > 1024) ? 1024 : len; bcopy(nca_request_ptr, buf, len); buf[len] = '\0'; cmn_err(CE_CONT, "sonca_accept (%d): %s\n", len, buf); #endif nso_priv->iop_mp = iop_mp; nso_priv->req_mp = req_mp; nso_priv->req_size = req_mp->b_wptr - req_mp->b_rptr; nso_priv->req_ptr = req_mp->b_rptr; nso_priv->iop_dataleft = 0; nso_priv->iop_more = 0; iop = (nca_io2_t *)nso_priv->iop_mp->b_rptr; if (iop->more == 1) { error = nca_emptywrite(nso, 1); iop->first = 0; if (error != 0) { mutex_exit(&nso->so_lock); goto disconnect_vp_unlocked; } nso_priv->iop_more = 1; } /* * Pass out new socket. */ if (nsop != NULL) *nsop = nso; mutex_exit(&nso->so_lock); freemsg(mp); return (0); disconnect_vp_unlocked: (void) VOP_CLOSE(nvp, 0, 1, 0, CRED()); VN_RELE(nvp); disconnect_unlocked: freemsg(mp); return (error); } /* ARGSUSED */ int socknca_read( struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, struct caller_context *ct) { struct sonode *so = VTOSO(vp); struct nmsghdr lmsg; int error = 0, len; struct iovec *iov; struct iovec read_iov; struct uio xuiop; ncafs_priv_t *so_priv = (ncafs_priv_t *)so->so_priv; nca_io2_t io; ASSERT(so->so_family == AF_NCA); ASSERT(vp->v_type == VSOCK); so_update_attrs(so, SOACC); mutex_enter(&so->so_lock); if (so->so_state & SS_HASDATA) { iov = uiop->uio_iov; len = so_priv->req_size; if (len > iov->iov_len) { len = iov->iov_len; error = uiomove(so_priv->req_ptr, len, UIO_READ, uiop); so_priv->req_ptr += len; so_priv->req_size -= len; } else { error = uiomove(so_priv->req_ptr, len, UIO_READ, uiop); so->so_state &= ~SS_HASDATA; so->so_state |= so_priv->iop_more ? SS_MOREDATA : SS_DONEREAD; } mutex_exit(&so->so_lock); } else { if (so_priv->iop_dataleft > 0) { lmsg.msg_namelen = 0; lmsg.msg_controllen = 0; lmsg.msg_flags = 0; if (so_priv->iop_dataleft <= uiop->uio_resid) { size_t saved_resid; saved_resid = uiop->uio_resid; uiop->uio_resid = so_priv->iop_dataleft; mutex_exit(&so->so_lock); error = SOP_RECVMSG(so, &lmsg, uiop); mutex_enter(&so->so_lock); uiop->uio_resid = saved_resid - so_priv->iop_dataleft; so_priv->iop_dataleft = 0; if (!so_priv->iop_more) { so->so_state &= ~SS_MOREDATA; so->so_state |= SS_DONEREAD; } mutex_exit(&so->so_lock); } else { so_priv->iop_dataleft -= uiop->uio_resid; mutex_exit(&so->so_lock); error = SOP_RECVMSG(so, &lmsg, uiop); } return (error); } if (!so_priv->iop_more) { mutex_exit(&so->so_lock); return (0); } /* * NCA will send more data along with nca_io2_t. We need to * do a sorecvmsg with uiop sizeof (nca_io2_t). Based on that * we should set so_priv->iop_more, and then do a sorecvmsg on * user supplied uiop. */ read_iov.iov_base = (char *)&io; read_iov.iov_len = sizeof (nca_io2_t); xuiop.uio_iov = &read_iov; xuiop.uio_iovcnt = 1; xuiop.uio_loffset = 0; xuiop.uio_segflg = UIO_SYSSPACE; xuiop.uio_fmode = 0; xuiop.uio_extflg = UIO_COPY_CACHED; xuiop.uio_limit = 0; xuiop.uio_resid = read_iov.iov_len; lmsg.msg_namelen = 0; lmsg.msg_controllen = 0; lmsg.msg_flags = 0; mutex_exit(&so->so_lock); error = SOP_RECVMSG(so, &lmsg, &xuiop); if (error != 0) return (error); if (xuiop.uio_resid != 0) { /* Couldn't read entire nca_io2_t. Something is wrong */ cmn_err(CE_CONT, "socknca_read: couldn't read full " "nca_io2_t. Needed %d, read %d", (int)sizeof (nca_io2_t), (int)(sizeof (nca_io2_t) - xuiop.uio_resid)); return (EBADMSG); } mutex_enter(&so->so_lock); ASSERT(io.version == NCA_HTTP_VERSION2); if (io.more) { nca_io2_t *iop; so_priv->iop_more = 1; error = nca_emptywrite(so, 1); iop = (nca_io2_t *)so_priv->iop_mp->b_rptr; iop->first = 0; if (error != 0) { mutex_exit(&so->so_lock); return (error); } } else { so_priv->iop_more = 0; } lmsg.msg_namelen = 0; lmsg.msg_controllen = 0; lmsg.msg_flags = 0; if (io.data_len <= uiop->uio_resid) { size_t saved_resid; saved_resid = uiop->uio_resid; uiop->uio_resid = io.data_len; mutex_exit(&so->so_lock); error = SOP_RECVMSG(so, &lmsg, uiop); if (error != 0) { cmn_err(CE_CONT, "socknca_read: error reading " "data. error = %d", error); return (error); } mutex_enter(&so->so_lock); uiop->uio_resid = saved_resid - io.data_len; so_priv->iop_dataleft = 0; if (so_priv->iop_more == 0) { so->so_state &= ~SS_MOREDATA; so->so_state |= SS_DONEREAD; } mutex_exit(&so->so_lock); } else { so_priv->iop_dataleft = io.data_len - uiop->uio_resid; mutex_exit(&so->so_lock); error = SOP_RECVMSG(so, &lmsg, uiop); if (error != 0) { cmn_err(CE_CONT, "socknca_read: error reading " "data. error = %d", error); return (error); } } } return (error); } /* ARGSUSED2 */ int socknca_write( struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, struct caller_context *ct) { struct sonode *so = VTOSO(vp); ncafs_priv_t *so_priv = (ncafs_priv_t *)so->so_priv; nca_io2_t *iop; int error; ASSERT(so_priv != NULL); if (so->so_state & SS_CANTSENDMORE) { tsignal(curthread, SIGPIPE); return (EPIPE); } if (so->so_error != 0) { mutex_enter(&so->so_lock); error = sogeterr(so); if (error != 0) { mutex_exit(&so->so_lock); return (error); } mutex_exit(&so->so_lock); } if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != (SS_ISCONNECTED|SS_ISBOUND)) { return (ENOTCONN); } iop = (nca_io2_t *)so_priv->iop_mp->b_rptr; iop->data_len = uiop->uio_resid; iop->trailer_len = 0; iop->direct_len = 0; iop->direct_type = NCA_IO_DIRECT_NONE; iop->more = 1; error = nca_downcall(so, iop, uiop); #ifdef NCAFS_DEBUG if (error != 0) cmn_err(CE_CONT, "socknca_write: write error %d\n", error); #endif mutex_enter(&so->so_lock); iop->first = 0; mutex_exit(&so->so_lock); return (error); } int socknca_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, struct cred *cr, int32_t *rvalp) { switch (cmd) { case NCA_BIND: case NCA_LISTEN: case NCA_READY: /* Filter out internal ioctl()s used between NCA and NCAfs. */ return (EINVAL); case I_PUSH: case I_POP: /* * Prohibit popping "sockmod" or pushing a module above * "sockfmod" for NCA sockets because NCAfs and NCA use * a private M_CTL interface for data and no application * using NCA (web servers) needs this feature. */ return (ENOTSUP); } return (socktpi_ioctl(vp, cmd, arg, mode, cr, rvalp)); } static int sonca_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) { /* * NCA doesn't support socket features like OOB data or bypassing * the routing table. Reject anything except simple cases like * "send(fd, msg, len, 0);". */ if ((msg->msg_control == NULL || msg->msg_controllen == 0) && !(msg->msg_flags & (MSG_OOB|MSG_DONTROUTE))) { return (socknca_write(SOTOV(so), uiop, 0, CRED(), NULL)); } /* * Return EINVAL instead of the more appropriate ENOTSUP because the * later is not allowed as an error code returned by send(3SOCKET). */ return (EINVAL); } int nca_poll( struct vnode *vp, short events, int anyyet, short *reventsp, struct pollhead **phpp) { short origevents = events; short inevents; struct sonode *so = VTOSO(vp); int error; int so_state = so->so_state; /* snapshot */ #ifdef NCAFS_DEBUG cmn_err(CE_CONT, "nca_poll: thread (%x) polling %x for events %x, " "anyyet %d, state %x\n", (uint_t)curthread, so, events, anyyet, so->so_state); #endif ASSERT(vp->v_type == VSOCK); ASSERT(vp->v_stream != NULL); if (!(so_state & SS_ISCONNECTED)) { /* Not connected yet - turn off write side events */ events &= ~(POLLOUT|POLLWRBAND); } /* * Check for errors without calling strpoll if the caller wants them. * In sockets the errors are represented as input/output events * and there is no need to ask the stream head for this information. */ if (so->so_error != 0 && ((POLLIN|POLLRDNORM|POLLOUT) & origevents)) { *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; return (0); } /* * Check if NCA has HTTP request data available or is expecting * more HTTP request data. */ inevents = 0; if (events & (POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI)) { events &= ~(POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI); if (so_state & (SS_HASDATA|SS_MOREDATA|SS_HASCONNIND)) { inevents = (POLLIN|POLLRDNORM) & origevents; if (events == 0) { *reventsp = inevents; return (0); } } if ((so_state & SS_DONEREAD) && (events == 0)) { /* * Caller only asked for inputs events and * no more data is coming. */ *reventsp = inevents; return (0); } } /* * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. * These message with only an M_PROTO/M_PCPROTO part and no M_DATA * will not trigger a POLLIN event with POLLRDDATA set. * After shutdown(output) a stream head write error is set. * However, we should not return output events. */ events |= POLLRDDATA|POLLNOERR; error = strpoll(vp->v_stream, events, anyyet, reventsp, phpp); if (error != 0) return (error); if (so->so_state & SS_HASCONNIND) inevents |= (POLLIN|POLLRDNORM) & origevents; *reventsp |= inevents; ASSERT(!(*reventsp & POLLERR)); return (0); } int sonca_shutdown(struct sonode *so, int how) { int error = 0; ncafs_priv_t *so_priv = (ncafs_priv_t *)so->so_priv; mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ if (!(so->so_state & SS_ISCONNECTED)) { error = ENOTCONN; goto done; } switch (how) { case 0: so->so_state |= SS_CANTRCVMORE; break; case 2: so->so_state |= SS_CANTRCVMORE; /* FALLTHRU */ case 1: so->so_state |= SS_CANTSENDMORE; if (so->so_state & SS_ISCONNECTED) { ASSERT(so_priv->iop_mp != NULL); ASSERT(so_priv->req_mp != NULL); (void) nca_emptywrite(so, 0); if (so_priv->req_mp != NULL) { freemsg(so_priv->req_mp); so_priv->req_mp = NULL; so_priv->req_size = 0; } if (so_priv->iop_mp != NULL) { freemsg(so_priv->iop_mp); so_priv->iop_mp = NULL; } so_priv->iop_more = 0; so->so_state &= ~SS_ISCONNECTED; } break; default: error = EINVAL; goto done; } done: so_unlock_single(so, SOLOCKED); mutex_exit(&so->so_lock); return (error); } /* ARGSUSED */ int socknca_close( struct vnode *vp, int flag, int count, offset_t offset, struct cred *cr) { struct sonode *so = VTOSO(vp); dev_t dev; int error = 0; ncafs_priv_t *so_priv = (ncafs_priv_t *)so->so_priv; mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ if ((so->so_state & SS_ISCONNECTED) && (count <= 1)) { ASSERT(so_priv->iop_mp != NULL); ASSERT(so_priv->req_mp != NULL); (void) nca_emptywrite(so, 0); if (so_priv->req_mp != NULL) { freemsg(so_priv->req_mp); so_priv->req_mp = NULL; so_priv->req_size = 0; } if (so_priv->iop_mp != NULL) { freemsg(so_priv->iop_mp); so_priv->iop_mp = NULL; } so->so_state &= ~SS_ISCONNECTED; } so_unlock_single(so, SOLOCKED); mutex_exit(&so->so_lock); cleanlocks(vp, ttoproc(curthread)->p_pid, 0); if (vp->v_stream != NULL) strclean(vp); if (count > 1) return (0); dev = so->so_dev; ASSERT(vp->v_type == VSOCK); ASSERT(STREAMSTAB(getmajor(dev))); mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ ASSERT(so->so_count > 0); so->so_count--; /* one fewer open reference */ /* * Only call the close routine when the last open reference through * any [s, v]node goes away. */ if (so->so_count == 0 && vp->v_stream != NULL) { mutex_exit(&so->so_lock); error = strclose(vp, flag, cr); vp->v_stream = NULL; mutex_enter(&so->so_lock); } so_unlock_single(so, SOLOCKED); mutex_exit(&so->so_lock); /* * Decrement the device driver's reference count for streams * opened via the clone dip. The driver was held in clone_open(). * The absence of clone_close() forces this asymmetry. */ if (so->so_flag & SOCLONE) ddi_rele_driver(getmajor(dev)); return (error); } #define SEND_MAX_CHUNK 16 int nca_sendfilev(file_t *fp, struct sendfilevec *sfv, int sfvcnt, ssize_t *xferred) { struct sonode *so; struct vnode *vp; int error; uio_t *uiop; iovec_t *iovp; nca_io2_t *iop; nca_sendvec_t *nca_vecp; int sfd = -1; int size; ASSERT(sfvcnt <= SEND_MAX_CHUNK); vp = fp->f_vnode; so = VTOSO(vp); if (so->so_type != SOCK_STREAM) return (EPROTONOSUPPORT); if (so->so_state & SS_CANTSENDMORE) { tsignal(curthread, SIGPIPE); return (EPIPE); } if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != (SS_ISCONNECTED|SS_ISBOUND)) { return (ENOTCONN); } iop = (nca_io2_t *)((ncafs_priv_t *)so->so_priv)->iop_mp->b_rptr; iop->more = 1; iop->data_len = 0; iop->trailer_len = 0; iop->direct_len = 0; size = sfvcnt * sizeof (nca_sendvec_t) + /* nca_vecp */ sizeof (uio_t) + /* uio */ sizeof (iovec_t); /* iov */ nca_vecp = kmem_alloc(size, KM_SLEEP); uiop = (uio_t *)&nca_vecp[sfvcnt]; iovp = (iovec_t *)&uiop[1]; /* * There are 2 possibilities - * * i) A request has a single file fd and multiple optional headers * and/or a single trailer. A trailer is defined as the last * SFV_FD_SELF vector if no file fd is present or a single SFV_FD_SELF * after a file fd. Request of this forms are processed as * direct_type = NCA_IO_DIRECT_FILE_FD. nca_ncafs_srv() will * make a single call to nca_httpd_data() to process this * request. * * ii) All other requests containing more than 1 file fd are * processed using direct_type of NCA_IO_SENDVEC. A special * case here is 2 SFV_FD_SELF after a file fd. */ { sendfilevec_t *tmpsfv = sfv; uint8_t direct_type = NCA_IO_SENDVEC; nca_sendvec_t *nsvp; int i; for (i = 0, nsvp = nca_vecp, tmpsfv = sfv; i < sfvcnt; i++, nsvp++, tmpsfv++) { struct vnode *sfv_vp; if (tmpsfv->sfv_fd == SFV_FD_SELF) { /* Header Chunks */ #ifdef DEBUG if (nca_sendfilev_debug) { char *ptr; ptr = kmem_alloc(tmpsfv->sfv_len + 1, KM_SLEEP); if (copyin((char *)tmpsfv->sfv_off, ptr, tmpsfv->sfv_len)) { cmn_err(CE_CONT, "sendfilev: " "error header " "debug copyin"); kmem_free(ptr, tmpsfv->sfv_len + 1); goto fault; } ptr[tmpsfv->sfv_len] = 0; cmn_err(CE_CONT, "nca_sendfilev: " "Header (%ld) is: %s", tmpsfv->sfv_len, ptr); kmem_free(ptr, tmpsfv->sfv_len + 1); } #endif /* There can only be one trailer */ if (sfd >= 0 && (i != sfvcnt - 1)) direct_type = NCA_IO_SENDVEC; sfv_vp = NULL; } else { direct_type = (sfd < 0) ? NCA_IO_DIRECT_FILE_FD : NCA_IO_SENDVEC; sfd = tmpsfv->sfv_fd; NCA_DEBUG4_IF(nca_sendfilev_debug, "nca_sendfilev: " "fd is %d, len = %d, off = %d\n", tmpsfv->sfv_fd, (int)tmpsfv->sfv_len, (int)tmpsfv->sfv_off); if ((fp = getf(sfd)) == NULL) goto badf; if (!(fp->f_flag & FREAD)) goto acces; sfv_vp = fp->f_vnode; releasef(sfd); } *xferred += tmpsfv->sfv_len; nsvp->sfv_fd = tmpsfv->sfv_fd; nsvp->sfv_flag = tmpsfv->sfv_flag; nsvp->sfv_off = tmpsfv->sfv_off; nsvp->sfv_len = tmpsfv->sfv_len; nsvp->sfv_vp = sfv_vp; } iop->direct_type = direct_type; } iovp->iov_base = (char *)nca_vecp; iovp->iov_len = sfvcnt * sizeof (nca_sendvec_t); iop->direct_len = sfvcnt; iop->direct = 0; uiop->uio_iovcnt = 1; uiop->uio_iov = iovp; uiop->uio_loffset = 0; uiop->uio_fmode = 0; uiop->uio_extflg = UIO_COPY_DEFAULT; uiop->uio_resid = 0; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_limit = 0; if (so->so_error != 0) { mutex_enter(&so->so_lock); error = sogeterr(so); mutex_exit(&so->so_lock); if (error != 0) goto out; } error = nca_downcall(so, iop, uiop); if (uiop->uio_resid != 0) { /* NCA consumes all uio data unless an error occur */ error = EIO; } mutex_enter(&so->so_lock); iop->first = 0; mutex_exit(&so->so_lock); out: kmem_free(nca_vecp, size); return (error); fault: error = EFAULT; goto out; badf: error = EBADF; goto out; acces: releasef(sfd); error = EACCES; goto out; } /* ARGSUSED */ static int sonca_getsockname(struct sonode *so) { /* * For AF_NCA type socket, the local address has already been updated * in so_laddr_sa as part of accept. */ return (0); }