diff options
Diffstat (limited to 'usr/src')
77 files changed, 4354 insertions, 4419 deletions
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c index 595b76248e..ed8ab462f5 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c @@ -182,8 +182,8 @@ static void usage(void); static int strioctl(int s, int cmd, char *buf, int buflen); static int setifdhcp(const char *caller, const char *ifname, int argc, char *argv[]); -static int ip_domux2fd(int *, int *, int *, int *); -static int ip_plink(int, int, int, int); +static int ip_domux2fd(int *, int *, int *, int *, int *); +static int ip_plink(int, int, int, int, int); static int modop(char *arg, char op); static int get_lun(char *); static void selectifs(int argc, char *argv[], int af, @@ -571,7 +571,7 @@ foreachinterface(void (*func)(), int argc, char *argv[], int af, if (onflags || offflags) { (void) memset(&lifrl, 0, sizeof (lifrl)); (void) strncpy(lifrl.lifr_name, lifrp->lifr_name, - sizeof (lifrl.lifr_name)); + sizeof (lifrl.lifr_name)); if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifrl) < 0) { /* * Perror0() assumes the name to be in the @@ -1209,7 +1209,7 @@ static int set_tun_esp_encr_alg(char *addr, int64_t param) { return (set_tun_algs(ESP_ENCR_ALG, - parsealg(addr, IPSEC_PROTO_ESP))); + parsealg(addr, IPSEC_PROTO_ESP))); } /* ARGSUSED */ @@ -1217,7 +1217,7 @@ static int set_tun_esp_auth_alg(char *addr, int64_t param) { return (set_tun_algs(ESP_AUTH_ALG, - parsealg(addr, IPSEC_PROTO_AH))); + parsealg(addr, IPSEC_PROTO_AH))); } /* ARGSUSED */ @@ -1225,7 +1225,7 @@ static int set_tun_ah_alg(char *addr, int64_t param) { return (set_tun_algs(AH_AUTH_ALG, - parsealg(addr, IPSEC_PROTO_AH))); + parsealg(addr, IPSEC_PROTO_AH))); } /* ARGSUSED */ @@ -2224,6 +2224,7 @@ setifgroupname(char *grpname, int64_t param) static int modlist(char *null, int64_t param) { + int muxid_fd; int muxfd; int ipfd_lowstr; int arpfd_lowstr; @@ -2233,7 +2234,7 @@ modlist(char *null, int64_t param) int 
orig_arpid; (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); - if (ip_domux2fd(&muxfd, &ipfd_lowstr, &arpfd_lowstr, + if (ip_domux2fd(&muxfd, &muxid_fd, &ipfd_lowstr, &arpfd_lowstr, &orig_arpid) < 0) { return (-1); } @@ -2255,14 +2256,15 @@ modlist(char *null, int64_t param) Perror0("cannot I_LIST for module names"); } else { for (i = 0; i < strlist.sl_nmods; i++) { - (void) printf("%d %s\n", i, - strlist.sl_modlist[i].l_name); + (void) printf("%d %s\n", i, + strlist.sl_modlist[i].l_name); } } free(strlist.sl_modlist); } } - return (ip_plink(muxfd, ipfd_lowstr, arpfd_lowstr, orig_arpid)); + return (ip_plink(muxfd, muxid_fd, ipfd_lowstr, arpfd_lowstr, + orig_arpid)); } #define MODINSERT_OP 'i' @@ -2291,27 +2293,26 @@ modremove(char *arg, int64_t param) } /* - * Open a stream on /dev/udp, pop off all undesired modules (note that - * the user may have configured autopush to add modules above or below - * udp), and push the arp module onto raw IP. + * Open a stream on /dev/udp{,6}, pop off all undesired modules (note that + * the user may have configured autopush to add modules above + * udp), and push the arp module onto the resulting stream. + * This is used to make IP+ARP be able to atomically track the muxid + * for the I_PLINKed STREAMS, thus it isn't related to ARP running the ARP + * protocol. */ static int open_arp_on_udp(char *udp_dev_name) { int fd; - boolean_t popped; if ((fd = open(udp_dev_name, O_RDWR)) == -1) { Perror2("open", udp_dev_name); return (-1); } errno = 0; - popped = _B_FALSE; while (ioctl(fd, I_POP, 0) != -1) - popped = _B_TRUE; - if (!popped) { - Perror2("cannot pop", udp_dev_name); - } else if (errno != EINVAL) { + ; + if (errno != EINVAL) { Perror2("pop", udp_dev_name); } else if (ioctl(fd, I_PUSH, ARP_MOD_NAME) == -1) { Perror2("arp PUSH", udp_dev_name); @@ -2328,8 +2329,10 @@ open_arp_on_udp(char *udp_dev_name) * global variable lifr. * * Param: - * int *udp_fd: (referenced) fd to /dev/udp (upper IP stream). 
- * int *fd: (referenced) fd to the lower IP stream. + * int *muxfd: fd to /dev/udp{,6} for I_PLINK/I_PUNLINK + * int *muxid_fd: fd to /dev/udp{,6} for LIFMUXID + * int *ipfd_lowstr: fd to the lower IP stream. + * int *arpfd_lowstr: fd to the lower ARP stream. * * Return: * -1 if operation fails, 0 otherwise. @@ -2338,12 +2341,11 @@ open_arp_on_udp(char *udp_dev_name) * for the logic of the PLINK/PUNLINK */ static int -ip_domux2fd(int *muxfd, int *ipfd_lowstr, int *arpfd_lowstr, int *orig_arpid) +ip_domux2fd(int *muxfd, int *muxid_fd, int *ipfd_lowstr, int *arpfd_lowstr, + int *orig_arpid) { - int ip_fd; uint64_t flags; char *udp_dev_name; - char *ip_dev_name; *orig_arpid = 0; (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); @@ -2353,20 +2355,18 @@ ip_domux2fd(int *muxfd, int *ipfd_lowstr, int *arpfd_lowstr, int *orig_arpid) flags = lifr.lifr_flags; if (flags & IFF_IPV4) { udp_dev_name = UDP_DEV_NAME; - ip_dev_name = IP_DEV_NAME; } else if (flags & IFF_IPV6) { udp_dev_name = UDP6_DEV_NAME; - ip_dev_name = IP6_DEV_NAME; } else { return (-1); } - if ((ip_fd = open(ip_dev_name, O_RDWR)) < 0) { - Perror2("open", ip_dev_name); + if ((*muxid_fd = open(udp_dev_name, O_RDWR)) < 0) { + Perror2("open", udp_dev_name); return (-1); } - if (ioctl(ip_fd, SIOCGLIFMUXID, (caddr_t)&lifr) < 0) { - Perror2("SIOCGLIFMUXID", ip_dev_name); + if (ioctl(*muxid_fd, SIOCGLIFMUXID, (caddr_t)&lifr) < 0) { + Perror2("SIOCGLIFMUXID", udp_dev_name); return (-1); } if (debug > 0) { @@ -2374,6 +2374,9 @@ ip_domux2fd(int *muxfd, int *ipfd_lowstr, int *arpfd_lowstr, int *orig_arpid) lifr.lifr_arp_muxid, lifr.lifr_ip_muxid); } + /* + * Use /dev/udp{,6} as the mux to avoid linkcycles. 
+ */ if ((*muxfd = open_arp_on_udp(udp_dev_name)) == -1) return (-1); @@ -2393,7 +2396,7 @@ ip_domux2fd(int *muxfd, int *ipfd_lowstr, int *arpfd_lowstr, int *orig_arpid) */ *orig_arpid = lifr.lifr_arp_muxid; lifr.lifr_arp_muxid = 0; - (void) ioctl(*muxfd, SIOCSLIFMUXID, + (void) ioctl(*muxid_fd, SIOCSLIFMUXID, (caddr_t)&lifr); *arpfd_lowstr = -1; } else { @@ -2415,7 +2418,7 @@ ip_domux2fd(int *muxfd, int *ipfd_lowstr, int *arpfd_lowstr, int *orig_arpid) /* Undo any changes we made */ if (*orig_arpid != 0) { lifr.lifr_arp_muxid = *orig_arpid; - (void) ioctl(*muxfd, SIOCSLIFMUXID, (caddr_t)&lifr); + (void) ioctl(*muxid_fd, SIOCSLIFMUXID, (caddr_t)&lifr); } return (-1); } @@ -2424,7 +2427,7 @@ ip_domux2fd(int *muxfd, int *ipfd_lowstr, int *arpfd_lowstr, int *orig_arpid) /* Undo any changes we made */ if (*orig_arpid != 0) { lifr.lifr_arp_muxid = *orig_arpid; - (void) ioctl(*muxfd, SIOCSLIFMUXID, (caddr_t)&lifr); + (void) ioctl(*muxid_fd, SIOCSLIFMUXID, (caddr_t)&lifr); } return (-1); } @@ -2439,8 +2442,10 @@ ip_domux2fd(int *muxfd, int *ipfd_lowstr, int *arpfd_lowstr, int *orig_arpid) * must be called in pairs. * * Param: - * int udp_fd: fd to /dev/udp (upper IP stream). - * int fd: fd to the lower IP stream. + * int muxfd: fd to /dev/udp{,6} for I_PLINK/I_PUNLINK + * int muxid_fd: fd to /dev/udp{,6} for LIFMUXID + * int ipfd_lowstr: fd to the lower IP stream. + * int arpfd_lowstr: fd to the lower ARP stream. * * Return: * -1 if operation fails, 0 otherwise. 
@@ -2449,7 +2454,8 @@ ip_domux2fd(int *muxfd, int *ipfd_lowstr, int *arpfd_lowstr, int *orig_arpid) * for the logic of the PLINK/PUNLINK */ static int -ip_plink(int muxfd, int ipfd_lowstr, int arpfd_lowstr, int orig_arpid) +ip_plink(int muxfd, int muxid_fd, int ipfd_lowstr, int arpfd_lowstr, + int orig_arpid) { int ip_muxid; @@ -2474,9 +2480,11 @@ ip_plink(int muxfd, int ipfd_lowstr, int arpfd_lowstr, int orig_arpid) /* Undo the changes we did in ip_domux2fd */ lifr.lifr_arp_muxid = orig_arpid; lifr.lifr_ip_muxid = ip_muxid; - (void) ioctl(muxfd, SIOCSLIFMUXID, (caddr_t)&lifr); + (void) ioctl(muxid_fd, SIOCSLIFMUXID, (caddr_t)&lifr); } + (void) close(muxfd); + (void) close(muxid_fd); return (0); } @@ -2497,6 +2505,7 @@ modop(char *arg, char op) { char *pos_p; int muxfd; + int muxid_fd; int ipfd_lowstr; /* IP stream (lower stream of mux) to be plinked */ int arpfd_lowstr; /* ARP stream (lower stream of mux) to be plinked */ struct strmodconf mod; @@ -2551,7 +2560,7 @@ modop(char *arg, char op) } mod.pos = atoi(pos_p); - if (ip_domux2fd(&muxfd, &ipfd_lowstr, &arpfd_lowstr, + if (ip_domux2fd(&muxfd, &muxid_fd, &ipfd_lowstr, &arpfd_lowstr, &orig_arpid) < 0) { free(arg_str); return (-1); @@ -2581,7 +2590,8 @@ modop(char *arg, char op) break; } free(arg_str); - return (ip_plink(muxfd, ipfd_lowstr, arpfd_lowstr, orig_arpid)); + return (ip_plink(muxfd, muxid_fd, ipfd_lowstr, arpfd_lowstr, + orig_arpid)); } /* @@ -3101,7 +3111,7 @@ configinfo(char *null, int64_t param) (void) printf(" metric %d ", lifr.lifr_metric); } if (((flags & (IFF_VIRTUAL|IFF_LOOPBACK)) != IFF_VIRTUAL) && - ioctl(s, SIOCGLIFMTU, (caddr_t)&lifr) >= 0) + ioctl(s, SIOCGLIFMTU, (caddr_t)&lifr) >= 0) (void) printf(" mtu %d", lifr.lifr_metric); /* don't print index when in compatibility mode */ @@ -3259,7 +3269,7 @@ tun_status(void) if ((protocol == AF_INET6) && (icfg_get_tunnel_encaplimit(handle, &encaplimit) == - ICFG_SUCCESS)) { + ICFG_SUCCESS)) { if (!tabbed) { (void) printf("\t"); tabbed = _B_TRUE; @@ 
-3350,7 +3360,7 @@ in_status(int force, uint64_t flags) (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); else - Perror0_exit("in_status: SIOCGLIFDSTADDR"); + Perror0_exit("in_status: SIOCGLIFDSTADDR"); } sin = (struct sockaddr_in *)&lifr.lifr_dstaddr; (void) printf("--> %s ", inet_ntoa(sin->sin_addr)); @@ -3363,7 +3373,7 @@ in_status(int force, uint64_t flags) (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); else - Perror0_exit("in_status: SIOCGLIFBRDADDR"); + Perror0_exit("in_status: SIOCGLIFBRDADDR"); } sin = (struct sockaddr_in *)&lifr.lifr_addr; if (sin->sin_addr.s_addr != 0) { @@ -3450,7 +3460,7 @@ in6_status(int force, uint64_t flags) (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); else - Perror0_exit("in_status6: SIOCGLIFDSTADDR"); + Perror0_exit("in_status6: SIOCGLIFDSTADDR"); } sin6 = (struct sockaddr_in6 *)&lifr.lifr_dstaddr; (void) printf("--> %s ", @@ -3466,7 +3476,7 @@ in6_status(int force, uint64_t flags) (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); else - Perror0_exit("in_status6: SIOCGLIFTOKEN"); + Perror0_exit("in_status6: SIOCGLIFTOKEN"); } else { sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; (void) printf("token %s/%d ", @@ -3569,7 +3579,7 @@ in_configinfo(int force, uint64_t flags) (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); else - Perror0_exit("in_configinfo: SIOCGLIFDSTADDR"); + Perror0_exit("in_configinfo: SIOCGLIFDSTADDR"); } sin = (struct sockaddr_in *)&lifr.lifr_dstaddr; (void) printf(" destination %s ", inet_ntoa(sin->sin_addr)); @@ -3582,7 +3592,7 @@ in_configinfo(int force, uint64_t flags) (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); else - Perror0_exit("in_configinfo: SIOCGLIFBRDADDR"); + Perror0_exit("in_configinfo: SIOCGLIFBRDADDR"); } sin = (struct sockaddr_in *)&lifr.lifr_addr; if (sin->sin_addr.s_addr != 0) { @@ -3678,7 +3688,7 @@ in6_configinfo(int force, uint64_t flags) (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); else - 
Perror0_exit("in6_configinfo: SIOCGLIFDSTADDR"); + Perror0_exit("in6_configinfo: SIOCGLIFDSTADDR"); } sin6 = (struct sockaddr_in6 *)&lifr.lifr_dstaddr; (void) printf(" destination %s ", @@ -3692,7 +3702,7 @@ in6_configinfo(int force, uint64_t flags) (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); else - Perror0_exit("in6_configinfo: SIOCGLIFTOKEN"); + Perror0_exit("in6_configinfo: SIOCGLIFTOKEN"); } else { sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; (void) printf(" token %s/%d ", @@ -3761,10 +3771,10 @@ get_lun(char *rsrc) * and IP uses the info in the I_PLINK message to get the muxid. * * a. STREAMS does not allow us to use /dev/ip itself as the mux. So we use - * /dev/udp[6]. + * /dev/udp{,6}. * b. SIOCGLIFMUXID returns the muxid corresponding to the V4 or V6 stream * depending on the open i.e. V4 vs V6 open. So we need to use /dev/udp - * or /dev/udp6. + * or /dev/udp6 for SIOCGLIFMUXID and SIOCSLIFMUXID. * c. We need to push ARP in order to get the required kernel support for * atomic plumbings. The actual work done by ARP is explained in arp.c * Without pushing ARP, we will still be able to plumb/unplumb. 
But @@ -3963,6 +3973,7 @@ inetunplumb(char *arg, int64_t param) { int ip_muxid, arp_muxid; int mux_fd; + int muxid_fd; char *udp_dev_name; char *strptr; uint64_t flags; @@ -3994,15 +4005,18 @@ inetunplumb(char *arg, int64_t param) else udp_dev_name = UDP_DEV_NAME; + if ((muxid_fd = open(udp_dev_name, O_RDWR)) == -1) + exit(EXIT_FAILURE); + if ((mux_fd = open_arp_on_udp(udp_dev_name)) == -1) exit(EXIT_FAILURE); (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); - if (ioctl(mux_fd, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { + if (ioctl(muxid_fd, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { Perror0_exit("unplumb: SIOCGLIFFLAGS"); } flags = lifr.lifr_flags; - if (ioctl(mux_fd, SIOCGLIFMUXID, (caddr_t)&lifr) < 0) { + if (ioctl(muxid_fd, SIOCGLIFMUXID, (caddr_t)&lifr) < 0) { Perror0_exit("unplumb: SIOCGLIFMUXID"); } arp_muxid = lifr.lifr_arp_muxid; @@ -4028,7 +4042,7 @@ inetunplumb(char *arg, int64_t param) * for consistency of IP-ARP streams. */ lifr.lifr_arp_muxid = 0; - (void) ioctl(mux_fd, SIOCSLIFMUXID, + (void) ioctl(muxid_fd, SIOCSLIFMUXID, (caddr_t)&lifr); changed_arp_muxid = _B_TRUE; } else { @@ -4048,12 +4062,13 @@ inetunplumb(char *arg, int64_t param) save_errno = errno; lifr.lifr_arp_muxid = arp_muxid; lifr.lifr_ip_muxid = ip_muxid; - (void) ioctl(mux_fd, SIOCSLIFMUXID, (caddr_t)&lifr); + (void) ioctl(muxid_fd, SIOCSLIFMUXID, (caddr_t)&lifr); errno = save_errno; } Perror0_exit("I_PUNLINK for ip"); } (void) close(mux_fd); + (void) close(muxid_fd); return (0); } @@ -4146,17 +4161,17 @@ Perror0(char *cmd) case ENXIO: (void) fprintf(stderr, "%s: %s: no such interface\n", - cmd, lifr.lifr_name); + cmd, lifr.lifr_name); break; case EPERM: (void) fprintf(stderr, "%s: %s: permission denied\n", - cmd, lifr.lifr_name); + cmd, lifr.lifr_name); break; case EEXIST: (void) fprintf(stderr, "%s: %s: already exists\n", - cmd, lifr.lifr_name); + cmd, lifr.lifr_name); break; default: { @@ -4189,12 +4204,12 @@ Perror2(char *cmd, char *str) case ENXIO: (void) fprintf(stderr, "%s: 
%s: no such interface\n", - cmd, str); + cmd, str); break; case EPERM: (void) fprintf(stderr, "%s: %s: permission denied\n", - cmd, str); + cmd, str); break; default: { diff --git a/usr/src/cmd/netadm/iu.ap.sh b/usr/src/cmd/netadm/iu.ap.sh index cf7d61c15d..abf930cade 100644 --- a/usr/src/cmd/netadm/iu.ap.sh +++ b/usr/src/cmd/netadm/iu.ap.sh @@ -43,7 +43,6 @@ case "$MACH" in usbser_edge -1 0 ldterm ttcompat usbsprl -1 0 ldterm ttcompat usbsksp -1 0 ldterm ttcompat - rts -1 0 rts [anchor] ipsecesp -1 0 ipsecesp ipsecah -1 0 ipsecah " > iu.ap @@ -73,7 +72,6 @@ case "$MACH" in usbsacm -1 0 ldterm ttcompat usbsprl -1 0 ldterm ttcompat usbsksp -1 0 ldterm ttcompat - rts -1 0 rts [anchor] ttymux -1 0 ldterm ttcompat ipsecesp -1 0 ipsecesp ipsecah -1 0 ipsecah diff --git a/usr/src/cmd/rcm_daemon/common/ip_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_rcm.c index 684df3130c..f4a896e41a 100644 --- a/usr/src/cmd/rcm_daemon/common/ip_rcm.c +++ b/usr/src/cmd/rcm_daemon/common/ip_rcm.c @@ -283,8 +283,8 @@ static int mpathd_send_cmd(mpathd_cmd_t *); static int connect_to_mpathd(int); static int modop(char *, char *, int, char); static int get_modlist(char *, ip_lif_t *); -static int ip_domux2fd(int *, int *, struct lifreq *); -static int ip_plink(int, int, struct lifreq *); +static int ip_domux2fd(int *, int *, int *, struct lifreq *); +static int ip_plink(int, int, int, struct lifreq *); static int ip_onlinelist(rcm_handle_t *, ip_cache_t *, char **, uint_t, rcm_info_t **); static int ip_offlinelist(rcm_handle_t *, ip_cache_t *, char **, uint_t, @@ -2287,7 +2287,8 @@ modop(char *name, char *arg, int pos, char op) static int get_modlist(char *name, ip_lif_t *lif) { - int udp_fd; + int mux_fd; + int muxid_fd; int fd; int i; int num_mods; @@ -2298,7 +2299,7 @@ get_modlist(char *name, ip_lif_t *lif) (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); lifr.lifr_flags = lif->li_ifflags; - if (ip_domux2fd(&udp_fd, &fd, &lifr) < 0) { + if (ip_domux2fd(&mux_fd, &muxid_fd, &fd, 
&lifr) < 0) { rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd(%s)\n"), name); return (-1); } @@ -2307,7 +2308,7 @@ get_modlist(char *name, ip_lif_t *lif) rcm_log_message(RCM_ERROR, _("IP: get_modlist(%s): I_LIST(%s) \n"), name, strerror(errno)); - (void) ip_plink(udp_fd, fd, &lifr); + (void) ip_plink(mux_fd, muxid_fd, fd, &lifr); return (-1); } @@ -2317,7 +2318,7 @@ get_modlist(char *name, ip_lif_t *lif) if (strlist.sl_modlist == NULL) { rcm_log_message(RCM_ERROR, _("IP: get_modlist(%s): %s\n"), name, strerror(errno)); - (void) ip_plink(udp_fd, fd, &lifr); + (void) ip_plink(mux_fd, muxid_fd, fd, &lifr); return (-1); } @@ -2325,7 +2326,7 @@ get_modlist(char *name, ip_lif_t *lif) rcm_log_message(RCM_ERROR, _("IP: get_modlist(%s): I_LIST error: %s\n"), name, strerror(errno)); - (void) ip_plink(udp_fd, fd, &lifr); + (void) ip_plink(mux_fd, muxid_fd, fd, &lifr); return (-1); } @@ -2336,7 +2337,7 @@ get_modlist(char *name, ip_lif_t *lif) rcm_log_message(RCM_ERROR, _("IP: get_modlist(%s): %s\n"), name, strerror(errno)); - (void) ip_plink(udp_fd, fd, &lifr); + (void) ip_plink(mux_fd, muxid_fd, fd, &lifr); return (-1); } (void) strcpy(lif->li_modules[i], strlist.sl_modlist[i].l_name); @@ -2346,7 +2347,7 @@ get_modlist(char *name, ip_lif_t *lif) free(strlist.sl_modlist); rcm_log_message(RCM_TRACE1, "IP: getmodlist(%s) success\n", name); - return (ip_plink(udp_fd, fd, &lifr)); + return (ip_plink(mux_fd, muxid_fd, fd, &lifr)); } /* @@ -2354,37 +2355,34 @@ get_modlist(char *name, ip_lif_t *lif) * Stolen from ifconfig.c */ static int -ip_domux2fd(int *udp_fd, int *fd, struct lifreq *lifr) +ip_domux2fd(int *mux_fd, int *muxid_fdp, int *fd, struct lifreq *lifr) { - int ip_fd; + int muxid_fd; char *udp_dev_name; - char *ip_dev_name; if (lifr->lifr_flags & IFF_IPV6) { - udp_dev_name = UDP6_DEV_NAME; - ip_dev_name = IP6_DEV_NAME; + udp_dev_name = UDP6_DEV_NAME; } else { - udp_dev_name = UDP_DEV_NAME; - ip_dev_name = IP_DEV_NAME; + udp_dev_name = UDP_DEV_NAME; } - if ((ip_fd = 
open(ip_dev_name, O_RDWR)) < 0) { + if ((muxid_fd = open(udp_dev_name, O_RDWR)) < 0) { rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd: open(%s) %s\n"), - ip_dev_name, strerror(errno)); + udp_dev_name, strerror(errno)); return (-1); } - if ((*udp_fd = open(udp_dev_name, O_RDWR)) < 0) { + if ((*mux_fd = open(udp_dev_name, O_RDWR)) < 0) { rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd: open(%s) %s\n"), udp_dev_name, strerror(errno)); - (void) close(ip_fd); + (void) close(muxid_fd); return (-1); } - if (ioctl(ip_fd, SIOCGLIFMUXID, (caddr_t)lifr) < 0) { + if (ioctl(muxid_fd, SIOCGLIFMUXID, (caddr_t)lifr) < 0) { rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd: SIOCGLIFMUXID(%s): %s\n"), - ip_dev_name, strerror(errno)); - (void) close(*udp_fd); - (void) close(ip_fd); + udp_dev_name, strerror(errno)); + (void) close(*mux_fd); + (void) close(muxid_fd); return (-1); } @@ -2392,25 +2390,25 @@ ip_domux2fd(int *udp_fd, int *fd, struct lifreq *lifr) "IP: ip_domux2fd: ARP_muxid %d IP_muxid %d\n", lifr->lifr_arp_muxid, lifr->lifr_ip_muxid); - if ((*fd = ioctl(*udp_fd, _I_MUXID2FD, lifr->lifr_ip_muxid)) < 0) { + if ((*fd = ioctl(*mux_fd, _I_MUXID2FD, lifr->lifr_ip_muxid)) < 0) { rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd: _I_MUXID2FD(%s): %s\n"), udp_dev_name, strerror(errno)); - (void) close(*udp_fd); - (void) close(ip_fd); + (void) close(*mux_fd); + (void) close(muxid_fd); return (-1); } - if (ioctl(*udp_fd, I_PUNLINK, lifr->lifr_ip_muxid) < 0) { + if (ioctl(*mux_fd, I_PUNLINK, lifr->lifr_ip_muxid) < 0) { rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd: I_PUNLINK(%s): %s\n"), udp_dev_name, strerror(errno)); - (void) close(*udp_fd); - (void) close(ip_fd); + (void) close(*mux_fd); + (void) close(muxid_fd); return (-1); } - /* Note: udp_fd is closed in ip_plink below */ - (void) close(ip_fd); + /* Note: mux_fd and muxid_fd are closed in ip_plink below */ + *muxid_fdp = muxid_fd; return (0); } @@ -2419,29 +2417,32 @@ ip_domux2fd(int *udp_fd, int *fd, struct lifreq *lifr) * Stolen from 
ifconfig.c */ static int -ip_plink(int udp_fd, int fd, struct lifreq *lifr) +ip_plink(int mux_fd, int muxid_fd, int fd, struct lifreq *lifr) { int mux_id; - if ((mux_id = ioctl(udp_fd, I_PLINK, fd)) < 0) { + if ((mux_id = ioctl(mux_fd, I_PLINK, fd)) < 0) { rcm_log_message(RCM_ERROR, _("IP: ip_plink I_PLINK(%s): %s\n"), UDP_DEV_NAME, strerror(errno)); - (void) close(udp_fd); + (void) close(mux_fd); + (void) close(muxid_fd); (void) close(fd); return (-1); } lifr->lifr_ip_muxid = mux_id; - if (ioctl(udp_fd, SIOCSLIFMUXID, (caddr_t)lifr) < 0) { + if (ioctl(muxid_fd, SIOCSLIFMUXID, (caddr_t)lifr) < 0) { rcm_log_message(RCM_ERROR, _("IP: ip_plink SIOCSLIFMUXID(%s): %s\n"), UDP_DEV_NAME, strerror(errno)); - (void) close(udp_fd); + (void) close(mux_fd); + (void) close(muxid_fd); (void) close(fd); return (-1); } - (void) close(udp_fd); + (void) close(mux_fd); + (void) close(muxid_fd); (void) close(fd); return (0); } diff --git a/usr/src/pkgdefs/SUNWckr/prototype_i386 b/usr/src/pkgdefs/SUNWckr/prototype_i386 index 4b1234b4d1..0f14a2f445 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_i386 +++ b/usr/src/pkgdefs/SUNWckr/prototype_i386 @@ -229,7 +229,6 @@ f none kernel/strmod/pipemod 755 root sys f none kernel/strmod/ptem 755 root sys f none kernel/strmod/redirmod 755 root sys f none kernel/strmod/rpcmod 755 root sys -l none kernel/strmod/rts=../../kernel/drv/rts l none kernel/strmod/tcp=../../kernel/drv/tcp f none kernel/strmod/timod 755 root sys f none kernel/strmod/tirdwr 755 root sys @@ -430,7 +429,6 @@ f none kernel/strmod/amd64/pipemod 755 root sys f none kernel/strmod/amd64/ptem 755 root sys f none kernel/strmod/amd64/redirmod 755 root sys f none kernel/strmod/amd64/rpcmod 755 root sys -l none kernel/strmod/amd64/rts=../../../kernel/drv/amd64/rts l none kernel/strmod/amd64/tcp=../../../kernel/drv/amd64/tcp f none kernel/strmod/amd64/timod 755 root sys f none kernel/strmod/amd64/tirdwr 755 root sys diff --git a/usr/src/pkgdefs/SUNWckr/prototype_sparc 
b/usr/src/pkgdefs/SUNWckr/prototype_sparc index 5591d8fde7..9b0a5c702e 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_sparc +++ b/usr/src/pkgdefs/SUNWckr/prototype_sparc @@ -230,7 +230,6 @@ f none kernel/strmod/sparcv9/pipemod 755 root sys f none kernel/strmod/sparcv9/ptem 755 root sys f none kernel/strmod/sparcv9/redirmod 755 root sys f none kernel/strmod/sparcv9/rpcmod 755 root sys -l none kernel/strmod/sparcv9/rts=../../../kernel/drv/sparcv9/rts l none kernel/strmod/sparcv9/tcp=../../../kernel/drv/sparcv9/tcp f none kernel/strmod/sparcv9/timod 755 root sys f none kernel/strmod/sparcv9/tirdwr 755 root sys diff --git a/usr/src/pkgdefs/common_files/i.iuap b/usr/src/pkgdefs/common_files/i.iuap index c7737998c5..17463df77f 100644 --- a/usr/src/pkgdefs/common_files/i.iuap +++ b/usr/src/pkgdefs/common_files/i.iuap @@ -74,7 +74,7 @@ do add_driver usbsacm add_driver usbsprl add_driver usbsksp - add_driver_replace rts + remove_driver rts add_driver ipsecah add_driver ipsecesp add_driver_replace zs diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 467761957a..1575dd888f 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -460,12 +460,14 @@ TSWTCL_OBJS += tswtcl.o tswtclddi.o ARP_OBJS += arpddi.o arp.o arp_netinfo.o -ICMP_OBJS += icmpddi.o icmp.o icmp_opt_data.o +ICMP_OBJS += icmpddi.o ICMP6_OBJS += icmp6ddi.o -RTS_OBJS += rtsddi.o rts.o rts_opt_data.o +RTS_OBJS += rtsddi.o +IP_ICMP_OBJS = icmp.o icmp_opt_data.o +IP_RTS_OBJS = rts.o rts_opt_data.o IP_TCP_OBJS = tcp.o tcp_fusion.o tcp_kssl.o tcp_opt_data.o tcp_sack.o \ tcp_trace.o IP_UDP_OBJS = udp.o udp_opt_data.o @@ -482,7 +484,9 @@ IP_OBJS += igmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o ip6_rts.o \ ip_multi.o ip_ndp.o ip_opt_data.o ip_rts.o ip_srcid.o \ ipddi.o ipdrop.o mi.o nd.o optcom.o snmpcom.o ipsec_loader.o \ spd.o ipclassifier.o inet_common.o ip_squeue.o squeue.o \ - ip_sadb.o ip_ftable.o radix.o \ + ip_sadb.o ip_ftable.o radix.o 
ip_dummy.o \ + $(IP_ICMP_OBJS) \ + $(IP_RTS_OBJS) \ $(IP_TCP_OBJS) \ $(IP_UDP_OBJS) \ $(IP_SCTP_OBJS) diff --git a/usr/src/uts/common/fs/sockfs/socktpi.c b/usr/src/uts/common/fs/sockfs/socktpi.c index fa130a1ff6..65a10f0690 100644 --- a/usr/src/uts/common/fs/sockfs/socktpi.c +++ b/usr/src/uts/common/fs/sockfs/socktpi.c @@ -370,8 +370,8 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name, void *nl7c = NULL; dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", - so, name, namelen, backlog, flags, - pr_state(so->so_state, so->so_mode))); + so, name, namelen, backlog, flags, + pr_state(so->so_state, so->so_mode))); tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; @@ -406,8 +406,8 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name, ASSERT(so->so_ux_bound_vp); addr = &so->so_ux_laddr; addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); - dprintso(so, 1, - ("sobind rebind UNIX: addrlen %d, addr 0x%p, vp %p\n", + dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " + "addr 0x%p, vp %p\n", addrlen, ((struct so_ux_addr *)addr)->soua_vp, so->so_ux_bound_vp)); @@ -435,7 +435,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name, * just like BSD. 
*/ so->so_laddr_len = - (socklen_t)sizeof (struct sockaddr); + (socklen_t)sizeof (struct sockaddr); ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); bzero(so->so_laddr_sa, so->so_laddr_len); so->so_laddr_sa->sa_family = so->so_family; @@ -519,7 +519,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name, ASSERT(!(so->so_state & SS_ISBOUND)); if (name == NULL) { so->so_state &= - ~(SS_ISCONNECTED|SS_ISCONNECTING); + ~(SS_ISCONNECTED|SS_ISCONNECTING); goto done; } } @@ -674,7 +674,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name, break; case AF_UNIX: { struct sockaddr_un *soun = - (struct sockaddr_un *)so->so_laddr_sa; + (struct sockaddr_un *)so->so_laddr_sa; struct vnode *vp; struct vattr vattr; @@ -700,7 +700,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name, vattr.va_mask = AT_TYPE|AT_MODE; /* NOTE: holding so_lock */ error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, - EXCL, 0, &vp, CRMKNOD, 0, 0); + EXCL, 0, &vp, CRMKNOD, 0, 0); if (error) { if (error == EEXIST) error = EADDRINUSE; @@ -794,7 +794,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name, bind_req.CONIND_number = backlog; /* NOTE: holding so_lock while sleeping */ mp = soallocproto2(&bind_req, sizeof (bind_req), - addr, addrlen, 0, _ALLOC_SLEEP); + addr, addrlen, 0, _ALLOC_SLEEP); so->so_state &= ~SS_LADDR_VALID; /* Done using so_laddr_sa - can drop the lock */ @@ -831,7 +831,7 @@ sotpi_bindlisten(struct sonode *so, struct sockaddr *name, } error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, - MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); + MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); if (error) { eprintsoline(so, error); mutex_enter(&so->so_lock); @@ -879,8 +879,8 @@ skip_transport: * a sockaddr_in. 
*/ addr = sogetoff(mp, bind_ack->ADDR_offset, - bind_ack->ADDR_length, - __TPI_ALIGN_SIZE); + bind_ack->ADDR_length, + __TPI_ALIGN_SIZE); if (addr == NULL) { freemsg(mp); error = EPROTO; @@ -1027,9 +1027,9 @@ skip_transport: error = EADDRINUSE; eprintsoline(so, error); eprintso(so, - ("addrlen %d, addr 0x%x, vp %p\n", - addrlen, *((int *)addr), - so->so_ux_bound_vp)); + ("addrlen %d, addr 0x%x, vp %p\n", + addrlen, *((int *)addr), + so->so_ux_bound_vp)); goto done; } so->so_state |= SS_LADDR_VALID; @@ -1155,7 +1155,7 @@ sotpi_unbind(struct sonode *so, int flags) mblk_t *mp; dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", - so, flags, pr_state(so->so_state, so->so_mode))); + so, flags, pr_state(so->so_state, so->so_mode))); ASSERT(MUTEX_HELD(&so->so_lock)); ASSERT(so->so_flag & SOLOCKED); @@ -1178,7 +1178,7 @@ sotpi_unbind(struct sonode *so, int flags) mp = soallocproto1(&unbind_req, sizeof (unbind_req), 0, _ALLOC_SLEEP); error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, - MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); + MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); mutex_enter(&so->so_lock); if (error) { eprintsoline(so, error); @@ -1243,7 +1243,7 @@ sotpi_listen(struct sonode *so, int backlog) int error = 0; dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", - so, backlog, pr_state(so->so_state, so->so_mode))); + so, backlog, pr_state(so->so_state, so->so_mode))); if (so->so_serv_type == T_CLTS) return (EOPNOTSUPP); @@ -1257,8 +1257,8 @@ sotpi_listen(struct sonode *so, int backlog) */ if (so->so_state & SS_ACCEPTCONN && !((so->so_family == AF_INET || so->so_family == AF_INET6) && - /*CONSTCOND*/ - !solisten_tpi_tcp)) + /*CONSTCOND*/ + !solisten_tpi_tcp)) return (0); if (so->so_state & SS_ISCONNECTED) @@ -1302,7 +1302,7 @@ sotpi_listen(struct sonode *so, int backlog) goto done; } error = sotpi_bindlisten(so, NULL, 0, backlog, - _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); + _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); } else if (backlog > 0) { /* * AF_INET{,6} hack to avoid 
losing the port. @@ -1318,7 +1318,7 @@ sotpi_listen(struct sonode *so, int backlog) goto done; } error = sotpi_bindlisten(so, NULL, 0, backlog, - _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); + _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); } else { so->so_state |= SS_ACCEPTCONN; so->so_backlog = backlog; @@ -1348,7 +1348,7 @@ sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) mblk_t *mp; dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", - so, seqno, flags, pr_state(so->so_state, so->so_mode))); + so, seqno, flags, pr_state(so->so_state, so->so_mode))); if (!(flags & _SODISCONNECT_LOCK_HELD)) { mutex_enter(&so->so_lock); @@ -1379,7 +1379,7 @@ sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) mp = soallocproto1(&discon_req, sizeof (discon_req), 0, _ALLOC_SLEEP); error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, - MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); + MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); mutex_enter(&so->so_lock); if (error) { eprintsoline(so, error); @@ -1429,7 +1429,7 @@ sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) size_t sinlen; dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", - so, fflag, nsop, pr_state(so->so_state, so->so_mode))); + so, fflag, nsop, pr_state(so->so_state, so->so_mode))); /* * Defer single-threading the accepting socket until @@ -1525,7 +1525,7 @@ again: */ VN_HOLD(so->so_accessvp); nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, - so->so_protocol, so->so_version, so, &error); + so->so_protocol, so->so_version, so, &error); if (nso == NULL) { ASSERT(error != 0); /* @@ -1753,7 +1753,7 @@ again: */ mutex_exit(&nso->so_lock); (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, - CRED()); + CRED()); VN_RELE(nvp); goto again; } @@ -1947,8 +1947,8 @@ sotpi_connect(struct sonode *so, boolean_t need_unlock; dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", - so, name, namelen, fflag, flags, - pr_state(so->so_state, so->so_mode))); + so, name, namelen, fflag, flags, + 
pr_state(so->so_state, so->so_mode))); /* * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to @@ -2058,14 +2058,14 @@ sotpi_connect(struct sonode *so, int val; if ((so->so_family == AF_INET || - so->so_family == AF_INET6) && + so->so_family == AF_INET6) && (so->so_type == SOCK_DGRAM || - so->so_type == SOCK_RAW) && + so->so_type == SOCK_RAW) && /*CONSTCOND*/ !soconnect_tpi_udp) { /* XXX What about implicitly unbinding here? */ error = sodisconnect(so, -1, - _SODISCONNECT_LOCK_HELD); + _SODISCONNECT_LOCK_HELD); } else { so->so_state &= ~(SS_ISCONNECTED | SS_ISCONNECTING | @@ -2078,7 +2078,7 @@ sotpi_connect(struct sonode *so, val = 0; (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, - &val, (t_uscalar_t)sizeof (val)); + &val, (t_uscalar_t)sizeof (val)); mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ @@ -2149,12 +2149,12 @@ sotpi_connect(struct sonode *so, src = so->so_laddr_sa; srclen = (t_uscalar_t)so->so_laddr_len; dprintso(so, 1, - ("sotpi_connect UNIX: srclen %d, src %p\n", - srclen, src)); + ("sotpi_connect UNIX: srclen %d, src %p\n", + srclen, src)); error = so_ux_addr_xlate(so, - so->so_faddr_sa, (socklen_t)so->so_faddr_len, - (flags & _SOCONNECT_XPG4_2), - &addr, &addrlen); + so->so_faddr_sa, (socklen_t)so->so_faddr_len, + (flags & _SOCONNECT_XPG4_2), + &addr, &addrlen); if (error) goto bad; } @@ -2189,7 +2189,7 @@ sotpi_connect(struct sonode *so, val = 1; (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, - &val, (t_uscalar_t)sizeof (val)); + &val, (t_uscalar_t)sizeof (val)); mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ @@ -2228,7 +2228,7 @@ sotpi_connect(struct sonode *so, IN6_IS_ADDR_V4MAPPED_ANY( &((sin6_t *)so->so_laddr_sa)->sin6_addr) || ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) - so->so_state &= ~SS_LADDR_VALID; + so->so_state &= ~SS_LADDR_VALID; break; default: @@ -2265,9 +2265,9 @@ sotpi_connect(struct sonode *so, toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); toh.status 
= 0; conn_req.OPT_length = - (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); + (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + - _TPI_ALIGN_TOPT(addrlen)); + _TPI_ALIGN_TOPT(addrlen)); soappendmsg(mp, &conn_req, sizeof (conn_req)); soappendmsg(mp, addr, addrlen); @@ -2290,7 +2290,7 @@ sotpi_connect(struct sonode *so, #endif /* C2_AUDIT */ error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, - MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); + MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); mp = NULL; mutex_enter(&so->so_lock); if (error != 0) @@ -2375,7 +2375,7 @@ sotpi_shutdown(struct sonode *so, int how) int error = 0; dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", - so, how, pr_state(so->so_state, so->so_mode))); + so, how, pr_state(so->so_state, so->so_mode))); mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ @@ -2390,7 +2390,7 @@ sotpi_shutdown(struct sonode *so, int how) error = ENOTCONN; if (xnet_check_print) { printf("sockfs: X/Open shutdown check " - "caused ENOTCONN\n"); + "caused ENOTCONN\n"); } } goto done; @@ -2424,7 +2424,7 @@ sotpi_shutdown(struct sonode *so, int how) * Assumes that the SS_CANT* flags are never cleared in the above code. */ state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - - (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); + (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); switch (state_change) { @@ -2525,7 +2525,7 @@ sotpi_shutdown(struct sonode *so, int how) * Note that there is no T_OK_ACK for ordrel_req. 
*/ error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, - MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); + MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); mutex_enter(&so->so_lock); if (error) { eprintsoline(so, error); @@ -2574,7 +2574,7 @@ so_unix_close(struct sonode *so) return; dprintso(so, 1, ("so_unix_close(%p) %s\n", - so, pr_state(so->so_state, so->so_mode))); + so, pr_state(so->so_state, so->so_mode))); toh.level = SOL_SOCKET; toh.name = SO_UNIX_CLOSE; @@ -2615,7 +2615,7 @@ so_unix_close(struct sonode *so) * Length and family checks. */ error = so_addr_verify(so, so->so_faddr_sa, - (t_uscalar_t)so->so_faddr_len); + (t_uscalar_t)so->so_faddr_len); if (error) { eprintsoline(so, error); return; @@ -2638,12 +2638,12 @@ so_unix_close(struct sonode *so) src = so->so_laddr_sa; srclen = (socklen_t)so->so_laddr_len; dprintso(so, 1, - ("so_ux_close: srclen %d, src %p\n", - srclen, src)); + ("so_ux_close: srclen %d, src %p\n", + srclen, src)); error = so_ux_addr_xlate(so, - so->so_faddr_sa, - (socklen_t)so->so_faddr_len, 0, - &addr, &addrlen); + so->so_faddr_sa, + (socklen_t)so->so_faddr_len, 0, + &addr, &addrlen); if (error) { eprintsoline(so, error); return; @@ -2655,7 +2655,7 @@ so_unix_close(struct sonode *so) if (srclen == 0) { tudr.OPT_length = (t_scalar_t)sizeof (toh); tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + - _TPI_ALIGN_TOPT(addrlen)); + _TPI_ALIGN_TOPT(addrlen)); size = tudr.OPT_offset + tudr.OPT_length; /* NOTE: holding so_lock while sleeping */ @@ -2676,7 +2676,7 @@ so_unix_close(struct sonode *so) toh2.level = SOL_SOCKET; toh2.name = SO_SRCADDR; toh2.len = (t_uscalar_t)(srclen + - sizeof (struct T_opthdr)); + sizeof (struct T_opthdr)); toh2.status = 0; size = tudr.OPT_offset + tudr.OPT_length; @@ -2694,7 +2694,7 @@ so_unix_close(struct sonode *so) } mutex_exit(&so->so_lock); error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, - MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); + MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 
mutex_enter(&so->so_lock); } @@ -2761,9 +2761,9 @@ sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; } dprintso(so, 1, - ("after recvoob(%p): counts %d/%d state %s\n", - so, so->so_oobsigcnt, - so->so_oobcnt, pr_state(so->so_state, so->so_mode))); + ("after recvoob(%p): counts %d/%d state %s\n", + so, so->so_oobsigcnt, + so->so_oobcnt, pr_state(so->so_state, so->so_mode))); ASSERT(so_verify_oobstate(so)); mutex_exit(&so->so_lock); @@ -2775,7 +2775,7 @@ sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) n = MIN(n, uiop->uio_resid); if (n > 0) error = uiomove(nmp->b_rptr, n, - UIO_READ, uiop); + UIO_READ, uiop); if (error) break; nmp = nmp->b_cont; @@ -2807,9 +2807,9 @@ sorecv_update_oobstate(struct sonode *so) mutex_enter(&so->so_lock); ASSERT(so_verify_oobstate(so)); dprintso(so, 1, - ("sorecv_update_oobstate: counts %d/%d state %s\n", - so->so_oobsigcnt, - so->so_oobcnt, pr_state(so->so_state, so->so_mode))); + ("sorecv_update_oobstate: counts %d/%d state %s\n", + so->so_oobsigcnt, + so->so_oobcnt, pr_state(so->so_state, so->so_mode))); if (so->so_oobsigcnt == 0) { /* No more pending oob indications */ so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); @@ -2920,8 +2920,8 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) msg->msg_flags = 0; dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", - so, msg, flags, - pr_state(so->so_state, so->so_mode), so->so_error)); + so, msg, flags, + pr_state(so->so_state, so->so_mode), so->so_error)); /* * If we are not connected because we have never been connected @@ -2962,7 +2962,7 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) msg->msg_namelen = 0; dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", - namelen, controllen)); + namelen, controllen)); mutex_enter(&so->so_lock); /* @@ -3201,8 +3201,8 @@ retry: /* Caller wants source address */ 
addrlen = tpr->unitdata_ind.SRC_length; addr = sogetoff(mp, - tpr->unitdata_ind.SRC_offset, - addrlen, 1); + tpr->unitdata_ind.SRC_offset, + addrlen, 1); if (addr == NULL) { freemsg(mp); error = EPROTO; @@ -3229,8 +3229,8 @@ retry: * Determine how large cmsg buffer is needed. */ opt = sogetoff(mp, - tpr->unitdata_ind.OPT_offset, - optlen, __TPI_ALIGN_SIZE); + tpr->unitdata_ind.OPT_offset, + optlen, __TPI_ALIGN_SIZE); if (opt == NULL) { freemsg(mp); @@ -3241,7 +3241,7 @@ retry: if (so->so_family == AF_UNIX) so_getopt_srcaddr(opt, optlen, &addr, &addrlen); ncontrollen = so_cmsglen(mp, opt, optlen, - !(flags & MSG_XPG4_2)); + !(flags & MSG_XPG4_2)); if (controllen != 0) controllen = ncontrollen; else if (ncontrollen != 0) @@ -3274,13 +3274,13 @@ retry: control = kmem_zalloc(controllen, KM_SLEEP); error = so_opt2cmsg(mp, opt, optlen, - !(flags & MSG_XPG4_2), - control, controllen); + !(flags & MSG_XPG4_2), + control, controllen); if (error) { freemsg(mp); if (msg->msg_namelen != 0) kmem_free(msg->msg_name, - msg->msg_namelen); + msg->msg_namelen); kmem_free(control, controllen); eprintsoline(so, error); goto err; @@ -3315,8 +3315,8 @@ retry: * Determine how large cmsg buffer is needed. 
*/ opt = sogetoff(mp, - tpr->optdata_ind.OPT_offset, - optlen, __TPI_ALIGN_SIZE); + tpr->optdata_ind.OPT_offset, + optlen, __TPI_ALIGN_SIZE); if (opt == NULL) { freemsg(mp); @@ -3326,7 +3326,7 @@ retry: } ncontrollen = so_cmsglen(mp, opt, optlen, - !(flags & MSG_XPG4_2)); + !(flags & MSG_XPG4_2)); if (controllen != 0) controllen = ncontrollen; else if (ncontrollen != 0) @@ -3344,8 +3344,8 @@ retry: control = kmem_zalloc(controllen, KM_SLEEP); error = so_opt2cmsg(mp, opt, optlen, - !(flags & MSG_XPG4_2), - control, controllen); + !(flags & MSG_XPG4_2), + control, controllen); if (error) { freemsg(mp); kmem_free(control, controllen); @@ -3388,11 +3388,11 @@ retry: } case T_EXDATA_IND: { dprintso(so, 1, - ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " - "state %s\n", - so->so_oobsigcnt, so->so_oobcnt, - saved_resid - uiop->uio_resid, - pr_state(so->so_state, so->so_mode))); + ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " + "state %s\n", + so->so_oobsigcnt, so->so_oobcnt, + saved_resid - uiop->uio_resid, + pr_state(so->so_state, so->so_mode))); /* * kstrgetmsg handles MSGMARK so there is nothing to * inspect in the T_EXDATA_IND. @@ -3418,11 +3418,11 @@ retry: * underneath us. 
*/ dprintso(so, 1, - ("sotpi_recvmsg: consume EXDATA_IND " - "counts %d/%d state %s\n", - so->so_oobsigcnt, - so->so_oobcnt, - pr_state(so->so_state, so->so_mode))); + ("sotpi_recvmsg: consume EXDATA_IND " + "counts %d/%d state %s\n", + so->so_oobsigcnt, + so->so_oobcnt, + pr_state(so->so_state, so->so_mode))); pflag = MSG_ANY | MSG_DELAYERROR; if (so->so_mode & SM_ATOMIC) @@ -3432,7 +3432,7 @@ retry: mp = NULL; error = kstrgetmsg(SOTOV(so), &mp, uiop, - &pri, &pflag, (clock_t)-1, &rval); + &pri, &pflag, (clock_t)-1, &rval); ASSERT(uiop->uio_resid == saved_resid); if (error) { @@ -3573,8 +3573,8 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", srclen, src)); error = so_ux_addr_xlate(so, name, namelen, - (flags & MSG_XPG4_2), - &addr, &addrlen); + (flags & MSG_XPG4_2), + &addr, &addrlen); if (error) { eprintsoline(so, error); return (error); @@ -3587,7 +3587,7 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, srclen = 0; } optlen = so_optlen(control, controllen, - !(flags & MSG_XPG4_2)); + !(flags & MSG_XPG4_2)); tudr.PRIM_type = T_UNITDATA_REQ; tudr.DEST_length = addrlen; tudr.DEST_offset = (t_scalar_t)sizeof (tudr); @@ -3597,7 +3597,7 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, else tudr.OPT_length = optlen; tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + - _TPI_ALIGN_TOPT(addrlen)); + _TPI_ALIGN_TOPT(addrlen)); size = tudr.OPT_offset + tudr.OPT_length; @@ -3605,7 +3605,7 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, * File descriptors only when SM_FDPASSING set. 
*/ error = so_getfdopt(control, controllen, - !(flags & MSG_XPG4_2), &fds, &fdlen); + !(flags & MSG_XPG4_2), &fds, &fdlen); if (error) return (error); if (fdlen != -1) { @@ -3635,7 +3635,7 @@ sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, toh.level = SOL_SOCKET; toh.name = SO_FILEP; toh.len = fdbuf->fd_size + - (t_uscalar_t)sizeof (struct T_opthdr); + (t_uscalar_t)sizeof (struct T_opthdr); toh.status = 0; soappendmsg(mp, &toh, sizeof (toh)); soappendmsg(mp, fdbuf, fdbuf->fd_size); @@ -3701,7 +3701,7 @@ sosend_svccmsg(struct sonode *so, struct T_opthdr toh; dprintso(so, 1, - ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); + ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); /* * Has to be bound and connected. However, since no locks are @@ -3733,7 +3733,7 @@ sosend_svccmsg(struct sonode *so, iosize = uiop->uio_resid; } dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", - tdr.DATA_flag, iosize)); + tdr.DATA_flag, iosize)); optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); tdr.OPT_length = optlen; @@ -3744,7 +3744,7 @@ sosend_svccmsg(struct sonode *so, * File descriptors only when SM_FDPASSING set. 
*/ error = so_getfdopt(control, controllen, - !(flags & MSG_XPG4_2), &fds, &fdlen); + !(flags & MSG_XPG4_2), &fds, &fdlen); if (error) return (error); if (fdlen != -1) { @@ -3772,7 +3772,7 @@ sosend_svccmsg(struct sonode *so, toh.level = SOL_SOCKET; toh.name = SO_FILEP; toh.len = fdbuf->fd_size + - (t_uscalar_t)sizeof (struct T_opthdr); + (t_uscalar_t)sizeof (struct T_opthdr); toh.status = 0; soappendmsg(mp, &toh, sizeof (toh)); soappendmsg(mp, fdbuf, fdbuf->fd_size); @@ -3786,7 +3786,7 @@ sosend_svccmsg(struct sonode *so, ASSERT(mp->b_wptr <= mp->b_datap->db_lim); error = kstrputmsg(SOTOV(so), mp, uiop, iosize, - 0, MSG_BAND, 0); + 0, MSG_BAND, 0); if (error) { if (!first && error == EWOULDBLOCK) return (0); @@ -3880,11 +3880,11 @@ sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, src = so->so_laddr_sa; srclen = (socklen_t)so->so_laddr_len; dprintso(so, 1, - ("sosend_dgram UNIX: srclen %d, src %p\n", - srclen, src)); + ("sosend_dgram UNIX: srclen %d, src %p\n", + srclen, src)); error = so_ux_addr_xlate(so, name, namelen, - (flags & MSG_XPG4_2), - &addr, &addrlen); + (flags & MSG_XPG4_2), + &addr, &addrlen); if (error) { eprintsoline(so, error); goto done; @@ -3922,9 +3922,9 @@ sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, ssize_t size; tudr.OPT_length = (t_scalar_t)(sizeof (toh) + - _TPI_ALIGN_TOPT(srclen)); + _TPI_ALIGN_TOPT(srclen)); tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + - _TPI_ALIGN_TOPT(addrlen)); + _TPI_ALIGN_TOPT(addrlen)); toh.level = SOL_SOCKET; toh.name = SO_SRCADDR; @@ -3982,8 +3982,8 @@ sosend_svc(struct sonode *so, int first = 1; dprintso(so, 1, - ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", - so, uiop->uio_resid, prim, sflag)); + ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", + so, uiop->uio_resid, prim, sflag)); /* * Has to be bound and connected. 
However, since no locks are @@ -4011,7 +4011,7 @@ sosend_svc(struct sonode *so, iosize = uiop->uio_resid; } dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", - prim, tdr.MORE_flag, iosize)); + prim, tdr.MORE_flag, iosize)); mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); if (mp == NULL) { /* @@ -4025,7 +4025,7 @@ sosend_svc(struct sonode *so, } error = kstrputmsg(SOTOV(so), mp, uiop, iosize, - 0, sflag | MSG_BAND, 0); + 0, sflag | MSG_BAND, 0); if (error) { if (!first && error == EWOULDBLOCK) return (0); @@ -4077,8 +4077,8 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) int flags; dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", - so, msg, msg->msg_flags, - pr_state(so->so_state, so->so_mode), so->so_error)); + so, msg, msg->msg_flags, + pr_state(so->so_state, so->so_mode), so->so_error)); mutex_enter(&so->so_lock); so_state = so->so_state; @@ -4176,8 +4176,8 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; addrlen = tudi->DEST_length; addr = sogetoff(so->so_eaddr_mp, - tudi->DEST_offset, - addrlen, 1); + tudi->DEST_offset, + addrlen, 1); ASSERT(addr); /* Checked by strsock_proto */ switch (so->so_family) { case AF_INET: { @@ -4202,7 +4202,7 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) namelen == addrlen && sin1->sin6_port == sin2->sin6_port && IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, - &sin2->sin6_addr)) + &sin2->sin6_addr)) match = B_TRUE; break; } @@ -4218,9 +4218,9 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) mutex_exit(&so->so_lock); #ifdef DEBUG dprintso(so, 0, - ("sockfs delayed error %d for %s\n", - error, - pr_addr(so->so_family, name, namelen))); + ("sockfs delayed error %d for %s\n", + error, + pr_addr(so->so_family, name, namelen))); #endif /* DEBUG */ return (error); } @@ -4237,7 +4237,7 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct 
uio *uiop) val = 1; error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, - &val, (t_uscalar_t)sizeof (val)); + &val, (t_uscalar_t)sizeof (val)); if (error) return (error); dontroute = 1; @@ -4258,9 +4258,9 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) goto done; } error = sosend_svccmsg(so, uiop, - !(flags & MSG_EOR), - msg->msg_control, msg->msg_controllen, - flags); + !(flags & MSG_EOR), + msg->msg_control, msg->msg_controllen, + flags); } goto done; } @@ -4314,9 +4314,9 @@ sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) */ if (!dontroute) return (sosend_svc(so, uiop, prim, - !(flags & MSG_EOR), sflag)); + !(flags & MSG_EOR), sflag)); error = sosend_svc(so, uiop, prim, - !(flags & MSG_EOR), sflag); + !(flags & MSG_EOR), sflag); } ASSERT(dontroute); done: @@ -4325,7 +4325,7 @@ done: val = 0; (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, - &val, (t_uscalar_t)sizeof (val)); + &val, (t_uscalar_t)sizeof (val)); } return (error); } @@ -4340,7 +4340,7 @@ sodgram_direct(struct sonode *so, struct sockaddr *name, socklen_t namelen, struct uio *uiop, int flags) { struct T_unitdata_req tudr; - mblk_t *mp; + mblk_t *mp = NULL; int error = 0; void *addr; socklen_t addrlen; @@ -4348,6 +4348,8 @@ sodgram_direct(struct sonode *so, struct sockaddr *name, struct stdata *stp = SOTOV(so)->v_stream; int so_state; queue_t *udp_wq; + boolean_t connected; + mblk_t *mpdata = NULL; ASSERT(name != NULL && namelen != 0); ASSERT(!(so->so_mode & SM_CONNREQUIRED)); @@ -4375,6 +4377,26 @@ sodgram_direct(struct sonode *so, struct sockaddr *name, so_state = so->so_state; + connected = so_state & SS_ISCONNECTED; + if (!connected) { + tudr.PRIM_type = T_UNITDATA_REQ; + tudr.DEST_length = addrlen; + tudr.DEST_offset = (t_scalar_t)sizeof (tudr); + tudr.OPT_length = 0; + tudr.OPT_offset = 0; + + mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, + _ALLOC_INTR); + if (mp == NULL) { + /* + * Caught a signal waiting for memory. 
+ * Let send* return EINTR. + */ + error = EINTR; + goto done; + } + } + /* * For UDP we don't break up the copyin into smaller pieces * as in the TCP case. That means if ENOMEM is returned by @@ -4385,42 +4407,35 @@ sodgram_direct(struct sonode *so, struct sockaddr *name, */ udp_wq = stp->sd_wrq->q_next; if (canput(udp_wq) && - (mp = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { - ASSERT(DB_TYPE(mp) == M_DATA); + (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { + ASSERT(DB_TYPE(mpdata) == M_DATA); ASSERT(uiop->uio_resid == 0); + if (!connected) + linkb(mp, mpdata); + else + mp = mpdata; #ifdef C2_AUDIT if (audit_active) audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); #endif /* C2_AUDIT */ - udp_wput_data(udp_wq, mp, addr, addrlen); + + udp_wput(udp_wq, mp); return (0); } - if (error != 0 && error != ENOMEM) + + ASSERT(mpdata == NULL); + if (error != 0 && error != ENOMEM) { + freemsg(mp); return (error); + } /* * For connected, let strwrite() handle the blocking case. * Otherwise we fall thru and use kstrputmsg(). */ - if (so_state & SS_ISCONNECTED) + if (connected) return (strwrite(SOTOV(so), uiop, CRED())); - tudr.PRIM_type = T_UNITDATA_REQ; - tudr.DEST_length = addrlen; - tudr.DEST_offset = (t_scalar_t)sizeof (tudr); - tudr.OPT_length = 0; - tudr.OPT_offset = 0; - - mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, _ALLOC_INTR); - if (mp == NULL) { - /* - * Caught a signal waiting for memory. - * Let send* return EINTR. 
- */ - error = EINTR; - goto done; - } - #ifdef C2_AUDIT if (audit_active) audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); @@ -4562,7 +4577,7 @@ sotpi_getpeername(struct sonode *so) k_sigset_t smask; dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", - so, pr_state(so->so_state, so->so_mode))); + so, pr_state(so->so_state, so->so_mode))); mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ @@ -4580,8 +4595,8 @@ sotpi_getpeername(struct sonode *so) } #ifdef DEBUG dprintso(so, 1, ("sotpi_getpeername (local): %s\n", - pr_addr(so->so_family, so->so_faddr_sa, - (t_uscalar_t)so->so_faddr_len))); + pr_addr(so->so_family, so->so_faddr_sa, + (t_uscalar_t)so->so_faddr_len))); #endif /* DEBUG */ if (so->so_family == AF_UNIX) { @@ -4611,7 +4626,7 @@ sotpi_getpeername(struct sonode *so) res = 0; ASSERT(CRED()); error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, - 0, K_TO_K, CRED(), &res); + 0, K_TO_K, CRED(), &res); sigunintr(&smask); mutex_enter(&so->so_lock); @@ -4650,8 +4665,8 @@ sotpi_getpeername(struct sonode *so) kmem_free(addr, addrlen); #ifdef DEBUG dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", - pr_addr(so->so_family, so->so_faddr_sa, - (t_uscalar_t)so->so_faddr_len))); + pr_addr(so->so_family, so->so_faddr_sa, + (t_uscalar_t)so->so_faddr_len))); #endif /* DEBUG */ done: so_unlock_single(so, SOLOCKED); @@ -4672,7 +4687,7 @@ sotpi_getsockname(struct sonode *so) k_sigset_t smask; dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", - so, pr_state(so->so_state, so->so_mode))); + so, pr_state(so->so_state, so->so_mode))); mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ @@ -4693,8 +4708,8 @@ sotpi_getsockname(struct sonode *so) } #ifdef DEBUG dprintso(so, 1, ("sotpi_getsockname (local): %s\n", - pr_addr(so->so_family, so->so_laddr_sa, - (t_uscalar_t)so->so_laddr_len))); + pr_addr(so->so_family, so->so_laddr_sa, + (t_uscalar_t)so->so_laddr_len))); #endif /* DEBUG */ if (so->so_family == AF_UNIX) { /* Transport has different name 
space - return local info */ @@ -4726,7 +4741,7 @@ sotpi_getsockname(struct sonode *so) res = 0; ASSERT(CRED()); error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, - 0, K_TO_K, CRED(), &res); + 0, K_TO_K, CRED(), &res); sigunintr(&smask); mutex_enter(&so->so_lock); @@ -4762,8 +4777,8 @@ sotpi_getsockname(struct sonode *so) kmem_free(addr, addrlen); #ifdef DEBUG dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", - pr_addr(so->so_family, so->so_laddr_sa, - (t_uscalar_t)so->so_laddr_len))); + pr_addr(so->so_family, so->so_laddr_sa, + (t_uscalar_t)so->so_laddr_len))); #endif /* DEBUG */ done: so_unlock_single(so, SOLOCKED); @@ -4794,8 +4809,8 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name, uint32_t value; dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", - so, level, option_name, optval, optlenp, - pr_state(so->so_state, so->so_mode))); + so, level, option_name, optval, optlenp, + pr_state(so->so_state, so->so_mode))); mutex_enter(&so->so_lock); so_lock_single(so); /* Set SOLOCKED */ @@ -4920,7 +4935,7 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name, if (lvalue == 0) { mutex_exit(&so->so_lock); (void) strqget(strvp2wq(SOTOV(so))->q_next, - QHIWAT, 0, &lvalue); + QHIWAT, 0, &lvalue); mutex_enter(&so->so_lock); dprintso(so, 1, ("got SO_SNDBUF %ld from q\n", lvalue)); @@ -4950,7 +4965,7 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name, if (lvalue == 0) { mutex_exit(&so->so_lock); (void) strqget(RD(strvp2wq(SOTOV(so))), - QHIWAT, 0, &lvalue); + QHIWAT, 0, &lvalue); mutex_enter(&so->so_lock); dprintso(so, 1, ("got SO_RCVBUF %ld from q\n", lvalue)); @@ -5010,7 +5025,7 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name, &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); /* Let option management work in the presence of data flow control */ error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, - MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); + MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 
0); mp = NULL; mutex_enter(&so->so_lock); if (error) { @@ -5031,7 +5046,7 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name, ASSERT(mp); optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, - optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); + optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); if (opt_res == NULL) { if (option != NULL) { /* We have a fallback value */ @@ -5046,7 +5061,7 @@ sotpi_getsockopt(struct sonode *so, int level, int option_name, /* check to ensure that the option is within bounds */ if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || - (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { + (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { if (option != NULL) { /* We have a fallback value */ error = 0; @@ -5089,8 +5104,8 @@ sotpi_setsockopt(struct sonode *so, int level, int option_name, boolean_t handled = B_FALSE; dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", - so, level, option_name, optval, optlen, - pr_state(so->so_state, so->so_mode))); + so, level, option_name, optval, optlen, + pr_state(so->so_state, so->so_mode))); /* X/Open requires this check */ @@ -5278,7 +5293,7 @@ sotpi_setsockopt(struct sonode *so, int level, int option_name, &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); /* Let option management work in the presence of data flow control */ error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, - MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); + MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); mp = NULL; mutex_enter(&so->so_lock); if (error) { @@ -5384,13 +5399,13 @@ done: case SO_DGRAM_ERRIND: if (intvalue != 0) { dprintso(so, 1, - ("sotpi_setsockopt: setting 0x%x\n", - option_name)); + ("sotpi_setsockopt: setting 0x%x\n", + option_name)); so->so_options |= option_name; } else { dprintso(so, 1, - ("sotpi_setsockopt: clearing 0x%x\n", - option_name)); + ("sotpi_setsockopt: clearing 0x%x\n", + option_name)); 
so->so_options &= ~option_name; } break; diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c index c2393a2eac..a40ac5a874 100644 --- a/usr/src/uts/common/inet/arp/arp.c +++ b/usr/src/uts/common/inet/arp/arp.c @@ -269,20 +269,20 @@ static arpparam_t arp_param_arr[] = { #define as_broadcast_interval as_param_arr[11].arp_param_value #define as_defend_period as_param_arr[12].arp_param_value -static struct module_info info = { +static struct module_info arp_mod_info = { 0, "arp", 0, INFPSZ, 512, 128 }; -static struct qinit rinit = { - (pfi_t)ar_rput, NULL, ar_open, ar_close, NULL, &info +static struct qinit arprinit = { + (pfi_t)ar_rput, NULL, ar_open, ar_close, NULL, &arp_mod_info }; -static struct qinit winit = { - (pfi_t)ar_wput, (pfi_t)ar_wsrv, ar_open, ar_close, NULL, &info +static struct qinit arpwinit = { + (pfi_t)ar_wput, (pfi_t)ar_wsrv, ar_open, ar_close, NULL, &arp_mod_info }; struct streamtab arpinfo = { - &rinit, &winit + &arprinit, &arpwinit }; /* @@ -2491,15 +2491,15 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) } return (ar_ce_create( arl, - arma->arma_proto, - hw_addr, - hw_addr_len, - proto_addr, - proto_addr_len, - proto_mask, - proto_extract_mask, - hw_extract_start, - arma->arma_flags | ACE_F_MAPPING)); + arma->arma_proto, + hw_addr, + hw_addr_len, + proto_addr, + proto_addr_len, + proto_mask, + proto_extract_mask, + hw_extract_start, + arma->arma_flags | ACE_F_MAPPING)); } static boolean_t @@ -2587,17 +2587,18 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) ar->ar_as = as; /* - * Probe for the DLPI info if we are not pushed on IP. Wait for + * Probe for the DLPI info if we are not pushed on IP or UDP. Wait for * the reply. In case of error call ar_close() which will take * care of doing everything required to close this instance, such * as freeing the arl, restarting the timer on a different queue etc. 
*/ - if (strcmp(q->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0) { + if (strcmp(q->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0 || + strcmp(q->q_next->q_qinfo->qi_minfo->mi_idname, "udp") == 0) { arc_t *arc; /* - * We are pushed directly on top of IP. There is no need to - * send down a DL_INFO_REQ. Return success. This could + * We are pushed directly on top of IP or UDP. There is no need + * to send down a DL_INFO_REQ. Return success. This could * either be an ill stream (i.e. <arp-IP-Driver> stream) * or a stream corresponding to an open of /dev/arp * (i.e. <arp-IP> stream). Note that we don't support @@ -2630,10 +2631,12 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) ASSERT(tmp_q->q_qinfo->qi_minfo != NULL); - if (strcmp(tmp_q->q_qinfo->qi_minfo->mi_idname, "ip") == 0) { + if (strcmp(tmp_q->q_qinfo->qi_minfo->mi_idname, "ip") == 0 || + strcmp(tmp_q->q_qinfo->qi_minfo->mi_idname, "udp") == 0) { /* - * We don't support pushing ARP arbitrarily on an IP driver - * stream. ARP has to be pushed directly above IP. + * We don't support pushing ARP arbitrarily on an IP or UDP + * driver stream. ARP has to be pushed directly above IP or + * UDP. 
*/ (void) ar_close(RD(q)); return (ENOTSUP); @@ -2759,7 +2762,7 @@ ar_plink_send(queue_t *q, mblk_t *mp) if ((q->q_qinfo != NULL) && (q->q_qinfo->qi_minfo != NULL)) { name = arpwq->q_qinfo->qi_minfo->mi_idname; if (name != NULL && name[0] != NULL && - (strcmp(name, info.mi_idname) == 0)) + (strcmp(name, arp_mod_info.mi_idname) == 0)) break; } arpwq = arpwq->q_next; @@ -3194,7 +3197,7 @@ ar_rput(queue_t *q, mblk_t *mp) arl_t *, arl, arh_t *, arh, mblk_t *, mp); ARP_HOOK_IN(as->as_arp_physical_in_event, as->as_arp_physical_in, - arl->arl_index, arh, mp, mp1, as); + arl->arl_index, arh, mp, mp1, as); DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp); @@ -3502,7 +3505,7 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp) */ if (ap != NULL) { ap->ap_notifies = (dlp->notify_ack.dl_notifications & - DL_NOTE_LINK_UP) != 0; + DL_NOTE_LINK_UP) != 0; } ar_dlpi_done(arl, DL_NOTIFY_REQ); break; @@ -3821,7 +3824,7 @@ ar_snmp_msg2(ace_t *ace, void *arg) ntme.ipNetToMediaPhysAddress.o_length = MIN(OCTET_LENGTH, ace->ace_hw_addr_length); if ((ace->ace_flags & ACE_F_RESOLVED) == 0) - ntme.ipNetToMediaPhysAddress.o_length = 0; + ntme.ipNetToMediaPhysAddress.o_length = 0; bcopy(ace->ace_hw_addr, ntme.ipNetToMediaPhysAddress.o_bytes, ntme.ipNetToMediaPhysAddress.o_length); diff --git a/usr/src/uts/common/inet/arp/arpddi.c b/usr/src/uts/common/inet/arp/arpddi.c index edd5cd4c2a..77ab087cd9 100644 --- a/usr/src/uts/common/inet/arp/arpddi.c +++ b/usr/src/uts/common/inet/arp/arpddi.c @@ -37,8 +37,9 @@ #define INET_NAME "arp" #define INET_MODDESC "ARP STREAMS module %I%" #define INET_DEVDESC "ARP STREAMS driver %I%" -#define INET_DEVMINOR IPV4_MINOR -#define INET_STRTAB arpinfo +#define INET_DEVMINOR 0 +#define INET_DEVSTRTAB ipinfov4 +#define INET_MODSTRTAB arpinfo #define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since as a driver we're ip */ #define INET_MODMTFLAGS (D_MP | D_MTPERMOD) @@ -52,8 +53,6 @@ _init(void) { int error; - INET_BECOME_IP(); - /* * Note: After mod_install succeeds, another thread can 
enter * therefore all initialization is done before it and any diff --git a/usr/src/uts/common/inet/inetddi.c b/usr/src/uts/common/inet/inetddi.c index e6a0395ba0..69b1ba8c4f 100644 --- a/usr/src/uts/common/inet/inetddi.c +++ b/usr/src/uts/common/inet/inetddi.c @@ -42,46 +42,47 @@ * The symbols that all modules and drivers must define are: * * INET_NAME The name of the module/driver. - * INET_STRTAB The name of the `streamtab' structure. * * The symbols that all modules must define are: * + * INET_MODSTRTAB The name of the `streamtab' structure for this module. * INET_MODDESC The one-line description for this module. * INET_MODMTFLAGS The mt-streams(9F) flags for the module. * * The symbols that all drivers must define are: * + * INET_DEVSTRTAB The name of the `streamtab' structure for this driver. * INET_DEVDESC The one-line description for this driver. * INET_DEVMTFLAGS The mt-streams(9F) flags for the driver. * INET_DEVMINOR The minor number of the driver (usually 0). * * Drivers that need to masquerade as IP should set INET_DEVMTFLAGS to - * IP_DEVMTFLAGS and then call INET_BECOME_IP() in their _init(9E) routine. + * IP_DEVMTFLAGS and set INET_DEVSTRTAB to ipinfo. */ -#if !defined(INET_STRTAB) -#error inetddi.c: INET_STRTAB is not defined! -#elif !defined(INET_NAME) +#if !defined(INET_NAME) #error inetddi.c: INET_NAME is not defined! #elif !defined(INET_DEVDESC) && !defined(INET_MODDESC) #error inetddi.c: at least one of INET_DEVDESC or INET_MODDESC must be defined! +#elif defined(INET_DEVDESC) && !defined(INET_DEVSTRTAB) +#error inetddi.c: INET_DEVDESC is defined but INET_DEVSTRTAB is not! #elif defined(INET_DEVDESC) && !defined(INET_DEVMTFLAGS) #error inetddi.c: INET_DEVDESC is defined but INET_DEVMTFLAGS is not! #elif defined(INET_DEVDESC) && !defined(INET_DEVMINOR) #error inetddi.c: INET_DEVDESC is defined but INET_DEVMINOR is not! +#elif defined(INET_MODDESC) && !defined(INET_MODSTRTAB) +#error inetddi.c: INET_MODDESC is defined but INET_MODSTRTAB is not! 
#elif defined(INET_MODDESC) && !defined(INET_MODMTFLAGS) #error inetddi.c: INET_MODDESC is defined but INET_MODMTFLAGS is not! #endif -extern struct streamtab INET_STRTAB, ipinfo; - #ifdef INET_DEVDESC +extern struct streamtab INET_DEVSTRTAB; + /* - * This macro is intended to be called from the _init() routine of drivers - * that actually want to be IP. Yes, this is a disgusting, vile hack. + * Drivers that actually want to be IP would set INET_DEVSTRTAB to ipinfo. */ -#define INET_BECOME_IP() (cb_inet_devops.cb_str = &ipinfo) static dev_info_t *inet_dev_info; @@ -167,7 +168,7 @@ inet_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) } DDI_DEFINE_STREAM_OPS(inet_devops, nulldev, nulldev, inet_attach, inet_detach, - nulldev, inet_info, INET_DEVMTFLAGS, &INET_STRTAB); + nulldev, inet_info, INET_DEVMTFLAGS, &INET_DEVSTRTAB); static struct modldrv modldrv = { &mod_driverops, @@ -178,9 +179,11 @@ static struct modldrv modldrv = { #endif /* INET_DEVDESC */ #ifdef INET_MODDESC +extern struct streamtab INET_MODSTRTAB; + static struct fmodsw fsw = { INET_NAME, - &INET_STRTAB, + &INET_MODSTRTAB, INET_MODMTFLAGS }; diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 5d10fab0fd..9e828964a4 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -87,12 +87,6 @@ extern "C" { #define SCTP_MOD_NAME "sctp" -/* Minor numbers */ -#define IPV4_MINOR 0 -#define IPV6_MINOR 1 -#define TCP_MINOR 2 -#define TCP_MINOR6 3 - #ifndef _IPADDR_T #define _IPADDR_T typedef uint32_t ipaddr_t; @@ -261,6 +255,8 @@ typedef struct ipoptp_s #define Q_TO_CONN(q) ((conn_t *)(q)->q_ptr) #define Q_TO_TCP(q) (Q_TO_CONN((q))->conn_tcp) #define Q_TO_UDP(q) (Q_TO_CONN((q))->conn_udp) +#define Q_TO_ICMP(q) (Q_TO_CONN((q))->conn_icmp) +#define Q_TO_RTS(q) (Q_TO_CONN((q))->conn_rts) /* * The following two macros are used by IP to get the appropriate @@ -1008,7 +1004,6 @@ typedef struct conn_s conn_t; * * ipc_multicast_loop conn_multicast_loop * 
ipc_multi_router conn_multi_router - * ipc_priv_stream conn_priv_stream * ipc_draining conn_draining * * ipc_did_putbq conn_did_putbq @@ -3102,7 +3097,10 @@ extern void ip_ire_g_fini(void); extern void ip_ire_g_init(void); extern void ip_ire_fini(ip_stack_t *); extern void ip_ire_init(ip_stack_t *); -extern int ip_open(queue_t *, dev_t *, int, int, cred_t *); +extern int ip_openv4(queue_t *q, dev_t *devp, int flag, int sflag, + cred_t *credp); +extern int ip_openv6(queue_t *q, dev_t *devp, int flag, int sflag, + cred_t *credp); extern int ip_reassemble(mblk_t *, ipf_t *, uint_t, boolean_t, ill_t *, size_t); extern int ip_opt_set_ill(conn_t *, int, boolean_t, boolean_t, @@ -3114,8 +3112,6 @@ extern void ip_rput_dlpi(queue_t *, mblk_t *); extern void ip_rput_forward(ire_t *, ipha_t *, mblk_t *, ill_t *); extern void ip_rput_forward_multicast(ipaddr_t, mblk_t *, ipif_t *); -extern int ip_snmpmod_close(queue_t *); -extern void ip_snmpmod_wput(queue_t *, mblk_t *); extern void ip_mib2_add_ip_stats(mib2_ipIfStatsEntry_t *, mib2_ipIfStatsEntry_t *); extern void ip_mib2_add_icmp6_stats(mib2_ipv6IfIcmpEntry_t *, @@ -3125,7 +3121,7 @@ extern void ip_proto_input(queue_t *, mblk_t *, ipha_t *, ire_t *, ill_t *, boolean_t); extern void ip_rput_other(ipsq_t *, queue_t *, mblk_t *, void *); extern ire_t *ip_check_multihome(void *, ire_t *, ill_t *); -extern void ip_setqinfo(queue_t *, minor_t, boolean_t, ip_stack_t *); +extern void ip_setpktversion(conn_t *, boolean_t, boolean_t, ip_stack_t *); extern void ip_trash_ire_reclaim(void *); extern void ip_trash_timer_expire(void *); extern void ip_wput(queue_t *, mblk_t *); @@ -3151,8 +3147,8 @@ extern ipxmit_state_t ip_xmit_v4(mblk_t *, ire_t *, struct ipsec_out_s *, boolean_t); extern int ip_hdr_complete(ipha_t *, zoneid_t, ip_stack_t *); -extern struct qinit rinit_ipv6; -extern struct qinit winit_ipv6; +extern struct qinit iprinitv6; +extern struct qinit ipwinitv6; extern void conn_drain_insert(conn_t *connp); extern int 
conn_ipsec_length(conn_t *connp); @@ -3193,7 +3189,7 @@ extern ill_t *ip_grab_attach_ill(ill_t *, mblk_t *, int, boolean_t, ip_stack_t *); extern ire_t *conn_set_outgoing_ill(conn_t *, ire_t *, ill_t **); extern int ipsec_req_from_conn(conn_t *, ipsec_req_t *, int); -extern int ip_snmp_get(queue_t *q, mblk_t *mctl); +extern int ip_snmp_get(queue_t *q, mblk_t *mctl, int level); extern int ip_snmp_set(queue_t *q, int, int, uchar_t *, int); extern void ip_process_ioctl(ipsq_t *, queue_t *, mblk_t *, void *); extern void ip_quiesce_conn(conn_t *); @@ -3393,17 +3389,6 @@ struct ill_dls_capab_s { }; /* - * This message is sent by an upper-layer protocol to tell IP that it knows all - * about labels and will construct them itself. IP takes the slow path and - * recomputes the label on every packet when this isn't true. - */ -#define IP_ULP_OUT_LABELED (('O' << 8) + 'L') -typedef struct out_labeled_s { - uint32_t out_labeled_type; /* OUT_LABELED */ - queue_t *out_qnext; /* intermediate detection */ -} out_labeled_t; - -/* * IP squeues exports */ extern int ip_squeue_profile; diff --git a/usr/src/uts/common/inet/ip/icmp.c b/usr/src/uts/common/inet/ip/icmp.c index e7439595f4..12c5cb59cb 100644 --- a/usr/src/uts/common/inet/ip/icmp.c +++ b/usr/src/uts/common/inet/ip/icmp.c @@ -82,31 +82,32 @@ #include <inet/ip_impl.h> -#define ICMP6 "icmp6" -major_t ICMP6_MAJ; - -/* - * Object to represent database of options to search passed to - * {sock,tpi}optcom_req() interface routine to take care of option - * management and associated methods. - * XXX These and other extern's should really move to a icmp header. - */ -extern optdb_obj_t icmp_opt_obj; -extern uint_t icmp_max_optsize; - /* * Synchronization notes: * - * At all points in this code where exclusive access is required, we - * pass a message to a subroutine by invoking qwriter(..., PERIM_OUTER) - * which will arrange to call the routine only after all threads have - * exited the shared resource. 
+ * RAWIP is MT and uses the usual kernel synchronization primitives. There is one lock, which is icmp_rwlock. We also use conn_lock when updating things + * which affect the IP classifier lookup. + * The lock order is icmp_rwlock -> conn_lock. + * + * The icmp_rwlock: + * This protects most of the other fields in the icmp_t. The exact list of + * fields which are protected by each of the above locks is documented in + * the icmp_t structure definition. + * + * Plumbing notes: + * ICMP is always a device driver. For compatibility with mibopen() code + * it is possible to I_PUSH "icmp", but that results in pushing a passthrough + * dummy module. */ static void icmp_addr_req(queue_t *q, mblk_t *mp); static void icmp_bind(queue_t *q, mblk_t *mp); static void icmp_bind_proto(queue_t *q); -static int icmp_build_hdrs(queue_t *q, icmp_t *icmp); +static void icmp_bind_result(conn_t *, mblk_t *); +static void icmp_bind_ack(conn_t *, mblk_t *mp); +static void icmp_bind_error(conn_t *, mblk_t *mp); +static int icmp_build_hdrs(icmp_t *icmp); static void icmp_capability_req(queue_t *q, mblk_t *mp); static int icmp_close(queue_t *q); static void icmp_connect(queue_t *q, mblk_t *mp); @@ -118,10 +119,16 @@ static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, static void icmp_icmp_error(queue_t *q, mblk_t *mp); static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); static void icmp_info_req(queue_t *q, mblk_t *mp); +static void icmp_input(void *, mblk_t *, void *); static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, in_port_t); -static int icmp_open(queue_t *q, dev_t *devp, int flag, - int sflag, cred_t *credp); +static int icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, + cred_t *credp, boolean_t isv6); +static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, + cred_t *credp); +static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, + cred_t *credp); +static void icmp_output(queue_t
*q, mblk_t *mp); static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, void *thisdg_attrs); static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); @@ -135,9 +142,6 @@ static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); -static void icmp_rput(queue_t *q, mblk_t *mp); -static void icmp_rput_bind_ack(queue_t *q, mblk_t *mp); -static int icmp_snmp_get(queue_t *q, mblk_t *mpctl); static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr, int len); static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, @@ -159,20 +163,34 @@ static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); static int rawip_kstat_update(kstat_t *kp, int rw); -static struct module_info info = { +static struct module_info icmp_mod_info = { 5707, "icmp", 1, INFPSZ, 512, 128 }; -static struct qinit rinit = { - (pfi_t)icmp_rput, NULL, icmp_open, icmp_close, NULL, &info +/* + * Entry points for ICMP as a device. + * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
+ */ +static struct qinit icmprinitv4 = { + NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info +}; + +static struct qinit icmprinitv6 = { + NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info }; -static struct qinit winit = { - (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &info +static struct qinit icmpwinit = { + (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info }; -struct streamtab icmpinfo = { - &rinit, &winit +/* For AF_INET aka /dev/icmp */ +struct streamtab icmpinfov4 = { + &icmprinitv4, &icmpwinit +}; + +/* For AF_INET6 aka /dev/icmp6 */ +struct streamtab icmpinfov6 = { + &icmprinitv6, &icmpwinit }; static sin_t sin_null; /* Zero address for quick clears */ @@ -223,8 +241,7 @@ static icmpparam_t icmp_param_arr[] = { * passed to icmp_wput. * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP * protocol type placed in the message following the address. A T_BIND_ACK - * message is passed upstream when ip acknowledges the request. - * (Called as writer.) + * message is returned by ip_bind_v4/v6. */ static void icmp_bind(queue_t *q, mblk_t *mp) @@ -234,8 +251,9 @@ icmp_bind(queue_t *q, mblk_t *mp) mblk_t *mp1; struct T_bind_req *tbr; icmp_t *icmp; + conn_t *connp = Q_TO_CONN(q); - icmp = (icmp_t *)q->q_ptr; + icmp = connp->conn_icmp; if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "icmp_bind: bad req, len %u", @@ -315,12 +333,30 @@ icmp_bind(queue_t *q, mblk_t *mp) icmp_err_ack(q, mp, TBADADDR, 0); return; } + + /* + * The state must be TS_UNBND. TPI mandates that users must send + * TPI primitives only 1 at a time and wait for the response before + * sending the next primitive. 
+ */ + rw_enter(&icmp->icmp_rwlock, RW_WRITER); + if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { + rw_exit(&icmp->icmp_rwlock); + (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, + "icmp_bind: bad state, %d", icmp->icmp_state); + icmp_err_ack(q, mp, TOUTSTATE, 0); + return; + } + + icmp->icmp_pending_op = tbr->PRIM_type; + /* * Copy the source address into our icmp structure. This address * may still be zero; if so, ip will fill in the correct address * each time an outbound packet is passed to it. - * If we are binding to a broadcast or multicast address icmp_rput - * will clear the source address when it receives the T_BIND_ACK. + * If we are binding to a broadcast or multicast address then + * icmp_bind_ack will clear the source address when it receives + * the T_BIND_ACK. */ icmp->icmp_state = TS_IDLE; @@ -342,8 +378,10 @@ icmp_bind(queue_t *q, mblk_t *mp) icmp->icmp_bound_v6src = icmp->icmp_v6src; /* Rebuild the header template */ - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) { + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); icmp_err_ack(q, mp, TSYSERR, error); return; } @@ -359,15 +397,27 @@ icmp_bind(queue_t *q, mblk_t *mp) */ mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); if (!mp->b_cont) { + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); icmp_err_ack(q, mp, TSYSERR, ENOMEM); return; } mp->b_cont->b_wptr += sizeof (ire_t); mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; } + rw_exit(&icmp->icmp_rwlock); /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. 
*/ - putnext(q, mp); + if (icmp->icmp_family == AF_INET6) + mp = ip_bind_v6(q, mp, connp, NULL); + else + mp = ip_bind_v4(q, mp, connp); + + /* The above return NULL if the bind needs to be deferred */ + if (mp != NULL) + icmp_bind_result(connp, mp); + else + CONN_INC_REF(connp); } /* @@ -379,8 +429,10 @@ icmp_bind_proto(queue_t *q) mblk_t *mp; struct T_bind_req *tbr; icmp_t *icmp; + conn_t *connp = Q_TO_CONN(q); + + icmp = connp->conn_icmp; - icmp = (icmp_t *)q->q_ptr; mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, BPRI_MED); if (!mp) { @@ -390,6 +442,8 @@ icmp_bind_proto(queue_t *q) tbr = (struct T_bind_req *)mp->b_rptr; tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ tbr->ADDR_offset = sizeof (struct T_bind_req); + + rw_enter(&icmp->icmp_rwlock, RW_WRITER); if (icmp->icmp_ipversion == IPV4_VERSION) { sin_t *sin; @@ -411,9 +465,33 @@ icmp_bind_proto(queue_t *q) /* Place protocol type in the O_T_BIND_REQ following the address. */ *mp->b_wptr++ = icmp->icmp_proto; + rw_exit(&icmp->icmp_rwlock); /* Pass the O_T_BIND_REQ to ip. */ - putnext(q, mp); + if (icmp->icmp_family == AF_INET6) + mp = ip_bind_v6(q, mp, connp, NULL); + else + mp = ip_bind_v4(q, mp, connp); + + /* The above return NULL if the bind needs to be deferred */ + if (mp != NULL) + icmp_bind_result(connp, mp); + else + CONN_INC_REF(connp); +} + +/* + * This is called from ip_wput_nondata to handle the results of a + * deferred RAWIP bind. It is called once the bind has been completed. + */ +void +rawip_resume_bind(conn_t *connp, mblk_t *mp) +{ + ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); + + icmp_bind_result(connp, mp); + + CONN_OPER_PENDING_DONE(connp); } /* @@ -426,11 +504,11 @@ icmp_bind_proto(queue_t *q) * T_OK_ACK - for the T_CONN_REQ * T_CONN_CON - to keep the TPI user happy * - * The connect completes in icmp_rput. + * The connect completes in icmp_bind_result. 
* When a T_BIND_ACK is received information is extracted from the IRE * and the two appended messages are sent to the TPI user. - * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert - * it to an error ack for the appropriate primitive. + * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will + * convert it to an error ack for the appropriate primitive. */ static void icmp_connect(queue_t *q, mblk_t *mp) @@ -443,26 +521,21 @@ icmp_connect(queue_t *q, mblk_t *mp) ipaddr_t v4dst; in6_addr_t v6dst; uint32_t flowinfo; + conn_t *connp = Q_TO_CONN(q); - icmp = (icmp_t *)q->q_ptr; + icmp = connp->conn_icmp; tcr = (struct T_conn_req *)mp->b_rptr; /* Sanity checks */ - if ((mp->b_wptr - mp->b_rptr < sizeof (struct T_conn_req))) { + if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { icmp_err_ack(q, mp, TPROTO, 0); return; } - if (icmp->icmp_state == TS_DATA_XFER) { - /* Already connected - clear out state */ - icmp->icmp_v6src = icmp->icmp_bound_v6src; - icmp->icmp_state = TS_IDLE; - } - - if (tcr->OPT_length != 0) { icmp_err_ack(q, mp, TBADOPT, 0); return; } + switch (tcr->DEST_length) { default: icmp_err_ack(q, mp, TBADADDR, 0); @@ -561,6 +634,22 @@ icmp_connect(queue_t *q, mblk_t *mp) */ } + rw_enter(&icmp->icmp_rwlock, RW_WRITER); + if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { + rw_exit(&icmp->icmp_rwlock); + (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, + "icmp_connect: bad state, %d", icmp->icmp_state); + icmp_err_ack(q, mp, TOUTSTATE, 0); + return; + } + icmp->icmp_pending_op = T_CONN_REQ; + + if (icmp->icmp_state == TS_DATA_XFER) { + /* Already connected - clear out state */ + icmp->icmp_v6src = icmp->icmp_bound_v6src; + icmp->icmp_state = TS_IDLE; + } + /* * Send down bind to IP to verify that there is a route * and to determine the source address. 
@@ -575,13 +664,15 @@ icmp_connect(queue_t *q, mblk_t *mp) sin6->sin6_port); } if (mp1 == NULL) { + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); icmp_err_ack(q, mp, TSYSERR, ENOMEM); return; } /* * We also have to send a connection confirmation to - * keep TLI happy. Prepare it for icmp_rput. + * keep TLI happy. Prepare it for icmp_bind_result. */ if (icmp->icmp_family == AF_INET) { mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, @@ -593,6 +684,8 @@ icmp_connect(queue_t *q, mblk_t *mp) } if (mp2 == NULL) { freemsg(mp1); + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); icmp_err_ack(q, mp, TSYSERR, ENOMEM); return; } @@ -601,32 +694,36 @@ icmp_connect(queue_t *q, mblk_t *mp) if (mp == NULL) { /* Unable to reuse the T_CONN_REQ for the ack. */ freemsg(mp2); + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); return; } icmp->icmp_state = TS_DATA_XFER; + rw_exit(&icmp->icmp_rwlock); /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ linkb(mp1, mp); linkb(mp1, mp2); - mblk_setcred(mp1, icmp->icmp_credp); - putnext(q, mp1); + mblk_setcred(mp1, connp->conn_cred); + if (icmp->icmp_family == AF_INET) + mp1 = ip_bind_v4(q, mp1, connp); + else + mp1 = ip_bind_v6(q, mp1, connp, NULL); + + /* The above return NULL if the bind needs to be deferred */ + if (mp1 != NULL) + icmp_bind_result(connp, mp1); + else + CONN_INC_REF(connp); } -static int -icmp_close(queue_t *q) +static void +icmp_close_free(conn_t *connp) { - icmp_t *icmp = (icmp_t *)q->q_ptr; - int i1; - icmp_stack_t *is = icmp->icmp_is; - - /* tell IP that if we're not here, he can't trust labels */ - if (is_system_labeled()) - putnext(WR(q), icmp->icmp_delabel); - - qprocsoff(q); + icmp_t *icmp = connp->conn_icmp; /* If there are any options associated with the stream, free them. 
*/ if (icmp->icmp_ip_snd_options) @@ -642,16 +739,41 @@ icmp_close(queue_t *q) icmp->icmp_sticky_hdrs = NULL; icmp->icmp_sticky_hdrs_len = 0; } - ip6_pkt_free(&icmp->icmp_sticky_ipp); +} + +static int +icmp_close(queue_t *q) +{ + conn_t *connp = (conn_t *)q->q_ptr; + + ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); + + ip_quiesce_conn(connp); + + qprocsoff(connp->conn_rq); + + icmp_close_free(connp); + + /* + * Now we are truly single threaded on this stream, and can + * delete the things hanging off the connp, and finally the connp. + * We removed this connp from the fanout list, it cannot be + * accessed thru the fanouts, and we already waited for the + * conn_ref to drop to 0. We are already in close, so + * there cannot be any other thread from the top. qprocsoff + * has completed, and service has completed or won't run in + * future. + */ + ASSERT(connp->conn_ref == 1); - crfree(icmp->icmp_credp); - netstack_rele(icmp->icmp_is->is_netstack); + inet_minor_free(ip_minor_arena, connp->conn_dev); - /* Free the icmp structure and release the minor device number. */ - i1 = mi_close_comm(&is->is_head, q); + connp->conn_ref--; + ipcl_conn_destroy(connp); - return (i1); + q->q_ptr = WR(q)->q_ptr = NULL; + return (0); } /* @@ -664,25 +786,28 @@ icmp_close(queue_t *q) * T_BIND_REQ - specifying just the local address. * T_OK_ACK - for the T_DISCON_REQ * - * The disconnect completes in icmp_rput. + * The disconnect completes in icmp_bind_result. * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. - * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert - * it to an error ack for the appropriate primitive. + * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will + * convert it to an error ack for the appropriate primitive. 
*/ static void icmp_disconnect(queue_t *q, mblk_t *mp) { icmp_t *icmp; mblk_t *mp1; + conn_t *connp = Q_TO_CONN(q); - icmp = (icmp_t *)q->q_ptr; - - if (icmp->icmp_state != TS_DATA_XFER) { + icmp = connp->conn_icmp; + rw_enter(&icmp->icmp_rwlock, RW_WRITER); + if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { + rw_exit(&icmp->icmp_rwlock); (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "icmp_disconnect: bad state, %d", icmp->icmp_state); icmp_err_ack(q, mp, TOUTSTATE, 0); return; } + icmp->icmp_pending_op = T_DISCON_REQ; icmp->icmp_v6src = icmp->icmp_bound_v6src; icmp->icmp_state = TS_IDLE; @@ -697,12 +822,16 @@ icmp_disconnect(queue_t *q, mblk_t *mp) mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); } if (mp1 == NULL) { + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); icmp_err_ack(q, mp, TSYSERR, ENOMEM); return; } mp = mi_tpi_ok_ack_alloc(mp); if (mp == NULL) { /* Unable to reuse the T_DISCON_REQ for the ack. */ + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); return; } @@ -711,18 +840,30 @@ icmp_disconnect(queue_t *q, mblk_t *mp) int error; /* Rebuild the header template */ - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) { + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); freemsg(mp1); return; } } - icmp->icmp_discon_pending = 1; - /* Append the T_OK_ACK to the T_BIND_REQ for icmp_rput */ + rw_exit(&icmp->icmp_rwlock); + /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ linkb(mp1, mp); - putnext(q, mp1); + + if (icmp->icmp_family == AF_INET6) + mp1 = ip_bind_v6(q, mp1, connp, NULL); + else + mp1 = ip_bind_v4(q, mp1, connp); + + /* The above return NULL if the bind needs to be deferred */ + if (mp1 != NULL) + icmp_bind_result(connp, mp1); + else + CONN_INC_REF(connp); } /* This routine creates a T_ERROR_ACK message and passes it 
upstream. */ @@ -751,7 +892,7 @@ icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, } /* - * icmp_icmp_error is called by icmp_rput to process ICMP + * icmp_icmp_error is called by icmp_input to process ICMP * messages passed up by IP. * Generates the appropriate T_UDERROR_IND for permanent * (non-transient) errors. @@ -768,19 +909,12 @@ icmp_icmp_error(queue_t *q, mblk_t *mp) sin6_t sin6; mblk_t *mp1; int error = 0; - icmp_t *icmp = (icmp_t *)q->q_ptr; - - /* - * Deliver T_UDERROR_IND when the application has asked for it. - * The socket layer enables this automatically when connected. - */ - if (!icmp->icmp_dgram_errind) { - freemsg(mp); - return; - } + icmp_t *icmp = Q_TO_ICMP(q); ipha = (ipha_t *)mp->b_rptr; + ASSERT(OK_32PTR(mp->b_rptr)); + if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); icmp_icmp_error_ipv6(q, mp); @@ -788,6 +922,7 @@ icmp_icmp_error(queue_t *q, mblk_t *mp) } ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); + /* Skip past the outer IP and ICMP headers */ iph_hdr_length = IPH_HDR_LENGTH(ipha); icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); ipha = (ipha_t *)&icmph[1]; @@ -799,7 +934,6 @@ icmp_icmp_error(queue_t *q, mblk_t *mp) case ICMP_FRAGMENTATION_NEEDED: /* * IP has already adjusted the path MTU. - * XXX Somehow pass MTU indication to application? */ break; case ICMP_PORT_UNREACHABLE: @@ -820,6 +954,15 @@ icmp_icmp_error(queue_t *q, mblk_t *mp) return; } + /* + * Deliver T_UDERROR_IND when the application has asked for it. + * The socket layer enables this automatically when connected. 
+ */ + if (!icmp->icmp_dgram_errind) { + freemsg(mp); + return; + } + switch (icmp->icmp_family) { case AF_INET: sin = sin_null; @@ -859,7 +1002,7 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) sin6_t sin6; mblk_t *mp1; int error = 0; - icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_t *icmp = Q_TO_ICMP(q); outer_ip6h = (ip6_t *)mp->b_rptr; if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) @@ -873,14 +1016,7 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) freemsg(mp); return; } - if (*nexthdrp != icmp->icmp_proto) { - /* - * Could have switched icmp_proto after while ip did fanout of - * this message - */ - freemsg(mp); - return; - } + switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: switch (icmp6->icmp6_code) { @@ -918,7 +1054,7 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + opt_length; if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); + BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); break; } @@ -977,6 +1113,15 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) return; } + /* + * Deliver T_UDERROR_IND when the application has asked for it. + * The socket layer enables this automatically when connected. + */ + if (!icmp->icmp_dgram_errind) { + freemsg(mp); + return; + } + sin6 = sin6_null; sin6.sin6_family = AF_INET6; sin6.sin6_addr = ip6h->ip6_dst; @@ -999,7 +1144,7 @@ icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) static void icmp_addr_req(queue_t *q, mblk_t *mp) { - icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_t *icmp = Q_TO_ICMP(q); mblk_t *ackmp; struct T_addr_ack *taa; @@ -1017,7 +1162,7 @@ icmp_addr_req(queue_t *q, mblk_t *mp) taa->PRIM_type = T_ADDR_ACK; ackmp->b_datap->db_type = M_PCPROTO; - + rw_enter(&icmp->icmp_rwlock, RW_READER); /* * Note: Following code assumes 32 bit alignment of basic * data structures like sin_t and struct T_addr_ack. 
@@ -1075,6 +1220,7 @@ icmp_addr_req(queue_t *q, mblk_t *mp) ackmp->b_wptr = (uchar_t *)&sin6[1]; } } + rw_exit(&icmp->icmp_rwlock); ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); qreply(q, ackmp); } @@ -1101,14 +1247,14 @@ icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) static void icmp_capability_req(queue_t *q, mblk_t *mp) { - icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_t *icmp = Q_TO_ICMP(q); t_uscalar_t cap_bits1; struct T_capability_ack *tcap; cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), - mp->b_datap->db_type, T_CAPABILITY_ACK); + mp->b_datap->db_type, T_CAPABILITY_ACK); if (!mp) return; @@ -1131,7 +1277,7 @@ icmp_capability_req(queue_t *q, mblk_t *mp) static void icmp_info_req(queue_t *q, mblk_t *mp) { - icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_t *icmp = Q_TO_ICMP(q); mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, T_INFO_ACK); @@ -1189,7 +1335,7 @@ icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, sin6_t *sin6; ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); - + ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); if (mp == NULL) return (NULL); @@ -1279,67 +1425,44 @@ icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, return (mp); } -/* ARGSUSED */ -static void -dummy_func(void *arg) +/* For /dev/icmp aka AF_INET open */ +static int +icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) { + return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); } -static mblk_t * -alloc_wait(queue_t *q, size_t len, int pri, int *errp) +/* For /dev/icmp6 aka AF_INET6 open */ +static int +icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) { - mblk_t *mp; - bufcall_id_t id; - int retv; - - while ((mp = allocb(len, pri)) == NULL) { - id = qbufcall(q, len, pri, dummy_func, NULL); - if (id == 0) { - *errp = ENOMEM; - break; - } - retv 
= qwait_sig(q); - qunbufcall(q, id); - if (retv == 0) { - *errp = EINTR; - break; - } - } - if (mp != NULL) - mp->b_wptr += len; - return (mp); + return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); } /* * This is the open routine for icmp. It allocates a icmp_t structure for * the stream and, on the first open of the module, creates an ND table. */ +/*ARGSUSED2*/ static int -icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, + boolean_t isv6) { int err; icmp_t *icmp; - mblk_t *mp; - out_labeled_t *olp; + conn_t *connp; + dev_t conn_dev; + zoneid_t zoneid; netstack_t *ns; icmp_stack_t *is; - zoneid_t zoneid; /* If the stream is already open, return immediately. */ if (q->q_ptr != NULL) return (0); - /* If this is not a push of icmp as a module, fail. */ - if (sflag != MODOPEN) + if (sflag == MODOPEN) return (EINVAL); - /* - * Defer the qprocson until everything is initialized since - * we are D_MTPERQ and after qprocson the rput routine can - * run. (Could do qprocson earlier since icmp currently - * has an outer perimeter.) - */ - ns = netstack_find_by_cred(credp); ASSERT(ns != NULL); is = ns->netstack_icmp; @@ -1349,56 +1472,53 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * For exclusive stacks we set the zoneid to zero * to make ICMP operate as if in the global zone. */ - if (is->is_netstack->netstack_stackid != GLOBAL_NETSTACKID) + if (ns->netstack_stackid != GLOBAL_NETSTACKID) zoneid = GLOBAL_ZONEID; else zoneid = crgetzoneid(credp); - /* - * Create a icmp_t structure for this stream and link into the - * list of open streams. 
- */ - err = mi_open_comm(&is->is_head, sizeof (icmp_t), q, devp, - flag, sflag, credp); - if (err != 0) { - netstack_rele(is->is_netstack); - return (err); + if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { + netstack_rele(ns); + return (EBUSY); } + *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); + + connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); + connp->conn_dev = conn_dev; + icmp = connp->conn_icmp; /* - * The receive hiwat is only looked at on the stream head queue. - * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. + * ipcl_conn_create did a netstack_hold. Undo the hold that was + * done by netstack_find_by_cred() */ - q->q_hiwat = is->is_recv_hiwat; - - /* Set the initial state of the stream and the privilege status. */ - icmp = (icmp_t *)q->q_ptr; - icmp->icmp_state = TS_UNBND; - icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; - icmp->icmp_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; - icmp->icmp_filter = NULL; - - icmp->icmp_credp = credp; - crhold(credp); + netstack_rele(ns); /* - * If the caller has the process-wide flag set, then default to MAC - * exempt mode. This allows read-down to unlabeled hosts. + * Initialize the icmp_t structure for this stream. */ - if (getpflags(NET_MAC_AWARE, credp) != 0) - icmp->icmp_mac_exempt = B_TRUE; + q->q_ptr = connp; + WR(q)->q_ptr = connp; + connp->conn_rq = q; + connp->conn_wq = WR(q); - icmp->icmp_zoneid = zoneid; - icmp->icmp_is = is; + rw_enter(&icmp->icmp_rwlock, RW_WRITER); + ASSERT(connp->conn_ulp == IPPROTO_ICMP); + ASSERT(connp->conn_icmp == icmp); + ASSERT(icmp->icmp_connp == connp); - if (getmajor(*devp) == (major_t)ICMP6_MAJ) { + /* Set the initial state of the stream and the privilege status. */ + icmp->icmp_state = TS_UNBND; + if (isv6) { icmp->icmp_ipversion = IPV6_VERSION; icmp->icmp_family = AF_INET6; + connp->conn_ulp = IPPROTO_ICMPV6; /* May be changed by a SO_PROTOTYPE socket option. 
*/ icmp->icmp_proto = IPPROTO_ICMPV6; icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ icmp->icmp_max_hdr_len = IPV6_HDR_LEN; icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; + connp->conn_af_isv6 = B_TRUE; + connp->conn_flags |= IPCL_ISV6; } else { icmp->icmp_ipversion = IPV4_VERSION; icmp->icmp_family = AF_INET; @@ -1406,69 +1526,56 @@ icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) icmp->icmp_proto = IPPROTO_ICMP; icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; + connp->conn_af_isv6 = B_FALSE; + connp->conn_flags &= ~IPCL_ISV6; } - qprocson(q); + icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; + icmp->icmp_pending_op = -1; + connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; + connp->conn_zoneid = zoneid; /* - * Check if icmp is being I_PUSHed by a non-privileged user. - * If so, we set icmp_restricted to indicate that only MIB - * traffic may pass. + * If the caller has the process-wide flag set, then default to MAC + * exempt mode. This allows read-down to unlabeled hosts. */ - if (secpolicy_net_icmpaccess(credp) != 0) { - icmp->icmp_restricted = 1; - } + if (getpflags(NET_MAC_AWARE, credp) != 0) + icmp->icmp_mac_exempt = B_TRUE; - /* - * The transmit hiwat is only looked at on IP's queue. - * Store in q_hiwat in order to return on SO_SNDBUF - * getsockopts. 
- */ + connp->conn_ulp_labeled = is_system_labeled(); + + icmp->icmp_is = is; + + q->q_hiwat = is->is_recv_hiwat; WR(q)->q_hiwat = is->is_xmit_hiwat; - WR(q)->q_next->q_hiwat = WR(q)->q_hiwat; WR(q)->q_lowat = is->is_xmit_lowat; - WR(q)->q_next->q_lowat = WR(q)->q_lowat; + + connp->conn_recv = icmp_input; + crhold(credp); + connp->conn_cred = credp; + + mutex_enter(&connp->conn_lock); + connp->conn_state_flags &= ~CONN_INCIPIENT; + mutex_exit(&connp->conn_lock); + + qprocson(q); if (icmp->icmp_family == AF_INET6) { /* Build initial header template for transmit */ - err = icmp_build_hdrs(q, icmp); - if (err != 0) - goto open_error; + if ((err = icmp_build_hdrs(icmp)) != 0) { + rw_exit(&icmp->icmp_rwlock); + qprocsoff(q); + ipcl_conn_destroy(connp); + return (err); + } } + rw_exit(&icmp->icmp_rwlock); + /* Set the Stream head write offset. */ (void) mi_set_sth_wroff(q, icmp->icmp_max_hdr_len + is->is_wroff_extra); (void) mi_set_sth_hiwat(q, q->q_hiwat); - if (is_system_labeled()) { - /* notify IP that we know about labeling */ - mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); - if (mp == NULL) - goto open_error; - mp->b_datap->db_type = M_CTL; - olp = (out_labeled_t *)mp->b_rptr; - olp->out_labeled_type = IP_ULP_OUT_LABELED; - olp->out_qnext = WR(q)->q_next; - putnext(WR(q), mp); - - /* save off a copy for closing */ - mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); - if (mp == NULL) - goto open_error; - mp->b_datap->db_type = M_CTL; - olp = (out_labeled_t *)mp->b_rptr; - olp->out_labeled_type = IP_ULP_OUT_LABELED; - olp->out_qnext = NULL; - icmp->icmp_delabel = mp; - } - return (0); - -open_error: - qprocsoff(q); - crfree(credp); - (void) mi_close_comm(&is->is_head, q); - netstack_rele(is->is_netstack); - return (err); } /* @@ -1489,7 +1596,7 @@ icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) int icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) { - icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_t *icmp = Q_TO_ICMP(q); icmp_stack_t *is = 
icmp->icmp_is; int *i1 = (int *)ptr; @@ -1534,12 +1641,13 @@ icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) * It returns the size of the option retrieved. */ int -icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) +icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) { - icmp_t *icmp = (icmp_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + icmp_t *icmp = connp->conn_icmp; + icmp_stack_t *is = icmp->icmp_is; int *i1 = (int *)ptr; ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; - icmp_stack_t *is = icmp->icmp_is; switch (level) { case SOL_SOCKET: @@ -1635,7 +1743,7 @@ icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) *(uchar_t *)ptr = icmp->icmp_multicast_ttl; return (sizeof (uchar_t)); case IP_MULTICAST_LOOP: - *ptr = icmp->icmp_multicast_loop; + *ptr = connp->conn_multicast_loop; return (sizeof (uint8_t)); case IP_BOUND_IF: /* Zero if not set */ @@ -1712,7 +1820,7 @@ icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) *i1 = icmp->icmp_multicast_ttl; break; case IPV6_MULTICAST_LOOP: - *i1 = icmp->icmp_multicast_loop; + *i1 = connp->conn_multicast_loop; break; case IPV6_BOUND_IF: /* Zero if not set */ @@ -1834,8 +1942,8 @@ icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) return (0); return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, - (struct ip6_mtuinfo *)ptr, - is->is_netstack)); + (struct ip6_mtuinfo *)ptr, + is->is_netstack)); case IPV6_TCLASS: if (ipp->ipp_fields & IPPF_TCLASS) *i1 = ipp->ipp_tclass; @@ -1876,14 +1984,32 @@ icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) return (sizeof (int)); } +/* + * This routine retrieves the current status of socket options. + * It returns the size of the option retrieved. 
+ */ +int +icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) +{ + icmp_t *icmp = Q_TO_ICMP(q); + int err; + + rw_enter(&icmp->icmp_rwlock, RW_READER); + err = icmp_opt_get_locked(q, level, name, ptr); + rw_exit(&icmp->icmp_rwlock); + return (err); +} + + /* This routine sets socket options. */ /* ARGSUSED */ int -icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, +icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) { - icmp_t *icmp = (icmp_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + icmp_t *icmp = connp->conn_icmp; icmp_stack_t *is = icmp->icmp_is; int *i1 = (int *)invalp; boolean_t onoff = (*i1 == 0) ? 0 : 1; @@ -2000,7 +2126,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, } /* Rebuild the header template */ - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) { *outlenp = 0; return (error); @@ -2010,13 +2136,18 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, * For SCTP, we don't use icmp_bind_proto() for * raw socket binding. Note that we do not need * to set *outlenp. + * FIXME: how does SCTP work? 
*/ if (icmp->icmp_proto == IPPROTO_SCTP) return (0); - icmp_bind_proto(q); *outlenp = sizeof (int); *(int *)outvalp = *i1 & 0xFF; + + /* Drop lock across the bind operation */ + rw_exit(&icmp->icmp_rwlock); + icmp_bind_proto(q); + rw_enter(&icmp->icmp_rwlock, RW_WRITER); return (0); case SO_REUSEADDR: if (!checkonly) @@ -2047,7 +2178,6 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, } if (!checkonly) { q->q_hiwat = *i1; - q->q_next->q_hiwat = *i1; } break; case SO_RCVBUF: @@ -2057,7 +2187,9 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, } if (!checkonly) { RD(q)->q_hiwat = *i1; + rw_exit(&icmp->icmp_rwlock); (void) mi_set_sth_hiwat(RD(q), *i1); + rw_enter(&icmp->icmp_rwlock, RW_WRITER); } break; case SO_DGRAM_ERRIND: @@ -2125,8 +2257,10 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; + rw_exit(&icmp->icmp_rwlock); (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + - is->is_wroff_extra); + is->is_wroff_extra); + rw_enter(&icmp->icmp_rwlock, RW_WRITER); break; case IP_HDRINCL: if (!checkonly) @@ -2157,7 +2291,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, break; case IP_MULTICAST_LOOP: if (!checkonly) { - icmp->icmp_multicast_loop = + connp->conn_multicast_loop = (*invalp == 0) ? 0 : 1; } break; @@ -2176,8 +2310,10 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, case IP_RECVIF: if (!checkonly) icmp->icmp_recvif = onoff; - break; - + /* + * pass to ip + */ + return (-EINVAL); case IP_PKTINFO: { /* * This also handles IP_RECVPKTINFO. 
@@ -2307,7 +2443,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields |= IPPF_UNICAST_HOPS; } /* Rebuild the header template */ - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) { *outlenp = 0; return (error); @@ -2342,7 +2478,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, return (EINVAL); } if (!checkonly) - icmp->icmp_multicast_loop = *i1; + connp->conn_multicast_loop = *i1; break; case IPV6_CHECKSUM: /* @@ -2372,7 +2508,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields |= IPPF_RAW_CKSUM; } /* Rebuild the header template */ - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) { *outlenp = 0; return (error); @@ -2476,7 +2612,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields &= ~IPPF_ADDR; } if (sticky) { - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) return (error); } @@ -2530,7 +2666,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields |= IPPF_TCLASS; } if (sticky) { - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) return (error); } @@ -2563,7 +2699,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields &= ~IPPF_NEXTHOP; } if (sticky) { - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) return (error); } @@ -2593,7 +2729,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields |= IPPF_HOPOPTS; } if (sticky) { - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) return (error); } @@ -2632,7 +2768,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields |= IPPF_RTDSTOPTS; } if (sticky) { - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) return 
(error); } @@ -2671,7 +2807,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields |= IPPF_DSTOPTS; } if (sticky) { - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) return (error); } @@ -2710,7 +2846,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, ipp->ipp_fields |= IPPF_RTHDR; } if (sticky) { - error = icmp_build_hdrs(q, icmp); + error = icmp_build_hdrs(icmp); if (error != 0) return (error); } @@ -2823,6 +2959,24 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, *outlenp = inlen; return (0); } +/* This routine sets socket options. */ +/* ARGSUSED */ +int +icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, + uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, + void *thisdg_attrs, cred_t *cr, mblk_t *mblk) +{ + icmp_t *icmp; + int err; + + icmp = Q_TO_ICMP(q); + + rw_enter(&icmp->icmp_rwlock, RW_WRITER); + err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, + outlenp, outvalp, thisdg_attrs, cr, mblk); + rw_exit(&icmp->icmp_rwlock); + return (err); +} /* * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, @@ -2832,7 +2986,7 @@ icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, * Returns failure if can't allocate memory. 
*/ static int -icmp_build_hdrs(queue_t *q, icmp_t *icmp) +icmp_build_hdrs(icmp_t *icmp) { icmp_stack_t *is = icmp->icmp_is; uchar_t *hdrs; @@ -2841,6 +2995,7 @@ icmp_build_hdrs(queue_t *q, icmp_t *icmp) ip6i_t *ip6i; ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; + ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); hdrs_len = ip_total_hdrs_len_v6(ipp); ASSERT(hdrs_len != 0); if (hdrs_len != icmp->icmp_sticky_hdrs_len) { @@ -2884,8 +3039,10 @@ icmp_build_hdrs(queue_t *q, icmp_t *icmp) /* Try to get everything in a single mblk */ if (hdrs_len > icmp->icmp_max_hdr_len) { icmp->icmp_max_hdr_len = hdrs_len; - (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + - is->is_wroff_extra); + rw_exit(&icmp->icmp_rwlock); + (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, + icmp->icmp_max_hdr_len + is->is_wroff_extra); + rw_enter(&icmp->icmp_rwlock, RW_WRITER); } return (0); } @@ -2951,15 +3108,15 @@ icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) icmppa->icmp_param_value = new_value; return (0); } - +/*ARGSUSED2*/ static void -icmp_rput(queue_t *q, mblk_t *mp) +icmp_input(void *arg1, mblk_t *mp, void *arg2) { + conn_t *connp = (conn_t *)arg1; struct T_unitdata_ind *tudi; uchar_t *rptr; - struct T_error_ack *tea; - icmp_t *icmp = (icmp_t *)q->q_ptr; - icmp_stack_t *is = icmp->icmp_is; + icmp_t *icmp; + icmp_stack_t *is; sin_t *sin; sin6_t *sin6; ip6_t *ip6h; @@ -2978,13 +3135,22 @@ icmp_rput(queue_t *q, mblk_t *mp) boolean_t icmp_ipv6_recvhoplimit = B_FALSE; uint_t hopstrip; - if (icmp->icmp_restricted) { - putnext(q, mp); - return; - } + ASSERT(connp->conn_flags & IPCL_RAWIPCONN); + + icmp = connp->conn_icmp; + is = icmp->icmp_is; + rptr = mp->b_rptr; + ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); + ASSERT(OK_32PTR(rptr)); - if (mp->b_datap->db_type == M_CTL) { + /* + * IP should have prepended the options data in an M_CTL + * Check M_CTL "type" to make sure are not here bcos of + * a valid ICMP message + */ + if (DB_TYPE(mp) == M_CTL) { /* + * 
FIXME: does IP still do this? * IP sends up the IPSEC_IN message for handling IPSEC * policy at the TCP level. We don't need it here. */ @@ -2992,132 +3158,26 @@ icmp_rput(queue_t *q, mblk_t *mp) mp1 = mp->b_cont; freeb(mp); mp = mp1; - } else { + rptr = mp->b_rptr; + } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && + ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == + IN_PKTINFO) { + /* + * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information + * has been prepended to the packet by IP. We need to + * extract the mblk and adjust the rptr + */ pinfo = (ip_pktinfo_t *)mp->b_rptr; - if ((icmp->icmp_recvif != 0 || - icmp->icmp_ip_recvpktinfo) && - (pinfo->ip_pkt_ulp_type == IN_PKTINFO)) { - /* - * IP has passed the options in mp and the - * actual data is in b_cont. - */ - recvif = B_TRUE; - /* - * We are here bcos IP_RECVIF is set so we need - * to extract the options mblk and adjust the - * rptr - */ - options_mp = mp; - mp = mp->b_cont; - } - } - } - - rptr = mp->b_rptr; - switch (mp->b_datap->db_type) { - case M_DATA: - /* - * M_DATA messages contain IP packets. They are handled - * following the switch. - */ - break; - case M_PROTO: - case M_PCPROTO: - /* M_PROTO messages contain some type of TPI message. */ - if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { - freemsg(mp); - return; - } - tea = (struct T_error_ack *)rptr; - switch (tea->PRIM_type) { - case T_ERROR_ACK: - switch (tea->ERROR_prim) { - case O_T_BIND_REQ: - case T_BIND_REQ: - /* - * If our O_T_BIND_REQ/T_BIND_REQ fails, - * clear out the source address before - * passing the message upstream. - * If this was caused by a T_CONN_REQ - * revert back to bound state. - */ - if (icmp->icmp_state == TS_UNBND) { - /* - * TPI has not yet bound - bind sent by - * icmp_bind_proto. 
- */ - freemsg(mp); - return; - } - if (icmp->icmp_state == TS_DATA_XFER) { - /* Connect failed */ - tea->ERROR_prim = T_CONN_REQ; - icmp->icmp_v6src = - icmp->icmp_bound_v6src; - icmp->icmp_state = TS_IDLE; - if (icmp->icmp_family == AF_INET6) - (void) icmp_build_hdrs(q, icmp); - break; - } - - if (icmp->icmp_discon_pending) { - tea->ERROR_prim = T_DISCON_REQ; - icmp->icmp_discon_pending = 0; - } - V6_SET_ZERO(icmp->icmp_v6src); - V6_SET_ZERO(icmp->icmp_bound_v6src); - icmp->icmp_state = TS_UNBND; - if (icmp->icmp_family == AF_INET6) - (void) icmp_build_hdrs(q, icmp); - break; - default: - break; - } - break; - case T_BIND_ACK: - icmp_rput_bind_ack(q, mp); - return; - - case T_OPTMGMT_ACK: - case T_OK_ACK: - if (tea->PRIM_type == T_OK_ACK) { - struct T_ok_ack *toa; - toa = (struct T_ok_ack *)rptr; - if (toa->CORRECT_prim == T_UNBIND_REQ) { - /* - * If somebody sets IPSEC options, IP - * sends some IPSEC info which is used - * by the TCP for detached connections. - * We don't need it here. - */ - if ((mp1 = mp->b_cont) != NULL) { - freemsg(mp1); - mp->b_cont = NULL; - } - } - } - break; - default: - freemsg(mp); - return; - } - putnext(q, mp); - return; - case M_CTL: - if (recvif) { + options_mp = mp; + mp = mp->b_cont; + rptr = mp->b_rptr; + } else { /* - * IP has passed the options in mp and the actual data - * is in b_cont. Jump to normal data processing. + * ICMP messages. 
*/ - break; + icmp_icmp_error(connp->conn_rq, mp); + return; } - - /* Contains ICMP packet from IP */ - icmp_icmp_error(q, mp); - return; - default: - putnext(q, mp); - return; } /* @@ -3127,7 +3187,7 @@ icmp_rput(queue_t *q, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); + BUMP_MIB(&is->is_rawip_mib, rawipInErrors); return; } ipvers = IPH_HDR_VERSION((ipha_t *)rptr); @@ -3170,7 +3230,7 @@ icmp_rput(queue_t *q, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&icmp->icmp_rawip_mib, + BUMP_MIB(&is->is_rawip_mib, rawipInErrors); return; } @@ -3221,7 +3281,7 @@ icmp_rput(queue_t *q, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); + BUMP_MIB(&is->is_rawip_mib, rawipInErrors); return; } mp1->b_cont = mp; @@ -3258,7 +3318,7 @@ icmp_rput(queue_t *q, mblk_t *mp) toh->level = IPPROTO_IP; toh->name = IP_RECVIF; toh->len = sizeof (struct T_opthdr) + - sizeof (uint_t); + sizeof (uint_t); toh->status = 0; dstopt += sizeof (struct T_opthdr); dstptr = (uint_t *)dstopt; @@ -3311,8 +3371,8 @@ icmp_rput(queue_t *q, mblk_t *mp) ASSERT(udi_size == 0); } - BUMP_MIB(&icmp->icmp_rawip_mib, rawipInDatagrams); - putnext(q, mp); + BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); + putnext(connp->conn_rq, mp); return; } @@ -3332,7 +3392,7 @@ icmp_rput(queue_t *q, mblk_t *mp) IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || icmp->icmp_family != AF_INET6) { freemsg(mp); - BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); + BUMP_MIB(&is->is_rawip_mib, rawipInErrors); return; } @@ -3471,7 +3531,7 @@ icmp_rput(queue_t *q, mblk_t *mp) ip0dbg(("icmp_rput: RAW checksum " "failed %x\n", sum)); freemsg(mp); - BUMP_MIB(&icmp->icmp_rawip_mib, + BUMP_MIB(&is->is_rawip_mib, rawipInCksumErrs); return; } @@ -3500,7 +3560,7 @@ icmp_rput(queue_t *q, mblk_t *mp) icmp_opt |= IPPF_HOPOPTS; } if ((icmp->icmp_ipv6_recvdstopts || - 
icmp->icmp_old_ipv6_recvdstopts) && + icmp->icmp_old_ipv6_recvdstopts) && (ipp.ipp_fields & IPPF_DSTOPTS)) { udi_size += sizeof (struct T_opthdr) + ipp.ipp_dstoptslen; @@ -3539,7 +3599,7 @@ icmp_rput(queue_t *q, mblk_t *mp) mp1 = allocb(udi_size, BPRI_MED); if (mp1 == NULL) { freemsg(mp); - BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); + BUMP_MIB(&is->is_rawip_mib, rawipInErrors); return; } mp1->b_cont = mp; @@ -3687,17 +3747,67 @@ icmp_rput(queue_t *q, mblk_t *mp) /* Consumed all of allocated space */ ASSERT(udi_size == 0); } - BUMP_MIB(&icmp->icmp_rawip_mib, rawipInDatagrams); - putnext(q, mp); + BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); + putnext(connp->conn_rq, mp); +} + +/* + * Handle the results of a T_BIND_REQ whether deferred by IP or handled + * immediately. + */ +static void +icmp_bind_result(conn_t *connp, mblk_t *mp) +{ + struct T_error_ack *tea; + + switch (mp->b_datap->db_type) { + case M_PROTO: + case M_PCPROTO: + /* M_PROTO messages contain some type of TPI message. */ + if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { + freemsg(mp); + return; + } + tea = (struct T_error_ack *)mp->b_rptr; + + switch (tea->PRIM_type) { + case T_ERROR_ACK: + switch (tea->ERROR_prim) { + case O_T_BIND_REQ: + case T_BIND_REQ: + icmp_bind_error(connp, mp); + return; + default: + break; + } + ASSERT(0); + freemsg(mp); + return; + + case T_BIND_ACK: + icmp_bind_ack(connp, mp); + return; + + default: + break; + } + freemsg(mp); + return; + default: + /* FIXME: other cases? 
*/ + ASSERT(0); + freemsg(mp); + return; + } } /* * Process a T_BIND_ACK */ static void -icmp_rput_bind_ack(queue_t *q, mblk_t *mp) +icmp_bind_ack(conn_t *connp, mblk_t *mp) { - icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_t *icmp = connp->conn_icmp; mblk_t *mp1; ire_t *ire; struct T_bind_ack *tba; @@ -3705,6 +3815,7 @@ icmp_rput_bind_ack(queue_t *q, mblk_t *mp) ipa_conn_t *ac; ipa6_conn_t *ac6; + rw_enter(&icmp->icmp_rwlock, RW_WRITER); /* * We know if headers are included or not so we can * safely do this. @@ -3715,10 +3826,10 @@ icmp_rput_bind_ack(queue_t *q, mblk_t *mp) * icmp_bind_proto. */ freemsg(mp); + rw_exit(&icmp->icmp_rwlock); return; } - if (icmp->icmp_discon_pending) - icmp->icmp_discon_pending = 0; + ASSERT(icmp->icmp_pending_op != -1); /* * If a broadcast/multicast address was bound set @@ -3742,10 +3853,12 @@ icmp_rput_bind_ack(queue_t *q, mblk_t *mp) */ if (ire->ire_type == IRE_BROADCAST && icmp->icmp_state != TS_DATA_XFER) { + ASSERT(icmp->icmp_pending_op == T_BIND_REQ || + icmp->icmp_pending_op == O_T_BIND_REQ); /* This was just a local bind to a MC/broadcast addr */ V6_SET_ZERO(icmp->icmp_v6src); if (icmp->icmp_family == AF_INET6) - (void) icmp_build_hdrs(q, icmp); + (void) icmp_build_hdrs(icmp); } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { /* * Local address not yet set - pick it from the @@ -3775,11 +3888,13 @@ icmp_rput_bind_ack(queue_t *q, mblk_t *mp) addrp)->ac6x_conn; } icmp->icmp_v6src = ac6->ac6_laddr; - (void) icmp_build_hdrs(q, icmp); + (void) icmp_build_hdrs(icmp); } } mp1 = mp1->b_cont; } + icmp->icmp_pending_op = -1; + rw_exit(&icmp->icmp_rwlock); /* * Look for one or more appended ACK message added by * icmp_connect or icmp_disconnect. 
@@ -3800,28 +3915,91 @@ icmp_rput_bind_ack(queue_t *q, mblk_t *mp) while (mp != NULL) { mp1 = mp->b_cont; mp->b_cont = NULL; - putnext(q, mp); + putnext(connp->conn_rq, mp); mp = mp1; } return; } freemsg(mp->b_cont); mp->b_cont = NULL; - putnext(q, mp); + putnext(connp->conn_rq, mp); +} + +static void +icmp_bind_error(conn_t *connp, mblk_t *mp) +{ + icmp_t *icmp = connp->conn_icmp; + struct T_error_ack *tea; + + tea = (struct T_error_ack *)mp->b_rptr; + /* + * If our O_T_BIND_REQ/T_BIND_REQ fails, + * clear out the source address before + * passing the message upstream. + * If this was caused by a T_CONN_REQ + * revert back to bound state. + */ + rw_enter(&icmp->icmp_rwlock, RW_WRITER); + if (icmp->icmp_state == TS_UNBND) { + /* + * TPI has not yet bound - bind sent by icmp_bind_proto. + */ + freemsg(mp); + rw_exit(&icmp->icmp_rwlock); + return; + } + ASSERT(icmp->icmp_pending_op != -1); + tea->ERROR_prim = icmp->icmp_pending_op; + icmp->icmp_pending_op = -1; + + switch (tea->ERROR_prim) { + case T_CONN_REQ: + ASSERT(icmp->icmp_state == TS_DATA_XFER); + /* Connect failed */ + /* Revert back to the bound source */ + icmp->icmp_v6src = icmp->icmp_bound_v6src; + icmp->icmp_state = TS_IDLE; + if (icmp->icmp_family == AF_INET6) + (void) icmp_build_hdrs(icmp); + break; + + case T_DISCON_REQ: + case T_BIND_REQ: + case O_T_BIND_REQ: + V6_SET_ZERO(icmp->icmp_v6src); + V6_SET_ZERO(icmp->icmp_bound_v6src); + icmp->icmp_state = TS_UNBND; + if (icmp->icmp_family == AF_INET6) + (void) icmp_build_hdrs(icmp); + break; + default: + break; + } + rw_exit(&icmp->icmp_rwlock); + putnext(connp->conn_rq, mp); } /* * return SNMP stuff in buffer in mpdata */ -static int +mblk_t * icmp_snmp_get(queue_t *q, mblk_t *mpctl) { mblk_t *mpdata; struct opthdr *optp; - icmp_t *icmp = (icmp_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + icmp_stack_t *is = connp->conn_netstack->netstack_icmp; + mblk_t *mp2ctl; + + /* + * make a copy of the original message + */ + mp2ctl = copymsg(mpctl); if (mpctl 
== NULL || (mpdata = mpctl->b_cont) == NULL) { + freemsg(mpctl); + freemsg(mp2ctl); return (0); } @@ -3829,12 +4007,12 @@ icmp_snmp_get(queue_t *q, mblk_t *mpctl) optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = EXPER_RAWIP; optp->name = 0; - (void) snmp_append_data(mpdata, (char *)&icmp->icmp_rawip_mib, - sizeof (icmp->icmp_rawip_mib)); + (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, + sizeof (is->is_rawip_mib)); optp->len = msgdsize(mpdata); qreply(q, mpctl); - return (1); + return (mp2ctl); } /* @@ -3843,7 +4021,7 @@ icmp_snmp_get(queue_t *q, mblk_t *mpctl) * to do the appropriate locking. */ /* ARGSUSED */ -static int +int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr, int len) { @@ -3860,15 +4038,11 @@ icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) { - IDP idp; - icmp_t *icmp; - char *state; + conn_t *connp; + ip_stack_t *ipst; char laddrbuf[INET6_ADDRSTRLEN]; char faddrbuf[INET6_ADDRSTRLEN]; - icmp_stack_t *is; - - icmp = (icmp_t *)q->q_ptr; - is = icmp->icmp_is; + int i; (void) mi_mpprintf(mp, "RAWIP " MI_COL_HDRPAD_STR @@ -3876,27 +4050,40 @@ icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) " src addr dest addr state"); /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ + connp = Q_TO_CONN(q); + ipst = connp->conn_netstack->netstack_ip; + for (i = 0; i < CONN_G_HASH_SIZE; i++) { + connf_t *connfp; + char *state; + + connfp = &ipst->ips_ipcl_globalhash_fanout[i]; + connp = NULL; + + while ((connp = ipcl_get_next_conn(connfp, connp, + IPCL_RAWIPCONN)) != NULL) { + icmp_t *icmp; + + mutex_enter(&(connp)->conn_lock); + icmp = connp->conn_icmp; - for (idp = mi_first_ptr(&is->is_head); - (icmp = (icmp_t *)idp) != NULL; - idp = mi_next_ptr(&is->is_head, idp)) { - if (icmp->icmp_state == TS_UNBND) - state = "UNBOUND"; - else if (icmp->icmp_state == TS_IDLE) - state = "IDLE"; - else if 
(icmp->icmp_state == TS_DATA_XFER) - state = "CONNECTED"; - else - state = "UnkState"; - - (void) mi_mpprintf(mp, - MI_COL_PTRFMT_STR "%s %s %s", - (void *)icmp, - inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, - sizeof (faddrbuf)), - inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, - sizeof (laddrbuf)), - state); + if (icmp->icmp_state == TS_UNBND) + state = "UNBOUND"; + else if (icmp->icmp_state == TS_IDLE) + state = "IDLE"; + else if (icmp->icmp_state == TS_DATA_XFER) + state = "CONNECTED"; + else + state = "UnkState"; + + (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", + (void *)icmp, + inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, + sizeof (faddrbuf)), + inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, + sizeof (laddrbuf)), + state); + mutex_exit(&(connp)->conn_lock); + } } return (0); } @@ -3928,29 +4115,40 @@ icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) static void icmp_unbind(queue_t *q, mblk_t *mp) { - icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_t *icmp = Q_TO_ICMP(q); + rw_enter(&icmp->icmp_rwlock, RW_WRITER); /* If a bind has not been done, we can't unbind. */ - if (icmp->icmp_state == TS_UNBND) { + if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { + rw_exit(&icmp->icmp_rwlock); icmp_err_ack(q, mp, TOUTSTATE, 0); return; } + icmp->icmp_pending_op = T_UNBIND_REQ; + rw_exit(&icmp->icmp_rwlock); + + /* + * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK + * and therefore ip_unbind must never return NULL. + */ + mp = ip_unbind(q, mp); + ASSERT(mp != NULL); + ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); + + /* + * Once we're unbound from IP, the pending operation may be cleared + * here. 
+ */ + rw_enter(&icmp->icmp_rwlock, RW_WRITER); V6_SET_ZERO(icmp->icmp_v6src); V6_SET_ZERO(icmp->icmp_bound_v6src); + icmp->icmp_pending_op = -1; icmp->icmp_state = TS_UNBND; + if (icmp->icmp_family == AF_INET6) + (void) icmp_build_hdrs(icmp); + rw_exit(&icmp->icmp_rwlock); - if (icmp->icmp_family == AF_INET6) { - int error; - - /* Rebuild the header template */ - error = icmp_build_hdrs(q, icmp); - if (error != 0) { - icmp_err_ack(q, mp, TSYSERR, error); - return; - } - } - /* Pass the unbind to IP. */ - putnext(q, mp); + qreply(q, mp); } /* @@ -3959,8 +4157,7 @@ icmp_unbind(queue_t *q, mblk_t *mp) * IPPROTO_IGMP). */ static void -icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop, -boolean_t use_putnext) +icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) { icmp_stack_t *is = icmp->icmp_is; ipha_t *ipha; @@ -3969,6 +4166,7 @@ boolean_t use_putnext) mblk_t *mp1; uint_t pkt_len; ip_opt_info_t optinfo; + conn_t *connp = icmp->icmp_connp; optinfo.ip_opt_flags = 0; optinfo.ip_opt_ill_index = 0; @@ -3977,7 +4175,7 @@ boolean_t use_putnext) if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { ASSERT(icmp != NULL); - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); freemsg(mp); return; } @@ -4021,7 +4219,7 @@ boolean_t use_putnext) tp_hdr_len)) { if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len)) { - BUMP_MIB(&icmp->icmp_rawip_mib, + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); freemsg(mp); return; @@ -4068,7 +4266,7 @@ boolean_t use_putnext) * Massage source route putting first source * route in ipha_dst. 
*/ - (void) ip_massage_options(ipha, icmp->icmp_is->is_netstack); + (void) ip_massage_options(ipha, is->is_netstack); } if (pktinfop != NULL) { @@ -4078,22 +4276,16 @@ boolean_t use_putnext) if (pktinfop->ip4_addr != INADDR_ANY) { ipha->ipha_src = pktinfop->ip4_addr; optinfo.ip_opt_flags = IP_VERIFY_SRC; - ASSERT(use_putnext == B_FALSE); } if (pktinfop->ip4_ill_index != 0) { optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; - ASSERT(use_putnext == B_FALSE); } } - mblk_setcred(mp, icmp->icmp_credp); - if (use_putnext) { - putnext(q, mp); - } else { - ip_output_options(Q_TO_CONN(q->q_next), mp, q->q_next, IP_WPUT, - &optinfo); - } + mblk_setcred(mp, connp->conn_cred); + ip_output_options(connp, mp, q, IP_WPUT, + &optinfo); } static boolean_t @@ -4101,17 +4293,19 @@ icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) { int err; uchar_t opt_storage[IP_MAX_OPT_LENGTH]; + icmp_stack_t *is = icmp->icmp_is; + conn_t *connp = icmp->icmp_connp; - err = tsol_compute_label(DB_CREDDEF(mp, icmp->icmp_credp), dst, + err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, opt_storage, icmp->icmp_mac_exempt, - icmp->icmp_is->is_netstack->netstack_ip); + is->is_netstack->netstack_ip); if (err == 0) { err = tsol_update_options(&icmp->icmp_ip_snd_options, &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, opt_storage); } if (err != 0) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); DTRACE_PROBE4( tx__ip__log__drop__updatelabel__icmp, char *, "queue(1) failed to update options(2) on mp(3)", @@ -4137,7 +4331,8 @@ icmp_wput(queue_t *q, mblk_t *mp) int ip_hdr_length; #define tudr ((struct T_unitdata_req *)rptr) size_t ip_len; - icmp_t *icmp = (icmp_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + icmp_t *icmp = connp->conn_icmp; icmp_stack_t *is = icmp->icmp_is; sin6_t *sin6; sin_t *sin; @@ -4145,13 +4340,6 @@ icmp_wput(queue_t *q, mblk_t *mp) ip4_pkt_t pktinfo; ip4_pkt_t *pktinfop = &pktinfo; ip_opt_info_t 
optinfo; - queue_t *ip_wq; - boolean_t use_putnext = B_TRUE; - - if (icmp->icmp_restricted) { - icmp_wput_restricted(q, mp); - return; - } switch (mp->b_datap->db_type) { case M_DATA: @@ -4160,7 +4348,7 @@ icmp_wput(queue_t *q, mblk_t *mp) ipha = (ipha_t *)mp->b_rptr; if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { - BUMP_MIB(&icmp->icmp_rawip_mib, + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); freemsg(mp); return; @@ -4179,7 +4367,7 @@ icmp_wput(queue_t *q, mblk_t *mp) !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { return; } - icmp_wput_hdrincl(q, mp, icmp, NULL, use_putnext); + icmp_wput_hdrincl(q, mp, icmp, NULL); return; } freemsg(mp); @@ -4205,19 +4393,19 @@ icmp_wput(queue_t *q, mblk_t *mp) if (icmp->icmp_state == TS_UNBND) { /* If a port has not been bound to the stream, fail. */ - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EPROTO); return; } mp1 = mp->b_cont; if (mp1 == NULL) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EPROTO); return; } if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } @@ -4228,14 +4416,14 @@ icmp_wput(queue_t *q, mblk_t *mp) if (!OK_32PTR((char *)sin6) || tudr->DEST_length != sizeof (sin6_t) || sin6->sin6_family != AF_INET6) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } /* No support for mapped addresses on raw sockets */ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } @@ -4252,7 +4440,7 @@ icmp_wput(queue_t *q, mblk_t *mp) if (!OK_32PTR((char 
*)sin) || tudr->DEST_length != sizeof (sin_t) || sin->sin_family != AF_INET) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } @@ -4280,7 +4468,7 @@ icmp_wput(queue_t *q, mblk_t *mp) if (icmp_unitdata_opt_process(q, mp, &error, (void *)pktinfop) < 0) { /* failure */ - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, error); return; } @@ -4292,19 +4480,6 @@ icmp_wput(queue_t *q, mblk_t *mp) * and contain option setting results */ - if (pktinfop->ip4_ill_index != 0 || - pktinfop->ip4_addr != INADDR_ANY) { - /* - * PKTINFO option is supported only when ICMP is - * over IP. - */ - ip_wq = WR(q)->q_next; - if (NOT_OVER_IP(ip_wq)) { - icmp_ud_err(q, mp, EINVAL); - return; - } - use_putnext = B_FALSE; - } } if (v4dst == INADDR_ANY) @@ -4321,7 +4496,7 @@ icmp_wput(queue_t *q, mblk_t *mp) /* Protocol 255 contains full IP headers */ if (icmp->icmp_hdrincl) { freeb(mp); - icmp_wput_hdrincl(q, mp1, icmp, pktinfop, use_putnext); + icmp_wput_hdrincl(q, mp1, icmp, pktinfop); return; } @@ -4334,7 +4509,7 @@ icmp_wput(queue_t *q, mblk_t *mp) !OK_32PTR(ipha)) { if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, BPRI_LO))) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, ENOMEM); return; } @@ -4347,19 +4522,18 @@ icmp_wput(queue_t *q, mblk_t *mp) /* Set version, header length, and tos */ *(uint16_t *)&ipha->ipha_version_and_hdr_length = ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | - icmp->icmp_type_of_service); + icmp->icmp_type_of_service); /* Set ttl and protocol */ *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; #else /* Set version, header length, and tos */ *(uint16_t *)&ipha->ipha_version_and_hdr_length = ((icmp->icmp_type_of_service << 8) | - ((IP_VERSION << 4) | (ip_hdr_length>>2))); + ((IP_VERSION << 4) | 
(ip_hdr_length>>2))); /* Set ttl and protocol */ *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; #endif if (pktinfop->ip4_addr != INADDR_ANY) { - ASSERT(use_putnext == B_FALSE); ipha->ipha_src = pktinfop->ip4_addr; optinfo.ip_opt_flags = IP_VERIFY_SRC; } else { @@ -4375,7 +4549,6 @@ icmp_wput(queue_t *q, mblk_t *mp) if (pktinfop->ip4_ill_index != 0) { optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; - ASSERT(use_putnext == B_FALSE); } @@ -4401,7 +4574,7 @@ icmp_wput(queue_t *q, mblk_t *mp) * as this can cause problems in layers below. */ if (ip_len > IP_MAXPACKET) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EMSGSIZE); return; } @@ -4426,18 +4599,13 @@ icmp_wput(queue_t *q, mblk_t *mp) * Massage source route putting first source route in ipha_dst. * Ignore the destination in the T_unitdata_req. */ - (void) ip_massage_options(ipha, icmp->icmp_is->is_netstack); + (void) ip_massage_options(ipha, is->is_netstack); } freeb(mp); - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutDatagrams); - mblk_setcred(mp1, icmp->icmp_credp); - if (use_putnext) { - putnext(q, mp1); - } else { - ip_output_options(Q_TO_CONN(q->q_next), mp1, q->q_next, IP_WPUT, - &optinfo); - } + BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); + mblk_setcred(mp1, connp->conn_cred); + ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); #undef ipha #undef tudr } @@ -4447,16 +4615,18 @@ icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) { int err; uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; + icmp_stack_t *is = icmp->icmp_is; + conn_t *connp = icmp->icmp_connp; - err = tsol_compute_label_v6(DB_CREDDEF(mp, icmp->icmp_credp), dst, + err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, opt_storage, icmp->icmp_mac_exempt, - icmp->icmp_is->is_netstack->netstack_ip); + is->is_netstack->netstack_ip); if (err == 0) { err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 
&icmp->icmp_label_len_v6, opt_storage); } if (err != 0) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); DTRACE_PROBE4( tx__ip__log__drop__updatelabel__icmp6, char *, "queue(1) failed to update options(2) on mp(3)", @@ -4482,7 +4652,7 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) mblk_t *mp1; int ip_hdr_len = IPV6_HDR_LEN; size_t ip_len; - icmp_t *icmp = (icmp_t *)q->q_ptr; + icmp_t *icmp = Q_TO_ICMP(q); icmp_stack_t *is = icmp->icmp_is; ip6_pkt_t ipp_s; /* For ancillary data options */ ip6_pkt_t *ipp = &ipp_s; @@ -4502,7 +4672,7 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) * since it is bound to a mapped address. */ if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EADDRNOTAVAIL); return; } @@ -4519,7 +4689,7 @@ icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) if (icmp_unitdata_opt_process(q, mp, &error, (void *)ipp) < 0) { /* failure */ - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, error); return; } @@ -4733,7 +4903,7 @@ no_options: } mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); if (!mp1) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, ENOMEM); return; } @@ -4911,8 +5081,8 @@ no_options: ip6h->ip6_dst = ip6_dst; ip6h->ip6_vcf = - (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | - (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); + (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | + (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); if (option_exists & IPPF_TCLASS) { tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); @@ -4935,7 +5105,7 @@ no_options: * Notify the application as well. 
*/ icmp_ud_err(q, mp, EPROTO); - BUMP_MIB(&icmp->icmp_rawip_mib, + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); return; } @@ -4945,7 +5115,7 @@ no_options: */ if (rth->ip6r_len & 0x1) { icmp_ud_err(q, mp, EPROTO); - BUMP_MIB(&icmp->icmp_rawip_mib, + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); return; } @@ -4956,7 +5126,7 @@ no_options: * the destination (in the last routing hdr entry). */ csum = ip_massage_options_v6(ip6h, rth, - icmp->icmp_is->is_netstack); + is->is_netstack); /* * Verify that the first hop isn't a mapped address. * Routers along the path need to do this verification @@ -4964,7 +5134,7 @@ no_options: */ if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { icmp_ud_err(q, mp, EADDRNOTAVAIL); - BUMP_MIB(&icmp->icmp_rawip_mib, + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); return; } @@ -4982,7 +5152,7 @@ no_options: * as this can cause problems in layers below. */ if (ip_len > IP_MAXPACKET) { - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); icmp_ud_err(q, mp, EMSGSIZE); return; } @@ -5005,7 +5175,7 @@ no_options: cksum_off = ip_hdr_len + icmp->icmp_checksum_off; if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { - BUMP_MIB(&icmp->icmp_rawip_mib, + BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); freemsg(mp); return; @@ -5034,9 +5204,8 @@ no_options: freeb(mp); /* We're done. 
Pass the packet to IP */ - BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutDatagrams); - mblk_setcred(mp1, icmp->icmp_credp); - putnext(q, mp1); + BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); + ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); } static void @@ -5045,12 +5214,12 @@ icmp_wput_other(queue_t *q, mblk_t *mp) uchar_t *rptr = mp->b_rptr; struct iocblk *iocp; #define tudr ((struct T_unitdata_req *)rptr) - icmp_t *icmp; + conn_t *connp = Q_TO_CONN(q); + icmp_t *icmp = connp->conn_icmp; + icmp_stack_t *is = icmp->icmp_is; cred_t *cr; - icmp = (icmp_t *)q->q_ptr; - - cr = DB_CREDDEF(mp, icmp->icmp_credp); + cr = DB_CREDDEF(mp, connp->conn_cred); switch (mp->b_datap->db_type) { case M_PROTO: @@ -5069,7 +5238,7 @@ icmp_wput_other(queue_t *q, mblk_t *mp) return; case O_T_BIND_REQ: case T_BIND_REQ: - qwriter(q, mp, icmp_bind, PERIM_OUTER); + icmp_bind(q, mp); return; case T_CONN_REQ: icmp_connect(q, mp); @@ -5093,16 +5262,17 @@ icmp_wput_other(queue_t *q, mblk_t *mp) return; case T_SVR4_OPTMGMT_REQ: - if (!snmpcom_req(q, mp, icmp_snmp_set, icmp_snmp_get, - cr)) + if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, + cr)) { /* Only IP can return anything meaningful */ (void) svr4_optcom_req(q, mp, cr, - &icmp_opt_obj); + &icmp_opt_obj, B_TRUE); + } return; case T_OPTMGMT_REQ: /* Only IP can return anything meaningful */ - (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); + (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); return; case T_DISCON_REQ: @@ -5137,7 +5307,7 @@ icmp_wput_other(queue_t *q, mblk_t *mp) * don't know the peer's name. 
*/ iocp->ioc_error = ENOTCONN; - err_ret:; + err_ret:; iocp->ioc_count = 0; mp->b_datap->db_type = M_IOCACK; qreply(q, mp); @@ -5157,7 +5327,7 @@ icmp_wput_other(queue_t *q, mblk_t *mp) case ND_SET: /* nd_getset performs the necessary error checking */ case ND_GET: - if (nd_getset(q, icmp->icmp_is->is_nd, mp)) { + if (nd_getset(q, is->is_nd, mp)) { qreply(q, mp); return; } @@ -5172,7 +5342,7 @@ icmp_wput_other(queue_t *q, mblk_t *mp) default: break; } - putnext(q, mp); + ip_wput(q, mp); } /* @@ -5196,7 +5366,8 @@ icmp_wput_iocdata(queue_t *q, mblk_t *mp) case TI_GETPEERNAME: break; default: - putnext(q, mp); + icmp = Q_TO_ICMP(q); + ip_output(icmp->icmp_connp, mp, q, IP_WPUT); return; } switch (mi_copy_state(q, mp, &mp1)) { @@ -5234,7 +5405,7 @@ icmp_wput_iocdata(queue_t *q, mblk_t *mp) */ STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, (void *)mp1->b_rptr); - icmp = (icmp_t *)q->q_ptr; + icmp = Q_TO_ICMP(q); if (icmp->icmp_family == AF_INET) addrlen = sizeof (sin_t); else @@ -5321,68 +5492,19 @@ icmp_wput_iocdata(queue_t *q, mblk_t *mp) mi_copyout(q, mp); } -/* - * Only allow MIB requests and M_FLUSHes to pass. - * All other messages are nacked or dropped. 
- */ -static void -icmp_wput_restricted(queue_t *q, mblk_t *mp) -{ - cred_t *cr; - icmp_t *icmp; - - switch (DB_TYPE(mp)) { - case M_PROTO: - case M_PCPROTO: - if (MBLKL(mp) < sizeof (t_scalar_t)) { - freemsg(mp); - return; - } - icmp = (icmp_t *)q->q_ptr; - cr = DB_CREDDEF(mp, icmp->icmp_credp); - - switch (((union T_primitives *)mp->b_rptr)->type) { - case T_SVR4_OPTMGMT_REQ: - if (!snmpcom_req(q, mp, - icmp_snmp_set, icmp_snmp_get, cr)) - (void) svr4_optcom_req(q, mp, cr, - &icmp_opt_obj); - return; - case T_OPTMGMT_REQ: - (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); - return; - default: - icmp_err_ack(q, mp, TSYSERR, ENOTSUP); - return; - } - /* NOTREACHED */ - case M_IOCTL: - miocnak(q, mp, 0, ENOTSUP); - break; - case M_FLUSH: - putnext(q, mp); - break; - default: - freemsg(mp); - break; - } -} - static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, void *thisdg_attrs) { - icmp_t *icmp; + conn_t *connp = Q_TO_CONN(q); struct T_unitdata_req *udreqp; int is_absreq_failure; cred_t *cr; - icmp = (icmp_t *)q->q_ptr; - udreqp = (struct T_unitdata_req *)mp->b_rptr; *errorp = 0; - cr = DB_CREDDEF(mp, icmp->icmp_credp); + cr = DB_CREDDEF(mp, connp->conn_cred); *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, udreqp->OPT_offset, cr, &icmp_opt_obj, @@ -5402,10 +5524,9 @@ icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, void icmp_ddi_init(void) { - ICMP6_MAJ = ddi_name_to_major(ICMP6); icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, - icmp_opt_obj.odb_opt_arr_cnt); + icmp_opt_obj.odb_opt_arr_cnt); /* * We want to be informed each time a stack is created or diff --git a/usr/src/uts/common/inet/ip/icmp6ddi.c b/usr/src/uts/common/inet/ip/icmp6ddi.c index e378b2af93..20bfc7e69a 100644 --- a/usr/src/uts/common/inet/ip/icmp6ddi.c +++ b/usr/src/uts/common/inet/ip/icmp6ddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution 
License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1992-2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,21 +32,19 @@ #include <inet/ip.h> #define INET_NAME "icmp6" -#define INET_STRTAB icmpinfo +#define INET_DEVSTRTAB icmpinfov6 #define INET_DEVDESC "ICMP6 STREAMS driver %I%" -#define INET_DEVMINOR IPV6_MINOR -#define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since we're really ip */ +#define INET_DEVMINOR 0 +#define INET_DEVMTFLAGS D_MP #include "../inetddi.c" int _init(void) { - INET_BECOME_IP(); - /* - * device initialization takes place in icmpddi.c:_init() - * (i.e. it must be called first.) + * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() */ return (mod_install(&modlinkage)); } diff --git a/usr/src/uts/common/inet/ip/icmpddi.c b/usr/src/uts/common/inet/ip/icmpddi.c index 2f86b0f28e..9a4e7621f6 100644 --- a/usr/src/uts/common/inet/ip/icmpddi.c +++ b/usr/src/uts/common/inet/ip/icmpddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -34,49 +33,30 @@ #include <inet/ip.h> #define INET_NAME "icmp" -#define INET_MODDESC "ICMP STREAMS module %I%" +#define INET_MODDESC "ICMP dummy STREAMS module %I%" #define INET_DEVDESC "ICMP STREAMS driver %I%" -#define INET_DEVMINOR IPV4_MINOR -#define INET_STRTAB icmpinfo -#define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since as a driver we're ip */ -#define INET_MODMTFLAGS (D_MP|D_MTPERQ|D_MTOUTPERIM|D_MTOCEXCL) +#define INET_DEVMINOR 0 +#define INET_DEVSTRTAB icmpinfov4 +#define INET_MODSTRTAB dummymodinfo +#define INET_DEVMTFLAGS D_MP +#define INET_MODMTFLAGS D_MP #include "../inetddi.c" -extern void icmp_ddi_init(void); -extern void icmp_ddi_destroy(void); - int _init(void) { - int error; - - INET_BECOME_IP(); - /* - * Note: After mod_install succeeds, another thread can enter - * therefore all initialization is done before it and any - * de-initialization needed done if it fails. 
+ * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() */ - icmp_ddi_init(); - error = mod_install(&modlinkage); - if (error != 0) - icmp_ddi_destroy(); - - return (error); + return (mod_install(&modlinkage)); } int _fini(void) { - int error; - - error = mod_remove(&modlinkage); - if (error != 0) - return (error); - - icmp_ddi_destroy(); - return (0); + return (mod_remove(&modlinkage)); } int diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index 8cbf14165e..37adc4668f 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -69,6 +69,7 @@ #include <inet/nd.h> #include <inet/arp.h> #include <inet/snmpcom.h> +#include <inet/optcom.h> #include <inet/kstatcom.h> #include <netinet/igmp_var.h> @@ -87,7 +88,6 @@ #include <inet/ip_ire.h> #include <inet/ip_ftable.h> #include <inet/ip_rts.h> -#include <inet/optcom.h> #include <inet/ip_ndp.h> #include <inet/ip_listutils.h> #include <netinet/igmp.h> @@ -118,6 +118,8 @@ #include <inet/sctp_ip.h> #include <inet/sctp/sctp_impl.h> #include <inet/udp_impl.h> +#include <inet/rawip_impl.h> +#include <inet/rts_impl.h> #include <sys/sunddi.h> #include <sys/tsol/label.h> @@ -136,16 +138,6 @@ int ip_squeue_enter = 2; /* Setable in /etc/system */ squeue_func_t ip_input_proc; #define SET_BPREV_FLAG(x) ((mblk_t *)(uintptr_t)(x)) -#define TCP6 "tcp6" -#define TCP "tcp" -#define SCTP "sctp" -#define SCTP6 "sctp6" - -major_t TCP6_MAJ; -major_t TCP_MAJ; -major_t SCTP_MAJ; -major_t SCTP6_MAJ; - /* * Setable in /etc/system */ @@ -610,6 +602,8 @@ uint_t ip_max_frag_dups = 10; static int conn_set_held_ipif(conn_t *, ipif_t **, ipif_t *); +static int ip_open(queue_t *q, dev_t *devp, int flag, int sflag, + cred_t *credp, boolean_t isv6); static mblk_t *ip_wput_attach_llhdr(mblk_t *, ire_t *, ip_proc_t, uint32_t); static void icmp_frag_needed(queue_t *, mblk_t *, int, zoneid_t, @@ -673,7 +667,7 @@ static int 
ip_rput_options(queue_t *, mblk_t *, ipha_t *, ipaddr_t *, ip_stack_t *); static boolean_t ip_rput_fragment(queue_t *, mblk_t **, ipha_t *, uint32_t *, uint16_t *); -int ip_snmp_get(queue_t *, mblk_t *); +int ip_snmp_get(queue_t *, mblk_t *, int); static mblk_t *ip_snmp_get_mib2_ip(queue_t *, mblk_t *, mib2_ipIfStatsEntry_t *, ip_stack_t *); static mblk_t *ip_snmp_get_mib2_ip_traffic_stats(queue_t *, mblk_t *, @@ -1367,28 +1361,49 @@ struct module_info ip_mod_info = { * problem by making the symbols here distinct from those in udp.c. */ -static struct qinit iprinit = { - (pfi_t)ip_rput, NULL, ip_open, ip_close, NULL, +/* + * Entry points for IP as a device and as a module. + * FIXME: down the road we might want a separate module and driver qinit. + * We have separate open functions for the /dev/ip and /dev/ip6 devices. + */ +static struct qinit iprinitv4 = { + (pfi_t)ip_rput, NULL, ip_openv4, ip_close, NULL, + &ip_mod_info +}; + +struct qinit iprinitv6 = { + (pfi_t)ip_rput_v6, NULL, ip_openv6, ip_close, NULL, + &ip_mod_info +}; + +static struct qinit ipwinitv4 = { + (pfi_t)ip_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &ip_mod_info }; -static struct qinit ipwinit = { - (pfi_t)ip_wput, (pfi_t)ip_wsrv, ip_open, ip_close, NULL, +struct qinit ipwinitv6 = { + (pfi_t)ip_wput_v6, (pfi_t)ip_wsrv, NULL, NULL, NULL, &ip_mod_info }; static struct qinit iplrinit = { - (pfi_t)ip_lrput, NULL, ip_open, ip_close, NULL, + (pfi_t)ip_lrput, NULL, ip_openv4, ip_close, NULL, &ip_mod_info }; static struct qinit iplwinit = { - (pfi_t)ip_lwput, NULL, ip_open, ip_close, NULL, + (pfi_t)ip_lwput, NULL, NULL, NULL, NULL, &ip_mod_info }; -struct streamtab ipinfo = { - &iprinit, &ipwinit, &iplrinit, &iplwinit +/* For AF_INET aka /dev/ip */ +struct streamtab ipinfov4 = { + &iprinitv4, &ipwinitv4, &iplrinit, &iplwinit +}; + +/* For AF_INET6 aka /dev/ip6 */ +struct streamtab ipinfov6 = { + &iprinitv6, &ipwinitv6, &iplrinit, &iplwinit }; #ifdef DEBUG @@ -4660,14 +4675,7 @@ ip_bind_laddr(conn_t *connp, 
mblk_t *mp, ipaddr_t src_addr, uint16_t lport, connp->conn_fport = 0; /* * Do we need to add a check to reject Multicast packets - * - * We need to make sure that the conn_recv is set to a non-null - * value before we insert the conn into the classifier table. - * This is to avoid a race with an incoming packet which does an - * ipcl_classify(). */ - if (*mp->b_wptr == IPPROTO_TCP) - connp->conn_recv = tcp_conn_request; error = ipcl_bind_insert(connp, *mp->b_wptr, src_addr, lport); } @@ -4683,8 +4691,6 @@ ip_bind_laddr(conn_t *connp, mblk_t *mp, ipaddr_t src_addr, uint16_t lport, /* Falls through to bad_addr */ } } - } else if (connp->conn_ulp == IPPROTO_TCP) { - connp->conn_recv = tcp_input; } bad_addr: if (error != 0) { @@ -5124,12 +5130,7 @@ ip_bind_connected(conn_t *connp, mblk_t *mp, ipaddr_t *src_addrp, /* * The addresses have been verified. Time to insert in * the correct fanout list. - * We need to make sure that the conn_recv is set to a non-null - * value before we insert into the classifier table to avoid a - * race with an incoming packet which does an ipcl_classify(). */ - if (protocol == IPPROTO_TCP) - connp->conn_recv = tcp_input; error = ipcl_conn_insert(connp, protocol, src_addr, dst_addr, connp->conn_ports); } @@ -5494,7 +5495,7 @@ ip_modclose(ill_t *ill) } /* - * This is called as part of close() for both IP and UDP + * This is called as part of close() for IP, UDP, ICMP, and RTS * in order to quiesce the conn. 
*/ void @@ -5529,9 +5530,6 @@ ip_quiesce_conn(conn_t *connp) ilg_cleanup_reqd = B_TRUE; mutex_exit(&connp->conn_lock); - if (IPCL_IS_UDP(connp)) - udp_quiesce_conn(connp); - if (conn_ioctl_cleanup_reqd) conn_ioctl_cleanup(connp); @@ -5560,8 +5558,7 @@ ip_quiesce_conn(conn_t *connp) if (drain_cleanup_reqd) conn_drain_tail(connp, B_TRUE); - if (connp->conn_rq == ipst->ips_ip_g_mrouter || - connp->conn_wq == ipst->ips_ip_g_mrouter) + if (connp == ipst->ips_ip_g_mrouter) (void) ip_mrouter_done(NULL, ipst); if (ilg_cleanup_reqd) @@ -5617,26 +5614,6 @@ ip_close(queue_t *q, int flags) */ ASSERT(connp->conn_ref == 1); - /* - * A conn which was previously marked as IPCL_UDP cannot - * retain the flag because it would have been cleared by - * udp_close(). - */ - ASSERT(!IPCL_IS_UDP(connp)); - - if (connp->conn_latch != NULL) { - IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); - connp->conn_latch = NULL; - } - if (connp->conn_policy != NULL) { - IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); - connp->conn_policy = NULL; - } - if (connp->conn_ipsec_opt_mp != NULL) { - freemsg(connp->conn_ipsec_opt_mp); - connp->conn_ipsec_opt_mp = NULL; - } - inet_minor_free(ip_minor_arena, connp->conn_dev); connp->conn_ref--; @@ -5646,83 +5623,6 @@ ip_close(queue_t *q, int flags) return (0); } -int -ip_snmpmod_close(queue_t *q) -{ - conn_t *connp = Q_TO_CONN(q); - ASSERT(connp->conn_flags & (IPCL_TCPMOD | IPCL_UDPMOD)); - - qprocsoff(q); - - if (connp->conn_flags & IPCL_UDPMOD) - udp_close_free(connp); - - if (connp->conn_cred != NULL) { - crfree(connp->conn_cred); - connp->conn_cred = NULL; - } - CONN_DEC_REF(connp); - q->q_ptr = WR(q)->q_ptr = NULL; - return (0); -} - -/* - * Write side put procedure for TCP module or UDP module instance. TCP/UDP - * as a module is only used for MIB browsers that push TCP/UDP over IP or ARP. - * The only supported primitives are T_SVR4_OPTMGMT_REQ and T_OPTMGMT_REQ. 
- * M_FLUSH messages and ioctls are only passed downstream; we don't flush our - * queues as we never enqueue messages there and we don't handle any ioctls. - * Everything else is freed. - */ -void -ip_snmpmod_wput(queue_t *q, mblk_t *mp) -{ - conn_t *connp = q->q_ptr; - pfi_t setfn; - pfi_t getfn; - - ASSERT(connp->conn_flags & (IPCL_TCPMOD | IPCL_UDPMOD)); - - switch (DB_TYPE(mp)) { - case M_PROTO: - case M_PCPROTO: - if ((MBLKL(mp) >= sizeof (t_scalar_t)) && - ((((union T_primitives *)mp->b_rptr)->type == - T_SVR4_OPTMGMT_REQ) || - (((union T_primitives *)mp->b_rptr)->type == - T_OPTMGMT_REQ))) { - /* - * This is the only TPI primitive supported. Its - * handling does not require tcp_t, but it does require - * conn_t to check permissions. - */ - cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); - - if (connp->conn_flags & IPCL_TCPMOD) { - setfn = tcp_snmp_set; - getfn = tcp_snmp_get; - } else { - setfn = udp_snmp_set; - getfn = udp_snmp_get; - } - if (!snmpcom_req(q, mp, setfn, getfn, cr)) { - freemsg(mp); - return; - } - } else if ((mp = mi_tpi_err_ack_alloc(mp, TPROTO, ENOTSUP)) - != NULL) - qreply(q, mp); - break; - case M_FLUSH: - case M_IOCTL: - putnext(q, mp); - break; - default: - freemsg(mp); - break; - } -} - /* Return the IP checksum for the IP header at "iph". 
*/ uint16_t ip_csum_hdr(ipha_t *ipha) @@ -5758,6 +5658,9 @@ ip_ddi_destroy(void) { tnet_fini(); + icmp_ddi_destroy(); + rts_ddi_destroy(); + udp_ddi_destroy(); sctp_ddi_g_destroy(); tcp_ddi_g_destroy(); ipsec_policy_g_destroy(); @@ -5925,11 +5828,6 @@ ip_thread_exit(void *phash) void ip_ddi_init(void) { - TCP6_MAJ = ddi_name_to_major(TCP6); - TCP_MAJ = ddi_name_to_major(TCP); - SCTP_MAJ = ddi_name_to_major(SCTP); - SCTP6_MAJ = ddi_name_to_major(SCTP6); - ip_input_proc = ip_squeue_switch(ip_squeue_enter); /* @@ -5968,6 +5866,10 @@ ip_ddi_init(void) sctp_ddi_g_init(); tnet_init(); + + udp_ddi_init(); + rts_ddi_init(); + icmp_ddi_init(); } /* @@ -6623,7 +6525,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, BUMP_MIB(mibptr, ipIfStatsHCInDelivers); if (mctl_present) freeb(first_mp1); - putnext(rq, mp1); + (connp->conn_recv)(connp, mp1, NULL); } } mutex_enter(&connfp->connf_lock); @@ -6669,6 +6571,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, * * Send the WHOLE packet up (incl. IPSEC_IN) without * a policy check. + * FIXME to use conn_recv for tun later. 
*/ putnext(rq, first_mp); CONN_DEC_REF(connp); @@ -6723,7 +6626,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, in_flags, IPCL_ZONEID(connp), ipst); } BUMP_MIB(mibptr, ipIfStatsHCInDelivers); - putnext(rq, mp); + (connp->conn_recv)(connp, mp, NULL); if (mctl_present) freeb(first_mp); } @@ -6935,7 +6838,8 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, squeue_enter_nodrain(connp->conn_sqp, first_mp, connp->conn_recv, connp, SQTAG_IP_FANOUT_TCP); } else { - putnext(connp->conn_rq, first_mp); + /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ + (connp->conn_recv)(connp, first_mp, NULL); CONN_DEC_REF(connp); } } @@ -7176,7 +7080,7 @@ ip_fanout_udp_conn(conn_t *connp, mblk_t *first_mp, mblk_t *mp, } BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); /* Send it upstream */ - CONN_UDP_RECV(connp, mp); + (connp->conn_recv)(connp, mp, NULL); } /* @@ -9830,9 +9734,24 @@ ip_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (0); } +/* For /dev/ip aka AF_INET open */ +int +ip_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + return (ip_open(q, devp, flag, sflag, credp, B_FALSE)); +} + +/* For /dev/ip6 aka AF_INET6 open */ +int +ip_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + return (ip_open(q, devp, flag, sflag, credp, B_TRUE)); +} + /* IP open routine. 
*/ int -ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, + boolean_t isv6) { conn_t *connp; major_t maj; @@ -9886,10 +9805,10 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) connp->conn_flags |= IPCL_SOCKET; /* Minor tells us which /dev entry was opened */ - if (geteminor(*devp) == IPV6_MINOR) { + if (isv6) { connp->conn_flags |= IPCL_ISV6; connp->conn_af_isv6 = B_TRUE; - ip_setqinfo(q, geteminor(*devp), B_FALSE, ipst); + ip_setpktversion(connp, isv6, B_FALSE, ipst); connp->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT; } else { connp->conn_af_isv6 = B_FALSE; @@ -9919,19 +9838,9 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (getpflags(NET_MAC_AWARE, credp) != 0) connp->conn_mac_exempt = B_TRUE; - /* - * This should only happen for ndd, netstat, raw socket or other SCTP - * administrative ops. In these cases, we just need a normal conn_t - * with ulp set to IPPROTO_SCTP. All other ops are trapped and - * an error will be returned. - */ - if (maj != SCTP_MAJ && maj != SCTP6_MAJ) { - connp->conn_rq = q; - connp->conn_wq = WR(q); - } else { - connp->conn_ulp = IPPROTO_SCTP; - connp->conn_rq = connp->conn_wq = NULL; - } + connp->conn_rq = q; + connp->conn_wq = WR(q); + /* Non-zero default values */ connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; @@ -9949,34 +9858,28 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) } /* - * Change q_qinfo based on the value of isv6. - * This can not called on an ill queue. - * Note that there is no race since either q_qinfo works for conn queues - it - * is just an optimization to enter the best wput routine directly. + * Change the output format (IPv4 vs. IPv6) for a conn_t. + * Note that there is no race since either ip_output function works - it + * is just an optimization to enter the best ip_output routine directly. 
*/ void -ip_setqinfo(queue_t *q, minor_t minor, boolean_t bump_mib, ip_stack_t *ipst) +ip_setpktversion(conn_t *connp, boolean_t isv6, boolean_t bump_mib, + ip_stack_t *ipst) { - ASSERT(q->q_flag & QREADR); - ASSERT(WR(q)->q_next == NULL); - ASSERT(q->q_ptr != NULL); - - if (minor == IPV6_MINOR) { + if (isv6) { if (bump_mib) { BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutSwitchIPVersion); } - q->q_qinfo = &rinit_ipv6; - WR(q)->q_qinfo = &winit_ipv6; - (Q_TO_CONN(q))->conn_pkt_isv6 = B_TRUE; + connp->conn_send = ip_output_v6; + connp->conn_pkt_isv6 = B_TRUE; } else { if (bump_mib) { BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutSwitchIPVersion); } - q->q_qinfo = &iprinit; - WR(q)->q_qinfo = &ipwinit; - (Q_TO_CONN(q))->conn_pkt_isv6 = B_FALSE; + connp->conn_send = ip_output; + connp->conn_pkt_isv6 = B_FALSE; } } @@ -10082,11 +9985,11 @@ conn_restart_ipsec_waiter(conn_t *connp, void *arg) optreq_prim = ((union T_primitives *)mp->b_rptr)->type; if (optreq_prim == T_OPTMGMT_REQ) { err = tpi_optcom_req(CONNP_TO_WQ(connp), mp, cr, - &ip_opt_obj); + &ip_opt_obj, B_FALSE); } else { ASSERT(optreq_prim == T_SVR4_OPTMGMT_REQ); err = svr4_optcom_req(CONNP_TO_WQ(connp), mp, cr, - &ip_opt_obj); + &ip_opt_obj, B_FALSE); } if (err != EINPROGRESS) CONN_OPER_PENDING_DONE(connp); @@ -10642,8 +10545,8 @@ setit: /* * For backward compatibility, this option * implicitly sets ip_multicast_ill as used in - * IP_MULTICAST_IF so that ip_wput gets - * this ipif to send mcast packets. + * IPV6_MULTICAST_IF so that ip_wput gets + * this ill to send mcast packets. */ connp->conn_multicast_ill = ill; connp->conn_orig_multicast_ifindex = (ill == NULL) ? 
@@ -12884,7 +12787,7 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, if (ip_udp_check(q, connp, recv_ill, ipha, &mp, &first_mp, mctl_present, ire)) { /* Send it upstream */ - CONN_UDP_RECV(connp, mp); + (connp->conn_recv)(connp, mp, NULL); } } /* @@ -13310,7 +13213,8 @@ try_again: SET_SQUEUE(first_mp, connp->conn_recv, connp); return (first_mp); } else { - putnext(connp->conn_rq, first_mp); + /* SOCK_RAW, IPPROTO_TCP case */ + (connp->conn_recv)(connp, first_mp, NULL); CONN_DEC_REF(connp); return (NULL); } @@ -16415,7 +16319,7 @@ ip_rput_forward(ire_t *ire, ipha_t *ipha, mblk_t *mp, ill_t *in_ill) uint32_t ill_index; ill_t *out_ill; mib2_ipIfStatsEntry_t *mibptr; - ip_stack_t *ipst = in_ill->ill_ipst; + ip_stack_t *ipst = ((ill_t *)(ire->ire_stq->q_ptr))->ill_ipst; /* Get the ill_index of the incoming ILL */ ill_index = (in_ill != NULL) ? in_ill->ill_phyint->phyint_ifindex : 0; @@ -18008,12 +17912,11 @@ bad_src_route: * should free mpctl. */ int -ip_snmp_get(queue_t *q, mblk_t *mpctl) +ip_snmp_get(queue_t *q, mblk_t *mpctl, int level) { ip_stack_t *ipst; sctp_stack_t *sctps; - if (q->q_next != NULL) { ipst = ILLQ_TO_IPST(q); } else { @@ -18026,6 +17929,33 @@ ip_snmp_get(queue_t *q, mblk_t *mpctl) return (0); } + /* + * For the purposes of the (broken) packet shell use + * of the level we make sure MIB2_TCP/MIB2_UDP can be used + * to make TCP and UDP appear first in the list of mib items. + * TBD: We could expand this and use it in netstat so that + * the kernel doesn't have to produce large tables (connections, + * routes, etc) when netstat only wants the statistics or a particular + * table. 
+ */ + if (!(level == MIB2_TCP || level == MIB2_UDP)) { + if ((mpctl = icmp_snmp_get(q, mpctl)) == NULL) { + return (1); + } + } + + if (level != MIB2_TCP) { + if ((mpctl = udp_snmp_get(q, mpctl)) == NULL) { + return (1); + } + } + + if (level != MIB2_UDP) { + if ((mpctl = tcp_snmp_get(q, mpctl)) == NULL) { + return (1); + } + } + if ((mpctl = ip_snmp_get_mib2_ip_traffic_stats(q, mpctl, ipst)) == NULL) { return (1); @@ -20003,14 +19933,6 @@ ip_unbind(queue_t *q, mblk_t *mp) if (mp == NULL) return (NULL); - /* - * Don't bzero the ports if its TCP since TCP still needs the - * lport to remove it from its own bind hash. TCP will do the - * cleanup. - */ - if (!IPCL_IS_TCP(connp)) - bzero(&connp->u_port, sizeof (connp->u_port)); - return (mp); } @@ -20764,13 +20686,9 @@ version_hdrlen_check: */ if (((v_hlen >> 4) & 0x7) == IPV6_VERSION) { /* - * XXX implement a IPv4 and IPv6 packet counter per - * conn and switch when ratio exceeds e.g. 10:1 + * FIXME: assume that callers of ip_output* call + * the right version? */ -#ifdef notyet - if (q->q_next == NULL) /* Avoid ill queue */ - ip_setqinfo(RD(q), B_TRUE, B_TRUE, ipst); -#endif BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutWrongIPVersion); ASSERT(xmit_ill == NULL); if (attach_ill != NULL) @@ -26807,11 +26725,11 @@ ip_restart_optmgmt(ipsq_t *dummy_sq, queue_t *q, mblk_t *first_mp, void *dummy) */ if (or->or_type == T_SVR4_OPTMGMT_REQ) { err = svr4_optcom_req(q, first_mp, NULL, - &ip_opt_obj); + &ip_opt_obj, B_FALSE); } else { ASSERT(or->or_type == T_OPTMGMT_REQ); err = tpi_optcom_req(q, first_mp, NULL, - &ip_opt_obj); + &ip_opt_obj, B_FALSE); } if (err != EINPROGRESS) { /* operation is done */ @@ -27142,13 +27060,6 @@ ip_wput_nondata(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); - /* Check if it is a queue to /dev/sctp. 
*/ - if (connp != NULL && connp->conn_ulp == IPPROTO_SCTP && - connp->conn_rq == NULL) { - sctp_wput(q, mp); - return; - } - switch (DB_TYPE(mp)) { case M_IOCTL: /* @@ -27304,18 +27215,6 @@ nak: return; } - if (connp != NULL && *(uint32_t *)mp->b_rptr == - IP_ULP_OUT_LABELED) { - out_labeled_t *olp; - - if (mp->b_wptr - mp->b_rptr != sizeof (*olp)) - break; - olp = (out_labeled_t *)mp->b_rptr; - connp->conn_ulp_labeled = olp->out_qnext == q; - freemsg(mp); - return; - } - /* M_CTL messages are used by ARP to tell us things. */ if ((mp->b_wptr - mp->b_rptr) < sizeof (arc_t)) break; @@ -27385,21 +27284,19 @@ nak: goto protonak; } /* - * Both TCP and UDP call ip_bind_{v4,v6}() directly - * instead of going through this path. We only get - * here in the following cases: - * - * a. Bind retries, where ipsq is non-NULL. - * b. T_BIND_REQ is issued from non TCP/UDP - * transport, e.g. icmp for raw socket, - * in which case ipsq will be NULL. + * The transports except SCTP call ip_bind_{v4,v6}() + * directly instead of a putnext. SCTP doesn't + * generate any T_BIND_REQ since it has its own + * fanout data structures. However, ESP and AH + * come in for regular binds; all other cases are + * bind retries. */ - ASSERT(ipsq != NULL || - (!IPCL_IS_TCP(connp) && !IPCL_IS_UDP(connp))); + ASSERT(!IPCL_IS_SCTP(connp)); /* Don't increment refcnt if this is a re-entry */ if (ipsq == NULL) CONN_INC_REF(connp); + mp = connp->conn_af_isv6 ? 
ip_bind_v6(q, mp, connp, NULL) : ip_bind_v4(q, mp, connp); if (mp == NULL) @@ -27414,7 +27311,6 @@ nak: squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, connp, SQTAG_BIND_RETRY); - return; } else if (IPCL_IS_UDP(connp)) { /* * In the case of UDP endpoint we @@ -27422,10 +27318,18 @@ nak: */ ASSERT(ipsq != NULL); udp_resume_bind(connp, mp); - return; + } else if (IPCL_IS_RAWIP(connp)) { + /* + * In the case of RAWIP endpoint we + * come here only for bind retries + */ + ASSERT(ipsq != NULL); + rawip_resume_bind(connp, mp); + } else { + /* The case of AH and ESP */ + qreply(q, mp); + CONN_OPER_PENDING_DONE(connp); } - qreply(q, mp); - CONN_OPER_PENDING_DONE(connp); return; } case T_SVR4_OPTMGMT_REQ: @@ -27452,7 +27356,8 @@ nak: CONN_INC_REF(connp); if (ip_check_for_ipsec_opt(q, mp)) return; - err = svr4_optcom_req(q, mp, cr, &ip_opt_obj); + err = svr4_optcom_req(q, mp, cr, &ip_opt_obj, + B_FALSE); if (err != EINPROGRESS) { /* Operation is done */ CONN_OPER_PENDING_DONE(connp); @@ -27482,7 +27387,7 @@ nak: CONN_INC_REF(connp); if (ip_check_for_ipsec_opt(q, mp)) return; - err = tpi_optcom_req(q, mp, cr, &ip_opt_obj); + err = tpi_optcom_req(q, mp, cr, &ip_opt_obj, B_FALSE); if (err != EINPROGRESS) { /* Operation is done */ CONN_OPER_PENDING_DONE(connp); @@ -28459,7 +28364,7 @@ conn_walk_fanout_table(connf_t *connfp, uint_t count, pfv_t func, void *arg, } } -/* ipcl_walk routine invoked for ip_conn_report for each conn. */ +/* conn_walk_fanout routine invoked for ip_conn_report for each conn. */ static void conn_report1(conn_t *connp, void *mp) { @@ -29745,8 +29650,9 @@ ip_fanout_sctp_raw(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, boolean_t isv4, /* * We are sending the IPSEC_IN message also up. Refer * to comments above this function. + * This is the SOCK_RAW, IPPROTO_SCTP case. 
*/ - putnext(rq, mp); + (connp->conn_recv)(connp, mp, NULL); CONN_DEC_REF(connp); } diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c index cc9ed30905..d6bf31a6f3 100644 --- a/usr/src/uts/common/inet/ip/ip6.c +++ b/usr/src/uts/common/inet/ip/ip6.c @@ -69,6 +69,7 @@ #include <inet/common.h> #include <inet/mi.h> +#include <inet/optcom.h> #include <inet/mib2.h> #include <inet/nd.h> #include <inet/arp.h> @@ -86,7 +87,6 @@ #include <inet/ip_if.h> #include <inet/ip_ire.h> #include <inet/ip_rts.h> -#include <inet/optcom.h> #include <inet/ip_ndp.h> #include <net/pfkeyv2.h> #include <inet/ipsec_info.h> @@ -98,6 +98,8 @@ #include <inet/ipclassifier.h> #include <inet/ipsecah.h> #include <inet/udp_impl.h> +#include <inet/rawip_impl.h> +#include <inet/rts_impl.h> #include <sys/squeue.h> #include <sys/tsol/label.h> @@ -207,9 +209,6 @@ static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, conn_t *, int, int, int, zoneid_t); -void ip_rput_v6(queue_t *, mblk_t *); -static void ip_wput_v6(queue_t *, mblk_t *); - /* * A template for an IPv6 AR_ENTRY_QUERY */ @@ -229,24 +228,6 @@ static areq_t ipv6_areq_template = { /* anything else filled in by the code */ }; -struct qinit rinit_ipv6 = { - (pfi_t)ip_rput_v6, - NULL, - ip_open, - ip_close, - NULL, - &ip_mod_info -}; - -struct qinit winit_ipv6 = { - (pfi_t)ip_wput_v6, - (pfi_t)ip_wsrv, - ip_open, - ip_close, - NULL, - &ip_mod_info -}; - /* * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 
* The message has already been checksummed and if needed, @@ -791,23 +772,6 @@ icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, mctl_present, zoneid); } -static void -pkt_too_big(conn_t *connp, void *arg) -{ - mblk_t *mp; - - if (!connp->conn_ipv6_recvpathmtu) - return; - - /* create message and drop it on this connections read queue */ - if ((mp = dupb((mblk_t *)arg)) == NULL) { - return; - } - mp->b_datap->db_type = M_CTL; - - putnext(connp->conn_rq, mp); -} - /* * Fanout received ICMPv6 error packets to the transports. * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. @@ -867,18 +831,6 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, /* Set message type, must be done after pullups */ mp->b_datap->db_type = M_CTL; - if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { - /* - * Deliver indication of ICMP6_PACKET_TOO_BIG to interested - * sockets. - * - * Note I don't like walking every connection to deliver - * this information to a set of listeners. A separate - * list could be kept to keep the cost of this down. - */ - ipcl_walk(pkt_too_big, (void *)mp, ipst); - } - /* Try to pass the ICMP message to clients who need it */ switch (nexthdr) { case IPPROTO_UDP: { @@ -2327,15 +2279,11 @@ ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) connp->conn_pkt_isv6 = B_TRUE; } } - /* Update qinfo if v4/v6 changed */ - if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && - !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { - if (connp->conn_pkt_isv6) - ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE, ipst); - else - ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); - } + /* Update conn_send and pktversion if v4/v6 changed */ + if (orig_pkt_isv6 != connp->conn_pkt_isv6) { + ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); + } /* * Pass the IPSEC headers size in ire_ipsec_overhead. 
* We can't do this in ip_bind_insert_ire because the policy @@ -2520,15 +2468,6 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, connp->conn_remv6 = ipv6_all_zeros; connp->conn_lport = lport; connp->conn_fport = 0; - - /* - * We need to make sure that the conn_recv is set to a non-null - * value before we insert the conn_t into the classifier table. - * This is to avoid a race with an incoming packet which does - * an ipcl_classify(). - */ - if (*mp->b_wptr == IPPROTO_TCP) - connp->conn_recv = tcp_conn_request; error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); } if (error == 0) { @@ -2544,8 +2483,6 @@ ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, goto bad_addr; } } - } else if (connp->conn_ulp == IPPROTO_TCP) { - connp->conn_recv = tcp_input; } bad_addr: if (error != 0) { @@ -2604,8 +2541,8 @@ ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, } else if (IPCL_IS_UDP(connp)) { udp_resume_bind(connp, mp); } else { - qreply(q, mp); - CONN_OPER_PENDING_DONE(connp); + ASSERT(IPCL_IS_RAWIP(connp)); + rawip_resume_bind(connp, mp); } } } @@ -3014,13 +2951,7 @@ ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, /* * The addresses have been verified. Time to insert in * the correct fanout list. - * We need to make sure that the conn_recv is set to a non-null - * value before we insert the conn_t into the classifier table. - * This is to avoid a race with an incoming packet which does - * an ipcl_classify(). */ - if (protocol == IPPROTO_TCP) - connp->conn_recv = tcp_input; error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, connp->conn_ports, IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); @@ -3329,7 +3260,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, freeb(first_mp1); BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); - putnext(rq, mp1); + (connp->conn_recv)(connp, mp1, NULL); } } mutex_enter(&connfp->connf_lock); @@ -3415,7 +3346,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, } } BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); - putnext(rq, mp); + (connp->conn_recv)(connp, mp, NULL); if (mctl_present) freeb(first_mp); } @@ -3702,7 +3633,8 @@ ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, (*ip_input_proc)(connp->conn_sqp, first_mp, connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); } else { - putnext(connp->conn_rq, first_mp); + /* SOCK_RAW, IPPROTO_TCP case */ + (connp->conn_recv)(connp, first_mp, NULL); CONN_DEC_REF(connp); } } @@ -3852,7 +3784,7 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); /* Send it upstream */ - CONN_UDP_RECV(connp, mp); + (connp->conn_recv)(connp, mp, NULL); IP6_STAT(ipst, ip6_udp_fannorm); CONN_DEC_REF(connp); @@ -3946,7 +3878,7 @@ ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); /* Send it upstream */ - CONN_UDP_RECV(connp, mp1); + (connp->conn_recv)(connp, mp1, NULL); } next_one: mutex_enter(&connfp->connf_lock); @@ -4013,7 +3945,7 @@ next_one: BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); /* Send it upstream */ - CONN_UDP_RECV(connp, mp); + (connp->conn_recv)(connp, mp, NULL); } IP6_STAT(ipst, ip6_udp_fanmb); CONN_DEC_REF(connp); @@ -8438,7 +8370,7 @@ udp_fanout: BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); /* Send it upstream */ - CONN_UDP_RECV(connp, mp); + (connp->conn_recv)(connp, mp, NULL); CONN_DEC_REF(connp); freemsg(hada_mp); @@ -10469,22 +10401,16 @@ send_from_ill: return; notv6: - /* - * XXX implement a IPv4 and IPv6 packet counter per conn 
and - * switch when ratio exceeds e.g. 10:1 - */ + /* FIXME?: assume the caller calls the right version of ip_output? */ if (q->q_next == NULL) { connp = Q_TO_CONN(q); - if (IPCL_IS_TCP(connp)) { - /* change conn_send for the tcp_v4_connections */ - connp->conn_send = ip_output; - } else if (connp->conn_ulp == IPPROTO_SCTP) { - /* The 'q' is the default SCTP queue */ - connp = (conn_t *)arg; - } else { - ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); - } + /* + * We can change conn_send for all types of conn, even + * though only TCP uses it right now. + * FIXME: sctp could use conn_send but doesn't currently. + */ + ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); } BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); (void) ip_output(arg, first_mp, arg2, caller); @@ -10499,7 +10425,7 @@ notv6: * in which case we use the global zoneid since those are all part of * the global zone. */ -static void +void ip_wput_v6(queue_t *q, mblk_t *mp) { if (CONN_Q(q)) diff --git a/usr/src/uts/common/inet/ip/ip6ddi.c b/usr/src/uts/common/inet/ip/ip6ddi.c index 3ba4bd75a6..cef848434b 100644 --- a/usr/src/uts/common/inet/ip/ip6ddi.c +++ b/usr/src/uts/common/inet/ip/ip6ddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1992-2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -33,9 +32,9 @@ #include <inet/ip.h> #define INET_NAME "ip6" -#define INET_STRTAB ipinfo +#define INET_DEVSTRTAB ipinfov6 #define INET_DEVDESC "IP6 STREAMS driver %I%" -#define INET_DEVMINOR IPV6_MINOR +#define INET_DEVMINOR 0 #define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since we're really ip */ #include "../inetddi.c" @@ -44,8 +43,8 @@ int _init(void) { /* - * device initialization occurs in ipddi.c:_init() - * (i.e. it must be called before this routine) + * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() */ return (mod_install(&modlinkage)); } diff --git a/usr/src/uts/common/inet/ip/ip_dummy.c b/usr/src/uts/common/inet/ip/ip_dummy.c new file mode 100644 index 0000000000..57b23ba286 --- /dev/null +++ b/usr/src/uts/common/inet/ip/ip_dummy.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/modctl.h> +#include <inet/common.h> + +/* + * Dummy streams module that is used by ICMP, UDP, and TCP by setting + * INETMODSTRTAB to dummymodinfo + * + * Its reason for existence is so that mibopen() that I_PUSH icmp, udp, and + * tcp can continue to push modules with those names, even though all the + * MIB information comes from IP. + */ + +static int dummy_modclose(queue_t *q); +static int dummy_modopen(queue_t *q, dev_t *devp, int flag, + int sflag, cred_t *credp); + +/* + * This is common code for the tcp, udp, and icmp streams module which is + * an empty STREAMS module provided for compatibility for mibopen() + * code which I_PUSH modules with those names. + */ +struct module_info dummy_mod_info = { + 5799, "dummymod", 1, INFPSZ, 65536, 1024 +}; + + +static struct qinit dummyrmodinit = { + (pfi_t)putnext, NULL, dummy_modopen, dummy_modclose, NULL, + &dummy_mod_info +}; + +static struct qinit dummywmodinit = { + (pfi_t)putnext, NULL, NULL, NULL, NULL, &dummy_mod_info +}; + +struct streamtab dummymodinfo = { + &dummyrmodinit, &dummywmodinit +}; + +static int +dummy_modclose(queue_t *q) +{ + qprocsoff(q); + return (0); +} + +/* ARGSUSED */ +static int +dummy_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + /* If the stream is already open, return immediately. */ + if (q->q_ptr != NULL) + return (0); + + /* If this is not a push of dummy as a module, fail. 
*/ + if (sflag != MODOPEN) + return (EINVAL); + + qprocson(q); + return (0); +} diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 6ee0be2931..729b308e9d 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -22945,8 +22945,8 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) ill->ill_isv6 = B_TRUE; if (ill->ill_rq != NULL) { - ill->ill_rq->q_qinfo = &rinit_ipv6; - ill->ill_wq->q_qinfo = &winit_ipv6; + ill->ill_rq->q_qinfo = &iprinitv6; + ill->ill_wq->q_qinfo = &ipwinitv6; } /* Keep the !IN6_IS_ADDR_V4MAPPED assertions happy */ diff --git a/usr/src/uts/common/inet/ip/ip_mroute.c b/usr/src/uts/common/inet/ip/ip_mroute.c index eeb08607ae..25276c3ff2 100644 --- a/usr/src/uts/common/inet/ip/ip_mroute.c +++ b/usr/src/uts/common/inet/ip/ip_mroute.c @@ -169,9 +169,9 @@ /* Function declarations */ static int add_mfc(struct mfcctl *, ip_stack_t *); -static int add_vif(struct vifctl *, queue_t *, mblk_t *, ip_stack_t *); +static int add_vif(struct vifctl *, conn_t *, mblk_t *, ip_stack_t *); static int del_mfc(struct mfcctl *, ip_stack_t *); -static int del_vif(vifi_t *, queue_t *, mblk_t *, ip_stack_t *); +static int del_vif(vifi_t *, conn_t *, mblk_t *, ip_stack_t *); static void del_vifp(struct vif *); static void encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); static void expire_upcalls(void *); @@ -184,7 +184,7 @@ static int get_version(uchar_t *); static int get_vif_cnt(struct sioc_vif_req *, ip_stack_t *); static int ip_mdq(mblk_t *, ipha_t *, ill_t *, ipaddr_t, struct mfc *); -static int ip_mrouter_init(queue_t *, uchar_t *, int, ip_stack_t *); +static int ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *); static void phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); static int register_mforward(queue_t *, mblk_t *, ill_t *); static void register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t); @@ -332,10 +332,11 @@ int 
ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, int datalen, mblk_t *first_mp) { - ip_stack_t *ipst = CONNQ_TO_IPST(q); + conn_t *connp = Q_TO_CONN(q); + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; mutex_enter(&ipst->ips_ip_g_mrouter_mutex); - if (cmd != MRT_INIT && q != ipst->ips_ip_g_mrouter) { + if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) { mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (EACCES); } @@ -356,9 +357,9 @@ ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_ASSERT: - return (0); + return (0); default: - return (EOPNOTSUPP); + return (EOPNOTSUPP); } } @@ -372,11 +373,12 @@ ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, } switch (cmd) { - case MRT_INIT: return (ip_mrouter_init(q, data, datalen, ipst)); + case MRT_INIT: return (ip_mrouter_init(connp, data, datalen, ipst)); case MRT_DONE: return (ip_mrouter_done(first_mp, ipst)); - case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, q, first_mp, - ipst)); - case MRT_DEL_VIF: return (del_vif((vifi_t *)data, q, first_mp, ipst)); + case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, connp, + first_mp, ipst)); + case MRT_DEL_VIF: return (del_vif((vifi_t *)data, connp, first_mp, + ipst)); case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data, ipst)); case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data, ipst)); case MRT_ASSERT: return (set_assert((int *)data, ipst)); @@ -390,9 +392,10 @@ ip_mrouter_set(int cmd, queue_t *q, int checkonly, uchar_t *data, int ip_mrouter_get(int cmd, queue_t *q, uchar_t *data) { - ip_stack_t *ipst = CONNQ_TO_IPST(q); + conn_t *connp = Q_TO_CONN(q); + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - if (q != ipst->ips_ip_g_mrouter) + if (connp != ipst->ips_ip_g_mrouter) return (EACCES); switch (cmd) { @@ -413,7 +416,8 @@ mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, { mblk_t *mp1; struct iocblk *iocp = (struct iocblk 
*)mp->b_rptr; - ip_stack_t *ipst = CONNQ_TO_IPST(q); + conn_t *connp = Q_TO_CONN(q); + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; /* Existence verified in ip_wput_nondata */ mp1 = mp->b_cont->b_cont; @@ -531,9 +535,8 @@ get_assert(uchar_t *data, ip_stack_t *ipst) * Enable multicast routing. */ static int -ip_mrouter_init(queue_t *q, uchar_t *data, int datalen, ip_stack_t *ipst) +ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst) { - conn_t *connp = Q_TO_CONN(q); int *v; if (data == NULL || (datalen != sizeof (int))) @@ -549,12 +552,21 @@ ip_mrouter_init(queue_t *q, uchar_t *data, int datalen, ip_stack_t *ipst) return (EADDRINUSE); } - ipst->ips_ip_g_mrouter = q; + /* + * MRT_INIT should only be allowed for RAW sockets, but we double + * check. + */ + if (!IPCL_IS_RAWIP(connp)) { + mutex_exit(&ipst->ips_ip_g_mrouter_mutex); + return (EINVAL); + } + + ipst->ips_ip_g_mrouter = connp; connp->conn_multi_router = 1; /* In order for tunnels to work we have to turn ip_g_forward on */ if (!WE_ARE_FORWARDING(ipst)) { if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(q, 1, SL_TRACE, + (void) mi_strlog(connp->conn_rq, 1, SL_TRACE, "ip_mrouter_init: turning on forwarding"); } ipst->ips_saved_ip_g_forward = ipst->ips_ip_g_forward; @@ -599,7 +611,7 @@ ip_mrouter_stack_init(ip_stack_t *ipst) int ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst) { - conn_t *connp; + conn_t *mrouter; vifi_t vifi; struct mfc *mfc_rt; int i; @@ -610,11 +622,11 @@ ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst) return (EINVAL); } - connp = Q_TO_CONN(ipst->ips_ip_g_mrouter); + mrouter = ipst->ips_ip_g_mrouter; if (ipst->ips_saved_ip_g_forward != -1) { if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mrouter_done: turning off forwarding"); } ipst->ips_ip_g_forward = ipst->ips_saved_ip_g_forward; @@ -630,7 +642,7 @@ ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst) 
ipst->ips_last_encap_src = 0; ipst->ips_last_encap_vif = NULL; mutex_exit(&ipst->ips_last_encap_lock); - connp->conn_multi_router = 0; + mrouter->conn_multi_router = 0; mutex_exit(&ipst->ips_ip_g_mrouter_mutex); @@ -671,10 +683,11 @@ ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst) ipsq = ill->ill_phyint->phyint_ipsq; } else { ipsq = ipsq_try_enter(ipif, NULL, - ipst->ips_ip_g_mrouter, mp, + mrouter->conn_wq, mp, ip_restart_optmgmt, NEW_OP, B_TRUE); if (ipsq == NULL) { mutex_exit(&(vifp)->v_lock); + ipif_refrele(ipif); return (EINPROGRESS); } /* @@ -683,7 +696,7 @@ ip_mrouter_done(mblk_t *mp, ip_stack_t *ipst) */ vifp->v_marks &= ~VIF_MARK_GOOD; vifp->v_marks |= VIF_MARK_CONDEMNED; - mutex_exit(&(vifp)->v_lock); + mutex_exit(&(vifp)->v_lock); suc = B_TRUE; } @@ -798,7 +811,7 @@ ip_mrouter_stack_destroy(ip_stack_t *ipst) static boolean_t is_mrouter_off(ip_stack_t *ipst) { - conn_t *connp; + conn_t *mrouter; mutex_enter(&ipst->ips_ip_g_mrouter_mutex); if (ipst->ips_ip_g_mrouter == NULL) { @@ -806,8 +819,8 @@ is_mrouter_off(ip_stack_t *ipst) return (B_TRUE); } - connp = Q_TO_CONN(ipst->ips_ip_g_mrouter); - if (connp->conn_multi_router == 0) { + mrouter = ipst->ips_ip_g_mrouter; + if (mrouter->conn_multi_router == 0) { mutex_exit(&ipst->ips_ip_g_mrouter_mutex); return (B_TRUE); } @@ -850,14 +863,14 @@ lock_good_vif(struct vif *vifp) * Add a vif to the vif table. 
*/ static int -add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp, ip_stack_t *ipst) +add_vif(struct vifctl *vifcp, conn_t *connp, mblk_t *first_mp, ip_stack_t *ipst) { struct vif *vifp = ipst->ips_vifs + vifcp->vifc_vifi; ipif_t *ipif; int error; struct tbf *v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi; - conn_t *connp = Q_TO_CONN(q); ipsq_t *ipsq; + conn_t *mrouter = ipst->ips_ip_g_mrouter; ASSERT(connp != NULL); @@ -920,7 +933,7 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp, ip_stack_t *ipst) } if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "add_vif: src 0x%x enter", vifcp->vifc_lcl_addr.s_addr); } @@ -1036,7 +1049,7 @@ add_vif(struct vifctl *vifcp, queue_t *q, mblk_t *first_mp, ip_stack_t *ipst) mutex_exit(&ipst->ips_numvifs_mutex); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d", vifcp->vifc_vifi, ntohl(vifcp->vifc_lcl_addr.s_addr), @@ -1060,6 +1073,7 @@ del_vifp(struct vif *vifp) mblk_t *mp0; vifi_t vifi; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED); ASSERT(t != NULL); @@ -1071,7 +1085,7 @@ del_vifp(struct vif *vifp) ipif_refrele(vifp->v_ipif); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr); } @@ -1120,10 +1134,9 @@ del_vifp(struct vif *vifp) } static int -del_vif(vifi_t *vifip, queue_t *q, mblk_t *first_mp, ip_stack_t *ipst) +del_vif(vifi_t *vifip, conn_t *connp, mblk_t *first_mp, ip_stack_t *ipst) { struct vif *vifp = ipst->ips_vifs + *vifip; - conn_t *connp; ipsq_t *ipsq; if (*vifip >= ipst->ips_numvifs) @@ -1150,7 +1163,6 @@ del_vif(vifi_t *vifip, queue_t *q, mblk_t 
*first_mp, ip_stack_t *ipst) */ if (first_mp != NULL && !(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { - connp = Q_TO_CONN(q); ASSERT(connp != NULL); /* * We have to be exclusive as we have to call ip_delmulti() @@ -1223,6 +1235,7 @@ add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) ushort_t nstl; int i; struct mfcb *mfcbp; + conn_t *mrouter = ipst->ips_ip_g_mrouter; /* * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted @@ -1259,7 +1272,7 @@ add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) /* If an entry already exists, just update the fields */ if (rt) { if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "add_mfc: update o %x grp %x parent %x", ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), @@ -1296,7 +1309,7 @@ add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) mfccp->mfcc_parent); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "add_mfc: o %x g %x p %x", ntohl(mfccp->mfcc_origin.s_addr), @@ -1357,7 +1370,7 @@ add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) if (nstl == 0) { mutex_enter(&(mfcbp->mfcb_lock)); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "add_mfc: no upcall o %x g %x p %x", ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), @@ -1375,8 +1388,8 @@ add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && (rt->mfc_mcastgrp.s_addr == - mfccp->mfcc_mcastgrp.s_addr) && - (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) { + mfccp->mfcc_mcastgrp.s_addr) && + (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) { fill_route(rt, mfccp, ipst); mutex_exit(&rt->mfc_mutex); break; @@ -1493,15 +1506,16 @@ del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) { struct in_addr origin; struct in_addr mcastgrp; - struct mfc *rt; - uint_t 
hash; + struct mfc *rt; + uint_t hash; + conn_t *mrouter = ipst->ips_ip_g_mrouter; origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "del_mfc: o %x g %x", ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); @@ -1591,9 +1605,10 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) boolean_t pim_reg_packet = B_FALSE; struct mfcb *mfcbp; ip_stack_t *ipst = ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), ill->ill_name); @@ -1610,9 +1625,9 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) * or a packet destined to a local-only group. */ if (CLASSD(dst) && (ipha->ipha_ttl <= 1 || - (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) { + (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) { if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: not forwarded ttl %d," " dst 0x%x ill %s", ipha->ipha_ttl, ntohl(dst), ill->ill_name); @@ -1633,12 +1648,12 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) mp->b_prev = NULL; if (ipst->ips_ip_mrtdebug > 1) { if (tunnel_src != 0) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: ill %s arrived via ENCAP TUN", ill->ill_name); } else if (pim_reg_packet) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: ill %s arrived via" " REGISTER VIF", @@ -1650,7 +1665,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) { /* Packet arrived via a 
physical interface. */ if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: ill %s arrived via PHYINT", ill->ill_name); } @@ -1725,7 +1740,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) ipst->ips_mrtstat->mrts_mfc_misses++; /* BSD uses mrts_no_route++ */ if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: no rte ill %s src %x g %x misses %d", ill->ill_name, ntohl(src), ntohl(dst), (int)ipst->ips_mrtstat->mrts_mfc_misses); @@ -1755,7 +1770,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) mfc_rt = mfc_rt->mfc_next) { mutex_enter(&mfc_rt->mfc_mutex); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: MFCTAB hash %d o 0x%x" " g 0x%x\n", @@ -1830,7 +1845,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) rte_m = rte_m->rte_next) npkts++; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: upcalls %d\n", npkts); } if (npkts > MAX_UPQ) { @@ -1857,7 +1872,7 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) /* Link into table */ if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mforward: NEW MFCTAB hash %d o 0x%x " "g 0x%x\n", hash, @@ -1877,7 +1892,8 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) /* not the first upcall */ prev_rte = mfc_rt->mfc_rte; for (rte1 = mfc_rt->mfc_rte->rte_next; rte1; - prev_rte = rte1, rte1 = rte1->rte_next); + prev_rte = rte1, rte1 = rte1->rte_next) + ; prev_rte->rte_next = rte; } @@ -1921,8 +1937,8 @@ ip_mforward(ill_t *ill, ipha_t *ipha, mblk_t *mp) mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE); mutex_exit(&mfc_rt->mfc_mutex); 
mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); - putnext(RD(ipst->ips_ip_g_mrouter), mp_copy); - + /* Pass to RAWIP */ + (mrouter->conn_recv)(mrouter, mp_copy, NULL); } else { mutex_exit(&mfc_rt->mfc_mutex); mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); @@ -1960,16 +1976,18 @@ expire_upcalls(void *arg) uint_t hash; struct mfc *prev_mfc, *mfc0; ip_stack_t *ipst; + conn_t *mrouter; if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) { cmn_err(CE_WARN, "expire_upcalls: no ILL\n"); return; } ipst = mfc_rt->mfc_rte->ill->ill_ipst; + mrouter = ipst->ips_ip_g_mrouter; hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "expire_upcalls: hash %d s %x g %x", hash, ntohl(mfc_rt->mfc_origin.s_addr), ntohl(mfc_rt->mfc_mcastgrp.s_addr)); @@ -2023,9 +2041,10 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, size_t plen = msgdsize(mp); vifi_t num_of_vifs; ip_stack_t *ipst = ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mdq: SEND src %x, ipha_dst %x, ill %s", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), ill->ill_name); @@ -2055,7 +2074,7 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n", ill->ill_name)); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name); } return (-1); /* drop pkt */ @@ -2095,7 +2114,7 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name)); if (ipst->ips_ip_mrtdebug > 1) { - (void) 
mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mdq: arrived wrong if, vifi %d ill " "%s viftable ill %s\n", (int)vifi, ill->ill_name, @@ -2132,7 +2151,8 @@ ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, im->im_msgtype = IGMPMSG_WRONGVIF; im->im_mbz = 0; im->im_vif = (ushort_t)vifi; - putnext(RD(ipst->ips_ip_g_mrouter), mp_copy); + /* Pass to RAWIP */ + (mrouter->conn_recv)(mrouter, mp_copy, NULL); } unlock_good_vif(&ipst->ips_vifs[vifi]); if (tunnel_src != 0) @@ -2201,6 +2221,7 @@ phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) { mblk_t *mp_copy; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; /* Make a new reference to the packet */ mp_copy = copymsg(mp); /* TODO could copy header and dup rest */ @@ -2213,7 +2234,7 @@ phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) tbf_send_packet(vifp, mp_copy); else { if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "phyint_send: tbf_contr rate %d " "vifp 0x%p mp 0x%p dst 0x%x", vifp->v_rate_limit, (void *)vifp, (void *)mp, dst); @@ -2234,9 +2255,10 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) mblk_t *mp_copy; ipha_t *ipha_copy; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "register_send: src %x, dst %x\n", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); } @@ -2250,7 +2272,7 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) if (mp_copy == NULL) { ++ipst->ips_mrtstat->mrts_pim_nomemory; if (ipst->ips_ip_mrtdebug > 3) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "register_send: 
allocb failure."); } return; @@ -2267,7 +2289,7 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) if ((mp_copy->b_cont = copymsg(mp)) == NULL) { ++ipst->ips_mrtstat->mrts_pim_nomemory; if (ipst->ips_ip_mrtdebug > 3) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "register_send: copymsg failure."); } freeb(mp_copy); @@ -2275,7 +2297,7 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) } /* - * icmp_rput() asserts that IP version field is set to an + * icmp_input() asserts that IP version field is set to an * appropriate version. Hence, the struct igmpmsg that this really * becomes, needs to have the correct IP version field. */ @@ -2300,15 +2322,16 @@ register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) im->im_mbz = 0; ++ipst->ips_mrtstat->mrts_upcalls; - if (!canputnext(RD(ipst->ips_ip_g_mrouter))) { + if (!canputnext(mrouter->conn_rq)) { ++ipst->ips_mrtstat->mrts_pim_regsend_drops; if (ipst->ips_ip_mrtdebug > 3) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "register_send: register upcall failure."); } freemsg(mp_copy); } else { - putnext(RD(ipst->ips_ip_g_mrouter), mp_copy); + /* Pass to RAWIP */ + (mrouter->conn_recv)(mrouter, mp_copy, NULL); } } @@ -2354,6 +2377,7 @@ pim_input(queue_t *q, mblk_t *mp, ill_t *ill) struct pim *pimp; /* pointer to a pim struct */ uint32_t *reghdr; ip_stack_t *ipst = ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; /* * Pullup the msg for PIM protocol processing. 
@@ -2375,7 +2399,7 @@ pim_input(queue_t *q, mblk_t *mp, ill_t *ill) if (pimlen < PIM_MINLEN) { ++ipst->ips_mrtstat->mrts_pim_malformed; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "pim_input: length not at least minlen"); } freemsg(mp); @@ -2393,7 +2417,7 @@ pim_input(queue_t *q, mblk_t *mp, ill_t *ill) if (pimp->pim_vers != PIM_VERSION) { ++ipst->ips_mrtstat->mrts_pim_badversion; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "pim_input: unknown version of PIM"); } freemsg(mp); @@ -2406,7 +2430,7 @@ pim_input(queue_t *q, mblk_t *mp, ill_t *ill) if (!pim_validate_cksum(mp, ip, pimp)) { ++ipst->ips_mrtstat->mrts_pim_rcv_badcsum; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "pim_input: invalid checksum"); } freemsg(mp); @@ -2425,14 +2449,14 @@ pim_input(queue_t *q, mblk_t *mp, ill_t *ill) if (!CLASSD(eip->ipha_dst)) { ++ipst->ips_mrtstat->mrts_pim_badregisters; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "pim_input: Inner pkt not mcast .. 
!"); } freemsg(mp); return (-1); } if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "register from %x, to %x, len %d", ntohl(eip->ipha_src), ntohl(eip->ipha_dst), @@ -2482,6 +2506,7 @@ static int register_mforward(queue_t *q, mblk_t *mp, ill_t *ill) { ip_stack_t *ipst = ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs); @@ -2489,7 +2514,7 @@ register_mforward(queue_t *q, mblk_t *mp, ill_t *ill) ipha_t *ipha; ipha = (ipha_t *)mp->b_rptr; - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "register_mforward: src %x, dst %x\n", ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); } @@ -2523,9 +2548,10 @@ encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) ipha_t *ipha_copy; size_t len; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "encap_send: vif %ld enter", (ptrdiff_t)(vifp - ipst->ips_vifs)); } @@ -2565,7 +2591,7 @@ encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "encap_send: group 0x%x", ntohl(ipha->ipha_dst)); } if (vifp->v_rate_limit <= 0) @@ -2589,6 +2615,7 @@ ip_mroute_decap(queue_t *q, mblk_t *mp, ill_t *ill) ipaddr_t src; struct vif *vifp; ip_stack_t *ipst = ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; /* * Dump the packet if it's not to a multicast destination or if @@ -2620,7 +2647,7 @@ ip_mroute_decap(queue_t *q, mblk_t *mp, ill_t *ill) if (vifp->v_flags & VIFF_TUNNEL) ipst->ips_last_encap_vif = vifp; if (ipst->ips_ip_mrtdebug > 1) { - (void) 
mi_strlog(ipst->ips_ip_g_mrouter, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "ip_mroute_decap: good tun " "vif %ld with %x", @@ -2688,12 +2715,13 @@ reset_mrt_ill(ill_t *ill) struct rtdetq *rte; int i; ip_stack_t *ipst = ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; for (i = 0; i < MFCTBLSIZ; i++) { MFCB_REFHOLD(&ipst->ips_mfcs[i]); if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) { if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "reset_mrt_ill: mfctable [%d]", i); } @@ -2703,7 +2731,7 @@ reset_mrt_ill(ill_t *ill) if (rte->ill == ill) { if (ipst->ips_ip_mrtdebug > 1) { (void) mi_strlog( - ipst->ips_ip_g_mrouter, + mrouter->conn_rq, 1, SL_TRACE, "reset_mrt_ill: " "ill 0x%p", ill); @@ -2732,6 +2760,7 @@ tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) struct tbf *t = vifp->v_tbf; timeout_id_t id = 0; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; /* Drop if packet is too large */ if (p_len > MAX_BKT_SIZE) { @@ -2740,7 +2769,7 @@ tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) return; } if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x", (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len, ntohl(ipha->ipha_dst)); @@ -2755,7 +2784,7 @@ tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) * and the queue is empty, send this packet out. 
*/ if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d", (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len, t->tbf_q_len); @@ -2782,7 +2811,7 @@ tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) } else { /* Check that we have UDP header with IP header */ size_t hdr_length = IPH_HDR_LENGTH(ipha) + - sizeof (struct udphdr); + sizeof (struct udphdr); if ((mp->b_wptr - mp->b_rptr) < hdr_length) { if (!pullupmsg(mp, hdr_length)) { @@ -2828,9 +2857,10 @@ tbf_queue(struct vif *vifp, mblk_t *mp) { struct tbf *t = vifp->v_tbf; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs)); } ASSERT(MUTEX_HELD(&t->tbf_lock)); @@ -2863,9 +2893,10 @@ tbf_process_q(struct vif *vifp) struct tbf *t = vifp->v_tbf; size_t len; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_process_q 1: vif %ld qlen = %d", (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len); } @@ -2908,6 +2939,7 @@ tbf_reprocess_q(void *arg) { struct vif *vifp = arg; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; mutex_enter(&vifp->v_tbf->tbf_lock); vifp->v_timeout_id = 0; @@ -2922,7 +2954,7 @@ tbf_reprocess_q(void *arg) mutex_exit(&vifp->v_tbf->tbf_lock); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_reprcess_q: vif %ld timeout id = %p", (ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id); } @@ 
-2942,9 +2974,10 @@ tbf_dq_sel(struct vif *vifp, ipha_t *ipha) mblk_t **np; mblk_t *last, *mp; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "dq_sel: vif %ld dst 0x%x", (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst)); } @@ -2984,11 +3017,12 @@ tbf_send_packet(struct vif *vifp, mblk_t *mp) { ipif_t *ipif; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; /* If encap tunnel options */ if (vifp->v_flags & VIFF_TUNNEL) { if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_send_pkt: ENCAP tunnel vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs)); } @@ -3023,7 +3057,7 @@ tbf_send_packet(struct vif *vifp, mblk_t *mp) mutex_exit(&ipif->ipif_ill->ill_lock); if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_send_pkt: loopback vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs)); @@ -3038,7 +3072,7 @@ tbf_send_packet(struct vif *vifp, mblk_t *mp) ire, (ill_t *)ipif->ipif_rq->q_ptr); } else { /* Either copymsg failed or no ire */ - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_send_pkt: mp_loop 0x%p, ire 0x%p " "vif %ld\n", mp_loop, ire, @@ -3050,7 +3084,7 @@ tbf_send_packet(struct vif *vifp, mblk_t *mp) mutex_exit(&ipif->ipif_ill->ill_lock); } if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_send_pkt: phyint forward vif %ld dst = 0x%x", (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst)); } @@ -3069,6 +3103,7 @@ tbf_update_tokens(struct vif *vifp) hrtime_t tm; struct tbf *t = vifp->v_tbf; ip_stack_t *ipst = 
vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; ASSERT(MUTEX_HELD(&t->tbf_lock)); @@ -3092,7 +3127,7 @@ tbf_update_tokens(struct vif *vifp) if (t->tbf_n_tok > MAX_BKT_SIZE) t->tbf_n_tok = MAX_BKT_SIZE; if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "tbf_update_tok: tm %lld tok %d vif %ld", tm, t->tbf_n_tok, (ptrdiff_t)(vifp - ipst->ips_vifs)); } @@ -3110,6 +3145,7 @@ priority(struct vif *vifp, ipha_t *ipha) { int prio; ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; + conn_t *mrouter = ipst->ips_ip_g_mrouter; /* Temporary hack; may add general packet classifier some day */ @@ -3141,7 +3177,7 @@ priority(struct vif *vifp, ipha_t *ipha) break; } if (ipst->ips_ip_mrtdebug > 1) { - (void) mi_strlog(ipst->ips_ip_g_mrouter, 1, SL_TRACE, + (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, "priority: port %x prio %d\n", ntohs(udp->uh_dport), prio); } diff --git a/usr/src/uts/common/inet/ip/ip_opt_data.c b/usr/src/uts/common/inet/ip/ip_opt_data.c index 08ff30ffae..5bdea67f2a 100644 --- a/usr/src/uts/common/inet/ip/ip_opt_data.c +++ b/usr/src/uts/common/inet/ip/ip_opt_data.c @@ -58,231 +58,231 @@ extern int ip_opt_set(queue_t *q, uint_t optset_context, int level, */ opdes_t ip_opt_arr[] = { -{ SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, -{ SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, -{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, -{ SO_PROTOTYPE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, -{ SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, -{ SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 +{ SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ SO_REUSEADDR, SOL_SOCKET, OA_RW, 
OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ SO_PROTOTYPE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, OP_PASSNEXT, sizeof (int), +{ SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int), 0 }, { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT), 40, -1 /* not initialized */ }, + (OP_VARLEN|OP_NODEFAULT), 40, -1 /* not initialized */ }, { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT), 40, -1 /* not initialized */ }, + (OP_VARLEN|OP_NODEFAULT), 40, -1 /* not initialized */ }, -{ IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, -{ T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, -{ IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, -{ IP_MULTICAST_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ IP_MULTICAST_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (struct in_addr), 0 /* INADDR_ANY */ }, -{ IP_MULTICAST_LOOP, IPPROTO_IP, OA_RW, OA_RW, OP_NP, (OP_PASSNEXT|OP_DEF_FN), +{ IP_MULTICAST_LOOP, IPPROTO_IP, OA_RW, OA_RW, OP_NP, (OP_DEF_FN), sizeof (uchar_t), -1 /* not initialized */}, -{ IP_MULTICAST_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, (OP_PASSNEXT|OP_DEF_FN), +{ IP_MULTICAST_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, (OP_DEF_FN), sizeof (uchar_t), -1 /* not initialized */ }, -{ IP_ADD_MEMBERSHIP, IPPROTO_IP, OA_X, OA_X, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), +{ IP_ADD_MEMBERSHIP, IPPROTO_IP, OA_X, OA_X, OP_NP, (OP_NODEFAULT), sizeof (struct ip_mreq), -1 /* not initialized */ }, -{ IP_DROP_MEMBERSHIP, IPPROTO_IP, OA_X, OA_X, 
OP_NP, (OP_PASSNEXT|OP_NODEFAULT), +{ IP_DROP_MEMBERSHIP, IPPROTO_IP, OA_X, OA_X, OP_NP, (OP_NODEFAULT), sizeof (struct ip_mreq), -1 /* not initialized */ }, -{ IP_BLOCK_SOURCE, IPPROTO_IP, OA_X, OA_X, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), +{ IP_BLOCK_SOURCE, IPPROTO_IP, OA_X, OA_X, OP_NP, (OP_NODEFAULT), sizeof (struct ip_mreq_source), -1 /* not initialized */ }, -{ IP_UNBLOCK_SOURCE, IPPROTO_IP, OA_X, OA_X, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), +{ IP_UNBLOCK_SOURCE, IPPROTO_IP, OA_X, OA_X, OP_NP, (OP_NODEFAULT), sizeof (struct ip_mreq_source), -1 /* not initialized */ }, { IP_ADD_SOURCE_MEMBERSHIP, IPPROTO_IP, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct ip_mreq_source), -1 }, + (OP_NODEFAULT), sizeof (struct ip_mreq_source), -1 }, { IP_DROP_SOURCE_MEMBERSHIP, IPPROTO_IP, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct ip_mreq_source), -1 }, + (OP_NODEFAULT), sizeof (struct ip_mreq_source), -1 }, -{ IP_RECVOPTS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, +{ IP_RECVOPTS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IP_RECVDSTADDR, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 +{ IP_RECVDSTADDR, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IP_RECVIF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, +{ IP_RECVIF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, { IP_PKTINFO, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IP_RECVSLLA, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, sizeof (int), 0 }, +{ IP_RECVSLLA, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 /* no ifindex */ }, -{ IP_XMIT_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IP_XMIT_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 /* no ifindex */ }, -{ IP_DONTFAILOVER_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ 
IP_DONTFAILOVER_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (struct in_addr), 0 /* not initialized */ }, -{ IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, OP_PASSNEXT, +{ IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0, sizeof (int), 0 }, -{ IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), +{ IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, (OP_NODEFAULT), sizeof (ipsec_req_t), -1 /* not initialized */ }, -{ IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, OP_PASSNEXT, +{ IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0, sizeof (in_addr_t), -1 /* not initialized */ }, { MRT_INIT, IPPROTO_IP, 0, OA_X, OP_CONFIG, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (int), -1 /* not initialized */ }, + (OP_NODEFAULT), sizeof (int), -1 /* not initialized */ }, { MRT_DONE, IPPROTO_IP, 0, OA_X, OP_CONFIG, - (OP_PASSNEXT|OP_NODEFAULT), 0, -1 /* not initialized */ }, + (OP_NODEFAULT), 0, -1 /* not initialized */ }, -{ MRT_ADD_VIF, IPPROTO_IP, 0, OA_X, OP_CONFIG, (OP_PASSNEXT|OP_NODEFAULT), +{ MRT_ADD_VIF, IPPROTO_IP, 0, OA_X, OP_CONFIG, (OP_NODEFAULT), sizeof (struct vifctl), -1 /* not initialized */ }, -{ MRT_DEL_VIF, IPPROTO_IP, 0, OA_X, OP_CONFIG, (OP_PASSNEXT|OP_NODEFAULT), +{ MRT_DEL_VIF, IPPROTO_IP, 0, OA_X, OP_CONFIG, (OP_NODEFAULT), sizeof (vifi_t), -1 /* not initialized */ }, -{ MRT_ADD_MFC, IPPROTO_IP, 0, OA_X, OP_CONFIG, (OP_PASSNEXT|OP_NODEFAULT), +{ MRT_ADD_MFC, IPPROTO_IP, 0, OA_X, OP_CONFIG, (OP_NODEFAULT), sizeof (struct mfcctl), -1 /* not initialized */ }, -{ MRT_DEL_MFC, IPPROTO_IP, 0, OA_X, OP_CONFIG, (OP_PASSNEXT|OP_NODEFAULT), +{ MRT_DEL_MFC, IPPROTO_IP, 0, OA_X, OP_CONFIG, (OP_NODEFAULT), sizeof (struct mfcctl), -1 /* not initialized */ }, -{ MRT_VERSION, IPPROTO_IP, OA_R, OA_R, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), +{ MRT_VERSION, IPPROTO_IP, OA_R, OA_R, OP_NP, (OP_NODEFAULT), sizeof (int), -1 /* not initialized */ }, -{ MRT_ASSERT, IPPROTO_IP, 0, OA_RW, OP_CONFIG, (OP_PASSNEXT|OP_NODEFAULT), +{ MRT_ASSERT, IPPROTO_IP, 0, OA_RW, OP_CONFIG, 
(OP_NODEFAULT), sizeof (int), -1 /* not initialized */ }, { MCAST_JOIN_GROUP, IPPROTO_IP, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_req), + (OP_NODEFAULT), sizeof (struct group_req), -1 /* not initialized */ }, { MCAST_LEAVE_GROUP, IPPROTO_IP, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_req), + (OP_NODEFAULT), sizeof (struct group_req), -1 /* not initialized */ }, { MCAST_BLOCK_SOURCE, IPPROTO_IP, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_source_req), + (OP_NODEFAULT), sizeof (struct group_source_req), -1 /* not initialized */ }, { MCAST_UNBLOCK_SOURCE, IPPROTO_IP, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_source_req), + (OP_NODEFAULT), sizeof (struct group_source_req), -1 /* not initialized */ }, { MCAST_JOIN_SOURCE_GROUP, IPPROTO_IP, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_source_req), + (OP_NODEFAULT), sizeof (struct group_source_req), -1 /* not initialized */ }, { MCAST_LEAVE_SOURCE_GROUP, IPPROTO_IP, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_source_req), + (OP_NODEFAULT), sizeof (struct group_source_req), -1 /* not initialized */ }, -{ IPV6_MULTICAST_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_MULTICAST_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, { IPV6_MULTICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_DEF_FN), sizeof (int), -1 /* not initialized */ }, + (OP_DEF_FN), sizeof (int), -1 /* not initialized */ }, { IPV6_MULTICAST_LOOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_DEF_FN), sizeof (int), -1 /* not initialized */}, + (OP_DEF_FN), sizeof (int), -1 /* not initialized */}, -{ IPV6_JOIN_GROUP, IPPROTO_IPV6, OA_X, OA_X, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), +{ IPV6_JOIN_GROUP, IPPROTO_IPV6, OA_X, OA_X, OP_NP, (OP_NODEFAULT), sizeof (struct ipv6_mreq), -1 /* not initialized */ }, { IPV6_LEAVE_GROUP, IPPROTO_IPV6, OA_X, OA_X, OP_NP, - 
(OP_PASSNEXT|OP_NODEFAULT), + (OP_NODEFAULT), sizeof (struct ipv6_mreq), -1 /* not initialized */ }, { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_DEF_FN), sizeof (int), -1 /* not initialized */ }, + (OP_DEF_FN), sizeof (int), -1 /* not initialized */ }, -{ IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 /* no ifindex */ }, -{ IPV6_BOUND_PIF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_BOUND_PIF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 /* no ifindex */ }, -{ IPV6_DONTFAILOVER_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_DONTFAILOVER_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 /* no ifindex */ }, -{ IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, OP_PASSNEXT, +{ IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0, sizeof (int), 0 }, { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT|OP_VARLEN), + (OP_NODEFAULT|OP_VARLEN), sizeof (struct in6_pktinfo), -1 /* not initialized */ }, { IPV6_HOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT|OP_VARLEN), + (OP_NODEFAULT|OP_VARLEN), sizeof (int), -1 /* not initialized */ }, { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT|OP_VARLEN), + (OP_NODEFAULT|OP_VARLEN), sizeof (sin6_t), -1 /* not initialized */ }, { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT), 255*8, + (OP_VARLEN|OP_NODEFAULT), 255*8, -1 /* not initialized */ }, { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT), 255*8, + (OP_VARLEN|OP_NODEFAULT), 255*8, -1 /* not initialized */ }, { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT), 255*8, + (OP_VARLEN|OP_NODEFAULT), 255*8, -1 /* not initialized */ }, { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_VARLEN|OP_NODEFAULT), 
255*8, + (OP_VARLEN|OP_NODEFAULT), 255*8, -1 /* not initialized */ }, { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT|OP_VARLEN), + (OP_NODEFAULT|OP_VARLEN), sizeof (int), -1 /* not initialized */ }, -{ IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (struct ip6_mtuinfo), -1 }, -{ IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), -1 }, -{ IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, /* Enable receipt of ancillary data */ -{ IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_RECVTCLASS, IPPROTO_IPV6, 
OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_RECVPATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_RECVPATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, (OP_PASSNEXT|OP_NODEFAULT), +{ IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, (OP_NODEFAULT), sizeof (ipsec_req_t), -1 /* not initialized */ }, -{ IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_PASSNEXT, +{ IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT }, { MCAST_JOIN_GROUP, IPPROTO_IPV6, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_req), + (OP_NODEFAULT), sizeof (struct group_req), -1 /* not initialized */ }, { MCAST_LEAVE_GROUP, IPPROTO_IPV6, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_req), + (OP_NODEFAULT), sizeof (struct group_req), -1 /* not initialized */ }, { MCAST_BLOCK_SOURCE, IPPROTO_IPV6, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_source_req), + (OP_NODEFAULT), sizeof (struct group_source_req), -1 /* not initialized */ }, { MCAST_UNBLOCK_SOURCE, IPPROTO_IPV6, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_source_req), + (OP_NODEFAULT), sizeof (struct group_source_req), -1 /* not initialized */ }, { MCAST_JOIN_SOURCE_GROUP, IPPROTO_IPV6, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_source_req), + (OP_NODEFAULT), sizeof (struct group_source_req), -1 /* not initialized */ }, { MCAST_LEAVE_SOURCE_GROUP, IPPROTO_IPV6, OA_X, OA_X, OP_NP, - (OP_PASSNEXT|OP_NODEFAULT), sizeof (struct group_source_req), + (OP_NODEFAULT), sizeof (struct group_source_req), -1 /* not initialized */ }, }; diff --git a/usr/src/uts/common/inet/ip/ip_rts.c b/usr/src/uts/common/inet/ip/ip_rts.c index ff2eb93536..6816016084 100644 --- a/usr/src/uts/common/inet/ip/ip_rts.c +++ b/usr/src/uts/common/inet/ip/ip_rts.c @@ -146,12 +146,13 @@ 
rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af, ip_stack_t *ipst) } CONN_INC_REF(connp); mutex_exit(&ipst->ips_rts_clients->connf_lock); + /* Pass to rts_input */ if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) { mp1 = dupmsg(mp); if (mp1 == NULL) mp1 = copymsg(mp); if (mp1 != NULL) - putnext(CONNP_TO_RQ(connp), mp1); + (connp->conn_recv)(connp, mp1, NULL); } mutex_enter(&ipst->ips_rts_clients->connf_lock); @@ -230,6 +231,30 @@ ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy) } /* + * This is a call from the RTS module + * indicating that this is a Routing Socket + * Stream. Insert this conn_t in routing + * socket client list. + */ +void +ip_rts_register(conn_t *connp) +{ + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; + + connp->conn_loopback = 1; + ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); +} + +/* + * This is a call from the RTS module indicating that it is closing. + */ +void +ip_rts_unregister(conn_t *connp) +{ + ipcl_hash_remove(connp); +} + +/* * Processes requests received on a routing socket. It extracts all the * arguments and calls the appropriate function to process the request. * @@ -301,26 +326,14 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) * this is an indication from routing module * that it is a routing socket stream queue. */ - if (mp->b_cont != NULL) { - mp1 = dupmsg(mp->b_cont); - if (mp1 == NULL) { - error = ENOBUFS; - goto done; - } - mp = mp1; - } else { - /* - * This is a message from RTS module - * indicating that this is a Routing Socket - * Stream. Insert this conn_t in routing - * socket client list. 
- */ - - connp->conn_loopback = 1; - ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp); - + ASSERT(mp->b_cont != NULL); + mp1 = dupmsg(mp->b_cont); + if (mp1 == NULL) { + error = ENOBUFS; goto done; } + mp = mp1; + if (mp->b_cont != NULL && !pullupmsg(mp, -1)) { freemsg(mp); error = EINVAL; @@ -891,7 +904,7 @@ ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr) if ((found_addrs & RTA_SRC) != 0 && (rtm->rtm_flags & RTF_SETSRC) != 0 && !IN6_ARE_ADDR_EQUAL( - &ire->ire_src_addr_v6, &src_addr_v6)) { + &ire->ire_src_addr_v6, &src_addr_v6)) { if (!IN6_IS_ADDR_UNSPECIFIED( &src_addr_v6)) { @@ -1034,7 +1047,7 @@ done: ioc_mp->b_datap->db_type = M_IOCACK; if (iocp->ioc_error != 0) iocp->ioc_count = 0; - qreply(q, ioc_mp); + (connp->conn_recv)(connp, ioc_mp, NULL); /* conn was refheld in ip_wput_ioctl. */ CONN_OPER_PENDING_DONE(connp); @@ -1149,7 +1162,7 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr, sire->ire_mask, sire->ire_gateway_addr, (sire->ire_flags & RTF_SETSRC) ? - sire->ire_src_addr : ire->ire_src_addr, + sire->ire_src_addr : ire->ire_src_addr, ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif, new_mp, sacnt, gc); } @@ -1174,7 +1187,7 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af) rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6, &sire->ire_mask_v6, &gw_addr_v6, (sire->ire_flags & RTF_SETSRC) ? - &sire->ire_src_addr_v6 : &ire->ire_src_addr_v6, + &sire->ire_src_addr_v6 : &ire->ire_src_addr_v6, &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros, ire->ire_ipif, new_mp, sacnt, gc); } diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c index 9273c98961..2d20eadc75 100644 --- a/usr/src/uts/common/inet/ip/ipclassifier.c +++ b/usr/src/uts/common/inet/ip/ipclassifier.c @@ -230,9 +230,13 @@ const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; * globalhash table. 
* * type: This flag determines the type of conn_t which needs to be - * created. + * created i.e., which kmem_cache it comes from. * IPCL_TCPCONN indicates a TCP connection - * IPCL_IPCONN indicates all non-TCP connections. + * IPCL_SCTPCONN indicates a SCTP connection + * IPCL_UDPCONN indicates a UDP conn_t. + * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t. + * IPCL_RTSCONN indicates a RTS conn_t. + * IPCL_IPCCONN indicates all other connections. * * void ipcl_conn_destroy(connp) * @@ -266,6 +270,8 @@ const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; #include <inet/udp_impl.h> #include <inet/sctp_ip.h> #include <inet/sctp/sctp_impl.h> +#include <inet/rawip_impl.h> +#include <inet/rts_impl.h> #include <sys/cpuvar.h> @@ -313,30 +319,40 @@ uint_t ipcl_raw_fanout_size = 256; 50331599, 100663291, 201326557, 0} /* - * wrapper structure to ensure that conn+tcpb are aligned - * on cache lines. + * wrapper structure to ensure that conn and what follows it (tcp_t, etc) + * are aligned on cache lines. 
*/ -typedef struct itc_s { - union { - conn_t itcu_conn; - char itcu_filler[CACHE_ALIGN(conn_s)]; - } itc_u; - tcp_t itc_tcp; +typedef union itc_s { + conn_t itc_conn; + char itcu_filler[CACHE_ALIGN(conn_s)]; } itc_t; -#define itc_conn itc_u.itcu_conn - -struct kmem_cache *ipcl_tcpconn_cache; -struct kmem_cache *ipcl_conn_cache; +struct kmem_cache *tcp_conn_cache; +struct kmem_cache *ip_conn_cache; extern struct kmem_cache *sctp_conn_cache; extern struct kmem_cache *tcp_sack_info_cache; extern struct kmem_cache *tcp_iphc_cache; +struct kmem_cache *udp_conn_cache; +struct kmem_cache *rawip_conn_cache; +struct kmem_cache *rts_conn_cache; extern void tcp_timermp_free(tcp_t *); extern mblk_t *tcp_timermp_alloc(int); -static int ipcl_tcpconn_constructor(void *, void *, int); -static void ipcl_tcpconn_destructor(void *, void *); +static int ip_conn_constructor(void *, void *, int); +static void ip_conn_destructor(void *, void *); + +static int tcp_conn_constructor(void *, void *, int); +static void tcp_conn_destructor(void *, void *); + +static int udp_conn_constructor(void *, void *, int); +static void udp_conn_destructor(void *, void *); + +static int rawip_conn_constructor(void *, void *, int); +static void rawip_conn_destructor(void *, void *); + +static int rts_conn_constructor(void *, void *, int); +static void rts_conn_destructor(void *, void *); #ifdef IPCL_DEBUG #define INET_NTOA_BUFSIZE 18 @@ -358,13 +374,29 @@ inet_ntoa_r(uint32_t in, char *b) void ipcl_g_init(void) { - ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", + ip_conn_cache = kmem_cache_create("ip_conn_cache", sizeof (conn_t), CACHE_ALIGN_SIZE, - NULL, NULL, NULL, NULL, NULL, 0); + ip_conn_constructor, ip_conn_destructor, + NULL, NULL, NULL, 0); + + tcp_conn_cache = kmem_cache_create("tcp_conn_cache", + sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, + tcp_conn_constructor, tcp_conn_destructor, + NULL, NULL, NULL, 0); + + udp_conn_cache = kmem_cache_create("udp_conn_cache", + sizeof (itc_t) 
+ sizeof (udp_t), CACHE_ALIGN_SIZE, + udp_conn_constructor, udp_conn_destructor, + NULL, NULL, NULL, 0); - ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache", - sizeof (itc_t), CACHE_ALIGN_SIZE, - ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, + rawip_conn_cache = kmem_cache_create("rawip_conn_cache", + sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, + rawip_conn_constructor, rawip_conn_destructor, + NULL, NULL, NULL, 0); + + rts_conn_cache = kmem_cache_create("rts_conn_cache", + sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, + rts_conn_constructor, rts_conn_destructor, NULL, NULL, NULL, 0); } @@ -472,8 +504,11 @@ ipcl_init(ip_stack_t *ipst) void ipcl_g_destroy(void) { - kmem_cache_destroy(ipcl_conn_cache); - kmem_cache_destroy(ipcl_tcpconn_cache); + kmem_cache_destroy(ip_conn_cache); + kmem_cache_destroy(tcp_conn_cache); + kmem_cache_destroy(udp_conn_cache); + kmem_cache_destroy(rawip_conn_cache); + kmem_cache_destroy(rts_conn_cache); } /* @@ -553,24 +588,11 @@ ipcl_destroy(ip_stack_t *ipst) conn_t * ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) { - itc_t *itc; conn_t *connp; sctp_stack_t *sctps; + struct kmem_cache *conn_cache; switch (type) { - case IPCL_TCPCONN: - if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache, - sleep)) == NULL) - return (NULL); - connp = &itc->itc_conn; - connp->conn_ref = 1; - netstack_hold(ns); - connp->conn_netstack = ns; - IPCL_DEBUG_LVL(1, - ("ipcl_conn_create: connp = %p tcp (%p)", - (void *)connp, (void *)connp->conn_tcp)); - ipcl_globalhash_insert(connp); - break; case IPCL_SCTPCONN: if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) return (NULL); @@ -579,27 +601,40 @@ ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) SCTP_G_Q_REFHOLD(sctps); netstack_hold(ns); connp->conn_netstack = ns; + return (connp); + + case IPCL_TCPCONN: + conn_cache = tcp_conn_cache; break; + + case IPCL_UDPCONN: + conn_cache = udp_conn_cache; + break; + + case IPCL_RAWIPCONN: + conn_cache = 
rawip_conn_cache; + break; + + case IPCL_RTSCONN: + conn_cache = rts_conn_cache; + break; + case IPCL_IPCCONN: - connp = kmem_cache_alloc(ipcl_conn_cache, sleep); - if (connp == NULL) - return (NULL); - bzero(connp, sizeof (conn_t)); - mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); - connp->conn_flags = IPCL_IPCCONN; - connp->conn_ref = 1; - netstack_hold(ns); - connp->conn_netstack = ns; - IPCL_DEBUG_LVL(1, - ("ipcl_conn_create: connp = %p\n", (void *)connp)); - ipcl_globalhash_insert(connp); + conn_cache = ip_conn_cache; break; + default: connp = NULL; ASSERT(0); } + if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) + return (NULL); + + connp->conn_ref = 1; + netstack_hold(ns); + connp->conn_netstack = ns; + ipcl_globalhash_insert(connp); return (connp); } @@ -625,7 +660,7 @@ ipcl_conn_destroy(conn_t *connp) ipcl_globalhash_remove(connp); - cv_destroy(&connp->conn_cv); + /* FIXME: add separate tcp_conn_free()? */ if (connp->conn_flags & IPCL_TCPCONN) { tcp_t *tcp = connp->conn_tcp; tcp_stack_t *tcps; @@ -645,7 +680,6 @@ ipcl_conn_destroy(conn_t *connp) TCPS_REFRELE(tcps); } - mutex_destroy(&connp->conn_lock); tcp_free(tcp); mp = tcp->tcp_timercache; tcp->tcp_cred = NULL; @@ -669,30 +703,62 @@ ipcl_conn_destroy(conn_t *connp) ASSERT(connp->conn_latch == NULL); ASSERT(connp->conn_policy == NULL); - bzero(connp, sizeof (itc_t)); - - tcp->tcp_timercache = mp; - connp->conn_tcp = tcp; - connp->conn_flags = IPCL_TCPCONN; - connp->conn_ulp = IPPROTO_TCP; - tcp->tcp_connp = connp; if (ns != NULL) { ASSERT(tcp->tcp_tcps == NULL); connp->conn_netstack = NULL; netstack_rele(ns); } - kmem_cache_free(ipcl_tcpconn_cache, connp); - } else if (connp->conn_flags & IPCL_SCTPCONN) { + + ipcl_conn_cleanup(connp); + connp->conn_flags = IPCL_TCPCONN; + bzero(tcp, sizeof (tcp_t)); + + tcp->tcp_timercache = mp; + tcp->tcp_connp = connp; + kmem_cache_free(tcp_conn_cache, connp); + return; + } + if (connp->conn_latch 
!= NULL) { + IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); + connp->conn_latch = NULL; + } + if (connp->conn_policy != NULL) { + IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); + connp->conn_policy = NULL; + } + if (connp->conn_ipsec_opt_mp != NULL) { + freemsg(connp->conn_ipsec_opt_mp); + connp->conn_ipsec_opt_mp = NULL; + } + + if (connp->conn_flags & IPCL_SCTPCONN) { ASSERT(ns != NULL); sctp_free(connp); + return; + } + + if (ns != NULL) { + connp->conn_netstack = NULL; + netstack_rele(ns); + } + ipcl_conn_cleanup(connp); + + /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */ + if (connp->conn_flags & IPCL_UDPCONN) { + connp->conn_flags = IPCL_UDPCONN; + kmem_cache_free(udp_conn_cache, connp); + } else if (connp->conn_flags & IPCL_RAWIPCONN) { + connp->conn_flags = IPCL_RAWIPCONN; + connp->conn_ulp = IPPROTO_ICMP; + kmem_cache_free(rawip_conn_cache, connp); + } else if (connp->conn_flags & IPCL_RTSCONN) { + connp->conn_flags = IPCL_RTSCONN; + kmem_cache_free(rts_conn_cache, connp); } else { - ASSERT(connp->conn_udp == NULL); - mutex_destroy(&connp->conn_lock); - if (ns != NULL) { - connp->conn_netstack = NULL; - netstack_rele(ns); - } - kmem_cache_free(ipcl_conn_cache, connp); + connp->conn_flags = IPCL_IPCCONN; + ASSERT(connp->conn_flags & IPCL_IPCCONN); + ASSERT(connp->conn_priv == NULL); + kmem_cache_free(ip_conn_cache, connp); } } @@ -1940,12 +2006,17 @@ found: /* ARGSUSED */ static int -ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) +tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) { itc_t *itc = (itc_t *)buf; conn_t *connp = &itc->itc_conn; - tcp_t *tcp = &itc->itc_tcp; - bzero(itc, sizeof (itc_t)); + tcp_t *tcp = (tcp_t *)&itc[1]; + + bzero(connp, sizeof (conn_t)); + bzero(tcp, sizeof (tcp_t)); + + mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); connp->conn_tcp = tcp; connp->conn_flags = 
IPCL_TCPCONN; @@ -1956,9 +2027,202 @@ ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) /* ARGSUSED */ static void -ipcl_tcpconn_destructor(void *buf, void *cdrarg) +tcp_conn_destructor(void *buf, void *cdrarg) +{ + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + tcp_t *tcp = (tcp_t *)&itc[1]; + + ASSERT(connp->conn_flags & IPCL_TCPCONN); + ASSERT(tcp->tcp_connp == connp); + ASSERT(connp->conn_tcp == tcp); + tcp_timermp_free(tcp); + mutex_destroy(&connp->conn_lock); + cv_destroy(&connp->conn_cv); +} + +/* ARGSUSED */ +static int +ip_conn_constructor(void *buf, void *cdrarg, int kmflags) +{ + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + + bzero(connp, sizeof (conn_t)); + mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); + connp->conn_flags = IPCL_IPCCONN; + + return (0); +} + +/* ARGSUSED */ +static void +ip_conn_destructor(void *buf, void *cdrarg) +{ + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + + ASSERT(connp->conn_flags & IPCL_IPCCONN); + ASSERT(connp->conn_priv == NULL); + mutex_destroy(&connp->conn_lock); + cv_destroy(&connp->conn_cv); +} + +/* ARGSUSED */ +static int +udp_conn_constructor(void *buf, void *cdrarg, int kmflags) +{ + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + udp_t *udp = (udp_t *)&itc[1]; + + bzero(connp, sizeof (conn_t)); + bzero(udp, sizeof (udp_t)); + + mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); + connp->conn_udp = udp; + connp->conn_flags = IPCL_UDPCONN; + connp->conn_ulp = IPPROTO_UDP; + udp->udp_connp = connp; + return (0); +} + +/* ARGSUSED */ +static void +udp_conn_destructor(void *buf, void *cdrarg) +{ + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + udp_t *udp = (udp_t *)&itc[1]; + + ASSERT(connp->conn_flags & IPCL_UDPCONN); + ASSERT(udp->udp_connp == connp); + ASSERT(connp->conn_udp == udp); + 
mutex_destroy(&connp->conn_lock); + cv_destroy(&connp->conn_cv); +} + +/* ARGSUSED */ +static int +rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) +{ + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + icmp_t *icmp = (icmp_t *)&itc[1]; + + bzero(connp, sizeof (conn_t)); + bzero(icmp, sizeof (icmp_t)); + + mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); + connp->conn_icmp = icmp; + connp->conn_flags = IPCL_RAWIPCONN; + connp->conn_ulp = IPPROTO_ICMP; + icmp->icmp_connp = connp; + return (0); +} + +/* ARGSUSED */ +static void +rawip_conn_destructor(void *buf, void *cdrarg) +{ + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + icmp_t *icmp = (icmp_t *)&itc[1]; + + ASSERT(connp->conn_flags & IPCL_RAWIPCONN); + ASSERT(icmp->icmp_connp == connp); + ASSERT(connp->conn_icmp == icmp); + mutex_destroy(&connp->conn_lock); + cv_destroy(&connp->conn_cv); +} + +/* ARGSUSED */ +static int +rts_conn_constructor(void *buf, void *cdrarg, int kmflags) +{ + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + rts_t *rts = (rts_t *)&itc[1]; + + bzero(connp, sizeof (conn_t)); + bzero(rts, sizeof (rts_t)); + + mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); + connp->conn_rts = rts; + connp->conn_flags = IPCL_RTSCONN; + rts->rts_connp = connp; + return (0); +} + +/* ARGSUSED */ +static void +rts_conn_destructor(void *buf, void *cdrarg) { - tcp_timermp_free(((conn_t *)buf)->conn_tcp); + itc_t *itc = (itc_t *)buf; + conn_t *connp = &itc->itc_conn; + rts_t *rts = (rts_t *)&itc[1]; + + ASSERT(connp->conn_flags & IPCL_RTSCONN); + ASSERT(rts->rts_connp == connp); + ASSERT(connp->conn_rts == rts); + mutex_destroy(&connp->conn_lock); + cv_destroy(&connp->conn_cv); +} + +/* + * Called as part of ipcl_conn_destroy to assert and clear any pointers + * in the conn_t. 
+ */ +void +ipcl_conn_cleanup(conn_t *connp) +{ + ASSERT(connp->conn_ire_cache == NULL); + ASSERT(connp->conn_latch == NULL); +#ifdef notdef + ASSERT(connp->conn_rq == NULL); + ASSERT(connp->conn_wq == NULL); +#endif + ASSERT(connp->conn_cred == NULL); + ASSERT(connp->conn_g_fanout == NULL); + ASSERT(connp->conn_g_next == NULL); + ASSERT(connp->conn_g_prev == NULL); + ASSERT(connp->conn_policy == NULL); + ASSERT(connp->conn_fanout == NULL); + ASSERT(connp->conn_next == NULL); + ASSERT(connp->conn_prev == NULL); +#ifdef notdef + /* + * The ill and ipif pointers are not cleared before the conn_t + * goes away since they do not hold a reference on the ill/ipif. + * We should replace these pointers with ifindex/ipaddr_t to + * make the code less complex. + */ + ASSERT(connp->conn_xmit_if_ill == NULL); + ASSERT(connp->conn_nofailover_ill == NULL); + ASSERT(connp->conn_outgoing_ill == NULL); + ASSERT(connp->conn_incoming_ill == NULL); + ASSERT(connp->conn_outgoing_pill == NULL); + ASSERT(connp->conn_multicast_ipif == NULL); + ASSERT(connp->conn_multicast_ill == NULL); +#endif + ASSERT(connp->conn_oper_pending_ill == NULL); + ASSERT(connp->conn_ilg == NULL); + ASSERT(connp->conn_drain_next == NULL); + ASSERT(connp->conn_drain_prev == NULL); + ASSERT(connp->conn_idl == NULL); + ASSERT(connp->conn_ipsec_opt_mp == NULL); + ASSERT(connp->conn_peercred == NULL); + ASSERT(connp->conn_netstack == NULL); + + /* Clear out the conn_t fields that are not preserved */ + bzero(&connp->conn_start_clr, + sizeof (conn_t) - + ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); + } /* @@ -2042,6 +2306,7 @@ ipcl_globalhash_remove(conn_t *connp) /* Better to stumble on a null pointer than to corrupt memory */ connp->conn_g_next = NULL; connp->conn_g_prev = NULL; + connp->conn_g_fanout = NULL; } /* diff --git a/usr/src/uts/common/inet/ip/ipddi.c b/usr/src/uts/common/inet/ip/ipddi.c index ab3cfb80b7..4c713c8397 100644 --- a/usr/src/uts/common/inet/ip/ipddi.c +++ 
b/usr/src/uts/common/inet/ip/ipddi.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -33,11 +33,12 @@ #include <inet/ip.h> #define INET_NAME "ip" -#define INET_STRTAB ipinfo +#define INET_DEVSTRTAB ipinfov4 +#define INET_MODSTRTAB ipinfov4 #define INET_MODDESC "IP STREAMS module 1.47" #define INET_DEVDESC "IP STREAMS driver 1.47" -#define INET_DEVMINOR IPV4_MINOR -#define INET_DEVMTFLAGS D_MP /* we are *really* ip :-) */ +#define INET_DEVMINOR 0 +#define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since we're really ip */ #define INET_MODMTFLAGS D_MP #include "../inetddi.c" diff --git a/usr/src/uts/common/inet/ip/ipsecahddi.c b/usr/src/uts/common/inet/ip/ipsecahddi.c index 8361c24687..132ddad5bf 100644 --- a/usr/src/uts/common/inet/ip/ipsecahddi.c +++ b/usr/src/uts/common/inet/ip/ipsecahddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,10 +34,11 @@ #include <inet/ipsec_impl.h> #define INET_NAME "ipsecah" -#define INET_STRTAB ipsecahinfo +#define INET_MODSTRTAB ipsecahinfo +#define INET_DEVSTRTAB ipinfov6 #define INET_MODDESC "IPsec AH STREAMS module %I%" #define INET_DEVDESC "IPsec AH STREAMS driver %I%" -#define INET_DEVMINOR IPV6_MINOR /* so we can receive both v4 and v6 */ +#define INET_DEVMINOR 0 #define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since as a driver we're ip */ #define INET_MODMTFLAGS (D_MP|D_MTOCEXCL|D_MTOUTPERIM) @@ -49,8 +49,6 @@ _init(void) { int error; - INET_BECOME_IP(); - /* * Note: After mod_install succeeds, another thread can enter * therefore all initialization is done before it and any diff --git a/usr/src/uts/common/inet/ip/ipsecespddi.c b/usr/src/uts/common/inet/ip/ipsecespddi.c index ce05072300..22aeb2e1ed 100644 --- a/usr/src/uts/common/inet/ip/ipsecespddi.c +++ b/usr/src/uts/common/inet/ip/ipsecespddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,10 +34,11 @@ #include <inet/ipsec_impl.h> #define INET_NAME "ipsecesp" -#define INET_STRTAB ipsecespinfo +#define INET_MODSTRTAB ipsecespinfo +#define INET_DEVSTRTAB ipinfov6 #define INET_MODDESC "IPsec ESP STREAMS module %I%" #define INET_DEVDESC "IPsec ESP STREAMS driver %I%" -#define INET_DEVMINOR IPV6_MINOR /* so we can receive both v4 and v6 */ +#define INET_DEVMINOR 0 #define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since as a driver we're ip */ #define INET_MODMTFLAGS (D_MP|D_MTOCEXCL|D_MTOUTPERIM) @@ -49,8 +49,6 @@ _init(void) { int error; - INET_BECOME_IP(); - /* * Note: After mod_install succeeds, another thread can enter * therefore all initialization is done before it and any diff --git a/usr/src/uts/common/inet/ip/keysock.c b/usr/src/uts/common/inet/ip/keysock.c index d3bae3a95b..c8e3ce2318 100644 --- a/usr/src/uts/common/inet/ip/keysock.c +++ b/usr/src/uts/common/inet/ip/keysock.c @@ -211,7 +211,7 @@ keysock_plumb_ipsec(netstack_t *ns) keystack->keystack_plumbed = 0; /* we're trying again.. */ cr = zone_get_kcred(netstackid_to_zoneid( - keystack->keystack_netstack->netstack_stackid)); + keystack->keystack_netstack->netstack_stackid)); ASSERT(cr != NULL); /* * Load up the drivers (AH/ESP). 
@@ -264,7 +264,7 @@ keysock_plumb_ipsec(netstack_t *ns) goto bail; } err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh, - FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid); + FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid); if (err) { ks0dbg(("IPsec: PLINK of KEYSOCK/AH failed (err %d).\n", err)); (void) ldi_close(lh, FREAD|FWRITE, cr); @@ -290,7 +290,7 @@ keysock_plumb_ipsec(netstack_t *ns) goto bail; } err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh, - FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid); + FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid); if (err) { ks0dbg(("IPsec: " "PLINK of KEYSOCK/ESP failed (err %d).\n", err)); @@ -755,7 +755,7 @@ keysock_capability_req(queue_t *q, mblk_t *mp) cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), - mp->b_datap->db_type, T_CAPABILITY_ACK); + mp->b_datap->db_type, T_CAPABILITY_ACK); if (mp == NULL) return; @@ -909,7 +909,7 @@ keysock_wput_other(queue_t *q, mblk_t *mp) return; } cr = zone_get_kcred(netstackid_to_zoneid( - keystack->keystack_netstack->netstack_stackid)); + keystack->keystack_netstack->netstack_stackid)); ASSERT(cr != NULL); switch (((union T_primitives *)mp->b_rptr)->type) { @@ -921,11 +921,11 @@ keysock_wput_other(queue_t *q, mblk_t *mp) break; case T_SVR4_OPTMGMT_REQ: (void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, cr), - &keysock_opt_obj); + &keysock_opt_obj, B_FALSE); break; case T_OPTMGMT_REQ: (void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, cr), - &keysock_opt_obj); + &keysock_opt_obj, B_FALSE); break; case T_DATA_REQ: case T_EXDATA_REQ: @@ -1026,11 +1026,11 @@ keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[], if (extv[SADB_EXT_KEY_ENCRYPT] != NULL) bzero(extv[SADB_EXT_KEY_ENCRYPT], SADB_64TO8( - extv[SADB_EXT_KEY_ENCRYPT]->sadb_ext_len)); + extv[SADB_EXT_KEY_ENCRYPT]->sadb_ext_len)); if (extv[SADB_EXT_KEY_AUTH] != NULL) bzero(extv[SADB_EXT_KEY_AUTH], SADB_64TO8( - extv[SADB_EXT_KEY_AUTH]->sadb_ext_len)); + 
extv[SADB_EXT_KEY_AUTH]->sadb_ext_len)); if (flushmsg) { ks0dbg(( "keysock: Downwards flush/dump message failed!\n")); @@ -1347,7 +1347,7 @@ keysock_do_flushdump(queue_t *q, mblk_t *mp) */ mutex_enter(&keystack->keystack_consumers[i]->kc_lock); ASSERT((keystack->keystack_consumers[i]->kc_flags & - KC_FLUSHING) == 0); + KC_FLUSHING) == 0); keystack->keystack_consumers[i]->kc_flags |= KC_FLUSHING; mutex_exit(&(keystack->keystack_consumers[i]->kc_lock)); @@ -2098,8 +2098,8 @@ keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial, !toall && !(ks->keysock_flags & KEYSOCK_PROMISC) && !((ks->keysock_flags & KEYSOCK_EXTENDED) ? - allereg : allreg && kc != NULL && - KEYSOCK_ISREG(ks, kc->kc_sa_type))) + allereg : allreg && kc != NULL && + KEYSOCK_ISREG(ks, kc->kc_sa_type))) continue; mp1 = dupmsg(mp); @@ -2257,7 +2257,7 @@ keysock_rput(queue_t *q, mblk_t *mp) samsg = (sadb_msg_t *)mp1->b_rptr; if (samsg->sadb_msg_type == SADB_FLUSH || (samsg->sadb_msg_type == SADB_DUMP && - samsg->sadb_msg_len == SADB_8TO64(sizeof (*samsg)))) { + samsg->sadb_msg_len == SADB_8TO64(sizeof (*samsg)))) { /* * If I'm an end-of-FLUSH or an end-of-DUMP marker... */ diff --git a/usr/src/uts/common/inet/ip/keysockddi.c b/usr/src/uts/common/inet/ip/keysockddi.c index 706bad9d91..f3fbe1ca1b 100644 --- a/usr/src/uts/common/inet/ip/keysockddi.c +++ b/usr/src/uts/common/inet/ip/keysockddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -34,7 +33,8 @@ #include <inet/ipsec_impl.h> #define INET_NAME "keysock" -#define INET_STRTAB keysockinfo +#define INET_MODSTRTAB keysockinfo +#define INET_DEVSTRTAB keysockinfo #define INET_MODDESC "PF_KEY socket STREAMS module %I%" #define INET_DEVDESC "PF_KEY socket STREAMS driver %I%" #define INET_DEVMINOR 0 diff --git a/usr/src/uts/common/inet/ip/rts.c b/usr/src/uts/common/inet/ip/rts.c index f9c37a5421..4b7edc1577 100644 --- a/usr/src/uts/common/inet/ip/rts.c +++ b/usr/src/uts/common/inet/ip/rts.c @@ -48,6 +48,7 @@ #include <inet/common.h> #include <netinet/ip6.h> #include <inet/ip.h> +#include <inet/ipclassifier.h> #include <inet/mi.h> #include <inet/nd.h> #include <inet/optcom.h> @@ -55,6 +56,9 @@ #include <sys/isa_defs.h> #include <net/route.h> +#include <inet/rts_impl.h> +#include <inet/ip_rts.h> + /* * This is a transport provider for routing sockets. Downstream messages are * wrapped with a IP_IOCTL header, and ip_wput_ioctl calls the appropriate entry @@ -75,49 +79,6 @@ * the dirty work is done down in ip. */ -/* - * RTS stack instances - */ -struct rts_stack { - netstack_t *rtss_netstack; /* Common netstack */ - - caddr_t rtss_g_nd; - struct rtsparam_s *rtss_params; -}; -typedef struct rts_stack rts_stack_t; - -/* - * Object to represent database of options to search passed to - * {sock,tpi}optcom_req() interface routine to take care of option - * management and associated methods. - * XXX. These and other externs should really move to a rts header. 
- */ -extern optdb_obj_t rts_opt_obj; -extern uint_t rts_max_optsize; - -/* Internal routing socket stream control structure, one per open stream */ -typedef struct rts_s { - cred_t *rts_credp; /* Opener's credentials */ - uint_t rts_state; /* Provider interface state */ - uint_t rts_error; /* Routing socket error code */ - uint_t rts_flag; /* Pending I/O state */ - uint_t rts_proto; /* SO_PROTOTYPE "socket" option. */ - uint_t rts_debug : 1, /* SO_DEBUG "socket" option. */ - rts_dontroute : 1, /* SO_DONTROUTE "socket" option. */ - rts_broadcast : 1, /* SO_BROADCAST "socket" option. */ - rts_reuseaddr : 1, /* SO_REUSEADDR "socket" option. */ - rts_useloopback : 1, /* SO_USELOOPBACK "socket" option. */ - rts_multicast_loop : 1, /* IP_MULTICAST_LOOP option */ - rts_hdrincl : 1, /* IP_HDRINCL option + RAW and IGMP */ - - : 0; - rts_stack_t *rts_rtss; -} rts_t; - -#define RTS_WPUT_PENDING 0x1 /* Waiting for write-side to complete */ -#define RTS_WRW_PENDING 0x2 /* Routing socket write in progress */ -#define RTS_OPEN_PENDING 0x4 /* Routing socket open in progress */ - /* Default structure copied into T_INFO_ACK messages */ static struct T_info_ack rts_g_t_info_ack = { T_INFO_ACK, @@ -133,14 +94,6 @@ static struct T_info_ack rts_g_t_info_ack = { (XPG4_1) /* PROVIDER_flag */ }; -/* Named Dispatch Parameter Management Structure */ -typedef struct rtsparam_s { - uint_t rts_param_min; - uint_t rts_param_max; - uint_t rts_param_value; - char *rts_param_name; -} rtsparam_t; - /* * Table of ND variables supported by rts. These are loaded into rts_g_nd * in rts_open. 
@@ -156,11 +109,12 @@ static rtsparam_t lcl_param_arr[] = { #define rtss_xmit_hiwat rtss_params[0].rts_param_value #define rtss_xmit_lowat rtss_params[1].rts_param_value #define rtss_recv_hiwat rtss_params[2].rts_param_value -#define rtss_max_buf rtss_params[3].rts_param_value +#define rtss_max_buf rtss_params[3].rts_param_value static int rts_close(queue_t *q); static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error); +static void rts_input(void *, mblk_t *, void *); static mblk_t *rts_ioctl_alloc(mblk_t *data, cred_t *cr); static int rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp); @@ -171,12 +125,11 @@ int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, int rts_opt_set(queue_t *q, uint_t optset_context, int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk); -static void rts_param_cleanup(IDP *ndp); static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt); static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); -static void rts_rput(queue_t *q, mblk_t *mp); +static void rts_rsrv(queue_t *q); static void *rts_stack_init(netstackid_t stackid, netstack_t *ns); static void rts_stack_fini(netstackid_t stackid, void *arg); static void rts_wput(queue_t *q, mblk_t *mp); @@ -184,21 +137,21 @@ static void rts_wput_iocdata(queue_t *q, mblk_t *mp); static void rts_wput_other(queue_t *q, mblk_t *mp); static int rts_wrw(queue_t *q, struiod_t *dp); -static struct module_info info = { +static struct module_info rts_mod_info = { 129, "rts", 1, INFPSZ, 512, 128 }; -static struct qinit rinit = { - (pfi_t)rts_rput, NULL, rts_open, rts_close, NULL, &info +static struct qinit rtsrinit = { + NULL, (pfi_t)rts_rsrv, rts_open, rts_close, NULL, &rts_mod_info }; -static struct qinit winit = { - (pfi_t)rts_wput, NULL, NULL, NULL, NULL, 
&info, +static struct qinit rtswinit = { + (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &rts_mod_info, NULL, (pfi_t)rts_wrw, NULL, STRUIOT_STANDARD }; struct streamtab rtsinfo = { - &rinit, &winit + &rtsrinit, &rtswinit }; /* @@ -250,29 +203,49 @@ rts_ioctl_alloc(mblk_t *data, cred_t *cr) static int rts_close(queue_t *q) { - rts_t *rts = (rts_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + + ASSERT(connp != NULL && IPCL_IS_RTS(connp)); + + ip_rts_unregister(connp); + + ip_quiesce_conn(connp); qprocsoff(q); - crfree(rts->rts_credp); - netstack_rele(rts->rts_rtss->rtss_netstack); + /* + * Now we are truly single threaded on this stream, and can + * delete the things hanging off the connp, and finally the connp. + * We removed this connp from the fanout list, it cannot be + * accessed thru the fanouts, and we already waited for the + * conn_ref to drop to 0. We are already in close, so + * there cannot be any other thread from the top. qprocsoff + * has completed, and service has completed or won't run in + * future. + */ + ASSERT(connp->conn_ref == 1); + + inet_minor_free(ip_minor_arena, connp->conn_dev); + + connp->conn_ref--; + ipcl_conn_destroy(connp); - mi_free(q->q_ptr); + q->q_ptr = WR(q)->q_ptr = NULL; return (0); } /* * This is the open routine for routing socket. It allocates - * rts_t structure for the stream and sends an IOCTL to - * the down module to indicate that it is a routing socket - * stream. + * rts_t structure for the stream and tells IP that it is a routing socket. */ /* ARGSUSED */ static int rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) { - mblk_t *mp = NULL; rts_t *rts; + conn_t *connp; + dev_t conn_dev; + zoneid_t zoneid; netstack_t *ns; rts_stack_t *rtss; @@ -280,8 +253,7 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (q->q_ptr != NULL) return (0); - /* If this is not a push of rts as a module, fail. 
*/ - if (sflag != MODOPEN) + if (sflag == MODOPEN) return (EINVAL); ns = netstack_find_by_cred(credp); @@ -289,56 +261,76 @@ rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) rtss = ns->netstack_rts; ASSERT(rtss != NULL); - q->q_ptr = mi_zalloc_sleep(sizeof (rts_t)); - WR(q)->q_ptr = q->q_ptr; - rts = (rts_t *)q->q_ptr; + /* + * For exclusive stacks we set the zoneid to zero + * to make RTS operate as if in the global zone. + */ + if (ns->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + + if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { + netstack_rele(ns); + return (EBUSY); + } + *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); - rts->rts_rtss = rtss; + connp = ipcl_conn_create(IPCL_RTSCONN, KM_SLEEP, ns); + connp->conn_dev = conn_dev; + rts = connp->conn_rts; - rts->rts_credp = credp; - crhold(credp); /* - * The receive hiwat is only looked at on the stream head queue. - * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. + * ipcl_conn_create did a netstack_hold. Undo the hold that was + * done by netstack_find_by_cred() */ - q->q_hiwat = rtss->rtss_recv_hiwat; + netstack_rele(ns); + /* - * The transmit hiwat/lowat is only looked at on IP's queue. - * Store in q_hiwat/q_lowat in order to return on SO_SNDBUF/SO_SNDLOWAT - * getsockopts. + * Initialize the rts_t structure for this stream. */ + q->q_ptr = connp; + WR(q)->q_ptr = connp; + connp->conn_rq = q; + connp->conn_wq = WR(q); + + rw_enter(&rts->rts_rwlock, RW_WRITER); + ASSERT(connp->conn_rts == rts); + ASSERT(rts->rts_connp == connp); + + /* Set the initial state of the stream and the privilege status. 
*/ + rts->rts_state = TS_UNBND; + connp->conn_zoneid = zoneid; + + connp->conn_ulp_labeled = is_system_labeled(); + + rts->rts_rtss = rtss; + + q->q_hiwat = rtss->rtss_recv_hiwat; WR(q)->q_hiwat = rtss->rtss_xmit_hiwat; WR(q)->q_lowat = rtss->rtss_xmit_lowat; + + connp->conn_recv = rts_input; + crhold(credp); + connp->conn_cred = credp; + + mutex_enter(&connp->conn_lock); + connp->conn_state_flags &= ~CONN_INCIPIENT; + mutex_exit(&connp->conn_lock); + qprocson(q); + rw_exit(&rts->rts_rwlock); + /* * Indicate the down IP module that this is a routing socket * client by sending an RTS IOCTL without any user data. Although * this is just a notification message (without any real routing * request), we pass in any credential for correctness sake. */ - mp = rts_ioctl_alloc(NULL, credp); - if (mp == NULL) { - qprocsoff(q); - ASSERT(q->q_ptr != NULL); - netstack_rele(rtss->rtss_netstack); - mi_free(q->q_ptr); - crfree(credp); - return (ENOMEM); - } - rts->rts_flag |= RTS_OPEN_PENDING; - putnext(WR(q), mp); - while (rts->rts_flag & RTS_OPEN_PENDING) { - if (!qwait_sig(q)) { - (void) rts_close(q); - return (EINTR); - } - } - if (rts->rts_error != 0) { - (void) rts_close(q); - return (ENOTSUP); - } - rts->rts_state = TS_UNBND; + ip_rts_register(connp); + return (0); + } /* @@ -363,14 +355,13 @@ rts_ok_ack(queue_t *q, mblk_t *mp) /* * This routine is called by rts_wput to handle T_UNBIND_REQ messages. - * After some error checking, the message is passed downstream to ip. */ static void rts_unbind(queue_t *q, mblk_t *mp) { - rts_t *rts; + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; - rts = (rts_t *)q->q_ptr; /* If a bind has not been done, we can't unbind. 
*/ if (rts->rts_state != TS_IDLE) { rts_err_ack(q, mp, TOUTSTATE, 0); @@ -389,11 +380,11 @@ rts_unbind(queue_t *q, mblk_t *mp) static void rts_bind(queue_t *q, mblk_t *mp) { + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; mblk_t *mp1; struct T_bind_req *tbr; - rts_t *rts; - rts = (rts_t *)q->q_ptr; if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "rts_bind: bad data, %d", rts->rts_state); @@ -448,14 +439,15 @@ rts_copy_info(struct T_info_ack *tap, rts_t *rts) static void rts_capability_req(queue_t *q, mblk_t *mp) { - rts_t *rts = (rts_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; t_uscalar_t cap_bits1; struct T_capability_ack *tcap; cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), - mp->b_datap->db_type, T_CAPABILITY_ACK); + mp->b_datap->db_type, T_CAPABILITY_ACK); if (mp == NULL) return; @@ -478,7 +470,8 @@ rts_capability_req(queue_t *q, mblk_t *mp) static void rts_info_req(queue_t *q, mblk_t *mp) { - rts_t *rts = (rts_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO, T_INFO_ACK); @@ -508,7 +501,8 @@ int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) { int *i1 = (int *)ptr; - rts_t *rts = (rts_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; switch (level) { case SOL_SOCKET: @@ -575,7 +569,8 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level, uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) { int *i1 = (int *)invalp; - rts_t *rts = (rts_t *)q->q_ptr; + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; boolean_t checkonly; rts_stack_t *rtss = rts->rts_rtss; @@ -681,7 +676,6 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level, } if (!checkonly) { q->q_hiwat = *i1; - q->q_next->q_hiwat = *i1; } break; /* goto sizeof (int) 
option return */ case SO_RCVBUF: @@ -712,16 +706,6 @@ rts_opt_set(queue_t *q, uint_t optset_context, int level, } /* - * This routine frees the ND table if all streams have been closed. - * It is called by rts_close and rts_open. - */ -static void -rts_param_cleanup(IDP *ndp) -{ - nd_free(ndp); -} - -/* * This routine retrieves the value of an ND variable in a rtsparam_t * structure. It is called through nd_getset when a user reads the * variable. @@ -779,6 +763,16 @@ rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) } /* + * Empty rsrv routine which is used by rts_input to cause a wakeup + * of a thread in qwait. + */ +/*ARGSUSED*/ +static void +rts_rsrv(queue_t *q) +{ +} + +/* * This routine handles synchronous messages passed downstream. It either * consumes the message or passes it downstream; it never queues a * a message. The data messages that go down are wrapped in an IOCTL @@ -796,9 +790,9 @@ rts_wrw(queue_t *q, struiod_t *dp) mblk_t *mp1; int error; rt_msghdr_t *rtm; - rts_t *rts; + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; - rts = (rts_t *)q->q_ptr; while (rts->rts_flag & RTS_WRW_PENDING) { if (qwait_rw(q)) { rts->rts_error = EINTR; @@ -872,12 +866,18 @@ err_ret: * consumes the message or passes it downstream; it never queues a * a message. The data messages that go down are wrapped in an IOCTL * message. + * + * FIXME? Should we call IP rts_request directly? Could punt on returning + * errno in the case when it defers processing due to + * IPIF_CHANGING/ILL_CHANGING??? 
*/ static void rts_wput(queue_t *q, mblk_t *mp) { uchar_t *rptr = mp->b_rptr; mblk_t *mp1; + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; switch (mp->b_datap->db_type) { case M_DATA: @@ -904,8 +904,6 @@ rts_wput(queue_t *q, mblk_t *mp) mp1 = rts_ioctl_alloc(mp, DB_CRED(mp)); if (mp1 == NULL) { - rts_t *rts = (rts_t *)q->q_ptr; - ASSERT(rts != NULL); freemsg(mp); if (rts->rts_flag & RTS_WPUT_PENDING) { @@ -914,7 +912,7 @@ rts_wput(queue_t *q, mblk_t *mp) } return; } - putnext(q, mp1); + ip_output(connp, mp1, q, IP_WPUT); } @@ -926,16 +924,16 @@ rts_wput(queue_t *q, mblk_t *mp) static void rts_wput_other(queue_t *q, mblk_t *mp) { + conn_t *connp = Q_TO_CONN(q); + rts_t *rts = connp->conn_rts; uchar_t *rptr = mp->b_rptr; - rts_t *rts; struct iocblk *iocp; cred_t *cr; rts_stack_t *rtss; - rts = (rts_t *)q->q_ptr; rtss = rts->rts_rtss; - cr = DB_CREDDEF(mp, rts->rts_credp); + cr = DB_CREDDEF(mp, connp->conn_cred); switch (mp->b_datap->db_type) { case M_PROTO: @@ -963,10 +961,11 @@ rts_wput_other(queue_t *q, mblk_t *mp) rts_info_req(q, mp); return; case T_SVR4_OPTMGMT_REQ: - (void) svr4_optcom_req(q, mp, cr, &rts_opt_obj); + (void) svr4_optcom_req(q, mp, cr, &rts_opt_obj, + B_FALSE); return; case T_OPTMGMT_REQ: - (void) tpi_optcom_req(q, mp, cr, &rts_opt_obj); + (void) tpi_optcom_req(q, mp, cr, &rts_opt_obj, B_FALSE); return; case O_T_CONN_RES: case T_CONN_RES: @@ -1008,7 +1007,7 @@ rts_wput_other(queue_t *q, mblk_t *mp) default: break; } - putnext(q, mp); + ip_output(connp, mp, q, IP_WPUT); } /* @@ -1017,6 +1016,7 @@ rts_wput_other(queue_t *q, mblk_t *mp) static void rts_wput_iocdata(queue_t *q, mblk_t *mp) { + conn_t *connp = Q_TO_CONN(q); struct sockaddr *rtsaddr; mblk_t *mp1; STRUCT_HANDLE(strbuf, sb); @@ -1027,7 +1027,7 @@ rts_wput_iocdata(queue_t *q, mblk_t *mp) case TI_GETPEERNAME: break; default: - putnext(q, mp); + ip_output(connp, mp, q, IP_WPUT); return; } switch (mi_copy_state(q, mp, &mp1)) { @@ -1072,25 +1072,28 @@ rts_wput_iocdata(queue_t 
*q, mblk_t *mp) mi_copyout(q, mp); } +/*ARGSUSED2*/ static void -rts_rput(queue_t *q, mblk_t *mp) +rts_input(void *arg1, mblk_t *mp, void *arg2) { - rts_t *rts; + conn_t *connp = (conn_t *)arg1; + rts_t *rts = connp->conn_rts; struct iocblk *iocp; mblk_t *mp1; struct T_data_ind *tdi; - rts = (rts_t *)q->q_ptr; switch (mp->b_datap->db_type) { case M_IOCACK: case M_IOCNAK: iocp = (struct iocblk *)mp->b_rptr; - if (rts->rts_flag & (RTS_WPUT_PENDING|RTS_OPEN_PENDING)) { - if (rts->rts_flag & RTS_WPUT_PENDING) - rts->rts_flag &= ~RTS_WPUT_PENDING; - else - rts->rts_flag &= ~RTS_OPEN_PENDING; + if (rts->rts_flag & (RTS_WPUT_PENDING)) { + rts->rts_flag &= ~RTS_WPUT_PENDING; rts->rts_error = iocp->ioc_error; + /* + * Tell rts_wrw/qwait that we are done. + * Note: there is no qwait_wakeup() we can use. + */ + qenable(connp->conn_rq); freemsg(mp); return; } @@ -1116,7 +1119,7 @@ rts_rput(queue_t *q, mblk_t *mp) default: break; } - putnext(q, mp); + putnext(connp->conn_rq, mp); } @@ -1171,7 +1174,7 @@ rts_stack_fini(netstackid_t stackid, void *arg) { rts_stack_t *rtss = (rts_stack_t *)arg; - rts_param_cleanup(&rtss->rtss_g_nd); + nd_free(&rtss->rtss_g_nd); kmem_free(rtss->rtss_params, sizeof (lcl_param_arr)); rtss->rtss_params = NULL; kmem_free(rtss, sizeof (*rtss)); diff --git a/usr/src/uts/common/inet/ip/rtsddi.c b/usr/src/uts/common/inet/ip/rtsddi.c index f8c80ecd83..7739818e47 100644 --- a/usr/src/uts/common/inet/ip/rtsddi.c +++ b/usr/src/uts/common/inet/ip/rtsddi.c @@ -32,47 +32,27 @@ #include <inet/ip.h> #define INET_NAME "rts" -#define INET_STRTAB rtsinfo -#define INET_MODDESC "PF_ROUTE socket STREAMS module %I%" +#define INET_DEVSTRTAB rtsinfo #define INET_DEVDESC "PF_ROUTE socket STREAMS driver %I%" -#define INET_DEVMINOR IPV4_MINOR -#define INET_DEVMTFLAGS IP_DEVMTFLAGS /* since as a driver we're ip */ -#define INET_MODMTFLAGS (D_MP|D_MTQPAIR|D_MTOUTPERIM|D_MTOCEXCL|D_SYNCSTR) +#define INET_DEVMINOR 0 +#define INET_DEVMTFLAGS (D_MP|D_MTQPAIR|D_SYNCSTR) #include
"../inetddi.c" -extern void rts_ddi_init(void); -extern void rts_ddi_destroy(void); - int _init(void) { - int error; - - INET_BECOME_IP(); - /* - * Note: After mod_install succeeds, another thread can enter - * therefore all initialization is done before it. + * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() */ - rts_ddi_init(); - error = mod_install(&modlinkage); - if (error != 0) - rts_ddi_destroy(); - return (error); + return (mod_install(&modlinkage)); } int _fini(void) { - int error; - - error = mod_remove(&modlinkage); - if (error != 0) - return (error); - - rts_ddi_destroy(); - return (0); + return (mod_remove(&modlinkage)); } int diff --git a/usr/src/uts/common/inet/ip/spdsock.c b/usr/src/uts/common/inet/ip/spdsock.c index 1db9fbeec8..9d3f93f798 100644 --- a/usr/src/uts/common/inet/ip/spdsock.c +++ b/usr/src/uts/common/inet/ip/spdsock.c @@ -742,10 +742,10 @@ spdsock_check_action(ipsec_act_t *act, boolean_t tunnel_polhead, int *diag, } if ((act->ipa_type != IPSEC_ACT_APPLY) && (act->ipa_apply.ipp_use_ah || - act->ipa_apply.ipp_use_esp || - act->ipa_apply.ipp_use_espa || - act->ipa_apply.ipp_use_se || - act->ipa_apply.ipp_use_unique)) { + act->ipa_apply.ipp_use_esp || + act->ipa_apply.ipp_use_espa || + act->ipa_apply.ipp_use_se || + act->ipa_apply.ipp_use_unique)) { *diag = SPD_DIAGNOSTIC_ADD_INCON_FLAGS; return (B_FALSE); } @@ -775,7 +775,7 @@ spdsock_ext_to_actvec(spd_ext_t **extv, ipsec_act_t **actpp, uint_t *nactp, tunnel_polhead = (extv[SPD_EXT_TUN_NAME] != NULL && (((struct spd_rule *)extv[SPD_EXT_RULE])->spd_rule_flags & - SPD_RULE_FLAG_TUNNEL)); + SPD_RULE_FLAG_TUNNEL)); *actpp = NULL; *nactp = 0; @@ -1174,7 +1174,7 @@ spdsock_deleterule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp, if (rule->spd_rule_index != 0) { if (ipsec_policy_delete_index(iph, rule->spd_rule_index, - spds->spds_netstack) != 0) { + spds->spds_netstack) != 0) { err = ESRCH; goto fail; } @@ 
-1582,9 +1582,9 @@ spdsock_encode_sel(uint8_t *base, uint_t offset, const ipsec_sel_t *sel) offset = spdsock_encode_typecode(base, offset, selkey->ipsl_icmp_type, selkey->ipsl_icmp_type_end, (selkey->ipsl_valid & IPSL_ICMP_CODE) ? - selkey->ipsl_icmp_code : 255, + selkey->ipsl_icmp_code : 255, (selkey->ipsl_valid & IPSL_ICMP_CODE) ? - selkey->ipsl_icmp_code_end : 255); + selkey->ipsl_icmp_code_end : 255); } return (offset); } @@ -2535,7 +2535,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag, spd_stack_t *spds) if (alg->alg_key_sizes == NULL || cur_key >= alg->alg_nkey_sizes) { ss1dbg(spds, ("spdsock_do_updatealg: " - "too many key sizes\n")); + "too many key sizes\n")); *diag = SPD_DIAGNOSTIC_ALG_NUM_KEY_SIZES; goto bail; } @@ -2561,7 +2561,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag, spd_stack_t *spds) if (alg->alg_block_sizes == NULL || cur_block >= alg->alg_nblock_sizes) { ss1dbg(spds, ("spdsock_do_updatealg: " - "too many block sizes\n")); + "too many block sizes\n")); *diag = SPD_DIAGNOSTIC_ALG_NUM_BLOCK_SIZES; goto bail; } @@ -2574,7 +2574,7 @@ spdsock_do_updatealg(spd_ext_t *extv[], int *diag, spd_stack_t *spds) if (attr->spd_attr_value > CRYPTO_MAX_MECH_NAME) { ss1dbg(spds, ("spdsock_do_updatealg: " - "mech name too long\n")); + "mech name too long\n")); *diag = SPD_DIAGNOSTIC_ALG_MECH_NAME_LEN; goto bail; } @@ -2620,9 +2620,9 @@ bail: /* cleanup */ ipsec_alg_free(alg); for (alg_type = 0; alg_type < IPSEC_NALGTYPES; alg_type++) - for (algid = 0; algid < IPSEC_MAX_ALGS; algid++) + for (algid = 0; algid < IPSEC_MAX_ALGS; algid++) if (spds->spds_algs[alg_type][algid] != NULL) - ipsec_alg_free(spds->spds_algs[alg_type][algid]); + ipsec_alg_free(spds->spds_algs[alg_type][algid]); } /* @@ -3090,7 +3090,7 @@ spdsock_capability_req(queue_t *q, mblk_t *mp) cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), - mp->b_datap->db_type, T_CAPABILITY_ACK); + mp->b_datap->db_type, 
T_CAPABILITY_ACK); if (mp == NULL) return; @@ -3231,7 +3231,7 @@ spdsock_wput_other(queue_t *q, mblk_t *mp) return; } cr = zone_get_kcred(netstackid_to_zoneid( - spds->spds_netstack->netstack_stackid)); + spds->spds_netstack->netstack_stackid)); ASSERT(cr != NULL); switch (((union T_primitives *)mp->b_rptr)->type) { @@ -3243,11 +3243,11 @@ spdsock_wput_other(queue_t *q, mblk_t *mp) break; case T_SVR4_OPTMGMT_REQ: (void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, cr), - &spdsock_opt_obj); + &spdsock_opt_obj, B_FALSE); break; case T_OPTMGMT_REQ: (void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, cr), - &spdsock_opt_obj); + &spdsock_opt_obj, B_FALSE); break; case T_DATA_REQ: case T_EXDATA_REQ: diff --git a/usr/src/uts/common/inet/ip/spdsockddi.c b/usr/src/uts/common/inet/ip/spdsockddi.c index 0f9ecde82a..22fc0e44d3 100644 --- a/usr/src/uts/common/inet/ip/spdsockddi.c +++ b/usr/src/uts/common/inet/ip/spdsockddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2001-2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -34,7 +33,7 @@ #include <inet/ipsec_impl.h> #define INET_NAME "spdsock" -#define INET_STRTAB spdsockinfo +#define INET_DEVSTRTAB spdsockinfo #define INET_DEVDESC "PF_POLICY socket STREAMS driver %I%" #define INET_DEVMINOR 0 #define INET_DEVMTFLAGS (D_MP | D_MTQPAIR) diff --git a/usr/src/uts/common/inet/ip6.h b/usr/src/uts/common/inet/ip6.h index 0ba010ebfe..9fb4b88367 100644 --- a/usr/src/uts/common/inet/ip6.h +++ b/usr/src/uts/common/inet/ip6.h @@ -364,6 +364,7 @@ extern void ip_wput_ipsec_out_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, ire_t *); extern int ip_total_hdrs_len_v6(ip6_pkt_t *); extern int ipsec_ah_get_hdr_size_v6(mblk_t *, boolean_t); +extern void ip_wput_v6(queue_t *, mblk_t *); extern void ip_wput_local_v6(queue_t *, ill_t *, ip6_t *, mblk_t *, ire_t *, int); extern void ip_output_v6(void *, mblk_t *, void *, int); diff --git a/usr/src/uts/common/inet/ip_impl.h b/usr/src/uts/common/inet/ip_impl.h index a7b9b9ddb9..8cb4bc9b3c 100644 --- a/usr/src/uts/common/inet/ip_impl.h +++ b/usr/src/uts/common/inet/ip_impl.h @@ -478,29 +478,13 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t; * flow-control. * * Note that these checks are done after the conn is found in - * the UDP fanout table. A UDP conn in that table may have its - * IPCL_UDP bit cleared from the conn_flags when the application - * pops the udp module without issuing an unbind; in this case - * IP will still receive packets for the conn and deliver it - * upstream via putnext. This is the reason why we have to test - * against IPCL_UDP. + * the UDP fanout table. + * FIXME? Might be faster to check both udp_drain_qfull and canputnext. */ #define CONN_UDP_FLOWCTLD(connp) \ - ((CONN_UDP_SYNCSTR(connp) && \ - (connp)->conn_udp->udp_drain_qfull) || \ - (!CONN_UDP_SYNCSTR(connp) && !canputnext((connp)->conn_rq))) - -/* - * Macro that delivers a given message upstream; if udp module - * is directly above ip, the message is passed directly into - * the stream-less entry point. 
Otherwise putnext is used. - */ -#define CONN_UDP_RECV(connp, mp) { \ - if (IPCL_IS_UDP(connp)) \ - udp_conn_recv(connp, mp); \ - else \ - putnext((connp)->conn_rq, mp); \ -} + (CONN_UDP_SYNCSTR(connp) ? \ + (connp)->conn_udp->udp_drain_qfull : \ + !canputnext((connp)->conn_rq)) #define ILL_DLS_CAPABLE(ill) \ (((ill)->ill_capabilities & \ diff --git a/usr/src/uts/common/inet/ip_rts.h b/usr/src/uts/common/inet/ip_rts.h index 7b780fb112..a8d3971192 100644 --- a/usr/src/uts/common/inet/ip_rts.h +++ b/usr/src/uts/common/inet/ip_rts.h @@ -55,8 +55,12 @@ extern void ip_rts_newaddrmsg(int, int, const ipif_t *); extern int ip_rts_request(queue_t *, mblk_t *, cred_t *); +extern void ip_rts_register(conn_t *); + extern void ip_rts_rtmsg(int, ire_t *, int, ip_stack_t *); +extern void ip_rts_unregister(conn_t *); + extern mblk_t *rts_alloc_msg(int, int, sa_family_t, uint_t); extern size_t rts_data_msg_size(int, sa_family_t, uint_t); diff --git a/usr/src/uts/common/inet/ip_stack.h b/usr/src/uts/common/inet/ip_stack.h index 2f41d136dd..d6698e7261 100644 --- a/usr/src/uts/common/inet/ip_stack.h +++ b/usr/src/uts/common/inet/ip_stack.h @@ -288,7 +288,7 @@ struct ip_stack { time_t ips_ip_g_frag_timeout; clock_t ips_ip_g_frag_timo_ms; - queue_t *ips_ip_g_mrouter; + struct conn_s *ips_ip_g_mrouter; /* Time since last icmp_pkt_err */ clock_t ips_icmp_pkt_err_last; diff --git a/usr/src/uts/common/inet/ipclassifier.h b/usr/src/uts/common/inet/ipclassifier.h index 5bc5e126d7..3a6a15cf6f 100644 --- a/usr/src/uts/common/inet/ipclassifier.h +++ b/usr/src/uts/common/inet/ipclassifier.h @@ -57,35 +57,42 @@ typedef void (*edesc_rpf)(void *, mblk_t *, void *); */ /* Conn Flags */ -#define IPCL_UDPMOD 0x00020000 /* Is UDP module instance */ -#define IPCL_TCPMOD 0x00040000 /* Is TCP module instance */ +/* Unused 0x00020000 */ +/* Unused 0x00040000 */ #define IPCL_FULLY_BOUND 0x00080000 /* Bound to correct squeue */ #define IPCL_CHECK_POLICY 0x00100000 /* Needs policy checking */ #define 
IPCL_SOCKET 0x00200000 /* Sockfs connection */ #define IPCL_ACCEPTOR 0x00400000 /* Sockfs priv acceptor */ #define IPCL_CL_LISTENER 0x00800000 /* Cluster listener */ #define IPCL_EAGER 0x01000000 /* Incoming connection */ -#define IPCL_UDP 0x02000000 /* A UDP connection */ -#define IPCL_TCP6 0x04000000 /* A TCP6 connection */ -#define IPCL_TCP4 0x08000000 /* A TCP connection */ +/* Unused 0x02000000 */ +#define IPCL_TCP6 0x04000000 /* AF_INET6 TCP */ +#define IPCL_TCP4 0x08000000 /* IPv4 packet format TCP */ +/* Unused 0x10000000 */ +/* Unused 0x20000000 */ #define IPCL_CONNECTED 0x40000000 /* Conn in connected table */ #define IPCL_BOUND 0x80000000 /* Conn in bind table */ /* Flags identifying the type of conn */ -#define IPCL_TCPCONN 0x00000001 /* Flag to indicate cache */ -#define IPCL_SCTPCONN 0x00000002 -#define IPCL_IPCCONN 0x00000004 -#define IPCL_ISV6 0x00000008 /* Is a V6 connection */ -#define IPCL_IPTUN 0x00000010 /* Has "tun" plumbed above it */ +#define IPCL_TCPCONN 0x00000001 /* From tcp_conn_cache */ +#define IPCL_SCTPCONN 0x00000002 /* From sctp_conn_cache */ +#define IPCL_IPCCONN 0x00000004 /* From ip_conn_cache */ +#define IPCL_UDPCONN 0x00000008 /* From udp_conn_cache */ +#define IPCL_RAWIPCONN 0x00000010 /* From rawip_conn_cache */ +#define IPCL_RTSCONN 0x00000020 /* From rts_conn_cache */ +#define IPCL_ISV6 0x00000040 /* AF_INET6 */ +#define IPCL_IPTUN 0x00000080 /* Has "tun" plumbed above it */ /* Conn Masks */ #define IPCL_TCP (IPCL_TCP4|IPCL_TCP6) -#define IPCL_REMOVED 0x00000020 -#define IPCL_REUSED 0x00000040 +#define IPCL_REMOVED 0x00000100 +#define IPCL_REUSED 0x00000200 +/* The packet format is IPv4; could be an AF_INET or AF_INET6 socket */ #define IPCL_IS_TCP4(connp) \ (((connp)->conn_flags & IPCL_TCP4)) +/* Connected AF_INET with no IPsec policy */ #define IPCL_IS_TCP4_CONNECTED_NO_POLICY(connp) \ (((connp)->conn_flags & \ (IPCL_TCP4|IPCL_CONNECTED|IPCL_CHECK_POLICY|IPCL_TCP6)) \ @@ -97,6 +104,7 @@ typedef void (*edesc_rpf)(void *, 
mblk_t *, void *); #define IPCL_IS_BOUND(connp) \ ((connp)->conn_flags & IPCL_BOUND) +/* AF_INET TCP that is bound */ #define IPCL_IS_TCP4_BOUND(connp) \ (((connp)->conn_flags & \ (IPCL_TCP4|IPCL_BOUND|IPCL_TCP6)) == \ @@ -105,16 +113,26 @@ typedef void (*edesc_rpf)(void *, mblk_t *, void *); #define IPCL_IS_FULLY_BOUND(connp) \ ((connp)->conn_flags & IPCL_FULLY_BOUND) -#define IPCL_IS_TCP(connp) \ - ((connp)->conn_flags & (IPCL_TCP4|IPCL_TCP6)) - /* - * IPCL_UDP is set on the conn when udp is directly above ip; - * this flag is cleared the moment udp is popped. + * Can't use conn_protocol since we need to tell difference + * between a real TCP socket and a SOCK_RAW, IPPROTO_TCP. */ +#define IPCL_IS_TCP(connp) \ + ((connp)->conn_flags & IPCL_TCPCONN) + +#define IPCL_IS_SCTP(connp) \ + ((connp)->conn_flags & IPCL_SCTPCONN) + #define IPCL_IS_UDP(connp) \ - ((connp)->conn_flags & IPCL_UDP) + ((connp)->conn_flags & IPCL_UDPCONN) + +#define IPCL_IS_RAWIP(connp) \ + ((connp)->conn_flags & IPCL_RAWIPCONN) +#define IPCL_IS_RTS(connp) \ + ((connp)->conn_flags & IPCL_RTSCONN) + +/* FIXME: Isn't it sufficient to check IPCL_IPTUN? */ #define IPCL_IS_IPTUN(connp) \ (((connp)->conn_ulp == IPPROTO_ENCAP || \ (connp)->conn_ulp == IPPROTO_IPV6) && \ @@ -129,12 +147,42 @@ typedef struct pc_t ctb_stack[CONN_STACK_DEPTH]; } conn_trace_t; +/* + * The initial fields in the conn_t are setup by the kmem_cache constructor, + * and are preserved when it is freed. Fields after that are bzero'ed when + * the conn_t is freed. 
+ */ struct conn_s { kmutex_t conn_lock; uint32_t conn_ref; /* Reference counter */ + uint32_t conn_flags; /* Conn Flags */ + + + union { + tcp_t *cp_tcp; /* Pointer to the tcp struct */ + struct udp_s *cp_udp; /* Pointer to the udp struct */ + struct icmp_s *cp_icmp; /* Pointer to rawip struct */ + struct rts_s *cp_rts; /* Pointer to rts struct */ + void *cp_priv; + } conn_proto_priv; +#define conn_tcp conn_proto_priv.cp_tcp +#define conn_udp conn_proto_priv.cp_udp +#define conn_icmp conn_proto_priv.cp_icmp +#define conn_rts conn_proto_priv.cp_rts +#define conn_priv conn_proto_priv.cp_priv + + kcondvar_t conn_cv; + uint8_t conn_ulp; /* protocol type */ + + edesc_rpf conn_recv; /* Pointer to recv routine */ + + /* Fields after this are bzero'ed when the conn_t is freed. */ + + squeue_t *conn_sqp; /* Squeue for processing */ uint_t conn_state_flags; /* IP state flags */ +#define conn_start_clr conn_state_flags + ire_t *conn_ire_cache; /* outbound ire cache */ - uint32_t conn_flags; /* Conn Flags */ unsigned int conn_on_sqp : 1, /* Conn is being processed */ conn_dontroute : 1, /* SO_DONTROUTE state */ @@ -178,13 +226,6 @@ struct conn_s { conn_lso_ok : 1; /* LSO is usable */ - tcp_t *conn_tcp; /* Pointer to the tcp struct */ - struct udp_s *conn_udp; /* Pointer to the udp struct */ - - squeue_t *conn_sqp; /* Squeue for processing */ - edesc_rpf conn_recv; /* Pointer to recv routine */ - void *conn_pad1; - ill_t *conn_xmit_if_ill; /* Outbound ill */ ill_t *conn_nofailover_ill; /* Failover ill */ ipsec_latch_t *conn_latch; /* latched state */ @@ -201,7 +242,6 @@ struct conn_s { struct ipsec_policy_head_s *conn_policy; /* Configured policy */ in6_addr_t conn_bound_source_v6; #define conn_bound_source V4_PART_OF_V6(conn_bound_source_v6) - void *conn_void[1]; connf_t *conn_fanout; /* Hash bucket we're part of */ struct conn_s *conn_next; /* Hash chain next */ @@ -226,18 +266,13 @@ struct conn_s { #define conn_lport u_port.tcpu_ports.tcpu_lport #define conn_ports 
u_port.conn_ports2 #define conn_upq conn_rq - uint8_t conn_ulp; /* protocol type */ uint8_t conn_unused_byte; - kcondvar_t conn_cv; uint_t conn_proto; /* SO_PROTOTYPE state */ ill_t *conn_incoming_ill; /* IP{,V6}_BOUND_IF */ ill_t *conn_outgoing_pill; /* IP{,V6}_BOUND_PIF */ ill_t *conn_oper_pending_ill; /* pending shared ioctl */ - ill_t *conn_xioctl_pending_ill; /* pending excl ioctl */ - /* this is used only when an unbind is in progress.. */ - struct sq_s *conn_pending_sq; /* waiting for ioctl on sq */ ilg_t *conn_ilg; /* Group memberships */ int conn_ilg_allocated; /* Number allocated */ int conn_ilg_inuse; /* Number currently used */ @@ -459,7 +494,6 @@ struct connf_s { #define IPCL_TCP_EAGER_INIT(connp, protocol, src, rem, ports) { \ (connp)->conn_flags |= (IPCL_TCP4|IPCL_EAGER); \ - (connp)->conn_ulp = protocol; \ IN6_IPADDR_TO_V4MAPPED(src, &(connp)->conn_srcv6); \ IN6_IPADDR_TO_V4MAPPED(rem, &(connp)->conn_remv6); \ (connp)->conn_ports = ports; \ @@ -468,8 +502,7 @@ struct connf_s { } #define IPCL_TCP_EAGER_INIT_V6(connp, protocol, src, rem, ports) { \ - (connp)->conn_flags |= (IPCL_TCP6|IPCL_EAGER); \ - (connp)->conn_ulp = protocol; \ + (connp)->conn_flags |= (IPCL_TCP6|IPCL_EAGER|IPCL_ISV6); \ (connp)->conn_srcv6 = src; \ (connp)->conn_remv6 = rem; \ (connp)->conn_ports = ports; \ @@ -541,6 +574,7 @@ conn_t *ipcl_lookup_listener_v6(uint16_t, in6_addr_t *, uint_t, zoneid_t, ip_stack_t *); int conn_trace_ref(conn_t *); int conn_untrace_ref(conn_t *); +void ipcl_conn_cleanup(conn_t *); conn_t *ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *, ipha_t *, tcph_t *, ip_stack_t *); conn_t *ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *, ip6_t *, tcph_t *, diff --git a/usr/src/uts/common/inet/nca/ncaddi.c b/usr/src/uts/common/inet/nca/ncaddi.c index d7b4af3641..737f16775f 100644 --- a/usr/src/uts/common/inet/nca/ncaddi.c +++ b/usr/src/uts/common/inet/nca/ncaddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common 
Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,7 +44,8 @@ char _depends_on[] = "fs/sockfs drv/ip"; extern caddr_t nca_g_nd; /* Head of 'named dispatch' variable list */ #define INET_NAME "nca" -#define INET_STRTAB ncainfo +#define INET_MODSTRTAB ncainfo +#define INET_DEVSTRTAB ncainfo #define INET_MODDESC "NCA STREAMS module 1.6" #define INET_DEVDESC "NCA STREAMS driver 1.6" #define INET_DEVMINOR 0 diff --git a/usr/src/uts/common/inet/optcom.c b/usr/src/uts/common/inet/optcom.c index 355d95a416..3de4044e58 100644 --- a/usr/src/uts/common/inet/optcom.c +++ b/usr/src/uts/common/inet/optcom.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -188,7 +188,8 @@ optcom_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) * svr4_optcom_req() or tpi_optcom_req() to restart the option processing. 
*/ int -svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp) +svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, + boolean_t pass_to_ip) { pfi_t deffn = dbobjp->odb_deffn; pfi_t getfn = dbobjp->odb_getfn; @@ -211,13 +212,9 @@ svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp) struct opthdr *opt_start; opdes_t *optd; boolean_t pass_to_next = B_FALSE; - boolean_t pass_to_ip = B_FALSE; - boolean_t is_tcp; struct T_optmgmt_ack *toa; struct T_optmgmt_req *tor; - is_tcp = (dbobjp == &tcp_opt_obj); - /* * Allocate M_CTL and prepend to the packet for restarting this * option if needed. IP may need to queue and restart the option @@ -310,7 +307,7 @@ no_mem:; opt->name = optd->opdes_name; if (!(optd->opdes_props & OP_DEF_FN) || ((len = (*deffn)(q, opt->level, - opt->name, (uchar_t *)&opt[1])) < 0)) { + opt->name, (uchar_t *)&opt[1])) < 0)) { /* * Fill length and value from table. * @@ -404,8 +401,8 @@ no_mem:; if ((uchar_t *)next_opt < (uchar_t *)&opt[1] || ((next_opt >= opt_end) && - (((uchar_t *)next_opt - (uchar_t *)opt_end) >= - __TPI_ALIGN_SIZE))) + (((uchar_t *)next_opt - (uchar_t *)opt_end) >= + __TPI_ALIGN_SIZE))) goto bad_opt; /* sanity check */ @@ -530,8 +527,8 @@ no_mem:; for (opt = opt_start; opt < opt_end; opt = next_opt) { - next_opt = (struct opthdr *)((uchar_t *)&opt[1] + - _TPI_ALIGN_OPT(opt->len)); + next_opt = (struct opthdr *)((uchar_t *)&opt[1] + + _TPI_ALIGN_OPT(opt->len)); opt1->name = opt->name; opt1->level = opt->level; @@ -544,12 +541,6 @@ no_mem:; if (len < 0) { opt1->len = opt->len; bcopy(&opt[1], &opt1[1], opt->len); - /* - * Pass the option down to IP only - * if TCP hasn't processed it. - */ - if (is_tcp) - pass_to_ip = B_TRUE; } else { opt1->len = (t_uscalar_t)len; } @@ -634,13 +625,12 @@ restart: optcom_err_ack(q, mp, TSYSERR, error); freeb(first_mp); return (0); - } else if (error < 0 && is_tcp) { - /* - * Pass the option down to IP only - * if TCP hasn't processed it. 
- */ - pass_to_ip = B_TRUE; } + /* + * error < 0 means option is not recognized. + * But with OP_PASSNEXT the next module + * might recognize it. + */ } /* Done with the restart control mp. */ freeb(first_mp); @@ -675,7 +665,8 @@ bad_opt:; * New optcom_req inspired by TPI/XTI semantics */ int -tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp) +tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, + boolean_t pass_to_ip) { t_scalar_t t_error; mblk_t *toa_mp; @@ -830,7 +821,7 @@ restart: * forwarding and if it is possible, we forward the message * downstream. Else we ack it. */ - if (pass_to_next && (q->q_next != NULL || dbobjp == &tcp_opt_obj)) { + if (pass_to_next && (q->q_next != NULL || pass_to_ip)) { /* * We pass it down as T_OPTMGMT_REQ. This code relies * on the happy coincidence that T_optmgmt_req and @@ -941,7 +932,7 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, if (tor->MGMT_flags == T_CHECK || !topmost_tpiprovider || ((allopt_len = opt_level_allopts_lengths(opt->level, - opt_arr, opt_arr_cnt)) == 0)) { + opt_arr, opt_arr_cnt)) == 0)) { /* * This is confusing but correct ! 
* It is not valid to to use T_ALLOPT with @@ -1173,7 +1164,7 @@ do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr, (opt->status == T_NOTSUPPORT) || (opt->status == T_FAILURE) || ((tor->MGMT_flags & (T_NEGOTIATE|T_CHECK)) && - (opt->status == T_READONLY)); + (opt->status == T_READONLY)); if (failed_option) { /* @@ -1475,7 +1466,7 @@ do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp, topth->status = T_FAILURE; *worst_statusp = get_worst_status(T_FAILURE, - *worst_statusp); + *worst_statusp); } } else { /* @@ -1729,8 +1720,8 @@ do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, if (optd->opdes_props & OP_DEF_FN) { if ((optd->opdes_props & OP_VARLEN) || ((optsize = (*deffn)(q, reqopt->level, - optd->opdes_name, - (uchar_t *)optd->opdes_defbuf)) < 0)) { + optd->opdes_name, + (uchar_t *)optd->opdes_defbuf)) < 0)) { /* XXX - skip these too */ topth->status = T_SUCCESS; continue; /* skip setting */ diff --git a/usr/src/uts/common/inet/optcom.h b/usr/src/uts/common/inet/optcom.h index 89cc75c6fd..1d2d1cb09d 100644 --- a/usr/src/uts/common/inet/optcom.h +++ b/usr/src/uts/common/inet/optcom.h @@ -204,23 +204,13 @@ typedef struct opt_restart_s { #define SETFN_CONN_NEGOTIATE 4 /* semantics for T_CONN_*_REQ */ /* - * Object to represent database of options to search passed to - * {sock,tpi}optcom_req() interface routine to take care of option - * management and associated methods. 
- */ -extern optdb_obj_t tcp_opt_obj; -extern optdb_obj_t udp_opt_obj; -extern optdb_obj_t ip_opt_obj; - -extern uint_t tcp_max_optsize; -extern uint_t udp_max_optsize; - -/* * Function prototypes */ extern void optcom_err_ack(queue_t *, mblk_t *, t_scalar_t, int); -extern int svr4_optcom_req(queue_t *, mblk_t *, cred_t *, optdb_obj_t *); -extern int tpi_optcom_req(queue_t *, mblk_t *, cred_t *, optdb_obj_t *); +extern int svr4_optcom_req(queue_t *, mblk_t *, cred_t *, optdb_obj_t *, + boolean_t); +extern int tpi_optcom_req(queue_t *, mblk_t *, cred_t *, optdb_obj_t *, + boolean_t); extern int tpi_optcom_buf(queue_t *, mblk_t *, t_scalar_t *, t_scalar_t, cred_t *, optdb_obj_t *, void *, int *); extern t_uscalar_t optcom_max_optsize(opdes_t *, uint_t); diff --git a/usr/src/uts/common/inet/rawip_impl.h b/usr/src/uts/common/inet/rawip_impl.h index d323b66517..58f29e8a3c 100644 --- a/usr/src/uts/common/inet/rawip_impl.h +++ b/usr/src/uts/common/inet/rawip_impl.h @@ -68,28 +68,40 @@ typedef struct icmp_stack icmp_stack_t; /* Internal icmp control structure, one per open stream */ typedef struct icmp_s { + krwlock_t icmp_rwlock; /* Protects most of icmp_t */ + t_scalar_t icmp_pending_op; /* The current TPI operation */ + /* + * Following fields up to icmp_ipversion protected by conn_lock. + */ uint_t icmp_state; /* TPI state */ in6_addr_t icmp_v6src; /* Source address of this stream */ in6_addr_t icmp_bound_v6src; /* Explicitely bound to address */ in6_addr_t icmp_v6dst; /* Connected destination */ - uint32_t icmp_flowinfo; /* Connected flow id and tclass */ - uint32_t icmp_max_hdr_len; /* For write offset in stream head */ - sa_family_t icmp_family; /* Family from socket() call */ /* * IP format that packets transmitted from this struct should use. * Value can be IP4_VERSION or IPV6_VERSION. 
*/ - uchar_t icmp_ipversion; + uchar_t icmp_ipversion; + + /* Written to only once at the time of opening the endpoint */ + sa_family_t icmp_family; /* Family from socket() call */ + /* Following protected by icmp_rwlock */ + uint32_t icmp_flowinfo; /* Connected flow id and tclass */ + uint32_t icmp_max_hdr_len; /* For write offset in stream head */ uint_t icmp_proto; uint_t icmp_ip_snd_options_len; /* Len of IPv4 options */ uint8_t *icmp_ip_snd_options; /* Ptr to IPv4 options */ uint8_t icmp_multicast_ttl; /* IP*_MULTICAST_TTL/HOPS */ ipaddr_t icmp_multicast_if_addr; /* IP_MULTICAST_IF option */ uint_t icmp_multicast_if_index; /* IPV6_MULTICAST_IF option */ - int icmp_bound_if; /* IP*_BOUND_IF option */ int icmp_xmit_if; /* IP_XMIT_IF option */ + int icmp_bound_if; /* IP*_BOUND_IF option */ + + /* Written to only once at the time of opening the endpoint */ + conn_t *icmp_connp; + /* Following protected by icmp_rwlock */ uint_t icmp_debug : 1, /* SO_DEBUG "socket" option. */ icmp_dontroute : 1, /* SO_DONTROUTE "socket" option. */ @@ -97,38 +109,34 @@ typedef struct icmp_s { icmp_reuseaddr : 1, /* SO_REUSEADDR "socket" option. */ icmp_useloopback : 1, /* SO_USELOOPBACK "socket" option. 
*/ - icmp_multicast_loop : 1, /* IP_MULTICAST_LOOP option */ icmp_hdrincl : 1, /* IP_HDRINCL option + RAW and IGMP */ icmp_dgram_errind : 1, /* SO_DGRAM_ERRIND option */ - - icmp_discon_pending : 1, /* T_DISCON_REQ in progress */ icmp_unspec_source : 1, /* IP*_UNSPEC_SRC option */ + icmp_raw_checksum : 1, /* raw checksum per IPV6_CHECKSUM */ icmp_no_tp_cksum : 1, /* icmp_proto is UDP or TCP */ - icmp_ip_recvpktinfo : 1, /* IPV[4,6]_RECVPKTINFO option */ icmp_ipv6_recvhoplimit : 1, /* IPV6_RECVHOPLIMIT option */ + icmp_ipv6_recvhopopts : 1, /* IPV6_RECVHOPOPTS option */ icmp_ipv6_recvdstopts : 1, /* IPV6_RECVDSTOPTS option */ - icmp_ipv6_recvrthdr : 1, /* IPV6_RECVRTHDR option */ icmp_ipv6_recvpathmtu : 1, /* IPV6_RECVPATHMTU option */ + icmp_recvif:1, /* IP_RECVIF for raw sockets option */ icmp_ipv6_recvtclass : 1, /* IPV6_RECVTCLASS option */ - - icmp_restricted : 1, /* opened by non-privileged user */ icmp_ipv6_recvrtdstopts : 1, /* Obsolete IPV6_RECVRTHDRDSTOPTS */ icmp_old_ipv6_recvdstopts : 1, /* Old ver of IPV6_RECVDSTOPTS */ + icmp_timestamp : 1, /* SO_TIMESTAMP "socket" option */ icmp_mac_exempt : 1, /* SO_MAC_EXEMPT option */ - icmp_pad_to_bit_31: 7; + icmp_pad_to_bit_31: 10; uint8_t icmp_type_of_service; uint8_t icmp_ttl; /* TTL or hoplimit */ uint32_t icmp_checksum_off; /* user supplied checksum offset */ icmp6_filter_t *icmp_filter; /* ICMP6_FILTER option */ - cred_t *icmp_credp; /* Opener's credentials */ ip6_pkt_t icmp_sticky_ipp; /* Sticky options */ uint8_t *icmp_sticky_hdrs; /* Prebuilt IPv6 hdrs */ @@ -137,10 +145,22 @@ typedef struct icmp_s { uint_t icmp_label_len; /* length of security label */ uint_t icmp_label_len_v6; /* sec. 
part of sticky opt */ in6_addr_t icmp_v6lastdst; /* most recent destination */ - mblk_t *icmp_delabel; /* send this on close */ icmp_stack_t *icmp_is; /* Stack instance */ } icmp_t; -#define icmp_rawip_mib icmp_is->is_rawip_mib + +/* + * Object to represent database of options to search passed to + * {sock,tpi}optcom_req() interface routine to take care of option + * management and associated methods. + */ +extern optdb_obj_t icmp_opt_obj; +extern uint_t icmp_max_optsize; + +extern mblk_t *icmp_snmp_get(queue_t *q, mblk_t *mpctl); +extern void rawip_resume_bind(conn_t *, mblk_t *); + +extern void icmp_ddi_init(void); +extern void icmp_ddi_destroy(void); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/rts_impl.h b/usr/src/uts/common/inet/rts_impl.h new file mode 100644 index 0000000000..f89d1ec82c --- /dev/null +++ b/usr/src/uts/common/inet/rts_impl.h @@ -0,0 +1,110 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* Copyright (c) 1990 Mentat Inc. 
*/ + +#ifndef _RTS_IMPL_H +#define _RTS_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +#include <sys/types.h> +#include <sys/netstack.h> + +#include <netinet/in.h> +#include <netinet/icmp6.h> +#include <netinet/ip6.h> + +#include <inet/common.h> +#include <inet/ip.h> + +/* Named Dispatch Parameter Management Structure */ +typedef struct rtsparam_s { + uint_t rts_param_min; + uint_t rts_param_max; + uint_t rts_param_value; + char *rts_param_name; +} rtsparam_t; + +/* + * RTS stack instances + */ +struct rts_stack { + netstack_t *rtss_netstack; /* Common netstack */ + + caddr_t rtss_g_nd; + rtsparam_t *rtss_params; +}; +typedef struct rts_stack rts_stack_t; + +/* Internal routing socket stream control structure, one per open stream */ +typedef struct rts_s { + krwlock_t rts_rwlock; /* Protects most of rts_t */ + uint_t rts_state; /* Provider interface state */ + uint_t rts_error; /* Routing socket error code */ + uint_t rts_flag; /* Pending I/O state */ + uint_t rts_proto; /* SO_PROTOTYPE "socket" option. */ + uint_t rts_debug : 1, /* SO_DEBUG "socket" option. */ + rts_dontroute : 1, /* SO_DONTROUTE "socket" option. */ + rts_broadcast : 1, /* SO_BROADCAST "socket" option. */ + rts_reuseaddr : 1, /* SO_REUSEADDR "socket" option. */ + rts_useloopback : 1, /* SO_USELOOPBACK "socket" option. */ + rts_multicast_loop : 1, /* IP_MULTICAST_LOOP option */ + rts_hdrincl : 1, /* IP_HDRINCL option + RAW and IGMP */ + + : 0; + rts_stack_t *rts_rtss; + + /* Written to only once at the time of opening the endpoint */ + conn_t *rts_connp; +} rts_t; + +#define RTS_WPUT_PENDING 0x1 /* Waiting for write-side to complete */ +#define RTS_WRW_PENDING 0x2 /* Routing socket write in progress */ + +/* + * Object to represent database of options to search passed to + * {sock,tpi}optcom_req() interface routine to take care of option + * management and associated methods. + * XXX. 
These and other externs should really move to a rts header. + */ +extern optdb_obj_t rts_opt_obj; +extern uint_t rts_max_optsize; + +extern void rts_ddi_init(void); +extern void rts_ddi_destroy(void); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTS_IMPL_H */ diff --git a/usr/src/uts/common/inet/sctp/sctp.c b/usr/src/uts/common/inet/sctp/sctp.c index 0ace30dfd2..a5f277db08 100644 --- a/usr/src/uts/common/inet/sctp/sctp.c +++ b/usr/src/uts/common/inet/sctp/sctp.c @@ -73,9 +73,6 @@ #include "sctp_addr.h" #include "sctp_asconf.h" -extern major_t SCTP6_MAJ; -extern major_t SCTP_MAJ; - int sctpdebug; sin6_t sctp_sin6_null; /* Zero address for quick clears */ @@ -1568,7 +1565,6 @@ sctp_g_q_setup(sctp_stack_t *sctps) mutex_exit(&sctps->sctps_g_q_lock); } -major_t IP_MAJ; #define IP "ip" #define SCTP6DEV "/devices/pseudo/sctp6@0:sctp6" @@ -1584,12 +1580,15 @@ sctp_g_q_create(sctp_stack_t *sctps) ldi_ident_t li = NULL; int rval; cred_t *cr; + major_t IP_MAJ; #ifdef NS_DEBUG (void) printf("sctp_g_q_create()for stack %d\n", sctps->sctps_netstack->netstack_stackid); #endif + IP_MAJ = ddi_name_to_major(IP); + ASSERT(sctps->sctps_g_q_creator == curthread); error = ldi_ident_from_major(IP_MAJ, &li); @@ -1686,6 +1685,9 @@ sctp_g_q_close(void *arg) ldi_handle_t lh = NULL; ldi_ident_t li = NULL; cred_t *cr; + major_t IP_MAJ; + + IP_MAJ = ddi_name_to_major(IP); lh = sctps->sctps_g_q_lh; if (lh == NULL) @@ -1756,8 +1758,6 @@ sctp_g_q_inactive(sctp_stack_t *sctps) void sctp_ddi_g_init(void) { - IP_MAJ = ddi_name_to_major(IP); - /* Create sctp_t/conn_t cache */ sctp_conn_cache_init(); diff --git a/usr/src/uts/common/inet/sctp/sctp6ddi.c b/usr/src/uts/common/inet/sctp/sctp6ddi.c index 4b8e2da932..db262a10ed 100644 --- a/usr/src/uts/common/inet/sctp/sctp6ddi.c +++ b/usr/src/uts/common/inet/sctp/sctp6ddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 
only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -34,17 +33,19 @@ #define INET_NAME "sctp6" #define INET_DEVDESC "SCTP6 device" -/* We are IP... */ -#define INET_STRTAB ipinfo -#define INET_DEVMINOR IPV6_MINOR -#define INET_DEVMTFLAGS IP_DEVMTFLAGS +#define INET_DEVSTRTAB sctpinfo +#define INET_DEVMINOR 0 +#define INET_DEVMTFLAGS D_MP #include "../inetddi.c" int _init(void) { - /* Since we are IP, all initialization are done in ip_ddi_init(). */ + /* + * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() + */ return (mod_install(&modlinkage)); } diff --git a/usr/src/uts/common/inet/sctp/sctp_common.c b/usr/src/uts/common/inet/sctp/sctp_common.c index 0a3f66085d..afa613603d 100644 --- a/usr/src/uts/common/inet/sctp/sctp_common.c +++ b/usr/src/uts/common/inet/sctp/sctp_common.c @@ -558,6 +558,7 @@ sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first) if (faddr->ire != NULL && faddr->ire->ire_type & IRE_BROADCAST) { IRE_REFRELE_NOTR(faddr->ire); sctp_timer_free(timer_mp); + faddr->timer_mp = NULL; kmem_cache_free(sctp_kmem_faddr_cache, faddr); return (EADDRNOTAVAIL); } diff --git a/usr/src/uts/common/inet/sctp/sctp_ioc.c b/usr/src/uts/common/inet/sctp/sctp_ioc.c index 6fa9abc632..4cb837253a 100644 --- a/usr/src/uts/common/inet/sctp/sctp_ioc.c +++ b/usr/src/uts/common/inet/sctp/sctp_ioc.c @@ -66,9 +66,6 @@ sctp_def_q_set(queue_t *q, mblk_t *mp) sctp_stack_t *sctps = 
connp->conn_netstack-> netstack_sctp; - ASSERT(connp != NULL && connp->conn_ulp == IPPROTO_SCTP && - connp->conn_rq == NULL); - if ((mp1 = mp->b_cont) == NULL) { iocp->ioc_error = EINVAL; ip0dbg(("sctp_def_q_set: no file descriptor\n")); @@ -160,3 +157,169 @@ err_ret: mp->b_datap->db_type = M_IOCNAK; qreply(q, mp); } + +/* + * A SCTP streams driver which is there just to handle ioctls on /dev/sctp. + */ +static int sctp_str_close(queue_t *); +static int sctp_str_open(queue_t *, dev_t *, int, int, cred_t *); + +static struct module_info sctp_mod_info = { + 5711, "sctp", 1, INFPSZ, 512, 128 +}; + +static struct qinit sctprinit = { + NULL, NULL, sctp_str_open, sctp_str_close, NULL, &sctp_mod_info +}; + +static struct qinit sctpwinit = { + (pfi_t)sctp_wput, NULL, NULL, NULL, NULL, &sctp_mod_info +}; + +struct streamtab sctpinfo = { + &sctprinit, &sctpwinit +}; + +static int +sctp_str_close(queue_t *q) +{ + conn_t *connp = Q_TO_CONN(q); + + qprocsoff(connp->conn_rq); + + ASSERT(connp->conn_ref == 1); + + inet_minor_free(ip_minor_arena, connp->conn_dev); + + q->q_ptr = WR(q)->q_ptr = NULL; + CONN_DEC_REF(connp); + + return (0); +} + +/*ARGSUSED2*/ +static int +sctp_str_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + conn_t *connp; + major_t maj; + netstack_t *ns; + zoneid_t zoneid; + + /* If the stream is already open, return immediately. */ + if (q->q_ptr != NULL) + return (0); + + /* If this is not a driver open, fail. */ + if (sflag == MODOPEN) + return (EINVAL); + + ns = netstack_find_by_cred(credp); + ASSERT(ns != NULL); + + /* + * For exclusive stacks we set the zoneid to zero + * to make IP operate as if in the global zone. + */ + if (ns->netstack_stackid != GLOBAL_NETSTACKID) + zoneid = GLOBAL_ZONEID; + else + zoneid = crgetzoneid(credp); + + /* + * We are opening as a device. This is an IP client stream, and we + * allocate an conn_t as the instance data. 
+ */ + connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP, ns); + + /* + * ipcl_conn_create did a netstack_hold. Undo the hold that was + * done by netstack_find_by_cred() + */ + netstack_rele(ns); + + connp->conn_zoneid = zoneid; + + connp->conn_rq = q; + connp->conn_wq = WR(q); + q->q_ptr = WR(q)->q_ptr = connp; + + if ((connp->conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { + /* CONN_DEC_REF takes care of netstack_rele() */ + q->q_ptr = WR(q)->q_ptr = NULL; + CONN_DEC_REF(connp); + return (EBUSY); + } + + maj = getemajor(*devp); + *devp = makedevice(maj, (minor_t)connp->conn_dev); + + /* + * connp->conn_cred is crfree()ed in ipcl_conn_destroy() + */ + connp->conn_cred = credp; + crhold(connp->conn_cred); + + /* + * Make the conn globally visible to walkers + */ + mutex_enter(&connp->conn_lock); + connp->conn_state_flags &= ~CONN_INCIPIENT; + mutex_exit(&connp->conn_lock); + ASSERT(connp->conn_ref == 1); + + qprocson(q); + + return (0); +} + + +/* + * The SCTP write put procedure which is used only to handle ioctls. + */ +void +sctp_wput(queue_t *q, mblk_t *mp) +{ + uchar_t *rptr; + t_scalar_t type; + + switch (mp->b_datap->db_type) { + case M_IOCTL: + sctp_wput_ioctl(q, mp); + break; + case M_DATA: + /* Should be handled in sctp_output() */ + ASSERT(0); + freemsg(mp); + break; + case M_PROTO: + case M_PCPROTO: + rptr = mp->b_rptr; + if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { + type = ((union T_primitives *)rptr)->type; + /* + * There is no "standard" way on how to respond + * to T_CAPABILITY_REQ if a module does not + * understand it. And the current TI mod + * has problems handling an error ack. So we + * catch the request here and reply with a response + * which the TI mod knows how to respond to. 
+ */ + switch (type) { + case T_CAPABILITY_REQ: + (void) putnextctl1(RD(q), M_ERROR, EPROTO); + break; + default: + if ((mp = mi_tpi_err_ack_alloc(mp, + TNOTSUPPORT, 0)) != NULL) { + qreply(q, mp); + return; + } + } + } + /* FALLTHRU */ + default: + freemsg(mp); + return; + } +} diff --git a/usr/src/uts/common/inet/sctp/sctp_output.c b/usr/src/uts/common/inet/sctp/sctp_output.c index 7bcb6277fe..3d5de70453 100644 --- a/usr/src/uts/common/inet/sctp/sctp_output.c +++ b/usr/src/uts/common/inet/sctp/sctp_output.c @@ -2013,56 +2013,6 @@ restart_timer: } /* - * The SCTP write put procedure called from IP. - */ -void -sctp_wput(queue_t *q, mblk_t *mp) -{ - uchar_t *rptr; - t_scalar_t type; - - switch (mp->b_datap->db_type) { - case M_IOCTL: - sctp_wput_ioctl(q, mp); - break; - case M_DATA: - /* Should be handled in sctp_output() */ - ASSERT(0); - freemsg(mp); - break; - case M_PROTO: - case M_PCPROTO: - rptr = mp->b_rptr; - if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) { - type = ((union T_primitives *)rptr)->type; - /* - * There is no "standard" way on how to respond - * to T_CAPABILITY_REQ if a module does not - * understand it. And the current TI mod - * has problems handling an error ack. So we - * catch the request here and reply with a response - * which the TI mod knows how to respond to. - */ - switch (type) { - case T_CAPABILITY_REQ: - (void) putnextctl1(RD(q), M_ERROR, EPROTO); - break; - default: - if ((mp = mi_tpi_err_ack_alloc(mp, - TNOTSUPPORT, 0)) != NULL) { - qreply(q, mp); - return; - } - } - } - /* FALLTHRU */ - default: - freemsg(mp); - return; - } -} - -/* * This function is called by sctp_ss_rexmit() to create a packet * to be retransmitted to the given fp. 
The given meta and mp * parameters are respectively the sctp_msg_hdr_t and the mblk of the diff --git a/usr/src/uts/common/inet/sctp/sctpddi.c b/usr/src/uts/common/inet/sctp/sctpddi.c index 3f57433836..16b8551712 100644 --- a/usr/src/uts/common/inet/sctp/sctpddi.c +++ b/usr/src/uts/common/inet/sctp/sctpddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -34,17 +33,19 @@ #define INET_NAME "sctp" #define INET_DEVDESC "SCTP device" -/* We are IP... */ -#define INET_STRTAB ipinfo -#define INET_DEVMINOR IPV4_MINOR -#define INET_DEVMTFLAGS IP_DEVMTFLAGS +#define INET_DEVSTRTAB sctpinfo +#define INET_DEVMINOR 0 +#define INET_DEVMTFLAGS D_MP #include "../inetddi.c" int _init(void) { - /* Since we are IP, all initialization are done in ip_ddi_init(). 
*/ + /* + * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() + */ return (mod_install(&modlinkage)); } diff --git a/usr/src/uts/common/inet/snmpcom.c b/usr/src/uts/common/inet/snmpcom.c index 39d8ed5b32..09900e1134 100644 --- a/usr/src/uts/common/inet/snmpcom.c +++ b/usr/src/uts/common/inet/snmpcom.c @@ -51,9 +51,6 @@ #include <inet/snmpcom.h> #include <inet/ip.h> -#include <inet/ip6.h> -#include <inet/tcp.h> -#include <inet/udp_impl.h> #define DEFAULT_LENGTH sizeof (long) #define DATA_MBLK_SIZE 1024 @@ -180,7 +177,6 @@ snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, pfi_t getfn, cred_t *credp) sor_t *sreq; struct T_optmgmt_req *tor = (struct T_optmgmt_req *)mp->b_rptr; struct T_optmgmt_ack *toa; - boolean_t pass_to_ip = B_FALSE; if (mp->b_cont) { /* don't deal with multiple mblk's */ freemsg(mp->b_cont); @@ -190,7 +186,7 @@ snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, pfi_t getfn, cred_t *credp) } if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_optmgmt_req) || !(req_start = (struct opthdr *)mi_offset_param(mp, - tor->OPT_offset, tor->OPT_length))) + tor->OPT_offset, tor->OPT_length))) goto bad_req1; if (! __TPI_OPT_ISALIGNED(req_start)) goto bad_req1; @@ -201,15 +197,11 @@ snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, pfi_t getfn, cred_t *credp) * calling module to process or ignore as it sees fit. 
*/ if ((!(req_start->level >= MIB2_RANGE_START && - req_start->level <= MIB2_RANGE_END)) && + req_start->level <= MIB2_RANGE_END)) && (!(req_start->level >= EXPER_RANGE_START && - req_start->level <= EXPER_RANGE_END))) + req_start->level <= EXPER_RANGE_END))) return (B_FALSE); - if (setfn == tcp_snmp_set || setfn == udp_snmp_set || - getfn == tcp_snmp_get || getfn == udp_snmp_get) - pass_to_ip = B_TRUE; - switch (tor->MGMT_flags) { case T_NEGOTIATE: @@ -218,11 +210,11 @@ snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, pfi_t getfn, cred_t *credp) return (B_TRUE); } req_end = (struct opthdr *)((uchar_t *)req_start + - tor->OPT_length); + tor->OPT_length); for (req = req_start; req < req_end; req = next_req) { next_req = - (struct opthdr *)((uchar_t *)&req[1] + - _TPI_ALIGN_OPT(req->len)); + (struct opthdr *)((uchar_t *)&req[1] + + _TPI_ALIGN_OPT(req->len)); if (next_req > req_end) goto bad_req2; for (sreq = req_arr; sreq < A_END(req_arr); sreq++) { @@ -233,13 +225,11 @@ snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, pfi_t getfn, cred_t *credp) if (sreq >= A_END(req_arr)) goto bad_req3; if (!(*setfn)(q, req->level, req->name, - (uchar_t *)&req[1], req->len)) + (uchar_t *)&req[1], req->len)) goto bad_req4; } if (q->q_next != NULL) putnext(q, mp); - else if (pass_to_ip) - ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); else freemsg(mp); return (B_TRUE); @@ -264,7 +254,7 @@ snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, pfi_t getfn, cred_t *credp) toa->OPT_offset = sizeof (struct T_optmgmt_ack); toa->OPT_length = sizeof (struct opthdr); toa->MGMT_flags = T_SUCCESS; - if (!(*getfn)(q, mpctl)) + if (!(*getfn)(q, mpctl, req_start->level)) freemsg(mpctl); /* * all data for this module has now been sent upstream. 
If @@ -274,9 +264,6 @@ snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, pfi_t getfn, cred_t *credp) if (q->q_next != NULL) { putnext(q, mp); return (B_TRUE); - } else if (pass_to_ip) { - ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); - return (B_TRUE); } if (mp->b_cont) { freemsg(mp->b_cont); diff --git a/usr/src/uts/common/inet/snmpcom.h b/usr/src/uts/common/inet/snmpcom.h index f922ed8257..383f7faa2d 100644 --- a/usr/src/uts/common/inet/snmpcom.h +++ b/usr/src/uts/common/inet/snmpcom.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1992,1997,2001-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. 
*/ @@ -41,7 +40,7 @@ extern int snmp_append_data2(mblk_t *mpdata, mblk_t **last_mpp, char *blob, int len); extern boolean_t snmpcom_req(queue_t *q, mblk_t *mp, pfi_t setfn, - pfi_t getfn, cred_t *cr); + pfi_t getfn, cred_t *cr); #endif /* defined(_KERNEL) && defined(__STDC__) */ diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index 79434cc8fb..aa5ba3a075 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -653,7 +653,7 @@ extern void tcp_input(void *arg, mblk_t *mp, void *arg2); extern void tcp_rput_data(void *arg, mblk_t *mp, void *arg2); extern void *tcp_get_conn(void *arg, tcp_stack_t *); extern void tcp_time_wait_collector(void *arg); -extern int tcp_snmp_get(queue_t *, mblk_t *); +extern mblk_t *tcp_snmp_get(queue_t *, mblk_t *); extern int tcp_snmp_set(queue_t *, int, int, uchar_t *, int len); extern mblk_t *tcp_xmit_mp(tcp_t *tcp, mblk_t *mp, int32_t max_to_send, int32_t *offset, mblk_t **end_mp, uint32_t seq, diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 661ac5d71e..2d77bd0033 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -234,8 +234,6 @@ const char tcp_version[] = "%Z%%M% %I% %E% SMI"; * packets of the meta buffer are send to the IP path one by one. 
*/ -extern major_t TCP6_MAJ; - /* * Values for squeue switch: * 1: squeue_enter_nodrain @@ -909,11 +907,11 @@ static int tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, tcph_t *tcph, mblk_t *idmp); static squeue_func_t tcp_squeue_switch(int); -static int tcp_open(queue_t *, dev_t *, int, int, cred_t *); +static int tcp_open(queue_t *, dev_t *, int, int, cred_t *, boolean_t); +static int tcp_openv4(queue_t *, dev_t *, int, int, cred_t *); +static int tcp_openv6(queue_t *, dev_t *, int, int, cred_t *); static int tcp_close(queue_t *, int); static int tcpclose_accept(queue_t *); -static int tcp_modclose(queue_t *); -static void tcp_wput_mod(queue_t *, mblk_t *); static void tcp_squeue_add(squeue_t *); static boolean_t tcp_zcopy_check(tcp_t *); @@ -965,24 +963,16 @@ static struct module_info tcp_winfo = { }; /* - * Entry points for TCP as a module. It only allows SNMP requests - * to pass through. - */ -struct qinit tcp_mod_rinit = { - (pfi_t)putnext, NULL, tcp_open, ip_snmpmod_close, NULL, &tcp_rinfo, -}; - -struct qinit tcp_mod_winit = { - (pfi_t)ip_snmpmod_wput, NULL, tcp_open, ip_snmpmod_close, NULL, - &tcp_rinfo -}; - -/* * Entry points for TCP as a device. The normal case which supports * the TCP functionality. + * We have separate open functions for the /dev/tcp and /dev/tcp6 devices. */ -struct qinit tcp_rinit = { - NULL, (pfi_t)tcp_rsrv, tcp_open, tcp_close, NULL, &tcp_rinfo +struct qinit tcp_rinitv4 = { + NULL, (pfi_t)tcp_rsrv, tcp_openv4, tcp_close, NULL, &tcp_rinfo +}; + +struct qinit tcp_rinitv6 = { + NULL, (pfi_t)tcp_rsrv, tcp_openv6, tcp_close, NULL, &tcp_rinfo }; struct qinit tcp_winit = { @@ -1009,14 +999,22 @@ struct qinit tcp_acceptor_winit = { /* * Entry points for TCP loopback (read side only) + * The open routine is only used for reopens, thus no need to + * have a separate one for tcp_openv6. 
*/ struct qinit tcp_loopback_rinit = { - (pfi_t)0, (pfi_t)tcp_rsrv, tcp_open, tcp_close, (pfi_t)0, + (pfi_t)0, (pfi_t)tcp_rsrv, tcp_openv4, tcp_close, (pfi_t)0, &tcp_rinfo, NULL, tcp_fuse_rrw, tcp_fuse_rinfop, STRUIOT_STANDARD }; -struct streamtab tcpinfo = { - &tcp_rinit, &tcp_winit +/* For AF_INET aka /dev/tcp */ +struct streamtab tcpinfov4 = { + &tcp_rinitv4, &tcp_winit +}; + +/* For AF_INET6 aka /dev/tcp6 */ +struct streamtab tcpinfov6 = { + &tcp_rinitv6, &tcp_winit }; /* @@ -1599,16 +1597,16 @@ tcp_ipsec_cleanup(tcp_t *tcp) { conn_t *connp = tcp->tcp_connp; - if (connp->conn_flags & IPCL_TCPCONN) { - if (connp->conn_latch != NULL) { - IPLATCH_REFRELE(connp->conn_latch, - connp->conn_netstack); - connp->conn_latch = NULL; - } - if (connp->conn_policy != NULL) { - IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); - connp->conn_policy = NULL; - } + ASSERT(connp->conn_flags & IPCL_TCPCONN); + + if (connp->conn_latch != NULL) { + IPLATCH_REFRELE(connp->conn_latch, + connp->conn_netstack); + connp->conn_latch = NULL; + } + if (connp->conn_policy != NULL) { + IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); + connp->conn_policy = NULL; } } @@ -1681,11 +1679,16 @@ tcp_cleanup(tcp_t *tcp) tcp_iphc_len = tcp->tcp_iphc_len; tcp_hdr_grown = tcp->tcp_hdr_grown; - if (connp->conn_cred != NULL) + if (connp->conn_cred != NULL) { crfree(connp->conn_cred); - if (connp->conn_peercred != NULL) + connp->conn_cred = NULL; + } + if (connp->conn_peercred != NULL) { crfree(connp->conn_peercred); - bzero(connp, sizeof (conn_t)); + connp->conn_peercred = NULL; + } + ipcl_conn_cleanup(connp); + connp->conn_flags = IPCL_TCPCONN; bzero(tcp, sizeof (tcp_t)); /* restore the state */ @@ -1696,14 +1699,13 @@ tcp_cleanup(tcp_t *tcp) tcp->tcp_iphc_len = tcp_iphc_len; tcp->tcp_hdr_grown = tcp_hdr_grown; - tcp->tcp_connp = connp; - connp->conn_tcp = tcp; - connp->conn_flags = IPCL_TCPCONN; + ASSERT(connp->conn_tcp == tcp); + ASSERT(connp->conn_flags & IPCL_TCPCONN); 
connp->conn_state_flags = CONN_INCIPIENT; - connp->conn_ulp = IPPROTO_TCP; - connp->conn_ref = 1; + ASSERT(connp->conn_ulp == IPPROTO_TCP); + ASSERT(connp->conn_ref == 1); } /* @@ -2448,7 +2450,6 @@ tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager) econnp->conn_multicast_loop = aconnp->conn_multicast_loop; econnp->conn_af_isv6 = aconnp->conn_af_isv6; econnp->conn_pkt_isv6 = aconnp->conn_pkt_isv6; - econnp->conn_ulp = aconnp->conn_ulp; /* Done with old IPC. Drop its ref on its connp */ CONN_DEC_REF(aconnp); @@ -3004,7 +3005,7 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) uint_t origipversion; int err; queue_t *q = tcp->tcp_wq; - conn_t *connp; + conn_t *connp = tcp->tcp_connp; mlp_type_t addrtype, mlptype; zone_t *zone; cred_t *cr; @@ -3208,7 +3209,6 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) * anonymous MLP. */ cr = DB_CREDDEF(mp, tcp->tcp_cred); - connp = tcp->tcp_connp; if (connp->conn_anon_mlp && is_system_labeled()) { zone = crgetzone(cr); addrtype = tsol_mlp_addr_type(zone->zone_id, @@ -3261,7 +3261,6 @@ tcp_bind(tcp_t *tcp, mblk_t *mp) } user_specified = B_TRUE; - connp = tcp->tcp_connp; if (is_system_labeled()) { zone = crgetzone(cr); addrtype = tsol_mlp_addr_type(zone->zone_id, @@ -3434,7 +3433,13 @@ do_bind: /* * We can call ip_bind directly which returns a T_BIND_ACK mp. The * processing continues in tcp_rput_other(). + * + * We need to make sure that the conn_recv is set to a non-null + * value before we insert the conn into the classifier table. + * This is to avoid a race with an incoming packet which does an + * ipcl_classify(). */ + connp->conn_recv = tcp_conn_request; if (tcp->tcp_family == AF_INET6) { ASSERT(tcp->tcp_connp->conn_af_isv6); mp = ip_bind_v6(q, mp, tcp->tcp_connp, &tcp->tcp_sticky_ipp); @@ -4006,7 +4011,6 @@ tcp_close(queue_t *q, int flags) ASSERT(WR(q)->q_next == NULL); ASSERT(connp->conn_ref >= 2); - ASSERT((connp->conn_flags & IPCL_TCPMOD) == 0); /* * We are being closed as /dev/tcp or /dev/tcp6. 
@@ -6545,6 +6549,14 @@ tcp_connect_ipv4(tcp_t *tcp, mblk_t *mp, ipaddr_t *dstaddrp, in_port_t dstport, sizeof (ipa6_conn_t)); } if (mp1) { + /* + * We need to make sure that the conn_recv is set to a non-null + * value before we insert the conn_t into the classifier table. + * This is to avoid a race with an incoming packet which does + * an ipcl_classify(). + */ + tcp->tcp_connp->conn_recv = tcp_input; + /* Hang onto the T_OK_ACK for later. */ linkb(mp1, mp); mblk_setcred(mp1, tcp->tcp_cred); @@ -6738,6 +6750,14 @@ tcp_connect_ipv6(tcp_t *tcp, mblk_t *mp, in6_addr_t *dstaddrp, } mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); if (mp1) { + /* + * We need to make sure that the conn_recv is set to a non-null + * value before we insert the conn_t into the classifier table. + * This is to avoid a race with an incoming packet which does + * an ipcl_classify(). + */ + tcp->tcp_connp->conn_recv = tcp_input; + /* Hang onto the T_OK_ACK for later. */ linkb(mp1, mp); mblk_setcred(mp1, tcp->tcp_cred); @@ -6803,7 +6823,11 @@ tcp_def_q_set(tcp_t *tcp, mblk_t *mp) /* * We are passing tcp_sticky_ipp as NULL * as it is not useful for tcp_default queue + * + * Set conn_recv just in case. 
*/ + tcp->tcp_connp->conn_recv = tcp_conn_request; + mp1 = ip_bind_v6(q, mp1, tcp->tcp_connp, NULL); if (mp1 != NULL) tcp_rput_other(tcp, mp1); @@ -9561,8 +9585,23 @@ tcp_mss_set(tcp_t *tcp, uint32_t mss, boolean_t do_ss) (void) tcp_maxpsz_set(tcp, B_TRUE); } +/* For /dev/tcp aka AF_INET open */ static int -tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +tcp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + return (tcp_open(q, devp, flag, sflag, credp, B_FALSE)); +} + +/* For /dev/tcp6 aka AF_INET6 open */ +static int +tcp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + return (tcp_open(q, devp, flag, sflag, credp, B_TRUE)); +} + +static int +tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, + boolean_t isv6) { tcp_t *tcp = NULL; conn_t *connp; @@ -9574,6 +9613,9 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (q->q_ptr != NULL) return (0); + if (sflag == MODOPEN) + return (EINVAL); + if (!(flag & SO_ACCEPTOR)) { /* * Special case for install: miniroot needs to be able to @@ -9613,31 +9655,7 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) tcp_g_q_setup(tcps); } } - if (sflag == MODOPEN) { - /* - * This is a special case. The purpose of a modopen - * is to allow just the T_SVR4_OPTMGMT_REQ to pass - * through for MIB browsers. Everything else is failed. 
- */ - connp = (conn_t *)tcp_get_conn(IP_SQUEUE_GET(lbolt), tcps); - /* tcp_get_conn incremented refcnt */ - netstack_rele(tcps->tcps_netstack); - - if (connp == NULL) - return (ENOMEM); - connp->conn_flags |= IPCL_TCPMOD; - connp->conn_cred = credp; - connp->conn_zoneid = zoneid; - ASSERT(connp->conn_netstack == tcps->tcps_netstack); - ASSERT(connp->conn_netstack->netstack_tcp == tcps); - q->q_ptr = WR(q)->q_ptr = connp; - crhold(credp); - q->q_qinfo = &tcp_mod_rinit; - WR(q)->q_qinfo = &tcp_mod_winit; - qprocson(q); - return (0); - } if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { if (tcps != NULL) netstack_rele(tcps->tcps_netstack); @@ -9672,7 +9690,7 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) tcp = connp->conn_tcp; q->q_ptr = WR(q)->q_ptr = connp; - if (getmajor(*devp) == TCP6_MAJ) { + if (isv6) { connp->conn_flags |= (IPCL_TCP6|IPCL_ISV6); connp->conn_send = ip_output_v6; connp->conn_af_isv6 = B_TRUE; @@ -9715,7 +9733,7 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) connp->conn_dev = conn_dev; - ASSERT(q->q_qinfo == &tcp_rinit); + ASSERT(q->q_qinfo == &tcp_rinitv4 || q->q_qinfo == &tcp_rinitv6); ASSERT(WR(q)->q_qinfo == &tcp_winit); if (flag & SO_SOCKSTR) { @@ -16212,7 +16230,7 @@ tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) /* * Return SNMP stuff in buffer in mpdata. 
*/ -int +mblk_t * tcp_snmp_get(queue_t *q, mblk_t *mpctl) { mblk_t *mpdata; @@ -16229,14 +16247,20 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) mib2_tcp6ConnEntry_t tce6; mib2_transportMLPEntry_t mlp; connf_t *connfp; - conn_t *connp; int i; boolean_t ispriv; zoneid_t zoneid; int v4_conn_idx; int v6_conn_idx; - tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; - ip_stack_t *ipst; + conn_t *connp = Q_TO_CONN(q); + tcp_stack_t *tcps; + ip_stack_t *ipst; + mblk_t *mp2ctl; + + /* + * make a copy of the original message + */ + mp2ctl = copymsg(mpctl); if (mpctl == NULL || (mpdata = mpctl->b_cont) == NULL || @@ -16248,9 +16272,14 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) freemsg(mp_attr_ctl); freemsg(mp6_conn_ctl); freemsg(mp6_attr_ctl); - return (0); + freemsg(mpctl); + freemsg(mp2ctl); + return (NULL); } + ipst = connp->conn_netstack->netstack_ip; + tcps = connp->conn_netstack->netstack_tcp; + /* build table of connections -- need count in fixed part */ SET_MIB(tcps->tcps_mib.tcpRtoAlgorithm, 4); /* vanj */ SET_MIB(tcps->tcps_mib.tcpRtoMin, tcps->tcps_rexmit_interval_min); @@ -16487,7 +16516,7 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl) freemsg(mp6_attr_ctl); else qreply(q, mp6_attr_ctl); - return (1); + return (mp2ctl); } /* Return 0 if invalid set request, 1 otherwise, including non-tcp requests */ @@ -18232,7 +18261,7 @@ tcp_wput_accept(queue_t *q, mblk_t *mp) eager->tcp_rq = rq; eager->tcp_wq = q; rq->q_ptr = econnp; - rq->q_qinfo = &tcp_rinit; + rq->q_qinfo = &tcp_rinitv4; /* No open - same as rinitv6 */ q->q_ptr = econnp; q->q_qinfo = &tcp_winit; listener = eager->tcp_listener; @@ -18468,7 +18497,7 @@ tcp_wput(queue_t *q, mblk_t *mp) } if (type == T_SVR4_OPTMGMT_REQ) { cred_t *cr = DB_CREDDEF(mp, tcp->tcp_cred); - if (snmpcom_req(q, mp, tcp_snmp_set, tcp_snmp_get, + if (snmpcom_req(q, mp, tcp_snmp_set, ip_snmp_get, cr)) { /* * This was a SNMP request @@ -22151,8 +22180,8 @@ non_urgent_data: tcp_info_req(tcp, mp); break; case T_SVR4_OPTMGMT_REQ: /* manage options req */ - 
/* Only IP is allowed to return meaningful value */ - (void) svr4_optcom_req(tcp->tcp_wq, mp, cr, &tcp_opt_obj); + (void) svr4_optcom_req(tcp->tcp_wq, mp, cr, + &tcp_opt_obj, B_TRUE); break; case T_OPTMGMT_REQ: /* @@ -22160,7 +22189,8 @@ non_urgent_data: * T_OPTMGMT_REQ. See comments in ip.c */ /* Only IP is allowed to return meaningful value */ - (void) tpi_optcom_req(tcp->tcp_wq, mp, cr, &tcp_opt_obj); + (void) tpi_optcom_req(tcp->tcp_wq, mp, cr, &tcp_opt_obj, + B_TRUE); break; case T_UNITDATA_REQ: /* unitdata request */ @@ -24888,7 +24918,6 @@ tcp_g_q_setup(tcp_stack_t *tcps) mutex_exit(&tcps->tcps_g_q_lock); } -major_t IP_MAJ; #define IP "ip" #define TCP6DEV "/devices/pseudo/tcp6@0:tcp6" @@ -24904,11 +24933,14 @@ tcp_g_q_create(tcp_stack_t *tcps) ldi_ident_t li = NULL; int rval; cred_t *cr; + major_t IP_MAJ; #ifdef NS_DEBUG (void) printf("tcp_g_q_create()\n"); #endif + IP_MAJ = ddi_name_to_major(IP); + ASSERT(tcps->tcps_g_q_creator == curthread); error = ldi_ident_from_major(IP_MAJ, &li); @@ -25002,6 +25034,9 @@ tcp_g_q_close(void *arg) ldi_handle_t lh = NULL; ldi_ident_t li = NULL; cred_t *cr; + major_t IP_MAJ; + + IP_MAJ = ddi_name_to_major(IP); #ifdef NS_DEBUG (void) printf("tcp_g_q_inactive() for stack %d refcnt %d\n", @@ -25076,8 +25111,6 @@ tcp_g_q_inactive(tcp_stack_t *tcps) void tcp_ddi_g_init(void) { - IP_MAJ = ddi_name_to_major(IP); - tcp_timercache = kmem_cache_create("tcp_timercache", sizeof (tcp_timer_t) + sizeof (mblk_t), 0, NULL, NULL, NULL, NULL, NULL, 0); diff --git a/usr/src/uts/common/inet/tcp/tcp6ddi.c b/usr/src/uts/common/inet/tcp/tcp6ddi.c index 3ccef00029..e724bdd022 100644 --- a/usr/src/uts/common/inet/tcp/tcp6ddi.c +++ b/usr/src/uts/common/inet/tcp/tcp6ddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,18 +32,15 @@ #include <inet/ip.h> #define INET_NAME "tcp6" -#define INET_STRTAB tcpinfo +#define INET_DEVSTRTAB tcpinfov6 #define INET_DEVDESC "TCP6 STREAMS driver %I%" -#define INET_MODDESC "TCP6 STREAMS module %I%" -#define INET_DEVMINOR TCP_MINOR6 +#define INET_DEVMINOR 0 /* * Note that unlike UDP, TCP uses synchronous STREAMS only * for TCP Fusion (loopback); this is why we don't define - * D_SYNCSTR here. Since TCP as a module is used only for - * SNMP purposes, we define _D_DIRECT for device instance. + * D_SYNCSTR here. */ #define INET_DEVMTFLAGS (D_MP|_D_DIRECT) -#define INET_MODMTFLAGS D_MP #include "../inetddi.c" @@ -52,8 +48,8 @@ int _init(void) { /* - * device initialization occurs in ipddi.c:_init() - * (i.e. it must be called before this routine) + * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() */ return (mod_install(&modlinkage)); } diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c index 6d7e1632ea..d04e91aef7 100644 --- a/usr/src/uts/common/inet/tcp/tcp_fusion.c +++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c @@ -35,6 +35,7 @@ #include <sys/tihdr.h> #include <inet/common.h> +#include <inet/optcom.h> #include <inet/ip.h> #include <inet/ip_impl.h> #include <inet/tcp.h> @@ -1143,8 +1144,8 @@ tcp_fuse_syncstr_disable(tcp_t *tcp) * Also restore SR_SIGALLDATA so that strrput() can generate * the signals again for future M_DATA messages. 
*/ - rq->q_qinfo = &tcp_rinit; - rq->q_struiot = tcp_rinit.qi_struiot; + rq->q_qinfo = &tcp_rinitv4; /* No open - same as rinitv6 */ + rq->q_struiot = tcp_rinitv4.qi_struiot; stp->sd_struiordq = NULL; stp->sd_rput_opt |= SR_SIGALLDATA; tcp->tcp_direct_sockfs = B_FALSE; diff --git a/usr/src/uts/common/inet/tcp/tcp_kssl.c b/usr/src/uts/common/inet/tcp/tcp_kssl.c index 5a4d11860a..74bee314b0 100644 --- a/usr/src/uts/common/inet/tcp/tcp_kssl.c +++ b/usr/src/uts/common/inet/tcp/tcp_kssl.c @@ -44,6 +44,7 @@ #include <netinet/tcp.h> #include <inet/common.h> +#include <inet/optcom.h> #include <inet/ipclassifier.h> #include <inet/ip.h> #include <inet/ip6.h> @@ -178,7 +179,7 @@ tcp_kssl_input(tcp_t *tcp, mblk_t *mp) */ kssl_hold_ctx(tcp->tcp_kssl_ctx); *((kssl_ctx_t *)ctxmp->b_rptr) = - tcp->tcp_kssl_ctx; + tcp->tcp_kssl_ctx; ctxmp->b_wptr = ctxmp->b_rptr + sizeof (kssl_ctx_t); diff --git a/usr/src/uts/common/inet/tcp/tcpddi.c b/usr/src/uts/common/inet/tcp/tcpddi.c index 391fc3e65d..436786b846 100644 --- a/usr/src/uts/common/inet/tcp/tcpddi.c +++ b/usr/src/uts/common/inet/tcp/tcpddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. 
*/ @@ -34,18 +33,18 @@ #include <inet/ip.h> #define INET_NAME "tcp" -#define INET_STRTAB tcpinfo +#define INET_MODSTRTAB dummymodinfo +#define INET_DEVSTRTAB tcpinfov4 #define INET_DEVDESC "TCP STREAMS driver %I%" -#define INET_MODDESC "TCP STREAMS module %I%" -#define INET_DEVMINOR TCP_MINOR +#define INET_MODDESC "TCP dummy STREAMS module %I%" +#define INET_DEVMINOR 0 +#define INET_MODMTFLAGS D_MP /* * Note that unlike UDP, TCP uses synchronous STREAMS only * for TCP Fusion (loopback); this is why we don't define - * D_SYNCSTR here. Since TCP as a module is used only for - * SNMP purposes, we define _D_DIRECT for device instance. + * D_SYNCSTR here. */ #define INET_DEVMTFLAGS (D_MP|_D_DIRECT) -#define INET_MODMTFLAGS D_MP #include "../inetddi.c" @@ -53,8 +52,8 @@ int _init(void) { /* - * device initialization occurs in ipddi.c:_init() - * (i.e. it must be called before this routine) + * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() */ return (mod_install(&modlinkage)); } diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h index cfbbe84e3a..f04d726d95 100644 --- a/usr/src/uts/common/inet/tcp_impl.h +++ b/usr/src/uts/common/inet/tcp_impl.h @@ -249,7 +249,7 @@ typedef struct tcpparam_s { #define tcps_keepalive_abort_interval tcps_params[63].tcp_param_val #define tcps_keepalive_abort_interval_low tcps_params[63].tcp_param_min -extern struct qinit tcp_loopback_rinit, tcp_rinit; +extern struct qinit tcp_loopback_rinit, tcp_rinitv4, tcp_rinitv6; extern boolean_t do_tcp_fusion; extern int tcp_maxpsz_set(tcp_t *, boolean_t); @@ -271,6 +271,14 @@ extern int tcp_fuse_rinfop(queue_t *, infod_t *); extern size_t tcp_fuse_set_rcv_hiwat(tcp_t *, size_t); extern int tcp_fuse_maxpsz_set(tcp_t *); +/* + * Object to represent database of options to search passed to + * {sock,tpi}optcom_req() interface routine to take care of option + * management and associated 
methods. + */ +extern optdb_obj_t tcp_opt_obj; +extern uint_t tcp_max_optsize; + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index 301c397cf6..91c3cd6772 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -98,101 +98,39 @@ const char udp_version[] = "%Z%%M% %I% %E% SMI"; /* * Synchronization notes: * - * UDP uses a combination of its internal perimeter, a global lock and - * a set of bind hash locks to protect its data structures. Please see - * the note above udp_mode_assertions for details about the internal - * perimeter. + * UDP is MT and uses the usual kernel synchronization primitives. There are 2 + * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. + * We also use conn_lock when updating things that affect the IP classifier + * lookup. + * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. * + * The fanout lock uf_lock: * When a UDP endpoint is bound to a local port, it is inserted into * a bind hash list. The list consists of an array of udp_fanout_t buckets. * The size of the array is controlled by the udp_bind_fanout_size variable. * This variable can be changed in /etc/system if the default value is * not large enough. Each bind hash bucket is protected by a per bucket * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t - * structure. An UDP endpoint is removed from the bind hash list only - * when it is being unbound or being closed. The per bucket lock also - * protects a UDP endpoint's state changes. + * structure and a few other fields in the udp_t. A UDP endpoint is removed + * from the bind hash list only when it is being unbound or being closed. + * The per bucket lock also protects a UDP endpoint's state changes. * - * Plumbing notes: - * - * Both udp and ip are merged, but the streams plumbing is kept unchanged - * in that udp is always pushed atop /dev/ip. 
This is done to preserve - * backwards compatibility for certain applications which rely on such - * plumbing geometry to do things such as issuing I_POP on the stream - * in order to obtain direct access to /dev/ip, etc. - * - * All UDP processings happen in the /dev/ip instance; the udp module - * instance does not possess any state about the endpoint, and merely - * acts as a dummy module whose presence is to keep the streams plumbing - * appearance unchanged. At open time /dev/ip allocates a conn_t that - * happens to embed a udp_t. This stays dormant until the time udp is - * pushed, which indicates to /dev/ip that it must convert itself from - * an IP to a UDP endpoint. - * - * We only allow for the following plumbing cases: + * The udp_rwlock: + * This protects most of the other fields in the udp_t. The exact list of + * fields which are protected by each of the above locks is documented in + * the udp_t structure definition. * - * Normal: - * /dev/ip is first opened and later udp is pushed directly on top. - * This is the default action that happens when a udp socket or - * /dev/udp is opened. The conn_t created by /dev/ip instance is - * now shared and is marked with IPCL_UDP. - * - * SNMP-only: - * udp is pushed on top of a module other than /dev/ip. When this - * happens it will support only SNMP semantics. A new conn_t is - * allocated and marked with IPCL_UDPMOD. + * Plumbing notes: + * UDP is always a device driver. For compatibility with mibopen() code + * it is possible to I_PUSH "udp", but that results in pushing a passthrough + * dummy module. * - * The above cases imply that we don't support any intermediate module to + * The above implies that we don't support any intermediate module to * reside in between /dev/ip and udp -- in fact, we never supported such * scenario in the past as the inter-layer communication semantics have - * always been private. 
Also note that the normal case allows for SNMP - * requests to be processed in addition to the rest of UDP operations. - * - * The normal case plumbing is depicted by the following diagram: - * - * +---------------+---------------+ - * | | | udp - * | udp_wq | udp_rq | - * | | UDP_RD | - * | | | - * +---------------+---------------+ - * | ^ - * v | - * +---------------+---------------+ - * | | | /dev/ip - * | ip_wq | ip_rq | conn_t - * | UDP_WR | | - * | | | - * +---------------+---------------+ - * - * Messages arriving at udp_wq from above will end up in ip_wq before - * it gets processed, i.e. udp write entry points will advance udp_wq - * and use its q_next value as ip_wq in order to use the conn_t that - * is stored in its q_ptr. Likewise, messages generated by ip to the - * module above udp will appear as if they are originated from udp_rq, - * i.e. putnext() calls to the module above udp is done using the - * udp_rq instead of ip_rq in order to avoid udp_rput() which does - * nothing more than calling putnext(). - * - * The above implies the following rule of thumb: - * - * 1. udp_t is obtained from conn_t, which is created by the /dev/ip - * instance and is stored in q_ptr of both ip_wq and ip_rq. There - * is no direct reference to conn_t from either udp_wq or udp_rq. - * - * 2. Write-side entry points of udp can obtain the conn_t via the - * Q_TO_CONN() macro, using the queue value obtain from UDP_WR(). - * - * 3. While in /dev/ip context, putnext() to the module above udp can - * be done by supplying the queue value obtained from UDP_RD(). - * + * always been private. 
*/ -static queue_t *UDP_WR(queue_t *); -static queue_t *UDP_RD(queue_t *); - -struct kmem_cache *udp_cache; - /* For /etc/system control */ uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; @@ -217,7 +155,10 @@ static void udp_addr_req(queue_t *q, mblk_t *mp); static void udp_bind(queue_t *q, mblk_t *mp); static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); -static int udp_build_hdrs(queue_t *q, udp_t *udp); +static void udp_bind_result(conn_t *, mblk_t *); +static void udp_bind_ack(conn_t *, mblk_t *mp); +static void udp_bind_error(conn_t *, mblk_t *mp); +static int udp_build_hdrs(udp_t *udp); static void udp_capability_req(queue_t *q, mblk_t *mp); static int udp_close(queue_t *q); static void udp_connect(queue_t *q, mblk_t *mp); @@ -235,9 +176,16 @@ static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, static void udp_icmp_error(queue_t *q, mblk_t *mp); static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); static void udp_info_req(queue_t *q, mblk_t *mp); +static void udp_input(void *, mblk_t *, void *); static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length); +static void udp_lrput(queue_t *, mblk_t *); +static void udp_lwput(queue_t *, mblk_t *); static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, + cred_t *credp, boolean_t isv6); +static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, + cred_t *credp); +static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp); static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, udpattrs_t *udpattrs); @@ -247,11 +195,8 @@ static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static void udp_report_item(mblk_t *mp, udp_t *udp); -static void udp_rput(queue_t *q, mblk_t *mp); -static void udp_rput_other(queue_t *, mblk_t *); 
static int udp_rinfop(queue_t *q, infod_t *dp); static int udp_rrw(queue_t *q, struiod_t *dp); -static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); static void udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *); @@ -260,15 +205,12 @@ static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, static void udp_unbind(queue_t *q, mblk_t *mp); static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random); -static void udp_wput(queue_t *q, mblk_t *mp); static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, int *, boolean_t); static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error); static void udp_wput_other(queue_t *q, mblk_t *mp); static void udp_wput_iocdata(queue_t *q, mblk_t *mp); -static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, - socklen_t addrlen); static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); @@ -279,56 +221,62 @@ static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); static void *udp_kstat2_init(netstackid_t, udp_stat_t *); static void udp_kstat2_fini(netstackid_t, kstat_t *); static int udp_kstat_update(kstat_t *kp, int rw); -static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); -static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); -static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); -static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len); static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); -static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); -static void udp_exit(conn_t *); -static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); -#ifdef DEBUG -static void udp_mode_assertions(udp_t *, int); -#endif /* DEBUG */ - 
-major_t UDP6_MAJ; -#define UDP6 "udp6" +static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); #define UDP_RECV_HIWATER (56 * 1024) #define UDP_RECV_LOWATER 128 #define UDP_XMIT_HIWATER (56 * 1024) #define UDP_XMIT_LOWATER 1024 -static struct module_info udp_info = { +static struct module_info udp_mod_info = { UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER }; -static struct qinit udp_rinit = { - (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, - &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD +/* + * Entry points for UDP as a device. + * We have separate open functions for the /dev/udp and /dev/udp6 devices. + */ +static struct qinit udp_rinitv4 = { + NULL, NULL, udp_openv4, udp_close, NULL, + &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD +}; + +static struct qinit udp_rinitv6 = { + NULL, NULL, udp_openv6, udp_close, NULL, + &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD }; static struct qinit udp_winit = { (pfi_t)udp_wput, NULL, NULL, NULL, NULL, - &udp_info, NULL, NULL, NULL, STRUIOT_NONE + &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE }; -/* Support for just SNMP if UDP is not pushed directly over device IP */ -struct qinit udp_snmp_rinit = { - (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, - &udp_info, NULL, NULL, NULL, STRUIOT_NONE +/* + * UDP needs to handle I_LINK and I_PLINK since ifconfig + * likes to use it as a place to hang the various streams. 
+ */ +static struct qinit udp_lrinit = { + (pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL, + &udp_mod_info }; -struct qinit udp_snmp_winit = { - (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, - &udp_info, NULL, NULL, NULL, STRUIOT_NONE +static struct qinit udp_lwinit = { + (pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL, + &udp_mod_info }; -struct streamtab udpinfo = { - &udp_rinit, &udp_winit +/* For AF_INET aka /dev/udp */ +struct streamtab udpinfov4 = { + &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit +}; + +/* For AF_INET6 aka /dev/udp6 */ +struct streamtab udpinfov6 = { + &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit }; static sin_t sin_null; /* Zero address for quick clears */ @@ -409,429 +357,6 @@ void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, typedef union T_primitives *t_primp_t; -#define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \ - ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \ - ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ - (mp)->b_queue = (queue_t *)((uintptr_t)tag); \ - (mp)->b_prev = (mblk_t *)proc; \ - if ((udp)->udp_mphead == NULL) \ - (udp)->udp_mphead = (mp); \ - else \ - (udp)->udp_mptail->b_next = (mp); \ - (udp)->udp_mptail = (mp); \ - (udp)->udp_mpcount++; \ -} - -#define UDP_READERS_INCREF(udp) { \ - ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ - (udp)->udp_reader_count++; \ -} - -#define UDP_READERS_DECREF(udp) { \ - ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ - (udp)->udp_reader_count--; \ - if ((udp)->udp_reader_count == 0) \ - cv_broadcast(&(udp)->udp_connp->conn_cv); \ -} - -#define UDP_SQUEUE_DECREF(udp) { \ - ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ - (udp)->udp_squeue_count--; \ - if ((udp)->udp_squeue_count == 0) \ - cv_broadcast(&(udp)->udp_connp->conn_cv); \ -} - -/* - * Notes on UDP endpoint synchronization: - * - * UDP needs exclusive operation on a per endpoint basis, when executing - * functions that modify the endpoint state. 
udp_rput_other() deals with - * packets with IP options, and processing these packets end up having - * to update the endpoint's option related state. udp_wput_other() deals - * with control operations from the top, e.g. connect() that needs to - * update the endpoint state. These could be synchronized using locks, - * but the current version uses squeues for this purpose. squeues may - * give performance improvement for certain cases such as connected UDP - * sockets; thus the framework allows for using squeues. - * - * The perimeter routines are described as follows: - * - * udp_enter(): - * Enter the UDP endpoint perimeter. - * - * udp_become_writer(): - * Become exclusive on the UDP endpoint. Specifies a function - * that will be called exclusively either immediately or later - * when the perimeter is available exclusively. - * - * udp_exit(): - * Exit the UDP perimeter. - * - * Entering UDP from the top or from the bottom must be done using - * udp_enter(). No lock must be held while attempting to enter the UDP - * perimeter. When finished, udp_exit() must be called to get out of - * the perimeter. - * - * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode, - * multiple threads may enter a UDP endpoint concurrently. This is used - * for sending and/or receiving normal data. Control operations and other - * special cases call udp_become_writer() to become exclusive on a per - * endpoint basis and this results in transitioning to SQUEUE mode. squeue - * by definition serializes access to the conn_t. When there are no more - * pending messages on the squeue for the UDP connection, the endpoint - * reverts to MT_HOT mode. During the interregnum when not all MT threads - * of an endpoint have finished, messages are queued in the UDP endpoint - * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode. 
- * - * These modes have the following analogs: - * - * UDP_MT_HOT/udp_reader_count==0 none - * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK - * UDP_MT_QUEUED RW_WRITE_WANTED - * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED - * - * Stable modes: UDP_MT_HOT, UDP_SQUEUE - * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE - * - * While in stable modes, UDP keeps track of the number of threads - * operating on the endpoint. The udp_reader_count variable represents - * the number of threads entering the endpoint as readers while it is - * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there - * is only a single reader, i.e. when this counter drops to 1. Likewise, - * udp_squeue_count represents the number of threads operating on the - * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition - * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e. - * when this counter drops to 0. - * - * The default mode is set to UDP_MT_HOT and UDP alternates between - * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below. 
- * - * Mode transition: - * ---------------------------------------------------------------- - * old mode Event New mode - * ---------------------------------------------------------------- - * UDP_MT_HOT Call to udp_become_writer() UDP_SQUEUE - * and udp_reader_count == 1 - * - * UDP_MT_HOT Call to udp_become_writer() UDP_MT_QUEUED - * and udp_reader_count > 1 - * - * UDP_MT_QUEUED udp_reader_count drops to zero UDP_QUEUED_SQUEUE - * - * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_SQUEUE - * internal UDP queue successfully - * moved to squeue AND udp_squeue_count != 0 - * - * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_MT_HOT - * internal UDP queue successfully - * moved to squeue AND udp_squeue_count - * drops to zero - * - * UDP_SQUEUE udp_squeue_count drops to zero UDP_MT_HOT - * ---------------------------------------------------------------- - */ - -static queue_t * -UDP_WR(queue_t *q) -{ - ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL); - ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL); - ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next))); - - return (_WR(q)->q_next); -} - -static queue_t * -UDP_RD(queue_t *q) -{ - ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL); - ASSERT(IPCL_IS_UDP(Q_TO_CONN(q))); - ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL); - - return (_RD(q)->q_next); -} - -#ifdef DEBUG -#define UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller) -#else -#define UDP_MODE_ASSERTIONS(udp, caller) -#endif - -/* Invariants */ -#ifdef DEBUG - -uint32_t udp_count[4]; - -/* Context of udp_mode_assertions */ -#define UDP_ENTER 1 -#define UDP_BECOME_WRITER 2 -#define UDP_EXIT 3 - -static void -udp_mode_assertions(udp_t *udp, int caller) -{ - ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock)); - - switch (udp->udp_mode) { - case UDP_MT_HOT: - /* - * Messages have not yet been enqueued on the internal queue, - * otherwise we would have switched to UDP_MT_QUEUED. 
Likewise - * by definition, there can't be any messages enqueued on the - * squeue. The UDP could be quiescent, so udp_reader_count - * could be zero at entry. - */ - ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 && - udp->udp_squeue_count == 0); - ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0); - udp_count[0]++; - break; - - case UDP_MT_QUEUED: - /* - * The last MT thread to exit the udp perimeter empties the - * internal queue and then switches the UDP to - * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED - * mode, it means there must be at least 1 MT thread still in - * the perimeter and at least 1 message on the internal queue. - */ - ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL && - udp->udp_mpcount != 0 && udp->udp_squeue_count == 0); - udp_count[1]++; - break; - - case UDP_QUEUED_SQUEUE: - /* - * The switch has happened from MT to SQUEUE. So there can't - * any MT threads. Messages could still pile up on the internal - * queue until the transition is complete and we move to - * UDP_SQUEUE mode. We can't assert on nonzero udp_squeue_count - * since the squeue could drain any time. - */ - ASSERT(udp->udp_reader_count == 0); - udp_count[2]++; - break; - - case UDP_SQUEUE: - /* - * The transition is complete. Thre can't be any messages on - * the internal queue. The udp could be quiescent or the squeue - * could drain any time, so we can't assert on nonzero - * udp_squeue_count during entry. Nor can we assert that - * udp_reader_count is zero, since, a reader thread could have - * directly become writer in line by calling udp_become_writer - * without going through the queued states. 
- */ - ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0); - ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0); - udp_count[3]++; - break; - } -} -#endif - -#define _UDP_ENTER(connp, mp, proc, tag) { \ - udp_t *_udp = (connp)->conn_udp; \ - \ - mutex_enter(&(connp)->conn_lock); \ - if ((connp)->conn_state_flags & CONN_CLOSING) { \ - mutex_exit(&(connp)->conn_lock); \ - freemsg(mp); \ - } else { \ - UDP_MODE_ASSERTIONS(_udp, UDP_ENTER); \ - \ - switch (_udp->udp_mode) { \ - case UDP_MT_HOT: \ - /* We can execute as reader right away. */ \ - UDP_READERS_INCREF(_udp); \ - mutex_exit(&(connp)->conn_lock); \ - (*(proc))(connp, mp, (connp)->conn_sqp); \ - break; \ - \ - case UDP_SQUEUE: \ - /* \ - * We are in squeue mode, send the \ - * packet to the squeue \ - */ \ - _udp->udp_squeue_count++; \ - CONN_INC_REF_LOCKED(connp); \ - mutex_exit(&(connp)->conn_lock); \ - squeue_enter((connp)->conn_sqp, mp, proc, \ - connp, tag); \ - break; \ - \ - case UDP_MT_QUEUED: \ - case UDP_QUEUED_SQUEUE: \ - /* \ - * Some messages may have been enqueued \ - * ahead of us. Enqueue the new message \ - * at the tail of the internal queue to \ - * preserve message ordering. \ - */ \ - UDP_ENQUEUE_MP(_udp, mp, proc, tag); \ - mutex_exit(&(connp)->conn_lock); \ - break; \ - } \ - } \ -} - -static void -udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) -{ - _UDP_ENTER(connp, mp, proc, tag); -} - -static void -udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) -{ - udp_t *udp; - - udp = connp->conn_udp; - - mutex_enter(&connp->conn_lock); - - UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER); - - switch (udp->udp_mode) { - case UDP_MT_HOT: - if (udp->udp_reader_count == 1) { - /* - * We are the only MT thread. Switch to squeue mode - * immediately. 
- */ - udp->udp_mode = UDP_SQUEUE; - udp->udp_squeue_count = 1; - CONN_INC_REF_LOCKED(connp); - mutex_exit(&connp->conn_lock); - squeue_enter(connp->conn_sqp, mp, proc, connp, tag); - return; - } - /* FALLTHRU */ - - case UDP_MT_QUEUED: - /* Enqueue the packet internally in UDP */ - udp->udp_mode = UDP_MT_QUEUED; - UDP_ENQUEUE_MP(udp, mp, proc, tag); - mutex_exit(&connp->conn_lock); - return; - - case UDP_SQUEUE: - case UDP_QUEUED_SQUEUE: - /* - * We are already exclusive. i.e. we are already - * writer. Simply call the desired function. - */ - udp->udp_squeue_count++; - mutex_exit(&connp->conn_lock); - (*proc)(connp, mp, connp->conn_sqp); - return; - } -} - -/* - * Transition from MT mode to SQUEUE mode, when the last MT thread - * is exiting the UDP perimeter. Move all messages from the internal - * udp queue to the squeue. A better way would be to move all the - * messages in one shot, this needs more support from the squeue framework - */ -static void -udp_switch_to_squeue(udp_t *udp) -{ - mblk_t *mp; - mblk_t *mp_next; - sqproc_t proc; - uint8_t tag; - conn_t *connp = udp->udp_connp; - - ASSERT(MUTEX_HELD(&connp->conn_lock)); - ASSERT(udp->udp_mode == UDP_MT_QUEUED); - while (udp->udp_mphead != NULL) { - mp = udp->udp_mphead; - udp->udp_mphead = NULL; - udp->udp_mptail = NULL; - udp->udp_mpcount = 0; - udp->udp_mode = UDP_QUEUED_SQUEUE; - mutex_exit(&connp->conn_lock); - /* - * It is best not to hold any locks across the calls - * to squeue functions. 
Since we drop the lock we - * need to go back and check the udp_mphead once again - * after the squeue_fill and hence the while loop at - * the top of this function - */ - for (; mp != NULL; mp = mp_next) { - mp_next = mp->b_next; - proc = (sqproc_t)mp->b_prev; - tag = (uint8_t)((uintptr_t)mp->b_queue); - mp->b_next = NULL; - mp->b_prev = NULL; - mp->b_queue = NULL; - CONN_INC_REF(connp); - udp->udp_squeue_count++; - squeue_fill(connp->conn_sqp, mp, proc, connp, - tag); - } - mutex_enter(&connp->conn_lock); - } - /* - * udp_squeue_count of zero implies that the squeue has drained - * even before we arrived here (i.e. after the squeue_fill above) - */ - udp->udp_mode = (udp->udp_squeue_count != 0) ? - UDP_SQUEUE : UDP_MT_HOT; -} - -#define _UDP_EXIT(connp) { \ - udp_t *_udp = (connp)->conn_udp; \ - \ - mutex_enter(&(connp)->conn_lock); \ - UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ - \ - switch (_udp->udp_mode) { \ - case UDP_MT_HOT: \ - UDP_READERS_DECREF(_udp); \ - mutex_exit(&(connp)->conn_lock); \ - break; \ - \ - case UDP_SQUEUE: \ - UDP_SQUEUE_DECREF(_udp); \ - if (_udp->udp_squeue_count == 0) \ - _udp->udp_mode = UDP_MT_HOT; \ - mutex_exit(&(connp)->conn_lock); \ - break; \ - \ - case UDP_MT_QUEUED: \ - /* \ - * If this is the last MT thread, we need to \ - * switch to squeue mode \ - */ \ - UDP_READERS_DECREF(_udp); \ - if (_udp->udp_reader_count == 0) \ - udp_switch_to_squeue(_udp); \ - mutex_exit(&(connp)->conn_lock); \ - break; \ - \ - case UDP_QUEUED_SQUEUE: \ - UDP_SQUEUE_DECREF(_udp); \ - /* \ - * Even if the udp_squeue_count drops to zero, we \ - * don't want to change udp_mode to UDP_MT_HOT here. \ - * The thread in udp_switch_to_squeue will take care \ - * of the transition to UDP_MT_HOT, after emptying \ - * any more new messages that have been enqueued in \ - * udp_mphead. 
\ - */ \ - mutex_exit(&(connp)->conn_lock); \ - break; \ - } \ -} - -static void -udp_exit(conn_t *connp) -{ - _UDP_EXIT(connp); -} - /* * Return the next anonymous port in the privileged port range for * bind checking. @@ -988,9 +513,7 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) udp_t *udpnext; ASSERT(MUTEX_HELD(&uf->uf_lock)); - if (udp->udp_ptpbhn != NULL) { - udp_bind_hash_remove(udp, B_TRUE); - } + ASSERT(udp->udp_ptpbhn == NULL); udpp = &uf->uf_udp; udpnext = udpp[0]; if (udpnext != NULL) { @@ -1068,7 +591,6 @@ udp_bind(queue_t *q, mblk_t *mp) udp_err_ack(q, mp, TPROTO, 0); return; } - if (udp->udp_state != TS_UNBND) { (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "udp_bind: bad state, %u", udp->udp_state); @@ -1198,9 +720,25 @@ udp_bind(queue_t *q, mblk_t *mp) } /* + * The state must be TS_UNBND. TPI mandates that users must send + * TPI primitives only 1 at a time and wait for the response before + * sending the next primitive. + */ + rw_enter(&udp->udp_rwlock, RW_WRITER); + if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { + rw_exit(&udp->udp_rwlock); + (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, + "udp_bind: bad state, %u", udp->udp_state); + udp_err_ack(q, mp, TOUTSTATE, 0); + return; + } + udp->udp_pending_op = tbr->PRIM_type; + /* * Copy the source address into our udp structure. This address * may still be zero; if so, IP will fill in the correct address - * each time an outbound packet is passed to it. + * each time an outbound packet is passed to it. 
Since the udp is + * not yet in the bind hash list, we don't grab the uf_lock to + * change udp_ipversion */ if (udp->udp_family == AF_INET) { ASSERT(sin != NULL); @@ -1212,6 +750,10 @@ udp_bind(queue_t *q, mblk_t *mp) ASSERT(sin6 != NULL); v6src = sin6->sin6_addr; if (IN6_IS_ADDR_V4MAPPED(&v6src)) { + /* + * no need to hold the uf_lock to set the udp_ipversion + * since we are not yet in the fanout list + */ udp->udp_ipversion = IPV4_VERSION; udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + udp->udp_ip_snd_options_len; @@ -1383,6 +925,8 @@ udp_bind(queue_t *q, mblk_t *mp) * the routine (and exit the loop). * */ + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TADDRBUSY, 0); return; } @@ -1412,6 +956,8 @@ udp_bind(queue_t *q, mblk_t *mp) * there are none available, so send an error * to the user. */ + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TNOADDR, 0); return; } @@ -1421,8 +967,9 @@ udp_bind(queue_t *q, mblk_t *mp) * Copy the source address into our udp structure. This address * may still be zero; if so, ip will fill in the correct address * each time an outbound packet is passed to it. - * If we are binding to a broadcast or multicast address udp_rput - * will clear the source address when it receives the T_BIND_ACK. + * If we are binding to a broadcast or multicast address then + * udp_bind_ack will clear the source address when it receives + * the T_BIND_ACK. 
*/ udp->udp_v6src = udp->udp_bound_v6src = v6src; udp->udp_port = lport; @@ -1442,8 +989,10 @@ udp_bind(queue_t *q, mblk_t *mp) sin6->sin6_port = udp->udp_port; /* Rebuild the header template */ - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) { + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); mutex_exit(&udpf->uf_lock); udp_err_ack(q, mp, TSYSERR, error); return; @@ -1452,6 +1001,7 @@ udp_bind(queue_t *q, mblk_t *mp) udp->udp_state = TS_IDLE; udp_bind_hash_insert(udpf, udp); mutex_exit(&udpf->uf_lock); + rw_exit(&udp->udp_rwlock); if (cl_inet_bind) { /* @@ -1480,8 +1030,11 @@ udp_bind(queue_t *q, mblk_t *mp) connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : mlptSingle; addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, - &v6src, udp->udp_us->us_netstack->netstack_ip); + &v6src, us->us_netstack->netstack_ip); if (addrtype == mlptSingle) { + rw_enter(&udp->udp_rwlock, RW_WRITER); + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TNOADDR, 0); connp->conn_anon_port = B_FALSE; connp->conn_mlp_type = mlptSingle; @@ -1499,6 +1052,9 @@ udp_bind(queue_t *q, mblk_t *mp) "udp_bind: no priv for multilevel port %d", mlpport); } + rw_enter(&udp->udp_rwlock, RW_WRITER); + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TACCES, 0); connp->conn_anon_port = B_FALSE; connp->conn_mlp_type = mlptSingle; @@ -1529,6 +1085,9 @@ udp_bind(queue_t *q, mblk_t *mp) mlpport, connp->conn_zoneid, mlpzone); } + rw_enter(&udp->udp_rwlock, RW_WRITER); + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TACCES, 0); connp->conn_anon_port = B_FALSE; connp->conn_mlp_type = mlptSingle; @@ -1547,6 +1106,9 @@ udp_bind(queue_t *q, mblk_t *mp) "udp_bind: cannot establish anon " "MLP for port %d", port); } + rw_enter(&udp->udp_rwlock, RW_WRITER); + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TACCES, 0); connp->conn_anon_port = B_FALSE; 
connp->conn_mlp_type = mlptSingle; @@ -1565,6 +1127,9 @@ udp_bind(queue_t *q, mblk_t *mp) */ mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); if (!mp->b_cont) { + rw_enter(&udp->udp_rwlock, RW_WRITER); + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TSYSERR, ENOMEM); return; } @@ -1576,34 +1141,25 @@ udp_bind(queue_t *q, mblk_t *mp) else mp = ip_bind_v4(q, mp, connp); + /* The above return NULL if the bind needs to be deferred */ if (mp != NULL) - udp_rput_other(_RD(q), mp); + udp_bind_result(connp, mp); else CONN_INC_REF(connp); } - -void -udp_resume_bind(conn_t *connp, mblk_t *mp) -{ - udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY); -} - /* - * This is called from ip_wput_nondata to resume a deferred UDP bind. + * This is called from ip_wput_nondata to handle the results of a + * deferred UDP bind. It is called once the bind has been completed. */ -/* ARGSUSED */ -static void -udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) +void +udp_resume_bind(conn_t *connp, mblk_t *mp) { - conn_t *connp = arg; - ASSERT(connp != NULL && IPCL_IS_UDP(connp)); - udp_rput_other(connp->conn_rq, mp); + udp_bind_result(connp, mp); CONN_OPER_PENDING_DONE(connp); - udp_exit(connp); } /* @@ -1616,11 +1172,11 @@ udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) * T_OK_ACK - for the T_CONN_REQ * T_CONN_CON - to keep the TPI user happy * - * The connect completes in udp_rput. + * The connect completes in udp_bind_result. * When a T_BIND_ACK is received information is extracted from the IRE * and the two appended messages are sent to the TPI user. - * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert - * it to an error ack for the appropriate primitive. + * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will + * convert it to an error ack for the appropriate primitive. 
*/ static void udp_connect(queue_t *q, mblk_t *mp) @@ -1635,10 +1191,11 @@ udp_connect(queue_t *q, mblk_t *mp) mblk_t *mp1, *mp2; udp_fanout_t *udpf; udp_t *udp, *udp1; + ushort_t ipversion; udp_stack_t *us; + conn_t *connp = Q_TO_CONN(q); - udp = Q_TO_UDP(q); - + udp = connp->conn_udp; tcr = (struct T_conn_req *)mp->b_rptr; us = udp->udp_us; @@ -1647,28 +1204,6 @@ udp_connect(queue_t *q, mblk_t *mp) udp_err_ack(q, mp, TPROTO, 0); return; } - /* - * This UDP must have bound to a port already before doing - * a connect. - */ - if (udp->udp_state == TS_UNBND) { - (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, - "udp_connect: bad state, %u", udp->udp_state); - udp_err_ack(q, mp, TOUTSTATE, 0); - return; - } - ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); - - udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, - us->us_bind_fanout_size)]; - - if (udp->udp_state == TS_DATA_XFER) { - /* Already connected - clear out state */ - mutex_enter(&udpf->uf_lock); - udp->udp_v6src = udp->udp_bound_v6src; - udp->udp_state = TS_IDLE; - mutex_exit(&udpf->uf_lock); - } if (tcr->OPT_length != 0) { udp_err_ack(q, mp, TBADOPT, 0); @@ -1702,8 +1237,7 @@ udp_connect(queue_t *q, mblk_t *mp) dstport = sin->sin_port; IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); ASSERT(udp->udp_ipversion == IPV4_VERSION); - udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + - udp->udp_ip_snd_options_len; + ipversion = IPV4_VERSION; break; case sizeof (sin6_t): @@ -1719,18 +1253,15 @@ udp_connect(queue_t *q, mblk_t *mp) return; } v6dst = sin6->sin6_addr; + dstport = sin6->sin6_port; if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); - udp->udp_ipversion = IPV4_VERSION; - udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + - UDPH_SIZE + udp->udp_ip_snd_options_len; + ipversion = IPV4_VERSION; flowinfo = 0; } else { - udp->udp_ipversion = IPV6_VERSION; - udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; + ipversion = IPV6_VERSION; flowinfo = sin6->sin6_flowinfo; } - dstport = 
sin6->sin6_port; break; } if (dstport == 0) { @@ -1738,11 +1269,46 @@ udp_connect(queue_t *q, mblk_t *mp) return; } + rw_enter(&udp->udp_rwlock, RW_WRITER); + + /* + * This UDP must have bound to a port already before doing a connect. + * TPI mandates that users must send TPI primitives only 1 at a time + * and wait for the response before sending the next primitive. + */ + if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { + rw_exit(&udp->udp_rwlock); + (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, + "udp_connect: bad state, %u", udp->udp_state); + udp_err_ack(q, mp, TOUTSTATE, 0); + return; + } + udp->udp_pending_op = T_CONN_REQ; + ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); + + if (ipversion == IPV4_VERSION) { + udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + + udp->udp_ip_snd_options_len; + } else { + udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; + } + + udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + us->us_bind_fanout_size)]; + + mutex_enter(&udpf->uf_lock); + if (udp->udp_state == TS_DATA_XFER) { + /* Already connected - clear out state */ + udp->udp_v6src = udp->udp_bound_v6src; + udp->udp_state = TS_IDLE; + } + /* * Create a default IP header with no IP options. */ udp->udp_dstport = dstport; - if (udp->udp_ipversion == IPV4_VERSION) { + udp->udp_ipversion = ipversion; + if (ipversion == IPV4_VERSION) { /* * Interpret a zero destination to mean loopback. 
* Update the T_CONN_REQ (sin/sin6) since it is used to @@ -1794,10 +1360,9 @@ udp_connect(queue_t *q, mblk_t *mp) } /* - * Verify that the src/port/dst/port and zoneid is unique for all + * Verify that the src/port/dst/port is unique for all * connections in TS_DATA_XFER */ - mutex_enter(&udpf->uf_lock); for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { if (udp1->udp_state != TS_DATA_XFER) continue; @@ -1812,6 +1377,8 @@ udp_connect(queue_t *q, mblk_t *mp) udp->udp_connp->conn_zoneid))) continue; mutex_exit(&udpf->uf_lock); + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TBADADDR, 0); return; } @@ -1828,17 +1395,20 @@ udp_connect(queue_t *q, mblk_t *mp) else mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); if (mp1 == NULL) { - udp_err_ack(q, mp, TSYSERR, ENOMEM); bind_failed: mutex_enter(&udpf->uf_lock); udp->udp_state = TS_IDLE; + udp->udp_pending_op = -1; mutex_exit(&udpf->uf_lock); + rw_exit(&udp->udp_rwlock); + udp_err_ack(q, mp, TSYSERR, ENOMEM); return; } + rw_exit(&udp->udp_rwlock); /* * We also have to send a connection confirmation to - * keep TLI happy. Prepare it for udp_rput. + * keep TLI happy. Prepare it for udp_bind_result. */ if (udp->udp_family == AF_INET) mp2 = mi_tpi_conn_con(NULL, (char *)sin, @@ -1848,7 +1418,7 @@ bind_failed: sizeof (*sin6), NULL, 0); if (mp2 == NULL) { freemsg(mp1); - udp_err_ack(q, mp, TSYSERR, ENOMEM); + rw_enter(&udp->udp_rwlock, RW_WRITER); goto bind_failed; } @@ -1856,36 +1426,43 @@ bind_failed: if (mp == NULL) { /* Unable to reuse the T_CONN_REQ for the ack. */ freemsg(mp2); + rw_enter(&udp->udp_rwlock, RW_WRITER); + mutex_enter(&udpf->uf_lock); + udp->udp_state = TS_IDLE; + udp->udp_pending_op = -1; + mutex_exit(&udpf->uf_lock); + rw_exit(&udp->udp_rwlock); udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); - goto bind_failed; + return; } /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ linkb(mp1, mp); linkb(mp1, mp2); - mblk_setcred(mp1, udp->udp_connp->conn_cred); + mblk_setcred(mp1, connp->conn_cred); if (udp->udp_family == AF_INET) - mp1 = ip_bind_v4(q, mp1, udp->udp_connp); + mp1 = ip_bind_v4(q, mp1, connp); else - mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); + mp1 = ip_bind_v6(q, mp1, connp, NULL); + /* The above return NULL if the bind needs to be deferred */ if (mp1 != NULL) - udp_rput_other(_RD(q), mp1); + udp_bind_result(connp, mp1); else - CONN_INC_REF(udp->udp_connp); + CONN_INC_REF(connp); } static int udp_close(queue_t *q) { - conn_t *connp = Q_TO_CONN(UDP_WR(q)); + conn_t *connp = (conn_t *)q->q_ptr; udp_t *udp; - queue_t *ip_rq = RD(UDP_WR(q)); ASSERT(connp != NULL && IPCL_IS_UDP(connp)); udp = connp->conn_udp; + udp_quiesce_conn(connp); ip_quiesce_conn(connp); /* * Disable read-side synchronous stream @@ -1896,11 +1473,6 @@ udp_close(queue_t *q) qprocsoff(q); - /* restore IP module's high and low water marks to default values */ - ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat; - WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat; - WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat; - ASSERT(udp->udp_rcv_cnt == 0); ASSERT(udp->udp_rcv_msgcnt == 0); ASSERT(udp->udp_rcv_list_head == NULL); @@ -1909,23 +1481,28 @@ udp_close(queue_t *q) udp_close_free(connp); /* - * Restore connp as an IP endpoint. - * Locking required to prevent a race with udp_snmp_get()/ - * ipcl_get_next_conn(), which selects conn_t which are - * IPCL_UDP and not CONN_CONDEMNED. + * Now we are truly single threaded on this stream, and can + * delete the things hanging off the connp, and finally the connp. + * We removed this connp from the fanout list, it cannot be + * accessed thru the fanouts, and we already waited for the + * conn_ref to drop to 0. We are already in close, so + * there cannot be any other thread from the top. qprocsoff + * has completed, and service has completed or won't run in + * future. 
*/ - mutex_enter(&connp->conn_lock); - connp->conn_flags &= ~IPCL_UDP; - connp->conn_state_flags &= - ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED); - connp->conn_ulp_labeled = B_FALSE; - mutex_exit(&connp->conn_lock); + ASSERT(connp->conn_ref == 1); + + inet_minor_free(ip_minor_arena, connp->conn_dev); + connp->conn_ref--; + ipcl_conn_destroy(connp); + + q->q_ptr = WR(q)->q_ptr = NULL; return (0); } /* - * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn + * Called in the close path to quiesce the conn */ void udp_quiesce_conn(conn_t *connp) @@ -1949,12 +1526,6 @@ udp_quiesce_conn(conn_t *connp) udp_bind_hash_remove(udp, B_FALSE); - mutex_enter(&connp->conn_lock); - while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 || - udp->udp_mode != UDP_MT_HOT) { - cv_wait(&connp->conn_cv, &connp->conn_lock); - } - mutex_exit(&connp->conn_lock); } void @@ -1982,12 +1553,6 @@ udp_close_free(conn_t *connp) } ip6_pkt_free(&udp->udp_sticky_ipp); - - udp->udp_connp = NULL; - netstack_rele(udp->udp_us->us_netstack); - - connp->conn_udp = NULL; - kmem_cache_free(udp_cache, udp); } /* @@ -2000,26 +1565,31 @@ udp_close_free(conn_t *connp) * T_BIND_REQ - specifying just the local address/port * T_OK_ACK - for the T_DISCON_REQ * - * The disconnect completes in udp_rput. + * The disconnect completes in udp_bind_result. * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. - * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert - * it to an error ack for the appropriate primitive. + * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will + * convert it to an error ack for the appropriate primitive. 
*/ static void udp_disconnect(queue_t *q, mblk_t *mp) { - udp_t *udp = Q_TO_UDP(q); + udp_t *udp; mblk_t *mp1; udp_fanout_t *udpf; udp_stack_t *us; + conn_t *connp = Q_TO_CONN(q); + udp = connp->conn_udp; us = udp->udp_us; - if (udp->udp_state != TS_DATA_XFER) { + rw_enter(&udp->udp_rwlock, RW_WRITER); + if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { + rw_exit(&udp->udp_rwlock); (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "udp_disconnect: bad state, %u", udp->udp_state); udp_err_ack(q, mp, TOUTSTATE, 0); return; } + udp->udp_pending_op = T_DISCON_REQ; udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, us->us_bind_fanout_size)]; mutex_enter(&udpf->uf_lock); @@ -2036,12 +1606,16 @@ udp_disconnect(queue_t *q, mblk_t *mp) else mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); if (mp1 == NULL) { + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack(q, mp, TSYSERR, ENOMEM); return; } mp = mi_tpi_ok_ack_alloc(mp); if (mp == NULL) { /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); return; } @@ -2050,29 +1624,30 @@ udp_disconnect(queue_t *q, mblk_t *mp) int error; /* Rebuild the header template */ - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) { + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); freemsg(mp1); return; } } - mutex_enter(&udpf->uf_lock); - udp->udp_discon_pending = 1; - mutex_exit(&udpf->uf_lock); - /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ + rw_exit(&udp->udp_rwlock); + /* Append the T_OK_ACK to the T_BIND_REQ for udp_bind_ack */ linkb(mp1, mp); if (udp->udp_family == AF_INET6) - mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); + mp1 = ip_bind_v6(q, mp1, connp, NULL); else - mp1 = ip_bind_v4(q, mp1, udp->udp_connp); + mp1 = ip_bind_v4(q, mp1, connp); + /* The above return NULL if the bind needs to be deferred */ if (mp1 != NULL) - udp_rput_other(_RD(q), mp1); + udp_bind_result(connp, mp1); else - CONN_INC_REF(udp->udp_connp); + CONN_INC_REF(connp); } /* This routine creates a T_ERROR_ACK message and passes it upstream. */ @@ -2080,7 +1655,7 @@ static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) { if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) - putnext(UDP_RD(q), mp); + qreply(q, mp); } /* Shorthand to generate and send TPI error acks to our client */ @@ -2096,7 +1671,7 @@ udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, teackp->ERROR_prim = primitive; teackp->TLI_error = t_error; teackp->UNIX_error = sys_error; - putnext(UDP_RD(q), mp); + qreply(q, mp); } } @@ -2191,13 +1766,9 @@ udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, #define ICMP_MIN_UDP_HDR 4 /* - * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. + * udp_icmp_error is called by udp_input to process ICMP msgs. 
passed up by IP. * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. * Assumes that IP has pulled up everything up to and including the ICMP header. - * An M_CTL could potentially come here from some other module (i.e. if UDP - * is pushed on some module other than IP). Thus, if we find that the M_CTL - * does not have enough ICMP information , following STREAMS conventions, - * we send it upstream assuming it is an M_CTL we don't understand. */ static void udp_icmp_error(queue_t *q, mblk_t *mp) @@ -2210,70 +1781,27 @@ udp_icmp_error(queue_t *q, mblk_t *mp) sin6_t sin6; mblk_t *mp1; int error = 0; - size_t mp_size = MBLKL(mp); udp_t *udp = Q_TO_UDP(q); - /* - * Assume IP provides aligned packets - otherwise toss - */ - if (!OK_32PTR(mp->b_rptr)) { - freemsg(mp); - return; - } + ipha = (ipha_t *)mp->b_rptr; - /* - * Verify that we have a complete IP header and the application has - * asked for errors. If not, send it upstream. - */ - if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { -noticmpv4: - putnext(UDP_RD(q), mp); - return; - } + ASSERT(OK_32PTR(mp->b_rptr)); - ipha = (ipha_t *)mp->b_rptr; - /* - * Verify IP version. Anything other than IPv4 or IPv6 packet is sent - * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. - */ - switch (IPH_HDR_VERSION(ipha)) { - case IPV6_VERSION: + if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { + ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); udp_icmp_error_ipv6(q, mp); return; - case IPV4_VERSION: - break; - default: - goto noticmpv4; } + ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); /* Skip past the outer IP and ICMP headers */ iph_hdr_length = IPH_HDR_LENGTH(ipha); icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; - /* - * If we don't have the correct outer IP header length or if the ULP - * is not IPPROTO_ICMP or if we don't have a complete inner IP header - * send the packet upstream. 
- */ - if (iph_hdr_length < sizeof (ipha_t) || - ipha->ipha_protocol != IPPROTO_ICMP || - (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { - goto noticmpv4; - } ipha = (ipha_t *)&icmph[1]; /* Skip past the inner IP and find the ULP header */ iph_hdr_length = IPH_HDR_LENGTH(ipha); udpha = (udpha_t *)((char *)ipha + iph_hdr_length); - /* - * If we don't have the correct inner IP header length or if the ULP - * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR - * bytes of UDP header, send it upstream. - */ - if (iph_hdr_length < sizeof (ipha_t) || - ipha->ipha_protocol != IPPROTO_UDP || - (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { - goto noticmpv4; - } switch (icmph->icmph_type) { case ICMP_DEST_UNREACHABLE: @@ -2281,7 +1809,6 @@ noticmpv4: case ICMP_FRAGMENTATION_NEEDED: /* * IP has already adjusted the path MTU. - * XXX Somehow pass MTU indication to application? */ break; case ICMP_PORT_UNREACHABLE: @@ -2302,6 +1829,15 @@ noticmpv4: return; } + /* + * Deliver T_UDERROR_IND when the application has asked for it. + * The socket layer enables this automatically when connected. + */ + if (!udp->udp_dgram_errind) { + freemsg(mp); + return; + } + switch (udp->udp_family) { case AF_INET: sin = sin_null; @@ -2322,7 +1858,7 @@ noticmpv4: break; } if (mp1) - putnext(UDP_RD(q), mp1); + putnext(q, mp1); freemsg(mp); } @@ -2331,67 +1867,33 @@ noticmpv4: * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. * Assumes that IP has pulled up all the extension headers as well as the * ICMPv6 header. - * An M_CTL could potentially come here from some other module (i.e. if UDP - * is pushed on some module other than IP). Thus, if we find that the M_CTL - * does not have enough ICMP information , following STREAMS conventions, - * we send it upstream assuming it is an M_CTL we don't understand. The reason - * it might get here is if the non-ICMP M_CTL accidently has 6 in the version - * field (when cast to ipha_t in udp_icmp_error). 
*/ static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) { icmp6_t *icmp6; ip6_t *ip6h, *outer_ip6h; - uint16_t hdr_length; + uint16_t iph_hdr_length; uint8_t *nexthdrp; udpha_t *udpha; sin6_t sin6; mblk_t *mp1; int error = 0; - size_t mp_size = MBLKL(mp); udp_t *udp = Q_TO_UDP(q); - - /* - * Verify that we have a complete IP header. If not, send it upstream. - */ - if (mp_size < sizeof (ip6_t)) { -noticmpv6: - putnext(UDP_RD(q), mp); - return; - } + udp_stack_t *us = udp->udp_us; outer_ip6h = (ip6_t *)mp->b_rptr; - /* - * Verify this is an ICMPV6 packet, else send it upstream - */ - if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { - hdr_length = IPV6_HDR_LEN; - } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, - &nexthdrp) || - *nexthdrp != IPPROTO_ICMPV6) { - goto noticmpv6; - } - icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; + if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) + iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); + else + iph_hdr_length = IPV6_HDR_LEN; + icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; ip6h = (ip6_t *)&icmp6[1]; - /* - * Verify we have a complete ICMP and inner IP header. - */ - if ((uchar_t *)&ip6h[1] > mp->b_wptr) - goto noticmpv6; - - if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) - goto noticmpv6; - udpha = (udpha_t *)((char *)ip6h + hdr_length); - /* - * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't - * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the - * packet upstream. 
- */ - if ((*nexthdrp != IPPROTO_UDP) || - ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { - goto noticmpv6; + if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { + freemsg(mp); + return; } + udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: @@ -2430,7 +1932,7 @@ noticmpv6: udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + opt_length; if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { - BUMP_MIB(&udp->udp_mib, udpInErrors); + BUMP_MIB(&us->us_udp_mib, udpInErrors); break; } @@ -2468,7 +1970,7 @@ noticmpv6: * message. Free it, then send our empty message. */ freemsg(mp); - putnext(UDP_RD(q), newmp); + putnext(q, newmp); return; } case ICMP6_TIME_EXCEEDED: @@ -2489,6 +1991,15 @@ noticmpv6: return; } + /* + * Deliver T_UDERROR_IND when the application has asked for it. + * The socket layer enables this automatically when connected. + */ + if (!udp->udp_dgram_errind) { + freemsg(mp); + return; + } + sin6 = sin6_null; sin6.sin6_family = AF_INET6; sin6.sin6_addr = ip6h->ip6_dst; @@ -2498,7 +2009,7 @@ noticmpv6: mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, error); if (mp1) - putnext(UDP_RD(q), mp1); + putnext(q, mp1); freemsg(mp); } @@ -2532,6 +2043,7 @@ udp_addr_req(queue_t *q, mblk_t *mp) taa->PRIM_type = T_ADDR_ACK; ackmp->b_datap->db_type = M_PCPROTO; + rw_enter(&udp->udp_rwlock, RW_READER); /* * Note: Following code assumes 32 bit alignment of basic * data structures like sin_t and struct T_addr_ack. 
@@ -2625,8 +2137,9 @@ udp_addr_req(queue_t *q, mblk_t *mp) ackmp->b_wptr = (uchar_t *)&sin6[1]; } } + rw_exit(&udp->udp_rwlock); ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); - putnext(UDP_RD(q), ackmp); + qreply(q, ackmp); } static void @@ -2669,7 +2182,7 @@ udp_capability_req(queue_t *q, mblk_t *mp) tcap->CAP_bits1 |= TC1_INFO; } - putnext(UDP_RD(q), mp); + qreply(q, mp); } /* @@ -2688,7 +2201,7 @@ udp_info_req(queue_t *q, mblk_t *mp) if (!mp) return; udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); - putnext(UDP_RD(q), mp); + qreply(q, mp); } /* @@ -2738,7 +2251,7 @@ udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) sin6_t *sin6; ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); - + ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); if (!mp) return (mp); @@ -2830,18 +2343,33 @@ udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) return (mp); } +/* For /dev/udp aka AF_INET open */ +static int +udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); +} + +/* For /dev/udp6 aka AF_INET6 open */ +static int +udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); +} + /* * This is the open routine for udp. It allocates a udp_t structure for * the stream and, on the first open of the module, creates an ND table. */ -/* ARGSUSED */ +/*ARGSUSED2*/ static int -udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, + boolean_t isv6) { int err; udp_t *udp; conn_t *connp; - queue_t *ip_wq; + dev_t conn_dev; zoneid_t zoneid; netstack_t *ns; udp_stack_t *us; @@ -2852,8 +2380,7 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (q->q_ptr != NULL) return (0); - /* If this is not a push of udp as a module, fail. 
*/ - if (sflag != MODOPEN) + if (sflag == MODOPEN) return (EINVAL); ns = netstack_find_by_cred(credp); @@ -2865,63 +2392,43 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * For exclusive stacks we set the zoneid to zero * to make UDP operate as if in the global zone. */ - if (us->us_netstack->netstack_stackid != GLOBAL_NETSTACKID) + if (ns->netstack_stackid != GLOBAL_NETSTACKID) zoneid = GLOBAL_ZONEID; else zoneid = crgetzoneid(credp); - q->q_hiwat = us->us_recv_hiwat; - WR(q)->q_hiwat = us->us_xmit_hiwat; - WR(q)->q_lowat = us->us_xmit_lowat; - - /* Insert ourselves in the stream since we're about to walk q_next */ - qprocson(q); + if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { + netstack_rele(ns); + return (EBUSY); + } + *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); - udp = kmem_cache_alloc(udp_cache, KM_SLEEP); - bzero(udp, sizeof (*udp)); + connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns); + connp->conn_dev = conn_dev; + udp = connp->conn_udp; /* - * UDP is supported only as a module and it has to be pushed directly - * above the device instance of IP. If UDP is pushed anywhere else - * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the - * sake of MIB browsers and fail everything else. + * ipcl_conn_create did a netstack_hold. Undo the hold that was + * done by netstack_find_by_cred() */ - ip_wq = WR(q)->q_next; - if (NOT_OVER_IP(ip_wq)) { - /* Support just SNMP for MIB browsers */ - connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP, - us->us_netstack); - connp->conn_rq = q; - connp->conn_wq = WR(q); - connp->conn_flags |= IPCL_UDPMOD; - connp->conn_cred = credp; - connp->conn_zoneid = zoneid; - connp->conn_udp = udp; - udp->udp_us = us; - udp->udp_connp = connp; - q->q_ptr = WR(q)->q_ptr = connp; - crhold(credp); - q->q_qinfo = &udp_snmp_rinit; - WR(q)->q_qinfo = &udp_snmp_winit; - return (0); - } + netstack_rele(ns); /* * Initialize the udp_t structure for this stream. 
*/ - q = RD(ip_wq); - connp = Q_TO_CONN(q); - mutex_enter(&connp->conn_lock); - connp->conn_proto = IPPROTO_UDP; - connp->conn_flags |= IPCL_UDP; - connp->conn_sqp = IP_SQUEUE_GET(lbolt); - connp->conn_udp = udp; + q->q_ptr = connp; + WR(q)->q_ptr = connp; + connp->conn_rq = q; + connp->conn_wq = WR(q); + + rw_enter(&udp->udp_rwlock, RW_WRITER); + ASSERT(connp->conn_ulp == IPPROTO_UDP); + ASSERT(connp->conn_udp == udp); + ASSERT(udp->udp_connp == connp); /* Set the initial state of the stream and the privilege status. */ - udp->udp_connp = connp; udp->udp_state = TS_UNBND; - udp->udp_mode = UDP_MT_HOT; - if (getmajor(*devp) == (major_t)UDP6_MAJ) { + if (isv6) { udp->udp_family = AF_INET6; udp->udp_ipversion = IPV6_VERSION; udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; @@ -2938,6 +2445,7 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) } udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; + udp->udp_pending_op = -1; connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; connp->conn_zoneid = zoneid; @@ -2951,41 +2459,45 @@ udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (getpflags(NET_MAC_AWARE, credp) != 0) udp->udp_mac_exempt = B_TRUE; - if (connp->conn_flags & IPCL_SOCKET) { + if (flag & SO_SOCKSTR) { + connp->conn_flags |= IPCL_SOCKET; udp->udp_issocket = B_TRUE; udp->udp_direct_sockfs = B_TRUE; } connp->conn_ulp_labeled = is_system_labeled(); - mutex_exit(&connp->conn_lock); udp->udp_us = us; - /* - * The transmit hiwat/lowat is only looked at on IP's queue. - * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF - * getsockopts. 
- */ q->q_hiwat = us->us_recv_hiwat; WR(q)->q_hiwat = us->us_xmit_hiwat; WR(q)->q_lowat = us->us_xmit_lowat; + connp->conn_recv = udp_input; + crhold(credp); + connp->conn_cred = credp; + + mutex_enter(&connp->conn_lock); + connp->conn_state_flags &= ~CONN_INCIPIENT; + mutex_exit(&connp->conn_lock); + + qprocson(q); + if (udp->udp_family == AF_INET6) { /* Build initial header template for transmit */ - if ((err = udp_build_hdrs(q, udp)) != 0) { - /* XXX missing free of connp? crfree? netstack_rele? */ - qprocsoff(UDP_RD(q)); - udp->udp_connp = NULL; - connp->conn_udp = NULL; - kmem_cache_free(udp_cache, udp); + if ((err = udp_build_hdrs(udp)) != 0) { + rw_exit(&udp->udp_rwlock); + qprocsoff(q); + ipcl_conn_destroy(connp); return (err); } } + rw_exit(&udp->udp_rwlock); /* Set the Stream head write offset and high watermark. */ - (void) mi_set_sth_wroff(UDP_RD(q), + (void) mi_set_sth_wroff(q, udp->udp_max_hdr_len + us->us_wroff_extra); - (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); + (void) mi_set_sth_hiwat(q, udp_set_rcv_hiwat(udp, q->q_hiwat)); return (0); } @@ -3006,7 +2518,7 @@ udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) */ /* ARGSUSED */ int -udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) +udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) { udp_t *udp = Q_TO_UDP(q); udp_stack_t *us = udp->udp_us; @@ -3041,12 +2553,11 @@ udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) } /* - * This routine retrieves the current status of socket options - * and expects the caller to pass in the queue pointer of the - * upper instance. It returns the size of the option retrieved. + * This routine retrieves the current status of socket options. + * It returns the size of the option retrieved. 
*/ int -udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) +udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) { int *i1 = (int *)ptr; conn_t *connp; @@ -3055,7 +2566,6 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) int len; udp_stack_t *us; - q = UDP_WR(q); connp = Q_TO_CONN(q); udp = connp->conn_udp; ipp = &udp->udp_sticky_ipp; @@ -3368,13 +2878,26 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) return (sizeof (int)); } +int +udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) +{ + udp_t *udp; + int err; + + udp = Q_TO_UDP(q); + + rw_enter(&udp->udp_rwlock, RW_READER); + err = udp_opt_get_locked(q, level, name, ptr); + rw_exit(&udp->udp_rwlock); + return (err); +} + /* - * This routine sets socket options; it expects the caller - * to pass in the queue pointer of the upper instance. + * This routine sets socket options. */ /* ARGSUSED */ int -udp_opt_set(queue_t *q, uint_t optset_context, int level, +udp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) { @@ -3387,8 +2910,8 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, udp_t *udp; uint_t newlen; udp_stack_t *us; + size_t sth_wroff; - q = UDP_WR(q); connp = Q_TO_CONN(q); udp = connp->conn_udp; us = udp->udp_us; @@ -3479,7 +3002,6 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, } if (!checkonly) { q->q_hiwat = *i1; - WR(UDP_RD(q))->q_hiwat = *i1; } break; case SO_RCVBUF: @@ -3489,9 +3011,10 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, } if (!checkonly) { RD(q)->q_hiwat = *i1; - UDP_RD(q)->q_hiwat = *i1; - (void) mi_set_sth_hiwat(UDP_RD(q), + rw_exit(&udp->udp_rwlock); + (void) mi_set_sth_hiwat(RD(q), udp_set_rcv_hiwat(udp, *i1)); + rw_enter(&udp->udp_rwlock, RW_WRITER); } break; case SO_DGRAM_ERRIND: @@ -3588,6 
+3111,10 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, if (checkonly) break; + /* + * Update the stored options taking into account + * any CIPSO option which we should not overwrite. + */ if (!tsol_option_set(&udp->udp_ip_snd_options, &udp->udp_ip_snd_options_len, udp->udp_label_len, invalp, inlen)) { @@ -3597,8 +3124,10 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + udp->udp_ip_snd_options_len; - (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + - us->us_wroff_extra); + sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; + rw_exit(&udp->udp_rwlock); + (void) mi_set_sth_wroff(RD(q), sth_wroff); + rw_enter(&udp->udp_rwlock, RW_WRITER); break; case IP_TTL: @@ -3784,7 +3313,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, ipp->ipp_fields |= IPPF_UNICAST_HOPS; } /* Rebuild the header template */ - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) { *outlenp = 0; return (error); @@ -3921,7 +3450,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, ipp->ipp_fields &= ~IPPF_ADDR; } if (sticky) { - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) return (error); } @@ -3967,7 +3496,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, ipp->ipp_fields |= IPPF_TCLASS; } if (sticky) { - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) return (error); } @@ -4001,7 +3530,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, ipp->ipp_fields &= ~IPPF_NEXTHOP; } if (sticky) { - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) return (error); } @@ -4032,7 +3561,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, ipp->ipp_fields |= IPPF_HOPOPTS; } if (sticky) { - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) return (error); } @@ -4072,7 +3601,7 @@ udp_opt_set(queue_t *q, uint_t 
optset_context, int level, ipp->ipp_fields |= IPPF_RTDSTOPTS; } if (sticky) { - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) return (error); } @@ -4111,7 +3640,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, ipp->ipp_fields |= IPPF_DSTOPTS; } if (sticky) { - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) return (error); } @@ -4150,7 +3679,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, ipp->ipp_fields |= IPPF_RTHDR; } if (sticky) { - error = udp_build_hdrs(q, udp); + error = udp_build_hdrs(udp); if (error != 0) return (error); } @@ -4265,6 +3794,23 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, return (0); } +int +udp_opt_set(queue_t *q, uint_t optset_context, int level, + int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, + uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) +{ + udp_t *udp; + int err; + + udp = Q_TO_UDP(q); + + rw_enter(&udp->udp_rwlock, RW_WRITER); + err = udp_opt_set_locked(q, optset_context, level, name, inlen, invalp, + outlenp, outvalp, thisdg_attrs, cr, mblk); + rw_exit(&udp->udp_rwlock); + return (err); +} + /* * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. * The headers include ip6i_t (if needed), ip6_t, any sticky extension @@ -4272,7 +3818,7 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, * Returns failure if can't allocate memory. 
*/ static int -udp_build_hdrs(queue_t *q, udp_t *udp) +udp_build_hdrs(udp_t *udp) { udp_stack_t *us = udp->udp_us; uchar_t *hdrs; @@ -4281,7 +3827,9 @@ udp_build_hdrs(queue_t *q, udp_t *udp) ip6i_t *ip6i; udpha_t *udpha; ip6_pkt_t *ipp = &udp->udp_sticky_ipp; + size_t sth_wroff; + ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; ASSERT(hdrs_len != 0); if (hdrs_len != udp->udp_sticky_hdrs_len) { @@ -4317,8 +3865,10 @@ udp_build_hdrs(queue_t *q, udp_t *udp) /* Try to get everything in a single mblk */ if (hdrs_len > udp->udp_max_hdr_len) { udp->udp_max_hdr_len = hdrs_len; - (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + - us->us_wroff_extra); + sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; + rw_exit(&udp->udp_rwlock); + (void) mi_set_sth_wroff(udp->udp_connp->conn_rq, sth_wroff); + rw_enter(&udp->udp_rwlock, RW_WRITER); } return (0); } @@ -4556,12 +4106,48 @@ copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) return (tlen); } +/* + * Update udp_rcv_opt_len from the packet. + * Called when options received, and when no options received but + * udp_ip_recv_opt_len has previously recorded options. 
+ */ +static void +udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) +{ + /* Save the options if any */ + if (opt_len > 0) { + if (opt_len > udp->udp_ip_rcv_options_len) { + /* Need to allocate larger buffer */ + if (udp->udp_ip_rcv_options_len != 0) + mi_free((char *)udp->udp_ip_rcv_options); + udp->udp_ip_rcv_options_len = 0; + udp->udp_ip_rcv_options = + (uchar_t *)mi_alloc(opt_len, BPRI_HI); + if (udp->udp_ip_rcv_options != NULL) + udp->udp_ip_rcv_options_len = opt_len; + } + if (udp->udp_ip_rcv_options_len != 0) { + bcopy(opt, udp->udp_ip_rcv_options, opt_len); + /* Adjust length if we are resusing the space */ + udp->udp_ip_rcv_options_len = opt_len; + } + } else if (udp->udp_ip_rcv_options_len != 0) { + /* Clear out previously recorded options */ + mi_free((char *)udp->udp_ip_rcv_options); + udp->udp_ip_rcv_options = NULL; + udp->udp_ip_rcv_options_len = 0; + } +} + +/* ARGSUSED2 */ static void -udp_input(conn_t *connp, mblk_t *mp) +udp_input(void *arg1, mblk_t *mp, void *arg2) { + conn_t *connp = (conn_t *)arg1; struct T_unitdata_ind *tudi; uchar_t *rptr; /* Pointer to IP header */ int hdr_length; /* Length of IP+UDP headers */ + int opt_len; int udi_size; /* Size of T_unitdata_ind */ int mp_len; udp_t *udp; @@ -4574,13 +4160,13 @@ udp_input(conn_t *connp, mblk_t *mp) mblk_t *options_mp = NULL; ip_pktinfo_t *pinfo = NULL; cred_t *cr = NULL; - queue_t *q = connp->conn_rq; pid_t cpid; + uint32_t udp_ip_rcv_options_len; + udp_bits_t udp_bits; cred_t *rcr = connp->conn_cred; udp_stack_t *us; - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, - "udp_rput_start: q %p mp %p", q, mp); + ASSERT(connp->conn_flags & IPCL_UDPCONN); udp = connp->conn_udp; us = udp->udp_us; @@ -4599,7 +4185,7 @@ udp_input(conn_t *connp, mblk_t *mp) IN_PKTINFO) { /* * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information - * has been appended to the packet by IP. We need to + * has been prepended to the packet by IP. 
We need to * extract the mblk and adjust the rptr */ pinfo = (ip_pktinfo_t *)mp->b_rptr; @@ -4611,9 +4197,7 @@ udp_input(conn_t *connp, mblk_t *mp) /* * ICMP messages. */ - udp_icmp_error(q, mp); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "m_ctl"); + udp_icmp_error(connp->conn_rq, mp); return; } } @@ -4623,53 +4207,37 @@ udp_input(conn_t *connp, mblk_t *mp) * This is the inbound data path. * First, we check to make sure the IP version number is correct, * and then pull the IP and UDP headers into the first mblk. - * Assume IP provides aligned packets - otherwise toss. - * Also, check if we have a complete IP header. */ /* Initialize regardless if ipversion is IPv4 or IPv6 */ ipp.ipp_fields = 0; ipversion = IPH_HDR_VERSION(rptr); + + rw_enter(&udp->udp_rwlock, RW_READER); + udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; + udp_bits = udp->udp_bits; + rw_exit(&udp->udp_rwlock); + switch (ipversion) { case IPV4_VERSION: ASSERT(MBLKL(mp) >= sizeof (ipha_t)); ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; - if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || - (udp->udp_ip_rcv_options_len)) { + opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); + if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && + udp->udp_family == AF_INET) { /* - * Handle IPv4 packets with options outside of the - * main data path. Not needed for AF_INET6 sockets + * Record/update udp_ip_rcv_options with the lock + * held. Not needed for AF_INET6 sockets * since they don't support a getsockopt of IP_OPTIONS. */ - if (udp->udp_family == AF_INET6) - break; - /* - * UDP length check performed for IPv4 packets with - * options to check whether UDP length specified in - * the header is the same as the physical length of - * the packet. 
- */ - udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); - if (mp_len != (ntohs(udpha->uha_length) + - hdr_length - UDPH_SIZE)) { - goto tossit; - } - /* - * Handle the case where the packet has IP options - * and the IP_RECVSLLA & IP_RECVIF are set - */ - if (pinfo != NULL) - mp = options_mp; - udp_become_writer(connp, mp, udp_rput_other_wrapper, - SQTAG_UDP_INPUT); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "end"); - return; + rw_enter(&udp->udp_rwlock, RW_WRITER); + udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, + opt_len); + rw_exit(&udp->udp_rwlock); } - - /* Handle IPV6_RECVHOPLIMIT. */ + /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && udp->udp_ip_recvpktinfo) { if (pinfo->ip_pkt_flags & IPF_RECVIF) { @@ -4735,8 +4303,9 @@ udp_input(conn_t *connp, mblk_t *mp) /* * IP inspected the UDP header thus all of it must be in the mblk. * UDP length check is performed for IPv6 packets and IPv4 packets - * without options to check if the size of the packet as specified + * to check if the size of the packet as specified * by the header is the same as the physical size of the packet. + * FIXME? Didn't IP already check this? */ udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); if ((MBLKL(mp) < hdr_length) || @@ -4744,8 +4313,9 @@ udp_input(conn_t *connp, mblk_t *mp) goto tossit; } - /* Walk past the headers. */ - if (!udp->udp_rcvhdr) { + + /* Walk past the headers unless IP_RECVHDR was set. */ + if (!udp_bits.udpb_rcvhdr) { mp->b_rptr = rptr + hdr_length; mp_len -= hdr_length; } @@ -4760,56 +4330,62 @@ udp_input(conn_t *connp, mblk_t *mp) ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); /* - * Normally only send up the address. + * Normally only send up the source address. * If IP_RECVDSTADDR is set we include the destination IP * address as an option. With IP_RECVOPTS we include all - * the IP options. 
Only ip_rput_other() handles packets - * that contain IP options. + * the IP options. */ udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); - if (udp->udp_recvdstaddr) { + if (udp_bits.udpb_recvdstaddr) { udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); UDP_STAT(us, udp_in_recvdstaddr); } - if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && + if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVADDR)) { udi_size += sizeof (struct T_opthdr) + sizeof (struct in_pktinfo); - UDP_STAT(us, udp_ip_recvpktinfo); + UDP_STAT(us, udp_ip_rcvpktinfo); + } + + if ((udp_bits.udpb_recvopts) && opt_len > 0) { + udi_size += sizeof (struct T_opthdr) + opt_len; + UDP_STAT(us, udp_in_recvopts); } /* * If the IP_RECVSLLA or the IP_RECVIF is set then allocate * space accordingly */ - if (udp->udp_recvif && (pinfo != NULL) && + if ((udp_bits.udpb_recvif) && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVIF)) { udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); UDP_STAT(us, udp_in_recvif); } - if (udp->udp_recvslla && (pinfo != NULL) && + if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { udi_size += sizeof (struct T_opthdr) + sizeof (struct sockaddr_dl); UDP_STAT(us, udp_in_recvslla); } - if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { + if ((udp_bits.udpb_recvucred) && + (cr = DB_CRED(mp)) != NULL) { udi_size += sizeof (struct T_opthdr) + ucredsize; cpid = DB_CPID(mp); UDP_STAT(us, udp_in_recvucred); } + /* XXX FIXME: apply to AF_INET6 as well */ /* * If SO_TIMESTAMP is set allocate the appropriate sized * buffer. Since gethrestime() expects a pointer aligned * argument, we allocate space necessary for extra * alignment (even though it might not be used). 
*/ - if (udp->udp_timestamp) { + if (udp_bits.udpb_timestamp) { udi_size += sizeof (struct T_opthdr) + sizeof (timestruc_t) + _POINTER_ALIGNMENT; UDP_STAT(us, udp_in_timestamp); @@ -4818,11 +4394,10 @@ udp_input(conn_t *connp, mblk_t *mp) /* * If IP_RECVTTL is set allocate the appropriate sized buffer */ - if (udp->udp_recvttl) { + if (udp_bits.udpb_recvttl) { udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); UDP_STAT(us, udp_in_recvttl); } - ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); /* Allocate a message block for the T_UNITDATA_IND structure. */ mp1 = allocb(udi_size, BPRI_MED); @@ -4830,9 +4405,7 @@ udp_input(conn_t *connp, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "allocbfail"); - BUMP_MIB(&udp->udp_mib, udpInErrors); + BUMP_MIB(&us->us_udp_mib, udpInErrors); return; } mp1->b_cont = mp; @@ -4866,7 +4439,7 @@ udp_input(conn_t *connp, mblk_t *mp) char *dstopt; dstopt = (char *)&sin[1]; - if (udp->udp_recvdstaddr) { + if (udp_bits.udpb_recvdstaddr) { struct T_opthdr *toh; ipaddr_t *dstptr; @@ -4879,11 +4452,26 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += sizeof (struct T_opthdr); dstptr = (ipaddr_t *)dstopt; *dstptr = ((ipha_t *)rptr)->ipha_dst; - dstopt = (char *)toh + toh->len; + dstopt += sizeof (ipaddr_t); + udi_size -= toh->len; + } + + if (udp_bits.udpb_recvopts && opt_len > 0) { + struct T_opthdr *toh; + + toh = (struct T_opthdr *)dstopt; + toh->level = IPPROTO_IP; + toh->name = IP_RECVOPTS; + toh->len = sizeof (struct T_opthdr) + opt_len; + toh->status = 0; + dstopt += sizeof (struct T_opthdr); + bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, + opt_len); + dstopt += opt_len; udi_size -= toh->len; } - if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && + if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVADDR)) { struct T_opthdr *toh; struct in_pktinfo *pktinfop; @@ -4906,7 +4494,7 @@ 
udp_input(conn_t *connp, mblk_t *mp) udi_size -= toh->len; } - if (udp->udp_recvslla && (pinfo != NULL) && + if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { struct T_opthdr *toh; @@ -4922,11 +4510,11 @@ udp_input(conn_t *connp, mblk_t *mp) dstptr = (struct sockaddr_dl *)dstopt; bcopy(&pinfo->ip_pkt_slla, dstptr, sizeof (struct sockaddr_dl)); - dstopt = (char *)toh + toh->len; + dstopt += sizeof (struct sockaddr_dl); udi_size -= toh->len; } - if (udp->udp_recvif && (pinfo != NULL) && + if ((udp_bits.udpb_recvif) && (pinfo != NULL) && (pinfo->ip_pkt_flags & IPF_RECVIF)) { struct T_opthdr *toh; @@ -4941,7 +4529,7 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += sizeof (struct T_opthdr); dstptr = (uint_t *)dstopt; *dstptr = pinfo->ip_pkt_ifindex; - dstopt = (char *)toh + toh->len; + dstopt += sizeof (uint_t); udi_size -= toh->len; } @@ -4953,12 +4541,13 @@ udp_input(conn_t *connp, mblk_t *mp) toh->name = SCM_UCRED; toh->len = sizeof (struct T_opthdr) + ucredsize; toh->status = 0; - (void) cred2ucred(cr, cpid, &toh[1], rcr); - dstopt = (char *)toh + toh->len; + dstopt += sizeof (struct T_opthdr); + (void) cred2ucred(cr, cpid, dstopt, rcr); + dstopt += ucredsize; udi_size -= toh->len; } - if (udp->udp_timestamp) { + if (udp_bits.udpb_timestamp) { struct T_opthdr *toh; toh = (struct T_opthdr *)dstopt; @@ -4984,7 +4573,7 @@ udp_input(conn_t *connp, mblk_t *mp) * any option processing after this will * cause alignment panic. */ - if (udp->udp_recvttl) { + if (udp_bits.udpb_recvttl) { struct T_opthdr *toh; uint8_t *dstptr; @@ -4997,7 +4586,7 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += sizeof (struct T_opthdr); dstptr = (uint8_t *)dstopt; *dstptr = ((ipha_t *)rptr)->ipha_ttl; - dstopt = (char *)toh + toh->len; + dstopt += sizeof (uint8_t); udi_size -= toh->len; } @@ -5013,15 +4602,12 @@ udp_input(conn_t *connp, mblk_t *mp) * Normally we only send up the address. 
If receiving of any * optional receive side information is enabled, we also send * that up as options. - * [ Only udp_rput_other() handles packets that contain IP - * options so code to account for does not appear immediately - * below but elsewhere ] */ udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| IPPF_RTHDR|IPPF_IFINDEX)) { - if (udp->udp_ipv6_recvhopopts && + if ((udp_bits.udpb_ipv6_recvhopopts) && (ipp.ipp_fields & IPPF_HOPOPTS)) { size_t hlen; @@ -5031,29 +4617,29 @@ udp_input(conn_t *connp, mblk_t *mp) ipp.ipp_fields &= ~IPPF_HOPOPTS; udi_size += hlen; } - if ((udp->udp_ipv6_recvdstopts || - udp->udp_old_ipv6_recvdstopts) && + if (((udp_bits.udpb_ipv6_recvdstopts) || + udp_bits.udpb_old_ipv6_recvdstopts) && (ipp.ipp_fields & IPPF_DSTOPTS)) { udi_size += sizeof (struct T_opthdr) + ipp.ipp_dstoptslen; UDP_STAT(us, udp_in_recvdstopts); } - if (((udp->udp_ipv6_recvdstopts && - udp->udp_ipv6_recvrthdr && + if ((((udp_bits.udpb_ipv6_recvdstopts) && + udp_bits.udpb_ipv6_recvrthdr && (ipp.ipp_fields & IPPF_RTHDR)) || - udp->udp_ipv6_recvrthdrdstopts) && + (udp_bits.udpb_ipv6_recvrthdrdstopts)) && (ipp.ipp_fields & IPPF_RTDSTOPTS)) { udi_size += sizeof (struct T_opthdr) + ipp.ipp_rtdstoptslen; UDP_STAT(us, udp_in_recvrtdstopts); } - if (udp->udp_ipv6_recvrthdr && + if ((udp_bits.udpb_ipv6_recvrthdr) && (ipp.ipp_fields & IPPF_RTHDR)) { udi_size += sizeof (struct T_opthdr) + ipp.ipp_rthdrlen; UDP_STAT(us, udp_in_recvrthdr); } - if (udp->udp_ip_recvpktinfo && + if ((udp_bits.udpb_ip_recvpktinfo) && (ipp.ipp_fields & IPPF_IFINDEX)) { udi_size += sizeof (struct T_opthdr) + sizeof (struct in6_pktinfo); @@ -5061,18 +4647,19 @@ udp_input(conn_t *connp, mblk_t *mp) } } - if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { + if ((udp_bits.udpb_recvucred) && + (cr = DB_CRED(mp)) != NULL) { udi_size += sizeof (struct T_opthdr) + ucredsize; cpid = DB_CPID(mp); UDP_STAT(us, udp_in_recvucred); } - if 
(udp->udp_ipv6_recvhoplimit) { + if (udp_bits.udpb_ipv6_recvhoplimit) { udi_size += sizeof (struct T_opthdr) + sizeof (int); UDP_STAT(us, udp_in_recvhoplimit); } - if (udp->udp_ipv6_recvtclass) { + if (udp_bits.udpb_ipv6_recvtclass) { udi_size += sizeof (struct T_opthdr) + sizeof (int); UDP_STAT(us, udp_in_recvtclass); } @@ -5082,9 +4669,7 @@ udp_input(conn_t *connp, mblk_t *mp) freemsg(mp); if (options_mp != NULL) freeb(options_mp); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "allocbfail"); - BUMP_MIB(&udp->udp_mib, udpInErrors); + BUMP_MIB(&us->us_udp_mib, udpInErrors); return; } mp1->b_cont = mp; @@ -5132,7 +4717,7 @@ udp_input(conn_t *connp, mblk_t *mp) uchar_t *dstopt; dstopt = (uchar_t *)&sin6[1]; - if (udp->udp_ip_recvpktinfo && + if ((udp_bits.udpb_ip_recvpktinfo) && (ipp.ipp_fields & IPPF_IFINDEX)) { struct T_opthdr *toh; struct in6_pktinfo *pkti; @@ -5155,7 +4740,7 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += sizeof (*pkti); udi_size -= toh->len; } - if (udp->udp_ipv6_recvhoplimit) { + if (udp_bits.udpb_ipv6_recvhoplimit) { struct T_opthdr *toh; toh = (struct T_opthdr *)dstopt; @@ -5173,7 +4758,7 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += sizeof (uint_t); udi_size -= toh->len; } - if (udp->udp_ipv6_recvtclass) { + if (udp_bits.udpb_ipv6_recvtclass) { struct T_opthdr *toh; toh = (struct T_opthdr *)dstopt; @@ -5194,7 +4779,7 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += sizeof (uint_t); udi_size -= toh->len; } - if (udp->udp_ipv6_recvhopopts && + if ((udp_bits.udpb_ipv6_recvhopopts) && (ipp.ipp_fields & IPPF_HOPOPTS)) { size_t hlen; @@ -5202,8 +4787,8 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += hlen; udi_size -= hlen; } - if (udp->udp_ipv6_recvdstopts && - udp->udp_ipv6_recvrthdr && + if ((udp_bits.udpb_ipv6_recvdstopts) && + (udp_bits.udpb_ipv6_recvrthdr) && (ipp.ipp_fields & IPPF_RTHDR) && (ipp.ipp_fields & IPPF_RTDSTOPTS)) { struct T_opthdr *toh; @@ -5220,7 +4805,7 @@ udp_input(conn_t *connp, mblk_t *mp) 
dstopt += ipp.ipp_rtdstoptslen; udi_size -= toh->len; } - if (udp->udp_ipv6_recvrthdr && + if ((udp_bits.udpb_ipv6_recvrthdr) && (ipp.ipp_fields & IPPF_RTHDR)) { struct T_opthdr *toh; @@ -5235,7 +4820,7 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += ipp.ipp_rthdrlen; udi_size -= toh->len; } - if (udp->udp_ipv6_recvdstopts && + if ((udp_bits.udpb_ipv6_recvdstopts) && (ipp.ipp_fields & IPPF_DSTOPTS)) { struct T_opthdr *toh; @@ -5271,20 +4856,18 @@ udp_input(conn_t *connp, mblk_t *mp) /* No IP_RECVDSTADDR for IPv6. */ } - BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "end"); + BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); if (options_mp != NULL) freeb(options_mp); - if (udp->udp_direct_sockfs) { + if (udp_bits.udpb_direct_sockfs) { /* * There is nothing above us except for the stream head; * use the read-side synchronous stream interface in * order to reduce the time spent in interrupt thread. */ ASSERT(udp->udp_issocket); - udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); + udp_rcv_enqueue(connp->conn_rq, udp, mp, mp_len); } else { /* * Use regular STREAMS interface to pass data upstream @@ -5292,7 +4875,7 @@ udp_input(conn_t *connp, mblk_t *mp) * switched over to the slow mode due to sockmod being * popped or a module being pushed on top of us. */ - putnext(UDP_RD(q), mp); + putnext(connp->conn_rq, mp); } return; @@ -5300,472 +4883,79 @@ tossit: freemsg(mp); if (options_mp != NULL) freeb(options_mp); - BUMP_MIB(&udp->udp_mib, udpInErrors); -} - -void -udp_conn_recv(conn_t *connp, mblk_t *mp) -{ - _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); -} - -/* ARGSUSED */ -static void -udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) -{ - udp_input((conn_t *)arg, mp); - _UDP_EXIT((conn_t *)arg); + BUMP_MIB(&us->us_udp_mib, udpInErrors); } /* - * Process non-M_DATA messages as well as M_DATA messages that requires - * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 
+ * Handle the results of a T_BIND_REQ whether deferred by IP or handled + * immediately. */ static void -udp_rput_other(queue_t *q, mblk_t *mp) +udp_bind_result(conn_t *connp, mblk_t *mp) { - struct T_unitdata_ind *tudi; - mblk_t *mp1; - uchar_t *rptr; - uchar_t *new_rptr; - int hdr_length; - int udi_size; /* Size of T_unitdata_ind */ - int opt_len; /* Length of IP options */ - sin_t *sin; struct T_error_ack *tea; - mblk_t *options_mp = NULL; - ip_pktinfo_t *pinfo; - boolean_t recv_on = B_FALSE; - cred_t *cr = NULL; - udp_t *udp = Q_TO_UDP(q); - pid_t cpid; - cred_t *rcr = udp->udp_connp->conn_cred; - udp_stack_t *us = udp->udp_us; - - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, - "udp_rput_other: q %p mp %p", q, mp); - - ASSERT(OK_32PTR(mp->b_rptr)); - rptr = mp->b_rptr; switch (mp->b_datap->db_type) { - case M_CTL: - /* - * We are here only if IP_RECVSLLA and/or IP_RECVIF are set - */ - recv_on = B_TRUE; - options_mp = mp; - pinfo = (ip_pktinfo_t *)options_mp->b_rptr; - - /* - * The actual data is in mp->b_cont - */ - mp = mp->b_cont; - ASSERT(OK_32PTR(mp->b_rptr)); - rptr = mp->b_rptr; - break; - case M_DATA: - /* - * M_DATA messages contain IPv4 datagrams. They are handled - * after this switch. - */ - break; case M_PROTO: case M_PCPROTO: /* M_PROTO messages contain some type of TPI message. 
*/ - ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); - if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { + ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= + (uintptr_t)INT_MAX); + if (mp->b_wptr - mp->b_rptr < sizeof (t_scalar_t)) { freemsg(mp); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_other_end: q %p (%S)", q, "protoshort"); return; } - tea = (struct T_error_ack *)rptr; + tea = (struct T_error_ack *)mp->b_rptr; switch (tea->PRIM_type) { case T_ERROR_ACK: switch (tea->ERROR_prim) { case O_T_BIND_REQ: - case T_BIND_REQ: { - /* - * If our O_T_BIND_REQ/T_BIND_REQ fails, - * clear out the associated port and source - * address before passing the message - * upstream. If this was caused by a T_CONN_REQ - * revert back to bound state. - */ - udp_fanout_t *udpf; - - udpf = &us->us_bind_fanout[UDP_BIND_HASH( - udp->udp_port, us->us_bind_fanout_size)]; - mutex_enter(&udpf->uf_lock); - if (udp->udp_state == TS_DATA_XFER) { - /* Connect failed */ - tea->ERROR_prim = T_CONN_REQ; - /* Revert back to the bound source */ - udp->udp_v6src = udp->udp_bound_v6src; - udp->udp_state = TS_IDLE; - mutex_exit(&udpf->uf_lock); - if (udp->udp_family == AF_INET6) - (void) udp_build_hdrs(q, udp); - break; - } - - if (udp->udp_discon_pending) { - tea->ERROR_prim = T_DISCON_REQ; - udp->udp_discon_pending = 0; - } - V6_SET_ZERO(udp->udp_v6src); - V6_SET_ZERO(udp->udp_bound_v6src); - udp->udp_state = TS_UNBND; - udp_bind_hash_remove(udp, B_TRUE); - udp->udp_port = 0; - mutex_exit(&udpf->uf_lock); - if (udp->udp_family == AF_INET6) - (void) udp_build_hdrs(q, udp); - break; - } + case T_BIND_REQ: + udp_bind_error(connp, mp); + return; default: break; } - break; - case T_BIND_ACK: - udp_rput_bind_ack(q, mp); - return; - - case T_OPTMGMT_ACK: - case T_OK_ACK: - break; - default: + ASSERT(0); freemsg(mp); return; - } - putnext(UDP_RD(q), mp); - return; - } - /* - * This is the inbound data path. - * First, we make sure the data contains both IP and UDP headers. 
- * - * This handle IPv4 packets for only AF_INET sockets. - * AF_INET6 sockets can never access udp_ip_rcv_options thus there - * is no need saving the options. - */ - ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); - hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; - if (mp->b_wptr - rptr < hdr_length) { - if (!pullupmsg(mp, hdr_length)) { - freemsg(mp); - if (options_mp != NULL) - freeb(options_mp); - BUMP_MIB(&udp->udp_mib, udpInErrors); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_other_end: q %p (%S)", q, "hdrshort"); + case T_BIND_ACK: + udp_bind_ack(connp, mp); return; - } - rptr = mp->b_rptr; - } - /* Walk past the headers. */ - new_rptr = rptr + hdr_length; - if (!udp->udp_rcvhdr) - mp->b_rptr = new_rptr; - /* Save the options if any */ - opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); - if (opt_len > 0) { - if (opt_len > udp->udp_ip_rcv_options_len) { - if (udp->udp_ip_rcv_options_len) - mi_free((char *)udp->udp_ip_rcv_options); - udp->udp_ip_rcv_options_len = 0; - udp->udp_ip_rcv_options = - (uchar_t *)mi_alloc(opt_len, BPRI_HI); - if (udp->udp_ip_rcv_options) - udp->udp_ip_rcv_options_len = opt_len; - } - if (udp->udp_ip_rcv_options_len) { - bcopy(rptr + IP_SIMPLE_HDR_LENGTH, - udp->udp_ip_rcv_options, opt_len); - /* Adjust length if we are resusing the space */ - udp->udp_ip_rcv_options_len = opt_len; + default: + break; } - } else if (udp->udp_ip_rcv_options_len) { - mi_free((char *)udp->udp_ip_rcv_options); - udp->udp_ip_rcv_options = NULL; - udp->udp_ip_rcv_options_len = 0; - } - - /* - * Normally only send up the address. - * If IP_RECVDSTADDR is set we include the destination IP - * address as an option. With IP_RECVOPTS we include all - * the IP options. 
- */ - udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); - if (udp->udp_recvdstaddr) { - udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); - UDP_STAT(us, udp_in_recvdstaddr); - } - - if (udp->udp_ip_recvpktinfo && recv_on && - (pinfo->ip_pkt_flags & IPF_RECVADDR)) { - udi_size += sizeof (struct T_opthdr) + - sizeof (struct in_pktinfo); - UDP_STAT(us, udp_ip_recvpktinfo); - } - - if (udp->udp_recvopts && opt_len > 0) { - udi_size += sizeof (struct T_opthdr) + opt_len; - UDP_STAT(us, udp_in_recvopts); - } - - /* - * If the IP_RECVSLLA or the IP_RECVIF is set then allocate - * space accordingly - */ - if (udp->udp_recvif && recv_on && - (pinfo->ip_pkt_flags & IPF_RECVIF)) { - udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); - UDP_STAT(us, udp_in_recvif); - } - - if (udp->udp_recvslla && recv_on && - (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { - udi_size += sizeof (struct T_opthdr) + - sizeof (struct sockaddr_dl); - UDP_STAT(us, udp_in_recvslla); - } - - if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { - udi_size += sizeof (struct T_opthdr) + ucredsize; - cpid = DB_CPID(mp); - UDP_STAT(us, udp_in_recvucred); - } - /* - * If IP_RECVTTL is set allocate the appropriate sized buffer - */ - if (udp->udp_recvttl) { - udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); - UDP_STAT(us, udp_in_recvttl); - } - - /* Allocate a message block for the T_UNITDATA_IND structure. */ - mp1 = allocb(udi_size, BPRI_MED); - if (mp1 == NULL) { freemsg(mp); - if (options_mp != NULL) - freeb(options_mp); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_other_end: q %p (%S)", q, "allocbfail"); - BUMP_MIB(&udp->udp_mib, udpInErrors); + return; + default: + /* FIXME: other cases? 
*/ + ASSERT(0); + freemsg(mp); return; } - mp1->b_cont = mp; - mp = mp1; - mp->b_datap->db_type = M_PROTO; - tudi = (struct T_unitdata_ind *)mp->b_rptr; - mp->b_wptr = (uchar_t *)tudi + udi_size; - tudi->PRIM_type = T_UNITDATA_IND; - tudi->SRC_length = sizeof (sin_t); - tudi->SRC_offset = sizeof (struct T_unitdata_ind); - tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); - udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); - tudi->OPT_length = udi_size; - - sin = (sin_t *)&tudi[1]; - sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; - sin->sin_port = ((in_port_t *) - new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; - sin->sin_family = AF_INET; - *(uint32_t *)&sin->sin_zero[0] = 0; - *(uint32_t *)&sin->sin_zero[4] = 0; - - /* - * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or - * IP_RECVTTL has been set. - */ - if (udi_size != 0) { - /* - * Copy in destination address before options to avoid any - * padding issues. - */ - char *dstopt; - - dstopt = (char *)&sin[1]; - if (udp->udp_recvdstaddr) { - struct T_opthdr *toh; - ipaddr_t *dstptr; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVDSTADDR; - toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - dstptr = (ipaddr_t *)dstopt; - *dstptr = (((ipaddr_t *)rptr)[4]); - dstopt += sizeof (ipaddr_t); - udi_size -= toh->len; - } - if (udp->udp_recvopts && udi_size != 0) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVOPTS; - toh->len = sizeof (struct T_opthdr) + opt_len; - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); - dstopt += opt_len; - udi_size -= toh->len; - } - if (udp->udp_ip_recvpktinfo && recv_on && - (pinfo->ip_pkt_flags & IPF_RECVADDR)) { - - struct T_opthdr *toh; - struct in_pktinfo *pktinfop; - - toh = (struct T_opthdr *)dstopt; - toh->level = 
IPPROTO_IP; - toh->name = IP_PKTINFO; - toh->len = sizeof (struct T_opthdr) + - sizeof (*pktinfop); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - pktinfop = (struct in_pktinfo *)dstopt; - pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; - pktinfop->ipi_spec_dst = pinfo->ip_pkt_match_addr; - - pktinfop->ipi_addr.s_addr = ((ipha_t *)rptr)->ipha_dst; - - dstopt += sizeof (struct in_pktinfo); - udi_size -= toh->len; - } - - if (udp->udp_recvslla && recv_on && - (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { - - struct T_opthdr *toh; - struct sockaddr_dl *dstptr; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVSLLA; - toh->len = sizeof (struct T_opthdr) + - sizeof (struct sockaddr_dl); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - dstptr = (struct sockaddr_dl *)dstopt; - bcopy(&pinfo->ip_pkt_slla, dstptr, - sizeof (struct sockaddr_dl)); - dstopt += sizeof (struct sockaddr_dl); - udi_size -= toh->len; - } - - if (udp->udp_recvif && recv_on && - (pinfo->ip_pkt_flags & IPF_RECVIF)) { - - struct T_opthdr *toh; - uint_t *dstptr; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVIF; - toh->len = sizeof (struct T_opthdr) + - sizeof (uint_t); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - dstptr = (uint_t *)dstopt; - *dstptr = pinfo->ip_pkt_ifindex; - dstopt += sizeof (uint_t); - udi_size -= toh->len; - } - - if (cr != NULL) { - struct T_opthdr *toh; - - toh = (struct T_opthdr *)dstopt; - toh->level = SOL_SOCKET; - toh->name = SCM_UCRED; - toh->len = sizeof (struct T_opthdr) + ucredsize; - toh->status = 0; - (void) cred2ucred(cr, cpid, &toh[1], rcr); - dstopt += toh->len; - udi_size -= toh->len; - } - - if (udp->udp_recvttl) { - struct T_opthdr *toh; - uint8_t *dstptr; - - toh = (struct T_opthdr *)dstopt; - toh->level = IPPROTO_IP; - toh->name = IP_RECVTTL; - toh->len = sizeof (struct T_opthdr) + - sizeof (uint8_t); - toh->status = 0; - dstopt += sizeof (struct T_opthdr); - 
dstptr = (uint8_t *)dstopt; - *dstptr = ((ipha_t *)rptr)->ipha_ttl; - dstopt += sizeof (uint8_t); - udi_size -= toh->len; - } - - ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ - } - BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); - TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_other_end: q %p (%S)", q, "end"); - if (options_mp != NULL) - freeb(options_mp); - - if (udp->udp_direct_sockfs) { - /* - * There is nothing above us except for the stream head; - * use the read-side synchronous stream interface in - * order to reduce the time spent in interrupt thread. - */ - ASSERT(udp->udp_issocket); - udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); - } else { - /* - * Use regular STREAMS interface to pass data upstream - * if this is not a socket endpoint, or if we have - * switched over to the slow mode due to sockmod being - * popped or a module being pushed on top of us. - */ - putnext(UDP_RD(q), mp); - } -} - -/* ARGSUSED */ -static void -udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) -{ - conn_t *connp = arg; - - udp_rput_other(connp->conn_rq, mp); - udp_exit(connp); } /* * Process a T_BIND_ACK */ static void -udp_rput_bind_ack(queue_t *q, mblk_t *mp) +udp_bind_ack(conn_t *connp, mblk_t *mp) { - udp_t *udp = Q_TO_UDP(q); + udp_t *udp = connp->conn_udp; mblk_t *mp1; ire_t *ire; struct T_bind_ack *tba; uchar_t *addrp; ipa_conn_t *ac; ipa6_conn_t *ac6; + udp_fanout_t *udpf; + udp_stack_t *us = udp->udp_us; - if (udp->udp_discon_pending) - udp->udp_discon_pending = 0; - + ASSERT(udp->udp_pending_op != -1); + rw_enter(&udp->udp_rwlock, RW_WRITER); /* * If a broadcast/multicast address was bound set * the source address to 0. @@ -5786,12 +4976,18 @@ udp_rput_bind_ack(queue_t *q, mblk_t *mp) * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast * local address. 
*/ + udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + us->us_bind_fanout_size)]; if (ire->ire_type == IRE_BROADCAST && udp->udp_state != TS_DATA_XFER) { + ASSERT(udp->udp_pending_op == T_BIND_REQ || + udp->udp_pending_op == O_T_BIND_REQ); /* This was just a local bind to a broadcast addr */ + mutex_enter(&udpf->uf_lock); V6_SET_ZERO(udp->udp_v6src); + mutex_exit(&udpf->uf_lock); if (udp->udp_family == AF_INET6) - (void) udp_build_hdrs(q, udp); + (void) udp_build_hdrs(udp); } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { /* * Local address not yet set - pick it from the @@ -5808,8 +5004,10 @@ udp_rput_bind_ack(queue_t *q, mblk_t *mp) sizeof (ipa_conn_x_t)); ac = &((ipa_conn_x_t *)addrp)->acx_conn; } + mutex_enter(&udpf->uf_lock); IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, &udp->udp_v6src); + mutex_exit(&udpf->uf_lock); break; case AF_INET6: if (tba->ADDR_length == sizeof (ipa6_conn_t)) { @@ -5820,13 +5018,17 @@ udp_rput_bind_ack(queue_t *q, mblk_t *mp) ac6 = &((ipa6_conn_x_t *) addrp)->ac6x_conn; } + mutex_enter(&udpf->uf_lock); udp->udp_v6src = ac6->ac6_laddr; - (void) udp_build_hdrs(q, udp); + mutex_exit(&udpf->uf_lock); + (void) udp_build_hdrs(udp); break; } } mp1 = mp1->b_cont; } + udp->udp_pending_op = -1; + rw_exit(&udp->udp_rwlock); /* * Look for one or more appended ACK message added by * udp_connect or udp_disconnect. 
@@ -5846,20 +5048,86 @@ udp_rput_bind_ack(queue_t *q, mblk_t *mp) while (mp != NULL) { mp1 = mp->b_cont; mp->b_cont = NULL; - putnext(UDP_RD(q), mp); + putnext(connp->conn_rq, mp); mp = mp1; } return; } freemsg(mp->b_cont); mp->b_cont = NULL; - putnext(UDP_RD(q), mp); + putnext(connp->conn_rq, mp); +} + +static void +udp_bind_error(conn_t *connp, mblk_t *mp) +{ + udp_t *udp = connp->conn_udp; + struct T_error_ack *tea; + udp_fanout_t *udpf; + udp_stack_t *us = udp->udp_us; + + tea = (struct T_error_ack *)mp->b_rptr; + + /* + * If our O_T_BIND_REQ/T_BIND_REQ fails, + * clear out the associated port and source + * address before passing the message + * upstream. If this was caused by a T_CONN_REQ + * revert back to bound state. + */ + + rw_enter(&udp->udp_rwlock, RW_WRITER); + ASSERT(udp->udp_pending_op != -1); + tea->ERROR_prim = udp->udp_pending_op; + udp->udp_pending_op = -1; + udpf = &us->us_bind_fanout[ + UDP_BIND_HASH(udp->udp_port, + us->us_bind_fanout_size)]; + mutex_enter(&udpf->uf_lock); + + switch (tea->ERROR_prim) { + case T_CONN_REQ: + ASSERT(udp->udp_state == TS_DATA_XFER); + /* Connect failed */ + /* Revert back to the bound source */ + udp->udp_v6src = udp->udp_bound_v6src; + udp->udp_state = TS_IDLE; + mutex_exit(&udpf->uf_lock); + if (udp->udp_family == AF_INET6) + (void) udp_build_hdrs(udp); + rw_exit(&udp->udp_rwlock); + break; + + case T_DISCON_REQ: + case T_BIND_REQ: + case O_T_BIND_REQ: + V6_SET_ZERO(udp->udp_v6src); + V6_SET_ZERO(udp->udp_bound_v6src); + udp->udp_state = TS_UNBND; + udp_bind_hash_remove(udp, B_TRUE); + udp->udp_port = 0; + mutex_exit(&udpf->uf_lock); + if (udp->udp_family == AF_INET6) + (void) udp_build_hdrs(udp); + rw_exit(&udp->udp_rwlock); + break; + + default: + mutex_exit(&udpf->uf_lock); + rw_exit(&udp->udp_rwlock); + (void) mi_strlog(connp->conn_rq, 1, + SL_ERROR|SL_TRACE, + "udp_input_other: bad ERROR_prim, " + "len %d", tea->ERROR_prim); + } + putnext(connp->conn_rq, mp); } /* - * return SNMP stuff in buffer in mpdata 
+ * return SNMP stuff in buffer in mpdata. We don't hold any lock and report + * information that can be changing beneath us. */ -int +mblk_t * udp_snmp_get(queue_t *q, mblk_t *mpctl) { mblk_t *mpdata; @@ -5880,11 +5148,18 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) int i; connf_t *connfp; conn_t *connp = Q_TO_CONN(q); - udp_t *udp = connp->conn_udp; int v4_conn_idx; int v6_conn_idx; boolean_t needattr; + udp_t *udp; ip_stack_t *ipst = connp->conn_netstack->netstack_ip; + udp_stack_t *us = connp->conn_netstack->netstack_udp; + mblk_t *mp2ctl; + + /* + * make a copy of the original message + */ + mp2ctl = copymsg(mpctl); mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; if (mpctl == NULL || @@ -5896,23 +5171,25 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) freemsg(mp_conn_ctl); freemsg(mp_attr_ctl); freemsg(mp6_conn_ctl); + freemsg(mpctl); + freemsg(mp2ctl); return (0); } zoneid = connp->conn_zoneid; /* fixed length structure for IPv4 and IPv6 counters */ - SET_MIB(udp->udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); - SET_MIB(udp->udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); + SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); + SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); /* synchronize 64- and 32-bit counters */ - SYNC32_MIB(&udp->udp_mib, udpInDatagrams, udpHCInDatagrams); - SYNC32_MIB(&udp->udp_mib, udpOutDatagrams, udpHCOutDatagrams); + SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); + SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = MIB2_UDP; optp->name = 0; - (void) snmp_append_data(mpdata, (char *)&udp->udp_mib, - sizeof (udp->udp_mib)); + (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, + sizeof (us->us_udp_mib)); optp->len = msgdsize(mpdata); qreply(q, mpctl); @@ -5924,7 +5201,7 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) connp = NULL; while ((connp = ipcl_get_next_conn(connfp, connp, - IPCL_UDP))) 
{ + IPCL_UDPCONN))) { udp = connp->conn_udp; if (zoneid != connp->conn_zoneid) continue; @@ -6088,7 +5365,7 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl) else qreply(q, mp6_attr_ctl); - return (1); + return (mp2ctl); } /* @@ -6190,7 +5467,7 @@ udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) connp = NULL; while ((connp = ipcl_get_next_conn(connfp, connp, - IPCL_UDP))) { + IPCL_UDPCONN))) { udp = connp->conn_udp; if (zoneid != GLOBAL_ZONEID && zoneid != connp->conn_zoneid) @@ -6246,7 +5523,7 @@ udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, (char *)optaddr, optlen, err); if (mp1 != NULL) - putnext(UDP_RD(q), mp1); + qreply(q, mp1); done: freemsg(mp); @@ -6260,12 +5537,9 @@ static void udp_unbind(queue_t *q, mblk_t *mp) { udp_t *udp = Q_TO_UDP(q); + udp_fanout_t *udpf; + udp_stack_t *us = udp->udp_us; - /* If a bind has not been done, we can't unbind. */ - if (udp->udp_state == TS_UNBND) { - udp_err_ack(q, mp, TOUTSTATE, 0); - return; - } if (cl_inet_unbind != NULL) { /* * Running in cluster mode - register unbind information @@ -6281,29 +5555,44 @@ udp_unbind(queue_t *q, mblk_t *mp) } } - udp_bind_hash_remove(udp, B_FALSE); - V6_SET_ZERO(udp->udp_v6src); - V6_SET_ZERO(udp->udp_bound_v6src); - udp->udp_port = 0; - udp->udp_state = TS_UNBND; - - if (udp->udp_family == AF_INET6) { - int error; - - /* Rebuild the header template */ - error = udp_build_hdrs(q, udp); - if (error != 0) { - udp_err_ack(q, mp, TSYSERR, error); - return; - } + rw_enter(&udp->udp_rwlock, RW_WRITER); + if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { + rw_exit(&udp->udp_rwlock); + udp_err_ack(q, mp, TOUTSTATE, 0); + return; } + udp->udp_pending_op = T_UNBIND_REQ; + rw_exit(&udp->udp_rwlock); + /* * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK * and therefore ip_unbind must never return NULL. 
*/ mp = ip_unbind(q, mp); ASSERT(mp != NULL); - putnext(UDP_RD(q), mp); + ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); + + /* + * Once we're unbound from IP, the pending operation may be cleared + * here. + */ + rw_enter(&udp->udp_rwlock, RW_WRITER); + udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, + us->us_bind_fanout_size)]; + mutex_enter(&udpf->uf_lock); + udp_bind_hash_remove(udp, B_TRUE); + V6_SET_ZERO(udp->udp_v6src); + V6_SET_ZERO(udp->udp_bound_v6src); + udp->udp_port = 0; + mutex_exit(&udpf->uf_lock); + + udp->udp_pending_op = -1; + udp->udp_state = TS_UNBND; + if (udp->udp_family == AF_INET6) + (void) udp_build_hdrs(udp); + rw_exit(&udp->udp_rwlock); + + qreply(q, mp); } /* @@ -6381,10 +5670,11 @@ udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) int err; uchar_t opt_storage[IP_MAX_OPT_LENGTH]; udp_t *udp = Q_TO_UDP(wq); + udp_stack_t *us = udp->udp_us; err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, opt_storage, udp->udp_mac_exempt, - udp->udp_us->us_netstack->netstack_ip); + us->us_netstack->netstack_ip); if (err == 0) { err = tsol_update_options(&udp->udp_ip_snd_options, &udp->udp_ip_snd_options_len, &udp->udp_label_len, @@ -6413,6 +5703,8 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, int ip_hdr_length; uint32_t ip_len; udpha_t *udpha; + boolean_t lock_held = B_FALSE; + in_port_t uha_src_port; udpattrs_t attrs; uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; uint32_t ip_snd_opt_len = 0; @@ -6457,6 +5749,8 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, /* mp1 points to the M_DATA mblk carrying the packet */ ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); + rw_enter(&udp->udp_rwlock, RW_READER); + lock_held = B_TRUE; /* * Check if our saved options are valid; update if not. 
* TSOL Note: Since we are not in WRITER mode, UDP packets @@ -6557,6 +5851,11 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); } } + uha_src_port = udp->udp_port; + if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { + rw_exit(&udp->udp_rwlock); + lock_held = B_FALSE; + } if (pktinfop->ip4_ill_index != 0) { optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; @@ -6610,12 +5909,14 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, ipha->ipha_ttl = udp->udp_multicast_ttl; udpha->uha_dst_port = port; - udpha->uha_src_port = udp->udp_port; + udpha->uha_src_port = uha_src_port; if (ip_snd_opt_len > 0) { uint32_t cksum; bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); + lock_held = B_FALSE; + rw_exit(&udp->udp_rwlock); /* * Massage source route putting first source route in ipha_dst. * Ignore the destination in T_unitdata_req. @@ -6659,7 +5960,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, ip_len <<= 16; #endif } - + ASSERT(!lock_held); /* Set UDP length and checksum */ *((uint32_t *)&udpha->uha_length) = ip_len; if (DB_CRED(mp) != NULL) @@ -6675,7 +5976,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, mp = NULL; /* We're done. Pass the packet to ip. 
*/ - BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams); + BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, "udp_wput_end: q %p (%S)", q, "end"); @@ -6696,9 +5997,11 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, } done: + if (lock_held) + rw_exit(&udp->udp_rwlock); if (*error != 0) { ASSERT(mp != NULL); - BUMP_MIB(&udp->udp_mib, udpOutErrors); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); } return (mp); } @@ -6708,14 +6011,9 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) { conn_t *connp = udp->udp_connp; ipaddr_t src, dst; - ill_t *ill; ire_t *ire; ipif_t *ipif = NULL; mblk_t *ire_fp_mp; - uint_t ire_fp_mp_len; - uint16_t *up; - uint32_t cksum, hcksum_txflags; - queue_t *dev_q; boolean_t retry_caching; udp_stack_t *us = udp->udp_us; ip_stack_t *ipst = connp->conn_netstack->netstack_ip; @@ -6824,10 +6122,9 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || (ire->ire_max_frag < ntohs(ipha->ipha_length)) || - (connp->conn_nexthop_set) || - (ire->ire_nce == NULL) || - ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) || - ((ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp))) { + ((ire->ire_nce == NULL) || + ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || + connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { if (ipif != NULL) ipif_refrele(ipif); UDP_STAT(us, udp_ip_ire_send); @@ -6836,43 +6133,62 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) return; } - ill = ire_to_ill(ire); - ASSERT(ill != NULL); + if (src == INADDR_ANY && !connp->conn_unspec_src) { + if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) + ipha->ipha_src = ipif->ipif_src_addr; + else + ipha->ipha_src = ire->ire_src_addr; + } - BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); + if (ipif != NULL) + ipif_refrele(ipif); + + udp_xmit(connp->conn_wq, mp, ire, connp, 
connp->conn_zoneid); +} + +static void +udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) +{ + ipaddr_t src, dst; + ill_t *ill; + mblk_t *ire_fp_mp; + uint_t ire_fp_mp_len; + uint16_t *up; + uint32_t cksum, hcksum_txflags; + queue_t *dev_q; + udp_t *udp = connp->conn_udp; + ipha_t *ipha = (ipha_t *)mp->b_rptr; + udp_stack_t *us = udp->udp_us; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; dev_q = ire->ire_stq->q_next; ASSERT(dev_q != NULL); - /* - * If the service thread is already running, or if the driver - * queue is currently flow-controlled, queue this packet. - */ - if ((q->q_first != NULL || connp->conn_draining) || - ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { - if (ipst->ips_ip_output_queue) { - (void) putq(q, mp); - } else { - BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); - freemsg(mp); - } - if (ipif != NULL) - ipif_refrele(ipif); - IRE_REFRELE(ire); + + + if (DEV_Q_IS_FLOW_CTLED(dev_q)) { + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); + BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); + ire_refrele(ire); return; } + ire_fp_mp = ire->ire_nce->nce_fp_mp; + ire_fp_mp_len = MBLKL(ire_fp_mp); + ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); + + dst = ipha->ipha_dst; + src = ipha->ipha_src; + + ill = ire_to_ill(ire); + ASSERT(ill != NULL); + + BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); + ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); #ifndef _BIG_ENDIAN ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); #endif - if (src == INADDR_ANY && !connp->conn_unspec_src) { - if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) - src = ipha->ipha_src = ipif->ipif_src_addr; - else - src = ipha->ipha_src = ire->ire_src_addr; - } - if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { ASSERT(ill->ill_hcksum_capab != NULL); hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; @@ -6918,15 +6234,13 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) if (ilm != NULL) 
{ ip_multicast_loopback(q, ill, mp, connp->conn_multicast_loop ? 0 : - IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); + IP_FF_NO_MCAST_LOOP, zoneid); } /* If multicast TTL is 0 then we are done */ if (ipha->ipha_ttl == 0) { - if (ipif != NULL) - ipif_refrele(ipif); freemsg(mp); - IRE_REFRELE(ire); + ire_refrele(ire); return; } } @@ -6961,8 +6275,6 @@ udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) putnext(ire->ire_stq, mp); } - if (ipif != NULL) - ipif_refrele(ipif); IRE_REFRELE(ire); } @@ -6972,10 +6284,11 @@ udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) udp_t *udp = Q_TO_UDP(wq); int err; uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; + udp_stack_t *us = udp->udp_us; err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, opt_storage, udp->udp_mac_exempt, - udp->udp_us->us_netstack->netstack_ip); + us->us_netstack->netstack_ip); if (err == 0) { err = tsol_update_sticky(&udp->udp_sticky_ipp, &udp->udp_label_len_v6, opt_storage); @@ -6991,97 +6304,145 @@ udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) return (err); } +void +udp_output_connected(void *arg, mblk_t *mp) +{ + conn_t *connp = (conn_t *)arg; + udp_t *udp = connp->conn_udp; + udp_stack_t *us = udp->udp_us; + ipaddr_t v4dst; + in_port_t dstport; + boolean_t mapped_addr; + struct sockaddr_storage ss; + sin_t *sin; + sin6_t *sin6; + struct sockaddr *addr; + socklen_t addrlen; + int error; + boolean_t insert_spi = udp->udp_nat_t_endpoint; + + /* M_DATA for connected socket */ + + ASSERT(udp->udp_issocket); + UDP_DBGSTAT(us, udp_data_conn); + + mutex_enter(&connp->conn_lock); + if (udp->udp_state != TS_DATA_XFER) { + mutex_exit(&connp->conn_lock); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDP_STAT(us, udp_out_err_notconn); + freemsg(mp); + TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, + "udp_wput_end: connp %p (%S)", connp, + "not-connected; address required"); + return; + } + + mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); + if (mapped_addr) + 
IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); + + /* Initialize addr and addrlen as if they're passed in */ + if (udp->udp_family == AF_INET) { + sin = (sin_t *)&ss; + sin->sin_family = AF_INET; + dstport = sin->sin_port = udp->udp_dstport; + ASSERT(mapped_addr); + sin->sin_addr.s_addr = v4dst; + addr = (struct sockaddr *)sin; + addrlen = sizeof (*sin); + } else { + sin6 = (sin6_t *)&ss; + sin6->sin6_family = AF_INET6; + dstport = sin6->sin6_port = udp->udp_dstport; + sin6->sin6_flowinfo = udp->udp_flowinfo; + sin6->sin6_addr = udp->udp_v6dst; + sin6->sin6_scope_id = 0; + sin6->__sin6_src_id = 0; + addr = (struct sockaddr *)sin6; + addrlen = sizeof (*sin6); + } + mutex_exit(&connp->conn_lock); + + if (mapped_addr) { + /* + * Handle both AF_INET and AF_INET6; the latter + * for IPV4 mapped destination addresses. Note + * here that both addr and addrlen point to the + * corresponding struct depending on the address + * family of the socket. + */ + mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, + insert_spi); + } else { + mp = udp_output_v6(connp, mp, sin6, &error); + } + if (error == 0) { + ASSERT(mp == NULL); + return; + } + + UDP_STAT(us, udp_out_err_output); + ASSERT(mp != NULL); + /* mp is freed by the following routine */ + udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, (t_scalar_t)addrlen, + (t_scalar_t)error); +} + /* * This routine handles all messages passed downstream. It either * consumes the message or passes it downstream; it never queues a * a message. + * + * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode + * is valid when we are directly beneath the stream head, and thus sockfs + * is able to bypass STREAMS and directly call us, passing along the sockaddr + * structure without the cumbersome T_UNITDATA_REQ interface for the case of + * connected endpoints. 
*/ -static void -udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) +void +udp_wput(queue_t *q, mblk_t *mp) { sin6_t *sin6; sin_t *sin; ipaddr_t v4dst; uint16_t port; uint_t srcid; - queue_t *q = connp->conn_wq; + conn_t *connp = Q_TO_CONN(q); udp_t *udp = connp->conn_udp; int error = 0; - struct sockaddr_storage ss; + struct sockaddr *addr; + socklen_t addrlen; udp_stack_t *us = udp->udp_us; boolean_t insert_spi = udp->udp_nat_t_endpoint; TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, - "udp_wput_start: connp %p mp %p", connp, mp); + "udp_wput_start: queue %p mp %p", q, mp); /* * We directly handle several cases here: T_UNITDATA_REQ message - * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both - * connected and non-connected socket. The latter carries the - * address structure along when this routine gets called. + * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected + * socket. */ switch (DB_TYPE(mp)) { case M_DATA: + /* + * Quick check for error cases. 
Checks will be done again + * under the lock later on + */ if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { - if (!udp->udp_direct_sockfs || - addr == NULL || addrlen == 0) { - /* Not connected; address is required */ - BUMP_MIB(&udp->udp_mib, udpOutErrors); - UDP_STAT(us, udp_out_err_notconn); - freemsg(mp); - TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: connp %p (%S)", connp, - "not-connected; address required"); - return; - } - ASSERT(udp->udp_issocket); - UDP_DBGSTAT(us, udp_data_notconn); - /* Not connected; do some more checks below */ - break; - } - /* M_DATA for connected socket */ - UDP_DBGSTAT(us, udp_data_conn); - IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); - - /* Initialize addr and addrlen as if they're passed in */ - if (udp->udp_family == AF_INET) { - sin = (sin_t *)&ss; - sin->sin_family = AF_INET; - sin->sin_port = udp->udp_dstport; - sin->sin_addr.s_addr = v4dst; - addr = (struct sockaddr *)sin; - addrlen = sizeof (*sin); - } else { - sin6 = (sin6_t *)&ss; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = udp->udp_dstport; - sin6->sin6_flowinfo = udp->udp_flowinfo; - sin6->sin6_addr = udp->udp_v6dst; - sin6->sin6_scope_id = 0; - sin6->__sin6_src_id = 0; - addr = (struct sockaddr *)sin6; - addrlen = sizeof (*sin6); - } - - if (udp->udp_family == AF_INET || - IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { - /* - * Handle both AF_INET and AF_INET6; the latter - * for IPV4 mapped destination addresses. Note - * here that both addr and addrlen point to the - * corresponding struct depending on the address - * family of the socket. 
- */ - mp = udp_output_v4(connp, mp, v4dst, - udp->udp_dstport, 0, &error, insert_spi); - } else { - mp = udp_output_v6(connp, mp, sin6, &error); - } - if (error != 0) { - ASSERT(addr != NULL && addrlen != 0); - goto ud_error; + /* Not connected; address is required */ + BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDP_STAT(us, udp_out_err_notconn); + freemsg(mp); + TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, + "udp_wput_end: connp %p (%S)", connp, + "not-connected; address required"); + return; } + udp_output_connected(connp, mp); return; + case M_PROTO: case M_PCPROTO: { struct T_unitdata_req *tudr; @@ -7128,8 +6489,7 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) /* FALLTHRU */ } default: - udp_become_writer(connp, mp, udp_wput_other_wrapper, - SQTAG_UDP_OUTPUT); + udp_wput_other(q, mp); return; } ASSERT(addr != NULL); @@ -7137,8 +6497,8 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) switch (udp->udp_family) { case AF_INET6: sin6 = (sin6_t *)addr; - if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || - sin6->sin6_family != AF_INET6) { + if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || + (sin6->sin6_family != AF_INET6)) { TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, "udp_wput_end: q %p (%S)", q, "badaddr"); error = EADDRNOTAVAIL; @@ -7180,8 +6540,8 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) case AF_INET: sin = (sin_t *)addr; - if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || - sin->sin_family != AF_INET) { + if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || + (sin->sin_family != AF_INET)) { TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, "udp_wput_end: q %p (%S)", q, "badaddr"); error = EADDRNOTAVAIL; @@ -7205,107 +6565,6 @@ ud_error: } } -/* ARGSUSED */ -static void -udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) -{ - udp_output((conn_t *)arg, mp, NULL, 0); - _UDP_EXIT((conn_t *)arg); -} - -static void -udp_wput(queue_t *q, 
mblk_t *mp) -{ - _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, - SQTAG_UDP_WPUT); -} - -/* - * Allocate and prepare a T_UNITDATA_REQ message. - */ -static mblk_t * -udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) -{ - struct T_unitdata_req *tudr; - mblk_t *mp; - - mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); - if (mp != NULL) { - mp->b_wptr += sizeof (*tudr) + addrlen; - DB_TYPE(mp) = M_PROTO; - - tudr = (struct T_unitdata_req *)mp->b_rptr; - tudr->PRIM_type = T_UNITDATA_REQ; - tudr->DEST_length = addrlen; - tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); - tudr->OPT_length = 0; - tudr->OPT_offset = 0; - bcopy(addr, tudr+1, addrlen); - } - return (mp); -} - -/* - * Entry point for sockfs when udp is in "direct sockfs" mode. This mode - * is valid when we are directly beneath the stream head, and thus sockfs - * is able to bypass STREAMS and directly call us, passing along the sockaddr - * structure without the cumbersome T_UNITDATA_REQ interface. Note that - * this is done for both connected and non-connected endpoint. - */ -void -udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) -{ - conn_t *connp; - udp_t *udp; - udp_stack_t *us; - - q = UDP_WR(q); - connp = Q_TO_CONN(q); - udp = connp->conn_udp; - us = udp->udp_us; - - /* udpsockfs should only send down M_DATA for this entry point */ - ASSERT(DB_TYPE(mp) == M_DATA); - - mutex_enter(&connp->conn_lock); - UDP_MODE_ASSERTIONS(udp, UDP_ENTER); - - if (udp->udp_mode != UDP_MT_HOT) { - /* - * We can't enter this conn right away because another - * thread is currently executing as writer; therefore we - * need to deposit the message into the squeue to be - * drained later. If a socket address is present, we - * need to create a T_UNITDATA_REQ message as placeholder. 
- */ - if (addr != NULL && addrlen != 0) { - mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); - - if (tudr_mp == NULL) { - mutex_exit(&connp->conn_lock); - BUMP_MIB(&udp->udp_mib, udpOutErrors); - UDP_STAT(us, udp_out_err_tudr); - freemsg(mp); - return; - } - /* Tag the packet with T_UNITDATA_REQ */ - tudr_mp->b_cont = mp; - mp = tudr_mp; - } - mutex_exit(&connp->conn_lock); - udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); - return; - } - - /* We can execute as reader right away. */ - UDP_READERS_INCREF(udp); - mutex_exit(&connp->conn_lock); - - udp_output(connp, mp, addr, addrlen); - - udp_exit(connp); -} - /* * udp_output_v6(): * Assumes that udp_wput did some sanity checking on the destination @@ -7338,6 +6597,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) uint_t hopoptslen = 0; boolean_t is_ancillary = B_FALSE; udp_stack_t *us = udp->udp_us; + size_t sth_wroff = 0; *error = 0; @@ -7366,12 +6626,15 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { attrs.udpattr_ipp6 = ipp; attrs.udpattr_mb = mp; - if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) + if (udp_unitdata_opt_process(q, mp, error, + &attrs) < 0) { goto done; + } ASSERT(*error == 0); opt_present = B_TRUE; } } + rw_enter(&udp->udp_rwlock, RW_READER); ignore = ipp->ipp_sticky_ignored; /* mp1 points to the M_DATA mblk carrying the packet */ @@ -7417,6 +6680,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); *error = ECONNREFUSED; + rw_exit(&udp->udp_rwlock); mutex_exit(&connp->conn_lock); goto done; } @@ -7429,6 +6693,7 @@ udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || connp->conn_mlp_type != mlptSingle) && (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { + 
rw_exit(&udp->udp_rwlock); mutex_exit(&connp->conn_lock); goto done; } @@ -7596,15 +6861,17 @@ no_options: ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || !OK_32PTR(ip6h)) { + /* Try to get everything in a single mblk next time */ if (udp_ip_hdr_len > udp->udp_max_hdr_len) { udp->udp_max_hdr_len = udp_ip_hdr_len; - (void) mi_set_sth_wroff(UDP_RD(q), - udp->udp_max_hdr_len + us->us_wroff_extra); + sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; } + mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); if (mp2 == NULL) { *error = ENOMEM; + rw_exit(&udp->udp_rwlock); goto done; } mp2->b_wptr = DB_LIM(mp2); @@ -7801,6 +7068,7 @@ no_options: ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, tipp->ipp_tclass); } + rw_exit(&udp->udp_rwlock); if (option_exists & IPPF_RTHDR) { ip6_rthdr_t *rth; @@ -7902,17 +7170,21 @@ no_options: mp = NULL; /* We're done. Pass the packet to IP */ - BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams); + BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); ip_output_v6(connp, mp1, q, IP_WPUT); done: + if (sth_wroff != 0) { + (void) mi_set_sth_wroff(RD(q), + udp->udp_max_hdr_len + us->us_wroff_extra); + } if (hopoptsptr != NULL && !is_ancillary) { kmem_free(hopoptsptr, hopoptslen); hopoptsptr = NULL; } if (*error != 0) { ASSERT(mp != NULL); - BUMP_MIB(&udp->udp_mib, udpOutErrors); + BUMP_MIB(&us->us_udp_mib, udpOutErrors); } return (mp); } @@ -7988,26 +7260,17 @@ udp_wput_other(queue_t *q, mblk_t *mp) "udp_wput_other_end: q %p (%S)", q, "unbindreq"); return; case T_SVR4_OPTMGMT_REQ: - if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) - /* - * Use upper queue for option processing in - * case the request is not handled at this - * level and needs to be passed down to IP. 
- */ - (void) svr4_optcom_req(_WR(UDP_RD(q)), - mp, cr, &udp_opt_obj); + if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, + cr)) { + (void) svr4_optcom_req(q, + mp, cr, &udp_opt_obj, B_TRUE); + } TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); return; case T_OPTMGMT_REQ: - /* - * Use upper queue for option processing in - * case the request is not handled at this - * level and needs to be passed down to IP. - */ - (void) tpi_optcom_req(_WR(UDP_RD(q)), - mp, cr, &udp_opt_obj); + (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); return; @@ -8057,7 +7320,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) iocp->ioc_error = ENOTCONN; iocp->ioc_count = 0; mp->b_datap->db_type = M_IOCACK; - putnext(UDP_RD(q), mp); + qreply(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, "udp_wput_other_end: q %p (%S)", q, "getpeername"); @@ -8081,7 +7344,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) /* nd_getset performs the necessary checking */ case ND_GET: if (nd_getset(q, us->us_nd, mp)) { - putnext(UDP_RD(q), mp); + qreply(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, "udp_wput_other_end: q %p (%S)", q, "get"); return; @@ -8107,7 +7370,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) * stream interface and drain any * queued data. 
*/ - udp_rcv_drain(UDP_RD(q), udp, + udp_rcv_drain(RD(q), udp, B_FALSE); ASSERT(!udp->udp_direct_sockfs); UDP_STAT(us, udp_sock_fallback); @@ -8117,7 +7380,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) } iocp->ioc_count = 0; iocp->ioc_rval = 0; - putnext(UDP_RD(q), mp); + qreply(q, mp); return; default: break; @@ -8137,14 +7400,6 @@ udp_wput_other(queue_t *q, mblk_t *mp) ip_output(connp, mp, q, IP_WPUT); } -/* ARGSUSED */ -static void -udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) -{ - udp_wput_other(((conn_t *)arg)->conn_wq, mp); - udp_exit((conn_t *)arg); -} - /* * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA * messages. @@ -8171,7 +7426,6 @@ udp_wput_iocdata(queue_t *q, mblk_t *mp) return; } - q = WR(UDP_RD(q)); switch (mi_copy_state(q, mp, &mp1)) { case -1: return; @@ -8317,11 +7571,7 @@ udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, udreqp = (struct T_unitdata_req *)mp->b_rptr; - /* - * Use upper queue for option processing since the callback - * routines expect to be called in UDP instance instead of IP. 
- */ - *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, + *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, udreqp->OPT_offset, cr, &udp_opt_obj, udpattrs, &is_absreq_failure); @@ -8339,13 +7589,9 @@ udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, void udp_ddi_init(void) { - UDP6_MAJ = ddi_name_to_major(UDP6); udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, udp_opt_obj.odb_opt_arr_cnt); - udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), - CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); - /* * We want to be informed each time a stack is created or * destroyed in the kernel, so we can maintain the @@ -8358,8 +7604,6 @@ void udp_ddi_destroy(void) { netstack_unregister(NS_UDP); - - kmem_cache_destroy(udp_cache); } /* @@ -8584,17 +7828,6 @@ udp_kstat_update(kstat_t *kp, int rw) return (0); } -/* ARGSUSED */ -static void -udp_rput(queue_t *q, mblk_t *mp) -{ - /* - * We get here whenever we do qreply() from IP, - * i.e as part of handlings ioctls, etc. - */ - putnext(q, mp); -} - /* * Read-side synchronous stream info entry point, called as a * result of handling certain STREAMS ioctl operations. 
@@ -8606,7 +7839,7 @@ udp_rinfop(queue_t *q, infod_t *dp) uint_t cmd = dp->d_cmd; int res = 0; int error = 0; - udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); + udp_t *udp = Q_TO_UDP(q); struct stdata *stp = STREAM(q); mutex_enter(&udp->udp_drain_lock); @@ -8681,12 +7914,9 @@ static int udp_rrw(queue_t *q, struiod_t *dp) { mblk_t *mp; - udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); + udp_t *udp = Q_TO_UDP(q); udp_stack_t *us = udp->udp_us; - /* We should never get here when we're in SNMP mode */ - ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); - /* * Dequeue datagram from the head of the list and return * it to caller; also ensure that RSLEEP sd_wakeq flag is @@ -8850,3 +8080,40 @@ udp_set_rcv_hiwat(udp_t *udp, size_t size) udp->udp_rcv_hiwat = size; return (size); } + +/* + * For the lower queue so that UDP can be a dummy mux. + * Nobody should be sending + * packets up this stream + */ +static void +udp_lrput(queue_t *q, mblk_t *mp) +{ + mblk_t *mp1; + + switch (mp->b_datap->db_type) { + case M_FLUSH: + /* Turn around */ + if (*mp->b_rptr & FLUSHW) { + *mp->b_rptr &= ~FLUSHR; + qreply(q, mp); + return; + } + break; + } + /* Could receive messages that passed through ar_rput */ + for (mp1 = mp; mp1; mp1 = mp1->b_cont) + mp1->b_prev = mp1->b_next = NULL; + freemsg(mp); +} + +/* + * For the lower queue so that UDP can be a dummy mux. + * Nobody should be sending packets down this stream. + */ +/* ARGSUSED */ +void +udp_lwput(queue_t *q, mblk_t *mp) +{ + freemsg(mp); +} diff --git a/usr/src/uts/common/inet/udp/udp6ddi.c b/usr/src/uts/common/inet/udp/udp6ddi.c index c5b203c654..a5f80f818c 100644 --- a/usr/src/uts/common/inet/udp/udp6ddi.c +++ b/usr/src/uts/common/inet/udp/udp6ddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,27 +32,24 @@ #include <inet/ip.h> #define INET_NAME "udp6" -#define INET_DEVMINOR IPV6_MINOR +#define INET_DEVMINOR 0 #define INET_DEVDESC "UDP6 STREAMS driver %I%" -#define INET_STRTAB udpinfo -#define INET_DEVMTFLAGS IP_DEVMTFLAGS +#define INET_DEVSTRTAB udpinfov6 /* * We define both synchronous STREAMS and sockfs direct-access * mode for UDP module instance, because it is autopushed on * top of /dev/ip for the sockets case. */ -#define INET_MODMTFLAGS (D_MP|D_SYNCSTR|_D_DIRECT) +#define INET_DEVMTFLAGS (D_MP|D_SYNCSTR|_D_DIRECT) #include "../inetddi.c" int _init(void) { - INET_BECOME_IP(); - /* - * device initialization is done in udpddi.c:_init() - * i.e. it is assumed it is called first + * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() */ return (mod_install(&modlinkage)); } diff --git a/usr/src/uts/common/inet/udp/udpddi.c b/usr/src/uts/common/inet/udp/udpddi.c index ad5542295e..32d1021191 100644 --- a/usr/src/uts/common/inet/udp/udpddi.c +++ b/usr/src/uts/common/inet/udp/udpddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -35,51 +34,35 @@ #include <inet/udp_impl.h> #define INET_NAME "udp" -#define INET_MODDESC "UDP STREAMS module %I%" +#define INET_MODDESC "UDP dummy STREAMS module %I%" #define INET_DEVDESC "UDP STREAMS driver %I%" -#define INET_DEVMINOR IPV4_MINOR -#define INET_STRTAB udpinfo -#define INET_DEVMTFLAGS IP_DEVMTFLAGS +#define INET_DEVMINOR 0 +#define INET_MODSTRTAB dummymodinfo +#define INET_DEVSTRTAB udpinfov4 +#define INET_MODMTFLAGS D_MP /* * We define both synchronous STREAMS and sockfs direct-access * mode for UDP module instance, because it is autopushed on * top of /dev/ip for the sockets case. */ -#define INET_MODMTFLAGS (D_MP|D_SYNCSTR|_D_DIRECT) +#define INET_DEVMTFLAGS (D_MP|D_SYNCSTR|_D_DIRECT) #include "../inetddi.c" int _init(void) { - int error; - - INET_BECOME_IP(); - /* - * Note: After mod_install succeeds, another thread can enter - * therefore all initialization is done before it and any - * de-initialization needed done if it fails. 
+ * device initialization happens when the actual code containing + * module (/kernel/drv/ip) is loaded, and driven from ip_ddi_init() */ - udp_ddi_init(); - error = mod_install(&modlinkage); - if (error != 0) - udp_ddi_destroy(); - - return (error); + return (mod_install(&modlinkage)); } int _fini(void) { - int error; - - error = mod_remove(&modlinkage); - if (error != 0) - return (error); - - udp_ddi_destroy(); - return (0); + return (mod_remove(&modlinkage)); } int diff --git a/usr/src/uts/common/inet/udp_impl.h b/usr/src/uts/common/inet/udp_impl.h index 9ad4cacbc9..a77b605088 100644 --- a/usr/src/uts/common/inet/udp_impl.h +++ b/usr/src/uts/common/inet/udp_impl.h @@ -53,13 +53,89 @@ extern "C" { #define UDP_MOD_ID 5607 -/* udp_mode. UDP_MT_HOT and UDP_SQUEUE are stable modes. Rest are transient */ -typedef enum { - UDP_MT_HOT = 0, /* UDP endpoint is MT HOT */ - UDP_MT_QUEUED = 1, /* Messages enqueued in udp_mphead */ - UDP_QUEUED_SQUEUE = 2, /* Messages enqueued in conn_sqp */ - UDP_SQUEUE = 3 /* Single threaded using squeues */ -} udp_mode_t; +typedef struct udp_bits_s { + + uint32_t + + udpb_debug : 1, /* SO_DEBUG "socket" option. */ + udpb_dontroute : 1, /* SO_DONTROUTE "socket" option. */ + udpb_broadcast : 1, /* SO_BROADCAST "socket" option. */ + udpb_useloopback : 1, /* SO_USELOOPBACK "socket" option */ + + udpb_reuseaddr : 1, /* SO_REUSEADDR "socket" option. 
*/ + udpb_dgram_errind : 1, /* SO_DGRAM_ERRIND option */ + udpb_recvdstaddr : 1, /* IP_RECVDSTADDR option */ + udpb_recvopts : 1, /* IP_RECVOPTS option */ + + udpb_unspec_source : 1, /* IP*_UNSPEC_SRC option */ + udpb_ip_recvpktinfo : 1, /* IPV6_RECVPKTINFO option */ + udpb_ipv6_recvhoplimit : 1, /* IPV6_RECVHOPLIMIT option */ + udpb_ipv6_recvhopopts : 1, /* IPV6_RECVHOPOPTS option */ + + udpb_ipv6_recvdstopts : 1, /* IPV6_RECVDSTOPTS option */ + udpb_ipv6_recvrthdr : 1, /* IPV6_RECVRTHDR option */ + udpb_ipv6_recvtclass : 1, /* IPV6_RECVTCLASS */ + udpb_ipv6_recvpathmtu : 1, /* IPV6_RECVPATHMTU */ + + udpb_anon_priv_bind : 1, + udpb_exclbind : 1, /* ``exclusive'' binding */ + udpb_recvif : 1, /* IP_RECVIF option */ + udpb_recvslla : 1, /* IP_RECVSLLA option */ + + udpb_recvttl : 1, /* IP_RECVTTL option */ + udpb_recvucred : 1, /* IP_RECVUCRED option */ + udpb_old_ipv6_recvdstopts : 1, /* old form of IPV6_DSTOPTS */ + udpb_ipv6_recvrthdrdstopts : 1, /* IPV6_RECVRTHDRDSTOPTS */ + + udpb_rcvhdr : 1, /* UDP_RCVHDR option */ + udpb_issocket : 1, /* socket mode */ + udpb_direct_sockfs : 1, /* direct calls to/from sockfs */ + udpb_timestamp : 1, /* SO_TIMESTAMP "socket" option */ + + udpb_anon_mlp : 1, /* SO_ANON_MLP */ + udpb_mac_exempt : 1, /* SO_MAC_EXEMPT */ + udpb_nat_t_endpoint : 1, /* UDP_NAT_T_ENDPOINT option */ + udpb_pad_to_bit_31 : 1; +} udp_bits_t; + +#define udp_debug udp_bits.udpb_debug +#define udp_dontroute udp_bits.udpb_dontroute +#define udp_broadcast udp_bits.udpb_broadcast +#define udp_useloopback udp_bits.udpb_useloopback + +#define udp_reuseaddr udp_bits.udpb_reuseaddr +#define udp_dgram_errind udp_bits.udpb_dgram_errind +#define udp_recvdstaddr udp_bits.udpb_recvdstaddr +#define udp_recvopts udp_bits.udpb_recvopts + +#define udp_unspec_source udp_bits.udpb_unspec_source +#define udp_ip_recvpktinfo udp_bits.udpb_ip_recvpktinfo +#define udp_ipv6_recvhoplimit udp_bits.udpb_ipv6_recvhoplimit +#define udp_ipv6_recvhopopts udp_bits.udpb_ipv6_recvhopopts 
+ +#define udp_ipv6_recvdstopts udp_bits.udpb_ipv6_recvdstopts +#define udp_ipv6_recvrthdr udp_bits.udpb_ipv6_recvrthdr +#define udp_ipv6_recvtclass udp_bits.udpb_ipv6_recvtclass +#define udp_ipv6_recvpathmtu udp_bits.udpb_ipv6_recvpathmtu + +#define udp_anon_priv_bind udp_bits.udpb_anon_priv_bind +#define udp_exclbind udp_bits.udpb_exclbind +#define udp_recvif udp_bits.udpb_recvif +#define udp_recvslla udp_bits.udpb_recvslla + +#define udp_recvttl udp_bits.udpb_recvttl +#define udp_recvucred udp_bits.udpb_recvucred +#define udp_old_ipv6_recvdstopts udp_bits.udpb_old_ipv6_recvdstopts +#define udp_ipv6_recvrthdrdstopts udp_bits.udpb_ipv6_recvrthdrdstopts + +#define udp_rcvhdr udp_bits.udpb_rcvhdr +#define udp_issocket udp_bits.udpb_issocket +#define udp_direct_sockfs udp_bits.udpb_direct_sockfs +#define udp_timestamp udp_bits.udpb_timestamp + +#define udp_anon_mlp udp_bits.udpb_anon_mlp +#define udp_mac_exempt udp_bits.udpb_mac_exempt +#define udp_nat_t_endpoint udp_bits.udpb_nat_t_endpoint /* * Bind hash list size and hash function. It has to be a power of 2 for @@ -80,6 +156,16 @@ typedef struct udp_fanout_s { #endif } udp_fanout_t; +/* + * dev_q is the write side queue of the entity below IP. + * If there is a module below IP, we can't optimize by looking + * at q_first of the queue below IP. 
If the driver is directly + * below IP and if the q_first is NULL, we optimize by not doing + * the canput check + */ +#define DEV_Q_IS_FLOW_CTLED(dev_q) \ + (((dev_q)->q_next != NULL || (dev_q)->q_first != NULL) && \ + !canput(dev_q)) /* Kstats */ typedef struct udp_stat { /* Class "net" kstats */ @@ -111,7 +197,11 @@ typedef struct udp_stat { /* Class "net" kstats */ kstat_named_t udp_in_recvpktinfo; kstat_named_t udp_in_recvtclass; kstat_named_t udp_in_timestamp; - kstat_named_t udp_ip_recvpktinfo; + kstat_named_t udp_ip_rcvpktinfo; + kstat_named_t udp_direct_send; + kstat_named_t udp_bwsq_send; + kstat_named_t udp_connected_direct_send; + kstat_named_t udp_connected_bwsq_send; #ifdef DEBUG kstat_named_t udp_data_conn; kstat_named_t udp_data_notconn; @@ -173,20 +263,31 @@ typedef struct udp_stack udp_stack_t; /* Internal udp control structure, one per open stream */ typedef struct udp_s { + krwlock_t udp_rwlock; /* Protects most of udp_t */ + t_scalar_t udp_pending_op; /* The current TPI operation */ + /* + * Following fields up to udp_ipversion protected by conn_lock, + * and the fanout lock i.e.uf_lock. Need both locks to change the + * field, either lock is sufficient for reading the field. + */ uint32_t udp_state; /* TPI state */ in_port_t udp_port; /* Port bound to this stream */ in_port_t udp_dstport; /* Connected port */ in6_addr_t udp_v6src; /* Source address of this stream */ in6_addr_t udp_bound_v6src; /* Explicitly bound address */ in6_addr_t udp_v6dst; /* Connected destination */ - uint32_t udp_flowinfo; /* Connected flow id and tclass */ - uint32_t udp_max_hdr_len; /* For write offset in stream head */ - sa_family_t udp_family; /* Family from socket() call */ /* * IP format that packets transmitted from this struct should use. * Value can be IP4_VERSION or IPV6_VERSION. 
*/ ushort_t udp_ipversion; + + /* Written to only once at the time of opening the endpoint */ + sa_family_t udp_family; /* Family from socket() call */ + + /* Following protected by udp_rwlock */ + uint32_t udp_flowinfo; /* Connected flow id and tclass */ + uint32_t udp_max_hdr_len; /* For write offset in stream head */ uint32_t udp_ip_snd_options_len; /* Len of IPv4 options */ uchar_t *udp_ip_snd_options; /* Ptr to IPv4 options */ uint32_t udp_ip_rcv_options_len; /* Len of IPv4 options recvd */ @@ -196,69 +297,31 @@ typedef struct udp_s { uint_t udp_multicast_if_index; /* IPV6_MULTICAST_IF option */ int udp_bound_if; /* IP*_BOUND_IF option */ int udp_xmit_if; /* IP_XMIT_IF option */ + + /* Written to only once at the time of opening the endpoint */ conn_t *udp_connp; - uint32_t - udp_debug : 1, /* SO_DEBUG "socket" option. */ - udp_dontroute : 1, /* SO_DONTROUTE "socket" option. */ - udp_broadcast : 1, /* SO_BROADCAST "socket" option. */ - udp_useloopback : 1, /* SO_USELOOPBACK "socket" option */ - - udp_reuseaddr : 1, /* SO_REUSEADDR "socket" option. 
*/ - udp_dgram_errind : 1, /* SO_DGRAM_ERRIND option */ - udp_recvdstaddr : 1, /* IP_RECVDSTADDR option */ - udp_recvopts : 1, /* IP_RECVOPTS option */ - - udp_discon_pending : 1, /* T_DISCON_REQ in progress */ - udp_unspec_source : 1, /* IP*_UNSPEC_SRC option */ - udp_ip_recvpktinfo : 1, /* IPV[4,6]_RECVPKTINFO option */ - udp_ipv6_recvhoplimit : 1, /* IPV6_RECVHOPLIMIT option */ - - udp_ipv6_recvhopopts : 1, /* IPV6_RECVHOPOPTS option */ - udp_ipv6_recvdstopts : 1, /* IPV6_RECVDSTOPTS option */ - udp_ipv6_recvrthdr : 1, /* IPV6_RECVRTHDR option */ - udp_ipv6_recvtclass : 1, /* IPV6_RECVTCLASS */ - - udp_ipv6_recvpathmtu : 1, /* IPV6_RECVPATHMTU */ - udp_anon_priv_bind : 1, - udp_exclbind : 1, /* ``exclusive'' binding */ - udp_recvif : 1, /* IP_RECVIF option */ - - udp_recvslla : 1, /* IP_RECVSLLA option */ - udp_recvttl : 1, /* IP_RECVTTL option */ - udp_recvucred : 1, /* IP_RECVUCRED option */ - udp_old_ipv6_recvdstopts : 1, /* old form of IPV6_DSTOPTS */ - - udp_ipv6_recvrthdrdstopts : 1, /* IPV6_RECVRTHDRDSTOPTS */ - udp_rcvhdr : 1, /* UDP_RCVHDR option */ - udp_issocket : 1, /* socket mode */ - udp_direct_sockfs : 1, /* direct calls to/from sockfs */ - - udp_timestamp : 1, /* SO_TIMESTAMP "socket" option */ - udp_anon_mlp : 1, /* SO_ANON_MLP */ - udp_mac_exempt : 1, /* SO_MAC_EXEMPT */ - udp_nat_t_endpoint : 1; /* UDP_NAT_T_ENDPOINT option */ + /* Following protected by udp_rwlock */ + udp_bits_t udp_bits; /* Bit fields defined above */ uint8_t udp_type_of_service; /* IP_TOS option */ uint8_t udp_ttl; /* TTL or hoplimit */ - ip6_pkt_t udp_sticky_ipp; /* Sticky options */ uint8_t *udp_sticky_hdrs; /* Prebuilt IPv6 hdrs */ uint_t udp_sticky_hdrs_len; /* Incl. ip6h and any ip6i */ + + /* Following 2 fields protected by the uf_lock */ struct udp_s *udp_bind_hash; /* Bind hash chain */ struct udp_s **udp_ptpbhn; /* Pointer to previous bind hash next. 
*/ - udp_mode_t udp_mode; /* Current mode of operation */ - mblk_t *udp_mphead; /* Head of the queued operations */ - mblk_t *udp_mptail; /* Tail of the queued operations */ - uint_t udp_mpcount; /* Number of messages in the queue */ - uint_t udp_reader_count; /* Number of reader threads */ - uint_t udp_squeue_count; /* Number of messages in conn_sqp */ kmutex_t udp_drain_lock; /* lock for udp_rcv_list */ + /* Protected by udp_drain_lock */ boolean_t udp_drain_qfull; /* drain queue is full */ + + /* Following protected by udp_rwlock */ mblk_t *udp_rcv_list_head; /* b_next chain of mblks */ mblk_t *udp_rcv_list_tail; /* last mblk in chain */ uint_t udp_rcv_cnt; /* total data in rcv_list */ - uint_t udp_rcv_msgcnt; /* total messages in rcv_list */ + uint_t udp_rcv_msgcnt; /* total msgs in rcv_list */ size_t udp_rcv_hiwat; /* receive high watermark */ uint_t udp_label_len; /* length of security label */ uint_t udp_label_len_v6; /* len of v6 security label */ @@ -268,7 +331,6 @@ typedef struct udp_s { pid_t udp_open_pid; /* process id when this was opened */ udp_stack_t *udp_us; /* Stack instance for zone */ } udp_t; -#define udp_mib udp_us->us_udp_mib /* UDP Protocol header */ /* UDP Protocol header aligned */ @@ -303,22 +365,20 @@ typedef struct udpahdr_s { #define UDP_DBGSTAT(us, x) #endif /* DEBUG */ -extern major_t UDP6_MAJ; - extern int udp_opt_default(queue_t *, t_scalar_t, t_scalar_t, uchar_t *); extern int udp_opt_get(queue_t *, t_scalar_t, t_scalar_t, uchar_t *); extern int udp_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, uchar_t *, void *, cred_t *, mblk_t *); -extern int udp_snmp_get(queue_t *, mblk_t *); +extern mblk_t *udp_snmp_get(queue_t *, mblk_t *); extern int udp_snmp_set(queue_t *, t_scalar_t, t_scalar_t, uchar_t *, int); extern void udp_close_free(conn_t *); extern void udp_quiesce_conn(conn_t *); extern void udp_ddi_init(void); extern void udp_ddi_destroy(void); extern void udp_resume_bind(conn_t *, mblk_t *); -extern void 
udp_conn_recv(conn_t *, mblk_t *); -extern void udp_wput_data(queue_t *, mblk_t *, struct sockaddr *, - socklen_t); +extern void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, + socklen_t addrlen); +extern void udp_wput(queue_t *, mblk_t *); extern int udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr); diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsddi.c b/usr/src/uts/common/io/ib/clients/rds/rdsddi.c index ce9ad21f5b..c7eaed8027 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdsddi.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdsddi.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -640,10 +640,11 @@ rds_wput_other(queue_t *q, mblk_t *mp) rds_bind(q, mp); return; case T_SVR4_OPTMGMT_REQ: - (void) svr4_optcom_req(q, mp, cr, &rds_opt_obj); + (void) svr4_optcom_req(q, mp, cr, &rds_opt_obj, + B_FALSE); return; case T_OPTMGMT_REQ: - (void) tpi_optcom_req(q, mp, cr, &rds_opt_obj); + (void) tpi_optcom_req(q, mp, cr, &rds_opt_obj, B_FALSE); return; case T_CONN_REQ: /* diff --git a/usr/src/uts/common/io/strplumb.c b/usr/src/uts/common/io/strplumb.c index dd7276bcd1..244704b087 100644 --- a/usr/src/uts/common/io/strplumb.c +++ b/usr/src/uts/common/io/strplumb.c @@ -232,48 +232,6 @@ strplumb_autopush(void) mods[1] = NULL; /* - * UDP - */ - DBG0("setting up udp autopush\n"); - - mods[0] = UDP; - - maj = ddi_name_to_major(UDP); - if ((err = kstr_autopush(SET_AUTOPUSH, &maj, &min, NULL, &anchor, - mods)) != 0) { - printf("strplumb: kstr_autopush(SET/UDP) failed: %d\n", err); - return (err); - } - - maj = ddi_name_to_major(UDP6); - if ((err = kstr_autopush(SET_AUTOPUSH, &maj, &min, NULL, &anchor, - mods)) != 0) { - printf("strplumb: kstr_autopush(SET/UDP6) failed: %d\n", err); - return (err); - } - - /* - * ICMP - */ - DBG0("setting up icmp autopush\n"); - - mods[0] = 
ICMP; - - maj = ddi_name_to_major(ICMP); - if ((err = kstr_autopush(SET_AUTOPUSH, &maj, &min, NULL, NULL, - mods)) != 0) { - printf("strplumb: kstr_autopush(SET/ICMP) failed: %d\n", err); - return (err); - } - - maj = ddi_name_to_major(ICMP6); - if ((err = kstr_autopush(SET_AUTOPUSH, &maj, &min, NULL, NULL, - mods)) != 0) { - printf("strplumb: kstr_autopush(SET/ICMP6) failed: %d\n", err); - return (err); - } - - /* * ARP */ DBG0("setting up arp autopush\n"); @@ -1047,19 +1005,19 @@ dl_phys_addr(ldi_handle_t lh, struct ether_addr *eaddr) case DL_PHYS_ADDR_ACK: if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_phys_addr_ack_t)) { printf("dl_phys_addr: " - "DL_PHYS_ADDR_ACK protocol error\n"); + "DL_PHYS_ADDR_ACK protocol error\n"); break; } phys_addr_ack = &dl_prim->physaddr_ack; if (phys_addr_ack->dl_addr_length != sizeof (*eaddr)) { printf("dl_phys_addr: DL_PHYS_ADDR_ACK bad len %u\n", - phys_addr_ack->dl_addr_length); + phys_addr_ack->dl_addr_length); break; } if (phys_addr_ack->dl_addr_length + phys_addr_ack->dl_addr_offset > (mp->b_wptr-mp->b_rptr)) { printf("dl_phys_addr: DL_PHYS_ADDR_ACK bad len %u\n", - phys_addr_ack->dl_addr_length); + phys_addr_ack->dl_addr_length); break; } addrp = mp->b_rptr + phys_addr_ack->dl_addr_offset; @@ -1080,7 +1038,7 @@ dl_phys_addr(ldi_handle_t lh, struct ether_addr *eaddr) default: printf("dl_phys_addr: bad ACK header %u\n", - dl_prim->dl_primitive); + dl_prim->dl_primitive); break; } diff --git a/usr/src/uts/common/io/tl.c b/usr/src/uts/common/io/tl.c index ad0e49c281..e88412c6a9 100644 --- a/usr/src/uts/common/io/tl.c +++ b/usr/src/uts/common/io/tl.c @@ -983,9 +983,9 @@ tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) */ for (i = 0; i < TL_UNUSED; i++) { if (ddi_create_minor_node(devi, - tl_transports[i].tr_name, - S_IFCHR, tl_transports[i].tr_minor, - DDI_PSEUDO, NULL) == DDI_FAILURE) { + tl_transports[i].tr_name, + S_IFCHR, tl_transports[i].tr_minor, + DDI_PSEUDO, NULL) == DDI_FAILURE) { ddi_remove_minor_node(devi, NULL); return 
(DDI_FAILURE); } @@ -1031,15 +1031,15 @@ tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) if (i & TL_SOCKET) t->tr_ai_hash = mod_hash_create_idhash(name, tl_hash_size - 1, - mod_hash_null_valdtor); + mod_hash_null_valdtor); else t->tr_ai_hash = mod_hash_create_ptrhash(name, tl_hash_size, - mod_hash_null_valdtor, sizeof (queue_t)); + mod_hash_null_valdtor, sizeof (queue_t)); #else t->tr_ai_hash = mod_hash_create_idhash(name, tl_hash_size - 1, - mod_hash_null_valdtor); + mod_hash_null_valdtor); #endif /* _ILP32 */ if (i & TL_SOCKET) { @@ -1554,8 +1554,8 @@ tl_close(queue_t *rq, int flag, cred_t *credp) ASSERT(rc == 0 && tep == elp); if ((rc != 0) || (tep != elp)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_close:inconsistency in AI hash")); + SL_TRACE|SL_ERROR, + "tl_close:inconsistency in AI hash")); } /* @@ -1743,8 +1743,8 @@ tl_wput(queue_t *wq, mblk_t *mp) /* Only valid for connection-oriented transports */ if (IS_CLTS(tep)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:M_DATA invalid for ticlts driver")); + SL_TRACE|SL_ERROR, + "tl_wput:M_DATA invalid for ticlts driver")); tl_merror(wq, mp, EPROTO); return; } @@ -1787,8 +1787,8 @@ tl_wput(queue_t *wq, mblk_t *mp) case M_PROTO: if (msz < sizeof (prim->type)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:M_PROTO data too short")); + SL_TRACE|SL_ERROR, + "tl_wput:M_PROTO data too short")); tl_merror(wq, mp, EPROTO); return; } @@ -1858,8 +1858,8 @@ tl_wput(queue_t *wq, mblk_t *mp) */ if (msz < sizeof (prim->type)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:M_PCROTO data too short")); + SL_TRACE|SL_ERROR, + "tl_wput:M_PCROTO data too short")); tl_merror(wq, mp, EPROTO); return; } @@ -1872,15 +1872,15 @@ tl_wput(queue_t *wq, mblk_t *mp) break; default: (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:unknown TPI msg primitive")); + SL_TRACE|SL_ERROR, + "tl_wput:unknown TPI msg 
primitive")); tl_merror(wq, mp, EPROTO); return; } break; default: (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_wput:default:unexpected Streams message")); + "tl_wput:default:unexpected Streams message")); freemsg(mp); return; } @@ -1958,11 +1958,11 @@ tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) if ((peer_tep != NULL) && !peer_tep->te_closing && ((tep->te_state == TS_DATA_XFER) || - (tep->te_state == TS_WREQ_ORDREL)) && + (tep->te_state == TS_WREQ_ORDREL)) && (tep->te_wq != NULL) && (tep->te_wq->q_first == NULL) && ((peer_tep->te_state == TS_DATA_XFER) || - (peer_tep->te_state == TS_WREQ_ORDREL)) && + (peer_tep->te_state == TS_WREQ_ORDREL)) && ((peer_rq = peer_tep->te_rq) != NULL) && (canputnext(peer_rq) || tep->te_closing)) { putnext(peer_rq, mp); @@ -1975,7 +1975,7 @@ tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) */ if ((tep->te_wq != NULL) && ((tep->te_state == TS_DATA_XFER) || - (tep->te_state == TS_WREQ_ORDREL))) { + (tep->te_state == TS_WREQ_ORDREL))) { TL_PUTQ(tep, mp); } else { freemsg(mp); @@ -2097,12 +2097,12 @@ tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { tl_cl_backenable(tep); } else if ( - IS_COTS(tep) && - ((peer_tep = tep->te_conp) != NULL) && - !peer_tep->te_closing && - ((tep->te_state == TS_DATA_XFER) || - (tep->te_state == TS_WIND_ORDREL)|| - (tep->te_state == TS_WREQ_ORDREL))) { + IS_COTS(tep) && + ((peer_tep = tep->te_conp) != NULL) && + !peer_tep->te_closing && + ((tep->te_state == TS_DATA_XFER) || + (tep->te_state == TS_WIND_ORDREL)|| + (tep->te_state == TS_WREQ_ORDREL))) { TL_QENABLE(peer_tep); } @@ -2294,7 +2294,7 @@ tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, if (ackmp == NULL) { (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, - "tl_error_ack:out of mblk memory")); + "tl_error_ack:out of mblk memory")); tl_merror(wq, NULL, ENOSR); return; } @@ -2373,9 +2373,9 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) if (tep->te_state != TS_UNBND) { (void) (STRLOG(TL_ID, 
tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:bind_request:out of state, state=%d", - tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_wput:bind_request:out of state, state=%d", + tep->te_state)); tli_err = TOUTSTATE; goto error; } @@ -2407,7 +2407,7 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) if ((tep->te_hash_hndl == NULL) && ((tep->te_flag & TL_ADDRHASHED) == 0) && mod_hash_reserve_nosleep(tep->te_addrhash, - &tep->te_hash_hndl) != 0) { + &tep->te_hash_hndl) != 0) { tli_err = TSYSERR; unix_err = ENOSR; goto error; } @@ -2422,8 +2422,8 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) (aoff < 0) || (aoff + alen > msz)) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_bind: invalid socket addr")); + 1, SL_TRACE|SL_ERROR, + "tl_bind: invalid socket addr")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; @@ -2436,8 +2436,8 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_bind: invalid socket magic")); + 1, SL_TRACE|SL_ERROR, + "tl_bind: invalid socket magic")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; @@ -2445,8 +2445,8 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && (ux_addr.soua_vp != NULL)) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_bind: implicit addr non-empty")); + 1, SL_TRACE|SL_ERROR, + "tl_bind: implicit addr non-empty")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; @@ -2454,27 +2454,27 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && (ux_addr.soua_vp == NULL)) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_bind: explicit addr empty")); + 1, SL_TRACE|SL_ERROR, + "tl_bind: explicit addr 
empty")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; } } else { if ((alen > 0) && ((aoff < 0) || - ((ssize_t)(aoff + alen) > msz) || - ((aoff + alen) < 0))) { + ((ssize_t)(aoff + alen) > msz) || + ((aoff + alen) < 0))) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_bind: invalid message")); + 1, SL_TRACE|SL_ERROR, + "tl_bind: invalid message")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; } if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_bind: bad addr in message")); + 1, SL_TRACE|SL_ERROR, + "tl_bind: bad addr in message")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TBADADDR; goto error; @@ -2532,9 +2532,9 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) * other than supplied one for explicit binds. */ (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_bind:requested addr %p is busy", - ux_addr.soua_vp)); + SL_TRACE|SL_ERROR, + "tl_bind:requested addr %p is busy", + ux_addr.soua_vp)); tli_err = TADDRBUSY; unix_err = 0; goto error; } @@ -2548,9 +2548,9 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) */ if (! 
tl_get_any_addr(tep, NULL)) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_bind:failed to get buffer for any " - "address")); + 1, SL_TRACE|SL_ERROR, + "tl_bind:failed to get buffer for any " + "address")); tli_err = TSYSERR; unix_err = ENOSR; goto error; } @@ -2568,8 +2568,8 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) tep->te_alen = alen; if (mod_hash_insert_reserve(tep->te_addrhash, - (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, - tep->te_hash_hndl) != 0) { + (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, + tep->te_hash_hndl) != 0) { if (save_prim_type == T_BIND_REQ) { /* * The bind semantics for this primitive @@ -2577,8 +2577,8 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) * requested is busy */ (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_bind:requested addr is busy")); + SL_TRACE|SL_ERROR, + "tl_bind:requested addr is busy")); tli_err = TADDRBUSY; unix_err = 0; goto error; } @@ -2589,8 +2589,8 @@ tl_bind(mblk_t *mp, tl_endpt_t *tep) */ if (! 
tl_get_any_addr(tep, &addr_req)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_bind:unable to get any addr buf")); + SL_TRACE|SL_ERROR, + "tl_bind:unable to get any addr buf")); tli_err = TSYSERR; unix_err = ENOMEM; goto error; } @@ -2610,7 +2610,7 @@ skip_addr_bind: bamp = reallocb(mp, basize, 0); if (bamp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_wput:tl_bind: allocb failed")); + "tl_wput:tl_bind: allocb failed")); /* * roll back state changes */ @@ -2693,9 +2693,9 @@ tl_unbind(mblk_t *mp, tl_endpt_t *tep) */ if (tep->te_state != TS_IDLE) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:T_UNBIND_REQ:out of state, state=%d", - tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_wput:T_UNBIND_REQ:out of state, state=%d", + tep->te_state)); tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); return; } @@ -2757,9 +2757,9 @@ tl_optmgmt(queue_t *wq, mblk_t *mp) * tests this TLI (mis)feature using this device driver. */ (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", - tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", + tep->te_state)); /* * preallocate memory for T_ERROR_ACK */ @@ -2778,10 +2778,12 @@ tl_optmgmt(queue_t *wq, mblk_t *mp) * call common option management routine from drv/ip */ if (prim->type == T_SVR4_OPTMGMT_REQ) { - (void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj); + (void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj, + B_FALSE); } else { ASSERT(prim->type == T_OPTMGMT_REQ); - (void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj); + (void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj, + B_FALSE); } } @@ -2831,9 +2833,9 @@ tl_conn_req(queue_t *wq, mblk_t *mp) if (tep->te_state != TS_IDLE) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:T_CONN_REQ:out of state, state=%d", - tep->te_state)); + SL_TRACE|SL_ERROR, + 
"tl_wput:T_CONN_REQ:out of state, state=%d", + tep->te_state)); tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); freemsg(mp); return; @@ -2846,7 +2848,7 @@ tl_conn_req(queue_t *wq, mblk_t *mp) */ if (msz < sizeof (struct T_conn_req)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_conn_req:invalid message length")); + "tl_conn_req:invalid message length")); tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); freemsg(mp); return; @@ -2874,8 +2876,8 @@ tl_conn_req(queue_t *wq, mblk_t *mp) if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_conn_req: invalid socket magic")); + 1, SL_TRACE|SL_ERROR, + "tl_conn_req: invalid socket magic")); tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); freemsg(mp); return; @@ -2883,11 +2885,11 @@ tl_conn_req(queue_t *wq, mblk_t *mp) } else { if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || (olen > 0 && ((ssize_t)(ooff + olen) > msz || - ooff + olen < 0)) || + ooff + olen < 0)) || olen < 0 || ooff < 0) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_conn_req:invalid message")); + SL_TRACE|SL_ERROR, + "tl_conn_req:invalid message")); tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); freemsg(mp); return; @@ -2912,8 +2914,8 @@ tl_conn_req(queue_t *wq, mblk_t *mp) */ if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_conn_req: addr overlaps TPI message")); + SL_TRACE|SL_ERROR, + "tl_conn_req: addr overlaps TPI message")); } #endif if (olen) { @@ -2922,9 +2924,9 @@ tl_conn_req(queue_t *wq, mblk_t *mp) * supported in this provider except for sockets. 
*/ (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_conn_req:options not supported " - "in message")); + SL_TRACE|SL_ERROR, + "tl_conn_req:options not supported " + "in message")); tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); freemsg(mp); return; @@ -2936,7 +2938,7 @@ tl_conn_req(queue_t *wq, mblk_t *mp) */ if (! tl_noclose(tep)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_conn_req:endpoint is closing")); + "tl_conn_req:endpoint is closing")); tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); freemsg(mp); return; @@ -2961,7 +2963,7 @@ tl_conn_req(queue_t *wq, mblk_t *mp) if (peer_tep == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_conn_req:no one at connect address")); + "tl_conn_req:no one at connect address")); err = ECONNREFUSED; } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { /* @@ -2969,7 +2971,7 @@ tl_conn_req(queue_t *wq, mblk_t *mp) * not to capacity on destination endpoint */ (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, - "tl_conn_req: qlen overflow connection refused")); + "tl_conn_req: qlen overflow connection refused")); err = ECONNREFUSED; } @@ -3072,10 +3074,10 @@ tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) if (peer_tep->te_closing || !((peer_tep->te_state == TS_IDLE) || - (peer_tep->te_state == TS_WRES_CIND))) { + (peer_tep->te_state == TS_WRES_CIND))) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, - "tl_conn_req:peer in bad state (%d)", - peer_tep->te_state)); + "tl_conn_req:peer in bad state (%d)", + peer_tep->te_state)); TL_UNCONNECT(tep->te_oconp); tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); freemsg(ackmp); @@ -3149,7 +3151,7 @@ tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) return; } ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = - T_CONN_CON; + T_CONN_CON; } else { confmp = NULL; } @@ -3215,7 +3217,7 @@ tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) cimp = tl_resizemp(indmp, size); if (! 
cimp) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, - "tl_conn_req:con_ind:allocb failure")); + "tl_conn_req:con_ind:allocb failure")); tl_merror(wq, indmp, ENOMEM); TL_UNCONNECT(tep->te_oconp); tl_serializer_exit(tep); @@ -3239,16 +3241,16 @@ tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) bcopy(tep->te_abuf, addr_startp, tep->te_alen); if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + - ci->SRC_length); + ci->SRC_length); ci->OPT_length = olen; /* because only 1 option */ tl_fill_option(cimp->b_rptr + ci->OPT_offset, - DB_CREDDEF(cimp, tep->te_credp), - TLPID(cimp, tep), - peer_tep->te_flag, peer_tep->te_credp); + DB_CREDDEF(cimp, tep->te_credp), + TLPID(cimp, tep), + peer_tep->te_flag, peer_tep->te_credp); } else if (ooff != 0) { /* Copy option from T_CONN_REQ */ ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + - ci->SRC_length); + ci->SRC_length); ci->OPT_length = olen; ASSERT(opts != NULL); bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); @@ -3360,9 +3362,9 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) */ if (tep->te_state != TS_WRES_CIND) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:T_CONN_RES:out of state, state=%d", - tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_wput:T_CONN_RES:out of state, state=%d", + tep->te_state)); tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); freemsg(mp); return; @@ -3375,7 +3377,7 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) */ if (msz < sizeof (struct T_conn_res)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_conn_res:invalid message length")); + "tl_conn_res:invalid message length")); tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); freemsg(mp); return; @@ -3384,7 +3386,7 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) ooff = cres->OPT_offset; if (((olen > 0) && ((ooff + olen) > msz))) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_conn_res:invalid message")); + "tl_conn_res:invalid 
message")); tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); freemsg(mp); return; @@ -3395,7 +3397,7 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) * supported in this provider */ (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_conn_res:options not supported in message")); + "tl_conn_res:options not supported in message")); tl_error_ack(wq, ackmp, TBADOPT, 0, prim); freemsg(mp); return; @@ -3405,9 +3407,9 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) ASSERT(tep->te_state == TS_WACK_CRES); if (cres->SEQ_number < TL_MINOR_START && - cres->SEQ_number >= BADSEQNUM) { + cres->SEQ_number >= BADSEQNUM) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, - "tl_conn_res:remote endpoint sequence number bad")); + "tl_conn_res:remote endpoint sequence number bad")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); freemsg(mp); @@ -3418,10 +3420,10 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) * find accepting endpoint. Will have extra reference if found. */ if (mod_hash_find_cb(tep->te_transport->tr_ai_hash, - (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id, - (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) { + (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id, + (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, - "tl_conn_res:bad accepting endpoint")); + "tl_conn_res:bad accepting endpoint")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tl_error_ack(wq, ackmp, TBADF, 0, prim); freemsg(mp); @@ -3433,7 +3435,7 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) */ if (! 
tl_noclose(acc_ep)) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, - "tl_conn_res:bad accepting endpoint")); + "tl_conn_res:bad accepting endpoint")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tl_error_ack(wq, ackmp, TBADF, 0, prim); tl_refrele(acc_ep); @@ -3450,8 +3452,8 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) */ if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, - "tl_conn_res:accepting endpoint has no address bound," - "state=%d", acc_ep->te_state)); + "tl_conn_res:accepting endpoint has no address bound," + "state=%d", acc_ep->te_state)); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); freemsg(mp); @@ -3467,7 +3469,7 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) if ((tep == acc_ep) && (tep->te_nicon > 1)) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, - "tl_conn_res: > 1 conn_ind on listener-acceptor")); + "tl_conn_res: > 1 conn_ind on listener-acceptor")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tl_error_ack(wq, ackmp, TBADF, 0, prim); freemsg(mp); @@ -3485,7 +3487,7 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) tip = tl_icon_find(tep, cres->SEQ_number); if (tip == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, - "tl_conn_res:no client in listener list")); + "tl_conn_res:no client in listener list")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); freemsg(mp); @@ -3500,7 +3502,7 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) * but complete the server and acceptor state transitions. 
*/ ASSERT(tip->ti_tep == NULL || - tip->ti_tep->te_seqno == cres->SEQ_number); + tip->ti_tep->te_seqno == cres->SEQ_number); cl_ep = tip->ti_tep; /* @@ -3558,7 +3560,7 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) * and acking validity of request */ (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, - "tl_conn_res:peer in bad state")); + "tl_conn_res:peer in bad state")); } /* @@ -3575,13 +3577,13 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) olen = 0; if (cl_ep->te_flag & TL_SETCRED) { olen = (t_scalar_t)sizeof (struct opthdr) + - OPTLEN(sizeof (tl_credopt_t)); + OPTLEN(sizeof (tl_credopt_t)); } else if (cl_ep->te_flag & TL_SETUCRED) { olen = (t_scalar_t)sizeof (struct opthdr) + - OPTLEN(ucredsize); + OPTLEN(ucredsize); } size = T_ALIGN(sizeof (struct T_conn_con) + - acc_ep->te_alen) + olen; + acc_ep->te_alen) + olen; } if ((respmp = reallocb(mp, size, 0)) == NULL) { /* @@ -3623,8 +3625,8 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) dimp = tl_resizemp(respmp, size); if (! dimp) { (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_conn_res:con_ind:allocb failure")); + SL_TRACE|SL_ERROR, + "tl_conn_res:con_ind:allocb failure")); tl_merror(wq, respmp, ENOMEM); tl_closeok(acc_ep); if (client_noclose_set) @@ -3703,8 +3705,8 @@ tl_conn_res(mblk_t *mp, tl_endpt_t *tep) if (ccmp == NULL) { tl_ok_ack(wq, ackmp, prim); (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_conn_res:conn_con:allocb failure")); + SL_TRACE|SL_ERROR, + "tl_conn_res:conn_con:allocb failure")); tl_merror(wq, respmp, ENOMEM); tl_closeok(acc_ep); if (client_noclose_set) @@ -3897,9 +3899,9 @@ tl_discon_req(mblk_t *mp, tl_endpt_t *tep) if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) && ! 
(save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:T_DISCON_REQ:out of state, state=%d", - tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_wput:T_DISCON_REQ:out of state, state=%d", + tep->te_state)); tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ); freemsg(mp); return; @@ -3913,7 +3915,7 @@ tl_discon_req(mblk_t *mp, tl_endpt_t *tep) /* validate the message */ if (msz < sizeof (struct T_discon_req)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_discon_req:invalid message")); + "tl_discon_req:invalid message")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ); freemsg(mp); @@ -3932,8 +3934,8 @@ tl_discon_req(mblk_t *mp, tl_endpt_t *tep) tip = tl_icon_find(tep, dr->SEQ_number); if (tip == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, - SL_TRACE|SL_ERROR, - "tl_discon_req:no disconnect endpoint")); + SL_TRACE|SL_ERROR, + "tl_discon_req:no disconnect endpoint")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); freemsg(mp); @@ -3945,7 +3947,7 @@ tl_discon_req(mblk_t *mp, tl_endpt_t *tep) */ ASSERT(IMPLY(tip->ti_tep != NULL, - tip->ti_tep->te_seqno == dr->SEQ_number)); + tip->ti_tep->te_seqno == dr->SEQ_number)); peer_tep = tip->ti_tep; } @@ -3993,8 +3995,8 @@ tl_discon_req(mblk_t *mp, tl_endpt_t *tep) */ if ((dimp = tl_resizemp(respmp, size)) == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, - SL_TRACE|SL_ERROR, - "tl_discon_req: reallocb failed")); + SL_TRACE|SL_ERROR, + "tl_discon_req: reallocb failed")); tep->te_state = new_state; tl_merror(wq, respmp, ENOMEM); return; @@ -4024,8 +4026,8 @@ tl_discon_req(mblk_t *mp, tl_endpt_t *tep) if ((dimp = tl_resizemp(respmp, size)) == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, - SL_TRACE|SL_ERROR, - "tl_discon_req: reallocb failed")); + SL_TRACE|SL_ERROR, + "tl_discon_req: reallocb failed")); 
tep->te_state = new_state; tl_merror(wq, respmp, ENOMEM); return; @@ -4069,11 +4071,11 @@ tl_discon_req(mblk_t *mp, tl_endpt_t *tep) if (peer_tep->te_nicon == 1) peer_tep->te_state = NEXTSTATE(TE_DISCON_IND2, - peer_tep->te_state); + peer_tep->te_state); else peer_tep->te_state = NEXTSTATE(TE_DISCON_IND3, - peer_tep->te_state); + peer_tep->te_state); tl_freetip(peer_tep, tip); } ASSERT(tep->te_oconp != NULL); @@ -4082,8 +4084,8 @@ tl_discon_req(mblk_t *mp, tl_endpt_t *tep) } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */ if ((dimp = tl_resizemp(respmp, size)) == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, - SL_TRACE|SL_ERROR, - "tl_discon_req: reallocb failed")); + SL_TRACE|SL_ERROR, + "tl_discon_req: reallocb failed")); tep->te_state = new_state; tl_merror(wq, respmp, ENOMEM); return; @@ -4192,8 +4194,8 @@ tl_addr_req(mblk_t *mp, tl_endpt_t *tep) ackmp = reallocb(mp, ack_sz, 0); if (ackmp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_addr_req: reallocb failed")); + SL_TRACE|SL_ERROR, + "tl_addr_req: reallocb failed")); tl_memrecover(wq, mp, ack_sz); return; } @@ -4212,7 +4214,7 @@ tl_addr_req(mblk_t *mp, tl_endpt_t *tep) taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); bcopy(tep->te_abuf, ackmp->b_wptr, - tep->te_alen); + tep->te_alen); ackmp->b_wptr += tep->te_alen; ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); } @@ -4220,8 +4222,8 @@ tl_addr_req(mblk_t *mp, tl_endpt_t *tep) (void) qreply(wq, ackmp); } else { ASSERT(tep->te_state == TS_DATA_XFER || - tep->te_state == TS_WIND_ORDREL || - tep->te_state == TS_WREQ_ORDREL); + tep->te_state == TS_WIND_ORDREL || + tep->te_state == TS_WREQ_ORDREL); /* connection oriented in data transfer */ tl_connected_cots_addr_req(mp, tep); } @@ -4252,7 +4254,7 @@ tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); if (ackmp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - 
"tl_connected_cots_addr_req: reallocb failed")); + "tl_connected_cots_addr_req: reallocb failed")); tl_memrecover(tep->te_wq, mp, ack_sz); return; } @@ -4270,7 +4272,7 @@ tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) taa->REMADDR_length = peer_tep->te_alen; taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + - taa->LOCADDR_length); + taa->LOCADDR_length); addr_startp = ackmp->b_rptr + taa->REMADDR_offset; bcopy(peer_tep->te_abuf, addr_startp, peer_tep->te_alen); @@ -4318,7 +4320,7 @@ tl_capability_req(mblk_t *mp, tl_endpt_t *tep) M_PCPROTO, T_CAPABILITY_ACK); if (ackmp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_capability_req: reallocb failed")); + "tl_capability_req: reallocb failed")); tl_memrecover(tep->te_wq, mp, sizeof (struct T_capability_ack)); return; @@ -4361,7 +4363,7 @@ tl_info_req(mblk_t *mp, tl_endpt_t *tep) M_PCPROTO, T_INFO_ACK); if (ackmp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_info_req: reallocb failed")); + "tl_info_req: reallocb failed")); tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); return; } @@ -4393,8 +4395,8 @@ tl_data(mblk_t *mp, tl_endpt_t *tep) if (IS_CLTS(tep)) { (void) (STRLOG(TL_ID, tep->te_minor, 2, - SL_TRACE|SL_ERROR, - "tl_wput:clts:unattached M_DATA")); + SL_TRACE|SL_ERROR, + "tl_wput:clts:unattached M_DATA")); if (!closing) { tl_merror(wq, mp, EPROTO); } else { @@ -4428,11 +4430,11 @@ tl_data(mblk_t *mp, tl_endpt_t *tep) } return; } else if (prim->type == T_OPTDATA_REQ && - (msz < sizeof (struct T_optdata_req) || - !IS_SOCKET(tep))) { + (msz < sizeof (struct T_optdata_req) || + !IS_SOCKET(tep))) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_data:T_OPTDATA_REQ:invalid message")); + SL_TRACE|SL_ERROR, + "tl_data:T_OPTDATA_REQ:invalid message")); if (!closing) { tl_merror(wq, mp, EPROTO); } else { @@ -4452,7 +4454,7 @@ tl_data(mblk_t *mp, tl_endpt_t *tep) */ freemsg(mp); (void) (STRLOG(TL_ID, tep->te_minor, 
3, SL_TRACE|SL_ERROR, - "tl_data:cots with endpoint idle")); + "tl_data:cots with endpoint idle")); return; case TS_DATA_XFER: @@ -4479,8 +4481,8 @@ tl_data(mblk_t *mp, tl_endpt_t *tep) */ if (!closing) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_data: ocon")); + SL_TRACE|SL_ERROR, + "tl_data: ocon")); TL_PUTBQ(tep, mp); return; } @@ -4507,8 +4509,8 @@ tl_data(mblk_t *mp, tl_endpt_t *tep) */ freemsg(mp); (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_data: WREQ_ORDREL and no peer")); + SL_TRACE|SL_ERROR, + "tl_data: WREQ_ORDREL and no peer")); tl_discon_ind(tep, 0); return; } @@ -4517,7 +4519,7 @@ tl_data(mblk_t *mp, tl_endpt_t *tep) default: /* invalid state for event TE_DATA_REQ */ (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_data:cots:out of state")); + "tl_data:cots:out of state")); tl_merror(wq, mp, EPROTO); return; } @@ -4533,7 +4535,7 @@ tl_data(mblk_t *mp, tl_endpt_t *tep) freemsg(mp); /* Peer closed */ (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, - "tl_data: peer gone")); + "tl_data: peer gone")); return; } @@ -4560,7 +4562,7 @@ tl_data(mblk_t *mp, tl_endpt_t *tep) break; default: (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_data:rx side:invalid state")); + "tl_data:rx side:invalid state")); tl_merror(peer_tep->te_wq, mp, EPROTO); return; } @@ -4595,7 +4597,7 @@ tl_exdata(mblk_t *mp, tl_endpt_t *tep) if (msz < sizeof (struct T_exdata_req)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_exdata:invalid message")); + "tl_exdata:invalid message")); if (!closing) { tl_merror(wq, mp, EPROTO); } else { @@ -4626,7 +4628,7 @@ tl_exdata(mblk_t *mp, tl_endpt_t *tep) */ freemsg(mp); (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, - "tl_exdata:cots with endpoint idle")); + "tl_exdata:cots with endpoint idle")); return; case TS_DATA_XFER: @@ -4653,13 +4655,13 @@ tl_exdata(mblk_t *mp, tl_endpt_t *tep) */ if (!closing) { (void) (STRLOG(TL_ID, 
tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_exdata: ocon")); + SL_TRACE|SL_ERROR, + "tl_exdata: ocon")); TL_PUTBQ(tep, mp); return; } (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_exdata: closing socket ocon")); + "tl_exdata: closing socket ocon")); prim->type = T_EXDATA_IND; tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); return; @@ -4673,8 +4675,8 @@ tl_exdata(mblk_t *mp, tl_endpt_t *tep) */ freemsg(mp); (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_exdata: WREQ_ORDREL and no peer")); + SL_TRACE|SL_ERROR, + "tl_exdata: WREQ_ORDREL and no peer")); tl_discon_ind(tep, 0); return; } @@ -4682,9 +4684,9 @@ tl_exdata(mblk_t *mp, tl_endpt_t *tep) default: (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:T_EXDATA_REQ:out of state, state=%d", - tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_wput:T_EXDATA_REQ:out of state, state=%d", + tep->te_state)); tl_merror(wq, mp, EPROTO); return; } @@ -4700,7 +4702,7 @@ tl_exdata(mblk_t *mp, tl_endpt_t *tep) freemsg(mp); /* Peer closed */ (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, - "tl_exdata: peer gone")); + "tl_exdata: peer gone")); return; } @@ -4726,7 +4728,7 @@ tl_exdata(mblk_t *mp, tl_endpt_t *tep) break; default: (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_exdata:rx side:invalid state")); + "tl_exdata:rx side:invalid state")); tl_merror(peer_tep->te_wq, mp, EPROTO); return; } @@ -4759,7 +4761,7 @@ tl_ordrel(mblk_t *mp, tl_endpt_t *tep) if (msz < sizeof (struct T_ordrel_req)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_ordrel:invalid message")); + "tl_ordrel:invalid message")); if (!closing) { tl_merror(wq, mp, EPROTO); } else { @@ -4792,22 +4794,22 @@ tl_ordrel(mblk_t *mp, tl_endpt_t *tep) */ if (!closing) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_ordlrel: ocon")); + SL_TRACE|SL_ERROR, + "tl_ordlrel: ocon")); TL_PUTBQ(tep, mp); return; } (void) (STRLOG(TL_ID, tep->te_minor, 1, 
SL_TRACE|SL_ERROR, - "tl_ordlrel: closing socket ocon")); + "tl_ordlrel: closing socket ocon")); prim->type = T_ORDREL_IND; (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); return; default: (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:T_ORDREL_REQ:out of state, state=%d", - tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_wput:T_ORDREL_REQ:out of state, state=%d", + tep->te_state)); if (!closing) { tl_merror(wq, mp, EPROTO); } else { @@ -4823,7 +4825,7 @@ tl_ordrel(mblk_t *mp, tl_endpt_t *tep) if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { /* Peer closed */ (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, - "tl_ordrel: peer gone")); + "tl_ordrel: peer gone")); freemsg(mp); return; } @@ -4850,7 +4852,7 @@ tl_ordrel(mblk_t *mp, tl_endpt_t *tep) break; default: (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_ordrel:rx side:invalid state")); + "tl_ordrel:rx side:invalid state")); tl_merror(peer_tep->te_wq, mp, EPROTO); return; } @@ -4861,7 +4863,7 @@ tl_ordrel(mblk_t *mp, tl_endpt_t *tep) */ prim->type = T_ORDREL_IND; (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, - "tl_ordrel: send ordrel_ind")); + "tl_ordrel: send ordrel_ind")); /* * send data to connected peer @@ -4899,7 +4901,7 @@ tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) err_mp = allocb(err_sz, BPRI_MED); if (! 
err_mp) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, - "tl_uderr:allocb failure")); + "tl_uderr:allocb failure")); /* * Note: no rollback of state needed as it does * not change in connectionless transport @@ -4919,20 +4921,20 @@ tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) uderr->DEST_offset = 0; } else { uderr->DEST_offset = - (t_scalar_t)sizeof (struct T_uderror_ind); + (t_scalar_t)sizeof (struct T_uderror_ind); addr_startp = mp->b_rptr + udreq->DEST_offset; bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset, - (size_t)alen); + (size_t)alen); } if (olen <= 0) { uderr->OPT_offset = 0; } else { uderr->OPT_offset = - (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + - uderr->DEST_length); + (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + + uderr->DEST_length); addr_startp = mp->b_rptr + udreq->OPT_offset; bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset, - (size_t)olen); + (size_t)olen); } freemsg(mp); @@ -4987,8 +4989,8 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) */ if (tep->te_state != TS_IDLE) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_wput:T_CONN_REQ:out of state")); + SL_TRACE|SL_ERROR, + "tl_wput:T_CONN_REQ:out of state")); tl_merror(wq, mp, EPROTO); return; } @@ -5004,7 +5006,7 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) */ if (msz < sizeof (struct T_unitdata_req)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_unitdata:invalid message length")); + "tl_unitdata:invalid message length")); tl_merror(wq, mp, EINVAL); return; } @@ -5022,10 +5024,10 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) (olen < 0) || (ooff < 0) || ((olen > 0) && ((ooff + olen) > msz))) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_unitdata_req: invalid socket addr " - "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", - (int)msz, alen, aoff, olen, ooff)); + 1, SL_TRACE|SL_ERROR, + "tl_unitdata_req: invalid socket addr " + "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", + (int)msz, alen, aoff, olen, 
ooff)); tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); return; } @@ -5034,8 +5036,8 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { (void) (STRLOG(TL_ID, tep->te_minor, - 1, SL_TRACE|SL_ERROR, - "tl_conn_req: invalid socket magic")); + 1, SL_TRACE|SL_ERROR, + "tl_conn_req: invalid socket magic")); tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); return; } @@ -5072,7 +5074,7 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) */ if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, - "tl_unitdata:addr overlaps TPI message")); + "tl_unitdata:addr overlaps TPI message")); } #endif /* @@ -5102,8 +5104,8 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) if (peer_tep == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_unitdata:no one at destination address")); + SL_TRACE|SL_ERROR, + "tl_unitdata:no one at destination address")); tl_uderr(wq, mp, ECONNRESET); return; } @@ -5119,7 +5121,7 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) if (peer_tep->te_state != TS_IDLE) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_unitdata:provider in invalid state")); + "tl_unitdata:provider in invalid state")); tl_uderr(wq, mp, EPROTO); return; } @@ -5151,7 +5153,7 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) if (peer_tep->te_flag & TL_SETCRED) { ASSERT(olen == 0); olen = (t_scalar_t)sizeof (struct opthdr) + - OPTLEN(sizeof (tl_credopt_t)); + OPTLEN(sizeof (tl_credopt_t)); /* 1 option only */ } else if (peer_tep->te_flag & TL_SETUCRED) { ASSERT(olen == 0); @@ -5164,7 +5166,7 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) } ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + - olen; + olen; /* * If the unitdata_ind fits and we are not adding options * reuse the udreq mblk. @@ -5186,7 +5188,7 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) ui_mp = allocb(ui_sz, BPRI_MED); if (! 
ui_mp) { (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, - "tl_unitdata:allocb failure:message queued")); + "tl_unitdata:allocb failure:message queued")); tl_memrecover(wq, mp, ui_sz); return; } @@ -5218,8 +5220,8 @@ tl_unitdata(mblk_t *mp, tl_endpt_t *tep) peer_tep->te_flag, peer_tep->te_credp); } else { bcopy((void *)((uintptr_t)udreq + ooff), - (void *)((uintptr_t)udind + udind->OPT_offset), - olen); + (void *)((uintptr_t)udind + udind->OPT_offset), + olen); } /* @@ -5258,8 +5260,8 @@ tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) ASSERT(ap->ta_abuf != NULL); ASSERT(EQUIV(rc == 0, peer_tep != NULL)); ASSERT(IMPLY(rc == 0, - (tep->te_zoneid == peer_tep->te_zoneid) && - (tep->te_transport == peer_tep->te_transport))); + (tep->te_zoneid == peer_tep->te_zoneid) && + (tep->te_transport == peer_tep->te_transport))); if ((rc == 0) && (peer_tep->te_closing)) { tl_refrele(peer_tep); @@ -5286,14 +5288,14 @@ tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) ASSERT(IS_SOCKET(tep)); ASSERT(EQUIV(rc == 0, peer_tep != NULL)); ASSERT(IMPLY(rc == 0, - (tep->te_zoneid == peer_tep->te_zoneid) && - (tep->te_transport == peer_tep->te_transport))); + (tep->te_zoneid == peer_tep->te_zoneid) && + (tep->te_transport == peer_tep->te_transport))); /* * Don't attempt to use closing peer. */ if ((peer_tep != NULL) && (peer_tep->te_closing || - (peer_tep->te_zoneid != tep->te_zoneid))) { + (peer_tep->te_zoneid != tep->te_zoneid))) { tl_refrele(peer_tep); peer_tep = NULL; } @@ -5368,8 +5370,8 @@ tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { if (mod_hash_insert_reserve(tep->te_addrhash, - (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, - tep->te_hash_hndl) == 0) { + (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, + tep->te_hash_hndl) == 0) { /* * found free address */ @@ -5389,7 +5391,7 @@ tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) * Failed to find anything. 
*/ (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, - "tl_get_any_addr:looped 2^32 times")); + "tl_get_any_addr:looped 2^32 times")); return (B_FALSE); } @@ -5487,9 +5489,9 @@ tl_co_unconnect(tl_endpt_t *tep) putnext(cl_tep->te_rq, d_mp); } else { (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_co_unconnect:icmng: " - "allocb failure")); + SL_TRACE|SL_ERROR, + "tl_co_unconnect:icmng: " + "allocb failure")); } tl_freetip(tep, tip); } @@ -5515,8 +5517,8 @@ tl_co_unconnect(tl_endpt_t *tep) } if (d_mp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_co_unconnect:outgoing:allocb failure")); + SL_TRACE|SL_ERROR, + "tl_co_unconnect:outgoing:allocb failure")); TL_UNCONNECT(tep->te_oconp); goto discon_peer; } @@ -5546,11 +5548,11 @@ tl_co_unconnect(tl_endpt_t *tep) if (srv_tep->te_nicon == 1) { srv_tep->te_state = NEXTSTATE(TE_DISCON_IND2, - srv_tep->te_state); + srv_tep->te_state); } else { srv_tep->te_state = NEXTSTATE(TE_DISCON_IND3, - srv_tep->te_state); + srv_tep->te_state); } ASSERT(*(uint32_t *)(d_mp->b_rptr) == T_DISCON_IND); @@ -5577,8 +5579,8 @@ tl_co_unconnect(tl_endpt_t *tep) */ (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, "tl_co_unconnect:connected: ordrel_ind state %d->%d", - peer_tep->te_state, - NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); + peer_tep->te_state, + NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); d_mp = tl_ordrel_ind_alloc(); if (! d_mp) { (void) (STRLOG(TL_ID, tep->te_minor, 3, @@ -5593,7 +5595,7 @@ tl_co_unconnect(tl_endpt_t *tep) goto discon_peer; } peer_tep->te_state = - NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); + NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); putnext(peer_tep->te_rq, d_mp); /* @@ -5609,14 +5611,14 @@ tl_co_unconnect(tl_endpt_t *tep) * with error 0 to inform that the peer is gone. 
*/ (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_co_unconnect: discon in state %d", - tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_co_unconnect: discon in state %d", + tep->te_state)); tl_discon_ind(peer_tep, 0); } else { (void) (STRLOG(TL_ID, tep->te_minor, 3, - SL_TRACE|SL_ERROR, - "tl_co_unconnect: state %d", tep->te_state)); + SL_TRACE|SL_ERROR, + "tl_co_unconnect: state %d", tep->te_state)); tl_discon_ind(peer_tep, ECONNRESET); } @@ -5657,7 +5659,7 @@ tl_discon_ind(tl_endpt_t *tep, uint32_t reason) d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); if (! d_mp) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, - "tl_discon_ind:allocb failure")); + "tl_discon_ind:allocb failure")); return; } tep->te_state = TS_IDLE; @@ -5838,7 +5840,7 @@ tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) break; case T_ORDREL_IND: tep->te_state = NEXTSTATE(TE_ORDREL_IND, - tep->te_state); + tep->te_state); putnext(tep->te_rq, mp); break; case T_DISCON_IND: @@ -5848,7 +5850,7 @@ tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) default: #ifdef DEBUG cmn_err(CE_PANIC, - "tl_icon_sendmsgs: unknown primitive"); + "tl_icon_sendmsgs: unknown primitive"); #endif /* DEBUG */ freemsg(mp); break; @@ -5892,8 +5894,8 @@ tl_merror(queue_t *wq, mblk_t *mp, int error) } (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_merror: tep=%p, err=%d", tep, error)); + SL_TRACE|SL_ERROR, + "tl_merror: tep=%p, err=%d", tep, error)); /* * flush all messages on queue. we are shutting @@ -5914,8 +5916,8 @@ tl_merror(queue_t *wq, mblk_t *mp, int error) mp = allocb(1, BPRI_HI); if (!mp) { (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_merror:M_PROTO: out of memory")); + SL_TRACE|SL_ERROR, + "tl_merror:M_PROTO: out of memory")); return; } } @@ -6093,8 +6095,8 @@ tl_set_opt( * option. 
*/ (void) (STRLOG(TL_ID, tep->te_minor, 1, - SL_TRACE|SL_ERROR, - "tl_set_opt: option is not supported")); + SL_TRACE|SL_ERROR, + "tl_set_opt: option is not supported")); error = EPROTO; break; } @@ -6157,7 +6159,7 @@ tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) if (tep->te_bufcid || tep->te_timoutid) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, - "tl_memrecover:recover %p pending", (void *)wq)); + "tl_memrecover:recover %p pending", (void *)wq)); return; } diff --git a/usr/src/uts/intel/arp/arp.global-objs.debug64 b/usr/src/uts/intel/arp/arp.global-objs.debug64 index 1e349eca81..c0e5d9e9da 100644 --- a/usr/src/uts/intel/arp/arp.global-objs.debug64 +++ b/usr/src/uts/intel/arp/arp.global-objs.debug64 @@ -26,17 +26,17 @@ ar_cmd_tbl ar_m_tbl +arp_mod_info arpinfo +arprinit +arpwinit arp_param_arr arp_netinfo cb_inet_devops fsw inet_dev_info inet_devops -info modldrv modlinkage modlstrmod netdev_privs -rinit -winit diff --git a/usr/src/uts/intel/icmp/icmp.global-objs.debug64 b/usr/src/uts/intel/icmp/icmp.global-objs.debug64 index 2cb9d3484b..ba041c7e17 100644 --- a/usr/src/uts/intel/icmp/icmp.global-objs.debug64 +++ b/usr/src/uts/intel/icmp/icmp.global-objs.debug64 @@ -26,22 +26,9 @@ cb_inet_devops fsw -ICMP6_MAJ -icmp_g_t_info_ack -icmpinfo -icmp_max_optsize -icmp_opt_arr -icmp_opt_obj -icmp_param_arr -icmp_valid_levels_arr inet_dev_info inet_devops -info modldrv modlinkage modlstrmod netdev_privs -rinit -sin6_null -sin_null -winit diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64 index 1ad0df0982..f74a6e46b5 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 @@ -25,11 +25,6 @@ # ident "%Z%%M% %I% %E% SMI" IP_MAJ -SCTP6_MAJ -SCTP_MAJ -TCP6_MAJ -TCP_MAJ -UDP6_MAJ cb_inet_devops cl_inet_bind cl_inet_connect @@ -52,6 +47,10 @@ do_tcp_direct_sockfs do_tcp_fusion do_tcpzcopy dohwcksum +dummy_mod_info +dummymodinfo +dummyrmodinit +dummywmodinit 
eventq_queue_in eventq_queue_nic eventq_queue_out @@ -64,7 +63,19 @@ gcgrp6_hash gcgrp_hash_size gcgrp_lock icmp_frag_size_table +icmp_g_t_info_ack icmp_ipha +icmp_max_optsize +icmp_mod_info +icmp_opt_arr +icmp_opt_obj +icmp_param_arr +icmp_valid_levels_arr +icmpinfov4 +icmpinfov6 +icmprinitv4 +icmprinitv6 +icmpwinit ill_no_arena ill_null inet_dev_info @@ -89,6 +100,7 @@ ip_aron_template ip_aru_template ip_cache_table_size ip_cgtp_filter_rev +ip_conn_cache ip_debug ip_g_all_ones ip_input_proc @@ -131,22 +143,22 @@ ip_thread_list ip_thread_rwlock ip_wput_frag_mdt_min ipcl_bind_fanout_size -ipcl_conn_cache ipcl_conn_hash_maxsize ipcl_conn_hash_memfactor ipcl_conn_hash_size ipcl_debug_level ipcl_raw_fanout_size -ipcl_tcpconn_cache ipcl_udp_fanout_size ipclassifier_version ipif_loopback_name ipif_nv_tbl ipif_zero -ipinfo +ipinfov4 +ipinfov6 iplrinit iplwinit -iprinit +iprinitv4 +iprinitv6 ipsec_action_cache ipsec_hdr_pullup_needed ipsec_info_cache @@ -169,7 +181,8 @@ ipv6_loopback ipv6_solicited_node_mcast ipv6_unspecified_group ipv6info -ipwinit +ipwinitv4 +ipwinitv6 ire_cache ire_gw_secattr_cache ire_idle_cutoff_interval @@ -198,10 +211,10 @@ netmasks prov_update_handle radix_mask_cache radix_node_cache +rawip_conn_cache recvq_call recvq_loop_cnt req_arr -rinit_ipv6 rn_mkfreelist rn_ones rn_zeros @@ -210,6 +223,16 @@ rr_max_pkt_cnt_ratio rr_min_blank_ratio rr_min_pkt_cnt_ratio rt_entry_cache +rts_conn_cache +rts_g_t_info_ack +rts_max_optsize +rts_mod_info +rts_opt_arr +rts_opt_obj +rts_valid_levels_arr +rtsinfo +rtsrinit +rtswinit sctp_asconf_default_dispatch sctp_asconf_dispatch_tbl sctp_conn_cache @@ -217,6 +240,7 @@ sctp_conn_hash_size sctp_kmem_faddr_cache sctp_kmem_ftsn_set_cache sctp_kmem_set_cache +sctp_mod_info sctp_recvq_tq_task_max sctp_recvq_tq_task_min sctp_recvq_tq_thr_max @@ -224,6 +248,9 @@ sctp_recvq_tq_thr_min sctp_sin6_null sctp_taskq sctpdebug +sctpinfo +sctprinit +sctpwinit sendq_collision sendq_empty sendq_loop_cnt @@ -247,6 +274,7 @@ 
squeue_writerdrain_ms squeue_writerdrain_ns tcp_acceptor_rinit tcp_acceptor_winit +tcp_conn_cache tcp_conn_hash_size tcp_drop_ack_unsent_cnt tcp_free_list_max_cnt @@ -261,8 +289,6 @@ tcp_loopback_rinit tcp_max_optsize tcp_mdt_chain tcp_mdt_smss_threshold -tcp_mod_rinit -tcp_mod_winit tcp_opt_arr tcp_opt_obj tcp_random_anon_port @@ -274,7 +300,8 @@ tcp_random_state tcp_randtbl tcp_report_header tcp_rinfo -tcp_rinit +tcp_rinitv4 +tcp_rinitv6 tcp_sack_info_cache tcp_sock_winit tcp_squeue_close @@ -289,25 +316,26 @@ tcp_valid_levels_arr tcp_version tcp_winfo tcp_winit -tcpinfo +tcpinfov4 +tcpinfov6 tsol_strict_error udp_bind_fanout_size -udp_cache -udp_count +udp_conn_cache udp_g_t_info_ack_ipv4 udp_g_t_info_ack_ipv6 -udp_info +udp_lrinit +udp_lwinit udp_max_optsize +udp_mod_info udp_opt_arr udp_opt_obj udp_param_arr udp_random_anon_port -udp_rinit -udp_snmp_rinit -udp_snmp_winit +udp_rinitv4 +udp_rinitv6 udp_valid_levels_arr udp_version udp_winit -udpinfo -winit_ipv6 +udpinfov4 +udpinfov6 zero_info diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64 index 172ad0f370..1ff919b3bd 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.obj64 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 @@ -25,11 +25,6 @@ # ident "%Z%%M% %I% %E% SMI" IP_MAJ -SCTP6_MAJ -SCTP_MAJ -TCP6_MAJ -TCP_MAJ -UDP6_MAJ cb_inet_devops cl_inet_bind cl_inet_connect @@ -52,6 +47,10 @@ do_tcp_direct_sockfs do_tcp_fusion do_tcpzcopy dohwcksum +dummy_mod_info +dummymodinfo +dummyrmodinit +dummywmodinit eventq_queue_in eventq_queue_nic eventq_queue_out @@ -64,7 +63,19 @@ gcgrp6_hash gcgrp_hash_size gcgrp_lock icmp_frag_size_table +icmp_g_t_info_ack icmp_ipha +icmp_max_optsize +icmp_mod_info +icmp_opt_arr +icmp_opt_obj +icmp_param_arr +icmp_valid_levels_arr +icmpinfov4 +icmpinfov6 +icmprinitv4 +icmprinitv6 +icmpwinit ill_no_arena ill_null inet_dev_info @@ -89,6 +100,7 @@ ip_aron_template ip_aru_template ip_cache_table_size ip_cgtp_filter_rev +ip_conn_cache ip_debug 
ip_g_all_ones ip_input_proc @@ -131,21 +143,21 @@ ip_thread_list ip_thread_rwlock ip_wput_frag_mdt_min ipcl_bind_fanout_size -ipcl_conn_cache ipcl_conn_hash_maxsize ipcl_conn_hash_memfactor ipcl_conn_hash_size ipcl_raw_fanout_size -ipcl_tcpconn_cache ipcl_udp_fanout_size ipclassifier_version ipif_loopback_name ipif_nv_tbl ipif_zero -ipinfo +ipinfov4 +ipinfov6 iplrinit iplwinit -iprinit +iprinitv4 +iprinitv6 ipsec_action_cache ipsec_hdr_pullup_needed ipsec_info_cache @@ -167,7 +179,8 @@ ipv6_loopback ipv6_solicited_node_mcast ipv6_unspecified_group ipv6info -ipwinit +ipwinitv4 +ipwinitv6 ire_cache ire_gw_secattr_cache ire_idle_cutoff_interval @@ -196,8 +209,8 @@ netmasks prov_update_handle radix_mask_cache radix_node_cache +rawip_conn_cache req_arr -rinit_ipv6 rn_mkfreelist rn_ones rn_zeros @@ -206,6 +219,16 @@ rr_max_pkt_cnt_ratio rr_min_blank_ratio rr_min_pkt_cnt_ratio rt_entry_cache +rts_conn_cache +rts_g_t_info_ack +rts_max_optsize +rts_mod_info +rts_opt_arr +rts_opt_obj +rts_valid_levels_arr +rtsinfo +rtsrinit +rtswinit sctp_asconf_default_dispatch sctp_asconf_dispatch_tbl sctp_conn_cache @@ -213,6 +236,7 @@ sctp_conn_hash_size sctp_kmem_faddr_cache sctp_kmem_ftsn_set_cache sctp_kmem_set_cache +sctp_mod_info sctp_recvq_tq_task_max sctp_recvq_tq_task_min sctp_recvq_tq_thr_max @@ -220,6 +244,9 @@ sctp_recvq_tq_thr_min sctp_sin6_null sctp_taskq sctpdebug +sctpinfo +sctprinit +sctpwinit sin6_null sin_null sqset_global_list @@ -236,6 +263,7 @@ squeue_writerdrain_ms squeue_writerdrain_ns tcp_acceptor_rinit tcp_acceptor_winit +tcp_conn_cache tcp_conn_hash_size tcp_drop_ack_unsent_cnt tcp_free_list_max_cnt @@ -250,8 +278,6 @@ tcp_loopback_rinit tcp_max_optsize tcp_mdt_chain tcp_mdt_smss_threshold -tcp_mod_rinit -tcp_mod_winit tcp_opt_arr tcp_opt_obj tcp_random_anon_port @@ -263,7 +289,8 @@ tcp_random_state tcp_randtbl tcp_report_header tcp_rinfo -tcp_rinit +tcp_rinitv4 +tcp_rinitv6 tcp_sack_info_cache tcp_sock_winit tcp_squeue_close @@ -278,24 +305,26 @@ 
tcp_valid_levels_arr tcp_version tcp_winfo tcp_winit -tcpinfo +tcpinfov4 +tcpinfov6 tsol_strict_error udp_bind_fanout_size -udp_cache +udp_conn_cache udp_g_t_info_ack_ipv4 udp_g_t_info_ack_ipv6 -udp_info +udp_lrinit +udp_lwinit udp_max_optsize +udp_mod_info udp_opt_arr udp_opt_obj udp_param_arr udp_random_anon_port -udp_rinit -udp_snmp_rinit -udp_snmp_winit +udp_rinitv4 +udp_rinitv6 udp_valid_levels_arr udp_version udp_winit -udpinfo -winit_ipv6 +udpinfov4 +udpinfov6 zero_info diff --git a/usr/src/uts/intel/rts/Makefile b/usr/src/uts/intel/rts/Makefile index 071c8760cc..2247001290 100644 --- a/usr/src/uts/intel/rts/Makefile +++ b/usr/src/uts/intel/rts/Makefile @@ -43,7 +43,6 @@ MODULE = rts OBJECTS = $(RTS_OBJS:%=$(OBJS_DIR)/%) LINTS = $(RTS_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) -ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) CONF_SRCDIR = $(UTSBASE)/common/inet/ip # diff --git a/usr/src/uts/intel/rts/rts.global-objs.debug64 b/usr/src/uts/intel/rts/rts.global-objs.debug64 index 31673cdf39..4c699f6410 100644 --- a/usr/src/uts/intel/rts/rts.global-objs.debug64 +++ b/usr/src/uts/intel/rts/rts.global-objs.debug64 @@ -25,20 +25,8 @@ # ident "%Z%%M% %I% %E% SMI" cb_inet_devops -fsw inet_dev_info inet_devops -info -lcl_param_arr modldrv modlinkage -modlstrmod netdev_privs -rinit -rts_g_t_info_ack -rtsinfo -rts_max_optsize -rts_opt_arr -rts_opt_obj -rts_valid_levels_arr -winit diff --git a/usr/src/uts/sparc/arp/arp.global-objs.debug64 b/usr/src/uts/sparc/arp/arp.global-objs.debug64 index ac7f04c356..c0e5d9e9da 100644 --- a/usr/src/uts/sparc/arp/arp.global-objs.debug64 +++ b/usr/src/uts/sparc/arp/arp.global-objs.debug64 @@ -26,17 +26,17 @@ ar_cmd_tbl ar_m_tbl -arp_netinfo -arp_param_arr +arp_mod_info arpinfo +arprinit +arpwinit +arp_param_arr +arp_netinfo cb_inet_devops fsw inet_dev_info inet_devops -info modldrv modlinkage modlstrmod netdev_privs -rinit -winit diff --git a/usr/src/uts/sparc/icmp/icmp.global-objs.debug64 
b/usr/src/uts/sparc/icmp/icmp.global-objs.debug64 index e63dd0ec2e..ba041c7e17 100644 --- a/usr/src/uts/sparc/icmp/icmp.global-objs.debug64 +++ b/usr/src/uts/sparc/icmp/icmp.global-objs.debug64 @@ -24,24 +24,11 @@ # # ident "%Z%%M% %I% %E% SMI" -ICMP6_MAJ cb_inet_devops fsw -icmp_g_t_info_ack -icmp_max_optsize -icmp_opt_arr -icmp_opt_obj -icmp_param_arr -icmp_valid_levels_arr -icmpinfo inet_dev_info inet_devops -info modldrv modlinkage modlstrmod netdev_privs -rinit -sin6_null -sin_null -winit diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64 index 1ad0df0982..f74a6e46b5 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.debug64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 @@ -25,11 +25,6 @@ # ident "%Z%%M% %I% %E% SMI" IP_MAJ -SCTP6_MAJ -SCTP_MAJ -TCP6_MAJ -TCP_MAJ -UDP6_MAJ cb_inet_devops cl_inet_bind cl_inet_connect @@ -52,6 +47,10 @@ do_tcp_direct_sockfs do_tcp_fusion do_tcpzcopy dohwcksum +dummy_mod_info +dummymodinfo +dummyrmodinit +dummywmodinit eventq_queue_in eventq_queue_nic eventq_queue_out @@ -64,7 +63,19 @@ gcgrp6_hash gcgrp_hash_size gcgrp_lock icmp_frag_size_table +icmp_g_t_info_ack icmp_ipha +icmp_max_optsize +icmp_mod_info +icmp_opt_arr +icmp_opt_obj +icmp_param_arr +icmp_valid_levels_arr +icmpinfov4 +icmpinfov6 +icmprinitv4 +icmprinitv6 +icmpwinit ill_no_arena ill_null inet_dev_info @@ -89,6 +100,7 @@ ip_aron_template ip_aru_template ip_cache_table_size ip_cgtp_filter_rev +ip_conn_cache ip_debug ip_g_all_ones ip_input_proc @@ -131,22 +143,22 @@ ip_thread_list ip_thread_rwlock ip_wput_frag_mdt_min ipcl_bind_fanout_size -ipcl_conn_cache ipcl_conn_hash_maxsize ipcl_conn_hash_memfactor ipcl_conn_hash_size ipcl_debug_level ipcl_raw_fanout_size -ipcl_tcpconn_cache ipcl_udp_fanout_size ipclassifier_version ipif_loopback_name ipif_nv_tbl ipif_zero -ipinfo +ipinfov4 +ipinfov6 iplrinit iplwinit -iprinit +iprinitv4 +iprinitv6 ipsec_action_cache ipsec_hdr_pullup_needed ipsec_info_cache @@ -169,7 +181,8 
@@ ipv6_loopback ipv6_solicited_node_mcast ipv6_unspecified_group ipv6info -ipwinit +ipwinitv4 +ipwinitv6 ire_cache ire_gw_secattr_cache ire_idle_cutoff_interval @@ -198,10 +211,10 @@ netmasks prov_update_handle radix_mask_cache radix_node_cache +rawip_conn_cache recvq_call recvq_loop_cnt req_arr -rinit_ipv6 rn_mkfreelist rn_ones rn_zeros @@ -210,6 +223,16 @@ rr_max_pkt_cnt_ratio rr_min_blank_ratio rr_min_pkt_cnt_ratio rt_entry_cache +rts_conn_cache +rts_g_t_info_ack +rts_max_optsize +rts_mod_info +rts_opt_arr +rts_opt_obj +rts_valid_levels_arr +rtsinfo +rtsrinit +rtswinit sctp_asconf_default_dispatch sctp_asconf_dispatch_tbl sctp_conn_cache @@ -217,6 +240,7 @@ sctp_conn_hash_size sctp_kmem_faddr_cache sctp_kmem_ftsn_set_cache sctp_kmem_set_cache +sctp_mod_info sctp_recvq_tq_task_max sctp_recvq_tq_task_min sctp_recvq_tq_thr_max @@ -224,6 +248,9 @@ sctp_recvq_tq_thr_min sctp_sin6_null sctp_taskq sctpdebug +sctpinfo +sctprinit +sctpwinit sendq_collision sendq_empty sendq_loop_cnt @@ -247,6 +274,7 @@ squeue_writerdrain_ms squeue_writerdrain_ns tcp_acceptor_rinit tcp_acceptor_winit +tcp_conn_cache tcp_conn_hash_size tcp_drop_ack_unsent_cnt tcp_free_list_max_cnt @@ -261,8 +289,6 @@ tcp_loopback_rinit tcp_max_optsize tcp_mdt_chain tcp_mdt_smss_threshold -tcp_mod_rinit -tcp_mod_winit tcp_opt_arr tcp_opt_obj tcp_random_anon_port @@ -274,7 +300,8 @@ tcp_random_state tcp_randtbl tcp_report_header tcp_rinfo -tcp_rinit +tcp_rinitv4 +tcp_rinitv6 tcp_sack_info_cache tcp_sock_winit tcp_squeue_close @@ -289,25 +316,26 @@ tcp_valid_levels_arr tcp_version tcp_winfo tcp_winit -tcpinfo +tcpinfov4 +tcpinfov6 tsol_strict_error udp_bind_fanout_size -udp_cache -udp_count +udp_conn_cache udp_g_t_info_ack_ipv4 udp_g_t_info_ack_ipv6 -udp_info +udp_lrinit +udp_lwinit udp_max_optsize +udp_mod_info udp_opt_arr udp_opt_obj udp_param_arr udp_random_anon_port -udp_rinit -udp_snmp_rinit -udp_snmp_winit +udp_rinitv4 +udp_rinitv6 udp_valid_levels_arr udp_version udp_winit -udpinfo -winit_ipv6 
+udpinfov4 +udpinfov6 zero_info diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64 index 172ad0f370..1ff919b3bd 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 @@ -25,11 +25,6 @@ # ident "%Z%%M% %I% %E% SMI" IP_MAJ -SCTP6_MAJ -SCTP_MAJ -TCP6_MAJ -TCP_MAJ -UDP6_MAJ cb_inet_devops cl_inet_bind cl_inet_connect @@ -52,6 +47,10 @@ do_tcp_direct_sockfs do_tcp_fusion do_tcpzcopy dohwcksum +dummy_mod_info +dummymodinfo +dummyrmodinit +dummywmodinit eventq_queue_in eventq_queue_nic eventq_queue_out @@ -64,7 +63,19 @@ gcgrp6_hash gcgrp_hash_size gcgrp_lock icmp_frag_size_table +icmp_g_t_info_ack icmp_ipha +icmp_max_optsize +icmp_mod_info +icmp_opt_arr +icmp_opt_obj +icmp_param_arr +icmp_valid_levels_arr +icmpinfov4 +icmpinfov6 +icmprinitv4 +icmprinitv6 +icmpwinit ill_no_arena ill_null inet_dev_info @@ -89,6 +100,7 @@ ip_aron_template ip_aru_template ip_cache_table_size ip_cgtp_filter_rev +ip_conn_cache ip_debug ip_g_all_ones ip_input_proc @@ -131,21 +143,21 @@ ip_thread_list ip_thread_rwlock ip_wput_frag_mdt_min ipcl_bind_fanout_size -ipcl_conn_cache ipcl_conn_hash_maxsize ipcl_conn_hash_memfactor ipcl_conn_hash_size ipcl_raw_fanout_size -ipcl_tcpconn_cache ipcl_udp_fanout_size ipclassifier_version ipif_loopback_name ipif_nv_tbl ipif_zero -ipinfo +ipinfov4 +ipinfov6 iplrinit iplwinit -iprinit +iprinitv4 +iprinitv6 ipsec_action_cache ipsec_hdr_pullup_needed ipsec_info_cache @@ -167,7 +179,8 @@ ipv6_loopback ipv6_solicited_node_mcast ipv6_unspecified_group ipv6info -ipwinit +ipwinitv4 +ipwinitv6 ire_cache ire_gw_secattr_cache ire_idle_cutoff_interval @@ -196,8 +209,8 @@ netmasks prov_update_handle radix_mask_cache radix_node_cache +rawip_conn_cache req_arr -rinit_ipv6 rn_mkfreelist rn_ones rn_zeros @@ -206,6 +219,16 @@ rr_max_pkt_cnt_ratio rr_min_blank_ratio rr_min_pkt_cnt_ratio rt_entry_cache +rts_conn_cache +rts_g_t_info_ack +rts_max_optsize +rts_mod_info +rts_opt_arr 
+rts_opt_obj +rts_valid_levels_arr +rtsinfo +rtsrinit +rtswinit sctp_asconf_default_dispatch sctp_asconf_dispatch_tbl sctp_conn_cache @@ -213,6 +236,7 @@ sctp_conn_hash_size sctp_kmem_faddr_cache sctp_kmem_ftsn_set_cache sctp_kmem_set_cache +sctp_mod_info sctp_recvq_tq_task_max sctp_recvq_tq_task_min sctp_recvq_tq_thr_max @@ -220,6 +244,9 @@ sctp_recvq_tq_thr_min sctp_sin6_null sctp_taskq sctpdebug +sctpinfo +sctprinit +sctpwinit sin6_null sin_null sqset_global_list @@ -236,6 +263,7 @@ squeue_writerdrain_ms squeue_writerdrain_ns tcp_acceptor_rinit tcp_acceptor_winit +tcp_conn_cache tcp_conn_hash_size tcp_drop_ack_unsent_cnt tcp_free_list_max_cnt @@ -250,8 +278,6 @@ tcp_loopback_rinit tcp_max_optsize tcp_mdt_chain tcp_mdt_smss_threshold -tcp_mod_rinit -tcp_mod_winit tcp_opt_arr tcp_opt_obj tcp_random_anon_port @@ -263,7 +289,8 @@ tcp_random_state tcp_randtbl tcp_report_header tcp_rinfo -tcp_rinit +tcp_rinitv4 +tcp_rinitv6 tcp_sack_info_cache tcp_sock_winit tcp_squeue_close @@ -278,24 +305,26 @@ tcp_valid_levels_arr tcp_version tcp_winfo tcp_winit -tcpinfo +tcpinfov4 +tcpinfov6 tsol_strict_error udp_bind_fanout_size -udp_cache +udp_conn_cache udp_g_t_info_ack_ipv4 udp_g_t_info_ack_ipv6 -udp_info +udp_lrinit +udp_lwinit udp_max_optsize +udp_mod_info udp_opt_arr udp_opt_obj udp_param_arr udp_random_anon_port -udp_rinit -udp_snmp_rinit -udp_snmp_winit +udp_rinitv4 +udp_rinitv6 udp_valid_levels_arr udp_version udp_winit -udpinfo -winit_ipv6 +udpinfov4 +udpinfov6 zero_info diff --git a/usr/src/uts/sparc/rts/Makefile b/usr/src/uts/sparc/rts/Makefile index 93e6447847..ff635303bc 100644 --- a/usr/src/uts/sparc/rts/Makefile +++ b/usr/src/uts/sparc/rts/Makefile @@ -42,7 +42,6 @@ MODULE = rts OBJECTS = $(RTS_OBJS:%=$(OBJS_DIR)/%) LINTS = $(RTS_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) -ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) CONF_SRCDIR = $(UTSBASE)/common/inet/ip # diff --git a/usr/src/uts/sparc/rts/rts.global-objs.debug64 
b/usr/src/uts/sparc/rts/rts.global-objs.debug64 index f6d7f5a6ad..4c699f6410 100644 --- a/usr/src/uts/sparc/rts/rts.global-objs.debug64 +++ b/usr/src/uts/sparc/rts/rts.global-objs.debug64 @@ -25,20 +25,8 @@ # ident "%Z%%M% %I% %E% SMI" cb_inet_devops -fsw inet_dev_info inet_devops -info -lcl_param_arr modldrv modlinkage -modlstrmod netdev_privs -rinit -rts_g_t_info_ack -rts_max_optsize -rts_opt_arr -rts_opt_obj -rts_valid_levels_arr -rtsinfo -winit |