diff options
author | Philip Kirk <Phil.Kirk@Sun.COM> | 2008-11-06 06:47:54 -0500 |
---|---|---|
committer | Philip Kirk <Phil.Kirk@Sun.COM> | 2008-11-06 06:47:54 -0500 |
commit | b127ac411761a3d8d642d9342d9cac2785e1faaa (patch) | |
tree | 041956f138b0560132678ab9e0971bbb8dff1887 /usr/src | |
parent | 471fb14e1ef453ca10871d66de96105f33392fe0 (diff) | |
download | illumos-joyent-b127ac411761a3d8d642d9342d9cac2785e1faaa.tar.gz |
PSARC/2006/475 Clearview: IP Observability Devices
4085089 add a feature to enable 'snooping' of the loopback traffic
6753688 ip netinfo has no need for separate create and dispatch functions
6755448 ifconfig wedged in SIOCLIFREMOVEIF
6756483 incorrect ASSERT() in ip_delmulti[_v6]()
5092073 RFE: allow snoop to filter on zonename or zoneid
6606991 panic assertion failure !ill->ill_join_allmulti for multicast router
6760922 devname doesn't handle stale dev_t's in sdev_node cache entries
Diffstat (limited to 'usr/src')
88 files changed, 4279 insertions, 704 deletions
diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs index c771e4231b..1d4a67bf97 100644 --- a/usr/src/Targetdirs +++ b/usr/src/Targetdirs @@ -65,6 +65,7 @@ ROOT.SYS= \ /dev \ /dev/dsk \ /dev/fd \ + /dev/ipnet \ /dev/net \ /dev/rdsk \ /dev/rmt \ diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c index fc083aeeb0..0b83121ee4 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" /* SunOS */ - #include <stdio.h> #include <unistd.h> #include <stropts.h> @@ -80,11 +78,12 @@ static int sumcount; int x_offset = -1; int x_length = 0x7fffffff; FILE *namefile; -int Pflg; -boolean_t qflg = B_FALSE; -boolean_t rflg = B_FALSE; +boolean_t Pflg; +boolean_t Iflg; +boolean_t qflg; +boolean_t rflg; #ifdef DEBUG -boolean_t zflg = B_FALSE; /* debugging packet corrupt flag */ +boolean_t zflg; #endif struct Pf_ext_packetfilt pf; @@ -105,12 +104,13 @@ main(int argc, char **argv) struct Pf_ext_packetfilt *fp = NULL; char *icapfile = NULL; char *ocapfile = NULL; - int nflg = 0; - int Nflg = 0; + boolean_t nflg = B_FALSE; + boolean_t Nflg = B_FALSE; int Cflg = 0; + boolean_t Uflg = B_FALSE; int first = 1; int last = 0x7fffffff; - int use_kern_pf; + boolean_t use_kern_pf; char *p, *p2; char names[MAXPATHLEN + 1]; char self[MAXHOSTNAMELEN + 1]; @@ -228,8 +228,8 @@ main(int argc, char **argv) } (void) setvbuf(stdout, NULL, _IOLBF, BUFSIZ); - while ((c = getopt(argc, argv, "at:CPDSi:o:Nn:s:d:vVp:f:c:x:?rqz")) - != EOF) { + while ((c = getopt(argc, argv, "at:CPDSi:o:Nn:s:d:I:vVp:f:c:x:U?rqz")) + != EOF) { switch (c) { case 'a': audiodev = getenv("AUDIODEV"); @@ -238,7 +238,7 @@ main(int argc, char **argv) audio = open(audiodev, O_WRONLY); if (audio < 0) { pr_err("Audio 
device %s: %m", - audiodev); + audiodev); exit(1); } break; @@ -251,8 +251,14 @@ main(int argc, char **argv) default: usage(); } break; + case 'I': + if (device != NULL) + usage(); + Iflg = B_TRUE; + device = optarg; + break; case 'P': - Pflg++; + Pflg = B_TRUE; break; case 'D': flags |= F_DROPS; @@ -267,16 +273,18 @@ main(int argc, char **argv) ocapfile = optarg; break; case 'N': - Nflg++; + Nflg = B_TRUE; break; case 'n': - nflg++; + nflg = B_TRUE; (void) strlcpy(names, optarg, MAXPATHLEN); break; case 's': snaplen = atoi(optarg); break; case 'd': + if (Iflg) + usage(); device = optarg; break; case 'v': @@ -306,12 +314,12 @@ main(int argc, char **argv) strcmp(p+1, self) == 0) (void) fprintf(stderr, "Warning: cannot capture packets from %s\n", - self); + self); *p = ' '; } else if (strcmp(optarg, self) == 0) (void) fprintf(stderr, "Warning: cannot capture packets from %s\n", - self); + self); argstr = optarg; break; case 'x': @@ -330,7 +338,7 @@ main(int argc, char **argv) maxcount = atoi(optarg); break; case 'C': - Cflg++; + Cflg = B_TRUE; break; case 'q': qflg = B_TRUE; @@ -338,6 +346,9 @@ main(int argc, char **argv) case 'r': rflg = B_TRUE; break; + case 'U': + Uflg = B_TRUE; + break; #ifdef DEBUG case 'z': zflg = B_TRUE; @@ -363,6 +374,7 @@ main(int argc, char **argv) if (!icapfile) { use_kern_pf = check_device(&dh, &device); } else { + use_kern_pf = B_FALSE; cap_open_read(icapfile); if (!nflg) { @@ -372,6 +384,9 @@ main(int argc, char **argv) } } + if (Uflg) + use_kern_pf = B_FALSE; + /* attempt to read .names file if it exists before filtering */ if ((!Nflg) && names[0] != '\0') { if (access(names, F_OK) == 0) { @@ -383,7 +398,7 @@ main(int argc, char **argv) } if (argstr) { - if (!icapfile && use_kern_pf) { + if (use_kern_pf) { ret = pf_compile(argstr, Cflg); switch (ret) { case 0: @@ -447,7 +462,7 @@ main(int argc, char **argv) } flags = 0; (void) fprintf(stderr, - "Creating name file %s\n", names); + "Creating name file %s\n", names); } if (flags & 
F_DTAIL) @@ -646,7 +661,7 @@ get_sum_line() sumcount = 0; /* error recovery */ pr_err( "get_sum_line: sumline overflow (sumcount=%d, MAXSUM=%d)\n", - tsumcount, MAXSUM); + tsumcount, MAXSUM); } sumline[sumcount][0] = '\0'; @@ -764,10 +779,12 @@ usage(void) (void) fprintf(stderr, "\t[ -a ] # Listen to packets on audio\n"); (void) fprintf(stderr, - "\t[ -d device ] # Listen on interface named device\n"); + "\t[ -d link ] # Listen on named link\n"); (void) fprintf(stderr, "\t[ -s snaplen ] # Truncate packets\n"); (void) fprintf(stderr, + "\t[ -I IP interface ] # Listen on named IP interface\n"); + (void) fprintf(stderr, "\t[ -c count ] # Quit after count packets\n"); (void) fprintf(stderr, "\t[ -P ] # Turn OFF promiscuous mode\n"); @@ -892,7 +909,7 @@ snoop_alarm(int s_sec, void (*s_handler)()) } else { if (nalarm == 0 || nalarm > hp->s_time) nalarm = now < hp->s_time ? hp->s_time : - now + 1; + now + 1; tp = hp; } } @@ -974,7 +991,7 @@ snoop_sigrecover(int sig, siginfo_t *info, void *p) if ((hp->s_time - now) > 0) { if (nalarm == 0 || nalarm > hp->s_time) nalarm = now < hp->s_time ? 
- hp->s_time : now + 1; + hp->s_time : now + 1; } } } @@ -1027,8 +1044,8 @@ snoop_sigrecover(int sig, siginfo_t *info, void *p) } if (snoop_nrecover >= SNOOP_MAXRECOVER) { (void) fprintf(stderr, - "snoop: WARNING: skipping from packet %d\n", - count); + "snoop: WARNING: skipping from packet %d\n", + count); snoop_nrecover = 0; } else { /* continue trying */ @@ -1036,7 +1053,7 @@ snoop_sigrecover(int sig, siginfo_t *info, void *p) } } else if (snoop_nrecover >= SNOOP_MAXRECOVER) { (void) fprintf(stderr, - "snoop: ERROR: cannot recover from packet %d\n", count); + "snoop: ERROR: cannot recover from packet %d\n", count); exit(1); } @@ -1055,7 +1072,7 @@ snoop_sigrecover(int sig, siginfo_t *info, void *p) /* Inform user that snoop has taken a fault */ (void) fprintf(stderr, "WARNING: received signal %d from packet %d\n", - sig, count); + sig, count); } /* Reset interpreter variables */ diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h index 28464d5a4c..d7da06a150 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h @@ -20,15 +20,13 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SNOOP_H #define _SNOOP_H -#pragma ident "%Z%%M% %I% %E% SMI" /* SunOS */ - #include <rpc/types.h> #include <sys/pfmod.h> #include <sys/time.h> @@ -283,12 +281,15 @@ extern const char *arp_htype(int); * cannot be expressed in kernel space. 
*/ typedef struct interface { - uint_t mac_type; - uint_t mtu_size; - uint_t network_type_offset; - uint_t (*header_len)(char *); - uint_t (*interpreter)(int, char *, int, int); - uint_t try_kernel_filter; + uint_t mac_type; + uint_t mtu_size; + uint_t network_type_offset; + size_t network_type_len; + uint_t network_type_ip; + uint_t network_type_ipv6; + uint_t (*header_len)(char *); + uint_t (*interpreter)(int, char *, int, int); + boolean_t try_kernel_filter; } interface_t; extern interface_t INTERFACES[], *interface; @@ -306,6 +307,7 @@ extern char *prot_title; extern unsigned int encap_levels, total_encap_levels; extern int quitting; +extern boolean_t Iflg, Pflg, rflg; /* * Global error recovery routine: used to reset snoop variables after diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c index 206835c407..f062247997 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" /* SunOS */ - #include <stdio.h> #include <string.h> #include <errno.h> @@ -52,6 +50,7 @@ #include <ctype.h> #include <values.h> #include <libdlpi.h> +#include <sys/dlpi.h> #include "snoop.h" @@ -89,6 +88,7 @@ boolean_t check_device(dlpi_handle_t *dhp, char **devicep) { int retval; + int flags = DLPI_PASSIVE | DLPI_RAW; /* * Determine which network device @@ -105,7 +105,7 @@ check_device(dlpi_handle_t *dhp, char **devicep) unsigned bufsize; if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) - pr_err("socket"); + pr_err("socket"); if (ioctl(s, SIOCGIFNUM, (char *)&numifs) < 0) { pr_err("check_device: ioctl SIOCGIFNUM"); @@ -139,8 +139,8 @@ check_device(dlpi_handle_t *dhp, char **devicep) if (ioctl(s, SIOCGIFFLAGS, (char *)ifr) < 0) pr_err("ioctl SIOCGIFFLAGS"); if ((ifr->ifr_flags & - (IFF_VIRTUAL|IFF_LOOPBACK|IFF_UP| - IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) + (IFF_VIRTUAL|IFF_LOOPBACK|IFF_UP| + IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) break; } @@ -150,9 +150,11 @@ check_device(dlpi_handle_t *dhp, char **devicep) *devicep = ifr->ifr_name; (void) close(s); } - - retval = dlpi_open(*devicep, dhp, DLPI_PASSIVE|DLPI_RAW); - if (retval != DLPI_SUCCESS) { + if (Iflg) + flags |= DLPI_DEVIPNET; + if (Iflg || strcmp(*devicep, "lo0") == 0) + flags |= DLPI_IPNETINFO; + if ((retval = dlpi_open(*devicep, dhp, flags)) != DLPI_SUCCESS) { pr_err("cannot open \"%s\": %s", *devicep, dlpi_strerror(retval)); } @@ -187,15 +189,20 @@ void initdevice(dlpi_handle_t dh, ulong_t snaplen, ulong_t chunksize, struct timeval *timeout, struct Pf_ext_packetfilt *fp) { - extern int Pflg; int retv; int netfd; + int val = 1; retv = dlpi_bind(dh, DLPI_ANY_SAP, NULL); if (retv != DLPI_SUCCESS) pr_errdlpi(dh, "cannot bind on", retv); - (void) fprintf(stderr, "Using device %s ", dlpi_linkname(dh)); + if (Iflg) { + (void) fprintf(stderr, "Using device ipnet/%s ", + dlpi_linkname(dh)); + } else { + (void) fprintf(stderr, "Using device %s ", dlpi_linkname(dh)); + } /* * If 
Pflg not set - use physical level @@ -394,9 +401,9 @@ scan(char *buf, int len, int filter, int cap, int old, void (*proc)(), nhdrp->sbh_totlen = ntohl(hdrp->sbh_totlen); nhdrp->sbh_drops = ntohl(hdrp->sbh_drops); nhdrp->sbh_timestamp.tv_sec = - ntohl(hdrp->sbh_timestamp.tv_sec); + ntohl(hdrp->sbh_timestamp.tv_sec); nhdrp->sbh_timestamp.tv_usec = - ntohl(hdrp->sbh_timestamp.tv_usec); + ntohl(hdrp->sbh_timestamp.tv_usec); } /* Enhanced check for valid header */ @@ -412,14 +419,15 @@ scan(char *buf, int len, int filter, int cap, int old, void (*proc)(), (nhdrp->sbh_msglen > nhdrp->sbh_origlen) || (nhdrp->sbh_totlen < nhdrp->sbh_msglen) || (nhdrp->sbh_timestamp.tv_sec == 0)) { - if (cap) + if (cap) { (void) fprintf(stderr, "(warning) bad packet " "header in capture file"); - else + } else { (void) fprintf(stderr, "(warning) bad packet " "header in buffer"); + } (void) fprintf(stderr, " offset %d: length=%d\n", - bp - buf, nhdrp->sbh_totlen); + bp - buf, nhdrp->sbh_totlen); goto err; } @@ -433,7 +441,7 @@ scan(char *buf, int len, int filter, int cap, int old, void (*proc)(), " greater than MTU in buffer"); (void) fprintf(stderr, " offset %d: length=%d\n", - bp - buf, nhdrp->sbh_totlen); + bp - buf, nhdrp->sbh_totlen); } /* @@ -454,16 +462,16 @@ scan(char *buf, int len, int filter, int cap, int old, void (*proc)(), header_okay = 1; if (!filter || - want_packet(pktp, - nhdrp->sbh_msglen, - nhdrp->sbh_origlen)) { + want_packet(pktp, + nhdrp->sbh_msglen, + nhdrp->sbh_origlen)) { count++; /* * Start deadman timer for interpreter processing */ (void) snoop_alarm(SNOOP_ALARM_GRAN*SNOOP_MAXRECOVER, - NULL); + NULL); encap_levels = 0; if (!cap || count >= first) @@ -507,7 +515,7 @@ err: bp += sizeof (int); } else { for (bp += sizeof (int); bp <= bufstop; - bp += sizeof (int)) { + bp += sizeof (int)) { hdrp = (struct sb_hdr *)bp; /* An approximate timestamp located */ if ((hdrp->sbh_timestamp.tv_sec >> 8) == @@ -528,8 +536,8 @@ static void cap_write_error(const char *msgtype) { 
(void) fprintf(stderr, - "snoop: cannot write %s to capture file: %s\n", - msgtype, strerror(errno)); + "snoop: cannot write %s to capture file: %s\n", + msgtype, strerror(errno)); exit(1); } @@ -668,17 +676,17 @@ cap_open_read(const char *name) default: pr_err("capture file: %s: Version %d unrecognized\n", - name, cap_vers); + name, cap_vers); } for (interface = &INTERFACES[0]; interface->mac_type != -1; - interface++) + interface++) if (interface->mac_type == device_mac_type) break; if (interface->mac_type == -1) pr_err("Mac Type = %x is not supported\n", - device_mac_type); + device_mac_type); } else { /* Use heuristic to check if it's an old-style file */ diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c index 56e961fe10..c12cc5faee 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" /* SunOS */ - #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -43,36 +41,50 @@ #include <sys/ib/clients/ibd/ibd.h> #include <sys/ethernet.h> #include <sys/vlan.h> +#include <sys/zone.h> +#include <sys/byteorder.h> +#include <limits.h> +#include <inet/ip.h> +#include <inet/ip6.h> #include "at.h" #include "snoop.h" -static uint_t ether_header_len(char *), fddi_header_len(char *), - tr_header_len(char *), ib_header_len(char *); +static uint_t ether_header_len(char *), fddi_header_len(char *), + tr_header_len(char *), ib_header_len(char *), ipnet_header_len(char *); static uint_t interpret_ether(), interpret_fddi(), interpret_tr(); -static uint_t interpret_ib(int, char *, int, int); +static uint_t interpret_ib(int, char *, int, int), + interpret_ipnet(int, char *, int, int); static void addr_copy_swap(struct ether_addr *, struct ether_addr *); interface_t *interface; interface_t INTERFACES[] = { /* IEEE 802.3 CSMA/CD network */ - { DL_CSMACD, 1550, 12, 
ether_header_len, interpret_ether, B_TRUE }, + { DL_CSMACD, 1550, 12, 2, ETHERTYPE_IP, ETHERTYPE_IPV6, + ether_header_len, interpret_ether, B_TRUE }, /* Ethernet Bus */ - { DL_ETHER, 1550, 12, ether_header_len, interpret_ether, B_TRUE }, + { DL_ETHER, 1550, 12, 2, ETHERTYPE_IP, ETHERTYPE_IPV6, + ether_header_len, interpret_ether, B_TRUE }, /* Fiber Distributed data interface */ - { DL_FDDI, 4500, 19, fddi_header_len, interpret_fddi, B_FALSE }, + { DL_FDDI, 4500, 19, 2, ETHERTYPE_IP, ETHERTYPE_IPV6, + fddi_header_len, interpret_fddi, B_FALSE }, /* Token Ring interface */ - { DL_TPR, 17800, 0, tr_header_len, interpret_tr, B_FALSE }, + { DL_TPR, 17800, 0, 2, ETHERTYPE_IP, ETHERTYPE_IPV6, + tr_header_len, interpret_tr, B_FALSE }, /* Infiniband */ - { DL_IB, 4096, 0, ib_header_len, interpret_ib, B_TRUE }, + { DL_IB, 4096, 0, 2, ETHERTYPE_IP, ETHERTYPE_IPV6, + ib_header_len, interpret_ib, B_TRUE }, - { (uint_t)-1, 0, 0, 0, 0, 0 } + /* ipnet */ + { DL_IPNET, INT_MAX, 0, 2, IPV4_VERSION, IPV6_VERSION, + ipnet_header_len, interpret_ipnet, B_TRUE }, + { (uint_t)-1, 0, 0, 0, 0, NULL, NULL, B_FALSE } }; /* externals */ @@ -698,7 +710,7 @@ print_sr(struct tr_ri *rh) static char line[512]; sprintf(line, "TR Source Route dir=%d, mtu=%d", - rh->dir, Mtutab[rh->mtu]); + rh->dir, Mtutab[rh->mtu]); hops = (int)(rh->len - 2) / (int)2; @@ -1516,7 +1528,7 @@ interpret_ib(int flags, char *header, int elen, int origlen) if (origlen < IPOIB_HDRSIZE) { if (flags & F_SUM) (void) snprintf(get_sum_line(), MAXLINE, - "RUNT (short packet - %d bytes)", origlen); + "RUNT (short packet - %d bytes)", origlen); if (flags & F_DTAIL) show_header("RUNT: ", "Short packet", origlen); return (elen); @@ -1536,24 +1548,24 @@ interpret_ib(int flags, char *header, int elen, int origlen) if (flags & F_SUM) { (void) snprintf(get_sum_line(), MAXLINE, - "IPIB Type=%04X (%s), size = %d bytes", - ethertype, - print_ethertype(ethertype), - origlen); + "IPIB Type=%04X (%s), size = %d bytes", + ethertype, + 
print_ethertype(ethertype), + origlen); } if (flags & F_DTAIL) { show_header("IPIB: ", "IPIB Header", elen); show_space(); (void) snprintf(get_line(0, 0), get_line_remain(), - "Packet %d arrived at %d:%02d:%d.%02d", - pi_frame, pi_time_hour, pi_time_min, - pi_time_sec, pi_time_usec / 10000); + "Packet %d arrived at %d:%02d:%d.%02d", + pi_frame, pi_time_hour, pi_time_min, + pi_time_sec, pi_time_usec / 10000); (void) snprintf(get_line(0, 0), get_line_remain(), - "Packet size = %d bytes", elen, elen); + "Packet size = %d bytes", elen, elen); (void) snprintf(get_line(0, 2), get_line_remain(), - "Ethertype = %04X (%s)", ethertype, - print_ethertype(ethertype)); + "Ethertype = %04X (%s)", ethertype, + print_ethertype(ethertype)); show_space(); } @@ -1573,3 +1585,85 @@ interpret_ib(int flags, char *header, int elen, int origlen) return (elen); } + +uint_t +ipnet_header_len(char *hdr) +{ + return (sizeof (dl_ipnetinfo_t)); +} + +#define MAX_UINT64_STR 22 +static uint_t +interpret_ipnet(int flags, char *header, int elen, int origlen) +{ + dl_ipnetinfo_t dl; + size_t len = elen - sizeof (dl_ipnetinfo_t); + char *off = (char *)header + sizeof (dl_ipnetinfo_t); + int blen = MAX(origlen, 8252); + char szone[MAX_UINT64_STR]; + char dzone[MAX_UINT64_STR]; + + (void) memcpy(&dl, header, sizeof (dl)); + if (data != NULL && datalen != 0 && datalen < blen) { + free(data); + data = NULL; + datalen = 0; + } + if (data == NULL) { + data = (char *)malloc(blen); + if (!data) + pr_err("Warning: malloc failure"); + datalen = blen; + } + + if (dl.dli_srczone == ALL_ZONES) + sprintf(szone, "Unknown"); + else + sprintf(szone, "%llu", BE_64(dl.dli_srczone)); + + if (dl.dli_dstzone == ALL_ZONES) + sprintf(dzone, "Unknown"); + else + sprintf(dzone, "%llu", BE_64(dl.dli_dstzone)); + + if (flags & F_SUM) { + (void) snprintf(get_sum_line(), MAXLINE, + "IPNET src zone %s dst zone %s", szone, dzone); + } + + if (flags & F_DTAIL) { + show_header("IPNET: ", "IPNET Header", elen); + show_space(); + 
(void) sprintf(get_line(0, 0), + "Packet %d arrived at %d:%02d:%d.%05d", + pi_frame, + pi_time_hour, pi_time_min, pi_time_sec, + pi_time_usec / 10); + (void) sprintf(get_line(0, 0), + "Packet size = %d bytes", + elen); + (void) snprintf(get_line(0, 0), get_line_remain(), + "dli_version = %d", dl.dli_version); + (void) snprintf(get_line(0, 0), get_line_remain(), + "dli_type = %d", dl.dli_ipver); + (void) snprintf(get_line(0, 2), get_line_remain(), + "dli_srczone = %s", szone); + (void) snprintf(get_line(0, 2), get_line_remain(), + "dli_dstzone = %s", dzone); + show_space(); + } + memcpy(data, off, len); + + switch (dl.dli_ipver) { + case IPV4_VERSION: + (void) interpret_ip(flags, (struct ip *)data, len); + break; + case IPV6_VERSION: + (void) interpret_ipv6(flags, (ip6_t *)data, len); + break; + default: + break; + } + + return (0); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c index 7cbb159a5d..5127e1c33d 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" /* SunOS */ - #include <stdio.h> #include <stdlib.h> #include <ctype.h> @@ -50,6 +48,7 @@ #include <arpa/inet.h> #include <rpc/rpc.h> #include <rpc/rpcent.h> +#include <sys/dlpi.h> #include <snoop.h> #include "snoop_vlan.h" @@ -93,6 +92,12 @@ #define AT_DST_NODE_OFFSET 16 #define AT_SRC_NODE_OFFSET 17 +/* + * Offset for the source and destination zoneid in the ipnet header. 
+ */ +#define IPNET_SRCZONE_OFFSET 8 +#define IPNET_DSTZONE_OFFSET 16 + int eaddr; /* need ethernet addr */ int opstack; /* operand stack depth */ @@ -368,10 +373,10 @@ codeprint() op++; if ((int)*op < 0) printf("\t%2d: 0x%08x (%d)\n", - op - oplist, *op, *op); + op - oplist, *op, *op); else printf("\t%2d: %d (0x%08x)\n", - op - oplist, *op, *op); + op - oplist, *op, *op); } } printf("\t%2d: STOP\n", op - oplist); @@ -593,21 +598,21 @@ want_packet(uchar_t *pkt, int len, int origlen) case 2: *((ushort_t *)(sp)) = - *((ushort_t *)(base + off)); + *((ushort_t *)(base + off)); *(((ushort_t *)sp) + 1) = - *((ushort_t *)(base + off) + 1); + *((ushort_t *)(base + off) + 1); break; case 1: case 3: *((uchar_t *)(sp)) = - *((uchar_t *)(base + off)); + *((uchar_t *)(base + off)); *(((uchar_t *)sp) + 1) = - *((uchar_t *)(base + off) + 1); + *((uchar_t *)(base + off) + 1); *(((uchar_t *)sp) + 2) = - *((uchar_t *)(base + off) + 2); + *((uchar_t *)(base + off) + 2); *(((uchar_t *)sp) + 3) = - *((uchar_t *)(base + off) + 3); + *((uchar_t *)(base + off) + 3); break; } *sp = ntohl(*sp); @@ -917,41 +922,38 @@ want_packet(uchar_t *pkt, int len, int origlen) break; case OP_OFFSET_ETHERTYPE: /* - * Set base to the location of the ethertype. - * If the packet is VLAN tagged, move base - * to the ethertype field in the VLAN header. - * Otherwise, set it to the appropriate field - * for this link type. + * Set base to the location of the ethertype as + * appropriate for this link type. Note that it's + * not called "ethertype" for every link type, but + * we need to call it something. */ if (offp >= &offstack[MAXSS]) return (0); *++offp = base; base = pkt + interface->network_type_offset; - if (base > pkt + len) { - /* Went too far, drop the packet */ - return (0); - } /* - * VLAN links are only supported on Ethernet-like - * links. + * Below, we adjust the offset for unusual + * link-layer headers that may have the protocol + * type in a variable location beyond what was set + * above. 
*/ - if (interface->mac_type == DL_ETHER || - interface->mac_type == DL_CSMACD) { - if (ntohs(get_u16(base)) == ETHERTYPE_VLAN) { - /* - * We need to point to the - * ethertype field in the VLAN - * tag, so also move past the - * ethertype field in the - * ethernet header. - */ + switch (interface->mac_type) { + case DL_ETHER: + case DL_CSMACD: + /* + * If this is a VLAN-tagged packet, we need + * to point to the ethertype field in the + * VLAN header. Move past the ethertype + * field in the ethernet header. + */ + if (ntohs(get_u16(base)) == ETHERTYPE_VLAN) base += (ENCAP_ETHERTYPE_OFF); - } - if (base > pkt + len) { - /* Went too far, drop the packet */ - return (0); - } + break; + } + if (base > pkt + len) { + /* Went too far, drop the packet */ + return (0); } break; } @@ -1040,6 +1042,24 @@ compare_value_mask(uint_t offset, uint_t len, uint_t val, int mask) emitop(OP_EQ); } +/* + * Compare two zoneid's. The arg val passed in is stored in network + * byte order. + */ +static void +compare_value_zone(uint_t offset, uint64_t val) +{ + int i; + + for (i = 0; i < sizeof (uint64_t) / 4; i++) { + load_const(ntohl(((uint32_t *)&val)[i])); + load_value(offset + i * 4, 4); + emitop(OP_EQ); + if (i != 0) + emitop(OP_AND); + } +} + /* Emit an operator into the code array */ static void emitop(enum optype opcode) @@ -1253,7 +1273,7 @@ next() tkp = p; } else if (base == 16) { size = 2 + strspn(token+2, - "0123456789abcdefABCDEF"); + "0123456789abcdefABCDEF"); size1 = p - token; if (size != size1) { tokentype = ALPHA; @@ -1294,14 +1314,16 @@ next() *tkp = '\0'; } -static struct match_type { +typedef struct match_type { char *m_name; int m_offset; int m_size; int m_value; int m_depend; enum optype m_optype; -} match_types[] = { +} match_type_t; + +static match_type_t ether_match_types[] = { /* * Table initialized assuming Ethernet data link headers. 
* m_offset is an offset beyond the offset op, which is why @@ -1331,15 +1353,44 @@ static struct match_type { 0, 0, 0, 0, 0, 0 }; +static match_type_t ipnet_match_types[] = { + /* + * Table initialized assuming Ethernet data link headers. + * m_offset is an offset beyond the offset op, which is why + * the offset is zero for when snoop needs to check an ethertype. + */ + "ip", 0, 2, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + -1, OP_OFFSET_ETHERTYPE, + "ip6", 0, 2, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + -1, OP_OFFSET_ETHERTYPE, + "tcp", 9, 1, IPPROTO_TCP, 0, OP_OFFSET_LINK, + "tcp", 6, 1, IPPROTO_TCP, 1, OP_OFFSET_LINK, + "udp", 9, 1, IPPROTO_UDP, 0, OP_OFFSET_LINK, + "udp", 6, 1, IPPROTO_UDP, 1, OP_OFFSET_LINK, + "icmp", 9, 1, IPPROTO_ICMP, 0, OP_OFFSET_LINK, + "icmp6", 6, 1, IPPROTO_ICMPV6, 1, OP_OFFSET_LINK, + "ospf", 9, 1, IPPROTO_OSPF, 0, OP_OFFSET_LINK, + "ospf", 6, 1, IPPROTO_OSPF, 1, OP_OFFSET_LINK, + "ip-in-ip", 9, 1, IPPROTO_ENCAP, 0, OP_OFFSET_LINK, + "esp", 9, 1, IPPROTO_ESP, 0, OP_OFFSET_LINK, + "esp", 6, 1, IPPROTO_ESP, 1, OP_OFFSET_LINK, + "ah", 9, 1, IPPROTO_AH, 0, OP_OFFSET_LINK, + "ah", 6, 1, IPPROTO_AH, 1, OP_OFFSET_LINK, + "sctp", 9, 1, IPPROTO_SCTP, 0, OP_OFFSET_LINK, + "sctp", 6, 1, IPPROTO_SCTP, 1, OP_OFFSET_LINK, + 0, 0, 0, 0, 0, 0 +}; + static void -generate_check(struct match_type *mtp) +generate_check(match_type_t match_types[], int index, int type) { + match_type_t *mtp = &match_types[index]; /* * Note: this code assumes the above dependencies are * not cyclic. This *should* always be true. 
*/ if (mtp->m_depend != -1) - generate_check(&match_types[mtp->m_depend]); + generate_check(match_types, mtp->m_depend, type); emitop(mtp->m_optype); load_value(mtp->m_offset, mtp->m_size); @@ -1364,14 +1415,25 @@ static int comparison(char *s) { unsigned int i, n_checks = 0; + match_type_t *match_types; - for (i = 0; match_types[i].m_name != NULL; i++) { + switch (interface->mac_type) { + case DL_ETHER: + match_types = ether_match_types; + break; + case DL_IPNET: + match_types = ipnet_match_types; + break; + default: + return (0); + } + for (i = 0; match_types[i].m_name != NULL; i++) { if (strcmp(s, match_types[i].m_name) != 0) continue; n_checks++; - generate_check(&match_types[i]); + generate_check(match_types, i, interface->mac_type); if (n_checks > 1) emitop(OP_OR); } @@ -1416,11 +1478,9 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) found_host = 0; if (tokentype == ADDR_IP) { - hp = lgetipnodebyname(hostname, AF_INET, - 0, &error_num); + hp = lgetipnodebyname(hostname, AF_INET, 0, &error_num); if (hp == NULL) { - hp = getipnodebyname(hostname, AF_INET, - 0, &error_num); + hp = getipnodebyname(hostname, AF_INET, 0, &error_num); freehp = 1; } if (hp == NULL) { @@ -1433,11 +1493,9 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) } inet_type = IPV4_ONLY; } else if (tokentype == ADDR_IP6) { - hp = lgetipnodebyname(hostname, AF_INET6, - 0, &error_num); + hp = lgetipnodebyname(hostname, AF_INET6, 0, &error_num); if (hp == NULL) { - hp = getipnodebyname(hostname, AF_INET6, - 0, &error_num); + hp = getipnodebyname(hostname, AF_INET6, 0, &error_num); freehp = 1; } if (hp == NULL) { @@ -1454,11 +1512,10 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) switch (inet_type) { case IPV4_ONLY: /* Only IPv4 address is needed */ - hp = lgetipnodebyname(hostname, AF_INET, - 0, &error_num); + hp = lgetipnodebyname(hostname, AF_INET, 0, &error_num); if (hp == NULL) { - hp = getipnodebyname(hostname, AF_INET, - 0, 
&error_num); + hp = getipnodebyname(hostname, AF_INET, 0, + &error_num); freehp = 1; } if (hp != NULL) { @@ -1467,11 +1524,11 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) break; case IPV6_ONLY: /* Only IPv6 address is needed */ - hp = lgetipnodebyname(hostname, AF_INET6, - 0, &error_num); + hp = lgetipnodebyname(hostname, AF_INET6, 0, + &error_num); if (hp == NULL) { - hp = getipnodebyname(hostname, AF_INET6, - 0, &error_num); + hp = getipnodebyname(hostname, AF_INET6, 0, + &error_num); freehp = 1; } if (hp != NULL) { @@ -1481,10 +1538,10 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) case IPV4_AND_IPV6: /* Both IPv4 and IPv6 are needed */ hp = lgetipnodebyname(hostname, AF_INET6, - AI_ALL | AI_V4MAPPED, &error_num); + AI_ALL | AI_V4MAPPED, &error_num); if (hp == NULL) { hp = getipnodebyname(hostname, AF_INET6, - AI_ALL | AI_V4MAPPED, &error_num); + AI_ALL | AI_V4MAPPED, &error_num); freehp = 1; } if (hp != NULL) { @@ -1524,7 +1581,7 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) * The code below generates the filter. */ if (hp != NULL && hp->h_addrtype == AF_INET) { - ethertype_match(ETHERTYPE_IP); + ethertype_match(interface->network_type_ip); emitop(OP_BRFL); n = chain(n); emitop(OP_OFFSET_LINK); @@ -1560,7 +1617,8 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) while (addr6ptr != NULL) { if (IN6_IS_ADDR_V4MAPPED(addr6ptr)) { if (first) { - ethertype_match(ETHERTYPE_IP); + ethertype_match( + interface->network_type_ip); emitop(OP_BRFL); n = chain(n); emitop(OP_OFFSET_LINK); @@ -1604,7 +1662,8 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) resolve_chain(n); n = 0; } - ethertype_match(ETHERTYPE_IPV6); + ethertype_match( + interface->network_type_ipv6); emitop(OP_BRFL); n = chain(n); emitop(OP_OFFSET_LINK); @@ -1642,6 +1701,27 @@ ipaddr_match(enum direction which, char *hostname, int inet_type) } /* + * Match on zoneid. 
The arg zone passed in is in network byte order. + */ +static void +zone_match(enum direction which, uint64_t zone) +{ + + switch (which) { + case TO: + compare_value_zone(IPNET_DSTZONE_OFFSET, zone); + break; + case FROM: + compare_value_zone(IPNET_SRCZONE_OFFSET, zone); + break; + case ANY: + compare_value_zone(IPNET_SRCZONE_OFFSET, zone); + compare_value_zone(IPNET_DSTZONE_OFFSET, zone); + emitop(OP_OR); + } +} + +/* * Generate code to match an AppleTalk address. The address * must be given as two numbers with a dot between * @@ -1745,7 +1825,7 @@ etheraddr_match(enum direction which, char *hostname) if (ether_hostton(hostname, &e)) if (!arp_for_ether(hostname, &e)) pr_err("cannot obtain ether addr for %s", - hostname); + hostname); ep = &e; } memcpy(&addr, (ushort_t *)ep, 4); @@ -1809,13 +1889,22 @@ ethertype_match(int val) emitop(OP_OFFSET_ZERO); } } - compare_value(ether_offset, 2, val); + compare_value(ether_offset, interface->network_type_len, val); if (interface->mac_type == DL_ETHER || interface->mac_type == DL_CSMACD) { emitop(OP_OFFSET_POP); } } +static void +ipnettype_match(int val) +{ + int ipnet_offset = interface->network_type_offset; + + emitop(OP_OFFSET_ETHERTYPE); + compare_value(ipnet_offset, 2, val); +} + /* * Match a network address. The host part * is masked out. 
The network address may @@ -1890,8 +1979,7 @@ port_match(enum direction which, char *portname) } else { sp = getservbyname(portname, NULL); if (sp == NULL) - pr_err("invalid port number or name: %s", - portname); + pr_err("invalid port number or name: %s", portname); port = ntohs(sp->s_port); } @@ -2216,7 +2304,7 @@ primary() } if (EQ("bootp") || EQ("dhcp")) { - ethertype_match(ETHERTYPE_IP); + ethertype_match(interface->network_type_ip); emitop(OP_BRFL); m = chain(0); emitop(OP_OFFSET_LINK); @@ -2241,7 +2329,7 @@ primary() } if (EQ("dhcp6")) { - ethertype_match(ETHERTYPE_IPV6); + ethertype_match(interface->network_type_ipv6); emitop(OP_BRFL); m = chain(0); emitop(OP_OFFSET_LINK); @@ -2343,7 +2431,7 @@ primary() emitop(OP_OFFSET_POP); emitop(OP_BRFL); m = chain(0); - ethertype_match(ETHERTYPE_IP); + ethertype_match(interface->network_type_ip); resolve_chain(m); opstack++; next(); @@ -2405,26 +2493,26 @@ primary() } if (EQ("slp")) { - /* filter out TCP handshakes */ - emitop(OP_OFFSET_LINK); - compare_value(9, 1, IPPROTO_TCP); - emitop(OP_LOAD_CONST); - emitval(52); - emitop(OP_LOAD_CONST); - emitval(2); - emitop(OP_LOAD_SHORT); - emitop(OP_GE); - emitop(OP_AND); /* proto == TCP && len < 52 */ - emitop(OP_NOT); - emitop(OP_BRFL); /* pkt too short to be a SLP call */ - m = chain(0); - - emitop(OP_OFFSET_POP); - emitop(OP_OFFSET_SLP); - resolve_chain(m); - opstack++; - next(); - break; + /* filter out TCP handshakes */ + emitop(OP_OFFSET_LINK); + compare_value(9, 1, IPPROTO_TCP); + emitop(OP_LOAD_CONST); + emitval(52); + emitop(OP_LOAD_CONST); + emitval(2); + emitop(OP_LOAD_SHORT); + emitop(OP_GE); + emitop(OP_AND); /* proto == TCP && len < 52 */ + emitop(OP_NOT); + emitop(OP_BRFL); /* pkt too short to be a SLP call */ + m = chain(0); + + emitop(OP_OFFSET_POP); + emitop(OP_OFFSET_SLP); + resolve_chain(m); + opstack++; + next(); + break; } if (EQ("ldap")) { @@ -2439,6 +2527,16 @@ primary() break; } + if (EQ("zone")) { + next(); + if (tokentype != NUMBER) + 
pr_err("zoneid expected"); + zone_match(dir, BE_64((uint64_t)(tokenval))); + opstack++; + next(); + break; + } + if (EQ("gateway")) { next(); if (eaddr || tokentype != ALPHA) diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ipaddr.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ipaddr.c index 1f7a743ff4..c42c55dfef 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ipaddr.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ipaddr.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" /* SunOS */ - #include <stdio.h> #include <stdlib.h> #include <ctype.h> @@ -87,7 +85,6 @@ wakeup(int n) } extern char *inet_ntoa(); -extern boolean_t rflg; static struct hostdata * iplookup(struct in_addr ipaddr) @@ -118,12 +115,12 @@ iplookup(struct in_addr ipaddr) if (! rflg && sigsetjmp(nisjmp, 1) == 0) { (void) snoop_alarm(3, wakeup); hp = getipnodebyaddr((char *)&ipaddr, sizeof (int), - AF_INET, &error_num); + AF_INET, &error_num); if (hp == NULL && inet_lnaof(ipaddr) == 0) { np = getnetbyaddr(inet_netof(ipaddr), AF_INET); if (np) return (addhost(AF_INET, &ipaddr, np->n_name, - np->n_aliases)); + np->n_aliases)); } (void) snoop_alarm(0, wakeup); } @@ -244,14 +241,14 @@ addhost(int family, const void *ipaddr, const char *name, char **aliases) aliases[ind] != NULL; ind++) { (void) fprintf(namefile, " %s", - aliases[ind]); + aliases[ind]); } } (void) fprintf(namefile, "\n"); } } else if (family == AF_INET6) { np = (char *)inet_ntop(AF_INET6, (void *)ipaddr, aname, - sizeof (aname)); + sizeof (aname)); if (np) { (void) fprintf(namefile, "%s\t%s", np, name); if (aliases) { @@ -259,14 +256,14 @@ addhost(int family, const void *ipaddr, const char *name, char **aliases) aliases[ind] != NULL; ind++) { (void) fprintf(namefile, " %s", - aliases[ind]); + aliases[ind]); } } (void) fprintf(namefile, 
"\n"); } } else { (void) fprintf(stderr, "addhost: unknown family %d\n", - family); + family); } } return (n); @@ -386,7 +383,7 @@ lgetipnodebyname(const char *name, int af, int flags, int *error_num) } /* found ipv6 addr */ hp->h_addr_list[ind] = - (char *)&h6->h6_addr; + (char *)&h6->h6_addr; ind++; } } @@ -416,8 +413,8 @@ lgetipnodebyname(const char *name, int af, int flags, int *error_num) hp->h_addr_list[ind] = (char *)&h46_addr[ind]; IN6_INADDR_TO_V4MAPPED( - &h->h4_addr, - &h46_addr[ind]); + &h->h4_addr, + &h46_addr[ind]); ind++; } } diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c index 690ef81bde..3ebd6a4a3c 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" /* SunOS */ - #include <stdio.h> #include <stddef.h> #include <ctype.h> @@ -44,6 +42,8 @@ #include <netinet/if_ether.h> #include <netinet/tcp.h> #include <netinet/udp.h> +#include <inet/ip.h> +#include <inet/ip6.h> #include <netdb.h> #include <rpc/rpc.h> #include <setjmp.h> @@ -82,21 +82,154 @@ int opstack; /* operand stack depth */ #define IPV6_ONLY 1 #define IPV4_AND_IPV6 2 -/* - * The following constants represent the offsets in bytes from the beginning - * of the packet of the link and IP(v6) layer source/destination/type fields, - * initialized for Ethernet. Media specific code can set any unavailable - * link layer property's offset to -1 to indicate that the property's value - * is not available from the frame. - */ -static int link_header_len = 14, link_type_offset = 12; -static int link_dest_offset = 0, link_src_offset = 6; -static int link_addr_len = 6; +typedef struct { + int transport_protocol; + int network_protocol; + /* + * offset is the offset in bytes from the beginning + * of the network protocol header to where the transport + * protocol type is. 
+ */ + int offset; +} transport_protocol_table_t; + +typedef struct network_table { + char *nmt_name; + int nmt_val; +} network_table_t; + +static network_table_t ether_network_mapping_table[] = { + { "pup", ETHERTYPE_PUP }, + { "ip", ETHERTYPE_IP }, + { "arp", ETHERTYPE_ARP }, + { "revarp", ETHERTYPE_REVARP }, + { "at", ETHERTYPE_AT }, + { "aarp", ETHERTYPE_AARP }, + { "vlan", ETHERTYPE_VLAN }, + { "ip6", ETHERTYPE_IPV6 }, + { "slow", ETHERTYPE_SLOW }, + { "ppoed", ETHERTYPE_PPPOED }, + { "ppoes", ETHERTYPE_PPPOES }, + { "NULL", -1 } + +}; + +static network_table_t ib_network_mapping_table[] = { + { "pup", ETHERTYPE_PUP }, + { "ip", ETHERTYPE_IP }, + { "arp", ETHERTYPE_ARP }, + { "revarp", ETHERTYPE_REVARP }, + { "at", ETHERTYPE_AT }, + { "aarp", ETHERTYPE_AARP }, + { "vlan", ETHERTYPE_VLAN }, + { "ip6", ETHERTYPE_IPV6 }, + { "slow", ETHERTYPE_SLOW }, + { "ppoed", ETHERTYPE_PPPOED }, + { "ppoes", ETHERTYPE_PPPOES }, + { "NULL", -1 } + +}; -#define IPV4_SRCADDR_OFFSET (link_header_len + 12) -#define IPV4_DSTADDR_OFFSET (link_header_len + 16) -#define IPV6_SRCADDR_OFFSET (link_header_len + 8) -#define IPV6_DSTADDR_OFFSET (link_header_len + 24) +static network_table_t ipnet_network_mapping_table[] = { + { "ip", (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION) }, + { "ip6", (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION) }, + { "NULL", -1 } + +}; + +static transport_protocol_table_t ether_transport_mapping_table[] = { + {IPPROTO_TCP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_TCP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_UDP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_UDP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_OSPF, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_OSPF, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_SCTP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_SCTP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_ICMP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ICMPV6, ETHERTYPE_IPV6, 
IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_ENCAP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ESP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ESP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_AH, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_AH, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {-1, 0, 0} /* must be the final entry */ +}; + +static transport_protocol_table_t ipnet_transport_mapping_table[] = { + {IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION), + IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION), + IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION), + IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION), + IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_ICMP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ICMPV6, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION), + IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_ENCAP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION), + IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION), + IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION), + IPV6_TYPE_HEADER_OFFSET}, + {-1, 0, 0} /* must be the final entry */ +}; + +static transport_protocol_table_t ib_transport_mapping_table[] = { + {IPPROTO_TCP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_TCP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + 
{IPPROTO_UDP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_UDP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_OSPF, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_OSPF, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_SCTP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_SCTP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_ICMP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ICMPV6, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_ENCAP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ESP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_ESP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {IPPROTO_AH, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, + {IPPROTO_AH, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, + {-1, 0, 0} /* must be the final entry */ +}; + +typedef struct datalink { + uint_t dl_type; + void (*dl_match_fn)(uint_t datatype); + transport_protocol_table_t *dl_transport_mapping_table; + network_table_t *dl_net_map_tbl; + int dl_link_header_len; + int dl_link_type_offset; + int dl_link_dest_offset; + int dl_link_src_offset; + int dl_link_addr_len; +} datalink_t; + +datalink_t dl; + +#define IPV4_SRCADDR_OFFSET (dl.dl_link_header_len + 12) +#define IPV4_DSTADDR_OFFSET (dl.dl_link_header_len + 16) +#define IPV6_SRCADDR_OFFSET (dl.dl_link_header_len + 8) +#define IPV6_DSTADDR_OFFSET (dl.dl_link_header_len + 24) + +#define IPNET_SRCZONE_OFFSET 8 +#define IPNET_DSTZONE_OFFSET 16 static int inBrace = 0, inBraceOR = 0; static int foundOR = 0; @@ -116,6 +249,8 @@ static void pf_check_vlan_tag(uint_t offset); static void pf_clear_offset_register(); static void pf_emit_load_offset(uint_t offset); static void pf_match_ethertype(uint_t ethertype); +static void pf_match_ipnettype(uint_t type); +static void pf_match_ibtype(uint_t type); static void pf_check_transport_protocol(uint_t transport_protocol); static void pf_compare_value_mask_generic(int offset, uint_t len, uint_t val, int mask, uint_t op); @@ -441,6 +576,24 @@ 
pf_compare_value_mask_generic(int offset, uint_t len, uint_t val, int mask, } /* + * Like pf_compare_value() but compare on a 64-bit zoneid value. + * The argument val passed in is in network byte order. + */ +static void +pf_compare_zoneid(int offset, uint64_t val) +{ + int i; + + for (i = 0; i < sizeof (uint64_t) / 2; i ++) { + pf_emit(ENF_PUSHWORD + offset / 2 + i); + pf_emit(ENF_PUSHLIT | ENF_EQ); + pf_emit(((uint16_t *)&val)[i]); + if (i != 0) + pf_emit(ENF_AND); + } +} + +/* * Generate pf code to match an IPv4 or IPv6 address. */ static void @@ -549,8 +702,16 @@ pf_ipaddr_match(which, hostname, inet_type) } if (hp != NULL && hp->h_addrtype == AF_INET) { - pf_match_ethertype(ETHERTYPE_IP); - pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2); + for (; dl.dl_net_map_tbl->nmt_val != -1; + dl.dl_net_map_tbl++) { + if (strcmp("ip", + dl.dl_net_map_tbl->nmt_name) == 0) { + dl.dl_match_fn( + dl.dl_net_map_tbl->nmt_val); + } + } + if (dl.dl_type == DL_ETHER) + pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2); h_addr_index = 0; addr4ptr = (uint_t *)hp->h_addr_list[h_addr_index]; while (addr4ptr != NULL) { @@ -579,9 +740,21 @@ pf_ipaddr_match(which, hostname, inet_type) while (addr6ptr != NULL) { if (IN6_IS_ADDR_V4MAPPED(addr6ptr)) { if (first) { - pf_match_ethertype(ETHERTYPE_IP); - pf_check_vlan_tag( - ENCAP_ETHERTYPE_OFF/2); + for (; dl.dl_net_map_tbl->nmt_val != -1; + dl.dl_net_map_tbl++) { + if (strcmp("ip", + dl.dl_net_map_tbl-> + nmt_name) == 0) { + dl.dl_match_fn( + dl. 
+ dl_net_map_tbl-> + nmt_val); + } + } + if (dl.dl_type == DL_ETHER) { + pf_check_vlan_tag( + ENCAP_ETHERTYPE_OFF/2); + } pass++; } IN6_V4MAPPED_TO_INADDR(addr6ptr, @@ -616,9 +789,21 @@ pf_ipaddr_match(which, hostname, inet_type) while (addr6ptr != NULL) { if (!IN6_IS_ADDR_V4MAPPED(addr6ptr)) { if (first) { - pf_match_ethertype(ETHERTYPE_IPV6); - pf_check_vlan_tag( - ENCAP_ETHERTYPE_OFF/2); + for (; dl.dl_net_map_tbl->nmt_val != -1; + dl.dl_net_map_tbl++) { + if (strcmp("ip6", + dl.dl_net_map_tbl-> + nmt_name) == 0) { + dl.dl_match_fn( + dl. + dl_net_map_tbl-> + nmt_val); + } + } + if (dl.dl_type == DL_ETHER) { + pf_check_vlan_tag( + ENCAP_ETHERTYPE_OFF/2); + } pass++; } if (addr6offset == -1) { @@ -716,17 +901,17 @@ pf_etheraddr_match(which, hostname) switch (which) { case TO: - pf_compare_address(link_dest_offset, link_addr_len, + pf_compare_address(dl.dl_link_dest_offset, dl.dl_link_addr_len, (uchar_t *)ep); break; case FROM: - pf_compare_address(link_src_offset, link_addr_len, + pf_compare_address(dl.dl_link_src_offset, dl.dl_link_addr_len, (uchar_t *)ep); break; case ANY: - pf_compare_address(link_dest_offset, link_addr_len, + pf_compare_address(dl.dl_link_dest_offset, dl.dl_link_addr_len, (uchar_t *)ep); - pf_compare_address(link_src_offset, link_addr_len, + pf_compare_address(dl.dl_link_src_offset, dl.dl_link_addr_len, (uchar_t *)ep); pf_emit(ENF_OR); break; @@ -786,6 +971,31 @@ pf_netaddr_match(which, netname) } /* + * Emit code to match on src or destination zoneid. + * The zoneid passed in is in network byte order. 
+ */ +static void +pf_match_zone(enum direction which, uint64_t zoneid) +{ + if (dl.dl_type != DL_IPNET) + pr_err("zone filter option unsupported on media"); + + switch (which) { + case TO: + pf_compare_zoneid(IPNET_DSTZONE_OFFSET, zoneid); + break; + case FROM: + pf_compare_zoneid(IPNET_SRCZONE_OFFSET, zoneid); + break; + case ANY: + pf_compare_zoneid(IPNET_SRCZONE_OFFSET, zoneid); + pf_compare_zoneid(IPNET_DSTZONE_OFFSET, zoneid); + pf_emit(ENF_OR); + break; + } +} + +/* * A helper function to keep the code to emit instructions * to change the offset register in one place. * @@ -863,7 +1073,8 @@ pf_check_vlan_tag(uint_t offset) /* * Check the ethertype. */ - pf_compare_value(link_type_offset, 2, htons(ETHERTYPE_VLAN)); + pf_compare_value(dl.dl_link_type_offset, 2, + htons(ETHERTYPE_VLAN)); /* * And if it's not VLAN, don't load offset to the offset @@ -929,38 +1140,20 @@ pf_match_ethertype(uint_t ethertype) else pf_check_vlan_tag(2); - pf_compare_value(link_type_offset, 2, htons(ethertype)); + pf_compare_value(dl.dl_link_type_offset, 2, htons(ethertype)); } -typedef struct { - int transport_protocol; - int network_protocol; - /* - * offset is the offset in bytes from the beginning - * of the network protocol header to where the transport - * protocol type is. 
- */ - int offset; -} transport_protocol_table_t; +static void +pf_match_ipnettype(uint_t type) +{ + pf_compare_value(dl.dl_link_type_offset, 2, htons(type)); +} -static transport_protocol_table_t mapping_table[] = { - {IPPROTO_TCP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, - {IPPROTO_TCP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, - {IPPROTO_UDP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, - {IPPROTO_UDP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, - {IPPROTO_OSPF, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, - {IPPROTO_OSPF, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, - {IPPROTO_SCTP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, - {IPPROTO_SCTP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, - {IPPROTO_ICMP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, - {IPPROTO_ICMPV6, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, - {IPPROTO_ENCAP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, - {IPPROTO_ESP, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, - {IPPROTO_ESP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, - {IPPROTO_AH, ETHERTYPE_IP, IPV4_TYPE_HEADER_OFFSET}, - {IPPROTO_AH, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET}, - {-1, 0, 0} /* must be the final entry */ -}; +static void +pf_match_ibtype(uint_t type) +{ + pf_compare_value(dl.dl_link_type_offset, 2, htons(type)); +} /* * This function uses the table above to generate a @@ -979,14 +1172,17 @@ pf_check_transport_protocol(uint_t transport_protocol) int i = 0; uint_t number_of_matches = 0; - for (i = 0; mapping_table[i].transport_protocol != -1; i++) { + for (; dl.dl_transport_mapping_table->transport_protocol != -1; + dl.dl_transport_mapping_table++) { if (transport_protocol == - (uint_t)mapping_table[i].transport_protocol) { + (uint_t)dl.dl_transport_mapping_table->transport_protocol) { number_of_matches++; - pf_match_ethertype(mapping_table[i].network_protocol); + dl.dl_match_fn(dl.dl_transport_mapping_table-> + network_protocol); pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2); pf_compare_value( - mapping_table[i].offset + link_header_len, 1, + 
dl.dl_transport_mapping_table->offset + + dl.dl_link_header_len, 1, transport_protocol); pf_emit(ENF_AND); if (number_of_matches > 1) { @@ -1003,6 +1199,15 @@ pf_check_transport_protocol(uint_t transport_protocol) } static void +pf_matchfn(char *proto) +{ + for (; dl.dl_net_map_tbl->nmt_val != -1; dl.dl_net_map_tbl++) { + if (strcmp(proto, dl.dl_net_map_tbl->nmt_name) == 0) + dl.dl_match_fn(dl.dl_net_map_tbl->nmt_val); + } +} + +static void pf_primary() { for (;;) { @@ -1010,21 +1215,21 @@ pf_primary() break; if (EQ("ip")) { - pf_match_ethertype(ETHERTYPE_IP); + pf_matchfn("ip"); opstack++; next(); break; } if (EQ("ip6")) { - pf_match_ethertype(ETHERTYPE_IPV6); + pf_matchfn("ip6"); opstack++; next(); break; } if (EQ("pppoe")) { - pf_match_ethertype(ETHERTYPE_PPPOED); + pf_matchfn("pppoe"); pf_match_ethertype(ETHERTYPE_PPPOES); pf_emit(ENF_OR); opstack++; @@ -1033,28 +1238,28 @@ pf_primary() } if (EQ("pppoed")) { - pf_match_ethertype(ETHERTYPE_PPPOED); + pf_matchfn("pppoed"); opstack++; next(); break; } if (EQ("pppoes")) { - pf_match_ethertype(ETHERTYPE_PPPOES); + pf_matchfn("pppoes"); opstack++; next(); break; } if (EQ("arp")) { - pf_match_ethertype(ETHERTYPE_ARP); + pf_matchfn("arp"); opstack++; next(); break; } if (EQ("vlan")) { - pf_match_ethertype(ETHERTYPE_VLAN); + pf_matchfn("vlan"); pf_compare_value_mask_neq(VLAN_ID_OFFSET, 2, 0, VLAN_ID_MASK); pf_emit(ENF_AND); @@ -1067,7 +1272,7 @@ pf_primary() next(); if (tokentype != NUMBER) pr_err("VLAN ID expected"); - pf_match_ethertype(ETHERTYPE_VLAN); + pf_matchfn("vlan-id"); pf_compare_value_mask(VLAN_ID_OFFSET, 2, tokenval, VLAN_ID_MASK); pf_emit(ENF_AND); @@ -1077,7 +1282,7 @@ pf_primary() } if (EQ("rarp")) { - pf_match_ethertype(ETHERTYPE_REVARP); + pf_matchfn("rarp"); opstack++; next(); break; @@ -1208,7 +1413,7 @@ pf_primary() pr_err("IP proto type expected"); pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2); pf_compare_value( - IPV4_TYPE_HEADER_OFFSET + link_header_len, 1, + IPV4_TYPE_HEADER_OFFSET + 
dl.dl_link_header_len, 1, tokenval); opstack++; next(); @@ -1217,7 +1422,7 @@ pf_primary() if (EQ("broadcast")) { pf_clear_offset_register(); - pf_compare_value(link_dest_offset, 4, 0xffffffff); + pf_compare_value(dl.dl_link_dest_offset, 4, 0xffffffff); opstack++; next(); break; @@ -1225,7 +1430,8 @@ pf_primary() if (EQ("multicast")) { pf_clear_offset_register(); - pf_compare_value_mask(link_dest_offset, 1, 0x01, 0x01); + pf_compare_value_mask( + dl.dl_link_dest_offset, 1, 0x01, 0x01); opstack++; next(); break; @@ -1254,6 +1460,16 @@ pf_primary() break; } + if (EQ("zone")) { + next(); + if (tokentype != NUMBER) + pr_err("zoneid expected after inet"); + pf_match_zone(dir, BE_64((uint64_t)(tokenval))); + opstack++; + next(); + break; + } + /* * Give up on anything that's obviously * not a primary. @@ -1357,11 +1573,44 @@ pf_compile(e, print) /* * Set media specific packet offsets that this code uses. */ + if (interface->mac_type == DL_ETHER) { + dl.dl_type = DL_ETHER; + dl.dl_match_fn = pf_match_ethertype; + dl.dl_transport_mapping_table = + ðer_transport_mapping_table[0]; + dl.dl_net_map_tbl = + ðer_network_mapping_table[0]; + dl.dl_link_header_len = 14; + dl.dl_link_type_offset = 12; + dl.dl_link_dest_offset = 0; + dl.dl_link_src_offset = 6; + dl.dl_link_addr_len = 6; + } + if (interface->mac_type == DL_IB) { - link_header_len = 4; - link_type_offset = 0; - link_dest_offset = link_src_offset = -1; - link_addr_len = 20; + dl.dl_type = DL_IB; + dl.dl_link_header_len = 4; + dl.dl_link_type_offset = 0; + dl.dl_link_dest_offset = dl.dl_link_src_offset = -1; + dl.dl_link_addr_len = 20; + dl.dl_match_fn = pf_match_ibtype; + dl.dl_transport_mapping_table = + &ib_transport_mapping_table[0]; + dl.dl_net_map_tbl = + &ib_network_mapping_table[0]; + } + + if (interface->mac_type == DL_IPNET) { + dl.dl_type = DL_IPNET; + dl.dl_link_header_len = 24; + dl.dl_link_type_offset = 0; + dl.dl_link_dest_offset = dl.dl_link_src_offset = -1; + dl.dl_link_addr_len = -1; + dl.dl_match_fn = 
pf_match_ipnettype; + dl.dl_transport_mapping_table = + &ipnet_transport_mapping_table[0]; + dl.dl_net_map_tbl = + &ipnet_network_mapping_table[0]; } next(); diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c index b1f9386efd..c72be6be37 100644 --- a/usr/src/cmd/devfsadm/misc_link.c +++ b/usr/src/cmd/devfsadm/misc_link.c @@ -93,8 +93,8 @@ static devfsadm_create_t misc_cbt[] = { }, { "pseudo", "ddi_pseudo", "(^lockstat$)|(^SUNW,rtvc$)|(^vol$)|(^log$)|(^sy$)|" - "(^ksyms$)|(^clone$)|(^tl$)|(^tnf$)|(^kstat$)|(^mdesc$)|" - "(^eeprom$)|(^ptsl$)|(^mm$)|(^wc$)|(^dump$)|(^cn$)|(^lo$)|(^ptm$)|" + "(^ksyms$)|(^clone$)|(^tl$)|(^tnf$)|(^kstat$)|(^mdesc$)|(^eeprom$)|" + "(^ptsl$)|(^mm$)|(^wc$)|(^dump$)|(^cn$)|(^svvslo$)|(^ptm$)|" "(^ptc$)|(^openeepr$)|(^poll$)|(^sysmsg$)|(^random$)|(^trapstat$)|" "(^cryptoadm$)|(^crypto$)|(^pool$)|(^poolctl$)|(^bl$)|(^kmdb$)|" "(^sysevent$)|(^kssl$)|(^physmem$)", @@ -104,7 +104,7 @@ static devfsadm_create_t misc_cbt[] = { "(^ip$)|(^tcp$)|(^udp$)|(^icmp$)|(^sctp$)|" "(^ip6$)|(^tcp6$)|(^udp6$)|(^icmp6$)|(^sctp6$)|" "(^rts$)|(^arp$)|(^ipsecah$)|(^ipsecesp$)|(^keysock$)|(^spdsock$)|" - "(^nca$)|(^rds$)|(^sdp$)", + "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)", TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name }, { "pseudo", "ddi_pseudo", diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index 125ef9a33f..3869b370c1 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -298,6 +298,7 @@ const struct ioc { */ { (uint_t)DLIOCRAW, "DLIOCRAW", NULL }, { (uint_t)DLIOCNATIVE, "DLIOCNATIVE", NULL }, + { (uint_t)DLIOCIPNETINFO, "DLIOCIPNETINFO", NULL}, { (uint_t)LDOPEN, "LDOPEN", NULL }, { (uint_t)LDCLOSE, "LDCLOSE", NULL }, @@ -607,8 +608,8 @@ const struct ioc { { (uint_t)SIOCTMYADDR, "SIOCTMYADDR", "sioc_addrreq" }, { (uint_t)SIOCTONLINK, "SIOCTONLINK", "sioc_addrreq" }, { (uint_t)SIOCTMYSITE, "SIOCTMYSITE", "sioc_addrreq" }, - { (uint_t)SIOCGTUNPARAM, "SIOCGTUNPARAM", "iftun_req" }, - { 
(uint_t)SIOCSTUNPARAM, "SIOCSTUNPARAM", "iftun_req" }, + { (uint_t)SIOCGTUNPARAM, "SIOCGTUNPARAM", "iftun_req" }, + { (uint_t)SIOCSTUNPARAM, "SIOCSTUNPARAM", "iftun_req" }, { (uint_t)SIOCFIPSECONFIG, "SIOCFIPSECONFIG", NULL }, { (uint_t)SIOCSIPSECONFIG, "SIOCSIPSECONFIG", NULL }, { (uint_t)SIOCDIPSECONFIG, "SIOCDIPSECONFIG", NULL }, diff --git a/usr/src/lib/brand/native/zone/config.xml b/usr/src/lib/brand/native/zone/config.xml index e8202c32d8..0cc7ab8d38 100644 --- a/usr/src/lib/brand/native/zone/config.xml +++ b/usr/src/lib/brand/native/zone/config.xml @@ -23,8 +23,6 @@ Copyright 2008 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. - ident "%Z%%M% %I% %E% SMI" - DO NOT EDIT THIS FILE. --> @@ -65,6 +63,7 @@ <privilege set="default" name="net_bindmlp" /> <privilege set="default" name="net_icmpaccess" /> <privilege set="default" name="net_mac_aware" /> + <privilege set="default" name="net_observability" /> <privilege set="default" name="net_privaddr" /> <privilege set="default" name="net_rawaccess" ip-type="exclusive" /> <privilege set="default" name="proc_chroot" /> diff --git a/usr/src/lib/brand/native/zone/platform.xml b/usr/src/lib/brand/native/zone/platform.xml index ddfbdd788d..f7030ba0a1 100644 --- a/usr/src/lib/brand/native/zone/platform.xml +++ b/usr/src/lib/brand/native/zone/platform.xml @@ -23,8 +23,6 @@ Copyright 2008 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. - ident "%Z%%M% %I% %E% SMI" - DO NOT EDIT THIS FILE. 
--> @@ -54,11 +52,9 @@ <device match="dtrace/*" /> <device match="dtrace/provider/*" /> <device match="fd" /> + <device match="ipnet" /> <device match="kstat" /> <device match="lo0" /> - <device match="lo1" /> - <device match="lo2" /> - <device match="lo3" /> <device match="log" /> <device match="logindmux" /> <device match="nsmb" /> @@ -73,6 +69,10 @@ <device match="rdsk" /> <device match="rmt" /> <device match="sad/user" /> + <device match="svvslo0" /> + <device match="svvslo1" /> + <device match="svvslo2" /> + <device match="svvslo3" /> <device match="swap" /> <device match="sysevent" /> <device match="tcp" /> diff --git a/usr/src/lib/brand/sn1/zone/platform.xml b/usr/src/lib/brand/sn1/zone/platform.xml index 8065fa5fd3..1659d8851c 100644 --- a/usr/src/lib/brand/sn1/zone/platform.xml +++ b/usr/src/lib/brand/sn1/zone/platform.xml @@ -57,11 +57,9 @@ <device match="dtrace/*" /> <device match="dtrace/provider/*" /> <device match="fd" /> + <device match="ipnet" /> <device match="kstat" /> <device match="lo0" /> - <device match="lo1" /> - <device match="lo2" /> - <device match="lo3" /> <device match="log" /> <device match="logindmux" /> <device match="net/*" /> @@ -75,6 +73,10 @@ <device match="rdsk" /> <device match="rmt" /> <device match="sad/user" /> + <device match="svvslo0" /> + <device match="svvslo1" /> + <device match="svvslo2" /> + <device match="svvslo3" /> <device match="swap" /> <device match="sysevent" /> <device match="tcp" /> diff --git a/usr/src/lib/libdladm/common/libdladm.c b/usr/src/lib/libdladm/common/libdladm.c index 3a3cac49d8..fa588df066 100644 --- a/usr/src/lib/libdladm/common/libdladm.c +++ b/usr/src/lib/libdladm/common/libdladm.c @@ -340,6 +340,9 @@ dladm_media2str(uint32_t media, char *buf) case DL_ASYNC: s = "AsyncCharacter"; break; + case DL_IPNET: + s = "IPNET"; + break; default: s = "--"; break; diff --git a/usr/src/lib/libdlpi/common/libdlpi.c b/usr/src/lib/libdlpi/common/libdlpi.c index 3849824ff0..c6049cb211 100644 --- 
a/usr/src/lib/libdlpi/common/libdlpi.c +++ b/usr/src/lib/libdlpi/common/libdlpi.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Data-Link Provider Interface (Version 2) */ @@ -49,6 +47,7 @@ #include <libdlpi.h> #include <libintl.h> #include <libinetutil.h> +#include <dirent.h> #include "libdlpi_impl.h" @@ -95,18 +94,35 @@ void dlpi_walk(dlpi_walkfunc_t *fn, void *arg, uint_t flags) { struct i_dlpi_walklink_arg warg; + struct dirent *d; + DIR *dp; warg.fn = fn; warg.arg = arg; - (void) dladm_walk(i_dlpi_walk_link, &warg, DATALINK_CLASS_ALL, - DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + if (flags & DLPI_DEVIPNET) { + if ((dp = opendir("/dev/ipnet")) == NULL) + return; + + while ((d = readdir(dp)) != NULL) { + if (d->d_name[0] == '.') + continue; + + if (warg.fn(d->d_name, warg.arg)) + break; + } + + (void) closedir(dp); + } else { + (void) dladm_walk(i_dlpi_walk_link, &warg, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } } int dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags) { - int retval; + int retval, on = 1; int cnt; ifspec_t ifsp; dlpi_impl_t *dip; @@ -119,6 +135,13 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags) !ifparse_ifspec(linkname, &ifsp)) return (DLPI_ELINKNAMEINVAL); + /* + * Ensure flags values are sane. + */ + if ((flags & (DLPI_DEVIPNET|DLPI_DEVONLY)) == + (DLPI_DEVIPNET|DLPI_DEVONLY)) + return (DLPI_EINVAL); + /* Allocate a new dlpi_impl_t. */ if ((dip = calloc(1, sizeof (dlpi_impl_t))) == NULL) return (DL_SYSERR); @@ -126,16 +149,18 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags) /* Fill in known/default libdlpi handle values. 
*/ dip->dli_timeout = DLPI_DEF_TIMEOUT; dip->dli_ppa = ifsp.ifsp_ppa; - dip->dli_mod_cnt = ifsp.ifsp_modcnt; dip->dli_oflags = flags; dip->dli_notifylistp = NULL; dip->dli_note_processing = B_FALSE; if (getenv("DLPI_DEVONLY") != NULL) dip->dli_oflags |= DLPI_DEVONLY; - for (cnt = 0; cnt != dip->dli_mod_cnt; cnt++) { - (void) strlcpy(dip->dli_modlist[cnt], ifsp.ifsp_mods[cnt], - DLPI_LINKNAME_MAX); + if (!(flags & DLPI_DEVIPNET)) { + dip->dli_mod_cnt = ifsp.ifsp_modcnt; + for (cnt = 0; cnt != dip->dli_mod_cnt; cnt++) { + (void) strlcpy(dip->dli_modlist[cnt], + ifsp.ifsp_mods[cnt], DLPI_LINKNAME_MAX); + } } /* Copy linkname provided to the function. */ @@ -173,7 +198,7 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags) * of failure from errno. */ retval = DL_SYSERR; - } else { + } else if (!(dip->dli_oflags & DLPI_DEVIPNET)) { retval = i_dlpi_style2_open(dip); } if (retval != DLPI_SUCCESS) { @@ -191,6 +216,12 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags) return (DLPI_ERAWNOTSUP); } + if ((dip->dli_oflags & DLPI_IPNETINFO) && + ioctl(dip->dli_fd, DLIOCIPNETINFO, &on) < 0) { + dlpi_close((dlpi_handle_t)dip); + return (DLPI_EIPNETINFONOTSUP); + } + /* * We intentionally do not care if this request fails, as this * indicates the underlying DLPI device does not support Native mode @@ -978,12 +1009,13 @@ dlpi_iftype(uint_t dlpitype) /* * This function attempts to open a device under the following namespaces: + * /dev/ipnet - if DLPI_DEVIPNET is specified * /dev/net - if a data-link with the specified name exists * /dev - if DLPI_DEVONLY is specified, or if there is no * data-link with the specified name (could be /dev/ip) * - * In particular, this function is used to open a data-link node, or some - * special node, such as "/dev/ip" node. It is usually be called firstly + * In particular, if DLPI_DEVIPNET is not specified, this function is used to + * open a data-link node, or "/dev/ip" node. 
It is usually be called firstly * with style1 being B_TRUE, and if that fails and the return value is not * DLPI_ENOTSTYLE2, the function will again be called with style1 being * B_FALSE (style-1 open attempt first, then style-2 open attempt). @@ -1019,7 +1051,13 @@ i_dlpi_open(const char *provider, int *fd, uint_t flags, boolean_t style1) if (flags & DLPI_EXCL) oflags |= O_EXCL; - if (style1 && !(flags & DLPI_DEVONLY)) { + if (flags & DLPI_DEVIPNET) { + (void) snprintf(path, sizeof (path), "/dev/ipnet/%s", provider); + if ((*fd = open(path, oflags)) != -1) + return (DLPI_SUCCESS); + else + return (errno == ENOENT ? DLPI_ENOLINK : DL_SYSERR); + } else if (style1 && !(flags & DLPI_DEVONLY)) { char driver[DLPI_LINKNAME_MAX]; char device[DLPI_LINKNAME_MAX]; datalink_id_t linkid; @@ -1600,7 +1638,8 @@ static const char *libdlpi_errlist[] = { "DLPI notification not supported by link", /* DLPI_ENOTENOTSUP */ "invalid DLPI notification type", /* DLPI_ENOTEINVAL */ - "invalid DLPI notification id" /* DLPI_ENOTEIDINVAL */ + "invalid DLPI notification id", /* DLPI_ENOTEIDINVAL */ + "DLPI_IPNETINFO not supported" /* DLPI_EIPNETINFONOTSUP */ }; const char * @@ -1648,7 +1687,8 @@ static const dlpi_mactype_t dlpi_mactypes[] = { { DL_IB, "Infiniband" }, { DL_IPV4, "IPv4 Tunnel" }, { DL_IPV6, "IPv6 Tunnel" }, - { DL_WIFI, "IEEE 802.11" } + { DL_WIFI, "IEEE 802.11" }, + { DL_IPNET, "IPNET" } }; const char * diff --git a/usr/src/lib/libdlpi/common/libdlpi.h b/usr/src/lib/libdlpi/common/libdlpi.h index 5b9b1caba5..993ac1b7a4 100644 --- a/usr/src/lib/libdlpi/common/libdlpi.h +++ b/usr/src/lib/libdlpi/common/libdlpi.h @@ -26,8 +26,6 @@ #ifndef _LIBDLPI_H #define _LIBDLPI_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/dlpi.h> @@ -63,7 +61,8 @@ extern "C" { #define DLPI_NOATTACH 0x0010 /* Do not attach PPA */ #define DLPI_NATIVE 0x0020 /* Open DLPI link in native mode */ #define DLPI_DEVONLY 0x0040 /* Open DLPI link under /dev only */ - +#define 
DLPI_DEVIPNET 0x0080 /* Open IP DLPI link under /dev/ipnet */ +#define DLPI_IPNETINFO 0x0100 /* Request ipnetinfo headers */ /* * Timeout to be used in DLPI-related operations, in seconds. */ @@ -93,6 +92,7 @@ enum { DLPI_ENOTEINVAL, /* invalid DLPI notification type */ DLPI_ENOTENOTSUP, /* DLPI notification not supported by link */ DLPI_ENOTEIDINVAL, /* invalid DLPI notification id */ + DLPI_EIPNETINFONOTSUP, /* DLPI_IPNETINFO not supported */ DLPI_ERRMAX /* Highest + 1 libdlpi error code */ }; diff --git a/usr/src/lib/libsecdb/exec_attr.txt b/usr/src/lib/libsecdb/exec_attr.txt index bf890cdaef..ae7d769e2a 100644 --- a/usr/src/lib/libsecdb/exec_attr.txt +++ b/usr/src/lib/libsecdb/exec_attr.txt @@ -204,7 +204,9 @@ Network Management:suser:cmd:::/usr/sbin/ipqosconf:euid=0 Network Management:suser:cmd:::/usr/sbin/rndc:privs=file_dac_read Network Management:suser:cmd:::/usr/sbin/route:uid=0 Network Management:suser:cmd:::/usr/sbin/snoop:uid=0 +Network Management:solaris:cmd:::/usr/sbin/snoop:privs=net_observability Network Management:suser:cmd:::/usr/sbin/spray:euid=0 +Network Observability:solaris:cmd:::/usr/sbin/snoop:privs=net_observability Network Link Security:solaris:cmd:::/sbin/dladm:euid=dladm;egid=sys;\ privs=sys_dl_config,net_rawaccess,proc_audit Network IPsec Management:solaris:cmd:::/usr/lib/inet/certdb:euid=0;privs=none diff --git a/usr/src/lib/libsecdb/help/profiles/RtNetObservability.html b/usr/src/lib/libsecdb/help/profiles/RtNetObservability.html new file mode 100644 index 0000000000..9317243fb2 --- /dev/null +++ b/usr/src/lib/libsecdb/help/profiles/RtNetObservability.html @@ -0,0 +1,36 @@ +<HTML> +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. 
+ See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + +-- Copyright 2008 Sun Microsystems, Inc. All rights reserved. +-- Use is subject to license terms. +--> +<HEAD> + <TITLE> </TITLE> + + +</HEAD> +<BODY> +When Network Observability is in the Rights Included column, it grants the right to open the ip observability devices, /dev/ipnet/* and /dev/lo0 and so observe network traffic at the ip layer. +<p> +If Network Observability is grayed, then you are not entitled to Add or Remove this right. +<p> +</BODY> +</HTML> diff --git a/usr/src/lib/libsecdb/prof_attr.txt b/usr/src/lib/libsecdb/prof_attr.txt index 780da755b7..9799ec15c2 100644 --- a/usr/src/lib/libsecdb/prof_attr.txt +++ b/usr/src/lib/libsecdb/prof_attr.txt @@ -59,7 +59,8 @@ MMS Operator:::MMS Media Manager Operator:auths=solaris.smf.manage.mms,solaris.m MMS User:::MMS Tape User:auths=solaris.mms.io.*;help=RtMMSUser.html NDMP Management:::Manage the NDMP service:auths=solaris.smf.manage.ndmp,solaris.smf.value.ndmp,solaris.smf.read.ndmp;help=RtNdmpMngmnt.html Network Autoconf:::Manage network auto-magic configuration via nwamd:auths=solaris.network.autoconf;help=RtNetAutoconf.html -Network Management:::Manage the host and network configuration:auths=solaris.smf.manage.name-service-cache,solaris.smf.manage.bind,solaris.smf.value.routing,solaris.smf.manage.routing,solaris.smf.value.nwam,solaris.smf.manage.nwam,solaris.smf.manage.tnd,solaris.smf.manage.tnctl,solaris.smf.manage.wpa,solaris.smf.value.mdns,solaris.smf.manage.mdns;profiles=Network Wifi Management,Inetd Management,Network 
Autoconf;help=RtNetMngmnt.html +Network Management:::Manage the host and network configuration:auths=solaris.smf.manage.name-service-cache,solaris.smf.manage.bind,solaris.smf.value.routing,solaris.smf.manage.routing,solaris.smf.value.nwam,solaris.smf.manage.nwam,solaris.smf.manage.tnd,solaris.smf.manage.tnctl,solaris.smf.manage.wpa,solaris.smf.value.mdns,solaris.smf.manage.mdns;profiles=Network Wifi Management,Inetd Management,Network Autoconf,Network Observability;help=RtNetMngmnt.html +Network Observability:::Allow access to observability devices:privs=net_observability;help=RtNetObservability.html Network Security:::Manage network and host security:auths=solaris.smf.manage.ssh,solaris.smf.value.tnd;profiles=Network Wifi Security,Network Link Security,Network IPsec Management;help=RtNetSecure.html Network Wifi Management:::Manage wifi network configuration:auths=solaris.network.wifi.config;help=RtNetWifiMngmnt.html Network Wifi Security:::Manage wifi network security:auths=solaris.network.wifi.wep;help=RtNetWifiSecure.html diff --git a/usr/src/pkgdefs/SUNWckr/prototype_com b/usr/src/pkgdefs/SUNWckr/prototype_com index 692b2aaa80..1988298dfe 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_com +++ b/usr/src/pkgdefs/SUNWckr/prototype_com @@ -23,8 +23,6 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# # # This required package information file contains a list of package contents. 
@@ -87,6 +85,7 @@ f none kernel/drv/icmp.conf 644 root sys f none kernel/drv/icmp6.conf 644 root sys f none kernel/drv/ip.conf 644 root sys f none kernel/drv/ip6.conf 644 root sys +f none kernel/drv/ipnet.conf 644 root sys f none kernel/drv/ippctl.conf 644 root sys f none kernel/drv/ipsecah.conf 644 root sys f none kernel/drv/ipsecesp.conf 644 root sys diff --git a/usr/src/pkgdefs/SUNWckr/prototype_i386 b/usr/src/pkgdefs/SUNWckr/prototype_i386 index 3767e36322..57be328034 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_i386 +++ b/usr/src/pkgdefs/SUNWckr/prototype_i386 @@ -93,6 +93,7 @@ f none kernel/drv/intel_nb5000 755 root sys f none kernel/drv/intel_nb5000.conf 644 root sys f none kernel/drv/ip 755 root sys f none kernel/drv/ip6 755 root sys +f none kernel/drv/ipnet 755 root sys f none kernel/drv/ippctl 755 root sys f none kernel/drv/ipsecah 755 root sys f none kernel/drv/ipsecesp 755 root sys @@ -310,6 +311,7 @@ f none kernel/drv/amd64/icmp6 755 root sys f none kernel/drv/amd64/intel_nb5000 755 root sys f none kernel/drv/amd64/ip 755 root sys f none kernel/drv/amd64/ip6 755 root sys +f none kernel/drv/amd64/ipnet 755 root sys f none kernel/drv/amd64/ippctl 755 root sys f none kernel/drv/amd64/ipsecah 755 root sys f none kernel/drv/amd64/ipsecesp 755 root sys diff --git a/usr/src/pkgdefs/SUNWckr/prototype_sparc b/usr/src/pkgdefs/SUNWckr/prototype_sparc index d9a98493d3..daccee4e10 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_sparc +++ b/usr/src/pkgdefs/SUNWckr/prototype_sparc @@ -87,6 +87,7 @@ f none kernel/drv/sparcv9/i8042 755 root sys f none kernel/drv/sparcv9/icmp 755 root sys f none kernel/drv/sparcv9/icmp6 755 root sys f none kernel/drv/sparcv9/ip 755 root sys +f none kernel/drv/sparcv9/ipnet 755 root sys f none kernel/drv/sparcv9/ip6 755 root sys f none kernel/drv/sparcv9/ippctl 755 root sys f none kernel/drv/sparcv9/ipsecah 755 root sys diff --git a/usr/src/pkgdefs/SUNWcsd/prototype_com b/usr/src/pkgdefs/SUNWcsd/prototype_com index 
9ff03705c9..352cf569d6 100644 --- a/usr/src/pkgdefs/SUNWcsd/prototype_com +++ b/usr/src/pkgdefs/SUNWcsd/prototype_com @@ -70,6 +70,7 @@ i i.preserve d none dev 755 root sys d none dev/dsk 755 root sys d none dev/fd 555 root root +d none dev/ipnet 755 root sys d none dev/net 755 root sys d none dev/pts 755 root sys d none dev/rdsk 755 root sys diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com index f3f375a748..dd18ac1cd1 100644 --- a/usr/src/pkgdefs/SUNWhea/prototype_com +++ b/usr/src/pkgdefs/SUNWhea/prototype_com @@ -249,6 +249,7 @@ f none usr/include/inet/ip6.h 644 root bin f none usr/include/inet/ip6_asp.h 644 root bin f none usr/include/inet/ipclassifier.h 644 root bin f none usr/include/inet/ipdrop.h 644 root bin +f none usr/include/inet/ipnet.h 644 root bin f none usr/include/inet/ipp_common.h 644 root bin d none usr/include/inet/kssl 755 root bin f none usr/include/inet/kssl/ksslapi.h 644 root bin diff --git a/usr/src/pkgdefs/SUNWsvvs/prototype_com b/usr/src/pkgdefs/SUNWsvvs/prototype_com index ee83b117ab..1f3b9f71a4 100644 --- a/usr/src/pkgdefs/SUNWsvvs/prototype_com +++ b/usr/src/pkgdefs/SUNWsvvs/prototype_com @@ -1,9 +1,7 @@ # -# Copyright 1990-1993,1998,2003 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" -# # This required package information file contains a list of package contents. # The 'pkgmk' command uses this file to identify the contents of a package # and their location on the development machine when building the package. 
@@ -26,13 +24,13 @@ i depend d none usr 755 root sys d none usr/include 755 root bin d none usr/include/sys 755 root bin -f none usr/include/sys/lo.h 644 root bin +f none usr/include/sys/svvslo.h 644 root bin f none usr/include/sys/tidg.h 644 root bin f none usr/include/sys/tivc.h 644 root bin f none usr/include/sys/tmux.h 644 root bin d none usr/kernel 755 root sys d none usr/kernel/drv 755 root sys -f none usr/kernel/drv/lo.conf 644 root sys +f none usr/kernel/drv/svvslo.conf 644 root sys f none usr/kernel/drv/tidg.conf 644 root sys f none usr/kernel/drv/tivc.conf 644 root sys f none usr/kernel/drv/tmux.conf 644 root sys diff --git a/usr/src/pkgdefs/SUNWsvvs/prototype_i386 b/usr/src/pkgdefs/SUNWsvvs/prototype_i386 index b435cf7234..905a42f7ff 100644 --- a/usr/src/pkgdefs/SUNWsvvs/prototype_i386 +++ b/usr/src/pkgdefs/SUNWsvvs/prototype_i386 @@ -1,9 +1,7 @@ # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# # This required package information file contains a list of package contents. # The 'pkgmk' command uses this file to identify the contents of a package # and their location on the development machine when building the package. 
@@ -28,7 +26,7 @@ # # SUNWsvvs # -f none usr/kernel/drv/lo 755 root sys +f none usr/kernel/drv/svvslo 755 root sys f none usr/kernel/drv/tidg 755 root sys f none usr/kernel/drv/tivc 755 root sys f none usr/kernel/drv/tmux 755 root sys @@ -37,7 +35,7 @@ f none usr/kernel/strmod/lmode 755 root sys f none usr/kernel/strmod/lmodr 755 root sys f none usr/kernel/strmod/lmodt 755 root sys d none usr/kernel/drv/amd64 755 root sys -f none usr/kernel/drv/amd64/lo 755 root sys +f none usr/kernel/drv/amd64/svvslo 755 root sys f none usr/kernel/drv/amd64/tidg 755 root sys f none usr/kernel/drv/amd64/tivc 755 root sys f none usr/kernel/drv/amd64/tmux 755 root sys diff --git a/usr/src/pkgdefs/SUNWsvvs/prototype_sparc b/usr/src/pkgdefs/SUNWsvvs/prototype_sparc index ddfe529bb1..d11469f0d7 100644 --- a/usr/src/pkgdefs/SUNWsvvs/prototype_sparc +++ b/usr/src/pkgdefs/SUNWsvvs/prototype_sparc @@ -1,9 +1,7 @@ # -# Copyright 1990-1993,2003 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" -# # This required package information file contains a list of package contents. # The 'pkgmk' command uses this file to identify the contents of a package # and their location on the development machine when building the package. 
@@ -29,7 +27,7 @@ # SUNWsvvs # d none usr/kernel/drv/sparcv9 755 root sys -f none usr/kernel/drv/sparcv9/lo 755 root sys +f none usr/kernel/drv/sparcv9/svvslo 755 root sys f none usr/kernel/drv/sparcv9/tidg 755 root sys f none usr/kernel/drv/sparcv9/tivc 755 root sys f none usr/kernel/drv/sparcv9/tmux 755 root sys diff --git a/usr/src/pkgdefs/common_files/i.devpolicy b/usr/src/pkgdefs/common_files/i.devpolicy index 49a2d25162..13e00b8bc0 100644 --- a/usr/src/pkgdefs/common_files/i.devpolicy +++ b/usr/src/pkgdefs/common_files/i.devpolicy @@ -50,7 +50,7 @@ do rm -f $dest.$$ # potential additions - additions="aggr bge dnet keysock ibd icmp icmp6 ipsecah ipsecesp openeepr random spdsock vni ipf pfil scsi_vhci" + additions="aggr bge dnet keysock ibd icmp icmp6 ipnet ipsecah ipsecesp openeepr random spdsock vni ipf pfil scsi_vhci" for dev in $additions do diff --git a/usr/src/pkgdefs/common_files/i.minorperm_i386 b/usr/src/pkgdefs/common_files/i.minorperm_i386 index e3c08254f7..1f1e0c8ccc 100644 --- a/usr/src/pkgdefs/common_files/i.minorperm_i386 +++ b/usr/src/pkgdefs/common_files/i.minorperm_i386 @@ -221,6 +221,7 @@ openeepr:openprom wc:* ip:ip ip6:ip6 +ipnet:lo0 icmp:icmp icmp6:icmp6 udp:udp diff --git a/usr/src/pkgdefs/common_files/i.minorperm_sparc b/usr/src/pkgdefs/common_files/i.minorperm_sparc index 1c102faa7a..3352f31fc0 100644 --- a/usr/src/pkgdefs/common_files/i.minorperm_sparc +++ b/usr/src/pkgdefs/common_files/i.minorperm_sparc @@ -246,6 +246,7 @@ icmp:icmp icmp6:icmp6 ip:ip ip6:ip6 +ipnet:lo0 tcp:tcp tcp6:tcp6 udp:udp diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh index 01d650e172..d4f80d67bb 100644 --- a/usr/src/tools/scripts/bfu.sh +++ b/usr/src/tools/scripts/bfu.sh @@ -7596,6 +7596,16 @@ mondo_loop() { # rm -f $root/usr/platform/i86pc/lib/fm/topo/maps/Sun-Fire-*-topology.xml + # + # Remove old SVVS lo driver and related files. It was renamed to + # svvslo. The renamed files will be extracted using their new names. 
+ # + rm -f $usr/kernel/drv/lo.conf + rm -f $usr/kernel/drv/lo + rm -f $usr/kernel/drv/sparcv9/lo + rm -f $usr/kernel/drv/amd64/lo + rm -f $usr/include/sys/lo.h + # End of pre-archive extraction hacks. if [ $diskless = no -a $zone = global ]; then diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 53cedce8cc..7b4357de66 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -499,6 +499,8 @@ NETI_OBJS += neti_impl.o neti_mod.o neti_stack.o KEYSOCK_OBJS += keysockddi.o keysock.o keysock_opt_data.o +IPNET_OBJS += ipnet.o + SPDSOCK_OBJS += spdsockddi.o spdsock.o spdsock_opt_data.o IPSECESP_OBJS += ipsecespddi.o ipsecesp.o @@ -973,7 +975,8 @@ DEVFS_OBJS += devfs_subr.o devfs_vfsops.o devfs_vnops.o DEV_OBJS += sdev_subr.o sdev_vfsops.o sdev_vnops.o \ sdev_ptsops.o sdev_comm.o sdev_profile.o \ - sdev_ncache.o sdev_netops.o sdev_vtops.o + sdev_ncache.o sdev_netops.o sdev_vtops.o \ + sdev_ipnetops.o CTFS_OBJS += ctfs_all.o ctfs_cdir.o ctfs_ctl.o ctfs_event.o \ ctfs_latest.o ctfs_root.o ctfs_sym.o ctfs_tdir.o ctfs_tmpl.o diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index ebc525e8c2..e623f79a7a 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -439,6 +439,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ip/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ipnet/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/kssl/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -1539,6 +1543,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/arp/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ip/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ipnet/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ipf/%.c @($(LHEAD) $(LINT.c) $(IPFFLAGS) $< $(LTAIL)) diff --git a/usr/src/uts/common/fs/dev/sdev_ipnetops.c 
b/usr/src/uts/common/fs/dev/sdev_ipnetops.c new file mode 100644 index 0000000000..dbca322afb --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_ipnetops.c @@ -0,0 +1,226 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * vnode ops for the /dev/ipnet directory + * The lookup is based on the ipnetif nodes held + * in the ipnet module. We also override readdir + * in order to delete ipnet nodes no longer in use. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/sunndi.h> +#include <fs/fs_subr.h> +#include <sys/fs/dv_node.h> +#include <sys/fs/sdev_impl.h> +#include <sys/policy.h> +#include <inet/ipnet.h> +#include <sys/zone.h> + +struct vnodeops *devipnet_vnodeops; + +static void +devipnet_fill_vattr(struct vattr *vap, dev_t dev) +{ + timestruc_t now; + + *vap = sdev_vattr_chr; + vap->va_rdev = dev; + vap->va_mode |= 0666; + + gethrestime(&now); + vap->va_atime = now; + vap->va_mtime = now; + vap->va_ctime = now; +} + +/* + * Check if an ipnet sdev_node is still valid. 
+ */ +int +devipnet_validate(struct sdev_node *dv) +{ + dev_t dev; + + dev = ipnet_if_getdev(dv->sdev_name, getzoneid()); + if (dev == (dev_t)-1) + return (SDEV_VTOR_INVALID); + if (getminor(SDEVTOV(dv)->v_rdev) != getminor(dev)) + return (SDEV_VTOR_STALE); + return (SDEV_VTOR_VALID); +} + +/* + * This callback is invoked from devname_lookup_func() to create + * an ipnet entry when the node is not found in the cache. + */ +/*ARGSUSED*/ +static int +devipnet_create_rvp(struct sdev_node *ddv, char *nm, + void **arg, cred_t *cred, void *whatever, char *whichever) +{ + dev_t dev; + struct vattr *vap = (struct vattr *)arg; + int err = 0; + + if ((dev = ipnet_if_getdev(nm, getzoneid())) == (dev_t)-1) + err = ENOENT; + else + devipnet_fill_vattr(vap, dev); + + return (err); +} + +/* + * Lookup for /dev/ipnet directory + * If the entry does not exist, the devipnet_create_rvp() callback + * is invoked to create it. Nodes do not persist across reboot. + */ +/*ARGSUSED3*/ +static int +devipnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, + struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, + caller_context_t *ct, int *direntflags, pathname_t *realpnp) +{ + struct sdev_node *sdvp = VTOSDEV(dvp); + struct sdev_node *dv; + struct vnode *rvp = NULL; + int error; + + error = devname_lookup_func(sdvp, nm, vpp, cred, devipnet_create_rvp, + SDEV_VATTR); + + if (error == 0) { + switch ((*vpp)->v_type) { + case VCHR: + dv = VTOSDEV(VTOS(*vpp)->s_realvp); + ASSERT(VOP_REALVP(SDEVTOV(dv), &rvp, NULL) == ENOSYS); + break; + case VDIR: + dv = VTOSDEV(*vpp); + break; + default: + cmn_err(CE_PANIC, "devipnet_lookup: Unsupported node " + "type: %p: %d", (void *)(*vpp), (*vpp)->v_type); + break; + } + ASSERT(SDEV_HELD(dv)); + } + + return (error); +} + +static void +devipnet_filldir_entry(const char *name, void *arg, dev_t dev) +{ + struct sdev_node *ddv = arg; + struct vattr vattr; + struct sdev_node *dv; + + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + + if 
((dv = sdev_cache_lookup(ddv, (char *)name)) == NULL) { + devipnet_fill_vattr(&vattr, dev); + if (sdev_mknode(ddv, (char *)name, &dv, &vattr, NULL, NULL, + kcred, SDEV_READY) != 0) + return; + } + SDEV_SIMPLE_RELE(dv); +} + +static void +devipnet_filldir(struct sdev_node *ddv) +{ + sdev_node_t *dv, *next; + + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + if (rw_tryupgrade(&ddv->sdev_contents) == NULL) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + + for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) { + next = SDEV_NEXT_ENTRY(ddv, dv); + + /* validate and prune only ready nodes */ + if (dv->sdev_state != SDEV_READY) + continue; + switch (devipnet_validate(dv)) { + case SDEV_VTOR_VALID: + case SDEV_VTOR_SKIP: + continue; + case SDEV_VTOR_INVALID: + case SDEV_VTOR_STALE: + sdcmn_err12(("devipnet_filldir: destroy invalid " + "node: %s(%p)\n", dv->sdev_name, (void *)dv)); + break; + } + + if (SDEVTOV(dv)->v_count > 0) + continue; + SDEV_HOLD(dv); + /* remove the cache node */ + (void) sdev_cache_update(ddv, &dv, dv->sdev_name, + SDEV_CACHE_DELETE); + } + + ipnet_walk_if(devipnet_filldir_entry, ddv, getzoneid()); + + rw_downgrade(&ddv->sdev_contents); +} + +/* + * Display all instantiated ipnet device nodes. + */ +/* ARGSUSED */ +static int +devipnet_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, + int *eofp, caller_context_t *ct, int flags) +{ + struct sdev_node *sdvp = VTOSDEV(dvp); + + if (uiop->uio_offset == 0) + devipnet_filldir(sdvp); + + return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); +} + +/* + * We override lookup and readdir to build entries based on the + * in kernel ipnet table. 
+ */ +const fs_operation_def_t devipnet_vnodeops_tbl[] = { + VOPNAME_READDIR, { .vop_readdir = devipnet_readdir }, + VOPNAME_LOOKUP, { .vop_lookup = devipnet_lookup }, + VOPNAME_CREATE, { .error = fs_nosys }, + VOPNAME_REMOVE, { .error = fs_nosys }, + VOPNAME_MKDIR, { .error = fs_nosys }, + VOPNAME_RMDIR, { .error = fs_nosys }, + VOPNAME_SYMLINK, { .error = fs_nosys }, + VOPNAME_SETSECATTR, { .error = fs_nosys }, + NULL, NULL +}; diff --git a/usr/src/uts/common/fs/dev/sdev_netops.c b/usr/src/uts/common/fs/dev/sdev_netops.c index 1ef39317e4..bd7bf5058b 100644 --- a/usr/src/uts/common/fs/dev/sdev_netops.c +++ b/usr/src/uts/common/fs/dev/sdev_netops.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * vnode ops for the /dev/net directory * @@ -282,6 +280,7 @@ devnet_filldir(struct sdev_node *ddv) case SDEV_VTOR_SKIP: continue; case SDEV_VTOR_INVALID: + case SDEV_VTOR_STALE: sdcmn_err12(("devnet_filldir: destroy invalid " "node: %s(%p)\n", dv->sdev_name, (void *)dv)); break; diff --git a/usr/src/uts/common/fs/dev/sdev_ptsops.c b/usr/src/uts/common/fs/dev/sdev_ptsops.c index 7e27a61db9..a228d05086 100644 --- a/usr/src/uts/common/fs/dev/sdev_ptsops.c +++ b/usr/src/uts/common/fs/dev/sdev_ptsops.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * vnode ops for the /dev/pts directory * The lookup is based on the internal pty table. 
We also @@ -248,6 +246,7 @@ devpts_prunedir(struct sdev_node *ddv) case SDEV_VTOR_SKIP: continue; case SDEV_VTOR_INVALID: + case SDEV_VTOR_STALE: sdcmn_err7(("prunedir: destroy invalid " "node: %s(%p)\n", dv->sdev_name, (void *)dv)); break; diff --git a/usr/src/uts/common/fs/dev/sdev_subr.c b/usr/src/uts/common/fs/dev/sdev_subr.c index 127329c8d4..9134a4d7ae 100644 --- a/usr/src/uts/common/fs/dev/sdev_subr.c +++ b/usr/src/uts/common/fs/dev/sdev_subr.c @@ -614,6 +614,9 @@ static struct sdev_vop_table vtab[] = { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, SDEV_DYNAMIC | SDEV_VTOR }, + { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, + devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, + { NULL, NULL, NULL, NULL, NULL, 0} }; @@ -2328,7 +2331,7 @@ tryagain: } } - +lookup_create_node: /* first thread that is doing the lookup on this node */ if (!dv) { if (!rw_tryupgrade(&ddv->sdev_contents)) { @@ -2451,6 +2454,24 @@ found: switch (vtor(dv)) { case SDEV_VTOR_VALID: break; + case SDEV_VTOR_STALE: + /* + * The name exists, but the cache entry is + * stale and needs to be re-created. 
+ */ + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + if (rw_tryupgrade(&ddv->sdev_contents) == 0) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + error = sdev_cache_update(ddv, &dv, nm, + SDEV_CACHE_DELETE); + rw_downgrade(&ddv->sdev_contents); + if (error == 0) { + dv = NULL; + goto lookup_create_node; + } + /* FALLTHRU */ case SDEV_VTOR_INVALID: SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); sdcmn_err7(("lookup: destroy invalid " diff --git a/usr/src/uts/common/fs/dev/sdev_vtops.c b/usr/src/uts/common/fs/dev/sdev_vtops.c index 11ceaadd26..8e94ba8fe5 100644 --- a/usr/src/uts/common/fs/dev/sdev_vtops.c +++ b/usr/src/uts/common/fs/dev/sdev_vtops.c @@ -285,6 +285,7 @@ devvt_prunedir(struct sdev_node *ddv) case SDEV_VTOR_SKIP: continue; case SDEV_VTOR_INVALID: + case SDEV_VTOR_STALE: sdcmn_err7(("destroy invalid " "node: %s(%p)\n", dv->sdev_name, (void *)dv)); break; diff --git a/usr/src/uts/common/inet/Makefile b/usr/src/uts/common/inet/Makefile index 4a2141e142..5f353ae8fe 100644 --- a/usr/src/uts/common/inet/Makefile +++ b/usr/src/uts/common/inet/Makefile @@ -1,4 +1,4 @@ -# +# # CDDL HEADER START # # The contents of this file are subject to the terms of the @@ -18,10 +18,9 @@ # # CDDL HEADER END # + # -# ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # uts/common/inet/Makefile @@ -29,12 +28,12 @@ # include global definitions include ../../../Makefile.master -HDRS= arp.h arp_impl.h common.h ipclassifier.h ip.h ip6.h ipdrop.h ipsecah.h \ - ipsecesp.h ipsec_info.h ip6_asp.h ip_if.h ip_ire.h ip_multi.h \ - ip_netinfo.h ip_ndp.h ip_rts.h ipsec_impl.h keysock.h led.h mi.h \ - mib2.h nd.h optcom.h sadb.h sctp_itf.h snmpcom.h tcp.h tcp_sack.h \ - tcp_stack.h tun.h udp_impl.h rawip_impl.h ipp_common.h ip_ftable.h \ - ip_impl.h tcp_impl.h wifi_ioctl.h ip_stack.h +HDRS= arp.h arp_impl.h common.h ipclassifier.h ip.h ip6.h ipdrop.h ipnet.h \ + ipsecah.h ipsecesp.h ipsec_info.h ip6_asp.h ip_if.h ip_ire.h \ + ip_multi.h ip_netinfo.h ip_ndp.h ip_rts.h ipsec_impl.h keysock.h \ + led.h mi.h mib2.h nd.h optcom.h sadb.h sctp_itf.h snmpcom.h tcp.h \ + tcp_sack.h tcp_stack.h tun.h udp_impl.h rawip_impl.h ipp_common.h \ + ip_ftable.h ip_impl.h ip_stack.h tcp_impl.h wifi_ioctl.h ROOTDIRS= $(ROOT)/usr/include/inet diff --git a/usr/src/uts/common/inet/arp/arp_netinfo.c b/usr/src/uts/common/inet/arp/arp_netinfo.c index 4789dda337..9d9c6a5bbe 100644 --- a/usr/src/uts/common/inet/arp/arp_netinfo.c +++ b/usr/src/uts/common/inet/arp/arp_netinfo.c @@ -46,6 +46,8 @@ static int arp_getmtu(net_handle_t, phy_if_t, lif_if_t); static int arp_getpmtuenabled(net_handle_t); static int arp_getlifaddr(net_handle_t, phy_if_t, lif_if_t, size_t, net_ifaddr_t [], void *); +static int arp_getlifzone(net_handle_t, phy_if_t, lif_if_t, zoneid_t *); +static int arp_getlifflags(net_handle_t, phy_if_t, lif_if_t, uint64_t *); static phy_if_t arp_phygetnext(net_handle_t, phy_if_t); static phy_if_t arp_phylookup(net_handle_t, const char *); static lif_if_t arp_lifgetnext(net_handle_t, phy_if_t, lif_if_t); @@ -61,6 +63,8 @@ static net_protocol_t arp_netinfo = { arp_getmtu, arp_getpmtuenabled, arp_getlifaddr, + arp_getlifzone, + arp_getlifflags, arp_phygetnext, arp_phylookup, arp_lifgetnext, @@ -348,3 +352,25 @@ arp_isvalidchecksum(net_handle_t net, mblk_t *mb) { 
return (-1); } + +/* + * Unsupported with ARP. + */ +/*ARGSUSED*/ +static int +arp_getlifzone(net_handle_t net, phy_if_t phy_ifdata, lif_if_t ifdata, + zoneid_t *zoneid) +{ + return (-1); +} + +/* + * Unsupported with ARP. + */ +/*ARGSUSED*/ +static int +arp_getlifflags(net_handle_t net, phy_if_t phy_ifdata, lif_if_t ifdata, + uint64_t *flags) +{ + return (-1); +} diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index f8320fb058..5c06308509 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -1841,6 +1841,7 @@ typedef struct ill_s { mblk_t *ill_bcast_mp; /* DLPI header for broadcasts. */ mblk_t *ill_resolver_mp; /* Resolver template. */ mblk_t *ill_unbind_mp; /* unbind mp from ill_dl_up() */ + mblk_t *ill_promiscoff_mp; /* for ill_leave_allmulti() */ mblk_t *ill_dlpi_deferred; /* b_next chain of control messages */ mblk_t *ill_phys_addr_mp; /* mblk which holds ill_phys_addr */ #define ill_last_mp_to_free ill_phys_addr_mp @@ -1869,7 +1870,8 @@ typedef struct ill_s { ill_note_link : 1, /* supports link-up notification */ ill_capab_reneg : 1, /* capability renegotiation to be done */ - ill_pad_to_bit_31 : 18; + ill_need_recover_multicast : 1, + ill_pad_to_bit_31 : 17; /* Following bit fields protected by ill_lock */ uint_t @@ -1889,11 +1891,6 @@ typedef struct ill_s { int ill_arp_muxid; /* muxid returned from plink for arp */ int ill_ip_muxid; /* muxid returned from plink for ip */ - /* - * NIC event information attached, to be used by nic event hooks. - */ - hook_nic_event_int_t *ill_nic_event_info; - /* Used for IP frag reassembly throttling on a per ILL basis. 
*/ uint_t ill_ipf_gen; /* Generation of next fragment queue */ uint_t ill_frag_count; /* Count of all reassembly mblk bytes */ @@ -1993,6 +1990,7 @@ typedef struct ill_s { ip_stack_t *ill_ipst; /* Corresponds to a netstack_hold */ uint32_t ill_dhcpinit; /* IP_DHCPINIT_IFs for ill */ uint_t ill_ilm_cnt; /* ilms referencing this ill */ + uint_t ill_ipallmulti_cnt; /* ip_join_allmulti() calls */ } ill_t; /* @@ -2510,7 +2508,7 @@ typedef struct ire_s { uint32_t ire_ihandle; /* Associate interface IREs to cache */ ipif_t *ire_ipif; /* the interface that this ire uses */ uint32_t ire_flags; /* flags related to route (RTF_*) */ - uint_t ire_ipsec_overhead; /* IPSEC overhead */ + uint_t ire_ipsec_overhead; /* IPSEC overhead */ /* * Neighbor Cache Entry for IPv6; arp info for IPv4 */ @@ -3124,8 +3122,9 @@ extern void ill_frag_timer(void *); extern ill_t *ill_first(int, int, ill_walk_context_t *, ip_stack_t *); extern ill_t *ill_next(ill_walk_context_t *, ill_t *); extern void ill_frag_timer_start(ill_t *); -extern void ill_nic_info_dispatch(ill_t *); -extern void ill_nic_info_plumb(ill_t *, boolean_t); +extern void ill_nic_event_dispatch(ill_t *, lif_if_t, nic_event_t, + nic_event_data_t, size_t); +extern void ill_nic_event_plumb(ill_t *, boolean_t); extern mblk_t *ip_carve_mp(mblk_t **, ssize_t); extern mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); extern char *ip_dot_addr(ipaddr_t, char *); @@ -3366,6 +3365,53 @@ extern int ip_cgtp_filter_is_registered(netstackid_t); #endif /* + * IP observability hook support + */ + +/* + * ipobs_hooktype_t describes the hook types supported + * by the ip module. IPOBS_HOOK_LOCAL refers to packets + * which are looped back internally within the ip module. 
+ */ + +typedef enum ipobs_hook_type { + IPOBS_HOOK_LOCAL, + IPOBS_HOOK_OUTBOUND, + IPOBS_HOOK_INBOUND +} ipobs_hook_type_t; + +typedef void ipobs_cbfunc_t(mblk_t *); + +typedef struct ipobs_cb { + ipobs_cbfunc_t *ipobs_cbfunc; + list_node_t ipobs_cbnext; +} ipobs_cb_t; + +/* + * This structure holds the data passed back from the ip module to + * observability consumers. + * + * ihd_mp Pointer to the IP packet. + * ihd_zsrc Source zoneid; set to ALL_ZONES when unknown. + * ihd_zdst Destination zoneid; set to ALL_ZONES when unknown. + * ihd_htype IPobs hook type, see above for the defined types. + * ihd_ipver IP version of the packet. + * ihd_ifindex Interface index that the packet was received/sent over. + * For local packets, this is the index of the interface + * associated with the local destination address. + * ihd_stack Netstack the packet is from. + */ +typedef struct ipobs_hook_data { + mblk_t *ihd_mp; + zoneid_t ihd_zsrc; + zoneid_t ihd_zdst; + ipobs_hook_type_t ihd_htype; + uint16_t ihd_ipver; + uint64_t ihd_ifindex; + netstack_t *ihd_stack; +} ipobs_hook_data_t; + +/* * Per-ILL Multidata Transmit capabilities. 
*/ struct ill_mdt_capab_s { @@ -3482,7 +3528,10 @@ extern void tcp_wput(queue_t *, mblk_t *); extern int ip_fill_mtuinfo(struct in6_addr *, in_port_t, struct ip6_mtuinfo *, netstack_t *); extern ipif_t *conn_get_held_ipif(conn_t *, ipif_t **, int *); - +extern void ipobs_register_hook(netstack_t *, ipobs_cbfunc_t *); +extern void ipobs_unregister_hook(netstack_t *, ipobs_cbfunc_t *); +extern void ipobs_hook(mblk_t *, int, zoneid_t, zoneid_t, const ill_t *, int, + uint32_t, ip_stack_t *); typedef void (*ipsq_func_t)(ipsq_t *, queue_t *, mblk_t *, void *); /* diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index e86f1512c1..5e8815a5dd 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -791,6 +791,9 @@ static mblk_t *ip_tcp_input(mblk_t *, ipha_t *, ill_t *, boolean_t, static void ip_rput_process_forward(queue_t *, mblk_t *, ire_t *, ipha_t *, ill_t *, boolean_t); + +static void ipobs_init(ip_stack_t *); +static void ipobs_fini(ip_stack_t *); ipaddr_t ip_g_all_ones = IP_HOST_MASK; /* How long, in seconds, we allow frags to hang around. 
*/ @@ -1230,10 +1233,10 @@ ip_ioctl_cmd_t ip_ndx_ioctl_table[] = { /* 146 */ { SIOCTMYSITE, sizeof (struct sioc_addrreq), 0, MISC_CMD, ip_sioctl_tmysite, NULL }, /* 147 */ { SIOCGTUNPARAM, sizeof (struct iftun_req), IPI_REPL, - TUN_CMD, ip_sioctl_tunparam, NULL }, + TUN_CMD, ip_sioctl_tunparam, NULL }, /* 148 */ { SIOCSTUNPARAM, sizeof (struct iftun_req), - IPI_PRIV | IPI_WR, - TUN_CMD, ip_sioctl_tunparam, NULL }, + IPI_PRIV | IPI_WR, + TUN_CMD, ip_sioctl_tunparam, NULL }, /* IPSECioctls handled in ip_sioctl_copyin_setup itself */ /* 149 */ { SIOCFIPSECONFIG, 0, IPI_PRIV, MISC_CMD, NULL, NULL }, @@ -4156,9 +4159,7 @@ ip_arp_news(queue_t *q, mblk_t *mp) if ((ipif->ipif_flags & IPIF_UP) && !ipif->ipif_addr_ready) { ipif_mask_reply(ipif); - ip_rts_ifmsg(ipif); - ip_rts_newaddrmsg(RTM_ADD, 0, ipif); - sctp_update_ipif(ipif, SCTP_IPIF_UP); + ipif_up_notify(ipif); } ipif->ipif_addr_ready = 1; ipif_refrele(ipif); @@ -5829,6 +5830,7 @@ ip_stack_fini(netstackid_t stackid, void *arg) mutex_destroy(&ipst->ips_ip_addr_avail_lock); rw_destroy(&ipst->ips_ill_g_lock); + ipobs_fini(ipst); ip_ire_fini(ipst); ip6_asp_free(ipst); conn_drain_fini(ipst); @@ -6033,6 +6035,7 @@ ip_stack_init(netstackid_t stackid, netstack_t *ns) ipst->ips_ip_src_id = 1; rw_init(&ipst->ips_srcid_lock, NULL, RW_DEFAULT, NULL); + ipobs_init(ipst); ip_net_init(ipst, ns); ipv4_hook_init(ipst); ipv6_hook_init(ipst); @@ -8532,7 +8535,6 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, conn_t *connp, return; } case IRE_IF_NORESOLVER: { - if (dst_ill->ill_phys_addr_length != IP_ADDR_LEN && dst_ill->ill_resolver_mp == NULL) { ip1dbg(("ip_newroute: dst_ill %p " @@ -14054,7 +14056,7 @@ ip_fast_forward(ire_t *ire, ipaddr_t dst, ill_t *ill, mblk_t *mp) * may be queued depending on the availability * of transmit resources at the media layer. 
*/ - IP_DLS_ILL_TX(stq_ill, ipha, mp, ipst); + IP_DLS_ILL_TX(stq_ill, ipha, mp, ipst, hlen); } else { DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, stq_ill, @@ -15147,6 +15149,18 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, continue; } + if (ipst->ips_ipobs_enabled) { + zoneid_t dzone; + + /* + * On the inbound path the src zone will be unknown as + * this packet has come from the wire. + */ + dzone = ip_get_zoneid_v4(dst, mp, ipst, ALL_ZONES); + ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, + ill, IPV4_VERSION, 0, ipst); + } + /* * Reuse the cached ire only if the ipha_dst of the previous * packet is the same as the current packet AND it is not @@ -15157,6 +15171,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, ire_refrele(ire); ire = NULL; } + opt_len = ipha->ipha_version_and_hdr_length - IP_SIMPLE_HDR_VERSION; @@ -15848,7 +15863,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) mutex_enter(&ill->ill_lock); ill->ill_dl_up = 1; - (void) ill_hook_event_create(ill, 0, NE_UP, NULL, 0); + ill_nic_event_dispatch(ill, 0, NE_UP, NULL, 0); mutex_exit(&ill->ill_lock); /* @@ -16411,7 +16426,7 @@ ip_rput_other(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) iocp = (struct iocblk *)mp->b_rptr; switch (iocp->ioc_cmd) { - int mode; + int mode; case DL_IOC_HDR_INFO: /* @@ -20132,7 +20147,7 @@ ip_trash_ire_reclaim_stack(ip_stack_t *ipst) mblk_t * ip_unbind(queue_t *q, mblk_t *mp) { - conn_t *connp = Q_TO_CONN(q); + conn_t *connp = Q_TO_CONN(q); ASSERT(!MUTEX_HELD(&connp->conn_lock)); @@ -22670,6 +22685,19 @@ another:; if (mp == NULL) goto release_ire_and_ill; + if (ipst->ips_ipobs_enabled) { + zoneid_t szone; + + /* + * On the outbound path the destination zone will be + * unknown as we're sending this packet out on the + * wire. 
+ */ + szone = ip_get_zoneid_v4(ipha->ipha_src, mp, ipst, + ALL_ZONES); + ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, ALL_ZONES, + ire->ire_ipif->ipif_ill, IPV4_VERSION, 0, ipst); + } mp->b_prev = SET_BPREV_FLAG(IPP_LOCAL_OUT); DTRACE_PROBE2(ip__xmit__1, mblk_t *, mp, ire_t *, ire); pktxmit_state = ip_xmit_v4(mp, ire, NULL, B_TRUE); @@ -25118,6 +25146,24 @@ ip_wput_local(queue_t *q, ill_t *ill, ipha_t *ipha, mblk_t *mp, ire_t *ire, if (first_mp == NULL) return; + if (ipst->ips_ipobs_enabled) { + zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; + zoneid_t stackzoneid = netstackid_to_zoneid( + ipst->ips_netstack->netstack_stackid); + + dzone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; + /* + * 127.0.0.1 is special, as we cannot lookup its zoneid by + * address. Restrict the lookup below to the destination zone. + */ + if (ipha->ipha_src == ntohl(INADDR_LOOPBACK)) + lookup_zoneid = zoneid; + szone = ip_get_zoneid_v4(ipha->ipha_src, mp, ipst, + lookup_zoneid); + ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, + IPV4_VERSION, 0, ipst); + } + DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL, int, 1); @@ -25864,9 +25910,10 @@ send: DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, ipsec_mp); - if (ipsec_mp != NULL) + if (ipsec_mp != NULL) { ip_wput_local_v6(RD(q), out_ill, - ip6h, ipsec_mp, ire, 0); + ip6h, ipsec_mp, ire, 0, zoneid); + } if (ire_need_rele) ire_refrele(ire); return; @@ -29381,12 +29428,8 @@ ipmp_hook_emulation_changed(ip_stack_t *ipst) else ill = phyi->phyint_illv6; - if (ill != NULL) { - mutex_enter(&ill->ill_lock); - ill_nic_info_plumb(ill, B_TRUE); - ill_nic_info_dispatch(ill); - mutex_exit(&ill->ill_lock); - } + if (ill != NULL) + ill_nic_event_plumb(ill, B_TRUE); } } rw_exit(&ipst->ips_ill_g_lock); @@ -30202,3 +30245,169 @@ ip6_pkt_free(ip6_pkt_t *ipp) ipp->ipp_fields &= ~(IPPF_HOPOPTS | IPPF_RTDSTOPTS | IPPF_DSTOPTS | IPPF_RTHDR); } + +zoneid_t 
+ip_get_zoneid_v4(ipaddr_t addr, mblk_t *mp, ip_stack_t *ipst, + zoneid_t lookup_zoneid) +{ + ire_t *ire; + int ire_flags = MATCH_IRE_TYPE; + zoneid_t zoneid = ALL_ZONES; + + if (is_system_labeled() && !tsol_can_accept_raw(mp, B_FALSE)) + return (ALL_ZONES); + + if (lookup_zoneid != ALL_ZONES) + ire_flags |= MATCH_IRE_ZONEONLY; + ire = ire_ctable_lookup(addr, NULL, IRE_LOCAL | IRE_LOOPBACK, NULL, + lookup_zoneid, NULL, ire_flags, ipst); + if (ire != NULL) { + zoneid = IP_REAL_ZONEID(ire->ire_zoneid, ipst); + ire_refrele(ire); + } + return (zoneid); +} + +zoneid_t +ip_get_zoneid_v6(in6_addr_t *addr, mblk_t *mp, const ill_t *ill, + ip_stack_t *ipst, zoneid_t lookup_zoneid) +{ + ire_t *ire; + int ire_flags = MATCH_IRE_TYPE; + zoneid_t zoneid = ALL_ZONES; + ipif_t *ipif_arg = NULL; + + if (is_system_labeled() && !tsol_can_accept_raw(mp, B_FALSE)) + return (ALL_ZONES); + + if (IN6_IS_ADDR_LINKLOCAL(addr)) { + ire_flags |= MATCH_IRE_ILL_GROUP; + ipif_arg = ill->ill_ipif; + } + if (lookup_zoneid != ALL_ZONES) + ire_flags |= MATCH_IRE_ZONEONLY; + ire = ire_ctable_lookup_v6(addr, NULL, IRE_LOCAL | IRE_LOOPBACK, + ipif_arg, lookup_zoneid, NULL, ire_flags, ipst); + if (ire != NULL) { + zoneid = IP_REAL_ZONEID(ire->ire_zoneid, ipst); + ire_refrele(ire); + } + return (zoneid); +} + +/* + * IP observability hook support functions.
+ */ + +static void +ipobs_init(ip_stack_t *ipst) +{ + ipst->ips_ipobs_enabled = B_FALSE; + list_create(&ipst->ips_ipobs_cb_list, sizeof (ipobs_cb_t), + offsetof(ipobs_cb_t, ipobs_cbnext)); + mutex_init(&ipst->ips_ipobs_cb_lock, NULL, MUTEX_DEFAULT, NULL); + ipst->ips_ipobs_cb_nwalkers = 0; + cv_init(&ipst->ips_ipobs_cb_cv, NULL, CV_DRIVER, NULL); +} + +static void +ipobs_fini(ip_stack_t *ipst) +{ + ipobs_cb_t *cb; + + mutex_enter(&ipst->ips_ipobs_cb_lock); + while (ipst->ips_ipobs_cb_nwalkers != 0) + cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock); + + while ((cb = list_head(&ipst->ips_ipobs_cb_list)) != NULL) { + list_remove(&ipst->ips_ipobs_cb_list, cb); + kmem_free(cb, sizeof (*cb)); + } + list_destroy(&ipst->ips_ipobs_cb_list); + mutex_exit(&ipst->ips_ipobs_cb_lock); + mutex_destroy(&ipst->ips_ipobs_cb_lock); + cv_destroy(&ipst->ips_ipobs_cb_cv); +} + +void +ipobs_hook(mblk_t *mp, int htype, zoneid_t zsrc, zoneid_t zdst, + const ill_t *ill, int ipver, uint32_t hlen, ip_stack_t *ipst) +{ + ipobs_cb_t *ipobs_cb; + + ASSERT(DB_TYPE(mp) == M_DATA); + + mutex_enter(&ipst->ips_ipobs_cb_lock); + ipst->ips_ipobs_cb_nwalkers++; + mutex_exit(&ipst->ips_ipobs_cb_lock); + for (ipobs_cb = list_head(&ipst->ips_ipobs_cb_list); ipobs_cb != NULL; + ipobs_cb = list_next(&ipst->ips_ipobs_cb_list, ipobs_cb)) { + mblk_t *mp2 = allocb(sizeof (ipobs_hook_data_t), + BPRI_HI); + if (mp2 != NULL) { + ipobs_hook_data_t *ihd = + (ipobs_hook_data_t *)mp2->b_rptr; + if (((ihd->ihd_mp = dupmsg(mp)) == NULL) && + ((ihd->ihd_mp = copymsg(mp)) == NULL)) { + freemsg(mp2); + continue; + } + ihd->ihd_mp->b_rptr += hlen; + ihd->ihd_htype = htype; + ihd->ihd_ipver = ipver; + ihd->ihd_zsrc = zsrc; + ihd->ihd_zdst = zdst; + ihd->ihd_ifindex = ill->ill_phyint->phyint_ifindex; + ihd->ihd_stack = ipst->ips_netstack; + mp2->b_wptr += sizeof (*ihd); + ipobs_cb->ipobs_cbfunc(mp2); + } + } + mutex_enter(&ipst->ips_ipobs_cb_lock); + ipst->ips_ipobs_cb_nwalkers--; + if (ipst->ips_ipobs_cb_nwalkers 
== 0) + cv_broadcast(&ipst->ips_ipobs_cb_cv); + mutex_exit(&ipst->ips_ipobs_cb_lock); +} + +void +ipobs_register_hook(netstack_t *ns, pfv_t func) +{ + ipobs_cb_t *cb; + ip_stack_t *ipst = ns->netstack_ip; + + cb = kmem_alloc(sizeof (*cb), KM_SLEEP); + + mutex_enter(&ipst->ips_ipobs_cb_lock); + while (ipst->ips_ipobs_cb_nwalkers != 0) + cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock); + ASSERT(ipst->ips_ipobs_cb_nwalkers == 0); + + cb->ipobs_cbfunc = func; + list_insert_head(&ipst->ips_ipobs_cb_list, cb); + ipst->ips_ipobs_enabled = B_TRUE; + mutex_exit(&ipst->ips_ipobs_cb_lock); +} + +void +ipobs_unregister_hook(netstack_t *ns, pfv_t func) +{ + ipobs_cb_t *curcb; + ip_stack_t *ipst = ns->netstack_ip; + + mutex_enter(&ipst->ips_ipobs_cb_lock); + while (ipst->ips_ipobs_cb_nwalkers != 0) + cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock); + + for (curcb = list_head(&ipst->ips_ipobs_cb_list); curcb != NULL; + curcb = list_next(&ipst->ips_ipobs_cb_list, curcb)) { + if (func == curcb->ipobs_cbfunc) { + list_remove(&ipst->ips_ipobs_cb_list, curcb); + kmem_free(curcb, sizeof (*curcb)); + break; + } + } + if (list_is_empty(&ipst->ips_ipobs_cb_list)) + ipst->ips_ipobs_enabled = B_FALSE; + mutex_exit(&ipst->ips_ipobs_cb_lock); +} diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c index 4193e9e3de..810cec9e8a 100644 --- a/usr/src/uts/common/inet/ip/ip6.c +++ b/usr/src/uts/common/inet/ip/ip6.c @@ -3259,9 +3259,8 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, if (!IPCL_IS_IPTUN(connp) && (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure)) { - first_mp1 = ipsec_check_inbound_policy - (first_mp1, connp, NULL, ip6h, - mctl_present); + first_mp1 = ipsec_check_inbound_policy( + first_mp1, connp, NULL, ip6h, mctl_present); } if (first_mp1 != NULL) { if (mctl_present) @@ -6855,6 +6854,26 @@ ip_rput_v6(queue_t *q, mblk_t *mp) if (first_mp == NULL) return; + /* + * Attach any necessary label information to this 
packet. + */ + if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { + if (ip6opt_ls != 0) + ip0dbg(("tsol_get_pkt_label v6 failed\n")); + BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); + goto discard; + } + + /* IP observability hook. */ + if (ipst->ips_ipobs_enabled) { + zoneid_t dzone; + + dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, + ALL_ZONES); + ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, + IPV6_VERSION, 0, ipst); + } + if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == IPV6_DEFAULT_VERS_AND_FLOW) { /* @@ -7285,18 +7304,6 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, } /* - * Attach any necessary label information to this packet. - */ - if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { - if (ip6opt_ls != 0) - ip0dbg(("tsol_get_pkt_label v6 failed\n")); - BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); - freemsg(hada_mp); - freemsg(first_mp); - return; - } - - /* * On incoming v6 multicast packets we will bypass the ire table, * and assume that the read queue corresponds to the targetted * interface. @@ -10519,7 +10526,7 @@ ipsec_out_attach_if(ipsec_out_t *io, int attach_index) */ void ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, - ire_t *ire, int fanout_flags) + ire_t *ire, int fanout_flags, zoneid_t zoneid) { uint32_t ports; mblk_t *mp = first_mp, *first_mp1; @@ -10569,6 +10576,25 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, if (first_mp == NULL) return; + if (ipst->ips_ipobs_enabled) { + zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; + zoneid_t stackzoneid = netstackid_to_zoneid( + ipst->ips_netstack->netstack_stackid); + + szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; + /* + * ::1 is special, as we cannot lookup its zoneid by + * address. For this case, restrict the lookup to the + * source zone. 
+ */ + if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) + lookup_zoneid = zoneid; + dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, + lookup_zoneid); + ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, + IPV6_VERSION, 0, ipst); + } + DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, int, 1); @@ -11007,8 +11033,8 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, * disabled. */ ip_wput_local_v6(RD(q), ill, - nip6h, nmp, - ire, fanout_flags); + nip6h, nmp, ire, + fanout_flags, zoneid); } } else { BUMP_MIB(mibptr, ipIfStatsOutDiscards); @@ -11437,8 +11463,10 @@ ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, ipst->ips_ipv6firewall_loopback_out, NULL, ill, ip6h, first_mp, mp, 0, ipst); DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); - if (first_mp != NULL) - ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); + if (first_mp != NULL) { + ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, + zoneid); + } } } @@ -12000,6 +12028,8 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, boolean_t multirt_send = B_FALSE; mblk_t *next_mp = NULL; ip_stack_t *ipst = ire->ire_ipst; + boolean_t fp_prepend = B_FALSE; + uint32_t hlen; ip6h = (ip6_t *)mp->b_rptr; ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); @@ -12201,7 +12231,6 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, ASSERT(nce->nce_ipversion != IPV4_VERSION); mutex_enter(&nce->nce_lock); if ((mp1 = nce->nce_fp_mp) != NULL) { - uint32_t hlen; uchar_t *rptr; hlen = MBLKL(mp1); @@ -12237,6 +12266,7 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, */ bcopy(mp1->b_rptr, rptr, hlen); mutex_exit(&nce->nce_lock); + fp_prepend = B_TRUE; } } else { /* @@ -12316,6 +12346,16 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, } } + if (ipst->ips_ipobs_enabled) { + zoneid_t szone; + + szone = ip_get_zoneid_v6(&ip6h->ip6_src, + 
mp_ip6h, out_ill, ipst, ALL_ZONES); + ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, + ALL_ZONES, out_ill, IPV6_VERSION, + fp_prepend ? hlen : 0, ipst); + } + /* * Update ire and MIB counters; for save_ire, this has * been done by the caller. diff --git a/usr/src/uts/common/inet/ip/ip6_if.c b/usr/src/uts/common/inet/ip/ip6_if.c index 51bb0cbe67..dc703f40c3 100644 --- a/usr/src/uts/common/inet/ip/ip6_if.c +++ b/usr/src/uts/common/inet/ip/ip6_if.c @@ -26,8 +26,6 @@ * Copyright (c) 1990 Mentat Inc. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This file contains the interface control functions for IPv6. */ @@ -3148,7 +3146,7 @@ ipif_up_done_v6(ipif_t *ipif) ipif_saved_ire_cnt = ipif->ipif_saved_ire_cnt; ipif_saved_irep = ipif_recover_ire_v6(ipif); - if (ipif->ipif_ipif_up_count == 1 && !loopback) { + if (ill->ill_need_recover_multicast) { /* * Need to recover all multicast memberships in the driver. * This had to be deferred until we had attached. @@ -3187,11 +3185,8 @@ ipif_up_done_v6(ipif_t *ipif) } } - if (ipif->ipif_addr_ready) { - ip_rts_ifmsg(ipif); - ip_rts_newaddrmsg(RTM_ADD, 0, ipif); - sctp_update_ipif(ipif, SCTP_IPIF_UP); - } + if (ipif->ipif_addr_ready) + ipif_up_notify(ipif); if (ipif_saved_irep != NULL) { kmem_free(ipif_saved_irep, @@ -3200,6 +3195,7 @@ ipif_up_done_v6(ipif_t *ipif) if (src_ipif_held) ipif_refrele(src_ipif); + return (0); bad: diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 5684927314..3b8ff6b5d9 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -44,7 +44,6 @@ #include <sys/sunldi.h> #include <sys/file.h> #include <sys/bitmap.h> - #include <sys/kmem.h> #include <sys/systm.h> #include <sys/param.h> @@ -252,7 +251,6 @@ static void illgrp_reset_schednext(ill_t *ill); static ill_t *ill_prev_usesrc(ill_t *); static int ill_relink_usesrc_ills(ill_t *, ill_t *, uint_t); static void ill_disband_usesrc_group(ill_t *); - static void conn_cleanup_stale_ire(conn_t 
*, caddr_t); #ifdef DEBUG @@ -491,7 +489,7 @@ static nv_t ipif_nv_tbl[] = { static uchar_t ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; -static ip_m_t ip_m_tbl[] = { +static ip_m_t ip_m_tbl[] = { { DL_ETHER, IFT_ETHER, ip_ether_v4mapinfo, ip_ether_v6mapinfo, ip_ether_v6intfid }, { DL_CSMACD, IFT_ISO88023, ip_ether_v4mapinfo, ip_ether_v6mapinfo, @@ -751,6 +749,12 @@ ill_delete(ill_t *ill) reset_conn_ill(ill); /* + * Remove multicast references added as a result of calls to + * ip_join_allmulti(). + */ + ip_purge_allmulti(ill); + + /* * ill_down will arrange to blow off any IRE's dependent on this * ILL, and shut down fragmentation reassembly. */ @@ -4343,28 +4347,11 @@ ill_glist_delete(ill_t *ill) ill->ill_ppa = UINT_MAX; } - /* - * Run the unplumb hook after the NIC has disappeared from being - * visible so that attempts to revalidate its existance will fail. - * - * This needs to be run inside the ill_g_lock perimeter to ensure - * that the ordering of delivered events to listeners matches the - * order of them in the kernel. - */ - mutex_enter(&ill->ill_lock); - ill_nic_info_dispatch(ill); - mutex_exit(&ill->ill_lock); - - /* Generate NE_UNPLUMB event for ill_name. */ - (void) ill_hook_event_create(ill, 0, NE_UNPLUMB, ill->ill_name, + /* Generate one last event for this ill. 
*/ + ill_nic_event_dispatch(ill, 0, NE_UNPLUMB, ill->ill_name, ill->ill_name_length); ill_phyint_free(ill); - - mutex_enter(&ill->ill_lock); - ill_nic_info_dispatch(ill); - mutex_exit(&ill->ill_lock); - rw_exit(&ipst->ips_ill_g_lock); } @@ -5090,6 +5077,8 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6, *error = 0; *did_alloc = B_TRUE; rw_exit(&ipst->ips_ill_g_lock); + ill_nic_event_dispatch(ill, MAP_IPIF_ID(ill->ill_ipif->ipif_id), + NE_PLUMB, ill->ill_name, ill->ill_name_length); return (ill); done: if (ill != NULL) { @@ -6277,27 +6266,9 @@ ipif_is_quiescent(ipif_t *ipif) static boolean_t ipif_is_freeable(ipif_t *ipif) { - - ill_t *ill; - ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock)); - - if (ipif->ipif_refcnt != 0 || !IPIF_FREE_OK(ipif)) { - return (B_FALSE); - } - - ill = ipif->ipif_ill; - if (ill->ill_ipif_up_count != 0 || ill->ill_ipif_dup_count != 0 || - ill->ill_logical_down) { - return (B_TRUE); - } - - /* This is the last ipif going down or being deleted on this ill */ - if (!ILL_FREE_OK(ill) || ill->ill_refcnt != 0) { - return (B_FALSE); - } - - return (B_TRUE); + ASSERT(ipif->ipif_id != 0); + return (ipif->ipif_refcnt == 0 && IPIF_FREE_OK(ipif)); } /* @@ -8039,8 +8010,6 @@ ipsq_current_finish(ipsq_t *ipsq) mutex_enter(&ill->ill_lock); dlpi_pending = ill->ill_dlpi_pending; ipif->ipif_state_flags &= ~IPIF_CHANGING; - /* Send any queued event */ - ill_nic_info_dispatch(ill); mutex_exit(&ill->ill_lock); } @@ -8193,13 +8162,13 @@ ip_extract_tunreq(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip, { boolean_t exists; struct iftun_req *ta; - ipif_t *ipif; - ill_t *ill; + ipif_t *ipif; + ill_t *ill; boolean_t isv6; - mblk_t *mp1; - int error; - conn_t *connp; - ip_stack_t *ipst; + mblk_t *mp1; + int error; + conn_t *connp; + ip_stack_t *ipst; /* Existence verified in ip_wput_nondata */ mp1 = mp->b_cont->b_cont; @@ -9427,7 +9396,7 @@ int ip_sioctl_tunparam(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, ip_ioctl_cmd_t *ipip, void 
*dummy_ifreq) { - ill_t *ill; + ill_t *ill; mblk_t *mp1; conn_t *connp; boolean_t success; @@ -11224,7 +11193,7 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * Don't attach nic event message for SIOCLIFADDIF ioctl. */ if (iocp != NULL && iocp->ioc_cmd != SIOCLIFADDIF) { - (void) ill_hook_event_create(ill, MAP_IPIF_ID(ipif->ipif_id), + ill_nic_event_dispatch(ill, MAP_IPIF_ID(ipif->ipif_id), NE_ADDRESS_CHANGE, sin, sinlen); } @@ -13920,9 +13889,7 @@ ipif_arp_start_dad(ipif_t *ipif) * DAD completion would have done, and continue. */ ipif_mask_reply(ipif); - ip_rts_ifmsg(ipif); - ip_rts_newaddrmsg(RTM_ADD, 0, ipif); - sctp_update_ipif(ipif, SCTP_IPIF_UP); + ipif_up_notify(ipif); ipif->ipif_addr_ready = 1; return; } @@ -13949,9 +13916,7 @@ ipif_ndp_start_dad(ipif_t *ipif) * problem. Just send out the routing socket notification that * DAD completion would have done, and continue. */ - ip_rts_ifmsg(ipif); - ip_rts_newaddrmsg(RTM_ADD, 0, ipif); - sctp_update_ipif(ipif, SCTP_IPIF_UP); + ipif_up_notify(ipif); ipif->ipif_addr_ready = 1; } NCE_REFRELE(nce); @@ -15661,7 +15626,7 @@ ill_nominate_mcast_rcv(ill_group_t *illgrp) for (ill = illgrp->illgrp_ill; ill != NULL; ill = ill->ill_group_next) { if (ill->ill_join_allmulti) - (void) ip_leave_allmulti(ill->ill_ipif); + ill_leave_allmulti(ill); } /* @@ -15671,13 +15636,9 @@ ill_nominate_mcast_rcv(ill_group_t *illgrp) * one of them is failed and another is a good one and * the good one (not marked inactive) is leaving the group. 
*/ - ret = 0; - for (ill = illgrp->illgrp_ill; ill != NULL; - ill = ill->ill_group_next) { - /* Never pick an offline interface */ + for (ill = illgrp->illgrp_ill; ill != NULL; ill = ill->ill_group_next) { if (ill->ill_phyint->phyint_flags & PHYI_OFFLINE) continue; - if (ill->ill_phyint->phyint_flags & PHYI_FAILED) { fallback_failed_ill = ill; continue; @@ -15688,11 +15649,11 @@ ill_nominate_mcast_rcv(ill_group_t *illgrp) } for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { - ret = ip_join_allmulti(ill->ill_ipif); + ret = ill_join_allmulti(ill); /* - * ip_join_allmulti can fail because of memory - * failures. So, make sure we join at least - * on one ill. + * ill_join_allmulti() can fail because of + * memory failures so make sure we join at + * least on one ill. */ if (ill->ill_join_allmulti) return (0); @@ -15709,17 +15670,13 @@ ill_nominate_mcast_rcv(ill_group_t *illgrp) } if ((ill = fallback_inactive_ill) != NULL) { for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { - if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { - ret = ip_join_allmulti(ill->ill_ipif); - return (ret); - } + if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) + return (ill_join_allmulti(ill)); } } else if ((ill = fallback_failed_ill) != NULL) { for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { - if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { - ret = ip_join_allmulti(ill->ill_ipif); - return (ret); - } + if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) + return (ill_join_allmulti(ill)); } } return (0); @@ -15816,7 +15773,7 @@ ill_handoff_responsibility(ill_t *ill, ill_group_t *illgrp) } else { for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { - (void) ip_join_allmulti(ill->ill_ipif); + (void) ill_join_allmulti(ill); break; } } @@ -16116,8 +16073,8 @@ illgrp_insert(ill_group_t **illgrp_head, ill_t *ill, char *groupname, /* * When ipif_up_done() calls this 
function, the multicast * groups have not been joined yet. So, there is no point in - * nomination. ip_join_allmulti will handle groups when - * ill_recover_multicast is called from ipif_up_done() later. + * nomination. ill_join_allmulti() will handle groups when + * ill_recover_multicast() is called from ipif_up_done() later. */ (void) ill_nominate_mcast_rcv(illgrp); /* @@ -16567,7 +16524,7 @@ ip_sioctl_groupname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, if (ipst->ips_ipmp_hook_emulation && phyi_tmp != NULL) { /* First phyint in group - group PLUMB event */ - ill_nic_info_plumb(ill, B_TRUE); + ill_nic_event_plumb(ill, B_TRUE); } mutex_exit(&phyi->phyint_lock); RELEASE_ILL_LOCKS(ill_v4, ill_v6); @@ -16799,12 +16756,12 @@ ilm_send_multicast_reqs(ill_t *from_ill, ill_t *to_ill) if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { /* * There is no need to hold ill locks as we are - * writer on both ills and when ill_join_allmulti - * is changed the thread is always a writer. + * writer on both ills and when ill_join_allmulti() + * is called the thread is always a writer. 
*/ if (from_ill->ill_join_allmulti && !to_ill->ill_join_allmulti) { - (void) ip_join_allmulti(to_ill->ill_ipif); + (void) ill_join_allmulti(to_ill); } } else if (ilm->ilm_notify_driver) { @@ -16844,7 +16801,7 @@ from: if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { if (from_ill->ill_join_allmulti) - (void) ip_leave_allmulti(from_ill->ill_ipif); + ill_leave_allmulti(from_ill); } else if (ilm_numentries_v6(from_ill, &ilm->ilm_v6addr) == 0) { (void) ip_ll_send_disabmulti_req(from_ill, &ilm->ilm_v6addr); @@ -18314,7 +18271,7 @@ ill_dl_down(ill_t *ill) mutex_enter(&ill->ill_lock); ill->ill_dl_up = 0; - (void) ill_hook_event_create(ill, 0, NE_DOWN, NULL, 0); + ill_nic_event_dispatch(ill, 0, NE_DOWN, NULL, 0); mutex_exit(&ill->ill_lock); } @@ -18707,6 +18664,8 @@ ipif_down(ipif_t *ipif, queue_t *q, mblk_t *mp) ipif_was_up = B_TRUE; /* Update status in SCTP's list */ sctp_update_ipif(ipif, SCTP_IPIF_DOWN); + ill_nic_event_dispatch(ipif->ipif_ill, + MAP_IPIF_ID(ipif->ipif_id), NE_LIF_DOWN, NULL, 0); } /* @@ -20480,11 +20439,18 @@ ipif_up_done(ipif_t *ipif) } - /* This is the first interface on this ill */ - if (ipif->ipif_ipif_up_count == 1 && !loopback) { + if (ill->ill_need_recover_multicast) { /* * Need to recover all multicast memberships in the driver. - * This had to be deferred until we had attached. + * This had to be deferred until we had attached. The same + * code exists in ipif_up_done_v6() to recover IPv6 + * memberships. + * + * Note that it would be preferable to unconditionally do the + * ill_recover_multicast() in ill_dl_up(), but we cannot do + * that since ill_join_allmulti() depends on ill_dl_up being + * set, and it is not set until we receive a DL_BIND_ACK after + * having called ill_dl_up(). */ ill_recover_multicast(ill); } @@ -20537,12 +20503,8 @@ ipif_up_done(ipif_t *ipif) * been validated. Otherwise, if it isn't ready yet, wait for * duplicate address detection to do its thing. 
*/ - if (ipif->ipif_addr_ready) { - ip_rts_ifmsg(ipif); - ip_rts_newaddrmsg(RTM_ADD, 0, ipif); - /* Let SCTP update the status for this ipif */ - sctp_update_ipif(ipif, SCTP_IPIF_UP); - } + if (ipif->ipif_addr_ready) + ipif_up_notify(ipif); return (0); bad: @@ -22774,7 +22736,7 @@ ill_phyint_reinit(ill_t *ill) * for the ill_names should ipmp_hook_emulation be turned on * later. */ - ill_nic_info_plumb(ill, B_FALSE); + ill_nic_event_plumb(ill, B_FALSE); } RELEASE_ILL_LOCKS(ill, ill_other); mutex_exit(&phyi->phyint_lock); @@ -22786,7 +22748,7 @@ ill_phyint_reinit(ill_t *ill) * It will be sent when we leave the ipsq. */ void -ill_nic_info_plumb(ill_t *ill, boolean_t group) +ill_nic_event_plumb(ill_t *ill, boolean_t group) { phyint_t *phyi = ill->ill_phyint; char *name; @@ -22803,33 +22765,7 @@ ill_nic_info_plumb(ill_t *ill, boolean_t group) name = ill->ill_name; } - (void) ill_hook_event_create(ill, 0, NE_PLUMB, name, namelen); -} - -/* - * Unhook the nic event message from the ill and enqueue it - * into the nic event taskq. 
- */ -void -ill_nic_info_dispatch(ill_t *ill) -{ - hook_nic_event_int_t *info; - - ASSERT(MUTEX_HELD(&ill->ill_lock)); - - if ((info = ill->ill_nic_event_info) != NULL) { - if (ddi_taskq_dispatch(eventq_queue_nic, - ip_ne_queue_func, info, DDI_SLEEP) == DDI_FAILURE) { - ip2dbg(("ill_nic_info_dispatch: " - "ddi_taskq_dispatch failed\n")); - if (info->hnei_event.hne_data != NULL) { - kmem_free(info->hnei_event.hne_data, - info->hnei_event.hne_datalen); - } - kmem_free(info, sizeof (*info)); - } - ill->ill_nic_event_info = NULL; - } + ill_nic_event_dispatch(ill, 0, NE_PLUMB, name, namelen); } /* @@ -23089,14 +23025,21 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) */ if (ill->ill_sap == 0) { if (ill->ill_isv6) - ill->ill_sap = IP6_DL_SAP; + ill->ill_sap = IP6_DL_SAP; else - ill->ill_sap = IP_DL_SAP; + ill->ill_sap = IP_DL_SAP; } ill->ill_ifname_pending = 1; ill->ill_ifname_pending_err = 0; + /* + * When the first ipif comes up in ipif_up_done(), multicast groups + * that were joined while this ill was not bound to the DLPI link need + * to be recovered by ill_recover_multicast(). 
+ */ + ill->ill_need_recover_multicast = 1; + ill_refhold(ill); rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); if ((error = ill_glist_insert(ill, interf_name, @@ -24443,51 +24386,27 @@ ill_hook_event2str(nic_event_t event) return ("DOWN"); case NE_ADDRESS_CHANGE: return ("ADDRESS_CHANGE"); + case NE_LIF_UP: + return ("LIF_UP"); + case NE_LIF_DOWN: + return ("LIF_DOWN"); default: return ("UNKNOWN"); } } -static void -ill_hook_event_destroy(ill_t *ill) -{ - hook_nic_event_int_t *info; - - if ((info = ill->ill_nic_event_info) != NULL) { - if (info->hnei_event.hne_data != NULL) { - kmem_free(info->hnei_event.hne_data, - info->hnei_event.hne_datalen); - } - kmem_free(info, sizeof (*info)); - - ill->ill_nic_event_info = NULL; - } - -} - -boolean_t -ill_hook_event_create(ill_t *ill, lif_if_t lif, nic_event_t event, +void +ill_nic_event_dispatch(ill_t *ill, lif_if_t lif, nic_event_t event, nic_event_data_t data, size_t datalen) { ip_stack_t *ipst = ill->ill_ipst; hook_nic_event_int_t *info; const char *str = NULL; - /* destroy nic event info if it exists */ - if ((info = ill->ill_nic_event_info) != NULL) { - str = ill_hook_event2str(info->hnei_event.hne_event); - ip2dbg(("ill_hook_event_create: unexpected nic event %s " - "attached for %s\n", str, ill->ill_name)); - ill_hook_event_destroy(ill); - } - /* create a new nic event info */ - info = kmem_alloc(sizeof (*info), KM_NOSLEEP); - if (info == NULL) + if ((info = kmem_alloc(sizeof (*info), KM_NOSLEEP)) == NULL) goto fail; - ill->ill_nic_event_info = info; - if (event == NE_UNPLUMB) info->hnei_event.hne_nic = ill->ill_phyint->phyint_ifindex; else @@ -24502,19 +24421,35 @@ ill_hook_event_create(ill_t *ill, lif_if_t lif, nic_event_t event, if (data != NULL && datalen != 0) { info->hnei_event.hne_data = kmem_alloc(datalen, KM_NOSLEEP); - if (info->hnei_event.hne_data != NULL) { - bcopy(data, info->hnei_event.hne_data, datalen); - info->hnei_event.hne_datalen = datalen; - } else { - ill_hook_event_destroy(ill); + if 
(info->hnei_event.hne_data == NULL) goto fail; - } + bcopy(data, info->hnei_event.hne_data, datalen); + info->hnei_event.hne_datalen = datalen; } - return (B_TRUE); + if (ddi_taskq_dispatch(eventq_queue_nic, ip_ne_queue_func, info, + DDI_NOSLEEP) == DDI_SUCCESS) + return; + fail: + if (info != NULL) { + if (info->hnei_event.hne_data != NULL) { + kmem_free(info->hnei_event.hne_data, + info->hnei_event.hne_datalen); + } + kmem_free(info, sizeof (hook_nic_event_t)); + } str = ill_hook_event2str(event); - ip2dbg(("ill_hook_event_create: could not attach %s nic event " + ip2dbg(("ill_nic_event_dispatch: could not dispatch %s nic event " "information for %s (ENOMEM)\n", str, ill->ill_name)); - return (B_FALSE); +} + +void +ipif_up_notify(ipif_t *ipif) +{ + ip_rts_ifmsg(ipif); + ip_rts_newaddrmsg(RTM_ADD, 0, ipif); + sctp_update_ipif(ipif, SCTP_IPIF_UP); + ill_nic_event_dispatch(ipif->ipif_ill, MAP_IPIF_ID(ipif->ipif_id), + NE_LIF_UP, NULL, 0); } diff --git a/usr/src/uts/common/inet/ip/ip_multi.c b/usr/src/uts/common/inet/ip/ip_multi.c index 7336294b26..7a036a34d9 100644 --- a/usr/src/uts/common/inet/ip/ip_multi.c +++ b/usr/src/uts/common/inet/ip/ip_multi.c @@ -24,8 +24,6 @@ */ /* Copyright (c) 1990 Mentat Inc. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/stream.h> #include <sys/dlpi.h> @@ -626,7 +624,7 @@ ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, if (ilm_numentries_v6(ill, &v6group) > 1) return (0); if (ill->ill_group == NULL) - ret = ip_join_allmulti(ipif); + ret = ill_join_allmulti(ill); else ret = ill_nominate_mcast_rcv(ill->ill_group); if (ret != 0) @@ -718,7 +716,7 @@ ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, if (ilm_numentries_v6(ill, v6group) > 1) return (0); if (ill->ill_group == NULL) - ret = ip_join_allmulti(ill->ill_ipif); + ret = ill_join_allmulti(ill); else ret = ill_nominate_mcast_rcv(ill->ill_group); @@ -854,7 +852,6 @@ ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) ill_t *ill = ipif->ipif_ill; ilm_t *ilm; in6_addr_t v6group; - int ret; ASSERT(IAM_WRITER_IPIF(ipif)); @@ -899,20 +896,13 @@ ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) if (ilm_numentries_v6(ill, &v6group) != 0) return (0); - /* - * If we never joined, then don't leave. This can happen - * if we're in an IPMP group, since only one ill per IPMP - * group receives all multicast packets. - */ - if (!ill->ill_join_allmulti) { - ASSERT(ill->ill_group != NULL); - return (0); + /* If we never joined, then don't leave. 
*/ + if (ill->ill_join_allmulti) { + ill_leave_allmulti(ill); + if (ill->ill_group != NULL) + (void) ill_nominate_mcast_rcv(ill->ill_group); } - - ret = ip_leave_allmulti(ipif); - if (ill->ill_group != NULL) - (void) ill_nominate_mcast_rcv(ill->ill_group); - return (ret); + return (0); } if (!IS_LOOPBACK(ill)) @@ -939,7 +929,6 @@ ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, { ipif_t *ipif; ilm_t *ilm; - int ret; ASSERT(IAM_WRITER_ILL(ill)); @@ -995,20 +984,13 @@ ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, if (ilm_numentries_v6(ill, v6group) != 0) return (0); - /* - * If we never joined, then don't leave. This can happen - * if we're in an IPMP group, since only one ill per IPMP - * group receives all multicast packets. - */ - if (!ill->ill_join_allmulti) { - ASSERT(ill->ill_group != NULL); - return (0); + /* If we never joined, then don't leave. */ + if (ill->ill_join_allmulti) { + ill_leave_allmulti(ill); + if (ill->ill_group != NULL) + (void) ill_nominate_mcast_rcv(ill->ill_group); } - - ret = ip_leave_allmulti(ipif); - if (ill->ill_group != NULL) - (void) ill_nominate_mcast_rcv(ill->ill_group); - return (ret); + return (0); } if (!IS_LOOPBACK(ill)) @@ -1123,13 +1105,12 @@ ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) * one ill joining the allmulti group. */ int -ip_join_allmulti(ipif_t *ipif) +ill_join_allmulti(ill_t *ill) { - ill_t *ill = ipif->ipif_ill; - mblk_t *mp; + mblk_t *promiscon_mp, *promiscoff_mp; uint32_t addrlen, addroff; - ASSERT(IAM_WRITER_IPIF(ipif)); + ASSERT(IAM_WRITER_ILL(ill)); if (!ill->ill_dl_up) { /* @@ -1142,18 +1123,25 @@ ip_join_allmulti(ipif_t *ipif) ASSERT(!ill->ill_join_allmulti); /* - * Create a DL_PROMISCON_REQ message and send it directly to - * the DLPI provider. We don't need to do this for certain - * media types for which we never need to turn promiscuous - * mode on. + * Create a DL_PROMISCON_REQ message and send it directly to the DLPI + * provider. 
We don't need to do this for certain media types for + * which we never need to turn promiscuous mode on. While we're here, + * pre-allocate a DL_PROMISCOFF_REQ message to make sure that + * ill_leave_allmulti() will not fail due to low memory conditions. */ if ((ill->ill_net_type == IRE_IF_RESOLVER) && !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { - mp = ill_create_dl(ill, DL_PROMISCON_REQ, + promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ, sizeof (dl_promiscon_req_t), &addrlen, &addroff); - if (mp == NULL) + promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, + sizeof (dl_promiscoff_req_t), &addrlen, &addroff); + if (promiscon_mp == NULL || promiscoff_mp == NULL) { + freemsg(promiscon_mp); + freemsg(promiscoff_mp); return (ENOMEM); - ill_dlpi_send(ill, mp); + } + ill->ill_promiscoff_mp = promiscoff_mp; + ill_dlpi_send(ill, promiscon_mp); } ill->ill_join_allmulti = B_TRUE; @@ -1166,21 +1154,19 @@ ip_join_allmulti(ipif_t *ipif) * With ill groups, we need to nominate some other ill as * this ipif->ipif_ill is leaving the group. */ -int -ip_leave_allmulti(ipif_t *ipif) +void +ill_leave_allmulti(ill_t *ill) { - ill_t *ill = ipif->ipif_ill; - mblk_t *mp; - uint32_t addrlen, addroff; + mblk_t *promiscoff_mp = ill->ill_promiscoff_mp; - ASSERT(IAM_WRITER_IPIF(ipif)); + ASSERT(IAM_WRITER_ILL(ill)); if (!ill->ill_dl_up) { /* * Nobody there. All multicast addresses will be re-joined * when we get the DL_BIND_ACK bringing the interface up. 
*/ - return (0); + return; } ASSERT(ill->ill_join_allmulti); @@ -1193,18 +1179,97 @@ ip_leave_allmulti(ipif_t *ipif) */ if ((ill->ill_net_type == IRE_IF_RESOLVER) && !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { - mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, - sizeof (dl_promiscoff_req_t), &addrlen, &addroff); - if (mp == NULL) - return (ENOMEM); - ill_dlpi_send(ill, mp); + ASSERT(promiscoff_mp != NULL); + ill->ill_promiscoff_mp = NULL; + ill_dlpi_send(ill, promiscoff_mp); } ill->ill_join_allmulti = B_FALSE; +} + +static ill_t * +ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) +{ + ill_t *ill; + boolean_t in_ipsq; + + ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL, + ipst); + if (ill != NULL) { + if (!ill_waiter_inc(ill)) { + ill_refrele(ill); + return (NULL); + } + ill_refrele(ill); + in_ipsq = ipsq_enter(ill, B_FALSE); + ill_waiter_dcr(ill); + if (!in_ipsq) + ill = NULL; + } + return (ill); +} + +int +ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) +{ + ill_t *ill; + int ret; + + if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL) + return (ENODEV); + if (isv6) { + ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ifindex, + ill->ill_zoneid, ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL); + } else { + ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE, + MODE_IS_EXCLUDE, NULL); + } + ill->ill_ipallmulti_cnt++; + ipsq_exit(ill->ill_phyint->phyint_ipsq); + return (ret); +} + +int +ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) +{ + ill_t *ill; + + if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL) + return (ENODEV); + ASSERT(ill->ill_ipallmulti_cnt != 0); + if (isv6) { + (void) ip_delmulti_v6(&ipv6_all_zeros, ill, ifindex, + ill->ill_zoneid, B_TRUE, B_TRUE); + } else { + (void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, B_TRUE); + } + ill->ill_ipallmulti_cnt--; + ipsq_exit(ill->ill_phyint->phyint_ipsq); return (0); } /* + * Delete the allmulti memberships that 
were added as part of + * ip_join_allmulti(). + */ +void +ip_purge_allmulti(ill_t *ill) +{ + ASSERT(IAM_WRITER_ILL(ill)); + + for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) { + if (ill->ill_isv6) { + (void) ip_delmulti_v6(&ipv6_all_zeros, ill, + ill->ill_phyint->phyint_ifindex, ill->ill_zoneid, + B_TRUE, B_TRUE); + } else { + (void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, + B_TRUE); + } + } +} + +/* * Copy mp_orig and pass it in as a local message. */ void @@ -1477,6 +1542,9 @@ ill_recover_multicast(ill_t *ill) char addrbuf[INET6_ADDRSTRLEN]; ASSERT(IAM_WRITER_ILL(ill)); + + ill->ill_need_recover_multicast = 0; + ILM_WALKER_HOLD(ill); for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { /* @@ -1492,7 +1560,7 @@ ill_recover_multicast(ill_t *ill) sizeof (addrbuf)))); if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { if (ill->ill_group == NULL) { - (void) ip_join_allmulti(ill->ill_ipif); + (void) ill_join_allmulti(ill); } else { /* * We don't want to join on this ill, @@ -1522,6 +1590,9 @@ ill_leave_multicast(ill_t *ill) char addrbuf[INET6_ADDRSTRLEN]; ASSERT(IAM_WRITER_ILL(ill)); + + ill->ill_need_recover_multicast = 1; + ILM_WALKER_HOLD(ill); for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { /* @@ -1536,7 +1607,7 @@ ill_leave_multicast(ill_t *ill) inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf)))); if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { - (void) ip_leave_allmulti(ill->ill_ipif); + ill_leave_allmulti(ill); /* * If we were part of an IPMP group, then * ill_handoff_responsibility() has already diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c index 2aec2675b8..b53897cefe 100644 --- a/usr/src/uts/common/inet/ip/ip_ndp.c +++ b/usr/src/uts/common/inet/ip/ip_ndp.c @@ -23,8 +23,6 @@ * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/stream.h> #include <sys/stropts.h> @@ -2561,11 +2559,8 @@ ndp_timer(void *arg) "%s on %s", sbuf, ibuf); } if ((ipif->ipif_flags & IPIF_UP) && - !ipif->ipif_addr_ready) { - ip_rts_ifmsg(ipif); - ip_rts_newaddrmsg(RTM_ADD, 0, ipif); - sctp_update_ipif(ipif, SCTP_IPIF_UP); - } + !ipif->ipif_addr_ready) + ipif_up_notify(ipif); ipif->ipif_addr_ready = 1; ipif_refrele(ipif); } diff --git a/usr/src/uts/common/inet/ip/ip_netinfo.c b/usr/src/uts/common/inet/ip/ip_netinfo.c index b7ee4dbd6c..a34b55693e 100644 --- a/usr/src/uts/common/inet/ip/ip_netinfo.c +++ b/usr/src/uts/common/inet/ip/ip_netinfo.c @@ -59,6 +59,10 @@ static int ip_getmtu(net_handle_t, phy_if_t, lif_if_t); static int ip_getpmtuenabled(net_handle_t); static int ip_getlifaddr(net_handle_t, phy_if_t, lif_if_t, size_t, net_ifaddr_t [], void *); +static int ip_getlifzone(net_handle_t, phy_if_t, lif_if_t, + zoneid_t *); +static int ip_getlifflags(net_handle_t, phy_if_t, lif_if_t, + uint64_t *); static phy_if_t ip_phygetnext(net_handle_t, phy_if_t); static phy_if_t ip_phylookup(net_handle_t, const char *); static lif_if_t ip_lifgetnext(net_handle_t, phy_if_t, lif_if_t); @@ -73,6 +77,10 @@ static int ipv6_getifname(net_handle_t, phy_if_t, char *, static int ipv6_getmtu(net_handle_t, phy_if_t, lif_if_t); static int ipv6_getlifaddr(net_handle_t, phy_if_t, lif_if_t, size_t, net_ifaddr_t [], void *); +static int ipv6_getlifzone(net_handle_t, phy_if_t, lif_if_t, + zoneid_t *); +static int ipv6_getlifflags(net_handle_t, phy_if_t, lif_if_t, + uint64_t *); static phy_if_t ipv6_phygetnext(net_handle_t, phy_if_t); static phy_if_t ipv6_phylookup(net_handle_t, const char *); static lif_if_t ipv6_lifgetnext(net_handle_t, phy_if_t, lif_if_t); @@ -111,6 +119,8 @@ static net_protocol_t ipv4info = { ip_getmtu, ip_getpmtuenabled, ip_getlifaddr, + ip_getlifzone, + ip_getlifflags, ip_phygetnext, ip_phylookup, ip_lifgetnext, @@ -128,6 +138,8 @@ static 
net_protocol_t ipv6info = { ipv6_getmtu, ip_getpmtuenabled, ipv6_getlifaddr, + ipv6_getlifzone, + ipv6_getlifflags, ipv6_phygetnext, ipv6_phylookup, ipv6_lifgetnext, @@ -891,6 +903,7 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6, ip6_t *ip6h; ire_t *ire; mblk_t *mp; + zoneid_t zoneid; ASSERT(packet != NULL); ASSERT(packet->ni_packet != NULL); @@ -935,6 +948,8 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6, * provide similar functionality for IPv6. */ mp = packet->ni_packet; + zoneid = + netstackid_to_zoneid(ipst->ips_netstack->netstack_stackid); if (!isv6) { struct sockaddr *sock; @@ -946,8 +961,7 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6, * Currently this function only supports IPv4. */ switch (ipfil_sendpkt(sock, mp, packet->ni_physical, - netstackid_to_zoneid( - ipst->ips_netstack->netstack_stackid))) { + zoneid)) { case 0 : case EINPROGRESS: return (0); @@ -989,7 +1003,7 @@ ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6, ire->ire_ipif->ipif_ill, ipha_t *, NULL, ip6_t *, ip6h, int, 1); ip_wput_local_v6(ire->ire_rfq, - ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0); + ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0, zoneid); ire_refrele(ire); return (0); } @@ -1409,6 +1423,78 @@ ip_getifaddr_type(sa_family_t family, ipif_t *ill_ipif, } /* + * Shared implementation to determine the zoneid associated with an IPv4/IPv6 + * address + */ +static int +ip_getlifzone_impl(sa_family_t family, phy_if_t phy_ifdata, lif_if_t ifdata, + ip_stack_t *ipst, zoneid_t *zoneid) +{ + ipif_t *ipif; + + ipif = ipif_getby_indexes((uint_t)phy_ifdata, + UNMAP_IPIF_ID((uint_t)ifdata), (family == AF_INET6), ipst); + if (ipif == NULL) + return (-1); + *zoneid = IP_REAL_ZONEID(ipif->ipif_zoneid, ipst); + ipif_refrele(ipif); + return (0); +} + +/* + * Determine the zoneid associated with an IPv4 address + */ +static int +ip_getlifzone(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata, + zoneid_t 
*zoneid) +{ + return (ip_getlifzone_impl(AF_INET, phy_ifdata, ifdata, + neti->netd_stack->nts_netstack->netstack_ip, zoneid)); +} + +/* + * Determine the zoneid associated with an IPv6 address + */ +static int +ipv6_getlifzone(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata, + zoneid_t *zoneid) +{ + return (ip_getlifzone_impl(AF_INET6, phy_ifdata, ifdata, + neti->netd_stack->nts_netstack->netstack_ip, zoneid)); +} + +static int +ip_getlifflags_impl(sa_family_t family, phy_if_t phy_ifdata, lif_if_t ifdata, + ip_stack_t *ipst, uint64_t *flags) +{ + ipif_t *ipif; + + ipif = ipif_getby_indexes((uint_t)phy_ifdata, + UNMAP_IPIF_ID((uint_t)ifdata), (family == AF_INET6), ipst); + if (ipif == NULL) + return (-1); + *flags = ipif->ipif_flags; + ipif_refrele(ipif); + return (0); +} + +static int +ip_getlifflags(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata, + uint64_t *flags) +{ + return (ip_getlifflags_impl(AF_INET, phy_ifdata, ifdata, + neti->netd_stack->nts_netstack->netstack_ip, flags)); +} + +static int +ipv6_getlifflags(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata, + uint64_t *flags) +{ + return (ip_getlifflags_impl(AF_INET6, phy_ifdata, ifdata, + neti->netd_stack->nts_netstack->netstack_ip, flags)); +} + +/* * Deliver packet up into the kernel, immitating its reception by a * network interface. */ diff --git a/usr/src/uts/common/inet/ip6.h b/usr/src/uts/common/inet/ip6.h index 9fb4b88367..1dbe8c3dd1 100644 --- a/usr/src/uts/common/inet/ip6.h +++ b/usr/src/uts/common/inet/ip6.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _INET_IP6_H #define _INET_IP6_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -366,7 +364,7 @@ extern int ip_total_hdrs_len_v6(ip6_pkt_t *); extern int ipsec_ah_get_hdr_size_v6(mblk_t *, boolean_t); extern void ip_wput_v6(queue_t *, mblk_t *); extern void ip_wput_local_v6(queue_t *, ill_t *, ip6_t *, mblk_t *, - ire_t *, int); + ire_t *, int, zoneid_t); extern void ip_output_v6(void *, mblk_t *, void *, int); extern void ip_xmit_v6(mblk_t *, ire_t *, uint_t, conn_t *, int, struct ipsec_out_s *); diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h index 8c63081ca6..1bd5b47a9f 100644 --- a/usr/src/uts/common/inet/ip_if.h +++ b/usr/src/uts/common/inet/ip_if.h @@ -27,8 +27,6 @@ #ifndef _INET_IP_IF_H #define _INET_IP_IF_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <net/route.h> #ifdef __cplusplus @@ -260,6 +258,7 @@ extern int ipif_ndp_up(ipif_t *); extern int ipif_ndp_setup_multicast(ipif_t *, struct nce_s **); extern int ipif_up_done(ipif_t *); extern int ipif_up_done_v6(ipif_t *); +extern void ipif_up_notify(ipif_t *); extern void ipif_update_other_ipifs_v6(ipif_t *, ill_group_t *); extern void ipif_recreate_interface_routes_v6(ipif_t *, ipif_t *); extern void ill_update_source_selection(ill_t *); diff --git a/usr/src/uts/common/inet/ip_impl.h b/usr/src/uts/common/inet/ip_impl.h index 351b63ed98..d993e5f6b4 100644 --- a/usr/src/uts/common/inet/ip_impl.h +++ b/usr/src/uts/common/inet/ip_impl.h @@ -26,8 +26,6 @@ #ifndef _INET_IP_IMPL_H #define _INET_IP_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * IP implementation private declarations. These interfaces are * used to build the IP module and are not meant to be accessed @@ -507,7 +505,7 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t; * Macro that hands off one or more messages directly to DLD * when the interface is marked with ILL_CAPAB_POLL. 
*/ -#define IP_DLS_ILL_TX(ill, ipha, mp, ipst) { \ +#define IP_DLS_ILL_TX(ill, ipha, mp, ipst, hlen) { \ ill_dls_capab_t *ill_dls = ill->ill_dls_capab; \ ASSERT(ILL_DLS_CAPABLE(ill)); \ ASSERT(ill_dls != NULL); \ @@ -520,7 +518,15 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t; ipst->ips_ipv4firewall_physical_out, \ NULL, ill, ipha, mp, mp, 0, ipst); \ DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); \ - if (mp != NULL) { \ + if (mp != NULL) { \ + if (ipst->ips_ipobs_enabled) { \ + zoneid_t szone; \ + \ + szone = ip_get_zoneid_v4(ipha->ipha_src, mp, \ + ipst, ALL_ZONES); \ + ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, \ + ALL_ZONES, ill, IPV4_VERSION, hlen, ipst); \ + } \ DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, \ void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, \ ipha_t *, ipha, ip6_t *, NULL, int, 0); \ @@ -528,9 +534,23 @@ typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t; } \ } +/* + * In non-global zone exclusive IP stacks, data structures such as IRE + * entries pretend that they're in the global zone. The following + * macro evaluates to the real zoneid instead of a pretend + * GLOBAL_ZONEID. + */ +#define IP_REAL_ZONEID(zoneid, ipst) \ + (((zoneid) == GLOBAL_ZONEID) ? 
\ + netstackid_to_zoneid((ipst)->ips_netstack->netstack_stackid) : \ + (zoneid)) + extern int ip_wput_frag_mdt_min; extern boolean_t ip_can_frag_mdt(mblk_t *, ssize_t, ssize_t); extern mblk_t *ip_prepend_zoneid(mblk_t *, zoneid_t, ip_stack_t *); +extern zoneid_t ip_get_zoneid_v4(ipaddr_t, mblk_t *, ip_stack_t *, zoneid_t); +extern zoneid_t ip_get_zoneid_v6(in6_addr_t *, mblk_t *, const ill_t *, + ip_stack_t *, zoneid_t); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/ip_multi.h b/usr/src/uts/common/inet/ip_multi.h index 47aac771b9..a3f4282cc7 100644 --- a/usr/src/uts/common/inet/ip_multi.h +++ b/usr/src/uts/common/inet/ip_multi.h @@ -27,8 +27,6 @@ #ifndef _INET_IP_MULTI_H #define _INET_IP_MULTI_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -99,8 +97,11 @@ extern int ip_addmulti_v6(const in6_addr_t *, ill_t *, int, extern int ip_delmulti(ipaddr_t, ipif_t *, boolean_t, boolean_t); extern int ip_delmulti_v6(const in6_addr_t *, ill_t *, int, zoneid_t, boolean_t, boolean_t); -extern int ip_join_allmulti(ipif_t *); -extern int ip_leave_allmulti(ipif_t *); +extern int ill_join_allmulti(ill_t *); +extern void ill_leave_allmulti(ill_t *); +extern int ip_join_allmulti(uint_t, boolean_t, ip_stack_t *); +extern int ip_leave_allmulti(uint_t, boolean_t, ip_stack_t *); +extern void ip_purge_allmulti(ill_t *); extern void ip_multicast_loopback(queue_t *, ill_t *, mblk_t *, int, zoneid_t); extern int ip_mforward(ill_t *, ipha_t *, mblk_t *); diff --git a/usr/src/uts/common/inet/ip_stack.h b/usr/src/uts/common/inet/ip_stack.h index 48eeea10dd..b788b95fa0 100644 --- a/usr/src/uts/common/inet/ip_stack.h +++ b/usr/src/uts/common/inet/ip_stack.h @@ -412,6 +412,12 @@ struct ip_stack { net_handle_t ips_ipv4_net_data; net_handle_t ips_ipv6_net_data; + + boolean_t ips_ipobs_enabled; + list_t ips_ipobs_cb_list; + kmutex_t ips_ipobs_cb_lock; + uint_t ips_ipobs_cb_nwalkers; + kcondvar_t ips_ipobs_cb_cv; }; typedef struct ip_stack ip_stack_t; diff 
--git a/usr/src/uts/common/inet/ipnet.h b/usr/src/uts/common/inet/ipnet.h new file mode 100644 index 0000000000..234b14f1d6 --- /dev/null +++ b/usr/src/uts/common/inet/ipnet.h @@ -0,0 +1,201 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _INET_IPNET_H +#define _INET_IPNET_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/netstack.h> +#include <sys/list.h> +#include <netinet/in.h> +#include <net/if.h> +#include <sys/avl.h> +#include <sys/neti.h> + +/* + * Structure used to hold information for both IPv4 and IPv6 addresses. + */ +typedef struct ipnetif_addr { + union { + ipaddr_t ifau_ip4addr; + in6_addr_t ifau_ip6addr; + } ifa_addr; + ipaddr_t ifa_brdaddr; + zoneid_t ifa_zone; + uint64_t ifa_id; + list_node_t ifa_link; +} ipnetif_addr_t; +#define ifa_ip4addr ifa_addr.ifau_ip4addr +#define ifa_ip6addr ifa_addr.ifau_ip6addr + +/* + * Structure describes the ipnet module representation of an ip interface. + * The structure holds both IPv4 and IPv6 addresses, the address lists are + * protected by a mutex. 
The ipnetif structures are held per stack instance + * within avl trees indexed on name and ip index. + */ +typedef struct ipnetif { + char if_name[LIFNAMSIZ]; + uint_t if_flags; + uint64_t if_index; + kmutex_t if_addr_lock; /* protects both addr lists */ + list_t if_ip4addr_list; + list_t if_ip6addr_list; + avl_node_t if_avl_by_index; + avl_node_t if_avl_by_name; + dev_t if_dev; + uint_t if_multicnt; /* protected by ips_event_lock */ + kmutex_t if_reflock; /* protects if_refcnt */ + uint_t if_refcnt; +} ipnetif_t; + +/* if_flags */ +#define IPNETIF_IPV4PLUMBED 0x01 +#define IPNETIF_IPV6PLUMBED 0x02 +#define IPNETIF_IPV4ALLMULTI 0x04 +#define IPNETIF_IPV6ALLMULTI 0x08 + +/* + * Structure used by the accept callback function. This is simply an address + * pointer into a packet (either IPv4 or IPv6), along with an address family + * that denotes which pointer is valid. + */ +typedef struct ipnet_addrp { + sa_family_t iap_family; + union { + ipaddr_t *iapu_addr4; + in6_addr_t *iapu_addr6; + } iap_addrp; +} ipnet_addrp_t; +#define iap_addr4 iap_addrp.iapu_addr4 +#define iap_addr6 iap_addrp.iapu_addr6 + +struct ipnet; +struct ipobs_hook_data; +typedef boolean_t ipnet_acceptfn_t(struct ipnet *, struct ipobs_hook_data *, + ipnet_addrp_t *, ipnet_addrp_t *); + +/* + * Per instance data for all open streams. Instance data is held on a + * per netstack list see struct ipnet_stack below. 
+ */ +typedef struct ipnet { + queue_t *ipnet_rq; /* read queue pointer */ + minor_t ipnet_minor; /* minor number for this instance */ + ipnetif_t *ipnet_if; /* ipnetif for this open instance */ + zoneid_t ipnet_zoneid; /* zoneid the device was opened in */ + uint16_t ipnet_flags; /* see below */ + t_scalar_t ipnet_sap; /* sap this instance is bound to */ + t_uscalar_t ipnet_dlstate; /* dlpi state */ + list_node_t ipnet_next; /* list next member */ + netstack_t *ipnet_ns; /* netstack of zone we were opened in */ + ipnet_acceptfn_t *ipnet_acceptfn; /* accept callback function pointer */ +} ipnet_t; + +/* ipnet_flags */ +#define IPNET_PROMISC_PHYS 0x01 +#define IPNET_PROMISC_MULTI 0x02 +#define IPNET_PROMISC_SAP 0x04 +#define IPNET_INFO 0x08 +#define IPNET_LOMODE 0x10 + +/* + * Per-netstack data holding: + * - net_handle_t references for IPv4 and IPv6 for this netstack. + * - avl trees by name and index for ip interfaces associated with this + * netstack. The trees are protected by ips_avl_lock. + * - ips_str_list is a list of open client streams. ips_walkers_lock in + * conjunction with ips_walkers_cv and ips_walkers_cnt synchronize access to + * the list. The count is incremented in ipnet_dispatch() at the start of a + * walk and decremented when the walk is finished. If the walkers count is 0 + * then we cv_broadcast() waiting any threads waiting on the walkers count. + * - ips_event_lock synchronizes ipnet_if_init() and incoming NIC info events. + * We cannot be processing any NIC info events while initializing interfaces + * in ipnet_if_init(). + * + * Note on lock ordering: If a thread needs to both hold the ips_event_lock + * and any other lock such as ips_walkers_lock, ips_avl_lock, or if_addr_lock, + * the ips_event_lock must be held first. This lock ordering is mandated by + * ipnet_nicevent_cb() which must always grab ips_event_lock before continuing + * with processing NIC events. 
+ */ +typedef struct ipnet_stack { + net_handle_t ips_ndv4; + net_handle_t ips_ndv6; + netstack_t *ips_netstack; + hook_t *ips_nicevents; + kmutex_t ips_event_lock; + kmutex_t ips_avl_lock; + avl_tree_t ips_avl_by_index; + avl_tree_t ips_avl_by_name; + kmutex_t ips_walkers_lock; + kcondvar_t ips_walkers_cv; + uint_t ips_walkers_cnt; + list_t ips_str_list; + uint64_t ips_drops; +} ipnet_stack_t; + +/* + * Template for dl_info_ack_t initialization. We don't have an address, so we + * set the address length to just the SAP length (16 bits). We don't really + * have a maximum SDU, but setting it to UINT_MAX proved problematic with + * applications that performed arithmetic on dl_max_sdu and wrapped around, so + * we sleaze out and use INT_MAX. + */ +#define IPNET_INFO_ACK_INIT { \ + DL_INFO_ACK, /* dl_primitive */ \ + INT_MAX, /* dl_max_sdu */ \ + 0, /* dl_min_sdu */ \ + sizeof (uint16_t), /* dl_addr_length */ \ + DL_IPNET, /* dl_mac_type */ \ + 0, /* dl_reserved */ \ + 0, /* dl_current_state */ \ + sizeof (uint16_t), /* dl_sap_length */ \ + DL_CLDLS, /* dl_service_mode */ \ + 0, /* dl_qos_length */ \ + 0, /* dl_qos_offset */ \ + 0, /* dl_range_length */ \ + 0, /* dl_range_offset */ \ + DL_STYLE1, /* dl_provider_style */ \ + 0, /* dl_addr_offset */ \ + DL_VERSION_2, /* dl_version */ \ + 0, /* dl_brdcst_addr_length */ \ + 0 /* dl_brdcst_addr_offset */ \ +} + +typedef void ipnet_walkfunc_t(const char *, void *, dev_t); +extern void ipnet_walk_if(ipnet_walkfunc_t *, void *, zoneid_t); +extern dev_t ipnet_if_getdev(char *, zoneid_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_IPNET_H */ diff --git a/usr/src/uts/common/inet/ipnet/ipnet.c b/usr/src/uts/common/inet/ipnet/ipnet.c new file mode 100644 index 0000000000..78806d3ddb --- /dev/null +++ b/usr/src/uts/common/inet/ipnet/ipnet.c @@ -0,0 +1,1722 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The ipnet device defined here provides access to packets at the IP layer. To + * provide access to packets at this layer it registers a callback function in + * the ip module and when there are open instances of the device ip will pass + * packets into the device. Packets from ip are passed on the input, output and + * loopback paths. Internally the module returns to ip as soon as possible by + * deferring processing using a taskq. + * + * Management of the devices in /dev/ipnet/ is handled by the devname + * filesystem and use of the neti interfaces. This module registers for NIC + * events using the neti framework so that when IP interfaces are bought up, + * taken down etc. the ipnet module is notified and its view of the interfaces + * configured on the system adjusted. On attach, the module gets an initial + * view of the system again using the neti framework but as it has already + * registered for IP interface events, it is still up-to-date with any changes. 
+ */ + +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/cred.h> +#include <sys/stat.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/modctl.h> +#include <sys/dlpi.h> +#include <sys/strsun.h> +#include <sys/id_space.h> +#include <sys/kmem.h> +#include <sys/mkdev.h> +#include <sys/neti.h> +#include <net/if.h> +#include <sys/errno.h> +#include <sys/list.h> +#include <sys/ksynch.h> +#include <sys/hook_event.h> +#include <sys/stropts.h> +#include <sys/sysmacros.h> +#include <inet/ip.h> +#include <inet/ip_multi.h> +#include <inet/ip6.h> +#include <inet/ipnet.h> + +static struct module_info ipnet_minfo = { + 1, /* mi_idnum */ + "ipnet", /* mi_idname */ + 0, /* mi_minpsz */ + INFPSZ, /* mi_maxpsz */ + 2048, /* mi_hiwat */ + 0 /* mi_lowat */ +}; + +/* + * List to hold static view of ipnetif_t's on the system. This is needed to + * avoid holding the lock protecting the avl tree of ipnetif's over the + * callback into the dev filesystem. + */ +typedef struct ipnetif_cbdata { + char ic_ifname[LIFNAMSIZ]; + dev_t ic_dev; + list_node_t ic_next; +} ipnetif_cbdata_t; + +/* + * Convenience enumerated type for ipnet_accept(). It describes the + * properties of a given ipnet_addrp_t relative to a single ipnet_t + * client stream. The values represent whether the address is ... + */ +typedef enum { + IPNETADDR_MYADDR, /* an address on my ipnetif_t. */ + IPNETADDR_MBCAST, /* a multicast or broadcast address. */ + IPNETADDR_UNKNOWN /* none of the above. */ +} ipnet_addrtype_t; + +/* Argument used for the ipnet_nicevent_taskq callback. 
*/ +typedef struct ipnet_nicevent_s { + nic_event_t ipne_event; + net_handle_t ipne_protocol; + netstackid_t ipne_stackid; + uint64_t ipne_ifindex; + uint64_t ipne_lifindex; + char ipne_ifname[LIFNAMSIZ]; +} ipnet_nicevent_t; + +static dev_info_t *ipnet_dip; +static major_t ipnet_major; +static ddi_taskq_t *ipnet_taskq; /* taskq for packets */ +static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */ +static id_space_t *ipnet_minor_space; +static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */ +static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */ +static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT; +static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept; + +static void ipnet_input(mblk_t *); +static int ipnet_wput(queue_t *, mblk_t *); +static int ipnet_rsrv(queue_t *); +static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *); +static int ipnet_close(queue_t *); +static void ipnet_ioctl(queue_t *, mblk_t *); +static void ipnet_iocdata(queue_t *, mblk_t *); +static void ipnet_wputnondata(queue_t *, mblk_t *); +static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t); +static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t); +static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); +static void ipnet_inforeq(queue_t *q, mblk_t *mp); +static void ipnet_bindreq(queue_t *q, mblk_t *mp); +static void ipnet_unbindreq(queue_t *q, mblk_t *mp); +static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp); +static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp); +static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *); +static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *); +static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *); +static void ipnet_nicevent_task(void *); +static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *); +static void ipnet_remove_if(ipnetif_t *, ipnet_stack_t *); +static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t); 
+static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *); +static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *); +static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *); +static void ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *); +static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t); +static int ipnet_if_compare_name(const void *, const void *); +static int ipnet_if_compare_index(const void *, const void *); +static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t); +static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t); +static void ipnetif_refhold(ipnetif_t *); +static void ipnetif_refrele(ipnetif_t *); +static void ipnet_walkers_inc(ipnet_stack_t *); +static void ipnet_walkers_dec(ipnet_stack_t *); +static void ipnet_register_netihook(ipnet_stack_t *); +static void *ipnet_stack_init(netstackid_t, netstack_t *); +static void ipnet_stack_fini(netstackid_t, void *); + +static struct qinit ipnet_rinit = { + NULL, /* qi_putp */ + ipnet_rsrv, /* qi_srvp */ + ipnet_open, /* qi_qopen */ + ipnet_close, /* qi_qclose */ + NULL, /* qi_qadmin */ + &ipnet_minfo, /* qi_minfo */ +}; + +static struct qinit ipnet_winit = { + ipnet_wput, /* qi_putp */ + NULL, /* qi_srvp */ + NULL, /* qi_qopen */ + NULL, /* qi_qclose */ + NULL, /* qi_qadmin */ + &ipnet_minfo, /* qi_minfo */ +}; + +static struct streamtab ipnet_info = { + &ipnet_rinit, &ipnet_winit +}; + +DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach, + ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info, + ddi_quiesce_not_supported); + +static struct modldrv modldrv = { + &mod_driverops, + "STREAMS ipnet driver", + &ipnet_ops +}; + +static struct modlinkage modlinkage = { + MODREV_1, &modldrv, NULL +}; + +/* + * Walk the list of physical interfaces on the machine, for each + * interface create a new ipnetif_t and add any addresses to it. We + * need to do the walk twice, once for IPv4 and once for IPv6. 
+ * + * The interfaces are destroyed as part of ipnet_stack_fini() for each + * stack. Note that we cannot do this initialization in + * ipnet_stack_init(), since ipnet_stack_init() cannot fail. + */ +static int +ipnet_if_init(void) +{ + netstack_handle_t nh; + netstack_t *ns; + ipnet_stack_t *ips; + int ret = 0; + + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + ips = ns->netstack_ipnet; + if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) != 0) + break; + if ((ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE)) != 0) + break; + } + netstack_next_fini(&nh); + return (ret); +} + +/* + * Standard module entry points. + */ +int +_init(void) +{ + int ret; + + if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1) + return (ENODEV); + ipnet_minor_space = id_space_create("ipnet_minor_space", + IPNET_MINOR_MIN, MAXMIN32); + netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini); + /* + * We call ddi_taskq_create() with nthread == 1 to ensure in-order + * delivery of packets to clients. 
+ */ + ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0); + ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue", + 1, TASKQ_DEFAULTPRI, 0); + if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) { + ret = ENOMEM; + goto done; + } + if ((ret = ipnet_if_init()) == 0) + ret = mod_install(&modlinkage); +done: + if (ret != 0) { + if (ipnet_taskq != NULL) + ddi_taskq_destroy(ipnet_taskq); + if (ipnet_nicevent_taskq != NULL) + ddi_taskq_destroy(ipnet_nicevent_taskq); + netstack_unregister(NS_IPNET); + id_space_destroy(ipnet_minor_space); + } + return (ret); +} + +int +_fini(void) +{ + int err; + + if ((err = mod_remove(&modlinkage)) != 0) + return (err); + ddi_taskq_destroy(ipnet_nicevent_taskq); + ddi_taskq_destroy(ipnet_taskq); + netstack_unregister(NS_IPNET); + id_space_destroy(ipnet_minor_space); + return (0); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +static void +ipnet_register_netihook(ipnet_stack_t *ips) +{ + int ret; + netstackid_t stackid = ips->ips_netstack->netstack_stackid; + + HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents", + ips); + + /* + * The ipnet device depends on ip and is registered in the netstack + * framework after ip so the call to net_lookup_impl() cannot fail. + */ + ips->ips_ndv4 = net_protocol_lookup(stackid, NHF_INET); + ips->ips_ndv6 = net_protocol_lookup(stackid, NHF_INET6); + + ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS, + ips->ips_nicevents); + if (ret != 0) { + cmn_err(CE_WARN, "ipnet_register_netihook: net_register_hook() " + "failed for v4 stack instance %d: %d", stackid, ret); + } + ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS, + ips->ips_nicevents); + if (ret != 0) { + cmn_err(CE_WARN, "ipnet_register_netihook: net_register_hook() " + "failed for v6 stack instance %d: %d", stackid, ret); + } +} + +/* + * This function is called on attach to build an initial view of the + * interfaces on the system. 
It will be called once for IPv4 and once + * for IPv6, although there is only one ipnet interface for both IPv4 + * and IPv6 there are separate address lists. + */ +static int +ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) +{ + phy_if_t phyif; + lif_if_t lif; + ipnetif_t *ipnetif; + char name[LIFNAMSIZ]; + boolean_t new_if = B_FALSE; + uint64_t ifflags; + int ret = 0; + + /* + * Make sure we're not processing NIC events during the + * population of our interfaces and address lists. + */ + mutex_enter(&ips->ips_event_lock); + + for (phyif = net_phygetnext(nd, 0); phyif != 0; + phyif = net_phygetnext(nd, phyif)) { + if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0) + continue; + if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) { + ipnetif = ipnet_create_if(name, phyif, ips); + if (ipnetif == NULL) { + ret = ENOMEM; + goto done; + } + new_if = B_TRUE; + } + ipnetif->if_flags |= + isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; + + for (lif = net_lifgetnext(nd, phyif, 0); lif != 0; + lif = net_lifgetnext(nd, phyif, lif)) { + /* + * Skip addresses that aren't up. We'll add + * them when we receive an NE_LIF_UP event. + */ + if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 || + !(ifflags & IFF_UP)) + continue; + /* Don't add it if we already have it. 
*/ + if (ipnet_match_lif(ipnetif, lif, isv6) != NULL) + continue; + ipnet_add_ifaddr(lif, ipnetif, nd); + } + if (!new_if) + ipnetif_refrele(ipnetif); + } + +done: + mutex_exit(&ips->ips_event_lock); + return (ret); +} + +static int +ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO, + DDI_PSEUDO, 0) == DDI_FAILURE) + return (DDI_FAILURE); + + ipnet_dip = dip; + return (DDI_SUCCESS); +} + +static int +ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + ASSERT(dip == ipnet_dip); + ddi_remove_minor_node(ipnet_dip, NULL); + ipnet_dip = NULL; + return (DDI_SUCCESS); +} + +/* ARGSUSED */ +static int +ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + int error = DDI_FAILURE; + + switch (infocmd) { + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)0; + error = DDI_SUCCESS; + break; + case DDI_INFO_DEVT2DEVINFO: + if (ipnet_dip != NULL) { + *result = ipnet_dip; + error = DDI_SUCCESS; + } + break; + } + return (error); +} + +/* ARGSUSED */ +static int +ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) +{ + ipnet_t *ipnet; + netstack_t *ns = NULL; + ipnet_stack_t *ips; + int err = 0; + zoneid_t zoneid = crgetzoneid(crp); + + /* + * If the system is labeled, only the global zone is allowed to open + * IP observability nodes. + */ + if (is_system_labeled() && zoneid != GLOBAL_ZONEID) + return (EACCES); + + /* We don't support open as a module */ + if (sflag & MODOPEN) + return (ENOTSUP); + + /* This driver is self-cloning, we don't support re-open. 
*/ + if (rq->q_ptr != NULL) + return (EBUSY); + + if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL) + return (ENOMEM); + + VERIFY((ns = netstack_find_by_cred(crp)) != NULL); + ips = ns->netstack_ipnet; + + rq->q_ptr = WR(rq)->q_ptr = ipnet; + ipnet->ipnet_rq = rq; + ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space); + ipnet->ipnet_zoneid = zoneid; + ipnet->ipnet_dlstate = DL_UNBOUND; + ipnet->ipnet_sap = 0; + ipnet->ipnet_ns = ns; + + /* + * We need to hold ips_event_lock here as any NE_LIF_DOWN events need + * to be processed after ipnet_if is set and the ipnet_t has been + * inserted in the ips_str_list. + */ + mutex_enter(&ips->ips_event_lock); + if (getminor(*dev) == IPNET_MINOR_LO) { + ipnet->ipnet_flags |= IPNET_LOMODE; + ipnet->ipnet_acceptfn = ipnet_loaccept; + } else { + ipnet->ipnet_acceptfn = ipnet_accept; + ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips); + if (ipnet->ipnet_if == NULL || + !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) { + err = ENODEV; + goto done; + } + } + + mutex_enter(&ips->ips_walkers_lock); + while (ips->ips_walkers_cnt != 0) + cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); + list_insert_head(&ips->ips_str_list, ipnet); + *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor); + qprocson(rq); + + /* + * Only register our callback if we're the first open client; we call + * unregister in close() for the last open client. 
+ */ + if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list)) + ipobs_register_hook(ns, ipnet_input); + mutex_exit(&ips->ips_walkers_lock); + +done: + mutex_exit(&ips->ips_event_lock); + if (err != 0) { + netstack_rele(ns); + id_free(ipnet_minor_space, ipnet->ipnet_minor); + if (ipnet->ipnet_if != NULL) + ipnetif_refrele(ipnet->ipnet_if); + kmem_free(ipnet, sizeof (*ipnet)); + } + return (err); +} + +static int +ipnet_close(queue_t *rq) +{ + ipnet_t *ipnet = rq->q_ptr; + ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet; + + if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) + ipnet_leave_allmulti(ipnet->ipnet_if, ips); + if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) + ipnet_leave_allmulti(ipnet->ipnet_if, ips); + + mutex_enter(&ips->ips_walkers_lock); + while (ips->ips_walkers_cnt != 0) + cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); + + qprocsoff(rq); + + list_remove(&ips->ips_str_list, ipnet); + if (ipnet->ipnet_if != NULL) + ipnetif_refrele(ipnet->ipnet_if); + id_free(ipnet_minor_space, ipnet->ipnet_minor); + kmem_free(ipnet, sizeof (*ipnet)); + + if (list_is_empty(&ips->ips_str_list)) + ipobs_unregister_hook(ips->ips_netstack, ipnet_input); + + mutex_exit(&ips->ips_walkers_lock); + netstack_rele(ips->ips_netstack); + return (0); +} + +static int +ipnet_wput(queue_t *q, mblk_t *mp) +{ + switch (mp->b_datap->db_type) { + case M_FLUSH: + if (*mp->b_rptr & FLUSHW) { + flushq(q, FLUSHDATA); + *mp->b_rptr &= ~FLUSHW; + } + if (*mp->b_rptr & FLUSHR) + qreply(q, mp); + else + freemsg(mp); + break; + case M_PROTO: + case M_PCPROTO: + ipnet_wputnondata(q, mp); + break; + case M_IOCTL: + ipnet_ioctl(q, mp); + break; + case M_IOCDATA: + ipnet_iocdata(q, mp); + break; + default: + freemsg(mp); + break; + } + return (0); +} + +static int +ipnet_rsrv(queue_t *q) +{ + mblk_t *mp; + + while ((mp = getq(q)) != NULL) { + ASSERT(DB_TYPE(mp) == M_DATA); + if (canputnext(q)) { + putnext(q, mp); + } else { + (void) putbq(q, mp); + break; + } + } + return (0); +} + 
+static void +ipnet_ioctl(queue_t *q, mblk_t *mp) +{ + struct iocblk *iocp = (struct iocblk *)mp->b_rptr; + + switch (iocp->ioc_cmd) { + case DLIOCRAW: + miocack(q, mp, 0, 0); + break; + case DLIOCIPNETINFO: + if (iocp->ioc_count == TRANSPARENT) { + mcopyin(mp, NULL, sizeof (uint_t), NULL); + qreply(q, mp); + break; + } + /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */ + default: + miocnak(q, mp, 0, EINVAL); + break; + } +} + +static void +ipnet_iocdata(queue_t *q, mblk_t *mp) +{ + struct iocblk *iocp = (struct iocblk *)mp->b_rptr; + ipnet_t *ipnet = q->q_ptr; + + switch (iocp->ioc_cmd) { + case DLIOCIPNETINFO: + if (*(int *)mp->b_cont->b_rptr == 1) + ipnet->ipnet_flags |= IPNET_INFO; + else if (*(int *)mp->b_cont->b_rptr == 0) + ipnet->ipnet_flags &= ~IPNET_INFO; + else + goto iocnak; + miocack(q, mp, 0, DL_IPNETINFO_VERSION); + break; + default: + iocnak: + miocnak(q, mp, 0, EINVAL); + break; + } +} + +static void +ipnet_wputnondata(queue_t *q, mblk_t *mp) +{ + union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; + t_uscalar_t prim = dlp->dl_primitive; + + switch (prim) { + case DL_INFO_REQ: + ipnet_inforeq(q, mp); + break; + case DL_UNBIND_REQ: + ipnet_unbindreq(q, mp); + break; + case DL_BIND_REQ: + ipnet_bindreq(q, mp); + break; + case DL_PROMISCON_REQ: + ipnet_dlpromisconreq(q, mp); + break; + case DL_PROMISCOFF_REQ: + ipnet_dlpromiscoffreq(q, mp); + break; + case DL_UNITDATA_REQ: + case DL_DETACH_REQ: + case DL_PHYS_ADDR_REQ: + case DL_SET_PHYS_ADDR_REQ: + case DL_ENABMULTI_REQ: + case DL_DISABMULTI_REQ: + case DL_ATTACH_REQ: + dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0); + break; + default: + dlerrorack(q, mp, prim, DL_BADPRIM, 0); + break; + } +} + +static void +ipnet_inforeq(queue_t *q, mblk_t *mp) +{ + dl_info_ack_t *dlip; + size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t); + + if (MBLKL(mp) < DL_INFO_REQ_SIZE) { + dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0); + return; + } + + if ((mp = mexchange(q, mp, size, M_PCPROTO, 
DL_INFO_ACK)) == NULL) + return; + + dlip = (dl_info_ack_t *)mp->b_rptr; + *dlip = ipnet_infoack; + qreply(q, mp); +} + +static void +ipnet_bindreq(queue_t *q, mblk_t *mp) +{ + union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; + int32_t sap; + ipnet_t *ipnet = q->q_ptr; + + if (MBLKL(mp) < DL_BIND_REQ_SIZE) { + dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0); + return; + } + + sap = dlp->bind_req.dl_sap; + if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) { + dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0); + } else { + ipnet->ipnet_sap = sap; + ipnet->ipnet_dlstate = DL_IDLE; + dlbindack(q, mp, sap, 0, 0, 0, 0); + } +} + +static void +ipnet_unbindreq(queue_t *q, mblk_t *mp) +{ + ipnet_t *ipnet = q->q_ptr; + + if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { + dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0); + return; + } + + if (ipnet->ipnet_dlstate != DL_IDLE) { + dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); + } else { + ipnet->ipnet_dlstate = DL_UNBOUND; + ipnet->ipnet_sap = 0; + dlokack(q, mp, DL_UNBIND_REQ); + } +} + +static void +ipnet_dlpromisconreq(queue_t *q, mblk_t *mp) +{ + ipnet_t *ipnet = q->q_ptr; + t_uscalar_t level; + int err; + + if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) { + dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); + return; + } + + if (ipnet->ipnet_flags & IPNET_LOMODE) { + dlokack(q, mp, DL_PROMISCON_REQ); + return; + } + + level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; + if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { + if ((err = ipnet_join_allmulti(ipnet->ipnet_if, + ipnet->ipnet_ns->netstack_ipnet)) != 0) { + dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err); + return; + } + } + + switch (level) { + case DL_PROMISC_PHYS: + ipnet->ipnet_flags |= IPNET_PROMISC_PHYS; + break; + case DL_PROMISC_SAP: + ipnet->ipnet_flags |= IPNET_PROMISC_SAP; + break; + case DL_PROMISC_MULTI: + ipnet->ipnet_flags |= IPNET_PROMISC_MULTI; + break; + default: + dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); + return; + } 
+ + dlokack(q, mp, DL_PROMISCON_REQ); +} + +static void +ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp) +{ + ipnet_t *ipnet = q->q_ptr; + t_uscalar_t level; + uint16_t orig_ipnet_flags = ipnet->ipnet_flags; + + if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) { + dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); + return; + } + + if (ipnet->ipnet_flags & IPNET_LOMODE) { + dlokack(q, mp, DL_PROMISCOFF_REQ); + return; + } + + level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; + switch (level) { + case DL_PROMISC_PHYS: + if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) + ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS; + break; + case DL_PROMISC_SAP: + if (ipnet->ipnet_flags & IPNET_PROMISC_SAP) + ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP; + break; + case DL_PROMISC_MULTI: + if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) + ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI; + break; + default: + dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); + return; + } + + if (orig_ipnet_flags == ipnet->ipnet_flags) { + dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0); + return; + } + + if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { + ipnet_leave_allmulti(ipnet->ipnet_if, + ipnet->ipnet_ns->netstack_ipnet); + } + + dlokack(q, mp, DL_PROMISCOFF_REQ); +} + +static int +ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) +{ + int err = 0; + ip_stack_t *ipst = ips->ips_netstack->netstack_ip; + uint64_t index = ipnetif->if_index; + + mutex_enter(&ips->ips_event_lock); + if (ipnetif->if_multicnt == 0) { + ASSERT((ipnetif->if_flags & + (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0); + if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) { + err = ip_join_allmulti(index, B_FALSE, ipst); + if (err != 0) + goto done; + ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI; + } + if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) { + err = ip_join_allmulti(index, B_TRUE, ipst); + if (err != 0 && + (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) { + (void) ip_leave_allmulti(index, B_FALSE, ipst); + 
ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; + goto done; + } + ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI; + } + } + ipnetif->if_multicnt++; + +done: + mutex_exit(&ips->ips_event_lock); + return (err); +} + +static void +ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) +{ + int err; + ip_stack_t *ipst = ips->ips_netstack->netstack_ip; + uint64_t index = ipnetif->if_index; + + mutex_enter(&ips->ips_event_lock); + ASSERT(ipnetif->if_multicnt != 0); + if (--ipnetif->if_multicnt == 0) { + if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) { + err = ip_leave_allmulti(index, B_FALSE, ipst); + ASSERT(err == 0 || err == ENODEV); + ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; + } + if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) { + err = ip_leave_allmulti(index, B_TRUE, ipst); + ASSERT(err == 0 || err == ENODEV); + ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI; + } + } + mutex_exit(&ips->ips_event_lock); +} + +static mblk_t * +ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp) +{ + mblk_t *dlhdr; + dl_ipnetinfo_t *dl; + + if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) { + freemsg(mp); + return (NULL); + } + dl = (dl_ipnetinfo_t *)dlhdr->b_rptr; + dl->dli_version = DL_IPNETINFO_VERSION; + dl->dli_len = htons(sizeof (*dl)); + dl->dli_ipver = ihd->ihd_ipver; + dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc); + dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst); + dlhdr->b_wptr += sizeof (*dl); + dlhdr->b_cont = mp; + + return (dlhdr); +} + +static ipnet_addrtype_t +ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr) +{ + list_t *list; + ipnetif_t *ipnetif = ipnet->ipnet_if; + ipnetif_addr_t *ifaddr; + ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN; + + /* First check if the address is multicast or limited broadcast. 
*/ + switch (addr->iap_family) { + case AF_INET: + if (CLASSD(*(addr->iap_addr4)) || + *(addr->iap_addr4) == INADDR_BROADCAST) + return (IPNETADDR_MBCAST); + break; + case AF_INET6: + if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6)) + return (IPNETADDR_MBCAST); + break; + } + + /* + * Walk the address list to see if the address belongs to our + * interface or is one of our subnet broadcast addresses. + */ + mutex_enter(&ipnetif->if_addr_lock); + list = (addr->iap_family == AF_INET) ? + &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list; + for (ifaddr = list_head(list); + ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN; + ifaddr = list_next(list, ifaddr)) { + /* + * If we're not in the global zone, then only look at + * addresses in our zone. + */ + if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && + ipnet->ipnet_zoneid != ifaddr->ifa_zone) + continue; + switch (addr->iap_family) { + case AF_INET: + if (ifaddr->ifa_ip4addr != INADDR_ANY && + *(addr->iap_addr4) == ifaddr->ifa_ip4addr) + addrtype = IPNETADDR_MYADDR; + else if (ifaddr->ifa_brdaddr != INADDR_ANY && + *(addr->iap_addr4) == ifaddr->ifa_brdaddr) + addrtype = IPNETADDR_MBCAST; + break; + case AF_INET6: + if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6, + &ifaddr->ifa_ip6addr)) + addrtype = IPNETADDR_MYADDR; + break; + } + } + mutex_exit(&ipnetif->if_addr_lock); + + return (addrtype); +} + +/* + * Verify if the packet contained in ihd should be passed up to the + * ipnet client stream. + */ +static boolean_t +ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, + ipnet_addrp_t *dst) +{ + uint64_t ifindex = ipnet->ipnet_if->if_index; + ipnet_addrtype_t srctype, dsttype; + + srctype = ipnet_get_addrtype(ipnet, src); + dsttype = ipnet_get_addrtype(ipnet, dst); + + /* + * Do not allow an ipnet stream to see packets that are not from or to + * its zone. The exception is when zones are using the shared stack + * model. 
In this case, streams in the global zone have visibility + * into other shared-stack zones, and broadcast and multicast traffic + * is visible by all zones in the stack. + */ + if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && + dsttype != IPNETADDR_MBCAST) { + if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && + ipnet->ipnet_zoneid != ihd->ihd_zdst) + return (B_FALSE); + } + + /* + * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the + * packet's IP version. + */ + if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) && + ipnet->ipnet_sap != ihd->ihd_ipver) + return (B_FALSE); + + /* If the destination address is ours, then accept the packet. */ + if (dsttype == IPNETADDR_MYADDR) + return (B_TRUE); + + /* + * If DL_PROMISC_PHYS is enabled, then we can see all packets that are + * sent or received on the interface we're observing, or packets that + * have our source address (this allows us to see packets we send). + */ + if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) { + if (ihd->ihd_ifindex == ifindex || srctype == IPNETADDR_MYADDR) + return (B_TRUE); + } + + /* + * We accept multicast and broadcast packets transmitted or received + * on the interface we're observing. + */ + if (dsttype == IPNETADDR_MBCAST && ihd->ihd_ifindex == ifindex) + return (B_TRUE); + + return (B_FALSE); +} + +/* + * Verify if the packet contained in ihd should be passed up to the ipnet + * client stream that's in IPNET_LOMODE. + */ +/* ARGSUSED */ +static boolean_t +ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, + ipnet_addrp_t *dst) +{ + if (ihd->ihd_htype != IPOBS_HOOK_LOCAL) + return (B_FALSE); + + /* + * An ipnet stream must not see packets that are not from/to its zone. 
+ */ + if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) { + if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && + ipnet->ipnet_zoneid != ihd->ihd_zdst) + return (B_FALSE); + } + + return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver); +} + +static void +ipnet_dispatch(void *arg) +{ + mblk_t *mp = arg; + ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr; + ipnet_t *ipnet; + mblk_t *netmp; + list_t *list; + ipnet_stack_t *ips = ihd->ihd_stack->netstack_ipnet; + ipnet_addrp_t src, dst; + + if (ihd->ihd_ipver == IPV4_VERSION) { + src.iap_family = dst.iap_family = AF_INET; + src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src; + dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst; + } else { + src.iap_family = dst.iap_family = AF_INET6; + src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src; + dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst; + } + + ipnet_walkers_inc(ips); + + list = &ips->ips_str_list; + for (ipnet = list_head(list); ipnet != NULL; + ipnet = list_next(list, ipnet)) { + if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst)) + continue; + + if (list_next(list, ipnet) == NULL) { + netmp = ihd->ihd_mp; + ihd->ihd_mp = NULL; + } else { + if ((netmp = dupmsg(ihd->ihd_mp)) == NULL && + (netmp = copymsg(ihd->ihd_mp)) == NULL) { + atomic_inc_64(&ips->ips_drops); + continue; + } + } + + if (ipnet->ipnet_flags & IPNET_INFO) { + if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) { + atomic_inc_64(&ips->ips_drops); + continue; + } + } + + if (ipnet->ipnet_rq->q_first == NULL && + canputnext(ipnet->ipnet_rq)) { + putnext(ipnet->ipnet_rq, netmp); + } else if (canput(ipnet->ipnet_rq)) { + (void) putq(ipnet->ipnet_rq, netmp); + } else { + freemsg(netmp); + atomic_inc_64(&ips->ips_drops); + } + } + + ipnet_walkers_dec(ips); + + freemsg(ihd->ihd_mp); + freemsg(mp); +} + +static void +ipnet_input(mblk_t *mp) +{ + ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr; + + if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, 
DDI_NOSLEEP) != + DDI_SUCCESS) { + atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops); + freemsg(ihd->ihd_mp); + freemsg(mp); + } +} + +/* + * Create a new ipnetif_t and new minor node for it. If creation is + * successful the new ipnetif_t is inserted into an avl_tree + * containing ipnetif's for this stack instance. + */ +static ipnetif_t * +ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips) +{ + ipnetif_t *ipnetif; + avl_index_t where = 0; + minor_t ifminor; + + /* + * Because ipnet_create_if() can be called from a NIC event + * callback, it should not block. + */ + ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space); + if (ifminor == (minor_t)-1) + return (NULL); + if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) { + id_free(ipnet_minor_space, ifminor); + return (NULL); + } + + (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ); + ipnetif->if_index = index; + + mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0); + list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t), + offsetof(ipnetif_addr_t, ifa_link)); + list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t), + offsetof(ipnetif_addr_t, ifa_link)); + ipnetif->if_dev = makedevice(ipnet_major, ifminor); + mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0); + ipnetif->if_refcnt = 1; + + mutex_enter(&ips->ips_avl_lock); + VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL); + avl_insert(&ips->ips_avl_by_index, ipnetif, where); + VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL); + avl_insert(&ips->ips_avl_by_name, ipnetif, where); + mutex_exit(&ips->ips_avl_lock); + + return (ipnetif); +} + +static void +ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips) +{ + ipnet_t *ipnet; + + ipnet_walkers_inc(ips); + /* Send a SIGHUP to all open streams associated with this ipnetif. 
*/ + for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL; + ipnet = list_next(&ips->ips_str_list, ipnet)) { + if (ipnet->ipnet_if == ipnetif) + (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); + } + ipnet_walkers_dec(ips); + mutex_enter(&ips->ips_avl_lock); + avl_remove(&ips->ips_avl_by_index, ipnetif); + avl_remove(&ips->ips_avl_by_name, ipnetif); + mutex_exit(&ips->ips_avl_lock); + /* Release the reference we implicitly held in ipnet_create_if(). */ + ipnetif_refrele(ipnetif); +} + +static void +ipnet_purge_addrlist(list_t *addrlist) +{ + ipnetif_addr_t *ifa; + + while ((ifa = list_head(addrlist)) != NULL) { + list_remove(addrlist, ifa); + kmem_free(ifa, sizeof (*ifa)); + } +} + +static void +ipnet_free_if(ipnetif_t *ipnetif) +{ + ASSERT(ipnetif->if_refcnt == 0); + + /* Remove IPv4/v6 address lists from the ipnetif */ + ipnet_purge_addrlist(&ipnetif->if_ip4addr_list); + list_destroy(&ipnetif->if_ip4addr_list); + ipnet_purge_addrlist(&ipnetif->if_ip6addr_list); + list_destroy(&ipnetif->if_ip6addr_list); + mutex_destroy(&ipnetif->if_addr_lock); + mutex_destroy(&ipnetif->if_reflock); + id_free(ipnet_minor_space, getminor(ipnetif->if_dev)); + kmem_free(ipnetif, sizeof (*ipnetif)); +} + +/* + * Create an ipnetif_addr_t with the given logical interface id (lif) + * and add it to the supplied ipnetif. The lif is the netinfo + * representation of logical interface id, and we use this id to match + * incoming netinfo events against our lists of addresses. 
+ */ +static void +ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd) +{ + ipnetif_addr_t *ifaddr; + zoneid_t zoneid; + struct sockaddr_in bcast; + struct sockaddr_storage addr; + net_ifaddr_t type = NA_ADDRESS; + uint64_t phyif = ipnetif->if_index; + + if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 || + net_getlifzone(nd, phyif, lif, &zoneid) != 0) + return; + if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL) + return; + + ifaddr->ifa_zone = zoneid; + ifaddr->ifa_id = lif; + + switch (addr.ss_family) { + case AF_INET: + ifaddr->ifa_ip4addr = + ((struct sockaddr_in *)&addr)->sin_addr.s_addr; + /* + * Try and get the broadcast address. Note that it's okay for + * an interface to not have a broadcast address, so we don't + * fail the entire operation if net_getlifaddr() fails here. + */ + type = NA_BROADCAST; + if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0) + ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr; + break; + case AF_INET6: + ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr; + break; + } + + mutex_enter(&ipnetif->if_addr_lock); + list_insert_tail(addr.ss_family == AF_INET ? + &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr); + mutex_exit(&ipnetif->if_addr_lock); +} + +static void +ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6) +{ + mutex_enter(&ipnetif->if_addr_lock); + list_remove(isv6 ? + &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr); + mutex_exit(&ipnetif->if_addr_lock); + kmem_free(ifaddr, sizeof (*ifaddr)); +} + +static void +ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips, + boolean_t isv6) +{ + ipnetif_t *ipnetif; + boolean_t refrele_needed = B_TRUE; + + if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) { + ipnetif = ipnet_create_if(ifname, ifindex, ips); + refrele_needed = B_FALSE; + } + if (ipnetif != NULL) { + ipnetif->if_flags |= + isv6 ? 
IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; + } + + if (ipnetif->if_multicnt != 0) { + if (ip_join_allmulti(ifindex, isv6, + ips->ips_netstack->netstack_ip) == 0) { + ipnetif->if_flags |= + isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI; + } + } + + if (refrele_needed) + ipnetif_refrele(ipnetif); +} + +static void +ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6) +{ + ipnetif_t *ipnetif; + + if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) + return; + + mutex_enter(&ipnetif->if_addr_lock); + ipnet_purge_addrlist(isv6 ? + &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list); + mutex_exit(&ipnetif->if_addr_lock); + + /* + * Note that we have one ipnetif for both IPv4 and IPv6, but we receive + * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif + * if both IPv4 and IPv6 interfaces have been unplumbed. + */ + ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED; + if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED))) + ipnet_remove_if(ipnetif, ips); + ipnetif_refrele(ipnetif); +} + +static void +ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd, + ipnet_stack_t *ips, boolean_t isv6) +{ + ipnetif_t *ipnetif; + ipnetif_addr_t *ifaddr; + + if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) + return; + if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) { + /* + * We must have missed a NE_LIF_DOWN event. Delete this + * ifaddr and re-create it. 
+ */ + ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); + } + + ipnet_add_ifaddr(lifindex, ipnetif, nd); + ipnetif_refrele(ipnetif); +} + +static void +ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips, + boolean_t isv6) +{ + ipnetif_t *ipnetif; + ipnetif_addr_t *ifaddr; + + if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) + return; + if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) + ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); + ipnetif_refrele(ipnetif); + /* + * Make sure that open streams on this ipnetif are still allowed to + * have it open. + */ + ipnet_if_zonecheck(ipnetif, ips); +} + +/* + * This callback from the NIC event framework dispatches a taskq as the event + * handlers may block. + */ +/* ARGSUSED */ +static int +ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg) +{ + ipnet_stack_t *ips = arg; + hook_nic_event_t *hn = (hook_nic_event_t *)info; + ipnet_nicevent_t *ipne; + + if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL) + return (0); + ipne->ipne_event = hn->hne_event; + ipne->ipne_protocol = hn->hne_protocol; + ipne->ipne_stackid = ips->ips_netstack->netstack_stackid; + ipne->ipne_ifindex = hn->hne_nic; + ipne->ipne_lifindex = hn->hne_lif; + if (hn->hne_datalen != 0) { + (void) strlcpy(ipne->ipne_ifname, hn->hne_data, + sizeof (ipne->ipne_ifname)); + } + (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task, + ipne, DDI_NOSLEEP); + return (0); +} + +static void +ipnet_nicevent_task(void *arg) +{ + ipnet_nicevent_t *ipne = arg; + netstack_t *ns; + ipnet_stack_t *ips; + boolean_t isv6; + + if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL) + goto done; + ips = ns->netstack_ipnet; + isv6 = (ipne->ipne_protocol == ips->ips_ndv6); + + mutex_enter(&ips->ips_event_lock); + switch (ipne->ipne_event) { + case NE_PLUMB: + ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips, + isv6); + break; + case NE_UNPLUMB: + 
ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6); + break; + case NE_LIF_UP: + ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, + ipne->ipne_protocol, ips, isv6); + break; + case NE_LIF_DOWN: + ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips, + isv6); + break; + default: + break; + } + mutex_exit(&ips->ips_event_lock); +done: + if (ns != NULL) + netstack_rele(ns); + kmem_free(ipne, sizeof (ipnet_nicevent_t)); +} + +dev_t +ipnet_if_getdev(char *name, zoneid_t zoneid) +{ + netstack_t *ns; + ipnet_stack_t *ips; + ipnetif_t *ipnetif; + dev_t dev = (dev_t)-1; + + if (is_system_labeled() && zoneid != GLOBAL_ZONEID) + return (dev); + if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) + return (dev); + + ips = ns->netstack_ipnet; + mutex_enter(&ips->ips_avl_lock); + if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) { + if (ipnet_if_in_zone(ipnetif, zoneid, ips)) + dev = ipnetif->if_dev; + } + mutex_exit(&ips->ips_avl_lock); + netstack_rele(ns); + + return (dev); +} + +static ipnetif_t * +ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips) +{ + ipnetif_t *ipnetif; + + mutex_enter(&ips->ips_avl_lock); + if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL) + ipnetif_refhold(ipnetif); + mutex_exit(&ips->ips_avl_lock); + return (ipnetif); +} + +static ipnetif_t * +ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips) +{ + ipnetif_t *ipnetif; + avl_tree_t *tree; + + mutex_enter(&ips->ips_avl_lock); + tree = &ips->ips_avl_by_index; + for (ipnetif = avl_first(tree); ipnetif != NULL; + ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) { + if (ipnetif->if_dev == dev) { + ipnetif_refhold(ipnetif); + break; + } + } + mutex_exit(&ips->ips_avl_lock); + return (ipnetif); +} + +static ipnetif_addr_t * +ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6) +{ + ipnetif_addr_t *ifaddr; + list_t *list; + + mutex_enter(&ipnetif->if_addr_lock); + list = isv6 ? 
&ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list; + for (ifaddr = list_head(list); ifaddr != NULL; + ifaddr = list_next(list, ifaddr)) { + if (lid == ifaddr->ifa_id) + break; + } + mutex_exit(&ipnetif->if_addr_lock); + return (ifaddr); +} + +/* ARGSUSED */ +static void * +ipnet_stack_init(netstackid_t stackid, netstack_t *ns) +{ + ipnet_stack_t *ips; + + ips = kmem_zalloc(sizeof (*ips), KM_SLEEP); + ips->ips_netstack = ns; + mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0); + avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index, + sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index)); + avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name, + sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name)); + mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL); + list_create(&ips->ips_str_list, sizeof (ipnet_t), + offsetof(ipnet_t, ipnet_next)); + ipnet_register_netihook(ips); + return (ips); +} + +/* ARGSUSED */ +static void +ipnet_stack_fini(netstackid_t stackid, void *arg) +{ + ipnet_stack_t *ips = arg; + ipnetif_t *ipnetif, *nipnetif; + + if (ips->ips_ndv4 != NULL) { + VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS, + ips->ips_nicevents) == 0); + VERIFY(net_protocol_release(ips->ips_ndv4) == 0); + } + if (ips->ips_ndv6 != NULL) { + VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS, + ips->ips_nicevents) == 0); + VERIFY(net_protocol_release(ips->ips_ndv6) == 0); + } + hook_free(ips->ips_nicevents); + + for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; + ipnetif = nipnetif) { + nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif); + ipnet_remove_if(ipnetif, ips); + } + avl_destroy(&ips->ips_avl_by_index); + avl_destroy(&ips->ips_avl_by_name); + mutex_destroy(&ips->ips_avl_lock); + mutex_destroy(&ips->ips_walkers_lock); + cv_destroy(&ips->ips_walkers_cv); + list_destroy(&ips->ips_str_list); + kmem_free(ips, sizeof (*ips)); +} + +/* Do any of the 
addresses in addrlist belong the supplied zoneid? */ +static boolean_t +ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid) +{ + ipnetif_addr_t *ifa; + + for (ifa = list_head(addrlist); ifa != NULL; + ifa = list_next(addrlist, ifa)) { + if (ifa->ifa_zone == zoneid) + return (B_TRUE); + } + return (B_FALSE); +} + +/* Should the supplied ipnetif be visible from the supplied zoneid? */ +static boolean_t +ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips) +{ + int ret; + + /* + * The global zone has visibility into all interfaces in the global + * stack, and exclusive stack zones have visibility into all + * interfaces in their stack. + */ + if (zoneid == GLOBAL_ZONEID || + ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) + return (B_TRUE); + + /* + * Shared-stack zones only have visibility for interfaces that have + * addresses in their zone. + */ + mutex_enter(&ipnetif->if_addr_lock); + ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) || + ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid); + mutex_exit(&ipnetif->if_addr_lock); + return (ret); +} + +/* + * Verify that any ipnet_t that has a reference to the supplied ipnetif should + * still be allowed to have it open. A given ipnet_t may no longer be allowed + * to have an ipnetif open if there are no longer any addresses that belong to + * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the + * case, send the ipnet_t an M_HANGUP. 
+ */ +static void +ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips) +{ + list_t *strlist = &ips->ips_str_list; + ipnet_t *ipnet; + + ipnet_walkers_inc(ips); + for (ipnet = list_head(strlist); ipnet != NULL; + ipnet = list_next(strlist, ipnet)) { + if (ipnet->ipnet_if != ipnetif) + continue; + if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips)) + (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); + } + ipnet_walkers_dec(ips); +} + +void +ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid) +{ + ipnetif_t *ipnetif; + list_t cbdata; + ipnetif_cbdata_t *cbnode; + netstack_t *ns; + ipnet_stack_t *ips; + + /* + * On labeled systems, non-global zones shouldn't see anything + * in /dev/ipnet. + */ + if (is_system_labeled() && zoneid != GLOBAL_ZONEID) + return; + + if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) + return; + + ips = ns->netstack_ipnet; + list_create(&cbdata, sizeof (ipnetif_cbdata_t), + offsetof(ipnetif_cbdata_t, ic_next)); + + mutex_enter(&ips->ips_avl_lock); + for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; + ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) { + if (!ipnet_if_in_zone(ipnetif, zoneid, ips)) + continue; + cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP); + (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ); + cbnode->ic_dev = ipnetif->if_dev; + list_insert_head(&cbdata, cbnode); + } + mutex_exit(&ips->ips_avl_lock); + + while ((cbnode = list_head(&cbdata)) != NULL) { + cb(cbnode->ic_ifname, arg, cbnode->ic_dev); + list_remove(&cbdata, cbnode); + kmem_free(cbnode, sizeof (ipnetif_cbdata_t)); + } + list_destroy(&cbdata); + netstack_rele(ns); +} + +static int +ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp) +{ + int64_t index1 = *((int64_t *)index_ptr); + int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index; + + return (SIGNOF(index2 - index1)); +} + +static int +ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp) +{ + int 
res; + + res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr); + return (SIGNOF(res)); +} + +static void +ipnetif_refhold(ipnetif_t *ipnetif) +{ + mutex_enter(&ipnetif->if_reflock); + ipnetif->if_refcnt++; + mutex_exit(&ipnetif->if_reflock); +} + +static void +ipnetif_refrele(ipnetif_t *ipnetif) +{ + mutex_enter(&ipnetif->if_reflock); + ASSERT(ipnetif->if_refcnt != 0); + if (--ipnetif->if_refcnt == 0) + ipnet_free_if(ipnetif); + else + mutex_exit(&ipnetif->if_reflock); +} + +static void +ipnet_walkers_inc(ipnet_stack_t *ips) +{ + mutex_enter(&ips->ips_walkers_lock); + ips->ips_walkers_cnt++; + mutex_exit(&ips->ips_walkers_lock); +} + +static void +ipnet_walkers_dec(ipnet_stack_t *ips) +{ + mutex_enter(&ips->ips_walkers_lock); + ASSERT(ips->ips_walkers_cnt != 0); + if (--ips->ips_walkers_cnt == 0) + cv_broadcast(&ips->ips_walkers_cv); + mutex_exit(&ips->ips_walkers_lock); +} diff --git a/usr/src/uts/common/inet/ipnet/ipnet.conf b/usr/src/uts/common/inet/ipnet/ipnet.conf new file mode 100644 index 0000000000..bad3b4723e --- /dev/null +++ b/usr/src/uts/common/inet/ipnet/ipnet.conf @@ -0,0 +1,26 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# + +name="ipnet" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index d22d2e70e4..3a557048d6 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -292,7 +292,8 @@ typedef struct tcp_s { tcp_cork : 1, /* tcp_cork option */ tcp_tconnind_started : 1, /* conn_ind message is being sent */ tcp_lso :1, /* Lower layer is capable of LSO */ - tcp_pad_to_bit_31 : 17; + tcp_refuse :1, /* Connection needs refusing */ + tcp_pad_to_bit_31 : 16; uint32_t tcp_if_mtu; /* Outgoing interface MTU. */ @@ -393,6 +394,10 @@ typedef struct tcp_s { int tcp_ip_hdr_len; /* Byte len of our current IPvx hdr */ tcph_t *tcp_tcph; /* tcp header within combined hdr */ int32_t tcp_tcp_hdr_len; /* tcp header len within combined */ + /* Saved peer headers in the case of re-fusion */ + ipha_t tcp_saved_ipha; + ip6_t tcp_saved_ip6h; + tcph_t tcp_saved_tcph; uint32_t tcp_sum; /* checksum to compensate for source */ /* routed packets. Host byte order */ diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 9f10a25aa5..b4dc117a30 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -8019,6 +8019,7 @@ tcp_reinit_values(tcp) tcp->tcp_zero_win_probe = 0; tcp->tcp_loopback = 0; + tcp->tcp_refuse = 0; tcp->tcp_localnet = 0; tcp->tcp_syn_defense = 0; tcp->tcp_set_timer = 0; @@ -17866,6 +17867,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) tcp_t *tcp = connp->conn_tcp; uint32_t msize; tcp_stack_t *tcps = tcp->tcp_tcps; + ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; /* * Try and ASSERT the minimum possible references on the @@ -17886,6 +17888,16 @@ tcp_output(void *arg, mblk_t *mp, void *arg2) tcp->tcp_squeue_bytes -= msize; mutex_exit(&tcp->tcp_non_sq_lock); + /* Check to see if this connection wants to be re-fused. 
*/ + if (tcp->tcp_refuse && !ipst->ips_ipobs_enabled) { + if (tcp->tcp_ipversion == IPV4_VERSION) { + tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ipha, + &tcp->tcp_saved_tcph); + } else { + tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ip6h, + &tcp->tcp_saved_tcph); + } + } /* Bypass tcp protocol for fused tcp loopback */ if (tcp->tcp_fused && tcp_fuse_output(tcp, mp, msize)) return; @@ -19462,7 +19474,7 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) * depending on the availability of transmit resources at * the media layer. */ - IP_DLS_ILL_TX(ill, ipha, mp, ipst); + IP_DLS_ILL_TX(ill, ipha, mp, ipst, ire_fp_mp_len); } else { ill_t *out_ill = (ill_t *)ire->ire_stq->q_ptr; DTRACE_PROBE4(ip4__physical__out__start, @@ -19474,6 +19486,12 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp) DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp != NULL) { + if (ipst->ips_ipobs_enabled) { + ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, + IP_REAL_ZONEID(connp->conn_zoneid, ipst), + ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, + ipst); + } DTRACE_IP_FASTPATH(mp, ipha, out_ill, ipha, NULL); putnext(ire->ire_stq, mp); } @@ -21258,6 +21276,24 @@ tcp_multisend_data(tcp_t *tcp, ire_t *ire, const ill_t *ill, mblk_t *md_mp_head, atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, obsegs); ire->ire_last_used_time = lbolt; + if (ipst->ips_ipobs_enabled) { + multidata_t *dlmdp = mmd_getmultidata(md_mp_head); + pdesc_t *dl_pkt; + pdescinfo_t pinfo; + mblk_t *nmp; + zoneid_t szone = tcp->tcp_connp->conn_zoneid; + + for (dl_pkt = mmd_getfirstpdesc(dlmdp, &pinfo); + (dl_pkt != NULL); + dl_pkt = mmd_getnextpdesc(dl_pkt, &pinfo)) { + if ((nmp = mmd_transform_link(dl_pkt)) == NULL) + continue; + ipobs_hook(nmp, IPOBS_HOOK_OUTBOUND, szone, + ALL_ZONES, ill, tcp->tcp_ipversion, 0, ipst); + freemsg(nmp); + } + } + /* send it down */ if (ILL_DLS_CAPABLE(ill)) { ill_dls_capab_t *ill_dls = ill->ill_dls_capab; @@ -21437,7 +21473,7 @@ tcp_lsosend_data(tcp_t *tcp, mblk_t *mp, ire_t *ire, ill_t *ill, const 
int mss, * depending on the availability of transmit resources at * the media layer. */ - IP_DLS_ILL_TX(ill, ipha, mp, ipst); + IP_DLS_ILL_TX(ill, ipha, mp, ipst, ire_fp_mp_len); } else { ill_t *out_ill = (ill_t *)ire->ire_stq->q_ptr; DTRACE_PROBE4(ip4__physical__out__start, @@ -21449,6 +21485,13 @@ tcp_lsosend_data(tcp_t *tcp, mblk_t *mp, ire_t *ire, ill_t *ill, const int mss, DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp != NULL) { + if (ipst->ips_ipobs_enabled) { + zoneid_t szone = tcp->tcp_connp->conn_zoneid; + + ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, + ALL_ZONES, ill, tcp->tcp_ipversion, + ire_fp_mp_len, ipst); + } DTRACE_IP_FASTPATH(mp, ipha, out_ill, ipha, NULL); putnext(ire->ire_stq, mp); } diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c index d7919c5b78..a192c7ad07 100644 --- a/usr/src/uts/common/inet/tcp/tcp_fusion.c +++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c @@ -202,9 +202,12 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph) * around until tcp_accept_finish() is called on this eager -- * this won't happen until we're done since we're inside the * eager's perimeter now. + * + * We can also get called in the case were a connection needs + * to be re-fused. In this case tcp_saved_listener will be + * NULL but tcp_refuse will be true. */ - ASSERT(tcp->tcp_saved_listener != NULL); - + ASSERT(tcp->tcp_saved_listener != NULL || tcp->tcp_refuse); /* * Lookup peer endpoint; search for the remote endpoint having * the reversed address-port quadruplet in ESTABLISHED state, @@ -329,36 +332,43 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph) * inherit the listener's q_hiwat value; this is temporary * since we'll repeat the process in tcp_accept_finish(). 
*/ - (void) tcp_fuse_set_rcv_hiwat(tcp, - tcp->tcp_saved_listener->tcp_rq->q_hiwat); + if (!tcp->tcp_refuse) { + (void) tcp_fuse_set_rcv_hiwat(tcp, + tcp->tcp_saved_listener->tcp_rq->q_hiwat); - /* - * Set the stream head's write offset value to zero since we - * won't be needing any room for TCP/IP headers; tell it to - * not break up the writes (this would reduce the amount of - * work done by kmem); and configure our receive buffer. - * Note that we can only do this for the active connect tcp - * since our eager is still detached; it will be dealt with - * later in tcp_accept_finish(). - */ - DB_TYPE(mp) = M_SETOPTS; - mp->b_wptr += sizeof (*stropt); + /* + * Set the stream head's write offset value to zero + * since we won't be needing any room for TCP/IP + * headers; tell it to not break up the writes (this + * would reduce the amount of work done by kmem); and + * configure our receive buffer. Note that we can only + * do this for the active connect tcp since our eager + * is still detached; it will be dealt with later in + * tcp_accept_finish(). + */ + DB_TYPE(mp) = M_SETOPTS; + mp->b_wptr += sizeof (*stropt); - stropt = (struct stroptions *)mp->b_rptr; - stropt->so_flags = SO_MAXBLK | SO_WROFF | SO_HIWAT; - stropt->so_maxblk = tcp_maxpsz_set(peer_tcp, B_FALSE); - stropt->so_wroff = 0; + stropt = (struct stroptions *)mp->b_rptr; + stropt->so_flags = SO_MAXBLK | SO_WROFF | SO_HIWAT; + stropt->so_maxblk = tcp_maxpsz_set(peer_tcp, B_FALSE); + stropt->so_wroff = 0; - /* - * Record the stream head's high water mark for - * peer endpoint; this is used for flow-control - * purposes in tcp_fuse_output(). - */ - stropt->so_hiwat = tcp_fuse_set_rcv_hiwat(peer_tcp, - peer_rq->q_hiwat); + /* + * Record the stream head's high water mark for + * peer endpoint; this is used for flow-control + * purposes in tcp_fuse_output(). 
+ */ + stropt->so_hiwat = tcp_fuse_set_rcv_hiwat(peer_tcp, + peer_rq->q_hiwat); - /* Send the options up */ - putnext(peer_rq, mp); + tcp->tcp_refuse = B_FALSE; + peer_tcp->tcp_refuse = B_FALSE; + /* Send the options up */ + putnext(peer_rq, mp); + } + tcp->tcp_refuse = B_FALSE; + peer_tcp->tcp_refuse = B_FALSE; } else { TCP_STAT(tcps, tcp_fusion_unqualified); } @@ -410,6 +420,10 @@ tcp_unfuse(tcp_t *tcp) /* Unfuse the endpoints */ peer_tcp->tcp_fused = tcp->tcp_fused = B_FALSE; peer_tcp->tcp_loopback_peer = tcp->tcp_loopback_peer = NULL; + freeb(peer_tcp->tcp_fused_sigurg_mp); + freeb(tcp->tcp_fused_sigurg_mp); + peer_tcp->tcp_fused_sigurg_mp = NULL; + tcp->tcp_fused_sigurg_mp = NULL; } /* @@ -536,8 +550,27 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) /* If this connection requires IP, unfuse and use regular path */ if (tcp_loopback_needs_ip(tcp, ns) || tcp_loopback_needs_ip(peer_tcp, ns) || - IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst)) { + IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst) || + list_head(&ipst->ips_ipobs_cb_list) != NULL) { TCP_STAT(tcps, tcp_fusion_aborted); + tcp->tcp_refuse = B_TRUE; + peer_tcp->tcp_refuse = B_TRUE; + + bcopy(peer_tcp->tcp_tcph, &tcp->tcp_saved_tcph, + sizeof (tcph_t)); + bcopy(tcp->tcp_tcph, &peer_tcp->tcp_saved_tcph, + sizeof (tcph_t)); + if (tcp->tcp_ipversion == IPV4_VERSION) { + bcopy(peer_tcp->tcp_ipha, &tcp->tcp_saved_ipha, + sizeof (ipha_t)); + bcopy(tcp->tcp_ipha, &peer_tcp->tcp_saved_ipha, + sizeof (ipha_t)); + } else { + bcopy(peer_tcp->tcp_ip6h, &tcp->tcp_saved_ip6h, + sizeof (ip6_t)); + bcopy(tcp->tcp_ip6h, &peer_tcp->tcp_saved_ip6h, + sizeof (ip6_t)); + } goto unfuse; } diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index f3e0a10172..3369ca915e 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -24,8 +24,6 @@ */ /* Copyright (c) 1990 Mentat Inc. 
*/ -const char udp_version[] = "@(#)udp.c 1.206 08/10/17 SMI"; - #include <sys/types.h> #include <sys/stream.h> #include <sys/dlpi.h> @@ -80,6 +78,7 @@ const char udp_version[] = "@(#)udp.c 1.206 08/10/17 SMI"; #include <inet/ipclassifier.h> #include <inet/ipsec_impl.h> #include <inet/ipp_common.h> +#include <inet/ipnet.h> /* * The ipsec_info.h header file is here since it has the definition for the @@ -6341,7 +6340,7 @@ udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) * depending on the availability of transmit resources at * the media layer. */ - IP_DLS_ILL_TX(ill, ipha, mp, ipst); + IP_DLS_ILL_TX(ill, ipha, mp, ipst, ire_fp_mp_len); } else { DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, ill, @@ -6351,13 +6350,18 @@ udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) NULL, ill, ipha, mp, mp, ll_multicast, ipst); DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); if (mp != NULL) { + if (ipst->ips_ipobs_enabled) { + ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, + IP_REAL_ZONEID(connp->conn_zoneid, ipst), + ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, + ipst); + } DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL, int, 0); putnext(ire->ire_stq, mp); } } - IRE_REFRELE(ire); } diff --git a/usr/src/uts/common/io/neti_impl.c b/usr/src/uts/common/io/neti_impl.c index c55fe49ba1..cfaa75d923 100644 --- a/usr/src/uts/common/io/neti_impl.c +++ b/usr/src/uts/common/io/neti_impl.c @@ -262,6 +262,32 @@ net_getlifaddr(net_handle_t info, phy_if_t nic, lif_if_t ifdata, nelem, type, storage)); } +int +net_getlifzone(net_handle_t info, phy_if_t phy_ifdata, lif_if_t ifdata, + zoneid_t *zoneid) +{ + ASSERT(info != NULL); + + if (info->netd_condemned != 0 || info->netd_stack == NULL) + return (-1); + + return (info->netd_info.neti_getlifzone(info, phy_ifdata, ifdata, + zoneid)); +} + +int +net_getlifflags(net_handle_t info, phy_if_t phy_ifdata, 
lif_if_t ifdata, + uint64_t *flags) +{ + ASSERT(info != NULL); + + if (info->netd_condemned != 0 || info->netd_stack == NULL) + return (-1); + + return (info->netd_info.neti_getlifflags(info, phy_ifdata, ifdata, + flags)); +} + phy_if_t net_phygetnext(net_handle_t info, phy_if_t nic) { diff --git a/usr/src/uts/common/io/sundlpi.c b/usr/src/uts/common/io/sundlpi.c index 43d5db0e5e..af22d70ec5 100644 --- a/usr/src/uts/common/io/sundlpi.c +++ b/usr/src/uts/common/io/sundlpi.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Common Sun DLPI routines. */ @@ -583,6 +581,7 @@ dl_mactypestr(t_uscalar_t mactype) case DL_IPV4: return ("IPv4 Tunnel"); case DL_IPV6: return ("IPv6 Tunnel"); case DL_WIFI: return ("IEEE 802.11"); + case DL_IPNET: return ("IPNET"); default: return ("<unknown mactype>"); } } diff --git a/usr/src/uts/common/os/priv_defs b/usr/src/uts/common/os/priv_defs index 82d93a5250..430218de55 100644 --- a/usr/src/uts/common/os/priv_defs +++ b/usr/src/uts/common/os/priv_defs @@ -223,6 +223,11 @@ privilege PRIV_NET_MAC_AWARE This privilege is interpreted only if the system is configured with Trusted Extensions. +privilege PRIV_NET_OBSERVABILITY + + Allows a process to access /dev/lo0 and the devices in /dev/ipnet/ + while not requiring them to need PRIV_NET_RAWACCESS. 
+ privilege PRIV_NET_PRIVADDR Allows a process to bind to a privileged port diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h index 355280f920..3af7b7bca7 100644 --- a/usr/src/uts/common/sys/dlpi.h +++ b/usr/src/uts/common/sys/dlpi.h @@ -35,11 +35,8 @@ #ifndef _SYS_DLPI_H #define _SYS_DLPI_H -#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */ - #include <sys/types.h> #include <sys/stream.h> - #ifdef __cplusplus extern "C" { #endif @@ -51,10 +48,21 @@ extern "C" { #define DLIOCRAW (DLIOC|1) /* M_DATA "raw" mode */ #define DLIOCNATIVE (DLIOC|2) /* Native traffic mode */ #define DLIOCMARGININFO (DLIOC|3) /* margin size info */ +#define DLIOCIPNETINFO (DLIOC|4) /* ipnet header */ #define DLIOCHDRINFO (DLIOC|10) /* IP fast-path */ - #define DL_IOC_HDR_INFO DLIOCHDRINFO +#define DL_IPNETINFO_VERSION 0x1 + +typedef struct dl_ipnetinfo { + uint8_t dli_version; /* DL_IPNETINFO_* version */ + uint8_t dli_ipver; /* packet IP header version */ + uint16_t dli_len; /* length of dl_ipnetinfo_t */ + uint32_t dli_pad; /* alignment pad */ + uint64_t dli_srczone; /* packet source zone ID (if any) */ + uint64_t dli_dstzone; /* packet dest zone ID (if any) */ +} dl_ipnetinfo_t; + /* * DLPI revision definition history */ @@ -264,7 +272,7 @@ extern "C" { #define DL_IPV6 0x80000002ul /* IPv6 Tunnel Link */ #define SUNW_DL_VNI 0x80000003ul /* Virtual network interface */ #define DL_WIFI 0x80000004ul /* IEEE 802.11 */ - +#define DL_IPNET 0x80000005ul /* ipnet(7D) link */ /* * DLPI provider service supported. 
* These must be allowed to be bitwise-OR for dl_service_mode in diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h index 7bf24fb96a..10ac3b60b6 100644 --- a/usr/src/uts/common/sys/fs/sdev_impl.h +++ b/usr/src/uts/common/sys/fs/sdev_impl.h @@ -271,6 +271,7 @@ typedef enum { #define SDEV_VTOR_INVALID -1 #define SDEV_VTOR_SKIP 0 #define SDEV_VTOR_VALID 1 +#define SDEV_VTOR_STALE 2 /* convenient macros */ #define SDEV_IS_GLOBAL(dv) \ @@ -626,6 +627,7 @@ extern int prof_lookup(); extern void prof_filldir(struct sdev_node *); extern int devpts_validate(struct sdev_node *dv); extern int devnet_validate(struct sdev_node *dv); +extern int devipnet_validate(struct sdev_node *dv); extern int devvt_validate(struct sdev_node *dv); extern void *sdev_get_vtor(struct sdev_node *dv); @@ -657,11 +659,13 @@ extern kmem_cache_t *sdev_node_cache; extern struct vnodeops *sdev_vnodeops; extern struct vnodeops *devpts_vnodeops; extern struct vnodeops *devnet_vnodeops; +extern struct vnodeops *devipnet_vnodeops; extern struct vnodeops *devvt_vnodeops; extern struct sdev_data *sdev_origins; /* mount info for global /dev instance */ extern const fs_operation_def_t sdev_vnodeops_tbl[]; extern const fs_operation_def_t devpts_vnodeops_tbl[]; extern const fs_operation_def_t devnet_vnodeops_tbl[]; +extern const fs_operation_def_t devipnet_vnodeops_tbl[]; extern const fs_operation_def_t devvt_vnodeops_tbl[]; extern const fs_operation_def_t devsys_vnodeops_tbl[]; extern const fs_operation_def_t devpseudo_vnodeops_tbl[]; diff --git a/usr/src/uts/common/sys/hook_event.h b/usr/src/uts/common/sys/hook_event.h index df99a67956..f9f29c845b 100644 --- a/usr/src/uts/common/sys/hook_event.h +++ b/usr/src/uts/common/sys/hook_event.h @@ -74,7 +74,9 @@ typedef enum nic_event { NE_UNPLUMB, NE_UP, NE_DOWN, - NE_ADDRESS_CHANGE + NE_ADDRESS_CHANGE, + NE_LIF_UP, + NE_LIF_DOWN } nic_event_t; typedef void *nic_event_data_t; diff --git a/usr/src/uts/common/sys/neti.h 
b/usr/src/uts/common/sys/neti.h index db4a74dd3d..33276bf3c0 100644 --- a/usr/src/uts/common/sys/neti.h +++ b/usr/src/uts/common/sys/neti.h @@ -127,6 +127,10 @@ struct net_protocol_s { int (*netp_getpmtuenabled)(net_handle_t); int (*netp_getlifaddr)(net_handle_t, phy_if_t, lif_if_t, size_t, net_ifaddr_t [], void *); + int (*neti_getlifzone)(net_handle_t, phy_if_t, lif_if_t, + zoneid_t *); + int (*neti_getlifflags)(net_handle_t, phy_if_t, lif_if_t, + uint64_t *); phy_if_t (*netp_phygetnext)(net_handle_t, phy_if_t); phy_if_t (*netp_phylookup)(net_handle_t, const char *); lif_if_t (*netp_lifgetnext)(net_handle_t, phy_if_t, lif_if_t); @@ -274,6 +278,8 @@ extern int net_getmtu(net_handle_t, phy_if_t, lif_if_t); extern int net_getpmtuenabled(net_handle_t); extern int net_getlifaddr(net_handle_t, phy_if_t, lif_if_t, int, net_ifaddr_t [], void *); +extern zoneid_t net_getlifzone(net_handle_t, phy_if_t, lif_if_t, zoneid_t *); +extern int net_getlifflags(net_handle_t, phy_if_t, lif_if_t, uint64_t *); extern phy_if_t net_phygetnext(net_handle_t, phy_if_t); extern phy_if_t net_phylookup(net_handle_t, const char *); extern lif_if_t net_lifgetnext(net_handle_t, phy_if_t, lif_if_t); diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h index 55acab834c..e44efab458 100644 --- a/usr/src/uts/common/sys/netstack.h +++ b/usr/src/uts/common/sys/netstack.h @@ -78,7 +78,8 @@ typedef id_t netstackid_t; #define NS_IPSECAH 13 #define NS_IPSECESP 14 #define NS_TUN 15 -#define NS_MAX (NS_TUN+1) +#define NS_IPNET 16 +#define NS_MAX (NS_IPNET+1) /* * State maintained for each module which tracks the state of @@ -151,6 +152,7 @@ struct netstack { struct ipsecah_stack *nu_ipsecah; struct ipsecesp_stack *nu_ipsecesp; struct tun_stack *nu_tun; + struct ipnet_stack *nu_ipnet; } nu_s; } netstack_u; #define netstack_modules netstack_u.nu_modules @@ -170,6 +172,7 @@ struct netstack { #define netstack_ipsecah netstack_u.nu_s.nu_ipsecah #define netstack_ipsecesp 
netstack_u.nu_s.nu_ipsecesp #define netstack_tun netstack_u.nu_s.nu_tun +#define netstack_ipnet netstack_u.nu_s.nu_ipnet nm_state_t netstack_m_state[NS_MAX]; /* module state */ diff --git a/usr/src/uts/common/sys/sysmacros.h b/usr/src/uts/common/sys/sysmacros.h index 9695982dcc..89a672db2f 100644 --- a/usr/src/uts/common/sys/sysmacros.h +++ b/usr/src/uts/common/sys/sysmacros.h @@ -57,6 +57,9 @@ extern "C" { #ifndef ABS #define ABS(a) ((a) < 0 ? -(a) : (a)) #endif +#ifndef SIGNOF +#define SIGNOF(a) ((a) < 0 ? -1 : (a) > 0) +#endif #ifdef _KERNEL diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared index 84808ad5aa..0eba71bc6f 100644 --- a/usr/src/uts/intel/Makefile.intel.shared +++ b/usr/src/uts/intel/Makefile.intel.shared @@ -234,6 +234,7 @@ DRV_KMODS += intel_nb5000 DRV_KMODS += ip DRV_KMODS += ip6 DRV_KMODS += ipf +DRV_KMODS += ipnet DRV_KMODS += ippctl DRV_KMODS += ipsecah DRV_KMODS += ipsecesp @@ -643,7 +644,7 @@ GENUNIX_KMODS += genunix # delivered with a released system. However, during development # it is convenient to build and install the SVVS kernel modules. # -SVVS_KMODS += lmodb lmode lmodr lmodt lo tidg tivc tmux +SVVS_KMODS += lmodb lmode lmodr lmodt svvslo tidg tivc tmux SVVS += svvs diff --git a/usr/src/uts/intel/dev/Makefile b/usr/src/uts/intel/dev/Makefile index 78e6685df7..a2255e0197 100644 --- a/usr/src/uts/intel/dev/Makefile +++ b/usr/src/uts/intel/dev/Makefile @@ -23,8 +23,6 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" -# # This makefile drives the production of the dev file system # kernel module. 
# diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index a1352a260b..038b90918e 100644 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -1330,6 +1330,13 @@ fcnname/**/_info: \ END_MODULE(dcopy); #endif +#ifndef IPNET_MODULE + MODULE(ipnet,drv); + STUB(ipnet, ipnet_if_getdev, nomod_zero); + STUB(ipnet, ipnet_walk_if, nomod_zero); + END_MODULE(ipnet); +#endif + / this is just a marker for the area of text that contains stubs ENTRY_NP(stubs_end) diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64 index 01f8be179d..5854497325 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 @@ -337,7 +337,6 @@ udp_random_anon_port udp_rinitv4 udp_rinitv6 udp_valid_levels_arr -udp_version udp_winit udpinfov4 udpinfov6 diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64 index 17712b7c14..065904b585 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.obj64 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 @@ -326,7 +326,6 @@ udp_random_anon_port udp_rinitv4 udp_rinitv6 udp_valid_levels_arr -udp_version udp_winit udpinfov4 udpinfov6 diff --git a/usr/src/uts/intel/ipnet/Makefile b/usr/src/uts/intel/ipnet/Makefile new file mode 100644 index 0000000000..a4be7c1ee6 --- /dev/null +++ b/usr/src/uts/intel/ipnet/Makefile @@ -0,0 +1,105 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the ipnet driver +# kernel module. +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# + +UTSBASE = ../.. + +# +# Define the module and object file sets. +# + +MODULE = ipnet +OBJECTS = $(IPNET_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(IPNET_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/inet/ipnet + +# +# Include common rules. +# + +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# + +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# lint pass one enforcement +# + +CFLAGS += $(CCVERBOSE) + +# +# STREAMS API limitations force us to turn off these lint checks. +# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW + +# +# Depends on ip, neti, and hook. +# +LDFLAGS += -dy -Ndrv/ip -Nmisc/neti -Nmisc/hook + +# +# Default build targets. +# + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/os/device_policy b/usr/src/uts/intel/os/device_policy index 132881289b..6b8060838d 100644 --- a/usr/src/uts/intel/os/device_policy +++ b/usr/src/uts/intel/os/device_policy @@ -65,6 +65,12 @@ softmac read_priv_set=net_rawaccess write_priv_set=net_rawaccess # Virtual network interface access permission # vni read_priv_set=net_rawaccess write_priv_set=net_rawaccess + +# +# IP observability device access permission +# +ipnet read_priv_set=net_observability write_priv_set=net_observability + # # Disk devices. # diff --git a/usr/src/uts/intel/os/minor_perm b/usr/src/uts/intel/os/minor_perm index f37fe98a00..3cb8e090b6 100644 --- a/usr/src/uts/intel/os/minor_perm +++ b/usr/src/uts/intel/os/minor_perm @@ -43,9 +43,9 @@ fasttrap:fasttrap 0666 root sys fbt:fbt 0644 root sys fd:* 0666 root sys id:* 0640 root sys +ipnet:lo0 0666 root sys kstat:* 0666 root sys ksyms:* 0666 root sys -lo:* 0666 root sys lockstat:* 0644 root sys lofi:* 0600 root sys lofi:ctl 0644 root sys @@ -73,6 +73,7 @@ sgen:* 0600 root sys cmdk:* 0640 root sys st:* 0666 root sys sdt:sdt 0644 root sys +svvslo:* 0666 root sys sy:tty 0666 root tty sysmsg:msglog 0600 root sys sysmsg:sysmsg 0600 root sys diff --git a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major index c30f70c0b8..e666846c08 100644 --- a/usr/src/uts/intel/os/name_to_major +++ b/usr/src/uts/intel/os/name_to_major @@ -23,7 +23,7 @@ udp 41 tcp 42 rts 43 arp 44 -lo 46 +svvslo 46 tivc 47 tidg 48 tmux 49 @@ -153,3 +153,4 @@ fm 257 amd_iommu 258 xpvtap 259 nulldriver 260 +ipnet 261 diff --git a/usr/src/uts/sparc/Makefile.sparc.shared b/usr/src/uts/sparc/Makefile.sparc.shared index eae5cf44a6..80a188f75a 100644 --- a/usr/src/uts/sparc/Makefile.sparc.shared +++ b/usr/src/uts/sparc/Makefile.sparc.shared @@ -204,7 +204,7 @@ ALL_DEFS = $(MACHINE_DEFS) $(DEBUG_DEFS) $(OPTION_DEFS) DRV_KMODS += aggr arp bl bofi clone cn conskbd consms cpuid DRV_KMODS += crypto 
cryptoadm devinfo dump DRV_KMODS += dtrace fasttrap fbt lockstat profile sdt systrace -DRV_KMODS += fssnap icmp icmp6 ip ip6 ipsecah +DRV_KMODS += fssnap icmp icmp6 ip ip6 ipnet ipsecah DRV_KMODS += ipsecesp iwscn keysock kmdb kstat ksyms llc1 DRV_KMODS += lofi DRV_KMODS += log logindmux kssl mm nca physmem pm poll pool @@ -453,7 +453,7 @@ DACF_KMODS += consconfig_dacf # delivered with a released system. However, during development # it is convenient to build and install the SVVS kernel modules. # -SVVS_KMODS += lmodb lmode lmodr lmodt lo tidg tivc tmux +SVVS_KMODS += lmodb lmode lmodr lmodt svvslo tidg tivc tmux SVVS += svvs diff --git a/usr/src/uts/sparc/dev/Makefile b/usr/src/uts/sparc/dev/Makefile index ab70028322..87591f6852 100644 --- a/usr/src/uts/sparc/dev/Makefile +++ b/usr/src/uts/sparc/dev/Makefile @@ -21,8 +21,6 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" -# # uts/sparc/dev/Makefile # This makefile drives the production of the /dev file system # kernel module. 
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64 index 01f8be179d..5854497325 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.debug64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 @@ -337,7 +337,6 @@ udp_random_anon_port udp_rinitv4 udp_rinitv6 udp_valid_levels_arr -udp_version udp_winit udpinfov4 udpinfov6 diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64 index 17712b7c14..065904b585 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 @@ -326,7 +326,6 @@ udp_random_anon_port udp_rinitv4 udp_rinitv6 udp_valid_levels_arr -udp_version udp_winit udpinfov4 udpinfov6 diff --git a/usr/src/uts/sparc/ipnet/Makefile b/usr/src/uts/sparc/ipnet/Makefile new file mode 100644 index 0000000000..3140f5581e --- /dev/null +++ b/usr/src/uts/sparc/ipnet/Makefile @@ -0,0 +1,111 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the ipnet driver +# kernel module. 
+# +# sparc architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# + +UTSBASE = ../.. + +# +# Define the module and object file sets. +# + +MODULE = ipnet +OBJECTS = $(IPNET_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(IPNET_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/inet/ipnet + +# +# Include common rules. +# + +include $(UTSBASE)/sparc/Makefile.sparc + +# +# Define targets +# + +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# lint pass one enforcement +# + +CFLAGS += $(CCVERBOSE) + +# +# lint pass one enforcement +# + +CFLAGS += $(CCVERBOSE) + +# +# STREAMS API limitations force us to turn off these lint checks. +# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW + +# +# Depends on ip, neti, and hook. +# +LDFLAGS += -dy -Ndrv/ip -Nmisc/neti -Nmisc/hook + +# +# Default build targets. +# + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# + +include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s index 15377f736c..e45cd91325 100644 --- a/usr/src/uts/sparc/ml/modstubs.s +++ b/usr/src/uts/sparc/ml/modstubs.s @@ -1282,6 +1282,13 @@ stubs_base: END_MODULE(dcopy); #endif +#ifndef IPNET_MODULE + MODULE(ipnet,drv); + STUB(ipnet, ipnet_if_getdev, nomod_zero); + STUB(ipnet, ipnet_walk_if, nomod_zero); + END_MODULE(ipnet); +#endif + ! 
this is just a marker for the area of text that contains stubs .seg ".text" .global stubs_end diff --git a/usr/src/uts/sparc/os/device_policy b/usr/src/uts/sparc/os/device_policy index 25da20334d..8cbf702e68 100644 --- a/usr/src/uts/sparc/os/device_policy +++ b/usr/src/uts/sparc/os/device_policy @@ -68,6 +68,12 @@ softmac read_priv_set=net_rawaccess write_priv_set=net_rawaccess # Virtual network interface access permission # vni read_priv_set=net_rawaccess write_priv_set=net_rawaccess + +# +# IP observability device access permission +# +ipnet read_priv_set=net_observability write_priv_set=net_observability + # # Disk devices. # diff --git a/usr/src/uts/sparc/os/minor_perm b/usr/src/uts/sparc/os/minor_perm index a3e0b2349f..8e4feb66d2 100644 --- a/usr/src/uts/sparc/os/minor_perm +++ b/usr/src/uts/sparc/os/minor_perm @@ -44,9 +44,9 @@ fd:* 0666 root sys fasttrap:fasttrap 0666 root sys fssnap:* 0640 root sys fssnap:ctl 0666 root sys +ipnet:lo0 0666 root sys kstat:* 0666 root sys ksyms:* 0666 root sys -lo:* 0666 root sys lockstat:* 0644 root sys lofi:* 0600 root sys lofi:ctl 0644 root sys @@ -82,6 +82,7 @@ su:[a-z],cu 0600 uucp uucp su:ssp 0600 root sys su:sspctl 0600 root sys SUNW,fdtwo:* 0666 root sys +svvslo:* 0666 root sys sy:tty 0666 root tty sysmsg:msglog 0600 root sys sysmsg:sysmsg 0600 root sys diff --git a/usr/src/uts/sparc/os/name_to_major b/usr/src/uts/sparc/os/name_to_major index 37821178ea..ff58cf5113 100644 --- a/usr/src/uts/sparc/os/name_to_major +++ b/usr/src/uts/sparc/os/name_to_major @@ -39,7 +39,7 @@ udp 41 tcp 42 rts 43 arp 44 -lo 46 +svvslo 46 tivc 47 tidg 48 tmux 49 @@ -226,3 +226,4 @@ mem_cache 277 bmc 278 fm 279 nulldriver 280 +ipnet 281 diff --git a/usr/src/xmod/xmod_files b/usr/src/xmod/xmod_files index 6d262993b9..eaca638b7d 100644 --- a/usr/src/xmod/xmod_files +++ b/usr/src/xmod/xmod_files @@ -4,9 +4,9 @@ ../closed/uts/common/io/lmode.c ../closed/uts/common/io/lmodr.c ../closed/uts/common/io/lmodt.c -../closed/uts/common/io/lo.c 
-../closed/uts/common/io/lo.conf -../closed/uts/common/sys/lo.h +../closed/uts/common/io/svvslo.c +../closed/uts/common/io/svvslo.conf +../closed/uts/common/sys/svvslo.h ../closed/uts/common/io/tidg.c ../closed/uts/common/sys/tidg.h ../closed/uts/common/io/tidg.conf |