summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarren Reed <Darren.Reed@Sun.COM>2009-09-24 07:28:12 -0700
committerDarren Reed <Darren.Reed@Sun.COM>2009-09-24 07:28:12 -0700
commit0a0e9771ca0211c15f3ac4466b661c145feeb9e4 (patch)
tree9579700b2fec7e9c9c57beeca83bda8681eb9e7a
parentaf1222373b60d56d6b0e630911372d4162b7787b (diff)
downloadillumos-joyent-0a0e9771ca0211c15f3ac4466b661c145feeb9e4.tar.gz
PSARC/2009/232 Solaris Packet Capture
PSARC/2009/403 kstats for ipnet
6824047 every downcall function should have a "notsupported" function
6822740 RFE: provide PF_PACKET for developers on OpenSolaris
6822741 RFE: Solaris needs BPF to improve the packet capture story
6867683 RFE: need to be able to retrieve physical interface flags
-rw-r--r--usr/src/cmd/cmd-inet/etc/sock2path2
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c20
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c20
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c50
-rw-r--r--usr/src/cmd/devfsadm/misc_link.c3
-rw-r--r--usr/src/cmd/truss/systable.c5
-rw-r--r--usr/src/lib/brand/native/zone/platform.xml1
-rw-r--r--usr/src/pkgdefs/Makefile2
-rw-r--r--usr/src/pkgdefs/SUNWpacketh/Makefile37
-rw-r--r--usr/src/pkgdefs/SUNWpacketh/pkginfo.tmpl56
-rw-r--r--usr/src/pkgdefs/SUNWpacketh/prototype_com51
-rw-r--r--usr/src/pkgdefs/SUNWpacketh/prototype_i38638
-rw-r--r--usr/src/pkgdefs/SUNWpacketh/prototype_sparc38
-rw-r--r--usr/src/pkgdefs/SUNWpacketu/Makefile38
-rw-r--r--usr/src/pkgdefs/SUNWpacketu/pkginfo.tmpl56
-rw-r--r--usr/src/pkgdefs/SUNWpacketu/postinstall.tmpl29
-rw-r--r--usr/src/pkgdefs/SUNWpacketu/preremove.tmpl29
-rw-r--r--usr/src/pkgdefs/SUNWpacketu/prototype_com47
-rw-r--r--usr/src/pkgdefs/SUNWpacketu/prototype_i38654
-rw-r--r--usr/src/pkgdefs/SUNWpacketu/prototype_sparc53
-rw-r--r--usr/src/tools/scripts/bfu.sh33
-rw-r--r--usr/src/uts/Makefile2
-rw-r--r--usr/src/uts/Makefile.targ3
-rw-r--r--usr/src/uts/Makefile.uts1
-rw-r--r--usr/src/uts/common/Makefile.files9
-rw-r--r--usr/src/uts/common/Makefile.rules7
-rw-r--r--usr/src/uts/common/fs/sockfs/sock_notsupp.c178
-rw-r--r--usr/src/uts/common/inet/ip.h58
-rw-r--r--usr/src/uts/common/inet/ip/ip.c180
-rw-r--r--usr/src/uts/common/inet/ip/ip6.c26
-rw-r--r--usr/src/uts/common/inet/ip/ip_netinfo.c60
-rw-r--r--usr/src/uts/common/inet/ip_stack.h24
-rw-r--r--usr/src/uts/common/inet/ipnet.h81
-rw-r--r--usr/src/uts/common/inet/ipnet/ipnet.c1026
-rw-r--r--usr/src/uts/common/inet/ipnet/ipnet_bpf.c193
-rw-r--r--usr/src/uts/common/inet/sockmods/netpacket/Makefile47
-rw-r--r--usr/src/uts/common/inet/sockmods/netpacket/packet.h203
-rw-r--r--usr/src/uts/common/inet/sockmods/sockmod_pfp.c1414
-rw-r--r--usr/src/uts/common/inet/tcp/tcp.c35
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_fusion.c5
-rw-r--r--usr/src/uts/common/inet/udp/udp.c12
-rw-r--r--usr/src/uts/common/io/bpf/BPF.LICENCE33
-rw-r--r--usr/src/uts/common/io/bpf/bpf.c2047
-rw-r--r--usr/src/uts/common/io/bpf/bpf.conf26
-rw-r--r--usr/src/uts/common/io/bpf/bpf_dlt.c103
-rw-r--r--usr/src/uts/common/io/bpf/bpf_filter.c576
-rw-r--r--usr/src/uts/common/io/bpf/bpf_mac.c165
-rw-r--r--usr/src/uts/common/io/bpf/bpf_mod.c443
-rw-r--r--usr/src/uts/common/io/bpf/net/Makefile47
-rw-r--r--usr/src/uts/common/io/bpf/net/bpf.h298
-rw-r--r--usr/src/uts/common/io/bpf/net/bpfdesc.h235
-rw-r--r--usr/src/uts/common/io/bpf/net/dlt.h170
-rw-r--r--usr/src/uts/common/io/dls/dls_link.c38
-rw-r--r--usr/src/uts/common/io/dls/dls_mgmt.c55
-rw-r--r--usr/src/uts/common/io/mac/mac_client.c44
-rw-r--r--usr/src/uts/common/net/if.h2
-rw-r--r--usr/src/uts/common/os/netstack.c8
-rw-r--r--usr/src/uts/common/os/policy.c6
-rw-r--r--usr/src/uts/common/sys/dlpi.h12
-rw-r--r--usr/src/uts/common/sys/dls_impl.h7
-rw-r--r--usr/src/uts/common/sys/hook_event.h53
-rw-r--r--usr/src/uts/common/sys/mac.h2
-rw-r--r--usr/src/uts/common/sys/mac_client.h3
-rw-r--r--usr/src/uts/common/sys/mac_client_impl.h1
-rw-r--r--usr/src/uts/common/sys/neti.h3
-rw-r--r--usr/src/uts/common/sys/netstack.h1
-rw-r--r--usr/src/uts/common/sys/policy.h1
-rw-r--r--usr/src/uts/common/sys/socket.h19
-rw-r--r--usr/src/uts/common/sys/socket_impl.h31
-rw-r--r--usr/src/uts/common/sys/socket_proto.h32
-rw-r--r--usr/src/uts/common/sys/sockio.h3
-rw-r--r--usr/src/uts/intel/Makefile.intel.shared2
-rw-r--r--usr/src/uts/intel/bpf/Makefile97
-rw-r--r--usr/src/uts/intel/dev/Makefile1
-rw-r--r--usr/src/uts/intel/dld/Makefile5
-rw-r--r--usr/src/uts/intel/dls/Makefile4
-rw-r--r--usr/src/uts/intel/ip/Makefile8
-rw-r--r--usr/src/uts/intel/ipnet/Makefile7
-rw-r--r--usr/src/uts/intel/iptun/Makefile1
-rw-r--r--usr/src/uts/intel/mac/Makefile3
-rw-r--r--usr/src/uts/intel/mac_ether/Makefile6
-rw-r--r--usr/src/uts/intel/mac_ib/Makefile6
-rw-r--r--usr/src/uts/intel/mac_wifi/Makefile6
-rw-r--r--usr/src/uts/intel/os/minor_perm1
-rw-r--r--usr/src/uts/intel/os/name_to_major1
-rw-r--r--usr/src/uts/intel/sockpfp/Makefile95
-rw-r--r--usr/src/uts/intel/spdsock/Makefile6
-rw-r--r--usr/src/uts/sparc/Makefile.sparc.shared2
-rw-r--r--usr/src/uts/sparc/bpf/Makefile97
-rw-r--r--usr/src/uts/sparc/dev/Makefile1
-rw-r--r--usr/src/uts/sparc/dld/Makefile5
-rw-r--r--usr/src/uts/sparc/dls/Makefile4
-rw-r--r--usr/src/uts/sparc/ip/Makefile6
-rw-r--r--usr/src/uts/sparc/ipnet/Makefile7
-rw-r--r--usr/src/uts/sparc/iptun/Makefile1
-rw-r--r--usr/src/uts/sparc/mac/Makefile3
-rw-r--r--usr/src/uts/sparc/mac_ether/Makefile6
-rw-r--r--usr/src/uts/sparc/mac_ib/Makefile6
-rw-r--r--usr/src/uts/sparc/mac_wifi/Makefile6
-rw-r--r--usr/src/uts/sparc/os/minor_perm1
-rw-r--r--usr/src/uts/sparc/os/name_to_major1
-rw-r--r--usr/src/uts/sparc/sockpfp/Makefile96
-rw-r--r--usr/src/uts/sparc/spdsock/Makefile9
103 files changed, 8726 insertions, 464 deletions
diff --git a/usr/src/cmd/cmd-inet/etc/sock2path b/usr/src/cmd/cmd-inet/etc/sock2path
index cfcfe8bc4e..a56b540b1b 100644
--- a/usr/src/cmd/cmd-inet/etc/sock2path
+++ b/usr/src/cmd/cmd-inet/etc/sock2path
@@ -54,3 +54,5 @@
29 4 1 /dev/spdsock
31 1 0 trill
+ 32 1 0 sockpfp
+ 32 4 0 sockpfp
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
index 2fcb69bf8a..5c6bde0cd6 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
@@ -1689,15 +1689,15 @@ interpret_ipnet(int flags, char *header, int elen, int origlen)
datalen = blen;
}
- if (dl.dli_srczone == ALL_ZONES)
+ if (dl.dli_zsrc == ALL_ZONES)
sprintf(szone, "Unknown");
else
- sprintf(szone, "%llu", BE_64(dl.dli_srczone));
+ sprintf(szone, "%lu", BE_32(dl.dli_zsrc));
- if (dl.dli_dstzone == ALL_ZONES)
+ if (dl.dli_zdst == ALL_ZONES)
sprintf(dzone, "Unknown");
else
- sprintf(dzone, "%llu", BE_64(dl.dli_dstzone));
+ sprintf(dzone, "%lu", BE_32(dl.dli_zdst));
if (flags & F_SUM) {
(void) snprintf(get_sum_line(), MAXLINE,
@@ -1718,20 +1718,20 @@ interpret_ipnet(int flags, char *header, int elen, int origlen)
(void) snprintf(get_line(0, 0), get_line_remain(),
"dli_version = %d", dl.dli_version);
(void) snprintf(get_line(0, 0), get_line_remain(),
- "dli_type = %d", dl.dli_ipver);
+ "dli_family = %d", dl.dli_family);
(void) snprintf(get_line(0, 2), get_line_remain(),
- "dli_srczone = %s", szone);
+ "dli_zsrc = %s", szone);
(void) snprintf(get_line(0, 2), get_line_remain(),
- "dli_dstzone = %s", dzone);
+ "dli_zdst = %s", dzone);
show_space();
}
memcpy(data, off, len);
- switch (dl.dli_ipver) {
- case IPV4_VERSION:
+ switch (dl.dli_family) {
+ case AF_INET:
(void) interpret_ip(flags, (struct ip *)data, len);
break;
- case IPV6_VERSION:
+ case AF_INET6:
(void) interpret_ipv6(flags, (ip6_t *)data, len);
break;
default:
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c
index fab3922f14..e68e5f9ce7 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c
@@ -96,8 +96,8 @@
/*
* Offset for the source and destination zoneid in the ipnet header.
*/
-#define IPNET_SRCZONE_OFFSET 8
-#define IPNET_DSTZONE_OFFSET 16
+#define IPNET_SRCZONE_OFFSET 16
+#define IPNET_DSTZONE_OFFSET 20
int eaddr; /* need ethernet addr */
@@ -1047,17 +1047,13 @@ compare_value_mask(uint_t offset, uint_t len, uint_t val, int mask)
* byte order.
*/
static void
-compare_value_zone(uint_t offset, uint64_t val)
+compare_value_zone(uint_t offset, uint32_t val)
{
- int i;
- for (i = 0; i < sizeof (uint64_t) / 4; i++) {
- load_const(ntohl(((uint32_t *)&val)[i]));
- load_value(offset + i * 4, 4);
- emitop(OP_EQ);
- if (i != 0)
- emitop(OP_AND);
- }
+ /* val is one 32-bit word in network byte order; compare it once. */
+ load_const(ntohl(val));
+ load_value(offset, 4);
+ emitop(OP_EQ);
}
/* Emit an operator into the code array */
@@ -1728,7 +1724,7 @@ ipaddr_match(enum direction which, char *hostname, int inet_type)
* Match on zoneid. The arg zone passed in is in network byte order.
*/
static void
-zone_match(enum direction which, uint64_t zone)
+zone_match(enum direction which, uint32_t zone)
{
switch (which) {
@@ -2546,7 +2542,7 @@ primary()
next();
if (tokentype != NUMBER)
pr_err("zoneid expected");
- zone_match(dir, BE_64((uint64_t)(tokenval)));
+ zone_match(dir, BE_32((uint32_t)(tokenval)));
opstack++;
next();
break;
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c
index a4df9c1595..dc4a47b2d5 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -131,8 +131,8 @@ static network_table_t ib_network_mapping_table[] = {
};
static network_table_t ipnet_network_mapping_table[] = {
- { "ip", (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION) },
- { "ip6", (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION) },
+ { "ip", (DL_IPNETINFO_VERSION << 8 | AF_INET) },
+ { "ip6", (DL_IPNETINFO_VERSION << 8 | AF_INET6) },
{ "NULL", -1 }
};
@@ -157,35 +157,35 @@ static transport_table_t ether_transport_mapping_table[] = {
};
static transport_table_t ipnet_transport_mapping_table[] = {
- {IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+ {IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
IPV4_TYPE_HEADER_OFFSET},
- {IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+ {IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
IPV6_TYPE_HEADER_OFFSET},
- {IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+ {IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
IPV4_TYPE_HEADER_OFFSET},
- {IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+ {IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
IPV6_TYPE_HEADER_OFFSET},
- {IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+ {IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | AF_INET),
IPV4_TYPE_HEADER_OFFSET},
- {IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+ {IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
IPV6_TYPE_HEADER_OFFSET},
- {IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+ {IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
IPV4_TYPE_HEADER_OFFSET},
- {IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+ {IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
IPV6_TYPE_HEADER_OFFSET},
- {IPPROTO_ICMP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+ {IPPROTO_ICMP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
IPV4_TYPE_HEADER_OFFSET},
- {IPPROTO_ICMPV6, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+ {IPPROTO_ICMPV6, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
IPV6_TYPE_HEADER_OFFSET},
- {IPPROTO_ENCAP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+ {IPPROTO_ENCAP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
IPV4_TYPE_HEADER_OFFSET},
- {IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+ {IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
IPV4_TYPE_HEADER_OFFSET},
- {IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+ {IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
IPV6_TYPE_HEADER_OFFSET},
- {IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+ {IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | AF_INET),
IPV4_TYPE_HEADER_OFFSET},
- {IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+ {IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
IPV6_TYPE_HEADER_OFFSET},
{-1, 0, 0} /* must be the final entry */
};
@@ -228,8 +228,8 @@ datalink_t dl;
#define IPV6_SRCADDR_OFFSET (dl.dl_link_header_len + 8)
#define IPV6_DSTADDR_OFFSET (dl.dl_link_header_len + 24)
-#define IPNET_SRCZONE_OFFSET 8
-#define IPNET_DSTZONE_OFFSET 16
+#define IPNET_SRCZONE_OFFSET 16
+#define IPNET_DSTZONE_OFFSET 20
static int inBrace = 0, inBraceOR = 0;
static int foundOR = 0;
@@ -577,15 +577,15 @@ pf_compare_value_mask_generic(int offset, uint_t len, uint_t val, int mask,
}
/*
- * Like pf_compare_value() but compare on a 64-bit zoneid value.
+ * Like pf_compare_value() but compare on a 32-bit zoneid value.
* The argument val passed in is in network byte order.
*/
static void
-pf_compare_zoneid(int offset, uint64_t val)
+pf_compare_zoneid(int offset, uint32_t val)
{
int i;
- for (i = 0; i < sizeof (uint64_t) / 2; i ++) {
+ for (i = 0; i < sizeof (uint32_t) / 2; i ++) {
pf_emit(ENF_PUSHWORD + offset / 2 + i);
pf_emit(ENF_PUSHLIT | ENF_EQ);
pf_emit(((uint16_t *)&val)[i]);
@@ -950,7 +950,7 @@ pf_netaddr_match(which, netname)
* The zoneid passed in is in network byte order.
*/
static void
-pf_match_zone(enum direction which, uint64_t zoneid)
+pf_match_zone(enum direction which, uint32_t zoneid)
{
if (dl.dl_type != DL_IPNET)
pr_err("zone filter option unsupported on media");
@@ -1440,7 +1440,7 @@ pf_primary()
next();
if (tokentype != NUMBER)
pr_err("zoneid expected after inet");
- pf_match_zone(dir, BE_64((uint64_t)(tokenval)));
+ pf_match_zone(dir, BE_32((uint32_t)(tokenval)));
opstack++;
next();
break;
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index c8cbfaeb10..222699e479 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -104,7 +104,8 @@ static devfsadm_create_t misc_cbt[] = {
"(^ip$)|(^tcp$)|(^udp$)|(^icmp$)|(^sctp$)|"
"(^ip6$)|(^tcp6$)|(^udp6$)|(^icmp6$)|(^sctp6$)|"
"(^rts$)|(^arp$)|(^ipsecah$)|(^ipsecesp$)|(^keysock$)|(^spdsock$)|"
- "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)|(^iptunq)",
+ "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)|(^iptunq)|"
+ "(^bpf$)",
TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name
},
{ "pseudo", "ddi_pseudo",
diff --git a/usr/src/cmd/truss/systable.c b/usr/src/cmd/truss/systable.c
index c98c4be557..8e4c14ed1f 100644
--- a/usr/src/cmd/truss/systable.c
+++ b/usr/src/cmd/truss/systable.c
@@ -1500,9 +1500,10 @@ const char * const afcodes[] = {
"NCA", /* 28 */
"POLICY", /* 29 */
"RDS", /* 30 */
- "TRILL" /* 31 */
+ "TRILL", /* 31 */
+ "PACKET" /* 32 */
};
-#if MAX_AFCODES != 32
+#if MAX_AFCODES != 33
#error Need to update address-family table
#endif
diff --git a/usr/src/lib/brand/native/zone/platform.xml b/usr/src/lib/brand/native/zone/platform.xml
index 0aacc8c401..e988200bde 100644
--- a/usr/src/lib/brand/native/zone/platform.xml
+++ b/usr/src/lib/brand/native/zone/platform.xml
@@ -44,6 +44,7 @@
<!-- Devices to create under /dev -->
<device match="arp" />
+ <device match="bpf" />
<device match="conslog" />
<device match="cpu/self/cpuid" />
<device match="crypto" />
diff --git a/usr/src/pkgdefs/Makefile b/usr/src/pkgdefs/Makefile
index a4d95b9c59..517cbd3dd9 100644
--- a/usr/src/pkgdefs/Makefile
+++ b/usr/src/pkgdefs/Makefile
@@ -377,6 +377,8 @@ COMMON_SUBDIRS= \
SUNWosdem \
SUNWypr \
SUNWypu \
+ SUNWpacketh \
+ SUNWpacketu \
SUNWpamsc \
SUNWpapi \
SUNWpcan \
diff --git a/usr/src/pkgdefs/SUNWpacketh/Makefile b/usr/src/pkgdefs/SUNWpacketh/Makefile
new file mode 100644
index 0000000000..d941f0f011
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketh/Makefile
@@ -0,0 +1,37 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+DATAFILES += depend
+# NOTE(review): this package delivers the BPF/PF_PACKET header files, yet
+# the licence referenced below is the IP Filter one. The SUNWpacketu
+# Makefile references ../../uts/common/io/bpf/BPF.LICENCE instead --
+# confirm which licence file is intended for these headers.
+LICENSEFILES += ../../cmd/ipf/tools/IPFILTER.LICENCE
+CDDL=
+
+.KEEP_STATE:
+
+all: $(FILES)
+install: all pkg
+
+include ../Makefile.targ
diff --git a/usr/src/pkgdefs/SUNWpacketh/pkginfo.tmpl b/usr/src/pkgdefs/SUNWpacketh/pkginfo.tmpl
new file mode 100644
index 0000000000..e534973117
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketh/pkginfo.tmpl
@@ -0,0 +1,56 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWpacketh"
+NAME="Solaris Packet header files"
+ARCH="ISA"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGTYPE="usr"
+MAXINST="1000"
+CATEGORY="system"
+DESC="C header files for BPF/PF_PACKET"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+CLASSES="none"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="true"
+SUNW_PKG_THISZONE="false"
+#VSTOCK="<reserved by Release Engineering for package part #>"
+#ISTATES="<developer defined>"
+#RSTATES='<developer defined>'
+#ULIMIT="<developer defined>"
+#ORDER="<developer defined>"
+#PSTAMP="<developer defined>"
+#INTONLY="<developer defined>"
diff --git a/usr/src/pkgdefs/SUNWpacketh/prototype_com b/usr/src/pkgdefs/SUNWpacketh/prototype_com
new file mode 100644
index 0000000000..97b2e2036f
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketh/prototype_com
@@ -0,0 +1,51 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+i depend
+#
+# source locations relative to the prototype file
+#
+# SUNWpacketh
+#
+d none usr 755 root sys
+d none usr/include 755 root bin
+d none usr/include/net 755 root bin
+f none usr/include/net/bpf.h 644 root bin
+f none usr/include/net/bpfdesc.h 644 root bin
+f none usr/include/net/dlt.h 644 root bin
+d none usr/include/netpacket 755 root bin
+f none usr/include/netpacket/packet.h 644 root bin
diff --git a/usr/src/pkgdefs/SUNWpacketh/prototype_i386 b/usr/src/pkgdefs/SUNWpacketh/prototype_i386
new file mode 100644
index 0000000000..11675a0e66
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketh/prototype_i386
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
diff --git a/usr/src/pkgdefs/SUNWpacketh/prototype_sparc b/usr/src/pkgdefs/SUNWpacketh/prototype_sparc
new file mode 100644
index 0000000000..11675a0e66
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketh/prototype_sparc
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
diff --git a/usr/src/pkgdefs/SUNWpacketu/Makefile b/usr/src/pkgdefs/SUNWpacketu/Makefile
new file mode 100644
index 0000000000..52cef8a8bd
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketu/Makefile
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+TMPLFILES += postinstall preremove
+LICENSEFILES += ../../uts/common/io/bpf/BPF.LICENCE
+CDDL=
+
+.KEEP_STATE:
+
+all: $(FILES)
+install: all pkg
+
+include ../Makefile.targ
+include ../Makefile.prtarg
diff --git a/usr/src/pkgdefs/SUNWpacketu/pkginfo.tmpl b/usr/src/pkgdefs/SUNWpacketu/pkginfo.tmpl
new file mode 100644
index 0000000000..fc016942eb
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketu/pkginfo.tmpl
@@ -0,0 +1,56 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWpacketu"
+NAME="Solaris Kernel Packet (Usr)"
+ARCH="ISA"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGTYPE="root"
+MAXINST="1000"
+CATEGORY="system"
+DESC="BPF/PF_PACKET kernel packet modules"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+CLASSES="none preserve manifest"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="false"
+SUNW_PKG_THISZONE="false"
+#VSTOCK="<reserved by Release Engineering for package part #>"
+#ISTATES="<developer defined>"
+#RSTATES='<developer defined>'
+#ULIMIT="<developer defined>"
+#ORDER="<developer defined>"
+#PSTAMP="<developer defined>"
+#INTONLY="<developer defined>"
diff --git a/usr/src/pkgdefs/SUNWpacketu/postinstall.tmpl b/usr/src/pkgdefs/SUNWpacketu/postinstall.tmpl
new file mode 100644
index 0000000000..284a167fd8
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketu/postinstall.tmpl
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+include drv_utils
+
+pkg_drvadd bpf
+
+exit $?
diff --git a/usr/src/pkgdefs/SUNWpacketu/preremove.tmpl b/usr/src/pkgdefs/SUNWpacketu/preremove.tmpl
new file mode 100644
index 0000000000..b2c7ae63b2
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketu/preremove.tmpl
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+include drv_utils
+
+pkg_drvrem bpf
+
+exit $?
diff --git a/usr/src/pkgdefs/SUNWpacketu/prototype_com b/usr/src/pkgdefs/SUNWpacketu/prototype_com
new file mode 100644
index 0000000000..b7297045c5
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketu/prototype_com
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+#
+# source locations relative to the prototype file
+#
+# SUNWpacketu
+#
+d none usr 755 root sys
+d none usr/kernel 755 root sys
+d none usr/kernel/drv 755 root sys
+f none usr/kernel/drv/bpf.conf 644 root sys
+d none usr/kernel/socketmod 755 root sys
diff --git a/usr/src/pkgdefs/SUNWpacketu/prototype_i386 b/usr/src/pkgdefs/SUNWpacketu/prototype_i386
new file mode 100644
index 0000000000..a895872d1f
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketu/prototype_i386
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are I386 specific here
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWpacketu
+#
+f none usr/kernel/drv/bpf 755 root sys
+d none usr/kernel/drv/amd64 755 root sys
+f none usr/kernel/drv/amd64/bpf 755 root sys
+f none usr/kernel/socketmod/sockpfp 755 root sys
+d none usr/kernel/socketmod/amd64 755 root sys
+f none usr/kernel/socketmod/amd64/sockpfp 755 root sys
diff --git a/usr/src/pkgdefs/SUNWpacketu/prototype_sparc b/usr/src/pkgdefs/SUNWpacketu/prototype_sparc
new file mode 100644
index 0000000000..78426edaa5
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWpacketu/prototype_sparc
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are SPARC specific here
+#
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWpacketu
+#
+d none usr/kernel/drv/sparcv9 755 root sys
+f none usr/kernel/drv/sparcv9/bpf 755 root sys
+d none usr/kernel/socketmod/sparcv9 755 root sys
+f none usr/kernel/socketmod/sparcv9/sockpfp 755 root sys
diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh
index c9fa13e192..64414a36cf 100644
--- a/usr/src/tools/scripts/bfu.sh
+++ b/usr/src/tools/scripts/bfu.sh
@@ -809,6 +809,37 @@ update_aac_conf()
mv -f /tmp/aac.conf.$$ $conffile
}
+update_etc_inet_sock2path()
+{
+ #
+ # Add or remove the PF_PACKET (sockpfp) entries in the sock2path
+ # configuration file to match the contents of the new archive.
+ # The "already present" test must use the same family number (32)
+ # as the entries written below, or they are appended on every bfu.
+ # A newly added socket stays inactive until soconfig runs at the
+ # next reboot; after removal the kernel configuration stays active
+ # until reboot and sockets work until the module is unloaded from
+ # the kernel, after which applications will fail.
+ #
+ sockfile=$rootprefix/etc/inet/sock2path
+ xgrep=/usr/xpg4/bin/grep
+
+ ${ZCAT} ${cpiodir}/generic.usr$ZFIX | cpio -it 2>/dev/null |
+ ${xgrep} -q sockpfp
+ if [ $? -eq 1 ] ; then
+ ${xgrep} -v -E '^ 32 [14] 0 sockpfp' \
+ ${sockfile} > /tmp/sock2path.tmp.$$
+ cp /tmp/sock2path.tmp.$$ ${sockfile}
+ else
+ if ! ${xgrep} -q -E \
+ '^ 32 [14] 0 sockpfp' ${sockfile}; then
+ echo '' >> ${sockfile}
+ echo ' 32 1 0 sockpfp' >> ${sockfile}
+ echo ' 32 4 0 sockpfp' >> ${sockfile}
+ fi
+ fi
+}
+
# update x86 version mpt.conf for property tape
mpttapeprop='[ ]*tape[ ]*=[ ]*"sctp"[ ]*;'
update_mptconf_i386()
@@ -8525,6 +8556,8 @@ mondo_loop() {
update_aac_conf
+ update_etc_inet_sock2path
+
if [ $target_isa = i386 ]; then
update_mptconf_i386
diff --git a/usr/src/uts/Makefile b/usr/src/uts/Makefile
index 8fc3fe1166..febe148c93 100644
--- a/usr/src/uts/Makefile
+++ b/usr/src/uts/Makefile
@@ -140,6 +140,8 @@ COMMON_HDRDIRS= common/avs \
common/inet/ipf/netinet \
common/inet/kssl \
common/inet/nca \
+ common/inet/sockmods/netpacket \
+ common/io/bpf/net \
common/ipp \
common/net \
common/netinet \
diff --git a/usr/src/uts/Makefile.targ b/usr/src/uts/Makefile.targ
index e4e26bbb39..81685eb5b5 100644
--- a/usr/src/uts/Makefile.targ
+++ b/usr/src/uts/Makefile.targ
@@ -221,6 +221,9 @@ $(USR_FS_DIR)/%: $(OBJS_DIR)/% $(USR_FS_DIR) FRC
$(USR_SCHED_DIR)/%: $(OBJS_DIR)/% $(USR_SCHED_DIR) FRC
$(INS.file)
+$(USR_SOCK_DIR)/%: $(OBJS_DIR)/% $(USR_SOCK_DIR) FRC
+ $(INS.file)
+
$(USR_STRMOD_DIR)/%: $(OBJS_DIR)/% $(USR_STRMOD_DIR) FRC
$(INS.file)
diff --git a/usr/src/uts/Makefile.uts b/usr/src/uts/Makefile.uts
index 22262aef03..70de98bd11 100644
--- a/usr/src/uts/Makefile.uts
+++ b/usr/src/uts/Makefile.uts
@@ -557,6 +557,7 @@ USR_MOD_DIRS_32 += $(USR_STRMOD_DIR_32) $(USR_SYS_DIR_32)
USR_MOD_DIRS_32 += $(USR_MISC_DIR_32) $(USR_DACF_DIR_32)
USR_MOD_DIRS_32 += $(USR_PCBE_DIR_32)
USR_MOD_DIRS_32 += $(USR_DTRACE_DIR_32) $(USR_BRAND_DIR_32)
+USR_MOD_DIRS_32 += $(USR_SOCK_DIR_32)
#
#
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 6a80a808d8..bf747815c4 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -541,7 +541,7 @@ NETI_OBJS += neti_impl.o neti_mod.o neti_stack.o
KEYSOCK_OBJS += keysockddi.o keysock.o keysock_opt_data.o
-IPNET_OBJS += ipnet.o
+IPNET_OBJS += ipnet.o ipnet_bpf.o
SPDSOCK_OBJS += spdsockddi.o spdsock.o spdsock_opt_data.o
@@ -572,6 +572,8 @@ SDP_SOCK_MOD_OBJS += sockmod_sdp.o socksdp.o socksdpsubr.o
SCTP_SOCK_MOD_OBJS += sockmod_sctp.o socksctp.o socksctpsubr.o
+PFP_SOCK_MOD_OBJS += sockmod_pfp.o
+
RDS_OBJS += rdsddi.o rdssubr.o rds_opt.o rds_ioctl.o
RDSIB_OBJS += rdsib.o rdsib_ib.o rdsib_cm.o rdsib_ep.o rdsib_buf.o \
@@ -596,6 +598,8 @@ TL_OBJS += tl.o
DUMP_OBJS += dump.o
+BPF_OBJS += bpf.o bpf_filter.o bpf_mod.o bpf_dlt.o bpf_mac.o
+
CLONE_OBJS += clone.o
CN_OBJS += cons.o
@@ -1218,7 +1222,8 @@ SPEC_OBJS += specsubr.o specvfsops.o specvnops.o
SOCK_OBJS += socksubr.o sockvfsops.o sockparams.o \
socksyscalls.o socktpi.o sockstr.o sockssl.o \
sockcommon_vnops.o sockcommon_subr.o \
- sockcommon_sops.o sockcommon.o socknotify.o \
+ sockcommon_sops.o sockcommon.o \
+ sock_notsupp.o socknotify.o \
nl7c.o nl7curi.o nl7chttp.o nl7clogd.o \
nl7cnca.o sodirect.o
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 04f4e755dd..10bcb14bfb 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -620,6 +620,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/bge/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/bpf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/cardbus/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1882,6 +1886,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/audio/drv/audiovia97/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/bfe/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/bpf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/bge/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/common/fs/sockfs/sock_notsupp.c b/usr/src/uts/common/fs/sockfs/sock_notsupp.c
new file mode 100644
index 0000000000..ea612d298d
--- /dev/null
+++ b/usr/src/uts/common/fs/sockfs/sock_notsupp.c
@@ -0,0 +1,178 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/socket_proto.h>
+
+
+/*ARGSUSED*/
+int
+sock_accept_notsupp(sock_lower_handle_t low1, sock_lower_handle_t low2,
+ sock_upper_handle_t upper, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_bind_notsupp(sock_lower_handle_t handle, struct sockaddr *name,
+ socklen_t namelen, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_listen_notsupp(sock_lower_handle_t handle, int backlog,
+ struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_connect_notsupp(sock_lower_handle_t handle,
+ const struct sockaddr *name, socklen_t namelen, sock_connid_t *conp,
+ struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_getsockname_notsupp(sock_lower_handle_t handle, struct sockaddr *sa,
+ socklen_t *len, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_getpeername_notsupp(sock_lower_handle_t handle, struct sockaddr *addr,
+ socklen_t *addrlen, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_getsockopt_notsupp(sock_lower_handle_t handle, int level,
+ int option_name, void *optval, socklen_t *optlenp, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_setsockopt_notsupp(sock_lower_handle_t handle, int level,
+ int option_name, const void *optval, socklen_t optlen, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_send_notsupp(sock_lower_handle_t handle, mblk_t *mp,
+ struct msghdr *msg, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_senduio_notsupp(sock_lower_handle_t handle, struct uio *uiop,
+ struct nmsghdr *msg, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_recvuio_notsupp(sock_lower_handle_t handle, struct uio *uiop,
+ struct nmsghdr *msg, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+short
+sock_poll_notsupp(sock_lower_handle_t handle, short events, int anyyet,
+ cred_t *cred)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_shutdown_notsupp(sock_lower_handle_t handle, int how, struct cred *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+void
+sock_clr_flowctrl_notsupp(sock_lower_handle_t proto_handle)
+{
+}
+
+/*ARGSUSED*/
+int
+sock_ioctl_notsupp(sock_lower_handle_t handle, int cmd, intptr_t arg,
+ int mode, int32_t *rvalp, cred_t *cred)
+{
+ return (EOPNOTSUPP);
+}
+
+/* ARGSUSED */
+int
+sock_close_notsupp(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
+{
+ return (EOPNOTSUPP);
+}
+
+sock_downcalls_t sock_down_notsupp = { /* table of the *_notsupp stubs */
+ NULL, /* first slot has no stub in this file; TODO confirm member */
+ sock_accept_notsupp,
+ sock_bind_notsupp,
+ sock_listen_notsupp,
+ sock_connect_notsupp,
+ sock_getpeername_notsupp, /* NOTE(review): confirm peer/sock order */
+ sock_getsockname_notsupp,
+ sock_getsockopt_notsupp,
+ sock_setsockopt_notsupp,
+ sock_send_notsupp,
+ sock_senduio_notsupp,
+ sock_recvuio_notsupp,
+ sock_poll_notsupp, /* returns short, unlike the int stubs */
+ sock_shutdown_notsupp,
+ sock_clr_flowctrl_notsupp, /* void stub with an empty body */
+ sock_ioctl_notsupp,
+ sock_close_notsupp,
+}; /* positional initializer: order must match sock_downcalls_t */
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index 98a4621956..5a7e05b210 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -3531,56 +3531,6 @@ extern int ip_cgtp_filter_is_registered(netstackid_t);
#endif
/*
- * IP observability hook support
- */
-
-/*
- * ipobs_hooktype_t describes the hook types supported
- * by the ip module. IPOBS_HOOK_LOCAL refers to packets
- * which are looped back internally within the ip module.
- */
-
-typedef enum ipobs_hook_type {
- IPOBS_HOOK_LOCAL,
- IPOBS_HOOK_OUTBOUND,
- IPOBS_HOOK_INBOUND
-} ipobs_hook_type_t;
-
-typedef void ipobs_cbfunc_t(mblk_t *);
-
-typedef struct ipobs_cb {
- ipobs_cbfunc_t *ipobs_cbfunc;
- list_node_t ipobs_cbnext;
-} ipobs_cb_t;
-
-/*
- * This structure holds the data passed back from the ip module to
- * observability consumers.
- *
- * ihd_mp Pointer to the IP packet.
- * ihd_zsrc Source zoneid; set to ALL_ZONES when unknown.
- * ihd_zdst Destination zoneid; set to ALL_ZONES when unknown.
- * ihd_htype IPobs hook type, see above for the defined types.
- * ihd_ipver IP version of the packet.
- * ihd_ifindex Interface index that the packet was received/sent over.
- * For local packets, this is the index of the interface
- * associated with the local destination address.
- * ihd_grifindex IPMP group interface index (zero unless ihd_ifindex
- * is an IPMP underlying interface).
- * ihd_stack Netstack the packet is from.
- */
-typedef struct ipobs_hook_data {
- mblk_t *ihd_mp;
- zoneid_t ihd_zsrc;
- zoneid_t ihd_zdst;
- ipobs_hook_type_t ihd_htype;
- uint16_t ihd_ipver;
- uint64_t ihd_ifindex;
- uint64_t ihd_grifindex;
- netstack_t *ihd_stack;
-} ipobs_hook_data_t;
-
-/*
* Per-ILL Multidata Transmit capabilities.
*/
struct ill_mdt_capab_s {
@@ -3725,10 +3675,10 @@ extern void tcp_wput(queue_t *, mblk_t *);
extern int ip_fill_mtuinfo(struct in6_addr *, in_port_t,
struct ip6_mtuinfo *, netstack_t *);
extern ipif_t *conn_get_held_ipif(conn_t *, ipif_t **, int *);
-extern void ipobs_register_hook(netstack_t *, ipobs_cbfunc_t *);
-extern void ipobs_unregister_hook(netstack_t *, ipobs_cbfunc_t *);
-extern void ipobs_hook(mblk_t *, int, zoneid_t, zoneid_t, const ill_t *, int,
- uint32_t, ip_stack_t *);
+extern hook_t *ipobs_register_hook(netstack_t *, pfv_t);
+extern void ipobs_unregister_hook(netstack_t *, hook_t *);
+extern void ipobs_hook(mblk_t *, int, zoneid_t, zoneid_t, const ill_t *,
+ ip_stack_t *);
typedef void (*ipsq_func_t)(ipsq_t *, queue_t *, mblk_t *, void *);
/*
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index 44e891eaaf..23ea14f73f 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -5745,6 +5745,7 @@ ip_stack_fini(netstackid_t stackid, void *arg)
* protocols are going away have been run, meaning that we can
* now set about starting to clean things up.
*/
+ ipobs_fini(ipst);
ipv4_hook_destroy(ipst);
ipv6_hook_destroy(ipst);
ip_net_destroy(ipst);
@@ -5829,7 +5830,6 @@ ip_stack_fini(netstackid_t stackid, void *arg)
mutex_destroy(&ipst->ips_ip_addr_avail_lock);
rw_destroy(&ipst->ips_ill_g_lock);
- ipobs_fini(ipst);
ip_ire_fini(ipst);
ip6_asp_free(ipst);
conn_drain_fini(ipst);
@@ -6032,11 +6032,11 @@ ip_stack_init(netstackid_t stackid, netstack_t *ns)
ipst->ips_ip_src_id = 1;
rw_init(&ipst->ips_srcid_lock, NULL, RW_DEFAULT, NULL);
- ipobs_init(ipst);
ip_net_init(ipst, ns);
ipv4_hook_init(ipst);
ipv6_hook_init(ipst);
ipmp_init(ipst);
+ ipobs_init(ipst);
/*
* Create the taskq dispatcher thread and initialize related stuff.
@@ -13957,13 +13957,20 @@ ip_fast_forward(ire_t *ire, ipaddr_t dst, ill_t *ill, mblk_t *mp)
ip6_t *, NULL, int, 0);
if (mp != NULL) {
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip4_observe.he_interested) {
zoneid_t szone;
szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
ipst, ALL_ZONES);
+ /*
+ * The IP observability hook expects b_rptr to be
+ * where the IP header starts, so advance past the
+ * link layer header.
+ */
+ mp->b_rptr += hlen;
ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
- ALL_ZONES, ill, IPV4_VERSION, hlen, ipst);
+ ALL_ZONES, ill, ipst);
+ mp->b_rptr -= hlen;
}
ILL_SEND_TX(stq_ill, ire, dst, mp, IP_DROP_ON_NO_DESC, NULL);
}
@@ -15046,7 +15053,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
continue;
}
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip4_observe.he_interested) {
zoneid_t dzone;
/*
@@ -15055,7 +15062,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
*/
dzone = ip_get_zoneid_v4(dst, mp, ipst, ALL_ZONES);
ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone,
- ill, IPV4_VERSION, 0, ipst);
+ ill, ipst);
}
/*
@@ -22495,7 +22502,7 @@ another:;
if (mp == NULL)
goto release_ire_and_ill;
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip4_observe.he_interested) {
zoneid_t szone;
/*
@@ -22506,7 +22513,7 @@ another:;
szone = ip_get_zoneid_v4(ipha->ipha_src, mp, ipst,
ALL_ZONES);
ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, ALL_ZONES,
- ire->ire_ipif->ipif_ill, IPV4_VERSION, 0, ipst);
+ ire->ire_ipif->ipif_ill, ipst);
}
mp->b_prev = SET_BPREV_FLAG(IPP_LOCAL_OUT);
DTRACE_PROBE2(ip__xmit__1, mblk_t *, mp, ire_t *, ire);
@@ -24901,7 +24908,7 @@ ip_wput_local(queue_t *q, ill_t *ill, ipha_t *ipha, mblk_t *mp, ire_t *ire,
if (first_mp == NULL)
return;
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip4_observe.he_interested) {
zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES;
zoneid_t stackzoneid = netstackid_to_zoneid(
ipst->ips_netstack->netstack_stackid);
@@ -24915,8 +24922,7 @@ ip_wput_local(queue_t *q, ill_t *ill, ipha_t *ipha, mblk_t *mp, ire_t *ire,
lookup_zoneid = zoneid;
szone = ip_get_zoneid_v4(ipha->ipha_src, mp, ipst,
lookup_zoneid);
- ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
- IPV4_VERSION, 0, ipst);
+ ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst);
}
DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *,
@@ -29805,121 +29811,81 @@ ip_get_zoneid_v6(in6_addr_t *addr, mblk_t *mp, const ill_t *ill,
/*
* IP obserability hook support functions.
*/
-
static void
ipobs_init(ip_stack_t *ipst)
{
- ipst->ips_ipobs_enabled = B_FALSE;
- list_create(&ipst->ips_ipobs_cb_list, sizeof (ipobs_cb_t),
- offsetof(ipobs_cb_t, ipobs_cbnext));
- mutex_init(&ipst->ips_ipobs_cb_lock, NULL, MUTEX_DEFAULT, NULL);
- ipst->ips_ipobs_cb_nwalkers = 0;
- cv_init(&ipst->ips_ipobs_cb_cv, NULL, CV_DRIVER, NULL);
+ netid_t id;
+
+ id = net_getnetidbynetstackid(ipst->ips_netstack->netstack_stackid);
+
+ ipst->ips_ip4_observe_pr = net_protocol_lookup(id, NHF_INET);
+ VERIFY(ipst->ips_ip4_observe_pr != NULL);
+
+ ipst->ips_ip6_observe_pr = net_protocol_lookup(id, NHF_INET6);
+ VERIFY(ipst->ips_ip6_observe_pr != NULL);
}
static void
ipobs_fini(ip_stack_t *ipst)
{
- ipobs_cb_t *cb;
-
- mutex_enter(&ipst->ips_ipobs_cb_lock);
- while (ipst->ips_ipobs_cb_nwalkers != 0)
- cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
- while ((cb = list_head(&ipst->ips_ipobs_cb_list)) != NULL) {
- list_remove(&ipst->ips_ipobs_cb_list, cb);
- kmem_free(cb, sizeof (*cb));
- }
- list_destroy(&ipst->ips_ipobs_cb_list);
- mutex_exit(&ipst->ips_ipobs_cb_lock);
- mutex_destroy(&ipst->ips_ipobs_cb_lock);
- cv_destroy(&ipst->ips_ipobs_cb_cv);
+ net_protocol_release(ipst->ips_ip4_observe_pr);
+ net_protocol_release(ipst->ips_ip6_observe_pr);
}
+/*
+ * hook_pkt_observe_t is composed in network byte order so that the
+ * entire mblk_t chain handed into hook_run can be used as-is.
+ * The caveat is that use of the fields, such as the zone fields,
+ * requires conversion into host byte order first.
+ */
void
ipobs_hook(mblk_t *mp, int htype, zoneid_t zsrc, zoneid_t zdst,
- const ill_t *ill, int ipver, uint32_t hlen, ip_stack_t *ipst)
+ const ill_t *ill, ip_stack_t *ipst)
{
- mblk_t *mp2;
- ipobs_cb_t *ipobs_cb;
- ipobs_hook_data_t *ihd;
- uint64_t grifindex = 0;
+ hook_pkt_observe_t *hdr;
+ uint64_t grifindex;
+ mblk_t *imp;
+
+ imp = allocb(sizeof (*hdr), BPRI_HI);
+ if (imp == NULL)
+ return;
+
+ hdr = (hook_pkt_observe_t *)imp->b_rptr;
+ /*
+ * b_wptr is set so that the apparent size of the data in the mblk_t
+ * excludes the pointers at the end of hook_pkt_observe_t.
+ */
+ imp->b_wptr = imp->b_rptr + sizeof (dl_ipnetinfo_t);
+ imp->b_cont = mp;
ASSERT(DB_TYPE(mp) == M_DATA);
if (IS_UNDER_IPMP(ill))
grifindex = ipmp_ill_get_ipmp_ifindex(ill);
+ else
+ grifindex = 0;
+
+ hdr->hpo_version = 1;
+ hdr->hpo_htype = htype;
+ hdr->hpo_pktlen = htons((ushort_t)msgdsize(mp));
+ hdr->hpo_ifindex = htonl(ill->ill_phyint->phyint_ifindex);
+ hdr->hpo_grifindex = htonl(grifindex);
+ hdr->hpo_zsrc = htonl(zsrc);
+ hdr->hpo_zdst = htonl(zdst);
+ hdr->hpo_pkt = imp;
+ hdr->hpo_ctx = ipst->ips_netstack;
- mutex_enter(&ipst->ips_ipobs_cb_lock);
- ipst->ips_ipobs_cb_nwalkers++;
- mutex_exit(&ipst->ips_ipobs_cb_lock);
- for (ipobs_cb = list_head(&ipst->ips_ipobs_cb_list); ipobs_cb != NULL;
- ipobs_cb = list_next(&ipst->ips_ipobs_cb_list, ipobs_cb)) {
- mp2 = allocb(sizeof (ipobs_hook_data_t), BPRI_HI);
- if (mp2 != NULL) {
- ihd = (ipobs_hook_data_t *)mp2->b_rptr;
- if (((ihd->ihd_mp = dupmsg(mp)) == NULL) &&
- ((ihd->ihd_mp = copymsg(mp)) == NULL)) {
- freemsg(mp2);
- continue;
- }
- ihd->ihd_mp->b_rptr += hlen;
- ihd->ihd_htype = htype;
- ihd->ihd_ipver = ipver;
- ihd->ihd_zsrc = zsrc;
- ihd->ihd_zdst = zdst;
- ihd->ihd_ifindex = ill->ill_phyint->phyint_ifindex;
- ihd->ihd_grifindex = grifindex;
- ihd->ihd_stack = ipst->ips_netstack;
- mp2->b_wptr += sizeof (*ihd);
- ipobs_cb->ipobs_cbfunc(mp2);
- }
- }
- mutex_enter(&ipst->ips_ipobs_cb_lock);
- ipst->ips_ipobs_cb_nwalkers--;
- if (ipst->ips_ipobs_cb_nwalkers == 0)
- cv_broadcast(&ipst->ips_ipobs_cb_cv);
- mutex_exit(&ipst->ips_ipobs_cb_lock);
-}
-
-void
-ipobs_register_hook(netstack_t *ns, pfv_t func)
-{
- ipobs_cb_t *cb;
- ip_stack_t *ipst = ns->netstack_ip;
-
- cb = kmem_alloc(sizeof (*cb), KM_SLEEP);
-
- mutex_enter(&ipst->ips_ipobs_cb_lock);
- while (ipst->ips_ipobs_cb_nwalkers != 0)
- cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
- ASSERT(ipst->ips_ipobs_cb_nwalkers == 0);
-
- cb->ipobs_cbfunc = func;
- list_insert_head(&ipst->ips_ipobs_cb_list, cb);
- ipst->ips_ipobs_enabled = B_TRUE;
- mutex_exit(&ipst->ips_ipobs_cb_lock);
-}
-
-void
-ipobs_unregister_hook(netstack_t *ns, pfv_t func)
-{
- ipobs_cb_t *curcb;
- ip_stack_t *ipst = ns->netstack_ip;
-
- mutex_enter(&ipst->ips_ipobs_cb_lock);
- while (ipst->ips_ipobs_cb_nwalkers != 0)
- cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
-
- for (curcb = list_head(&ipst->ips_ipobs_cb_list); curcb != NULL;
- curcb = list_next(&ipst->ips_ipobs_cb_list, curcb)) {
- if (func == curcb->ipobs_cbfunc) {
- list_remove(&ipst->ips_ipobs_cb_list, curcb);
- kmem_free(curcb, sizeof (*curcb));
- break;
- }
+ if (ill->ill_isv6) {
+ hdr->hpo_family = AF_INET6;
+ (void) hook_run(ipst->ips_ipv6_net_data->netd_hooks,
+ ipst->ips_ipv6observing, (hook_data_t)hdr);
+ } else {
+ hdr->hpo_family = AF_INET;
+ (void) hook_run(ipst->ips_ipv4_net_data->netd_hooks,
+ ipst->ips_ipv4observing, (hook_data_t)hdr);
}
- if (list_is_empty(&ipst->ips_ipobs_cb_list))
- ipst->ips_ipobs_enabled = B_FALSE;
- mutex_exit(&ipst->ips_ipobs_cb_lock);
+
+ imp->b_cont = NULL;
+ freemsg(imp);
}
diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c
index bdea32272d..a967662399 100644
--- a/usr/src/uts/common/inet/ip/ip6.c
+++ b/usr/src/uts/common/inet/ip/ip6.c
@@ -6563,13 +6563,13 @@ ip_rput_v6(queue_t *q, mblk_t *mp)
}
/* IP observability hook. */
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip6_observe.he_interested) {
zoneid_t dzone;
dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst,
ALL_ZONES);
- ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill,
- IPV6_VERSION, 0, ipst);
+ ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone,
+ ill, ipst);
}
if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) ==
@@ -10179,7 +10179,7 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp,
if (first_mp == NULL)
return;
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip6_observe.he_interested) {
zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES;
zoneid_t stackzoneid = netstackid_to_zoneid(
ipst->ips_netstack->netstack_stackid);
@@ -10194,8 +10194,7 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp,
lookup_zoneid = zoneid;
dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst,
lookup_zoneid);
- ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
- IPV6_VERSION, 0, ipst);
+ ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst);
}
DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *,
@@ -11885,14 +11884,23 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp,
}
}
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip6_observe.he_interested) {
zoneid_t szone;
szone = ip_get_zoneid_v6(&ip6h->ip6_src,
mp_ip6h, out_ill, ipst, ALL_ZONES);
+
+ /*
+ * The IP observability hook expects b_rptr to
+ * be where the IPv6 header starts, so advance
+ * past the link layer header.
+ */
+ if (fp_prepend)
+ mp_ip6h->b_rptr += hlen;
ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone,
- ALL_ZONES, out_ill, IPV6_VERSION,
- fp_prepend ? hlen : 0, ipst);
+ ALL_ZONES, out_ill, ipst);
+ if (fp_prepend)
+ mp_ip6h->b_rptr -= hlen;
}
/*
diff --git a/usr/src/uts/common/inet/ip/ip_netinfo.c b/usr/src/uts/common/inet/ip/ip_netinfo.c
index 52b4da0e01..8b97462d13 100644
--- a/usr/src/uts/common/inet/ip/ip_netinfo.c
+++ b/usr/src/uts/common/inet/ip/ip_netinfo.c
@@ -311,6 +311,16 @@ ipv4_hook_init(ip_stack_t *ipst)
cmn_err(CE_NOTE, "ipv4_hook_init: "
"net_event_register failed for ipv4/nic_events");
}
+
+ HOOK_EVENT_INIT(&ipst->ips_ip4_observe, NH_OBSERVE);
+ ipst->ips_ip4_observe.he_flags = HOOK_RDONLY;
+ ipst->ips_ipv4observing = net_event_register(
+ ipst->ips_ipv4_net_data, &ipst->ips_ip4_observe);
+ if (ipst->ips_ipv4observing == NULL) {
+ cmn_err(CE_NOTE, "ipv4_hook_init: "
+ "net_event_register failed for ipv4/observe");
+ }
+
}
void
@@ -346,6 +356,11 @@ ipv4_hook_shutdown(ip_stack_t *ipst)
&ipst->ips_ip4_nic_events);
}
+ if (ipst->ips_ipv4observing != NULL) {
+ (void) net_event_shutdown(ipst->ips_ipv4_net_data,
+ &ipst->ips_ip4_observe);
+ }
+
(void) net_family_shutdown(ipst->ips_ipv4_net_data,
&ipst->ips_ipv4root);
}
@@ -389,6 +404,12 @@ ipv4_hook_destroy(ip_stack_t *ipst)
ipst->ips_ipv4nicevents = NULL;
}
+ if (ipst->ips_ipv4observing != NULL) {
+ if (net_event_unregister(ipst->ips_ipv4_net_data,
+ &ipst->ips_ip4_observe) == 0)
+ ipst->ips_ipv4observing = NULL;
+ }
+
(void) net_family_unregister(ipst->ips_ipv4_net_data,
&ipst->ips_ipv4root);
}
@@ -455,6 +476,15 @@ ipv6_hook_init(ip_stack_t *ipst)
cmn_err(CE_NOTE, "ipv6_hook_init: "
"net_event_register failed for ipv6/nic_events");
}
+
+ HOOK_EVENT_INIT(&ipst->ips_ip6_observe, NH_OBSERVE);
+ ipst->ips_ip6_observe.he_flags = HOOK_RDONLY;
+ ipst->ips_ipv6observing = net_event_register(
+ ipst->ips_ipv6_net_data, &ipst->ips_ip6_observe);
+ if (ipst->ips_ipv6observing == NULL) {
+ cmn_err(CE_NOTE, "ipv6_hook_init: "
+ "net_event_register failed for ipv6/observe");
+ }
}
void
@@ -490,6 +520,11 @@ ipv6_hook_shutdown(ip_stack_t *ipst)
&ipst->ips_ip6_nic_events);
}
+ if (ipst->ips_ipv6observing != NULL) {
+ (void) net_event_shutdown(ipst->ips_ipv6_net_data,
+ &ipst->ips_ip6_observe);
+ }
+
(void) net_family_shutdown(ipst->ips_ipv6_net_data,
&ipst->ips_ipv6root);
}
@@ -533,6 +568,12 @@ ipv6_hook_destroy(ip_stack_t *ipst)
ipst->ips_ipv6nicevents = NULL;
}
+ if (ipst->ips_ipv6observing != NULL) {
+ if (net_event_unregister(ipst->ips_ipv6_net_data,
+ &ipst->ips_ip6_observe) == 0)
+ ipst->ips_ipv6observing = NULL;
+ }
+
(void) net_family_unregister(ipst->ips_ipv6_net_data,
&ipst->ips_ipv6root);
}
@@ -1424,18 +1465,33 @@ ipv6_getlifzone(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
neti->netd_stack->nts_netstack->netstack_ip, zoneid));
}
+/*
+ * The behaviour here mirrors that for the SIOCGLIFFLAGS ioctl where the
+ * union of all of the relevant flags is returned.
+ */
static int
ip_getlifflags_impl(sa_family_t family, phy_if_t phy_ifdata, lif_if_t ifdata,
ip_stack_t *ipst, uint64_t *flags)
{
+ phyint_t *phyi;
ipif_t *ipif;
+ ill_t *ill;
+
+ ill = ill_lookup_on_ifindex(phy_ifdata,
+ (family == AF_INET6), NULL, NULL, NULL, NULL, ipst);
+ if (ill == NULL)
+ return (-1);
+ phyi = ill->ill_phyint;
ipif = ipif_getby_indexes((uint_t)phy_ifdata,
UNMAP_IPIF_ID((uint_t)ifdata), (family == AF_INET6), ipst);
- if (ipif == NULL)
+ if (ipif == NULL) {
+ ill_refrele(ill);
return (-1);
- *flags = ipif->ipif_flags;
+ }
+ *flags = ipif->ipif_flags | ill->ill_flags | phyi->phyint_flags;
ipif_refrele(ipif);
+ ill_refrele(ill);
return (0);
}
diff --git a/usr/src/uts/common/inet/ip_stack.h b/usr/src/uts/common/inet/ip_stack.h
index 8b1ca0f32e..b5d9715c65 100644
--- a/usr/src/uts/common/inet/ip_stack.h
+++ b/usr/src/uts/common/inet/ip_stack.h
@@ -386,6 +386,9 @@ struct ip_stack {
hook_family_t ips_ipv4root;
hook_family_t ips_ipv6root;
+ net_handle_t ips_ipv4_net_data;
+ net_handle_t ips_ipv6_net_data;
+
/*
* Hooks for firewalling
*/
@@ -394,35 +397,34 @@ struct ip_stack {
hook_event_t ips_ip4_forwarding_event;
hook_event_t ips_ip4_loopback_in_event;
hook_event_t ips_ip4_loopback_out_event;
- hook_event_t ips_ip4_nic_events;
hook_event_t ips_ip6_physical_in_event;
hook_event_t ips_ip6_physical_out_event;
hook_event_t ips_ip6_forwarding_event;
hook_event_t ips_ip6_loopback_in_event;
hook_event_t ips_ip6_loopback_out_event;
- hook_event_t ips_ip6_nic_events;
hook_event_token_t ips_ipv4firewall_physical_in;
hook_event_token_t ips_ipv4firewall_physical_out;
hook_event_token_t ips_ipv4firewall_forwarding;
hook_event_token_t ips_ipv4firewall_loopback_in;
hook_event_token_t ips_ipv4firewall_loopback_out;
- hook_event_token_t ips_ipv4nicevents;
hook_event_token_t ips_ipv6firewall_physical_in;
hook_event_token_t ips_ipv6firewall_physical_out;
hook_event_token_t ips_ipv6firewall_forwarding;
hook_event_token_t ips_ipv6firewall_loopback_in;
hook_event_token_t ips_ipv6firewall_loopback_out;
- hook_event_token_t ips_ipv6nicevents;
- net_handle_t ips_ipv4_net_data;
- net_handle_t ips_ipv6_net_data;
+ hook_event_t ips_ip4_nic_events;
+ hook_event_t ips_ip6_nic_events;
+ hook_event_token_t ips_ipv4nicevents;
+ hook_event_token_t ips_ipv6nicevents;
- boolean_t ips_ipobs_enabled;
- list_t ips_ipobs_cb_list;
- kmutex_t ips_ipobs_cb_lock;
- uint_t ips_ipobs_cb_nwalkers;
- kcondvar_t ips_ipobs_cb_cv;
+ net_handle_t ips_ip4_observe_pr;
+ net_handle_t ips_ip6_observe_pr;
+ hook_event_t ips_ip4_observe;
+ hook_event_t ips_ip6_observe;
+ hook_event_token_t ips_ipv4observing;
+ hook_event_token_t ips_ipv6observing;
struct __ldi_ident *ips_ldi_ident;
diff --git a/usr/src/uts/common/inet/ipnet.h b/usr/src/uts/common/inet/ipnet.h
index 234b14f1d6..02dc202ab6 100644
--- a/usr/src/uts/common/inet/ipnet.h
+++ b/usr/src/uts/common/inet/ipnet.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -36,11 +36,39 @@ extern "C" {
#include <sys/list.h>
#include <netinet/in.h>
#include <net/if.h>
+#include <net/bpf.h>
#include <sys/avl.h>
#include <sys/neti.h>
+#include <sys/hook_event.h>
+#include <sys/zone.h>
+#include <sys/kstat.h>
+
+/*
+ * Named kstat counters kept in each ipnet_stack_t (ips_stats) and bumped
+ * with IPSK_BUMP(). The field order matters: ipnet.c fills this structure
+ * by bcopy()ing a template of { name, type } initializers over it, so the
+ * template must list exactly one entry per field, in the same order.
+ */
+typedef struct ipnet_kstats_s {
+ kstat_named_t ik_duplicationFail; /* dupmsg() and copymsg() both failed */
+ kstat_named_t ik_dispatchOk; /* ipnet_input() queued packet on the taskq */
+ kstat_named_t ik_dispatchFail; /* ddi_taskq_dispatch() failed, packet freed */
+ kstat_named_t ik_dispatchHeaderDrop; /* ipnet_addheader() returned NULL */
+ kstat_named_t ik_dispatchDupDrop; /* NOTE(review): no increment visible in ipnet_dispatch() - confirm use */
+ kstat_named_t ik_dispatchPutDrop; /* stream could not take the packet (canput failed) */
+ kstat_named_t ik_dispatchDeliver; /* packet handed to a stream (putnext/putq) */
+ kstat_named_t ik_acceptOk; /* accept callback returned B_TRUE */
+ kstat_named_t ik_acceptFail; /* accept callback returned B_FALSE */
+} ipnet_kstats_t;
+
+/*
+ * Increment the 64-bit counter named _y in the ipnet_stack_t pointed to
+ * by _x. The expansion is fully parenthesised so that it behaves as a
+ * single expression wherever it is used.
+ *
+ * NOTE(review): this is a plain, non-atomic increment. The ips_drops
+ * counter it replaces was updated with atomic_inc_64(); confirm that lost
+ * updates from concurrent callers are acceptable for these statistics.
+ */
+#define IPSK_BUMP(_x, _y) ((_x)->ips_stats._y.value.ui64++)
/*
* Structure used to hold information for both IPv4 and IPv6 addresses.
+ *
+ * When ifa_shared is non-NULL, it points to a "fake" ipnetif_t structure
+ * that represents the network interface for each zone that shares its
+ * network stack. This is used by BPF to build a list of interface names
+ * present in each zone. Multiple ipnetif_addr_t's may point to a single
+ * ipnetif_t using ifa_shared. The typical case is the global zone has
+ * a bge0 that other zones use as bge0:1, bge0:2, etc. In ipnet, the
+ * ipnetif_addr_t's that store the IP address for bge0:1, etc, would
+ * point to an ipnetif_t stored in the if_avl_by_shared tree that has
+ * the name "bge0".
*/
typedef struct ipnetif_addr {
union {
@@ -51,6 +79,7 @@ typedef struct ipnetif_addr {
zoneid_t ifa_zone;
uint64_t ifa_id;
list_node_t ifa_link;
+ struct ipnetif *ifa_shared;
} ipnetif_addr_t;
#define ifa_ip4addr ifa_addr.ifau_ip4addr
#define ifa_ip6addr ifa_addr.ifau_ip6addr
@@ -60,11 +89,19 @@ typedef struct ipnetif_addr {
* The structure holds both IPv4 and IPv6 addresses, the address lists are
* protected by a mutex. The ipnetif structures are held per stack instance
* within avl trees indexed on name and ip index.
+ *
+ * if_avl_by_shared is used by zones that share their instance of IP with
+ * other zones. It is used to store ipnetif_t structures. An example of this
+ * is the global zone sharing its instance of IP with other local zones.
+ * In this case, if_avl_by_shared is a tree of names that are in active use
+ * by zones using a shared instance of IP.
+ * The value in if_sharecnt is the number of ipnetif_addr_t's whose
+ * ifa_shared field points to this ipnetif_t.
*/
typedef struct ipnetif {
char if_name[LIFNAMSIZ];
uint_t if_flags;
- uint64_t if_index;
+ uint_t if_index;
kmutex_t if_addr_lock; /* protects both addr lists */
list_t if_ip4addr_list;
list_t if_ip6addr_list;
@@ -73,7 +110,11 @@ typedef struct ipnetif {
dev_t if_dev;
uint_t if_multicnt; /* protected by ips_event_lock */
kmutex_t if_reflock; /* protects if_refcnt */
- uint_t if_refcnt;
+ int if_refcnt; /* if_reflock */
+ zoneid_t if_zoneid;
+ avl_node_t if_avl_by_shared; /* protected by ips_avl_lock */
+ struct ipnet_stack *if_stackp;
+ int if_sharecnt; /* protected by if_reflock */
} ipnetif_t;
/* if_flags */
@@ -81,6 +122,7 @@ typedef struct ipnetif {
#define IPNETIF_IPV6PLUMBED 0x02
#define IPNETIF_IPV4ALLMULTI 0x04
#define IPNETIF_IPV6ALLMULTI 0x08
+#define IPNETIF_LOOPBACK 0x10
/*
* Structure used by the accept callback function. This is simply an address
@@ -99,7 +141,7 @@ typedef struct ipnet_addrp {
struct ipnet;
struct ipobs_hook_data;
-typedef boolean_t ipnet_acceptfn_t(struct ipnet *, struct ipobs_hook_data *,
+typedef boolean_t ipnet_acceptfn_t(struct ipnet *, struct hook_pkt_observe_s *,
ipnet_addrp_t *, ipnet_addrp_t *);
/*
@@ -111,12 +153,14 @@ typedef struct ipnet {
minor_t ipnet_minor; /* minor number for this instance */
ipnetif_t *ipnet_if; /* ipnetif for this open instance */
zoneid_t ipnet_zoneid; /* zoneid the device was opened in */
- uint16_t ipnet_flags; /* see below */
- t_scalar_t ipnet_sap; /* sap this instance is bound to */
+ uint_t ipnet_flags; /* see below */
+ t_scalar_t ipnet_family; /* protocol family of this instance */
t_uscalar_t ipnet_dlstate; /* dlpi state */
list_node_t ipnet_next; /* list next member */
netstack_t *ipnet_ns; /* netstack of zone we were opened in */
ipnet_acceptfn_t *ipnet_acceptfn; /* accept callback function pointer */
+ hook_t *ipnet_hook; /* hook token to unregister */
+ void *ipnet_data; /* value to pass back to bpf_itap */
} ipnet_t;
/* ipnet_flags */
@@ -159,7 +203,12 @@ typedef struct ipnet_stack {
kcondvar_t ips_walkers_cv;
uint_t ips_walkers_cnt;
list_t ips_str_list;
- uint64_t ips_drops;
+ kstat_t *ips_kstatp;
+ ipnet_kstats_t ips_stats;
+ bpf_attach_fn_t ips_bpfattach_fn;
+ bpf_detach_fn_t ips_bpfdetach_fn;
+ avl_tree_t ips_avl_by_shared;
+ hook_t *ips_hook;
} ipnet_stack_t;
/*
@@ -191,8 +240,22 @@ typedef struct ipnet_stack {
}
typedef void ipnet_walkfunc_t(const char *, void *, dev_t);
-extern void ipnet_walk_if(ipnet_walkfunc_t *, void *, zoneid_t);
-extern dev_t ipnet_if_getdev(char *, zoneid_t);
+
+extern int ipnet_client_open(ipnetif_t *, ipnetif_t **);
+extern void ipnet_client_close(ipnetif_t *);
+extern void ipnet_close_byhandle(ipnetif_t *);
+extern int ipnet_get_linkid_byname(const char *, datalink_id_t *,
+ zoneid_t);
+extern dev_t ipnet_if_getdev(char *, zoneid_t);
+extern const char *ipnet_name(ipnetif_t *);
+extern int ipnet_open_byname(const char *, ipnetif_t **, zoneid_t);
+extern int ipnet_promisc_add(void *, uint_t, void *, uintptr_t *, int);
+extern void ipnet_promisc_remove(void *);
+extern void ipnet_set_bpfattach(bpf_attach_fn_t, bpf_detach_fn_t,
+ zoneid_t, bpf_itap_fn_t, bpf_provider_reg_fn_t);
+extern void ipnet_walk_if(ipnet_walkfunc_t *, void *, zoneid_t);
+
+extern bpf_provider_t bpf_ipnet;
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/inet/ipnet/ipnet.c b/usr/src/uts/common/inet/ipnet/ipnet.c
index e94af50424..c5cf382349 100644
--- a/usr/src/uts/common/inet/ipnet/ipnet.c
+++ b/usr/src/uts/common/inet/ipnet/ipnet.c
@@ -59,12 +59,17 @@
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/hook_event.h>
+#include <sys/sdt.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <inet/ip.h>
+#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/ip6.h>
#include <inet/ipnet.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+#include <net/dlt.h>
static struct module_info ipnet_minfo = {
1, /* mi_idnum */
@@ -116,6 +121,7 @@ static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */
static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */
static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT;
static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept;
+static bpf_itap_fn_t ipnet_itap;
static void ipnet_input(mblk_t *);
static int ipnet_wput(queue_t *, mblk_t *);
@@ -137,16 +143,18 @@ static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
static void ipnet_nicevent_task(void *);
-static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *);
-static void ipnet_remove_if(ipnetif_t *, ipnet_stack_t *);
+static ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
+ uint64_t);
+static void ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
-static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *);
-static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *);
-static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
-static void ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *);
+static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
+static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
+static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
+static void ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
-static int ipnet_if_compare_name(const void *, const void *);
-static int ipnet_if_compare_index(const void *, const void *);
+static int ipnetif_compare_name(const void *, const void *);
+static int ipnetif_compare_name_zone(const void *, const void *);
+static int ipnetif_compare_index(const void *, const void *);
static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
static void ipnetif_refhold(ipnetif_t *);
@@ -156,6 +164,15 @@ static void ipnet_walkers_dec(ipnet_stack_t *);
static void ipnet_register_netihook(ipnet_stack_t *);
static void *ipnet_stack_init(netstackid_t, netstack_t *);
static void ipnet_stack_fini(netstackid_t, void *);
+static void ipnet_dispatch(void *);
+static int ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
+static void ipnet_bpfattach(ipnetif_t *);
+static void ipnet_bpfdetach(ipnetif_t *);
+static int ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
+static void ipnet_bpf_probe_shared(ipnet_stack_t *);
+static void ipnet_bpf_release_shared(ipnet_stack_t *);
+static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
+static void ipnetif_clone_release(ipnetif_t *);
static struct qinit ipnet_rinit = {
NULL, /* qi_putp */
@@ -194,6 +211,23 @@ static struct modlinkage modlinkage = {
};
/*
+ * This structure contains the template data (names and type) that is
+ * copied, in bulk, into the new kstats structure created by net_kstat_create.
+ * No actual statistical information is stored in this instance of the
+ * ipnet_kstats_t structure.
+ */
+/*
+ * There must be exactly one initializer per field of ipnet_kstats_t, in
+ * field order: the template is bcopy()'d over ips_stats, so a missing
+ * entry shifts every later name onto the wrong counter and leaves the
+ * last counter with no name or type.
+ */
+static ipnet_kstats_t stats_template = {
+ { "duplicationFail", KSTAT_DATA_UINT64 },
+ { "dispatchOk", KSTAT_DATA_UINT64 },
+ { "dispatchFail", KSTAT_DATA_UINT64 },
+ { "dispatchHeaderDrop", KSTAT_DATA_UINT64 },
+ { "dispatchDupDrop", KSTAT_DATA_UINT64 },
+ { "dispatchPutDrop", KSTAT_DATA_UINT64 },
+ { "dispatchDeliver", KSTAT_DATA_UINT64 },
+ { "acceptOk", KSTAT_DATA_UINT64 },
+ { "acceptFail", KSTAT_DATA_UINT64 }
+};
+
+/*
* Walk the list of physical interfaces on the machine, for each
* interface create a new ipnetif_t and add any addresses to it. We
* need to do the walk twice, once for IPv4 and once for IPv6.
@@ -203,7 +237,7 @@ static struct modlinkage modlinkage = {
* ipnet_stack_init(), since ipnet_stack_init() cannot fail.
*/
static int
-ipnet_if_init(void)
+ipnetif_init(void)
{
netstack_handle_t nh;
netstack_t *ns;
@@ -229,8 +263,8 @@ ipnet_if_init(void)
int
_init(void)
{
- int ret;
- boolean_t netstack_registered = B_FALSE;
+ int ret;
+ boolean_t netstack_registered = B_FALSE;
if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
return (ENODEV);
@@ -254,7 +288,7 @@ _init(void)
netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
netstack_registered = B_TRUE;
- if ((ret = ipnet_if_init()) == 0)
+ if ((ret = ipnetif_init()) == 0)
ret = mod_install(&modlinkage);
done:
if (ret != 0) {
@@ -272,7 +306,7 @@ done:
int
_fini(void)
{
- int err;
+ int err;
if ((err = mod_remove(&modlinkage)) != 0)
return (err);
@@ -327,6 +361,24 @@ ipnet_register_netihook(ipnet_stack_t *ips)
" in zone %d: %d", zoneid, ret);
}
}
+
+ /*
+ * Create a local set of kstats for each zone.
+ */
+ ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats",
+ "misc", KSTAT_TYPE_NAMED,
+ sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0);
+ if (ips->ips_kstatp != NULL) {
+ bcopy(&stats_template, &ips->ips_stats,
+ sizeof (ips->ips_stats));
+ ips->ips_kstatp->ks_data = &ips->ips_stats;
+ ips->ips_kstatp->ks_private =
+ (void *)(uintptr_t)ips->ips_netstack->netstack_stackid;
+ kstat_install(ips->ips_kstatp);
+ } else {
+ cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed",
+ "ipnet", "ipnet_stats", "misc");
+ }
}
/*
@@ -338,13 +390,13 @@ ipnet_register_netihook(ipnet_stack_t *ips)
static int
ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
{
- phy_if_t phyif;
- lif_if_t lif;
- ipnetif_t *ipnetif;
- char name[LIFNAMSIZ];
- boolean_t new_if = B_FALSE;
- uint64_t ifflags;
- int ret = 0;
+ phy_if_t phyif;
+ lif_if_t lif;
+ ipnetif_t *ipnetif;
+ char name[LIFNAMSIZ];
+ boolean_t new_if = B_FALSE;
+ uint64_t ifflags;
+ int ret = 0;
/*
* If ipnet_register_netihook() was unable to initialize this
@@ -368,8 +420,10 @@ ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
phyif = net_phygetnext(nd, phyif)) {
if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
continue;
- if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) {
- ipnetif = ipnet_create_if(name, phyif, ips);
+ ifflags = 0;
+ (void) net_getlifflags(nd, phyif, 0, &ifflags);
+ if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) {
+ ipnetif = ipnetif_create(name, phyif, ips, ifflags);
if (ipnetif == NULL) {
ret = ENOMEM;
goto done;
@@ -432,7 +486,7 @@ ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
static int
ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
- int error = DDI_FAILURE;
+ int error = DDI_FAILURE;
switch (infocmd) {
case DDI_INFO_DEVT2INSTANCE:
@@ -485,7 +539,6 @@ ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
ipnet->ipnet_zoneid = zoneid;
ipnet->ipnet_dlstate = DL_UNBOUND;
- ipnet->ipnet_sap = 0;
ipnet->ipnet_ns = ns;
/*
@@ -499,9 +552,9 @@ ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
ipnet->ipnet_acceptfn = ipnet_loaccept;
} else {
ipnet->ipnet_acceptfn = ipnet_accept;
- ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips);
+ ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips);
if (ipnet->ipnet_if == NULL ||
- !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) {
+ !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) {
err = ENODEV;
goto done;
}
@@ -519,7 +572,7 @@ ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
* unregister in close() for the last open client.
*/
if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
- ipobs_register_hook(ns, ipnet_input);
+ ips->ips_hook = ipobs_register_hook(ns, ipnet_input);
mutex_exit(&ips->ips_walkers_lock);
done:
@@ -555,10 +608,13 @@ ipnet_close(queue_t *rq)
if (ipnet->ipnet_if != NULL)
ipnetif_refrele(ipnet->ipnet_if);
id_free(ipnet_minor_space, ipnet->ipnet_minor);
- kmem_free(ipnet, sizeof (*ipnet));
- if (list_is_empty(&ips->ips_str_list))
- ipobs_unregister_hook(ips->ips_netstack, ipnet_input);
+ if (list_is_empty(&ips->ips_str_list)) {
+ ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook);
+ ips->ips_hook = NULL;
+ }
+
+ kmem_free(ipnet, sizeof (*ipnet));
mutex_exit(&ips->ips_walkers_lock);
netstack_rele(ips->ips_netstack);
@@ -599,7 +655,7 @@ ipnet_wput(queue_t *q, mblk_t *mp)
static int
ipnet_rsrv(queue_t *q)
{
- mblk_t *mp;
+ mblk_t *mp;
while ((mp = getq(q)) != NULL) {
ASSERT(DB_TYPE(mp) == M_DATA);
@@ -616,7 +672,7 @@ ipnet_rsrv(queue_t *q)
static void
ipnet_ioctl(queue_t *q, mblk_t *mp)
{
- struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
+ struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
switch (iocp->ioc_cmd) {
case DLIOCRAW:
@@ -639,7 +695,7 @@ static void
ipnet_iocdata(queue_t *q, mblk_t *mp)
{
struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
- ipnet_t *ipnet = q->q_ptr;
+ ipnet_t *ipnet = q->q_ptr;
switch (iocp->ioc_cmd) {
case DLIOCIPNETINFO:
@@ -652,7 +708,7 @@ ipnet_iocdata(queue_t *q, mblk_t *mp)
miocack(q, mp, 0, DL_IPNETINFO_VERSION);
break;
default:
- iocnak:
+iocnak:
miocnak(q, mp, 0, EINVAL);
break;
}
@@ -717,23 +773,32 @@ ipnet_inforeq(queue_t *q, mblk_t *mp)
static void
ipnet_bindreq(queue_t *q, mblk_t *mp)
{
- union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
- int32_t sap;
- ipnet_t *ipnet = q->q_ptr;
+ union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
+ ipnet_t *ipnet = q->q_ptr;
if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
return;
}
- sap = dlp->bind_req.dl_sap;
- if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) {
+ switch (dlp->bind_req.dl_sap) {
+ case 0 :
+ ipnet->ipnet_family = AF_UNSPEC;
+ break;
+ case IPV4_VERSION :
+ ipnet->ipnet_family = AF_INET;
+ break;
+ case IPV6_VERSION :
+ ipnet->ipnet_family = AF_INET6;
+ break;
+ default :
dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
- } else {
- ipnet->ipnet_sap = sap;
- ipnet->ipnet_dlstate = DL_IDLE;
- dlbindack(q, mp, sap, 0, 0, 0, 0);
+ return;
+ /*NOTREACHED*/
}
+
+ ipnet->ipnet_dlstate = DL_IDLE;
+ dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0);
}
static void
@@ -750,7 +815,7 @@ ipnet_unbindreq(queue_t *q, mblk_t *mp)
dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
} else {
ipnet->ipnet_dlstate = DL_UNBOUND;
- ipnet->ipnet_sap = 0;
+ ipnet->ipnet_family = AF_UNSPEC;
dlokack(q, mp, DL_UNBIND_REQ);
}
}
@@ -907,8 +972,14 @@ ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
mutex_exit(&ips->ips_event_lock);
}
+/*
+ * Allocate a new mblk_t and put a dl_ipnetinfo_t in it.
+ * The structure it copies the header information from,
+ * hook_pkt_observe_t, is constructed using network byte
+ * order in ipobs_hook(), so there is no conversion here.
+ */
static mblk_t *
-ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp)
+ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp)
{
mblk_t *dlhdr;
dl_ipnetinfo_t *dl;
@@ -919,10 +990,13 @@ ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp)
}
dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
dl->dli_version = DL_IPNETINFO_VERSION;
- dl->dli_len = htons(sizeof (*dl));
- dl->dli_ipver = ihd->ihd_ipver;
- dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc);
- dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst);
+ dl->dli_family = hdr->hpo_family;
+ dl->dli_htype = hdr->hpo_htype;
+ dl->dli_pktlen = hdr->hpo_pktlen;
+ dl->dli_ifindex = hdr->hpo_ifindex;
+ dl->dli_grifindex = hdr->hpo_grifindex;
+ dl->dli_zsrc = hdr->hpo_zsrc;
+ dl->dli_zdst = hdr->hpo_zdst;
dlhdr->b_wptr += sizeof (*dl);
dlhdr->b_cont = mp;
@@ -989,16 +1063,17 @@ ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr)
}
/*
- * Verify if the packet contained in ihd should be passed up to the
+ * Verify if the packet contained in hdr should be passed up to the
* ipnet client stream.
*/
static boolean_t
-ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
+ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
ipnet_addrp_t *dst)
{
boolean_t obsif;
uint64_t ifindex = ipnet->ipnet_if->if_index;
- ipnet_addrtype_t srctype, dsttype;
+ ipnet_addrtype_t srctype;
+ ipnet_addrtype_t dsttype;
srctype = ipnet_get_addrtype(ipnet, src);
dsttype = ipnet_get_addrtype(ipnet, dst);
@@ -1008,7 +1083,13 @@ ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
* matches ours, it's on the interface we're observing. (Thus,
* observing on the group ifindex matches all ifindexes in the group.)
*/
- obsif = (ihd->ihd_ifindex == ifindex || ihd->ihd_grifindex == ifindex);
+ obsif = (ntohl(hdr->hpo_ifindex) == ifindex ||
+ ntohl(hdr->hpo_grifindex) == ifindex);
+
+ DTRACE_PROBE5(ipnet_accept__addr,
+ ipnet_addrtype_t, srctype, ipnet_addrp_t *, src,
+ ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst,
+ boolean_t, obsif);
/*
* Do not allow an ipnet stream to see packets that are not from or to
@@ -1019,8 +1100,8 @@ ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
*/
if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
dsttype != IPNETADDR_MBCAST) {
- if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
- ipnet->ipnet_zoneid != ihd->ihd_zdst)
+ if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
+ ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
return (B_FALSE);
}
@@ -1029,7 +1110,7 @@ ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
* packet's IP version.
*/
if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
- ipnet->ipnet_sap != ihd->ihd_ipver)
+ ipnet->ipnet_family != hdr->hpo_family)
return (B_FALSE);
/* If the destination address is ours, then accept the packet. */
@@ -1057,48 +1138,59 @@ ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
}
/*
- * Verify if the packet contained in ihd should be passed up to the ipnet
+ * Verify if the packet contained in hdr should be passed up to the ipnet
* client stream that's in IPNET_LOMODE.
*/
/* ARGSUSED */
static boolean_t
-ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
+ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
ipnet_addrp_t *dst)
{
- if (ihd->ihd_htype != IPOBS_HOOK_LOCAL)
- return (B_FALSE);
+ if (hdr->hpo_htype != IPOBS_HOOK_LOCAL) {
+ /*
+ * ipnet_if is only NULL for IPNET_MINOR_LO devices.
+ */
+ if (ipnet->ipnet_if == NULL)
+ return (B_FALSE);
+ }
/*
* An ipnet stream must not see packets that are not from/to its zone.
*/
if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
- if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
- ipnet->ipnet_zoneid != ihd->ihd_zdst)
+ if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
+ ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
return (B_FALSE);
}
- return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver);
+ return (ipnet->ipnet_family == AF_UNSPEC ||
+ ipnet->ipnet_family == hdr->hpo_family);
}
static void
ipnet_dispatch(void *arg)
{
mblk_t *mp = arg;
- ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr;
+ hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
ipnet_t *ipnet;
mblk_t *netmp;
list_t *list;
- ipnet_stack_t *ips = ihd->ihd_stack->netstack_ipnet;
- ipnet_addrp_t src, dst;
+ ipnet_stack_t *ips;
+ ipnet_addrp_t src;
+ ipnet_addrp_t dst;
- if (ihd->ihd_ipver == IPV4_VERSION) {
- src.iap_family = dst.iap_family = AF_INET;
- src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src;
- dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst;
+ ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
+
+ netmp = hdr->hpo_pkt->b_cont;
+ src.iap_family = hdr->hpo_family;
+ dst.iap_family = hdr->hpo_family;
+
+ if (hdr->hpo_family == AF_INET) {
+ src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
+ dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
} else {
- src.iap_family = dst.iap_family = AF_INET6;
- src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src;
- dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst;
+ src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
+ dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
}
ipnet_walkers_inc(ips);
@@ -1106,23 +1198,26 @@ ipnet_dispatch(void *arg)
list = &ips->ips_str_list;
for (ipnet = list_head(list); ipnet != NULL;
ipnet = list_next(list, ipnet)) {
- if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst))
+ if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
+ IPSK_BUMP(ips, ik_acceptFail);
continue;
+ }
+ IPSK_BUMP(ips, ik_acceptOk);
if (list_next(list, ipnet) == NULL) {
- netmp = ihd->ihd_mp;
- ihd->ihd_mp = NULL;
+ netmp = hdr->hpo_pkt->b_cont;
+ hdr->hpo_pkt->b_cont = NULL;
} else {
- if ((netmp = dupmsg(ihd->ihd_mp)) == NULL &&
- (netmp = copymsg(ihd->ihd_mp)) == NULL) {
- atomic_inc_64(&ips->ips_drops);
+ if ((netmp = dupmsg(hdr->hpo_pkt->b_cont)) == NULL &&
+ (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) {
+ IPSK_BUMP(ips, ik_duplicationFail);
continue;
}
}
if (ipnet->ipnet_flags & IPNET_INFO) {
- if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) {
- atomic_inc_64(&ips->ips_drops);
+ if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) {
+ IPSK_BUMP(ips, ik_dispatchHeaderDrop);
continue;
}
}
@@ -1130,68 +1225,91 @@ ipnet_dispatch(void *arg)
if (ipnet->ipnet_rq->q_first == NULL &&
canputnext(ipnet->ipnet_rq)) {
putnext(ipnet->ipnet_rq, netmp);
+ IPSK_BUMP(ips, ik_dispatchDeliver);
} else if (canput(ipnet->ipnet_rq)) {
(void) putq(ipnet->ipnet_rq, netmp);
+ IPSK_BUMP(ips, ik_dispatchDeliver);
} else {
freemsg(netmp);
- atomic_inc_64(&ips->ips_drops);
+ IPSK_BUMP(ips, ik_dispatchPutDrop);
}
}
ipnet_walkers_dec(ips);
- freemsg(ihd->ihd_mp);
freemsg(mp);
}
static void
ipnet_input(mblk_t *mp)
{
- ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr;
+ hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
+ ipnet_stack_t *ips;
+
+ ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
DDI_SUCCESS) {
- atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops);
- freemsg(ihd->ihd_mp);
+ IPSK_BUMP(ips, ik_dispatchFail);
freemsg(mp);
+ } else {
+ IPSK_BUMP(ips, ik_dispatchOk);
}
}
+/*
+ * Allocate a zeroed ipnetif_t and initialise the parts that need no
+ * per-interface details: the address-list lock, the IPv4 and IPv6
+ * address lists, the reference lock, and the back pointer to the owning
+ * ipnet_stack_t.
+ *
+ * The allocation uses KM_NOSLEEP, so this does not block for memory;
+ * NULL is returned if none is available. Name, index, zone, device and
+ * reference count are left zeroed for the caller to fill in, and the
+ * structure is not inserted into any AVL tree here.
+ */
+static ipnetif_t *
+ipnet_alloc_if(ipnet_stack_t *ips)
+{
+ ipnetif_t *ipnetif;
+
+ if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL)
+ return (NULL);
+
+ mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
+ list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
+ offsetof(ipnetif_addr_t, ifa_link));
+ list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
+ offsetof(ipnetif_addr_t, ifa_link));
+ mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
+
+ /* Remember which stack instance this interface belongs to. */
+ ipnetif->if_stackp = ips;
+
+ return (ipnetif);
+}
+
/*
* Create a new ipnetif_t and new minor node for it. If creation is
* successful the new ipnetif_t is inserted into an avl_tree
* containing ipnetif's for this stack instance.
*/
static ipnetif_t *
-ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips)
+ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips,
+ uint64_t ifflags)
{
ipnetif_t *ipnetif;
avl_index_t where = 0;
minor_t ifminor;
/*
- * Because ipnet_create_if() can be called from a NIC event
+ * Because ipnetif_create() can be called from a NIC event
* callback, it should not block.
*/
ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
if (ifminor == (minor_t)-1)
return (NULL);
- if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) {
+ if ((ipnetif = ipnet_alloc_if(ips)) == NULL) {
id_free(ipnet_minor_space, ifminor);
return (NULL);
}
(void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
- ipnetif->if_index = index;
-
- mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
- list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
- offsetof(ipnetif_addr_t, ifa_link));
- list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
- offsetof(ipnetif_addr_t, ifa_link));
+ ipnetif->if_index = (uint_t)index;
+ ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack);
ipnetif->if_dev = makedevice(ipnet_major, ifminor);
- mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
+
ipnetif->if_refcnt = 1;
+ if ((ifflags & IFF_LOOPBACK) != 0)
+ ipnetif->if_flags = IPNETIF_LOOPBACK;
mutex_enter(&ips->ips_avl_lock);
VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
@@ -1199,12 +1317,17 @@ ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips)
VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
avl_insert(&ips->ips_avl_by_name, ipnetif, where);
mutex_exit(&ips->ips_avl_lock);
+ /*
+ * Now that the interface can be found by lookups back into ipnet,
+ * allowing for sanity checking, call the BPF attach.
+ */
+ ipnet_bpfattach(ipnetif);
return (ipnetif);
}
static void
-ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips)
+ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips)
{
ipnet_t *ipnet;
@@ -1220,25 +1343,34 @@ ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips)
avl_remove(&ips->ips_avl_by_index, ipnetif);
avl_remove(&ips->ips_avl_by_name, ipnetif);
mutex_exit(&ips->ips_avl_lock);
- /* Release the reference we implicitly held in ipnet_create_if(). */
+ /*
+ * Now that the interface can't be found, do a BPF detach
+ */
+ ipnet_bpfdetach(ipnetif);
+ /*
+ * Release the reference we implicitly held in ipnetif_create().
+ */
ipnetif_refrele(ipnetif);
}
static void
ipnet_purge_addrlist(list_t *addrlist)
{
- ipnetif_addr_t *ifa;
+ ipnetif_addr_t *ifa;
while ((ifa = list_head(addrlist)) != NULL) {
list_remove(addrlist, ifa);
+ if (ifa->ifa_shared != NULL)
+ ipnetif_clone_release(ifa->ifa_shared);
kmem_free(ifa, sizeof (*ifa));
}
}
static void
-ipnet_free_if(ipnetif_t *ipnetif)
+ipnetif_free(ipnetif_t *ipnetif)
{
ASSERT(ipnetif->if_refcnt == 0);
+ ASSERT(ipnetif->if_sharecnt == 0);
/* Remove IPv4/v6 address lists from the ipnetif */
ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
@@ -1247,7 +1379,8 @@ ipnet_free_if(ipnetif_t *ipnetif)
list_destroy(&ipnetif->if_ip6addr_list);
mutex_destroy(&ipnetif->if_addr_lock);
mutex_destroy(&ipnetif->if_reflock);
- id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
+ if (ipnetif->if_dev != 0)
+ id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
kmem_free(ipnetif, sizeof (*ipnetif));
}
@@ -1270,11 +1403,12 @@ ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
net_getlifzone(nd, phyif, lif, &zoneid) != 0)
return;
+
if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
return;
-
ifaddr->ifa_zone = zoneid;
ifaddr->ifa_id = lif;
+ ifaddr->ifa_shared = NULL;
switch (addr.ss_family) {
case AF_INET:
@@ -1295,6 +1429,12 @@ ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
}
mutex_enter(&ipnetif->if_addr_lock);
+ if (zoneid != ipnetif->if_zoneid) {
+ ipnetif_t *ifp2;
+
+ ifp2 = ipnetif_clone_create(ipnetif, zoneid);
+ ifaddr->ifa_shared = ifp2;
+ }
list_insert_tail(addr.ss_family == AF_INET ?
&ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr);
mutex_exit(&ipnetif->if_addr_lock);
@@ -1304,6 +1444,9 @@ static void
ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
{
mutex_enter(&ipnetif->if_addr_lock);
+ if (ifaddr->ifa_shared != NULL)
+ ipnetif_clone_release(ifaddr->ifa_shared);
+
list_remove(isv6 ?
&ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr);
mutex_exit(&ipnetif->if_addr_lock);
@@ -1311,14 +1454,22 @@ ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
}
static void
-ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips,
- boolean_t isv6)
+ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6)
{
ipnetif_t *ipnetif;
boolean_t refrele_needed = B_TRUE;
+ uint64_t ifflags;
+ uint64_t ifindex;
+ char *ifname;
+
+ ifflags = 0;
+ ifname = ipne->ipne_ifname;
+ ifindex = ipne->ipne_ifindex;
+
+ (void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags);
- if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) {
- ipnetif = ipnet_create_if(ifname, ifindex, ips);
+ if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) {
+ ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags);
refrele_needed = B_FALSE;
}
if (ipnetif != NULL) {
@@ -1343,7 +1494,7 @@ ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
{
ipnetif_t *ipnetif;
- if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
+ if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
return;
mutex_enter(&ipnetif->if_addr_lock);
@@ -1358,7 +1509,7 @@ ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
*/
ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED;
if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)))
- ipnet_remove_if(ipnetif, ips);
+ ipnetif_remove(ipnetif, ips);
ipnetif_refrele(ipnetif);
}
@@ -1369,7 +1520,7 @@ ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd,
ipnetif_t *ipnetif;
ipnetif_addr_t *ifaddr;
- if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
+ if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
return;
if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) {
/*
@@ -1390,7 +1541,7 @@ ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
ipnetif_t *ipnetif;
ipnetif_addr_t *ifaddr;
- if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
+ if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
return;
if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
@@ -1399,7 +1550,7 @@ ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
* Make sure that open streams on this ipnetif are still allowed to
* have it open.
*/
- ipnet_if_zonecheck(ipnetif, ips);
+ ipnetif_zonecheck(ipnetif, ips);
}
/*
@@ -1446,8 +1597,7 @@ ipnet_nicevent_task(void *arg)
mutex_enter(&ips->ips_event_lock);
switch (ipne->ipne_event) {
case NE_PLUMB:
- ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips,
- isv6);
+ ipnet_plumb_ev(ipne, ips, isv6);
break;
case NE_UNPLUMB:
ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6);
@@ -1486,7 +1636,7 @@ ipnet_if_getdev(char *name, zoneid_t zoneid)
ips = ns->netstack_ipnet;
mutex_enter(&ips->ips_avl_lock);
if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) {
- if (ipnet_if_in_zone(ipnetif, zoneid, ips))
+ if (ipnetif_in_zone(ipnetif, zoneid, ips))
dev = ipnetif->if_dev;
}
mutex_exit(&ips->ips_avl_lock);
@@ -1496,7 +1646,7 @@ ipnet_if_getdev(char *name, zoneid_t zoneid)
}
static ipnetif_t *
-ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips)
+ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips)
{
ipnetif_t *ipnetif;
@@ -1508,7 +1658,7 @@ ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips)
}
static ipnetif_t *
-ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips)
+ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips)
{
ipnetif_t *ipnetif;
avl_tree_t *tree;
@@ -1530,7 +1680,7 @@ static ipnetif_addr_t *
ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
{
ipnetif_addr_t *ifaddr;
- list_t *list;
+ list_t *list;
mutex_enter(&ipnetif->if_addr_lock);
list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
@@ -1552,10 +1702,12 @@ ipnet_stack_init(netstackid_t stackid, netstack_t *ns)
ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
ips->ips_netstack = ns;
mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
- avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index,
+ avl_create(&ips->ips_avl_by_index, ipnetif_compare_index,
sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
- avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name,
+ avl_create(&ips->ips_avl_by_name, ipnetif_compare_name,
sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
+ avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone,
+ sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared));
mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
list_create(&ips->ips_str_list, sizeof (ipnet_t),
@@ -1571,6 +1723,12 @@ ipnet_stack_fini(netstackid_t stackid, void *arg)
ipnet_stack_t *ips = arg;
ipnetif_t *ipnetif, *nipnetif;
+ if (ips->ips_kstatp != NULL) {
+ zoneid_t zoneid;
+
+ zoneid = netstackid_to_zoneid(stackid);
+ net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp);
+ }
if (ips->ips_ndv4 != NULL) {
VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
ips->ips_nicevents) == 0);
@@ -1586,8 +1744,9 @@ ipnet_stack_fini(netstackid_t stackid, void *arg)
for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
ipnetif = nipnetif) {
nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif);
- ipnet_remove_if(ipnetif, ips);
+ ipnetif_remove(ipnetif, ips);
}
+ avl_destroy(&ips->ips_avl_by_shared);
avl_destroy(&ips->ips_avl_by_index);
avl_destroy(&ips->ips_avl_by_name);
mutex_destroy(&ips->ips_avl_lock);
@@ -1601,7 +1760,7 @@ ipnet_stack_fini(netstackid_t stackid, void *arg)
static boolean_t
ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
{
- ipnetif_addr_t *ifa;
+ ipnetif_addr_t *ifa;
for (ifa = list_head(addrlist); ifa != NULL;
ifa = list_next(addrlist, ifa)) {
@@ -1613,9 +1772,9 @@ ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
/* Should the supplied ipnetif be visible from the supplied zoneid? */
static boolean_t
-ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
+ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
{
- int ret;
+ int ret;
/*
* The global zone has visibility into all interfaces in the global
@@ -1645,7 +1804,7 @@ ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
* case, send the ipnet_t an M_HANGUP.
*/
static void
-ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
+ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
{
list_t *strlist = &ips->ips_str_list;
ipnet_t *ipnet;
@@ -1655,7 +1814,7 @@ ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
ipnet = list_next(strlist, ipnet)) {
if (ipnet->ipnet_if != ipnetif)
continue;
- if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
+ if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
(void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
}
ipnet_walkers_dec(ips);
@@ -1664,7 +1823,7 @@ ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
void
ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
{
- ipnetif_t *ipnetif;
+ ipnetif_t *ipnetif;
list_t cbdata;
ipnetif_cbdata_t *cbnode;
netstack_t *ns;
@@ -1687,7 +1846,7 @@ ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
mutex_enter(&ips->ips_avl_lock);
for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) {
- if (!ipnet_if_in_zone(ipnetif, zoneid, ips))
+ if (!ipnetif_in_zone(ipnetif, zoneid, ips))
continue;
cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
(void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ);
@@ -1706,23 +1865,38 @@ ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
}
static int
-ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp)
+ipnetif_compare_index(const void *index_ptr, const void *ipnetifp)
{
- int64_t index1 = *((int64_t *)index_ptr);
- int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
+ int64_t index1 = *((int64_t *)index_ptr);
+ int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
return (SIGNOF(index2 - index1));
}
static int
-ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp)
+ipnetif_compare_name(const void *name_ptr, const void *ipnetifp)
{
- int res;
+ int res;
res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr);
return (SIGNOF(res));
}
+/*
+ * AVL comparison function for the ips_avl_by_shared tree.
+ *
+ * key_ptr points at a two element uintptr_t array: element 0 holds the
+ * zone id and element 1 holds a pointer to the interface name. The zone
+ * id is compared first; the name is only consulted to break a tie.
+ */
+static int
+ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp)
+{
+	const ipnetif_t	*node = ipnetifp;
+	const uintptr_t	*key = key_ptr;
+	int		diff;
+
+	diff = node->if_zoneid - key[0];
+	if (diff == 0)
+		diff = strcmp(node->if_name, (char *)key[1]);
+	return (SIGNOF(diff));
+}
+
static void
ipnetif_refhold(ipnetif_t *ipnetif)
{
@@ -1735,9 +1909,9 @@ static void
ipnetif_refrele(ipnetif_t *ipnetif)
{
mutex_enter(&ipnetif->if_reflock);
- ASSERT(ipnetif->if_refcnt != 0);
+ ASSERT(ipnetif->if_refcnt > 0);
if (--ipnetif->if_refcnt == 0)
- ipnet_free_if(ipnetif);
+ ipnetif_free(ipnetif);
else
mutex_exit(&ipnetif->if_reflock);
}
@@ -1759,3 +1933,585 @@ ipnet_walkers_dec(ipnet_stack_t *ips)
cv_broadcast(&ips->ips_walkers_cv);
mutex_exit(&ips->ips_walkers_lock);
}
+
+/*
+ * Hook callback installed by ipobs_register_hook(). arg is the consumer
+ * function supplied at registration time; it is called with a duplicate
+ * (dupmsg(), or copymsg() as a fallback) of the observed message.
+ *
+ * Always returns 0. If the message can be neither duplicated nor copied,
+ * the ik_dispatchDupDrop kstat is bumped and the consumer is not called.
+ */
+/*ARGSUSED*/
+static int
+ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ hook_pkt_observe_t *hdr;
+ pfv_t func = (pfv_t)arg;
+ mblk_t *mp;
+
+ hdr = (hook_pkt_observe_t *)info;
+ /* Try dupmsg() first and only fall back to copymsg() if it fails. */
+ mp = dupmsg(hdr->hpo_pkt);
+ if (mp == NULL) {
+ mp = copymsg(hdr->hpo_pkt);
+ if (mp == NULL) {
+ netstack_t *ns = hdr->hpo_ctx;
+ ipnet_stack_t *ips = ns->netstack_ipnet;
+
+ IPSK_BUMP(ips, ik_dispatchDupDrop);
+ return (0);
+ }
+ }
+
+ /*
+ * The observe header is read from the start of the new message and
+ * its hpo_pkt field is pointed back at that message before handing
+ * it to the consumer.
+ */
+ hdr = (hook_pkt_observe_t *)mp->b_rptr;
+ hdr->hpo_pkt = mp;
+
+ func(mp);
+
+ return (0);
+}
+
+/*
+ * Create a hook that bounces observed packets to func (through
+ * ipobs_bounce_func) and register it for NH_OBSERVE events on both the
+ * IPv4 and IPv6 observe protocols of the given netstack.
+ *
+ * Returns the new hook; it is released with ipobs_unregister_hook().
+ * The results of net_hook_register() are discarded, so a registration
+ * failure is not reported to the caller.
+ */
+hook_t *
+ipobs_register_hook(netstack_t *ns, pfv_t func)
+{
+ ip_stack_t *ipst = ns->netstack_ip;
+ char name[32];
+ hook_t *hook;
+
+ /*
+ * NOTE(review): if HOOK_INIT() dereferences hook, the VERIFY below
+ * comes too late to catch a failed allocation -- confirm against the
+ * macro definition.
+ */
+ HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func);
+ VERIFY(hook != NULL);
+
+ /*
+ * To register multiple hooks with the same callback function,
+ * a unique name is needed.
+ */
+ (void) snprintf(name, sizeof (name), "ipobserve_%p", hook);
+ hook->h_name = strdup(name);
+
+ (void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
+ (void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
+
+ return (hook);
+}
+
+/*
+ * Undo ipobs_register_hook(): unregister the hook from the IPv4 and IPv6
+ * observe protocols of the netstack, then free the hook's name and the
+ * hook itself. Unregistration results are ignored.
+ */
+void
+ipobs_unregister_hook(netstack_t *ns, hook_t *hook)
+{
+ ip_stack_t *ipst = ns->netstack_ip;
+
+ (void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
+
+ (void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
+
+ /* h_name was allocated with strdup() when the hook was created. */
+ strfree(hook->h_name);
+
+ hook_free(hook);
+}
+
+/* ******************************************************************** */
+/* BPF Functions below */
+/* ******************************************************************** */
+
+/*
+ * Convenience function to make mapping a zoneid to an ipnet_stack_t easy.
+ *
+ * Panics (VERIFY) if no netstack exists for the zone, so the return value
+ * is never NULL.
+ *
+ * NOTE(review): netstack_find_by_zoneid() presumably returns a held
+ * netstack; no matching release is visible here -- confirm whether the
+ * hold is intentionally kept or is leaked.
+ */
+static ipnet_stack_t *
+ipnet_find_by_zoneid(zoneid_t zoneid)
+{
+ netstack_t *ns;
+
+ VERIFY((ns = netstack_find_by_zoneid(zoneid)) != NULL);
+ return (ns->netstack_ipnet);
+}
+
+/*
+ * Rather than weave the complexity of what needs to be done for a BPF
+ * device attach or detach into the code paths of where they're used,
+ * it is presented here in a couple of simple functions, along with
+ * other similar code.
+ *
+ * The refrele/refhold here provide the means by which it is known
+ * when the clone structures can be free'd.
+ */
+static void
+ipnet_bpfdetach(ipnetif_t *ifp)
+{
+	ipnet_stack_t	*ips = ifp->if_stackp;
+
+	/* BPF has not registered a detach callback: nothing to undo. */
+	if (ips->ips_bpfdetach_fn == NULL)
+		return;
+
+	ips->ips_bpfdetach_fn((uintptr_t)ifp);
+	/* Counterpart of the ipnetif_refhold() done in ipnet_bpfattach(). */
+	ipnetif_refrele(ifp);
+}
+
+static void
+ipnet_bpfattach(ipnetif_t *ifp)
+{
+	ipnet_stack_t	*ips = ifp->if_stackp;
+
+	/* BPF has not registered an attach callback: nothing to announce. */
+	if (ips->ips_bpfattach_fn == NULL)
+		return;
+
+	/* Hold the interface for BPF; ipnet_bpfdetach() drops this hold. */
+	ipnetif_refhold(ifp);
+	ips->ips_bpfattach_fn((uintptr_t)ifp, DL_IPNET, ifp->if_zoneid,
+	    BPR_IPNET);
+}
+
+/*
+ * Set the functions to call back to when adding or removing an interface so
+ * that BPF can keep its internal list of these up to date.
+ *
+ * attach/detach	callbacks to install; passing attach == NULL removes
+ *			the currently installed pair.
+ * zoneid		selects the ipnet stack to operate on.
+ * tapfunc		stored in ipnet_itap, but only when zoneid is the
+ *			global zone.
+ * provider		if not NULL, called with &bpf_ipnet before the
+ *			callbacks are installed and again after they are
+ *			removed.
+ *
+ * Nothing happens if attach is non-NULL while callbacks are already set,
+ * or attach is NULL while none are set.
+ */
+void
+ipnet_set_bpfattach(bpf_attach_fn_t attach, bpf_detach_fn_t detach,
+ zoneid_t zoneid, bpf_itap_fn_t tapfunc, bpf_provider_reg_fn_t provider)
+{
+ ipnet_stack_t *ips;
+ ipnetif_t *ipnetif;
+ avl_tree_t *tree;
+ ipnetif_t *next;
+
+ if (zoneid == GLOBAL_ZONEID) {
+ ipnet_itap = tapfunc;
+ }
+
+ VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
+
+ /*
+ * If we're setting a new attach function, call it for every
+ * mac that has already been attached.
+ */
+ /*
+ * NOTE(review): ips_bpfattach_fn is tested here before ips_event_lock
+ * is taken -- confirm callers serialize calls to this function.
+ */
+ if (attach != NULL && ips->ips_bpfattach_fn == NULL) {
+ ASSERT(detach != NULL);
+ if (provider != NULL) {
+ (void) provider(&bpf_ipnet);
+ }
+ /*
+ * The call to ipnet_bpfattach() calls into bpf`bpfattach
+ * which then wants to resolve the link name into a link id.
+ * For ipnet, this results in a call back to
+ * ipnet_get_linkid_byname which also needs to lock and walk
+ * the AVL tree. Thus the call to ipnet_bpfattach needs to
+ * be made without the avl_lock held.
+ */
+ mutex_enter(&ips->ips_event_lock);
+ ips->ips_bpfattach_fn = attach;
+ ips->ips_bpfdetach_fn = detach;
+ mutex_enter(&ips->ips_avl_lock);
+ tree = &ips->ips_avl_by_index;
+ for (ipnetif = avl_first(tree); ipnetif != NULL;
+ ipnetif = next) {
+ /*
+ * Hold the node so it survives while the avl lock is
+ * dropped; the successor is only looked up once the
+ * lock has been re-taken.
+ */
+ ipnetif_refhold(ipnetif);
+ mutex_exit(&ips->ips_avl_lock);
+ ipnet_bpfattach(ipnetif);
+ mutex_enter(&ips->ips_avl_lock);
+ next = avl_walk(tree, ipnetif, AVL_AFTER);
+ ipnetif_refrele(ipnetif);
+ }
+ mutex_exit(&ips->ips_avl_lock);
+ ipnet_bpf_probe_shared(ips);
+ mutex_exit(&ips->ips_event_lock);
+
+ } else if (attach == NULL && ips->ips_bpfattach_fn != NULL) {
+ ASSERT(ips->ips_bpfdetach_fn != NULL);
+ mutex_enter(&ips->ips_event_lock);
+ /*
+ * Only the attach callback is cleared up front; the detach
+ * callback must stay set until the walks below have used it.
+ */
+ ips->ips_bpfattach_fn = NULL;
+ mutex_enter(&ips->ips_avl_lock);
+ tree = &ips->ips_avl_by_index;
+ for (ipnetif = avl_first(tree); ipnetif != NULL;
+ ipnetif = next) {
+ ipnetif_refhold(ipnetif);
+ mutex_exit(&ips->ips_avl_lock);
+ ipnet_bpfdetach((ipnetif_t *)ipnetif);
+ mutex_enter(&ips->ips_avl_lock);
+ next = avl_walk(tree, ipnetif, AVL_AFTER);
+ ipnetif_refrele(ipnetif);
+ }
+ mutex_exit(&ips->ips_avl_lock);
+ ipnet_bpf_release_shared(ips);
+ ips->ips_bpfdetach_fn = NULL;
+ mutex_exit(&ips->ips_event_lock);
+
+ if (provider != NULL) {
+ (void) provider(&bpf_ipnet);
+ }
+ }
+}
+
+/*
+ * Look up an interface by name in the ipnet stack that belongs to zoneid.
+ * Each stack keeps its own AVL tree of interfaces, so the lookup is always
+ * scoped to one stack even if names happen to be unique system wide.
+ *
+ * On success a reference is taken on the interface, it is stored in *ptr
+ * and 0 is returned. If no interface matches, *ptr is set to NULL and
+ * ESRCH is returned.
+ */
+int
+ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid)
+{
+	ipnet_stack_t	*ips;
+	ipnetif_t	*found;
+
+	ASSERT(ptr != NULL);
+	VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
+
+	/* The hold must be taken before the tree lock is given up. */
+	mutex_enter(&ips->ips_avl_lock);
+	found = avl_find(&ips->ips_avl_by_name, (char *)name, NULL);
+	if (found != NULL)
+		ipnetif_refhold(found);
+	mutex_exit(&ips->ips_avl_lock);
+
+	*ptr = found;
+
+	return (found != NULL ? 0 : ESRCH);
+}
+
+/*
+ * Drop one reference on ifp; the counterpart of the hold taken by
+ * ipnet_open_byname(). ifp must not be NULL.
+ */
+void
+ipnet_close_byhandle(ipnetif_t *ifp)
+{
+ ASSERT(ifp != NULL);
+ ipnetif_refrele(ifp);
+}
+
+/*
+ * Return the name stored in the interface. The pointer refers to storage
+ * inside *ifp, so it is only usable while ifp itself is.
+ */
+const char *
+ipnet_name(ipnetif_t *ifp)
+{
+ ASSERT(ifp != NULL);
+ return (ifp->if_name);
+}
+
+/*
+ * To find the linkid for a given name, it is necessary to know which zone
+ * the interface name belongs to and to search the avl tree for that zone
+ * as there is no master list of all interfaces and which zone they belong
+ * to. It is assumed that the caller of this function is somehow already
+ * working with the ipnet interfaces and hence the ips_event_lock is held.
+ * When BPF calls into this function, it is doing so because of an event
+ * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id
+ * value returned has meaning without the need for grabbing a hold on the
+ * owning structure.
+ */
+int
+ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid)
+{
+	ipnet_stack_t	*ips;
+	ipnetif_t	*match;
+
+	VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
+	ASSERT(mutex_owned(&ips->ips_event_lock));
+
+	mutex_enter(&ips->ips_avl_lock);
+
+	/* First try the stack's own by-name tree. */
+	match = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
+	if (match != NULL)
+		*idp = (uint_t)match->if_index;
+
+	/*
+	 * A zone that does not own its netstack is a shared instance zone.
+	 * For those, the (zoneid, name) lookup in the shared tree decides
+	 * the result, replacing whatever the lookup above found.
+	 */
+	if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
+		uintptr_t	key[2];
+
+		key[0] = zoneid;
+		key[1] = (uintptr_t)name;
+
+		match = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
+		if (match != NULL)
+			*idp = (uint_t)match->if_index;
+	}
+
+	mutex_exit(&ips->ips_avl_lock);
+
+	return (match == NULL ? ESRCH : 0);
+}
+
+/*
+ * Strictly speaking, there is no such thing as a "client" in ipnet, like
+ * there is in mac. BPF only needs to have this because it is required as
+ * part of interfacing correctly with mac. The reuse of the original
+ * ipnetif_t as a client poses no danger, so long as it is done with its
+ * own ref-count'd hold that is given up on close.
+ */
+/*
+ * Take an additional reference on ptr and return the same pointer through
+ * *result as the "client" handle. Always returns 0.
+ */
+int
+ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result)
+{
+ ASSERT(ptr != NULL);
+ ASSERT(result != NULL);
+ ipnetif_refhold(ptr);
+ *result = ptr;
+
+ return (0);
+}
+
+/*
+ * Release the reference taken by ipnet_client_open().
+ */
+void
+ipnet_client_close(ipnetif_t *ptr)
+{
+ ASSERT(ptr != NULL);
+ ipnetif_refrele(ptr);
+}
+
+/*
+ * This is called from BPF when it needs to start receiving packets
+ * from ipnet.
+ *
+ * The use of the ipnet_t structure here is somewhat lightweight when
+ * compared to how it is used elsewhere but it already has all of the
+ * right fields in it, so reuse here doesn't seem out of order. Its
+ * primary purpose here is to provide the means to store pointers for
+ * use when ipnet_promisc_remove() needs to be called.
+ *
+ * This should never be called for the IPNET_MINOR_LO device as it is
+ * never created via ipnetif_create.
+ *
+ * handle	the ipnetif_t to observe.
+ * how		DL_PROMISC_PHYS or DL_PROMISC_MULTI; anything else yields
+ *		EINVAL.
+ * data		opaque pointer stored in ipnet_data and later handed back
+ *		to BPF by ipnet_bpf_bounce().
+ * mhandle	on success, receives the ipnet_t that must be passed to
+ *		ipnet_promisc_remove(). Left untouched on failure.
+ * flags	stored in ipnet_flags.
+ *
+ * Returns 0 on success, otherwise the error from ipnet_join_allmulti()
+ * or net_hook_register().
+ */
+/*ARGSUSED*/
+int
+ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle,
+    int flags)
+{
+	ip_stack_t	*ipst;
+	netstack_t	*ns;
+	ipnetif_t	*ifp;
+	ipnet_t		*ipnet;
+	char		name[32];
+	int		error;
+
+	ifp = (ipnetif_t *)handle;
+	/*
+	 * NOTE(review): the netstack returned here is presumably held; no
+	 * release is visible on any path of this function or in
+	 * ipnet_promisc_remove() -- confirm.
+	 */
+	ns = netstack_find_by_zoneid(ifp->if_zoneid);
+
+	if ((how == DL_PROMISC_PHYS) || (how == DL_PROMISC_MULTI)) {
+		error = ipnet_join_allmulti(ifp, ns->netstack_ipnet);
+		if (error != 0)
+			return (error);
+	} else {
+		return (EINVAL);
+	}
+
+	ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP);
+	ipnet->ipnet_if = ifp;
+	ipnet->ipnet_ns = ns;
+	ipnet->ipnet_flags = flags;
+
+	if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) {
+		ipnet->ipnet_acceptfn = ipnet_loaccept;
+	} else {
+		ipnet->ipnet_acceptfn = ipnet_accept;
+	}
+
+	/*
+	 * To register multiple hooks with the same callback function,
+	 * a unique name is needed.
+	 */
+	HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet);
+	(void) snprintf(name, sizeof (name), "ipnet_promisc_%p",
+	    ipnet->ipnet_hook);
+	ipnet->ipnet_hook->h_name = strdup(name);
+	ipnet->ipnet_data = data;
+	ipnet->ipnet_zoneid = ifp->if_zoneid;
+
+	ipst = ns->netstack_ip;
+
+	error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE,
+	    ipnet->ipnet_hook);
+	if (error != 0)
+		goto regfail;
+
+	error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE,
+	    ipnet->ipnet_hook);
+	if (error != 0) {
+		/* Do not leave the hook half registered. */
+		(void) net_hook_unregister(ipst->ips_ip4_observe_pr,
+		    NH_OBSERVE, ipnet->ipnet_hook);
+		goto regfail;
+	}
+
+	*mhandle = (uintptr_t)ipnet;
+
+	return (0);
+
+regfail:
+	cmn_err(CE_WARN, "net_hook_register failed: %d", error);
+	strfree(ipnet->ipnet_hook->h_name);
+	hook_free(ipnet->ipnet_hook);
+	/*
+	 * The ipnet_t was never published through *mhandle, so nobody else
+	 * can free it; release it here rather than leaking it.
+	 */
+	kmem_free(ipnet, sizeof (*ipnet));
+	return (error);
+}
+
+/*
+ * Tear down what ipnet_promisc_add() set up. data is the ipnet_t that
+ * ipnet_promisc_add() returned through its mhandle argument.
+ *
+ * The hook is unregistered from both observe protocols (a failure to
+ * unregister is fatal), then the hook name, the hook and the ipnet_t are
+ * freed.
+ */
+void
+ipnet_promisc_remove(void *data)
+{
+ ip_stack_t *ipst;
+ ipnet_t *ipnet;
+ hook_t *hook;
+
+ ipnet = data;
+ ipst = ipnet->ipnet_ns->netstack_ip;
+ hook = ipnet->ipnet_hook;
+
+ VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE,
+ hook) == 0);
+
+ VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE,
+ hook) == 0);
+
+ strfree(hook->h_name);
+
+ hook_free(hook);
+
+ kmem_free(ipnet, sizeof (*ipnet));
+}
+
+/*
+ * arg here comes from the ipnet_t allocated in ipnet_promisc_add.
+ * An important field from that structure is "ipnet_data" that
+ * contains the "data" pointer passed into ipnet_promisc_add: it needs
+ * to be passed back to bpf when we call into ipnet_itap.
+ *
+ * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called
+ * from BPF.
+ */
+/*ARGSUSED*/
+static int
+ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ hook_pkt_observe_t *hdr;
+ ipnet_addrp_t src;
+ ipnet_addrp_t dst;
+ ipnet_stack_t *ips;
+ ipnet_t *ipnet;
+ mblk_t *netmp;
+ mblk_t *mp;
+
+ hdr = (hook_pkt_observe_t *)info;
+ mp = hdr->hpo_pkt;
+ ipnet = (ipnet_t *)arg;
+ ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
+
+ /* The IP header is read from the block chained after the first one. */
+ netmp = hdr->hpo_pkt->b_cont;
+ src.iap_family = hdr->hpo_family;
+ dst.iap_family = hdr->hpo_family;
+
+ /* Any family other than AF_INET is treated as IPv6. */
+ if (hdr->hpo_family == AF_INET) {
+ src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
+ dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
+ } else {
+ src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
+ dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
+ }
+
+ /* Packets rejected by the accept function are counted and dropped. */
+ if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
+ IPSK_BUMP(ips, ik_acceptFail);
+ return (0);
+ }
+ IPSK_BUMP(ips, ik_acceptOk);
+
+ /*
+ * The length handed to BPF is hpo_pktlen (converted with ntohs) plus
+ * the number of bytes in the first block of the message.
+ */
+ ipnet_itap(ipnet->ipnet_data, mp,
+ hdr->hpo_htype == IPOBS_HOOK_OUTBOUND,
+ ntohs(hdr->hpo_pktlen) + (mp->b_wptr - mp->b_rptr));
+
+ return (0);
+}
+
+/*
+ * clone'd ipnetif_t's are created when a shared IP instance zone comes
+ * to life and configures an IP address. The model that BPF uses is that
+ * each interface must have a unique pointer and each interface must be
+ * representative of what it can capture. They are limited to one DLT
+ * per interface and one zone per interface. Thus every interface that
+ * can be seen in a zone must be announced via an attach to bpf. For
+ * shared instance zones, this means the ipnet driver needs to detect
+ * when an address is added to an interface in a zone for the first
+ * time (and also when the last address is removed.)
+ */
+/*
+ * Return the clone of ifp for the given zone, creating it and announcing
+ * it to BPF if it does not exist yet. Returns NULL if a new clone cannot
+ * be allocated.
+ */
+static ipnetif_t *
+ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid)
+{
+ uintptr_t key[2] = { zoneid, (uintptr_t)ifp->if_name };
+ ipnet_stack_t *ips = ifp->if_stackp;
+ avl_index_t where = 0;
+ ipnetif_t *newif;
+
+ mutex_enter(&ips->ips_avl_lock);
+ newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where);
+ if (newif != NULL) {
+ /*
+ * A clone already exists: take another hold and another share.
+ * NOTE(review): if_sharecnt is changed here under ips_avl_lock
+ * but in ipnetif_clone_release() under if_reflock -- confirm
+ * that this is safe.
+ */
+ ipnetif_refhold(newif);
+ newif->if_sharecnt++;
+ mutex_exit(&ips->ips_avl_lock);
+ return (newif);
+ }
+
+ newif = ipnet_alloc_if(ips);
+ if (newif == NULL) {
+ mutex_exit(&ips->ips_avl_lock);
+ return (NULL);
+ }
+
+ newif->if_refcnt = 1;
+ newif->if_sharecnt = 1;
+ newif->if_zoneid = zoneid;
+ (void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ);
+ /* Only the loopback flag is inherited from the original. */
+ newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK;
+ newif->if_index = ifp->if_index;
+
+ /* where was filled in by the failed avl_find() above. */
+ avl_insert(&ips->ips_avl_by_shared, newif, where);
+ mutex_exit(&ips->ips_avl_lock);
+
+ /* Called with ips_avl_lock dropped. */
+ ipnet_bpfattach(newif);
+
+ return (newif);
+}
+
+/*
+ * Give up one hold and one share on a clone created by
+ * ipnetif_clone_create(). When the last share goes away the clone is
+ * removed from ips_avl_by_shared and detached from BPF; when the last
+ * hold goes away it is freed.
+ */
+static void
+ipnetif_clone_release(ipnetif_t *ipnetif)
+{
+ boolean_t dofree = B_FALSE;
+ boolean_t doremove = B_FALSE;
+ ipnet_stack_t *ips = ipnetif->if_stackp;
+
+ /* Both counters are decremented under if_reflock. */
+ mutex_enter(&ipnetif->if_reflock);
+ ASSERT(ipnetif->if_refcnt > 0);
+ if (--ipnetif->if_refcnt == 0)
+ dofree = B_TRUE;
+ ASSERT(ipnetif->if_sharecnt > 0);
+ if (--ipnetif->if_sharecnt == 0)
+ doremove = B_TRUE;
+ mutex_exit(&ipnetif->if_reflock);
+ if (doremove) {
+ mutex_enter(&ips->ips_avl_lock);
+ avl_remove(&ips->ips_avl_by_shared, ipnetif);
+ mutex_exit(&ips->ips_avl_lock);
+ /* As with attach, detach is called without ips_avl_lock. */
+ ipnet_bpfdetach(ipnetif);
+ }
+ /*
+ * NOTE(review): ipnet_bpfdetach() may itself drop a reference via
+ * ipnetif_refrele() -- confirm that this cannot combine with dofree
+ * to free the structure twice.
+ */
+ if (dofree) {
+ ASSERT(ipnetif->if_sharecnt == 0);
+ ipnetif_free(ipnetif);
+ }
+}
+
+/*
+ * Called when BPF loads, the goal is to tell BPF about all of the interfaces
+ * in use by zones that have a shared IP stack. These interfaces are stored
+ * in the ips_avl_by_shared tree. Note that if there are 1000 bge0's in use
+ * as bge0:1 through to bge0:1000, then this would be represented by a single
+ * bge0 on that AVL tree.
+ */
+static void
+ipnet_bpf_probe_shared(ipnet_stack_t *ips)
+{
+	avl_tree_t	*tree = &ips->ips_avl_by_shared;
+	ipnetif_t	*cur;
+	ipnetif_t	*succ;
+
+	mutex_enter(&ips->ips_avl_lock);
+
+	cur = avl_first(tree);
+	while (cur != NULL) {
+		/*
+		 * Keep the node alive while the tree lock is dropped for
+		 * the attach call; find the successor only once the lock
+		 * is held again.
+		 */
+		ipnetif_refhold(cur);
+		mutex_exit(&ips->ips_avl_lock);
+		ipnet_bpfattach(cur);
+		mutex_enter(&ips->ips_avl_lock);
+		succ = avl_walk(tree, cur, AVL_AFTER);
+		ipnetif_refrele(cur);
+		cur = succ;
+	}
+
+	mutex_exit(&ips->ips_avl_lock);
+}
+
+/*
+ * Counterpart of ipnet_bpf_probe_shared(): detach from BPF every
+ * interface found on the ips_avl_by_shared tree.
+ */
+static void
+ipnet_bpf_release_shared(ipnet_stack_t *ips)
+{
+	avl_tree_t	*tree = &ips->ips_avl_by_shared;
+	ipnetif_t	*cur;
+	ipnetif_t	*succ;
+
+	mutex_enter(&ips->ips_avl_lock);
+
+	cur = avl_first(tree);
+	while (cur != NULL) {
+		/*
+		 * Keep the node alive while the tree lock is dropped for
+		 * the detach call; find the successor only once the lock
+		 * is held again.
+		 */
+		ipnetif_refhold(cur);
+		mutex_exit(&ips->ips_avl_lock);
+		ipnet_bpfdetach(cur);
+		mutex_enter(&ips->ips_avl_lock);
+		succ = avl_walk(tree, cur, AVL_AFTER);
+		ipnetif_refrele(cur);
+		cur = succ;
+	}
+
+	mutex_exit(&ips->ips_avl_lock);
+}
diff --git a/usr/src/uts/common/inet/ipnet/ipnet_bpf.c b/usr/src/uts/common/inet/ipnet/ipnet_bpf.c
new file mode 100644
index 0000000000..4c15fe56ed
--- /dev/null
+++ b/usr/src/uts/common/inet/ipnet/ipnet_bpf.c
@@ -0,0 +1,193 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/stream.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+#include <inet/ipnet.h>
+
+/*
+ * This file implements the function calls for ipnet that translate the
+ * calls from BPF into the correct arguments and functions inside of the
+ * ipnet device.
+ */
+static const char *ipnet_bpf_name(uintptr_t);
+static void ipnet_bpf_client_close(uintptr_t);
+static const char *ipnet_bpf_client_name(uintptr_t);
+static int ipnet_bpf_client_open(uintptr_t, uintptr_t *);
+static void ipnet_bpf_close(uintptr_t);
+static int ipnet_bpf_getlinkid(const char *, datalink_id_t *, zoneid_t);
+static int ipnet_bpf_open(const char *, uintptr_t *, zoneid_t);
+static uintptr_t ipnet_bpf_promisc_add(uintptr_t, int, void *,
+ uintptr_t *, int);
+static void ipnet_bpf_promisc_remove(uintptr_t);
+static void ipnet_bpf_sdu_get(uintptr_t, uint_t *);
+static int ipnet_bpf_tx(uintptr_t, mblk_t *);
+static int ipnet_bpf_type(uintptr_t);
+
+/*
+ * The provider description that ipnet hands to BPF (see the provider
+ * callback in ipnet_set_bpfattach()). The initializer is positional, so
+ * the entries must stay in the order in which bpf_provider_t declares its
+ * members -- verify against <net/bpf.h> before reordering.
+ */
+bpf_provider_t bpf_ipnet = {
+ BPR_IPNET,
+ ipnet_bpf_open,
+ ipnet_bpf_close,
+ ipnet_bpf_name,
+ ipnet_bpf_type,
+ ipnet_bpf_sdu_get,
+ ipnet_bpf_tx,
+ ipnet_bpf_promisc_add,
+ ipnet_bpf_promisc_remove,
+ ipnet_bpf_getlinkid,
+ ipnet_bpf_client_close,
+ ipnet_bpf_client_name,
+ ipnet_bpf_client_open
+};
+
+/*
+ * Open the named ipnet interface in the given zone. The resulting
+ * ipnetif_t pointer is returned through *mhandlep as an opaque handle;
+ * the return value is that of ipnet_open_byname().
+ */
+/*ARGSUSED*/
+static int
+ipnet_bpf_open(const char *name, uintptr_t *mhandlep, zoneid_t zoneid)
+{
+ return (ipnet_open_byname(name, (ipnetif_t **)mhandlep, zoneid));
+}
+
+/*
+ * Close a handle obtained from ipnet_bpf_open().
+ */
+/*ARGSUSED*/
+static void
+ipnet_bpf_close(uintptr_t mhandle)
+{
+ ipnet_close_byhandle((ipnetif_t *)mhandle);
+}
+
+/*
+ * Return the interface name behind a handle from ipnet_bpf_open().
+ */
+static const char *
+ipnet_bpf_name(uintptr_t mhandle)
+{
+ return (ipnet_name((ipnetif_t *)mhandle));
+}
+
+/*
+ * Every ipnet interface reports the same type, DL_IPNET, whatever the
+ * handle.
+ */
+/*ARGSUSED*/
+static int
+ipnet_bpf_type(uintptr_t mhandle)
+{
+ return (DL_IPNET);
+}
+
+/*
+ * Report a fixed size of 65535 through *mtup; the handle is not consulted.
+ */
+/*ARGSUSED*/
+static void
+ipnet_bpf_sdu_get(uintptr_t mhandle, uint_t *mtup)
+{
+ /*
+ * The choice of 65535 is arbitrary, it could be any smaller number
+ * but it does match the current default choice of libpcap as the
+ * packet snap size.
+ */
+ *mtup = 65535;
+}
+
+/*
+ * Transmit is not supported: the message is freed (so the caller must not
+ * touch it afterwards) and EBADF is always returned.
+ */
+/*ARGSUSED*/
+static int
+ipnet_bpf_tx(uintptr_t chandle, mblk_t *pkt)
+{
+ /*
+ * It is not clear what it would mean to send an ipnet packet,
+ * especially since the ipnet device has been implemented to be
+ * an observation (read-only) instrument. Thus a call to send a
+ * packet using ipnet results in the packet being free'd and an
+ * error returned.
+ */
+ freemsg(pkt);
+
+ return (EBADF);
+}
+
+/*
+ * BPF does not provide the means to select which SAP is being sniffed,
+ * so for the purpose of ipnet, all BPF clients are in SAP promiscuous
+ * mode.
+ */
+static uintptr_t
+ipnet_bpf_promisc_add(uintptr_t chandle, int how, void *arg,
+    uintptr_t *promisc, int flags)
+{
+	/*
+	 * Translate the mac promiscuous type into its ipnet equivalent.
+	 * Types without an equivalent are passed on as 0 with the caller's
+	 * flags untouched.
+	 */
+	int	dlhow = 0;
+
+	if (how == MAC_CLIENT_PROMISC_ALL) {
+		dlhow = DL_PROMISC_PHYS;
+		flags = IPNET_PROMISC_PHYS|IPNET_PROMISC_SAP;
+	} else if (how == MAC_CLIENT_PROMISC_MULTI) {
+		dlhow = DL_PROMISC_MULTI;
+		flags = IPNET_PROMISC_MULTI|IPNET_PROMISC_SAP;
+	}
+
+	return (ipnet_promisc_add((void *)chandle, dlhow,
+	    arg, promisc, flags));
+}
+
+/*
+ * Undo ipnet_bpf_promisc_add(); phandle is the value that call stored
+ * through its promisc argument.
+ */
+static void
+ipnet_bpf_promisc_remove(uintptr_t phandle)
+{
+ ipnet_promisc_remove((void *)phandle);
+}
+
+/*
+ * Produce a client handle for an open interface handle by forwarding to
+ * ipnet_client_open(); the result is returned through *chandlep.
+ */
+static int
+ipnet_bpf_client_open(uintptr_t mhandle, uintptr_t *chandlep)
+{
+
+ return (ipnet_client_open((ipnetif_t *)mhandle,
+ (ipnetif_t **)chandlep));
+}
+
+/*
+ * Close a client handle obtained from ipnet_bpf_client_open().
+ */
+/*ARGSUSED*/
+static void
+ipnet_bpf_client_close(uintptr_t chandle)
+{
+ ipnet_client_close((ipnetif_t *)chandle);
+}
+
+/*
+ * Name lookup for a client handle; the handle is passed straight to
+ * ipnet_bpf_name(), i.e. it is treated exactly like an interface handle.
+ */
+static const char *
+ipnet_bpf_client_name(uintptr_t chandle)
+{
+ return (ipnet_bpf_name(chandle));
+}
+
+/*
+ * Resolve an interface name within a zone to a datalink id for BPF.
+ * *idp is only written when the lookup succeeds; otherwise the error
+ * from ipnet_get_linkid_byname() is returned and *idp is left alone.
+ */
+static int
+ipnet_bpf_getlinkid(const char *name, datalink_id_t *idp, zoneid_t zoneid)
+{
+	uint_t	ifindex = 0;
+	int	rv;
+
+	rv = ipnet_get_linkid_byname(name, &ifindex, zoneid);
+	if (rv != 0)
+		return (rv);
+
+	*idp = (datalink_id_t)ifindex;
+	return (0);
+}
diff --git a/usr/src/uts/common/inet/sockmods/netpacket/Makefile b/usr/src/uts/common/inet/sockmods/netpacket/Makefile
new file mode 100644
index 0000000000..0194aec2bf
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/netpacket/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/common/inet/sockmods/netpacket/Makefile
+#
+# include global definitions
+include ../../../../../Makefile.master
+
+# Headers delivered by this directory.
+HDRS= packet.h
+
+# Installation directory for the headers in the proto area.
+ROOTDIRS= $(ROOT)/usr/include/netpacket
+
+ROOTHDRS= $(HDRS:%=$(ROOT)/usr/include/netpacket/%)
+
+#
+# One .check pseudo-target per header, consumed by the `check' target at
+# the bottom of this file.  Without this definition `check' has no
+# prerequisites and silently verifies nothing.  The %.check rule itself
+# is expected to come from the included Makefile.master.
+#
+CHECKHDRS= $(HDRS:%.h=%.check)
+
+# Install a header from this directory into $(ROOTDIRS).
+$(ROOTDIRS)/%: %
+	$(INS.file)
+
+.KEEP_STATE:
+
+install_h: $(ROOTDIRS) $(ROOTHDRS)
+
+$(ROOTDIRS):
+	$(INS.dir)
+
+check: $(CHECKHDRS)
diff --git a/usr/src/uts/common/inet/sockmods/netpacket/packet.h b/usr/src/uts/common/inet/sockmods/netpacket/packet.h
new file mode 100644
index 0000000000..4d00ca9c60
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/netpacket/packet.h
@@ -0,0 +1,203 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _PACKET_H
+#define _PACKET_H
+
+#include <sys/socket_impl.h>
+#include <net/bpf.h>
+
+/*
+ * With which we do the reverse of what libpcap does....
+ */
+#define PACKET_OUTGOING LINUX_SLL_OUTGOING
+#define PACKET_HOST LINUX_SLL_HOST
+#define PACKET_BROADCAST LINUX_SLL_BROADCAST
+#define PACKET_MULTICAST LINUX_SLL_MULTICAST
+#define PACKET_OTHERHOST LINUX_SLL_OTHERHOST
+
+#define PACKET_STATISTICS 1
+#define PACKET_ADD_MEMBERSHIP 2
+#define PACKET_DROP_MEMBERSHIP 3
+#define PACKET_AUXDATA 4
+
+
+/*
+ * Membership request structure; presumably the argument for the
+ * PACKET_ADD_MEMBERSHIP and PACKET_DROP_MEMBERSHIP options defined above,
+ * with mr_type taking one of the PACKET_MR_* values -- TODO confirm
+ * against the option handling code.
+ */
+struct packet_mreq {
+ uint32_t mr_ifindex;
+ uint16_t mr_type;
+ uint16_t mr_alen;
+ uint8_t mr_address[8];
+};
+
+#define PACKET_MR_MULTICAST 1
+#define PACKET_MR_PROMISC 2
+#define PACKET_MR_ALLMULTI 3
+
+/*
+ * Values for the tp_status member of the tpacket structures below. The
+ * enumerators are numbered consecutively from 0.
+ * NOTE(review): if these are meant to mirror the Linux TP_STATUS_* values,
+ * confirm whether bit-flag rather than consecutive values are required.
+ */
+typedef enum tpkt_status_e {
+ TP_STATUS_KERNEL,
+ TP_STATUS_USER,
+ TP_STATUS_COPY,
+ TP_STATUS_LOSING,
+ TP_STATUS_CSUMNOTREADY
+} tpkt_status_t;
+
+/*
+ * Per-packet auxiliary data; presumably what is delivered when the
+ * PACKET_AUXDATA option is enabled -- TODO confirm.
+ * NOTE(review): the VLAN member is spelled tp_vlan_vci here but
+ * tp_vlan_tci in struct tpacket2_hdr -- confirm which is intended.
+ */
+struct tpacket_auxdata { /* tp_macoff/tp_netoff ?? */
+ tpkt_status_t tp_status;
+ uint32_t tp_len;
+ uint32_t tp_snaplen;
+ uint16_t tp_macoff;
+ uint16_t tp_netoff;
+ uint16_t tp_vlan_vci;
+};
+
+/*
+ * Packet header with a seconds/microseconds time stamp. Unlike the other
+ * tpacket structures in this file, tp_status is a plain uint64_t here
+ * rather than a tpkt_status_t.
+ */
+struct tpacket_hdr { /* tp_macoff/tp_netoff ?? */
+ uint64_t tp_status;
+ uint32_t tp_len;
+ uint32_t tp_snaplen;
+ uint16_t tp_macoff;
+ uint16_t tp_netoff;
+ uint32_t tp_sec;
+ uint32_t tp_usec;
+};
+
+/*
+ * Second form of the packet header: it carries a seconds/nanoseconds time
+ * stamp (tp_sec/tp_nsec) and adds a VLAN member (tp_vlan_tci).
+ */
+struct tpacket2_hdr { /* tp_macoff/tp_netoff ?? */
+ tpkt_status_t tp_status;
+ uint32_t tp_len;
+ uint32_t tp_snaplen;
+ uint16_t tp_macoff;
+ uint16_t tp_netoff;
+ uint32_t tp_sec;
+ uint32_t tp_nsec;
+ uint16_t tp_vlan_tci;
+};
+
+/*
+ * Packet and drop counters; presumably what the PACKET_STATISTICS option
+ * reports -- TODO confirm. Both counters are only 16 bits wide, so each
+ * wraps after 65535.
+ */
+struct tpacket_stats {
+ uint16_t tp_packets;
+ uint16_t tp_drops;
+};
+
+/*
+ * A single filter instruction. The member names follow struct bpf_insn;
+ * presumably the layout is meant to match it as well -- verify against
+ * <net/bpf.h>.
+ */
+struct sock_filter { /* Fields named from bpf_insn */
+ uint16_t code;
+ uint8_t jt;
+ uint8_t jf;
+ uint32_t k;
+};
+
+/*
+ * A filter program: a pointer to an array of sock_filter instructions and
+ * a length (presumably the number of instructions rather than bytes --
+ * TODO confirm).
+ */
+struct sock_fprog {
+ uint16_t len;
+ struct sock_filter *filter;
+};
+
+/*
+ * Linux ARPHRD_ symbols needed...
+ *
+ * The numbers above 50000 are because their real value is unknown from
+ * libpcap's source, so a number has been chosen that is unlikely to be
+ * confused with the real one on Linux.
+ */
+#define ARPHRD_ADAPT 50001
+#define ARPHRD_ARCNET 50002
+#define ARPHRD_ATM 19
+#define ARPHRD_AX25 50003
+#define ARPHRD_CHAOS 50004
+#define ARPHRD_CISCO 513
+#define ARPHRD_CSLIP 50005
+#define ARPHRD_CSLIP6 50006
+#define ARPHRD_DLCI 15
+#define ARPHRD_EETHER 50007
+#define ARPHRD_ETHER 50008
+#define ARPHRD_FCAL 785
+#define ARPHRD_FCFABRIC 787
+#define ARPHRD_FCPL 786
+#define ARPHRD_FCPP 784
+#define ARPHRD_FRAD 770
+#define ARPHRD_FDDI 774
+#define ARPHRD_IEEE802 50009
+#define ARPHRD_IEEE802_TR 800
+#define ARPHRD_IEEE80211 801
+#define ARPHRD_IEEE80211_PRISM 802
+#define ARPHRD_IEEE80211_RADIOTAP 803
+#define ARPHRD_IRDA 783
+#define ARPHRD_LAPD 8445
+#define ARPHRD_LOCALTLK 50010
+#define ARPHRD_LOOPBACK 50011
+#define ARPHRD_METRICOM 50012
+#define ARPHRD_PRONET 50013
+#define ARPHRD_PPP 50014
+#define ARPHRD_RAWHDLC 518
+#define ARPHRD_SIT 776
+#define ARPHRD_SLIP6 50015
+#define ARPHRD_SLIP 50016
+#define ARPHRD_TUNNEL 50017
+
+#ifdef _KERNEL
+/*
+ * PFP socket structure.
+ */
+/*
+ * Kernel-only state kept for each PFP socket: the installed filter
+ * program and its rwlock, the socket framework upper handle and upcalls,
+ * the mac/client/promiscuous handles, and assorted binding, statistics
+ * and flow-control fields.
+ * NOTE(review): which of ps_bpflock and ps_lock protects which members is
+ * not visible from this header -- see sockmod_pfp.c.
+ */
+typedef struct pfpsock {
+ struct bpf_program ps_bpf;
+ krwlock_t ps_bpflock;
+ sock_upper_handle_t ps_upper;
+ sock_upcalls_t *ps_upcalls;
+ mac_handle_t ps_mh;
+ mac_client_handle_t ps_mch;
+ mac_promisc_handle_t ps_phd;
+ int ps_type;
+ int ps_proto;
+ uint_t ps_max_sdu;
+ boolean_t ps_bound;
+ mac_client_promisc_type_t ps_promisc;
+ boolean_t ps_auxdata;
+ struct tpacket_stats ps_stats;
+ struct sockaddr ps_sock;
+ datalink_id_t ps_linkid;
+ kmutex_t ps_lock;
+ boolean_t ps_flow_ctrld;
+ ulong_t ps_flow_ctrl_drops;
+} pfpsock_t;
+
+/*
+ * Named kstat counters for the PFP socket module. The kp_recv_* members
+ * cover the receive path and the kp_send_* members the send path; the
+ * exact event each one counts is determined by where sockmod_pfp.c bumps
+ * it.
+ */
+typedef struct pfp_kstats_s {
+ kstat_named_t kp_recv_mac_hdr_fail;
+ kstat_named_t kp_recv_bad_proto;
+ kstat_named_t kp_recv_alloc_fail;
+ kstat_named_t kp_recv_ok;
+ kstat_named_t kp_recv_fail;
+ kstat_named_t kp_recv_filtered;
+ kstat_named_t kp_recv_flow_cntrld;
+ kstat_named_t kp_send_unbound;
+ kstat_named_t kp_send_failed;
+ kstat_named_t kp_send_too_big;
+ kstat_named_t kp_send_alloc_fail;
+ kstat_named_t kp_send_uiomove_fail;
+ kstat_named_t kp_send_no_memory;
+ kstat_named_t kp_send_open_fail;
+ kstat_named_t kp_send_wrong_family;
+ kstat_named_t kp_send_short_msg;
+ kstat_named_t kp_send_ok;
+} pfp_kstats_t;
+#endif /* _KERNEL */
+
+#endif /* _PACKET_H */
diff --git a/usr/src/uts/common/inet/sockmods/sockmod_pfp.c b/usr/src/uts/common/inet/sockmods/sockmod_pfp.c
new file mode 100644
index 0000000000..2dc63cd491
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/sockmod_pfp.c
@@ -0,0 +1,1414 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/socket_proto.h>
+#include <sys/sockio.h>
+#include <sys/strsun.h>
+#include <sys/kstat.h>
+#include <sys/modctl.h>
+#include <sys/policy.h>
+#include <sys/priv_const.h>
+#include <sys/tihdr.h>
+#include <sys/zone.h>
+#include <sys/time.h>
+#include <fs/sockfs/sockcommon.h>
+#include <net/if.h>
+
+#include <sys/dls.h>
+#include <sys/mac.h>
+#include <sys/mac_client.h>
+#include <sys/mac_provider.h>
+#include <sys/mac_client_priv.h>
+
+#include <netpacket/packet.h>
+
+static void pfp_close(mac_handle_t, mac_client_handle_t);
+static int pfp_dl_to_arphrd(int);
+static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
+ socklen_t *);
+static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *);
+static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *);
+static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
+ cred_t *);
+static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
+static void pfp_release_bpf(struct pfpsock *);
+static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
+static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
+ socklen_t);
+static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
+ socklen_t);
+
+/*
+ * PFP sockfs operations
+ * Most are currently no-ops because they have no meaning for a connectionless
+ * socket.
+ */
+static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
+ sock_upcalls_t *, int, struct cred *);
+static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
+ struct cred *);
+static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
+static void sdpfp_clr_flowctrl(sock_lower_handle_t);
+static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
+ socklen_t *, struct cred *);
+static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
+ struct cred *);
+static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
+ struct cred *);
+static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
+ socklen_t, struct cred *);
+
+static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
+ uint_t *, int *, int, cred_t *);
+
+static int sockpfp_init(void);
+static void sockpfp_fini(void);
+
+/*
+ * pfp_ksp is the kstat handle created in sockpfp_init() (NULL if creation
+ * failed). ks_stats holds the live counters that the rest of this file
+ * increments; pfp_kstats is the template, carrying the kstat names and
+ * data types, that sockpfp_init() copies into ks_stats.
+ */
+static kstat_t *pfp_ksp;
+static pfp_kstats_t ks_stats;
+static pfp_kstats_t pfp_kstats = {
+ /*
+ * Each one of these kstats is a different return path in handling
+ * a packet received from the mac layer.
+ */
+ { "recvMacHeaderFail", KSTAT_DATA_UINT64 },
+ { "recvBadProtocol", KSTAT_DATA_UINT64 },
+ { "recvAllocbFail", KSTAT_DATA_UINT64 },
+ { "recvOk", KSTAT_DATA_UINT64 },
+ { "recvFail", KSTAT_DATA_UINT64 },
+ { "recvFiltered", KSTAT_DATA_UINT64 },
+ { "recvFlowControl", KSTAT_DATA_UINT64 },
+ /*
+ * A global set of counters is maintained to track the behaviour
+ * of the system (kernel & applications) in sending packets.
+ */
+ { "sendUnbound", KSTAT_DATA_UINT64 },
+ { "sendFailed", KSTAT_DATA_UINT64 },
+ { "sendTooBig", KSTAT_DATA_UINT64 },
+ { "sendAllocFail", KSTAT_DATA_UINT64 },
+ { "sendUiomoveFail", KSTAT_DATA_UINT64 },
+ { "sendNoMemory", KSTAT_DATA_UINT64 },
+ { "sendOpenFail", KSTAT_DATA_UINT64 },
+ { "sendWrongFamily", KSTAT_DATA_UINT64 },
+ { "sendShortMsg", KSTAT_DATA_UINT64 },
+ { "sendOk", KSTAT_DATA_UINT64 }
+};
+
+/*
+ * Downcall vector handed to sockfs by sockpfp_create(). The initialiser is
+ * positional, so the order of the entries must match the member order of
+ * sock_downcalls_t. Operations this module implements use the sdpfp_*
+ * functions; the rest use the generic sock_*_notsupp handlers.
+ * NOTE(review): one slot (after sdpfp_senduio) is NULL rather than a
+ * "notsupp" handler - presumably the receive-uio entry; confirm against
+ * sock_downcalls_t that sockfs tolerates a NULL there.
+ */
+sock_downcalls_t pfp_downcalls = {
+ sdpfp_activate,
+ sock_accept_notsupp,
+ sdpfp_bind,
+ sock_listen_notsupp,
+ sock_connect_notsupp,
+ sock_getpeername_notsupp,
+ sock_getsockname_notsupp,
+ sdpfp_getsockopt,
+ sdpfp_setsockopt,
+ sock_send_notsupp,
+ sdpfp_senduio,
+ NULL,
+ sock_poll_notsupp,
+ sock_shutdown_notsupp,
+ sdpfp_clr_flowctrl,
+ sdpfp_ioctl,
+ sdpfp_close,
+};
+
+/*
+ * Socket module registration record: names this module "sockpfp" and
+ * supplies sockpfp_create() as the function that creates new sockets.
+ * The final member is left NULL.
+ */
+static smod_reg_t sinfo = {
+ SOCKMOD_VERSION,
+ "sockpfp",
+ SOCK_UC_VERSION,
+ SOCK_DC_VERSION,
+ sockpfp_create,
+ NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modlsockmod modlsockmod = {
+ &mod_sockmodops, "PF Packet socket module", &sinfo
+};
+
+/* Linkage passed to mod_install(), mod_remove() and mod_info() below. */
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modlsockmod,
+ NULL
+};
+
+/*
+ * Module load entry point: set up module state, then register the module
+ * with the kernel. If registration fails the module state is torn down
+ * again and the registration error is returned.
+ */
+int
+_init(void)
+{
+	int rc;
+
+	if ((rc = sockpfp_init()) != 0)
+		return (rc);
+
+	if ((rc = mod_install(&modlinkage)) == 0)
+		return (0);
+
+	/* Registration failed: undo what sockpfp_init() set up. */
+	sockpfp_fini();
+	return (rc);
+}
+
+/*
+ * Module unload entry point: module state is only torn down once the
+ * kernel has agreed to remove the module.
+ */
+int
+_fini(void)
+{
+	int rc = mod_remove(&modlinkage);
+
+	if (rc != 0)
+		return (rc);
+
+	sockpfp_fini();
+	return (0);
+}
+
+/*
+ * Module information entry point: report on this module via mod_info().
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rc;
+
+	rc = mod_info(&modlinkage, modinfop);
+	return (rc);
+}
+
+/*
+ * sockpfp_init: called as part of the initialisation of the module when
+ * loaded into the kernel.
+ *
+ * The live counters (ks_stats) are initialised from the pfp_kstats
+ * template, which supplies the name and data type of every counter, and
+ * are then published as a virtual named kstat.
+ *
+ * Being able to create and record the kstats data in the kernel is not
+ * considered to be vital to the operation of this kernel module, thus
+ * its failure is tolerated.
+ *
+ * Always returns 0.
+ */
+static int
+sockpfp_init(void)
+{
+	/*
+	 * ks_stats and pfp_kstats have the same type, so this copy
+	 * initialises every byte of ks_stats; no separate zeroing is
+	 * needed beforehand.
+	 */
+	(void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
+
+	pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	if (pfp_ksp != NULL) {
+		/* Virtual kstat: point it at the storage we own. */
+		pfp_ksp->ks_data = &ks_stats;
+		kstat_install(pfp_ksp);
+	}
+
+	return (0);
+}
+
+/*
+ * sockpfp_fini: undo sockpfp_init() when the module is being unloaded (or
+ * failed to install). The kstat only exists if its creation succeeded.
+ */
+static void
+sockpfp_fini(void)
+{
+	if (pfp_ksp == NULL)
+		return;
+
+	kstat_delete(pfp_ksp);
+}
+
+/*
+ * Create the protocol-private state for a new PF_PACKET socket.
+ *
+ * Due to sockets being created read-write by default, all PF_PACKET sockets
+ * require the NET_RAWACCESS privilege, even if the socket is only being
+ * used for reading packets from.
+ *
+ * This function enforces that the module is only used with PF_PACKET
+ * sockets and the policy that we support via the sock2path.conf file:
+ * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
+ *
+ * On success the new pfpsock is returned as the lower handle, with
+ * *sock_downcalls and *smodep filled in. On failure NULL is returned and
+ * *errorp holds the reason (EACCES, EAFNOSUPPORT, ESOCKTNOSUPPORT or
+ * ENOMEM).
+ */
+/* ARGSUSED */
+static sock_lower_handle_t
+sockpfp_create(int family, int type, int proto,
+    sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
+    int sflags, cred_t *cred)
+{
+	struct pfpsock *sock;
+	int err = 0;
+
+	/* The checks are ordered; the first one to fail picks the error. */
+	if (secpolicy_net_rawaccess(cred) != 0)
+		err = EACCES;
+	else if (family != AF_PACKET)
+		err = EAFNOSUPPORT;
+	else if (type != SOCK_RAW && type != SOCK_DGRAM)
+		err = ESOCKTNOSUPPORT;
+
+	if (err != 0) {
+		*errorp = err;
+		return (NULL);
+	}
+
+	/* Only a KM_NOSLEEP allocation can come back NULL. */
+	sock = kmem_zalloc(sizeof (*sock),
+	    (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP);
+	if (sock == NULL) {
+		*errorp = ENOMEM;
+		return (NULL);
+	}
+
+	rw_init(&sock->ps_bpflock, NULL, RW_DRIVER, NULL);
+	mutex_init(&sock->ps_lock, NULL, MUTEX_DRIVER, NULL);
+	sock->ps_proto = proto;
+	sock->ps_type = type;
+
+	/*
+	 * SM_ATOMIC causes bytes from a packet that do not fit into the
+	 * destination user buffer to be discarded. Thus the API is one
+	 * packet per receive and callers are required to use a buffer large
+	 * enough for the biggest packet that the interface can provide.
+	 */
+	*smodep = SM_ATOMIC;
+	*sock_downcalls = &pfp_downcalls;
+
+	return ((sock_lower_handle_t)sock);
+}
+
+/* ************************************************************************* */
+
+/*
+ * pfp_packet is the callback function that is given to the mac layer for
+ * PF_PACKET to receive packets with. One packet at a time is passed into
+ * this function from the mac layer. Each packet is a private copy given
+ * to PF_PACKET to modify or free as it wishes and does not harm the original
+ * packet from which it was cloned.
+ *
+ * arg is the pfpsock the callback was registered for. The packet is either
+ * dropped (and freed) here, with the reason recorded in both the socket's
+ * statistics and the global kstats, or it is prefixed with a
+ * T_unitdata_ind message carrying a sockaddr_ll (and, optionally,
+ * PACKET_AUXDATA) and passed up to sockfs through the su_recv upcall.
+ */
+/* ARGSUSED */
+static void
+pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
+{
+	struct T_unitdata_ind *tunit;
+	struct sockaddr_ll *sll;
+	struct sockaddr_ll *sol;
+	mac_header_info_t hdr;
+	struct pfpsock *ps;
+	size_t tusz;
+	mblk_t *mp0;
+	int error;
+
+	if (mp == NULL)
+		return;
+
+	ps = arg;
+	/* Deliberately checked without ps_lock: this is the hot drop path. */
+	if (ps->ps_flow_ctrld) {
+		ps->ps_flow_ctrl_drops++;
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_flow_cntrld.value.ui64++;
+		freemsg(mp);
+		return;
+	}
+
+	if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
+		/*
+		 * Can't decode the packet header information so drop it.
+		 */
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
+		freemsg(mp);
+		return;
+	}
+
+	if (mac_type(ps->ps_mh) == DL_ETHER &&
+	    hdr.mhi_bindsap == ETHERTYPE_VLAN) {
+		struct ether_vlan_header *evhp;
+		struct ether_vlan_header evh;
+
+		hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
+		hdr.mhi_istagged = B_TRUE;
+
+		if (MBLKL(mp) >= sizeof (*evhp)) {
+			evhp = (struct ether_vlan_header *)mp->b_rptr;
+		} else {
+			int sz = sizeof (*evhp);
+			char *s = (char *)&evh;
+			mblk_t *tmp;
+			int len;
+
+			/*
+			 * The header is spread over more than one mblk, so
+			 * gather it into a local copy. The destination
+			 * pointer must advance with each fragment copied,
+			 * otherwise later fragments overwrite the start of
+			 * the header.
+			 */
+			for (tmp = mp; sz > 0 && tmp != NULL;
+			    tmp = tmp->b_cont) {
+				len = min(sz, MBLKL(tmp));
+				bcopy(tmp->b_rptr, s, len);
+				s += len;
+				sz -= len;
+			}
+			if (sz > 0) {
+				/*
+				 * The whole message is shorter than a VLAN
+				 * header, so the local copy is incomplete
+				 * and cannot be decoded; drop the packet.
+				 */
+				ps->ps_stats.tp_drops++;
+				ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
+				freemsg(mp);
+				return;
+			}
+			evhp = &evh;
+		}
+		hdr.mhi_tci = ntohs(evhp->ether_tci);
+		hdr.mhi_bindsap = ntohs(evhp->ether_type);
+	}
+
+	if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
+		/*
+		 * The packet is not of interest to this socket so
+		 * drop it on the floor. Here the SAP is being used
+		 * as a very coarse filter.
+		 */
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_bad_proto.value.ui64++;
+		freemsg(mp);
+		return;
+	}
+
+	/*
+	 * This field is not often set, even for ethernet,
+	 * by mac_header_info, so compute it if it is 0.
+	 */
+	if (hdr.mhi_pktsize == 0)
+		hdr.mhi_pktsize = msgdsize(mp);
+
+	/*
+	 * If a BPF filter is present, pass the raw packet into that.
+	 * A failed match will result in zero being returned, indicating
+	 * that this socket is not interested in the packet.
+	 */
+	if (ps->ps_bpf.bf_len != 0) {
+		uchar_t *buffer;
+		int buflen;
+
+		/*
+		 * When the whole packet is in the first mblk the filter is
+		 * given the data directly; otherwise it is given the mblk
+		 * itself with a buffer length of 0.
+		 * NOTE(review): presumably bpf_filter() treats a zero
+		 * buffer length as "buffer is an mblk chain" - confirm.
+		 */
+		buflen = MBLKL(mp);
+		if (hdr.mhi_pktsize == buflen) {
+			buffer = mp->b_rptr;
+		} else {
+			buflen = 0;
+			buffer = (uchar_t *)mp;
+		}
+		rw_enter(&ps->ps_bpflock, RW_READER);
+		if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
+		    hdr.mhi_pktsize, buflen) == 0) {
+			rw_exit(&ps->ps_bpflock);
+			ps->ps_stats.tp_drops++;
+			ks_stats.kp_recv_filtered.value.ui64++;
+			freemsg(mp);
+			return;
+		}
+		rw_exit(&ps->ps_bpflock);
+	}
+
+	if (ps->ps_type == SOCK_DGRAM) {
+		/*
+		 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
+		 * past the link layer header.
+		 */
+		mp->b_rptr += hdr.mhi_hdrsize;
+		hdr.mhi_pktsize -= hdr.mhi_hdrsize;
+	}
+
+	tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
+	if (ps->ps_auxdata) {
+		tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
+		tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
+	}
+
+	/*
+	 * It is tempting to think that this could be optimised by having
+	 * the base mblk_t allocated and hung off the pfpsock structure,
+	 * except that then another one would need to be allocated for the
+	 * sockaddr_ll that is included. Even creating a template to copy
+	 * from is of questionable value, as read-write from one structure
+	 * to the other is going to be slower than all of the initialisation.
+	 */
+	mp0 = allocb(tusz, BPRI_HI);
+	if (mp0 == NULL) {
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_alloc_fail.value.ui64++;
+		freemsg(mp);
+		return;
+	}
+
+	(void) memset(mp0->b_rptr, 0, tusz);
+
+	mp0->b_datap->db_type = M_PROTO;
+	mp0->b_wptr = mp0->b_rptr + tusz;
+
+	/*
+	 * NOTE(review): SRC_length is sizeof (struct sockaddr) although a
+	 * struct sockaddr_ll is stored at SRC_offset - confirm that this is
+	 * what sockfs expects.
+	 */
+	tunit = (struct T_unitdata_ind *)mp0->b_rptr;
+	tunit->PRIM_type = T_UNITDATA_IND;
+	tunit->SRC_length = sizeof (struct sockaddr);
+	tunit->SRC_offset = sizeof (*tunit);
+
+	sol = (struct sockaddr_ll *)&ps->ps_sock;
+	sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
+	sll->sll_ifindex = sol->sll_ifindex;
+	sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
+	sll->sll_halen = sol->sll_halen;
+	if (hdr.mhi_saddr != NULL)
+		(void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
+
+	/* Any other destination type leaves sll_pkttype zeroed. */
+	switch (hdr.mhi_dsttype) {
+	case MAC_ADDRTYPE_MULTICAST :
+		sll->sll_pkttype = PACKET_MULTICAST;
+		break;
+	case MAC_ADDRTYPE_BROADCAST :
+		sll->sll_pkttype = PACKET_BROADCAST;
+		break;
+	case MAC_ADDRTYPE_UNICAST :
+		if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
+			sll->sll_pkttype = PACKET_HOST;
+		else
+			sll->sll_pkttype = PACKET_OTHERHOST;
+		break;
+	}
+
+	if (ps->ps_auxdata) {
+		struct tpacket_auxdata *aux;
+		struct T_opthdr *topt;
+
+		tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
+		    sizeof (struct sockaddr_ll));
+		tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
+		    _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
+
+		topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
+		aux = (struct tpacket_auxdata *)
+		    ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
+
+		topt->len = tunit->OPT_length;
+		topt->level = SOL_PACKET;
+		topt->name = PACKET_AUXDATA;
+		topt->status = 0;
+		/*
+		 * libpcap doesn't seem to use any other field,
+		 * so it isn't clear how they should be filled in.
+		 */
+		aux->tp_vlan_vci = hdr.mhi_tci;
+	}
+
+	linkb(mp0, mp);
+
+	ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
+	    &error, NULL);
+
+	if (error == 0) {
+		ps->ps_stats.tp_packets++;
+		ks_stats.kp_recv_ok.value.ui64++;
+	} else {
+		mutex_enter(&ps->ps_lock);
+		if (error == ENOSPC) {
+			/*
+			 * Probe sockfs again with no data; if it still
+			 * reports ENOSPC, stop delivering until
+			 * sdpfp_clr_flowctrl() is called.
+			 */
+			ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
+			    &error, NULL);
+			if (error == ENOSPC)
+				ps->ps_flow_ctrld = B_TRUE;
+		}
+		mutex_exit(&ps->ps_lock);
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_fail.value.ui64++;
+	}
+}
+
+/*
+ * Bind a PF_PACKET socket to a network interface.
+ *
+ * The default operation of this bind() is to place the socket (and thus the
+ * network interface) into promiscuous mode. It is then up to the application
+ * to turn that down by issuing the relevant ioctls, if desired.
+ *
+ * addr must point to a struct sockaddr_ll whose sll_ifindex identifies the
+ * link to bind to. Returns 0 on success, EINVAL if the socket is already
+ * bound or the address is missing or too short, EADDRINUSE if another
+ * bind attached a MAC to the socket first, or the error from opening the
+ * link or enabling promiscuous mode. A failed bind leaves the socket
+ * unbound, so that bind() may be attempted again.
+ */
+/* ARGSUSED */
+static int
+sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
+    socklen_t addrlen, struct cred *cred)
+{
+	struct sockaddr_ll *addr_ll, *sol;
+	mac_client_handle_t mch;
+	struct pfpsock *ps;
+	mac_handle_t mh;
+	int error;
+
+	ps = (struct pfpsock *)handle;
+	if (ps->ps_bound)
+		return (EINVAL);
+
+	/* The address is about to be read as a sockaddr_ll. */
+	if (addr == NULL || addrlen < sizeof (struct sockaddr_ll))
+		return (EINVAL);
+
+	addr_ll = (struct sockaddr_ll *)addr;
+
+	error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
+	if (error != 0)
+		return (error);
+	/*
+	 * Ensure that each socket is only bound once.
+	 */
+	mutex_enter(&ps->ps_lock);
+	if (ps->ps_mh != 0) {
+		mutex_exit(&ps->ps_lock);
+		pfp_close(mh, mch);
+		return (EADDRINUSE);
+	}
+	ps->ps_mh = mh;
+	ps->ps_mch = mch;
+	mutex_exit(&ps->ps_lock);
+
+	/*
+	 * Cache all of the information from bind so that it's in an easy
+	 * place to get at when packets are received. This has to be in
+	 * place before the receive callback is registered below.
+	 */
+	sol = (struct sockaddr_ll *)&ps->ps_sock;
+	sol->sll_family = AF_PACKET;
+	sol->sll_ifindex = addr_ll->sll_ifindex;
+	sol->sll_protocol = addr_ll->sll_protocol;
+	sol->sll_halen = mac_addr_len(ps->ps_mh);
+	mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
+	mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
+	ps->ps_linkid = addr_ll->sll_ifindex;
+
+	error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
+	    pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
+	if (error != 0) {
+		/*
+		 * Return the socket to its unbound state. Leaving the
+		 * handles attached would make every later bind() fail with
+		 * EADDRINUSE and keep the link open until close.
+		 */
+		mutex_enter(&ps->ps_lock);
+		ps->ps_mh = 0;
+		ps->ps_mch = 0;
+		mutex_exit(&ps->ps_lock);
+		bzero(&ps->ps_sock, sizeof (ps->ps_sock));
+		ps->ps_linkid = 0;
+		ps->ps_max_sdu = 0;
+		pfp_close(mh, mch);
+		return (error);
+	}
+
+	ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
+	ps->ps_bound = B_TRUE;
+
+	return (0);
+}
+
+/*
+ * Record the sockfs upper handle and upcall vector for this socket; they
+ * are used later to pass received packets up to sockfs.
+ */
+/* ARGSUSED */
+static void
+sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
+    sock_upcalls_t *upcalls, int flags, cred_t *cred)
+{
+	struct pfpsock *sock = (struct pfpsock *)lower;
+
+	sock->ps_upper = upper;
+	sock->ps_upcalls = upcalls;
+}
+
+/*
+ * getsockopt downcall. Only the option level introduced by this module
+ * (SOL_PACKET) is implemented here; every other level is handed back to
+ * the sockfs layer by returning ENOPROTOOPT.
+ */
+/* ARGSUSED */
+static int
+sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
+    void *optval, socklen_t *optlenp, struct cred *cred)
+{
+	if (level != SOL_PACKET) {
+		/*
+		 * If sockfs code receives this error in return from the
+		 * getsockopt downcall it handles the option locally, if
+		 * it can. This implements SO_RCVBUF, etc.
+		 */
+		return (ENOPROTOOPT);
+	}
+
+	return (pfp_getpacket_sockopt(handle, option_name, optval, optlenp));
+}
+
+/*
+ * setsockopt downcall. PF_PACKET supports setting socket options at only
+ * two levels, SOL_SOCKET and SOL_PACKET; any other level is rejected with
+ * EINVAL.
+ */
+/* ARGSUSED */
+static int
+sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
+    const void *optval, socklen_t optlen, struct cred *cred)
+{
+	if (level == SOL_SOCKET) {
+		return (pfp_setsocket_sockopt(handle, option_name, optval,
+		    optlen));
+	}
+
+	if (level == SOL_PACKET) {
+		return (pfp_setpacket_sockopt(handle, option_name, optval,
+		    optlen));
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Send one packet, taken from uiop, out of a network interface.
+ *
+ * The destination comes from msg->msg_name if one was supplied, otherwise
+ * from the address cached by bind(). For SOCK_DGRAM sockets a link-layer
+ * header is built in front of the data; for SOCK_RAW sockets the data is
+ * sent as supplied.
+ *
+ * This function is incredibly inefficient for sending any packet that
+ * comes with a msghdr asking to be sent to an interface to which the
+ * socket has not been bound. Some possibilities here are keeping a
+ * cache of all open mac's and mac_client's, for the purpose of sending,
+ * and closing them after some amount of inactivity. Clearly, applications
+ * should not be written to use one socket for multiple interfaces if
+ * performance is desired with the code as is.
+ *
+ * Returns 0 on success or an errno value; every failure path is counted
+ * in the global kstats.
+ */
+/* ARGSUSED */
+static int
+sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
+    struct nmsghdr *msg, struct cred *cred)
+{
+	struct sockaddr_ll *sol;
+	mac_client_handle_t mch;
+	mac_tx_cookie_t cookie;
+	struct pfpsock *ps;
+	boolean_t new_open;
+	mac_handle_t mh;
+	size_t mpsize;
+	uint_t maxsdu;
+	mblk_t *mp0;
+	mblk_t *mp;
+	int error;
+
+	mp = NULL;
+	mp0 = NULL;
+	new_open = B_FALSE;
+	ps = (struct pfpsock *)handle;
+	mh = ps->ps_mh;
+	mch = ps->ps_mch;
+	maxsdu = ps->ps_max_sdu;
+
+	sol = (struct sockaddr_ll *)msg->msg_name;
+	if (sol == NULL) {
+		/*
+		 * If no sockaddr_ll has been provided with the send call,
+		 * use the one constructed when the socket was bound to an
+		 * interface and fail if it hasn't been bound.
+		 */
+		if (!ps->ps_bound) {
+			ks_stats.kp_send_unbound.value.ui64++;
+			return (EPROTO);
+		}
+		sol = (struct sockaddr_ll *)&ps->ps_sock;
+	} else {
+		/*
+		 * Verify the sockaddr_ll message passed down before using
+		 * it to send a packet out with. If it refers to an interface
+		 * that has not been bound, it is necessary to open it.
+		 */
+		struct sockaddr_ll *sll;
+
+		if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
+			ks_stats.kp_send_short_msg.value.ui64++;
+			return (EINVAL);
+		}
+
+		if (sol->sll_family != AF_PACKET) {
+			ks_stats.kp_send_wrong_family.value.ui64++;
+			return (EAFNOSUPPORT);
+		}
+
+		/*
+		 * The socket's own handles may only be used when it is
+		 * bound. On an unbound socket the cached address is all
+		 * zeroes, so comparing interface indexes alone would let
+		 * a request for index 0 through with no MAC handles.
+		 */
+		sll = (struct sockaddr_ll *)&ps->ps_sock;
+		if (!ps->ps_bound || sol->sll_ifindex != sll->sll_ifindex) {
+			error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
+			    cred);
+			if (error != 0) {
+				ks_stats.kp_send_open_fail.value.ui64++;
+				return (error);
+			}
+			mac_sdu_get(mh, NULL, &maxsdu);
+			new_open = B_TRUE;
+		}
+	}
+
+	mpsize = uiop->uio_resid;
+	if (mpsize > maxsdu) {
+		ks_stats.kp_send_too_big.value.ui64++;
+		error = EMSGSIZE;
+		goto done;
+	}
+
+	if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
+		ks_stats.kp_send_alloc_fail.value.ui64++;
+		error = ENOBUFS;
+		goto done;
+	}
+
+	mp->b_wptr = mp->b_rptr + mpsize;
+	error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
+	if (error != 0) {
+		ks_stats.kp_send_uiomove_fail.value.ui64++;
+		goto done;
+	}
+
+	if (ps->ps_type == SOCK_DGRAM) {
+		mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
+		if (mp0 == NULL) {
+			ks_stats.kp_send_no_memory.value.ui64++;
+			error = ENOBUFS;
+			goto done;
+		}
+		linkb(mp0, mp);
+		mp = mp0;
+	}
+
+	/*
+	 * As this is sending datagrams and no promise is made about
+	 * how or if a packet will be sent/delivered, no effort is to
+	 * be expended in recovering from a situation where the packet
+	 * cannot be sent - it is just dropped.
+	 *
+	 * mac_tx() returns a cookie rather than an errno, so it must not
+	 * be handed back to the caller as an error number.
+	 * NOTE(review): this assumes that with MAC_DROP_ON_NO_DESC the
+	 * mac layer always consumes the message, freeing it itself when
+	 * it cannot be sent, so it must not be freed again below - confirm
+	 * against the mac_tx() contract.
+	 */
+	cookie = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
+	mp = NULL;
+	if (cookie == 0) {
+		error = 0;
+		ks_stats.kp_send_ok.value.ui64++;
+	} else {
+		error = ENOBUFS;
+		ks_stats.kp_send_failed.value.ui64++;
+	}
+
+done:
+
+	if (new_open) {
+		ASSERT(mch != ps->ps_mch);
+		ASSERT(mh != ps->ps_mh);
+		pfp_close(mh, mch);
+	}
+	if (mp != NULL)
+		freemsg(mp);
+
+	return (error);
+
+}
+
+/*
+ * Clear the flow-control flag so that pfp_packet() stops dropping packets
+ * for this socket.
+ *
+ * The flag is cleared under ps_lock, the same lock that is held at the
+ * bottom of pfp_packet() where ps_flow_ctrld is set to true. The test of
+ * the flag at the top of pfp_packet() is made without the lock: where that
+ * test races with this function the end result is ultimately determined by
+ * the scheduler anyway. In such an operational environment, we've got
+ * packets arriving too fast to be dealt with so packets are going to be
+ * dropped, and grabbing a lock there would just make the drop more
+ * expensive.
+ */
+static void
+sdpfp_clr_flowctrl(sock_lower_handle_t handle)
+{
+	struct pfpsock *sock = (struct pfpsock *)handle;
+
+	mutex_enter(&sock->ps_lock);
+	sock->ps_flow_ctrld = B_FALSE;
+	mutex_exit(&sock->ps_lock);
+}
+
+/*
+ * The implementation of this ioctl() handler is intended to function
+ * in the absence of a bind() being made before it is called. Thus the
+ * function opens the MAC itself to provide a handle.
+ * This function is structured like this:
+ * - determine the linkid for the interface being targeted
+ * - open the interface with said linkid
+ * - perform ioctl
+ * - copy results back to caller
+ *
+ * SIOCGSTAMP does not refer to an interface, so it is answered before any
+ * interface is looked up or opened. Commands that this module does not
+ * implement are rejected with EINVAL rather than being allowed to reach
+ * the open with no link id.
+ *
+ * The ioctls that interact with interface flags have been implemented below
+ * to assume that the interface is always up and running (IFF_RUNNING) and
+ * to use the state of this socket to determine whether or not the network
+ * interface is in promiscuous mode. Thus an ioctl to get the interface flags
+ * of an interface that has been put in promiscuous mode by another socket
+ * (in the same program or different), will not report that status.
+ *
+ * Returns 0 on success or an errno value (EFAULT when copying the result
+ * out to the caller fails).
+ */
+/* ARGSUSED */
+static int
+sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
+    int32_t *rval, struct cred *cr)
+{
+#if defined(_SYSCALL32)
+	struct timeval32 tival;
+#else
+	struct timeval tival;
+#endif
+	mac_client_promisc_type_t mtype;
+	datalink_id_t linkid;
+	struct lifreq lifreq;
+	struct ifreq ifreq;
+	struct pfpsock *ps;
+	mac_handle_t mh;
+	timespec_t tv;
+	int error;
+
+	switch (cmd) {
+	/*
+	 * ioctls that work on "struct lifreq"
+	 */
+	case SIOCSLIFFLAGS :
+	case SIOCGLIFINDEX :
+	case SIOCGLIFFLAGS :
+	case SIOCGLIFMTU :
+		error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid);
+		if (error != 0)
+			return (error);
+		break;
+
+	/*
+	 * ioctls that work on "struct ifreq".
+	 * Not all of these have a "struct lifreq" partner, for example
+	 * SIOCGIFHWADDR, for the simple reason that the logical interface
+	 * does not have a hardware address.
+	 */
+	case SIOCSIFFLAGS :
+	case SIOCGIFINDEX :
+	case SIOCGIFFLAGS :
+	case SIOCGIFMTU :
+	case SIOCGIFHWADDR :
+		error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid);
+		if (error != 0)
+			return (error);
+		break;
+
+	/*
+	 * No interface is involved, so there is no link to open.
+	 * NOTE(review): this reports the current time, not the time at
+	 * which a packet was received.
+	 */
+	case SIOCGSTAMP :
+		(void) gethrestime(&tv);
+		tival.tv_sec = (time_t)tv.tv_sec;
+		tival.tv_usec = tv.tv_nsec / 1000;
+		if (ddi_copyout(&tival, (void *)arg, sizeof (tival), 0) != 0)
+			return (EFAULT);
+		return (0);
+
+	default :
+		return (EINVAL);
+	}
+
+	error = mac_open_by_linkid(linkid, &mh);
+	if (error != 0)
+		return (error);
+
+	ps = (struct pfpsock *)handle;
+
+	switch (cmd) {
+	case SIOCGLIFINDEX :
+		lifreq.lifr_index = linkid;
+		break;
+
+	case SIOCGIFINDEX :
+		ifreq.ifr_index = linkid;
+		break;
+
+	case SIOCGIFFLAGS :
+		ifreq.ifr_flags = IFF_RUNNING;
+		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
+			ifreq.ifr_flags |= IFF_PROMISC;
+		break;
+
+	case SIOCGLIFFLAGS :
+		lifreq.lifr_flags = IFF_RUNNING;
+		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
+			lifreq.lifr_flags |= IFF_PROMISC;
+		break;
+
+	case SIOCSIFFLAGS :
+		/* Flags can only be set on the link this socket is bound to. */
+		if (linkid != ps->ps_linkid) {
+			error = EINVAL;
+		} else {
+			if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
+				mtype = MAC_CLIENT_PROMISC_ALL;
+			else
+				mtype = MAC_CLIENT_PROMISC_FILTERED;
+			error = pfp_set_promisc(ps, mtype);
+		}
+		break;
+
+	case SIOCSLIFFLAGS :
+		if (linkid != ps->ps_linkid) {
+			error = EINVAL;
+		} else {
+			if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
+				mtype = MAC_CLIENT_PROMISC_ALL;
+			else
+				mtype = MAC_CLIENT_PROMISC_FILTERED;
+			error = pfp_set_promisc(ps, mtype);
+		}
+		break;
+
+	case SIOCGIFMTU :
+		mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
+		break;
+
+	case SIOCGLIFMTU :
+		mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
+		break;
+
+	case SIOCGIFHWADDR :
+		mac_unicast_primary_get(mh, (uint8_t *)ifreq.ifr_addr.sa_data);
+		ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
+		break;
+
+	default :
+		break;
+	}
+
+	mac_close(mh);
+
+	if (error == 0) {
+		/*
+		 * Only the "GET" ioctls need to copy data back to userspace.
+		 * ddi_copyout() reports failure with a non-zero value that
+		 * is not an errno, so translate it.
+		 */
+		switch (cmd) {
+		case SIOCGLIFINDEX :
+		case SIOCGLIFFLAGS :
+		case SIOCGLIFMTU :
+			if (ddi_copyout(&lifreq, (void *)arg,
+			    sizeof (lifreq), 0) != 0)
+				error = EFAULT;
+			break;
+
+		case SIOCGIFINDEX :
+		case SIOCGIFFLAGS :
+		case SIOCGIFMTU :
+		case SIOCGIFHWADDR :
+			if (ddi_copyout(&ifreq, (void *)arg,
+			    sizeof (ifreq), 0) != 0)
+				error = EFAULT;
+			break;
+		default :
+			break;
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Closing the socket requires that all open references to network
+ * interfaces be closed and that everything allocated or initialised on
+ * behalf of the socket be released: the receive callback, the MAC client
+ * and MAC handles, any attached BPF filter, the two locks initialised in
+ * sockpfp_create() and finally the pfpsock itself.
+ *
+ * Always returns 0.
+ */
+/* ARGSUSED */
+static int
+sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
+{
+	struct pfpsock *ps = (struct pfpsock *)handle;
+
+	/* Stop packet delivery before anything pfp_packet() uses goes away. */
+	if (ps->ps_phd != 0) {
+		mac_promisc_remove(ps->ps_phd);
+		ps->ps_phd = 0;
+	}
+
+	if (ps->ps_mch != 0) {
+		mac_client_close(ps->ps_mch, 0);
+		ps->ps_mch = 0;
+	}
+
+	if (ps->ps_mh != 0) {
+		mac_close(ps->ps_mh);
+		ps->ps_mh = 0;
+	}
+
+	/*
+	 * Release any filter attached with SO_ATTACH_FILTER, under the
+	 * same lock that is held when a filter is replaced.
+	 * NOTE(review): pfp_release_bpf() is defined outside this excerpt;
+	 * it is presumed to free ps_bpf and to be harmless when no filter
+	 * is attached, as it is already called that way for
+	 * SO_DETACH_FILTER - confirm.
+	 */
+	rw_enter(&ps->ps_bpflock, RW_WRITER);
+	pfp_release_bpf(ps);
+	rw_exit(&ps->ps_bpflock);
+
+	rw_destroy(&ps->ps_bpflock);
+	mutex_destroy(&ps->ps_lock);
+
+	kmem_free(ps, sizeof (*ps));
+
+	return (0);
+}
+
+/* ************************************************************************* */
+
+/*
+ * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
+ * determine the linkid for the interface name stored in that structure.
+ * name is used as a buffer so that we can ensure a trailing \0 is appended
+ * to the name safely.
+ *
+ * The structure is copied into *ifreqp for the caller's later use.
+ * Returns 0 with *linkidp set, EFAULT if the structure cannot be copied
+ * in, or the error from the link name lookup.
+ */
+static int
+pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
+    datalink_id_t *linkidp)
+{
+	char name[IFNAMSIZ + 1];
+	int error;
+
+	if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), 0) != 0)
+		return (EFAULT);
+
+	/*
+	 * The name comes from the caller and need not be terminated, so
+	 * copy at most IFNAMSIZ bytes - never reading beyond the name
+	 * field - and terminate the copy explicitly.
+	 */
+	(void) strncpy(name, ifreqp->ifr_name, IFNAMSIZ);
+	name[IFNAMSIZ] = '\0';
+
+	/* Try the name as a link name first, then as a MAC name. */
+	error = dls_mgmt_get_linkid(name, linkidp);
+	if (error != 0)
+		error = dls_devnet_macname2linkid(name, linkidp);
+
+	return (error);
+}
+
+/*
+ * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
+ * determine the linkid for the interface name stored in that structure.
+ * name is used as a buffer so that we can ensure a trailing \0 is appended
+ * to the name safely.
+ *
+ * The structure is copied into *lifreqp for the caller's later use.
+ * Returns 0 with *linkidp set, EFAULT if the structure cannot be copied
+ * in, or the error from the link name lookup.
+ */
+static int
+pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
+    datalink_id_t *linkidp)
+{
+	char name[LIFNAMSIZ + 1];
+	int error;
+
+	if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), 0) != 0)
+		return (EFAULT);
+
+	/*
+	 * The name comes from the caller and need not be terminated, so
+	 * copy at most LIFNAMSIZ bytes - never reading beyond the name
+	 * field - and terminate the copy explicitly.
+	 */
+	(void) strncpy(name, lifreqp->lifr_name, LIFNAMSIZ);
+	name[LIFNAMSIZ] = '\0';
+
+	/* Try the name as a link name first, then as a MAC name. */
+	error = dls_mgmt_get_linkid(name, linkidp);
+	if (error != 0)
+		error = dls_devnet_macname2linkid(name, linkidp);
+
+	return (error);
+}
+
+/*
+ * Although there are several new SOL_PACKET options that can be set and
+ * are specific to this implementation of PF_PACKET, the current API does
+ * not support doing a get on them to retrieve accompanying status. Thus
+ * it is only currently possible to use SOL_PACKET with getsockopt to
+ * retrieve statistical information. This remains consistent with the
+ * Linux API at the time of writing.
+ *
+ * For PACKET_STATISTICS the socket's tpacket_stats is copied to optval and
+ * *optlenp is set to its size. Returns EINVAL for any other option, or
+ * when the supplied buffer is too small.
+ */
+static int
+pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
+    void *optval, socklen_t *optlenp)
+{
+	struct pfpsock *sock = (struct pfpsock *)handle;
+
+	if (option_name != PACKET_STATISTICS)
+		return (EINVAL);
+
+	if (*optlenp < sizeof (sock->ps_stats))
+		return (EINVAL);
+
+	*optlenp = sizeof (sock->ps_stats);
+	bcopy(&sock->ps_stats, optval, sizeof (sock->ps_stats));
+
+	return (0);
+}
+
+/*
+ * The SOL_PACKET level for socket options supports three options,
+ * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
+ * This function is responsible for mapping the two socket options
+ * that manage multicast membership into the appropriate internal
+ * function calls to bring the option into effect. Whilst direct
+ * changes to the multicast membership (ADD/DROP) groups is handled
+ * by calls directly into the mac module, changes to the promiscuous
+ * mode are vectored through pfp_set_promisc() so that the logic for
+ * managing the promiscuous mode is in one place.
+ *
+ * Returns 0 on success, EPROTO if the socket has not been bound, and
+ * EINVAL for an unknown option, an option value of the wrong size, a
+ * membership request for a different interface or address length, or an
+ * unknown membership type.
+ */
+/* ARGSUSED */
+static int
+pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
+    const void *optval, socklen_t optlen)
+{
+	struct packet_mreq mreq;
+	struct pfpsock *ps;
+	int error = 0;
+	int opt;
+
+	ps = (struct pfpsock *)handle;
+	if (!ps->ps_bound)
+		return (EPROTO);
+
+	if ((option_name == PACKET_ADD_MEMBERSHIP) ||
+	    (option_name == PACKET_DROP_MEMBERSHIP)) {
+		/* A whole packet_mreq is about to be read from optval. */
+		if (optval == NULL || optlen < sizeof (mreq))
+			return (EINVAL);
+		bcopy(optval, &mreq, sizeof (mreq));
+		if (ps->ps_linkid != mreq.mr_ifindex)
+			return (EINVAL);
+
+		if (mreq.mr_alen !=
+		    ((struct sockaddr_ll *)&ps->ps_sock)->sll_halen)
+			return (EINVAL);
+	}
+
+	switch (option_name) {
+	case PACKET_ADD_MEMBERSHIP :
+		switch (mreq.mr_type) {
+		case PACKET_MR_MULTICAST :
+			error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
+			break;
+
+		case PACKET_MR_PROMISC :
+			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
+			break;
+
+		case PACKET_MR_ALLMULTI :
+			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
+			break;
+
+		default :
+			/* Don't report success for a request not acted on. */
+			error = EINVAL;
+			break;
+		}
+		break;
+
+	case PACKET_DROP_MEMBERSHIP :
+		switch (mreq.mr_type) {
+		case PACKET_MR_MULTICAST :
+			mac_multicast_remove(ps->ps_mch, mreq.mr_address);
+			break;
+
+		case PACKET_MR_PROMISC :
+			/* Only valid if that mode is currently in effect. */
+			if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
+				return (EINVAL);
+			error = pfp_set_promisc(ps,
+			    MAC_CLIENT_PROMISC_FILTERED);
+			break;
+
+		case PACKET_MR_ALLMULTI :
+			if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
+				return (EINVAL);
+			error = pfp_set_promisc(ps,
+			    MAC_CLIENT_PROMISC_FILTERED);
+			break;
+
+		default :
+			error = EINVAL;
+			break;
+		}
+		break;
+
+	case PACKET_AUXDATA :
+		if (optlen == sizeof (int)) {
+			opt = *(const int *)optval;
+			ps->ps_auxdata = (opt != 0);
+		} else {
+			error = EINVAL;
+		}
+		break;
+	default :
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
+ * SO_ATTACH_FILTER and SO_DETACH_FILTER. All other setsockopt requests
+ * that are for SOL_SOCKET are passed back to the socket layer for its
+ * generic implementation.
+ *
+ * Both of these setsockopt values are candidates for being handled by the
+ * socket layer itself in future, however this requires understanding how
+ * they would interact with all other sockets.
+ *
+ * The filter is read by pfp_packet() under ps_bpflock, so it is only ever
+ * replaced or released here with that lock held as a writer.
+ *
+ * Returns 0 on success, EINVAL for a malformed, oversized or invalid
+ * filter program, EFAULT if the instructions cannot be copied in, and
+ * ENOPROTOOPT for options left to the socket layer.
+ */
+static int
+pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
+    const void *optval, socklen_t optlen)
+{
+	struct bpf_program prog;
+	struct bpf_insn *fcode;
+	struct pfpsock *ps;
+	int error = 0;
+	int size;
+
+	ps = (struct pfpsock *)handle;
+
+	switch (option_name) {
+	case SO_ATTACH_FILTER :
+#ifdef _LP64
+		if (optlen == sizeof (struct bpf_program32)) {
+			struct bpf_program32 prog32;
+
+			bcopy(optval, &prog32, sizeof (prog32));
+			prog.bf_len = prog32.bf_len;
+			prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
+		} else
+#endif
+		if (optlen == sizeof (struct bpf_program)) {
+			bcopy(optval, &prog, sizeof (prog));
+		} else {
+			return (EINVAL);
+		}
+
+		/*
+		 * bf_len comes from the caller; bound it before it is used
+		 * to size an allocation so that the multiplication below
+		 * cannot overflow and the request cannot be huge.
+		 * NOTE(review): BPF_MAXINSNS is assumed to be provided by
+		 * the BPF header that defines struct bpf_program - confirm.
+		 */
+		if (prog.bf_len > BPF_MAXINSNS)
+			return (EINVAL);
+
+		size = prog.bf_len * sizeof (*prog.bf_insns);
+		fcode = kmem_alloc(size, KM_SLEEP);
+		if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
+			kmem_free(fcode, size);
+			return (EFAULT);
+		}
+
+		if (bpf_validate(fcode, (int)prog.bf_len)) {
+			rw_enter(&ps->ps_bpflock, RW_WRITER);
+			pfp_release_bpf(ps);
+			ps->ps_bpf.bf_insns = fcode;
+			/* Note: bf_len holds the size in bytes here. */
+			ps->ps_bpf.bf_len = size;
+			rw_exit(&ps->ps_bpflock);
+
+			return (0);
+		}
+		kmem_free(fcode, size);
+		error = EINVAL;
+		break;
+
+	case SO_DETACH_FILTER :
+		/*
+		 * Exclude pfp_packet(), which runs the filter as a reader,
+		 * while the filter is being released.
+		 */
+		rw_enter(&ps->ps_bpflock, RW_WRITER);
+		pfp_release_bpf(ps);
+		rw_exit(&ps->ps_bpflock);
+		break;
+	default :
+		/*
+		 * If sockfs code receives this error in return from the
+		 * setsockopt downcall it handles the option locally, if
+		 * it can. This implements SO_RCVBUF, etc.
+		 */
+		error = ENOPROTOOPT;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * pfp_open_index is an internal function used to open a MAC device by
+ * its index. Both a mac_handle_t and mac_client_handle_t are acquired
+ * because some of the interfaces provided by the mac layer require either
+ * only the mac_handle_t or both it and the mac_client_handle_t.
+ *
+ * Whilst inside the kernel we can access data structures supporting any
+ * zone, access to interfaces from non-global zones is restricted to those
+ * interfaces (if any) that are exclusively assigned to a zone.
+ *
+ * On success 0 is returned and both handles are stored through mhp and
+ * mcip. On failure the error is returned, anything that was opened is
+ * closed again and neither output is written.
+ */
+static int
+pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
+    cred_t *cred)
+{
+	mac_client_handle_t client = 0;
+	mac_handle_t mac = 0;
+	zoneid_t caller_zone;
+	zoneid_t link_zone;
+	int err;
+
+	err = mac_open_by_linkid(index, &mac);
+	if (err == 0) {
+		err = mac_client_open(mac, &client, NULL,
+		    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
+	}
+
+	if (err == 0) {
+		caller_zone = crgetzoneid(cred);
+		if (caller_zone != GLOBAL_ZONEID) {
+			mac_perim_handle_t perim;
+
+			/* The zone lookup is done inside the mac perimeter. */
+			mac_perim_enter_by_mh(mac, &perim);
+			err = dls_link_getzid(mac_client_name(client),
+			    &link_zone);
+			mac_perim_exit(perim);
+
+			/* The link must belong to the caller's zone. */
+			if (err == 0 && link_zone != caller_zone)
+				err = EACCES;
+		}
+	}
+
+	if (err == 0) {
+		*mcip = client;
+		*mhp = mac;
+		return (0);
+	}
+
+	/* Failure: release whatever was acquired, client handle first. */
+	if (client != 0)
+		mac_client_close(client, 0);
+	if (mac != 0)
+		mac_close(mac);
+	return (err);
+}
+
+/*
+ * Release a MAC client handle and then the MAC handle itself, the same
+ * order that the error path of pfp_open_index() uses.
+ */
+static void
+pfp_close(mac_handle_t mh, mac_client_handle_t mch)
+{
+	mac_client_close(mch, 0);
+	mac_close(mh);
+}
+
+/*
+ * Single place where the BPF program loaded on a socket is freed and the
+ * pointer/length that describe it are cleared. Nothing is done when no
+ * program is loaded (bf_len is zero). bf_len is used as the size, in
+ * bytes, handed to kmem_free().
+ */
+static void
+pfp_release_bpf(struct pfpsock *ps)
+{
+	if (ps->ps_bpf.bf_len == 0)
+		return;
+
+	kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
+	ps->ps_bpf.bf_insns = NULL;
+	ps->ps_bpf.bf_len = 0;
+}
+
+/*
+ * Set the promiscuous mode of a network interface.
+ * This function only calls the mac layer when there is a change to the
+ * status of a network interface's promiscuous mode. Tracking of how many
+ * sockets have the network interface in promiscuous mode, and thus the
+ * control over the physical device's status, is left to the mac layer.
+ *
+ * turnon is the mode being requested. Returns 0 if the socket is already
+ * in that mode or the change succeeded, otherwise the error returned by
+ * mac_promisc_add().
+ */
+static int
+pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
+{
+	/*
+	 * VLAN tag stripping is requested on every path. Starting from a
+	 * known value also means mac_promisc_add() is never passed an
+	 * uninitialised flags word when there is no existing callback
+	 * (ps_phd == 0) to replace.
+	 * NOTE(review): confirm no other flag is wanted for that case.
+	 */
+	int flags = MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
+	int error;
+
+	/* Nothing to do when the requested mode is already in effect. */
+	if (turnon == ps->ps_promisc)
+		return (0);
+
+	if (ps->ps_phd != 0) {
+		mac_promisc_remove(ps->ps_phd);
+		ps->ps_phd = 0;
+
+		/*
+		 * ps_promisc is set here in case the call to mac_promisc_add
+		 * fails: leaving it to indicate that the interface is still
+		 * in some sort of promiscuous mode is false.
+		 */
+		if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
+			ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
+			flags |= MAC_PROMISC_FLAGS_NO_PHYS;
+		}
+	}
+
+	error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
+	    &ps->ps_phd, flags);
+	if (error == 0)
+		ps->ps_promisc = turnon;
+
+	return (error);
+}
+
+/*
+ * This table maps the MAC types in Solaris to the ARPHRD_* values used
+ * on Linux. This is used with the SIOCGIFHWADDR ioctl. Each row is
+ * { ARPHRD value, DL value }; a row whose ARPHRD value is 0 ends the table.
+ */
+static uint_t arphrd_to_dl[][2] = {
+	{ ARPHRD_ETHER,		DL_ETHER },
+	{ ARPHRD_IEEE80211,	DL_WIFI },
+	{ 0,			0 }
+};
+
+/*
+ * Return the ARPHRD_* value paired with the given DL_* type in
+ * arphrd_to_dl, or 0 when the type is not in the table.
+ */
+static int
+pfp_dl_to_arphrd(int dltype)
+{
+	uint_t (*row)[2];
+
+	for (row = arphrd_to_dl; (*row)[0] != 0; row++) {
+		if ((*row)[1] == dltype)
+			return ((*row)[0]);
+	}
+	return (0);
+}
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 178eb5587d..96a762f1e9 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -16897,11 +16897,13 @@ tcp_output(void *arg, mblk_t *mp, void *arg2)
mutex_exit(&tcp->tcp_non_sq_lock);
/* Check to see if this connection wants to be re-fused. */
- if (tcp->tcp_refuse && !ipst->ips_ipobs_enabled) {
- if (tcp->tcp_ipversion == IPV4_VERSION) {
+ if (tcp->tcp_refuse) {
+ if (tcp->tcp_ipversion == IPV4_VERSION &&
+ !ipst->ips_ip4_observe.he_interested) {
tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ipha,
&tcp->tcp_saved_tcph);
- } else {
+ } else if (tcp->tcp_ipversion == IPV6_VERSION &&
+ !ipst->ips_ip6_observe.he_interested) {
tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ip6h,
&tcp->tcp_saved_tcph);
}
@@ -18639,13 +18641,21 @@ tcp_send_data(tcp_t *tcp, queue_t *q, mblk_t *mp)
DTRACE_IP_FASTPATH(mp, ipha, ill, ipha, NULL);
if (mp != NULL) {
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip4_observe.he_interested) {
zoneid_t szone;
szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
ipst, ALL_ZONES);
+
+ /*
+ * The IP observability hook expects b_rptr to be
+ * where the IP header starts, so advance past the
+ * link layer header.
+ */
+ mp->b_rptr += ire_fp_mp_len;
ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
- ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
+ ALL_ZONES, ill, ipst);
+ mp->b_rptr -= ire_fp_mp_len;
}
ILL_SEND_TX(ill, ire, connp, mp, 0, NULL);
@@ -20440,7 +20450,10 @@ tcp_multisend_data(tcp_t *tcp, ire_t *ire, const ill_t *ill, mblk_t *md_mp_head,
atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, obsegs);
ire->ire_last_used_time = lbolt;
- if (ipst->ips_ipobs_enabled) {
+ if ((tcp->tcp_ipversion == IPV4_VERSION &&
+ ipst->ips_ip4_observe.he_interested) ||
+ (tcp->tcp_ipversion == IPV6_VERSION &&
+ ipst->ips_ip6_observe.he_interested)) {
multidata_t *dlmdp = mmd_getmultidata(md_mp_head);
pdesc_t *dl_pkt;
pdescinfo_t pinfo;
@@ -20453,7 +20466,7 @@ tcp_multisend_data(tcp_t *tcp, ire_t *ire, const ill_t *ill, mblk_t *md_mp_head,
if ((nmp = mmd_transform_link(dl_pkt)) == NULL)
continue;
ipobs_hook(nmp, IPOBS_HOOK_OUTBOUND, szone,
- ALL_ZONES, ill, tcp->tcp_ipversion, 0, ipst);
+ ALL_ZONES, ill, ipst);
freemsg(nmp);
}
}
@@ -20634,13 +20647,17 @@ tcp_lsosend_data(tcp_t *tcp, mblk_t *mp, ire_t *ire, ill_t *ill, const int mss,
DTRACE_IP_FASTPATH(mp, ipha, ill, ipha, NULL);
if (mp != NULL) {
- if (ipst->ips_ipobs_enabled) {
+ if (ipst->ips_ip4_observe.he_interested) {
zoneid_t szone;
szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
ipst, ALL_ZONES);
+ if (ire_fp_mp_len != 0)
+ mp->b_rptr += ire_fp_mp_len;
ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
- ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
+ ALL_ZONES, ill, ipst);
+ if (ire_fp_mp_len != 0)
+ mp->b_rptr -= ire_fp_mp_len;
}
ILL_SEND_TX(ill, ire, tcp->tcp_connp, mp, 0, NULL);
diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c
index 3f056d1b1c..3ee909cc4d 100644
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c
@@ -536,7 +536,10 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
if (tcp_loopback_needs_ip(tcp, ns) ||
tcp_loopback_needs_ip(peer_tcp, ns) ||
IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst) ||
- list_head(&ipst->ips_ipobs_cb_list) != NULL) {
+ (tcp->tcp_ipversion == IPV4_VERSION &&
+ ipst->ips_ip4_observe.he_interested) ||
+ (tcp->tcp_ipversion == IPV6_VERSION &&
+ ipst->ips_ip6_observe.he_interested)) {
TCP_STAT(tcps, tcp_fusion_aborted);
tcp->tcp_refuse = B_TRUE;
peer_tcp->tcp_refuse = B_TRUE;
diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c
index 5cdfc0858a..39036439b3 100644
--- a/usr/src/uts/common/inet/udp/udp.c
+++ b/usr/src/uts/common/inet/udp/udp.c
@@ -5454,13 +5454,21 @@ udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
ll_multicast, ipst);
DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
- if (ipst->ips_ipobs_enabled && mp != NULL) {
+ if (ipst->ips_ip4_observe.he_interested && mp != NULL) {
zoneid_t szone;
szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
ipst, ALL_ZONES);
+
+ /*
+ * The IP observability hook expects b_rptr to be
+ * where the IP header starts, so advance past the
+ * link layer header.
+ */
+ mp->b_rptr += ire_fp_mp_len;
ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
- ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
+ ALL_ZONES, ill, ipst);
+ mp->b_rptr -= ire_fp_mp_len;
}
if (mp == NULL)
diff --git a/usr/src/uts/common/io/bpf/BPF.LICENCE b/usr/src/uts/common/io/bpf/BPF.LICENCE
new file mode 100644
index 0000000000..044493c13e
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/BPF.LICENCE
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
diff --git a/usr/src/uts/common/io/bpf/bpf.c b/usr/src/uts/common/io/bpf/bpf.c
new file mode 100644
index 0000000000..bdef9bc45d
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/bpf.c
@@ -0,0 +1,2047 @@
+/* $NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $ */
+
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.c 8.4 (Berkeley) 1/9/95
+ * static char rcsid[] =
+ * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * BPF implements the following access controls for zones attempting
+ * to read and write data. Writing of data requires that the net_rawaccess
+ * privilege is held, whilst reading data requires either net_rawaccess or
+ * net_observability.
+ *
+ * | Shared | Exclusive | Global
+ * -----------------------------+--------+------------+------------+
+ * DLT_IPNET in local zone | Read | Read | Read |
+ * -----------------------------+--------+------------+------------+
+ * Raw access to local zone NIC | None | Read/Write | Read/Write |
+ * -----------------------------+--------+------------+------------+
+ * Raw access to all NICs | None | None | Read/Write |
+ * -----------------------------+--------+------------+------------+
+ *
+ * The BPF driver is written as a cloning driver: each call to bpfopen()
+ * allocates a new minor number. This provides BPF with a 1:1 relationship
+ * between open's and close's. There is some amount of "descriptor state"
+ * that is kept per open. Pointers to this data are stored in a hash table
+ * (bpf_hash) that is index'd by the minor device number for each open file.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/queue.h>
+#include <sys/filio.h>
+#include <sys/policy.h>
+#include <sys/cmn_err.h>
+#include <sys/uio.h>
+#include <sys/file.h>
+#include <sys/sysmacros.h>
+#include <sys/zone.h>
+
+#include <sys/socket.h>
+#include <sys/errno.h>
+#include <sys/poll.h>
+#include <sys/dlpi.h>
+#include <sys/neti.h>
+
+#include <net/if.h>
+
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+#include <net/dlt.h>
+
+#include <netinet/in.h>
+#include <sys/mac.h>
+#include <sys/mac_client.h>
+#include <sys/mac_impl.h>
+#include <sys/time_std_impl.h>
+#include <sys/hook.h>
+#include <sys/hook_event.h>
+
+
+#define mtod(_v, _t) (_t)((_v)->b_rptr)
+#define M_LEN(_m) ((_m)->b_wptr - (_m)->b_rptr)
+
+/*
+ * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
+ * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
+ */
+#define BPF_BUFSIZE (32 * 1024)
+
+typedef void *(*cp_fn_t)(void *, const void *, size_t);
+
+/*
+ * The default read buffer size, and limit for BIOCSBLEN.
+ */
+int bpf_bufsize = BPF_BUFSIZE;
+int bpf_maxbufsize = (16 * 1024 * 1024);
+int bpf_debug = 0;
+mod_hash_t *bpf_hash = NULL;
+
+/*
+ * Use a mutex to avoid a race condition between gathering the stats/peers
+ * and opening/closing the device.
+ */
+static kcondvar_t bpf_dlt_waiter;
+static kmutex_t bpf_mtx;
+static bpf_kstats_t ks_stats;
+static bpf_kstats_t bpf_kstats = {
+ { "readWait", KSTAT_DATA_UINT64 },
+ { "writeOk", KSTAT_DATA_UINT64 },
+ { "writeError", KSTAT_DATA_UINT64 },
+ { "receive", KSTAT_DATA_UINT64 },
+ { "captured", KSTAT_DATA_UINT64 },
+ { "dropped", KSTAT_DATA_UINT64 },
+};
+static kstat_t *bpf_ksp;
+
+/*
+ * bpf_iflist is the list of interfaces (struct bpf_if) known to BPF.
+ * bpf_list is the list of open descriptors (struct bpf_d); bpf_hash is
+ * used to look a descriptor up by its minor device number.
+ */
+TAILQ_HEAD(, bpf_if) bpf_iflist;
+LIST_HEAD(, bpf_d) bpf_list;
+
+static int bpf_allocbufs(struct bpf_d *);
+static void bpf_clear_timeout(struct bpf_d *);
+static void bpf_debug_nic_action(char *, struct bpf_if *);
+static void bpf_deliver(struct bpf_d *, cp_fn_t,
+ void *, uint_t, uint_t, boolean_t);
+static struct bpf_if *
+ bpf_findif(struct bpf_d *, char *, int);
+static void bpf_freed(struct bpf_d *);
+static int bpf_ifname(struct bpf_d *d, char *, int);
+static void *bpf_mcpy(void *, const void *, size_t);
+static void bpf_attachd(struct bpf_d *, struct bpf_if *);
+static void bpf_detachd(struct bpf_d *);
+static int bpf_setif(struct bpf_d *, char *, int);
+static void bpf_timed_out(void *);
+static inline void
+ bpf_wakeup(struct bpf_d *);
+static void catchpacket(struct bpf_d *, uchar_t *, uint_t, uint_t,
+ cp_fn_t, struct timeval *);
+static void reset_d(struct bpf_d *);
+static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
+static int bpf_setdlt(struct bpf_d *, void *);
+static void bpf_dev_add(struct bpf_d *);
+static struct bpf_d *bpf_dev_find(minor_t);
+static struct bpf_d *bpf_dev_get(minor_t);
+static void bpf_dev_remove(struct bpf_d *);
+
+/*
+ * Copy the data of a pending write into a newly allocated mblk.
+ *
+ * linktype selects the size of the link level header that the data is
+ * expected to start with; link types that are not listed are rejected
+ * with EIO. The whole of uio_resid is copied, so the caller supplies the
+ * link header itself. mtu limits the payload that follows that header.
+ *
+ * Returns 0 and stores the new message in *mp on success. Otherwise
+ * returns EIO (unsupported link type), EMSGSIZE (shorter than the link
+ * header or payload larger than mtu), ENOBUFS (allocb failed) or the
+ * error from uiomove(); *mp is not written on failure.
+ */
+static int
+bpf_movein(struct uio *uio, int linktype, int mtu, mblk_t **mp)
+{
+	mblk_t *pkt;
+	int hdrlen;
+	int total;
+	int pad;
+	int err;
+
+	switch (linktype) {
+	case DLT_EN10MB:
+		hdrlen = sizeof (struct ether_header);
+		break;
+	case DLT_FDDI:
+		hdrlen = 16;
+		break;
+	case DLT_NULL:
+		hdrlen = 0;
+		break;
+	case DLT_IPOIB:
+		hdrlen = 44;
+		break;
+	default:
+		return (EIO);
+	}
+
+	/*
+	 * Leading pad of 1-4 bytes, presumably so that the data after the
+	 * link header is 4-byte aligned -- TODO confirm allocb alignment.
+	 */
+	pad = 4 - (hdrlen & 3);
+
+	total = uio->uio_resid;
+
+	/*
+	 * If there aren't enough bytes for a link level header or the
+	 * packet length exceeds the interface mtu, return an error.
+	 */
+	if (total < hdrlen || total - hdrlen > mtu)
+		return (EMSGSIZE);
+
+	pkt = allocb(total + pad, BPRI_MED);
+	if (pkt == NULL)
+		return (ENOBUFS);
+
+	/* Skip the pad, then reserve room for the whole write. */
+	if (pad > 0)
+		pkt->b_rptr += pad;
+	pkt->b_wptr = pkt->b_rptr + total;
+
+	err = uiomove(mtod(pkt, void *), total, UIO_WRITE, uio);
+	if (err) {
+		freemsg(pkt);
+		return (err);
+	}
+
+	*mp = pkt;
+	return (0);
+}
+
+
+/*
+ * Attach file to the bpf interface, i.e. make d listen on bp.
+ *
+ * d must not currently be attached to an interface (bd_bif == NULL).
+ * The descriptor is linked onto bp's listener list, a client is opened
+ * on the interface and, if that succeeds, a packet callback is
+ * registered using d->bd_promisc_flags. Failures of those last two
+ * steps are not reported to the caller; after a failed open bd_mcip is
+ * presumably left unset -- TODO confirm.
+ */
+static void
+bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
+{
+	uintptr_t mh;
+
+	/* Check bp before it is dereferenced, not after. */
+	ASSERT(bp != NULL);
+	ASSERT(d->bd_bif == NULL);
+	mh = bp->bif_ifp;
+
+	/*
+	 * Point d at bp, and add d to the interface's list of listeners.
+	 * Finally, point the driver's bpf cookie at the interface so
+	 * it will divert packets to bpf.
+	 *
+	 * Note: Although this results in what looks like a lock order
+	 * reversal (bd_lock is held), the deadlock threat is not present
+	 * because the descriptor is not attached to any interface and
+	 * therefore there cannot be a packet waiting on bd_lock in
+	 * catchpacket.
+	 */
+	mutex_enter(&bp->bif_lock);
+	d->bd_bif = bp;
+	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+	mutex_exit(&bp->bif_lock);
+
+	if (MBPF_CLIENT_OPEN(&bp->bif_mac, mh, &d->bd_mcip) == 0)
+		(void) MBPF_PROMISC_ADD(&bp->bif_mac, d->bd_mcip, 0, d,
+		    &d->bd_promisc_handle, d->bd_promisc_flags);
+}
+
+/*
+ * Detach a file from its interface.
+ *
+ * Called with d->bd_lock held and returns with it held, but the lock is
+ * dropped and re-acquired in between (see below), so any descriptor state
+ * the caller examined beforehand may have changed by the time this
+ * returns. d must be attached (bd_bif != NULL) on entry; on return
+ * bd_bif, bd_mcip and bd_promisc_handle are all cleared.
+ */
+static void
+bpf_detachd(struct bpf_d *d)
+{
+ struct bpf_if *bp;
+ uintptr_t mph;
+ uintptr_t mch;
+
+ /* Take private copies of the handles and clear them in d. */
+ mch = d->bd_mcip;
+ d->bd_mcip = 0;
+ bp = d->bd_bif;
+ ASSERT(bp != NULL);
+
+ /*
+ * Check if this descriptor had requested promiscuous mode.
+ * If so, turn it off. There's no need to take any action
+ * here, that is done when MBPF_PROMISC_REMOVE is used;
+ * bd_promisc is just a local flag to stop promiscuous mode
+ * from being set more than once.
+ */
+ if (d->bd_promisc)
+ d->bd_promisc = 0;
+
+ /*
+ * Take device out of "promiscuous" mode. Since we were able to
+ * enter "promiscuous" mode, we should be able to turn it off.
+ * Note, this field stores a pointer used to support both
+ * promiscuous and non-promiscuous callbacks for packets.
+ */
+ mph = d->bd_promisc_handle;
+ d->bd_promisc_handle = 0;
+
+ /*
+ * The lock has to be dropped here because mac_promisc_remove may
+ * need to wait for mac_promisc_dispatch, which has called into
+ * bpf and catchpacket is waiting for bd_lock...
+ * i.e mac_promisc_remove() needs to be called with none of the
+ * locks held that are part of the bpf_mtap() call path.
+ */
+ mutex_exit(&d->bd_lock);
+ if (mph != 0)
+ MBPF_PROMISC_REMOVE(&bp->bif_mac, mph);
+
+ if (mch != 0)
+ MBPF_CLIENT_CLOSE(&bp->bif_mac, mch);
+
+ /*
+ * bd_lock needs to stay not held by this function until after
+ * it has finished with bif_lock, otherwise there's a lock order
+ * reversal with bpf_deliver and the system can deadlock.
+ *
+ * Remove d from the interface's descriptor list.
+ */
+ mutex_enter(&bp->bif_lock);
+ LIST_REMOVE(d, bd_next);
+ mutex_exit(&bp->bif_lock);
+
+ /*
+ * This function is called with bd_lock held, so it must also
+ * return with it held.
+ */
+ mutex_enter(&d->bd_lock);
+ /*
+ * bd_bif cannot be cleared until after the promisc callback has been
+ * removed.
+ */
+ d->bd_bif = 0;
+}
+
+
+/*
+ * bpfilterattach() is called at load time.
+ *
+ * Creates the descriptor hash and the "bpf" named kstat, then initialises
+ * the global lock, condition variable and lists. Returns 0 on success,
+ * ENOMEM if the hash cannot be created and EEXIST if the kstat cannot be
+ * created (the hash is destroyed again in that case).
+ */
+int
+bpfilterattach(void)
+{
+	bpf_hash = mod_hash_create_idhash("bpf_dev_tab", 31,
+	    mod_hash_null_keydtor);
+	if (bpf_hash == NULL)
+		return (ENOMEM);
+
+	/* ks_stats is the live copy; bpf_kstats only names the counters. */
+	(void) memcpy(&ks_stats, &bpf_kstats, sizeof (bpf_kstats));
+
+	bpf_ksp = kstat_create("bpf", 0, "global", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (bpf_kstats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	if (bpf_ksp == NULL) {
+		mod_hash_destroy_idhash(bpf_hash);
+		bpf_hash = NULL;
+		return (EEXIST);
+	}
+	bpf_ksp->ks_data = &ks_stats;
+	kstat_install(bpf_ksp);
+
+	cv_init(&bpf_dlt_waiter, NULL, CV_DRIVER, NULL);
+	mutex_init(&bpf_mtx, NULL, MUTEX_DRIVER, NULL);
+
+	LIST_INIT(&bpf_list);
+	TAILQ_INIT(&bpf_iflist);
+
+	return (0);
+}
+
+
+/*
+ * bpfilterdetach() is called at unload time.
+ *
+ * Returns EBUSY, leaving all global state untouched, while any descriptor
+ * is still open. Otherwise the kstat, the attached interfaces, the
+ * descriptor hash and the global lock/condition variable are torn down
+ * and 0 is returned.
+ */
+int
+bpfilterdetach(void)
+{
+	struct bpf_if *bp;
+
+	/*
+	 * Refuse before anything is dismantled: failing after the kstat
+	 * and interfaces had gone would leave the module loaded but only
+	 * half functional.
+	 */
+	mutex_enter(&bpf_mtx);
+	if (!LIST_EMPTY(&bpf_list)) {
+		mutex_exit(&bpf_mtx);
+		return (EBUSY);
+	}
+	mutex_exit(&bpf_mtx);
+
+	if (bpf_ksp != NULL) {
+		kstat_delete(bpf_ksp);
+		bpf_ksp = NULL;
+	}
+
+	/*
+	 * When no attach/detach callbacks can arrive from mac,
+	 * this is now safe without a lock.
+	 */
+	while ((bp = TAILQ_FIRST(&bpf_iflist)) != NULL)
+		bpfdetach(bp->bif_ifp);
+
+	mod_hash_destroy_idhash(bpf_hash);
+	bpf_hash = NULL;
+
+	cv_destroy(&bpf_dlt_waiter);
+	mutex_destroy(&bpf_mtx);
+
+	return (0);
+}
+
+/*
+ * Open the BPF device. Clones: every successful open is given its own
+ * minor number and its own struct bpf_d.
+ *
+ * Returns 0 and rewrites *devp with the new minor on success, EACCES if
+ * the caller lacks the privilege needed for the requested access mode,
+ * and ENXIO if neither FREAD nor FWRITE is requested or no minor number
+ * is free.
+ */
+/* ARGSUSED */
+int
+bpfopen(dev_t *devp, int flag, int mode, cred_t *cred)
+{
+	struct bpf_d *d;
+	uint_t dmin;
+
+	/*
+	 * The security policy described at the top of this file is
+	 * enforced here.
+	 */
+	if ((flag & FWRITE) != 0) {
+		if (secpolicy_net_rawaccess(cred) != 0)
+			return (EACCES);
+	}
+
+	if ((flag & FREAD) != 0) {
+		if ((secpolicy_net_observability(cred) != 0) &&
+		    (secpolicy_net_rawaccess(cred) != 0))
+			return (EACCES);
+	}
+
+	if ((flag & (FWRITE|FREAD)) == 0)
+		return (ENXIO);
+
+	/*
+	 * If BPF is being opened from a non-global zone, trigger a call
+	 * back into the driver to see if it needs to initialise local
+	 * state in a zone.
+	 */
+	if (crgetzoneid(cred) != GLOBAL_ZONEID)
+		bpf_open_zone(crgetzoneid(cred));
+
+	/*
+	 * A structure is allocated per open file in BPF to store settings
+	 * such as buffer capture size, provide private buffers, etc.
+	 */
+	d = (struct bpf_d *)kmem_zalloc(sizeof (*d), KM_SLEEP);
+	d->bd_bufsize = bpf_bufsize;
+	d->bd_fmode = flag;
+	d->bd_zone = crgetzoneid(cred);
+	d->bd_seesent = 1;
+	d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_PHYS|
+	    MAC_PROMISC_FLAGS_NO_COPY;
+	mutex_init(&d->bd_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&d->bd_wait, NULL, CV_DRIVER, NULL);
+
+	mutex_enter(&bpf_mtx);
+	/*
+	 * Find an unused minor number. Obviously this is an O(n) algorithm
+	 * and doesn't scale particularly well, so if there are large numbers
+	 * of open file descriptors happening in real use, this design may
+	 * need to be revisited.
+	 */
+	for (dmin = 0; dmin < L_MAXMIN; dmin++)
+		if (bpf_dev_find(dmin) == NULL)
+			break;
+	if (dmin == L_MAXMIN) {
+		mutex_exit(&bpf_mtx);
+		/*
+		 * The lock and condition variable were initialised above,
+		 * so they must be destroyed before the memory is freed.
+		 */
+		cv_destroy(&d->bd_wait);
+		mutex_destroy(&d->bd_lock);
+		kmem_free(d, sizeof (*d));
+		return (ENXIO);
+	}
+	d->bd_dev = dmin;
+	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
+	bpf_dev_add(d);
+	mutex_exit(&bpf_mtx);
+
+	*devp = makedevice(getmajor(*devp), dmin);
+
+	return (0);
+}
+
+/*
+ * Close the descriptor by detaching it from its interface,
+ * deallocating its buffers, and marking it free.
+ *
+ * Because bpfopen() hands every open its own minor number, there is always
+ * a 1 to 1 relationship between opens and closes supporting this function.
+ *
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+bpfclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+ struct bpf_d *d = bpf_dev_get(getminor(dev));
+
+ /* Stop any pending read timeout and detach from the interface. */
+ mutex_enter(&d->bd_lock);
+ if (d->bd_state == BPF_WAITING)
+ bpf_clear_timeout(d);
+ d->bd_state = BPF_IDLE;
+ if (d->bd_bif)
+ bpf_detachd(d);
+ mutex_exit(&d->bd_lock);
+
+ /* Take d off the global list and out of the minor lookup table. */
+ mutex_enter(&bpf_mtx);
+ LIST_REMOVE(d, bd_list);
+ bpf_dev_remove(d);
+ mutex_exit(&bpf_mtx);
+
+ /*
+ * NOTE(review): bd_lock is acquired immediately before it is
+ * destroyed, presumably to wait out any last holder -- confirm.
+ */
+ mutex_enter(&d->bd_lock);
+ mutex_destroy(&d->bd_lock);
+ cv_destroy(&d->bd_wait);
+
+ bpf_freed(d);
+ kmem_free(d, sizeof (*d));
+
+ return (0);
+}
+
+/*
+ * Rotate the packet buffers in descriptor d. Move the store buffer
+ * into the hold slot, and the free buffer into the store slot.
+ * Zero the length of the new store buffer.
+ */
+#define ROTATE_BUFFERS(d) \
+ (d)->bd_hbuf = (d)->bd_sbuf; \
+ (d)->bd_hlen = (d)->bd_slen; \
+ (d)->bd_sbuf = (d)->bd_fbuf; \
+ (d)->bd_slen = 0; \
+ (d)->bd_fbuf = 0;
+/*
+ * bpfread - read next chunk of packets from buffers
+ *
+ * The descriptor must have been opened for reading and the caller's
+ * buffer must be exactly d->bd_bufsize bytes long. Returns 0 on success
+ * (including a timeout with nothing captured, in which case no data is
+ * transferred), EBADF if the descriptor was not opened with FREAD, EINVAL
+ * for a wrong buffer size, EWOULDBLOCK in non-blocking mode when nothing
+ * is buffered, EINTR if the wait is interrupted by a signal, or the error
+ * from uiomove().
+ */
+/* ARGSUSED */
+int
+bpfread(dev_t dev, struct uio *uio, cred_t *cred)
+{
+	struct bpf_d *d = bpf_dev_get(getminor(dev));
+	int timed_out;
+	ulong_t delay;
+	int error;
+
+	if ((d->bd_fmode & FREAD) == 0)
+		return (EBADF);
+
+	/*
+	 * Restrict application to use a buffer the same size as
+	 * the kernel buffers.
+	 */
+	if (uio->uio_resid != d->bd_bufsize)
+		return (EINVAL);
+
+	mutex_enter(&d->bd_lock);
+	if (d->bd_state == BPF_WAITING)
+		bpf_clear_timeout(d);
+	timed_out = (d->bd_state == BPF_TIMED_OUT);
+	d->bd_state = BPF_IDLE;
+	/*
+	 * If the hold buffer is empty, then do a timed sleep, which
+	 * ends when the timeout expires or when enough packets
+	 * have arrived to fill the store buffer.
+	 */
+	while (d->bd_hbuf == 0) {
+		if (d->bd_nonblock) {
+			if (d->bd_slen == 0) {
+				mutex_exit(&d->bd_lock);
+				return (EWOULDBLOCK);
+			}
+			ROTATE_BUFFERS(d);
+			break;
+		}
+
+		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
+			/*
+			 * A packet(s) either arrived since the previous
+			 * read or arrived while we were asleep.
+			 * Rotate the buffers and return what's here.
+			 */
+			ROTATE_BUFFERS(d);
+			break;
+		}
+		ks_stats.kp_read_wait.value.ui64++;
+		delay = ddi_get_lbolt() + d->bd_rtout;
+		error = cv_timedwait_sig(&d->bd_wait, &d->bd_lock, delay);
+		if (error == 0) {
+			/* Interrupted by a signal. */
+			mutex_exit(&d->bd_lock);
+			return (EINTR);
+		}
+		if (error == -1) {
+			/*
+			 * On a timeout, return what's in the buffer,
+			 * which may be nothing. If there is something
+			 * in the store buffer, we can rotate the buffers.
+			 */
+			if (d->bd_hbuf)
+				/*
+				 * We filled up the buffer in between
+				 * getting the timeout and arriving
+				 * here, so we don't need to rotate.
+				 */
+				break;
+
+			if (d->bd_slen == 0) {
+				mutex_exit(&d->bd_lock);
+				return (0);
+			}
+			ROTATE_BUFFERS(d);
+		}
+	}
+	/*
+	 * At this point, we know we have something in the hold slot.
+	 */
+	mutex_exit(&d->bd_lock);
+
+	/*
+	 * Move data from hold buffer into user space.
+	 * We know the entire buffer is transferred since
+	 * we checked above that the read buffer is bd_bufsize bytes.
+	 */
+	error = uiomove(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
+
+	/* The hold buffer has been consumed: it becomes the free buffer. */
+	mutex_enter(&d->bd_lock);
+	d->bd_fbuf = d->bd_hbuf;
+	d->bd_hbuf = 0;
+	d->bd_hlen = 0;
+	mutex_exit(&d->bd_lock);
+	return (error);
+}
+
+
+/*
+ * Signal the descriptor's condition variable so that a thread sleeping
+ * on it, if there is one, is woken up.
+ * NOTE: bd_wait is protected by bd_lock, which bpf_deliver already holds
+ * when it gets here, so the lock is not taken in this function.
+ */
+static inline void
+bpf_wakeup(struct bpf_d *d)
+{
+	cv_signal(&d->bd_wait);
+}
+
+/*
+ * Timeout callback for a descriptor; arg is the struct bpf_d. If the
+ * descriptor is still in the BPF_WAITING state it is moved to
+ * BPF_TIMED_OUT and, when the store buffer holds data, a waiter on
+ * bd_wait is signalled. In any other state nothing is changed.
+ */
+static void
+bpf_timed_out(void *arg)
+{
+	struct bpf_d *desc = arg;
+
+	mutex_enter(&desc->bd_lock);
+	if (desc->bd_state != BPF_WAITING) {
+		mutex_exit(&desc->bd_lock);
+		return;
+	}
+
+	desc->bd_state = BPF_TIMED_OUT;
+	if (desc->bd_slen != 0)
+		cv_signal(&desc->bd_wait);
+	mutex_exit(&desc->bd_lock);
+}
+
+
+/*
+ * bpfwrite - transmit one packet on the interface the descriptor is
+ * attached to. The data written must begin with the link level header.
+ *
+ * Returns 0 on success or for a zero-length write, EBADF if the
+ * descriptor was not opened with FWRITE, EINTR if it is not attached to
+ * an interface or the wait for the descriptor is interrupted, EIO for
+ * DLT_IPNET, EMSGSIZE if the packet is larger than the interface's SDU,
+ * or the error from bpf_movein() or MBPF_TX().
+ */
+/* ARGSUSED */
+int
+bpfwrite(dev_t dev, struct uio *uio, cred_t *cred)
+{
+	struct bpf_d *d = bpf_dev_get(getminor(dev));
+	struct bpf_if *bp;
+	uintptr_t mch;
+	uintptr_t ifp;
+	uint_t mtu;
+	mblk_t *m;
+	int error;
+	int dlt;
+
+	if ((d->bd_fmode & FWRITE) == 0)
+		return (EBADF);
+
+	mutex_enter(&d->bd_lock);
+	if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif->bif_ifp == 0) {
+		mutex_exit(&d->bd_lock);
+		return (EINTR);
+	}
+
+	if (uio->uio_resid == 0) {
+		mutex_exit(&d->bd_lock);
+		return (0);
+	}
+
+	/* A negative bd_inuse means the descriptor is not usable yet. */
+	while (d->bd_inuse < 0) {
+		d->bd_waiting++;
+		if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
+			d->bd_waiting--;
+			mutex_exit(&d->bd_lock);
+			return (EINTR);
+		}
+		d->bd_waiting--;
+	}
+
+	/*
+	 * bd_lock was released while sleeping above, so the attachment
+	 * checked at the top of the function may no longer be there.
+	 */
+	if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif->bif_ifp == 0) {
+		mutex_exit(&d->bd_lock);
+		return (EINTR);
+	}
+
+	/*
+	 * Snapshot the attachment and mark the descriptor in use while
+	 * bd_lock is still held: bd_inuse is decremented under the lock
+	 * below, so it has to be incremented under it as well.
+	 */
+	bp = d->bd_bif;
+	dlt = bp->bif_dlt;
+	mch = d->bd_mcip;
+	ifp = bp->bif_ifp;
+	d->bd_inuse++;
+	mutex_exit(&d->bd_lock);
+
+	MBPF_SDU_GET(&bp->bif_mac, ifp, &mtu);
+
+	m = NULL;
+	if (dlt == DLT_IPNET) {
+		error = EIO;
+		goto done;
+	}
+
+	error = bpf_movein(uio, dlt, mtu, &m);
+	if (error)
+		goto done;
+
+	DTRACE_PROBE5(bpf__tx, struct bpf_d *, d, struct bpf_if *, bp,
+	    int, dlt, uint_t, mtu, mblk_t *, m);
+
+	if (M_LEN(m) > mtu) {
+		error = EMSGSIZE;
+		goto done;
+	}
+
+	error = MBPF_TX(&bp->bif_mac, mch, m);
+	/*
+	 * The "tx" action here is required to consume the mblk_t.
+	 */
+	m = NULL;
+
+done:
+	if (error == 0)
+		ks_stats.kp_write_ok.value.ui64++;
+	else
+		ks_stats.kp_write_error.value.ui64++;
+	if (m != NULL)
+		freemsg(m);
+
+	/* Drop the in-use count and wake anyone waiting for it to clear. */
+	mutex_enter(&d->bd_lock);
+	d->bd_inuse--;
+	if ((d->bd_inuse == 0) && (d->bd_waiting != 0))
+		cv_signal(&d->bd_wait);
+	mutex_exit(&d->bd_lock);
+
+	return (error);
+}
+
+
+/*
+ * Reset a descriptor: discard whatever is in its packet buffers and zero
+ * the receive, drop and capture counters. A hold buffer, if present, is
+ * handed back as the free buffer. The BIOCFLUSH caller holds bd_lock
+ * around this call.
+ */
+static void
+reset_d(struct bpf_d *d)
+{
+	/* Counters first; none of these depend on the buffer state. */
+	d->bd_rcount = 0;
+	d->bd_dcount = 0;
+	d->bd_ccount = 0;
+
+	/* Both buffers are now considered empty. */
+	d->bd_hlen = 0;
+	d->bd_slen = 0;
+
+	if (d->bd_hbuf != 0) {
+		/* Free the hold buffer. */
+		d->bd_fbuf = d->bd_hbuf;
+		d->bd_hbuf = 0;
+	}
+}
+
+/*
+ * FIONREAD Check for read packet available.
+ * BIOCGBLEN Get buffer len [for read()].
+ * BIOCSETF Set ethernet read filter.
+ * BIOCFLUSH Flush read packet buffer.
+ * BIOCPROMISC Put interface into promiscuous mode.
+ * BIOCGDLT Get link layer type.
+ * BIOCGETIF Get interface name.
+ * BIOCSETIF Set interface.
+ * BIOCSRTIMEOUT Set read timeout.
+ * BIOCGRTIMEOUT Get read timeout.
+ * BIOCGSTATS Get packet stats.
+ * BIOCIMMEDIATE Set immediate mode.
+ * BIOCVERSION Get filter language version.
+ * BIOCGHDRCMPLT Get "header already complete" flag.
+ * BIOCSHDRCMPLT Set "header already complete" flag.
+ */
+/* ARGSUSED */
+int
+bpfioctl(dev_t dev, int cmd, intptr_t addr, int mode, cred_t *cred, int *rval)
+{
+	struct bpf_d *d = bpf_dev_get(getminor(dev));
+	struct bpf_program prog;
+	struct lifreq lifreq;
+	struct ifreq ifreq;
+	int error = 0;
+	uint_t size;
+
+	/*
+	 * ioctl entry point for a BPF descriptor.  "addr" is the argument
+	 * that accompanies "cmd"; the return value is 0 or an errno.
+	 * copyin()/copyout() report failure as a non-zero value that is not
+	 * an errno, so every failed copy is translated to EFAULT here.
+	 *
+	 * Any ioctl cancels a pending read timeout and puts the descriptor
+	 * back into the idle state.
+	 */
+	mutex_enter(&d->bd_lock);
+	if (d->bd_state == BPF_WAITING)
+		bpf_clear_timeout(d);
+	d->bd_state = BPF_IDLE;
+	mutex_exit(&d->bd_lock);
+
+	switch (cmd) {
+
+	default:
+		error = EINVAL;
+		break;
+
+	/*
+	 * Check for read packet available.
+	 */
+	case FIONREAD:
+		{
+			int n;
+
+			mutex_enter(&d->bd_lock);
+			n = d->bd_slen;
+			if (d->bd_hbuf)
+				n += d->bd_hlen;
+			mutex_exit(&d->bd_lock);
+
+			/*
+			 * The count is handed back with copyout(), like every
+			 * other result in this function, rather than by
+			 * dereferencing addr inside the kernel.
+			 */
+			if (copyout(&n, (void *)addr, sizeof (n)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	/*
+	 * Get buffer len [for read()].
+	 */
+	case BIOCGBLEN:
+		if (copyout(&d->bd_bufsize, (void *)addr,
+		    sizeof (d->bd_bufsize)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Set buffer length.  Refused once an interface is attached; the
+	 * requested size is clamped to [BPF_MINBUFSIZE, bpf_maxbufsize] and
+	 * the value actually used is copied back to the caller.
+	 */
+	case BIOCSBLEN:
+		if (copyin((void *)addr, &size, sizeof (size)) != 0) {
+			error = EFAULT;
+			break;
+		}
+
+		mutex_enter(&d->bd_lock);
+		if (d->bd_bif != 0) {
+			error = EINVAL;
+		} else {
+			if (size > bpf_maxbufsize)
+				size = bpf_maxbufsize;
+			else if (size < BPF_MINBUFSIZE)
+				size = BPF_MINBUFSIZE;
+
+			d->bd_bufsize = size;
+		}
+		mutex_exit(&d->bd_lock);
+
+		if (error == 0 &&
+		    copyout(&size, (void *)addr, sizeof (size)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Set link layer read filter.
+	 */
+	case BIOCSETF:
+		if (ddi_copyin((void *)addr, &prog, sizeof (prog), mode)) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_setf(d, &prog);
+		break;
+
+	/*
+	 * Flush read packet buffer.
+	 */
+	case BIOCFLUSH:
+		mutex_enter(&d->bd_lock);
+		reset_d(d);
+		mutex_exit(&d->bd_lock);
+		break;
+
+	/*
+	 * Put interface into promiscuous mode.
+	 * This is a one-way ioctl, it is not used to turn promiscuous
+	 * mode off.
+	 */
+	case BIOCPROMISC:
+		if (d->bd_bif == 0) {
+			/*
+			 * No interface attached yet.
+			 */
+			error = EINVAL;
+			break;
+		}
+		mutex_enter(&d->bd_lock);
+		if (d->bd_promisc == 0) {
+
+			/*
+			 * An existing handle is removed first; bd_lock is
+			 * dropped around the removal call.
+			 */
+			if (d->bd_promisc_handle) {
+				uintptr_t mph;
+
+				mph = d->bd_promisc_handle;
+				d->bd_promisc_handle = 0;
+
+				mutex_exit(&d->bd_lock);
+				MBPF_PROMISC_REMOVE(&d->bd_bif->bif_mac, mph);
+				mutex_enter(&d->bd_lock);
+			}
+
+			d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_COPY;
+			error = MBPF_PROMISC_ADD(&d->bd_bif->bif_mac,
+			    d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d,
+			    &d->bd_promisc_handle, d->bd_promisc_flags);
+			if (error == 0)
+				d->bd_promisc = 1;
+		}
+		mutex_exit(&d->bd_lock);
+		break;
+
+	/*
+	 * Get device parameters.
+	 */
+	case BIOCGDLT:
+		if (d->bd_bif == 0)
+			error = EINVAL;
+		else if (copyout(&d->bd_bif->bif_dlt, (void *)addr,
+		    sizeof (d->bd_bif->bif_dlt)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Get a list of supported device parameters.
+	 */
+	case BIOCGDLTLIST:
+		if (d->bd_bif == 0) {
+			error = EINVAL;
+		} else {
+			struct bpf_dltlist list;
+
+			if (copyin((void *)addr, &list, sizeof (list)) != 0) {
+				error = EFAULT;
+				break;
+			}
+			error = bpf_getdltlist(d, &list);
+			if ((error == 0) &&
+			    copyout(&list, (void *)addr, sizeof (list)) != 0)
+				error = EFAULT;
+		}
+		break;
+
+	/*
+	 * Set device parameters.
+	 */
+	case BIOCSDLT:
+		error = bpf_setdlt(d, (void *)addr);
+		break;
+
+	/*
+	 * Get interface name.
+	 */
+	case BIOCGETIF:
+		if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_ifname(d, ifreq.ifr_name, sizeof (ifreq.ifr_name));
+		if ((error == 0) &&
+		    copyout(&ifreq, (void *)addr, sizeof (ifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		break;
+
+	/*
+	 * Set interface.
+	 */
+	case BIOCSETIF:
+		if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_setif(d, ifreq.ifr_name, sizeof (ifreq.ifr_name));
+		break;
+
+	/*
+	 * Get interface name (struct lifreq variant).
+	 */
+	case BIOCGETLIF:
+		if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_ifname(d, lifreq.lifr_name,
+		    sizeof (lifreq.lifr_name));
+		if ((error == 0) &&
+		    copyout(&lifreq, (void *)addr, sizeof (lifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		break;
+
+	/*
+	 * Set interface (struct lifreq variant).
+	 */
+	case BIOCSETLIF:
+		if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_setif(d, lifreq.lifr_name,
+		    sizeof (lifreq.lifr_name));
+		break;
+
+#ifdef _SYSCALL32_IMPL
+	/*
+	 * Set read timeout.
+	 */
+	case BIOCSRTIMEOUT32:
+		{
+			struct timeval32 tv;
+
+			if (copyin((void *)addr, &tv, sizeof (tv)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			/* Convert the timeout in microseconds to ticks */
+			d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 +
+			    tv.tv_usec);
+			/* A non-zero request never rounds down to "none". */
+			if ((d->bd_rtout == 0) && (tv.tv_usec != 0))
+				d->bd_rtout = 1;
+			break;
+		}
+
+	/*
+	 * Get read timeout.
+	 */
+	case BIOCGRTIMEOUT32:
+		{
+			struct timeval32 tv;
+			clock_t ticks;
+
+			/* Despite its name, "ticks" holds microseconds. */
+			ticks = drv_hztousec(d->bd_rtout);
+			tv.tv_sec = ticks / 1000000;
+			tv.tv_usec = ticks - (tv.tv_sec * 1000000);
+			if (copyout(&tv, (void *)addr, sizeof (tv)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	/*
+	 * Get a list of supported device parameters.
+	 */
+	case BIOCGDLTLIST32:
+		if (d->bd_bif == 0) {
+			error = EINVAL;
+		} else {
+			struct bpf_dltlist32 lst32;
+			struct bpf_dltlist list;
+
+			if (copyin((void *)addr, &lst32, sizeof (lst32)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			/* Widen the 32-bit request into the native form. */
+			list.bfl_len = lst32.bfl_len;
+			list.bfl_list = (void *)(uint64_t)lst32.bfl_list;
+			error = bpf_getdltlist(d, &list);
+			if (error == 0) {
+				lst32.bfl_len = list.bfl_len;
+
+				if (copyout(&lst32, (void *)addr,
+				    sizeof (lst32)) != 0)
+					error = EFAULT;
+			}
+		}
+		break;
+
+	/*
+	 * Set link layer read filter.
+	 */
+	case BIOCSETF32: {
+		struct bpf_program32 prog32;
+
+		/*
+		 * The copy is sized by the structure being filled in
+		 * (prog32), not by the native struct bpf_program.
+		 */
+		if (ddi_copyin((void *)addr, &prog32, sizeof (prog32), mode)) {
+			error = EFAULT;
+			break;
+		}
+		prog.bf_len = prog32.bf_len;
+		prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
+		error = bpf_setf(d, &prog);
+		break;
+	}
+#endif
+
+	/*
+	 * Set read timeout.
+	 */
+	case BIOCSRTIMEOUT:
+		{
+			struct timeval tv;
+
+			if (copyin((void *)addr, &tv, sizeof (tv)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			/* Convert the timeout in microseconds to ticks */
+			d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 +
+			    tv.tv_usec);
+			/* A non-zero request never rounds down to "none". */
+			if ((d->bd_rtout == 0) && (tv.tv_usec != 0))
+				d->bd_rtout = 1;
+			break;
+		}
+
+	/*
+	 * Get read timeout.
+	 */
+	case BIOCGRTIMEOUT:
+		{
+			struct timeval tv;
+			clock_t ticks;
+
+			/* Despite its name, "ticks" holds microseconds. */
+			ticks = drv_hztousec(d->bd_rtout);
+			tv.tv_sec = ticks / 1000000;
+			tv.tv_usec = ticks - (tv.tv_sec * 1000000);
+			if (copyout(&tv, (void *)addr, sizeof (tv)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	/*
+	 * Get packet stats.
+	 */
+	case BIOCGSTATS:
+		{
+			struct bpf_stat bs;
+
+			bs.bs_recv = d->bd_rcount;
+			bs.bs_drop = d->bd_dcount;
+			bs.bs_capt = d->bd_ccount;
+			if (copyout(&bs, (void *)addr, sizeof (bs)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	/*
+	 * Set immediate mode.
+	 */
+	case BIOCIMMEDIATE:
+		if (copyin((void *)addr, &d->bd_immediate,
+		    sizeof (d->bd_immediate)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Get filter language version.
+	 */
+	case BIOCVERSION:
+		{
+			struct bpf_version bv;
+
+			bv.bv_major = BPF_MAJOR_VERSION;
+			bv.bv_minor = BPF_MINOR_VERSION;
+			if (copyout(&bv, (void *)addr, sizeof (bv)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
+		if (copyout(&d->bd_hdrcmplt, (void *)addr,
+		    sizeof (d->bd_hdrcmplt)) != 0)
+			error = EFAULT;
+		break;
+
+	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
+		if (copyin((void *)addr, &d->bd_hdrcmplt,
+		    sizeof (d->bd_hdrcmplt)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Get "see sent packets" flag
+	 */
+	case BIOCGSEESENT:
+		if (copyout(&d->bd_seesent, (void *)addr,
+		    sizeof (d->bd_seesent)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Set "see sent" packets flag
+	 */
+	case BIOCSSEESENT:
+		if (copyin((void *)addr, &d->bd_seesent,
+		    sizeof (d->bd_seesent)) != 0)
+			error = EFAULT;
+		break;
+
+	case FIONBIO:		/* Non-blocking I/O */
+		if (copyin((void *)addr, &d->bd_nonblock,
+		    sizeof (d->bd_nonblock)) != 0)
+			error = EFAULT;
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Set d's packet filter program to fp. If this file already has a filter,
+ * free it and replace it. If the new filter is "empty" (has a 0 size), then
+ * the result is to just remove and free the existing filter.
+ * Returns EINVAL for bogus requests.
+ */
+int
+bpf_setf(struct bpf_d *d, struct bpf_program *fp)
+{
+	struct bpf_insn *fcode, *old;
+	uint_t flen, size;
+	size_t oldsize;
+
+	/*
+	 * A NULL instruction pointer with a zero length removes the current
+	 * filter; a NULL pointer with a non-zero length is rejected.
+	 */
+	if (fp->bf_insns == 0) {
+		if (fp->bf_len != 0)
+			return (EINVAL);
+		mutex_enter(&d->bd_lock);
+		old = d->bd_filter;
+		oldsize = d->bd_filter_size;
+		d->bd_filter = 0;
+		d->bd_filter_size = 0;
+		reset_d(d);
+		mutex_exit(&d->bd_lock);
+		/* The old program is freed after bd_lock is dropped. */
+		if (old != 0)
+			kmem_free(old, oldsize);
+		return (0);
+	}
+	flen = fp->bf_len;
+	if (flen > BPF_MAXINSNS)
+		return (EINVAL);
+
+	/* Stage the caller's program in kernel memory before validating. */
+	size = flen * sizeof (*fp->bf_insns);
+	fcode = kmem_alloc(size, KM_SLEEP);
+	if (copyin(fp->bf_insns, fcode, size) != 0) {
+		/* Release the staging buffer; nothing else references it. */
+		kmem_free(fcode, size);
+		return (EFAULT);
+	}
+
+	if (bpf_validate(fcode, (int)flen)) {
+		/*
+		 * Swap the new program in under bd_lock and flush the
+		 * buffers; the previous program is freed after unlocking.
+		 */
+		mutex_enter(&d->bd_lock);
+		old = d->bd_filter;
+		oldsize = d->bd_filter_size;
+		d->bd_filter = fcode;
+		d->bd_filter_size = size;
+		reset_d(d);
+		mutex_exit(&d->bd_lock);
+		if (old != 0)
+			kmem_free(old, oldsize);
+
+		return (0);
+	}
+	/* Validation failed: discard the staged copy. */
+	kmem_free(fcode, size);
+	return (EINVAL);
+}
+
+/*
+ * Detach a file from its current interface (if attached at all) and attach
+ * to the interface indicated by the name stored in ifr.
+ * Return an errno or 0.
+ */
+static int
+bpf_setif(struct bpf_d *d, char *ifname, int namesize)
+{
+ struct bpf_if *bp;
+ int unit_seen;
+ char *cp;
+ int i;
+
+ /*
+ * Make sure the provided name has a unit number, and default
+ * it to '0' if not specified.
+ * XXX This is ugly ... do this differently?
+ *
+ * NOTE(review): the scan below advances cp before testing for a
+ * digit, so ifname[0] is never examined. When no digit is found,
+ * every non-letter byte in the first namesize - 1 positions
+ * (including bytes after the name's NUL) is overwritten with '0'.
+ */
+ unit_seen = 0;
+ cp = ifname;
+ cp[namesize - 1] = '\0'; /* sanity */
+ while (*cp++)
+ if (*cp >= '0' && *cp <= '9')
+ unit_seen = 1;
+ if (!unit_seen) {
+ /* Make sure to leave room for the '\0'. */
+ for (i = 0; i < (namesize - 1); ++i) {
+ if ((ifname[i] >= 'a' && ifname[i] <= 'z') ||
+ (ifname[i] >= 'A' && ifname[i] <= 'Z'))
+ continue;
+ ifname[i] = '0';
+ }
+ }
+
+ /*
+ * Make sure that only one call to this function happens at a time
+ * and that we're not interleaving a read/write
+ *
+ * The wait is interruptible: a signal ends it with EINTR. Once
+ * bd_inuse reaches zero it is set to -1 for the duration of the
+ * attach; presumably other paths treat any non-zero value as busy
+ * (TODO confirm against the read/write routines).
+ */
+ mutex_enter(&d->bd_lock);
+ while (d->bd_inuse != 0) {
+ d->bd_waiting++;
+ if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
+ d->bd_waiting--;
+ mutex_exit(&d->bd_lock);
+ return (EINTR);
+ }
+ d->bd_waiting--;
+ }
+ d->bd_inuse = -1;
+ mutex_exit(&d->bd_lock);
+
+ /*
+ * Look through attached interfaces for the named one.
+ *
+ * The DLT argument of -1 is treated by bpf_findif() as a wildcard,
+ * so the match is made on the name (and zone) only.
+ */
+ mutex_enter(&bpf_mtx);
+
+ bp = bpf_findif(d, ifname, -1);
+
+ if (bp != NULL) {
+ int error = 0;
+
+ /* Buffers are allocated on first use only. */
+ if (d->bd_sbuf == 0)
+ error = bpf_allocbufs(d);
+
+ /*
+ * We found the requested interface.
+ * If we're already attached to requested interface,
+ * just flush the buffer.
+ * If the buffer allocation failed, the attachment is left
+ * unchanged and the allocation error is returned.
+ */
+ mutex_enter(&d->bd_lock);
+ if (error == 0 && bp != d->bd_bif) {
+ if (d->bd_bif)
+ /*
+ * Detach if attached to something else.
+ */
+ bpf_detachd(d);
+
+ bpf_attachd(d, bp);
+ }
+ reset_d(d);
+ /* Release exclusive use and wake one waiter, if any. */
+ d->bd_inuse = 0;
+ if (d->bd_waiting != 0)
+ cv_signal(&d->bd_wait);
+ mutex_exit(&d->bd_lock);
+ mutex_exit(&bpf_mtx);
+ return (error);
+ }
+
+ /* No match: release exclusive use before trying the provider. */
+ mutex_enter(&d->bd_lock);
+ d->bd_inuse = 0;
+ if (d->bd_waiting != 0)
+ cv_signal(&d->bd_wait);
+ mutex_exit(&d->bd_lock);
+ mutex_exit(&bpf_mtx);
+
+ /*
+ * Try tickle the mac layer into attaching the device...
+ */
+ return (bpf_provider_tickle(ifname, d->bd_zone));
+}
+
+/*
+ * Copy the interface name to the ifreq.
+ */
+static int
+bpf_ifname(struct bpf_d *d, char *buffer, int bufsize)
+{
+	int error = 0;
+
+	/*
+	 * bd_lock is held while the attachment is inspected and its name
+	 * copied.  A descriptor with no interface yields EINVAL and leaves
+	 * the buffer untouched.
+	 */
+	mutex_enter(&d->bd_lock);
+	if (d->bd_bif != NULL)
+		(void) strlcpy(buffer, d->bd_bif->bif_ifname, bufsize);
+	else
+		error = EINVAL;
+	mutex_exit(&d->bd_lock);
+
+	return (error);
+}
+
+/*
+ * Support for poll() system call
+ *
+ * Return true iff the specific operation will not block indefinitely - with
+ * the assumption that it is safe to positively acknowledge a request for the
+ * ability to write to the BPF device.
+ * Otherwise, return false but make a note that a selnotify() must be done.
+ */
+int
+bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ struct bpf_d *d = bpf_dev_get(getminor(dev));
+
+ /*
+ * Only read readiness is evaluated. NOTE(review): when neither
+ * POLLIN nor POLLRDNORM is requested, *reventsp and *phpp are not
+ * written at all - confirm the caller pre-initializes them.
+ */
+ if (events & (POLLIN | POLLRDNORM)) {
+ /*
+ * An imitation of the FIONREAD ioctl code.
+ *
+ * Readable when the hold buffer has data, or when the store
+ * buffer has data and either immediate mode is on or the
+ * read timeout has already expired.
+ */
+ mutex_enter(&d->bd_lock);
+ if (d->bd_hlen != 0 ||
+ ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
+ d->bd_slen != 0)) {
+ /* Bits are OR-ed into whatever *reventsp already holds. */
+ *reventsp |= events & (POLLIN | POLLRDNORM);
+ } else {
+ *reventsp = 0;
+ if (!anyyet)
+ *phpp = &d->bd_poll;
+ /* Start the read timeout if necessary */
+ if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+ bpf_clear_timeout(d);
+ /*
+ * Only allow the timeout to be set once.
+ */
+ if (d->bd_callout == 0)
+ d->bd_callout = timeout(bpf_timed_out,
+ d, d->bd_rtout);
+ d->bd_state = BPF_WAITING;
+ }
+ }
+ mutex_exit(&d->bd_lock);
+ }
+
+ return (0);
+}
+
+/*
+ * Copy data from an mblk_t chain into a buffer. This works for ipnet
+ * because the dl_ipnetinfo_t is placed in an mblk_t that leads the
+ * packet itself.
+ */
+static void *
+bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
+{
+	const mblk_t *mp = src_arg;
+	uchar_t *out = dst_arg;
+	size_t remaining;
+	uint_t chunk;
+
+	/*
+	 * Walk the b_cont chain, taking from each mblk as much as is still
+	 * wanted.  Running out of mblks before "len" bytes have been copied
+	 * is treated as fatal.
+	 */
+	for (remaining = len; remaining > 0; remaining -= chunk) {
+		if (mp == NULL)
+			panic("bpf_mcpy");
+		chunk = (uint_t)min(M_LEN(mp), remaining);
+		(void) memcpy(out, mtod(mp, const void *), chunk);
+		out += chunk;
+		mp = mp->b_cont;
+	}
+	return (dst_arg);
+}
+
+/*
+ * Dispatch a packet to all the listeners on interface bp.
+ *
+ * marg pointer to the packet, either a data buffer or an mbuf chain
+ * buflen buffer length, if marg is a data buffer
+ * cpfn a function that can copy marg into the listener's buffer
+ * pktlen length of the packet
+ * issent boolean indicating whether the packet was sent or received
+ */
+static inline void
+bpf_deliver(struct bpf_d *d, cp_fn_t cpfn, void *marg, uint_t pktlen,
+    uint_t buflen, boolean_t issent)
+{
+	struct timeval now;
+	uint_t caplen;
+
+	/* Outbound packets are ignored unless "see sent" is enabled. */
+	if (issent && !d->bd_seesent)
+		return;
+
+	/*
+	 * The filter run, the counter updates and the capture all happen
+	 * under bd_lock so that the per-descriptor counts stay consistent.
+	 */
+	mutex_enter(&d->bd_lock);
+	caplen = bpf_filter(d->bd_filter, marg, pktlen, buflen);
+	DTRACE_PROBE5(bpf__packet, struct bpf_if *, d->bd_bif,
+	    struct bpf_d *, d, void *, marg, uint_t, pktlen, uint_t, caplen);
+	ks_stats.kp_receive.value.ui64++;
+	d->bd_rcount++;
+
+	/* A zero result from the filter means the packet is not captured. */
+	if (caplen > 0) {
+		uniqtime(&now);
+		catchpacket(d, marg, pktlen, caplen, cpfn, &now);
+	}
+	mutex_exit(&d->bd_lock);
+}
+
+/*
+ * Incoming linkage from device drivers.
+ */
+/* ARGSUSED */
+void
+bpf_mtap(void *arg, mac_resource_handle_t mrh, mblk_t *m, boolean_t issent)
+{
+	struct bpf_d *d = arg;
+	uint_t pktlen = msgdsize(m);
+	/* Defaults describe a message spread over more than one mblk. */
+	cp_fn_t cpfn = bpf_mcpy;
+	void *marg = m;
+	uint_t buflen = 0;
+
+	/*
+	 * When the first mblk holds the whole message, the data is handed
+	 * over as a flat buffer and copied with plain memcpy.
+	 */
+	if (pktlen == M_LEN(m)) {
+		cpfn = (cp_fn_t)memcpy;
+		marg = mtod(m, void *);
+		buflen = pktlen;
+	}
+
+	bpf_deliver(d, cpfn, marg, pktlen, buflen, issent);
+}
+
+/*
+ * Incoming linkage from ipnet.
+ * In ipnet, there is only one event, NH_OBSERVE, that delivers packets
+ * from all network interfaces. Thus the tap function needs to apply a
+ * filter using the interface index/id to imitate snoop'ing on just the
+ * specified interface.
+ */
+/* ARGSUSED */
+void
+bpf_itap(void *arg, mblk_t *m, boolean_t issent, uint_t length)
+{
+	struct bpf_d *d = arg;
+	hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)m->b_rptr;
+
+	/*
+	 * Deliver only packets whose observe header carries the link id of
+	 * the interface this descriptor is attached to.
+	 */
+	if (ntohl(hdr->hpo_ifindex) == d->bd_bif->bif_linkid)
+		bpf_deliver(d, bpf_mcpy, m, length, 0, issent);
+}
+
+/*
+ * Move the packet data from interface memory (pkt) into the store
+ * buffer, preceded by a bpf header, and wake up the reader when the
+ * buffers are rotated, when immediate mode is set or when the read
+ * timeout has already expired.  "cpfn" is the routine called to do the
+ * actual data transfer.  memcpy is passed in to copy contiguous chunks,
+ * while bpf_mcpy is passed in to copy mblk chains.  In the latter case,
+ * pkt is really an mblk_t.
+ */
+static void
+catchpacket(struct bpf_d *d, uchar_t *pkt, uint_t pktlen, uint_t snaplen,
+    cp_fn_t cpfn, struct timeval *tv)
+{
+	int hdrlen = d->bd_bif->bif_hdrlen;
+	boolean_t wake = B_FALSE;
+	struct bpf_hdr *hp;
+	int totlen, curlen;
+
+	ks_stats.kp_capture.value.ui64++;
+	++d->bd_ccount;
+
+	/*
+	 * Space needed: the bpf header plus the smaller of the snapshot
+	 * length and the packet length, capped at the buffer size.
+	 */
+	totlen = hdrlen + min(snaplen, pktlen);
+	if (totlen > d->bd_bufsize)
+		totlen = d->bd_bufsize;
+
+	/*
+	 * The new record starts at the word-aligned end of the previous one.
+	 */
+	curlen = BPF_WORDALIGN(d->bd_slen);
+	if (curlen + totlen > d->bd_bufsize) {
+		/*
+		 * The store buffer cannot take this record.  Without a free
+		 * buffer to rotate to (the previous read has not finished),
+		 * the packet is counted as dropped and discarded.
+		 */
+		if (d->bd_fbuf == 0) {
+			ks_stats.kp_dropped.value.ui64++;
+			++d->bd_dcount;
+			return;
+		}
+		ROTATE_BUFFERS(d);
+		wake = B_TRUE;
+		curlen = 0;
+	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
+		/*
+		 * Immediate mode, or a read timeout that already expired:
+		 * the reader is woken for every captured packet.
+		 */
+		wake = B_TRUE;
+	}
+
+	/*
+	 * Fill in the bpf header at the chosen offset, then copy the
+	 * captured bytes in directly behind it.
+	 */
+	hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen);
+	hp->bh_hdrlen = (uint16_t)hdrlen;
+	hp->bh_datalen = pktlen;
+	hp->bh_caplen = totlen - hdrlen;
+	hp->bh_tstamp.tv_sec = tv->tv_sec;
+	hp->bh_tstamp.tv_usec = tv->tv_usec;
+
+	(*cpfn)((uchar_t *)hp + hdrlen, pkt, hp->bh_caplen);
+	d->bd_slen = curlen + totlen;
+
+	/*
+	 * The wakeup is issued only after bd_slen reflects the new record.
+	 */
+	if (wake)
+		bpf_wakeup(d);
+}
+
+/*
+ * Allocate the free and store buffers of a descriptor, each bd_bufsize
+ * bytes and zero-filled, and reset the store and hold lengths.  The
+ * allocations do not sleep: ENOBUFS is returned when either one fails,
+ * and in that case bd_fbuf is left NULL rather than pointing at memory
+ * that has been released.  Returns 0 on success.
+ */
+static int
+bpf_allocbufs(struct bpf_d *d)
+{
+
+	d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
+	if (!d->bd_fbuf)
+		return (ENOBUFS);
+	d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
+	if (!d->bd_sbuf) {
+		/* Undo the first allocation and forget the stale pointer. */
+		kmem_free(d->bd_fbuf, d->bd_bufsize);
+		d->bd_fbuf = NULL;
+		return (ENOBUFS);
+	}
+	d->bd_slen = 0;
+	d->bd_hlen = 0;
+	return (0);
+}
+
+/*
+ * Free buffers currently in use by a descriptor.
+ * Called on close.
+ */
+static void
+bpf_freed(struct bpf_d *d)
+{
+	/*
+	 * By now the descriptor has been detached from its interface but
+	 * has not yet been marked free.  The hold and free buffers are only
+	 * looked at when a store buffer exists; all three are bd_bufsize
+	 * bytes long.
+	 */
+	if (d->bd_sbuf != 0) {
+		if (d->bd_fbuf != 0)
+			kmem_free(d->bd_fbuf, d->bd_bufsize);
+		if (d->bd_hbuf != 0)
+			kmem_free(d->bd_hbuf, d->bd_bufsize);
+		kmem_free(d->bd_sbuf, d->bd_bufsize);
+	}
+
+	/* The filter program is released independently of the buffers. */
+	if (d->bd_filter != 0)
+		kmem_free(d->bd_filter, d->bd_filter_size);
+}
+
+/*
+ * Attach additional dlt for a interface to bpf.
+ * dlt is the link layer type.
+ *
+ * The zoneid is passed in explicitly to prevent the need to
+ * do a lookup in dls using the linkid. Such a lookup would need
+ * to use the same hash table that gets used for walking when
+ * dls_set_bpfattach() is called.
+ */
+void
+bpfattach(uintptr_t ifp, int dlt, zoneid_t zoneid, int provider)
+{
+ bpf_provider_t *bpr;
+ struct bpf_if *bp;
+ uintptr_t client;
+ int hdrlen;
+
+ /*
+ * Every failure below returns silently to the caller; a warning is
+ * logged only when bpf_debug is set.
+ */
+ bpr = bpf_find_provider_by_id(provider);
+ if (bpr == NULL) {
+ if (bpf_debug)
+ cmn_err(CE_WARN, "bpfattach: unknown provider %d",
+ provider);
+ return;
+ }
+
+ /* Non-sleeping allocation: the attach is abandoned if it fails. */
+ bp = kmem_zalloc(sizeof (*bp), KM_NOSLEEP);
+ if (bp == NULL) {
+ if (bpf_debug)
+ cmn_err(CE_WARN, "bpfattach: no memory for bpf_if");
+ return;
+ }
+ /* The provider's operations are copied by value into the bpf_if. */
+ bp->bif_mac = *bpr;
+
+ /*
+ * To get the user-visible name, it is necessary to get the mac
+ * client name of an interface and for this, we need to do the
+ * mac_client_open. Leaving it open is undesirable because it
+ * creates an open reference that is hard to see from outside
+ * of bpf, potentially leading to data structures not being
+ * cleaned up when they should.
+ */
+ if (MBPF_CLIENT_OPEN(&bp->bif_mac, ifp, &client) != 0) {
+ if (bpf_debug)
+ cmn_err(CE_WARN,
+ "bpfattach: mac_client_open fail for %s",
+ MBPF_NAME(&bp->bif_mac, ifp));
+ kmem_free(bp, sizeof (*bp));
+ return;
+ }
+ (void) strlcpy(bp->bif_ifname, MBPF_CLIENT_NAME(&bp->bif_mac, client),
+ sizeof (bp->bif_ifname));
+ MBPF_CLIENT_CLOSE(&bp->bif_mac, client);
+
+ /* "dlt" arrives as a DLPI type; bif_dlt stores the BPF equivalent. */
+ bp->bif_ifp = ifp;
+ bp->bif_dlt = bpf_dl_to_dlt(dlt);
+ bp->bif_zoneid = zoneid;
+ LIST_INIT(&bp->bif_dlist);
+
+ /*
+ * Compute the length of the bpf header. This is not necessarily
+ * equal to SIZEOF_BPF_HDR because we want to insert spacing such
+ * that the network layer header begins on a longword boundary (for
+ * performance reasons and to alleviate alignment restrictions).
+ */
+ hdrlen = bpf_dl_hdrsize(dlt);
+ bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
+
+ if (MBPF_GET_LINKID(&bp->bif_mac, MBPF_NAME(&bp->bif_mac, ifp),
+ &bp->bif_linkid, zoneid) != 0) {
+ if (bpf_debug) {
+ cmn_err(CE_WARN,
+ "bpfattach: linkid resolution fail for %s/%s",
+ MBPF_NAME(&bp->bif_mac, ifp), bp->bif_ifname);
+ }
+ kmem_free(bp, sizeof (*bp));
+ return;
+ }
+ mutex_init(&bp->bif_lock, NULL, MUTEX_DRIVER, NULL);
+
+ bpf_debug_nic_action("attached to", bp);
+
+ /* Publish the fully initialized entry on the global list. */
+ mutex_enter(&bpf_mtx);
+ TAILQ_INSERT_TAIL(&bpf_iflist, bp, bif_next);
+ mutex_exit(&bpf_mtx);
+}
+
+/*
+ * Remove an interface from bpf.
+ */
+void
+bpfdetach(uintptr_t ifp)
+{
+ struct bpf_if *bp;
+ struct bpf_d *d;
+ int removed = 0;
+
+ /* bpf_mtx is held throughout, except while sleeping in cv_wait. */
+ mutex_enter(&bpf_mtx);
+ /*
+ * Loop through all of the known descriptors to find any that are
+ * using the interface that wants to be detached.
+ */
+ LIST_FOREACH(d, &bpf_list, bd_list) {
+ mutex_enter(&d->bd_lock);
+ bp = d->bd_bif;
+ if (bp != NULL && bp->bif_ifp == ifp) {
+ /*
+ * Detach the descriptor from an interface now.
+ * It will be free'ed later by close routine.
+ */
+ bpf_detachd(d);
+ }
+ mutex_exit(&d->bd_lock);
+ }
+
+ /*
+ * Remove and free every bpf_if entry for this interface. The walk
+ * is restarted after each removal because the entry just visited
+ * has been freed. Before freeing, wait until bif_inuse drops to
+ * zero; bpf_getdltlist() raises it around its copyout and signals
+ * bpf_dlt_waiter when it is done.
+ */
+again:
+ TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
+ if (bp->bif_ifp == ifp) {
+ TAILQ_REMOVE(&bpf_iflist, bp, bif_next);
+ bpf_debug_nic_action("detached from", bp);
+ while (bp->bif_inuse != 0)
+ cv_wait(&bpf_dlt_waiter, &bpf_mtx);
+ kmem_free(bp, sizeof (*bp));
+ removed++;
+ goto again;
+ }
+ }
+ mutex_exit(&bpf_mtx);
+
+ /* At least one entry is expected to have matched. */
+ ASSERT(removed > 0);
+}
+
+/*
+ * Get a list of available data link type of the interface.
+ */
+static int
+bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *listp)
+{
+	char ifname[LIFNAMSIZ+1];
+	struct bpf_if *bp;
+	uintptr_t ifp;
+	int n, error;
+
+	/*
+	 * Count, and when listp->bfl_list is non-NULL copy out, the DLT of
+	 * every bpf_if entry whose name matches the attached interface and
+	 * that is visible from the descriptor's zone.  On return
+	 * listp->bfl_len holds the number of entries processed.  Returns 0,
+	 * ENOMEM when the caller's array is too small, or EFAULT when a
+	 * copyout fails.
+	 */
+	mutex_enter(&bpf_mtx);
+	ifp = d->bd_bif->bif_ifp;
+	(void) strlcpy(ifname, MBPF_NAME(&d->bd_bif->bif_mac, ifp),
+	    sizeof (ifname));
+	n = 0;
+	error = 0;
+	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
+		if (strcmp(bp->bif_ifname, ifname) != 0)
+			continue;
+		if (d->bd_zone != GLOBAL_ZONEID &&
+		    d->bd_zone != bp->bif_zoneid)
+			continue;
+		if (listp->bfl_list != NULL) {
+			if (n >= listp->bfl_len) {
+				/*
+				 * Leave through the common exit so that
+				 * bpf_mtx is released and any waiter on
+				 * bpf_dlt_waiter is signalled.
+				 */
+				error = ENOMEM;
+				break;
+			}
+			/*
+			 * Bumping of bif_inuse ensures the structure does not
+			 * disappear while the copyout runs and allows the for
+			 * loop to be continued.
+			 */
+			bp->bif_inuse++;
+			mutex_exit(&bpf_mtx);
+			if (copyout(&bp->bif_dlt,
+			    listp->bfl_list + n, sizeof (uint_t)) != 0)
+				error = EFAULT;
+			mutex_enter(&bpf_mtx);
+			bp->bif_inuse--;
+		}
+		n++;
+	}
+	cv_signal(&bpf_dlt_waiter);
+	mutex_exit(&bpf_mtx);
+	listp->bfl_len = n;
+	return (error);
+}
+
+/*
+ * Set the data link type of a BPF instance.
+ */
+static int
+bpf_setdlt(struct bpf_d *d, void *addr)
+{
+ char ifname[LIFNAMSIZ+1];
+ struct bpf_if *bp;
+ int error;
+ int dlt;
+
+ /* "addr" is the ioctl argument holding the requested DLT. */
+ if (copyin(addr, &dlt, sizeof (dlt)) != 0)
+ return (EFAULT);
+ /*
+ * The established order is get bpf_mtx before bd_lock, even
+ * though bpf_mtx is not needed until the loop...
+ */
+ mutex_enter(&bpf_mtx);
+ mutex_enter(&d->bd_lock);
+
+ if (d->bd_bif == 0) { /* Interface not set */
+ mutex_exit(&d->bd_lock);
+ mutex_exit(&bpf_mtx);
+ return (EINVAL);
+ }
+ if (d->bd_bif->bif_dlt == dlt) { /* NULL-op */
+ mutex_exit(&d->bd_lock);
+ mutex_exit(&bpf_mtx);
+ return (0);
+ }
+
+ /*
+ * See the matrix at the top of the file for the permissions table
+ * enforced by this driver.
+ *
+ * As coded: a non-global-zone descriptor attached to another zone's
+ * interface may only switch to DLT_IPNET.
+ */
+ if ((d->bd_zone != GLOBAL_ZONEID) && (dlt != DLT_IPNET) &&
+ (d->bd_bif->bif_zoneid != d->bd_zone)) {
+ mutex_exit(&d->bd_lock);
+ mutex_exit(&bpf_mtx);
+ return (EINVAL);
+ }
+
+ /* Look for an entry with the same name that offers the new DLT. */
+ (void) strlcpy(ifname,
+ MBPF_NAME(&d->bd_bif->bif_mac, d->bd_bif->bif_ifp),
+ sizeof (ifname));
+
+ bp = bpf_findif(d, ifname, dlt);
+
+ mutex_exit(&bpf_mtx);
+ /*
+ * Now only bd_lock is held.
+ *
+ * If there was no matching interface that supports the requested
+ * DLT, return an error and leave the current binding alone.
+ */
+ if (bp == NULL) {
+ mutex_exit(&d->bd_lock);
+ return (EINVAL);
+ }
+
+ /* Rebind to the new entry and flush; "error" is always 0 here. */
+ error = 0;
+ bpf_detachd(d);
+ bpf_attachd(d, bp);
+ reset_d(d);
+
+ mutex_exit(&d->bd_lock);
+ return (error);
+}
+
+/*
+ * bpf_clear_timeout is called with the bd_lock mutex held, providing it
+ * with the necessary protection to retrieve and modify bd_callout but it
+ * does not hold the lock for its entire duration... see below...
+ */
+static void
+bpf_clear_timeout(struct bpf_d *d)
+{
+ /* Take ownership of the pending timeout id while bd_lock is held. */
+ timeout_id_t tid = d->bd_callout;
+ d->bd_callout = 0;
+ /* bd_inuse stays raised across the window where bd_lock is dropped. */
+ d->bd_inuse++;
+
+ /*
+ * If the timeout has fired and its handler is waiting on bd_lock,
+ * calling untimeout with bd_lock held could deadlock: untimeout
+ * would wait for bpf_timed_out to finish and it never would.
+ * The lock is therefore dropped around the untimeout call.
+ */
+ if (tid != 0) {
+ mutex_exit(&d->bd_lock);
+ (void) untimeout(tid);
+ mutex_enter(&d->bd_lock);
+ }
+
+ d->bd_inuse--;
+}
+
+/*
+ * As a cloning device driver, BPF needs to keep track of which device
+ * numbers are in use and which ones are not. A hash table, indexed by
+ * the minor device number, is used to store the pointers to the
+ * individual descriptors that are allocated in bpfopen().
+ * The functions below present the interface for that hash table to
+ * the rest of the driver.
+ */
+static struct bpf_d *
+bpf_dev_find(minor_t minor)
+{
+	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)minor;
+	struct bpf_d *found = NULL;
+
+	/*
+	 * The lookup status is ignored; the result pointer starts out NULL
+	 * and is returned as-is.
+	 */
+	(void) mod_hash_find(bpf_hash, key, (mod_hash_val_t *)&found);
+
+	return (found);
+}
+
+static void
+bpf_dev_add(struct bpf_d *d)
+{
+	/* The descriptor is keyed by its bd_dev value. */
+	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)d->bd_dev;
+
+	(void) mod_hash_insert(bpf_hash, key, (mod_hash_val_t)d);
+}
+
+static void
+bpf_dev_remove(struct bpf_d *d)
+{
+	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)d->bd_dev;
+	struct bpf_d *removed;
+
+	(void) mod_hash_remove(bpf_hash, key, (mod_hash_val_t *)&removed);
+	/* The value stored under this key must be the descriptor itself. */
+	ASSERT(removed == d);
+}
+
+/*
+ * bpf_dev_get should only ever be called for a minor number that exists,
+ * thus there should always be a pointer in the hash table that corresponds
+ * to it.  This expectation is checked with an ASSERT.
+ */
+static struct bpf_d *
+bpf_dev_get(minor_t minor)
+{
+	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)minor;
+	struct bpf_d *found = NULL;
+
+	(void) mod_hash_find(bpf_hash, key, (mod_hash_val_t *)&found);
+	ASSERT(found != NULL);
+
+	return (found);
+}
+
+static void
+bpf_debug_nic_action(char *txt, struct bpf_if *bp)
+{
+	/* Nothing is logged unless debugging is switched on. */
+	if (!bpf_debug)
+		return;
+
+	/* Format: <ifname> <txt> <mac name>/<linkid>/<zoneid>/<dlt> */
+	cmn_err(CE_CONT, "%s %s %s/%d/%d/%d\n", bp->bif_ifname, txt,
+	    MBPF_NAME(&bp->bif_mac, bp->bif_ifp), bp->bif_linkid,
+	    bp->bif_zoneid, bp->bif_dlt);
+}
+
+/*
+ * Finding a BPF network interface is a two pass job.
+ * In the first pass, the best possible match is made on zone, DLT and
+ * interface name.
+ * In the second pass, we allow global zone snoopers to attach to interfaces
+ * that are reserved for other zones.
+ * This ensures that the global zone will always see its own interfaces first
+ * before attaching to those that belong to a shared IP instance zone.
+ */
+static struct bpf_if *
+bpf_findif(struct bpf_d *d, char *ifname, int dlt)
+{
+	struct bpf_if *bp;
+
+	/*
+	 * First pass: the name, the descriptor's own zone and the DLT must
+	 * all match.  A dlt of -1 matches any DLT; entries with a zero
+	 * bif_ifp are never considered.
+	 */
+	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
+		if (bp->bif_ifp != 0 &&
+		    strcmp(ifname, bp->bif_ifname) == 0 &&
+		    bp->bif_zoneid == d->bd_zone &&
+		    (dlt == -1 || dlt == bp->bif_dlt))
+			return (bp);
+	}
+
+	/* Only global zone descriptors get the zone-agnostic second pass. */
+	if (d->bd_zone != GLOBAL_ZONEID)
+		return (NULL);
+
+	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
+		if (bp->bif_ifp != 0 &&
+		    strcmp(ifname, bp->bif_ifname) == 0 &&
+		    (dlt == -1 || dlt == bp->bif_dlt))
+			return (bp);
+	}
+
+	return (NULL);
+}
diff --git a/usr/src/uts/common/io/bpf/bpf.conf b/usr/src/uts/common/io/bpf/bpf.conf
new file mode 100644
index 0000000000..2a22bd1c74
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/bpf.conf
@@ -0,0 +1,26 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+name="bpf" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/io/bpf/bpf_dlt.c b/usr/src/uts/common/io/bpf/bpf_dlt.c
new file mode 100644
index 0000000000..9aef0ef16b
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/bpf_dlt.c
@@ -0,0 +1,103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/dlpi.h>
+#include <net/if.h>
+#include <net/dlt.h>
+
+/*
+ * This table provides a mapping of the DLPI data link types used in
+ * Solaris to the BPF data link types. Providing this translation in
+ * the kernel allows libpcap to be downloaded and used without any
+ * need for change.
+ *
+ * Each row is { DLPI type, BPF DLT, link layer header size }: column 0
+ * is the lookup key, column 1 is what bpf_dl_to_dlt() returns and
+ * column 2 is what bpf_dl_hdrsize() returns.
+ *
+ * Note that this table is not necessarily sorted.
+ *
+ * NOTE(review): both lookup routines bound their loop with sizeof, so
+ * the final { 0, 0 } row is searched like any other row rather than
+ * acting as a terminator.
+ */
+static uint_t dl_to_dlt[][3] = {
+ { DL_CSMACD, DLT_EN10MB, 14 }, /* IEEE 802.3 CSMA/CD */
+ { DL_TPB, DLT_NULL, 0 }, /* IEEE 802.4 Token Bus */
+ { DL_TPR, DLT_IEEE802, 0 }, /* IEEE 802.5 Token Ring */
+ { DL_METRO, DLT_NULL, 0 }, /* IEEE 802.6 Metro Net */
+ { DL_ETHER, DLT_EN10MB, 14 }, /* Ethernet Bus */
+ { DL_HDLC, DLT_C_HDLC, 0 }, /* Cisco HDLC protocol */
+ { DL_CHAR, DLT_NULL, 0 }, /* Character Synchr. proto */
+ { DL_CTCA, DLT_NULL, 0 }, /* IBM Channel-to-Channel */
+ { DL_FDDI, DLT_FDDI, 24 }, /* Fiber Distributed data */
+ { DL_FC, DLT_NULL, 0 }, /* Fibre Channel interface */
+ { DL_ATM, DLT_SUNATM, 0 }, /* ATM */
+ { DL_IPATM, DLT_ATM_CLIP, 0 }, /* ATM CLIP */
+ { DL_X25, DLT_NULL, 0 }, /* X.25 LAPB interface */
+ { DL_ISDN, DLT_NULL, 0 }, /* ISDN interface */
+ { DL_HIPPI, DLT_HIPPI, 0 }, /* HIPPI interface */
+ { DL_100VG, DLT_EN10MB, 14 }, /* 100 Based VG Ethernet */
+ { DL_100VGTPR, DLT_IEEE802, 0 }, /* 100 Based VG Token Ring */
+ { DL_ETH_CSMA, DLT_EN10MB, 14 }, /* ISO 8802/3 and Ethernet */
+ { DL_100BT, DLT_EN10MB, 14 }, /* 100 Base T */
+ { DL_IB, DLT_IPOIB, 44 }, /* Solaris IPoIB (infini.) */
+ { DL_FRAME, DLT_FRELAY, 0 }, /* Frame Relay LAPF */
+ { DL_MPFRAME, DLT_NULL, 0 }, /* Multi-protocol Frame Relay */
+ { DL_ASYNC, DLT_NULL, 0 }, /* Character Asynch. Protocol */
+ { DL_IPX25, DLT_NULL, 0 }, /* X.25 Classical IP */
+ { DL_LOOP, DLT_NULL, 0 }, /* software loopback */
+ { DL_IPV4, DLT_RAW, 0 }, /* IPv4 Tunnel Link */
+ { DL_IPV6, DLT_RAW, 0 }, /* IPv6 Tunnel Link */
+ { SUNW_DL_VNI, DLT_NULL, 0 }, /* Virtual network interface */
+ { DL_WIFI, DLT_IEEE802_11, 0 }, /* IEEE 802.11 */
+ { DL_IPNET, DLT_IPNET, 24 }, /* Solaris IP Observability */
+ { DL_OTHER, DLT_NULL, 0 }, /* Mediums not listed above */
+ { 0, 0 }
+};
+
+/*
+ * Given a data link type number used with DLPI on Solaris, return
+ * the equivalent data link type number for use with BPF.
+ */
+int
+bpf_dl_to_dlt(int dl)
+{
+	const size_t nrows = sizeof (dl_to_dlt) / sizeof (dl_to_dlt[0]);
+	size_t row;
+
+	/* Linear search on column 0; the first matching row wins. */
+	for (row = 0; row < nrows; row++) {
+		if (dl_to_dlt[row][0] == dl)
+			return (dl_to_dlt[row][1]);
+	}
+
+	/* No row matched. */
+	return (0);
+}
+
+/*
+ * Given a DLPI data link type for Solaris, return the expected header
+ * size of the link layer.
+ */
+int
+bpf_dl_hdrsize(int dl)
+{
+	const size_t nrows = sizeof (dl_to_dlt) / sizeof (dl_to_dlt[0]);
+	size_t row;
+
+	/* Linear search on column 0; the header size lives in column 2. */
+	for (row = 0; row < nrows; row++) {
+		if (dl_to_dlt[row][0] == dl)
+			return (dl_to_dlt[row][2]);
+	}
+
+	/* No row matched. */
+	return (0);
+}
diff --git a/usr/src/uts/common/io/bpf/bpf_filter.c b/usr/src/uts/common/io/bpf/bpf_filter.c
new file mode 100644
index 0000000000..db5b224a5e
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/bpf_filter.c
@@ -0,0 +1,576 @@
+/* $NetBSD: bpf_filter.c,v 1.35 2008/08/20 13:01:54 joerg Exp $ */
+
+/*
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf_filter.c 8.1 (Berkeley) 6/10/93
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/stream.h>
+#include <sys/byteorder.h>
+#include <sys/sdt.h>
+
+#define EXTRACT_SHORT(p) BE_IN16(p)
+#define EXTRACT_LONG(p) BE_IN32(p)
+
+#ifdef _KERNEL
+/* Number of data bytes held in one mblk: b_wptr - b_rptr. */
+#define M_LEN(_m) ((_m)->b_wptr - (_m)->b_rptr)
+/* Pointer to the first data byte (b_rptr) of an mblk, cast to type _t. */
+#define mtod(_a, _t) ((_t)((_a)->b_rptr))
+/*
+ * Walk m along b_cont, subtracting each block's length from k, until k
+ * indexes a byte inside m; len is left holding the length of that block.
+ * NOTE: this macro modifies all three of its arguments and executes
+ * "return (0)" in the *calling* function when the chain ends before
+ * offset k is reached.
+ */
+#define MINDEX(len, m, k) \
+{ \
+ len = M_LEN(m); \
+ while (k >= len) { \
+ k -= len; \
+ m = m->b_cont; \
+ if (m == 0) \
+ return (0); \
+ len = M_LEN(m); \
+ } \
+}
+
+static int m_xword(mblk_t *, uint32_t, int *);
+static int m_xhalf(mblk_t *, uint32_t, int *);
+
+/*
+ * Fetch the 32-bit big-endian word found k bytes into the mblk chain m.
+ * *err is set to 1 on entry and cleared to 0 only once a value has been
+ * extracted; callers must test it because 0 is also a legitimate result.
+ * A word may straddle two adjacent mblks; if the block following the one
+ * that contains offset k cannot supply the remaining bytes, the fetch
+ * fails.
+ */
+static int
+m_xword(mblk_t *m, uint32_t k, int *err)
+{
+ int len;
+ uchar_t *cp, *np;
+ mblk_t *m0;
+
+ *err = 1;
+ /* MINDEX may return (0) from this function, leaving *err == 1. */
+ MINDEX(len, m, k);
+ cp = mtod(m, uchar_t *) + k;
+ /* Fast path: all four bytes are in this mblk. */
+ if (len >= k + 4) {
+ *err = 0;
+ return (EXTRACT_LONG(cp));
+ }
+ m0 = m->b_cont;
+ /* len - k bytes remain here; the next mblk must hold the rest. */
+ if (m0 == 0 || M_LEN(m0) + len - k < 4) {
+ DTRACE_PROBE3(mblk_xword_fail, mblk_t *, m0, int, len, int, k);
+ return (0);
+ }
+ *err = 0;
+ np = mtod(m0, uchar_t *);
+ /* Assemble the word from len - k leading bytes plus the remainder. */
+ switch (len - k) {
+
+ case 1:
+ return ((cp[0] << 24) | (np[0] << 16) | (np[1] << 8) | np[2]);
+
+ case 2:
+ return ((cp[0] << 24) | (cp[1] << 16) | (np[0] << 8) | np[1]);
+
+ default:
+ return ((cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | np[0]);
+ }
+}
+
+/*
+ * Fetch the 16-bit big-endian value found k bytes into the mblk chain m.
+ * *err is set to 1 on entry and cleared to 0 only once a value has been
+ * extracted; callers must test it because 0 is also a legitimate result.
+ * The value may straddle two adjacent mblks, in which case the second
+ * byte is taken from the start of the following block.
+ */
+static int
+m_xhalf(mblk_t *m, uint32_t k, int *err)
+{
+	int len;
+	uchar_t *cp;
+	mblk_t *m0;
+
+	*err = 1;
+	/* MINDEX may return (0) from this function, leaving *err == 1. */
+	MINDEX(len, m, k);
+	cp = mtod(m, uchar_t *) + k;
+	/* Fast path: both bytes are in this mblk. */
+	if (len >= k + 2) {
+		*err = 0;
+		return (EXTRACT_SHORT(cp));
+	}
+	m0 = m->b_cont;
+	/*
+	 * The second byte has to come from the next mblk.  Fail if there
+	 * is no such block or if it holds no data; reading b_rptr[0] of an
+	 * empty block would fetch a byte that is not part of the packet.
+	 */
+	if (m0 == 0 || M_LEN(m0) < 1) {
+		DTRACE_PROBE3(mblk_xhalf_fail, mblk_t *, m0, int, len, int, k);
+		return (0);
+	}
+	*err = 0;
+	return ((cp[0] << 8) | mtod(m0, uchar_t *)[0]);
+}
+#else /* _KERNEL */
+#include <stdlib.h>
+#endif /* !_KERNEL */
+
+#include <net/bpf.h>
+
+/*
+ * Execute the filter program starting at pc on the packet p
+ * wirelen is the length of the original packet
+ * buflen is the amount of data present
+ * When buflen is non-0, p is a pointer to the start of the packet and the
+ * packet is only in one mblk_t.
+ * When buflen is 0, p is an mblk_t pointer.
+ *
+ * Returns the value produced by the filter's BPF_RET instruction,
+ * (uint_t)-1 when pc is NULL (no filter means accept all), or 0 when a
+ * packet load is out of range, a division by zero is attempted or an
+ * unknown instruction is met in the kernel.
+ */
+uint_t
+bpf_filter(struct bpf_insn *pc, uchar_t *p, uint_t wirelen, uint_t buflen)
+{
+	uint32_t A, X, k;
+	/*
+	 * The scratch store is zeroed so that a program which loads a
+	 * cell it never stored to cannot observe stale stack contents.
+	 */
+	uint32_t mem[BPF_MEMWORDS] = { 0 };
+
+	if (pc == 0)
+		/*
+		 * No filter means accept all.
+		 */
+		return ((uint_t)-1);
+	A = 0;
+	X = 0;
+	--pc;
+	/* CONSTCOND */
+	while (1) {
+		++pc;
+		switch (pc->code) {
+
+		default:
+#ifdef _KERNEL
+			DTRACE_PROBE1(bpf_insn_unknown,
+			    struct bpf_insn *, pc);
+			return (0);
+#else
+			abort();
+#endif
+		case BPF_RET|BPF_K:
+			return ((uint_t)pc->k);
+
+		case BPF_RET|BPF_A:
+			return ((uint_t)A);
+
+		/*
+		 * The multi-byte packet loads below test
+		 * "k > buflen || size > buflen - k" rather than
+		 * "k + size > buflen": the sum can wrap around when
+		 * size_t is 32 bits wide, which would let a huge k pass
+		 * the check and index far outside the packet buffer.
+		 */
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = pc->k;
+			if (k > buflen || sizeof (int32_t) > buflen - k) {
+#ifdef _KERNEL
+				int merr = 0;
+
+				if (buflen != 0)
+					return (0);
+				A = m_xword((mblk_t *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = EXTRACT_LONG(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = pc->k;
+			if (k > buflen || sizeof (int16_t) > buflen - k) {
+#ifdef _KERNEL
+				int merr = 0;
+
+				if (buflen != 0)
+					return (0);
+				A = m_xhalf((mblk_t *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = EXTRACT_SHORT(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_ABS:
+			k = pc->k;
+			if (k >= buflen) {
+#ifdef _KERNEL
+				mblk_t *m;
+				int len;
+
+				if (buflen != 0)
+					return (0);
+				m = (mblk_t *)p;
+				/* Returns 0 if k is beyond the chain. */
+				MINDEX(len, m, k);
+				A = mtod(m, uchar_t *)[k];
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = p[k];
+			continue;
+
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = wirelen;
+			continue;
+
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = wirelen;
+			continue;
+
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + pc->k;
+			if (k > buflen || sizeof (int32_t) > buflen - k) {
+#ifdef _KERNEL
+				int merr = 0;
+
+				if (buflen != 0)
+					return (0);
+				A = m_xword((mblk_t *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = EXTRACT_LONG(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + pc->k;
+			if (k > buflen || sizeof (int16_t) > buflen - k) {
+#ifdef _KERNEL
+				int merr = 0;
+
+				if (buflen != 0)
+					return (0);
+				A = m_xhalf((mblk_t *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = EXTRACT_SHORT(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + pc->k;
+			if (k >= buflen) {
+#ifdef _KERNEL
+				mblk_t *m;
+				int len;
+
+				if (buflen != 0)
+					return (0);
+				m = (mblk_t *)p;
+				/* Returns 0 if k is beyond the chain. */
+				MINDEX(len, m, k);
+				A = mtod(m, uchar_t *)[k];
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = p[k];
+			continue;
+
+		case BPF_LDX|BPF_MSH|BPF_B:
+			k = pc->k;
+			if (k >= buflen) {
+#ifdef _KERNEL
+				mblk_t *m;
+				int len;
+
+				if (buflen != 0)
+					return (0);
+				m = (mblk_t *)p;
+				/* Returns 0 if k is beyond the chain. */
+				MINDEX(len, m, k);
+				X = (mtod(m, char *)[k] & 0xf) << 2;
+				continue;
+#else
+				return (0);
+#endif
+			}
+			X = (p[pc->k] & 0xf) << 2;
+			continue;
+
+		case BPF_LD|BPF_IMM:
+			A = pc->k;
+			continue;
+
+		case BPF_LDX|BPF_IMM:
+			X = pc->k;
+			continue;
+
+		case BPF_LD|BPF_MEM:
+			A = mem[pc->k];
+			continue;
+
+		case BPF_LDX|BPF_MEM:
+			X = mem[pc->k];
+			continue;
+
+		case BPF_ST:
+			mem[pc->k] = A;
+			continue;
+
+		case BPF_STX:
+			mem[pc->k] = X;
+			continue;
+
+		case BPF_JMP|BPF_JA:
+			pc += pc->k;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (A > pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (A >= pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (A == pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return (0);
+			A /= X;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_X:
+			A <<= X;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_X:
+			A >>= X;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += pc->k;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_K:
+			/*
+			 * Reject a zero divisor here as well, the same
+			 * way the BPF_X form does, so that a program which
+			 * was not validated cannot divide by zero.
+			 */
+			if (pc->k == 0)
+				return (0);
+			A /= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A <<= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A >>= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_NEG:
+			A = -A;
+			continue;
+
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		}
+	}
+	/* NOTREACHED */
+}
+
+#ifdef _KERNEL
+/*
+ * Return true if the 'fcode' is a valid filter program.
+ * The constraints are that each jump be forward and to a valid
+ * code, that memory accesses are within valid ranges (to the
+ * extent that this can be checked statically; loads of packet
+ * data have to be, and are, also checked at run time), and that
+ * the code terminates with either an accept or reject.
+ *
+ * The kernel needs to be able to verify an application's filter code.
+ * Otherwise, a bogus program could easily crash the system.
+ *
+ * f is the array of instructions and len the number of entries in it.
+ * Returns 1 when the program is acceptable and 0 when it is not; a len
+ * outside the range 1..BPF_MAXINSNS is rejected before any instruction
+ * is examined.
+ */
+int
+bpf_validate(struct bpf_insn *f, int len)
+{
+ uint_t i, from;
+ struct bpf_insn *p;
+
+ if (len < 1 || len > BPF_MAXINSNS)
+ return (0);
+
+ for (i = 0; i < len; ++i) {
+ p = &f[i];
+ DTRACE_PROBE1(bpf_valid_insn, struct bpf_insn *, p);
+ switch (BPF_CLASS(p->code)) {
+ /*
+ * Check that memory operations use valid addresses.
+ */
+ case BPF_LD:
+ case BPF_LDX:
+ switch (BPF_MODE(p->code)) {
+ case BPF_MEM:
+ if (p->k >= BPF_MEMWORDS)
+ return (0);
+ break;
+ /* Packet and immediate loads need no static check. */
+ case BPF_ABS:
+ case BPF_IND:
+ case BPF_MSH:
+ case BPF_IMM:
+ case BPF_LEN:
+ break;
+ default:
+ return (0);
+ }
+ break;
+ case BPF_ST:
+ case BPF_STX:
+ if (p->k >= BPF_MEMWORDS)
+ return (0);
+ break;
+ case BPF_ALU:
+ switch (BPF_OP(p->code)) {
+ case BPF_ADD:
+ case BPF_SUB:
+ case BPF_MUL:
+ case BPF_OR:
+ case BPF_AND:
+ case BPF_LSH:
+ case BPF_RSH:
+ case BPF_NEG:
+ break;
+ case BPF_DIV:
+ /*
+ * Check for constant division by 0.
+ */
+ if (BPF_RVAL(p->code) == BPF_K && p->k == 0)
+ return (0);
+ break;
+ default:
+ return (0);
+ }
+ break;
+ case BPF_JMP:
+ /*
+ * Check that jumps are within the code block,
+ * and that unconditional branches don't go
+ * backwards as a result of an overflow.
+ * Unconditional branches have a 32-bit offset,
+ * so they could overflow; we check to make
+ * sure they don't. Conditional branches have
+ * an 8-bit offset, and the from address is <=
+ * BPF_MAXINSNS, and we assume that BPF_MAXINSNS
+ * is sufficiently small that adding 255 to it
+ * won't overflow.
+ *
+ * We know that len is <= BPF_MAXINSNS, and we
+ * assume that BPF_MAXINSNS is < the maximum size
+ * of a uint_t, so that i + 1 doesn't overflow.
+ */
+ from = i + 1;
+ switch (BPF_OP(p->code)) {
+ case BPF_JA:
+ if (from + p->k < from || from + p->k >= len)
+ return (0);
+ break;
+ case BPF_JEQ:
+ case BPF_JGT:
+ case BPF_JGE:
+ case BPF_JSET:
+ if (from + p->jt >= len || from + p->jf >= len)
+ return (0);
+ break;
+ default:
+ return (0);
+ }
+ break;
+ /* BPF_RET and BPF_MISC are accepted without further checks. */
+ case BPF_RET:
+ break;
+ case BPF_MISC:
+ break;
+ default:
+ return (0);
+ }
+ }
+
+ /* The final instruction must be a return. */
+ return (BPF_CLASS(f[len - 1].code) == BPF_RET);
+}
+#endif
diff --git a/usr/src/uts/common/io/bpf/bpf_mac.c b/usr/src/uts/common/io/bpf/bpf_mac.c
new file mode 100644
index 0000000000..e075aefa7d
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/bpf_mac.c
@@ -0,0 +1,165 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/stream.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+
+/*
+ * This file provides the link to the functions required from the mac
+ * module. It is currently in bpf, rather than mac (like ipnet_bpf)
+ * because of the mac/dls split. The bpf driver needs to know when
+ * interfaces appear and disappear and the best place for that is in
+ * dls. Unfortunately all of the other functions used here are found
+ * in the mac module, making it seem ill suited to being at home in
+ * dls. Similarly it has even less purpose being in mac as it is
+ * today.
+ */
+static int mac_bpf_open(const char *, uintptr_t *, zoneid_t);
+static void mac_bpf_close(uintptr_t);
+static const char *mac_bpf_name(uintptr_t);
+static int mac_bpf_type(uintptr_t);
+static void mac_bpf_sdu_get(uintptr_t, uint_t *);
+static int mac_bpf_tx(uintptr_t, mblk_t *);
+static uintptr_t mac_bpf_promisc_add(uintptr_t, int, void *, uintptr_t *, int);
+static void mac_bpf_promisc_remove(uintptr_t);
+static int mac_bpf_client_open(uintptr_t, uintptr_t *);
+static void mac_bpf_client_close(uintptr_t);
+static const char *mac_bpf_client_name(uintptr_t);
+static int mac_bpf_getlinkid(const char *, datalink_id_t *, zoneid_t);
+
+/*
+ * Provider description handed to BPF for mac devices.  The initializer
+ * is positional, so the order of the entries has to match the member
+ * order of bpf_provider_t, which is declared outside this file.
+ * NOTE(review): the three client entries are listed close, name, open,
+ * unlike the order of the prototypes above - confirm against the
+ * structure definition.
+ */
+bpf_provider_t bpf_mac = {
+ BPR_MAC,
+ mac_bpf_open,
+ mac_bpf_close,
+ mac_bpf_name,
+ mac_bpf_type,
+ mac_bpf_sdu_get,
+ mac_bpf_tx,
+ mac_bpf_promisc_add,
+ mac_bpf_promisc_remove,
+ mac_bpf_getlinkid,
+ mac_bpf_client_close,
+ mac_bpf_client_name,
+ mac_bpf_client_open
+};
+
+/*
+ * Open the mac called 'name' with mac_open(), storing the handle it
+ * produces in *mhandlep, and return mac_open()'s result.  The zoneid
+ * argument is not used.
+ */
+/*ARGSUSED*/
+static int
+mac_bpf_open(const char *name, uintptr_t *mhandlep, zoneid_t zoneid)
+{
+ return (mac_open(name, (mac_handle_t *)mhandlep));
+}
+
+/*
+ * Close a mac handle; mhandle is cast back to a mac_handle_t and passed
+ * to mac_close().
+ */
+static void
+mac_bpf_close(uintptr_t mhandle)
+{
+ mac_close((mac_handle_t)mhandle);
+}
+
+/*
+ * Return whatever mac_name() reports for the mac handle mhandle.
+ */
+static const char *
+mac_bpf_name(uintptr_t mhandle)
+{
+ return (mac_name((mac_handle_t)mhandle));
+}
+
+/*
+ * Return whatever mac_type() reports for the mac handle mhandle.
+ */
+static int
+mac_bpf_type(uintptr_t mhandle)
+{
+ return (mac_type((mac_handle_t)mhandle));
+}
+
+/*
+ * Fetch an SDU value for the mac handle mhandle into *mtup.  NULL is
+ * passed for mac_sdu_get()'s second argument and mtup for its third.
+ * NOTE(review): presumably these are the minimum and maximum SDU, so
+ * only the maximum is retrieved - confirm against mac_sdu_get().
+ */
+static void
+mac_bpf_sdu_get(uintptr_t mhandle, uint_t *mtup)
+{
+ mac_sdu_get((mac_handle_t)mhandle, NULL, mtup);
+}
+
+/*
+ * Transmit pkt through the mac client handle chandle using mac_tx() and
+ * return its result converted to int.
+ */
+static int
+mac_bpf_tx(uintptr_t chandle, mblk_t *pkt)
+{
+ /*
+ * If the mac layer cannot deliver a packet as requested by BPF then
+ * simply have the mac layer drop it. BPF isn't interested in doing
+ * any amount of retry - that's left to the application.
+ */
+ return (mac_tx((mac_client_handle_t)chandle, pkt, 0,
+ MAC_DROP_ON_NO_DESC, NULL));
+}
+
+/*
+ * Register bpf_mtap as a promiscuous callback on the mac client chandle
+ * via mac_promisc_add().  'how', 'arg' and 'flags' are passed through
+ * unchanged and the promiscuous handle is stored in *promisc.  The value
+ * returned is mac_promisc_add()'s result.
+ */
+static uintptr_t
+mac_bpf_promisc_add(uintptr_t chandle, int how, void *arg, uintptr_t *promisc,
+ int flags)
+{
+ return (mac_promisc_add((mac_client_handle_t)chandle, how, bpf_mtap,
+ arg, (mac_promisc_handle_t *)promisc, flags));
+}
+
+/*
+ * Remove a promiscuous callback; phandle is cast back to a
+ * mac_promisc_handle_t and passed to mac_promisc_remove().
+ */
+static void
+mac_bpf_promisc_remove(uintptr_t phandle)
+{
+ mac_promisc_remove((mac_promisc_handle_t)phandle);
+}
+
+/*
+ * Open a mac client on the mac handle mhandle, storing the client handle
+ * in *chandlep.  No client name is supplied (NULL) and the flag
+ * MAC_OPEN_FLAGS_USE_DATALINK_NAME is passed to mac_client_open(), whose
+ * result is returned.
+ */
+static int
+mac_bpf_client_open(uintptr_t mhandle, uintptr_t *chandlep)
+{
+ return (mac_client_open((mac_handle_t)mhandle,
+ (mac_client_handle_t *)chandlep, NULL,
+ MAC_OPEN_FLAGS_USE_DATALINK_NAME));
+}
+
+/*
+ * Close the mac client handle chandle with mac_client_close(), passing 0
+ * as its flags argument.
+ */
+static void
+mac_bpf_client_close(uintptr_t chandle)
+{
+ mac_client_close((mac_client_handle_t)chandle, 0);
+}
+
+/*
+ * Return whatever mac_client_name() reports for the client handle
+ * chandle.
+ */
+static const char *
+mac_bpf_client_name(uintptr_t chandle)
+{
+ return (mac_client_name((mac_client_handle_t)chandle));
+}
+
+/*
+ * Map a link name to its datalink id, stored in *idp.  Returns 0 on
+ * success, otherwise the error from the second lookup attempt.  The
+ * zoneid argument is not used.
+ */
+/*ARGSUSED*/
+static int
+mac_bpf_getlinkid(const char *name, datalink_id_t *idp, zoneid_t zoneid)
+{
+ int error;
+
+ /*
+ * If at first we don't succeed, try again, just in case it is in
+ * hiding. The first call requires the datalink management daemon
+ * (the authoritative source of information about name to id mapping)
+ * to be present and answering upcalls, the second does not.
+ */
+ error = dls_mgmt_get_linkid(name, idp);
+ if (error != 0)
+ error = dls_devnet_macname2linkid(name, idp);
+
+ return (error);
+}
diff --git a/usr/src/uts/common/io/bpf/bpf_mod.c b/usr/src/uts/common/io/bpf/bpf_mod.c
new file mode 100644
index 0000000000..166e9f08fc
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/bpf_mod.c
@@ -0,0 +1,443 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/buf.h>
+#include <sys/modctl.h>
+#include <sys/open.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/sunddi.h>
+#include <sys/mac_provider.h>
+#include <sys/dls_impl.h>
+#include <inet/ipnet.h>
+
+extern int bpfopen(dev_t *devp, int flag, int otyp, cred_t *cred);
+extern int bpfclose(dev_t dev, int flag, int otyp, cred_t *cred);
+extern int bpfread(dev_t dev, struct uio *uio_p, cred_t *cred_p);
+extern int bpfwrite(dev_t dev, struct uio *uio, cred_t *cred);
+extern int bpfchpoll(dev_t, short, int, short *, struct pollhead **);
+extern int bpfioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+extern int bpfilterattach(void);
+extern int bpfilterdetach(void);
+
+extern bpf_provider_t bpf_mac;
+extern bpf_provider_t bpf_ipnet;
+
+static int bpf_attach(dev_info_t *, ddi_attach_cmd_t);
+static void *bpf_create_inst(const netid_t);
+static void bpf_destroy_inst(const netid_t, void *);
+static int bpf_detach(dev_info_t *, ddi_detach_cmd_t);
+static int bpf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static int bpf_provider_add(bpf_provider_t *);
+static int bpf_provider_remove(bpf_provider_t *);
+static void bpf_shutdown_inst(const netid_t, void *);
+
+extern void bpfdetach(uintptr_t);
+extern int bpf_bufsize;
+extern int bpf_maxbufsize;
+
+static LIST_HEAD(, bpf_provider_list) bpf_providers;
+
+/*
+ * Character device entry points for the bpf driver.  The initializer is
+ * positional; the slot names in the comments follow the member order of
+ * struct cb_ops as described in cb_ops(9S).
+ */
+static struct cb_ops bpf_cb_ops = {
+ bpfopen, /* open */
+ bpfclose, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ bpfread, /* read */
+ bpfwrite, /* write */
+ bpfioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ bpfchpoll, /* poll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* streamtab: not a STREAMS driver */
+ D_MTSAFE, /* flags */
+ CB_REV, /* cb_ops revision */
+ nodev, /* aread */
+ nodev, /* awrite */
+};
+
+/*
+ * Driver operations for bpf.  The initializer is positional; the slot
+ * names in the comments follow the member order of struct dev_ops as
+ * described in dev_ops(9S).  Members after bus_ops are not listed here
+ * and are therefore zero-initialized.
+ */
+static struct dev_ops bpf_ops = {
+ DEVO_REV, /* dev_ops revision */
+ 0, /* reference count */
+ bpf_getinfo, /* getinfo */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ bpf_attach, /* attach */
+ bpf_detach, /* detach */
+ nodev, /* reset */
+ &bpf_cb_ops, /* cb_ops */
+ (struct bus_ops *)0 /* bus_ops */
+};
+
+extern struct mod_ops mod_driverops;
+static struct modldrv bpfmod = {
+ &mod_driverops, "Berkely Packet Filter", &bpf_ops
+};
+static struct modlinkage modlink1 = { MODREV_1, &bpfmod, NULL };
+
+static dev_info_t *bpf_dev_info = NULL;
+static net_instance_t *bpf_inst = NULL;
+
+/*
+ * Loadable module entry point: install the module described by modlink1
+ * and return mod_install()'s result.  Declared with a (void) parameter
+ * list, like _fini(), so that the definition is a proper prototype
+ * rather than an old-style declarator.
+ */
+int
+_init(void)
+{
+	return (mod_install(&modlink1));
+}
+
+/*
+ * Loadable module exit point: hand modlink1 to mod_remove() and pass its
+ * result straight back to the caller.
+ */
+int
+_fini(void)
+{
+	return (mod_remove(&modlink1));
+}
+
+/*
+ * Loadable module information entry point: let mod_info() fill in
+ * *modinfop from modlink1 and pass its result straight back.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlink1, modinfop));
+}
+
+/*
+ * attach(9E) entry point.
+ *
+ * DDI_ATTACH reads the buffer size tunables, creates the "bpf" minor
+ * node, initialises the provider list, attaches the filter code,
+ * registers the mac provider, hooks BPF into dls and ipnet and finally
+ * registers with netinfo so that zone shutdown is noticed.  DDI_RESUME
+ * needs no work.  Any other command is refused without touching driver
+ * state.
+ *
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+bpf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+
+	switch (cmd) {
+	case DDI_ATTACH:
+		/*
+		 * Default buffer size from bpf's driver.conf file
+		 */
+		bpf_bufsize = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
+		    "buf_size", 32 * 1024);
+		/*
+		 * Maximum buffer size from bpf's driver.conf file
+		 */
+		bpf_maxbufsize = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
+		    "max_buf_size", 16 * 1024 * 1024);
+
+		if (ddi_create_minor_node(dip, "bpf", S_IFCHR, 0,
+		    DDI_PSEUDO, 0) == DDI_FAILURE) {
+			ddi_remove_minor_node(dip, NULL);
+			goto attach_failed;
+		}
+		bpf_dev_info = dip;
+		ddi_report_dev(dip);
+
+		LIST_INIT(&bpf_providers);
+
+		if (bpfilterattach() != 0)
+			goto attach_failed;
+
+		/*
+		 * The registration must not live inside ASSERT(): that
+		 * macro expands to nothing on non-DEBUG kernels, which
+		 * would leave the mac provider unregistered.
+		 */
+		if (bpf_provider_add(&bpf_mac) != 0)
+			goto attach_failed;
+		dls_set_bpfattach(bpfattach, bpfdetach);
+		ipnet_set_bpfattach(bpfattach, bpfdetach, GLOBAL_ZONEID,
+		    bpf_itap, bpf_provider_add);
+
+		/*
+		 * Set up to be notified about zones coming and going
+		 * so that proper interaction with ipnet is possible.
+		 */
+		bpf_inst = net_instance_alloc(NETINFO_VERSION);
+		if (bpf_inst == NULL)
+			goto attach_failed;
+		bpf_inst->nin_name = "bpf";
+		bpf_inst->nin_create = bpf_create_inst;
+		bpf_inst->nin_destroy = bpf_destroy_inst;
+		bpf_inst->nin_shutdown = bpf_shutdown_inst;
+		if (net_instance_register(bpf_inst) != 0) {
+			/*
+			 * Forget the pointer once it is freed so that the
+			 * cleanup below does not hand it to netinfo again.
+			 */
+			net_instance_free(bpf_inst);
+			bpf_inst = NULL;
+			goto attach_failed;
+		}
+
+		return (DDI_SUCCESS);
+	case DDI_RESUME:
+		return (DDI_SUCCESS);
+	default:
+		/*
+		 * An unrecognised command must not tear down an instance
+		 * that may already be attached.
+		 */
+		return (DDI_FAILURE);
+	}
+
+attach_failed:
+
+	/*
+	 * Use our own detach routine to toss away any stuff we allocated
+	 * above.  It copes with a partially completed attach and calls
+	 * bpfilterdetach() itself.
+	 */
+	(void) bpf_detach(dip, DDI_DETACH);
+	return (DDI_FAILURE);
+}
+
+/*
+ * detach(9E) entry point, also used by bpf_attach() to undo a partially
+ * completed attach.
+ *
+ * DDI_DETACH unregisters from netinfo, unhooks BPF from ipnet and dls,
+ * detaches the filter code and drops the mac provider.  It may be
+ * retried after a failure.  DDI_SUSPEND and DDI_PM_SUSPEND need no work.
+ *
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+bpf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	int error;
+
+	switch (cmd) {
+	case DDI_DETACH:
+		/*
+		 * bpf_inst is NULL if attach failed before it was
+		 * registered, or if an earlier detach attempt got past
+		 * this point and failed later on.
+		 */
+		if (bpf_inst != NULL) {
+			if (net_instance_unregister(bpf_inst) != 0)
+				return (DDI_FAILURE);
+			net_instance_free(bpf_inst);
+			bpf_inst = NULL;
+		}
+
+		ipnet_set_bpfattach(NULL, NULL, GLOBAL_ZONEID, NULL,
+		    bpf_provider_remove);
+		/*
+		 * Whilst we don't want to be notified about new devices that
+		 * are being detached, to set the bpf detach function to NULL
+		 * introduces a race condition between this kernel module
+		 * unloading and a network interface driver also unloading.
+		 */
+		dls_set_bpfattach(NULL, bpfdetach);
+		error = bpfilterdetach();
+		if (error != 0)
+			return (DDI_FAILURE);
+		/*
+		 * Now everything is clean, set the detach to NULL too.
+		 */
+		dls_set_bpfattach(NULL, NULL);
+		/*
+		 * Remove the provider outside of ASSERT() so that it also
+		 * happens on non-DEBUG kernels.  ESRCH is tolerated because
+		 * a failed attach may get here before the provider was
+		 * added.
+		 */
+		error = bpf_provider_remove(&bpf_mac);
+		ASSERT(error == 0 || error == ESRCH);
+
+		ASSERT(LIST_EMPTY(&bpf_providers));
+
+		ddi_prop_remove_all(dip);
+
+		return (DDI_SUCCESS);
+	case DDI_SUSPEND:
+	case DDI_PM_SUSPEND:
+		return (DDI_SUCCESS);
+	default:
+		break;
+	}
+	return (DDI_FAILURE);
+}
+
+/*
+ * getinfo(9E) entry point.  DDI_INFO_DEVT2DEVINFO yields the saved
+ * dev_info pointer and DDI_INFO_DEVT2INSTANCE yields instance 0, both
+ * with DDI_SUCCESS; every other request gets DDI_FAILURE and leaves
+ * *result alone.
+ */
+/*ARGSUSED*/
+static int
+bpf_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
+		*result = bpf_dev_info;
+		return (DDI_SUCCESS);
+	}
+
+	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
+		*result = (void *)0;
+		return (DDI_SUCCESS);
+	}
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * The two functions below work with and manage a list of providers that
+ * supply BPF with packets. Providers are only added and removed
+ * when the bpf module is attaching/detaching, thus there is no race
+ * condition to guard against by using locks, as the kernel module system
+ * takes care of this for us. Similarly, bpf_provider_tickle() is called
+ * from bpf_setif, which implies an open file descriptor that would get
+ * in the way of detach being active.
+ */
+/*
+ * Put 'provider' at the head of the bpf_providers list.  Returns EEXIST
+ * if the list already holds it, otherwise 0.  The list entry is obtained
+ * with a KM_SLEEP allocation.
+ */
+static int
+bpf_provider_add(bpf_provider_t *provider)
+{
+	bpf_provider_list_t *entry;
+
+	/* Refuse to register the same provider twice. */
+	LIST_FOREACH(entry, &bpf_providers, bpl_next) {
+		if (entry->bpl_what == provider)
+			return (EEXIST);
+	}
+
+	entry = kmem_alloc(sizeof (bpf_provider_list_t), KM_SLEEP);
+	entry->bpl_what = provider;
+	LIST_INSERT_HEAD(&bpf_providers, entry, bpl_next);
+
+	return (0);
+}
+
+/*
+ * Take 'provider' off the bpf_providers list and free its list entry.
+ * Returns 0 when it was found and removed, ESRCH when it is not on the
+ * list.
+ */
+static int
+bpf_provider_remove(bpf_provider_t *provider)
+{
+	bpf_provider_list_t *entry;
+
+	LIST_FOREACH(entry, &bpf_providers, bpl_next) {
+		if (entry->bpl_what != provider)
+			continue;
+
+		/* Found it: unlink, release and stop searching. */
+		LIST_REMOVE(entry, bpl_next);
+		kmem_free(entry, sizeof (*entry));
+		return (0);
+	}
+
+	return (ESRCH);
+}
+
+/*
+ * Search the provider list for the entry whose bpr_unit equals 'who' and
+ * return the structure holding the functions for that packet provider,
+ * or NULL when no registered provider has that id.
+ */
+bpf_provider_t *
+bpf_find_provider_by_id(int who)
+{
+	bpf_provider_list_t *entry;
+	bpf_provider_t *found = NULL;
+
+	LIST_FOREACH(entry, &bpf_providers, bpl_next) {
+		if (entry->bpl_what->bpr_unit == who) {
+			found = entry->bpl_what;
+			break;
+		}
+	}
+
+	return (found);
+}
+
+/*
+ * This function is used by bpf_setif() to force an open() to be called on
+ * a given device name. If a device has been unloaded by the kernel, but it
+ * is still recognised, then calling this function will hopefully cause it
+ * to be loaded back into the kernel. When this function is called, it is
+ * not known which packet provider the name belongs to so all are tried.
+ *
+ * Returns EWOULDBLOCK if at least one open attempt succeeded and ENXIO
+ * if none did; it never returns 0.
+ */
+int
+bpf_provider_tickle(char *name, zoneid_t zone)
+{
+ bpf_provider_list_t *bp;
+ uintptr_t handle;
+ int tickled = 0;
+
+ LIST_FOREACH(bp, &bpf_providers, bpl_next) {
+ handle = 0;
+ /* A successful open is closed again at once. */
+ if (bp->bpl_what->bpr_open(name, &handle, zone) == 0) {
+ bp->bpl_what->bpr_close(handle);
+ tickled++;
+ } else if (bp->bpl_what->bpr_unit == BPR_MAC) {
+ /*
+ * For mac devices, sometimes the open/close is not
+ * enough. In that case, further provocation is
+ * attempted by fetching the linkid and trying to
+ * use that as the key for open, rather than the
+ * name.
+ */
+ datalink_id_t id;
+
+ if (bp->bpl_what->bpr_getlinkid(name, &id,
+ zone) == 0) {
+ /* Retry by name first, then fall back to the id. */
+ if (bp->bpl_what->bpr_open(name, &handle,
+ zone) == 0) {
+ bp->bpl_what->bpr_close(handle);
+ tickled++;
+ } else {
+ mac_handle_t mh;
+
+ if (mac_open_by_linkid(id, &mh) == 0) {
+ mac_close(mh);
+ tickled++;
+ }
+ }
+ }
+ }
+
+ }
+
+ if (tickled != 0)
+ return (EWOULDBLOCK);
+
+ return (ENXIO);
+}
+
+/*
+ * The following three functions provide the necessary callbacks into
+ * the netinfo API. This API is primarily used to trigger awareness of
+ * when a zone is being torn down, allowing BPF to drive IPNET to
+ * tell it which interfaces need to go away.
+ */
+/*
+ * netinfo create callback (installed as nin_create in bpf_attach).
+ * Returns the address of bpf_itap cast to void *; the netid argument is
+ * not used.
+ */
+/*ARGSUSED*/
+static void *
+bpf_create_inst(const netid_t netid)
+{
+ /*
+ * BPF does not keep any per-instance state, its list of
+ * interfaces is global, as is its device hash table.
+ */
+ return ((void *)bpf_itap);
+}
+
+/*
+ * netinfo shutdown callback (installed as nin_shutdown in bpf_attach).
+ * For every zone other than the global zone, clear the hooks that were
+ * handed to ipnet for that zone.  The arg argument is not used.
+ */
+/*ARGSUSED*/
+static void
+bpf_shutdown_inst(const netid_t netid, void *arg)
+{
+	zoneid_t zoneid = net_getzoneidbynetid(netid);
+
+	/* Nothing is done for the global zone. */
+	if (zoneid == GLOBAL_ZONEID)
+		return;
+
+	ipnet_set_bpfattach(NULL, NULL, zoneid, NULL, NULL);
+}
+
+/*
+ * netinfo destroy callback (installed as nin_destroy in bpf_attach).
+ * It does nothing; neither argument is used.
+ */
+/*ARGSUSED*/
+static void
+bpf_destroy_inst(const netid_t netid, void *arg)
+{
+}
+
+/*
+ * This function is required, and is called from bpfopen, rather than
+ * bpf_create_inst() for the simple reason that when bpf_create_inst()
+ * is called, the zone is not fully initialised yet. This leads to
+ * functions that map the zoneid to pointers failing (when they should
+ * not be failing) and thus the system panic'ing.
+ *
+ * It hands ipnet, for the given zoneid, the same hooks that bpf_attach
+ * installs for the global zone.
+ */
+void
+bpf_open_zone(const zoneid_t zoneid)
+{
+ ipnet_set_bpfattach(bpfattach, bpfdetach,
+ zoneid, bpf_itap, bpf_provider_add);
+}
diff --git a/usr/src/uts/common/io/bpf/net/Makefile b/usr/src/uts/common/io/bpf/net/Makefile
new file mode 100644
index 0000000000..77261ee848
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/net/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/common/io/bpf/net/Makefile
+#
+# include global definitions
+include ../../../../../Makefile.master
+
+# Headers exported from this directory.
+HDRS= bpf.h bpfdesc.h dlt.h
+
+# Directory under $(ROOT) that receives the headers.
+ROOTDIRS= $(ROOT)/usr/include/net
+
+# Installed path of every header named in HDRS.
+ROOTHDRS= $(HDRS:%=$(ROOT)/usr/include/net/%)
+
+# Install a header from this directory into $(ROOTDIRS).
+$(ROOTDIRS)/%: %
+ $(INS.file)
+
+.KEEP_STATE:
+
+# Create the destination directory, then install all headers.
+install_h: $(ROOTDIRS) $(ROOTHDRS)
+
+$(ROOTDIRS):
+ $(INS.dir)
+
+# NOTE(review): CHECKHDRS is not assigned in this Makefile; unless an
+# included file defines it, this target has no prerequisites - confirm.
+check: $(CHECKHDRS)
diff --git a/usr/src/uts/common/io/bpf/net/bpf.h b/usr/src/uts/common/io/bpf/net/bpf.h
new file mode 100644
index 0000000000..4a15dcf11d
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/net/bpf.h
@@ -0,0 +1,298 @@
+/* $NetBSD: bpf.h,v 1.50 2009/01/13 19:10:52 christos Exp $ */
+
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.h 8.2 (Berkeley) 1/9/95
+ * @(#) Header: bpf.h,v 1.36 97/06/12 14:29:53 leres Exp (LBL)
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _NET_BPF_H_
+#define _NET_BPF_H_
+
+#include <sys/time.h>
+#include <sys/types32.h>
+#include <sys/ioccom.h>
+
+/* BSD style release date */
+#define BPF_RELEASE 199606
+
+typedef int bpf_int32;
+typedef uint_t bpf_uint_t32;
+typedef uint_t bpf_u_int32;
+
+/*
+ * Alignment macros. BPF_WORDALIGN rounds up to the next
+ * even multiple of BPF_ALIGNMENT.
+ */
+#define BPF_ALIGNMENT sizeof (uint32_t)
+#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
+
+#define BPF_MAXINSNS 512
+#define BPF_DFLTBUFSIZE (1024*1024) /* default static upper limit */
+#define BPF_MAXBUFSIZE (1024*1024*16) /* hard limit on sysctl'able value */
+#define BPF_MINBUFSIZE 32
+
+/*
+ * Structure for BIOCSETF.
+ */
+struct bpf_program {
+ uint_t bf_len;
+ struct bpf_insn *bf_insns;
+};
+struct bpf_program32 {
+ uint_t bf_len;
+ caddr32_t bf_insns;
+};
+
+/*
+ * Struct returned by BIOCGSTATS and net.bpf.stats sysctl.
+ */
+struct bpf_stat {
+ uint64_t bs_recv; /* number of packets received */
+ uint64_t bs_drop; /* number of packets dropped */
+ uint64_t bs_capt; /* number of packets captured */
+ uint64_t bs_padding[13];
+};
+
+/*
+ * Struct returned by BIOCGSTATSOLD.
+ */
+struct bpf_stat_old {
+ uint_t bs_recv; /* number of packets received */
+ uint_t bs_drop; /* number of packets dropped */
+};
+
+/*
+ * Struct returned by BIOCVERSION. This represents the version number of
+ * the filter language described by the instruction encodings below.
+ * bpf understands a program iff kernel_major == filter_major &&
+ * kernel_minor >= filter_minor, that is, if the value returned by the
+ * running kernel has the same major number and a minor number equal
+ * to or greater than that of the filter being downloaded. Otherwise, the
+ * results are undefined, meaning an error may be returned or packets
+ * may be accepted haphazardly.
+ * It has nothing to do with the source code version.
+ */
+struct bpf_version {
+ ushort_t bv_major;
+ ushort_t bv_minor;
+};
+/* Current version number of filter architecture. */
+#define BPF_MAJOR_VERSION 1
+#define BPF_MINOR_VERSION 1
+
+/*
+ * BPF ioctls
+ *
+ * The first set is for compatibility with Sun's pcc style
+ * header files. If you're using gcc, we assume that you
+ * have run fixincludes so the latter set should work.
+ */
+#define BIOCGBLEN _IOR('B', 102, uint_t)
+#define BIOCSBLEN _IOWR('B', 102, uint_t)
+#define BIOCSETF _IOW('B', 103, struct bpf_program)
+#define BIOCFLUSH _IO('B', 104)
+#define BIOCPROMISC _IO('B', 105)
+#define BIOCGDLT _IOR('B', 106, uint_t)
+#define BIOCGETIF _IOR('B', 107, struct ifreq)
+#define BIOCGETLIF _IOR('B', 107, struct lifreq)
+#define BIOCSETIF _IOW('B', 108, struct ifreq)
+#define BIOCSETLIF _IOW('B', 108, struct lifreq)
+#define BIOCGSTATS _IOR('B', 111, struct bpf_stat)
+#define BIOCGSTATSOLD _IOR('B', 111, struct bpf_stat_old)
+#define BIOCIMMEDIATE _IOW('B', 112, uint_t)
+#define BIOCVERSION _IOR('B', 113, struct bpf_version)
+#define BIOCSTCPF _IOW('B', 114, struct bpf_program)
+#define BIOCSUDPF _IOW('B', 115, struct bpf_program)
+#define BIOCGHDRCMPLT _IOR('B', 116, uint_t)
+#define BIOCSHDRCMPLT _IOW('B', 117, uint_t)
+#define BIOCSDLT _IOW('B', 118, uint_t)
+#define BIOCGDLTLIST _IOWR('B', 119, struct bpf_dltlist)
+#define BIOCGSEESENT _IOR('B', 120, uint_t)
+#define BIOCSSEESENT _IOW('B', 121, uint_t)
+#define BIOCSRTIMEOUT _IOW('B', 122, struct timeval)
+#define BIOCGRTIMEOUT _IOR('B', 123, struct timeval)
+/*
+ */
+#define BIOCSETF32 _IOW('B', 103, struct bpf_program32)
+#define BIOCGDLTLIST32 _IOWR('B', 119, struct bpf_dltlist32)
+#define BIOCSRTIMEOUT32 _IOW('B', 122, struct timeval32)
+#define BIOCGRTIMEOUT32 _IOR('B', 123, struct timeval32)
+
+/*
+ * Structure prepended to each packet. This is "wire" format, so we
+ * cannot change it unfortunately to 64 bit times on 32 bit systems [yet].
+ */
+struct bpf_timeval {
+ int32_t tv_sec;
+ int32_t tv_usec;
+};
+
+struct bpf_hdr {
+ struct bpf_timeval bh_tstamp; /* time stamp */
+ uint32_t bh_caplen; /* length of captured portion */
+ uint32_t bh_datalen; /* original length of packet */
+ uint16_t bh_hdrlen; /* length of bpf header (this struct */
+ /* plus alignment padding) */
+};
+/*
+ * Because the structure above is not a multiple of 4 bytes, some compilers
+ * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work.
+ * Only the kernel needs to know about it; applications use bh_hdrlen.
+ * XXX To save a few bytes on 32-bit machines, we avoid end-of-struct
+ * XXX padding by using the size of the header data elements. This is
+ * XXX fail-safe: on new machines, we just use the 'safe' sizeof.
+ */
+#ifdef _KERNEL
+#if defined(__arm32__) || defined(__i386__) || defined(__m68k__) || \
+ defined(__mips__) || defined(__ns32k__) || defined(__vax__) || \
+ defined(__sh__) || (defined(__sparc__) && !defined(__sparc64__))
+#define SIZEOF_BPF_HDR 18
+#else
+#define SIZEOF_BPF_HDR sizeof (struct bpf_hdr)
+#endif
+#endif
+
+/* Pull in data-link level type codes. */
+#include <net/dlt.h>
+
+/*
+ * The instruction encodings.
+ */
+/* instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code) ((code) & 0x18)
+#define BPF_A 0x10
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define BPF_TAX 0x00
+#define BPF_TXA 0x80
+
+/*
+ * The instruction data structure.
+ */
+struct bpf_insn {
+ uint16_t code;
+ uint8_t jt;
+ uint8_t jf;
+ uint32_t k;
+};
+
+/*
+ * Macros for insn array initializers.
+ */
+#define BPF_STMT(code, k) { (uint16_t)(code), 0, 0, k }
+#define BPF_JUMP(code, k, jt, jf) { (uint16_t)(code), jt, jf, k }
+
+/*
+ * Structure to retrieve available DLTs for the interface.
+ * This is the argument type of BIOCGDLTLIST.
+ */
+struct bpf_dltlist {
+ uint_t bfl_len; /* number of entries in bfl_list array */
+ uint_t *bfl_list; /* array of DLTs */
+};
+/*
+ * Same fields with the list pointer held as a caddr32_t; this is the
+ * argument type of BIOCGDLTLIST32.
+ */
+struct bpf_dltlist32 {
+ uint_t bfl_len; /* number of entries in bfl_list array */
+ caddr32_t bfl_list; /* array of DLTs, as a caddr32_t address */
+};
+
+#ifdef _KERNEL
+#include <sys/mac.h>
+#include <sys/dls_impl.h>
+
+typedef void (*bpf_itap_fn_t)(void *, mblk_t *, boolean_t, uint_t);
+
+extern void bpfattach(uintptr_t, int, zoneid_t, int);
+extern void bpfdetach(uintptr_t);
+extern uint_t bpf_filter(struct bpf_insn *, uchar_t *, uint_t, uint_t);
+extern void bpf_itap(void *, mblk_t *, boolean_t, uint_t);
+extern void bpf_mtap(void *, mac_resource_handle_t, mblk_t *, boolean_t);
+extern int bpf_validate(struct bpf_insn *, int);
+
+#endif /* _KERNEL */
+
+/*
+ * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
+ */
+#define BPF_MEMWORDS 16
+
+#endif /* !_NET_BPF_H_ */
diff --git a/usr/src/uts/common/io/bpf/net/bpfdesc.h b/usr/src/uts/common/io/bpf/net/bpfdesc.h
new file mode 100644
index 0000000000..c1591aa4c4
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/net/bpfdesc.h
@@ -0,0 +1,235 @@
+/* $NetBSD: bpfdesc.h,v 1.29 2009/03/14 14:46:10 dsl Exp $ */
+
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpfdesc.h 8.1 (Berkeley) 6/10/93
+ *
+ * @(#) Header: bpfdesc.h,v 1.14 96/06/16 22:28:07 leres Exp (LBL)
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _NET_BPFDESC_H_
+#define _NET_BPFDESC_H_
+
+#include <net/if.h> /* for IFNAMSIZ */
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/queue.h>
+
+/*
+ * Descriptor associated with each open bpf file.
+ */
+struct bpf_d {
+ LIST_ENTRY(bpf_d) bd_list; /* List of bpf_d */
+ LIST_ENTRY(bpf_d) bd_next; /* List attached to bif_if */
+ /*
+ * Buffer slots: two mbuf clusters buffer the incoming packets.
+ * The model has three slots. Sbuf is always occupied.
+ * sbuf (store) - Receive interrupt puts packets here.
+ * hbuf (hold) - When sbuf is full, put cluster here and
+ * wakeup read (replace sbuf with fbuf).
+ * fbuf (free) - When read is done, put cluster here.
+ * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
+ */
+ void * bd_sbuf; /* store slot */
+ void * bd_hbuf; /* hold slot */
+ void * bd_fbuf; /* free slot */
+ int bd_slen; /* current length of store buffer */
+ int bd_hlen; /* current length of hold buffer */
+
+ int bd_bufsize; /* absolute length of buffers */
+
+ struct bpf_if *bd_bif; /* interface descriptor */
+ ulong_t bd_rtout; /* Read timeout in 'ticks' */
+ struct bpf_insn *bd_filter; /* filter code */
+ size_t bd_filter_size;
+ ulong_t bd_rcount; /* number of packets received */
+ ulong_t bd_dcount; /* number of packets dropped */
+ ulong_t bd_ccount; /* number of packets captured */
+
+ uchar_t bd_promisc; /* true if listening promiscuously */
+ uchar_t bd_state; /* idle, waiting, or timed out */
+ uchar_t bd_immediate; /* true to return on packet arrival */
+ int bd_hdrcmplt; /* false to fill in src lladdr */
+ int bd_seesent; /* true if bpf should see sent pkts */
+ int bd_async; /* non-zero if packet reception .. */
+ /* .. should generate signal */
+ int bd_nonblock; /* non-zero for non-blocking read */
+ pid_t bd_pgid; /* process or group id for signal */
+ int bd_timedout;
+ struct pollhead bd_poll;
+ timeout_id_t bd_callout; /* for BPF timeouts with select */
+ pid_t bd_pid; /* corresponding PID */
+ void *bd_sih; /* soft interrupt handle */
+ /*
+ * Solaris specific bits after this.
+ */
+ kmutex_t bd_lock;
+ kcondvar_t bd_wait;
+ uintptr_t bd_mcip; /* Where mac_client_handle_t gets put */
+ uintptr_t bd_promisc_handle;
+ minor_t bd_dev; /* device number for this handle */
+ int bd_fmode; /* flags from bpfopen */
+ zoneid_t bd_zone; /* zoneid of the opening process */
+ int bd_inuse;
+ int bd_waiting;
+ /*
+ * bd_promisc_flags is used to store the promiscuous state of the
+ * interface in BPF so that the correct mode of operation can
+ * be kept across changing DLT or network interface.
+ */
+ int bd_promisc_flags;
+};
+
+
+/* Values for bd_state */
+#define BPF_IDLE 0 /* no select in progress */
+#define BPF_WAITING 1 /* waiting for read timeout in select */
+#define BPF_TIMED_OUT 2 /* read timeout has expired in select */
+
+/*
+ * Description associated with the external representation of each
+ * open bpf file.
+ */
+struct bpf_d_ext {
+ int32_t bde_bufsize;
+ uint8_t bde_promisc;
+ uint8_t bde_state;
+ uint8_t bde_immediate;
+ int32_t bde_hdrcmplt;
+ int32_t bde_seesent;
+ pid_t bde_pid;
+ uint64_t bde_rcount; /* number of packets received */
+ uint64_t bde_dcount; /* number of packets dropped */
+ uint64_t bde_ccount; /* number of packets captured */
+ char bde_ifname[IFNAMSIZ];
+};
+
+/*
+ * Access to "layer 2" networking is provided through each such provider
+ * declaring a set of functions to use in the structure below. It has been
+ * modeled around what's required to use the mac layer. All of the functions
+ * below must be declared, even if only filled by a stub function.
+ */
+typedef struct bpf_provider_s {
+ int bpr_unit; /* presumably a BPR_* value - confirm */
+ int (*bpr_open)(const char *, uintptr_t *, zoneid_t);
+ void (*bpr_close)(uintptr_t);
+ const char *(*bpr_name)(uintptr_t);
+ int (*bpr_type)(uintptr_t);
+ void (*bpr_sdu_get)(uintptr_t, uint_t *);
+ int (*bpr_tx)(uintptr_t, mblk_t *);
+ uintptr_t (*bpr_promisc_add)(uintptr_t, int, void *, uintptr_t *,
+ int);
+ void (*bpr_promisc_remove)(uintptr_t);
+ int (*bpr_getlinkid)(const char *, datalink_id_t *,
+ zoneid_t);
+ void (*bpr_client_close)(uintptr_t);
+ const char *(*bpr_client_name)(uintptr_t);
+ int (*bpr_client_open)(uintptr_t, uintptr_t *);
+} bpf_provider_t;
+
+typedef struct bpf_provider_list {
+ LIST_ENTRY(bpf_provider_list) bpl_next;
+ bpf_provider_t *bpl_what;
+} bpf_provider_list_t;
+
+/*
+ * The bpr_unit field from bpf_provider_t expects an integer that comes from
+ * the list of defines below.
+ */
+#define BPR_MAC 1
+#define BPR_IPNET 2
+
+/*
+ * Convenience wrappers that call through a bpf_provider_t (_m) function
+ * pointer of the matching name. _h is the provider handle (uintptr_t),
+ * the remaining arguments are passed through in order.
+ *
+ * Macro parameters that are the operand of a cast are parenthesised so
+ * that the cast applies to the whole argument expression rather than to
+ * its first operand only.
+ */
+#define MBPF_OPEN(_m, _n, _p, _z) \
+ (_m)->bpr_open(_n, (uintptr_t *)(_p), _z)
+#define MBPF_CLOSE(_m, _h) (_m)->bpr_close(_h)
+#define MBPF_NAME(_m, _h) (_m)->bpr_name(_h)
+#define MBPF_TYPE(_m, _h) (_m)->bpr_type(_h)
+#define MBPF_SDU_GET(_m, _h, _p) (_m)->bpr_sdu_get(_h, _p)
+#define MBPF_TX(_m, _h, _pkt) (_m)->bpr_tx(_h, _pkt)
+#define MBPF_PROMISC_ADD(_m, _h, _o, _d, _p, _f) \
+ (_m)->bpr_promisc_add(_h, _o, _d, _p, _f)
+#define MBPF_PROMISC_REMOVE(_m, _h) (_m)->bpr_promisc_remove(_h)
+#define MBPF_GET_LINKID(_m, _n, _ip, _z) \
+ (_m)->bpr_getlinkid(_n, _ip, _z)
+#define MBPF_CLIENT_CLOSE(_m, _h) (_m)->bpr_client_close(_h)
+#define MBPF_CLIENT_NAME(_m, _h) (_m)->bpr_client_name(_h)
+#define MBPF_CLIENT_OPEN(_m, _h, _p) \
+ (_m)->bpr_client_open((uintptr_t)(_h), (uintptr_t *)(_p))
+
+/*
+ * Descriptor associated with each attached hardware interface.
+ */
+struct bpf_if {
+ TAILQ_ENTRY(bpf_if) bif_next; /* list of all interfaces */
+ LIST_HEAD(, bpf_d) bif_dlist; /* list of all descriptors att'd */
+ uint_t bif_dlt; /* link layer type */
+ uint_t bif_hdrlen; /* length of header (with padding) */
+ /*
+ * Solaris specific bits after this.
+ */
+ uintptr_t bif_ifp; /* corresponding interface */
+ datalink_id_t bif_linkid;
+ kmutex_t bif_lock;
+ zoneid_t bif_zoneid; /* zone that the interface is in */
+ int bif_inuse;
+ bpf_provider_t bif_mac;
+ char bif_ifname[LIFNAMSIZ+1];
+};
+
+#ifdef _KERNEL
+typedef struct bpf_kstats_s {
+ kstat_named_t kp_read_wait;
+ kstat_named_t kp_write_ok;
+ kstat_named_t kp_write_error;
+ kstat_named_t kp_receive;
+ kstat_named_t kp_capture;
+ kstat_named_t kp_dropped;
+} bpf_kstats_t;
+
+int bpf_setf(struct bpf_d *, struct bpf_program *);
+#endif
+
+typedef void (*bpf_attach_fn_t)(uintptr_t, int, zoneid_t, int);
+typedef void (*bpf_detach_fn_t)(uintptr_t);
+typedef int (*bpf_provider_reg_fn_t)(bpf_provider_t *);
+
+extern bpf_provider_t *bpf_find_provider_by_id(int);
+extern void bpf_open_zone(const zoneid_t);
+extern int bpf_provider_tickle(char *, zoneid_t);
+
+#endif /* !_NET_BPFDESC_H_ */
diff --git a/usr/src/uts/common/io/bpf/net/dlt.h b/usr/src/uts/common/io/bpf/net/dlt.h
new file mode 100644
index 0000000000..a6aa18fa1a
--- /dev/null
+++ b/usr/src/uts/common/io/bpf/net/dlt.h
@@ -0,0 +1,170 @@
+/* $NetBSD: dlt.h,v 1.11 2006/02/27 14:22:26 drochner Exp $ */
+
+/*
+ * Copyright (c) 1990, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.h 8.2 (Berkeley) 1/9/95
+ * @(#) Header: bpf.h,v 1.36 97/06/12 14:29:53 leres Exp (LBL)
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _NET_DLT_H_
+#define _NET_DLT_H_
+
+/*
+ * Data-link level type codes.
+ */
+#define DLT_NULL 0 /* no link-layer encapsulation */
+#define DLT_EN10MB 1 /* Ethernet (10Mb) */
+#define DLT_EN3MB 2 /* Experimental Ethernet (3Mb) */
+#define DLT_AX25 3 /* Amateur Radio AX.25 */
+#define DLT_PRONET 4 /* Proteon ProNET Token Ring */
+#define DLT_CHAOS 5 /* Chaos */
+#define DLT_IEEE802 6 /* IEEE 802 Networks */
+#define DLT_ARCNET 7 /* ARCNET */
+#define DLT_SLIP 8 /* Serial Line IP */
+#define DLT_PPP 9 /* Point-to-point Protocol */
+#define DLT_FDDI 10 /* FDDI */
+#define DLT_ATM_RFC1483 11 /* LLC/SNAP encapsulated atm */
+#define DLT_RAW 12 /* raw IP */
+#define DLT_SLIP_BSDOS 13 /* BSD/OS Serial Line IP */
+#define DLT_PPP_BSDOS 14 /* BSD/OS Point-to-point Protocol */
+#define DLT_HIPPI 15 /* HIPPI */
+#define DLT_HDLC 16 /* HDLC framing */
+
+#define DLT_PFSYNC 18 /* Packet filter state syncing */
+#define DLT_ATM_CLIP 19 /* Linux Classical-IP over ATM */
+#define DLT_ENC 109 /* Encapsulated packets for IPsec */
+#define DLT_LINUX_SLL 113 /* Linux cooked sockets */
+#define DLT_LTALK 114 /* Apple LocalTalk hardware */
+#define DLT_PFLOG 117 /* Packet filter logging, by pcap people */
+#define DLT_CISCO_IOS 118 /* Registered for Cisco-internal use */
+
+/* Axent Raptor / Symantec Enterprise Firewall */
+#define DLT_SYMANTEC_FIREWALL 99
+
+#define DLT_C_HDLC 104 /* Cisco HDLC */
+#define DLT_IEEE802_11 105 /* IEEE 802.11 wireless */
+#define DLT_FRELAY 107 /* Frame Relay */
+#define DLT_LOOP 108 /* OpenBSD DLT_LOOP */
+#define DLT_ECONET 115 /* Acorn Econet */
+#define DLT_PRISM_HEADER 119 /* 802.11 header plus Prism II info. */
+#define DLT_AIRONET_HEADER 120 /* 802.11 header plus Aironet info. */
+#define DLT_HHDLC 121 /* Reserved for Siemens HiPath HDLC */
+#define DLT_IP_OVER_FC 122 /* RFC 2625 IP-over-Fibre Channel */
+#define DLT_SUNATM 123 /* Solaris+SunATM */
+#define DLT_RIO 124 /* RapidIO */
+#define DLT_PCI_EXP 125 /* PCI Express */
+#define DLT_AURORA 126 /* Xilinx Aurora link layer */
+#define DLT_IEEE802_11_RADIO 127 /* 802.11 header plus radio info. */
+#define DLT_TZSP 128 /* Tazmen Sniffer Protocol */
+#define DLT_ARCNET_LINUX 129 /* ARCNET */
+#define DLT_JUNIPER_MLPPP 130 /* Juniper-private data link types. */
+#define DLT_JUNIPER_MLFR 131
+#define DLT_JUNIPER_ES 132
+#define DLT_JUNIPER_GGSN 133
+#define DLT_JUNIPER_MFR 134
+#define DLT_JUNIPER_ATM2 135
+#define DLT_JUNIPER_SERVICES 136
+#define DLT_JUNIPER_ATM1 137
+#define DLT_APPLE_IP_OVER_IEEE1394 138 /* Apple IP-over-IEEE 1394 */
+
+/* Various SS7 encapsulations */
+#define DLT_MTP2_WITH_PHDR 139 /* pseudo-header with various info, */
+ /* followed by MTP2 */
+#define DLT_MTP2 140 /* MTP2, no pseudo-header */
+#define DLT_MTP3 141 /* MTP3, no pseudo-header or MTP2 */
+#define DLT_SCCP 142 /* SCCP, no pseudo-header or MTP2 */
+ /* or MTP3 */
+
+#define DLT_DOCSIS 143 /* Reserved for DOCSIS MAC frames. */
+#define DLT_LINUX_IRDA 144 /* Linux-IrDA packets */
+
+/* Reserved for IBM SP switch and IBM Next Federation switch. */
+#define DLT_IBM_SP 145
+#define DLT_IBM_SN 146
+
+#define DLT_IEEE802_11_RADIO_AVS 163 /* 802.11 plus AVS header */
+#define DLT_JUNIPER_MONITOR 164 /* Juniper-private data link type */
+#define DLT_BACNET_MS_TP 165
+#define DLT_PPP_PPPD 166 /* Another PPP variant (Linux?) */
+
+#define DLT_JUNIPER_PPPOE 167
+#define DLT_JUNIPER_PPPOE_ATM 168
+#define DLT_JUNIPER_PIC_PEER 174
+#define DLT_JUNIPER_ETHER 178
+#define DLT_JUNIPER_PPP 179
+#define DLT_JUNIPER_FRELAY 180
+#define DLT_JUNIPER_CHDLC 181
+
+#define DLT_GPRS_LLC 169 /* GPRS LLC */
+#define DLT_GPF_T 170 /* GPF-T (ITU-T G.7041/Y.1303) */
+#define DLT_GPF_F 171 /* GPF-F (ITU-T G.7041/Y.1303) */
+
+#define DLT_GCOM_T1E1 172
+#define DLT_GCOM_SERIAL 173
+
+/* "EndaceRecordFormat" */
+#define DLT_ERF_ETH 175 /* Ethernet */
+#define DLT_ERF_POS 176 /* Packet-over-SONET */
+
+#define DLT_LINUX_LAPD 177 /* Raw LAPD for vISDN */
+
+#define DLT_IPNET 226 /* MAC client view on Solaris */
+/*
+ * A number reserved for private user use is currently assigned, pending
+ * a real one from tcpdump.org. A description of the link layer frame
+ * is a requisite for this.
+ */
+#define DLT_IPOIB 162 /* Infiniband (IPoIB) on Solaris */
+
+/*
+ * NetBSD-specific generic "raw" link type. The upper 16-bits indicate
+ * that this is the generic raw type, and the lower 16-bits are the
+ * address family we're dealing with.
+ */
+#define DLT_RAWAF_MASK 0x02240000
+#define DLT_RAWAF(af) (DLT_RAWAF_MASK | (af))
+#define DLT_RAWAF_AF(x) ((x) & 0x0000ffff)
+#define DLT_IS_RAWAF(x) (((x) & 0xffff0000) == DLT_RAWAF_MASK)
+
+/*
+ * Solaris specific function to map DLPI DL_ data link types to BPF DLT_
+ */
+extern int bpf_dl_to_dlt(int);
+extern int bpf_dl_hdrsize(int);
+
+#endif /* !_NET_DLT_H_ */
diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c
index a4d82022ee..8dc086d900 100644
--- a/usr/src/uts/common/io/dls/dls_link.c
+++ b/usr/src/uts/common/io/dls/dls_link.c
@@ -35,6 +35,8 @@
#include <sys/sdt.h>
#include <sys/atomic.h>
+static void dls_bpf_newzone(dls_link_t *dlp, zoneid_t zid);
+
static kmem_cache_t *i_dls_link_cachep;
mod_hash_t *i_dls_link_hash;
static uint_t i_dls_link_count;
@@ -866,6 +868,7 @@ dls_link_setzid(const char *name, zoneid_t zid)
goto done;
}
+ dls_bpf_newzone(dlp, zid);
dlp->dl_zid = zid;
if (zid == GLOBAL_ZONEID) {
@@ -888,6 +891,41 @@ done:
return (err);
}
+
+/*
+ * When a NIC changes zone, that change needs to be communicated to BPF
+ * so that it can correctly enforce access rights on it via BPF. In the
+ * absence of a function from BPF to just change the zoneid, this is
+ * done with a detach followed by an attach.
+ *
+ * dlp - the link concerned; its mac handle (dl_mh) is what is passed
+ *       to the BPF hooks.
+ * zid - zone id handed to the attach hook.
+ *
+ * Each hook is only called if it is non-NULL; the two checks are
+ * independent of one another.
+ */
+static void
+dls_bpf_newzone(dls_link_t *dlp, zoneid_t zid)
+{
+ if (dls_bpfdetach_fn != NULL)
+ dls_bpfdetach_fn((uintptr_t)dlp->dl_mh);
+
+ /* Re-attach the same mac handle, as a BPR_MAC provider, with zid. */
+ if (dls_bpfattach_fn != NULL)
+ dls_bpfattach_fn((uintptr_t)dlp->dl_mh, mac_type(dlp->dl_mh),
+ zid, BPR_MAC);
+}
+
+/*
+ * Look up the link called "name" and store its zone id (dl_zid) in *zidp.
+ *
+ * Returns 0 on success. If dls_link_hold() fails, its error is returned
+ * and *zidp is not written.
+ */
+int
+dls_link_getzid(const char *name, zoneid_t *zidp)
+{
+ dls_link_t *dlp;
+ int err = 0;
+
+ if ((err = dls_link_hold(name, &dlp)) != 0)
+ return (err);
+
+ /*
+  * NOTE(review): this assertion implies callers are expected to
+  * hold the mac perimeter for the link - confirm against callers.
+  */
+ ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
+
+ *zidp = dlp->dl_zid;
+
+ /* Drop the hold taken by dls_link_hold() above. */
+ dls_link_rele(dlp);
+ return (0);
+}
+
void
dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
{
diff --git a/usr/src/uts/common/io/dls/dls_mgmt.c b/usr/src/uts/common/io/dls/dls_mgmt.c
index cfe3294251..ca661dbbec 100644
--- a/usr/src/uts/common/io/dls/dls_mgmt.c
+++ b/usr/src/uts/common/io/dls/dls_mgmt.c
@@ -60,6 +60,8 @@ static krwlock_t i_dls_devnet_lock;
static mod_hash_t *i_dls_devnet_id_hash;
static mod_hash_t *i_dls_devnet_hash;
+bpf_attach_fn_t dls_bpfattach_fn = NULL;
+bpf_detach_fn_t dls_bpfdetach_fn = NULL;
boolean_t devnet_need_rebuild;
#define VLAN_HASHSZ 67 /* prime */
@@ -1217,7 +1219,6 @@ dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
return (0);
}
-
/*
* Get linkid for the given dev.
*/
@@ -1656,6 +1657,19 @@ dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
return (err);
}
}
+ /*
+ * Tell BPF it is here, if BPF is there
+ */
+ if (dls_bpfattach_fn != NULL) {
+ /*
+ * The zoneid is passed in explicitly to prevent the need to
+ * do a lookup in dls using the linkid. Such a lookup would need
+ * to use the same hash table that gets used for walking when
+ * dls_set_bpfattach() is called.
+ */
+ dls_bpfattach_fn((uintptr_t)mh, mac_type(mh),
+ dlp->dl_zid, BPR_MAC);
+ }
mac_perim_exit(mph);
return (err);
}
@@ -1684,6 +1698,12 @@ dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
if (err != 0 && err != ENOENT)
return (err);
+ /*
+ * Tell BPF that the link is going away, if BPF is there.
+ */
+ if (dls_bpfdetach_fn != NULL)
+ dls_bpfdetach_fn((uintptr_t)mh);
+
mac_perim_enter_by_mh(mh, &mph);
err = dls_link_rele_by_name(mac_name(mh));
mac_perim_exit(mph);
@@ -1781,3 +1801,36 @@ dls_devnet_linkid(dls_dl_handle_t ddh)
{
return (ddh->dd_linkid);
}
+
+/*
+ * Hash walk callback: treats val as a dls_link_t and calls
+ * dls_bpfattach_fn for it with the link's mac handle, mac type and zone
+ * id, as a BPR_MAC provider. key and arg are unused. Always returns
+ * MH_WALK_CONTINUE. dls_bpfattach_fn is not checked for NULL here.
+ */
+/*ARGSUSED*/
+static uint_t
+i_dls_bpfattach_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+ dls_link_t *dlp = (dls_link_t *)val;
+
+ dls_bpfattach_fn((uintptr_t)dlp->dl_mh, mac_type(dlp->dl_mh),
+ dlp->dl_zid, BPR_MAC);
+
+ return (MH_WALK_CONTINUE);
+}
+
+/*
+ * Set the functions to call back to when adding or removing a mac so that
+ * BPF can keep its internal list of these up to date.
+ *
+ * attach - new value for dls_bpfattach_fn.
+ * detach - new value for dls_bpfdetach_fn.
+ *
+ * Both globals are overwritten unconditionally.
+ */
+void
+dls_set_bpfattach(bpf_attach_fn_t attach, bpf_detach_fn_t detach)
+{
+ /* Remember the previous hook to detect a NULL -> non-NULL change. */
+ bpf_attach_fn_t old = dls_bpfattach_fn;
+
+ dls_bpfattach_fn = attach;
+ dls_bpfdetach_fn = detach;
+
+ /*
+ * If we're setting a new attach function, call it for every
+ * mac that has already been attached. Replacing one non-NULL
+ * attach function with another does not trigger the walk.
+ */
+ if (attach != NULL && old == NULL) {
+ mod_hash_walk(i_dls_link_hash, i_dls_bpfattach_walker, NULL);
+ }
+}
diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c
index 62976337b4..77196c01a6 100644
--- a/usr/src/uts/common/io/mac/mac_client.c
+++ b/usr/src/uts/common/io/mac/mac_client.c
@@ -422,6 +422,12 @@ mac_name(mac_handle_t mh)
return (((mac_impl_t *)mh)->mi_name);
}
+/*
+ * Return the mt_type value of the mac type (mi_type) associated with
+ * the given mac handle.
+ */
+int
+mac_type(mac_handle_t mh)
+{
+ return (((mac_impl_t *)mh)->mi_type->mt_type);
+}
+
char *
mac_client_name(mac_client_handle_t mch)
{
@@ -2647,6 +2653,7 @@ mac_promisc_add(mac_client_handle_t mch, mac_client_promisc_type_t type,
mpip->mpi_no_phys = ((flags & MAC_PROMISC_FLAGS_NO_PHYS) != 0);
mpip->mpi_strip_vlan_tag =
((flags & MAC_PROMISC_FLAGS_VLAN_TAG_STRIP) != 0);
+ mpip->mpi_no_copy = ((flags & MAC_PROMISC_FLAGS_NO_COPY) != 0);
mcbi = &mip->mi_promisc_cb_info;
mutex_enter(mcbi->mcbi_lockp);
@@ -2823,6 +2830,17 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint,
}
srs = flent->fe_tx_srs;
+ /*
+ * This is to avoid panics with PF_PACKET that can call mac_tx()
+ * against an interface that is not capable of sending. A rewrite
+ * of the mac datapath is required to remove this limitation.
+ */
+ if (srs == NULL) {
+ if (!(flag & MAC_TX_NO_HOLD))
+ MAC_TX_RELE(mcip, mytx);
+ freemsgchain(mp_chain);
+ return (NULL);
+ }
srs_tx = &srs->srs_tx;
if (srs_tx->st_mode == SRS_TX_DEFAULT &&
(srs->srs_state & SRS_ENQUEUED) == 0 &&
@@ -3254,18 +3272,28 @@ static void
mac_promisc_dispatch_one(mac_promisc_impl_t *mpip, mblk_t *mp,
boolean_t loopback)
{
- mblk_t *mp_copy;
+ mblk_t *mp_copy, *mp_next;
- mp_copy = copymsg(mp);
- if (mp_copy == NULL)
- return;
- mp_copy->b_next = NULL;
-
- if (mpip->mpi_strip_vlan_tag) {
- if ((mp_copy = mac_strip_vlan_tag_chain(mp_copy)) == NULL)
+ /*
+  * A private copy is made unless the client asked for no copy
+  * (mpi_no_copy) and no VLAN tag stripping is wanted; stripping
+  * is only ever done on a copy.
+  */
+ if (!mpip->mpi_no_copy || mpip->mpi_strip_vlan_tag) {
+ mp_copy = copymsg(mp);
+ if (mp_copy == NULL)
return;
+
+ if (mpip->mpi_strip_vlan_tag) {
+ mp_copy = mac_strip_vlan_tag_chain(mp_copy);
+ if (mp_copy == NULL)
+ return;
+ }
+ mp_next = NULL;
+ } else {
+ /* Hand over the original and remember its b_next link. */
+ mp_copy = mp;
+ mp_next = mp->b_next;
}
+ mp_copy->b_next = NULL;
+
mpip->mpi_fn(mpip->mpi_arg, NULL, mp_copy, loopback);
+ /*
+  * The original was passed with b_next cleared; put the link back.
+  * NOTE(review): this writes to mp after the callback returns, so
+  * the callback presumably must not free it - confirm.
+  */
+ if (mp_copy == mp)
+ mp->b_next = mp_next;
}
/*
diff --git a/usr/src/uts/common/net/if.h b/usr/src/uts/common/net/if.h
index cab4f6ba60..49a71d5d12 100644
--- a/usr/src/uts/common/net/if.h
+++ b/usr/src/uts/common/net/if.h
@@ -436,6 +436,7 @@ struct ifreq {
char ifru_oname[IFNAMSIZ]; /* other if name */
struct sockaddr ifru_broadaddr;
int ifru_index; /* interface index */
+ uint_t ifru_mtu;
short ifru_flags;
int ifru_metric;
char ifru_data[1]; /* interface dependent data */
@@ -487,6 +488,7 @@ struct ifreq {
#define ifr_data ifr_ifru.ifru_data /* for use by interface */
#define ifr_enaddr ifr_ifru.ifru_enaddr /* ethernet address */
#define ifr_index ifr_ifru.ifru_index /* interface index */
+#define ifr_mtu ifr_ifru.ifru_mtu /* mtu */
/* For setting ppa */
#define ifr_ppa ifr_ifru.ifru_ppaflags.ifrup_ppa
diff --git a/usr/src/uts/common/os/netstack.c b/usr/src/uts/common/os/netstack.c
index 0e23043859..b8467fbe13 100644
--- a/usr/src/uts/common/os/netstack.c
+++ b/usr/src/uts/common/os/netstack.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1279,6 +1279,12 @@ zoneid_to_netstackid(zoneid_t zoneid)
return (zoneid);
}
+zoneid_t
+netstack_get_zoneid(netstack_t *ns)
+{
+ return (netstackid_to_zoneid(ns->netstack_stackid));
+}
+
/*
* Simplistic support for walking all the handles.
* Example usage:
diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c
index 037e745f84..39edfbb5ad 100644
--- a/usr/src/uts/common/os/policy.c
+++ b/usr/src/uts/common/os/policy.c
@@ -1679,6 +1679,12 @@ secpolicy_net_rawaccess(const cred_t *cr)
return (PRIV_POLICY(cr, PRIV_NET_RAWACCESS, B_FALSE, EACCES, NULL));
}
+int
+secpolicy_net_observability(const cred_t *cr)
+{
+ return (PRIV_POLICY(cr, PRIV_NET_OBSERVABILITY, B_FALSE, EACCES, NULL));
+}
+
/*
* Need this privilege for accessing the ICMP device
*/
diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h
index 7be621bfd8..8b0681e2d8 100644
--- a/usr/src/uts/common/sys/dlpi.h
+++ b/usr/src/uts/common/sys/dlpi.h
@@ -57,11 +57,13 @@ extern "C" {
typedef struct dl_ipnetinfo {
uint8_t dli_version; /* DL_IPNETINFO_* version */
- uint8_t dli_ipver; /* packet IP header version */
- uint16_t dli_len; /* length of dl_ipnetinfo_t */
- uint32_t dli_pad; /* alignment pad */
- uint64_t dli_srczone; /* packet source zone ID (if any) */
- uint64_t dli_dstzone; /* packet dest zone ID (if any) */
+ uint8_t dli_family; /* address family of the packet */
+ uint16_t dli_htype;
+ uint32_t dli_pktlen; /* length of the attached packet */
+ uint32_t dli_ifindex;
+ uint32_t dli_grifindex;
+ uint32_t dli_zsrc; /* packet source zone ID (if any) */
+ uint32_t dli_zdst; /* packet dest zone ID (if any) */
} dl_ipnetinfo_t;
/*
diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h
index 36065e8735..19d885e821 100644
--- a/usr/src/uts/common/sys/dls_impl.h
+++ b/usr/src/uts/common/sys/dls_impl.h
@@ -34,6 +34,8 @@
#include <sys/modhash.h>
#include <sys/kstat.h>
#include <net/if.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
#include <sys/dlpi.h>
#ifdef __cplusplus
@@ -84,6 +86,7 @@ extern void dls_link_add(dls_link_t *, uint32_t, dld_str_t *);
extern void dls_link_remove(dls_link_t *, dld_str_t *);
extern int dls_link_header_info(dls_link_t *, mblk_t *,
mac_header_info_t *);
+extern int dls_link_getzid(const char *, zoneid_t *);
extern int dls_link_setzid(const char *, zoneid_t);
extern dev_info_t *dls_link_devinfo(dev_t);
extern dev_t dls_link_dev(dls_link_t *);
@@ -127,6 +130,10 @@ extern void dls_mgmt_fini(void);
extern int dls_mgmt_get_phydev(datalink_id_t, dev_t *);
+extern bpf_attach_fn_t dls_bpfattach_fn;
+extern bpf_detach_fn_t dls_bpfdetach_fn;
+extern void dls_set_bpfattach(bpf_attach_fn_t, bpf_detach_fn_t);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/hook_event.h b/usr/src/uts/common/sys/hook_event.h
index f9f29c845b..51bc274182 100644
--- a/usr/src/uts/common/sys/hook_event.h
+++ b/usr/src/uts/common/sys/hook_event.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -110,6 +110,57 @@ struct hook_nic_event_int {
};
typedef struct hook_nic_event_int hook_nic_event_int_t;
+/*
+ * This structure holds the data passed back from the ip module to
+ * observability consumers.
+ *
+ * Externally exposed fields, that must match the order and size of
+ * dl_ipnetinfo_t in <sys/dlpi.h> are:
+ * hpo_version Version number for this header
+ * hpo_family Address family of the attached packet
+ * hpo_htype IPobs hook type
+ * hpo_pktlen Length of the attached packet
+ * hpo_ifindex Interface index that the packet was received/sent over.
+ * For local packets, this is the index of the interface
+ * associated with the local destination address.
+ * hpo_grifindex IPMP group interface index (zero unless hpo_ifindex
+ * is an IPMP underlying interface).
+ * hpo_zsrc Source zoneid; set to ALL_ZONES when unknown.
+ * hpo_zdst Destination zoneid; set to ALL_ZONES when unknown.
+ *
+ * Fields used internally are:
+ * hpo_pkt Pointer to the mblk_t containing this structure with
+ * the real packet found at b_cont
+ */
+typedef struct hook_pkt_observe_s {
+ uint8_t hpo_version;
+ uint8_t hpo_family;
+ uint16_t hpo_htype;
+ uint32_t hpo_pktlen;
+ uint32_t hpo_ifindex;
+ uint32_t hpo_grifindex;
+ uint32_t hpo_zsrc;
+ uint32_t hpo_zdst;
+ /*
+ * Fields used internally are below.
+ */
+ mblk_t *hpo_pkt;
+ void *hpo_ctx;
+} hook_pkt_observe_t;
+
+/*
+ * ipobs_hook_type_t describes the hook types supported
+ * by the ip module. IPOBS_HOOK_LOCAL refers to packets
+ * which are looped back internally within the ip module.
+ */
+
+typedef enum ipobs_hook_type {
+ IPOBS_HOOK_INBOUND = 0,
+ IPOBS_HOOK_OUTBOUND = 1,
+ IPOBS_HOOK_LOCAL = 2
+} ipobs_hook_type_t;
+
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index e7ef4cf4c8..fa03cec939 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -591,6 +591,8 @@ extern minor_t mac_minor_hold(boolean_t);
extern void mac_minor_rele(minor_t);
extern void mac_sdu_get(mac_handle_t, uint_t *, uint_t *);
extern int mac_maxsdu_update(mac_handle_t, uint_t);
+extern uint_t mac_addr_len(mac_handle_t);
+extern int mac_type(mac_handle_t);
extern void mac_unicst_update(mac_handle_t,
const uint8_t *);
diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h
index 3452b1b71c..ad3f30aa63 100644
--- a/usr/src/uts/common/sys/mac_client.h
+++ b/usr/src/uts/common/sys/mac_client.h
@@ -102,6 +102,7 @@ typedef enum {
#define MAC_PROMISC_FLAGS_NO_TX_LOOP 0x0001
#define MAC_PROMISC_FLAGS_NO_PHYS 0x0002
#define MAC_PROMISC_FLAGS_VLAN_TAG_STRIP 0x0004
+#define MAC_PROMISC_FLAGS_NO_COPY 0x0008
/* flags passed to mac_tx() */
#define MAC_DROP_ON_NO_DESC 0x01 /* freemsg() if no tx descs */
@@ -157,8 +158,6 @@ extern void mac_addr_factory_value(mac_handle_t, int, uchar_t *, uint_t *,
char *, boolean_t *);
extern uint_t mac_addr_factory_num(mac_handle_t);
-extern uint_t mac_addr_len(mac_handle_t);
-
extern mac_tx_notify_handle_t mac_client_tx_notify(mac_client_handle_t,
mac_tx_notify_t, void *);
diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h
index c40c09ab04..5bd36ad779 100644
--- a/usr/src/uts/common/sys/mac_client_impl.h
+++ b/usr/src/uts/common/sys/mac_client_impl.h
@@ -77,6 +77,7 @@ typedef struct mac_promisc_impl_s { /* Protected by */
boolean_t mpi_no_tx_loop; /* WO */
boolean_t mpi_no_phys; /* WO */
boolean_t mpi_strip_vlan_tag; /* WO */
+ boolean_t mpi_no_copy; /* WO */
} mac_promisc_impl_t;
typedef union mac_tx_percpu_s {
diff --git a/usr/src/uts/common/sys/neti.h b/usr/src/uts/common/sys/neti.h
index 33276bf3c0..93b5fc3e01 100644
--- a/usr/src/uts/common/sys/neti.h
+++ b/usr/src/uts/common/sys/neti.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -54,6 +54,7 @@ extern "C" {
#define NH_LOOPBACK_IN "LOOPBACK_IN"
#define NH_LOOPBACK_OUT "LOOPBACK_OUT"
#define NH_NIC_EVENTS "NIC_EVENTS"
+#define NH_OBSERVE "OBSERVING"
/*
* Network NIC hardware checksum capability
diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h
index 033adcb6aa..8b13b66599 100644
--- a/usr/src/uts/common/sys/netstack.h
+++ b/usr/src/uts/common/sys/netstack.h
@@ -232,6 +232,7 @@ extern netstack_t *netstack_find_by_stackid(netstackid_t);
extern netstack_t *netstack_find_by_zoneid(zoneid_t);
extern zoneid_t netstackid_to_zoneid(netstackid_t);
+extern zoneid_t netstack_get_zoneid(netstack_t *);
extern netstackid_t zoneid_to_netstackid(zoneid_t);
extern netstack_t *netstack_get_current(void);
diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h
index 59cfb2482f..4109deda85 100644
--- a/usr/src/uts/common/sys/policy.h
+++ b/usr/src/uts/common/sys/policy.h
@@ -112,6 +112,7 @@ int secpolicy_net_bindmlp(const cred_t *);
int secpolicy_net_config(const cred_t *, boolean_t);
int secpolicy_net_icmpaccess(const cred_t *);
int secpolicy_net_mac_aware(const cred_t *);
+int secpolicy_net_observability(const cred_t *);
int secpolicy_net_privaddr(const cred_t *, in_port_t, int proto);
int secpolicy_net_rawaccess(const cred_t *);
boolean_t secpolicy_net_reply_equal(const cred_t *);
diff --git a/usr/src/uts/common/sys/socket.h b/usr/src/uts/common/sys/socket.h
index cc51ec3380..bdab5880bd 100644
--- a/usr/src/uts/common/sys/socket.h
+++ b/usr/src/uts/common/sys/socket.h
@@ -118,6 +118,20 @@ typedef void *_RESTRICT_KYWD Psocklen_t;
#define SO_DGRAM_ERRIND 0x0200 /* Application wants delayed error */
#define SO_RECVUCRED 0x0400 /* Application wants ucred of sender */
+/*
+ * Socket options are passed using a signed integer, but it is also rare
+ * for more than one to ever be passed at the same time with setsockopt
+ * and only one at a time can be retrieved with getsockopt.
+ *
+ * Since the lower numbers cannot be renumbered for compatibility reasons,
+ * it would seem that we need to start a new number space (0x40000000 -
+ * 0x7fffffff) for those that don't need to be stored as a bit flag
+ * somewhere. This limits the flag options to 30 but that seems to be
+ * plenty, anyway. 0x40000000 is reserved for future use.
+ */
+#define SO_ATTACH_FILTER 0x40000001
+#define SO_DETACH_FILTER 0x40000002
+
#ifdef _KERNEL
#define SO_SND_COPYAVOID 0x0800 /* Internal: use zero-copy */
#define SO_SND_BUFINFO 0x1000 /* Internal: get buffer info */
@@ -207,6 +221,7 @@ struct linger {
#if !defined(_XPG4_2) || defined(__EXTENSIONS__)
#define SOL_ROUTE 0xfffe /* options for routing socket level */
#endif
+#define SOL_PACKET 0xfffd /* options for packet level */
/*
* Address families.
@@ -249,8 +264,9 @@ struct linger {
#define AF_POLICY 29 /* Security Policy DB socket */
#define AF_INET_OFFLOAD 30 /* Sun private; do not use */
#define AF_TRILL 31 /* TRILL interface */
+#define AF_PACKET 32 /* PF_PACKET Linux socket interface */
-#define AF_MAX 31
+#define AF_MAX 32
/*
* Protocol families, same as address families for now.
@@ -289,6 +305,7 @@ struct linger {
#define PF_POLICY AF_POLICY
#define PF_INET_OFFLOAD AF_INET_OFFLOAD /* Sun private; do not use */
#define PF_TRILL AF_TRILL
+#define PF_PACKET AF_PACKET
#define PF_MAX AF_MAX
diff --git a/usr/src/uts/common/sys/socket_impl.h b/usr/src/uts/common/sys/socket_impl.h
index 0b40451345..0eacd8a904 100644
--- a/usr/src/uts/common/sys/socket_impl.h
+++ b/usr/src/uts/common/sys/socket_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -35,8 +34,6 @@
#ifndef _SYS_SOCKET_IMPL_H
#define _SYS_SOCKET_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -105,6 +102,28 @@ struct sockaddr_storage {
};
#endif /* !defined(_XPG4_2) || defined(_XPG6) || defined(__EXTENSIONS__) */
+/*
+ * To be compatible with the Linux interfaces used, this structure is
+ * placed in socket_impl.h so that an include for <sys/socket.h> will
+ * pickup this structure. This structure is for use with PF_PACKET
+ * sockets.
+ */
+struct sockaddr_ll {
+ uint16_t sll_family;
+ uint16_t sll_protocol;
+ int32_t sll_ifindex;
+ uint16_t sll_hatype;
+ uint8_t sll_pkttype;
+ uint8_t sll_halen;
+ uint8_t sll_addr[8];
+};
+
+#define LINUX_SLL_HOST 0
+#define LINUX_SLL_BROADCAST 1
+#define LINUX_SLL_MULTICAST 2
+#define LINUX_SLL_OTHERHOST 3
+#define LINUX_SLL_OUTGOING 4
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/socket_proto.h b/usr/src/uts/common/sys/socket_proto.h
index 84299c4cdd..56e312930b 100644
--- a/usr/src/uts/common/sys/socket_proto.h
+++ b/usr/src/uts/common/sys/socket_proto.h
@@ -135,6 +135,38 @@ typedef int (*so_proto_fallback_func_t)(sock_lower_handle_t, queue_t *,
boolean_t, so_proto_quiesced_cb_t);
/*
+ * These functions return EOPNOTSUPP and are intended for the sockfs
+ * developer that doesn't wish to supply stubs for every function themselves.
+ */
+extern int sock_accept_notsupp(sock_lower_handle_t, sock_lower_handle_t,
+ sock_upper_handle_t, cred_t *);
+extern int sock_bind_notsupp(sock_lower_handle_t, struct sockaddr *,
+ socklen_t, cred_t *);
+extern int sock_listen_notsupp(sock_lower_handle_t, int, cred_t *);
+extern int sock_connect_notsupp(sock_lower_handle_t,
+ const struct sockaddr *, socklen_t, sock_connid_t *, cred_t *);
+extern int sock_getpeername_notsupp(sock_lower_handle_t, struct sockaddr *,
+ socklen_t *, cred_t *);
+extern int sock_getsockname_notsupp(sock_lower_handle_t, struct sockaddr *,
+ socklen_t *, cred_t *);
+extern int sock_getsockopt_notsupp(sock_lower_handle_t, int, int, void *,
+ socklen_t *, cred_t *);
+extern int sock_setsockopt_notsupp(sock_lower_handle_t, int, int,
+ const void *, socklen_t, cred_t *);
+extern int sock_send_notsupp(sock_lower_handle_t, mblk_t *,
+ struct nmsghdr *, cred_t *);
+extern int sock_send_uio_notsupp(sock_lower_handle_t, uio_t *,
+ struct nmsghdr *, cred_t *);
+extern int sock_recv_uio_notsupp(sock_lower_handle_t, uio_t *,
+ struct nmsghdr *, cred_t *);
+extern short sock_poll_notsupp(sock_lower_handle_t, short, int, cred_t *);
+extern int sock_shutdown_notsupp(sock_lower_handle_t, int, cred_t *);
+extern void sock_clr_flowctrl_notsupp(sock_lower_handle_t);
+extern int sock_ioctl_notsupp(sock_lower_handle_t, int, intptr_t, int,
+ int32_t *, cred_t *);
+extern int sock_close_notsupp(sock_lower_handle_t, int, cred_t *);
+
+/*
* Upcalls and related information
*/
diff --git a/usr/src/uts/common/sys/sockio.h b/usr/src/uts/common/sys/sockio.h
index 67a2eab07a..06b63d2969 100644
--- a/usr/src/uts/common/sys/sockio.h
+++ b/usr/src/uts/common/sys/sockio.h
@@ -310,6 +310,9 @@ extern "C" {
#define SIOCSQPTR _IOWR('i', 184, int) /* set q_ptr of stream */
+#define SIOCGIFHWADDR _IOWR('i', 185, int) /* PF_PACKET */
+#define SIOCGSTAMP _IOWR('i', 186, struct timeval) /* PF_PACKET */
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared
index f7369b8f45..8f18039cc3 100644
--- a/usr/src/uts/intel/Makefile.intel.shared
+++ b/usr/src/uts/intel/Makefile.intel.shared
@@ -213,6 +213,7 @@ DRV_KMODS_32 += audiovia97
DRV_KMODS += bl
DRV_KMODS += bge
DRV_KMODS += bofi
+DRV_KMODS += bpf
DRV_KMODS += bridge
DRV_KMODS += bscbus
DRV_KMODS += bscv
@@ -729,6 +730,7 @@ MAC_KMODS += mac_ib
#
# socketmod (kernel/socketmod)
#
+SOCKET_KMODS += sockpfp
SOCKET_KMODS += socksctp
SOCKET_KMODS += socksdp
diff --git a/usr/src/uts/intel/bpf/Makefile b/usr/src/uts/intel/bpf/Makefile
new file mode 100644
index 0000000000..410df18e54
--- /dev/null
+++ b/usr/src/uts/intel/bpf/Makefile
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/intel/bpf/Makefile
+#
+#
+# This makefile drives the production of the bpf driver
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = bpf
+OBJECTS = $(BPF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(BPF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/bpf
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+#
+CFLAGS += $(CCVERBOSE)
+LDFLAGS += -dy -Nmisc/mac -Nmisc/dls -Ndrv/ipnet -Nmisc/neti
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/dev/Makefile b/usr/src/uts/intel/dev/Makefile
index 0e273a9991..8347ffbde0 100644
--- a/usr/src/uts/intel/dev/Makefile
+++ b/usr/src/uts/intel/dev/Makefile
@@ -61,6 +61,7 @@ MODSTUBS_DIR = $(OBJS_DIR)
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -Nfs/devfs -Nmisc/dls
INC_PATH += -I$(UTSBASE)/common/fs/zfs
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Default build targets.
diff --git a/usr/src/uts/intel/dld/Makefile b/usr/src/uts/intel/dld/Makefile
index 31e6ebf37f..7ae59a0025 100644
--- a/usr/src/uts/intel/dld/Makefile
+++ b/usr/src/uts/intel/dld/Makefile
@@ -19,11 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
#
# Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -56,6 +54,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -N misc/dls -N misc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# For now, disable these lint checks; maintainers should endeavor
diff --git a/usr/src/uts/intel/dls/Makefile b/usr/src/uts/intel/dls/Makefile
index b9a41ec676..c882f9629e 100644
--- a/usr/src/uts/intel/dls/Makefile
+++ b/usr/src/uts/intel/dls/Makefile
@@ -19,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
#
# Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -54,6 +53,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -N misc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# For now, disable these lint checks; maintainers should endeavor
diff --git a/usr/src/uts/intel/ip/Makefile b/usr/src/uts/intel/ip/Makefile
index 6cd3d4ac5a..bfb91b74ec 100644
--- a/usr/src/uts/intel/ip/Makefile
+++ b/usr/src/uts/intel/ip/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
@@ -58,7 +58,11 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
CINLINEFLAGS = -xinline=tcp_set_ws_value
-CFLAGS += $(CINLINEFLAGS)
+CFLAGS += $(CINLINEFLAGS)
+#
+# To get the BPF header files included by ipnet.h
+#
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Depends on md5 and swrand (for SCTP). SCTP needs to depend on
diff --git a/usr/src/uts/intel/ipnet/Makefile b/usr/src/uts/intel/ipnet/Makefile
index a4be7c1ee6..42d2c66c4d 100644
--- a/usr/src/uts/intel/ipnet/Makefile
+++ b/usr/src/uts/intel/ipnet/Makefile
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# This makefile drives the production of the ipnet driver
@@ -77,6 +77,11 @@ LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
LDFLAGS += -dy -Ndrv/ip -Nmisc/neti -Nmisc/hook
#
+# To get the BPF header files
+#
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
+#
# Default build targets.
#
diff --git a/usr/src/uts/intel/iptun/Makefile b/usr/src/uts/intel/iptun/Makefile
index 6fc2289eaa..650b4581d2 100644
--- a/usr/src/uts/intel/iptun/Makefile
+++ b/usr/src/uts/intel/iptun/Makefile
@@ -54,6 +54,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -Ndrv/dld -Nmisc/dls -Nmisc/mac -Ndrv/ip
+INC_PATH += -I$(UTSBASE)/common/io/bpf
LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
diff --git a/usr/src/uts/intel/mac/Makefile b/usr/src/uts/intel/mac/Makefile
index 870b260f75..2fd9b15b79 100644
--- a/usr/src/uts/intel/mac/Makefile
+++ b/usr/src/uts/intel/mac/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
@@ -56,6 +56,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy
+INC_PATH += -I$(UTSBASE)/common/io/bpf
LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
diff --git a/usr/src/uts/intel/mac_ether/Makefile b/usr/src/uts/intel/mac_ether/Makefile
index 144cd7c812..889f7a73de 100644
--- a/usr/src/uts/intel/mac_ether/Makefile
+++ b/usr/src/uts/intel/mac_ether/Makefile
@@ -19,12 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
-#
# This makefile drives the production of the mac_ether MAC-Type plugin
# kernel module.
#
@@ -59,6 +56,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -N misc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Default build targets.
diff --git a/usr/src/uts/intel/mac_ib/Makefile b/usr/src/uts/intel/mac_ib/Makefile
index dd5d5a74ac..5045d1bbbf 100644
--- a/usr/src/uts/intel/mac_ib/Makefile
+++ b/usr/src/uts/intel/mac_ib/Makefile
@@ -19,12 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
-#
# This makefile drives the production of the mac_ib MAC-Type plugin
# kernel module.
#
@@ -59,6 +56,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -N misc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Default build targets.
diff --git a/usr/src/uts/intel/mac_wifi/Makefile b/usr/src/uts/intel/mac_wifi/Makefile
index 5b4dcfeb6a..29c6676219 100644
--- a/usr/src/uts/intel/mac_wifi/Makefile
+++ b/usr/src/uts/intel/mac_wifi/Makefile
@@ -19,12 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
-#
# This makefile drives the production of the mac_wifi plugin
# kernel module.
#
@@ -59,6 +56,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -Nmisc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Default build targets.
diff --git a/usr/src/uts/intel/os/minor_perm b/usr/src/uts/intel/os/minor_perm
index 0919c33000..a1874f8f4d 100644
--- a/usr/src/uts/intel/os/minor_perm
+++ b/usr/src/uts/intel/os/minor_perm
@@ -203,3 +203,4 @@ fm:* 0644 root sys
amd_iommu:* 0644 root sys
xpvtap:* 0666 root sys
clone:bridge 0666 root sys
+bpf:bpf 0666 root sys
diff --git a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major
index 84fc215d10..c7b3a97986 100644
--- a/usr/src/uts/intel/os/name_to_major
+++ b/usr/src/uts/intel/os/name_to_major
@@ -159,3 +159,4 @@ acpinex 264
bridge 265
iptun 266
iptunq 267
+bpf 268
diff --git a/usr/src/uts/intel/sockpfp/Makefile b/usr/src/uts/intel/sockpfp/Makefile
new file mode 100644
index 0000000000..75552bd987
--- /dev/null
+++ b/usr/src/uts/intel/sockpfp/Makefile
@@ -0,0 +1,95 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the sockpfp socket
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = sockpfp
+OBJECTS = $(PFP_SOCK_MOD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(PFP_SOCK_MOD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement and OS version
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Nmisc/dls -Nmisc/mac -Ndrv/bpf
+INC_PATH += -I$(UTSBASE)/common/inet/sockmods -I$(UTSBASE)/common/io/bpf
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/spdsock/Makefile b/usr/src/uts/intel/spdsock/Makefile
index 8db887a686..a64deaa4f8 100644
--- a/usr/src/uts/intel/spdsock/Makefile
+++ b/usr/src/uts/intel/spdsock/Makefile
@@ -20,11 +20,9 @@
#
#
# uts/intel/spdsock/Makefile
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the spdsock driver
# kernel module.
#
@@ -62,6 +60,8 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
#
LDFLAGS += -dy -Ndrv/ip
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
#
# For now, disable these lint checks; maintainers should endeavor
# to investigate and remove these for maximum lint coverage.
diff --git a/usr/src/uts/sparc/Makefile.sparc.shared b/usr/src/uts/sparc/Makefile.sparc.shared
index 6525a8c4f0..4ff198219e 100644
--- a/usr/src/uts/sparc/Makefile.sparc.shared
+++ b/usr/src/uts/sparc/Makefile.sparc.shared
@@ -227,6 +227,7 @@ DRV_KMODS += nsmb
DRV_KMODS += fm
DRV_KMODS += nulldriver
DRV_KMODS += bridge trill
+DRV_KMODS += bpf
#
# Don't build some of these for OpenSolaris, since they will be
@@ -508,6 +509,7 @@ MAC_KMODS += mac_ib
#
# socketmod (kernel/socketmod)
#
+SOCKET_KMODS += sockpfp
SOCKET_KMODS += socksctp
SOCKET_KMODS += socksdp
diff --git a/usr/src/uts/sparc/bpf/Makefile b/usr/src/uts/sparc/bpf/Makefile
new file mode 100644
index 0000000000..cf1e481312
--- /dev/null
+++ b/usr/src/uts/sparc/bpf/Makefile
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/sparc/bpf/Makefile
+#
+#
+# This makefile drives the production of the bpf driver
+# kernel module.
+#
+# sparc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = bpf
+OBJECTS = $(BPF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(BPF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/bpf
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+#
+CFLAGS += $(CCVERBOSE)
+LDFLAGS += -dy -Nmisc/mac -Nmisc/dls -Ndrv/ipnet -Nmisc/neti
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sparc/dev/Makefile b/usr/src/uts/sparc/dev/Makefile
index dbd1e0ff98..164e9486b2 100644
--- a/usr/src/uts/sparc/dev/Makefile
+++ b/usr/src/uts/sparc/dev/Makefile
@@ -62,6 +62,7 @@ MODSTUBS_DIR = $(OBJS_DIR)
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -Nfs/devfs -Nmisc/dls
INC_PATH += -I$(UTSBASE)/common/fs/zfs
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Default build targets.
diff --git a/usr/src/uts/sparc/dld/Makefile b/usr/src/uts/sparc/dld/Makefile
index 67f203c3a5..a212d0f14c 100644
--- a/usr/src/uts/sparc/dld/Makefile
+++ b/usr/src/uts/sparc/dld/Makefile
@@ -19,11 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
#
# Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -58,6 +56,7 @@ CFLAGS += $(CCVERBOSE)
$(RELEASE_BUILD)CFLAGS += -xinline=auto -xcrossfile
$(RELEASE_BUILD)COPTIMIZE = -xO5
LDFLAGS += -dy -N misc/dls -N misc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# For now, disable these lint checks; maintainers should endeavor
diff --git a/usr/src/uts/sparc/dls/Makefile b/usr/src/uts/sparc/dls/Makefile
index e17d2a3035..81dd25d8e3 100644
--- a/usr/src/uts/sparc/dls/Makefile
+++ b/usr/src/uts/sparc/dls/Makefile
@@ -19,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
#
# Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -56,6 +55,7 @@ CFLAGS += $(CCVERBOSE)
$(RELEASE_BUILD)CFLAGS += -xinline=auto -xcrossfile
$(RELEASE_BUILD)COPTIMIZE = -xO5
LDFLAGS += -dy -N misc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# For now, disable these lint checks; maintainers should endeavor
diff --git a/usr/src/uts/sparc/ip/Makefile b/usr/src/uts/sparc/ip/Makefile
index 515f079865..143d4cce6b 100644
--- a/usr/src/uts/sparc/ip/Makefile
+++ b/usr/src/uts/sparc/ip/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
@@ -60,6 +60,10 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
CFLAGS += $(CCVERBOSE)
CFLAGS += -xinline=tcp_set_ws_value
+#
+# To get the BPF header files included by ipnet.h
+#
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# For now, disable these lint checks; maintainers should endeavor
diff --git a/usr/src/uts/sparc/ipnet/Makefile b/usr/src/uts/sparc/ipnet/Makefile
index 3140f5581e..3693a3df57 100644
--- a/usr/src/uts/sparc/ipnet/Makefile
+++ b/usr/src/uts/sparc/ipnet/Makefile
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# This makefile drives the production of the ipnet driver
@@ -83,6 +83,11 @@ LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
LDFLAGS += -dy -Ndrv/ip -Nmisc/neti -Nmisc/hook
#
+# To get the BPF header files
+#
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
+#
# Default build targets.
#
diff --git a/usr/src/uts/sparc/iptun/Makefile b/usr/src/uts/sparc/iptun/Makefile
index f1faf02704..b48da88006 100644
--- a/usr/src/uts/sparc/iptun/Makefile
+++ b/usr/src/uts/sparc/iptun/Makefile
@@ -54,6 +54,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -Ndrv/dld -Nmisc/dls -Nmisc/mac -Ndrv/ip
+INC_PATH += -I$(UTSBASE)/common/io/bpf
LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
diff --git a/usr/src/uts/sparc/mac/Makefile b/usr/src/uts/sparc/mac/Makefile
index 5ef314a2ef..6c013b94d3 100644
--- a/usr/src/uts/sparc/mac/Makefile
+++ b/usr/src/uts/sparc/mac/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
@@ -60,6 +60,7 @@ CFLAGS += $(CCVERBOSE)
$(RELEASE_BUILD)CFLAGS += -xinline=auto -xcrossfile
$(RELEASE_BUILD)COPTIMIZE = -xO5
LDFLAGS += -dy
+INC_PATH += -I$(UTSBASE)/common/io/bpf
LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
diff --git a/usr/src/uts/sparc/mac_ether/Makefile b/usr/src/uts/sparc/mac_ether/Makefile
index 94912e731c..3ec066d4c6 100644
--- a/usr/src/uts/sparc/mac_ether/Makefile
+++ b/usr/src/uts/sparc/mac_ether/Makefile
@@ -19,12 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
-#
# This makefile drives the production of the mac_ether MAC-Type plugin
# kernel module.
#
@@ -59,6 +56,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -N misc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Default build targets.
diff --git a/usr/src/uts/sparc/mac_ib/Makefile b/usr/src/uts/sparc/mac_ib/Makefile
index 7009f57c20..339b88fb6b 100644
--- a/usr/src/uts/sparc/mac_ib/Makefile
+++ b/usr/src/uts/sparc/mac_ib/Makefile
@@ -19,12 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
-#
# This makefile drives the production of the mac_ib MAC-Type plugin
# kernel module.
#
@@ -59,6 +56,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -N misc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Default build targets.
diff --git a/usr/src/uts/sparc/mac_wifi/Makefile b/usr/src/uts/sparc/mac_wifi/Makefile
index 42c3993779..47c178ca61 100644
--- a/usr/src/uts/sparc/mac_wifi/Makefile
+++ b/usr/src/uts/sparc/mac_wifi/Makefile
@@ -19,12 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
-#
# This makefile drives the production of the mac_wifi plugin
# kernel module.
#
@@ -59,6 +56,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
CFLAGS += $(CCVERBOSE)
LDFLAGS += -dy -Nmisc/mac
+INC_PATH += -I$(UTSBASE)/common/io/bpf
#
# Default build targets.
diff --git a/usr/src/uts/sparc/os/minor_perm b/usr/src/uts/sparc/os/minor_perm
index 0804eec441..03fa2e57fe 100644
--- a/usr/src/uts/sparc/os/minor_perm
+++ b/usr/src/uts/sparc/os/minor_perm
@@ -192,3 +192,4 @@ bmc:bmc 0666 root sys
iptunq:* 0640 root sys
fm:* 0644 root sys
clone:bridge 0666 root sys
+bpf:bpf 0666 root sys
diff --git a/usr/src/uts/sparc/os/name_to_major b/usr/src/uts/sparc/os/name_to_major
index 33209230d4..c1b89d1ab1 100644
--- a/usr/src/uts/sparc/os/name_to_major
+++ b/usr/src/uts/sparc/os/name_to_major
@@ -231,3 +231,4 @@ simnet 283
bridge 284
iptun 285
iptunq 286
+bpf 287
diff --git a/usr/src/uts/sparc/sockpfp/Makefile b/usr/src/uts/sparc/sockpfp/Makefile
new file mode 100644
index 0000000000..f329b33e62
--- /dev/null
+++ b/usr/src/uts/sparc/sockpfp/Makefile
@@ -0,0 +1,96 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the sockpfp socket
+# kernel module.
+#
+# sparc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module name, its object/lint file sets and install path.
+#
+MODULE = sockpfp
+OBJECTS = $(PFP_SOCK_MOD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(PFP_SOCK_MOD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_SOCK_DIR)/$(MODULE)
+
+#
+# Include the common sparc rules (Makefile.sparc).
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Define the ALL, LINT and INSTALL target sets.
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement, module dependencies and include paths
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Nmisc/dls -Nmisc/mac -Ndrv/bpf
+INC_PATH += -I$(UTSBASE)/common/inet/sockmods -I$(UTSBASE)/common/io/bpf
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+
+#
+# Default build targets; each one expands to a *_DEPS list defined elsewhere.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include the common sparc targets (Makefile.targ).
+#
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sparc/spdsock/Makefile b/usr/src/uts/sparc/spdsock/Makefile
index 7253806fcf..09b6508f70 100644
--- a/usr/src/uts/sparc/spdsock/Makefile
+++ b/usr/src/uts/sparc/spdsock/Makefile
@@ -20,11 +20,9 @@
#
#
# uts/sparc/keysock/Makefile
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# This makefile drives the production of the spdsock driver
# kernel module.
#
@@ -63,6 +61,11 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
LDFLAGS += -dy -Ndrv/ip
#
+# Overrides
+#
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
+#
# lint pass one enforcement
#
CFLAGS += $(CCVERBOSE)